WIP: second index is created and working on populating it
This commit is contained in:
parent
b22b34a1d7
commit
81fbd50dc5
|
@ -1,4 +1,31 @@
|
|||
|
||||
[[ TODO ]]
|
||||
|
||||
-MD5 Index
|
||||
-do I create an index of MD5 table hashes?
|
||||
(how do I lookup each table to compare)
|
||||
-add index comparison feature
|
||||
|
||||
-add "-idxf" "--index_force" to force a re-index
|
||||
-first give info about current index
|
||||
-next, create a new index
|
||||
-give info about new index
|
||||
"152 fingerprints in the index"
|
||||
"Creating a new index. Moved previous to backup"
|
||||
"Creating... FINISHED."
|
||||
"168 fingerprints in the new index"
|
||||
|
||||
|
||||
-Processing of android data
|
||||
-removed verbose message for each fingerprint
|
||||
-fix error message to look good
|
||||
|
||||
-remove print statements (from modules)
|
||||
-add logging statements
|
||||
-add exception throwing
|
||||
-add unit testing
|
||||
-test md5 sum creation of tables
|
||||
-test md5 sum creation of entire db (tables concatenated)
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -35,7 +35,7 @@ class FingerprintDB:
|
|||
|
||||
sqlmaster = "SELECT name, sql FROM sqlite_master WHERE type='table'"
|
||||
# version of the scanner used to create the fingerprint
|
||||
scanner_ver = "0.90"
|
||||
scanner_ver = "0.95"
|
||||
# version of the json file format; this number is inserted in the json fingerprint file and can be used to determine what is supported at the time of that version
|
||||
format_ver = "0.91"
|
||||
|
||||
|
|
|
@ -48,10 +48,15 @@ class FingerprintIndex:
|
|||
try:
|
||||
self.db_conn = sql.connect(fq_fpidx)
|
||||
self.db_conn.execute('''
|
||||
CREATE TABLE md5_index (
|
||||
md5_all TEXT PRIMARY KEY,
|
||||
CREATE TABLE md5_all (
|
||||
md5_db TEXT PRIMARY KEY,
|
||||
md5_list TEXT,
|
||||
file_name TEXT);
|
||||
fp_name TEXT);
|
||||
''')
|
||||
self.db_conn.execute('''
|
||||
CREATE TABLE md5_tables (
|
||||
md5_list TEXT,
|
||||
fp_name TEXT);
|
||||
''')
|
||||
logging.info("Successfully created index table")
|
||||
self.__populateIndex(fp_dir)
|
||||
|
@ -68,22 +73,18 @@ class FingerprintIndex:
|
|||
""" read each file, pull md5, add row to database """
|
||||
failCount = 0
|
||||
finCount = 0
|
||||
# self.db_conn.execute("INSERT INTO md5_index VALUES(?, ?, ?)", ["AAA", "BBB", "CCC"])
|
||||
# self.db_conn.execute("INSERT INTO md5_index VALUES('DDD', 'EEE', 'FFF')")
|
||||
try:
|
||||
db = FingerprintDB()
|
||||
files = os.listdir(fp_dir)
|
||||
# print ("Populating DB, files=={}".format(files))
|
||||
for file in files:
|
||||
try:
|
||||
fq_file = fp_dir + os.path.sep + file
|
||||
# print ("importJson file=={}".format(fq_file))
|
||||
db.importJson(fq_file)
|
||||
self.__insertRecord(db.db_hash, db.table_hashes.values(), file)
|
||||
# print("db_hash=={}".format(db.db_hash))
|
||||
# print("table_hashes={}".format(db.table_hashes))
|
||||
#md5_all = __createMD5Index(dbht)
|
||||
self.__insertMod_md5_all(db.db_hash, db.table_hashes.values(), file)
|
||||
#self.__insertMod_md5_tables(db.table_hashes.values(), file)
|
||||
finCount = finCount+1
|
||||
if ((finCount % 5) == 0):
|
||||
self.db_conn.commit()
|
||||
except:
|
||||
failCount = failCount+1
|
||||
except:
|
||||
|
@ -93,17 +94,37 @@ class FingerprintIndex:
|
|||
logging.info("Completed populating the index. Completed: {} Failed: {} ".format(str(finCount), str(failCount)))
|
||||
|
||||
#
|
||||
def __insertRecord(self, md5_all, md5_list, filename):
|
||||
def __insertMod_md5_all(self, md5_all, md5_list, filename):
|
||||
try:
|
||||
# logging.info("INSERT INTO md5_index VALUES(?, ?, ?): {}; {}; {}".format(md5_all, str(md5_list), filename))
|
||||
self.db_conn.execute(
|
||||
'''
|
||||
INSERT INTO md5_index VALUES(?, ?, ?)
|
||||
INSERT INTO md5_all VALUES(?, ?, ?)
|
||||
''', [md5_all, ','.join(md5_list), filename])
|
||||
except sql.IntegrityError as e:
|
||||
logging.error("*** Got here bitches!!!")
|
||||
except Exception as e:
|
||||
logging.error("Error inserting a row\n{}".format(e))
|
||||
logging.error("Error inserting a row\n{}\n{}".format(e, e.__class__.__name__))
|
||||
logging.error("md5_all: {}\nmd5_list: {}\nfilename: {}".format(md5_all, ','.join(md5_list), filename))
|
||||
raise FingerprintIndexWrite("Error inserting a row")
|
||||
|
||||
#
|
||||
def __insertMod_md5_tables(self, md5_list, filename):
|
||||
# insert the md5 of the table schemas
|
||||
for md5_table in md5_list:
|
||||
try:
|
||||
self.db_conn.execute(
|
||||
'''
|
||||
INSERT INTO md5_tables VALUES(?, ?)
|
||||
''', [md5_table, filename])
|
||||
except:
|
||||
try:
|
||||
# modify row, add filename
|
||||
pass
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
#
|
||||
def __checkIntegrity(self):
|
||||
""" Sanity check the number of files against the index rows """
|
||||
|
|
Loading…
Reference in New Issue