WIP: second index is created; populating it is still in progress

This commit is contained in:
JohnE 2015-12-14 15:19:20 -08:00
parent b22b34a1d7
commit 81fbd50dc5
3 changed files with 63 additions and 15 deletions

View File

@ -1,4 +1,31 @@
[[ TODO ]]
-MD5 Index
-do I create an index of MD5 table hashes?
(how do I lookup each table to compare)
-add index comparison feature
-add "-idxf" "--index_force" to force a re-index
-first give info about current index
-next, create a new index
-give info about new index
"152 fingerprints in the index"
"Creating a new index. Moved previous to backup"
"Creating... FINISHED."
"168 fingerprints in the new index"
-Processing of android data
-removed verbose message for each fingerprint
-fix error message to look good
-remove print statements (from modules)
-add logging statements
-add exception throwing
-add unit testing
-test md5 sum creation of tables
-test md5 sum creation of entire db (tables concatenated)

View File

@ -35,7 +35,7 @@ class FingerprintDB:
sqlmaster = "SELECT name, sql FROM sqlite_master WHERE type='table'" sqlmaster = "SELECT name, sql FROM sqlite_master WHERE type='table'"
# version of the scanner used to create the fingerprint # version of the scanner used to create the fingerprint
scanner_ver = "0.90" scanner_ver = "0.95"
# version of the json file format, this # is inserted in the json fingerprint file and can be used to determine what is supported at the time of that version # version of the json file format, this # is inserted in the json fingerprint file and can be used to determine what is supported at the time of that version
format_ver = "0.91" format_ver = "0.91"

View File

@ -48,10 +48,15 @@ class FingerprintIndex:
try: try:
self.db_conn = sql.connect(fq_fpidx) self.db_conn = sql.connect(fq_fpidx)
self.db_conn.execute(''' self.db_conn.execute('''
CREATE TABLE md5_index ( CREATE TABLE md5_all (
md5_all TEXT PRIMARY KEY, md5_db TEXT PRIMARY KEY,
md5_list TEXT, md5_list TEXT,
file_name TEXT); fp_name TEXT);
''')
self.db_conn.execute('''
CREATE TABLE md5_tables (
md5_list TEXT,
fp_name TEXT);
''') ''')
logging.info("Successfully created index table") logging.info("Successfully created index table")
self.__populateIndex(fp_dir) self.__populateIndex(fp_dir)
@ -68,22 +73,18 @@ class FingerprintIndex:
""" read each file, pull md5, add row to database """ """ read each file, pull md5, add row to database """
failCount = 0 failCount = 0
finCount = 0 finCount = 0
# self.db_conn.execute("INSERT INTO md5_index VALUES(?, ?, ?)", ["AAA", "BBB", "CCC"])
# self.db_conn.execute("INSERT INTO md5_index VALUES('DDD', 'EEE', 'FFF')")
try: try:
db = FingerprintDB() db = FingerprintDB()
files = os.listdir(fp_dir) files = os.listdir(fp_dir)
# print ("Populating DB, files=={}".format(files))
for file in files: for file in files:
try: try:
fq_file = fp_dir + os.path.sep + file fq_file = fp_dir + os.path.sep + file
# print ("importJson file=={}".format(fq_file))
db.importJson(fq_file) db.importJson(fq_file)
self.__insertRecord(db.db_hash, db.table_hashes.values(), file) self.__insertMod_md5_all(db.db_hash, db.table_hashes.values(), file)
# print("db_hash=={}".format(db.db_hash)) #self.__insertMod_md5_tables(db.table_hashes.values(), file)
# print("table_hashes={}".format(db.table_hashes))
#md5_all = __createMD5Index(dbht)
finCount = finCount+1 finCount = finCount+1
if ((finCount % 5) == 0):
self.db_conn.commit()
except: except:
failCount = failCount+1 failCount = failCount+1
except: except:
@ -93,17 +94,37 @@ class FingerprintIndex:
logging.info("Completed populating the index. Completed: {} Failed: {} ".format(str(finCount), str(failCount))) logging.info("Completed populating the index. Completed: {} Failed: {} ".format(str(finCount), str(failCount)))
# #
def __insertRecord(self, md5_all, md5_list, filename): def __insertMod_md5_all(self, md5_all, md5_list, filename):
try: try:
# logging.info("INSERT INTO md5_index VALUES(?, ?, ?): {}; {}; {}".format(md5_all, str(md5_list), filename)) # logging.info("INSERT INTO md5_index VALUES(?, ?, ?): {}; {}; {}".format(md5_all, str(md5_list), filename))
self.db_conn.execute( self.db_conn.execute(
''' '''
INSERT INTO md5_index VALUES(?, ?, ?) INSERT INTO md5_all VALUES(?, ?, ?)
''', [md5_all, ','.join(md5_list), filename]) ''', [md5_all, ','.join(md5_list), filename])
except sql.IntegrityError as e:
logging.error("*** Got here bitches!!!")
except Exception as e: except Exception as e:
logging.error("Error inserting a row\n{}".format(e)) logging.error("Error inserting a row\n{}\n{}".format(e, e.__class__.__name__))
logging.error("md5_all: {}\nmd5_list: {}\nfilename: {}".format(md5_all, ','.join(md5_list), filename))
raise FingerprintIndexWrite("Error inserting a row") raise FingerprintIndexWrite("Error inserting a row")
#
def __insertMod_md5_tables(self, md5_list, filename):
    """Insert one row into md5_tables for each table-schema md5.

    Args:
        md5_list: iterable of table-schema md5 values to index.
        filename: value stored next to each md5 (second column of the row).

    Inserts are best effort: a failing insert is logged and the loop
    continues with the next md5. Nothing is committed here; the caller
    owns the transaction on self.db_conn.
    """
    # insert the md5 of the table schemas
    for md5_table in md5_list:
        try:
            self.db_conn.execute(
                '''
                INSERT INTO md5_tables VALUES(?, ?)
                ''', [md5_table, filename])
        except Exception as e:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt / SystemExit still propagate, and log the
            # failure instead of discarding it silently.
            # TODO: modify the existing row to add filename instead of
            # skipping the md5.
            logging.error(
                "Error inserting a row into md5_tables\n{}\n{}".format(
                    e, e.__class__.__name__))
            logging.error(
                "md5_table: {}\nfilename: {}".format(md5_table, filename))
# #
def __checkIntegrity(self): def __checkIntegrity(self):
""" Sanity check the number of files against the index rows """ """ Sanity check the number of files against the index rows """