From 2312d54984814bd841054376259701e7ae4df8ae Mon Sep 17 00:00:00 2001 From: JohnE Date: Mon, 14 Dec 2015 20:56:10 -0800 Subject: [PATCH] FIN: updated the insert into md5_all table with collision detection, updates the row and adds another fingerprint to the fp_list column --- docs/devnotes | 5 ++-- libs/fingerprint_index.py | 51 +++++++++++++++++++++++++++------------ 2 files changed, 38 insertions(+), 18 deletions(-) diff --git a/docs/devnotes b/docs/devnotes index bd86d78..e7b17c2 100644 --- a/docs/devnotes +++ b/docs/devnotes @@ -2,9 +2,8 @@ [[ TODO ]] -MD5 Index - -do I create an index of MD5 table hashes? - (how do I lookup each table to compare) - -add index comparison feature + -add column, fp_count NUMBER (number of fingerprints for this md5 schema) + -complete populating the second index -add "-idxf" "--index_force" to force a re-index -first give info about current index diff --git a/libs/fingerprint_index.py b/libs/fingerprint_index.py index 0cc14d8..5b0d03e 100644 --- a/libs/fingerprint_index.py +++ b/libs/fingerprint_index.py @@ -23,6 +23,7 @@ class FingerprintIndex: # def __init__(self): self.db_conn = None + self.cur = None return # @@ -51,18 +52,18 @@ class FingerprintIndex: CREATE TABLE md5_all ( md5_db TEXT PRIMARY KEY, md5_list TEXT, - fp_name TEXT); + fp_list TEXT); ''') self.db_conn.execute(''' CREATE TABLE md5_tables ( - md5_list TEXT, - fp_name TEXT); + md5_table TEXT, + fp_list TEXT); ''') logging.info("Successfully created index table") self.__populateIndex(fp_dir) logging.info("Successfully populated the index") - except: - raise FingerprintIndexWrite("Error creating an index file") + except Exception as e: + raise FingerprintIndexWrite("Error creating an index file\n{}".format(e)) finally: if self.db_conn: self.db_conn.close() @@ -74,6 +75,7 @@ class FingerprintIndex: failCount = 0 finCount = 0 try: + self.cur = self.db_conn.cursor() db = FingerprintDB() files = os.listdir(fp_dir) for file in files: @@ -85,7 +87,8 @@ class FingerprintIndex: finCount = finCount+1 if ((finCount % 5) == 0): self.db_conn.commit() - except: + except Exception as e: + logging.error(e) failCount = failCount+1 except: pass @@ -94,24 +97,31 @@ class FingerprintIndex: logging.info("Completed populating the index. Completed: {} Failed: {} ".format(str(finCount), str(failCount))) # - def __insertMod_md5_all(self, md5_all, md5_list, filename): + def __insertMod_md5_all(self, md5_db, md5_list, filename): try: # logging.info("INSERT INTO md5_index VALUES(?, ?, ?): {}; {}; {}".format(md5_all, str(md5_list), filename)) self.db_conn.execute( ''' INSERT INTO md5_all VALUES(?, ?, ?) - ''', [md5_all, ','.join(md5_list), filename]) - except sql.IntegrityError as e: - logging.error("*** Got here bitches!!!") + ''', [md5_db, ','.join(md5_list), filename]) + except sql.IntegrityError: + try: + fp_list = self.__selectFileList(md5_db) + fp_list += ","+filename + # logging.info("fp_list=={}".format(fp_list)) + self.db_conn.execute( + ''' + UPDATE md5_all SET fp_list=? WHERE md5_db=? + ''', [fp_list, md5_db]) + except Exception as e: + raise FingerprintIndexWrite("Error updating a row\n{}".format(e)) except Exception as e: - logging.error("Error inserting a row\n{}\n{}".format(e, e.__class__.__name__)) - logging.error("md5_all: {}\nmd5_list: {}\nfilename: {}".format(md5_all, ','.join(md5_list), filename)) - raise FingerprintIndexWrite("Error inserting a row") + raise FingerprintIndexWrite("Error inserting a row\n{}".format(e)) # - def __insertMod_md5_tables(self, md5_list, filename): + def __insertMod_md5_tables(self, md5_db, filename): # insert the md5 of the table schemas - for md5_table in md5_list: + for md5_table in md5_db: try: self.db_conn.execute( ''' @@ -124,6 +134,17 @@ class FingerprintIndex: except: pass + # + def __selectFileList(self, md5_db): + try: + rows = self.cur.execute( + ''' + SELECT fp_list from md5_all WHERE md5_db=? + ''', [md5_db]) + for row in rows: + return row[0] + except: + raise FingerprintIndexWrite("Error selecting fp_list\n{}".format(e)) # def __checkIntegrity(self):