From 6a79f61cb65d20e3da3b43abdc8326048320b13d Mon Sep 17 00:00:00 2001 From: JohnE Date: Thu, 17 Dec 2015 13:47:21 -0800 Subject: [PATCH] FIN: second index population complete, fingerprint count for each md5 complete (fp_count), collision detection complete, primary keys complete --- docs/devnotes | 6 ++++- libs/fingerprint_index.py | 48 +++++++++++++++++++++++++++------------ 2 files changed, 39 insertions(+), 15 deletions(-) diff --git a/docs/devnotes b/docs/devnotes index e7b17c2..5ab623e 100644 --- a/docs/devnotes +++ b/docs/devnotes @@ -14,11 +14,12 @@ "Creating... FINISHED." "168 fingerprints in the new index" - +[ Android Data ] -Processing of android data -removed verbose messsage for each fingerprint -fix error message to look good +[ Module Cleanup ] -remove print statements (from modules) -add logging statements -add exception throwing @@ -26,6 +27,9 @@ -test md5 sum create of tables -test md5 sum create of entire db (tables concatenated) +[ Incorporate with another Product ] +-add module to other python project +-add unit test to other python project [[ Code Snippets ]] diff --git a/libs/fingerprint_index.py b/libs/fingerprint_index.py index 4d96b01..d0c4bd6 100644 --- a/libs/fingerprint_index.py +++ b/libs/fingerprint_index.py @@ -57,7 +57,7 @@ class FingerprintIndex: ''') self.db_conn.execute(''' CREATE TABLE md5_tables ( - md5_table TEXT, + md5_table TEXT PRIMARY KEY, fp_list TEXT, fp_count INTEGER); ''') @@ -85,7 +85,7 @@ class FingerprintIndex: fq_file = fp_dir + os.path.sep + file db.importJson(fq_file) self.__insertMod_md5_all(db.db_hash, db.table_hashes.values(), file) - #self.__insertMod_md5_tables(db.table_hashes.values(), file) + self.__insertMod_md5_tables(db.table_hashes.values(), file) finCount = finCount+1 if ((finCount % 5) == 0): self.db_conn.commit() @@ -108,13 +108,14 @@ class FingerprintIndex: ''', [md5_db, ','.join(md5_list), filename, 1]) except sql.IntegrityError: try: - fp_list = self.__selectFileList(md5_db) + (fp_list, fp_count) = self.__selectFileList(md5_db) fp_list += ","+filename + fp_count += 1 # logging.info("fp_list=={}".format(fp_list)) self.db_conn.execute( ''' - UPDATE md5_all SET fp_list=? WHERE md5_db=? - ''', [fp_list, md5_db]) + UPDATE md5_all SET fp_list=?, fp_count=? WHERE md5_db=? + ''', [fp_list, fp_count, md5_db]) except Exception as e: raise FingerprintIndexWrite("Error updating a row\n{}".format(e)) except Exception as e: @@ -127,24 +128,43 @@ class FingerprintIndex: try: self.db_conn.execute( ''' - INSERT INTO md5_tables VALUES(?, ?) - ''', [md5_table, filename]) - except: + INSERT INTO md5_tables VALUES(?, ?, ?) + ''', [md5_table, filename, 1]) + except sql.IntegrityError: try: - # modify row, add filename - pass - except: - pass + (fp_list, fp_count) = self.__selectFileList222(md5_table) + fp_list += ","+filename + fp_count += 1 + self.db_conn.execute( + ''' + UPDATE md5_tables SET fp_list=?, fp_count=? WHERE md5_table=? + ''', [fp_list, fp_count, md5_table]) + except Exception as e: + raise FingerprintIndexWrite("Error updating a row\n{}".format(e)) + except Exception as e: + raise FingerprintIndexWrite("Error inserting a row\n{}".format(e)) # def __selectFileList(self, md5_db): try: rows = self.cur.execute( ''' - SELECT fp_list from md5_all WHERE md5_db=? + SELECT fp_list, fp_count FROM md5_all WHERE md5_db=? ''', [md5_db]) for row in rows: - return row[0] + return (row[0], row[1]) + except: + raise FingerprintIndexWrite("Error selecting fp_list\n{}".format(e)) + + # + def __selectFileList222(self, md5_table): + try: + rows = self.cur.execute( + ''' + SELECT fp_list, fp_count FROM md5_tables WHERE md5_table=? + ''', [md5_table]) + for row in rows: + return (row[0], row[1]) except: raise FingerprintIndexWrite("Error selecting fp_list\n{}".format(e))