FIN: second index population complete, fingerprint count for each md5 complete (fp_count), collision detection complete, primary keys complete

This commit is contained in:
JohnE 2015-12-17 13:47:21 -08:00
parent cb4ab6d169
commit 6a79f61cb6
2 changed files with 39 additions and 15 deletions

View File

@@ -14,11 +14,12 @@
"Creating... FINISHED." "Creating... FINISHED."
"168 fingerprints in the new index" "168 fingerprints in the new index"
[ Android Data ]
-Processing of android data -Processing of android data
-removed verbose message for each fingerprint -removed verbose message for each fingerprint
-fix error message to look good -fix error message to look good
[ Module Cleanup ]
-remove print statements (from modules) -remove print statements (from modules)
-add logging statements -add logging statements
-add exception throwing -add exception throwing
@@ -26,6 +27,9 @@
-test md5 sum create of tables -test md5 sum create of tables
-test md5 sum create of entire db (tables concatenated) -test md5 sum create of entire db (tables concatenated)
[ Incorporate with another Product ]
-add module to other python project
-add unit test to other python project
[[ Code Snippets ]] [[ Code Snippets ]]

View File

@@ -57,7 +57,7 @@ class FingerprintIndex:
''') ''')
self.db_conn.execute(''' self.db_conn.execute('''
CREATE TABLE md5_tables ( CREATE TABLE md5_tables (
md5_table TEXT, md5_table TEXT PRIMARY KEY,
fp_list TEXT, fp_list TEXT,
fp_count INTEGER); fp_count INTEGER);
''') ''')
@@ -85,7 +85,7 @@ class FingerprintIndex:
fq_file = fp_dir + os.path.sep + file fq_file = fp_dir + os.path.sep + file
db.importJson(fq_file) db.importJson(fq_file)
self.__insertMod_md5_all(db.db_hash, db.table_hashes.values(), file) self.__insertMod_md5_all(db.db_hash, db.table_hashes.values(), file)
#self.__insertMod_md5_tables(db.table_hashes.values(), file) self.__insertMod_md5_tables(db.table_hashes.values(), file)
finCount = finCount+1 finCount = finCount+1
if ((finCount % 5) == 0): if ((finCount % 5) == 0):
self.db_conn.commit() self.db_conn.commit()
@@ -108,13 +108,14 @@ class FingerprintIndex:
''', [md5_db, ','.join(md5_list), filename, 1]) ''', [md5_db, ','.join(md5_list), filename, 1])
except sql.IntegrityError: except sql.IntegrityError:
try: try:
fp_list = self.__selectFileList(md5_db) (fp_list, fp_count) = self.__selectFileList(md5_db)
fp_list += ","+filename fp_list += ","+filename
fp_count += 1
# logging.info("fp_list=={}".format(fp_list)) # logging.info("fp_list=={}".format(fp_list))
self.db_conn.execute( self.db_conn.execute(
''' '''
UPDATE md5_all SET fp_list=? WHERE md5_db=? UPDATE md5_all SET fp_list=?, fp_count=? WHERE md5_db=?
''', [fp_list, md5_db]) ''', [fp_list, fp_count, md5_db])
except Exception as e: except Exception as e:
raise FingerprintIndexWrite("Error updating a row\n{}".format(e)) raise FingerprintIndexWrite("Error updating a row\n{}".format(e))
except Exception as e: except Exception as e:
@@ -127,24 +128,43 @@ class FingerprintIndex:
try: try:
self.db_conn.execute( self.db_conn.execute(
''' '''
INSERT INTO md5_tables VALUES(?, ?) INSERT INTO md5_tables VALUES(?, ?, ?)
''', [md5_table, filename]) ''', [md5_table, filename, 1])
except: except sql.IntegrityError:
try: try:
# modify row, add filename (fp_list, fp_count) = self.__selectFileList222(md5_table)
pass fp_list += ","+filename
except: fp_count += 1
pass self.db_conn.execute(
'''
UPDATE md5_tables SET fp_list=?, fp_count=? WHERE md5_table=?
''', [fp_list, fp_count, md5_table])
except Exception as e:
raise FingerprintIndexWrite("Error updating a row\n{}".format(e))
except Exception as e:
raise FingerprintIndexWrite("Error inserting a row\n{}".format(e))
# #
def __selectFileList(self, md5_db): def __selectFileList(self, md5_db):
try: try:
rows = self.cur.execute( rows = self.cur.execute(
''' '''
SELECT fp_list from md5_all WHERE md5_db=? SELECT fp_list, fp_count FROM md5_all WHERE md5_db=?
''', [md5_db]) ''', [md5_db])
for row in rows: for row in rows:
return row[0] return (row[0], row[1])
except:
raise FingerprintIndexWrite("Error selecting fp_list\n{}".format(e))
#
def __selectFileList222(self, md5_table):
try:
rows = self.cur.execute(
'''
SELECT fp_list, fp_count FROM md5_tables WHERE md5_table=?
''', [md5_table])
for row in rows:
return (row[0], row[1])
except: except:
raise FingerprintIndexWrite("Error selecting fp_list\n{}".format(e)) raise FingerprintIndexWrite("Error selecting fp_list\n{}".format(e))