FIN: second index population complete, fingerprint count for each md5 complete (fp_count), collision detection complete, primary keys complete

This commit is contained in:
JohnE 2015-12-17 13:47:21 -08:00
parent cb4ab6d169
commit 6a79f61cb6
2 changed files with 39 additions and 15 deletions

View File

@ -14,11 +14,12 @@
"Creating... FINISHED."
"168 fingerprints in the new index"
[ Android Data ]
-Processing of android data
-removed verbose message for each fingerprint
-fix error message to look good
[ Module Cleanup ]
-remove print statements (from modules)
-add logging statements
-add exception throwing
@ -26,6 +27,9 @@
-test md5 sum create of tables
-test md5 sum create of entire db (tables concatenated)
[ Incorporate with another Product ]
-add module to other python project
-add unit test to other python project
[[ Code Snippets ]]

View File

@ -57,7 +57,7 @@ class FingerprintIndex:
''')
self.db_conn.execute('''
CREATE TABLE md5_tables (
md5_table TEXT,
md5_table TEXT PRIMARY KEY,
fp_list TEXT,
fp_count INTEGER);
''')
@ -85,7 +85,7 @@ class FingerprintIndex:
fq_file = fp_dir + os.path.sep + file
db.importJson(fq_file)
self.__insertMod_md5_all(db.db_hash, db.table_hashes.values(), file)
#self.__insertMod_md5_tables(db.table_hashes.values(), file)
self.__insertMod_md5_tables(db.table_hashes.values(), file)
finCount = finCount+1
if ((finCount % 5) == 0):
self.db_conn.commit()
@ -108,13 +108,14 @@ class FingerprintIndex:
''', [md5_db, ','.join(md5_list), filename, 1])
except sql.IntegrityError:
try:
fp_list = self.__selectFileList(md5_db)
(fp_list, fp_count) = self.__selectFileList(md5_db)
fp_list += ","+filename
fp_count += 1
# logging.info("fp_list=={}".format(fp_list))
self.db_conn.execute(
'''
UPDATE md5_all SET fp_list=? WHERE md5_db=?
''', [fp_list, md5_db])
UPDATE md5_all SET fp_list=?, fp_count=? WHERE md5_db=?
''', [fp_list, fp_count, md5_db])
except Exception as e:
raise FingerprintIndexWrite("Error updating a row\n{}".format(e))
except Exception as e:
@ -127,24 +128,43 @@ class FingerprintIndex:
try:
self.db_conn.execute(
'''
INSERT INTO md5_tables VALUES(?, ?)
''', [md5_table, filename])
except:
INSERT INTO md5_tables VALUES(?, ?, ?)
''', [md5_table, filename, 1])
except sql.IntegrityError:
try:
# modify row, add filename
pass
except:
pass
(fp_list, fp_count) = self.__selectFileList222(md5_table)
fp_list += ","+filename
fp_count += 1
self.db_conn.execute(
'''
UPDATE md5_tables SET fp_list=?, fp_count=? WHERE md5_table=?
''', [fp_list, fp_count, md5_table])
except Exception as e:
raise FingerprintIndexWrite("Error updating a row\n{}".format(e))
except Exception as e:
raise FingerprintIndexWrite("Error inserting a row\n{}".format(e))
#
def __selectFileList(self, md5_db):
try:
rows = self.cur.execute(
'''
SELECT fp_list from md5_all WHERE md5_db=?
SELECT fp_list, fp_count FROM md5_all WHERE md5_db=?
''', [md5_db])
for row in rows:
return row[0]
return (row[0], row[1])
except:
raise FingerprintIndexWrite("Error selecting fp_list\n{}".format(e))
#
def __selectFileList222(self, md5_table):
    """Fetch the fingerprint list and count stored for one table md5.

    Queries the ``md5_tables`` table through ``self.cur`` for the row whose
    ``md5_table`` column equals the given value.

    Args:
        md5_table: value matched against the ``md5_table`` column.

    Returns:
        A ``(fp_list, fp_count)`` tuple taken from the first matching row.
        If no row matches, the loop body never runs and the method falls
        through, returning ``None`` implicitly.

    Raises:
        FingerprintIndexWrite: if the query or row access fails.
    """
    try:
        rows = self.cur.execute(
            '''
            SELECT fp_list, fp_count FROM md5_tables WHERE md5_table=?
            ''', [md5_table])
        # Only the first matching row is used.
        for row in rows:
            return (row[0], row[1])
    except Exception as e:
        # Bind the exception: the original bare ``except:`` left ``e``
        # undefined, so building this message raised NameError instead.
        raise FingerprintIndexWrite("Error selecting fp_list\n{}".format(e))