FIN: updated the insert into the md5_all table with collision detection: on a collision, it updates the existing row and appends the new fingerprint to the fp_list column

This commit is contained in:
JohnE 2015-12-14 20:56:10 -08:00
parent 81fbd50dc5
commit 2312d54984
2 changed files with 38 additions and 18 deletions

View File

@ -2,9 +2,8 @@
[[ TODO ]]
-MD5 Index
-do I create an index of MD5 table hashes?
(how do I lookup each table to compare)
-add index comparison feature
-add column, fp_count NUMBER (number of fingerprints for this md5 schema)
-complete populating the second index
-add "-idxf" "--index_force" to force a re-index
-first give info about current index

View File

@ -23,6 +23,7 @@ class FingerprintIndex:
#
def __init__(self):
self.db_conn = None
self.cur = None
return
#
@ -51,18 +52,18 @@ class FingerprintIndex:
CREATE TABLE md5_all (
md5_db TEXT PRIMARY KEY,
md5_list TEXT,
fp_name TEXT);
fp_list TEXT);
''')
self.db_conn.execute('''
CREATE TABLE md5_tables (
md5_list TEXT,
fp_name TEXT);
md5_table TEXT,
fp_list TEXT);
''')
logging.info("Successfully created index table")
self.__populateIndex(fp_dir)
logging.info("Successfully populated the index")
except:
raise FingerprintIndexWrite("Error creating an index file")
except Exception as e:
raise FingerprintIndexWrite("Error creating an index file\n{}".format(e))
finally:
if self.db_conn:
self.db_conn.close()
@ -74,6 +75,7 @@ class FingerprintIndex:
failCount = 0
finCount = 0
try:
self.cur = self.db_conn.cursor()
db = FingerprintDB()
files = os.listdir(fp_dir)
for file in files:
@ -85,7 +87,8 @@ class FingerprintIndex:
finCount = finCount+1
if ((finCount % 5) == 0):
self.db_conn.commit()
except:
except Exception as e:
logging.error(e)
failCount = failCount+1
except:
pass
@ -94,24 +97,31 @@ class FingerprintIndex:
logging.info("Completed populating the index. Completed: {} Failed: {} ".format(str(finCount), str(failCount)))
#
def __insertMod_md5_all(self, md5_all, md5_list, filename):
def __insertMod_md5_all(self, md5_db, md5_list, filename):
try:
# logging.info("INSERT INTO md5_index VALUES(?, ?, ?): {}; {}; {}".format(md5_all, str(md5_list), filename))
self.db_conn.execute(
'''
INSERT INTO md5_all VALUES(?, ?, ?)
''', [md5_all, ','.join(md5_list), filename])
except sql.IntegrityError as e:
logging.error("*** Got here bitches!!!")
''', [md5_db, ','.join(md5_list), filename])
except sql.IntegrityError:
try:
fp_list = self.__selectFileList(md5_db)
fp_list += ","+filename
# logging.info("fp_list=={}".format(fp_list))
self.db_conn.execute(
'''
UPDATE md5_all SET fp_list=? WHERE md5_db=?
''', [fp_list, md5_db])
except Exception as e:
logging.error("Error inserting a row\n{}\n{}".format(e, e.__class__.__name__))
logging.error("md5_all: {}\nmd5_list: {}\nfilename: {}".format(md5_all, ','.join(md5_list), filename))
raise FingerprintIndexWrite("Error inserting a row")
raise FingerprintIndexWrite("Error updating a row\n{}".format(e))
except Exception as e:
raise FingerprintIndexWrite("Error inserting a row\n{}".format(e))
#
def __insertMod_md5_tables(self, md5_list, filename):
def __insertMod_md5_tables(self, md5_db, filename):
# insert the md5 of the table schemas
for md5_table in md5_list:
for md5_table in md5_db:
try:
self.db_conn.execute(
'''
@ -124,6 +134,17 @@ class FingerprintIndex:
except:
pass
#
def __selectFileList(self, md5_db):
    """Return the fp_list value stored in md5_all for the given md5_db key.

    Runs a parameterized SELECT on self.cur and returns the first column
    of the first matching row, or None when no row matches.

    Raises:
        FingerprintIndexWrite: if the query fails; the message includes
            the text of the underlying error.
    """
    try:
        rows = self.cur.execute(
            '''
            SELECT fp_list from md5_all WHERE md5_db=?
            ''', [md5_db])
        # Only the first matching row is used; md5_db is declared as the
        # PRIMARY KEY of md5_all, so at most one row is expected.
        for row in rows:
            return row[0]
        return None
    except Exception as e:
        # The exception must be bound here: with a bare `except:` the name
        # `e` was undefined, so building the message raised NameError and
        # callers never saw FingerprintIndexWrite.
        raise FingerprintIndexWrite("Error selecting fp_list\n{}".format(e))
#
def __checkIntegrity(self):