FIN: updated the insert into the md5_all table with collision detection; on a collision it now updates the existing row and appends another fingerprint to the fp_list column

This commit is contained in:
JohnE 2015-12-14 20:56:10 -08:00
parent 81fbd50dc5
commit 2312d54984
2 changed files with 38 additions and 18 deletions

View File

@ -2,9 +2,8 @@
[[ TODO ]] [[ TODO ]]
-MD5 Index -MD5 Index
-do I create an index of MD5 table hashes? -add column, fp_count NUMBER (number of fingerprints for this md5 schema)
(how do I lookup each table to compare) -complete populating the second index
-add index comparison feature
-add "-idxf" "--index_force" to force a re-index -add "-idxf" "--index_force" to force a re-index
-first give info about current index -first give info about current index

View File

@ -23,6 +23,7 @@ class FingerprintIndex:
# #
def __init__(self): def __init__(self):
self.db_conn = None self.db_conn = None
self.cur = None
return return
# #
@ -51,18 +52,18 @@ class FingerprintIndex:
CREATE TABLE md5_all ( CREATE TABLE md5_all (
md5_db TEXT PRIMARY KEY, md5_db TEXT PRIMARY KEY,
md5_list TEXT, md5_list TEXT,
fp_name TEXT); fp_list TEXT);
''') ''')
self.db_conn.execute(''' self.db_conn.execute('''
CREATE TABLE md5_tables ( CREATE TABLE md5_tables (
md5_list TEXT, md5_table TEXT,
fp_name TEXT); fp_list TEXT);
''') ''')
logging.info("Successfully created index table") logging.info("Successfully created index table")
self.__populateIndex(fp_dir) self.__populateIndex(fp_dir)
logging.info("Successfully populated the index") logging.info("Successfully populated the index")
except: except Exception as e:
raise FingerprintIndexWrite("Error creating an index file") raise FingerprintIndexWrite("Error creating an index file\n{}".format(e))
finally: finally:
if self.db_conn: if self.db_conn:
self.db_conn.close() self.db_conn.close()
@ -74,6 +75,7 @@ class FingerprintIndex:
failCount = 0 failCount = 0
finCount = 0 finCount = 0
try: try:
self.cur = self.db_conn.cursor()
db = FingerprintDB() db = FingerprintDB()
files = os.listdir(fp_dir) files = os.listdir(fp_dir)
for file in files: for file in files:
@ -85,7 +87,8 @@ class FingerprintIndex:
finCount = finCount+1 finCount = finCount+1
if ((finCount % 5) == 0): if ((finCount % 5) == 0):
self.db_conn.commit() self.db_conn.commit()
except: except Exception as e:
logging.error(e)
failCount = failCount+1 failCount = failCount+1
except: except:
pass pass
@ -94,24 +97,31 @@ class FingerprintIndex:
logging.info("Completed populating the index. Completed: {} Failed: {} ".format(str(finCount), str(failCount))) logging.info("Completed populating the index. Completed: {} Failed: {} ".format(str(finCount), str(failCount)))
# #
def __insertMod_md5_all(self, md5_all, md5_list, filename): def __insertMod_md5_all(self, md5_db, md5_list, filename):
try: try:
# logging.info("INSERT INTO md5_index VALUES(?, ?, ?): {}; {}; {}".format(md5_all, str(md5_list), filename)) # logging.info("INSERT INTO md5_index VALUES(?, ?, ?): {}; {}; {}".format(md5_all, str(md5_list), filename))
self.db_conn.execute( self.db_conn.execute(
''' '''
INSERT INTO md5_all VALUES(?, ?, ?) INSERT INTO md5_all VALUES(?, ?, ?)
''', [md5_all, ','.join(md5_list), filename]) ''', [md5_db, ','.join(md5_list), filename])
except sql.IntegrityError as e: except sql.IntegrityError:
logging.error("*** Got here bitches!!!") try:
fp_list = self.__selectFileList(md5_db)
fp_list += ","+filename
# logging.info("fp_list=={}".format(fp_list))
self.db_conn.execute(
'''
UPDATE md5_all SET fp_list=? WHERE md5_db=?
''', [fp_list, md5_db])
except Exception as e:
raise FingerprintIndexWrite("Error updating a row\n{}".format(e))
except Exception as e: except Exception as e:
logging.error("Error inserting a row\n{}\n{}".format(e, e.__class__.__name__)) raise FingerprintIndexWrite("Error inserting a row\n{}".format(e))
logging.error("md5_all: {}\nmd5_list: {}\nfilename: {}".format(md5_all, ','.join(md5_list), filename))
raise FingerprintIndexWrite("Error inserting a row")
# #
def __insertMod_md5_tables(self, md5_list, filename): def __insertMod_md5_tables(self, md5_db, filename):
# insert the md5 of the table schemas # insert the md5 of the table schemas
for md5_table in md5_list: for md5_table in md5_db:
try: try:
self.db_conn.execute( self.db_conn.execute(
''' '''
@ -124,6 +134,17 @@ class FingerprintIndex:
except: except:
pass pass
#
def __selectFileList(self, md5_db):
    """Return the fp_list value stored in md5_all for the given md5_db key.

    Runs a parameterized SELECT on self.cur and returns the first column
    of the first matching row. Returns None when no row matches.

    Raises:
        FingerprintIndexWrite: if executing the query or reading the
            result fails; the underlying error text is appended to the
            message.
    """
    try:
        rows = self.cur.execute(
            '''
            SELECT fp_list from md5_all WHERE md5_db=?
            ''', [md5_db])
        # Only the first matching row is used.
        for row in rows:
            return row[0]
        # No matching row: make the None result explicit.
        return None
    except Exception as e:
        # The exception must be bound to a name here: the message below
        # formats it, and an unbound name would raise NameError instead of
        # the intended FingerprintIndexWrite.
        raise FingerprintIndexWrite("Error selecting fp_list\n{}".format(e))
# #
def __checkIntegrity(self): def __checkIntegrity(self):