#
# Index of fingerprint files.
#
# Each row of the index describes one fingerprint file:
#     [ md5_all, md5_list, file_name ]
#
import os
import sys
import logging
import sqlite3 as sql

from libs.exceptions import FingerprintIndexWrite
from libs.exceptions import FingerprintIndexOpen
from libs.fingerprint import FingerprintDB

# prefixed with "_" so that it will be listed first and visible
INDEX_FILENAME = '_index_dpfp.db'


class FingerprintIndex:
    """
    Class handling an index of fingerprints
    for efficiently locating a fingerprint.

    The index is a sqlite database named INDEX_FILENAME stored inside the
    fingerprint directory itself. ``db_conn`` holds the open sqlite
    connection, or None when no index is open.
    """

    def __init__(self):
        self.db_conn = None

    def openIndex(self, fp_dir):
        """
        Open the index stored in ``fp_dir``, creating it first if missing.

        On success ``self.db_conn`` is left open for further use; release it
        with closeIndex(). On failure ``self.db_conn`` is None.

        Raises:
            FingerprintIndexOpen: the index could not be opened or created.
        """
        fq_fpidx = os.path.join(fp_dir, INDEX_FILENAME)
        # Release a connection left over from an earlier call so that it is
        # not leaked when db_conn is re-assigned below.
        self.closeIndex()
        try:
            if not os.path.isfile(fq_fpidx):
                logging.info("No index file found, creating index now...")
                # createIndex() closes its own connection when it finishes,
                # so the index is (re)opened below in either case.
                self.createIndex(fp_dir)
            self.db_conn = sql.connect(fq_fpidx)
            logging.info("DB Open SUCCESSFUL")
        except Exception:
            # Log the root cause here; the exception raised below only
            # carries a generic message.
            logging.exception("Unable to open index file: {}".format(fq_fpidx))
            self.closeIndex()
            raise FingerprintIndexOpen("Error opening/creating an index file")

    def closeIndex(self):
        """ Close the index connection, if one is open. Safe to call repeatedly. """
        if self.db_conn is not None:
            try:
                self.db_conn.close()
            finally:
                self.db_conn = None

    def createIndex(self, fp_dir):
        """
        Create the index database in ``fp_dir`` and fill it with one row per
        fingerprint file found there.

        The connection used for the build is always closed before returning
        and ``self.db_conn`` is reset to None.

        Raises:
            FingerprintIndexWrite: the index file or its tables could not be
                created (this includes the tables already existing).
        """
        fq_fpidx = os.path.join(fp_dir, INDEX_FILENAME)
        # Do not leak a connection that is already open on this instance.
        self.closeIndex()
        try:
            self.db_conn = sql.connect(fq_fpidx)
            self.db_conn.execute('''
                CREATE TABLE md5_all (
                    md5_db TEXT PRIMARY KEY,
                    md5_list TEXT,
                    fp_name TEXT);
                ''')
            self.db_conn.execute('''
                CREATE TABLE md5_tables (
                    md5_list TEXT,
                    fp_name TEXT);
                ''')
            logging.info("Successfully created index table")
            self.__populateIndex(fp_dir)
            logging.info("Successfully populated the index")
        except Exception:
            logging.exception("Unable to create index file: {}".format(fq_fpidx))
            raise FingerprintIndexWrite("Error creating an index file")
        finally:
            self.closeIndex()

    def __populateIndex(self, fp_dir):
        """
        Read each file in ``fp_dir``, pull its md5 values and add a row to
        the database.

        This is best effort: a file that cannot be imported or inserted is
        counted as failed and reported in the log, it does not abort the run.
        Requires ``self.db_conn`` to be an open connection.
        """
        failCount = 0
        finCount = 0
        try:
            db = FingerprintDB()
            for fname in os.listdir(fp_dir):
                # The index database lives in the same directory; it is not
                # a fingerprint and must not be imported as one.
                if fname == INDEX_FILENAME:
                    continue
                try:
                    db.importJson(os.path.join(fp_dir, fname))
                    self.__insertMod_md5_all(db.db_hash, db.table_hashes.values(), fname)
                    finCount += 1
                    # Commit in small batches instead of once per row.
                    if finCount % 5 == 0:
                        self.db_conn.commit()
                except Exception as e:
                    failCount += 1
                    logging.warning("Unable to index fingerprint file {}: {}".format(fname, e))
        except Exception:
            # Setup failed (e.g. the directory could not be listed). Keep the
            # best-effort contract, but do not hide the reason.
            logging.exception("Aborted populating the index for: {}".format(fp_dir))
        finally:
            # Flush whatever is left over from the last partial batch.
            self.db_conn.commit()
            logging.info("Completed populating the index. Completed: {} Failed: {} ".format(str(finCount), str(failCount)))

    def __insertMod_md5_all(self, md5_all, md5_list, filename):
        """
        Insert one row into the ``md5_all`` table.

        Args:
            md5_all: value stored in the ``md5_db`` primary key column.
            md5_list: iterable of strings, stored comma separated.
            filename: name of the fingerprint file the values belong to.

        A duplicate ``md5_all`` value is logged and skipped, not raised.

        Raises:
            FingerprintIndexWrite: any other failure while inserting the row.
        """
        joined = None
        try:
            joined = ','.join(md5_list)
            self.db_conn.execute(
                '''
                INSERT INTO md5_all VALUES(?, ?, ?)
                ''', [md5_all, joined, filename])
        except sql.IntegrityError as e:
            # md5_db is the primary key: another file with the same overall
            # hash has already been indexed.
            logging.error("Duplicate md5_all {} for file {}, row not inserted: {}".format(md5_all, filename, e))
        except Exception as e:
            logging.error("Error inserting a row\n{}\n{}".format(e, e.__class__.__name__))
            # The join itself may be what failed; never repeat it here, or
            # the handler would raise again and mask FingerprintIndexWrite.
            shown = joined if joined is not None else repr(md5_list)
            logging.error("md5_all: {}\nmd5_list: {}\nfilename: {}".format(md5_all, shown, filename))
            raise FingerprintIndexWrite("Error inserting a row")

    def __insertMod_md5_tables(self, md5_list, filename):
        """
        Insert the md5 of each table schema into the ``md5_tables`` table,
        one row per entry of ``md5_list``.

        A failing insert is logged and skipped; the remaining entries are
        still processed.
        """
        for md5_table in md5_list:
            try:
                self.db_conn.execute(
                    '''
                    INSERT INTO md5_tables VALUES(?, ?)
                    ''', [md5_table, filename])
            except sql.Error as e:
                # TODO: modify the existing row and add the filename to it.
                logging.error("Error inserting table md5 {} for file {}: {}".format(md5_table, filename, e))

    def __checkIntegrity(self):
        """
        Sanity check the number of files against the index rows.

        Not implemented yet; currently does nothing.
        """
        pass

    def dirCompare(self, folder):
        """ Not implemented yet; currently does nothing. """
        pass

    def compareFingerprint(self, fp1, fp2):
        """ Not implemented yet; currently does nothing. """
        pass