diff --git a/docs/devnotes b/docs/devnotes index 0c51695..bd86d78 100644 --- a/docs/devnotes +++ b/docs/devnotes @@ -1,4 +1,31 @@ +[[ TODO ]] + +-MD5 Index + -do I create an index of MD5 table hashes? + (how do I lookup each table to compare) + -add index comparison feature + +-add "-idxf" "--index_force" to force a re-index + -first give info about current index + -next, create a new index + -give info about new index +"152 fingerprints in the index" +"Creating a new index. Moved previous to backup" +"Creating... FINISHED." +"168 fingerprints in the new index" + + +-Processing of android data + -removed verbose message for each fingerprint + -fix error message to look good + +-remove print statements (from modules) +-add logging statements +-add exception throwing +-add unit testing + -test md5 sum create of tables + -test md5 sum create of entire db (tables concatenated) diff --git a/libs/fingerprint.py b/libs/fingerprint.py index bd2a83f..9054b69 100644 --- a/libs/fingerprint.py +++ b/libs/fingerprint.py @@ -35,7 +35,7 @@ class FingerprintDB: sqlmaster = "SELECT name, sql FROM sqlite_master WHERE type='table'" # version of the scanner used to create the fingerprint - scanner_ver = "0.90" + scanner_ver = "0.95" # version of the json file format, this # is inserted in the json fingerprint file and can be used to determine what is supported at the time of that version format_ver = "0.91" diff --git a/libs/fingerprint_index.py b/libs/fingerprint_index.py index 45d4f37..0cc14d8 100644 --- a/libs/fingerprint_index.py +++ b/libs/fingerprint_index.py @@ -48,10 +48,15 @@ class FingerprintIndex: try: self.db_conn = sql.connect(fq_fpidx) self.db_conn.execute(''' - CREATE TABLE md5_index ( - md5_all TEXT PRIMARY KEY, + CREATE TABLE md5_all ( + md5_db TEXT PRIMARY KEY, md5_list TEXT, - file_name TEXT); + fp_name TEXT); + ''') + self.db_conn.execute(''' + CREATE TABLE md5_tables ( + md5_list TEXT, + fp_name TEXT); ''') logging.info("Successfully created index table") 
self.__populateIndex(fp_dir) @@ -68,22 +73,18 @@ class FingerprintIndex: """ read each file, pull md5, add row to database """ failCount = 0 finCount = 0 - # self.db_conn.execute("INSERT INTO md5_index VALUES(?, ?, ?)", ["AAA", "BBB", "CCC"]) - # self.db_conn.execute("INSERT INTO md5_index VALUES('DDD', 'EEE', 'FFF')") try: db = FingerprintDB() files = os.listdir(fp_dir) - # print ("Populating DB, files=={}".format(files)) for file in files: try: fq_file = fp_dir + os.path.sep + file - # print ("importJson file=={}".format(fq_file)) db.importJson(fq_file) - self.__insertRecord(db.db_hash, db.table_hashes.values(), file) - # print("db_hash=={}".format(db.db_hash)) - # print("table_hashes={}".format(db.table_hashes)) - #md5_all = __createMD5Index(dbht) + self.__insertMod_md5_all(db.db_hash, db.table_hashes.values(), file) + #self.__insertMod_md5_tables(db.table_hashes.values(), file) finCount = finCount+1 + if ((finCount % 5) == 0): + self.db_conn.commit() except: failCount = failCount+1 except: @@ -93,17 +94,37 @@ class FingerprintIndex: logging.info("Completed populating the index. Completed: {} Failed: {} ".format(str(finCount), str(failCount))) # - def __insertRecord(self, md5_all, md5_list, filename): + def __insertMod_md5_all(self, md5_all, md5_list, filename): try: # logging.info("INSERT INTO md5_index VALUES(?, ?, ?): {}; {}; {}".format(md5_all, str(md5_list), filename)) self.db_conn.execute( ''' - INSERT INTO md5_index VALUES(?, ?, ?) + INSERT INTO md5_all VALUES(?, ?, ?) 
''', [md5_all, ','.join(md5_list), filename]) + except sql.IntegrityError as e: + logging.error("*** Got here bitches!!!") except Exception as e: - logging.error("Error inserting a row\n{}".format(e)) + logging.error("Error inserting a row\n{}\n{}".format(e, e.__class__.__name__)) + logging.error("md5_all: {}\nmd5_list: {}\nfilename: {}".format(md5_all, ','.join(md5_list), filename)) raise FingerprintIndexWrite("Error inserting a row") + # + def __insertMod_md5_tables(self, md5_list, filename): + # insert the md5 of the table schemas + for md5_table in md5_list: + try: + self.db_conn.execute( + ''' + INSERT INTO md5_tables VALUES(?, ?) + ''', [md5_table, filename]) + except: + try: + # modify row, add filename + pass + except: + pass + + # def __checkIntegrity(self): """ Sanity check the number of files against the index rows """