diff --git a/dbfp.py b/dbfp.py index 122a4a6..080bb4d 100644 --- a/dbfp.py +++ b/dbfp.py @@ -19,16 +19,16 @@ def main(): parseArgs() # -def compareFingerprint(filein, filejson): +def compareFingerprint(file_in, file_json): db = FingerprintDB() - db.scanDBFile(filein) - percent = db.compareDB(filejson) + db.scanDBFile(file_in) + percent = db.compareDB(file_json) print "Percent match: {}".format(str(percent)) # -def createFingerprint(filein, verbose, app_name, app_ver, notes): +def createFingerprint(file_in, verbose, app_name, app_ver, notes): db = FingerprintDB() - retVal = db.scanDBFile(filein) + retVal = db.scanDBFile(file_in) if (retVal > 0): if verbose: db.debugFingerprint() @@ -52,17 +52,18 @@ def indexFingerprints(fp_dir): # def compareFingerprintDir(file_in, fp_dir): db = FingerprintDB() - db.scanDBFile(filein) + db.scanDBFile(file_in) md5_db = db.getMD5DB() - md5_tables = db.getMD5Tables() - #percent = db.compareDB(filejson) + md5_tables = db.getMD5TablesArray() fp = FingerprintIndex() fp.openIndex(fp_dir) fp_list = fp.findFP(md5_db, md5_tables) results = [] for fp in fp_list: - percent = db.compareDB(fp) - results.add(percent) + fq_fp = fp_dir + os.path.sep + fp + print "[ OPEN fingerprint ] [ {} ]".format(fq_fp) + percent = db.compareDB(fq_fp) + results.append(percent) print "RESULTS: {}".format(results) results.sort() @@ -147,7 +148,7 @@ def parseArgs(): print ' DB Fingerprint' print '***** ***** ***** *****\n' parser = argparse.ArgumentParser(description="Fingerprint a sqlite database based on its schema") - parser.add_argument('-f', '--file', required=False, help="path to file to be fingerprinted") + parser.add_argument('-db', '--database', required=False, help="path to file to be fingerprinted") parser.add_argument('-fd', '--fpdir', required=False, help="path to directory of fingerprint files") parser.add_argument('-fp', '--fingerprint', required=False, help="fingerprint file to use in comparison") parser.add_argument('-dd', '--data_dir', required=False, help="path to a directory with sqlite files") @@ -169,18 +170,18 @@ def parseArgs(): else: logging.basicConfig(level=logging.CRITICAL) - if (args.file and args.fingerprint): - compareFingerprint(args.file, args.fingerprint) - elif (args.file and args.fpdir): - compareFingerprintDir(args.file, args.fpdir) + if (args.database and args.fingerprint): + compareFingerprint(args.database, args.fingerprint) + elif (args.database and args.fpdir): + compareFingerprintDir(args.database, args.fpdir) elif (args.data_dir): androidData(args.data_dir) elif (args.index_fingerprints): indexFingerprints(args.index_fingerprints) elif (args.pull): androidPull() - elif (args.file): - createFingerprint(args.file, args.verbose, args.app_name, args.app_version, args.notes) + elif (args.database): + createFingerprint(args.database, args.verbose, args.app_name, args.app_version, args.notes) else: parser.print_help() diff --git a/libs/fingerprint.py b/libs/fingerprint.py index 9054b69..3ff9bc0 100644 --- a/libs/fingerprint.py +++ b/libs/fingerprint.py @@ -8,7 +8,7 @@ import hashlib import time import logging from libs.toolbox import ToolBox -from libs.exceptions import FingerprintWrite +from libs.exceptions import FingerprintWrite, FingerprintMD5 delimeter = "|" @@ -45,10 +45,10 @@ class FingerprintDB: self.cur = None self.table_names = [] self.tables = {} + self.db_hash = None # hashtables of json data self.tables_json = {} - self.db_hash = {} - self.table_hashes = {} + self.db_hash_json = None # fingerprint metadata self.db_name = "" self.app_name = "" @@ -74,6 +74,8 @@ class FingerprintDB: try: # read database schema, parse the schema self.__readDatabase() + # concat all the table create statements, then md5 + self.__createMD5DB() except Exception, ex: print ex return -3 @@ -128,14 +130,25 @@ class FingerprintDB: # def compareDB(self, filejson): + """ return the percentage of the match between two fingerprints """ if (not self.scanned): return - """ return the percentage of the match between two fingerprints """ self.__importJsonDBSchema(filejson) result = self.__DBSchemaCompare() print "[ Percetage == {}]".format(result) return result + # + def getMD5DB(self): + return self.db_hash + + # + def getMD5TablesArray(self): + retval = [] + for key in self.tables.keys(): + retval.append(self.tables[key].hash()) + return retval + # def __importJsonDBSchema(self, file_json): """ import fingerprint from a json file """ @@ -154,11 +167,10 @@ class FingerprintDB: newTable.importTable(table_name, tb[table_name], dbmt[table_name], dbht[table_name]) tables[table_name] = newTable - self.tables_json = tables - self.db_hash = dbmd5 - self.table_hashes = dbht - except Exception as e: - logging.error("ERROR: problem loading json file: " + file_json + e) + self.tables_json = tables + self.db_hash_json = dbmd5 + except Exception as ex: + logging.error("ERROR: problem loading json file: \n{}\n{}".format(file_json, ex)) # def __DBMD5Compare(self): @@ -174,19 +186,36 @@ class FingerprintDB: diff_total = 0 all_total = 0 for tableName in self.tables_json.keys(): - table = self.tables[tableName] - print "[[ Comparing Table: " + tableName + " ]]" - if (table): - if not (self.tables_json[tableName].hash() == table.hash()): - logging.info("*** Hash difference 1:{}!={}".format(self.tables_json[tableName].hash(), table.hash())) - (total, diff_num) = self.__CompareTable(self.tablesJson[tableName], table) - all_total += total - diff_total += diff_num + try: + print "[[ Comparing Table: " + tableName + " ]]" + table = self.tables[tableName] + if (table): + if not (self.tables_json[tableName].hash() == table.hash()): + logging.info("*** Hash difference 1:{}!={}".format(self.tables_json[tableName].hash(), table.hash())) + (total, diff_num) = self.__CompareTable(self.tablesJson[tableName], table) + all_total += total + diff_total += diff_num + else: + # get the number of fields from the other table and add to the all_total + logging.info("Table {} is IDENTICAL (to json fingerprint)".format(tableName)) + else: + self.__FuzzyTable() + except KeyError as ex: + # get the number of fields from the other table to add to the diff_total + all_total += 10 + diff_total += 10 + logging.info("Table {} not EXISTS (to json fingerprint)".format(tableName)) + + print "diff_total=={}, all_total=={}".format(diff_total, all_total) + if (all_total == diff_total): + percentage = 0 + else: + # percentage = 100 + if (diff_total > 0): + percentage = float(diff_total / all_total) else: - self.__FuzzyTable() - percentage = 100 - if (diff_total > 0): - percentage = percentage - float(diff_total) / all_total + percentage = 100 + # percentage = percentage - float(diff_total / all_total) return percentage @@ -294,26 +323,27 @@ class FingerprintDB: dmhash[table] = self.tables[table].SQLstr() shash[table] = self.tables[table].sqlStrHash - ahash['db-metadata-md5'] = self.__createMD5Index(shash) + ahash['db-metadata-md5'] = self.db_hash json.dump(ahash, filehandle, sort_keys=True, indent=4) # - def __createMD5Index(self, dbht): - retval = "hieeee!!!" + def __createMD5DB(self): + retval = None concat_str = "" try: - bitchasskeys = dbht.keys() + bitchasskeys = self.tables.keys() bitchasskeys.sort() for key in bitchasskeys: - concat_str += dbht[key] + concat_str += self.tables[key].hash() + #print "---> {}".format(concat_str) m = hashlib.md5() m.update(concat_str) - retval = m.hexdigest() + retval = m.hexdigest() + self.db_hash = retval except Exception, ex: logging.error(ex) raise FingerprintMD5("Problem creating a MD5 sum") - return retval # def setAppName(self, name): @@ -359,7 +389,7 @@ class TableSchema: self.tableName = tableName self.sqlStr = sqlStr - print "[[ TABLE: <{}> ] processing...]".format(tableName) + logging.info("[[ TABLE: <{}> ] processing...]".format(tableName)) # hash the sql create string for quicker fingerprint matching try: m = hashlib.md5() diff --git a/libs/fingerprint_index.py b/libs/fingerprint_index.py index 62042fe..9d91ed7 100644 --- a/libs/fingerprint_index.py +++ b/libs/fingerprint_index.py @@ -3,6 +3,7 @@ # [ md5_all, md5_list, file_name ] # # +import re import os import sys import logging @@ -33,6 +34,7 @@ class FingerprintIndex: if (os.path.isfile(fq_fpidx)): self.db_conn = sql.connect(fq_fpidx) logging.info("DB Open SUCCESSFUL") + self.cur = self.db_conn.cursor() else: logging.info("No index file found, creating index now...") self.__createIndex(fp_dir) @@ -52,10 +54,23 @@ class FingerprintIndex: # def findFP(self, md5_db, md5_tables): - rows = __qDatabaseMD5(md5_db) - print "***** ***** *****\n{}\n".format(rows) - rows = __qTableMD5(md5_tables) - print "***** ***** *****\n{}\n".format(rows) + #print "***** findFP *****\nmd5_db=={} md5_tables=={}\n".format(md5_db, md5_tables) + rows = self.__qDatabaseMD5(md5_db) + # rowcount will be -1 if nothing was returned + if rows.rowcount > 0: + #print "***** __qDatabaseMD5 *****\n{}\n".format(rows) + return rows + + for md5_table in md5_tables: + retval = {} + rows = self.__qTableMD5(md5_table) + for row in rows: + fp_list = row[0] + fps = fp_list.split(',') + for fp in fps: + retval[fp] = 1 + #print "***** __qTableMD5 *****\n{}\n".format(retval.keys()) + return retval.keys() # def __qDatabaseMD5(self, md5_db): @@ -106,13 +121,19 @@ class FingerprintIndex: # def __populateIndex(self, fp_dir): """ read each file, pull md5, add row to database """ + finCount = 0 failCount = 0 - finCount = 0 + naCount = 0 try: db = FingerprintDB() files = os.listdir(fp_dir) for file in files: try: + # only parese files with .json eextension + if not re.search(r'.*\.json', file): + naCount = naCount+1 + pass + print file fq_file = fp_dir + os.path.sep + file db.importJson(fq_file) self.__insertMod_md5_all(db.db_hash, db.table_hashes.values(), file) @@ -127,7 +148,7 @@ class FingerprintIndex: logging.error(ex) finally: self.db_conn.commit() - logging.info("Completed populating the index. Completed: {} Failed: {} ".format(str(finCount), str(failCount))) + logging.info("Completed populating the index. Completed: {} Failed: {} NA: {}".format(str(finCount), str(failCount), str(naCount))) # def __insertMod_md5_all(self, md5_db, md5_list, filename):