Summary of this patch (free text before the first "diff --git" header is
ignored by patch/git-apply):

  * FingerprintDB: rename the "scanned" flag to "init" and drop the
    tables_json / db_hash_json attributes in favour of table_hashes.
  * Replace getMD5TablesArray() with getMD5Tables(), which caches the list
    of table hashes in self.table_hashes; dbfp.py is updated to call it.
  * compareDB() loads the JSON fingerprint into a second FingerprintDB
    instance and passes it to __DBSchemaCompare(fp).
  * __CompareTable() no longer prints its result; it only returns
    (totals, diff_total).

NOTE(review): this patch was rebuilt from a whitespace-collapsed copy.
Indentation and blank context lines are reconstructed from the hunk line
counts and should be confirmed against the repository before applying.
The last hunk (TableSchema.importTable) is a whitespace-only change whose
original spacing could not be recovered; the "=" alignment shown is an
assumption, and any trailing context lines of that hunk are not included.

diff --git a/dbfp.py b/dbfp.py
index 2fb4576..742fb4d 100644
--- a/dbfp.py
+++ b/dbfp.py
@@ -54,7 +54,7 @@ def compareFingerprintDir(file_in, fp_dir):
     db = FingerprintDB()
     db.scanDBFile(file_in)
     md5_db = db.getMD5DB()
-    md5_tables = db.getMD5TablesArray()
+    md5_tables = db.getMD5Tables()
     fp = FingerprintIndex()
     fp.openIndex(fp_dir)
     fp_list = fp.findFP(md5_db, md5_tables)
diff --git a/libs/fingerprint.py b/libs/fingerprint.py
index 3ff9bc0..9803594 100644
--- a/libs/fingerprint.py
+++ b/libs/fingerprint.py
@@ -46,16 +46,14 @@ class FingerprintDB:
         self.table_names = []
         self.tables = {}
         self.db_hash = None
-        # hashtables of json data
-        self.tables_json = {}
-        self.db_hash_json = None
+        self.table_hashes = None
         # fingerprint metadata
         self.db_name = ""
         self.app_name = ""
         self.app_ver = ""
         self.notes = ""
         self.filein = ""
-        self.scanned = False
+        self.init = False
 
     #
     def scanDBFile(self, filein):
@@ -81,13 +79,13 @@ class FingerprintDB:
             return -3
 
         # flag is used to determine if the class has data
-        self.scanned = True
+        self.init = True
         self.filein = filein
         return 1
 
     #
     def writeFingerprint(self):
-        if (not self.scanned):
+        if (not self.init):
             return
 
         try:
@@ -103,7 +101,7 @@ class FingerprintDB:
 
     #
     def writeFingerprintFile(self, filename):
-        if (not self.scanned):
+        if (not self.init):
             return
 
         try:
@@ -131,11 +129,12 @@ class FingerprintDB:
     #
     def compareDB(self, filejson):
         """ return the percentage of the match between two fingerprints """
-        if (not self.scanned):
+        if (not self.init):
             return
-        self.__importJsonDBSchema(filejson)
-        result = self.__DBSchemaCompare()
+        fp = FingerprintDB()
+        fp.__importJsonDBSchema(filejson)
+        result = self.__DBSchemaCompare(fp)
         print "[ Percetage == {}]".format(result)
         return result
 
     #
@@ -143,15 +142,19 @@ class FingerprintDB:
         return self.db_hash
 
     #
-    def getMD5TablesArray(self):
-        retval = []
+    def getMD5Tables(self):
+        if (self.table_hashes):
+            return self.table_hashes
+
+        self.table_hashes = []
         for key in self.tables.keys():
-            retval.append(self.tables[key].hash())
-        return retval
+            self.table_hashes.append(self.tables[key].hash())
+        return self.table_hashes
 
     #
     def __importJsonDBSchema(self, file_json):
         """ import fingerprint from a json file """
+        self.__init__()
         tables = {}
         try:
             fh = open(file_json, "r")
@@ -167,8 +170,9 @@ class FingerprintDB:
                 newTable.importTable(table_name, tb[table_name], dbmt[table_name], dbht[table_name])
                 tables[table_name] = newTable
 
-            self.tables_json = tables
-            self.db_hash_json = dbmd5
+            self.tables = tables
+            self.db_hash = dbmd5
+            self.table_hashes = list(dbht.values())
         except Exception as ex:
             logging.error("ERROR: problem loading json file: \n{}\n{}".format(file_json, ex))
 
@@ -176,7 +180,7 @@ class FingerprintDB:
     def __DBMD5Compare(self):
         pass
 
-    def __DBSchemaCompare(self):
+    def __DBSchemaCompare(self, fp):
         # the json database schema definition is what our tools is expecting...
         # ...so we use it as the baseline
         # look for table, if exists, compare each
@@ -185,18 +189,18 @@ class FingerprintDB:
         diff_num = 0
         diff_total = 0
         all_total = 0
-        for tableName in self.tables_json.keys():
+        for tableName in fp.tables.keys():
             try:
                 print "[[ Comparing Table: " + tableName + " ]]"
                 table = self.tables[tableName]
                 if (table):
-                    if not (self.tables_json[tableName].hash() == table.hash()):
-                        logging.info("*** Hash difference 1:{}!={}".format(self.tables_json[tableName].hash(), table.hash()))
-                        (total, diff_num) = self.__CompareTable(self.tablesJson[tableName], table)
+                    if not (fp.tables[tableName].hash() == table.hash()):
+                        logging.info("*** Hash difference 1:{}!={}".format(fp.tables[tableName].hash(), table.hash()))
+                        (total, diff_num) = self.__CompareTable(fp.tables[tableName], table)
                         all_total += total
                         diff_total += diff_num
                     else:
-                        # get the number of fields from the other table and add to the all_total
+                        all_total += 10
                         logging.info("Table {} is IDENTICAL (to json fingerprint)".format(tableName))
                 else:
                     self.__FuzzyTable()
@@ -207,16 +211,14 @@ class FingerprintDB:
                 logging.info("Table {} not EXISTS (to json fingerprint)".format(tableName))
 
         print "diff_total=={}, all_total=={}".format(diff_total, all_total)
-        if (all_total == diff_total):
-            percentage = 0
-        else:
-            # percentage = 100
-            if (diff_total > 0):
-                percentage = float(diff_total / all_total)
-            else:
-                percentage = 100
-            # percentage = percentage - float(diff_total / all_total)
 
+        if (diff_total > 0):
+            if (diff_total == all_total):
+                percentage = 0
+            else:
+                percentage = float(diff_total) / all_total
+        else:
+            percentage = 100
         return percentage
 
     #
@@ -245,13 +247,8 @@ class FingerprintDB:
                 else:
                     fields_diff_count += 1
 
-        if (prop_error_count == 0 and fields_diff_count == 0):
-            print "100% compatible"
-        else:
-            totals = prop_total_count + fields_total_count
-            diff_total = prop_error_count + fields_diff_count
-            print "Table difference found: " + str(diff_total)
-            #print str((diff_total/totals) * 100) + '% compatible total == ' + str(totals) + " diff_total == " + str(diff_total)
+        totals = prop_total_count + fields_total_count
+        diff_total = prop_error_count + fields_diff_count
         return (totals, diff_total)
 
     # look at un-identified tables and try to match fields by their properties
@@ -410,9 +407,9 @@ class TableSchema:
 
     #
     def importTable(self, tbName, fields, sqlStr, hashStr):
-        self.tableName = tbName
-        self.sqlStr = sqlStr
-        self.fields = fields
+        self.tableName  = tbName
+        self.sqlStr     = sqlStr
+        self.fields     = fields
         self.sqlStrHash = hashStr