FIX: fixed bugs introduced with the importJSON and the fingerprint index comparison feature

This commit is contained in:
JohnE 2016-01-12 01:10:05 -08:00
parent 4a52287c59
commit 734f5ba4b2
2 changed files with 40 additions and 43 deletions

View File

@ -54,7 +54,7 @@ def compareFingerprintDir(file_in, fp_dir):
db = FingerprintDB()
db.scanDBFile(file_in)
md5_db = db.getMD5DB()
md5_tables = db.getMD5TablesArray()
md5_tables = db.getMD5Tables()
fp = FingerprintIndex()
fp.openIndex(fp_dir)
fp_list = fp.findFP(md5_db, md5_tables)

View File

@ -46,16 +46,14 @@ class FingerprintDB:
self.table_names = []
self.tables = {}
self.db_hash = None
# hashtables of json data
self.tables_json = {}
self.db_hash_json = None
self.table_hashes = None
# fingerprint metadata
self.db_name = ""
self.app_name = ""
self.app_ver = ""
self.notes = ""
self.filein = ""
self.scanned = False
self.init = False
#
def scanDBFile(self, filein):
@ -81,13 +79,13 @@ class FingerprintDB:
return -3
# flag is used to determine if the class has data
self.scanned = True
self.init = True
self.filein = filein
return 1
#
def writeFingerprint(self):
if (not self.scanned):
if (not self.init):
return
try:
@ -103,7 +101,7 @@ class FingerprintDB:
#
def writeFingerprintFile(self, filename):
if (not self.scanned):
if (not self.init):
return
try:
@ -131,11 +129,12 @@ class FingerprintDB:
#
def compareDB(self, filejson):
""" return the percentage of the match between two fingerprints """
if (not self.scanned):
if (not self.init):
return
self.__importJsonDBSchema(filejson)
result = self.__DBSchemaCompare()
print "[ Percetage == {}]".format(result)
fp = FingerprintDB();
fp.__importJsonDBSchema(filejson)
result = self.__DBSchemaCompare(fp)
print "[ Percetage == %f]".format(result)
return result
#
@ -143,15 +142,19 @@ class FingerprintDB:
return self.db_hash
#
def getMD5TablesArray(self):
retval = []
def getMD5Tables(self):
if (self.table_hashes):
return self.table_hashes
self.table_hashes = []
for key in self.tables.keys():
retval.append(self.tables[key].hash())
return retval
self.table_hashes.append(self.tables[key].hash())
return self.table_hashes
#
def __importJsonDBSchema(self, file_json):
""" import fingerprint from a json file """
self.__init__()
tables = {}
try:
fh = open(file_json, "r")
@ -167,8 +170,9 @@ class FingerprintDB:
newTable.importTable(table_name, tb[table_name], dbmt[table_name], dbht[table_name])
tables[table_name] = newTable
self.tables_json = tables
self.db_hash_json = dbmd5
self.tables = tables
self.db_hash = dbmd5
self.table_hashes = dbht
except Exception as ex:
logging.error("ERROR: problem loading json file: \n{}\n{}".format(file_json, ex))
@ -176,7 +180,7 @@ class FingerprintDB:
def __DBMD5Compare(self):
pass
def __DBSchemaCompare(self):
def __DBSchemaCompare(self, fp):
# the json database schema definition is what our tools is expecting...
# ...so we use it as the baseline
# look for table, if exists, compare each
@ -185,18 +189,18 @@ class FingerprintDB:
diff_num = 0
diff_total = 0
all_total = 0
for tableName in self.tables_json.keys():
for tableName in fp.tables.keys():
try:
print "[[ Comparing Table: " + tableName + " ]]"
table = self.tables[tableName]
if (table):
if not (self.tables_json[tableName].hash() == table.hash()):
logging.info("*** Hash difference 1:{}!={}".format(self.tables_json[tableName].hash(), table.hash()))
(total, diff_num) = self.__CompareTable(self.tablesJson[tableName], table)
if not (fp.tables[tableName].hash() == table.hash()):
logging.info("*** Hash difference 1:{}!={}".format(fp.tables[tableName].hash(), table.hash()))
(total, diff_num) = self.__CompareTable(fp.tables[tableName], table)
all_total += total
diff_total += diff_num
else:
# get the number of fields from the other table and add to the all_total
all_total += 10
logging.info("Table {} is IDENTICAL (to json fingerprint)".format(tableName))
else:
self.__FuzzyTable()
@ -207,16 +211,14 @@ class FingerprintDB:
logging.info("Table {} not EXISTS (to json fingerprint)".format(tableName))
print "diff_total=={}, all_total=={}".format(diff_total, all_total)
if (all_total == diff_total):
if (diff_total > 0):
if (diff_total == all_total):
percentage = 0
else:
# percentage = 100
if (diff_total > 0):
percentage = float(diff_total / all_total)
else:
percentage = 100
# percentage = percentage - float(diff_total / all_total)
return percentage
#
@ -245,13 +247,8 @@ class FingerprintDB:
else:
fields_diff_count += 1
if (prop_error_count == 0 and fields_diff_count == 0):
print "100% compatible"
else:
totals = prop_total_count + fields_total_count
diff_total = prop_error_count + fields_diff_count
print "Table difference found: " + str(diff_total)
#print str((diff_total/totals) * 100) + '% compatible total == ' + str(totals) + " diff_total == " + str(diff_total)
return (totals, diff_total)
# look at un-identified tables and try to match fields by their properties