FIX: fixed bugs introduced with the importJSON and the fingerprint index comparison feature

This commit is contained in:
JohnE 2016-01-12 01:10:05 -08:00
parent 4a52287c59
commit 734f5ba4b2
2 changed files with 40 additions and 43 deletions

View File

@ -54,7 +54,7 @@ def compareFingerprintDir(file_in, fp_dir):
db = FingerprintDB() db = FingerprintDB()
db.scanDBFile(file_in) db.scanDBFile(file_in)
md5_db = db.getMD5DB() md5_db = db.getMD5DB()
md5_tables = db.getMD5TablesArray() md5_tables = db.getMD5Tables()
fp = FingerprintIndex() fp = FingerprintIndex()
fp.openIndex(fp_dir) fp.openIndex(fp_dir)
fp_list = fp.findFP(md5_db, md5_tables) fp_list = fp.findFP(md5_db, md5_tables)

View File

@ -46,16 +46,14 @@ class FingerprintDB:
self.table_names = [] self.table_names = []
self.tables = {} self.tables = {}
self.db_hash = None self.db_hash = None
# hashtables of json data self.table_hashes = None
self.tables_json = {}
self.db_hash_json = None
# fingerprint metadata # fingerprint metadata
self.db_name = "" self.db_name = ""
self.app_name = "" self.app_name = ""
self.app_ver = "" self.app_ver = ""
self.notes = "" self.notes = ""
self.filein = "" self.filein = ""
self.scanned = False self.init = False
# #
def scanDBFile(self, filein): def scanDBFile(self, filein):
@ -81,13 +79,13 @@ class FingerprintDB:
return -3 return -3
# flag is used to determine if the class has data # flag is used to determine if the class has data
self.scanned = True self.init = True
self.filein = filein self.filein = filein
return 1 return 1
# #
def writeFingerprint(self): def writeFingerprint(self):
if (not self.scanned): if (not self.init):
return return
try: try:
@ -103,7 +101,7 @@ class FingerprintDB:
# #
def writeFingerprintFile(self, filename): def writeFingerprintFile(self, filename):
if (not self.scanned): if (not self.init):
return return
try: try:
@ -131,11 +129,12 @@ class FingerprintDB:
# #
def compareDB(self, filejson): def compareDB(self, filejson):
""" return the percentage of the match between two fingerprints """ """ return the percentage of the match between two fingerprints """
if (not self.scanned): if (not self.init):
return return
self.__importJsonDBSchema(filejson) fp = FingerprintDB();
result = self.__DBSchemaCompare() fp.__importJsonDBSchema(filejson)
print "[ Percetage == {}]".format(result) result = self.__DBSchemaCompare(fp)
print "[ Percetage == %f]".format(result)
return result return result
# #
@ -143,15 +142,19 @@ class FingerprintDB:
return self.db_hash return self.db_hash
# #
def getMD5TablesArray(self): def getMD5Tables(self):
retval = [] if (self.table_hashes):
return self.table_hashes
self.table_hashes = []
for key in self.tables.keys(): for key in self.tables.keys():
retval.append(self.tables[key].hash()) self.table_hashes.append(self.tables[key].hash())
return retval return self.table_hashes
# #
def __importJsonDBSchema(self, file_json): def __importJsonDBSchema(self, file_json):
""" import fingerprint from a json file """ """ import fingerprint from a json file """
self.__init__()
tables = {} tables = {}
try: try:
fh = open(file_json, "r") fh = open(file_json, "r")
@ -167,8 +170,9 @@ class FingerprintDB:
newTable.importTable(table_name, tb[table_name], dbmt[table_name], dbht[table_name]) newTable.importTable(table_name, tb[table_name], dbmt[table_name], dbht[table_name])
tables[table_name] = newTable tables[table_name] = newTable
self.tables_json = tables self.tables = tables
self.db_hash_json = dbmd5 self.db_hash = dbmd5
self.table_hashes = dbht
except Exception as ex: except Exception as ex:
logging.error("ERROR: problem loading json file: \n{}\n{}".format(file_json, ex)) logging.error("ERROR: problem loading json file: \n{}\n{}".format(file_json, ex))
@ -176,7 +180,7 @@ class FingerprintDB:
def __DBMD5Compare(self): def __DBMD5Compare(self):
pass pass
def __DBSchemaCompare(self): def __DBSchemaCompare(self, fp):
# the json database schema definition is what our tools is expecting... # the json database schema definition is what our tools is expecting...
# ...so we use it as the baseline # ...so we use it as the baseline
# look for table, if exists, compare each # look for table, if exists, compare each
@ -185,18 +189,18 @@ class FingerprintDB:
diff_num = 0 diff_num = 0
diff_total = 0 diff_total = 0
all_total = 0 all_total = 0
for tableName in self.tables_json.keys(): for tableName in fp.tables.keys():
try: try:
print "[[ Comparing Table: " + tableName + " ]]" print "[[ Comparing Table: " + tableName + " ]]"
table = self.tables[tableName] table = self.tables[tableName]
if (table): if (table):
if not (self.tables_json[tableName].hash() == table.hash()): if not (fp.tables[tableName].hash() == table.hash()):
logging.info("*** Hash difference 1:{}!={}".format(self.tables_json[tableName].hash(), table.hash())) logging.info("*** Hash difference 1:{}!={}".format(fp.tables[tableName].hash(), table.hash()))
(total, diff_num) = self.__CompareTable(self.tablesJson[tableName], table) (total, diff_num) = self.__CompareTable(fp.tables[tableName], table)
all_total += total all_total += total
diff_total += diff_num diff_total += diff_num
else: else:
# get the number of fields from the other table and add to the all_total all_total += 10
logging.info("Table {} is IDENTICAL (to json fingerprint)".format(tableName)) logging.info("Table {} is IDENTICAL (to json fingerprint)".format(tableName))
else: else:
self.__FuzzyTable() self.__FuzzyTable()
@ -207,16 +211,14 @@ class FingerprintDB:
logging.info("Table {} not EXISTS (to json fingerprint)".format(tableName)) logging.info("Table {} not EXISTS (to json fingerprint)".format(tableName))
print "diff_total=={}, all_total=={}".format(diff_total, all_total) print "diff_total=={}, all_total=={}".format(diff_total, all_total)
if (all_total == diff_total):
percentage = 0
else:
# percentage = 100
if (diff_total > 0):
percentage = float(diff_total / all_total)
else:
percentage = 100
# percentage = percentage - float(diff_total / all_total)
if (diff_total > 0):
if (diff_total == all_total):
percentage = 0
else:
percentage = float(diff_total / all_total)
else:
percentage = 100
return percentage return percentage
# #
@ -245,13 +247,8 @@ class FingerprintDB:
else: else:
fields_diff_count += 1 fields_diff_count += 1
if (prop_error_count == 0 and fields_diff_count == 0): totals = prop_total_count + fields_total_count
print "100% compatible" diff_total = prop_error_count + fields_diff_count
else:
totals = prop_total_count + fields_total_count
diff_total = prop_error_count + fields_diff_count
print "Table difference found: " + str(diff_total)
#print str((diff_total/totals) * 100) + '% compatible total == ' + str(totals) + " diff_total == " + str(diff_total)
return (totals, diff_total) return (totals, diff_total)
# look at un-identified tables and try to match fields by their properties # look at un-identified tables and try to match fields by their properties
@ -410,9 +407,9 @@ class TableSchema:
# #
def importTable(self, tbName, fields, sqlStr, hashStr): def importTable(self, tbName, fields, sqlStr, hashStr):
self.tableName = tbName self.tableName = tbName
self.sqlStr = sqlStr self.sqlStr = sqlStr
self.fields = fields self.fields = fields
self.sqlStrHash = hashStr self.sqlStrHash = hashStr