FIX: fixed bugs introduced with the importJSON and the fingerprint index comparison feature
This commit is contained in:
parent
4a52287c59
commit
734f5ba4b2
2
dbfp.py
2
dbfp.py
|
@ -54,7 +54,7 @@ def compareFingerprintDir(file_in, fp_dir):
|
||||||
db = FingerprintDB()
|
db = FingerprintDB()
|
||||||
db.scanDBFile(file_in)
|
db.scanDBFile(file_in)
|
||||||
md5_db = db.getMD5DB()
|
md5_db = db.getMD5DB()
|
||||||
md5_tables = db.getMD5TablesArray()
|
md5_tables = db.getMD5Tables()
|
||||||
fp = FingerprintIndex()
|
fp = FingerprintIndex()
|
||||||
fp.openIndex(fp_dir)
|
fp.openIndex(fp_dir)
|
||||||
fp_list = fp.findFP(md5_db, md5_tables)
|
fp_list = fp.findFP(md5_db, md5_tables)
|
||||||
|
|
|
@ -46,16 +46,14 @@ class FingerprintDB:
|
||||||
self.table_names = []
|
self.table_names = []
|
||||||
self.tables = {}
|
self.tables = {}
|
||||||
self.db_hash = None
|
self.db_hash = None
|
||||||
# hashtables of json data
|
self.table_hashes = None
|
||||||
self.tables_json = {}
|
|
||||||
self.db_hash_json = None
|
|
||||||
# fingerprint metadata
|
# fingerprint metadata
|
||||||
self.db_name = ""
|
self.db_name = ""
|
||||||
self.app_name = ""
|
self.app_name = ""
|
||||||
self.app_ver = ""
|
self.app_ver = ""
|
||||||
self.notes = ""
|
self.notes = ""
|
||||||
self.filein = ""
|
self.filein = ""
|
||||||
self.scanned = False
|
self.init = False
|
||||||
|
|
||||||
#
|
#
|
||||||
def scanDBFile(self, filein):
|
def scanDBFile(self, filein):
|
||||||
|
@ -81,13 +79,13 @@ class FingerprintDB:
|
||||||
return -3
|
return -3
|
||||||
|
|
||||||
# flag is used to determine if the class has data
|
# flag is used to determine if the class has data
|
||||||
self.scanned = True
|
self.init = True
|
||||||
self.filein = filein
|
self.filein = filein
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
#
|
#
|
||||||
def writeFingerprint(self):
|
def writeFingerprint(self):
|
||||||
if (not self.scanned):
|
if (not self.init):
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -103,7 +101,7 @@ class FingerprintDB:
|
||||||
|
|
||||||
#
|
#
|
||||||
def writeFingerprintFile(self, filename):
|
def writeFingerprintFile(self, filename):
|
||||||
if (not self.scanned):
|
if (not self.init):
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -131,11 +129,12 @@ class FingerprintDB:
|
||||||
#
|
#
|
||||||
def compareDB(self, filejson):
|
def compareDB(self, filejson):
|
||||||
""" return the percentage of the match between two fingerprints """
|
""" return the percentage of the match between two fingerprints """
|
||||||
if (not self.scanned):
|
if (not self.init):
|
||||||
return
|
return
|
||||||
self.__importJsonDBSchema(filejson)
|
fp = FingerprintDB();
|
||||||
result = self.__DBSchemaCompare()
|
fp.__importJsonDBSchema(filejson)
|
||||||
print "[ Percetage == {}]".format(result)
|
result = self.__DBSchemaCompare(fp)
|
||||||
|
print "[ Percetage == %f]".format(result)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
#
|
#
|
||||||
|
@ -143,15 +142,19 @@ class FingerprintDB:
|
||||||
return self.db_hash
|
return self.db_hash
|
||||||
|
|
||||||
#
|
#
|
||||||
def getMD5TablesArray(self):
|
def getMD5Tables(self):
|
||||||
retval = []
|
if (self.table_hashes):
|
||||||
|
return self.table_hashes
|
||||||
|
|
||||||
|
self.table_hashes = []
|
||||||
for key in self.tables.keys():
|
for key in self.tables.keys():
|
||||||
retval.append(self.tables[key].hash())
|
self.table_hashes.append(self.tables[key].hash())
|
||||||
return retval
|
return self.table_hashes
|
||||||
|
|
||||||
#
|
#
|
||||||
def __importJsonDBSchema(self, file_json):
|
def __importJsonDBSchema(self, file_json):
|
||||||
""" import fingerprint from a json file """
|
""" import fingerprint from a json file """
|
||||||
|
self.__init__()
|
||||||
tables = {}
|
tables = {}
|
||||||
try:
|
try:
|
||||||
fh = open(file_json, "r")
|
fh = open(file_json, "r")
|
||||||
|
@ -167,8 +170,9 @@ class FingerprintDB:
|
||||||
newTable.importTable(table_name, tb[table_name], dbmt[table_name], dbht[table_name])
|
newTable.importTable(table_name, tb[table_name], dbmt[table_name], dbht[table_name])
|
||||||
tables[table_name] = newTable
|
tables[table_name] = newTable
|
||||||
|
|
||||||
self.tables_json = tables
|
self.tables = tables
|
||||||
self.db_hash_json = dbmd5
|
self.db_hash = dbmd5
|
||||||
|
self.table_hashes = dbht
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
logging.error("ERROR: problem loading json file: \n{}\n{}".format(file_json, ex))
|
logging.error("ERROR: problem loading json file: \n{}\n{}".format(file_json, ex))
|
||||||
|
|
||||||
|
@ -176,7 +180,7 @@ class FingerprintDB:
|
||||||
def __DBMD5Compare(self):
|
def __DBMD5Compare(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def __DBSchemaCompare(self):
|
def __DBSchemaCompare(self, fp):
|
||||||
# the json database schema definition is what our tools is expecting...
|
# the json database schema definition is what our tools is expecting...
|
||||||
# ...so we use it as the baseline
|
# ...so we use it as the baseline
|
||||||
# look for table, if exists, compare each
|
# look for table, if exists, compare each
|
||||||
|
@ -185,18 +189,18 @@ class FingerprintDB:
|
||||||
diff_num = 0
|
diff_num = 0
|
||||||
diff_total = 0
|
diff_total = 0
|
||||||
all_total = 0
|
all_total = 0
|
||||||
for tableName in self.tables_json.keys():
|
for tableName in fp.tables.keys():
|
||||||
try:
|
try:
|
||||||
print "[[ Comparing Table: " + tableName + " ]]"
|
print "[[ Comparing Table: " + tableName + " ]]"
|
||||||
table = self.tables[tableName]
|
table = self.tables[tableName]
|
||||||
if (table):
|
if (table):
|
||||||
if not (self.tables_json[tableName].hash() == table.hash()):
|
if not (fp.tables[tableName].hash() == table.hash()):
|
||||||
logging.info("*** Hash difference 1:{}!={}".format(self.tables_json[tableName].hash(), table.hash()))
|
logging.info("*** Hash difference 1:{}!={}".format(fp.tables[tableName].hash(), table.hash()))
|
||||||
(total, diff_num) = self.__CompareTable(self.tablesJson[tableName], table)
|
(total, diff_num) = self.__CompareTable(fp.tables[tableName], table)
|
||||||
all_total += total
|
all_total += total
|
||||||
diff_total += diff_num
|
diff_total += diff_num
|
||||||
else:
|
else:
|
||||||
# get the number of fields from the other table and add to the all_total
|
all_total += 10
|
||||||
logging.info("Table {} is IDENTICAL (to json fingerprint)".format(tableName))
|
logging.info("Table {} is IDENTICAL (to json fingerprint)".format(tableName))
|
||||||
else:
|
else:
|
||||||
self.__FuzzyTable()
|
self.__FuzzyTable()
|
||||||
|
@ -207,16 +211,14 @@ class FingerprintDB:
|
||||||
logging.info("Table {} not EXISTS (to json fingerprint)".format(tableName))
|
logging.info("Table {} not EXISTS (to json fingerprint)".format(tableName))
|
||||||
|
|
||||||
print "diff_total=={}, all_total=={}".format(diff_total, all_total)
|
print "diff_total=={}, all_total=={}".format(diff_total, all_total)
|
||||||
if (all_total == diff_total):
|
|
||||||
percentage = 0
|
|
||||||
else:
|
|
||||||
# percentage = 100
|
|
||||||
if (diff_total > 0):
|
|
||||||
percentage = float(diff_total / all_total)
|
|
||||||
else:
|
|
||||||
percentage = 100
|
|
||||||
# percentage = percentage - float(diff_total / all_total)
|
|
||||||
|
|
||||||
|
if (diff_total > 0):
|
||||||
|
if (diff_total == all_total):
|
||||||
|
percentage = 0
|
||||||
|
else:
|
||||||
|
percentage = float(diff_total / all_total)
|
||||||
|
else:
|
||||||
|
percentage = 100
|
||||||
return percentage
|
return percentage
|
||||||
|
|
||||||
#
|
#
|
||||||
|
@ -245,13 +247,8 @@ class FingerprintDB:
|
||||||
else:
|
else:
|
||||||
fields_diff_count += 1
|
fields_diff_count += 1
|
||||||
|
|
||||||
if (prop_error_count == 0 and fields_diff_count == 0):
|
totals = prop_total_count + fields_total_count
|
||||||
print "100% compatible"
|
diff_total = prop_error_count + fields_diff_count
|
||||||
else:
|
|
||||||
totals = prop_total_count + fields_total_count
|
|
||||||
diff_total = prop_error_count + fields_diff_count
|
|
||||||
print "Table difference found: " + str(diff_total)
|
|
||||||
#print str((diff_total/totals) * 100) + '% compatible total == ' + str(totals) + " diff_total == " + str(diff_total)
|
|
||||||
return (totals, diff_total)
|
return (totals, diff_total)
|
||||||
|
|
||||||
# look at un-identified tables and try to match fields by their properties
|
# look at un-identified tables and try to match fields by their properties
|
||||||
|
@ -410,9 +407,9 @@ class TableSchema:
|
||||||
|
|
||||||
#
|
#
|
||||||
def importTable(self, tbName, fields, sqlStr, hashStr):
|
def importTable(self, tbName, fields, sqlStr, hashStr):
|
||||||
self.tableName = tbName
|
self.tableName = tbName
|
||||||
self.sqlStr = sqlStr
|
self.sqlStr = sqlStr
|
||||||
self.fields = fields
|
self.fields = fields
|
||||||
self.sqlStrHash = hashStr
|
self.sqlStrHash = hashStr
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue