FIN: completed the fingerprint comparison with an index of fingerprints
This commit is contained in:
parent
2984a84a81
commit
96ef570419
35
dbfp.py
35
dbfp.py
|
@ -19,16 +19,16 @@ def main():
|
|||
parseArgs()
|
||||
|
||||
#
|
||||
def compareFingerprint(filein, filejson):
|
||||
def compareFingerprint(file_in, file_json):
|
||||
db = FingerprintDB()
|
||||
db.scanDBFile(filein)
|
||||
percent = db.compareDB(filejson)
|
||||
db.scanDBFile(file_in)
|
||||
percent = db.compareDB(file_json)
|
||||
print "Percent match: {}".format(str(percent))
|
||||
|
||||
#
|
||||
def createFingerprint(filein, verbose, app_name, app_ver, notes):
|
||||
def createFingerprint(file_in, verbose, app_name, app_ver, notes):
|
||||
db = FingerprintDB()
|
||||
retVal = db.scanDBFile(filein)
|
||||
retVal = db.scanDBFile(file_in)
|
||||
if (retVal > 0):
|
||||
if verbose:
|
||||
db.debugFingerprint()
|
||||
|
@ -52,17 +52,18 @@ def indexFingerprints(fp_dir):
|
|||
#
|
||||
def compareFingerprintDir(file_in, fp_dir):
|
||||
db = FingerprintDB()
|
||||
db.scanDBFile(filein)
|
||||
db.scanDBFile(file_in)
|
||||
md5_db = db.getMD5DB()
|
||||
md5_tables = db.getMD5Tables()
|
||||
#percent = db.compareDB(filejson)
|
||||
md5_tables = db.getMD5TablesArray()
|
||||
fp = FingerprintIndex()
|
||||
fp.openIndex(fp_dir)
|
||||
fp_list = fp.findFP(md5_db, md5_tables)
|
||||
results = []
|
||||
for fp in fp_list:
|
||||
percent = db.compareDB(fp)
|
||||
results.add(percent)
|
||||
fq_fp = fp_dir + os.path.sep + fp
|
||||
print "[ OPEN fingerprint ] [ {} ]".format(fq_fp)
|
||||
percent = db.compareDB(fq_fp)
|
||||
results.append(percent)
|
||||
|
||||
print "RESULTS: {}".format(results)
|
||||
results.sort()
|
||||
|
@ -147,7 +148,7 @@ def parseArgs():
|
|||
print ' DB Fingerprint'
|
||||
print '***** ***** ***** *****\n'
|
||||
parser = argparse.ArgumentParser(description="Fingerprint a sqlite database based on its schema")
|
||||
parser.add_argument('-f', '--file', required=False, help="path to file to be fingerprinted")
|
||||
parser.add_argument('-db', '--database', required=False, help="path to file to be fingerprinted")
|
||||
parser.add_argument('-fd', '--fpdir', required=False, help="path to directory of fingerprint files")
|
||||
parser.add_argument('-fp', '--fingerprint', required=False, help="fingerprint file to use in comparison")
|
||||
parser.add_argument('-dd', '--data_dir', required=False, help="path to a directory with sqlite files")
|
||||
|
@ -169,18 +170,18 @@ def parseArgs():
|
|||
else:
|
||||
logging.basicConfig(level=logging.CRITICAL)
|
||||
|
||||
if (args.file and args.fingerprint):
|
||||
compareFingerprint(args.file, args.fingerprint)
|
||||
elif (args.file and args.fpdir):
|
||||
compareFingerprintDir(args.file, args.fpdir)
|
||||
if (args.database and args.fingerprint):
|
||||
compareFingerprint(args.database, args.fingerprint)
|
||||
elif (args.database and args.fpdir):
|
||||
compareFingerprintDir(args.database, args.fpdir)
|
||||
elif (args.data_dir):
|
||||
androidData(args.data_dir)
|
||||
elif (args.index_fingerprints):
|
||||
indexFingerprints(args.index_fingerprints)
|
||||
elif (args.pull):
|
||||
androidPull()
|
||||
elif (args.file):
|
||||
createFingerprint(args.file, args.verbose, args.app_name, args.app_version, args.notes)
|
||||
elif (args.database):
|
||||
createFingerprint(args.database, args.verbose, args.app_name, args.app_version, args.notes)
|
||||
else:
|
||||
parser.print_help()
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@ import hashlib
|
|||
import time
|
||||
import logging
|
||||
from libs.toolbox import ToolBox
|
||||
from libs.exceptions import FingerprintWrite
|
||||
from libs.exceptions import FingerprintWrite, FingerprintMD5
|
||||
|
||||
delimeter = "|"
|
||||
|
||||
|
@ -45,10 +45,10 @@ class FingerprintDB:
|
|||
self.cur = None
|
||||
self.table_names = []
|
||||
self.tables = {}
|
||||
self.db_hash = None
|
||||
# hashtables of json data
|
||||
self.tables_json = {}
|
||||
self.db_hash = {}
|
||||
self.table_hashes = {}
|
||||
self.db_hash_json = None
|
||||
# fingerprint metadata
|
||||
self.db_name = ""
|
||||
self.app_name = ""
|
||||
|
@ -74,6 +74,8 @@ class FingerprintDB:
|
|||
try:
|
||||
# read database schema, parse the schema
|
||||
self.__readDatabase()
|
||||
# concat all the table create statements, then md5
|
||||
self.__createMD5DB()
|
||||
except Exception, ex:
|
||||
print ex
|
||||
return -3
|
||||
|
@ -128,14 +130,25 @@ class FingerprintDB:
|
|||
|
||||
#
|
||||
def compareDB(self, filejson):
|
||||
""" return the percentage of the match between two fingerprints """
|
||||
if (not self.scanned):
|
||||
return
|
||||
""" return the percentage of the match between two fingerprints """
|
||||
self.__importJsonDBSchema(filejson)
|
||||
result = self.__DBSchemaCompare()
|
||||
print "[ Percetage == {}]".format(result)
|
||||
return result
|
||||
|
||||
#
|
||||
def getMD5DB(self):
|
||||
return self.db_hash
|
||||
|
||||
#
|
||||
def getMD5TablesArray(self):
|
||||
retval = []
|
||||
for key in self.tables.keys():
|
||||
retval.append(self.tables[key].hash())
|
||||
return retval
|
||||
|
||||
#
|
||||
def __importJsonDBSchema(self, file_json):
|
||||
""" import fingerprint from a json file """
|
||||
|
@ -154,11 +167,10 @@ class FingerprintDB:
|
|||
newTable.importTable(table_name, tb[table_name], dbmt[table_name], dbht[table_name])
|
||||
tables[table_name] = newTable
|
||||
|
||||
self.tables_json = tables
|
||||
self.db_hash = dbmd5
|
||||
self.table_hashes = dbht
|
||||
except Exception as e:
|
||||
logging.error("ERROR: problem loading json file: " + file_json + e)
|
||||
self.tables_json = tables
|
||||
self.db_hash_json = dbmd5
|
||||
except Exception as ex:
|
||||
logging.error("ERROR: problem loading json file: \n{}\n{}".format(file_json, ex))
|
||||
|
||||
#
|
||||
def __DBMD5Compare(self):
|
||||
|
@ -174,19 +186,36 @@ class FingerprintDB:
|
|||
diff_total = 0
|
||||
all_total = 0
|
||||
for tableName in self.tables_json.keys():
|
||||
table = self.tables[tableName]
|
||||
print "[[ Comparing Table: " + tableName + " ]]"
|
||||
if (table):
|
||||
if not (self.tables_json[tableName].hash() == table.hash()):
|
||||
logging.info("*** Hash difference 1:{}!={}".format(self.tables_json[tableName].hash(), table.hash()))
|
||||
(total, diff_num) = self.__CompareTable(self.tablesJson[tableName], table)
|
||||
all_total += total
|
||||
diff_total += diff_num
|
||||
try:
|
||||
print "[[ Comparing Table: " + tableName + " ]]"
|
||||
table = self.tables[tableName]
|
||||
if (table):
|
||||
if not (self.tables_json[tableName].hash() == table.hash()):
|
||||
logging.info("*** Hash difference 1:{}!={}".format(self.tables_json[tableName].hash(), table.hash()))
|
||||
(total, diff_num) = self.__CompareTable(self.tablesJson[tableName], table)
|
||||
all_total += total
|
||||
diff_total += diff_num
|
||||
else:
|
||||
# get the number of fields from the other table and add to the all_total
|
||||
logging.info("Table {} is IDENTICAL (to json fingerprint)".format(tableName))
|
||||
else:
|
||||
self.__FuzzyTable()
|
||||
except KeyError as ex:
|
||||
# get the number of fields from the other table to add to the diff_total
|
||||
all_total += 10
|
||||
diff_total += 10
|
||||
logging.info("Table {} not EXISTS (to json fingerprint)".format(tableName))
|
||||
|
||||
print "diff_total=={}, all_total=={}".format(diff_total, all_total)
|
||||
if (all_total == diff_total):
|
||||
percentage = 0
|
||||
else:
|
||||
# percentage = 100
|
||||
if (diff_total > 0):
|
||||
percentage = float(diff_total / all_total)
|
||||
else:
|
||||
self.__FuzzyTable()
|
||||
percentage = 100
|
||||
if (diff_total > 0):
|
||||
percentage = percentage - float(diff_total) / all_total
|
||||
percentage = 100
|
||||
# percentage = percentage - float(diff_total / all_total)
|
||||
|
||||
return percentage
|
||||
|
||||
|
@ -294,26 +323,27 @@ class FingerprintDB:
|
|||
dmhash[table] = self.tables[table].SQLstr()
|
||||
shash[table] = self.tables[table].sqlStrHash
|
||||
|
||||
ahash['db-metadata-md5'] = self.__createMD5Index(shash)
|
||||
ahash['db-metadata-md5'] = self.db_hash
|
||||
|
||||
json.dump(ahash, filehandle, sort_keys=True, indent=4)
|
||||
|
||||
#
|
||||
def __createMD5Index(self, dbht):
|
||||
retval = "hieeee!!!"
|
||||
def __createMD5DB(self):
|
||||
retval = None
|
||||
concat_str = ""
|
||||
try:
|
||||
bitchasskeys = dbht.keys()
|
||||
bitchasskeys = self.tables.keys()
|
||||
bitchasskeys.sort()
|
||||
for key in bitchasskeys:
|
||||
concat_str += dbht[key]
|
||||
concat_str += self.tables[key].hash()
|
||||
#print "---> {}".format(concat_str)
|
||||
m = hashlib.md5()
|
||||
m.update(concat_str)
|
||||
retval = m.hexdigest()
|
||||
self.db_hash = retval
|
||||
except Exception, ex:
|
||||
logging.error(ex)
|
||||
raise FingerprintMD5("Problem creating a MD5 sum")
|
||||
return retval
|
||||
|
||||
#
|
||||
def setAppName(self, name):
|
||||
|
@ -359,7 +389,7 @@ class TableSchema:
|
|||
self.tableName = tableName
|
||||
self.sqlStr = sqlStr
|
||||
|
||||
print "[[ TABLE: <{}> ] processing...]".format(tableName)
|
||||
logging.info("[[ TABLE: <{}> ] processing...]".format(tableName))
|
||||
# hash the sql create string for quicker fingerprint matching
|
||||
try:
|
||||
m = hashlib.md5()
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# [ md5_all, md5_list, file_name ]
|
||||
#
|
||||
#
|
||||
import re
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
|
@ -33,6 +34,7 @@ class FingerprintIndex:
|
|||
if (os.path.isfile(fq_fpidx)):
|
||||
self.db_conn = sql.connect(fq_fpidx)
|
||||
logging.info("DB Open SUCCESSFUL")
|
||||
self.cur = self.db_conn.cursor()
|
||||
else:
|
||||
logging.info("No index file found, creating index now...")
|
||||
self.__createIndex(fp_dir)
|
||||
|
@ -52,10 +54,23 @@ class FingerprintIndex:
|
|||
|
||||
#
|
||||
def findFP(self, md5_db, md5_tables):
|
||||
rows = __qDatabaseMD5(md5_db)
|
||||
print "***** ***** *****\n{}\n".format(rows)
|
||||
rows = __qTableMD5(md5_tables)
|
||||
print "***** ***** *****\n{}\n".format(rows)
|
||||
#print "***** findFP *****\nmd5_db=={} md5_tables=={}\n".format(md5_db, md5_tables)
|
||||
rows = self.__qDatabaseMD5(md5_db)
|
||||
# rowcount will be -1 if nothing was returned
|
||||
if rows.rowcount > 0:
|
||||
#print "***** __qDatabaseMD5 *****\n{}\n".format(rows)
|
||||
return rows
|
||||
|
||||
for md5_table in md5_tables:
|
||||
retval = {}
|
||||
rows = self.__qTableMD5(md5_table)
|
||||
for row in rows:
|
||||
fp_list = row[0]
|
||||
fps = fp_list.split(',')
|
||||
for fp in fps:
|
||||
retval[fp] = 1
|
||||
#print "***** __qTableMD5 *****\n{}\n".format(retval.keys())
|
||||
return retval.keys()
|
||||
|
||||
#
|
||||
def __qDatabaseMD5(self, md5_db):
|
||||
|
@ -106,13 +121,19 @@ class FingerprintIndex:
|
|||
#
|
||||
def __populateIndex(self, fp_dir):
|
||||
""" read each file, pull md5, add row to database """
|
||||
finCount = 0
|
||||
failCount = 0
|
||||
finCount = 0
|
||||
naCount = 0
|
||||
try:
|
||||
db = FingerprintDB()
|
||||
files = os.listdir(fp_dir)
|
||||
for file in files:
|
||||
try:
|
||||
# only parse files with a .json extension
|
||||
if not re.search(r'.*\.json', file):
|
||||
naCount = naCount+1
|
||||
pass
|
||||
print file
|
||||
fq_file = fp_dir + os.path.sep + file
|
||||
db.importJson(fq_file)
|
||||
self.__insertMod_md5_all(db.db_hash, db.table_hashes.values(), file)
|
||||
|
@ -127,7 +148,7 @@ class FingerprintIndex:
|
|||
logging.error(ex)
|
||||
finally:
|
||||
self.db_conn.commit()
|
||||
logging.info("Completed populating the index. Completed: {} Failed: {} ".format(str(finCount), str(failCount)))
|
||||
logging.info("Completed populating the index. Completed: {} Failed: {} NA: {}".format(str(finCount), str(failCount), str(naCount)))
|
||||
|
||||
#
|
||||
def __insertMod_md5_all(self, md5_db, md5_list, filename):
|
||||
|
|
Loading…
Reference in New Issue