FIN: completed the fingerprint comparison with an index of fingerprints

This commit is contained in:
JohnE 2016-01-08 02:49:49 -08:00
parent 2984a84a81
commit 96ef570419
3 changed files with 104 additions and 52 deletions

35
dbfp.py
View File

@ -19,16 +19,16 @@ def main():
parseArgs()
#
def compareFingerprint(filein, filejson):
def compareFingerprint(file_in, file_json):
db = FingerprintDB()
db.scanDBFile(filein)
percent = db.compareDB(filejson)
db.scanDBFile(file_in)
percent = db.compareDB(file_json)
print "Percent match: {}".format(str(percent))
#
def createFingerprint(filein, verbose, app_name, app_ver, notes):
def createFingerprint(file_in, verbose, app_name, app_ver, notes):
db = FingerprintDB()
retVal = db.scanDBFile(filein)
retVal = db.scanDBFile(file_in)
if (retVal > 0):
if verbose:
db.debugFingerprint()
@ -52,17 +52,18 @@ def indexFingerprints(fp_dir):
#
def compareFingerprintDir(file_in, fp_dir):
db = FingerprintDB()
db.scanDBFile(filein)
db.scanDBFile(file_in)
md5_db = db.getMD5DB()
md5_tables = db.getMD5Tables()
#percent = db.compareDB(filejson)
md5_tables = db.getMD5TablesArray()
fp = FingerprintIndex()
fp.openIndex(fp_dir)
fp_list = fp.findFP(md5_db, md5_tables)
results = []
for fp in fp_list:
percent = db.compareDB(fp)
results.add(percent)
fq_fp = fp_dir + os.path.sep + fp
print "[ OPEN fingerprint ] [ {} ]".format(fq_fp)
percent = db.compareDB(fq_fp)
results.append(percent)
print "RESULTS: {}".format(results)
results.sort()
@ -147,7 +148,7 @@ def parseArgs():
print ' DB Fingerprint'
print '***** ***** ***** *****\n'
parser = argparse.ArgumentParser(description="Fingerprint a sqlite database based on its schema")
parser.add_argument('-f', '--file', required=False, help="path to file to be fingerprinted")
parser.add_argument('-db', '--database', required=False, help="path to file to be fingerprinted")
parser.add_argument('-fd', '--fpdir', required=False, help="path to directory of fingerprint files")
parser.add_argument('-fp', '--fingerprint', required=False, help="fingerprint file to use in comparison")
parser.add_argument('-dd', '--data_dir', required=False, help="path to a directory with sqlite files")
@ -169,18 +170,18 @@ def parseArgs():
else:
logging.basicConfig(level=logging.CRITICAL)
if (args.file and args.fingerprint):
compareFingerprint(args.file, args.fingerprint)
elif (args.file and args.fpdir):
compareFingerprintDir(args.file, args.fpdir)
if (args.database and args.fingerprint):
compareFingerprint(args.database, args.fingerprint)
elif (args.database and args.fpdir):
compareFingerprintDir(args.database, args.fpdir)
elif (args.data_dir):
androidData(args.data_dir)
elif (args.index_fingerprints):
indexFingerprints(args.index_fingerprints)
elif (args.pull):
androidPull()
elif (args.file):
createFingerprint(args.file, args.verbose, args.app_name, args.app_version, args.notes)
elif (args.database):
createFingerprint(args.database, args.verbose, args.app_name, args.app_version, args.notes)
else:
parser.print_help()

View File

@ -8,7 +8,7 @@ import hashlib
import time
import logging
from libs.toolbox import ToolBox
from libs.exceptions import FingerprintWrite
from libs.exceptions import FingerprintWrite, FingerprintMD5
delimeter = "|"
@ -45,10 +45,10 @@ class FingerprintDB:
self.cur = None
self.table_names = []
self.tables = {}
self.db_hash = None
# hashtables of json data
self.tables_json = {}
self.db_hash = {}
self.table_hashes = {}
self.db_hash_json = None
# fingerprint metadata
self.db_name = ""
self.app_name = ""
@ -74,6 +74,8 @@ class FingerprintDB:
try:
# read database schema, parse the schema
self.__readDatabase()
# concat all the table create statements, then md5
self.__createMD5DB()
except Exception, ex:
print ex
return -3
@ -128,14 +130,25 @@ class FingerprintDB:
#
def compareDB(self, filejson):
""" return the percentage of the match between two fingerprints """
if (not self.scanned):
return
""" return the percentage of the match between two fingerprints """
self.__importJsonDBSchema(filejson)
result = self.__DBSchemaCompare()
print "[ Percetage == {}]".format(result)
return result
#
def getMD5DB(self):
return self.db_hash
#
def getMD5TablesArray(self):
retval = []
for key in self.tables.keys():
retval.append(self.tables[key].hash())
return retval
#
def __importJsonDBSchema(self, file_json):
""" import fingerprint from a json file """
@ -154,11 +167,10 @@ class FingerprintDB:
newTable.importTable(table_name, tb[table_name], dbmt[table_name], dbht[table_name])
tables[table_name] = newTable
self.tables_json = tables
self.db_hash = dbmd5
self.table_hashes = dbht
except Exception as e:
logging.error("ERROR: problem loading json file: " + file_json + e)
self.tables_json = tables
self.db_hash_json = dbmd5
except Exception as ex:
logging.error("ERROR: problem loading json file: \n{}\n{}".format(file_json, ex))
#
def __DBMD5Compare(self):
@ -174,19 +186,36 @@ class FingerprintDB:
diff_total = 0
all_total = 0
for tableName in self.tables_json.keys():
table = self.tables[tableName]
print "[[ Comparing Table: " + tableName + " ]]"
if (table):
if not (self.tables_json[tableName].hash() == table.hash()):
logging.info("*** Hash difference 1:{}!={}".format(self.tables_json[tableName].hash(), table.hash()))
(total, diff_num) = self.__CompareTable(self.tablesJson[tableName], table)
all_total += total
diff_total += diff_num
try:
print "[[ Comparing Table: " + tableName + " ]]"
table = self.tables[tableName]
if (table):
if not (self.tables_json[tableName].hash() == table.hash()):
logging.info("*** Hash difference 1:{}!={}".format(self.tables_json[tableName].hash(), table.hash()))
(total, diff_num) = self.__CompareTable(self.tablesJson[tableName], table)
all_total += total
diff_total += diff_num
else:
# get the number of fields from the other table and add to the all_total
logging.info("Table {} is IDENTICAL (to json fingerprint)".format(tableName))
else:
self.__FuzzyTable()
except KeyError as ex:
# get the number of fields from the other table to add to the diff_total
all_total += 10
diff_total += 10
logging.info("Table {} not EXISTS (to json fingerprint)".format(tableName))
print "diff_total=={}, all_total=={}".format(diff_total, all_total)
if (all_total == diff_total):
percentage = 0
else:
# percentage = 100
if (diff_total > 0):
percentage = float(diff_total / all_total)
else:
self.__FuzzyTable()
percentage = 100
if (diff_total > 0):
percentage = percentage - float(diff_total) / all_total
percentage = 100
# percentage = percentage - float(diff_total / all_total)
return percentage
@ -294,26 +323,27 @@ class FingerprintDB:
dmhash[table] = self.tables[table].SQLstr()
shash[table] = self.tables[table].sqlStrHash
ahash['db-metadata-md5'] = self.__createMD5Index(shash)
ahash['db-metadata-md5'] = self.db_hash
json.dump(ahash, filehandle, sort_keys=True, indent=4)
#
def __createMD5Index(self, dbht):
retval = "hieeee!!!"
def __createMD5DB(self):
retval = None
concat_str = ""
try:
bitchasskeys = dbht.keys()
bitchasskeys = self.tables.keys()
bitchasskeys.sort()
for key in bitchasskeys:
concat_str += dbht[key]
concat_str += self.tables[key].hash()
#print "---> {}".format(concat_str)
m = hashlib.md5()
m.update(concat_str)
retval = m.hexdigest()
retval = m.hexdigest()
self.db_hash = retval
except Exception, ex:
logging.error(ex)
raise FingerprintMD5("Problem creating a MD5 sum")
return retval
#
def setAppName(self, name):
@ -359,7 +389,7 @@ class TableSchema:
self.tableName = tableName
self.sqlStr = sqlStr
print "[[ TABLE: <{}> ] processing...]".format(tableName)
logging.info("[[ TABLE: <{}> ] processing...]".format(tableName))
# hash the sql create string for quicker fingerprint matching
try:
m = hashlib.md5()

View File

@ -3,6 +3,7 @@
# [ md5_all, md5_list, file_name ]
#
#
import re
import os
import sys
import logging
@ -33,6 +34,7 @@ class FingerprintIndex:
if (os.path.isfile(fq_fpidx)):
self.db_conn = sql.connect(fq_fpidx)
logging.info("DB Open SUCCESSFUL")
self.cur = self.db_conn.cursor()
else:
logging.info("No index file found, creating index now...")
self.__createIndex(fp_dir)
@ -52,10 +54,23 @@ class FingerprintIndex:
#
def findFP(self, md5_db, md5_tables):
rows = __qDatabaseMD5(md5_db)
print "***** ***** *****\n{}\n".format(rows)
rows = __qTableMD5(md5_tables)
print "***** ***** *****\n{}\n".format(rows)
#print "***** findFP *****\nmd5_db=={} md5_tables=={}\n".format(md5_db, md5_tables)
rows = self.__qDatabaseMD5(md5_db)
# rowcount will be -1 if nothing was returned
if rows.rowcount > 0:
#print "***** __qDatabaseMD5 *****\n{}\n".format(rows)
return rows
for md5_table in md5_tables:
retval = {}
rows = self.__qTableMD5(md5_table)
for row in rows:
fp_list = row[0]
fps = fp_list.split(',')
for fp in fps:
retval[fp] = 1
#print "***** __qTableMD5 *****\n{}\n".format(retval.keys())
return retval.keys()
#
def __qDatabaseMD5(self, md5_db):
@ -106,13 +121,19 @@ class FingerprintIndex:
#
def __populateIndex(self, fp_dir):
""" read each file, pull md5, add row to database """
finCount = 0
failCount = 0
finCount = 0
naCount = 0
try:
db = FingerprintDB()
files = os.listdir(fp_dir)
for file in files:
try:
# only parse files with a .json extension
if not re.search(r'.*\.json', file):
naCount = naCount+1
pass
print file
fq_file = fp_dir + os.path.sep + file
db.importJson(fq_file)
self.__insertMod_md5_all(db.db_hash, db.table_hashes.values(), file)
@ -127,7 +148,7 @@ class FingerprintIndex:
logging.error(ex)
finally:
self.db_conn.commit()
logging.info("Completed populating the index. Completed: {} Failed: {} ".format(str(finCount), str(failCount)))
logging.info("Completed populating the index. Completed: {} Failed: {} NA: {}".format(str(finCount), str(failCount), str(naCount)))
#
def __insertMod_md5_all(self, md5_db, md5_list, filename):