FIN: completed the fingerprint comparison with an index of fingerprints
This commit is contained in:
parent
2984a84a81
commit
96ef570419
35
dbfp.py
35
dbfp.py
|
@ -19,16 +19,16 @@ def main():
|
||||||
parseArgs()
|
parseArgs()
|
||||||
|
|
||||||
#
|
#
|
||||||
def compareFingerprint(filein, filejson):
|
def compareFingerprint(file_in, file_json):
|
||||||
db = FingerprintDB()
|
db = FingerprintDB()
|
||||||
db.scanDBFile(filein)
|
db.scanDBFile(file_in)
|
||||||
percent = db.compareDB(filejson)
|
percent = db.compareDB(file_json)
|
||||||
print "Percent match: {}".format(str(percent))
|
print "Percent match: {}".format(str(percent))
|
||||||
|
|
||||||
#
|
#
|
||||||
def createFingerprint(filein, verbose, app_name, app_ver, notes):
|
def createFingerprint(file_in, verbose, app_name, app_ver, notes):
|
||||||
db = FingerprintDB()
|
db = FingerprintDB()
|
||||||
retVal = db.scanDBFile(filein)
|
retVal = db.scanDBFile(file_in)
|
||||||
if (retVal > 0):
|
if (retVal > 0):
|
||||||
if verbose:
|
if verbose:
|
||||||
db.debugFingerprint()
|
db.debugFingerprint()
|
||||||
|
@ -52,17 +52,18 @@ def indexFingerprints(fp_dir):
|
||||||
#
|
#
|
||||||
def compareFingerprintDir(file_in, fp_dir):
|
def compareFingerprintDir(file_in, fp_dir):
|
||||||
db = FingerprintDB()
|
db = FingerprintDB()
|
||||||
db.scanDBFile(filein)
|
db.scanDBFile(file_in)
|
||||||
md5_db = db.getMD5DB()
|
md5_db = db.getMD5DB()
|
||||||
md5_tables = db.getMD5Tables()
|
md5_tables = db.getMD5TablesArray()
|
||||||
#percent = db.compareDB(filejson)
|
|
||||||
fp = FingerprintIndex()
|
fp = FingerprintIndex()
|
||||||
fp.openIndex(fp_dir)
|
fp.openIndex(fp_dir)
|
||||||
fp_list = fp.findFP(md5_db, md5_tables)
|
fp_list = fp.findFP(md5_db, md5_tables)
|
||||||
results = []
|
results = []
|
||||||
for fp in fp_list:
|
for fp in fp_list:
|
||||||
percent = db.compareDB(fp)
|
fq_fp = fp_dir + os.path.sep + fp
|
||||||
results.add(percent)
|
print "[ OPEN fingerprint ] [ {} ]".format(fq_fp)
|
||||||
|
percent = db.compareDB(fq_fp)
|
||||||
|
results.append(percent)
|
||||||
|
|
||||||
print "RESULTS: {}".format(results)
|
print "RESULTS: {}".format(results)
|
||||||
results.sort()
|
results.sort()
|
||||||
|
@ -147,7 +148,7 @@ def parseArgs():
|
||||||
print ' DB Fingerprint'
|
print ' DB Fingerprint'
|
||||||
print '***** ***** ***** *****\n'
|
print '***** ***** ***** *****\n'
|
||||||
parser = argparse.ArgumentParser(description="Fingerprint a sqlite database based on its schema")
|
parser = argparse.ArgumentParser(description="Fingerprint a sqlite database based on its schema")
|
||||||
parser.add_argument('-f', '--file', required=False, help="path to file to be fingerprinted")
|
parser.add_argument('-db', '--database', required=False, help="path to file to be fingerprinted")
|
||||||
parser.add_argument('-fd', '--fpdir', required=False, help="path to directory of fingerprint files")
|
parser.add_argument('-fd', '--fpdir', required=False, help="path to directory of fingerprint files")
|
||||||
parser.add_argument('-fp', '--fingerprint', required=False, help="fingerprint file to use in comparison")
|
parser.add_argument('-fp', '--fingerprint', required=False, help="fingerprint file to use in comparison")
|
||||||
parser.add_argument('-dd', '--data_dir', required=False, help="path to a directory with sqlite files")
|
parser.add_argument('-dd', '--data_dir', required=False, help="path to a directory with sqlite files")
|
||||||
|
@ -169,18 +170,18 @@ def parseArgs():
|
||||||
else:
|
else:
|
||||||
logging.basicConfig(level=logging.CRITICAL)
|
logging.basicConfig(level=logging.CRITICAL)
|
||||||
|
|
||||||
if (args.file and args.fingerprint):
|
if (args.database and args.fingerprint):
|
||||||
compareFingerprint(args.file, args.fingerprint)
|
compareFingerprint(args.database, args.fingerprint)
|
||||||
elif (args.file and args.fpdir):
|
elif (args.database and args.fpdir):
|
||||||
compareFingerprintDir(args.file, args.fpdir)
|
compareFingerprintDir(args.database, args.fpdir)
|
||||||
elif (args.data_dir):
|
elif (args.data_dir):
|
||||||
androidData(args.data_dir)
|
androidData(args.data_dir)
|
||||||
elif (args.index_fingerprints):
|
elif (args.index_fingerprints):
|
||||||
indexFingerprints(args.index_fingerprints)
|
indexFingerprints(args.index_fingerprints)
|
||||||
elif (args.pull):
|
elif (args.pull):
|
||||||
androidPull()
|
androidPull()
|
||||||
elif (args.file):
|
elif (args.database):
|
||||||
createFingerprint(args.file, args.verbose, args.app_name, args.app_version, args.notes)
|
createFingerprint(args.database, args.verbose, args.app_name, args.app_version, args.notes)
|
||||||
else:
|
else:
|
||||||
parser.print_help()
|
parser.print_help()
|
||||||
|
|
||||||
|
|
|
@ -8,7 +8,7 @@ import hashlib
|
||||||
import time
|
import time
|
||||||
import logging
|
import logging
|
||||||
from libs.toolbox import ToolBox
|
from libs.toolbox import ToolBox
|
||||||
from libs.exceptions import FingerprintWrite
|
from libs.exceptions import FingerprintWrite, FingerprintMD5
|
||||||
|
|
||||||
delimeter = "|"
|
delimeter = "|"
|
||||||
|
|
||||||
|
@ -45,10 +45,10 @@ class FingerprintDB:
|
||||||
self.cur = None
|
self.cur = None
|
||||||
self.table_names = []
|
self.table_names = []
|
||||||
self.tables = {}
|
self.tables = {}
|
||||||
|
self.db_hash = None
|
||||||
# hashtables of json data
|
# hashtables of json data
|
||||||
self.tables_json = {}
|
self.tables_json = {}
|
||||||
self.db_hash = {}
|
self.db_hash_json = None
|
||||||
self.table_hashes = {}
|
|
||||||
# fingerprint metadata
|
# fingerprint metadata
|
||||||
self.db_name = ""
|
self.db_name = ""
|
||||||
self.app_name = ""
|
self.app_name = ""
|
||||||
|
@ -74,6 +74,8 @@ class FingerprintDB:
|
||||||
try:
|
try:
|
||||||
# read database schema, parse the schema
|
# read database schema, parse the schema
|
||||||
self.__readDatabase()
|
self.__readDatabase()
|
||||||
|
# concat all the table create statements, then md5
|
||||||
|
self.__createMD5DB()
|
||||||
except Exception, ex:
|
except Exception, ex:
|
||||||
print ex
|
print ex
|
||||||
return -3
|
return -3
|
||||||
|
@ -128,14 +130,25 @@ class FingerprintDB:
|
||||||
|
|
||||||
#
|
#
|
||||||
def compareDB(self, filejson):
|
def compareDB(self, filejson):
|
||||||
|
""" return the percentage of the match between two fingerprints """
|
||||||
if (not self.scanned):
|
if (not self.scanned):
|
||||||
return
|
return
|
||||||
""" return the percentage of the match between two fingerprints """
|
|
||||||
self.__importJsonDBSchema(filejson)
|
self.__importJsonDBSchema(filejson)
|
||||||
result = self.__DBSchemaCompare()
|
result = self.__DBSchemaCompare()
|
||||||
print "[ Percetage == {}]".format(result)
|
print "[ Percetage == {}]".format(result)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
#
|
||||||
|
def getMD5DB(self):
|
||||||
|
return self.db_hash
|
||||||
|
|
||||||
|
#
|
||||||
|
def getMD5TablesArray(self):
|
||||||
|
retval = []
|
||||||
|
for key in self.tables.keys():
|
||||||
|
retval.append(self.tables[key].hash())
|
||||||
|
return retval
|
||||||
|
|
||||||
#
|
#
|
||||||
def __importJsonDBSchema(self, file_json):
|
def __importJsonDBSchema(self, file_json):
|
||||||
""" import fingerprint from a json file """
|
""" import fingerprint from a json file """
|
||||||
|
@ -154,11 +167,10 @@ class FingerprintDB:
|
||||||
newTable.importTable(table_name, tb[table_name], dbmt[table_name], dbht[table_name])
|
newTable.importTable(table_name, tb[table_name], dbmt[table_name], dbht[table_name])
|
||||||
tables[table_name] = newTable
|
tables[table_name] = newTable
|
||||||
|
|
||||||
self.tables_json = tables
|
self.tables_json = tables
|
||||||
self.db_hash = dbmd5
|
self.db_hash_json = dbmd5
|
||||||
self.table_hashes = dbht
|
except Exception as ex:
|
||||||
except Exception as e:
|
logging.error("ERROR: problem loading json file: \n{}\n{}".format(file_json, ex))
|
||||||
logging.error("ERROR: problem loading json file: " + file_json + e)
|
|
||||||
|
|
||||||
#
|
#
|
||||||
def __DBMD5Compare(self):
|
def __DBMD5Compare(self):
|
||||||
|
@ -174,19 +186,36 @@ class FingerprintDB:
|
||||||
diff_total = 0
|
diff_total = 0
|
||||||
all_total = 0
|
all_total = 0
|
||||||
for tableName in self.tables_json.keys():
|
for tableName in self.tables_json.keys():
|
||||||
table = self.tables[tableName]
|
try:
|
||||||
print "[[ Comparing Table: " + tableName + " ]]"
|
print "[[ Comparing Table: " + tableName + " ]]"
|
||||||
if (table):
|
table = self.tables[tableName]
|
||||||
if not (self.tables_json[tableName].hash() == table.hash()):
|
if (table):
|
||||||
logging.info("*** Hash difference 1:{}!={}".format(self.tables_json[tableName].hash(), table.hash()))
|
if not (self.tables_json[tableName].hash() == table.hash()):
|
||||||
(total, diff_num) = self.__CompareTable(self.tablesJson[tableName], table)
|
logging.info("*** Hash difference 1:{}!={}".format(self.tables_json[tableName].hash(), table.hash()))
|
||||||
all_total += total
|
(total, diff_num) = self.__CompareTable(self.tablesJson[tableName], table)
|
||||||
diff_total += diff_num
|
all_total += total
|
||||||
|
diff_total += diff_num
|
||||||
|
else:
|
||||||
|
# get the number of fields from the other table and add to the all_total
|
||||||
|
logging.info("Table {} is IDENTICAL (to json fingerprint)".format(tableName))
|
||||||
|
else:
|
||||||
|
self.__FuzzyTable()
|
||||||
|
except KeyError as ex:
|
||||||
|
# get the number of fields from the other table to add to the diff_total
|
||||||
|
all_total += 10
|
||||||
|
diff_total += 10
|
||||||
|
logging.info("Table {} not EXISTS (to json fingerprint)".format(tableName))
|
||||||
|
|
||||||
|
print "diff_total=={}, all_total=={}".format(diff_total, all_total)
|
||||||
|
if (all_total == diff_total):
|
||||||
|
percentage = 0
|
||||||
|
else:
|
||||||
|
# percentage = 100
|
||||||
|
if (diff_total > 0):
|
||||||
|
percentage = float(diff_total / all_total)
|
||||||
else:
|
else:
|
||||||
self.__FuzzyTable()
|
percentage = 100
|
||||||
percentage = 100
|
# percentage = percentage - float(diff_total / all_total)
|
||||||
if (diff_total > 0):
|
|
||||||
percentage = percentage - float(diff_total) / all_total
|
|
||||||
|
|
||||||
return percentage
|
return percentage
|
||||||
|
|
||||||
|
@ -294,26 +323,27 @@ class FingerprintDB:
|
||||||
dmhash[table] = self.tables[table].SQLstr()
|
dmhash[table] = self.tables[table].SQLstr()
|
||||||
shash[table] = self.tables[table].sqlStrHash
|
shash[table] = self.tables[table].sqlStrHash
|
||||||
|
|
||||||
ahash['db-metadata-md5'] = self.__createMD5Index(shash)
|
ahash['db-metadata-md5'] = self.db_hash
|
||||||
|
|
||||||
json.dump(ahash, filehandle, sort_keys=True, indent=4)
|
json.dump(ahash, filehandle, sort_keys=True, indent=4)
|
||||||
|
|
||||||
#
|
#
|
||||||
def __createMD5Index(self, dbht):
|
def __createMD5DB(self):
|
||||||
retval = "hieeee!!!"
|
retval = None
|
||||||
concat_str = ""
|
concat_str = ""
|
||||||
try:
|
try:
|
||||||
bitchasskeys = dbht.keys()
|
bitchasskeys = self.tables.keys()
|
||||||
bitchasskeys.sort()
|
bitchasskeys.sort()
|
||||||
for key in bitchasskeys:
|
for key in bitchasskeys:
|
||||||
concat_str += dbht[key]
|
concat_str += self.tables[key].hash()
|
||||||
|
#print "---> {}".format(concat_str)
|
||||||
m = hashlib.md5()
|
m = hashlib.md5()
|
||||||
m.update(concat_str)
|
m.update(concat_str)
|
||||||
retval = m.hexdigest()
|
retval = m.hexdigest()
|
||||||
|
self.db_hash = retval
|
||||||
except Exception, ex:
|
except Exception, ex:
|
||||||
logging.error(ex)
|
logging.error(ex)
|
||||||
raise FingerprintMD5("Problem creating a MD5 sum")
|
raise FingerprintMD5("Problem creating a MD5 sum")
|
||||||
return retval
|
|
||||||
|
|
||||||
#
|
#
|
||||||
def setAppName(self, name):
|
def setAppName(self, name):
|
||||||
|
@ -359,7 +389,7 @@ class TableSchema:
|
||||||
self.tableName = tableName
|
self.tableName = tableName
|
||||||
self.sqlStr = sqlStr
|
self.sqlStr = sqlStr
|
||||||
|
|
||||||
print "[[ TABLE: <{}> ] processing...]".format(tableName)
|
logging.info("[[ TABLE: <{}> ] processing...]".format(tableName))
|
||||||
# hash the sql create string for quicker fingerprint matching
|
# hash the sql create string for quicker fingerprint matching
|
||||||
try:
|
try:
|
||||||
m = hashlib.md5()
|
m = hashlib.md5()
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
# [ md5_all, md5_list, file_name ]
|
# [ md5_all, md5_list, file_name ]
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
|
import re
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import logging
|
import logging
|
||||||
|
@ -33,6 +34,7 @@ class FingerprintIndex:
|
||||||
if (os.path.isfile(fq_fpidx)):
|
if (os.path.isfile(fq_fpidx)):
|
||||||
self.db_conn = sql.connect(fq_fpidx)
|
self.db_conn = sql.connect(fq_fpidx)
|
||||||
logging.info("DB Open SUCCESSFUL")
|
logging.info("DB Open SUCCESSFUL")
|
||||||
|
self.cur = self.db_conn.cursor()
|
||||||
else:
|
else:
|
||||||
logging.info("No index file found, creating index now...")
|
logging.info("No index file found, creating index now...")
|
||||||
self.__createIndex(fp_dir)
|
self.__createIndex(fp_dir)
|
||||||
|
@ -52,10 +54,23 @@ class FingerprintIndex:
|
||||||
|
|
||||||
#
|
#
|
||||||
def findFP(self, md5_db, md5_tables):
|
def findFP(self, md5_db, md5_tables):
|
||||||
rows = __qDatabaseMD5(md5_db)
|
#print "***** findFP *****\nmd5_db=={} md5_tables=={}\n".format(md5_db, md5_tables)
|
||||||
print "***** ***** *****\n{}\n".format(rows)
|
rows = self.__qDatabaseMD5(md5_db)
|
||||||
rows = __qTableMD5(md5_tables)
|
# rowcount will be -1 if nothing was returned
|
||||||
print "***** ***** *****\n{}\n".format(rows)
|
if rows.rowcount > 0:
|
||||||
|
#print "***** __qDatabaseMD5 *****\n{}\n".format(rows)
|
||||||
|
return rows
|
||||||
|
|
||||||
|
for md5_table in md5_tables:
|
||||||
|
retval = {}
|
||||||
|
rows = self.__qTableMD5(md5_table)
|
||||||
|
for row in rows:
|
||||||
|
fp_list = row[0]
|
||||||
|
fps = fp_list.split(',')
|
||||||
|
for fp in fps:
|
||||||
|
retval[fp] = 1
|
||||||
|
#print "***** __qTableMD5 *****\n{}\n".format(retval.keys())
|
||||||
|
return retval.keys()
|
||||||
|
|
||||||
#
|
#
|
||||||
def __qDatabaseMD5(self, md5_db):
|
def __qDatabaseMD5(self, md5_db):
|
||||||
|
@ -106,13 +121,19 @@ class FingerprintIndex:
|
||||||
#
|
#
|
||||||
def __populateIndex(self, fp_dir):
|
def __populateIndex(self, fp_dir):
|
||||||
""" read each file, pull md5, add row to database """
|
""" read each file, pull md5, add row to database """
|
||||||
|
finCount = 0
|
||||||
failCount = 0
|
failCount = 0
|
||||||
finCount = 0
|
naCount = 0
|
||||||
try:
|
try:
|
||||||
db = FingerprintDB()
|
db = FingerprintDB()
|
||||||
files = os.listdir(fp_dir)
|
files = os.listdir(fp_dir)
|
||||||
for file in files:
|
for file in files:
|
||||||
try:
|
try:
|
||||||
|
# only parse files with a .json extension
|
||||||
|
if not re.search(r'.*\.json', file):
|
||||||
|
naCount = naCount+1
|
||||||
|
pass
|
||||||
|
print file
|
||||||
fq_file = fp_dir + os.path.sep + file
|
fq_file = fp_dir + os.path.sep + file
|
||||||
db.importJson(fq_file)
|
db.importJson(fq_file)
|
||||||
self.__insertMod_md5_all(db.db_hash, db.table_hashes.values(), file)
|
self.__insertMod_md5_all(db.db_hash, db.table_hashes.values(), file)
|
||||||
|
@ -127,7 +148,7 @@ class FingerprintIndex:
|
||||||
logging.error(ex)
|
logging.error(ex)
|
||||||
finally:
|
finally:
|
||||||
self.db_conn.commit()
|
self.db_conn.commit()
|
||||||
logging.info("Completed populating the index. Completed: {} Failed: {} ".format(str(finCount), str(failCount)))
|
logging.info("Completed populating the index. Completed: {} Failed: {} NA: {}".format(str(finCount), str(failCount), str(naCount)))
|
||||||
|
|
||||||
#
|
#
|
||||||
def __insertMod_md5_all(self, md5_db, md5_list, filename):
|
def __insertMod_md5_all(self, md5_db, md5_list, filename):
|
||||||
|
|
Loading…
Reference in New Issue