WIP: optimising the fingerprint lookup/comparison

This commit is contained in:
JohnE 2016-03-01 23:46:26 -08:00
parent 8cca9ed317
commit 76a9dd1f8b
3 changed files with 28 additions and 12 deletions

15
dbfp.py
View File

@ -61,20 +61,19 @@ def indexFingerprints(fp_dir):
# #
def compareFPIndex(fp_dir, db_in, json_in): def compareFPIndex(fp_dir, db_in, json_in):
try: # try:
db = FingerprintDB() db = FingerprintDB()
if (db_in): if (db_in):
db.scanDBFile(db_in) db.scanDBFile(db_in)
else: else:
db.importJson(json_in) db.importJson(json_in)
logging.info("MD5 DB == {}".format(db.getMD5DB()))
logging.info("MD5 TB == {}".format(db.getMD5Tables()))
fp = FingerprintIndex() fp = FingerprintIndex()
fp.openIndex(fp_dir) fp.openIndex(fp_dir)
# search for fingerprints with exact database match # search for fingerprints with exact database match
logging.info("Searching for MD5 DB: {}".format(db.getMD5DB()))
fp_ret = fp.findFP(db.getMD5DB()) fp_ret = fp.findFP(db.getMD5DB())
if (fp_ret): if (fp_ret):
print "Database matche(s) found" print "Database matche(s) found"
@ -85,11 +84,11 @@ def compareFPIndex(fp_dir, db_in, json_in):
# search for fingerprints with similar tables # search for fingerprints with similar tables
else: else:
logging.info("Searching for md5 tables: {}".format(db.getMD5Tables())) logging.info("Searching for md5 tables: {}".format(db.getMD5Tables()))
fp_list = fp.findFPTables(db.getMD5Tables()) fp_list = fp.findFPTables(db.getMD5Tables().values())
results = [] results = []
for fp in fp_list: for fp in fp_list:
fq_fp = fp_dir + os.path.sep + fp fq_fp = fp_dir + os.path.sep + fp
print "[ OPEN fingerprint ] [ {} ]".format(fq_fp) logging.info("Comparing fingerprint: {}".format(fq_fp))
percent = db.compareDB(fq_fp) percent = db.compareDB(fq_fp)
results.append(percent) results.append(percent)
@ -97,9 +96,9 @@ def compareFPIndex(fp_dir, db_in, json_in):
print "RESULTS: {}".format(results) print "RESULTS: {}".format(results)
results.sort() results.sort()
print "RESULTS: {}".format(results) print "RESULTS: {}".format(results)
except Exception as ex: # except Exception as ex:
print "Error comparing fingerprint" # print "Error comparing fingerprint"
print ex # print ex
# #
def androidPull(): def androidPull():

View File

@ -3,7 +3,7 @@
Action Items from the code review: Action Items from the code review:
1) Change table name from "metadata" to "app_details" xx1) Change table name from "metadata" to "app_details"
1) Regression Testing of all Features 1) Regression Testing of all Features

View File

@ -8,6 +8,7 @@ import os
import sys import sys
import logging import logging
import sqlite3 as sql import sqlite3 as sql
from operator import itemgetter
from libs.exceptions import FingerprintIndexWrite from libs.exceptions import FingerprintIndexWrite
from libs.exceptions import FingerprintIndexOpen from libs.exceptions import FingerprintIndexOpen
from libs.fingerprint import FingerprintDB from libs.fingerprint import FingerprintDB
@ -80,17 +81,33 @@ class FingerprintIndex:
return rows return rows
return None return None
# return a list of json fingerprint files to open # return a sorted array with the most likely fingerprints ordered first
def findFPTables(self, md5_tables): def findFPTables(self, md5_tables):
retval = {} retval = {}
for md5_table in md5_tables: for md5_table in md5_tables:
logging.debug("findFPTables::md5_table=={}".format(md5_table))
rows = self.__qTableMD5(md5_table) rows = self.__qTableMD5(md5_table)
for row in rows: for row in rows:
logging.debug("findFPTables::row=={}\n".format(row[0]))
fp_list = row[0] fp_list = row[0]
fps = fp_list.split(',') fps = fp_list.split(',')
for fp in fps: for fp in fps:
if (retval.has_key(fp)):
retval[fp] = retval[fp] + 1
else:
retval[fp] = 1 retval[fp] = 1
return retval.keys() logging.debug("findFPTables::retval=={}\n".format(retval))
# retval_sorted = sorted(retval.items(), key==lambda x: x[1])
retval_sorted = sorted(retval.items(), key=itemgetter(1))
# retval_sorted = []
# vals = retval.values()
# vals.sort()
# for val in vals:
# retval_sorted.append(retval[val])
logging.debug("findFPTables::retval_sorted=={}\n".format(retval_sorted))
return retval_sorted
# return retval.keys()
# #
def findDB(self, md5_db): def findDB(self, md5_db):