# dbfp_pub/libs/fingerprint_index.py

# 116 lines
# 3.1 KiB
# Python

#
#
# Index row layout: [ md5_all, md5_list, file_name ]
#
#
import os
import sys
import logging
import sqlite3 as sql
from libs.exceptions import FingerprintIndexWrite
from libs.fingerprint import FingerprintDB
# prefixed with "_" so that it will be listed first and visible
INDEX_FILENAME = '_index_dpfp.db'
class FingerprintIndex:
    """
    Class handling an index of fingerprints for efficiently locating a fingerprint.

    The index is a SQLite database file named INDEX_FILENAME that lives inside
    the fingerprint directory. It contains a single table, ``md5_index``, with
    the columns ``md5_all`` (primary key), ``md5_list`` and ``file_name``.
    """
    #
    def __init__(self):
        # sqlite3 connection to the index file, or None when no index is open
        self.db_conn = None
    #
    def openIndex(self, fp_dir):
        """
        Open the index stored in fp_dir, creating and populating it first
        when the index file does not exist yet.

        On success self.db_conn is left open; release it with closeIndex().

        fp_dir: directory holding the fingerprint files and the index file.
        Raises FingerprintIndexOpen when the index cannot be opened or created.
        """
        fq_fpidx = fp_dir + os.path.sep + INDEX_FILENAME
        # Do not leak a connection left over from a previous open.
        self.closeIndex()
        try:
            if not os.path.isfile(fq_fpidx):
                logging.info("No index file found, creating index now...")
                # createIndex() closes its own connection when it is done,
                # so the freshly built index is (re)opened below.
                self.createIndex(fp_dir)
            self.db_conn = sql.connect(fq_fpidx)
            logging.info("DB Open SUCCESSFUL")
        except Exception as err:
            # Only close on failure; a successfully opened index stays open.
            self.closeIndex()
            logging.error("Error opening/creating an index file\n{}".format(err))
            # NOTE(review): the file-level imports only bring in
            # FingerprintIndexWrite, so the name is imported here to avoid a
            # NameError. Confirm libs.exceptions defines FingerprintIndexOpen.
            from libs.exceptions import FingerprintIndexOpen
            raise FingerprintIndexOpen("Error opening/creating an index file")
    #
    def closeIndex(self):
        """ Close the index connection if one is open; safe to call repeatedly """
        if self.db_conn is not None:
            self.db_conn.close()
            self.db_conn = None
    #
    def createIndex(self, fp_dir):
        """
        Create the index file in fp_dir, build the md5_index table and fill it
        with one row per fingerprint file found in fp_dir.

        The connection used for building is always closed before returning.

        fp_dir: directory holding the fingerprint files.
        Raises FingerprintIndexWrite when the index cannot be created.
        """
        fq_fpidx = fp_dir + os.path.sep + INDEX_FILENAME
        # Do not leak a connection left over from a previous open.
        self.closeIndex()
        try:
            self.db_conn = sql.connect(fq_fpidx)
            self.db_conn.execute('''
CREATE TABLE md5_index (
md5_all TEXT PRIMARY KEY,
md5_list TEXT,
file_name TEXT);
''')
            logging.info("Successfully created index table")
            self.__populateIndex(fp_dir)
            # The inserted rows live in an implicit transaction; without this
            # commit they are rolled back when the connection is closed.
            self.db_conn.commit()
            logging.info("Successfully populated the index")
        except Exception as err:
            logging.error("Error creating an index file\n{}".format(err))
            raise FingerprintIndexWrite("Error creating an index file")
        finally:
            self.closeIndex()
    #
    def __populateIndex(self, fp_dir):
        """
        Read each file in fp_dir, pull its md5 hashes and add a row to the index.

        Files that cannot be imported or inserted are counted as failures and
        skipped; the totals are logged once every file has been visited.
        """
        failCount = 0
        finCount = 0
        try:
            db = FingerprintDB()
            files = os.listdir(fp_dir)
        except Exception as err:
            # Best effort: report the problem and fall through to the summary.
            logging.error("Unable to read fingerprint directory\n{}".format(err))
            files = []
        for file_name in files:
            # The index file lives in the same directory; it is not a fingerprint.
            if file_name == INDEX_FILENAME:
                continue
            try:
                fq_file = fp_dir + os.path.sep + file_name
                db.importJson(fq_file)
                # list() keeps the stored text a plain list representation
                # even when values() returns a view object.
                self.__insertRecord(db.db_hash, list(db.table_hashes.values()), file_name)
                finCount = finCount + 1
            except Exception as err:
                logging.warning("Skipping file {}\n{}".format(file_name, err))
                failCount = failCount + 1
        logging.info("Completed populating the index. Completed: {} Failed: {} ".format(str(finCount), str(failCount)))
    #
    def __insertRecord(self, md5_all, md5_list, filename):
        """
        Insert one row into md5_index using the open connection.

        md5_all:  value for the md5_all primary-key column.
        md5_list: stored as its str() representation in the md5_list column.
        filename: value for the file_name column.
        Raises FingerprintIndexWrite when the insert fails.
        """
        try:
            logging.info("INSERT INTO md5_index VALUES(?, ?, ?): {}; {}; {}".format(md5_all, str(md5_list), filename))
            self.db_conn.execute(
                '''
INSERT INTO md5_index VALUES(?, ?, ?)
''', (md5_all, str(md5_list), filename))
        except Exception as e:
            logging.error("Error inserting a row\n{}".format(e))
            raise FingerprintIndexWrite("Error inserting a row")
    #
    def __checkIntegrity(self):
        """ Sanity check the number of files against the index rows (not implemented yet) """
        pass
    #
    def dirCompare(self, folder):
        """ Not implemented yet; currently does nothing and returns None """
        pass
    #
    def compareFingerprint(self, fp1, fp2):
        """ Not implemented yet; currently does nothing and returns None """
        pass