#
# Database fingerprinting of SQLite schemas.
#
import os
import re
import json
import sqlite3
import hashlib
import time
import logging

from libs.toolbox import ToolBox
from libs.exceptions import FingerprintWrite, FingerprintMD5, FingerprintReadNoData

# NOTE(review): spelling kept as-is ("delimeter") for backward compatibility;
# it is part of the module's public surface.
delimeter = "|"

#
# Database Schema
# The SQLite database schema is stored in page 1 of the database (root page).
# The sqlite_master table contains one row for each table, index, view, and trigger
# (collectively "objects") in the database schema.
# CREATE TABLE sqlite_master(
#   type text,
#   name text,
#   tbl_name text,
#   rootpage integer,
#   sql text
# );
#


class FingerprintDB:
    """
    This class represents a complete database schema

    Helper functions:
        Writing of the database schema as a "fingerprint"
        Comparing of a database schema (fingerprint loaded from file)
    """

    # query used to enumerate all table definitions in a SQLite database
    sqlmaster = "SELECT name, sql FROM sqlite_master WHERE type='table'"
    # version of the scanner used to create the fingerprint
    scanner_ver = "0.98"
    # version of the json file format; this is inserted in the json fingerprint
    # file and can be used to determine what is supported at the time of that version
    format_ver = "0.92"

    def __init__(self):
        self.conn = None          # sqlite3 connection (set by scanDBFile)
        self.cur = None           # sqlite3 cursor (set by scanDBFile)
        self.table_names = []     # table names, in the order read from sqlite_master
        self.tables = {}          # table name -> TableSchema
        self.db_hash = None       # MD5 over all table hashes (see __createMD5DB)
        self.table_hashes = None  # table name -> MD5 of its CREATE statement
        self.filein = ""
        self.init = False         # True once a database or fingerprint is loaded
        # fingerprint metadata
        self.metadata = {}
        self.metadata['db-name'] = ""
        self.metadata['app-name'] = ""
        self.metadata['app-ver'] = ""
        self.metadata['notes'] = ""
        self.metadata['scan-date'] = ""

    def scanDBFile(self, filein):
        """ read the database, populate the data into the class """
        try:
            (self.conn, self.cur) = self.__openDB(filein)
        except Exception as ex:
            logging.info(ex)
            raise
        try:
            # extract file name from path+filename
            self.metadata['db-name'] = os.path.basename(filein)
        except Exception as ex:
            logging.error(ex)
            self.metadata['db-name'] = filein
        try:
            # read database schema, parse the schema
            self.__readDatabase()
            # concat all the table create statements, then md5
            self.__createMD5DB()
        except Exception as ex:
            logging.error(ex)
            raise
        # create an index of table hashes
        self.table_hashes = {}
        for key in self.tables.keys():
            self.table_hashes[key] = self.tables[key].hash()
        # flag is used to determine if the class has data
        self.init = True
        self.filein = filein

    def writeFingerprint(self):
        """ Write the fingerprint to a timestamped file; return the file name. """
        if not self.init:
            return
        # BUG FIX: filename is initialized up front so the error message below
        # cannot raise NameError when getTimestampFilename itself fails.
        filename = ""
        try:
            filename = ToolBox.getTimestampFilename(self.filein)
            with open(filename, "w") as fh:
                self.__writeFingerprint(fh)
        except Exception as ex:
            logging.error(ex)
            raise FingerprintWrite("Problem writing the fingerprint to a file, file=={}".format(filename))
        return filename

    def writeFingerprintFile(self, filename):
        """ Write the fingerprint to the given file name. """
        if not self.init:
            return
        try:
            with open(filename, "w") as fh:
                self.__writeFingerprint(fh)
        except Exception as ex:
            logging.error(ex)
            raise FingerprintWrite("Problem writing the fingerprint to a file, file=={}".format(filename))

    def importJson(self, file_json):
        """ import fingerprint from a json file """
        self.__importJsonDBSchema(file_json)
# def compareDB(self, filejson): """ return the percentage of the match between two fingerprints """ if (not self.init): return fp = FingerprintDB(); fp.__importJsonDBSchema(filejson) result = self.__DBSchemaCompare(fp) print "[ Percetage == %f]".format(result) return result # def getMD5DB(self): return self.db_hash def getMD5Tables(self): return self.table_hashes # def __importJsonDBSchema(self, file_json): """ import fingerprint from a json file """ self.__init__() tables = {} try: fh = open(file_json, "r") jsonData = json.load(fh) tb = jsonData['tables'] dbmt = jsonData['db-metadata'] dbht = jsonData['db-metadata-hashes'] dbmd5 = jsonData['db-metadata-md5'] metadata = jsonData['_file-metadata'] all_tables = tb.keys() for table_name in all_tables: logging.debug("[[ Table <" + table_name + "> imported ]]") newTable = TableSchema() newTable.importTable(table_name, tb[table_name], dbmt[table_name], dbht[table_name]) tables[table_name] = newTable self.tables = tables self.db_hash = dbmd5 self.table_hashes = dbht self.metadata = metadata except Exception as ex: logging.error("ERROR: problem loading json file: \n{}\n{}".format(file_json, ex)) # def __DBMD5Compare(self): pass def __DBSchemaCompare(self, fp): # the json database schema definition is what our tools is expecting... 
# ...so we use it as the baseline # look for table, if exists, compare each # if exists, compare each field # else, add to unknown tables...or do a fuzzy compare (look at number of fields, field names) diff_num = 0 diff_total = 0 all_total = 0 for tableName in fp.tables.keys(): try: logging.info("[[ Comparing Table: " + tableName + " ]]") table = self.tables[tableName] if (table): if not (fp.tables[tableName].hash() == table.hash()): logging.info("*** Hash difference 1:{}!={}".format(fp.tables[tableName].hash(), table.hash())) (total, diff_num) = self.__CompareTable(fp.tables[tableName], table) all_total += total diff_total += diff_num else: all_total += 10 logging.info("Table {} is IDENTICAL (to json fingerprint)".format(tableName)) else: self.__FuzzyTable() except KeyError as ex: # get the number of fields from the other table to add to the diff_total all_total += 10 diff_total += 10 logging.info("Table {} not EXISTS (to json fingerprint)".format(tableName)) logging.info("diff_total=={}, all_total=={}".format(diff_total, all_total)) if (diff_total > 0): if (diff_total == all_total): percentage = 0 else: percentage = float(diff_total / all_total) else: percentage = 100 return percentage # # Compare the Table Definitions. 
# Compare Table 1 (Json table) to Table 2 # def __CompareTable(self, tb1, tb2): fields_total_count = 0 fields_diff_count = 0 prop_total_count = 0 prop_error_count = 0 totals = 0 diff_total = 0 fields1 = tb1.fields fields2 = tb2.fields for field in fields1.keys(): field1 = fields1[field] fields_total_count += 1 if (fields2.has_key(field)): field2 = fields1[field] for properties in field1.keys(): prop_total_count += 1 if not field2.has_key(properties): prop_error_count += 1 else: fields_diff_count += 1 totals = prop_total_count + fields_total_count diff_total = prop_error_count + fields_diff_count return (totals, diff_total) # look at un-identified tables and try to match fields by their properties def __FuzzyTable(): return # def __openDB(self, filein): conn = sqlite3.connect(filein) cur = conn.cursor() return (conn, cur) # read a sqlite database by parsing the create table strings # sqlmaster = "SELECT name, sql FROM sqlite_master WHERE type='table'" def __readDatabase(self): flag = False rows = self.cur.execute(self.sqlmaster) for row in rows: flag = True newTable = TableSchema() newTable.loadTable(row[0], row[1]) self.table_names.append(newTable.name()) self.tables[newTable.name()] = newTable if (not flag): raise FingerprintReadNoData("No data, possible zero byte file") # def debugFingerprint(self): if self.tables: myDict = self.tables elif self.tablesJson: myDict = self.tablesJson else: return # keys = myDict.keys() for key in keys: print "[[ TABLE: <" + key + "> ]]" tableDef = myDict[key] tableDef.toJSON() # def __writeFingerprint(self, filehandle): ahash = {} thash = {} dmhash = {} shash = {} mhash = {} ahash['_file-metadata'] = mhash ahash['db-metadata'] = dmhash ahash['db-metadata-hashes'] = shash ahash['db-metadata-md5'] = None ahash['tables'] = thash try: timestr = time.strftime('%Y-%m-%d_%H%M%S', time.localtime(time.time())) except: timestr = "" mhash['scan-date'] = timestr mhash['format-ver'] = self.format_ver mhash['scanner-ver'] = self.scanner_ver 
mhash['scanner-name'] = 'dbfp' mhash['db-name'] = self.metadata['db-name'] mhash['app-name'] = self.metadata['app-name'] mhash['app-ver'] = self.metadata['app-ver'] mhash['notes'] = self.metadata['notes'] # tables tables = self.tables.keys() for table in tables: thash[table] = self.tables[table].fields dmhash[table] = self.tables[table].SQLstr() shash[table] = self.tables[table].sqlStrHash ahash['db-metadata-md5'] = self.db_hash json.dump(ahash, filehandle, sort_keys=True, indent=4) # def __createMD5DB(self): retval = None concat_str = "" try: bitchasskeys = self.tables.keys() bitchasskeys.sort() for key in bitchasskeys: concat_str += self.tables[key].hash() #print "---> {}".format(concat_str) m = hashlib.md5() m.update(concat_str) retval = m.hexdigest() self.db_hash = retval except Exception, ex: logging.error(ex) raise FingerprintMD5("Problem creating a MD5 sum") # def setAppName(self, name): self.metadata['app-name'] = name # def setAppVer(self, version): self.metadata['app-ver'] = version # def setNotes(self, notes): self.metadata['notes'] = notes # def getErrorString(self, errorCode): retval = "ERROR: unknown error code: " + str(errorCode) if (errorCode == -2): retval = "ERROR: problem opening file, or not sqlite database" elif (errorCode == -3): retval = "ERROR: problem reading database" return retval # # # class TableSchema: """ This class represents the definition of database table """ tableschemaregex = r'\((.*)\)' # def __init__(self): self.tableName = "" self.sqlStr = "" self.sqlStrHash = "" self.fields = {} self.primarykeyFlag = False self.uniqueFlag = False # def loadTable(self, tableName, sqlStr): self.tableName = tableName self.sqlStr = sqlStr logging.info("[[ TABLE: <{}> ] processing...]".format(tableName)) # hash the sql create string for quicker fingerprint matching try: m = hashlib.md5() m.update(self.sqlStr) self.sqlStrHash = m.hexdigest() except: logging.warn('WARN: problem hashing sql string: "{}"'.format(self.sqlStr)) # parse the create 
string into a structured hash table results = re.search(self.tableschemaregex, sqlStr) if results: colstr = results.group(1) columns = colstr.split(',') for col in columns: newField = self.__parseCreateStr(col.strip()) if newField: self.fields[newField['name']] = newField del newField['name'] # def importTable(self, tbName, fields, sqlStr, hashStr): self.tableName = tbName self.sqlStr = sqlStr self.fields = fields self.sqlStrHash = hashStr # Table Definition # # CREATE TABLE contacts (_id INTEGER PRIMARY KEY AUTOINCREMENT,name_raw_contact_id INTEGER REFERENCES raw_contacts(_id), # photo_id INTEGER REFERENCES data(_id),photo_file_id INTEGER REFERENCES photo_files(_id), # custom_ringtone TEXT,send_to_voicemail INTEGER NOT NULL DEFAULT 0, # times_contacted INTEGER NOT NULL DEFAULT 0,last_time_contacted INTEGER, # starred INTEGER NOT NULL DEFAULT 0,pinned INTEGER NOT NULL DEFAULT 2147483647, # has_phone_number INTEGER NOT NULL DEFAULT 0,lookup TEXT, # status_update_id INTEGER REFERENCES data(_id),contact_last_updated_timestamp INTEGER) # # CREATE TABLE sent_files_v2 (uid INTEGER, phone TEXT, sphone TEXT, deleted INTEGER, # PRIMARY KEY (uid, phone) def __parseCreateStr(self, sqltext): try: newField = {} # use for debug purposes # print "sqltext=={}".format(sqltext) # raw_contact_id INTEGER REFERENCES raw_contacts(_id) NOT NULL results = re.match(r'(?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\s+(\w+)\s+REFERENCES\s+(.*)\s+NOT.NULL', sqltext, re.IGNORECASE) if results: newField['name'] = results.group(1) newField['datatype'] = results.group(2) newField['references'] = True newField['referencesdata'] = results.group(3) newField['notnull'] = True return newField # photo_id INTEGER REFERENCES data(_id) # name_raw_contact_id INTEGER REFERENCES raw_contacts(_id) results = re.match(r'(?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\s+(\w+)\s+REFERENCES\s+(.*)', sqltext, re.IGNORECASE) if results: newField['name'] = results.group(1) newField['datatype'] = results.group(2) newField['references'] = True 
newField['referencesdata'] = results.group(3) return newField # pinned INTEGER NOT NULL DEFAULT 2147483647 # send_to_voicemail INTEGER NOT NULL DEFAULT 0 results = re.match(r'(?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\s+(\w+)\s+NOT\s+NULL\s+DEFAULT\s+(\w+)', sqltext, re.IGNORECASE) if results: newField['name'] = results.group(1) newField['datatype'] = results.group(2) newField['notnull'] = True newField['default'] = results.group(3) return newField # pinned INTEGER DEFAULT 2147483647 # send_to_voicemail INTEGER DEFAULT 0 results = re.match(r'(?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\s+(\w+)\s+DEFAULT\s+(\w+)', sqltext, re.IGNORECASE) if results: newField['name'] = results.group(1) newField['datatype'] = results.group(2) newField['default'] = results.group(3) return newField # _id INTEGER PRIMARY KEY AUTOINCREMENT results = re.match(r'(?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\s+(\w+)\s+PRIMARY\s+KEY\s+AUTOINCREMENT', sqltext, re.IGNORECASE) if results: newField['name'] = results.group(1) newField['datatype'] = results.group(2) newField['primarykey'] = True newField['autoincrement'] = True return newField # _id INTEGER PRIMARY KEY results = re.match(r'(?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\s+(\w+)\s+PRIMARY\s+KEY', sqltext, re.IGNORECASE) if results: newField['name'] = results.group(1) newField['datatype'] = results.group(2) newField['primarykey'] = True return newField # FileID INTEGER NOT NULL results = re.match(r'(?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\s+(\w+)\s+NOT\s+NULL', sqltext, re.IGNORECASE) if results: newField['name'] = results.group(1) newField['datatype'] = results.group(2) newField['notnull'] = True return newField # PRIMARY KEY (field_name, results = re.match(r'PRIMARY.KEY\s*\((?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\,?', sqltext, re.IGNORECASE) if results: field = self.fields[results.group(1)] field['primarykey'] = True self.primarykeyFlag = True return False # UNIQUE (field_name, results = re.match(r'UNIQUE\s*\((?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\,?', sqltext, re.IGNORECASE) if results: field 
= self.fields[results.group(1)] field['unique'] = True self.uniqueFlag = True; return False # custom_ringtone TEXT results = re.match(r'(?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\s+(\w+)', sqltext, re.IGNORECASE) if results: newField['name'] = results.group(1) newField['datatype'] = results.group(2) return newField # field_name) results = re.match(r'(?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\)', sqltext, re.IGNORECASE) if results: field = self.fields[results.group(1)] if (self.primarykeyFlag): if (field): field['primarykey'] = True self.primarykeyFlag = False elif (self.uniqueFlag): if (field): field['unique'] = True self.uniqueFlag = False return False # field_name results = re.match(r'^(?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*$', sqltext, re.IGNORECASE) if results: if (self.primarykeyFlag): field = self.fields[results.group(1)] field['primarykey'] = True elif (self.uniqueFlag): field = self.fields[results.group(1)] field['unique'] = True else: newField['name'] = results.group(1) newField['datatype'] = "INTEGER" return newField return False logging.warn('WARN: field definition not recognized: "{}"'.format(sqltext)) except Exception, e: logging.warn('WARN: problem parsing sql create text: "{}"'.format(sqltext)) logging.warn('Exception: \n{}'.format(e)) return None return None # def fields(self): return self.fields # def toJSON(self): print json.dumps(self.fields) # def toFile(self, filehandle): json.dump(self.fields, filehandle, sort_keys=True, indent=4) # def __str__(self): global delimeter retstr = "" retstr = json.dumps(self.fields) return retstr # def name(self): return self.tableName # def setSQLstr(self, str): return self.sqlStr # def SQLstr(self): return self.sqlStr # def hash(self): return self.sqlStrHash