#
# dbfp -- SQLite database schema "fingerprinting".
#
# NOTE(review): ported from Python 2 to Python 3 (print function,
# `except ... as` syntax, bytes-based hashing, `in` instead of has_key).
#
import re
import json
import sqlite3
import hashlib
import time
import logging

from libs import toolbox
# NOTE(review): FingerprintMD5 was raised below but never imported in the
# original (guaranteed NameError on that path) -- assuming libs.exceptions
# defines it alongside FingerprintWrite; verify.
from libs.exceptions import FingerprintWrite, FingerprintMD5

# kept under its original (misspelled) public name for backward compatibility
delimeter = "|"

#
# Database Schema
# The SQLite database schema is stored in page 1 of the database (root page).
# The sqlite_master table contains one row for each table, index, view, and
# trigger (collectively "objects") in the database schema.
#   CREATE TABLE sqlite_master(
#       type text,
#       name text,
#       tbl_name text,
#       rootpage integer,
#       sql text
#   );
#


class FingerprintDB:
    """Represents a complete database schema.

    Helper functions:
      - writing of the database schema as a "fingerprint" (JSON file)
      - comparing a scanned schema against a fingerprint loaded from file
    """

    sqlmaster = "SELECT name, sql FROM sqlite_master WHERE type='table'"
    scanner_ver = "0.90"
    format_ver = "0.90"

    def __init__(self):
        self.conn = None        # sqlite3 connection (set by scanDBFile)
        self.cur = None         # sqlite3 cursor
        self.tableNames = []    # table names in scan order
        self.tables = {}        # table name -> TableSchema (from a live DB)
        self.tablesJson = {}    # table name -> TableSchema (from a JSON fingerprint)
        self.dbName = ""
        self.app_name = ""
        self.app_ver = ""
        self.notes = ""
        self.filein = ""
        self.scanned = False    # True once scanDBFile() succeeded

    def scanDBFile(self, filein):
        """Read the database and populate the class.

        Returns 1 on success, -2 if the file cannot be opened as SQLite,
        -3 if the schema cannot be read (see getErrorString()).
        """
        try:
            (self.conn, self.cur) = self.__openDB(filein)
        except Exception as ex:
            print(ex)
            return -2
        try:
            # extract file name from path+filename
            self.dbName = toolbox.ToolBox.parseFilenameIncExt(filein)
        except Exception:
            self.dbName = filein
        try:
            # read database schema, parse the schema
            self.__readDatabase()
        except Exception as ex:
            print(ex)
            return -3
        # flag is used to determine if the class has data
        self.scanned = True
        self.filein = filein
        return 1

    def writeFingerprint(self):
        """Write the fingerprint to a timestamped file derived from the input name.

        Raises FingerprintWrite on any failure. No-op if nothing was scanned.
        """
        if not self.scanned:
            return
        # BUGFIX: defined up-front so the error message below cannot NameError
        # when getTimestampFilename() itself raises.
        filename = ""
        try:
            filename = toolbox.ToolBox.getTimestampFilename(self.filein)
            with open(filename, "w") as fh:
                self.__writeFingerprint(fh)
        except Exception as ex:
            logging.error(ex)
            raise FingerprintWrite("Problem writing the fingerprint to a file, file=={}".format(filename))

    def writeFingerprintFile(self, filename):
        """Write the fingerprint to the given file.

        Raises FingerprintWrite on any failure. No-op if nothing was scanned.
        """
        if not self.scanned:
            return
        try:
            with open(filename, "w") as fh:
                self.__writeFingerprint(fh)
        except Exception as ex:
            logging.error(ex)
            raise FingerprintWrite("Problem writing the fingerprint to a file, file=={}".format(filename))

    def importJson(self, filejson):
        """Import fingerprint from a json file."""
        (self.tablesJson, _) = self.__importJsonDBSchema(filejson)

    def importJsonIndex(self, filejson):
        """Import fingerprint from a json file, return the MD5 sums.

        BUGFIX: the original referenced `filejson` without declaring the
        parameter, so every call raised NameError.
        """
        (_, dbht) = self.__importJsonDBSchema(filejson)
        return dbht

    def compareDB(self, filejson):
        """Return the percentage of the match between two fingerprints."""
        if not self.scanned:
            return
        # BUGFIX: __importJsonDBSchema returns (tables, hashes); the original
        # assigned the whole tuple to self.tablesJson, which then broke the
        # .keys() iteration inside __DBSchemaCompare.
        (self.tablesJson, _) = self.__importJsonDBSchema(filejson)
        result = self.__DBSchemaCompare()
        print("[ Percentage == {}]".format(result))
        return result

    def __importJsonDBSchema(self, file_json):
        """Import fingerprint from a json file.

        Returns (tables, hash-index): table name -> TableSchema, and
        table name -> md5-of-CREATE-string. Best-effort: on failure it
        logs and returns whatever was loaded so far.
        """
        tables = {}
        dbht = {}  # BUGFIX: was unbound when loading failed before assignment
        try:
            with open(file_json, "r") as fh:
                jsonData = json.load(fh)
            tb = jsonData['tables']
            dbmt = jsonData['db-metadata']
            dbht = jsonData['db-metadata-hashes']
            for table_name in tb.keys():
                print("[[ Table <" + table_name + "> imported ]]")
                newTable = TableSchema()
                newTable.importTable(table_name, tb[table_name], dbmt[table_name], dbht[table_name])
                tables[table_name] = newTable
        except Exception as e:
            print("ERROR: problem loading json file: " + file_json)
            print(e)
        return (tables, dbht)

    def __DBMD5Compare(self):
        pass

    def __DBSchemaCompare(self):
        # the json database schema definition is what our tool is expecting...
        # ...so we use it as the baseline
        # look for table, if exists, compare each field
        # else, add to unknown tables...or do a fuzzy compare (look at number
        # of fields, field names)
        diff_total = 0
        all_total = 0
        for tableName in self.tablesJson.keys():
            # BUGFIX: .get() -- a table missing from the scanned DB used to
            # raise KeyError instead of falling through to the fuzzy compare
            table = self.tables.get(tableName)
            print("[[ Comparing Table: " + tableName + " ]]")
            if table:
                if self.tablesJson[tableName].hash() != table.hash():
                    logging.info("*** Hash difference 1:{}!={}".format(
                        self.tablesJson[tableName].hash(), table.hash()))
                    (total, diff_num) = self.__CompareTable(self.tablesJson[tableName], table)
                    all_total += total
                    diff_total += diff_num
            else:
                self.__FuzzyTable()
        percentage = 100
        if diff_total > 0:
            # BUGFIX: scale the difference ratio to a percentage -- the
            # original subtracted the raw 0..1 ratio from 100, so any
            # mismatch barely moved the result.
            percentage = percentage - (float(diff_total) / all_total) * 100
        return percentage

    #
    # Compare the Table Definitions.
    # Compare Table 1 (Json table) to Table 2.
    # Returns (total items compared, number of differences).
    #
    def __CompareTable(self, tb1, tb2):
        fields_total_count = 0
        fields_diff_count = 0
        prop_total_count = 0
        prop_error_count = 0
        totals = 0
        diff_total = 0
        fields1 = tb1.fields
        fields2 = tb2.fields
        for field in fields1.keys():
            field1 = fields1[field]
            fields_total_count += 1
            if field in fields2:
                # BUGFIX: the original read fields1[field] here, comparing
                # the baseline field against itself (differences were never
                # detected at the property level)
                field2 = fields2[field]
                for prop in field1.keys():
                    prop_total_count += 1
                    if prop not in field2:
                        prop_error_count += 1
            else:
                fields_diff_count += 1
        if prop_error_count == 0 and fields_diff_count == 0:
            print("100% compatible")
        else:
            totals = prop_total_count + fields_total_count
            diff_total = prop_error_count + fields_diff_count
            print("Table difference found: " + str(diff_total))
        return (totals, diff_total)

    # look at un-identified tables and try to match fields by their properties
    def __FuzzyTable(self):
        # BUGFIX: the original was missing `self`; calling it raised TypeError.
        # Still a stub -- matching is not implemented yet.
        return

    def __openDB(self, filein):
        """Open a sqlite database file; returns (connection, cursor)."""
        conn = sqlite3.connect(filein)
        cur = conn.cursor()
        return (conn, cur)

    # read a sqlite database by parsing the create table strings
    # (see the class-level `sqlmaster` query)
    def __readDatabase(self):
        for row in self.cur.execute(self.sqlmaster):
            newTable = TableSchema()
            newTable.loadTable(row[0], row[1])
            self.tableNames.append(newTable.name())
            self.tables[newTable.name()] = newTable

    def debugFingerprint(self):
        """Print the currently loaded tables (scanned data preferred) as JSON."""
        if self.tables:
            myDict = self.tables
        elif self.tablesJson:
            myDict = self.tablesJson
        else:
            return
        for key in myDict.keys():
            print("[[ TABLE: <" + key + "> ]]")
            myDict[key].toJSON()

    def __writeFingerprint(self, filehandle):
        """Serialize the fingerprint structure as JSON to an open file handle."""
        mhash = {}   # file metadata
        dmhash = {}  # table -> CREATE string
        shash = {}   # table -> md5 of CREATE string
        thash = {}   # table -> parsed field definitions
        ahash = {
            '_file-metadata': mhash,
            'db-metadata': dmhash,
            'db-metadata-hashes': shash,
            'db-metadata-md5': None,
            'tables': thash,
        }
        try:
            timestr = time.strftime('%Y-%m-%d_%H%M%S', time.localtime(time.time()))
        except Exception:
            timestr = ""
        mhash['scan-date'] = timestr
        mhash['format-ver'] = self.format_ver
        mhash['scanner-ver'] = self.scanner_ver
        mhash['scanner-name'] = 'dbfp'
        # NOTE(review): key looks misspelled ('db-name'?) but is kept
        # unchanged for fingerprint-format compatibility
        mhash['dn-name'] = self.dbName
        mhash['app-name'] = self.app_name
        mhash['app-ver'] = self.app_ver
        mhash['notes'] = self.notes
        # tables
        for table in self.tables.keys():
            thash[table] = self.tables[table].fields
            dmhash[table] = self.tables[table].SQLstr()
            shash[table] = self.tables[table].sqlStrHash
        ahash['db-metadata-md5'] = self.__createMD5Index(shash)
        json.dump(ahash, filehandle, sort_keys=True, indent=4)

    def __createMD5Index(self, dbht):
        """MD5 over the concatenation of all per-table hashes, in sorted key order.

        Raises FingerprintMD5 on failure.
        """
        try:
            concat_str = "".join(dbht[key] for key in sorted(dbht.keys()))
            m = hashlib.md5()
            # BUGFIX: hashlib requires bytes in Python 3
            m.update(concat_str.encode('utf-8'))
            retval = m.hexdigest()
        except Exception as ex:
            logging.error(ex)
            raise FingerprintMD5("Problem creating a MD5 sum")
        return retval

    def setAppName(self, name):
        self.app_name = name

    def setAppVer(self, version):
        self.app_ver = version

    def setNotes(self, notes):
        self.notes = notes

    def getErrorString(self, errorCode):
        """Map a scanDBFile() error code to a human-readable message."""
        retval = "ERROR: unknown error code: " + str(errorCode)
        if errorCode == -2:
            retval = "ERROR: problem opening file, or not sqlite database"
        elif errorCode == -3:
            retval = "ERROR: problem reading database"
        return retval


class TableSchema:
    """Represents the definition of a single database table."""

    # grabs everything between the outermost parentheses of a CREATE TABLE
    tableschemaregex = r'\((.*)\)'

    def __init__(self):
        self.tableName = ""
        self.sqlStr = ""        # raw CREATE TABLE statement
        self.sqlStrHash = ""    # md5 of sqlStr, for quick fingerprint matching
        self.fields = {}        # field name -> {property: value}
        self.primarykeyFlag = False  # pending multi-field PRIMARY KEY (...) clause
        self.uniqueFlag = False      # pending multi-field UNIQUE (...) clause

    def loadTable(self, tableName, sqlStr):
        """Populate this table definition by parsing a CREATE TABLE string."""
        self.tableName = tableName
        self.sqlStr = sqlStr
        print("[[ TABLE: <{}> ] processing...]".format(tableName))
        # hash the sql create string for quicker fingerprint matching
        try:
            m = hashlib.md5()
            # BUGFIX: hashlib requires bytes in Python 3
            m.update(self.sqlStr.encode('utf-8'))
            self.sqlStrHash = m.hexdigest()
        except Exception:
            print('WARN: problem hashing sql string: "{}"'.format(self.sqlStr))
        # parse the create string into a structured hash table
        results = re.search(self.tableschemaregex, sqlStr)
        if results:
            # NOTE(review): splitting on ',' misparses CREATE strings whose
            # clauses contain commas inside parentheses, e.g.
            # PRIMARY KEY (a, b) -- the per-clause parser below compensates
            for col in results.group(1).split(','):
                newField = self.__parseCreateStr(col.strip())
                if newField:
                    self.fields[newField['name']] = newField
                    del newField['name']

    def importTable(self, tbName, fields, sqlStr, hashStr):
        """Populate this table definition from already-parsed JSON data."""
        self.tableName = tbName
        self.sqlStr = sqlStr
        self.fields = fields
        self.sqlStrHash = hashStr

    # Table Definition examples:
    #
    # CREATE TABLE contacts (_id INTEGER PRIMARY KEY AUTOINCREMENT,name_raw_contact_id INTEGER REFERENCES raw_contacts(_id),
    #   photo_id INTEGER REFERENCES data(_id),photo_file_id INTEGER REFERENCES photo_files(_id),
    #   custom_ringtone TEXT,send_to_voicemail INTEGER NOT NULL DEFAULT 0,
    #   times_contacted INTEGER NOT NULL DEFAULT 0,last_time_contacted INTEGER,
    #   starred INTEGER NOT NULL DEFAULT 0,pinned INTEGER NOT NULL DEFAULT 2147483647,
    #   has_phone_number INTEGER NOT NULL DEFAULT 0,lookup TEXT,
    #   status_update_id INTEGER REFERENCES data(_id),contact_last_updated_timestamp INTEGER)
    #
    # CREATE TABLE sent_files_v2 (uid INTEGER, phone TEXT, sphone TEXT, deleted INTEGER,
    #   PRIMARY KEY (uid, phone)
    def __parseCreateStr(self, sqltext):
        """Parse one comma-separated clause of a CREATE TABLE body.

        Returns a field-property dict (with a 'name' key) for a column
        definition, False for table-level clauses (PRIMARY KEY/UNIQUE lists,
        trailing fragments), or None on a parse error.
        """
        try:
            newField = {}
            # use for debug purposes
            # print("sqltext=={}".format(sqltext))
            # raw_contact_id INTEGER REFERENCES raw_contacts(_id) NOT NULL
            results = re.match(r'(?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\s+(\w+)\s+REFERENCES\s+(.*)\s+NOT.NULL', sqltext, re.IGNORECASE)
            if results:
                newField['name'] = results.group(1)
                newField['datatype'] = results.group(2)
                newField['references'] = True
                newField['referencesdata'] = results.group(3)
                newField['notnull'] = True
                return newField
            # photo_id INTEGER REFERENCES data(_id)
            results = re.match(r'(?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\s+(\w+)\s+REFERENCES\s+(.*)', sqltext, re.IGNORECASE)
            if results:
                newField['name'] = results.group(1)
                newField['datatype'] = results.group(2)
                newField['references'] = True
                newField['referencesdata'] = results.group(3)
                return newField
            # pinned INTEGER NOT NULL DEFAULT 2147483647
            results = re.match(r'(?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\s+(\w+)\s+NOT\s+NULL\s+DEFAULT\s+(\w+)', sqltext, re.IGNORECASE)
            if results:
                newField['name'] = results.group(1)
                newField['datatype'] = results.group(2)
                newField['notnull'] = True
                newField['default'] = results.group(3)
                return newField
            # send_to_voicemail INTEGER DEFAULT 0
            results = re.match(r'(?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\s+(\w+)\s+DEFAULT\s+(\w+)', sqltext, re.IGNORECASE)
            if results:
                newField['name'] = results.group(1)
                newField['datatype'] = results.group(2)
                newField['default'] = results.group(3)
                return newField
            # _id INTEGER PRIMARY KEY AUTOINCREMENT
            results = re.match(r'(?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\s+(\w+)\s+PRIMARY\s+KEY\s+AUTOINCREMENT', sqltext, re.IGNORECASE)
            if results:
                newField['name'] = results.group(1)
                newField['datatype'] = results.group(2)
                newField['primarykey'] = True
                newField['autoincrement'] = True
                return newField
            # _id INTEGER PRIMARY KEY
            results = re.match(r'(?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\s+(\w+)\s+PRIMARY\s+KEY', sqltext, re.IGNORECASE)
            if results:
                newField['name'] = results.group(1)
                newField['datatype'] = results.group(2)
                newField['primarykey'] = True
                return newField
            # FileID INTEGER NOT NULL
            results = re.match(r'(?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\s+(\w+)\s+NOT\s+NULL', sqltext, re.IGNORECASE)
            if results:
                newField['name'] = results.group(1)
                newField['datatype'] = results.group(2)
                newField['notnull'] = True
                return newField
            # PRIMARY KEY (field_name,   -- first field of a multi-field key;
            # the flag marks the trailing fields until the closing paren
            results = re.match(r'PRIMARY.KEY\s*\((?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\,?', sqltext, re.IGNORECASE)
            if results:
                field = self.fields[results.group(1)]
                field['primarykey'] = True
                self.primarykeyFlag = True
                return False
            # UNIQUE (field_name,
            results = re.match(r'UNIQUE\s*\((?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\,?', sqltext, re.IGNORECASE)
            if results:
                field = self.fields[results.group(1)]
                field['unique'] = True
                self.uniqueFlag = True
                return False
            # custom_ringtone TEXT
            results = re.match(r'(?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\s+(\w+)', sqltext, re.IGNORECASE)
            if results:
                newField['name'] = results.group(1)
                newField['datatype'] = results.group(2)
                return newField
            # field_name)   -- closing fragment of a PRIMARY KEY/UNIQUE list
            results = re.match(r'(?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\)', sqltext, re.IGNORECASE)
            if results:
                field = self.fields.get(results.group(1))
                if self.primarykeyFlag:
                    if field:
                        field['primarykey'] = True
                    self.primarykeyFlag = False
                elif self.uniqueFlag:
                    if field:
                        field['unique'] = True
                    self.uniqueFlag = False
                return False
            # field_name   -- middle fragment of a PRIMARY KEY/UNIQUE list,
            # or a bare column name (datatype defaults to INTEGER)
            results = re.match(r'^(?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*$', sqltext, re.IGNORECASE)
            if results:
                if self.primarykeyFlag:
                    field = self.fields[results.group(1)]
                    field['primarykey'] = True
                elif self.uniqueFlag:
                    field = self.fields[results.group(1)]
                    field['unique'] = True
                else:
                    newField['name'] = results.group(1)
                    newField['datatype'] = "INTEGER"
                    return newField
                return False
            # BUGFIX: in the original this warning sat after a return
            # statement and was unreachable
            print('WARN: field definition not recognized: "{}"'.format(sqltext))
        except Exception as e:
            print('WARN: problem parsing sql create text: "{}"'.format(sqltext))
            print('Exception: \n{}'.format(e))
            return None
        return None

    # NOTE(review): the original also defined a fields() method, but it was
    # dead code -- the instance attribute self.fields (set in __init__)
    # shadows it on every instance -- so it has been removed.

    def toJSON(self):
        print(json.dumps(self.fields))

    def toFile(self, filehandle):
        json.dump(self.fields, filehandle, sort_keys=True, indent=4)

    def __str__(self):
        return json.dumps(self.fields)

    def name(self):
        return self.tableName

    def setSQLstr(self, sqlstr):
        # BUGFIX: the original returned self.sqlStr without storing anything
        self.sqlStr = sqlstr

    def SQLstr(self):
        return self.sqlStr

    def hash(self):
        return self.sqlStrHash