#
# dbfp -- SQLite database schema "fingerprinting".
#
# NOTE(review): ported from Python 2 to Python 3 (print function,
# `except ... as` syntax, bytes-based hashing, `in` instead of has_key).
#
import re
import json
import sqlite3
import hashlib
import time
import logging

from libs import toolbox
# NOTE(review): FingerprintMD5 was raised below but never imported in the
# original (guaranteed NameError on that path) -- assuming libs.exceptions
# defines it alongside FingerprintWrite; verify.
from libs.exceptions import FingerprintWrite, FingerprintMD5

# kept under its original (misspelled) public name for backward compatibility
delimeter = "|"

#
# Database Schema
# The SQLite database schema is stored in page 1 of the database (root page).
# The sqlite_master table contains one row for each table, index, view, and
# trigger (collectively "objects") in the database schema.
#   CREATE TABLE sqlite_master(
#       type text,
#       name text,
#       tbl_name text,
#       rootpage integer,
#       sql text
#   );
#


class FingerprintDB:
    """Represents a complete database schema.

    Helper functions:
      - writing of the database schema as a "fingerprint" (JSON file)
      - comparing a scanned schema against a fingerprint loaded from file
    """

    sqlmaster = "SELECT name, sql FROM sqlite_master WHERE type='table'"
    scanner_ver = "0.90"
    format_ver = "0.90"

    def __init__(self):
        self.conn = None        # sqlite3 connection (set by scanDBFile)
        self.cur = None         # sqlite3 cursor
        self.tableNames = []    # table names in scan order
        self.tables = {}        # table name -> TableSchema (from a live DB)
        self.tablesJson = {}    # table name -> TableSchema (from a JSON fingerprint)
        self.dbName = ""
        self.app_name = ""
        self.app_ver = ""
        self.notes = ""
        self.filein = ""
        self.scanned = False    # True once scanDBFile() succeeded

    def scanDBFile(self, filein):
        """Read the database and populate the class.

        Returns 1 on success, -2 if the file cannot be opened as SQLite,
        -3 if the schema cannot be read (see getErrorString()).
        """
        try:
            (self.conn, self.cur) = self.__openDB(filein)
        except Exception as ex:
            print(ex)
            return -2
        try:
            # extract file name from path+filename
            self.dbName = toolbox.ToolBox.parseFilenameIncExt(filein)
        except Exception:
            self.dbName = filein
        try:
            # read database schema, parse the schema
            self.__readDatabase()
        except Exception as ex:
            print(ex)
            return -3
        # flag is used to determine if the class has data
        self.scanned = True
        self.filein = filein
        return 1

    def writeFingerprint(self):
        """Write the fingerprint to a timestamped file derived from the input name.

        Raises FingerprintWrite on any failure. No-op if nothing was scanned.
        """
        if not self.scanned:
            return
        # BUGFIX: defined up-front so the error message below cannot NameError
        # when getTimestampFilename() itself raises.
        filename = ""
        try:
            filename = toolbox.ToolBox.getTimestampFilename(self.filein)
            with open(filename, "w") as fh:
                self.__writeFingerprint(fh)
        except Exception as ex:
            logging.error(ex)
            raise FingerprintWrite("Problem writing the fingerprint to a file, file=={}".format(filename))

    def writeFingerprintFile(self, filename):
        """Write the fingerprint to the given file.

        Raises FingerprintWrite on any failure. No-op if nothing was scanned.
        """
        if not self.scanned:
            return
        try:
            with open(filename, "w") as fh:
                self.__writeFingerprint(fh)
        except Exception as ex:
            logging.error(ex)
            raise FingerprintWrite("Problem writing the fingerprint to a file, file=={}".format(filename))

    def importJson(self, filejson):
        """Import fingerprint from a json file."""
        (self.tablesJson, _) = self.__importJsonDBSchema(filejson)

    def importJsonIndex(self, filejson):
        """Import fingerprint from a json file, return the MD5 sums.

        BUGFIX: the original referenced `filejson` without declaring the
        parameter, so every call raised NameError.
        """
        (_, dbht) = self.__importJsonDBSchema(filejson)
        return dbht

    def compareDB(self, filejson):
        """Return the percentage of the match between two fingerprints."""
        if not self.scanned:
            return
        # BUGFIX: __importJsonDBSchema returns (tables, hashes); the original
        # assigned the whole tuple to self.tablesJson, which then broke the
        # .keys() iteration inside __DBSchemaCompare.
        (self.tablesJson, _) = self.__importJsonDBSchema(filejson)
        result = self.__DBSchemaCompare()
        print("[ Percentage == {}]".format(result))
        return result

    def __importJsonDBSchema(self, file_json):
        """Import fingerprint from a json file.

        Returns (tables, hash-index): table name -> TableSchema, and
        table name -> md5-of-CREATE-string. Best-effort: on failure it
        logs and returns whatever was loaded so far.
        """
        tables = {}
        dbht = {}  # BUGFIX: was unbound when loading failed before assignment
        try:
            with open(file_json, "r") as fh:
                jsonData = json.load(fh)
            tb = jsonData['tables']
            dbmt = jsonData['db-metadata']
            dbht = jsonData['db-metadata-hashes']
            for table_name in tb.keys():
                print("[[ Table <" + table_name + "> imported ]]")
                newTable = TableSchema()
                newTable.importTable(table_name, tb[table_name], dbmt[table_name], dbht[table_name])
                tables[table_name] = newTable
        except Exception as e:
            print("ERROR: problem loading json file: " + file_json)
            print(e)
        return (tables, dbht)

    def __DBMD5Compare(self):
        pass

    def __DBSchemaCompare(self):
        # the json database schema definition is what our tool is expecting...
        # ...so we use it as the baseline
        # look for table, if exists, compare each field
        # else, add to unknown tables...or do a fuzzy compare (look at number
        # of fields, field names)
        diff_total = 0
        all_total = 0
        for tableName in self.tablesJson.keys():
            # BUGFIX: .get() -- a table missing from the scanned DB used to
            # raise KeyError instead of falling through to the fuzzy compare
            table = self.tables.get(tableName)
            print("[[ Comparing Table: " + tableName + " ]]")
            if table:
                if self.tablesJson[tableName].hash() != table.hash():
                    logging.info("*** Hash difference 1:{}!={}".format(
                        self.tablesJson[tableName].hash(), table.hash()))
                    (total, diff_num) = self.__CompareTable(self.tablesJson[tableName], table)
                    all_total += total
                    diff_total += diff_num
            else:
                self.__FuzzyTable()
        percentage = 100
        if diff_total > 0:
            # BUGFIX: scale the difference ratio to a percentage -- the
            # original subtracted the raw 0..1 ratio from 100, so any
            # mismatch barely moved the result.
            percentage = percentage - (float(diff_total) / all_total) * 100
        return percentage

    #
    # Compare the Table Definitions.
    # Compare Table 1 (Json table) to Table 2.
    # Returns (total items compared, number of differences).
    #
    def __CompareTable(self, tb1, tb2):
        fields_total_count = 0
        fields_diff_count = 0
        prop_total_count = 0
        prop_error_count = 0
        totals = 0
        diff_total = 0
        fields1 = tb1.fields
        fields2 = tb2.fields
        for field in fields1.keys():
            field1 = fields1[field]
            fields_total_count += 1
            if field in fields2:
                # BUGFIX: the original read fields1[field] here, comparing
                # the baseline field against itself (differences were never
                # detected at the property level)
                field2 = fields2[field]
                for prop in field1.keys():
                    prop_total_count += 1
                    if prop not in field2:
                        prop_error_count += 1
            else:
                fields_diff_count += 1
        if prop_error_count == 0 and fields_diff_count == 0:
            print("100% compatible")
        else:
            totals = prop_total_count + fields_total_count
            diff_total = prop_error_count + fields_diff_count
            print("Table difference found: " + str(diff_total))
        return (totals, diff_total)

    # look at un-identified tables and try to match fields by their properties
    def __FuzzyTable(self):
        # BUGFIX: the original was missing `self`; calling it raised TypeError.
        # Still a stub -- matching is not implemented yet.
        return

    def __openDB(self, filein):
        """Open a sqlite database file; returns (connection, cursor)."""
        conn = sqlite3.connect(filein)
        cur = conn.cursor()
        return (conn, cur)

    # read a sqlite database by parsing the create table strings
    # (see the class-level `sqlmaster` query)
    def __readDatabase(self):
        for row in self.cur.execute(self.sqlmaster):
            newTable = TableSchema()
            newTable.loadTable(row[0], row[1])
            self.tableNames.append(newTable.name())
            self.tables[newTable.name()] = newTable

    def debugFingerprint(self):
        """Print the currently loaded tables (scanned data preferred) as JSON."""
        if self.tables:
            myDict = self.tables
        elif self.tablesJson:
            myDict = self.tablesJson
        else:
            return
        for key in myDict.keys():
            print("[[ TABLE: <" + key + "> ]]")
            myDict[key].toJSON()

    def __writeFingerprint(self, filehandle):
        """Serialize the fingerprint structure as JSON to an open file handle."""
        mhash = {}   # file metadata
        dmhash = {}  # table -> CREATE string
        shash = {}   # table -> md5 of CREATE string
        thash = {}   # table -> parsed field definitions
        ahash = {
            '_file-metadata': mhash,
            'db-metadata': dmhash,
            'db-metadata-hashes': shash,
            'db-metadata-md5': None,
            'tables': thash,
        }
        try:
            timestr = time.strftime('%Y-%m-%d_%H%M%S', time.localtime(time.time()))
        except Exception:
            timestr = ""
        mhash['scan-date'] = timestr
        mhash['format-ver'] = self.format_ver
        mhash['scanner-ver'] = self.scanner_ver
        mhash['scanner-name'] = 'dbfp'
        # NOTE(review): key looks misspelled ('db-name'?) but is kept
        # unchanged for fingerprint-format compatibility
        mhash['dn-name'] = self.dbName
        mhash['app-name'] = self.app_name
        mhash['app-ver'] = self.app_ver
        mhash['notes'] = self.notes
        # tables
        for table in self.tables.keys():
            thash[table] = self.tables[table].fields
            dmhash[table] = self.tables[table].SQLstr()
            shash[table] = self.tables[table].sqlStrHash
        ahash['db-metadata-md5'] = self.__createMD5Index(shash)
        json.dump(ahash, filehandle, sort_keys=True, indent=4)

    def __createMD5Index(self, dbht):
        """MD5 over the concatenation of all per-table hashes, in sorted key order.

        Raises FingerprintMD5 on failure.
        """
        try:
            concat_str = "".join(dbht[key] for key in sorted(dbht.keys()))
            m = hashlib.md5()
            # BUGFIX: hashlib requires bytes in Python 3
            m.update(concat_str.encode('utf-8'))
            retval = m.hexdigest()
        except Exception as ex:
            logging.error(ex)
            raise FingerprintMD5("Problem creating a MD5 sum")
        return retval

    def setAppName(self, name):
        self.app_name = name

    def setAppVer(self, version):
        self.app_ver = version

    def setNotes(self, notes):
        self.notes = notes

    def getErrorString(self, errorCode):
        """Map a scanDBFile() error code to a human-readable message."""
        retval = "ERROR: unknown error code: " + str(errorCode)
        if errorCode == -2:
            retval = "ERROR: problem opening file, or not sqlite database"
        elif errorCode == -3:
            retval = "ERROR: problem reading database"
        return retval


class TableSchema:
    """Represents the definition of a single database table."""

    # grabs everything between the outermost parentheses of a CREATE TABLE
    tableschemaregex = r'\((.*)\)'

    def __init__(self):
        self.tableName = ""
        self.sqlStr = ""        # raw CREATE TABLE statement
        self.sqlStrHash = ""    # md5 of sqlStr, for quick fingerprint matching
        self.fields = {}        # field name -> {property: value}
        self.primarykeyFlag = False  # pending multi-field PRIMARY KEY (...) clause
        self.uniqueFlag = False      # pending multi-field UNIQUE (...) clause

    def loadTable(self, tableName, sqlStr):
        """Populate this table definition by parsing a CREATE TABLE string."""
        self.tableName = tableName
        self.sqlStr = sqlStr
        print("[[ TABLE: <{}> ] processing...]".format(tableName))
        # hash the sql create string for quicker fingerprint matching
        try:
            m = hashlib.md5()
            # BUGFIX: hashlib requires bytes in Python 3
            m.update(self.sqlStr.encode('utf-8'))
            self.sqlStrHash = m.hexdigest()
        except Exception:
            print('WARN: problem hashing sql string: "{}"'.format(self.sqlStr))
        # parse the create string into a structured hash table
        results = re.search(self.tableschemaregex, sqlStr)
        if results:
            # NOTE(review): splitting on ',' misparses CREATE strings whose
            # clauses contain commas inside parentheses, e.g.
            # PRIMARY KEY (a, b) -- the per-clause parser below compensates
            for col in results.group(1).split(','):
                newField = self.__parseCreateStr(col.strip())
                if newField:
                    self.fields[newField['name']] = newField
                    del newField['name']

    def importTable(self, tbName, fields, sqlStr, hashStr):
        """Populate this table definition from already-parsed JSON data."""
        self.tableName = tbName
        self.sqlStr = sqlStr
        self.fields = fields
        self.sqlStrHash = hashStr

    # Table Definition examples:
    #
    # CREATE TABLE contacts (_id INTEGER PRIMARY KEY AUTOINCREMENT,name_raw_contact_id INTEGER REFERENCES raw_contacts(_id),
    #   photo_id INTEGER REFERENCES data(_id),photo_file_id INTEGER REFERENCES photo_files(_id),
    #   custom_ringtone TEXT,send_to_voicemail INTEGER NOT NULL DEFAULT 0,
    #   times_contacted INTEGER NOT NULL DEFAULT 0,last_time_contacted INTEGER,
    #   starred INTEGER NOT NULL DEFAULT 0,pinned INTEGER NOT NULL DEFAULT 2147483647,
    #   has_phone_number INTEGER NOT NULL DEFAULT 0,lookup TEXT,
    #   status_update_id INTEGER REFERENCES data(_id),contact_last_updated_timestamp INTEGER)
    #
    # CREATE TABLE sent_files_v2 (uid INTEGER, phone TEXT, sphone TEXT, deleted INTEGER,
    #   PRIMARY KEY (uid, phone)
    def __parseCreateStr(self, sqltext):
        """Parse one comma-separated clause of a CREATE TABLE body.

        Returns a field-property dict (with a 'name' key) for a column
        definition, False for table-level clauses (PRIMARY KEY/UNIQUE lists,
        trailing fragments), or None on a parse error.
        """
        try:
            newField = {}
            # use for debug purposes
            # print("sqltext=={}".format(sqltext))
            # raw_contact_id INTEGER REFERENCES raw_contacts(_id) NOT NULL
            results = re.match(r'(?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\s+(\w+)\s+REFERENCES\s+(.*)\s+NOT.NULL', sqltext, re.IGNORECASE)
            if results:
                newField['name'] = results.group(1)
                newField['datatype'] = results.group(2)
                newField['references'] = True
                newField['referencesdata'] = results.group(3)
                newField['notnull'] = True
                return newField
            # photo_id INTEGER REFERENCES data(_id)
            results = re.match(r'(?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\s+(\w+)\s+REFERENCES\s+(.*)', sqltext, re.IGNORECASE)
            if results:
                newField['name'] = results.group(1)
                newField['datatype'] = results.group(2)
                newField['references'] = True
                newField['referencesdata'] = results.group(3)
                return newField
            # pinned INTEGER NOT NULL DEFAULT 2147483647
            results = re.match(r'(?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\s+(\w+)\s+NOT\s+NULL\s+DEFAULT\s+(\w+)', sqltext, re.IGNORECASE)
            if results:
                newField['name'] = results.group(1)
                newField['datatype'] = results.group(2)
                newField['notnull'] = True
                newField['default'] = results.group(3)
                return newField
            # send_to_voicemail INTEGER DEFAULT 0
            results = re.match(r'(?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\s+(\w+)\s+DEFAULT\s+(\w+)', sqltext, re.IGNORECASE)
            if results:
                newField['name'] = results.group(1)
                newField['datatype'] = results.group(2)
                newField['default'] = results.group(3)
                return newField
            # _id INTEGER PRIMARY KEY AUTOINCREMENT
            results = re.match(r'(?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\s+(\w+)\s+PRIMARY\s+KEY\s+AUTOINCREMENT', sqltext, re.IGNORECASE)
            if results:
                newField['name'] = results.group(1)
                newField['datatype'] = results.group(2)
                newField['primarykey'] = True
                newField['autoincrement'] = True
                return newField
            # _id INTEGER PRIMARY KEY
            results = re.match(r'(?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\s+(\w+)\s+PRIMARY\s+KEY', sqltext, re.IGNORECASE)
            if results:
                newField['name'] = results.group(1)
                newField['datatype'] = results.group(2)
                newField['primarykey'] = True
                return newField
            # FileID INTEGER NOT NULL
            results = re.match(r'(?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\s+(\w+)\s+NOT\s+NULL', sqltext, re.IGNORECASE)
            if results:
                newField['name'] = results.group(1)
                newField['datatype'] = results.group(2)
                newField['notnull'] = True
                return newField
            # PRIMARY KEY (field_name,   -- first field of a multi-field key;
            # the flag marks the trailing fields until the closing paren
            results = re.match(r'PRIMARY.KEY\s*\((?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\,?', sqltext, re.IGNORECASE)
            if results:
                field = self.fields[results.group(1)]
                field['primarykey'] = True
                self.primarykeyFlag = True
                return False
            # UNIQUE (field_name,
            results = re.match(r'UNIQUE\s*\((?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\,?', sqltext, re.IGNORECASE)
            if results:
                field = self.fields[results.group(1)]
                field['unique'] = True
                self.uniqueFlag = True
                return False
            # custom_ringtone TEXT
            results = re.match(r'(?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\s+(\w+)', sqltext, re.IGNORECASE)
            if results:
                newField['name'] = results.group(1)
                newField['datatype'] = results.group(2)
                return newField
            # field_name)   -- closing fragment of a PRIMARY KEY/UNIQUE list
            results = re.match(r'(?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*\)', sqltext, re.IGNORECASE)
            if results:
                field = self.fields.get(results.group(1))
                if self.primarykeyFlag:
                    if field:
                        field['primarykey'] = True
                    self.primarykeyFlag = False
                elif self.uniqueFlag:
                    if field:
                        field['unique'] = True
                    self.uniqueFlag = False
                return False
            # field_name   -- middle fragment of a PRIMARY KEY/UNIQUE list,
            # or a bare column name (datatype defaults to INTEGER)
            results = re.match(r'^(?:[`|\"|\'])*(\w+)(?:[`|\"|\'])*$', sqltext, re.IGNORECASE)
            if results:
                if self.primarykeyFlag:
                    field = self.fields[results.group(1)]
                    field['primarykey'] = True
                elif self.uniqueFlag:
                    field = self.fields[results.group(1)]
                    field['unique'] = True
                else:
                    newField['name'] = results.group(1)
                    newField['datatype'] = "INTEGER"
                    return newField
                return False
            # BUGFIX: in the original this warning sat after a return
            # statement and was unreachable
            print('WARN: field definition not recognized: "{}"'.format(sqltext))
        except Exception as e:
            print('WARN: problem parsing sql create text: "{}"'.format(sqltext))
            print('Exception: \n{}'.format(e))
            return None
        return None

    # NOTE(review): the original also defined a fields() method, but it was
    # dead code -- the instance attribute self.fields (set in __init__)
    # shadows it on every instance -- so it has been removed.

    def toJSON(self):
        print(json.dumps(self.fields))

    def toFile(self, filehandle):
        json.dump(self.fields, filehandle, sort_keys=True, indent=4)

    def __str__(self):
        return json.dumps(self.fields)

    def name(self):
        return self.tableName

    def setSQLstr(self, sqlstr):
        # BUGFIX: the original returned self.sqlStr without storing anything
        self.sqlStr = sqlstr

    def SQLstr(self):
        return self.sqlStr

    def hash(self):
        return self.sqlStrHash