MOD: improved the table schema comparison, added a difference count, cleaned up old commented-out code

2015-09-03 01:31:39 -07:00 · 2015-09-03 01:31:39 -07:00 · 82bf211e4c
parent 7fa40e6c9c
commit 82bf211e4c
2 changed files with 42 additions and 53 deletions
--- a/dbfp.py
+++ b/dbfp.py
@@ -10,29 +10,6 @@ from libs import toolbox
 def main():
  parseArgs()

-  # db = fingerprint.DBSchema()
-
-  # if (filein and filejson):
-  #   db.scanDBFile(filein)
-  #   db.compareDB(filejson)
-  # elif (filein):
-  #   retVal = db.scanDBFile(filein)
-  #   if (retVal > 0):
-  #     fh = open(fileout, "w")
-  #     if verbose:
-  #       db.debugFingerprint()
-  #     if title:
-  #       db.setTitle(title)
-  #     if notes:
-  #       db.setNotes(notes)
-  #     db.writeFingerprint(fh)
-  #     fh.close()
-  #   else:
-  #     print db.getErrorString(retVal)
-  # elif (filejson):
-  #   db.importJson(filejson)
-  #   db.debugFingerprint()
-
 #
 def compareFingerprintDir(filein, filejson, fpdir):
  db = fingerprint.DBSchema()
@@ -71,8 +48,8 @@ def parseArgs():
  timestr = time.strftime('%Y-%m-%d_%H%M%S', time.localtime(time.time()))
  parser = argparse.ArgumentParser(description='Fingerprint a sqlite database based on its schema')
  parser.add_argument('-f', '--file', required=False)
-  parser.add_argument('-fd', '--fpdir', required=False)
-  parser.add_argument('-j', '--json', required=False)
+  parser.add_argument('-fd', '--fpdir', required=False, help="path to dirctory of fingerprint files")
+  parser.add_argument('-fp', '--fp', required=False, help="fingerprint file to use in comparison")
  parser.add_argument('-an', '--app_name', required=False)
  parser.add_argument('-av', '--app_version', required=False)
  parser.add_argument('-n', '--notes', required=False)
@@ -80,25 +57,22 @@ def parseArgs():
  # parser.add_argument('-t', '--title', required=False)
  args = parser.parse_args()

-  if (args.file is None) and (args.json is None):
+  if (args.file is None) and (args.fp is None):
    parser.print_help()
    exit()


  # compare a sqlite database file to all fingerprints
-  if (args.json and args.fpdir):
-    compareFingerprints(args.file, args.json, args.fpdir)
+  if (args.fp and args.fpdir):
+    compareFingerprints(args.file, args.fp, args.fpdir)
  # compare a sqlite database file to a fingerprint
-  if (args.json):
-    compareFingerprint(args.file, args.json)
+  if (args.fp):
+    compareFingerprint(args.file, args.fp)
  # create a fingerprint from the sqlite file
  filename = toolbox.ToolBox.parseFilename(args.file)
  fileout = filename + "_" + timestr + '.json'
  createFingerprint(args.file, fileout, args.verbose, args.app_name, args.app_version, args.notes)

-  # return (args.file, fileout, args.json, args.verbose, args.title, args.notes,
-  #         args.fpdir)
-

 if __name__ == "__main__":
  main()
--- a/libs/fingerprint.py
+++ b/libs/fingerprint.py
@@ -79,13 +79,14 @@ class DBSchema:
  def compareDB(self, filejson):
    self.tablesJson = self.__importJsonDBSchema(filejson)
    result = self.__DBSchemaCompare()
+    print "[ Percetage == {}]".format(result)
    return result

  # import fingerprint from a json file
-  def __importJsonDBSchema(self, filejson):
+  def __importJsonDBSchema(self, file_json):
    tables = {}
    try:
-      fh = open(filein, "r")
+      fh = open(file_json, "r")
      jsonData = json.load(fh)
      dbmt = jsonData['db-metadata']
      tb = jsonData['tables']
@@ -97,7 +98,7 @@ class DBSchema:
        tables[table_name] = newTable

    except Exception, e:
-      print "ERROR: problem loading json file: " + filein
+      print "ERROR: problem loading json file: " + file_json
      print e

    return tables
@@ -109,48 +110,58 @@ class DBSchema:
    # look for table, if exists, compare each 
    # if exists, compare each field
    # else, add to unknown tables...or do a fuzzy compare (look at number of fields, field names)
+    diff_num   = 0
+    diff_total = 0
+    all_total  = 0
    for tableName in self.tablesJson.keys():
      table = self.tables[tableName]
      print "[[ Comparing Table: " + tableName + " ]]"
      if (table):
-        self.__CompareTable(self.tablesJson[tableName], table)
+        if not (self.tablesJson[tableName].hash() == table.hash()):
+          (total, diff_num) = self.__CompareTable(self.tablesJson[tableName], table)
+          all_total += total
+          diff_total += diff_num
      else:
        self.__FuzzyTable()
-
-    return
+    percentage = 0
+    if (diff_total > 0):
+      percentage = float(diff_total) / all_total
+    return percentage

  #
  # Compare the Table Definitions. 
  # Compare Table 1 (Json table) to Table 2
  # 
  def __CompareTable(self, tb1, tb2):
-    fieldsTotalCount = 0
-    fieldsErrorCount = 0
-    propTotalCount = 0
-    propErrorCount = 0
+    fields_total_count = 0
+    fields_diff_count  = 0
+    prop_total_count   = 0
+    prop_error_count   = 0
+    totals     = 0
+    diff_total = 0

    fields1 = tb1.fields
    fields2 = tb2.fields
    for field in fields1.keys():
      field1 = fields1[field]
-      fieldsTotalCount += 1
+      fields_total_count += 1
      if (fields2.has_key(field)):
        field2 = fields1[field]
        for properties in field1.keys():
-          propTotalCount += 1
+          prop_total_count += 1
          if not field2.has_key(properties):
-            propErrorCount += 1
+            prop_error_count += 1
      else:
-        fieldsErrorCount += 1
+        fields_diff_count += 1

-    if (propErrorCount == 0 and fieldsErrorCount == 0):
+    if (prop_error_count == 0 and fields_diff_count == 0):
      print "100% compatible"
    else:
-      totals = propTotalCount + fieldsTotalCount
-      errors = propErrorCount + fieldsErrorCount
-      print "Table difference found: " + str(errors)
-      #print str((errors/totals) * 100) + '% compatible   total == ' + str(totals) + "   errors == " + str(errors)
-
+      totals = prop_total_count + fields_total_count
+      diff_total = prop_error_count + fields_diff_count
+      print "Table difference found: " + str(diff_total)
+      #print str((diff_total/totals) * 100) + '% compatible   total == ' + str(totals) + "   diff_total == " + str(diff_total)
+    return (totals, diff_total)

  # look at un-identified tables and try to match fields by their properties
  def __FuzzyTable():
@@ -433,4 +444,8 @@ class TableDefinition:
  def SQLstr(self):
    return self.sqlStr

+  #
+  def hash(self):
+    return self.sqlStrHash
+