diff --git a/bench/comparemd5 b/bench/comparemd5
deleted file mode 100755
index eb2ab3e..0000000
--- a/bench/comparemd5
+++ /dev/null
@@ -1,93 +0,0 @@
-#!/usr/bin/env python3.7
-#
-# Copyright (C) 2018 The University of Sheffield, UK
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-
-import sqlite3
-import MySQLdb.cursors
-import sys
-import os
-
-def usage():
- print(f"Usage: {sys.argv[0]} (app_join|db_join) (sqlite|mysql) (sqlite_path|my.cnf)")
-
-if len(sys.argv) < 2 or len(sys.argv) > 5:
- usage()
- sys.exit(1)
-
-method, db_kind, db_path = sys.argv[1:]
-if method not in ["app_join", "db_join"] or db_kind not in ["mysql", "sqlite"]:
- usage()
- sys.exit(1)
-
-if db_kind == "mysql":
- def dbobj():
- return MySQLdb.connect(
- read_default_file=os.path.expanduser(db_path),
- cursorclass=MySQLdb.cursors.SSCursor
- )
- def query(db, q, args=None):
- db.execute(q.replace("%", "%%").replace("?", "%s"), args)
- for row in db:
- yield row
-else:
- db = sqlite3.connect(db_path)
- def dbobj():
- return db
- def query(db, q, args=None):
- if args is None:
- return db.execute(q)
- else:
- return db.execute(q, args)
-
-def app_join():
- with dbobj() as db1:
- with dbobj() as db2:
- with dbobj() as db3:
- with dbobj() as db4:
- for (extid, date) in query(db1, "select extid, max(date) as date from extension where date <= '2018-05-01' group by extid order by extid limit 10000 offset 10000"):
- for (crx_etag,) in query(db2, "select crx_etag from extension where extid=? and date=? order by crx_etag", (extid, date)):
- for (path, md5, typ, simhash) in query(db3, "select path, md5, typ, simhash from crxfile where crx_etag=? and simhash is not null and path like '%.js' order by path, md5, typ", (crx_etag,)):
- for (size,) in query(db4, "select size from libdet where md5=? and typ=? and size >= 1024 order by size", (md5, typ)):
- yield md5
-
-def db_join():
- with dbobj() as db:
- for (md5,) in query(db, "select md5 from ((((select extid, max(date) as date from extension where date <= '2018-05-01' group by extid order by extid limit 10000 offset 10000) as e1 "
- "join (select extid, date, crx_etag from extension) as e2 using (extid, date)) "
- "join (select path, crx_etag, md5, typ from crxfile where simhash is not null and path like '%.js') as d2 using (crx_etag)) "
- "join (select md5, typ, size from libdet where size >= 1024) as d3 using (md5, typ)) order by extid, crx_etag, path, md5, typ, size"):
- yield md5
-
-with dbobj() as db:
- s = {}
- for (md5, library, path, typ) in query(db, "select md5, library, path, typ from cdnjs limit 10000"):
- s[md5] = (library, path, typ)
-
- hit = 0
- miss = 0
- if method == "app_join":
- f = app_join
- else:
- f = db_join
- for md5 in f():
- if md5 in s:
- hit += 1
- else:
- miss += 1
-
-print(f"Hit: {hit}")
-print(f"Miss: {miss}")
diff --git a/comparemd5 b/comparemd5
deleted file mode 100755
index 387a487..0000000
--- a/comparemd5
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/usr/bin/env python3.7
-#
-# Copyright (C) 2018 The University of Sheffield, UK
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-
-import sqlite3
-import sys
-
-db_path = sys.argv[1]
-
-with sqlite3.connect(db_path) as db:
- hit = 0
- miss = 0
- s = {}
- for (md5, library, path, typ) in db.execute("select md5, library, path, typ from cdnjs"):
- s[md5] = (library, path, typ)
-
- for (extid, date) in db.execute("select extid, max(date) as date from extension group by extid order by extid"):
- for (crx_etag,) in db.execute("select crx_etag from extension where extid=? and date=? order by crx_etag", (extid, date)):
- for (path, md5, typ, simhash) in db.execute("select path, md5, typ, simhash from crxfile where crx_etag=? and simhash is not null and path like '%.js' order by path, md5, typ", (crx_etag,)):
- for (size,) in db.execute("select size from libdet where md5=? and typ=? and size >= 1024 order by size", (md5, typ)):
- if md5 in s:
- hit += 1
- # library, path, typ = s[md5]
- # print("|".join((library, path, typ, extid, date, path, typ)))
- else:
- miss += 1
- print("|".join((extid, date, path, typ)))
-
- print(f"Hit: {hit}")
- print(f"Miss: {miss}")