diff --git a/PermissionAnalysis/grep-unused-permissions b/PermissionAnalysis/grep-unused-permissions
new file mode 100644
index 0000000..582f83e
--- /dev/null
+++ b/PermissionAnalysis/grep-unused-permissions
@@ -0,0 +1,216 @@
+#!/usr/bin/env python3.7
+#
+# Copyright (C) 2019 The University of Sheffield, UK
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+import argparse
+import io
+import logging
+import re
+import json
+import sys
+import csv
+from jsmin import jsmin
+
+from zipfile import ZipFile
+
+from ExtensionCrawler.config import (const_log_format, const_basedir)
+from ExtensionCrawler.archive import iter_tar_entries_by_date
+from ExtensionCrawler.js_mincer import mince_js
+
+
+def get_etag(headers_content):
+    """Return the ``ETag`` value found in a stored headers dump.
+
+    :param headers_content: text of a ``*.crx.headers`` archive entry.
+        Assumed to be a single-quoted, dict-like dump of the HTTP response
+        headers -- TODO confirm against the crawler that writes it.
+    :return: the value stored under the key ``"ETag"``, or ``None`` when the
+        key is absent or the text cannot be parsed.
+    """
+    import ast
+
+    # Escape the embedded double quotes first, then turn the single quotes
+    # into JSON string delimiters.
+    as_json = headers_content.replace('"', '\\"').replace("'", '"')
+    try:
+        headers = json.loads(as_json)
+    except ValueError:
+        # The quote swap produces invalid JSON as soon as one value contains
+        # an apostrophe; fall back to parsing the text as a Python literal
+        # instead of aborting the whole run.
+        try:
+            headers = ast.literal_eval(headers_content)
+        except (ValueError, SyntaxError, TypeError):
+            logging.getLogger(__name__).warning(
+                "could not parse crx headers, ignoring ETag")
+            return None
+
+    if not isinstance(headers, dict):
+        return None
+    return headers.get("ETag")
+
+
+def get_name_and_version(overview_contents):
+    """Extract the extension name and version from an overview page.
+
+    :param overview_contents: text of the ``overview.html`` archive entry.
+    :return: tuple ``(name, version)``; an element is ``None`` when the
+        corresponding ``<meta>`` tag is not present.
+    """
+    # NOTE(review): the regex literals were empty in this copy of the file
+    # (the markup appears to have been stripped), which made group(1) raise
+    # IndexError on every call. The patterns below are reconstructed --
+    # confirm them against the upstream ExtensionCrawler sources.
+    def _meta_content(itemprop):
+        match = re.search(
+            r'<meta itemprop="%s" content="(.*?)"\s*/>' % itemprop,
+            overview_contents)
+        return match.group(1) if match else None
+
+    return _meta_content("name"), _meta_content("version")
+
+
+# CSV cell for every (requested in manifest, evidence found in code) pair.
+_PERMISSION_STATUS = {
+    (True, True): "REQ_AND_FOUND",
+    (True, False): "REQ_AND_NOT_FOUND",
+    (False, True): "NOT_REQ_AND_FOUND",
+    (False, False): "NOT_REQ_AND_NOT_FOUND",
+}
+
+
+def _read_manifest_permissions(raw_content):
+    """Return the ``permissions`` entries of a raw manifest as a set of str."""
+    # There are some manifests that seem to have weird encodings...
+    try:
+        content = raw_content.decode("utf-8-sig")
+    except UnicodeDecodeError:
+        # Trying a different encoding, manifests are weird...
+        content = raw_content.decode("latin1")
+
+    # jsmin runs before parsing, presumably to drop comments that strict
+    # JSON would reject -- confirm.
+    manifest = json.loads(jsmin(content), strict=False)
+    if "permissions" in manifest:
+        return {str(permission) for permission in manifest["permissions"]}
+    return set()
+
+
+def _scan_crx(crx_file, permission_map):
+    """Return ``(requested, found)`` permission sets for one crx archive.
+
+    ``requested`` holds the permissions listed in ``manifest.json``;
+    ``found`` holds the permissions for which at least one evidence string
+    occurs in a line of a ``.js`` or ``.html`` entry.
+    """
+    requested = set()
+    found = set()
+    with ZipFile(crx_file) as zf:
+        for zipentry in zf.infolist():
+            if zipentry.filename.endswith((".js", ".html")):
+                with zf.open(zipentry) as f:
+                    text = io.TextIOWrapper(
+                        f, encoding="utf-8", errors="surrogateescape")
+                    lines = []
+                    for block in mince_js(text):
+                        lines += block.content.splitlines()
+
+                for permission, evidences in permission_map.items():
+                    # A permission only needs to be matched once, so skip
+                    # the ones already found in an earlier file.
+                    if permission in found:
+                        continue
+                    if any(evidence in line
+                           for evidence in evidences
+                           for line in lines):
+                        found.add(permission)
+
+            if zipentry.filename == "manifest.json":
+                with zf.open(zipentry) as m:
+                    requested |= _read_manifest_permissions(m.read())
+
+    return requested, found
+
+
+def handle_extid(conf, extid, permission_map, csvwriter):
+    """Analyse every archived crx of one extension and write CSV rows.
+
+    For each date yielded by ``iter_tar_entries_by_date`` that lies within
+    ``conf.from_date`` / ``conf.latest_date`` (both inclusive, each optional)
+    and contains a non-empty ``.crx`` entry, one row is written:
+    extid, still_in_store, most recent crx etag, date, crx etag, name,
+    version, followed by one status cell per permission in sorted order.
+
+    :param conf: parsed arguments; ``archive_dir``, ``from_date`` and
+        ``latest_date`` are read.
+    :param extid: extension id to look up in the archive.
+    :param permission_map: mapping from permission name to an iterable of
+        literal strings whose presence counts as evidence of use.
+    :param csvwriter: object whose ``writerow`` receives the result rows.
+    """
+    permissions = sorted(permission_map)
+    rows = []
+
+    # Both values are carried across dates: every row reports the state
+    # seen last, not the state at the row's own date.
+    still_in_store = None
+    most_recent_crx_etag = None
+
+    for date, tups in iter_tar_entries_by_date(conf.archive_dir, extid):
+        if conf.from_date and not (conf.from_date <= date):
+            continue
+        if conf.latest_date and not (date <= conf.latest_date):
+            continue
+
+        crx_etag = None
+        name = None
+        version = None
+        has_crx_file = False
+        requested = set()
+        found = set()
+
+        for tarentry, tarfile in tups:
+            filename = tarentry.name.split("/")[-1]
+
+            if filename.endswith(".crx.headers"):
+                crx_etag = get_etag(tarfile.read().decode())
+                if crx_etag:
+                    most_recent_crx_etag = crx_etag
+
+            if filename == "overview.html":
+                name, version = get_name_and_version(tarfile.read().decode())
+
+            if filename == "overview.html.status":
+                # Presumably an HTTP status code: 2xx means still listed.
+                still_in_store = tarfile.read().decode().startswith("2")
+
+            if filename.endswith(".crx") and tarentry.size > 0:
+                has_crx_file = True
+                crx_requested, crx_found = _scan_crx(tarfile, permission_map)
+                requested |= crx_requested
+                found |= crx_found
+
+        if has_crx_file:
+            statuses = [
+                _PERMISSION_STATUS[(permission in requested,
+                                    permission in found)]
+                for permission in permissions
+            ]
+            rows.append([date, crx_etag, name, version] + statuses)
+
+    # Rows are written only after the whole archive has been read so that
+    # the extension-wide columns hold their final values.
+    for row in rows:
+        csvwriter.writerow(
+            [str(x)
+             for x in [extid, still_in_store, most_recent_crx_etag] + row])
+
+
+def main(conf):
+    """Run the analysis for every extension id and print CSV to stdout.
+
+    :param conf: parsed arguments; ``verbose``, ``MAP_FILE`` and
+        ``EXTID_FILE`` are read here, and ``conf`` is passed on unchanged
+        to ``handle_extid``.
+    :return: ``None``.
+    """
+    logger = logging.getLogger()
+    ch = logging.StreamHandler(sys.stderr)
+    ch.setFormatter(logging.Formatter(const_log_format()))
+    logger.addHandler(ch)
+    logger.setLevel(logging.DEBUG if conf.verbose else logging.WARNING)
+
+    with open(conf.MAP_FILE) as f:
+        permission_map = json.load(f)
+
+    with open(conf.EXTID_FILE) as f:
+        csvwriter = csv.writer(sys.stdout, csv.unix_dialect)
+        csvwriter.writerow(
+            ["extid", "still_in_store", "most_recent_crx_etag", "date",
+             "crx_etag", "name", "version"] + sorted(permission_map))
+        for raw_line in f:
+            extid = raw_line.strip()
+            # Skip blank lines (e.g. a trailing newline) instead of looking
+            # up an empty extension id in the archive.
+            if not extid:
+                continue
+            handle_extid(conf, extid, permission_map, csvwriter)
+
+
+def build_parser():
+    """Create the command-line parser of this script.
+
+    :return: an ``argparse.ArgumentParser`` with the positionals
+        ``MAP_FILE`` and ``EXTID_FILE`` and the options ``--verbose``,
+        ``--latest-date``, ``--from-date`` and ``--archive-dir``.
+    """
+    parser = argparse.ArgumentParser(
+        description='Search extensions for unused permissions',
+        formatter_class=argparse.RawTextHelpFormatter)
+
+    # Positional inputs.
+    parser.add_argument(
+        'MAP_FILE', help='json file with permission - literal string mapping')
+    parser.add_argument('EXTID_FILE', help='file with extension ids')
+
+    parser.add_argument(
+        '-v', '--verbose', action='store_true', default=False,
+        help='increase verbosity')
+
+    # Optional date window; RawTextHelpFormatter keeps the explicit newlines.
+    latest_date_help = (
+        'select latest crx from tar, released before DATE.\n'
+        'Together with --from-date, specifies all crx released in specified\n'
+        'date range.')
+    parser.add_argument(
+        '-D', '--latest-date', metavar='DATE', type=str,
+        help=latest_date_help)
+
+    from_date_help = (
+        'select oldest crx from tar released after DATE.\n'
+        'Together with --latest-date, specifies all crx released in specified\n'
+        'date range.')
+    parser.add_argument(
+        '-d', '--from-date', metavar='DATE', type=str,
+        help=from_date_help)
+
+    parser.add_argument(
+        '-a', '--archive-dir', metavar='archive', type=str,
+        default=const_basedir(),
+        help='archive directory')
+
+    return parser
+
+
+if __name__ == "__main__":
+    # Parse the command line, run the analysis and hand main()'s return
+    # value to sys.exit as the process exit status.
+    sys.exit(main(build_parser().parse_args()))