#!/usr/bin/env python3.7
#
# Copyright (C) 2017 The University of Sheffield, UK
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""Tool for printing/exporting the JavaScript inventory of a crx or js file.

The inventory itself is produced by
``ExtensionCrawler.js_decomposer.decompose_js``; this script only parses the
command line, renders the result as a table and optionally writes it as CSV.
"""

import sys
import getopt
import csv
import logging
from collections import OrderedDict
from zipfile import ZipFile

from tabulate import tabulate

from ExtensionCrawler.js_decomposer import decompose_js
from ExtensionCrawler.config import (const_log_format)

# Columns written to the CSV file and shown on the console in verbose mode.
_FIELDNAMES = [
    'filename', 'path', 'size', 'dec_size', 'md5', 'sha1', 'mimetype',
    'description', 'encoding', 'type', 'detectionMethod',
    'detectionMethodDetails', 'lib', 'version', 'lib_filename',
    'evidenceText', 'evidenceStartPos', 'evidenceEndPos'
]

# Reduced column set shown on the console when not in verbose mode.
_BRIEF_FIELDNAMES = [
    'filename', 'md5', 'type', 'detectionMethod', 'lib', 'version',
    'lib_filename'
]


def helpmsg():
    """Print help message."""
    print("crx-jsinventory [OPTION] crx-file|js-file")
    print(" -h print this help text")
    # getopt's "c:" takes the value as a separate token (or glued on as
    # "-cFILE"); "-c=FILE" would yield the file name "=FILE".
    print(" -c FILE csv file (output)")
    print(" -v verbose")
    print(
        " -d disable use of database with file information (not recommended)"
    )
    print(" -s silent")


def _printable_row(item, fieldnames):
    """Return a display-ready row for one inventory entry.

    Args:
        item: mapping describing one inventory entry; it must contain every
            key listed in ``fieldnames``.
        fieldnames: column names to include, in output order.

    Returns:
        An ``OrderedDict`` with exactly the keys of ``fieldnames`` in that
        order, with non-printable values converted to plain values.

    Raises:
        KeyError: if ``item`` lacks one of the requested fields.
    """
    row = OrderedDict((key, item[key]) for key in fieldnames)
    # These fields expose ``.value``; presumably enum members produced by
    # decompose_js -- TODO confirm against ExtensionCrawler.js_decomposer.
    for key in ('type', 'detectionMethod'):
        if key in row:
            row[key] = row[key].value
    # Digests expose ``.hex()`` (presumably raw bytes); show them as hex text.
    for key in ('md5', 'sha1'):
        if key in row:
            row[key] = row[key].hex()
    return row


def main(argv):
    """Parse the command line, build the JavaScript inventory and report it.

    Args:
        argv: command-line arguments without the program name.

    The inventory is printed as a table unless ``-s`` is given and is
    additionally written as CSV when ``-c`` (or ``--cvs``) names a file.
    Exits with status 2 on invalid options or when no input file is given,
    and with status 0 after printing the help text for ``-h``.
    """
    verbose = False
    silent = False
    csvfile = None
    database = True
    try:
        # NOTE: the long option is spelled "--cvs" (sic); it is kept
        # unchanged so that existing invocations keep working.
        opts, args = getopt.getopt(argv, "hvdsc:", ["cvs="])
    except getopt.GetoptError:
        helpmsg()
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            helpmsg()
            sys.exit()
        elif opt == '-v':
            verbose = True
        elif opt == '-s':
            silent = True
        elif opt == '-d':
            database = False
        elif opt in ('-c', "--cvs"):
            csvfile = arg

    if not args:
        # A missing input file is a usage error, just like an invalid option.
        helpmsg()
        sys.exit(2)
    filename = args[0]

    logger = logging.getLogger()
    ch = logging.StreamHandler(sys.stdout)
    ch.setFormatter(logging.Formatter(const_log_format()))
    logger.addHandler(ch)
    logger.setLevel(logging.INFO if verbose else logging.WARNING)

    # A .crx file is handed over as an opened zip archive; anything else is
    # passed to decompose_js by file name.
    if filename.endswith('.crx'):
        with ZipFile(filename) as crxobj:
            inventory = decompose_js(crxobj, database)
    else:
        inventory = decompose_js(filename, database)

    if not silent:
        print_fieldnames = _FIELDNAMES if verbose else _BRIEF_FIELDNAMES
        print_inventory = [
            _printable_row(item, print_fieldnames) for item in inventory
        ]
        print(tabulate(print_inventory, headers='keys'))

    if csvfile is not None:
        # newline='' lets the csv module control line endings itself, as its
        # documentation requires; otherwise extra "\r" may be emitted on
        # platforms that translate "\n" on write.
        with open(csvfile, 'w', newline='') as csvobj:
            writer = csv.DictWriter(csvobj, fieldnames=_FIELDNAMES)
            writer.writeheader()
            # Rows are written as returned by decompose_js (no conversion).
            writer.writerows(inventory)


if __name__ == "__main__":
    main(sys.argv[1:])