ExtensionCrawler/crx-jsinventory

137 lines
4.1 KiB
Python
Executable File

#!/usr/bin/env python3.7
#
# Copyright (C) 2017 The University of Sheffield, UK
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""Tool for extracting crx file from a tar archive."""
import sys
import getopt
import csv
import logging
from collections import OrderedDict
from zipfile import ZipFile
from tabulate import tabulate
from ExtensionCrawler.js_decomposer import decompose_js
from ExtensionCrawler.config import (const_log_format)
def helpmsg():
    """Print the command-line usage summary to stdout.

    The option list mirrors what ``main`` passes to ``getopt``: the short
    options ``-h``, ``-c``, ``-v``, ``-d`` and ``-s``.
    """
    print("crx-jsinventory [OPTION] crx-file|js-file")
    print(" -h print this help text")
    # getopt takes the remainder of a short-option token as its argument, so
    # "-c=out.csv" would yield the file name "=out.csv"; advertise the
    # space-separated form instead.
    print(" -c <FILE> csv file (output)")
    print(" -v verbose")
    print(
        " -d disable use of database with file information (not recommended)"
    )
    print(" -s silent")
def main(argv):
    """Print, and optionally export, the JavaScript inventory of a file.

    Parses the options in ``argv``, runs ``decompose_js`` on the given crx or
    JavaScript file, prints the result as a table unless ``-s`` is given and,
    if ``-c``/``--cvs`` names a file, writes the inventory to it as CSV.

    Args:
        argv: Command-line arguments without the program name.

    Raises:
        SystemExit: With status 2 on an unknown option; with the default
            status after ``-h`` or when no input file is given.
    """
    verbose = False
    silent = False
    csvfile = None
    database = True
    try:
        # The long option is spelled "cvs" for historical reasons; it is kept
        # unchanged so that existing invocations continue to work.
        opts, args = getopt.getopt(argv, "hvdsc:", ["cvs="])
    except getopt.GetoptError:
        helpmsg()
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            helpmsg()
            sys.exit()
        elif opt == '-v':
            verbose = True
        elif opt == '-s':
            silent = True
        elif opt == '-d':
            database = False
        elif opt in ('-c', "--cvs"):
            csvfile = arg

    if not args:
        helpmsg()
        sys.exit()
    filename = args[0]

    # Log to stdout through the root logger; -v lowers the threshold to INFO.
    loglevel = logging.INFO if verbose else logging.WARNING
    logger = logging.getLogger()
    ch = logging.StreamHandler(sys.stdout)
    ch.setFormatter(logging.Formatter(const_log_format()))
    logger.addHandler(ch)
    logger.setLevel(loglevel)

    # Full column set: used for the CSV header and for the verbose table.
    fieldnames = [
        'filename', 'path', 'size', 'dec_size', 'md5', 'sha1', 'mimetype',
        'description', 'encoding', 'type', 'detectionMethod',
        'detectionMethodDetails', 'lib', 'version', 'lib_filename',
        'evidenceText', 'evidenceStartPos', 'evidenceEndPos'
    ]
    # Reduced column set for the default (non-verbose) table.
    brief_fieldnames = [
        'filename', 'md5', 'type', 'detectionMethod', 'lib', 'version',
        'lib_filename'
    ]

    if filename.endswith('.crx'):
        with ZipFile(filename) as crxobj:
            inventory = decompose_js(crxobj, database)
    else:
        inventory = decompose_js(filename, database)

    if not silent:
        print_fieldnames = fieldnames if verbose else brief_fieldnames
        print_inventory = []
        for item in inventory:
            # Building the row by walking print_fieldnames already yields the
            # columns in display order, so no separate sort is required.
            row = OrderedDict((key, item[key]) for key in print_fieldnames)
            # Convert the non-textual fields to printable values.
            if 'type' in row:
                row['type'] = row['type'].value
            if 'detectionMethod' in row:
                row['detectionMethod'] = row['detectionMethod'].value
            if 'md5' in row:
                row['md5'] = row['md5'].hex()
            if 'sha1' in row:
                row['sha1'] = row['sha1'].hex()
            print_inventory.append(row)
        print(tabulate(print_inventory, headers='keys'))

    if csvfile is not None:
        # newline='' lets the csv module control line endings itself;
        # without it, rows end in "\r\r\n" on platforms that translate "\n".
        with open(csvfile, 'w', newline='') as csvobj:
            writer = csv.DictWriter(csvobj, fieldnames=fieldnames)
            writer.writeheader()
            # NOTE(review): rows are written exactly as decompose_js returned
            # them, without the .value/.hex() conversion applied to the
            # table above - confirm that this is the intended CSV format.
            writer.writerows(inventory)
# Script entry point: hand everything after the program name to main().
if __name__ == "__main__":
    cli_arguments = sys.argv[1:]
    main(cli_arguments)