#!/usr/bin/env python3.5
#
# Copyright (C) 2017 The University of Sheffield, UK
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
"""Tool for extracting crx file from a tar archive."""
import collections
import datetime
import argparse
import io
import os
import logging
import re
import sys
import tarfile
import zlib
from functools import partial
from multiprocessing import Pool
from zipfile import ZipFile
import dateutil.parser
import jsbeautifier
from ExtensionCrawler.config import (const_log_format, const_basedir,
                                     archive_file, get_local_archive_dir)
from ExtensionCrawler.archive import get_existing_ids, last_crx
from ExtensionCrawler.js_decomposer import init_file_info
from ExtensionCrawler.js_mincer import mince_js
# Script should run with python 3.4 or 3.5
assert sys.version_info >= (3, 4) and sys.version_info < (3, 6)
JsStringsConfig = collections.namedtuple('JsStringsConfig', [
    'comment', 'strings', 'group', 'program', 'beautify', 'basedir', 'regexp',
    'parallel', 'verbose'
])
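# Field summary (inferred from how the configuration is used below):
#   comment  -- analyze comment blocks
#   strings  -- analyze string literals inside code blocks
#   group    -- group consecutive single-line comments into one block
#   program  -- analyze code blocks as a whole
#   beautify -- run jsbeautifier over the source before mincing
#   basedir  -- root directory of the extension archive
#   regexp   -- regular expression to search for (None matches everything)
#   parallel -- number of worker processes for archive-wide runs
#   verbose  -- enable INFO-level logging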
def jsstrings_data(path, data, config):
    """Analyze a single JavaScript/CSS file; return True on a match."""
    match = False
    print("## Analyzing " + path)
    file_info = init_file_info(path, data)
    if file_info['size'] == 0:
        return match
    if file_info['dec_encoding'] is not None:
        # Compressed file: strip the gzip wrapper, then decode.
        try:
            dec = zlib.decompressobj(zlib.MAX_WBITS | 16)
            dec_data = dec.decompress(data, 100 * file_info['size'])
            str_data = dec_data.decode(file_info['dec_encoding'])
            del dec_data
        except Exception:
            return match
    else:
        if file_info['encoding'] is None:
            logging.warning("Encoding is None for " + path +
                            ", using utf-8.")
            str_data = data.decode('UTF-8')
        else:
            str_data = data.decode(file_info['encoding'])
    if config.beautify:
        str_data = jsbeautifier.beautify(str_data)
    with io.StringIO(str_data) as str_obj:
        for block in mince_js(
                str_obj, single_line_comments_block=config.group):
            if analyze_block(True, config.comment, config.program,
                             config.strings, config.regexp, block):
                match = True
    return match
def analyze_block(verbose, comment, program, strings, regexp, block):
    """Analyze one block; print matching blocks/strings when verbose.

    Note that re.match anchors at the start of the content, so a
    pattern has to match from the beginning of the block or string.
    """
    match = False
    rgx = None
    if regexp is not None:
        rgx = re.compile(regexp)
if comment and block.is_comment():
if regexp is None or rgx.match(block.content):
if verbose:
print(block)
match = True
elif block.is_code():
if program:
if regexp is None or rgx.match(block.content):
if verbose:
print(block)
match = True
if strings:
for string in block.string_literals:
if regexp is None or rgx.match(string):
if verbose:
print(string)
match = True
return match
def analyze_crx(config, crx, path):
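    """Search the JavaScript/CSS files inside a crx package.

    A crx file is essentially a ZIP archive with an extra header; since
    zipfile locates the central directory from the end of the file, the
    members can be read without stripping that header.
    """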
match = False
if path is None:
with ZipFile(crx) as crxobj:
            js_files = [
                f for f in crxobj.infolist()
                if f.filename.endswith((".js", ".js.gz", ".jgz", ".jsg",
                                        ".css.gz"))
            ]
for jsfile in js_files:
with crxobj.open(jsfile) as js_file_obj:
data = js_file_obj.read()
path = js_file_obj.name
if jsstrings_data(path, data, config):
match = True
else:
with ZipFile(crx) as crxobj:
with crxobj.open(path) as js_file:
data = js_file.read()
match = jsstrings_data(path, data, config)
return match
def analyze_tar(config, date, path, filename):
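    """Analyze the most recent crx (relative to date, if given) in the
    tar archive of one extension."""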
last_crx_file = ''
match = False
extid = os.path.splitext(os.path.basename(filename))[0]
if date is not None:
dateobj = dateutil.parser.parse(date)
if dateobj.tzinfo is None or dateobj.tzinfo.utcoffset(dateobj) is None:
dateobj = dateobj.replace(tzinfo=datetime.timezone.utc)
last_crx_file = last_crx(
os.path.join(config.basedir, "data"), extid, dateobj)
else:
last_crx_file = last_crx(os.path.join(config.basedir, "data"), extid)
    if not last_crx_file:
        print("No crx in " + extid)
    else:
        print("# Start analyzing " + extid)
        with tarfile.open(filename, 'r') as archive:
            with archive.extractfile(last_crx_file) as crx:
                match = analyze_crx(config, crx, path)
        if match:
            print("RegExp found in " + extid)
        else:
            print("RegExp not found in " + extid)
def process_group(config, taskid, maxtaskid, date, path):
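    """Process chunk number taskid (1-based) of maxtaskid equally sized
    chunks of the extension archive with a pool of worker processes."""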
archive_dir = os.path.join(config.basedir, "data")
ext_ids = get_existing_ids(archive_dir)
    chunksize = len(ext_ids) // maxtaskid
if taskid == maxtaskid:
ext_ids = ext_ids[(taskid - 1) * chunksize:]
else:
ext_ids = ext_ids[(taskid - 1) * chunksize:taskid * chunksize]
ext_ids = list(map(partial(archive_file, archive_dir), ext_ids))
with Pool(config.parallel) as p:
p.map(partial(analyze_tar, config, date, path), ext_ids)
def main(args):
    """Main function: JavaScript strings on steroids."""
    # Minimal wiring of the parsed command-line arguments into the
    # configuration; the block-specific search options (-c, -s, -l)
    # and the output decoration options are not honored yet.
    config = JsStringsConfig(
        comment=True,
        strings=False,
        group=args.group_single_line_comments,
        program=False,
        beautify=args.beautify,
        basedir=args.archive_dir if args.archive_dir else const_basedir(),
        regexp=args.reg_exp[0] if args.reg_exp else None,
        parallel=1,
        verbose=True)
    filename = args.FILE_OR_EXTID[0]
    path = None
    date = None
    taskid = args.taskid if args.taskid is not None else -1
    maxtaskid = args.max_taskid if args.max_taskid is not None else -1
    # Chrome extension ids consist solely of the letters a-p.
    extid_re = re.compile('^[a-p]+$')
    if config.verbose:
        loglevel = logging.INFO
    else:
        loglevel = logging.WARNING
    logger = logging.getLogger()
    ch = logging.StreamHandler(sys.stdout)
    ch.setFormatter(logging.Formatter(const_log_format()))
    logger.addHandler(ch)
    logger.setLevel(loglevel)
if taskid > 0 and maxtaskid > 0:
process_group(config, taskid, maxtaskid, date, path)
else:
if filename.endswith('.crx'):
analyze_crx(config, filename, path)
elif filename.endswith('.tar'):
analyze_tar(config, date, path, filename)
elif extid_re.match(filename):
extid = filename
filename = os.path.join(config.basedir, 'data',
get_local_archive_dir(extid),
extid + ".tar")
analyze_tar(config, date, path, filename)
else:
with open(filename, 'rb') as fileobj:
data = fileobj.read()
jsstrings_data(filename, data, config)
if __name__ == "__main__":
main_parser = argparse.ArgumentParser(
description=
'A combination of strings and grep for JavaScript and CSS files.')
main_parser.add_argument(
'-r',
'--reg-exp',
metavar='REGEXP',
type=str,
nargs='+',
help='search for regular expression')
main_parser.add_argument(
'-d',
'--output-decoration',
metavar='L',
choices=[0, 1, 2, 3],
type=int,
        help='level of output decoration: show only matching files, crx, tar')
main_parser.add_argument(
'-a',
'--archive-dir',
metavar='archive',
type=str,
help='archive directory')
main_parser.add_argument(
'-C', '--colorize', action='store_true', help='use colors')
main_parser.add_argument(
'-n', '--taskid', metavar='n', type=int, help='task id')
main_parser.add_argument(
'-N', '--max-taskid', metavar='N', type=int, help='max task id')
main_parser.add_argument(
'FILE_OR_EXTID', nargs='+', help="extid/js/css/crx/tar file")
comment_group = main_parser.add_argument_group('comment blocks')
    comment_group.add_argument(
        '-g',
        '--group-single-line-comments',
        action='store_true',
        help='group consecutive single-line comments into blocks')
comment_group.add_argument(
'-c',
'--reg-exp-comments',
metavar='REGEXP',
type=str,
nargs='+',
help='search comments for regular expression')
source_group = main_parser.add_argument_group('source blocks')
source_group.add_argument(
'-b', '--beautify', action='store_true', help='beautify source code')
source_group.add_argument(
'-s',
'--reg-exp-source',
metavar='REGEXP',
type=str,
nargs='+',
help='search source for regular expression')
strings_group = main_parser.add_argument_group('string literals')
strings_group.add_argument(
'-j',
'--join-string-literals',
action='store_true',
help='join string literals (heuristic)')
strings_group.add_argument(
'-l',
'--reg-exp-string-literals',
metavar='REGEXP',
type=str,
nargs='+',
help='search string literals for regular expression')
args = main_parser.parse_args()
main(args)