ExtensionCrawler/crx-jsstrings

#!/usr/bin/env python3.5
#
# Copyright (C) 2017 The University of Sheffield, UK
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
"""Tool for extracting crx file from a tar archive."""
import datetime
import argparse
import io
import fnmatch
import os
import logging
import re
import sys
import operator
import tarfile
import zlib
from functools import partial, reduce
from multiprocessing import Pool
from zipfile import ZipFile
import dateutil
import dateutil.parser
import jsbeautifier
from ExtensionCrawler.config import (const_log_format, const_basedir,
                                     archive_file, get_local_archive_dir)
from ExtensionCrawler.archive import get_existing_ids, last_crx
from ExtensionCrawler.js_decomposer import init_file_info
from ExtensionCrawler.js_mincer import mince_js
# Script should run with python 3.4 or 3.5
assert sys.version_info >= (3, 4) and sys.version_info < (3, 6)


def jsstrings_data(conf, path, data):
    """Analyze data in memory."""
    if conf.file_pattern is not None:
        if path is None:
            return False
        elif not fnmatch.fnmatch(path, conf.file_pattern):
            logging.debug("Filename '" + path + "' does not match pattern '" +
                          conf.file_pattern + "'")
            return False

    match = False
    logging.debug("Start analyzing " + path)
    file_info = init_file_info(path, data)
    if file_info['size'] == 0:
        return match

    if file_info['dec_encoding'] is not None:
        # Compressed file: decompress (gzip wrapper) before decoding.
        try:
            dec = zlib.decompressobj(zlib.MAX_WBITS | 16)
            dec_data = dec.decompress(data, 100 * file_info['size'])
            if file_info['dec_encoding'] is None:
                logging.debug("Encoding is None for " + path +
                              ", using utf-8.")
                str_data = dec_data.decode('UTF-8')
            else:
                str_data = dec_data.decode(file_info['dec_encoding'])
            del dec_data
        except Exception:
            return match
    else:
        if file_info['encoding'] is None:
            logging.warning("Encoding is None for " + path + ", using utf-8.")
            str_data = data.decode('UTF-8')
        else:
            str_data = data.decode(file_info['encoding'])

    if conf.beautify:
        str_data = jsbeautifier.beautify(str_data)

    with io.StringIO(str_data) as str_obj:
        for block in mince_js(
                str_obj,
                single_line_comments_block=conf.group_single_line_comments):
            if analyze_block(conf, block):
                match = True
    return match


def print_block(conf, block, string_match=False, code_match=False):
    """Print a matching code/comment block."""
    print(block)


def analyze_block(conf, block):
    """Analyze a code/comment block; print it if it matches."""
    match = False
    regexps = []
    if conf.reg_exp is not None:
        for regexp in conf.reg_exp:
            regexps.append(re.compile(regexp))
    if block.is_comment():
        if conf.reg_exp_comments is not None:
            for regexp in conf.reg_exp_comments:
                regexps.append(re.compile(regexp))
        for regexp in regexps:
            if regexp.search(block.content):
                match = True
        if match:
            print_block(conf, block)
    elif block.is_code():
        regexps_string = regexps.copy()
        regexps_code = regexps.copy()
        if conf.reg_exp_string_literals is not None:
            for regexp in conf.reg_exp_string_literals:
                regexps_string.append(re.compile(regexp))
        if conf.reg_exp_source is not None:
            for regexp in conf.reg_exp_source:
                regexps_code.append(re.compile(regexp))
        string_match = False
        for regexp in regexps_string:
            for string in block.string_literals:
                if regexp.search(string):
                    string_match = True
        code_match = False
        for regexp in regexps_code:
            if regexp.search(block.content):
                code_match = True
        match = string_match or code_match
        if match:
            print_block(conf, block, string_match, code_match)
    return match
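
# How the regular-expression options combine (as implemented above): patterns
# given with -r/--reg-exp are applied to every block; -c/--reg-exp-comments
# additionally applies to comment blocks; within code blocks,
# -l/--reg-exp-string-literals is matched against string literals only and
# -s/--reg-exp-source against the full block content. As an illustrative
# sketch, searching string literals for a hard-coded token could use
#
#   crx-jsstrings extension.crx -l 'api[_-]?key'
#
# (the pattern and file name are hypothetical).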


def analyze_crx(conf, crx):
    """Analyze crx file."""
    match = False
    with ZipFile(crx) as crxobj:
        js_files = [
            f for f in crxobj.infolist()
            if f.filename.endswith(('.js', '.js.gz', '.jgz', '.jsg',
                                    '.css.gz'))
        ]
        for jsfile in js_files:
            with crxobj.open(jsfile) as js_file_obj:
                data = js_file_obj.read()
                path = js_file_obj.name
                if jsstrings_data(conf, path, data):
                    match = True
    return match


def analyze_tar(conf, filename):
    """Analyze the most recent crx inside a tar archive."""
    last_crx_file = ''
    match = False
    extid = os.path.splitext(os.path.basename(filename))[0]
    # Note: this version of the script defines no --date option, so a date is
    # only used if the configuration happens to provide one.
    date = getattr(conf, 'date', None)
    if date is not None:
        dateobj = dateutil.parser.parse(date)
        if dateobj.tzinfo is None or dateobj.tzinfo.utcoffset(dateobj) is None:
            dateobj = dateobj.replace(tzinfo=datetime.timezone.utc)
        last_crx_file = last_crx(
            os.path.join(conf.archive_dir, "data"), extid, dateobj)
    else:
        last_crx_file = last_crx(os.path.join(conf.archive_dir, "data"), extid)

    if last_crx_file == "" or last_crx_file is None:
        print("No crx in " + extid)
    else:
        print("# Start analyzing " + extid)
        with tarfile.open(filename, 'r') as archive:
            with archive.extractfile(last_crx_file) as crx:
                match = analyze_crx(conf, crx)
        if match:
            print("RegExp found in " + extid)
        else:
            print("RegExp not found in " + extid)
    return match


def analyze_file(conf, filename):
    """Analyze a single JavaScript/CSS file on disk."""
    with open(filename, 'rb') as fileobj:
        data = fileobj.read()
    return jsstrings_data(conf, filename, data)


def compute_tasks(file_or_extids, taskid=1, maxtaskid=1):
    """Compute the list of tasks from files and/or extension ids."""
    extid_re = re.compile('^[a-p]+$')
    tasks = []
    for file_or_extid in file_or_extids:
        if file_or_extid.endswith(('.crx', '.tar', '.css', '.js', '.c',
                                   '.java')):
            tasks.append(file_or_extid)
        elif extid_re.match(file_or_extid):
            tasks.append(file_or_extid)
        else:
            # default: a file containing extension ids, one per line
            with open(file_or_extid) as fileobj:
                for line in fileobj:
                    line = line.strip()
                    if extid_re.match(line):
                        tasks.append(line)

    # Select the chunk of tasks belonging to this task id.
    chunksize = len(tasks) // maxtaskid
    if taskid == maxtaskid:
        tasks = tasks[(taskid - 1) * chunksize:]
    else:
        tasks = tasks[(taskid - 1) * chunksize:taskid * chunksize]
    return tasks
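
# Chunking sketch (hypothetical numbers): with 10 tasks and --max-taskid 3,
# chunksize is 10 // 3 = 3, so task id 1 processes tasks[0:3], task id 2
# processes tasks[3:6], and the last task id (3) processes the remainder
# tasks[6:], i.e. four tasks.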


def analyze_task(conf, task):
    """Analyze one file/tar/crx/extid."""
    logging.debug("Analyzing " + task)
    extid_re = re.compile('^[a-p]+$')
    retval = False
    if task.endswith('.crx'):
        retval = analyze_crx(conf, task)
    elif task.endswith('.tar'):
        retval = analyze_tar(conf, task)
    elif extid_re.match(task):
        retval = analyze_tar(conf, task + '.tar')
    else:
        retval = analyze_file(conf, task)
    return retval


def main(conf):
    """Main function: JavaScript strings on steroids."""
    logger = logging.getLogger()
    ch = logging.StreamHandler(sys.stdout)
    ch.setFormatter(logging.Formatter(const_log_format()))
    logger.addHandler(ch)
    if conf.verbose:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.WARNING)

    print(vars(conf))
    tasks = compute_tasks(conf.FILE_OR_EXTID, conf.taskid, conf.max_taskid)
    with Pool(conf.parallel) as p:
        retvals = p.map(partial(analyze_task, conf), tasks)
    # True iff at least one task produced a match.
    return reduce(operator.or_, retvals, False)
if __name__ == "__main__":
main_parser = argparse.ArgumentParser(
description=
'A combination of strings and grep for JavaScript and CSS files.')
main_parser.add_argument(
'-r',
'--reg-exp',
metavar='REGEXP',
type=str,
nargs='+',
help='search for regular expression')
2017-10-02 19:23:45 +00:00
main_parser.add_argument(
'-v',
'--verbose',
action='store_true',
2017-10-02 19:23:45 +00:00
default=False,
help='increase verbosity')
2017-10-04 21:06:17 +00:00
main_parser.add_argument(
'-d',
'--output-decoration',
metavar='L',
choices=[0, 1, 2, 3],
type=int,
help='show only matching files, crx, tar')
    main_parser.add_argument(
        '-p',
        '--parallel',
        metavar='P',
        type=int,
        help='run P processes in parallel')
    main_parser.add_argument(
        '-f',
        '--file-pattern',
        metavar='pattern',
        type=str,
        help='process only files matching pattern')
    main_parser.add_argument(
        '-a',
        '--archive-dir',
        metavar='archive',
        type=str,
        default=const_basedir(),
        help='archive directory')
    main_parser.add_argument(
        '-C', '--colorize', action='store_true', help='use colors')
    main_parser.add_argument(
        '-n', '--taskid', metavar='n', type=int, default=1, help='task id')
    main_parser.add_argument(
        '-N',
        '--max-taskid',
        metavar='N',
        type=int,
        default=1,
        help='max task id')
    main_parser.add_argument(
        'FILE_OR_EXTID', nargs='+', help="extid/js/css/crx/tar file")

    comment_group = main_parser.add_argument_group('comment blocks')
    comment_group.add_argument(
        '-g',
        '--group-single-line-comments',
        action='store_true',
        help='group consecutive single-line comments into blocks')
    comment_group.add_argument(
        '-c',
        '--reg-exp-comments',
        metavar='REGEXP',
        type=str,
        nargs='+',
        help='search comments for regular expression')

    source_group = main_parser.add_argument_group('source blocks')
    source_group.add_argument(
        '-b',
        '--beautify',
        action='store_true',
        default=False,
        help='beautify source code')
    source_group.add_argument(
        '-s',
        '--reg-exp-source',
        metavar='REGEXP',
        type=str,
        nargs='+',
        help='search source for regular expression')

    strings_group = main_parser.add_argument_group('string literals')
    strings_group.add_argument(
        '-j',
        '--join-string-literals',
        action='store_true',
        help='join string literals (heuristic)')
    strings_group.add_argument(
        '-l',
        '--reg-exp-string-literals',
        metavar='REGEXP',
        type=str,
        nargs='+',
        help='search string literals for regular expression')

    main_conf = main_parser.parse_args()
    main(main_conf)
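
# Illustrative sketch for splitting a long list of extension ids across
# several processes, e.g. in a batch/array job (the id file, archive path,
# and pattern below are hypothetical):
#
#   crx-jsstrings extension-ids.txt -a /srv/archive -r 'chrome\.webRequest' \
#       -N 10 -n 3 -p 8
#
# Task 3 of 10 then analyzes its chunk of the ids (see compute_tasks) using a
# pool of 8 worker processes.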