Updated grepper.
This commit is contained in:
parent
2cfeb9b88c
commit
f37e19f46a
|
@ -0,0 +1 @@
|
||||||
|
extfind
|
61
grepper
61
grepper
|
@ -28,6 +28,8 @@ from zipfile import ZipFile
|
||||||
from functools import partial
|
from functools import partial
|
||||||
import re
|
import re
|
||||||
from ExtensionCrawler.config import const_basedir
|
from ExtensionCrawler.config import const_basedir
|
||||||
|
from extfind import iter_extension_paths, iter_extension_paths_from_file
|
||||||
|
|
||||||
|
|
||||||
def help():
|
def help():
|
||||||
print("grepper [OPTION] GREP [FILE]")
|
print("grepper [OPTION] GREP [FILE]")
|
||||||
|
@ -36,7 +38,7 @@ def help():
|
||||||
print(" -h print this help text")
|
print(" -h print this help text")
|
||||||
print(" -b beautify JavaScript before matching")
|
print(" -b beautify JavaScript before matching")
|
||||||
print(" -a <DIR> archive directory")
|
print(" -a <DIR> archive directory")
|
||||||
print(" -p <PREFIX> three-letter-prefix")
|
print(" -g <GLOB> glob on the extenion id, don't use with -e")
|
||||||
print(" -e <EXTIDFILELIST> file with extension ids")
|
print(" -e <EXTIDFILELIST> file with extension ids")
|
||||||
print(" -t <THREADS> number of threads to use")
|
print(" -t <THREADS> number of threads to use")
|
||||||
print(" -n <TASKID> process chunk n where n in [1,N]")
|
print(" -n <TASKID> process chunk n where n in [1,N]")
|
||||||
|
@ -54,6 +56,12 @@ def guarded_stderr(string):
|
||||||
sys.stderr.write(string)
|
sys.stderr.write(string)
|
||||||
lock.release()
|
lock.release()
|
||||||
|
|
||||||
|
def has_at_least_one_match(content, pattern):
    """Return True if any line of *content* matches *pattern*.

    The text is split with ``str.splitlines()`` and each line is tested
    with a regular-expression search, so a match never spans a line break.

    Args:
        content: Text to scan.
        pattern: Regular expression, either as a string or as an
            already-compiled pattern object.

    Returns:
        True as soon as one line matches, False if no line does
        (including when *content* is empty).
    """
    # Resolve the pattern once instead of on every line; re.compile()
    # hands back an already-compiled pattern unchanged.
    regex = re.compile(pattern)
    return any(regex.search(line) for line in content.splitlines())
|
||||||
|
|
||||||
|
|
||||||
def process_crx(ext_id, date, crx, pattern, beautify):
|
def process_crx(ext_id, date, crx, pattern, beautify):
|
||||||
try:
|
try:
|
||||||
|
@ -64,7 +72,8 @@ def process_crx(ext_id, date, crx, pattern, beautify):
|
||||||
with z.open(zip_file_info) as f:
|
with z.open(zip_file_info) as f:
|
||||||
content = f.read().decode(errors="surrogateescape")
|
content = f.read().decode(errors="surrogateescape")
|
||||||
if beautify:
|
if beautify:
|
||||||
content = jsbeautifier.beautify(content)
|
if has_at_least_one_match(content, pattern):
|
||||||
|
content = jsbeautifier.beautify(content)
|
||||||
for i, line in enumerate(content.splitlines()):
|
for i, line in enumerate(content.splitlines()):
|
||||||
if not re.search(pattern, line):
|
if not re.search(pattern, line):
|
||||||
continue
|
continue
|
||||||
|
@ -93,24 +102,6 @@ def process_id(pattern, beautify, path):
|
||||||
path, traceback.format_exc()))
|
path, traceback.format_exc()))
|
||||||
|
|
||||||
|
|
||||||
def find(archive, pattern):
|
|
||||||
for root, _, files in os.walk(os.path.join(archive, "data")):
|
|
||||||
for file in files:
|
|
||||||
if fnmatch.fnmatch(file, pattern + ".tar"):
|
|
||||||
yield os.path.join(root, file)
|
|
||||||
|
|
||||||
|
|
||||||
def find_from_file(archive, extidlistfile):
|
|
||||||
with open(extidlistfile, 'r') as f:
|
|
||||||
extids = [l.strip() for l in f.readlines()]
|
|
||||||
|
|
||||||
for root, _, files in os.walk(os.path.join(archive, "data")):
|
|
||||||
for file in files:
|
|
||||||
for extid in extids:
|
|
||||||
if fnmatch.fnmatch(file, extid + ".tar"):
|
|
||||||
yield os.path.join(root, file)
|
|
||||||
|
|
||||||
|
|
||||||
def init(l):
|
def init(l):
|
||||||
global lock
|
global lock
|
||||||
lock = l
|
lock = l
|
||||||
|
@ -120,13 +111,16 @@ def parse_args(argv):
|
||||||
archive = const_basedir()
|
archive = const_basedir()
|
||||||
beautify = False
|
beautify = False
|
||||||
parallel = 8
|
parallel = 8
|
||||||
|
extidglob = None
|
||||||
|
extidlistfile = None
|
||||||
taskid = 1
|
taskid = 1
|
||||||
maxtaskid = 1
|
maxtaskid = 1
|
||||||
|
|
||||||
paths = []
|
paths = []
|
||||||
|
|
||||||
try:
|
try:
|
||||||
opts, args = getopt.getopt(argv, "ha:p:e:bt:n:N:", [
|
opts, args = getopt.getopt(argv, "ha:p:e:bt:n:N:", [
|
||||||
"archive=", "prefix=", "extidlistfile=", "beautify", "threads=",
|
"archive=", "glob=", "extidlistfile=", "beautify", "threads=",
|
||||||
"taskid=", "maxtaskid="
|
"taskid=", "maxtaskid="
|
||||||
])
|
])
|
||||||
except getopt.GetoptError:
|
except getopt.GetoptError:
|
||||||
|
@ -138,12 +132,10 @@ def parse_args(argv):
|
||||||
sys.exit()
|
sys.exit()
|
||||||
elif opt in ("-a", "--archive"):
|
elif opt in ("-a", "--archive"):
|
||||||
archive = arg
|
archive = arg
|
||||||
elif opt in ("-p", "--prefix"):
|
elif opt in ("-g", "--glob"):
|
||||||
prefix = arg
|
extidglob = arg
|
||||||
paths += find(archive, prefix + "*")
|
|
||||||
elif opt in ("-e", "--extidlistfile"):
|
elif opt in ("-e", "--extidlistfile"):
|
||||||
extidlistfile = arg
|
extidlistfile = arg
|
||||||
paths += find_from_file(archive, extidlistfile)
|
|
||||||
elif opt in ("-b", "--beautify"):
|
elif opt in ("-b", "--beautify"):
|
||||||
beautify = True
|
beautify = True
|
||||||
elif opt in ("-t", "--threads"):
|
elif opt in ("-t", "--threads"):
|
||||||
|
@ -158,15 +150,18 @@ def parse_args(argv):
|
||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
|
|
||||||
pattern = args[0]
|
pattern = args[0]
|
||||||
paths += args[1:]
|
if len(args) > 1:
|
||||||
if paths == []:
|
paths = args[1:]
|
||||||
paths = list(find(archive, "*"))
|
elif extidglob is None and extidlistfile is None:
|
||||||
|
paths = iter_extension_paths(archive, taskid, maxtaskid)
|
||||||
chunksize = int(len(paths) / maxtaskid)
|
elif extidglob is None and extidlistfile is not None:
|
||||||
if taskid == maxtaskid:
|
paths = iter_extension_paths_from_file(archive, taskid, maxtaskid,
|
||||||
paths = paths[(taskid - 1) * chunksize:]
|
extidlistfile)
|
||||||
|
elif extidglob is not None and extidlistfile is None:
|
||||||
|
paths = iter_extension_paths(archive, taskid, maxtaskid, extidglob)
|
||||||
else:
|
else:
|
||||||
paths = paths[(taskid - 1) * chunksize:taskid * chunksize]
|
help()
|
||||||
|
sys.exit(2)
|
||||||
|
|
||||||
return pattern, paths, beautify, parallel
|
return pattern, paths, beautify, parallel
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue