forked from BrowserSecurity/ExtensionCrawler
Added Python wrapper for grepper SGE stuff.
This commit is contained in:
parent
e1aee1446a
commit
82a41aef18
|
@ -0,0 +1,89 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import sge_common
|
||||
import sys
|
||||
import getopt
|
||||
import os
|
||||
|
||||
|
||||
def get_sge_content(jobname, stdoutpath, execpath, archivepath, outpath,
                    greps):
    """Render the SGE job script that runs the grepper executable.

    The generated bash script carries the ``#$`` directives ``-t 1-256``,
    ``-j yes``, ``-o`` and ``-N``. It defines a shell function that turns
    ``$SGE_TASK_ID`` (1..256) into a two-letter prefix: both letters are
    picked from the first 16 letters of the alphabet, using
    ``((id-1) / 16) % 16`` and ``(id-1) % 16`` as indices. That prefix is
    handed to the executable via ``-p``.

    Args:
        jobname: Value for the ``-N`` directive.
        stdoutpath: Value for the ``-o`` directive.
        execpath: Path of the executable invoked by the script.
        archivepath: Value passed to the executable via ``-a``.
        outpath: Positional output argument of the executable.
        greps: Iterable of pattern strings appended to the command line.

    Returns:
        The complete job script as a string.
    """
    # Function-scope import keeps this block self-contained.
    import shlex

    # The patterns reach us already shell-unquoted (e.g. from argv). They are
    # re-quoted here so that whitespace and shell metacharacters survive being
    # embedded in the generated bash script. Plain words are left untouched.
    quoted_greps = " ".join(shlex.quote(grep) for grep in greps)

    return """#!/bin/bash
#
#$ -t 1-256
#$ -j yes
#$ -o "{stdoutpath}"
#$ -N "{jobname}"
#
module -s load apps/python/conda 2> /dev/null
source activate mypython35

function task_id_to_letter_256 {{
ABC=abcdefghijklmnopqrstuvwxyz
let "I1 = (($1-1) / 16) % 16"
let "I2 = ($1-1) % 16"
echo ${{ABC:$I1:1}}${{ABC:$I2:1}}
}}

"{execpath}" -a "{archivepath}" -p "$(task_id_to_letter_256 $SGE_TASK_ID)" "{outpath}" {greps}
""".format(
        jobname=jobname,
        stdoutpath=stdoutpath,
        execpath=execpath,
        archivepath=archivepath,
        outpath=outpath,
        greps=quoted_greps)
|
||||
|
||||
|
||||
def helpmsg():
    """Print the command-line usage summary to standard output."""
    usage_lines = (
        __file__ + " ARCHIVE OUTPUT GREP1 [GREP2 ...]",
        " -h print this help text",
    )
    for usage_line in usage_lines:
        print(usage_line)
|
||||
|
||||
|
||||
def main(argv):
    """Parse the command line, validate the setup and submit the grepper job.

    ``argv`` must hold ``ARCHIVE OUTPUT GREP1 [GREP2 ...]``. ``-h`` prints
    the help text and exits; an unknown option or fewer than three
    positional arguments prints the help text and exits with status 2.
    """
    try:
        opts, args = getopt.getopt(argv, "h")
    except getopt.GetoptError:
        helpmsg()
        sys.exit(2)

    if any(flag == '-h' for flag, _ in opts):
        helpmsg()
        sys.exit()

    # ARCHIVE, OUTPUT and at least one pattern are mandatory.
    if len(args) < 3:
        helpmsg()
        sys.exit(2)

    archive_arg, output_arg, *greps = args
    basedir = os.path.expanduser(archive_arg)
    outdir = os.path.expanduser(output_arg)

    stdoutpath = sge_common.get_stdout_path("grepper")
    jobname = os.path.basename(stdoutpath)
    project_root = sge_common.get_project_root()
    execpath = os.path.join(project_root, "grepper")

    # All checks run before anything is created on disk.
    sge_common.ensure_sharc()
    sge_common.validate_archivepath(basedir)
    sge_common.validate_execpath(execpath)
    sge_common.validate_outdir(outdir)

    for new_dir in (stdoutpath, outdir):
        os.makedirs(new_dir)

    summary = (
        ("Using data from {}", basedir),
        ("Writing logs to {}", stdoutpath),
        ("Writing results to {}", outdir),
    )
    for template, value in summary:
        print(template.format(value))

    sge_content = get_sge_content(jobname, stdoutpath, execpath, basedir,
                                  outdir, greps)
    separator = "=" * 80
    print("Executing the following job:")
    print(separator)
    print(sge_content)
    print(separator)
    sge_common.execute_sge(sge_content)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: forward everything after the program name.
    cli_args = sys.argv[1:]
    main(cli_args)
|
|
@ -0,0 +1,72 @@
|
|||
import sys
|
||||
import os
|
||||
import socket
|
||||
import re
|
||||
import tempfile
|
||||
import subprocess
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
def get_project_root():
    """Return the normalized parent directory of ``sys.path[0]``."""
    start_dir = sys.path[0]
    parent_dir = os.path.join(start_dir, os.path.pardir)
    return os.path.normpath(parent_dir)
|
||||
|
||||
|
||||
def validate_archivepath(basedir):
    """Check that ``<basedir>/data`` looks like a usable archive, else exit.

    An error is printed to stderr and the process exits with status 1 when
    the ``data`` directory does not exist, when it is empty, or when it
    contains an entry whose name is not exactly three characters long.

    Args:
        basedir: Directory expected to contain the ``data`` directory.
    """
    datadir = os.path.join(basedir, "data")
    if not os.path.exists(datadir):
        print("ERROR: '{}' does not exist!".format(datadir), file=sys.stderr)
        sys.exit(1)

    entries = os.listdir(datadir)

    # Truthiness test: an identity comparison against a fresh ``[]`` literal
    # can never be true, which would leave the empty check dead.
    if not entries:
        print(
            "ERROR: '{}' is empty, refusing to start".format(datadir),
            file=sys.stderr)
        sys.exit(1)

    for entry in entries:
        # Compare the length by value rather than by object identity.
        if len(entry) != 3:
            print(
                "ERROR: '{}' must only contain three-letter-dirs, found '{}'!".
                format(datadir, entry),
                file=sys.stderr)
            sys.exit(1)
|
||||
|
||||
|
||||
def validate_execpath(execpath):
    """Exit with status 1, after an stderr message, unless ``execpath`` exists."""
    if os.path.exists(execpath):
        return
    message = "ERROR: {} does not exist!".format(execpath)
    print(message, file=sys.stderr)
    sys.exit(1)
|
||||
|
||||
|
||||
def ensure_sharc():
    """Exit with status 1 unless the local hostname looks like a sharc host.

    The hostname reported by ``socket.gethostname()`` must start with
    ``sharc-`` followed by anything and then the literal ``.shef.ac.uk``.
    Otherwise an error is printed to stderr and the process exits.
    """
    # Raw string with escaped dots: an unescaped "." matches any character,
    # which would also accept names such as "sharc-nodeXshefYacZuk".
    if not re.match(r"sharc-.*\.shef\.ac\.uk", socket.gethostname()):
        print("ERROR: only running on sharc!", file=sys.stderr)
        sys.exit(1)
|
||||
|
||||
|
||||
def validate_outdir(outdir):
    """Exit with status 1, after an stderr message, if ``outdir`` already exists."""
    already_present = os.path.exists(outdir)
    if not already_present:
        return
    complaint = "ERROR: '{}' already exists!".format(outdir)
    print(complaint, file=sys.stderr)
    sys.exit(1)
|
||||
|
||||
|
||||
def execute_sge(sgecontent):
    """Submit ``sgecontent`` as a job script via ``qsub``.

    The script text is written to a temporary file that exists only for the
    duration of the ``qsub`` call. On success the combined stdout/stderr of
    ``qsub`` is printed to stdout. If ``qsub`` cannot be started or returns
    a non-zero status, the error is reported on stderr and the process
    exits with status 1.

    Args:
        sgecontent: The job script as a string.
    """
    with tempfile.NamedTemporaryFile() as f:
        f.write(sgecontent.encode())
        # qsub opens the file by name, so the content must be on disk first.
        f.flush()
        try:
            output = subprocess.check_output(
                ["qsub", f.name], stderr=subprocess.STDOUT)
        except OSError as e:
            # qsub missing or not executable: report it the same way as a
            # failed submission instead of leaking a traceback.
            print("ERROR when submitting job:", file=sys.stderr)
            print(e, file=sys.stderr)
            sys.exit(1)
        except subprocess.CalledProcessError as e:
            print("ERROR when submitting job:", file=sys.stderr)
            # The details belong on stderr next to the header line.
            print(e.output.decode(errors="replace"), file=sys.stderr)
            sys.exit(1)
        # Lenient decoding: odd bytes in tool output must not crash us.
        print(output.decode(errors="replace"))
|
||||
|
||||
|
||||
def get_stdout_path(name):
    """Return ``~/sgelog/<name>_<timestamp>`` with the home directory expanded.

    The timestamp is the current local time formatted as
    ``%Y-%m-%d_%H-%M-%S``.
    """
    stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    leaf = "_".join((name, stamp))
    unexpanded = os.path.join("~", "sgelog", leaf)
    return os.path.expanduser(unexpanded)
|
Loading…
Reference in New Issue