Added python wrapper for grepper sge stuff.

This commit is contained in:
Michael Herzberg 2017-07-31 20:18:34 +01:00
parent e1aee1446a
commit 82a41aef18
2 changed files with 161 additions and 0 deletions

89
sge/grepper Executable file
View File

@ -0,0 +1,89 @@
#!/usr/bin/env python3
import sge_common
import sys
import getopt
import os
def get_sge_content(jobname, stdoutpath, execpath, archivepath, outpath,
                    greps):
    """Render the bash job script that is handed to ``qsub``.

    The script requests task ids 1-256 (``#$ -t 1-256``). Each task maps its
    ``$SGE_TASK_ID`` to a two-letter prefix via ``task_id_to_letter_256``
    (both indices are taken modulo 16, so prefixes run from "aa" to "pp")
    and passes that prefix to the executable through ``-p``.

    Args:
        jobname: Value for the ``#$ -N`` directive.
        stdoutpath: Value for the ``#$ -o`` directive.
        execpath: Path of the program each task runs.
        archivepath: Passed to the program via ``-a``.
        outpath: Passed to the program as its positional output argument.
        greps: Iterable of pattern strings appended as trailing arguments.

    Returns:
        The complete script text as a ``str``.
    """
    # Local import so this function does not depend on the file-level imports.
    import shlex

    # Quote every pattern individually: a pattern containing whitespace or
    # shell metacharacters must reach the program as exactly one argument
    # instead of being split or interpreted by bash.
    quoted_greps = " ".join(shlex.quote(grep) for grep in greps)

    # Doubled braces are literal braces for str.format; single-brace names
    # are the substitution fields.
    template = """#!/bin/bash
#
#$ -t 1-256
#$ -j yes
#$ -o "{stdoutpath}"
#$ -N "{jobname}"
#
module -s load apps/python/conda 2> /dev/null
source activate mypython35
function task_id_to_letter_256 {{
ABC=abcdefghijklmnopqrstuvwxyz
let "I1 = (($1-1) / 16) % 16"
let "I2 = ($1-1) % 16"
echo ${{ABC:$I1:1}}${{ABC:$I2:1}}
}}
"{execpath}" -a "{archivepath}" -p "$(task_id_to_letter_256 $SGE_TASK_ID)" "{outpath}" {greps}
"""
    return template.format(
        jobname=jobname,
        stdoutpath=stdoutpath,
        execpath=execpath,
        archivepath=archivepath,
        outpath=outpath,
        greps=quoted_greps)
def helpmsg():
    """Write the usage synopsis and the option summary to stdout."""
    usage_lines = (
        __file__ + " ARCHIVE OUTPUT GREP1 [GREP2 ...]",
        " -h print this help text",
    )
    for line in usage_lines:
        print(line)
def main(argv):
    """Parse the command line, validate the environment and submit the job.

    Args:
        argv: Command-line arguments without the program name; expected as
            ``ARCHIVE OUTPUT GREP1 [GREP2 ...]`` with an optional ``-h``.

    Exits with status 2 (after printing the help text) on an unknown option
    or when fewer than three positional arguments are given, and with the
    default success status after ``-h``. The ``sge_common`` validators may
    terminate the process themselves.
    """
    try:
        opts, args = getopt.getopt(argv, "h")
    except getopt.GetoptError:
        helpmsg()
        sys.exit(2)

    # "-h" is the only recognised option; it wins over everything else.
    if any(flag == '-h' for flag, _ in opts):
        helpmsg()
        sys.exit()

    if len(args) < 3:
        helpmsg()
        sys.exit(2)

    archive_arg, output_arg, *greps = args
    basedir = os.path.expanduser(archive_arg)
    outdir = os.path.expanduser(output_arg)

    stdoutpath = sge_common.get_stdout_path("grepper")
    jobname = os.path.basename(stdoutpath)
    execpath = os.path.join(sge_common.get_project_root(), "grepper")

    # Run every check before anything is created on disk.
    sge_common.ensure_sharc()
    sge_common.validate_archivepath(basedir)
    sge_common.validate_execpath(execpath)
    sge_common.validate_outdir(outdir)

    for directory in (stdoutpath, outdir):
        os.makedirs(directory)

    announcements = (
        ("Using data from {}", basedir),
        ("Writing logs to {}", stdoutpath),
        ("Writing results to {}", outdir),
    )
    for text, path in announcements:
        print(text.format(path))

    sge_content = get_sge_content(jobname, stdoutpath, execpath, basedir,
                                  outdir, greps)

    # Echo the generated script between two rulers before submitting it.
    ruler = "=" * 80
    print("Executing the following job:")
    print(ruler)
    print(sge_content)
    print(ruler)
    sge_common.execute_sge(sge_content)
# Script entry point: forward everything after the program name to main().
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)

72
sge/sge_common.py Normal file
View File

@ -0,0 +1,72 @@
import sys
import os
import socket
import re
import tempfile
import subprocess
from datetime import datetime
def get_project_root():
    """Return the normalised parent directory of ``sys.path[0]``."""
    first_path_entry = sys.path[0]
    parent = os.path.join(first_path_entry, os.path.pardir)
    return os.path.normpath(parent)
def validate_archivepath(basedir):
    """Abort unless ``<basedir>/data`` exists, is non-empty and well-formed.

    Every entry directly inside the ``data`` directory must have a name of
    exactly three characters.

    Args:
        basedir: Archive root; the check is applied to its ``data`` child.

    Raises:
        SystemExit: With status 1 (after an error message on stderr) when
            the data directory is missing, empty, or contains an entry whose
            name is not three characters long.
    """
    datadir = os.path.join(basedir, "data")
    if not os.path.exists(datadir):
        print("ERROR: '{}' does not exist!".format(datadir), file=sys.stderr)
        sys.exit(1)

    entries = os.listdir(datadir)
    # Truthiness test: the former identity comparison against a fresh list
    # literal could never be true, so an empty directory slipped through.
    if not entries:
        print(
            "ERROR: '{}' is empty, refusing to start".format(datadir),
            file=sys.stderr)
        sys.exit(1)

    for entry in entries:
        # Compare by value; identity comparison on ints only works by accident.
        if len(entry) != 3:
            print(
                "ERROR: '{}' must only contain three-letter-dirs, found '{}'!".
                format(datadir, entry),
                file=sys.stderr)
            sys.exit(1)
def validate_execpath(execpath):
    """Exit with status 1 and an error on stderr if ``execpath`` is missing."""
    if os.path.exists(execpath):
        return
    message = "ERROR: {} does not exist!".format(execpath)
    print(message, file=sys.stderr)
    sys.exit(1)
def ensure_sharc():
    """Abort unless the local hostname looks like a sharc machine.

    The hostname reported by ``socket.gethostname()`` must start with
    ``sharc-`` and contain the literal domain ``.shef.ac.uk``.

    Raises:
        SystemExit: With status 1 (after an error message on stderr) when
            the hostname does not match.
    """
    hostname = socket.gethostname()
    # Raw string with escaped dots: an unescaped "." matches any character,
    # which would accept names such as "sharc-xXshefYacZuk".
    if not re.match(r"sharc-.*\.shef\.ac\.uk", hostname):
        print("ERROR: only running on sharc!", file=sys.stderr)
        sys.exit(1)
def validate_outdir(outdir):
    """Exit with status 1 and an error on stderr if ``outdir`` already exists."""
    if not os.path.exists(outdir):
        return
    message = "ERROR: '{}' already exists!".format(outdir)
    print(message, file=sys.stderr)
    sys.exit(1)
def execute_sge(sgecontent):
    """Write the job script to a temporary file and submit it with ``qsub``.

    On success the combined stdout/stderr of ``qsub`` is printed to stdout.

    Args:
        sgecontent: Full text of the job script.

    Raises:
        SystemExit: With status 1 (after an error report on stderr) when
            ``qsub`` returns a non-zero status or cannot be started at all.
    """
    with tempfile.NamedTemporaryFile() as f:
        f.write(sgecontent.encode())
        # Flush so qsub sees the complete script when it opens the file by name.
        f.flush()
        try:
            output = subprocess.check_output(
                ["qsub", f.name], stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            # Header and details both go to stderr so they stay together.
            print("ERROR when submitting job:", file=sys.stderr)
            print(e.output.decode(errors="replace"), file=sys.stderr)
            sys.exit(1)
        except OSError as e:
            # qsub is missing or not executable: report it like any other
            # submission failure instead of dumping a traceback.
            print("ERROR when submitting job:", file=sys.stderr)
            print(e, file=sys.stderr)
            sys.exit(1)
        print(output.decode(errors="replace"))
def get_stdout_path(name):
    """Return ``~/sgelog/<name>_<timestamp>`` with the home directory expanded.

    The timestamp is the current local time formatted as
    ``YYYY-MM-DD_HH-MM-SS``.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    leaf = name + "_" + timestamp
    unexpanded = os.path.join("~", "sgelog", leaf)
    return os.path.expanduser(unexpanded)