Also use singularity for grepper scripts.

This commit is contained in:
Michael Herzberg 2017-09-20 15:19:30 +01:00
parent f43d0734fc
commit 8955661e43
2 changed files with 35 additions and 14 deletions

View File

@ -1,12 +1,11 @@
#!/bin/bash
# Job script for a single grepper array task: runs grepper for task
# $SGE_TASK_ID of $MAX_SGE_TASK_ID against $ARCHIVE and writes the
# bzip2-compressed output to $BASEDIR/out/$SGE_TASK_ID.bz2.
#
# Variables read but never assigned in this script (must come from the
# environment): TMPDIR, SING_IMG, ARCHIVE, BASEDIR, SGE_TASK_ID,
# MAX_SGE_TASK_ID.
#
# NOTE(review): this text appears to be a diff hunk with the +/- markers
# stripped, so removed and added lines are interleaved. The conda setup and the
# first grepper invocation below look like the pre-change version; confirm
# which lines are live before running.

# Load and activate the conda environment; stderr of `module` is discarded.
# These two lines run before nounset is enabled.
module -s load apps/python/conda 2> /dev/null
source activate mypython35
# From here on, expanding an unset variable is an error.
set -o nounset
# Command prefix for running inside the singularity image: working directory
# /opt/ExtensionCrawler, with $TMPDIR bind-mounted at /tmp.
# It is expanded unquoted later on purpose, so it word-splits into a command
# plus its arguments.
SING_EXEC="singularity exec --pwd /opt/ExtensionCrawler -B $TMPDIR:/tmp $SING_IMG"
# Dump the environment and the arguments into the job log.
printenv
echo "The following parameter were passed: $*"
echo "Printing the content of $ARCHIVE to force mounting:"
ls "$ARCHIVE"
# Direct (non-container) invocation of the grepper checked out under $BASEDIR.
# NOTE(review): writes to the same output file as the next command — presumably
# the removed line of the diff; confirm.
"$BASEDIR/ExtensionCrawler/grepper" -a "$ARCHIVE" -n $SGE_TASK_ID -N $MAX_SGE_TASK_ID $* | bzip2 > "$BASEDIR/out/$SGE_TASK_ID.bz2"
# Containerised invocation: ./grepper is resolved relative to the --pwd set in
# SING_EXEC. Extra script arguments ($*) are forwarded unquoted.
$SING_EXEC ./grepper -t 1 -a "$ARCHIVE" -n $SGE_TASK_ID -N $MAX_SGE_TASK_ID $* | bzip2 > "$BASEDIR/out/$SGE_TASK_ID.bz2"

View File

@ -2,33 +2,55 @@
# Configuration for submitting a grepper array job to sharc.shef.ac.uk.
# Each setting below can be overridden from the environment: NRJOBS, JOBRANGE,
# ARCHIVE, TARGETDIR, SING_IMG_SRC.
#
# NOTE(review): this text appears to be a diff hunk with the +/- markers
# stripped; BASEDIR is assigned twice with the same expression, presumably the
# removed and the added copy of one line — confirm.

# Abort on use of an unset variable and on any failing command.
set -o nounset
set -o errexit
# Physical path of the parent of the directory containing this script.
BASEDIR=$( cd $(dirname "$0"); cd ..; pwd -P )
# Number of array tasks (default 256).
NRJOBS=${NRJOBS:-256}
echo "Using $NRJOBS jobs"
# Task range to submit; defaults to all tasks, 1-$NRJOBS.
JOBRANGE=${JOBRANGE:-1-$NRJOBS}
echo "Executing jobs $JOBRANGE"
# Default archive: list entries named D* directly under the remote .snapshot
# directory (via ssh), sort them in reverse order and take the first one.
ARCHIVE=${ARCHIVE:-$(ssh sharc.shef.ac.uk find /shared/brucker_research1/Shared/BrowserExtensions/archive/.snapshot -maxdepth 1 -name \"D*\" | sort -r | head -n1)}
echo "Using archive: $ARCHIVE"
# Remote working directory, suffixed with a timestamp. The \$ keeps a literal
# "$USER" in the default value — presumably so the remote shell expands it to
# the remote user name; confirm.
TARGETDIR="${TARGETDIR:-/data/\$USER}/grepper-$(date +%Y%m%d-%H%M%S)"
echo "Using target dir: $TARGETDIR"
BASEDIR=$( cd $(dirname "$0"); cd ..; pwd -P )
# SING_IMG_SRC: image location on the remote host; SING_IMG: the copy placed
# inside the target directory for this run.
SING_IMG_SRC="${SING_IMG_SRC:-/shared/brucker_research1/Shared/BrowserExtensions/excrawl.img}"
SING_IMG="$TARGETDIR/excrawl.img"
# Make sure the singularity image exists on the remote host. If the remote
# file test fails, ask the user whether to build and upload a new image.
# Note: the local quotes around $SING_IMG_SRC are consumed by the local shell;
# the remote shell re-parses the command line it receives from ssh.
if ! ssh sharc.shef.ac.uk [ -f "$SING_IMG_SRC" ]; then
echo -n "$SING_IMG_SRC does not exist! Generate new image and push? (yes/abort): "
read confirm
# Anything other than the literal answer "yes" stops the script; note that
# this exits with status 0.
if [ "$confirm" != yes ]; then
exit 0
fi
echo "Creating new image ..."
# Run build.sh from within the singularity directory (in a subshell, so the
# working directory of this script is unchanged).
(cd "$BASEDIR/singularity"; ./build.sh)
echo "Pushing new image ..."
# Upload the image (expected at $BASEDIR/singularity/excrawl.img after the
# build) and delete the local copy.
scp "$BASEDIR/singularity/excrawl.img" sharc.shef.ac.uk:"$SING_IMG_SRC"
rm "$BASEDIR/singularity/excrawl.img"
fi
# Stage everything the job needs under $TARGETDIR on the remote host:
# directories, a copy of the source tree, the singularity image and the
# job script.
#
# NOTE(review): the four mkdir commands overlap (logs and out are created both
# individually and via the brace form) — presumably removed and added diff
# lines shown together; confirm which are live.
echo "Creating dirs ..."
ssh sharc.shef.ac.uk mkdir -p $TARGETDIR/ExtensionCrawler
ssh sharc.shef.ac.uk mkdir -p $TARGETDIR/logs
ssh sharc.shef.ac.uk mkdir -p $TARGETDIR/out
# The brace expansion is performed by the local shell before ssh is invoked.
ssh sharc.shef.ac.uk mkdir -p $TARGETDIR/{logs,out}
echo "Pushing $BASEDIR to sharc.shef.ac.uk:$TARGETDIR/ExtensionCrawler ..."
# Recursive, compressed copy of the contents of $BASEDIR.
# NOTE(review): the exclude pattern is an absolute local path; rsync matches
# exclude patterns against paths relative to the transfer root, so verify that
# this actually skips the archive directory.
rsync -zr --exclude "$BASEDIR/archive" "$BASEDIR/" sharc.shef.ac.uk:"$TARGETDIR/ExtensionCrawler"
echo "Copying $SING_IMG_SRC to $SING_IMG"
# Remote-side copy of the image into the target directory.
ssh sharc.shef.ac.uk cp "$SING_IMG_SRC" "$SING_IMG"
echo "Pushing sge script ..."
scp "$BASEDIR/sge/grepper.sge" sharc.shef.ac.uk:"$TARGETDIR/grepper.sge"
# Submit the array job on the remote host. The whole ssh invocation is one
# backslash-continued command, so no comments can be placed between its lines.
#
# The remote command line first sets SING_IMG, ARCHIVE, BASEDIR (set to
# $TARGETDIR) and MAX_SGE_TASK_ID (set to $NRJOBS) as environment assignments
# for qsub; the escaped quotes (\") are passed through to the remote shell.
# qsub is then given -V, a task range (-t), mail options (-m a, -M),
# -j yes, the log directory (-o) and the job script, followed by any
# arguments given to this script ($*, unquoted).
#
# NOTE(review): the command contains two -t options (1-$NRJOBS and $JOBRANGE)
# and two job script paths ($TARGETDIR/ExtensionCrawler/sge/grepper.sge and
# $TARGETDIR/grepper.sge). This looks like removed and added diff lines shown
# together; as written, the second path would be handed to qsub as an extra
# argument after the first script. Confirm which lines are live.
echo "Starting job ..."
ssh sharc.shef.ac.uk \
SING_IMG=\"$SING_IMG\" \
ARCHIVE=\"$ARCHIVE\" \
BASEDIR=\"$TARGETDIR\" \
MAX_SGE_TASK_ID=\"$NRJOBS\" \
qsub \
-V \
-t 1-$NRJOBS \
-m a \
-M "msherzberg1@sheffield.ac.uk" \
-t $JOBRANGE \
-j yes \
-o "$TARGETDIR/logs" \
"$TARGETDIR/ExtensionCrawler/sge/grepper.sge" \
"$TARGETDIR/grepper.sge" \
$*