Forward grepper arguments instead of a single PATTERN value.

sge/grepper.sge: print the positional parameters the job received and
pass them on to ExtensionCrawler/grepper in place of "$PATTERN".

sge/grepper.sh: read NRJOBS (default 256), ARCHIVE and TARGETDIR from the
environment instead of from $1..$3, echo the values in use, leave the
archive directory out of the rsync push, size the array job from NRJOBS,
and append the script's own arguments after the job script path given to
qsub.

Review notes:
  * Arguments are forwarded with "$@" rather than unquoted $*, so they
    are not word-split or glob-expanded by the shell doing the
    forwarding.
  * The rsync exclude is written as "/archive": a leading slash anchors
    the pattern at the transfer root, which here is "$BASEDIR/".
  * ssh still joins its arguments into one string for the remote shell,
    which parses it again; callers must escape spaces and shell
    metacharacters in their arguments themselves.

diff --git a/sge/grepper.sge b/sge/grepper.sge
index 9344b08..5bc79b7 100755
--- a/sge/grepper.sge
+++ b/sge/grepper.sge
@@ -7,4 +7,6 @@ set -o nounset
 
 printenv
 
-"$BASEDIR/ExtensionCrawler/grepper" -a "$ARCHIVE" -n $SGE_TASK_ID -N $MAX_SGE_TASK_ID "$PATTERN" | bzip2 > "$BASEDIR/out/$SGE_TASK_ID.bz2"
+echo "The following parameter were passed: $*"
+
+"$BASEDIR/ExtensionCrawler/grepper" -a "$ARCHIVE" -n "$SGE_TASK_ID" -N "$MAX_SGE_TASK_ID" "$@" | bzip2 > "$BASEDIR/out/$SGE_TASK_ID.bz2"
diff --git a/sge/grepper.sh b/sge/grepper.sh
index 5284718..88b7c12 100755
--- a/sge/grepper.sh
+++ b/sge/grepper.sh
@@ -2,12 +2,14 @@
 set -o nounset
 set -o errexit
 
-PATTERN=$1
+NRJOBS=${NRJOBS:-256}
+echo "Using $NRJOBS jobs"
 
-ARCHIVE=${2:-$(ssh sharc.shef.ac.uk find /shared/brucker_research1/Shared/BrowserExtensions/.snapshot -maxdepth 1 -name \"D*\" | sort -r | head -n1)}
-echo "Using archive $ARCHIVE"
+ARCHIVE=${ARCHIVE:-$(ssh sharc.shef.ac.uk find /shared/brucker_research1/Shared/BrowserExtensions/.snapshot -maxdepth 1 -name \"D*\" | sort -r | head -n1)}
+echo "Using archive: $ARCHIVE"
 
-TARGETDIR="${3:-/data/\$USER}/grepper-$(date +%Y%m%d-%H%M%S)"
+TARGETDIR="${TARGETDIR:-/data/\$USER}/grepper-$(date +%Y%m%d-%H%M%S)"
+echo "Using target dir: $TARGETDIR"
 BASEDIR=$( cd $(dirname "$0"); cd ..; pwd -P )
 
 echo "Creating dirs ..."
@@ -16,17 +18,17 @@ ssh sharc.shef.ac.uk mkdir -p $TARGETDIR/logs
 ssh sharc.shef.ac.uk mkdir -p $TARGETDIR/out
 
 echo "Pushing $BASEDIR to sharc.shef.ac.uk:$TARGETDIR/ExtensionCrawler ..."
-rsync -zr "$BASEDIR/" sharc.shef.ac.uk:"$TARGETDIR/ExtensionCrawler"
+rsync -zr --exclude "/archive" "$BASEDIR/" sharc.shef.ac.uk:"$TARGETDIR/ExtensionCrawler"
 
 echo "Starting job ..."
 ssh sharc.shef.ac.uk \
   ARCHIVE=\"$ARCHIVE\" \
   BASEDIR=\"$TARGETDIR\" \
-  PATTERN=\"$PATTERN\" \
-  MAX_SGE_TASK_ID=256 \
+  MAX_SGE_TASK_ID=\"$NRJOBS\" \
   qsub \
   -V \
-  -t 1-256 \
+  -t "1-$NRJOBS" \
   -j yes \
   -o "$TARGETDIR/logs" \
-  "$TARGETDIR/ExtensionCrawler/sge/grepper.sge"
+  "$TARGETDIR/ExtensionCrawler/sge/grepper.sge" \
+  "$@"