# Patch summary (leading commentary; the patch proper starts at the first
# "diff --git" header below).
#
# Purpose: run the SGE create-db job from a Singularity image instead of a
# conda environment plus an rsync'ed copy of the source tree.
#
#   .gitignore
#     - ignores excrawl.img.
#   sge/create-db.sge
#     - drops the conda module load, "source activate mypython35" and the
#       PATH / LD_LIBRARY_PATH exports.
#     - defines SING_EXEC ("singularity exec" with --pwd /opt/ExtensionCrawler
#       and $TMPDIR bound to /tmp, using the image named by $SING_IMG).
#     - lists "$ARCHIVE" before the run (the echo text says this is done to
#       force mounting) and starts ./create-db through $SING_EXEC.
#   sge/create-db.sh
#     - computes BASEDIR before it is first needed.
#     - adds JOBRANGE (default 1-$NRJOBS), passed to the "-t" option in place
#       of the fixed 1-$NRJOBS.
#     - adds SING_IMG_SRC / SING_IMG; if the source image is missing on
#       sharc.shef.ac.uk it asks for confirmation, runs singularity/build.sh,
#       uploads the image with scp and removes the local copy.
#     - replaces the rsync of the whole tree by a remote cp of the image into
#       $TARGETDIR and an scp of create-db.sge; the submitted script path
#       becomes "$TARGETDIR/create-db.sge".
#   singularity/build.sh (new)
#     - exits 1 if excrawl.img already exists; otherwise creates a 600-size
#       image and bootstraps it from excrawl.def via sudo.
#   singularity/excrawl.def (new)
#     - debootstrap of Debian testing, with unstable added at a lower pin
#       priority (800 vs. 900) and recommends/suggests disabled.
#     - installs the python3 packages and git, clones ExtensionCrawler into
#       /opt, checks out "production", pip3-installs it in editable mode and
#       links crx-* into /usr/local/bin.
#     - creates /scratch, /fastdata, /data, /shared, /nvbin and /nvlib, and
#       exports PYTHONNOUSERSITE=1.
#
# NOTE(review): the line breaks of this patch appear to have been collapsed;
#   whitespace inside context lines may not match the target files - confirm
#   against the original commit before applying.
# NOTE(review): create-db.sge expands $SING_EXEC, $SGE_TASK_ID,
#   $MAX_SGE_TASK_ID and $* unquoted, so values containing whitespace or glob
#   characters are re-split.
# NOTE(review): create-db.sge runs under "set -o nounset" and reads $SING_IMG
#   and $TMPDIR without assigning them; create-db.sh assigns SING_IMG without
#   an export in the visible hunks - confirm how the value reaches the job.
# NOTE(review): in create-db.sh any answer other than "yes" ends the script
#   with "exit 0", i.e. an abort is reported as success; "read confirm" is
#   used without -r.
# NOTE(review): singularity/build.sh sets no errexit, so the sudo bootstrap
#   step still runs when "singularity create" fails.
# NOTE(review): excrawl.def adds the unstable source over plain http while
#   MirrorURL uses https.
diff --git a/.gitignore b/.gitignore index f0793c1..f611715 100644 --- a/.gitignore +++ b/.gitignore @@ -66,3 +66,4 @@ target/ archive .ropeproject +excrawl.img diff --git a/sge/create-db.sge b/sge/create-db.sge index 5fdd454..2a7728a 100755 --- a/sge/create-db.sge +++ b/sge/create-db.sge @@ -1,16 +1,11 @@ #!/bin/bash - -module -s load apps/python/conda 2> /dev/null -source activate mypython35 - -export PATH=~/bin:$PATH -export LD_LIBRARY_PATH=~/lib:$LD_LIBRARY_PATH - set -o nounset +SING_EXEC="singularity exec --pwd /opt/ExtensionCrawler -B $TMPDIR:/tmp $SING_IMG" + printenv - echo "The following parameter were passed: $*" +echo "Printing the content of $ARCHIVE to force mounting:" +ls "$ARCHIVE" -cd "$BASEDIR" -./ExtensionCrawler/create-db -t 1 -a "$ARCHIVE" -n $SGE_TASK_ID -N $MAX_SGE_TASK_ID $* +$SING_EXEC ./create-db -t 1 -a "$ARCHIVE" -n $SGE_TASK_ID -N $MAX_SGE_TASK_ID $* diff --git a/sge/create-db.sh b/sge/create-db.sh index 7a28e47..0b70e72 100755 --- a/sge/create-db.sh +++ b/sge/create-db.sh @@ -2,22 +2,42 @@ set -o nounset set -o errexit +BASEDIR=$( cd $(dirname "$0"); cd ..; pwd -P ) + NRJOBS=${NRJOBS:-256} echo "Using $NRJOBS jobs" +JOBRANGE=${JOBRANGE:-1-$NRJOBS} +echo "Executing jobs $JOBRANGE" + ARCHIVE=${ARCHIVE:-$(ssh sharc.shef.ac.uk find /shared/brucker_research1/Shared/BrowserExtensions/archive/.snapshot -maxdepth 1 -name \"D*\" | sort -r | head -n1)} echo "Using archive: $ARCHIVE" TARGETDIR="${TARGETDIR:-/data/\$USER}/create-db-$(date +%Y%m%d-%H%M%S)" echo "Using target dir: $TARGETDIR" -BASEDIR=$( cd $(dirname "$0"); cd ..; pwd -P ) +SING_IMG_SRC="${SING_IMG_SRC:-/shared/brucker_research1/Shared/BrowserExtensions/excrawl.img}" +SING_IMG="$TARGETDIR/excrawl.img" +if ! ssh sharc.shef.ac.uk [ -f "$SING_IMG_SRC" ]; then + echo -n "$SING_IMG_SRC does not exist! Generate new image and push? (yes/abort): " + read confirm + if [ "$confirm" != yes ]; then + exit 0 + fi + echo "Creating new image ..." 
+ (cd "$BASEDIR/singularity"; ./build.sh) + echo "Pushing new image ..." + scp "$BASEDIR/singularity/excrawl.img" sharc.shef.ac.uk:"$SING_IMG_SRC" + rm "$BASEDIR/singularity/excrawl.img" +fi echo "Creating dirs ..." -ssh sharc.shef.ac.uk mkdir -p $TARGETDIR/ExtensionCrawler ssh sharc.shef.ac.uk mkdir -p $TARGETDIR/logs -echo "Pushing $BASEDIR to sharc.shef.ac.uk:$TARGETDIR/ExtensionCrawler ..." -rsync -zr --exclude "$BASEDIR/archive" "$BASEDIR/" sharc.shef.ac.uk:"$TARGETDIR/ExtensionCrawler" +echo "Copying $SING_IMG_SRC to $SING_IMG" +ssh sharc.shef.ac.uk cp "$SING_IMG_SRC" "$SING_IMG" + +echo "Pushing sge script ..." +scp "$BASEDIR/sge/create-db.sge" sharc.shef.ac.uk:"$TARGETDIR/create-db.sge" echo "Starting job ..." ssh sharc.shef.ac.uk \ @@ -28,8 +48,8 @@ ssh sharc.shef.ac.uk \ -V \ -m a \ -M "msherzberg1@sheffield.ac.uk" \ - -t 1-$NRJOBS \ + -t $JOBRANGE \ -j yes \ -o "$TARGETDIR/logs" \ - "$TARGETDIR/ExtensionCrawler/sge/create-db.sge" \ + "$TARGETDIR/create-db.sge" \ $* diff --git a/singularity/build.sh b/singularity/build.sh new file mode 100755 index 0000000..e29e912 --- /dev/null +++ b/singularity/build.sh @@ -0,0 +1,23 @@ +#!/bin/sh +# Copyright 2017 The University of Sheffield, UK +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +if [ -f excrawl.img ]; then + echo "Image excrawl.img exists already." 
+ echo "Please remove/rename the image and restart this script" + exit 1 +else + singularity create --size 600 excrawl.img + sudo singularity bootstrap excrawl.img excrawl.def +fi diff --git a/singularity/excrawl.def b/singularity/excrawl.def new file mode 100644 index 0000000..dc0d6bc --- /dev/null +++ b/singularity/excrawl.def @@ -0,0 +1,108 @@ +#!/bin/sh +# Copyright 2017 The University of Sheffield, UK +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +bootstrap:debootstrap +OSVersion: testing +MirrorURL: https://mirror.bytemark.co.uk/debian/ + +%labels +Maintainer The LogicalHacking Team (https://logicalhacking.com) + +%setup + +%post + +################################################################### +# Add Debian unstable as a secondary (lower priority) source +# and update the data base of available packages. 
+cat >> /etc/apt/sources.list << EOF +deb http://ftp.us.debian.org/debian unstable main +EOF + +cat > /etc/apt/preferences << EOF +Package: * +Pin: release a=testing +Pin-Priority: 900 + +Package: * +Pin: release a=unstable +Pin-Priority: 800 +EOF + +cat > /etc/apt/apt.conf.d/01norecommend << EOF +APT::Install-Recommends "0"; +APT::Install-Suggests "0"; +EOF + +chmod go+r /etc/apt/preferences +apt-get update +################################################################### + +################################################################### +# Install the core dependencies (Python 3.5 or later) +# from the Debian Testing repository +apt-get install -y python3-magic python3-crypto python3-minimal python3-pip python3-setuptools python3-mysqldb +apt-get clean +apt-get install -y git +apt-get clean +rm -rf /var/lib/apt/lists/* +################################################################### + +################################################################### +# Create /opt for local software (mainly cloned git repositories +# from logicalhacking.com +mkdir -p /opt +chmod 755 /opt +################################################################### + +################################################################### +# Add the Extension Crawler repository, for more details, visit +# https://git.logicalhacking.com/BrowserSecurity/ExtensionCrawler +cd /opt +git clone https://git.logicalhacking.com/BrowserSecurity/ExtensionCrawler.git +cd ExtensionCrawler +git checkout production +cd .. 
+pip3 install --system -e ExtensionCrawler +ln -s /opt/ExtensionCrawler/crx-* /usr/local/bin/ +cd / +chmod -R go+u-w /opt/ExtensionCrawler +chmod -R go+u-w /usr/local/lib/ +chmod -R go+u-w /usr/local/bin/ +################################################################### + +################################################################### +# Create mount/bind points for the various network drives +# on SHARC (only useful when using the Singularity image on +# the High-Performance Cluster of The University of Sheffield +mkdir /scratch +mkdir /fastdata +mkdir /data +mkdir /shared + +# Create nvidia driver directories to get rid of the singularity +# warnings on sharc +mkdir /nvbin +mkdir /nvlib +chmod go+u-w /scratch /fastdata /data /shared +################################################################### + +%environment + +# We install all python modules into the container, so we do not want +# to use any packages that the user might have installed in their home +# directory. +export PYTHONNOUSERSITE=1