Added singularity image for create-db.

This commit is contained in:
Michael Herzberg 2017-09-20 11:37:57 +01:00
parent e4245ed1dd
commit 543c8d7fc8
5 changed files with 163 additions and 16 deletions

1
.gitignore vendored
View File

@ -66,3 +66,4 @@ target/
archive
.ropeproject
excrawl.img

View File

@ -1,16 +1,11 @@
#!/bin/bash
# Body of the SGE array-job script that runs one create-db task.
# SING_IMG, TMPDIR, ARCHIVE, BASEDIR, SGE_TASK_ID and MAX_SGE_TASK_ID are not
# assigned in the visible lines and are therefore expected in the environment.
# NOTE(review): the hunk header above reports 16 old / 11 new lines, so the
# conda/PATH setup directly below and the direct ./ExtensionCrawler/create-db
# call further down may be the removed side of the diff — confirm.
# Load the conda module (its stderr is discarded) and activate "mypython35".
module -s load apps/python/conda 2> /dev/null
source activate mypython35
# Prepend the user's private bin and lib directories to the search paths.
export PATH=~/bin:$PATH
export LD_LIBRARY_PATH=~/lib:$LD_LIBRARY_PATH
# From here on, expanding an unset variable is an error.
set -o nounset
# Command prefix for running a program inside the image $SING_IMG, with
# /opt/ExtensionCrawler as working directory and $TMPDIR bind-mounted on /tmp.
SING_EXEC="singularity exec --pwd /opt/ExtensionCrawler -B $TMPDIR:/tmp $SING_IMG"
# Dump the environment and the received arguments to the job output.
printenv
echo "The following parameter were passed: $*"
# List the archive directory; per the message below, this is done to force
# the archive to be mounted before it is used.
echo "Printing the content of $ARCHIVE to force mounting:"
ls "$ARCHIVE"
cd "$BASEDIR"
# Run create-db on the archive, passing this task's id (-n), the highest task
# id (-N) and any extra script arguments. The second form runs it inside the
# container through $SING_EXEC, which is left unquoted on purpose so that it
# splits into the command and its options.
./ExtensionCrawler/create-db -t 1 -a "$ARCHIVE" -n $SGE_TASK_ID -N $MAX_SGE_TASK_ID $*
$SING_EXEC ./create-db -t 1 -a "$ARCHIVE" -n $SGE_TASK_ID -N $MAX_SGE_TASK_ID $*

View File

@ -2,22 +2,42 @@
# Prepares a create-db run on sharc.shef.ac.uk: picks the job range, archive
# and target directory, makes sure a Singularity image exists on the cluster,
# and copies the code, the image and the SGE script into the target directory.
# Overridable through the environment: NRJOBS, JOBRANGE, ARCHIVE, TARGETDIR,
# SING_IMG_SRC.
# Abort on unset variables and on the first failing command.
set -o nounset
set -o errexit
# Physical path of the parent of the directory that contains this script.
# NOTE(review): BASEDIR is assigned twice in this hunk with the same value;
# one of the two lines is probably the removed side of the diff — confirm.
BASEDIR=$( cd $(dirname "$0"); cd ..; pwd -P )
# Number of array jobs (default 256) and the task range to submit
# (default 1-$NRJOBS).
NRJOBS=${NRJOBS:-256}
echo "Using $NRJOBS jobs"
JOBRANGE=${JOBRANGE:-1-$NRJOBS}
echo "Executing jobs $JOBRANGE"
# Default archive: of the entries named D* directly under the remote
# .snapshot directory, the one that sorts last. The escaped quotes keep the
# D* pattern quoted for the remote shell so that find receives it unexpanded.
ARCHIVE=${ARCHIVE:-$(ssh sharc.shef.ac.uk find /shared/brucker_research1/Shared/BrowserExtensions/archive/.snapshot -maxdepth 1 -name \"D*\" | sort -r | head -n1)}
echo "Using archive: $ARCHIVE"
# Timestamped run directory. The escaped \$USER stays literal in this string;
# presumably it is meant to be expanded by the remote shell — confirm.
TARGETDIR="${TARGETDIR:-/data/\$USER}/create-db-$(date +%Y%m%d-%H%M%S)"
echo "Using target dir: $TARGETDIR"
BASEDIR=$( cd $(dirname "$0"); cd ..; pwd -P )
# Master copy of the image on the cluster, and the per-run copy that is
# placed inside the target directory.
SING_IMG_SRC="${SING_IMG_SRC:-/shared/brucker_research1/Shared/BrowserExtensions/excrawl.img}"
SING_IMG="$TARGETDIR/excrawl.img"
# If the master image is missing on the cluster, offer to build it locally
# with singularity/build.sh and upload it. Any answer other than "yes" ends
# the script with exit status 0.
if ! ssh sharc.shef.ac.uk [ -f "$SING_IMG_SRC" ]; then
echo -n "$SING_IMG_SRC does not exist! Generate new image and push? (yes/abort): "
read confirm
if [ "$confirm" != yes ]; then
exit 0
fi
echo "Creating new image ..."
# Subshell, so the directory change does not affect the rest of the script.
(cd "$BASEDIR/singularity"; ./build.sh)
echo "Pushing new image ..."
scp "$BASEDIR/singularity/excrawl.img" sharc.shef.ac.uk:"$SING_IMG_SRC"
# The local image is no longer needed once it has been uploaded.
rm "$BASEDIR/singularity/excrawl.img"
fi
echo "Creating dirs ..."
ssh sharc.shef.ac.uk mkdir -p $TARGETDIR/ExtensionCrawler
ssh sharc.shef.ac.uk mkdir -p $TARGETDIR/logs
echo "Pushing $BASEDIR to sharc.shef.ac.uk:$TARGETDIR/ExtensionCrawler ..."
# NOTE(review): rsync matches exclude patterns relative to the transfer root,
# so the absolute pattern "$BASEDIR/archive" may never match and the archive
# directory may still be transferred; "/archive" looks like the intended
# pattern — confirm.
rsync -zr --exclude "$BASEDIR/archive" "$BASEDIR/" sharc.shef.ac.uk:"$TARGETDIR/ExtensionCrawler"
# Give this run its own copy of the image (a remote-to-remote copy).
echo "Copying $SING_IMG_SRC to $SING_IMG"
ssh sharc.shef.ac.uk cp "$SING_IMG_SRC" "$SING_IMG"
echo "Pushing sge script ..."
scp "$BASEDIR/sge/create-db.sge" sharc.shef.ac.uk:"$TARGETDIR/create-db.sge"
echo "Starting job ..."
ssh sharc.shef.ac.uk \
@ -28,8 +48,8 @@ ssh sharc.shef.ac.uk \
-V \
-m a \
-M "msherzberg1@sheffield.ac.uk" \
-t 1-$NRJOBS \
-t $JOBRANGE \
-j yes \
-o "$TARGETDIR/logs" \
"$TARGETDIR/ExtensionCrawler/sge/create-db.sge" \
"$TARGETDIR/create-db.sge" \
$*

23
singularity/build.sh Executable file
View File

@ -0,0 +1,23 @@
#!/bin/sh
# Copyright 2017 The University of Sheffield, UK
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Build the Singularity image excrawl.img from the definition file
# excrawl.def. Both paths are relative, so the script has to be started from
# the directory that contains excrawl.def. Bootstrapping the image uses sudo.
#
# Exit status: 1 if excrawl.img already exists or the empty image cannot be
# created; otherwise the exit status of the bootstrap step.
if [ -f excrawl.img ]; then
    # Never overwrite an existing image; report on stderr and give up.
    echo "Image excrawl.img exists already." >&2
    echo "Please remove/rename the image and restart this script" >&2
    exit 1
fi
# Stop right away if the empty image could not be created, rather than
# running the privileged bootstrap against a missing or partial image.
singularity create --size 600 excrawl.img || exit 1
sudo singularity bootstrap excrawl.img excrawl.def

108
singularity/excrawl.def Normal file
View File

@ -0,0 +1,108 @@
#!/bin/sh
# Copyright 2017 The University of Sheffield, UK
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Singularity definition for the ExtensionCrawler image: a Debian "testing"
# base system created with debootstrap from the mirror given below.
bootstrap:debootstrap
OSVersion: testing
MirrorURL: https://mirror.bytemark.co.uk/debian/
%labels
Maintainer The LogicalHacking Team (https://logicalhacking.com)
%setup
%post
###################################################################
# Add Debian unstable as a secondary (lower priority) source
# and update the database of available packages.
cat >> /etc/apt/sources.list << EOF
deb http://ftp.us.debian.org/debian unstable main
EOF
# Pin testing (priority 900) above unstable (priority 800).
cat > /etc/apt/preferences << EOF
Package: *
Pin: release a=testing
Pin-Priority: 900
Package: *
Pin: release a=unstable
Pin-Priority: 800
EOF
# Do not install recommended or suggested packages automatically.
cat > /etc/apt/apt.conf.d/01norecommend << EOF
APT::Install-Recommends "0";
APT::Install-Suggests "0";
EOF
# Make the preferences file readable by group and others.
chmod go+r /etc/apt/preferences
apt-get update
###################################################################
###################################################################
# Install the core dependencies (Python 3.5 or later)
# from the Debian Testing repository, plus git for cloning below.
# The package cache and the downloaded package lists are removed
# afterwards.
apt-get install -y python3-magic python3-crypto python3-minimal python3-pip python3-setuptools python3-mysqldb
apt-get clean
apt-get install -y git
apt-get clean
rm -rf /var/lib/apt/lists/*
###################################################################
###################################################################
# Create /opt for local software (mainly cloned git repositories
# from logicalhacking.com).
mkdir -p /opt
chmod 755 /opt
###################################################################
###################################################################
# Add the Extension Crawler repository; for more details, visit
# https://git.logicalhacking.com/BrowserSecurity/ExtensionCrawler
# The "production" branch is checked out and installed with pip3 in
# editable (-e) mode.
cd /opt
git clone https://git.logicalhacking.com/BrowserSecurity/ExtensionCrawler.git
cd ExtensionCrawler
git checkout production
cd ..
pip3 install --system -e ExtensionCrawler
# Link every crx-* entry of the checkout into /usr/local/bin.
ln -s /opt/ExtensionCrawler/crx-* /usr/local/bin/
cd /
# go+u-w: give group and others the owner's permission bits, then
# take write permission away from them again.
chmod -R go+u-w /opt/ExtensionCrawler
chmod -R go+u-w /usr/local/lib/
chmod -R go+u-w /usr/local/bin/
###################################################################
###################################################################
# Create mount/bind points for the various network drives
# on SHARC (only useful when using the Singularity image on
# the High-Performance Cluster of The University of Sheffield).
mkdir /scratch
mkdir /fastdata
mkdir /data
mkdir /shared
# Create nvidia driver directories to get rid of the singularity
# warnings on SHARC.
mkdir /nvbin
mkdir /nvlib
# Same permission scheme as above for the network-drive mount points.
chmod go+u-w /scratch /fastdata /data /shared
###################################################################
%environment
# We install all python modules into the container, so we do not want
# to use any packages that the user might have installed in their home
# directory.
export PYTHONNOUSERSITE=1