From 49bb2d46909dd2fc9ae2b3881dc333028af41553 Mon Sep 17 00:00:00 2001 From: "Achim D. Brucker" Date: Wed, 27 Feb 2019 20:40:57 +0000 Subject: [PATCH] Removed extfind.py - superseded by new hpc scripts. --- extfind | 110 ----------------------------------------------------- extfind.py | 1 - 2 files changed, 111 deletions(-) delete mode 100755 extfind delete mode 120000 extfind.py diff --git a/extfind b/extfind deleted file mode 100755 index 703e7f0..0000000 --- a/extfind +++ /dev/null @@ -1,110 +0,0 @@ -#!/usr/bin/env python3.7 -# -# Copyright (C) 2016,2017 The University of Sheffield, UK -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . 
-# - -import getopt -import glob -import os -import sys -import logging -import re - -from ExtensionCrawler import config - - -def print_help(): - print("""extfind [OPTION]""") - print(""" -h print this help text""") - print(""" -a archive directory""") - print(""" -g glob on the extension id, don't use with -e """) - print(""" -e file with extension ids, don't use with -g""") - print(""" -n process chunk n where n in [1,N]""") - print(""" -N """) - - -def split(l, n, N): - if n < 1 or n > N: - raise ValueError("n must be between 1 and N") - chunksize = int(len(l) / N) + 1 - - # Slicing beyond the list contents returns the empty list - return l[chunksize * (n - 1):chunksize * n] - - -def iter_extension_paths_from_file(archive, n, N, extidlistfile): - paths = [] - with open(extidlistfile, 'r') as f: - for line in f.readlines(): - path = os.path.join(archive, "data", line[:3], line + ".tar") - if re.fullmatch("[a-p]{32}", line) and os.path.exists(path): - paths += [path] - else: - logging.warning("WARNING: {} is not a valid extension path!".format(path)) - return split(paths, n, N) - - -def iter_extension_paths(archive, n, N, extidglob="[a-p]"*32): - paths = glob.glob(os.path.join(archive, "data", "[a-p]" * 3, extidglob + ".tar")) - return split(paths, n, N) - - -def main(argv): - archive = config.const_basedir() - extidglob = None - extidlistfile = None - taskid = 1 - maxtaskid = 1 - - try: - opts, args = getopt.getopt(argv, "ha:g:e:n:N:", [ - "archive=", "glob=", "extidlistfile=", "taskid=", - "maxtaskid=", "help" - ]) - except getopt.GetoptError: - print_help() - sys.exit(2) - for opt, arg in opts: - if opt in ("-h", "--help"): - print_help() - sys.exit() - elif opt in ("-a", "--archive"): - archive = arg - elif opt in ("-g", "--glob"): - extidglob = arg - elif opt in ("-e", "--extidlistfile"): - extidlistfile = arg - elif opt in ("-n", "--taskid"): - taskid = int(arg) - elif opt in ("-N", "--maxtaskid"): - maxtaskid = int(arg) - - if extidglob is None and 
extidlistfile is None: - paths = iter_extension_paths(archive, taskid, maxtaskid) - elif extidglob is None and extidlistfile is not None: - paths = iter_extension_paths_from_file(archive, taskid, maxtaskid, extidlistfile) - elif extidglob is not None and extidlistfile is None: - paths = iter_extension_paths(archive, taskid, maxtaskid, extidglob) - else: - print_help() - sys.exit(2) - - for path in paths: - print(path) - - -if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/extfind.py b/extfind.py deleted file mode 120000 index 0231c4e..0000000 --- a/extfind.py +++ /dev/null @@ -1 +0,0 @@ -extfind \ No newline at end of file