Reformatting.

This commit is contained in:
Achim D. Brucker 2017-09-20 10:03:14 +01:00
parent a63dd53e45
commit e4245ed1dd
2 changed files with 24 additions and 11 deletions

View File

@ -33,6 +33,7 @@ import git
from ExtensionCrawler.file_identifiers import get_file_identifiers
def get_add_date(git_path, filename):
"""Method for getting the initial add/commit date of a file."""
try:
@ -97,6 +98,7 @@ def hackish_pull_list_changed_files(git_path):
files.add(changed_file.strip())
return list(files)
def path_to_list(path):
"""Convert a path (string) to a list of folders/files."""
plist = []
@ -250,6 +252,7 @@ def pull_and_update_db(cdnjs_git_path, create_csv, poolsize=16):
gc.collect()
update_database(create_csv, release_dic, cdnjs_git_path, files, poolsize)
def update_db_from_listfile(cdnjs_git_path, listfile, create_csv, poolsize=16):
"""Update database (without pull) for files in listfile)"""
paths = []
@ -261,7 +264,8 @@ def update_db_from_listfile(cdnjs_git_path, listfile, create_csv, poolsize=16):
path_files, path_libvers = get_all_lib_files(cdnjs_git_path, path)
libvers = libvers + path_libvers
files = files + path_files
logging.info("In total, found " + str(len(files)) + " files in " + str(len(libvers)) + " liberies/versions.")
logging.info("In total, found " + str(len(files)) + " files in " +
str(len(libvers)) + " liberies/versions.")
release_dic = build_release_date_dic(cdnjs_git_path, libvers, poolsize)
update_database(create_csv, release_dic, cdnjs_git_path, files, poolsize)
@ -293,4 +297,3 @@ def update_db_all_libs(cdnjs_git_path,
del libvers
gc.collect()
update_database(create_csv, release_dic, cdnjs_git_path, files, poolsize)

View File

@ -23,7 +23,8 @@ import sys
import os
from ExtensionCrawler.config import (const_log_format, const_basedir)
from ExtensionCrawler.cdnjs_git import (pull_and_update_db, update_db_all_libs, update_db_from_listfile)
from ExtensionCrawler.cdnjs_git import (pull_and_update_db, update_db_all_libs,
update_db_from_listfile)
# Script should run with python 3.4 or 3.5
assert sys.version_info >= (3, 4) and sys.version_info < (3, 6)
@ -32,14 +33,20 @@ assert sys.version_info >= (3, 4) and sys.version_info < (3, 6)
def helpmsg():
    """Print the command-line usage summary for cdnjs-git-miner.

    Writes one line per supported option to standard output and
    returns None.
    """
    # Each option is listed exactly once; keeping the text in a single
    # tuple makes a duplicated or missing entry easy to spot.
    usage_lines = (
        "cdnjs-git-miner [OPTION]",
        " -i initialize/update database with all libraries in the repository",
        " -u update: pull repository and update database",
        " -p n update n files in parallel",
        " -l <PATHFILE> read list of libraries to update from file (recursively)",
        " -n <TASKID> process chunk n where n in [1,N]",
        " -N <MAXTASKID> ",
        " -v verbose",
        " -c print csv format to stdout instead of writing to database",
        " -a=<DIR> archive directory",
        " -h print this help text",
    )
    for usage_line in usage_lines:
        print(usage_line)
@ -57,8 +64,9 @@ def main(argv):
csv = False
try:
opts, args = getopt.getopt(argv, "hvicl:ua:p:n:N:",
["archive=", "listupdate=", "taskid=", "maxtaskid="])
opts, args = getopt.getopt(argv, "hvicl:ua:p:n:N:", [
"archive=", "listupdate=", "taskid=", "maxtaskid="
])
except getopt.GetoptError:
helpmsg()
sys.exit(2)
@ -99,12 +107,14 @@ def main(argv):
cdnjs_git_path = os.path.join(os.path.join(basedir, "filedb"), "cdnjs-git")
if initialize:
update_db_all_libs(cdnjs_git_path, csv, taskid, maxtaskid, parallel_updates)
update_db_all_libs(cdnjs_git_path, csv, taskid, maxtaskid,
parallel_updates)
if update:
pull_and_update_db(cdnjs_git_path, csv, parallel_updates)
if not listfile is None:
update_db_from_listfile(cdnjs_git_path, listfile, csv, parallel_updates)
update_db_from_listfile(cdnjs_git_path, listfile, csv,
parallel_updates)
# Run the command-line entry point only when executed as a script,
# passing the arguments without the program name.
if __name__ == "__main__":
    main(sys.argv[1:])