Reformatting.
This commit is contained in:
parent
a63dd53e45
commit
e4245ed1dd
|
@ -33,6 +33,7 @@ import git
|
|||
|
||||
from ExtensionCrawler.file_identifiers import get_file_identifiers
|
||||
|
||||
|
||||
def get_add_date(git_path, filename):
|
||||
"""Method for getting the initial add/commit date of a file."""
|
||||
try:
|
||||
|
@ -97,6 +98,7 @@ def hackish_pull_list_changed_files(git_path):
|
|||
files.add(changed_file.strip())
|
||||
return list(files)
|
||||
|
||||
|
||||
def path_to_list(path):
|
||||
"""Convert a path (string) to a list of folders/files."""
|
||||
plist = []
|
||||
|
@ -250,6 +252,7 @@ def pull_and_update_db(cdnjs_git_path, create_csv, poolsize=16):
|
|||
gc.collect()
|
||||
update_database(create_csv, release_dic, cdnjs_git_path, files, poolsize)
|
||||
|
||||
|
||||
def update_db_from_listfile(cdnjs_git_path, listfile, create_csv, poolsize=16):
|
||||
"""Update database (without pull) for files in listfile."""
|
||||
paths = []
|
||||
|
@ -261,7 +264,8 @@ def update_db_from_listfile(cdnjs_git_path, listfile, create_csv, poolsize=16):
|
|||
path_files, path_libvers = get_all_lib_files(cdnjs_git_path, path)
|
||||
libvers = libvers + path_libvers
|
||||
files = files + path_files
|
||||
logging.info("In total, found " + str(len(files)) + " files in " + str(len(libvers)) + " liberies/versions.")
|
||||
logging.info("In total, found " + str(len(files)) + " files in " +
|
||||
str(len(libvers)) + " liberies/versions.")
|
||||
release_dic = build_release_date_dic(cdnjs_git_path, libvers, poolsize)
|
||||
update_database(create_csv, release_dic, cdnjs_git_path, files, poolsize)
|
||||
|
||||
|
@ -293,4 +297,3 @@ def update_db_all_libs(cdnjs_git_path,
|
|||
del libvers
|
||||
gc.collect()
|
||||
update_database(create_csv, release_dic, cdnjs_git_path, files, poolsize)
|
||||
|
||||
|
|
|
@ -23,7 +23,8 @@ import sys
|
|||
import os
|
||||
|
||||
from ExtensionCrawler.config import (const_log_format, const_basedir)
|
||||
from ExtensionCrawler.cdnjs_git import (pull_and_update_db, update_db_all_libs, update_db_from_listfile)
|
||||
from ExtensionCrawler.cdnjs_git import (pull_and_update_db, update_db_all_libs,
|
||||
update_db_from_listfile)
|
||||
|
||||
# Script should run with python 3.4 or 3.5
|
||||
assert sys.version_info >= (3, 4) and sys.version_info < (3, 6)
|
||||
|
@ -32,14 +33,20 @@ assert sys.version_info >= (3, 4) and sys.version_info < (3, 6)
|
|||
def helpmsg():
|
||||
"""Print help message."""
|
||||
print("cdnjs-git-miner [OPTION]")
|
||||
print(" -i initialize/update database with all libraries in the repository")
|
||||
print(
|
||||
" -i initialize/update database with all libraries in the repository"
|
||||
)
|
||||
print(" -u update: pull repository and update database")
|
||||
print(" -p n update n files in parallel")
|
||||
print(" -l <PATHFILE> read list of libraries to update from file (recusively)")
|
||||
print(
|
||||
" -l <PATHFILE> read list of libraries to update from file (recusively)"
|
||||
)
|
||||
print(" -n <TASKID> process chunk n where n in [1,N]")
|
||||
print(" -N <MAXTASKID> ")
|
||||
print(" -v verbose")
|
||||
print(" -c print csv format to stdout instead of writing to database")
|
||||
print(
|
||||
" -c print csv format to stdout instead of writing to database"
|
||||
)
|
||||
print(" -a=<DIR> archive directory")
|
||||
print(" -h print this help text")
|
||||
|
||||
|
@ -57,8 +64,9 @@ def main(argv):
|
|||
csv = False
|
||||
|
||||
try:
|
||||
opts, args = getopt.getopt(argv, "hvicl:ua:p:n:N:",
|
||||
["archive=", "listupdate=", "taskid=", "maxtaskid="])
|
||||
opts, args = getopt.getopt(argv, "hvicl:ua:p:n:N:", [
|
||||
"archive=", "listupdate=", "taskid=", "maxtaskid="
|
||||
])
|
||||
except getopt.GetoptError:
|
||||
helpmsg()
|
||||
sys.exit(2)
|
||||
|
@ -99,12 +107,14 @@ def main(argv):
|
|||
cdnjs_git_path = os.path.join(os.path.join(basedir, "filedb"), "cdnjs-git")
|
||||
|
||||
if initialize:
|
||||
update_db_all_libs(cdnjs_git_path, csv, taskid, maxtaskid, parallel_updates)
|
||||
update_db_all_libs(cdnjs_git_path, csv, taskid, maxtaskid,
|
||||
parallel_updates)
|
||||
if update:
|
||||
pull_and_update_db(cdnjs_git_path, csv, parallel_updates)
|
||||
if not listfile is None:
|
||||
update_db_from_listfile(cdnjs_git_path, listfile, csv, parallel_updates)
|
||||
|
||||
update_db_from_listfile(cdnjs_git_path, listfile, csv,
|
||||
parallel_updates)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main(sys.argv[1:])
|
||||
|
|
Loading…
Reference in New Issue