Implemented skeleton of main routine.
This commit is contained in:
parent
a8a5534be1
commit
c30f7fdd7c
|
@ -190,3 +190,44 @@ def get_file_libinfo(gitobj, libfile):
|
|||
return file_info
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def pull_get_updated_lib_files(cdnjs_repo):
    """Collect the changed library files of the cdnjs repository.

    The list of changed paths comes from
    ``pull_get_list_changed_files(cdnjs_repo)``. A path is kept when its
    base name is neither ``package.json`` nor ``.gitkeep`` and the path
    starts with ``"ajax"``.

    Returns the kept paths as a list, in the order they were reported.
    """
    skipped_names = ["package.json", ".gitkeep"]
    return [
        changed
        for changed in pull_get_list_changed_files(cdnjs_repo)
        if os.path.basename(changed) not in skipped_names
        and changed.startswith("ajax")
    ]
|
||||
|
||||
def get_all_lib_files(cdnjs_git_path):
    """Return all library files stored in the cdnjs git repo.

    Walks the ``ajax`` directory below *cdnjs_git_path* and collects the
    path (directory joined with file name) of every file found, except
    files named ``package.json`` or ``.gitkeep``.

    Returns a list of paths; the list is empty when nothing matches.
    """
    ignored_names = ("package.json", ".gitkeep")
    lib_files = []
    ajax_root = os.path.join(cdnjs_git_path, "ajax")
    # The per-directory listing needs its own name: reusing the name of the
    # result list would make the inner loop append to the very list it is
    # iterating over and discard the results of earlier directories.
    for dirpath, _dirs, filenames in os.walk(ajax_root):
        for filename in filenames:
            if filename not in ignored_names:
                lib_files.append(os.path.join(dirpath, filename))
    return lib_files
|
||||
|
||||
def update_database(cdnjs_git, files):
    """Update the database for every entry of *files*.

    Each entry is passed to ``get_file_libinfo`` together with *cdnjs_git*.
    Entries for which no information is returned (``None``) are skipped;
    for all others a placeholder message is printed, as the actual
    database update is not implemented yet.
    """
    # could be converted to parallel map
    for path in files:
        if get_file_libinfo(cdnjs_git, path) is None:
            continue
        print("TODO: Updating data base: " + path)
|
||||
|
||||
def pull_and_update_db(cdnjs_git_path):
    """Update the database with the changed files of the repository.

    Creates a ``git.Git`` and a ``git.Repo`` object for *cdnjs_git_path*,
    obtains the updated library files through
    ``pull_get_updated_lib_files`` and hands them to ``update_database``.
    """
    git_cmd = git.Git(cdnjs_git_path)
    updated_files = pull_get_updated_lib_files(git.Repo(cdnjs_git_path))
    update_database(git_cmd, updated_files)
|
||||
|
||||
def update_db_all_libs(cdnjs_git_path):
    """Update the database entries for all library files in the git repo.

    Creates a ``git.Git`` object for *cdnjs_git_path*, lists every library
    file via ``get_all_lib_files`` and hands the list to
    ``update_database``.
    """
    git_cmd = git.Git(cdnjs_git_path)
    update_database(git_cmd, get_all_lib_files(cdnjs_git_path))
|
||||
|
|
|
@ -20,8 +20,11 @@
|
|||
import getopt
|
||||
import logging
|
||||
import sys
|
||||
import os
|
||||
|
||||
from ExtensionCrawler.config import (const_log_format, const_basedir)
|
||||
from ExtensionCrawler.cdnjs_git import (pull_and_update_db, update_db_all_libs)
|
||||
|
||||
from ExtensionCrawler.config import const_log_format
|
||||
|
||||
# Script should run with Python 3.4 or 3.5: the assertion below fails at
# import time on any interpreter older than 3.4 or at/after 3.6.
|
||||
assert sys.version_info >= (3, 4) and sys.version_info < (3, 6)
|
||||
|
@ -30,17 +33,24 @@ assert sys.version_info >= (3, 4) and sys.version_info < (3, 6)
|
|||
def helpmsg():
    """Print the command-line usage summary to standard output.

    One line is printed for the program synopsis and one for each
    supported option (``-i``, ``-u``, ``-v``, ``-h``).
    """
    print("cdnjs-git-miner [OPTION]")
    print(
        " -i initialize database: update database with all libraries in the repository"
    )
    print(
        " -u update: pull repository and update database with new/updated libraries"
    )
    print(" -v verbose")
    # The help option is listed exactly once, after all other options.
    print(" -h print this help text")
|
||||
|
||||
|
||||
def main(argv):
|
||||
"""Main function of the extension crawler."""
|
||||
basedir = "archive"
|
||||
verbose = True
|
||||
force = False
|
||||
clean = False
|
||||
basedir = const_basedir()
|
||||
verbose = False
|
||||
initialize = False
|
||||
update = False
|
||||
try:
|
||||
opts, args = getopt.getopt(argv, "h")
|
||||
opts, args = getopt.getopt(argv, "hviu")
|
||||
except getopt.GetoptError:
|
||||
helpmsg()
|
||||
sys.exit(2)
|
||||
|
@ -48,6 +58,17 @@ def main(argv):
|
|||
if opt == '-h':
|
||||
helpmsg()
|
||||
sys.exit()
|
||||
elif opt == '-v':
|
||||
verbose = True
|
||||
elif opt == '-i':
|
||||
initialize = True
|
||||
elif opt == '-u':
|
||||
update = True
|
||||
|
||||
if verbose:
|
||||
loglevel = logging.INFO
|
||||
else:
|
||||
loglevel = logging.WARNING
|
||||
|
||||
logger = logging.getLogger()
|
||||
ch = logging.StreamHandler(sys.stdout)
|
||||
|
@ -55,6 +76,12 @@ def main(argv):
|
|||
logger.addHandler(ch)
|
||||
logger.setLevel(loglevel)
|
||||
|
||||
cdnjs_git_path = os.path.join(os.path.join(basedir, "filedb"), "cdnjs-git")
|
||||
|
||||
if initialize:
|
||||
update_db_all_libs(cdnjs_git_path)
|
||||
if update:
|
||||
pull_and_update_db(cdnjs_git_path)
|
||||
|
||||
# When executed as a script, call main() with the command-line arguments,
# leaving out the program name in sys.argv[0].
if __name__ == "__main__":
|
||||
main(sys.argv[1:])
|
||||
|
|
Loading…
Reference in New Issue