Use glob instead of os.walk() to avoid a memory leak in the latter.

This commit is contained in:
Achim D. Brucker 2017-09-13 04:04:38 +01:00
parent 76d5993794
commit 18fb23d3dc
1 changed files with 9 additions and 4 deletions

View File

@ -22,6 +22,7 @@ import hashlib
import logging
import mimetypes
import os
import glob
import zlib
from functools import reduce
from io import StringIO
@ -205,18 +206,20 @@ def pull_get_updated_lib_files(cdnjs_repo):
logging.info("Found " + len(files) + " files")
return files
def get_all_lib_files(cdnjs_git_path):
    """Return the paths of all library files stored in the cdnjs git repo.

    Recursively collects every regular (non-directory) entry below
    ``<cdnjs_git_path>/ajax/libs``, skipping the bookkeeping files
    ``package.json`` and ``.gitkeep``.

    Args:
        cdnjs_git_path: Path to the root of the cdnjs git checkout.

    Returns:
        A list of path strings, each starting with ``cdnjs_git_path``.
        The order is whatever ``glob.iglob`` yields and is not sorted.
        The list is empty when nothing matches.

    Note:
        ``glob`` wildcards do not match names that begin with a dot by
        default, so hidden files and directories are not reported.
    """
    logging.info("Building file list (complete repository)")
    skipped_names = ("package.json", ".gitkeep")
    pattern = os.path.join(cdnjs_git_path, 'ajax/libs/**/*')
    files = []
    # iglob yields matches lazily instead of building the full match list.
    for fname in glob.iglob(pattern, recursive=True):
        if os.path.basename(fname) in skipped_names:
            continue
        # '**/*' also matches the directories themselves; keep files only.
        if os.path.isdir(fname):
            continue
        files.append(fname)
    # Pass the count as a logging argument: concatenating the int returned
    # by len() onto a str raised TypeError in the previous version.
    logging.info("Found %d files", len(files))
    return files
def update_database(cdnjs_git, files):
"""Update database for all files in files."""
# could be converted to parallel map
@ -226,6 +229,7 @@ def update_database(cdnjs_git, files):
## TODO
logging.info("Updating database")
def pull_and_update_db(cdnjs_git_path):
"""Pull repo and update database."""
cdnjs_git = git.Git(cdnjs_git_path)
@ -233,6 +237,7 @@ def pull_and_update_db(cdnjs_git_path):
files = pull_get_updated_lib_files(cdnjs_repo)
update_database(cdnjs_git, files)
def update_db_all_libs(cdnjs_git_path):
"""Update database entries for all libs in git repo."""
cdnjs_git = git.Git(cdnjs_git_path)