From 936f2d3189c7a718efcedc699071256ec5dc25c9 Mon Sep 17 00:00:00 2001
From: "Achim D. Brucker"
Date: Thu, 14 Sep 2017 22:54:37 +0100
Subject: [PATCH 1/2] Log git info before starting pull (update).

---
 ExtensionCrawler/cdnjs_git.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/ExtensionCrawler/cdnjs_git.py b/ExtensionCrawler/cdnjs_git.py
index 2642be4..348a5b5 100644
--- a/ExtensionCrawler/cdnjs_git.py
+++ b/ExtensionCrawler/cdnjs_git.py
@@ -221,6 +221,9 @@ def pull_get_updated_lib_files(cdnjs_git_path):
     libvers = set()
     files = []
     cdnjs_repo = git.Repo(cdnjs_git_path)
+    logging.info(" HEAD: " + str(cdnjs_repo.head.commit))
+    logging.info(" is detached: " + str(cdnjs_repo.head.is_detached))
+    logging.info(" is dirty: " + str(cdnjs_repo.is_dirty()))
     for update in pull_get_list_changed_files(cdnjs_repo):
         if not (os.path.basename(update) in ["package.json", ".gitkeep"]):
             if update.startswith("ajax"):

From 26678636eb8931047ecb5c5e6c2ab94cd93f6f7c Mon Sep 17 00:00:00 2001
From: "Achim D. Brucker"
Date: Fri, 15 Sep 2017 20:21:05 +0100
Subject: [PATCH 2/2] Ignore commits where blobs are None.

---
 ExtensionCrawler/cdnjs_git.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/ExtensionCrawler/cdnjs_git.py b/ExtensionCrawler/cdnjs_git.py
index 348a5b5..382bcb3 100644
--- a/ExtensionCrawler/cdnjs_git.py
+++ b/ExtensionCrawler/cdnjs_git.py
@@ -60,8 +60,10 @@ def pull_get_list_changed_files(gitrepo):
     for single_fetch_info in fetch_info:
         for diff in single_fetch_info.commit.diff(
                 single_fetch_info.old_commit):
-            if not diff.a_blob.path in files:
-                files.append(diff.a_blob.path)
+            logging.debug("Found diff: " + str(diff))
+            if not diff.a_blob is None:
+                if not diff.a_blob.path in files:
+                    files.append(diff.a_blob.path)
     return files
 
 def normalize_jsdata(str_data):