From b3e5b9bb37c623c88d90ae241ab00d7504e00754 Mon Sep 17 00:00:00 2001 From: "Achim D. Brucker" Date: Sun, 26 Nov 2017 23:35:35 +0000 Subject: [PATCH] Reformatting. --- ExtensionCrawler/archive.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/ExtensionCrawler/archive.py b/ExtensionCrawler/archive.py index e9e710c..185e12b 100644 --- a/ExtensionCrawler/archive.py +++ b/ExtensionCrawler/archive.py @@ -199,12 +199,15 @@ def last_crx(archivedir, extid, date=None): ]) if old_crxs != []: last_crx = old_crxs[-1] - headers_content = t.extractfile(last_crx + ".headers").read().decode().replace('"', '\\"').replace("'", '"') + headers_content = t.extractfile( + last_crx + ".headers").read().decode().replace( + '"', '\\"').replace("'", '"') headers_json = json.loads(headers_content) last_crx_etag = headers_json["ETag"] return last_crx, last_crx_etag + def first_crx(archivedir, extid, date=None): first_crx = "" tar = os.path.join(archivedir, get_local_archive_dir(extid), @@ -255,8 +258,8 @@ def update_overview(tar, date, ext_id): def validate_crx_response(res, extid, extfilename): regex_extfilename = re.compile(r'^extension[_0-9]+\.crx$') if not 'Content-Type' in res.headers: - raise CrawlError(extid, 'Did not find Content-Type header.', - '\n'.join(res.iter_lines())) + raise CrawlError(extid, 'Did not find Content-Type header.', '\n'.join( + res.iter_lines())) if not res.headers['Content-Type'] == 'application/x-chrome-extension': text = [line.decode('utf-8') for line in res.iter_lines()] raise CrawlError( @@ -550,7 +553,6 @@ def update_extensions(archivedir, parallel, forums_ext_ids, ext_ids): ext_without_forums = list( p.map(partial(update_extension, archivedir, False), parallel_ids)) - # Second, update extensions with forums sequentially (and with delays) to # avoid running into Googles DDOS detection. log_info("Updating {} extensions including forums (sequentially)".format( @@ -559,7 +561,6 @@ def update_extensions(archivedir, parallel, forums_ext_ids, ext_ids): ext_with_forums = list( map(partial(update_extension, archivedir, True), forums_ext_ids)) - return ext_with_forums + ext_without_forums