From d6869455a8c66e4b44dd8ca796b0614050ea9cab Mon Sep 17 00:00:00 2001 From: Michael Herzberg Date: Fri, 6 Oct 2017 12:12:49 +0100 Subject: [PATCH] Sort extension ids before processing. --- ExtensionCrawler/archive.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ExtensionCrawler/archive.py b/ExtensionCrawler/archive.py index d4fb3ce..f200bc6 100644 --- a/ExtensionCrawler/archive.py +++ b/ExtensionCrawler/archive.py @@ -519,11 +519,11 @@ def update_extension(archivedir, forums, ext_id): def update_extensions(archivedir, parallel, forums_ext_ids, ext_ids): ext_with_forums = [] ext_without_forums = [] - ext_ids = list(set(ext_ids) - set(forums_ext_ids)) - forums_ext_ids = list(set(forums_ext_ids)) + ext_ids = sorted(list(set(ext_ids) - set(forums_ext_ids))) + forums_ext_ids = sorted(list(set(forums_ext_ids))) log_info("Updating {} extensions ({} including forums)".format( len(ext_ids), len(forums_ext_ids))) - + # First, update all extensions without forums in parallel (increased speed). # parallel_ids = list(set(ext_ids) - set(forums_ext_ids)) parallel_ids = ext_ids @@ -533,7 +533,7 @@ def update_extensions(archivedir, parallel, forums_ext_ids, ext_ids): ext_without_forums = list( p.map(partial(update_extension, archivedir, False), parallel_ids)) - + # Second, update extensions with forums sequentially (and with delays) to # avoid running into Googles DDOS detection. log_info("Updating {} extensions including forums (sequentially)".format( @@ -542,7 +542,7 @@ def update_extensions(archivedir, parallel, forums_ext_ids, ext_ids): ext_with_forums = list( map(partial(update_extension, archivedir, True), forums_ext_ids)) - + return ext_with_forums + ext_without_forums