From 0f1c53a011bfb71cc17a6111c32d0880aec7efdb Mon Sep 17 00:00:00 2001 From: "Achim D. Brucker" Date: Tue, 3 Apr 2018 22:20:09 +0100 Subject: [PATCH] More aggressive download heuristics. --- ExtensionCrawler/archive.py | 2 +- ExtensionCrawler/util.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ExtensionCrawler/archive.py b/ExtensionCrawler/archive.py index 20d5ddc..dafd66c 100644 --- a/ExtensionCrawler/archive.py +++ b/ExtensionCrawler/archive.py @@ -346,7 +346,7 @@ def update_reviews(tar, date, ext_id): try: pages = [] - google_dos_protection() + # google_dos_protection() res = requests.post( const_review_url(), data=const_review_payload(ext_id, "0", "100"), diff --git a/ExtensionCrawler/util.py b/ExtensionCrawler/util.py index df4b82f..34bdc22 100644 --- a/ExtensionCrawler/util.py +++ b/ExtensionCrawler/util.py @@ -23,10 +23,10 @@ import traceback import logging -def google_dos_protection(maxrange=0.3): - """Wait a random number of seconds (between 0.5 to 0.5+maxrange) +def google_dos_protection(maxrange=0.15): + """Wait a random number of seconds (between 0.45 and 0.45+maxrange) to avoid Google's bot detection""" - sleep(0.5 + (random() * maxrange)) + sleep(0.45 + (random() * maxrange)) def value_of(value, default):