From 93e2a84ecd25129bcde14024e23b822edaafdd9c Mon Sep 17 00:00:00 2001 From: "Achim D. Brucker" Date: Tue, 10 Jan 2017 13:39:36 +0000 Subject: [PATCH] Added captcha warning. --- crawler.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crawler.py b/crawler.py index 2c2f29d..f186c4b 100755 --- a/crawler.py +++ b/crawler.py @@ -61,6 +61,7 @@ class ExtensionCrawler: 'ext/38-search-tools', 'ext/12-shopping', 'ext/1-communication', 'ext/13-sports' ] + regex_captcha = re.compile('aptcha') regex_extid = re.compile(r'^[a-z]+$') regex_extfilename = re.compile(r'^extension[_0-9]+\.crx$') regex_store_date_string = re.compile(r'"([0-9]{8})"') @@ -91,6 +92,8 @@ class ExtensionCrawler: f.write(str(request.status_code)) with open(name+".url", 'w') as f: f.write(str(request.url)) + if 0 < request.text.find('Captcha'): + print (" WARNING: Captcha ("+name+")") def download_extension(self, extid, extdir="", last_download_date=""): if last_download_date != "":