Re-formatting.

Achim D. Brucker 2017-01-14 20:34:58 +00:00
parent a67aaf0501
commit e096b9a2d5
1 changed file with 31 additions and 22 deletions
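
The diff below is a pure re-formatting pass: spaces are added after commas, over-long signatures and calls are wrapped to one argument per line, and a redundant blank line is dropped. The result matches PEP 8 autoformatter output; that a tool such as yapf produced it is an assumption, the commit message does not say. A minimal sketch of that assumption:

    # A minimal sketch, assuming yapf produced this re-formatting (the
    # commit message only says "Re-formatting", so the tool is a guess).
    # FormatCode returns the formatted source and a changed flag.
    from yapf.yapflib.yapf_api import FormatCode

    source = "def google_dos_protection(self, name, request,max=3):\n    pass\n"
    formatted, changed = FormatCode(source, style_config='pep8')
    print(formatted)  # prints the signature with the missing space restored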

@@ -96,7 +96,7 @@ class ExtensionCrawler:
         with open(name + ".url", 'w') as f:
             f.write(str(request.url))
 
-    def google_dos_protection(self, name, request,max=3):
+    def google_dos_protection(self, name, request, max=3):
         if max >= 1:
             sleep(randint(1, max) * .5)
 
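For context: the method touched here implements a randomized back-off, sleeping between 0.5 s and max/2 s before the crawler proceeds, which spreads request bursts that might otherwise trip Google's DOS protection. A self-contained sketch of that logic:

    # Self-contained sketch of the back-off seen above: sleep a random
    # multiple of 0.5 s. Note that with max < 1 (e.g. the 0.1 passed in
    # the storepage hunk below) no delay is applied at all.
    from random import randint
    from time import sleep

    def google_dos_protection(max=3):
        if max >= 1:
            sleep(randint(1, max) * .5)
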
@@ -145,8 +145,7 @@ class ExtensionCrawler:
             raise CrawlError(
                 extid,
                 'Expected Content-Type header to be application/x-chrome-extension, but got {}.'.
-                format(extresult.headers['Content-Type']),
-                '\n'.join(text))
+                format(extresult.headers['Content-Type']), '\n'.join(text))
         if not self.regex_extfilename.match(extfilename):
             raise CrawlError(
                 extid,
@@ -163,7 +162,7 @@ class ExtensionCrawler:
             self.store_request_metadata(
                 os.path.join(extdir, 'storepage.html'), extpageresult)
             self.google_dos_protection(
-                os.path.join(extdir, 'storepage.html'), extpageresult,0.1)
+                os.path.join(extdir, 'storepage.html'), extpageresult, 0.1)
             with open(os.path.join(extdir, 'storepage.html'), 'w') as f:
                 f.write(extpageresult.text)
 
@@ -234,7 +233,12 @@ class ExtensionCrawler:
             os.path.dirname(os.path.relpath(path, self.basedir)))[1]
         return self.httpdate(dateutil.parser.parse(utc))
 
-    def update_extension(self, extid, overwrite, extinfo=None,cnt=None,max_cnt=None):
+    def update_extension(self,
+                         extid,
+                         overwrite,
+                         extinfo=None,
+                         cnt=None,
+                         max_cnt=None):
         if not self.regex_extid.match(extid):
             raise CrawlError(extid,
                              '{} is not a valid extension id.\n'.format(extid))
@@ -319,7 +323,6 @@ class ExtensionCrawler:
             return True
 
-
     def update_extension_list(self, extensions):
         n_attempts = 0
         n_success = 0
@@ -329,12 +332,13 @@ class ExtensionCrawler:
         for extid in extensions:
             try:
                 n_attempts += 1
-                self.update_extension(extid, True,None,n_attempts,len(extensions))
+                self.update_extension(extid, True, None, n_attempts,
+                                      len(extensions))
                 n_success += 1
             except CrawlError as cerr:
                 retry_extids.append(extid)
                 sys.stdout.write(' Error: {}\n'.format(cerr.message))
-                n_errors +=1
+                n_errors += 1
                 if cerr.pagecontent != "":
                     sys.stderr.write(' Page content was:\n')
                     sys.stderr.write(' {}\n'.format(cerr.pagecontent))
@@ -345,14 +349,15 @@ class ExtensionCrawler:
             except ConnectionResetError as cerr:
                 retry_extids.append(extid)
                 sys.stdout.write(' Error: {}\n'.format(str(cerr)))
-                n_errors +=1
+                n_errors += 1
                 sys.stdout.flush()
 
         if self.verbose:
             print("*** Summary: Updated {} of {} extensions successfully".
                   format(n_success, n_attempts))
             print("*** Login required: {}".format(n_login_required))
-            print("*** Hit Google DOS protection: {}".format(self.google_dos_count))
+            print("*** Hit Google DOS protection: {}".format(
+                self.google_dos_count))
             print("*** Other Erros: {}".format(n_errors))
         sys.stdout.flush()
         return retry_extids
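
The two except branches above share one recovery pattern: failing extension ids are appended to retry_extids and returned, so a caller can re-run the batch on just the failures. The surrounding driver loop is not part of this diff; the following is a hypothetical sketch of how the returned list could be used:

    # Hypothetical retry driver around update_extension_list; only the
    # returned retry_extids list is visible in the diff above.
    def update_with_retries(crawler, extids, rounds=3):
        pending = list(extids)
        for _ in range(rounds):
            if not pending:
                break
            pending = crawler.update_extension_list(pending)
        return pending  # ids still failing after all rounds
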
@@ -465,7 +470,10 @@ if __name__ == '__main__':
     parser.add_argument(
         '-v', '--verbose', action='store_true', help='Increase verbosity.')
     parser.add_argument(
-        '-o', '--overview', action='store_true', help='Only download/update overview page.')
+        '-o',
+        '--overview',
+        action='store_true',
+        help='Only download/update overview page.')
     parser.add_argument(
         '-w',
         '--weak',
@@ -473,7 +481,8 @@ if __name__ == '__main__':
         help='weak check if crx exists already')
     args = parser.parse_args()
 
-    crawler = ExtensionCrawler(args.dest, args.verbose, args.weak,args.overview)
+    crawler = ExtensionCrawler(args.dest, args.verbose, args.weak,
+                               args.overview)
 
     if args.discover:
         if args.interval: