Refactoring.

This commit is contained in:
Achim D. Brucker 2017-01-28 12:56:29 +00:00
parent 9c4ba39558
commit e8f01eae55
2 changed files with 14 additions and 69 deletions

View File

@ -17,60 +17,20 @@
#
def const_sitemap_url():
    """Return the Chrome Web Store sitemap URL as a string."""
    sitemap_url = "https://chrome.google.com/webstore/sitemap"
    return sitemap_url
import sys
from time import sleep
from random import randint
from datetime import datetime, timezone
def google_dos_protection(max=3):
    """Block for a random delay of 0.5 s up to ``max * 0.5`` s.

    The delay is a whole number of half-second steps drawn uniformly from
    ``1..max`` (both ends inclusive).
    NOTE(review): judging by the name this throttles requests to Google
    servers -- confirm against the callers.
    """
    half_second_steps = randint(1, max)
    sleep(half_second_steps * .5)
def const_sitemap_scheme():
    """Return the sitemaps.org 0.9 schema URI as a string."""
    schema_uri = "http://www.sitemaps.org/schemas/sitemap/0.9"
    return schema_uri
def log(verbose, msg):
    """Write ``msg`` to standard output when ``verbose`` is truthy.

    The message is written as-is: no newline is appended and the stream is
    not flushed.
    """
    if not verbose:
        return
    sys.stdout.write(msg)
def const_overview_url(id):
    """Return the Chrome Web Store detail-page URL with ``id`` appended.

    ``id`` is inserted via ``str.format`` into the final path segment.
    """
    url_template = 'https://chrome.google.com/webstore/detail/{}'
    return url_template.format(id)
def const_store_url():
    """Return the base URL of the Chrome Web Store as a string."""
    store_url = 'https://chrome.google.com/webstore'
    return store_url
def const_review_url():
    """Return the URL of the reviews ``components`` endpoint as a string."""
    review_endpoint = 'https://chrome.google.com/reviews/components'
    return review_endpoint
def const_support_url():
    """Return the URL used for support requests as a string.

    NOTE(review): this is the same ``reviews/components`` URL that
    ``const_review_url`` returns; presumably the request payload selects
    support vs. review threads -- confirm.
    """
    support_endpoint = 'https://chrome.google.com/reviews/components'
    return support_endpoint
def const_download_url():
    """Return the CRX download URL template as a string.

    The returned string still contains one ``{}`` placeholder (after
    ``id%3D``); it is not filled in here.
    NOTE(review): presumably callers ``.format()`` it with an extension
    id -- confirm.
    """
    url_template = (
        'https://clients2.google.com/service/update2/crx'
        '?response=redirect'
        '&nacl_arch=x86-64'
        '&prodversion=9999.0.9999.0'
        '&x=id%3D{}%26uc'
    )
    return url_template
def const_categories():
    """Return a fresh list of category path strings.

    The first entry is ``'extensions'``; the rest have the form
    ``'ext/<number>-<name>'``. A new list is built on every call, so
    callers may mutate the result freely.
    """
    category_paths = (
        'extensions',
        'ext/22-accessibility',
        'ext/10-blogging',
        'ext/15-by-google',
        'ext/11-web-development',
        'ext/14-fun',
        'ext/6-news',
        'ext/28-photos',
        'ext/7-productivity',
        'ext/38-search-tools',
        'ext/12-shopping',
        'ext/1-communication',
        'ext/13-sports',
    )
    return list(category_paths)
def const_support_payload(ext_id, start, end):
    """Build the ``req=...`` request body for the support comment thread.

    ``ext_id`` is placed in the URL-encoded permalink, ``start`` in
    ``"startindex"`` and ``end`` in ``"numresults"``. Doubled braces in the
    template are literal braces in the output.
    """
    template = "".join([
        'req={{ "appId":94,',
        '"version":"150922",',
        '"hl":"en",',
        '"specs":[{{"type":"CommentThread",',
        '"url":"http%3A%2F%2Fchrome.google.com%2Fextensions%2Fpermalink%3Fid%3D{}",',
        '"groups":"chrome_webstore_support",',
        '"startindex":"{}",',
        '"numresults":"{}",',
        '"id":"379"}}],',
        '"internedKeys":[],',
        '"internedValues":[]}}',
    ])
    return template.format(ext_id, start, end)
def const_review_payload(ext_id, start, end):
    """Build the ``req=...`` request body for the review comment thread.

    ``ext_id`` is placed in the URL-encoded permalink, ``start`` in
    ``"startindex"`` and ``end`` in ``"numresults"``. Doubled braces in the
    template are literal braces in the output.
    """
    template = "".join([
        'req={{ "appId":94,',
        '"version":"150922",',
        '"hl":"en",',
        '"specs":[{{"type":"CommentThread",',
        '"url":"http%3A%2F%2Fchrome.google.com%2Fextensions%2Fpermalink%3Fid%3D{}",',
        '"groups":"chrome_webstore",',
        '"sortby":"cws_qscore",',
        '"startindex":"{}",',
        '"numresults":"{}",',
        '"id":"428"}}],',
        '"internedKeys":[],',
        '"internedValues":[]}}',
    ])
    return template.format(ext_id, start, end)
def valueOf(value, default):
    """Return ``value``, or ``default`` when ``value`` is missing.

    "Missing" means ``None`` or the empty string. Other falsy values such
    as ``0`` or ``False`` are returned unchanged.
    """
    # Compare with ==, not `is`: identity against a string literal depends
    # on interning and emits a SyntaxWarning on Python 3.8+.
    if value is None or value == "":
        return default
    return value

15
crawler
View File

@ -111,14 +111,6 @@ class UpdateResult:
def not_modified(self):
    """Return whatever ``self.res_crx.not_modified()`` returns."""
    crx_result = self.res_crx
    return crx_result.not_modified()
def google_dos_protection(max=3):
    """Block for a random delay of 0.5 s up to ``max * 0.5`` s.

    The delay is a whole number of half-second steps drawn uniformly from
    ``1..max`` (both ends inclusive).
    NOTE(review): judging by the name this throttles requests to Google
    servers -- confirm against the callers.
    """
    half_second_steps = randint(1, max)
    sleep(half_second_steps * .5)
def log(verbose, msg):
    """Write ``msg`` to standard output when ``verbose`` is truthy.

    The message is written as-is: no newline is appended and the stream is
    not flushed.
    """
    if not verbose:
        return
    sys.stdout.write(msg)
def update_overview(dir, verbose, ext_id):
log(verbose, " * overview page: ")
@ -151,13 +143,6 @@ def validate_crx_response(res, extfilename):
extfilename))
def valueOf(value, default):
    """Return ``value``, or ``default`` when ``value`` is missing.

    "Missing" means ``None`` or the empty string. Other falsy values such
    as ``0`` or ``False`` are returned unchanged.
    """
    # Compare with ==, not `is`: identity against a string literal depends
    # on interning and emits a SyntaxWarning on Python 3.8+.
    if value is None or value == "":
        return default
    return value
def update_crx(dir, verbose, ext_id):
last_crx_file = last_crx(dir, ext_id)
last_crx_http_date = last_modified_http_date(last_crx_file)