Integrated last_crx_etag into last_crx.

This commit is contained in:
Michael Herzberg 2017-10-14 19:59:46 +01:00
parent 8dbf867183
commit afe137ba36
3 changed files with 24 additions and 41 deletions

View File

@ -186,21 +186,24 @@ def last_modified_http_date(path):
def last_crx(archivedir, extid, date=None):
    """Return the last archived crx of an extension together with its ETag.

    The extension's tar archive is expected at
    ``<archivedir>/<get_local_archive_dir(extid)>/<extid>.tar``. Candidate
    members are non-empty files whose name ends in ``.crx``. The candidates
    are sorted by member name and the last one is picked.
    NOTE(review): this presumably yields the most recent crx because the
    parent directory of each member is a timestamp -- confirm against the
    archive layout.

    Args:
        archivedir: Root directory that contains the per-extension archives.
        extid: Extension id; selects the sub directory and the tar file name.
        date: Optional upper bound. When given, a member is only considered
            if the name of its parent directory, parsed with
            ``dateutil.parser.parse``, is ``<= date``.

    Returns:
        A tuple ``(crx_name, etag)``. Both are ``""`` when the tar archive
        does not exist or contains no matching crx. ``etag`` alone is ``""``
        when the ``<crx_name>.headers`` member is missing, cannot be parsed,
        or has no ``ETag`` entry.
    """
    # Function-scope import: the module's own import block is not part of
    # this block, and ``ast`` is only needed for parsing the headers file.
    import ast

    newest_crx = ""
    newest_etag = ""
    tar = os.path.join(archivedir, get_local_archive_dir(extid),
                       extid + ".tar")
    if not os.path.exists(tar):
        return newest_crx, newest_etag

    with tarfile.open(tar, 'r') as t:
        candidates = sorted(
            member.name for member in t.getmembers()
            if member.name.endswith(".crx") and member.size > 0 and (
                date is None or dateutil.parser.parse(
                    os.path.split(os.path.split(member.name)[0])[1]) <= date))
        if candidates:
            newest_crx = candidates[-1]
            try:
                # extractfile() raises KeyError for an unknown member and
                # returns None for members that are not regular files.
                headers_file = t.extractfile(newest_crx + ".headers")
                if headers_file is not None:
                    # The headers file presumably holds the repr of a dict
                    # (single-quoted keys/values); literal_eval parses that
                    # directly, without rewriting quotes for a JSON parser
                    # and without executing code.
                    headers = ast.literal_eval(headers_file.read().decode())
                    newest_etag = headers["ETag"]
            except (KeyError, TypeError, ValueError, SyntaxError):
                # Best effort: a crx without usable headers still counts as
                # the last crx, it just has no known ETag.
                newest_etag = ""
    return newest_crx, newest_etag
def first_crx(archivedir, extid, date=None):
first_crx = ""
@ -220,6 +223,7 @@ def first_crx(archivedir, extid, date=None):
return first_crx
def all_crx(archivedir, extid, date=None):
tar = os.path.join(archivedir, get_local_archive_dir(extid),
extid + ".tar")
@ -234,23 +238,6 @@ def all_crx(archivedir, extid, date=None):
return all_crxs
def last_etag(archivedir, extid, crxfile):
    """Return the ETag stored alongside an archived crx file.

    Reads the member ``<crxfile>.headers`` from the extension's tar archive
    at ``<archivedir>/<get_local_archive_dir(extid)>/<extid>.tar`` and
    returns the value of its ``'ETag'`` entry.

    Args:
        archivedir: Root directory that contains the per-extension archives.
        extid: Extension id; selects the sub directory and the tar file name.
        crxfile: Name of the crx member inside the tar archive.

    Returns:
        The ETag string, or ``""`` when the archive or the headers member
        does not exist, cannot be read or parsed, or has no ``'ETag'`` entry.
    """
    # Function-scope import: the module's own import block is not part of
    # this block.
    import ast

    tar = os.path.join(archivedir, get_local_archive_dir(extid),
                       extid + ".tar")
    if not os.path.exists(tar):
        return ""
    try:
        # The context manager closes the archive even when the lookup fails.
        with tarfile.open(tar, 'r') as t:
            headers_file = t.extractfile(crxfile + ".headers")
            if headers_file is None:
                # Member exists but is not a regular file.
                return ""
            raw_headers = headers_file.read().decode()
        # SECURITY: the headers are data read from the archive, so they are
        # parsed as a Python literal instead of being passed to eval(),
        # which would execute arbitrary expressions stored in the file.
        # NOTE(review): assumes the file holds a plain dict literal --
        # anything else now yields "" instead of being evaluated.
        return ast.literal_eval(raw_headers)['ETag']
    except (OSError, tarfile.TarError, KeyError, TypeError, ValueError,
            SyntaxError):
        # Best effort lookup: any unreadable or malformed input means
        # "no ETag known".
        return ""
def update_overview(tar, date, ext_id):
res = None
try:
@ -285,8 +272,7 @@ def validate_crx_response(res, extid, extfilename):
def update_crx(archivedir, tmptardir, ext_id, date):
res = None
extfilename = "default_ext_archive.crx"
last_crx_file = last_crx(archivedir, ext_id)
last_crx_etag = last_etag(archivedir, ext_id, last_crx_file)
last_crx_file, last_crx_etag = last_crx(archivedir, ext_id)
last_crx_http_date = last_modified_http_date(last_crx_file)
headers = ""
if last_crx_file is not "":
@ -505,9 +491,8 @@ def update_extension(archivedir, forums, ext_id):
if not os.path.exists(tar):
is_new = True
try:
ar = tarfile.open(tar, mode='a:')
ar.add(tmptardir, arcname=ext_id)
ar.close()
with tarfile.open(tar, mode='a:') as ar:
ar.add(tmptardir, arcname=ext_id)
except Exception as e:
log_exception("* FATAL: cannot create tar archive", 3, ext_id)
tar_exception = e

View File

@ -25,7 +25,7 @@ import tarfile
import datetime
import dateutil
import dateutil.parser
from ExtensionCrawler.archive import last_crx, get_local_archive_dir, last_etag
from ExtensionCrawler.archive import last_crx, get_local_archive_dir
from ExtensionCrawler.config import const_basedir
@ -100,13 +100,11 @@ def main(argv):
dateobj = dateutil.parser.parse(date)
if dateobj.tzinfo is None or dateobj.tzinfo.utcoffset(dateobj) is None:
dateobj = dateobj.replace(tzinfo=datetime.timezone.utc)
last = last_crx(os.path.join(basedir, "data"), extid, dateobj)
last, etag = last_crx(os.path.join(basedir, "data"), extid, dateobj)
else:
last = last_crx(os.path.join(basedir, "data"), extid)
last, etag = last_crx(os.path.join(basedir, "data"), extid)
if useetag:
etag = last_etag(os.path.join(basedir, "data"), extid, last)
else:
if not useetag:
etag = None
tar = os.path.join(basedir, "data",
get_local_archive_dir(extid), extid + ".tar")

View File

@ -280,7 +280,7 @@ def analyze_tar(conf, tarfilename):
match = False
if from_dateobj is None:
last_crx_file = last_crx(
last_crx_file, _ = last_crx(
os.path.join(conf.archive_dir, "data"), extid, latest_dateobj)
if last_crx_file == "" or last_crx_file is None:
logging.warning("No crx in " + extid)
@ -438,7 +438,7 @@ if __name__ == "__main__":
metavar='DATE',
type=str,
help=
'select latest crx from tar, released before DATE.\n'
'select latest crx from tar, released before DATE.\n'
+ 'Together with --latest-date, specifies all crx released in specified\n'
+ 'date range.'
)