Improved logging.

This commit is contained in:
Michael Herzberg 2017-06-19 18:41:29 +01:00
parent 282e2c4e8c
commit c1cd41c2e1
3 changed files with 19 additions and 6 deletions

View File

@ -461,7 +461,7 @@ def update_extension(archivedir, verbose, forums, ext_id):
try: try:
logtxt = logmsg(verbose, logtxt, " * Updating db...\n") logtxt = logmsg(verbose, logtxt, " * Updating db...\n")
msg_updatesqlite = update_sqlite_incremental( msg_updatesqlite = update_sqlite_incremental(
archivedir, tmptardir, ext_id, date, verbose, 11 * " ") archivedir, tmptardir, ext_id, date, verbose, 15 * " ")
logtxt = logmsg(verbose, logtxt, msg_updatesqlite) logtxt = logmsg(verbose, logtxt, msg_updatesqlite)
sql_success = True sql_success = True
except Exception as e: except Exception as e:

View File

@ -120,6 +120,18 @@ def get_crx_status(datepath):
with open(statuspath) as f: with open(statuspath) as f:
return int(f.read()) return int(f.read())
# If the extension is paid, we will find a main.headers file...
statuspath = os.path.join(datepath, "main.status")
if os.path.exists(statuspath):
with open(statuspath) as f:
return int(f.read())
# ... or a default.crx.headers file
statuspath = os.path.join(datepath, "default.crx.status")
if os.path.exists(statuspath):
with open(statuspath) as f:
return int(f.read())
def parse_and_insert_overview(ext_id, date, datepath, con): def parse_and_insert_overview(ext_id, date, datepath, con):
overview_path = os.path.join(datepath, "overview.html") overview_path = os.path.join(datepath, "overview.html")
@ -180,7 +192,6 @@ def parse_and_insert_overview(ext_id, date, datepath, con):
def parse_and_insert_crx(ext_id, date, datepath, con): def parse_and_insert_crx(ext_id, date, datepath, con):
crx_path = next(iter(glob.glob(os.path.join(datepath, "*.crx"))), None) crx_path = next(iter(glob.glob(os.path.join(datepath, "*.crx"))), None)
print(crx_path)
if crx_path: if crx_path:
filename = os.path.basename(crx_path) filename = os.path.basename(crx_path)
@ -252,7 +263,7 @@ def update_sqlite_incremental(archivedir, tmptardir, ext_id, date, verbose,
datepath = os.path.join(tmptardir, date) datepath = os.path.join(tmptardir, date)
txt = logmsg(verbose, txt, txt = logmsg(verbose, txt,
indent + "- updating db with data from {}\n".format(date)) indent + "- updating with data from {}\n".format(date))
if not os.path.exists(db_path): if not os.path.exists(db_path):
txt = logmsg(verbose, txt, txt = logmsg(verbose, txt,
@ -284,8 +295,10 @@ def update_sqlite_incremental(archivedir, tmptardir, ext_id, date, verbose,
txt = logmsg(verbose, txt, str(e)) txt = logmsg(verbose, txt, str(e))
txt = logmsg(verbose, txt, "\n") txt = logmsg(verbose, txt, "\n")
else: else:
txt = logmsg(verbose, txt, crx_status = get_crx_status(datepath)
indent2 + "* WARNING: could not find etag\n") if crx_status != 401 and crx_status != 204:
txt = logmsg(verbose, txt,
indent2 + "* WARNING: could not find etag\n")
reviewpaths = glob.glob(os.path.join(datepath, "reviews*.text")) reviewpaths = glob.glob(os.path.join(datepath, "reviews*.text"))
for reviewpath in reviewpaths: for reviewpath in reviewpaths:

View File

@ -55,7 +55,7 @@ def main(argv):
threeletterdirs = glob.glob(os.path.join(archivedir, prefix + "*")) threeletterdirs = glob.glob(os.path.join(archivedir, prefix + "*"))
for threeletterdir in threeletterdirs: for threeletterdir in threeletterdirs:
for ext_id in set([d[:32] for d in os.listdir(threeletterdir)]): for ext_id in set([d[:32] for d in os.listdir(threeletterdir)]):
sys.stdout.write("Processing {}...\n".format(ext_id)) sys.stdout.write("Processing {} ...\n".format(ext_id))
sys.stdout.flush() sys.stdout.flush()
tarpath = os.path.join(threeletterdir, ext_id + ".tar") tarpath = os.path.join(threeletterdir, ext_id + ".tar")
dbpath = os.path.join(threeletterdir, ext_id + ".sqlite") dbpath = os.path.join(threeletterdir, ext_id + ".sqlite")