diff --git a/ExtensionCrawler/db.py b/ExtensionCrawler/db.py
index 8c04301..0ed7dfc 100644
--- a/ExtensionCrawler/db.py
+++ b/ExtensionCrawler/db.py
@@ -420,42 +420,40 @@ def update_db_incremental(tmptardir, ext_id, date):
     if etag:
         try:
             parse_and_insert_crx(ext_id, date, datepath, con)
-        except zipfile.BadZipfile as e:
-            log_warning(
-                "* WARNING: the found crx file is not a zip file, exception: {}".
-                format(str(e)), 3, ext_id)
+        except Exception as e:
+            log_exception("Exception when parsing crx", 3, ext_id)
     else:
         crx_status = get_crx_status(datepath)
         if crx_status != 401 and crx_status != 204 and crx_status != 404:
             log_warning("* WARNING: could not find etag", 3, ext_id)
 
-    parse_and_insert_overview(ext_id, date, datepath, con)
-    parse_and_insert_status(ext_id, date, datepath, con)
+    try:
+        parse_and_insert_overview(ext_id, date, datepath, con)
+    except Exception as e:
+        log_exception("Exception when parsing overview", 3, ext_id)
+
+    try:
+        parse_and_insert_status(ext_id, date, datepath, con)
+    except Exception as e:
+        log_exception("Exception when parsing status", 3, ext_id)
 
     reviewpaths = glob.glob(os.path.join(datepath, "reviews*-*.text"))
     for reviewpath in reviewpaths:
         try:
             parse_and_insert_review(ext_id, date, reviewpath, con)
-        except json.decoder.JSONDecodeError as e:
-            log_warning(
-                "* Could not parse review file, exception: {}".format(
-                    str(e)), 3, ext_id)
+        except Exception as e:
+            log_exception("Exception when parsing review", 3, ext_id)
 
     supportpaths = glob.glob(os.path.join(datepath, "support*-*.text"))
     for supportpath in supportpaths:
         try:
             parse_and_insert_support(ext_id, date, supportpath, con)
-        except json.decoder.JSONDecodeError as e:
-            log_warning(
-                "* Could not parse support file, exception: {}".format(
-                    str(e)), 3, ext_id)
+        except Exception as e:
+            log_exception("Exception when parsing support", 3, ext_id)
 
     repliespaths = glob.glob(os.path.join(datepath, "*replies.text"))
     for repliespath in repliespaths:
         try:
             parse_and_insert_replies(ext_id, date, repliespath, con)
-        except json.decoder.JSONDecodeError as e:
-            log_warning(
-                "* Could not parse reply file, exception: {}".format(
-                    str(e)), 3, ext_id)
-    con.commit()
+        except Exception as e:
+            log_exception("Exception when parsing reply", 3, ext_id)
diff --git a/ExtensionCrawler/dbbackend/mysql_backend.py b/ExtensionCrawler/dbbackend/mysql_backend.py
index 21be609..11b7cf3 100644
--- a/ExtensionCrawler/dbbackend/mysql_backend.py
+++ b/ExtensionCrawler/dbbackend/mysql_backend.py
@@ -52,6 +52,10 @@ class MysqlBackend:
                     if db is not None:
                         db.close()
                         db = None
+                    # Reconnect after a failure; MySQLdb's Connection.autocommit
+                    # is a METHOD -- it must be called, not assigned to.
+                    db = MySQLdb.connect(**self.dbargs)
+                    db.autocommit(True)
+                    self.cursor = db.cursor()
                 except Exception as e2:
                     log_error("Surpressed exception: {}".format(str(e2)), 3,
                               self.ext_id)
                     raise last_exception
@@ -72,6 +76,8 @@
         global db
         if db is None:
             db = MySQLdb.connect(**self.dbargs)
+            # commit() was removed from this backend, so every write must be
+            # auto-committed; autocommit is a method call, not an attribute.
+            db.autocommit(True)
             self.cursor = db.cursor()
         return self
 
@@ -84,9 +90,6 @@
         except Exception as e:
             log_error("Surpressed exception: {}".format(str(e)), 3,
                       self.ext_id)
 
-    def commit(self):
-        db.commit()
-
     def get_single_value(self, query, args):
         self.retry(lambda: self.cursor.execute(query, args))
 
diff --git a/create-db b/create-db
index 1fcbd98..824f611 100755
--- a/create-db
+++ b/create-db
@@ -34,19 +34,21 @@ from ExtensionCrawler.util import log_info, log_warning, log_error, log_exceptio
 
 
 def help():
-    print("create-db [OPTION]")
-    print(" -h print this help text")
-    print(" -a