From 3bef0afe7a93d4b6a93c136c3ea9ffd23bf62a75 Mon Sep 17 00:00:00 2001 From: Michael Herzberg Date: Sun, 15 Jul 2018 00:08:11 +0100 Subject: [PATCH] Group mysql inserts and don't compress them. --- ExtensionCrawler/db.py | 3 +-- ExtensionCrawler/dbbackend/mysql_backend.py | 9 ++++++++- create-db | 3 +-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/ExtensionCrawler/db.py b/ExtensionCrawler/db.py index 9492891..41d5794 100644 --- a/ExtensionCrawler/db.py +++ b/ExtensionCrawler/db.py @@ -424,8 +424,7 @@ def update_db_incremental(tmptardir, ext_id, date, con=None): with MysqlBackend( ext_id, read_default_file=const_mysql_config_file(), - charset='utf8mb4', - compress=True) as con: + charset='utf8mb4') as con: update_db_incremental_with_connection(tmptardir, ext_id, date, con) diff --git a/ExtensionCrawler/dbbackend/mysql_backend.py b/ExtensionCrawler/dbbackend/mysql_backend.py index f3e78cd..dcc9ee9 100644 --- a/ExtensionCrawler/dbbackend/mysql_backend.py +++ b/ExtensionCrawler/dbbackend/mysql_backend.py @@ -19,6 +19,7 @@ import time import datetime from collections import OrderedDict from random import uniform +import sys import MySQLdb import _mysql_exceptions @@ -64,13 +65,19 @@ class MysqlBackend: ",".join(len(args[0]) * ["%s"]), ",".join( ["{c}=VALUES({c})".format(c=c) for c in sorted_arglist[0].keys()])) + start = time.time() self.retry(lambda: self.cursor.executemany(query, args)) + log_info("* Inserted {} bytes into {}, taking {:.2f}s.".format(sum([sys.getsizeof(arg) for arg in args]), + table, time.time() - start), 3) + start = time.time() + self.db.commit() + log_info("* DB commit took {:.2f}s".format(time.time() - start), 2) def _create_conn(self): if self.db is None: log_info("* self.db is None, open new connection ...", 3) self.db = MySQLdb.connect(**self.dbargs) - self.db.autocommit(True) + self.db.autocommit(False) log_info("* success", 4) if self.cursor is None: log_info("* self.cursor is None, assigning new cursor ...", 3) diff --git a/create-db b/create-db index feeb693..ae9399f 100755 --- a/create-db +++ b/create-db @@ -70,8 +70,7 @@ def process_id(from_date, until_date, path): try_wait=10, maxtries=30, read_default_file=const_mysql_config_file(), - charset='utf8mb4', - compress=True) as con: + charset='utf8mb4') as con: for date in sorted(os.listdir(iddir)): if (from_date is not None and date < from_date) or \ (until_date is not None and date > until_date):