Changed fts table structure.

Michael Herzberg 2017-07-31 23:23:57 +01:00
parent b34d45c4dc
commit b8f57196c7
2 changed files with 65 additions and 85 deletions


@@ -43,40 +43,50 @@ class SelfclosingSqliteDB:
self.con.close()
def setup_fts_tables(con, name, columns, primary_columns):
sqls = [
s.format(
name=name,
columns=", ".join(columns),
new_columns=", ".join(["new." + x for x in columns]),
primary_columns=", ".join(primary_columns))
for s in [
"""CREATE TABLE {name}({columns}, PRIMARY KEY ({primary_columns}));""",
"""CREATE VIRTUAL TABLE {name}_fts using fts4(content="{name}", {columns});""",
"""CREATE TRIGGER {name}_bu BEFORE UPDATE ON {name} BEGIN """
"""DELETE FROM {name}_fts WHERE docid=old.rowid;"""
"""END;""",
"""CREATE TRIGGER {name}_bd BEFORE DELETE ON {name} BEGIN """
"""DELETE FROM {name}_fts WHERE docid=old.rowid;"""
"""END;""",
"""CREATE TRIGGER {name}_au AFTER UPDATE ON {name} BEGIN """
"""INSERT INTO {name}_fts(docid, {columns}) VALUES(new.rowid, {new_columns});"""
"""END;""",
"""CREATE TRIGGER {name}_ai AFTER INSERT ON {name} BEGIN """
"""INSERT INTO {name}_fts(docid, {columns}) VALUES(new.rowid, {new_columns});"""
"""END;"""
]
]
for sql in sqls:
con.execute(sql)
def setup_tables(con):
con.execute("""CREATE VIRTUAL TABLE support using fts4("""
"""author TEXT,"""
"""extid TEXT,"""
"""date TEXT,"""
"""displayname TEXT,"""
"""commentdate INTEGER,"""
"""title TEXT,"""
"""language TEXT,"""
"""shortauthor TEXT,"""
"""comment TEXT"""
""")""")
con.execute("""CREATE VIRTUAL TABLE review using fts4("""
"""author TEXT,"""
"""extid TEXT,"""
"""date TEXT,"""
"""displayname TEXT,"""
"""commentdate INTEGER,"""
"""rating INTEGER,"""
"""language TEXT,"""
"""shortauthor TEXT,"""
"""comment TEXT"""
""")""")
con.execute("""CREATE VIRTUAL TABLE reply using fts4("""
"""author TEXT,"""
"""extid TEXT,"""
"""date TEXT,"""
"""displayname TEXT,"""
"""commentdate INTEGER,"""
"""replyto TEXT,"""
"""language TEXT,"""
"""shortauthor TEXT,"""
"""comment TEXT"""
""")""")
setup_fts_tables(con, "support", [
"author", "commentdate", "extid", "date", "displayname", "title",
"language", "shortauthor", "comment"
], ["author", "commentdate", "extid", "date"])
setup_fts_tables(con, "review", [
"author", "commentdate", "extid", "date", "displayname", "rating",
"language", "shortauthor", "comment"
], ["author", "commentdate", "extid", "date"])
setup_fts_tables(con, "reply", [
"author", "commentdate", "extid", "date", "displayname", "replyto",
"language", "shortauthor", "comment"
], ["author", "commentdate", "extid", "date"])
con.execute("""CREATE TABLE category ("""
"""extid TEXT,"""
"""date TEXT,"""
@@ -336,7 +346,8 @@ def parse_and_insert_crx(ext_id, date, datepath, con, verbose, indent):
with f.open(jsfile) as f2:
md5 = hashlib.md5(f2.read()).hexdigest()
con.execute("INSERT INTO jsfile VALUES (?,?,?,?)",
(etag, jsfile.filename, int(jsfile.file_size),md5))
(etag, jsfile.filename, int(jsfile.file_size),
md5))
public_key = read_crx(crx_path).public_key
@@ -367,7 +378,7 @@ def parse_and_insert_review(ext_id, date, reviewpath, con):
shortauthor = get(get(review, "entity"), "shortAuthor")
con.execute("INSERT INTO review VALUES(?,?,?,?,?,?,?,?,?)",
(author, ext_id, date, displayname, timestamp,
(author, timestamp, ext_id, date, displayname,
starRating, language, shortauthor, comment))
@@ -388,7 +399,7 @@ def parse_and_insert_support(ext_id, date, supportpath, con):
shortauthor = get(get(review, "entity"), "shortAuthor")
con.execute("INSERT INTO support VALUES(?,?,?,?,?,?,?,?,?)",
(author, ext_id, date, displayname, timestamp,
(author, timestamp, ext_id, date, displayname,
title, language, shortauthor, comment))
@@ -412,7 +423,7 @@ def parse_and_insert_replies(ext_id, date, repliespath, con, verbose, indent):
language = get(annotation, "language")
shortauthor = get(get(annotation, "entity"), "shortAuthor")
con.execute("INSERT INTO reply VALUES(?,?,?,?,?,?,?,?,?)",
(author, ext_id, date, displayname, timestamp,
(author, timestamp, ext_id, date, displayname,
replyto, language, shortauthor, comment))
return ""


@@ -3,66 +3,38 @@
import sqlite3
import sys
import os
import re
import json
import fnmatch
MAX_ATTACHED_DBS = 10
def merge_and_detach(con, currently_attached, mappings, columnnames):
def merge_and_detach(con, currently_attached, tablenames):
for db in currently_attached:
for (fromtable, totable) in mappings.items():
con.execute("INSERT INTO {} SELECT {} from {}.{};".format(
totable, ",".join(columnnames[fromtable]), db, fromtable))
for tablename in tablenames:
con.execute(
"INSERT OR IGNORE INTO {tablename} SELECT * from {dbname}.{tablename};".
format(tablename=tablename, dbname=db))
con.commit()
con.commit()
for db in currently_attached:
con.execute("DETACH DATABASE {}".format(db))
def get_mappings_and_column_names(dbpath):
def get_tablenames(dbpath):
with sqlite3.connect(dbpath) as con:
create_strings = con.execute(
"""select name,sql from sqlite_master where type='table'"""
""" and name NOT LIKE '%!_segments' escape '!'"""
""" and name NOT LIKE '%!_segdir' escape '!'"""
""" and name NOT LIKE '%!_docsize' escape '!'"""
""" and name NOT LIKE '%!_stat' escape '!'"""
""";""")
mappings = {}
columnnames = {}
for (name, create_string, ) in create_strings:
if re.match("^CREATE VIRTUAL TABLE ([^\s]+) using fts",
create_string):
continue
cnames = [
name
for (_, name, _, _, _, _) in con.execute(
"pragma table_info({});".format(name)).fetchall()
]
if name.endswith("_content"):
mappings[name] = name[:-len("_content")]
columnnames[name] = cnames[1:]
else:
mappings[name] = name
columnnames[name] = cnames
return (mappings, columnnames)
"""select name from sqlite_master where type='table'"""
""" and name NOT LIKE '%!_fts%' escape '!';""").fetchall()
return [x[0] for x in create_strings]
def merge_schema(con, dbpath):
con.execute("ATTACH DATABASE ? as schemadb;", (dbpath, ))
create_strings = con.execute(
"""select sql from schemadb.sqlite_master where type='table'"""
""" and name NOT LIKE '%!_segments' escape '!'"""
""" and name NOT LIKE '%!_segdir' escape '!'"""
""" and name NOT LIKE '%!_docsize' escape '!'"""
""" and name NOT LIKE '%!_stat' escape '!'"""
""" and name NOT LIKE '%!_content' escape '!'"""
""";"""
)
"""select sql from schemadb.sqlite_master where """
"""(type='table' AND name NOT LIKE '%!_fts!_%' escape '!') OR """
"""type='trigger';""")
for (create_string, ) in create_strings:
print(create_string)
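Both sqlite_master queries rely on the escape clause so the underscore is matched literally rather than as LIKE's single-character wildcard. get_tablenames drops every name containing "_fts", leaving only the plain content tables to copy, while merge_schema keeps the *_fts virtual tables and the triggers and presumably replays their CREATE statements on the target, so the full-text index is rebuilt there as the copied rows arrive. A sketch of the listing the first query should produce against the crawler schema, with a hypothetical file name:

    with sqlite3.connect("part-0001.sqlite") as con:
        names = [n for (n,) in con.execute(
            "select name from sqlite_master where type='table'"
            " and name NOT LIKE '%!_fts%' escape '!';")]
        # e.g. ['support', 'review', 'reply', 'category', ...]; support_fts and
        # its shadow tables (support_fts_segments, support_fts_segdir, ...) are
        # filtered out.
        print(names)
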
@@ -77,12 +49,14 @@ def find(pattern, path):
if fnmatch.fnmatch(name, pattern):
yield os.path.join(root, name)
def help():
print("Usage: merge_dbs DBSPATH TODB")
print(" DBSPATH the folder containing the *.sqlite files")
print(" (searched recursivly)")
print(" TODB the destination sqlite file")
def main(argv):
if len(argv) != 2:
help()
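The help text documents the two positional arguments, and find() just above it yields every file under a directory whose name matches an fnmatch pattern, which is presumably how DBSPATH is expanded into the list of source databases. A hedged sketch with hypothetical paths and a script name taken from the usage string:

    # Hypothetical invocation:
    #   python merge_dbs.py ./crawl-dbs merged.sqlite
    # and roughly what main() is expected to do with DBSPATH when it is a directory:
    sqlitepaths = sorted(find("*.sqlite", "./crawl-dbs"))
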
@@ -97,11 +71,7 @@ def main(argv):
sqlitepaths = [dbspath]
firstdb = sqlitepaths[0]
mappings, columnnames = get_mappings_and_column_names(firstdb)
print("Mappings:")
print(json.dumps(mappings, indent=4))
print("Column names:")
print(json.dumps(columnnames, indent=4))
tablenames = get_tablenames(firstdb)
with sqlite3.connect(todb) as con:
if con.execute("SELECT COUNT(*) FROM sqlite_master;").fetchone()[
@@ -117,8 +87,7 @@ def main(argv):
currently_attached += [dbname]
if len(currently_attached) % MAX_ATTACHED_DBS == 0 or i + 1 == len(
sqlitepaths):
merge_and_detach(con, currently_attached, mappings,
columnnames)
merge_and_detach(con, currently_attached, tablenames)
currently_attached = []
con.commit()
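The modulus check in main() flushes the current batch whenever ten sources are attached or the last path has been reached; ten matches SQLite's default cap on simultaneously attached databases (SQLITE_MAX_ATTACHED defaults to 10), which is presumably why MAX_ATTACHED_DBS is 10. A sketch of that batching loop, with the ATTACH naming assumed since those lines fall outside the hunk:

    currently_attached = []
    for i, path in enumerate(sqlitepaths):
        dbname = "db{}".format(i)                     # assumed naming scheme
        con.execute("ATTACH DATABASE ? AS {};".format(dbname), (path,))
        currently_attached += [dbname]
        if len(currently_attached) % MAX_ATTACHED_DBS == 0 or i + 1 == len(sqlitepaths):
            merge_and_detach(con, currently_attached, tablenames)
            currently_attached = []
    con.commit()
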