forked from BrowserSecurity/ExtensionCrawler
Changed fts table structure.
This commit is contained in:
parent
b34d45c4dc
commit
b8f57196c7
|
@ -43,40 +43,50 @@ class SelfclosingSqliteDB:
|
|||
self.con.close()
|
||||
|
||||
|
||||
def setup_fts_tables(con, name, columns, primary_columns):
    """Create a content table plus an external-content FTS4 index over it.

    Creates, on the given connection:
      * ``{name}``      -- a plain table with ``columns`` and the given PRIMARY KEY
      * ``{name}_fts``  -- an fts4 virtual table over the same columns, backed
                           by ``{name}`` via the ``content=`` option
      * four triggers (``_bu``, ``_bd``, ``_au``, ``_ai``) that keep the index
        in sync: index rows are deleted BEFORE UPDATE/DELETE and (re-)inserted
        AFTER UPDATE/INSERT, the pattern recommended for external-content
        FTS tables.

    Args:
        con: open sqlite3 connection; statements are executed but not committed.
        name: base table name, also used as prefix for the fts table/triggers.
        columns: iterable of column names to store and index.
        primary_columns: subset of ``columns`` forming the PRIMARY KEY.
    """
    sqls = [
        s.format(
            name=name,
            columns=", ".join(columns),
            new_columns=", ".join(["new." + x for x in columns]),
            primary_columns=", ".join(primary_columns))
        for s in [
            """CREATE TABLE {name}({columns}, PRIMARY KEY ({primary_columns}));""",
            """CREATE VIRTUAL TABLE {name}_fts using fts4(content="{name}", {columns});""",
            # Remove the old index entry before the row changes or disappears.
            """CREATE TRIGGER {name}_bu BEFORE UPDATE ON {name} BEGIN """
            """DELETE FROM {name}_fts WHERE docid=old.rowid;"""
            """END;""",
            """CREATE TRIGGER {name}_bd BEFORE DELETE ON {name} BEGIN """
            """DELETE FROM {name}_fts WHERE docid=old.rowid;"""
            """END;""",
            # (Re-)index the row once the new values are in place.
            """CREATE TRIGGER {name}_au AFTER UPDATE ON {name} BEGIN """
            """INSERT INTO {name}_fts(docid, {columns}) VALUES(new.rowid, {new_columns});"""
            """END;""",
            """CREATE TRIGGER {name}_ai AFTER INSERT ON {name} BEGIN """
            """INSERT INTO {name}_fts(docid, {columns}) VALUES(new.rowid, {new_columns});"""
            """END;"""
        ]
    ]
    for sql in sqls:
        con.execute(sql)
|
||||
|
||||
|
||||
def setup_tables(con):
|
||||
con.execute("""CREATE VIRTUAL TABLE support using fts4("""
|
||||
"""author TEXT,"""
|
||||
"""extid TEXT,"""
|
||||
"""date TEXT,"""
|
||||
"""displayname TEXT,"""
|
||||
"""commentdate INTEGER,"""
|
||||
"""title TEXT,"""
|
||||
"""language TEXT,"""
|
||||
"""shortauthor TEXT,"""
|
||||
"""comment TEXT"""
|
||||
""")""")
|
||||
con.execute("""CREATE VIRTUAL TABLE review using fts4("""
|
||||
"""author TEXT,"""
|
||||
"""extid TEXT,"""
|
||||
"""date TEXT,"""
|
||||
"""displayname TEXT,"""
|
||||
"""commentdate INTEGER,"""
|
||||
"""rating INTEGER,"""
|
||||
"""language TEXT,"""
|
||||
"""shortauthor TEXT,"""
|
||||
"""comment TEXT"""
|
||||
""")""")
|
||||
con.execute("""CREATE VIRTUAL TABLE reply using fts4("""
|
||||
"""author TEXT,"""
|
||||
"""extid TEXT,"""
|
||||
"""date TEXT,"""
|
||||
"""displayname TEXT,"""
|
||||
"""commentdate INTEGER,"""
|
||||
"""replyto TEXT,"""
|
||||
"""language TEXT,"""
|
||||
"""shortauthor TEXT,"""
|
||||
"""comment TEXT"""
|
||||
""")""")
|
||||
setup_fts_tables(con, "support", [
|
||||
"author", "commentdate", "extid", "date", "displayname", "title",
|
||||
"language", "shortauthor", "comment"
|
||||
], ["author", "commentdate", "extid", "date"])
|
||||
|
||||
setup_fts_tables(con, "review", [
|
||||
"author", "commentdate", "extid", "date", "displayname", "rating",
|
||||
"language", "shortauthor", "comment"
|
||||
], ["author", "commentdate", "extid", "date"])
|
||||
|
||||
setup_fts_tables(con, "reply", [
|
||||
"author", "commentdate", "extid", "date", "displayname", "replyto",
|
||||
"language", "shortauthor", "comment"
|
||||
], ["author", "commentdate", "extid", "date"])
|
||||
|
||||
con.execute("""CREATE TABLE category ("""
|
||||
"""extid TEXT,"""
|
||||
"""date TEXT,"""
|
||||
|
@ -336,7 +346,8 @@ def parse_and_insert_crx(ext_id, date, datepath, con, verbose, indent):
|
|||
with f.open(jsfile) as f2:
|
||||
md5 = hashlib.md5(f2.read()).hexdigest()
|
||||
con.execute("INSERT INTO jsfile VALUES (?,?,?,?)",
|
||||
(etag, jsfile.filename, int(jsfile.file_size),md5))
|
||||
(etag, jsfile.filename, int(jsfile.file_size),
|
||||
md5))
|
||||
|
||||
public_key = read_crx(crx_path).public_key
|
||||
|
||||
|
@ -367,7 +378,7 @@ def parse_and_insert_review(ext_id, date, reviewpath, con):
|
|||
shortauthor = get(get(review, "entity"), "shortAuthor")
|
||||
|
||||
con.execute("INSERT INTO review VALUES(?,?,?,?,?,?,?,?,?)",
|
||||
(author, ext_id, date, displayname, timestamp,
|
||||
(author, timestamp, ext_id, date, displayname,
|
||||
starRating, language, shortauthor, comment))
|
||||
|
||||
|
||||
|
@ -388,7 +399,7 @@ def parse_and_insert_support(ext_id, date, supportpath, con):
|
|||
shortauthor = get(get(review, "entity"), "shortAuthor")
|
||||
|
||||
con.execute("INSERT INTO support VALUES(?,?,?,?,?,?,?,?,?)",
|
||||
(author, ext_id, date, displayname, timestamp,
|
||||
(author, timestamp, ext_id, date, displayname,
|
||||
title, language, shortauthor, comment))
|
||||
|
||||
|
||||
|
@ -412,7 +423,7 @@ def parse_and_insert_replies(ext_id, date, repliespath, con, verbose, indent):
|
|||
language = get(annotation, "language")
|
||||
shortauthor = get(get(annotation, "entity"), "shortAuthor")
|
||||
con.execute("INSERT INTO reply VALUES(?,?,?,?,?,?,?,?,?)",
|
||||
(author, ext_id, date, displayname, timestamp,
|
||||
(author, timestamp, ext_id, date, displayname,
|
||||
replyto, language, shortauthor, comment))
|
||||
return ""
|
||||
|
||||
|
|
|
@ -3,66 +3,38 @@
|
|||
import sqlite3
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
import fnmatch
|
||||
|
||||
MAX_ATTACHED_DBS = 10
|
||||
|
||||
|
||||
def merge_and_detach(con, currently_attached, tablenames):
    """Merge the given tables from every attached source database into the
    main database, then detach all sources.

    Rows are copied with INSERT OR IGNORE, so rows whose primary key already
    exists in the destination are silently skipped (re-runs are idempotent).
    Commits after each source database so earlier work survives a failure.

    Args:
        con: sqlite3 connection whose main database is the merge destination.
        currently_attached: schema names of the currently attached source dbs.
        tablenames: tables to merge; assumed to exist in source and destination.
    """
    for db in currently_attached:
        for tablename in tablenames:
            con.execute(
                "INSERT OR IGNORE INTO {tablename} SELECT * from {dbname}.{tablename};".
                format(tablename=tablename, dbname=db))
        con.commit()
    for db in currently_attached:
        con.execute("DETACH DATABASE {}".format(db))
|
||||
|
||||
|
||||
def get_tablenames(dbpath):
    """Return the names of the ordinary tables in the sqlite db at *dbpath*.

    FTS index and shadow tables (any table whose name contains the literal
    substring ``_fts``) are excluded, so only the content tables that should
    be merged are returned.

    Args:
        dbpath: filesystem path of the sqlite database file.

    Returns:
        List of table-name strings.
    """
    # NOTE: "with sqlite3.connect(...)" commits/rolls back on exit but does
    # not close the connection -- same behavior as the surrounding code.
    with sqlite3.connect(dbpath) as con:
        # '!' escapes the '_' wildcard so '_fts' is matched literally.
        rows = con.execute(
            """select name from sqlite_master where type='table'"""
            """ and name NOT LIKE '%!_fts%' escape '!';""").fetchall()
    return [x[0] for x in rows]
|
||||
|
||||
|
||||
def merge_schema(con, dbpath):
|
||||
con.execute("ATTACH DATABASE ? as schemadb;", (dbpath, ))
|
||||
create_strings = con.execute(
|
||||
"""select sql from schemadb.sqlite_master where type='table'"""
|
||||
""" and name NOT LIKE '%!_segments' escape '!'"""
|
||||
""" and name NOT LIKE '%!_segdir' escape '!'"""
|
||||
""" and name NOT LIKE '%!_docsize' escape '!'"""
|
||||
""" and name NOT LIKE '%!_stat' escape '!'"""
|
||||
""" and name NOT LIKE '%!_content' escape '!'"""
|
||||
""";"""
|
||||
)
|
||||
"""select sql from schemadb.sqlite_master where """
|
||||
"""(type='table' AND name NOT LIKE '%!_fts!_%' escape '!') OR """
|
||||
"""type='trigger';""")
|
||||
|
||||
for (create_string, ) in create_strings:
|
||||
print(create_string)
|
||||
|
@ -77,12 +49,14 @@ def find(pattern, path):
|
|||
if fnmatch.fnmatch(name, pattern):
|
||||
yield os.path.join(root, name)
|
||||
|
||||
|
||||
def help():
    """Print command-line usage for the merge_dbs tool.

    NOTE: shadows the ``help`` builtin; kept as-is because callers in this
    file invoke it by this name.
    """
    print("Usage: merge_dbs DBSPATH TODB")
    print(" DBSPATH the folder containing the *.sqlite files")
    # Fixed typo: "recursivly" -> "recursively".
    print(" (searched recursively)")
    print(" TODB the destination sqlite file")
|
||||
|
||||
|
||||
def main(argv):
|
||||
if len(argv) != 2:
|
||||
help()
|
||||
|
@ -97,11 +71,7 @@ def main(argv):
|
|||
sqlitepaths = [dbspath]
|
||||
|
||||
firstdb = sqlitepaths[0]
|
||||
mappings, columnnames = get_mappings_and_column_names(firstdb)
|
||||
print("Mappings:")
|
||||
print(json.dumps(mappings, indent=4))
|
||||
print("Column names:")
|
||||
print(json.dumps(columnnames, indent=4))
|
||||
tablenames = get_tablenames(firstdb)
|
||||
|
||||
with sqlite3.connect(todb) as con:
|
||||
if con.execute("SELECT COUNT(*) FROM sqlite_master;").fetchone()[
|
||||
|
@ -117,8 +87,7 @@ def main(argv):
|
|||
currently_attached += [dbname]
|
||||
if len(currently_attached) % MAX_ATTACHED_DBS == 0 or i + 1 == len(
|
||||
sqlitepaths):
|
||||
merge_and_detach(con, currently_attached, mappings,
|
||||
columnnames)
|
||||
merge_and_detach(con, currently_attached, tablenames)
|
||||
currently_attached = []
|
||||
con.commit()
|
||||
|
||||
|
|
Loading…
Reference in New Issue