ExtensionCrawler/scripts/merge_dbs

97 lines
2.8 KiB
Python
Executable File

#!/usr/bin/env python3
import sqlite3
import sys
import os
import fnmatch
MAX_ATTACHED_DBS = 10
def merge_and_detach(con, currently_attached, tablenames):
for db in currently_attached:
for tablename in tablenames:
con.execute(
"INSERT OR IGNORE INTO {tablename} SELECT * from {dbname}.{tablename};".
format(tablename=tablename, dbname=db))
con.commit()
con.commit()
for db in currently_attached:
con.execute("DETACH DATABASE {}".format(db))
def get_tablenames(dbpath):
with sqlite3.connect(dbpath) as con:
create_strings = con.execute(
"""select name from sqlite_master where type='table'"""
""" and name NOT LIKE '%!_fts%' escape '!';""").fetchall()
return [x[0] for x in create_strings]
def merge_schema(con, dbpath):
con.execute("ATTACH DATABASE ? as schemadb;", (dbpath, ))
create_strings = con.execute(
"""select sql from schemadb.sqlite_master where """
"""(type='table' AND name NOT LIKE '%!_fts!_%' escape '!') OR """
"""type='trigger';""")
for (create_string, ) in create_strings:
print(create_string)
con.execute(create_string)
con.execute("DETACH DATABASE schemadb;")
def find(pattern, path):
for root, dirs, files in os.walk(path):
for name in files:
if fnmatch.fnmatch(name, pattern):
yield os.path.join(root, name)
def help():
print("Usage: merge_dbs DBSPATH TODB")
print(" DBSPATH the folder containing the *.sqlite files")
print(" (searched recursivly)")
print(" TODB the destination sqlite file")
def main(argv):
if len(argv) != 2:
help()
sys.exit(1)
dbspath, todb = argv[:2]
print("Using sqlite3 version {}".format(sqlite3.sqlite_version))
if os.path.isdir(dbspath):
sqlitepaths = list(find("*.sqlite", dbspath))
else:
sqlitepaths = [dbspath]
firstdb = sqlitepaths[0]
tablenames = get_tablenames(firstdb)
with sqlite3.connect(todb) as con:
if con.execute("SELECT COUNT(*) FROM sqlite_master;").fetchone()[
0] == 0:
print("Merging schema from {}".format(firstdb))
merge_schema(con, firstdb)
currently_attached = []
for i, dbpath in enumerate(sqlitepaths):
dbname = "db{}".format(i)
print("Attaching {}".format(dbpath))
con.execute("ATTACH DATABASE ? as ?", (dbpath, dbname))
currently_attached += [dbname]
if len(currently_attached) % MAX_ATTACHED_DBS == 0 or i + 1 == len(
sqlitepaths):
merge_and_detach(con, currently_attached, tablenames)
currently_attached = []
con.commit()
if __name__ == "__main__":
main(sys.argv[1:])