From 6679ef7cfd5543c89c4e7f80b3d37711da5444e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Kn=C3=BCttel?= Date: Wed, 14 Aug 2019 17:02:06 +0200 Subject: [PATCH] added persistent storage module for large collections --- autoimport/__main__.py | 29 ++++++++---- autoimport/commands.py | 36 ++++++++++----- autoimport/db.py | 88 +++++++++++++++++++++++++++++++++++++ autoimport/order/order.py | 2 + autoimport/select/select.py | 14 ++++++ autoimport/tmpdb.py | 55 +++-------------------- 6 files changed, 155 insertions(+), 69 deletions(-) create mode 100644 autoimport/db.py diff --git a/autoimport/__main__.py b/autoimport/__main__.py index f8e5a62..bbb4092 100644 --- a/autoimport/__main__.py +++ b/autoimport/__main__.py @@ -28,6 +28,7 @@ Options: from .commands import copy, move, placeholders, select from .tmpdb import get_temporary_db +from .db import get_persistent_db args = docopt.docopt(usage) @@ -40,11 +41,20 @@ logging.debug("ARGUMENTS:") for k,v in args.items(): logging.debug("\t{}: \t{}".format(k,v)) -try: - db = get_temporary_db(args["--implementation"]) -except Exception as e: - print(e) - sys.exit(1) +if(not args["--storage"]): + try: + db = get_temporary_db(args["--implementation"]) + except Exception as e: + print(e) + sys.exit(1) + stored = False +else: + try: + db = get_persistent_db(args["--storage"]) + except Exception as e: + print(e) + sys.exit(1) + stored = True if(args["placeholders"]): result = placeholders() @@ -56,7 +66,8 @@ elif(args["copy"]): , not args["--no-select-stop-on-error"] , args["--walk"] , args["--postfix"] - , args["--dry-run"]) + , args["--dry-run"] + , stored) elif(args["move"]): result = move(db @@ -66,14 +77,16 @@ elif(args["move"]): , not args["--no-select-stop-on-error"] , args["--walk"] , args["--postfix"] - , args["--dry-run"]) + , args["--dry-run"] + , stored) elif(args["select"]): result = select(db , args["SRC_PATH"] , not args["--no-select-stop-on-error"] , args["--walk"] , args["--postfix"] - , args["--dry-run"]) + , args["--dry-run"] + , stored) diff --git a/autoimport/commands.py b/autoimport/commands.py index 7804a6e..a16252b 100644 --- a/autoimport/commands.py +++ b/autoimport/commands.py @@ -13,7 +13,13 @@ def placeholders(): return 0 -def select(db, src_path, stop_on_error, walk, postfix, dryrun): +def select( db + , src_path + , stop_on_error + , walk + , postfix + , dryrun + , stored): logger = logging.getLogger(__name__) extensions = postfix.split(",") try: @@ -51,7 +57,8 @@ def copy(db , stop_on_error , walk , postfix - , dryrun): + , dryrun + , stored): return do_copy_or_move(db , src_path @@ -61,7 +68,8 @@ def copy(db , walk , postfix , dryrun - , False) + , False + , stored) def move(db , src_path @@ -70,7 +78,8 @@ def move(db , stop_on_error , walk , postfix - , dryrun): + , dryrun + , stored): return do_copy_or_move(db , src_path @@ -80,7 +89,8 @@ def move(db , walk , postfix , dryrun - , True) + , True + , stored) def do_copy_or_move(db @@ -91,16 +101,18 @@ def do_copy_or_move(db , walk , postfix , dryrun - , move): + , move + , stored): logger = logging.getLogger(__name__) extensions = postfix.split(",") - try: - findall(src_path, walk, extensions, db, stop_on_error) - except Exception as e: - logger.error(e) - logger.debug(traceback.format_exc()) - return 1 + if(not stored): + try: + findall(src_path, walk, extensions, db, stop_on_error) + except Exception as e: + logger.error(e) + logger.debug(traceback.format_exc()) + return 1 cursor = db.cursor() cursor.execute( '''SELECT COUNT(name) FROM FILES''' diff --git a/autoimport/db.py b/autoimport/db.py new file mode 100644 index 0000000..31e19f8 --- /dev/null +++ b/autoimport/db.py @@ -0,0 +1,88 @@ +""" +This module provides a way to construct (persistent) databases +used by ``autoimport``. In normal mode this module is replaced +by ``tmpdb``, however it might be useful to keep the data produced +by ``autoimport``. +""" + +import sqlite3 +import abc +import os + +class AbstractDatabase(abc.ABC): + """ + Abstract base class for all ``TemporaryDatabase`` + implementations. + + **Note**: ``__init__`` must set ``self._db`` to an + open sqlite3 connection. + """ + def __init__(self): + abc.ABC.__init__(self) + self._db = None + + @abc.abstractmethod + def close(self): + pass + + def cursor(self): + return self._db.cursor() + + def dump_db(self, file): + for line in self._db.iterdump(): + file.write("{}\n".format(line)) + def commit(self): + return self._db.commit() + +class PersistentDatabase(AbstractDatabase): + def __init__(self, database_path): + AbstractDatabase.__init__(self) + self._database_path = database_path + self._db = sqlite3.connect(database_path) + + def close(self): + self._db.close() + +def initialize_database(db): + cursor = db.cursor() + + cursor.execute( + '''CREATE TABLE FILES( + name TEXT, + DateTime TEXT, + DateTimeDigitized TEXT, + DateTimeOriginal TEXT, + Model TEXT, + Make TEXT, + Software TEXT)''' + ) + + cursor.execute( + '''CREATE TABLE DIRECTORIES( + name TEXT)''' + ) + + cursor.execute( + '''CREATE TABLE ASSOCIATIONS(file_id INTEGER, + directory_id INTEGER)''' + ) + + cursor.execute( + '''CREATE TABLE KV(key TEXT, + value TEXT)''' + ) + cursor.execute( + '''CREATE TABLE EXTENSIONS_SEARCHED(extension TEXT)''' + ) + db.commit() + + +def get_persistent_db(path): + if(not os.path.exists(path)): + if(not os.path.dirname(path)): + db = PersistentDatabase(path) + initialize_database(db) + return db + if(not os.path.exists(os.path.dirname(path))): + raise IOError("path '{}' does not exist".format(os.path.dirname(path))) + return PersistentDatabase(path) diff --git a/autoimport/order/order.py b/autoimport/order/order.py index dabdac6..9e25e29 100644 --- a/autoimport/order/order.py +++ b/autoimport/order/order.py @@ -63,6 +63,7 @@ def order(db, path_specifier): path_id = get_path_id(db, this_path) cursor.execute("INSERT INTO ASSOCIATIONS(file_id, directory_id) VALUES(?, ?)", (rowid, path_id)) + db.commit() @@ -76,5 +77,6 @@ def get_path_id(db, path): return result[0] cursor.execute("INSERT INTO DIRECTORIES(name) VALUES(?)", (path,)) + db.commit() return cursor.lastrowid diff --git a/autoimport/select/select.py b/autoimport/select/select.py index b999d89..ddf7813 100644 --- a/autoimport/select/select.py +++ b/autoimport/select/select.py @@ -11,6 +11,7 @@ def findall_this_directory(directory, files, extensions, db, stop_on_error): if(filename.split(".")[-1] in extensions): filename = os.path.join(directory, filename) insert_file_into_db(filename, db, stop_on_error) + db.commit() def insert_file_into_db(filename, db, stop_on_error): @@ -47,8 +48,21 @@ def insert_file_into_db(filename, db, stop_on_error): , data) def findall(directory, walk, extensions, db, stop_on_error): + cursor = db.cursor() + cursor.execute("SELECT extension FROM EXTENSIONS_SEARCHED") + ext_already_searched = {i[0] for i in cursor.fetchall()} + ext_to_search = set(extensions) - ext_already_searched + ext_omit = set(extensions) - ext_to_search + + if(ext_omit): + module_logger.warn("Omitting the extensions {} as they are already in the database.".format(ext_omit)) + + extensions = list(ext_to_search) for dir_, paths, files in os.walk(directory): findall_this_directory(dir_, files, extensions, db, stop_on_error) if(not walk): break + cursor.executemany("INSERT INTO EXTENSIONS_SEARCHED(extension) VALUES(?)", [(i,) for i in extensions]) + db.commit() + diff --git a/autoimport/tmpdb.py b/autoimport/tmpdb.py index 25d431d..bd948c0 100644 --- a/autoimport/tmpdb.py +++ b/autoimport/tmpdb.py @@ -14,6 +14,9 @@ import sqlite3 import tempfile import abc +from .db import AbstractDatabase, initialize_database + + def _open_db_mem(): return (sqlite3.connect(":memory:"), None) @@ -22,27 +25,8 @@ def _open_db_disk(): db = sqlite3.connect(file.name) return (db, file) -class AbstractTemporaryDatabase(abc.ABC): - """ - Abstract base class for all ``TemporaryDatabase`` - implementations. - - **Note**: ``__init__`` must set ``self._db`` to an - open sqlite3 connection. - """ - def __init__(self): - abc.ABC.__init__(self) - self._db = None - @abc.abstractmethod - def close(self): - pass - - def cursor(self): - return self._db.cursor() - - def dump_db(self, file): - for line in self._db.iterdump(): - file.write("{}\n".format(line)) +class AbstractTemporaryDatabase(AbstractDatabase): + pass class MemoryTemporaryDatabase(AbstractTemporaryDatabase): @@ -80,33 +64,6 @@ def get_temporary_db(type_): impl = implementations[type_] instance = impl() - - cursor = instance.cursor() - - cursor.execute( - '''CREATE TABLE FILES( - name TEXT, - DateTime TEXT, - DateTimeDigitized TEXT, - DateTimeOriginal TEXT, - Model TEXT, - Make TEXT, - Software TEXT)''' - ) - - cursor.execute( - '''CREATE TABLE DIRECTORIES( - name TEXT)''' - ) - - cursor.execute( - '''CREATE TABLE ASSOCIATIONS(file_id INTEGER, - directory_id INTEGER)''' - ) - - cursor.execute( - '''CREATE TABLE KV(key TEXT, - value TEXT)''' - ) + initialize_database(instance) return instance