added persistent storage module for large collections

This commit is contained in:
Daniel Knüttel 2019-08-14 17:02:06 +02:00
parent fcaf6e3803
commit 6679ef7cfd
6 changed files with 155 additions and 69 deletions

View File

@ -28,6 +28,7 @@ Options:
from .commands import copy, move, placeholders, select from .commands import copy, move, placeholders, select
from .tmpdb import get_temporary_db from .tmpdb import get_temporary_db
from .db import get_persistent_db
args = docopt.docopt(usage) args = docopt.docopt(usage)
@ -40,11 +41,20 @@ logging.debug("ARGUMENTS:")
for k,v in args.items(): for k,v in args.items():
logging.debug("\t{}: \t{}".format(k,v)) logging.debug("\t{}: \t{}".format(k,v))
try: if(not args["--storage"]):
db = get_temporary_db(args["--implementation"]) try:
except Exception as e: db = get_temporary_db(args["--implementation"])
print(e) except Exception as e:
sys.exit(1) print(e)
sys.exit(1)
stored = False
else:
try:
db = get_persistent_db(args["--storage"])
except Exception as e:
print(e)
sys.exit(1)
stored = True
if(args["placeholders"]): if(args["placeholders"]):
result = placeholders() result = placeholders()
@ -56,7 +66,8 @@ elif(args["copy"]):
, not args["--no-select-stop-on-error"] , not args["--no-select-stop-on-error"]
, args["--walk"] , args["--walk"]
, args["--postfix"] , args["--postfix"]
, args["--dry-run"]) , args["--dry-run"]
, stored)
elif(args["move"]): elif(args["move"]):
result = move(db result = move(db
@ -66,14 +77,16 @@ elif(args["move"]):
, not args["--no-select-stop-on-error"] , not args["--no-select-stop-on-error"]
, args["--walk"] , args["--walk"]
, args["--postfix"] , args["--postfix"]
, args["--dry-run"]) , args["--dry-run"]
, stored)
elif(args["select"]): elif(args["select"]):
result = select(db result = select(db
, args["SRC_PATH"] , args["SRC_PATH"]
, not args["--no-select-stop-on-error"] , not args["--no-select-stop-on-error"]
, args["--walk"] , args["--walk"]
, args["--postfix"] , args["--postfix"]
, args["--dry-run"]) , args["--dry-run"]
, stored)

View File

@ -13,7 +13,13 @@ def placeholders():
return 0 return 0
def select(db, src_path, stop_on_error, walk, postfix, dryrun): def select( db
, src_path
, stop_on_error
, walk
, postfix
, dryrun
, stored):
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
extensions = postfix.split(",") extensions = postfix.split(",")
try: try:
@ -51,7 +57,8 @@ def copy(db
, stop_on_error , stop_on_error
, walk , walk
, postfix , postfix
, dryrun): , dryrun
, stored):
return do_copy_or_move(db return do_copy_or_move(db
, src_path , src_path
@ -61,7 +68,8 @@ def copy(db
, walk , walk
, postfix , postfix
, dryrun , dryrun
, False) , False
, stored)
def move(db def move(db
, src_path , src_path
@ -70,7 +78,8 @@ def move(db
, stop_on_error , stop_on_error
, walk , walk
, postfix , postfix
, dryrun): , dryrun
, stored):
return do_copy_or_move(db return do_copy_or_move(db
, src_path , src_path
@ -80,7 +89,8 @@ def move(db
, walk , walk
, postfix , postfix
, dryrun , dryrun
, True) , True
, stored)
def do_copy_or_move(db def do_copy_or_move(db
@ -91,16 +101,18 @@ def do_copy_or_move(db
, walk , walk
, postfix , postfix
, dryrun , dryrun
, move): , move
, stored):
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
extensions = postfix.split(",") extensions = postfix.split(",")
try: if(not stored):
findall(src_path, walk, extensions, db, stop_on_error) try:
except Exception as e: findall(src_path, walk, extensions, db, stop_on_error)
logger.error(e) except Exception as e:
logger.debug(traceback.format_exc()) logger.error(e)
return 1 logger.debug(traceback.format_exc())
return 1
cursor = db.cursor() cursor = db.cursor()
cursor.execute( cursor.execute(
'''SELECT COUNT(name) FROM FILES''' '''SELECT COUNT(name) FROM FILES'''

88
autoimport/db.py Normal file
View File

@ -0,0 +1,88 @@
"""
This module provides a way to construct (persistent) databases
used by ``autoimport``. In normal mode this module is replaced
by ``tmpdb``; however, it might be useful to keep the data produced
by ``autoimport``.
"""
import sqlite3
import abc
import os
class AbstractDatabase(abc.ABC):
    """
    Abstract base class for the database wrappers used by
    ``autoimport``.

    The wrapper forwards ``cursor``, ``commit`` and ``dump_db`` to the
    connection stored in ``self._db``; ``close`` has to be provided by
    the concrete subclass.

    **Note**: ``__init__`` of a subclass must set ``self._db`` to an
    open sqlite3 connection.
    """

    def __init__(self):
        abc.ABC.__init__(self)
        # Placeholder only; subclasses replace it with a real connection.
        self._db = None

    @abc.abstractmethod
    def close(self):
        """Release the underlying connection (implemented by subclasses)."""
        pass

    def cursor(self):
        """Return a new cursor created by the wrapped connection."""
        return self._db.cursor()

    def commit(self):
        """Commit on the wrapped connection and return its result."""
        return self._db.commit()

    def dump_db(self, file):
        """
        Write every statement yielded by the connection's ``iterdump()``
        to ``file``, one statement per line.

        ``file`` only needs to provide a ``write`` method.
        """
        for statement in self._db.iterdump():
            file.write("{}\n".format(statement))
class PersistentDatabase(AbstractDatabase):
    """
    Database wrapper around an sqlite3 connection opened on
    ``database_path``.

    The path is remembered in ``self._database_path``; no tables are
    created here.
    """

    def __init__(self, database_path):
        AbstractDatabase.__init__(self)
        self._database_path = database_path
        connection = sqlite3.connect(database_path)
        self._db = connection

    def close(self):
        """Close the wrapped sqlite3 connection."""
        self._db.close()
def initialize_database(db):
    """
    Create the tables used by ``autoimport`` in ``db``.

    ``db`` must provide ``cursor()`` and ``commit()``. The tables
    ``FILES``, ``DIRECTORIES``, ``ASSOCIATIONS``, ``KV`` and
    ``EXTENSIONS_SEARCHED`` are created in that order using plain
    ``CREATE TABLE`` statements, then the changes are committed.
    """
    # The SQL literals are left untouched (including their line
    # breaks), only the way they are executed differs.
    schema = (
        '''CREATE TABLE FILES(
name TEXT,
DateTime TEXT,
DateTimeDigitized TEXT,
DateTimeOriginal TEXT,
Model TEXT,
Make TEXT,
Software TEXT)''',
        '''CREATE TABLE DIRECTORIES(
name TEXT)''',
        '''CREATE TABLE ASSOCIATIONS(file_id INTEGER,
directory_id INTEGER)''',
        '''CREATE TABLE KV(key TEXT,
value TEXT)''',
        '''CREATE TABLE EXTENSIONS_SEARCHED(extension TEXT)''',
    )

    cursor = db.cursor()
    for statement in schema:
        cursor.execute(statement)

    db.commit()
def get_persistent_db(path):
    """
    Open the persistent database stored at ``path``.

    If ``path`` already exists it is opened as-is. Otherwise a new
    database is created at ``path`` and its tables are set up with
    ``initialize_database`` before it is returned.

    :param path: location of the sqlite3 database file.
    :returns: a ``PersistentDatabase`` connected to ``path``.
    :raises IOError: if ``path`` does not exist and its parent
        directory does not exist either.
    """
    if os.path.exists(path):
        return PersistentDatabase(path)

    directory = os.path.dirname(path)
    # An empty dirname means "current directory", which needs no check.
    if directory and not os.path.exists(directory):
        raise IOError("path '{}' does not exist".format(directory))

    # Every freshly created database needs its tables, not only those
    # given as a bare file name: previously a new file inside an
    # existing directory was returned without any tables.
    db = PersistentDatabase(path)
    try:
        initialize_database(db)
    except Exception:
        # Do not leak the open connection if the schema cannot be created.
        db.close()
        raise
    return db

View File

@ -63,6 +63,7 @@ def order(db, path_specifier):
path_id = get_path_id(db, this_path) path_id = get_path_id(db, this_path)
cursor.execute("INSERT INTO ASSOCIATIONS(file_id, directory_id) VALUES(?, ?)", (rowid, path_id)) cursor.execute("INSERT INTO ASSOCIATIONS(file_id, directory_id) VALUES(?, ?)", (rowid, path_id))
db.commit()
@ -76,5 +77,6 @@ def get_path_id(db, path):
return result[0] return result[0]
cursor.execute("INSERT INTO DIRECTORIES(name) VALUES(?)", (path,)) cursor.execute("INSERT INTO DIRECTORIES(name) VALUES(?)", (path,))
db.commit()
return cursor.lastrowid return cursor.lastrowid

View File

@ -11,6 +11,7 @@ def findall_this_directory(directory, files, extensions, db, stop_on_error):
if(filename.split(".")[-1] in extensions): if(filename.split(".")[-1] in extensions):
filename = os.path.join(directory, filename) filename = os.path.join(directory, filename)
insert_file_into_db(filename, db, stop_on_error) insert_file_into_db(filename, db, stop_on_error)
db.commit()
def insert_file_into_db(filename, db, stop_on_error): def insert_file_into_db(filename, db, stop_on_error):
@ -47,8 +48,21 @@ def insert_file_into_db(filename, db, stop_on_error):
, data) , data)
def findall(directory, walk, extensions, db, stop_on_error): def findall(directory, walk, extensions, db, stop_on_error):
cursor = db.cursor()
cursor.execute("SELECT extension FROM EXTENSIONS_SEARCHED")
ext_already_searched = {i[0] for i in cursor.fetchall()}
ext_to_search = set(extensions) - ext_already_searched
ext_omit = set(extensions) - ext_to_search
if(ext_omit):
module_logger.warn("Omitting the extensions {} as they are already in the database.".format(ext_omit))
extensions = list(ext_to_search)
for dir_, paths, files in os.walk(directory): for dir_, paths, files in os.walk(directory):
findall_this_directory(dir_, files, extensions, db, stop_on_error) findall_this_directory(dir_, files, extensions, db, stop_on_error)
if(not walk): if(not walk):
break break
cursor.executemany("INSERT INTO EXTENSIONS_SEARCHED(extension) VALUES(?)", [(i,) for i in extensions])
db.commit()

View File

@ -14,6 +14,9 @@ import sqlite3
import tempfile import tempfile
import abc import abc
from .db import AbstractDatabase, initialize_database
def _open_db_mem(): def _open_db_mem():
return (sqlite3.connect(":memory:"), None) return (sqlite3.connect(":memory:"), None)
@ -22,27 +25,8 @@ def _open_db_disk():
db = sqlite3.connect(file.name) db = sqlite3.connect(file.name)
return (db, file) return (db, file)
class AbstractTemporaryDatabase(abc.ABC): class AbstractTemporaryDatabase(AbstractDatabase):
""" pass
Abstract base class for all ``TemporaryDatabase``
implementations.
**Note**: ``__init__`` must set ``self._db`` to an
open sqlite3 connection.
"""
def __init__(self):
abc.ABC.__init__(self)
self._db = None
@abc.abstractmethod
def close(self):
pass
def cursor(self):
return self._db.cursor()
def dump_db(self, file):
for line in self._db.iterdump():
file.write("{}\n".format(line))
class MemoryTemporaryDatabase(AbstractTemporaryDatabase): class MemoryTemporaryDatabase(AbstractTemporaryDatabase):
@ -80,33 +64,6 @@ def get_temporary_db(type_):
impl = implementations[type_] impl = implementations[type_]
instance = impl() instance = impl()
initialize_database(instance)
cursor = instance.cursor()
cursor.execute(
'''CREATE TABLE FILES(
name TEXT,
DateTime TEXT,
DateTimeDigitized TEXT,
DateTimeOriginal TEXT,
Model TEXT,
Make TEXT,
Software TEXT)'''
)
cursor.execute(
'''CREATE TABLE DIRECTORIES(
name TEXT)'''
)
cursor.execute(
'''CREATE TABLE ASSOCIATIONS(file_id INTEGER,
directory_id INTEGER)'''
)
cursor.execute(
'''CREATE TABLE KV(key TEXT,
value TEXT)'''
)
return instance return instance