added persistent storage module for large collections
This commit is contained in:
parent
fcaf6e3803
commit
6679ef7cfd
|
@ -28,6 +28,7 @@ Options:
|
|||
|
||||
from .commands import copy, move, placeholders, select
|
||||
from .tmpdb import get_temporary_db
|
||||
from .db import get_persistent_db
|
||||
|
||||
args = docopt.docopt(usage)
|
||||
|
||||
|
@ -40,11 +41,20 @@ logging.debug("ARGUMENTS:")
|
|||
for k,v in args.items():
|
||||
logging.debug("\t{}: \t{}".format(k,v))
|
||||
|
||||
# Choose the database backend: a persistent database when ``--storage``
# is given, otherwise a temporary one of the requested implementation.
# ``stored`` is handed on to the commands below.
stored = bool(args["--storage"])
try:
    if(stored):
        db = get_persistent_db(args["--storage"])
    else:
        db = get_temporary_db(args["--implementation"])
except Exception as e:
    # Opening the database failed: report the reason and stop.
    print(e)
    sys.exit(1)
|
||||
|
||||
if(args["placeholders"]):
|
||||
result = placeholders()
|
||||
|
@ -56,7 +66,8 @@ elif(args["copy"]):
|
|||
, not args["--no-select-stop-on-error"]
|
||||
, args["--walk"]
|
||||
, args["--postfix"]
|
||||
, args["--dry-run"])
|
||||
, args["--dry-run"]
|
||||
, stored)
|
||||
|
||||
elif(args["move"]):
|
||||
result = move(db
|
||||
|
@ -66,14 +77,16 @@ elif(args["move"]):
|
|||
, not args["--no-select-stop-on-error"]
|
||||
, args["--walk"]
|
||||
, args["--postfix"]
|
||||
, args["--dry-run"])
|
||||
, args["--dry-run"]
|
||||
, stored)
|
||||
elif(args["select"]):
|
||||
result = select(db
|
||||
, args["SRC_PATH"]
|
||||
, not args["--no-select-stop-on-error"]
|
||||
, args["--walk"]
|
||||
, args["--postfix"]
|
||||
, args["--dry-run"])
|
||||
, args["--dry-run"]
|
||||
, stored)
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -13,7 +13,13 @@ def placeholders():
|
|||
|
||||
return 0
|
||||
|
||||
def select(db, src_path, stop_on_error, walk, postfix, dryrun):
|
||||
def select( db
|
||||
, src_path
|
||||
, stop_on_error
|
||||
, walk
|
||||
, postfix
|
||||
, dryrun
|
||||
, stored):
|
||||
logger = logging.getLogger(__name__)
|
||||
extensions = postfix.split(",")
|
||||
try:
|
||||
|
@ -51,7 +57,8 @@ def copy(db
|
|||
, stop_on_error
|
||||
, walk
|
||||
, postfix
|
||||
, dryrun):
|
||||
, dryrun
|
||||
, stored):
|
||||
|
||||
return do_copy_or_move(db
|
||||
, src_path
|
||||
|
@ -61,7 +68,8 @@ def copy(db
|
|||
, walk
|
||||
, postfix
|
||||
, dryrun
|
||||
, False)
|
||||
, False
|
||||
, stored)
|
||||
|
||||
def move(db
|
||||
, src_path
|
||||
|
@ -70,7 +78,8 @@ def move(db
|
|||
, stop_on_error
|
||||
, walk
|
||||
, postfix
|
||||
, dryrun):
|
||||
, dryrun
|
||||
, stored):
|
||||
|
||||
return do_copy_or_move(db
|
||||
, src_path
|
||||
|
@ -80,7 +89,8 @@ def move(db
|
|||
, walk
|
||||
, postfix
|
||||
, dryrun
|
||||
, True)
|
||||
, True
|
||||
, stored)
|
||||
|
||||
|
||||
def do_copy_or_move(db
|
||||
|
@ -91,10 +101,12 @@ def do_copy_or_move(db
|
|||
, walk
|
||||
, postfix
|
||||
, dryrun
|
||||
, move):
|
||||
, move
|
||||
, stored):
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
extensions = postfix.split(",")
|
||||
if(not stored):
|
||||
try:
|
||||
findall(src_path, walk, extensions, db, stop_on_error)
|
||||
except Exception as e:
|
||||
|
|
88
autoimport/db.py
Normal file
88
autoimport/db.py
Normal file
|
@ -0,0 +1,88 @@
|
|||
"""
|
||||
This module provides a way to construct (persistent) databases
|
||||
used by ``autoimport``. In normal mode this module is replaced
|
||||
by ``tmpdb``, however it might be useful to keep the data produced
|
||||
by ``autoimport``.
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import abc
|
||||
import os
|
||||
|
||||
class AbstractDatabase(abc.ABC):
    """
    Abstract base class for all database implementations.

    It wraps an sqlite3 connection stored in ``self._db`` and
    forwards the few operations the rest of the package needs.

    **Note**: ``__init__`` of a subclass must set ``self._db`` to an
    open sqlite3 connection.

    Instances can be used as context managers; ``close`` is called
    when the ``with`` block is left, also if an exception was raised.
    """
    def __init__(self):
        abc.ABC.__init__(self)
        # Placeholder only; subclasses replace it with a real connection.
        self._db = None

    @abc.abstractmethod
    def close(self):
        """
        Release the underlying connection. Must be implemented
        by every subclass.
        """
        pass

    def cursor(self):
        """
        Return a new cursor of the underlying connection.
        """
        return self._db.cursor()

    def dump_db(self, file):
        """
        Write the SQL dump of the database to ``file``.

        ``file`` only needs a ``write`` method accepting ``str``.
        Every item produced by ``iterdump()`` is followed by a newline.
        """
        for line in self._db.iterdump():
            file.write("{}\n".format(line))

    def commit(self):
        """
        Commit the pending transaction of the underlying connection.
        """
        return self._db.commit()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        # Never swallow an exception raised inside the ``with`` block.
        return False
|
||||
|
||||
class PersistentDatabase(AbstractDatabase):
    """
    Database backed by an sqlite3 file at ``database_path``.

    The path is passed to ``sqlite3.connect`` unchanged and kept in
    ``self._database_path``.
    """
    def __init__(self, database_path):
        super().__init__()
        self._database_path = database_path
        self._db = sqlite3.connect(self._database_path)

    def close(self):
        """
        Close the sqlite3 connection.
        """
        connection = self._db
        connection.close()
|
||||
|
||||
def initialize_database(db):
    """
    Create the tables used by ``autoimport`` in ``db`` and commit.

    ``db`` must provide ``cursor()`` and ``commit()``. The tables
    created are ``FILES``, ``DIRECTORIES``, ``ASSOCIATIONS``, ``KV``
    and ``EXTENSIONS_SEARCHED``. Plain ``CREATE TABLE`` is used, so
    calling this on a database that already has one of these tables
    raises the sqlite3 error for an existing table.
    """
    # One entry per table, executed in this order on a single cursor.
    schema = (
        "CREATE TABLE FILES(\n"
        "name TEXT,\n"
        "DateTime TEXT,\n"
        "DateTimeDigitized TEXT,\n"
        "DateTimeOriginal TEXT,\n"
        "Model TEXT,\n"
        "Make TEXT,\n"
        "Software TEXT)",

        "CREATE TABLE DIRECTORIES(\n"
        "name TEXT)",

        "CREATE TABLE ASSOCIATIONS(file_id INTEGER,\n"
        "directory_id INTEGER)",

        "CREATE TABLE KV(key TEXT,\n"
        "value TEXT)",

        "CREATE TABLE EXTENSIONS_SEARCHED(extension TEXT)",
    )

    table_cursor = db.cursor()
    for statement in schema:
        table_cursor.execute(statement)
    db.commit()
|
||||
|
||||
|
||||
def get_persistent_db(path):
    """
    Open the persistent database stored at ``path``.

    If ``path`` does not exist yet, the database file is created and
    its tables are set up with ``initialize_database``. This holds
    both for a bare file name and for a path inside an existing
    directory. An existing file is opened as it is.

    Raises ``IOError`` if ``path`` does not exist and its parent
    directory does not exist either.
    """
    is_new = not os.path.exists(path)

    if(is_new):
        parent = os.path.dirname(path)
        # An empty ``parent`` means the current directory.
        if(parent and not os.path.exists(parent)):
            raise IOError("path '{}' does not exist".format(parent))

    db = PersistentDatabase(path)
    if(is_new):
        # sqlite3 has just created an empty file; without the tables
        # every later query would fail with "no such table".
        initialize_database(db)
    return db
|
|
@ -63,6 +63,7 @@ def order(db, path_specifier):
|
|||
path_id = get_path_id(db, this_path)
|
||||
|
||||
cursor.execute("INSERT INTO ASSOCIATIONS(file_id, directory_id) VALUES(?, ?)", (rowid, path_id))
|
||||
db.commit()
|
||||
|
||||
|
||||
|
||||
|
@ -76,5 +77,6 @@ def get_path_id(db, path):
|
|||
return result[0]
|
||||
|
||||
cursor.execute("INSERT INTO DIRECTORIES(name) VALUES(?)", (path,))
|
||||
db.commit()
|
||||
return cursor.lastrowid
|
||||
|
||||
|
|
|
@ -11,6 +11,7 @@ def findall_this_directory(directory, files, extensions, db, stop_on_error):
|
|||
if(filename.split(".")[-1] in extensions):
|
||||
filename = os.path.join(directory, filename)
|
||||
insert_file_into_db(filename, db, stop_on_error)
|
||||
db.commit()
|
||||
|
||||
|
||||
def insert_file_into_db(filename, db, stop_on_error):
|
||||
|
@ -47,8 +48,21 @@ def insert_file_into_db(filename, db, stop_on_error):
|
|||
, data)
|
||||
|
||||
def findall(directory, walk, extensions, db, stop_on_error):
    """
    Search ``directory`` for files with one of the given
    ``extensions`` and hand them to ``findall_this_directory``.

    Extensions already listed in the ``EXTENSIONS_SEARCHED`` table
    are left out and a warning is logged for them. The remaining
    extensions are added to that table after the search and the
    database is committed.

    If ``walk`` is false only ``directory`` itself is searched,
    otherwise all directories below it are searched as well.
    ``stop_on_error`` is passed on to ``findall_this_directory``
    unchanged.
    """
    cursor = db.cursor()
    cursor.execute("SELECT extension FROM EXTENSIONS_SEARCHED")
    ext_already_searched = {row[0] for row in cursor.fetchall()}

    requested = set(extensions)
    ext_omit = requested & ext_already_searched

    if(ext_omit):
        # ``warn`` is a deprecated alias of ``warning``.
        module_logger.warning("Omitting the extensions {} as they are already in the database.".format(ext_omit))

    extensions = list(requested - ext_already_searched)

    # Without extensions left no file can match, so do not walk at all.
    if(extensions):
        for dir_, _subdirs, files in os.walk(directory):
            findall_this_directory(dir_, files, extensions, db, stop_on_error)
            if(not walk):
                # Only the top directory was requested.
                break

    cursor.executemany("INSERT INTO EXTENSIONS_SEARCHED(extension) VALUES(?)", [(i,) for i in extensions])
    db.commit()
|
||||
|
||||
|
|
|
@ -14,6 +14,9 @@ import sqlite3
|
|||
import tempfile
|
||||
import abc
|
||||
|
||||
from .db import AbstractDatabase, initialize_database
|
||||
|
||||
|
||||
def _open_db_mem():
|
||||
return (sqlite3.connect(":memory:"), None)
|
||||
|
||||
|
@ -22,28 +25,9 @@ def _open_db_disk():
|
|||
db = sqlite3.connect(file.name)
|
||||
return (db, file)
|
||||
|
||||
class AbstractTemporaryDatabase(abc.ABC):
|
||||
"""
|
||||
Abstract base class for all ``TemporaryDatabase``
|
||||
implementations.
|
||||
|
||||
**Note**: ``__init__`` must set ``self._db`` to an
|
||||
open sqlite3 connection.
|
||||
"""
|
||||
def __init__(self):
|
||||
abc.ABC.__init__(self)
|
||||
self._db = None
|
||||
@abc.abstractmethod
|
||||
def close(self):
|
||||
class AbstractTemporaryDatabase(AbstractDatabase):
|
||||
pass
|
||||
|
||||
def cursor(self):
|
||||
return self._db.cursor()
|
||||
|
||||
def dump_db(self, file):
|
||||
for line in self._db.iterdump():
|
||||
file.write("{}\n".format(line))
|
||||
|
||||
|
||||
class MemoryTemporaryDatabase(AbstractTemporaryDatabase):
|
||||
def __init__(self):
|
||||
|
@ -80,33 +64,6 @@ def get_temporary_db(type_):
|
|||
|
||||
impl = implementations[type_]
|
||||
instance = impl()
|
||||
|
||||
cursor = instance.cursor()
|
||||
|
||||
cursor.execute(
|
||||
'''CREATE TABLE FILES(
|
||||
name TEXT,
|
||||
DateTime TEXT,
|
||||
DateTimeDigitized TEXT,
|
||||
DateTimeOriginal TEXT,
|
||||
Model TEXT,
|
||||
Make TEXT,
|
||||
Software TEXT)'''
|
||||
)
|
||||
|
||||
cursor.execute(
|
||||
'''CREATE TABLE DIRECTORIES(
|
||||
name TEXT)'''
|
||||
)
|
||||
|
||||
cursor.execute(
|
||||
'''CREATE TABLE ASSOCIATIONS(file_id INTEGER,
|
||||
directory_id INTEGER)'''
|
||||
)
|
||||
|
||||
cursor.execute(
|
||||
'''CREATE TABLE KV(key TEXT,
|
||||
value TEXT)'''
|
||||
)
|
||||
initialize_database(instance)
|
||||
|
||||
return instance
|
||||
|
|
Loading…
Reference in New Issue
Block a user