added persistent storage module for large collections
This commit is contained in:
parent
fcaf6e3803
commit
6679ef7cfd
|
@ -28,6 +28,7 @@ Options:
|
||||||
|
|
||||||
from .commands import copy, move, placeholders, select
|
from .commands import copy, move, placeholders, select
|
||||||
from .tmpdb import get_temporary_db
|
from .tmpdb import get_temporary_db
|
||||||
|
from .db import get_persistent_db
|
||||||
|
|
||||||
args = docopt.docopt(usage)
|
args = docopt.docopt(usage)
|
||||||
|
|
||||||
|
@ -40,11 +41,20 @@ logging.debug("ARGUMENTS:")
|
||||||
for k,v in args.items():
|
for k,v in args.items():
|
||||||
logging.debug("\t{}: \t{}".format(k,v))
|
logging.debug("\t{}: \t{}".format(k,v))
|
||||||
|
|
||||||
try:
|
if(not args["--storage"]):
|
||||||
db = get_temporary_db(args["--implementation"])
|
try:
|
||||||
except Exception as e:
|
db = get_temporary_db(args["--implementation"])
|
||||||
print(e)
|
except Exception as e:
|
||||||
sys.exit(1)
|
print(e)
|
||||||
|
sys.exit(1)
|
||||||
|
stored = False
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
db = get_persistent_db(args["--storage"])
|
||||||
|
except Exception as e:
|
||||||
|
print(e)
|
||||||
|
sys.exit(1)
|
||||||
|
stored = True
|
||||||
|
|
||||||
if(args["placeholders"]):
|
if(args["placeholders"]):
|
||||||
result = placeholders()
|
result = placeholders()
|
||||||
|
@ -56,7 +66,8 @@ elif(args["copy"]):
|
||||||
, not args["--no-select-stop-on-error"]
|
, not args["--no-select-stop-on-error"]
|
||||||
, args["--walk"]
|
, args["--walk"]
|
||||||
, args["--postfix"]
|
, args["--postfix"]
|
||||||
, args["--dry-run"])
|
, args["--dry-run"]
|
||||||
|
, stored)
|
||||||
|
|
||||||
elif(args["move"]):
|
elif(args["move"]):
|
||||||
result = move(db
|
result = move(db
|
||||||
|
@ -66,14 +77,16 @@ elif(args["move"]):
|
||||||
, not args["--no-select-stop-on-error"]
|
, not args["--no-select-stop-on-error"]
|
||||||
, args["--walk"]
|
, args["--walk"]
|
||||||
, args["--postfix"]
|
, args["--postfix"]
|
||||||
, args["--dry-run"])
|
, args["--dry-run"]
|
||||||
|
, stored)
|
||||||
elif(args["select"]):
|
elif(args["select"]):
|
||||||
result = select(db
|
result = select(db
|
||||||
, args["SRC_PATH"]
|
, args["SRC_PATH"]
|
||||||
, not args["--no-select-stop-on-error"]
|
, not args["--no-select-stop-on-error"]
|
||||||
, args["--walk"]
|
, args["--walk"]
|
||||||
, args["--postfix"]
|
, args["--postfix"]
|
||||||
, args["--dry-run"])
|
, args["--dry-run"]
|
||||||
|
, stored)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -13,7 +13,13 @@ def placeholders():
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
def select(db, src_path, stop_on_error, walk, postfix, dryrun):
|
def select( db
|
||||||
|
, src_path
|
||||||
|
, stop_on_error
|
||||||
|
, walk
|
||||||
|
, postfix
|
||||||
|
, dryrun
|
||||||
|
, stored):
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
extensions = postfix.split(",")
|
extensions = postfix.split(",")
|
||||||
try:
|
try:
|
||||||
|
@ -51,7 +57,8 @@ def copy(db
|
||||||
, stop_on_error
|
, stop_on_error
|
||||||
, walk
|
, walk
|
||||||
, postfix
|
, postfix
|
||||||
, dryrun):
|
, dryrun
|
||||||
|
, stored):
|
||||||
|
|
||||||
return do_copy_or_move(db
|
return do_copy_or_move(db
|
||||||
, src_path
|
, src_path
|
||||||
|
@ -61,7 +68,8 @@ def copy(db
|
||||||
, walk
|
, walk
|
||||||
, postfix
|
, postfix
|
||||||
, dryrun
|
, dryrun
|
||||||
, False)
|
, False
|
||||||
|
, stored)
|
||||||
|
|
||||||
def move(db
|
def move(db
|
||||||
, src_path
|
, src_path
|
||||||
|
@ -70,7 +78,8 @@ def move(db
|
||||||
, stop_on_error
|
, stop_on_error
|
||||||
, walk
|
, walk
|
||||||
, postfix
|
, postfix
|
||||||
, dryrun):
|
, dryrun
|
||||||
|
, stored):
|
||||||
|
|
||||||
return do_copy_or_move(db
|
return do_copy_or_move(db
|
||||||
, src_path
|
, src_path
|
||||||
|
@ -80,7 +89,8 @@ def move(db
|
||||||
, walk
|
, walk
|
||||||
, postfix
|
, postfix
|
||||||
, dryrun
|
, dryrun
|
||||||
, True)
|
, True
|
||||||
|
, stored)
|
||||||
|
|
||||||
|
|
||||||
def do_copy_or_move(db
|
def do_copy_or_move(db
|
||||||
|
@ -91,16 +101,18 @@ def do_copy_or_move(db
|
||||||
, walk
|
, walk
|
||||||
, postfix
|
, postfix
|
||||||
, dryrun
|
, dryrun
|
||||||
, move):
|
, move
|
||||||
|
, stored):
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
extensions = postfix.split(",")
|
extensions = postfix.split(",")
|
||||||
try:
|
if(not stored):
|
||||||
findall(src_path, walk, extensions, db, stop_on_error)
|
try:
|
||||||
except Exception as e:
|
findall(src_path, walk, extensions, db, stop_on_error)
|
||||||
logger.error(e)
|
except Exception as e:
|
||||||
logger.debug(traceback.format_exc())
|
logger.error(e)
|
||||||
return 1
|
logger.debug(traceback.format_exc())
|
||||||
|
return 1
|
||||||
cursor = db.cursor()
|
cursor = db.cursor()
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
'''SELECT COUNT(name) FROM FILES'''
|
'''SELECT COUNT(name) FROM FILES'''
|
||||||
|
|
88
autoimport/db.py
Normal file
88
autoimport/db.py
Normal file
|
@ -0,0 +1,88 @@
|
||||||
|
"""
|
||||||
|
This module provides a way to construct (persistent) databases
used by ``autoimport``. In normal mode ``tmpdb`` is used instead
of this module; however, it can be useful to keep the data produced
by ``autoimport``.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sqlite3
|
||||||
|
import abc
|
||||||
|
import os
|
||||||
|
|
||||||
|
class AbstractDatabase(abc.ABC):
    """
    Abstract base class for all database wrappers used by
    ``autoimport``.

    An instance wraps a single sqlite3 connection stored in
    ``self._db`` and forwards ``cursor``, ``commit`` and ``dump_db``
    to it. Instances can be used as context managers: leaving the
    ``with`` block calls ``close``.

    **Note**: ``__init__`` of a subclass must set ``self._db`` to an
    open sqlite3 connection.
    """

    def __init__(self):
        super().__init__()
        # Placeholder only; subclasses replace it with a live connection.
        self._db = None

    @abc.abstractmethod
    def close(self):
        """Release the connection and whatever resources belong to it."""

    def __enter__(self):
        """Return the database itself so it can be bound by ``with ... as``."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the database; exceptions from the block are not suppressed."""
        self.close()
        return False

    def cursor(self):
        """Return a new cursor of the wrapped sqlite3 connection."""
        return self._db.cursor()

    def dump_db(self, file):
        """
        Write an SQL text dump of the database to ``file``.

        ``file`` must provide ``write``; every statement yielded by
        the connection's ``iterdump`` is written on its own line.
        """
        for line in self._db.iterdump():
            file.write("{}\n".format(line))

    def commit(self):
        """Commit the pending transaction of the wrapped connection."""
        return self._db.commit()
|
||||||
|
|
||||||
|
class PersistentDatabase(AbstractDatabase):
    """
    Database wrapper whose sqlite3 connection is opened on the file
    given as ``database_path``.
    """

    def __init__(self, database_path):
        super().__init__()
        # Remember where the database lives before connecting to it.
        self._database_path = database_path
        self._db = sqlite3.connect(database_path)

    def close(self):
        """Close the wrapped sqlite3 connection."""
        connection = self._db
        connection.close()
|
||||||
|
|
||||||
|
def initialize_database(db):
    """
    Create the tables used by ``autoimport`` in ``db`` and commit.

    ``db`` must provide ``cursor()`` and ``commit()``. The tables
    created are ``FILES``, ``DIRECTORIES``, ``ASSOCIATIONS``, ``KV``
    and ``EXTENSIONS_SEARCHED``. The statements are plain
    ``CREATE TABLE`` statements, so the call fails if one of the
    tables already exists.
    """
    # One entry per table, executed in this order.
    schema = (
        '''CREATE TABLE FILES(
            name TEXT,
            DateTime TEXT,
            DateTimeDigitized TEXT,
            DateTimeOriginal TEXT,
            Model TEXT,
            Make TEXT,
            Software TEXT)''',
        '''CREATE TABLE DIRECTORIES(
            name TEXT)''',
        '''CREATE TABLE ASSOCIATIONS(file_id INTEGER,
            directory_id INTEGER)''',
        '''CREATE TABLE KV(key TEXT,
            value TEXT)''',
        '''CREATE TABLE EXTENSIONS_SEARCHED(extension TEXT)''',
    )

    cur = db.cursor()
    for statement in schema:
        cur.execute(statement)
    db.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def get_persistent_db(path):
    """
    Open the persistent database stored at ``path``.

    If nothing exists at ``path`` yet, a new database is created
    there and its tables are set up with ``initialize_database``.
    An existing file is opened as it is.

    :param path: location of the sqlite3 database file.
    :returns: an open ``PersistentDatabase``.
    :raises IOError: if ``path`` does not exist and its parent
        directory does not exist either.
    """
    if os.path.exists(path):
        return PersistentDatabase(path)

    parent = os.path.dirname(path)
    # An empty ``parent`` means "current directory": nothing to check.
    if parent and not os.path.exists(parent):
        raise IOError("path '{}' does not exist".format(parent))

    # The file is new in every remaining case, so the schema has to be
    # created here -- not only when ``path`` has no directory component.
    db = PersistentDatabase(path)
    try:
        initialize_database(db)
    except Exception:
        # Do not hand a half-open connection to the garbage collector.
        db.close()
        raise
    return db
|
|
@ -63,6 +63,7 @@ def order(db, path_specifier):
|
||||||
path_id = get_path_id(db, this_path)
|
path_id = get_path_id(db, this_path)
|
||||||
|
|
||||||
cursor.execute("INSERT INTO ASSOCIATIONS(file_id, directory_id) VALUES(?, ?)", (rowid, path_id))
|
cursor.execute("INSERT INTO ASSOCIATIONS(file_id, directory_id) VALUES(?, ?)", (rowid, path_id))
|
||||||
|
db.commit()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -76,5 +77,6 @@ def get_path_id(db, path):
|
||||||
return result[0]
|
return result[0]
|
||||||
|
|
||||||
cursor.execute("INSERT INTO DIRECTORIES(name) VALUES(?)", (path,))
|
cursor.execute("INSERT INTO DIRECTORIES(name) VALUES(?)", (path,))
|
||||||
|
db.commit()
|
||||||
return cursor.lastrowid
|
return cursor.lastrowid
|
||||||
|
|
||||||
|
|
|
@ -11,6 +11,7 @@ def findall_this_directory(directory, files, extensions, db, stop_on_error):
|
||||||
if(filename.split(".")[-1] in extensions):
|
if(filename.split(".")[-1] in extensions):
|
||||||
filename = os.path.join(directory, filename)
|
filename = os.path.join(directory, filename)
|
||||||
insert_file_into_db(filename, db, stop_on_error)
|
insert_file_into_db(filename, db, stop_on_error)
|
||||||
|
db.commit()
|
||||||
|
|
||||||
|
|
||||||
def insert_file_into_db(filename, db, stop_on_error):
|
def insert_file_into_db(filename, db, stop_on_error):
|
||||||
|
@ -47,8 +48,21 @@ def insert_file_into_db(filename, db, stop_on_error):
|
||||||
, data)
|
, data)
|
||||||
|
|
||||||
def findall(directory, walk, extensions, db, stop_on_error):
    """
    Search ``directory`` for files with one of ``extensions`` and
    hand them to ``findall_this_directory`` for insertion into ``db``.

    Extensions already listed in the ``EXTENSIONS_SEARCHED`` table are
    skipped with a warning. The extensions that were searched are
    added to that table afterwards and the change is committed.

    :param directory: directory in which the search starts.
    :param walk: if false, only ``directory`` itself is searched;
        otherwise every directory yielded by ``os.walk`` is searched.
    :param extensions: iterable of file extensions to look for.
    :param db: database object providing ``cursor()`` and ``commit()``.
    :param stop_on_error: passed through to ``findall_this_directory``.
    """
    cursor = db.cursor()
    cursor.execute("SELECT extension FROM EXTENSIONS_SEARCHED")
    ext_already_searched = {i[0] for i in cursor.fetchall()}
    ext_to_search = set(extensions) - ext_already_searched
    ext_omit = set(extensions) - ext_to_search

    if ext_omit:
        # ``Logger.warn`` is a deprecated alias; ``warning`` is the
        # supported spelling. The message text is unchanged.
        module_logger.warning("Omitting the extensions {} as they are already in the database.".format(ext_omit))

    extensions = list(ext_to_search)
    for dir_, paths, files in os.walk(directory):
        findall_this_directory(dir_, files, extensions, db, stop_on_error)
        if not walk:
            # Only the top-level directory was requested.
            break

    # Record what has been searched so a later run can skip it.
    cursor.executemany("INSERT INTO EXTENSIONS_SEARCHED(extension) VALUES(?)", [(i,) for i in extensions])
    db.commit()
|
||||||
|
|
||||||
|
|
|
@ -14,6 +14,9 @@ import sqlite3
|
||||||
import tempfile
|
import tempfile
|
||||||
import abc
|
import abc
|
||||||
|
|
||||||
|
from .db import AbstractDatabase, initialize_database
|
||||||
|
|
||||||
|
|
||||||
def _open_db_mem():
|
def _open_db_mem():
|
||||||
return (sqlite3.connect(":memory:"), None)
|
return (sqlite3.connect(":memory:"), None)
|
||||||
|
|
||||||
|
@ -22,27 +25,8 @@ def _open_db_disk():
|
||||||
db = sqlite3.connect(file.name)
|
db = sqlite3.connect(file.name)
|
||||||
return (db, file)
|
return (db, file)
|
||||||
|
|
||||||
class AbstractTemporaryDatabase(abc.ABC):
|
class AbstractTemporaryDatabase(AbstractDatabase):
|
||||||
"""
|
pass
|
||||||
Abstract base class for all ``TemporaryDatabase``
|
|
||||||
implementations.
|
|
||||||
|
|
||||||
**Note**: ``__init__`` must set ``self._db`` to an
|
|
||||||
open sqlite3 connection.
|
|
||||||
"""
|
|
||||||
def __init__(self):
|
|
||||||
abc.ABC.__init__(self)
|
|
||||||
self._db = None
|
|
||||||
@abc.abstractmethod
|
|
||||||
def close(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def cursor(self):
|
|
||||||
return self._db.cursor()
|
|
||||||
|
|
||||||
def dump_db(self, file):
|
|
||||||
for line in self._db.iterdump():
|
|
||||||
file.write("{}\n".format(line))
|
|
||||||
|
|
||||||
|
|
||||||
class MemoryTemporaryDatabase(AbstractTemporaryDatabase):
|
class MemoryTemporaryDatabase(AbstractTemporaryDatabase):
|
||||||
|
@ -80,33 +64,6 @@ def get_temporary_db(type_):
|
||||||
|
|
||||||
impl = implementations[type_]
|
impl = implementations[type_]
|
||||||
instance = impl()
|
instance = impl()
|
||||||
|
initialize_database(instance)
|
||||||
cursor = instance.cursor()
|
|
||||||
|
|
||||||
cursor.execute(
|
|
||||||
'''CREATE TABLE FILES(
|
|
||||||
name TEXT,
|
|
||||||
DateTime TEXT,
|
|
||||||
DateTimeDigitized TEXT,
|
|
||||||
DateTimeOriginal TEXT,
|
|
||||||
Model TEXT,
|
|
||||||
Make TEXT,
|
|
||||||
Software TEXT)'''
|
|
||||||
)
|
|
||||||
|
|
||||||
cursor.execute(
|
|
||||||
'''CREATE TABLE DIRECTORIES(
|
|
||||||
name TEXT)'''
|
|
||||||
)
|
|
||||||
|
|
||||||
cursor.execute(
|
|
||||||
'''CREATE TABLE ASSOCIATIONS(file_id INTEGER,
|
|
||||||
directory_id INTEGER)'''
|
|
||||||
)
|
|
||||||
|
|
||||||
cursor.execute(
|
|
||||||
'''CREATE TABLE KV(key TEXT,
|
|
||||||
value TEXT)'''
|
|
||||||
)
|
|
||||||
|
|
||||||
return instance
|
return instance
|
||||||
|
|
Loading…
Reference in New Issue
Block a user