From 852762ddfc8891fe4b05716e29c70e2e14e7488b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Kn=C3=BCttel?=
Date: Fri, 8 Mar 2019 10:54:42 +0100
Subject: [PATCH] initial

---
 README.rst                    |   0
 bunker/__init__.py            |   0
 bunker/backends/__init__.py   |   0
 bunker/files/__init__.py      |   0
 bunker/files/bunkerfile.py    | 146 ++++++++++++++++++++++++++++++++++
 bunker/files/tarfile.py       | 100 +++++++++++++++++++++++
 setup.py                      |  35 ++++++++
 test/test_files_bunkerfile.py |  29 +++++++
 test/test_files_tarfile.py    |  23 ++++++
 9 files changed, 333 insertions(+)
 create mode 100644 README.rst
 create mode 100644 bunker/__init__.py
 create mode 100644 bunker/backends/__init__.py
 create mode 100644 bunker/files/__init__.py
 create mode 100644 bunker/files/bunkerfile.py
 create mode 100644 bunker/files/tarfile.py
 create mode 100644 setup.py
 create mode 100644 test/test_files_bunkerfile.py
 create mode 100644 test/test_files_tarfile.py

diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..e69de29
diff --git a/bunker/__init__.py b/bunker/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/bunker/backends/__init__.py b/bunker/backends/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/bunker/files/__init__.py b/bunker/files/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/bunker/files/bunkerfile.py b/bunker/files/bunkerfile.py
new file mode 100644
index 0000000..1bd211d
--- /dev/null
+++ b/bunker/files/bunkerfile.py
@@ -0,0 +1,146 @@
+import tarfile
+import sys
+import io
+import tempfile
+import os
+
+class BunkeredFile(io.RawIOBase):
+    def __init__(self
+            , file_
+            , name
+            , size=None
+            , isvirtual=False
+            , ismem=False
+            , bunker=None):
+        self._file = file_
+        self.isvirtual = isvirtual
+        self.ismem = ismem
+
+        self.name = name
+        self.size = size
+        self.bunker = bunker
+
+
+
+    # These are just the methods inherited from RawIOBase, delegated to the wrapped file.
+    # We need them to operate like a proper file.
+    def read(self, size=-1):
+        return self._file.read(size)
+    def readinto(self, b):
+        return self._file.readinto(b)
+    def readall(self):
+        return self._file.readall()
+    def write(self, b):
+        return self._file.write(b)
+    def close(self, writeback=True):
+        if(writeback and self.bunker):
+            self.bunker.writeback_file(self.name)
+        return self._file.close()
+    def fileno(self):
+        return self._file.fileno()
+    def flush(self):
+        return self._file.flush()
+    def isatty(self):
+        return self._file.isatty()
+    def readable(self):
+        return self._file.readable()
+    def readline(self, size=-1):
+        return self._file.readline(size)
+    def readlines(self, hint=-1):
+        return self._file.readlines(hint)
+    def seek(self, offset, whence=0):
+        return self._file.seek(offset, whence)
+    def seekable(self):
+        return self._file.seekable()
+    def tell(self):
+        return self._file.tell()
+    def truncate(self, size=None):
+        return self._file.truncate(size)
+    def writable(self):
+        return self._file.writable()
+    def writelines(self, lines):
+        return self._file.writelines(lines)
+    def __del__(self):
+        del(self._file)
+    @property
+    def closed(self):
+        return self._file.closed
+
+
+    # Classmethods to construct new BunkeredFiles.
+    @classmethod
+    def from_file(cls, file_, name):
+        """
+        Construct a new BunkeredFile from an existing file on disk.
+        This is used to bunker files and directories from the disk.
+
+        DO NOT (!) use this to construct BunkeredFiles from BytesIO.
+        """
+        size = None
+        if(file_.seekable()):
+            size = file_.seek(0, 2)
+            file_.seek(0, 0)
+        return cls(file_, name, size=size, isvirtual=False, ismem=False)
+
+    @classmethod
+    def empty(cls, name):
+        """
+        Construct a new BunkeredFile that uses BytesIO in the background.
+
+        This is used either when loading data from a remote resource or for
+        databases.
+        """
+        file_ = io.BytesIO()
+        return cls(file_, name, size=None, isvirtual=True, ismem=True)
+
+    @classmethod
+    def from_BytesIO(cls, bytes_, name):
+        size = None
+        if(bytes_.seekable()):
+            size = bytes_.seek(0, 2)
+            bytes_.seek(0, 0)
+        return cls(bytes_, name, size=size, isvirtual=True, ismem=True)
+
+    @classmethod
+    def from_tar(cls
+            , tarfile
+            , tarinfo
+            , rewriteable_tar_file=None
+            , max_in_memory_bytes=2**20
+            , mktempfile=tempfile.TemporaryFile):
+        """
+        Load the file specified by ``tarinfo`` from ``tarfile``. If the size of the file
+        does not exceed ``max_in_memory_bytes`` it will be loaded into memory which increases speed
+        and security because an attacker cannot find the file on-disk.
+
+        If the size is greater than ``max_in_memory_bytes`` it will be loaded into a file created by
+        ``mktempfile``. In order to increase security this function should create a file that is hard to find.
+        Having the file on the disk is a security vulnerability, because while the file is open it can be found.
+
+        Also this method is vulnerable to malformed tar files.
+        See `the docs of tarfile <https://docs.python.org/3/library/tarfile.html>`_.
+        """
+        if(tarinfo.size > max_in_memory_bytes):
+            file_ = mktempfile()
+            ismem = False
+        else:
+            file_ = io.BytesIO()
+            ismem = True
+
+        with tarfile.extractfile(tarinfo) as fin:
+            file_.write(fin.read())
+        file_.seek(0, 0)
+        return cls(file_, tarinfo.name, size=tarinfo.size, ismem=ismem, isvirtual=True, bunker=rewriteable_tar_file)
+
+    def __len__(self):
+        if(not self._file.seekable()):
+            return 0
+        rewind = self._file.tell()
+        self._file.seek(0, 2)
+        length = self._file.tell()
+        self._file.seek(rewind, 0)
+        return length
+
+    def rewind(self):
+        self._file.seek(0, 0)
+
diff --git a/bunker/files/tarfile.py b/bunker/files/tarfile.py
new file mode 100644
index 0000000..34e5776
--- /dev/null
+++ b/bunker/files/tarfile.py
@@ -0,0 +1,100 @@
+import os
+import tarfile
+import tempfile
+
+from .bunkerfile import BunkeredFile
+
+class RewriteableTarFile(object):
+    def __init__(self, path):
+        self._path = path
+        if(not os.path.exists(path) or not tarfile.is_tarfile(path)):
+            raise OSError("file {} does not exist or is not a tar file".format(path))
+        self._open_files = dict()
+
+    @classmethod
+    def open(cls, path):
+        if(not os.path.exists(path)):
+            tarfile.open(name=path, mode="x").close()
+        return cls(path)
+
+    def _open_handle(self, mode="r"):
+        return tarfile.open(name=self._path, mode=mode)
+
+    def get_file(self, membername, max_in_memory_bytes=2**20, mktempfile=tempfile.TemporaryFile):
+        if(membername in self._open_files):
+            return self._open_files[membername]
+
+        with self._open_handle() as handle:
+            info = handle.getmember(membername)
+            file_ = BunkeredFile.from_tar(handle, info, self, max_in_memory_bytes, mktempfile)
+
+        self._open_files[membername] = file_
+        return file_
+
+    def writeback_file(self, membername):
+        if(not membername in self._open_files):
+            raise KeyError("cannot find open file")
+        handle = self._open_handle()
+        os.unlink(self._path)
+
+        open_file = self._open_files[membername]
+        open_file.seek(0, 0)
+
+        new_handle = self._open_handle(mode="x")
+        for member in handle.getmembers():
+            if(member.name == membername):
+                member.size = len(open_file)
+                open_file.rewind()
+                new_handle.addfile(member, open_file)
+                continue
+            new_handle.addfile(tarinfo=member, fileobj=handle.extractfile(member))
+        new_handle.close()
+        handle.close()
+        del(self._open_files[membername])
+
+    def add_file(self, file_: BunkeredFile):
+        handle = self._open_handle(mode="a")
+        tarinfo = tarfile.TarInfo(name=file_.name)
+        tarinfo.size = len(file_)
+        file_.rewind()
+        handle.addfile(tarinfo=tarinfo
+                , fileobj=file_)
+        handle.close()
+
+    def delete_file(self, membername):
+        if(membername in self._open_files):
+            del(self._open_files[membername])
+
+        handle = self._open_handle()
+        os.unlink(self._path)
+
+        new_handle = self._open_handle(mode="x")
+        for member in handle.getmembers():
+            if(member.name == membername):
+                continue
+            new_handle.addfile(member, handle.extractfile(member))
+        new_handle.close()
+        handle.close()
+
+    def close(self):
+        """
+        Writes back all open files.
+        """
+        if(not self._open_files):
+            return
+
+        # Write back all open files.
+        handle = self._open_handle()
+        os.unlink(self._path)
+        new_handle = self._open_handle(mode="x")
+        for member in handle.getmembers():
+            if(member.name in self._open_files):
+                member.size = len(self._open_files[member.name])
+                self._open_files[member.name].rewind()
+                new_handle.addfile(tarinfo=member, fileobj=self._open_files[member.name])
+                self._open_files[member.name].close(writeback=False)
+                continue
+            new_handle.addfile(member, handle.extractfile(member))
+        new_handle.close()
+        handle.close()
+
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..1b13c0a
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,35 @@
+# Copyright (c) 2018 Daniel Knüttel                                           #
+#                                                                             #
+# This file is part of licor.                                                 #
+#                                                                             #
+# licor is free software: you can redistribute it and/or modify               #
+# it under the terms of the GNU Affero General Public License as published by #
+# the Free Software Foundation, either version 3 of the License, or           #
+# (at your option) any later version.                                         #
+#                                                                             #
+# licor is distributed in the hope that it will be useful,                    #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of              #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the                #
+# GNU General Public License for more details.                                #
+#                                                                             #
+# You should have received a copy of the GNU Affero General Public License    #
+# along with licor. If not, see <http://www.gnu.org/licenses/>.               #
+#                                                                             #
+#                                                                             #
+
+from setuptools import setup, find_packages
+
+setup(
+    name = "bunker",
+    version = "0.0.0",
+    packages = find_packages(),
+    author = "Daniel Knüttel",
+    author_email = "daniel.knuettel@daknuett.eu",
+    url = "https://daknuett.eu/gitea/daknuett/bunker",
+    #install_requires = ["docopt"],
+    description = "A module for encrypted data storage",
+    long_description = open("README.rst").read(),
+
+    #entry_points = {"console_scripts": ["licor = licor.main:main"]}
+    )
+
diff --git a/test/test_files_bunkerfile.py b/test/test_files_bunkerfile.py
new file mode 100644
index 0000000..1b1c80b
--- /dev/null
+++ b/test/test_files_bunkerfile.py
@@ -0,0 +1,29 @@
+import os
+import tarfile
+from bunker.files.bunkerfile import BunkeredFile
+
+def test_load_from_tar(tmpdir):
+    tmpdname = str(tmpdir)
+
+    with open(os.path.join(tmpdname, "a.tx"), "wb") as f:
+        f.write(b"abcdefg")
+    with open(os.path.join(tmpdname, "b.tx"), "wb") as f:
+        f.write(b"foobar")
+
+    f = tarfile.TarFile(os.path.join(tmpdname, "test.tar"), "w")
+
+    f.add(os.path.join(tmpdname, "a.tx"))
+    f.add(os.path.join(tmpdname, "b.tx"))
+
+    f.close()
+
+    f = tarfile.TarFile(os.path.join(tmpdname, "test.tar"))
+
+    ainfo = f.next()
+    binfo = f.next()
+
+    a = BunkeredFile.from_tar(f, ainfo)
+    b = BunkeredFile.from_tar(f, binfo)
+
+    assert a.read() == b"abcdefg"
+    assert b.read() == b"foobar"
diff --git a/test/test_files_tarfile.py b/test/test_files_tarfile.py
new file mode 100644
index 0000000..62ff48b
--- /dev/null
+++ b/test/test_files_tarfile.py
@@ -0,0 +1,23 @@
+import os
+import tarfile
+
+from bunker.files.tarfile import RewriteableTarFile
+from bunker.files.bunkerfile import BunkeredFile
+
+
+def test_create(tmpdir):
+    tmpdname = str(tmpdir)
+
+    tf = RewriteableTarFile.open(os.path.join(tmpdname, "test.bunker"))
+    f = BunkeredFile.empty("__bunker_main__")
+    tf.add_file(f)
+
+    f = tf.get_file("__bunker_main__")
+    f.write(b"foobar")
+
+    tf.close()
+
+    tf = RewriteableTarFile.open(os.path.join(tmpdname, "test.bunker"))
+    f = tf.get_file("__bunker_main__")
+
+    assert f.read() == b"foobar"