#
# The Qubes OS Project, https://www.qubes-os.org/
#
# Copyright (C) 2018 Rusty Bird
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see
# <https://www.gnu.org/licenses/>.
#

''' Driver for handling VM images as files, without any device-mapper
    involvement. A reflink-capable filesystem is strongly recommended,
    but not required.
'''

import collections
import errno
import fcntl
import glob
import logging
import os
import re
import subprocess
import tempfile
from contextlib import contextmanager, suppress

import qubes.storage

BLKSIZE = 512  # os.stat().st_blocks is counted in 512-byte units
FICLONE = 1074041865  # see ioctl_ficlone manpage
LOGGER = logging.getLogger('qubes.storage.reflink')


class ReflinkPool(qubes.storage.Pool):
    ''' Pool that stores every volume as image files below dir_path.

        Volumes are tracked in an in-memory dict keyed by volume vid.
    '''
    # Name under which setup.py registers this class in the
    # 'qubes.storage' entry point group.
    driver = 'file-reflink'
    _known_dir_path_prefixes = ['appvms', 'vm-templates']

    def __init__(self, dir_path, setup_check='yes', revisions_to_keep=1,
                 **kwargs):
        ''' Remember the (absolutized) pool directory.

            setup_check is parsed with qubes.property.bool; when true,
            setup() refuses directories without reflink support.
        '''
        super().__init__(revisions_to_keep=revisions_to_keep, **kwargs)
        self._volumes = {}
        self.dir_path = os.path.abspath(dir_path)
        self.setup_check = qubes.property.bool(None, None, setup_check)

    def setup(self):
        ''' Create the pool directory and its known subdirectories.

            Raises StoragePoolException if setup_check is enabled and
            the directory does not support reflinks; a directory that
            was created by this call is removed again in that case.
        '''
        created = _make_dir(self.dir_path)
        if self.setup_check and not is_reflink_supported(self.dir_path):
            if created:
                _remove_empty_dir(self.dir_path)
            raise qubes.storage.StoragePoolException(
                'The filesystem for {!r} does not support reflinks. If you'
                ' can live with VM startup delays and wasted disk space, pass'
                ' the "setup_check=no" option.'.format(self.dir_path))
        for dir_path_prefix in self._known_dir_path_prefixes:
            _make_dir(os.path.join(self.dir_path, dir_path_prefix))
        return self

    def init_volume(self, vm, volume_config):
        ''' Build a ReflinkVolume from volume_config and register it.

            Fills in 'pool', and defaults for 'revisions_to_keep' and
            'vid' (<dir_path_prefix>/<vm name>/<volume name>), in
            volume_config. Raises AssertionError for a VM whose
            dir_path_prefix is not one of the known prefixes.
        '''
        # Fail closed on any strange VM dir_path_prefix, just in case
        # /etc/udev/rules/00-qubes-ignore-devices.rules needs updating.
        # Raised explicitly (not via "assert") so that the check still
        # runs when Python is started with -O.
        if vm.dir_path_prefix not in self._known_dir_path_prefixes:
            raise AssertionError(
                'Unknown dir_path_prefix {!r}'.format(vm.dir_path_prefix))

        volume_config['pool'] = self
        if 'revisions_to_keep' not in volume_config:
            volume_config['revisions_to_keep'] = self.revisions_to_keep
        if 'vid' not in volume_config:
            volume_config['vid'] = os.path.join(vm.dir_path_prefix, vm.name,
                                                volume_config['name'])
        volume = ReflinkVolume(**volume_config)
        self._volumes[volume_config['vid']] = volume
        return volume

    def list_volumes(self):
        ''' Return a list of all volume objects registered so far. '''
        return list(self._volumes.values())

    def get_volume(self, vid):
        ''' Return the registered volume for vid (KeyError if unknown). '''
        return self._volumes[vid]

    def destroy(self):
        ''' Nothing to do: no files or directories are removed. '''

    @property
    def config(self):
        ''' Pool settings as a dict (name, dir_path, driver,
            revisions_to_keep).
        '''
        return {
            'name': self.name,
            'dir_path': self.dir_path,
            'driver': ReflinkPool.driver,
            'revisions_to_keep': self.revisions_to_keep
        }

    @property
    def size(self):
        ''' Total size in bytes of the filesystem holding dir_path. '''
        statvfs = os.statvfs(self.dir_path)
        return statvfs.f_frsize * statvfs.f_blocks

    @property
    def usage(self):
        ''' Used bytes (total minus free blocks) of the filesystem
            holding dir_path.
        '''
        statvfs = os.statvfs(self.dir_path)
        return statvfs.f_frsize * (statvfs.f_blocks - statvfs.f_bfree)


class ReflinkVolume(qubes.storage.Volume):
    ''' Volume backed by image files in the pool directory:

        <vid>.img             clean image (_path_clean)
        <vid>-dirty.img       image in use while started (_path_dirty)
        <vid>.img@<revision>  older clean images (_path_revision)
    '''

    def create(self):
        ''' Create an empty sparse clean image for a save_on_stop volume
            that is not snap_on_start; otherwise do nothing.
        '''
        if self.save_on_stop and not self.snap_on_start:
            _create_sparse_file(self._path_clean, self.size)
        return self

    def verify(self):
        ''' Return True if the image this volume starts from exists (or
            if it needs none); raise StoragePoolException otherwise.
        '''
        if self.snap_on_start:
            # pylint: disable=protected-access
            img = self.source._path_clean
        elif self.save_on_stop:
            img = self._path_clean
        else:
            img = None

        if img is None or os.path.exists(img):
            return True
        raise qubes.storage.StoragePoolException(
            'Missing image file {!r} for volume {!s}'.format(img, self.vid))

    def remove(self):
        ''' Drop volume object from pool; remove volume images from
            oldest to newest; remove empty VM directory.
        '''
        # The pool registers volumes under their vid (see
        # ReflinkPool.init_volume), so that is the key to delete.
        with suppress(KeyError):
            # pylint: disable=protected-access
            del self.pool._volumes[self.vid]

        self._prune_revisions(keep=0)
        _remove_file(self._path_clean)
        _remove_file(self._path_dirty)

        # Other volumes of the same VM may still live in the directory.
        try:
            _remove_empty_dir(os.path.dirname(self._path_dirty))
        except OSError as ex:
            if ex.errno != errno.ENOTEMPTY:
                raise

        return self

    def is_outdated(self):
        ''' Return whether the source's clean image has a newer mtime
            than this volume's clean image. False if the volume is not
            snap_on_start, or if either file is missing.
        '''
        if self.snap_on_start:
            with suppress(FileNotFoundError):
                # pylint: disable=protected-access
                return (os.path.getmtime(self.source._path_clean) >
                        os.path.getmtime(self._path_clean))
        return False

    def is_dirty(self):
        ''' Return whether a save_on_stop volume has a dirty image. '''
        return self.save_on_stop and os.path.exists(self._path_dirty)

    def start(self):
        ''' Prepare the dirty image:

            - snap_on_start: first refresh the clean image from source
            - already dirty: keep the existing dirty image
            - save_on_stop or snap_on_start: copy clean to dirty
            - otherwise: create an empty sparse dirty image
        '''
        if self.snap_on_start:
            # pylint: disable=protected-access
            _copy_file(self.source._path_clean, self._path_clean)
        if self.is_dirty():  # implies self.save_on_stop
            return self
        if self.save_on_stop or self.snap_on_start:
            _copy_file(self._path_clean, self._path_dirty)
        else:
            _create_sparse_file(self._path_dirty, self.size)
        return self

    def stop(self):
        ''' Commit the dirty image (save_on_stop) or discard it; also
            discard the clean copy of a snap_on_start volume.
        '''
        if self.save_on_stop:
            self._commit()
        else:
            _remove_file(self._path_dirty)
            if self.snap_on_start:
                _remove_file(self._path_clean)
        return self

    def _commit(self):
        ''' Save the clean image as a revision, prune old revisions,
            then rename the dirty image over the clean one.
        '''
        self._add_revision()
        self._prune_revisions()
        _rename_file(self._path_dirty, self._path_clean)

    def _add_revision(self):
        ''' Copy the clean image to a revision named after its ctime,
            unless revisions are disabled or the image uses no disk
            space.
        '''
        if self.revisions_to_keep == 0:
            return
        if _get_file_disk_usage(self._path_clean) == 0:
            return
        ctime = os.path.getctime(self._path_clean)
        revision = qubes.storage.isodate(int(ctime)) + 'Z'
        _copy_file(self._path_clean, self._path_revision(revision))

    def _prune_revisions(self, keep=None):
        ''' Remove the oldest revisions, leaving the newest `keep` ones
            (default: revisions_to_keep; 0 removes all of them).
        '''
        if keep is None:
            keep = self.revisions_to_keep
        revisions = list(self.revisions)
        # revisions[:-0] would be empty, so handle keep == 0 separately
        stale = revisions[:-keep] if keep else revisions
        for revision in stale:
            _remove_file(self._path_revision(revision))

    def revert(self, revision=None):
        ''' Make `revision` (default: the newest one) the clean image,
            after saving the current clean image as a revision.

            Raises StoragePoolException if the revision does not exist
            or if there is no revision at all.
        '''
        if revision is None:
            revisions = list(self.revisions)
            if not revisions:
                raise qubes.storage.StoragePoolException(
                    'No revisions available for volume {!s}'.format(
                        self.vid))
            revision = revisions[-1]
        elif not os.path.exists(self._path_revision(revision)):
            raise qubes.storage.StoragePoolException(
                'Missing revision {!r} for volume {!s}'.format(
                    revision, self.vid))
        self._add_revision()
        _rename_file(self._path_revision(revision), self._path_clean)
        return self

    def resize(self, size):
        ''' Expand a read-write volume image; notify any corresponding
            loop devices of the size change.
        '''
        if not self.rw:
            raise qubes.storage.StoragePoolException(
                'Cannot resize: {!s} is read-only'.format(self.vid))

        if size < self.size:
            raise qubes.storage.StoragePoolException(
                'For your own safety, shrinking of {!s} is disabled.'
                ' If you really know what you are doing,'
                ' use "truncate" manually.'.format(self.vid))

        try:  # assume volume is not (cleanly) stopped ...
            _resize_file(self._path_dirty, size)
        except FileNotFoundError:  # ... but it actually is.
            _resize_file(self._path_clean, size)

        self.size = size

        # resize any corresponding loop devices
        out = _cmd('losetup', '--associated', self._path_dirty)
        for match in re.finditer(br'^(/dev/loop[0-9]+): ', out, re.MULTILINE):
            loop_dev = match.group(1).decode('ascii')
            _cmd('losetup', '--set-capacity', loop_dev)

        return self

    def _require_save_on_stop(self, method_name):
        ''' Raise NotImplementedError unless the volume is
            save_on_stop.
        '''
        if not self.save_on_stop:
            raise NotImplementedError(
                'Cannot {!s}: {!s} is not save_on_stop'.format(
                    method_name, self.vid))

    def export(self):
        ''' Return the path of the clean image. '''
        self._require_save_on_stop('export')
        return self._path_clean

    def import_data(self):
        ''' Create an empty sparse dirty image and return its path for
            the caller to write to; finish with import_data_end().
        '''
        self._require_save_on_stop('import_data')
        _create_sparse_file(self._path_dirty, self.size)
        return self._path_dirty

    def import_data_end(self, success):
        ''' Commit the dirty image on success, otherwise discard it. '''
        if success:
            self._commit()
        else:
            _remove_file(self._path_dirty)
        return self

    def import_volume(self, src_volume):
        ''' Copy the exported image of src_volume into this volume and
            commit it; discard the partial copy on any failure.
        '''
        self._require_save_on_stop('import_volume')
        try:
            _copy_file(src_volume.export(), self._path_dirty)
        except BaseException:  # clean up on anything, then re-raise
            self.import_data_end(False)
            raise
        self.import_data_end(True)
        return self

    def _path_revision(self, revision):
        ''' Return the image path for the given revision name. '''
        return self._path_clean + '@' + revision

    @property
    def _path_clean(self):
        return os.path.join(self.pool.dir_path, self.vid + '.img')

    @property
    def _path_dirty(self):
        return os.path.join(self.pool.dir_path, self.vid + '-dirty.img')

    @property
    def path(self):
        ''' Path of the dirty image. '''
        return self._path_dirty

    @property
    def revisions(self):
        ''' OrderedDict mapping each revision name found on disk to its
            timestamp (the name without the trailing 'Z'), sorted by
            file name.
        '''
        revision_to_timestamp = collections.OrderedDict()
        prefix = self._path_revision('')
        for filename in sorted(glob.glob(glob.escape(prefix) + '*Z')):
            revision = filename[len(prefix):]
            timestamp = revision[:-1]
            revision_to_timestamp[revision] = timestamp
        return revision_to_timestamp

    @property
    def usage(self):
        ''' Return volume disk usage from the VM's perspective. It is
            usually much lower from the host's perspective due to CoW.
        '''
        with suppress(FileNotFoundError):
            return _get_file_disk_usage(self._path_dirty)
        with suppress(FileNotFoundError):
            return _get_file_disk_usage(self._path_clean)
        return 0


@contextmanager
def _replace_file(dst):
    ''' Yield a tempfile whose name starts with dst, creating the last
        directory component if necessary. If the block does not raise
        an exception, flush+fsync the tempfile and rename it to dst.
        Otherwise remove the tempfile and re-raise.
    '''
    tmp_dir, prefix = os.path.split(dst + '~')
    _make_dir(tmp_dir)
    tmp = tempfile.NamedTemporaryFile(dir=tmp_dir, prefix=prefix, delete=False)
    try:
        yield tmp
        tmp.flush()
        os.fsync(tmp.fileno())
        tmp.close()
        _rename_file(tmp.name, dst)
    except BaseException:  # clean up on anything, then re-raise
        tmp.close()
        _remove_file(tmp.name)
        raise


def _get_file_disk_usage(path):
    ''' Return real disk usage (not logical file size) of a file. '''
    return os.stat(path).st_blocks * BLKSIZE


def _fsync_dir(path):
    ''' fsync the directory at path. '''
    dir_fd = os.open(path, os.O_RDONLY | os.O_DIRECTORY)
    try:
        os.fsync(dir_fd)
    finally:
        os.close(dir_fd)


def _make_dir(path):
    ''' mkdir path, ignoring FileExistsError; return whether we
        created it.
    '''
    with suppress(FileExistsError):
        os.mkdir(path)
        _fsync_dir(os.path.dirname(path))
        LOGGER.info('Created directory: %s', path)
        return True
    return False


def _remove_file(path):
    ''' Remove the file at path, ignoring FileNotFoundError. '''
    with suppress(FileNotFoundError):
        os.remove(path)
        _fsync_dir(os.path.dirname(path))
        LOGGER.info('Removed file: %s', path)


def _remove_empty_dir(path):
    ''' rmdir path, ignoring FileNotFoundError. Any other OSError
        (e.g. directory not empty) propagates to the caller.
    '''
    with suppress(FileNotFoundError):
        os.rmdir(path)
        _fsync_dir(os.path.dirname(path))
        LOGGER.info('Removed empty directory: %s', path)


def _rename_file(src, dst):
    ''' Rename src to dst and fsync the affected directories. '''
    os.rename(src, dst)
    dst_dir = os.path.dirname(dst)
    src_dir = os.path.dirname(src)
    _fsync_dir(dst_dir)
    if src_dir != dst_dir:
        _fsync_dir(src_dir)
    LOGGER.info('Renamed file: %s -> %s', src, dst)


def _resize_file(path, size):
    ''' Resize an existing file. '''
    with open(path, 'rb+') as img:
        img.truncate(size)


def _create_sparse_file(path, size):
    ''' Create an empty sparse file. '''
    with _replace_file(path) as tmp:
        tmp.truncate(size)
        LOGGER.info('Created sparse file: %s', tmp.name)


def _copy_file(src, dst):
    ''' Copy src to dst as a reflink if possible, sparse if not. '''
    # Check up front, so that no tempfile is created for a missing src
    if not os.path.exists(src):
        raise FileNotFoundError(src)
    with _replace_file(dst) as tmp:
        LOGGER.info('Copying file: %s -> %s', src, tmp.name)
        _cmd('cp', '--sparse=always', '--reflink=auto', src, tmp.name)


def _cmd(*args):
    ''' Run command until finished; return stdout (as bytes) if it
        exited 0. Otherwise, raise a detailed StoragePoolException.
    '''
    try:
        return subprocess.run(args, check=True,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE).stdout
    except subprocess.CalledProcessError as ex:
        msg = '{!s} err={!r} out={!r}'.format(ex, ex.stderr, ex.stdout)
        raise qubes.storage.StoragePoolException(msg) from ex


def is_reflink_supported(dst_dir, src_dir=None):
    ''' Return whether destination directory supports reflink copies
        from source directory. (A temporary file is created in each
        directory, using O_TMPFILE if possible.)
    '''
    if src_dir is None:
        src_dir = dst_dir
    with tempfile.TemporaryFile(dir=dst_dir) as dst, \
            tempfile.TemporaryFile(dir=src_dir) as src:
        src.write(b'foo')  # don't let any filesystem get clever with empty files
        # The write above only fills Python's buffer; flush so that the
        # source file really is non-empty when the ioctl looks at it.
        src.flush()

        try:
            fcntl.ioctl(dst.fileno(), FICLONE, src.fileno())
            return True
        except OSError:
            return False