Merge remote-tracking branch 'qubesos/pr/188'

* qubesos/pr/188:
  file-reflink, a storage driver optimized for CoW filesystems
  Make AppVM/DispVM root volume rw to avoid CoW-on-CoW
This commit is contained in:
Marek Marczykowski-Górecki 2018-02-13 05:20:52 +01:00
commit 209af07fd0
No known key found for this signature in database
GPG Key ID: 063938BA42CFA724
9 changed files with 446 additions and 13 deletions

View File

@ -9,14 +9,18 @@ possible to register additional 3rd-party drivers.
Domain's storage volumes:
- `root` - this is where operating system is installed. The volume is
available read-write to :py:class:`~qubes.vm.templatevm.TemplateVM` and
:py:class:`~qubes.vm.standalonevm.StandaloneVM`, and read-only to others
(:py:class:`~qubes.vm.appvm.AppVM` and :py:class:`~qubes.vm.dispvm.DispVM`).
available read-write to all domain classes. It could be made read-only for
:py:class:`~qubes.vm.appvm.AppVM` and :py:class:`~qubes.vm.dispvm.DispVM` to
implement an untrusted storage domain in the future, but doing so will cause
such VMs to set up a device-mapper based copy-on-write layer that redirects
writes to the `volatile` volume, whose storage driver may already do CoW,
leading to an inefficient CoW-on-CoW setup. For this reason, `root` is
currently read-write in all cases.
- `private` - this is where domain's data live. The volume is available
read-write to all domain classes (including :py:class:`~qubes.vm.dispvm.DispVM`,
but data written there is discarded on domain shutdown).
- `volatile` - this is used for any data that does not need to persist. This includes
swap, copy-on-write layer for `root` volume etc.
swap, copy-on-write layer for a future read-only `root` volume etc.
- `kernel` - domain boot files - operating system kernel, initial ramdisk,
kernel modules etc. This volume is provided read-only and should be provided by
a storage pool respecting :py:attr:`qubes.vm.qubesvm.QubesVM.kernel` property.
@ -26,11 +30,12 @@ Storage pool concept
Storage pool is responsible for managing its volumes. Qubes have defined
storage pool driver API, allowing to put domains storage in various places. By
default two drivers are provided: :py:class:`qubes.storage.file.FilePool`
(named `file`) and :py:class:`qubes.storage.lvm.ThinPool` (named `lvm_thin`).
default three drivers are provided: :py:class:`qubes.storage.file.FilePool`
(named `file`), :py:class:`qubes.storage.reflink.ReflinkPool` (named
`file-reflink`), and :py:class:`qubes.storage.lvm.ThinPool` (named `lvm_thin`).
But the API allows implementing a variety of other drivers (like additionally
encrypted storage, external disk, drivers using special features of some
filesystems like btrfs, etc).
filesystems, etc).
Most of the storage API focuses on storage volumes. Each volume has at least those
properties:

View File

@ -628,7 +628,7 @@ def _default_pool(app):
# not a thin volume? look for file pools
for pool in app.pools.values():
if pool.config.get('driver', None) != 'file':
if pool.config.get('driver', None) not in ('file', 'file-reflink'):
continue
if pool.config['dir_path'] == qubes.config.qubes_base_dir:
return pool

423
qubes/storage/reflink.py Normal file
View File

@ -0,0 +1,423 @@
#
# The Qubes OS Project, https://www.qubes-os.org/
#
# Copyright (C) 2018 Rusty Bird <rustybird@net-c.com>
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see <https://www.gnu.org/licenses/>.
#
''' Driver for handling VM images as files, without any device-mapper
involvement. A reflink-capable filesystem is strongly recommended,
but not required.
'''
import collections
import errno
import fcntl
import glob
import logging
import os
import re
import subprocess
import tempfile
from contextlib import contextmanager, suppress
import qubes.storage
BLKSIZE = 512  # os.stat().st_blocks counts 512-byte blocks
FICLONE = 1074041865  # == 0x40049409; see ioctl_ficlone manpage
# Named after this module (qubes/storage/reflink.py) so that the logger
# sits below the 'qubes' logger in the logging hierarchy.
LOGGER = logging.getLogger('qubes.storage.reflink')
class ReflinkPool(qubes.storage.Pool):
    ''' Storage pool that keeps volume images as plain files below
        dir_path and hands out ReflinkVolume objects for them.
    '''
    driver = 'file-reflink'
    _known_dir_path_prefixes = ['appvms', 'vm-templates']

    def __init__(self, dir_path, setup_check='yes', revisions_to_keep=1,
                 **kwargs):
        ''' Remember the (absolute) pool directory and whether setup()
            should insist on reflink support.
        '''
        super().__init__(revisions_to_keep=revisions_to_keep, **kwargs)
        self._volumes = {}  # vid -> ReflinkVolume
        self.dir_path = os.path.abspath(dir_path)
        self.setup_check = qubes.property.bool(None, None, setup_check)

    def setup(self):
        ''' Create the pool directory and its known subdirectories.

            If setup_check is enabled and the directory does not
            support reflinks, raise StoragePoolException (removing the
            pool directory again if this call created it).
        '''
        we_created_it = _make_dir(self.dir_path)
        reflink_ok = (not self.setup_check or
                      is_reflink_supported(self.dir_path))
        if not reflink_ok:
            if we_created_it:
                _remove_empty_dir(self.dir_path)
            raise qubes.storage.StoragePoolException(
                'The filesystem for {!r} does not support reflinks. If you'
                ' can live with VM startup delays and wasted disk space, pass'
                ' the "setup_check=no" option.'.format(self.dir_path))

        for prefix in self._known_dir_path_prefixes:
            _make_dir(os.path.join(self.dir_path, prefix))
        return self

    def init_volume(self, vm, volume_config):
        ''' Build a ReflinkVolume from volume_config, filling in the
            pool, revisions_to_keep and vid entries where needed, and
            register it in this pool under its vid.
        '''
        # Fail closed on any strange VM dir_path_prefix, just in case
        # /etc/udev/rules/00-qubes-ignore-devices.rules needs updating
        assert vm.dir_path_prefix in self._known_dir_path_prefixes, \
            'Unknown dir_path_prefix {!r}'.format(vm.dir_path_prefix)

        volume_config['pool'] = self
        volume_config.setdefault('revisions_to_keep', self.revisions_to_keep)
        if 'vid' not in volume_config:
            # default vid: <dir_path_prefix>/<vm name>/<volume name>
            volume_config['vid'] = os.path.join(
                vm.dir_path_prefix, vm.name, volume_config['name'])

        new_volume = ReflinkVolume(**volume_config)
        self._volumes[volume_config['vid']] = new_volume
        return new_volume

    def list_volumes(self):
        ''' Return a list of all volume objects registered in this pool. '''
        return list(self._volumes.values())

    def get_volume(self, vid):
        ''' Return the registered volume for vid (KeyError if unknown). '''
        return self._volumes[vid]

    def destroy(self):
        ''' Nothing to do here. '''
        pass

    @property
    def config(self):
        ''' Pool settings as a dictionary. '''
        settings = {}
        settings['name'] = self.name
        settings['dir_path'] = self.dir_path
        settings['driver'] = ReflinkPool.driver
        settings['revisions_to_keep'] = self.revisions_to_keep
        return settings

    @property
    def size(self):
        ''' Total size in bytes of the filesystem holding dir_path. '''
        fs_stat = os.statvfs(self.dir_path)
        return fs_stat.f_frsize * fs_stat.f_blocks

    @property
    def usage(self):
        ''' Non-free bytes of the filesystem holding dir_path. '''
        fs_stat = os.statvfs(self.dir_path)
        used_blocks = fs_stat.f_blocks - fs_stat.f_bfree
        return fs_stat.f_frsize * used_blocks
class ReflinkVolume(qubes.storage.Volume):
    ''' Volume backed by image files inside the pool directory:
        "<vid>.img" (clean image), "<vid>-dirty.img" (image in use,
        exposed as path) and "<vid>.img@<revision>" (old revisions).
    '''

    def create(self):
        ''' Create an empty sparse clean image for volumes that are
            save_on_stop but not snap_on_start.
        '''
        if self.save_on_stop and not self.snap_on_start:
            _create_sparse_file(self._path_clean, self.size)
        return self

    def verify(self):
        ''' Return True if the image this volume starts from exists (or
            if it needs none); raise StoragePoolException otherwise.
        '''
        if self.snap_on_start:
            # pylint: disable=protected-access
            img = self.source._path_clean
        elif self.save_on_stop:
            img = self._path_clean
        else:
            img = None

        if img is None or os.path.exists(img):
            return True
        raise qubes.storage.StoragePoolException(
            'Missing image file {!r} for volume {!s}'.format(img, self.vid))

    def remove(self):
        ''' Drop volume object from pool; remove volume images from
            oldest to newest; remove empty VM directory.
        '''
        with suppress(KeyError):
            # The pool registers volumes under their vid, so that is
            # the key to delete (not the volume object itself).
            # pylint: disable=protected-access
            del self.pool._volumes[self.vid]

        self._prune_revisions(keep=0)
        _remove_file(self._path_clean)
        _remove_file(self._path_dirty)

        try:
            _remove_empty_dir(os.path.dirname(self._path_dirty))
        except OSError as ex:
            # A directory still holding other files is left alone.
            if ex.errno != errno.ENOTEMPTY:
                raise
        return self

    def is_outdated(self):
        ''' Return whether the source's clean image is newer (by mtime)
            than this snap_on_start volume's clean image. False if the
            volume is not snap_on_start or either image is missing.
        '''
        if self.snap_on_start:
            with suppress(FileNotFoundError):
                # pylint: disable=protected-access
                return (os.path.getmtime(self.source._path_clean) >
                        os.path.getmtime(self._path_clean))
        return False

    def is_dirty(self):
        ''' Truthy if the volume is save_on_stop and a dirty image
            exists.
        '''
        return self.save_on_stop and os.path.exists(self._path_dirty)

    def start(self):
        ''' Prepare the dirty image: refresh the clean image from the
            source when snap_on_start; keep an existing dirty image of
            a save_on_stop volume; otherwise copy the clean image, or
            create an empty sparse file if there is nothing to copy.
        '''
        if self.snap_on_start:
            # pylint: disable=protected-access
            _copy_file(self.source._path_clean, self._path_clean)
        if self.is_dirty():  # implies self.save_on_stop
            return self
        if self.save_on_stop or self.snap_on_start:
            _copy_file(self._path_clean, self._path_dirty)
        else:
            _create_sparse_file(self._path_dirty, self.size)
        return self

    def stop(self):
        ''' Commit the dirty image if save_on_stop; otherwise discard
            it (and the snapshotted clean image, if snap_on_start).
        '''
        if self.save_on_stop:
            self._commit()
        else:
            _remove_file(self._path_dirty)
            if self.snap_on_start:
                _remove_file(self._path_clean)
        return self

    def _commit(self):
        ''' Save the current clean image as a revision, prune old
            revisions, then make the dirty image the new clean one.
        '''
        self._add_revision()
        self._prune_revisions()
        _rename_file(self._path_dirty, self._path_clean)

    def _add_revision(self):
        ''' Copy the clean image to a revision named after its ctime,
            unless no revisions are kept or the clean image occupies
            no disk space.
        '''
        if self.revisions_to_keep == 0:
            return
        if _get_file_disk_usage(self._path_clean) == 0:
            return
        ctime = os.path.getctime(self._path_clean)
        revision = qubes.storage.isodate(int(ctime)) + 'Z'
        _copy_file(self._path_clean, self._path_revision(revision))

    def _prune_revisions(self, keep=None):
        ''' Remove all but the newest keep revisions (default:
            revisions_to_keep).
        '''
        if keep is None:
            keep = self.revisions_to_keep
        # For keep == 0 the slice end "-0 or None" is None, i.e. every
        # revision gets removed.
        # pylint: disable=invalid-unary-operand-type
        for revision in list(self.revisions.keys())[:(-keep) or None]:
            _remove_file(self._path_revision(revision))

    def revert(self, revision=None):
        ''' Make the given revision (default: the newest one) the clean
            image, after saving the current clean image as a revision.
            Raise StoragePoolException if the revision does not exist.
        '''
        if revision is None:
            known_revisions = list(self.revisions.keys())
            if not known_revisions:
                raise qubes.storage.StoragePoolException(
                    'No revisions available for volume {!s}'.format(
                        self.vid))
            revision = known_revisions[-1]
        elif not os.path.exists(self._path_revision(revision)):
            raise qubes.storage.StoragePoolException(
                'Missing revision {!r} for volume {!s}'.format(
                    revision, self.vid))
        self._add_revision()
        _rename_file(self._path_revision(revision), self._path_clean)
        return self

    def resize(self, size):
        ''' Expand a read-write volume image; notify any corresponding
            loop devices of the size change.
        '''
        if not self.rw:
            raise qubes.storage.StoragePoolException(
                'Cannot resize: {!s} is read-only'.format(self.vid))

        if size < self.size:
            raise qubes.storage.StoragePoolException(
                'For your own safety, shrinking of {!s} is disabled.'
                ' If you really know what you are doing,'
                ' use "truncate" manually.'.format(self.vid))

        try:  # assume volume is not (cleanly) stopped ...
            _resize_file(self._path_dirty, size)
        except FileNotFoundError:  # ... but it actually is.
            _resize_file(self._path_clean, size)

        self.size = size

        # resize any corresponding loop devices
        out = _cmd('losetup', '--associated', self._path_dirty)
        for match in re.finditer(br'^(/dev/loop[0-9]+): ', out, re.MULTILINE):
            loop_dev = match.group(1).decode('ascii')
            _cmd('losetup', '--set-capacity', loop_dev)

        return self

    def _require_save_on_stop(self, method_name):
        ''' Raise NotImplementedError unless the volume is
            save_on_stop.
        '''
        if not self.save_on_stop:
            raise NotImplementedError(
                'Cannot {!s}: {!s} is not save_on_stop'.format(
                    method_name, self.vid))

    def export(self):
        ''' Return the path of the clean image. '''
        self._require_save_on_stop('export')
        return self._path_clean

    def import_data(self):
        ''' Create an empty sparse dirty image and return its path for
            the caller to write into.
        '''
        self._require_save_on_stop('import_data')
        _create_sparse_file(self._path_dirty, self.size)
        return self._path_dirty

    def import_data_end(self, success):
        ''' Commit the imported dirty image on success, otherwise
            remove it.
        '''
        if success:
            self._commit()
        else:
            _remove_file(self._path_dirty)
        return self

    def import_volume(self, src_volume):
        ''' Copy src_volume's exported image into this volume and
            commit it; on any failure discard the partial import.
        '''
        self._require_save_on_stop('import_volume')
        try:
            _copy_file(src_volume.export(), self._path_dirty)
        except BaseException:  # clean up on anything, then re-raise
            self.import_data_end(False)
            raise
        self.import_data_end(True)
        return self

    def _path_revision(self, revision):
        ''' Return the image path of the given revision. '''
        return self._path_clean + '@' + revision

    @property
    def _path_clean(self):
        ''' Path of the clean image. '''
        return os.path.join(self.pool.dir_path, self.vid + '.img')

    @property
    def _path_dirty(self):
        ''' Path of the dirty image. '''
        return os.path.join(self.pool.dir_path, self.vid + '-dirty.img')

    @property
    def path(self):
        ''' Path of the dirty image. '''
        return self._path_dirty

    @property
    def revisions(self):
        ''' Ordered mapping of revision name to timestamp (the name
            without its trailing "Z"), sorted by image filename.
        '''
        revision_to_timestamp = collections.OrderedDict()
        prefix = self._path_revision('')
        for filename in sorted(glob.glob(glob.escape(prefix) + '*Z')):
            revision = filename[len(prefix):]
            timestamp = revision[:-1]
            revision_to_timestamp[revision] = timestamp
        return revision_to_timestamp

    @property
    def usage(self):
        ''' Return volume disk usage from the VM's perspective. It is
            usually much lower from the host's perspective due to CoW.
        '''
        # Prefer the dirty image, fall back to the clean one.
        with suppress(FileNotFoundError):
            return _get_file_disk_usage(self._path_dirty)
        with suppress(FileNotFoundError):
            return _get_file_disk_usage(self._path_clean)
        return 0
@contextmanager
def _replace_file(dst):
    ''' Context manager yielding a named tempfile next to dst (its name
        begins with dst's name plus "~"); the containing directory is
        created if missing. When the block finishes without an
        exception, the tempfile is flushed, fsynced, closed and renamed
        to dst. On any exception it is closed and removed instead, and
        the exception propagates.
    '''
    parent_dir, tmp_prefix = os.path.split(dst + '~')
    _make_dir(parent_dir)
    tmp = tempfile.NamedTemporaryFile(
        dir=parent_dir, prefix=tmp_prefix, delete=False)
    replaced = False
    try:
        yield tmp
        tmp.flush()
        os.fsync(tmp.fileno())
        tmp.close()
        _rename_file(tmp.name, dst)
        replaced = True
    finally:
        # Only reached with replaced == False when something above
        # raised: get rid of the leftover tempfile.
        if not replaced:
            tmp.close()
            _remove_file(tmp.name)
def _get_file_disk_usage(path):
    ''' Return the number of bytes actually allocated for a file on
        disk (as opposed to its logical size).
    '''
    allocated_blocks = os.stat(path).st_blocks
    return allocated_blocks * BLKSIZE
def _fsync_dir(path):
    ''' Open the directory at path read-only, fsync it, and close it
        again (even if the fsync fails).
    '''
    fd = os.open(path, os.O_DIRECTORY | os.O_RDONLY)
    try:
        os.fsync(fd)
    finally:
        os.close(fd)
def _make_dir(path):
    ''' Create the directory path and fsync its parent directory.
        Return True if this call created it, False if a
        FileExistsError was raised (normally: path already exists).
    '''
    try:
        os.mkdir(path)
        _fsync_dir(os.path.dirname(path))
        LOGGER.info('Created directory: %s', path)
    except FileExistsError:
        return False
    return True
def _remove_file(path):
    ''' Remove the file at path and fsync its directory; any
        FileNotFoundError along the way is silently ignored.
    '''
    try:
        os.remove(path)
        _fsync_dir(os.path.dirname(path))
        LOGGER.info('Removed file: %s', path)
    except FileNotFoundError:
        pass
def _remove_empty_dir(path):
    ''' rmdir path and fsync its parent directory; any
        FileNotFoundError along the way is silently ignored. Other
        OSErrors from rmdir (e.g. directory not empty) propagate.
    '''
    try:
        os.rmdir(path)
        _fsync_dir(os.path.dirname(path))
        LOGGER.info('Removed empty directory: %s', path)
    except FileNotFoundError:
        pass
def _rename_file(src, dst):
    ''' Rename src to dst, then fsync the destination directory and,
        if it is a different one, the source directory as well.
    '''
    os.rename(src, dst)
    dirs_to_sync = [os.path.dirname(dst)]
    src_parent = os.path.dirname(src)
    if src_parent != dirs_to_sync[0]:
        dirs_to_sync.append(src_parent)
    for directory in dirs_to_sync:
        _fsync_dir(directory)
    LOGGER.info('Renamed file: %s -> %s', src, dst)
def _resize_file(path, size):
    ''' Truncate (or extend) an already existing file to size bytes. '''
    with open(path, 'rb+') as image:
        image.truncate(size)
def _create_sparse_file(path, size):
    ''' Put an empty file of the given size at path (replacing
        whatever was there) by truncating a fresh tempfile to size.
    '''
    with _replace_file(path) as new_file:
        new_file.truncate(size)
        tmp_name = new_file.name
        LOGGER.info('Created sparse file: %s', tmp_name)
def _copy_file(src, dst):
    ''' Replace dst with a copy of src made by "cp", which is asked
        for a reflink where possible and a sparse copy otherwise.
        Raise FileNotFoundError if src does not exist.
    '''
    if not os.path.exists(src):
        raise FileNotFoundError(src)
    with _replace_file(dst) as tmp:
        tmp_name = tmp.name
        LOGGER.info('Copying file: %s -> %s', src, tmp_name)
        cp_argv = ['cp', '--sparse=always', '--reflink=auto', src, tmp_name]
        _cmd(*cp_argv)
def _cmd(*args):
    ''' Run the given command line to completion, capturing stdout and
        stderr. Return the captured stdout bytes on exit status 0;
        otherwise raise a StoragePoolException describing the failure
        together with both captured streams.
    '''
    try:
        finished = subprocess.run(args,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE,
                                  check=True)
    except subprocess.CalledProcessError as ex:
        msg = '{!s} err={!r} out={!r}'.format(ex, ex.stderr, ex.stdout)
        raise qubes.storage.StoragePoolException(msg) from ex
    return finished.stdout
def is_reflink_supported(dst_dir, src_dir=None):
    ''' Return whether destination directory supports reflink copies
        from source directory. (A temporary file is created in each
        directory, using O_TMPFILE if possible.)

        src_dir defaults to dst_dir. Both temporary files are closed
        again before returning.
    '''
    if src_dir is None:
        src_dir = dst_dir
    with tempfile.TemporaryFile(dir=dst_dir) as dst, \
         tempfile.TemporaryFile(dir=src_dir) as src:
        src.write(b'foo')  # don't let any filesystem get clever with empty files
        # The write above only fills Python's buffer; flush so that the
        # file is really non-empty by the time the ioctl looks at it.
        src.flush()
        try:
            fcntl.ioctl(dst.fileno(), FICLONE, src.fileno())
            return True
        except OSError:
            return False

View File

@ -101,8 +101,10 @@ class TC_00_Pool(SystemTestCase):
self.app.get_pool('foo-bar')
def test_001_all_pool_drivers(self):
""" The only predefined pool driver is file """
self.assertCountEqual(['linux-kernel', 'lvm_thin', 'file'], pool_drivers())
""" Expect all our pool drivers (and only them) """
self.assertCountEqual(
['linux-kernel', 'lvm_thin', 'file', 'file-reflink'],
pool_drivers())
def test_002_get_pool_klass(self):
""" Expect the default pool to be `FilePool` """

View File

@ -274,7 +274,7 @@ class TC_01_FileVolumes(qubes.tests.QubesTestCase):
expected = template_dir + '/root.img:' + \
template_dir + '/root-cow.img:' + \
vm_dir + '/root-cow.img'
self.assertVolumePath(vm, 'root', expected, rw=False)
self.assertVolumePath(vm, 'root', expected, rw=True)
expected = vm_dir + '/private.img:' + \
vm_dir + '/private-cow.img'
self.assertVolumePath(vm, 'private', expected, rw=True)

View File

@ -45,7 +45,7 @@ class AppVM(qubes.vm.qubesvm.QubesVM):
'name': 'root',
'snap_on_start': True,
'save_on_stop': False,
'rw': False,
'rw': True,
'source': None,
},
'private': {

View File

@ -47,7 +47,7 @@ class DispVM(qubes.vm.qubesvm.QubesVM):
'name': 'root',
'snap_on_start': True,
'save_on_stop': False,
'rw': False,
'rw': True,
'source': None,
},
'private': {

View File

@ -269,6 +269,7 @@ fi
%{python3_sitelib}/qubes/storage/__pycache__/*
%{python3_sitelib}/qubes/storage/__init__.py
%{python3_sitelib}/qubes/storage/file.py
%{python3_sitelib}/qubes/storage/reflink.py
%{python3_sitelib}/qubes/storage/kernels.py
%{python3_sitelib}/qubes/storage/lvm.py

View File

@ -82,12 +82,14 @@ if __name__ == '__main__':
],
'qubes.storage': [
'file = qubes.storage.file:FilePool',
'file-reflink = qubes.storage.reflink:ReflinkPool',
'linux-kernel = qubes.storage.kernels:LinuxKernel',
'lvm_thin = qubes.storage.lvm:ThinPool',
],
'qubes.tests.storage': [
'test = qubes.tests.storage:TestPool',
'file = qubes.storage.file:FilePool',
'file-reflink = qubes.storage.reflink:ReflinkPool',
'linux-kernel = qubes.storage.kernels:LinuxKernel',
'lvm_thin = qubes.storage.lvm:ThinPool',
],