diff --git a/ci/pylintrc b/ci/pylintrc index 7583cd8..17f7984 100644 --- a/ci/pylintrc +++ b/ci/pylintrc @@ -8,6 +8,7 @@ disable= bad-continuation, duplicate-code, fixme, + cyclic-import, locally-disabled, locally-enabled diff --git a/qubesadmin/backup/__init__.py b/qubesadmin/backup/__init__.py new file mode 100644 index 0000000..2d935b7 --- /dev/null +++ b/qubesadmin/backup/__init__.py @@ -0,0 +1,1918 @@ +# -*- encoding: utf8 -*- +# +# The Qubes OS Project, http://www.qubes-os.org +# +# Copyright (C) 2017 Marek Marczykowski-Górecki +# +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License along +# with this program; if not, see . + +'''Qubes backup''' + +import collections +import errno +import fcntl +import functools +import grp +import logging +import multiprocessing +from multiprocessing import Queue, Process +import os +import pwd +import re +import shutil +import subprocess +import sys +import tempfile +import termios +import time + +import qubesadmin +import qubesadmin.vm +from qubesadmin.devices import DeviceAssignment +from qubesadmin.exc import QubesException +from qubesadmin.utils import size_to_human + +# must be picklable +QUEUE_FINISHED = "!!!FINISHED" +QUEUE_ERROR = "!!!ERROR" + +HEADER_FILENAME = 'backup-header' +DEFAULT_CRYPTO_ALGORITHM = 'aes-256-cbc' +# 'scrypt' is not exactly HMAC algorithm, but a tool we use to +# integrity-protect the data +DEFAULT_HMAC_ALGORITHM = 'scrypt' +DEFAULT_COMPRESSION_FILTER = 'gzip' +# Maximum size of error message get from process stderr (including VM process) +MAX_STDERR_BYTES = 1024 +# header + qubes.xml max size +HEADER_QUBES_XML_MAX_SIZE = 1024 * 1024 +# hmac file max size - regardless of backup format version! +HMAC_MAX_SIZE = 4096 + +BLKSIZE = 512 + +_re_alphanum = re.compile(r'^[A-Za-z0-9-]*$') + +class BackupCanceledError(QubesException): + '''Exception raised when backup/restore was cancelled''' + def __init__(self, msg, tmpdir=None): + super(BackupCanceledError, self).__init__(msg) + self.tmpdir = tmpdir + +class BackupHeader(object): + '''Structure describing backup-header file included as the first file in + backup archive + ''' + header_keys = { + 'version': 'version', + 'encrypted': 'encrypted', + 'compressed': 'compressed', + 'compression-filter': 'compression_filter', + 'crypto-algorithm': 'crypto_algorithm', + 'hmac-algorithm': 'hmac_algorithm', + 'backup-id': 'backup_id' + } + bool_options = ['encrypted', 'compressed'] + int_options = ['version'] + + def __init__(self, + header_data=None, + version=None, + encrypted=None, + compressed=None, + compression_filter=None, + hmac_algorithm=None, + crypto_algorithm=None, + backup_id=None): + # repeat the list to help code completion... + self.version = version + self.encrypted = encrypted + self.compressed = compressed + # Options introduced in backup format 3+, which always have a header, + # so no need for fallback in function parameter + self.compression_filter = compression_filter + self.hmac_algorithm = hmac_algorithm + self.crypto_algorithm = crypto_algorithm + self.backup_id = backup_id + + if header_data is not None: + self.load(header_data) + + def load(self, untrusted_header_text): + """Parse backup header file. + + :param untrusted_header_text: header content + :type untrusted_header_text: basestring + + .. warning:: + This function may be exposed to not yet verified header, + so is security critical. + """ + try: + untrusted_header_text = untrusted_header_text.decode('ascii') + except UnicodeDecodeError: + raise QubesException( + "Non-ASCII characters in backup header") + for untrusted_line in untrusted_header_text.splitlines(): + if untrusted_line.count('=') != 1: + raise QubesException("Invalid backup header") + key, value = untrusted_line.strip().split('=', 1) + if not _re_alphanum.match(key): + raise QubesException("Invalid backup header (" + "key)") + if key not in self.header_keys.keys(): + # Ignoring unknown option + continue + if not _re_alphanum.match(value): + raise QubesException("Invalid backup header (" + "value)") + if getattr(self, self.header_keys[key]) is not None: + raise QubesException( + "Duplicated header line: {}".format(key)) + if key in self.bool_options: + value = value.lower() in ["1", "true", "yes"] + elif key in self.int_options: + value = int(value) + setattr(self, self.header_keys[key], value) + + self.validate() + + def validate(self): + '''Validate header data, according to header version''' + if self.version == 1: + # header not really present + pass + elif self.version in [2, 3, 4]: + expected_attrs = ['version', 'encrypted', 'compressed', + 'hmac_algorithm'] + if self.encrypted and self.version < 4: + expected_attrs += ['crypto_algorithm'] + if self.version >= 3 and self.compressed: + expected_attrs += ['compression_filter'] + if self.version >= 4: + expected_attrs += ['backup_id'] + for key in expected_attrs: + if getattr(self, key) is None: + raise QubesException( + "Backup header lack '{}' info".format(key)) + else: + raise QubesException( + "Unsupported backup version {}".format(self.version)) + + def save(self, filename): + '''Save backup header into a file''' + with open(filename, "w") as f_header: + # make sure 'version' is the first key + f_header.write('version={}\n'.format(self.version)) + for key, attr in self.header_keys.items(): + if key == 'version': + continue + if getattr(self, attr) is None: + continue + f_header.write("{!s}={!s}\n".format(key, getattr(self, attr))) + +def launch_proc_with_pty(args, stdin=None, stdout=None, stderr=None, echo=True): + """Similar to pty.fork, but handle stdin/stdout according to parameters + instead of connecting to the pty + + :return tuple (subprocess.Popen, pty_master) + """ + + def set_ctty(ctty_fd, master_fd): + '''Set controlling terminal''' + os.setsid() + os.close(master_fd) + fcntl.ioctl(ctty_fd, termios.TIOCSCTTY, 0) + if not echo: + termios_p = termios.tcgetattr(ctty_fd) + # termios_p.c_lflags + termios_p[3] &= ~termios.ECHO + termios.tcsetattr(ctty_fd, termios.TCSANOW, termios_p) + (pty_master, pty_slave) = os.openpty() + p = subprocess.Popen(args, stdin=stdin, stdout=stdout, + stderr=stderr, + preexec_fn=lambda: set_ctty(pty_slave, pty_master)) + os.close(pty_slave) + return p, open(pty_master, 'wb+', buffering=0) + +def launch_scrypt(action, input_name, output_name, passphrase): + ''' + Launch 'scrypt' process, pass passphrase to it and return + subprocess.Popen object. + + :param action: 'enc' or 'dec' + :param input_name: input path or '-' for stdin + :param output_name: output path or '-' for stdout + :param passphrase: passphrase + :return: subprocess.Popen object + ''' + command_line = ['scrypt', action, input_name, output_name] + (p, pty) = launch_proc_with_pty(command_line, + stdin=subprocess.PIPE if input_name == '-' else None, + stdout=subprocess.PIPE if output_name == '-' else None, + stderr=subprocess.PIPE, + echo=False) + if action == 'enc': + prompts = (b'Please enter passphrase: ', b'Please confirm passphrase: ') + else: + prompts = (b'Please enter passphrase: ',) + for prompt in prompts: + actual_prompt = p.stderr.read(len(prompt)) + if actual_prompt != prompt: + raise QubesException( + 'Unexpected prompt from scrypt: {}'.format(actual_prompt)) + pty.write(passphrase.encode('utf-8') + b'\n') + pty.flush() + # save it here, so garbage collector would not close it (which would kill + # the child) + p.pty = pty + return p + +class ExtractWorker3(Process): + '''Process for handling inner tar layer of backup archive''' + # pylint: disable=too-many-instance-attributes + def __init__(self, queue, base_dir, passphrase, encrypted, + progress_callback, vmproc=None, + compressed=False, crypto_algorithm=DEFAULT_CRYPTO_ALGORITHM, + compression_filter=None, verify_only=False, handlers=None): + '''Start inner tar extraction worker + + The purpose of this class is to process files extracted from outer + archive layer and pass to appropriate handlers. Input files are given + through a queue. Insert :py:obj:`QUEUE_FINISHED` or + :py:obj:`QUEUE_ERROR` to end data processing (either cleanly, + or forcefully). + + Handlers are given as a map filename -> (data_func, size_func), + where data_func is called with file-like object to process, + and size_func is called with file size as argument. Note that + data_func and size_func may be called simultaneusly, in a different + processes. + + :param multiprocessing.Queue queue: a queue with filenames to + process; those files needs to be given as full path, inside *base_dir* + :param str base_dir: directory where all files to process live + :param str passphrase: passphrase to decrypt the data + :param bool encrypted: is encryption applied? + :param callable progress_callback: report extraction progress + :param subprocess.Popen vmproc: process extracting outer layer, + given here to monitor + it for failures (when it exits with non-zero exit code, inner layer + processing is stopped) + :param bool compressed: is the data compressed? + :param str crypto_algorithm: encryption algorithm, either `scrypt` or an + algorithm supported by openssl + :param str compression_filter: compression program, `gzip` by default + :param bool verify_only: only verify data integrity, do not extract + :param dict handlers: handlers for actual data + ''' + super(ExtractWorker3, self).__init__() + #: queue with files to extract + self.queue = queue + #: paths on the queue are relative to this dir + self.base_dir = base_dir + #: passphrase to decrypt/authenticate data + self.passphrase = passphrase + #: handlers for files; it should be dict filename -> (data_function, + # size_function), + # where data_function will get file-like object as the only argument and + # might be called in a separate process (multiprocessing.Process), + # and size_function will get file size (when known) in bytes + self.handlers = handlers + #: is the backup encrypted? + self.encrypted = encrypted + #: is the backup compressed? + self.compressed = compressed + #: what crypto algorithm is used for encryption? + self.crypto_algorithm = crypto_algorithm + #: only verify integrity, don't extract anything + self.verify_only = verify_only + #: progress + self.blocks_backedup = 0 + #: inner tar layer extraction (subprocess.Popen instance) + self.tar2_process = None + #: current inner tar archive name + self.tar2_current_file = None + #: call size_func handler for this file when tar report it on stderr + self.adjust_output_size = None + #: decompressor subprocess.Popen instance + self.decompressor_process = None + #: decryptor subprocess.Popen instance + self.decryptor_process = None + #: data import multiprocessing.Process instance + self.import_process = None + #: callback reporting progress to UI + self.progress_callback = progress_callback + #: process (subprocess.Popen instance) feeding the data into + # extraction tool + self.vmproc = vmproc + + self.log = logging.getLogger('qubesadmin.backup.extract') + self.stderr_encoding = sys.stderr.encoding or 'utf-8' + self.tar2_stderr = [] + self.compression_filter = compression_filter + + @staticmethod + def handle_streams(stream_in, streams_out, processes, size_limit=None, + progress_callback=None): + ''' + Copy stream_in to all streams_out and monitor all mentioned processes. + If any of them terminate with non-zero code, interrupt the process. Copy + at most `size_limit` data (if given). + + :param stream_in: file-like object to read data from + :param streams_out: dict of file-like objects to write data to + :param processes: dict of subprocess.Popen objects to monitor + :param size_limit: int maximum data amount to process + :param progress_callback: callable function to report progress, will be + given copied data size (it should accumulate internally) + :return: failed process name, failed stream name, "size_limit" or None ( + no error) + ''' + buffer_size = 409600 + bytes_copied = 0 + while True: + if size_limit: + to_copy = min(buffer_size, size_limit - bytes_copied) + if to_copy <= 0: + return "size_limit" + else: + to_copy = buffer_size + buf = stream_in.read(to_copy) + if not buf: + # done + return None + + if callable(progress_callback): + progress_callback(len(buf)) + for name, stream in streams_out.items(): + if stream is None: + continue + try: + stream.write(buf) + except IOError: + return name + bytes_copied += len(buf) + + for name, proc in processes.items(): + if proc is None: + continue + if isinstance(proc, Process): + if not proc.is_alive() and proc.exitcode != 0: + return name + elif proc.poll(): + return name + + def collect_tar_output(self): + '''Retrieve tar stderr and handle it appropriately + + Log errors, process file size if requested. + This use :py:attr:`tar2_process`. + ''' + if not self.tar2_process.stderr: + return + + if self.tar2_process.poll() is None: + try: + new_lines = self.tar2_process.stderr \ + .read(MAX_STDERR_BYTES).splitlines() + except IOError as e: + if e.errno == errno.EAGAIN: + return + else: + raise + else: + new_lines = self.tar2_process.stderr.readlines() + + new_lines = [x.decode(self.stderr_encoding) for x in new_lines] + + msg_re = re.compile(r".*#[0-9].*restore_pipe") + debug_msg = [msg for msg in new_lines if msg_re.match(msg)] + self.log.debug('tar2_stderr: %s', '\n'.join(debug_msg)) + new_lines = [msg for msg in new_lines if not msg_re.match(msg)] + if self.adjust_output_size: + # search for first file size reported by tar after setting + # self.adjust_output_size (so don't look at self.tar2_stderr) + # this is used only when extracting single-file archive, so don't + # bother with checking file name + file_size_re = re.compile(r"^[^ ]+ [^ ]+/[^ ]+ *([0-9]+) .*") + for line in new_lines: + match = file_size_re.match(line) + if match: + file_size = match.groups()[0] + self.adjust_output_size(file_size) + self.adjust_output_size = None + self.tar2_stderr += new_lines + + def run(self): + try: + self.__run__() + except Exception: + # Cleanup children + for process in [self.decompressor_process, + self.decryptor_process, + self.tar2_process]: + if process: + try: + process.terminate() + except OSError: + pass + process.wait() + self.log.exception('ERROR') + raise + + def handle_dir(self, dirname): + ''' Relocate files in given director when it's already extracted + + :param dirname: directory path to handle (relative to backup root), + without trailing slash + ''' + for fname, (data_func, size_func) in self.handlers.items(): + if not fname.startswith(dirname + '/'): + continue + if size_func is not None: + size_func(os.path.getsize(fname)) + with open(fname, 'rb') as input_file: + data_func(input_file) + os.unlink(fname) + shutil.rmtree(dirname) + + def cleanup_tar2(self, wait=True, terminate=False): + '''Cleanup running :py:attr:`tar2_process` + + :param wait: wait for it termination, otherwise method exit early if + process is still running + :param terminate: terminate the process if still running + ''' + if self.tar2_process is None: + return + if terminate: + if self.import_process is not None: + self.tar2_process.terminate() + self.import_process.terminate() + if wait: + self.tar2_process.wait() + if self.import_process is not None: + self.import_process.join() + elif self.tar2_process.poll() is None: + return + self.collect_tar_output() + if self.tar2_process.stderr: + self.tar2_process.stderr.close() + if self.tar2_process.returncode != 0: + self.log.error( + "ERROR: unable to extract files for %s, tar " + "output:\n %s", + self.tar2_current_file, + "\n ".join(self.tar2_stderr)) + else: + # Finished extracting the tar file + # if that was whole-directory archive, handle + # relocated files now + inner_name = self.tar2_current_file.rsplit('.', 1)[0] \ + .replace(self.base_dir + '/', '') + if os.path.basename(inner_name) == '.': + self.handle_dir( + os.path.dirname(inner_name)) + self.tar2_current_file = None + self.adjust_output_size = None + self.tar2_process = None + + @staticmethod + def _data_func_wrapper(close_fds, data_func, data_stream): + '''Close not needed file descriptors, then call data_func( + data_stream). + + This is to prevent holding write end of a pipe in subprocess, + preventing EOF transfer. + ''' + for fd in close_fds: + if fd == data_stream.fileno(): + continue + try: + os.close(fd) + except OSError: + pass + return data_func(data_stream) + + + def __run__(self): + self.log.debug("Started sending thread") + self.log.debug("Moving to dir " + self.base_dir) + os.chdir(self.base_dir) + + filename = None + + input_pipe = None + for filename in iter(self.queue.get, None): + if filename in (QUEUE_FINISHED, QUEUE_ERROR): + break + + assert isinstance(filename, str) + + self.log.debug("Extracting file " + filename) + + if filename.endswith('.000'): + # next file + if self.tar2_process is not None: + input_pipe.close() + self.cleanup_tar2(wait=True, terminate=False) + + inner_name = filename[:-len('.000')].replace( + self.base_dir + '/', '') + redirect_stdout = None + if os.path.basename(inner_name) == '.': + if (inner_name in self.handlers or + any(x.startswith(os.path.dirname(inner_name) + '/') + for x in self.handlers)): + tar2_cmdline = ['tar', + '-%s' % ("t" if self.verify_only else "x"), + inner_name] + else: + # ignore this directory + tar2_cmdline = None + elif inner_name in self.handlers: + tar2_cmdline = ['tar', + '-%svvO' % ("t" if self.verify_only else "x"), + inner_name] + redirect_stdout = subprocess.PIPE + else: + # no handlers for this file, ignore it + tar2_cmdline = None + + if tar2_cmdline is None: + # ignore the file + os.remove(filename) + continue + + if self.compressed: + if self.compression_filter: + tar2_cmdline.insert(-1, + "--use-compress-program=%s" % + self.compression_filter) + else: + tar2_cmdline.insert(-1, "--use-compress-program=%s" % + DEFAULT_COMPRESSION_FILTER) + + self.log.debug("Running command " + str(tar2_cmdline)) + if self.encrypted: + # Start decrypt + self.decryptor_process = subprocess.Popen( + ["openssl", "enc", + "-d", + "-" + self.crypto_algorithm, + "-pass", + "pass:" + self.passphrase], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE) + + self.tar2_process = subprocess.Popen( + tar2_cmdline, + stdin=self.decryptor_process.stdout, + stdout=redirect_stdout, + stderr=subprocess.PIPE) + self.decryptor_process.stdout.close() + input_pipe = self.decryptor_process.stdin + else: + self.tar2_process = subprocess.Popen( + tar2_cmdline, + stdin=subprocess.PIPE, + stdout=redirect_stdout, + stderr=subprocess.PIPE) + input_pipe = self.tar2_process.stdin + + if inner_name in self.handlers: + assert redirect_stdout is subprocess.PIPE + data_func, size_func = self.handlers[inner_name] + self.import_process = multiprocessing.Process( + target=self._data_func_wrapper, + args=([input_pipe.fileno()], + data_func, self.tar2_process.stdout)) + self.import_process.start() + self.tar2_process.stdout.close() + self.adjust_output_size = size_func + + fcntl.fcntl(self.tar2_process.stderr.fileno(), fcntl.F_SETFL, + fcntl.fcntl(self.tar2_process.stderr.fileno(), + fcntl.F_GETFL) | os.O_NONBLOCK) + self.tar2_stderr = [] + elif not self.tar2_process: + # Extracting of the current archive failed, skip to the next + # archive + os.remove(filename) + continue + else: + (basename, ext) = os.path.splitext(self.tar2_current_file) + previous_chunk_number = int(ext[1:]) + expected_filename = basename + '.%03d' % ( + previous_chunk_number+1) + if expected_filename != filename: + self.cleanup_tar2(wait=True, terminate=True) + self.log.error( + 'Unexpected file in archive: %s, expected %s', + filename, expected_filename) + os.remove(filename) + continue + self.log.debug("Releasing next chunck") + + self.tar2_current_file = filename + + input_file = open(filename, 'rb') + + run_error = self.handle_streams( + input_file, + {'target': input_pipe}, + {'vmproc': self.vmproc, + 'addproc': self.tar2_process, + 'data_import': self.import_process, + 'decryptor': self.decryptor_process, + }, + progress_callback=self.progress_callback) + input_file.close() + if run_error: + if run_error == "target": + self.collect_tar_output() + details = "\n".join(self.tar2_stderr) + else: + details = "%s failed" % run_error + if self.decryptor_process: + self.decryptor_process.terminate() + self.decryptor_process.wait() + self.decryptor_process = None + self.log.error('Error while processing \'%s\': %s', + self.tar2_current_file, details) + self.cleanup_tar2(wait=True, terminate=True) + + # Delete the file as we don't need it anymore + self.log.debug('Removing file %s', filename) + os.remove(filename) + + if self.tar2_process is not None: + input_pipe.close() + if filename == QUEUE_ERROR: + if self.decryptor_process: + self.decryptor_process.terminate() + self.decryptor_process.wait() + self.decryptor_process = None + self.cleanup_tar2(terminate=(filename == QUEUE_ERROR)) + + self.log.debug('Finished extracting thread') + + +def get_supported_hmac_algo(hmac_algorithm=None): + '''Generate a list of supported hmac algorithms + + :param hmac_algorithm: default algorithm, if given, it is placed as a + first element + ''' + # Start with provided default + if hmac_algorithm: + yield hmac_algorithm + if hmac_algorithm != 'scrypt': + yield 'scrypt' + proc = subprocess.Popen(['openssl', 'list-message-digest-algorithms'], + stdout=subprocess.PIPE) + try: + for algo in proc.stdout.readlines(): + algo = algo.decode('ascii') + if '=>' in algo: + continue + yield algo.strip() + finally: + proc.terminate() + proc.wait() + proc.stdout.close() + +class BackupApp(object): + '''Interface for backup collection''' + # pylint: disable=too-few-public-methods + def __init__(self, qubes_xml): + '''Initialize BackupApp object and load qubes.xml into it''' + self.store = qubes_xml + self.domains = {} + self.globals = {} + self.load() + + def load(self): + '''Load qubes.xml''' + raise NotImplementedError + +class BackupVM(object): + '''Interface for a single VM in the backup''' + # pylint: disable=too-few-public-methods + def __init__(self): + '''Initialize empty BackupVM object''' + #: VM class + self.klass = 'AppVM' + #: VM name + self.name = None + #: VM template + self.template = None + #: VM label + self.label = None + #: VM properties + self.properties = {} + #: VM features (key/value), aka services in core2 + self.features = {} + #: VM tags + self.tags = set() + #: VM devices - dict with key=devtype, value=dict of devices ( + # key=ident, value=options) + self.devices = collections.defaultdict(dict) + #: VM path in the backup + self.backup_path = None + #: size of the VM + self.size = 0 + + @property + def included_in_backup(self): + '''Report whether a VM is included in the backup''' + return False + +class BackupRestoreOptions(object): + '''Options for restore operation''' + # pylint: disable=too-few-public-methods + def __init__(self): + #: use default NetVM if the one referenced in backup do not exists on + # the host + self.use_default_netvm = True + #: set NetVM to "none" if the one referenced in backup do not exists + # on the host + self.use_none_netvm = False + #: set template to default if the one referenced in backup do not + # exists on the host + self.use_default_template = True + #: use default kernel if the one referenced in backup do not exists + # on the host + self.use_default_kernel = True + #: restore dom0 home + self.dom0_home = True + #: restore dom0 home even if username is different + self.ignore_username_mismatch = False + #: do not restore data, only verify backup integrity + self.verify_only = False + #: automatically rename VM during restore, when it would conflict + # with existing one + self.rename_conflicting = True + #: list of VM names to exclude + self.exclude = [] + #: restore VMs into selected storage pool + self.override_pool = None + +class BackupRestore(object): + """Usage: + + >>> restore_op = BackupRestore(...) + >>> # adjust restore_op.options here + >>> restore_info = restore_op.get_restore_info() + >>> # manipulate restore_info to select VMs to restore here + >>> restore_op.restore_do(restore_info) + """ + + class VMToRestore(object): + '''Information about a single VM to be restored''' + # pylint: disable=too-few-public-methods + #: VM excluded from restore by user + EXCLUDED = object() + #: VM with such name already exists on the host + ALREADY_EXISTS = object() + #: NetVM used by the VM does not exists on the host + MISSING_NETVM = object() + #: TemplateVM used by the VM does not exists on the host + MISSING_TEMPLATE = object() + #: Kernel used by the VM does not exists on the host + MISSING_KERNEL = object() + + def __init__(self, vm): + assert isinstance(vm, BackupVM) + self.vm = vm + self.name = vm.name + self.subdir = vm.backup_path + self.size = vm.size + self.problems = set() + self.template = vm.template + if vm.properties.get('netvm', None): + self.netvm = vm.properties['netvm'] + else: + self.netvm = None + self.orig_template = None + self.restored_vm = None + + @property + def good_to_go(self): + '''Is the VM ready for restore?''' + return len(self.problems) == 0 + + class Dom0ToRestore(VMToRestore): + '''Information about dom0 home to restore''' + # pylint: disable=too-few-public-methods + #: backup was performed on system with different dom0 username + USERNAME_MISMATCH = object() + + def __init__(self, vm, subdir=None): + super(BackupRestore.Dom0ToRestore, self).__init__(vm) + if subdir: + self.subdir = subdir + self.username = os.path.basename(subdir) + + def __init__(self, app, backup_location, backup_vm, passphrase): + super(BackupRestore, self).__init__() + + #: qubes.Qubes instance + self.app = app + + #: options how the backup should be restored + self.options = BackupRestoreOptions() + + #: VM from which backup should be retrieved + self.backup_vm = backup_vm + if backup_vm and backup_vm.qid == 0: + self.backup_vm = None + + #: backup path, inside VM pointed by :py:attr:`backup_vm` + self.backup_location = backup_location + + #: passphrase protecting backup integrity and optionally decryption + self.passphrase = passphrase + + #: temporary directory used to extract the data before moving to the + # final location + self.tmpdir = tempfile.mkdtemp(prefix="restore", dir="/var/tmp") + + #: list of processes (Popen objects) to kill on cancel + self.processes_to_kill_on_cancel = [] + + #: is the backup operation canceled + self.canceled = False + + #: report restore progress, called with one argument - percents of + # data restored + # FIXME: convert to float [0,1] + self.progress_callback = None + + self.log = logging.getLogger('qubesadmin.backup') + + #: basic information about the backup + self.header_data = self._retrieve_backup_header() + + #: VMs included in the backup + self.backup_app = self._process_qubes_xml() + + def _start_retrieval_process(self, filelist, limit_count, limit_bytes): + """Retrieve backup stream and extract it to :py:attr:`tmpdir` + + :param filelist: list of files to extract; listing directory name + will extract the whole directory; use empty list to extract the whole + archive + :param limit_count: maximum number of files to extract + :param limit_bytes: maximum size of extracted data + :return: a touple of (Popen object of started process, file-like + object for reading extracted files list, file-like object for reading + errors) + """ + + vmproc = None + if self.backup_vm is not None: + # If APPVM, STDOUT is a PIPE + vmproc = self.backup_vm.run_service('qubes.Restore') + vmproc.stdin.write( + (self.backup_location.replace("\r", "").replace("\n", + "") + "\n").encode()) + vmproc.stdin.flush() + + # Send to tar2qfile the VMs that should be extracted + vmproc.stdin.write((" ".join(filelist) + "\n").encode()) + vmproc.stdin.flush() + self.processes_to_kill_on_cancel.append(vmproc) + + backup_stdin = vmproc.stdout + tar1_command = ['/usr/libexec/qubes/qfile-dom0-unpacker', + str(os.getuid()), self.tmpdir, '-v'] + else: + backup_stdin = open(self.backup_location, 'rb') + + tar1_command = ['tar', + '-ixv', + '-C', self.tmpdir] + filelist + + tar1_env = os.environ.copy() + tar1_env['UPDATES_MAX_BYTES'] = str(limit_bytes) + tar1_env['UPDATES_MAX_FILES'] = str(limit_count) + self.log.debug("Run command" + str(tar1_command)) + command = subprocess.Popen( + tar1_command, + stdin=backup_stdin, + stdout=vmproc.stdin if vmproc else subprocess.PIPE, + stderr=subprocess.PIPE, + env=tar1_env) + backup_stdin.close() + self.processes_to_kill_on_cancel.append(command) + + # qfile-dom0-unpacker output filelist on stderr + # and have stdout connected to the VM), while tar output filelist + # on stdout + if self.backup_vm: + filelist_pipe = command.stderr + # let qfile-dom0-unpacker hold the only open FD to the write end of + # pipe, otherwise qrexec-client will not receive EOF when + # qfile-dom0-unpacker terminates + vmproc.stdin.close() + else: + filelist_pipe = command.stdout + + if self.backup_vm: + error_pipe = vmproc.stderr + else: + error_pipe = command.stderr + return command, filelist_pipe, error_pipe + + def _verify_hmac(self, filename, hmacfile, algorithm=None): + '''Verify hmac of a file using given algorithm. + + If algorithm is not specified, use the one from backup header ( + :py:attr:`header_data`). + + Raise :py:exc:`QubesException` on failure, return :py:obj:`True` on + success. + + 'scrypt' algorithm is supported only for header file; hmac file is + encrypted (and integrity protected) version of plain header. + + :param filename: path to file to be verified + :param hmacfile: path to hmac file for *filename* + :param algorithm: override algorithm + ''' + def load_hmac(hmac_text): + '''Parse hmac output by openssl. + + Return just hmac, without filename and other metadata. + ''' + if any(ord(x) not in range(128) for x in hmac_text): + raise QubesException( + "Invalid content of {}".format(hmacfile)) + hmac_text = hmac_text.strip().split("=") + if len(hmac_text) > 1: + hmac_text = hmac_text[1].strip() + else: + raise QubesException( + "ERROR: invalid hmac file content") + + return hmac_text + if algorithm is None: + algorithm = self.header_data.hmac_algorithm + passphrase = self.passphrase.encode('utf-8') + self.log.debug("Verifying file %s", filename) + + if os.stat(os.path.join(self.tmpdir, hmacfile)).st_size > \ + HMAC_MAX_SIZE: + raise QubesException('HMAC file {} too large'.format( + hmacfile)) + + if hmacfile != filename + ".hmac": + raise QubesException( + "ERROR: expected hmac for {}, but got {}". + format(filename, hmacfile)) + + if algorithm == 'scrypt': + # in case of 'scrypt' _verify_hmac is only used for backup header + assert filename == HEADER_FILENAME + self._verify_and_decrypt(hmacfile, HEADER_FILENAME + '.dec') + f_name = os.path.join(self.tmpdir, filename) + with open(f_name, 'rb') as f_one: + with open(f_name + '.dec', 'rb') as f_two: + if f_one.read() != f_two.read(): + raise QubesException( + 'Invalid hmac on {}'.format(filename)) + else: + return True + + with open(os.path.join(self.tmpdir, filename), 'rb') as f_input: + hmac_proc = subprocess.Popen( + ["openssl", "dgst", "-" + algorithm, "-hmac", passphrase], + stdin=f_input, + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + hmac_stdout, hmac_stderr = hmac_proc.communicate() + + if hmac_stderr: + raise QubesException( + "ERROR: verify file {0}: {1}".format(filename, hmac_stderr)) + else: + self.log.debug("Loading hmac for file %s", filename) + with open(os.path.join(self.tmpdir, hmacfile), 'r', + encoding='ascii') as f_hmac: + hmac = load_hmac(f_hmac.read()) + + if hmac and load_hmac(hmac_stdout.decode('ascii')) == hmac: + os.unlink(os.path.join(self.tmpdir, hmacfile)) + self.log.debug( + "File verification OK -> Sending file %s", filename) + return True + else: + raise QubesException( + "ERROR: invalid hmac for file {0}: {1}. " + "Is the passphrase correct?". + format(filename, load_hmac(hmac_stdout.decode('ascii')))) + + def _verify_and_decrypt(self, filename, output=None): + '''Handle scrypt-wrapped file + + Decrypt the file, and verify its integrity - both tasks handled by + 'scrypt' tool. Filename (without extension) is also validated. + + :param filename: Input file name (relative to :py:attr:`tmpdir`), + needs to have `.enc` or `.hmac` extension + :param output: Output file name (relative to :py:attr:`tmpdir`), + use :py:obj:`None` to use *filename* without extension + :return: *filename* without extension + ''' + assert filename.endswith('.enc') or filename.endswith('.hmac') + fullname = os.path.join(self.tmpdir, filename) + (origname, _) = os.path.splitext(filename) + if output: + fulloutput = os.path.join(self.tmpdir, output) + else: + fulloutput = os.path.join(self.tmpdir, origname) + if origname == HEADER_FILENAME: + passphrase = u'{filename}!{passphrase}'.format( + filename=origname, + passphrase=self.passphrase) + else: + passphrase = u'{backup_id}!{filename}!{passphrase}'.format( + backup_id=self.header_data.backup_id, + filename=origname, + passphrase=self.passphrase) + try: + p = launch_scrypt('dec', fullname, fulloutput, passphrase) + except OSError as err: + raise QubesException('failed to decrypt {}: {!s}'.format( + fullname, err)) + (_, stderr) = p.communicate() + if hasattr(p, 'pty'): + p.pty.close() + if p.returncode != 0: + os.unlink(fulloutput) + raise QubesException('failed to decrypt {}: {}'.format( + fullname, stderr)) + # encrypted file is no longer needed + os.unlink(fullname) + return origname + + def _retrieve_backup_header_files(self, files, allow_none=False): + '''Retrieve backup header. + + Start retrieval process (possibly involving network access from + another VM). Returns a collection of retrieved file paths. + ''' + (retrieve_proc, filelist_pipe, error_pipe) = \ + self._start_retrieval_process( + files, len(files), 1024 * 1024) + filelist = filelist_pipe.read() + filelist_pipe.close() + retrieve_proc_returncode = retrieve_proc.wait() + if retrieve_proc in self.processes_to_kill_on_cancel: + self.processes_to_kill_on_cancel.remove(retrieve_proc) + extract_stderr = error_pipe.read(MAX_STDERR_BYTES) + error_pipe.close() + + # wait for other processes (if any) + for proc in self.processes_to_kill_on_cancel: + if proc.wait() != 0: + raise QubesException( + "Backup header retrieval failed (exit code {})".format( + proc.wait()) + ) + + if retrieve_proc_returncode != 0: + if not filelist and 'Not found in archive' in extract_stderr: + if allow_none: + return None + else: + raise QubesException( + "unable to read the qubes backup file {0} ({1}): {2}". + format( + self.backup_location, + retrieve_proc.wait(), + extract_stderr + )) + actual_files = filelist.decode('ascii').splitlines() + if sorted(actual_files) != sorted(files): + raise QubesException( + 'unexpected files in archive: got {!r}, expected {!r}'.format( + actual_files, files + )) + for fname in files: + if not os.path.exists(os.path.join(self.tmpdir, fname)): + if allow_none: + return None + else: + raise QubesException( + 'Unable to retrieve file {} from backup {}: {}'.format( + fname, self.backup_location, extract_stderr + ) + ) + return files + + def _retrieve_backup_header(self): + """Retrieve backup header and qubes.xml. Only backup header is + analyzed, qubes.xml is left as-is + (not even verified/decrypted/uncompressed) + + :return header_data + :rtype :py:class:`BackupHeader` + """ + + if not self.backup_vm and os.path.exists( + os.path.join(self.backup_location, 'qubes.xml')): + # backup format version 1 doesn't have header + header_data = BackupHeader() + header_data.version = 1 + return header_data + + header_files = self._retrieve_backup_header_files( + ['backup-header', 'backup-header.hmac'], allow_none=True) + + if not header_files: + # R2-Beta3 didn't have backup header, so if none is found, + # assume it's version=2 and use values present at that time + header_data = BackupHeader( + version=2, + # place explicitly this value, because it is what format_version + # 2 have + hmac_algorithm='SHA1', + crypto_algorithm='aes-256-cbc', + # TODO: set encrypted to something... + ) + else: + filename = HEADER_FILENAME + hmacfile = HEADER_FILENAME + '.hmac' + self.log.debug("Got backup header and hmac: %s, %s", + filename, hmacfile) + + file_ok = False + hmac_algorithm = DEFAULT_HMAC_ALGORITHM + for hmac_algo in get_supported_hmac_algo(hmac_algorithm): + try: + if self._verify_hmac(filename, hmacfile, hmac_algo): + file_ok = True + break + except QubesException as err: + self.log.debug( + 'Failed to verify %s using %s: %r', + hmacfile, hmac_algo, err) + # Ignore exception here, try the next algo + if not file_ok: + raise QubesException( + "Corrupted backup header (hmac verification " + "failed). Is the password correct?") + filename = os.path.join(self.tmpdir, filename) + with open(filename, 'rb') as f_header: + header_data = BackupHeader(f_header.read()) + os.unlink(filename) + + return header_data + + def _start_inner_extraction_worker(self, queue, handlers): + """Start a worker process, extracting inner layer of bacup archive, + extract them to :py:attr:`tmpdir`. + End the data by pushing QUEUE_FINISHED or QUEUE_ERROR to the queue. + + :param queue :py:class:`Queue` object to handle files from + """ + + # Setup worker to extract encrypted data chunks to the restore dirs + # Create the process here to pass it options extracted from + # backup header + extractor_params = { + 'queue': queue, + 'base_dir': self.tmpdir, + 'passphrase': self.passphrase, + 'encrypted': self.header_data.encrypted, + 'compressed': self.header_data.compressed, + 'crypto_algorithm': self.header_data.crypto_algorithm, + 'verify_only': self.options.verify_only, + 'progress_callback': self.progress_callback, + 'handlers': handlers, + } + self.log.debug( + 'Starting extraction worker in %s, file handlers map: %s', + self.tmpdir, repr(handlers)) + format_version = self.header_data.version + if format_version in [3, 4]: + extractor_params['compression_filter'] = \ + self.header_data.compression_filter + if format_version == 4: + # encryption already handled + extractor_params['encrypted'] = False + extract_proc = ExtractWorker3(**extractor_params) + else: + raise NotImplementedError( + "Backup format version %d not supported" % format_version) + extract_proc.start() + return extract_proc + + @staticmethod + def _save_qubes_xml(path, stream): + '''Handler for qubes.xml.000 content - just save the data to a file''' + with open(path, 'wb') as f_qubesxml: + f_qubesxml.write(stream.read()) + + def _process_qubes_xml(self): + """Verify, unpack and load qubes.xml. Possibly convert its format if + necessary. It expect that :py:attr:`header_data` is already populated, + and :py:meth:`retrieve_backup_header` was called. + """ + if self.header_data.version == 1: + raise NotImplementedError('Backup format version 1 not supported') + elif self.header_data.version in [2, 3]: + self._retrieve_backup_header_files( + ['qubes.xml.000', 'qubes.xml.000.hmac']) + self._verify_hmac("qubes.xml.000", "qubes.xml.000.hmac") + else: + self._retrieve_backup_header_files(['qubes.xml.000.enc']) + self._verify_and_decrypt('qubes.xml.000.enc') + + queue = Queue() + queue.put("qubes.xml.000") + queue.put(QUEUE_FINISHED) + + qubes_xml_path = os.path.join(self.tmpdir, 'qubes-restored.xml') + handlers = { + 'qubes.xml': ( + functools.partial(self._save_qubes_xml, qubes_xml_path), + None) + } + extract_proc = self._start_inner_extraction_worker(queue, handlers) + extract_proc.join() + if extract_proc.exitcode != 0: + raise QubesException( + "unable to extract the qubes backup. " + "Check extracting process errors.") + + if self.header_data.version in [2, 3]: + from qubesadmin.backup.core2 import Core2Qubes + backup_app = Core2Qubes(qubes_xml_path) + elif self.header_data.version in [4]: + from qubesadmin.backup.core3 import Core3Qubes + backup_app = Core3Qubes(qubes_xml_path) + else: + raise QubesException( + 'Unsupported qubes.xml format version: {}'.format( + self.header_data.version)) + # Not needed anymore - all the data stored in backup_app + os.unlink(qubes_xml_path) + return backup_app + + def _restore_vm_data(self, vms_dirs, vms_size, handlers): + '''Restore data of VMs + + :param vms_dirs: list of directories to extract (skip others) + :param vms_size: expected size (abort if source stream exceed this + value) + :param handlers: handlers for restored files - see + :py:class:`ExtractWorker3` for details + ''' + # Currently each VM consists of at most 7 archives (count + # file_to_backup calls in backup_prepare()), but add some safety + # margin for further extensions. Each archive is divided into 100MB + # chunks. Additionally each file have own hmac file. So assume upper + # limit as 2*(10*COUNT_OF_VMS+TOTAL_SIZE/100MB) + limit_count = str(2 * (10 * len(vms_dirs) + + int(vms_size / (100 * 1024 * 1024)))) + + self.log.debug("Working in temporary dir: %s", self.tmpdir) + self.log.info("Extracting data: %s to restore", size_to_human(vms_size)) + + # retrieve backup from the backup stream (either VM, or dom0 file) + (retrieve_proc, filelist_pipe, error_pipe) = \ + self._start_retrieval_process( + vms_dirs, limit_count, vms_size) + + to_extract = Queue() + + # extract data retrieved by retrieve_proc + extract_proc = self._start_inner_extraction_worker( + to_extract, handlers) + + try: + filename = None + hmacfile = None + nextfile = None + while True: + if self.canceled: + break + if not extract_proc.is_alive(): + retrieve_proc.terminate() + retrieve_proc.wait() + if retrieve_proc in self.processes_to_kill_on_cancel: + self.processes_to_kill_on_cancel.remove(retrieve_proc) + # wait for other processes (if any) + for proc in self.processes_to_kill_on_cancel: + proc.wait() + break + if nextfile is not None: + filename = nextfile + else: + filename = filelist_pipe.readline().decode('ascii').strip() + + self.log.debug("Getting new file: %s", filename) + + if not filename or filename == "EOF": + break + + # if reading archive directly with tar, wait for next filename - + # tar prints filename before processing it, so wait for + # the next one to be sure that whole file was extracted + if not self.backup_vm: + nextfile = filelist_pipe.readline().decode('ascii').strip() + + if self.header_data.version in [2, 3]: + if not self.backup_vm: + hmacfile = nextfile + nextfile = filelist_pipe.readline().\ + decode('ascii').strip() + else: + hmacfile = filelist_pipe.readline().\ + decode('ascii').strip() + + if self.canceled: + break + + self.log.debug("Getting hmac: %s", hmacfile) + if not hmacfile or hmacfile == "EOF": + # Premature end of archive, either of tar1_command or + # vmproc exited with error + break + else: # self.header_data.version == 4 + if not filename.endswith('.enc'): + raise qubesadmin.exc.QubesException( + 'Invalid file extension found in archive: {}'. + format(filename)) + + if not any(filename.startswith(x) for x in vms_dirs): + self.log.debug("Ignoring VM not selected for restore") + os.unlink(os.path.join(self.tmpdir, filename)) + if hmacfile: + os.unlink(os.path.join(self.tmpdir, hmacfile)) + continue + + if self.header_data.version in [2, 3]: + self._verify_hmac(filename, hmacfile) + else: + # _verify_and_decrypt will write output to a file with + # '.enc' extension cut off. This is safe because: + # - `scrypt` tool will override output, so if the file was + # already there (received from the VM), it will be removed + # - incoming archive extraction will refuse to override + # existing file, so if `scrypt` already created one, + # it can not be manipulated by the VM + # - when the file is retrieved from the VM, it appears at + # the final form - if it's visible, VM have no longer + # influence over its content + # + # This all means that if the file was correctly verified + # + decrypted, we will surely access the right file + filename = self._verify_and_decrypt(filename) + to_extract.put(os.path.join(self.tmpdir, filename)) + + if self.canceled: + raise BackupCanceledError("Restore canceled", + tmpdir=self.tmpdir) + + if retrieve_proc.wait() != 0: + raise QubesException( + "unable to read the qubes backup file {0}: {1}" + .format(self.backup_location, error_pipe.read( + MAX_STDERR_BYTES))) + # wait for other processes (if any) + for proc in self.processes_to_kill_on_cancel: + proc.wait() + if proc.returncode != 0: + raise QubesException( + "Backup completed, " + "but VM sending it reported an error (exit code {})". + format(proc.returncode)) + + if filename and filename != "EOF": + raise QubesException( + "Premature end of archive, the last file was %s" % filename) + except: + to_extract.put(QUEUE_ERROR) + extract_proc.join() + raise + else: + to_extract.put(QUEUE_FINISHED) + finally: + error_pipe.close() + filelist_pipe.close() + + self.log.debug("Waiting for the extraction process to finish...") + extract_proc.join() + self.log.debug("Extraction process finished with code: %s", + extract_proc.exitcode) + if extract_proc.exitcode != 0: + raise QubesException( + "unable to extract the qubes backup. " + "Check extracting process errors.") + + def new_name_for_conflicting_vm(self, orig_name, restore_info): + '''Generate new name for conflicting VM + + Add a number suffix, until the name is unique. If no unique name can + be found using this strategy, return :py:obj:`None` + ''' + number = 1 + if len(orig_name) > 29: + orig_name = orig_name[0:29] + new_name = orig_name + while (new_name in restore_info.keys() or + new_name in [x.name for x in restore_info.values()] or + new_name in self.app.domains): + new_name = str('{}{}'.format(orig_name, number)) + number += 1 + if number == 100: + # give up + return None + return new_name + + def restore_info_verify(self, restore_info): + '''Verify restore info - validate VM dependencies, name conflicts + etc. + ''' + for vm in restore_info.keys(): + if vm in ['dom0']: + continue + + vm_info = restore_info[vm] + assert isinstance(vm_info, self.VMToRestore) + + vm_info.problems.clear() + if vm in self.options.exclude: + vm_info.problems.add(self.VMToRestore.EXCLUDED) + + if not self.options.verify_only and \ + vm_info.name in self.app.domains: + if self.options.rename_conflicting: + new_name = self.new_name_for_conflicting_vm( + vm, restore_info + ) + if new_name is not None: + vm_info.name = new_name + else: + vm_info.problems.add(self.VMToRestore.ALREADY_EXISTS) + else: + vm_info.problems.add(self.VMToRestore.ALREADY_EXISTS) + + # check template + if vm_info.template: + template_name = vm_info.template + try: + host_template = self.app.domains[template_name] + except KeyError: + host_template = None + present_on_host = (host_template and + isinstance(host_template, qubesadmin.vm.TemplateVM)) + present_in_backup = (template_name in restore_info.keys() and + restore_info[template_name].good_to_go and + restore_info[template_name].vm.klass == + 'TemplateVM') + if not present_on_host and not present_in_backup: + if self.options.use_default_template and \ + self.app.default_template: + if vm_info.orig_template is None: + vm_info.orig_template = template_name + vm_info.template = self.app.default_template.name + else: + vm_info.problems.add( + self.VMToRestore.MISSING_TEMPLATE) + + # check netvm + if vm_info.vm.properties.get('netvm', None) is not None: + netvm_name = vm_info.netvm + + try: + netvm_on_host = self.app.domains[netvm_name] + except KeyError: + netvm_on_host = None + + present_on_host = (netvm_on_host is not None + and netvm_on_host.provides_network) + present_in_backup = (netvm_name in restore_info.keys() and + restore_info[netvm_name].good_to_go and + restore_info[netvm_name].vm.properties.get( + 'provides_network', False)) + if not present_on_host and not present_in_backup: + if self.options.use_default_netvm: + del vm_info.vm.properties['netvm'] + elif self.options.use_none_netvm: + vm_info.netvm = None + else: + vm_info.problems.add(self.VMToRestore.MISSING_NETVM) + + return restore_info + + def get_restore_info(self): + '''Get restore info + + Return information about what is included in the backup. + That dictionary can be adjusted to select what VM should be restore. + ''' + # Format versions: + # 1 - Qubes R1, Qubes R2 beta1, beta2 + # 2 - Qubes R2 beta3+ + # 3 - Qubes R2+ + # 4 - Qubes R4+ + + vms_to_restore = {} + + for vm in self.backup_app.domains.values(): + if vm.klass == 'AdminVM': + # Handle dom0 as special case later + continue + if vm.included_in_backup: + self.log.debug("%s is included in backup", vm.name) + + vms_to_restore[vm.name] = self.VMToRestore(vm) + + if vm.template is not None: + templatevm_name = vm.template + vms_to_restore[vm.name].template = templatevm_name + + vms_to_restore = self.restore_info_verify(vms_to_restore) + + # ...and dom0 home + if self.options.dom0_home and \ + self.backup_app.domains['dom0'].included_in_backup: + vm = self.backup_app.domains['dom0'] + vms_to_restore['dom0'] = self.Dom0ToRestore(vm) + local_user = grp.getgrnam('qubes').gr_mem[0] + + if vms_to_restore['dom0'].username != local_user: + if not self.options.ignore_username_mismatch: + vms_to_restore['dom0'].problems.add( + self.Dom0ToRestore.USERNAME_MISMATCH) + + return vms_to_restore + + @staticmethod + def get_restore_summary(restore_info): + '''Return a ASCII formatted table with restore info summary''' + fields = { + "name": {'func': lambda vm: vm.name}, + + "type": {'func': lambda vm: vm.klass}, + + "template": {'func': lambda vm: + 'n/a' if vm.template is None else vm.template}, + + "netvm": {'func': lambda vm: + '(default)' if 'netvm' not in vm.properties else + '-' if vm.properties['netvm'] is None else + vm.properties['netvm']}, + + "label": {'func': lambda vm: vm.label}, + } + + fields_to_display = ['name', 'type', 'template', + 'netvm', 'label'] + + # First calculate the maximum width of each field we want to display + total_width = 0 + for field in fields_to_display: + fields[field]['max_width'] = len(field) + for vm_info in restore_info.values(): + if vm_info.vm: + # noinspection PyUnusedLocal + field_len = len(str(fields[field]["func"](vm_info.vm))) + if field_len > fields[field]['max_width']: + fields[field]['max_width'] = field_len + total_width += fields[field]['max_width'] + + summary = "" + summary += "The following VMs are included in the backup:\n" + summary += "\n" + + # Display the header + for field in fields_to_display: + # noinspection PyTypeChecker + fmt = "{{0:-^{0}}}-+".format(fields[field]["max_width"] + 1) + summary += fmt.format('-') + summary += "\n" + for field in fields_to_display: + # noinspection PyTypeChecker + fmt = "{{0:>{0}}} |".format(fields[field]["max_width"] + 1) + summary += fmt.format(field) + summary += "\n" + for field in fields_to_display: + # noinspection PyTypeChecker + fmt = "{{0:-^{0}}}-+".format(fields[field]["max_width"] + 1) + summary += fmt.format('-') + summary += "\n" + + for vm_info in restore_info.values(): + assert isinstance(vm_info, BackupRestore.VMToRestore) + # Skip non-VM here + if not vm_info.vm: + continue + # noinspection PyUnusedLocal + summary_line = "" + for field in fields_to_display: + # noinspection PyTypeChecker + fmt = "{{0:>{0}}} |".format(fields[field]["max_width"] + 1) + summary_line += fmt.format(fields[field]["func"](vm_info.vm)) + + if BackupRestore.VMToRestore.EXCLUDED in vm_info.problems: + summary_line += " <-- Excluded from restore" + elif BackupRestore.VMToRestore.ALREADY_EXISTS in vm_info.problems: + summary_line += \ + " <-- A VM with the same name already exists on the host!" + elif BackupRestore.VMToRestore.MISSING_TEMPLATE in \ + vm_info.problems: + summary_line += " <-- No matching template on the host " \ + "or in the backup found!" + elif BackupRestore.VMToRestore.MISSING_NETVM in \ + vm_info.problems: + summary_line += " <-- No matching netvm on the host " \ + "or in the backup found!" + else: + if vm_info.template != vm_info.vm.template: + summary_line += " <-- Template change to '{}'".format( + vm_info.template) + if vm_info.name != vm_info.vm.name: + summary_line += " <-- Will be renamed to '{}'".format( + vm_info.name) + + summary += summary_line + "\n" + + if 'dom0' in restore_info.keys(): + summary_line = "" + for field in fields_to_display: + # noinspection PyTypeChecker + fmt = "{{0:>{0}}} |".format(fields[field]["max_width"] + 1) + if field == "name": + summary_line += fmt.format("Dom0") + elif field == "type": + summary_line += fmt.format("Home") + else: + summary_line += fmt.format("") + if BackupRestore.Dom0ToRestore.USERNAME_MISMATCH in \ + restore_info['dom0'].problems: + summary_line += " <-- username in backup and dom0 mismatch" + + summary += summary_line + "\n" + + return summary + + @staticmethod + def _templates_first(vms): + '''Sort templates befor other VM types (AppVM etc)''' + def key_function(instance): + '''Key function for :py:func:`sorted`''' + if isinstance(instance, BackupVM): + return instance.klass == 'TemplateVM' + elif hasattr(instance, 'vm'): + return key_function(instance.vm) + return 0 + return sorted(vms, + key=key_function, + reverse=True) + + + def _handle_dom0(self, backup_path): + '''Extract dom0 home''' + local_user = grp.getgrnam('qubes').gr_mem[0] + home_dir = pwd.getpwnam(local_user).pw_dir + backup_dom0_home_dir = os.path.join(self.tmpdir, backup_path) + restore_home_backupdir = "home-pre-restore-{0}".format( + time.strftime("%Y-%m-%d-%H%M%S")) + + self.log.info("Restoring home of user '%s'...", local_user) + self.log.info("Existing files/dirs backed up in '%s' dir", + restore_home_backupdir) + os.mkdir(home_dir + '/' + restore_home_backupdir) + for f_name in os.listdir(backup_dom0_home_dir): + home_file = home_dir + '/' + f_name + if os.path.exists(home_file): + os.rename(home_file, + home_dir + '/' + restore_home_backupdir + '/' + f_name) + if self.header_data.version == 1: + subprocess.call( + ["cp", "-nrp", "--reflink=auto", + backup_dom0_home_dir + '/' + f_name, home_file]) + elif self.header_data.version >= 2: + shutil.move(backup_dom0_home_dir + '/' + f_name, home_file) + retcode = subprocess.call(['sudo', 'chown', '-R', + local_user, home_dir]) + if retcode != 0: + self.log.error("*** Error while setting home directory owner") + + def restore_do(self, restore_info): + ''' + + High level workflow: + 1. Create VMs object in host collection (qubes.xml) + 2. Create them on disk (vm.create_on_disk) + 3. Restore VM data, overriding/converting VM files + 4. Apply possible fixups and save qubes.xml + + :param restore_info: + :return: + ''' + + if self.header_data.version == 1: + raise NotImplementedError('Backup format version 1 not supported') + + restore_info = self.restore_info_verify(restore_info) + + self._restore_vms_metadata(restore_info) + + # Perform VM restoration in backup order + vms_dirs = [] + handlers = {} + vms_size = 0 + for vm_info in self._templates_first(restore_info.values()): + vm = vm_info.restored_vm + if vm and vm_info.subdir: + vms_size += int(vm_info.size) + vms_dirs.append(vm_info.subdir) + for name, volume in vm.volumes.items(): + if not volume.save_on_stop: + continue + data_func = volume.import_data + size_func = volume.resize + handlers[os.path.join(vm_info.subdir, name + '.img')] = \ + (data_func, size_func) + # TODO applications whitelist + # TODO firewall + + if 'dom0' in restore_info.keys() and \ + restore_info['dom0'].good_to_go: + vms_dirs.append(os.path.dirname(restore_info['dom0'].subdir)) + vms_size += restore_info['dom0'].size + handlers[restore_info['dom0'].subdir] = (self._handle_dom0, None) + try: + self._restore_vm_data(vms_dirs=vms_dirs, vms_size=vms_size, + handlers=handlers) + except QubesException: + if self.options.verify_only: + raise + else: + self.log.warning( + "Some errors occurred during data extraction, " + "continuing anyway to restore at least some " + "VMs") + + if self.options.verify_only: + shutil.rmtree(self.tmpdir) + return + + if self.canceled: + raise BackupCanceledError("Restore canceled", + tmpdir=self.tmpdir) + + shutil.rmtree(self.tmpdir) + self.log.info("-> Done. Please install updates for all the restored " + "templates.") + + def _restore_vms_metadata(self, restore_info): + '''Restore VM metadata + + Create VMs, set their properties etc. + ''' + vms = {} + for vm_info in restore_info.values(): + assert isinstance(vm_info, self.VMToRestore) + if not vm_info.vm: + continue + if not vm_info.good_to_go: + continue + vm = vm_info.vm + vms[vm.name] = vm + + # First load templates, then other VMs + for vm in self._templates_first(vms.values()): + if self.canceled: + return + self.log.info("-> Restoring %s...", vm.name) + kwargs = {} + if vm.template: + template = restore_info[vm.name].template + # handle potentially renamed template + if template in restore_info \ + and restore_info[template].good_to_go: + template = restore_info[template].name + kwargs['template'] = template + + new_vm = None + vm_name = restore_info[vm.name].name + + try: + # first only create VMs, later setting may require other VMs + # be already created + new_vm = self.app.add_new_vm( + vm.klass, + name=vm_name, + label=vm.label, + pool=self.options.override_pool, + **kwargs) + except Exception: # pylint: disable=broad-except + self.log.exception('Error restoring VM %s, skipping', vm.name) + if new_vm: + del self.app.domains[new_vm.name] + continue + + restore_info[vm.name].restored_vm = new_vm + + for vm in vms.values(): + if self.canceled: + return + + new_vm = restore_info[vm.name].restored_vm + if not new_vm: + # skipped/failed + continue + + for prop, value in vm.properties.items(): + # exclude VM references - handled manually according to + # restore options + if prop in ['template', 'netvm', 'default_dispvm']: + continue + try: + setattr(new_vm, prop, value) + except Exception: # pylint: disable=broad-except + self.log.exception('Error setting %s.%s to %s', + vm.name, prop, value) + + for feature, value in vm.features.items(): + try: + new_vm.features[feature] = value + except Exception: # pylint: disable=broad-except + self.log.exception('Error setting %s.features[%s] to %s', + vm.name, feature, value) + + for tag in vm.tags: + try: + new_vm.tags.add(tag) + except Exception: # pylint: disable=broad-except + self.log.exception('Error adding tag %s to %s', + tag, vm.name) + + for bus in vm.devices: + for backend_domain, ident in vm.devices[bus]: + options = vm.devices[bus][(backend_domain, ident)] + assignment = DeviceAssignment( + backend_domain=backend_domain, + ident=ident, + options=options, + persistent=True) + try: + new_vm.devices[bus].attach(assignment) + except Exception: # pylint: disable=broad-except + self.log.exception('Error attaching device %s:%s to %s', + bus, ident, vm.name) + + # Set VM dependencies - only non-default setting + for vm in vms.values(): + vm_info = restore_info[vm.name] + vm_name = vm_info.name + try: + host_vm = self.app.domains[vm_name] + except KeyError: + # Failed/skipped VM + continue + + if 'netvm' in vm.properties: + if vm_info.netvm in restore_info: + value = restore_info[vm_info.netvm].name + else: + value = vm_info.netvm + + try: + host_vm.netvm = value + except Exception: # pylint: disable=broad-except + self.log.exception('Error setting %s.%s to %s', + vm.name, 'netvm', value) + + if 'default_dispvm' in vm.properties: + if vm.properties['default_dispvm'] in restore_info: + value = restore_info[vm.properties[ + 'default_dispvm']].name + else: + value = vm.properties['default_dispvm'] + + try: + host_vm.default_dispvm = value + except Exception: # pylint: disable=broad-except + self.log.exception('Error setting %s.%s to %s', + vm.name, 'default_dispvm', value) diff --git a/qubesadmin/backup/core2.py b/qubesadmin/backup/core2.py new file mode 100644 index 0000000..f21a717 --- /dev/null +++ b/qubesadmin/backup/core2.py @@ -0,0 +1,290 @@ +# -*- encoding: utf8 -*- +# +# The Qubes OS Project, http://www.qubes-os.org +# +# Copyright (C) 2017 Marek Marczykowski-Górecki +# +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License along +# with this program; if not, see . + +'''Parser for core2 qubes.xml''' + +import ast +import xml.parsers +import logging +import lxml.etree + +import qubesadmin.backup + +service_to_feature = { + 'ntpd': 'service.ntpd', + 'qubes-update-check': 'check-updates', + 'meminfo-writer': 'services.meminfo-writer', +} + +class Core2VM(qubesadmin.backup.BackupVM): + '''VM object''' + # pylint: disable=too-few-public-methods + def __init__(self): + super(Core2VM, self).__init__() + self.backup_content = False + + @property + def included_in_backup(self): + return self.backup_content + +class Core2Qubes(qubesadmin.backup.BackupApp): + '''Parsed qubes.xml''' + def __init__(self, store=None): + if store is None: + raise ValueError("store path required") + self.qid_map = {} + self.log = logging.getLogger('qubesadmin.backup.core2') + super(Core2Qubes, self).__init__(store) + + def load_globals(self, element): + '''Load global settings + + :param element: XML element containing global settings (root node) + ''' + default_netvm = element.get("default_netvm") + if default_netvm is not None: + self.globals['default_netvm'] = self.qid_map[int(default_netvm)] \ + if default_netvm != "None" else None + + # default_fw_netvm = element.get("default_fw_netvm") + # if default_fw_netvm is not None: + # self.globals['default_fw_netvm'] = \ + # self.qid_map[int(default_fw_netvm)] \ + # if default_fw_netvm != "None" else None + + updatevm = element.get("updatevm") + if updatevm is not None: + self.globals['updatevm'] = self.qid_map[int(updatevm)] \ + if updatevm != "None" else None + + clockvm = element.get("clockvm") + if clockvm is not None: + self.globals['clockvm'] = self.qid_map[int(clockvm)] \ + if clockvm != "None" else None + + default_template = element.get("default_template") + self.globals['default_template'] = self.qid_map[int(default_template)] \ + if default_template.lower() != "none" else None + + + def set_netvm_dependency(self, element): + '''Set dependencies between VMs''' + kwargs = {} + attr_list = ("name", "uses_default_netvm", "netvm_qid") + + for attribute in attr_list: + kwargs[attribute] = element.get(attribute) + + vm = self.domains[kwargs["name"]] + + # netvm property + if element.get("uses_default_netvm") is None: + uses_default_netvm = True + else: + uses_default_netvm = ( + True if element.get("uses_default_netvm") == "True" else False) + if not uses_default_netvm: + netvm_qid = element.get("netvm_qid") + if netvm_qid is None or netvm_qid == "none": + vm.properties['netvm'] = None + else: + vm.properties['netvm'] = self.qid_map[int(netvm_qid)] + + # And DispVM netvm, translated to default_dispvm + if element.get("uses_default_dispvm_netvm") is None: + uses_default_dispvm_netvm = True + else: + uses_default_dispvm_netvm = ( + True if element.get("uses_default_dispvm_netvm") == "True" + else False) + if not uses_default_dispvm_netvm: + dispvm_netvm_qid = element.get("dispvm_netvm_qid") + if dispvm_netvm_qid is None or dispvm_netvm_qid == "none": + dispvm_netvm = None + else: + dispvm_netvm = self.qid_map[int(dispvm_netvm_qid)] + else: + dispvm_netvm = vm.properties.get('netvm', self.globals[ + 'default_netvm']) + + if dispvm_netvm != self.globals['default_netvm']: + if dispvm_netvm: + dispvm_tpl_name = 'disp-{}'.format(dispvm_netvm) + else: + dispvm_tpl_name = 'disp-no-netvm' + + vm.properties['default_dispvm'] = dispvm_tpl_name + + if dispvm_tpl_name not in self.domains: + vm = Core2VM() + vm.name = dispvm_tpl_name + vm.label = 'red' + vm.properties['netvm'] = dispvm_netvm + vm.properties['dispvm_allowed'] = True + vm.backup_content = True + vm.backup_path = None + self.domains[vm.name] = vm + # TODO: add support for #2075 + # TODO: set qrexec policy based on dispvm_netvm value + + def import_core2_vm(self, element): + '''Parse a single VM from given XML node + + This method load only VM properties not depending on other VMs + (other than template). VM connections are set later. + :param element: XML node + ''' + vm_class_name = element.tag + vm = Core2VM() + vm.name = element.get('name') + vm.label = element.get('label', 'red') + self.domains[vm.name] = vm + kwargs = {} + if vm_class_name in ["QubesTemplateVm", "QubesTemplateHVm"]: + vm.klass = "TemplateVM" + elif element.get('template_qid').lower() == "none": + kwargs['dir_path'] = element.get('dir_path') + vm.klass = "StandaloneVM" + else: + kwargs['dir_path'] = element.get('dir_path') + vm.template = \ + self.qid_map[int(element.get('template_qid'))] + vm.klass = "AppVM" + # simple attributes + for attr, default in { + #'installed_by_rpm': 'False', + 'include_in_backups': 'True', + 'qrexec_timeout': '60', + 'vcpus': '2', + 'memory': '400', + 'maxmem': '4000', + 'default_user': 'user', + 'debug': 'False', + 'mac': None, + 'autostart': 'False'}.items(): + value = element.get(attr) + if value and value != default: + vm.properties[attr] = value + # attributes with default value + for attr in ["kernel", "kernelopts"]: + value = element.get(attr) + if value and value.lower() == "none": + value = None + value_is_default = element.get( + "uses_default_{}".format(attr)) + if value_is_default and value_is_default.lower() != \ + "true": + vm.properties[attr] = value + vm.properties['hvm'] = "HVm" in vm_class_name + if vm_class_name in ('QubesNetVm', 'QubesProxyVm'): + vm.properties['provides_network'] = True + if vm_class_name == 'QubesNetVm': + vm.properties['netvm'] = None + if element.get('internal', False) == 'True': + vm.features['internal'] = True + + services = element.get('services') + if services: + services = ast.literal_eval(services) + else: + services = {} + for service, value in services.items(): + feature = service + for repl_service, repl_feature in \ + service_to_feature.items(): + if repl_service == service: + feature = repl_feature + vm.features[feature] = value + + vm.backup_content = element.get('backup_content', False) == 'True' + vm.backup_path = element.get('backup_path', None) + vm.size = element.get('backup_size', 0) + + pci_strictreset = element.get('pci_strictreset', True) + pcidevs = element.get('pcidevs') + if pcidevs: + pcidevs = ast.literal_eval(pcidevs) + for pcidev in pcidevs: + if not pci_strictreset: + vm.devices['pci'][('dom0', pcidev.replace(':', '_'))] = { + 'no-strict-reset': True} + else: + vm.devices['pci'][('dom0', pcidev.replace(':', '_'))] = {} + + def load(self): + with open(self.store) as fh: + try: + # pylint: disable=no-member + tree = lxml.etree.parse(fh) + except (EnvironmentError, # pylint: disable=broad-except + xml.parsers.expat.ExpatError) as err: + self.log.error(err) + return False + + self.globals['default_kernel'] = tree.getroot().get("default_kernel") + + vm_classes = ["AdminVM", "TemplateVm", "TemplateHVm", + "AppVm", "HVm", "NetVm", "ProxyVm"] + + # First build qid->name map + for vm_class_name in vm_classes: + vms_of_class = tree.findall("Qubes" + vm_class_name) + for element in vms_of_class: + qid = element.get('qid', None) + name = element.get('name', None) + if qid and name: + self.qid_map[int(qid)] = name + + # Qubes R2 din't have dom0 in qubes.xml + if 0 not in self.qid_map: + vm = Core2VM() + vm.name = 'dom0' + vm.klass = 'AdminVM' + vm.label = 'black' + self.domains['dom0'] = vm + self.qid_map[0] = 'dom0' + + # Then load all VMs - since we have qid_map, no need to preserve + # specific load older. + for vm_class_name in vm_classes: + vms_of_class = tree.findall("Qubes" + vm_class_name) + for element in vms_of_class: + self.import_core2_vm(element) + + # ... and load other VMs + for vm_class_name in ["AppVm", "HVm", "NetVm", "ProxyVm"]: + vms_of_class = tree.findall("Qubes" + vm_class_name) + # first non-template based, then template based + sorted_vms_of_class = sorted(vms_of_class, + key=lambda x: str(x.get('template_qid')).lower() != "none") + for element in sorted_vms_of_class: + self.import_core2_vm(element) + + # and load other defaults (default netvm, updatevm etc) + self.load_globals(tree.getroot()) + + # After importing all VMs, set netvm references, in the same order + for vm_class_name in vm_classes: + for element in tree.findall("Qubes" + vm_class_name): + try: + self.set_netvm_dependency(element) + except (ValueError, LookupError) as err: + self.log.error("VM %s: failed to set netvm dependency: %s", + element.get('name'), err) diff --git a/qubesadmin/backup/core3.py b/qubesadmin/backup/core3.py new file mode 100644 index 0000000..a975d00 --- /dev/null +++ b/qubesadmin/backup/core3.py @@ -0,0 +1,145 @@ +# -*- encoding: utf8 -*- +# +# The Qubes OS Project, http://www.qubes-os.org +# +# Copyright (C) 2017 Marek Marczykowski-Górecki +# +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License along +# with this program; if not, see . + +'''Parser for core2 qubes.xml''' + +import xml.parsers +import logging +import lxml.etree + +import qubesadmin.backup + +class Core3VM(qubesadmin.backup.BackupVM): + '''VM object''' + # pylint: disable=too-few-public-methods + @property + def included_in_backup(self): + return self.backup_path is not None + +class Core3Qubes(qubesadmin.backup.BackupApp): + '''Parsed qubes.xml''' + def __init__(self, store=None): + if store is None: + raise ValueError("store path required") + self.log = logging.getLogger('qubesadmin.backup.core3') + self.labels = {} + super(Core3Qubes, self).__init__(store) + + @staticmethod + def get_property(xml_obj, prop): + '''Get property of given object (XML node) + + Object can be any PropertyHolder serialized to XML - in practice + :py:class:`BaseVM` or :py:class:`Qubes`. + ''' + xml_prop = xml_obj.findall('./property[@name=\'{}\']'.format(prop)) + if not xml_prop: + raise KeyError(prop) + return xml_prop[0].text + + def load_labels(self, labels_element): + '''Load labels table''' + for node in labels_element.findall('label'): + ident = node.get('id') + assert ident is not None + self.labels[ident] = node.text + + + def load_globals(self, globals_element): + '''Load global settings + + :param globals_element: XML element containing global settings + ''' + for node in globals_element.findall('property'): + name = node.get('name') + assert name is not None + self.globals[name] = node.text + + def import_core3_vm(self, element): + '''Parse a single VM from given XML node + + This method load only VM properties not depending on other VMs + (other than template). VM connections are set later. + :param element: XML node + ''' + vm = Core3VM() + vm.klass = element.get('class') + + for node in element.findall('./properties/property'): + name = node.get('name') + assert name is not None + vm.properties[name] = node.text + + for node in element.findall('./features/feature'): + name = node.get('name') + assert name is not None + vm.features[name] = False if node.text is None else node.text + + for node in element.findall('./tags/tag'): + name = node.get('name') + assert name is not None + vm.tags.add(name) + + for bus_node in element.findall('./devices'): + bus_name = bus_node.get('class') + assert bus_name is not None + for node in bus_node.findall('./device'): + backend_domain = node.get('backend-domain') + ident = node.get('id') + options = {} + for opt_node in node.findall('./option'): + opt_name = opt_node.get('name') + options[opt_name] = opt_node.text + vm.devices[bus_name][(backend_domain, ident)] = options + + # extract base properties + if vm.klass == 'AdminVM': + vm.name = 'dom0' + else: + vm.name = vm.properties.pop('name') + vm.label = self.labels[vm.properties.pop('label')] + vm.template = vm.properties.pop('template', None) + # skip UUID and qid, will be generated during restore + vm.properties.pop('uuid', None) + vm.properties.pop('qid', None) + + if vm.features.pop('backup-content', False): + vm.backup_path = vm.features.pop('backup-path', None) + vm.size = vm.features.pop('backup-size', 0) + + self.domains[vm.name] = vm + + def load(self): + with open(self.store) as fh: + try: + # pylint: disable=no-member + tree = lxml.etree.parse(fh) + except (EnvironmentError, # pylint: disable=broad-except + xml.parsers.expat.ExpatError) as err: + self.log.error(err) + return False + + self.load_labels(tree.find('./labels')) + + for element in tree.findall('./domains/domain'): + self.import_core3_vm(element) + + # and load other defaults (default netvm, updatevm etc) + self.load_globals(tree.find('./properties'))