Browse Source

backup: initial support for backup restore over Admin API

The code is mostly copied from core-admin.

QubesOS/qubes-issues#1214
Marek Marczykowski-Górecki 7 years ago
parent
commit
268a3453a9
4 changed files with 2354 additions and 0 deletions
  1. 1 0
      ci/pylintrc
  2. 1918 0
      qubesadmin/backup/__init__.py
  3. 290 0
      qubesadmin/backup/core2.py
  4. 145 0
      qubesadmin/backup/core3.py

+ 1 - 0
ci/pylintrc

@@ -8,6 +8,7 @@ disable=
   bad-continuation,
   duplicate-code,
   fixme,
+  cyclic-import,
   locally-disabled,
   locally-enabled
 

+ 1918 - 0
qubesadmin/backup/__init__.py

@@ -0,0 +1,1918 @@
+# -*- encoding: utf8 -*-
+#
+# The Qubes OS Project, http://www.qubes-os.org
+#
+# Copyright (C) 2017 Marek Marczykowski-Górecki
+#                               <marmarek@invisiblethingslab.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License along
+# with this program; if not, see <http://www.gnu.org/licenses/>.
+
+'''Qubes backup'''
+
+import collections
+import errno
+import fcntl
+import functools
+import grp
+import logging
+import multiprocessing
+from multiprocessing import Queue, Process
+import os
+import pwd
+import re
+import shutil
+import subprocess
+import sys
+import tempfile
+import termios
+import time
+
+import qubesadmin
+import qubesadmin.vm
+from qubesadmin.devices import DeviceAssignment
+from qubesadmin.exc import QubesException
+from qubesadmin.utils import size_to_human
+
+# must be picklable
+QUEUE_FINISHED = "!!!FINISHED"
+QUEUE_ERROR = "!!!ERROR"
+
+HEADER_FILENAME = 'backup-header'
+DEFAULT_CRYPTO_ALGORITHM = 'aes-256-cbc'
+# 'scrypt' is not exactly HMAC algorithm, but a tool we use to
+# integrity-protect the data
+DEFAULT_HMAC_ALGORITHM = 'scrypt'
+DEFAULT_COMPRESSION_FILTER = 'gzip'
+# Maximum size of error message get from process stderr (including VM process)
+MAX_STDERR_BYTES = 1024
+# header + qubes.xml max size
+HEADER_QUBES_XML_MAX_SIZE = 1024 * 1024
+# hmac file max size - regardless of backup format version!
+HMAC_MAX_SIZE = 4096
+
+BLKSIZE = 512
+
+_re_alphanum = re.compile(r'^[A-Za-z0-9-]*$')
+
+class BackupCanceledError(QubesException):
+    '''Exception raised when backup/restore was cancelled'''
+    def __init__(self, msg, tmpdir=None):
+        super(BackupCanceledError, self).__init__(msg)
+        self.tmpdir = tmpdir
+
+class BackupHeader(object):
+    '''Structure describing backup-header file included as the first file in
+    backup archive
+    '''
+    header_keys = {
+        'version': 'version',
+        'encrypted': 'encrypted',
+        'compressed': 'compressed',
+        'compression-filter': 'compression_filter',
+        'crypto-algorithm': 'crypto_algorithm',
+        'hmac-algorithm': 'hmac_algorithm',
+        'backup-id': 'backup_id'
+    }
+    bool_options = ['encrypted', 'compressed']
+    int_options = ['version']
+
+    def __init__(self,
+            header_data=None,
+            version=None,
+            encrypted=None,
+            compressed=None,
+            compression_filter=None,
+            hmac_algorithm=None,
+            crypto_algorithm=None,
+            backup_id=None):
+        # repeat the list to help code completion...
+        self.version = version
+        self.encrypted = encrypted
+        self.compressed = compressed
+        # Options introduced in backup format 3+, which always have a header,
+        # so no need for fallback in function parameter
+        self.compression_filter = compression_filter
+        self.hmac_algorithm = hmac_algorithm
+        self.crypto_algorithm = crypto_algorithm
+        self.backup_id = backup_id
+
+        if header_data is not None:
+            self.load(header_data)
+
+    def load(self, untrusted_header_text):
+        """Parse backup header file.
+
+        :param untrusted_header_text: header content
+        :type untrusted_header_text: basestring
+
+        .. warning::
+            This function may be exposed to not yet verified header,
+            so is security critical.
+        """
+        try:
+            untrusted_header_text = untrusted_header_text.decode('ascii')
+        except UnicodeDecodeError:
+            raise QubesException(
+                "Non-ASCII characters in backup header")
+        for untrusted_line in untrusted_header_text.splitlines():
+            if untrusted_line.count('=') != 1:
+                raise QubesException("Invalid backup header")
+            key, value = untrusted_line.strip().split('=', 1)
+            if not _re_alphanum.match(key):
+                raise QubesException("Invalid backup header ("
+                                                   "key)")
+            if key not in self.header_keys.keys():
+                # Ignoring unknown option
+                continue
+            if not _re_alphanum.match(value):
+                raise QubesException("Invalid backup header ("
+                                                 "value)")
+            if getattr(self, self.header_keys[key]) is not None:
+                raise QubesException(
+                    "Duplicated header line: {}".format(key))
+            if key in self.bool_options:
+                value = value.lower() in ["1", "true", "yes"]
+            elif key in self.int_options:
+                value = int(value)
+            setattr(self, self.header_keys[key], value)
+
+        self.validate()
+
+    def validate(self):
+        '''Validate header data, according to header version'''
+        if self.version == 1:
+            # header not really present
+            pass
+        elif self.version in [2, 3, 4]:
+            expected_attrs = ['version', 'encrypted', 'compressed',
+                'hmac_algorithm']
+            if self.encrypted and self.version < 4:
+                expected_attrs += ['crypto_algorithm']
+            if self.version >= 3 and self.compressed:
+                expected_attrs += ['compression_filter']
+            if self.version >= 4:
+                expected_attrs += ['backup_id']
+            for key in expected_attrs:
+                if getattr(self, key) is None:
+                    raise QubesException(
+                        "Backup header lack '{}' info".format(key))
+        else:
+            raise QubesException(
+                "Unsupported backup version {}".format(self.version))
+
+    def save(self, filename):
+        '''Save backup header into a file'''
+        with open(filename, "w") as f_header:
+            # make sure 'version' is the first key
+            f_header.write('version={}\n'.format(self.version))
+            for key, attr in self.header_keys.items():
+                if key == 'version':
+                    continue
+                if getattr(self, attr) is None:
+                    continue
+                f_header.write("{!s}={!s}\n".format(key, getattr(self, attr)))
+
+def launch_proc_with_pty(args, stdin=None, stdout=None, stderr=None, echo=True):
+    """Similar to pty.fork, but handle stdin/stdout according to parameters
+    instead of connecting to the pty
+
+    :return tuple (subprocess.Popen, pty_master)
+    """
+
+    def set_ctty(ctty_fd, master_fd):
+        '''Set controlling terminal'''
+        os.setsid()
+        os.close(master_fd)
+        fcntl.ioctl(ctty_fd, termios.TIOCSCTTY, 0)
+        if not echo:
+            termios_p = termios.tcgetattr(ctty_fd)
+            # termios_p.c_lflags
+            termios_p[3] &= ~termios.ECHO
+            termios.tcsetattr(ctty_fd, termios.TCSANOW, termios_p)
+    (pty_master, pty_slave) = os.openpty()
+    p = subprocess.Popen(args, stdin=stdin, stdout=stdout,
+        stderr=stderr,
+        preexec_fn=lambda: set_ctty(pty_slave, pty_master))
+    os.close(pty_slave)
+    return p, open(pty_master, 'wb+', buffering=0)
+
+def launch_scrypt(action, input_name, output_name, passphrase):
+    '''
+    Launch 'scrypt' process, pass passphrase to it and return
+    subprocess.Popen object.
+
+    :param action: 'enc' or 'dec'
+    :param input_name: input path or '-' for stdin
+    :param output_name: output path or '-' for stdout
+    :param passphrase: passphrase
+    :return: subprocess.Popen object
+    '''
+    command_line = ['scrypt', action, input_name, output_name]
+    (p, pty) = launch_proc_with_pty(command_line,
+        stdin=subprocess.PIPE if input_name == '-' else None,
+        stdout=subprocess.PIPE if output_name == '-' else None,
+        stderr=subprocess.PIPE,
+        echo=False)
+    if action == 'enc':
+        prompts = (b'Please enter passphrase: ', b'Please confirm passphrase: ')
+    else:
+        prompts = (b'Please enter passphrase: ',)
+    for prompt in prompts:
+        actual_prompt = p.stderr.read(len(prompt))
+        if actual_prompt != prompt:
+            raise QubesException(
+                'Unexpected prompt from scrypt: {}'.format(actual_prompt))
+        pty.write(passphrase.encode('utf-8') + b'\n')
+        pty.flush()
+    # save it here, so garbage collector would not close it (which would kill
+    #  the child)
+    p.pty = pty
+    return p
+
+class ExtractWorker3(Process):
+    '''Process for handling inner tar layer of backup archive'''
+    # pylint: disable=too-many-instance-attributes
+    def __init__(self, queue, base_dir, passphrase, encrypted,
+                 progress_callback, vmproc=None,
+                 compressed=False, crypto_algorithm=DEFAULT_CRYPTO_ALGORITHM,
+                 compression_filter=None, verify_only=False, handlers=None):
+        '''Start inner tar extraction worker
+
+        The purpose of this class is to process files extracted from outer
+        archive layer and pass to appropriate handlers. Input files are given
+        through a queue. Insert :py:obj:`QUEUE_FINISHED` or
+        :py:obj:`QUEUE_ERROR` to end data processing (either cleanly,
+        or forcefully).
+
+        Handlers are given as a map filename -> (data_func, size_func),
+        where data_func is called with file-like object to process,
+        and size_func is called with file size as argument. Note that
+        data_func and size_func may be called simultaneusly, in a different
+        processes.
+
+        :param multiprocessing.Queue queue: a queue with filenames to
+        process; those files needs to be given as full path, inside *base_dir*
+        :param str base_dir: directory where all files to process live
+        :param str passphrase: passphrase to decrypt the data
+        :param bool encrypted: is encryption applied?
+        :param callable progress_callback: report extraction progress
+        :param subprocess.Popen vmproc: process extracting outer layer,
+        given here to monitor
+        it for failures (when it exits with non-zero exit code, inner layer
+        processing is stopped)
+        :param bool compressed: is the data compressed?
+        :param str crypto_algorithm: encryption algorithm, either `scrypt` or an
+        algorithm supported by openssl
+        :param str compression_filter: compression program, `gzip` by default
+        :param bool verify_only: only verify data integrity, do not extract
+        :param dict handlers: handlers for actual data
+        '''
+        super(ExtractWorker3, self).__init__()
+        #: queue with files to extract
+        self.queue = queue
+        #: paths on the queue are relative to this dir
+        self.base_dir = base_dir
+        #: passphrase to decrypt/authenticate data
+        self.passphrase = passphrase
+        #: handlers for files; it should be dict filename -> (data_function,
+        # size_function),
+        # where data_function will get file-like object as the only argument and
+        # might be called in a separate process (multiprocessing.Process),
+        # and size_function will get file size (when known) in bytes
+        self.handlers = handlers
+        #: is the backup encrypted?
+        self.encrypted = encrypted
+        #: is the backup compressed?
+        self.compressed = compressed
+        #: what crypto algorithm is used for encryption?
+        self.crypto_algorithm = crypto_algorithm
+        #: only verify integrity, don't extract anything
+        self.verify_only = verify_only
+        #: progress
+        self.blocks_backedup = 0
+        #: inner tar layer extraction (subprocess.Popen instance)
+        self.tar2_process = None
+        #: current inner tar archive name
+        self.tar2_current_file = None
+        #: call size_func handler for this file when tar report it on stderr
+        self.adjust_output_size = None
+        #: decompressor subprocess.Popen instance
+        self.decompressor_process = None
+        #: decryptor subprocess.Popen instance
+        self.decryptor_process = None
+        #: data import multiprocessing.Process instance
+        self.import_process = None
+        #: callback reporting progress to UI
+        self.progress_callback = progress_callback
+        #: process (subprocess.Popen instance) feeding the data into
+        # extraction tool
+        self.vmproc = vmproc
+
+        self.log = logging.getLogger('qubesadmin.backup.extract')
+        self.stderr_encoding = sys.stderr.encoding or 'utf-8'
+        self.tar2_stderr = []
+        self.compression_filter = compression_filter
+
+    @staticmethod
+    def handle_streams(stream_in, streams_out, processes, size_limit=None,
+            progress_callback=None):
+        '''
+        Copy stream_in to all streams_out and monitor all mentioned processes.
+        If any of them terminate with non-zero code, interrupt the process. Copy
+        at most `size_limit` data (if given).
+
+        :param stream_in: file-like object to read data from
+        :param streams_out: dict of file-like objects to write data to
+        :param processes: dict of subprocess.Popen objects to monitor
+        :param size_limit: int maximum data amount to process
+        :param progress_callback: callable function to report progress, will be
+            given copied data size (it should accumulate internally)
+        :return: failed process name, failed stream name, "size_limit" or None (
+            no error)
+        '''
+        buffer_size = 409600
+        bytes_copied = 0
+        while True:
+            if size_limit:
+                to_copy = min(buffer_size, size_limit - bytes_copied)
+                if to_copy <= 0:
+                    return "size_limit"
+            else:
+                to_copy = buffer_size
+            buf = stream_in.read(to_copy)
+            if not buf:
+                # done
+                return None
+
+            if callable(progress_callback):
+                progress_callback(len(buf))
+            for name, stream in streams_out.items():
+                if stream is None:
+                    continue
+                try:
+                    stream.write(buf)
+                except IOError:
+                    return name
+            bytes_copied += len(buf)
+
+            for name, proc in processes.items():
+                if proc is None:
+                    continue
+                if isinstance(proc, Process):
+                    if not proc.is_alive() and proc.exitcode != 0:
+                        return name
+                elif proc.poll():
+                    return name
+
+    def collect_tar_output(self):
+        '''Retrieve tar stderr and handle it appropriately
+
+        Log errors, process file size if requested.
+        This use :py:attr:`tar2_process`.
+        '''
+        if not self.tar2_process.stderr:
+            return
+
+        if self.tar2_process.poll() is None:
+            try:
+                new_lines = self.tar2_process.stderr \
+                    .read(MAX_STDERR_BYTES).splitlines()
+            except IOError as e:
+                if e.errno == errno.EAGAIN:
+                    return
+                else:
+                    raise
+        else:
+            new_lines = self.tar2_process.stderr.readlines()
+
+        new_lines = [x.decode(self.stderr_encoding) for x in new_lines]
+
+        msg_re = re.compile(r".*#[0-9].*restore_pipe")
+        debug_msg = [msg for msg in new_lines if msg_re.match(msg)]
+        self.log.debug('tar2_stderr: %s', '\n'.join(debug_msg))
+        new_lines = [msg for msg in new_lines if not msg_re.match(msg)]
+        if self.adjust_output_size:
+            # search for first file size reported by tar after setting
+            # self.adjust_output_size (so don't look at self.tar2_stderr)
+            # this is used only when extracting single-file archive, so don't
+            #  bother with checking file name
+            file_size_re = re.compile(r"^[^ ]+ [^ ]+/[^ ]+ *([0-9]+) .*")
+            for line in new_lines:
+                match = file_size_re.match(line)
+                if match:
+                    file_size = match.groups()[0]
+                    self.adjust_output_size(file_size)
+                    self.adjust_output_size = None
+        self.tar2_stderr += new_lines
+
+    def run(self):
+        try:
+            self.__run__()
+        except Exception:
+            # Cleanup children
+            for process in [self.decompressor_process,
+                    self.decryptor_process,
+                    self.tar2_process]:
+                if process:
+                    try:
+                        process.terminate()
+                    except OSError:
+                        pass
+                    process.wait()
+            self.log.exception('ERROR')
+            raise
+
+    def handle_dir(self, dirname):
+        ''' Relocate files in given director when it's already extracted
+
+        :param dirname: directory path to handle (relative to backup root),
+        without trailing slash
+        '''
+        for fname, (data_func, size_func) in self.handlers.items():
+            if not fname.startswith(dirname + '/'):
+                continue
+            if size_func is not None:
+                size_func(os.path.getsize(fname))
+            with open(fname, 'rb') as input_file:
+                data_func(input_file)
+            os.unlink(fname)
+        shutil.rmtree(dirname)
+
+    def cleanup_tar2(self, wait=True, terminate=False):
+        '''Cleanup running :py:attr:`tar2_process`
+
+        :param wait: wait for it termination, otherwise method exit early if
+        process is still running
+        :param terminate: terminate the process if still running
+        '''
+        if self.tar2_process is None:
+            return
+        if terminate:
+            if self.import_process is not None:
+                self.tar2_process.terminate()
+            self.import_process.terminate()
+        if wait:
+            self.tar2_process.wait()
+            if self.import_process is not None:
+                self.import_process.join()
+        elif self.tar2_process.poll() is None:
+            return
+        self.collect_tar_output()
+        if self.tar2_process.stderr:
+            self.tar2_process.stderr.close()
+        if self.tar2_process.returncode != 0:
+            self.log.error(
+                "ERROR: unable to extract files for %s, tar "
+                "output:\n  %s",
+                    self.tar2_current_file,
+                    "\n  ".join(self.tar2_stderr))
+        else:
+            # Finished extracting the tar file
+            # if that was whole-directory archive, handle
+            # relocated files now
+            inner_name = self.tar2_current_file.rsplit('.', 1)[0] \
+                .replace(self.base_dir + '/', '')
+            if os.path.basename(inner_name) == '.':
+                self.handle_dir(
+                    os.path.dirname(inner_name))
+            self.tar2_current_file = None
+            self.adjust_output_size = None
+        self.tar2_process = None
+
+    @staticmethod
+    def _data_func_wrapper(close_fds, data_func, data_stream):
+        '''Close not needed file descriptors, then call data_func(
+        data_stream).
+
+        This is to prevent holding write end of a pipe in subprocess,
+        preventing EOF transfer.
+        '''
+        for fd in close_fds:
+            if fd == data_stream.fileno():
+                continue
+            try:
+                os.close(fd)
+            except OSError:
+                pass
+        return data_func(data_stream)
+
+
+    def __run__(self):
+        self.log.debug("Started sending thread")
+        self.log.debug("Moving to dir " + self.base_dir)
+        os.chdir(self.base_dir)
+
+        filename = None
+
+        input_pipe = None
+        for filename in iter(self.queue.get, None):
+            if filename in (QUEUE_FINISHED, QUEUE_ERROR):
+                break
+
+            assert isinstance(filename, str)
+
+            self.log.debug("Extracting file " + filename)
+
+            if filename.endswith('.000'):
+                # next file
+                if self.tar2_process is not None:
+                    input_pipe.close()
+                    self.cleanup_tar2(wait=True, terminate=False)
+
+                inner_name = filename[:-len('.000')].replace(
+                    self.base_dir + '/', '')
+                redirect_stdout = None
+                if os.path.basename(inner_name) == '.':
+                    if (inner_name in self.handlers or
+                            any(x.startswith(os.path.dirname(inner_name) + '/')
+                            for x in self.handlers)):
+                        tar2_cmdline = ['tar',
+                            '-%s' % ("t" if self.verify_only else "x"),
+                            inner_name]
+                    else:
+                        # ignore this directory
+                        tar2_cmdline = None
+                elif inner_name in self.handlers:
+                    tar2_cmdline = ['tar',
+                        '-%svvO' % ("t" if self.verify_only else "x"),
+                        inner_name]
+                    redirect_stdout = subprocess.PIPE
+                else:
+                    # no handlers for this file, ignore it
+                    tar2_cmdline = None
+
+                if tar2_cmdline is None:
+                    # ignore the file
+                    os.remove(filename)
+                    continue
+
+                if self.compressed:
+                    if self.compression_filter:
+                        tar2_cmdline.insert(-1,
+                                            "--use-compress-program=%s" %
+                                            self.compression_filter)
+                    else:
+                        tar2_cmdline.insert(-1, "--use-compress-program=%s" %
+                                            DEFAULT_COMPRESSION_FILTER)
+
+                self.log.debug("Running command " + str(tar2_cmdline))
+                if self.encrypted:
+                    # Start decrypt
+                    self.decryptor_process = subprocess.Popen(
+                        ["openssl", "enc",
+                         "-d",
+                         "-" + self.crypto_algorithm,
+                         "-pass",
+                         "pass:" + self.passphrase],
+                        stdin=subprocess.PIPE,
+                        stdout=subprocess.PIPE)
+
+                    self.tar2_process = subprocess.Popen(
+                        tar2_cmdline,
+                        stdin=self.decryptor_process.stdout,
+                        stdout=redirect_stdout,
+                        stderr=subprocess.PIPE)
+                    self.decryptor_process.stdout.close()
+                    input_pipe = self.decryptor_process.stdin
+                else:
+                    self.tar2_process = subprocess.Popen(
+                        tar2_cmdline,
+                        stdin=subprocess.PIPE,
+                        stdout=redirect_stdout,
+                        stderr=subprocess.PIPE)
+                    input_pipe = self.tar2_process.stdin
+
+                if inner_name in self.handlers:
+                    assert redirect_stdout is subprocess.PIPE
+                    data_func, size_func = self.handlers[inner_name]
+                    self.import_process = multiprocessing.Process(
+                        target=self._data_func_wrapper,
+                        args=([input_pipe.fileno()],
+                        data_func, self.tar2_process.stdout))
+                    self.import_process.start()
+                    self.tar2_process.stdout.close()
+                    self.adjust_output_size = size_func
+
+                fcntl.fcntl(self.tar2_process.stderr.fileno(), fcntl.F_SETFL,
+                            fcntl.fcntl(self.tar2_process.stderr.fileno(),
+                                        fcntl.F_GETFL) | os.O_NONBLOCK)
+                self.tar2_stderr = []
+            elif not self.tar2_process:
+                # Extracting of the current archive failed, skip to the next
+                # archive
+                os.remove(filename)
+                continue
+            else:
+                (basename, ext) = os.path.splitext(self.tar2_current_file)
+                previous_chunk_number = int(ext[1:])
+                expected_filename = basename + '.%03d' % (
+                    previous_chunk_number+1)
+                if expected_filename != filename:
+                    self.cleanup_tar2(wait=True, terminate=True)
+                    self.log.error(
+                        'Unexpected file in archive: %s, expected %s',
+                            filename, expected_filename)
+                    os.remove(filename)
+                    continue
+                self.log.debug("Releasing next chunck")
+
+            self.tar2_current_file = filename
+
+            input_file = open(filename, 'rb')
+
+            run_error = self.handle_streams(
+                input_file,
+                {'target': input_pipe},
+                {'vmproc': self.vmproc,
+                 'addproc': self.tar2_process,
+                 'data_import': self.import_process,
+                 'decryptor': self.decryptor_process,
+                },
+                progress_callback=self.progress_callback)
+            input_file.close()
+            if run_error:
+                if run_error == "target":
+                    self.collect_tar_output()
+                    details = "\n".join(self.tar2_stderr)
+                else:
+                    details = "%s failed" % run_error
+                if self.decryptor_process:
+                    self.decryptor_process.terminate()
+                    self.decryptor_process.wait()
+                    self.decryptor_process = None
+                self.log.error('Error while processing \'%s\': %s',
+                    self.tar2_current_file, details)
+                self.cleanup_tar2(wait=True, terminate=True)
+
+            # Delete the file as we don't need it anymore
+            self.log.debug('Removing file %s', filename)
+            os.remove(filename)
+
+        if self.tar2_process is not None:
+            input_pipe.close()
+            if filename == QUEUE_ERROR:
+                if self.decryptor_process:
+                    self.decryptor_process.terminate()
+                    self.decryptor_process.wait()
+                    self.decryptor_process = None
+            self.cleanup_tar2(terminate=(filename == QUEUE_ERROR))
+
+        self.log.debug('Finished extracting thread')
+
+
+def get_supported_hmac_algo(hmac_algorithm=None):
+    '''Generate a list of supported hmac algorithms
+
+    :param hmac_algorithm: default algorithm, if given, it is placed as a
+    first element
+    '''
+    # Start with provided default
+    if hmac_algorithm:
+        yield hmac_algorithm
+    if hmac_algorithm != 'scrypt':
+        yield 'scrypt'
+    proc = subprocess.Popen(['openssl', 'list-message-digest-algorithms'],
+                            stdout=subprocess.PIPE)
+    try:
+        for algo in proc.stdout.readlines():
+            algo = algo.decode('ascii')
+            if '=>' in algo:
+                continue
+            yield algo.strip()
+    finally:
+        proc.terminate()
+        proc.wait()
+        proc.stdout.close()
+
+class BackupApp(object):
+    '''Interface for backup collection'''
+    # pylint: disable=too-few-public-methods
+    def __init__(self, qubes_xml):
+        '''Initialize BackupApp object and load qubes.xml into it'''
+        self.store = qubes_xml
+        self.domains = {}
+        self.globals = {}
+        self.load()
+
+    def load(self):
+        '''Load qubes.xml'''
+        raise NotImplementedError
+
+class BackupVM(object):
+    '''Interface for a single VM in the backup'''
+    # pylint: disable=too-few-public-methods
+    def __init__(self):
+        '''Initialize empty BackupVM object'''
+        #: VM class
+        self.klass = 'AppVM'
+        #: VM name
+        self.name = None
+        #: VM template
+        self.template = None
+        #: VM label
+        self.label = None
+        #: VM properties
+        self.properties = {}
+        #: VM features (key/value), aka services in core2
+        self.features = {}
+        #: VM tags
+        self.tags = set()
+        #: VM devices - dict with key=devtype, value=dict of devices (
+        # key=ident, value=options)
+        self.devices = collections.defaultdict(dict)
+        #: VM path in the backup
+        self.backup_path = None
+        #: size of the VM
+        self.size = 0
+
+    @property
+    def included_in_backup(self):
+        '''Report whether a VM is included in the backup'''
+        return False
+
+class BackupRestoreOptions(object):
+    '''Options for restore operation'''
+    # pylint: disable=too-few-public-methods
+    def __init__(self):
+        #: use default NetVM if the one referenced in backup do not exists on
+        #  the host
+        self.use_default_netvm = True
+        #: set NetVM to "none" if the one referenced in backup do not exists
+        # on the host
+        self.use_none_netvm = False
+        #: set template to default if the one referenced in backup do not
+        # exists on the host
+        self.use_default_template = True
+        #: use default kernel if the one referenced in backup do not exists
+        # on the host
+        self.use_default_kernel = True
+        #: restore dom0 home
+        self.dom0_home = True
+        #: restore dom0 home even if username is different
+        self.ignore_username_mismatch = False
+        #: do not restore data, only verify backup integrity
+        self.verify_only = False
+        #: automatically rename VM during restore, when it would conflict
+        # with existing one
+        self.rename_conflicting = True
+        #: list of VM names to exclude
+        self.exclude = []
+        #: restore VMs into selected storage pool
+        self.override_pool = None
+
+class BackupRestore(object):
+    """Usage:
+
+    >>> restore_op = BackupRestore(...)
+    >>> # adjust restore_op.options here
+    >>> restore_info = restore_op.get_restore_info()
+    >>> # manipulate restore_info to select VMs to restore here
+    >>> restore_op.restore_do(restore_info)
+    """
+
+    class VMToRestore(object):
+        '''Information about a single VM to be restored'''
+        # pylint: disable=too-few-public-methods
+        #: VM excluded from restore by user
+        EXCLUDED = object()
+        #: VM with such name already exists on the host
+        ALREADY_EXISTS = object()
+        #: NetVM used by the VM does not exists on the host
+        MISSING_NETVM = object()
+        #: TemplateVM used by the VM does not exists on the host
+        MISSING_TEMPLATE = object()
+        #: Kernel used by the VM does not exists on the host
+        MISSING_KERNEL = object()
+
+        def __init__(self, vm):
+            assert isinstance(vm, BackupVM)
+            self.vm = vm
+            self.name = vm.name
+            self.subdir = vm.backup_path
+            self.size = vm.size
+            self.problems = set()
+            self.template = vm.template
+            if vm.properties.get('netvm', None):
+                self.netvm = vm.properties['netvm']
+            else:
+                self.netvm = None
+            self.orig_template = None
+            self.restored_vm = None
+
+        @property
+        def good_to_go(self):
+            '''Is the VM ready for restore?'''
+            return len(self.problems) == 0
+
+    class Dom0ToRestore(VMToRestore):
+        '''Information about dom0 home to restore'''
+        # pylint: disable=too-few-public-methods
+        #: backup was performed on system with different dom0 username
+        USERNAME_MISMATCH = object()
+
+        def __init__(self, vm, subdir=None):
+            super(BackupRestore.Dom0ToRestore, self).__init__(vm)
+            if subdir:
+                self.subdir = subdir
+            self.username = os.path.basename(subdir)
+
+    def __init__(self, app, backup_location, backup_vm, passphrase):
+        super(BackupRestore, self).__init__()
+
+        #: qubes.Qubes instance
+        self.app = app
+
+        #: options how the backup should be restored
+        self.options = BackupRestoreOptions()
+
+        #: VM from which backup should be retrieved
+        self.backup_vm = backup_vm
+        if backup_vm and backup_vm.qid == 0:
+            self.backup_vm = None
+
+        #: backup path, inside VM pointed by :py:attr:`backup_vm`
+        self.backup_location = backup_location
+
+        #: passphrase protecting backup integrity and optionally decryption
+        self.passphrase = passphrase
+
+        #: temporary directory used to extract the data before moving to the
+        # final location
+        self.tmpdir = tempfile.mkdtemp(prefix="restore", dir="/var/tmp")
+
+        #: list of processes (Popen objects) to kill on cancel
+        self.processes_to_kill_on_cancel = []
+
+        #: is the backup operation canceled
+        self.canceled = False
+
+        #: report restore progress, called with one argument - percents of
+        # data restored
+        # FIXME: convert to float [0,1]
+        self.progress_callback = None
+
+        self.log = logging.getLogger('qubesadmin.backup')
+
+        #: basic information about the backup
+        self.header_data = self._retrieve_backup_header()
+
+        #: VMs included in the backup
+        self.backup_app = self._process_qubes_xml()
+
+    def _start_retrieval_process(self, filelist, limit_count, limit_bytes):
+        """Retrieve backup stream and extract it to :py:attr:`tmpdir`
+
+        :param filelist: list of files to extract; listing directory name
+        will extract the whole directory; use empty list to extract the whole
+        archive
+        :param limit_count: maximum number of files to extract
+        :param limit_bytes: maximum size of extracted data
+        :return: a touple of (Popen object of started process, file-like
+        object for reading extracted files list, file-like object for reading
+        errors)
+        """
+
+        vmproc = None
+        if self.backup_vm is not None:
+            # If APPVM, STDOUT is a PIPE
+            vmproc = self.backup_vm.run_service('qubes.Restore')
+            vmproc.stdin.write(
+                (self.backup_location.replace("\r", "").replace("\n",
+                    "") + "\n").encode())
+            vmproc.stdin.flush()
+
+            # Send to tar2qfile the VMs that should be extracted
+            vmproc.stdin.write((" ".join(filelist) + "\n").encode())
+            vmproc.stdin.flush()
+            self.processes_to_kill_on_cancel.append(vmproc)
+
+            backup_stdin = vmproc.stdout
+            tar1_command = ['/usr/libexec/qubes/qfile-dom0-unpacker',
+                            str(os.getuid()), self.tmpdir, '-v']
+        else:
+            backup_stdin = open(self.backup_location, 'rb')
+
+            tar1_command = ['tar',
+                            '-ixv',
+                            '-C', self.tmpdir] + filelist
+
+        tar1_env = os.environ.copy()
+        tar1_env['UPDATES_MAX_BYTES'] = str(limit_bytes)
+        tar1_env['UPDATES_MAX_FILES'] = str(limit_count)
+        self.log.debug("Run command" + str(tar1_command))
+        command = subprocess.Popen(
+            tar1_command,
+            stdin=backup_stdin,
+            stdout=vmproc.stdin if vmproc else subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            env=tar1_env)
+        backup_stdin.close()
+        self.processes_to_kill_on_cancel.append(command)
+
+        # qfile-dom0-unpacker output filelist on stderr
+        # and have stdout connected to the VM), while tar output filelist
+        # on stdout
+        if self.backup_vm:
+            filelist_pipe = command.stderr
+            # let qfile-dom0-unpacker hold the only open FD to the write end of
+            # pipe, otherwise qrexec-client will not receive EOF when
+            # qfile-dom0-unpacker terminates
+            vmproc.stdin.close()
+        else:
+            filelist_pipe = command.stdout
+
+        if self.backup_vm:
+            error_pipe = vmproc.stderr
+        else:
+            error_pipe = command.stderr
+        return command, filelist_pipe, error_pipe
+
+    def _verify_hmac(self, filename, hmacfile, algorithm=None):
+        '''Verify hmac of a file using given algorithm.
+
+        If algorithm is not specified, use the one from backup header (
+        :py:attr:`header_data`).
+
+        Raise :py:exc:`QubesException` on failure, return :py:obj:`True` on
+        success.
+
+        'scrypt' algorithm is supported only for header file; hmac file is
+        encrypted (and integrity protected) version of plain header.
+
+        :param filename: path to file to be verified
+        :param hmacfile: path to hmac file for *filename*
+        :param algorithm: override algorithm
+        '''
+        def load_hmac(hmac_text):
+            '''Parse hmac output by openssl.
+
+            Return just hmac, without filename and other metadata.
+            '''
+            if any(ord(x) not in range(128) for x in hmac_text):
+                raise QubesException(
+                    "Invalid content of {}".format(hmacfile))
+            hmac_text = hmac_text.strip().split("=")
+            if len(hmac_text) > 1:
+                hmac_text = hmac_text[1].strip()
+            else:
+                raise QubesException(
+                    "ERROR: invalid hmac file content")
+
+            return hmac_text
+        if algorithm is None:
+            algorithm = self.header_data.hmac_algorithm
+        passphrase = self.passphrase.encode('utf-8')
+        self.log.debug("Verifying file %s", filename)
+
+        if os.stat(os.path.join(self.tmpdir, hmacfile)).st_size > \
+                HMAC_MAX_SIZE:
+            raise QubesException('HMAC file {} too large'.format(
+                hmacfile))
+
+        if hmacfile != filename + ".hmac":
+            raise QubesException(
+                "ERROR: expected hmac for {}, but got {}".
+                format(filename, hmacfile))
+
+        if algorithm == 'scrypt':
+            # in case of 'scrypt' _verify_hmac is only used for backup header
+            assert filename == HEADER_FILENAME
+            self._verify_and_decrypt(hmacfile, HEADER_FILENAME + '.dec')
+            f_name = os.path.join(self.tmpdir, filename)
+            with open(f_name, 'rb') as f_one:
+                with open(f_name + '.dec', 'rb') as f_two:
+                    if f_one.read() != f_two.read():
+                        raise QubesException(
+                            'Invalid hmac on {}'.format(filename))
+                    else:
+                        return True
+
+        with open(os.path.join(self.tmpdir, filename), 'rb') as f_input:
+            hmac_proc = subprocess.Popen(
+                ["openssl", "dgst", "-" + algorithm, "-hmac", passphrase],
+                stdin=f_input,
+                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            hmac_stdout, hmac_stderr = hmac_proc.communicate()
+
+        if hmac_stderr:
+            raise QubesException(
+                "ERROR: verify file {0}: {1}".format(filename, hmac_stderr))
+        else:
+            self.log.debug("Loading hmac for file %s", filename)
+            with open(os.path.join(self.tmpdir, hmacfile), 'r',
+                    encoding='ascii') as f_hmac:
+                hmac = load_hmac(f_hmac.read())
+
+            if hmac and load_hmac(hmac_stdout.decode('ascii')) == hmac:
+                os.unlink(os.path.join(self.tmpdir, hmacfile))
+                self.log.debug(
+                    "File verification OK -> Sending file %s", filename)
+                return True
+            else:
+                raise QubesException(
+                    "ERROR: invalid hmac for file {0}: {1}. "
+                    "Is the passphrase correct?".
+                    format(filename, load_hmac(hmac_stdout.decode('ascii'))))
+
+    def _verify_and_decrypt(self, filename, output=None):
+        '''Handle scrypt-wrapped file
+
+        Decrypt the file, and verify its integrity - both tasks handled by
+        'scrypt' tool. Filename (without extension) is also validated.
+
+        :param filename: Input file name (relative to :py:attr:`tmpdir`),
+        needs to have `.enc` or `.hmac` extension
+        :param output: Output file name (relative to :py:attr:`tmpdir`),
+        use :py:obj:`None` to use *filename* without extension
+        :return: *filename* without extension
+        '''
+        assert filename.endswith('.enc') or filename.endswith('.hmac')
+        fullname = os.path.join(self.tmpdir, filename)
+        (origname, _) = os.path.splitext(filename)
+        if output:
+            fulloutput = os.path.join(self.tmpdir, output)
+        else:
+            fulloutput = os.path.join(self.tmpdir, origname)
+        if origname == HEADER_FILENAME:
+            passphrase = u'{filename}!{passphrase}'.format(
+                filename=origname,
+                passphrase=self.passphrase)
+        else:
+            passphrase = u'{backup_id}!{filename}!{passphrase}'.format(
+                backup_id=self.header_data.backup_id,
+                filename=origname,
+                passphrase=self.passphrase)
+        try:
+            p = launch_scrypt('dec', fullname, fulloutput, passphrase)
+        except OSError as err:
+            raise QubesException('failed to decrypt {}: {!s}'.format(
+                fullname, err))
+        (_, stderr) = p.communicate()
+        if hasattr(p, 'pty'):
+            p.pty.close()
+        if p.returncode != 0:
+            os.unlink(fulloutput)
+            raise QubesException('failed to decrypt {}: {}'.format(
+                fullname, stderr))
+        # encrypted file is no longer needed
+        os.unlink(fullname)
+        return origname
+
+    def _retrieve_backup_header_files(self, files, allow_none=False):
+        '''Retrieve backup header.
+
+        Start retrieval process (possibly involving network access from
+        another VM). Returns a collection of retrieved file paths.
+        '''
+        (retrieve_proc, filelist_pipe, error_pipe) = \
+            self._start_retrieval_process(
+                files, len(files), 1024 * 1024)
+        filelist = filelist_pipe.read()
+        filelist_pipe.close()
+        retrieve_proc_returncode = retrieve_proc.wait()
+        if retrieve_proc in self.processes_to_kill_on_cancel:
+            self.processes_to_kill_on_cancel.remove(retrieve_proc)
+        extract_stderr = error_pipe.read(MAX_STDERR_BYTES)
+        error_pipe.close()
+
+        # wait for other processes (if any)
+        for proc in self.processes_to_kill_on_cancel:
+            if proc.wait() != 0:
+                raise QubesException(
+                    "Backup header retrieval failed (exit code {})".format(
+                        proc.wait())
+                )
+
+        if retrieve_proc_returncode != 0:
+            if not filelist and 'Not found in archive' in extract_stderr:
+                if allow_none:
+                    return None
+                else:
+                    raise QubesException(
+                        "unable to read the qubes backup file {0} ({1}): {2}".
+                        format(
+                            self.backup_location,
+                            retrieve_proc.wait(),
+                            extract_stderr
+                        ))
+        actual_files = filelist.decode('ascii').splitlines()
+        if sorted(actual_files) != sorted(files):
+            raise QubesException(
+                'unexpected files in archive: got {!r}, expected {!r}'.format(
+                    actual_files, files
+                ))
+        for fname in files:
+            if not os.path.exists(os.path.join(self.tmpdir, fname)):
+                if allow_none:
+                    return None
+                else:
+                    raise QubesException(
+                        'Unable to retrieve file {} from backup {}: {}'.format(
+                            fname, self.backup_location, extract_stderr
+                        )
+                    )
+        return files
+
+    def _retrieve_backup_header(self):
+        """Retrieve backup header and qubes.xml. Only backup header is
+        analyzed, qubes.xml is left as-is
+        (not even verified/decrypted/uncompressed)
+
+        :return header_data
+        :rtype :py:class:`BackupHeader`
+        """
+
+        if not self.backup_vm and os.path.exists(
+                os.path.join(self.backup_location, 'qubes.xml')):
+            # backup format version 1 doesn't have header
+            header_data = BackupHeader()
+            header_data.version = 1
+            return header_data
+
+        header_files = self._retrieve_backup_header_files(
+            ['backup-header', 'backup-header.hmac'], allow_none=True)
+
+        if not header_files:
+            # R2-Beta3 didn't have backup header, so if none is found,
+            # assume it's version=2 and use values present at that time
+            header_data = BackupHeader(
+                version=2,
+                # place explicitly this value, because it is what format_version
+                # 2 have
+                hmac_algorithm='SHA1',
+                crypto_algorithm='aes-256-cbc',
+                # TODO: set encrypted to something...
+            )
+        else:
+            filename = HEADER_FILENAME
+            hmacfile = HEADER_FILENAME + '.hmac'
+            self.log.debug("Got backup header and hmac: %s, %s",
+                filename, hmacfile)
+
+            file_ok = False
+            hmac_algorithm = DEFAULT_HMAC_ALGORITHM
+            for hmac_algo in get_supported_hmac_algo(hmac_algorithm):
+                try:
+                    if self._verify_hmac(filename, hmacfile, hmac_algo):
+                        file_ok = True
+                        break
+                except QubesException as err:
+                    self.log.debug(
+                        'Failed to verify %s using %s: %r',
+                            hmacfile, hmac_algo, err)
+                    # Ignore exception here, try the next algo
+            if not file_ok:
+                raise QubesException(
+                    "Corrupted backup header (hmac verification "
+                    "failed). Is the password correct?")
+            filename = os.path.join(self.tmpdir, filename)
+            with open(filename, 'rb') as f_header:
+                header_data = BackupHeader(f_header.read())
+            os.unlink(filename)
+
+        return header_data
+
+    def _start_inner_extraction_worker(self, queue, handlers):
+        """Start a worker process, extracting inner layer of bacup archive,
+        extract them to :py:attr:`tmpdir`.
+        End the data by pushing QUEUE_FINISHED or QUEUE_ERROR to the queue.
+
+        :param queue :py:class:`Queue` object to handle files from
+        """
+
+        # Setup worker to extract encrypted data chunks to the restore dirs
+        # Create the process here to pass it options extracted from
+        # backup header
+        extractor_params = {
+            'queue': queue,
+            'base_dir': self.tmpdir,
+            'passphrase': self.passphrase,
+            'encrypted': self.header_data.encrypted,
+            'compressed': self.header_data.compressed,
+            'crypto_algorithm': self.header_data.crypto_algorithm,
+            'verify_only': self.options.verify_only,
+            'progress_callback': self.progress_callback,
+            'handlers': handlers,
+        }
+        self.log.debug(
+            'Starting extraction worker in %s, file handlers map: %s',
+            self.tmpdir, repr(handlers))
+        format_version = self.header_data.version
+        if format_version in [3, 4]:
+            extractor_params['compression_filter'] = \
+                self.header_data.compression_filter
+            if format_version == 4:
+                # encryption already handled
+                extractor_params['encrypted'] = False
+            extract_proc = ExtractWorker3(**extractor_params)
+        else:
+            raise NotImplementedError(
+                "Backup format version %d not supported" % format_version)
+        extract_proc.start()
+        return extract_proc
+
+    @staticmethod
+    def _save_qubes_xml(path, stream):
+        '''Handler for qubes.xml.000 content - just save the data to a file'''
+        with open(path, 'wb') as f_qubesxml:
+            f_qubesxml.write(stream.read())
+
+    def _process_qubes_xml(self):
+        """Verify, unpack and load qubes.xml. Possibly convert its format if
+        necessary. It expect that :py:attr:`header_data` is already populated,
+        and :py:meth:`retrieve_backup_header` was called.
+        """
+        if self.header_data.version == 1:
+            raise NotImplementedError('Backup format version 1 not supported')
+        elif self.header_data.version in [2, 3]:
+            self._retrieve_backup_header_files(
+                ['qubes.xml.000', 'qubes.xml.000.hmac'])
+            self._verify_hmac("qubes.xml.000", "qubes.xml.000.hmac")
+        else:
+            self._retrieve_backup_header_files(['qubes.xml.000.enc'])
+            self._verify_and_decrypt('qubes.xml.000.enc')
+
+        queue = Queue()
+        queue.put("qubes.xml.000")
+        queue.put(QUEUE_FINISHED)
+
+        qubes_xml_path = os.path.join(self.tmpdir, 'qubes-restored.xml')
+        handlers = {
+            'qubes.xml': (
+                functools.partial(self._save_qubes_xml, qubes_xml_path),
+                None)
+            }
+        extract_proc = self._start_inner_extraction_worker(queue, handlers)
+        extract_proc.join()
+        if extract_proc.exitcode != 0:
+            raise QubesException(
+                "unable to extract the qubes backup. "
+                "Check extracting process errors.")
+
+        if self.header_data.version in [2, 3]:
+            from qubesadmin.backup.core2 import Core2Qubes
+            backup_app = Core2Qubes(qubes_xml_path)
+        elif self.header_data.version in [4]:
+            from qubesadmin.backup.core3 import Core3Qubes
+            backup_app = Core3Qubes(qubes_xml_path)
+        else:
+            raise QubesException(
+                'Unsupported qubes.xml format version: {}'.format(
+                    self.header_data.version))
+        # Not needed anymore - all the data stored in backup_app
+        os.unlink(qubes_xml_path)
+        return backup_app
+
+    def _restore_vm_data(self, vms_dirs, vms_size, handlers):
+        '''Restore data of VMs
+
+        :param vms_dirs: list of directories to extract (skip others)
+        :param vms_size: expected size (abort if source stream exceed this
+        value)
+        :param handlers: handlers for restored files - see
+        :py:class:`ExtractWorker3` for details
+        '''
+        # Currently each VM consists of at most 7 archives (count
+        # file_to_backup calls in backup_prepare()), but add some safety
+        # margin for further extensions. Each archive is divided into 100MB
+        # chunks. Additionally each file have own hmac file. So assume upper
+        # limit as 2*(10*COUNT_OF_VMS+TOTAL_SIZE/100MB)
+        limit_count = str(2 * (10 * len(vms_dirs) +
+                               int(vms_size / (100 * 1024 * 1024))))
+
+        self.log.debug("Working in temporary dir: %s", self.tmpdir)
+        self.log.info("Extracting data: %s to restore", size_to_human(vms_size))
+
+        # retrieve backup from the backup stream (either VM, or dom0 file)
+        (retrieve_proc, filelist_pipe, error_pipe) = \
+            self._start_retrieval_process(
+                vms_dirs, limit_count, vms_size)
+
+        to_extract = Queue()
+
+        # extract data retrieved by retrieve_proc
+        extract_proc = self._start_inner_extraction_worker(
+            to_extract, handlers)
+
+        try:
+            filename = None
+            hmacfile = None
+            nextfile = None
+            while True:
+                if self.canceled:
+                    break
+                if not extract_proc.is_alive():
+                    retrieve_proc.terminate()
+                    retrieve_proc.wait()
+                    if retrieve_proc in self.processes_to_kill_on_cancel:
+                        self.processes_to_kill_on_cancel.remove(retrieve_proc)
+                    # wait for other processes (if any)
+                    for proc in self.processes_to_kill_on_cancel:
+                        proc.wait()
+                    break
+                if nextfile is not None:
+                    filename = nextfile
+                else:
+                    filename = filelist_pipe.readline().decode('ascii').strip()
+
+                self.log.debug("Getting new file: %s", filename)
+
+                if not filename or filename == "EOF":
+                    break
+
+                # if reading archive directly with tar, wait for next filename -
+                # tar prints filename before processing it, so wait for
+                # the next one to be sure that whole file was extracted
+                if not self.backup_vm:
+                    nextfile = filelist_pipe.readline().decode('ascii').strip()
+
+                if self.header_data.version in [2, 3]:
+                    if not self.backup_vm:
+                        hmacfile = nextfile
+                        nextfile = filelist_pipe.readline().\
+                            decode('ascii').strip()
+                    else:
+                        hmacfile = filelist_pipe.readline().\
+                            decode('ascii').strip()
+
+                    if self.canceled:
+                        break
+
+                    self.log.debug("Getting hmac: %s", hmacfile)
+                    if not hmacfile or hmacfile == "EOF":
+                        # Premature end of archive, either of tar1_command or
+                        # vmproc exited with error
+                        break
+                else:  # self.header_data.version == 4
+                    if not filename.endswith('.enc'):
+                        raise qubesadmin.exc.QubesException(
+                            'Invalid file extension found in archive: {}'.
+                            format(filename))
+
+                if not any(filename.startswith(x) for x in vms_dirs):
+                    self.log.debug("Ignoring VM not selected for restore")
+                    os.unlink(os.path.join(self.tmpdir, filename))
+                    if hmacfile:
+                        os.unlink(os.path.join(self.tmpdir, hmacfile))
+                    continue
+
+                if self.header_data.version in [2, 3]:
+                    self._verify_hmac(filename, hmacfile)
+                else:
+                    # _verify_and_decrypt will write output to a file with
+                    # '.enc' extension cut off. This is safe because:
+                    # - `scrypt` tool will override output, so if the file was
+                    # already there (received from the VM), it will be removed
+                    # - incoming archive extraction will refuse to override
+                    # existing file, so if `scrypt` already created one,
+                    # it can not be manipulated by the VM
+                    # - when the file is retrieved from the VM, it appears at
+                    # the final form - if it's visible, VM have no longer
+                    # influence over its content
+                    #
+                    # This all means that if the file was correctly verified
+                    # + decrypted, we will surely access the right file
+                    filename = self._verify_and_decrypt(filename)
+                to_extract.put(os.path.join(self.tmpdir, filename))
+
+            if self.canceled:
+                raise BackupCanceledError("Restore canceled",
+                                          tmpdir=self.tmpdir)
+
+            if retrieve_proc.wait() != 0:
+                raise QubesException(
+                    "unable to read the qubes backup file {0}: {1}"
+                    .format(self.backup_location, error_pipe.read(
+                        MAX_STDERR_BYTES)))
+            # wait for other processes (if any)
+            for proc in self.processes_to_kill_on_cancel:
+                proc.wait()
+                if proc.returncode != 0:
+                    raise QubesException(
+                        "Backup completed, "
+                        "but VM sending it reported an error (exit code {})".
+                        format(proc.returncode))
+
+            if filename and filename != "EOF":
+                raise QubesException(
+                    "Premature end of archive, the last file was %s" % filename)
+        except:
+            to_extract.put(QUEUE_ERROR)
+            extract_proc.join()
+            raise
+        else:
+            to_extract.put(QUEUE_FINISHED)
+        finally:
+            error_pipe.close()
+            filelist_pipe.close()
+
+        self.log.debug("Waiting for the extraction process to finish...")
+        extract_proc.join()
+        self.log.debug("Extraction process finished with code: %s",
+            extract_proc.exitcode)
+        if extract_proc.exitcode != 0:
+            raise QubesException(
+                "unable to extract the qubes backup. "
+                "Check extracting process errors.")
+
+    def new_name_for_conflicting_vm(self, orig_name, restore_info):
+        '''Generate new name for conflicting VM
+
+        Add a number suffix, until the name is unique. If no unique name can
+        be found using this strategy, return :py:obj:`None`
+        '''
+        number = 1
+        if len(orig_name) > 29:
+            orig_name = orig_name[0:29]
+        new_name = orig_name
+        while (new_name in restore_info.keys() or
+               new_name in [x.name for x in restore_info.values()] or
+               new_name in self.app.domains):
+            new_name = str('{}{}'.format(orig_name, number))
+            number += 1
+            if number == 100:
+                # give up
+                return None
+        return new_name
+
+    def restore_info_verify(self, restore_info):
+        '''Verify restore info - validate VM dependencies, name conflicts
+        etc.
+        '''
+        for vm in restore_info.keys():
+            if vm in ['dom0']:
+                continue
+
+            vm_info = restore_info[vm]
+            assert isinstance(vm_info, self.VMToRestore)
+
+            vm_info.problems.clear()
+            if vm in self.options.exclude:
+                vm_info.problems.add(self.VMToRestore.EXCLUDED)
+
+            if not self.options.verify_only and \
+                    vm_info.name in self.app.domains:
+                if self.options.rename_conflicting:
+                    new_name = self.new_name_for_conflicting_vm(
+                        vm, restore_info
+                    )
+                    if new_name is not None:
+                        vm_info.name = new_name
+                    else:
+                        vm_info.problems.add(self.VMToRestore.ALREADY_EXISTS)
+                else:
+                    vm_info.problems.add(self.VMToRestore.ALREADY_EXISTS)
+
+            # check template
+            if vm_info.template:
+                template_name = vm_info.template
+                try:
+                    host_template = self.app.domains[template_name]
+                except KeyError:
+                    host_template = None
+                present_on_host = (host_template and
+                    isinstance(host_template, qubesadmin.vm.TemplateVM))
+                present_in_backup = (template_name in restore_info.keys() and
+                    restore_info[template_name].good_to_go and
+                    restore_info[template_name].vm.klass ==
+                    'TemplateVM')
+                if not present_on_host and not present_in_backup:
+                    if self.options.use_default_template and \
+                            self.app.default_template:
+                        if vm_info.orig_template is None:
+                            vm_info.orig_template = template_name
+                        vm_info.template = self.app.default_template.name
+                    else:
+                        vm_info.problems.add(
+                            self.VMToRestore.MISSING_TEMPLATE)
+
+            # check netvm
+            if vm_info.vm.properties.get('netvm', None) is not None:
+                netvm_name = vm_info.netvm
+
+                try:
+                    netvm_on_host = self.app.domains[netvm_name]
+                except KeyError:
+                    netvm_on_host = None
+
+                present_on_host = (netvm_on_host is not None
+                        and netvm_on_host.provides_network)
+                present_in_backup = (netvm_name in restore_info.keys() and
+                    restore_info[netvm_name].good_to_go and
+                    restore_info[netvm_name].vm.properties.get(
+                        'provides_network', False))
+                if not present_on_host and not present_in_backup:
+                    if self.options.use_default_netvm:
+                        del vm_info.vm.properties['netvm']
+                    elif self.options.use_none_netvm:
+                        vm_info.netvm = None
+                    else:
+                        vm_info.problems.add(self.VMToRestore.MISSING_NETVM)
+
+        return restore_info
+
+    def get_restore_info(self):
+        '''Get restore info
+
+        Return information about what is included in the backup.
+        That dictionary can be adjusted to select what VM should be restore.
+        '''
+        # Format versions:
+        #  1 - Qubes R1, Qubes R2 beta1, beta2
+        #  2 - Qubes R2 beta3+
+        #  3 - Qubes R2+
+        #  4 - Qubes R4+
+
+        vms_to_restore = {}
+
+        for vm in self.backup_app.domains.values():
+            if vm.klass == 'AdminVM':
+                # Handle dom0 as special case later
+                continue
+            if vm.included_in_backup:
+                self.log.debug("%s is included in backup", vm.name)
+
+                vms_to_restore[vm.name] = self.VMToRestore(vm)
+
+                if vm.template is not None:
+                    templatevm_name = vm.template
+                    vms_to_restore[vm.name].template = templatevm_name
+
+        vms_to_restore = self.restore_info_verify(vms_to_restore)
+
+        # ...and dom0 home
+        if self.options.dom0_home and \
+                self.backup_app.domains['dom0'].included_in_backup:
+            vm = self.backup_app.domains['dom0']
+            vms_to_restore['dom0'] = self.Dom0ToRestore(vm)
+            local_user = grp.getgrnam('qubes').gr_mem[0]
+
+            if vms_to_restore['dom0'].username != local_user:
+                if not self.options.ignore_username_mismatch:
+                    vms_to_restore['dom0'].problems.add(
+                        self.Dom0ToRestore.USERNAME_MISMATCH)
+
+        return vms_to_restore
+
+    @staticmethod
+    def get_restore_summary(restore_info):
+        '''Return a ASCII formatted table with restore info summary'''
+        fields = {
+            "name": {'func': lambda vm: vm.name},
+
+            "type": {'func': lambda vm: vm.klass},
+
+            "template": {'func': lambda vm:
+                'n/a' if vm.template is None else vm.template},
+
+            "netvm": {'func': lambda vm:
+                '(default)' if 'netvm' not in vm.properties else
+                '-' if vm.properties['netvm'] is None else
+                vm.properties['netvm']},
+
+            "label": {'func': lambda vm: vm.label},
+        }
+
+        fields_to_display = ['name', 'type', 'template',
+            'netvm', 'label']
+
+        # First calculate the maximum width of each field we want to display
+        total_width = 0
+        for field in fields_to_display:
+            fields[field]['max_width'] = len(field)
+            for vm_info in restore_info.values():
+                if vm_info.vm:
+                    # noinspection PyUnusedLocal
+                    field_len = len(str(fields[field]["func"](vm_info.vm)))
+                    if field_len > fields[field]['max_width']:
+                        fields[field]['max_width'] = field_len
+            total_width += fields[field]['max_width']
+
+        summary = ""
+        summary += "The following VMs are included in the backup:\n"
+        summary += "\n"
+
+        # Display the header
+        for field in fields_to_display:
+            # noinspection PyTypeChecker
+            fmt = "{{0:-^{0}}}-+".format(fields[field]["max_width"] + 1)
+            summary += fmt.format('-')
+        summary += "\n"
+        for field in fields_to_display:
+            # noinspection PyTypeChecker
+            fmt = "{{0:>{0}}} |".format(fields[field]["max_width"] + 1)
+            summary += fmt.format(field)
+        summary += "\n"
+        for field in fields_to_display:
+            # noinspection PyTypeChecker
+            fmt = "{{0:-^{0}}}-+".format(fields[field]["max_width"] + 1)
+            summary += fmt.format('-')
+        summary += "\n"
+
+        for vm_info in restore_info.values():
+            assert isinstance(vm_info, BackupRestore.VMToRestore)
+            # Skip non-VM here
+            if not vm_info.vm:
+                continue
+            # noinspection PyUnusedLocal
+            summary_line = ""
+            for field in fields_to_display:
+                # noinspection PyTypeChecker
+                fmt = "{{0:>{0}}} |".format(fields[field]["max_width"] + 1)
+                summary_line += fmt.format(fields[field]["func"](vm_info.vm))
+
+            if BackupRestore.VMToRestore.EXCLUDED in vm_info.problems:
+                summary_line += " <-- Excluded from restore"
+            elif BackupRestore.VMToRestore.ALREADY_EXISTS in vm_info.problems:
+                summary_line += \
+                    " <-- A VM with the same name already exists on the host!"
+            elif BackupRestore.VMToRestore.MISSING_TEMPLATE in \
+                    vm_info.problems:
+                summary_line += " <-- No matching template on the host " \
+                     "or in the backup found!"
+            elif BackupRestore.VMToRestore.MISSING_NETVM in \
+                    vm_info.problems:
+                summary_line += " <-- No matching netvm on the host " \
+                     "or in the backup found!"
+            else:
+                if vm_info.template != vm_info.vm.template:
+                    summary_line += " <-- Template change to '{}'".format(
+                        vm_info.template)
+                if vm_info.name != vm_info.vm.name:
+                    summary_line += " <-- Will be renamed to '{}'".format(
+                        vm_info.name)
+
+            summary += summary_line + "\n"
+
+        if 'dom0' in restore_info.keys():
+            summary_line = ""
+            for field in fields_to_display:
+                # noinspection PyTypeChecker
+                fmt = "{{0:>{0}}} |".format(fields[field]["max_width"] + 1)
+                if field == "name":
+                    summary_line += fmt.format("Dom0")
+                elif field == "type":
+                    summary_line += fmt.format("Home")
+                else:
+                    summary_line += fmt.format("")
+            if BackupRestore.Dom0ToRestore.USERNAME_MISMATCH in \
+                    restore_info['dom0'].problems:
+                summary_line += " <-- username in backup and dom0 mismatch"
+
+            summary += summary_line + "\n"
+
+        return summary
+
+    @staticmethod
+    def _templates_first(vms):
+        '''Sort templates befor other VM types (AppVM etc)'''
+        def key_function(instance):
+            '''Key function for :py:func:`sorted`'''
+            if isinstance(instance, BackupVM):
+                return instance.klass == 'TemplateVM'
+            elif hasattr(instance, 'vm'):
+                return key_function(instance.vm)
+            return 0
+        return sorted(vms,
+            key=key_function,
+            reverse=True)
+
+
+    def _handle_dom0(self, backup_path):
+        '''Extract dom0 home'''
+        local_user = grp.getgrnam('qubes').gr_mem[0]
+        home_dir = pwd.getpwnam(local_user).pw_dir
+        backup_dom0_home_dir = os.path.join(self.tmpdir, backup_path)
+        restore_home_backupdir = "home-pre-restore-{0}".format(
+            time.strftime("%Y-%m-%d-%H%M%S"))
+
+        self.log.info("Restoring home of user '%s'...", local_user)
+        self.log.info("Existing files/dirs backed up in '%s' dir",
+            restore_home_backupdir)
+        os.mkdir(home_dir + '/' + restore_home_backupdir)
+        for f_name in os.listdir(backup_dom0_home_dir):
+            home_file = home_dir + '/' + f_name
+            if os.path.exists(home_file):
+                os.rename(home_file,
+                    home_dir + '/' + restore_home_backupdir + '/' + f_name)
+            if self.header_data.version == 1:
+                subprocess.call(
+                    ["cp", "-nrp", "--reflink=auto",
+                        backup_dom0_home_dir + '/' + f_name, home_file])
+            elif self.header_data.version >= 2:
+                shutil.move(backup_dom0_home_dir + '/' + f_name, home_file)
+        retcode = subprocess.call(['sudo', 'chown', '-R',
+            local_user, home_dir])
+        if retcode != 0:
+            self.log.error("*** Error while setting home directory owner")
+
+    def restore_do(self, restore_info):
+        '''
+
+        High level workflow:
+        1. Create VMs object in host collection (qubes.xml)
+        2. Create them on disk (vm.create_on_disk)
+        3. Restore VM data, overriding/converting VM files
+        4. Apply possible fixups and save qubes.xml
+
+        :param restore_info:
+        :return:
+        '''
+
+        if self.header_data.version == 1:
+            raise NotImplementedError('Backup format version 1 not supported')
+
+        restore_info = self.restore_info_verify(restore_info)
+
+        self._restore_vms_metadata(restore_info)
+
+        # Perform VM restoration in backup order
+        vms_dirs = []
+        handlers = {}
+        vms_size = 0
+        for vm_info in self._templates_first(restore_info.values()):
+            vm = vm_info.restored_vm
+            if vm and vm_info.subdir:
+                vms_size += int(vm_info.size)
+                vms_dirs.append(vm_info.subdir)
+                for name, volume in vm.volumes.items():
+                    if not volume.save_on_stop:
+                        continue
+                    data_func = volume.import_data
+                    size_func = volume.resize
+                    handlers[os.path.join(vm_info.subdir, name + '.img')] = \
+                        (data_func, size_func)
+                # TODO applications whitelist
+                # TODO firewall
+
+        if 'dom0' in restore_info.keys() and \
+                restore_info['dom0'].good_to_go:
+            vms_dirs.append(os.path.dirname(restore_info['dom0'].subdir))
+            vms_size += restore_info['dom0'].size
+            handlers[restore_info['dom0'].subdir] = (self._handle_dom0, None)
+        try:
+            self._restore_vm_data(vms_dirs=vms_dirs, vms_size=vms_size,
+                handlers=handlers)
+        except QubesException:
+            if self.options.verify_only:
+                raise
+            else:
+                self.log.warning(
+                    "Some errors occurred during data extraction, "
+                    "continuing anyway to restore at least some "
+                    "VMs")
+
+        if self.options.verify_only:
+            shutil.rmtree(self.tmpdir)
+            return
+
+        if self.canceled:
+            raise BackupCanceledError("Restore canceled",
+                                      tmpdir=self.tmpdir)
+
+        shutil.rmtree(self.tmpdir)
+        self.log.info("-> Done. Please install updates for all the restored "
+                      "templates.")
+
+    def _restore_vms_metadata(self, restore_info):
+        '''Restore VM metadata
+
+        Create VMs, set their properties etc.
+        '''
+        vms = {}
+        for vm_info in restore_info.values():
+            assert isinstance(vm_info, self.VMToRestore)
+            if not vm_info.vm:
+                continue
+            if not vm_info.good_to_go:
+                continue
+            vm = vm_info.vm
+            vms[vm.name] = vm
+
+        # First load templates, then other VMs
+        for vm in self._templates_first(vms.values()):
+            if self.canceled:
+                return
+            self.log.info("-> Restoring %s...", vm.name)
+            kwargs = {}
+            if vm.template:
+                template = restore_info[vm.name].template
+                # handle potentially renamed template
+                if template in restore_info \
+                        and restore_info[template].good_to_go:
+                    template = restore_info[template].name
+                kwargs['template'] = template
+
+            new_vm = None
+            vm_name = restore_info[vm.name].name
+
+            try:
+                # first only create VMs, later setting may require other VMs
+                # be already created
+                new_vm = self.app.add_new_vm(
+                    vm.klass,
+                    name=vm_name,
+                    label=vm.label,
+                    pool=self.options.override_pool,
+                    **kwargs)
+            except Exception:  # pylint: disable=broad-except
+                self.log.exception('Error restoring VM %s, skipping', vm.name)
+                if new_vm:
+                    del self.app.domains[new_vm.name]
+                continue
+
+            restore_info[vm.name].restored_vm = new_vm
+
+        for vm in vms.values():
+            if self.canceled:
+                return
+
+            new_vm = restore_info[vm.name].restored_vm
+            if not new_vm:
+                # skipped/failed
+                continue
+
+            for prop, value in vm.properties.items():
+                # exclude VM references - handled manually according to
+                # restore options
+                if prop in ['template', 'netvm', 'default_dispvm']:
+                    continue
+                try:
+                    setattr(new_vm, prop, value)
+                except Exception:  # pylint: disable=broad-except
+                    self.log.exception('Error setting %s.%s to %s',
+                        vm.name, prop, value)
+
+            for feature, value in vm.features.items():
+                try:
+                    new_vm.features[feature] = value
+                except Exception:  # pylint: disable=broad-except
+                    self.log.exception('Error setting %s.features[%s] to %s',
+                        vm.name, feature, value)
+
+            for tag in vm.tags:
+                try:
+                    new_vm.tags.add(tag)
+                except Exception:  # pylint: disable=broad-except
+                    self.log.exception('Error adding tag %s to %s',
+                        tag, vm.name)
+
+            for bus in vm.devices:
+                for backend_domain, ident in vm.devices[bus]:
+                    options = vm.devices[bus][(backend_domain, ident)]
+                    assignment = DeviceAssignment(
+                        backend_domain=backend_domain,
+                        ident=ident,
+                        options=options,
+                        persistent=True)
+                    try:
+                        new_vm.devices[bus].attach(assignment)
+                    except Exception:  # pylint: disable=broad-except
+                        self.log.exception('Error attaching device %s:%s to %s',
+                            bus, ident, vm.name)
+
+        # Set VM dependencies - only non-default setting
+        for vm in vms.values():
+            vm_info = restore_info[vm.name]
+            vm_name = vm_info.name
+            try:
+                host_vm = self.app.domains[vm_name]
+            except KeyError:
+                # Failed/skipped VM
+                continue
+
+            if 'netvm' in vm.properties:
+                if vm_info.netvm in restore_info:
+                    value = restore_info[vm_info.netvm].name
+                else:
+                    value = vm_info.netvm
+
+                try:
+                    host_vm.netvm = value
+                except Exception:  # pylint: disable=broad-except
+                    self.log.exception('Error setting %s.%s to %s',
+                        vm.name, 'netvm', value)
+
+            if 'default_dispvm' in vm.properties:
+                if vm.properties['default_dispvm'] in restore_info:
+                    value = restore_info[vm.properties[
+                        'default_dispvm']].name
+                else:
+                    value = vm.properties['default_dispvm']
+
+                try:
+                    host_vm.default_dispvm = value
+                except Exception:  # pylint: disable=broad-except
+                    self.log.exception('Error setting %s.%s to %s',
+                        vm.name, 'default_dispvm', value)

+ 290 - 0
qubesadmin/backup/core2.py

@@ -0,0 +1,290 @@
+# -*- encoding: utf8 -*-
+#
+# The Qubes OS Project, http://www.qubes-os.org
+#
+# Copyright (C) 2017 Marek Marczykowski-Górecki
+#                               <marmarek@invisiblethingslab.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License along
+# with this program; if not, see <http://www.gnu.org/licenses/>.
+
+'''Parser for core2 qubes.xml'''
+
+import ast
+import xml.parsers
+import logging
+import lxml.etree
+
+import qubesadmin.backup
+
+service_to_feature = {
+    'ntpd': 'service.ntpd',
+    'qubes-update-check': 'check-updates',
+    'meminfo-writer': 'services.meminfo-writer',
+}
+
+class Core2VM(qubesadmin.backup.BackupVM):
+    '''VM object'''
+    # pylint: disable=too-few-public-methods
+    def __init__(self):
+        super(Core2VM, self).__init__()
+        self.backup_content = False
+
+    @property
+    def included_in_backup(self):
+        return self.backup_content
+
+class Core2Qubes(qubesadmin.backup.BackupApp):
+    '''Parsed qubes.xml'''
+    def __init__(self, store=None):
+        if store is None:
+            raise ValueError("store path required")
+        self.qid_map = {}
+        self.log = logging.getLogger('qubesadmin.backup.core2')
+        super(Core2Qubes, self).__init__(store)
+
+    def load_globals(self, element):
+        '''Load global settings
+
+        :param element: XML element containing global settings (root node)
+        '''
+        default_netvm = element.get("default_netvm")
+        if default_netvm is not None:
+            self.globals['default_netvm'] = self.qid_map[int(default_netvm)] \
+                if default_netvm != "None" else None
+
+        # default_fw_netvm = element.get("default_fw_netvm")
+        # if default_fw_netvm is not None:
+        #     self.globals['default_fw_netvm'] = \
+        #         self.qid_map[int(default_fw_netvm)] \
+        #         if default_fw_netvm != "None" else None
+
+        updatevm = element.get("updatevm")
+        if updatevm is not None:
+            self.globals['updatevm'] = self.qid_map[int(updatevm)] \
+                if updatevm != "None" else None
+
+        clockvm = element.get("clockvm")
+        if clockvm is not None:
+            self.globals['clockvm'] = self.qid_map[int(clockvm)] \
+                if clockvm != "None" else None
+
+        default_template = element.get("default_template")
+        self.globals['default_template'] = self.qid_map[int(default_template)] \
+            if default_template.lower() != "none" else None
+
+
+    def set_netvm_dependency(self, element):
+        '''Set dependencies between VMs'''
+        kwargs = {}
+        attr_list = ("name", "uses_default_netvm", "netvm_qid")
+
+        for attribute in attr_list:
+            kwargs[attribute] = element.get(attribute)
+
+        vm = self.domains[kwargs["name"]]
+
+        # netvm property
+        if element.get("uses_default_netvm") is None:
+            uses_default_netvm = True
+        else:
+            uses_default_netvm = (
+                True if element.get("uses_default_netvm") == "True" else False)
+        if not uses_default_netvm:
+            netvm_qid = element.get("netvm_qid")
+            if netvm_qid is None or netvm_qid == "none":
+                vm.properties['netvm'] = None
+            else:
+                vm.properties['netvm'] = self.qid_map[int(netvm_qid)]
+
+        # And DispVM netvm, translated to default_dispvm
+        if element.get("uses_default_dispvm_netvm") is None:
+            uses_default_dispvm_netvm = True
+        else:
+            uses_default_dispvm_netvm = (
+                True if element.get("uses_default_dispvm_netvm") == "True"
+                else False)
+        if not uses_default_dispvm_netvm:
+            dispvm_netvm_qid = element.get("dispvm_netvm_qid")
+            if dispvm_netvm_qid is None or dispvm_netvm_qid == "none":
+                dispvm_netvm = None
+            else:
+                dispvm_netvm = self.qid_map[int(dispvm_netvm_qid)]
+        else:
+            dispvm_netvm = vm.properties.get('netvm', self.globals[
+                'default_netvm'])
+
+        if dispvm_netvm != self.globals['default_netvm']:
+            if dispvm_netvm:
+                dispvm_tpl_name = 'disp-{}'.format(dispvm_netvm)
+            else:
+                dispvm_tpl_name = 'disp-no-netvm'
+
+            vm.properties['default_dispvm'] = dispvm_tpl_name
+
+            if dispvm_tpl_name not in self.domains:
+                vm = Core2VM()
+                vm.name = dispvm_tpl_name
+                vm.label = 'red'
+                vm.properties['netvm'] = dispvm_netvm
+                vm.properties['dispvm_allowed'] = True
+                vm.backup_content = True
+                vm.backup_path = None
+                self.domains[vm.name] = vm
+                # TODO: add support for #2075
+            # TODO: set qrexec policy based on dispvm_netvm value
+
+    def import_core2_vm(self, element):
+        '''Parse a single VM from given XML node
+
+        This method load only VM properties not depending on other VMs
+        (other than template). VM connections are set later.
+        :param element: XML node
+        '''
+        vm_class_name = element.tag
+        vm = Core2VM()
+        vm.name = element.get('name')
+        vm.label = element.get('label', 'red')
+        self.domains[vm.name] = vm
+        kwargs = {}
+        if vm_class_name in ["QubesTemplateVm", "QubesTemplateHVm"]:
+            vm.klass = "TemplateVM"
+        elif element.get('template_qid').lower() == "none":
+            kwargs['dir_path'] = element.get('dir_path')
+            vm.klass = "StandaloneVM"
+        else:
+            kwargs['dir_path'] = element.get('dir_path')
+            vm.template = \
+                self.qid_map[int(element.get('template_qid'))]
+            vm.klass = "AppVM"
+        # simple attributes
+        for attr, default in {
+            #'installed_by_rpm': 'False',
+            'include_in_backups': 'True',
+            'qrexec_timeout': '60',
+            'vcpus': '2',
+            'memory': '400',
+            'maxmem': '4000',
+            'default_user': 'user',
+            'debug': 'False',
+            'mac': None,
+            'autostart': 'False'}.items():
+            value = element.get(attr)
+            if value and value != default:
+                vm.properties[attr] = value
+        # attributes with default value
+        for attr in ["kernel", "kernelopts"]:
+            value = element.get(attr)
+            if value and value.lower() == "none":
+                value = None
+            value_is_default = element.get(
+                "uses_default_{}".format(attr))
+            if value_is_default and value_is_default.lower() != \
+                    "true":
+                vm.properties[attr] = value
+        vm.properties['hvm'] = "HVm" in vm_class_name
+        if vm_class_name in ('QubesNetVm', 'QubesProxyVm'):
+            vm.properties['provides_network'] = True
+        if vm_class_name == 'QubesNetVm':
+            vm.properties['netvm'] = None
+        if element.get('internal', False) == 'True':
+            vm.features['internal'] = True
+
+        services = element.get('services')
+        if services:
+            services = ast.literal_eval(services)
+        else:
+            services = {}
+        for service, value in services.items():
+            feature = service
+            for repl_service, repl_feature in \
+                    service_to_feature.items():
+                if repl_service == service:
+                    feature = repl_feature
+            vm.features[feature] = value
+
+        vm.backup_content = element.get('backup_content', False) == 'True'
+        vm.backup_path = element.get('backup_path', None)
+        vm.size = element.get('backup_size', 0)
+
+        pci_strictreset = element.get('pci_strictreset', True)
+        pcidevs = element.get('pcidevs')
+        if pcidevs:
+            pcidevs = ast.literal_eval(pcidevs)
+        for pcidev in pcidevs:
+            if not pci_strictreset:
+                vm.devices['pci'][('dom0', pcidev.replace(':', '_'))] = {
+                    'no-strict-reset': True}
+            else:
+                vm.devices['pci'][('dom0', pcidev.replace(':', '_'))] = {}
+
+    def load(self):
+        with open(self.store) as fh:
+            try:
+                # pylint: disable=no-member
+                tree = lxml.etree.parse(fh)
+            except (EnvironmentError,  # pylint: disable=broad-except
+                    xml.parsers.expat.ExpatError) as err:
+                self.log.error(err)
+                return False
+
+        self.globals['default_kernel'] = tree.getroot().get("default_kernel")
+
+        vm_classes = ["AdminVM", "TemplateVm", "TemplateHVm",
+            "AppVm", "HVm", "NetVm", "ProxyVm"]
+
+        # First build qid->name map
+        for vm_class_name in vm_classes:
+            vms_of_class = tree.findall("Qubes" + vm_class_name)
+            for element in vms_of_class:
+                qid = element.get('qid', None)
+                name = element.get('name', None)
+                if qid and name:
+                    self.qid_map[int(qid)] = name
+
+        # Qubes R2 din't have dom0 in qubes.xml
+        if 0 not in self.qid_map:
+            vm = Core2VM()
+            vm.name = 'dom0'
+            vm.klass = 'AdminVM'
+            vm.label = 'black'
+            self.domains['dom0'] = vm
+            self.qid_map[0] = 'dom0'
+
+        # Then load all VMs - since we have qid_map, no need to preserve
+        # specific load older.
+        for vm_class_name in vm_classes:
+            vms_of_class = tree.findall("Qubes" + vm_class_name)
+            for element in vms_of_class:
+                self.import_core2_vm(element)
+
+        # ... and load other VMs
+        for vm_class_name in ["AppVm", "HVm", "NetVm", "ProxyVm"]:
+            vms_of_class = tree.findall("Qubes" + vm_class_name)
+            # first non-template based, then template based
+            sorted_vms_of_class = sorted(vms_of_class,
+                key=lambda x: str(x.get('template_qid')).lower() != "none")
+            for element in sorted_vms_of_class:
+                self.import_core2_vm(element)
+
+        # and load other defaults (default netvm, updatevm etc)
+        self.load_globals(tree.getroot())
+
+        # After importing all VMs, set netvm references, in the same order
+        for vm_class_name in vm_classes:
+            for element in tree.findall("Qubes" + vm_class_name):
+                try:
+                    self.set_netvm_dependency(element)
+                except (ValueError, LookupError) as err:
+                    self.log.error("VM %s: failed to set netvm dependency: %s",
+                        element.get('name'), err)

+ 145 - 0
qubesadmin/backup/core3.py

@@ -0,0 +1,145 @@
+# -*- encoding: utf8 -*-
+#
+# The Qubes OS Project, http://www.qubes-os.org
+#
+# Copyright (C) 2017 Marek Marczykowski-Górecki
+#                               <marmarek@invisiblethingslab.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License along
+# with this program; if not, see <http://www.gnu.org/licenses/>.
+
+'''Parser for core2 qubes.xml'''
+
+import xml.parsers
+import logging
+import lxml.etree
+
+import qubesadmin.backup
+
+class Core3VM(qubesadmin.backup.BackupVM):
+    '''VM object'''
+    # pylint: disable=too-few-public-methods
+    @property
+    def included_in_backup(self):
+        return self.backup_path is not None
+
+class Core3Qubes(qubesadmin.backup.BackupApp):
+    '''Parsed qubes.xml'''
+    def __init__(self, store=None):
+        if store is None:
+            raise ValueError("store path required")
+        self.log = logging.getLogger('qubesadmin.backup.core3')
+        self.labels = {}
+        super(Core3Qubes, self).__init__(store)
+
+    @staticmethod
+    def get_property(xml_obj, prop):
+        '''Get property of given object (XML node)
+
+        Object can be any PropertyHolder serialized to XML - in practice
+        :py:class:`BaseVM` or :py:class:`Qubes`.
+        '''
+        xml_prop = xml_obj.findall('./property[@name=\'{}\']'.format(prop))
+        if not xml_prop:
+            raise KeyError(prop)
+        return xml_prop[0].text
+
+    def load_labels(self, labels_element):
+        '''Load labels table'''
+        for node in labels_element.findall('label'):
+            ident = node.get('id')
+            assert ident is not None
+            self.labels[ident] = node.text
+
+
+    def load_globals(self, globals_element):
+        '''Load global settings
+
+        :param globals_element: XML element containing global settings
+        '''
+        for node in globals_element.findall('property'):
+            name = node.get('name')
+            assert name is not None
+            self.globals[name] = node.text
+
+    def import_core3_vm(self, element):
+        '''Parse a single VM from given XML node
+
+        This method load only VM properties not depending on other VMs
+        (other than template). VM connections are set later.
+        :param element: XML node
+        '''
+        vm = Core3VM()
+        vm.klass = element.get('class')
+
+        for node in element.findall('./properties/property'):
+            name = node.get('name')
+            assert name is not None
+            vm.properties[name] = node.text
+
+        for node in element.findall('./features/feature'):
+            name = node.get('name')
+            assert name is not None
+            vm.features[name] = False if node.text is None else node.text
+
+        for node in element.findall('./tags/tag'):
+            name = node.get('name')
+            assert name is not None
+            vm.tags.add(name)
+
+        for bus_node in element.findall('./devices'):
+            bus_name = bus_node.get('class')
+            assert bus_name is not None
+            for node in bus_node.findall('./device'):
+                backend_domain = node.get('backend-domain')
+                ident = node.get('id')
+                options = {}
+                for opt_node in node.findall('./option'):
+                    opt_name = opt_node.get('name')
+                    options[opt_name] = opt_node.text
+                vm.devices[bus_name][(backend_domain, ident)] = options
+
+        # extract base properties
+        if vm.klass == 'AdminVM':
+            vm.name = 'dom0'
+        else:
+            vm.name = vm.properties.pop('name')
+        vm.label = self.labels[vm.properties.pop('label')]
+        vm.template = vm.properties.pop('template', None)
+        # skip UUID and qid, will be generated during restore
+        vm.properties.pop('uuid', None)
+        vm.properties.pop('qid', None)
+
+        if vm.features.pop('backup-content', False):
+            vm.backup_path = vm.features.pop('backup-path', None)
+            vm.size = vm.features.pop('backup-size', 0)
+
+        self.domains[vm.name] = vm
+
+    def load(self):
+        with open(self.store) as fh:
+            try:
+                # pylint: disable=no-member
+                tree = lxml.etree.parse(fh)
+            except (EnvironmentError,  # pylint: disable=broad-except
+                    xml.parsers.expat.ExpatError) as err:
+                self.log.error(err)
+                return False
+
+        self.load_labels(tree.find('./labels'))
+
+        for element in tree.findall('./domains/domain'):
+            self.import_core3_vm(element)
+
+        # and load other defaults (default netvm, updatevm etc)
+        self.load_globals(tree.find('./properties'))