core-admin/qubes/backup.py

#
# The Qubes OS Project, http://www.qubes-os.org
#
# Copyright (C) 2013-2015  Marek Marczykowski-Górecki
#                                   <marmarek@invisiblethingslab.com>
# Copyright (C) 2013  Olivier Médoc <o_medoc@yahoo.fr>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>
#
#
from __future__ import unicode_literals
import itertools
import logging
import functools
import termios

from qubes.utils import size_to_human
import sys
import stat
import os
import fcntl
import subprocess
import re
import shutil
import tempfile
import time
import grp
import pwd
import errno
import datetime
from multiprocessing import Queue, Process
import qubes
import qubes.core2migration
import qubes.storage
import qubes.storage.file
import qubes.vm.templatevm

QUEUE_ERROR = "ERROR"

QUEUE_FINISHED = "FINISHED"

HEADER_FILENAME = 'backup-header'
DEFAULT_CRYPTO_ALGORITHM = 'aes-256-cbc'
# 'scrypt' is not exactly HMAC algorithm, but a tool we use to
# integrity-protect the data
DEFAULT_HMAC_ALGORITHM = 'scrypt'
DEFAULT_COMPRESSION_FILTER = 'gzip'
CURRENT_BACKUP_FORMAT_VERSION = '4'
# Maximum size of error message get from process stderr (including VM process)
MAX_STDERR_BYTES = 1024
# header + qubes.xml max size
HEADER_QUBES_XML_MAX_SIZE = 1024 * 1024
# hmac file max size - regardless of backup format version!
HMAC_MAX_SIZE = 4096

BLKSIZE = 512

_re_alphanum = re.compile(r'^[A-Za-z0-9-]*$')


class BackupCanceledError(qubes.exc.QubesException):
    def __init__(self, msg, tmpdir=None):
        super(BackupCanceledError, self).__init__(msg)
        self.tmpdir = tmpdir


class BackupHeader(object):
    '''Structure describing backup-header file included as the first file in
    backup archive
    '''
    header_keys = {
        'version': 'version',
        'encrypted': 'encrypted',
        'compressed': 'compressed',
        'compression-filter': 'compression_filter',
        'crypto-algorithm': 'crypto_algorithm',
        'hmac-algorithm': 'hmac_algorithm',
        'backup-id': 'backup_id'
    }
    bool_options = ['encrypted', 'compressed']
    int_options = ['version']

    def __init__(self,
            header_data=None,
            version=None,
            encrypted=None,
            compressed=None,
            compression_filter=None,
            hmac_algorithm=None,
            crypto_algorithm=None,
            backup_id=None):
        # repeat the list to help code completion...
        self.version = version
        self.encrypted = encrypted
        self.compressed = compressed
        # Options introduced in backup format 3+, which always have a header,
        # so no need for fallback in function parameter
        self.compression_filter = compression_filter
        self.hmac_algorithm = hmac_algorithm
        self.crypto_algorithm = crypto_algorithm
        self.backup_id = backup_id

        if header_data is not None:
            self.load(header_data)

    def load(self, untrusted_header_text):
        """Parse backup header file.

        :param untrusted_header_text: header content
        :type untrusted_header_text: basestring

        .. warning::
            This function may be exposed to not yet verified header,
            so is security critical.
        """
        try:
            untrusted_header_text = untrusted_header_text.decode('ascii')
        except UnicodeDecodeError:
            raise qubes.exc.QubesException(
                "Non-ASCII characters in backup header")
        for untrusted_line in untrusted_header_text.splitlines():
            if untrusted_line.count('=') != 1:
                raise qubes.exc.QubesException("Invalid backup header")
            key, value = untrusted_line.strip().split('=', 1)
            if not _re_alphanum.match(key):
                raise qubes.exc.QubesException("Invalid backup header (key)")
            if key not in self.header_keys.keys():
                # Ignoring unknown option
                continue
            if not _re_alphanum.match(value):
                raise qubes.exc.QubesException("Invalid backup header (value)")
            if getattr(self, self.header_keys[key]) is not None:
                raise qubes.exc.QubesException(
                    "Duplicated header line: {}".format(key))
            if key in self.bool_options:
                value = value.lower() in ["1", "true", "yes"]
            elif key in self.int_options:
                value = int(value)
            setattr(self, self.header_keys[key], value)

        self.validate()

    def validate(self):
        if self.version == 1:
            # header not really present
            pass
        elif self.version in [2, 3, 4]:
            expected_attrs = ['version', 'encrypted', 'compressed',
                'hmac_algorithm']
            if self.encrypted:
                expected_attrs += ['crypto_algorithm']
            if self.version >= 3 and self.compressed:
                expected_attrs += ['compression_filter']
            if self.version >= 4:
                expected_attrs += ['backup_id']
            for key in expected_attrs:
                if getattr(self, key) is None:
                    raise qubes.exc.QubesException(
                        "Backup header lack '{}' info".format(key))
        else:
            raise qubes.exc.QubesException(
                "Unsupported backup version {}".format(self.version))

    def save(self, filename):
        with open(filename, "w") as f:
            # make sure 'version' is the first key
            f.write('version={}\n'.format(self.version))
            for key, attr in self.header_keys.items():
                if key == 'version':
                    continue
                if getattr(self, attr) is None:
                    continue
                f.write("{!s}={!s}\n".format(key, getattr(self, attr)))


class SendWorker(Process):
    def __init__(self, queue, base_dir, backup_stdout):
        super(SendWorker, self).__init__()
        self.queue = queue
        self.base_dir = base_dir
        self.backup_stdout = backup_stdout
        self.log = logging.getLogger('qubes.backup')

    def run(self):
        self.log.debug("Started sending thread")

        self.log.debug("Moving to temporary dir".format(self.base_dir))
        os.chdir(self.base_dir)

        for filename in iter(self.queue.get, None):
            if filename in (QUEUE_FINISHED, QUEUE_ERROR):
                break

            self.log.debug("Sending file {}".format(filename))
            # This tar used for sending data out need to be as simple, as
            # simple, as featureless as possible. It will not be
            # verified before untaring.
            tar_final_cmd = ["tar", "-cO", "--posix",
                             "-C", self.base_dir, filename]
            final_proc = subprocess.Popen(tar_final_cmd,
                                          stdin=subprocess.PIPE,
                                          stdout=self.backup_stdout)
            if final_proc.wait() >= 2:
                if self.queue.full():
                    # if queue is already full, remove some entry to wake up
                    # main thread, so it will be able to notice error
                    self.queue.get()
                # handle only exit code 2 (tar fatal error) or
                # greater (call failed?)
                raise qubes.exc.QubesException(
                    "ERROR: Failed to write the backup, out of disk space? "
                    "Check console output or ~/.xsession-errors for details.")

            # Delete the file as we don't need it anymore
            self.log.debug("Removing file {}".format(filename))
            os.remove(filename)

        self.log.debug("Finished sending thread")


def launch_proc_with_pty(args, stdin=None, stdout=None, stderr=None, echo=True):
    """Similar to pty.fork, but handle stdin/stdout according to parameters
    instead of connecting to the pty

    :return tuple (subprocess.Popen, pty_master)
    """

    def set_ctty(ctty_fd, master_fd):
        os.setsid()
        os.close(master_fd)
        fcntl.ioctl(ctty_fd, termios.TIOCSCTTY, 0)
        if not echo:
            termios_p = termios.tcgetattr(ctty_fd)
            # termios_p.c_lflags
            termios_p[3] &= ~termios.ECHO
            termios.tcsetattr(ctty_fd, termios.TCSANOW, termios_p)
    (pty_master, pty_slave) = os.openpty()
    p = subprocess.Popen(args, stdin=stdin, stdout=stdout, stderr=stderr,
        preexec_fn=lambda: set_ctty(pty_slave, pty_master))
    os.close(pty_slave)
    return p, os.fdopen(pty_master, 'wb+', buffering=0)


def launch_scrypt(action, input_name, output_name, passphrase):
    '''
    Launch 'scrypt' process, pass passphrase to it and return
    subprocess.Popen object.

    :param action: 'enc' or 'dec'
    :param input_name: input path or '-' for stdin
    :param output_name: output path or '-' for stdout
    :param passphrase: passphrase
    :return: subprocess.Popen object
    '''
    command_line = ['scrypt', action, input_name, output_name]
    (p, pty) = launch_proc_with_pty(command_line,
        stdin=subprocess.PIPE if input_name == '-' else None,
        stdout=subprocess.PIPE if output_name == '-' else None,
        stderr=subprocess.PIPE,
        echo=False)
    if action == 'enc':
        prompts = (b'Please enter passphrase: ', b'Please confirm passphrase: ')
    else:
        prompts = (b'Please enter passphrase: ',)
    for prompt in prompts:
        actual_prompt = p.stderr.read(len(prompt))
        if actual_prompt != prompt:
            raise qubes.exc.QubesException(
                'Unexpected prompt from scrypt: {}'.format(actual_prompt))
        pty.write(passphrase.encode('utf-8') + b'\n')
        pty.flush()
    # save it here, so garbage collector would not close it (which would kill
    #  the child)
    p.pty = pty
    return p


class Backup(object):
    '''Backup operation manager. Usage:

    >>> app = qubes.Qubes()
    >>> # optional - you can use 'None' to use default list (based on
    >>> #  vm.include_in_backups property)
    >>> vms = [app.domains[name] for name in ['my-vm1', 'my-vm2', 'my-vm3']]
    >>> exclude_vms = []
    >>> options = {
    >>>     'encrypted': True,
    >>>     'compressed': True,
    >>>     'passphrase': 'This is very weak backup passphrase',
    >>>     'target_vm': app.domains['sys-usb'],
    >>>     'target_dir': '/media/disk',
    >>> }
    >>> backup_op = Backup(app, vms, exclude_vms, **options)
    >>> print(backup_op.get_backup_summary())
    >>> backup_op.backup_do()

    See attributes of this object for all available options.

    '''
    class FileToBackup(object):
        def __init__(self, file_path, subdir=None, name=None):
            sz = qubes.storage.file.get_disk_usage(file_path)

            if subdir is None:
                abs_file_path = os.path.abspath(file_path)
                abs_base_dir = os.path.abspath(
                    qubes.config.system_path["qubes_base_dir"]) + '/'
                abs_file_dir = os.path.dirname(abs_file_path) + '/'
                (nothing, directory, subdir) = abs_file_dir.partition(abs_base_dir)
                assert nothing == ""
                assert directory == abs_base_dir
            else:
                if len(subdir) > 0 and not subdir.endswith('/'):
                    subdir += '/'

            #: real path to the file
            self.path = file_path
            #: size of the file
            self.size = sz
            #: directory in backup archive where file should be placed
            self.subdir = subdir
            #: use this name in the archive (aka rename)
            self.name = os.path.basename(file_path)
            if name is not None:
                self.name = name

    class VMToBackup(object):
        def __init__(self, vm, files, subdir):
            self.vm = vm
            self.files = files
            self.subdir = subdir

        @property
        def size(self):
            return functools.reduce(lambda x, y: x + y.size, self.files, 0)

    def __init__(self, app, vms_list=None, exclude_list=None, **kwargs):
        """
        If vms = None, include all (sensible) VMs;
        exclude_list is always applied
        """
        super(Backup, self).__init__()

        #: progress of the backup - bytes handled of the current VM
        self.chunk_size = 100 * 1024 * 1024
        self._current_vm_bytes = 0
        #: progress of the backup - bytes handled of finished VMs
        self._done_vms_bytes = 0
        #: total backup size (set by :py:meth:`get_files_to_backup`)
        self.total_backup_bytes = 0
        #: application object
        self.app = app
        #: directory for temporary files - set after creating the directory
        self.tmpdir = None

        # Backup settings - defaults
        #: should the backup be encrypted?
        self.encrypted = True
        #: should the backup be compressed?
        self.compressed = True
        #: what passphrase should be used to intergrity protect (and encrypt)
        #: the backup; required
        self.passphrase = None
        #: custom hmac algorithm
        self.hmac_algorithm = DEFAULT_HMAC_ALGORITHM
        #: custom encryption algorithm
        self.crypto_algorithm = DEFAULT_CRYPTO_ALGORITHM
        #: custom compression filter; a program which process stdin to stdout
        self.compression_filter = DEFAULT_COMPRESSION_FILTER
        #: VM to which backup should be sent (if any)
        self.target_vm = None
        #: directory to save backup in (either in dom0 or target VM,
        #: depending on :py:attr:`target_vm`
        self.target_dir = None
        #: callback for progress reporting. Will be called with one argument
        #: - progress in percents
        self.progress_callback = None
        #: backup ID, needs to be unique (for a given user),
        #: not necessary unpredictable; automatically generated
        self.backup_id = datetime.datetime.now().strftime(
            '%Y%m%dT%H%M%S-' + str(os.getpid()))

        for key, value in kwargs.items():
            if hasattr(self, key):
                setattr(self, key, value)
            else:
                raise AttributeError(key)

        #: whether backup was canceled
        self.canceled = False
        #: list of PIDs to kill on backup cancel
        self.processes_to_kill_on_cancel = []

        self.log = logging.getLogger('qubes.backup')

        if not self.encrypted:
            self.log.warning('\'encrypted\' option is ignored, backup is '
                             'always encrypted')

        if exclude_list is None:
            exclude_list = []

        if vms_list is None:
            vms_list = [vm for vm in app.domains if vm.include_in_backups]

        # Apply exclude list
        self.vms_for_backup = [vm for vm in vms_list
            if vm.name not in exclude_list]

        self._files_to_backup = self.get_files_to_backup()

    def __del__(self):
        if self.tmpdir and os.path.exists(self.tmpdir):
            shutil.rmtree(self.tmpdir)

    def cancel(self):
        """Cancel running backup operation. Can be called from another thread.
        """
        self.canceled = True
        for proc in self.processes_to_kill_on_cancel:
            try:
                proc.terminate()
            except OSError:
                pass


    def get_files_to_backup(self):
        files_to_backup = {}
        for vm in self.vms_for_backup:
            if vm.qid == 0:
                # handle dom0 later
                continue

            if self.encrypted:
                subdir = 'vm%d/' % vm.qid
            else:
                subdir = None

            vm_files = []
            if vm.volumes['private'] is not None:
                path_to_private_img = vm.storage.export('private')
                vm_files.append(self.FileToBackup(path_to_private_img, subdir,
                        'private.img'))

            vm_files.append(self.FileToBackup(vm.icon_path, subdir))
            vm_files.extend(self.FileToBackup(i, subdir)
                for i in vm.fire_event('backup-get-files'))

            # TODO: drop after merging firewall.xml into qubes.xml
            firewall_conf = os.path.join(vm.dir_path, vm.firewall_conf)
            if os.path.exists(firewall_conf):
                vm_files.append(self.FileToBackup(firewall_conf, subdir))

            if vm.updateable:
                path_to_root_img = vm.storage.export('root')
                vm_files.append(self.FileToBackup(path_to_root_img, subdir,
                    'root.img'))
            files_to_backup[vm.qid] = self.VMToBackup(vm, vm_files, subdir)

        # Dom0 user home
        if 0 in [vm.qid for vm in self.vms_for_backup]:
            local_user = grp.getgrnam('qubes').gr_mem[0]
            home_dir = pwd.getpwnam(local_user).pw_dir
            # Home dir should have only user-owned files, so fix it now
            # to prevent permissions problems - some root-owned files can
            # left after 'sudo bash' and similar commands
            subprocess.check_call(['sudo', 'chown', '-R', local_user, home_dir])

            home_to_backup = [
                self.FileToBackup(home_dir, 'dom0-home/')]
            vm_files = home_to_backup

            files_to_backup[0] = self.VMToBackup(self.app.domains[0],
                vm_files,
                os.path.join('dom0-home', os.path.basename(home_dir)))

        self.total_backup_bytes = functools.reduce(
            lambda x, y: x + y.size, files_to_backup.values(), 0)
        return files_to_backup


    def get_backup_summary(self):
        summary = ""

        fields_to_display = [
            {"name": "VM", "width": 16},
            {"name": "type", "width": 12},
            {"name": "size", "width": 12}
        ]

        # Display the header
        for f in fields_to_display:
            fmt = "{{0:-^{0}}}-+".format(f["width"] + 1)
            summary += fmt.format('-')
        summary += "\n"
        for f in fields_to_display:
            fmt = "{{0:>{0}}} |".format(f["width"] + 1)
            summary += fmt.format(f["name"])
        summary += "\n"
        for f in fields_to_display:
            fmt = "{{0:-^{0}}}-+".format(f["width"] + 1)
            summary += fmt.format('-')
        summary += "\n"

        files_to_backup = self._files_to_backup

        for qid, vm_info in files_to_backup.items():
            s = ""
            fmt = "{{0:>{0}}} |".format(fields_to_display[0]["width"] + 1)
            s += fmt.format(vm_info['vm'].name)

            fmt = "{{0:>{0}}} |".format(fields_to_display[1]["width"] + 1)
            if qid == 0:
                s += fmt.format("User home")
            elif isinstance(vm_info['vm'], qubes.vm.templatevm.TemplateVM):
                s += fmt.format("Template VM")
            else:
                s += fmt.format("VM" + (" + Sys" if vm_info['vm'].updateable
                    else ""))

            vm_size = vm_info['size']

            fmt = "{{0:>{0}}} |".format(fields_to_display[2]["width"] + 1)
            s += fmt.format(size_to_human(vm_size))

            if qid != 0 and vm_info['vm'].is_running():
                s += " <-- The VM is running, please shut it down before proceeding " \
                     "with the backup!"

            summary += s + "\n"

        for f in fields_to_display:
            fmt = "{{0:-^{0}}}-+".format(f["width"] + 1)
            summary += fmt.format('-')
        summary += "\n"

        fmt = "{{0:>{0}}} |".format(fields_to_display[0]["width"] + 1)
        summary += fmt.format("Total size:")
        fmt = "{{0:>{0}}} |".format(
            fields_to_display[1]["width"] + 1 + 2 + fields_to_display[2][
                "width"] + 1)
        summary += fmt.format(size_to_human(self.total_backup_bytes))
        summary += "\n"

        for f in fields_to_display:
            fmt = "{{0:-^{0}}}-+".format(f["width"] + 1)
            summary += fmt.format('-')
        summary += "\n"

        vms_not_for_backup = [vm.name for vm in self.app.domains
                              if vm not in self.vms_for_backup]
        summary += "VMs not selected for backup:\n - " + "\n - ".join(
            sorted(vms_not_for_backup))

        return summary

    def prepare_backup_header(self):
        header_file_path = os.path.join(self.tmpdir, HEADER_FILENAME)
        backup_header = BackupHeader(
            version=CURRENT_BACKUP_FORMAT_VERSION,
            hmac_algorithm=self.hmac_algorithm,
            crypto_algorithm=self.crypto_algorithm,
            encrypted=self.encrypted,
            compressed=self.compressed,
            compression_filter=self.compression_filter,
            backup_id=self.backup_id,
        )
        backup_header.save(header_file_path)
        # Start encrypt, scrypt will also handle integrity
        # protection
        scrypt_passphrase = u'{filename}!{passphrase}'.format(
            filename=HEADER_FILENAME, passphrase=self.passphrase)
        scrypt = launch_scrypt(
            'enc', header_file_path, header_file_path + '.hmac',
            scrypt_passphrase)

        if scrypt.wait() != 0:
            raise qubes.exc.QubesException(
                "Failed to compute hmac of header file: "
                + scrypt.stderr.read())
        return HEADER_FILENAME, HEADER_FILENAME + ".hmac"


    @staticmethod
    def _queue_put_with_check(proc, vmproc, queue, element):
        if queue.full():
            if not proc.is_alive():
                if vmproc:
                    message = ("Failed to write the backup, VM output:\n" +
                               vmproc.stderr.read())
                else:
                    message = "Failed to write the backup. Out of disk space?"
                raise qubes.exc.QubesException(message)
        queue.put(element)

    def _send_progress_update(self):
        if callable(self.progress_callback):
            progress = (
                100 * (self._done_vms_bytes + self._current_vm_bytes) /
                self.total_backup_bytes)
            self.progress_callback(progress)

    def _add_vm_progress(self, bytes_done):
        self._current_vm_bytes += bytes_done
        self._send_progress_update()

    def backup_do(self):
        if self.passphrase is None:
            raise qubes.exc.QubesException("No passphrase set")
        qubes_xml = self.app.store
        self.tmpdir = tempfile.mkdtemp()
        shutil.copy(qubes_xml, os.path.join(self.tmpdir, 'qubes.xml'))
        qubes_xml = os.path.join(self.tmpdir, 'qubes.xml')
        backup_app = qubes.Qubes(qubes_xml)

        files_to_backup = self._files_to_backup
        # make sure backup_content isn't set initially
        for vm in backup_app.domains:
            vm.features['backup-content'] = False

        for qid, vm_info in files_to_backup.items():
            if qid != 0 and vm_info.vm.is_running():
                raise qubes.exc.QubesVMNotHaltedError(vm_info.vm)
            # VM is included in the backup
            backup_app.domains[qid].features['backup-content'] = True
            backup_app.domains[qid].features['backup-path'] = vm_info.subdir
            backup_app.domains[qid].features['backup-size'] = vm_info.size
        backup_app.save()

        vmproc = None
        tar_sparse = None
        if self.target_vm is not None:
            # Prepare the backup target (Qubes service call)
            # If APPVM, STDOUT is a PIPE
            vmproc = self.target_vm.run_service('qubes.Backup',
                passio_popen=True, passio_stderr=True)
            vmproc.stdin.write((self.target_dir.
                replace("\r", "").replace("\n", "") + "\n").encode())
            vmproc.stdin.flush()
            backup_stdout = vmproc.stdin
            self.processes_to_kill_on_cancel.append(vmproc)
        else:
            # Prepare the backup target (local file)
            if os.path.isdir(self.target_dir):
                backup_target = self.target_dir + "/qubes-{0}". \
                    format(time.strftime("%Y-%m-%dT%H%M%S"))
            else:
                backup_target = self.target_dir

                # Create the target directory
                if not os.path.exists(os.path.dirname(self.target_dir)):
                    raise qubes.exc.QubesException(
                        "ERROR: the backup directory for {0} does not exists".
                        format(self.target_dir))

            # If not APPVM, STDOUT is a local file
            backup_stdout = open(backup_target, 'wb')

        # Tar with tape length does not deals well with stdout
        # (close stdout between two tapes)
        # For this reason, we will use named pipes instead
        self.log.debug("Working in {}".format(self.tmpdir))

        backup_pipe = os.path.join(self.tmpdir, "backup_pipe")
        self.log.debug("Creating pipe in: {}".format(backup_pipe))
        os.mkfifo(backup_pipe)

        self.log.debug("Will backup: {}".format(files_to_backup))

        header_files = self.prepare_backup_header()

        # Setup worker to send encrypted data chunks to the backup_target
        to_send = Queue(10)
        send_proc = SendWorker(to_send, self.tmpdir, backup_stdout)
        send_proc.start()

        for f in header_files:
            to_send.put(f)

        qubes_xml_info = self.VMToBackup(
            None,
            [self.FileToBackup(qubes_xml, '')],
            ''
        )
        for vm_info in itertools.chain([qubes_xml_info],
                files_to_backup.values()):
            for file_info in vm_info.files:

                self.log.debug("Backing up {}".format(file_info))

                backup_tempfile = os.path.join(
                    self.tmpdir, file_info.subdir,
                    file_info.name)
                self.log.debug("Using temporary location: {}".format(
                    backup_tempfile))

                # Ensure the temporary directory exists
                if not os.path.isdir(os.path.dirname(backup_tempfile)):
                    os.makedirs(os.path.dirname(backup_tempfile))

                # The first tar cmd can use any complex feature as we want.
                # Files will be verified before untaring this.
                # Prefix the path in archive with filename["subdir"] to have it
                # verified during untar
                tar_cmdline = (["tar", "-Pc", '--sparse',
                               "-f", backup_pipe,
                               '-C', os.path.dirname(file_info.path)] +
                               (['--dereference'] if
                                file_info.subdir != "dom0-home/" else []) +
                               ['--xform=s:^%s:%s\\0:' % (
                                   os.path.basename(file_info.path),
                                   file_info.subdir),
                                os.path.basename(file_info.path)
                                ])
                file_stat = os.stat(file_info.path)
                if stat.S_ISBLK(file_stat.st_mode) or \
                        file_info.name != os.path.basename(file_info.path):
                    # tar doesn't handle content of block device, use our
                    # writer
                    # also use our tar writer when renaming file
                    assert not stat.S_ISDIR(file_stat.st_mode),\
                        "Renaming directories not supported"
                    tar_cmdline = ['python3', '-m', 'qubes.tarwriter',
                        '--override-name=%s' % (
                            os.path.join(file_info.subdir, os.path.basename(
                                file_info.name))),
                        file_info.path,
                        backup_pipe]
                if self.compressed:
                    tar_cmdline.insert(-2,
                        "--use-compress-program=%s" % self.compression_filter)

                self.log.debug(" ".join(tar_cmdline))

                # Pipe: tar-sparse | scrypt | tar | backup_target
                # TODO: log handle stderr
                tar_sparse = subprocess.Popen(
                    tar_cmdline)
                self.processes_to_kill_on_cancel.append(tar_sparse)

                # Wait for compressor (tar) process to finish or for any
                # error of other subprocesses
                i = 0
                pipe = open(backup_pipe, 'rb')
                run_error = "paused"
                while run_error == "paused":
                    # Prepare a first chunk
                    chunkfile = backup_tempfile + ".%03d.enc" % i
                    i += 1

                    # Start encrypt, scrypt will also handle integrity
                    # protection
                    scrypt_passphrase = \
                        u'{backup_id}!{filename}!{passphrase}'.format(
                            backup_id=self.backup_id,
                            filename=os.path.relpath(chunkfile[:-4],
                                self.tmpdir),
                            passphrase=self.passphrase)
                    scrypt = launch_scrypt(
                        "enc", "-", chunkfile, scrypt_passphrase)

                    run_error = handle_streams(
                        pipe,
                        {'backup_target': scrypt.stdin},
                        {'vmproc': vmproc,
                         'addproc': tar_sparse,
                         'scrypt': scrypt,
                        },
                        self.chunk_size,
                        self._add_vm_progress
                    )

                    self.log.debug(
                        "Wait_backup_feedback returned: {}".format(run_error))

                    if self.canceled:
                        try:
                            tar_sparse.terminate()
                        except OSError:
                            pass
                        tar_sparse.wait()
                        to_send.put(QUEUE_ERROR)
                        send_proc.join()
                        shutil.rmtree(self.tmpdir)
                        raise BackupCanceledError("Backup canceled")
                    if run_error and run_error != "size_limit":
                        send_proc.terminate()
                        if run_error == "VM" and vmproc:
                            raise qubes.exc.QubesException(
                                "Failed to write the backup, VM output:\n" +
                                vmproc.stderr.read(MAX_STDERR_BYTES))
                        else:
                            raise qubes.exc.QubesException(
                                "Failed to perform backup: error in " +
                                run_error)

                    scrypt.stdin.close()
                    scrypt.wait()
                    self.log.debug("scrypt return code: {}".format(
                        scrypt.poll()))

                    # Send the chunk to the backup target
                    self._queue_put_with_check(
                        send_proc, vmproc, to_send,
                        os.path.relpath(chunkfile, self.tmpdir))

                    if tar_sparse.poll() is None or run_error == "size_limit":
                        run_error = "paused"
                    else:
                        self.processes_to_kill_on_cancel.remove(tar_sparse)
                        self.log.debug(
                            "Finished tar sparse with exit code {}".format(
                                tar_sparse.poll()))
                pipe.close()

            # This VM done, update progress
            self._done_vms_bytes += vm_info.size
            self._current_vm_bytes = 0
            self._send_progress_update()
            # Save date of last backup
            if vm_info.vm:
                vm_info.vm.backup_timestamp = datetime.datetime.now()

        self._queue_put_with_check(send_proc, vmproc, to_send, QUEUE_FINISHED)
        send_proc.join()
        shutil.rmtree(self.tmpdir)

        if self.canceled:
            raise BackupCanceledError("Backup canceled")

        if send_proc.exitcode != 0:
            raise qubes.exc.QubesException(
                "Failed to send backup: error in the sending process")

        if vmproc:
            self.log.debug("VMProc1 proc return code: {}".format(vmproc.poll()))
            if tar_sparse is not None:
                self.log.debug("Sparse1 proc return code: {}".format(
                    tar_sparse.poll()))
            vmproc.stdin.close()

        self.app.save()


def handle_streams(stream_in, streams_out, processes, size_limit=None,
        progress_callback=None):
    '''
    Copy stream_in to all streams_out and monitor all mentioned processes.
    If any of them terminate with non-zero code, interrupt the process. Copy
    at most `size_limit` data (if given).

    :param stream_in: file-like object to read data from
    :param streams_out: dict of file-like objects to write data to
    :param processes: dict of subprocess.Popen objects to monitor
    :param size_limit: int maximum data amount to process
    :param progress_callback: callable function to report progress, will be
        given copied data size (it should accumulate internally)
    :return: failed process name, failed stream name, "size_limit" or None (
        no error)
    '''
    buffer_size = 409600
    bytes_copied = 0
    while True:
        if size_limit:
            to_copy = min(buffer_size, size_limit - bytes_copied)
            if to_copy <= 0:
                return "size_limit"
        else:
            to_copy = buffer_size
        buf = stream_in.read(to_copy)
        if not len(buf):
            # done
            return None

        if callable(progress_callback):
            progress_callback(len(buf))
        for name, stream in streams_out.items():
            if stream is None:
                continue
            try:
                stream.write(buf)
            except IOError:
                return name
        bytes_copied += len(buf)

        for name, proc in processes.items():
            if proc is None:
                continue
            if proc.poll():
                return name


class ExtractWorker2(Process):
    def __init__(self, queue, base_dir, passphrase, encrypted,
                 progress_callback, vmproc=None,
                 compressed=False, crypto_algorithm=DEFAULT_CRYPTO_ALGORITHM,
                 verify_only=False, relocate=None):
        super(ExtractWorker2, self).__init__()
        #: queue with files to extract
        self.queue = queue
        #: paths on the queue are relative to this dir
        self.base_dir = base_dir
        #: passphrase to decrypt/authenticate data
        self.passphrase = passphrase
        #: extract those files/directories to alternative locations (truncate,
        # but not unlink target beforehand); if specific file is in the map,
        # redirect it accordingly, otherwise check if the whole directory is
        # there
        self.relocate = relocate
        #: is the backup encrypted?
        self.encrypted = encrypted
        #: is the backup compressed?
        self.compressed = compressed
        #: what crypto algorithm is used for encryption?
        self.crypto_algorithm = crypto_algorithm
        #: only verify integrity, don't extract anything
        self.verify_only = verify_only
        #: progress
        self.blocks_backedup = 0
        #: inner tar layer extraction (subprocess.Popen instance)
        self.tar2_process = None
        #: current inner tar archive name
        self.tar2_current_file = None
        #: set size of this file when tar report it on stderr (adjust LVM
        # volume size)
        self.adjust_output_size = None
        #: decompressor subprocess.Popen instance
        self.decompressor_process = None
        #: decryptor subprocess.Popen instance
        self.decryptor_process = None
        #: callback reporting progress to UI
        self.progress_callback = progress_callback
        #: process (subprocess.Popen instance) feeding the data into
        # extraction tool
        self.vmproc = vmproc

        #: pipe to feed the data into tar (use pipe instead of stdin,
        # as stdin is used for tar control commands)
        self.restore_pipe = os.path.join(self.base_dir, "restore_pipe")

        self.log = logging.getLogger('qubes.backup.extract')
        self.log.debug("Creating pipe in: {}".format(self.restore_pipe))
        os.mkfifo(self.restore_pipe)

        self.stderr_encoding = sys.stderr.encoding or 'utf-8'

    def collect_tar_output(self):
        if not self.tar2_process.stderr:
            return

        if self.tar2_process.poll() is None:
            try:
                new_lines = self.tar2_process.stderr \
                    .read(MAX_STDERR_BYTES).splitlines()
            except IOError as e:
                if e.errno == errno.EAGAIN:
                    return
                else:
                    raise
        else:
            new_lines = self.tar2_process.stderr.readlines()

        new_lines = map(lambda x: x.decode(self.stderr_encoding), new_lines)

        msg_re = re.compile(r".*#[0-9].*restore_pipe")
        debug_msg = filter(msg_re.match, new_lines)
        self.log.debug('tar2_stderr: {}'.format('\n'.join(debug_msg)))
        new_lines = filter(lambda x: not msg_re.match(x), new_lines)
        if self.adjust_output_size:
            # search for first file size reported by tar, after setting
            # self.adjust_output_size (so don't look at self.tar2_stderr)
            # this is used only when extracting single-file archive, so don't
            #  bother with checking file name
            file_size_re = re.compile(r"^[^ ]+ [^ ]+/[^ ]+ *([0-9]+) .*")
            for line in new_lines:
                match = file_size_re.match(line)
                if match:
                    file_size = match.groups()[0]
                    self.resize_lvm(self.adjust_output_size, file_size)
                    self.adjust_output_size = None
        self.tar2_stderr += new_lines

    def resize_lvm(self, dev, size):
        # FIXME: HACK
        try:
            subprocess.check_call(
                ['sudo', 'lvresize', '-f', '-L', str(size) + 'B', dev],
                stdout=open(os.devnull, 'w'), stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            if e.returncode == 3:
                # already at the right size
                pass
            else:
                raise

    def run(self):
        try:
            self.__run__()
        except Exception as e:
            # Cleanup children
            for process in [self.decompressor_process,
                            self.decryptor_process,
                            self.tar2_process]:
                if process:
                    try:
                        process.terminate()
                    except OSError:
                        pass
                    process.wait()
            self.log.error("ERROR: " + str(e))
            raise

    def handle_dir_relocations(self, dirname):
        ''' Relocate files in given director when it's already extracted

        :param dirname: directory path to handle (relative to backup root),
        without trailing slash
        '''

        for old, new in self.relocate:
            if not old.startswith(dirname + '/'):
                continue
            # if directory is relocated too (most likely is), the file
            # is extracted there
            if dirname in self.relocate:
                old = old.replace(dirname, self.relocate[dirname], 1)
            try:
                stat_buf = os.stat(new)
                if stat.S_ISBLK(stat_buf.st_mode):
                    # output file is block device (LVM) - adjust its
                    # size, otherwise it may fail
                    # from lack of space
                    self.resize_lvm(new, stat_buf.st_size)
            except OSError:  # ENOENT
                pass
            subprocess.check_call(
                ['dd', 'if='+old, 'of='+new, 'conv=sparse'])
            os.unlink(old)

    def cleanup_tar2(self, wait=True, terminate=False):
        if self.tar2_process is None:
            return
        if terminate:
            self.tar2_process.terminate()
        if wait:
            self.tar2_process.wait()
        elif self.tar2_process.poll() is None:
            return
        if self.tar2_process.returncode != 0:
            self.collect_tar_output()
            self.log.error(
                "ERROR: unable to extract files for {0}, tar "
                "output:\n  {1}".
                    format(self.tar2_current_file,
                    "\n  ".join(self.tar2_stderr)))
        else:
            # Finished extracting the tar file
            self.collect_tar_output()
            self.tar2_process = None
            # if that was whole-directory archive, handle
            # relocated files now
            inner_name = os.path.splitext(self.tar2_current_file)[0]\
                .replace(self.base_dir + '/', '')
            if os.path.basename(inner_name) == '.':
                self.handle_dir_relocations(
                    os.path.dirname(inner_name))
            self.tar2_current_file = None
            self.adjust_output_size = None

    def __run__(self):
        self.log.debug("Started sending thread")
        self.log.debug("Moving to dir " + self.base_dir)
        os.chdir(self.base_dir)

        filename = None

        for filename in iter(self.queue.get, None):
            if filename in (QUEUE_FINISHED, QUEUE_ERROR):
                break

            self.log.debug("Extracting file " + filename)

            if filename.endswith('.000'):
                # next file
                self.cleanup_tar2(wait=True, terminate=False)

                inner_name = filename.rstrip('.000').replace(
                    self.base_dir + '/', '')
                redirect_stdout = None
                if self.relocate and inner_name in self.relocate:
                    # TODO: add `dd conv=sparse` when removing tar layer
                    tar2_cmdline = ['tar',
                        '-%sMvvOf' % ("t" if self.verify_only else "x"),
                        self.restore_pipe,
                        inner_name]
                    output_file = self.relocate[inner_name]
                    try:
                        stat_buf = os.stat(output_file)
                        if stat.S_ISBLK(stat_buf.st_mode):
                            # output file is block device (LVM) - adjust its
                            # size during extraction, otherwise it may fail
                            # from lack of space
                            self.adjust_output_size = output_file
                    except OSError:  # ENOENT
                        pass
                    redirect_stdout = open(output_file, 'w')
                elif self.relocate and \
                        os.path.dirname(inner_name) in self.relocate:
                    tar2_cmdline = ['tar',
                        '-%sMf' % ("t" if self.verify_only else "x"),
                        self.restore_pipe,
                        '-C', self.relocate[os.path.dirname(inner_name)],
                        # strip all directories - leave only final filename
                        '--strip-components', str(inner_name.count(os.sep)),
                        inner_name]

                else:
                    tar2_cmdline = ['tar',
                        '-%sMkf' % ("t" if self.verify_only else "x"),
                        self.restore_pipe,
                        inner_name]

                self.log.debug("Running command " + str(tar2_cmdline))
                self.tar2_process = subprocess.Popen(tar2_cmdline,
                    stdin=subprocess.PIPE, stderr=subprocess.PIPE,
                    stdout=redirect_stdout)
                fcntl.fcntl(self.tar2_process.stderr.fileno(), fcntl.F_SETFL,
                            fcntl.fcntl(self.tar2_process.stderr.fileno(),
                                        fcntl.F_GETFL) | os.O_NONBLOCK)
                self.tar2_stderr = []
            elif not self.tar2_process:
                # Extracting of the current archive failed, skip to the next
                # archive
                os.remove(filename)
                continue
            else:
                self.collect_tar_output()
                self.log.debug("Releasing next chunck")
                self.tar2_process.stdin.write("\n")
                self.tar2_process.stdin.flush()
            self.tar2_current_file = filename

            pipe = open(self.restore_pipe, 'wb')
            monitor_processes = {
                'vmproc': self.vmproc,
                'addproc': self.tar2_process,
            }
            if self.encrypted:
                # Start decrypt
                self.decryptor_process = subprocess.Popen(
                    ["openssl", "enc",
                     "-d",
                     "-" + self.crypto_algorithm,
                     "-pass",
                     "pass:" + self.passphrase] +
                    (["-z"] if self.compressed else []),
                    stdin=open(filename, 'rb'),
                    stdout=subprocess.PIPE)
                in_stream = self.decryptor_process.stdout
                monitor_processes['decryptor'] = self.decryptor_process
            elif self.compressed:
                self.decompressor_process = subprocess.Popen(
                    ["gzip", "-d"],
                    stdin=open(filename, 'rb'),
                    stdout=subprocess.PIPE)
                in_stream = self.decompressor_process.stdout
                monitor_processes['decompresor'] = self.decompressor_process
            else:
                in_stream = open(filename, 'rb')

            run_error = handle_streams(
                in_stream,
                {'target': pipe},
                monitor_processes,
                progress_callback=self.progress_callback)

            try:
                pipe.close()
            except IOError as e:
                if e.errno == errno.EPIPE:
                    self.log.debug(
                        "Got EPIPE while closing pipe to "
                        "the inner tar process")
                    # ignore the error
                else:
                    raise
            if run_error:
                if run_error == "target":
                    self.collect_tar_output()
                    details = "\n".join(self.tar2_stderr)
                else:
                    details = "%s failed" % run_error
                self.log.error("Error while processing '{}': {}".format(
                    self.tar2_current_file, details))
                self.cleanup_tar2(wait=True, terminate=True)

            # Delete the file as we don't need it anymore
            self.log.debug("Removing file " + filename)
            os.remove(filename)

        os.unlink(self.restore_pipe)

        self.cleanup_tar2(wait=True, terminate=(filename == QUEUE_ERROR))
        self.log.debug("Finished extracting thread")


class ExtractWorker3(ExtractWorker2):
    def __init__(self, queue, base_dir, passphrase, encrypted,
                 progress_callback, vmproc=None,
                 compressed=False, crypto_algorithm=DEFAULT_CRYPTO_ALGORITHM,
                 compression_filter=None, verify_only=False, relocate=None):
        super(ExtractWorker3, self).__init__(queue, base_dir, passphrase,
                                             encrypted,
                                             progress_callback, vmproc,
                                             compressed, crypto_algorithm,
                                             verify_only, relocate)
        self.compression_filter = compression_filter
        os.unlink(self.restore_pipe)

    def __run__(self):
        self.log.debug("Started sending thread")
        self.log.debug("Moving to dir " + self.base_dir)
        os.chdir(self.base_dir)

        filename = None

        input_pipe = None
        for filename in iter(self.queue.get, None):
            if filename in (QUEUE_FINISHED, QUEUE_ERROR):
                break

            self.log.debug("Extracting file " + filename)

            if filename.endswith('.000'):
                # next file
                if self.tar2_process is not None:
                    input_pipe.close()
                    self.cleanup_tar2(wait=True, terminate=False)

                inner_name = filename.rstrip('.000').replace(
                    self.base_dir + '/', '')
                redirect_stdout = None
                if self.relocate and inner_name in self.relocate:
                    # TODO: add dd conv=sparse when removing tar layer
                    tar2_cmdline = ['tar',
                        '-%svvO' % ("t" if self.verify_only else "x"),
                        inner_name]
                    output_file = self.relocate[inner_name]
                    try:
                        stat_buf = os.stat(output_file)
                        if stat.S_ISBLK(stat_buf.st_mode):
                            # output file is block device (LVM) - adjust its
                            # size during extraction, otherwise it may fail
                            # from lack of space
                            self.adjust_output_size = output_file
                    except OSError:  # ENOENT
                        pass
                    redirect_stdout = open(output_file, 'w')
                elif self.relocate and \
                        os.path.dirname(inner_name) in self.relocate:
                    tar2_cmdline = ['tar',
                        '-%s' % ("t" if self.verify_only else "x"),
                        '-C', self.relocate[os.path.dirname(inner_name)],
                        # strip all directories - leave only final filename
                        '--strip-components', str(inner_name.count(os.sep)),
                        inner_name]
                else:
                    tar2_cmdline = ['tar',
                        '-%sk' % ("t" if self.verify_only else "x"),
                        inner_name]

                if self.compressed:
                    if self.compression_filter:
                        tar2_cmdline.insert(-1,
                                            "--use-compress-program=%s" %
                                            self.compression_filter)
                    else:
                        tar2_cmdline.insert(-1, "--use-compress-program=%s" %
                                            DEFAULT_COMPRESSION_FILTER)

                self.log.debug("Running command " + str(tar2_cmdline))
                if self.encrypted:
                    # Start decrypt
                    self.decryptor_process = subprocess.Popen(
                        ["openssl", "enc",
                         "-d",
                         "-" + self.crypto_algorithm,
                         "-pass",
                         "pass:" + self.passphrase],
                        stdin=subprocess.PIPE,
                        stdout=subprocess.PIPE)

                    self.tar2_process = subprocess.Popen(
                        tar2_cmdline,
                        stdin=self.decryptor_process.stdout,
                        stdout=redirect_stdout,
                        stderr=subprocess.PIPE)
                    input_pipe = self.decryptor_process.stdin
                else:
                    self.tar2_process = subprocess.Popen(
                        tar2_cmdline,
                        stdin=subprocess.PIPE,
                        stdout=redirect_stdout,
                        stderr=subprocess.PIPE)
                    input_pipe = self.tar2_process.stdin

                fcntl.fcntl(self.tar2_process.stderr.fileno(), fcntl.F_SETFL,
                            fcntl.fcntl(self.tar2_process.stderr.fileno(),
                                        fcntl.F_GETFL) | os.O_NONBLOCK)
                self.tar2_stderr = []
            elif not self.tar2_process:
                # Extracting of the current archive failed, skip to the next
                # archive
                os.remove(filename)
                continue
            else:
                (basename, ext) = os.path.splitext(self.tar2_current_file)
                previous_chunk_number = int(ext[1:])
                expected_filename = basename + '.%03d' % (
                    previous_chunk_number+1)
                if expected_filename != filename:
                    self.cleanup_tar2(wait=True, terminate=True)
                    self.log.error(
                        'Unexpected file in archive: {}, expected {}'.format(
                            filename, expected_filename))
                    os.remove(filename)
                    continue
                self.log.debug("Releasing next chunck")

            self.tar2_current_file = filename

            run_error = handle_streams(
                open(filename, 'rb'),
                {'target': input_pipe},
                {'vmproc': self.vmproc,
                 'addproc': self.tar2_process,
                 'decryptor': self.decryptor_process,
                },
                progress_callback=self.progress_callback)

            if run_error:
                if run_error == "target":
                    self.collect_tar_output()
                    details = "\n".join(self.tar2_stderr)
                else:
                    details = "%s failed" % run_error
                if self.decryptor_process:
                    self.decryptor_process.terminate()
                    self.decryptor_process.wait()
                    self.decryptor_process = None
                self.log.error("Error while processing '{}': {}".format(
                    self.tar2_current_file, details))
                self.cleanup_tar2(wait=True, terminate=True)

            # Delete the file as we don't need it anymore
            self.log.debug("Removing file " + filename)
            os.remove(filename)

        if self.tar2_process is not None:
            input_pipe.close()
            if filename == QUEUE_ERROR:
                if self.decryptor_process:
                    self.decryptor_process.terminate()
                    self.decryptor_process.wait()
                    self.decryptor_process = None
            self.cleanup_tar2(terminate=(filename == QUEUE_ERROR))

        self.log.debug("Finished extracting thread")


def get_supported_hmac_algo(hmac_algorithm=None):
    # Start with provided default
    if hmac_algorithm:
        yield hmac_algorithm
    if hmac_algorithm != 'scrypt':
        yield 'scrypt'
    proc = subprocess.Popen(['openssl', 'list-message-digest-algorithms'],
                            stdout=subprocess.PIPE)
    for algo in proc.stdout.readlines():
        algo = algo.decode('ascii')
        if '=>' in algo:
            continue
        yield algo.strip()
    proc.wait()


class BackupRestoreOptions(object):
    def __init__(self):
        #: use default NetVM if the one referenced in backup do not exists on
        #  the host
        self.use_default_netvm = True
        #: set NetVM to "none" if the one referenced in backup do not exists
        # on the host
        self.use_none_netvm = False
        #: set template to default if the one referenced in backup do not
        # exists on the host
        self.use_default_template = True
        #: use default kernel if the one referenced in backup do not exists
        # on the host
        self.use_default_kernel = True
        #: restore dom0 home
        self.dom0_home = True
        #: dictionary how what templates should be used instead of those
        # referenced in backup
        self.replace_template = {}
        #: restore dom0 home even if username is different
        self.ignore_username_mismatch = False
        #: do not restore data, only verify backup integrity
        self.verify_only = False
        #: automatically rename VM during restore, when it would conflict
        # with existing one
        self.rename_conflicting = True
        #: list of VM names to exclude
        self.exclude = []
        #: restore VMs into selected storage pool
        self.override_pool = None


class BackupRestore(object):
    """Usage:

    >>> restore_op = BackupRestore(...)
    >>> # adjust restore_op.options here
    >>> restore_info = restore_op.get_restore_info()
    >>> # manipulate restore_info to select VMs to restore here
    >>> restore_op.restore_do(restore_info)
    """

    class VMToRestore(object):
        #: VM excluded from restore by user
        EXCLUDED = object()
        #: VM with such name already exists on the host
        ALREADY_EXISTS = object()
        #: NetVM used by the VM does not exists on the host
        MISSING_NETVM = object()
        #: TemplateVM used by the VM does not exists on the host
        MISSING_TEMPLATE = object()
        #: Kernel used by the VM does not exists on the host
        MISSING_KERNEL = object()

        def __init__(self, vm):
            self.vm = vm
            if 'backup-path' in vm.features:
                self.subdir = vm.features['backup-path']
            else:
                self.subdir = None
            if 'backup-size' in vm.features and vm.features['backup-size']:
                self.size = int(vm.features['backup-size'])
            else:
                self.size = 0
            self.problems = set()
            if hasattr(vm, 'template') and vm.template:
                self.template = vm.template.name
            else:
                self.template = None
            if vm.netvm:
                self.netvm = vm.netvm.name
            else:
                self.netvm = None
            self.name = vm.name
            self.orig_template = None
            self.restored_vm = None

        @property
        def good_to_go(self):
            return len(self.problems) == 0

    class Dom0ToRestore(VMToRestore):
        #: backup was performed on system with different dom0 username
        USERNAME_MISMATCH = object()

        def __init__(self, vm, subdir=None):
            super(BackupRestore.Dom0ToRestore, self).__init__(vm)
            if subdir:
                self.subdir = subdir
            self.username = os.path.basename(subdir)

    def __init__(self, app, backup_location, backup_vm, passphrase):
        super(BackupRestore, self).__init__()

        #: qubes.Qubes instance
        self.app = app

        #: options how the backup should be restored
        self.options = BackupRestoreOptions()

        #: VM from which backup should be retrieved
        self.backup_vm = backup_vm
        if backup_vm and backup_vm.qid == 0:
            self.backup_vm = None

        #: backup path, inside VM pointed by :py:attr:`backup_vm`
        self.backup_location = backup_location

        #: passphrase protecting backup integrity and optionally decryption
        self.passphrase = passphrase

        #: temporary directory used to extract the data before moving to the
        # final location; should be on the same filesystem as /var/lib/qubes
        self.tmpdir = tempfile.mkdtemp(prefix="restore", dir="/var/tmp")

        #: list of processes (Popen objects) to kill on cancel
        self.processes_to_kill_on_cancel = []

        #: is the backup operation canceled
        self.canceled = False

        #: report restore progress, called with one argument - percents of
        # data restored
        # FIXME: convert to float [0,1]
        self.progress_callback = None

        self.log = logging.getLogger('qubes.backup')

        #: basic information about the backup
        self.header_data = self._retrieve_backup_header()

        #: VMs included in the backup
        self.backup_app = self._process_qubes_xml()

    def cancel(self):
        """Cancel running backup operation. Can be called from another thread.
        """
        self.canceled = True
        for proc in self.processes_to_kill_on_cancel:
            try:
                proc.terminate()
            except OSError:
                pass

    def _start_retrieval_process(self, filelist, limit_count, limit_bytes):
        """Retrieve backup stream and extract it to :py:attr:`tmpdir`

        :param filelist: list of files to extract; listing directory name
        will extract the whole directory; use empty list to extract the whole
        archive
        :param limit_count: maximum number of files to extract
        :param limit_bytes: maximum size of extracted data
        :return: a touple of (Popen object of started process, file-like
        object for reading extracted files list, file-like object for reading
        errors)
        """

        vmproc = None
        if self.backup_vm is not None:
            # If APPVM, STDOUT is a PIPE
            vmproc = self.backup_vm.run_service('qubes.Restore',
                passio_popen=True, passio_stderr=True)
            vmproc.stdin.write(
                (self.backup_location.replace("\r", "").replace("\n",
                    "") + "\n").encode())
            vmproc.stdin.flush()

            # Send to tar2qfile the VMs that should be extracted
            vmproc.stdin.write((" ".join(filelist) + "\n").encode())
            vmproc.stdin.flush()
            self.processes_to_kill_on_cancel.append(vmproc)

            backup_stdin = vmproc.stdout
            tar1_command = ['/usr/libexec/qubes/qfile-dom0-unpacker',
                            str(os.getuid()), self.tmpdir, '-v']
        else:
            backup_stdin = open(self.backup_location, 'rb')

            tar1_command = ['tar',
                            '-ixv',
                            '-C', self.tmpdir] + filelist

        tar1_env = os.environ.copy()
        tar1_env['UPDATES_MAX_BYTES'] = str(limit_bytes)
        tar1_env['UPDATES_MAX_FILES'] = str(limit_count)
        self.log.debug("Run command" + str(tar1_command))
        command = subprocess.Popen(
            tar1_command,
            stdin=backup_stdin,
            stdout=vmproc.stdin if vmproc else subprocess.PIPE,
            stderr=subprocess.PIPE,
            env=tar1_env)
        self.processes_to_kill_on_cancel.append(command)

        # qfile-dom0-unpacker output filelist on stderr
        # and have stdout connected to the VM), while tar output filelist
        # on stdout
        if self.backup_vm:
            filelist_pipe = command.stderr
            # let qfile-dom0-unpacker hold the only open FD to the write end of
            # pipe, otherwise qrexec-client will not receive EOF when
            # qfile-dom0-unpacker terminates
            vmproc.stdin.close()
        else:
            filelist_pipe = command.stdout

        if self.backup_vm:
            error_pipe = vmproc.stderr
        else:
            error_pipe = command.stderr
        return command, filelist_pipe, error_pipe

    def _verify_hmac(self, filename, hmacfile, algorithm=None):
        def load_hmac(hmac_text):
            if any(ord(x) not in range(128) for x in hmac_text):
                raise qubes.exc.QubesException(
                    "Invalid content of {}".format(hmacfile))
            hmac_text = hmac_text.strip().split("=")
            if len(hmac_text) > 1:
                hmac_text = hmac_text[1].strip()
            else:
                raise qubes.exc.QubesException(
                    "ERROR: invalid hmac file content")

            return hmac_text
        if algorithm is None:
            algorithm = self.header_data.hmac_algorithm
        passphrase = self.passphrase.encode('utf-8')
        self.log.debug("Verifying file {}".format(filename))

        if os.stat(os.path.join(self.tmpdir, hmacfile)).st_size > \
                HMAC_MAX_SIZE:
            raise qubes.exc.QubesException('HMAC file {} too large'.format(
                hmacfile))

        if hmacfile != filename + ".hmac":
            raise qubes.exc.QubesException(
                "ERROR: expected hmac for {}, but got {}".
                format(filename, hmacfile))

        if algorithm == 'scrypt':
            # in case of 'scrypt' _verify_hmac is only used for backup header
            assert filename == HEADER_FILENAME
            self._verify_and_decrypt(hmacfile, HEADER_FILENAME + '.dec')
            if open(os.path.join(self.tmpdir, filename), 'rb').read() != \
                    open(os.path.join(self.tmpdir, filename + '.dec'),
                        'rb').read():
                raise qubes.exc.QubesException(
                    'Invalid hmac on {}'.format(filename))
            else:
                return True

        hmac_proc = subprocess.Popen(
            ["openssl", "dgst", "-" + algorithm, "-hmac", passphrase],
            stdin=open(os.path.join(self.tmpdir, filename), 'rb'),
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        hmac_stdout, hmac_stderr = hmac_proc.communicate()

        if len(hmac_stderr) > 0:
            raise qubes.exc.QubesException(
                "ERROR: verify file {0}: {1}".format(filename, hmac_stderr))
        else:
            self.log.debug("Loading hmac for file {}".format(filename))
            hmac = load_hmac(open(os.path.join(self.tmpdir, hmacfile),
                'r', encoding='ascii').read())

            if len(hmac) > 0 and load_hmac(hmac_stdout.decode('ascii')) == hmac:
                os.unlink(os.path.join(self.tmpdir, hmacfile))
                self.log.debug(
                    "File verification OK -> Sending file {}".format(filename))
                return True
            else:
                raise qubes.exc.QubesException(
                    "ERROR: invalid hmac for file {0}: {1}. "
                    "Is the passphrase correct?".
                    format(filename, load_hmac(hmac_stdout.decode('ascii'))))

    def _verify_and_decrypt(self, filename, output=None):
        assert filename.endswith('.enc') or filename.endswith('.hmac')
        fullname = os.path.join(self.tmpdir, filename)
        (origname, _) = os.path.splitext(filename)
        if output:
            fulloutput = os.path.join(self.tmpdir, output)
        else:
            fulloutput = os.path.join(self.tmpdir, origname)
        if origname == HEADER_FILENAME:
            passphrase = u'{filename}!{passphrase}'.format(
                filename=origname,
                passphrase=self.passphrase)
        else:
            passphrase = u'{backup_id}!{filename}!{passphrase}'.format(
                backup_id=self.header_data.backup_id,
                filename=origname,
                passphrase=self.passphrase)
        p = launch_scrypt('dec', fullname, fulloutput, passphrase)
        (_, stderr) = p.communicate()
        if p.returncode != 0:
            os.unlink(fulloutput)
            raise qubes.exc.QubesException('failed to decrypt {}: {}'.format(
                fullname, stderr))
        # encrypted file is no longer needed
        os.unlink(fullname)
        return origname

    def _retrieve_backup_header_files(self, files, allow_none=False):
        (retrieve_proc, filelist_pipe, error_pipe) = \
            self._start_retrieval_process(
                files, len(files), 1024 * 1024)
        filelist = filelist_pipe.read()
        retrieve_proc_returncode = retrieve_proc.wait()
        if retrieve_proc in self.processes_to_kill_on_cancel:
            self.processes_to_kill_on_cancel.remove(retrieve_proc)
        extract_stderr = error_pipe.read(MAX_STDERR_BYTES)

        # wait for other processes (if any)
        for proc in self.processes_to_kill_on_cancel:
            if proc.wait() != 0:
                raise qubes.exc.QubesException(
                    "Backup header retrieval failed (exit code {})".format(
                        proc.wait())
                )

        if retrieve_proc_returncode != 0:
            if not filelist and 'Not found in archive' in extract_stderr:
                if allow_none:
                    return None
                else:
                    raise qubes.exc.QubesException(
                        "unable to read the qubes backup file {0} ({1}): {2}".format(
                            self.backup_location,
                            retrieve_proc.wait(),
                            extract_stderr
                        ))
        actual_files = filelist.decode('ascii').splitlines()
        if sorted(actual_files) != sorted(files):
            raise qubes.exc.QubesException(
                'unexpected files in archive: got {!r}, expected {!r}'.format(
                    actual_files, files
                ))
        for f in files:
            if not os.path.exists(os.path.join(self.tmpdir, f)):
                if allow_none:
                    return None
                else:
                    raise qubes.exc.QubesException(
                        'Unable to retrieve file {} from backup {}: {}'.format(
                            f, self.backup_location, extract_stderr
                        )
                    )
        return files

    def _retrieve_backup_header(self):
        """Retrieve backup header and qubes.xml. Only backup header is
        analyzed, qubes.xml is left as-is
        (not even verified/decrypted/uncompressed)

        :return header_data
        :rtype :py:class:`BackupHeader`
        """

        if not self.backup_vm and os.path.exists(
                os.path.join(self.backup_location, 'qubes.xml')):
            # backup format version 1 doesn't have header
            header_data = BackupHeader()
            header_data.version = 1
            return header_data

        header_files = self._retrieve_backup_header_files(
            ['backup-header', 'backup-header.hmac'], allow_none=True)

        if not header_files:
            # R2-Beta3 didn't have backup header, so if none is found,
            # assume it's version=2 and use values present at that time
            header_data = BackupHeader(
                version=2,
                # place explicitly this value, because it is what format_version
                # 2 have
                hmac_algorithm='SHA1',
                crypto_algorithm='aes-256-cbc',
                # TODO: set encrypted to something...
            )
        else:
            filename = HEADER_FILENAME
            hmacfile = HEADER_FILENAME + '.hmac'
            self.log.debug("Got backup header and hmac: {}, {}".format(
                filename, hmacfile))

            file_ok = False
            hmac_algorithm = DEFAULT_HMAC_ALGORITHM
            for hmac_algo in get_supported_hmac_algo(hmac_algorithm):
                try:
                    if self._verify_hmac(filename, hmacfile, hmac_algo):
                        file_ok = True
                        break
                except qubes.exc.QubesException as e:
                    self.log.debug(
                        'Failed to verify {} using {}: {}'.format(
                            hmacfile, hmac_algo, str(e)))
                    # Ignore exception here, try the next algo
                    pass
            if not file_ok:
                raise qubes.exc.QubesException(
                    "Corrupted backup header (hmac verification "
                    "failed). Is the password correct?")
            filename = os.path.join(self.tmpdir, filename)
            header_data = BackupHeader(open(filename, 'rb').read())
            os.unlink(filename)

        return header_data

    def _start_inner_extraction_worker(self, queue, relocate):
        """Start a worker process, extracting inner layer of bacup archive,
        extract them to :py:attr:`tmpdir`.
        End the data by pushing QUEUE_FINISHED or QUEUE_ERROR to the queue.

        :param queue :py:class:`Queue` object to handle files from
        """

        # Setup worker to extract encrypted data chunks to the restore dirs
        # Create the process here to pass it options extracted from
        # backup header
        extractor_params = {
            'queue': queue,
            'base_dir': self.tmpdir,
            'passphrase': self.passphrase,
            'encrypted': self.header_data.encrypted,
            'compressed': self.header_data.compressed,
            'crypto_algorithm': self.header_data.crypto_algorithm,
            'verify_only': self.options.verify_only,
            'progress_callback': self.progress_callback,
            'relocate': relocate,
        }
        self.log.debug('Starting extraction worker in {}, file relocation '
                       'map: {!r}'.format(self.tmpdir, relocate))
        format_version = self.header_data.version
        if format_version == 2:
            extract_proc = ExtractWorker2(**extractor_params)
        elif format_version in [3, 4]:
            extractor_params['compression_filter'] = \
                self.header_data.compression_filter
            if format_version == 4:
                # encryption already handled
                extractor_params['encrypted'] = False
            extract_proc = ExtractWorker3(**extractor_params)
        else:
            raise NotImplementedError(
                "Backup format version %d not supported" % format_version)
        extract_proc.start()
        return extract_proc

    def _process_qubes_xml(self):
        """Verify, unpack and load qubes.xml. Possibly convert its format if
        necessary. It expect that :py:attr:`header_data` is already populated,
        and :py:meth:`retrieve_backup_header` was called.
        """
        if self.header_data.version == 1:
            backup_app = qubes.core2migration.Core2Qubes(
                os.path.join(self.backup_location, 'qubes.xml'),
                offline_mode=True)
            return backup_app
        else:
            if self.header_data.version in [2, 3]:
                self._retrieve_backup_header_files(
                    ['qubes.xml.000', 'qubes.xml.000.hmac'])
                self._verify_hmac("qubes.xml.000", "qubes.xml.000.hmac")
            else:
                self._retrieve_backup_header_files(['qubes.xml.000.enc'])
                self._verify_and_decrypt('qubes.xml.000.enc')

            queue = Queue()
            queue.put("qubes.xml.000")
            queue.put(QUEUE_FINISHED)

        extract_proc = self._start_inner_extraction_worker(queue, None)
        extract_proc.join()
        if extract_proc.exitcode != 0:
            raise qubes.exc.QubesException(
                "unable to extract the qubes backup. "
                "Check extracting process errors.")

        if self.header_data.version in [2, 3]:
            backup_app = qubes.core2migration.Core2Qubes(
                os.path.join(self.tmpdir, 'qubes.xml'), offline_mode=True)
        else:
            backup_app = qubes.Qubes(os.path.join(self.tmpdir, 'qubes.xml'),
                offline_mode=True)
        # Not needed anymore - all the data stored in backup_app
        os.unlink(os.path.join(self.tmpdir, 'qubes.xml'))
        return backup_app

    def _restore_vm_dirs(self, vms_dirs, vms_size, relocate):
        # Currently each VM consists of at most 7 archives (count
        # file_to_backup calls in backup_prepare()), but add some safety
        # margin for further extensions. Each archive is divided into 100MB
        # chunks. Additionally each file have own hmac file. So assume upper
        # limit as 2*(10*COUNT_OF_VMS+TOTAL_SIZE/100MB)
        limit_count = str(2 * (10 * len(vms_dirs) +
                               int(vms_size / (100 * 1024 * 1024))))

        self.log.debug("Working in temporary dir:" + self.tmpdir)
        self.log.info(
            "Extracting data: " + size_to_human(vms_size) + " to restore")

        # retrieve backup from the backup stream (either VM, or dom0 file)
        (retrieve_proc, filelist_pipe, error_pipe) = \
            self._start_retrieval_process(
                vms_dirs, limit_count, vms_size)

        to_extract = Queue()

        # extract data retrieved by retrieve_proc
        extract_proc = self._start_inner_extraction_worker(
            to_extract, relocate)

        try:
            filename = None
            hmacfile = None
            nextfile = None
            while True:
                if self.canceled:
                    break
                if not extract_proc.is_alive():
                    retrieve_proc.terminate()
                    retrieve_proc.wait()
                    if retrieve_proc in self.processes_to_kill_on_cancel:
                        self.processes_to_kill_on_cancel.remove(retrieve_proc)
                    # wait for other processes (if any)
                    for proc in self.processes_to_kill_on_cancel:
                        proc.wait()
                    break
                if nextfile is not None:
                    filename = nextfile
                else:
                    filename = filelist_pipe.readline().decode('ascii').strip()

                self.log.debug("Getting new file:" + filename)

                if not filename or filename == "EOF":
                    break

                # if reading archive directly with tar, wait for next filename -
                # tar prints filename before processing it, so wait for
                # the next one to be sure that whole file was extracted
                if not self.backup_vm:
                    nextfile = filelist_pipe.readline().decode('ascii').strip()

                if self.header_data.version in [2, 3]:
                    if not self.backup_vm:
                        hmacfile = nextfile
                        nextfile = filelist_pipe.readline().\
                            decode('ascii').strip()
                    else:
                        hmacfile = filelist_pipe.readline().\
                            decode('ascii').strip()

                    if self.canceled:
                        break

                    self.log.debug("Getting hmac:" + hmacfile)
                    if not hmacfile or hmacfile == "EOF":
                        # Premature end of archive, either of tar1_command or
                        # vmproc exited with error
                        break
                else:  # self.header_data.version == 4
                    if not filename.endswith('.enc'):
                        raise qubes.exc.QubesException(
                            'Invalid file extension found in archive: {}'.
                            format(filename))

                if not any(map(lambda x: filename.startswith(x), vms_dirs)):
                    self.log.debug("Ignoring VM not selected for restore")
                    os.unlink(os.path.join(self.tmpdir, filename))
                    if hmacfile:
                        os.unlink(os.path.join(self.tmpdir, hmacfile))
                    continue

                if self.header_data.version in [2, 3]:
                    self._verify_hmac(filename, hmacfile)
                else:
                    # _verify_and_decrypt will write output to a file with
                    # '.enc' extension cut off. This is safe because:
                    # - `scrypt` tool will override output, so if the file was
                    # already there (received from the VM), it will be removed
                    # - incoming archive extraction will refuse to override
                    # existing file, so if `scrypt` already created one,
                    # it can not be manipulated by the VM
                    # - when the file is retrieved from the VM, it appears at
                    # the final form - if it's visible, VM have no longer
                    # influence over its content
                    #
                    # This all means that if the file was correctly verified
                    # + decrypted, we will surely access the right file
                    filename = self._verify_and_decrypt(filename)
                to_extract.put(os.path.join(self.tmpdir, filename))

            if self.canceled:
                raise BackupCanceledError("Restore canceled",
                                          tmpdir=self.tmpdir)

            if retrieve_proc.wait() != 0:
                raise qubes.exc.QubesException(
                    "unable to read the qubes backup file {0}: {1}"
                    .format(self.backup_location, error_pipe.read(
                        MAX_STDERR_BYTES)))
            # wait for other processes (if any)
            for proc in self.processes_to_kill_on_cancel:
                proc.wait()
                if proc.returncode != 0:
                    raise qubes.exc.QubesException(
                        "Backup completed, but VM receiving it reported an error "
                        "(exit code {})".format(proc.returncode))

            if filename and filename != "EOF":
                raise qubes.exc.QubesException(
                    "Premature end of archive, the last file was %s" % filename)
        except:
            to_extract.put(QUEUE_ERROR)
            extract_proc.join()
            raise
        else:
            to_extract.put(QUEUE_FINISHED)

        self.log.debug("Waiting for the extraction process to finish...")
        extract_proc.join()
        self.log.debug("Extraction process finished with code: {}".format(
            extract_proc.exitcode))
        if extract_proc.exitcode != 0:
            raise qubes.exc.QubesException(
                "unable to extract the qubes backup. "
                "Check extracting process errors.")

    def generate_new_name_for_conflicting_vm(self, orig_name, restore_info):
        number = 1
        if len(orig_name) > 29:
            orig_name = orig_name[0:29]
        new_name = orig_name
        while (new_name in restore_info.keys() or
               new_name in map(lambda x: x.name,
                               restore_info.values()) or
               new_name in self.app.domains):
            new_name = str('{}{}'.format(orig_name, number))
            number += 1
            if number == 100:
                # give up
                return None
        return new_name

    def restore_info_verify(self, restore_info):
        for vm in restore_info.keys():
            if vm in ['dom0']:
                continue

            vm_info = restore_info[vm]
            assert isinstance(vm_info, self.VMToRestore)

            vm_info.problems.clear()
            if vm in self.options.exclude:
                vm_info.problems.add(self.VMToRestore.EXCLUDED)

            if not self.options.verify_only and \
                    vm_info.name in self.app.domains:
                if self.options.rename_conflicting:
                    new_name = self.generate_new_name_for_conflicting_vm(
                        vm, restore_info
                    )
                    if new_name is not None:
                        vm_info.name = new_name
                    else:
                        vm_info.problems.add(self.VMToRestore.ALREADY_EXISTS)
                else:
                    vm_info.problems.add(self.VMToRestore.ALREADY_EXISTS)

            # check template
            if vm_info.template:
                template_name = vm_info.template
                try:
                    host_template = self.app.domains[template_name]
                except KeyError:
                    host_template = None
                if not host_template \
                        or not isinstance(host_template,
                        qubes.vm.templatevm.TemplateVM):
                    # Maybe the (custom) template is in the backup?
                    if not (template_name in restore_info.keys() and
                            restore_info[template_name].good_to_go and
                            isinstance(restore_info[template_name].vm,
                                qubes.vm.templatevm.TemplateVM)):
                        if self.options.use_default_template and \
                                self.app.default_template:
                            if vm_info.orig_template is None:
                                vm_info.orig_template = template_name
                            vm_info.template = self.app.default_template.name
                        else:
                            vm_info.problems.add(
                                self.VMToRestore.MISSING_TEMPLATE)

            # check netvm
            if not vm_info.vm.property_is_default('netvm') and vm_info.netvm:
                netvm_name = vm_info.netvm

                try:
                    netvm_on_host = self.app.domains[netvm_name]
                except KeyError:
                    netvm_on_host = None
                # No netvm on the host?
                if not ((netvm_on_host is not None)
                        and netvm_on_host.provides_network):

                    # Maybe the (custom) netvm is in the backup?
                    if not (netvm_name in restore_info.keys() and
                            restore_info[netvm_name].good_to_go and
                            restore_info[netvm_name].vm.provides_network):
                        if self.options.use_default_netvm:
                            vm_info.vm.netvm = qubes.property.DEFAULT
                        elif self.options.use_none_netvm:
                            vm_info.netvm = None
                        else:
                            vm_info.problems.add(self.VMToRestore.MISSING_NETVM)

            # check kernel
            if hasattr(vm_info.vm, 'kernel'):
                installed_kernels = os.listdir(os.path.join(
                    qubes.config.qubes_base_dir,
                    qubes.config.system_path['qubes_kernels_base_dir']))
                # if uses default kernel - do not validate it
                # allow kernel=None only for HVM,
                # otherwise require valid kernel
                if not (vm_info.vm.property_is_default('kernel')
                        or (not vm_info.vm.kernel and vm_info.vm.virt_mode == 'hvm')
                        or vm_info.vm.kernel in installed_kernels):
                    if self.options.use_default_kernel:
                        vm_info.vm.kernel = qubes.property.DEFAULT
                    else:
                        vm_info.problems.add(self.VMToRestore.MISSING_KERNEL)

        return restore_info

    def _is_vm_included_in_backup_v1(self, check_vm):
        if check_vm.qid == 0:
            return os.path.exists(
                os.path.join(self.backup_location, 'dom0-home'))

        # DisposableVM
        if check_vm.dir_path is None:
            return False

        backup_vm_dir_path = check_vm.dir_path.replace(
            qubes.config.system_path["qubes_base_dir"], self.backup_location)

        if os.path.exists(backup_vm_dir_path):
            return True
        else:
            return False

    @staticmethod
    def _is_vm_included_in_backup_v2(check_vm):
        if 'backup-content' in check_vm.features:
            return check_vm.features['backup-content']
        else:
            return False

    def _find_template_name(self, template):
        if template in self.options.replace_template:
            return self.options.replace_template[template]
        return template

    def _is_vm_included_in_backup(self, vm):
        if self.header_data.version == 1:
            return self._is_vm_included_in_backup_v1(vm)
        elif self.header_data.version in [2, 3, 4]:
            return self._is_vm_included_in_backup_v2(vm)
        else:
            raise qubes.exc.QubesException(
                "Unknown backup format version: {}".format(
                    self.header_data.version))

    def get_restore_info(self):
        # Format versions:
        # 1 - Qubes R1, Qubes R2 beta1, beta2
        #  2 - Qubes R2 beta3+

        vms_to_restore = {}

        for vm in self.backup_app.domains:
            if vm.qid == 0:
                # Handle dom0 as special case later
                continue
            if self._is_vm_included_in_backup(vm):
                self.log.debug("{} is included in backup".format(vm.name))

                vms_to_restore[vm.name] = self.VMToRestore(vm)

                if hasattr(vm, 'template'):
                    templatevm_name = self._find_template_name(
                        vm.template.name)
                    vms_to_restore[vm.name].template = templatevm_name

                # Set to None to not confuse QubesVm object from backup
                # collection with host collection (further in clone_attrs).
                vm.netvm = None

        vms_to_restore = self.restore_info_verify(vms_to_restore)

        # ...and dom0 home
        if self.options.dom0_home and \
                self._is_vm_included_in_backup(self.backup_app.domains[0]):
            vm = self.backup_app.domains[0]
            if self.header_data.version == 1:
                subdir = os.listdir(os.path.join(self.backup_location,
                    'dom0-home'))[0]
            else:
                subdir = None
            vms_to_restore['dom0'] = self.Dom0ToRestore(vm, subdir)
            local_user = grp.getgrnam('qubes').gr_mem[0]

            if vms_to_restore['dom0'].username != local_user:
                if not self.options.ignore_username_mismatch:
                    vms_to_restore['dom0'].problems.add(
                        self.Dom0ToRestore.USERNAME_MISMATCH)

        return vms_to_restore

    @staticmethod
    def get_restore_summary(restore_info):
        fields = {
            "qid": {"func": "vm.qid"},

            "name": {"func": "('[' if isinstance(vm, qubes.vm.templatevm.TemplateVM) else '')\
                     + ('{' if vm.provides_network else '')\
                     + vm.name \
                     + (']' if isinstance(vm, qubes.vm.templatevm.TemplateVM) else '')\
                     + ('}' if vm.provides_network else '')"},

            "type": {"func": "'Tpl' if isinstance(vm, qubes.vm.templatevm.TemplateVM) else \
                     'App' if isinstance(vm, qubes.vm.appvm.AppVM) else \
                     vm.__class__.__name__.replace('VM','')"},

            "updbl": {"func": "'Yes' if vm.updateable else ''"},

            "template": {"func": "'n/a' if not hasattr(vm, 'template') "
                                 "else vm_info.template"},

            "netvm": {"func": "('*' if vm.property_is_default('netvm') else '') +\
                        vm_info.netvm if vm_info.netvm is not None "
                              "else '-'"},

            "label": {"func": "vm.label.name"},
        }

        fields_to_display = ["name", "type", "template", "updbl",
            "netvm", "label"]

        # First calculate the maximum width of each field we want to display
        total_width = 0
        for f in fields_to_display:
            fields[f]["max_width"] = len(f)
            for vm_info in restore_info.values():
                if vm_info.vm:
                    # noinspection PyUnusedLocal
                    vm = vm_info.vm
                    l = len(str(eval(fields[f]["func"])))
                    if l > fields[f]["max_width"]:
                        fields[f]["max_width"] = l
            total_width += fields[f]["max_width"]

        summary = ""
        summary += "The following VMs are included in the backup:\n"
        summary += "\n"

        # Display the header
        for f in fields_to_display:
            # noinspection PyTypeChecker
            fmt = "{{0:-^{0}}}-+".format(fields[f]["max_width"] + 1)
            summary += fmt.format('-')
        summary += "\n"
        for f in fields_to_display:
            # noinspection PyTypeChecker
            fmt = "{{0:>{0}}} |".format(fields[f]["max_width"] + 1)
            summary += fmt.format(f)
        summary += "\n"
        for f in fields_to_display:
            # noinspection PyTypeChecker
            fmt = "{{0:-^{0}}}-+".format(fields[f]["max_width"] + 1)
            summary += fmt.format('-')
        summary += "\n"

        for vm_info in restore_info.values():
            assert isinstance(vm_info, BackupRestore.VMToRestore)
            # Skip non-VM here
            if not vm_info.vm:
                continue
            # noinspection PyUnusedLocal
            vm = vm_info.vm
            s = ""
            for f in fields_to_display:
                # noinspection PyTypeChecker
                fmt = "{{0:>{0}}} |".format(fields[f]["max_width"] + 1)
                s += fmt.format(eval(fields[f]["func"]))

            if BackupRestore.VMToRestore.EXCLUDED in vm_info.problems:
                s += " <-- Excluded from restore"
            elif BackupRestore.VMToRestore.ALREADY_EXISTS in vm_info.problems:
                s += " <-- A VM with the same name already exists on the host!"
            elif BackupRestore.VMToRestore.MISSING_TEMPLATE in \
                    vm_info.problems:
                s += " <-- No matching template on the host " \
                     "or in the backup found!"
            elif BackupRestore.VMToRestore.MISSING_NETVM in \
                    vm_info.problems:
                s += " <-- No matching netvm on the host " \
                     "or in the backup found!"
            else:
                if vm_info.orig_template:
                    s += " <-- Original template was '{}'".format(
                        vm_info.orig_template)
                if vm_info.name != vm_info.vm.name:
                    s += " <-- Will be renamed to '{}'".format(
                        vm_info.name)

            summary += s + "\n"

        if 'dom0' in restore_info.keys():
            s = ""
            for f in fields_to_display:
                # noinspection PyTypeChecker
                fmt = "{{0:>{0}}} |".format(fields[f]["max_width"] + 1)
                if f == "name":
                    s += fmt.format("Dom0")
                elif f == "type":
                    s += fmt.format("Home")
                else:
                    s += fmt.format("")
            if BackupRestore.Dom0ToRestore.USERNAME_MISMATCH in \
                    restore_info['dom0'].problems:
                s += " <-- username in backup and dom0 mismatch"

            summary += s + "\n"

        return summary

    def _restore_vm_dir_v1(self, src_dir, dst_dir):

        backup_src_dir = src_dir.replace(
            qubes.config.system_path["qubes_base_dir"], self.backup_location)

        # We prefer to use Linux's cp, because it nicely handles sparse files
        cp_retcode = subprocess.call(
            ["cp", "-rp", "--reflink=auto", backup_src_dir, dst_dir])
        if cp_retcode != 0:
            raise qubes.exc.QubesException(
                "*** Error while copying file {0} to {1}".format(backup_src_dir,
                                                                 dst_dir))

    @staticmethod
    def _templates_first(vms):
        def key_function(instance):
            if isinstance(instance, qubes.vm.BaseVM):
                return isinstance(instance, qubes.vm.templatevm.TemplateVM)
            elif hasattr(instance, 'vm'):
                return key_function(instance.vm)
            else:
                return 0
        return sorted(vms,
            key=key_function,
            reverse=True)

    def restore_do(self, restore_info):
        '''


        High level workflow:
        1. Create VMs object in host collection (qubes.xml)
        2. Create them on disk (vm.create_on_disk)
        3. Restore VM data, overriding/converting VM files
        4. Apply possible fixups and save qubes.xml

        :param restore_info:
        :return:
        '''

        # FIXME handle locking

        restore_info = self.restore_info_verify(restore_info)

        self._restore_vms_metadata(restore_info)

        # Perform VM restoration in backup order
        vms_dirs = []
        relocate = {}
        vms_size = 0
        for vm_info in self._templates_first(restore_info.values()):
            vm = vm_info.restored_vm
            if vm:
                vms_size += int(vm_info.size)
                vms_dirs.append(vm_info.subdir)
                relocate[vm_info.subdir.rstrip('/')] = vm.dir_path
                for name, volume in vm.volumes.items():
                    if not volume.save_on_stop:
                        continue
                    export_path = vm.storage.export(name)
                    backup_path = os.path.join(
                        vm_info.vm.dir_path, name + '.img')
                    if backup_path != export_path:
                        relocate[
                            os.path.join(vm_info.subdir, name + '.img')] = \
                            export_path

        if self.header_data.version >= 2:
            if 'dom0' in restore_info.keys() and \
                    restore_info['dom0'].good_to_go:
                vms_dirs.append(os.path.dirname(restore_info['dom0'].subdir))
                vms_size += restore_info['dom0'].size

            try:
                self._restore_vm_dirs(vms_dirs=vms_dirs, vms_size=vms_size,
                    relocate=relocate)
            except qubes.exc.QubesException:
                if self.options.verify_only:
                    raise
                else:
                    self.log.warning(
                        "Some errors occurred during data extraction, "
                        "continuing anyway to restore at least some "
                        "VMs")
        else:
            for vm_info in self._templates_first(restore_info.values()):
                vm = vm_info.restored_vm
                if vm:
                    try:
                        self._restore_vm_dir_v1(vm_info.vm.dir_path,
                            os.path.dirname(vm.dir_path))
                    except qubes.exc.QubesException as e:
                        if self.options.verify_only:
                            raise
                        else:
                            self.log.error(
                                "Failed to restore VM '{}': {}".format(
                                    vm.name, str(e)))
                            vm.remove_from_disk()
                            del self.app.domains[vm]

            if self.options.verify_only:
                self.log.warning(
                    "Backup verification not supported for this backup format.")

        if self.options.verify_only:
            shutil.rmtree(self.tmpdir)
            return

        for vm_info in self._templates_first(restore_info.values()):
            if not vm_info.restored_vm:
                continue
            try:
                vm_info.restored_vm.fire_event('domain-restore')
            except Exception as err:
                self.log.error("ERROR during appmenu restore: "
                               "{0}".format(err))
                self.log.warning(
                    "*** VM '{0}' will not have appmenus".format(vm_info.name))

            try:
                vm_info.restored_vm.storage.verify()
            except Exception as err:
                self.log.error("ERROR: {0}".format(err))
                if vm_info.restored_vm:
                    vm_info.restored_vm.remove_from_disk()
                    del self.app.domains[vm_info.restored_vm]

        self.app.save()

        if self.canceled:
            if self.header_data.version >= 2:
                raise BackupCanceledError("Restore canceled",
                                          tmpdir=self.tmpdir)
            else:
                raise BackupCanceledError("Restore canceled")

        # ... and dom0 home as last step
        if 'dom0' in restore_info.keys() and restore_info['dom0'].good_to_go:
            backup_path = restore_info['dom0'].subdir
            local_user = grp.getgrnam('qubes').gr_mem[0]
            home_dir = pwd.getpwnam(local_user).pw_dir
            if self.header_data.version == 1:
                backup_dom0_home_dir = os.path.join(self.backup_location,
                    backup_path)
            else:
                backup_dom0_home_dir = os.path.join(self.tmpdir, backup_path)
            restore_home_backupdir = "home-pre-restore-{0}".format(
                time.strftime("%Y-%m-%d-%H%M%S"))

            self.log.info(
                "Restoring home of user '{0}'...".format(local_user))
            self.log.info(
                "Existing files/dirs backed up in '{0}' dir".format(
                    restore_home_backupdir))
            os.mkdir(home_dir + '/' + restore_home_backupdir)
            for f in os.listdir(backup_dom0_home_dir):
                home_file = home_dir + '/' + f
                if os.path.exists(home_file):
                    os.rename(home_file,
                              home_dir + '/' + restore_home_backupdir + '/' + f)
                if self.header_data.version == 1:
                    subprocess.call(
                        ["cp", "-nrp", "--reflink=auto",
                            backup_dom0_home_dir + '/' + f, home_file])
                elif self.header_data.version >= 2:
                    shutil.move(backup_dom0_home_dir + '/' + f, home_file)
            retcode = subprocess.call(['sudo', 'chown', '-R',
                local_user, home_dir])
            if retcode != 0:
                self.log.error("*** Error while setting home directory owner")

        shutil.rmtree(self.tmpdir)
        self.log.info("-> Done. Please install updates for all the restored "
                      "templates.")

    def _restore_vms_metadata(self, restore_info):
        vms = {}
        for vm_info in restore_info.values():
            assert isinstance(vm_info, self.VMToRestore)
            if not vm_info.vm:
                continue
            if not vm_info.good_to_go:
                continue
            vm = vm_info.vm
            vms[vm.name] = vm

        # First load templates, then other VMs
        for vm in self._templates_first(vms.values()):
            if self.canceled:
                # only break the loop to save qubes.xml
                # with already restored VMs
                break
            self.log.info("-> Restoring {0}...".format(vm.name))
            kwargs = {}
            if hasattr(vm, 'template'):
                template = restore_info[vm.name].template
                # handle potentially renamed template
                if template in restore_info \
                        and restore_info[template].good_to_go:
                    template = restore_info[template].name
                kwargs['template'] = template

            new_vm = None
            vm_name = restore_info[vm.name].name

            try:
                # first only minimal set, later clone_properties
                # will be called
                cls = self.app.get_vm_class(vm.__class__.__name__)
                new_vm = self.app.add_new_vm(
                    cls,
                    name=vm_name,
                    label=vm.label,
                    installed_by_rpm=False,
                    **kwargs)
                if os.path.exists(new_vm.dir_path):
                    move_to_path = tempfile.mkdtemp('', os.path.basename(
                        new_vm.dir_path), os.path.dirname(new_vm.dir_path))
                    try:
                        os.rename(new_vm.dir_path, move_to_path)
                        self.log.warning(
                            "*** Directory {} already exists! It has "
                            "been moved to {}".format(new_vm.dir_path,
                                move_to_path))
                    except OSError:
                        self.log.error(
                            "*** Directory {} already exists and "
                            "cannot be moved!".format(new_vm.dir_path))
                        self.log.warning("Skipping VM {}...".format(
                            vm.name))
                        continue
            except Exception as err:
                self.log.error("ERROR: {0}".format(err))
                self.log.warning("*** Skipping VM: {0}".format(vm.name))
                if new_vm:
                    del self.app.domains[new_vm.qid]
                continue

            # remove no longer needed backup metadata
            if 'backup-content' in vm.features:
                del vm.features['backup-content']
                del vm.features['backup-size']
                del vm.features['backup-path']
            try:
                # exclude VM references - handled manually according to
                # restore options
                proplist = [prop for prop in new_vm.property_list()
                    if prop.clone and prop.__name__ not in
                          ['template', 'netvm', 'dispvm_netvm']]
                new_vm.clone_properties(vm, proplist=proplist)
            except Exception as err:
                self.log.error("ERROR: {0}".format(err))
                self.log.warning("*** Some VM property will not be "
                                 "restored")

            if not self.options.verify_only:
                try:
                    # have it here, to (maybe) patch storage config before
                    # creating child VMs (template first)
                    # TODO: adjust volumes config - especially size
                    new_vm.create_on_disk(pool=self.options.override_pool)
                except qubes.exc.QubesException as e:
                    self.log.warning("Failed to create VM {}: {}".format(
                        vm.name, str(e)))
                    del self.app.domains[new_vm]
                    continue

            restore_info[vm.name].restored_vm = new_vm

        # Set network dependencies - only non-default netvm setting
        for vm in vms.values():
            vm_info = restore_info[vm.name]
            vm_name = vm_info.name
            try:
                host_vm = self.app.domains[vm_name]
            except KeyError:
                # Failed/skipped VM
                continue

            if not vm.property_is_default('netvm'):
                if vm_info.netvm in restore_info:
                    host_vm.netvm = restore_info[vm_info.netvm].name
                else:
                    host_vm.netvm = vm_info.netvm

# vim:sw=4:et: