2013-11-25 05:41:13 +01:00
|
|
|
#!/usr/bin/python
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
#
|
|
|
|
# The Qubes OS Project, http://www.qubes-os.org
|
|
|
|
#
|
2015-05-03 14:45:01 +02:00
|
|
|
# Copyright (C) 2013-2015 Marek Marczykowski-Górecki
|
|
|
|
# <marmarek@invisiblethingslab.com>
|
2013-11-25 05:41:13 +01:00
|
|
|
# Copyright (C) 2013 Olivier Médoc <o_medoc@yahoo.fr>
|
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or
|
|
|
|
# modify it under the terms of the GNU General Public License
|
|
|
|
# as published by the Free Software Foundation; either version 2
|
|
|
|
# of the License, or (at your option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
2016-03-14 13:32:56 +01:00
|
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>
|
2013-11-25 05:41:13 +01:00
|
|
|
#
|
|
|
|
#
|
2014-09-17 14:34:45 +02:00
|
|
|
from __future__ import unicode_literals
|
2016-03-13 13:49:42 +01:00
|
|
|
import itertools
|
2016-03-14 12:04:07 +01:00
|
|
|
import logging
|
backup: use 'scrypt' tool for backup encryption and integrity protection
`openssl dgst` and `openssl enc`, used previously, handle key
stretching poorly - in the case of `openssl enc` the encryption key is
derived using a single MD5 iteration, without even any salt. This hardly
prevents brute-force or even rainbow-table attacks. To make things worse, the
same key is used for encryption and integrity protection, which eases
brute force even further.
All this is still about brute-force attacks, so when using a long,
high-entropy passphrase, it should still be relatively safe. But let's do
better.
According to the discussion in QubesOS/qubes-issues#971, the scrypt algorithm
is a good choice for key stretching (it isn't the best of all existing ones,
but a good one and widely adopted). At the same time, let's switch away from
the `openssl` tool, as it is very limited and apparently not designed for
production use. Use the `scrypt` tool, which is very simple and does exactly
what we need - it encrypts the data and integrity-protects it. Its archive
format has its own (simple) header with the data required by the `scrypt`
algorithm, including the salt. Internally, data is encrypted with AES256-CTR
and integrity-protected with HMAC-SHA256. For details see:
https://github.com/tarsnap/scrypt/blob/master/FORMAT
This means a change of the backup format. Mainly:
1. The HMAC is stored in the scrypt header, so don't use a separate file for it.
Instead, keep the data in files with the `.enc` extension.
2. For compatibility, keep `backup-header` and `backup-header.hmac`. But
`backup-header.hmac` is really a scrypt-encrypted version of `backup-header`.
3. For each file, prepend its identifier to the passphrase, to
authenticate the filename itself too. With this we can guard against
reordering archive files within a single backup and across backups. This
identifier is built as:
backup ID (from backup-header)!filename!
For backup-header itself, there is no backup ID (just 'backup-header!').
Fixes QubesOS/qubes-issues#971
2016-10-20 12:56:51 +02:00
|
|
|
import termios
|
|
|
|
|
2016-03-10 11:22:52 +01:00
|
|
|
from qubes.utils import size_to_human
|
2013-11-25 05:41:13 +01:00
|
|
|
import sys
|
2016-09-29 01:55:34 +02:00
|
|
|
import stat
|
2013-11-25 05:41:13 +01:00
|
|
|
import os
|
2014-09-17 14:40:26 +02:00
|
|
|
import fcntl
|
2013-11-25 05:41:13 +01:00
|
|
|
import subprocess
|
|
|
|
import re
|
|
|
|
import shutil
|
2013-11-26 16:45:51 +01:00
|
|
|
import tempfile
|
2013-11-25 05:41:13 +01:00
|
|
|
import time
|
2015-05-03 14:45:01 +02:00
|
|
|
import grp
|
|
|
|
import pwd
|
2014-01-10 03:31:15 +01:00
|
|
|
import errno
|
2014-03-10 04:29:14 +01:00
|
|
|
import datetime
|
2015-05-03 14:45:01 +02:00
|
|
|
from multiprocessing import Queue, Process
|
2016-03-10 11:22:52 +01:00
|
|
|
import qubes
|
2016-03-16 01:01:13 +01:00
|
|
|
import qubes.core2migration
|
2016-04-02 23:53:03 +02:00
|
|
|
import qubes.storage
|
2016-05-21 03:31:29 +02:00
|
|
|
import qubes.storage.file
|
2013-11-25 05:41:13 +01:00
|
|
|
|
2016-04-02 23:51:37 +02:00
|
|
|
# Sentinel strings placed on the queue consumed by SendWorker; receiving
# either of them makes the worker stop taking further file names.
QUEUE_ERROR = "ERROR"

QUEUE_FINISHED = "FINISHED"

# Name of the header file created inside the backup temporary directory
HEADER_FILENAME = 'backup-header'
# Default value of Backup.crypto_algorithm (recorded in the backup header)
DEFAULT_CRYPTO_ALGORITHM = 'aes-256-cbc'
# 'scrypt' is not exactly an HMAC algorithm, but it is the tool used to
# integrity-protect the data
DEFAULT_HMAC_ALGORITHM = 'scrypt'
# Default value of Backup.compression_filter
DEFAULT_COMPRESSION_FILTER = 'gzip'
# Format version written to the header of newly created backups.
# Note this is a string, while BackupHeader.load() parses 'version' to int.
CURRENT_BACKUP_FORMAT_VERSION = '4'
# Maximum size of an error message read from a process stderr (including a
# VM process)
MAX_STDERR_BYTES = 1024
# header + qubes.xml max size
HEADER_QUBES_XML_MAX_SIZE = 1024 * 1024

# NOTE(review): not referenced in the visible part of this file; presumably a
# block size in bytes - confirm against its users.
BLKSIZE = 512

# Matches strings made only of ASCII letters, digits and '-' (the empty
# string matches too); used to validate backup header keys and values.
_re_alphanum = re.compile(r'^[A-Za-z0-9-]*$')
|
2016-03-10 11:22:52 +01:00
|
|
|
|
|
|
|
class BackupCanceledError(qubes.exc.QubesException):
    """Exception type for a canceled backup operation.

    :param msg: message passed on to :py:class:`qubes.exc.QubesException`
    :param tmpdir: optional value kept on the exception as
        :py:attr:`tmpdir` (``None`` when not given)
    """

    def __init__(self, msg, tmpdir=None):
        super(BackupCanceledError, self).__init__(msg)
        # stored verbatim so the code handling the exception can access it
        self.tmpdir = tmpdir
|
|
|
|
|
2015-05-03 14:45:01 +02:00
|
|
|
|
2016-03-13 13:49:42 +01:00
|
|
|
class BackupHeader(object):
    """In-memory representation of the ``backup-header`` file.

    The file consists of ``key=value`` lines. Every known key (see
    :py:attr:`header_keys`) is exposed as an attribute of this object; an
    attribute equal to ``None`` means "not set".
    """

    #: header key (as written in the file) -> attribute name on this object
    header_keys = {
        'version': 'version',
        'encrypted': 'encrypted',
        'compressed': 'compressed',
        'compression-filter': 'compression_filter',
        'crypto-algorithm': 'crypto_algorithm',
        'hmac-algorithm': 'hmac_algorithm',
        'backup-id': 'backup_id'
    }
    #: header keys whose value is converted to bool by :py:meth:`load`
    bool_options = ['encrypted', 'compressed']
    #: header keys whose value is converted to int by :py:meth:`load`
    int_options = ['version']

    def __init__(self,
            header_data=None,
            version=None,
            encrypted=None,
            compressed=None,
            compression_filter=None,
            hmac_algorithm=None,
            crypto_algorithm=None,
            backup_id=None):
        """Create a header from explicit values and/or serialized data.

        :param header_data: raw header content; when not ``None`` it is
            parsed with :py:meth:`load` after the keyword values are stored
        :param version: backup format version
        :param encrypted: whether the backup is encrypted
        :param compressed: whether the backup is compressed
        :param compression_filter: compression filter name
        :param hmac_algorithm: integrity protection algorithm name
        :param crypto_algorithm: encryption algorithm name
        :param backup_id: backup identifier
        """
        # repeat the list to help code completion...
        self.version = version
        self.encrypted = encrypted
        self.compressed = compressed
        # Options introduced in backup format 3+, which always have a header,
        # so no need for fallback in function parameter
        self.compression_filter = compression_filter
        self.hmac_algorithm = hmac_algorithm
        self.crypto_algorithm = crypto_algorithm
        self.backup_id = backup_id

        if header_data is not None:
            self.load(header_data)

    def load(self, untrusted_header_text):
        """Parse backup header file.

        Unknown keys are ignored; a key that is already set (either by the
        constructor or by an earlier line) is rejected as a duplicate. After
        all lines are processed the result is checked with
        :py:meth:`validate`.

        :param untrusted_header_text: header content
        :type untrusted_header_text: basestring
        :raises qubes.exc.QubesException: on any malformed, duplicated or
            unsupported content

        .. warning::
            This function may be exposed to not yet verified header,
            so is security critical.
        """
        try:
            untrusted_header_text = untrusted_header_text.decode('ascii')
        except UnicodeDecodeError:
            raise qubes.exc.QubesException(
                "Non-ASCII characters in backup header")
        for untrusted_line in untrusted_header_text.splitlines():
            if untrusted_line.count('=') != 1:
                raise qubes.exc.QubesException("Invalid backup header")
            key, value = untrusted_line.strip().split('=', 1)
            if not _re_alphanum.match(key):
                raise qubes.exc.QubesException("Invalid backup header (key)")
            if key not in self.header_keys:
                # Ignoring unknown option
                continue
            if not _re_alphanum.match(value):
                raise qubes.exc.QubesException("Invalid backup header (value)")
            attr = self.header_keys[key]
            if getattr(self, attr) is not None:
                raise qubes.exc.QubesException(
                    "Duplicated header line: {}".format(key))
            if key in self.bool_options:
                value = value.lower() in ["1", "true", "yes"]
            elif key in self.int_options:
                # The regex above also accepts letters, '-' and the empty
                # string; report those as a header error instead of leaking
                # a bare ValueError out of this security-critical parser.
                try:
                    value = int(value)
                except ValueError:
                    raise qubes.exc.QubesException(
                        "Invalid backup header (value)")
            setattr(self, attr, value)

        self.validate()

    def validate(self):
        """Check that all attributes required by the format version are set.

        Version 1 has no real header, so nothing is required. Versions 2-4
        require ``version``, ``encrypted``, ``compressed`` and
        ``hmac_algorithm``, plus ``crypto_algorithm`` when encrypted,
        ``compression_filter`` when compressed (version 3+) and
        ``backup_id`` (version 4+).

        :raises qubes.exc.QubesException: when a required attribute is
            ``None`` or the version is not supported
        """
        if self.version == 1:
            # header not really present
            pass
        elif self.version in [2, 3, 4]:
            expected_attrs = ['version', 'encrypted', 'compressed',
                'hmac_algorithm']
            if self.encrypted:
                expected_attrs += ['crypto_algorithm']
            if self.version >= 3 and self.compressed:
                expected_attrs += ['compression_filter']
            if self.version >= 4:
                expected_attrs += ['backup_id']
            for key in expected_attrs:
                if getattr(self, key) is None:
                    raise qubes.exc.QubesException(
                        "Backup header lack '{}' info".format(key))
        else:
            raise qubes.exc.QubesException(
                "Unsupported backup version {}".format(self.version))

    def save(self, filename):
        """Write the header to *filename* as ``key=value`` lines.

        ``version`` is always written first; attributes equal to ``None``
        are skipped.

        :param filename: path of the file to (over)write
        """
        with open(filename, "w") as f:
            # make sure 'version' is the first key
            f.write('version={}\n'.format(self.version))
            for key, attr in self.header_keys.items():
                if key == 'version':
                    continue
                if getattr(self, attr) is None:
                    continue
                f.write("{!s}={!s}\n".format(key, getattr(self, attr)))
|
2015-05-03 14:45:01 +02:00
|
|
|
|
2016-03-14 13:34:17 +01:00
|
|
|
|
2014-01-13 04:27:19 +01:00
|
|
|
class SendWorker(Process):
    """Worker process streaming prepared backup files to the backup output.

    File names are taken from *queue*; each one is wrapped into a minimal
    tar stream written to *backup_stdout* and then removed from disk. The
    worker stops when it receives ``None``, ``QUEUE_FINISHED`` or
    ``QUEUE_ERROR`` from the queue.
    """

    def __init__(self, queue, base_dir, backup_stdout):
        """
        :param queue: queue delivering file names (relative to *base_dir*)
        :param base_dir: directory containing the files to send
        :param backup_stdout: file object (or descriptor) used as stdout of
            the ``tar`` process
        """
        super(SendWorker, self).__init__()
        self.queue = queue
        self.base_dir = base_dir
        self.backup_stdout = backup_stdout
        self.log = logging.getLogger('qubes.backup')

    def run(self):
        """Send queued files until a terminating entry is received.

        :raises qubes.exc.QubesException: when ``tar`` exits with status 2
            or greater
        """
        self.log.debug("Started sending thread")

        # the message used to lack a placeholder, so the path was never logged
        self.log.debug("Moving to temporary dir {}".format(self.base_dir))
        os.chdir(self.base_dir)

        for filename in iter(self.queue.get, None):
            if filename in (QUEUE_FINISHED, QUEUE_ERROR):
                break

            self.log.debug("Sending file {}".format(filename))
            # The tar used for sending data out needs to be as simple and
            # as featureless as possible. It will not be verified before
            # untaring.
            tar_final_cmd = ["tar", "-cO", "--posix",
                             "-C", self.base_dir, filename]
            final_proc = subprocess.Popen(tar_final_cmd,
                                          stdin=subprocess.PIPE,
                                          stdout=self.backup_stdout)
            if final_proc.wait() >= 2:
                if self.queue.full():
                    # if queue is already full, remove some entry to wake up
                    # main thread, so it will be able to notice error
                    self.queue.get()
                # handle only exit code 2 (tar fatal error) or
                # greater (call failed?)
                raise qubes.exc.QubesException(
                    "ERROR: Failed to write the backup, out of disk space? "
                    "Check console output or ~/.xsession-errors for details.")

            # Delete the file as we don't need it anymore
            self.log.debug("Removing file {}".format(filename))
            os.remove(filename)

        self.log.debug("Finished sending thread")
|
2013-11-25 05:41:13 +01:00
|
|
|
|
2015-05-03 14:45:01 +02:00
|
|
|
|
backup: use 'scrypt' tool for backup encryption and integrity protection
`openssl dgst` and `openssl enc`, used previously, handle key
stretching poorly - in the case of `openssl enc` the encryption key is
derived using a single MD5 iteration, without even any salt. This hardly
prevents brute-force or even rainbow-table attacks. To make things worse, the
same key is used for encryption and integrity protection, which eases
brute force even further.
All this is still about brute-force attacks, so when using a long,
high-entropy passphrase, it should still be relatively safe. But let's do
better.
According to the discussion in QubesOS/qubes-issues#971, the scrypt algorithm
is a good choice for key stretching (it isn't the best of all existing ones,
but a good one and widely adopted). At the same time, let's switch away from
the `openssl` tool, as it is very limited and apparently not designed for
production use. Use the `scrypt` tool, which is very simple and does exactly
what we need - it encrypts the data and integrity-protects it. Its archive
format has its own (simple) header with the data required by the `scrypt`
algorithm, including the salt. Internally, data is encrypted with AES256-CTR
and integrity-protected with HMAC-SHA256. For details see:
https://github.com/tarsnap/scrypt/blob/master/FORMAT
This means a change of the backup format. Mainly:
1. The HMAC is stored in the scrypt header, so don't use a separate file for it.
Instead, keep the data in files with the `.enc` extension.
2. For compatibility, keep `backup-header` and `backup-header.hmac`. But
`backup-header.hmac` is really a scrypt-encrypted version of `backup-header`.
3. For each file, prepend its identifier to the passphrase, to
authenticate the filename itself too. With this we can guard against
reordering archive files within a single backup and across backups. This
identifier is built as:
backup ID (from backup-header)!filename!
For backup-header itself, there is no backup ID (just 'backup-header!').
Fixes QubesOS/qubes-issues#971
2016-10-20 12:56:51 +02:00
|
|
|
def launch_proc_with_pty(args, stdin=None, stdout=None, stderr=None, echo=True):
    """Similar to pty.fork, but handle stdin/stdout according to parameters
    instead of connecting to the pty

    A new pty pair is opened; the child starts its own session and gets the
    slave end as its controlling terminal, while its standard streams are
    whatever *stdin*/*stdout*/*stderr* request.

    :param args: command line, passed to :py:class:`subprocess.Popen`
    :param stdin: ``stdin`` argument for :py:class:`subprocess.Popen`
    :param stdout: ``stdout`` argument for :py:class:`subprocess.Popen`
    :param stderr: ``stderr`` argument for :py:class:`subprocess.Popen`
    :param echo: when false, terminal echo is disabled on the pty
    :return tuple (subprocess.Popen, pty_master)
    """

    def set_ctty(ctty_fd, master_fd):
        # runs in the child, between fork and exec
        os.setsid()
        os.close(master_fd)
        fcntl.ioctl(ctty_fd, termios.TIOCSCTTY, 0)
        if not echo:
            termios_p = termios.tcgetattr(ctty_fd)
            # termios_p.c_lflags
            termios_p[3] &= ~termios.ECHO
            termios.tcsetattr(ctty_fd, termios.TCSANOW, termios_p)

    (pty_master, pty_slave) = os.openpty()
    try:
        p = subprocess.Popen(args, stdin=stdin, stdout=stdout, stderr=stderr,
            preexec_fn=lambda: set_ctty(pty_slave, pty_master))
    except Exception:
        # nobody will ever use the master end - do not leak it
        os.close(pty_master)
        raise
    finally:
        # the parent never needs the slave end, regardless of the outcome
        os.close(pty_slave)
    return p, os.fdopen(pty_master, 'w+')
|
|
|
|
|
|
|
|
|
|
|
|
def launch_scrypt(action, input_name, output_name, passphrase):
    '''
    Start the 'scrypt' tool and feed it the passphrase through a pty.

    The expected prompt(s) are read from the process stderr: one prompt for
    decryption, plus a confirmation prompt for encryption. After each
    matching prompt the passphrase (UTF-8 encoded, newline terminated) is
    written to the pty.

    :param action: 'enc' or 'dec'
    :param input_name: input path or '-' for stdin
    :param output_name: output path or '-' for stdout
    :param passphrase: passphrase
    :return: subprocess.Popen object; the pty file object is attached to it
        as ``pty`` attribute
    :raises qubes.exc.QubesException: when scrypt prints something else
        than the expected prompt
    '''
    # only connect a pipe where scrypt is told to use stdin/stdout
    stdin_spec = subprocess.PIPE if input_name == '-' else None
    stdout_spec = subprocess.PIPE if output_name == '-' else None
    proc, pty_file = launch_proc_with_pty(
        ['scrypt', action, input_name, output_name],
        stdin=stdin_spec,
        stdout=stdout_spec,
        stderr=subprocess.PIPE,
        echo=False)

    expected_prompts = ['Please enter passphrase: ']
    if action == 'enc':
        # encryption asks for the passphrase twice
        expected_prompts.append('Please confirm passphrase: ')

    for expected in expected_prompts:
        received = proc.stderr.read(len(expected))
        if received != expected:
            raise qubes.exc.QubesException(
                'Unexpected prompt from scrypt: {}'.format(received))
        pty_file.write(passphrase.encode('utf-8') + b'\n')
        pty_file.flush()

    # keep a reference on the process object, so garbage collector would not
    # close the pty (which would kill the child)
    proc.pty = pty_file
    return proc
|
|
|
|
|
|
|
|
|
2016-03-13 13:49:42 +01:00
|
|
|
class Backup(object):
|
2016-04-03 03:09:15 +02:00
|
|
|
class FileToBackup(object):
    """Description of a single file to be placed in the backup archive."""

    def __init__(self, file_path, subdir=None, name=None):
        """
        :param file_path: real path to the file
        :param subdir: directory inside the archive; when ``None`` it is
            derived from the file location relative to the Qubes base
            directory (the file must live under that directory)
        :param name: name to use inside the archive; defaults to the base
            name of *file_path*
        """
        disk_usage = qubes.storage.file.get_disk_usage(file_path)

        if subdir is None:
            base_prefix = os.path.abspath(
                qubes.config.system_path["qubes_base_dir"]) + '/'
            file_dir = os.path.dirname(os.path.abspath(file_path)) + '/'
            # everything after the base directory prefix becomes the subdir
            (before, matched, subdir) = file_dir.partition(base_prefix)
            assert before == ""
            assert matched == base_prefix
        elif len(subdir) > 0 and not subdir.endswith('/'):
            # non-empty subdir is always stored with a trailing slash
            subdir += '/'

        default_name = os.path.basename(file_path)

        #: real path to the file
        self.path = file_path
        #: size of the file
        self.size = disk_usage
        #: directory in backup archive where file should be placed
        self.subdir = subdir
        #: use this name in the archive (aka rename)
        self.name = default_name if name is None else name
|
2016-04-03 03:09:15 +02:00
|
|
|
|
|
|
|
class VMToBackup(object):
    """A VM together with the list of its files selected for backup."""

    def __init__(self, vm, files, subdir):
        """
        :param vm: VM object
        :param files: iterable of objects having a ``size`` attribute
        :param subdir: directory of this VM inside the archive
        """
        self.subdir = subdir
        self.files = files
        self.vm = vm

    @property
    def size(self):
        """Sum of ``size`` of all :py:attr:`files` (0 when there are none)."""
        return sum(entry.size for entry in self.files)
|
|
|
|
|
2016-03-13 13:49:42 +01:00
|
|
|
def __init__(self, app, vms_list=None, exclude_list=None, **kwargs):
    """
    Prepare a backup of the selected VMs.

    :param app: application object
    :param vms_list: VMs to back up; when ``None``, every domain of *app*
        having ``include_in_backups`` set is taken
    :param exclude_list: names of VMs to leave out; always applied
    :param kwargs: overrides for the backup settings defined below, given
        as ``attribute=value``
    :raises AttributeError: for a keyword not matching an existing attribute
    """
    super(Backup, self).__init__()

    # NOTE(review): presumably a size in bytes (100 MiB); its consumer is
    # not visible in this part of the file
    self.chunk_size = 100 * 1024 * 1024
    #: progress of the backup - bytes handled of the current VM
    self._current_vm_bytes = 0
    #: progress of the backup - bytes handled of finished VMs
    self._done_vms_bytes = 0
    #: total backup size (set by :py:meth:`get_files_to_backup`)
    self.total_backup_bytes = 0
    #: application object
    self.app = app
    #: directory for temporary files - set after creating the directory
    self.tmpdir = None

    # Backup settings - defaults
    #: should the backup be encrypted?
    self.encrypted = True
    #: should the backup be compressed?
    self.compressed = True
    #: what passphrase should be used to integrity protect (and encrypt)
    #: the backup; required
    self.passphrase = None
    #: custom hmac algorithm
    self.hmac_algorithm = DEFAULT_HMAC_ALGORITHM
    #: custom encryption algorithm
    self.crypto_algorithm = DEFAULT_CRYPTO_ALGORITHM
    #: custom compression filter; a program which process stdin to stdout
    self.compression_filter = DEFAULT_COMPRESSION_FILTER
    #: VM to which backup should be sent (if any)
    self.target_vm = None
    #: directory to save backup in (either in dom0 or target VM,
    #: depending on :py:attr:`target_vm`
    self.target_dir = None
    #: callback for progress reporting. Will be called with one argument
    #: - progress in percents
    self.progress_callback = None
    #: backup ID, needs to be unique (for a given user),
    #: not necessary unpredictable; automatically generated
    id_format = '%Y%m%dT%H%M%S-' + str(os.getpid())
    self.backup_id = datetime.datetime.now().strftime(id_format)

    # only attributes defined up to this point can be overridden
    for option, option_value in kwargs.iteritems():
        if not hasattr(self, option):
            raise AttributeError(option)
        setattr(self, option, option_value)

    #: whether backup was canceled
    self.canceled = False
    #: list of PIDs to kill on backup cancel
    self.processes_to_kill_on_cancel = []

    self.log = logging.getLogger('qubes.backup')

    if not self.encrypted:
        self.log.warning(
            "'encrypted' option is ignored, backup is always encrypted")

    excluded_names = [] if exclude_list is None else exclude_list

    if vms_list is None:
        vms_list = [vm for vm in app.domains if vm.include_in_backups]

    # Apply exclude list
    self.vms_for_backup = [
        vm for vm in vms_list if vm.name not in excluded_names]

    self._files_to_backup = self.get_files_to_backup()
|
|
|
|
|
2016-03-13 13:49:42 +01:00
|
|
|
def __del__(self):
|
|
|
|
if self.tmpdir and os.path.exists(self.tmpdir):
|
|
|
|
shutil.rmtree(self.tmpdir)
|
|
|
|
|
|
|
|
def cancel(self):
|
|
|
|
"""Cancel running backup operation. Can be called from another thread.
|
|
|
|
"""
|
|
|
|
self.canceled = True
|
|
|
|
for proc in self.processes_to_kill_on_cancel:
|
|
|
|
try:
|
|
|
|
proc.terminate()
|
2016-03-14 13:30:55 +01:00
|
|
|
except OSError:
|
2016-03-13 13:49:42 +01:00
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
def get_files_to_backup(self):
    """Collect the files to back up for every selected VM.

    Also updates :py:attr:`total_backup_bytes` with the summed size of all
    collected entries.

    :return: dict mapping VM qid to a ``VMToBackup`` object; dom0 (qid 0),
        when selected, is represented by the home directory of the first
        member of the ``qubes`` group
    """
    collected = {}
    for vm in self.vms_for_backup:
        if vm.qid == 0:
            # handle dom0 later
            continue

        subdir = 'vm%d/' % vm.qid if self.encrypted else None

        vm_files = []
        if vm.volumes['private'] is not None:
            private_img = vm.storage.export('private')
            vm_files.append(
                self.FileToBackup(private_img, subdir, 'private.img'))

        vm_files.append(self.FileToBackup(vm.icon_path, subdir))
        vm_files.extend(
            self.FileToBackup(extra_path, subdir)
            for extra_path in vm.fire_event('backup-get-files'))

        # TODO: drop after merging firewall.xml into qubes.xml
        firewall_conf = os.path.join(vm.dir_path, vm.firewall_conf)
        if os.path.exists(firewall_conf):
            vm_files.append(self.FileToBackup(firewall_conf, subdir))

        if vm.updateable:
            root_img = vm.storage.export('root')
            vm_files.append(
                self.FileToBackup(root_img, subdir, 'root.img'))

        collected[vm.qid] = self.VMToBackup(vm, vm_files, subdir)

    # Dom0 user home
    if any(vm.qid == 0 for vm in self.vms_for_backup):
        local_user = grp.getgrnam('qubes').gr_mem[0]
        home_dir = pwd.getpwnam(local_user).pw_dir
        # Home dir should have only user-owned files, so fix it now
        # to prevent permissions problems - some root-owned files can be
        # left after 'sudo bash' and similar commands
        subprocess.check_call(['sudo', 'chown', '-R', local_user, home_dir])

        dom0_files = [self.FileToBackup(home_dir, 'dom0-home/')]
        dom0_subdir = os.path.join('dom0-home', os.path.basename(home_dir))
        collected[0] = self.VMToBackup(
            self.app.domains[0], dom0_files, dom0_subdir)

    self.total_backup_bytes = sum(
        vm_entry.size for vm_entry in collected.values())
    return collected
|
|
|
|
|
|
|
|
|
|
|
|
def get_backup_summary(self):
    """Build a human-readable table describing what will be backed up.

    The table has one row per entry of the collected files (VM name, type
    and size), a "Total size:" row, and is followed by the list of VMs not
    selected for backup. Running VMs (other than dom0) get a warning
    appended to their row.

    :return: multi-line summary text
    """
    fields_to_display = [
        {"name": "VM", "width": 16},
        {"name": "type", "width": 12},
        {"name": "size", "width": 12}
    ]
    name_width, type_width, size_width = [
        f["width"] for f in fields_to_display]

    def cell(text, width):
        # right-aligned value padded to width + 1, then the column delimiter
        return "{0:>{1}} |".format(text, width + 1)

    # horizontal rule matching the column layout, e.g. "-----+-----+"
    separator = "".join(
        "{0:-^{1}}-+".format('-', f["width"] + 1)
        for f in fields_to_display) + "\n"

    parts = [separator]
    # Display the header
    parts.extend(cell(f["name"], f["width"]) for f in fields_to_display)
    parts.append("\n")
    parts.append(separator)

    # entries are VMToBackup objects: use attribute access (they do not
    # support subscripting)
    for qid, vm_info in self._files_to_backup.items():
        if qid == 0:
            vm_type = "User home"
        elif isinstance(vm_info.vm, qubes.vm.templatevm.TemplateVM):
            vm_type = "Template VM"
        else:
            vm_type = "VM" + (" + Sys" if vm_info.vm.updateable else "")

        row = cell(vm_info.vm.name, name_width)
        row += cell(vm_type, type_width)
        row += cell(size_to_human(vm_info.size), size_width)

        if qid != 0 and vm_info.vm.is_running():
            row += " <-- The VM is running, please shut it down before " \
                "proceeding with the backup!"

        parts.append(row + "\n")

    parts.append(separator)

    parts.append(cell("Total size:", name_width))
    # the total spans the 'type' and 'size' columns, including the " |"
    # delimiter between them
    parts.append(cell(size_to_human(self.total_backup_bytes),
        type_width + 1 + 2 + size_width))
    parts.append("\n")

    parts.append(separator)

    vms_not_for_backup = [vm.name for vm in self.app.domains
                          if vm not in self.vms_for_backup]
    parts.append("VMs not selected for backup:\n - " + "\n - ".join(
        sorted(vms_not_for_backup)))

    return "".join(parts)
|
|
|
|
|
|
|
|
def prepare_backup_header(self):
    """Create the backup header file and its scrypt-protected companion.

    A ``BackupHeader`` describing this backup (format version, hmac and
    crypto algorithms, encrypted/compressed flags, compression filter and
    backup ID) is saved as ``HEADER_FILENAME`` inside ``self.tmpdir``.
    The saved file is then handed to ``launch_scrypt`` in ``enc`` mode,
    with ``<header path>.hmac`` as the second path argument.

    :returns: tuple ``(HEADER_FILENAME, HEADER_FILENAME + ".hmac")``, i.e.
        the two file names without the ``self.tmpdir`` prefix
    :raises qubes.exc.QubesException: when the scrypt process exits with a
        non-zero status; the message carries what scrypt wrote to stderr
    """
    header_path = os.path.join(self.tmpdir, HEADER_FILENAME)

    header_fields = dict(
        version=CURRENT_BACKUP_FORMAT_VERSION,
        hmac_algorithm=self.hmac_algorithm,
        crypto_algorithm=self.crypto_algorithm,
        encrypted=self.encrypted,
        compressed=self.compressed,
        compression_filter=self.compression_filter,
        backup_id=self.backup_id,
    )
    BackupHeader(**header_fields).save(header_path)

    # scrypt both encrypts and integrity-protects its input. The file name
    # is prepended to the passphrase so that the name itself is
    # authenticated too; the header is the one file that carries no
    # backup ID in that prefix.
    header_passphrase = u'{filename}!{passphrase}'.format(
        filename=HEADER_FILENAME, passphrase=self.passphrase)
    scrypt = launch_scrypt(
        'enc', header_path, header_path + '.hmac', header_passphrase)

    if scrypt.wait() == 0:
        return HEADER_FILENAME, HEADER_FILENAME + ".hmac"

    raise qubes.exc.QubesException(
        "Failed to compute hmac of header file: "
        + scrypt.stderr.read())
|
|
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
def _queue_put_with_check(proc, vmproc, queue, element):
|
2015-11-25 00:59:54 +01:00
|
|
|
if queue.full():
|
|
|
|
if not proc.is_alive():
|
|
|
|
if vmproc:
|
|
|
|
message = ("Failed to write the backup, VM output:\n" +
|
|
|
|
vmproc.stderr.read())
|
|
|
|
else:
|
|
|
|
message = "Failed to write the backup. Out of disk space?"
|
2016-03-10 11:22:52 +01:00
|
|
|
raise qubes.exc.QubesException(message)
|
2015-11-25 00:59:54 +01:00
|
|
|
queue.put(element)
|
|
|
|
|
2016-03-13 13:49:42 +01:00
|
|
|
def _send_progress_update(self):
|
|
|
|
if callable(self.progress_callback):
|
|
|
|
progress = (
|
|
|
|
100 * (self._done_vms_bytes + self._current_vm_bytes) /
|
|
|
|
self.total_backup_bytes)
|
|
|
|
self.progress_callback(progress)
|
2014-03-17 21:15:39 +01:00
|
|
|
|
2016-03-13 13:49:42 +01:00
|
|
|
def _add_vm_progress(self, bytes_done):
|
|
|
|
self._current_vm_bytes += bytes_done
|
|
|
|
self._send_progress_update()
|
|
|
|
|
|
|
|
def backup_do(self):
|
|
|
|
if self.passphrase is None:
|
|
|
|
raise qubes.exc.QubesException("No passphrase set")
|
2016-04-02 23:47:41 +02:00
|
|
|
qubes_xml = self.app.store
|
2016-03-13 13:49:42 +01:00
|
|
|
self.tmpdir = tempfile.mkdtemp()
|
|
|
|
shutil.copy(qubes_xml, os.path.join(self.tmpdir, 'qubes.xml'))
|
|
|
|
qubes_xml = os.path.join(self.tmpdir, 'qubes.xml')
|
|
|
|
backup_app = qubes.Qubes(qubes_xml)
|
|
|
|
|
2016-06-15 19:10:19 +02:00
|
|
|
files_to_backup = self._files_to_backup
|
2016-03-13 13:49:42 +01:00
|
|
|
# make sure backup_content isn't set initially
|
|
|
|
for vm in backup_app.domains:
|
2016-04-03 03:33:58 +02:00
|
|
|
vm.features['backup-content'] = False
|
2016-03-13 13:49:42 +01:00
|
|
|
|
|
|
|
for qid, vm_info in files_to_backup.iteritems():
|
2016-04-03 03:09:15 +02:00
|
|
|
if qid != 0 and vm_info.vm.is_running():
|
|
|
|
raise qubes.exc.QubesVMNotHaltedError(vm_info.vm)
|
2016-03-13 13:49:42 +01:00
|
|
|
# VM is included in the backup
|
2016-04-03 03:33:58 +02:00
|
|
|
backup_app.domains[qid].features['backup-content'] = True
|
|
|
|
backup_app.domains[qid].features['backup-path'] = vm_info.subdir
|
|
|
|
backup_app.domains[qid].features['backup-size'] = vm_info.size
|
2016-03-13 13:49:42 +01:00
|
|
|
backup_app.save()
|
|
|
|
|
|
|
|
vmproc = None
|
|
|
|
tar_sparse = None
|
|
|
|
if self.target_vm is not None:
|
|
|
|
# Prepare the backup target (Qubes service call)
|
|
|
|
# If APPVM, STDOUT is a PIPE
|
2016-04-03 03:19:30 +02:00
|
|
|
vmproc = self.target_vm.run_service('qubes.Backup',
|
|
|
|
passio_popen=True, passio_stderr=True)
|
2016-03-13 13:49:42 +01:00
|
|
|
vmproc.stdin.write(self.target_dir.
|
|
|
|
replace("\r", "").replace("\n", "") + "\n")
|
|
|
|
backup_stdout = vmproc.stdin
|
|
|
|
self.processes_to_kill_on_cancel.append(vmproc)
|
|
|
|
else:
|
|
|
|
# Prepare the backup target (local file)
|
|
|
|
if os.path.isdir(self.target_dir):
|
|
|
|
backup_target = self.target_dir + "/qubes-{0}". \
|
|
|
|
format(time.strftime("%Y-%m-%dT%H%M%S"))
|
|
|
|
else:
|
|
|
|
backup_target = self.target_dir
|
2013-11-25 05:41:13 +01:00
|
|
|
|
2016-03-13 13:49:42 +01:00
|
|
|
# Create the target directory
|
|
|
|
if not os.path.exists(os.path.dirname(self.target_dir)):
|
|
|
|
raise qubes.exc.QubesException(
|
|
|
|
"ERROR: the backup directory for {0} does not exists".
|
|
|
|
format(self.target_dir))
|
2013-11-25 05:41:13 +01:00
|
|
|
|
2016-03-13 13:49:42 +01:00
|
|
|
# If not APPVM, STDOUT is a local file
|
|
|
|
backup_stdout = open(backup_target, 'wb')
|
2014-01-15 03:53:45 +01:00
|
|
|
|
2016-03-14 13:34:17 +01:00
|
|
|
# Tar with tape length does not deals well with stdout
|
|
|
|
# (close stdout between two tapes)
|
2016-03-13 13:49:42 +01:00
|
|
|
# For this reason, we will use named pipes instead
|
2016-03-14 12:04:07 +01:00
|
|
|
self.log.debug("Working in {}".format(self.tmpdir))
|
2013-11-25 05:41:13 +01:00
|
|
|
|
2016-03-13 13:49:42 +01:00
|
|
|
backup_pipe = os.path.join(self.tmpdir, "backup_pipe")
|
2016-03-14 12:04:07 +01:00
|
|
|
self.log.debug("Creating pipe in: {}".format(backup_pipe))
|
2016-03-13 13:49:42 +01:00
|
|
|
os.mkfifo(backup_pipe)
|
2013-11-25 05:41:13 +01:00
|
|
|
|
2016-03-14 12:04:07 +01:00
|
|
|
self.log.debug("Will backup: {}".format(files_to_backup))
|
2013-11-25 05:41:13 +01:00
|
|
|
|
2016-03-13 13:49:42 +01:00
|
|
|
header_files = self.prepare_backup_header()
|
2013-11-25 05:41:13 +01:00
|
|
|
|
2016-03-13 13:49:42 +01:00
|
|
|
# Setup worker to send encrypted data chunks to the backup_target
|
|
|
|
to_send = Queue(10)
|
|
|
|
send_proc = SendWorker(to_send, self.tmpdir, backup_stdout)
|
|
|
|
send_proc.start()
|
2013-11-25 05:41:13 +01:00
|
|
|
|
2016-03-13 13:49:42 +01:00
|
|
|
for f in header_files:
|
|
|
|
to_send.put(f)
|
2013-11-25 05:41:13 +01:00
|
|
|
|
2016-04-03 03:09:15 +02:00
|
|
|
qubes_xml_info = self.VMToBackup(
|
|
|
|
None,
|
|
|
|
[self.FileToBackup(qubes_xml, '')],
|
|
|
|
''
|
|
|
|
)
|
2016-03-13 13:49:42 +01:00
|
|
|
for vm_info in itertools.chain([qubes_xml_info],
|
2016-06-15 19:10:19 +02:00
|
|
|
files_to_backup.itervalues()):
|
2016-04-03 03:09:15 +02:00
|
|
|
for file_info in vm_info.files:
|
2013-11-25 05:41:13 +01:00
|
|
|
|
2016-03-14 12:04:07 +01:00
|
|
|
self.log.debug("Backing up {}".format(file_info))
|
2016-03-13 13:49:42 +01:00
|
|
|
|
2016-03-14 13:34:17 +01:00
|
|
|
backup_tempfile = os.path.join(
|
2016-04-03 03:09:15 +02:00
|
|
|
self.tmpdir, file_info.subdir,
|
2016-10-05 01:55:30 +02:00
|
|
|
file_info.name)
|
2016-03-14 12:04:07 +01:00
|
|
|
self.log.debug("Using temporary location: {}".format(
|
|
|
|
backup_tempfile))
|
2016-03-13 13:49:42 +01:00
|
|
|
|
|
|
|
# Ensure the temporary directory exists
|
|
|
|
if not os.path.isdir(os.path.dirname(backup_tempfile)):
|
|
|
|
os.makedirs(os.path.dirname(backup_tempfile))
|
|
|
|
|
2016-03-14 13:34:17 +01:00
|
|
|
# The first tar cmd can use any complex feature as we want.
|
|
|
|
# Files will be verified before untaring this.
|
2016-03-13 13:49:42 +01:00
|
|
|
# Prefix the path in archive with filename["subdir"] to have it
|
|
|
|
# verified during untar
|
|
|
|
tar_cmdline = (["tar", "-Pc", '--sparse',
|
|
|
|
"-f", backup_pipe,
|
2016-04-03 03:09:15 +02:00
|
|
|
'-C', os.path.dirname(file_info.path)] +
|
2016-03-14 13:34:17 +01:00
|
|
|
(['--dereference'] if
|
2016-04-03 03:09:15 +02:00
|
|
|
file_info.subdir != "dom0-home/" else []) +
|
2016-10-05 01:55:30 +02:00
|
|
|
['--xform=s:^%s:%s\\0:' % (
|
2016-04-03 03:09:15 +02:00
|
|
|
os.path.basename(file_info.path),
|
|
|
|
file_info.subdir),
|
|
|
|
os.path.basename(file_info.path)
|
2016-03-14 13:34:17 +01:00
|
|
|
])
|
2016-10-05 01:55:30 +02:00
|
|
|
file_stat = os.stat(file_info.path)
|
|
|
|
if stat.S_ISBLK(file_stat.st_mode) or \
|
|
|
|
file_info.name != os.path.basename(file_info.path):
|
|
|
|
# tar doesn't handle content of block device, use our
|
|
|
|
# writer
|
|
|
|
# also use our tar writer when renaming file
|
|
|
|
assert not stat.S_ISDIR(file_stat.st_mode),\
|
|
|
|
"Renaming directories not supported"
|
|
|
|
tar_cmdline = ['python', '-m', 'qubes.tarwriter',
|
|
|
|
'--override-name=%s' % (
|
|
|
|
os.path.join(file_info.subdir, os.path.basename(
|
|
|
|
file_info.name))),
|
|
|
|
file_info.path,
|
|
|
|
backup_pipe]
|
2016-03-13 13:49:42 +01:00
|
|
|
if self.compressed:
|
2016-10-05 01:55:30 +02:00
|
|
|
tar_cmdline.insert(-2,
|
2016-03-13 13:49:42 +01:00
|
|
|
"--use-compress-program=%s" % self.compression_filter)
|
|
|
|
|
2016-03-14 12:04:07 +01:00
|
|
|
self.log.debug(" ".join(tar_cmdline))
|
2016-03-13 13:49:42 +01:00
|
|
|
|
backup: use 'scrypt' tool for backup encryption and integrity protection
`openssl dgst` and `openssl enc` used previously poorly handle key
stretching - in case of `openssl enc` encryption key is derived using
single MD5 iteration, without even any salt. This hardly prevent
brute force or even rainbow tables attacks. To make things worse, the
same key is used for encryption and integrity protection which ease
brute force even further.
All this is still about brute force attacks, so when using long, high
entropy passphrase, it should be still relatively safe. But lets do
better.
According to discussion in QubesOS/qubes-issues#971, scrypt algorithm is
a good choice for key stretching (it isn't the best of all existing, but
a good one and widely adopted). At the same time, lets switch away from
`openssl` tool, as it is very limited and apparently not designed for
production use. Use `scrypt` tool, which is very simple and does exactly
what we need - encrypt the data and integrity protect it. Its archive
format have own (simple) header with data required by the `scrypt`
algorithm, including salt. Internally data is encrypted with AES256-CTR
and integrity protected with HMAC-SHA256. For details see:
https://github.com/tarsnap/scrypt/blob/master/FORMAT
This means change of backup format. Mainly:
1. HMAC is stored in scrypt header, so don't use separate file for it.
Instead have data in files with `.enc` extension.
2. For compatibility leave `backup-header` and `backup-header.hmac`. But
`backup-header.hmac` is really scrypt-encrypted version of `backup-header`.
3. For each file, prepend its identifier to the passphrase, to
authenticate filename itself too. Having this we can guard against
reordering archive files within a single backup and across backups. This
identifier is built as:
backup ID (from backup-header)!filename!
For backup-header itself, there is no backup ID (just 'backup-header!').
Fixes QubesOS/qubes-issues#971
2016-10-20 12:56:51 +02:00
|
|
|
# Pipe: tar-sparse | scrypt | tar | backup_target
|
2016-03-14 12:04:07 +01:00
|
|
|
# TODO: log handle stderr
|
|
|
|
tar_sparse = subprocess.Popen(
|
backup: use 'scrypt' tool for backup encryption and integrity protection
`openssl dgst` and `openssl enc` used previously poorly handle key
stretching - in case of `openssl enc` encryption key is derived using
single MD5 iteration, without even any salt. This hardly prevent
brute force or even rainbow tables attacks. To make things worse, the
same key is used for encryption and integrity protection which ease
brute force even further.
All this is still about brute force attacks, so when using long, high
entropy passphrase, it should be still relatively safe. But lets do
better.
According to discussion in QubesOS/qubes-issues#971, scrypt algorithm is
a good choice for key stretching (it isn't the best of all existing, but
a good one and widely adopted). At the same time, lets switch away from
`openssl` tool, as it is very limited and apparently not designed for
production use. Use `scrypt` tool, which is very simple and does exactly
what we need - encrypt the data and integrity protect it. Its archive
format have own (simple) header with data required by the `scrypt`
algorithm, including salt. Internally data is encrypted with AES256-CTR
and integrity protected with HMAC-SHA256. For details see:
https://github.com/tarsnap/scrypt/blob/master/FORMAT
This means change of backup format. Mainly:
1. HMAC is stored in scrypt header, so don't use separate file for it.
Instead have data in files with `.enc` extension.
2. For compatibility leave `backup-header` and `backup-header.hmac`. But
`backup-header.hmac` is really scrypt-encrypted version of `backup-header`.
3. For each file, prepend its identifier to the passphrase, to
authenticate filename itself too. Having this we can guard against
reordering archive files within a single backup and across backups. This
identifier is built as:
backup ID (from backup-header)!filename!
For backup-header itself, there is no backup ID (just 'backup-header!').
Fixes QubesOS/qubes-issues#971
2016-10-20 12:56:51 +02:00
|
|
|
tar_cmdline)
|
2016-03-13 13:49:42 +01:00
|
|
|
self.processes_to_kill_on_cancel.append(tar_sparse)
|
|
|
|
|
2016-03-14 13:34:17 +01:00
|
|
|
# Wait for compressor (tar) process to finish or for any
|
|
|
|
# error of other subprocesses
|
2016-03-13 13:49:42 +01:00
|
|
|
i = 0
|
backup: use 'scrypt' tool for backup encryption and integrity protection
`openssl dgst` and `openssl enc` used previously poorly handle key
stretching - in case of `openssl enc` encryption key is derived using
single MD5 iteration, without even any salt. This hardly prevent
brute force or even rainbow tables attacks. To make things worse, the
same key is used for encryption and integrity protection which ease
brute force even further.
All this is still about brute force attacks, so when using long, high
entropy passphrase, it should be still relatively safe. But lets do
better.
According to discussion in QubesOS/qubes-issues#971, scrypt algorithm is
a good choice for key stretching (it isn't the best of all existing, but
a good one and widely adopted). At the same time, lets switch away from
`openssl` tool, as it is very limited and apparently not designed for
production use. Use `scrypt` tool, which is very simple and does exactly
what we need - encrypt the data and integrity protect it. Its archive
format have own (simple) header with data required by the `scrypt`
algorithm, including salt. Internally data is encrypted with AES256-CTR
and integrity protected with HMAC-SHA256. For details see:
https://github.com/tarsnap/scrypt/blob/master/FORMAT
This means change of backup format. Mainly:
1. HMAC is stored in scrypt header, so don't use separate file for it.
Instead have data in files with `.enc` extension.
2. For compatibility leave `backup-header` and `backup-header.hmac`. But
`backup-header.hmac` is really scrypt-encrypted version of `backup-header`.
3. For each file, prepend its identifier to the passphrase, to
authenticate filename itself too. Having this we can guard against
reordering archive files within a single backup and across backups. This
identifier is built as:
backup ID (from backup-header)!filename!
For backup-header itself, there is no backup ID (just 'backup-header!').
Fixes QubesOS/qubes-issues#971
2016-10-20 12:56:51 +02:00
|
|
|
pipe = open(backup_pipe, 'rb')
|
2016-03-13 13:49:42 +01:00
|
|
|
run_error = "paused"
|
|
|
|
while run_error == "paused":
|
|
|
|
# Prepare a first chunk
|
backup: use 'scrypt' tool for backup encryption and integrity protection
`openssl dgst` and `openssl enc` used previously poorly handle key
stretching - in case of `openssl enc` encryption key is derived using
single MD5 iteration, without even any salt. This hardly prevent
brute force or even rainbow tables attacks. To make things worse, the
same key is used for encryption and integrity protection which ease
brute force even further.
All this is still about brute force attacks, so when using long, high
entropy passphrase, it should be still relatively safe. But lets do
better.
According to discussion in QubesOS/qubes-issues#971, scrypt algorithm is
a good choice for key stretching (it isn't the best of all existing, but
a good one and widely adopted). At the same time, lets switch away from
`openssl` tool, as it is very limited and apparently not designed for
production use. Use `scrypt` tool, which is very simple and does exactly
what we need - encrypt the data and integrity protect it. Its archive
format have own (simple) header with data required by the `scrypt`
algorithm, including salt. Internally data is encrypted with AES256-CTR
and integrity protected with HMAC-SHA256. For details see:
https://github.com/tarsnap/scrypt/blob/master/FORMAT
This means change of backup format. Mainly:
1. HMAC is stored in scrypt header, so don't use separate file for it.
Instead have data in files with `.enc` extension.
2. For compatibility leave `backup-header` and `backup-header.hmac`. But
`backup-header.hmac` is really scrypt-encrypted version of `backup-header`.
3. For each file, prepend its identifier to the passphrase, to
authenticate filename itself too. Having this we can guard against
reordering archive files within a single backup and across backups. This
identifier is built as:
backup ID (from backup-header)!filename!
For backup-header itself, there is no backup ID (just 'backup-header!').
Fixes QubesOS/qubes-issues#971
2016-10-20 12:56:51 +02:00
|
|
|
chunkfile = backup_tempfile + ".%03d.enc" % i
|
2016-03-13 13:49:42 +01:00
|
|
|
i += 1
|
backup: use 'scrypt' tool for backup encryption and integrity protection
`openssl dgst` and `openssl enc` used previously poorly handle key
stretching - in case of `openssl enc` encryption key is derived using
single MD5 iteration, without even any salt. This hardly prevent
brute force or even rainbow tables attacks. To make things worse, the
same key is used for encryption and integrity protection which ease
brute force even further.
All this is still about brute force attacks, so when using long, high
entropy passphrase, it should be still relatively safe. But lets do
better.
According to discussion in QubesOS/qubes-issues#971, scrypt algorithm is
a good choice for key stretching (it isn't the best of all existing, but
a good one and widely adopted). At the same time, lets switch away from
`openssl` tool, as it is very limited and apparently not designed for
production use. Use `scrypt` tool, which is very simple and does exactly
what we need - encrypt the data and integrity protect it. Its archive
format have own (simple) header with data required by the `scrypt`
algorithm, including salt. Internally data is encrypted with AES256-CTR
and integrity protected with HMAC-SHA256. For details see:
https://github.com/tarsnap/scrypt/blob/master/FORMAT
This means change of backup format. Mainly:
1. HMAC is stored in scrypt header, so don't use separate file for it.
Instead have data in files with `.enc` extension.
2. For compatibility leave `backup-header` and `backup-header.hmac`. But
`backup-header.hmac` is really scrypt-encrypted version of `backup-header`.
3. For each file, prepend its identifier to the passphrase, to
authenticate filename itself too. Having this we can guard against
reordering archive files within a single backup and across backups. This
identifier is built as:
backup ID (from backup-header)!filename!
For backup-header itself, there is no backup ID (just 'backup-header!').
Fixes QubesOS/qubes-issues#971
2016-10-20 12:56:51 +02:00
|
|
|
|
|
|
|
# Start encrypt, scrypt will also handle integrity
|
|
|
|
# protection
|
2016-10-25 21:28:59 +02:00
|
|
|
scrypt_passphrase = \
|
2016-10-26 20:45:24 +02:00
|
|
|
u'{backup_id}!{filename}!{passphrase}'.format(
|
2016-10-25 21:28:59 +02:00
|
|
|
backup_id=self.backup_id,
|
|
|
|
filename=os.path.relpath(chunkfile[:-4],
|
|
|
|
self.tmpdir),
|
2016-10-26 20:45:24 +02:00
|
|
|
passphrase=self.passphrase)
|
backup: use 'scrypt' tool for backup encryption and integrity protection
`openssl dgst` and `openssl enc` used previously poorly handle key
stretching - in case of `openssl enc` encryption key is derived using
single MD5 iteration, without even any salt. This hardly prevent
brute force or even rainbow tables attacks. To make things worse, the
same key is used for encryption and integrity protection which ease
brute force even further.
All this is still about brute force attacks, so when using long, high
entropy passphrase, it should be still relatively safe. But lets do
better.
According to discussion in QubesOS/qubes-issues#971, scrypt algorithm is
a good choice for key stretching (it isn't the best of all existing, but
a good one and widely adopted). At the same time, lets switch away from
`openssl` tool, as it is very limited and apparently not designed for
production use. Use `scrypt` tool, which is very simple and does exactly
what we need - encrypt the data and integrity protect it. Its archive
format have own (simple) header with data required by the `scrypt`
algorithm, including salt. Internally data is encrypted with AES256-CTR
and integrity protected with HMAC-SHA256. For details see:
https://github.com/tarsnap/scrypt/blob/master/FORMAT
This means change of backup format. Mainly:
1. HMAC is stored in scrypt header, so don't use separate file for it.
Instead have data in files with `.enc` extension.
2. For compatibility leave `backup-header` and `backup-header.hmac`. But
`backup-header.hmac` is really scrypt-encrypted version of `backup-header`.
3. For each file, prepend its identifier to the passphrase, to
authenticate filename itself too. Having this we can guard against
reordering archive files within a single backup and across backups. This
identifier is built as:
backup ID (from backup-header)!filename!
For backup-header itself, there is no backup ID (just 'backup-header!').
Fixes QubesOS/qubes-issues#971
2016-10-20 12:56:51 +02:00
|
|
|
scrypt = launch_scrypt(
|
|
|
|
"enc", "-", chunkfile, scrypt_passphrase)
|
2016-03-13 13:49:42 +01:00
|
|
|
|
2016-10-19 22:41:49 +02:00
|
|
|
run_error = handle_streams(
|
|
|
|
pipe,
|
backup: use 'scrypt' tool for backup encryption and integrity protection
`openssl dgst` and `openssl enc` used previously poorly handle key
stretching - in case of `openssl enc` encryption key is derived using
single MD5 iteration, without even any salt. This hardly prevent
brute force or even rainbow tables attacks. To make things worse, the
same key is used for encryption and integrity protection which ease
brute force even further.
All this is still about brute force attacks, so when using long, high
entropy passphrase, it should be still relatively safe. But lets do
better.
According to discussion in QubesOS/qubes-issues#971, scrypt algorithm is
a good choice for key stretching (it isn't the best of all existing, but
a good one and widely adopted). At the same time, lets switch away from
`openssl` tool, as it is very limited and apparently not designed for
production use. Use `scrypt` tool, which is very simple and does exactly
what we need - encrypt the data and integrity protect it. Its archive
format have own (simple) header with data required by the `scrypt`
algorithm, including salt. Internally data is encrypted with AES256-CTR
and integrity protected with HMAC-SHA256. For details see:
https://github.com/tarsnap/scrypt/blob/master/FORMAT
This means change of backup format. Mainly:
1. HMAC is stored in scrypt header, so don't use separate file for it.
Instead have data in files with `.enc` extension.
2. For compatibility leave `backup-header` and `backup-header.hmac`. But
`backup-header.hmac` is really scrypt-encrypted version of `backup-header`.
3. For each file, prepend its identifier to the passphrase, to
authenticate filename itself too. Having this we can guard against
reordering archive files within a single backup and across backups. This
identifier is built as:
backup ID (from backup-header)!filename!
For backup-header itself, there is no backup ID (just 'backup-header!').
Fixes QubesOS/qubes-issues#971
2016-10-20 12:56:51 +02:00
|
|
|
{'backup_target': scrypt.stdin},
|
|
|
|
{'vmproc': vmproc,
|
2016-10-19 22:41:49 +02:00
|
|
|
'addproc': tar_sparse,
|
backup: use 'scrypt' tool for backup encryption and integrity protection
`openssl dgst` and `openssl enc` used previously poorly handle key
stretching - in case of `openssl enc` encryption key is derived using
single MD5 iteration, without even any salt. This hardly prevent
brute force or even rainbow tables attacks. To make things worse, the
same key is used for encryption and integrity protection which ease
brute force even further.
All this is still about brute force attacks, so when using long, high
entropy passphrase, it should be still relatively safe. But lets do
better.
According to discussion in QubesOS/qubes-issues#971, scrypt algorithm is
a good choice for key stretching (it isn't the best of all existing, but
a good one and widely adopted). At the same time, lets switch away from
`openssl` tool, as it is very limited and apparently not designed for
production use. Use `scrypt` tool, which is very simple and does exactly
what we need - encrypt the data and integrity protect it. Its archive
format have own (simple) header with data required by the `scrypt`
algorithm, including salt. Internally data is encrypted with AES256-CTR
and integrity protected with HMAC-SHA256. For details see:
https://github.com/tarsnap/scrypt/blob/master/FORMAT
This means change of backup format. Mainly:
1. HMAC is stored in scrypt header, so don't use separate file for it.
Instead have data in files with `.enc` extension.
2. For compatibility leave `backup-header` and `backup-header.hmac`. But
`backup-header.hmac` is really scrypt-encrypted version of `backup-header`.
3. For each file, prepend its identifier to the passphrase, to
authenticate filename itself too. Having this we can guard against
reordering archive files within a single backup and across backups. This
identifier is built as:
backup ID (from backup-header)!filename!
For backup-header itself, there is no backup ID (just 'backup-header!').
Fixes QubesOS/qubes-issues#971
2016-10-20 12:56:51 +02:00
|
|
|
'scrypt': scrypt,
|
2016-10-19 22:41:49 +02:00
|
|
|
},
|
|
|
|
self.chunk_size,
|
|
|
|
self._add_vm_progress
|
|
|
|
)
|
2013-11-25 05:41:13 +01:00
|
|
|
|
2016-03-14 12:04:07 +01:00
|
|
|
self.log.debug(
|
2016-10-19 22:41:49 +02:00
|
|
|
"12 returned: {}".format(run_error))
|
2016-03-13 13:49:42 +01:00
|
|
|
|
|
|
|
if self.canceled:
|
|
|
|
try:
|
|
|
|
tar_sparse.terminate()
|
2016-03-14 13:30:55 +01:00
|
|
|
except OSError:
|
2016-03-13 13:49:42 +01:00
|
|
|
pass
|
|
|
|
tar_sparse.wait()
|
2016-04-02 23:51:37 +02:00
|
|
|
to_send.put(QUEUE_ERROR)
|
2016-03-13 13:49:42 +01:00
|
|
|
send_proc.join()
|
|
|
|
shutil.rmtree(self.tmpdir)
|
|
|
|
raise BackupCanceledError("Backup canceled")
|
|
|
|
if run_error and run_error != "size_limit":
|
|
|
|
send_proc.terminate()
|
|
|
|
if run_error == "VM" and vmproc:
|
|
|
|
raise qubes.exc.QubesException(
|
|
|
|
"Failed to write the backup, VM output:\n" +
|
|
|
|
vmproc.stderr.read(MAX_STDERR_BYTES))
|
|
|
|
else:
|
2016-03-14 13:34:17 +01:00
|
|
|
raise qubes.exc.QubesException(
|
|
|
|
"Failed to perform backup: error in " +
|
|
|
|
run_error)
|
2016-03-13 13:49:42 +01:00
|
|
|
|
backup: use 'scrypt' tool for backup encryption and integrity protection
`openssl dgst` and `openssl enc`, used previously, handle key
stretching poorly - in the case of `openssl enc` the encryption key is
derived using a single MD5 iteration, without even any salt. This hardly
prevents brute force or even rainbow table attacks. To make things worse,
the same key is used for encryption and integrity protection, which eases
brute force even further.
All this is still about brute force attacks, so when using a long,
high-entropy passphrase, it should still be relatively safe. But let's do
better.
According to discussion in QubesOS/qubes-issues#971, scrypt algorithm is
a good choice for key stretching (it isn't the best of all existing, but
a good one and widely adopted). At the same time, lets switch away from
`openssl` tool, as it is very limited and apparently not designed for
production use. Use `scrypt` tool, which is very simple and does exactly
what we need - encrypt the data and integrity protect it. Its archive
format have own (simple) header with data required by the `scrypt`
algorithm, including salt. Internally data is encrypted with AES256-CTR
and integrity protected with HMAC-SHA256. For details see:
https://github.com/tarsnap/scrypt/blob/master/FORMAT
This means change of backup format. Mainly:
1. HMAC is stored in scrypt header, so don't use separate file for it.
Instead have data in files with `.enc` extension.
2. For compatibility leave `backup-header` and `backup-header.hmac`. But
`backup-header.hmac` is really scrypt-encrypted version of `backup-header`.
3. For each file, prepend its identifier to the passphrase, to
authenticate filename itself too. Having this we can guard against
reordering archive files within a single backup and across backups. This
identifier is built as:
backup ID (from backup-header)!filename!
For backup-header itself, there is no backup ID (just 'backup-header!').
Fixes QubesOS/qubes-issues#971
2016-10-20 12:56:51 +02:00
|
|
|
scrypt.stdin.close()
|
|
|
|
scrypt.wait()
|
|
|
|
self.log.debug("scrypt return code: {}".format(
|
|
|
|
scrypt.poll()))
|
|
|
|
|
2016-03-13 13:49:42 +01:00
|
|
|
# Send the chunk to the backup target
|
|
|
|
self._queue_put_with_check(
|
|
|
|
send_proc, vmproc, to_send,
|
|
|
|
os.path.relpath(chunkfile, self.tmpdir))
|
|
|
|
|
|
|
|
if tar_sparse.poll() is None or run_error == "size_limit":
|
|
|
|
run_error = "paused"
|
|
|
|
else:
|
|
|
|
self.processes_to_kill_on_cancel.remove(tar_sparse)
|
2016-03-14 12:04:07 +01:00
|
|
|
self.log.debug(
|
|
|
|
"Finished tar sparse with exit code {}".format(
|
|
|
|
tar_sparse.poll()))
|
2016-03-13 13:49:42 +01:00
|
|
|
pipe.close()
|
2014-03-08 03:55:47 +01:00
|
|
|
|
2016-03-13 13:49:42 +01:00
|
|
|
# This VM done, update progress
|
2016-04-03 03:09:15 +02:00
|
|
|
self._done_vms_bytes += vm_info.size
|
2016-03-13 13:49:42 +01:00
|
|
|
self._current_vm_bytes = 0
|
|
|
|
self._send_progress_update()
|
2016-04-03 03:33:58 +02:00
|
|
|
# Save date of last backup
|
|
|
|
if vm_info.vm:
|
|
|
|
vm_info.vm.backup_timestamp = datetime.datetime.now()
|
2014-03-08 03:55:47 +01:00
|
|
|
|
2016-04-02 23:51:37 +02:00
|
|
|
self._queue_put_with_check(send_proc, vmproc, to_send, QUEUE_FINISHED)
|
2016-03-13 13:49:42 +01:00
|
|
|
send_proc.join()
|
|
|
|
shutil.rmtree(self.tmpdir)
|
2013-11-25 05:41:13 +01:00
|
|
|
|
2016-03-13 13:49:42 +01:00
|
|
|
if self.canceled:
|
|
|
|
raise BackupCanceledError("Backup canceled")
|
2013-11-25 05:41:13 +01:00
|
|
|
|
2016-03-13 13:49:42 +01:00
|
|
|
if send_proc.exitcode != 0:
|
|
|
|
raise qubes.exc.QubesException(
|
|
|
|
"Failed to send backup: error in the sending process")
|
2013-11-25 05:41:13 +01:00
|
|
|
|
2016-03-13 13:49:42 +01:00
|
|
|
if vmproc:
|
2016-03-14 12:04:07 +01:00
|
|
|
self.log.debug("VMProc1 proc return code: {}".format(vmproc.poll()))
|
|
|
|
if tar_sparse is not None:
|
|
|
|
self.log.debug("Sparse1 proc return code: {}".format(
|
|
|
|
tar_sparse.poll()))
|
2016-03-13 13:49:42 +01:00
|
|
|
vmproc.stdin.close()
|
|
|
|
|
|
|
|
self.app.save()
|
2013-11-25 05:41:13 +01:00
|
|
|
|
2015-05-03 14:45:01 +02:00
|
|
|
|
2016-10-19 22:41:49 +02:00
|
|
|
def handle_streams(stream_in, streams_out, processes, size_limit=None,
        progress_callback=None):
    '''
    Pump data from `stream_in` into every stream of `streams_out`, checking
    the given processes after each copied chunk. Copying stops as soon as a
    write fails, a monitored process reports a non-zero exit code, the input
    is exhausted, or `size_limit` bytes have been copied.

    :param stream_in: file-like object to read data from
    :param streams_out: dict (name -> file-like object) to write data to;
        entries whose value is None are skipped
    :param processes: dict (name -> subprocess.Popen) to monitor; entries
        whose value is None are skipped
    :param size_limit: int maximum amount of data to copy (a false value
        means no limit)
    :param progress_callback: callable called with the size of each chunk
        read (it should accumulate internally)
    :return: name of the failed process, name of the failed stream,
        "size_limit", or None when the input ended without error
    '''
    max_chunk = 409600
    copied_so_far = 0
    report = progress_callback if callable(progress_callback) else None

    while True:
        wanted = max_chunk
        if size_limit:
            wanted = min(max_chunk, size_limit - copied_so_far)
            if wanted <= 0:
                return "size_limit"

        data = stream_in.read(wanted)
        if len(data) == 0:
            # end of input
            return None

        if report is not None:
            report(len(data))

        for label in streams_out:
            sink = streams_out[label]
            if sink is None:
                continue
            try:
                sink.write(data)
            except IOError:
                return label
        copied_so_far += len(data)

        for label in processes:
            child = processes[label]
            # poll() is None while running and 0 on clean exit; any other
            # value means the process failed
            if child is not None and child.poll():
                return label
|
2013-11-25 05:41:13 +01:00
|
|
|
|
2016-03-14 13:34:17 +01:00
|
|
|
|
2014-09-26 03:24:19 +02:00
|
|
|
class ExtractWorker2(Process):
|
2016-03-13 13:49:42 +01:00
|
|
|
def __init__(self, queue, base_dir, passphrase, encrypted,
|
2016-03-14 12:04:07 +01:00
|
|
|
progress_callback, vmproc=None,
|
2015-05-03 14:45:01 +02:00
|
|
|
compressed=False, crypto_algorithm=DEFAULT_CRYPTO_ALGORITHM,
|
2016-09-29 01:52:31 +02:00
|
|
|
verify_only=False, relocate=None):
|
2014-09-26 03:24:19 +02:00
|
|
|
super(ExtractWorker2, self).__init__()
|
2016-09-29 01:52:31 +02:00
|
|
|
#: queue with files to extract
|
2013-11-25 05:41:13 +01:00
|
|
|
self.queue = queue
|
2016-09-29 01:52:31 +02:00
|
|
|
#: paths on the queue are relative to this dir
|
2013-11-25 05:41:13 +01:00
|
|
|
self.base_dir = base_dir
|
2016-09-29 01:52:31 +02:00
|
|
|
#: passphrase to decrypt/authenticate data
|
2013-11-25 05:41:13 +01:00
|
|
|
self.passphrase = passphrase
|
2016-09-29 01:52:31 +02:00
|
|
|
#: extract those files/directories to alternative locations (truncate,
|
|
|
|
# but not unlink target beforehand); if specific file is in the map,
|
|
|
|
# redirect it accordingly, otherwise check if the whole directory is
|
|
|
|
# there
|
|
|
|
self.relocate = relocate
|
|
|
|
#: is the backup encrypted?
|
2013-11-25 05:41:13 +01:00
|
|
|
self.encrypted = encrypted
|
2016-09-29 01:52:31 +02:00
|
|
|
#: is the backup compressed?
|
2013-12-02 14:05:41 +01:00
|
|
|
self.compressed = compressed
|
2016-09-29 01:52:31 +02:00
|
|
|
#: what crypto algorithm is used for encryption?
|
2014-01-15 03:50:29 +01:00
|
|
|
self.crypto_algorithm = crypto_algorithm
|
2016-09-29 01:52:31 +02:00
|
|
|
#: only verify integrity, don't extract anything
|
2014-09-17 13:54:33 +02:00
|
|
|
self.verify_only = verify_only
|
2016-09-29 01:52:31 +02:00
|
|
|
#: progress
|
2013-11-25 05:41:13 +01:00
|
|
|
self.blocks_backedup = 0
|
2016-09-29 01:52:31 +02:00
|
|
|
#: inner tar layer extraction (subprocess.Popen instance)
|
2014-01-10 03:23:51 +01:00
|
|
|
self.tar2_process = None
|
2016-09-29 01:52:31 +02:00
|
|
|
#: current inner tar archive name
|
2013-11-25 05:41:13 +01:00
|
|
|
self.tar2_current_file = None
|
2016-09-29 01:52:31 +02:00
|
|
|
#: set size of this file when tar report it on stderr (adjust LVM
|
|
|
|
# volume size)
|
|
|
|
self.adjust_output_size = None
|
|
|
|
#: decompressor subprocess.Popen instance
|
2014-01-10 03:23:51 +01:00
|
|
|
self.decompressor_process = None
|
2016-09-29 01:52:31 +02:00
|
|
|
#: decryptor subprocess.Popen instance
|
2014-01-10 03:23:51 +01:00
|
|
|
self.decryptor_process = None
|
2016-09-29 01:52:31 +02:00
|
|
|
#: callback reporting progress to UI
|
2013-11-25 05:41:13 +01:00
|
|
|
self.progress_callback = progress_callback
|
2016-09-29 01:52:31 +02:00
|
|
|
#: process (subprocess.Popen instance) feeding the data into
|
|
|
|
# extraction tool
|
2013-11-25 05:41:13 +01:00
|
|
|
self.vmproc = vmproc
|
|
|
|
|
2016-09-29 01:52:31 +02:00
|
|
|
#: pipe to feed the data into tar (use pipe instead of stdin,
|
|
|
|
# as stdin is used for tar control commands)
|
2015-05-03 14:45:01 +02:00
|
|
|
self.restore_pipe = os.path.join(self.base_dir, "restore_pipe")
|
2016-03-14 12:04:07 +01:00
|
|
|
|
|
|
|
self.log = logging.getLogger('qubes.backup.extract')
|
|
|
|
self.log.debug("Creating pipe in: {}".format(self.restore_pipe))
|
2013-11-25 05:41:13 +01:00
|
|
|
os.mkfifo(self.restore_pipe)
|
|
|
|
|
2014-09-17 14:40:26 +02:00
|
|
|
self.stderr_encoding = sys.stderr.encoding or 'utf-8'
|
|
|
|
|
|
|
|
def collect_tar_output(self):
|
|
|
|
if not self.tar2_process.stderr:
|
|
|
|
return
|
|
|
|
|
|
|
|
if self.tar2_process.poll() is None:
|
2014-09-18 07:39:19 +02:00
|
|
|
try:
|
2015-05-03 14:45:01 +02:00
|
|
|
new_lines = self.tar2_process.stderr \
|
|
|
|
.read(MAX_STDERR_BYTES).splitlines()
|
2014-09-18 07:39:19 +02:00
|
|
|
except IOError as e:
|
|
|
|
if e.errno == errno.EAGAIN:
|
|
|
|
return
|
|
|
|
else:
|
|
|
|
raise
|
2014-09-17 14:40:26 +02:00
|
|
|
else:
|
|
|
|
new_lines = self.tar2_process.stderr.readlines()
|
|
|
|
|
|
|
|
new_lines = map(lambda x: x.decode(self.stderr_encoding), new_lines)
|
|
|
|
|
2016-03-14 12:04:07 +01:00
|
|
|
msg_re = re.compile(r".*#[0-9].*restore_pipe")
|
|
|
|
debug_msg = filter(msg_re.match, new_lines)
|
|
|
|
self.log.debug('tar2_stderr: {}'.format('\n'.join(debug_msg)))
|
|
|
|
new_lines = filter(lambda x: not msg_re.match(x), new_lines)
|
2016-09-29 01:55:34 +02:00
|
|
|
if self.adjust_output_size:
|
|
|
|
# search for first file size reported by tar, after setting
|
|
|
|
# self.adjust_output_size (so don't look at self.tar2_stderr)
|
|
|
|
# this is used only when extracting single-file archive, so don't
|
|
|
|
# bother with checking file name
|
|
|
|
file_size_re = re.compile(r"^[^ ]+ [^ ]+/[^ ]+ *([0-9]+) .*")
|
|
|
|
for line in new_lines:
|
|
|
|
match = file_size_re.match(line)
|
|
|
|
if match:
|
|
|
|
file_size = match.groups()[0]
|
|
|
|
self.resize_lvm(self.adjust_output_size, file_size)
|
|
|
|
self.adjust_output_size = None
|
2014-09-17 14:40:26 +02:00
|
|
|
self.tar2_stderr += new_lines
|
|
|
|
|
2016-09-29 01:55:34 +02:00
|
|
|
def resize_lvm(self, dev, size):
|
|
|
|
# FIXME: HACK
|
|
|
|
try:
|
|
|
|
subprocess.check_call(
|
|
|
|
['sudo', 'lvresize', '-f', '-L', str(size) + 'B', dev],
|
|
|
|
stdout=open(os.devnull, 'w'), stderr=subprocess.STDOUT)
|
|
|
|
except subprocess.CalledProcessError as e:
|
|
|
|
if e.returncode == 3:
|
|
|
|
# already at the right size
|
|
|
|
pass
|
|
|
|
else:
|
|
|
|
raise
|
|
|
|
|
2013-11-25 05:41:13 +01:00
|
|
|
def run(self):
|
2014-01-10 03:23:51 +01:00
|
|
|
try:
|
|
|
|
self.__run__()
|
|
|
|
except Exception as e:
|
2014-02-01 14:01:21 +01:00
|
|
|
exc_type, exc_value, exc_traceback = sys.exc_info()
|
2014-01-10 03:23:51 +01:00
|
|
|
# Cleanup children
|
|
|
|
for process in [self.decompressor_process,
|
2015-05-03 14:45:01 +02:00
|
|
|
self.decryptor_process,
|
|
|
|
self.tar2_process]:
|
2014-01-10 03:23:51 +01:00
|
|
|
if process:
|
2014-01-15 03:57:59 +01:00
|
|
|
try:
|
|
|
|
process.terminate()
|
|
|
|
except OSError:
|
|
|
|
pass
|
2014-01-10 03:23:51 +01:00
|
|
|
process.wait()
|
2016-03-14 12:04:07 +01:00
|
|
|
self.log.error("ERROR: " + unicode(e))
|
2014-02-01 14:01:21 +01:00
|
|
|
raise e, None, exc_traceback
|
2014-01-10 03:23:51 +01:00
|
|
|
|
2016-09-29 01:52:31 +02:00
|
|
|
def handle_dir_relocations(self, dirname):
|
|
|
|
''' Relocate files in given director when it's already extracted
|
|
|
|
|
|
|
|
:param dirname: directory path to handle (relative to backup root),
|
|
|
|
without trailing slash
|
|
|
|
'''
|
|
|
|
|
|
|
|
for old, new in self.relocate:
|
|
|
|
if not old.startswith(dirname + '/'):
|
|
|
|
continue
|
|
|
|
# if directory is relocated too (most likely is), the file
|
|
|
|
# is extracted there
|
|
|
|
if dirname in self.relocate:
|
|
|
|
old = old.replace(dirname, self.relocate[dirname], 1)
|
2016-09-29 01:55:34 +02:00
|
|
|
try:
|
|
|
|
stat_buf = os.stat(new)
|
|
|
|
if stat.S_ISBLK(stat_buf.st_mode):
|
|
|
|
# output file is block device (LVM) - adjust its
|
|
|
|
# size, otherwise it may fail
|
|
|
|
# from lack of space
|
|
|
|
self.resize_lvm(new, stat_buf.st_size)
|
|
|
|
except OSError: # ENOENT
|
|
|
|
pass
|
2016-09-29 01:52:31 +02:00
|
|
|
subprocess.check_call(
|
|
|
|
['dd', 'if='+old, 'of='+new, 'conv=sparse'])
|
|
|
|
os.unlink(old)
|
|
|
|
|
2016-10-03 13:43:36 +02:00
|
|
|
def cleanup_tar2(self, wait=True, terminate=False):
|
|
|
|
if self.tar2_process is None:
|
|
|
|
return
|
|
|
|
if terminate:
|
|
|
|
self.tar2_process.terminate()
|
|
|
|
if wait:
|
|
|
|
self.tar2_process.wait()
|
|
|
|
elif self.tar2_process.poll() is None:
|
|
|
|
return
|
|
|
|
if self.tar2_process.returncode != 0:
|
|
|
|
self.collect_tar_output()
|
|
|
|
self.log.error(
|
|
|
|
"ERROR: unable to extract files for {0}, tar "
|
|
|
|
"output:\n {1}".
|
|
|
|
format(self.tar2_current_file,
|
|
|
|
"\n ".join(self.tar2_stderr)))
|
|
|
|
else:
|
|
|
|
# Finished extracting the tar file
|
|
|
|
self.collect_tar_output()
|
|
|
|
self.tar2_process = None
|
|
|
|
# if that was whole-directory archive, handle
|
|
|
|
# relocated files now
|
2016-10-04 21:38:59 +02:00
|
|
|
inner_name = os.path.splitext(self.tar2_current_file)[0]\
|
|
|
|
.replace(self.base_dir + '/', '')
|
2016-10-03 13:43:36 +02:00
|
|
|
if os.path.basename(inner_name) == '.':
|
|
|
|
self.handle_dir_relocations(
|
|
|
|
os.path.dirname(inner_name))
|
|
|
|
self.tar2_current_file = None
|
|
|
|
self.adjust_output_size = None
|
|
|
|
|
2014-01-10 03:23:51 +01:00
|
|
|
def __run__(self):
|
2016-03-14 12:04:07 +01:00
|
|
|
self.log.debug("Started sending thread")
|
|
|
|
self.log.debug("Moving to dir " + self.base_dir)
|
2013-11-25 05:41:13 +01:00
|
|
|
os.chdir(self.base_dir)
|
|
|
|
|
2014-01-13 04:37:54 +01:00
|
|
|
filename = None
|
|
|
|
|
2014-01-10 03:28:53 +01:00
|
|
|
for filename in iter(self.queue.get, None):
|
2016-04-02 23:51:37 +02:00
|
|
|
if filename in (QUEUE_FINISHED, QUEUE_ERROR):
|
2013-11-25 05:41:13 +01:00
|
|
|
break
|
|
|
|
|
2016-03-14 12:04:07 +01:00
|
|
|
self.log.debug("Extracting file " + filename)
|
2013-11-25 05:41:13 +01:00
|
|
|
|
|
|
|
if filename.endswith('.000'):
|
|
|
|
# next file
|
2016-10-03 13:43:36 +02:00
|
|
|
self.cleanup_tar2(wait=True, terminate=False)
|
2013-11-25 05:41:13 +01:00
|
|
|
|
2016-10-04 21:38:59 +02:00
|
|
|
inner_name = filename.rstrip('.000').replace(
|
|
|
|
self.base_dir + '/', '')
|
2016-09-29 01:52:31 +02:00
|
|
|
redirect_stdout = None
|
|
|
|
if self.relocate and inner_name in self.relocate:
|
|
|
|
# TODO: add `dd conv=sparse` when removing tar layer
|
|
|
|
tar2_cmdline = ['tar',
|
2016-09-29 01:55:34 +02:00
|
|
|
'-%sMvvOf' % ("t" if self.verify_only else "x"),
|
2016-09-29 01:52:31 +02:00
|
|
|
self.restore_pipe,
|
|
|
|
inner_name]
|
|
|
|
output_file = self.relocate[inner_name]
|
2016-09-29 01:55:34 +02:00
|
|
|
try:
|
|
|
|
stat_buf = os.stat(output_file)
|
|
|
|
if stat.S_ISBLK(stat_buf.st_mode):
|
|
|
|
# output file is block device (LVM) - adjust its
|
|
|
|
# size during extraction, otherwise it may fail
|
|
|
|
# from lack of space
|
|
|
|
self.adjust_output_size = output_file
|
2016-10-03 13:43:36 +02:00
|
|
|
except OSError: # ENOENT
|
2016-09-29 01:55:34 +02:00
|
|
|
pass
|
2016-09-29 01:52:31 +02:00
|
|
|
redirect_stdout = open(output_file, 'w')
|
|
|
|
elif self.relocate and \
|
|
|
|
os.path.dirname(inner_name) in self.relocate:
|
|
|
|
tar2_cmdline = ['tar',
|
|
|
|
'-%sMf' % ("t" if self.verify_only else "x"),
|
|
|
|
self.restore_pipe,
|
|
|
|
'-C', self.relocate[os.path.dirname(inner_name)],
|
|
|
|
# strip all directories - leave only final filename
|
|
|
|
'--strip-components', str(inner_name.count(os.sep)),
|
|
|
|
inner_name]
|
|
|
|
|
|
|
|
else:
|
|
|
|
tar2_cmdline = ['tar',
|
|
|
|
'-%sMkf' % ("t" if self.verify_only else "x"),
|
|
|
|
self.restore_pipe,
|
|
|
|
inner_name]
|
|
|
|
|
2016-03-14 12:04:07 +01:00
|
|
|
self.log.debug("Running command " + unicode(tar2_cmdline))
|
2014-01-10 03:23:51 +01:00
|
|
|
self.tar2_process = subprocess.Popen(tar2_cmdline,
|
2016-09-29 01:52:31 +02:00
|
|
|
stdin=subprocess.PIPE, stderr=subprocess.PIPE,
|
|
|
|
stdout=redirect_stdout)
|
2014-09-17 14:40:26 +02:00
|
|
|
fcntl.fcntl(self.tar2_process.stderr.fileno(), fcntl.F_SETFL,
|
|
|
|
fcntl.fcntl(self.tar2_process.stderr.fileno(),
|
|
|
|
fcntl.F_GETFL) | os.O_NONBLOCK)
|
|
|
|
self.tar2_stderr = []
|
2014-09-17 23:09:31 +02:00
|
|
|
elif not self.tar2_process:
|
|
|
|
# Extracting of the current archive failed, skip to the next
|
|
|
|
# archive
|
2016-03-14 12:04:07 +01:00
|
|
|
os.remove(filename)
|
2014-09-17 23:09:31 +02:00
|
|
|
continue
|
2013-11-25 05:41:13 +01:00
|
|
|
else:
|
2014-09-17 14:40:26 +02:00
|
|
|
self.collect_tar_output()
|
2016-03-14 12:04:07 +01:00
|
|
|
self.log.debug("Releasing next chunck")
|
2014-01-10 03:23:51 +01:00
|
|
|
self.tar2_process.stdin.write("\n")
|
|
|
|
self.tar2_process.stdin.flush()
|
2013-11-25 05:41:13 +01:00
|
|
|
self.tar2_current_file = filename
|
|
|
|
|
2015-05-03 14:45:01 +02:00
|
|
|
pipe = open(self.restore_pipe, 'wb')
|
2016-10-19 22:41:49 +02:00
|
|
|
monitor_processes = {
|
|
|
|
'vmproc': self.vmproc,
|
|
|
|
'addproc': self.tar2_process,
|
|
|
|
}
|
2013-11-25 05:41:13 +01:00
|
|
|
common_args = {
|
2015-05-03 14:45:01 +02:00
|
|
|
'backup_target': pipe,
|
|
|
|
'hmac': None,
|
|
|
|
'vmproc': self.vmproc,
|
|
|
|
'addproc': self.tar2_process
|
2013-11-25 05:41:13 +01:00
|
|
|
}
|
|
|
|
if self.encrypted:
|
|
|
|
# Start decrypt
|
2015-05-03 14:45:01 +02:00
|
|
|
self.decryptor_process = subprocess.Popen(
|
|
|
|
["openssl", "enc",
|
|
|
|
"-d",
|
|
|
|
"-" + self.crypto_algorithm,
|
|
|
|
"-pass",
|
|
|
|
"pass:" + self.passphrase] +
|
|
|
|
(["-z"] if self.compressed else []),
|
|
|
|
stdin=open(filename, 'rb'),
|
|
|
|
stdout=subprocess.PIPE)
|
2016-10-19 22:41:49 +02:00
|
|
|
in_stream = self.decryptor_process.stdout
|
|
|
|
monitor_processes['decryptor'] = self.decryptor_process
|
2013-12-02 14:05:41 +01:00
|
|
|
elif self.compressed:
|
2015-05-03 14:45:01 +02:00
|
|
|
self.decompressor_process = subprocess.Popen(
|
|
|
|
["gzip", "-d"],
|
|
|
|
stdin=open(filename, 'rb'),
|
|
|
|
stdout=subprocess.PIPE)
|
2016-10-19 22:41:49 +02:00
|
|
|
in_stream = self.decompressor_process.stdout
|
|
|
|
monitor_processes['decompresor'] = self.decompressor_process
|
2013-11-25 05:41:13 +01:00
|
|
|
else:
|
2016-10-19 22:41:49 +02:00
|
|
|
in_stream = open(filename, 'rb')
|
|
|
|
|
|
|
|
run_error = handle_streams(
|
|
|
|
in_stream,
|
|
|
|
{'target': pipe},
|
|
|
|
monitor_processes,
|
|
|
|
progress_callback=self.progress_callback)
|
2013-11-25 05:41:13 +01:00
|
|
|
|
2014-01-10 03:31:15 +01:00
|
|
|
try:
|
|
|
|
pipe.close()
|
|
|
|
except IOError as e:
|
|
|
|
if e.errno == errno.EPIPE:
|
2016-03-14 12:04:07 +01:00
|
|
|
self.log.debug(
|
|
|
|
"Got EPIPE while closing pipe to "
|
|
|
|
"the inner tar process")
|
|
|
|
# ignore the error
|
2014-01-10 03:31:15 +01:00
|
|
|
else:
|
|
|
|
raise
|
2016-10-19 22:41:49 +02:00
|
|
|
if run_error:
|
2014-09-17 14:40:26 +02:00
|
|
|
if run_error == "target":
|
|
|
|
self.collect_tar_output()
|
|
|
|
details = "\n".join(self.tar2_stderr)
|
|
|
|
else:
|
|
|
|
details = "%s failed" % run_error
|
2016-03-14 13:30:55 +01:00
|
|
|
self.log.error("Error while processing '{}': {}".format(
|
|
|
|
self.tar2_current_file, details))
|
2016-10-03 13:43:36 +02:00
|
|
|
self.cleanup_tar2(wait=True, terminate=True)
|
2013-11-25 05:41:13 +01:00
|
|
|
|
|
|
|
# Delete the file as we don't need it anymore
|
2016-03-14 12:04:07 +01:00
|
|
|
self.log.debug("Removing file " + filename)
|
2013-11-25 05:41:13 +01:00
|
|
|
os.remove(filename)
|
|
|
|
|
2014-01-10 03:28:53 +01:00
|
|
|
os.unlink(self.restore_pipe)
|
|
|
|
|
2016-10-03 13:43:36 +02:00
|
|
|
self.cleanup_tar2(wait=True, terminate=(filename == QUEUE_ERROR))
|
2016-03-14 12:04:07 +01:00
|
|
|
self.log.debug("Finished extracting thread")
|
2013-11-25 05:41:13 +01:00
|
|
|
|
2014-09-26 03:24:19 +02:00
|
|
|
|
2015-05-03 14:45:01 +02:00
|
|
|
class ExtractWorker3(ExtractWorker2):
    ''' Variant of :py:class:`ExtractWorker2` which streams chunks directly
    to the standard input of the inner ``tar`` (optionally through an
    ``openssl enc -d`` process) instead of using a named pipe, and lets
    ``tar`` run the decompression program itself.
    '''

    def __init__(self, queue, base_dir, passphrase, encrypted,
                 progress_callback, vmproc=None,
                 compressed=False, crypto_algorithm=DEFAULT_CRYPTO_ALGORITHM,
                 compression_filter=None, verify_only=False, relocate=None):
        ''' Same parameters as :py:class:`ExtractWorker2`, plus:

        :param compression_filter: program passed to tar as
            ``--use-compress-program``; DEFAULT_COMPRESSION_FILTER is used
            when empty
        '''
        super(ExtractWorker3, self).__init__(queue, base_dir, passphrase,
                                             encrypted,
                                             progress_callback, vmproc,
                                             compressed, crypto_algorithm,
                                             verify_only, relocate)
        self.compression_filter = compression_filter
        # the FIFO created by the parent constructor is not used here
        os.unlink(self.restore_pipe)

    def _terminate_decryptor(self):
        ''' Stop and reap the decryptor process, if there is one. '''
        if not self.decryptor_process:
            return
        try:
            self.decryptor_process.terminate()
        except OSError:
            # already exited and reaped (for example by a poll() done
            # while monitoring it)
            pass
        self.decryptor_process.wait()
        self.decryptor_process = None

    def __run__(self):
        ''' Extraction loop: consume file names from the queue until
        QUEUE_FINISHED or QUEUE_ERROR is received.
        '''
        self.log.debug("Started sending thread")
        self.log.debug("Moving to dir " + self.base_dir)
        os.chdir(self.base_dir)

        filename = None

        input_pipe = None
        for filename in iter(self.queue.get, None):
            if filename in (QUEUE_FINISHED, QUEUE_ERROR):
                break

            self.log.debug("Extracting file " + filename)

            if filename.endswith('.000'):
                # next file
                if self.tar2_process is not None:
                    input_pipe.close()
                    self.cleanup_tar2(wait=True, terminate=False)

                # cut the '.000' suffix; str.rstrip() would strip any
                # trailing '.' and '0' characters of the name itself too
                inner_name = filename[:-len('.000')].replace(
                    self.base_dir + '/', '')
                redirect_stdout = None
                if self.relocate and inner_name in self.relocate:
                    # TODO: add dd conv=sparse when removing tar layer
                    tar2_cmdline = ['tar',
                        '-%svvO' % ("t" if self.verify_only else "x"),
                        inner_name]
                    output_file = self.relocate[inner_name]
                    try:
                        stat_buf = os.stat(output_file)
                        if stat.S_ISBLK(stat_buf.st_mode):
                            # output file is block device (LVM) - adjust its
                            # size during extraction, otherwise it may fail
                            # from lack of space
                            self.adjust_output_size = output_file
                    except OSError:  # ENOENT
                        pass
                    redirect_stdout = open(output_file, 'w')
                elif self.relocate and \
                        os.path.dirname(inner_name) in self.relocate:
                    tar2_cmdline = ['tar',
                        '-%s' % ("t" if self.verify_only else "x"),
                        '-C', self.relocate[os.path.dirname(inner_name)],
                        # strip all directories - leave only final filename
                        '--strip-components', str(inner_name.count(os.sep)),
                        inner_name]
                else:
                    tar2_cmdline = ['tar',
                        '-%sk' % ("t" if self.verify_only else "x"),
                        inner_name]

                if self.compressed:
                    # the option has to go before the final inner_name
                    if self.compression_filter:
                        tar2_cmdline.insert(-1,
                                            "--use-compress-program=%s" %
                                            self.compression_filter)
                    else:
                        tar2_cmdline.insert(-1, "--use-compress-program=%s" %
                                            DEFAULT_COMPRESSION_FILTER)

                self.log.debug("Running command " + unicode(tar2_cmdline))
                try:
                    if self.encrypted:
                        # Start decrypt
                        # NOTE(review): the passphrase is passed on the
                        # command line, where other local processes may be
                        # able to see it - consider a different `-pass`
                        # source
                        self.decryptor_process = subprocess.Popen(
                            ["openssl", "enc",
                             "-d",
                             "-" + self.crypto_algorithm,
                             "-pass",
                             "pass:" + self.passphrase],
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE)

                        self.tar2_process = subprocess.Popen(
                            tar2_cmdline,
                            stdin=self.decryptor_process.stdout,
                            stdout=redirect_stdout,
                            stderr=subprocess.PIPE)
                        input_pipe = self.decryptor_process.stdin
                    else:
                        self.tar2_process = subprocess.Popen(
                            tar2_cmdline,
                            stdin=subprocess.PIPE,
                            stdout=redirect_stdout,
                            stderr=subprocess.PIPE)
                        input_pipe = self.tar2_process.stdin
                finally:
                    # the child has its own copy of the descriptor
                    if redirect_stdout is not None:
                        redirect_stdout.close()

                # make stderr non-blocking, so collect_tar_output() can
                # poll it while tar is still running
                fcntl.fcntl(self.tar2_process.stderr.fileno(), fcntl.F_SETFL,
                            fcntl.fcntl(self.tar2_process.stderr.fileno(),
                                        fcntl.F_GETFL) | os.O_NONBLOCK)
                self.tar2_stderr = []
            elif not self.tar2_process:
                # Extracting of the current archive failed, skip to the next
                # archive
                os.remove(filename)
                continue
            else:
                # chunks of one archive must arrive in order: the numeric
                # extension has to be exactly one more than the previous one
                (basename, ext) = os.path.splitext(self.tar2_current_file)
                previous_chunk_number = int(ext[1:])
                expected_filename = basename + '.%03d' % (
                    previous_chunk_number+1)
                if expected_filename != filename:
                    self.cleanup_tar2(wait=True, terminate=True)
                    self.log.error(
                        'Unexpected file in archive: {}, expected {}'.format(
                            filename, expected_filename))
                    os.remove(filename)
                    continue
                self.log.debug("Releasing next chunck")

            self.tar2_current_file = filename

            with open(filename, 'rb') as chunk_file:
                run_error = handle_streams(
                    chunk_file,
                    {'target': input_pipe},
                    {'vmproc': self.vmproc,
                     'addproc': self.tar2_process,
                     'decryptor': self.decryptor_process,
                    },
                    progress_callback=self.progress_callback)

            if run_error:
                if run_error == "target":
                    self.collect_tar_output()
                    details = "\n".join(self.tar2_stderr)
                else:
                    details = "%s failed" % run_error
                self._terminate_decryptor()
                self.log.error("Error while processing '{}': {}".format(
                    self.tar2_current_file, details))
                self.cleanup_tar2(wait=True, terminate=True)

            # Delete the file as we don't need it anymore
            self.log.debug("Removing file " + filename)
            os.remove(filename)

        if self.tar2_process is not None:
            input_pipe.close()
            if filename == QUEUE_ERROR:
                self._terminate_decryptor()
            self.cleanup_tar2(terminate=(filename == QUEUE_ERROR))

        self.log.debug("Finished extracting thread")
|
2014-09-26 03:24:19 +02:00
|
|
|
|
2014-01-15 03:53:45 +01:00
|
|
|
|
2016-03-13 13:49:42 +01:00
|
|
|
def get_supported_hmac_algo(hmac_algorithm=None):
    """Generate candidate HMAC algorithm names, in the order to try them.

    The caller-provided algorithm (if any) is yielded first, then
    ``'scrypt'`` (unless it was the provided one), then every name printed by
    ``openssl list-message-digest-algorithms``.

    :param hmac_algorithm: algorithm name to yield first, or None
    :return: generator of algorithm names
    """
    # Start with provided default
    if hmac_algorithm:
        yield hmac_algorithm
    if hmac_algorithm != 'scrypt':
        yield 'scrypt'
    proc = subprocess.Popen(['openssl', 'list-message-digest-algorithms'],
                            stdout=subprocess.PIPE)
    try:
        for algo in proc.stdout.readlines():
            # presumably 'alias => digest' entries; only plain names are used
            if '=>' in algo:
                continue
            yield algo.strip()
    finally:
        # Runs also when the consumer abandons the generator before it is
        # exhausted, so the pipe is always closed and the child always reaped.
        proc.stdout.close()
        proc.wait()
|
|
|
|
|
2016-03-14 13:34:17 +01:00
|
|
|
|
2016-03-13 13:49:42 +01:00
|
|
|
class BackupRestoreOptions(object):
    """Container for the settings that steer a restore operation.

    Every option is a plain instance attribute initialised to its default
    value; adjust them on the instance before starting the restore.
    """

    def __init__(self):
        # --- fallbacks for objects the backup references but the host lacks

        #: use default NetVM if the one referenced in backup do not exists on
        # the host
        self.use_default_netvm = True
        #: set NetVM to "none" if the one referenced in backup do not exists
        # on the host
        self.use_none_netvm = False
        #: set template to default if the one referenced in backup do not
        # exists on the host
        self.use_default_template = True
        #: use default kernel if the one referenced in backup do not exists
        # on the host
        self.use_default_kernel = True

        # --- what gets restored, and under which names

        #: restore dom0 home
        self.dom0_home = True
        #: dictionary how what templates should be used instead of those
        # referenced in backup
        self.replace_template = dict()
        #: restore dom0 home even if username is different
        self.ignore_username_mismatch = False
        #: do not restore data, only verify backup integrity
        self.verify_only = False
        #: automatically rename VM during restore, when it would conflict
        # with existing one
        self.rename_conflicting = True
        #: list of VM names to exclude
        self.exclude = list()
        #: restore VMs into selected storage pool
        self.override_pool = None
|
2016-03-13 13:49:42 +01:00
|
|
|
|
|
|
|
|
|
|
|
class BackupRestore(object):
|
|
|
|
"""Usage:
|
|
|
|
>>> restore_op = BackupRestore(...)
|
|
|
|
>>> # adjust restore_op.options here
|
|
|
|
>>> restore_info = restore_op.get_restore_info()
|
|
|
|
>>> # manipulate restore_info to select VMs to restore here
|
|
|
|
>>> restore_op.restore_do(restore_info)
|
|
|
|
"""
|
2015-05-03 14:45:01 +02:00
|
|
|
|
2016-04-04 05:19:59 +02:00
|
|
|
class VMToRestore(object):
    """Restore-time record for a single VM found in the backup.

    Snapshots the VM's name, template name, NetVM name, backup subdirectory
    and backup size, and collects the problems (the marker constants below)
    detected for it.
    """

    #: VM excluded from restore by user
    EXCLUDED = object()
    #: VM with such name already exists on the host
    ALREADY_EXISTS = object()
    #: NetVM used by the VM does not exists on the host
    MISSING_NETVM = object()
    #: TemplateVM used by the VM does not exists on the host
    MISSING_TEMPLATE = object()
    #: Kernel used by the VM does not exists on the host
    MISSING_KERNEL = object()

    def __init__(self, vm):
        """Collect restore-relevant data from `vm`.

        :param vm: VM object; its ``features``, ``netvm`` and ``name`` are
            read, and ``template`` when the attribute exists
        """
        features = vm.features
        self.vm = vm

        # location of the VM data inside the backup, if recorded
        self.subdir = (
            features['backup-path'] if 'backup-path' in features else None)

        # a missing or empty 'backup-size' feature counts as zero
        recorded_size = (
            features['backup-size'] if 'backup-size' in features else None)
        self.size = int(recorded_size) if recorded_size else 0

        self.problems = set()

        # not every VM object has a 'template' attribute
        template = vm.template if hasattr(vm, 'template') else None
        self.template = template.name if template else None

        netvm = vm.netvm
        self.netvm = netvm.name if netvm else None

        self.name = vm.name
        self.orig_template = None
        self.restored_vm = None

    @property
    def good_to_go(self):
        """True when no problem has been recorded for this VM."""
        return not self.problems
|
|
|
|
|
|
|
|
class Dom0ToRestore(VMToRestore):
    """Restore record for dom0, extending :py:class:`VMToRestore` with the
    username derived from the backup subdirectory.
    """

    #: backup was performed on system with different dom0 username
    USERNAME_MISMATCH = object()

    def __init__(self, vm, subdir=None):
        """
        :param vm: VM object, passed on to the base class
        :param subdir: backup subdirectory; when given it overrides the
            subdir determined by the base class and its last path component
            becomes :py:attr:`username`
        """
        super(BackupRestore.Dom0ToRestore, self).__init__(vm)
        # NOTE(review): always define the attribute, so that reading
        # ``username`` cannot fail when the optional `subdir` is omitted
        self.username = None
        if subdir:
            self.subdir = subdir
            self.username = os.path.basename(subdir)
|
|
|
|
|
2016-03-13 13:49:42 +01:00
|
|
|
def __init__(self, app, backup_location, backup_vm, passphrase):
    """Set up a restore operation and load the backup's metadata.

    Creates the temporary extraction directory, then retrieves the backup
    header and processes ``qubes.xml`` from the backup.

    :param app: qubes.Qubes instance
    :param backup_location: backup path, inside `backup_vm` if one is given
    :param backup_vm: VM from which the backup should be retrieved; a VM
        with ``qid == 0`` is handled like no VM at all
    :param passphrase: passphrase protecting the backup
    """
    super(BackupRestore, self).__init__()

    #: qubes.Qubes instance
    self.app = app

    #: options how the backup should be restored
    self.options = BackupRestoreOptions()

    #: VM from which backup should be retrieved
    self.backup_vm = (
        None if (backup_vm and backup_vm.qid == 0) else backup_vm)

    #: backup path, inside VM pointed by :py:attr:`backup_vm`
    self.backup_location = backup_location

    #: passphrase protecting backup integrity and optionally decryption
    self.passphrase = passphrase

    #: temporary directory used to extract the data before moving to the
    # final location; should be on the same filesystem as /var/lib/qubes
    self.tmpdir = tempfile.mkdtemp(dir="/var/tmp", prefix="restore")

    #: list of processes (Popen objects) to kill on cancel
    self.processes_to_kill_on_cancel = list()

    #: is the backup operation canceled
    self.canceled = False

    #: report restore progress, called with one argument - percents of
    # data restored
    # FIXME: convert to float [0,1]
    self.progress_callback = None

    self.log = logging.getLogger('qubes.backup')

    # The two calls below rely on the attributes assigned above, and the
    # header has to be loaded before qubes.xml is processed.

    #: basic information about the backup
    self.header_data = self._retrieve_backup_header()

    #: VMs included in the backup
    self.backup_app = self._process_qubes_xml()
|
|
|
|
|
|
|
|
def cancel(self):
    """Cancel running backup operation. Can be called from another thread.

    Marks the operation as canceled and calls ``terminate()`` on every
    process listed in :py:attr:`processes_to_kill_on_cancel`.
    """
    self.canceled = True
    children = self.processes_to_kill_on_cancel
    for child in children:
        try:
            child.terminate()
        except OSError:
            # terminate() failed for this one; carry on with the others
            continue
|
|
|
|
|
|
|
|
def _start_retrieval_process(self, filelist, limit_count, limit_bytes):
    """Start extracting the backup stream into :py:attr:`tmpdir`.

    :param filelist: list of files to extract; listing directory name
        will extract the whole directory; use empty list to extract the whole
        archive
    :param limit_count: maximum number of files to extract
    :param limit_bytes: maximum size of extracted data
    :return: a tuple of (Popen object of started process, file-like
        object for reading extracted files list, file-like object for reading
        errors)
    """
    source_proc = None
    if self.backup_vm is not None:
        # If APPVM, STDOUT is a PIPE
        source_proc = self.backup_vm.run_service('qubes.Restore',
            passio_popen=True, passio_stderr=True)
        # first line sent to the service: backup location, newlines stripped
        location = self.backup_location.replace("\r", "").replace("\n", "")
        source_proc.stdin.write(location + "\n")

        # Send to tar2qfile the VMs that should be extracted
        source_proc.stdin.write(" ".join(filelist) + "\n")
        self.processes_to_kill_on_cancel.append(source_proc)

        extractor_stdin = source_proc.stdout
        extractor_cmd = ['/usr/libexec/qubes/qfile-dom0-unpacker',
            str(os.getuid()), self.tmpdir, '-v']
    else:
        extractor_stdin = open(self.backup_location, 'rb')

        extractor_cmd = ['tar', '-ixv', '-C', self.tmpdir] + filelist

    # the limits are handed to the extractor through its environment
    extractor_env = os.environ.copy()
    extractor_env['UPDATES_MAX_BYTES'] = str(limit_bytes)
    extractor_env['UPDATES_MAX_FILES'] = str(limit_count)
    self.log.debug("Run command" + unicode(extractor_cmd))

    # with a source VM the extractor's stdout is wired back to that VM
    extractor_stdout = source_proc.stdin if source_proc else subprocess.PIPE
    extractor = subprocess.Popen(
        extractor_cmd,
        stdin=extractor_stdin,
        stdout=extractor_stdout,
        stderr=subprocess.PIPE,
        env=extractor_env)
    self.processes_to_kill_on_cancel.append(extractor)

    # qfile-dom0-unpacker output filelist on stderr
    # and have stdout connected to the VM), while tar output filelist
    # on stdout
    if self.backup_vm:
        # let qfile-dom0-unpacker hold the only open FD to the write end of
        # pipe, otherwise qrexec-client will not receive EOF when
        # qfile-dom0-unpacker terminates
        source_proc.stdin.close()
        return extractor, extractor.stderr, source_proc.stderr
    return extractor, extractor.stdout, extractor.stderr
|
|
|
|
|
|
|
|
def _verify_hmac(self, filename, hmacfile, algorithm=None):
    """Verify integrity of `filename` using its companion `hmacfile`.

    Both names are relative to :py:attr:`tmpdir`. For the ``'scrypt'``
    algorithm `hmacfile` is decrypted and its content compared with
    `filename`; for any other algorithm the HMAC is computed with
    ``openssl dgst`` and compared with the value stored in `hmacfile`, which
    is removed once the verification succeeds.

    :param filename: file to verify
    :param hmacfile: file with the HMAC, must be ``filename + ".hmac"``
    :param algorithm: HMAC algorithm name; defaults to the one from the
        backup header
    :return: True when verification succeeded
    :raises qubes.exc.QubesException: on a mismatched file name, malformed
        HMAC content, ``openssl`` error output or HMAC mismatch
    """
    def load_hmac(hmac_text):
        """Return the value following the first ``=`` in `hmac_text`."""
        # only plain ASCII is acceptable here
        if any(ord(char) > 127 for char in hmac_text):
            raise qubes.exc.QubesException(
                "Invalid content of {}".format(hmacfile))
        fields = hmac_text.strip().split("=")
        if len(fields) < 2:
            raise qubes.exc.QubesException(
                "ERROR: invalid hmac file content")
        return fields[1].strip()

    def read_tmp_file(name):
        """Read a whole file from the temporary directory and close it."""
        with open(os.path.join(self.tmpdir, name), 'r') as stream:
            return stream.read()

    if algorithm is None:
        algorithm = self.header_data.hmac_algorithm
    passphrase = self.passphrase.encode('utf-8')
    self.log.debug("Verifying file {}".format(filename))

    if hmacfile != filename + ".hmac":
        raise qubes.exc.QubesException(
            "ERROR: expected hmac for {}, but got {}".
            format(filename, hmacfile))

    if algorithm == 'scrypt':
        # in case of 'scrypt' _verify_hmac is only used for backup header
        assert filename == HEADER_FILENAME
        self._verify_and_decrypt(hmacfile, HEADER_FILENAME + '.dec')
        if read_tmp_file(filename) != read_tmp_file(filename + '.dec'):
            raise qubes.exc.QubesException(
                'Invalid hmac on {}'.format(filename))
        return True

    # NOTE(review): the passphrase is passed as a command line argument and
    # is therefore visible in the process list while openssl runs
    with open(os.path.join(self.tmpdir, filename), 'rb') as data_file:
        hmac_proc = subprocess.Popen(
            ["openssl", "dgst", "-" + algorithm, "-hmac", passphrase],
            stdin=data_file,
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        hmac_stdout, hmac_stderr = hmac_proc.communicate()

    # any output on stderr is treated as a failure
    if len(hmac_stderr) > 0:
        raise qubes.exc.QubesException(
            "ERROR: verify file {0}: {1}".format(filename, hmac_stderr))

    self.log.debug("Loading hmac for file {}".format(filename))
    expected_hmac = load_hmac(read_tmp_file(hmacfile))
    computed_hmac = load_hmac(hmac_stdout)

    if len(expected_hmac) > 0 and computed_hmac == expected_hmac:
        os.unlink(os.path.join(self.tmpdir, hmacfile))
        self.log.debug(
            "File verification OK -> Sending file {}".format(filename))
        return True

    raise qubes.exc.QubesException(
        "ERROR: invalid hmac for file {0}: {1}. "
        "Is the passphrase correct?".
        format(filename, computed_hmac))
|
|
|
|
|
backup: use 'scrypt' tool for backup encryption and integrity protection
`openssl dgst` and `openssl enc` used previously poorly handle key
stretching - in case of `openssl enc` encryption key is derived using
single MD5 iteration, without even any salt. This hardly prevent
brute force or even rainbow tables attacks. To make things worse, the
same key is used for encryption and integrity protection which ease
brute force even further.
All this is still about brute force attacks, so when using long, high
entropy passphrase, it should be still relatively safe. But lets do
better.
According to discussion in QubesOS/qubes-issues#971, scrypt algorithm is
a good choice for key stretching (it isn't the best of all existing, but
a good one and widely adopted). At the same time, lets switch away from
`openssl` tool, as it is very limited and apparently not designed for
production use. Use `scrypt` tool, which is very simple and does exactly
what we need - encrypt the data and integrity protect it. Its archive
format have own (simple) header with data required by the `scrypt`
algorithm, including salt. Internally data is encrypted with AES256-CTR
and integrity protected with HMAC-SHA256. For details see:
https://github.com/tarsnap/scrypt/blob/master/FORMAT
This means change of backup format. Mainly:
1. HMAC is stored in scrypt header, so don't use separate file for it.
Instead have data in files with `.enc` extension.
2. For compatibility leave `backup-header` and `backup-header.hmac`. But
`backup-header.hmac` is really scrypt-encrypted version of `backup-header`.
3. For each file, prepend its identifier to the passphrase, to
authenticate filename itself too. Having this we can guard against
reordering archive files within a single backup and across backups. This
identifier is built as:
backup ID (from backup-header)!filename!
For backup-header itself, there is no backup ID (just 'backup-header!').
Fixes QubesOS/qubes-issues#971
2016-10-20 12:56:51 +02:00
|
|
|
def _verify_and_decrypt(self, filename, output=None):
    """Decrypt a scrypt-protected file from :py:attr:`tmpdir`.

    The passphrase handed to scrypt is prefixed with the file's identifier:
    ``<name>!`` for the backup header, ``<backup id>!<name>!`` for any other
    file, where ``<name>`` is `filename` without its extension. On success
    the encrypted input file is removed.

    :param filename: encrypted file, relative to the temporary directory;
        must end with ``.enc`` or ``.hmac``
    :param output: name of the decrypted file, relative to the temporary
        directory; defaults to `filename` without its extension
    :return: `filename` without its extension (also when `output` is given)
    :raises qubes.exc.QubesException: when scrypt exits with non-zero code
    """
    assert filename.endswith(('.enc', '.hmac'))
    fullname = os.path.join(self.tmpdir, filename)
    (origname, _) = os.path.splitext(filename)
    fulloutput = os.path.join(self.tmpdir, output if output else origname)

    if origname == HEADER_FILENAME:
        passphrase = u'{filename}!{passphrase}'.format(
            filename=origname,
            passphrase=self.passphrase)
    else:
        passphrase = u'{backup_id}!{filename}!{passphrase}'.format(
            backup_id=self.header_data.backup_id,
            filename=origname,
            passphrase=self.passphrase)

    p = launch_scrypt('dec', fullname, fulloutput, passphrase)
    (_, stderr) = p.communicate()
    if p.returncode != 0:
        # Best-effort removal of partial output: the file may not exist at
        # all, and a failure here must not hide the decryption error below.
        try:
            os.unlink(fulloutput)
        except OSError:
            pass
        raise qubes.exc.QubesException('failed to decrypt {}: {}'.format(
            fullname, stderr))
    # encrypted file is no longer needed
    os.unlink(fullname)
    return origname
|
|
|
|
|
|
|
|
def _retrieve_backup_header_files(self, files, allow_none=False):
    """Extract the listed files from the backup into :py:attr:`tmpdir`.

    :param files: list of file names to retrieve
    :param allow_none: when True, return None instead of raising if the
        files are not present in the backup
    :return: `files` when all of them were extracted, None when they are
        missing and `allow_none` is set
    :raises qubes.exc.QubesException: when a helper process fails, the
        archive cannot be read, the extracted file list differs from
        `files`, or a file is missing and `allow_none` is not set
    """
    # extraction is limited to len(files) files and 1024 * 1024 bytes
    (retrieve_proc, filelist_pipe, error_pipe) = \
        self._start_retrieval_process(
            files, len(files), 1024 * 1024)
    # read the list of extracted files before waiting for the process
    filelist = filelist_pipe.read()
    retrieve_proc_returncode = retrieve_proc.wait()
    if retrieve_proc in self.processes_to_kill_on_cancel:
        self.processes_to_kill_on_cancel.remove(retrieve_proc)
    extract_stderr = error_pipe.read(MAX_STDERR_BYTES)

    # wait for other processes (if any)
    for proc in self.processes_to_kill_on_cancel:
        if proc.wait() != 0:
            raise qubes.exc.QubesException(
                "Backup header retrieval failed (exit code {})".format(
                    proc.wait())
            )

    if retrieve_proc_returncode != 0:
        if not filelist and 'Not found in archive' in extract_stderr:
            # NOTE(review): nesting of this if/else was reconstructed from a
            # listing without indentation - confirm the 'else' pairs with
            # 'if allow_none'
            if allow_none:
                return None
            else:
                raise qubes.exc.QubesException(
                    "unable to read the qubes backup file {0} ({1}): {2}".format(
                        self.backup_location,
                        retrieve_proc.wait(),
                        extract_stderr
                    ))
    # exactly the requested files must have been extracted (order ignored)
    actual_files = filelist.splitlines()
    if sorted(actual_files) != sorted(files):
        raise qubes.exc.QubesException(
            'unexpected files in archive: got {!r}, expeced {!r}'.format(
                actual_files, files
            ))
    # the extractor listing a file is not trusted alone: check it on disk
    for f in files:
        if not os.path.exists(os.path.join(self.tmpdir, f)):
            if allow_none:
                return None
            else:
                raise qubes.exc.QubesException(
                    'Unable to retrieve file {} from backup {}: {}'.format(
                        f, self.backup_location, extract_stderr
                    )
                )
    return files
|
|
|
|
|
2016-03-13 13:49:42 +01:00
|
|
|
def _retrieve_backup_header(self):
|
|
|
|
"""Retrieve backup header and qubes.xml. Only backup header is
|
|
|
|
analyzed, qubes.xml is left as-is
|
|
|
|
(not even verified/decrypted/uncompressed)
|
|
|
|
|
|
|
|
:return header_data
|
|
|
|
:rtype :py:class:`BackupHeader`
|
|
|
|
"""
|
|
|
|
|
|
|
|
if not self.backup_vm and os.path.exists(
|
|
|
|
os.path.join(self.backup_location, 'qubes.xml')):
|
|
|
|
# backup format version 1 doesn't have header
|
|
|
|
header_data = BackupHeader()
|
|
|
|
header_data.version = 1
|
|
|
|
return header_data
|
|
|
|
|
backup: use 'scrypt' tool for backup encryption and integrity protection
`openssl dgst` and `openssl enc` used previously poorly handle key
stretching - in case of `openssl enc` encryption key is derived using
single MD5 iteration, without even any salt. This hardly prevent
brute force or even rainbow tables attacks. To make things worse, the
same key is used for encryption and integrity protection which ease
brute force even further.
All this is still about brute force attacks, so when using long, high
entropy passphrase, it should be still relatively safe. But lets do
better.
According to discussion in QubesOS/qubes-issues#971, scrypt algorithm is
a good choice for key stretching (it isn't the best of all existing, but
a good one and widely adopted). At the same time, lets switch away from
`openssl` tool, as it is very limited and apparently not designed for
production use. Use `scrypt` tool, which is very simple and does exactly
what we need - encrypt the data and integrity protect it. Its archive
format have own (simple) header with data required by the `scrypt`
algorithm, including salt. Internally data is encrypted with AES256-CTR
and integrity protected with HMAC-SHA256. For details see:
https://github.com/tarsnap/scrypt/blob/master/FORMAT
This means change of backup format. Mainly:
1. HMAC is stored in scrypt header, so don't use separate file for it.
Instead have data in files with `.enc` extension.
2. For compatibility leave `backup-header` and `backup-header.hmac`. But
`backup-header.hmac` is really scrypt-encrypted version of `backup-header`.
3. For each file, prepend its identifier to the passphrase, to
authenticate filename itself too. Having this we can guard against
reordering archive files within a single backup and across backups. This
identifier is built as:
backup ID (from backup-header)!filename!
For backup-header itself, there is no backup ID (just 'backup-header!').
Fixes QubesOS/qubes-issues#971
2016-10-20 12:56:51 +02:00
|
|
|
header_files = self._retrieve_backup_header_files(
|
|
|
|
['backup-header', 'backup-header.hmac'], allow_none=True)
|
2014-01-15 03:53:45 +01:00
|
|
|
|
backup: use 'scrypt' tool for backup encryption and integrity protection
`openssl dgst` and `openssl enc` used previously poorly handle key
stretching - in case of `openssl enc` encryption key is derived using
single MD5 iteration, without even any salt. This hardly prevent
brute force or even rainbow tables attacks. To make things worse, the
same key is used for encryption and integrity protection which ease
brute force even further.
All this is still about brute force attacks, so when using long, high
entropy passphrase, it should be still relatively safe. But lets do
better.
According to discussion in QubesOS/qubes-issues#971, scrypt algorithm is
a good choice for key stretching (it isn't the best of all existing, but
a good one and widely adopted). At the same time, lets switch away from
`openssl` tool, as it is very limited and apparently not designed for
production use. Use `scrypt` tool, which is very simple and does exactly
what we need - encrypt the data and integrity protect it. Its archive
format have own (simple) header with data required by the `scrypt`
algorithm, including salt. Internally data is encrypted with AES256-CTR
and integrity protected with HMAC-SHA256. For details see:
https://github.com/tarsnap/scrypt/blob/master/FORMAT
This means change of backup format. Mainly:
1. HMAC is stored in scrypt header, so don't use separate file for it.
Instead have data in files with `.enc` extension.
2. For compatibility leave `backup-header` and `backup-header.hmac`. But
`backup-header.hmac` is really scrypt-encrypted version of `backup-header`.
3. For each file, prepend its identifier to the passphrase, to
authenticate filename itself too. Having this we can guard against
reordering archive files within a single backup and across backups. This
identifier is built as:
backup ID (from backup-header)!filename!
For backup-header itself, there is no backup ID (just 'backup-header!').
Fixes QubesOS/qubes-issues#971
2016-10-20 12:56:51 +02:00
|
|
|
if not header_files:
|
|
|
|
# R2-Beta3 didn't have backup header, so if none is found,
|
|
|
|
# assume it's version=2 and use values present at that time
|
2016-04-02 23:56:00 +02:00
|
|
|
header_data = BackupHeader(
|
|
|
|
version=2,
|
|
|
|
# place explicitly this value, because it is what format_version
|
|
|
|
# 2 have
|
backup: use 'scrypt' tool for backup encryption and integrity protection
`openssl dgst` and `openssl enc` used previously poorly handle key
stretching - in case of `openssl enc` encryption key is derived using
single MD5 iteration, without even any salt. This hardly prevent
brute force or even rainbow tables attacks. To make things worse, the
same key is used for encryption and integrity protection which ease
brute force even further.
All this is still about brute force attacks, so when using long, high
entropy passphrase, it should be still relatively safe. But lets do
better.
According to discussion in QubesOS/qubes-issues#971, scrypt algorithm is
a good choice for key stretching (it isn't the best of all existing, but
a good one and widely adopted). At the same time, lets switch away from
`openssl` tool, as it is very limited and apparently not designed for
production use. Use `scrypt` tool, which is very simple and does exactly
what we need - encrypt the data and integrity protect it. Its archive
format have own (simple) header with data required by the `scrypt`
algorithm, including salt. Internally data is encrypted with AES256-CTR
and integrity protected with HMAC-SHA256. For details see:
https://github.com/tarsnap/scrypt/blob/master/FORMAT
This means change of backup format. Mainly:
1. HMAC is stored in scrypt header, so don't use separate file for it.
Instead have data in files with `.enc` extension.
2. For compatibility leave `backup-header` and `backup-header.hmac`. But
`backup-header.hmac` is really scrypt-encrypted version of `backup-header`.
3. For each file, prepend its identifier to the passphrase, to
authenticate filename itself too. Having this we can guard against
reordering archive files within a single backup and across backups. This
identifier is built as:
backup ID (from backup-header)!filename!
For backup-header itself, there is no backup ID (just 'backup-header!').
Fixes QubesOS/qubes-issues#971
2016-10-20 12:56:51 +02:00
|
|
|
hmac_algorithm='SHA1',
|
2016-04-02 23:56:00 +02:00
|
|
|
crypto_algorithm='aes-256-cbc',
|
|
|
|
# TODO: set encrypted to something...
|
|
|
|
)
|
backup: use 'scrypt' tool for backup encryption and integrity protection
The previously used `openssl dgst` and `openssl enc` handle key
stretching poorly - in the case of `openssl enc` the encryption key is
derived using a single MD5 iteration, without even any salt. This hardly
prevents brute force or even rainbow table attacks. To make things worse,
the same key is used for encryption and integrity protection, which eases
brute force even further.
All this is still about brute force attacks, so when using a long, high
entropy passphrase, it should still be relatively safe. But let's do
better.
According to the discussion in QubesOS/qubes-issues#971, the scrypt
algorithm is a good choice for key stretching (it isn't the best of all
existing ones, but a good one and widely adopted). At the same time, let's
switch away from the `openssl` tool, as it is very limited and apparently
not designed for production use. Use the `scrypt` tool, which is very
simple and does exactly what we need - encrypt the data and integrity
protect it. Its archive format has its own (simple) header with data
required by the `scrypt` algorithm, including salt. Internally data is
encrypted with AES256-CTR and integrity protected with HMAC-SHA256. For
details see:
https://github.com/tarsnap/scrypt/blob/master/FORMAT
This means change of backup format. Mainly:
1. HMAC is stored in scrypt header, so don't use separate file for it.
Instead have data in files with `.enc` extension.
2. For compatibility leave `backup-header` and `backup-header.hmac`. But
`backup-header.hmac` is really scrypt-encrypted version of `backup-header`.
3. For each file, prepend its identifier to the passphrase, to
authenticate filename itself too. Having this we can guard against
reordering archive files within a single backup and across backups. This
identifier is built as:
backup ID (from backup-header)!filename!
For backup-header itself, there is no backup ID (just 'backup-header!').
Fixes QubesOS/qubes-issues#971
2016-10-20 12:56:51 +02:00
|
|
|
else:
|
|
|
|
filename = HEADER_FILENAME
|
|
|
|
hmacfile = HEADER_FILENAME + '.hmac'
|
|
|
|
self.log.debug("Got backup header and hmac: {}, {}".format(
|
|
|
|
filename, hmacfile))
|
|
|
|
|
|
|
|
file_ok = False
|
|
|
|
hmac_algorithm = DEFAULT_HMAC_ALGORITHM
|
|
|
|
for hmac_algo in get_supported_hmac_algo(hmac_algorithm):
|
|
|
|
try:
|
|
|
|
if self._verify_hmac(filename, hmacfile, hmac_algo):
|
|
|
|
file_ok = True
|
|
|
|
break
|
|
|
|
except qubes.exc.QubesException as e:
|
|
|
|
self.log.debug(
|
|
|
|
'Failed to verify {} using {}: {}'.format(
|
|
|
|
hmacfile, hmac_algo, str(e)))
|
|
|
|
# Ignore exception here, try the next algo
|
|
|
|
pass
|
|
|
|
if not file_ok:
|
2016-03-13 13:49:42 +01:00
|
|
|
raise qubes.exc.QubesException(
|
backup: use 'scrypt' tool for backup encryption and integrity protection
The previously used `openssl dgst` and `openssl enc` handle key
stretching poorly - in the case of `openssl enc` the encryption key is
derived using a single MD5 iteration, without even any salt. This hardly
prevents brute force or even rainbow table attacks. To make things worse,
the same key is used for encryption and integrity protection, which eases
brute force even further.
All this is still about brute force attacks, so when using a long, high
entropy passphrase, it should still be relatively safe. But let's do
better.
According to the discussion in QubesOS/qubes-issues#971, the scrypt
algorithm is a good choice for key stretching (it isn't the best of all
existing ones, but a good one and widely adopted). At the same time, let's
switch away from the `openssl` tool, as it is very limited and apparently
not designed for production use. Use the `scrypt` tool, which is very
simple and does exactly what we need - encrypt the data and integrity
protect it. Its archive format has its own (simple) header with data
required by the `scrypt` algorithm, including salt. Internally data is
encrypted with AES256-CTR and integrity protected with HMAC-SHA256. For
details see:
https://github.com/tarsnap/scrypt/blob/master/FORMAT
This means change of backup format. Mainly:
1. HMAC is stored in scrypt header, so don't use separate file for it.
Instead have data in files with `.enc` extension.
2. For compatibility leave `backup-header` and `backup-header.hmac`. But
`backup-header.hmac` is really scrypt-encrypted version of `backup-header`.
3. For each file, prepend its identifier to the passphrase, to
authenticate filename itself too. Having this we can guard against
reordering archive files within a single backup and across backups. This
identifier is built as:
backup ID (from backup-header)!filename!
For backup-header itself, there is no backup ID (just 'backup-header!').
Fixes QubesOS/qubes-issues#971
2016-10-20 12:56:51 +02:00
|
|
|
"Corrupted backup header (hmac verification "
|
|
|
|
"failed). Is the password correct?")
|
|
|
|
filename = os.path.join(self.tmpdir, filename)
|
|
|
|
header_data = BackupHeader(open(filename, 'r').read())
|
|
|
|
os.unlink(filename)
|
|
|
|
|
2016-03-13 13:49:42 +01:00
|
|
|
return header_data
|
|
|
|
|
2016-09-29 01:52:31 +02:00
|
|
|
def _start_inner_extraction_worker(self, queue, relocate):
    """Start a worker process extracting the inner layer of the backup
    archive into :py:attr:`tmpdir`.

    The stream of file names is ended by pushing QUEUE_FINISHED or
    QUEUE_ERROR to the queue.

    :param queue: :py:class:`Queue` object to handle files from
    :param relocate: file relocation map, handed over to the worker
        unchanged
    :return: the worker process, already started
    :raises NotImplementedError: when the format version recorded in
        :py:attr:`header_data` is neither 2, 3 nor 4
    """

    # Setup worker to extract encrypted data chunks to the restore dirs
    # Create the process here to pass it options extracted from
    # backup header
    extractor_params = {
        'queue': queue,
        'base_dir': self.tmpdir,
        'passphrase': self.passphrase,
        'encrypted': self.header_data.encrypted,
        'compressed': self.header_data.compressed,
        'crypto_algorithm': self.header_data.crypto_algorithm,
        'verify_only': self.options.verify_only,
        'progress_callback': self.progress_callback,
        'relocate': relocate,
    }
    self.log.debug('Starting extraction worker in {}, file relocation '
                   'map: {!r}'.format(self.tmpdir, relocate))
    format_version = self.header_data.version
    if format_version == 2:
        extract_proc = ExtractWorker2(**extractor_params)
    elif format_version in [3, 4]:
        extractor_params['compression_filter'] = \
            self.header_data.compression_filter
        if format_version == 4:
            # encryption already handled
            extractor_params['encrypted'] = False
        extract_proc = ExtractWorker3(**extractor_params)
    else:
        # '{}' instead of '%d': a non-numeric version must still end up as
        # NotImplementedError rather than a TypeError from the formatting
        raise NotImplementedError(
            "Backup format version {} not supported".format(format_version))
    extract_proc.start()
    return extract_proc
|
|
|
|
|
|
|
|
def _process_qubes_xml(self):
    """Verify, unpack and load qubes.xml. Possibly convert its format if
    necessary. It expects that :py:attr:`header_data` is already populated,
    and :py:meth:`retrieve_backup_header` was called.

    :return: application object loaded from the ``qubes.xml`` found in
        the backup
    :raises qubes.exc.QubesException: when the extraction worker exits
        with a non-zero code
    """
    # Format version 1: qubes.xml is read directly from the backup
    # location, nothing to verify or extract
    if self.header_data.version == 1:
        return qubes.core2migration.Core2Qubes(
            os.path.join(self.backup_location, 'qubes.xml'),
            offline_mode=True)

    if self.header_data.version in [2, 3]:
        self._retrieve_backup_header_files(
            ['qubes.xml.000', 'qubes.xml.000.hmac'])
        self._verify_hmac("qubes.xml.000", "qubes.xml.000.hmac")
    else:
        self._retrieve_backup_header_files(['qubes.xml.000.enc'])
        self._verify_and_decrypt('qubes.xml.000.enc')

    # Feed the single verified chunk to the extraction worker and close
    # the stream right away
    queue = Queue()
    queue.put("qubes.xml.000")
    queue.put(QUEUE_FINISHED)

    extract_proc = self._start_inner_extraction_worker(queue, None)
    extract_proc.join()
    if extract_proc.exitcode != 0:
        raise qubes.exc.QubesException(
            "unable to extract the qubes backup. "
            "Check extracting process errors.")

    qubes_xml_path = os.path.join(self.tmpdir, 'qubes.xml')
    if self.header_data.version in [2, 3]:
        backup_app = qubes.core2migration.Core2Qubes(
            qubes_xml_path, offline_mode=True)
    else:
        backup_app = qubes.Qubes(qubes_xml_path, offline_mode=True)
    # Not needed anymore - all the data stored in backup_app
    os.unlink(qubes_xml_path)
    return backup_app
|
2013-11-25 05:41:13 +01:00
|
|
|
|
2016-09-29 01:52:31 +02:00
|
|
|
def _restore_vm_dirs(self, vms_dirs, vms_size, relocate):
    """Retrieve the requested directories from the backup stream, verify
    every received file and pass it to the inner extraction worker.

    :param vms_dirs: path prefixes to restore; files received from the
        stream that start with none of them are deleted and skipped
    :param vms_size: total size of the data to restore, used for the
        file count limit and for the log message
    :param relocate: file relocation map, handed over to the extraction
        worker unchanged
    :raises BackupCanceledError: when :py:attr:`canceled` got set
    :raises qubes.exc.QubesException: on an invalid file in the archive,
        a failing retrieval/helper process, premature end of the archive
        or a failed extraction
    """
    # Currently each VM consists of at most 7 archives (count
    # file_to_backup calls in backup_prepare()), but add some safety
    # margin for further extensions. Each archive is divided into 100MB
    # chunks. Additionally each file have own hmac file. So assume upper
    # limit as 2*(10*COUNT_OF_VMS+TOTAL_SIZE/100MB)
    limit_count = str(2 * (10 * len(vms_dirs) +
                           int(vms_size / (100 * 1024 * 1024))))

    self.log.debug("Working in temporary dir:" + self.tmpdir)
    self.log.info(
        "Extracting data: " + size_to_human(vms_size) + " to restore")

    # retrieve backup from the backup stream (either VM, or dom0 file)
    (retrieve_proc, filelist_pipe, error_pipe) = \
        self._start_retrieval_process(
            vms_dirs, limit_count, vms_size)

    to_extract = Queue()

    # extract data retrieved by retrieve_proc
    extract_proc = self._start_inner_extraction_worker(
        to_extract, relocate)

    try:
        filename = None
        hmacfile = None
        nextfile = None
        while True:
            if self.canceled:
                break
            if not extract_proc.is_alive():
                retrieve_proc.terminate()
                retrieve_proc.wait()
                if retrieve_proc in self.processes_to_kill_on_cancel:
                    self.processes_to_kill_on_cancel.remove(retrieve_proc)
                # wait for other processes (if any)
                for proc in self.processes_to_kill_on_cancel:
                    proc.wait()
                break
            if nextfile is not None:
                filename = nextfile
            else:
                filename = filelist_pipe.readline().strip()

            self.log.debug("Getting new file:" + filename)

            if not filename or filename == "EOF":
                break

            # if reading archive directly with tar, wait for next filename -
            # tar prints filename before processing it, so wait for
            # the next one to be sure that whole file was extracted
            if not self.backup_vm:
                nextfile = filelist_pipe.readline().strip()

            if self.header_data.version in [2, 3]:
                if not self.backup_vm:
                    hmacfile = nextfile
                    nextfile = filelist_pipe.readline().strip()
                else:
                    hmacfile = filelist_pipe.readline().strip()

                if self.canceled:
                    break

                self.log.debug("Getting hmac:" + hmacfile)
                if not hmacfile or hmacfile == "EOF":
                    # Premature end of archive, either of tar1_command or
                    # vmproc exited with error
                    break
            else:  # self.header_data.version == 4
                if not filename.endswith('.enc'):
                    raise qubes.exc.QubesException(
                        'Invalid file extension found in archive: {}'.
                        format(filename))

            if not any(filename.startswith(vm_dir) for vm_dir in vms_dirs):
                self.log.debug("Ignoring VM not selected for restore")
                os.unlink(os.path.join(self.tmpdir, filename))
                if hmacfile:
                    os.unlink(os.path.join(self.tmpdir, hmacfile))
                continue

            if self.header_data.version in [2, 3]:
                self._verify_hmac(filename, hmacfile)
            else:
                # _verify_and_decrypt will write output to a file with
                # '.enc' extension cut off. This is safe because:
                # - `scrypt` tool will override output, so if the file was
                # already there (received from the VM), it will be removed
                # - incoming archive extraction will refuse to override
                # existing file, so if `scrypt` already created one,
                # it can not be manipulated by the VM
                # - when the file is retrieved from the VM, it appears at
                # the final form - if it's visible, VM have no longer
                # influence over its content
                #
                # This all means that if the file was correctly verified
                # + decrypted, we will surely access the right file
                filename = self._verify_and_decrypt(filename)
            to_extract.put(os.path.join(self.tmpdir, filename))

        if self.canceled:
            raise BackupCanceledError("Restore canceled",
                                      tmpdir=self.tmpdir)

        if retrieve_proc.wait() != 0:
            raise qubes.exc.QubesException(
                "unable to read the qubes backup file {0}: {1}"
                .format(self.backup_location, error_pipe.read(
                    MAX_STDERR_BYTES)))
        # wait for other processes (if any)
        for proc in self.processes_to_kill_on_cancel:
            proc.wait()
            if proc.returncode != 0:
                # NOTE(review): the wording talks about a completed backup
                # although this is the restore path - confirm and reword
                raise qubes.exc.QubesException(
                    "Backup completed, but VM receiving it reported an error "
                    "(exit code {})".format(proc.returncode))

        if filename and filename != "EOF":
            raise qubes.exc.QubesException(
                "Premature end of archive, the last file was %s" % filename)
    except:
        # Deliberately catch everything: whatever went wrong, the worker
        # has to be told to stop before the exception is re-raised
        to_extract.put(QUEUE_ERROR)
        extract_proc.join()
        raise
    else:
        to_extract.put(QUEUE_FINISHED)

    self.log.debug("Waiting for the extraction process to finish...")
    extract_proc.join()
    self.log.debug("Extraction process finished with code: {}".format(
        extract_proc.exitcode))
    if extract_proc.exitcode != 0:
        raise qubes.exc.QubesException(
            "unable to extract the qubes backup. "
            "Check extracting process errors.")
|
|
|
|
|
|
|
|
def generate_new_name_for_conflicting_vm(self, orig_name, restore_info):
    """Find a VM name that collides neither with the restore set nor with
    the VMs already present in :py:attr:`app`.

    The original name is cut to 29 characters and, while it is taken, a
    growing numeric suffix (1, 2, ...) is appended.

    :param orig_name: name the VM has in the backup
    :param restore_info: mapping of names to entries having a ``name``
        attribute; both the keys and those ``name`` values count as taken
    :return: a free name, or ``None`` when the search gives up
    """
    base_name = orig_name[0:29]

    # restore_info is not modified here, so collect its names only once
    # instead of rebuilding the list on every loop iteration
    taken_names = set(restore_info.keys())
    taken_names.update(entry.name for entry in restore_info.values())

    number = 1
    new_name = base_name
    while new_name in taken_names or new_name in self.app.domains:
        new_name = str('{}{}'.format(base_name, number))
        number += 1
        if number == 100:
            # give up
            return None
    return new_name
|
|
|
|
|
|
|
|
def restore_info_verify(self, restore_info):
    """Re-evaluate the problems recorded for every VM in *restore_info*.

    For each entry except ``dom0`` the set of problems is cleared and
    filled again after checking: exclusion by the options, name conflict
    with an existing VM, availability of the template, of the netvm and
    of the kernel. Depending on the options a conflicting VM gets a new
    name and a missing template/netvm/kernel is replaced by a default.

    :param restore_info: mapping of VM names to :py:attr:`VMToRestore`
        objects; entries are updated in place
    :return: the same *restore_info* object
    """
    for vm in restore_info.keys():
        if vm in ['dom0']:
            continue

        vm_info = restore_info[vm]
        assert isinstance(vm_info, self.VMToRestore)

        vm_info.problems.clear()
        if vm in self.options.exclude:
            vm_info.problems.add(self.VMToRestore.EXCLUDED)

        if not self.options.verify_only and \
                vm in self.app.domains:
            if self.options.rename_conflicting:
                new_name = self.generate_new_name_for_conflicting_vm(
                    vm, restore_info
                )
                if new_name is not None:
                    vm_info.name = new_name
                else:
                    vm_info.problems.add(self.VMToRestore.ALREADY_EXISTS)
            else:
                vm_info.problems.add(self.VMToRestore.ALREADY_EXISTS)

        # check template
        if vm_info.template:
            template_name = vm_info.template
            try:
                host_template = self.app.domains[template_name]
            except KeyError:
                host_template = None
            if not host_template \
                    or not isinstance(host_template,
                                      qubes.vm.templatevm.TemplateVM):
                # Maybe the (custom) template is in the backup?
                if not (template_name in restore_info and
                        restore_info[template_name].good_to_go and
                        isinstance(restore_info[template_name].vm,
                                   qubes.vm.templatevm.TemplateVM)):
                    if self.options.use_default_template and \
                            self.app.default_template:
                        # remember the first original template only
                        if vm_info.orig_template is None:
                            vm_info.orig_template = template_name
                        vm_info.template = self.app.default_template.name
                    else:
                        vm_info.problems.add(
                            self.VMToRestore.MISSING_TEMPLATE)

        # check netvm
        if not vm_info.vm.property_is_default('netvm') and vm_info.netvm:
            netvm_name = vm_info.netvm

            try:
                netvm_on_host = self.app.domains[netvm_name]
            except KeyError:
                netvm_on_host = None
            # No netvm on the host?
            if not ((netvm_on_host is not None)
                    and netvm_on_host.provides_network):

                # Maybe the (custom) netvm is in the backup?
                if not (netvm_name in restore_info and
                        restore_info[netvm_name].good_to_go and
                        restore_info[netvm_name].vm.provides_network):
                    if self.options.use_default_netvm:
                        vm_info.vm.netvm = qubes.property.DEFAULT
                    elif self.options.use_none_netvm:
                        vm_info.netvm = None
                    else:
                        vm_info.problems.add(self.VMToRestore.MISSING_NETVM)

        # check kernel
        if hasattr(vm_info.vm, 'kernel'):
            installed_kernels = os.listdir(os.path.join(
                qubes.config.qubes_base_dir,
                qubes.config.system_path['qubes_kernels_base_dir']))
            # if uses default kernel - do not validate it
            # allow kernel=None only for HVM,
            # otherwise require valid kernel
            if not (vm_info.vm.property_is_default('kernel')
                    or (not vm_info.vm.kernel and vm_info.vm.hvm)
                    or vm_info.vm.kernel in installed_kernels):
                if self.options.use_default_kernel:
                    vm_info.vm.kernel = qubes.property.DEFAULT
                else:
                    vm_info.problems.add(self.VMToRestore.MISSING_KERNEL)

    return restore_info
|
2014-01-13 04:45:02 +01:00
|
|
|
|
2016-03-13 13:49:42 +01:00
|
|
|
def _is_vm_included_in_backup_v1(self, check_vm):
    '''Tell whether *check_vm* has data in a format version 1 backup.

    Presence is decided purely by checking whether the expected path
    exists below ``self.backup_location``.

    :param check_vm: VM object taken from the backup collection
    :return: ``True`` when the matching directory exists in the backup, \
        ``False`` otherwise
    '''
    # dom0 (qid 0) is looked up as a 'dom0-home' directory
    if check_vm.qid == 0:
        dom0_home = os.path.join(self.backup_location, 'dom0-home')
        return os.path.exists(dom0_home)

    # DisposableVM
    if check_vm.dir_path is None:
        return False

    # Translate the VM directory into the backup tree by swapping the
    # configured base directory for the backup location.
    base_dir = qubes.config.system_path["qubes_base_dir"]
    location_in_backup = check_vm.dir_path.replace(
        base_dir, self.backup_location)
    return os.path.exists(location_in_backup)
|
2015-05-03 14:45:01 +02:00
|
|
|
|
2016-03-14 13:30:55 +01:00
|
|
|
@staticmethod
def _is_vm_included_in_backup_v2(check_vm):
    '''Tell whether *check_vm* is flagged as part of the backup.

    :param check_vm: VM object taken from the backup collection
    :return: value of the ``backup-content`` feature, or ``False`` when \
        the VM has no such feature
    '''
    features = check_vm.features
    if 'backup-content' not in features:
        return False
    return features['backup-content']
|
|
|
|
|
2016-03-13 13:49:42 +01:00
|
|
|
def _find_template_name(self, template):
    '''Return the template name to use in place of *template*.

    :param template: template name as referenced in the backup
    :return: the entry of ``self.options.replace_template`` for \
        *template* when there is one, otherwise *template* unchanged
    '''
    replacements = self.options.replace_template
    return replacements[template] if template in replacements else template
|
2015-05-03 14:45:01 +02:00
|
|
|
|
2016-03-13 13:49:42 +01:00
|
|
|
def _is_vm_included_in_backup(self, vm):
    '''Check whether *vm* is included in the backup, per format version.

    Version 1 is delegated to :py:meth:`_is_vm_included_in_backup_v1`,
    versions 2, 3 and 4 to :py:meth:`_is_vm_included_in_backup_v2`.

    :param vm: VM object taken from the backup collection
    :return: result of the version specific check
    :raises qubes.exc.QubesException: for any other format version
    '''
    if self.header_data.version == 1:
        return self._is_vm_included_in_backup_v1(vm)
    if self.header_data.version in [2, 3, 4]:
        return self._is_vm_included_in_backup_v2(vm)
    raise qubes.exc.QubesException(
        "Unknown backup format version: {}".format(
            self.header_data.version))
|
2013-11-25 05:41:13 +01:00
|
|
|
|
2016-03-13 13:49:42 +01:00
|
|
|
def get_restore_info(self):
    '''Collect the VMs (and optionally dom0 home) found in the backup.

    Every non-dom0 VM of ``self.backup_app`` that is included in the
    backup gets a ``VMToRestore`` entry; the collection is then passed
    through :py:meth:`restore_info_verify`. When ``options.dom0_home`` is
    set and dom0 is included in the backup, a ``'dom0'`` entry is added
    afterwards.

    :return: dict mapping VM name (and possibly ``'dom0'``) to its \
        restore entry
    '''
    # Format versions:
    #  1 - Qubes R1, Qubes R2 beta1, beta2
    #  2 - Qubes R2 beta3+

    vms_to_restore = {}

    for vm in self.backup_app.domains:
        # Handle dom0 as special case later
        if vm.qid == 0:
            continue
        if not self._is_vm_included_in_backup(vm):
            continue
        self.log.debug("{} is included in backup".format(vm.name))

        entry = self.VMToRestore(vm)
        vms_to_restore[vm.name] = entry

        if hasattr(vm, 'template'):
            entry.template = self._find_template_name(vm.template.name)

        # Set to None to not confuse QubesVm object from backup
        # collection with host collection (further in clone_attrs).
        vm.netvm = None

    vms_to_restore = self.restore_info_verify(vms_to_restore)

    # ...and dom0 home
    if not self.options.dom0_home:
        return vms_to_restore
    if not self._is_vm_included_in_backup(self.backup_app.domains[0]):
        return vms_to_restore

    dom0 = self.backup_app.domains[0]
    if self.header_data.version == 1:
        # version 1 keeps the home directory in the first entry of
        # 'dom0-home'
        dom0_home = os.path.join(self.backup_location, 'dom0-home')
        subdir = os.listdir(dom0_home)[0]
    else:
        subdir = None
    dom0_entry = self.Dom0ToRestore(dom0, subdir)
    vms_to_restore['dom0'] = dom0_entry

    # first member of the 'qubes' group is taken as the local user
    local_user = grp.getgrnam('qubes').gr_mem[0]
    if dom0_entry.username != local_user \
            and not self.options.ignore_username_mismatch:
        dom0_entry.problems.add(self.Dom0ToRestore.USERNAME_MISMATCH)

    return vms_to_restore
|
2013-11-25 05:41:13 +01:00
|
|
|
|
2016-03-14 13:30:55 +01:00
|
|
|
@staticmethod
def get_restore_summary(restore_info):
    '''Render a text table describing what the backup contains.

    One row is produced per entry of *restore_info* that carries a VM,
    followed by a note when the entry has a known problem (or when it
    will be restored with a different template/name). A final ``Dom0``
    row is added when *restore_info* has a ``'dom0'`` key.

    Column values are produced by plain callables (no ``eval``) and are
    converted to text before being aligned, so the same text is used for
    both the width calculation and the row itself.

    :param restore_info: dict of restore entries, as built by \
        :py:meth:`get_restore_info`
    :return: the summary as a single string, each line terminated with \
        a newline
    '''
    def is_template(vm):
        return isinstance(vm, qubes.vm.templatevm.TemplateVM)

    def name_column(vm, vm_info):
        # templates are wrapped in [], network-providing VMs in {}
        return (('[' if is_template(vm) else '')
                + ('{' if vm.provides_network else '')
                + vm.name
                + (']' if is_template(vm) else '')
                + ('}' if vm.provides_network else ''))

    def type_column(vm, vm_info):
        if is_template(vm):
            return 'Tpl'
        if isinstance(vm, qubes.vm.appvm.AppVM):
            return 'App'
        return vm.__class__.__name__.replace('VM', '')

    def netvm_column(vm, vm_info):
        if vm_info.netvm is None:
            return '-'
        # '*' marks a netvm that is only the default setting
        default_mark = '*' if vm.property_is_default('netvm') else ''
        return default_mark + vm_info.netvm

    # column name -> callable(vm, vm_info) returning the cell value
    columns = {
        "qid": lambda vm, vm_info: vm.qid,
        "name": name_column,
        "type": type_column,
        "updbl": lambda vm, vm_info: 'Yes' if vm.updateable else '',
        "template": lambda vm, vm_info: (
            'n/a' if not hasattr(vm, 'template') else vm_info.template),
        "netvm": netvm_column,
        "label": lambda vm, vm_info: vm.label.name,
    }

    fields_to_display = ["name", "type", "template", "updbl",
                         "netvm", "label"]

    def problem_note(vm_info):
        '''Return the trailing remark for one VM row ('' if none).'''
        kinds = BackupRestore.VMToRestore
        problems = vm_info.problems
        if kinds.EXCLUDED in problems:
            return " <-- Excluded from restore"
        if kinds.ALREADY_EXISTS in problems:
            return " <-- A VM with the same name already exists on the host!"
        if kinds.MISSING_TEMPLATE in problems:
            return " <-- No matching template on the host " \
                   "or in the backup found!"
        if kinds.MISSING_NETVM in problems:
            return " <-- No matching netvm on the host " \
                   "or in the backup found!"
        if kinds.MISSING_KERNEL in problems:
            # set by restore_info_verify; previously shown without any
            # explanation
            return " <-- Kernel '{}' is not installed on the host!".format(
                vm_info.vm.kernel)
        note = ""
        if vm_info.orig_template:
            note += " <-- Original template was '{}'".format(
                vm_info.orig_template)
        if vm_info.name != vm_info.vm.name:
            note += " <-- Will be renamed to '{}'".format(vm_info.name)
        return note

    # Evaluate every cell once; widths and rows are derived from the
    # same text.
    rows = []
    for vm_info in restore_info.values():
        assert isinstance(vm_info, BackupRestore.VMToRestore)
        # Skip non-VM here
        if not vm_info.vm:
            continue
        cells = ["{}".format(columns[f](vm_info.vm, vm_info))
                 for f in fields_to_display]
        rows.append((cells, problem_note(vm_info)))

    # A column is at least as wide as its title
    widths = [len(f) for f in fields_to_display]
    for cells, _ in rows:
        widths = [max(width, len(cell))
                  for width, cell in zip(widths, cells)]

    def table_row(cells):
        return "".join("{0:>{1}} |".format(cell, width + 1)
                       for cell, width in zip(cells, widths))

    separator = "".join("-" * (width + 2) + "+" for width in widths)

    lines = [
        "The following VMs are included in the backup:",
        "",
        separator,
        table_row(fields_to_display),
        separator,
    ]
    for cells, note in rows:
        lines.append(table_row(cells) + note)

    if 'dom0' in restore_info:
        dom0_cells = [{"name": "Dom0", "type": "Home"}.get(f, "")
                      for f in fields_to_display]
        dom0_line = table_row(dom0_cells)
        if BackupRestore.Dom0ToRestore.USERNAME_MISMATCH in \
                restore_info['dom0'].problems:
            dom0_line += " <-- username in backup and dom0 mismatch"
        lines.append(dom0_line)

    return "\n".join(lines) + "\n"
|
2014-03-08 03:55:47 +01:00
|
|
|
|
2016-03-13 13:49:42 +01:00
|
|
|
def _restore_vm_dir_v1(self, src_dir, dst_dir):
    '''Copy one VM directory out of a format version 1 backup.

    :param src_dir: VM directory path using the configured base \
        directory; the base directory part is replaced with \
        ``self.backup_location`` to find the data in the backup
    :param dst_dir: destination handed to ``cp``
    :raises qubes.exc.QubesException: when ``cp`` exits with a non-zero \
        status
    '''
    base_dir = qubes.config.system_path["qubes_base_dir"]
    backup_src_dir = src_dir.replace(base_dir, self.backup_location)

    # We prefer to use Linux's cp, because it nicely handles sparse files
    command = ["cp", "-rp", "--reflink=auto", backup_src_dir, dst_dir]
    if subprocess.call(command) == 0:
        return
    raise qubes.exc.QubesException(
        "*** Error while copying file {0} to {1}".format(backup_src_dir,
                                                         dst_dir))
|
2015-05-03 14:45:01 +02:00
|
|
|
|
2016-09-29 01:52:31 +02:00
|
|
|
@staticmethod
def _templates_first(vms):
    '''Return *vms* as a list with template VMs moved to the front.

    Items may be VM objects or objects carrying the VM in a ``vm``
    attribute; anything else is sorted together with the non-templates.
    ``sorted`` is stable, so the relative order inside each group is
    kept.

    :param vms: iterable of VMs or VM wrappers
    :return: new sorted list
    '''
    def is_template(item):
        if isinstance(item, qubes.vm.BaseVM):
            return isinstance(item, qubes.vm.templatevm.TemplateVM)
        if hasattr(item, 'vm'):
            # unwrap and decide based on the wrapped VM
            return is_template(item.vm)
        return 0

    return sorted(vms, key=is_template, reverse=True)
|
|
|
|
|
2016-03-13 13:49:42 +01:00
|
|
|
def restore_do(self, restore_info):
    '''Restore the VMs (and dom0 home) described by *restore_info*.

    High level workflow:
    1. Create VMs object in host collection (qubes.xml)
    2. Create them on disk (vm.create_on_disk)
    3. Restore VM data, overriding/converting VM files
    4. Apply possible fixups and save qubes.xml

    A VM that has to be dropped along the way (data copy failed for a
    version 1 backup, or storage verification failed) is removed from
    disk and from ``self.app.domains`` and its ``restored_vm`` is reset
    to ``None``, so no later stage touches the removed VM again.

    In ``verify_only`` mode nothing beyond the data extraction is done
    and ``self.tmpdir`` is removed before returning.

    :param restore_info: dict of restore entries, as built by \
        :py:meth:`get_restore_info`
    :return: None
    :raises qubes.exc.QubesException: on extraction/copy errors when \
        ``options.verify_only`` is set
    :raises BackupCanceledError: when ``self.canceled`` is set (raised \
        after qubes.xml has been saved)
    '''

    # FIXME handle locking

    self._restore_vms_metadata(restore_info)

    # Perform VM restoration in backup order
    vms_dirs = []
    # NOTE(review): relocate presumably maps paths inside the backup to
    # their destination on the host - confirm against _restore_vm_dirs
    relocate = {}
    vms_size = 0
    for vm_info in self._templates_first(restore_info.values()):
        vm = vm_info.restored_vm
        if not vm:
            continue
        vms_size += int(vm_info.size)
        vms_dirs.append(vm_info.subdir)
        relocate[vm_info.subdir.rstrip('/')] = vm.dir_path
        for name, volume in vm.volumes.items():
            if not volume.save_on_stop:
                continue
            export_path = vm.storage.export(name)
            backup_path = os.path.join(
                vm_info.vm.dir_path, name + '.img')
            # only volumes stored somewhere else than the plain
            # '<vm dir>/<name>.img' need an explicit entry
            if backup_path != export_path:
                relocate[
                    os.path.join(vm_info.subdir, name + '.img')] = \
                    export_path

    if self.header_data.version >= 2:
        if 'dom0' in restore_info.keys() and \
                restore_info['dom0'].good_to_go:
            vms_dirs.append(os.path.dirname(restore_info['dom0'].subdir))
            vms_size += restore_info['dom0'].size

        try:
            self._restore_vm_dirs(vms_dirs=vms_dirs, vms_size=vms_size,
                                  relocate=relocate)
        except qubes.exc.QubesException:
            if self.options.verify_only:
                raise
            self.log.warning(
                "Some errors occurred during data extraction, "
                "continuing anyway to restore at least some "
                "VMs")
    else:
        for vm_info in self._templates_first(restore_info.values()):
            vm = vm_info.restored_vm
            if not vm:
                continue
            try:
                self._restore_vm_dir_v1(vm_info.vm.dir_path,
                                        os.path.dirname(vm.dir_path))
            except qubes.exc.QubesException as e:
                if self.options.verify_only:
                    raise
                self.log.error(
                    "Failed to restore VM '{}': {}".format(
                        vm.name, str(e)))
                vm.remove_from_disk()
                del self.app.domains[vm]
                # the VM no longer exists; keep the later stages
                # (event, storage verification) away from it
                vm_info.restored_vm = None

        if self.options.verify_only:
            self.log.warning(
                "Backup verification not supported for this backup format.")

    if self.options.verify_only:
        shutil.rmtree(self.tmpdir)
        return

    for vm_info in self._templates_first(restore_info.values()):
        restored = vm_info.restored_vm
        if not restored:
            continue
        # best effort: a failing event handler must not abort the restore
        try:
            restored.fire_event('domain-restore')
        except Exception as err:
            self.log.error("ERROR during appmenu restore: "
                           "{0}".format(err))
            self.log.warning(
                "*** VM '{0}' will not have appmenus".format(vm_info.name))

        try:
            restored.storage.verify()
        except Exception as err:
            self.log.error("ERROR: {0}".format(err))
            restored.remove_from_disk()
            del self.app.domains[restored]
            vm_info.restored_vm = None

    self.app.save()

    if self.canceled:
        if self.header_data.version >= 2:
            raise BackupCanceledError("Restore canceled",
                                      tmpdir=self.tmpdir)
        else:
            raise BackupCanceledError("Restore canceled")

    # ... and dom0 home as last step
    if 'dom0' in restore_info.keys() and restore_info['dom0'].good_to_go:
        backup_path = restore_info['dom0'].subdir
        local_user = grp.getgrnam('qubes').gr_mem[0]
        home_dir = pwd.getpwnam(local_user).pw_dir
        if self.header_data.version == 1:
            backup_dom0_home_dir = os.path.join(self.backup_location,
                                                backup_path)
        else:
            backup_dom0_home_dir = os.path.join(self.tmpdir, backup_path)
        restore_home_backupdir = "home-pre-restore-{0}".format(
            time.strftime("%Y-%m-%d-%H%M%S"))

        self.log.info(
            "Restoring home of user '{0}'...".format(local_user))
        self.log.info(
            "Existing files/dirs backed up in '{0}' dir".format(
                restore_home_backupdir))
        pre_restore_dir = os.path.join(home_dir, restore_home_backupdir)
        os.mkdir(pre_restore_dir)
        for f in os.listdir(backup_dom0_home_dir):
            home_file = os.path.join(home_dir, f)
            restored_file = os.path.join(backup_dom0_home_dir, f)
            # keep whatever is currently there, out of the way
            if os.path.exists(home_file):
                os.rename(home_file, os.path.join(pre_restore_dir, f))
            if self.header_data.version == 1:
                cp_retcode = subprocess.call(
                    ["cp", "-nrp", "--reflink=auto",
                     restored_file, home_file])
                if cp_retcode != 0:
                    # do not abort: remaining files may still restore fine
                    self.log.error(
                        "*** Error while copying file {0} to {1}".format(
                            restored_file, home_file))
            elif self.header_data.version >= 2:
                shutil.move(restored_file, home_file)
        retcode = subprocess.call(['sudo', 'chown', '-R',
                                   local_user, home_dir])
        if retcode != 0:
            self.log.error("*** Error while setting home directory owner")

    shutil.rmtree(self.tmpdir)
    self.log.info("-> Done. Please install updates for all the restored "
                  "templates.")
|
|
|
|
|
|
|
|
def _restore_vms_metadata(self, restore_info):
    '''Create host VM objects for the restorable entries of *restore_info*.

    For every entry that carries a VM and is ``good_to_go`` (templates
    first) a new VM of the same class is added to ``self.app`` under the
    entry's (possibly changed) name, the clonable properties except VM
    references are copied from the backup VM and - unless
    ``options.verify_only`` is set - the VM is created on disk. The new
    VM is stored in the entry's ``restored_vm``. Once all VMs exist,
    non-default ``netvm`` settings are applied.

    A VM that cannot be set up is skipped and removed again from
    ``self.app.domains``; this includes the case where its directory
    already exists and cannot be moved out of the way.

    :param restore_info: dict of restore entries, as built by \
        :py:meth:`get_restore_info`; entries are updated in place
    :return: None
    '''
    vms = {}
    for vm_info in restore_info.values():
        assert isinstance(vm_info, self.VMToRestore)
        if not vm_info.vm:
            continue
        if not vm_info.good_to_go:
            continue
        vm = vm_info.vm
        vms[vm.name] = vm

    # First load templates, then other VMs
    for vm in self._templates_first(vms.values()):
        if self.canceled:
            # only break the loop to save qubes.xml
            # with already restored VMs
            break
        self.log.info("-> Restoring {0}...".format(vm.name))
        kwargs = {}
        if hasattr(vm, 'template'):
            template = restore_info[vm.name].template
            # handle potentially renamed template
            if template in restore_info \
                    and restore_info[template].good_to_go:
                template = restore_info[template].name
            kwargs['template'] = template

        new_vm = None
        vm_name = restore_info[vm.name].name

        try:
            # first only minimal set, later clone_properties
            # will be called
            cls = self.app.get_vm_class(vm.__class__.__name__)
            new_vm = self.app.add_new_vm(
                cls,
                name=vm_name,
                label=vm.label,
                installed_by_rpm=False,
                **kwargs)
            if os.path.exists(new_vm.dir_path):
                # something already occupies the VM directory - try to
                # move it aside to a uniquely named sibling
                move_to_path = tempfile.mkdtemp('', os.path.basename(
                    new_vm.dir_path), os.path.dirname(new_vm.dir_path))
                try:
                    os.rename(new_vm.dir_path, move_to_path)
                    self.log.warning(
                        "*** Directory {} already exists! It has "
                        "been moved to {}".format(new_vm.dir_path,
                                                  move_to_path))
                except OSError:
                    self.log.error(
                        "*** Directory {} already exists and "
                        "cannot be moved!".format(new_vm.dir_path))
                    self.log.warning("Skipping VM {}...".format(
                        vm.name))
                    # the VM is skipped, so it must not stay registered
                    # (it would end up in qubes.xml pointing at a
                    # directory that was never restored)
                    del self.app.domains[new_vm.qid]
                    continue
        except Exception as err:
            self.log.error("ERROR: {0}".format(err))
            self.log.warning("*** Skipping VM: {0}".format(vm.name))
            if new_vm:
                del self.app.domains[new_vm.qid]
            continue

        # remove no longer needed backup metadata
        if 'backup-content' in vm.features:
            del vm.features['backup-content']
            del vm.features['backup-size']
            del vm.features['backup-path']
        try:
            # exclude VM references - handled manually according to
            # restore options
            proplist = [prop for prop in new_vm.property_list()
                        if prop.clone and prop.__name__ not in
                        ['template', 'netvm', 'dispvm_netvm']]
            new_vm.clone_properties(vm, proplist=proplist)
        except Exception as err:
            self.log.error("ERROR: {0}".format(err))
            self.log.warning("*** Some VM property will not be "
                             "restored")

        if not self.options.verify_only:
            try:
                # have it here, to (maybe) patch storage config before
                # creating child VMs (template first)
                # TODO: adjust volumes config - especially size
                new_vm.create_on_disk(pool=self.options.override_pool)
            except qubes.exc.QubesException as e:
                self.log.warning("Failed to create VM {}: {}".format(
                    vm.name, str(e)))
                del self.app.domains[new_vm]
                continue

        restore_info[vm.name].restored_vm = new_vm

    # Set network dependencies - only non-default netvm setting
    for vm in vms.values():
        vm_info = restore_info[vm.name]
        vm_name = vm_info.name
        try:
            host_vm = self.app.domains[vm_name]
        except KeyError:
            # Failed/skipped VM
            continue

        if not vm.property_is_default('netvm'):
            if vm_info.netvm in restore_info:
                # netvm comes from the backup too - use the name it is
                # restored under
                host_vm.netvm = restore_info[vm_info.netvm].name
            else:
                host_vm.netvm = vm_info.netvm
|
2013-11-25 05:41:13 +01:00
|
|
|
|
|
|
|
# vim:sw=4:et:
|