backup: use 'scrypt' tool for backup encryption and integrity protection

`openssl dgst` and `openssl enc` used previously poorly handle key
stretching - in case of `openssl enc` encryption key is derived using
single MD5 iteration, without even any salt. This hardly prevent
brute force or even rainbow tables attacks. To make things worse, the
same key is used for encryption and integrity protection which ease
brute force even further.
All this is still about brute force attacks, so when using long, high
entropy passphrase, it should be still relatively safe. But lets do
better.
According to discussion in QubesOS/qubes-issues#971, scrypt algorithm is
a good choice for key stretching (it isn't the best of all existing, but
a good one and widely adopted). At the same time, lets switch away from
`openssl` tool, as it is very limited and apparently not designed for
production use. Use `scrypt` tool, which is very simple and does exactly
what we need - encrypt the data and integrity protect it. Its archive
format have own (simple) header with data required by the `scrypt`
algorithm, including salt. Internally data is encrypted with AES256-CTR
and integrity protected with HMAC-SHA256. For details see:
https://github.com/tarsnap/scrypt/blob/master/FORMAT

This means change of backup format. Mainly:

1. HMAC is stored in scrypt header, so don't use separate file for it.
Instead have data in files with `.enc` extension.
2. For compatibility leave `backup-header` and `backup-header.hmac`. But
`backup-header.hmac` is really scrypt-encrypted version of `backup-header`.
3. For each file, prepend its identifier to the passphrase, to
authenticate filename itself too. Having this we can guard against
reordering archive files within a single backup and across backups. This
identifier is built as:

        backup ID (from backup-header)!filename!

For backup-header itself, there is no backup ID (just 'backup-header!').

Fixes QubesOS/qubes-issues#971
This commit is contained in:
Marek Marczykowski-Górecki 2016-10-20 12:56:51 +02:00
parent d7c355eadb
commit 418d749680
No known key found for this signature in database
GPG Key ID: 063938BA42CFA724
2 changed files with 258 additions and 145 deletions

View File

@ -24,6 +24,8 @@
from __future__ import unicode_literals
import itertools
import logging
import termios
from qubes.utils import size_to_human
import sys
import stat
@ -50,7 +52,9 @@ QUEUE_FINISHED = "FINISHED"
HEADER_FILENAME = 'backup-header'
DEFAULT_CRYPTO_ALGORITHM = 'aes-256-cbc'
DEFAULT_HMAC_ALGORITHM = 'SHA512'
# 'scrypt' is not exactly HMAC algorithm, but a tool we use to
# integrity-protect the data
DEFAULT_HMAC_ALGORITHM = 'scrypt'
DEFAULT_COMPRESSION_FILTER = 'gzip'
CURRENT_BACKUP_FORMAT_VERSION = '4'
# Maximum size of error message get from process stderr (including VM process)
@ -213,6 +217,63 @@ class SendWorker(Process):
self.log.debug("Finished sending thread")
def launch_proc_with_pty(args, stdin=None, stdout=None, stderr=None, echo=True):
"""Similar to pty.fork, but handle stdin/stdout according to parameters
instead of connecting to the pty
:return tuple (subprocess.Popen, pty_master)
"""
def set_ctty(ctty_fd, master_fd):
os.setsid()
os.close(master_fd)
fcntl.ioctl(ctty_fd, termios.TIOCSCTTY, 0)
if not echo:
termios_p = termios.tcgetattr(ctty_fd)
# termios_p.c_lflags
termios_p[3] &= ~termios.ECHO
termios.tcsetattr(ctty_fd, termios.TCSANOW, termios_p)
(pty_master, pty_slave) = os.openpty()
p = subprocess.Popen(args, stdin=stdin, stdout=stdout, stderr=stderr,
preexec_fn=lambda: set_ctty(pty_slave, pty_master))
os.close(pty_slave)
return p, os.fdopen(pty_master, 'w+')
def launch_scrypt(action, input_name, output_name, passphrase):
'''
Launch 'scrypt' process, pass passphrase to it and return
subprocess.Popen object.
:param action: 'enc' or 'dec'
:param input_name: input path or '-' for stdin
:param output_name: output path or '-' for stdout
:param passphrase: passphrase
:return: subprocess.Popen object
'''
command_line = ['scrypt', action, input_name, output_name]
(p, pty) = launch_proc_with_pty(command_line,
stdin=subprocess.PIPE if input_name == '-' else None,
stdout=subprocess.PIPE if output_name == '-' else None,
stderr=subprocess.PIPE,
echo=False)
if action == 'enc':
prompts = ('Please enter passphrase: ', 'Please confirm passphrase: ')
else:
prompts = ('Please enter passphrase: ',)
for prompt in prompts:
actual_prompt = p.stderr.read(len(prompt))
if actual_prompt != prompt:
raise qubes.exc.QubesException(
'Unexpected prompt from scrypt: {}'.format(actual_prompt))
pty.write(passphrase + '\n')
pty.flush()
# save it here, so garbage collector would not close it (which would kill
# the child)
p.pty = pty
return p
class Backup(object):
class FileToBackup(object):
def __init__(self, file_path, subdir=None, name=None):
@ -476,15 +537,18 @@ class Backup(object):
compression_filter=self.compression_filter,
)
backup_header.save(header_file_path)
# Start encrypt, scrypt will also handle integrity
# protection
scrypt_passphrase = HEADER_FILENAME + '!' + self.passphrase.encode(
'utf-8')
scrypt = launch_scrypt(
'enc', header_file_path, header_file_path + '.hmac',
scrypt_passphrase)
hmac = subprocess.Popen(
["openssl", "dgst", "-" + self.hmac_algorithm,
"-hmac", self.passphrase],
stdin=open(header_file_path, "r"),
stdout=open(header_file_path + ".hmac", "w"))
if hmac.wait() != 0:
if scrypt.wait() != 0:
raise qubes.exc.QubesException(
"Failed to compute hmac of header file")
"Failed to compute hmac of header file: "
+ scrypt.stderr.read())
return HEADER_FILENAME, HEADER_FILENAME + ".hmac"
@ -640,60 +704,39 @@ class Backup(object):
self.log.debug(" ".join(tar_cmdline))
# Tips: Popen(bufsize=0)
# Pipe: tar-sparse | encryptor [| hmac] | tar | backup_target
# Pipe: tar-sparse [| hmac] | tar | backup_target
# Pipe: tar-sparse | scrypt | tar | backup_target
# TODO: log handle stderr
tar_sparse = subprocess.Popen(
tar_cmdline, stdin=subprocess.PIPE)
tar_cmdline)
self.processes_to_kill_on_cancel.append(tar_sparse)
# Wait for compressor (tar) process to finish or for any
# error of other subprocesses
i = 0
pipe = open(backup_pipe, 'rb')
run_error = "paused"
encryptor = None
if self.encrypted:
# Start encrypt
# If no cipher is provided,
# the data is forwarded unencrypted !!!
encryptor = subprocess.Popen([
"openssl", "enc",
"-e", "-" + self.crypto_algorithm,
"-pass", "pass:" + passphrase],
stdin=open(backup_pipe, 'rb'),
stdout=subprocess.PIPE)
pipe = encryptor.stdout
else:
pipe = open(backup_pipe, 'rb')
while run_error == "paused":
# Start HMAC
hmac = subprocess.Popen([
"openssl", "dgst", "-" + self.hmac_algorithm,
"-hmac", passphrase],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE)
# Prepare a first chunk
chunkfile = backup_tempfile + "." + "%03d" % i
chunkfile = backup_tempfile + ".%03d.enc" % i
i += 1
chunkfile_p = open(chunkfile, 'wb')
# Start encrypt, scrypt will also handle integrity
# protection
scrypt_passphrase = os.path.relpath(chunkfile[:-4],
self.tmpdir) + '!' + passphrase
scrypt = launch_scrypt(
"enc", "-", chunkfile, scrypt_passphrase)
run_error = handle_streams(
pipe,
{'hmac_data': hmac.stdin,
'backup_target': chunkfile_p,
},
{'hmac': hmac,
'vmproc': vmproc,
{'backup_target': scrypt.stdin},
{'vmproc': vmproc,
'addproc': tar_sparse,
'streamproc': encryptor,
'scrypt': scrypt,
},
self.chunk_size,
self._add_vm_progress
)
chunkfile_p.close()
self.log.debug(
"12 returned: {}".format(run_error))
@ -703,12 +746,7 @@ class Backup(object):
tar_sparse.terminate()
except OSError:
pass
try:
hmac.terminate()
except OSError:
pass
tar_sparse.wait()
hmac.wait()
to_send.put(QUEUE_ERROR)
send_proc.join()
shutil.rmtree(self.tmpdir)
@ -724,29 +762,16 @@ class Backup(object):
"Failed to perform backup: error in " +
run_error)
scrypt.stdin.close()
scrypt.wait()
self.log.debug("scrypt return code: {}".format(
scrypt.poll()))
# Send the chunk to the backup target
self._queue_put_with_check(
send_proc, vmproc, to_send,
os.path.relpath(chunkfile, self.tmpdir))
# Close HMAC
hmac.stdin.close()
hmac.wait()
self.log.debug("HMAC proc return code: {}".format(
hmac.poll()))
# Write HMAC data next to the chunk file
hmac_data = hmac.stdout.read()
self.log.debug(
"Writing hmac to {}.hmac".format(chunkfile))
with open(chunkfile + ".hmac", 'w') as hmac_file:
hmac_file.write(hmac_data)
# Send the HMAC to the backup target
self._queue_put_with_check(
send_proc, vmproc, to_send,
os.path.relpath(chunkfile, self.tmpdir) + ".hmac")
if tar_sparse.poll() is None or run_error == "size_limit":
run_error = "paused"
else:
@ -1310,6 +1335,8 @@ def get_supported_hmac_algo(hmac_algorithm=None):
# Start with provided default
if hmac_algorithm:
yield hmac_algorithm
if hmac_algorithm != 'scrypt':
yield 'scrypt'
proc = subprocess.Popen(['openssl', 'list-message-digest-algorithms'],
stdout=subprocess.PIPE)
for algo in proc.stdout.readlines():
@ -1529,6 +1556,10 @@ class BackupRestore(object):
def _verify_hmac(self, filename, hmacfile, algorithm=None):
def load_hmac(hmac_text):
if filter(lambda x: ord(x) not in range(128),
hmac_text):
raise qubes.exc.QubesException(
"Invalid content of {}".format(hmacfile))
hmac_text = hmac_text.strip().split("=")
if len(hmac_text) > 1:
hmac_text = hmac_text[1].strip()
@ -1547,6 +1578,17 @@ class BackupRestore(object):
"ERROR: expected hmac for {}, but got {}".
format(filename, hmacfile))
if algorithm == 'scrypt':
# in case of 'scrypt' _verify_hmac is only used for backup header
assert filename == HEADER_FILENAME
self._verify_and_decrypt(hmacfile, HEADER_FILENAME + '.dec')
if open(os.path.join(self.tmpdir, filename)).read() != \
open(os.path.join(self.tmpdir, filename + '.dec')).read():
raise qubes.exc.QubesException(
'Invalid hmac on {}'.format(filename))
else:
return True
hmac_proc = subprocess.Popen(
["openssl", "dgst", "-" + algorithm, "-hmac", passphrase],
stdin=open(os.path.join(self.tmpdir, filename), 'rb'),
@ -1572,6 +1614,72 @@ class BackupRestore(object):
"Is the passphrase correct?".
format(filename, load_hmac(hmac_stdout)))
def _verify_and_decrypt(self, filename, output=None):
assert filename.endswith('.enc') or filename.endswith('.hmac')
fullname = os.path.join(self.tmpdir, filename)
(origname, _) = os.path.splitext(filename)
if output:
fulloutput = os.path.join(self.tmpdir, output)
else:
fulloutput = os.path.join(self.tmpdir, origname)
passphrase = origname + '!' + self.passphrase.encode('utf-8')
p = launch_scrypt('dec', fullname, fulloutput, passphrase)
(_, stderr) = p.communicate()
if p.returncode != 0:
os.unlink(fulloutput)
raise qubes.exc.QubesException('failed to decrypt {}: {}'.format(
fullname, stderr))
# encrypted file is no longer needed
os.unlink(fullname)
return origname
def _retrieve_backup_header_files(self, files, allow_none=False):
(retrieve_proc, filelist_pipe, error_pipe) = \
self._start_retrieval_process(
files, len(files), 1024 * 1024)
filelist = filelist_pipe.read()
retrieve_proc_returncode = retrieve_proc.wait()
if retrieve_proc in self.processes_to_kill_on_cancel:
self.processes_to_kill_on_cancel.remove(retrieve_proc)
extract_stderr = error_pipe.read(MAX_STDERR_BYTES)
# wait for other processes (if any)
for proc in self.processes_to_kill_on_cancel:
if proc.wait() != 0:
raise qubes.exc.QubesException(
"Backup header retrieval failed (exit code {})".format(
proc.wait())
)
if retrieve_proc_returncode != 0:
if not filelist and 'Not found in archive' in extract_stderr:
if allow_none:
return None
else:
raise qubes.exc.QubesException(
"unable to read the qubes backup file {0} ({1}): {2}".format(
self.backup_location,
retrieve_proc.wait(),
extract_stderr
))
actual_files = filelist.splitlines()
if sorted(actual_files) != sorted(files):
raise qubes.exc.QubesException(
'unexpected files in archive: got {!r}, expeced {!r}'.format(
actual_files, files
))
for f in files:
if not os.path.exists(os.path.join(self.tmpdir, f)):
if allow_none:
return None
else:
raise qubes.exc.QubesException(
'Unable to retrieve file {} from backup {}: {}'.format(
f, self.backup_location, extract_stderr
)
)
return files
def _retrieve_backup_header(self):
"""Retrieve backup header and qubes.xml. Only backup header is
analyzed, qubes.xml is left as-is
@ -1588,82 +1696,47 @@ class BackupRestore(object):
header_data.version = 1
return header_data
(retrieve_proc, filelist_pipe, error_pipe) = \
self._start_retrieval_process(
['backup-header', 'backup-header.hmac',
'qubes.xml.000', 'qubes.xml.000.hmac'], 4, 1024 * 1024)
header_files = self._retrieve_backup_header_files(
['backup-header', 'backup-header.hmac'], allow_none=True)
expect_tar_error = False
filename = filelist_pipe.readline().strip()
hmacfile = filelist_pipe.readline().strip()
# tar output filename before actually extracting it, so wait for the
# next one before trying to access it
if not self.backup_vm:
filelist_pipe.readline().strip()
self.log.debug("Got backup header and hmac: {}, {}".format(
filename, hmacfile))
if not filename or filename == "EOF" or \
not hmacfile or hmacfile == "EOF":
retrieve_proc.wait()
proc_error_msg = error_pipe.read(MAX_STDERR_BYTES)
raise qubes.exc.QubesException(
"Premature end of archive while receiving "
"backup header. Process output:\n" + proc_error_msg)
file_ok = False
hmac_algorithm = DEFAULT_HMAC_ALGORITHM
for hmac_algo in get_supported_hmac_algo(hmac_algorithm):
try:
if self._verify_hmac(filename, hmacfile, hmac_algo):
file_ok = True
hmac_algorithm = hmac_algo
break
except qubes.exc.QubesException:
# Ignore exception here, try the next algo
pass
if not file_ok:
raise qubes.exc.QubesException(
"Corrupted backup header (hmac verification "
"failed). Is the password correct?")
if os.path.basename(filename) == HEADER_FILENAME:
filename = os.path.join(self.tmpdir, filename)
header_data = BackupHeader(open(filename, 'r').read())
os.unlink(filename)
else:
# if no header found, create one with guessed HMAC algo
if not header_files:
# R2-Beta3 didn't have backup header, so if none is found,
# assume it's version=2 and use values present at that time
header_data = BackupHeader(
version=2,
hmac_algorithm=hmac_algorithm,
# place explicitly this value, because it is what format_version
# 2 have
hmac_algorithm='SHA1',
crypto_algorithm='aes-256-cbc',
# TODO: set encrypted to something...
)
# when tar do not find expected file in archive, it exit with
# code 2. This will happen because we've requested backup-header
# file, but the archive do not contain it. Ignore this particular
# error.
if not self.backup_vm:
expect_tar_error = True
else:
filename = HEADER_FILENAME
hmacfile = HEADER_FILENAME + '.hmac'
self.log.debug("Got backup header and hmac: {}, {}".format(
filename, hmacfile))
if retrieve_proc.wait() != 0 and not expect_tar_error:
raise qubes.exc.QubesException(
"unable to read the qubes backup file {0} ({1}): {2}".format(
self.backup_location,
retrieve_proc.wait(),
error_pipe.read(MAX_STDERR_BYTES)
))
if retrieve_proc in self.processes_to_kill_on_cancel:
self.processes_to_kill_on_cancel.remove(retrieve_proc)
# wait for other processes (if any)
for proc in self.processes_to_kill_on_cancel:
if proc.wait() != 0:
file_ok = False
hmac_algorithm = DEFAULT_HMAC_ALGORITHM
for hmac_algo in get_supported_hmac_algo(hmac_algorithm):
try:
if self._verify_hmac(filename, hmacfile, hmac_algo):
file_ok = True
break
except qubes.exc.QubesException as e:
self.log.debug(
'Failed to verify {} using {}: {}'.format(
hmacfile, hmac_algo, str(e)))
# Ignore exception here, try the next algo
pass
if not file_ok:
raise qubes.exc.QubesException(
"Backup header retrieval failed (exit code {})".format(
proc.wait())
)
"Corrupted backup header (hmac verification "
"failed). Is the password correct?")
filename = os.path.join(self.tmpdir, filename)
header_data = BackupHeader(open(filename, 'r').read())
os.unlink(filename)
return header_data
def _start_inner_extraction_worker(self, queue, relocate):
@ -1696,6 +1769,9 @@ class BackupRestore(object):
elif format_version in [3, 4]:
extractor_params['compression_filter'] = \
self.header_data.compression_filter
if format_version == 4:
# encryption already handled
extractor_params['encrypted'] = False
extract_proc = ExtractWorker3(**extractor_params)
else:
raise NotImplementedError(
@ -1714,7 +1790,14 @@ class BackupRestore(object):
offline_mode=True)
return backup_app
else:
self._verify_hmac("qubes.xml.000", "qubes.xml.000.hmac")
if self.header_data.version in [2, 3]:
self._retrieve_backup_header_files(
['qubes.xml.000', 'qubes.xml.000.hmac'])
self._verify_hmac("qubes.xml.000", "qubes.xml.000.hmac")
else:
self._retrieve_backup_header_files(['qubes.xml.000.enc'])
self._verify_and_decrypt('qubes.xml.000.enc')
queue = Queue()
queue.put("qubes.xml.000")
queue.put(QUEUE_FINISHED)
@ -1762,6 +1845,7 @@ class BackupRestore(object):
try:
filename = None
hmacfile = None
nextfile = None
while True:
if self.canceled:
@ -1785,30 +1869,58 @@ class BackupRestore(object):
if not filename or filename == "EOF":
break
hmacfile = filelist_pipe.readline().strip()
if self.canceled:
break
# if reading archive directly with tar, wait for next filename -
# tar prints filename before processing it, so wait for
# the next one to be sure that whole file was extracted
if not self.backup_vm:
nextfile = filelist_pipe.readline().strip()
self.log.debug("Getting hmac:" + hmacfile)
if not hmacfile or hmacfile == "EOF":
# Premature end of archive, either of tar1_command or
# vmproc exited with error
break
if self.header_data.version in [2, 3]:
if not self.backup_vm:
hmacfile = nextfile
nextfile = filelist_pipe.readline().strip()
else:
hmacfile = filelist_pipe.readline().strip()
if self.canceled:
break
self.log.debug("Getting hmac:" + hmacfile)
if not hmacfile or hmacfile == "EOF":
# Premature end of archive, either of tar1_command or
# vmproc exited with error
break
else: # self.header_data.version == 4
if not filename.endswith('.enc'):
raise qubes.exc.QubesException(
'Invalid file extension found in archive: {}'.
format(filename))
if not any(map(lambda x: filename.startswith(x), vms_dirs)):
self.log.debug("Ignoring VM not selected for restore")
os.unlink(os.path.join(self.tmpdir, filename))
os.unlink(os.path.join(self.tmpdir, hmacfile))
if hmacfile:
os.unlink(os.path.join(self.tmpdir, hmacfile))
continue
if self._verify_hmac(filename, hmacfile):
to_extract.put(os.path.join(self.tmpdir, filename))
if self.header_data.version in [2, 3]:
self._verify_hmac(filename, hmacfile)
else:
# _verify_and_decrypt will write output to a file with
# '.enc' extension cut off. This is safe because:
# - `scrypt` tool will override output, so if the file was
# already there (received from the VM), it will be removed
# - incoming archive extraction will refuse to override
# existing file, so if `scrypt` already created one,
# it can not be manipulated by the VM
# - when the file is retrieved from the VM, it appears at
# the final form - if it's visible, VM have no longer
# influence over its content
#
# This all means that if the file was correctly verified
# + decrypted, we will surely access the right file
filename = self._verify_and_decrypt(filename)
to_extract.put(os.path.join(self.tmpdir, filename))
if self.canceled:
raise BackupCanceledError("Restore canceled",

View File

@ -83,6 +83,7 @@ Requires: createrepo
Requires: gnome-packagekit
Requires: cronie
Requires: bsdtar
Requires: scrypt
# for qubes-hcl-report
Requires: dmidecode
Requires: PyQt4