Kaynağa Gözat

backup: use 'scrypt' tool for backup encryption and integrity protection

`openssl dgst` and `openssl enc` used previously poorly handle key
stretching - in case of `openssl enc` encryption key is derived using
single MD5 iteration, without even any salt. This hardly prevent
brute force or even rainbow tables attacks. To make things worse, the
same key is used for encryption and integrity protection which ease
brute force even further.
All this is still about brute force attacks, so when using long, high
entropy passphrase, it should be still relatively safe. But lets do
better.
According to discussion in QubesOS/qubes-issues#971, scrypt algorithm is
a good choice for key stretching (it isn't the best of all existing, but
a good one and widely adopted). At the same time, lets switch away from
`openssl` tool, as it is very limited and apparently not designed for
production use. Use `scrypt` tool, which is very simple and does exactly
what we need - encrypt the data and integrity protect it. Its archive
format have own (simple) header with data required by the `scrypt`
algorithm, including salt. Internally data is encrypted with AES256-CTR
and integrity protected with HMAC-SHA256. For details see:
https://github.com/tarsnap/scrypt/blob/master/FORMAT

This means change of backup format. Mainly:

1. HMAC is stored in scrypt header, so don't use separate file for it.
Instead have data in files with `.enc` extension.
2. For compatibility leave `backup-header` and `backup-header.hmac`. But
`backup-header.hmac` is really scrypt-encrypted version of `backup-header`.
3. For each file, prepend its identifier to the passphrase, to
authenticate filename itself too. Having this we can guard against
reordering archive files within a single backup and across backups. This
identifier is built as:

        backup ID (from backup-header)!filename!

For backup-header itself, there is no backup ID (just 'backup-header!').

Fixes QubesOS/qubes-issues#971
Marek Marczykowski-Górecki 7 yıl önce
ebeveyn
işleme
418d749680
2 değiştirilmiş dosya ile 260 ekleme ve 147 silme
  1. 259 147
      qubes/backup.py
  2. 1 0
      rpm_spec/core-dom0.spec

+ 259 - 147
qubes/backup.py

@@ -24,6 +24,8 @@
 from __future__ import unicode_literals
 import itertools
 import logging
+import termios
+
 from qubes.utils import size_to_human
 import sys
 import stat
@@ -50,7 +52,9 @@ QUEUE_FINISHED = "FINISHED"
 
 HEADER_FILENAME = 'backup-header'
 DEFAULT_CRYPTO_ALGORITHM = 'aes-256-cbc'
-DEFAULT_HMAC_ALGORITHM = 'SHA512'
+# 'scrypt' is not exactly HMAC algorithm, but a tool we use to
+# integrity-protect the data
+DEFAULT_HMAC_ALGORITHM = 'scrypt'
 DEFAULT_COMPRESSION_FILTER = 'gzip'
 CURRENT_BACKUP_FORMAT_VERSION = '4'
 # Maximum size of error message get from process stderr (including VM process)
@@ -213,6 +217,63 @@ class SendWorker(Process):
         self.log.debug("Finished sending thread")
 
 
+def launch_proc_with_pty(args, stdin=None, stdout=None, stderr=None, echo=True):
+    """Similar to pty.fork, but handle stdin/stdout according to parameters
+    instead of connecting to the pty
+
+    :return tuple (subprocess.Popen, pty_master)
+    """
+
+    def set_ctty(ctty_fd, master_fd):
+        os.setsid()
+        os.close(master_fd)
+        fcntl.ioctl(ctty_fd, termios.TIOCSCTTY, 0)
+        if not echo:
+            termios_p = termios.tcgetattr(ctty_fd)
+            # termios_p.c_lflags
+            termios_p[3] &= ~termios.ECHO
+            termios.tcsetattr(ctty_fd, termios.TCSANOW, termios_p)
+    (pty_master, pty_slave) = os.openpty()
+    p = subprocess.Popen(args, stdin=stdin, stdout=stdout, stderr=stderr,
+        preexec_fn=lambda: set_ctty(pty_slave, pty_master))
+    os.close(pty_slave)
+    return p, os.fdopen(pty_master, 'w+')
+
+
+def launch_scrypt(action, input_name, output_name, passphrase):
+    '''
+    Launch 'scrypt' process, pass passphrase to it and return
+    subprocess.Popen object.
+
+    :param action: 'enc' or 'dec'
+    :param input_name: input path or '-' for stdin
+    :param output_name: output path or '-' for stdout
+    :param passphrase: passphrase
+    :return: subprocess.Popen object
+    '''
+    command_line = ['scrypt', action, input_name, output_name]
+    (p, pty) = launch_proc_with_pty(command_line,
+        stdin=subprocess.PIPE if input_name == '-' else None,
+        stdout=subprocess.PIPE if output_name == '-' else None,
+        stderr=subprocess.PIPE,
+        echo=False)
+    if action == 'enc':
+        prompts = ('Please enter passphrase: ', 'Please confirm passphrase: ')
+    else:
+        prompts = ('Please enter passphrase: ',)
+    for prompt in prompts:
+        actual_prompt = p.stderr.read(len(prompt))
+        if actual_prompt != prompt:
+            raise qubes.exc.QubesException(
+                'Unexpected prompt from scrypt: {}'.format(actual_prompt))
+        pty.write(passphrase + '\n')
+        pty.flush()
+    # save it here, so garbage collector would not close it (which would kill
+    #  the child)
+    p.pty = pty
+    return p
+
+
 class Backup(object):
     class FileToBackup(object):
         def __init__(self, file_path, subdir=None, name=None):
@@ -476,15 +537,18 @@ class Backup(object):
             compression_filter=self.compression_filter,
         )
         backup_header.save(header_file_path)
-
-        hmac = subprocess.Popen(
-            ["openssl", "dgst", "-" + self.hmac_algorithm,
-                "-hmac", self.passphrase],
-            stdin=open(header_file_path, "r"),
-            stdout=open(header_file_path + ".hmac", "w"))
-        if hmac.wait() != 0:
+        # Start encrypt, scrypt will also handle integrity
+        # protection
+        scrypt_passphrase = HEADER_FILENAME + '!' + self.passphrase.encode(
+            'utf-8')
+        scrypt = launch_scrypt(
+            'enc', header_file_path, header_file_path + '.hmac',
+            scrypt_passphrase)
+
+        if scrypt.wait() != 0:
             raise qubes.exc.QubesException(
-                "Failed to compute hmac of header file")
+                "Failed to compute hmac of header file: "
+                + scrypt.stderr.read())
         return HEADER_FILENAME, HEADER_FILENAME + ".hmac"
 
 
@@ -640,60 +704,39 @@ class Backup(object):
 
                 self.log.debug(" ".join(tar_cmdline))
 
-                # Tips: Popen(bufsize=0)
-                # Pipe: tar-sparse | encryptor [| hmac] | tar | backup_target
-                # Pipe: tar-sparse [| hmac] | tar | backup_target
+                # Pipe: tar-sparse | scrypt | tar | backup_target
                 # TODO: log handle stderr
                 tar_sparse = subprocess.Popen(
-                    tar_cmdline, stdin=subprocess.PIPE)
+                    tar_cmdline)
                 self.processes_to_kill_on_cancel.append(tar_sparse)
 
                 # Wait for compressor (tar) process to finish or for any
                 # error of other subprocesses
                 i = 0
+                pipe = open(backup_pipe, 'rb')
                 run_error = "paused"
-                encryptor = None
-                if self.encrypted:
-                    # Start encrypt
-                    # If no cipher is provided,
-                    # the data is forwarded unencrypted !!!
-                    encryptor = subprocess.Popen([
-                        "openssl", "enc",
-                        "-e", "-" + self.crypto_algorithm,
-                        "-pass", "pass:" + passphrase],
-                        stdin=open(backup_pipe, 'rb'),
-                        stdout=subprocess.PIPE)
-                    pipe = encryptor.stdout
-                else:
-                    pipe = open(backup_pipe, 'rb')
                 while run_error == "paused":
-
-                    # Start HMAC
-                    hmac = subprocess.Popen([
-                        "openssl", "dgst", "-" + self.hmac_algorithm,
-                        "-hmac", passphrase],
-                        stdin=subprocess.PIPE,
-                        stdout=subprocess.PIPE)
-
                     # Prepare a first chunk
-                    chunkfile = backup_tempfile + "." + "%03d" % i
+                    chunkfile = backup_tempfile + ".%03d.enc" % i
                     i += 1
-                    chunkfile_p = open(chunkfile, 'wb')
+
+                    # Start encrypt, scrypt will also handle integrity
+                    # protection
+                    scrypt_passphrase = os.path.relpath(chunkfile[:-4],
+                        self.tmpdir) + '!' + passphrase
+                    scrypt = launch_scrypt(
+                        "enc", "-", chunkfile, scrypt_passphrase)
 
                     run_error = handle_streams(
                         pipe,
-                        {'hmac_data': hmac.stdin,
-                         'backup_target': chunkfile_p,
-                        },
-                        {'hmac': hmac,
-                         'vmproc': vmproc,
+                        {'backup_target': scrypt.stdin},
+                        {'vmproc': vmproc,
                          'addproc': tar_sparse,
-                         'streamproc': encryptor,
+                         'scrypt': scrypt,
                         },
                         self.chunk_size,
                         self._add_vm_progress
                     )
-                    chunkfile_p.close()
 
                     self.log.debug(
                         "12 returned: {}".format(run_error))
@@ -703,12 +746,7 @@ class Backup(object):
                             tar_sparse.terminate()
                         except OSError:
                             pass
-                        try:
-                            hmac.terminate()
-                        except OSError:
-                            pass
                         tar_sparse.wait()
-                        hmac.wait()
                         to_send.put(QUEUE_ERROR)
                         send_proc.join()
                         shutil.rmtree(self.tmpdir)
@@ -724,29 +762,16 @@ class Backup(object):
                                 "Failed to perform backup: error in " +
                                 run_error)
 
+                    scrypt.stdin.close()
+                    scrypt.wait()
+                    self.log.debug("scrypt return code: {}".format(
+                        scrypt.poll()))
+
                     # Send the chunk to the backup target
                     self._queue_put_with_check(
                         send_proc, vmproc, to_send,
                         os.path.relpath(chunkfile, self.tmpdir))
 
-                    # Close HMAC
-                    hmac.stdin.close()
-                    hmac.wait()
-                    self.log.debug("HMAC proc return code: {}".format(
-                        hmac.poll()))
-
-                    # Write HMAC data next to the chunk file
-                    hmac_data = hmac.stdout.read()
-                    self.log.debug(
-                        "Writing hmac to {}.hmac".format(chunkfile))
-                    with open(chunkfile + ".hmac", 'w') as hmac_file:
-                        hmac_file.write(hmac_data)
-
-                    # Send the HMAC to the backup target
-                    self._queue_put_with_check(
-                        send_proc, vmproc, to_send,
-                        os.path.relpath(chunkfile, self.tmpdir) + ".hmac")
-
                     if tar_sparse.poll() is None or run_error == "size_limit":
                         run_error = "paused"
                     else:
@@ -1310,6 +1335,8 @@ def get_supported_hmac_algo(hmac_algorithm=None):
     # Start with provided default
     if hmac_algorithm:
         yield hmac_algorithm
+    if hmac_algorithm != 'scrypt':
+        yield 'scrypt'
     proc = subprocess.Popen(['openssl', 'list-message-digest-algorithms'],
                             stdout=subprocess.PIPE)
     for algo in proc.stdout.readlines():
@@ -1529,6 +1556,10 @@ class BackupRestore(object):
 
     def _verify_hmac(self, filename, hmacfile, algorithm=None):
         def load_hmac(hmac_text):
+            if filter(lambda x: ord(x) not in range(128),
+                    hmac_text):
+                raise qubes.exc.QubesException(
+                    "Invalid content of {}".format(hmacfile))
             hmac_text = hmac_text.strip().split("=")
             if len(hmac_text) > 1:
                 hmac_text = hmac_text[1].strip()
@@ -1547,6 +1578,17 @@ class BackupRestore(object):
                 "ERROR: expected hmac for {}, but got {}".
                 format(filename, hmacfile))
 
+        if algorithm == 'scrypt':
+            # in case of 'scrypt' _verify_hmac is only used for backup header
+            assert filename == HEADER_FILENAME
+            self._verify_and_decrypt(hmacfile, HEADER_FILENAME + '.dec')
+            if open(os.path.join(self.tmpdir, filename)).read() != \
+                    open(os.path.join(self.tmpdir, filename + '.dec')).read():
+                raise qubes.exc.QubesException(
+                    'Invalid hmac on {}'.format(filename))
+            else:
+                return True
+
         hmac_proc = subprocess.Popen(
             ["openssl", "dgst", "-" + algorithm, "-hmac", passphrase],
             stdin=open(os.path.join(self.tmpdir, filename), 'rb'),
@@ -1572,6 +1614,72 @@ class BackupRestore(object):
                     "Is the passphrase correct?".
                     format(filename, load_hmac(hmac_stdout)))
 
+    def _verify_and_decrypt(self, filename, output=None):
+        assert filename.endswith('.enc') or filename.endswith('.hmac')
+        fullname = os.path.join(self.tmpdir, filename)
+        (origname, _) = os.path.splitext(filename)
+        if output:
+            fulloutput = os.path.join(self.tmpdir, output)
+        else:
+            fulloutput = os.path.join(self.tmpdir, origname)
+        passphrase = origname + '!' + self.passphrase.encode('utf-8')
+        p = launch_scrypt('dec', fullname, fulloutput, passphrase)
+        (_, stderr) = p.communicate()
+        if p.returncode != 0:
+            os.unlink(fulloutput)
+            raise qubes.exc.QubesException('failed to decrypt {}: {}'.format(
+                fullname, stderr))
+        # encrypted file is no longer needed
+        os.unlink(fullname)
+        return origname
+
+    def _retrieve_backup_header_files(self, files, allow_none=False):
+        (retrieve_proc, filelist_pipe, error_pipe) = \
+            self._start_retrieval_process(
+                files, len(files), 1024 * 1024)
+        filelist = filelist_pipe.read()
+        retrieve_proc_returncode = retrieve_proc.wait()
+        if retrieve_proc in self.processes_to_kill_on_cancel:
+            self.processes_to_kill_on_cancel.remove(retrieve_proc)
+        extract_stderr = error_pipe.read(MAX_STDERR_BYTES)
+
+        # wait for other processes (if any)
+        for proc in self.processes_to_kill_on_cancel:
+            if proc.wait() != 0:
+                raise qubes.exc.QubesException(
+                    "Backup header retrieval failed (exit code {})".format(
+                        proc.wait())
+                )
+
+        if retrieve_proc_returncode != 0:
+            if not filelist and 'Not found in archive' in extract_stderr:
+                if allow_none:
+                    return None
+                else:
+                    raise qubes.exc.QubesException(
+                        "unable to read the qubes backup file {0} ({1}): {2}".format(
+                            self.backup_location,
+                            retrieve_proc.wait(),
+                            extract_stderr
+                        ))
+        actual_files = filelist.splitlines()
+        if sorted(actual_files) != sorted(files):
+            raise qubes.exc.QubesException(
+                'unexpected files in archive: got {!r}, expeced {!r}'.format(
+                    actual_files, files
+                ))
+        for f in files:
+            if not os.path.exists(os.path.join(self.tmpdir, f)):
+                if allow_none:
+                    return None
+                else:
+                    raise qubes.exc.QubesException(
+                        'Unable to retrieve file {} from backup {}: {}'.format(
+                            f, self.backup_location, extract_stderr
+                        )
+                    )
+        return files
+
     def _retrieve_backup_header(self):
         """Retrieve backup header and qubes.xml. Only backup header is
         analyzed, qubes.xml is left as-is
@@ -1588,82 +1696,47 @@ class BackupRestore(object):
             header_data.version = 1
             return header_data
 
-        (retrieve_proc, filelist_pipe, error_pipe) = \
-            self._start_retrieval_process(
-                ['backup-header', 'backup-header.hmac',
-                'qubes.xml.000', 'qubes.xml.000.hmac'], 4, 1024 * 1024)
-
-        expect_tar_error = False
-
-        filename = filelist_pipe.readline().strip()
-        hmacfile = filelist_pipe.readline().strip()
-        # tar output filename before actually extracting it, so wait for the
-        # next one before trying to access it
-        if not self.backup_vm:
-            filelist_pipe.readline().strip()
+        header_files = self._retrieve_backup_header_files(
+            ['backup-header', 'backup-header.hmac'], allow_none=True)
 
-        self.log.debug("Got backup header and hmac: {}, {}".format(
-            filename, hmacfile))
-
-        if not filename or filename == "EOF" or \
-                not hmacfile or hmacfile == "EOF":
-            retrieve_proc.wait()
-            proc_error_msg = error_pipe.read(MAX_STDERR_BYTES)
-            raise qubes.exc.QubesException(
-                "Premature end of archive while receiving "
-                "backup header. Process output:\n" + proc_error_msg)
-        file_ok = False
-        hmac_algorithm = DEFAULT_HMAC_ALGORITHM
-        for hmac_algo in get_supported_hmac_algo(hmac_algorithm):
-            try:
-                if self._verify_hmac(filename, hmacfile, hmac_algo):
-                    file_ok = True
-                    hmac_algorithm = hmac_algo
-                    break
-            except qubes.exc.QubesException:
-                # Ignore exception here, try the next algo
-                pass
-        if not file_ok:
-            raise qubes.exc.QubesException(
-                "Corrupted backup header (hmac verification "
-                "failed). Is the password correct?")
-        if os.path.basename(filename) == HEADER_FILENAME:
-            filename = os.path.join(self.tmpdir, filename)
-            header_data = BackupHeader(open(filename, 'r').read())
-            os.unlink(filename)
-        else:
-            # if no header found, create one with guessed HMAC algo
+        if not header_files:
+            # R2-Beta3 didn't have backup header, so if none is found,
+            # assume it's version=2 and use values present at that time
             header_data = BackupHeader(
                 version=2,
-                hmac_algorithm=hmac_algorithm,
                 # place explicitly this value, because it is what format_version
                 # 2 have
+                hmac_algorithm='SHA1',
                 crypto_algorithm='aes-256-cbc',
                 # TODO: set encrypted to something...
             )
-            # when tar do not find expected file in archive, it exit with
-            # code 2. This will happen because we've requested backup-header
-            # file, but the archive do not contain it. Ignore this particular
-            # error.
-            if not self.backup_vm:
-                expect_tar_error = True
-
-        if retrieve_proc.wait() != 0 and not expect_tar_error:
-            raise qubes.exc.QubesException(
-                "unable to read the qubes backup file {0} ({1}): {2}".format(
-                    self.backup_location,
-                    retrieve_proc.wait(),
-                    error_pipe.read(MAX_STDERR_BYTES)
-                ))
-        if retrieve_proc in self.processes_to_kill_on_cancel:
-            self.processes_to_kill_on_cancel.remove(retrieve_proc)
-        # wait for other processes (if any)
-        for proc in self.processes_to_kill_on_cancel:
-            if proc.wait() != 0:
+        else:
+            filename = HEADER_FILENAME
+            hmacfile = HEADER_FILENAME + '.hmac'
+            self.log.debug("Got backup header and hmac: {}, {}".format(
+                filename, hmacfile))
+
+            file_ok = False
+            hmac_algorithm = DEFAULT_HMAC_ALGORITHM
+            for hmac_algo in get_supported_hmac_algo(hmac_algorithm):
+                try:
+                    if self._verify_hmac(filename, hmacfile, hmac_algo):
+                        file_ok = True
+                        break
+                except qubes.exc.QubesException as e:
+                    self.log.debug(
+                        'Failed to verify {} using {}: {}'.format(
+                            hmacfile, hmac_algo, str(e)))
+                    # Ignore exception here, try the next algo
+                    pass
+            if not file_ok:
                 raise qubes.exc.QubesException(
-                    "Backup header retrieval failed (exit code {})".format(
-                        proc.wait())
-                )
+                    "Corrupted backup header (hmac verification "
+                    "failed). Is the password correct?")
+            filename = os.path.join(self.tmpdir, filename)
+            header_data = BackupHeader(open(filename, 'r').read())
+            os.unlink(filename)
+
         return header_data
 
     def _start_inner_extraction_worker(self, queue, relocate):
@@ -1696,6 +1769,9 @@ class BackupRestore(object):
         elif format_version in [3, 4]:
             extractor_params['compression_filter'] = \
                 self.header_data.compression_filter
+            if format_version == 4:
+                # encryption already handled
+                extractor_params['encrypted'] = False
             extract_proc = ExtractWorker3(**extractor_params)
         else:
             raise NotImplementedError(
@@ -1714,7 +1790,14 @@ class BackupRestore(object):
                 offline_mode=True)
             return backup_app
         else:
-            self._verify_hmac("qubes.xml.000", "qubes.xml.000.hmac")
+            if self.header_data.version in [2, 3]:
+                self._retrieve_backup_header_files(
+                    ['qubes.xml.000', 'qubes.xml.000.hmac'])
+                self._verify_hmac("qubes.xml.000", "qubes.xml.000.hmac")
+            else:
+                self._retrieve_backup_header_files(['qubes.xml.000.enc'])
+                self._verify_and_decrypt('qubes.xml.000.enc')
+
             queue = Queue()
             queue.put("qubes.xml.000")
             queue.put(QUEUE_FINISHED)
@@ -1762,6 +1845,7 @@ class BackupRestore(object):
 
         try:
             filename = None
+            hmacfile = None
             nextfile = None
             while True:
                 if self.canceled:
@@ -1785,30 +1869,58 @@ class BackupRestore(object):
                 if not filename or filename == "EOF":
                     break
 
-                hmacfile = filelist_pipe.readline().strip()
-
-                if self.canceled:
-                    break
                 # if reading archive directly with tar, wait for next filename -
                 # tar prints filename before processing it, so wait for
                 # the next one to be sure that whole file was extracted
                 if not self.backup_vm:
                     nextfile = filelist_pipe.readline().strip()
 
-                self.log.debug("Getting hmac:" + hmacfile)
-                if not hmacfile or hmacfile == "EOF":
-                    # Premature end of archive, either of tar1_command or
-                    # vmproc exited with error
-                    break
+                if self.header_data.version in [2, 3]:
+                    if not self.backup_vm:
+                        hmacfile = nextfile
+                        nextfile = filelist_pipe.readline().strip()
+                    else:
+                        hmacfile = filelist_pipe.readline().strip()
+
+                    if self.canceled:
+                        break
+
+                    self.log.debug("Getting hmac:" + hmacfile)
+                    if not hmacfile or hmacfile == "EOF":
+                        # Premature end of archive, either of tar1_command or
+                        # vmproc exited with error
+                        break
+                else:  # self.header_data.version == 4
+                    if not filename.endswith('.enc'):
+                        raise qubes.exc.QubesException(
+                            'Invalid file extension found in archive: {}'.
+                            format(filename))
 
                 if not any(map(lambda x: filename.startswith(x), vms_dirs)):
                     self.log.debug("Ignoring VM not selected for restore")
                     os.unlink(os.path.join(self.tmpdir, filename))
-                    os.unlink(os.path.join(self.tmpdir, hmacfile))
+                    if hmacfile:
+                        os.unlink(os.path.join(self.tmpdir, hmacfile))
                     continue
 
-                if self._verify_hmac(filename, hmacfile):
-                    to_extract.put(os.path.join(self.tmpdir, filename))
+                if self.header_data.version in [2, 3]:
+                    self._verify_hmac(filename, hmacfile)
+                else:
+                    # _verify_and_decrypt will write output to a file with
+                    # '.enc' extension cut off. This is safe because:
+                    # - `scrypt` tool will override output, so if the file was
+                    # already there (received from the VM), it will be removed
+                    # - incoming archive extraction will refuse to override
+                    # existing file, so if `scrypt` already created one,
+                    # it can not be manipulated by the VM
+                    # - when the file is retrieved from the VM, it appears at
+                    # the final form - if it's visible, VM have no longer
+                    # influence over its content
+                    #
+                    # This all means that if the file was correctly verified
+                    # + decrypted, we will surely access the right file
+                    filename = self._verify_and_decrypt(filename)
+                to_extract.put(os.path.join(self.tmpdir, filename))
 
             if self.canceled:
                 raise BackupCanceledError("Restore canceled",

+ 1 - 0
rpm_spec/core-dom0.spec

@@ -83,6 +83,7 @@ Requires:       createrepo
 Requires:       gnome-packagekit
 Requires:       cronie
 Requires:       bsdtar
+Requires:       scrypt
 # for qubes-hcl-report
 Requires:       dmidecode
 Requires:       PyQt4