Browse Source

Revert "backup: use 'scrypt' tool for backup encryption and integrity protection"

This reverts commit 418d74968092bfc806d864535f64d220d13e206e.

Package `scrypt` is currently not installable (not present in any repo).

Cc: @marmarek
Wojtek Porczyk 8 years ago
parent
commit
0f1672dc63
2 changed files with 147 additions and 272 deletions
  1. 147 271
      qubes/backup.py
  2. 0 1
      rpm_spec/core-dom0.spec

+ 147 - 271
qubes/backup.py

@@ -24,8 +24,6 @@
 from __future__ import unicode_literals
 import itertools
 import logging
-import termios
-
 from qubes.utils import size_to_human
 import sys
 import stat
@@ -52,9 +50,7 @@ QUEUE_FINISHED = "FINISHED"
 
 HEADER_FILENAME = 'backup-header'
 DEFAULT_CRYPTO_ALGORITHM = 'aes-256-cbc'
-# 'scrypt' is not exactly HMAC algorithm, but a tool we use to
-# integrity-protect the data
-DEFAULT_HMAC_ALGORITHM = 'scrypt'
+DEFAULT_HMAC_ALGORITHM = 'SHA512'
 DEFAULT_COMPRESSION_FILTER = 'gzip'
 CURRENT_BACKUP_FORMAT_VERSION = '4'
 # Maximum size of error message get from process stderr (including VM process)
@@ -222,63 +218,6 @@ class SendWorker(Process):
         self.log.debug("Finished sending thread")
 
 
-def launch_proc_with_pty(args, stdin=None, stdout=None, stderr=None, echo=True):
-    """Similar to pty.fork, but handle stdin/stdout according to parameters
-    instead of connecting to the pty
-
-    :return tuple (subprocess.Popen, pty_master)
-    """
-
-    def set_ctty(ctty_fd, master_fd):
-        os.setsid()
-        os.close(master_fd)
-        fcntl.ioctl(ctty_fd, termios.TIOCSCTTY, 0)
-        if not echo:
-            termios_p = termios.tcgetattr(ctty_fd)
-            # termios_p.c_lflags
-            termios_p[3] &= ~termios.ECHO
-            termios.tcsetattr(ctty_fd, termios.TCSANOW, termios_p)
-    (pty_master, pty_slave) = os.openpty()
-    p = subprocess.Popen(args, stdin=stdin, stdout=stdout, stderr=stderr,
-        preexec_fn=lambda: set_ctty(pty_slave, pty_master))
-    os.close(pty_slave)
-    return p, os.fdopen(pty_master, 'w+')
-
-
-def launch_scrypt(action, input_name, output_name, passphrase):
-    '''
-    Launch 'scrypt' process, pass passphrase to it and return
-    subprocess.Popen object.
-
-    :param action: 'enc' or 'dec'
-    :param input_name: input path or '-' for stdin
-    :param output_name: output path or '-' for stdout
-    :param passphrase: passphrase
-    :return: subprocess.Popen object
-    '''
-    command_line = ['scrypt', action, input_name, output_name]
-    (p, pty) = launch_proc_with_pty(command_line,
-        stdin=subprocess.PIPE if input_name == '-' else None,
-        stdout=subprocess.PIPE if output_name == '-' else None,
-        stderr=subprocess.PIPE,
-        echo=False)
-    if action == 'enc':
-        prompts = ('Please enter passphrase: ', 'Please confirm passphrase: ')
-    else:
-        prompts = ('Please enter passphrase: ',)
-    for prompt in prompts:
-        actual_prompt = p.stderr.read(len(prompt))
-        if actual_prompt != prompt:
-            raise qubes.exc.QubesException(
-                'Unexpected prompt from scrypt: {}'.format(actual_prompt))
-        pty.write(passphrase.encode('utf-8') + b'\n')
-        pty.flush()
-    # save it here, so garbage collector would not close it (which would kill
-    #  the child)
-    p.pty = pty
-    return p
-
-
 class Backup(object):
     class FileToBackup(object):
         def __init__(self, file_path, subdir=None, name=None):
@@ -549,18 +488,15 @@ class Backup(object):
             backup_id=self.backup_id,
         )
         backup_header.save(header_file_path)
-        # Start encrypt, scrypt will also handle integrity
-        # protection
-        scrypt_passphrase = u'{filename}!{passphrase}'.format(
-            filename=HEADER_FILENAME, passphrase=self.passphrase)
-        scrypt = launch_scrypt(
-            'enc', header_file_path, header_file_path + '.hmac',
-            scrypt_passphrase)
-
-        if scrypt.wait() != 0:
+
+        hmac = subprocess.Popen(
+            ["openssl", "dgst", "-" + self.hmac_algorithm,
+                "-hmac", self.passphrase],
+            stdin=open(header_file_path, "r"),
+            stdout=open(header_file_path + ".hmac", "w"))
+        if hmac.wait() != 0:
             raise qubes.exc.QubesException(
-                "Failed to compute hmac of header file: "
-                + scrypt.stderr.read())
+                "Failed to compute hmac of header file")
         return HEADER_FILENAME, HEADER_FILENAME + ".hmac"
 
 
@@ -714,43 +650,60 @@ class Backup(object):
 
                 self.log.debug(" ".join(tar_cmdline))
 
-                # Pipe: tar-sparse | scrypt | tar | backup_target
+                # Tips: Popen(bufsize=0)
+                # Pipe: tar-sparse | encryptor [| hmac] | tar | backup_target
+                # Pipe: tar-sparse [| hmac] | tar | backup_target
                 # TODO: log handle stderr
                 tar_sparse = subprocess.Popen(
-                    tar_cmdline)
+                    tar_cmdline, stdin=subprocess.PIPE)
                 self.processes_to_kill_on_cancel.append(tar_sparse)
 
                 # Wait for compressor (tar) process to finish or for any
                 # error of other subprocesses
                 i = 0
-                pipe = open(backup_pipe, 'rb')
                 run_error = "paused"
+                encryptor = None
+                if self.encrypted:
+                    # Start encrypt
+                    # If no cipher is provided,
+                    # the data is forwarded unencrypted !!!
+                    encryptor = subprocess.Popen([
+                        "openssl", "enc",
+                        "-e", "-" + self.crypto_algorithm,
+                        "-pass", "pass:" + passphrase],
+                        stdin=open(backup_pipe, 'rb'),
+                        stdout=subprocess.PIPE)
+                    pipe = encryptor.stdout
+                else:
+                    pipe = open(backup_pipe, 'rb')
                 while run_error == "paused":
+
+                    # Start HMAC
+                    hmac = subprocess.Popen([
+                        "openssl", "dgst", "-" + self.hmac_algorithm,
+                        "-hmac", passphrase],
+                        stdin=subprocess.PIPE,
+                        stdout=subprocess.PIPE)
+
                     # Prepare a first chunk
-                    chunkfile = backup_tempfile + ".%03d.enc" % i
+                    chunkfile = backup_tempfile + "." + "%03d" % i
                     i += 1
-
-                    # Start encrypt, scrypt will also handle integrity
-                    # protection
-                    scrypt_passphrase = \
-                        u'{backup_id}!{filename}!{passphrase}'.format(
-                            backup_id=self.backup_id,
-                            filename=os.path.relpath(chunkfile[:-4],
-                                self.tmpdir),
-                            passphrase=self.passphrase)
-                    scrypt = launch_scrypt(
-                        "enc", "-", chunkfile, scrypt_passphrase)
+                    chunkfile_p = open(chunkfile, 'wb')
 
                     run_error = handle_streams(
                         pipe,
-                        {'backup_target': scrypt.stdin},
-                        {'vmproc': vmproc,
+                        {'hmac_data': hmac.stdin,
+                         'backup_target': chunkfile_p,
+                        },
+                        {'hmac': hmac,
+                         'vmproc': vmproc,
                          'addproc': tar_sparse,
-                         'scrypt': scrypt,
+                         'streamproc': encryptor,
                         },
                         self.chunk_size,
                         self._add_vm_progress
                     )
+                    chunkfile_p.close()
 
                     self.log.debug(
                         "12 returned: {}".format(run_error))
@@ -760,7 +713,12 @@ class Backup(object):
                             tar_sparse.terminate()
                         except OSError:
                             pass
+                        try:
+                            hmac.terminate()
+                        except OSError:
+                            pass
                         tar_sparse.wait()
+                        hmac.wait()
                         to_send.put(QUEUE_ERROR)
                         send_proc.join()
                         shutil.rmtree(self.tmpdir)
@@ -776,16 +734,29 @@ class Backup(object):
                                 "Failed to perform backup: error in " +
                                 run_error)
 
-                    scrypt.stdin.close()
-                    scrypt.wait()
-                    self.log.debug("scrypt return code: {}".format(
-                        scrypt.poll()))
-
                     # Send the chunk to the backup target
                     self._queue_put_with_check(
                         send_proc, vmproc, to_send,
                         os.path.relpath(chunkfile, self.tmpdir))
 
+                    # Close HMAC
+                    hmac.stdin.close()
+                    hmac.wait()
+                    self.log.debug("HMAC proc return code: {}".format(
+                        hmac.poll()))
+
+                    # Write HMAC data next to the chunk file
+                    hmac_data = hmac.stdout.read()
+                    self.log.debug(
+                        "Writing hmac to {}.hmac".format(chunkfile))
+                    with open(chunkfile + ".hmac", 'w') as hmac_file:
+                        hmac_file.write(hmac_data)
+
+                    # Send the HMAC to the backup target
+                    self._queue_put_with_check(
+                        send_proc, vmproc, to_send,
+                        os.path.relpath(chunkfile, self.tmpdir) + ".hmac")
+
                     if tar_sparse.poll() is None or run_error == "size_limit":
                         run_error = "paused"
                     else:
@@ -1361,8 +1332,6 @@ def get_supported_hmac_algo(hmac_algorithm=None):
     # Start with provided default
     if hmac_algorithm:
         yield hmac_algorithm
-    if hmac_algorithm != 'scrypt':
-        yield 'scrypt'
     proc = subprocess.Popen(['openssl', 'list-message-digest-algorithms'],
                             stdout=subprocess.PIPE)
     for algo in proc.stdout.readlines():
@@ -1582,10 +1551,6 @@ class BackupRestore(object):
 
     def _verify_hmac(self, filename, hmacfile, algorithm=None):
         def load_hmac(hmac_text):
-            if filter(lambda x: ord(x) not in range(128),
-                    hmac_text):
-                raise qubes.exc.QubesException(
-                    "Invalid content of {}".format(hmacfile))
             hmac_text = hmac_text.strip().split("=")
             if len(hmac_text) > 1:
                 hmac_text = hmac_text[1].strip()
@@ -1604,17 +1569,6 @@ class BackupRestore(object):
                 "ERROR: expected hmac for {}, but got {}".
                 format(filename, hmacfile))
 
-        if algorithm == 'scrypt':
-            # in case of 'scrypt' _verify_hmac is only used for backup header
-            assert filename == HEADER_FILENAME
-            self._verify_and_decrypt(hmacfile, HEADER_FILENAME + '.dec')
-            if open(os.path.join(self.tmpdir, filename)).read() != \
-                    open(os.path.join(self.tmpdir, filename + '.dec')).read():
-                raise qubes.exc.QubesException(
-                    'Invalid hmac on {}'.format(filename))
-            else:
-                return True
-
         hmac_proc = subprocess.Popen(
             ["openssl", "dgst", "-" + algorithm, "-hmac", passphrase],
             stdin=open(os.path.join(self.tmpdir, filename), 'rb'),
@@ -1640,80 +1594,6 @@ class BackupRestore(object):
                     "Is the passphrase correct?".
                     format(filename, load_hmac(hmac_stdout)))
 
-    def _verify_and_decrypt(self, filename, output=None):
-        assert filename.endswith('.enc') or filename.endswith('.hmac')
-        fullname = os.path.join(self.tmpdir, filename)
-        (origname, _) = os.path.splitext(filename)
-        if output:
-            fulloutput = os.path.join(self.tmpdir, output)
-        else:
-            fulloutput = os.path.join(self.tmpdir, origname)
-        if origname == HEADER_FILENAME:
-            passphrase = u'{filename}!{passphrase}'.format(
-                filename=origname,
-                passphrase=self.passphrase)
-        else:
-            passphrase = u'{backup_id}!{filename}!{passphrase}'.format(
-                backup_id=self.header_data.backup_id,
-                filename=origname,
-                passphrase=self.passphrase)
-        p = launch_scrypt('dec', fullname, fulloutput, passphrase)
-        (_, stderr) = p.communicate()
-        if p.returncode != 0:
-            os.unlink(fulloutput)
-            raise qubes.exc.QubesException('failed to decrypt {}: {}'.format(
-                fullname, stderr))
-        # encrypted file is no longer needed
-        os.unlink(fullname)
-        return origname
-
-    def _retrieve_backup_header_files(self, files, allow_none=False):
-        (retrieve_proc, filelist_pipe, error_pipe) = \
-            self._start_retrieval_process(
-                files, len(files), 1024 * 1024)
-        filelist = filelist_pipe.read()
-        retrieve_proc_returncode = retrieve_proc.wait()
-        if retrieve_proc in self.processes_to_kill_on_cancel:
-            self.processes_to_kill_on_cancel.remove(retrieve_proc)
-        extract_stderr = error_pipe.read(MAX_STDERR_BYTES)
-
-        # wait for other processes (if any)
-        for proc in self.processes_to_kill_on_cancel:
-            if proc.wait() != 0:
-                raise qubes.exc.QubesException(
-                    "Backup header retrieval failed (exit code {})".format(
-                        proc.wait())
-                )
-
-        if retrieve_proc_returncode != 0:
-            if not filelist and 'Not found in archive' in extract_stderr:
-                if allow_none:
-                    return None
-                else:
-                    raise qubes.exc.QubesException(
-                        "unable to read the qubes backup file {0} ({1}): {2}".format(
-                            self.backup_location,
-                            retrieve_proc.wait(),
-                            extract_stderr
-                        ))
-        actual_files = filelist.splitlines()
-        if sorted(actual_files) != sorted(files):
-            raise qubes.exc.QubesException(
-                'unexpected files in archive: got {!r}, expeced {!r}'.format(
-                    actual_files, files
-                ))
-        for f in files:
-            if not os.path.exists(os.path.join(self.tmpdir, f)):
-                if allow_none:
-                    return None
-                else:
-                    raise qubes.exc.QubesException(
-                        'Unable to retrieve file {} from backup {}: {}'.format(
-                            f, self.backup_location, extract_stderr
-                        )
-                    )
-        return files
-
     def _retrieve_backup_header(self):
         """Retrieve backup header and qubes.xml. Only backup header is
         analyzed, qubes.xml is left as-is
@@ -1730,47 +1610,82 @@ class BackupRestore(object):
             header_data.version = 1
             return header_data
 
-        header_files = self._retrieve_backup_header_files(
-            ['backup-header', 'backup-header.hmac'], allow_none=True)
+        (retrieve_proc, filelist_pipe, error_pipe) = \
+            self._start_retrieval_process(
+                ['backup-header', 'backup-header.hmac',
+                'qubes.xml.000', 'qubes.xml.000.hmac'], 4, 1024 * 1024)
+
+        expect_tar_error = False
 
-        if not header_files:
-            # R2-Beta3 didn't have backup header, so if none is found,
-            # assume it's version=2 and use values present at that time
+        filename = filelist_pipe.readline().strip()
+        hmacfile = filelist_pipe.readline().strip()
+        # tar output filename before actually extracting it, so wait for the
+        # next one before trying to access it
+        if not self.backup_vm:
+            filelist_pipe.readline().strip()
+
+        self.log.debug("Got backup header and hmac: {}, {}".format(
+            filename, hmacfile))
+
+        if not filename or filename == "EOF" or \
+                not hmacfile or hmacfile == "EOF":
+            retrieve_proc.wait()
+            proc_error_msg = error_pipe.read(MAX_STDERR_BYTES)
+            raise qubes.exc.QubesException(
+                "Premature end of archive while receiving "
+                "backup header. Process output:\n" + proc_error_msg)
+        file_ok = False
+        hmac_algorithm = DEFAULT_HMAC_ALGORITHM
+        for hmac_algo in get_supported_hmac_algo(hmac_algorithm):
+            try:
+                if self._verify_hmac(filename, hmacfile, hmac_algo):
+                    file_ok = True
+                    hmac_algorithm = hmac_algo
+                    break
+            except qubes.exc.QubesException:
+                # Ignore exception here, try the next algo
+                pass
+        if not file_ok:
+            raise qubes.exc.QubesException(
+                "Corrupted backup header (hmac verification "
+                "failed). Is the password correct?")
+        if os.path.basename(filename) == HEADER_FILENAME:
+            filename = os.path.join(self.tmpdir, filename)
+            header_data = BackupHeader(open(filename, 'r').read())
+            os.unlink(filename)
+        else:
+            # if no header found, create one with guessed HMAC algo
             header_data = BackupHeader(
                 version=2,
+                hmac_algorithm=hmac_algorithm,
                 # place explicitly this value, because it is what format_version
                 # 2 have
-                hmac_algorithm='SHA1',
                 crypto_algorithm='aes-256-cbc',
                 # TODO: set encrypted to something...
             )
-        else:
-            filename = HEADER_FILENAME
-            hmacfile = HEADER_FILENAME + '.hmac'
-            self.log.debug("Got backup header and hmac: {}, {}".format(
-                filename, hmacfile))
-
-            file_ok = False
-            hmac_algorithm = DEFAULT_HMAC_ALGORITHM
-            for hmac_algo in get_supported_hmac_algo(hmac_algorithm):
-                try:
-                    if self._verify_hmac(filename, hmacfile, hmac_algo):
-                        file_ok = True
-                        break
-                except qubes.exc.QubesException as e:
-                    self.log.debug(
-                        'Failed to verify {} using {}: {}'.format(
-                            hmacfile, hmac_algo, str(e)))
-                    # Ignore exception here, try the next algo
-                    pass
-            if not file_ok:
+            # when tar do not find expected file in archive, it exit with
+            # code 2. This will happen because we've requested backup-header
+            # file, but the archive do not contain it. Ignore this particular
+            # error.
+            if not self.backup_vm:
+                expect_tar_error = True
+
+        if retrieve_proc.wait() != 0 and not expect_tar_error:
+            raise qubes.exc.QubesException(
+                "unable to read the qubes backup file {0} ({1}): {2}".format(
+                    self.backup_location,
+                    retrieve_proc.wait(),
+                    error_pipe.read(MAX_STDERR_BYTES)
+                ))
+        if retrieve_proc in self.processes_to_kill_on_cancel:
+            self.processes_to_kill_on_cancel.remove(retrieve_proc)
+        # wait for other processes (if any)
+        for proc in self.processes_to_kill_on_cancel:
+            if proc.wait() != 0:
                 raise qubes.exc.QubesException(
-                    "Corrupted backup header (hmac verification "
-                    "failed). Is the password correct?")
-            filename = os.path.join(self.tmpdir, filename)
-            header_data = BackupHeader(open(filename, 'r').read())
-            os.unlink(filename)
-
+                    "Backup header retrieval failed (exit code {})".format(
+                        proc.wait())
+                )
         return header_data
 
     def _start_inner_extraction_worker(self, queue, relocate):
@@ -1803,9 +1718,6 @@ class BackupRestore(object):
         elif format_version in [3, 4]:
             extractor_params['compression_filter'] = \
                 self.header_data.compression_filter
-            if format_version == 4:
-                # encryption already handled
-                extractor_params['encrypted'] = False
             extract_proc = ExtractWorker3(**extractor_params)
         else:
             raise NotImplementedError(
@@ -1824,14 +1736,7 @@ class BackupRestore(object):
                 offline_mode=True)
             return backup_app
         else:
-            if self.header_data.version in [2, 3]:
-                self._retrieve_backup_header_files(
-                    ['qubes.xml.000', 'qubes.xml.000.hmac'])
-                self._verify_hmac("qubes.xml.000", "qubes.xml.000.hmac")
-            else:
-                self._retrieve_backup_header_files(['qubes.xml.000.enc'])
-                self._verify_and_decrypt('qubes.xml.000.enc')
-
+            self._verify_hmac("qubes.xml.000", "qubes.xml.000.hmac")
             queue = Queue()
             queue.put("qubes.xml.000")
             queue.put(QUEUE_FINISHED)
@@ -1879,7 +1784,6 @@ class BackupRestore(object):
 
         try:
             filename = None
-            hmacfile = None
             nextfile = None
             while True:
                 if self.canceled:
@@ -1903,58 +1807,30 @@ class BackupRestore(object):
                 if not filename or filename == "EOF":
                     break
 
+                hmacfile = filelist_pipe.readline().strip()
+
+                if self.canceled:
+                    break
                 # if reading archive directly with tar, wait for next filename -
                 # tar prints filename before processing it, so wait for
                 # the next one to be sure that whole file was extracted
                 if not self.backup_vm:
                     nextfile = filelist_pipe.readline().strip()
 
-                if self.header_data.version in [2, 3]:
-                    if not self.backup_vm:
-                        hmacfile = nextfile
-                        nextfile = filelist_pipe.readline().strip()
-                    else:
-                        hmacfile = filelist_pipe.readline().strip()
-
-                    if self.canceled:
-                        break
-
-                    self.log.debug("Getting hmac:" + hmacfile)
-                    if not hmacfile or hmacfile == "EOF":
-                        # Premature end of archive, either of tar1_command or
-                        # vmproc exited with error
-                        break
-                else:  # self.header_data.version == 4
-                    if not filename.endswith('.enc'):
-                        raise qubes.exc.QubesException(
-                            'Invalid file extension found in archive: {}'.
-                            format(filename))
+                self.log.debug("Getting hmac:" + hmacfile)
+                if not hmacfile or hmacfile == "EOF":
+                    # Premature end of archive, either of tar1_command or
+                    # vmproc exited with error
+                    break
 
                 if not any(map(lambda x: filename.startswith(x), vms_dirs)):
                     self.log.debug("Ignoring VM not selected for restore")
                     os.unlink(os.path.join(self.tmpdir, filename))
-                    if hmacfile:
-                        os.unlink(os.path.join(self.tmpdir, hmacfile))
+                    os.unlink(os.path.join(self.tmpdir, hmacfile))
                     continue
 
-                if self.header_data.version in [2, 3]:
-                    self._verify_hmac(filename, hmacfile)
-                else:
-                    # _verify_and_decrypt will write output to a file with
-                    # '.enc' extension cut off. This is safe because:
-                    # - `scrypt` tool will override output, so if the file was
-                    # already there (received from the VM), it will be removed
-                    # - incoming archive extraction will refuse to override
-                    # existing file, so if `scrypt` already created one,
-                    # it can not be manipulated by the VM
-                    # - when the file is retrieved from the VM, it appears at
-                    # the final form - if it's visible, VM have no longer
-                    # influence over its content
-                    #
-                    # This all means that if the file was correctly verified
-                    # + decrypted, we will surely access the right file
-                    filename = self._verify_and_decrypt(filename)
-                to_extract.put(os.path.join(self.tmpdir, filename))
+                if self._verify_hmac(filename, hmacfile):
+                    to_extract.put(os.path.join(self.tmpdir, filename))
 
             if self.canceled:
                 raise BackupCanceledError("Restore canceled",

+ 0 - 1
rpm_spec/core-dom0.spec

@@ -83,7 +83,6 @@ Requires:       createrepo
 Requires:       gnome-packagekit
 Requires:       cronie
 Requires:       bsdtar
-Requires:       scrypt
 # for qubes-hcl-report
 Requires:       dmidecode
 Requires:       PyQt4