backups: implement compression in backup format 3 (#775)

Since tar multi-archive no longer used, we can simply instruct tar to
pipe output through gzip (or whatever compressor we want). Include used
compressor command in backup header.
This commit is contained in:
Marek Marczykowski-Górecki 2014-09-26 03:44:17 +02:00
parent fc0c0adff8
commit 0cd8281ac1

View File

@ -44,6 +44,7 @@ BACKUP_DEBUG = False
HEADER_FILENAME = 'backup-header' HEADER_FILENAME = 'backup-header'
DEFAULT_CRYPTO_ALGORITHM = 'aes-256-cbc' DEFAULT_CRYPTO_ALGORITHM = 'aes-256-cbc'
DEFAULT_HMAC_ALGORITHM = 'SHA512' DEFAULT_HMAC_ALGORITHM = 'SHA512'
DEFAULT_COMPRESSION_FILTER = 'gzip'
CURRENT_BACKUP_FORMAT_VERSION = '3' CURRENT_BACKUP_FORMAT_VERSION = '3'
# Maximum size of error message get from process stderr (including VM process) # Maximum size of error message get from process stderr (including VM process)
MAX_STDERR_BYTES = 1024 MAX_STDERR_BYTES = 1024
@ -68,6 +69,7 @@ class BackupHeader:
version = 'version' version = 'version'
encrypted = 'encrypted' encrypted = 'encrypted'
compressed = 'compressed' compressed = 'compressed'
compression_filter = 'compression-filter'
crypto_algorithm = 'crypto-algorithm' crypto_algorithm = 'crypto-algorithm'
hmac_algorithm = 'hmac-algorithm' hmac_algorithm = 'hmac-algorithm'
bool_options = ['encrypted', 'compressed'] bool_options = ['encrypted', 'compressed']
@ -391,7 +393,8 @@ class SendWorker(Process):
def prepare_backup_header(target_directory, passphrase, compressed=False, def prepare_backup_header(target_directory, passphrase, compressed=False,
encrypted=False, encrypted=False,
hmac_algorithm=DEFAULT_HMAC_ALGORITHM, hmac_algorithm=DEFAULT_HMAC_ALGORITHM,
crypto_algorithm=DEFAULT_CRYPTO_ALGORITHM): crypto_algorithm=DEFAULT_CRYPTO_ALGORITHM,
compression_filter=None):
header_file_path = os.path.join(target_directory, HEADER_FILENAME) header_file_path = os.path.join(target_directory, HEADER_FILENAME)
with open(header_file_path, "w") as f: with open(header_file_path, "w") as f:
f.write(str("%s=%s\n" % (BackupHeader.version, f.write(str("%s=%s\n" % (BackupHeader.version,
@ -401,6 +404,9 @@ def prepare_backup_header(target_directory, passphrase, compressed=False,
crypto_algorithm))) crypto_algorithm)))
f.write(str("%s=%s\n" % (BackupHeader.encrypted, str(encrypted)))) f.write(str("%s=%s\n" % (BackupHeader.encrypted, str(encrypted))))
f.write(str("%s=%s\n" % (BackupHeader.compressed, str(compressed)))) f.write(str("%s=%s\n" % (BackupHeader.compressed, str(compressed))))
if compressed:
f.write(str("%s=%s\n" % (BackupHeader.compression_filter,
str(compression_filter))))
hmac = subprocess.Popen (["openssl", "dgst", hmac = subprocess.Popen (["openssl", "dgst",
"-" + hmac_algorithm, "-hmac", passphrase], "-" + hmac_algorithm, "-hmac", passphrase],
@ -421,9 +427,10 @@ def backup_do(base_backup_dir, files_to_backup, passphrase,
for file in files_to_backup: for file in files_to_backup:
total_backup_sz += file["size"] total_backup_sz += file["size"]
if compressed and encrypted: if isinstance(compressed, str):
raise QubesException("Compressed and encrypted backups are not " compression_filter = compressed
"supported (yet).") else:
compression_filter = DEFAULT_COMPRESSION_FILTER
running_backup_operation = BackupOperationInfo() running_backup_operation = BackupOperationInfo()
vmproc = None vmproc = None
@ -479,10 +486,11 @@ def backup_do(base_backup_dir, files_to_backup, passphrase,
print "Will backup:", files_to_backup print "Will backup:", files_to_backup
header_files = prepare_backup_header(backup_tmpdir, passphrase, header_files = prepare_backup_header(backup_tmpdir, passphrase,
compressed=compressed, compressed=bool(compressed),
encrypted=encrypted, encrypted=encrypted,
hmac_algorithm=hmac_algorithm, hmac_algorithm=hmac_algorithm,
crypto_algorithm=crypto_algorithm) crypto_algorithm=crypto_algorithm,
compression_filter=compression_filter)
# Setup worker to send encrypted data chunks to the backup_target # Setup worker to send encrypted data chunks to the backup_target
def compute_progress(new_size, total_backup_sz): def compute_progress(new_size, total_backup_sz):
@ -524,6 +532,8 @@ def backup_do(base_backup_dir, files_to_backup, passphrase,
filename["subdir"]), filename["subdir"]),
os.path.basename(filename["path"]) os.path.basename(filename["path"])
] ]
if compressed:
tar_cmdline.insert(-1, "--use-compress-program=%s" % compression_filter)
if BACKUP_DEBUG: if BACKUP_DEBUG:
print " ".join(tar_cmdline) print " ".join(tar_cmdline)
@ -1024,13 +1034,14 @@ class ExtractWorker3(ExtractWorker2):
def __init__(self, queue, base_dir, passphrase, encrypted, total_size, def __init__(self, queue, base_dir, passphrase, encrypted, total_size,
print_callback, error_callback, progress_callback, vmproc=None, print_callback, error_callback, progress_callback, vmproc=None,
compressed=False, crypto_algorithm=DEFAULT_CRYPTO_ALGORITHM, compressed=False, crypto_algorithm=DEFAULT_CRYPTO_ALGORITHM,
verify_only=False): compression_filter = None, verify_only=False):
super(ExtractWorker3, self).__init__(queue, base_dir, passphrase, super(ExtractWorker3, self).__init__(queue, base_dir, passphrase,
encrypted, total_size, encrypted, total_size,
print_callback, error_callback, print_callback, error_callback,
progress_callback, vmproc, progress_callback, vmproc,
compressed, crypto_algorithm, compressed, crypto_algorithm,
verify_only) verify_only)
self.compression_filter = compression_filter
os.unlink(self.restore_pipe) os.unlink(self.restore_pipe)
def __run__(self): def __run__(self):
@ -1069,6 +1080,14 @@ class ExtractWorker3(ExtractWorker2):
'-%sk%s' % ("t" if self.verify_only else "x", '-%sk%s' % ("t" if self.verify_only else "x",
"v" if BACKUP_DEBUG else ""), "v" if BACKUP_DEBUG else ""),
os.path.relpath(filename.rstrip('.000'))] os.path.relpath(filename.rstrip('.000'))]
if self.compressed:
if self.compression_filter:
tar2_cmdline.insert(-1,
"--use-compress-program=%s" %
self.compression_filter)
else:
tar2_cmdline.insert(-1, "--use-compress-program=%s" %
DEFAULT_COMPRESSION_FILTER)
if BACKUP_DEBUG and callable(self.print_callback): if BACKUP_DEBUG and callable(self.print_callback):
self.print_callback("Running command "+ self.print_callback("Running command "+
@ -1201,7 +1220,8 @@ def restore_vm_dirs (backup_source, restore_tmpdir, passphrase, vms_dirs, vms,
progress_callback=None, encrypted=False, appvm=None, progress_callback=None, encrypted=False, appvm=None,
compressed = False, hmac_algorithm=DEFAULT_HMAC_ALGORITHM, compressed = False, hmac_algorithm=DEFAULT_HMAC_ALGORITHM,
crypto_algorithm=DEFAULT_CRYPTO_ALGORITHM, crypto_algorithm=DEFAULT_CRYPTO_ALGORITHM,
verify_only=False, format_version = CURRENT_BACKUP_FORMAT_VERSION): verify_only=False, format_version = CURRENT_BACKUP_FORMAT_VERSION,
compression_filter = None):
global running_backup_operation global running_backup_operation
@ -1322,6 +1342,8 @@ def restore_vm_dirs (backup_source, restore_tmpdir, passphrase, vms_dirs, vms,
compressed = header_data[BackupHeader.compressed] compressed = header_data[BackupHeader.compressed]
if BackupHeader.encrypted in header_data: if BackupHeader.encrypted in header_data:
encrypted = header_data[BackupHeader.encrypted] encrypted = header_data[BackupHeader.encrypted]
if BackupHeader.compression_filter in header_data:
compression_filter = header_data[BackupHeader.compression_filter]
os.unlink(filename) os.unlink(filename)
else: else:
# if no header found, create one with guessed HMAC algo # if no header found, create one with guessed HMAC algo
@ -1353,6 +1375,7 @@ def restore_vm_dirs (backup_source, restore_tmpdir, passphrase, vms_dirs, vms,
if format_version == 2: if format_version == 2:
extract_proc = ExtractWorker2(**extractor_params) extract_proc = ExtractWorker2(**extractor_params)
elif format_version == 3: elif format_version == 3:
extractor_params['compression_filter'] = compression_filter
extract_proc = ExtractWorker3(**extractor_params) extract_proc = ExtractWorker3(**extractor_params)
else: else:
raise NotImplemented("Backup format version %d not supported" % format_version) raise NotImplemented("Backup format version %d not supported" % format_version)
@ -1600,6 +1623,9 @@ def backup_restore_prepare(backup_location, passphrase, options = {},
crypto_algorithm=DEFAULT_CRYPTO_ALGORITHM): crypto_algorithm=DEFAULT_CRYPTO_ALGORITHM):
# Defaults # Defaults
backup_restore_set_defaults(options) backup_restore_set_defaults(options)
# Options introduced in backup format 3+, which always have a header,
# so no need for fallback in function parameter
compression_filter = DEFAULT_COMPRESSION_FILTER
#### Private functions begin #### Private functions begin
def is_vm_included_in_backup_v1 (backup_dir, vm): def is_vm_included_in_backup_v1 (backup_dir, vm):
@ -1671,6 +1697,8 @@ def backup_restore_prepare(backup_location, passphrase, options = {},
compressed = header_data[BackupHeader.compressed] compressed = header_data[BackupHeader.compressed]
if BackupHeader.encrypted in header_data: if BackupHeader.encrypted in header_data:
encrypted = header_data[BackupHeader.encrypted] encrypted = header_data[BackupHeader.encrypted]
if BackupHeader.compression_filter in header_data:
compression_filter = header_data[BackupHeader.compression_filter]
if BACKUP_DEBUG: if BACKUP_DEBUG:
print "Loading file", qubes_xml print "Loading file", qubes_xml
@ -1722,6 +1750,7 @@ def backup_restore_prepare(backup_location, passphrase, options = {},
options['passphrase'] = passphrase options['passphrase'] = passphrase
options['encrypted'] = encrypted options['encrypted'] = encrypted
options['compressed'] = compressed options['compressed'] = compressed
options['compression_filter'] = compression_filter
options['hmac_algorithm'] = hmac_algorithm options['hmac_algorithm'] = hmac_algorithm
options['crypto_algorithm'] = crypto_algorithm options['crypto_algorithm'] = crypto_algorithm
options['appvm'] = appvm options['appvm'] = appvm
@ -1889,6 +1918,7 @@ def backup_restore_do(restore_info,
passphrase = options['passphrase'] passphrase = options['passphrase']
encrypted = options['encrypted'] encrypted = options['encrypted']
compressed = options['compressed'] compressed = options['compressed']
compression_filter = options['compression_filter']
hmac_algorithm = options['hmac_algorithm'] hmac_algorithm = options['hmac_algorithm']
crypto_algorithm = options['crypto_algorithm'] crypto_algorithm = options['crypto_algorithm']
verify_only = options['verify-only'] verify_only = options['verify-only']
@ -1943,6 +1973,7 @@ def backup_restore_do(restore_info,
progress_callback=progress_callback, progress_callback=progress_callback,
encrypted=encrypted, encrypted=encrypted,
compressed=compressed, compressed=compressed,
compression_filter=compression_filter,
appvm=appvm) appvm=appvm)
except QubesException as e: except QubesException as e:
if verify_only: if verify_only: