From 0c476f014d7014d93d7d2a12fce086f19a311a7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Wed, 25 Nov 2015 00:59:54 +0100 Subject: [PATCH] backup: avoid deadlock when VM process fails If the SendWorker queue is full, check if that thread is still alive. Otherwise the main thread would deadlock while putting an entry into that queue. This also requires SendWorker to ensure that the main thread isn't currently waiting for queue space when it fails. We can do this by simply removing an entry from the queue - so on the next iteration SendWorker will already be dead and the main thread will notice it. Getting an entry from the queue in such an (error) situation is harmless, because other checks will notice it's an error condition. Fixes QubesOS/qubes-issues#1359 --- core/backup.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/core/backup.py b/core/backup.py index 6b20212c..c84d7b39 100644 --- a/core/backup.py +++ b/core/backup.py @@ -400,6 +400,10 @@ class SendWorker(Process): stdin=subprocess.PIPE, stdout=self.backup_stdout) if final_proc.wait() >= 2: + if self.queue.full(): + # if queue is already full, remove some entry to wake up + # main thread, so it will be able to notice error + self.queue.get() # handle only exit code 2 (tar fatal error) or # greater (call failed?) raise QubesException( @@ -448,6 +452,17 @@ def backup_do(base_backup_dir, files_to_backup, passphrase, crypto_algorithm=DEFAULT_CRYPTO_ALGORITHM): global running_backup_operation + def queue_put_with_check(proc, vmproc, queue, element): + if queue.full(): + if not proc.is_alive(): + if vmproc: + message = ("Failed to write the backup, VM output:\n" + + vmproc.stderr.read()) + else: + message = "Failed to write the backup. Out of disk space?" 
+ raise QubesException(message) + queue.put(element) + total_backup_sz = 0 passphrase = passphrase.encode('utf-8') for f in files_to_backup: @@ -650,7 +665,9 @@ def backup_do(base_backup_dir, files_to_backup, passphrase, run_error) # Send the chunk to the backup target - to_send.put(os.path.relpath(chunkfile, backup_tmpdir)) + queue_put_with_check( + send_proc, vmproc, to_send, + os.path.relpath(chunkfile, backup_tmpdir)) # Close HMAC hmac.stdin.close() @@ -668,7 +685,9 @@ def backup_do(base_backup_dir, files_to_backup, passphrase, hmac_file.close() # Send the HMAC to the backup target - to_send.put(os.path.relpath(chunkfile, backup_tmpdir) + ".hmac") + queue_put_with_check( + send_proc, vmproc, to_send, + os.path.relpath(chunkfile, backup_tmpdir) + ".hmac") if tar_sparse.poll() is None or run_error == "size_limit": run_error = "paused" @@ -680,7 +699,7 @@ def backup_do(base_backup_dir, files_to_backup, passphrase, .poll() pipe.close() - to_send.put("FINISHED") + queue_put_with_check(send_proc, vmproc, to_send, "FINISHED") send_proc.join() shutil.rmtree(backup_tmpdir)