backup: avoid deadlock when VM process fails

If SendWorker queue is full, check if that thread is still alive.
Otherwise it would deadlock on putting an entry to that queue.
This also requires that SendWorker must ensure that the main thread
isn't currently waiting for queue space when it fails. We can do this by
simply removing an entry from a queue - so on the next iteration
SendWorker would be already dead and main thread would notice it.
Getting an entry from queue in such (error) situation is harmless,
because other checks will notice it's an error condition.

Fixes QubesOS/qubes-issues#1359
This commit is contained in:
Marek Marczykowski-Górecki 2015-11-25 00:59:54 +01:00
parent c6df81ccff
commit 0c476f014d
No known key found for this signature in database
GPG Key ID: 063938BA42CFA724

View File

@ -400,6 +400,10 @@ class SendWorker(Process):
stdin=subprocess.PIPE, stdin=subprocess.PIPE,
stdout=self.backup_stdout) stdout=self.backup_stdout)
if final_proc.wait() >= 2: if final_proc.wait() >= 2:
if self.queue.full():
# if queue is already full, remove some entry to wake up
# main thread, so it will be able to notice error
self.queue.get()
# handle only exit code 2 (tar fatal error) or # handle only exit code 2 (tar fatal error) or
# greater (call failed?) # greater (call failed?)
raise QubesException( raise QubesException(
@ -448,6 +452,17 @@ def backup_do(base_backup_dir, files_to_backup, passphrase,
crypto_algorithm=DEFAULT_CRYPTO_ALGORITHM): crypto_algorithm=DEFAULT_CRYPTO_ALGORITHM):
global running_backup_operation global running_backup_operation
def queue_put_with_check(proc, vmproc, queue, element):
if queue.full():
if not proc.is_alive():
if vmproc:
message = ("Failed to write the backup, VM output:\n" +
vmproc.stderr.read())
else:
message = "Failed to write the backup. Out of disk space?"
raise QubesException(message)
queue.put(element)
total_backup_sz = 0 total_backup_sz = 0
passphrase = passphrase.encode('utf-8') passphrase = passphrase.encode('utf-8')
for f in files_to_backup: for f in files_to_backup:
@ -650,7 +665,9 @@ def backup_do(base_backup_dir, files_to_backup, passphrase,
run_error) run_error)
# Send the chunk to the backup target # Send the chunk to the backup target
to_send.put(os.path.relpath(chunkfile, backup_tmpdir)) queue_put_with_check(
send_proc, vmproc, to_send,
os.path.relpath(chunkfile, backup_tmpdir))
# Close HMAC # Close HMAC
hmac.stdin.close() hmac.stdin.close()
@ -668,7 +685,9 @@ def backup_do(base_backup_dir, files_to_backup, passphrase,
hmac_file.close() hmac_file.close()
# Send the HMAC to the backup target # Send the HMAC to the backup target
to_send.put(os.path.relpath(chunkfile, backup_tmpdir) + ".hmac") queue_put_with_check(
send_proc, vmproc, to_send,
os.path.relpath(chunkfile, backup_tmpdir) + ".hmac")
if tar_sparse.poll() is None or run_error == "size_limit": if tar_sparse.poll() is None or run_error == "size_limit":
run_error = "paused" run_error = "paused"
@ -680,7 +699,7 @@ def backup_do(base_backup_dir, files_to_backup, passphrase,
.poll() .poll()
pipe.close() pipe.close()
to_send.put("FINISHED") queue_put_with_check(send_proc, vmproc, to_send, "FINISHED")
send_proc.join() send_proc.join()
shutil.rmtree(backup_tmpdir) shutil.rmtree(backup_tmpdir)