backup: avoid deadlock when VM process fails
If SendWorker queue is full, check if that thread is still alive. Otherwise it would deadlock on putting an entry to that queue. This also requires that SendWorker must ensure that the main thread isn't currently waiting for queue space when it fails. We can do this by simply removing an entry from a queue - so on the next iteration SendWorker would be already dead and main thread would notice it. Getting an entry from queue in such (error) situation is harmless, because other checks will notice it's an error condition. Fixes QubesOS/qubes-issues#1359
This commit is contained in:
parent
c6df81ccff
commit
0c476f014d
@ -400,6 +400,10 @@ class SendWorker(Process):
|
|||||||
stdin=subprocess.PIPE,
|
stdin=subprocess.PIPE,
|
||||||
stdout=self.backup_stdout)
|
stdout=self.backup_stdout)
|
||||||
if final_proc.wait() >= 2:
|
if final_proc.wait() >= 2:
|
||||||
|
if self.queue.full():
|
||||||
|
# if queue is already full, remove some entry to wake up
|
||||||
|
# main thread, so it will be able to notice error
|
||||||
|
self.queue.get()
|
||||||
# handle only exit code 2 (tar fatal error) or
|
# handle only exit code 2 (tar fatal error) or
|
||||||
# greater (call failed?)
|
# greater (call failed?)
|
||||||
raise QubesException(
|
raise QubesException(
|
||||||
@ -448,6 +452,17 @@ def backup_do(base_backup_dir, files_to_backup, passphrase,
|
|||||||
crypto_algorithm=DEFAULT_CRYPTO_ALGORITHM):
|
crypto_algorithm=DEFAULT_CRYPTO_ALGORITHM):
|
||||||
global running_backup_operation
|
global running_backup_operation
|
||||||
|
|
||||||
|
def queue_put_with_check(proc, vmproc, queue, element):
|
||||||
|
if queue.full():
|
||||||
|
if not proc.is_alive():
|
||||||
|
if vmproc:
|
||||||
|
message = ("Failed to write the backup, VM output:\n" +
|
||||||
|
vmproc.stderr.read())
|
||||||
|
else:
|
||||||
|
message = "Failed to write the backup. Out of disk space?"
|
||||||
|
raise QubesException(message)
|
||||||
|
queue.put(element)
|
||||||
|
|
||||||
total_backup_sz = 0
|
total_backup_sz = 0
|
||||||
passphrase = passphrase.encode('utf-8')
|
passphrase = passphrase.encode('utf-8')
|
||||||
for f in files_to_backup:
|
for f in files_to_backup:
|
||||||
@ -650,7 +665,9 @@ def backup_do(base_backup_dir, files_to_backup, passphrase,
|
|||||||
run_error)
|
run_error)
|
||||||
|
|
||||||
# Send the chunk to the backup target
|
# Send the chunk to the backup target
|
||||||
to_send.put(os.path.relpath(chunkfile, backup_tmpdir))
|
queue_put_with_check(
|
||||||
|
send_proc, vmproc, to_send,
|
||||||
|
os.path.relpath(chunkfile, backup_tmpdir))
|
||||||
|
|
||||||
# Close HMAC
|
# Close HMAC
|
||||||
hmac.stdin.close()
|
hmac.stdin.close()
|
||||||
@ -668,7 +685,9 @@ def backup_do(base_backup_dir, files_to_backup, passphrase,
|
|||||||
hmac_file.close()
|
hmac_file.close()
|
||||||
|
|
||||||
# Send the HMAC to the backup target
|
# Send the HMAC to the backup target
|
||||||
to_send.put(os.path.relpath(chunkfile, backup_tmpdir) + ".hmac")
|
queue_put_with_check(
|
||||||
|
send_proc, vmproc, to_send,
|
||||||
|
os.path.relpath(chunkfile, backup_tmpdir) + ".hmac")
|
||||||
|
|
||||||
if tar_sparse.poll() is None or run_error == "size_limit":
|
if tar_sparse.poll() is None or run_error == "size_limit":
|
||||||
run_error = "paused"
|
run_error = "paused"
|
||||||
@ -680,7 +699,7 @@ def backup_do(base_backup_dir, files_to_backup, passphrase,
|
|||||||
.poll()
|
.poll()
|
||||||
pipe.close()
|
pipe.close()
|
||||||
|
|
||||||
to_send.put("FINISHED")
|
queue_put_with_check(send_proc, vmproc, to_send, "FINISHED")
|
||||||
send_proc.join()
|
send_proc.join()
|
||||||
shutil.rmtree(backup_tmpdir)
|
shutil.rmtree(backup_tmpdir)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user