12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009 |
- # -*- encoding: utf8 -*-
- #
- # The Qubes OS Project, http://www.qubes-os.org
- #
- # Copyright (C) 2017 Marek Marczykowski-Górecki
- # <marmarek@invisiblethingslab.com>
- #
- # This program is free software; you can redistribute it and/or modify
- # it under the terms of the GNU Lesser General Public License as published by
- # the Free Software Foundation; either version 2.1 of the License, or
- # (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU Lesser General Public License for more details.
- #
- # You should have received a copy of the GNU Lesser General Public License along
- # with this program; if not, see <http://www.gnu.org/licenses/>.
- '''Backup restore module'''
- import errno
- import fcntl
- import functools
- import getpass
- import grp
- import logging
- import multiprocessing
- from multiprocessing import Queue, Process
- import os
- import pwd
- import re
- import shutil
- import subprocess
- import sys
- import tempfile
- import termios
- import time
- import qubesadmin
- import qubesadmin.vm
- from qubesadmin.backup import BackupVM
- from qubesadmin.backup.core2 import Core2Qubes
- from qubesadmin.backup.core3 import Core3Qubes
- from qubesadmin.devices import DeviceAssignment
- from qubesadmin.exc import QubesException
- from qubesadmin.utils import size_to_human
- # must be picklable
- QUEUE_FINISHED = "!!!FINISHED"
- QUEUE_ERROR = "!!!ERROR"
- HEADER_FILENAME = 'backup-header'
- DEFAULT_CRYPTO_ALGORITHM = 'aes-256-cbc'
- # 'scrypt' is not exactly HMAC algorithm, but a tool we use to
- # integrity-protect the data
- DEFAULT_HMAC_ALGORITHM = 'scrypt'
- DEFAULT_COMPRESSION_FILTER = 'gzip'
- # Maximum size of error message get from process stderr (including VM process)
- MAX_STDERR_BYTES = 1024
- # header + qubes.xml max size
- HEADER_QUBES_XML_MAX_SIZE = 1024 * 1024
- # hmac file max size - regardless of backup format version!
- HMAC_MAX_SIZE = 4096
- BLKSIZE = 512
- _re_alphanum = re.compile(r'^[A-Za-z0-9-]*$')
- _tar_msg_re = re.compile(r".*#[0-9].*restore_pipe")
- _tar_file_size_re = re.compile(r"^[^ ]+ [^ ]+/[^ ]+ *([0-9]+) .*")
- class BackupCanceledError(QubesException):
- '''Exception raised when backup/restore was cancelled'''
- def __init__(self, msg, tmpdir=None):
- super(BackupCanceledError, self).__init__(msg)
- self.tmpdir = tmpdir
- class BackupHeader(object):
- '''Structure describing backup-header file included as the first file in
- backup archive
- '''
- header_keys = {
- 'version': 'version',
- 'encrypted': 'encrypted',
- 'compressed': 'compressed',
- 'compression-filter': 'compression_filter',
- 'crypto-algorithm': 'crypto_algorithm',
- 'hmac-algorithm': 'hmac_algorithm',
- 'backup-id': 'backup_id'
- }
- bool_options = ['encrypted', 'compressed']
- int_options = ['version']
- def __init__(self,
- header_data=None,
- version=None,
- encrypted=None,
- compressed=None,
- compression_filter=None,
- hmac_algorithm=None,
- crypto_algorithm=None,
- backup_id=None):
- # repeat the list to help code completion...
- self.version = version
- self.encrypted = encrypted
- self.compressed = compressed
- # Options introduced in backup format 3+, which always have a header,
- # so no need for fallback in function parameter
- self.compression_filter = compression_filter
- self.hmac_algorithm = hmac_algorithm
- self.crypto_algorithm = crypto_algorithm
- self.backup_id = backup_id
- if header_data is not None:
- self.load(header_data)
- def load(self, untrusted_header_text):
- """Parse backup header file.
- :param untrusted_header_text: header content
- :type untrusted_header_text: basestring
- .. warning::
- This function may be exposed to not yet verified header,
- so is security critical.
- """
- try:
- untrusted_header_text = untrusted_header_text.decode('ascii')
- except UnicodeDecodeError:
- raise QubesException(
- "Non-ASCII characters in backup header")
- for untrusted_line in untrusted_header_text.splitlines():
- if untrusted_line.count('=') != 1:
- raise QubesException("Invalid backup header")
- key, value = untrusted_line.strip().split('=', 1)
- if not _re_alphanum.match(key):
- raise QubesException("Invalid backup header ("
- "key)")
- if key not in self.header_keys.keys():
- # Ignoring unknown option
- continue
- if not _re_alphanum.match(value):
- raise QubesException("Invalid backup header ("
- "value)")
- if getattr(self, self.header_keys[key]) is not None:
- raise QubesException(
- "Duplicated header line: {}".format(key))
- if key in self.bool_options:
- value = value.lower() in ["1", "true", "yes"]
- elif key in self.int_options:
- value = int(value)
- setattr(self, self.header_keys[key], value)
- self.validate()
- def validate(self):
- '''Validate header data, according to header version'''
- if self.version == 1:
- # header not really present
- pass
- elif self.version in [2, 3, 4]:
- expected_attrs = ['version', 'encrypted', 'compressed',
- 'hmac_algorithm']
- if self.encrypted and self.version < 4:
- expected_attrs += ['crypto_algorithm']
- if self.version >= 3 and self.compressed:
- expected_attrs += ['compression_filter']
- if self.version >= 4:
- expected_attrs += ['backup_id']
- for key in expected_attrs:
- if getattr(self, key) is None:
- raise QubesException(
- "Backup header lack '{}' info".format(key))
- else:
- raise QubesException(
- "Unsupported backup version {}".format(self.version))
- def save(self, filename):
- '''Save backup header into a file'''
- with open(filename, "w") as f_header:
- # make sure 'version' is the first key
- f_header.write('version={}\n'.format(self.version))
- for key, attr in self.header_keys.items():
- if key == 'version':
- continue
- if getattr(self, attr) is None:
- continue
- f_header.write("{!s}={!s}\n".format(key, getattr(self, attr)))
- def launch_proc_with_pty(args, stdin=None, stdout=None, stderr=None, echo=True):
- """Similar to pty.fork, but handle stdin/stdout according to parameters
- instead of connecting to the pty
- :return tuple (subprocess.Popen, pty_master)
- """
- def set_ctty(ctty_fd, master_fd):
- '''Set controlling terminal'''
- os.setsid()
- os.close(master_fd)
- fcntl.ioctl(ctty_fd, termios.TIOCSCTTY, 0)
- if not echo:
- termios_p = termios.tcgetattr(ctty_fd)
- # termios_p.c_lflags
- termios_p[3] &= ~termios.ECHO
- termios.tcsetattr(ctty_fd, termios.TCSANOW, termios_p)
- (pty_master, pty_slave) = os.openpty()
- # pylint: disable=subprocess-popen-preexec-fn
- p = subprocess.Popen(args, stdin=stdin, stdout=stdout,
- stderr=stderr,
- preexec_fn=lambda: set_ctty(pty_slave, pty_master))
- os.close(pty_slave)
- return p, open(pty_master, 'wb+', buffering=0)
- def launch_scrypt(action, input_name, output_name, passphrase):
- '''
- Launch 'scrypt' process, pass passphrase to it and return
- subprocess.Popen object.
- :param action: 'enc' or 'dec'
- :param input_name: input path or '-' for stdin
- :param output_name: output path or '-' for stdout
- :param passphrase: passphrase
- :return: subprocess.Popen object
- '''
- command_line = ['scrypt', action, input_name, output_name]
- (p, pty) = launch_proc_with_pty(command_line,
- stdin=subprocess.PIPE if input_name == '-' else None,
- stdout=subprocess.PIPE if output_name == '-' else None,
- stderr=subprocess.PIPE,
- echo=False)
- if action == 'enc':
- prompts = (b'Please enter passphrase: ', b'Please confirm passphrase: ')
- else:
- prompts = (b'Please enter passphrase: ',)
- for prompt in prompts:
- actual_prompt = p.stderr.read(len(prompt))
- if actual_prompt != prompt:
- raise QubesException(
- 'Unexpected prompt from scrypt: {}'.format(actual_prompt))
- pty.write(passphrase.encode('utf-8') + b'\n')
- pty.flush()
- # save it here, so garbage collector would not close it (which would kill
- # the child)
- p.pty = pty
- return p
- def _fix_logging_lock_after_fork():
- """
- HACK:
- This is running in a child process, parent might hold some lock
- while fork was called (but will be released only in a parent
- process). This specifically applies to a logging module and
- results in a deadlock (if one is unlucky). "Fix" this by
- reinitialize a lock on all registered logging handlers
- just after a fork() call, until fixed upstream:
- https://bugs.python.org/issue6721
- """
- if not hasattr(logging, '_handlerList'):
- return
- # pylint: disable=protected-access
- for handler_ref in logging._handlerList:
- handler = handler_ref()
- if handler is None:
- continue
- if handler.lock:
- handler.lock = type(handler.lock)()
- class ExtractWorker3(Process):
- '''Process for handling inner tar layer of backup archive'''
- # pylint: disable=too-many-instance-attributes
- def __init__(self, queue, base_dir, passphrase, encrypted,
- progress_callback, vmproc=None,
- compressed=False, crypto_algorithm=DEFAULT_CRYPTO_ALGORITHM,
- compression_filter=None, verify_only=False, handlers=None):
- '''Start inner tar extraction worker
- The purpose of this class is to process files extracted from outer
- archive layer and pass to appropriate handlers. Input files are given
- through a queue. Insert :py:obj:`QUEUE_FINISHED` or
- :py:obj:`QUEUE_ERROR` to end data processing (either cleanly,
- or forcefully).
- Handlers are given as a map filename -> (data_func, size_func),
- where data_func is called with file-like object to process,
- and size_func is called with file size as argument. Note that
- data_func and size_func may be called simultaneusly, in a different
- processes.
- :param multiprocessing.Queue queue: a queue with filenames to
- process; those files needs to be given as full path, inside *base_dir*
- :param str base_dir: directory where all files to process live
- :param str passphrase: passphrase to decrypt the data
- :param bool encrypted: is encryption applied?
- :param callable progress_callback: report extraction progress
- :param subprocess.Popen vmproc: process extracting outer layer,
- given here to monitor
- it for failures (when it exits with non-zero exit code, inner layer
- processing is stopped)
- :param bool compressed: is the data compressed?
- :param str crypto_algorithm: encryption algorithm, either `scrypt` or an
- algorithm supported by openssl
- :param str compression_filter: compression program, `gzip` by default
- :param bool verify_only: only verify data integrity, do not extract
- :param dict handlers: handlers for actual data
- '''
- super(ExtractWorker3, self).__init__()
- #: queue with files to extract
- self.queue = queue
- #: paths on the queue are relative to this dir
- self.base_dir = base_dir
- #: passphrase to decrypt/authenticate data
- self.passphrase = passphrase
- #: handlers for files; it should be dict filename -> (data_function,
- # size_function),
- # where data_function will get file-like object as the only argument and
- # might be called in a separate process (multiprocessing.Process),
- # and size_function will get file size (when known) in bytes
- self.handlers = handlers
- #: is the backup encrypted?
- self.encrypted = encrypted
- #: is the backup compressed?
- self.compressed = compressed
- #: what crypto algorithm is used for encryption?
- self.crypto_algorithm = crypto_algorithm
- #: only verify integrity, don't extract anything
- self.verify_only = verify_only
- #: progress
- self.blocks_backedup = 0
- #: inner tar layer extraction (subprocess.Popen instance)
- self.tar2_process = None
- #: current inner tar archive name
- self.tar2_current_file = None
- #: cat process feeding tar2_process
- self.tar2_feeder = None
- #: decompressor subprocess.Popen instance
- self.decompressor_process = None
- #: decryptor subprocess.Popen instance
- self.decryptor_process = None
- #: data import multiprocessing.Process instance
- self.import_process = None
- #: callback reporting progress to UI
- self.progress_callback = progress_callback
- #: process (subprocess.Popen instance) feeding the data into
- # extraction tool
- self.vmproc = vmproc
- self.log = logging.getLogger('qubesadmin.backup.extract')
- self.stderr_encoding = sys.stderr.encoding or 'utf-8'
- self.tar2_stderr = []
- self.compression_filter = compression_filter
- def collect_tar_output(self):
- '''Retrieve tar stderr and handle it appropriately
- Log errors, process file size if requested.
- This use :py:attr:`tar2_process`.
- '''
- if not self.tar2_process.stderr:
- return
- if self.tar2_process.poll() is None:
- try:
- new_lines = self.tar2_process.stderr \
- .read(MAX_STDERR_BYTES).splitlines()
- except IOError as e:
- if e.errno == errno.EAGAIN:
- return
- raise
- else:
- new_lines = self.tar2_process.stderr.readlines()
- new_lines = [x.decode(self.stderr_encoding) for x in new_lines]
- debug_msg = [msg for msg in new_lines if _tar_msg_re.match(msg)]
- self.log.debug('tar2_stderr: %s', '\n'.join(debug_msg))
- new_lines = [msg for msg in new_lines if not _tar_msg_re.match(msg)]
- self.tar2_stderr += new_lines
- def run(self):
- try:
- _fix_logging_lock_after_fork()
- self.__run__()
- except Exception:
- # Cleanup children
- for process in [self.decompressor_process,
- self.decryptor_process,
- self.tar2_process]:
- if process:
- try:
- process.terminate()
- except OSError:
- pass
- process.wait()
- self.log.exception('ERROR')
- raise
- def handle_dir(self, dirname):
- ''' Relocate files in given director when it's already extracted
- :param dirname: directory path to handle (relative to backup root),
- without trailing slash
- '''
- for fname, (data_func, size_func) in self.handlers.items():
- if not fname.startswith(dirname + '/'):
- continue
- if not os.path.exists(fname):
- # for example firewall.xml
- continue
- if size_func is not None:
- size_func(os.path.getsize(fname))
- with open(fname, 'rb') as input_file:
- data_func(input_file)
- os.unlink(fname)
- shutil.rmtree(dirname)
- def cleanup_tar2(self, wait=True, terminate=False):
- '''Cleanup running :py:attr:`tar2_process`
- :param wait: wait for it termination, otherwise method exit early if
- process is still running
- :param terminate: terminate the process if still running
- '''
- if self.tar2_process is None:
- return
- if terminate:
- if self.import_process is not None:
- self.tar2_process.terminate()
- self.import_process.terminate()
- if wait:
- self.tar2_process.wait()
- if self.import_process is not None:
- self.import_process.join()
- elif self.tar2_process.poll() is None:
- return
- self.collect_tar_output()
- if self.tar2_process.stderr:
- self.tar2_process.stderr.close()
- if self.tar2_process.returncode != 0:
- self.log.error(
- "ERROR: unable to extract files for %s, tar "
- "output:\n %s",
- self.tar2_current_file,
- "\n ".join(self.tar2_stderr))
- else:
- # Finished extracting the tar file
- # if that was whole-directory archive, handle
- # relocated files now
- inner_name = self.tar2_current_file.rsplit('.', 1)[0] \
- .replace(self.base_dir + '/', '')
- if os.path.basename(inner_name) == '.':
- self.handle_dir(
- os.path.dirname(inner_name))
- self.tar2_current_file = None
- self.tar2_process = None
- def _data_import_wrapper(self, close_fds, data_func, size_func,
- tar2_process):
- '''Close not needed file descriptors, handle output size reported
- by tar (if needed) then call data_func(tar2_process.stdout).
- This is to prevent holding write end of a pipe in subprocess,
- preventing EOF transfer.
- '''
- for fd in close_fds:
- if fd in (tar2_process.stdout.fileno(),
- tar2_process.stderr.fileno()):
- continue
- try:
- os.close(fd)
- except OSError:
- pass
- # retrieve file size from tar's stderr; warning: we do
- # not read data from tar's stdout at this point, it will
- # hang if it tries to output file content before
- # reporting its size on stderr first
- if size_func:
- # process lines on stderr until we get file size
- # search for first file size reported by tar -
- # this is used only when extracting single-file archive, so don't
- # bother with checking file name
- # Also, this needs to be called before anything is retrieved
- # from tar stderr, otherwise the process may deadlock waiting for
- # size (at this point nothing is retrieving data from tar stdout
- # yet, so it will hang on write() when the output pipe fill up).
- while True:
- line = tar2_process.stderr.readline()
- if not line:
- self.log.warning('EOF from tar before got file size info')
- break
- line = line.decode()
- if _tar_msg_re.match(line):
- self.log.debug('tar2_stderr: %s', line)
- else:
- match = _tar_file_size_re.match(line)
- if match:
- file_size = int(match.groups()[0])
- size_func(file_size)
- break
- else:
- self.log.warning(
- 'unexpected tar output (no file size report): %s',
- line)
- return data_func(tar2_process.stdout)
- def feed_tar2(self, filename, input_pipe):
- '''Feed data from *filename* to *input_pipe*
- Start a cat process to do that (do not block this process). Cat
- subprocess instance will be in :py:attr:`tar2_feeder`
- '''
- assert self.tar2_feeder is None
- self.tar2_feeder = subprocess.Popen(['cat', filename],
- stdout=input_pipe)
- def check_processes(self, processes):
- '''Check if any process failed.
- And if so, wait for other relevant processes to cleanup.
- '''
- run_error = None
- for name, proc in processes.items():
- if proc is None:
- continue
- if isinstance(proc, Process):
- if not proc.is_alive() and proc.exitcode != 0:
- run_error = name
- break
- elif proc.poll():
- run_error = name
- break
- if run_error:
- if run_error == "target":
- self.collect_tar_output()
- details = "\n".join(self.tar2_stderr)
- else:
- details = "%s failed" % run_error
- if self.decryptor_process:
- self.decryptor_process.terminate()
- self.decryptor_process.wait()
- self.decryptor_process = None
- self.log.error('Error while processing \'%s\': %s',
- self.tar2_current_file, details)
- self.cleanup_tar2(wait=True, terminate=True)
- def __run__(self):
- self.log.debug("Started sending thread")
- self.log.debug("Moving to dir %s", self.base_dir)
- os.chdir(self.base_dir)
- filename = None
- input_pipe = None
- for filename in iter(self.queue.get, None):
- if filename in (QUEUE_FINISHED, QUEUE_ERROR):
- break
- assert isinstance(filename, str)
- self.log.debug("Extracting file %s", filename)
- if filename.endswith('.000'):
- # next file
- if self.tar2_process is not None:
- input_pipe.close()
- self.cleanup_tar2(wait=True, terminate=False)
- inner_name = filename[:-len('.000')].replace(
- self.base_dir + '/', '')
- redirect_stdout = None
- if os.path.basename(inner_name) == '.':
- if (inner_name in self.handlers or
- any(x.startswith(os.path.dirname(inner_name) + '/')
- for x in self.handlers)):
- tar2_cmdline = ['tar',
- '-%s' % ("t" if self.verify_only else "x"),
- inner_name]
- else:
- # ignore this directory
- tar2_cmdline = None
- elif os.path.dirname(inner_name) == "dom0-home":
- tar2_cmdline = ['cat']
- redirect_stdout = subprocess.PIPE
- elif inner_name in self.handlers:
- tar2_cmdline = ['tar',
- '-%svvO' % ("t" if self.verify_only else "x"),
- inner_name]
- redirect_stdout = subprocess.PIPE
- else:
- # no handlers for this file, ignore it
- tar2_cmdline = None
- if tar2_cmdline is None:
- # ignore the file
- os.remove(filename)
- continue
- tar_compress_cmd = None
- if self.compressed:
- if self.compression_filter:
- tar_compress_cmd = self.compression_filter
- else:
- tar_compress_cmd = DEFAULT_COMPRESSION_FILTER
- if os.path.dirname(inner_name) == "dom0-home":
- # Replaces 'cat' for compressed dom0-home!
- tar2_cmdline = [tar_compress_cmd, "-d"]
- else:
- tar2_cmdline.insert(-1, "--use-compress-program=%s " %
- tar_compress_cmd)
- self.log.debug("Running command %s", str(tar2_cmdline))
- if self.encrypted:
- # Start decrypt
- self.decryptor_process = subprocess.Popen(
- ["openssl", "enc",
- "-d",
- "-" + self.crypto_algorithm,
- "-pass",
- "pass:" + self.passphrase],
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE)
- self.tar2_process = subprocess.Popen(
- tar2_cmdline,
- stdin=self.decryptor_process.stdout,
- stdout=redirect_stdout,
- stderr=subprocess.PIPE)
- self.decryptor_process.stdout.close()
- input_pipe = self.decryptor_process.stdin
- else:
- self.tar2_process = subprocess.Popen(
- tar2_cmdline,
- stdin=subprocess.PIPE,
- stdout=redirect_stdout,
- stderr=subprocess.PIPE)
- input_pipe = self.tar2_process.stdin
- self.feed_tar2(filename, input_pipe)
- if inner_name in self.handlers:
- assert redirect_stdout is subprocess.PIPE
- data_func, size_func = self.handlers[inner_name]
- self.import_process = multiprocessing.Process(
- target=self._data_import_wrapper,
- args=([input_pipe.fileno()],
- data_func, size_func, self.tar2_process))
- self.import_process.start()
- self.tar2_process.stdout.close()
- self.tar2_stderr = []
- elif not self.tar2_process:
- # Extracting of the current archive failed, skip to the next
- # archive
- os.remove(filename)
- continue
- else:
- # os.path.splitext fails to handle 'something/..000'
- (basename, ext) = self.tar2_current_file.rsplit('.', 1)
- previous_chunk_number = int(ext)
- expected_filename = basename + '.%03d' % (
- previous_chunk_number+1)
- if expected_filename != filename:
- self.cleanup_tar2(wait=True, terminate=True)
- self.log.error(
- 'Unexpected file in archive: %s, expected %s',
- filename, expected_filename)
- os.remove(filename)
- continue
- self.log.debug("Releasing next chunk")
- self.feed_tar2(filename, input_pipe)
- self.tar2_current_file = filename
- self.tar2_feeder.wait()
- # check if any process failed
- processes = {
- 'target': self.tar2_feeder,
- 'vmproc': self.vmproc,
- 'addproc': self.tar2_process,
- 'data_import': self.import_process,
- 'decryptor': self.decryptor_process,
- }
- self.check_processes(processes)
- self.tar2_feeder = None
- if callable(self.progress_callback):
- self.progress_callback(os.path.getsize(filename))
- # Delete the file as we don't need it anymore
- self.log.debug('Removing file %s', filename)
- os.remove(filename)
- if self.tar2_process is not None:
- input_pipe.close()
- if filename == QUEUE_ERROR:
- if self.decryptor_process:
- self.decryptor_process.terminate()
- self.decryptor_process.wait()
- self.decryptor_process = None
- self.cleanup_tar2(terminate=(filename == QUEUE_ERROR))
- self.log.debug('Finished extracting thread')
- def get_supported_hmac_algo(hmac_algorithm=None):
- '''Generate a list of supported hmac algorithms
- :param hmac_algorithm: default algorithm, if given, it is placed as a
- first element
- '''
- # Start with provided default
- if hmac_algorithm:
- yield hmac_algorithm
- if hmac_algorithm != 'scrypt':
- yield 'scrypt'
- proc = subprocess.Popen(
- 'openssl list-message-digest-algorithms || '
- 'openssl list -digest-algorithms',
- shell=True,
- stdout=subprocess.PIPE,
- stderr=subprocess.DEVNULL)
- try:
- for algo in proc.stdout.readlines():
- algo = algo.decode('ascii')
- if '=>' in algo:
- continue
- yield algo.strip()
- finally:
- proc.terminate()
- proc.wait()
- proc.stdout.close()
- class BackupRestoreOptions(object):
- '''Options for restore operation'''
- # pylint: disable=too-few-public-methods
- def __init__(self):
- #: use default NetVM if the one referenced in backup do not exists on
- # the host
- self.use_default_netvm = True
- #: set NetVM to "none" if the one referenced in backup do not exists
- # on the host
- self.use_none_netvm = False
- #: set template to default if the one referenced in backup do not
- # exists on the host
- self.use_default_template = True
- #: use default kernel if the one referenced in backup do not exists
- # on the host
- self.use_default_kernel = True
- #: restore dom0 home
- self.dom0_home = True
- #: restore dom0 home even if username is different
- self.ignore_username_mismatch = False
- #: do not restore data, only verify backup integrity
- self.verify_only = False
- #: automatically rename VM during restore, when it would conflict
- # with existing one
- self.rename_conflicting = True
- #: list of VM names to exclude
- self.exclude = []
- #: restore VMs into selected storage pool
- self.override_pool = None
- #: ignore size limit calculated from backup metadata
- self.ignore_size_limit = False
- class BackupRestore(object):
- """Usage:
- >>> restore_op = BackupRestore(...)
- >>> # adjust restore_op.options here
- >>> restore_info = restore_op.get_restore_info()
- >>> # manipulate restore_info to select VMs to restore here
- >>> restore_op.restore_do(restore_info)
- """
- class VMToRestore(object):
- '''Information about a single VM to be restored'''
- # pylint: disable=too-few-public-methods
- #: VM excluded from restore by user
- EXCLUDED = object()
- #: VM with such name already exists on the host
- ALREADY_EXISTS = object()
- #: NetVM used by the VM does not exists on the host
- MISSING_NETVM = object()
- #: TemplateVM used by the VM does not exists on the host
- MISSING_TEMPLATE = object()
- #: Kernel used by the VM does not exists on the host
- MISSING_KERNEL = object()
- def __init__(self, vm):
- assert isinstance(vm, BackupVM)
- self.vm = vm
- self.name = vm.name
- self.subdir = vm.backup_path
- self.size = vm.size
- self.problems = set()
- self.template = vm.template
- if vm.properties.get('netvm', None):
- self.netvm = vm.properties['netvm']
- else:
- self.netvm = None
- self.orig_template = None
- self.restored_vm = None
- @property
- def good_to_go(self):
- '''Is the VM ready for restore?'''
- return len(self.problems) == 0
- class Dom0ToRestore(VMToRestore):
- '''Information about dom0 home to restore'''
- # pylint: disable=too-few-public-methods
- #: backup was performed on system with different dom0 username
- USERNAME_MISMATCH = object()
- def __init__(self, vm, subdir=None):
- super(BackupRestore.Dom0ToRestore, self).__init__(vm)
- if subdir:
- self.subdir = subdir
- self.username = os.path.basename(subdir)
- def __init__(self, app, backup_location, backup_vm, passphrase):
- super(BackupRestore, self).__init__()
- #: qubes.Qubes instance
- self.app = app
- #: options how the backup should be restored
- self.options = BackupRestoreOptions()
- #: VM from which backup should be retrieved
- self.backup_vm = backup_vm
- if backup_vm and backup_vm.qid == 0:
- self.backup_vm = None
- #: backup path, inside VM pointed by :py:attr:`backup_vm`
- self.backup_location = backup_location
- #: passphrase protecting backup integrity and optionally decryption
- self.passphrase = passphrase
- #: temporary directory used to extract the data before moving to the
- # final location
- self.tmpdir = tempfile.mkdtemp(prefix="restore", dir="/var/tmp")
- #: list of processes (Popen objects) to kill on cancel
- self.processes_to_kill_on_cancel = []
- #: is the backup operation canceled
- self.canceled = False
- #: report restore progress, called with one argument - percents of
- # data restored
- # FIXME: convert to float [0,1]
- self.progress_callback = None
- self.log = logging.getLogger('qubesadmin.backup')
- #: basic information about the backup
- self.header_data = self._retrieve_backup_header()
- #: VMs included in the backup
- self.backup_app = self._process_qubes_xml()
- def _start_retrieval_process(self, filelist, limit_count, limit_bytes):
- """Retrieve backup stream and extract it to :py:attr:`tmpdir`
- :param filelist: list of files to extract; listing directory name
- will extract the whole directory; use empty list to extract the whole
- archive
- :param limit_count: maximum number of files to extract
- :param limit_bytes: maximum size of extracted data
- :return: a touple of (Popen object of started process, file-like
- object for reading extracted files list, file-like object for reading
- errors)
- """
- vmproc = None
- if self.backup_vm is not None:
- # If APPVM, STDOUT is a PIPE
- vmproc = self.backup_vm.run_service('qubes.Restore')
- vmproc.stdin.write(
- (self.backup_location.replace("\r", "").replace("\n",
- "") + "\n").encode())
- vmproc.stdin.flush()
- # Send to tar2qfile the VMs that should be extracted
- vmproc.stdin.write((" ".join(filelist) + "\n").encode())
- vmproc.stdin.flush()
- self.processes_to_kill_on_cancel.append(vmproc)
- backup_stdin = vmproc.stdout
- # FIXME use /usr/lib/qubes/qfile-unpacker in non-dom0
- tar1_command = ['/usr/libexec/qubes/qfile-dom0-unpacker',
- str(os.getuid()), self.tmpdir, '-v']
- else:
- backup_stdin = open(self.backup_location, 'rb')
- tar1_command = ['tar',
- '-ixv',
- '--occurrence=1',
- '-C', self.tmpdir] + filelist
- tar1_env = os.environ.copy()
- tar1_env['UPDATES_MAX_BYTES'] = str(limit_bytes)
- tar1_env['UPDATES_MAX_FILES'] = str(limit_count)
- self.log.debug("Run command %s", str(tar1_command))
- command = subprocess.Popen(
- tar1_command,
- stdin=backup_stdin,
- stdout=vmproc.stdin if vmproc else subprocess.PIPE,
- stderr=subprocess.PIPE,
- env=tar1_env)
- backup_stdin.close()
- self.processes_to_kill_on_cancel.append(command)
- # qfile-dom0-unpacker output filelist on stderr
- # and have stdout connected to the VM), while tar output filelist
- # on stdout
- if self.backup_vm:
- filelist_pipe = command.stderr
- # let qfile-dom0-unpacker hold the only open FD to the write end of
- # pipe, otherwise qrexec-client will not receive EOF when
- # qfile-dom0-unpacker terminates
- vmproc.stdin.close()
- else:
- filelist_pipe = command.stdout
- if self.backup_vm:
- error_pipe = vmproc.stderr
- else:
- error_pipe = command.stderr
- return command, filelist_pipe, error_pipe
- def _verify_hmac(self, filename, hmacfile, algorithm=None):
- '''Verify hmac of a file using given algorithm.
- If algorithm is not specified, use the one from backup header (
- :py:attr:`header_data`).
- Raise :py:exc:`QubesException` on failure, return :py:obj:`True` on
- success.
- 'scrypt' algorithm is supported only for header file; hmac file is
- encrypted (and integrity protected) version of plain header.
- :param filename: path to file to be verified
- :param hmacfile: path to hmac file for *filename*
- :param algorithm: override algorithm
- '''
- def load_hmac(hmac_text):
- '''Parse hmac output by openssl.
- Return just hmac, without filename and other metadata.
- '''
- if any(ord(x) not in range(128) for x in hmac_text):
- raise QubesException(
- "Invalid content of {}".format(hmacfile))
- hmac_text = hmac_text.strip().split("=")
- if len(hmac_text) > 1:
- hmac_text = hmac_text[1].strip()
- else:
- raise QubesException(
- "ERROR: invalid hmac file content")
- return hmac_text
- if algorithm is None:
- algorithm = self.header_data.hmac_algorithm
- passphrase = self.passphrase.encode('utf-8')
- self.log.debug("Verifying file %s", filename)
- if os.stat(os.path.join(self.tmpdir, hmacfile)).st_size > \
- HMAC_MAX_SIZE:
- raise QubesException('HMAC file {} too large'.format(
- hmacfile))
- if hmacfile != filename + ".hmac":
- raise QubesException(
- "ERROR: expected hmac for {}, but got {}".
- format(filename, hmacfile))
- if algorithm == 'scrypt':
- # in case of 'scrypt' _verify_hmac is only used for backup header
- assert filename == HEADER_FILENAME
- self._verify_and_decrypt(hmacfile, HEADER_FILENAME + '.dec')
- f_name = os.path.join(self.tmpdir, filename)
- with open(f_name, 'rb') as f_one:
- with open(f_name + '.dec', 'rb') as f_two:
- if f_one.read() != f_two.read():
- raise QubesException(
- 'Invalid hmac on {}'.format(filename))
- else:
- return True
- with open(os.path.join(self.tmpdir, filename), 'rb') as f_input:
- hmac_proc = subprocess.Popen(
- ["openssl", "dgst", "-" + algorithm, "-hmac", passphrase],
- stdin=f_input,
- stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- hmac_stdout, hmac_stderr = hmac_proc.communicate()
- if hmac_stderr:
- raise QubesException(
- "ERROR: verify file {0}: {1}".format(filename, hmac_stderr))
- else:
- self.log.debug("Loading hmac for file %s", filename)
- try:
- with open(os.path.join(self.tmpdir, hmacfile), 'r',
- encoding='ascii') as f_hmac:
- hmac = load_hmac(f_hmac.read())
- except UnicodeDecodeError as err:
- raise QubesException('Cannot load hmac file: ' + str(err))
- if hmac and load_hmac(hmac_stdout.decode('ascii')) == hmac:
- os.unlink(os.path.join(self.tmpdir, hmacfile))
- self.log.debug(
- "File verification OK -> Sending file %s", filename)
- return True
- raise QubesException(
- "ERROR: invalid hmac for file {0}: {1}. "
- "Is the passphrase correct?".
- format(filename, load_hmac(hmac_stdout.decode('ascii'))))
- def _verify_and_decrypt(self, filename, output=None):
- '''Handle scrypt-wrapped file
- Decrypt the file, and verify its integrity - both tasks handled by
- 'scrypt' tool. Filename (without extension) is also validated.
- :param filename: Input file name (relative to :py:attr:`tmpdir`),
- needs to have `.enc` or `.hmac` extension
- :param output: Output file name (relative to :py:attr:`tmpdir`),
- use :py:obj:`None` to use *filename* without extension
- :return: *filename* without extension
- '''
- assert filename.endswith('.enc') or filename.endswith('.hmac')
- fullname = os.path.join(self.tmpdir, filename)
- (origname, _) = os.path.splitext(filename)
- if output:
- fulloutput = os.path.join(self.tmpdir, output)
- else:
- fulloutput = os.path.join(self.tmpdir, origname)
- if origname == HEADER_FILENAME:
- passphrase = u'{filename}!{passphrase}'.format(
- filename=origname,
- passphrase=self.passphrase)
- else:
- passphrase = u'{backup_id}!{filename}!{passphrase}'.format(
- backup_id=self.header_data.backup_id,
- filename=origname,
- passphrase=self.passphrase)
- try:
- p = launch_scrypt('dec', fullname, fulloutput, passphrase)
- except OSError as err:
- raise QubesException('failed to decrypt {}: {!s}'.format(
- fullname, err))
- (_, stderr) = p.communicate()
- if hasattr(p, 'pty'):
- p.pty.close()
- if p.returncode != 0:
- os.unlink(fulloutput)
- raise QubesException('failed to decrypt {}: {}'.format(
- fullname, stderr))
- # encrypted file is no longer needed
- os.unlink(fullname)
- return origname
- def _retrieve_backup_header_files(self, files, allow_none=False):
- '''Retrieve backup header.
- Start retrieval process (possibly involving network access from
- another VM). Returns a collection of retrieved file paths.
- '''
- (retrieve_proc, filelist_pipe, error_pipe) = \
- self._start_retrieval_process(
- files, len(files), 1024 * 1024)
- filelist = filelist_pipe.read()
- filelist_pipe.close()
- retrieve_proc_returncode = retrieve_proc.wait()
- if retrieve_proc in self.processes_to_kill_on_cancel:
- self.processes_to_kill_on_cancel.remove(retrieve_proc)
- extract_stderr = error_pipe.read(MAX_STDERR_BYTES)
- error_pipe.close()
- # wait for other processes (if any)
- for proc in self.processes_to_kill_on_cancel:
- if proc.wait() != 0:
- raise QubesException(
- "Backup header retrieval failed (exit code {})".format(
- proc.wait())
- )
- if retrieve_proc_returncode != 0:
- if not filelist and 'Not found in archive' in extract_stderr:
- if allow_none:
- return None
- raise QubesException(
- "unable to read the qubes backup file {0} ({1}): {2}".
- format(
- self.backup_location,
- retrieve_proc.wait(),
- extract_stderr
- ))
- actual_files = filelist.decode('ascii').splitlines()
- if sorted(actual_files) != sorted(files):
- raise QubesException(
- 'unexpected files in archive: got {!r}, expected {!r}'.format(
- actual_files, files
- ))
- for fname in files:
- if not os.path.exists(os.path.join(self.tmpdir, fname)):
- if allow_none:
- return None
- raise QubesException(
- 'Unable to retrieve file {} from backup {}: {}'.format(
- fname, self.backup_location, extract_stderr
- )
- )
- return files
- def _retrieve_backup_header(self):
- """Retrieve backup header and qubes.xml. Only backup header is
- analyzed, qubes.xml is left as-is
- (not even verified/decrypted/uncompressed)
- :return header_data
- :rtype :py:class:`BackupHeader`
- """
- if not self.backup_vm and os.path.exists(
- os.path.join(self.backup_location, 'qubes.xml')):
- # backup format version 1 doesn't have header
- header_data = BackupHeader()
- header_data.version = 1
- return header_data
- header_files = self._retrieve_backup_header_files(
- ['backup-header', 'backup-header.hmac'], allow_none=True)
- if not header_files:
- # R2-Beta3 didn't have backup header, so if none is found,
- # assume it's version=2 and use values present at that time
- header_data = BackupHeader(
- version=2,
- # place explicitly this value, because it is what format_version
- # 2 have
- hmac_algorithm='SHA1',
- crypto_algorithm='aes-256-cbc',
- # TODO: set encrypted to something...
- )
- else:
- filename = HEADER_FILENAME
- hmacfile = HEADER_FILENAME + '.hmac'
- self.log.debug("Got backup header and hmac: %s, %s",
- filename, hmacfile)
- file_ok = False
- hmac_algorithm = DEFAULT_HMAC_ALGORITHM
- for hmac_algo in get_supported_hmac_algo(hmac_algorithm):
- try:
- if self._verify_hmac(filename, hmacfile, hmac_algo):
- file_ok = True
- break
- except QubesException as err:
- self.log.debug(
- 'Failed to verify %s using %s: %r',
- hmacfile, hmac_algo, err)
- # Ignore exception here, try the next algo
- if not file_ok:
- raise QubesException(
- "Corrupted backup header (hmac verification "
- "failed). Is the password correct?")
- filename = os.path.join(self.tmpdir, filename)
- with open(filename, 'rb') as f_header:
- header_data = BackupHeader(f_header.read())
- os.unlink(filename)
- return header_data
- def _start_inner_extraction_worker(self, queue, handlers):
- """Start a worker process, extracting inner layer of bacup archive,
- extract them to :py:attr:`tmpdir`.
- End the data by pushing QUEUE_FINISHED or QUEUE_ERROR to the queue.
- :param queue :py:class:`Queue` object to handle files from
- """
- # Setup worker to extract encrypted data chunks to the restore dirs
- # Create the process here to pass it options extracted from
- # backup header
- extractor_params = {
- 'queue': queue,
- 'base_dir': self.tmpdir,
- 'passphrase': self.passphrase,
- 'encrypted': self.header_data.encrypted,
- 'compressed': self.header_data.compressed,
- 'crypto_algorithm': self.header_data.crypto_algorithm,
- 'verify_only': self.options.verify_only,
- 'progress_callback': self.progress_callback,
- 'handlers': handlers,
- }
- self.log.debug(
- 'Starting extraction worker in %s, file handlers map: %s',
- self.tmpdir, repr(handlers))
- format_version = self.header_data.version
- if format_version in [3, 4]:
- extractor_params['compression_filter'] = \
- self.header_data.compression_filter
- if format_version == 4:
- # encryption already handled
- extractor_params['encrypted'] = False
- extract_proc = ExtractWorker3(**extractor_params)
- else:
- raise NotImplementedError(
- "Backup format version %d not supported" % format_version)
- extract_proc.start()
- return extract_proc
- @staticmethod
- def _save_qubes_xml(path, stream):
- '''Handler for qubes.xml.000 content - just save the data to a file'''
- with open(path, 'wb') as f_qubesxml:
- f_qubesxml.write(stream.read())
- def _process_qubes_xml(self):
- """Verify, unpack and load qubes.xml. Possibly convert its format if
- necessary. It expect that :py:attr:`header_data` is already populated,
- and :py:meth:`retrieve_backup_header` was called.
- """
- if self.header_data.version == 1:
- raise NotImplementedError('Backup format version 1 not supported')
- elif self.header_data.version in [2, 3]:
- self._retrieve_backup_header_files(
- ['qubes.xml.000', 'qubes.xml.000.hmac'])
- self._verify_hmac("qubes.xml.000", "qubes.xml.000.hmac")
- else:
- self._retrieve_backup_header_files(['qubes.xml.000.enc'])
- self._verify_and_decrypt('qubes.xml.000.enc')
- queue = Queue()
- queue.put("qubes.xml.000")
- queue.put(QUEUE_FINISHED)
- qubes_xml_path = os.path.join(self.tmpdir, 'qubes-restored.xml')
- handlers = {
- 'qubes.xml': (
- functools.partial(self._save_qubes_xml, qubes_xml_path),
- None)
- }
- extract_proc = self._start_inner_extraction_worker(queue, handlers)
- extract_proc.join()
- if extract_proc.exitcode != 0:
- raise QubesException(
- "unable to extract the qubes backup. "
- "Check extracting process errors.")
- if self.header_data.version in [2, 3]:
- backup_app = Core2Qubes(qubes_xml_path)
- elif self.header_data.version in [4]:
- backup_app = Core3Qubes(qubes_xml_path)
- else:
- raise QubesException(
- 'Unsupported qubes.xml format version: {}'.format(
- self.header_data.version))
- # Not needed anymore - all the data stored in backup_app
- os.unlink(qubes_xml_path)
- return backup_app
- def _restore_vm_data(self, vms_dirs, vms_size, handlers):
- '''Restore data of VMs
- :param vms_dirs: list of directories to extract (skip others)
- :param vms_size: expected size (abort if source stream exceed this
- value)
- :param handlers: handlers for restored files - see
- :py:class:`ExtractWorker3` for details
- '''
- # Currently each VM consists of at most 7 archives (count
- # file_to_backup calls in backup_prepare()), but add some safety
- # margin for further extensions. Each archive is divided into 100MB
- # chunks. Additionally each file have own hmac file. So assume upper
- # limit as 2*(10*COUNT_OF_VMS+TOTAL_SIZE/100MB)
- limit_count = str(2 * (10 * len(vms_dirs) +
- int(vms_size / (100 * 1024 * 1024))))
- if self.options.ignore_size_limit:
- limit_count = '0'
- vms_size = 0
- self.log.debug("Working in temporary dir: %s", self.tmpdir)
- self.log.info("Extracting data: %s to restore", size_to_human(vms_size))
- # retrieve backup from the backup stream (either VM, or dom0 file)
- (retrieve_proc, filelist_pipe, error_pipe) = \
- self._start_retrieval_process(
- vms_dirs, limit_count, vms_size)
- to_extract = Queue()
- # extract data retrieved by retrieve_proc
- extract_proc = self._start_inner_extraction_worker(
- to_extract, handlers)
- try:
- filename = None
- hmacfile = None
- nextfile = None
- while True:
- if self.canceled:
- break
- if not extract_proc.is_alive():
- retrieve_proc.terminate()
- retrieve_proc.wait()
- if retrieve_proc in self.processes_to_kill_on_cancel:
- self.processes_to_kill_on_cancel.remove(retrieve_proc)
- # wait for other processes (if any)
- for proc in self.processes_to_kill_on_cancel:
- proc.wait()
- break
- if nextfile is not None:
- filename = nextfile
- else:
- filename = filelist_pipe.readline().decode('ascii').strip()
- self.log.debug("Getting new file: %s", filename)
- if not filename or filename == "EOF":
- break
- # if reading archive directly with tar, wait for next filename -
- # tar prints filename before processing it, so wait for
- # the next one to be sure that whole file was extracted
- if not self.backup_vm:
- nextfile = filelist_pipe.readline().decode('ascii').strip()
- if self.header_data.version in [2, 3]:
- if not self.backup_vm:
- hmacfile = nextfile
- nextfile = filelist_pipe.readline().\
- decode('ascii').strip()
- else:
- hmacfile = filelist_pipe.readline().\
- decode('ascii').strip()
- if self.canceled:
- break
- self.log.debug("Getting hmac: %s", hmacfile)
- if not hmacfile or hmacfile == "EOF":
- # Premature end of archive, either of tar1_command or
- # vmproc exited with error
- break
- else: # self.header_data.version == 4
- if not filename.endswith('.enc'):
- raise qubesadmin.exc.QubesException(
- 'Invalid file extension found in archive: {}'.
- format(filename))
- if not any(filename.startswith(x) for x in vms_dirs):
- self.log.debug("Ignoring VM not selected for restore")
- os.unlink(os.path.join(self.tmpdir, filename))
- if hmacfile:
- os.unlink(os.path.join(self.tmpdir, hmacfile))
- continue
- if self.header_data.version in [2, 3]:
- self._verify_hmac(filename, hmacfile)
- else:
- # _verify_and_decrypt will write output to a file with
- # '.enc' extension cut off. This is safe because:
- # - `scrypt` tool will override output, so if the file was
- # already there (received from the VM), it will be removed
- # - incoming archive extraction will refuse to override
- # existing file, so if `scrypt` already created one,
- # it can not be manipulated by the VM
- # - when the file is retrieved from the VM, it appears at
- # the final form - if it's visible, VM have no longer
- # influence over its content
- #
- # This all means that if the file was correctly verified
- # + decrypted, we will surely access the right file
- filename = self._verify_and_decrypt(filename)
- if not self.options.verify_only:
- to_extract.put(os.path.join(self.tmpdir, filename))
- else:
- os.unlink(os.path.join(self.tmpdir, filename))
- if self.canceled:
- raise BackupCanceledError("Restore canceled",
- tmpdir=self.tmpdir)
- if retrieve_proc.wait() != 0:
- if retrieve_proc.returncode == errno.EDQUOT:
- raise QubesException(
- 'retrieved backup size exceed expected size, if you '
- 'believe this is ok, use --ignore-size-limit option')
- else:
- raise QubesException(
- "unable to read the qubes backup file {} ({}): {}"
- .format(self.backup_location,
- retrieve_proc.returncode, error_pipe.read(
- MAX_STDERR_BYTES)))
- # wait for other processes (if any)
- for proc in self.processes_to_kill_on_cancel:
- proc.wait()
- if proc.returncode != 0:
- raise QubesException(
- "Backup completed, "
- "but VM sending it reported an error (exit code {})".
- format(proc.returncode))
- if filename and filename != "EOF":
- raise QubesException(
- "Premature end of archive, the last file was %s" % filename)
- except:
- to_extract.put(QUEUE_ERROR)
- extract_proc.join()
- raise
- else:
- to_extract.put(QUEUE_FINISHED)
- finally:
- error_pipe.close()
- filelist_pipe.close()
- self.log.debug("Waiting for the extraction process to finish...")
- extract_proc.join()
- self.log.debug("Extraction process finished with code: %s",
- extract_proc.exitcode)
- if extract_proc.exitcode != 0:
- raise QubesException(
- "unable to extract the qubes backup. "
- "Check extracting process errors.")
- def new_name_for_conflicting_vm(self, orig_name, restore_info):
- '''Generate new name for conflicting VM
- Add a number suffix, until the name is unique. If no unique name can
- be found using this strategy, return :py:obj:`None`
- '''
- number = 1
- if len(orig_name) > 29:
- orig_name = orig_name[0:29]
- new_name = orig_name
- while (new_name in restore_info.keys() or
- new_name in [x.name for x in restore_info.values()] or
- new_name in self.app.domains):
- new_name = str('{}{}'.format(orig_name, number))
- number += 1
- if number == 100:
- # give up
- return None
- return new_name
- def restore_info_verify(self, restore_info):
- '''Verify restore info - validate VM dependencies, name conflicts
- etc.
- '''
- for vm in restore_info.keys():
- if vm in ['dom0']:
- continue
- vm_info = restore_info[vm]
- assert isinstance(vm_info, self.VMToRestore)
- vm_info.problems.clear()
- if vm in self.options.exclude:
- vm_info.problems.add(self.VMToRestore.EXCLUDED)
- if not self.options.verify_only and \
- vm_info.name in self.app.domains:
- if self.options.rename_conflicting:
- new_name = self.new_name_for_conflicting_vm(
- vm, restore_info
- )
- if new_name is not None:
- vm_info.name = new_name
- else:
- vm_info.problems.add(self.VMToRestore.ALREADY_EXISTS)
- else:
- vm_info.problems.add(self.VMToRestore.ALREADY_EXISTS)
- # check template
- if vm_info.template:
- present_on_host = False
- if vm_info.template in self.app.domains:
- host_tpl = self.app.domains[vm_info.template]
- if vm_info.vm.klass == 'DispVM':
- present_on_host = (
- getattr(host_tpl, 'template_for_dispvms', False))
- else:
- present_on_host = host_tpl.klass == 'TemplateVM'
- present_in_backup = False
- if vm_info.template in restore_info:
- bak_tpl = restore_info[vm_info.template]
- if bak_tpl.good_to_go:
- if vm_info.vm.klass == 'DispVM':
- present_in_backup = (
- bak_tpl.vm.properties.get(
- 'template_for_dispvms', False))
- else:
- present_in_backup = (
- bak_tpl.vm.klass == 'TemplateVM')
- self.log.debug(
- "vm=%s template=%s on_host=%s in_backup=%s",
- vm_info.name, vm_info.template,
- present_on_host, present_in_backup)
- if not present_on_host and not present_in_backup:
- if vm_info.vm.klass == 'DispVM':
- default_template = self.app.default_dispvm
- else:
- default_template = self.app.default_template
- if (self.options.use_default_template
- and default_template is not None):
- if vm_info.orig_template is None:
- vm_info.orig_template = vm_info.template
- vm_info.template = default_template.name
- self.log.debug(
- "vm=%s orig_template=%s -> default_template=%s",
- vm_info.name, vm_info.orig_template,
- default_template.name)
- else:
- vm_info.problems.add(self.VMToRestore.MISSING_TEMPLATE)
- # check netvm
- if vm_info.vm.properties.get('netvm', None) is not None:
- netvm_name = vm_info.netvm
- try:
- netvm_on_host = self.app.domains[netvm_name]
- except KeyError:
- netvm_on_host = None
- present_on_host = (netvm_on_host is not None
- and netvm_on_host.provides_network)
- present_in_backup = (netvm_name in restore_info.keys() and
- restore_info[netvm_name].good_to_go and
- restore_info[netvm_name].vm.properties.get(
- 'provides_network', False))
- if not present_on_host and not present_in_backup:
- if self.options.use_default_netvm:
- del vm_info.vm.properties['netvm']
- elif self.options.use_none_netvm:
- vm_info.netvm = None
- else:
- vm_info.problems.add(self.VMToRestore.MISSING_NETVM)
- return restore_info
- def get_restore_info(self):
- '''Get restore info
- Return information about what is included in the backup.
- That dictionary can be adjusted to select what VM should be restore.
- '''
- # Format versions:
- # 1 - Qubes R1, Qubes R2 beta1, beta2
- # 2 - Qubes R2 beta3+
- # 3 - Qubes R2+
- # 4 - Qubes R4+
- vms_to_restore = {}
- for vm in self.backup_app.domains.values():
- if vm.klass == 'AdminVM':
- # Handle dom0 as special case later
- continue
- if vm.included_in_backup:
- self.log.debug("%s is included in backup", vm.name)
- vms_to_restore[vm.name] = self.VMToRestore(vm)
- if vm.template is not None:
- templatevm_name = vm.template
- vms_to_restore[vm.name].template = templatevm_name
- vms_to_restore = self.restore_info_verify(vms_to_restore)
- # ...and dom0 home
- if self.options.dom0_home and \
- self.backup_app.domains['dom0'].included_in_backup:
- vm = self.backup_app.domains['dom0']
- vms_to_restore['dom0'] = self.Dom0ToRestore(vm,
- self.backup_app.domains['dom0'].backup_path)
- try:
- local_user = grp.getgrnam('qubes').gr_mem[0]
- except KeyError:
- # if no qubes group is present, assume username matches
- local_user = vms_to_restore['dom0'].username
- if vms_to_restore['dom0'].username != local_user:
- if not self.options.ignore_username_mismatch:
- vms_to_restore['dom0'].problems.add(
- self.Dom0ToRestore.USERNAME_MISMATCH)
- return vms_to_restore
- @staticmethod
- def get_restore_summary(restore_info):
- '''Return a ASCII formatted table with restore info summary'''
- fields = {
- "name": {'func': lambda vm: vm.name},
- "type": {'func': lambda vm: vm.klass},
- "template": {'func': lambda vm:
- 'n/a' if vm.template is None else vm.template},
- "netvm": {'func': lambda vm:
- '(default)' if 'netvm' not in vm.properties else
- '-' if vm.properties['netvm'] is None else
- vm.properties['netvm']},
- "label": {'func': lambda vm: vm.label},
- }
- fields_to_display = ['name', 'type', 'template',
- 'netvm', 'label']
- # First calculate the maximum width of each field we want to display
- total_width = 0
- for field in fields_to_display:
- fields[field]['max_width'] = len(field)
- for vm_info in restore_info.values():
- if vm_info.vm:
- # noinspection PyUnusedLocal
- field_len = len(str(fields[field]["func"](vm_info.vm)))
- if field_len > fields[field]['max_width']:
- fields[field]['max_width'] = field_len
- total_width += fields[field]['max_width']
- summary = ""
- summary += "The following VMs are included in the backup:\n"
- summary += "\n"
- # Display the header
- for field in fields_to_display:
- # noinspection PyTypeChecker
- fmt = "{{0:-^{0}}}-+".format(fields[field]["max_width"] + 1)
- summary += fmt.format('-')
- summary += "\n"
- for field in fields_to_display:
- # noinspection PyTypeChecker
- fmt = "{{0:>{0}}} |".format(fields[field]["max_width"] + 1)
- summary += fmt.format(field)
- summary += "\n"
- for field in fields_to_display:
- # noinspection PyTypeChecker
- fmt = "{{0:-^{0}}}-+".format(fields[field]["max_width"] + 1)
- summary += fmt.format('-')
- summary += "\n"
- for vm_info in restore_info.values():
- assert isinstance(vm_info, BackupRestore.VMToRestore)
- # Skip non-VM here
- if not vm_info.vm:
- continue
- # noinspection PyUnusedLocal
- summary_line = ""
- for field in fields_to_display:
- # noinspection PyTypeChecker
- fmt = "{{0:>{0}}} |".format(fields[field]["max_width"] + 1)
- summary_line += fmt.format(fields[field]["func"](vm_info.vm))
- if BackupRestore.VMToRestore.EXCLUDED in vm_info.problems:
- summary_line += " <-- Excluded from restore"
- elif BackupRestore.VMToRestore.ALREADY_EXISTS in vm_info.problems:
- summary_line += \
- " <-- A VM with the same name already exists on the host!"
- elif BackupRestore.VMToRestore.MISSING_TEMPLATE in \
- vm_info.problems:
- summary_line += " <-- No matching template on the host " \
- "or in the backup found!"
- elif BackupRestore.VMToRestore.MISSING_NETVM in \
- vm_info.problems:
- summary_line += " <-- No matching netvm on the host " \
- "or in the backup found!"
- elif vm_info.name == "dom0" and \
- BackupRestore.Dom0ToRestore.USERNAME_MISMATCH in \
- restore_info['dom0'].problems:
- summary_line += " <-- username in backup and dom0 mismatch"
- else:
- if vm_info.template != vm_info.vm.template:
- summary_line += " <-- Template change to '{}'".format(
- vm_info.template)
- if vm_info.name != vm_info.vm.name:
- summary_line += " <-- Will be renamed to '{}'".format(
- vm_info.name)
- summary += summary_line + "\n"
- return summary
- @staticmethod
- def _templates_first(vms):
- '''Sort templates before other VM types'''
- def key_function(instance):
- '''Key function for :py:func:`sorted`'''
- if isinstance(instance, BackupVM):
- if instance.klass == 'TemplateVM':
- return 0
- if instance.properties.get('template_for_dispvms', False):
- return 1
- return 2
- if hasattr(instance, 'vm'):
- return key_function(instance.vm)
- return 9
- return sorted(vms, key=key_function)
- def _handle_dom0(self, stream):
- '''Extract dom0 home'''
- try:
- local_user = grp.getgrnam('qubes').gr_mem[0]
- home_dir = pwd.getpwnam(local_user).pw_dir
- except KeyError:
- home_dir = os.path.expanduser('~')
- local_user = getpass.getuser()
- restore_home_backupdir = "home-restore-{0}".format(
- time.strftime("%Y-%m-%d-%H%M%S"))
- self.log.info("Restoring home of user '%s' to '%s' directory...",
- local_user, restore_home_backupdir)
- os.mkdir(os.path.join(home_dir, restore_home_backupdir))
- tar3_cmdline = ['tar', '-C',
- os.path.join(home_dir, restore_home_backupdir), '-x']
- retcode = subprocess.call(tar3_cmdline, stdin=stream)
- if retcode != 0:
- raise QubesException("Inner tar error for dom0-home")
- retcode = subprocess.call(['sudo', 'chown', '-R',
- local_user, os.path.join(home_dir, restore_home_backupdir)])
- if retcode != 0:
- self.log.error("*** Error while setting restore directory owner")
- def _handle_appmenus_list(self, vm, stream):
- '''Handle whitelisted-appmenus.list file'''
- try:
- subprocess.check_call(
- ['qvm-appmenus', '--set-whitelist=-', vm.name],
- stdin=stream)
- except (subprocess.CalledProcessError, FileNotFoundError):
- self.log.error('Failed to set application list for %s', vm.name)
- def _handle_volume_data(self, vm, volume, stream):
- '''Wrap volume data import with logging'''
- try:
- volume.import_data(stream)
- except Exception as err: # pylint: disable=broad-except
- self.log.error('Failed to restore volume %s of VM %s: %s',
- volume.name, vm.name, err)
- def _handle_volume_size(self, vm, volume, size):
- '''Wrap volume resize with logging'''
- try:
- if volume.size < size:
- volume.resize(size)
- except Exception as err: # pylint: disable=broad-except
- self.log.error('Failed to resize volume %s of VM %s to %d: %s',
- volume.name, vm.name, size, err)
- def restore_do(self, restore_info):
- '''
- High level workflow:
- 1. Create VMs object in host collection (qubes.xml)
- 2. Create them on disk (vm.create_on_disk)
- 3. Restore VM data, overriding/converting VM files
- 4. Apply possible fixups and save qubes.xml
- :param restore_info:
- :return:
- '''
- if self.header_data.version == 1:
- raise NotImplementedError('Backup format version 1 not supported')
- restore_info = self.restore_info_verify(restore_info)
- self._restore_vms_metadata(restore_info)
- # Perform VM restoration in backup order
- vms_dirs = []
- handlers = {}
- vms_size = 0
- for vm_info in self._templates_first(restore_info.values()):
- vm = vm_info.restored_vm
- if vm and vm_info.subdir:
- if isinstance(vm_info, self.Dom0ToRestore) and \
- vm_info.good_to_go:
- vms_dirs.append(os.path.dirname(vm_info.subdir))
- vms_size += int(vm_info.size)
- if self.options.verify_only:
- continue
- handlers[vm_info.subdir] = (self._handle_dom0, None)
- else:
- vms_size += int(vm_info.size)
- vms_dirs.append(vm_info.subdir)
- if self.options.verify_only:
- continue
- for name, volume in vm.volumes.items():
- if not volume.save_on_stop:
- continue
- data_func = functools.partial(
- self._handle_volume_data, vm, volume)
- size_func = functools.partial(
- self._handle_volume_size, vm, volume)
- img_path = os.path.join(vm_info.subdir, name + '.img')
- handlers[img_path] = (data_func, size_func)
- handlers[os.path.join(vm_info.subdir, 'firewall.xml')] = (
- functools.partial(vm_info.vm.handle_firewall_xml, vm),
- None)
- handlers[os.path.join(vm_info.subdir,
- 'whitelisted-appmenus.list')] = (
- functools.partial(self._handle_appmenus_list, vm), None)
- try:
- self._restore_vm_data(vms_dirs=vms_dirs, vms_size=vms_size,
- handlers=handlers)
- except QubesException as err:
- if self.options.verify_only:
- raise
- else:
- self.log.error('Error extracting data: %s', str(err))
- finally:
- if self.log.getEffectiveLevel() > logging.DEBUG:
- shutil.rmtree(self.tmpdir)
- if self.canceled:
- raise BackupCanceledError("Restore canceled",
- tmpdir=self.tmpdir)
- self.log.info("-> Done.")
- if not self.options.verify_only:
- self.log.info("-> Please install updates for all the restored "
- "templates.")
- def _restore_property(self, vm, prop, value):
- '''Restore a single VM property, logging exceptions'''
- try:
- setattr(vm, prop, value)
- except Exception as err: # pylint: disable=broad-except
- self.log.error('Error setting %s.%s to %s: %s',
- vm.name, prop, value, err)
- def _restore_vms_metadata(self, restore_info):
- '''Restore VM metadata
- Create VMs, set their properties etc.
- '''
- vms = {}
- for vm_info in restore_info.values():
- assert isinstance(vm_info, self.VMToRestore)
- if not vm_info.vm:
- continue
- if not vm_info.good_to_go:
- continue
- vm = vm_info.vm
- vms[vm.name] = vm
- # First load templates, then other VMs
- for vm in self._templates_first(vms.values()):
- if self.canceled:
- return
- if self.options.verify_only:
- self.log.info("-> Verifying %s...", vm.name)
- else:
- self.log.info("-> Restoring %s...", vm.name)
- kwargs = {}
- if vm.template:
- template = restore_info[vm.name].template
- # handle potentially renamed template
- if template in restore_info \
- and restore_info[template].good_to_go:
- template = restore_info[template].name
- kwargs['template'] = template
- new_vm = None
- vm_name = restore_info[vm.name].name
- if self.options.verify_only or vm.name == 'dom0':
- # can't create vm, but need backup info
- new_vm = self.backup_app.domains[vm_name]
- else:
- try:
- # first only create VMs, later setting may require other VMs
- # be already created
- new_vm = self.app.add_new_vm(
- vm.klass,
- name=vm_name,
- label=vm.label,
- pool=self.options.override_pool,
- **kwargs)
- except Exception as err: # pylint: disable=broad-except
- self.log.error('Error restoring VM %s, skipping: %s',
- vm.name, err)
- if new_vm:
- del self.app.domains[new_vm.name]
- continue
- # restore this property early to be ready for dependent DispVMs
- prop = 'template_for_dispvms'
- value = vm.properties.get(prop, None)
- if value is not None:
- self._restore_property(new_vm, prop, value)
- restore_info[vm.name].restored_vm = new_vm
- for vm in vms.values():
- if self.canceled:
- return
- new_vm = restore_info[vm.name].restored_vm
- if not new_vm:
- # skipped/failed
- continue
- for prop, value in vm.properties.items():
- # can't reset the first; already handled the second
- if prop in ['dispid', 'template_for_dispvms']:
- continue
- # exclude VM references - handled manually according to
- # restore options
- if prop in ['template', 'netvm', 'default_dispvm']:
- continue
- # exclude as this only applied before restoring
- if prop in ['installed_by_rpm']:
- continue
- self._restore_property(new_vm, prop, value)
- for feature, value in vm.features.items():
- try:
- new_vm.features[feature] = value
- except Exception as err: # pylint: disable=broad-except
- self.log.error('Error setting %s.features[%s] to %s: %s',
- vm.name, feature, value, err)
- for tag in vm.tags:
- try:
- new_vm.tags.add(tag)
- except Exception as err: # pylint: disable=broad-except
- self.log.error('Error adding tag %s to %s: %s',
- tag, vm.name, err)
- for bus in vm.devices:
- for backend_domain, ident in vm.devices[bus]:
- options = vm.devices[bus][(backend_domain, ident)]
- assignment = DeviceAssignment(
- backend_domain=backend_domain,
- ident=ident,
- options=options,
- persistent=True)
- try:
- new_vm.devices[bus].attach(assignment)
- except Exception as err: # pylint: disable=broad-except
- self.log.error('Error attaching device %s:%s to %s: %s',
- bus, ident, vm.name, err)
- # Set VM dependencies - only non-default setting
- for vm in vms.values():
- vm_info = restore_info[vm.name]
- vm_name = vm_info.name
- try:
- host_vm = self.app.domains[vm_name]
- except KeyError:
- # Failed/skipped VM
- continue
- if 'netvm' in vm.properties:
- if vm_info.netvm in restore_info:
- value = restore_info[vm_info.netvm].name
- else:
- value = vm_info.netvm
- try:
- host_vm.netvm = value
- except Exception as err: # pylint: disable=broad-except
- self.log.error('Error setting %s.%s to %s: %s',
- vm.name, 'netvm', value, err)
- if 'default_dispvm' in vm.properties:
- if vm.properties['default_dispvm'] in restore_info:
- value = restore_info[vm.properties[
- 'default_dispvm']].name
- else:
- value = vm.properties['default_dispvm']
- try:
- host_vm.default_dispvm = value
- except Exception as err: # pylint: disable=broad-except
- self.log.error('Error setting %s.%s to %s: %s',
- vm.name, 'default_dispvm', value, err)
|