vm: avoid starting the same VM multiple times simultaneously

While libvirt handle locking itself, there is also Qubes-specific
startup part. Especially starting qrexec-daemon and waiting until
qrexec-agent connect to it. When someone will attempt to start VM the
second time (or simply assume it's already running) - qrexec will not be
connected yet and the operation will fail. Solve the problem by wrapping
the whole vm.start() function with a lock, including a check if VM is
running and waiting for qrexec.

Also, do not throw exception if VM is already running.

This way, after a call to vm.start(), VM will be started with qrexec
connected - regardless of who really started it.
Note that, it will not solve the situation when someone check if VM is
running manually, like:

    if not vm.is_running():
        yield from vm.start()

Such code should be changed to simply:

    yield from vm.start()

Fixes QubesOS/qubes-issues#2001
Fixes QubesOS/qubes-issues#2666
This commit is contained in:
Marek Marczykowski-Górecki 2017-06-01 03:31:59 +02:00
parent 1f86c9253c
commit 018877a19c
No known key found for this signature in database
GPG Key ID: 063938BA42CFA724

View File

@ -704,6 +704,10 @@ class QubesVM(qubes.vm.mix.net.NetVMMixin, qubes.vm.BaseVM):
# will be initialized after loading all the properties # will be initialized after loading all the properties
#: operations which shouldn't happen simultaneously with qube startup
# (including another startup of the same qube)
self.startup_lock = asyncio.Lock()
# fire hooks # fire hooks
if xml is None: if xml is None:
self.events_enabled = True self.events_enabled = True
@ -841,88 +845,99 @@ class QubesVM(qubes.vm.mix.net.NetVMMixin, qubes.vm.BaseVM):
:param int mem_required: FIXME :param int mem_required: FIXME
''' '''
# Intentionally not used is_running(): eliminate also "Paused", with (yield from self.startup_lock):
# "Crashed", "Halting" # Intentionally not used is_running(): eliminate also "Paused",
if self.get_power_state() != 'Halted': # "Crashed", "Halting"
raise qubes.exc.QubesVMNotHaltedError(self) if self.get_power_state() != 'Halted':
return
self.log.info('Starting {}'.format(self.name)) self.log.info('Starting {}'.format(self.name))
self.fire_event_pre('domain-pre-start', preparing_dvm=preparing_dvm, self.fire_event_pre('domain-pre-start', preparing_dvm=preparing_dvm,
start_guid=start_guid, mem_required=mem_required) start_guid=start_guid, mem_required=mem_required)
yield from self.storage.verify() yield from self.storage.verify()
if self.netvm is not None: if self.netvm is not None:
# pylint: disable = no-member # pylint: disable = no-member
if self.netvm.qid != 0: if self.netvm.qid != 0:
if not self.netvm.is_running(): if not self.netvm.is_running():
yield from self.netvm.start(start_guid=start_guid, yield from self.netvm.start(start_guid=start_guid,
notify_function=notify_function) notify_function=notify_function)
# TODO: lock qmemman_client = yield from asyncio.get_event_loop().\
run_in_executor(None, self.request_memory, mem_required)
qmemman_client = yield from asyncio.get_event_loop().run_in_executor( try:
None, self.request_memory, mem_required) yield from self.storage.start()
self._update_libvirt_domain()
try: self.libvirt_domain.createWithFlags(
yield from self.storage.start() libvirt.VIR_DOMAIN_START_PAUSED)
self._update_libvirt_domain() finally:
if qmemman_client:
qmemman_client.close()
self.libvirt_domain.createWithFlags(libvirt.VIR_DOMAIN_START_PAUSED) try:
finally: self.fire_event('domain-spawn',
if qmemman_client: preparing_dvm=preparing_dvm, start_guid=start_guid)
qmemman_client.close()
try: self.log.info('Setting Qubes DB info for the VM')
self.fire_event('domain-spawn', yield from self.start_qubesdb()
preparing_dvm=preparing_dvm, start_guid=start_guid) self.create_qdb_entries()
self.log.info('Setting Qubes DB info for the VM') if preparing_dvm:
yield from self.start_qubesdb() self.qdb.write('/dvm', '1')
self.create_qdb_entries()
if preparing_dvm: self.log.warning('Activating the {} VM'.format(self.name))
self.qdb.write('/dvm', '1') self.libvirt_domain.resume()
self.log.warning('Activating the {} VM'.format(self.name)) # close() is not really needed, because the descriptor is
self.libvirt_domain.resume() # close-on-exec anyway, the reason to postpone close() is that
# possibly xl is not done constructing the domain after its main
# process exits so we close() when we know the domain is up the
# successful unpause is some indicator of it
if qmemman_client:
qmemman_client.close()
qmemman_client = None
# close() is not really needed, because the descriptor is #if self._start_guid_first and start_guid and not preparing_dvm \
# close-on-exec anyway, the reason to postpone close() is that # and os.path.exists('/var/run/shm.id'):
# possibly xl is not done constructing the domain after its main # self.start_guid()
# process exits so we close() when we know the domain is up the
# successful unpause is some indicator of it
if qmemman_client:
qmemman_client.close()
qmemman_client = None
# if self._start_guid_first and start_guid and not preparing_dvm \ if not preparing_dvm:
# and os.path.exists('/var/run/shm.id'): yield from self.start_qrexec_daemon()
# self.start_guid()
if not preparing_dvm: self.fire_event('domain-start',
yield from self.start_qrexec_daemon() preparing_dvm=preparing_dvm, start_guid=start_guid)
self.fire_event('domain-start', except: # pylint: disable=bare-except
preparing_dvm=preparing_dvm, start_guid=start_guid) if self.is_running() or self.is_paused():
# This avoids losing the exception if an exception is
except: # pylint: disable=bare-except # raised in self.force_shutdown(), because the vm is not
if self.is_running() or self.is_paused(): # running or paused
# This avoids losing the exception if an exception is raised in yield from self.kill() # pylint: disable=not-an-iterable
# self.force_shutdown(), because the vm is not running or paused raise
yield from self.kill() # pylint: disable=not-an-iterable finally:
raise if qmemman_client:
finally: qmemman_client.close()
if qmemman_client:
qmemman_client.close()
return self return self
@asyncio.coroutine
def on_domain_shutdown_coro(self):
'''Coroutine for executing cleanup after domain shutdown.
Do not allow domain to be started again until this finishes.
'''
with (yield from self.startup_lock):
yield from self.storage.stop()
@qubes.events.handler('domain-shutdown') @qubes.events.handler('domain-shutdown')
def on_domain_shutdown(self, _event, **_kwargs): def on_domain_shutdown(self, _event, **_kwargs):
'''Cleanup after domain shutdown''' '''Cleanup after domain shutdown'''
asyncio.ensure_future(self.storage.stop()) # TODO: ensure that domain haven't been started _before_ this
# coroutine got a chance to acquire a lock
asyncio.ensure_future(self.on_domain_shutdown_coro())
@asyncio.coroutine @asyncio.coroutine
def shutdown(self, force=False, wait=False): def shutdown(self, force=False, wait=False):