Ver código fonte

Merge remote-tracking branch 'qubesos/pr/159'

* qubesos/pr/159:
  qubes/vm: Improve stopped event handling
Marek Marczykowski-Górecki 6 anos atrás
pai
commit
cf92a576ad
4 arquivos alterados com 126 adições e 37 exclusões
  1. 1 1
      qubes/app.py
  2. 0 1
      qubes/tests/integ/basic.py
  3. 12 16
      qubes/vm/dispvm.py
  4. 113 19
      qubes/vm/qubesvm.py

+ 1 - 1
qubes/app.py

@@ -1183,7 +1183,7 @@ class Qubes(qubes.PropertyHolder):
             return
 
         if event == libvirt.VIR_DOMAIN_EVENT_STOPPED:
-            vm.fire_event('domain-shutdown')
+            vm.on_libvirt_domain_stopped()
 
     @qubes.events.handler('domain-pre-delete')
     def on_domain_pre_deleted(self, event, vm):

+ 0 - 1
qubes/tests/integ/basic.py

@@ -137,7 +137,6 @@ class TC_00_Basic(qubes.tests.SystemTestCase):
             self.test_failure_reason = 'domain-shutdown event received twice'
         self.domain_shutdown_handled = True
 
-    @unittest.expectedFailure
     def test_201_shutdown_event_race(self):
         '''Regression test for 3164 - pure events edition'''
         vmname = self.make_vm_name('appvm')

+ 12 - 16
qubes/vm/dispvm.py

@@ -129,18 +129,18 @@ class DispVM(qubes.vm.qubesvm.QubesVM):
         raise qubes.exc.QubesValueError(self,
             'Cannot change template of Disposable VM')
 
+    @qubes.events.handler('domain-shutdown')
     @asyncio.coroutine
-    def on_domain_shutdown_coro(self):
-        '''Coroutine for executing cleanup after domain shutdown.
+    def on_domain_shutdown(self, _event, **_kwargs):
+        yield from self._auto_cleanup()
 
-        This override default action defined in QubesVM.on_domain_shutdown_coro
-        '''
-        with (yield from self.startup_lock):
-            yield from self.storage.stop()
-            if self.auto_cleanup and self in self.app.domains:
-                yield from self.remove_from_disk()
-                del self.app.domains[self]
-                self.app.save()
+    @asyncio.coroutine
+    def _auto_cleanup(self):
+        '''Do auto cleanup if enabled'''
+        if self.auto_cleanup and self in self.app.domains:
+            yield from self.remove_from_disk()
+            del self.app.domains[self]
+            self.app.save()
 
     @classmethod
     @asyncio.coroutine
@@ -206,10 +206,6 @@ class DispVM(qubes.vm.qubesvm.QubesVM):
 
             yield from super(DispVM, self).start(**kwargs)
         except:
-            # cleanup also on failed startup; there is potential race with
-            # self.on_domain_shutdown_coro, so check if wasn't already removed
-            if self.auto_cleanup and self in self.app.domains:
-                yield from self.remove_from_disk()
-                del self.app.domains[self]
-                self.app.save()
+            # Cleanup also on failed startup
+            yield from self._auto_cleanup()
             raise

+ 113 - 19
qubes/vm/qubesvm.py

@@ -154,9 +154,33 @@ class QubesVM(qubes.vm.mix.net.NetVMMixin, qubes.vm.BaseVM):
 
             *other arguments are as in :py:meth:`start`*
 
+        .. event:: domain-stopped (subject, event)
+
+            Fired when domain has been stopped.
+
+            This event is emitted before ``'domain-shutdown'`` and will trigger
+            the cleanup in QubesVM. So if you require that the cleanup has
+            already run use ``'domain-shutdown'``.
+
+            Note that you can receive this event as soon as you received
+            ``'domain-pre-start'``. This also can be emitted in case of a
+            startup failure, before or after ``'domain-start-failed'``.
+
+            Handler for this event can be asynchronous (a coroutine).
+
+            :param subject: Event emitter (the qube object)
+            :param event: Event name (``'domain-stopped'``)
+
         .. event:: domain-shutdown (subject, event)
 
-            Fired when domain has been shut down.
+            Fired when domain has been shut down. It is generated after
+            ``'domain-stopped'``.
+
+            Note that you can receive this event as soon as you received
+            ``'domain-pre-start'``. This also can be emitted in case of a
+            startup failure, before or after ``'domain-start-failed'``.
+
+            Handler for this event can be asynchronous (a coroutine).
 
             :param subject: Event emitter (the qube object)
             :param event: Event name (``'domain-shutdown'``)
@@ -646,6 +670,17 @@ class QubesVM(qubes.vm.mix.net.NetVMMixin, qubes.vm.BaseVM):
         self._libvirt_domain = None
         self._qdb_connection = None
 
+        # We assume a fully halted VM here. The 'domain-init' handler will
+        # check if the VM is already running.
+        self._domain_stopped_event_received = True
+        self._domain_stopped_event_handled = True
+
+        self._domain_stopped_future = None
+
+        # Internal lock to ensure ordering between _domain_stopped_coro() and
+        # start(). This should not be accessed anywhere else.
+        self._domain_stopped_lock = asyncio.Lock()
+
         if xml is None:
             # we are creating new VM and attributes came through kwargs
             assert hasattr(self, 'qid')
@@ -715,6 +750,8 @@ class QubesVM(qubes.vm.mix.net.NetVMMixin, qubes.vm.BaseVM):
 
         if not self.app.vmm.offline_mode and self.is_running():
             self.start_qdb_watch()
+            self._domain_stopped_event_received = False
+            self._domain_stopped_event_handled = False
 
     @qubes.events.handler('property-set:label')
     def on_property_set_label(self, event, name, newvalue, oldvalue=None):
@@ -800,6 +837,30 @@ class QubesVM(qubes.vm.mix.net.NetVMMixin, qubes.vm.BaseVM):
             if self.get_power_state() != 'Halted':
                 return
 
+            with (yield from self._domain_stopped_lock):
+                # Don't accept any new stopped event's till a new VM has been
+                # created. If we didn't received any stopped event or it wasn't
+                # handled yet we will handle this in the next lines.
+                self._domain_stopped_event_received = True
+
+                if self._domain_stopped_future is not None:
+                    # Libvirt stopped event was already received, so cancel the
+                    # future. If it didn't generate the Qubes events yet we
+                    # will do it below.
+                    self._domain_stopped_future.cancel()
+                    self._domain_stopped_future = None
+
+                if not self._domain_stopped_event_handled:
+                    # No Qubes domain-stopped events have been generated yet.
+                    # So do this now.
+
+                    # Set this immediately such that we don't generate events
+                    # twice if an exception gets thrown.
+                    self._domain_stopped_event_handled = True
+
+                    yield from self.fire_event_async('domain-stopped')
+                    yield from self.fire_event_async('domain-shutdown')
+
             self.log.info('Starting {}'.format(self.name))
 
             yield from self.fire_event_async('domain-pre-start',
@@ -819,14 +880,17 @@ class QubesVM(qubes.vm.mix.net.NetVMMixin, qubes.vm.BaseVM):
                 qmemman_client = yield from asyncio.get_event_loop().\
                     run_in_executor(None, self.request_memory, mem_required)
 
+                yield from self.storage.start()
+
             except Exception as exc:
                 # let anyone receiving domain-pre-start know that startup failed
                 yield from self.fire_event_async('domain-start-failed',
                     reason=str(exc))
+                if qmemman_client:
+                    qmemman_client.close()
                 raise
 
             try:
-                yield from self.storage.start()
                 self._update_libvirt_domain()
 
                 self.libvirt_domain.createWithFlags(
@@ -836,12 +900,16 @@ class QubesVM(qubes.vm.mix.net.NetVMMixin, qubes.vm.BaseVM):
                 # let anyone receiving domain-pre-start know that startup failed
                 yield from self.fire_event_async('domain-start-failed',
                     reason=str(exc))
+                self.storage.stop()
                 raise
 
             finally:
                 if qmemman_client:
                     qmemman_client.close()
 
+            self._domain_stopped_event_received = False
+            self._domain_stopped_event_handled = False
+
             try:
                 yield from self.fire_event_async('domain-spawn',
                     start_guid=start_guid)
@@ -873,24 +941,50 @@ class QubesVM(qubes.vm.mix.net.NetVMMixin, qubes.vm.BaseVM):
 
         return self
 
-    @asyncio.coroutine
-    def on_domain_shutdown_coro(self):
-        '''Coroutine for executing cleanup after domain shutdown.
-        Do not allow domain to be started again until this finishes.
+    def on_libvirt_domain_stopped(self):
+        ''' Handle VIR_DOMAIN_EVENT_STOPPED events from libvirt.
+
+        This is not a Qubes event handler. Instead we do some sanity checks
+        and synchronization with start() and then emits Qubes events.
         '''
-        with (yield from self.startup_lock):
-            try:
-                yield from self.storage.stop()
-            except qubes.storage.StoragePoolException:
-                self.log.exception('Failed to stop storage for domain %s',
-                    self.name)
-
-    @qubes.events.handler('domain-shutdown')
-    def on_domain_shutdown(self, _event, **_kwargs):
-        '''Cleanup after domain shutdown'''
-        # TODO: ensure that domain haven't been started _before_ this
-        # coroutine got a chance to acquire a lock
-        asyncio.ensure_future(self.on_domain_shutdown_coro())
+
+        state = self.get_power_state()
+        if state not in ['Halted', 'Crashed', 'Dying']:
+            self.log.warning('Stopped event from libvirt received,'
+                ' but domain is in state {}!'.format(state))
+            # ignore this unexpected event
+            return
+
+        if self._domain_stopped_event_received:
+            self.log.warning('Duplicated stopped event from libvirt received!')
+            # ignore this unexpected event
+            return
+
+        self._domain_stopped_event_received = True
+        self._domain_stopped_future = \
+            asyncio.ensure_future(self._domain_stopped_coro())
+
+    @asyncio.coroutine
+    def _domain_stopped_coro(self):
+        with (yield from self._domain_stopped_lock):
+            assert not self._domain_stopped_event_handled
+
+            # Set this immediately such that we don't generate events twice if
+            # an exception gets thrown.
+            self._domain_stopped_event_handled = True
+
+            yield from self.fire_event_async('domain-stopped')
+            yield from self.fire_event_async('domain-shutdown')
+
+    @qubes.events.handler('domain-stopped')
+    @asyncio.coroutine
+    def on_domain_stopped(self, _event, **_kwargs):
+        '''Cleanup after domain was stopped'''
+        try:
+            yield from self.storage.stop()
+        except qubes.storage.StoragePoolException:
+            self.log.exception('Failed to stop storage for domain %s',
+                self.name)
 
     @asyncio.coroutine
     def shutdown(self, force=False, wait=False):