diff --git a/qmemman/qmemman_server.py b/qmemman/qmemman_server.py
index 2af58956..7c295243 100755
--- a/qmemman/qmemman_server.py
+++ b/qmemman/qmemman_server.py
@@ -42,6 +42,15 @@ LOG_PATH='/var/log/qubes/qmemman.log'
 
 system_state = SystemState()
 global_lock = thread.allocate_lock()
+# If XS_Watcher will
+# handle meminfo event before @introduceDomain, it will use
+# incomplete domain list for that and may redistribute memory
+# allocated to some VM, but not yet used (see #1389).
+# To fix that, system_state should be updated (refresh domain
+# list) before processing other changes, every time some process requested
+# memory for a new VM, before releasing the lock. Then XS_Watcher will check
+# this flag before processing other event.
+force_refresh_domain_list = False
 
 def only_in_first_list(l1, l2):
     ret=[]
@@ -65,17 +74,30 @@ class XS_Watcher:
         self.log.debug('XS_Watcher()')
 
         self.handle = xen.lowlevel.xs.xs()
-        self.handle.watch('@introduceDomain', WatchType(XS_Watcher.domain_list_changed, None))
-        self.handle.watch('@releaseDomain', WatchType(XS_Watcher.domain_list_changed, None))
+        self.handle.watch('@introduceDomain', WatchType(
+            XS_Watcher.domain_list_changed, False))
+        self.handle.watch('@releaseDomain', WatchType(
+            XS_Watcher.domain_list_changed, False))
         self.watch_token_dict = {}
 
+    def domain_list_changed(self, refresh_only=False):
+        """
+        Check if any domain was created/destroyed. If it was, update
+        appropriate list. Then redistribute memory.
 
-    def domain_list_changed(self, param):
-        self.log.debug('domain_list_changed(param={!r})'.format(param))
+        :param refresh_only If True, only refresh domain list, do not
+        redistribute memory. In this mode, caller must already hold
+        global_lock.
+        """
+        self.log.debug('domain_list_changed(only_refresh={!r})'.format(
+            refresh_only))
 
-        self.log.debug('acquiring global_lock')
-        global_lock.acquire()
-        self.log.debug('global_lock acquired')
+        got_lock = False
+        if not refresh_only:
+            self.log.debug('acquiring global_lock')
+            global_lock.acquire()
+            got_lock = True
+            self.log.debug('global_lock acquired')
         try:
             curr = self.handle.ls('', '/local/domain')
             if curr is None:
@@ -105,10 +127,12 @@ class XS_Watcher:
                 self.watch_token_dict.pop(i)
                 system_state.del_domain(i)
         finally:
-            global_lock.release()
-            self.log.debug('global_lock released')
+            if got_lock:
+                global_lock.release()
+                self.log.debug('global_lock released')
 
-        system_state.do_balance()
+        if not refresh_only:
+            system_state.do_balance()
 
 
     def meminfo_changed(self, domain_id):
@@ -120,6 +144,8 @@ class XS_Watcher:
         self.log.debug('acquiring global_lock')
         global_lock.acquire()
         self.log.debug('global_lock acquired')
+        if force_refresh_domain_list:
+            self.domain_list_changed(refresh_only=True)
 
         system_state.refresh_meminfo(domain_id, untrusted_meminfo_key)
 
@@ -155,15 +181,9 @@ class QMemmanReqHandler(SocketServer.BaseRequestHandler):
             self.log.debug('data={!r}'.format(self.data))
             if len(self.data) == 0:
                 self.log.info('EOF')
-                # FIXME: there is a race condition here: if XS_Watcher will
-                # handle meminfo event before @introduceDomain, it will use
-                # incomplete domain list for that and may redistribute memory
-                # allocated to some VM, but not yet used (see #1389).
-                # To fix that, system_state should be updated (refresh domain
-                # list) before releasing the lock, but in the current code
-                # layout XS_Watcher instance isn't available here,
-                # so xenstore watches would not be registered
                 if got_lock:
+                    global force_refresh_domain_list
+                    force_refresh_domain_list = True
                     global_lock.release()
                     self.log.debug('global_lock released')
                 return
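Note on the synchronization pattern: the patch replaces the old FIXME with a module-level flag that carries "the domain list may be stale" from the request-handler thread to the XS_Watcher thread. Below is a minimal, hypothetical sketch of that hand-off, not the qmemman code itself; the names request_handler_done, on_meminfo_event, refresh_domain_list and rebalance_memory are invented for illustration, threading.Lock stands in for thread.allocate_lock(), and the sketch clears the flag after refreshing, which this excerpt does not show.

import threading

global_lock = threading.Lock()
# Set by the request-handler thread, read by the watcher thread.
# Only ever touched while global_lock is held.
force_refresh_domain_list = False


def request_handler_done():
    """Request-handler side: a process that asked for memory for a new VM
    has finished, so the watcher's cached domain list may now be stale."""
    global force_refresh_domain_list
    with global_lock:
        force_refresh_domain_list = True


def on_meminfo_event(domain_id):
    """Watcher side: before acting on a meminfo change, re-read the domain
    list if the flag was raised, so memory reserved for a new VM is not
    redistributed to other domains."""
    global force_refresh_domain_list
    with global_lock:
        if force_refresh_domain_list:
            refresh_domain_list()
            force_refresh_domain_list = False
        rebalance_memory(domain_id)


def refresh_domain_list():
    # Placeholder: qmemman re-reads /local/domain from xenstore here.
    pass


def rebalance_memory(domain_id):
    # Placeholder: qmemman uses system_state.refresh_meminfo()/do_balance().
    pass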