qmemman: fix race condition on starting new VM
Force refreshing domain list after starting new VM, even if the triggering watch wasn't about domain list change. Otherwise (with outdated domain list) memory allocated to a new VM, but not yet used may be redistributed (leaving to little to the Xen itself). Fixes QubesOS/qubes-issues#1389
This commit is contained in:
parent
411e8b5323
commit
5d36923170
@ -42,6 +42,15 @@ LOG_PATH='/var/log/qubes/qmemman.log'
|
||||
|
||||
system_state = SystemState()
|
||||
global_lock = thread.allocate_lock()
|
||||
# If XS_Watcher will
|
||||
# handle meminfo event before @introduceDomain, it will use
|
||||
# incomplete domain list for that and may redistribute memory
|
||||
# allocated to some VM, but not yet used (see #1389).
|
||||
# To fix that, system_state should be updated (refresh domain
|
||||
# list) before processing other changes, every time some process requested
|
||||
# memory for a new VM, before releasing the lock. Then XS_Watcher will check
|
||||
# this flag before processing other event.
|
||||
force_refresh_domain_list = False
|
||||
|
||||
def only_in_first_list(l1, l2):
|
||||
ret=[]
|
||||
@ -65,16 +74,29 @@ class XS_Watcher:
|
||||
self.log.debug('XS_Watcher()')
|
||||
|
||||
self.handle = xen.lowlevel.xs.xs()
|
||||
self.handle.watch('@introduceDomain', WatchType(XS_Watcher.domain_list_changed, None))
|
||||
self.handle.watch('@releaseDomain', WatchType(XS_Watcher.domain_list_changed, None))
|
||||
self.handle.watch('@introduceDomain', WatchType(
|
||||
XS_Watcher.domain_list_changed, False))
|
||||
self.handle.watch('@releaseDomain', WatchType(
|
||||
XS_Watcher.domain_list_changed, False))
|
||||
self.watch_token_dict = {}
|
||||
|
||||
def domain_list_changed(self, refresh_only=False):
|
||||
"""
|
||||
Check if any domain was created/destroyed. If it was, update
|
||||
appropriate list. Then redistribute memory.
|
||||
|
||||
def domain_list_changed(self, param):
|
||||
self.log.debug('domain_list_changed(param={!r})'.format(param))
|
||||
:param refresh_only If True, only refresh domain list, do not
|
||||
redistribute memory. In this mode, caller must already hold
|
||||
global_lock.
|
||||
"""
|
||||
self.log.debug('domain_list_changed(only_refresh={!r})'.format(
|
||||
refresh_only))
|
||||
|
||||
got_lock = False
|
||||
if not refresh_only:
|
||||
self.log.debug('acquiring global_lock')
|
||||
global_lock.acquire()
|
||||
got_lock = True
|
||||
self.log.debug('global_lock acquired')
|
||||
try:
|
||||
curr = self.handle.ls('', '/local/domain')
|
||||
@ -105,9 +127,11 @@ class XS_Watcher:
|
||||
self.watch_token_dict.pop(i)
|
||||
system_state.del_domain(i)
|
||||
finally:
|
||||
if got_lock:
|
||||
global_lock.release()
|
||||
self.log.debug('global_lock released')
|
||||
|
||||
if not refresh_only:
|
||||
system_state.do_balance()
|
||||
|
||||
|
||||
@ -120,6 +144,8 @@ class XS_Watcher:
|
||||
self.log.debug('acquiring global_lock')
|
||||
global_lock.acquire()
|
||||
self.log.debug('global_lock acquired')
|
||||
if force_refresh_domain_list:
|
||||
self.domain_list_changed(refresh_only=True)
|
||||
|
||||
system_state.refresh_meminfo(domain_id, untrusted_meminfo_key)
|
||||
|
||||
@ -155,15 +181,9 @@ class QMemmanReqHandler(SocketServer.BaseRequestHandler):
|
||||
self.log.debug('data={!r}'.format(self.data))
|
||||
if len(self.data) == 0:
|
||||
self.log.info('EOF')
|
||||
# FIXME: there is a race condition here: if XS_Watcher will
|
||||
# handle meminfo event before @introduceDomain, it will use
|
||||
# incomplete domain list for that and may redistribute memory
|
||||
# allocated to some VM, but not yet used (see #1389).
|
||||
# To fix that, system_state should be updated (refresh domain
|
||||
# list) before releasing the lock, but in the current code
|
||||
# layout XS_Watcher instance isn't available here,
|
||||
# so xenstore watches would not be registered
|
||||
if got_lock:
|
||||
global force_refresh_domain_list
|
||||
force_refresh_domain_list = True
|
||||
global_lock.release()
|
||||
self.log.debug('global_lock released')
|
||||
return
|
||||
|
Loading…
Reference in New Issue
Block a user