From 181eb3e76414bd102be5c2004daca784a74ee010 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
Date: Wed, 6 Jan 2016 00:40:10 +0100
Subject: [PATCH] qmemman: handle memory assigned to VM but not yet used

When a VM gets some memory assigned, the balloon driver may not pick it
up immediately, and the memory will still be seen as "free" by Xen,
even though the VM can use (request) it at any time. Qmemman needs to
take care of such memory (exclude it from the "free" pool), otherwise
it would redistribute it to other domains, allowing the original domain
to drain the Xen memory pool.

Do this by redefining DomainState.memory_actual - it is now the amount
of memory available to the VM (currently used, or possibly used at any
time). Then calculate free memory by subtracting memory allocated but
not yet used (memory_target - memory_current).

Fixes QubesOS/qubes-issues#1389
---
 qmemman/qmemman.py        | 37 ++++++++++++++++++++++++++++---------
 qmemman/qmemman_server.py |  8 ++++++++
 2 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/qmemman/qmemman.py b/qmemman/qmemman.py
index 2d124ff6..01712375 100755
--- a/qmemman/qmemman.py
+++ b/qmemman/qmemman.py
@@ -37,7 +37,9 @@ slow_memset_react_msg="VM didn't give back all requested memory"
 class DomainState:
     def __init__(self, id):
         self.meminfo = None #dictionary of memory info read from client
-        self.memory_actual = None #the current memory size
+        self.memory_current = None #the current memory size
+        self.memory_actual = None # the current memory allocation (what VM
+                                  # is using or can use at any time)
         self.memory_maximum = None #the maximum memory size
         self.mem_used = None #used memory, computed based on meminfo
         self.id = id #domain id
@@ -65,25 +67,42 @@ class SystemState(object):
     def add_domain(self, id):
         self.log.debug('add_domain(id={!r})'.format(id))
         self.domdict[id] = DomainState(id)
+        # TODO: move to DomainState.__init__
+        target_str = self.xs.read('', '/local/domain/' + id + '/memory/target')
+        if target_str:
+            self.domdict[id].last_target = int(target_str) * 1024
 
     def del_domain(self, id):
         self.log.debug('del_domain(id={!r})'.format(id))
         self.domdict.pop(id)
 
     def get_free_xen_memory(self):
-        return int(self.xc.physinfo()['free_memory']*1024 * self.MEM_OVERHEAD_FACTOR)
-#        hosts = self.xend_session.session.xenapi.host.get_all()
-#        host_record = self.xend_session.session.xenapi.host.get_record(hosts[0])
-#        host_metrics_record = self.xend_session.session.xenapi.host_metrics.get_record(host_record["metrics"])
-#        ret = host_metrics_record["memory_free"]
-#        return long(ret)
+        xen_free = int(self.xc.physinfo()['free_memory']*1024 *
+                       self.MEM_OVERHEAD_FACTOR)
+        # Now check for domains which have more memory assigned than really
+        # used - do not count it as "free", because the domain is free to
+        # use it at any time.
+        # Assumption: self.refresh_memactual was called before
+        # (so domdict[id].memory_current is up to date).
+        assigned_but_unused = reduce(
+            lambda acc, dom: acc + max(0, dom.last_target-dom.memory_current),
+            self.domdict.values(),
+            0
+        )
+        return xen_free - assigned_but_unused
 
     #refresh information on memory assigned to all domains
     def refresh_memactual(self):
         for domain in self.xc.domain_getinfo():
             id = str(domain['domid'])
             if self.domdict.has_key(id):
-                self.domdict[id].memory_actual = domain['mem_kb']*1024
+                # real memory usage
+                self.domdict[id].memory_current = domain['mem_kb']*1024
+                # what the VM is using or can use at any time
+                self.domdict[id].memory_actual = max(
+                    self.domdict[id].memory_current,
+                    self.domdict[id].last_target
+                )
                 self.domdict[id].memory_maximum = self.xs.read('', '/local/domain/%s/memory/static-max' % str(id))
                 if self.domdict[id].memory_maximum:
                     self.domdict[id].memory_maximum = int(self.domdict[id].memory_maximum)*1024
@@ -272,11 +291,11 @@ class SystemState(object):
                 self.log.debug('do_balance dom={!r} sleeping ntries={}'.format(
                     dom, ntries))
                 time.sleep(self.BALOON_DELAY)
+                self.refresh_memactual()
                 ntries -= 1
                 if ntries <= 0:
                     # Waiting haven't helped; Find which domain get stuck and
                     # abort balance (after distributing what we have)
-                    self.refresh_memactual()
                     for rq2 in memset_reqs:
                         dom2, mem2 = rq2
                         if dom2 == dom:
diff --git a/qmemman/qmemman_server.py b/qmemman/qmemman_server.py
index c2b7e0e3..6e6fa94c 100755
--- a/qmemman/qmemman_server.py
+++ b/qmemman/qmemman_server.py
@@ -157,6 +157,14 @@ class QMemmanReqHandler(SocketServer.BaseRequestHandler):
             self.log.debug('data={!r}'.format(self.data))
             if len(self.data) == 0:
                 self.log.info('EOF')
+                # FIXME: there is a race condition here: if XS_Watcher
+                # handles a meminfo event before @introduceDomain, it will
+                # use an incomplete domain list and may redistribute memory
+                # allocated to some VM but not yet used (see #1389).
+                # To fix that, system_state should be updated (domain list
+                # refreshed) before releasing the lock, but in the current
+                # code layout the XS_Watcher instance isn't available here,
+                # so xenstore watches would not be registered.
                 if got_lock:
                     global_lock.release()
                     self.log.debug('global_lock released')
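
For context, a minimal standalone sketch of the accounting rule the patch
introduces: memory that Xen reports as free, minus memory already promised
to domains but not yet picked up by their balloon drivers. The Domain
record, the MEM_OVERHEAD_FACTOR value, and the sample numbers below are
hypothetical stand-ins; the real code gets this data from xc.physinfo(),
xc.domain_getinfo() and the /local/domain/<id>/memory/target xenstore key.

    # Standalone sketch of the free-memory accounting described above.
    # Names and values are hypothetical, not the real qmemman API.
    from collections import namedtuple

    MEM_OVERHEAD_FACTOR = 0.95  # safety margin; the real constant may differ

    # last_target: memory promised to the VM (its xenstore memory/target)
    # memory_current: memory the VM actually holds now (mem_kb from Xen)
    Domain = namedtuple('Domain', ['last_target', 'memory_current'])

    def get_free_xen_memory(xen_free_memory, domains):
        # Memory assigned to a domain but not yet ballooned in still shows
        # up as "free" in Xen, yet the domain may claim it at any time, so
        # it must be excluded from the redistributable pool.
        xen_free = int(xen_free_memory * MEM_OVERHEAD_FACTOR)
        assigned_but_unused = sum(
            max(0, dom.last_target - dom.memory_current) for dom in domains)
        return xen_free - assigned_but_unused

    # The first domain was just given 400 MB it has not ballooned in yet;
    # of the 500 MB Xen reports as free, only 100 MB (less the overhead
    # margin) may safely be handed out to other domains.
    domains = [
        Domain(last_target=1400 << 20, memory_current=1000 << 20),
        Domain(last_target=800 << 20, memory_current=800 << 20),
    ]
    print(get_free_xen_memory(500 << 20, domains))  # ~75 MB

Note the max(0, ...) clamp, which the patch applies per domain as well: a
domain currently holding more than its target (e.g. one still ballooning
down) must not make the free pool look larger than it really is.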