diff --git a/qmemman/qmemman.py b/qmemman/qmemman.py index 2d124ff6..01712375 100755 --- a/qmemman/qmemman.py +++ b/qmemman/qmemman.py @@ -37,7 +37,9 @@ slow_memset_react_msg="VM didn't give back all requested memory" class DomainState: def __init__(self, id): self.meminfo = None #dictionary of memory info read from client - self.memory_actual = None #the current memory size + self.memory_current = None #the current memory size + self.memory_actual = None # the current memory allocation (what VM + # is using or can use at any time) self.memory_maximum = None #the maximum memory size self.mem_used = None #used memory, computed based on meminfo self.id = id #domain id @@ -65,25 +67,42 @@ class SystemState(object): def add_domain(self, id): self.log.debug('add_domain(id={!r})'.format(id)) self.domdict[id] = DomainState(id) + # TODO: move to DomainState.__init__ + target_str = self.xs.read('', '/local/domain/' + id + '/memory/target') + if target_str: + self.domdict[id].last_target = int(target_str) * 1024 def del_domain(self, id): self.log.debug('del_domain(id={!r})'.format(id)) self.domdict.pop(id) def get_free_xen_memory(self): - return int(self.xc.physinfo()['free_memory']*1024 * self.MEM_OVERHEAD_FACTOR) -# hosts = self.xend_session.session.xenapi.host.get_all() -# host_record = self.xend_session.session.xenapi.host.get_record(hosts[0]) -# host_metrics_record = self.xend_session.session.xenapi.host_metrics.get_record(host_record["metrics"]) -# ret = host_metrics_record["memory_free"] -# return long(ret) + xen_free = int(self.xc.physinfo()['free_memory']*1024 * + self.MEM_OVERHEAD_FACTOR) + # now check for domains which have assigned more memory than really + # used - do not count it as "free", because domain is free to use it + # at any time + # assumption: self.refresh_memactual was called before + # (so domdict[id].memory_actual is up to date) + assigned_but_unused = reduce( + lambda acc, dom: acc + max(0, dom.last_target-dom.memory_current), + self.domdict.values(), + 0 + ) + return xen_free - assigned_but_unused #refresh information on memory assigned to all domains def refresh_memactual(self): for domain in self.xc.domain_getinfo(): id = str(domain['domid']) if self.domdict.has_key(id): - self.domdict[id].memory_actual = domain['mem_kb']*1024 + # real memory usage + self.domdict[id].memory_current = domain['mem_kb']*1024 + # what VM is using or can use + self.domdict[id].memory_actual = max( + self.domdict[id].memory_current, + self.domdict[id].last_target + ) self.domdict[id].memory_maximum = self.xs.read('', '/local/domain/%s/memory/static-max' % str(id)) if self.domdict[id].memory_maximum: self.domdict[id].memory_maximum = int(self.domdict[id].memory_maximum)*1024 @@ -272,11 +291,11 @@ class SystemState(object): self.log.debug('do_balance dom={!r} sleeping ntries={}'.format( dom, ntries)) time.sleep(self.BALOON_DELAY) + self.refresh_memactual() ntries -= 1 if ntries <= 0: # Waiting haven't helped; Find which domain get stuck and # abort balance (after distributing what we have) - self.refresh_memactual() for rq2 in memset_reqs: dom2, mem2 = rq2 if dom2 == dom: diff --git a/qmemman/qmemman_server.py b/qmemman/qmemman_server.py index c2b7e0e3..6e6fa94c 100755 --- a/qmemman/qmemman_server.py +++ b/qmemman/qmemman_server.py @@ -157,6 +157,14 @@ class QMemmanReqHandler(SocketServer.BaseRequestHandler): self.log.debug('data={!r}'.format(self.data)) if len(self.data) == 0: self.log.info('EOF') + # FIXME: there is a race condition here: if XS_Watcher will + # handle meminfo event before @introduceDomain, it will use + # incomplete domain list for that and may redistribute memory + # allocated to some VM, but not yet used (see #1389). + # To fix that, system_state should be updated (refresh domain + # list) before releasing the lock, but in the current code + # layout XS_Watcher instance isn't available here, + # so xenstore watches would not be registered if got_lock: global_lock.release() self.log.debug('global_lock released')