qmemman: handle memory assigned to VM but not yet used

When a VM gets some memory assigned, the balloon driver may not pick it
up immediately and the memory will still be seen as "free" by Xen, but
the VM can use (request) it at any time. Qmemman needs to take care of
such memory (exclude it from the "free" pool), otherwise it would
redistribute it to other domains, allowing the original domain to drain
the Xen memory pool.

Do this by redefining DomainState.memory_actual - it is now the amount
of memory available to the VM (currently used, or usable at any time).
Then calculate free memory by subtracting memory allocated but not yet
used (memory_target - memory_current).

Fixes QubesOS/qubes-issues#1389
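
In effect, memory that Xen still reports as free but that has already been
promised to a domain must no longer be handed out. A minimal sketch of the
resulting accounting (illustrative only - the function names below are not
part of qmemman, but the formulas mirror the diff that follows):

    def memory_actual(memory_current, last_target):
        # memory the VM is using now, or may start using at any moment
        return max(memory_current, last_target)

    def free_for_balancing(xen_free, domains):
        # subtract memory already promised to domains but not yet picked up
        # by their balloon drivers, so it is not given out a second time
        assigned_but_unused = sum(
            max(0, dom.last_target - dom.memory_current) for dom in domains)
        return xen_free - assigned_but_unused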
Marek Marczykowski-Górecki 2016-01-06 00:40:10 +01:00
parent e30e802903
commit 181eb3e764
2 changed files with 36 additions and 9 deletions


@@ -37,7 +37,9 @@ slow_memset_react_msg="VM didn't give back all requested memory"
 class DomainState:
     def __init__(self, id):
         self.meminfo = None #dictionary of memory info read from client
-        self.memory_actual = None #the current memory size
+        self.memory_current = None #the current memory size
+        self.memory_actual = None # the current memory allocation (what VM
+                                  # is using or can use at any time)
         self.memory_maximum = None #the maximum memory size
         self.mem_used = None #used memory, computed based on meminfo
         self.id = id #domain id
@@ -65,25 +67,42 @@ class SystemState(object):
     def add_domain(self, id):
         self.log.debug('add_domain(id={!r})'.format(id))
         self.domdict[id] = DomainState(id)
+        # TODO: move to DomainState.__init__
+        target_str = self.xs.read('', '/local/domain/' + id + '/memory/target')
+        if target_str:
+            self.domdict[id].last_target = int(target_str) * 1024

     def del_domain(self, id):
         self.log.debug('del_domain(id={!r})'.format(id))
         self.domdict.pop(id)

     def get_free_xen_memory(self):
-        return int(self.xc.physinfo()['free_memory']*1024 * self.MEM_OVERHEAD_FACTOR)
-# hosts = self.xend_session.session.xenapi.host.get_all()
-# host_record = self.xend_session.session.xenapi.host.get_record(hosts[0])
-# host_metrics_record = self.xend_session.session.xenapi.host_metrics.get_record(host_record["metrics"])
-# ret = host_metrics_record["memory_free"]
-# return long(ret)
+        xen_free = int(self.xc.physinfo()['free_memory']*1024 *
+                       self.MEM_OVERHEAD_FACTOR)
+        # now check for domains which have assigned more memory than really
+        # used - do not count it as "free", because domain is free to use it
+        # at any time
+        # assumption: self.refresh_memactual was called before
+        # (so domdict[id].memory_actual is up to date)
+        assigned_but_unused = reduce(
+            lambda acc, dom: acc + max(0, dom.last_target-dom.memory_current),
+            self.domdict.values(),
+            0
+        )
+        return xen_free - assigned_but_unused

 #refresh information on memory assigned to all domains
     def refresh_memactual(self):
         for domain in self.xc.domain_getinfo():
             id = str(domain['domid'])
             if self.domdict.has_key(id):
-                self.domdict[id].memory_actual = domain['mem_kb']*1024
+                # real memory usage
+                self.domdict[id].memory_current = domain['mem_kb']*1024
+                # what VM is using or can use
+                self.domdict[id].memory_actual = max(
+                    self.domdict[id].memory_current,
+                    self.domdict[id].last_target
+                )
                 self.domdict[id].memory_maximum = self.xs.read('', '/local/domain/%s/memory/static-max' % str(id))
                 if self.domdict[id].memory_maximum:
                     self.domdict[id].memory_maximum = int(self.domdict[id].memory_maximum)*1024
@@ -272,11 +291,11 @@ class SystemState(object):
                 self.log.debug('do_balance dom={!r} sleeping ntries={}'.format(
                     dom, ntries))
                 time.sleep(self.BALOON_DELAY)
+                self.refresh_memactual()
                 ntries -= 1
                 if ntries <= 0:
                     # Waiting haven't helped; Find which domain get stuck and
                     # abort balance (after distributing what we have)
-                    self.refresh_memactual()
                     for rq2 in memset_reqs:
                         dom2, mem2 = rq2
                         if dom2 == dom:
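
For illustration, running the new get_free_xen_memory() accounting above on
made-up numbers (a toy stand-in for DomainState; all values are hypothetical):

    from collections import namedtuple

    # minimal stand-in for DomainState with only the fields used here
    Dom = namedtuple('Dom', 'last_target memory_current')

    domdict = {
        '1': Dom(last_target=400 << 20, memory_current=400 << 20),  # settled
        '2': Dom(last_target=700 << 20, memory_current=500 << 20),  # 200 MiB assigned, not yet used
    }
    xen_free = 1 << 30  # Xen reports 1 GiB free

    assigned_but_unused = sum(
        max(0, d.last_target - d.memory_current) for d in domdict.values())
    print(xen_free - assigned_but_unused)  # 864026624 bytes (~824 MiB), not the full 1 GiB

Without the adjustment, the 200 MiB still owed to domain '2' would be counted
as free and could be redistributed, letting that domain later drain the Xen
memory pool.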


@@ -157,6 +157,14 @@ class QMemmanReqHandler(SocketServer.BaseRequestHandler):
             self.log.debug('data={!r}'.format(self.data))
             if len(self.data) == 0:
                 self.log.info('EOF')
+                # FIXME: there is a race condition here: if XS_Watcher will
+                # handle meminfo event before @introduceDomain, it will use
+                # incomplete domain list for that and may redistribute memory
+                # allocated to some VM, but not yet used (see #1389).
+                # To fix that, system_state should be updated (refresh domain
+                # list) before releasing the lock, but in the current code
+                # layout XS_Watcher instance isn't available here,
+                # so xenstore watches would not be registered
                 if got_lock:
                     global_lock.release()
                     self.log.debug('global_lock released')
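
The FIXME above already names the fix direction: refresh the domain list while
the lock is still held. One possible shape of that, purely illustrative, since
XS_Watcher is not reachable here in the current layout and update_domain_list()
is a hypothetical helper:

    if len(self.data) == 0:
        self.log.info('EOF')
        if got_lock:
            # hypothetical: refresh the domain list before dropping the lock,
            # so a meminfo event handled right after release does not balance
            # against an incomplete domain list
            xs_watcher.update_domain_list()  # does not exist in current code
            global_lock.release()
            self.log.debug('global_lock released')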