dom0/qmemman: check if donors have returned memory before distributing it to other VMs (#563)

When some VM did't returned memory to Xen, mark this VM as suspicious and abort
balance to always have some xen free_memory margin.
VMs marked as suspicius will be evaluated before next balance and still didn't
returned memory, will be skipped in balance process.
This commit is contained in:
Marek Marczykowski 2012-07-05 01:23:43 +02:00
parent 892a6bbc13
commit b4070a99a3
2 changed files with 53 additions and 0 deletions

View File

@ -5,6 +5,9 @@ import time
import qmemman_algo import qmemman_algo
import os import os
no_progress_msg="VM refused to give back requested memory"
slow_memset_react_msg="VM didn't give back all requested memory"
class DomainState: class DomainState:
def __init__(self, id): def __init__(self, id):
self.meminfo = None #dictionary of memory info read from client self.meminfo = None #dictionary of memory info read from client
@ -13,6 +16,8 @@ class DomainState:
self.mem_used = None #used memory, computed based on meminfo self.mem_used = None #used memory, computed based on meminfo
self.id = id #domain id self.id = id #domain id
self.last_target = 0 #the last memset target self.last_target = 0 #the last memset target
self.no_progress = False #no react to memset
self.slow_memset_react = False #slow react to memset (after few tries still above target)
class SystemState: class SystemState:
def __init__(self): def __init__(self):
@ -54,6 +59,17 @@ class SystemState:
# in fact, the only possible case of nonexisting memory/static-max is dom0 # in fact, the only possible case of nonexisting memory/static-max is dom0
# see #307 # see #307
def clear_outdated_error_markers(self):
# Clear outdated errors
for i in self.domdict.keys():
if self.domdict[i].slow_memset_react and \
self.domdict[i].memory_actual <= self.domdict[i].last_target + self.XEN_FREE_MEM_LEFT/4:
self.domdict[i].slow_memset_react = False
if self.domdict[i].no_progress and \
self.domdict[i].memory_actual <= self.domdict[i].last_target + self.XEN_FREE_MEM_LEFT/4:
self.domdict[i].no_progress = False
#the below works (and is fast), but then 'xm list' shows unchanged memory value #the below works (and is fast), but then 'xm list' shows unchanged memory value
def mem_set(self, id, val): def mem_set(self, id, val):
print 'mem-set domain', id, 'to', val print 'mem-set domain', id, 'to', val
@ -156,6 +172,7 @@ class SystemState:
if os.path.isfile('/var/run/qubes/do-not-membalance'): if os.path.isfile('/var/run/qubes/do-not-membalance'):
return return
self.refresh_memactual() self.refresh_memactual()
self.clear_outdated_error_markers()
xenfree = self.get_free_xen_memory() xenfree = self.get_free_xen_memory()
memset_reqs = qmemman_algo.balance(xenfree - self.XEN_FREE_MEM_LEFT, self.domdict) memset_reqs = qmemman_algo.balance(xenfree - self.XEN_FREE_MEM_LEFT, self.domdict)
if not self.is_balance_req_significant(memset_reqs, xenfree): if not self.is_balance_req_significant(memset_reqs, xenfree):
@ -163,8 +180,40 @@ class SystemState:
self.print_stats(xenfree, memset_reqs) self.print_stats(xenfree, memset_reqs)
prev_memactual = {}
for i in self.domdict.keys():
prev_memactual[i] = self.domdict[i].memory_actual
for rq in memset_reqs: for rq in memset_reqs:
dom, mem = rq dom, mem = rq
# Force to always have at least 0.9*self.XEN_FREE_MEM_LEFT (some
# margin for rounding errors). Before giving memory to
# domain, ensure that others have gived it back.
# If not - wait a little.
ntries = 5
while self.get_free_xen_memory() - (mem - self.domdict[dom].memory_actual) < 0.9*self.XEN_FREE_MEM_LEFT:
time.sleep(self.BALOON_DELAY)
ntries -= 1
if ntries <= 0:
# Waiting haven't helped; Find which domain get stuck and
# abort balance (after distributing what we have)
self.refresh_memactual()
for rq2 in memset_reqs:
dom2, mem2 = rq2
if dom2 == dom:
# All donors have been procesed
break
# allow some small margin
if self.domdict[dom2].memory_actual > self.domdict[dom2].last_target + self.XEN_FREE_MEM_LEFT/4:
# VM didn't react to memory request at all, remove from donors
if prev_memactual[dom2] == self.domdict[dom2].memory_actual:
print 'dom %s didnt react to memory request (holds %d, requested balloon down to %d)' % (dom2, self.domdict[dom2].memory_actual, mem2)
self.domdict[dom2].no_progress = True
else:
print 'dom %s still hold more memory than have assigned (%d > %d)' % (dom2, self.domdict[dom2].memory_actual, mem2)
self.domdict[dom2].slow_memset_react = True
self.mem_set(dom, self.get_free_xen_memory() + self.domdict[dom].memory_actual - self.XEN_FREE_MEM_LEFT)
return
self.mem_set(dom, mem) self.mem_set(dom, mem)
# for i in self.domdict.keys(): # for i in self.domdict.keys():

View File

@ -117,6 +117,8 @@ def balance_when_enough_memory(domain_dictionary, xen_free_memory, total_mem_pre
for i in domain_dictionary.keys(): for i in domain_dictionary.keys():
if domain_dictionary[i].meminfo is None: if domain_dictionary[i].meminfo is None:
continue continue
if domain_dictionary[i].no_progress:
continue
#distribute total_available_memory proportionally to mempref #distribute total_available_memory proportionally to mempref
scale = 1.0*prefmem(domain_dictionary[i])/total_mem_pref scale = 1.0*prefmem(domain_dictionary[i])/total_mem_pref
target_nonint = prefmem(domain_dictionary[i]) + scale*total_available_memory target_nonint = prefmem(domain_dictionary[i]) + scale*total_available_memory
@ -212,6 +214,8 @@ def balance(xen_free_memory, domain_dictionary):
for i in domain_dictionary.keys(): for i in domain_dictionary.keys():
if domain_dictionary[i].meminfo is None: if domain_dictionary[i].meminfo is None:
continue continue
if domain_dictionary[i].no_progress:
continue
need = memory_needed(domain_dictionary[i]) need = memory_needed(domain_dictionary[i])
# print 'domain' , i, 'act/pref', domain_dictionary[i].memory_actual, prefmem(domain_dictionary[i]), 'need=', need # print 'domain' , i, 'act/pref', domain_dictionary[i].memory_actual, prefmem(domain_dictionary[i]), 'need=', need
if need < 0 or domain_dictionary[i].memory_actual >= domain_dictionary[i].memory_maximum: if need < 0 or domain_dictionary[i].memory_actual >= domain_dictionary[i].memory_maximum: