dom0/qmemman: check if donors have returned memory before distributing it to other VMs (#563)
When some VM didn't return memory to Xen, mark this VM as suspicious and abort the balance, so that there is always some Xen free_memory margin. VMs marked as suspicious will be re-evaluated before the next balance and, if they still haven't returned the memory, will be skipped in the balance process.
This commit is contained in:
parent
892a6bbc13
commit
b4070a99a3
@ -5,6 +5,9 @@ import time
|
|||||||
import qmemman_algo
|
import qmemman_algo
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
no_progress_msg="VM refused to give back requested memory"
|
||||||
|
slow_memset_react_msg="VM didn't give back all requested memory"
|
||||||
|
|
||||||
class DomainState:
|
class DomainState:
|
||||||
def __init__(self, id):
|
def __init__(self, id):
|
||||||
self.meminfo = None #dictionary of memory info read from client
|
self.meminfo = None #dictionary of memory info read from client
|
||||||
@ -13,6 +16,8 @@ class DomainState:
|
|||||||
self.mem_used = None #used memory, computed based on meminfo
|
self.mem_used = None #used memory, computed based on meminfo
|
||||||
self.id = id #domain id
|
self.id = id #domain id
|
||||||
self.last_target = 0 #the last memset target
|
self.last_target = 0 #the last memset target
|
||||||
|
self.no_progress = False #no react to memset
|
||||||
|
self.slow_memset_react = False #slow react to memset (after few tries still above target)
|
||||||
|
|
||||||
class SystemState:
|
class SystemState:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
@ -54,6 +59,17 @@ class SystemState:
|
|||||||
# in fact, the only possible case of nonexisting memory/static-max is dom0
|
# in fact, the only possible case of nonexisting memory/static-max is dom0
|
||||||
# see #307
|
# see #307
|
||||||
|
|
||||||
|
def clear_outdated_error_markers(self):
|
||||||
|
# Clear outdated errors
|
||||||
|
for i in self.domdict.keys():
|
||||||
|
if self.domdict[i].slow_memset_react and \
|
||||||
|
self.domdict[i].memory_actual <= self.domdict[i].last_target + self.XEN_FREE_MEM_LEFT/4:
|
||||||
|
self.domdict[i].slow_memset_react = False
|
||||||
|
|
||||||
|
if self.domdict[i].no_progress and \
|
||||||
|
self.domdict[i].memory_actual <= self.domdict[i].last_target + self.XEN_FREE_MEM_LEFT/4:
|
||||||
|
self.domdict[i].no_progress = False
|
||||||
|
|
||||||
#the below works (and is fast), but then 'xm list' shows unchanged memory value
|
#the below works (and is fast), but then 'xm list' shows unchanged memory value
|
||||||
def mem_set(self, id, val):
|
def mem_set(self, id, val):
|
||||||
print 'mem-set domain', id, 'to', val
|
print 'mem-set domain', id, 'to', val
|
||||||
@ -156,6 +172,7 @@ class SystemState:
|
|||||||
if os.path.isfile('/var/run/qubes/do-not-membalance'):
|
if os.path.isfile('/var/run/qubes/do-not-membalance'):
|
||||||
return
|
return
|
||||||
self.refresh_memactual()
|
self.refresh_memactual()
|
||||||
|
self.clear_outdated_error_markers()
|
||||||
xenfree = self.get_free_xen_memory()
|
xenfree = self.get_free_xen_memory()
|
||||||
memset_reqs = qmemman_algo.balance(xenfree - self.XEN_FREE_MEM_LEFT, self.domdict)
|
memset_reqs = qmemman_algo.balance(xenfree - self.XEN_FREE_MEM_LEFT, self.domdict)
|
||||||
if not self.is_balance_req_significant(memset_reqs, xenfree):
|
if not self.is_balance_req_significant(memset_reqs, xenfree):
|
||||||
@ -163,8 +180,40 @@ class SystemState:
|
|||||||
|
|
||||||
self.print_stats(xenfree, memset_reqs)
|
self.print_stats(xenfree, memset_reqs)
|
||||||
|
|
||||||
|
prev_memactual = {}
|
||||||
|
for i in self.domdict.keys():
|
||||||
|
prev_memactual[i] = self.domdict[i].memory_actual
|
||||||
for rq in memset_reqs:
|
for rq in memset_reqs:
|
||||||
dom, mem = rq
|
dom, mem = rq
|
||||||
|
# Force to always have at least 0.9*self.XEN_FREE_MEM_LEFT (some
|
||||||
|
# margin for rounding errors). Before giving memory to
|
||||||
|
# domain, ensure that others have gived it back.
|
||||||
|
# If not - wait a little.
|
||||||
|
ntries = 5
|
||||||
|
while self.get_free_xen_memory() - (mem - self.domdict[dom].memory_actual) < 0.9*self.XEN_FREE_MEM_LEFT:
|
||||||
|
time.sleep(self.BALOON_DELAY)
|
||||||
|
ntries -= 1
|
||||||
|
if ntries <= 0:
|
||||||
|
# Waiting haven't helped; Find which domain get stuck and
|
||||||
|
# abort balance (after distributing what we have)
|
||||||
|
self.refresh_memactual()
|
||||||
|
for rq2 in memset_reqs:
|
||||||
|
dom2, mem2 = rq2
|
||||||
|
if dom2 == dom:
|
||||||
|
# All donors have been procesed
|
||||||
|
break
|
||||||
|
# allow some small margin
|
||||||
|
if self.domdict[dom2].memory_actual > self.domdict[dom2].last_target + self.XEN_FREE_MEM_LEFT/4:
|
||||||
|
# VM didn't react to memory request at all, remove from donors
|
||||||
|
if prev_memactual[dom2] == self.domdict[dom2].memory_actual:
|
||||||
|
print 'dom %s didnt react to memory request (holds %d, requested balloon down to %d)' % (dom2, self.domdict[dom2].memory_actual, mem2)
|
||||||
|
self.domdict[dom2].no_progress = True
|
||||||
|
else:
|
||||||
|
print 'dom %s still hold more memory than have assigned (%d > %d)' % (dom2, self.domdict[dom2].memory_actual, mem2)
|
||||||
|
self.domdict[dom2].slow_memset_react = True
|
||||||
|
self.mem_set(dom, self.get_free_xen_memory() + self.domdict[dom].memory_actual - self.XEN_FREE_MEM_LEFT)
|
||||||
|
return
|
||||||
|
|
||||||
self.mem_set(dom, mem)
|
self.mem_set(dom, mem)
|
||||||
|
|
||||||
# for i in self.domdict.keys():
|
# for i in self.domdict.keys():
|
||||||
|
@ -117,6 +117,8 @@ def balance_when_enough_memory(domain_dictionary, xen_free_memory, total_mem_pre
|
|||||||
for i in domain_dictionary.keys():
|
for i in domain_dictionary.keys():
|
||||||
if domain_dictionary[i].meminfo is None:
|
if domain_dictionary[i].meminfo is None:
|
||||||
continue
|
continue
|
||||||
|
if domain_dictionary[i].no_progress:
|
||||||
|
continue
|
||||||
#distribute total_available_memory proportionally to mempref
|
#distribute total_available_memory proportionally to mempref
|
||||||
scale = 1.0*prefmem(domain_dictionary[i])/total_mem_pref
|
scale = 1.0*prefmem(domain_dictionary[i])/total_mem_pref
|
||||||
target_nonint = prefmem(domain_dictionary[i]) + scale*total_available_memory
|
target_nonint = prefmem(domain_dictionary[i]) + scale*total_available_memory
|
||||||
@ -212,6 +214,8 @@ def balance(xen_free_memory, domain_dictionary):
|
|||||||
for i in domain_dictionary.keys():
|
for i in domain_dictionary.keys():
|
||||||
if domain_dictionary[i].meminfo is None:
|
if domain_dictionary[i].meminfo is None:
|
||||||
continue
|
continue
|
||||||
|
if domain_dictionary[i].no_progress:
|
||||||
|
continue
|
||||||
need = memory_needed(domain_dictionary[i])
|
need = memory_needed(domain_dictionary[i])
|
||||||
# print 'domain' , i, 'act/pref', domain_dictionary[i].memory_actual, prefmem(domain_dictionary[i]), 'need=', need
|
# print 'domain' , i, 'act/pref', domain_dictionary[i].memory_actual, prefmem(domain_dictionary[i]), 'need=', need
|
||||||
if need < 0 or domain_dictionary[i].memory_actual >= domain_dictionary[i].memory_maximum:
|
if need < 0 or domain_dictionary[i].memory_actual >= domain_dictionary[i].memory_maximum:
|
||||||
|
Loading…
Reference in New Issue
Block a user