2010-08-30 11:40:19 +02:00
|
|
|
import xen.lowlevel.xc
|
2010-09-03 16:19:48 +02:00
|
|
|
import xen.lowlevel.xs
|
2010-08-30 11:40:19 +02:00
|
|
|
import string
|
|
|
|
import time
|
|
|
|
import qmemman_algo
|
|
|
|
import os
|
2012-07-05 01:27:36 +02:00
|
|
|
from guihelpers import notify_error_qubes_manager, clear_error_qubes_manager
|
2010-08-30 11:40:19 +02:00
|
|
|
|
2012-07-05 01:23:43 +02:00
|
|
|
# Texts handed to notify_error_qubes_manager / clear_error_qubes_manager
# when a VM does not follow a memset request.
no_progress_msg = "VM refused to give back requested memory"
slow_memset_react_msg = "VM didn't give back all requested memory"
|
|
|
|
|
2010-08-30 11:40:19 +02:00
|
|
|
class DomainState:
    """Per-domain record kept in SystemState.domdict."""

    def __init__(self, id):
        # Identity of the domain this record describes.
        self.id = id                    # domain id

        # Memory figures; unknown (None) until they are first refreshed.
        self.meminfo = None             # dictionary of memory info read from client
        self.mem_used = None            # used memory, computed based on meminfo
        self.memory_actual = None       # the current memory size
        self.memory_maximum = None      # the maximum memory size

        # Bookkeeping for memset requests sent to this domain.
        self.last_target = 0            # the last memset target
        self.no_progress = False        # no react to memset
        # slow react to memset (after few tries still above target)
        self.slow_memset_react = False
|
2010-08-30 11:40:19 +02:00
|
|
|
|
|
|
|
class SystemState:
|
|
|
|
def __init__(self):
    """Open the xen.lowlevel xc/xs handles and set the balancer's constants."""
    mib = 1024*1024

    # Tracked domains, keyed by domain id (see add_domain / del_domain).
    self.domdict = {}

    xc_handle = xen.lowlevel.xc.xc()
    self.xc = xc_handle
    self.xs = xen.lowlevel.xs.xs()

    # Passed to time.sleep() between ballooning/balancing retries.
    self.BALOON_DELAY = 0.1

    # do_balloon requests need + XEN_FREE_MEM_LEFT from the domains but is
    # already satisfied with need + XEN_FREE_MEM_MIN (25 MiB less), so a
    # request only slightly above the current free memory does not end up
    # asking every domain for a tiny amount it may ignore.
    self.XEN_FREE_MEM_LEFT = 50*mib
    self.XEN_FREE_MEM_MIN = 25*mib

    # physinfo()['total_memory'] scaled by 1024
    # (presumably KiB -> bytes; confirm against the xc bindings).
    self.ALL_PHYS_MEM = xc_handle.physinfo()['total_memory']*1024
|
2010-08-30 11:40:19 +02:00
|
|
|
|
|
|
|
def add_domain(self, id):
    """Start tracking domain *id* with a fresh DomainState record.

    An existing record under the same id is replaced.
    """
    fresh_state = DomainState(id)
    self.domdict[id] = fresh_state
|
|
|
|
|
|
|
|
def del_domain(self, id):
    """Stop tracking domain *id*.

    Raises KeyError when the domain is not tracked.
    """
    del self.domdict[id]
|
|
|
|
|
|
|
|
def get_free_xen_memory(self):
    """Return the 'free_memory' figure from xc.physinfo(), multiplied by 1024."""
    host_info = self.xc.physinfo()
    free_reported = host_info['free_memory']
    return free_reported*1024
|
|
|
|
# hosts = self.xend_session.session.xenapi.host.get_all()
|
|
|
|
# host_record = self.xend_session.session.xenapi.host.get_record(hosts[0])
|
|
|
|
# host_metrics_record = self.xend_session.session.xenapi.host_metrics.get_record(host_record["metrics"])
|
|
|
|
# ret = host_metrics_record["memory_free"]
|
|
|
|
# return long(ret)
|
|
|
|
|
2011-05-04 17:58:28 +02:00
|
|
|
#refresh information on memory assigned to all domains
|
2010-08-30 11:40:19 +02:00
|
|
|
def refresh_memactual(self):
    """Refresh memory_actual and memory_maximum of every tracked domain.

    For each domain reported by xc.domain_getinfo() that is present in
    self.domdict:

    * memory_actual is set to mem_kb * 1024;
    * memory_maximum is set from the xenstore key
      /local/domain/<id>/memory/static-max, converted to the same unit as
      memory_actual (the key holds a KiB count as text), or to
      self.ALL_PHYS_MEM when the key is missing or not a number.

    Domains that are reported by Xen but not tracked are ignored.
    """
    for info in self.xc.domain_getinfo():
        domid = str(info['domid'])
        if domid not in self.domdict:
            continue
        state = self.domdict[domid]
        state.memory_actual = info['mem_kb']*1024

        static_max_kb = self.xs.read('', '/local/domain/%s/memory/static-max' % domid)
        limit = None
        if static_max_kb:
            try:
                # xenstore hands back text; keep memory_maximum numeric so
                # it can be compared with the other byte counts.
                limit = int(static_max_kb)*1024
            except ValueError:
                # unparsable value: treat it like a missing key
                limit = None
        if limit is None:
            limit = self.ALL_PHYS_MEM
            # the fallback used to be
            #     domain['maxmem_kb']*1024
            # but domain['maxmem_kb'] changes in self.mem_set as well, and
            # this results in the memory never increasing
            # in fact, the only possible case of nonexisting
            # memory/static-max is dom0
            # see #307
        state.memory_maximum = limit
|
2010-08-30 11:40:19 +02:00
|
|
|
|
2012-07-05 01:23:43 +02:00
|
|
|
def clear_outdated_error_markers(self):
    """Drop error flags of domains that have reached their last target.

    A domain counts as having caught up when its memory_actual is at most
    last_target + XEN_FREE_MEM_LEFT/4. For each set flag
    (slow_memset_react, then no_progress) of such a domain, the matching
    message is withdrawn through clear_error_qubes_manager, provided the
    domain name can still be read, and the flag is reset to False.
    """
    def withdraw(dom_id, message):
        # The name is looked up at withdrawal time; when the read yields
        # None nothing is reported, but the caller still resets the flag.
        dom_name = self.xs.read('', '/local/domain/%s/name' % str(dom_id))
        if dom_name is not None:
            clear_error_qubes_manager(dom_name, message)

    # Clear outdated errors
    for dom_id in list(self.domdict):
        state = self.domdict[dom_id]

        if state.slow_memset_react and \
                state.memory_actual <= state.last_target + self.XEN_FREE_MEM_LEFT/4:
            withdraw(dom_id, slow_memset_react_msg)
            state.slow_memset_react = False

        if state.no_progress and \
                state.memory_actual <= state.last_target + self.XEN_FREE_MEM_LEFT/4:
            withdraw(dom_id, no_progress_msg)
            state.no_progress = False
|
|
|
|
|
2010-08-30 11:40:19 +02:00
|
|
|
#the below works (and is fast), but then 'xm list' shows unchanged memory value
|
2010-09-03 16:19:48 +02:00
|
|
|
def mem_set(self, id, val):
    """Ask domain *id* to change its memory size to *val*.

    *id* is the domain id as a string (it is concatenated into the
    xenstore path and converted with int() for the xc calls). *val* is
    divided by 1024 before being handed to Xen, i.e. it is in the same
    unit as DomainState.memory_actual.

    The value is recorded as the domain's last_target, the xc max-memory
    and target are updated on a best-effort basis, and the target is then
    written to /local/domain/<id>/memory/target.

    Raises KeyError when *id* is not tracked. Errors from the xc calls
    are logged and otherwise ignored.
    """
    print(' '.join(['mem-set domain', str(id), 'to', str(val)]))
    self.domdict[id].last_target = val
    target_kb = int(val/1024)
    #can happen in the middle of domain shutdown
    #apparently xc.lowlevel throws exceptions too
    try:
        self.xc.domain_setmaxmem(int(id), target_kb + 1024) # LIBXL_MAXMEM_CONSTANT=1024
        self.xc.domain_set_target_mem(int(id), target_kb)
    except Exception as err:
        # Stay best-effort, but leave a trace and let KeyboardInterrupt /
        # SystemExit propagate instead of swallowing them.
        print(' '.join(['mem-set domain', str(id), 'xc call failed:', str(err)]))
    self.xs.write('', '/local/domain/' + id + '/memory/target', str(target_kb))
|
|
|
|
|
2010-09-03 16:19:48 +02:00
|
|
|
def mem_set_obsolete(self, id, val):
    """Older XenAPI-based variant of mem_set.

    Sets both the dynamic max and dynamic min of the domain's VM record
    to *val*, clamped below 2**31, and ignores XenAPI.Failure.

    NOTE(review): this relies on DomainState.uuid, self.xend_session and
    the XenAPI module, none of which are set up in the visible part of
    this file - presumably dead code; confirm before calling.
    """
    uuid = self.domdict[id].uuid
    xmlrpc_int_limit = 2**31
    if val >= xmlrpc_int_limit:
        print(' '.join(['limiting memory from ', str(val), 'to maxint because of xml-rpc lameness']))
        val = xmlrpc_int_limit - 1
    print(' '.join(['mem-set domain', str(id), 'to', str(val)]))
    try:
        for setter in ('set_memory_dynamic_max_live', 'set_memory_dynamic_min_live'):
            getattr(self.xend_session.session.xenapi.VM, setter)(uuid, val)
    except XenAPI.Failure:
        #can happen in the middle of domain shutdown
        pass
|
2010-08-30 11:40:19 +02:00
|
|
|
|
2011-07-22 13:40:21 +02:00
|
|
|
# this is called at the end of ballooning, when we have Xen free mem already
|
|
|
|
# make sure that past mem_set will not decrease Xen free mem
|
|
|
|
def inhibit_balloon_up(self):
    """Pin domains at their current size when their last target is higher.

    For every tracked domain with a known memory_actual that is more than
    200*1024 below its last_target, mem_set is re-issued with the current
    memory_actual, so an earlier, still pending memset cannot grow the
    domain any further.
    """
    slack = 200*1024
    for dom_id, dom in list(self.domdict.items()):
        current = dom.memory_actual
        if current is None:
            # never refreshed, nothing to pin to
            continue
        if current + slack < dom.last_target:
            print(' '.join(["Preventing balloon up to", str(dom.last_target)]))
            self.mem_set(dom_id, current)
|
|
|
|
|
2011-05-04 17:58:28 +02:00
|
|
|
#perform memory ballooning, across all domains, to add "memsize" to Xen free memory
|
2010-08-30 11:40:19 +02:00
|
|
|
def do_balloon(self, memsize):
    """Balloon domains down until Xen has *memsize* free memory to spare.

    Each round refreshes the domains' actual sizes, asks
    qmemman_algo.balloon for memset requests covering
    memsize + XEN_FREE_MEM_LEFT and applies them, then sleeps for
    BALOON_DELAY. Domains whose size did not change since the previous
    round are flagged no_progress.

    Returns True as soon as Xen free memory reaches
    memsize + XEN_FREE_MEM_MIN (after calling inhibit_balloon_up), and
    False when no requests are produced or the retry budget (MAX_TRIES)
    is exceeded.
    """
    MAX_TRIES = 20

    for state in self.domdict.values():
        state.no_progress = False
    print("do_balloon start")

    rounds_done = 0
    # memory_actual of each domain at the moment it was last sent a memset
    sizes_at_request = {}
    while True:
        self.refresh_memactual()
        xenfree = self.get_free_xen_memory()
        print(' '.join(['got xenfree=', str(xenfree)]))

        # More is requested below (XEN_FREE_MEM_LEFT) than is required here
        # (XEN_FREE_MEM_MIN): a memset very close to a domain's current
        # size may be ignored, so asking for the bare difference can fail
        # on a tiny amount.
        if xenfree >= memsize + self.XEN_FREE_MEM_MIN:
            self.inhibit_balloon_up()
            return True

        for dom_id, size_then in sizes_at_request.items():
            size_now = self.domdict[dom_id].memory_actual
            if size_then == size_now:
                #domain not responding to memset requests, remove it from donors
                self.domdict[dom_id].no_progress = True
                print(' '.join(['domain', str(dom_id), 'stuck at', str(size_now)]))

        shortfall = memsize + self.XEN_FREE_MEM_LEFT - xenfree
        memset_reqs = qmemman_algo.balloon(shortfall, self.domdict)
        print(' '.join(['requests:', str(memset_reqs)]))
        if rounds_done > MAX_TRIES or len(memset_reqs) == 0:
            return False

        sizes_at_request = {}
        for dom, mem in memset_reqs:
            self.mem_set(dom, mem)
            sizes_at_request[dom] = self.domdict[dom].memory_actual

        time.sleep(self.BALOON_DELAY)
        rounds_done += 1
|
|
|
|
|
2011-05-04 17:10:01 +02:00
|
|
|
def refresh_meminfo(self, domid, untrusted_meminfo_key):
    """Update one domain's memory info, then rebalance all domains.

    The (untrusted) value is passed on unchanged to
    qmemman_algo.refresh_meminfo_for_domain together with the domain's
    record. Raises KeyError when *domid* is not tracked.
    """
    dom_state = self.domdict[domid]
    qmemman_algo.refresh_meminfo_for_domain(dom_state, untrusted_meminfo_key)
    self.do_balance()
|
2010-08-30 11:40:19 +02:00
|
|
|
|
2011-05-04 17:58:28 +02:00
|
|
|
#is the computed balance request big enough ?
|
|
|
|
#so that we do not thrash with small adjustments
|
2010-09-16 16:00:07 +02:00
|
|
|
def is_balance_req_significant(self, memset_reqs, xenfree):
    """Tell whether applying *memset_reqs* is worth the effort.

    *memset_reqs* is a sequence of (domain id, new target) pairs and
    *xenfree* the current Xen free memory. The result is True when

    * Xen free memory is more than 15 MiB below XEN_FREE_MEM_LEFT, or
    * some domain whose last target is positive but below its preferred
      size (qmemman_algo.prefmem) would gain more than 15 MiB, or
    * the summed absolute target changes plus the distance of xenfree
      from XEN_FREE_MEM_LEFT exceed 150 MiB;

    otherwise False.
    """
    MIN_TOTAL_MEMORY_TRANSFER = 150*1024*1024
    MIN_MEM_CHANGE_WHEN_UNDER_PREF = 15*1024*1024

    # If xenfree is too low, return immediately
    xen_deficit = self.XEN_FREE_MEM_LEFT - xenfree
    if xen_deficit > MIN_MEM_CHANGE_WHEN_UNDER_PREF:
        return True

    moved_total = 0
    for dom, mem in memset_reqs:
        state = self.domdict[dom]
        previous_target = state.last_target
        delta = mem - previous_target
        moved_total += abs(delta)
        pref = qmemman_algo.prefmem(state)
        if 0 < previous_target < pref and delta > MIN_MEM_CHANGE_WHEN_UNDER_PREF:
            print(' '.join(['dom', str(dom), 'is below pref, allowing balance']))
            return True

    return moved_total + abs(xen_deficit) > MIN_TOTAL_MEMORY_TRANSFER
|
2010-09-01 10:39:39 +02:00
|
|
|
|
2010-09-09 10:29:35 +02:00
|
|
|
def print_stats(self, xenfree, memset_reqs):
    """Print actual/preferred memory per domain and the pending balance request.

    Domains without meminfo are left out of the per-domain lines.
    """
    for dom_id, state in list(self.domdict.items()):
        if state.meminfo is None:
            continue
        print(' '.join(['dom', str(dom_id), 'act/pref',
                        str(state.memory_actual), str(qmemman_algo.prefmem(state))]))
    print(' '.join(['xenfree=', str(xenfree), 'balance req:', str(memset_reqs)]))
|
|
|
|
|
2010-08-30 11:40:19 +02:00
|
|
|
def do_balance(self):
    """Redistribute memory between domains according to qmemman_algo.balance.

    Does nothing while /var/run/qubes/do-not-membalance exists, or when
    the computed requests are not significant (see
    is_balance_req_significant). Otherwise the requests are applied in
    order. Before each one, up to five BALOON_DELAY pauses are spent
    waiting for enough Xen free memory; if that does not help, the
    earlier domains in the request list that have not reached their
    target are flagged and reported, the current domain gets whatever is
    available, and the rest of the balance is abandoned.
    """
    if os.path.isfile('/var/run/qubes/do-not-membalance'):
        return

    self.refresh_memactual()
    self.clear_outdated_error_markers()
    xenfree = self.get_free_xen_memory()
    memset_reqs = qmemman_algo.balance(xenfree - self.XEN_FREE_MEM_LEFT, self.domdict)
    if not self.is_balance_req_significant(memset_reqs, xenfree):
        return

    self.print_stats(xenfree, memset_reqs)

    # Sizes before any request is sent; used below to tell "did not react
    # at all" from "reacted, but not enough".
    prev_memactual = dict((dom_id, state.memory_actual)
                          for dom_id, state in self.domdict.items())

    def report(dom_id, message):
        # Notify only when the domain name can still be read.
        dom_name = self.xs.read('', '/local/domain/%s/name' % str(dom_id))
        if dom_name is not None:
            notify_error_qubes_manager(str(dom_name), message)

    def flag_stuck_donors(receiver):
        # Inspect the requests that precede `receiver` in memset_reqs.
        self.refresh_memactual()
        for donor, donor_target in memset_reqs:
            if donor == receiver:
                # All donors have been processed
                break
            state = self.domdict[donor]
            # allow some small margin
            if state.memory_actual <= state.last_target + self.XEN_FREE_MEM_LEFT/4:
                continue
            if prev_memactual[donor] == state.memory_actual:
                # VM didn't react to memory request at all, remove from donors
                print('dom %s didnt react to memory request (holds %d, requested balloon down to %d)' % (donor, state.memory_actual, donor_target))
                state.no_progress = True
                report(donor, no_progress_msg)
            else:
                print('dom %s still hold more memory than have assigned (%d > %d)' % (donor, state.memory_actual, donor_target))
                state.slow_memset_react = True
                report(donor, slow_memset_react_msg)

    def would_starve_xen(dom_id, new_target):
        # Always keep at least 0.9*self.XEN_FREE_MEM_LEFT free (some margin
        # for rounding errors). Only Xen free memory is re-read here; the
        # domain's memory_actual is the last refreshed value.
        growth = new_target - self.domdict[dom_id].memory_actual
        return self.get_free_xen_memory() - growth < 0.9*self.XEN_FREE_MEM_LEFT

    for dom, mem in memset_reqs:
        # Before giving memory to a domain, ensure that the others have
        # given it back. If not - wait a little.
        waits_left = 5
        while would_starve_xen(dom, mem):
            time.sleep(self.BALOON_DELAY)
            waits_left -= 1
            if waits_left > 0:
                continue
            # Waiting hasn't helped; find which domains got stuck and
            # abort the balance (after distributing what we have)
            flag_stuck_donors(dom)
            self.mem_set(dom, self.get_free_xen_memory() + self.domdict[dom].memory_actual - self.XEN_FREE_MEM_LEFT)
            return

        self.mem_set(dom, mem)
|
|
|
|
|
|
|
|
# for i in self.domdict.keys():
|
|
|
|
# print 'domain ', i, ' meminfo=', self.domdict[i].meminfo, 'actual mem', self.domdict[i].memory_actual
|
|
|
|
# print 'domain ', i, 'actual mem', self.domdict[i].memory_actual
|
|
|
|
# print 'xen free mem', self.get_free_xen_memory()
|