qmemman: add logging

This commit is contained in:
Wojtek Porczyk 2015-03-17 16:45:00 +01:00
parent 19dfe3d390
commit fbdb2f07ac
3 changed files with 170 additions and 39 deletions

View File

@ -29,6 +29,8 @@ import qmemman_algo
import os
from notify import notify_error_qubes_manager, clear_error_qubes_manager
import logging
no_progress_msg="VM refused to give back requested memory"
slow_memset_react_msg="VM didn't give back all requested memory"
@ -43,8 +45,11 @@ class DomainState:
self.no_progress = False #no react to memset
self.slow_memset_react = False #slow react to memset (after few tries still above target)
class SystemState:
class SystemState(object):
def __init__(self):
self.log = logging.getLogger('qmemman.systemstate')
self.log.debug('SystemState()')
self.domdict = {}
self.xc = xen.lowlevel.xc.xc()
self.xs = xen.lowlevel.xs.xs()
@ -54,9 +59,11 @@ class SystemState:
self.ALL_PHYS_MEM = self.xc.physinfo()['total_memory']*1024
def add_domain(self, id):
self.log.debug('add_domain(id={!r})'.format(id))
self.domdict[id] = DomainState(id)
def del_domain(self, id):
self.log.debug('del_domain(id={!r})'.format(id))
self.domdict.pop(id)
def get_free_xen_memory(self):
@ -104,7 +111,7 @@ class SystemState:
#the below works (and is fast), but then 'xm list' shows unchanged memory value
def mem_set(self, id, val):
print 'mem-set domain', id, 'to', val
self.log.info('mem-set domain {} to {}'.format(id, val))
self.domdict[id].last_target = val
#can happen in the middle of domain shutdown
#apparently xc.lowlevel throws exceptions too
@ -118,24 +125,29 @@ class SystemState:
# this is called at the end of ballooning, when we have Xen free mem already
# make sure that past mem_set will not decrease Xen free mem
def inhibit_balloon_up(self):
self.log.debug('inhibit_balloon_up()')
for i in self.domdict.keys():
dom = self.domdict[i]
if dom.memory_actual is not None and dom.memory_actual + 200*1024 < dom.last_target:
print "Preventing balloon up to", dom.last_target
self.log.info(
'Preventing balloon up to {}'.format(dom.last_target))
self.mem_set(i, dom.memory_actual)
#perform memory ballooning, across all domains, to add "memsize" to Xen free memory
def do_balloon(self, memsize):
self.log.info('do_balloon(memsize={!r})'.format(memsize))
MAX_TRIES = 20
niter = 0
prev_memory_actual = None
for i in self.domdict.keys():
self.domdict[i].no_progress = False
print "do_balloon start"
while True:
self.log.debug('niter={:2d}/{:2d}'.format(niter, MAX_TRIES))
self.refresh_memactual()
xenfree = self.get_free_xen_memory()
print 'got xenfree=', xenfree
self.log.info('xenfree={!r}'.format(xenfree))
if xenfree >= memsize + self.XEN_FREE_MEM_MIN:
self.inhibit_balloon_up()
return True
@ -144,9 +156,9 @@ class SystemState:
if prev_memory_actual[i] == self.domdict[i].memory_actual:
#domain not responding to memset requests, remove it from donors
self.domdict[i].no_progress = True
print 'domain', i, 'stuck at', self.domdict[i].memory_actual
self.log.info('domain {} stuck at {}'.format(i, self.domdict[i].memory_actual))
memset_reqs = qmemman_algo.balloon(memsize + self.XEN_FREE_MEM_LEFT - xenfree, self.domdict)
print 'requests:', memset_reqs
self.log.info('memset_reqs={!r}'.format(memset_reqs))
if niter > MAX_TRIES or len(memset_reqs) == 0:
return False
prev_memory_actual = {}
@ -154,42 +166,68 @@ class SystemState:
dom, mem = i
self.mem_set(dom, mem)
prev_memory_actual[dom] = self.domdict[dom].memory_actual
self.log.debug('sleeping for {} s'.format(self.BALOON_DELAY))
time.sleep(self.BALOON_DELAY)
niter = niter + 1
def refresh_meminfo(self, domid, untrusted_meminfo_key):
qmemman_algo.refresh_meminfo_for_domain(self.domdict[domid], untrusted_meminfo_key)
self.log.debug(
'refresh_meminfo(domid={}, untrusted_meminfo_key={!r})'.format(
domid, untrusted_meminfo_key))
qmemman_algo.refresh_meminfo_for_domain(
self.domdict[domid], untrusted_meminfo_key)
self.do_balance()
#is the computed balance request big enough ?
#so that we do not trash with small adjustments
def is_balance_req_significant(self, memset_reqs, xenfree):
self.log.debug(
'is_balance_req_significant(memset_reqs={}, xenfree={})'.format(
memset_reqs, xenfree))
total_memory_transfer = 0
MIN_TOTAL_MEMORY_TRANSFER = 150*1024*1024
MIN_MEM_CHANGE_WHEN_UNDER_PREF = 15*1024*1024
# If xenfree to low, return immediately
if self.XEN_FREE_MEM_LEFT - xenfree > MIN_MEM_CHANGE_WHEN_UNDER_PREF:
self.log.debug('xenfree is too low, returning')
return True
for rq in memset_reqs:
dom, mem = rq
last_target = self.domdict[dom].last_target
memory_change = mem - last_target
total_memory_transfer += abs(memory_change)
pref = qmemman_algo.prefmem(self.domdict[dom])
if last_target > 0 and last_target < pref and memory_change > MIN_MEM_CHANGE_WHEN_UNDER_PREF:
print 'dom', dom, 'is below pref, allowing balance'
self.log.info(
'dom {} is below pref, allowing balance'.format(dom))
return True
return total_memory_transfer + abs(xenfree - self.XEN_FREE_MEM_LEFT) > MIN_TOTAL_MEMORY_TRANSFER
ret = total_memory_transfer + abs(xenfree - self.XEN_FREE_MEM_LEFT) > MIN_TOTAL_MEMORY_TRANSFER
self.log.debug('is_balance_req_significant return {}'.format(ret))
return ret
def print_stats(self, xenfree, memset_reqs):
for i in self.domdict.keys():
if self.domdict[i].meminfo is not None:
print 'dom' , i, 'act/pref', self.domdict[i].memory_actual, qmemman_algo.prefmem(self.domdict[i])
print 'xenfree=', xenfree, 'balance req:', memset_reqs
self.log.info('stat: dom {!r} act={} pref={}'.format(i,
self.domdict[i].memory_actual,
qmemman_algo.prefmem(self.domdict[i])))
self.log.info('stat: xenfree={} memset_reqs={}'.format(xenfree, memset_reqs))
def do_balance(self):
self.log.debug('do_balance()')
if os.path.isfile('/var/run/qubes/do-not-membalance'):
self.log.debug('do-not-membalance file preset, returning')
return
self.refresh_memactual()
self.clear_outdated_error_markers()
xenfree = self.get_free_xen_memory()
@ -210,6 +248,8 @@ class SystemState:
# If not - wait a little.
ntries = 5
while self.get_free_xen_memory() - (mem - self.domdict[dom].memory_actual) < 0.9*self.XEN_FREE_MEM_LEFT:
self.log.debug('do_balance dom={!r} sleeping ntries={}'.format(
dom, ntries))
time.sleep(self.BALOON_DELAY)
ntries -= 1
if ntries <= 0:
@ -225,13 +265,22 @@ class SystemState:
if self.domdict[dom2].memory_actual > self.domdict[dom2].last_target + self.XEN_FREE_MEM_LEFT/4:
# VM didn't react to memory request at all, remove from donors
if prev_memactual[dom2] == self.domdict[dom2].memory_actual:
print 'dom %s didnt react to memory request (holds %d, requested balloon down to %d)' % (dom2, self.domdict[dom2].memory_actual, mem2)
self.log.warning(
'dom {!r} didnt react to memory request'
' (holds {}, requested balloon down to {})'
.format(dom2,
self.domdict[dom2].memory_actual,
mem2))
self.domdict[dom2].no_progress = True
dom_name = self.xs.read('', '/local/domain/%s/name' % str(dom2))
if dom_name is not None:
notify_error_qubes_manager(str(dom_name), no_progress_msg)
else:
print 'dom %s still hold more memory than have assigned (%d > %d)' % (dom2, self.domdict[dom2].memory_actual, mem2)
self.log.warning('dom {!r} still hold more'
' memory than have assigned ({} > {})'
.format(dom2,
self.domdict[dom2].memory_actual,
mem2))
self.domdict[dom2].slow_memset_react = True
dom_name = self.xs.read('', '/local/domain/%s/name' % str(dom2))
if dom_name is not None:

View File

@ -21,6 +21,7 @@
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
#
import logging
import string
# This are only defaults - can be overriden by QMemmanServer with values from
@ -29,6 +30,9 @@ CACHE_FACTOR = 1.3
MIN_PREFMEM = 200*1024*1024
DOM0_MEM_BOOST = 350*1024*1024
log = logging.getLogger('qmemman.daemon.algo')
#untrusted meminfo size is taken from xenstore key, thus its size is limited
#so splits do not require excessive memory
def parse_meminfo(untrusted_meminfo):
@ -44,8 +48,11 @@ def parse_meminfo(untrusted_meminfo):
return untrusted_dict
def is_meminfo_suspicious(domain, untrusted_meminfo):
log.debug('is_meminfo_suspicious('
'domain={!r}, untrusted_meminfo={!r})'.format(
domain, untrusted_meminfo))
ret = False
#check whether the required keys exist and are not negative
try:
for i in ('MemTotal', 'MemFree', 'Buffers', 'Cached', 'SwapTotal', 'SwapFree'):
@ -66,7 +73,9 @@ def is_meminfo_suspicious(domain, untrusted_meminfo):
#it can be achieved with legal values, too, and it will not allow to
#starve existing domains, by design
if ret:
print 'suspicious meminfo for domain', domain.id, 'mem actual', domain.memory_actual, untrusted_meminfo
log.warning('suspicious meminfo for domain {!r}'
' memory_actual={!r} untrusted_meminfo={!r}'.format(domain.id,
domain.memory_actual, untrusted_meminfo))
return ret
#called when a domain updates its 'meminfo' xenstore key
@ -101,6 +110,8 @@ def memory_needed(domain):
#to "xm memset" equivalent in order to obtain "memsize" of memory
#return empty list when the request cannot be satisfied
def balloon(memsize, domain_dictionary):
log.debug('balloon(memsize={!r}, domain_dictionary={!r})'.format(
memsize, domain_dictionary))
REQ_SAFETY_NET_FACTOR = 1.05
donors = list()
request = list()
@ -112,17 +123,20 @@ def balloon(memsize, domain_dictionary):
continue
need = memory_needed(domain_dictionary[i])
if need < 0:
print 'balloon: dom' , i, 'has actual memory', domain_dictionary[i].memory_actual
log.info('balloon: dom {} has actual memory {}'.format(i,
domain_dictionary[i].memory_actual))
donors.append((i,-need))
available-=need
print 'req=', memsize, 'avail=', available, 'donors', donors
log.info('req={} avail={} donors={!r}'.format(memsize, available, donors))
if available<memsize:
return ()
scale = 1.0*memsize/available
for donors_iter in donors:
id, mem = donors_iter
memborrowed = mem*scale*REQ_SAFETY_NET_FACTOR
print 'borrow' , memborrowed, 'from', id
log.info('borrow {} from {}'.format(memborrowed, id))
memtarget = int(domain_dictionary[id].memory_actual - memborrowed)
request.append((id, memtarget))
return request
@ -131,8 +145,12 @@ def balloon(memsize, domain_dictionary):
#redistribute positive "total_available_memory" of memory between domains, proportionally to prefmem
def balance_when_enough_memory(domain_dictionary, xen_free_memory, total_mem_pref, total_available_memory):
print 'balance_when_enough_memory(', xen_free_memory, total_mem_pref, total_available_memory, ')'
def balance_when_enough_memory(domain_dictionary,
xen_free_memory, total_mem_pref, total_available_memory):
log.info('balance_when_enough_memory(xen_free_memory={!r}, '
'total_mem_pref={!r}, total_available_memory={!r})'.format(
xen_free_memory, total_mem_pref, total_available_memory))
target_memory = {}
# memory not assigned because of static max
left_memory = 0
@ -157,7 +175,9 @@ def balance_when_enough_memory(domain_dictionary, xen_free_memory, total_mem_pre
target_memory[i] = target
# distribute left memory across all acceptors
while left_memory > 0 and acceptors_count > 0:
print ' left_memory:', left_memory, 'acceptors_count:', acceptors_count
log.info('left_memory={} acceptors_count={}'.format(
left_memory, acceptors_count))
new_left_memory = 0
new_acceptors_count = acceptors_count
for i in target_memory.keys():
@ -187,9 +207,14 @@ def balance_when_enough_memory(domain_dictionary, xen_free_memory, total_mem_pre
# print 'balance(enough): xen_free_memory=', xen_free_memory, 'requests:', donors_rq + acceptors_rq
return donors_rq + acceptors_rq
#when not enough mem to make everyone be above prefmem, make donors be at prefmem, and
#redistribute anything left between acceptors
def balance_when_low_on_memory(domain_dictionary, xen_free_memory, total_mem_pref_acceptors, donors, acceptors):
def balance_when_low_on_memory(domain_dictionary,
xen_free_memory, total_mem_pref_acceptors, donors, acceptors):
log.debug('balance_when_low_on_memory(xen_free_memory={!r}, '
'total_mem_pref_acceptors={!r}, donors={!r}, acceptors={!r})'.format(
xen_free_memory, total_mem_pref_acceptors, donors, acceptors))
donors_rq = list()
acceptors_rq = list()
squeezed_mem = xen_free_memory
@ -218,6 +243,8 @@ def balance_when_low_on_memory(domain_dictionary, xen_free_memory, total_mem_pre
#return the list of (domain, memory_target) pairs to be passed to
#"xm memset" equivalent
def balance(xen_free_memory, domain_dictionary):
log.debug('balance(xen_free_memory={!r}, domain_dictionary={!r})'.format(
xen_free_memory, domain_dictionary))
#sum of all memory requirements - in other words, the difference between
#memory required to be added to domains (acceptors) to make them be at their

View File

@ -33,6 +33,9 @@ from ConfigParser import SafeConfigParser
from optparse import OptionParser
from qubesutils import parse_size
import logging
import logging.handlers
config_path = '/etc/qubes/qmemman.conf'
SOCK_PATH='/var/run/qubes/qmemman.sock'
LOG_PATH='/var/log/qubes/qmemman.log'
@ -58,42 +61,68 @@ class WatchType:
class XS_Watcher:
def __init__(self):
self.log = logging.getLogger('qmemman.daemon.xswatcher')
self.log.debug('XS_Watcher()')
self.handle = xen.lowlevel.xs.xs()
self.handle.watch('@introduceDomain', WatchType(XS_Watcher.domain_list_changed, None))
self.handle.watch('@releaseDomain', WatchType(XS_Watcher.domain_list_changed, None))
self.watch_token_dict = {}
def domain_list_changed(self, param):
self.log.debug('domain_list_changed(param={!r})'.format(param))
curr = self.handle.ls('', '/local/domain')
self.log.debug('curr={!r}'.format(curr))
if curr == None:
return
self.log.debug('acquiring global_lock')
global_lock.acquire()
self.log.debug('global_lock acquired')
for i in only_in_first_list(curr, self.watch_token_dict.keys()):
#new domain has been created
watch = WatchType(XS_Watcher.meminfo_changed, i)
self.watch_token_dict[i] = watch
self.handle.watch(get_domain_meminfo_key(i), watch)
system_state.add_domain(i)
for i in only_in_first_list(self.watch_token_dict.keys(), curr):
#domain destroyed
self.handle.unwatch(get_domain_meminfo_key(i), self.watch_token_dict[i])
self.watch_token_dict.pop(i)
system_state.del_domain(i)
global_lock.release()
self.log.debug('global_lock released')
system_state.do_balance()
def meminfo_changed(self, domain_id):
self.log.debug('meminfo_changed(domain_id={!r})'.format(domain_id))
untrusted_meminfo_key = self.handle.read('', get_domain_meminfo_key(domain_id))
if untrusted_meminfo_key == None or untrusted_meminfo_key == '':
return
self.log.debug('acquiring global_lock')
global_lock.acquire()
self.log.debug('global_lock acquired')
system_state.refresh_meminfo(domain_id, untrusted_meminfo_key)
global_lock.release()
self.log.debug('global_lock released')
def watch_loop(self):
# sys.stderr = file('/var/log/qubes/qfileexchgd.errors', 'a')
self.log.debug('watch_loop()')
while True:
result = self.handle.read_watch()
self.log.debug('watch_loop result={!r}'.format(result))
token = result[1]
token.fn(self, token.param)
@ -108,26 +137,39 @@ class QMemmanReqHandler(SocketServer.BaseRequestHandler):
"""
def handle(self):
self.log = logging.getLogger('qmemman.daemon.reqhandler')
got_lock = False
# self.request is the TCP socket connected to the client
while True:
self.data = self.request.recv(1024).strip()
self.log.debug('data={!r}'.format(self.data))
if len(self.data) == 0:
print 'EOF'
self.log.info('EOF')
if got_lock:
global_lock.release()
self.log.debug('global_lock released')
return
# XXX something is wrong here: return without release?
if got_lock:
print 'Second request over qmemman.sock ?'
self.log.warning('Second request over qmemman.sock?')
return
self.log.debug('acquiring global_lock')
global_lock.acquire()
self.log.debug('global_lock acquired')
got_lock = True
if system_state.do_balloon(int(self.data)):
resp = "OK\n"
else:
resp = "FAIL\n"
self.log.debug('resp={!r}'.format(resp))
self.request.send(resp)
# XXX no release of lock?
def start_server(server):
server.serve_forever()
@ -135,23 +177,29 @@ def start_server(server):
class QMemmanServer:
@staticmethod
def main():
# setup logging
ha_syslog = logging.handlers.SysLogHandler('/dev/log')
ha_syslog.setFormatter(
logging.Formatter('%(name)s[%(process)d]: %(message)s'))
logging.root.addHandler(ha_syslog)
# leave log for backwards compatibility
ha_file = logging.FileHandler(LOG_PATH)
ha_file.setFormatter(
logging.Formatter('%(asctime)s %(name)s[%(process)d]: %(message)s'))
logging.root.addHandler(ha_file)
log = logging.getLogger('qmemman.daemon')
usage = "usage: %prog [options]"
parser = OptionParser(usage)
parser.add_option("-c", "--config", action="store", dest="config", default=config_path)
(options, args) = parser.parse_args()
logfd = os.open(LOG_PATH, os.O_WRONLY|os.O_APPEND|os.O_CREAT, 0644)
if logfd < 0:
print sys.stderr, "ERROR: Failed to open log file (%s)" % LOG_PATH
exit(1)
# reinitialize python stdout/err
sys.stdout.flush()
sys.stderr.flush()
os.dup2(logfd, 1)
os.dup2(logfd, 2)
os.close(logfd)
devnull = os.open('/dev/null', os.O_RDONLY)
os.dup2(devnull, 0)
# close io
sys.stdin.close()
sys.stdout.close()
sys.stderr.close()
config = SafeConfigParser({
'vm-min-mem': str(qmemman_algo.MIN_PREFMEM),
@ -164,18 +212,25 @@ class QMemmanServer:
qmemman_algo.DOM0_MEM_BOOST = parse_size(config.get('global', 'dom0-mem-boost'))
qmemman_algo.CACHE_FACTOR = config.getfloat('global', 'cache-margin-factor')
print "values: %s, %s, %s" % (str(qmemman_algo.MIN_PREFMEM), str(qmemman_algo.DOM0_MEM_BOOST), str(qmemman_algo.CACHE_FACTOR))
log.info('MIN_PREFMEM={qmemman_algo.MIN_PREFMEM}'
' DOM0_MEM_BOOST={qmemman_algo.DOM0_MEM_BOOST}'
' CACHE_FACTOR={qmemman_algo.CACHE_FACTOR}'.format(
qmemman_algo=qmemman_algo))
try:
os.unlink(SOCK_PATH)
except:
pass
log.debug('instantiating server')
os.umask(0)
server = SocketServer.UnixStreamServer(SOCK_PATH, QMemmanReqHandler)
os.umask(077)
# notify systemd
nofity_socket = os.getenv('NOTIFY_SOCKET')
if nofity_socket:
log.debug('notifying systemd')
s = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM)
if nofity_socket.startswith('@'):
nofity_socket = '\0%s' % nofity_socket[1:]