From fbdb2f07ac86538bda66ad790a84ce601663b691 Mon Sep 17 00:00:00 2001 From: Wojtek Porczyk Date: Tue, 17 Mar 2015 16:45:00 +0100 Subject: [PATCH] qmemman: add logging --- qmemman/qmemman.py | 77 +++++++++++++++++++++++++++------- qmemman/qmemman_algo.py | 45 ++++++++++++++++---- qmemman/qmemman_server.py | 87 ++++++++++++++++++++++++++++++++------- 3 files changed, 170 insertions(+), 39 deletions(-) diff --git a/qmemman/qmemman.py b/qmemman/qmemman.py index 3c66bd11..4a4b0d0e 100755 --- a/qmemman/qmemman.py +++ b/qmemman/qmemman.py @@ -29,6 +29,8 @@ import qmemman_algo import os from notify import notify_error_qubes_manager, clear_error_qubes_manager +import logging + no_progress_msg="VM refused to give back requested memory" slow_memset_react_msg="VM didn't give back all requested memory" @@ -43,8 +45,11 @@ class DomainState: self.no_progress = False #no react to memset self.slow_memset_react = False #slow react to memset (after few tries still above target) -class SystemState: +class SystemState(object): def __init__(self): + self.log = logging.getLogger('qmemman.systemstate') + self.log.debug('SystemState()') + self.domdict = {} self.xc = xen.lowlevel.xc.xc() self.xs = xen.lowlevel.xs.xs() @@ -54,9 +59,11 @@ class SystemState: self.ALL_PHYS_MEM = self.xc.physinfo()['total_memory']*1024 def add_domain(self, id): + self.log.debug('add_domain(id={!r})'.format(id)) self.domdict[id] = DomainState(id) def del_domain(self, id): + self.log.debug('del_domain(id={!r})'.format(id)) self.domdict.pop(id) def get_free_xen_memory(self): @@ -104,7 +111,7 @@ class SystemState: #the below works (and is fast), but then 'xm list' shows unchanged memory value def mem_set(self, id, val): - print 'mem-set domain', id, 'to', val + self.log.info('mem-set domain {} to {}'.format(id, val)) self.domdict[id].last_target = val #can happen in the middle of domain shutdown #apparently xc.lowlevel throws exceptions too @@ -118,24 +125,29 @@ class SystemState: # this is called at the end of ballooning, when we have Xen free mem already # make sure that past mem_set will not decrease Xen free mem def inhibit_balloon_up(self): + self.log.debug('inhibit_balloon_up()') for i in self.domdict.keys(): dom = self.domdict[i] if dom.memory_actual is not None and dom.memory_actual + 200*1024 < dom.last_target: - print "Preventing balloon up to", dom.last_target + self.log.info( + 'Preventing balloon up to {}'.format(dom.last_target)) self.mem_set(i, dom.memory_actual) #perform memory ballooning, across all domains, to add "memsize" to Xen free memory def do_balloon(self, memsize): + self.log.info('do_balloon(memsize={!r})'.format(memsize)) MAX_TRIES = 20 niter = 0 prev_memory_actual = None + for i in self.domdict.keys(): self.domdict[i].no_progress = False - print "do_balloon start" + while True: + self.log.debug('niter={:2d}/{:2d}'.format(niter, MAX_TRIES)) self.refresh_memactual() xenfree = self.get_free_xen_memory() - print 'got xenfree=', xenfree + self.log.info('xenfree={!r}'.format(xenfree)) if xenfree >= memsize + self.XEN_FREE_MEM_MIN: self.inhibit_balloon_up() return True @@ -144,9 +156,9 @@ class SystemState: if prev_memory_actual[i] == self.domdict[i].memory_actual: #domain not responding to memset requests, remove it from donors self.domdict[i].no_progress = True - print 'domain', i, 'stuck at', self.domdict[i].memory_actual + self.log.info('domain {} stuck at {}'.format(i, self.domdict[i].memory_actual)) memset_reqs = qmemman_algo.balloon(memsize + self.XEN_FREE_MEM_LEFT - xenfree, self.domdict) - print 'requests:', memset_reqs + self.log.info('memset_reqs={!r}'.format(memset_reqs)) if niter > MAX_TRIES or len(memset_reqs) == 0: return False prev_memory_actual = {} @@ -154,42 +166,68 @@ class SystemState: dom, mem = i self.mem_set(dom, mem) prev_memory_actual[dom] = self.domdict[dom].memory_actual + self.log.debug('sleeping for {} s'.format(self.BALOON_DELAY)) time.sleep(self.BALOON_DELAY) niter = niter + 1 def refresh_meminfo(self, domid, untrusted_meminfo_key): - qmemman_algo.refresh_meminfo_for_domain(self.domdict[domid], untrusted_meminfo_key) + self.log.debug( + 'refresh_meminfo(domid={}, untrusted_meminfo_key={!r})'.format( + domid, untrusted_meminfo_key)) + + qmemman_algo.refresh_meminfo_for_domain( + self.domdict[domid], untrusted_meminfo_key) self.do_balance() #is the computed balance request big enough ? #so that we do not trash with small adjustments def is_balance_req_significant(self, memset_reqs, xenfree): + self.log.debug( + 'is_balance_req_significant(memset_reqs={}, xenfree={})'.format( + memset_reqs, xenfree)) + total_memory_transfer = 0 MIN_TOTAL_MEMORY_TRANSFER = 150*1024*1024 MIN_MEM_CHANGE_WHEN_UNDER_PREF = 15*1024*1024 + # If xenfree to low, return immediately if self.XEN_FREE_MEM_LEFT - xenfree > MIN_MEM_CHANGE_WHEN_UNDER_PREF: + self.log.debug('xenfree is too low, returning') return True + for rq in memset_reqs: dom, mem = rq last_target = self.domdict[dom].last_target memory_change = mem - last_target total_memory_transfer += abs(memory_change) pref = qmemman_algo.prefmem(self.domdict[dom]) + if last_target > 0 and last_target < pref and memory_change > MIN_MEM_CHANGE_WHEN_UNDER_PREF: - print 'dom', dom, 'is below pref, allowing balance' + self.log.info( + 'dom {} is below pref, allowing balance'.format(dom)) return True - return total_memory_transfer + abs(xenfree - self.XEN_FREE_MEM_LEFT) > MIN_TOTAL_MEMORY_TRANSFER + + ret = total_memory_transfer + abs(xenfree - self.XEN_FREE_MEM_LEFT) > MIN_TOTAL_MEMORY_TRANSFER + self.log.debug('is_balance_req_significant return {}'.format(ret)) + return ret + def print_stats(self, xenfree, memset_reqs): for i in self.domdict.keys(): if self.domdict[i].meminfo is not None: - print 'dom' , i, 'act/pref', self.domdict[i].memory_actual, qmemman_algo.prefmem(self.domdict[i]) - print 'xenfree=', xenfree, 'balance req:', memset_reqs + self.log.info('stat: dom {!r} act={} pref={}'.format(i, + self.domdict[i].memory_actual, + qmemman_algo.prefmem(self.domdict[i]))) + + self.log.info('stat: xenfree={} memset_reqs={}'.format(xenfree, memset_reqs)) + def do_balance(self): + self.log.debug('do_balance()') if os.path.isfile('/var/run/qubes/do-not-membalance'): + self.log.debug('do-not-membalance file preset, returning') return + self.refresh_memactual() self.clear_outdated_error_markers() xenfree = self.get_free_xen_memory() @@ -210,6 +248,8 @@ class SystemState: # If not - wait a little. ntries = 5 while self.get_free_xen_memory() - (mem - self.domdict[dom].memory_actual) < 0.9*self.XEN_FREE_MEM_LEFT: + self.log.debug('do_balance dom={!r} sleeping ntries={}'.format( + dom, ntries)) time.sleep(self.BALOON_DELAY) ntries -= 1 if ntries <= 0: @@ -225,13 +265,22 @@ class SystemState: if self.domdict[dom2].memory_actual > self.domdict[dom2].last_target + self.XEN_FREE_MEM_LEFT/4: # VM didn't react to memory request at all, remove from donors if prev_memactual[dom2] == self.domdict[dom2].memory_actual: - print 'dom %s didnt react to memory request (holds %d, requested balloon down to %d)' % (dom2, self.domdict[dom2].memory_actual, mem2) + self.log.warning( + 'dom {!r} didnt react to memory request' + ' (holds {}, requested balloon down to {})' + .format(dom2, + self.domdict[dom2].memory_actual, + mem2)) self.domdict[dom2].no_progress = True dom_name = self.xs.read('', '/local/domain/%s/name' % str(dom2)) if dom_name is not None: notify_error_qubes_manager(str(dom_name), no_progress_msg) else: - print 'dom %s still hold more memory than have assigned (%d > %d)' % (dom2, self.domdict[dom2].memory_actual, mem2) + self.log.warning('dom {!r} still hold more' + ' memory than have assigned ({} > {})' + .format(dom2, + self.domdict[dom2].memory_actual, + mem2)) self.domdict[dom2].slow_memset_react = True dom_name = self.xs.read('', '/local/domain/%s/name' % str(dom2)) if dom_name is not None: diff --git a/qmemman/qmemman_algo.py b/qmemman/qmemman_algo.py index e27f8827..7bd0960d 100755 --- a/qmemman/qmemman_algo.py +++ b/qmemman/qmemman_algo.py @@ -21,6 +21,7 @@ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # # +import logging import string # This are only defaults - can be overriden by QMemmanServer with values from @@ -29,6 +30,9 @@ CACHE_FACTOR = 1.3 MIN_PREFMEM = 200*1024*1024 DOM0_MEM_BOOST = 350*1024*1024 + +log = logging.getLogger('qmemman.daemon.algo') + #untrusted meminfo size is taken from xenstore key, thus its size is limited #so splits do not require excessive memory def parse_meminfo(untrusted_meminfo): @@ -44,8 +48,11 @@ def parse_meminfo(untrusted_meminfo): return untrusted_dict def is_meminfo_suspicious(domain, untrusted_meminfo): + log.debug('is_meminfo_suspicious(' + 'domain={!r}, untrusted_meminfo={!r})'.format( + domain, untrusted_meminfo)) ret = False - + #check whether the required keys exist and are not negative try: for i in ('MemTotal', 'MemFree', 'Buffers', 'Cached', 'SwapTotal', 'SwapFree'): @@ -66,7 +73,9 @@ def is_meminfo_suspicious(domain, untrusted_meminfo): #it can be achieved with legal values, too, and it will not allow to #starve existing domains, by design if ret: - print 'suspicious meminfo for domain', domain.id, 'mem actual', domain.memory_actual, untrusted_meminfo + log.warning('suspicious meminfo for domain {!r}' + ' memory_actual={!r} untrusted_meminfo={!r}'.format(domain.id, + domain.memory_actual, untrusted_meminfo)) return ret #called when a domain updates its 'meminfo' xenstore key @@ -101,6 +110,8 @@ def memory_needed(domain): #to "xm memset" equivalent in order to obtain "memsize" of memory #return empty list when the request cannot be satisfied def balloon(memsize, domain_dictionary): + log.debug('balloon(memsize={!r}, domain_dictionary={!r})'.format( + memsize, domain_dictionary)) REQ_SAFETY_NET_FACTOR = 1.05 donors = list() request = list() @@ -112,17 +123,20 @@ def balloon(memsize, domain_dictionary): continue need = memory_needed(domain_dictionary[i]) if need < 0: - print 'balloon: dom' , i, 'has actual memory', domain_dictionary[i].memory_actual + log.info('balloon: dom {} has actual memory {}'.format(i, + domain_dictionary[i].memory_actual)) donors.append((i,-need)) available-=need - print 'req=', memsize, 'avail=', available, 'donors', donors + + log.info('req={} avail={} donors={!r}'.format(memsize, available, donors)) + if available 0 and acceptors_count > 0: - print ' left_memory:', left_memory, 'acceptors_count:', acceptors_count + log.info('left_memory={} acceptors_count={}'.format( + left_memory, acceptors_count)) + new_left_memory = 0 new_acceptors_count = acceptors_count for i in target_memory.keys(): @@ -187,9 +207,14 @@ def balance_when_enough_memory(domain_dictionary, xen_free_memory, total_mem_pre # print 'balance(enough): xen_free_memory=', xen_free_memory, 'requests:', donors_rq + acceptors_rq return donors_rq + acceptors_rq + #when not enough mem to make everyone be above prefmem, make donors be at prefmem, and #redistribute anything left between acceptors -def balance_when_low_on_memory(domain_dictionary, xen_free_memory, total_mem_pref_acceptors, donors, acceptors): +def balance_when_low_on_memory(domain_dictionary, + xen_free_memory, total_mem_pref_acceptors, donors, acceptors): + log.debug('balance_when_low_on_memory(xen_free_memory={!r}, ' + 'total_mem_pref_acceptors={!r}, donors={!r}, acceptors={!r})'.format( + xen_free_memory, total_mem_pref_acceptors, donors, acceptors)) donors_rq = list() acceptors_rq = list() squeezed_mem = xen_free_memory @@ -218,6 +243,8 @@ def balance_when_low_on_memory(domain_dictionary, xen_free_memory, total_mem_pre #return the list of (domain, memory_target) pairs to be passed to #"xm memset" equivalent def balance(xen_free_memory, domain_dictionary): + log.debug('balance(xen_free_memory={!r}, domain_dictionary={!r})'.format( + xen_free_memory, domain_dictionary)) #sum of all memory requirements - in other words, the difference between #memory required to be added to domains (acceptors) to make them be at their diff --git a/qmemman/qmemman_server.py b/qmemman/qmemman_server.py index 4a0a4a92..cf1bb2fb 100755 --- a/qmemman/qmemman_server.py +++ b/qmemman/qmemman_server.py @@ -33,6 +33,9 @@ from ConfigParser import SafeConfigParser from optparse import OptionParser from qubesutils import parse_size +import logging +import logging.handlers + config_path = '/etc/qubes/qmemman.conf' SOCK_PATH='/var/run/qubes/qmemman.sock' LOG_PATH='/var/log/qubes/qmemman.log' @@ -58,42 +61,68 @@ class WatchType: class XS_Watcher: def __init__(self): + self.log = logging.getLogger('qmemman.daemon.xswatcher') + self.log.debug('XS_Watcher()') + self.handle = xen.lowlevel.xs.xs() self.handle.watch('@introduceDomain', WatchType(XS_Watcher.domain_list_changed, None)) self.handle.watch('@releaseDomain', WatchType(XS_Watcher.domain_list_changed, None)) self.watch_token_dict = {} + def domain_list_changed(self, param): + self.log.debug('domain_list_changed(param={!r})'.format(param)) + curr = self.handle.ls('', '/local/domain') + self.log.debug('curr={!r}'.format(curr)) + if curr == None: return + + self.log.debug('acquiring global_lock') global_lock.acquire() + self.log.debug('global_lock acquired') + for i in only_in_first_list(curr, self.watch_token_dict.keys()): #new domain has been created watch = WatchType(XS_Watcher.meminfo_changed, i) self.watch_token_dict[i] = watch self.handle.watch(get_domain_meminfo_key(i), watch) system_state.add_domain(i) + for i in only_in_first_list(self.watch_token_dict.keys(), curr): #domain destroyed self.handle.unwatch(get_domain_meminfo_key(i), self.watch_token_dict[i]) self.watch_token_dict.pop(i) system_state.del_domain(i) + global_lock.release() + self.log.debug('global_lock released') + system_state.do_balance() + def meminfo_changed(self, domain_id): + self.log.debug('meminfo_changed(domain_id={!r})'.format(domain_id)) untrusted_meminfo_key = self.handle.read('', get_domain_meminfo_key(domain_id)) if untrusted_meminfo_key == None or untrusted_meminfo_key == '': return + + self.log.debug('acquiring global_lock') global_lock.acquire() + self.log.debug('global_lock acquired') + system_state.refresh_meminfo(domain_id, untrusted_meminfo_key) + global_lock.release() + self.log.debug('global_lock released') + def watch_loop(self): -# sys.stderr = file('/var/log/qubes/qfileexchgd.errors', 'a') + self.log.debug('watch_loop()') while True: result = self.handle.read_watch() + self.log.debug('watch_loop result={!r}'.format(result)) token = result[1] token.fn(self, token.param) @@ -108,26 +137,39 @@ class QMemmanReqHandler(SocketServer.BaseRequestHandler): """ def handle(self): + self.log = logging.getLogger('qmemman.daemon.reqhandler') + got_lock = False # self.request is the TCP socket connected to the client while True: self.data = self.request.recv(1024).strip() + self.log.debug('data={!r}'.format(self.data)) if len(self.data) == 0: - print 'EOF' + self.log.info('EOF') if got_lock: global_lock.release() + self.log.debug('global_lock released') return + + # XXX something is wrong here: return without release? if got_lock: - print 'Second request over qmemman.sock ?' + self.log.warning('Second request over qmemman.sock?') return + + self.log.debug('acquiring global_lock') global_lock.acquire() + self.log.debug('global_lock acquired') + got_lock = True if system_state.do_balloon(int(self.data)): resp = "OK\n" else: resp = "FAIL\n" + self.log.debug('resp={!r}'.format(resp)) self.request.send(resp) + # XXX no release of lock? + def start_server(server): server.serve_forever() @@ -135,23 +177,29 @@ def start_server(server): class QMemmanServer: @staticmethod def main(): + # setup logging + ha_syslog = logging.handlers.SysLogHandler('/dev/log') + ha_syslog.setFormatter( + logging.Formatter('%(name)s[%(process)d]: %(message)s')) + logging.root.addHandler(ha_syslog) + + # leave log for backwards compatibility + ha_file = logging.FileHandler(LOG_PATH) + ha_file.setFormatter( + logging.Formatter('%(asctime)s %(name)s[%(process)d]: %(message)s')) + logging.root.addHandler(ha_file) + + log = logging.getLogger('qmemman.daemon') + usage = "usage: %prog [options]" parser = OptionParser(usage) parser.add_option("-c", "--config", action="store", dest="config", default=config_path) (options, args) = parser.parse_args() - logfd = os.open(LOG_PATH, os.O_WRONLY|os.O_APPEND|os.O_CREAT, 0644) - if logfd < 0: - print sys.stderr, "ERROR: Failed to open log file (%s)" % LOG_PATH - exit(1) - # reinitialize python stdout/err - sys.stdout.flush() - sys.stderr.flush() - os.dup2(logfd, 1) - os.dup2(logfd, 2) - os.close(logfd) - devnull = os.open('/dev/null', os.O_RDONLY) - os.dup2(devnull, 0) + # close io + sys.stdin.close() + sys.stdout.close() + sys.stderr.close() config = SafeConfigParser({ 'vm-min-mem': str(qmemman_algo.MIN_PREFMEM), @@ -164,18 +212,25 @@ class QMemmanServer: qmemman_algo.DOM0_MEM_BOOST = parse_size(config.get('global', 'dom0-mem-boost')) qmemman_algo.CACHE_FACTOR = config.getfloat('global', 'cache-margin-factor') - print "values: %s, %s, %s" % (str(qmemman_algo.MIN_PREFMEM), str(qmemman_algo.DOM0_MEM_BOOST), str(qmemman_algo.CACHE_FACTOR)) + log.info('MIN_PREFMEM={qmemman_algo.MIN_PREFMEM}' + ' DOM0_MEM_BOOST={qmemman_algo.DOM0_MEM_BOOST}' + ' CACHE_FACTOR={qmemman_algo.CACHE_FACTOR}'.format( + qmemman_algo=qmemman_algo)) try: os.unlink(SOCK_PATH) except: pass + + log.debug('instantiating server') os.umask(0) server = SocketServer.UnixStreamServer(SOCK_PATH, QMemmanReqHandler) os.umask(077) + # notify systemd nofity_socket = os.getenv('NOTIFY_SOCKET') if nofity_socket: + log.debug('notifying systemd') s = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM) if nofity_socket.startswith('@'): nofity_socket = '\0%s' % nofity_socket[1:]