core-admin/qubes/qmemman/__init__.py
Marek Marczykowski-Górecki 4bca631350
qmemman: request VMs balloon down with 16MB safety margin
It looks like the Linux balloon driver does not always precisely respect
the requested target memory, but performs some rounding. Also, in some cases
(HVM domains), the VM does not see all the memory that Xen has assigned to it
- there are some additional Xen pools for internal usage.
Include a 16MB safety margin in memory requests to account for those two
things. This will avoid setting the "no_response" flag for most VMs.

QubesOS/qubes-issues#3265
2018-01-11 03:41:55 +01:00

364 lines
16 KiB
Python

# pylint: skip-file
#
# The Qubes OS Project, http://www.qubes-os.org
#
# Copyright (C) 2010 Rafal Wojtczuk <rafal@invisiblethingslab.com>
# Copyright (C) 2013 Marek Marczykowski <marmarek@invisiblethingslab.com>
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see <https://www.gnu.org/licenses/>.
#
import logging
import os
import string
import time
import functools
import xen.lowlevel.xc
import xen.lowlevel.xs
import qubes.qmemman.algo
# Human-readable descriptions that appear to correspond to the two per-domain
# error markers kept in DomainState (no_progress / slow_memset_react).
# NOTE(review): neither constant is referenced anywhere in this file;
# presumably they are consumed by other modules - confirm before renaming
# or removing them.
no_progress_msg="VM refused to give back requested memory"
slow_memset_react_msg="VM didn't give back all requested memory"
class DomainState:
    """Memory bookkeeping record kept for a single domain."""

    def __init__(self, id):
        """Create the record for domain *id* with nothing measured yet.

        The attributes are created in a fixed order on purpose: __repr__
        dumps the instance dictionary, so the creation order shows up in
        every log line that prints a DomainState.
        """
        # the current memory size
        self.memory_current = 0
        # the current memory allocation (what VM is using or can use at
        # any time)
        self.memory_actual = None
        # the maximum memory size
        self.memory_maximum = None
        # used memory, computed based on meminfo
        self.mem_used = None
        # domain id
        self.id = id
        # the last memset target
        self.last_target = 0
        # no react to memset
        self.no_progress = False
        # slow react to memset (after few tries still above target)
        self.slow_memset_react = False

    def __repr__(self):
        """Return the textual form of the attribute dictionary."""
        return repr(vars(self))
class SystemState(object):
def __init__(self):
    """Open the Xen control/xenstore handles and set the tunables.

    Instance attributes created here:

    * ``domdict`` - mapping of domain id to its DomainState
    * ``xc`` / ``xs`` - handles from xen.lowlevel.xc and xen.lowlevel.xs
    * ``BALOON_DELAY`` - value handed to time.sleep() between polls
    * ``XEN_FREE_MEM_LEFT`` - free Xen memory the balancer aims to leave
    * ``XEN_FREE_MEM_MIN`` - free Xen memory below which an error is logged
    * ``MEM_OVERHEAD_FACTOR`` - multiplier applied to Xen memory figures
    * ``ALL_PHYS_MEM`` - scaled total physical memory, or 0 when the
      physinfo query raises xen.lowlevel.xc.Error
    """
    self.log = logging.getLogger('qmemman.systemstate')
    self.log.debug('SystemState()')

    self.domdict = {}
    self.xc = xen.lowlevel.xc.xc()
    self.xs = xen.lowlevel.xs.xs()

    self.BALOON_DELAY = 0.1
    self.XEN_FREE_MEM_LEFT = 50 * 1024 * 1024
    self.XEN_FREE_MEM_MIN = 25 * 1024 * 1024

    # Overhead of per-page Xen structures, taken from OpenStack
    # nova/virt/xenapi/driver.py
    # see https://wiki.openstack.org/wiki/XenServer/Overhead
    # we divide total and free physical memory by this to get
    # "assignable" memory
    self.MEM_OVERHEAD_FACTOR = 1.0 / 1.00781

    try:
        total_memory = self.xc.physinfo()['total_memory']
    except xen.lowlevel.xc.Error:
        self.ALL_PHYS_MEM = 0
    else:
        self.ALL_PHYS_MEM = int(
            total_memory * 1024 * self.MEM_OVERHEAD_FACTOR)
def add_domain(self, id):
    """Start tracking domain *id*.

    A fresh DomainState is registered under *id*; when xenstore already
    holds a ``memory/target`` entry for the domain, that value times 1024
    becomes the initial ``last_target``.
    """
    self.log.debug('add_domain(id={!r})'.format(id))
    state = DomainState(id)
    self.domdict[id] = state
    # TODO: move to DomainState.__init__
    stored_target = self.xs.read(
        '', '/local/domain/' + id + '/memory/target')
    if not stored_target:
        # nothing recorded in xenstore, keep the default of 0
        return
    state.last_target = int(stored_target) * 1024
def del_domain(self, id):
    """Stop tracking domain *id*.

    Raises KeyError when the domain is not being tracked.
    """
    self.log.debug('del_domain(id={!r})'.format(id))
    del self.domdict[id]
def get_free_xen_memory(self):
    """Return Xen free memory minus what domains may still claim.

    The free figure reported by Xen is scaled by MEM_OVERHEAD_FACTOR.
    Memory that a domain has been assigned (``last_target``) but does not
    hold yet (``memory_current``) is subtracted, because the domain is
    free to use it at any time.

    Assumes refresh_memactual() was called beforehand so the per-domain
    figures are up to date.  The result may be negative; that situation
    is logged as an error but not otherwise handled here.
    """
    free_reported = self.xc.physinfo()['free_memory']
    xen_free = int(free_reported * 1024 * self.MEM_OVERHEAD_FACTOR)

    assigned_but_unused = sum(
        max(0, dom.last_target - dom.memory_current)
        for dom in self.domdict.values())

    # If, at any time, Xen have less memory than XEN_FREE_MEM_MIN,
    # it is a failure of qmemman. Collect as much data as possible to
    # debug it
    if xen_free < self.XEN_FREE_MEM_MIN:
        headline = "Xen free = {!r} below acceptable value! "
    elif xen_free < assigned_but_unused + self.XEN_FREE_MEM_MIN:
        headline = "Xen free = {!r} too small for satisfy assignments! "
    else:
        headline = None

    if headline is not None:
        self.log.error(
            (headline + "assigned_but_unused={!r}, domdict={!r}").format(
                xen_free, assigned_but_unused, self.domdict))

    return xen_free - assigned_but_unused
# refresh information on memory assigned to all domains
def refresh_memactual(self):
    """Update the memory figures of every tracked domain from Xen.

    For each domain reported by ``xc.domain_getinfo()`` that is present in
    ``domdict`` this sets ``memory_current``, ``memory_actual`` and
    ``memory_maximum``.  Domains not being tracked are ignored, and tracked
    domains that Xen does not report are left untouched.
    """
    for info in self.xc.domain_getinfo():
        domid = str(info['domid'])
        if domid not in self.domdict:
            continue
        state = self.domdict[domid]

        # real memory usage
        state.memory_current = info['mem_kb'] * 1024
        # what VM is using or can use
        state.memory_actual = max(state.memory_current, state.last_target)

        state.memory_maximum = self.xs.read(
            '', '/local/domain/%s/memory/static-max' % str(domid))
        if state.memory_maximum:
            state.memory_maximum = int(state.memory_maximum) * 1024
        else:
            # the fallback used to be domain['maxmem_kb']*1024, but
            # domain['maxmem_kb'] changes in self.mem_set as well,
            # and this results in the memory never increasing
            # in fact, the only possible case of nonexisting
            # memory/static-max is dom0
            # see #307
            state.memory_maximum = self.ALL_PHYS_MEM
def clear_outdated_error_markers(self):
    """Reset ``no_progress`` / ``slow_memset_react`` where they are stale.

    The markers exclude a VM from memory balance.  They are cleared if:

    - the VM has responded to the previous request, i.e. its
      ``memory_actual`` is within XEN_FREE_MEM_LEFT/2 of ``last_target``, or
    - the VM requests more memory than it has assigned
      (``memory_actual`` below its preferred memory).

    The second condition avoids starving a VM, even when there is some
    free memory available.

    Domains whose figures are not known yet are left untouched instead of
    raising TypeError on a comparison with None.
    """
    margin = self.XEN_FREE_MEM_LEFT/2
    for dom in self.domdict.values():
        if dom.memory_actual is None:
            # never seen by refresh_memactual() - nothing to compare
            continue

        responded = dom.memory_actual <= dom.last_target + margin
        if not responded:
            # NOTE(review): prefmem() appears to be derived from mem_used
            # (print_stats only calls it when mem_used is set) - confirm
            # in qubes.qmemman.algo.  Without meminfo the "wants more
            # memory" test cannot be evaluated, so keep the markers.
            if dom.mem_used is None:
                continue
            if not dom.memory_actual < qubes.qmemman.algo.prefmem(dom):
                continue

        dom.slow_memset_react = False
        dom.no_progress = False
# the below works (and is fast), but then 'xm list' shows unchanged
# memory value
def mem_set(self, id, val):
    """Request that domain *id* balloons to *val*.

    *val* is in the unit used by the rest of this class; it is divided by
    1024 before being handed to Xen and xenstore.

    The request is recorded in ``last_target``, pushed to Xen (max memory
    and target memory) and finally written to the domain's
    ``memory/target`` xenstore entry.  The Xen calls are best effort:
    they can fail in the middle of domain shutdown, so ordinary errors
    are logged at debug level and ignored.  KeyboardInterrupt and
    SystemExit are *not* swallowed.
    """
    self.log.info('mem-set domain {} to {}'.format(id, val))
    self.domdict[id].last_target = val
    # can happen in the middle of domain shutdown
    # apparently xc.lowlevel throws exceptions too
    try:
        self.xc.domain_setmaxmem(int(id), int(val/1024) + 1024)  # LIBXL_MAXMEM_CONSTANT=1024
        self.xc.domain_set_target_mem(int(id), int(val / 1024))
    except Exception:
        # Deliberately not a bare "except:", which would also hide
        # KeyboardInterrupt/SystemExit from the caller.
        self.log.debug(
            'mem-set domain {}: Xen call failed, ignoring'.format(id),
            exc_info=True)
    # VM sees about 16MB memory less, so adjust for it here - qmemman
    # handle Xen view of memory
    self.xs.write('', '/local/domain/' + id + '/memory/target',
                  str(int(val/1024 - 16 * 1024)))
# this is called at the end of ballooning, when we have Xen free mem already
# make sure that past mem_set will not decrease Xen free mem
def inhibit_balloon_up(self):
    """Re-target domains that have not grown to their last target yet.

    Every domain whose ``memory_actual`` is known and lies more than
    200*1024 below its ``last_target`` gets a new mem_set() request equal
    to its ``memory_actual``, so that an earlier, still pending request
    does not consume Xen free memory afterwards.
    """
    self.log.debug('inhibit_balloon_up()')
    slack = 200*1024
    for domid, dom in self.domdict.items():
        actual = dom.memory_actual
        if actual is None or actual + slack >= dom.last_target:
            continue
        self.log.info(
            'Preventing balloon up to {}'.format(dom.last_target))
        self.mem_set(domid, actual)
# perform memory ballooning, across all domains, to add "memsize" to Xen
# free memory
def do_balloon(self, memsize):
    """Balloon domains down until *memsize* can be taken from Xen.

    Returns True once Xen free memory reaches
    ``memsize + XEN_FREE_MEM_MIN`` (after calling inhibit_balloon_up()).
    Returns False when the balloon algorithm has no more requests to make,
    or when free memory grew by less than CHECK_MB_S MB/s on average over
    the last CHECK_PERIOD_S seconds.
    """
    self.log.info('do_balloon(memsize={!r})'.format(memsize))
    CHECK_PERIOD_S = 3
    CHECK_MB_S = 100

    # every domain gets a fresh chance to act as a donor
    for state in self.domdict.values():
        state.no_progress = False

    #: number of loop iterations for CHECK_PERIOD_S seconds
    check_period = max(1, int((CHECK_PERIOD_S + 0.0) / self.BALOON_DELAY))
    #: number of free memory bytes expected to get during CHECK_PERIOD_S
    #: seconds
    check_delta = CHECK_PERIOD_S * CHECK_MB_S * 1024 * 1024
    #: helper array for holding free memory size, CHECK_PERIOD_S seconds
    #: ago, at every loop iteration
    xenfree_ring = [0] * check_period

    prev_memory_actual = None
    niter = 0
    while True:
        self.log.debug('niter={:2d}'.format(niter))
        self.refresh_memactual()
        xenfree = self.get_free_xen_memory()
        self.log.info('xenfree={!r}'.format(xenfree))
        if xenfree >= memsize + self.XEN_FREE_MEM_MIN:
            self.inhibit_balloon_up()
            return True

        # fail the request if over past CHECK_PERIOD_S seconds,
        # we got less than CHECK_MB_S MB/s on average
        ring_slot = niter % check_period
        if niter >= check_period and \
                xenfree < xenfree_ring[ring_slot] + check_delta:
            return False
        xenfree_ring[ring_slot] = xenfree

        if prev_memory_actual is not None:
            for domid, before in prev_memory_actual.items():
                if before == self.domdict[domid].memory_actual:
                    # domain not responding to memset requests, remove it
                    # from donors
                    self.domdict[domid].no_progress = True
                    self.log.info('domain {} stuck at {}'.format(
                        domid, self.domdict[domid].memory_actual))

        memset_reqs = qubes.qmemman.algo.balloon(
            memsize + self.XEN_FREE_MEM_LEFT - xenfree, self.domdict)
        self.log.info('memset_reqs={!r}'.format(memset_reqs))
        if len(memset_reqs) == 0:
            return False

        # remember where each donor stood when it was asked, so the next
        # iteration can tell whether it moved at all
        prev_memory_actual = {}
        for dom, mem in memset_reqs:
            self.mem_set(dom, mem)
            prev_memory_actual[dom] = self.domdict[dom].memory_actual

        self.log.debug('sleeping for {} s'.format(self.BALOON_DELAY))
        time.sleep(self.BALOON_DELAY)
        niter += 1
def refresh_meminfo(self, domid, untrusted_meminfo_key):
    """Feed a new meminfo report for *domid* into its state and rebalance.

    *untrusted_meminfo_key* is handed unmodified to
    qubes.qmemman.algo.refresh_meminfo_for_domain(); afterwards
    do_balance() is run.  Raises KeyError if *domid* is not tracked.
    """
    call_description = \
        'refresh_meminfo(domid={}, untrusted_meminfo_key={!r})'.format(
            domid, untrusted_meminfo_key)
    self.log.debug(call_description)

    domain = self.domdict[domid]
    qubes.qmemman.algo.refresh_meminfo_for_domain(
        domain, untrusted_meminfo_key)

    self.do_balance()
# is the computed balance request big enough?
# (so that we do not thrash with small adjustments)
def is_balance_req_significant(self, memset_reqs, xenfree):
    """Tell whether applying *memset_reqs* is worth the effort.

    *memset_reqs* is an iterable of ``(domain id, new target)`` pairs and
    *xenfree* the current Xen free memory.  Returns True when:

    - Xen free memory is more than 15 MiB below XEN_FREE_MEM_LEFT, or
    - some domain whose last target is positive but below its preferred
      memory would gain more than 15 MiB, or
    - the sum of all target changes plus the distance of *xenfree* from
      XEN_FREE_MEM_LEFT exceeds 150 MiB.
    """
    self.log.debug(
        'is_balance_req_significant(memset_reqs={}, xenfree={})'.format(
            memset_reqs, xenfree))

    MIN_TOTAL_MEMORY_TRANSFER = 150*1024*1024
    MIN_MEM_CHANGE_WHEN_UNDER_PREF = 15*1024*1024

    # If xenfree to low, return immediately
    if self.XEN_FREE_MEM_LEFT - xenfree > MIN_MEM_CHANGE_WHEN_UNDER_PREF:
        self.log.debug('xenfree is too low, returning')
        return True

    total_memory_transfer = 0
    for dom, mem in memset_reqs:
        state = self.domdict[dom]
        last_target = state.last_target
        memory_change = mem - last_target
        total_memory_transfer += abs(memory_change)
        pref = qubes.qmemman.algo.prefmem(state)

        below_pref = 0 < last_target < pref
        if below_pref and memory_change > MIN_MEM_CHANGE_WHEN_UNDER_PREF:
            self.log.info(
                'dom {} is below pref, allowing balance'.format(dom))
            return True

    xenfree_distance = abs(xenfree - self.XEN_FREE_MEM_LEFT)
    ret = total_memory_transfer + xenfree_distance > MIN_TOTAL_MEMORY_TRANSFER
    self.log.debug('is_balance_req_significant return {}'.format(ret))
    return ret
def print_stats(self, xenfree, memset_reqs):
    """Log one ``stat:`` line per domain with meminfo, plus a summary.

    Domains whose ``mem_used`` is None are left out.  The final line
    reports *xenfree* and *memset_reqs* as given.
    """
    for domid, dom in self.domdict.items():
        if dom.mem_used is None:
            continue
        self.log.info('stat: dom {!r} act={} pref={} last_target={}'
                      '{}{}'.format(
                          domid,
                          dom.memory_actual,
                          qubes.qmemman.algo.prefmem(dom),
                          dom.last_target,
                          ' no_progress' if dom.no_progress else '',
                          ' slow_memset_react'
                          if dom.slow_memset_react else ''))

    summary = 'stat: xenfree={} memset_reqs={}'.format(xenfree, memset_reqs)
    self.log.info(summary)
def do_balance(self):
    """Redistribute memory between domains according to the balance algo.

    Does nothing when ``/var/run/qubes/do-not-membalance`` exists or when
    the computed requests are not significant.  Otherwise the requests are
    applied in the order returned by qubes.qmemman.algo.balance().  Before
    each request Xen free memory is checked (up to five times, sleeping
    BALOON_DELAY in between); if it never becomes sufficient, the donors
    that did not shrink are flagged, the current domain receives whatever
    is available, and the balance is aborted.
    """
    self.log.debug('do_balance()')
    if os.path.isfile('/var/run/qubes/do-not-membalance'):
        self.log.debug('do-not-membalance file preset, returning')
        return

    self.refresh_memactual()
    self.clear_outdated_error_markers()
    xenfree = self.get_free_xen_memory()
    memset_reqs = qubes.qmemman.algo.balance(
        xenfree - self.XEN_FREE_MEM_LEFT, self.domdict)
    if not self.is_balance_req_significant(memset_reqs, xenfree):
        return

    self.print_stats(xenfree, memset_reqs)

    # snapshot taken before any request is sent, used below to tell a
    # domain that did not move at all from one that is merely slow
    prev_memactual = {
        domid: state.memory_actual
        for domid, state in self.domdict.items()}

    for dom, mem in memset_reqs:
        # Force to always have at least 0.9*self.XEN_FREE_MEM_LEFT (some
        # margin for rounding errors). Before giving memory to
        # domain, ensure that others have gave it back.
        # If not - wait a little.
        for ntries in range(5, 0, -1):
            if not (self.get_free_xen_memory()
                    - (mem - self.domdict[dom].memory_actual)
                    < 0.9*self.XEN_FREE_MEM_LEFT):
                # enough free memory, go on with this request
                break
            self.log.debug('do_balance dom={!r} sleeping ntries={}'.format(
                dom, ntries))
            time.sleep(self.BALOON_DELAY)
            self.refresh_memactual()
        else:
            # Waiting haven't helped; Find which domain get stuck and
            # abort balance (after distributing what we have)
            for dom2, mem2 in memset_reqs:
                if dom2 == dom:
                    # All donors have been processed
                    break
                donor = self.domdict[dom2]
                # allow some small margin
                if donor.memory_actual <= \
                        donor.last_target + self.XEN_FREE_MEM_LEFT/4:
                    continue
                if prev_memactual[dom2] == donor.memory_actual:
                    # VM didn't react to memory request at all,
                    # remove from donors
                    self.log.warning(
                        'dom {!r} didnt react to memory request'
                        ' (holds {}, requested balloon down to {})'
                        .format(dom2,
                                donor.memory_actual,
                                mem2))
                    donor.no_progress = True
                else:
                    self.log.warning('dom {!r} still hold more'
                                     ' memory than have assigned ({} > {})'
                                     .format(dom2,
                                             donor.memory_actual,
                                             mem2))
                    donor.slow_memset_react = True
            self.mem_set(
                dom,
                self.get_free_xen_memory()
                + self.domdict[dom].memory_actual
                - self.XEN_FREE_MEM_LEFT)
            return

        self.mem_set(dom, mem)
# for i in self.domdict.keys():
# print 'domain ', i, ' meminfo=', self.domdict[i].mem_used, 'actual mem', self.domdict[i].memory_actual
# print 'domain ', i, 'actual mem', self.domdict[i].memory_actual
# print 'xen free mem', self.get_free_xen_memory()