reflink.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484
  1. #
  2. # The Qubes OS Project, https://www.qubes-os.org/
  3. #
  4. # Copyright (C) 2018 Rusty Bird <rustybird@net-c.com>
  5. #
  6. # This library is free software; you can redistribute it and/or
  7. # modify it under the terms of the GNU Lesser General Public
  8. # License as published by the Free Software Foundation; either
  9. # version 2.1 of the License, or (at your option) any later version.
  10. #
  11. # This library is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. # Lesser General Public License for more details.
  15. #
  16. # You should have received a copy of the GNU Lesser General Public
  17. # License along with this library; if not, see <https://www.gnu.org/licenses/>.
  18. #
  19. ''' Driver for handling VM images as files, without any device-mapper
  20. involvement. A reflink-capable filesystem is strongly recommended,
  21. but not required.
  22. '''
  23. import asyncio
  24. import collections
  25. import errno
  26. import fcntl
  27. import functools
  28. import glob
  29. import logging
  30. import os
  31. import subprocess
  32. import tempfile
  33. import threading
  34. from contextlib import contextmanager, suppress
  35. import qubes.storage
  36. FICLONE = 1074041865 # defined in <linux/fs.h>, assuming sizeof(int)==4
  37. LOOP_SET_CAPACITY = 0x4C07 # defined in <linux/loop.h>
  38. LOGGER = logging.getLogger('qubes.storage.reflink')
  39. def _coroutinized(function):
  40. ''' Decorator transforming a synchronous function into a coroutine
  41. that runs the function in the event loop's thread-based
  42. default executor.
  43. '''
  44. @asyncio.coroutine
  45. @functools.wraps(function)
  46. def wrapper(*args, **kwargs):
  47. return (yield from asyncio.get_event_loop().run_in_executor(
  48. None, functools.partial(function, *args, **kwargs)))
  49. return wrapper
  50. class ReflinkPool(qubes.storage.Pool):
  51. driver = 'file-reflink'
  52. _known_dir_path_prefixes = ['appvms', 'vm-templates']
  53. def __init__(self, *, name, revisions_to_keep=1,
  54. dir_path, setup_check=True):
  55. super().__init__(name=name, revisions_to_keep=revisions_to_keep)
  56. self._setup_check = qubes.property.bool(None, None, setup_check)
  57. self._volumes = {}
  58. self.dir_path = os.path.abspath(dir_path)
  59. @_coroutinized
  60. def setup(self):
  61. created = _make_dir(self.dir_path)
  62. if self._setup_check and not is_supported(self.dir_path):
  63. if created:
  64. _remove_empty_dir(self.dir_path)
  65. raise qubes.storage.StoragePoolException(
  66. 'The filesystem for {!r} does not support reflinks. If you'
  67. ' can live with VM startup delays and wasted disk space, pass'
  68. ' the "setup_check=False" option.'.format(self.dir_path))
  69. for dir_path_prefix in self._known_dir_path_prefixes:
  70. _make_dir(os.path.join(self.dir_path, dir_path_prefix))
  71. return self
  72. def init_volume(self, vm, volume_config):
  73. # Fail closed on any strange VM dir_path_prefix, just in case
  74. # /etc/udev/rules.d/00-qubes-ignore-devices.rules needs update
  75. assert vm.dir_path_prefix in self._known_dir_path_prefixes, \
  76. 'Unknown dir_path_prefix {!r}'.format(vm.dir_path_prefix)
  77. volume_config['pool'] = self
  78. if 'revisions_to_keep' not in volume_config:
  79. volume_config['revisions_to_keep'] = self.revisions_to_keep
  80. if 'vid' not in volume_config:
  81. volume_config['vid'] = os.path.join(vm.dir_path_prefix, vm.name,
  82. volume_config['name'])
  83. volume = ReflinkVolume(**volume_config)
  84. self._volumes[volume_config['vid']] = volume
  85. return volume
  86. def list_volumes(self):
  87. return list(self._volumes.values())
  88. def get_volume(self, vid):
  89. return self._volumes[vid]
  90. def destroy(self):
  91. pass
  92. @property
  93. def config(self):
  94. return {
  95. 'name': self.name,
  96. 'dir_path': self.dir_path,
  97. 'driver': ReflinkPool.driver,
  98. 'revisions_to_keep': self.revisions_to_keep
  99. }
  100. @property
  101. def size(self):
  102. statvfs = os.statvfs(self.dir_path)
  103. return statvfs.f_frsize * statvfs.f_blocks
  104. @property
  105. def usage(self):
  106. statvfs = os.statvfs(self.dir_path)
  107. return statvfs.f_frsize * (statvfs.f_blocks - statvfs.f_bfree)
  108. def included_in(self, app):
  109. ''' Check if there is pool containing this one - either as a
  110. filesystem or its LVM volume'''
  111. return qubes.storage.search_pool_containing_dir(
  112. [pool for pool in app.pools.values() if pool is not self],
  113. self.dir_path)
  114. def _locked(method):
  115. ''' Decorator transforming a synchronous volume method to run
  116. under the volume lock.
  117. '''
  118. @functools.wraps(method)
  119. def wrapper(self, *args, **kwargs):
  120. with self._lock: # pylint: disable=protected-access
  121. return method(self, *args, **kwargs)
  122. return wrapper
  123. class ReflinkVolume(qubes.storage.Volume):
  124. def __init__(self, *args, **kwargs):
  125. super().__init__(*args, **kwargs)
  126. self._lock = threading.Lock()
  127. self._path_vid = os.path.join(self.pool.dir_path, self.vid)
  128. self._path_clean = self._path_vid + '.img'
  129. self._path_dirty = self._path_vid + '-dirty.img'
  130. self._path_import = self._path_vid + '-import.img'
  131. self.path = self._path_dirty
  132. @_coroutinized
  133. @_locked
  134. def create(self):
  135. self._remove_all_images()
  136. if self.save_on_stop and not self.snap_on_start:
  137. _create_sparse_file(self._path_clean, self._size)
  138. return self
  139. @_coroutinized
  140. def verify(self):
  141. if self.snap_on_start:
  142. img = self.source._path_clean # pylint: disable=protected-access
  143. elif self.save_on_stop:
  144. img = self._path_clean
  145. else:
  146. img = None
  147. if img is None or os.path.exists(img):
  148. return True
  149. raise qubes.storage.StoragePoolException(
  150. 'Missing image file {!r} for volume {}'.format(img, self.vid))
  151. @_coroutinized
  152. @_locked
  153. def remove(self):
  154. self.pool._volumes.pop(self, None) # pylint: disable=protected-access
  155. self._remove_all_images()
  156. _remove_empty_dir(os.path.dirname(self._path_vid))
  157. return self
  158. def _remove_all_images(self):
  159. self._remove_incomplete_images()
  160. self._prune_revisions(keep=0)
  161. _remove_file(self._path_clean)
  162. _remove_file(self._path_dirty)
  163. def _remove_incomplete_images(self):
  164. for tmp in glob.iglob(glob.escape(self._path_vid) + '*.img*~*'):
  165. _remove_file(tmp)
  166. _remove_file(self._path_import)
  167. def is_outdated(self):
  168. if self.snap_on_start:
  169. with suppress(FileNotFoundError):
  170. # pylint: disable=protected-access
  171. return (os.path.getmtime(self.source._path_clean) >
  172. os.path.getmtime(self._path_clean))
  173. return False
  174. def is_dirty(self):
  175. return self.save_on_stop and os.path.exists(self._path_dirty)
  176. @_coroutinized
  177. @_locked
  178. def start(self):
  179. self._remove_incomplete_images()
  180. if not self.is_dirty():
  181. if self.snap_on_start:
  182. # pylint: disable=protected-access
  183. _copy_file(self.source._path_clean, self._path_clean)
  184. if self.snap_on_start or self.save_on_stop:
  185. _copy_file(self._path_clean, self._path_dirty)
  186. else:
  187. # Preferably use the size of a leftover image, in case
  188. # the volume was previously resized - but then a crash
  189. # prevented qubes.xml serialization of the new size.
  190. _create_sparse_file(self._path_dirty, self._get_size())
  191. return self
  192. @_coroutinized
  193. @_locked
  194. def stop(self):
  195. if self.save_on_stop:
  196. self._commit(self._path_dirty)
  197. else:
  198. if not self.snap_on_start:
  199. self._get_size() # preserve manual resize of image
  200. _remove_file(self._path_dirty)
  201. _remove_file(self._path_clean)
  202. return self
  203. def _commit(self, path_from):
  204. self._add_revision()
  205. self._prune_revisions()
  206. _fsync_path(path_from)
  207. _rename_file(path_from, self._path_clean)
  208. def _add_revision(self):
  209. if self.revisions_to_keep == 0:
  210. return
  211. ctime = os.path.getctime(self._path_clean)
  212. timestamp = qubes.storage.isodate(int(ctime))
  213. _copy_file(self._path_clean,
  214. self._path_revision(self._next_revision_number, timestamp))
  215. def _prune_revisions(self, keep=None):
  216. if keep is None:
  217. keep = self.revisions_to_keep
  218. # pylint: disable=invalid-unary-operand-type
  219. for number, timestamp in list(self.revisions.items())[:-keep or None]:
  220. _remove_file(self._path_revision(number, timestamp))
  221. @_coroutinized
  222. @_locked
  223. def revert(self, revision=None):
  224. if self.is_dirty():
  225. raise qubes.storage.StoragePoolException(
  226. 'Cannot revert: {} is not cleanly stopped'.format(self.vid))
  227. if revision is None:
  228. number, timestamp = list(self.revisions.items())[-1]
  229. else:
  230. number, timestamp = revision, None
  231. path_revision = self._path_revision(number, timestamp)
  232. self._add_revision()
  233. _rename_file(path_revision, self._path_clean)
  234. return self
  235. @_coroutinized
  236. @_locked
  237. def resize(self, size):
  238. ''' Resize a read-write volume; notify any corresponding loop
  239. devices of the size change.
  240. '''
  241. if not self.rw:
  242. raise qubes.storage.StoragePoolException(
  243. 'Cannot resize: {} is read-only'.format(self.vid))
  244. for path in (self._path_dirty, self._path_clean):
  245. with suppress(FileNotFoundError):
  246. _resize_file(path, size)
  247. break
  248. self._size = size
  249. if path == self._path_dirty:
  250. _update_loopdev_sizes(self._path_dirty)
  251. return self
  252. def export(self):
  253. if not self.save_on_stop:
  254. raise NotImplementedError(
  255. 'Cannot export: {} is not save_on_stop'.format(self.vid))
  256. return self._path_clean
  257. @_coroutinized
  258. @_locked
  259. def import_data(self, size):
  260. if not self.save_on_stop:
  261. raise NotImplementedError(
  262. 'Cannot import_data: {} is not save_on_stop'.format(self.vid))
  263. _create_sparse_file(self._path_import, size)
  264. return self._path_import
  265. def _import_data_end(self, success):
  266. (self._commit if success else _remove_file)(self._path_import)
  267. return self
  268. import_data_end = _coroutinized(_locked(_import_data_end))
  269. @_coroutinized
  270. @_locked
  271. def import_volume(self, src_volume):
  272. if self.save_on_stop:
  273. try:
  274. success = False
  275. _copy_file(src_volume.export(), self._path_import)
  276. success = True
  277. finally:
  278. self._import_data_end(success)
  279. return self
  280. def _path_revision(self, number, timestamp=None):
  281. if timestamp is None:
  282. timestamp = self.revisions[number]
  283. return self._path_clean + '.' + number + '@' + timestamp + 'Z'
  284. @property
  285. def _next_revision_number(self):
  286. numbers = self.revisions.keys()
  287. if numbers:
  288. return str(int(list(numbers)[-1]) + 1)
  289. return '1'
  290. @property
  291. def revisions(self):
  292. prefix = self._path_clean + '.'
  293. paths = glob.iglob(glob.escape(prefix) + '*@*Z')
  294. items = (path[len(prefix):-1].split('@') for path in paths)
  295. return collections.OrderedDict(sorted(items,
  296. key=lambda item: int(item[0])))
  297. def _get_size(self):
  298. for path in (self._path_dirty, self._path_clean):
  299. with suppress(FileNotFoundError):
  300. self._size = os.path.getsize(path)
  301. break
  302. return self._size
  303. size = property(_locked(_get_size))
  304. @property
  305. def usage(self):
  306. ''' Return volume disk usage from the VM's perspective. It is
  307. usually much lower from the host's perspective due to CoW.
  308. '''
  309. for path in (self._path_dirty, self._path_clean):
  310. with suppress(FileNotFoundError):
  311. return os.stat(path).st_blocks * 512
  312. return 0
  313. @contextmanager
  314. def _replace_file(dst):
  315. ''' Yield a tempfile whose name starts with dst, creating the last
  316. directory component if necessary. If the block does not raise
  317. an exception, safely rename the tempfile to dst.
  318. '''
  319. tmp_dir, prefix = os.path.split(dst + '~')
  320. _make_dir(tmp_dir)
  321. tmp = tempfile.NamedTemporaryFile(dir=tmp_dir, prefix=prefix, delete=False)
  322. try:
  323. yield tmp
  324. tmp.flush()
  325. os.fsync(tmp.fileno())
  326. tmp.close()
  327. _rename_file(tmp.name, dst)
  328. except:
  329. tmp.close()
  330. _remove_file(tmp.name)
  331. raise
  332. def _fsync_path(path):
  333. fd = os.open(path, os.O_RDONLY) # works for a file or a directory
  334. try:
  335. os.fsync(fd)
  336. finally:
  337. os.close(fd)
  338. def _make_dir(path):
  339. ''' mkdir path, ignoring FileExistsError; return whether we
  340. created it.
  341. '''
  342. with suppress(FileExistsError):
  343. os.mkdir(path)
  344. _fsync_path(os.path.dirname(path))
  345. LOGGER.info('Created directory: %s', path)
  346. return True
  347. return False
  348. def _remove_file(path):
  349. with suppress(FileNotFoundError):
  350. os.remove(path)
  351. _fsync_path(os.path.dirname(path))
  352. LOGGER.info('Removed file: %s', path)
  353. def _remove_empty_dir(path):
  354. try:
  355. os.rmdir(path)
  356. _fsync_path(os.path.dirname(path))
  357. LOGGER.info('Removed empty directory: %s', path)
  358. except OSError as ex:
  359. if ex.errno not in (errno.ENOENT, errno.ENOTEMPTY):
  360. raise
  361. def _rename_file(src, dst):
  362. os.rename(src, dst)
  363. dst_dir = os.path.dirname(dst)
  364. src_dir = os.path.dirname(src)
  365. _fsync_path(dst_dir)
  366. if src_dir != dst_dir:
  367. _fsync_path(src_dir)
  368. LOGGER.info('Renamed file: %s -> %s', src, dst)
  369. def _resize_file(path, size):
  370. ''' Resize an existing file. '''
  371. with open(path, 'rb+') as file:
  372. file.truncate(size)
  373. os.fsync(file.fileno())
  374. def _create_sparse_file(path, size):
  375. ''' Create an empty sparse file. '''
  376. with _replace_file(path) as tmp:
  377. tmp.truncate(size)
  378. LOGGER.info('Created sparse file: %s', tmp.name)
  379. def _update_loopdev_sizes(img):
  380. ''' Resolve img; update the size of loop devices backed by it. '''
  381. needle = os.fsencode(os.path.realpath(img)) + b'\n'
  382. for sys_path in glob.iglob('/sys/block/loop[0-9]*/loop/backing_file'):
  383. try:
  384. with open(sys_path, 'rb') as sys_io:
  385. if sys_io.read() != needle:
  386. continue
  387. except FileNotFoundError:
  388. continue
  389. with open('/dev/' + sys_path.split('/')[3], 'rb') as dev_io:
  390. fcntl.ioctl(dev_io.fileno(), LOOP_SET_CAPACITY)
  391. def _attempt_ficlone(src, dst):
  392. try:
  393. fcntl.ioctl(dst.fileno(), FICLONE, src.fileno())
  394. return True
  395. except OSError as ex:
  396. if ex.errno not in (errno.EBADF, errno.EINVAL,
  397. errno.EOPNOTSUPP, errno.EXDEV):
  398. raise
  399. return False
  400. def _copy_file(src, dst):
  401. ''' Copy src to dst as a reflink if possible, sparse if not. '''
  402. with _replace_file(dst) as tmp_io:
  403. with open(src, 'rb') as src_io:
  404. if _attempt_ficlone(src_io, tmp_io):
  405. LOGGER.info('Reflinked file: %s -> %s', src, tmp_io.name)
  406. return True
  407. LOGGER.info('Copying file: %s -> %s', src, tmp_io.name)
  408. cmd = 'cp', '--sparse=always', src, tmp_io.name
  409. p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
  410. check=False)
  411. if p.returncode != 0:
  412. raise qubes.storage.StoragePoolException(str(p))
  413. return False
  414. def is_supported(dst_dir, src_dir=None):
  415. ''' Return whether destination directory supports reflink copies
  416. from source directory. (A temporary file is created in each
  417. directory, using O_TMPFILE if possible.)
  418. '''
  419. if src_dir is None:
  420. src_dir = dst_dir
  421. with tempfile.TemporaryFile(dir=src_dir) as src, \
  422. tempfile.TemporaryFile(dir=dst_dir) as dst:
  423. src.write(b'foo') # don't let any fs get clever with empty files
  424. return _attempt_ficlone(src, dst)