reflink.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466
#
# The Qubes OS Project, https://www.qubes-os.org/
#
# Copyright (C) 2018 Rusty Bird <rustybird@net-c.com>
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see <https://www.gnu.org/licenses/>.
#

''' Driver for handling VM images as files, without any device-mapper
    involvement. A reflink-capable filesystem is strongly recommended,
    but not required.
'''
  23. import asyncio
  24. import collections
  25. import errno
  26. import fcntl
  27. import functools
  28. import glob
  29. import logging
  30. import os
  31. import subprocess
  32. import tempfile
  33. from contextlib import contextmanager, suppress
  34. import qubes.storage
  35. BLKSIZE = 512
  36. FICLONE = 1074041865 # defined in <linux/fs.h>
  37. LOOP_SET_CAPACITY = 0x4C07 # defined in <linux/loop.h>
  38. LOGGER = logging.getLogger('qubes.storage.reflink')
  39. class ReflinkPool(qubes.storage.Pool):
  40. driver = 'file-reflink'
  41. _known_dir_path_prefixes = ['appvms', 'vm-templates']
  42. def __init__(self, dir_path, setup_check='yes', revisions_to_keep=1,
  43. **kwargs):
  44. super().__init__(revisions_to_keep=revisions_to_keep, **kwargs)
  45. self._volumes = {}
  46. self.dir_path = os.path.abspath(dir_path)
  47. self.setup_check = qubes.property.bool(None, None, setup_check)
  48. def setup(self):
  49. created = _make_dir(self.dir_path)
  50. if self.setup_check and not is_supported(self.dir_path):
  51. if created:
  52. _remove_empty_dir(self.dir_path)
  53. raise qubes.storage.StoragePoolException(
  54. 'The filesystem for {!r} does not support reflinks. If you'
  55. ' can live with VM startup delays and wasted disk space, pass'
  56. ' the "setup_check=no" option.'.format(self.dir_path))
  57. for dir_path_prefix in self._known_dir_path_prefixes:
  58. _make_dir(os.path.join(self.dir_path, dir_path_prefix))
  59. return self
  60. def init_volume(self, vm, volume_config):
  61. # Fail closed on any strange VM dir_path_prefix, just in case
  62. # /etc/udev/rules/00-qubes-ignore-devices.rules needs updating
  63. assert vm.dir_path_prefix in self._known_dir_path_prefixes, \
  64. 'Unknown dir_path_prefix {!r}'.format(vm.dir_path_prefix)
  65. volume_config['pool'] = self
  66. if 'revisions_to_keep' not in volume_config:
  67. volume_config['revisions_to_keep'] = self.revisions_to_keep
  68. if 'vid' not in volume_config:
  69. volume_config['vid'] = os.path.join(vm.dir_path_prefix, vm.name,
  70. volume_config['name'])
  71. volume = ReflinkVolume(**volume_config)
  72. self._volumes[volume_config['vid']] = volume
  73. return volume
  74. def list_volumes(self):
  75. return list(self._volumes.values())
  76. def get_volume(self, vid):
  77. return self._volumes[vid]
  78. def destroy(self):
  79. pass
  80. @property
  81. def config(self):
  82. return {
  83. 'name': self.name,
  84. 'dir_path': self.dir_path,
  85. 'driver': ReflinkPool.driver,
  86. 'revisions_to_keep': self.revisions_to_keep
  87. }
  88. @property
  89. def size(self):
  90. statvfs = os.statvfs(self.dir_path)
  91. return statvfs.f_frsize * statvfs.f_blocks
  92. @property
  93. def usage(self):
  94. statvfs = os.statvfs(self.dir_path)
  95. return statvfs.f_frsize * (statvfs.f_blocks - statvfs.f_bfree)
  96. def included_in(self, app):
  97. ''' Check if there is pool containing this one - either as a
  98. filesystem or its LVM volume'''
  99. return qubes.storage.search_pool_containing_dir(
  100. [pool for pool in app.pools.values() if pool is not self],
  101. self.dir_path)
  102. def _unblock(method):
  103. ''' Decorator transforming a synchronous volume method into a
  104. coroutine that runs the original method in the event loop's
  105. thread-based default executor, under a per-volume lock.
  106. '''
  107. @asyncio.coroutine
  108. @functools.wraps(method)
  109. def wrapper(self, *args, **kwargs):
  110. with (yield from self._lock): # pylint: disable=protected-access
  111. return (yield from asyncio.get_event_loop().run_in_executor(
  112. None, functools.partial(method, self, *args, **kwargs)))
  113. return wrapper
  114. class ReflinkVolume(qubes.storage.Volume):
  115. def __init__(self, *args, **kwargs):
  116. super().__init__(*args, **kwargs)
  117. self._lock = asyncio.Lock()
  118. self._path_vid = os.path.join(self.pool.dir_path, self.vid)
  119. self._path_clean = self._path_vid + '.img'
  120. self._path_dirty = self._path_vid + '-dirty.img'
  121. self._path_import = self._path_vid + '-import.img'
  122. self.path = self._path_dirty
  123. @_unblock
  124. def create(self):
  125. if self.save_on_stop and not self.snap_on_start:
  126. _create_sparse_file(self._path_clean, self.size)
  127. return self
  128. @_unblock
  129. def verify(self):
  130. if self.snap_on_start:
  131. img = self.source._path_clean # pylint: disable=protected-access
  132. elif self.save_on_stop:
  133. img = self._path_clean
  134. else:
  135. img = None
  136. if img is None or os.path.exists(img):
  137. return True
  138. raise qubes.storage.StoragePoolException(
  139. 'Missing image file {!r} for volume {}'.format(img, self.vid))
  140. @_unblock
  141. def remove(self):
  142. ''' Drop volume object from pool; remove volume images from
  143. oldest to newest; remove empty VM directory.
  144. '''
  145. self.pool._volumes.pop(self, None) # pylint: disable=protected-access
  146. self._cleanup()
  147. self._prune_revisions(keep=0)
  148. _remove_file(self._path_clean)
  149. _remove_file(self._path_dirty)
  150. _remove_empty_dir(os.path.dirname(self._path_dirty))
  151. return self
  152. def _cleanup(self):
  153. for tmp in glob.iglob(glob.escape(self._path_vid) + '*.img*~*'):
  154. _remove_file(tmp)
  155. _remove_file(self._path_import)
  156. def is_outdated(self):
  157. if self.snap_on_start:
  158. with suppress(FileNotFoundError):
  159. # pylint: disable=protected-access
  160. return (os.path.getmtime(self.source._path_clean) >
  161. os.path.getmtime(self._path_clean))
  162. return False
  163. def is_dirty(self):
  164. return self.save_on_stop and os.path.exists(self._path_dirty)
  165. @_unblock
  166. def start(self):
  167. self._cleanup()
  168. if self.is_dirty(): # implies self.save_on_stop
  169. return self
  170. if self.snap_on_start:
  171. # pylint: disable=protected-access
  172. _copy_file(self.source._path_clean, self._path_clean)
  173. if self.snap_on_start or self.save_on_stop:
  174. _copy_file(self._path_clean, self._path_dirty)
  175. else:
  176. _create_sparse_file(self._path_dirty, self.size)
  177. return self
  178. @_unblock
  179. def stop(self):
  180. if self.save_on_stop:
  181. self._commit(self._path_dirty)
  182. else:
  183. _remove_file(self._path_dirty)
  184. _remove_file(self._path_clean)
  185. return self
  186. def _commit(self, path_from):
  187. self._add_revision()
  188. self._prune_revisions()
  189. _rename_file(path_from, self._path_clean)
  190. def _add_revision(self):
  191. if self.revisions_to_keep == 0:
  192. return
  193. ctime = os.path.getctime(self._path_clean)
  194. timestamp = qubes.storage.isodate(int(ctime))
  195. _copy_file(self._path_clean,
  196. self._path_revision(self._next_revision_number, timestamp))
  197. def _prune_revisions(self, keep=None):
  198. if keep is None:
  199. keep = self.revisions_to_keep
  200. # pylint: disable=invalid-unary-operand-type
  201. for number, timestamp in list(self.revisions.items())[:-keep or None]:
  202. _remove_file(self._path_revision(number, timestamp))
  203. @_unblock
  204. def revert(self, revision=None):
  205. if self.is_dirty():
  206. raise qubes.storage.StoragePoolException(
  207. 'Cannot revert: {} is not cleanly stopped'.format(self.vid))
  208. if revision is None:
  209. number, timestamp = list(self.revisions.items())[-1]
  210. else:
  211. number, timestamp = revision, None
  212. path_revision = self._path_revision(number, timestamp)
  213. self._add_revision()
  214. _rename_file(path_revision, self._path_clean)
  215. return self
  216. @_unblock
  217. def resize(self, size):
  218. ''' Expand a read-write volume image; notify any corresponding
  219. loop devices of the size change.
  220. '''
  221. if not self.rw:
  222. raise qubes.storage.StoragePoolException(
  223. 'Cannot resize: {} is read-only'.format(self.vid))
  224. if size < self.size:
  225. raise qubes.storage.StoragePoolException(
  226. 'For your own safety, shrinking of {} is disabled'
  227. ' ({} < {}). If you really know what you are doing,'
  228. ' use "truncate" manually.'.format(self.vid, size, self.size))
  229. try: # assume volume is not (cleanly) stopped ...
  230. _resize_file(self._path_dirty, size)
  231. update = True
  232. except FileNotFoundError: # ... but it actually is.
  233. _resize_file(self._path_clean, size)
  234. update = False
  235. self.size = size
  236. if update:
  237. _update_loopdev_sizes(self._path_dirty)
  238. return self
  239. def export(self):
  240. if not self.save_on_stop:
  241. raise NotImplementedError(
  242. 'Cannot export: {} is not save_on_stop'.format(self.vid))
  243. return self._path_clean
  244. @_unblock
  245. def import_data(self):
  246. if not self.save_on_stop:
  247. raise NotImplementedError(
  248. 'Cannot import_data: {} is not save_on_stop'.format(self.vid))
  249. _create_sparse_file(self._path_import, self.size)
  250. return self._path_import
  251. def _import_data_end(self, success):
  252. if success:
  253. self._commit(self._path_import)
  254. else:
  255. _remove_file(self._path_import)
  256. return self
  257. import_data_end = _unblock(_import_data_end)
  258. @_unblock
  259. def import_volume(self, src_volume):
  260. if not self.save_on_stop:
  261. return self
  262. try:
  263. success = False
  264. _copy_file(src_volume.export(), self._path_import)
  265. success = True
  266. finally:
  267. self._import_data_end(success)
  268. return self
  269. def _path_revision(self, number, timestamp=None):
  270. if timestamp is None:
  271. timestamp = self.revisions[number]
  272. return self._path_clean + '.' + number + '@' + timestamp + 'Z'
  273. @property
  274. def _next_revision_number(self):
  275. numbers = self.revisions.keys()
  276. if numbers:
  277. return str(int(list(numbers)[-1]) + 1)
  278. return '1'
  279. @property
  280. def revisions(self):
  281. prefix = self._path_clean + '.'
  282. paths = glob.iglob(glob.escape(prefix) + '*@*Z')
  283. items = (path[len(prefix):-1].split('@') for path in paths)
  284. return collections.OrderedDict(sorted(items,
  285. key=lambda item: int(item[0])))
  286. @property
  287. def usage(self):
  288. ''' Return volume disk usage from the VM's perspective. It is
  289. usually much lower from the host's perspective due to CoW.
  290. '''
  291. with suppress(FileNotFoundError):
  292. return _get_file_disk_usage(self._path_dirty)
  293. with suppress(FileNotFoundError):
  294. return _get_file_disk_usage(self._path_clean)
  295. return 0
  296. @contextmanager
  297. def _replace_file(dst):
  298. ''' Yield a tempfile whose name starts with dst, creating the last
  299. directory component if necessary. If the block does not raise
  300. an exception, flush+fsync the tempfile and rename it to dst.
  301. '''
  302. tmp_dir, prefix = os.path.split(dst + '~')
  303. _make_dir(tmp_dir)
  304. tmp = tempfile.NamedTemporaryFile(dir=tmp_dir, prefix=prefix, delete=False)
  305. try:
  306. yield tmp
  307. tmp.flush()
  308. os.fsync(tmp.fileno())
  309. tmp.close()
  310. _rename_file(tmp.name, dst)
  311. except:
  312. tmp.close()
  313. _remove_file(tmp.name)
  314. raise
  315. def _get_file_disk_usage(path):
  316. ''' Return real disk usage (not logical file size) of a file. '''
  317. return os.stat(path).st_blocks * BLKSIZE
  318. def _fsync_dir(path):
  319. dir_fd = os.open(path, os.O_RDONLY | os.O_DIRECTORY)
  320. try:
  321. os.fsync(dir_fd)
  322. finally:
  323. os.close(dir_fd)
  324. def _make_dir(path):
  325. ''' mkdir path, ignoring FileExistsError; return whether we
  326. created it.
  327. '''
  328. with suppress(FileExistsError):
  329. os.mkdir(path)
  330. _fsync_dir(os.path.dirname(path))
  331. LOGGER.info('Created directory: %s', path)
  332. return True
  333. return False
  334. def _remove_file(path):
  335. with suppress(FileNotFoundError):
  336. os.remove(path)
  337. _fsync_dir(os.path.dirname(path))
  338. LOGGER.info('Removed file: %s', path)
  339. def _remove_empty_dir(path):
  340. try:
  341. os.rmdir(path)
  342. _fsync_dir(os.path.dirname(path))
  343. LOGGER.info('Removed empty directory: %s', path)
  344. except OSError as ex:
  345. if ex.errno not in (errno.ENOENT, errno.ENOTEMPTY):
  346. raise
  347. def _rename_file(src, dst):
  348. os.rename(src, dst)
  349. dst_dir = os.path.dirname(dst)
  350. src_dir = os.path.dirname(src)
  351. _fsync_dir(dst_dir)
  352. if src_dir != dst_dir:
  353. _fsync_dir(src_dir)
  354. LOGGER.info('Renamed file: %s -> %s', src, dst)
  355. def _resize_file(path, size):
  356. ''' Resize an existing file. '''
  357. with open(path, 'rb+') as file:
  358. file.truncate(size)
  359. os.fsync(file.fileno())
  360. def _create_sparse_file(path, size):
  361. ''' Create an empty sparse file. '''
  362. with _replace_file(path) as tmp:
  363. tmp.truncate(size)
  364. LOGGER.info('Created sparse file: %s', tmp.name)
  365. def _update_loopdev_sizes(img):
  366. ''' Resolve img; update the size of loop devices backed by it. '''
  367. needle = os.fsencode(os.path.realpath(img)) + b'\n'
  368. for sys_path in glob.iglob('/sys/block/loop[0-9]*/loop/backing_file'):
  369. try:
  370. with open(sys_path, 'rb') as sys_io:
  371. if sys_io.read() != needle:
  372. continue
  373. except FileNotFoundError:
  374. continue
  375. with open('/dev/' + sys_path.split('/')[3]) as dev_io:
  376. fcntl.ioctl(dev_io.fileno(), LOOP_SET_CAPACITY)
  377. def _attempt_ficlone(src, dst):
  378. try:
  379. fcntl.ioctl(dst.fileno(), FICLONE, src.fileno())
  380. return True
  381. except OSError:
  382. return False
  383. def _copy_file(src, dst):
  384. ''' Copy src to dst as a reflink if possible, sparse if not. '''
  385. with _replace_file(dst) as tmp_io:
  386. with open(src, 'rb') as src_io:
  387. if _attempt_ficlone(src_io, tmp_io):
  388. LOGGER.info('Reflinked file: %s -> %s', src, tmp_io.name)
  389. return True
  390. LOGGER.info('Copying file: %s -> %s', src, tmp_io.name)
  391. cmd = 'cp', '--sparse=always', src, tmp_io.name
  392. p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  393. if p.returncode != 0:
  394. raise qubes.storage.StoragePoolException(str(p))
  395. return False
  396. def is_supported(dst_dir, src_dir=None):
  397. ''' Return whether destination directory supports reflink copies
  398. from source directory. (A temporary file is created in each
  399. directory, using O_TMPFILE if possible.)
  400. '''
  401. if src_dir is None:
  402. src_dir = dst_dir
  403. with tempfile.TemporaryFile(dir=src_dir) as src, \
  404. tempfile.TemporaryFile(dir=dst_dir) as dst:
  405. src.write(b'foo') # don't let any fs get clever with empty files
  406. return _attempt_ficlone(src, dst)