|
@@ -0,0 +1,206 @@
|
|
|
+#!/usr/bin/python2
|
|
|
+# -*- encoding: utf8 -*-
|
|
|
+#
|
|
|
+# The Qubes OS Project, http://www.qubes-os.org
|
|
|
+#
|
|
|
+# Copyright (C) 2016 Marek Marczykowski-Górecki
|
|
|
+# <marmarek@invisiblethingslab.com>
|
|
|
+#
|
|
|
+# This program is free software; you can redistribute it and/or modify
|
|
|
+# it under the terms of the GNU General Public License as published by
|
|
|
+# the Free Software Foundation; either version 2 of the License, or
|
|
|
+# (at your option) any later version.
|
|
|
+#
|
|
|
+# This program is distributed in the hope that it will be useful,
|
|
|
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
+# GNU General Public License for more details.
|
|
|
+#
|
|
|
+# You should have received a copy of the GNU General Public License along
|
|
|
+# with this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
|
+import argparse
|
|
|
+import functools
|
|
|
+import subprocess
|
|
|
+import tarfile
|
|
|
+import io
|
|
|
+
|
|
|
# Size in bytes of the scratch buffers allocated with bytearray(BUF_SIZE):
# 800 blocks of 512 bytes each.
BUF_SIZE = 800 * 512
|
|
|
+
|
|
|
+
|
|
|
class TarSparseInfo(tarfile.TarInfo):
    '''TarInfo variant that can emit a GNU sparse-file header.

    When a sparse map is given, the entry type is set to
    tarfile.GNUTYPE_SPARSE, the GNU-specific fields (including the first
    four map entries) are placed in the 'prefix' field of the header, and
    any further map entries are appended as 512-byte extension headers
    holding 21 entries each.
    '''

    def __init__(self, name="", sparsemap=None):
        '''
        :param name: member name stored in the header
        :param sparsemap: iterable of (offset, size) describing the data \
            regions of the file; None creates a regular (non-sparse) entry
        '''
        super(TarSparseInfo, self).__init__(name)
        if sparsemap is not None:
            self.type = tarfile.GNUTYPE_SPARSE
            # Materialize first: the caller may pass a one-shot iterator, so
            # everything below must work from the stored list, never from the
            # (possibly exhausted) argument.
            self.sparsemap = list(sparsemap)
            # compact size - only the bytes actually stored in the archive
            self.size = sum(chunk[1] for chunk in self.sparsemap)
        else:
            self.sparsemap = []

    @property
    def realsize(self):
        '''End offset of the last map entry, or plain size if there is no map
        '''
        if self.sparsemap:
            last = self.sparsemap[-1]
            return last[0] + last[1]
        return self.size

    def sparse_header_chunk(self, index):
        '''Return the 24-byte (offset, size) header entry for map item *index*

        Indexes past the end of the map produce an all-NUL entry.
        '''
        if index < len(self.sparsemap):
            return ''.join([
                tarfile.itn(self.sparsemap[index][0], 12, tarfile.GNU_FORMAT),
                tarfile.itn(self.sparsemap[index][1], 12, tarfile.GNU_FORMAT),
            ])
        return '\0' * 12 * 2

    def get_gnu_header(self):
        '''Part placed in 'prefix' field of posix header'''

        parts = [
            tarfile.itn(self.mtime, 12, tarfile.GNU_FORMAT),  # atime
            tarfile.itn(self.mtime, 12, tarfile.GNU_FORMAT),  # ctime
            tarfile.itn(0, 12, tarfile.GNU_FORMAT),  # offset
            tarfile.stn('', 4),  # longnames
            '\0',  # unused_pad2
        ]
        # the main header has room for the first four map entries only
        parts += [self.sparse_header_chunk(i) for i in range(4)]
        parts += [
            '\1' if len(self.sparsemap) > 4 else '\0',  # isextended
            tarfile.itn(self.realsize, 12, tarfile.GNU_FORMAT),  # realsize
        ]
        return ''.join(parts)

    def get_info(self, encoding, errors):
        '''Return the header fields dict with the GNU sparse data added'''
        info = super(TarSparseInfo, self).get_info(encoding, errors)
        # place GNU extension into the 'prefix' field
        info['prefix'] = self.get_gnu_header()
        return info

    def tobuf(self, format=tarfile.DEFAULT_FORMAT, encoding=tarfile.ENCODING,
            errors="strict"):
        '''Return the header block(s), including sparse extension headers

        Extension headers are appended only when the map has more than four
        entries.
        '''
        # pylint: disable=redefined-builtin
        header_buf = super(TarSparseInfo, self).tobuf(format, encoding, errors)
        if len(self.sparsemap) > 4:
            return header_buf + ''.join(self.create_ext_sparse_headers())
        return header_buf

    def create_ext_sparse_headers(self):
        '''Yield 512-byte extension headers for map entries from the fifth on

        Each header carries 21 entries followed by a one-byte flag telling
        whether another extension header follows.
        '''
        map_len = len(self.sparsemap)
        for ext_hdr in range(4, map_len, 21):
            sparse_parts = [self.sparse_header_chunk(i) for i in
                range(ext_hdr, ext_hdr+21)]
            # isextended flag of this extension header
            sparse_parts.append('\1' if ext_hdr+21 < map_len else '\0')
            yield tarfile.stn(''.join(sparse_parts), 512)
|
|
|
+
|
|
|
+
|
|
|
def get_sparse_map(input_file, buf_size=None):
    '''
    Return map of the file where actual data is present, ignoring zero-ed
    blocks. Last entry of the map spans to the end of file, even if that part is
    zero-size (when file ends with zeros).

    The file is scanned in blocks of tarfile.BLOCKSIZE bytes; consecutive
    non-zero blocks are merged into a single map entry.

    This function is performance critical.

    :param input_file: io.File object
    :param buf_size: size of the read buffer in bytes, must be a positive \
        multiple of tarfile.BLOCKSIZE; defaults to BUF_SIZE
    :return: iterable of (offset, size)
    :raises ValueError: if the buffer size is not a positive multiple of \
        tarfile.BLOCKSIZE
    '''
    block_size = tarfile.BLOCKSIZE
    if buf_size is None:
        buf_size = BUF_SIZE
    if buf_size <= 0 or buf_size % block_size:
        # a buffer that does not hold whole blocks would shift all block
        # boundaries after the first read
        raise ValueError(
            'buffer size must be a positive multiple of %d' % block_size)
    zero_block = bytearray(block_size)
    buf = bytearray(buf_size)
    in_data_block = False
    data_block_start = 0
    buf_start_offset = 0
    while True:
        buf_len = input_file.readinto(buf)
        if not buf_len:
            break
        tail = buf_len % block_size
        if tail:
            # Short read ending inside a block: the rest of that block still
            # holds bytes from the previous read. Clear it, so a zero-filled
            # tail is not mistaken for data.
            pad = block_size - tail
            buf[buf_len:buf_len+pad] = zero_block[:pad]
        for offset in range(0, buf_len, block_size):
            if buf[offset:offset+block_size] == zero_block:
                if in_data_block:
                    # data region ends where this zero block begins
                    in_data_block = False
                    yield (data_block_start,
                        buf_start_offset+offset-data_block_start)
            else:
                if not in_data_block:
                    in_data_block = True
                    data_block_start = buf_start_offset+offset
        buf_start_offset += buf_len
    if in_data_block:
        yield (data_block_start, buf_start_offset-data_block_start)
    else:
        # always emit last slice to the input end - otherwise extracted file
        # will be truncated
        yield (buf_start_offset, 0)
|
|
|
+
|
|
|
+
|
|
|
def copy_sparse_data(input_stream, output_stream, sparse_map, buf_size=None):
    '''Copy data blocks from input to output according to sparse_map

    For every map entry the input is positioned at the entry offset and
    exactly ``size`` bytes are copied; the regions are written to the output
    one after another, without any gaps between them.

    :param input_stream: io.IOBase input instance
    :param output_stream: io.IOBase output instance
    :param sparse_map: iterable of (offset, size)
    :param buf_size: size of the copy buffer in bytes; defaults to BUF_SIZE
    :raises EOFError: if the input ends before a region is fully copied
    '''
    if buf_size is None:
        buf_size = BUF_SIZE
    buf = bytearray(buf_size)

    for chunk in sparse_map:
        input_stream.seek(chunk[0])
        left = chunk[1]
        while left:
            if left > buf_size:
                # more than a full buffer remaining - reuse the buffer
                read = input_stream.readinto(buf)
                data = buf[:read]
            else:
                # last part of the region - read exactly what is left
                data = input_stream.read(left)
                read = len(data)
            if not read:
                raise EOFError('premature EOF')
            output_stream.write(data)
            left -= read
|
|
|
+
|
|
|
def finalize(output):
    '''Write EOF blocks

    Two blocks of 512 NUL bytes are written, each with a separate write()
    call.

    :param output: object with a write() method
    '''
    eof_block = '\0' * 512
    for _ in range(2):
        output.write(eof_block)
|
|
|
+
|
|
|
def main(args=None):
    '''Write a tar archive holding the input file as a single sparse member

    The input is scanned for zero-filled blocks first, then the header, the
    data regions and the EOF blocks are written to the output file (or to
    the standard input of the compress program, whose output goes to the
    output file).

    :param args: list of command line arguments; None is passed on to \
        argparse as is
    :return: exit code of the compress program if one was used, 0 otherwise
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('--override-name', action='store', dest='override_name',
        help='use this name in tar header')
    parser.add_argument('--use-compress-program', default=None,
        metavar='COMMAND', action='store', dest='use_compress_program',
        help='Filter data through COMMAND.')
    parser.add_argument('input_file',
        help='input file name')
    parser.add_argument('output_file', default='-', nargs='?',
        help='output file name')
    args = parser.parse_args(args)

    header_name = args.override_name or args.input_file
    if args.output_file == '-':
        output_name = '/dev/stdout'
    else:
        output_name = args.output_file

    compress = None
    with io.open(args.input_file, 'rb') as input_file:
        # scan the input before the output gets opened (and truncated)
        sparse_map = list(get_sparse_map(input_file))
        tar_info = TarSparseInfo(header_name, sparse_map)
        with io.open(output_name, 'wb') as output_file:
            output = output_file
            if args.use_compress_program:
                compress = subprocess.Popen([args.use_compress_program],
                    stdin=subprocess.PIPE, stdout=output_file)
                output = compress.stdin
            try:
                output.write(tar_info.tobuf(tarfile.GNU_FORMAT))
                copy_sparse_data(input_file, output, sparse_map)
                # Member data has to end on a block boundary, otherwise the
                # EOF blocks written below would be misaligned.
                padding = -tar_info.size % tarfile.BLOCKSIZE
                if padding:
                    output.write('\0' * padding)
                finalize(output)
            finally:
                if compress is not None:
                    # close the pipe so the compress program sees EOF, and
                    # always collect its exit status, even after an error
                    try:
                        output.close()
                    finally:
                        compress.wait()

    if compress is not None:
        return compress.returncode
    return 0
|
|
|
+
|
|
|
if __name__ == '__main__':
    import sys

    # main() returns the exit code of the compress program (or 0); pass it on
    # as the exit status of this script instead of dropping it.
    sys.exit(main())
|