123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207 |
- #
- # The Qubes OS Project, http://www.qubes-os.org
- #
- # Copyright (C) 2016 Marek Marczykowski-Górecki
- # <marmarek@invisiblethingslab.com>
- #
- # This program is free software; you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation; either version 2 of the License, or
- # (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License along
- # with this program; if not, write to the Free Software Foundation, Inc.,
- # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- import argparse
- import functools
- import subprocess
- import tarfile
- import io
# Size in bytes (400 KiB) of the scratch buffers used to scan and copy the
# input file; a multiple of the 512-byte tar block size.
BUF_SIZE = 400 * 1024
class TarSparseInfo(tarfile.TarInfo):
    '''Tar header describing a file stored as a GNU sparse entry.

    The sparse map is a sequence of ``(offset, size)`` pairs naming the
    regions of the original file that carry data. ``size`` of the entry is
    the compact size (sum of all region sizes); the size of the original file
    is available as :py:attr:`realsize`.
    '''

    def __init__(self, name="", sparsemap=None):
        '''
        :param name: member name stored in the header
        :param sparsemap: iterable of (offset, size), or None for an entry
            without a sparse map
        '''
        super().__init__(name)
        if sparsemap is not None:
            self.type = tarfile.GNUTYPE_SPARSE
            # Materialize first: sparsemap may be a one-shot iterator (for
            # example a generator) and must not be consumed twice.
            self.sparsemap = list(sparsemap)
            # compact size - only the data regions are stored in the archive
            self.size = sum(chunk[1] for chunk in self.sparsemap)
        else:
            self.sparsemap = []

    @staticmethod
    def _number(value):
        '''Encode *value* as a 12-byte GNU numeric header field.'''
        return tarfile.itn(value, 12, tarfile.GNU_FORMAT)

    @property
    def realsize(self):
        '''Size of the original file: the end of the last sparse map entry,
        or ``size`` when there is no sparse map.'''
        if self.sparsemap:
            last = self.sparsemap[-1]
            return last[0] + last[1]
        return self.size

    def sparse_header_chunk(self, index):
        '''Return the 24-byte (offset, numbytes) field for map entry *index*.

        An index past the end of the map gives a NUL-filled (unused) field.
        '''
        if index < len(self.sparsemap):
            chunk = self.sparsemap[index]
            return self._number(chunk[0]) + self._number(chunk[1])
        return b'\0' * 12 * 2

    def get_gnu_header(self):
        '''Part placed in 'prefix' field of posix header'''
        parts = [
            self._number(self.mtime),  # atime
            self._number(self.mtime),  # ctime
            self._number(0),  # offset
            b'\0' * 4,  # longnames
            b'\0',  # unused_pad2
        ]
        # the main header has room for the first four map entries only
        parts += [self.sparse_header_chunk(i) for i in range(4)]
        parts += [
            b'\1' if len(self.sparsemap) > 4 else b'\0',  # isextended
            self._number(self.realsize),  # realsize
        ]
        return b''.join(parts)

    def get_info(self):
        '''Return the header fields as a dictionary, with the GNU extension
        placed into the 'prefix' field.'''
        info = super().get_info()
        # The GNU header is binary data: values too large for 11 octal digits
        # (8 GiB and above) are stored in base-256, which is not valid text.
        # surrogateescape keeps those bytes intact until the field is encoded
        # back in tobuf().
        info['prefix'] = self.get_gnu_header().decode(
            tarfile.ENCODING, 'surrogateescape')
        return info

    def tobuf(self, format=tarfile.DEFAULT_FORMAT, encoding=tarfile.ENCODING,
            errors="strict"):
        '''Return the header block(s) as bytes.

        The main header is followed by extension sparse headers when the map
        has more than four entries.

        :param format: one of tarfile.*_FORMAT constants; the sparse
            extension is laid out for tarfile.GNU_FORMAT
        :param encoding: encoding of the text fields
        :param errors: error handler for the text fields; when the sparse map
            holds values of 8 GiB or more, use ``'surrogateescape'`` together
            with the default encoding, otherwise encoding the binary 'prefix'
            field raises UnicodeEncodeError
        :return: bytes, a multiple of 512 in length
        '''
        # pylint: disable=redefined-builtin
        header_buf = super().tobuf(format, encoding, errors)
        if len(self.sparsemap) > 4:
            return header_buf + b''.join(self.create_ext_sparse_headers())
        return header_buf

    def create_ext_sparse_headers(self):
        '''Yield 512-byte extension headers for map entries past the fourth.

        Each header holds up to 21 entries followed by an 'isextended' flag
        telling whether another extension header follows.
        '''
        entries_per_header = 21
        total = len(self.sparsemap)
        for first in range(4, total, entries_per_header):
            following = first + entries_per_header
            block = b''.join(
                self.sparse_header_chunk(i) for i in range(first, following))
            block += b'\1' if following < total else b'\0'
            # built as bytes directly, so no text round trip is involved
            yield block.ljust(tarfile.BLOCKSIZE, b'\0')
def get_sparse_map(input_file, buf_size=None):
    '''
    Return map of the file where actual data is present, ignoring zero-ed
    blocks. Last entry of the map spans to the end of file, even if that part is
    zero-size (when file ends with zeros).

    This function is performance critical.

    :param input_file: binary file object providing readinto()
    :param buf_size: size in bytes of the read buffer, should be a multiple
        of tarfile.BLOCKSIZE; None (the default) means BUF_SIZE
    :return: iterable of (offset, size)
    :raises ValueError: buf_size is not positive (raised when iteration
        starts, as this is a generator)
    '''
    if buf_size is None:
        buf_size = BUF_SIZE
    if buf_size <= 0:
        raise ValueError('buf_size must be positive')
    block_size = tarfile.BLOCKSIZE
    zero_block = bytearray(block_size)
    buf = bytearray(buf_size)
    in_data_block = False
    data_block_start = 0
    buf_start_offset = 0
    while True:
        buf_len = input_file.readinto(buf)
        if not buf_len:
            break
        # A short read leaves bytes of the previous pass behind buf_len. Clear
        # the rest of the last, partially filled block, so the comparison
        # below looks only at what has actually been read.
        partial = buf_len % block_size
        if partial:
            block_end = min(buf_len - partial + block_size, buf_size)
            buf[buf_len:block_end] = bytes(block_end - buf_len)
        for offset in range(0, buf_len, block_size):
            if buf[offset:offset + block_size] == zero_block:
                if in_data_block:
                    # data region ends where this zero block begins
                    in_data_block = False
                    yield (data_block_start,
                        buf_start_offset + offset - data_block_start)
            elif not in_data_block:
                in_data_block = True
                data_block_start = buf_start_offset + offset
        buf_start_offset += buf_len
    if in_data_block:
        yield (data_block_start, buf_start_offset - data_block_start)
    else:
        # always emit last slice to the input end - otherwise extracted file
        # will be truncated
        yield (buf_start_offset, 0)
def copy_sparse_data(input_stream, output_stream, sparse_map, buf_size=None):
    '''Copy data blocks from input to output according to sparse_map

    :param input_stream: seekable io.IOBase input instance
    :param output_stream: io.IOBase output instance
    :param sparse_map: iterable of (offset, size)
    :param buf_size: size in bytes of the copy buffer; None (the default)
        means BUF_SIZE
    :raises ValueError: buf_size is not positive
    :raises EOFError: input ends before a chunk has been copied completely
    '''
    if buf_size is None:
        buf_size = BUF_SIZE
    if buf_size <= 0:
        raise ValueError('buf_size must be positive')
    buf = bytearray(buf_size)
    for chunk in sparse_map:
        input_stream.seek(chunk[0])
        left = chunk[1]
        while left:
            if left > buf_size:
                # more than a bufferful left - reuse the preallocated buffer
                read = input_stream.readinto(buf)
                output_stream.write(buf[:read])
            else:
                # the tail of a chunk - must not read past the chunk end
                buf_trailer = input_stream.read(left)
                read = len(buf_trailer)
                output_stream.write(buf_trailer)
            if not read:
                raise EOFError('premature EOF')
            left -= read
def finalize(output):
    '''Terminate the archive: write the two zero-filled 512-byte EOF blocks.

    :param output: binary stream the archive is written to
    '''
    eof_block = bytes(512)
    for _ in range(2):
        output.write(eof_block)
def main(args=None):
    '''Write input file as a single-member GNU sparse tar archive.

    The input is scanned for zero-filled blocks first, then the tar header,
    the data regions and the EOF blocks are written to the output, optionally
    through a compress program.

    :param args: list of command line arguments; None makes argparse use
        sys.argv
    :return: exit code of the compress program if one was used, otherwise 0
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('--override-name', action='store', dest='override_name',
        help='use this name in tar header')
    parser.add_argument('--use-compress-program', default=None,
        metavar='COMMAND', action='store', dest='use_compress_program',
        help='Filter data through COMMAND.')
    parser.add_argument('input_file',
        help='input file name')
    parser.add_argument('output_file', default='-', nargs='?',
        help='output file name')
    args = parser.parse_args(args)

    header_name = args.override_name or args.input_file
    output_path = '/dev/stdout' if args.output_file == '-' \
        else args.output_file
    compress = None

    # Context managers make sure both files get closed on errors too.
    with io.open(args.input_file, 'rb') as input_file:
        sparse_map = list(get_sparse_map(input_file))
        tar_info = TarSparseInfo(header_name, sparse_map)
        with io.open(output_path, 'wb') as raw_output:
            output = raw_output
            if args.use_compress_program:
                compress = subprocess.Popen([args.use_compress_program],
                    stdin=subprocess.PIPE, stdout=raw_output)
                output = compress.stdin
            try:
                # surrogateescape: a name given on the command line may carry
                # escaped undecodable bytes; store them back verbatim instead
                # of failing on them
                output.write(tar_info.tobuf(
                    tarfile.GNU_FORMAT, errors='surrogateescape'))
                copy_sparse_data(input_file, output, sparse_map)
                # member data in a tar archive occupies whole blocks - pad
                # the last one, otherwise the EOF blocks end up misaligned
                padding = -tar_info.size % tarfile.BLOCKSIZE
                if padding:
                    output.write(b'\0' * padding)
                finalize(output)
            finally:
                if compress is not None:
                    try:
                        # closing the pipe signals end of input to the filter
                        output.close()
                    finally:
                        # always reap the child, even if closing failed
                        compress.wait()
    if compress is not None:
        return compress.returncode
    return 0
if __name__ == '__main__':
    # main() returns the exit code of the compress program (or 0); hand it
    # over as the process exit status instead of dropping it.
    raise SystemExit(main())
|