tar2qfile.c 26 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008
  1. /* $OpenBSD: tar.h,v 1.7 2003/06/02 23:32:09 millert Exp $ */
  2. /* $NetBSD: tar.h,v 1.3 1995/03/21 09:07:51 cgd Exp $ */
  3. /*-
  4. * Copyright (c) 1992 Keith Muller.
  5. * Copyright (c) 1992, 1993
  6. * The Regents of the University of California. All rights reserved.
  7. *
  8. * This code is derived from software contributed to Berkeley by
  9. * Keith Muller of the University of California, San Diego.
  10. *
  11. * Redistribution and use in source and binary forms, with or without
  12. * modification, are permitted provided that the following conditions
  13. * are met:
  14. * 1. Redistributions of source code must retain the above copyright
  15. * notice, this list of conditions and the following disclaimer.
  16. * 2. Redistributions in binary form must reproduce the above copyright
  17. * notice, this list of conditions and the following disclaimer in the
  18. * documentation and/or other materials provided with the distribution.
  19. * 3. Neither the name of the University nor the names of its contributors
  20. * may be used to endorse or promote products derived from this software
  21. * without specific prior written permission.
  22. *
  23. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  24. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  25. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  26. * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  27. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  28. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  29. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  30. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  31. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  32. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  33. * SUCH DAMAGE.
  34. *
  35. * @(#)tar.h 8.2 (Berkeley) 4/18/94
  36. */
  37. #define _GNU_SOURCE /* For O_NOFOLLOW. */
  38. #include <errno.h>
  39. #include <ioall.h>
  40. #include <fcntl.h>
  41. #include <sys/time.h>
  42. #include <sys/stat.h>
  43. #include <stdlib.h>
  44. #include <unistd.h>
  45. #include <stdio.h>
  46. #include <string.h>
  47. #include <qfile-utils.h>
  48. // #define DEBUG
  49. /***************************************************
  50. * Most routines extracted from the PAX project (tar.c...) *
  51. ***************************************************/
  52. /*
  53. * BSD PAX global data structures and constants.
  54. */
  55. #define MAXBLK 64512 /* MAX blocksize supported (posix SPEC) */
  56. /* WARNING: increasing MAXBLK past 32256 */
  57. /* will violate posix spec. */
  58. #define MAXBLK_POSIX 32256 /* MAX blocksize supported as per POSIX */
  59. #define BLKMULT 512 /* blocksize must be even mult of 512 bytes */
  60. /* Don't even think of changing this */
  61. #define DEVBLK 8192 /* default read blksize for devices */
  62. #define FILEBLK 10240 /* default read blksize for files */
  63. #define PAXPATHLEN 3072 /* maximium path length for pax. MUST be */
  64. /*
  65. * defines and data structures common to all tar formats
  66. */
  67. #define CHK_LEN 8 /* length of checksum field */
  68. #define TNMSZ 100 /* size of name field */
  69. #define NULLCNT 2 /* number of null blocks in trailer */
  70. #define CHK_OFFSET 148 /* start of chksum field */
  71. #define BLNKSUM 256L /* sum of checksum field using ' ' */
  72. /*
  73. * General Defines
  74. */
  75. #define HEX 16
  76. #define OCT 8
  77. #define _PAX_ 1
  78. #define _TFILE_BASE "paxXXXXXXXXXX"
  79. /*
  80. * General Macros
  81. */
  82. #ifndef MIN
  83. #define MIN(a,b) (((a)<(b))?(a):(b))
  84. #endif
  85. #ifndef MAX
  86. #define MAX(a,b) (((a)>(b))?(a):(b))
  87. #endif
  88. #define MAJOR(x) major(x)
  89. #define MINOR(x) minor(x)
  90. #define TODEV(x, y) makedev((x), (y))
  91. /*
  92. * Values used in typeflag field in all tar formats
  93. * (only REGTYPE, LNKTYPE and SYMTYPE are used in old bsd tar headers)
  94. */
  95. #define REGTYPE '0' /* Regular File */
  96. #define AREGTYPE '\0' /* Regular File */
  97. #define LNKTYPE '1' /* Link */
  98. #define SYMTYPE '2' /* Symlink */
  99. #define CHRTYPE '3' /* Character Special File */
  100. #define BLKTYPE '4' /* Block Special File */
  101. #define DIRTYPE '5' /* Directory */
  102. #define FIFOTYPE '6' /* FIFO */
  103. #define CONTTYPE '7' /* high perf file */
  104. /*
  105. * GNU tar compatibility;
  106. */
  107. #define LONGLINKTYPE 'K' /* Long Symlink */
  108. #define LONGNAMETYPE 'L' /* Long File */
  109. #define EXTHEADERTYPE 'x' /* Extended header */
  110. /*
  111. * Pad with a bit mask, much faster than doing a mod but only works on powers
  112. * of 2. Macro below is for block of 512 bytes.
  113. */
  114. #define TAR_PAD(x) ((512 - ((x) & 511)) & 511)
  115. /*
  116. * Data Interchange Format - Extended tar header format - POSIX 1003.1-1990
  117. */
  118. #define TPFSZ 155
  119. #define TMAGIC "ustar" /* ustar and a null */
  120. #define TMAGLEN 6
  121. #define TVERSION "00" /* 00 and no null */
  122. #define TVERSLEN 2
  123. typedef struct {
  124. char name[TNMSZ]; /* name of entry */
  125. char mode[8]; /* mode */
  126. char uid[8]; /* uid */
  127. char gid[8]; /* gid */
  128. char size[12]; /* size */
  129. char mtime[12]; /* modification time */
  130. char chksum[CHK_LEN]; /* checksum */
  131. char typeflag; /* type of file. */
  132. char linkname[TNMSZ]; /* linked to name */
  133. char magic[TMAGLEN]; /* magic cookie */
  134. char version[TVERSLEN]; /* version */
  135. char uname[32]; /* ascii owner name */
  136. char gname[32]; /* ascii group name */
  137. char devmajor[8]; /* major device number */
  138. char devminor[8]; /* minor device number */
  139. char prefix[TPFSZ]; /* linked to name */
  140. } HD_USTAR;
  141. /*
  142. * Routines for manipulating headers, trailers:
  143. * asc_ul()
  144. * tar_trail()
  145. * tar_chksm()
  146. * ustar_id()
  147. */
  148. static unsigned long tar_chksm (char *, int);
  149. char *gnu_hack_string; /* GNU ././@LongLink hackery */
  150. char untrusted_namebuf[MAX_PATH_LENGTH];
  151. extern int ignore_quota_error;
  152. struct filters {
  153. int filters_count;
  154. char **filters;
  155. int *filters_matches;
  156. int matched_filters;
  157. };
  158. /*
  159. * asc_ul()
  160. * convert hex/octal character string into a u_long. We do not have to
  161. * check for overflow! (the headers in all supported formats are not large
  162. * enough to create an overflow).
  163. * NOTE: strings passed to us are NOT TERMINATED.
  164. * Return:
  165. * unsigned long value
  166. */
  167. u_long
  168. asc_ul (char *str, int len, int base)
  169. {
  170. char *stop;
  171. u_long tval = 0;
  172. stop = str + len;
  173. /*
  174. * skip over leading blanks and zeros
  175. */
  176. while ((str < stop) && ((*str == ' ') || (*str == '0')))
  177. ++str;
  178. /*
  179. * for each valid digit, shift running value (tval) over to next digit
  180. * and add next digit
  181. */
  182. if (base == HEX)
  183. {
  184. while (str < stop)
  185. {
  186. if ((*str >= '0') && (*str <= '9'))
  187. tval = (tval << 4) + (*str++ - '0');
  188. else if ((*str >= 'A') && (*str <= 'F'))
  189. tval = (tval << 4) + 10 + (*str++ - 'A');
  190. else if ((*str >= 'a') && (*str <= 'f'))
  191. tval = (tval << 4) + 10 + (*str++ - 'a');
  192. else
  193. break;
  194. }
  195. }
  196. else
  197. {
  198. while ((str < stop) && (*str >= '0') && (*str <= '7'))
  199. tval = (tval << 3) + (*str++ - '0');
  200. }
  201. return (tval);
  202. }
  203. /*
  204. * tar_trail()
  205. * Called to determine if a header block is a valid trailer. We are passed
  206. * the block, the in_sync flag (which tells us we are in resync mode;
  207. * looking for a valid header), and cnt (which starts at zero) which is
  208. * used to count the number of empty blocks we have seen so far.
  209. * Return:
  210. * 0 if a valid trailer, -1 if not a valid trailer, or 1 if the block
  211. * could never contain a header.
  212. */
  213. int
  214. tar_trail (char *buf,
  215. int in_resync, int *cnt)
  216. {
  217. register int i;
  218. /*
  219. * look for all zero, trailer is two consecutive blocks of zero
  220. */
  221. for (i = 0; i < BLKMULT; ++i)
  222. {
  223. if (buf[i] != '\0')
  224. break;
  225. }
  226. /*
  227. * if not all zero it is not a trailer, but MIGHT be a header.
  228. */
  229. if (i != BLKMULT)
  230. return (-1);
  231. /*
  232. * When given a zero block, we must be careful!
  233. * If we are not in resync mode, check for the trailer. Have to watch
  234. * out that we do not mis-identify file data as the trailer, so we do
  235. * NOT try to id a trailer during resync mode. During resync mode we
  236. * might as well throw this block out since a valid header can NEVER be
  237. * a block of all 0 (we must have a valid file name).
  238. */
  239. if (!in_resync && (++*cnt >= NULLCNT))
  240. return (0);
  241. return (1);
  242. }
  243. /*
  244. * tar_chksm()
  245. * calculate the checksum for a tar block counting the checksum field as
  246. * all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks).
  247. * NOTE: we use len to short circuit summing 0's on write since we ALWAYS
  248. * pad headers with 0.
  249. * Return:
  250. * unsigned long checksum
  251. */
  252. static unsigned long
  253. tar_chksm (char *blk, int len)
  254. {
  255. char *stop;
  256. char *pt;
  257. unsigned int chksm = BLNKSUM; /* initial value is checksum field sum */
  258. /*
  259. * add the part of the block before the checksum field
  260. */
  261. pt = blk;
  262. stop = blk + CHK_OFFSET;
  263. while (pt < stop)
  264. chksm += (*pt++ & 0xff);
  265. /*
  266. * move past the checksum field and keep going, spec counts the
  267. * checksum field as the sum of 8 blanks (which is pre-computed as
  268. * BLNKSUM).
  269. * ASSUMED: len is greater than CHK_OFFSET. (len is where our 0 padding
  270. * starts, no point in summing zero's)
  271. */
  272. pt += CHK_LEN;
  273. stop = blk + len;
  274. while (pt < stop)
  275. chksm += (*pt++ & 0xff);
  276. return chksm;
  277. }
  278. /*
  279. * ustar_id()
  280. * determine if a block given to us is a valid ustar header. We have to
  281. * be on the lookout for those pesky blocks of all zero's
  282. * Return:
  283. * 0 if a ustar header, -1 otherwise
  284. */
  285. int
  286. ustar_id (char *blk, size_t size)
  287. {
  288. HD_USTAR *hd;
  289. if (size < BLKMULT)
  290. return (-1);
  291. hd = (HD_USTAR *) blk;
  292. /*
  293. * check for block of zero's first, a simple and fast test then check
  294. * ustar magic cookie. We should use TMAGLEN, but some USTAR archive
  295. * programs are fouled up and create archives missing the \0. Last we
  296. * check the checksum. If ok we have to assume it is a valid header.
  297. */
  298. if (hd->name[0] == '\0')
  299. return (-1);
  300. if (strncmp (hd->magic, TMAGIC, TMAGLEN - 1) != 0)
  301. return (-1);
  302. if (asc_ul (hd->chksum, sizeof (hd->chksum), OCT) !=
  303. tar_chksm (blk, BLKMULT))
  304. return (-1);
  305. return (0);
  306. }
  307. /*
  308. * Routines for reading tar files
  309. // Source: http://www.mkssoftware.com/docs/man4/pax.4.asp
  310. struct file_header { // PAX header is similar as file_header and can be completely ignored
  311. unsigned char[100] name;
  312. unsigned char[8] mode;
  313. unsigned char[8] uid; // unused
  314. unsigned char[8] gid; // unused
  315. unsigned char[12] size; // 0 if file is a link
  316. unsigned char[12] mtime;
  317. unsigned char[8] chksum;
  318. unsigned char[1] typeflag;
  319. unsigned char[100] linkname;
  320. unsigned char[6] magic; //ustar
  321. unsigned char[2] version; // 00
  322. unsigned char[32] uname; // unused
  323. unsigned char[32] gname; // unused
  324. unsigned char[8] devmajor; // unused ?
  325. unsigned char[8] devminor; // unused ?
  326. unsigned char[155] prefix; // only used for files > 100 characters. could be unused ?
  327. };
  328. enum {
  329. TYPE_REGULAR, //0
  330. TYPE_ARCHIVE_LINK, //1
  331. TYPE_SYMLINK, //2
  332. TYPE_CHARACTER_DEVICE, //3
  333. TYPE_BLOCK_DEVICE, //4
  334. TYPE_DIRECTORY, //5
  335. TYPE_FIFO, //6
  336. // Other types:
  337. TYPE_EXTENDED_USAGE, //xxxxx
  338. // A-Z are available for custom usage
  339. };
  340. // Extended attribute:
  341. // length keyword=value
  342. // atime, charset, comment, gname, linkpath, mtime, path, size, uname
  343. */
  344. enum {
  345. NEED_NOTHING,
  346. NEED_SKIP,
  347. NEED_SKIP_FILE, // distinguish between skipped file and unwanted blocks (extended headers etc)
  348. NEED_READ,
  349. NEED_SYNC_TRAIL,
  350. INVALID_HEADER,
  351. MEMORY_ALLOC_FAILED,
  352. };
  353. /*
  354. * ustar_rd()
  355. * extract the values out of block already determined to be a ustar header.
  356. * store the values in the ARCHD parameter.
  357. * Return:
  358. * 0
  359. */
  360. int n_dirs = 0;
  361. char ** dirs_headers_sent = NULL;
  362. int
  363. ustar_rd (int fd, struct file_header * untrusted_hdr, char *buf, struct stat * sb, struct filters *filters)
  364. {
  365. register HD_USTAR *hd;
  366. register char *dest;
  367. register int cnt = 0;
  368. int ret;
  369. int i;
  370. int should_extract;
  371. /* DISABLED: unused
  372. dev_t devmajor;
  373. dev_t devminor;
  374. */
  375. /*
  376. * we only get proper sized buffers
  377. */
  378. #ifdef DEBUG
  379. fprintf(stderr,"Checking if valid header\n");
  380. #endif
  381. if (ustar_id (buf, BLKMULT) < 0) {
  382. #ifdef DEBUG
  383. fprintf (stderr, "Invalid header\n");
  384. #endif
  385. return INVALID_HEADER;
  386. }
  387. #ifdef DEBUG
  388. fprintf(stderr,"Valid header!\n");
  389. #endif
  390. /* DISABLED: Internal to PAX
  391. arcn->org_name = arcn->name;
  392. arcn->sb.st_nlink = 1;
  393. arcn->pat = NULL;
  394. arcn->nlen = 0;
  395. */
  396. untrusted_hdr->namelen = 0;
  397. hd = (HD_USTAR *) buf;
  398. /*
  399. * see if the filename is split into two parts. if, so joint the parts.
  400. * we copy the prefix first and add a / between the prefix and name.
  401. */
  402. dest = untrusted_namebuf;
  403. if (*(hd->prefix) != '\0')
  404. {
  405. cnt = strlen(strncpy (dest, hd->prefix,
  406. MIN(sizeof (untrusted_namebuf) - 1,TPFSZ+1)));
  407. dest += cnt;
  408. *dest++ = '/';
  409. cnt++;
  410. }
  411. if (gnu_hack_string)
  412. {
  413. untrusted_hdr->namelen = cnt + strlen(strncpy (dest, gnu_hack_string,
  414. MIN(TNMSZ+1, sizeof (untrusted_namebuf) - cnt)));
  415. free(gnu_hack_string);
  416. gnu_hack_string = NULL;
  417. } else
  418. untrusted_hdr->namelen = cnt + strlen(strncpy (dest, hd->name,
  419. MIN(TNMSZ+1, sizeof (untrusted_namebuf) - cnt)));
  420. // qfile count the \0 in the namelen
  421. untrusted_hdr->namelen += 1;
  422. #ifdef DEBUG
  423. fprintf(stderr,"Retrieved name len: %d\n",untrusted_hdr->namelen);
  424. fprintf(stderr,"Retrieved name: %s\n",untrusted_namebuf);
  425. #endif
  426. /*
  427. * follow the spec to the letter. we should only have mode bits, strip
  428. * off all other crud we may be passed.
  429. */
  430. sb->st_mode = (mode_t) (asc_ul (hd->mode, sizeof (hd->mode), OCT) &
  431. 0xfff);
  432. untrusted_hdr->mode = sb->st_mode;
  433. #if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS == 64
  434. sb->st_size = (off_t) asc_uqd (hd->size, sizeof (hd->size), OCT);
  435. #else
  436. sb->st_size = (off_t) asc_ul (hd->size, sizeof (hd->size), OCT);
  437. #endif
  438. untrusted_hdr->filelen = sb->st_size;
  439. untrusted_hdr->atime = (time_t) asc_ul (hd->mtime, sizeof (hd->mtime), OCT);
  440. untrusted_hdr->mtime = untrusted_hdr->atime;
  441. untrusted_hdr->atime_nsec = untrusted_hdr->mtime_nsec = 0;
  442. sb->st_mtime = (time_t) asc_ul (hd->mtime, sizeof (hd->mtime), OCT);
  443. sb->st_ctime = sb->st_atime = sb->st_mtime;
  444. /*
  445. * If we can find the ascii names for gname and uname in the password
  446. * and group files we will use the uid's and gid they bind. Otherwise
  447. * we use the uid and gid values stored in the header. (This is what
  448. * the posix spec wants).
  449. */
  450. /* DISABLED: unused
  451. hd->gname[sizeof (hd->gname) - 1] = '\0';
  452. if (gid_name (hd->gname, &(arcn->sb.st_gid)) < 0)
  453. arcn->sb.st_gid = (gid_t) asc_ul (hd->gid, sizeof (hd->gid), OCT);
  454. hd->uname[sizeof (hd->uname) - 1] = '\0';
  455. if (uid_name (hd->uname, &(arcn->sb.st_uid)) < 0)
  456. arcn->sb.st_uid = (uid_t) asc_ul (hd->uid, sizeof (hd->uid), OCT);
  457. */
  458. /*
  459. * set the defaults, these may be changed depending on the file type
  460. */
  461. /* Disabled: pax specific
  462. arcn->ln_name[0] = '\0';
  463. arcn->ln_nlen = 0;
  464. arcn->pad = 0;
  465. arcn->skip = 0;
  466. arcn->sb.st_rdev = (dev_t) 0;
  467. */
  468. /*
  469. * set the mode and PAX type according to the typeflag in the header
  470. */
  471. switch (hd->typeflag)
  472. {
  473. case FIFOTYPE:
  474. #ifdef DEBUG
  475. fprintf(stderr,"File is FIFOTYPE\n");
  476. #endif
  477. /* DISABLED: unused
  478. arcn->type = PAX_FIF;
  479. arcn->sb.st_mode |= S_IFIFO;
  480. */
  481. break;
  482. case DIRTYPE:
  483. #ifdef DEBUG
  484. fprintf(stderr,"File is DIRTYPE\n");
  485. #endif
  486. /* DISABLED: unused
  487. arcn->type = PAX_DIR;
  488. arcn->sb.st_mode |= S_IFDIR;
  489. arcn->sb.st_nlink = 2;
  490. */
  491. /*
  492. * Some programs that create ustar archives append a '/'
  493. * to the pathname for directories. This clearly violates
  494. * ustar specs, but we will silently strip it off anyway.
  495. */
  496. /*
  497. if (arcn->name[arcn->nlen - 1] == '/')
  498. arcn->name[--arcn->nlen] = '\0';
  499. */
  500. break;
  501. case BLKTYPE:
  502. #ifdef DEBUG
  503. fprintf(stderr,"File is BLKTYPE\n");
  504. #endif
  505. break;
  506. case CHRTYPE:
  507. #ifdef DEBUG
  508. fprintf(stderr,"File is CHRTYPE\n");
  509. #endif
  510. /*
  511. * this type requires the rdev field to be set.
  512. */
  513. if (hd->typeflag == BLKTYPE)
  514. {
  515. /*
  516. arcn->type = PAX_BLK;
  517. arcn->sb.st_mode |= S_IFBLK;
  518. */
  519. }
  520. else
  521. {
  522. /*
  523. arcn->type = PAX_CHR;
  524. arcn->sb.st_mode |= S_IFCHR;
  525. */
  526. }
  527. /* DISABLED: unused
  528. devmajor = (dev_t) asc_ul (hd->devmajor, sizeof (hd->devmajor), OCT);
  529. devminor = (dev_t) asc_ul (hd->devminor, sizeof (hd->devminor), OCT);
  530. */
  531. // arcn->sb.st_rdev = TODEV (devmajor, devminor);
  532. break;
  533. case SYMTYPE:
  534. #ifdef DEBUG
  535. fprintf(stderr,"File is SYMTYPE\n");
  536. #endif
  537. break;
  538. case LNKTYPE:
  539. #ifdef DEBUG
  540. fprintf(stderr,"File is LNKTYPE\n");
  541. #endif
  542. if (hd->typeflag == SYMTYPE)
  543. {
  544. // arcn->type = PAX_SLK;
  545. // arcn->sb.st_mode |= S_IFLNK;
  546. }
  547. else
  548. {
  549. // arcn->type = PAX_HLK;
  550. /*
  551. * so printing looks better
  552. */
  553. // arcn->sb.st_mode |= S_IFREG;
  554. // arcn->sb.st_nlink = 2;
  555. }
  556. /*
  557. * copy the link name
  558. */
  559. // arcn->ln_nlen = strlcpy (arcn->ln_name, hd->linkname,
  560. // MIN(TNMSZ+1,sizeof (arcn->ln_name)));
  561. break;
  562. case LONGLINKTYPE:
  563. #ifdef DEBUG
  564. fprintf(stderr,"File is LONGLINKTYPE\n");
  565. #endif
  566. break;
  567. case LONGNAMETYPE:
  568. #ifdef DEBUG
  569. fprintf(stderr,"File is LONGNAMETYPE\n");
  570. #endif
  571. /*
  572. * GNU long link/file; we tag these here and let the
  573. * pax internals deal with it -- too ugly otherwise.
  574. */
  575. // arcn->type =
  576. // hd->typeflag == LONGLINKTYPE ? PAX_GLL : PAX_GLF;
  577. // arcn->pad = TAR_PAD(arcn->sb.st_size);
  578. // arcn->skip = arcn->sb.st_size;
  579. // arcn->ln_name[0] = '\0';
  580. // arcn->ln_nlen = 0;
  581. break;
  582. case CONTTYPE:
  583. #ifdef DEBUG
  584. fprintf(stderr,"File is CONTTYPE\n");
  585. #endif
  586. break;
  587. case AREGTYPE:
  588. #ifdef DEBUG
  589. fprintf(stderr,"File is AREGTYPE\n");
  590. #endif
  591. break;
  592. case REGTYPE:
  593. #ifdef DEBUG
  594. fprintf(stderr,"File is REGTYPE of size %ld\n",sb->st_size);
  595. #endif
  596. // Check if user want to extract this file
  597. should_extract = 1;
  598. for (i=0; i < filters->filters_count; i++) {
  599. should_extract = 0;
  600. #ifdef DEBUG
  601. fprintf(stderr, "Comparing with filter %s\n", filters->filters[i]);
  602. #endif
  603. if (strncmp(untrusted_namebuf, filters->filters[i], strlen(filters->filters[i])) == 0) {
  604. #ifdef DEBUG
  605. fprintf(stderr, "Match (%d)\n", filters->filters_matches[i]);
  606. #endif
  607. should_extract = 1;
  608. filters->filters_matches[i]++;
  609. if (filters->filters_matches[i] == 1) {
  610. // first match
  611. filters->matched_filters++;
  612. }
  613. break;
  614. }
  615. }
  616. if (should_extract != 1) {
  617. #ifdef DEBUG
  618. fprintf(stderr, "File should be filtered.. Skipping\n");
  619. #endif
  620. return NEED_SKIP_FILE;
  621. }
  622. // Create a copy of untrusted_namebuf to be used for strtok
  623. char * dirbuf;
  624. dirbuf = malloc(sizeof (char) * (untrusted_hdr->namelen));
  625. if (dirbuf == NULL)
  626. return MEMORY_ALLOC_FAILED;
  627. dirbuf = strncpy(dirbuf, untrusted_namebuf, untrusted_hdr->namelen);
  628. int i = 0;
  629. int dir_found = 0;
  630. size_t pathsize = 0;
  631. char * path = NULL;
  632. struct file_header dir_header;
  633. // Split the path in directories and recompose it incrementally
  634. char * last_token = strtok(dirbuf,"/");
  635. char * token = strtok(NULL, "/");
  636. while (token != NULL) {
  637. #ifdef DEBUG
  638. fprintf(stderr,"Found directory %s (last:%s)\n",token,last_token);
  639. #endif
  640. // Recompose the path based on last discovered directory
  641. if (path == NULL) {
  642. path = malloc(sizeof (char) * (strlen(last_token)+1));
  643. if (path == NULL)
  644. return MEMORY_ALLOC_FAILED;
  645. path = strncpy(path, last_token, strlen(last_token));
  646. path[strlen(last_token)] = '\0';
  647. } else {
  648. pathsize = strlen(path);
  649. path = realloc(path, sizeof (char) * (strlen(path)+1+strlen(last_token)+1));
  650. if (path == NULL)
  651. return MEMORY_ALLOC_FAILED;
  652. path[pathsize] = '/';
  653. strncpy(path+pathsize+1, last_token, strlen(last_token));
  654. path[pathsize+strlen(last_token)+1] = '\0';
  655. }
  656. #ifdef DEBUG
  657. fprintf(stderr,"Path is %s\n",path);
  658. #endif
  659. #ifdef DEBUG
  660. fprintf(stderr,"Checking from i=0 i<%d\n",n_dirs);
  661. #endif
  662. // Verify if qfile headers for the current path have already been sent based on the dirs_headers_sent table
  663. dir_found = 0;
  664. for (i = 0; i < n_dirs; ++i) {
  665. #ifdef DEBUG
  666. fprintf(stderr,"Comparing with %d %d %s %s\n",i,n_dirs,dirs_headers_sent[i],path);
  667. #endif
  668. if (strcmp(dirs_headers_sent[i],path)==0) {
  669. #ifdef DEBUG
  670. fprintf(stderr,"Directory headers already sent\n");
  671. #endif
  672. dir_found=1;
  673. }
  674. }
  675. if (dir_found == 0) {
  676. // Register the current path as being sent in the dirs_headers_sent table
  677. #ifdef DEBUG
  678. fprintf(stderr,"Inserting %s into register\n",path);
  679. #endif
  680. dirs_headers_sent = realloc(dirs_headers_sent, sizeof (char*) * n_dirs++);
  681. if (dirs_headers_sent == NULL)
  682. return MEMORY_ALLOC_FAILED;
  683. dirs_headers_sent[n_dirs-1] = malloc(sizeof (char) * (strlen(path)+1));
  684. if (dirs_headers_sent[n_dirs-1] == NULL)
  685. return MEMORY_ALLOC_FAILED;
  686. strncpy(dirs_headers_sent[n_dirs-1], path, strlen(path)+1);
  687. // Initialize the qfile headers for the current directory path
  688. dir_header.namelen = strlen(path)+1;
  689. dir_header.atime = untrusted_hdr->atime;
  690. dir_header.atime_nsec = untrusted_hdr->atime_nsec;
  691. dir_header.mtime = untrusted_hdr->mtime;
  692. dir_header.mtime_nsec = untrusted_hdr->mtime_nsec;
  693. dir_header.mode = untrusted_hdr->mode | S_IFDIR;
  694. dir_header.filelen = 0;
  695. #ifdef DEBUG
  696. fprintf(stderr,"Sending directory headers for %s\n",path);
  697. #endif
  698. // Send the qfile headers for the current directory path
  699. write_headers(&dir_header, path);
  700. }
  701. last_token = token;
  702. token = strtok(NULL, "/");
  703. }
  704. free(path);
  705. free(dirbuf);
  706. #ifdef DEBUG
  707. fprintf(stderr,"End of directory checks\n");
  708. #endif
  709. // Restore POSIX stat file mode (because PAX format use its own file type)
  710. untrusted_hdr->mode |= S_IFREG;
  711. #ifdef DEBUG
  712. fprintf(stderr,"Writing file header\n");
  713. #endif
  714. // Send header and file content
  715. write_headers(untrusted_hdr, untrusted_namebuf);
  716. #ifdef DEBUG
  717. fprintf(stderr,"Writing file content\n");
  718. #endif
  719. ret = copy_file(1, fd, untrusted_hdr->filelen, &crc32_sum);
  720. #ifdef DEBUG
  721. fprintf(stderr,"Copyfile returned with error %d\n",ret);
  722. #endif
  723. if (ret != COPY_FILE_OK) {
  724. if (ret != COPY_FILE_WRITE_ERROR)
  725. gui_fatal("Copying file %s: %s", untrusted_namebuf,
  726. copy_file_status_to_str(ret));
  727. else {
  728. fprintf(stderr,"UNKNOWN ERROR RETURN STATUS:%d\n.. Waiting...\n",ret);
  729. set_block(0);
  730. wait_for_result();
  731. exit(1);
  732. }
  733. }
  734. // Extract extra padding
  735. #ifdef DEBUG
  736. fprintf(stderr,"Need to remove pad:%lld %lld\n",untrusted_hdr->filelen,BLKMULT-(untrusted_hdr->filelen%BLKMULT));
  737. #endif
  738. if (untrusted_hdr->filelen%BLKMULT > 0)
  739. ret = read(fd, buf, BLKMULT-(untrusted_hdr->filelen%BLKMULT));
  740. #ifdef DEBUG
  741. fprintf(stderr,"Removed %d bytes of padding\n",ret);
  742. #endif
  743. // Resync trailing headers in order to find next file chunck in the tar file
  744. return NEED_SYNC_TRAIL;
  745. break;
  746. case EXTHEADERTYPE:
  747. #ifdef DEBUG
  748. fprintf(stderr,"Extended HEADER encountered\n");
  749. #endif
  750. return NEED_SKIP;
  751. break;
  752. default:
  753. #ifdef DEBUG
  754. fprintf(stderr,"Default type detected:%c\n",hd->typeflag);
  755. #endif
  756. return NEED_SKIP;
  757. /*
  758. * these types have file data that follows. Set the skip and
  759. * pad fields.
  760. */
  761. // arcn->type = PAX_REG;
  762. // arcn->pad = TAR_PAD (arcn->sb.st_size);
  763. // arcn->skip = arcn->sb.st_size;
  764. // arcn->sb.st_mode |= S_IFREG;
  765. break;
  766. }
  767. return NEED_SKIP;
  768. }
  769. void tar_file_processor(int fd, struct filters *filters)
  770. {
  771. int ret;
  772. int i;
  773. int current;
  774. struct file_header hdr;
  775. struct stat sb; /* stat buffer see stat(2) */
  776. char buf[BLKMULT+1];
  777. size_t size;
  778. i=0;
  779. current = NEED_READ;
  780. size_t to_skip = 0;
  781. int sync_count = 0;
  782. while ((size = read(fd, &buf, BLKMULT))) {
  783. if (size != -1) {
  784. #ifdef DEBUG
  785. fprintf(stderr,"Read %ld bytes\n",size);
  786. #endif
  787. ret = 0;
  788. if (current==NEED_SYNC_TRAIL) {
  789. ret = tar_trail (buf, 1, &sync_count);
  790. #ifdef DEBUG
  791. fprintf(stderr,"Synchronizing trail: %d %d\n",ret,sync_count);
  792. #endif
  793. if (ret != 1) {
  794. current = NEED_READ;
  795. }
  796. }
  797. if (current==NEED_READ) {
  798. current = ustar_rd(fd, &hdr, buf, &sb, filters);
  799. #ifdef DEBUG
  800. fprintf(stderr,"Return %d\n",ret);
  801. #endif
  802. }
  803. if (current==NEED_SKIP || current==NEED_SKIP_FILE) {
  804. if (current==NEED_SKIP_FILE &&
  805. filters->filters_count > 0 &&
  806. filters->filters_count == filters->matched_filters) {
  807. // This assume that either:
  808. // a) files are sorted (using full path as sort key)
  809. // b) all the directory content is in
  810. // consecutive block and only directories
  811. // are given as filters
  812. // This is true for backups prepared by qvm-backup
  813. #ifdef DEBUG
  814. fprintf(stderr, "All filters matched at least once - assuming end of requested data\n");
  815. #endif
  816. return;
  817. }
  818. #ifdef DEBUG
  819. fprintf(stderr,"Need to skip %lld bytes (matched filters %d < %d)\n",
  820. hdr.filelen, filters->matched_filters, filters->filters_count);
  821. #endif
  822. to_skip = hdr.filelen;
  823. while (to_skip > 0) {
  824. to_skip -= read(fd, &buf, MIN(to_skip,BLKMULT));
  825. }
  826. // Extract extra padding
  827. #ifdef DEBUG
  828. fprintf(stderr,"Need to remove pad:%ld %lld %lld\n",to_skip,hdr.filelen,BLKMULT-(hdr.filelen%BLKMULT));
  829. #endif
  830. if (hdr.filelen%BLKMULT > 0) {
  831. ret = read(fd, &buf, BLKMULT-(hdr.filelen%BLKMULT));
  832. #ifdef DEBUG
  833. fprintf(stderr,"Removed %d bytes of padding\n",ret);
  834. #endif
  835. }
  836. current = NEED_SYNC_TRAIL;
  837. }
  838. i++;
  839. }
  840. //if (i >= 10)
  841. // exit(0);
  842. }
  843. }
  844. int main(int argc, char **argv)
  845. {
  846. int i;
  847. char *entry;
  848. int fd;
  849. int use_stdin = 1;
  850. struct filters filters;
  851. signal(SIGPIPE, SIG_IGN);
  852. // this will allow checking for possible feedback packet in the middle of transfer
  853. // if disabled, the copy_file process could hang
  854. notify_progress(0, PROGRESS_FLAG_INIT);
  855. //set_size_limit(1500000000, 2048);
  856. crc32_sum = 0;
  857. /* when extracting backup header, dom0 will terminate the transfer with
  858. * EDQUOT just after getting qubes.xml */
  859. ignore_quota_error = 1;
  860. for (i = 1; i < argc; i++) {
  861. set_nonblock(0);
  862. if (strcmp(argv[i], "--ignore-symlinks")==0) {
  863. ignore_symlinks = 1;
  864. continue;
  865. } else if (strcmp(argv[i], "-")==0) {
  866. use_stdin = 1;
  867. i++;
  868. break;
  869. } else {
  870. // Parse tar file
  871. use_stdin = 0;
  872. entry = argv[i];
  873. #ifdef DEBUG
  874. fprintf(stderr,"Parsing file %s\n",entry);
  875. #endif
  876. fd = open(entry, O_RDONLY);
  877. if (fd < 0) {
  878. fprintf(stderr,"Error opening file %s\n",entry);
  879. exit(2);
  880. }
  881. i++;
  882. break;
  883. }
  884. }
  885. filters.filters_count = argc-i;
  886. filters.filters = argv+i;
  887. filters.filters_matches = calloc(filters.filters_count, sizeof(int));
  888. if (filters.filters_matches == NULL) {
  889. perror("calloc");
  890. exit(1);
  891. }
  892. filters.matched_filters = 0;
  893. if (use_stdin == 1) {
  894. #ifdef DEBUG
  895. fprintf(stderr,"Using STDIN\n");
  896. #endif
  897. set_block(0);
  898. fd = 0;
  899. }
  900. tar_file_processor(fd, &filters);
  901. notify_end_and_wait_for_result();
  902. notify_progress(0, PROGRESS_FLAG_DONE);
  903. return 0;
  904. }