tar2qfile.c 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028
  1. /* $OpenBSD: tar.h,v 1.7 2003/06/02 23:32:09 millert Exp $ */
  2. /* $NetBSD: tar.h,v 1.3 1995/03/21 09:07:51 cgd Exp $ */
  3. /*-
  4. * Copyright (c) 1992 Keith Muller.
  5. * Copyright (c) 1992, 1993
  6. * The Regents of the University of California. All rights reserved.
  7. *
  8. * This code is derived from software contributed to Berkeley by
  9. * Keith Muller of the University of California, San Diego.
  10. *
  11. * Redistribution and use in source and binary forms, with or without
  12. * modification, are permitted provided that the following conditions
  13. * are met:
  14. * 1. Redistributions of source code must retain the above copyright
  15. * notice, this list of conditions and the following disclaimer.
  16. * 2. Redistributions in binary form must reproduce the above copyright
  17. * notice, this list of conditions and the following disclaimer in the
  18. * documentation and/or other materials provided with the distribution.
  19. * 3. Neither the name of the University nor the names of its contributors
  20. * may be used to endorse or promote products derived from this software
  21. * without specific prior written permission.
  22. *
  23. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  24. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  25. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  26. * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  27. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  28. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  29. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  30. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  31. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  32. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  33. * SUCH DAMAGE.
  34. *
  35. * @(#)tar.h 8.2 (Berkeley) 4/18/94
  36. */
  37. #define _GNU_SOURCE /* For O_NOFOLLOW. */
  38. #include <errno.h>
  39. #include <fcntl.h>
  40. #include <sys/time.h>
  41. #include <sys/stat.h>
  42. #include <stdlib.h>
  43. #include <unistd.h>
  44. #include <stdio.h>
  45. #include <libqubes-rpc-filecopy.h>
  46. #include <string.h>
  47. #include <qfile-utils.h>
  48. // #define DEBUG
  49. /***************************************************
  50. * Most routines extracted from the PAX project (tar.c...) *
  51. ***************************************************/
/*
 * BSD PAX global data structures and constants.
 * Of these, only BLKMULT is referenced by the code visible in this file;
 * the others are carried over unchanged.
 */
#define MAXBLK 64512 /* largest block size supported. NOTE: this is already */
/* above the POSIX limit (MAXBLK_POSIX, 32256), so blocks */
/* this large fall outside what the POSIX spec allows. */
#define MAXBLK_POSIX 32256 /* largest block size allowed by POSIX */
#define BLKMULT 512 /* tar block size: every header and every data chunk */
/* is padded to a multiple of this. Do not change it. */
#define DEVBLK 8192 /* default read block size for devices */
#define FILEBLK 10240 /* default read block size for files */
#define PAXPATHLEN 3072 /* maximum path length for pax. NOTE(review): the */
/* original comment was cut off after "MUST be"; presumably it */
/* went on to require a value above the system path limit. */
/*
 * Defines common to all tar formats. CHK_OFFSET and CHK_LEN locate the
 * checksum field inside a header block (see HD_USTAR: the fields in front of
 * chksum add up to 148 bytes).
 */
#define CHK_LEN 8 /* length of the checksum field in bytes */
#define TNMSZ 100 /* size of the name (and linkname) field */
#define NULLCNT 2 /* number of all-zero blocks that make up the trailer */
#define CHK_OFFSET 148 /* byte offset of the checksum field in the header */
#define BLNKSUM 256L /* checksum contribution of the checksum field itself */
/* when it is treated as CHK_LEN blanks (8 * ' ' = 256) */
/*
 * General Defines
 * HEX and OCT are the "base" selectors accepted by asc_ul(). _PAX_ and
 * _TFILE_BASE are not referenced by the code visible in this file.
 */
#define HEX 16
#define OCT 8
#define _PAX_ 1
#define _TFILE_BASE "paxXXXXXXXXXX"
/*
 * General Macros
 * MIN/MAX evaluate their arguments more than once, so do not pass
 * expressions with side effects. They are only defined here when no
 * earlier header provided them.
 */
#ifndef MIN
#define MIN(a,b) (((a)<(b))?(a):(b))
#endif
#ifndef MAX
#define MAX(a,b) (((a)>(b))?(a):(b))
#endif
/*
 * Thin wrappers around the system device-number macros. Not referenced by
 * the code visible in this file (device entries are skipped, not extracted).
 */
#define MAJOR(x) major(x)
#define MINOR(x) minor(x)
#define TODEV(x, y) makedev((x), (y))
/*
 * Values used in typeflag field in all tar formats
 * (only REGTYPE, LNKTYPE and SYMTYPE are used in old bsd tar headers)
 *
 * In this file only REGTYPE entries are extracted by ustar_rd(); every
 * other type is reported as "skip".
 */
#define REGTYPE '0' /* Regular File */
#define AREGTYPE '\0' /* Regular File (alternate encoding: NUL typeflag) */
#define LNKTYPE '1' /* Hard link */
#define SYMTYPE '2' /* Symlink */
#define CHRTYPE '3' /* Character Special File */
#define BLKTYPE '4' /* Block Special File */
#define DIRTYPE '5' /* Directory */
#define FIFOTYPE '6' /* FIFO */
#define CONTTYPE '7' /* high perf file */
/*
 * GNU tar compatibility; entries of these types carry metadata for the
 * entry that follows rather than file content of their own.
 */
#define LONGLINKTYPE 'K' /* Long Symlink target */
#define LONGNAMETYPE 'L' /* Long File name */
#define EXTHEADERTYPE 'x' /* Extended (pax) header */
/*
 * Pad with a bit mask, much faster than doing a mod but only works on powers
 * of 2. Macro below is for block of 512 bytes: it yields the number of
 * padding bytes that follow x bytes of data (0 when x is already a multiple
 * of 512, e.g. TAR_PAD(1) == 511, TAR_PAD(512) == 0).
 * Not referenced by the code visible in this file.
 */
#define TAR_PAD(x) ((512 - ((x) & 511)) & 511)
/*
 * Data Interchange Format - Extended tar header format - POSIX 1003.1-1990
 */
#define TPFSZ 155 /* size of the prefix field */
#define TMAGIC "ustar" /* ustar and a null */
#define TMAGLEN 6 /* size of the magic field, including the null */
#define TVERSION "00" /* 00 and no null */
#define TVERSLEN 2 /* size of the version field */
/*
 * On-disk layout of a ustar header. The fields add up to 500 bytes and are
 * overlaid on the start of a BLKMULT (512) byte block. All fields are plain
 * character arrays; a field that is completely full is NOT NUL-terminated.
 * Numeric fields are decoded with asc_ul() as octal in this file.
 */
typedef struct {
	char name[TNMSZ]; /* name of entry (last part of the path) */
	char mode[8]; /* mode */
	char uid[8]; /* uid */
	char gid[8]; /* gid */
	char size[12]; /* size of the data following the header */
	char mtime[12]; /* modification time */
	char chksum[CHK_LEN]; /* checksum (starts at byte CHK_OFFSET) */
	char typeflag; /* type of file, one of the *TYPE values */
	char linkname[TNMSZ]; /* linked to name */
	char magic[TMAGLEN]; /* magic cookie (TMAGIC) */
	char version[TVERSLEN]; /* version */
	char uname[32]; /* ascii owner name */
	char gname[32]; /* ascii group name */
	char devmajor[8]; /* major device number */
	char devminor[8]; /* minor device number */
	char prefix[TPFSZ]; /* leading part of the path; when non-empty it */
	/* is joined to name with a '/' (see ustar_rd) */
} HD_USTAR;
/*
 * Routines for manipulating headers, trailers:
 * asc_ul()
 * tar_trail()
 * tar_chksm()
 * ustar_id()
 */
/* Forward declaration: tar_chksm() is defined after its first mention. */
static unsigned long tar_chksm (char *, int);
/*
 * GNU ././@LongLink hackery: when non-NULL, ustar_rd() uses this string
 * instead of the header's name field and then frees it. Nothing in the code
 * visible in this file ever assigns it.
 */
char *gnu_hack_string; /* GNU ././@LongLink hackery */
/*
 * Path of the entry currently being processed, assembled by ustar_rd() from
 * the (untrusted) archive header. MAX_PATH_LENGTH is defined outside this
 * file.
 */
char untrusted_namebuf[MAX_PATH_LENGTH];
/*
 * Non-zero while tar_file_processor() may skip data with lseek(); it is
 * cleared the first time lseek() fails with ESPIPE, after which skipping is
 * done by reading.
 */
int use_seek = 1;
/* Defined outside this file; main() sets it to 1 before processing. */
extern int ignore_quota_error;
/*
 * Path filters given on the command line. A regular file is extracted when
 * its path starts with any of the filter strings (prefix match), or when no
 * filter was given at all.
 */
struct filters {
	int filters_count; /* number of entries in filters / filters_matches */
	char **filters; /* filter strings (main() points this into argv) */
	int *filters_matches; /* per-filter count of entries matched so far */
	int matched_filters; /* number of filters matched at least once */
};
  159. /*
  160. * asc_ul()
  161. * convert hex/octal character string into a u_long. We do not have to
  162. * check for overflow! (the headers in all supported formats are not large
  163. * enough to create an overflow).
  164. * NOTE: strings passed to us are NOT TERMINATED.
  165. * Return:
  166. * unsigned long value
  167. */
  168. u_long
  169. asc_ul (char *str, int len, int base)
  170. {
  171. char *stop;
  172. u_long tval = 0;
  173. stop = str + len;
  174. /*
  175. * skip over leading blanks and zeros
  176. */
  177. while ((str < stop) && ((*str == ' ') || (*str == '0')))
  178. ++str;
  179. /*
  180. * for each valid digit, shift running value (tval) over to next digit
  181. * and add next digit
  182. */
  183. if (base == HEX)
  184. {
  185. while (str < stop)
  186. {
  187. if ((*str >= '0') && (*str <= '9'))
  188. tval = (tval << 4) + (*str++ - '0');
  189. else if ((*str >= 'A') && (*str <= 'F'))
  190. tval = (tval << 4) + 10 + (*str++ - 'A');
  191. else if ((*str >= 'a') && (*str <= 'f'))
  192. tval = (tval << 4) + 10 + (*str++ - 'a');
  193. else
  194. break;
  195. }
  196. }
  197. else
  198. {
  199. while ((str < stop) && (*str >= '0') && (*str <= '7'))
  200. tval = (tval << 3) + (*str++ - '0');
  201. }
  202. return (tval);
  203. }
  204. /*
  205. * tar_trail()
  206. * Called to determine if a header block is a valid trailer. We are passed
  207. * the block, the in_sync flag (which tells us we are in resync mode;
  208. * looking for a valid header), and cnt (which starts at zero) which is
  209. * used to count the number of empty blocks we have seen so far.
  210. * Return:
  211. * 0 if a valid trailer, -1 if not a valid trailer, or 1 if the block
  212. * could never contain a header.
  213. */
  214. int
  215. tar_trail (char *buf,
  216. int in_resync, int *cnt)
  217. {
  218. register int i;
  219. /*
  220. * look for all zero, trailer is two consecutive blocks of zero
  221. */
  222. for (i = 0; i < BLKMULT; ++i)
  223. {
  224. if (buf[i] != '\0')
  225. break;
  226. }
  227. /*
  228. * if not all zero it is not a trailer, but MIGHT be a header.
  229. */
  230. if (i != BLKMULT)
  231. return (-1);
  232. /*
  233. * When given a zero block, we must be careful!
  234. * If we are not in resync mode, check for the trailer. Have to watch
  235. * out that we do not mis-identify file data as the trailer, so we do
  236. * NOT try to id a trailer during resync mode. During resync mode we
  237. * might as well throw this block out since a valid header can NEVER be
  238. * a block of all 0 (we must have a valid file name).
  239. */
  240. if (!in_resync && (++*cnt >= NULLCNT))
  241. return (0);
  242. return (1);
  243. }
  244. /*
  245. * tar_chksm()
  246. * calculate the checksum for a tar block counting the checksum field as
  247. * all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks).
  248. * NOTE: we use len to short circuit summing 0's on write since we ALWAYS
  249. * pad headers with 0.
  250. * Return:
  251. * unsigned long checksum
  252. */
  253. static unsigned long
  254. tar_chksm (char *blk, int len)
  255. {
  256. char *stop;
  257. char *pt;
  258. unsigned int chksm = BLNKSUM; /* initial value is checksum field sum */
  259. /*
  260. * add the part of the block before the checksum field
  261. */
  262. pt = blk;
  263. stop = blk + CHK_OFFSET;
  264. while (pt < stop)
  265. chksm += (*pt++ & 0xff);
  266. /*
  267. * move past the checksum field and keep going, spec counts the
  268. * checksum field as the sum of 8 blanks (which is pre-computed as
  269. * BLNKSUM).
  270. * ASSUMED: len is greater than CHK_OFFSET. (len is where our 0 padding
  271. * starts, no point in summing zero's)
  272. */
  273. pt += CHK_LEN;
  274. stop = blk + len;
  275. while (pt < stop)
  276. chksm += (*pt++ & 0xff);
  277. return chksm;
  278. }
  279. /*
  280. * ustar_id()
  281. * determine if a block given to us is a valid ustar header. We have to
  282. * be on the lookout for those pesky blocks of all zero's
  283. * Return:
  284. * 0 if a ustar header, -1 otherwise
  285. */
  286. int
  287. ustar_id (char *blk, size_t size)
  288. {
  289. HD_USTAR *hd;
  290. if (size < BLKMULT)
  291. return (-1);
  292. hd = (HD_USTAR *) blk;
  293. /*
  294. * check for block of zero's first, a simple and fast test then check
  295. * ustar magic cookie. We should use TMAGLEN, but some USTAR archive
  296. * programs are fouled up and create archives missing the \0. Last we
  297. * check the checksum. If ok we have to assume it is a valid header.
  298. */
  299. if (hd->name[0] == '\0')
  300. return (-1);
  301. if (strncmp (hd->magic, TMAGIC, TMAGLEN - 1) != 0)
  302. return (-1);
  303. if (asc_ul (hd->chksum, sizeof (hd->chksum), OCT) !=
  304. tar_chksm (blk, BLKMULT))
  305. return (-1);
  306. return (0);
  307. }
  308. /*
  309. * Routines for reading tar files
  310. // Source: http://www.mkssoftware.com/docs/man4/pax.4.asp
  311. struct file_header { // PAX header is similar as file_header and can be completely ignored
  312. unsigned char[100] name;
  313. unsigned char[8] mode;
  314. unsigned char[8] uid; // unused
  315. unsigned char[8] gid; // unused
  316. unsigned char[12] size; // 0 if file is a link
  317. unsigned char[12] mtime;
  318. unsigned char[8] chksum;
  319. unsigned char[1] typeflag;
  320. unsigned char[100] linkname;
  321. unsigned char[6] magic; //ustar
  322. unsigned char[2] version; // 00
  323. unsigned char[32] uname; // unused
  324. unsigned char[32] gname; // unused
  325. unsigned char[8] devmajor; // unused ?
  326. unsigned char[8] devminor; // unused ?
  327. unsigned char[155] prefix; // only used for files > 100 characters. could be unused ?
  328. };
  329. enum {
  330. TYPE_REGULAR, //0
  331. TYPE_ARCHIVE_LINK, //1
  332. TYPE_SYMLINK, //2
  333. TYPE_CHARACTER_DEVICE, //3
  334. TYPE_BLOCK_DEVICE, //4
  335. TYPE_DIRECTORY, //5
  336. TYPE_FIFO, //6
  337. // Other types:
  338. TYPE_EXTENDED_USAGE, //xxxxx
  339. // A-Z are available for custom usage
  340. };
  341. // Extended attribute:
  342. // length keyword=value
  343. // atime, charset, comment, gname, linkpath, mtime, path, size, uname
  344. */
/*
 * State of the block-processing loop in tar_file_processor(); the values
 * from NEED_SKIP on are also the return codes of ustar_rd().
 */
enum {
	NEED_NOTHING, /* not referenced by the code visible in this file */
	NEED_SKIP, /* entry is not extracted (non-regular type, extended header...): skip its data */
	NEED_SKIP_FILE, // distinguish between skipped file and unwanted blocks (extended headers etc): a regular file rejected by the filters
	NEED_READ, /* the current block is to be parsed as a header */
	NEED_SYNC_TRAIL, /* an entry was consumed: pass over all-zero blocks until the next header */
	INVALID_HEADER, /* ustar_rd(): the block is not a valid ustar header */
	MEMORY_ALLOC_FAILED, /* ustar_rd(): an allocation failed */
};
  354. /*
  355. * ustar_rd()
  356. * extract the values out of block already determined to be a ustar header.
  357. * store the values in the ARCHD parameter.
  358. * Return:
  359. * 0
  360. */
  361. int n_dirs = 0;
  362. char ** dirs_headers_sent = NULL;
  363. int
  364. ustar_rd (int fd, struct file_header * untrusted_hdr, char *buf, struct stat * sb, struct filters *filters)
  365. {
  366. register HD_USTAR *hd;
  367. register char *dest;
  368. register int cnt = 0;
  369. int ret;
  370. int i;
  371. int should_extract;
  372. /* DISABLED: unused
  373. dev_t devmajor;
  374. dev_t devminor;
  375. */
  376. /*
  377. * we only get proper sized buffers
  378. */
  379. #ifdef DEBUG
  380. fprintf(stderr,"Checking if valid header\n");
  381. #endif
  382. if (ustar_id (buf, BLKMULT) < 0) {
  383. #ifdef DEBUG
  384. fprintf (stderr, "Invalid header\n");
  385. #endif
  386. return INVALID_HEADER;
  387. }
  388. #ifdef DEBUG
  389. fprintf(stderr,"Valid header!\n");
  390. #endif
  391. /* DISABLED: Internal to PAX
  392. arcn->org_name = arcn->name;
  393. arcn->sb.st_nlink = 1;
  394. arcn->pat = NULL;
  395. arcn->nlen = 0;
  396. */
  397. untrusted_hdr->namelen = 0;
  398. hd = (HD_USTAR *) buf;
  399. /*
  400. * see if the filename is split into two parts. if, so joint the parts.
  401. * we copy the prefix first and add a / between the prefix and name.
  402. */
  403. dest = untrusted_namebuf;
  404. if (*(hd->prefix) != '\0')
  405. {
  406. cnt = strlen(strncpy (dest, hd->prefix,
  407. MIN(sizeof (untrusted_namebuf) - 1,TPFSZ+1)));
  408. dest += cnt;
  409. *dest++ = '/';
  410. cnt++;
  411. }
  412. if (gnu_hack_string)
  413. {
  414. untrusted_hdr->namelen = cnt + strlen(strncpy (dest, gnu_hack_string,
  415. MIN(TNMSZ+1, sizeof (untrusted_namebuf) - cnt)));
  416. free(gnu_hack_string);
  417. gnu_hack_string = NULL;
  418. } else
  419. untrusted_hdr->namelen = cnt + strlen(strncpy (dest, hd->name,
  420. MIN(TNMSZ+1, sizeof (untrusted_namebuf) - cnt)));
  421. // qfile count the \0 in the namelen
  422. untrusted_hdr->namelen += 1;
  423. #ifdef DEBUG
  424. fprintf(stderr,"Retrieved name len: %d\n",untrusted_hdr->namelen);
  425. fprintf(stderr,"Retrieved name: %s\n",untrusted_namebuf);
  426. #endif
  427. /*
  428. * follow the spec to the letter. we should only have mode bits, strip
  429. * off all other crud we may be passed.
  430. */
  431. sb->st_mode = (mode_t) (asc_ul (hd->mode, sizeof (hd->mode), OCT) &
  432. 0xfff);
  433. untrusted_hdr->mode = sb->st_mode;
  434. #if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS == 64
  435. sb->st_size = (off_t) asc_uqd (hd->size, sizeof (hd->size), OCT);
  436. #else
  437. sb->st_size = (off_t) asc_ul (hd->size, sizeof (hd->size), OCT);
  438. #endif
  439. untrusted_hdr->filelen = sb->st_size;
  440. untrusted_hdr->atime = (time_t) asc_ul (hd->mtime, sizeof (hd->mtime), OCT);
  441. untrusted_hdr->mtime = untrusted_hdr->atime;
  442. untrusted_hdr->atime_nsec = untrusted_hdr->mtime_nsec = 0;
  443. sb->st_mtime = (time_t) asc_ul (hd->mtime, sizeof (hd->mtime), OCT);
  444. sb->st_ctime = sb->st_atime = sb->st_mtime;
  445. /*
  446. * If we can find the ascii names for gname and uname in the password
  447. * and group files we will use the uid's and gid they bind. Otherwise
  448. * we use the uid and gid values stored in the header. (This is what
  449. * the posix spec wants).
  450. */
  451. /* DISABLED: unused
  452. hd->gname[sizeof (hd->gname) - 1] = '\0';
  453. if (gid_name (hd->gname, &(arcn->sb.st_gid)) < 0)
  454. arcn->sb.st_gid = (gid_t) asc_ul (hd->gid, sizeof (hd->gid), OCT);
  455. hd->uname[sizeof (hd->uname) - 1] = '\0';
  456. if (uid_name (hd->uname, &(arcn->sb.st_uid)) < 0)
  457. arcn->sb.st_uid = (uid_t) asc_ul (hd->uid, sizeof (hd->uid), OCT);
  458. */
  459. /*
  460. * set the defaults, these may be changed depending on the file type
  461. */
  462. /* Disabled: pax specific
  463. arcn->ln_name[0] = '\0';
  464. arcn->ln_nlen = 0;
  465. arcn->pad = 0;
  466. arcn->skip = 0;
  467. arcn->sb.st_rdev = (dev_t) 0;
  468. */
  469. /*
  470. * set the mode and PAX type according to the typeflag in the header
  471. */
  472. switch (hd->typeflag)
  473. {
  474. case FIFOTYPE:
  475. #ifdef DEBUG
  476. fprintf(stderr,"File is FIFOTYPE\n");
  477. #endif
  478. /* DISABLED: unused
  479. arcn->type = PAX_FIF;
  480. arcn->sb.st_mode |= S_IFIFO;
  481. */
  482. break;
  483. case DIRTYPE:
  484. #ifdef DEBUG
  485. fprintf(stderr,"File is DIRTYPE\n");
  486. #endif
  487. /* DISABLED: unused
  488. arcn->type = PAX_DIR;
  489. arcn->sb.st_mode |= S_IFDIR;
  490. arcn->sb.st_nlink = 2;
  491. */
  492. /*
  493. * Some programs that create ustar archives append a '/'
  494. * to the pathname for directories. This clearly violates
  495. * ustar specs, but we will silently strip it off anyway.
  496. */
  497. /*
  498. if (arcn->name[arcn->nlen - 1] == '/')
  499. arcn->name[--arcn->nlen] = '\0';
  500. */
  501. break;
  502. case BLKTYPE:
  503. #ifdef DEBUG
  504. fprintf(stderr,"File is BLKTYPE\n");
  505. #endif
  506. break;
  507. case CHRTYPE:
  508. #ifdef DEBUG
  509. fprintf(stderr,"File is CHRTYPE\n");
  510. #endif
  511. /*
  512. * this type requires the rdev field to be set.
  513. */
  514. if (hd->typeflag == BLKTYPE)
  515. {
  516. /*
  517. arcn->type = PAX_BLK;
  518. arcn->sb.st_mode |= S_IFBLK;
  519. */
  520. }
  521. else
  522. {
  523. /*
  524. arcn->type = PAX_CHR;
  525. arcn->sb.st_mode |= S_IFCHR;
  526. */
  527. }
  528. /* DISABLED: unused
  529. devmajor = (dev_t) asc_ul (hd->devmajor, sizeof (hd->devmajor), OCT);
  530. devminor = (dev_t) asc_ul (hd->devminor, sizeof (hd->devminor), OCT);
  531. */
  532. // arcn->sb.st_rdev = TODEV (devmajor, devminor);
  533. break;
  534. case SYMTYPE:
  535. #ifdef DEBUG
  536. fprintf(stderr,"File is SYMTYPE\n");
  537. #endif
  538. break;
  539. case LNKTYPE:
  540. #ifdef DEBUG
  541. fprintf(stderr,"File is LNKTYPE\n");
  542. #endif
  543. if (hd->typeflag == SYMTYPE)
  544. {
  545. // arcn->type = PAX_SLK;
  546. // arcn->sb.st_mode |= S_IFLNK;
  547. }
  548. else
  549. {
  550. // arcn->type = PAX_HLK;
  551. /*
  552. * so printing looks better
  553. */
  554. // arcn->sb.st_mode |= S_IFREG;
  555. // arcn->sb.st_nlink = 2;
  556. }
  557. /*
  558. * copy the link name
  559. */
  560. // arcn->ln_nlen = strlcpy (arcn->ln_name, hd->linkname,
  561. // MIN(TNMSZ+1,sizeof (arcn->ln_name)));
  562. break;
  563. case LONGLINKTYPE:
  564. #ifdef DEBUG
  565. fprintf(stderr,"File is LONGLINKTYPE\n");
  566. #endif
  567. break;
  568. case LONGNAMETYPE:
  569. #ifdef DEBUG
  570. fprintf(stderr,"File is LONGNAMETYPE\n");
  571. #endif
  572. /*
  573. * GNU long link/file; we tag these here and let the
  574. * pax internals deal with it -- too ugly otherwise.
  575. */
  576. // arcn->type =
  577. // hd->typeflag == LONGLINKTYPE ? PAX_GLL : PAX_GLF;
  578. // arcn->pad = TAR_PAD(arcn->sb.st_size);
  579. // arcn->skip = arcn->sb.st_size;
  580. // arcn->ln_name[0] = '\0';
  581. // arcn->ln_nlen = 0;
  582. break;
  583. case CONTTYPE:
  584. #ifdef DEBUG
  585. fprintf(stderr,"File is CONTTYPE\n");
  586. #endif
  587. break;
  588. case AREGTYPE:
  589. #ifdef DEBUG
  590. fprintf(stderr,"File is AREGTYPE\n");
  591. #endif
  592. break;
  593. case REGTYPE:
  594. #ifdef DEBUG
  595. fprintf(stderr,"File is REGTYPE of size %ld\n",sb->st_size);
  596. #endif
  597. // Check if user want to extract this file
  598. should_extract = 1;
  599. for (i=0; i < filters->filters_count; i++) {
  600. should_extract = 0;
  601. #ifdef DEBUG
  602. fprintf(stderr, "Comparing with filter %s\n", filters->filters[i]);
  603. #endif
  604. if (strncmp(untrusted_namebuf, filters->filters[i], strlen(filters->filters[i])) == 0) {
  605. #ifdef DEBUG
  606. fprintf(stderr, "Match (%d)\n", filters->filters_matches[i]);
  607. #endif
  608. should_extract = 1;
  609. filters->filters_matches[i]++;
  610. if (filters->filters_matches[i] == 1) {
  611. // first match
  612. filters->matched_filters++;
  613. }
  614. break;
  615. }
  616. }
  617. if (should_extract != 1) {
  618. #ifdef DEBUG
  619. fprintf(stderr, "File should be filtered.. Skipping\n");
  620. #endif
  621. return NEED_SKIP_FILE;
  622. }
  623. // Create a copy of untrusted_namebuf to be used for strtok
  624. char * dirbuf;
  625. dirbuf = malloc(sizeof (char) * (untrusted_hdr->namelen));
  626. if (dirbuf == NULL)
  627. return MEMORY_ALLOC_FAILED;
  628. dirbuf = strncpy(dirbuf, untrusted_namebuf, untrusted_hdr->namelen);
  629. int i = 0;
  630. int dir_found = 0;
  631. size_t pathsize = 0;
  632. char * path = NULL;
  633. struct file_header dir_header;
  634. // Split the path in directories and recompose it incrementally
  635. char * last_token = strtok(dirbuf,"/");
  636. char * token = strtok(NULL, "/");
  637. while (token != NULL) {
  638. #ifdef DEBUG
  639. fprintf(stderr,"Found directory %s (last:%s)\n",token,last_token);
  640. #endif
  641. // Recompose the path based on last discovered directory
  642. if (path == NULL) {
  643. path = malloc(sizeof (char) * (strlen(last_token)+1));
  644. if (path == NULL)
  645. return MEMORY_ALLOC_FAILED;
  646. path = strncpy(path, last_token, strlen(last_token));
  647. path[strlen(last_token)] = '\0';
  648. } else {
  649. pathsize = strlen(path);
  650. path = realloc(path, sizeof (char) * (strlen(path)+1+strlen(last_token)+1));
  651. if (path == NULL)
  652. return MEMORY_ALLOC_FAILED;
  653. path[pathsize] = '/';
  654. strncpy(path+pathsize+1, last_token, strlen(last_token));
  655. path[pathsize+strlen(last_token)+1] = '\0';
  656. }
  657. #ifdef DEBUG
  658. fprintf(stderr,"Path is %s\n",path);
  659. #endif
  660. #ifdef DEBUG
  661. fprintf(stderr,"Checking from i=0 i<%d\n",n_dirs);
  662. #endif
  663. // Verify if qfile headers for the current path have already been sent based on the dirs_headers_sent table
  664. dir_found = 0;
  665. for (i = 0; i < n_dirs; ++i) {
  666. #ifdef DEBUG
  667. fprintf(stderr,"Comparing with %d %d %s %s\n",i,n_dirs,dirs_headers_sent[i],path);
  668. #endif
  669. if (strcmp(dirs_headers_sent[i],path)==0) {
  670. #ifdef DEBUG
  671. fprintf(stderr,"Directory headers already sent\n");
  672. #endif
  673. dir_found=1;
  674. }
  675. }
  676. if (dir_found == 0) {
  677. // Register the current path as being sent in the dirs_headers_sent table
  678. #ifdef DEBUG
  679. fprintf(stderr,"Inserting %s into register\n",path);
  680. #endif
  681. dirs_headers_sent = realloc(dirs_headers_sent, sizeof (char*) * (++n_dirs));
  682. if (dirs_headers_sent == NULL)
  683. return MEMORY_ALLOC_FAILED;
  684. dirs_headers_sent[n_dirs-1] = malloc(sizeof (char) * (strlen(path)+1));
  685. if (dirs_headers_sent[n_dirs-1] == NULL)
  686. return MEMORY_ALLOC_FAILED;
  687. strncpy(dirs_headers_sent[n_dirs-1], path, strlen(path)+1);
  688. // Initialize the qfile headers for the current directory path
  689. dir_header.namelen = strlen(path)+1;
  690. dir_header.atime = untrusted_hdr->atime;
  691. dir_header.atime_nsec = untrusted_hdr->atime_nsec;
  692. dir_header.mtime = untrusted_hdr->mtime;
  693. dir_header.mtime_nsec = untrusted_hdr->mtime_nsec;
  694. dir_header.mode = untrusted_hdr->mode | S_IFDIR;
  695. dir_header.filelen = 0;
  696. #ifdef DEBUG
  697. fprintf(stderr,"Sending directory headers for %s\n",path);
  698. #endif
  699. // Send the qfile headers for the current directory path
  700. write_headers(&dir_header, path);
  701. }
  702. last_token = token;
  703. token = strtok(NULL, "/");
  704. }
  705. free(path);
  706. free(dirbuf);
  707. #ifdef DEBUG
  708. fprintf(stderr,"End of directory checks\n");
  709. #endif
  710. // Restore POSIX stat file mode (because PAX format use its own file type)
  711. untrusted_hdr->mode |= S_IFREG;
  712. #ifdef DEBUG
  713. fprintf(stderr,"Writing file header\n");
  714. #endif
  715. // Send header and file content
  716. write_headers(untrusted_hdr, untrusted_namebuf);
  717. #ifdef DEBUG
  718. fprintf(stderr,"Writing file content\n");
  719. #endif
  720. ret = copy_file(1, fd, untrusted_hdr->filelen, &crc32_sum);
  721. #ifdef DEBUG
  722. fprintf(stderr,"Copyfile returned with error %d\n",ret);
  723. #endif
  724. if (ret != COPY_FILE_OK) {
  725. if (ret != COPY_FILE_WRITE_ERROR)
  726. gui_fatal("Copying file %s: %s", untrusted_namebuf,
  727. copy_file_status_to_str(ret));
  728. else {
  729. fprintf(stderr,"UNKNOWN ERROR RETURN STATUS:%d\n.. Waiting...\n",ret);
  730. set_block(0);
  731. wait_for_result();
  732. exit(1);
  733. }
  734. }
  735. // Extract extra padding
  736. #ifdef DEBUG
  737. fprintf(stderr,"Need to remove pad:%lld %lld\n",untrusted_hdr->filelen,BLKMULT-(untrusted_hdr->filelen%BLKMULT));
  738. #endif
  739. if (untrusted_hdr->filelen%BLKMULT > 0) {
  740. if (!read_all(fd, buf, BLKMULT-(untrusted_hdr->filelen%BLKMULT))) {
  741. wait_for_result();
  742. exit(1);
  743. }
  744. }
  745. // Resync trailing headers in order to find next file chunck in the tar file
  746. return NEED_SYNC_TRAIL;
  747. break;
  748. case EXTHEADERTYPE:
  749. #ifdef DEBUG
  750. fprintf(stderr,"Extended HEADER encountered\n");
  751. #endif
  752. return NEED_SKIP;
  753. break;
  754. default:
  755. #ifdef DEBUG
  756. fprintf(stderr,"Default type detected:%c\n",hd->typeflag);
  757. #endif
  758. return NEED_SKIP;
  759. /*
  760. * these types have file data that follows. Set the skip and
  761. * pad fields.
  762. */
  763. // arcn->type = PAX_REG;
  764. // arcn->pad = TAR_PAD (arcn->sb.st_size);
  765. // arcn->skip = arcn->sb.st_size;
  766. // arcn->sb.st_mode |= S_IFREG;
  767. break;
  768. }
  769. return NEED_SKIP;
  770. }
  771. void tar_file_processor(int fd, struct filters *filters)
  772. {
  773. int ret;
  774. int i;
  775. int current;
  776. struct file_header hdr;
  777. struct stat sb; /* stat buffer see stat(2) */
  778. char buf[BLKMULT+1];
  779. i=0;
  780. current = NEED_READ;
  781. size_t to_skip = 0;
  782. int sync_count = 0;
  783. while (read_all(fd, buf, BLKMULT)) {
  784. ret = 0;
  785. if (current==NEED_SYNC_TRAIL) {
  786. ret = tar_trail (buf, 1, &sync_count);
  787. #ifdef DEBUG
  788. fprintf(stderr,"Synchronizing trail: %d %d\n", ret, sync_count);
  789. #endif
  790. if (ret != 1) {
  791. current = NEED_READ;
  792. sync_count = 0;
  793. }
  794. }
  795. if (current==NEED_READ) {
  796. current = ustar_rd(fd, &hdr, buf, &sb, filters);
  797. #ifdef DEBUG
  798. fprintf(stderr,"Return %d\n", current);
  799. #endif
  800. }
  801. if (current==NEED_SKIP || current==NEED_SKIP_FILE) {
  802. if (current==NEED_SKIP_FILE &&
  803. filters->filters_count > 0 &&
  804. filters->filters_count == filters->matched_filters) {
  805. // This assume that either:
  806. // a) files are sorted (using full path as sort key)
  807. // b) all the directory content is in
  808. // consecutive block and only directories
  809. // are given as filters
  810. // This is true for backups prepared by qvm-backup
  811. #ifdef DEBUG
  812. fprintf(stderr, "All filters matched at least once - assuming end of requested data\n");
  813. #endif
  814. return;
  815. }
  816. to_skip = hdr.filelen;
  817. #ifdef DEBUG
  818. fprintf(stderr,"Need to skip %lld bytes (matched filters %d < %d)\n",
  819. hdr.filelen, filters->matched_filters, filters->filters_count);
  820. fprintf(stderr,"Need to remove pad:%ld %lld %lld\n",to_skip,hdr.filelen,BLKMULT-(hdr.filelen%BLKMULT));
  821. #endif
  822. if (to_skip%BLKMULT > 0) {
  823. to_skip += BLKMULT-(to_skip%BLKMULT);
  824. }
  825. if (use_seek) {
  826. int tries = 3;
  827. while (lseek(fd, to_skip, SEEK_CUR) < 0) {
  828. if (errno == ESPIPE) {
  829. // fallback to read()
  830. use_seek = 0;
  831. break;
  832. } else if (errno == EAGAIN) {
  833. /* WTF?! lseek theoretically never returns this error, but
  834. * in practice it was seen... */
  835. if (tries--)
  836. continue;
  837. }
  838. perror("lseek");
  839. exit(1);
  840. }
  841. }
  842. // not using "else" because above can fallback to read() method
  843. if (!use_seek) {
  844. while (to_skip > 0) {
  845. ret = read_all(fd, &buf, MIN(to_skip,BLKMULT));
  846. if (ret <= 0) {
  847. exit(1);
  848. }
  849. to_skip -= MIN(to_skip,BLKMULT);
  850. }
  851. }
  852. current = NEED_SYNC_TRAIL;
  853. }
  854. i++;
  855. //if (i >= 10)
  856. // exit(0);
  857. }
  858. }
  859. int main(int argc, char **argv)
  860. {
  861. int i;
  862. char *entry;
  863. int fd = -1;
  864. int use_stdin = 1;
  865. struct filters filters;
  866. signal(SIGPIPE, SIG_IGN);
  867. // this will allow checking for possible feedback packet in the middle of transfer
  868. // if disabled, the copy_file process could hang
  869. register_notify_progress(&notify_progress);
  870. notify_progress(0, PROGRESS_FLAG_INIT);
  871. //set_size_limit(1500000000, 2048);
  872. crc32_sum = 0;
  873. /* when extracting backup header, dom0 will terminate the transfer with
  874. * EDQUOT just after getting qubes.xml */
  875. ignore_quota_error = 1;
  876. for (i = 1; i < argc; i++) {
  877. set_nonblock(0);
  878. if (strcmp(argv[i], "--ignore-symlinks")==0) {
  879. ignore_symlinks = 1;
  880. continue;
  881. } else if (strcmp(argv[i], "-")==0) {
  882. use_stdin = 1;
  883. i++;
  884. break;
  885. } else {
  886. // Parse tar file
  887. use_stdin = 0;
  888. entry = argv[i];
  889. #ifdef DEBUG
  890. fprintf(stderr,"Parsing file %s\n",entry);
  891. #endif
  892. fd = open(entry, O_RDONLY);
  893. if (fd < 0) {
  894. fprintf(stderr,"Error opening file %s\n",entry);
  895. exit(2);
  896. }
  897. i++;
  898. break;
  899. }
  900. }
  901. filters.filters_count = argc-i;
  902. filters.filters = argv+i;
  903. filters.filters_matches = calloc(filters.filters_count, sizeof(int));
  904. if (filters.filters_matches == NULL) {
  905. perror("calloc");
  906. exit(1);
  907. }
  908. filters.matched_filters = 0;
  909. if (use_stdin == 1) {
  910. #ifdef DEBUG
  911. fprintf(stderr,"Using STDIN\n");
  912. #endif
  913. set_block(0);
  914. fd = 0;
  915. }
  916. if (fd < 0) {
  917. fprintf(stderr, "No input file provided\n");
  918. exit(1);
  919. }
  920. tar_file_processor(fd, &filters);
  921. notify_end_and_wait_for_result();
  922. notify_progress(0, PROGRESS_FLAG_DONE);
  923. return 0;
  924. }