tar: bug fix and compatibility improvements - sbase - suckless unix tools | |
git clone git://git.suckless.org/sbase | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit 5aa63dbe055d7a655ea14626abdf051ff29e057a | |
parent cfe9424ba92c735613723ab18427d091f825de34 | |
Author: Xan Phung <[email protected]> | |
Date: Sun, 4 May 2025 16:19:02 +1000 | |
tar: bug fix and compatibility improvements | |
1. A fix for a bug in the unarchive() function (causing metadata | |
loss when used on large tar files). This bug is due to existing | |
code continuing to check for h->type == HARDLINK and SOFTLINK | |
(near end of function), when the entire header block has already | |
been overwritten by a call to eread() prior to the h->type checks. | |
2. Long (>=256 byte) file name compatibility: 'L' style long file | |
names were extremely simple to add, requiring a (net) addition of | |
a handful lines of code, and this patch supports both extracting | |
and creating L-style tar archives. Pax 'x' style long names are | |
more complex to parse, and this patch only supports extracting pax | |
'x' tars, but not creating them. | |
3. Command line argument compatibility improvements: 'c', 'x', 't' | |
args are accepted without needing a hyphen in front. The '-p' | |
flag is also accepted but is no-op (as it is the normal behaviour | |
of sbase tar anyway, and allows sbase tar to be used in scripts | |
specifying this flag). Directory tree member extraction is also | |
supported by this patch. | |
4. Handle tar archives with "." (current directory) entries. Some | |
archives contain "." or "./" entries, causing error reports when | |
the current code tries to remove() the current dir. I have added a | |
check in unarchive() to not perform remove() on encountering these | |
entries. | |
Diffstat: | |
M tar.c | 218 +++++++++++++++++------------… | |
1 file changed, 122 insertions(+), 96 deletions(-) | |
--- | |
diff --git a/tar.c b/tar.c | |
@@ -6,6 +6,7 @@ | |
#include <sys/sysmacros.h> | |
#endif | |
+#include <assert.h> | |
#include <errno.h> | |
#include <fcntl.h> | |
#include <grp.h> | |
@@ -19,7 +20,7 @@ | |
#include "fs.h" | |
#include "util.h" | |
-#define BLKSIZ 512 | |
+#define BLKSIZ (sizeof (struct header)) /* must equal 512 bytes */ | |
enum Type { | |
REG = '0', | |
@@ -50,6 +51,7 @@ struct header { | |
char major[8]; | |
char minor[8]; | |
char prefix[155]; | |
+ char padding[12]; | |
}; | |
static struct dirtime { | |
@@ -169,6 +171,17 @@ ewrite(int fd, const void *buf, size_t n) | |
return r; | |
} | |
+static unsigned | |
+chksum(struct header *h) | |
+{ | |
+ unsigned sum, i; | |
+ | |
+ memset(h->chksum, ' ', sizeof(h->chksum)); | |
+ for (i = 0, sum = 0, assert(BLKSIZ == 512); i < BLKSIZ; i++) | |
+ sum += *((unsigned char *)h + i); | |
+ return sum; | |
+} | |
+ | |
static void | |
putoctal(char *dst, unsigned num, int size) | |
{ | |
@@ -179,14 +192,16 @@ putoctal(char *dst, unsigned num, int size) | |
static int | |
archive(const char *path) | |
{ | |
- char b[BLKSIZ]; | |
- const char *base, *p; | |
- struct group *gr; | |
- struct header *h; | |
+ static const struct header blank = { | |
+ "././@LongLink", "0000000" , "0000000", "0000000", "0000000000… | |
+ "00000000000" , " ", AREG , "" , "ustar", "0… | |
+ }; | |
+ char b[BLKSIZ + BLKSIZ], *p; | |
+ struct header *h = (struct header *)b; | |
+ struct group *gr; | |
struct passwd *pw; | |
struct stat st; | |
- size_t chksum, i, nlen, plen; | |
- ssize_t l, r; | |
+ ssize_t l, n, r; | |
int fd = -1; | |
if (lstat(path, &st) < 0) { | |
@@ -196,47 +211,37 @@ archive(const char *path) | |
weprintf("ignoring %s\n", path); | |
return 0; | |
} | |
- | |
pw = getpwuid(st.st_uid); | |
gr = getgrgid(st.st_gid); | |
- h = (struct header *)b; | |
- memset(b, 0, sizeof(b)); | |
- | |
- plen = 0; | |
- base = path; | |
- if ((nlen = strlen(base)) >= sizeof(h->name)) { | |
- /* | |
- * Cover case where path name is too long (in which case we | |
- * need to split it to prefix and name). | |
- */ | |
- if ((base = strrchr(path, '/')) == NULL) | |
- goto too_long; | |
- for (p = base++; p > path && *p == '/'; --p) | |
- ; | |
- | |
- nlen -= base - path; | |
- plen = p - path + 1; | |
- if (nlen >= sizeof(h->name) || plen >= sizeof(h->prefix)) | |
- goto too_long; | |
+ *h = blank; | |
+ n = strlcpy(h->name, path, sizeof(h->name)); | |
+ if (n >= sizeof(h->name)) { | |
+ *++h = blank; | |
+ h->type = 'L'; | |
+ putoctal(h->size, n, sizeof(h->size)); | |
+ putoctal(h->chksum, chksum(h), sizeof(h->chksum)); | |
+ | |
+ ewrite(tarfd, (char *)h, BLKSIZ); | |
+ for (p = (char *)path; n > 0; n -= BLKSIZ, p += BLKSIZ) { | |
+ if (n < BLKSIZ) { | |
+ p = memcpy(h--, p, n); | |
+ memset(p + n, 0, BLKSIZ - n); | |
+ } | |
+ ewrite(tarfd, p, BLKSIZ); | |
+ } | |
} | |
- memcpy(h->name, base, nlen); | |
- memcpy(h->prefix, path, plen); | |
- | |
putoctal(h->mode, (unsigned)st.st_mode & 0777, sizeof(h->mode)); | |
putoctal(h->uid, (unsigned)st.st_uid, sizeof(h->uid)); | |
putoctal(h->gid, (unsigned)st.st_gid, sizeof(h->gid)); | |
- putoctal(h->size, 0, sizeof(h->size)); | |
putoctal(h->mtime, (unsigned)st.st_mtime, sizeof(h->mtime)); | |
- memcpy( h->magic, "ustar", sizeof(h->magic)); | |
- memcpy( h->version, "00", sizeof(h->version)); | |
estrlcpy(h->uname, pw ? pw->pw_name : "", sizeof(h->uname)); | |
estrlcpy(h->gname, gr ? gr->gr_name : "", sizeof(h->gname)); | |
if (S_ISREG(st.st_mode)) { | |
h->type = REG; | |
- putoctal(h->size, (unsigned)st.st_size, sizeof(h->size)); | |
+ putoctal(h->size, st.st_size, sizeof(h->size)); | |
fd = open(path, O_RDONLY); | |
if (fd < 0) | |
eprintf("open %s:", path); | |
@@ -255,10 +260,7 @@ archive(const char *path) | |
h->type = FIFO; | |
} | |
- memset(h->chksum, ' ', sizeof(h->chksum)); | |
- for (i = 0, chksum = 0; i < sizeof(*h); i++) | |
- chksum += (unsigned char)b[i]; | |
- putoctal(h->chksum, chksum, sizeof(h->chksum)); | |
+ putoctal(h->chksum, chksum(h), sizeof(h->chksum)); | |
ewrite(tarfd, b, BLKSIZ); | |
if (fd != -1) { | |
@@ -271,24 +273,21 @@ archive(const char *path) | |
} | |
return 0; | |
- | |
-too_long: | |
- eprintf("filename too long: %s\n", path); | |
} | |
static int | |
unarchive(char *fname, ssize_t l, char b[BLKSIZ]) | |
{ | |
- char lname[101], *tmp, *p; | |
- long mode, major, minor, type, mtime, uid, gid; | |
struct header *h = (struct header *)b; | |
- int fd = -1; | |
struct timespec times[2]; | |
+ char lname[101], *tmp, *p; | |
+ long mode, major, minor, type, mtime, uid, gid; | |
+ int fd = -1, lnk = h->type == SYMLINK; | |
if (!mflag && ((mtime = strtol(h->mtime, &p, 8)) < 0 || *p != '\0')) | |
eprintf("strtol %s: invalid number\n", h->mtime); | |
- if (remove(fname) < 0 && errno != ENOENT) | |
- weprintf("remove %s:", fname); | |
+ if (strcmp(fname, ".") && strcmp(fname, "./") && remove(fname) < 0) | |
+ if (errno != ENOENT) weprintf("remove %s:", fname); | |
tmp = estrdup(fname); | |
mkdirp(dirname(tmp), 0777, 0777); | |
@@ -308,10 +307,9 @@ unarchive(char *fname, ssize_t l, char b[BLKSIZ]) | |
case SYMLINK: | |
snprintf(lname, sizeof(lname), "%.*s", (int)sizeof(h->linkname… | |
h->linkname); | |
- if (((h->type == HARDLINK) ? link : symlink)(lname, fname) < 0) | |
- eprintf("%s %s -> %s:", | |
- (h->type == HARDLINK) ? "link" : "symlink", | |
- fname, lname); | |
+ if ((lnk ? symlink:link)(lname, fname) < 0) | |
+ eprintf("%s %s -> %s:", lnk ? "symlink":"link", fname,… | |
+ lnk++; | |
break; | |
case DIRECTORY: | |
if ((mode = strtol(h->mode, &p, 8)) < 0 || *p != '\0') | |
@@ -354,14 +352,14 @@ unarchive(char *fname, ssize_t l, char b[BLKSIZ]) | |
close(fd); | |
} | |
- if (h->type == HARDLINK) | |
+ if (lnk == 1) | |
return 0; | |
times[0].tv_sec = times[1].tv_sec = mtime; | |
times[0].tv_nsec = times[1].tv_nsec = 0; | |
if (!mflag && utimensat(AT_FDCWD, fname, times, AT_SYMLINK_NOFOLLOW) <… | |
weprintf("utimensat %s:", fname); | |
- if (h->type == SYMLINK) { | |
+ if (lnk) { | |
if (!getuid() && lchown(fname, uid, gid)) | |
weprintf("lchown %s:", fname); | |
} else { | |
@@ -435,9 +433,9 @@ sanitize(struct header *h) | |
static void | |
chktar(struct header *h) | |
{ | |
- char tmp[8], *err, *p = (char *)h; | |
const char *reason; | |
- long s1, s2, i; | |
+ char tmp[sizeof h->chksum], *err = ""; | |
+ long sum, i; | |
if (h->prefix[0] == '\0' && h->name[0] == '\0') { | |
reason = "empty filename"; | |
@@ -448,23 +446,19 @@ chktar(struct header *h) | |
goto bad; | |
} | |
memcpy(tmp, h->chksum, sizeof(tmp)); | |
- for (i = 0; i < sizeof(tmp) && tmp[i] == ' '; i++); | |
- for (; i < sizeof(tmp); i++) | |
- if (tmp[i] == ' ') | |
- tmp[i] = '\0'; | |
- s1 = strtol(tmp, &err, 8); | |
- if (s1 < 0 || *err != '\0') { | |
+ for (i = sizeof(tmp)-1; i > 0 && tmp[i] == ' '; i--) { | |
+ tmp[i] = '\0'; | |
+ } | |
+ sum = strtol(tmp, &err, 8); | |
+ if (sum < 0 || sum >= BLKSIZ*256 || *err != '\0') { | |
reason = "invalid checksum"; | |
goto bad; | |
} | |
- memset(h->chksum, ' ', sizeof(h->chksum)); | |
- for (i = 0, s2 = 0; i < sizeof(*h); i++) | |
- s2 += (unsigned char)p[i]; | |
- if (s1 != s2) { | |
+ if (sum != chksum(h)) { | |
reason = "incorrect checksum"; | |
goto bad; | |
} | |
- memcpy(h->chksum, tmp, sizeof(h->chksum)); | |
+ memcpy(h->chksum, tmp, sizeof(tmp)); | |
return; | |
bad: | |
eprintf("malformed tar archive: %s\n", reason); | |
@@ -473,45 +467,70 @@ bad: | |
static void | |
xt(int argc, char *argv[], int mode) | |
{ | |
- char b[BLKSIZ], fname[256 + 1], *p; | |
+ long size, l; | |
+ char b[BLKSIZ], fname[l = PATH_MAX + 1], *p, *q = NULL; | |
+ int i, m, n; | |
+ int (*fn)(char *, ssize_t, char[BLKSIZ]) = (mode == 'x') ? unarchive :… | |
struct timespec times[2]; | |
struct header *h = (struct header *)b; | |
struct dirtime *dirtime; | |
- long size; | |
- int i, n; | |
- int (*fn)(char *, ssize_t, char[BLKSIZ]) = (mode == 'x') ? unarchive :… | |
while (eread(tarfd, b, BLKSIZ) > 0 && (h->name[0] || h->prefix[0])) { | |
chktar(h); | |
- sanitize(h), n = 0; | |
- | |
- /* small dance around non-null terminated fields */ | |
- if (h->prefix[0]) | |
- n = snprintf(fname, sizeof(fname), "%.*s/", | |
- (int)sizeof(h->prefix), h->prefix); | |
- snprintf(fname + n, sizeof(fname) - n, "%.*s", | |
- (int)sizeof(h->name), h->name); | |
+ sanitize(h); | |
if ((size = strtol(h->size, &p, 8)) < 0 || *p != '\0') | |
- eprintf("strtol %s: invalid number\n", h->size); | |
+ eprintf("strtol %s: invalid size\n", h->size); | |
- if (argc) { | |
- /* only extract the given files */ | |
- for (i = 0; i < argc; i++) | |
- if (!strcmp(argv[i], fname)) | |
+ /* Long file path is read direcly into fname*/ | |
+ if (h->type == 'L' || h->type == 'x' || h->type == 'g') { | |
+ | |
+ /* Read header only up to size of fname buffer */ | |
+ for (q = fname; q < fname+size; q += BLKSIZ) { | |
+ if (q + BLKSIZ >= fname + l) | |
+ eprintf("name exceeds buffer: %s\n", f… | |
+ eread(tarfd, q, BLKSIZ); | |
+ } | |
+ | |
+ /* Convert pax x header with 'path=' field into L head… | |
+ if (h->type == 'x') for (q = fname; q < fname+size-16;… | |
+ if ((n = strtol(q, &p, 10)) < 0 || *p != ' ') | |
+ eprintf("strtol %.*s: invalid number\n… | |
+ if (n && strncmp(p+1, "path=", 5) == 0) { | |
+ memmove(fname, p+6, size = q+n - p-6 -… | |
+ h->type = 'L'; | |
break; | |
+ } | |
+ } | |
+ fname[size] = '\0'; | |
+ | |
+ /* Non L-like header (eg. pax 'g') is skipped by setti… | |
+ if (h->type != 'L') | |
+ q = NULL; | |
+ continue; | |
+ } | |
+ | |
+ /* Ustar path is copied into fname if no L header (ie: q is NU… | |
+ if (!q) { | |
+ m = sizeof h->prefix, n = sizeof h->name; | |
+ p = "/" + !h->prefix[0]; | |
+ snprintf(fname, l, "%.*s%s%.*s", m, h->prefix, p, n, h… | |
+ } | |
+ q = NULL; | |
+ | |
+ /* If argc > 0 then only extract the given files/dirs */ | |
+ if (argc) { | |
+ for (i = 0; i < argc; i++) { | |
+ if (strncmp(argv[i], fname, n = strlen(argv[i]… | |
+ if (strchr("/", fname[n]) || argv[i][n… | |
+ break; | |
+ } | |
if (i == argc) { | |
skipblk(size); | |
continue; | |
} | |
} | |
- /* ignore global pax header craziness */ | |
- if (h->type == 'g' || h->type == 'x') { | |
- skipblk(size); | |
- continue; | |
- } | |
- | |
fn(fname, size, b); | |
if (vflag && mode != 't') | |
puts(fname); | |
@@ -530,12 +549,15 @@ xt(int argc, char *argv[], int mode) | |
} | |
} | |
+char **args; | |
+int argn; | |
+ | |
static void | |
usage(void) | |
{ | |
- eprintf("usage: %s [-C dir] [-J | -Z | -a | -j | -z] -x [-m | -t] " | |
+ eprintf("usage: %s [x | t | -x | -t] [-C dir] [-J | -Z | -a | -j | -z]… | |
"[-f file] [file ...]\n" | |
- " %s [-C dir] [-J | -Z | -a | -j | -z] [-h] -c path ... " | |
+ " %s [c | -c] [-C dir] [-J | -Z | -a | -j | -z] [-h] pat… | |
"[-f file]\n", argv0, argv0); | |
} | |
@@ -547,6 +569,10 @@ main(int argc, char *argv[]) | |
char *file = NULL, *dir = ".", mode = '\0'; | |
int fd; | |
+ argv0 = argv[0]; | |
+ if (argc > 1 && strchr("cxt", mode = *argv[1])) | |
+ *(argv[1]+1) ? *argv[1] = '-' : (*++argv = argv0, --argc); | |
+ | |
ARGBEGIN { | |
case 'x': | |
case 'c': | |
@@ -576,18 +602,16 @@ main(int argc, char *argv[]) | |
case 'v': | |
vflag = 1; | |
break; | |
+ case 'p': | |
+ break; /* Do nothing as already default behaviour */ | |
default: | |
usage(); | |
} ARGEND | |
- if (!mode) | |
- usage(); | |
- if (mode == 'c') | |
- if (!argc) | |
- usage(); | |
- | |
switch (mode) { | |
case 'c': | |
+ if (!argc) | |
+ usage(); | |
tarfd = 1; | |
if (file && *file != '-') { | |
tarfd = open(file, O_WRONLY | O_TRUNC | O_CREAT, 0644); | |
@@ -626,6 +650,8 @@ main(int argc, char *argv[]) | |
eprintf("chdir %s:", dir); | |
xt(argc, argv, mode); | |
break; | |
+ default: | |
+ usage(); | |
} | |
return recurse_status; |