/* $NetBSD: gzip.c,v 1.127 2024/06/01 10:17:12 martin Exp $ */
/*
* Copyright (c) 1997-2024 Matthew R. Green
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
#ifndef lint
__COPYRIGHT("@(#) Copyright (c) 1997-2024 Matthew R. Green. "
"All rights reserved.");
__RCSID("$NetBSD: gzip.c,v 1.127 2024/06/01 10:17:12 martin Exp $");
#endif /* not lint */
/*
* gzip.c -- GPL free gzip using zlib.
*
* RFC 1950 covers the zlib format
* RFC 1951 covers the deflate format
* RFC 1952 covers the gzip format
*
* TODO:
* - use mmap where possible
* - handle some signals better (remove outfile?)
* - make bzip2/compress -v/-t/-l support work as well as possible
*/
#ifndef SMALL
/*
 * Split up $GZIP and prepend its words to the argument list.
 *
 * gzip  value of the environment variable; it is only read here, the
 *       words are cut out of a private strdup() copy.
 * argc  in/out: argument count, grows by the number of words found.
 * argv  in/out: replaced by a newly allocated vector laid out as
 *       { program name, $GZIP words..., original arguments..., NULL }.
 *
 * Words are separated by runs of spaces and tabs; there is no quoting.
 * When $GZIP contains no words, *argc and *argv are left untouched.
 * The new vector and the string copy are never freed: the vector entries
 * point into the copy and the vector is handed to the caller.
 *
 * NOTE(review): like the other callers in this file, this assumes
 * maybe_err() does not return.
 */
static void
prepend_gzip(char *gzip, int *argc, char ***argv)
{
	char *s, **nargv, **ac;
	int nenvarg = 0, i;

	/* first pass: count the words */
	for (s = gzip; *s != '\0';) {
		if (*s == ' ' || *s == '\t') {
			s++;
			continue;
		}
		nenvarg++;
		while (*s != '\0' && *s != ' ' && *s != '\t')
			s++;
	}

	/* punt early */
	if (nenvarg == 0)
		return;

	/* remember the old vector and make room for the combined one */
	*argc += nenvarg;
	ac = *argv;

	nargv = malloc(((size_t)*argc + 1) * sizeof(*nargv));
	if (nargv == NULL)
		maybe_err("malloc");

	/* publish the new vector to the caller */
	*argv = nargv;

	/* copy the program name first */
	i = 0;
	nargv[i++] = *(ac++);

	/* take a copy of $GZIP and add it to the array */
	s = strdup(gzip);
	if (s == NULL)
		maybe_err("strdup");

	/* second pass: same scan, but record and NUL-terminate each word */
	while (*s != '\0') {
		if (*s == ' ' || *s == '\t') {
			s++;
			continue;
		}
		nargv[i++] = s;
		while (*s != '\0' && *s != ' ' && *s != '\t')
			s++;
		/* overwrite the separator, unless the string ended here */
		if (*s != '\0')
			*s++ = '\0';
	}

	/* copy the original arguments and a NULL */
	while (*ac)
		nargv[i++] = *(ac++);
	nargv[i] = NULL;
}
#endif
/* compress input to output. Return bytes read, -1 on error */
static off_t
gz_compress(int in, int out, off_t *gsizep, const char *origname, uint32_t mtime)
{
z_stream z;
char *outbufp, *inbufp;
off_t in_tot = 0, out_tot = 0;
ssize_t in_size;
int i, error;
uLong crc;
#ifdef SMALL
static char header[] = { GZIP_MAGIC0, GZIP_MAGIC1, Z_DEFLATED, 0,
0, 0, 0, 0,
0, OS_CODE };
#endif
#ifndef SMALL
/*
* set the owner, mode, flags & utimes using the given file descriptor.
* file is only used in possible warning messages.
*/
static void
copymodes(int fd, const struct stat *sbp, const char *file)
{
struct timeval times[2];
struct stat sb;
/*
* If we have no info on the input, give this file some
* default values and return.
*/
if (sbp == NULL) {
mode_t mask = umask(022);
#ifndef SMALL
/*
 * Decide whether outfile may be written.
 *
 * Returns 1 when the caller may go ahead, 0 when the file must be skipped.
 * Nothing is checked in list mode (lflag) or when outfile cannot be
 * stat()ed.  An existing file is removed when fflag is set, or when stdin
 * is a terminal and the user answers 'y'/'Y' to the prompt; otherwise it
 * is left alone and 0 is returned.
 */
static int
check_outfile(const char *outfile)
{
	struct stat st;
	char reply[10] = { 'n', '\0' };	/* "no" unless the user says otherwise */

	if (lflag != 0 || stat(outfile, &st) != 0)
		return 1;

	/* forced: silently replace */
	if (fflag) {
		unlink(outfile);
		return 1;
	}

	/* nobody to ask */
	if (!isatty(STDIN_FILENO)) {
		maybe_warnx("%s already exists -- skipping", outfile);
		return 0;
	}

	fprintf(stderr,
	    "%s already exists -- do you wish to overwrite (y or n)? ",
	    outfile);
	/* a failed read leaves the default answer in place */
	(void)fgets(reply, sizeof(reply) - 1, stdin);
	if (reply[0] == 'y' || reply[0] == 'Y') {
		unlink(outfile);
		return 1;
	}

	fprintf(stderr, "\tnot overwriting\n");
	return 0;
}
/*
 * Remove the input file, unless kflag asks to keep it.
 *
 * file  path of the input
 * sb    stat data recorded for the input earlier
 *
 * The path is stat()ed again and only unlinked when it still names the
 * same device/inode pair as sb; a path that has vanished or now refers to
 * a different file is left alone.
 */
static void
unlink_input(const char *file, const struct stat *sb)
{
	struct stat now;

	if (!kflag &&
	    stat(file, &now) == 0 &&
	    now.st_dev == sb->st_dev &&
	    now.st_ino == sb->st_ino)
		unlink(file);
}
/*
 * Look for a known compressed-file suffix at the end of file.
 *
 * file   path to examine; rewritten in place when xlate is non-zero
 * xlate  when non-zero, the matched suffix is overwritten with the
 *        entry's "normal" replacement
 *
 * Returns the first matching entry of suffixes[], or NULL when none
 * matches.  A suffix only counts when the name is strictly longer than
 * the suffix itself.
 */
static const suffixes_t *
check_suffix(char *file, int xlate)
{
	const suffixes_t *const end = suffixes + NUM_SUFFIXES;
	const suffixes_t *cand;
	int namelen = strlen(file);

	for (cand = suffixes; cand < end; ++cand) {
		char *tail;

		/* need at least one character in front of the suffix */
		if (cand->ziplen < namelen) {
			tail = file + namelen - cand->ziplen;
			if (strcmp(cand->zipped, tail) == 0) {
				if (xlate)
					strcpy(tail, cand->normal);
				return cand;
			}
		}
	}
	return NULL;
}
/*
* compress the given file: create a corresponding .gz file and remove the
* original.
*/
static off_t
file_compress(char *file, char *outfile, size_t outsize)
{
int in;
int out;
off_t size, in_size;
#ifndef SMALL
struct stat isb, osb;
const suffixes_t *suff;
#endif
in = open(file, O_RDONLY);
if (in == -1) {
maybe_warn("can't open %s", file);
return -1;
}
#ifndef SMALL
if (fstat(in, &isb) != 0) {
close(in);
maybe_warn("can't stat %s", file);
return -1;
}
infile_set(file, isb.st_size);
#endif
if (cflag == 0) {
#ifndef SMALL
if (isb.st_nlink > 1 && fflag == 0) {
maybe_warnx("%s has %d other link%s -- "
"skipping", file, isb.st_nlink - 1,
isb.st_nlink == 1 ? "" : "s");
close(in);
return -1;
}
/*
* If there was an error, in_size will be -1.
* If we compressed to stdout, just return the size.
* Otherwise stat the file and check it is the correct size.
* We only blow away the file if we can stat the output and it
* has the expected size.
*/
if (cflag != 0)
return in_size == -1 ? -1 : size;
#ifndef SMALL
if (fstat(out, &osb) != 0) {
maybe_warn("couldn't stat: %s", outfile);
goto bad_outfile;
}
if (close(fd) != 0)
maybe_warn("couldn't close input");
if (zfd != STDOUT_FILENO && close(zfd) != 0)
maybe_warn("couldn't close output");
if (size == -1) {
if (cflag == 0)
unlink(outfile);
maybe_warnx("%s: uncompress failed", file);
return -1;
}
/* if testing, or we uncompressed to stdout, this is all we need */
#ifndef SMALL
if (tflag)
return size;
#endif
/* if we are uncompressing to stdout, don't remove the file. */
if (cflag)
return size;
/*
* if we create a file...
*/
/*
* if we can't stat the file don't remove the file.
*/
ofd = open(outfile, O_RDWR, 0);
if (ofd == -1) {
maybe_warn("couldn't open (leaving original): %s",
outfile);
return -1;
}
if (fstat(ofd, &osb) != 0) {
maybe_warn("couldn't stat (leaving original): %s",
outfile);
close(ofd);
return -1;
}
if (osb.st_size != size) {
maybe_warnx("stat gave different size: %" PRIdOFF
" != %" PRIdOFF " (leaving original)",
size, osb.st_size);
close(ofd);
unlink(outfile);
return -1;
}
unlink_input(file, &isb);
#ifndef SMALL
copymodes(ofd, &isb, outfile);
#endif
close(ofd);
return size;
unexpected_EOF:
maybe_warnx("%s: unexpected end of file", file);
lose:
if (fd != -1)
close(fd);
if (zfd != -1 && zfd != STDOUT_FILENO)
close(fd);
return -1;
}
#ifndef check_siginfo
/*
 * Act on a pending info request (print_info).
 *
 * Does nothing when print_info is 0.  Otherwise infile_clear() is always
 * called before returning, whether or not an input file is recorded.
 *
 * NOTE(review): the body below looks incomplete/spliced - buf and n are
 * never used, ttyfd is opened but never written to, and usize/gsize are
 * referenced without being declared in this function.  Confirm against
 * the full file before relying on this description.
 */
static void
check_siginfo(void)
{
/* -2 means "not tried yet"; replaced by the result of open() below */
static int ttyfd = -2;
char buf[2048];
int n;
if (print_info == 0)
return;
/* no input file recorded: nothing to report, just clear the state */
if (!infile)
goto out;
/* open the terminal only the first time we get here */
if (ttyfd == -2)
ttyfd = open(_PATH_TTY, O_RDWR | O_CLOEXEC);
#ifndef SMALL
/* verbose, not testing: print the ratio when both sizes are known */
if (vflag && !tflag && usize != -1 && gsize != -1)
print_verbage(NULL, NULL, usize, gsize);
/* verbose test mode: report the outcome for "(stdin)" */
if (vflag && tflag)
print_test("(stdin)", usize != -1);
#else
(void)&usize;
#endif
out:
infile_clear();
}
static void
handle_stdout(void)
{
off_t gsize;
#ifndef SMALL
off_t usize;
struct stat sb;
time_t systime;
uint32_t mtime;
int ret;
infile_set("<stdout>", 0);
if (fflag == 0 && isatty(STDOUT_FILENO)) {
maybe_warnx("standard output is a terminal -- ignoring");
return;
}
/* If stdin is a file use its mtime, otherwise use current time */
ret = fstat(STDIN_FILENO, &sb);
if (ret < 0) {
maybe_warn("Can't stat stdin");
return;
}
/*
 * Process one path name from the command line.
 *
 * "-" selects stdin (when decompressing) or stdout (when compressing).
 * Otherwise the path is stat()ed.  When decompressing and the path does
 * not exist, "<path>" plus the first suffix in suffixes[] is tried once.
 * Directories go to handle_dir() under rflag (non-SMALL builds only),
 * regular files go to handle_file(), and anything else gets a warning.
 */
static void
handle_pathname(char *path)
{
	char *given = path;	/* the name as passed in, for diagnostics */
	char *alt = NULL;	/* malloc'ed "<path><suffix>", if we built one */
	ssize_t plen;
	int suflen;
	struct stat st;

	/* check for stdout/stdin */
	if (strcmp(path, "-") == 0) {
		if (dflag)
			handle_stdin();
		else
			handle_stdout();
		return;
	}

	while (stat(path, &st) != 0) {
		/*
		 * Only a missing file while decompressing earns a second
		 * attempt, and only one.
		 */
		if (!dflag || alt != NULL || errno != ENOENT) {
			maybe_warn("can't stat: %s", given);
			goto out;
		}

		/* let's try <path>.gz */
		plen = strlen(path);
		suflen = suffixes[0].ziplen;
		alt = malloc(plen + suflen + 1);
		if (alt == NULL)
			maybe_err("malloc");
		memcpy(alt, path, plen);
		/* +1 brings the suffix's terminating NUL along */
		memcpy(alt + plen, suffixes[0].zipped, suflen + 1);
		path = alt;
	}

	if (S_ISDIR(st.st_mode)) {
#ifndef SMALL
		if (rflag)
			handle_dir(path);
		else
#endif
			maybe_warnx("%s is a directory", path);
	} else if (S_ISREG(st.st_mode)) {
		handle_file(path, &st);
	} else {
		maybe_warnx("%s is not a regular file", path);
	}

out:
	free(alt);
}
/*
 * Compress or decompress one regular file, depending on dflag.
 *
 * file  path of the input
 * sbp   stat data for the input; st_size is taken as the size of the
 *       side that was not just produced
 *
 * The file is registered with infile_set() first.  When the worker
 * returns -1 the function returns at once, without calling
 * infile_clear().  In non-SMALL builds the test result (vflag && tflag)
 * or the compression statistics (vflag && !tflag) are printed.
 */
static void
handle_file(char *file, struct stat *sbp)
{
	char outfile[PATH_MAX];
	off_t plain_size, packed_size;

	infile_set(file, sbp->st_size);

	if (!dflag) {
		packed_size = file_compress(file, outfile, sizeof(outfile));
		if (packed_size == -1)
			return;
		plain_size = sbp->st_size;
	} else {
		plain_size = file_uncompress(file, outfile, sizeof(outfile));
#ifndef SMALL
		/* the test verdict is printed even when it failed */
		if (vflag && tflag)
			print_test(file, plain_size != -1);
#endif
		if (plain_size == -1)
			return;
		packed_size = sbp->st_size;
	}

	infile_clear();

#ifndef SMALL
	/* with cflag there is no replacement file to name */
	if (vflag && !tflag)
		print_verbage(file, cflag ? NULL : outfile, plain_size,
		    packed_size);
#endif
}
#ifndef SMALL
/*
 * Used with -r: walk the tree rooted at dir and pass every regular file
 * found to handle_file().
 *
 * The walk uses FTS_PHYSICAL | FTS_NOCHDIR.  Directories themselves are
 * skipped, entries fts could not read or stat get a warning, and any
 * other entry type is ignored.  If the walk cannot be started a warning
 * is printed and nothing is processed.
 */
static void
handle_dir(char *dir)
{
	char *path_argv[2];
	FTS *fts;
	FTSENT *entry;

	/* fts_open() wants a NULL-terminated vector of roots */
	path_argv[0] = dir;
	path_argv[1] = NULL;

	fts = fts_open(path_argv, FTS_PHYSICAL | FTS_NOCHDIR, NULL);
	if (fts == NULL) {
		maybe_warn("couldn't fts_open %s", dir);
		return;
	}

	while ((entry = fts_read(fts)) != NULL) {
		switch (entry->fts_info) {
		case FTS_D:
		case FTS_DP:
			/* pre- and post-order directory visits: no work */
			continue;
		case FTS_DNR:
		case FTS_ERR:
		case FTS_NS:
			maybe_warn("%s", entry->fts_path);
			continue;
		case FTS_F:
			handle_file(entry->fts_path, entry->fts_statp);
			break;
		default:
			/* not a regular file: nothing to do */
			break;
		}
	}
	(void)fts_close(fts);
}
#endif
/*
 * Print a ratio - the size reduction as a percentage of the uncompressed
 * size, with one decimal place, right-aligned in a field of five and
 * followed by '%'.  No newline is written.
 *
 * in     uncompressed size
 * out    compressed size
 * where  stream to print on
 *
 * 0/0 prints 0.0%.  An output more than twice the size of the input
 * prints -99.9%.
 */
static void
print_ratio(off_t in, off_t out, FILE *where)
{
	int percent10;	/* 10 * percent */
	off_t diff;
	char buff[8];
	int len;

	/*
	 * 2000 * diff / in - 1000 == 1000 * (in - out) / in; working with
	 * in - out/2 keeps the dividend non-negative for out up to 2 * in.
	 */
	diff = in - out / 2;
	if (in == 0 && out == 0)
		percent10 = 0;
	else if (diff < 0)
		/*
		 * Output is more than double the size of the input!
		 * Print -99.9%.  Quite possibly we've failed to get the
		 * original size.
		 */
		percent10 = -999;
	else {
		/*
		 * We only need 12 bits of result from the final division,
		 * so reduce the values until a 32bit division will suffice.
		 */
		while (in > 0x100000) {
			diff >>= 1;
			in >>= 1;
		}
		if (in != 0)
			/*
			 * Convert the quotient (0..2000) to int before
			 * subtracting, so a negative ratio is computed in
			 * signed arithmetic rather than by wrapping an
			 * unsigned value.
			 */
			percent10 = (int)(((unsigned int)diff * 2000) /
			    (unsigned int)in) - 1000;
		else
			percent10 = 0;
	}

	/* at least two digits, then a '.' placeholder at the end */
	len = snprintf(buff, sizeof buff, "%2.2d.", percent10);
	/* Move the '.' to before the last digit */
	buff[len - 1] = buff[len - 2];
	buff[len - 2] = '.';
	fprintf(where, "%5s%%", buff);
}
#ifndef SMALL
/*
 * Print one line of compression statistics on stderr.
 *
 * file   input name, printed as "name:" followed by tab padding; may be
 *        NULL to omit it
 * nfile  name of the file that replaced the input, or NULL if none
 * usize  uncompressed size
 * gsize  compressed size
 *
 * The line is terminated with a newline and stderr is flushed.
 */
static void
print_verbage(const char *file, const char *nfile, off_t usize, off_t gsize)
{
	if (file != NULL) {
		/* names shorter than 7 characters get a second tab */
		const char *pad = "\t";

		if (strlen(file) < 7)
			pad = "\t\t";
		fprintf(stderr, "%s:%s ", file, pad);
	}

	print_ratio(usize, gsize, stderr);

	if (nfile != NULL)
		fprintf(stderr, " -- replaced with %s", nfile);

	fputs("\n", stderr);
	fflush(stderr);
}
/* print test results */
static void
print_test(const char *file, int ok)
{
/* print a file's info ala --list */
/* eg:
compressed uncompressed ratio uncompressed_name
354841 1679360 78.8% /usr/pkgsrc/distfiles/libglade-2.0.1.tar
*/
static void
print_list(int fd, off_t out, const char *outfile, time_t ts)
{
static int first = 1;
#ifndef SMALL
static off_t in_tot, out_tot;
uint32_t crc = 0;
#endif
off_t in = 0, rv;
if (first) {
#ifndef SMALL
if (vflag)
printf("method crc date time ");
#endif
if (qflag == 0)
printf(" compressed uncompressed "
"ratio uncompressed_name\n");
}
first = 0;
/* print totals? */
#ifndef SMALL
if (fd == -1) {
in = in_tot;
out = out_tot;
} else
#endif
{
/* read the 8-byte trailer - its last 4 bytes are the uncompressed size */
rv = lseek(fd, (off_t)(-8), SEEK_END);
if (rv != -1) {
unsigned char buf[8];
uint32_t usize;
rv = read(fd, (char *)buf, sizeof(buf));
if (rv == -1)
maybe_warn("read of uncompressed size");
else if (rv != sizeof(buf))
maybe_warnx("read of uncompressed size");