/*      $NetBSD: vndcompress.c,v 1.29 2017/07/29 21:04:07 riastradh Exp $       */

/*-
* Copyright (c) 2013 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Taylor R. Campbell.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/

#include <sys/cdefs.h>
__RCSID("$NetBSD: vndcompress.c,v 1.29 2017/07/29 21:04:07 riastradh Exp $");

#include <sys/endian.h>
#include <sys/stat.h>

#include <assert.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <limits.h>
#include <signal.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <zlib.h>

#include "common.h"
#include "offtab.h"
#include "utils.h"

/*
* XXX Switch to control bug-for-bug byte-for-byte compatibility with
* NetBSD's vndcompress.
*/
#define VNDCOMPRESS_COMPAT      0
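/*
 * At present the only difference is in compress_block(), where
 * compatibility mode compresses a full blocksize's worth of the
 * buffer even for the final partial block.
 */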

__CTASSERT(sizeof(struct cloop2_header) == CLOOP2_OFFSET_TABLE_OFFSET);
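
/*
 * Output layout, as produced below: a struct cloop2_header at offset
 * 0, then an offset table of n_blocks + 1 64-bit entries starting at
 * CLOOP2_OFFSET_TABLE_OFFSET (entry i records where compressed block
 * i begins, and the final entry marks the end of the last block),
 * then the zlib-compressed blocks back to back, padded at the end to
 * a multiple of DEV_BSIZE.
 */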

struct compress_state {
       uint64_t        size;           /* uncompressed size */
       uint64_t        offset;         /* output byte offset */
       uint32_t        blocksize;      /* bytes per block */
       uint32_t        blkno;          /* input block number */
       uint32_t        n_full_blocks;  /* floor(size/blocksize) */
       uint32_t        n_blocks;       /* ceiling(size/blocksize) */
       uint32_t        n_offsets;      /* n_blocks + 1 */
       uint32_t        end_block;      /* last block to transfer */
       uint32_t        checkpoint_blocks;      /* blocks before checkpoint */
       int             image_fd;
       int             cloop2_fd;
       struct offtab   offtab;
       uint32_t        n_checkpointed_blocks;
       volatile sig_atomic_t
                       initialized;    /* everything above initialized?  */
};

/* Global compression state for SIGINFO handler.  */
static struct compress_state    global_state;

struct sigdesc {
       int sd_signo;
       const char *sd_name;
};

static const struct sigdesc info_signals[] = {
       { SIGINFO, "SIGINFO" },
       { SIGUSR1, "SIGUSR1" },
};

static const struct sigdesc checkpoint_signals[] = {
       { SIGUSR2, "SIGUSR2" },
};

static void     init_signals(void);
static void     init_signal_handler(int, const struct sigdesc *, size_t,
                   void (*)(int));
static void     info_signal_handler(int);
static void     checkpoint_signal_handler(int);
static void     compress_progress(struct compress_state *);
static void     compress_init(int, char **, const struct options *,
                   struct compress_state *);
static bool     compress_restart(struct compress_state *);
static uint32_t compress_block(int, int, uint32_t, uint32_t, uint32_t, void *,
                   void *);
static void     compress_maybe_checkpoint(struct compress_state *);
static void     compress_checkpoint(struct compress_state *);
static void     compress_exit(struct compress_state *);

/*
* Compression entry point.
*/
int
vndcompress(int argc, char **argv, const struct options *O)
{
       struct compress_state *const S = &global_state;

       /* Paranoia.  The other fields either have no sentinel or use zero.  */
       S->image_fd = -1;
       S->cloop2_fd = -1;

       /* Set up signal handlers so we can handle SIGINFO ASAP.  */
       init_signals();

       /*
        * Parse the arguments to initialize our state.
        */
       compress_init(argc, argv, O, S);
       assert(MIN_BLOCKSIZE <= S->blocksize);
       assert(S->blocksize <= MAX_BLOCKSIZE);

       /*
        * Allocate compression buffers.
        *
	 * Compression may actually expand.  Out of an abundance of
	 * caution, assume it can expand to at most double its size.
        *
        * XXX Check and consider tightening this assumption.
        */
       __CTASSERT(MAX_BLOCKSIZE <= SIZE_MAX);
       void *const uncompbuf = malloc(S->blocksize);
       if (uncompbuf == NULL)
               err(1, "malloc uncompressed buffer");

       /* XXX compression ratio bound */
       __CTASSERT(MUL_OK(size_t, 2, MAX_BLOCKSIZE));
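	/*
	 * (zlib's compressBound(blocksize) would give the exact worst
	 * case, only a fraction of a percent plus a small constant
	 * over blocksize, so doubling is a comfortable overestimate.)
	 */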
       void *const compbuf = malloc(2 * (size_t)S->blocksize);
       if (compbuf == NULL)
               err(1, "malloc compressed buffer");

       /*
        * Compress the blocks.  S->blkno specifies the input block
        * we're about to transfer.  S->offset is the current output
        * offset.
        */
       while (S->blkno < S->n_blocks) {
               /* Report any progress.  */
               compress_progress(S);

               /* Stop if we've done the requested partial transfer.  */
               if ((0 < S->end_block) && (S->end_block <= S->blkno))
                       goto out;

               /* Checkpoint if appropriate.  */
               compress_maybe_checkpoint(S);
               offtab_prepare_put(&S->offtab, (S->blkno + 1));

		/*
		 * Choose read size: partial if last block, full if not.
		 * (We reach blkno == n_full_blocks only when the image
		 * has a partial trailing block, so readsize is nonzero.)
		 */
               const uint32_t readsize = (S->blkno == S->n_full_blocks?
                   (S->size % S->blocksize) : S->blocksize);
               assert(readsize > 0);
               assert(readsize <= S->blocksize);

               /* Fail noisily if we might be about to overflow.  */
               /* XXX compression ratio bound */
               __CTASSERT(MUL_OK(uint64_t, 2, MAX_BLOCKSIZE));
               __CTASSERT(MUL_OK(off_t, 2, MAX_BLOCKSIZE));
               assert(S->offset <= MIN(UINT64_MAX, OFF_MAX));
               if (!ADD_OK(uint64_t, S->offset, 2*(uintmax_t)readsize) ||
                   !ADD_OK(off_t, S->offset, 2*(uintmax_t)readsize))
			errx(1, "output offset may overflow at blkno %"PRIu32
			    ": %ju + 2*%ju",
			    S->blkno, (uintmax_t)S->offset,
			    (uintmax_t)readsize);

               /* Process the block.  */
               const uint32_t complen =
                   compress_block(S->image_fd, S->cloop2_fd, S->blkno,
                       S->blocksize, readsize, uncompbuf, compbuf);

               /*
                * Signal-atomically update the state to reflect
                * (a) what block number we are now at,
                * (b) how far we are now in the output file, and
                * (c) where the last block ended.
                */
               assert(ADD_OK(uint32_t, S->blkno, 1));
               assert(ADD_OK(uint64_t, S->offset, complen));
               assert(ADD_OK(off_t, (off_t)S->offset, (off_t)complen));
               assert((S->blkno + 1) < S->n_offsets);
           {
               sigset_t old_sigmask;
               block_signals(&old_sigmask);
               S->blkno += 1;                                  /* (a) */
               S->offset += complen;                           /* (b) */
               offtab_put(&S->offtab, S->blkno, S->offset);    /* (c) */
               restore_sigmask(&old_sigmask);
           }
       }

       /* Make sure we're all done. */
       assert(S->blkno == S->n_blocks);
       assert((S->blkno + 1) == S->n_offsets);

       /* Pad to the disk block size.  */
       const uint32_t n_extra = (S->offset % DEV_BSIZE);
       if (n_extra != 0) {
               const uint32_t n_padding = (DEV_BSIZE - n_extra);
               /* Reuse compbuf -- guaranteed to be large enough.  */
               (void)memset(compbuf, 0, n_padding);
               const ssize_t n_written = write(S->cloop2_fd, compbuf,
                   n_padding);
               if (n_written == -1)
                       err(1, "write final padding failed");
               assert(n_written >= 0);
               if ((size_t)n_written != n_padding)
                       errx(1, "partial write of final padding bytes"
                           ": %zu != %"PRIu32,
                           (size_t)n_written, n_padding);

               /* Account for the extra bytes in the output file.  */
               assert(ADD_OK(uint64_t, S->offset, n_padding));
               assert(ADD_OK(off_t, (off_t)S->offset, (off_t)n_padding));
           {
               sigset_t old_sigmask;
               block_signals(&old_sigmask);
               S->offset += n_padding;
               restore_sigmask(&old_sigmask);
           }
       }

out:
       /* One last checkpoint to commit the offset table.  */
       assert(S->offset <= OFF_MAX);
       assert((off_t)S->offset == lseek(S->cloop2_fd, 0, SEEK_CUR));
       compress_checkpoint(S);

       /*
        * Free the compression buffers and finalize the compression.
        */
       free(compbuf);
       free(uncompbuf);
       compress_exit(S);

       return 0;
}

/*
* Signal cruft.
*/

static void
init_signals(void)
{

       init_signal_handler(SA_RESTART, info_signals,
           __arraycount(info_signals), &info_signal_handler);
       init_signal_handler(SA_RESTART, checkpoint_signals,
           __arraycount(checkpoint_signals), &checkpoint_signal_handler);
}

static void
init_signal_handler(int flags, const struct sigdesc *signals, size_t n,
   void (*handler)(int))
{
       static const struct sigaction zero_sa;
       struct sigaction sa = zero_sa;
       size_t i;

       (void)sigemptyset(&sa.sa_mask);
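	/*
	 * Block the other signals in this group while its handler
	 * runs, so that, e.g., SIGUSR1 cannot interrupt the SIGINFO
	 * handler midway through a report.
	 */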
       for (i = 0; i < n; i++)
               (void)sigaddset(&sa.sa_mask, signals[i].sd_signo);
       sa.sa_flags = flags;
       sa.sa_handler = handler;
       for (i = 0; i < n; i++)
               if (sigaction(signals[i].sd_signo, &sa, NULL) == -1)
                       err(1, "sigaction(%s)", signals[i].sd_name);
}

static void
info_signal_handler(int signo __unused)
{
       /* Save errno.  */
       const int error = errno;
       struct compress_state *const S = &global_state;
       char buf[128];

       /* Bail if the state is not yet initialized.  */
       if (!S->initialized) {
               warnx_ss("initializing");
               goto out;
       }

       /* Carefully calculate our I/O position.  */
       assert(S->blocksize > 0);
       __CTASSERT(MUL_OK(uint64_t, MAX_N_BLOCKS, MAX_BLOCKSIZE));
       const uint64_t nread = ((uint64_t)S->blkno * (uint64_t)S->blocksize);

       assert(S->n_blocks > 0);
       __CTASSERT(MUL_OK(uint64_t, MAX_N_BLOCKS, sizeof(uint64_t)));
       __CTASSERT(ADD_OK(uint64_t, CLOOP2_OFFSET_TABLE_OFFSET,
               MAX_N_BLOCKS*sizeof(uint64_t)));
       const uint64_t nwritten = (S->offset <= (CLOOP2_OFFSET_TABLE_OFFSET +
               ((uint64_t)S->n_blocks * sizeof(uint64_t)))?
           0 : S->offset);

       /* snprintf_ss can't do floating-point, so do fixed-point instead.  */
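	/*
	 * Divide before multiplying when nwritten*100 would overflow;
	 * that loses a little precision but cannot wrap around.
	 */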
       const uint64_t ratio_percent =
           (nread > 0?
               ((nwritten >= (UINT64_MAX / 100)) ?
                   ((nwritten / nread) * 100) : ((nwritten * 100) / nread))
               : 0);

       /* Format the status.  */
       assert(S->n_checkpointed_blocks <= MAX_N_BLOCKS);
       assert(S->blocksize <= MAX_BLOCKSIZE);
       __CTASSERT(MUL_OK(uint64_t, MAX_N_BLOCKS, MAX_BLOCKSIZE));
       const int n = snprintf_ss(buf, sizeof(buf),
           "vndcompress: read %"PRIu64" bytes, wrote %"PRIu64" bytes, "
           "compression ratio %"PRIu64"%% (checkpointed %"PRIu64" bytes)\n",
           nread, nwritten, ratio_percent,
           ((uint64_t)S->n_checkpointed_blocks * (uint64_t)S->blocksize));
       if (n < 0) {
               const char msg[] = "vndcompress: can't format info\n";
		/* Exclude the terminating NUL from the write.  */
		(void)write(STDERR_FILENO, msg, __arraycount(msg) - 1);
       } else {
               __CTASSERT(INT_MAX <= SIZE_MAX);
               (void)write(STDERR_FILENO, buf, (size_t)n);
       }

out:
       /* Restore errno.  */
       errno = error;
}

static void
checkpoint_signal_handler(int signo __unused)
{
       /* Save errno.  */
       const int error = errno;
       struct compress_state *const S = &global_state;

       /* Bail if the state is not yet initialized.  */
       if (!S->initialized) {
               warnx_ss("nothing to checkpoint yet");
               goto out;
       }

       assert(S->image_fd >= 0);
       assert(S->cloop2_fd >= 0);

       /* Take a checkpoint.  */
       assert(S->blkno <= MAX_N_BLOCKS);
       assert(S->blocksize <= MAX_BLOCKSIZE);
       __CTASSERT(MUL_OK(uint64_t, MAX_N_BLOCKS, MAX_BLOCKSIZE));
       warnx_ss("checkpointing %"PRIu64" bytes",
           ((uint64_t)S->blkno * (uint64_t)S->blocksize));
       compress_checkpoint(S);

out:
       /* Restore errno.  */
       errno = error;
}

/*
* Report progress.
*
* XXX Should do a progress bar here.
*/
static void
compress_progress(struct compress_state *S __unused)
{
}

/*
* Parse arguments, open the files, and initialize the state.
*/
static void
compress_init(int argc, char **argv, const struct options *O,
   struct compress_state *S)
{

       if (!((argc == 2) || (argc == 3)))
               usage();

       const char *const image_pathname = argv[0];
       const char *const cloop2_pathname = argv[1];

       /* Grab the block size either from `-b' or from the last argument.  */
       __CTASSERT(0 < DEV_BSIZE);
       __CTASSERT((MIN_BLOCKSIZE % DEV_BSIZE) == 0);
       __CTASSERT(MIN_BLOCKSIZE <= DEF_BLOCKSIZE);
       __CTASSERT((DEF_BLOCKSIZE % DEV_BSIZE) == 0);
       __CTASSERT(DEF_BLOCKSIZE <= MAX_BLOCKSIZE);
       __CTASSERT((MAX_BLOCKSIZE % DEV_BSIZE) == 0);
       if (ISSET(O->flags, FLAG_b)) {
               if (argc == 3) {
                       warnx("use -b or the extra argument, not both");
                       usage();
               }
               S->blocksize = O->blocksize;
       } else {
               S->blocksize = (argc == 2? DEF_BLOCKSIZE :
                   strsuftoll("block size", argv[2], MIN_BLOCKSIZE,
                       MAX_BLOCKSIZE));
       }

       /* Sanity-check the blocksize.  (strsuftoll guarantees bounds.)  */
       __CTASSERT(DEV_BSIZE <= UINT32_MAX);
       if ((S->blocksize % DEV_BSIZE) != 0)
               errx(1, "bad blocksize: %"PRIu32
                   " (not a multiple of %"PRIu32")",
                   S->blocksize, (uint32_t)DEV_BSIZE);
       assert(MIN_BLOCKSIZE <= S->blocksize);
       assert((S->blocksize % DEV_BSIZE) == 0);
       assert(S->blocksize <= MAX_BLOCKSIZE);

       /* Grab the end block number if we have one.  */
       S->end_block = (ISSET(O->flags, FLAG_p)? O->end_block : 0);

       /* Grab the checkpoint block count, if we have one.  */
       S->checkpoint_blocks =
           (ISSET(O->flags, FLAG_k)? O->checkpoint_blocks : 0);

       /* Open the input image file and the output cloop2 file.  */
       S->image_fd = open(image_pathname, O_RDONLY);
       if (S->image_fd == -1)
               err(1, "open(%s)", image_pathname);

       int oflags;
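	/*
	 * Plain compression truncates (or creates) the output; -r
	 * opens it read/write for restart, creating it if necessary;
	 * -r -R requires that it already exist.
	 */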
       if (!ISSET(O->flags, FLAG_r))
               oflags = (O_WRONLY | O_TRUNC | O_CREAT);
       else if (!ISSET(O->flags, FLAG_R))
               oflags = (O_RDWR | O_CREAT);
       else
               oflags = O_RDWR;
       S->cloop2_fd = open(cloop2_pathname, oflags, 0777);
       if (S->cloop2_fd == -1)
               err(1, "open(%s)", cloop2_pathname);

       /* Find the size of the input image.  */
       if (ISSET(O->flags, FLAG_l)) {
               S->size = O->length;
       } else {
               static const struct stat zero_st;
               struct stat st = zero_st;
               if (fstat(S->image_fd, &st) == -1)
			err(1, "fstat(%s)", image_pathname);
               if (st.st_size <= 0)
                       errx(1, "unknown image size");
               assert(st.st_size >= 0);
               __CTASSERT(OFF_MAX <= UINT64_MAX);
               assert(__type_fit(uint64_t, st.st_size));
               S->size = st.st_size;
       }
       assert(S->size <= OFF_MAX);

       /* Find number of full blocks and whether there's a partial block.  */
       __CTASSERT(0 < MIN_BLOCKSIZE);
       assert(0 < S->blocksize);
       if (TOOMANY(off_t, (off_t)S->size, (off_t)S->blocksize,
               (off_t)MAX_N_BLOCKS))
               errx(1, "image too large for block size %"PRIu32": %"PRIu64,
                   S->blocksize, S->size);
       __CTASSERT(MAX_N_BLOCKS <= UINT32_MAX);
       S->n_full_blocks = S->size/S->blocksize;
       S->n_blocks = HOWMANY(S->size, S->blocksize);
       assert(S->n_full_blocks <= S->n_blocks);
       assert(S->n_blocks <= MAX_N_BLOCKS);

       /* Choose a window size.  */
       const uint32_t window_size = (ISSET(O->flags, FLAG_w)? O->window_size :
           DEF_WINDOW_SIZE);

       /* Create an offset table for the blocks; one extra for the end.  */
       __CTASSERT(ADD_OK(uint32_t, MAX_N_BLOCKS, 1));
       S->n_offsets = (S->n_blocks + 1);
       __CTASSERT(MAX_N_OFFSETS == (MAX_N_BLOCKS + 1));
       __CTASSERT(MUL_OK(size_t, MAX_N_OFFSETS, sizeof(uint64_t)));
       __CTASSERT(CLOOP2_OFFSET_TABLE_OFFSET <= OFFTAB_MAX_FDPOS);
       offtab_init(&S->offtab, S->n_offsets, window_size, S->cloop2_fd,
           CLOOP2_OFFSET_TABLE_OFFSET);

       /* Attempt to restart a partial transfer if requested.  */
       if (ISSET(O->flags, FLAG_r)) {
               if (compress_restart(S)) {
                       /*
                        * Restart succeeded.  Truncate the output
                        * here, in case any garbage got appended.  We
                        * are committed to making progress at this
                        * point.  If the ftruncate fails, we don't
                        * lose anything valuable -- this is the last
                        * point at which we can restart anyway.
                        */
                       if (ftruncate(S->cloop2_fd, S->offset) == -1)
                               err(1, "ftruncate failed");

                       /* All set!  No more initialization to do.  */
                       return;
               } else {
                       /* Restart failed.  Barf now if requested.  */
                       if (ISSET(O->flags, FLAG_R))
                               errx(1, "restart failed, aborting");

                       /* Otherwise, truncate and start at the top.  */
                       if (ftruncate(S->cloop2_fd, 0) == -1)
				err(1, "ftruncate failed");
                       if (lseek(S->cloop2_fd, 0, SEEK_SET) == -1)
                               err(1, "lseek to cloop2 beginning failed");

                       /* If we seeked in the input, rewind.  */
                       if (S->blkno != 0) {
                               if (lseek(S->image_fd, 0, SEEK_SET) == -1)
                                       err(1,
                                           "lseek to image beginning failed");
                       }
               }
       }

       /* Write a bogus (zero) header for now, until we checkpoint.  */
       static const struct cloop2_header zero_header;
       const ssize_t h_written = write(S->cloop2_fd, &zero_header,
           sizeof(zero_header));
       if (h_written == -1)
               err(1, "write header");
       assert(h_written >= 0);
       if ((size_t)h_written != sizeof(zero_header))
               errx(1, "partial write of header: %zu != %zu",
                   (size_t)h_written, sizeof(zero_header));

       /* Reset the offset table to be empty and write it.  */
       offtab_reset_write(&S->offtab);

       /* Start at the beginning of the image.  */
       S->blkno = 0;
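	/* The first compressed block goes just past the header and table.  */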
       S->offset = (sizeof(struct cloop2_header) +
           ((uint64_t)S->n_offsets * sizeof(uint64_t)));
       S->n_checkpointed_blocks = 0;

       /* Good to go and ready for interruption by a signal.  */
       S->initialized = 1;
}

/*
* Try to recover state from an existing output file.
*
* On success, fill the offset table with what's in the file, set
* S->blkno and S->offset to reflect our position, and seek to the
* respective positions in the input and output files.
*
* On failure, return false.  May clobber the offset table, S->blkno,
* S->offset, and the file pointers.
*/
static bool
compress_restart(struct compress_state *S)
{

       /* Read in the header.  */
       static const struct cloop2_header zero_header;
       struct cloop2_header header = zero_header;

       const ssize_t h_read = read_block(S->cloop2_fd, &header,
           sizeof(header));
       if (h_read == -1) {
               warn("failed to read header");
               return false;
       }
       assert(h_read >= 0);
       if ((size_t)h_read != sizeof(header)) {
               warnx("partial read of header");
               return false;
       }

       /* Check that the header looks like a header.  */
       __CTASSERT(sizeof(cloop2_magic) <= sizeof(header.cl2h_magic));
       if (memcmp(header.cl2h_magic, cloop2_magic, sizeof(cloop2_magic))
           != 0) {
               warnx("bad cloop2 shell script magic");
               return false;
       }

       /* Check the header parameters.  */
       if (be32toh(header.cl2h_blocksize) != S->blocksize) {
               warnx("mismatched block size: %"PRIu32
                   " (expected %"PRIu32")",
                   be32toh(header.cl2h_blocksize), S->blocksize);
               return false;
       }
       if (be32toh(header.cl2h_n_blocks) != S->n_blocks) {
               warnx("mismatched number of blocks: %"PRIu32
                   " (expected %"PRIu32")",
                   be32toh(header.cl2h_n_blocks), S->n_blocks);
               return false;
       }

       /* Read in the partial offset table.  */
       if (!offtab_reset_read(&S->offtab, &warn, &warnx))
               return false;
       if (!offtab_prepare_get(&S->offtab, 0))
               return false;
       const uint64_t first_offset = offtab_get(&S->offtab, 0);
       __CTASSERT(MUL_OK(uint64_t, MAX_N_OFFSETS, sizeof(uint64_t)));
       __CTASSERT(ADD_OK(uint64_t, sizeof(struct cloop2_header),
               MAX_N_OFFSETS*sizeof(uint64_t)));
       const uint64_t expected = sizeof(struct cloop2_header) +
           ((uint64_t)S->n_offsets * sizeof(uint64_t));
       if (first_offset != expected) {
               warnx("first offset is not 0x%"PRIx64": 0x%"PRIx64,
                   expected, first_offset);
               return false;
       }

       /* Find where we left off.  */
       __CTASSERT(MAX_N_OFFSETS <= UINT32_MAX);
       uint32_t blkno = 0;
       uint64_t last_offset = first_offset;
       for (blkno = 0; blkno < S->n_blocks; blkno++) {
               if (!offtab_prepare_get(&S->offtab, blkno))
                       return false;
               const uint64_t offset = offtab_get(&S->offtab, blkno);
               if (offset == ~(uint64_t)0)
                       break;

               if (0 < blkno) {
                       const uint64_t start = last_offset;
                       const uint64_t end = offset;
                       if (end <= start) {
                               warnx("bad offset table: 0x%"PRIx64
                                   ", 0x%"PRIx64, start, end);
                               return false;
                       }
                       /* XXX compression ratio bound */
                       __CTASSERT(MUL_OK(size_t, 2, MAX_BLOCKSIZE));
                       if ((2 * (size_t)S->blocksize) <= (end - start)) {
                               warnx("block %"PRIu32" too large:"
                                   " %"PRIu64" bytes"
                                   " from 0x%"PRIx64" to 0x%"PRIx64,
                                   blkno, (end - start), start, end);
                               return false;
                       }
               }

               last_offset = offset;
       }

       if (blkno == 0) {
               warnx("no blocks were written; nothing to restart");
               return false;
       }

       /* Make sure the rest of the offset table is all ones.  */
       if (blkno < S->n_blocks) {
               uint32_t nblkno;

               for (nblkno = blkno; nblkno < S->n_blocks; nblkno++) {
                       if (!offtab_prepare_get(&S->offtab, nblkno))
                               return false;
                       const uint64_t offset = offtab_get(&S->offtab, nblkno);
                       if (offset != ~(uint64_t)0) {
                               warnx("bad partial offset table entry"
                                   " at %"PRIu32": 0x%"PRIx64,
                                   nblkno, offset);
                               return false;
                       }
               }
       }

       /*
        * XXX Consider decompressing some number of blocks to make
        * sure they match.
        */

	/*
	 * Back up by one.  The offset table records where block
	 * blkno - 1 starts, but without the next entry we cannot tell
	 * whether that block was written out completely, so redo it.
	 */
       assert(1 <= blkno);
       blkno -= 1;

       /* Seek to the output position.  */
       assert(last_offset <= OFF_MAX);
       if (lseek(S->cloop2_fd, last_offset, SEEK_SET) == -1) {
		warn("lseek output cloop2 to 0x%"PRIx64" failed", last_offset);
               return false;
       }

       /* Switch from reading to writing the offset table.  */
       if (!offtab_transmogrify_read_to_write(&S->offtab, blkno))
               return false;

       /*
        * Seek to the input position last, after all other possible
        * failures, because if the input is a pipe, we can't change
        * our mind, rewind, and start at the beginning instead of
        * restarting.
        */
       assert(S->size <= OFF_MAX);
       assert(blkno <= (S->size / S->blocksize));
       const off_t restart_position = ((off_t)blkno * (off_t)S->blocksize);
       assert(0 <= restart_position);
       assert(restart_position <= (off_t)S->size);
       if (lseek(S->image_fd, restart_position, SEEK_SET) == -1) {
               if (errno != ESPIPE) {
                       warn("lseek input image failed");
                       return false;
               }

               /* Try read instead of lseek for a pipe/socket/fifo.  */
               void *const buffer = malloc(0x10000);
               if (buffer == NULL)
                       err(1, "malloc temporary buffer");
               off_t left = restart_position;
               while (left > 0) {
                       const size_t size = MIN(0x10000, left);
                       const ssize_t n_read = read_block(S->image_fd, buffer,
                           size);
                       if (n_read == -1) {
                               free(buffer);
                               warn("read of input image failed");
                               return false;
                       }
                       assert(n_read >= 0);
                       if ((size_t)n_read != size) {
                               free(buffer);
                               warnx("partial read of input image");
                               return false;
                       }
                       assert((off_t)size <= left);
                       left -= size;
               }
               free(buffer);
       }

       /* Start where we left off.  */
       S->blkno = blkno;
       S->offset = last_offset;
       S->n_checkpointed_blocks = blkno;

       /* Good to go and ready for interruption by a signal.  */
       S->initialized = 1;

       /* Success!  */
       return true;
}

/*
* Read a single block, compress it, and write the compressed block.
* Return the size of the compressed block.
*/
static uint32_t
compress_block(int in_fd, int out_fd, uint32_t blkno, uint32_t blocksize,
   uint32_t readsize, void *uncompbuf, void *compbuf)
{

       assert(readsize <= blocksize);
       assert(blocksize <= MAX_BLOCKSIZE);

       /* Read the uncompressed block.  */
       const ssize_t n_read = read_block(in_fd, uncompbuf, readsize);
       if (n_read == -1)
               err(1, "read block %"PRIu32, blkno);
       assert(n_read >= 0);
       if ((size_t)n_read != readsize)
               errx(1, "partial read of block %"PRIu32": %zu != %"PRIu32,
                   blkno, (size_t)n_read, readsize);

       /* Compress the block.  */
       /* XXX compression ratio bound */
       __CTASSERT(MUL_OK(unsigned long, 2, MAX_BLOCKSIZE));
       const unsigned long uncomplen =
           (VNDCOMPRESS_COMPAT? blocksize : readsize); /* XXX */
       unsigned long complen = (uncomplen * 2);
       const int zerror = compress2(compbuf, &complen, uncompbuf, uncomplen,
           Z_BEST_COMPRESSION);
       if (zerror != Z_OK)
		errx(1, "compression failed at block %"PRIu32" (%d): %s",
		    blkno, zerror, zError(zerror));
       assert(complen <= (uncomplen * 2));

       /* Write the compressed block.  */
       const ssize_t n_written = write(out_fd, compbuf, complen);
       if (n_written == -1)
               err(1, "write block %"PRIu32, blkno);
       assert(n_written >= 0);
       if ((size_t)n_written != complen)
               errx(1, "partial write of block %"PRIu32": %zu != %lu",
                   blkno, (size_t)n_written, complen);

	return (uint32_t)n_written;
}
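
/*
 * For reference, a minimal sketch (not compiled here) of how a reader
 * undoes compress_block, assuming the offset table has already been
 * loaded into host byte order as offset[0..n_blocks], and with fd,
 * buf, and blkno standing in for the reader's own variables:
 *
 *	// Inflate block blkno into buf, which holds blocksize bytes.
 *	const uint64_t start = offset[blkno], end = offset[blkno + 1];
 *	unsigned long len = blocksize;
 *	Bytef *const cbuf = malloc(end - start);
 *	if (cbuf == NULL)
 *		err(1, "malloc");
 *	if ((size_t)pread(fd, cbuf, end - start, start) != (end - start))
 *		err(1, "pread block %"PRIu32, blkno);
 *	if (uncompress(buf, &len, cbuf, end - start) != Z_OK)
 *		errx(1, "uncompress block %"PRIu32, blkno);
 *	free(cbuf);
 *
 * The final block may inflate to fewer than blocksize bytes; zlib
 * sets len to the actual uncompressed length.
 */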

/*
* Checkpoint if appropriate.
*/
static void
compress_maybe_checkpoint(struct compress_state *S)
{

       if ((0 < S->checkpoint_blocks) && (0 < S->blkno) &&
           ((S->blkno % S->checkpoint_blocks) == 0)) {
               assert(S->offset <= OFF_MAX);
               assert((off_t)S->offset == lseek(S->cloop2_fd, 0, SEEK_CUR));
               compress_checkpoint(S);
       }
}

/*
* Write the prefix of the offset table that we have filled so far.
*
 * We fsync the data blocks we have written, then write the offset
 * table, and finally fsync the offset table and file metadata.  This
 * ordering should help to avoid offset tables that point at garbage
 * data.
*
* This may be called from a signal handler, so it must not use stdio,
* malloc, &c. -- it may only (a) handle signal-safe state in S, and
* (b) do file descriptor I/O / fsync.
*
* XXX This requires further thought and heavy testing to be sure.
*
* XXX Should have an option to suppress fsync.
*
* XXX Should have an option to fail on fsync failures.
*
* XXX Would be nice if we could just do a barrier rather than an
* fsync.
*
* XXX How might we automatically test the fsyncs?
*/
static void
compress_checkpoint(struct compress_state *S)
{

       assert(S->blkno < S->n_offsets);
       const uint32_t n_offsets = (S->blkno + 1);
       assert(n_offsets <= S->n_offsets);

       assert(S->offset <= OFF_MAX);
       assert((off_t)S->offset <= lseek(S->cloop2_fd, 0, SEEK_CUR));

       /* Make sure the data hits the disk before we say it's ready.  */
       if (fsync_range(S->cloop2_fd, (FFILESYNC | FDISKSYNC), 0, S->offset)
           == -1)
               warn_ss("fsync of output failed");

       /* Say the data blocks are ready.  */
       offtab_checkpoint(&S->offtab, n_offsets,
           (S->n_checkpointed_blocks == 0? OFFTAB_CHECKPOINT_SYNC : 0));

       /*
        * If this is the first checkpoint, initialize the header.
        * Signal handler can race with main code here, but it is
        * harmless -- just an extra fsync and write of the header,
        * which are both idempotent.
        *
        * Once we have synchronously checkpointed the offset table,
        * subsequent writes will preserve a valid state.
        */
       if (S->n_checkpointed_blocks == 0) {
               static const struct cloop2_header zero_header;
               struct cloop2_header header = zero_header;

               /* Format the header.  */
               __CTASSERT(sizeof(cloop2_magic) <= sizeof(header.cl2h_magic));
               (void)memcpy(header.cl2h_magic, cloop2_magic,
                   sizeof(cloop2_magic));
               header.cl2h_blocksize = htobe32(S->blocksize);
               header.cl2h_n_blocks = htobe32(S->n_blocks);

               /* Write the header.  */
               const ssize_t h_written = pwrite(S->cloop2_fd, &header,
                   sizeof(header), 0);
               if (h_written == -1)
                       err_ss(1, "write header");
               assert(h_written >= 0);
               if ((size_t)h_written != sizeof(header))
                       errx_ss(1, "partial write of header: %zu != %zu",
                           (size_t)h_written, sizeof(header));
       }

       /* Record how many blocks we've checkpointed.  */
   {
       sigset_t old_sigmask;
       block_signals(&old_sigmask);
       S->n_checkpointed_blocks = S->blkno;
       restore_sigmask(&old_sigmask);
   }
}

/*
* Release everything we allocated in compress_init.
*/
static void
compress_exit(struct compress_state *S)
{

       /* Done with the offset table.  Destroy it.  */
       offtab_destroy(&S->offtab);

       /* Done with the files.  Close them.  */
       if (close(S->cloop2_fd) == -1)
               warn("close(cloop2 fd)");
       if (close(S->image_fd) == -1)
               warn("close(image fd)");
}