/*      $NetBSD: bzip2recover.c,v 1.5 2019/07/21 11:52:14 maya Exp $    */

/*-----------------------------------------------------------*/
/*--- Block recoverer program for bzip2                   ---*/
/*---                                      bzip2recover.c ---*/
/*-----------------------------------------------------------*/

/* ------------------------------------------------------------------
  This file is part of bzip2/libbzip2, a program and library for
  lossless, block-sorting data compression.

  bzip2/libbzip2 version 1.0.8 of 13 July 2019
  Copyright (C) 1996-2019 Julian Seward <[email protected]>

  Please read the WARNING, DISCLAIMER and PATENTS sections in the
  README file.

  This program is released under the terms of the license contained
  in the file LICENSE.
  ------------------------------------------------------------------ */

/* This program is a complete hack and should be rewritten properly.
        It isn't very complicated. */

#include <inttypes.h>
#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>


/* This program records bit locations in the file to be recovered.
  That means that if 64-bit ints are not supported, we will not
  be able to recover .bz2 files over 512MB (2^32 bits) long.
  This build unconditionally uses uint64_t from <inttypes.h> (the
  "#if 1" branch below); the MSVC __int64 and plain unsigned int
  alternatives are kept in the "#else" branch but are not compiled.

  64-bit support was introduced in version 1.0.2; all prior versions
  have the 512MB limitation.
*/
/* MaybeUInt64 is the type used for bit positions and byte counters,
   and MaybeUInt64_FMT is its printf conversion.  The "#if 1" branch is
   the one that is compiled: it always selects uint64_t.  The "#else"
   alternatives (MSVC __int64, plain unsigned int) are disabled. */
#if 1
  typedef uint64_t  MaybeUInt64;
#  define MaybeUInt64_FMT "%" PRIu64
#else
#ifdef _MSC_VER
  typedef  unsigned __int64  MaybeUInt64;
#  define MaybeUInt64_FMT "%I64u"
#else
  typedef  unsigned int   MaybeUInt64;
#  define MaybeUInt64_FMT "%u"
#endif
#endif

/* Short aliases for the basic integer and character types used
   throughout this file.  Bool is an unsigned char holding True (1)
   or False (0). */
typedef  unsigned int   UInt32;
typedef  int            Int32;
typedef  unsigned char  UChar;
typedef  char           Char;
typedef  unsigned char  Bool;
#define True    ((Bool)1)
#define False   ((Bool)0)


/* Size in bytes of each of the file/program name buffers below. */
#define BZ_MAX_FILENAME 2000

/* Name of the damaged input file, name of the output file currently
   being written, and the program name copied from argv[0]. */
Char inFileName[BZ_MAX_FILENAME];
Char outFileName[BZ_MAX_FILENAME];
Char progName[BZ_MAX_FILENAME];

/* bytesOut is incremented for every byte written through a BitStream
   (see bsPutBit and bsClose).  bytesIn is not updated anywhere in the
   code visible in this file. */
MaybeUInt64 bytesOut = 0;
MaybeUInt64 bytesIn  = 0;

/*---------------------------------------------------*/
/*--- Bit stream I/O                              ---*/
/*---------------------------------------------------*/

/* State for reading or writing a file one bit at a time. */
typedef
  struct {
     FILE*  handle;    /* underlying stdio stream                        */
     Int32  buffer;    /* bits of the byte currently being built/read    */
     Int32  buffLive;  /* write: bits accumulated so far in buffer;      */
                       /* read: bits of buffer not yet handed out        */
     Char   mode;      /* 'r' for a read stream, 'w' for a write stream  */
  }
  BitStream;

/* Forward declarations of the file-local helpers defined below. */
static void readError ( void );
static void writeError ( void );
static void mallocFail ( Int32 n );
static BitStream* bsOpenReadStream ( FILE* stream );
static BitStream* bsOpenWriteStream ( FILE* stream );
static void bsPutBit ( BitStream* bs, Int32 bit );
static Int32 bsGetBit ( BitStream* bs );
static void bsClose ( BitStream* bs );
static void bsPutUChar ( BitStream* bs, UChar c );
static void bsPutUInt32 ( BitStream* bs, UInt32 c );
static Bool endsInBz2 ( Char* name );
static void tooManyBlocks ( Int32 max_handled_blocks );


/*---------------------------------------------------*/
/*--- Header bytes                                ---*/
/*---------------------------------------------------*/

/* Bytes of the stream header that main() writes at the start of every
   recovered file: 'B', 'Z', 'h', then BZ_HDR_0 + 9 (the character '9'). */
#define BZ_HDR_B 0x42                         /* 'B' */
#define BZ_HDR_Z 0x5a                         /* 'Z' */
#define BZ_HDR_h 0x68                         /* 'h' */
#define BZ_HDR_0 0x30                         /* '0' */


/*---------------------------------------------------*/
/*--- I/O errors                                  ---*/
/*---------------------------------------------------*/

/*---------------------------------------------*/
/*
 * Report a failure while reading the input file and terminate.
 *
 * Writes the program name and input file name to stderr, then the
 * system error text via perror(), then a warning that the output may
 * be incomplete, and exits with status 1.  Never returns.
 */
__dead static void readError ( void )
{
  /* fprintf may itself change errno, so remember the value that
     describes the failed read before producing any output. */
  int savedErrno = errno;

  fprintf ( stderr,
            "%s: I/O error reading `%s', possible reason follows.\n",
            progName, inFileName );
  errno = savedErrno;
  perror ( progName );
  fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
            progName );
  exit ( 1 );
}


/*---------------------------------------------*/
__dead static void writeError ( void )
{
  fprintf ( stderr,
            "%s: I/O error reading `%s', possible reason follows.\n",
           progName, inFileName );
  perror ( progName );
  fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
            progName );
  exit ( 1 );
}


/*---------------------------------------------*/
/*
 * Report that an allocation of n bytes failed and terminate with exit
 * status 1.  Never returns.
 */
__dead static void mallocFail ( Int32 n )
{
  FILE* err = stderr;

  fprintf ( err, "%s: malloc failed on request for %d bytes.\n",
            progName, n );
  fprintf ( err, "%s: warning: output file(s) may be incomplete.\n",
            progName );
  exit ( 1 );
}


/*---------------------------------------------*/
/*
 * Report that the input file holds more blocks than the fixed-size
 * block tables can record, advise how to raise the limit, and
 * terminate with exit status 1.  Never returns.
 *
 * max_handled_blocks is the limit quoted in the message.
 */
__dead static void tooManyBlocks ( Int32 max_handled_blocks )
{
  FILE* err = stderr;

  fprintf ( err, "%s: `%s' appears to contain more than %d blocks\n",
            progName, inFileName, max_handled_blocks );
  fprintf ( err, "%s: and cannot be handled.  To fix, increase\n",
            progName );
  fprintf ( err,
            "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
            progName );
  exit ( 1 );
}



/*---------------------------------------------*/
/*
 * Allocate a BitStream in read mode wrapping an already-open stdio
 * stream.  The bit buffer starts empty.  On allocation failure
 * mallocFail() is called, which does not return.
 */
static BitStream* bsOpenReadStream ( FILE* stream )
{
  BitStream* reader = malloc ( sizeof *reader );

  if (reader == NULL) mallocFail ( sizeof *reader );

  reader->mode     = 'r';
  reader->buffLive = 0;
  reader->buffer   = 0;
  reader->handle   = stream;
  return reader;
}


/*---------------------------------------------*/
/*
 * Allocate a BitStream in write mode wrapping an already-open stdio
 * stream.  The bit buffer starts empty.  On allocation failure
 * mallocFail() is called, which does not return.
 */
static BitStream* bsOpenWriteStream ( FILE* stream )
{
  BitStream* writer = malloc ( sizeof *writer );

  if (writer == NULL) mallocFail ( sizeof *writer );

  writer->mode     = 'w';
  writer->buffLive = 0;
  writer->buffer   = 0;
  writer->handle   = stream;
  return writer;
}


/*---------------------------------------------*/
/*
 * Append the low bit of `bit` to a write stream.
 *
 * Bits accumulate most-significant first in bs->buffer.  Once eight
 * bits are pending, the next call writes that byte to the file before
 * starting a new byte with the incoming bit.  A failed write calls
 * writeError(), which does not return.
 */
static void bsPutBit ( BitStream* bs, Int32 bit )
{
  Int32 lowBit = bit & 0x1;

  if (bs->buffLive != 8) {
     /* Room left in the current byte: shift the new bit in. */
     bs->buffer = ( (bs->buffer << 1) | lowBit );
     bs->buffLive++;
     return;
  }

  /* A full byte is pending: emit it, then start the next byte. */
  if (putc ( (UChar) bs->buffer, bs->handle ) == EOF) writeError();
  bytesOut++;
  bs->buffer = lowBit;
  bs->buffLive = 1;
}


/*---------------------------------------------*/
/*--
  Fetch the next bit from a read stream, most-significant bit of each
  byte first.

  Returns 0 or 1, or 2 to indicate EOF.  A read error on the underlying
  file calls readError(), which does not return.
--*/
static Int32 bsGetBit ( BitStream* bs )
{
  if (bs->buffLive > 0) {
     bs->buffLive --;
     return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 );
  } else {
     Int32 retVal = getc ( bs->handle );
     if ( retVal == EOF ) {
        /* Ask the stream itself whether this EOF is really an error.
           errno is never cleared by this program, so a stale non-zero
           value left by an unrelated call must not be taken as a
           read failure. */
        if (ferror ( bs->handle )) readError();
        return 2;
     }
     /* Hand out the top bit now; the remaining seven stay buffered. */
     bs->buffLive = 7;
     bs->buffer = retVal;
     return ( ((bs->buffer) >> 7) & 0x1 );
  }
}


/*---------------------------------------------*/
/*
 * Finish with a BitStream: flush it if it is a write stream, close the
 * underlying file and free the BitStream itself.
 *
 * For a write stream the pending bits are padded on the right with
 * zero bits up to a whole byte, that byte is written, and the file is
 * flushed.  Any failure calls writeError() or readError() according to
 * the stream's mode; neither returns.
 */
static void bsClose ( BitStream* bs )
{
  int isWriter = (bs->mode == 'w');

  if (isWriter) {
     /* Left-align the partial byte by shifting in zero bits. */
     for ( ; bs->buffLive < 8; bs->buffLive++)
        bs->buffer <<= 1;

     if (putc ( (UChar) (bs->buffer), bs->handle ) == EOF) writeError();
     bytesOut++;

     if (fflush ( bs->handle ) == EOF) writeError();
  }

  if (fclose ( bs->handle ) == EOF) {
     if (isWriter) writeError(); else readError();
  }

  free ( bs );
}


/*---------------------------------------------*/
/*
 * Write the eight bits of c to the stream, most-significant bit first.
 */
static void bsPutUChar ( BitStream* bs, UChar c )
{
  UInt32 mask;

  /* Walk a single-bit mask from bit 7 down to bit 0. */
  for (mask = 0x80; mask != 0; mask >>= 1)
     bsPutBit ( bs, (c & mask) != 0 );
}


/*---------------------------------------------*/
/*
 * Write the 32 bits of c to the stream, most-significant bit first.
 */
static void bsPutUInt32 ( BitStream* bs, UInt32 c )
{
  Int32 shift = 32;

  /* shift takes the values 31, 30, ..., 0 inside the loop body. */
  while (shift-- > 0)
     bsPutBit ( bs, (c >> shift) & 0x1 );
}


/*---------------------------------------------*/
/*
 * Tell whether name ends in ".bz2".
 *
 * Returns False for names of four characters or fewer, so a name that
 * consists of nothing but ".bz2" does not count.
 */
static Bool endsInBz2 ( Char* name )
{
  static const Char suffix[] = ".bz2";
  Int32 len = strlen ( name );

  if (len <= 4) return False;

  /* Compare the last four characters against the suffix. */
  return (Bool) (memcmp ( name + len - 4, suffix, 4 ) == 0);
}


/*---------------------------------------------------*/
/*---                                             ---*/
/*---------------------------------------------------*/

/* This logic isn't really right when it comes to Cygwin. */
/* BZ_SPLIT_SYM is the directory separator main() searches for (with
   strrchr) to find where the base name of the input path begins. */
#ifdef _WIN32
#  define  BZ_SPLIT_SYM  '\\'  /* path splitter on Windows platform */
#else
#  define  BZ_SPLIT_SYM  '/'   /* path splitter on Unix platform */
#endif

/* 48-bit marker 0x314159265359 that main() treats as the start of a
   block, split into its upper 16 bits and lower 32 bits so it can be
   compared against the (buffHi & 0xffff, buffLo) shift register. */
#define BLOCK_HEADER_HI  0x00003141UL
#define BLOCK_HEADER_LO  0x59265359UL

/* 48-bit marker 0x177245385090, split the same way.  main() searches
   for it alongside the block header and also writes it after each
   recovered block, followed by the CRC. */
#define BLOCK_ENDMARK_HI 0x00001772UL
#define BLOCK_ENDMARK_LO 0x45385090UL

/* Increase if necessary.  However, a .bz2 file with > 50000 blocks
  would have an uncompressed size of at least 40GB, so the chances
  are low you'll need to up this.
*/
#define BZ_MAX_HANDLED_BLOCKS 50000

/* Bit positions recorded by the first pass of main().
   bStart/bEnd:   first and last bit of every candidate segment found
                  between markers, indexed by currBlock.
   rbStart/rbEnd: the same for the segments accepted for recovery
                  (at least 130 bits long), indexed by rbCtr. */
MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS];
MaybeUInt64 bEnd   [BZ_MAX_HANDLED_BLOCKS];
MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS];
MaybeUInt64 rbEnd  [BZ_MAX_HANDLED_BLOCKS];

/*
 * Program entry point: split a damaged .bz2 file into one small .bz2
 * file per block that can still be located.
 *
 * argc/argv: exactly one argument is expected, the path of the damaged
 * file.  Any other argument count prints a usage message and exits
 * with status 1.
 *
 * Pass 1 reads the input bit by bit through a 64-bit shift register
 * (buffHi:buffLo) and records in bStart/bEnd the bit ranges that lie
 * between occurrences of the 48-bit block-header or end-of-stream
 * markers.  Ranges of at least 130 bits are copied to rbStart/rbEnd as
 * recoverable blocks.
 *
 * Pass 2 rereads the input and, for each recoverable block, creates a
 * file named "rec" + five-digit zero-padded block number + the original
 * base name (with ".bz2" appended if missing) in the same directory.
 * Each file receives a "BZh9" stream header, the block-header marker,
 * the block's bits, the end-of-stream marker, and the block's own CRC
 * as the stream CRC.
 *
 * Returns 0 on success.  Exits with status 1 on a usage error, an
 * over-long file name, failure to open a file, no blocks found, too
 * many blocks, or an I/O error.
 */
Int32 main ( Int32 argc, Char** argv )
{
  FILE*       inFile;
  FILE*       outFile;
  BitStream*  bsIn, *bsWr;
  Int32       b, wrBlock, currBlock, rbCtr;
  MaybeUInt64 bitsRead;

  UInt32      buffHi, buffLo, blockCRC;
  Char*       p;

  strncpy ( progName, argv[0], BZ_MAX_FILENAME-1);
  progName[BZ_MAX_FILENAME-1]='\0';
  inFileName[0] = outFileName[0] = 0;

  fprintf ( stderr,
            "bzip2recover 1.0.8: extracts blocks from damaged .bz2 files.\n" );

  if (argc != 2) {
     fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
                       progName, progName );
     switch (sizeof(MaybeUInt64)) {
        case 8:
           fprintf(stderr,
                   "\trestrictions on size of recovered file: None\n");
           break;
        case 4:
           fprintf(stderr,
                   "\trestrictions on size of recovered file: 512 MB\n");
           fprintf(stderr,
                   "\tto circumvent, recompile with MaybeUInt64 as an\n"
                   "\tunsigned 64-bit int.\n");
           break;
        default:
           fprintf(stderr,
                   "\tsizeof(MaybeUInt64) is not 4 or 8 -- "
                   "configuration error.\n");
           break;
     }
     exit(1);
  }

  /* Leave 20 bytes of headroom in the name buffers: the output name
     adds the "recNNNNN" prefix and possibly a ".bz2" suffix. */
  if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) {
     fprintf ( stderr,
               "%s: supplied filename is suspiciously (>= %d chars) long.  Bye!\n",
               progName, (int)strlen(argv[1]) );
     exit(1);
  }

  strcpy ( inFileName, argv[1] );

  inFile = fopen ( inFileName, "rb" );
  if (inFile == NULL) {
     fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName );
     exit(1);
  }

  /*-- Pass 1: locate block boundaries. --*/

  bsIn = bsOpenReadStream ( inFile );
  fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );

  bitsRead = 0;
  buffHi = buffLo = 0;
  currBlock = 0;
  bStart[currBlock] = 0;

  rbCtr = 0;

  while (True) {
     b = bsGetBit ( bsIn );
     bitsRead++;
     if (b == 2) {
        /* End of input: keep the trailing segment only if it holds at
           least 40 bits. */
        if (bitsRead >= bStart[currBlock] &&
           (bitsRead - bStart[currBlock]) >= 40) {
           bEnd[currBlock] = bitsRead-1;
           if (currBlock > 0)
              fprintf ( stderr, "   block %d runs from " MaybeUInt64_FMT
                                " to " MaybeUInt64_FMT " (incomplete)\n",
                        currBlock,  bStart[currBlock], bEnd[currBlock] );
        } else
           currBlock--;
        break;
     }
     /* Shift the new bit into the 64-bit window buffHi:buffLo. */
     buffHi = (buffHi << 1) | (buffLo >> 31);
     buffLo = (buffLo << 1) | (b & 1);
     if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI
            && buffLo == BLOCK_HEADER_LO)
          ||
          ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI
            && buffLo == BLOCK_ENDMARK_LO)
        ) {
        /* The marker occupies the last 48 bits read, so the previous
           segment ends on the bit just before it. */
        if (bitsRead > 49) {
           bEnd[currBlock] = bitsRead-49;
        } else {
           bEnd[currBlock] = 0;
        }
        if (currBlock > 0 &&
            (bEnd[currBlock] - bStart[currBlock]) >= 130) {
           fprintf ( stderr, "   block %d runs from " MaybeUInt64_FMT
                             " to " MaybeUInt64_FMT "\n",
                     rbCtr+1,  bStart[currBlock], bEnd[currBlock] );
           rbStart[rbCtr] = bStart[currBlock];
           rbEnd[rbCtr] = bEnd[currBlock];
           rbCtr++;
        }
        /* currBlock is about to be incremented and used as an index
           into bStart/bEnd, whose last valid index is
           BZ_MAX_HANDLED_BLOCKS-1.  Stop before the increment would
           step past the end of the tables. */
        if (currBlock >= BZ_MAX_HANDLED_BLOCKS-1)
           tooManyBlocks(BZ_MAX_HANDLED_BLOCKS);
        currBlock++;

        bStart[currBlock] = bitsRead;
     }
  }

  bsClose ( bsIn );

  /*-- identified blocks run from 1 to rbCtr inclusive. --*/

  if (rbCtr < 1) {
     fprintf ( stderr,
               "%s: sorry, I couldn't find any block boundaries.\n",
               progName );
     exit(1);
  };

  /*-- Pass 2: copy each recoverable block to its own file. --*/

  fprintf ( stderr, "%s: splitting into blocks\n", progName );

  inFile = fopen ( inFileName, "rb" );
  if (inFile == NULL) {
     fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName );
     exit(1);
  }
  bsIn = bsOpenReadStream ( inFile );

  /*-- placate gcc's dataflow analyser --*/
  blockCRC = 0; bsWr = 0;

  bitsRead = 0;
  outFile = NULL;
  wrBlock = 0;
  while (True) {
     b = bsGetBit(bsIn);
     if (b == 2) break;
     buffHi = (buffHi << 1) | (buffLo >> 31);
     buffLo = (buffLo << 1) | (b & 1);
     /* 48 bits into the block the window holds the 32 bits that
        immediately follow the block header; remember them as the CRC
        to store at the end of the recovered file. */
     if (bitsRead == 47+rbStart[wrBlock])
        blockCRC = (buffHi << 16) | (buffLo >> 16);

     if (outFile != NULL && bitsRead >= rbStart[wrBlock]
                         && bitsRead <= rbEnd[wrBlock]) {
        bsPutBit ( bsWr, b );
     }

     bitsRead++;

     if (bitsRead == rbEnd[wrBlock]+1) {
        /* Just passed the last bit of the current block: terminate
           the output stream with the end marker and CRC. */
        if (outFile != NULL) {
           bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 );
           bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 );
           bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 );
           bsPutUInt32 ( bsWr, blockCRC );
           bsClose ( bsWr );
           outFile = NULL;
        }
        if (wrBlock >= rbCtr) break;
        wrBlock++;
     } else
     if (bitsRead == rbStart[wrBlock]) {
        /* Create the output file name, correctly handling leading paths.
           (31.10.2001 by Sergey E. Kusikov) */
        Char* split;
        Int32 ofs, k;
        for (k = 0; k < BZ_MAX_FILENAME; k++)
           outFileName[k] = 0;
        strcpy (outFileName, inFileName);
        split = strrchr (outFileName, BZ_SPLIT_SYM);
        if (split == NULL) {
           split = outFileName;
        } else {
           ++split;
        }
        /* Now split points to the start of the basename. */
        ofs  = split - outFileName;
        sprintf (split, "rec%5d", wrBlock+1);
        /* "%5d" pads with spaces; turn the padding into zeros. */
        for (p = split; *p != 0; p++) if (*p == ' ') *p = '0';
        strcat (outFileName, inFileName + ofs);

        if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );

        fprintf ( stderr, "   writing block %d to `%s' ...\n",
                          wrBlock+1, outFileName );

        outFile = fopen ( outFileName, "wb" );
        if (outFile == NULL) {
           fprintf ( stderr, "%s: can't write `%s'\n",
                     progName, outFileName );
           exit(1);
        }
        /* Start the new file with the stream header and the block
           header marker. */
        bsWr = bsOpenWriteStream ( outFile );
        bsPutUChar ( bsWr, BZ_HDR_B );
        bsPutUChar ( bsWr, BZ_HDR_Z );
        bsPutUChar ( bsWr, BZ_HDR_h );
        bsPutUChar ( bsWr, BZ_HDR_0 + 9 );
        bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
        bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
        bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );
     }
  }

  fprintf ( stderr, "%s: finished\n", progName );
  return 0;
}



/*-----------------------------------------------------------*/
/*--- end                                  bzip2recover.c ---*/
/*-----------------------------------------------------------*/