/*
* MIME mail decoding.
*
* This module contains decoding routines for converting
* quoted-printable data into pure 8-bit data, in MIME
* formatted messages.
*
* By Henrik Storner <[email protected]>
*
* Configuration file support for fetchmail 4.3.8 by
* Frank Damgaard <[email protected]>
*
* For license terms, see the file COPYING in this directory.
*/

#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <iconv.h>
#include "unmime.h"

/*
 * Translate a single hexadecimal digit character into its value.
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F' and returns 0..15.  Any other
 * character yields 16, which callers use as the "not a hex digit"
 * marker.
 */
static unsigned char unhex(unsigned char c)
{
  unsigned char value = 16;   /* sentinel: not a hex digit */

  if (c >= '0' && c <= '9')
    value = (unsigned char)(c - '0');
  else if (c >= 'a' && c <= 'f')
    value = (unsigned char)(c - 'a' + 10);
  else if (c >= 'A' && c <= 'F')
    value = (unsigned char)(c - 'A' + 10);

  return value;
}

/*
 * Build one byte from the two hex digit characters of a "=XY" escape.
 *
 * c1 supplies the high nibble and c2 the low nibble.  When both are
 * valid hex digits the resulting byte is stored through c_out and 0 is
 * returned.  Otherwise 1 is returned and *c_out is left untouched.
 */
static int qp_char(unsigned char c1, unsigned char c2, unsigned char *c_out)
{
  const unsigned char hi = unhex(c1);
  const unsigned char lo = unhex(c2);

  /* unhex() reports a non-hex character as 16 */
  if (hi >= 16 || lo >= 16)
    return 1;

  *c_out = (unsigned char)((hi << 4) | lo);
  return 0;
}


/*
* Routines to decode MIME QP-encoded headers, as per RFC 2047.
*/

/* States of the header-decoding state machine driven by UnMimeHeader() */
#define S_COPY_PLAIN        0   /* Copy text verbatim while searching for MIMEHDR_INIT */
#define S_SKIP_MIMEINIT     1   /* Read the "charset?encoding?" prefix and step past it */
#define S_COPY_MIME         2   /* Decode the encoded text up to MIMEHDR_END */

static const char MIMEHDR_INIT[]  = "=?";       /* Marks the start of a coded sequence */
static const char MIMEHDR_END[]   = "?=";       /* Marks the end of a coded sequence */

void UnMimeHeader(unsigned char *hdr)
{
 /* Decode a buffer containing data encoded according to RFC
  * 2047. This only handles content-transfer-encoding; conversion
  * between character sets is not implemented.  In other words: We
  * assume the charsets used can be displayed by your mail program
  * without problems.
  */

 /* Note: Decoding is done "in-situ", i.e. without using an
  * additional buffer for temp. storage. This is possible, since the
  * decoded string will always be shorter than the encoded string,
  * due to the encoding scheme.
  */

 int  state = S_COPY_PLAIN;
 unsigned char *p_in, *p_out, *p;
 unsigned char enc = '\0';             /* initialization pacifies -Wall */
 int  i;
 char charset[128];

 /* Speed up in case this is not a MIME-encoded header */
 p = strstr(hdr, MIMEHDR_INIT);
 if (p == NULL)
   return;   /* No MIME header */

 /* Loop through the buffer.
  *  p_in : Next char to be processed.
  *  p_out: Where to put the next processed char
  *  enc  : Encoding used (usually, 'q' = quoted-printable)
  */
 for (p_out = p_in = hdr; (*p_in); ) {
   switch (state) {
   case S_COPY_PLAIN:
     p = strstr(p_in, MIMEHDR_INIT);
     if (p == NULL) {
       /*
        * No more coded data in buffer,
        * just move remainder into place.
        */
       i = strlen(p_in);   /* How much left */
       memmove(p_out, p_in, i);
       p_in += i; p_out += i;
     }
     else {
       /* MIME header init found at location p */
       if (p > p_in) {
         /* There are some uncoded chars at the beginning. */
         i = (p - p_in);
         memmove(p_out, p_in, i);
         p_out += i;
       }
       p_in = (p + 2);
       state = S_SKIP_MIMEINIT;
     }
     break;

   case S_SKIP_MIMEINIT:
     /* Mime type definition: "charset?encoding?" */
     p = strchr(p_in, '?');
     if (p != NULL) {
       /* p_in .. (p-1) holds the charset */
       strncpy(charset, p_in, p - p_in);
       charset[p - p_in] = '\0';

       /* *(p+1) is the transfer encoding, *(p+2) must be a '?' */
       if (*(p+2) == '?') {
         enc = tolower(*(p+1));
         p_in = p+3;
         state = S_COPY_MIME;
       }
       else
         state = S_COPY_PLAIN;
     }
     else
       state = S_COPY_PLAIN;   /* Invalid data */
     break;

   case S_COPY_MIME:
     p = strstr(p_in, MIMEHDR_END);  /* Find end of coded data */
     if (p == NULL) p = p_in + strlen(p_in);
     for (; (p_in < p); ) {
       /* Decode all encoded data */
       if (enc == 'q') {
         if (*p_in == '=') {
           /* Decode one char qp-coded at (p_in+1) and (p_in+2) */
           if (qp_char(*(p_in+1), *(p_in+2), p_out) == 0)
             p_in += 3;
           else {
             /* Invalid QP data - pass through unchanged. */
             *p_out = *p_in;
             p_in++;
           }
         }
         else if (*p_in == '_') {
           /*
            * RFC 2047: '_' inside encoded word represents 0x20.
            * NOT a space - always the value 0x20.
            */
           *p_out = 0x20;
           p_in++;
         }
         else {
           /* Copy unchanged */
           *p_out = *p_in;
           p_in++;
         }
         p_out++;
       }
       else if (enc == 'b') {
         /* Decode base64 encoded data */
         char delimsave;
         int decoded_count;

         delimsave = *p; *p = '\r';
         decoded_count = from64tobits(p_out, p_in, 0);
         *p = delimsave;
         if (decoded_count > 0)
           p_out += decoded_count;
         p_in = p;
       }
       else {
         /* Copy unchanged */
         *p_out = *p_in;
         p_in++;
         p_out++;
       }
     }
     if (*p_in)
       p_in += 2;   /* Skip the MIMEHDR_END delimiter */

     /*
      * We've completed decoding one encoded sequence. But another
      * may follow immediately, in which case whitespace before the
      * new MIMEHDR_INIT delimiter must be discarded.
      * See if that is the case
      */
     p = strstr(p_in, MIMEHDR_INIT);
     state = S_COPY_PLAIN;
     if (p != NULL) {
       /*
        * There is more MIME data later on. Is there
        * whitespace  only before the delimiter?
        */
       unsigned char *q;
       int  wsp_only = 1;

       for (q=p_in; (wsp_only && (q < p)); q++)
         wsp_only = isspace(*q);

       if (wsp_only) {
         /*
          * Whitespace-only before the MIME delimiter. OK,
          * just advance p_in to past the new MIMEHDR_INIT,
          * and prepare to process the new MIME charset/encoding
          * header.
          */
         p_in = p + sizeof(MIMEHDR_INIT) - 1;
         state = S_SKIP_MIMEINIT;
       }
     }
     break;
   }
 }

 *p_out = '\0';

 if ((! strcmp(charset, "UTF-8")) || (! strcmp(charset, "utf-8"))) {
    char obuf[1024], *ip, *op;
    size_t ileft, oleft;

    ip = hdr; op = obuf;
    ileft = strlen(ip); oleft = 1023;
    iconv(icd, (const char **)&ip, &ileft, (char **)&op, &oleft);
    if (ileft)
       return;
    *op = '\0';
    strcpy(hdr, obuf);
 }
}

/*
 * Decode a header in place with UnMimeHeader(), then flatten it:
 * every soft-hyphen byte (0xAD), carriage return and newline in the
 * result is replaced by a single space.
 *
 * s: writable, NUL-terminated header buffer.
 */
void unmime_header(unsigned char *s)
{
    unsigned char *cur;

    UnMimeHeader(s);

    /* One sweep over the decoded text handles all three byte values */
    for (cur = s; *cur != '\0'; cur++) {
        switch (*cur) {
        case 0xAD:      /* soft hyphen */
        case '\r':
        case '\n':
            *cur = ' ';
            break;
        default:
            break;
        }
    }
}