/* MIME mail decoding. */

/*
* MIME mail decoding.
*
* This module contains decoding routines for converting
* quoted-printable data into pure 8-bit data, in MIME
* formatted messages.
*
* By Henrik Storner <[email protected]>
*
* Configuration file support for fetchmail 4.3.8 by
* Frank Damgaard <[email protected]>
*
* For license terms, see the file COPYING in this directory.
*/

#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <iconv.h>
#include "unmime.h"

/*
 * Convert one hexadecimal digit character to its numeric value.
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F'; returns 16 for any
 * other character so callers can detect invalid input with "> 15".
 */
static unsigned char unhex(unsigned char c)
{
    enum { HEX_INVALID = 16 };

    if ('0' <= c && c <= '9')
        return (unsigned char)(c - '0');
    if ('a' <= c && c <= 'f')
        return (unsigned char)(10 + (c - 'a'));
    if ('A' <= c && c <= 'F')
        return (unsigned char)(10 + (c - 'A'));

    return HEX_INVALID;
}

/*
 * Decode a quoted-printable hex pair into a single byte.
 *
 * c1 is the high-nibble digit, c2 the low-nibble digit. On success the
 * decoded byte is stored in *c_out and 0 is returned. If either character
 * is not a hex digit, 1 is returned and *c_out is left untouched.
 */
static int qp_char(unsigned char c1, unsigned char c2, unsigned char *c_out)
{
    const unsigned char hi = unhex(c1);
    const unsigned char lo = unhex(c2);

    /* unhex() reports a non-hex character as a value above 15. */
    if (hi > 15 || lo > 15)
        return 1;

    *c_out = (unsigned char)((hi << 4) | lo);
    return 0;
}

/*
* Routines to decode MIME QP-encoded headers, as per RFC 2047.
*/

/* States of the header-decoding state machine. */
enum {
    S_COPY_PLAIN    = 0,   /* Copy bytes through, watching for "=?" */
    S_SKIP_MIMEINIT = 1,   /* Read "charset?encoding?" and skip past it */
    S_COPY_MIME     = 2    /* Decode the encoded text up to "?=" */
};

/* Delimiters that open and close an encoded sequence. */
static const char MIMEHDR_INIT[] = "=?";
static const char MIMEHDR_END[] = "?=";

/*
 * UnMimeHeader - decode RFC 2047 encoded-words in a header, in place.
 *
 * hdr is a writable, NUL-terminated header string; NULL is accepted and
 * ignored. Encoded-words of the form "=?charset?enc?text?=" are replaced
 * by their decoded bytes: enc 'q' is quoted-printable (with '_' meaning
 * 0x20), enc 'b' is base64 (decoded by from64tobits()), and text with any
 * other encoding letter is copied through unchanged. Whitespace that
 * separates two adjacent encoded-words is dropped.
 *
 * Decoding is done in situ: the decoded text is never longer than the
 * encoded text, so the write cursor never overtakes the read cursor.
 *
 * If the charset of the last "=?charset?" prefix seen is exactly "UTF-8"
 * or "utf-8", the decoded header is additionally passed through iconv()
 * using the descriptor `icd`, which is not defined in this function
 * (presumably opened elsewhere - confirm its target charset with the
 * caller). The converted text replaces the header only if the whole input
 * was converted and the result fits in the space the original occupied.
 *
 * NOTE(review): the charset comparison is case-sensitive and only matches
 * those two spellings.
 */
void UnMimeHeader(unsigned char *hdr)
{
    int state = S_COPY_PLAIN;
    unsigned char *p_in, *p_out, *p;
    unsigned char *lit_start;   /* first input byte skipped since the last output */
    unsigned char enc = '\0';
    size_t n, orig_len;
    char charset[128] = "";     /* empty until a charset has been parsed */

    if (hdr == NULL)
        return;

    /* Speed up in case this is not a MIME-encoded header */
    if (strstr((char *)hdr, MIMEHDR_INIT) == NULL)
        return;

    /*
     * The caller's buffer is only known to hold the original string, so
     * remember its length as the upper bound for anything written back.
     */
    orig_len = strlen((char *)hdr);
    lit_start = hdr;

    /*
     * Loop through the buffer.
     *   p_in : next char to be processed
     *   p_out: where to put the next processed char (p_out <= p_in)
     *   enc  : encoding letter, lower-cased ('q' or 'b')
     */
    for (p_out = p_in = hdr; *p_in; ) {
        switch (state) {
        case S_COPY_PLAIN:
            p = (unsigned char *)strstr((char *)p_in, MIMEHDR_INIT);
            if (p == NULL) {
                /* No more coded data: move the remainder into place. */
                n = strlen((char *)p_in);
                memmove(p_out, p_in, n);
                p_in += n;
                p_out += n;
            } else {
                /* Copy any uncoded chars in front of the "=?". */
                n = (size_t)(p - p_in);
                if (n > 0) {
                    memmove(p_out, p_in, n);
                    p_out += n;
                }
                lit_start = p;
                p_in = p + (sizeof(MIMEHDR_INIT) - 1);
                state = S_SKIP_MIMEINIT;
            }
            break;

        case S_SKIP_MIMEINIT:
            /* Expect "charset?encoding?" at p_in. */
            p = (unsigned char *)strchr((char *)p_in, '?');
            if (p != NULL) {
                /*
                 * p_in .. (p-1) holds the charset. The name comes from
                 * the message, so truncate it to fit the local buffer.
                 */
                n = (size_t)(p - p_in);
                if (n >= sizeof charset)
                    n = sizeof charset - 1;
                memcpy(charset, p_in, n);
                charset[n] = '\0';
            }

            /*
             * p[1] is the transfer encoding and p[2] must be '?'. Test
             * p[1] for NUL first so p[2] is never read past the end of
             * the string.
             */
            if (p != NULL && p[1] != '\0' && p[2] == '?') {
                enc = (unsigned char)tolower(p[1]);
                p_in = p + 3;
                state = S_COPY_MIME;
            } else {
                /*
                 * Not a valid encoded-word after all: emit the bytes that
                 * were skipped (the "=?" and any whitespace before it)
                 * literally instead of dropping them.
                 */
                n = (size_t)(p_in - lit_start);
                memmove(p_out, lit_start, n);
                p_out += n;
                state = S_COPY_PLAIN;
            }
            break;

        case S_COPY_MIME:
            /* Find the end of the coded data (or the end of the string). */
            p = (unsigned char *)strstr((char *)p_in, MIMEHDR_END);
            if (p == NULL)
                p = p_in + strlen((char *)p_in);

            while (p_in < p) {
                if (enc == 'q') {
                    /*
                     * "=XY" is only decoded when both hex digits lie
                     * inside the encoded text; otherwise the '=' is
                     * passed through unchanged.
                     */
                    if (*p_in == '=' && (p - p_in) >= 3 &&
                        qp_char(p_in[1], p_in[2], p_out) == 0) {
                        p_in += 3;
                    } else if (*p_in == '_') {
                        /* RFC 2047: '_' always represents the value 0x20. */
                        *p_out = 0x20;
                        p_in++;
                    } else {
                        /* Copy unchanged (including invalid QP data). */
                        *p_out = *p_in;
                        p_in++;
                    }
                    p_out++;
                } else if (enc == 'b') {
                    /*
                     * Base64: temporarily overwrite the byte at the end
                     * of the coded text with '\r' while from64tobits()
                     * runs, then restore it.
                     */
                    unsigned char delimsave;
                    int decoded_count;

                    delimsave = *p;
                    *p = '\r';
                    decoded_count = from64tobits(p_out, p_in, 0);
                    *p = delimsave;
                    if (decoded_count > 0)
                        p_out += decoded_count;
                    p_in = p;
                } else {
                    /* Unknown encoding: copy unchanged. */
                    *p_out = *p_in;
                    p_in++;
                    p_out++;
                }
            }
            if (*p_in)
                p_in += sizeof(MIMEHDR_END) - 1;   /* skip the "?=" */

            /*
             * One encoded sequence is done. If another one follows with
             * only whitespace in between, that whitespace is discarded.
             */
            p = (unsigned char *)strstr((char *)p_in, MIMEHDR_INIT);
            state = S_COPY_PLAIN;
            if (p != NULL) {
                unsigned char *q;
                int wsp_only = 1;

                for (q = p_in; wsp_only && q < p; q++)
                    wsp_only = isspace(*q);

                if (wsp_only) {
                    /* Skip the whitespace and the new "=?". */
                    lit_start = p_in;
                    p_in = p + (sizeof(MIMEHDR_INIT) - 1);
                    state = S_SKIP_MIMEINIT;
                }
            }
            break;

        default:
            /* Not reachable with the three states above. */
            state = S_COPY_PLAIN;
            break;
        }
    }

    /* The header ended right after a "=?": emit the skipped bytes literally. */
    if (state == S_SKIP_MIMEINIT) {
        n = (size_t)(p_in - lit_start);
        memmove(p_out, lit_start, n);
        p_out += n;
    }

    *p_out = '\0';

    if (!strcmp(charset, "UTF-8") || !strcmp(charset, "utf-8")) {
        char obuf[1024];
        char *ip = (char *)hdr;
        char *op = obuf;
        size_t ileft = strlen(ip);
        size_t oleft = sizeof obuf - 1;   /* keep room for the terminator */
        size_t rc;

        /*
         * POSIX declares iconv()'s inbuf as char **, some older
         * implementations as const char **; void * converts to either.
         */
        rc = iconv(icd, (void *)&ip, &ileft, &op, &oleft);
        if (rc == (size_t)-1 || ileft)
            return;   /* conversion failed or incomplete: keep decoded text */

        /* Never write back more than the caller's buffer is known to hold. */
        if ((size_t)(op - obuf) > orig_len)
            return;

        *op = '\0';
        strcpy((char *)hdr, obuf);
    }
}

/*
 * Decode a header with UnMimeHeader() and then flatten it: every
 * soft-hyphen byte (0xAD), carriage return and newline in the result is
 * replaced by a single space. The string s is modified in place.
 */
void unmime_header(unsigned char *s)
{
    unsigned char *cur;

    UnMimeHeader(s);

    /* One pass over the decoded text, rewriting the three bytes of interest. */
    for (cur = s; *cur != '\0'; cur++) {
        switch (*cur) {
        case 0xAD:   /* soft hyphen */
        case '\r':
        case '\n':
            *cur = ' ';
            break;
        default:
            break;
        }
    }
}