/* Charset conversion.
  Copyright (C) 2001-2006 Free Software Foundation, Inc.
  Written by Bruno Haible and Simon Josefsson.

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2, or (at your option)
  any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software Foundation,
  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */

#include <config.h>

/* Specification.  */
#include "striconv.h"

#include <errno.h>
#include <stdlib.h>
#include <string.h>

#if HAVE_ICONV
# include <iconv.h>
/* Get MB_LEN_MAX, CHAR_BIT.  */
# include <limits.h>
#endif

#include "strdup.h"
#include "c-strcase.h"

#ifndef SIZE_MAX
# define SIZE_MAX ((size_t) -1)
#endif


#if HAVE_ICONV

/* Convert the SRCLEN bytes starting at SRC through the conversion
   descriptor CD.

   The input is processed twice: a first pass converts into a scratch
   buffer only to measure how many output bytes are produced, and a second
   pass converts into a heap buffer of exactly that size.

   *RESULTP must be NULL or a pointer that may be passed to realloc.
   If the converted output is non-empty, the buffer is obtained with
   realloc (*RESULTP, length) or malloc (length), and its address and size
   are stored in *RESULTP and *LENGTHP.  If the converted output is empty,
   *LENGTHP is set to 0 and *RESULTP is left untouched.

   An incomplete multibyte sequence at the end of the input (iconv failing
   with EINVAL) is not treated as an error: everything converted before it
   is returned.

   Return 0 on success.  Return -1 with errno set on failure; if the
   failure happens during the second pass, *RESULTP and *LENGTHP have
   already been updated and the buffer contents are unspecified.  */
int
mem_cd_iconv (const char *src, size_t srclen, iconv_t cd,
             char **resultp, size_t *lengthp)
{
# define tmpbufsize 4096
 size_t length;
 char *result;

 /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
# if defined _LIBICONV_VERSION \
    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
 /* Set to the initial state.  */
 iconv (cd, NULL, NULL, NULL, NULL);
# endif

 /* Determine the length we need.  */
 {
   size_t count = 0;
   char tmpbuf[tmpbufsize];
   const char *inptr = src;
   size_t insize = srclen;

   while (insize > 0)
     {
       char *outptr = tmpbuf;
       size_t outsize = tmpbufsize;
       size_t res = iconv (cd,
                           (ICONV_CONST char **) &inptr, &insize,
                           &outptr, &outsize);

       if (res == (size_t)(-1))
         {
           if (errno == E2BIG)
             /* The scratch buffer is full; count it below and go on.  */
             ;
           else if (errno == EINVAL)
             {
               /* Incomplete sequence at the end of the input.  The bytes
                  converted by this call before reaching it must still be
                  counted, otherwise the buffer allocated for the second
                  pass is too small.  */
               count += outptr - tmpbuf;
               break;
             }
           else
             return -1;
         }
# if !defined _LIBICONV_VERSION && !defined __GLIBC__
       /* Irix iconv() inserts a NUL byte if it cannot convert.
          NetBSD iconv() inserts a question mark if it cannot convert.
          Only GNU libiconv and GNU libc are known to prefer to fail rather
          than doing a lossy conversion.  */
       else if (res > 0)
         {
           errno = EILSEQ;
           return -1;
         }
# endif
       count += outptr - tmpbuf;
     }
   /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
# if defined _LIBICONV_VERSION \
    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
   {
     /* Flush the conversion state; any bytes this emits count too.  */
     char *outptr = tmpbuf;
     size_t outsize = tmpbufsize;
     size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);

     if (res == (size_t)(-1))
       return -1;
     count += outptr - tmpbuf;
   }
# endif
   length = count;
 }

 if (length == 0)
   {
     /* Nothing to store; the caller's buffer is left as it is.  */
     *lengthp = 0;
     return 0;
   }
 result = (*resultp != NULL ? realloc (*resultp, length) : malloc (length));
 if (result == NULL)
   {
     errno = ENOMEM;
     return -1;
   }
 *resultp = result;
 *lengthp = length;

 /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
# if defined _LIBICONV_VERSION \
    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
 /* Return to the initial state.  */
 iconv (cd, NULL, NULL, NULL, NULL);
# endif

 /* Do the conversion for real.  */
 {
   const char *inptr = src;
   size_t insize = srclen;
   char *outptr = result;
   size_t outsize = length;

   while (insize > 0)
     {
       size_t res = iconv (cd,
                           (ICONV_CONST char **) &inptr, &insize,
                           &outptr, &outsize);

       if (res == (size_t)(-1))
         {
           if (errno == EINVAL)
             break;
           else
             return -1;
         }
# if !defined _LIBICONV_VERSION && !defined __GLIBC__
       /* Irix iconv() inserts a NUL byte if it cannot convert.
          NetBSD iconv() inserts a question mark if it cannot convert.
          Only GNU libiconv and GNU libc are known to prefer to fail rather
          than doing a lossy conversion.  */
       else if (res > 0)
         {
           errno = EILSEQ;
           return -1;
         }
# endif
     }
   /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
# if defined _LIBICONV_VERSION \
    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
   {
     size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);

     if (res == (size_t)(-1))
       return -1;
   }
# endif
   /* Both passes must have produced the same number of bytes.  */
   if (outsize != 0)
     abort ();
 }

 return 0;
# undef tmpbufsize
}

char *
str_cd_iconv (const char *src, iconv_t cd)
{
 /* For most encodings, a trailing NUL byte in the input will be converted
    to a trailing NUL byte in the output.  But not for UTF-7.  So that this
    function is usable for UTF-7, we have to exclude the NUL byte from the
    conversion and add it by hand afterwards.  */
# if PROBABLY_SLOWER

 char *result = NULL;
 size_t length;
 int retval = mem_cd_iconv (src, strlen (src), cd, &result, &length);
 char *final_result;

 if (retval < 0)
   {
     if (result != NULL)
       {
         int saved_errno = errno;
         free (result);
         errno = saved_errno;
       }
     return NULL;
   }

 /* Add the terminating NUL byte.  */
 final_result =
   (result != NULL ? realloc (result, length + 1) : malloc (length + 1));
 if (final_result == NULL)
   {
     if (result != NULL)
       free (result);
     errno = ENOMEM;
     return NULL;
   }
 final_result[length] = '\0';

 return final_result;

# else

 char *result;
 size_t result_size;
 size_t length;
 const char *inptr = src;
 size_t inbytes_remaining = strlen (src);

 /* Make a guess for the worst-case output size, in order to avoid a
    realloc.  It's OK if the guess is wrong as long as it is not zero and
    doesn't lead to an integer overflow.  */
 result_size = inbytes_remaining;
 {
   size_t approx_sqrt_SIZE_MAX = SIZE_MAX >> (sizeof (size_t) * CHAR_BIT / 2);
   if (result_size <= approx_sqrt_SIZE_MAX / MB_LEN_MAX)
     result_size *= MB_LEN_MAX;
 }
 result_size += 1; /* for the terminating NUL */

 result = (char *) malloc (result_size);
 if (result == NULL)
   {
     errno = ENOMEM;
     return NULL;
   }

 /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
# if defined _LIBICONV_VERSION \
   || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
 /* Set to the initial state.  */
 iconv (cd, NULL, NULL, NULL, NULL);
# endif

 /* Do the conversion.  */
 {
   char *outptr = result;
   size_t outbytes_remaining = result_size - 1;

   for (;;)
     {
       /* Here inptr + inbytes_remaining = src + strlen (src),
               outptr + outbytes_remaining = result + result_size - 1.  */
       size_t res = iconv (cd,
                           (ICONV_CONST char **) &inptr, &inbytes_remaining,
                           &outptr, &outbytes_remaining);

       if (res == (size_t)(-1))
         {
           if (errno == EINVAL)
             break;
           else if (errno == E2BIG)
             {
               size_t used = outptr - result;
               size_t newsize = result_size * 2;
               char *newresult;

               if (!(newsize > result_size))
                 {
                   errno = ENOMEM;
                   goto failed;
                 }
               newresult = (char *) realloc (result, newsize);
               if (newresult == NULL)
                 {
                   errno = ENOMEM;
                   goto failed;
                 }
               result = newresult;
               result_size = newsize;
               outptr = result + used;
               outbytes_remaining = result_size - 1 - used;
             }
           else
             goto failed;
         }
# if !defined _LIBICONV_VERSION && !defined __GLIBC__
       /* Irix iconv() inserts a NUL byte if it cannot convert.
          NetBSD iconv() inserts a question mark if it cannot convert.
          Only GNU libiconv and GNU libc are known to prefer to fail rather
          than doing a lossy conversion.  */
       else if (res > 0)
         {
           errno = EILSEQ;
           goto failed;
         }
# endif
       else
         break;
     }
   /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
# if defined _LIBICONV_VERSION \
   || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
   for (;;)
     {
       /* Here outptr + outbytes_remaining = result + result_size - 1.  */
       size_t res = iconv (cd, NULL, NULL, &outptr, &outbytes_remaining);

       if (res == (size_t)(-1))
         {
           if (errno == E2BIG)
             {
               size_t used = outptr - result;
               size_t newsize = result_size * 2;
               char *newresult;

               if (!(newsize > result_size))
                 {
                   errno = ENOMEM;
                   goto failed;
                 }
               newresult = (char *) realloc (result, newsize);
               if (newresult == NULL)
                 {
                   errno = ENOMEM;
                   goto failed;
                 }
               result = newresult;
               result_size = newsize;
               outptr = result + used;
               outbytes_remaining = result_size - 1 - used;
             }
           else
             goto failed;
         }
       else
         break;
     }
# endif

   /* Add the terminating NUL byte.  */
   *outptr++ = '\0';

   length = outptr - result;
 }

 /* Give away unused memory.  */
 if (length < result_size)
   {
     char *smaller_result = (char *) realloc (result, length);

     if (smaller_result != NULL)
       result = smaller_result;
   }

 return result;

failed:
 {
   int saved_errno = errno;
   free (result);
   errno = saved_errno;
   return NULL;
 }

# endif
}

#endif

/* Convert the NUL-terminated string SRC from the encoding FROM_CODESET to
   the encoding TO_CODESET and return the result as a freshly allocated,
   NUL-terminated string.  If c_strcasecmp reports the two codeset names as
   equal, the result is simply a copy of SRC.
   Return NULL with errno set on failure; errno is ENOSYS when this file
   was built without iconv support (HAVE_ICONV false).  */
char *
str_iconv (const char *src, const char *from_codeset, const char *to_codeset)
{
 if (c_strcasecmp (from_codeset, to_codeset) == 0)
   {
     char *result = strdup (src);

     /* Set errno explicitly, as the other allocation failure paths in this
        file do, rather than relying on strdup to have set it.  */
     if (result == NULL)
       errno = ENOMEM;
     return result;
   }
 else
   {
#if HAVE_ICONV
     iconv_t cd;
     char *result;

     /* Avoid glibc-2.1 bug with EUC-KR.  */
# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
     if (c_strcasecmp (from_codeset, "EUC-KR") == 0
         || c_strcasecmp (to_codeset, "EUC-KR") == 0)
       {
         errno = EINVAL;
         return NULL;
       }
# endif
     cd = iconv_open (to_codeset, from_codeset);
     if (cd == (iconv_t) -1)
       return NULL;

     result = str_cd_iconv (src, cd);

     if (result == NULL)
       {
         /* Close cd, but preserve the errno from str_cd_iconv.  */
         int saved_errno = errno;
         iconv_close (cd);
         errno = saved_errno;
       }
     else
       {
         if (iconv_close (cd) < 0)
           {
             /* Return NULL, but free the allocated memory, and while doing
                that, preserve the errno from iconv_close.  */
             int saved_errno = errno;
             free (result);
             errno = saved_errno;
             return NULL;
           }
       }
     return result;
#else
     /* This is a different error code than if iconv_open existed but didn't
        support from_codeset and to_codeset, so that the caller can emit
        an error message such as
          "iconv() is not supported. Installing GNU libiconv and
           then reinstalling this package would fix this."  */
     errno = ENOSYS;
     return NULL;
#endif
   }
}