/*
* Copyright (c) 1993, 1994, 1995, 1996, 1997, 1998
*      The Regents of the University of California.  All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
*    must display the following acknowledgement:
*      This product includes software developed by the Computer Systems
*      Engineering Group at Lawrence Berkeley Laboratory.
* 4. Neither the name of the University nor of the Laboratory may be used
*    to endorse or promote products derived from this software without
*    specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/

/*
* Utilities for message formatting used both by libpcap and rpcapd.
*/

#include <config.h>

#include "ftmacros.h"

#include <stddef.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>

#include "pcap-int.h"

#include "portability.h"

#include "fmtutils.h"

#ifdef _WIN32
#include "charconv.h"
#endif

/*
* Set the encoding.
*/
#ifdef _WIN32
/*
* Nonzero if error messages should be left in UTF-8 rather than being
* converted to the local code page.  As a static object it starts out
* as 0; the only code in this file that changes it sets it to 1.
*/
static int use_utf_8;

/*
* Record the requested character encoding (Windows version).
*
* opts - encoding option value; only PCAP_CHAR_ENC_UTF_8 has any effect.
*
* Any other value leaves the current setting untouched, so once UTF-8
* has been selected this routine will not turn it off again.
*/
void
pcapint_fmt_set_encoding(unsigned int opts)
{
       if (opts != PCAP_CHAR_ENC_UTF_8) {
               /* Not a request for UTF-8; keep the current setting. */
               return;
       }
       use_utf_8 = 1;
}
#else
/*
* Record the requested character encoding (non-Windows version).
*
* opts - encoding option value; accepted for interface compatibility
*        with the Windows version and otherwise ignored.
*/
void
pcapint_fmt_set_encoding(unsigned int opts _U_)
{
       /*
        * Nothing to do here; the only code in this file that looks at
        * the encoding setting is the Windows-only code page conversion.
        */
}
#endif

#ifdef _WIN32
/*
* Convert a null-terminated UTF-16LE string to UTF-8, putting it into
* a buffer starting at the specified location and stopping if we go
* past the specified size.  This will only put out complete UTF-8
* sequences.
*
* utf_16    - null-terminated string of UTF-16 code units.
* utf_8     - buffer that receives the UTF-8 output.
* utf_8_len - size of that buffer, in bytes, including room for the
*             terminating '\0'.
*
* If utf_8_len is 0, nothing at all is written and utf_8 is returned.
* Otherwise the output is always null-terminated and a pointer to the
* terminating '\0' is returned, so the caller can append after it.
*
* Malformed input is handled as follows:
*
*   - a leading surrogate at the end of the string, or a trailing
*     surrogate with no leading surrogate before it, becomes
*     U+FFFD REPLACEMENT CHARACTER;
*
*   - a leading surrogate followed by a code unit that isn't a
*     trailing surrogate becomes a single U+FFFD, and *both* code
*     units are consumed.
*
* We do this ourselves because Microsoft doesn't offer a "convert and
* stop at a UTF-8 character boundary if we run out of space" routine.
*/
#define IS_LEADING_SURROGATE(c) \
       ((c) >= 0xd800 && (c) < 0xdc00)
#define IS_TRAILING_SURROGATE(c) \
       ((c) >= 0xdc00 && (c) < 0xe000)
#define SURROGATE_VALUE(leading, trailing) \
       (((((leading) - 0xd800) << 10) | ((trailing) - 0xdc00)) + 0x10000)
#define REPLACEMENT_CHARACTER   0x0FFFD

static char *
utf_16le_to_utf_8_truncated(const wchar_t *utf_16, char *utf_8,
   size_t utf_8_len)
{
       wchar_t c, c2;
       uint32_t uc;

       if (utf_8_len == 0) {
               /*
                * Not even enough room for a trailing '\0'.
                * Don't put anything into the buffer.
                */
               return (utf_8);
       }

       while ((c = *utf_16++) != '\0') {
               if (IS_LEADING_SURROGATE(c)) {
                       /*
                        * Leading surrogate.  Must be followed by
                        * a trailing surrogate.
                        */
                       c2 = *utf_16;
                       if (c2 == '\0') {
                               /*
                                * Oops, string ends with a lead
                                * surrogate.  Try to drop in
                                * a REPLACEMENT CHARACTER, and
                                * don't move the string pointer,
                                * so on the next trip through
                                * the loop we grab the terminating
                                * '\0' and quit.
                                */
                               uc = REPLACEMENT_CHARACTER;
                       } else {
                               /*
                                * OK, we can consume this 2-octet
                                * value.
                                */
                               utf_16++;
                               if (IS_TRAILING_SURROGATE(c2)) {
                                       /*
                                        * Trailing surrogate.
                                        * This calculation will,
                                        * for c being a leading
                                        * surrogate and c2 being
                                        * a trailing surrogate,
                                        * produce a value between
                                        * 0x10000 and 0x10ffff,
                                        * so it's always going to be
                                        * a valid Unicode code point.
                                        */
                                       uc = SURROGATE_VALUE(c, c2);
                               } else {
                                       /*
                                        * Not a trailing surrogate;
                                        * try to drop in a
                                        * REPLACEMENT CHARACTER.
                                        */
                                       uc = REPLACEMENT_CHARACTER;
                               }
                       }
               } else {
                       /*
                        * Not a leading surrogate.
                        */
                       if (IS_TRAILING_SURROGATE(c)) {
                               /*
                                * Trailing surrogate without
                                * a preceding leading surrogate.
                                * Try to drop in a REPLACEMENT
                                * CHARACTER.
                                */
                               uc = REPLACEMENT_CHARACTER;
                       } else {
                               /*
                                * This is a valid BMP character;
                                * drop it in.
                                */
                               uc = c;
                       }
               }

               /*
                * OK, uc is a valid Unicode character; how
                * many bytes worth of UTF-8 does it require?
                *
                * In every case below, utf_8_len must be reduced
                * by exactly the number of bytes written, so that
                * it keeps reflecting the space that's really left
                * (including the byte reserved for the '\0').
                */
               if (uc < 0x0080) {
                       /* 1 byte: 0xxxxxxx. */
                       if (utf_8_len < 2) {
                               /*
                                * Not enough room for that byte
                                * plus a trailing '\0'.
                                */
                               break;
                       }
                       *utf_8++ = (char)uc;
                       utf_8_len--;
               } else if (uc < 0x0800) {
                       /* 2 bytes: 110xxxxx 10xxxxxx. */
                       if (utf_8_len < 3) {
                               /*
                                * Not enough room for those bytes
                                * plus a trailing '\0'.
                                */
                               break;
                       }
                       *utf_8++ = (char)(((uc >> 6) & 0x1F) | 0xC0);
                       *utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
                       utf_8_len -= 2;
               } else if (uc < 0x010000) {
                       /* 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx. */
                       if (utf_8_len < 4) {
                               /*
                                * Not enough room for those bytes
                                * plus a trailing '\0'.
                                */
                               break;
                       }
                       *utf_8++ = (char)(((uc >> 12) & 0x0F) | 0xE0);
                       *utf_8++ = (char)(((uc >> 6) & 0x3F) | 0x80);
                       *utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
                       utf_8_len -= 3;
               } else {
                       /* 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx. */
                       if (utf_8_len < 5) {
                               /*
                                * Not enough room for those bytes
                                * plus a trailing '\0'.
                                */
                               break;
                       }
                       /*
                        * The lead byte carries 3 payload bits; code
                        * points from 0x100000 up have bit 20 set, so
                        * the mask must be 0x07, not 0x03.
                        */
                       *utf_8++ = (char)(((uc >> 18) & 0x07) | 0xF0);
                       *utf_8++ = (char)(((uc >> 12) & 0x3F) | 0x80);
                       *utf_8++ = (char)(((uc >> 6) & 0x3F) | 0x80);
                       *utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
                       utf_8_len -= 4;
               }
       }

       /*
        * OK, we have enough room for (at least) a trailing '\0'.
        * (We started out with enough room, thanks to the test
        * for a zero-length buffer at the beginning, and if
        * there wasn't enough room for any character we wanted
        * to put into the buffer *plus* a trailing '\0',
        * we'd have quit before putting it into the buffer,
        * and thus would have left enough room for the trailing
        * '\0'.)
        *
        * Drop it in.
        */
       *utf_8 = '\0';

       /*
        * Return a pointer to the terminating '\0', in case we
        * want to drop something in after that.
        */
       return (utf_8);
}
#endif /* _WIN32 */

/*
* Variadic front end for pcapint_vfmt_errmsg_for_errno().
*
* errbuf    - buffer that receives the message.
* errbuflen - size of errbuf, in bytes.
* errnum    - errno value whose description follows the formatted text.
* fmt, ...  - format string and its arguments.
*
* The trailing arguments are gathered into a va_list and all of the
* real work is handed off to the va_list version.
*/
void
pcapint_fmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
   const char *fmt, ...)
{
       va_list args;

       va_start(args, fmt);
       pcapint_vfmt_errmsg_for_errno(errbuf, errbuflen, errnum, fmt, args);
       va_end(args);
}

/*
* Generate an error message based on a format, a va_list of arguments,
* and an errno, with a message for the errno after the formatted output.
*
* errbuf    - buffer that receives the message.
* errbuflen - size of errbuf, in bytes; if it's 0, nothing is done.
* errnum    - errno value to describe.
* fmt, ap   - format string and its arguments, handed to vsnprintf().
*
* The result has the form "<formatted text>: <error string>".  If
* there's no room for ": " after the formatted text, only the
* formatted text is produced; the error string is truncated to fit
* in whatever space remains.
*/
void
pcapint_vfmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
   const char *fmt, va_list ap)
{
       size_t msglen;
       char *p;
       size_t errbuflen_remaining;

       /*
        * With a zero-length buffer vsnprintf() stores nothing, not
        * even a terminating '\0', so the strlen() below would be
        * reading a buffer that isn't necessarily a string.
        */
       if (errbuflen == 0)
               return;

       (void)vsnprintf(errbuf, errbuflen, fmt, ap);
       msglen = strlen(errbuf);

       /*
        * Do we have enough space to append ": "?
        * Including the terminating '\0', that's 3 bytes.
        */
       if (msglen + 3 > errbuflen) {
               /* No - just give them what we've produced. */
               return;
       }
       p = errbuf + msglen;
       errbuflen_remaining = errbuflen - msglen;
       *p++ = ':';
       *p++ = ' ';
       *p = '\0';
       errbuflen_remaining -= 2;

       /*
        * Now append the string for the error code.
        */
#if defined(HAVE__WCSERROR_S)
       /*
        * We have a Windows-style _wcserror_s().
        * Generate a UTF-16LE error message.
        */
       wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE];
       errno_t err = _wcserror_s(utf_16_errbuf, PCAP_ERRBUF_SIZE, errnum);
       if (err != 0) {
               /*
                * It doesn't appear to be documented anywhere obvious
                * what the error returns from _wcserror_s().
                */
               snprintf(p, errbuflen_remaining, "Error %d", errnum);
               return;
       }

       /*
        * Now convert it from UTF-16LE to UTF-8, dropping it in the
        * remaining space in the buffer, and truncating it - cleanly,
        * on a UTF-8 character boundary - if it doesn't fit.
        */
       utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining);

       /*
        * Now, if we're not in UTF-8 mode, convert errbuf to the
        * local code page.
        */
       if (!use_utf_8)
               utf_8_to_acp_truncated(errbuf);
#else
       /*
        * Either Windows without _wcserror_s() or not Windows.  Let pcap_strerror()
        * solve the non-UTF-16 part of this problem space.
        */
       snprintf(p, errbuflen_remaining, "%s", pcap_strerror(errnum));
#endif
}

#ifdef _WIN32
/*
* Variadic front end for pcapint_vfmt_errmsg_for_win32_err().
*
* errbuf    - buffer that receives the message.
* errbuflen - size of errbuf, in bytes.
* errnum    - Win32 error code whose description follows the
*             formatted text.
* fmt, ...  - format string and its arguments.
*
* The trailing arguments are gathered into a va_list and all of the
* real work is handed off to the va_list version.
*/
void
pcapint_fmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
   const char *fmt, ...)
{
       va_list args;

       va_start(args, fmt);
       pcapint_vfmt_errmsg_for_win32_err(errbuf, errbuflen, errnum, fmt, args);
       va_end(args);
}

/*
* Generate an error message based on a format, a va_list of arguments,
* and a Win32 error, with a message for the Win32 error after the
* formatted output.
*
* errbuf    - buffer that receives the message.
* errbuflen - size of errbuf, in bytes; if it's 0, nothing is done.
* errnum    - Win32 error code to describe.
* fmt, ap   - format string and its arguments, handed to vsnprintf().
*
* The result has the form "<formatted text>: <error string> (<errnum>)",
* or "<formatted text>: (<errnum>)" if the error string is empty.  If
* there's no room for ": " after the formatted text, only the
* formatted text is produced.  Unless UTF-8 mode has been selected,
* the finished message is converted to the local code page.
*/
void
pcapint_vfmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
   const char *fmt, va_list ap)
{
       size_t msglen;
       char *p;
       char *errstring;
       size_t errbuflen_remaining;
       DWORD retval;
       wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE];
       size_t utf_8_len;

       /*
        * With a zero-length buffer vsnprintf() stores nothing, not
        * even a terminating '\0', so the strlen() below would be
        * reading a buffer that isn't necessarily a string.
        */
       if (errbuflen == 0)
               return;

       vsnprintf(errbuf, errbuflen, fmt, ap);
       msglen = strlen(errbuf);

       /*
        * Do we have enough space to append ": "?
        * Including the terminating '\0', that's 3 bytes.
        */
       if (msglen + 3 > errbuflen) {
               /* No - just give them what we've produced. */
               return;
       }
       p = errbuf + msglen;
       errbuflen_remaining = errbuflen - msglen;
       *p++ = ':';
       *p++ = ' ';
       *p = '\0';
       errbuflen_remaining -= 2;

       /*
        * Now append the string for the error code.
        *
        * XXX - what language ID to use?
        *
        * For UN*Xes, pcap_strerror() may or may not return localized
        * strings.
        *
        * We currently don't have localized messages for libpcap, but
        * we might want to do so.  On the other hand, if most of these
        * messages are going to be read by libpcap developers and
        * perhaps by developers of libpcap-based applications, English
        * might be a better choice, so the developer doesn't have to
        * get the message translated if it's in a language they don't
        * happen to understand.
        */
       retval = FormatMessageW(FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_MAX_WIDTH_MASK,
           NULL, errnum, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
           utf_16_errbuf, PCAP_ERRBUF_SIZE, NULL);
       if (retval == 0) {
               /*
                * Failed.
                */
               snprintf(p, errbuflen_remaining,
                   "Couldn't get error message for error (%lu)", errnum);
               return;
       }

       /*
        * Now convert it from UTF-16LE to UTF-8.  Remember where the
        * converted string starts, so we can tell how long it is.
        */
       errstring = p;
       p = utf_16le_to_utf_8_truncated(utf_16_errbuf, errstring,
           errbuflen_remaining);

       /*
        * Now append the error number, if it fits.
        *
        * The length must be measured from the start of the converted
        * string, not from the start of errbuf; otherwise the prefix
        * would be subtracted from the remaining space a second time,
        * which can wrap the unsigned count and let snprintf() write
        * past the end of the buffer.
        */
       utf_8_len = (size_t)(p - errstring);
       errbuflen_remaining -= utf_8_len;
       if (utf_8_len == 0) {
               /* The message was empty. */
               snprintf(p, errbuflen_remaining, "(%lu)", errnum);
       } else
               snprintf(p, errbuflen_remaining, " (%lu)", errnum);

       /*
        * Now, if we're not in UTF-8 mode, convert errbuf to the
        * local code page.
        */
       if (!use_utf_8)
               utf_8_to_acp_truncated(errbuf);
}
#endif