/*
* file: utfcode.c
*
* (c) Peter Kleiweg 2000
*
* This is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2,
* or (at your option) any later version.
*/

#define UTFcodeVERSION "1.0"

#ifdef __MSDOS__
#ifndef __COMPACT__
#error Memory model COMPACT required
#endif  /* __COMPACT__  */
#include <dir.h>
#endif  /* __MSDOS__  */
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Number of bits left over from the previous byte and held in `octal`
 * while UTF-7 output is being produced; outbyte() cycles it 0 -> 2 -> 4 -> 0. */
int obits = 0;

/* Non-zero when UTF-7 output is wanted (on by default, cleared by -8). */
int utf7 = 1;

/* Non-zero when UTF-8 output is wanted (on by default, cleared by -7). */
int utf8 = 1;

/* Number of values stored in `lu`. */
int nr;

/* Base64 digit value being assembled by outbyte() and printed by outoct(). */
unsigned octal;

/* The numbers given on the command line, in order; allocated in main(). */
long unsigned *lu;

/* Name under which the program reports errors and usage. */
char *programname;

/* Spare block that s_malloc()/s_realloc() free before reporting failure. */
char *no_mem_buffer;

/* Message passed to errit() when an allocation fails. */
char out_of_memory [] = "Out of memory";

/* Encoders: each prints every value in lu[] followed by a newline. */
void utf_7 (void);
void utf_8 (void);

/* Base64 helpers used by utf_7(). */
void outbyte (unsigned u);
void outoct (void);

/* Program name, diagnostics and usage. */
void get_programname (char const *argv0);
void errit (char const *format, ...);
void syntax (void);

/* Allocation wrappers that call errit() instead of returning NULL. */
void *s_malloc (size_t size);
void *s_realloc (void *block, size_t size);
char *s_strdup (char const *s);

/*
 * Program entry point.
 *
 * Usage: utfcode [-7|-8] number...
 *
 * Leading "-7" / "-8" options select a single encoding; without them both
 * encodings are printed, UTF-7 first. Every remaining argument is a number
 * in any notation strtoul() accepts with base 0 (decimal, 0octal, 0xhex),
 * or in "U+hex" / "u+hex" notation.
 *
 * An argument that is not completely a number is reported through errit(),
 * which terminates the program, instead of being silently read as 0.
 * Values too large for unsigned long come back from strtoul() as ULONG_MAX
 * and are rejected by the encoders as too large.
 *
 * Returns 0 on success; syntax() and errit() exit with status 1.
 */
int main (int argc, char *argv [])
{
    int
        i;

    /* Spare memory that s_malloc()/s_realloc() release before reporting
       an allocation failure. A NULL result is harmless: free(NULL) is a
       no-op. */
    no_mem_buffer = (char *) malloc (1024);

    /* argv [0] may be absent when the program is started with an empty
       argument vector. */
    get_programname ((argc > 0 && argv [0]) ? argv [0] : "utfcode");

    /* Consume leading options; the last one given wins. */
    while (argc > 1) {
        if (! strcmp (argv [1], "-7")) {
            utf7 = 1;
            utf8 = 0;
        } else if (! strcmp (argv [1], "-8")) {
            utf7 = 0;
            utf8 = 1;
        } else
            break;
        argv++;
        argc--;
    }

    /* "<= 1" rather than "== 1": argc == 0 must not fall through and
       produce a negative element count. */
    if (argc <= 1)
        syntax ();

    nr = argc - 1;

    lu = (long unsigned *) s_malloc (nr * sizeof (long unsigned));
    for (i = 0; i < nr; i++) {
        char
            *arg = argv [i + 1],
            *end,
            first = arg [0];
        int
            uplus = (first == 'U' || first == 'u') && arg [1] == '+';

        /* Rewrite "U+1234" in place to "0x1234" so strtoul() reads it
           as hexadecimal. */
        if (uplus) {
            arg [0] = '0';
            arg [1] = 'x';
        }

        lu [i] = strtoul (arg, &end, 0);

        /* Reject empty arguments and arguments with trailing garbage. */
        if (end == arg || *end != '\0') {
            if (uplus) {
                /* Show the argument the way the user typed it. */
                arg [0] = first;
                arg [1] = '+';
            }
            errit ("Not a valid number: %s", arg);
        }
    }

    if (utf7)
        utf_7 ();

    if (utf8)
        utf_8 ();

    return 0;
}

/*
 * Print all values in lu[] as one UTF-7 shifted sequence on stdout:
 * a '+', the base64 digits of every value taken as two bytes (high byte
 * first), and a closing "-" plus newline.
 *
 * A value of 0x10000 or more is reported through errit(), which
 * terminates the program; output written up to that point stays written.
 */
void utf_7 ()
{
    int
        k;
    long unsigned
        code;

    fputc ('+', stdout);

    for (k = 0; k < nr; k++) {
        code = lu [k];

        if (code >= 0x10000) {
            errit ("Too large for UTF-7: 0x%lX", code);
            continue;
        }

        outbyte (code >> 8);
        outbyte (code & 0xFF);
    }

    /* Bits of the last byte may still be waiting in `octal`: flush them
       as a final digit. */
    if (obits != 0)
        outoct ();

    fputs ("-\n", stdout);
}

/*
 * Feed one byte into the base64 digit stream.
 *
 * `obits` says how many bits of the previous byte are still waiting in
 * `octal` (0, 2 or 4). The top bits of `u` complete that digit, which is
 * printed with outoct(); the remaining low bits of `u` are left in
 * `octal`, shifted to the top of the next 6-bit digit. When exactly six
 * bits remain they form a complete digit and are printed as well.
 *
 * Any other value of `obits` leaves everything untouched.
 */
void outbyte (unsigned u)
{
    unsigned
        carried,    /* bits kept from the previous byte */
        low;        /* number of low bits of u that do not fit */

    if (obits != 0 && obits != 2 && obits != 4)
        return;

    /* With nothing pending, whatever `octal` still holds is stale. */
    carried = obits ? octal : 0;
    low = (unsigned) obits + 2;

    octal = carried | (u >> low);
    outoct ();

    octal = (u & ((1u << low) - 1)) << (6 - low);

    if (low == 6) {
        /* The leftover is a whole digit: emit it and start afresh. */
        outoct ();
        obits = 0;
    } else
        obits = (int) low;
}

/*
 * Print the base64 character for the digit value held in `octal`:
 * 0-25 -> 'A'-'Z', 26-51 -> 'a'-'z', 52-61 -> '0'-'9', 62 -> '+',
 * and anything larger -> '/'.
 */
void outoct ()
{
    static char const
        alphabet [] =
            "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
            "abcdefghijklmnopqrstuvwxyz"
            "0123456789"
            "+/";
    unsigned
        idx;

    /* Every value above 62 shares the last character. */
    idx = (octal < 63) ? octal : 63;

    fputc (alphabet [idx], stdout);
}

/*
 * Print every value in lu[] as a UTF-8 byte sequence on stdout, followed
 * by one newline.
 *
 * Sequences of one to six bytes are produced, covering values below
 * 0x80000000. A larger value is reported through errit(), which
 * terminates the program; output written up to that point stays written.
 */
void utf_8 ()
{
    int
        k,
        tail;       /* number of continuation bytes after the lead byte */
    unsigned
        marker;     /* length-marker bits of the lead byte */
    long unsigned
        code;

    for (k = 0; k < nr; k++) {
        code = lu [k];

        /* Pick the sequence length from the magnitude of the value. */
        if (code < 0x80) {
            tail = 0;
            marker = 0x00;
        } else if (code < 0x800) {
            tail = 1;
            marker = 0xC0;
        } else if (code < 0x10000) {
            tail = 2;
            marker = 0xE0;
        } else if (code < 0x200000) {
            tail = 3;
            marker = 0xF0;
        } else if (code < 0x4000000) {
            tail = 4;
            marker = 0xF8;
        } else if (code < 0x80000000) {
            tail = 5;
            marker = 0xFC;
        } else {
            errit ("Too large for UTF-8: 0x%lX", code);
            continue;
        }

        /* Lead byte: marker plus the bits above all continuation bytes. */
        fputc ((int) (marker | (code >> (6 * tail))), stdout);

        /* Continuation bytes: six bits each, most significant first. */
        while (tail-- > 0)
            fputc ((int) (0x80 | ((code >> (6 * tail)) & 0x3F)), stdout);
    }

    fputc ('\n', stdout);
}

/*
 * Report a fatal error and terminate.
 *
 * Writes "\nError <programname>: ", the printf-style message built from
 * `format` and the following arguments, and two newlines to stderr, then
 * exits with status 1. This function does not return.
 *
 * format: printf-style format string.
 * ...:    arguments consumed by `format`.
 */
void errit (char const *format, ...)
{
    va_list
        list;

    fprintf (stderr, "\nError %s: ", programname);

    va_start (list, format);
    vfprintf (stderr, format, list);
    /* Every va_start must be paired with va_end in the same function. */
    va_end (list);

    fprintf (stderr, "\n\n");

    exit (1);
}

/*
 * Set the global `programname` from argv [0].
 *
 * On MSDOS the file-name component is extracted with fnsplit(); elsewhere
 * everything after the last '/' is used, or the whole string when it
 * contains no '/'.
 *
 * argv0: the program's argv [0]; may be NULL.
 *
 * `programname` is never left NULL: when `argv0` is NULL or the copy
 * cannot be allocated, it points at a static fallback name, so that
 * later "%s" output of the name is always valid.
 */
void get_programname (char const *argv0)
{
    static char
        fallback [] = "utfcode";
#ifdef __MSDOS__
    char
        name [MAXFILE];

    programname = NULL;
    if (argv0) {
        fnsplit (argv0, NULL, NULL, name, NULL);
        programname = strdup (name);
    }
#else   /* unix */
    char const
        *slash;

    programname = NULL;
    if (argv0) {
        slash = strrchr (argv0, '/');
        programname = strdup (slash ? slash + 1 : argv0);
    }
#endif

    /* No argv [0], or strdup() ran out of memory. */
    if (! programname)
        programname = fallback;
}

/*
 * malloc() wrapper that does not hand failure back to the caller.
 *
 * size: number of bytes to allocate.
 *
 * Returns the new block. When malloc() returns NULL, the spare
 * `no_mem_buffer` is freed and errit() is called with the out-of-memory
 * message, which terminates the program.
 */
void *s_malloc (size_t size)
{
    void
        *fresh = malloc (size);

    if (fresh)
        return fresh;

    /* Give memory back before building the error message. */
    free (no_mem_buffer);
    errit (out_of_memory);

    return fresh;
}

/*
 * realloc() wrapper that does not hand failure back to the caller.
 *
 * block: block to resize, passed straight to realloc().
 * size:  requested new size in bytes.
 *
 * Returns the resized block. When realloc() returns NULL, the spare
 * `no_mem_buffer` is freed and errit() is called with the out-of-memory
 * message, which terminates the program.
 */
void *s_realloc (void *block, size_t size)
{
    void
        *resized = realloc (block, size);

    if (resized)
        return resized;

    /* Give memory back before building the error message. */
    free (no_mem_buffer);
    errit (out_of_memory);

    return resized;
}

/*
 * Duplicate a string into memory obtained from s_malloc().
 *
 * s: string to copy; NULL is treated as the empty string.
 *
 * Returns a newly allocated, NUL-terminated copy owned by the caller.
 * Allocation failure is handled inside s_malloc().
 */
char *s_strdup (char const *s)
{
    char const
        *source = s ? s : "";
    size_t
        bytes = strlen (source) + 1;    /* includes the terminator */
    char
        *copy = (char *) s_malloc (bytes);

    memcpy (copy, source, bytes);

    return copy;
}

void syntax ()
{
   fprintf (
       stderr,
       "\n"
       "This is utfcode, version " UTFcodeVERSION "\n"
       "\n"
       "Usage: %s [-7|-8] [number...]\n"
       "\n"
       " -7: utf-7 only\n"
       " -8: utf-8 only\n"
       "\n",
       programname
   );
   exit (1);
}