* * * * *

            99 ways to program a hex, Part 24: more lookup tables

So we went from a character encoding specific version [1] to a character
encoding agnostic version [2] to today's version—another character encoding
specific version (ASCII (American Standard Code for Information Interchange)
[3] to be exact). But today's version also eliminates a branch point in the
code, using a 256-element string to pick which character to display as part
of the hexadecimal dump.

> /*************************************************************************
> *
> * Copyright 2012 by Sean Conner.  All Rights Reserved.
> *
> * This program is free software; you can redistribute it and/or
> * modify it under the terms of the GNU General Public License
> * as published by the Free Software Foundation; either version 2
> * of the License, or (at your option) any later version.
> *
> * This program is distributed in the hope that it will be useful,
> * but WITHOUT ANY WARRANTY; without even the implied warranty of
> * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> * GNU General Public License for more details.
> *
> * You should have received a copy of the GNU General Public License
> * along with this program; if not, write to the Free Software
> * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
> *
> * Comments, questions and criticisms can be sent to: [email protected]
> *
> *************************************************************************/
>
> /* Style: C89, const correctness, assertive, system calls, full buffering */
> /*      lookup tables */
>
> #include <stdlib.h>
> #include <string.h>
> #include <errno.h>
> #include <assert.h>
>
> #include <sys/types.h>
> #include <sys/stat.h>
> #include <fcntl.h>
> #include <unistd.h>
>
> #define LINESIZE      16 /* maximum number of input bytes shown on one output line */
>
> /********************************************************************/
>
> extern const char *sys_errlist[]; /* error message strings, indexed by errno value in myperror() */
> extern int         sys_nerr;      /* bound that myperror() checks errno against before indexing */
>
> static void   do_dump         (const int,const int); /* (input fd, output fd): dump the whole input */
> static size_t dump_line       (char **const,unsigned char *,size_t,const unsigned long); /* format one line; returns bytes consumed */
> static void   hexout          (char *const,unsigned long,size_t,const int); /* fixed-width upper-case hex plus one padding char */
> static void   myperror        (const char *const); /* write "<s>: <error text>\n" to stderr */
> static size_t myread          (const int,char *,size_t); /* read until the buffer is full or end of input */
> static void   mywrite         (const int,const char *const,const size_t); /* write to fd; exits the program on failure */
>
> /********************************************************************/
>
> /*
>  * Entry point.  With no arguments, dump standard input to standard
>  * output.  Otherwise dump each named file in turn, preceded by a
>  * "-----name-----" banner line.
>  *
>  * A file that cannot be opened (or closed) is reported on standard error
>  * and skipped; the remaining files are still processed, but the program
>  * then exits with EXIT_FAILURE so that callers can detect the problem.
>  * Returns EXIT_SUCCESS only when every input was handled cleanly.
>  */
> int main(const int argc,const char *const argv[])
> {
>   int rc = EXIT_SUCCESS;
>
>   if (argc == 1)
>     do_dump(STDIN_FILENO,STDOUT_FILENO);
>   else
>   {
>     int i;
>
>     for (i = 1 ; i < argc ; i++)
>     {
>       int fhin;
>
>       fhin = open(argv[i],O_RDONLY);
>       if (fhin == -1)
>       {
>         myperror(argv[i]);
>         rc = EXIT_FAILURE;      /* remember the failure, keep going */
>         continue;
>       }
>
>       mywrite(STDOUT_FILENO,"-----",5);
>       mywrite(STDOUT_FILENO,argv[i],strlen(argv[i]));
>       mywrite(STDOUT_FILENO,"-----\n",6);
>
>       do_dump(fhin,STDOUT_FILENO);
>       if (close(fhin) < 0)
>       {
>         myperror(argv[i]);
>         rc = EXIT_FAILURE;
>       }
>     }
>   }
>
>   return rc;
> }
>
> /************************************************************************/
>
> /*
>  * Read everything from fhin and write its hex dump to fhout.
>  *
>  * Input is pulled in 4096-byte chunks.  Formatted lines are collected
>  * in a space-filled output buffer holding up to OUT_LINES lines of at
>  * most OUT_LINE_MAX characters each; the buffer is written out whenever
>  * it is full and once more at the end if anything is left in it.
>  */
> static void do_dump(const int fhin,const int fhout)
> {
>   enum { OUT_LINE_MAX = 75 , OUT_LINES = 109 };
>
>   unsigned char  inbuf[4096];
>   char           outbuf[OUT_LINE_MAX * OUT_LINES];
>   char          *cursor  = outbuf;  /* where the next line will be built  */
>   unsigned long  offset  = 0;       /* file offset shown at start of line */
>   size_t         pending = 0;       /* lines built since the last flush   */
>   size_t         left;
>
>   assert(fhin  >= 0);
>   assert(fhout >= 0);
>
>   /* dump_line() relies on untouched columns already being blanks */
>   memset(outbuf,' ',sizeof(outbuf));
>
>   while((left = myread(fhin,(char *)inbuf,sizeof(inbuf))) > 0)
>   {
>     unsigned char *src = inbuf;
>
>     while(left > 0)
>     {
>       const size_t used = dump_line(&cursor,src,left,offset);
>
>       src    += used;
>       left   -= used;
>       offset += used;
>
>       if (++pending == OUT_LINES)
>       {
>         mywrite(fhout,outbuf,(size_t)(cursor - outbuf));
>         memset(outbuf,' ',sizeof(outbuf));
>         cursor  = outbuf;
>         pending = 0;
>       }
>     }
>   }
>
>   /* flush whatever partial batch of lines remains */
>   if (cursor != outbuf)
>     mywrite(fhout,outbuf,(size_t)(cursor - outbuf));
> }
>
> /********************************************************************/
>
> /*
>  * Format one line of the dump at *pline and advance *pline past it.
>  *
>  * Layout: an 8-digit hex offset followed by ':' in columns 0-8, the hex
>  * value of each byte (two digits plus a space) starting at column 10,
>  * the character form of each byte starting at column 58, then a
>  * newline.  Columns not written here are left as they were; the caller
>  * pre-fills the buffer with spaces.
>  *
>  * pline - in/out: address of the write position in the output buffer
>  * p     - bytes to display
>  * bytes - number of bytes available at p (at most LINESIZE are used)
>  * off   - offset printed at the start of the line
>  *
>  * Returns the number of bytes from p that were consumed.
>  */
> static size_t dump_line(
>       char                **const pline,
>       unsigned char              *p,
>       size_t                      bytes,
>       const unsigned long         off
> )
> {
>   /* 256-entry table mapping every byte value to the character shown  */
>   /* for it: printable ASCII maps to itself, everything else to '.'   */
>   static const char glyph[] =
>       "................................ !\"#$%&'()*+,-./0123456789:;<=>?"
>       "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~."
>       "................................................................"
>       "........................................................"
>       "........";
>   char   *out;
>   size_t  i;
>
>   assert(pline  != NULL);
>   assert(*pline != NULL);
>   assert(p      != NULL);
>   assert(bytes  >  0);
>
>   out = *pline;
>
>   if (bytes > LINESIZE)
>     bytes = LINESIZE;
>
>   hexout(out,off,8,':');
>
>   for (i = 0 ; i < bytes ; i++)
>   {
>     hexout(&out[10 + i * 3],(unsigned long)p[i],2,' ');
>     out[58 + i] = glyph[p[i]];
>   }
>
>   out[58 + bytes] = '\n';
>   *pline          = &out[59 + bytes];
>   return bytes;
> }
>
> /**********************************************************************/
>
> /*
>  * Store the low (size) hexadecimal digits of value, upper case and
>  * zero filled, into dest[0] .. dest[size - 1], and store padding into
>  * dest[size].  No NUL terminator is written.
>  *
>  * dest    - destination; must have room for size + 1 characters
>  * value   - number to convert
>  * size    - number of hex digits to produce (must be non-zero)
>  * padding - printable character placed right after the digits
>  */
> static void hexout(char *const dest,unsigned long value,size_t size,const int padding)
> {
>   static const char digits[] = "0123456789ABCDEF";
>   char             *cursor;
>
>   assert(dest != NULL);
>   assert(size >  0);
>   assert((padding >= ' ') && (padding <= '~'));
>
>   cursor  = dest + size;
>   *cursor = (char)padding;
>
>   /* fill right to left, least significant digit first */
>   while(cursor != dest)
>   {
>     *--cursor = digits[value % 16];
>     value    /= 16;
>   }
> }
>
> /************************************************************************/
>
> /*
>  * Write "<s>: <error text>\n" to standard error, where the error text
>  * is the sys_errlist[] entry for the value errno had on entry.  When
>  * errno is outside the table, or the table has no text for it,
>  * "(unknown)" is written instead.
>  *
>  * s - prefix for the message (typically a file name or operation); an
>  *     empty string is allowed and simply produces no prefix.
>  *
>  * NOTE: sys_errlist[] and sys_nerr are legacy interfaces; strerror() is
>  * the portable way to obtain the same text.
>  */
> static void myperror(const char *const s)
> {
>   int         err = errno;  /* saved first; the calls below may change errno */
>   const char *text;
>   size_t      len;
>
>   assert(s != NULL);
>
>   /* mywrite() insists on a non-zero length, so skip an empty prefix */
>   len = strlen(s);
>   if (len > 0)
>     mywrite(STDERR_FILENO,s,len);
>   mywrite(STDERR_FILENO,": ",2);
>
>   /* valid indices are 0 .. sys_nerr - 1, and individual slots may be empty */
>   text = ((err >= 0) && (err < sys_nerr)) ? sys_errlist[err] : NULL;
>
>   if ((text == NULL) || (*text == '\0'))
>     mywrite(STDERR_FILENO,"(unknown)",9);
>   else
>     mywrite(STDERR_FILENO,text,strlen(text));
>   mywrite(STDERR_FILENO,"\n",1);
> }
>
> /************************************************************************/
>
> /*
>  * Read from fh until size bytes have been stored in buf or end of input
>  * is reached, issuing as many read() calls as that takes.  A read()
>  * interrupted by a signal (EINTR) is retried; any other read error is
>  * reported via myperror() and terminates the program.
>  *
>  * fh   - descriptor to read from
>  * buf  - destination buffer of at least size bytes
>  * size - number of bytes wanted (must be non-zero)
>  *
>  * Returns the number of bytes stored, which is less than size only when
>  * end of input was reached (0 means nothing was left to read).
>  */
> static size_t myread(const int fh,char *buf,size_t size)
> {
>   size_t amount = 0;
>
>   assert(fh   >= 0);
>   assert(buf  != NULL);
>   assert(size >  0);
>
>   while(size > 0)
>   {
>     ssize_t bytes;
>
>     bytes = read(fh,buf,size);
>     if (bytes < 0)
>     {
>       if (errno == EINTR)       /* interrupted, not failed: try again */
>         continue;
>
>       myperror("read()");
>       exit(EXIT_FAILURE);
>     }
>     if (bytes == 0)             /* end of input */
>       break;
>
>     amount += (size_t)bytes;
>     size   -= (size_t)bytes;
>     buf    += bytes;
>   }
>
>   return amount;
> }
>
> /*********************************************************************/
>
> /*
>  * Write all size bytes of msg to fh, or terminate the program.
>  *
>  * write() is allowed to accept fewer bytes than requested, so the call
>  * is repeated until everything has been written; a call interrupted by
>  * a signal (EINTR) is retried.  On any other failure the error is
>  * reported via myperror() -- unless fh is standard error itself, where
>  * reporting would be pointless -- and the program exits with
>  * EXIT_FAILURE.
>  *
>  * fh   - descriptor to write to
>  * msg  - bytes to write
>  * size - number of bytes; zero is accepted and writes nothing
>  */
> static void mywrite(const int fh,const char *const msg,const size_t size)
> {
>   const char *next = msg;
>   size_t      left = size;
>
>   assert(fh   >= 0);
>   assert(msg  != NULL);
>
>   while(left > 0)
>   {
>     ssize_t rc;
>
>     rc = write(fh,next,left);
>     if ((rc < 0) && (errno == EINTR))
>       continue;
>
>     if (rc <= 0)
>     {
>       /* no progress without an error would loop forever; treat as I/O error */
>       if (rc == 0)
>         errno = EIO;
>
>       if (fh != STDERR_FILENO)
>         myperror("output");
>
>       exit(EXIT_FAILURE);
>     }
>
>     next += rc;
>     left -= (size_t)rc;
>   }
> }
>
> /***********************************************************************/
>

And it is faster:

> [spc]lucy:~/projects/99/src>time ./23 ~/bin/firefox/libxul.so >/dev/null
>
> real    0m0.258s
> user    0m0.247s
> sys     0m0.011s
> [spc]lucy:~/projects/99/src>time ./24 ~/bin/firefox/libxul.so >/dev/null
>
> real    0m0.186s
> user    0m0.178s
> sys     0m0.008s
>

Only about 1.4 times faster, but it is faster.

The conversion string is fixed, but that doesn't preclude a port to, say, an
EBCDIC (Extended Binary Coded Decimal Interchange Code) [4] system from
using a different one, or the string being constructed at run time. The
runtime generation would be more portable, but to me, that's wasted time
spent generating a string that will always be the same (and frankly, if we're
using this hack for speed, that's just wasted time).

Perhaps better might be to include several such strings—ASCII (American
Standard Code for Information Interchange), EBCDIC (Extended Binary Coded
Decimal Interchange Code), Baudot [5], PETSCII (Personal Electronic
Transactor Standard Code of Information Interchange) [6]—and select via a
command line option which one to use (defaulting to whatever character set
is native for the platform the program is running on). It could be a useful
thing.

But such a modification I'm leaving as an exercise for the reader.

Now, is this the fastest version possible? I'm not going to say yes this
time. There might be something else that could be done to wring that last bit
of performance out of this code, but at this point, I am definitely done with
wringing out the speed.

I think.

* Part 23: C89, const correctness, assertive, system calls, full buffering,
 lookup table [7]
* Part 25: C♯ [8]

[1] gopher://gopher.conman.org/0Phlog:2012/01/30.1
[2] gopher://gopher.conman.org/0Phlog:2012/01/31.2
[3] http://en.wikipedia.org/wiki/ASCII
[4] http://en.wikipedia.org/wiki/EBCDIC
[5] http://en.wikipedia.org/wiki/Baudot_code
[6] http://en.wikipedia.org/wiki/PETSCII
[7] gopher://gopher.conman.org/0Phlog:2012/01/31.2
[8] gopher://gopher.conman.org/0Phlog:2012/02/02.1

Email author at [email protected]