* * * * *

 99 ways to program a hex, Part 20: C89, const correctness, assertive, system
                                    calls

When last we left the C versions [1], we pretty much hit the limit of what we
could do using the standard C library to remain portable (well, we did use a
GCC extension [2]). Not much else we can do, unless we want to leave the
Land of Portability™ and start hitting some system specific calls.

So, that's what this version does—it eschews the use of the standard C
library (except for exit(), errno and memset()—while I could replace this
with my own version, C compilers can and will produce better optimized
versions [3] than I can write) and goes straight for the system calls.

This means I will have to write my own code to convert binary to hexadecimal,
but I've written such code plenty of times before.

> /*************************************************************************
> *
> * Copyright 2012 by Sean Conner.  All Rights Reserved.
> *
> * This program is free software; you can redistribute it and/or
> * modify it under the terms of the GNU General Public License
> * as published by the Free Software Foundation; either version 2
> * of the License, or (at your option) any later version.
> *
> * This program is distributed in the hope that it will be useful,
> * but WITHOUT ANY WARRANTY; without even the implied warranty of
> * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> * GNU General Public License for more details.
> *
> * You should have received a copy of the GNU General Public License
> * along with this program; if not, write to the Free Software
> * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
> *
> * Comments, questions and criticisms can be sent to: [email protected]
> *
> *************************************************************************/
>
> /* Style: C89, const correctness, assertive, system calls */
>
> #include <stdlib.h>
> #include <string.h>
> #include <errno.h>
> #include <assert.h>
>
> #include <sys/types.h>
> #include <sys/stat.h>
> #include <fcntl.h>
> #include <unistd.h>
>
> /* Number of input bytes shown on each line of output. */
> #define LINESIZE      16
>
> /********************************************************************/
>
> /*
>  * Error-message table, declared here by hand instead of through a
>  * header.  myperror() indexes sys_errlist[] with errno and uses sys_nerr
>  * as the bound on which indices it will look up.
>  */
> extern const char *sys_errlist[];
> extern int         sys_nerr;
>
> /* Helpers private to this file; each is defined further down. */
> static void   do_dump         (const int,const int);
> static size_t dump_line       (const int,unsigned char *,size_t,const unsigned long);
> static void   hexout          (char *,unsigned long,size_t,const int);
> static void   myperror        (const char *const);
> static size_t myread          (const int,char *,size_t);
> static void   mywrite         (const int,const char *const,const size_t);
>
> /********************************************************************/
>
> /*
>  * Entry point.  With no arguments, dump standard input to standard
>  * output.  Otherwise dump each named file in turn, each preceded by a
>  * "-----name-----" banner line.
>  *
>  * A file that cannot be opened or closed is reported on standard error
>  * and the remaining files are still processed, but the exit status is
>  * then EXIT_FAILURE so a caller can tell that something went wrong.
>  */
> int main(const int argc,const char *const argv[])
> {
>   int rc = EXIT_SUCCESS;
>
>   if (argc == 1)
>     do_dump(STDIN_FILENO,STDOUT_FILENO);
>   else
>   {
>     int i;
>
>     for (i = 1 ; i < argc ; i++)
>     {
>       int fhin;
>
>       fhin = open(argv[i],O_RDONLY);
>       if (fhin == -1)
>       {
>         myperror(argv[i]);
>         rc = EXIT_FAILURE;      /* remember the failure, keep going */
>         continue;
>       }
>
>       mywrite(STDOUT_FILENO,"-----",5);
>       mywrite(STDOUT_FILENO,argv[i],strlen(argv[i]));
>       mywrite(STDOUT_FILENO,"-----\n",6);
>
>       do_dump(fhin,STDOUT_FILENO);
>       if (close(fhin) < 0)
>       {
>         myperror(argv[i]);
>         rc = EXIT_FAILURE;
>       }
>     }
>   }
>
>   return rc;
> }
>
> /************************************************************************/
>
> /*
>  * Hex-dump everything readable from fhin onto fhout.  Input is fetched
>  * through myread() into a fixed-size buffer and handed to dump_line()
>  * one output line at a time; the running offset carries on from one
>  * buffer-load to the next.
>  */
> static void do_dump(const int fhin,const int fhout)
> {
>   unsigned char chunk[4096];
>   unsigned long offset = 0;
>   size_t        left;
>
>   assert(fhin  >= 0);
>   assert(fhout >= 0);
>
>   for (;;)
>   {
>     unsigned char *cursor = chunk;
>
>     left = myread(fhin,(char *)chunk,sizeof(chunk));
>     if (left == 0)
>       break;
>
>     /* dump_line() reports how many bytes it consumed */
>     while (left > 0)
>     {
>       const size_t used = dump_line(fhout,cursor,left,offset);
>
>       cursor += used;
>       left   -= used;
>       offset += used;
>     }
>   }
> }
>
> /********************************************************************/
>
> /*
>  * Format and write one line of the dump: the offset as 8 hexadecimal
>  * digits followed by ':', a space, up to LINESIZE bytes as two hex
>  * digits plus a space each (the column is space padded to full width),
>  * and finally the same bytes as text, with anything outside ' '..'~'
>  * shown as '.'.
>  *
>  *   fhout  descriptor the line is written to
>  *   p      first byte to dump
>  *   bytes  number of bytes available at p (must be > 0); at most
>  *          LINESIZE of them are consumed
>  *   off    offset printed at the start of the line (only the low 8 hex
>  *          digits are shown)
>  *
>  * Returns the number of bytes consumed from p.
>  */
> static size_t dump_line(
>       const int            fhout,
>       unsigned char       *p,
>       size_t               bytes,
>       const unsigned long  off
> )
> {
>   size_t count;
>   char   addr [9];
>   char   hex  [LINESIZE * 3];
>   char   ascii[LINESIZE];
>   char  *dh;
>   char  *da;
>
>   assert(fhout >= 0);
>   assert(p     != NULL);
>   assert(bytes >  0);
>
>   /*
>    * Pre-fill with spaces so a short final line still pads the hex
>    * column.  Each array is cleared with its OWN size: ascii[] is only
>    * LINESIZE bytes, so using sizeof(hex) here would overrun it.
>    */
>   memset(hex  ,' ',sizeof(hex));
>   memset(ascii,' ',sizeof(ascii));
>
>   hexout(addr,off,8,':');
>   if (bytes > LINESIZE)
>     bytes = LINESIZE;
>
>   /* position all three cursors one past the last byte of this line */
>   p  += bytes;
>   dh  = &hex[bytes * 3];
>   da  = &ascii[bytes];
>
>   assert(addr[8] == ':');
>   assert(bytes <= LINESIZE);
>   assert(dh == &hex  [bytes * 3]);
>   assert(da == &ascii[bytes]);
>
>   /* fill the line from its last byte back to its first */
>   for (count = 0 ; count < bytes ; count++)
>   {
>     p  --;
>     da --;
>     dh -= 3;
>
>     if ((*p >= ' ') && (*p <= '~'))
>       *da = *p;
>     else
>       *da = '.';
>
>     hexout(dh,(unsigned long)*p,2,' ');
>   }
>
>   assert(dh == hex);
>   assert(da == ascii);
>
>   mywrite(fhout,addr,sizeof(addr));
>   mywrite(fhout," ",1);
>   mywrite(fhout,hex,sizeof(hex));
>   mywrite(fhout,ascii,count);
>   mywrite(fhout,"\n",1);
>
>   return count;
> }
>
> /**********************************************************************/
>
> /*
>  * Store the low `size` hexadecimal digits of `value` (upper case, most
>  * significant first) in dest[0..size-1], then store `padding` in
>  * dest[size].  The caller supplies room for size + 1 characters; no NUL
>  * terminator is written.  Digits are produced right to left, so each
>  * one is simply the current low nibble of `value`.
>  */
> static void hexout(char *dest,unsigned long value,size_t size,const int padding)
> {
>   static const char digits[] = "0123456789ABCDEF";
>   char             *out;
>
>   assert(dest != NULL);
>   assert(size >  0);
>   assert((padding >= ' ') && (padding <= '~'));
>
>   out  = dest + size;
>   *out = padding;
>
>   while (out != dest)
>   {
>     *--out   = digits[value & 0x0F];
>     value  >>= 4;
>   }
> }
>
> /************************************************************************/
>
> /*
>  * Write "<s>: <message>\n" to standard error, where <message> is the
>  * sys_errlist[] text for the value errno held on entry.  errno is
>  * sampled first because the writes below may change it.
>  *
>  * "(unknown)" is printed when errno is outside 0 .. sys_nerr-1 or the
>  * table has no usable text for it.  An empty `s` is skipped rather than
>  * written, since mywrite() asserts a non-zero length.
>  */
> static void myperror(const char *const s)
> {
>   int err = errno;
>
>   assert(s != NULL);
>
>   if (*s != '\0')
>     mywrite(STDERR_FILENO,s,strlen(s));
>   mywrite(STDERR_FILENO,": ",2);
>
>   /* valid indices stop at sys_nerr - 1, hence >= rather than > */
>   if (
>           (err < 0)
>        || (err >= sys_nerr)
>        || (sys_errlist[err]    == NULL)
>        || (sys_errlist[err][0] == '\0')
>      )
>     mywrite(STDERR_FILENO,"(unknown)",9);
>   else
>     mywrite(STDERR_FILENO,sys_errlist[err],strlen(sys_errlist[err]));
>   mywrite(STDERR_FILENO,"\n",1);
> }
>
> /************************************************************************/
>
> /*
>  * Read from fh into buf until `size` bytes have arrived or read()
>  * reports end of input, whichever comes first.  Returns the number of
>  * bytes stored, which is less than `size` only when input ran out.
>  * A read() error is reported through myperror() and ends the program.
>  */
> static size_t myread(const int fh,char *buf,size_t size)
> {
>   size_t total;
>
>   assert(fh   >= 0);
>   assert(buf  != NULL);
>   assert(size >  0);
>
>   for (total = 0 ; size > 0 ; )
>   {
>     const ssize_t got = read(fh,buf,size);
>
>     if (got < 0)
>     {
>       myperror("read()");
>       exit(EXIT_FAILURE);
>     }
>
>     if (got == 0)       /* end of input */
>       break;
>
>     buf   += got;
>     size  -= got;
>     total += got;
>   }
>
>   return total;
> }
>
> /*********************************************************************/
>
> /*
>  * Write all `size` bytes of `msg` to fh, or end the program.
>  *
>  * write() may legitimately accept fewer bytes than requested (pipes,
>  * terminals, an interrupting signal), so the call is repeated until
>  * everything is out, and EINTR is retried instead of being treated as
>  * fatal.  On a real failure the error is reported through myperror(),
>  * unless fh is standard error itself (nowhere left to report it), and
>  * the program exits with EXIT_FAILURE.
>  */
> static void mywrite(const int fh,const char *const msg,const size_t size)
> {
>   const char *p    = msg;
>   size_t      left = size;
>
>   assert(fh   >= 0);
>   assert(msg  != NULL);
>   assert(size >  0);
>
>   while (left > 0)
>   {
>     ssize_t n = write(fh,p,left);
>
>     if (n > 0)
>     {
>       p    += n;
>       left -= (size_t)n;
>       continue;
>     }
>
>     if ((n < 0) && (errno == EINTR))
>       continue;
>
>     /* no progress and no error code: give myperror() something to show */
>     if (n == 0)
>       errno = EIO;
>
>     if (fh != STDERR_FILENO)
>       myperror("output");
>
>     exit(EXIT_FAILURE);
>   }
> }
>
> /***********************************************************************/
>

The major trick here is that I generate the output for each line backwards! I
do that because it's easier to generate the hexadecimal output that way.
Generating the hexadecimal output “forwards” would mean I need to rotate the
first four bits down into position (so with a 32-bit value, I would need to
shift the bits down 28 positions), then generate the hex digit, then rotate
the next four bits down 24 positions, but by then, I'm doing repeated rotates
and discarding all the work I did previously for each digit. And if I only
want to work with 8 bits, I have to have another special function to handle
that, or complicate one function to handle multiple numbers of bits.

But by going backwards, I start with the last four bits, which are already in
the “proper position” to generate a digit, then shift everything down four
bits, and keep repeating this until the specified number of hexadecimal
digits are produced.

So, while the amount of code goes up, it is faster than the more portable
version:

> [spc]lucy:~/projects/99/src>time ./12 ~/bin/firefox/libxul.so >/dev/null
>
> real    0m4.985s
> user    0m4.969s
> sys     0m0.015s
> [spc]lucy:~/projects/99/src>time ./20 ~/bin/firefox/libxul.so >/dev/null
>
> real    0m2.936s
> user    0m1.511s
> sys     0m1.425s
>

It's almost twice as fast, yet it spends a disturbingly large amount of time
(compared to the portable version) in the kernel. It's because of all the
calls to write() I do. That's a problem I'll attack in the next version.

* Part 19: Lua, recursion, closure as callback [4]
* Part 21: C89, const correctness, assertive, system calls, per line
 buffering [5]

[1] gopher://gopher.conman.org/0Phlog:2012/01/20.2
[2] http://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html
[3] http://prog21.dadgum.com/9.html
[4] gopher://gopher.conman.org/0Phlog:2012/01/27.1
[5] gopher://gopher.conman.org/0Phlog:2012/01/29.1

Email author at [email protected]