* * * * *

 99 ways to program a hex, Part 22: C89, const correctness, assertive, system
                            calls, full buffering

So yesterday [1] I presented a non-portable version that was quite a bit
faster than the portable version, but I'm not quite done yet. That version
just buffered a line at a time—today's version buffers nearly 8k (Kilobyte)
worth of data (it's not exact, but it's close enough) between calls to
write().

> /*************************************************************************
> *
> * Copyright 2012 by Sean Conner.  All Rights Reserved.
> *
> * This program is free software; you can redistribute it and/or
> * modify it under the terms of the GNU General Public License
> * as published by the Free Software Foundation; either version 2
> * of the License, or (at your option) any later version.
> *
> * This program is distributed in the hope that it will be useful,
> * but WITHOUT ANY WARRANTY; without even the implied warranty of
> * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> * GNU General Public License for more details.
> *
> * You should have received a copy of the GNU General Public License
> * along with this program; if not, write to the Free Software
> * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
> *
> * Comments, questions and criticisms can be sent to: [email protected]
> *
> *************************************************************************/
>
> /* Style: C89, const correctness, assertive, system calls, full buffering */
>
> #include <stdlib.h>
> #include <string.h>
> #include <errno.h>
> #include <assert.h>
>
> #include <sys/types.h>
> #include <sys/stat.h>
> #include <fcntl.h>
> #include <unistd.h>
>
> #define LINESIZE      16
>
> /********************************************************************/
>
> extern const char *sys_errlist[];
> extern int         sys_nerr;
>
> static void   do_dump         (const int,const int);
> static size_t dump_line       (char **const,unsigned char *,size_t,const unsigned long);
> static void   hexout          (char *const,unsigned long,size_t,const int);
> static void   myperror        (const char *const);
> static size_t myread          (const int,char *,size_t);
> static void   mywrite         (const int,const char *const,const size_t);
>
> /********************************************************************/
>
> int main(const int argc,const char *const argv[])
> {
>   if (argc == 1)
>     do_dump(STDIN_FILENO,STDOUT_FILENO);
>   else
>   {
>     int i;
>
>     for (i = 1 ; i < argc ; i++)
>     {
>       int fhin;
>
>       fhin = open(argv[i],O_RDONLY);
>       if (fhin == -1)
>       {
>         myperror(argv[i]);
>         continue;
>       }
>
>       mywrite(STDOUT_FILENO,"-----",5);
>       mywrite(STDOUT_FILENO,argv[i],strlen(argv[i]));
>       mywrite(STDOUT_FILENO,"-----\n",6);
>
>       do_dump(fhin,STDOUT_FILENO);
>       if (close(fhin) < 0)
>         myperror(argv[i]);
>     }
>   }
>
>   return EXIT_SUCCESS;
> }
>
> /************************************************************************/
>
> static void do_dump(const int fhin,const int fhout)
> {
>   unsigned char  buffer[4096];
>   char           outbuffer[75 * 109];
>   char          *pout;
>   unsigned long  off;
>   size_t         bytes;
>   size_t         count;
>
>   assert(fhin  >= 0);
>   assert(fhout >= 0);
>
>   memset(outbuffer,' ',sizeof(outbuffer));
>   off      = 0;
>   count    = 0;
>   pout     = outbuffer;
>
>   while((bytes = myread(fhin,(char *)buffer,sizeof(buffer))) > 0)
>   {
>     unsigned char *p = buffer;
>
>     for (p = buffer ; bytes > 0 ; )
>     {
>       size_t amount;
>
>       amount    = dump_line(&pout,p,bytes,off);
>       p        += amount;
>       bytes    -= amount;
>       off      += amount;
>       count++;
>
>       if (count == 109)
>       {
>         mywrite(fhout,outbuffer,(size_t)(pout - outbuffer));
>         memset(outbuffer,' ',sizeof(outbuffer));
>         count    = 0;
>         pout     = outbuffer;
>       }
>     }
>   }
>
>   if ((size_t)(pout - outbuffer) > 0)
>     mywrite(fhout,outbuffer,(size_t)(pout - outbuffer));
> }
>
> /********************************************************************/
>
> static size_t dump_line(
>       char                **const pline,
>       unsigned char              *p,
>       size_t                      bytes,
>       const unsigned long         off
> )
> {
>   char   *line;
>   char   *dh;
>   char   *da;
>   size_t  count;
>
>   assert(pline  != NULL);
>   assert(*pline != NULL);
>   assert(p      != NULL);
>   assert(bytes  >  0);
>
>   line = *pline;
>
>   hexout(line,off,8,':');
>   if (bytes > LINESIZE)
>     bytes = LINESIZE;
>
>   p  += bytes;
>   dh  = &line[10 + bytes * 3];
>   da  = &line[58 + bytes];
>
>   for (count = 0 ; count < bytes ; count++)
>   {
>     p  --;
>     da --;
>     dh -= 3;
>
>     if ((*p >= ' ') && (*p <= '~'))
>       *da = *p;
>     else
>       *da = '.';
>
>     hexout(dh,(unsigned long)*p,2,' ');
>   }
>
>   line[58 + count] = '\n';
>   *pline = &line[59 + count];
>   return count;
> }
>
> /**********************************************************************/
>
> static void hexout(char *const dest,unsigned long value,size_t size,const int padding)
> {
>   assert(dest != NULL);
>   assert(size >  0);
>   assert((padding >= ' ') && (padding <= '~'));
>
>   dest[size] = padding;
>   while(size--)
>   {
>     dest[size] = (char)((value & 0x0F) + '0');
>     if (dest[size] > '9') dest[size] += 7;
>     value >>= 4;
>   }
> }
>
> /************************************************************************/
>
> static void myperror(const char *const s)
> {
>   int err = errno;
>
>   assert(s != NULL);
>
>   mywrite(STDERR_FILENO,s,strlen(s));
>   mywrite(STDERR_FILENO,": ",2);
>
>   if (err > sys_nerr)
>     mywrite(STDERR_FILENO,"(unknown)",9);
>   else
>     mywrite(STDERR_FILENO,sys_errlist[err],strlen(sys_errlist[err]));
>   mywrite(STDERR_FILENO,"\n",1);
> }
>
> /************************************************************************/
>
> static size_t myread(const int fh,char *buf,size_t size)
> {
>   size_t amount = 0;
>
>   assert(fh   >= 0);
>   assert(buf  != NULL);
>   assert(size >  0);
>
>   while(size > 0)
>   {
>     ssize_t bytes;
>
>     bytes = read(fh,buf,size);
>     if (bytes < 0)
>     {
>       myperror("read()");
>       exit(EXIT_FAILURE);
>     }
>     if (bytes == 0)
>       break;
>
>     amount += bytes;
>     size   -= bytes;
>     buf    += bytes;
>   }
>
>   return amount;
> }
>
> /*********************************************************************/
>
> static void mywrite(const int fh,const char *const msg,const size_t size)
> {
>   assert(fh   >= 0);
>   assert(msg  != NULL);
>   assert(size >  0);
>
>   if (write(fh,msg,size) < (ssize_t)size)
>   {
>     if (fh != STDERR_FILENO)
>       myperror("output");
>
>     exit(EXIT_FAILURE);
>   }
> }
>
> /***********************************************************************/
>

And that makes all the difference. The portable vesrsion [2]:

> [spc]lucy:~/projects/99/src>time ./12 ~/bin/firefox/libxul.so >/dev/null
>
> real    0m4.985s
> user    0m4.969s
> sys     0m0.015s
>

Our first stab at a non-portable, but possibly faster version [3]:

> [spc]lucy:~/projects/99/src>time ./20 ~/bin/firefox/libxul.so >/dev/null
>
> real    0m2.936s
> user    0m1.511s
> sys     0m1.425s
>

The “it's quite a bit faster” version [4]:

> [spc]lucy:~/projects/99/src>time ./21 ~/bin/firefox/libxul.so >/dev/null
>
> real    0m0.957s
> user    0m0.645s
> sys     0m0.313s
>

And finally, the punchline—today's version:

> [spc]lucy:~/projects/99/src>time ./22 ~/bin/firefox/libxul.so >/dev/null
>
> real    0m0.460s
> user    0m0.448s
> sys     0m0.012s
>

And yes, that's the real output—1/10 the time of the portable version with a
similar amount of time in the kernel.

Frankly, I was a bit surprised at these results—not that the non-portable
version was faster (that's almost a given) but the magnitude of the results.
I didn't think the standard C library had that much overhead. I was expecting
easily a percentage increase in speed, but even twice would have been
unexpected, but ten times faster?

Wow.

Increasing the size of the buffer past what I have probably won't help all
that much, and in fact, when I doubled the buffer size:

> [spc]lucy:~/projects/99/src>time ./22 ~/bin/firefox/libxul.so >/dev/null
>
> real    0m0.592s
> user    0m0.582s
> sys     0m0.010s
>

The timing difference could be due to cache effects, maybe?

So I think we've maxed out the speed at which this program will run. As a
test, I profiled the code to see if there was anything I migh have missed:

> Each sample counts as 0.01 seconds.
>   %   cumulative   self              self     total
>  time   seconds   seconds    calls  ms/call  ms/call  name
> 100.21      0.41     0.41        1   410.86   410.86  do_dump
>   0.00      0.41     0.00     9197     0.00     0.00  mywrite
>

I checked the code GCC [5] produced (all code was compiled with -O3, a very
high level of optimization) and well, I'm not sure I could have done much
better, and probably would have done worse—GCC inlined everything into
do_dump() (with the exception of main() and mywrite()), something I would not
have done in assembly (and have any hope of code reuse for another project).
So I think we're done with making this code fast.

That's not to say I won't do an assembly version of this program, but it
probably won't be for the x86 line.

* Part 21: C89, const correctness, assertive, system calls, per line
 buffering [6]
* Part 23: C89, const correctness, assertive, system calls, full buffering,
 lookup table [7]

[1] gopher://gopher.conman.org/0Phlog:2012/01/29.1
[2] gopher://gopher.conman.org/0Phlog:2012/01/20.2
[3] gopher://gopher.conman.org/0Phlog:2012/01/28.1
[4] gopher://gopher.conman.org/0Phlog:2012/01/29.1
[5] http://gcc.gnu.org/
[6] gopher://gopher.conman.org/0Phlog:2012/01/29.1
[7] gopher://gopher.conman.org/0Phlog:2012/01/31.2

Email author at [email protected]