* * * * *
99 ways to program a hex, Part 22: C89, const correctness, assertive, system
calls, full buffering
So yesterday [1] I presented a non-portable version that was quite a bit
faster than the portable version, but I'm not quite done yet. That version
just buffered a line at a time—today's version buffers nearly 8k (Kilobyte)
worth of data (it's not exact, but it's close enough) between calls to
write().
> /*************************************************************************
> *
> * Copyright 2012 by Sean Conner. All Rights Reserved.
> *
> * This program is free software; you can redistribute it and/or
> * modify it under the terms of the GNU General Public License
> * as published by the Free Software Foundation; either version 2
> * of the License, or (at your option) any later version.
> *
> * This program is distributed in the hope that it will be useful,
> * but WITHOUT ANY WARRANTY; without even the implied warranty of
> * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> * GNU General Public License for more details.
> *
> * You should have received a copy of the GNU General Public License
> * along with this program; if not, write to the Free Software
> * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
> *
> * Comments, questions and criticisms can be sent to:
[email protected]
> *
> *************************************************************************/
>
> /* Style: C89, const correctness, assertive, system calls, full buffering */
>
> #include <stdlib.h>
> #include <string.h>
> #include <errno.h>
> #include <assert.h>
>
> #include <sys/types.h>
> #include <sys/stat.h>
> #include <fcntl.h>
> #include <unistd.h>
>
> #define LINESIZE 16
>
> /********************************************************************/
>
> extern const char *sys_errlist[];
> extern int sys_nerr;
>
> static void do_dump (const int,const int);
> static size_t dump_line (char **const,unsigned char *,size_t,const unsigned long);
> static void hexout (char *const,unsigned long,size_t,const int);
> static void myperror (const char *const);
> static size_t myread (const int,char *,size_t);
> static void mywrite (const int,const char *const,const size_t);
>
> /********************************************************************/
>
> int main(const int argc,const char *const argv[])
> {
> if (argc == 1)
> do_dump(STDIN_FILENO,STDOUT_FILENO);
> else
> {
> int i;
>
> for (i = 1 ; i < argc ; i++)
> {
> int fhin;
>
> fhin = open(argv[i],O_RDONLY);
> if (fhin == -1)
> {
> myperror(argv[i]);
> continue;
> }
>
> mywrite(STDOUT_FILENO,"-----",5);
> mywrite(STDOUT_FILENO,argv[i],strlen(argv[i]));
> mywrite(STDOUT_FILENO,"-----\n",6);
>
> do_dump(fhin,STDOUT_FILENO);
> if (close(fhin) < 0)
> myperror(argv[i]);
> }
> }
>
> return EXIT_SUCCESS;
> }
>
> /************************************************************************/
>
> static void do_dump(const int fhin,const int fhout)
> {
> unsigned char buffer[4096];
> char outbuffer[75 * 109];
> char *pout;
> unsigned long off;
> size_t bytes;
> size_t count;
>
> assert(fhin >= 0);
> assert(fhout >= 0);
>
> memset(outbuffer,' ',sizeof(outbuffer));
> off = 0;
> count = 0;
> pout = outbuffer;
>
> while((bytes = myread(fhin,(char *)buffer,sizeof(buffer))) > 0)
> {
> unsigned char *p = buffer;
>
> for (p = buffer ; bytes > 0 ; )
> {
> size_t amount;
>
> amount = dump_line(&pout,p,bytes,off);
> p += amount;
> bytes -= amount;
> off += amount;
> count++;
>
> if (count == 109)
> {
> mywrite(fhout,outbuffer,(size_t)(pout - outbuffer));
> memset(outbuffer,' ',sizeof(outbuffer));
> count = 0;
> pout = outbuffer;
> }
> }
> }
>
> if ((size_t)(pout - outbuffer) > 0)
> mywrite(fhout,outbuffer,(size_t)(pout - outbuffer));
> }
>
> /********************************************************************/
>
> static size_t dump_line(
> char **const pline,
> unsigned char *p,
> size_t bytes,
> const unsigned long off
> )
> {
> char *line;
> char *dh;
> char *da;
> size_t count;
>
> assert(pline != NULL);
> assert(*pline != NULL);
> assert(p != NULL);
> assert(bytes > 0);
>
> line = *pline;
>
> hexout(line,off,8,':');
> if (bytes > LINESIZE)
> bytes = LINESIZE;
>
> p += bytes;
> dh = &line[10 + bytes * 3];
> da = &line[58 + bytes];
>
> for (count = 0 ; count < bytes ; count++)
> {
> p --;
> da --;
> dh -= 3;
>
> if ((*p >= ' ') && (*p <= '~'))
> *da = *p;
> else
> *da = '.';
>
> hexout(dh,(unsigned long)*p,2,' ');
> }
>
> line[58 + count] = '\n';
> *pline = &line[59 + count];
> return count;
> }
>
> /**********************************************************************/
>
> static void hexout(char *const dest,unsigned long value,size_t size,const int padding)
> {
> assert(dest != NULL);
> assert(size > 0);
> assert((padding >= ' ') && (padding <= '~'));
>
> dest[size] = padding;
> while(size--)
> {
> dest[size] = (char)((value & 0x0F) + '0');
> if (dest[size] > '9') dest[size] += 7;
> value >>= 4;
> }
> }
>
> /************************************************************************/
>
> static void myperror(const char *const s)
> {
> int err = errno;
>
> assert(s != NULL);
>
> mywrite(STDERR_FILENO,s,strlen(s));
> mywrite(STDERR_FILENO,": ",2);
>
> if (err > sys_nerr)
> mywrite(STDERR_FILENO,"(unknown)",9);
> else
> mywrite(STDERR_FILENO,sys_errlist[err],strlen(sys_errlist[err]));
> mywrite(STDERR_FILENO,"\n",1);
> }
>
> /************************************************************************/
>
> static size_t myread(const int fh,char *buf,size_t size)
> {
> size_t amount = 0;
>
> assert(fh >= 0);
> assert(buf != NULL);
> assert(size > 0);
>
> while(size > 0)
> {
> ssize_t bytes;
>
> bytes = read(fh,buf,size);
> if (bytes < 0)
> {
> myperror("read()");
> exit(EXIT_FAILURE);
> }
> if (bytes == 0)
> break;
>
> amount += bytes;
> size -= bytes;
> buf += bytes;
> }
>
> return amount;
> }
>
> /*********************************************************************/
>
> static void mywrite(const int fh,const char *const msg,const size_t size)
> {
> assert(fh >= 0);
> assert(msg != NULL);
> assert(size > 0);
>
> if (write(fh,msg,size) < (ssize_t)size)
> {
> if (fh != STDERR_FILENO)
> myperror("output");
>
> exit(EXIT_FAILURE);
> }
> }
>
> /***********************************************************************/
>
And that makes all the difference. The portable vesrsion [2]:
> [spc]lucy:~/projects/99/src>time ./12 ~/bin/firefox/libxul.so >/dev/null
>
> real 0m4.985s
> user 0m4.969s
> sys 0m0.015s
>
Our first stab at a non-portable, but possibly faster version [3]:
> [spc]lucy:~/projects/99/src>time ./20 ~/bin/firefox/libxul.so >/dev/null
>
> real 0m2.936s
> user 0m1.511s
> sys 0m1.425s
>
The “it's quite a bit faster” version [4]:
> [spc]lucy:~/projects/99/src>time ./21 ~/bin/firefox/libxul.so >/dev/null
>
> real 0m0.957s
> user 0m0.645s
> sys 0m0.313s
>
And finally, the punchline—today's version:
> [spc]lucy:~/projects/99/src>time ./22 ~/bin/firefox/libxul.so >/dev/null
>
> real 0m0.460s
> user 0m0.448s
> sys 0m0.012s
>
And yes, that's the real output—1/10 the time of the portable version with a
similar amount of time in the kernel.
Frankly, I was a bit surprised at these results—not that the non-portable
version was faster (that's almost a given) but the magnitude of the results.
I didn't think the standard C library had that much overhead. I was expecting
easily a percentage increase in speed, but even twice would have been
unexpected, but ten times faster?
Wow.
Increasing the size of the buffer past what I have probably won't help all
that much, and in fact, when I doubled the buffer size:
> [spc]lucy:~/projects/99/src>time ./22 ~/bin/firefox/libxul.so >/dev/null
>
> real 0m0.592s
> user 0m0.582s
> sys 0m0.010s
>
The timing difference could be due to cache effects, maybe?
So I think we've maxed out the speed at which this program will run. As a
test, I profiled the code to see if there was anything I migh have missed:
> Each sample counts as 0.01 seconds.
> % cumulative self self total
> time seconds seconds calls ms/call ms/call name
> 100.21 0.41 0.41 1 410.86 410.86 do_dump
> 0.00 0.41 0.00 9197 0.00 0.00 mywrite
>
I checked the code GCC [5] produced (all code was compiled with -O3, a very
high level of optimization) and well, I'm not sure I could have done much
better, and probably would have done worse—GCC inlined everything into
do_dump() (with the exception of main() and mywrite()), something I would not
have done in assembly (and have any hope of code reuse for another project).
So I think we're done with making this code fast.
That's not to say I won't do an assembly version of this program, but it
probably won't be for the x86 line.
* Part 21: C89, const correctness, assertive, system calls, per line
buffering [6]
* Part 23: C89, const correctness, assertive, system calls, full buffering,
lookup table [7]
[1]
gopher://gopher.conman.org/0Phlog:2012/01/29.1
[2]
gopher://gopher.conman.org/0Phlog:2012/01/20.2
[3]
gopher://gopher.conman.org/0Phlog:2012/01/28.1
[4]
gopher://gopher.conman.org/0Phlog:2012/01/29.1
[5]
http://gcc.gnu.org/
[6]
gopher://gopher.conman.org/0Phlog:2012/01/29.1
[7]
gopher://gopher.conman.org/0Phlog:2012/01/31.2
Email author at
[email protected]