/*
* This is a file system benchmark which attempts to study bottlenecks -
* it is named 'Bonnie' for semi-obvious reasons.
*
* Specifically, these are the types of filesystem activity that have been
* observed to be bottlenecks in I/O-intensive applications, in particular
* the text database work done in connection with the New Oxford English
* Dictionary Project at the University of Waterloo.
*
* It performs a series of tests on a file of known size. By default, that
* size is 100 MB (but that's not enough - see below). For each test, Bonnie
* reports the bytes processed per elapsed second, per CPU second, and the
* % CPU usage (user and system).
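*
* (Illustrative arithmetic only, not from a real run: 100 MB written in
* 40 elapsed seconds using 8 CPU seconds would be reported as 2.5 MB per
* elapsed second, 12.5 MB per CPU second, and 20% CPU.)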
*
* In each case, an attempt is made to keep optimizers from noticing it's
* all bogus. The idea is to make sure that these are real transfers to/from
* user space to the physical disk. The tests are:
*
* 1. Sequential Output
*
* 1.1 Per-Character. The file is written using the putc() stdio macro.
* The loop that does the writing should be small enough to fit into any
* reasonable I-cache. The CPU overhead here is that required to do the
* stdio code plus the OS file space allocation.
*
* 1.2 Block. The file is created using write(2). The CPU overhead
* should be just the OS file space allocation.
*
* 1.3 Rewrite. Each Chunk (currently 8192 bytes) of the file is read with
* read(2), dirtied, and rewritten with write(2), requiring an lseek(2).
* Since no space
* allocation is done, and the I/O is well-localized, this should test the
* effectiveness of the filesystem cache and the speed of data transfer.
*
* 2. Sequential Input
*
* 2.1 Per-Character. The file is read using the getc() stdio macro. Once
* again, the inner loop is small. This should exercise only stdio and
* sequential input.
*
* 2.2 Block. The file is read using read(2). This should be a very pure
* test of sequential input performance.
*
* 3. Random Seeks
*
* This test runs SeekProcCount processes in parallel, doing a total of
* 4000 lseek()s to locations in the file chosen with random() (on System V
* systems a local random(), built on nrand48(), is substituted). In each
* case, the block is read with read(2).
* In 10% of cases, it is dirtied and written back with write(2).
*
* The idea behind the SeekProcCount processes is to make sure there's always
* a seek queued up.
*
* AXIOM: For any unix filesystem, the effective number of lseek(2) calls
* per second declines asymptotically to near 30, once the effect of
* caching is defeated.
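*
* (For scale: 30 per second is roughly 33 ms per random access, about
* what one average seek plus rotational delay costs on a typical disk.)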
*
* The size of the file has a strong nonlinear effect on the results of
* this test. Many Unix systems that have the memory available will make
* aggressive efforts to cache the whole thing, and report random I/O rates
* in the thousands per second, which is ridiculous. As an extreme
* example, an IBM RISC 6000 with 64 MB of memory reported 3,722 per second
* on a 50 MB file. Some have argued that bypassing the cache is artificial,
* since the cache is just doing what it's designed to do. True, but in any
* application that requires rapid random access to file(s) significantly
* larger than main memory, running on a system that is also doing
* significant other work, the caches will inevitably max out. There is
* a hard limit hiding behind the cache, which the author has observed to be
* of significant import in many situations - what we are trying to do here
* is measure that number.
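*
* (Practical corollary: make the test file several times the size of main
* memory, or the seek numbers mostly measure the cache.)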
*
* COPYRIGHT NOTICE:
* Copyright (c) Tim Bray, 1990.
* Everybody is hereby granted rights to use, copy, and modify this program,
* provided only that this copyright notice and the disclaimer below
* are preserved without change.
* DISCLAIMER:
* This program is provided AS IS with no warranty of any kind, and
* The author makes no representation with respect to the adequacy of this
* program for any particular purpose or with respect to its adequacy to
* produce any particular result, and
* The author shall not be liable for loss or damage arising out of
* the use of this program regardless of how sustained, and
* In no event shall the author be liable for special, direct, indirect
* or consequential damage, loss, costs or fees or expenses of any
* nature or kind.
*/
/*
* N.B. a seeker_report carries CPU time and Start/End times, but no Elapsed
* time, so index 1 is re-used: it means Elapsed in delta[] but StartTime in
* a seeker_report; icky data coupling.
*/
#define CPU (0)
#define Elapsed (1)
#define StartTime (1)
#define EndTime (2)
#define Seeks (4000)
#define UpdateSeek (10)
#define SeekProcCount (3)
#define Chunk (8192)
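/*
* Elsewhere in the full source (elided from this extract) are the usual
* #include directives, the IntSize word-size constant, the test names
* (Putc, ReWrite, FastWrite, Getc, FastRead, Lseek), the per-test timing
* table delta[][] indexed by [test][CPU or Elapsed], and the helpers
* newfile(), timestamp(), get_delta_t(), time_so_far() and io_error().
* The sketch below shows one plausible shape for those declarations; it is
* illustrative only, guesses at the details, and is not compiled.
*/
#if 0
#include <stdio.h>
#define IntSize (sizeof(int)) /* bytes per int, used to size the transfer buffers */
enum test_types
{ Putc, ReWrite, FastWrite, Getc, FastRead, Lseek, TestCount };
static double delta[(int) TestCount][2]; /* [test][CPU or Elapsed], in seconds */
static void newfile(); /* (re)create and open the scratch file */
static void timestamp(); /* note the current CPU and wall-clock times */
static void get_delta_t(); /* record time since timestamp() for one test */
static double time_so_far(); /* wall-clock seconds */
static void io_error(); /* perror() and exit */
#endif
/*
* doseek() and, on System V, the local srandom()/random() wrappers are
* used before they are defined below, so declare them here.
*/
static void doseek();
#ifdef SysV
static void srandom();
static long random();
#endif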
main(argc, argv)
int argc;
char * argv[];
{
int buf[Chunk / IntSize];
int bufindex;
int chars[256];
int child;
char * dir;
int fd;
double first_start;
double last_stop;
int lseek_count = 0;
char name[Chunk];
int next;
int seek_control[2];
int seek_feedback[2];
char seek_tickets[Seeks + SeekProcCount];
double seeker_report[3];
int size;
FILE * stream;
int words;
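/*
* In the full program, command-line parsing at this point sets dir, builds
* the scratch-file path in name, and sets size (the file size in bytes,
* 100 MB by default); that code is elided from this extract, so those
* variables are assumed to be initialized before the tests below.
*/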
/* Fill up a file, writing it a char at a time with the stdio putc() call */
fprintf(stderr, "Writing with putc()...");
newfile(name, &fd, &stream, 1);
timestamp();
for (words = 0; words < size; words++)
if (putc(words & 0x7f, stream) == EOF)
io_error("putc");
/*
* note that we always close the file before measuring time, in an
* effort to force as much of the I/O out as we can
*/
if (fclose(stream) == -1)
io_error("fclose after putc");
get_delta_t(Putc);
fprintf(stderr, "done\n");
/* Now read & rewrite it using block I/O. Dirty one word in each block */
newfile(name, &fd, &stream, 0);
if (lseek(fd, (off_t) 0, 0) == (off_t) -1)
io_error("lseek(2) before rewrite");
fprintf(stderr, "Rewriting...");
timestamp();
bufindex = 0;
if ((words = read(fd, (char *) buf, Chunk)) == -1)
io_error("rewrite read");
while (words == Chunk)
{ /* while we can read a block */
if (bufindex == Chunk / IntSize)
bufindex = 0;
buf[bufindex++]++;
if (lseek(fd, (off_t) -words, 1) == -1)
io_error("relative lseek(2)");
if (write(fd, (char *) buf, words) == -1)
io_error("re write(2)");
if ((words = read(fd, (char *) buf, Chunk)) == -1)
io_error("rwrite read");
} /* while we can read a block */
if (close(fd) == -1)
io_error("close after rewrite");
get_delta_t(ReWrite);
fprintf(stderr, "done\n");
/* Write the whole file from scratch, again, with block I/O */
newfile(name, &fd, &stream, 1);
fprintf(stderr, "Writing intelligently...");
for (words = 0; words < Chunk / IntSize; words++)
buf[words] = 0;
timestamp();
for (words = bufindex = 0; words < (size / Chunk); words++)
{ /* for each block */
if (bufindex == (Chunk / IntSize))
bufindex = 0;
buf[bufindex++]++;
if (write(fd, (char *) buf, Chunk) == -1)
io_error("write(2)");
} /* for each block */
if (close(fd) == -1)
io_error("close after fast write");
get_delta_t(FastWrite);
fprintf(stderr, "done\n");
/* read them all back with getc() */
newfile(name, &fd, &stream, 0);
for (words = 0; words < 256; words++)
chars[words] = 0;
fprintf(stderr, "Reading with getc()...");
timestamp();
for (words = 0; words < size; words++)
{ /* for each byte */
if ((next = getc(stream)) == EOF)
io_error("getc(3)");
/* just to fool optimizers */
chars[next]++;
} /* for each byte */
if (fclose(stream) == -1)
io_error("fclose after getc");
get_delta_t(Getc);
fprintf(stderr, "done\n");
/* use the frequency count, so the optimizer can't throw the getc() loop away */
for (words = 0; words < 256; words++)
sprintf((char *) buf, "%d", chars[words]);
/* Now suck it in, Chunk at a time, as fast as we can */
newfile(name, &fd, &stream, 0);
if (lseek(fd, (off_t) 0, 0) == -1)
io_error("lseek before read");
fprintf(stderr, "Reading intelligently...");
timestamp();
do
{ /* per block */
if ((words = read(fd, (char *) buf, Chunk)) == -1)
io_error("read(2)");
chars[buf[abs(buf[0]) % (Chunk / IntSize)] & 0x7f]++;
} /* per block */
while (words);
if (close(fd) == -1)
io_error("close after read");
get_delta_t(FastRead);
fprintf(stderr, "done\n");
/* use the frequency count again, so the read loop can't be optimized away */
for (words = 0; words < 256; words++)
sprintf((char *) buf, "%d", chars[words]);
/*
* Now test random seeks; first, set up for communicating with children.
* The object of the game is to do "Seeks" lseek() calls as quickly
* as possible. So we'll farm them out among SeekProcCount processes.
* We'll control them by writing 1-byte tickets down a pipe which
* the children all read. We write "Seeks" bytes with value 1, followed by
* one 0 byte per child to tell it to stop; whichever child happens to get
* a ticket does one seek, so the right number of seeks gets done.
* The idea is that since the write() of the tickets is probably
* atomic, the parent process likely won't get scheduled while the
* children are seeking away. If you draw a picture of the likely
* timelines for three children, it seems likely that the seeks will
* overlap very nicely with the process scheduling with the effect
* that there will *always* be a seek() outstanding on the file.
* Question: should the file be opened *before* the fork, so that
* all the children are lseeking on the same underlying file object?
*/
if (pipe(seek_feedback) == -1 || pipe(seek_control) == -1)
io_error("pipe");
for (next = 0; next < Seeks; next++)
seek_tickets[next] = 1;
for ( ; next < (Seeks + SeekProcCount); next++)
seek_tickets[next] = 0;
/* launch some parallel seek processes */
for (next = 0; next < SeekProcCount; next++)
{ /* for each seek proc */
if ((child = fork()) == -1)
io_error("fork");
else if (child == 0)
{ /* child process */
/* set up and wait for the go-ahead */
close(seek_feedback[0]);
close(seek_control[1]);
newfile(name, &fd, &stream, 0);
srandom(getpid());
fprintf(stderr, "Seeker %d...", next + 1);
/* wait for the go-ahead */
if (read(seek_control[0], seek_tickets, 1) != 1)
io_error("read ticket");
timestamp();
seeker_report[StartTime] = time_so_far();
/* loop until we read a 0 ticket back from our parent */
while(seek_tickets[0])
{ /* until Mom says stop */
doseek((long) (random() % size), fd,
((lseek_count++ % UpdateSeek) == 0));
if (read(seek_control[0], seek_tickets, 1) != 1)
io_error("read ticket");
} /* until Mom says stop */
if (close(fd) == -1)
io_error("close after seek");
/* report to parent */
get_delta_t(Lseek);
seeker_report[EndTime] = time_so_far();
seeker_report[CPU] = delta[(int) Lseek][CPU];
if (write(seek_feedback[1], (char *) seeker_report, sizeof(seeker_report))
!= sizeof(seeker_report))
io_error("pipe write");
exit(0);
} /* child process */
} /* for each seek proc */
/*
* Back in the parent; in an effort to ensure the children get an even
* start, wait a few seconds for them to get scheduled, open their
* files & so on.
*/
close(seek_feedback[1]);
close(seek_control[0]);
sleep(5);
fprintf(stderr, "start 'em...");
if (write(seek_control[1], seek_tickets, sizeof(seek_tickets))
!= sizeof(seek_tickets))
io_error("write tickets");
/* read back from children */
for (next = 0; next < SeekProcCount; next++)
{ /* for each child */
if (read(seek_feedback[0], (char *) seeker_report, sizeof(seeker_report))
!= sizeof(seeker_report))
io_error("pipe read");
/*
* each child writes back its CPU, start & end times. The elapsed time
* to do all the seeks is the time the first child started until the
* time the last child stopped
*/
delta[(int) Lseek][CPU] += seeker_report[CPU];
if (next == 0)
{ /* first time */
first_start = seeker_report[StartTime];
last_stop = seeker_report[EndTime];
} /* first time */
else
{ /* not first time */
first_start = (first_start < seeker_report[StartTime]) ?
first_start : seeker_report[StartTime];
last_stop = (last_stop > seeker_report[EndTime]) ?
last_stop : seeker_report[EndTime];
} /* not first time */
if (wait(&child) == -1)
io_error("wait");
fprintf(stderr, "done...");
} /* for each child */
fprintf(stderr, "\n");
delta[(int) Lseek][Elapsed] = last_stop - first_start;
/*
* The full program goes on from here to print the per-test report and to
* clean up; that code is elided from this extract.
*/
}
/*
* Do a typical-of-something random I/O. Any serious application that
* has a random I/O bottleneck is going to be smart enough to operate
* in a page mode, and not stupidly pull individual words out at
* odd offsets. To keep the cache from getting too clever, some
* pages must be updated. On the other hand, an application that updated
* every random page it looked at is hard to imagine. Still, it would be
* wrong to make the update percentage a parameter - the effect is too
* nonlinear. What's needed is a profile of what Oracle or Ingres or some
* such actually does.
* Be warned - there is a *sharp* elbow in this curve - on a 1 MB file,
* most substantial unix systems show >2000 random I/Os per second -
* obviously they've cached the whole thing and are just doing buffer
* copies.
*/
static void
doseek(where, fd, update)
long where;
int fd;
int update;
{
int buf[Chunk / IntSize];
off_t probe;
int size;
probe = (where / Chunk) * Chunk;
if (lseek(fd, probe, 0) != probe)
io_error("lseek in doseek");
if ((size = read(fd, (char *) buf, Chunk)) == -1)
io_error("read in doseek");
/* every so often, update a block */
if (update)
{ /* update this block */
/* touch a word */
buf[((int) random() % (size/IntSize - 2)) + 1]--;
if (lseek(fd, (long) probe, 0) != probe)
io_error("lseek in doseek update");
if (write(fd, (char *) buf, size) == -1)
io_error("write in doseek");
} /* update this block */
}
#ifdef SysV
/*
* Emulate the BSD random()/srandom() interface with nrand48(3), which
* takes its 48-bit seed as an array of three unsigned shorts.
*/
extern long nrand48();
static unsigned short randseed[3];
static void
srandom(seed)
int seed;
{
randseed[0] = (unsigned short) seed;
randseed[1] = (unsigned short) (seed >> 16);
randseed[2] = 0;
}
static long
random()
{
return nrand48(randseed);
}
#endif