Introduction
Introduction Statistics Contact Development Disclaimer Help
assorted changes from Plan 9 - plan9port - [fork] Plan 9 from user space
git clone git://src.adamsgaard.dk/plan9port
Log
Files
Refs
README
LICENSE
---
commit 28b49df3542a635cca788f3de213385f3fcb6334
parent 686bd37d9d8db5e3b969a3aa2d5b455e0976b262
Author: rsc <devnull@localhost>
Date: Tue, 18 Jul 2006 15:26:33 +0000
assorted changes from Plan 9
Diffstat:
M src/cmd/venti/srv/arena.c | 40 +++++++++++++++++++++++------…
M src/cmd/venti/srv/arenas.c | 6 +++---
M src/cmd/venti/srv/bloom.c | 62 ++++++++++++++++++++++++-----…
M src/cmd/venti/srv/buildbuck.c | 6 +++---
M src/cmd/venti/srv/buildindex.c | 1018 +++++++++++++++++++++++++++--…
M src/cmd/venti/srv/checkindex.c | 4 +++-
M src/cmd/venti/srv/clump.c | 7 ++++++-
M src/cmd/venti/srv/conv.c | 66 +++++++++++++++++++++++++++++…
M src/cmd/venti/srv/dat.h | 42 +++++++++++++++++++----------…
M src/cmd/venti/srv/dcache.c | 64 +++++++++++++++++++++--------…
A src/cmd/venti/srv/disksched.c | 88 +++++++++++++++++++++++++++++…
M src/cmd/venti/srv/findscore.c | 2 +-
A src/cmd/venti/srv/fixarenas.c | 1894 +++++++++++++++++++++++++++++…
M src/cmd/venti/srv/fns.h | 9 +++++++++
M src/cmd/venti/srv/graph.c | 16 ++++++++++------
M src/cmd/venti/srv/httpd.c | 219 ++++++++++++++++++++++++++++-…
M src/cmd/venti/srv/icache.c | 50 ++++++++++++++++++++++++++---…
M src/cmd/venti/srv/icachewrite.c | 36 +++++++++++++++++++++--------…
M src/cmd/venti/srv/index.c | 22 ++++++++++------------
M src/cmd/venti/srv/lump.c | 27 ++++++++++++++++++++++++---
M src/cmd/venti/srv/lumpcache.c | 13 +++++++++++--
M src/cmd/venti/srv/lumpqueue.c | 16 ----------------
A src/cmd/venti/srv/mirrorarenas.c | 464 ++++++++++++++++++++++++++++++
M src/cmd/venti/srv/mkfile | 3 +++
M src/cmd/venti/srv/part.c | 260 +++++++++++++++++++++++++++++…
A src/cmd/venti/srv/printarenapart.c | 160 +++++++++++++++++++++++++++++…
M src/cmd/venti/srv/printarenas.c | 2 +-
M src/cmd/venti/srv/sortientry.c | 17 +++++------------
M src/cmd/venti/srv/stats.c | 2 +-
M src/cmd/venti/srv/syncarena.c | 21 +++++++++++----------
M src/cmd/venti/srv/syncindex.c | 2 ++
M src/cmd/venti/srv/syncindex0.c | 16 +++++++++++++++-
M src/cmd/venti/srv/unwhack.c | 2 +-
M src/cmd/venti/srv/utils.c | 5 +++++
M src/cmd/venti/srv/venti.c | 6 ++++--
M src/cmd/venti/srv/verifyarena.c | 220 +++++++++++++++++++++++++----…
M src/cmd/venti/srv/wrarena.c | 4 ++--
M src/cmd/venti/srv/zblock.c | 6 ++++--
M src/cmd/venti/srv/zeropart.c | 4 ----
39 files changed, 4540 insertions(+), 361 deletions(-)
---
diff --git a/src/cmd/venti/srv/arena.c b/src/cmd/venti/srv/arena.c
t@@ -20,6 +20,7 @@ static void sumproc(void *);
static QLock sumlock;
static Rendez sumwait;
static ASum *sumq;
+static ASum *sumqtail;
static uchar zero[8192];
int arenasumsleeptime;
t@@ -257,7 +258,6 @@ writearena(Arena *arena, u64int aa, u8int *clbuf, u32int n)
if(m > n - nn)
m = n - nn;
memmove(&b->data[off], &clbuf[nn], m);
- /* ok = writepart(arena->part, a, b->data, blocksize); */
ok = 0;
putdblock(b);
if(ok < 0){
t@@ -329,7 +329,6 @@ writeaclump(Arena *arena, Clump *c, u8int *clbuf, u64int s…
if(m > n - nn)
m = n - nn;
memmove(&b->data[off], &clbuf[nn], m);
- /* ok = writepart(arena->part, a, b->data, blocksize); */
ok = 0;
putdblock(b);
if(ok < 0){
t@@ -356,6 +355,7 @@ writeaclump(Arena *arena, Clump *c, u8int *clbuf, u64int s…
arena->ctime = arena->wtime;
writeclumpinfo(arena, clump, &c->info);
+ wbarena(arena);
/* set up for call to setdcachestate */
as.arena = arena;
t@@ -410,6 +410,9 @@ setatailstate(AState *as)
trace(0, "setatailstate %s 0x%llux clumps %d", as->arena->name, as->aa…
+ /*
+ * Look up as->arena to find index.
+ */
ix = mainindex;
for(i=0; i<ix->narenas; i++)
if(ix->arenas[i] == as->arena)
t@@ -419,6 +422,9 @@ setatailstate(AState *as)
return;
}
+ /*
+ * Walk backward until we find the last time these were in sync.
+ */
for(j=i; --j>=0; ){
a = ix->arenas[j];
if(atailcmp(&a->diskstats, &a->memstats) == 0)
t@@ -464,8 +470,12 @@ backsumarena(Arena *arena)
return;
qlock(&sumlock);
as->arena = arena;
- as->next = sumq;
- sumq = as;
+ as->next = nil;
+ if(sumq)
+ sumqtail->next = as;
+ else
+ sumq = as;
+ sumqtail = as;
rwakeup(&sumwait);
qunlock(&sumlock);
}
t@@ -499,6 +509,7 @@ sumarena(Arena *arena)
DigestState s;
u64int a, e;
u32int bs;
+ int t;
u8int score[VtScoreSize];
bs = MaxIoSize;
t@@ -512,7 +523,12 @@ sumarena(Arena *arena)
b = alloczblock(bs, 0, arena->part->blocksize);
e = arena->base + arena->size;
for(a = arena->base - arena->blocksize; a + arena->blocksize <= e; a +…
- sleep(arenasumsleeptime);
+ disksched();
+ while((t=arenasumsleeptime) == SleepForever){
+ sleep(1000);
+ disksched();
+ }
+ sleep(t);
if(a + bs > e)
bs = arena->blocksize;
if(readpart(arena->part, a, b->data, bs) < 0)
t@@ -595,7 +611,7 @@ wbarenahead(Arena *arena)
b = alloczblock(arena->blocksize, 1, arena->part->blocksize);
if(b == nil){
logerr(EAdmin, "can't write arena header: %r");
-/*/ZZZ add error message? */
+/* ZZZ add error message? */
return -1;
}
/*
t@@ -681,18 +697,22 @@ okarena(Arena *arena)
ok = 0;
dsize = arenadirsize(arena, arena->diskstats.clumps);
if(arena->diskstats.used + dsize > arena->size){
- seterr(ECorrupt, "arena used > size");
+ seterr(ECorrupt, "arena %s used > size", arena->name);
ok = -1;
}
if(arena->diskstats.cclumps > arena->diskstats.clumps)
- logerr(ECorrupt, "arena has more compressed clumps than total …
+ logerr(ECorrupt, "arena %s has more compressed clumps than tot…
+ /*
+ * This need not be true if some of the disk is corrupted.
+ *
if(arena->diskstats.uncsize + arena->diskstats.clumps * ClumpSize + ar…
- logerr(ECorrupt, "arena uncompressed size inconsistent with us…
+ logerr(ECorrupt, "arena %s uncompressed size inconsistent with…
+ */
if(arena->ctime > arena->wtime)
- logerr(ECorrupt, "arena creation time after last write time");
+ logerr(ECorrupt, "arena %s creation time after last write time…
return ok;
}
diff --git a/src/cmd/venti/srv/arenas.c b/src/cmd/venti/srv/arenas.c
t@@ -214,7 +214,7 @@ wbarenapart(ArenaPart *ap)
return -1;
b = alloczblock(HeadSize, 1, 0);
if(b == nil)
-/*ZZZ set error message? */
+/* ZZZ set error message? */
return -1;
if(packarenapart(ap, b->data) < 0){
t@@ -337,8 +337,8 @@ wbarenamap(AMap *am, int n, Part *part, u64int base, u64in…
/*
* amap: n '\n' amapelem * n
* n: u32int
- * amapelem: name '\t' astart '\t' asize '\n'
- * astart, asize: u64int
+ * amapelem: name '\t' astart '\t' astop '\n'
+ * astart, astop: u64int
*/
int
parseamap(IFile *f, AMapN *amn)
diff --git a/src/cmd/venti/srv/bloom.c b/src/cmd/venti/srv/bloom.c
t@@ -7,6 +7,8 @@
#include "dat.h"
#include "fns.h"
+int ignorebloom;
+
int
bloominit(Bloom *b, vlong vsize, u8int *data)
{
t@@ -24,6 +26,7 @@ bloominit(Bloom *b, vlong vsize, u8int *data)
if(unpackbloomhead(b, data) < 0)
return -1;
+fprint(2, "bloom size %lud nhash %d\n", b->size, b->nhash);
b->mask = b->size-1;
b->data = data;
return 0;
t@@ -38,11 +41,7 @@ wbbloomhead(Bloom *b)
Bloom*
readbloom(Part *p)
{
- int i, n;
- uint ones;
uchar buf[512];
- uchar *data;
- u32int *a;
Bloom *b;
b = vtmallocz(sizeof *b);
t@@ -52,14 +51,40 @@ readbloom(Part *p)
vtfree(b);
return nil;
}
+ b->part = p;
+ return b;
+}
+
+int
+resetbloom(Bloom *b)
+{
+ uchar *data;
+
data = vtmallocz(b->size);
- if(readpart(p, 0, data, b->size) < 0){
+fprint(2, "bloom data %lud\n", b->size);
+ b->data = data;
+ if(b->size == MaxBloomSize) /* 2^32 overflows ulong */
+ addstat(StatBloomBits, b->size*8-1);
+ else
+ addstat(StatBloomBits, b->size*8);
+ return 0;
+}
+
+int
+loadbloom(Bloom *b)
+{
+ int i, n;
+ uint ones;
+ uchar *data;
+ u32int *a;
+
+ data = vtmallocz(b->size);
+ if(readpart(b->part, 0, data, b->size) < 0){
vtfree(b);
vtfree(data);
- return nil;
+ return -1;
}
b->data = data;
- b->part = p;
a = (u32int*)b->data;
n = b->size/4;
t@@ -73,7 +98,7 @@ readbloom(Part *p)
else
addstat(StatBloomBits, b->size*8);
- return b;
+ return 0;
}
int
t@@ -101,6 +126,8 @@ gethashes(u8int *score, ulong *h)
a ^= *(u32int*)(score+i);
b ^= *(u32int*)(score+i+4);
}
+ if(i+4 <= VtScoreSize) /* 20 is not 4-aligned */
+ a ^= *(u32int*)(score+i);
for(i=0; i<BloomMaxHash; i++, a+=b)
h[i] = a < BloomHeadSize*8 ? BloomHeadSize*8 : a;
}
t@@ -154,14 +181,17 @@ inbloomfilter(Bloom *b, u8int *score)
int r;
uint ms;
- if(b == nil)
+ if(b == nil || b->data == nil)
return 1;
+ if(ignorebloom)
+ return 1;
+
ms = msec();
rlock(&b->lk);
r = _inbloomfilter(b, score);
runlock(&b->lk);
+ /* elapsed lookup time is now minus start; ms is unsigned, so the order matters */
ms = msec() - ms;
addstat2(StatBloomLookup, 1, StatBloomLookupTime, ms);
if(r)
addstat(StatBloomMiss, 1);
t@@ -173,7 +203,7 @@ inbloomfilter(Bloom *b, u8int *score)
void
markbloomfilter(Bloom *b, u8int *score)
{
- if(b == nil)
+ if(b == nil || b->data == nil)
return;
rlock(&b->lk);
t@@ -186,14 +216,18 @@ markbloomfilter(Bloom *b, u8int *score)
static void
bloomwriteproc(void *v)
{
+ int ret;
Bloom *b;
-
+
+ threadsetname("bloomwriteproc");
b = v;
for(;;){
recv(b->writechan, 0);
- if(writebloom(b) < 0)
+ if((ret=writebloom(b)) < 0)
fprint(2, "oops! writing bloom: %r\n");
- send(b->writedonechan, 0);
+ else
+ ret = 0;
+ sendul(b->writedonechan, ret);
}
}
diff --git a/src/cmd/venti/srv/buildbuck.c b/src/cmd/venti/srv/buildbuck.c
t@@ -21,7 +21,7 @@ initiestream(Part *part, u64int off, u64int clumps, u32int s…
{
IEStream *ies;
-/*ZZZ out of memory? */
+/* out of memory? */
ies = MKZ(IEStream);
ies->buf = MKN(u8int, size);
ies->epos = ies->buf;
t@@ -61,7 +61,7 @@ peekientry(IEStream *ies)
nn -= n;
if(nn == 0)
return nil;
-/*fprint(2, "peek %d from %llud into %p\n", nn, ies->off, ies->epos); */
+//fprint(2, "peek %d from %llud into %p\n", nn, ies->off, ies->epos);
if(readpart(ies->part, ies->off, ies->epos, nn) < 0){
seterr(EOk, "can't read sorted index entries: %r");
return nil;
t@@ -101,7 +101,7 @@ buildbucket(Index *ix, IEStream *ies, IBucket *ib, uint ma…
b = peekientry(ies);
if(b == nil)
return TWID32;
-/*fprint(2, "b=%p ies->n=%lld ib.n=%d buck=%d score=%V\n", b, ies->n, ib->n, i…
+/* fprint(2, "b=%p ies->n=%lld ib.n=%d buck=%d score=%V\n", b, ies->n, ib->n, …
if(ib->n == 0)
buck = iebuck(ix, b, ib, ies);
else{
diff --git a/src/cmd/venti/srv/buildindex.c b/src/cmd/venti/srv/buildindex.c
t@@ -1,164 +1,936 @@
/*
- * Rebuild the Venti index from scratch.
+ * Rebuild the index from scratch, in place.
*/
-
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
-/*
- * Write a single bucket. Could profit from a big buffer here
- * so that we can absorb sporadic runs of blocks into one write,
- * avoiding disk seeks.
- */
-static int
-writebucket(Index *ix, u32int buck, IBucket *ib, ZBlock *b)
+enum
{
- ISect *is;
+ MinBufSize = 64*1024,
+ MaxBufSize = 4*1024*1024,
+};
- is = ix->sects[indexsect0(ix, buck)];
- if(buck < is->start || buck >= is->stop){
- seterr(EAdmin, "cannot find index section for bucket %lud\n", …
- return -1;
- }
- buck -= is->start;
+int dumb;
+int errors;
+char **isect;
+int nisect;
+int bloom;
+int zero;
-/*
- qlock(&stats.lock);
- stats.indexwrites++;
- qunlock(&stats.lock);
-*/
- packibucket(ib, b->data, is->bucketmagic);
- return writepart(is->part, is->blockbase + ((u64int)buck << is->blockl…
-}
+u32int isectmem;
+u64int totalbuckets;
+u64int totalclumps;
+Channel *arenadonechan;
+Channel *isectdonechan;
+Index *ix;
-static int
-buildindex(Index *ix, Part *part, u64int off, u64int clumps, int zero)
-{
- IEStream *ies;
- IBucket ib, zib;
- ZBlock *z, *b;
- u32int next, buck;
- int ok;
- uint nbuck;
- u64int found = 0;
-
-/*ZZZ make buffer size configurable */
- b = alloczblock(ix->blocksize, 0, ix->blocksize);
- z = alloczblock(ix->blocksize, 1, ix->blocksize);
- ies = initiestream(part, off, clumps, 64*1024);
- if(b == nil || z == nil || ies == nil){
- ok = 0;
- goto breakout;
- return -1;
- }
- ok = 0;
- next = 0;
- memset(&ib, 0, sizeof ib);
- ib.data = b->data + IBucketSize;
- zib.data = z->data + IBucketSize;
- zib.n = 0;
- nbuck = 0;
- for(;;){
- buck = buildbucket(ix, ies, &ib, ix->blocksize-IBucketSize);
- found += ib.n;
- if(zero){
- for(; next != buck; next++){
- if(next == ix->buckets){
- if(buck != TWID32){
- fprint(2, "bucket out of range…
- ok = -1;
- }
- goto breakout;
- }
- if(writebucket(ix, next, &zib, z) < 0){
- fprint(2, "can't write zero bucket to …
- ok = -1;
- }
- }
- }
- if(buck >= ix->buckets){
- if(buck == TWID32)
- break;
- fprint(2, "bucket out of range\n");
- ok = -1;
- goto breakout;
- }
- if(writebucket(ix, buck, &ib, b) < 0){
- fprint(2, "bad bucket found=%lld: %r\n", found);
- ok = -1;
- }
- next = buck + 1;
- if(++nbuck%10000 == 0)
- fprint(2, "\t%,d buckets written...\n", nbuck);
- }
-breakout:;
- fprint(2, "wrote index with %lld entries\n", found);
- freeiestream(ies);
- freezblock(z);
- freezblock(b);
- return ok;
-}
+u64int arenaentries;
+u64int skipentries;
+u64int indexentries;
+
+static int shouldprocess(ISect*);
+static void isectproc(void*);
+static void arenapartproc(void*);
void
usage(void)
{
- fprint(2, "usage: buildindex [-Z] [-B blockcachesize] config tmppart\n…
- threadexitsall(0);
+ fprint(2, "usage: buildindex [-b] [-i isect]... [-M imem] venti.conf\n…
+ threadexitsall("usage");
}
-Config conf;
-
+/*
+ * Rebuild the index in place: reopen the arena partitions read-only,
+ * start one isectproc per index section being rebuilt and one
+ * arenapartproc per arena partition, wait for both groups to finish,
+ * then write the bloom filter if one is in use.
+ * Exits with a non-nil status if any worker recorded an I/O error.
+ */
void
threadmain(int argc, char *argv[])
{
- Part *part;
- u64int clumps, base;
- u32int bcmem;
- int zero;
-
- zero = 1;
- bcmem = 0;
+ int fd, i, napart;
+ u32int bcmem, imem;
+ Config conf;
+ Part *p;
+
ventifmtinstall();
+ imem = 256*1024*1024;
ARGBEGIN{
- case 'B':
- bcmem = unittoull(ARGF());
+ case 'b':
+ bloom = 1;
+ break;
+ case 'i':
+ isect = vtrealloc(isect, (nisect+1)*sizeof(isect[0]));
+ isect[nisect++] = EARGF(usage());
break;
- case 'Z':
- zero = 0;
+ case 'd': /* debugging - make sure to run all 3 passes */
+ dumb = 1;
+ break;
+ case 'M':
+ imem = unittoull(EARGF(usage()));
break;
default:
usage();
break;
}ARGEND
-
- if(argc != 2)
+
+ if(argc != 1)
usage();
if(initventi(argv[0], &conf) < 0)
sysfatal("can't init venti: %r");
+ ix = mainindex;
+ if(nisect == 0 && ix->bloom)
+ bloom = 1;
+ if(bloom && ix->bloom && resetbloom(ix->bloom) < 0)
+ sysfatal("resetbloom: %r");
+ if(bloom && !ix->bloom)
+ sysfatal("-b specified but no bloom filter");
+ if(!bloom)
+ ix->bloom = nil;
+ isectmem = imem/ix->nsects;
- if(bcmem < maxblocksize * (mainindex->narenas + mainindex->nsects * 4 …
- bcmem = maxblocksize * (mainindex->narenas + mainindex->nsects…
+ /*
+ * safety first - only need read access to arenas
+ */
+ p = nil;
+ for(i=0; i<ix->narenas; i++){
+ if(ix->arenas[i]->part != p){
+ p = ix->arenas[i]->part;
+ if((fd = open(p->filename, OREAD)) < 0)
+ sysfatal("cannot reopen %s: %r", p->filename);
+ dup(fd, p->fd);
+ close(fd);
+ }
+ }
+
+ /*
+ * need a block for every arena
+ */
+ bcmem = maxblocksize * (mainindex->narenas + 16);
if(0) fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
initdcache(bcmem);
+
+ totalclumps = 0;
+ for(i=0; i<ix->narenas; i++)
+ totalclumps += ix->arenas[i]->diskstats.clumps;
+
+ totalbuckets = 0;
+ for(i=0; i<ix->nsects; i++)
+ totalbuckets += ix->sects[i]->blocks;
+ fprint(2, "%,lld clumps, %,lld buckets\n", totalclumps, totalbuckets);
+
+ /*
+ * start index procs - only for sections being rebuilt;
+ * a section without a writechan must not get a proc,
+ * since isectproc receives on that channel.
+ */
+ fprint(2, "%T read index\n");
+ isectdonechan = chancreate(sizeof(void*), 0);
+ for(i=0; i<ix->nsects; i++){
+ if(shouldprocess(ix->sects[i])){
+ ix->sects[i]->writechan = chancreate(sizeof(IEntry), 0…
+ vtproc(isectproc, ix->sects[i]);
+ }
+ }
+
+ for(i=0; i<nisect; i++)
+ if(isect[i])
+ fprint(2, "warning: did not find index section %s\n", …
+
+ /* start arena procs */
+ p = nil;
+ napart = 0;
+ arenadonechan = chancreate(sizeof(void*), 0);
+ for(i=0; i<ix->narenas; i++){
+ if(ix->arenas[i]->part != p){
+ p = ix->arenas[i]->part;
+ vtproc(arenapartproc, p);
+ napart++;
+ }
+ }
+
+ /* wait for arena procs to finish */
+ for(i=0; i<napart; i++)
+ recvp(arenadonechan);
+
+ /* tell index procs to finish */
+ for(i=0; i<ix->nsects; i++)
+ if(ix->sects[i]->writechan)
+ send(ix->sects[i]->writechan, nil);
+
+ /* wait for index procs to finish */
+ for(i=0; i<ix->nsects; i++)
+ if(ix->sects[i]->writechan)
+ recvp(isectdonechan);
+
+ if(ix->bloom && writebloom(ix->bloom) < 0)
+ fprint(2, "writing bloom filter: %r\n");
+
+ fprint(2, "%T done arenaentries=%,lld indexed=%,lld (nskip=%,lld)\n",
+ arenaentries, indexentries, skipentries);
+ /* report failure if any proc recorded a read or write error */
+ threadexitsall(errors ? "errors" : nil);
+}
+
+static int
+shouldprocess(ISect *is)
+{
+ int i;
+
+ if(nisect == 0)
+ return 1;
+
+ for(i=0; i<nisect; i++)
+ if(isect[i] && strcmp(isect[i], is->name) == 0){
+ isect[i] = nil;
+ return 1;
+ }
+ return 0;
+}
+
+static void
+add(u64int *a, u64int n)
+{
+ static Lock l;
+
+ lock(&l);
+ *a += n;
+ unlock(&l);
+}
+
+/*
+ * Read through an arena partition and send each of its IEntries
+ * to the appropriate index section. When finished, send on
+ * arenadonechan.
+ */
+enum
+{
+ ClumpChunks = 32*1024,
+};
+static void
+arenapartproc(void *v)
+{
+ int i, j, n, nskip, x;
+ u32int clump;
+ u64int addr, tot;
+ Arena *a;
+ ClumpInfo *ci, *cis;
+ IEntry ie;
+ Part *p;
+
+ p = v;
+ threadsetname("arenaproc %s", p->name);
+
+ nskip = 0;
+ tot = 0;
+ cis = MKN(ClumpInfo, ClumpChunks);
+ for(i=0; i<ix->narenas; i++){
+ a = ix->arenas[i];
+ if(a->part != p)
+ continue;
+ if(a->memstats.clumps)
+ fprint(2, "%T arena %s: %d entries\n",
+ a->name, a->memstats.clumps);
+ addr = ix->amap[i].start;
+ for(clump=0; clump<a->memstats.clumps; clump+=n){
+ n = ClumpChunks;
+ if(n > a->memstats.clumps - clump)
+ n = a->memstats.clumps - clump;
+ if(readclumpinfos(a, clump, cis, n) != n){
+ fprint(2, "%T arena %s: directory read: %r\n",…
+ errors = 1;
+ break;
+ }
+ for(j=0; j<n; j++){
+ ci = &cis[j];
+ ie.ia.type = ci->type;
+ ie.ia.size = ci->uncsize;
+ ie.ia.addr = addr;
+ addr += ci->size + ClumpSize;
+ ie.ia.blocks = (ci->size + ClumpSize + (1<<ABl…
+ scorecp(ie.score, ci->score);
+ if(ci->type == VtCorruptType)
+ nskip++;
+ else{
+ tot++;
+ x = indexsect(ix, ie.score);
+ assert(0 <= x && x < ix->nsects);
+ if(ix->sects[x]->writechan)
+ send(ix->sects[x]->writechan, …
+ if(ix->bloom)
+ markbloomfilter(ix->bloom, ie.…
+ }
+ }
+ }
+ }
+ add(&arenaentries, tot);
+ add(&skipentries, nskip);
+ sendp(arenadonechan, p);
+}
+
+/*
+ * Convert score into relative bucket number in isect.
+ * Can pass a packed ientry instead of score - score is first.
+ */
+static u32int
+score2bucket(ISect *is, uchar *score)
+{
+ u32int b;
+
+ b = hashbits(score, 32)/ix->div;
+ assert(is->start <= b && b < is->stop);
+ return b - is->start;
+}
+
+/*
+ * Convert offset in index section to bucket number.
+ */
+static u32int
+offset2bucket(ISect *is, u64int offset)
+{
+ u32int b;
+
+ assert(is->blockbase <= offset);
+ offset -= is->blockbase;
+ b = offset/is->blocksize;
+ assert(b < is->stop-is->start);
+ return b;
+}
+
+/*
+ * Convert bucket number to offset.
+ */
+static u64int
+bucket2offset(ISect *is, u32int b)
+{
+ assert(b <= is->stop-is->start);
+ return is->blockbase + (u64int)b*is->blocksize;
+}
+
+/*
+ * IEntry buffers to hold initial round of spraying.
+ */
+typedef struct Buf Buf;
+struct Buf
+{
+ Part *part; /* partition being written */
+ uchar *bp; /* current block */
+ uchar *ep; /* end of block */
+ uchar *wp; /* write position in block */
+ u64int boffset; /* start offset */
+ u64int woffset; /* next write offset */
+ u64int eoffset; /* end offset */
+ u32int nentry; /* number of entries written */
+};
+
+/*
+ * Write the buffer's block to disk at woffset, advance woffset,
+ * then zero the block and reset the write position for reuse.
+ * Running past eoffset is fatal; a failed write is reported
+ * and recorded in errors.
+ */
+static void
+bflush(Buf *buf)
+{
+ u32int bufsize;
+
+ if(buf->woffset >= buf->eoffset)
+ sysfatal("buf index chunk overflow - need bigger index");
+ bufsize = buf->ep - buf->bp;
+ if(writepart(buf->part, buf->woffset, buf->bp, bufsize) < 0){
+ fprint(2, "write %s: %r\n", buf->part->name);
+ errors = 1;
+ }
+ buf->woffset += bufsize;
+ memset(buf->bp, 0, bufsize);
+ buf->wp = buf->bp;
+}
+
+static void
+bwrite(Buf *buf, IEntry *ie)
+{
+ if(buf->wp+IEntrySize > buf->ep)
+ bflush(buf);
+ assert(buf->bp <= buf->wp && buf->wp < buf->ep);
+ packientry(ie, buf->wp);
+ buf->wp += IEntrySize;
+ assert(buf->bp <= buf->wp && buf->wp <= buf->ep);
+ buf->nentry++;
+}
+
+/*
+ * Minibuffer. In-memory data structure holds our place
+ * in the buffer but has no block data. We are writing and
+ * reading the minibuffers at the same time. (Careful!)
+ */
+typedef struct Minibuf Minibuf;
+struct Minibuf
+{
+ u64int boffset; /* start offset */
+ u64int roffset; /* read offset */
+ u64int woffset; /* write offset */
+ u64int eoffset; /* end offset */
+ u32int nentry; /* # entries left to read */
+ u32int nwentry; /* # entries written */
+};
+
+/*
+ * Index entry pool. Used when trying to shuffle around
+ * the entries in a big buffer into the corresponding M minibuffers.
+ * Sized to hold M*EntriesPerBlock entries, so that there will always
+ * either be room in the pool for another block worth of entries
+ * or there will be an entire block worth of sorted entries to
+ * write out.
+ */
+typedef struct IEntryLink IEntryLink;
+typedef struct IPool IPool;
+
+struct IEntryLink
+{
+ uchar ie[IEntrySize]; /* raw IEntry */
+ IEntryLink *next; /* next in chain */
+};
+
+struct IPool
+{
+ ISect *isect;
+ u32int buck0; /* first bucket in pool */
+ u32int mbufbuckets; /* buckets per minibuf */
+ IEntryLink *entry; /* all IEntryLinks */
+ u32int nentry; /* # of IEntryLinks */
+ IEntryLink *free; /* free list */
+ u32int nfree; /* # on free list */
+ Minibuf *mbuf; /* all minibufs */
+ u32int nmbuf; /* # of minibufs */
+ IEntryLink **mlist; /* lists for each minibuf */
+ u32int *mcount; /* # on each mlist[i] */
+ u32int bufsize; /* block buffer size */
+ uchar *rbuf; /* read buffer */
+ uchar *wbuf; /* write buffer */
+ u32int epbuf; /* entries per block buffer */
+};
+
+/*
+static int
+countsokay(IPool *p)
+{
+ int i;
+ u64int n;
+
+ n = 0;
+ for(i=0; i<p->nmbuf; i++)
+ n += p->mcount[i];
+ n += p->nfree;
+ if(n != p->nentry){
+ print("free %ud:", p->nfree);
+ for(i=0; i<p->nmbuf; i++)
+ print(" %ud", p->mcount[i]);
+ print(" = %lld nentry: %ud\n", n, p->nentry);
+ }
+ return n == p->nentry;
+}
+*/
- fprint(2, "building a new index %s using %s for temporary storage\n", …
+/*
+ * Allocate an entry pool for isect in a single block laid out as:
+ * IPool header, nentry IEntryLinks, nmbuf list heads, nmbuf counts,
+ * then the read and write block buffers aligned to bufsize.
+ * All links start out on the free list.
+ */
+static IPool*
+mkipool(ISect *isect, Minibuf *mbuf, u32int nmbuf,
+ u32int mbufbuckets, u32int bufsize)
+{
+ u32int i, nentry;
+ uchar *data;
+ IPool *p;
+ IEntryLink *l;
+
+ nentry = (nmbuf+1)*bufsize / IEntrySize;
+ /* the entry array holds IEntryLinks, so size it as such */
+ p = ezmalloc(sizeof(IPool)
+ +nentry*sizeof(IEntryLink)
+ +nmbuf*sizeof(IEntryLink*)
+ +nmbuf*sizeof(u32int)
+ +3*bufsize);
+
+ p->isect = isect;
+ p->mbufbuckets = mbufbuckets;
+ p->bufsize = bufsize;
+ p->entry = (IEntryLink*)(p+1);
+ p->nentry = nentry;
+ p->mlist = (IEntryLink**)(p->entry+nentry);
+ p->mcount = (u32int*)(p->mlist+nmbuf);
+ p->nmbuf = nmbuf;
+ p->mbuf = mbuf;
+ data = (uchar*)(p->mcount+nmbuf);
+ /* round up to a bufsize boundary; the third bufsize above is the slack */
+ data += bufsize - (u32int)data%bufsize;
+ p->rbuf = data;
+ p->wbuf = data+bufsize;
+ p->epbuf = bufsize/IEntrySize;
- part = initpart(argv[1], ORDWR|ODIRECT);
- if(part == nil)
- sysfatal("can't initialize temporary partition: %r");
+ for(i=0; i<p->nentry; i++){
+ l = &p->entry[i];
+ l->next = p->free;
+ p->free = l;
+ p->nfree++;
+ }
+ return p;
+}
- clumps = sortrawientries(mainindex, part, &base, mainindex->bloom);
- if(clumps == TWID64)
- sysfatal("can't build sorted index: %r");
- fprint(2, "found and sorted index entries for clumps=%lld at %lld\n", …
+/*
+ * Add the index entry ie to the pool p.
+ * Caller must know there is room.
+ */
+static void
+ipoolinsert(IPool *p, uchar *ie)
+{
+ u32int buck, x;
+ IEntryLink *l;
+
+ assert(p->free != nil);
+
+ buck = score2bucket(p->isect, ie);
+ x = (buck-p->buck0) / p->mbufbuckets;
+ if(x >= p->nmbuf){
+ fprint(2, "buck=%ud mbufbucket=%ud x=%ud\n",
+ buck, p->mbufbuckets, x);
+ }
+ assert(x < p->nmbuf);
- if(buildindex(mainindex, part, base, clumps, zero) < 0)
- sysfatal("can't build new index: %r");
+ l = p->free;
+ p->free = l->next;
+ p->nfree--;
+ memmove(l->ie, ie, IEntrySize);
+ l->next = p->mlist[x];
+ p->mlist[x] = l;
+ p->mcount[x]++;
+}
+
+/*
+ * Pull out a block containing as many
+ * entries as possible for minibuffer x.
+ */
+static u32int
+ipoolgetbuf(IPool *p, u32int x)
+{
+ uchar *bp, *ep, *wp;
+ IEntryLink *l;
+ u32int n;
+
+ bp = p->wbuf;
+ ep = p->wbuf + p->bufsize;
+ n = 0;
+ assert(x < p->nmbuf);
+ for(wp=bp; wp+IEntrySize<=ep && p->mlist[x]; wp+=IEntrySize){
+ l = p->mlist[x];
+ p->mlist[x] = l->next;
+ p->mcount[x]--;
+ memmove(wp, l->ie, IEntrySize);
+ l->next = p->free;
+ p->free = l;
+ p->nfree++;
+ n++;
+ }
+ memset(wp, 0, ep-wp);
+ return n;
+}
+
+/*
+ * Read a block worth of entries from the minibuf
+ * into the pool. Caller must know there is room.
+ */
+static void
+ipoolloadblock(IPool *p, Minibuf *mb)
+{
+ u32int i, n;
- if(mainindex->bloom)
- writebloom(mainindex->bloom);
+ assert(mb->nentry > 0);
+ assert(mb->roffset >= mb->woffset);
+ assert(mb->roffset < mb->eoffset);
- threadexitsall(0);
+ /* at most one block of entries, fewer if the minibuf is nearly drained */
+ n = p->bufsize/IEntrySize;
+ if(n > mb->nentry)
+ n = mb->nentry;
+ if(readpart(p->isect->part, mb->roffset, p->rbuf, p->bufsize) < 0){
+ fprint(2, "readpart %s: %r\n", p->isect->part->name);
+ /* the entries in this block are lost; record the failure */
+ errors = 1;
+ }else{
+ for(i=0; i<n; i++)
+ ipoolinsert(p, p->rbuf+i*IEntrySize);
+ }
+ mb->nentry -= n;
+ mb->roffset += p->bufsize;
}
+
+/*
+ * Write out a block worth of entries to minibuffer x.
+ * If necessary, pick up the data there before overwriting it.
+ */
+static void
+ipoolflush0(IPool *pool, u32int x)
+{
+ u32int bufsize;
+ Minibuf *mb;
+
+ mb = pool->mbuf+x;
+ bufsize = pool->bufsize;
+ mb->nwentry += ipoolgetbuf(pool, x);
+ if(mb->nentry > 0 && mb->roffset == mb->woffset){
+ assert(pool->nfree >= pool->bufsize/IEntrySize);
+ /*
+ * There will be room in the pool -- we just
+ * removed a block worth.
+ */
+ ipoolloadblock(pool, mb);
+ }
+ if(writepart(pool->isect->part, mb->woffset, pool->wbuf, bufsize) < 0){
+ fprint(2, "writepart %s: %r\n", pool->isect->part->name);
+ /* record the failure, as bflush does */
+ errors = 1;
+ }
+ mb->woffset += bufsize;
+}
+
+/*
+ * Write out some full block of entries.
+ * (There must be one -- the pool is almost full!)
+ */
+static void
+ipoolflush1(IPool *pool)
+{
+ u32int i;
+
+ assert(pool->nfree <= pool->epbuf);
+
+ for(i=0; i<pool->nmbuf; i++){
+ if(pool->mcount[i] >= pool->epbuf){
+ ipoolflush0(pool, i);
+ return;
+ }
+ }
+ /* can't be reached - someone must be full */
+ sysfatal("ipoolflush1");
+}
+
+/*
+ * Flush all the entries in the pool out to disk.
+ * Nothing more to read from disk.
+ */
+static void
+ipoolflush(IPool *pool)
+{
+ u32int i;
+
+ for(i=0; i<pool->nmbuf; i++)
+ while(pool->mlist[i])
+ ipoolflush0(pool, i);
+ assert(pool->nfree == pool->nentry);
+}
+
+/*
+ * Third pass. Pick up each minibuffer from disk into
+ * memory and then write out the buckets.
+ */
+
+/*
+ * Compare two packed index entries.
+ * Usual ordering except break ties by putting higher
+ * index addresses first (assumes have duplicates
+ * due to corruption in the lower addresses).
+ */
+static int
+ientrycmpaddr(const void *va, const void *vb)
+{
+ int i;
+ uchar *a, *b;
+
+ a = (uchar*)va;
+ b = (uchar*)vb;
+ i = ientrycmp(a, b);
+ if(i)
+ return i;
+ return -memcmp(a+IEntryAddrOff, b+IEntryAddrOff, 8);
+}
+
+/*
+ * Write zeros to partition p over the byte range [o, e),
+ * at most MaxIoSize bytes per write. Failed writes are
+ * reported and recorded in errors.
+ */
+static void
+zerorange(Part *p, u64int o, u64int e)
+{
+ static uchar zero[MaxIoSize];
+ u32int n;
+
+ for(; o<e; o+=n){
+ n = sizeof zero;
+ if(o+n > e)
+ n = e-o;
+ if(writepart(p, o, zero, n) < 0){
+ fprint(2, "writepart %s: %r\n", p->name);
+ errors = 1;
+ }
+ }
+}
+
+/*
+ * Load a minibuffer into memory and write out the
+ * corresponding buckets.
+ * buf is scratch space of nbuf bytes; bufsize is the block size
+ * the minibuffer was written with. Does nothing if the
+ * minibuffer holds no written entries.
+ */
+static void
+sortminibuffer(ISect *is, Minibuf *mb, uchar *buf, u32int nbuf, u32int bufsize)
+{
+ uchar *buckdata, *p, *q, *ep;
+ u32int b, lastb, memsize, n;
+ u64int o;
+ IBucket ib;
+ Part *part;
+
+ /* check before allocating so the early return does not leak */
+ if(mb->nwentry == 0)
+ return;
+
+ part = is->part;
+ buckdata = emalloc(is->blocksize);
+
+ /*
+ * read entire buffer.
+ */
+ assert(mb->nwentry*IEntrySize <= mb->woffset-mb->boffset);
+ assert(mb->woffset-mb->boffset <= nbuf);
+ if(readpart(part, mb->boffset, buf, mb->woffset-mb->boffset) < 0){
+ fprint(2, "readpart %s: %r\n", part->name);
+ errors = 1;
+ free(buckdata);
+ return;
+ }
+ assert(*(uint*)buf != 0xa5a5a5a5);
+
+ /*
+ * remove fragmentation due to IEntrySize
+ * not evenly dividing Bufsize
+ */
+ memsize = (bufsize/IEntrySize)*IEntrySize;
+ for(o=mb->boffset, p=q=buf; o<mb->woffset; o+=bufsize){
+ memmove(p, q, memsize);
+ p += memsize;
+ q += bufsize;
+ }
+ ep = buf + mb->nwentry*IEntrySize;
+ assert(ep <= buf+nbuf);
+
+ /*
+ * sort entries
+ */
+ qsort(buf, mb->nwentry, IEntrySize, ientrycmpaddr);
+
+ /*
+ * write buckets out
+ */
+ n = 0;
+ lastb = offset2bucket(is, mb->boffset);
+ for(p=buf; p<ep; p=q){
+ b = score2bucket(is, p);
+ for(q=p; q<ep && score2bucket(is, q)==b; q+=IEntrySize)
+ ;
+ if(lastb+1 < b && zero)
+ zerorange(part, bucket2offset(is, lastb+1), bucket2off…
+ if(IBucketSize+(q-p) > is->blocksize)
+ sysfatal("bucket overflow - make index bigger");
+ memmove(buckdata+IBucketSize, p, q-p);
+ ib.n = (q-p)/IEntrySize;
+ n += ib.n;
+ packibucket(&ib, buckdata, is->bucketmagic);
+ if(writepart(part, bucket2offset(is, b), buckdata, is->blocksi…
+ fprint(2, "write %s: %r\n", part->name);
+ lastb = b;
+ }
+ if(lastb+1 < is->stop-is->start && zero)
+ zerorange(part, bucket2offset(is, lastb+1), bucket2offset(is, …
+
+ if(n != mb->nwentry)
+ fprint(2, "sortminibuffer bug: n=%ud nwentry=%ud have=%ld\n", …
+
+ free(buckdata);
+}
+
+/*
+ * Per-index-section worker. Receives IEntries on is->writechan
+ * until the terminating entry (ia.addr == 0) arrives, sprays them
+ * into nbuf on-disk groups, optionally splits each group into
+ * minibuffers, then sorts each minibuffer and writes its buckets.
+ * Sends is on isectdonechan when finished.
+ */
+static void
+isectproc(void *v)
+{
+ u32int buck, bufbuckets, bufsize, epbuf, i, j;
+ u32int mbufbuckets, n, nbucket, nn, space;
+ u32int nbuf, nminibuf, xminiclump, prod;
+ u64int blocksize, offset, xclump;
+ uchar *data, *p;
+ Buf *buf;
+ IEntry ie;
+ IPool *ipool;
+ ISect *is;
+ Minibuf *mbuf, *mb;
+
+ is = v;
+ blocksize = is->blocksize;
+ nbucket = is->stop - is->start;
+
+ /*
+ * Three passes:
+ * pass 1 - write index entries from arenas into
+ * large sequential sections on index disk.
+ * requires nbuf * bufsize memory.
+ *
+ * pass 2 - split each section into minibufs.
+ * requires nminibuf * bufsize memory.
+ *
+ * pass 3 - read each minibuf into memory and
+ * write buckets out.
+ * requires entries/minibuf * IEntrySize memory.
+ *
+ * The larger we set bufsize the less seeking hurts us.
+ *
+ * The fewer sections and minibufs we have, the less
+ * seeking hurts us.
+ *
+ * The fewer sections and minibufs we have, the
+ * more entries we end up with in each minibuf
+ * at the end.
+ *
+ * Shoot for using half our memory to hold each
+ * minibuf. The chance of a random distribution
+ * getting off by 2x is quite low.
+ *
+ * Once that is decided, figure out the smallest
+ * nminibuf and nsection/biggest bufsize we can use
+ * and still fit in the memory constraints.
+ */
+
+ /* expected number of clump index entries we'll see */
+ xclump = nbucket * (double)totalclumps/totalbuckets;
+
+ /* number of clumps we want to see in a minibuf */
+ xminiclump = isectmem/2/IEntrySize;
+
+ /*
+ * total number of minibufs we need, rounded up.
+ * never zero: nbuf is derived from prod and is used
+ * as a divisor when computing bufbuckets below.
+ */
+ prod = (xclump+xminiclump-1) / xminiclump;
+ if(prod == 0)
+ prod = 1;
+
+ /* if possible, skip second pass */
+ if(!dumb && prod*MinBufSize < isectmem){
+ nbuf = prod;
+ nminibuf = 1;
+ }else{
+ /* otherwise use nsection = sqrt(nmini) */
+ for(nbuf=1; nbuf*nbuf<prod; nbuf++)
+ ;
+ if(nbuf*MinBufSize > isectmem)
+ sysfatal("not enough memory");
+ nminibuf = nbuf;
+ }
+ /* size buffer to use extra memory */
+ bufsize = MinBufSize;
+ while(bufsize*2*nbuf <= isectmem && bufsize < MaxBufSize)
+ bufsize *= 2;
+ data = emalloc(nbuf*bufsize);
+ epbuf = bufsize/IEntrySize;
+
+ fprint(2, "%T %s: %,ud buckets, %,ud groups, %,ud minigroups, %,ud buf…
+ is->part->name, nbucket, nbuf, nminibuf, bufsize);
+ /*
+ * Accept index entries from arena procs.
+ */
+ buf = MKNZ(Buf, nbuf);
+ p = data;
+ offset = is->blockbase;
+ bufbuckets = (nbucket+nbuf-1)/nbuf;
+ for(i=0; i<nbuf; i++){
+ buf[i].part = is->part;
+ buf[i].bp = p;
+ buf[i].wp = p;
+ p += bufsize;
+ buf[i].ep = p;
+ buf[i].boffset = offset;
+ buf[i].woffset = offset;
+ if(i < nbuf-1){
+ offset += bufbuckets*blocksize;
+ buf[i].eoffset = offset;
+ }else{
+ offset = is->blockbase + nbucket*blocksize;
+ buf[i].eoffset = offset;
+ }
+ }
+ assert(p == data+nbuf*bufsize);
+
+ n = 0;
+ while(recv(is->writechan, &ie) == 1){
+ if(ie.ia.addr == 0)
+ break;
+ buck = score2bucket(is, ie.score);
+ i = buck/bufbuckets;
+ assert(i < nbuf);
+ bwrite(&buf[i], &ie);
+ n++;
+ }
+ add(&indexentries, n);
+
+ nn = 0;
+ for(i=0; i<nbuf; i++){
+ bflush(&buf[i]);
+ buf[i].bp = nil;
+ buf[i].ep = nil;
+ buf[i].wp = nil;
+ nn += buf[i].nentry;
+ }
+ if(n != nn)
+ fprint(2, "isectproc bug: n=%ud nn=%ud\n", n, nn);
+
+ free(data);
+
+ fprint(2, "%T %s: reordering\n", is->part->name);
+
+ /*
+ * Rearrange entries into minibuffers and then
+ * split each minibuffer into buckets.
+ */
+ mbuf = MKN(Minibuf, nminibuf);
+ mbufbuckets = (bufbuckets+nminibuf-1)/nminibuf;
+ for(i=0; i<nbuf; i++){
+ /*
+ * Set up descriptors.
+ */
+ n = buf[i].nentry;
+ nn = 0;
+ offset = buf[i].boffset;
+ memset(mbuf, 0, nminibuf*sizeof(mbuf[0]));
+ for(j=0; j<nminibuf; j++){
+ mb = &mbuf[j];
+ mb->boffset = offset;
+ if(j < nminibuf-1){
+ offset += mbufbuckets*blocksize;
+ mb->eoffset = offset;
+ }else
+ mb->eoffset = buf[i].eoffset;
+ mb->roffset = mb->boffset;
+ mb->woffset = mb->boffset;
+ mb->nentry = epbuf * (mb->eoffset - mb->boffset)/bufsi…
+ if(mb->nentry > buf[i].nentry)
+ mb->nentry = buf[i].nentry;
+ buf[i].nentry -= mb->nentry;
+ nn += mb->nentry;
+ }
+ if(n != nn)
+ fprint(2, "isectproc bug2: n=%ud nn=%ud (i=%d)\n", n, …
+ /*
+ * Rearrange.
+ */
+ if(!dumb && nminibuf == 1){
+ mbuf[0].nwentry = mbuf[0].nentry;
+ mbuf[0].woffset = buf[i].woffset;
+ }else{
+ ipool = mkipool(is, mbuf, nminibuf, mbufbuckets, bufsi…
+ ipool->buck0 = bufbuckets*i;
+ for(j=0; j<nminibuf; j++){
+ mb = &mbuf[j];
+ while(mb->nentry > 0){
+ if(ipool->nfree < epbuf){
+ ipoolflush1(ipool);
+ /* ipoolflush1 might change mb…
+ continue;
+ }
+ assert(ipool->nfree >= epbuf);
+ ipoolloadblock(ipool, mb);
+ }
+ }
+ ipoolflush(ipool);
+ nn = 0;
+ for(j=0; j<nminibuf; j++)
+ nn += mbuf[j].nwentry;
+ if(n != nn)
+ fprint(2, "isectproc bug3: n=%ud nn=%ud (i=%d)…
+ free(ipool);
+ }
+
+ /*
+ * Make buckets.
+ */
+ space = 0;
+ for(j=0; j<nminibuf; j++)
+ if(space < mbuf[j].woffset - mbuf[j].boffset)
+ space = mbuf[j].woffset - mbuf[j].boffset;
+
+ data = emalloc(space);
+ for(j=0; j<nminibuf; j++){
+ mb = &mbuf[j];
+ sortminibuffer(is, mb, data, space, bufsize);
+ }
+ free(data);
+ }
+
+ sendp(isectdonechan, is);
+}
+
+
+
diff --git a/src/cmd/venti/srv/checkindex.c b/src/cmd/venti/srv/checkindex.c
t@@ -109,7 +109,7 @@ checkindex(Index *ix, Part *part, u64int off, u64int clump…
int ok, bok;
u64int found = 0;
-/*ZZZ make buffer size configurable */
+/* ZZZ make buffer size configurable */
b = alloczblock(ix->blocksize, 0, ix->blocksize);
z = alloczblock(ix->blocksize, 1, ix->blocksize);
ies = initiestream(part, off, clumps, 64*1024);
t@@ -260,6 +260,8 @@ threadmain(int argc, char *argv[])
if(initventi(argv[0], &conf) < 0)
sysfatal("can't init venti: %r");
+ if(mainindex->bloom && loadbloom(mainindex->bloom) < 0)
+ sysfatal("can't load bloom filter: %r");
oldbloom = mainindex->bloom;
newbloom = nil;
if(oldbloom){
diff --git a/src/cmd/venti/srv/clump.c b/src/cmd/venti/srv/clump.c
t@@ -91,7 +91,7 @@ clumpmagic(Arena *arena, u64int aa)
{
u8int buf[U32Size];
- if(readarena(arena, aa, buf, U32Size) < 0)
+ if(readarena(arena, aa, buf, U32Size) == TWID32)
return TWID32;
return unpackmagic(buf);
}
t@@ -138,6 +138,11 @@ loadclump(Arena *arena, u64int aa, int blocks, Clump *cl,…
freezblock(cb);
return nil;
}
+ if(cl->info.type == VtCorruptType){
+ seterr(EOk, "clump is marked corrupt");
+ freezblock(cb);
+ return nil;
+ }
n -= ClumpSize;
if(n < cl->info.size){
freezblock(cb);
diff --git a/src/cmd/venti/srv/conv.c b/src/cmd/venti/srv/conv.c
t@@ -23,7 +23,7 @@ static struct {
ArenaHeadMagic, "ArenaHeadMagic",
ArenaMagic, "ArenaMagic",
ISectMagic, "ISectMagic",
- BloomMagic, "BloomMagic"
+ BloomMagic, "BloomMagic",
};
static char*
t@@ -138,9 +138,6 @@ unpackarena(Arena *arena, u8int *buf)
p += U64Size;
arena->diskstats.sealed = U8GET(p);
p += U8Size;
-
- arena->memstats = arena->diskstats;
-
switch(arena->version){
case ArenaVersion4:
sz = ArenaSize4;
t@@ -153,6 +150,35 @@ unpackarena(Arena *arena, u8int *buf)
seterr(ECorrupt, "arena has bad version number %d", arena->ver…
return -1;
}
+ /*
+ * Additional fields for the memstats version of the stats.
+ * Diskstats reflects what is committed to the index.
+ * Memstats reflects what is in the arena. Originally intended
+ * this to be a version 5 extension, but might as well use for
+ * all the existing version 4 arenas too.
+ *
+ * To maintain backwards compatibility with existing venti
+ * installations using the older format, we define that if
+ * memstats == diskstats, then the extension fields are not
+ * included (see packarena below). That is, only partially
+ * indexed arenas have these fields. Fully indexed arenas
+ * (in particular, sealed arenas) do not.
+ */
+ if(U8GET(p) == 1){
+ sz += ArenaSize5a-ArenaSize5;
+ p += U8Size;
+ arena->memstats.clumps = U32GET(p);
+ p += U32Size;
+ arena->memstats.cclumps = U32GET(p);
+ p += U32Size;
+ arena->memstats.used = U64GET(p);
+ p += U64Size;
+ arena->memstats.uncsize = U64GET(p);
+ p += U64Size;
+ arena->memstats.sealed = U8GET(p);
+ p += U8Size;
+ }else
+ arena->memstats = arena->diskstats;
if(buf + sz != p)
sysfatal("unpackarena unpacked wrong amount");
t@@ -162,6 +188,12 @@ unpackarena(Arena *arena, u8int *buf)
int
packarena(Arena *arena, u8int *buf)
{
+ return _packarena(arena, buf, 0);
+}
+
+int
+_packarena(Arena *arena, u8int *buf, int forceext)
+{
int sz;
u8int *p;
u32int t32;
t@@ -207,6 +239,30 @@ packarena(Arena *arena, u8int *buf)
p += U64Size;
U8PUT(p, arena->diskstats.sealed);
p += U8Size;
+
+ /*
+ * Extension fields; see above.
+ */
+ if(forceext
+ || arena->memstats.clumps != arena->diskstats.clumps
+ || arena->memstats.cclumps != arena->diskstats.cclumps
+ || arena->memstats.used != arena->diskstats.used
+ || arena->memstats.uncsize != arena->diskstats.uncsize
+ || arena->memstats.sealed != arena->diskstats.sealed){
+ sz += ArenaSize5a - ArenaSize5;
+ U8PUT(p, 1);
+ p += U8Size;
+ U32PUT(p, arena->memstats.clumps);
+ p += U32Size;
+ U32PUT(p, arena->memstats.cclumps);
+ p += U32Size;
+ U64PUT(p, arena->memstats.used, t32);
+ p += U64Size;
+ U64PUT(p, arena->memstats.uncsize, t32);
+ p += U64Size;
+ U8PUT(p, arena->memstats.sealed);
+ p += U8Size;
+ }
if(buf + sz != p)
sysfatal("packarena packed wrong amount");
t@@ -525,6 +581,8 @@ unpackientry(IEntry *ie, u8int *buf)
p += U32Size;
ie->train = U16GET(p);
p += U16Size;
+ if(p - buf != IEntryAddrOff)
+ sysfatal("unpackentry bad IEntryAddrOff amount");
ie->ia.addr = U64GET(p);
if(ie->ia.addr>>56) print("%.8H => %llux\n", p, ie->ia.addr);
p += U64Size;
diff --git a/src/cmd/venti/srv/dat.h b/src/cmd/venti/srv/dat.h
t@@ -75,23 +75,17 @@ enum
/*
* magic numbers on disk
*/
-/* _ClumpMagic = 0xd15cb10cU, / * clump header, d…
-#define _ClumpMagic 0xd15cb10cU
+ _ClumpMagic = 0xd15cb10cU, /* clump header, depr…
ClumpFreeMagic = 0, /* free clump; termi…
-/* ArenaPartMagic = 0xa9e4a5e7U, / * arena partit…
-/* ArenaMagic = 0xf2a14eadU, / * arena trailer */
-/* ArenaHeadMagic = 0xd15c4eadU, / * arena header…
-#define ArenaPartMagic 0xa9e4a5e7U
-#define ArenaMagic 0xf2a14eadU
-#define ArenaHeadMagic 0xd15c4eadU
-
-/* BloomMagic = 0xb1004eadU, / * bloom filter hea…
-#define BloomMagic 0xb1004eadU
+ ArenaPartMagic = 0xa9e4a5e7U, /* arena partition…
+ ArenaMagic = 0xf2a14eadU, /* arena trailer */
+ ArenaHeadMagic = 0xd15c4eadU, /* arena header */
+
+ BloomMagic = 0xb1004eadU, /* bloom filter header…
BloomMaxHash = 32,
-/* ISectMagic = 0xd15c5ec7U, / * index header */
-#define ISectMagic 0xd15c5ec7U
+ ISectMagic = 0xd15c5ec7U, /* index header */
ArenaPartVersion = 3,
ArenaVersion4 = 4,
t@@ -120,6 +114,7 @@ enum
ArenaPartSize = 4 * U32Size,
ArenaSize4 = 2 * U64Size + 6 * U32Size + ANameSize + U8…
ArenaSize5 = ArenaSize4 + U32Size,
+ ArenaSize5a = ArenaSize5 + 2 * U8Size + 2 * U32Size + 2…
ArenaHeadSize4 = U64Size + 3 * U32Size + ANameSize,
ArenaHeadSize5 = ArenaHeadSize4 + U32Size,
BloomHeadSize = 4 * U32Size,
t@@ -137,10 +132,14 @@ enum
*/
IBucketSize = U32Size + U16Size,
IEntrySize = U64Size + U32Size + 2*U16Size + 2*U8Size +…
- IEntryTypeOff = VtScoreSize + U64Size + U32Size + 2 * U…
+ IEntryTypeOff = VtScoreSize + U32Size + U16Size + U64Si…
+ IEntryAddrOff = VtScoreSize + U32Size + U16Size,
MaxClumpBlocks = (VtMaxLumpSize + ClumpSize + (1 << AB…
+
+ IcacheFrac = 1000000, /* denominator */
+ SleepForever = 1000000000, /* magic value for sl…
/*
* dirty flags - order controls disk write order
*/
t@@ -356,13 +355,11 @@ struct Arena
int blocksize; /* size of block to read …
u64int base; /* base address on …
u64int size; /* total space in t…
- u64int limit; /* storage limit f…
u8int score[VtScoreSize]; /* score of the entire…
int clumpmax; /* ClumpInfos per block */
AState mem;
int inqueue;
- DigestState sha1;
/*
* fields stored on disk
t@@ -477,6 +474,8 @@ struct ISect
u32int tabsize; /* max. bytes in index c…
Channel *writechan;
Channel *writedonechan;
+ void *ig; /* used by buildindex only */
+ int ng;
/*
* fields stored on disk
t@@ -716,7 +715,18 @@ extern int writestodevnull; …
extern int collectstats;
extern QLock memdrawlock;
extern int icachesleeptime;
+extern int minicachesleeptime;
extern int arenasumsleeptime;
+extern int manualscheduling;
+extern int l0quantum;
+extern int l1quantum;
+extern int ignorebloom;
+extern int icacheprefetch;
+extern int syncwrites;
+
+extern Stats *stathist;
+extern int nstathist;
+extern ulong stattime;
#ifndef PLAN9PORT
#pragma varargck type "V" uchar*
diff --git a/src/cmd/venti/srv/dcache.c b/src/cmd/venti/srv/dcache.c
t@@ -34,7 +34,7 @@ enum
{
HashLog = 9,
HashSize = 1<<HashLog,
- HashMask = HashSize - 1
+ HashMask = HashSize - 1,
};
struct DCache
t@@ -212,8 +212,6 @@ return;
lastmiss.part = part;
lastmiss.addr = addr;
}
-
-/* fprint(2, "%s %llx %s\n", part->name, addr, miss ? "miss" : "hit"); …
}
int
t@@ -230,6 +228,7 @@ rareadpart(Part *part, u64int addr, u8int *buf, uint n, in…
}
if(load != 2 || addr >= part->size){ /* addr >= part->size: let…
runlock(&ralock);
+ diskaccess(0);
return readpart(part, addr, buf, n);
}
t@@ -239,6 +238,7 @@ fprint(2, "raread %s %llx\n", part->name, addr);
nn = dcache.ramax;
if(addr+nn > part->size)
nn = part->size - addr;
+ diskaccess(0);
if(readpart(part, addr, dcache.rabuf, nn) < 0){
wunlock(&ralock);
return -1;
t@@ -297,7 +297,6 @@ _getdblock(Part *part, u64int addr, int mode, int load)
/*
* look for the block in the cache
*/
-/*checkdcache(); */
qlock(&dcache.lock);
again:
for(b = dcache.heads[h]; b != nil; b = b->next){
t@@ -367,7 +366,6 @@ found:
fixheap(b->heap, b);
qunlock(&dcache.lock);
-/*checkdcache(); */
trace(TraceBlock, "getdblock lock");
addstat(StatDblockStall, 1);
t@@ -427,7 +425,6 @@ putdblock(DBlock *b)
else
wunlock(&b->lock);
-/*checkdcache(); */
qlock(&dcache.lock);
if(--b->ref == 0 && !b->dirty){
if(b->heap == TWID32)
t@@ -435,7 +432,6 @@ putdblock(DBlock *b)
rwakeupall(&dcache.full);
}
qunlock(&dcache.lock);
-/*checkdcache(); */
}
void
t@@ -474,6 +470,25 @@ dirtydblock(DBlock *b, int dirty)
qunlock(&dcache.lock);
}
+/*
+ * Remove b from the hash chain it is linked on.
+ * A block with no predecessor must be the head of its bucket;
+ * anything else means the chains are corrupt and is fatal.
+ */
+static void
+unchain(DBlock *b)
+{
+	ulong slot;
+
+	if(b->prev != nil)
+		b->prev->next = b->next;
+	else{
+		slot = pbhash(b->addr);
+		if(dcache.heads[slot] != b)
+			sysfatal("bad hash chains in disk cache");
+		dcache.heads[slot] = b->next;
+	}
+	if(b->next != nil)
+		b->next->prev = b->prev;
+}
+
/*
* remove some block from use and update the free list and counters
*/
t@@ -481,7 +496,6 @@ static DBlock*
bumpdblock(void)
{
DBlock *b;
- ulong h;
trace(TraceBlock, "bumpdblock enter");
b = dcache.free;
t@@ -512,22 +526,28 @@ bumpdblock(void)
trace(TraceBlock, "bumpdblock bumping %s 0x%llux", b->part->name, b->a…
- /*
- * unchain the block
- */
- if(b->prev == nil){
- h = pbhash(b->addr);
- if(dcache.heads[h] != b)
- sysfatal("bad hash chains in disk cache");
- dcache.heads[h] = b->next;
- }else
- b->prev->next = b->next;
- if(b->next != nil)
- b->next->prev = b->prev;
-
+ unchain(b);
return b;
}
+/*
+ * Drain the heap under the cache lock.  Every block is taken off
+ * the heap; those that are neither referenced nor dirty are also
+ * unlinked from their hash chain and pushed on the free list.
+ */
+void
+emptydcache(void)
+{
+	DBlock *victim;
+
+	qlock(&dcache.lock);
+	while(dcache.nheap > 0){
+		victim = dcache.heap[0];
+		delheap(victim);
+		if(victim->ref || victim->dirty)
+			continue;
+		unchain(victim);
+		victim->next = dcache.free;
+		dcache.free = victim;
+	}
+	qunlock(&dcache.lock);
+}
+
/*
* delete an arbitrary block from the heap
*/
t@@ -683,6 +703,7 @@ static int
parallelwrites(DBlock **b, DBlock **eb, int dirty)
{
DBlock **p, **q;
+
for(p=b; p<eb && (*p)->dirty == dirty; p++){
assert(b<=p && p<eb);
sendp((*p)->part->writechan, *p);
t@@ -803,6 +824,7 @@ writeproc(void *v)
trace(TraceProc, "wlock %s 0x%llux", p->name, b->addr);
wlock(&b->lock);
trace(TraceProc, "writepart %s 0x%llux", p->name, b->addr);
+ diskaccess(0);
if(writepart(p, b->addr, b->data, b->size) < 0)
fprint(2, "write error: %r\n"); /* XXX details! */
addstat(StatApartWrite, 1);
diff --git a/src/cmd/venti/srv/disksched.c b/src/cmd/venti/srv/disksched.c
t@@ -0,0 +1,88 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+/* time(0) of the most recent diskaccess() call, indexed by access level */
+ulong lasttime[2];
+/* when set, disksched() only records the time and leaves the sleep times alone */
+int manualscheduling;
+/* a level-0 access within this many time(0) units counts as ongoing in disksched() */
+int l0quantum = 120;
+/* same window for level-1 accesses */
+int l1quantum = 120;
+/* time at which disksched() last recorded an icache scheduling decision */
+ulong lasticachechange;
+
+void
+disksched(void)
+{
+ int p, nwrite, nflush, ndirty, tdirty, toflush;
+ ulong t;
+ vlong cflush;
+ Stats *prev;
+
+ /*
+ * no locks because all the data accesses are atomic.
+ */
+ t = time(0);
+ if(manualscheduling){
+ lasticachechange = t;
+ return;
+ }
+
+ if(t-lasttime[0] < l0quantum){
+ /* level-0 disk access going on */
+ p = icachedirtyfrac();
+ if(p < IcacheFrac*5/10){ /* can wait */
+ icachesleeptime = SleepForever;
+ lasticachechange = t;
+ }else if(p > IcacheFrac*9/10){ /* can't wait */
+ icachesleeptime = 0;
+ lasticachechange = t;
+ }else if(t-lasticachechange > 60){
+ /* have minute worth of data for current rate */
+ prev = &stathist[(stattime-60+nstathist)%nstathist];
+
+ /* # entries written to index cache */
+ nwrite = stats.n[StatIcacheWrite] - prev->n[StatIcache…
+
+ /* # dirty entries in index cache */
+ ndirty = stats.n[StatIcacheDirty] - prev->n[StatIcache…
+
+ /* # entries flushed to disk */
+ nflush = nwrite - ndirty;
+
+ /* want to stay around 70% dirty */
+ tdirty = (vlong)stats.n[StatIcacheSize]*700/1000;
+
+ /* assume nflush*icachesleeptime is a constant */
+ cflush = (vlong)nflush*(icachesleeptime+1);
+
+ /* compute number of entries to write in next minute */
+ toflush = nwrite + (stats.n[StatIcacheDirty] - tdirty);
+
+ /* schedule for that many */
+ if(toflush <= 0 || cflush/toflush > 100000)
+ icachesleeptime = SleepForever;
+ else
+ icachesleeptime = cflush/toflush;
+ }
+ arenasumsleeptime = SleepForever;
+ return;
+ }
+ if(t-lasttime[1] < l1quantum){
+ /* level-1 disk access (icache flush) going on */
+ icachesleeptime = 0;
+ arenasumsleeptime = SleepForever;
+ return;
+ }
+ /* no disk access going on - no holds barred*/
+ icachesleeptime = 0;
+ arenasumsleeptime = 0;
+}
+
+void
+diskaccess(int level)
+{
+ if(level < 0 || level >= nelem(lasttime)){
+ fprint(2, "bad level in diskaccess; caller=%lux\n", getcallerp…
+ return;
+ }
+ lasttime[level] = time(0);
+}
+
diff --git a/src/cmd/venti/srv/findscore.c b/src/cmd/venti/srv/findscore.c
t@@ -27,7 +27,7 @@ findscore(Arena *arena, uchar *score)
u32int clump;
int i, n, found;
-/*ZZZ remove fprint? */
+//ZZZ remove fprint?
if(arena->memstats.clumps)
fprint(2, "reading directory for arena=%s with %d entries\n", …
diff --git a/src/cmd/venti/srv/fixarenas.c b/src/cmd/venti/srv/fixarenas.c
t@@ -0,0 +1,1894 @@
+/*
+ * Check and fix an arena partition.
+ *
+ * This is a lot grittier than the rest of Venti because
+ * it can't just give up if a byte here or there is wrong.
+ *
+ * The rule here (hopefully followed!) is that block corruption
+ * only ever has a local effect -- there are no blocks that you
+ * can wipe out that will cause large portions of
+ * uncorrupted data blocks to be useless.
+ */
+
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "whack.h"
+
+#pragma varargck type "z" uvlong
+#pragma varargck type "z" vlong
+#pragma varargck type "t" uint
+
+/*
+ * Byte-count units (K, M, G) used for buffer sizes and by zfmt,
+ * plus Block, the page-in size used when probing single structures.
+ */
+enum
+{
+ K = 1024,
+ M = 1024*1024,
+ G = 1024*1024*1024,
+
+ Block = 4096,
+};
+
+int debugsha1;
+
+/* verbosity level; higher values print more detail */
+int verbose;
+/* partition that readdisk reads from and pageout writes to */
+Part *part;
+char *file;
+char *basename;
+char *dumpbase;
+/* when zero, pageout never writes the page buffer back to disk */
+int fix;
+/* number of 512-byte sector reads that failed in readdisk */
+int badreads;
+int unseal;
+/* all-zero block used as a comparison reference */
+uchar zero[MaxDiskBlock];
+
+Arena lastarena;
+/* arena partition geometry, filled in by guessgeometry */
+ArenaPart ap;
+/* spacing between arenas; given by the user or inferred by guessgeometry */
+uvlong arenasize;
+int nbadread;
+int nbad;
+/* offset at which readable data ends; reads at or past it yield 0xFB filler */
+uvlong partend;
+void checkarena(vlong, int);
+
+void
+usage(void)
+{
+ fprint(2, "usage: fixarenas [-fv] [-a arenasize] [-b blocksize] file […
+ threadexitsall(0);
+}
+
+/*
+ * %z: print a vlong using the largest of the suffixes G, M, K
+ * that divides it exactly, so that unittoull can read it back.
+ * Values that are not a multiple of K print as a plain number.
+ */
+static int
+zfmt(Fmt *fmt)
+{
+	vlong v;
+
+	v = va_arg(fmt->args, vlong);
+	if(v == 0)
+		return fmtstrcpy(fmt, "0");
+	/* G is a multiple of M, which is a multiple of K, so test smallest first */
+	if(v%K != 0)
+		return fmtprint(fmt, "%lld", v);
+	if(v%M != 0)
+		return fmtprint(fmt, "%lldK", v/K);
+	if(v%G != 0)
+		return fmtprint(fmt, "%lldM", v/M);
+	return fmtprint(fmt, "%lldG", v/G);
+}
+
+/*
+ * %t: print a uint time value the way ctime does,
+ * cut off at 28 characters so the newline is dropped.
+ */
+static int
+tfmt(Fmt *fmt)
+{
+	char stamp[30];
+	uint secs;
+
+	secs = va_arg(fmt->args, uint);
+	strcpy(stamp, ctime(secs));
+	stamp[28] = 0;
+	return fmtstrcpy(fmt, stamp);
+}
+
+/*
+ * Coalesce messages about unreadable sectors into larger ranges.
+ * bad(nil, 0, 0) flushes the buffer; an offset of -1 discards the
+ * pending range without printing it.
+ *
+ * msg labels the range (nil reuses the previous label), o is the
+ * start of the newly reported range and len its length.  The pending
+ * range [lb0, lb1) is printed only when a call arrives that does not
+ * start at lb1 or that carries a different message.
+ */
+static void
+bad(char *msg, vlong o, int len)
+{
+ static vlong lb0, lb1;
+ static char *lmsg;
+
+ if(msg == nil)
+ msg = lmsg;
+ if(o == -1){
+ lmsg = nil;
+ lb0 = 0;
+ lb1 = 0;
+ return;
+ }
+ /* not contiguous with the pending range, or a different message: emit and restart */
+ if(lb1 != o || (msg && lmsg && strcmp(msg, lmsg) != 0)){
+ if(lb0 != lb1)
+ print("%s %#llux+%#llux (%,lld+%,lld)\n",
+ lmsg, lb0, lb1-lb0, lb0, lb1-lb0);
+ lb0 = o;
+ }
+ /* extend the pending range to cover this report */
+ lmsg = msg;
+ lb1 = o+len;
+}
+
+/*
+ * Read in the len bytes of data at the offset.  If can't for whatever reason,
+ * fill it with garbage but print an error.
+ *
+ * Bytes at or beyond partend are filled with 0xFB; bytes that could
+ * not be read are left as 0xFD.  Each unreadable 512-byte sector is
+ * reported through bad() at its disk offset and counted in badreads.
+ * Always returns buf.
+ */
+static uchar*
+readdisk(uchar *buf, vlong offset, int len)
+{
+	int i, j, k, n;
+
+	/*
+	 * buf is a pointer here, so sizeof buf would be the size of the
+	 * pointer, not of the caller's buffer; fill the requested length.
+	 */
+	if(offset >= partend){
+		memset(buf, 0xFB, len);
+		return buf;
+	}
+
+	if(offset+len > partend){
+		memset(buf, 0xFB, len);
+		len = partend - offset;
+	}
+
+	if(readpart(part, offset, buf, len) >= 0)
+		return buf;
+
+	/*
+	 * The read failed.  Clear the buffer to nonsense, and
+	 * then try reading in smaller pieces.  If that fails,
+	 * read in even smaller pieces.  And so on down to sectors.
+	 */
+	memset(buf, 0xFD, len);
+	for(i=0; i<len; i+=64*K){
+		n = 64*K;
+		if(i+n > len)
+			n = len-i;
+		if(readpart(part, offset+i, buf+i, n) >= 0)
+			continue;
+		for(j=i; j<len && j<i+64*K; j+=4*K){
+			n = 4*K;
+			if(j+n > len)
+				n = len-j;
+			if(readpart(part, offset+j, buf+j, n) >= 0)
+				continue;
+			for(k=j; k<len && k<j+4*K; k+=512){
+				if(readpart(part, offset+k, buf+k, 512) >= 0)
+					continue;
+				/* report the position on disk, not the index into buf */
+				bad("disk read failed at", offset+k, 512);
+				badreads++;
+			}
+		}
+	}
+	bad(nil, 0, 0);
+	return buf;
+}
+
+/*
+ * Buffer to support running SHA1 hash of the disk.
+ */
+typedef struct Shabuf Shabuf;
+struct Shabuf
+{
+ int fd; /* debug copy of the hashed bytes; used only when > 0 (see sbdebug) */
+ vlong offset; /* disk offset of the next byte to be hashed */
+ DigestState state; /* running SHA1 state */
+ int rollback; /* nonzero: keep hist so sbrollback can rewind */
+ vlong r0; /* disk offset hashing started at; hist and fd offsets are relative to it */
+ DigestState *hist; /* saved states, one per 4M hashed past r0 */
+ int nhist; /* number of entries allocated in hist */
+};
+
+/*
+ * Point sb's debug output at a freshly created file, closing any
+ * previous one.  Descriptor 0 means "no file" in sb->fd, so a new
+ * descriptor that comes back as 0 is duplicated to another number.
+ * If the create fails, sb is left without a debug file.
+ */
+void
+sbdebug(Shabuf *sb, char *file)
+{
+	int newfd;
+
+	if(sb->fd > 0){
+		close(sb->fd);
+		sb->fd = 0;
+	}
+	newfd = create(file, OWRITE, 0666);
+	if(newfd < 0)
+		return;
+	if(newfd == 0){
+		newfd = dup(0, -1);
+		close(0);
+	}
+	sb->fd = newfd;
+}
+
+void
+sbupdate(Shabuf *sb, uchar *p, vlong offset, int len)
+{
+ int n, x;
+ vlong o;
+
+ if(sb->rollback && !sb->hist){
+ sb->r0 = offset;
+ sb->nhist = 1;
+ sb->hist = vtmalloc(sb->nhist*sizeof *sb->hist);
+ memset(sb->hist, 0, sizeof sb->hist[0]);
+ }
+ if(sb->r0 == 0)
+ sb->r0 = offset;
+
+ if(sb->offset < offset || sb->offset >= offset+len){
+ if(0) print("sbupdate %p %#llux+%d but offset=%#llux\n",
+ p, offset, len, sb->offset);
+ return;
+ }
+ x = sb->offset - offset;
+ if(0) print("sbupdate %p %#llux+%d skip %d\n",
+ sb, offset, len, x);
+ if(x){
+ p += x;
+ offset += x;
+ len -= x;
+ }
+ assert(sb->offset == offset);
+
+ if(sb->fd > 0)
+ pwrite(sb->fd, p, len, offset - sb->r0);
+
+ if(!sb->rollback){
+ sha1(p, len, nil, &sb->state);
+ sb->offset += len;
+ return;
+ }
+
+ /* save state every 4M so we can roll back quickly */
+ o = offset - sb->r0;
+ while(len > 0){
+ n = 4*M - o%(4*M);
+ if(n > len)
+ n = len;
+ sha1(p, n, nil, &sb->state);
+ sb->offset += n;
+ o += n;
+ p += n;
+ len -= n;
+ if(o%(4*M) == 0){
+ x = o/(4*M);
+ if(x >= sb->nhist){
+ if(x != sb->nhist)
+ print("oops! x=%d nhist=%d\n", x, sb->…
+ sb->nhist += 32;
+ sb->hist = vtrealloc(sb->hist, sb->nhist*sizeo…
+ }
+ sb->hist[x] = sb->state;
+ }
+ }
+}
+
+/*
+ * Advance the hash up to eoffset by reading the disk
+ * in pieces of at most 4M and feeding them to sbupdate.
+ */
+void
+sbdiskhash(Shabuf *sb, vlong eoffset)
+{
+	static uchar dbuf[4*M];
+	vlong left;
+	int chunk;
+
+	while((left = eoffset - sb->offset) > 0){
+		chunk = sizeof dbuf;
+		if(left < chunk)
+			chunk = left;
+		readdisk(dbuf, sb->offset, chunk);
+		sbupdate(sb, dbuf, sb->offset, chunk);
+	}
+}
+
+/*
+ * Rewind the running hash to the last saved 4M checkpoint at or
+ * before offset.  Does nothing if offset is not behind the current
+ * position.  Prints a message and leaves sb untouched when rollback
+ * is impossible: history not enabled or already freed, offset before
+ * the start of the hashed region, or checkpoint out of range.
+ * If a debug file is open it is truncated to match.
+ */
+void
+sbrollback(Shabuf *sb, vlong offset)
+{
+	int x;
+	vlong o;
+	Dir d;
+
+	/* sbscore frees hist, so check it as well as the rollback flag */
+	if(!sb->rollback || !sb->r0 || sb->hist == nil){
+		print("cannot rollback sha\n");
+		return;
+	}
+	if(offset >= sb->offset)
+		return;
+	o = offset - sb->r0;
+	if(o < 0){
+		/* before the first hashed byte: no checkpoint can cover it */
+		print("cannot rollback sha\n");
+		return;
+	}
+	x = o/(4*M);
+	if(x >= sb->nhist){
+		print("cannot rollback sha\n");
+		return;
+	}
+	sb->state = sb->hist[x];
+	/* widen before multiplying: x*4*M overflows int once x reaches 512 */
+	sb->offset = sb->r0 + (vlong)x*4*M;
+	assert(sb->offset <= offset);
+
+	if(sb->fd > 0){
+		nulldir(&d);
+		d.length = sb->offset - sb->r0;
+		dirfwstat(sb->fd, &d);
+	}
+}
+
+/*
+ * Finish the hash, storing the digest in score.
+ * The rollback history is released first.
+ */
+void
+sbscore(Shabuf *sb, uchar *score)
+{
+	/* free accepts nil, so no guard is needed */
+	free(sb->hist);
+	sb->hist = nil;
+	sha1(nil, 0, score, &sb->state);
+}
+
+/*
+ * If we're fixing arenas, then editing this memory edits the disk!
+ * It will be written back out as new data is paged in.
+ */
+uchar buf[4*M];
+uchar sbuf[4*M];
+vlong bufoffset;
+int buflen;
+
+static void pageout(void);
+/*
+ * Make buf hold the len bytes at offset, after first writing back
+ * the previous page via pageout.  Requests past partend are filled
+ * with 0xFB and clipped.  sbuf keeps a pristine copy so pageout can
+ * tell whether buf was edited.  Returns buf.
+ */
+static uchar*
+pagein(vlong offset, int len)
+{
+	vlong avail;
+
+	pageout();
+	avail = partend - offset;
+	if(avail <= 0){
+		memset(buf, 0xFB, sizeof buf);
+		return buf;
+	}
+	if(len > avail){
+		memset(buf, 0xFB, sizeof buf);
+		len = avail;
+	}
+
+	bufoffset = offset;
+	buflen = len;
+	readdisk(buf, offset, len);
+	memmove(sbuf, buf, len);
+	return buf;
+}
+
+/*
+ * Write the current page back to disk if we are fixing and it
+ * differs from the copy saved at page-in time.  The page is
+ * marked empty afterwards either way.
+ */
+static void
+pageout(void)
+{
+	if(buflen != 0 && fix && memcmp(buf, sbuf, buflen) != 0){
+		if(writepart(part, bufoffset, buf, buflen) < 0)
+			print("disk write failed at %#llux+%#ux (%,lld+%,d)\n",
+				bufoffset, buflen, bufoffset, buflen);
+	}
+	buflen = 0;
+}
+
+/*
+ * Zero len bytes starting at disk offset by paging the range
+ * through buf in pieces aligned to MinBlock and at most MaxBlock
+ * long.  Each pagein writes back the previous piece through
+ * pageout (which only writes when fix is set).  The page that was
+ * loaded on entry is paged back in before returning.
+ */
+static void
+zerorange(vlong offset, int len)
+{
+ int i;
+ vlong ooff;
+ int olen;
+ enum { MinBlock = 4*K, MaxBlock = 8*K };
+
+ /* disabled shortcut: zero in place when the range lies inside the current page */
+ if(0)
+ if(bufoffset <= offset && offset+len <= bufoffset+buflen){
+ memset(buf+(offset-bufoffset), 0, len);
+ return;
+ }
+
+ /* remember the current page so it can be restored at the end */
+ ooff = bufoffset;
+ olen = buflen;
+
+ /* i = distance of offset past the preceding MinBlock boundary */
+ i = offset%MinBlock;
+ if(i+len < MaxBlock){
+ /* whole range fits in one page */
+ pagein(offset-i, (len+MinBlock-1)&~(MinBlock-1));
+ memset(buf+i, 0, len);
+ }else{
+ /* leading partial page, then full MaxBlock pages, then the tail */
+ pagein(offset-i, MaxBlock);
+ memset(buf+i, 0, MaxBlock-i);
+ offset += MaxBlock-i;
+ len -= MaxBlock-i;
+ while(len >= MaxBlock){
+ pagein(offset, MaxBlock);
+ memset(buf, 0, MaxBlock);
+ offset += MaxBlock;
+ len -= MaxBlock;
+ }
+ pagein(offset, (len+MinBlock-1)&~(MinBlock-1));
+ memset(buf, 0, len);
+ }
+ pagein(ooff, olen);
+}
+
+/*
+ * read/write integers
+ *
+static void
+p16(uchar *p, u16int u)
+{
+ p[0] = (u>>8) & 0xFF;
+ p[1] = u & 0xFF;
+}
+*/
+
+/* Decode a big-endian 16-bit value from p[0..1]. */
+static u16int
+u16(uchar *p)
+{
+	u16int hi, lo;
+
+	hi = p[0];
+	lo = p[1];
+	return (hi<<8)|lo;
+}
+
+/* Store u at p[0..3] in big-endian byte order. */
+static void
+p32(uchar *p, u32int u)
+{
+	int i;
+
+	/* fill from the least significant byte backwards */
+	for(i=3; i>=0; i--){
+		p[i] = u & 0xFF;
+		u >>= 8;
+	}
+}
+
+/*
+ * Decode a big-endian 32-bit value from p[0..3].
+ * Each byte is widened to u32int before shifting: a bare uchar
+ * promotes to signed int, and shifting a value >= 0x80 left by 24
+ * overflows it, which is undefined behavior.
+ */
+static u32int
+u32(uchar *p)
+{
+	return ((u32int)p[0]<<24)|((u32int)p[1]<<16)|((u32int)p[2]<<8)|(u32int)p[3];
+}
+
+/*
+static void
+p64(uchar *p, u64int u)
+{
+ p32(p, u>>32);
+ p32(p, u);
+}
+*/
+
+/* Decode a big-endian 64-bit value from p[0..7] as two 32-bit halves. */
+static u64int
+u64(uchar *p)
+{
+	u64int hi, lo;
+
+	hi = u32(p);
+	lo = u32(p+4);
+	return (hi<<32) | lo;
+}
+
+/*
+ * qsort comparison for arrays of vlong: returns -1, 0, or 1 as
+ * *va is less than, equal to, or greater than *vb.
+ * The greater-than test must be a > b; the previous b > a merely
+ * repeated the first test, so larger values compared as equal and
+ * the sort order was unreliable.
+ */
+static int
+vlongcmp(const void *va, const void *vb)
+{
+	vlong a, b;
+
+	a = *(vlong*)va;
+	b = *(vlong*)vb;
+	if(a < b)
+		return -1;
+	if(a > b)
+		return 1;
+	return 0;
+}
+
+/* D and S are in draw.h */
+#define D VD
+#define S VS
+
+/*
+ * Flags or'ed into Info.len to choose how showdiffs prints a field.
+ * The low 16 bits (mask N) hold the field width in bytes.
+ */
+enum
+{
+ D = 0x10000, /* print as a decimal number */
+ Z = 0x20000, /* print with %z (size with unit suffix) */
+ S = 0x30000, /* print as a string */
+ T = 0x40000, /* print with %t (time) */
+ N = 0xFFFF /* mask extracting the byte width */
+};
+/*
+ * One field of an on-disk structure: its width plus format flag,
+ * and the name used when reporting a difference.
+ */
+typedef struct Info Info;
+struct Info
+{
+ int len;
+ char *name;
+};
+
+/*
+ * Fields of the packed arena partition header, in order;
+ * passed to showdiffs by checkarenas.  The 0 entry ends the list.
+ */
+Info partinfo[] = {
+ 4, "magic",
+ D|4, "version",
+ Z|4, "blocksize",
+ 4, "arenabase",
+ 0
+};
+
+/*
+ * Field list for showdiffs; by its name presumably the version 4
+ * arena header (confirm against the caller).  The 0 entry ends the list.
+ */
+Info headinfo4[] = {
+ 4, "magic",
+ D|4, "version",
+ S|ANameSize, "name",
+ Z|4, "blocksize",
+ Z|8, "size",
+ 0
+};
+
+/*
+ * Same fields as headinfo4 followed by a 4-byte clumpmagic;
+ * presumably the version 5 arena header (confirm against the caller).
+ */
+Info headinfo5[] = {
+ 4, "magic",
+ D|4, "version",
+ S|ANameSize, "name",
+ Z|4, "blocksize",
+ Z|8, "size",
+ 4, "clumpmagic",
+ 0
+};
+
+/*
+ * Field list for showdiffs; by its name presumably the version 4
+ * arena trailer without extension fields (confirm against the caller).
+ */
+Info tailinfo4[] = {
+ 4, "magic",
+ D|4, "version",
+ S|ANameSize, "name",
+ D|4, "clumps",
+ D|4, "cclumps",
+ T|4, "ctime",
+ T|4, "wtime",
+ D|8, "used",
+ D|8, "uncsize",
+ 1, "sealed",
+ 0
+};
+
+/*
+ * tailinfo4 followed by a one-byte extension marker and a second
+ * set of clump statistics named mem.*.
+ */
+Info tailinfo4a[] = {
+ /* tailinfo 4 */
+ 4, "magic",
+ D|4, "version",
+ S|ANameSize, "name",
+ D|4, "clumps",
+ D|4, "cclumps",
+ T|4, "ctime",
+ T|4, "wtime",
+ D|8, "used",
+ D|8, "uncsize",
+ 1, "sealed",
+
+ /* mem stats */
+ 1, "extension",
+ D|4, "mem.clumps",
+ D|4, "mem.cclumps",
+ D|8, "mem.used",
+ D|8, "mem.uncsize",
+ 1, "mem.sealed",
+ 0
+};
+
+/*
+ * Like tailinfo4 but with a 4-byte clumpmagic between wtime and used;
+ * presumably the version 5 arena trailer (confirm against the caller).
+ */
+Info tailinfo5[] = {
+ 4, "magic",
+ D|4, "version",
+ S|ANameSize, "name",
+ D|4, "clumps",
+ D|4, "cclumps",
+ T|4, "ctime",
+ T|4, "wtime",
+ 4, "clumpmagic",
+ D|8, "used",
+ D|8, "uncsize",
+ 1, "sealed",
+ 0
+};
+
+/*
+ * tailinfo5 followed by a one-byte extension marker and a second
+ * set of clump statistics named mem.*.
+ */
+Info tailinfo5a[] = {
+ /* tailinfo 5 */
+ 4, "magic",
+ D|4, "version",
+ S|ANameSize, "name",
+ D|4, "clumps",
+ D|4, "cclumps",
+ T|4, "ctime",
+ T|4, "wtime",
+ 4, "clumpmagic",
+ D|8, "used",
+ D|8, "uncsize",
+ 1, "sealed",
+
+ /* mem stats */
+ 1, "extension",
+ D|4, "mem.clumps",
+ D|4, "mem.cclumps",
+ D|8, "mem.used",
+ D|8, "mem.uncsize",
+ 1, "mem.sealed",
+ 0
+};
+
+void
+showdiffs(uchar *want, uchar *have, int len, Info *info)
+{
+ int n;
+
+ while(len > 0 && (n=info->len&N) > 0){
+ if(memcmp(have, want, n) != 0){
+ switch(info->len){
+ case 1:
+ print("\t%s: correct=%d disk=%d\n",
+ info->name, *want, *have);
+ break;
+ case 4:
+ print("\t%s: correct=%#ux disk=%#ux\n",
+ info->name, u32(want), u32(have));
+ break;
+ case D|4:
+ print("\t%s: correct=%,ud disk=%,ud\n",
+ info->name, u32(want), u32(have));
+ break;
+ case T|4:
+ print("\t%s: correct=%t\n\t\tdisk=%t\n",
+ info->name, u32(want), u32(have));
+ break;
+ case Z|4:
+ print("\t%s: correct=%z disk=%z\n",
+ info->name, (uvlong)u32(want), (uvlong…
+ break;
+ case D|8:
+ print("\t%s: correct=%,lld disk=%,lld\n",
+ info->name, u64(want), u64(have));
+ break;
+ case Z|8:
+ print("\t%s: correct=%z disk=%z\n",
+ info->name, u64(want), u64(have));
+ break;
+ case S|ANameSize:
+ print("\t%s: correct=%s disk=%.*s\n",
+ info->name, (char*)want,
+ utfnlen((char*)have, ANameSize-1),
+ (char*)have);
+ break;
+ default:
+ print("\t%s: correct=%.*H disk=%.*H\n",
+ info->name, n, want, n, have);
+ break;
+ }
+ }
+ have += n;
+ want += n;
+ len -= n;
+ info++;
+ }
+ if(len > 0 && memcmp(have, want, len) != 0){
+ if(memcmp(want, zero, len) != 0)
+ print("!!\textra want data in showdiffs (bug in fixare…
+ else
+ print("\tnon-zero data on disk after structure\n");
+ if(verbose > 1){
+ print("want: %.*H\n", len, want);
+ print("have: %.*H\n", len, have);
+ }
+ }
+}
+
+static int tabsizes[] = { 64*1024, 512*1024, };
+/*
+ * Poke around on the disk to guess what the ArenaPart numbers are.
+ */
+void
+guessgeometry(void)
+{
+ int i, j, n, bestn, ndiff, nhead, ntail;
+ uchar *p, *ep, *sp;
+ u64int diff[100], head[20], tail[20];
+ u64int offset, bestdiff;
+
+ ap.version = ArenaPartVersion;
+
+ if(arenasize == 0 || ap.blocksize == 0){
+ /*
+ * The ArenaPart block at offset PartBlank may be corrupt or j…
+ * Instead, look for the individual arena headers and tails, w…
+ * are many of, and once we've seen enough, infer the spacing.
+ *
+ * Of course, nothing in the file format requires that arenas …
+ * spaced, but fmtarenas always does that for us.
+ */
+ nhead = 0;
+ ntail = 0;
+ for(offset=PartBlank; offset<partend; offset+=4*M){
+ p = pagein(offset, 4*M);
+ for(sp=p, ep=p+4*M; p<ep; p+=K){
+ if(u32(p) == ArenaHeadMagic && nhead < nelem(h…
+ if(verbose)
+ print("arena head at %#llx\n",…
+ head[nhead++] = offset+(p-sp);
+ }
+ if(u32(p) == ArenaMagic && ntail < nelem(tail)…
+ tail[ntail++] = offset+(p-sp);
+ if(verbose)
+ print("arena tail at %#llx\n",…
+ }
+ }
+ if(nhead == nelem(head) && ntail == nelem(tail))
+ break;
+ }
+ if(nhead < 3 && ntail < 3)
+ sysfatal("too few intact arenas: %d heads, %d tails", …
+
+ /*
+ * Arena size is likely the most common
+ * inter-head or inter-tail spacing.
+ */
+ ndiff = 0;
+ for(i=1; i<nhead; i++)
+ diff[ndiff++] = head[i] - head[i-1];
+ for(i=1; i<ntail; i++)
+ diff[ndiff++] = tail[i] - tail[i-1];
+ qsort(diff, ndiff, sizeof diff[0], vlongcmp);
+ bestn = 0;
+ bestdiff = 0;
+ for(i=1, n=1; i<=ndiff; i++, n++){
+ if(i==ndiff || diff[i] != diff[i-1]){
+ if(n > bestn){
+ bestn = n;
+ bestdiff = diff[i-1];
+ }
+ n = 0;
+ }
+ }
+ print("arena size likely %z (%d of %d)\n", bestdiff, bestn, nd…
+ if(arenasize != 0 && arenasize != bestdiff)
+ print("using user-specified size %z instead\n", arenas…
+ else
+ arenasize = bestdiff;
+
+ /*
+ * The arena tail for an arena is arenasize-blocksize from the…
+ */
+ ndiff = 0;
+ for(i=j=0; i<nhead && j<ntail; ){
+ if(tail[j] < head[i]){
+ j++;
+ continue;
+ }
+ if(tail[j] < head[i]+arenasize){
+ diff[ndiff++] = head[i]+arenasize - tail[j];
+ j++;
+ continue;
+ }
+ i++;
+ }
+ if(ndiff < 3)
+ sysfatal("too few intact arenas: %d head, tail pairs",…
+ qsort(diff, ndiff, sizeof diff[0], vlongcmp);
+ bestn = 0;
+ bestdiff = 0;
+ for(i=1, n=1; i<=ndiff; i++, n++){
+ if(i==ndiff || diff[i] != diff[i-1]){
+ if(n > bestn){
+ bestn = n;
+ bestdiff = diff[i-1];
+ }
+ n = 0;
+ }
+ }
+ print("block size likely %z (%d of %d)\n", bestdiff, bestn, nd…
+ if(ap.blocksize != 0 && ap.blocksize != bestdiff)
+ print("using user-specified size %z instead\n", (vlong…
+ else
+ ap.blocksize = bestdiff;
+ if(ap.blocksize == 0 || ap.blocksize&(ap.blocksize-1))
+ sysfatal("block size not a power of two");
+ if(ap.blocksize > MaxDiskBlock)
+ sysfatal("block size too big (max=%d)", MaxDiskBlock);
+
+ /*
+ * Use head/tail information to deduce arena base.
+ */
+ ndiff = 0;
+ for(i=0; i<nhead; i++)
+ diff[ndiff++] = head[i]%arenasize;
+ for(i=0; i<ntail; i++)
+ diff[ndiff++] = (tail[i]+ap.blocksize)%arenasize;
+ qsort(diff, ndiff, sizeof diff[0], vlongcmp);
+ bestn = 0;
+ bestdiff = 0;
+ for(i=1, n=1; i<=ndiff; i++, n++){
+ if(i==ndiff || diff[i] != diff[i-1]){
+ if(n > bestn){
+ bestn = n;
+ bestdiff = diff[i-1];
+ }
+ n = 0;
+ }
+ }
+ ap.arenabase = bestdiff;
+ }
+
+ ap.tabbase = (PartBlank+HeadSize+ap.blocksize-1)&~(ap.blocksize-1);
+ /*
+ * XXX pick up table, check arenabase.
+ * XXX pick up table, record base name.
+ */
+
+ /*
+ * Somewhat standard computation.
+ * Fmtarenas used to use 64k tab, now uses 512k tab.
+ */
+ if(ap.arenabase == 0){
+ for(i=0; i<nelem(tabsizes); i++){
+ ap.arenabase = (PartBlank+HeadSize+tabsizes[i]+ap.bloc…
+ p = pagein(ap.arenabase, Block);
+ if(u32(p) == ArenaHeadMagic)
+ break;
+ }
+ }
+ p = pagein(ap.arenabase, Block);
+ print("arena base likely %z%s\n", (vlong)ap.arenabase,
+ u32(p)!=ArenaHeadMagic ? " (but no arena head there)" : "");
+
+ ap.tabsize = ap.arenabase - ap.tabbase;
+
+}
+
+/*
+ * Check the arena partition blocks and then the arenas listed in range.
+ *
+ * range is nil (check every arena), "none" (check no arenas), or a
+ * comma-separated list of arena numbers and lo-hi spans; an omitted
+ * lo means 0 and an omitted hi means the last arena.  The string is
+ * modified in place while parsing.
+ * NOTE(review): lo and hi are not checked against narena.
+ */
+void
+checkarenas(char *range)
+{
+ char *s, *t;
+ int i, lo, hi, narena;
+ uchar dbuf[HeadSize];
+ uchar *p;
+
+ guessgeometry();
+
+ /* round the partition end down to a whole number of blocks */
+ partend -= partend%ap.blocksize;
+
+ /* build the expected partition header and compare it with what is on disk */
+ memset(dbuf, 0, sizeof dbuf);
+ packarenapart(&ap, dbuf);
+ p = pagein(PartBlank, Block);
+ if(memcmp(p, dbuf, HeadSize) != 0){
+ print("on-disk arena part superblock incorrect\n");
+ showdiffs(dbuf, p, HeadSize, partinfo);
+ }
+ /* install the expected header in the page buffer; pageout writes it only when fixing */
+ memmove(p, dbuf, HeadSize);
+
+ narena = (partend-ap.arenabase + arenasize-1)/arenasize;
+ if(range == nil){
+ for(i=0; i<narena; i++)
+ checkarena(ap.arenabase+(vlong)i*arenasize, i);
+ }else if(strcmp(range, "none") == 0){
+ /* nothing */
+ }else{
+ /* parse, e.g., -4,8-9,10- */
+ for(s=range; *s; s=t){
+ /* split off the next comma-separated item; t points at the rest */
+ t = strchr(s, ',');
+ if(t)
+ *t++ = 0;
+ else
+ t = s+strlen(s);
+ if(*s == '-')
+ lo = 0;
+ else
+ lo = strtol(s, &s, 0);
+ hi = lo;
+ if(*s == '-'){
+ s++;
+ if(*s == 0)
+ hi = narena-1;
+ else
+ hi = strtol(s, &s, 0);
+ }
+ /* anything left over means the item was malformed */
+ if(*s != 0){
+ print("bad arena range: %s\n", s);
+ continue;
+ }
+ for(i=lo; i<=hi; i++)
+ checkarena(ap.arenabase+(vlong)i*arenasize, i);
+ }
+ }
+}
+
+/*
+ * Is there a clump here at p?
+ *
+ * Decodes the clump header at p into cl and validates it by
+ * recomputing the score of the data that follows (uncompressing
+ * it first if the encoding says so).  On success stores the header
+ * magic in *pmagic and returns the number of bytes occupied by
+ * header plus data; returns 0 if this is not a valid clump.
+ * cl may be partially filled in even when 0 is returned.
+ */
+static int
+isclump(uchar *p, Clump *cl, u32int *pmagic)
+{
+ int n;
+ u32int magic;
+ uchar score[VtScoreSize], *bp;
+ Unwhack uw;
+ uchar ubuf[70*1024];
+
+ bp = p;
+ magic = u32(p);
+ if(magic == 0)
+ return 0;
+ p += U32Size;
+
+ cl->info.type = vtfromdisktype(*p);
+ if(cl->info.type == 0xFF)
+ return 0;
+ p++;
+ cl->info.size = u16(p);
+ p += U16Size;
+ cl->info.uncsize = u16(p);
+ /* stored size can never exceed the uncompressed size */
+ if(cl->info.size > cl->info.uncsize)
+ return 0;
+ p += U16Size;
+ scorecp(cl->info.score, p);
+ p += VtScoreSize;
+ cl->encoding = *p;
+ p++;
+ cl->creator = u32(p);
+ p += U32Size;
+ cl->time = u32(p);
+ p += U32Size;
+
+ /* p now points at the clump data; verify it hashes to the recorded score */
+ switch(cl->encoding){
+ case ClumpENone:
+ if(cl->info.size != cl->info.uncsize)
+ return 0;
+ scoremem(score, p, cl->info.size);
+ if(scorecmp(score, cl->info.score) != 0)
+ return 0;
+ break;
+ case ClumpECompress:
+ if(cl->info.size >= cl->info.uncsize)
+ return 0;
+ unwhackinit(&uw);
+ n = unwhack(&uw, ubuf, cl->info.uncsize, p, cl->info.size);
+ if(n != cl->info.uncsize)
+ return 0;
+ scoremem(score, ubuf, cl->info.uncsize);
+ if(scorecmp(score, cl->info.score) != 0)
+ return 0;
+ break;
+ default:
+ return 0;
+ }
+ p += cl->info.size;
+
+ /* it all worked out in the end */
+ *pmagic = magic;
+ return p - bp;
+}
+
+/*
+ * All ClumpInfos seen in this arena.
+ * Kept in binary tree so we can look up by score.
+ * The tree links are indices into cibuf, with -1 meaning no child.
+ */
+typedef struct Cit Cit;
+struct Cit
+{
+ int left; /* index of subtree with smaller scores, or -1 */
+ int right; /* index of subtree with larger scores, or -1 */
+ vlong corrupt; /* value given to addcibuf; nonzero entries are not put in the tree */
+ ClumpInfo ci;
+};
+Cit *cibuf; /* growable array of entries */
+int ciroot; /* index of the tree root, or -1 when empty */
+int ncibuf, mcibuf; /* entries in use, entries allocated */
+
+/* Forget all recorded ClumpInfos: empty tree, no entries in use. */
+void
+resetcibuf(void)
+{
+	ciroot = -1;
+	ncibuf = 0;
+}
+
+/*
+ * Walk the tree starting from the link *p looking for score.
+ * Returns the link that refers to the matching entry, or the
+ * empty (-1) link where such an entry would be attached.
+ */
+int*
+ltreewalk(int *p, uchar *score)
+{
+	int cmp;
+
+	while(*p != -1){
+		cmp = scorecmp(cibuf[*p].ci.score, score);
+		if(cmp == 0)
+			break;
+		p = cmp < 0 ? &cibuf[*p].right : &cibuf[*p].left;
+	}
+	return p;
+}
+
+/*
+ * Append ci to cibuf, growing the array as needed.  corrupt is
+ * recorded in the entry; entries with nonzero corrupt are kept in
+ * the array but left out of the lookup tree.
+ *
+ * A valid entry is linked into the tree only at an empty link.  If
+ * the score is already present, ltreewalk returns the link to the
+ * existing node; overwriting that link would replace the node with
+ * the new childless one and orphan its whole subtree, so the link
+ * is left alone (the score is still found through the old node).
+ */
+void
+addcibuf(ClumpInfo *ci, vlong corrupt)
+{
+	Cit *cit;
+	int *link;
+
+	if(ncibuf == mcibuf){
+		mcibuf += 131072;
+		cibuf = vtrealloc(cibuf, mcibuf*sizeof cibuf[0]);
+	}
+	cit = &cibuf[ncibuf];
+	cit->ci = *ci;
+	cit->left = -1;
+	cit->right = -1;
+	cit->corrupt = corrupt;
+	if(!corrupt){
+		link = ltreewalk(&ciroot, ci->score);
+		if(*link == -1)
+			*link = ncibuf;
+	}
+	ncibuf++;
+}
+
+/* Record a corrupt region of the given length as an all-zero ClumpInfo. */
+void
+addcicorrupt(vlong len)
+{
+	static ClumpInfo blank;
+
+	addcibuf(&blank, len);
+}
+
+/*
+ * Report whether a clump with this score is in the score tree:
+ * 1 if so, 0 if not.
+ */
+int
+haveclump(uchar *score)
+{
+	/*
+	 * ltreewalk stops either on the link to the matching node
+	 * or on an empty (-1) link; it never modifies the tree.
+	 */
+	return *ltreewalk(&ciroot, score) != -1;
+}
+
+/*
+ * Compare ci against the bytes at p: type in p[0] (converted
+ * with vtfromdisktype), size at p+1, uncsize at p+3 and score
+ * at p+5.  Returns 1 if all four agree, 0 otherwise.
+ */
+int
+matchci(ClumpInfo *ci, uchar *p)
+{
+	return ci->type == vtfromdisktype(p[0])
+		&& ci->size == u16(p+1)
+		&& ci->uncsize == u16(p+3)
+		&& scorecmp(ci->score, p+5) == 0;
+}
+
+/*
+ * Decide whether the block at p (blocksize bytes) looks like
+ * the tail of a sealed arena.  The version word at p+4 selects
+ * the packed tail size; the last byte of the packed tail must
+ * be 1, everything between the tail and the trailing score
+ * must be zero, and the score in the final VtScoreSize bytes
+ * must be non-zero.  Returns 1 if so, 0 (after printing the
+ * reason, except for an unknown version) if not.
+ */
+int
+sealedarena(uchar *p, int blocksize)
+{
+	int version, tailsize;
+
+	version = u32(p+4);
+	if(version == ArenaVersion4)
+		tailsize = ArenaSize4;
+	else if(version == ArenaVersion5)
+		tailsize = ArenaSize5;
+	else
+		return 0;
+
+	if(p[tailsize-1] != 1){
+		print("arena tail says not sealed\n");
+		return 0;
+	}
+	if(memcmp(p+tailsize, zero, blocksize-VtScoreSize-tailsize) != 0){
+		print("arena tail followed by non-zero data\n");
+		return 0;
+	}
+	if(memcmp(p+blocksize-VtScoreSize, zero, VtScoreSize) == 0){
+		print("arena score zero\n");
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * An arena name is acceptable when nameok approves of it and
+ * it ends in the decimal representation of n.
+ * Returns 1 if acceptable, 0 otherwise.
+ */
+int
+okayname(char *name, int n)
+{
+	char suffix[20];
+	int namelen, suflen;
+
+	if(nameok(name) < 0)
+		return 0;
+	sprint(suffix, "%d", n);
+	namelen = strlen(name);
+	suflen = strlen(suffix);
+	if(namelen < suflen)
+		return 0;
+	return strcmp(name+namelen-suflen, suffix) == 0;
+}
+
+/*
+ * Order two ClumpInfos by type, then size, then uncsize,
+ * then score.  Returns zero only when all four fields match.
+ */
+int
+clumpinfocmp(ClumpInfo *a, ClumpInfo *b)
+{
+	int d;
+
+	if((d = a->type - b->type) != 0)
+		return d;
+	if((d = a->size - b->size) != 0)
+		return d;
+	if((d = a->uncsize - b->uncsize) != 0)
+		return d;
+	return scorecmp(a->score, b->score);
+}
+
+/*
+ * Read nci clump directory entries for the arena that starts
+ * at offset.  Directory blocks are visited from the block just
+ * before the arena's last block toward lower addresses, each
+ * block holding blocksize/ClumpInfoSize entries.  The disk is
+ * paged in 4*M bytes at a time.  Returns a vtmalloc'ed array
+ * of nci entries.
+ */
+ClumpInfo*
+loadci(vlong offset, Arena *arena, int nci)
+{
+	int done, k, perblock;
+	uchar *blk, *bufstart;
+	ClumpInfo *dir, *next;
+
+	perblock = arena->blocksize/ClumpInfoSize;
+	dir = vtmalloc(nci*sizeof dir[0]);
+	next = dir;
+	offset += arena->size - arena->blocksize;
+	blk = nil;
+	bufstart = nil;
+	for(done=0; done<nci; done+=perblock){
+		/* window used up (or not loaded yet): page in the 4*M below offset */
+		if(blk == bufstart){
+			bufstart = pagein(offset-4*M, 4*M);
+			blk = bufstart+4*M;
+		}
+		blk -= arena->blocksize;
+		offset -= arena->blocksize;
+		for(k=0; k<perblock && done+k<nci; k++)
+			unpackclumpinfo(next++, blk+k*ClumpInfoSize);
+	}
+	return dir;
+}
+
+/*
+ * Write nci clump directory entries for the arena that starts
+ * at offset, mirroring the layout loadci reads: blocks are
+ * filled from the one just before the arena's last block
+ * toward lower addresses.  Each block is zeroed before its
+ * entries are packed.  Calls pageout when done and returns the
+ * offset of the lowest directory block written.
+ */
+vlong
+writeci(vlong offset, Arena *arena, ClumpInfo *ci, int nci)
+{
+	int done, k, perblock;
+	uchar *blk, *bufstart;
+
+	perblock = arena->blocksize/ClumpInfoSize;
+	offset += arena->size - arena->blocksize;
+	blk = nil;
+	bufstart = nil;
+	for(done=0; done<nci; done+=perblock){
+		/* window used up (or not loaded yet): page in the 4*M below offset */
+		if(blk == bufstart){
+			bufstart = pagein(offset-4*M, 4*M);
+			blk = bufstart+4*M;
+		}
+		blk -= arena->blocksize;
+		offset -= arena->blocksize;
+		memset(blk, 0, arena->blocksize);
+		for(k=0; k<perblock && done+k<nci; k++)
+			packclumpinfo(ci++, blk+k*ClumpInfoSize);
+	}
+	pageout();
+	return offset;
+}
+
+/*
+ * Fill in the basic fields of head and arena for arena number
+ * anum at partition offset offset0: size, block size, version,
+ * clump magic and name.  Size and block size come from the
+ * globals arenasize, partend and ap.blocksize; the rest is
+ * taken from the on-disk head and tail blocks when they unpack
+ * successfully.  The name is chosen, in order of preference,
+ * from the -n basename, the base remembered from the previous
+ * call (lastbase), the on-disk head name, or the on-disk tail
+ * name; if none is available the program exits via sysfatal.
+ */
+void
+loadarenabasics(vlong offset0, int anum, ArenaHead *head, Arena *arena)
+{
+ char dname[ANameSize];
+ static char lastbase[ANameSize];
+ uchar *p;
+ Arena oarena;
+ ArenaHead ohead;
+
+ /*
+ * Fmtarenas makes all arenas the same size
+ * except the last, which may be smaller.
+ * It uses the same block size for arenas as for
+ * the arena partition blocks.
+ */
+ arena->size = arenasize;
+ if(offset0+arena->size > partend)
+ arena->size = partend - offset0;
+ head->size = arena->size;
+
+ arena->blocksize = ap.blocksize;
+ head->blocksize = arena->blocksize;
+
+ /*
+ * Look for clump magic and name in head/tail blocks.
+ * All the other info we will reconstruct just in case.
+ */
+ p = pagein(offset0, arena->blocksize);
+ memset(&ohead, 0, sizeof ohead);
+ if(unpackarenahead(&ohead, p) >= 0){
+ head->version = ohead.version;
+ head->clumpmagic = ohead.clumpmagic;
+ if(okayname(ohead.name, anum))
+ strcpy(head->name, ohead.name);
+ }
+
+ p = pagein(offset0+arena->size-arena->blocksize,
+ arena->blocksize);
+ memset(&oarena, 0, sizeof oarena);
+ if(unpackarena(&oarena, p) >= 0){
+ arena->version = oarena.version;
+ arena->clumpmagic = oarena.clumpmagic;
+ if(okayname(oarena.name, anum))
+ strcpy(arena->name, oarena.name);
+ arena->diskstats.clumps = oarena.diskstats.clumps;
+print("old arena: sealed=%d\n", oarena.diskstats.sealed);
+ arena->diskstats.sealed = oarena.diskstats.sealed;
+ }
+
+ /* Head trumps arena. */
+ if(head->version){
+ arena->version = head->version;
+ arena->clumpmagic = head->clumpmagic;
+ }
+ if(arena->version == 0)
+ arena->version = ArenaVersion5;
+ if(basename)
+ snprint(arena->name, ANameSize, "%s%d", basename, anum);
+ else if(lastbase[0])
+ snprint(arena->name, ANameSize, "%s%d", lastbase, anum);
+ else if(head->name[0])
+ strcpy(arena->name, head->name);
+ else if(arena->name[0] == 0)
+ sysfatal("cannot determine base name for arena; use -n");
+ /* remember the name minus its trailing arena number for the next call */
+ strcpy(lastbase, arena->name);
+ sprint(dname, "%d", anum);
+ lastbase[strlen(lastbase)-strlen(dname)] = 0;
+
+ /* Was working in arena, now copy to head. */
+ head->version = arena->version;
+ memmove(head->name, arena->name, sizeof head->name);
+ head->blocksize = arena->blocksize;
+ head->size = arena->size;
+}
+
+/*
+ * Start the running hash sb at offset0 by feeding it the
+ * packed form of head, zero-padded to head->blocksize bytes.
+ */
+void
+shahead(Shabuf *sb, vlong offset0, ArenaHead *head)
+{
+	uchar blk[MaxDiskBlock];
+
+	memset(blk, 0, sizeof blk);
+	packarenahead(head, blk);
+
+	sb->offset = offset0;
+	sbupdate(sb, blk, offset0, head->blocksize);
+}
+
+/*
+ * Choose a clump magic number for an arena of the given
+ * version.  Version 4 arenas always use _ClumpMagic; other
+ * versions get a random value that is neither zero nor
+ * _ClumpMagic.
+ */
+u32int
+newclumpmagic(int version)
+{
+	u32int magic;
+
+	if(version == ArenaVersion4)
+		return _ClumpMagic;
+
+	magic = fastrand();
+	while(magic == 0 || magic == _ClumpMagic)
+		magic = fastrand();
+	return magic;
+}
+
+/*
+ * Poke around in the arena to find the clump data
+ * and compute the relevant statistics.
+ *
+ * Offset0 is the partition offset of arena number anum.
+ * Head and arena are cleared and then rebuilt from what is
+ * found on disk (loadarenabasics supplies the basic fields).
+ * Oldscore and score are zeroed first.  Oldscore is filled in
+ * with the hash of the data as found, but only if the old
+ * tail claimed the arena was sealed and it is not unsealed
+ * along the way.  Score is filled in with the hash of the
+ * repaired data, but only when sealing and fix are both set.
+ *
+ * The work is done in three steps: scan the data area for
+ * clumps, recording each one (and each corrupt region) with
+ * addcibuf/addcicorrupt; reconcile the on-disk clump directory
+ * with that record and write it back with writeci; then decide
+ * whether the arena stays sealed and finish the hashes.
+ */
+void
+guessarena(vlong offset0, int anum, ArenaHead *head, Arena *arena,
+ uchar *oldscore, uchar *score)
+{
+ uchar dbuf[MaxDiskBlock];
+ int needtozero, clumps, nb1, nb2, minclumps;
+ int inbad, n, ncib, printed, sealing, smart;
+ u32int magic;
+ uchar *sp, *ep, *p;
+ vlong boffset, eoffset, lastclumpend, leaked;
+ vlong offset, toffset, totalcorrupt, v;
+ Clump cl;
+ ClumpInfo *bci, *ci, *eci, *xci;
+ Cit *bcit, *cit, *ecit;
+ Shabuf oldsha, newsha;
+
+ /*
+ * We expect to find an arena, with data, between offset
+ * and offset+arenasize. With any luck, the data starts at
+ * offset+ap.blocksize. The blocks have variable size and
+ * aren't padded at all, which doesn't give us any alignment
+ * constraints. The blocks are compressed or high entropy,
+ * but the headers are pretty low entropy (except the score):
+ *
+ * type[1] (range 0 thru 9, 13)
+ * size[2]
+ * uncsize[2] (<= size)
+ *
+ * so we can look for these. We check the scores as we go,
+ * so we can't make any wrong turns. If we find ourselves
+ * in a dead end, scan forward looking for a new start.
+ */
+
+ resetcibuf();
+ memset(head, 0, sizeof *head);
+ memset(arena, 0, sizeof *arena);
+ memset(oldscore, 0, VtScoreSize);
+ memset(score, 0, VtScoreSize);
+ memset(&oldsha, 0, sizeof oldsha);
+ memset(&newsha, 0, sizeof newsha);
+ newsha.rollback = 1;
+
+ if(0){
+ sbdebug(&oldsha, "old.sha");
+ sbdebug(&newsha, "new.sha");
+ }
+
+ loadarenabasics(offset0, anum, head, arena);
+
+ /* start the clump hunt */
+
+ clumps = 0;
+ totalcorrupt = 0;
+ sealing = 1;
+ boffset = offset0 + arena->blocksize;
+ offset = boffset;
+ eoffset = offset0+arena->size - arena->blocksize;
+ toffset = eoffset;
+ sp = pagein(offset0, 4*M);
+
+ if(arena->diskstats.sealed){
+ oldsha.offset = offset0;
+ sbupdate(&oldsha, sp, offset0, 4*M);
+ }
+ ep = sp+4*M;
+ p = sp + (boffset - offset0);
+ ncib = arena->blocksize / ClumpInfoSize; /* ci per block in ind…
+ lastclumpend = offset;
+ nbad = 0;
+ inbad = 0;
+ needtozero = 0;
+ minclumps = 0;
+ while(offset < eoffset){
+ /*
+ * Shift buffer if we're running out of room.
+ */
+ if(p+70*K >= ep){
+ /*
+ * Start the post SHA1 buffer. By now we should know…
+ * clumpmagic and arena version, so we can create a
+ * correct head block to get things going.
+ */
+ if(sealing && fix && newsha.offset == 0){
+ newsha.offset = offset0;
+ if(arena->clumpmagic == 0){
+ if(arena->version == 0)
+ arena->version = ArenaVersion5;
+ arena->clumpmagic = newclumpmagic(aren…
+ }
+ head->clumpmagic = arena->clumpmagic;
+ shahead(&newsha, offset0, head);
+ }
+ n = 4*M-256*K;
+ if(sealing && fix){
+ sbdiskhash(&newsha, bufoffset);
+ sbupdate(&newsha, buf, bufoffset, 4*M-256*K);
+ }
+ pagein(bufoffset+n, 4*M);
+ p -= n;
+ if(arena->diskstats.sealed)
+ sbupdate(&oldsha, buf, bufoffset, 4*M);
+ }
+
+ /*
+ * Check for a clump at p, which is at offset in the disk.
+ * Duplicate clumps happen in corrupted disks
+ * (the same pattern gets written many times in a row)
+ * and should never happen during regular use.
+ */
+ if((n = isclump(p, &cl, &magic)) > 0){
+ /*
+ * If we were in the middle of some corrupted data,
+ * flush a warning about it and then add any clump
+ * info blocks as necessary.
+ */
+ if(inbad){
+ inbad = 0;
+ v = offset-lastclumpend;
+ if(needtozero){
+ zerorange(lastclumpend, v);
+ sbrollback(&newsha, lastclumpend);
+ print("corrupt clump data - %#llux+%#l…
+ lastclumpend, v, v);
+ }
+ addcicorrupt(v);
+ totalcorrupt += v;
+ nb1 = (minclumps+ncib-1)/ncib;
+ minclumps += (v+ClumpSize+VtMaxLumpSize-1)/(Cl…
+ nb2 = (minclumps+ncib-1)/ncib;
+ eoffset -= (nb2-nb1)*arena->blocksize;
+ }
+
+ if(haveclump(cl.info.score))
+ print("warning: duplicate clump %d %V\n", cl.i…
+
+ /*
+ * If clumps use different magic numbers, we don't car…
+ * We'll just use the first one we find and make the o…
+ * follow suit.
+ */
+ if(arena->clumpmagic == 0){
+ print("clump type %d size %d score %V magic %x…
+ cl.info.type, cl.info.size, cl.info.sc…
+ arena->clumpmagic = magic;
+ if(magic == _ClumpMagic)
+ arena->version = ArenaVersion4;
+ else
+ arena->version = ArenaVersion5;
+ }
+ if(magic != arena->clumpmagic)
+ p32(p, arena->clumpmagic);
+ if(clumps == 0)
+ arena->ctime = cl.time;
+
+ /*
+ * Record the clump, update arena stats,
+ * grow clump info blocks if needed.
+ */
+ if(verbose > 1)
+ print("\tclump %d: %d %V at %#llux+%#ux (%d)\n…
+ clumps, cl.info.type, cl.info.score, o…
+ addcibuf(&cl.info, 0);
+ if(minclumps%ncib == 0)
+ eoffset -= arena->blocksize;
+ minclumps++;
+ clumps++;
+ if(cl.encoding != ClumpENone)
+ arena->diskstats.cclumps++;
+ arena->diskstats.uncsize += cl.info.uncsize;
+ arena->wtime = cl.time;
+
+ /*
+ * Move to next clump.
+ */
+ offset += n;
+ p += n;
+ lastclumpend = offset;
+ }else{
+ /*
+ * Overwrite malformed clump data with zeros later.
+ * For now, just record whether it needs to be overwri…
+ * Bad regions must be of size at least ClumpSize.
+ * Postponing the overwriting keeps us from writing pa…
+ * the end of the arena data (which might be directory…
+ * with zeros.
+ */
+ if(!inbad){
+ inbad = 1;
+ needtozero = 0;
+ if(memcmp(p, zero, ClumpSize) != 0)
+ needtozero = 1;
+ p += ClumpSize;
+ offset += ClumpSize;
+ nbad++;
+ }else{
+ if(*p != 0)
+ needtozero = 1;
+ p++;
+ offset++;
+ }
+ }
+ }
+ pageout();
+
+ if(verbose)
+ print("readable clumps: %d; min. directory entries: %d\n",
+ clumps, minclumps);
+ arena->diskstats.used = lastclumpend - boffset;
+ leaked = eoffset - lastclumpend;
+ if(verbose)
+ print("used from %#llux to %#llux = %,lld (%,lld unused)\n",
+ boffset, lastclumpend, arena->diskstats.used, leaked);
+
+ /*
+ * Finish the SHA1 of the old data.
+ */
+ if(arena->diskstats.sealed){
+ sbdiskhash(&oldsha, toffset);
+ readdisk(dbuf, toffset, arena->blocksize);
+ scorecp(dbuf+arena->blocksize-VtScoreSize, zero);
+ sbupdate(&oldsha, dbuf, toffset, arena->blocksize);
+ sbscore(&oldsha, oldscore);
+ }
+
+ /*
+ * If we still don't know the clump magic, the arena
+ * must be empty. It still needs a value, so make
+ * something up.
+ */
+ if(arena->version == 0)
+ arena->version = ArenaVersion5;
+ if(arena->clumpmagic == 0){
+ if(arena->version == ArenaVersion4)
+ arena->clumpmagic = _ClumpMagic;
+ else{
+ do
+ arena->clumpmagic = fastrand();
+ while(arena->clumpmagic==_ClumpMagic
+ ||arena->clumpmagic==0);
+ }
+ head->clumpmagic = arena->clumpmagic;
+ }
+
+ /*
+ * Guess at number of clumpinfo blocks to load.
+ * If we guess high, it's no big deal. If we guess low,
+ * we'll be forced into rewriting the whole directory.
+ * Still not such a big deal.
+ */
+ if(clumps == 0 || arena->diskstats.used == totalcorrupt)
+ goto Nocib;
+ if(clumps < arena->diskstats.clumps)
+ clumps = arena->diskstats.clumps;
+ if(clumps < ncibuf)
+ clumps = ncibuf;
+ clumps += totalcorrupt/
+ ((arena->diskstats.used - totalcorrupt)/clumps);
+ clumps += totalcorrupt/2000;
+ if(clumps < minclumps)
+ clumps = minclumps;
+ clumps += ncib-1;
+ clumps -= clumps%ncib;
+
+ /*
+ * Can't write into the actual data.
+ */
+ v = offset0 + arena->size - arena->blocksize;
+ v -= (clumps+ncib-1)/ncib * arena->blocksize;
+ if(v < lastclumpend){
+ v = offset0 + arena->size - arena->blocksize;
+ clumps = (v-lastclumpend)/arena->blocksize * ncib;
+ }
+
+ if(clumps < minclumps)
+ print("cannot happen?\n");
+
+ /*
+ * Check clumpinfo blocks against directory we created.
+ * The tricky part is handling the corrupt sections of arena.
+ * If possible, we remark just the affected directory entries
+ * rather than slide everything down.
+ *
+ * Allocate clumps+1 blocks and check that we don't need
+ * the last one at the end.
+ */
+ bci = loadci(offset0, arena, clumps+1);
+ eci = bci+clumps+1;
+ bcit = cibuf;
+ ecit = cibuf+ncibuf;
+ smart = 1;
+Again:
+ nbad = 0;
+ ci = bci;
+ for(cit=bcit; cit<ecit && ci<eci; cit++){
+ if(cit->corrupt){
+ vlong n, m;
+ if(smart){
+ /*
+ * If we can, just mark existing entries as co…
+ */
+ n = cit->corrupt;
+ for(xci=ci; n>0 && xci<eci; xci++)
+ n -= ClumpSize+xci->size;
+ if(n > 0 || xci >= eci)
+ goto Dumb;
+ printed = 0;
+ for(; ci<xci; ci++){
+ if(verbose && ci->type != VtCorruptTyp…
+ if(!printed){
+ print("marking directo…
+ (int)(ci-bci),…
+ printed = 1;
+ }
+ print("\ttype=%d size=%d uncsi…
+ ci->type, ci->size, ci…
+ }
+ ci->type = VtCorruptType;
+ }
+ }else{
+ Dumb:
+ print("\trewriting clump directory\n");
+ /*
+ * Otherwise, blaze a new trail.
+ */
+ n = cit->corrupt;
+ while(n > 0 && ci < eci){
+ if(n < ClumpSize)
+ sysfatal("bad math in clump co…
+ if(n <= VtMaxLumpSize+ClumpSize)
+ m = n;
+ else{
+ m = VtMaxLumpSize+ClumpSize;
+ if(n-m < ClumpSize)
+ m -= ClumpSize;
+ }
+ ci->type = VtCorruptType;
+ ci->size = m-ClumpSize;
+ ci->uncsize = m-ClumpSize;
+ memset(ci->score, 0, VtScoreSize);
+ ci++;
+ n -= m;
+ }
+ }
+ continue;
+ }
+ if(clumpinfocmp(&cit->ci, ci) != 0){
+ if(verbose && (smart || verbose>1)){
+ print("clumpinfo %d\n", (int)(ci-bci));
+ print("\twant: %d %d %d %V\n",
+ cit->ci.type, cit->ci.size,
+ cit->ci.uncsize, cit->ci.score);
+ print("\thave: %d %d %d %V\n",
+ ci->type, ci->size,
+ ci->uncsize, ci->score);
+ }
+ *ci = cit->ci;
+ nbad++;
+ }
+ ci++;
+ }
+ if(ci >= eci || cit < ecit){
+ print("ran out of space editing existing directory; rewriting\…
+ print("# eci %ld ci %ld ecit %ld cit %ld\n", eci-bci, ci-bci, …
+ assert(smart); /* can't happen second time thru */
+ smart = 0;
+ goto Again;
+ }
+
+ assert(ci <= eci);
+ arena->diskstats.clumps = ci-bci;
+ eoffset = writeci(offset0, arena, bci, ci-bci);
+ if(sealing && fix)
+ sbrollback(&newsha, v);
+print("eoffset=%lld lastclumpend=%lld diff=%lld unseal=%d\n", eoffset, lastclu…
+ if(lastclumpend > eoffset)
+ print("arena directory overwrote blocks! cannot happen!\n");
+ free(bci);
+ if(smart && nbad)
+ print("arena directory has %d bad or missing entries\n", nbad);
+Nocib:
+ if(eoffset - lastclumpend > 64*1024 && (!arena->diskstats.sealed || un…
+ if(arena->diskstats.sealed)
+ print("unsealing arena\n");
+ sealing = 0;
+ memset(oldscore, 0, VtScoreSize);
+ }
+
+ /*
+ * Finish the SHA1 of the new data - only meaningful
+ * if we've been writing to disk (`fix').
+ */
+ arena->diskstats.sealed = sealing;
+ arena->memstats = arena->diskstats;
+ if(sealing && fix){
+ uchar tbuf[MaxDiskBlock];
+
+ sbdiskhash(&newsha, toffset);
+ memset(tbuf, 0, sizeof tbuf);
+ packarena(arena, tbuf);
+ sbupdate(&newsha, tbuf, toffset, arena->blocksize);
+ sbscore(&newsha, score);
+ }
+}
+
+/*
+ * Copy the arena that starts at offset (arena->size bytes)
+ * into a newly created file named dumpbase.anum, 4*M bytes at
+ * a time.  Errors are reported on standard error and end the
+ * copy early; the output file is closed on every path once it
+ * has been created.
+ */
+void
+dumparena(vlong offset, int anum, Arena *arena)
+{
+	char buf[1000];
+	vlong o, e;
+	int fd, n;
+
+	snprint(buf, sizeof buf, "%s.%d", dumpbase, anum);
+	if((fd = create(buf, OWRITE, 0666)) < 0){
+		fprint(2, "create %s: %r\n", buf);
+		return;
+	}
+	e = offset+arena->size;
+	for(o=offset; o<e; o+=n){
+		n = 4*M;
+		if(o+n > e)
+			n = e-o;
+		if(pwrite(fd, pagein(o, n), n, o-offset) != n){
+			fprint(2, "write %s at %#llux: %r\n", buf, o-offset);
+			break;
+		}
+	}
+	/* checkarena calls this once per arena; don't leak a descriptor each time */
+	close(fd);
+}
+
+/*
+ * Check (and, through the paged buffer, rewrite) arena number
+ * anum at partition offset offset.  Guessarena reconstructs
+ * the head and tail; this function then compares the freshly
+ * packed head and tail blocks with what is on disk, reports
+ * any differences with showdiffs, copies the packed blocks
+ * into the page buffer and calls pageout.  When dumpbase is
+ * set the arena is dumped to a file instead and nothing is
+ * compared.
+ */
+void
+checkarena(vlong offset, int anum)
+{
+ uchar dbuf[MaxDiskBlock];
+ uchar *p, oldscore[VtScoreSize], score[VtScoreSize];
+ Arena arena, oarena;
+ ArenaHead head;
+ Info *fmt, *fmta;
+ int sz;
+
+ print("# arena %d: offset %#llux\n", anum, offset);
+
+ if(offset >= partend){
+ print("arena offset out of bounds\n");
+ return;
+ }
+
+ guessarena(offset, anum, &head, &arena, oldscore, score);
+
+ if(verbose){
+ print("#\tversion=%d name=%s blocksize=%d size=%z",
+ head.version, head.name, head.blocksize, head.size);
+ if(head.clumpmagic)
+ print(" clumpmagic=%#.8ux", head.clumpmagic);
+ print("\n#\tclumps=%d cclumps=%d used=%,lld uncsize=%,lld\n",
+ arena.diskstats.clumps, arena.diskstats.cclumps,
+ arena.diskstats.used, arena.diskstats.uncsize);
+ print("#\tctime=%t\n", arena.ctime);
+ print("#\twtime=%t\n", arena.wtime);
+ if(arena.diskstats.sealed)
+ print("#\tsealed score=%V\n", score);
+ }
+
+ if(dumpbase){
+ dumparena(offset, anum, &arena);
+ return;
+ }
+
+ /* head block: compare the reconstructed head with the disk, then replace it */
+ memset(dbuf, 0, sizeof dbuf);
+ packarenahead(&head, dbuf);
+ p = pagein(offset, arena.blocksize);
+ if(memcmp(dbuf, p, arena.blocksize) != 0){
+ print("on-disk arena header incorrect\n");
+ showdiffs(dbuf, p, arena.blocksize,
+ arena.version==ArenaVersion4 ? headinfo4 : headinfo5);
+ }
+ memmove(p, dbuf, arena.blocksize);
+
+ /* tail block: same again, with the seal score in the last VtScoreSize bytes */
+ memset(dbuf, 0, sizeof dbuf);
+ packarena(&arena, dbuf);
+ if(arena.diskstats.sealed)
+ scorecp(dbuf+arena.blocksize-VtScoreSize, score);
+ p = pagein(offset+arena.size-arena.blocksize, arena.blocksize);
+ memset(&oarena, 0, sizeof oarena);
+ unpackarena(&oarena, p);
+ if(arena.version == ArenaVersion4){
+ sz = ArenaSize4;
+ fmt = tailinfo4;
+ fmta = tailinfo4a;
+ }else{
+ sz = ArenaSize5;
+ fmt = tailinfo5;
+ fmta = tailinfo5a;
+ }
+ if(p[sz] == 1){
+ fmt = fmta;
+ if(oarena.diskstats.sealed){
+ /*
+ * some arenas were sealed with the extension
+ * before we adopted the convention that if it didn't
+ * add new information it gets dropped.
+ */
+ _packarena(&arena, dbuf, 1);
+ }
+ }
+ if(memcmp(dbuf, p, arena.blocksize-VtScoreSize) != 0){
+ print("on-disk arena tail incorrect\n");
+ showdiffs(dbuf, p, arena.blocksize-VtScoreSize, fmt);
+ }
+ if(arena.diskstats.sealed){
+ if(oarena.diskstats.sealed)
+ if(scorecmp(p+arena.blocksize-VtScoreSize, oldscore) != 0){
+ print("on-disk arena seal score incorrect\n");
+ print("\tcorrect=%V\n", oldscore);
+ print("\t disk=%V\n", p+arena.blocksize-VtScoreSize);
+ }
+ if(fix && scorecmp(p+arena.blocksize-VtScoreSize, score) != 0){
+ print("%ssealing arena%s: %V\n",
+ oarena.diskstats.sealed ? "re" : "",
+ scorecmp(oldscore, score) == 0 ?
+ "" : " after changes", score);
+ }
+ }
+ memmove(p, dbuf, arena.blocksize);
+
+ pageout();
+}
+
+/*
+ * Build an arena map by probing the partition: starting at
+ * ap.arenabase and stepping by arenasize up to partend, page
+ * in Block bytes and try to unpack an arena head.  Each
+ * offset whose head unpacks successfully contributes one map
+ * entry (start, start+size from the head, and the head's
+ * name).  Returns a vtmallocz'ed AMapN; its map array is
+ * grown one entry at a time with vtrealloc.
+ */
+AMapN*
+buildamap(void)
+{
+ uchar *p;
+ vlong o;
+ ArenaHead h;
+ AMapN *an;
+ AMap *m;
+
+ an = vtmallocz(sizeof *an);
+ for(o=ap.arenabase; o<partend; o+=arenasize){
+ p = pagein(o, Block);
+ if(unpackarenahead(&h, p) >= 0){
+ an->map = vtrealloc(an->map, (an->n+1)*sizeof an->map[…
+ m = &an->map[an->n++];
+ m->start = o;
+ m->stop = o+h.size;
+ strcpy(m->name, h.name);
+ }
+ }
+ return an;
+}
+
+/*
+ * Rebuild the arena map from the arena heads found on the
+ * partition (buildamap), format it as text -- a count line
+ * followed by one "name start stop" line per arena -- and
+ * compare it with the table stored at ap.tabbase.  If they
+ * differ, the new text is copied into the page buffer; pageout
+ * is called either way.  The formatted string and the
+ * temporary map are freed before returning.
+ */
+void
+checkmap(void)
+{
+	char *s;
+	uchar *p;
+	int i, len;
+	AMapN *an;
+	Fmt fmt;
+
+	an = buildamap();
+	fmtstrinit(&fmt);
+	fmtprint(&fmt, "%ud\n", an->n);
+	for(i=0; i<an->n; i++)
+		fmtprint(&fmt, "%s\t%lld\t%lld\n",
+			an->map[i].name, an->map[i].start, an->map[i].stop);
+	s = fmtstrflush(&fmt);
+
+	/* the text form is complete; the map itself is no longer needed */
+	free(an->map);
+	free(an);
+
+	len = strlen(s);
+	if(len > ap.tabsize){
+		print("arena partition map too long: need %z bytes have %z\n",
+			(vlong)len, (vlong)ap.tabsize);
+		len = ap.tabsize;
+	}
+
+	if(ap.tabsize >= 4*M){	/* can't happen - max arenas is 2000 */
+		print("arena partition map *way* too long\n");
+		free(s);
+		return;
+	}
+
+	p = pagein(ap.tabbase, ap.tabsize);
+	if(memcmp(p, s, len) != 0){
+		print("arena partition map incorrect; rewriting.\n");
+		memmove(p, s, len);
+	}
+	pageout();
+	free(s);
+}
+
+int mainstacksize = 512*1024;
+
+/*
+ * Entry point.  Parses the options, opens the partition named
+ * by the first argument (read-only unless -f is given), then
+ * runs checkarenas -- passing the optional second argument --
+ * followed by checkmap, and exits.
+ */
+void
+threadmain(int argc, char **argv)
+{
+ int mode;
+
+ mode = OREAD;
+ readonly = 1;
+ ARGBEGIN{
+ case 'U': /* sets the global unseal flag */
+ unseal = 1;
+ break;
+ case 'a': /* arena size, parsed by unittoull */
+ arenasize = unittoull(EARGF(usage()));
+ break;
+ case 'b': /* block size, parsed by unittoull */
+ ap.blocksize = unittoull(EARGF(usage()));
+ break;
+ case 'f': /* fix: open the partition read-write */
+ fix = 1;
+ mode = ORDWR;
+ readonly = 0;
+ break;
+ case 'n': /* base name for arenas (see loadarenabasics) */
+ basename = EARGF(usage());
+ break;
+ case 'v': /* each -v raises the verbosity level */
+ verbose++;
+ break;
+ case 'x': /* dump arenas to files named dumpbase.N (see dumparena) */
+ dumpbase = EARGF(usage());
+ break;
+ default:
+ usage();
+ }ARGEND
+
+ if(argc != 1 && argc != 2)
+ usage();
+
+ file = argv[0];
+
+ ventifmtinstall();
+ fmtinstall('z', zfmt);
+ fmtinstall('t', tfmt);
+ quotefmtinstall();
+
+ part = initpart(file, mode|ODIRECT);
+ if(part == nil)
+ sysfatal("can't open %s: %r", file);
+ partend = part->size;
+
+ checkarenas(argc > 1 ? argv[1] : nil);
+ checkmap();
+ threadexitsall(nil);
+}
+
diff --git a/src/cmd/venti/srv/fns.h b/src/cmd/venti/srv/fns.h
t@@ -24,8 +24,13 @@ void delaykickicache(void);
void delaykickround(Round*);
void delaykickroundproc(void*);
void dirtydblock(DBlock*, int);
+void diskaccess(int);
+void disksched(void);
AState diskstate(void);
void *emalloc(ulong);
+void emptydcache(void);
+void emptyicache(void);
+void emptylumpcache(void);
void *erealloc(void *, ulong);
char *estrdup(char*);
void *ezmalloc(ulong);
t@@ -49,6 +54,7 @@ u32int hashbits(u8int *score, int nbits);
int httpdinit(char *address, char *webroot);
int iaddrcmp(IAddr *ia1, IAddr *ia2);
IEntry* icachedirty(u32int, u32int, u64int);
+ulong icachedirtyfrac(void);
void icacheclean(IEntry*);
int ientrycmp(const void *vie1, const void *vie2);
char *ifileline(IFile *f);
t@@ -77,6 +83,7 @@ int insertscore(u8int *score, IAddr *ia, int …
void kickdcache(void);
void kickicache(void);
void kickround(Round*, int wait);
+int loadbloom(Bloom*);
ZBlock *loadclump(Arena *arena, u64int aa, int blocks, Clump *c…
DBlock *loadibucket(Index *index, u8int *score, ISect **is, u32int *buc…
int loadientry(Index *index, u8int *score, int type, IEntry *ie…
t@@ -98,6 +105,7 @@ int okamap(AMap *am, int n, u64int start, u6…
int okibucket(IBucket*, ISect*);
int outputamap(Fmt *f, AMap *am, int n);
int outputindex(Fmt *f, Index *ix);
+int _packarena(Arena *arena, u8int *buf, int);
int packarena(Arena *arena, u8int *buf);
int packarenahead(ArenaHead *head, u8int *buf);
int packarenapart(ArenaPart *as, u8int *buf);
t@@ -129,6 +137,7 @@ ZBlock *readfile(char *name);
int readifile(IFile *f, char *name);
Packet *readlump(u8int *score, int type, u32int size, int *cach…
int readpart(Part *part, u64int addr, u8int *buf, u32int n);
+int resetbloom(Bloom*);
int runconfig(char *config, Config*);
int scorecmp(u8int *, u8int *);
void scoremem(u8int *score, u8int *buf, int size);
diff --git a/src/cmd/venti/srv/graph.c b/src/cmd/venti/srv/graph.c
t@@ -55,7 +55,11 @@ ginit(void)
first = 0;
memimageinit();
+#ifdef PLAN9PORT
smallfont = openmemsubfont(unsharp("#9/font/lucsans/lstr.10"));
+#else
+ smallfont = openmemsubfont("/lib/font/bit/lucidasans/lstr.10");
+#endif
black = memblack;
blue = allocrepl(DBlue);
red = allocrepl(DRed);
t@@ -121,7 +125,7 @@ statgraph(Graph *g)
if(g->wid > nelem(bin))
g->wid = nelem(bin);
if(g->fill < 0)
- g->fill = ((uint)(uintptr)g->arg>>8)%nelem(lofill);
+ g->fill = ((uint)g->arg>>8)%nelem(lofill);
if(g->fill > nelem(lofill))
g->fill %= nelem(lofill);
t@@ -151,7 +155,7 @@ statgraph(Graph *g)
qlock(&memdrawlock);
ginit();
if(smallfont==nil || black==nil || blue==nil || red==nil || hifill==ni…
- werrstr("graphics initialization failed");
+ werrstr("graphics initialization failed: %r");
qunlock(&memdrawlock);
return nil;
}
t@@ -186,12 +190,12 @@ statgraph(Graph *g)
if(0)
if(lastlo != -1){
if(lastlo < lo)
- memimagedraw(m, Rect(x-1, lastlo, x, lo), hifi…
+ memimagedraw(m, Rect(x-1, lastlo, x, lo), hifi…
else if(lastlo > lo)
- memimagedraw(m, Rect(x-1, lo, x, lastlo), hifi…
+ memimagedraw(m, Rect(x-1, lo, x, lastlo), hifi…
}
- memimagedraw(m, Rect(x, hi, x+1,lo), hifill[g->fill], ZP, memo…
- memimagedraw(m, Rect(x, lo, x+1, r.max.y), lofill[g->fill], ZP…
+ memimagedraw(m, Rect(x, hi, x+1,lo), hifill[g->fill%nelem(hifi…
+ memimagedraw(m, Rect(x, lo, x+1, r.max.y), lofill[g->fill%nele…
lastlo = lo;
}
diff --git a/src/cmd/venti/srv/httpd.c b/src/cmd/venti/srv/httpd.c
t@@ -9,7 +9,7 @@ extern QLock memdrawlock;
enum
{
ObjNameSize = 64,
- MaxObjs = 16
+ MaxObjs = 64
};
struct HttpObj
t@@ -28,6 +28,12 @@ static int dindex(HConnect *c);
static int xindex(HConnect *c);
static int xlog(HConnect *c);
static int sindex(HConnect *c);
+static int hempty(HConnect *c);
+static int hlcacheempty(HConnect *c);
+static int hdcacheempty(HConnect *c);
+static int hicacheempty(HConnect *c);
+static int hicachekick(HConnect *c);
+static int hdcachekick(HConnect *c);
static int hicacheflush(HConnect *c);
static int hdcacheflush(HConnect *c);
static int notfound(HConnect *c);
t@@ -53,10 +59,17 @@ httpdinit(char *address, char *dir)
httpdobj("/xindex", xindex);
httpdobj("/flushicache", hicacheflush);
httpdobj("/flushdcache", hdcacheflush);
+ httpdobj("/kickicache", hicachekick);
+ httpdobj("/kickdcache", hdcachekick);
httpdobj("/graph/", xgraph);
+ httpdobj("/set", xset);
httpdobj("/set/", xset);
httpdobj("/log", xlog);
httpdobj("/log/", xlog);
+ httpdobj("/empty", hempty);
+ httpdobj("/emptyicache", hicacheempty);
+ httpdobj("/emptylumpcache", hlcacheempty);
+ httpdobj("/emptydcache", hdcacheempty);
if(vtproc(listenproc, address) < 0)
return -1;
t@@ -105,8 +118,6 @@ listenproc(void *vaddress)
char *address, ndir[NETPATHLEN], dir[NETPATHLEN];
int ctl, nctl, data;
-/*sleep(1000); // let strace find us */
-
address = vaddress;
ctl = announce(address, dir);
if(ctl < 0){
t@@ -148,7 +159,6 @@ httpproc(void *v)
HConnect *c;
int ok, i, n;
-/*sleep(1000); // let strace find us */
c = v;
for(;;){
t@@ -182,7 +192,7 @@ httpproc(void *v)
}
static int
-percent(long v, long total)
+percent(ulong v, ulong total)
{
if(total == 0)
total = 1;
t@@ -240,6 +250,31 @@ preqtext(HConnect *c)
}
+/*
+ * Send a "400 Bad Request" response on connection c.  The
+ * HTML body is formatted into c->xferbuf and is written
+ * unless the request method is HEAD.  Returns the result of
+ * hflush on the output stream.
+ * NOTE(review): the body format string is cut off in this
+ * view; presumably it includes the error string set by the
+ * caller with werrstr -- confirm against the full source.
+ */
static int
+herror(HConnect *c)
+{
+ int n;
+ Hio *hout;
+
+ hout = &c->hout;
+ n = snprint(c->xferbuf, HBufSize, "<html><head><title>Error</title></h…
+ hprint(hout, "%s %s\r\n", hversion, "400 Bad Request");
+ hprint(hout, "Date: %D\r\n", time(nil));
+ hprint(hout, "Server: Venti\r\n");
+ hprint(hout, "Content-Type: text/html\r\n");
+ hprint(hout, "Content-Length: %d\r\n", n);
+ if(c->head.closeit)
+ hprint(hout, "Connection: close\r\n");
+ else if(!http11(c))
+ hprint(hout, "Connection: Keep-Alive\r\n");
+ hprint(hout, "\r\n");
+
+ if(c->req.meth == nil || strcmp(c->req.meth, "HEAD") != 0)
+ hwrite(hout, c->xferbuf, n);
+
+ return hflush(hout);
+}
+
+static int
notfound(HConnect *c)
{
int r;
t@@ -325,21 +360,53 @@ static struct
"logging", &ventilogging,
"stats", &collectstats,
"icachesleeptime", &icachesleeptime,
+ "minicachesleeptime", &minicachesleeptime,
"arenasumsleeptime", &arenasumsleeptime,
+ "l0quantum", &l0quantum,
+ "l1quantum", &l1quantum,
+ "manualscheduling", &manualscheduling,
+ "ignorebloom", &ignorebloom,
+ "syncwrites", &syncwrites,
+ "icacheprefetch", &icacheprefetch,
0
};
+/*
+ * Reply to a bare /set request with one "name = value" line
+ * for every entry in namedints.  The lines are written to the
+ * HTTP connection; writing them with print would send them to
+ * the server's standard output and leave the client with an
+ * empty response.
+ * Returns -1 if preqtype fails, 0 otherwise.
+ */
static int
+xsetlist(HConnect *c)
+{
+	int i;
+	Hio *hout;
+
+	if(preqtype(c, "text/plain") < 0)
+		return -1;
+	hout = &c->hout;
+	for(i=0; namedints[i].name; i++)
+		hprint(hout, "%s = %d\n", namedints[i].name, *namedints[i].p);
+	hflush(hout);
+	return 0;
+}
+
+
+
+static int
xset(HConnect *c)
{
int i, nf, r;
char *f[10], *s;
+ if(strcmp(c->req.uri, "/set") == 0 || strcmp(c->req.uri, "/set/") == 0)
+ return xsetlist(c);
+
s = estrdup(c->req.uri);
nf = getfields(s+strlen("/set/"), f, nelem(f), 1, "/");
- if(nf < 1)
- return notfound(c);
+ if(nf < 1){
+ r = preqtext(c);
+ if(r < 0)
+ return r;
+ for(i=0; namedints[i].name; i++)
+ hprint(&c->hout, "%s = %d\n", namedints[i].name, *name…
+ hflush(&c->hout);
+ return 0;
+ }
for(i=0; namedints[i].name; i++){
if(strcmp(f[0], namedints[i].name) == 0){
if(nf >= 2)
t@@ -495,6 +562,108 @@ darena(Hio *hout, Arena *arena)
}
+/*
+ * HTTP handler for /empty: empty the lump cache, the disk
+ * cache and the index cache, in that order, and report it.
+ * Returns preqtext's result if that fails, 0 otherwise.
+ */
static int
+hempty(HConnect *c)
+{
+	int err;
+
+	if((err = preqtext(c)) < 0)
+		return err;
+
+	emptylumpcache();
+	emptydcache();
+	emptyicache();
+
+	hprint(&c->hout, "emptied all caches\n");
+	hflush(&c->hout);
+	return 0;
+}
+
+/*
+ * HTTP handler for /emptylumpcache: empty the lump cache and
+ * report it.  Returns preqtext's result if that fails, 0
+ * otherwise.
+ */
+static int
+hlcacheempty(HConnect *c)
+{
+	int err;
+
+	if((err = preqtext(c)) < 0)
+		return err;
+
+	emptylumpcache();
+
+	hprint(&c->hout, "emptied lumpcache\n");
+	hflush(&c->hout);
+	return 0;
+}
+
+/*
+ * HTTP handler for /emptyicache: empty the index cache and
+ * report it.  Returns preqtext's result if that fails, 0
+ * otherwise.
+ */
+static int
+hicacheempty(HConnect *c)
+{
+	int err;
+
+	if((err = preqtext(c)) < 0)
+		return err;
+
+	emptyicache();
+
+	hprint(&c->hout, "emptied icache\n");
+	hflush(&c->hout);
+	return 0;
+}
+
+/*
+ * HTTP handler for /emptydcache: empty the disk cache and
+ * report it.  Returns preqtext's result if that fails, 0
+ * otherwise.
+ */
+static int
+hdcacheempty(HConnect *c)
+{
+	int err;
+
+	if((err = preqtext(c)) < 0)
+		return err;
+
+	emptydcache();
+
+	hprint(&c->hout, "emptied dcache\n");
+	hflush(&c->hout);
+	return 0;
+}
+/*
+ * HTTP handler for /kickicache: call kickicache and report
+ * it.  Returns preqtext's result if that fails, 0 otherwise.
+ */
+static int
+hicachekick(HConnect *c)
+{
+	int err;
+
+	if((err = preqtext(c)) < 0)
+		return err;
+
+	kickicache();
+
+	hprint(&c->hout, "kicked icache\n");
+	hflush(&c->hout);
+	return 0;
+}
+
+/*
+ * HTTP handler for /kickdcache: call kickdcache and report
+ * it.  Returns preqtext's result if that fails, 0 otherwise.
+ */
+static int
+hdcachekick(HConnect *c)
+{
+	int err;
+
+	if((err = preqtext(c)) < 0)
+		return err;
+
+	kickdcache();
+
+	hprint(&c->hout, "kicked dcache\n");
+	hflush(&c->hout);
+	return 0;
+}
+static int
hicacheflush(HConnect *c)
{
Hio *hout;
t@@ -569,6 +738,7 @@ rawgraph(Stats *s, Stats *t, void *va)
{
Arg *a;
+ USED(s);
a = va;
return t->n[a->index];
}
t@@ -587,6 +757,7 @@ pctgraph(Stats *s, Stats *t, void *va)
{
Arg *a;
+ USED(s);
a = va;
return percent(t->n[a->index], t->n[a->index2]);
}
t@@ -722,7 +893,7 @@ static char* graphname[] =
"isectwritebyte",
"sumread",
- "sumreadbyte"
+ "sumreadbyte",
};
static int
t@@ -733,7 +904,6 @@ findname(char *s)
for(i=0; i<nelem(graphname); i++)
if(strcmp(graphname[i], s) == 0)
return i;
-fprint(2, "no name '%s'\n", s);
return -1;
}
t@@ -769,10 +939,14 @@ xgraph(HConnect *c)
if(0) fprint(2, "graph %s\n" ,s);
memset(&g, 0, sizeof g);
nf = getfields(s+strlen("/graph/"), f, nelem(f), 1, "/");
- if(nf < 1)
- goto notfound;
- if((arg.index = findname(f[0])) == -1 && strcmp(f[0], "*") != 0)
- goto notfound;
+ if(nf < 1){
+ werrstr("bad syntax -- not enough fields");
+ goto error;
+ }
+ if((arg.index = findname(f[0])) == -1 && strcmp(f[0], "*") != 0){
+ werrstr("unknown name %s", f[0]);
+ goto error;
+ }
g.arg = &arg;
g.t0 = -120;
g.t1 = 0;
t@@ -793,14 +967,18 @@ if(0) fprint(2, "graph %s\n" ,s);
else if(strncmp(f[i], "max=", 4) == 0)
g.max = atoi(f[i]+4);
else if(strncmp(f[i], "pct=", 4) == 0){
- if((arg.index2 = findname(f[i]+4)) == -1)
- goto notfound;
+ if((arg.index2 = findname(f[i]+4)) == -1){
+ werrstr("unknown name %s", f[i]+4);
+ goto error;
+ }
g.fn = pctgraph;
g.min = 0;
g.max = 100;
}else if(strncmp(f[i], "pctdiff=", 8) == 0){
- if((arg.index2 = findname(f[i]+8)) == -1)
- goto notfound;
+ if((arg.index2 = findname(f[i]+8)) == -1){
+ werrstr("unknown name %s", f[i]+8);
+ goto error;
+ }
g.fn = pctdiffgraph;
g.min = 0;
g.max = 100;
t@@ -830,7 +1008,7 @@ if(0) fprint(2, "graph %s\n" ,s);
m = statgraph(&g);
if(m == nil)
- goto notfound;
+ goto error;
if(preqtype(c, "image/png") < 0)
return -1;
t@@ -843,9 +1021,9 @@ if(0) fprint(2, "graph %s\n" ,s);
free(s);
return 0;
-notfound:
+error:
free(s);
- return notfound(c);
+ return herror(c);
}
static int
t@@ -944,7 +1122,6 @@ vtloghdump(Hio *h, VtLog *l)
name = l ? l->name : "&lt;nil&gt;";
-fprint(2, "hdump xfer %d\n", h->xferenc);
hprint(h, "<html><head>\n");
hprint(h, "<title>Venti Server Log: %s</title>\n", name);
hprint(h, "</head><body>\n");
diff --git a/src/cmd/venti/srv/icache.c b/src/cmd/venti/srv/icache.c
t@@ -11,6 +11,7 @@ struct ICache
int bits; /* bits to use for indexing he…
u32int size; /* number of heads; == 1 <<…
IEntry *base; /* all allocated hash tabl…
+ IEntry *free;
u32int entries; /* elements in base */
IEntry *dirty; /* chain of dirty elements */
u32int ndirty;
t@@ -23,6 +24,8 @@ struct ICache
int nlast;
};
+int icacheprefetch = 1;
+
static ICache icache;
static IEntry *icachealloc(IAddr *ia, u8int *score);
t@@ -45,6 +48,12 @@ initicache(int bits, int depth)
setstat(StatIcacheSize, icache.entries);
}
+/*
+ * Report how much of the index cache is dirty: the dirty entry
+ * count scaled by IcacheFrac and divided by the total entry count.
+ */
+ulong
+icachedirtyfrac(void)
+{
+	vlong scaled;
+
+	/* widen to vlong before multiplying, as the single-expression form did */
+	scaled = (vlong)icache.ndirty * IcacheFrac;
+	return scaled / icache.entries;
+}
+
u32int
hashbits(u8int *sc, int bits)
{
t@@ -141,14 +150,16 @@ lookupscore(u8int *score, int type, IAddr *ia, int *rac)
* load the table of contents for that arena into the cache.
*/
ie = icachealloc(&d.ia, score);
- icache.last[icache.nlast++%nelem(icache.last)] = amapitoa(mainindex, i…
- aa = ie->ia.addr - aa; /* compute base addr of arena */
- for(i=0; i<nelem(icache.last); i++)
- if(icache.last[i] != icache.last[0])
- break;
- if(i==nelem(icache.last) && icache.lastload != icache.last[0]){
- load = icache.last[0];
- icache.lastload = load;
+ if(icacheprefetch){
+ icache.last[icache.nlast++%nelem(icache.last)] = amapitoa(main…
+ aa = ie->ia.addr - aa; /* compute base addr of arena */
+ for(i=0; i<nelem(icache.last); i++)
+ if(icache.last[i] != icache.last[0])
+ break;
+ if(i==nelem(icache.last) && icache.lastload != icache.last[0]){
+ load = icache.last[0];
+ icache.lastload = load;
+ }
}
found:
t@@ -249,6 +260,11 @@ icachealloc(IAddr *ia, u8int *score)
trace(TraceLump, "icachealloc unused");
goto Found;
}
+
+ if((ie = icache.free) != nil){
+ icache.free = ie->next;
+ goto Found;
+ }
h = icache.stolen;
for(i=0;; i++){
t@@ -346,3 +362,21 @@ icacheclean(IEntry *ie)
trace(TraceProc, "icachedirty exit");
}
+/*
+ * Strip every clean entry out of the index cache hash chains and
+ * park it on icache.free; dirty entries stay where they are.
+ * icache.lock is held for the whole walk.
+ */
+void
+emptyicache(void)
+{
+	int bucket;
+	IEntry *cur, **link;
+
+	qlock(&icache.lock);
+	for(bucket=0; bucket<icache.size; bucket++){
+		link = &icache.heads[bucket];
+		while((cur = *link) != nil){
+			if(cur->dirty != 0){
+				/* not clean: leave it chained and step past it */
+				link = &cur->next;
+				continue;
+			}
+			/* unlink from the chain, then push onto the free list */
+			*link = cur->next;
+			cur->next = icache.free;
+			icache.free = cur;
+		}
+	}
+	qunlock(&icache.lock);
+}
diff --git a/src/cmd/venti/srv/icachewrite.c b/src/cmd/venti/srv/icachewrite.c
t@@ -12,6 +12,7 @@ static void icachewritecoord(void*);
static IEntry *iesort(IEntry*);
int icachesleeptime = 1000; /* milliseconds */
+int minicachesleeptime = 50;
enum
{
t@@ -74,7 +75,7 @@ nextchunk(Index *ix, ISect *is, IEntry **pie, u64int *paddr,…
static int
icachewritesect(Index *ix, ISect *is, u8int *buf)
{
- int err, h, bsize;
+ int err, h, bsize, t;
u32int lo, hi;
u64int addr, naddr;
uint nbuf, off;
t@@ -96,7 +97,14 @@ icachewritesect(Index *ix, ISect *is, u8int *buf)
err = 0;
while(iedirty){
- sleep(icachesleeptime);
+ disksched();
+ while((t=icachesleeptime) == SleepForever){
+ sleep(1000);
+ disksched();
+ }
+ if(t < minicachesleeptime)
+ t = minicachesleeptime;
+ sleep(t);
trace(TraceProc, "icachewritesect nextchunk");
chunk = nextchunk(ix, is, &iedirty, &addr, &nbuf);
t@@ -146,12 +154,15 @@ icachewritesect(Index *ix, ISect *is, u8int *buf)
break;
}
packibucket(&ib, buf+off, is->bucketmagic);
+ /* XXX not right - must update cache after writepart */
if((b = _getdblock(is->part, naddr, ORDWR, 0)) != nil){
memmove(b->data, buf+off, bsize);
putdblock(b);
}
}
+ diskaccess(1);
+
trace(TraceProc, "icachewritesect writepart", addr, nbuf);
if(writepart(is->part, addr, buf, nbuf) < 0){
/* XXX */
t@@ -171,6 +182,7 @@ icachewritesect(Index *ix, ISect *is, u8int *buf)
static void
icachewriteproc(void *v)
{
+ int ret;
uint bsize;
ISect *is;
Index *ix;
t@@ -188,17 +200,17 @@ icachewriteproc(void *v)
trace(TraceProc, "icachewriteproc recv");
recv(is->writechan, 0);
trace(TraceWork, "start");
- icachewritesect(ix, is, buf);
+ ret = icachewritesect(ix, is, buf);
trace(TraceProc, "icachewriteproc send");
trace(TraceWork, "finish");
- send(is->writedonechan, 0);
+ sendul(is->writedonechan, ret);
}
}
static void
icachewritecoord(void *v)
{
- int i;
+ int i, err;
Index *ix;
AState as;
t@@ -216,9 +228,9 @@ icachewritecoord(void *v)
as = diskstate();
if(as.arena==iwrite.as.arena && as.aa==iwrite.as.aa){
/* will not be able to do anything more than last flus…
- trace(TraceProc, "icachewritecoord flush dcache");
+ trace(TraceProc, "icachewritecoord kick dcache");
kickdcache();
- trace(TraceProc, "icachewritecoord flushed dcache");
+ trace(TraceProc, "icachewritecoord kicked dcache");
}
iwrite.as = as;
t@@ -229,13 +241,15 @@ icachewritecoord(void *v)
if(ix->bloom)
send(ix->bloom->writechan, 0);
+ err = 0;
for(i=0; i<ix->nsects; i++)
- recv(ix->sects[i]->writedonechan, 0);
+ err |= recvul(ix->sects[i]->writedonechan);
if(ix->bloom)
- recv(ix->bloom->writedonechan, 0);
+ err |= recvul(ix->bloom->writedonechan);
- trace(TraceProc, "icachewritecoord donewrite");
- setatailstate(&iwrite.as);
+ trace(TraceProc, "icachewritecoord donewrite err=%d", …
+ if(err == 0)
+ setatailstate(&iwrite.as);
}
icacheclean(nil); /* wake up anyone waiting */
trace(TraceWork, "finish");
diff --git a/src/cmd/venti/srv/index.c b/src/cmd/venti/srv/index.c
t@@ -23,17 +23,11 @@
#include "dat.h"
#include "fns.h"
-/*static int bucklook(u8int *score, int type, u8int *data, int n); */
-/*static int writebucket(ISect *is, u32int buck, IBucket *ib, DBlock *b…
-/*static int okibucket(IBucket *ib, ISect *is); */
static int initindex1(Index*);
static ISect *initisect1(ISect *is);
-/*static int splitiblock(Index *ix, DBlock *b, ISect *is, u32int buck, …
#define KEY(k,d) ((d) ? (k)>>(32-(d)) : 0)
-/*static QLock indexlock; //ZZZ */
-
static char IndexMagic[] = "venti index configuration";
Index*
t@@ -375,6 +369,8 @@ initisect(Part *part)
seterr(EAdmin, "can't read index section header: %r");
return nil;
}
+print("read %s at %d: %.2ux %.2ux %.2ux %.2ux\n",
+ part->name, PartBlank, b->data[0], b->data[1], b->data[2], b->data[3]);
is = MKZ(ISect);
if(is == nil){
t@@ -457,9 +453,10 @@ initisect1(ISect *is)
v = is->part->size & ~(u64int)(is->blocksize - 1);
if(is->blockbase + (u64int)is->blocks * is->blocksize != v){
seterr(ECorrupt, "invalid blocks in index section %s", is->nam…
-/*ZZZZZZZZZ */
-/* freeisect(is); */
-/* return nil; */
+ /* ZZZ what to do?
+ freeisect(is);
+ return nil;
+ */
}
if(is->stop - is->start > is->blocks){
t@@ -482,9 +479,10 @@ wbisect(ISect *is)
ZBlock *b;
b = alloczblock(HeadSize, 1, 0);
- if(b == nil)
-/*ZZZ set error? */
+ if(b == nil){
+ /* ZZZ set error? */
return -1;
+ }
if(packisect(is, b->data) < 0){
seterr(ECorrupt, "can't make index section header: %r");
t@@ -789,7 +787,7 @@ loadibucket0(Index *ix, u32int buck, ISect **pis, u32int *…
/*
* find the number of the index section holding score
*/
-static int
+int
indexsect1(Index *ix, u8int *score)
{
return indexsect0(ix, hashbits(score, 32) / ix->div);
diff --git a/src/cmd/venti/srv/lump.c b/src/cmd/venti/srv/lump.c
t@@ -2,6 +2,7 @@
#include "dat.h"
#include "fns.h"
+int syncwrites = 0;
int queuewrites = 0;
int writestodevnull = 0;
t@@ -45,7 +46,7 @@ readlump(u8int *score, int type, u32int size, int *cached)
*cached = 0;
if(lookupscore(score, type, &ia, &rac) < 0){
- /*ZZZ place to check for someone trying to guess scores */
+ /* ZZZ place to check for someone trying to guess scores */
seterr(EOk, "no block with score %V/%d exists", score, type);
putlump(u);
t@@ -92,7 +93,15 @@ writelump(Packet *p, u8int *score, int type, u32int creator…
if(u->data != nil){
ok = 0;
if(packetcmp(p, u->data) != 0){
- seterr(EStrange, "score collision");
+ uchar nscore[VtScoreSize];
+
+ packetsha1(u->data, nscore);
+ if(scorecmp(u->score, score) != 0)
+ seterr(EStrange, "lookuplump returned bad scor…
+ else if(scorecmp(u->score, nscore) != 0)
+ seterr(EStrange, "lookuplump returned bad data…
+ else
+ seterr(EStrange, "score collision %V", score);
ok = -1;
}
packetfree(p);
t@@ -138,7 +147,13 @@ writeqlump(Lump *u, Packet *p, int creator, uint ms)
if(old != nil){
ok = 0;
if(packetcmp(p, old) != 0){
- seterr(EStrange, "score collision");
+ uchar nscore[VtScoreSize];
+
+ packetsha1(old, nscore);
+ if(scorecmp(u->score, nscore) != 0)
+ seterr(EStrange, "readilump returned b…
+ else
+ seterr(EStrange, "score collision %V",…
ok = -1;
}
packetfree(p);
t@@ -160,6 +175,12 @@ writeqlump(Lump *u, Packet *p, int creator, uint ms)
insertlump(u, p);
else
packetfree(p);
+
+ if(syncwrites){
+ flushdcache();
+ flushicache();
+ flushdcache();
+ }
ms = msec() - ms;
addstat2(StatRpcWriteNew, 1, StatRpcWriteNewTime, ms);
diff --git a/src/cmd/venti/srv/lumpcache.c b/src/cmd/venti/srv/lumpcache.c
t@@ -11,7 +11,7 @@ enum
{
HashLog = 9,
HashSize = 1<<HashLog,
- HashMask = HashSize - 1
+ HashMask = HashSize - 1,
};
struct LumpCache
t@@ -175,7 +175,6 @@ again:
* remove it from the heap, and fix up the heap.
*/
size = packetasize(p);
-/*ZZZ */
while(lumpcache.avail < size){
trace(TraceLump, "insertlump bump");
CHECK(checklumpcache());
t@@ -277,6 +276,15 @@ bumplump(void)
return b;
}
+/*
+ * Evict lumps from the lump cache, one bumplump call at a time,
+ * until bumplump reports that nothing more was bumped.
+ */
+void
+emptylumpcache(void)
+{
+	qlock(&lumpcache.lock);
+	for(;;){
+		if(!bumplump())
+			break;
+	}
+	qunlock(&lumpcache.lock);
+}
+
/*
* delete an arbitrary block from the heap
*/
t@@ -415,3 +423,4 @@ checklumpcache(void)
if(lumpcache.nheap + nfree + refed != lumpcache.nblocks)
sysfatal("lc: missing blocks: %d %d %d %d", lumpcache.nheap, r…
}
+
diff --git a/src/cmd/venti/srv/lumpqueue.c b/src/cmd/venti/srv/lumpqueue.c
t@@ -58,22 +58,6 @@ initlumpqueues(int nq)
seterr(EOk, "can't start write queue slave: %r");
return -1;
}
- if(vtproc(queueproc, q) < 0){
- seterr(EOk, "can't start write queue slave: %r");
- return -1;
- }
- if(vtproc(queueproc, q) < 0){
- seterr(EOk, "can't start write queue slave: %r");
- return -1;
- }
- if(vtproc(queueproc, q) < 0){
- seterr(EOk, "can't start write queue slave: %r");
- return -1;
- }
- if(vtproc(queueproc, q) < 0){
- seterr(EOk, "can't start write queue slave: %r");
- return -1;
- }
}
return 0;
diff --git a/src/cmd/venti/srv/mirrorarenas.c b/src/cmd/venti/srv/mirrorarenas.c
t@@ -0,0 +1,464 @@
+/*
+ * Mirror one arena partition onto another.
+ * Be careful to copy only new data.
+ */
+
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+Channel *writechan;
+
+typedef struct Write Write;
+struct Write	/* one write request handed to writeproc over writechan */
+{
+ uchar *p;	/* bytes to write; nil in the zero-length requests copy sends to synchronize */
+ int n;	/* byte count; writeproc skips the request when this is 0 */
+ uvlong o;	/* offset passed to ewritepart for dst */
+ int error;	/* set to 1 by writeproc when ewritepart fails */
+};
+
+Part *src;	/* partition read from; opened OREAD in threadmain */
+Part *dst;	/* partition written to; opened ORDWR in threadmain */
+int force;	/* set by -F: mirror copies the whole arena without comparing anything */
+int verbose;	/* bumped by each -v; enables the progress prints */
+char *status;	/* exit string given to threadexitsall; becomes "errors" when mirror reports a mismatch */
+uvlong astart, aend;	/* bounds of the arena being mirrored, set in mirror and asserted in copy */
+
+/*
+ * Print the command synopsis on standard error and exit
+ * with the status string "usage".
+ */
+void
+usage(void)
+{
+	static char synopsis[] = "usage: mirrorarenas [-v] src dst [ranges]\n";
+
+	fprint(2, "%s", synopsis);
+	threadexitsall("usage");
+}
+
+int
+ereadpart(Part *p, u64int offset, u8int *buf, u32int count)
+{	/*
+	 * Read count bytes at offset of p into buf with readpart.
+	 * Any result other than count is treated as failure: a
+	 * diagnostic is printed and -1 returned.  Returns 0 on success.
+	 */
+ if(readpart(p, offset, buf, count) != count){
+ print("%T readpart %s at %#llux+%ud: %r\n", p->name, offset, c…
+ return -1;
+ }
+ return 0;
+}
+
+int
+ewritepart(Part *p, u64int offset, u8int *buf, u32int count)
+{	/*
+	 * Write count bytes from buf at offset of p with writepart.
+	 * Any result other than count is treated as failure: a
+	 * diagnostic is printed and -1 returned.  Returns 0 on success.
+	 */
+ if(writepart(p, offset, buf, count) != count){
+ print("%T writepart %s at %#llux+%ud: %r\n", p->name, offset, …
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * Extra proc to do writes to dst, so that we can overlap reading
+ * src with writing dst during copy. This is an easy factor of two
+ * (almost) in performance.
+ */
+static void
+writeproc(void *v)
+{
+	Write *req;
+
+	USED(v);
+	for(;;){
+		req = recvp(writechan);
+		if(req == nil)
+			break;		/* nil request ends the proc */
+		/* a zero-length request carries no data; it is only received */
+		if(req->n != 0 && ewritepart(dst, req->o, req->p, req->n) < 0)
+			req->error = 1;
+	}
+}
+
+int
+copy(uvlong start, uvlong end, char *what, DigestState *ds)
+{	/*
+	 * Copy bytes [start, end) from src to dst in chunks of at most
+	 * sizeof tmp[0], alternating between two buffers so that the next
+	 * read from src can proceed while writeproc writes the previous chunk.
+	 * If ds is non-nil every chunk read is also fed to sha1.
+	 * what is only used in the verbose progress line.
+	 * Returns 0 on success, -1 if a read fails or a write was flagged failed.
+	 */
+ int i, n;
+ uvlong o;
+ static uchar tmp[2][1024*1024];	/* the two alternating chunk buffers */
+ Write w[2];	/* w[k] describes the request that uses tmp[k] */
+
+ assert(start <= end);
+ assert(astart <= start && start < aend);
+ assert(astart <= end && end <= aend);
+
+ if(verbose && start != end)
+ print("%T copy %,llud-%,llud %s\n", start, end, what);
+
+ i = 0;
+ memset(w, 0, sizeof w);
+ for(o=start; o<end; o+=n){
+ if(w[i].error)	/* result of the request that last used this slot */
+ goto error;
+ n = sizeof tmp[i];
+ if(o+n > end)
+ n = end - o;	/* short final chunk */
+ if(ereadpart(src, o, tmp[i], n) < 0)
+ goto error;
+ w[i].p = tmp[i];
+ w[i].o = o;
+ w[i].n = n;
+ w[i].error = 0;
+ sendp(writechan, &w[i]);	/* presumably blocks until writeproc takes it (writechan is created with 0 buffer slots) — confirm */
+ if(ds)
+ sha1(tmp[i], n, nil, ds);
+ i = 1-i;	/* switch to the other buffer/request slot */
+ }
+ if(w[i].error)
+ goto error;
+
+ /*
+ * wait for queued write to finish
+ */
+ w[i].p = nil;
+ w[i].o = 0;
+ w[i].n = 0;	/* n == 0: writeproc receives it and writes nothing */
+ w[i].error = 0;
+ sendp(writechan, &w[i]);
+ i = 1-i;	/* now look at the slot of the last real write */
+ if(w[i].error)
+ return -1;
+ return 0;
+
+error:
+ /*
+ * sync with write proc
+ */
+ w[i].p = nil;
+ w[i].o = 0;
+ w[i].n = 0;
+ w[i].error = 0;
+ sendp(writechan, &w[i]);
+ return -1;
+}
+
+/* single-threaded, for reference */
+int
+copy1(uvlong start, uvlong end, char *what, DigestState *ds)
+{
+	int chunk;
+	uvlong off;
+	static uchar tmp[1024*1024];
+
+	assert(start <= end);
+	assert(astart <= start && start < aend);
+	assert(astart <= end && end <= aend);
+
+	if(verbose && start != end)
+		print("%T copy %,llud-%,llud %s\n", start, end, what);
+
+	/* read a chunk, hash it if asked, write it, advance */
+	off = start;
+	while(off < end){
+		chunk = sizeof tmp;
+		if(off+chunk > end)
+			chunk = end - off;
+		if(ereadpart(src, off, tmp, chunk) < 0)
+			return -1;
+		if(ds)
+			sha1(tmp, chunk, nil, ds);
+		if(ewritepart(dst, off, tmp, chunk) < 0)
+			return -1;
+		off += chunk;
+	}
+	return 0;
+}
+
+/*
+ * Feed bytes [start, end) of partition p into the running SHA1
+ * state ds, reading in chunks of at most sizeof tmp.
+ * Returns 0 on success (including the empty range), -1 if a read fails.
+ */
+int
+asha1(Part *p, uvlong start, uvlong end, DigestState *ds)
+{
+	int chunk;
+	uvlong off;
+	static uchar tmp[1024*1024];
+
+	if(start == end)
+		return 0;
+	assert(start < end);
+
+	if(verbose)
+		print("%T sha1 %,llud-%,llud\n", start, end);
+
+	off = start;
+	while(off < end){
+		chunk = sizeof tmp;
+		if(off+chunk > end)
+			chunk = end - off;
+		if(ereadpart(p, off, tmp, chunk) < 0)
+			return -1;
+		sha1(tmp, chunk, nil, ds);
+		off += chunk;
+	}
+	return 0;
+}
+
+/* Round a down to a multiple of b. */
+uvlong
+rdown(uvlong a, int b)
+{
+	uvlong rem;
+
+	rem = a%b;
+	return a - rem;
+}
+
+/* Round a up to a multiple of b; a is returned unchanged when already aligned. */
+uvlong
+rup(uvlong a, int b)
+{
+	uvlong rem;
+
+	rem = a%b;
+	if(rem != 0)
+		a += b - rem;
+	return a;
+}
+
+void
+mirror(Arena *sa, Arena *da)
+{	/*
+	 * Bring destination arena da (on dst) up to date with source arena sa (on src).
+	 * With force set the whole arena, head and tail blocks included, is copied.
+	 * Otherwise the sealed/used state of both sides is compared first, then the
+	 * head block is rewritten and only the new data blocks, the hole before the
+	 * directory (sealed source only) and the new directory blocks are copied,
+	 * followed by the tail block.  When the source is sealed with a non-zero
+	 * score, a SHA1 over the destination is accumulated along the way and
+	 * compared with sa->score before the sealed tail is written.
+	 * Mismatches set status to "errors".
+	 * NOTE(review): the early returns after a failed ewritepart/copy/asha1 do
+	 * not set status, so such failures do not show in the exit status — confirm
+	 * whether that is intended.
+	 */
+ vlong v, si, di, end;
+ int clumpmax, blocksize;
+ static uchar buf[MaxIoSize];
+ ArenaHead h;
+ DigestState xds, *ds;
+ vlong shaoff, base;
+
+ base = sa->base;
+ blocksize = sa->blocksize;
+ end = sa->base + sa->size;
+
+ astart = base - blocksize;	/* the head block is written at base - blocksize below */
+ aend = end + blocksize;	/* the tail block is written at end below */
+
+ shaoff = 0;
+
+ if(force){
+ copy(astart, aend, "all", nil);	/* NOTE(review): result ignored; a failed forced copy is not reported in status */
+ return;
+ }
+
+ if(verbose)
+ print("%T %s (%,llud-%,llud)\n", sa->name, astart, aend);
+
+ if(sa->diskstats.sealed && da->diskstats.sealed && scorecmp(da->score,…
+ if(scorecmp(sa->score, da->score) == 0)
+ return;	/* both sealed with equal scores: nothing to do */
+ print("%T arena %s: sealed score mismatch %V vs %V\n", sa->nam…
+ status = "errors";
+ return;
+ }
+ if(da->diskstats.sealed && scorecmp(da->score, zeroscore) != 0){
+ print("%T arena %s: dst is sealed, src is not\n", sa->name);
+ status = "errors";
+ return;
+ }
+ if(sa->diskstats.used < da->diskstats.used){
+ print("%T arena %s: src used %,lld < dst used %,lld\n", sa->na…
+ status = "errors";
+ return;
+ }
+
+ if(da->clumpmagic != sa->clumpmagic){
+ /*
+ * Write this now to reduce the window in which
+ * the head and tail disagree about clumpmagic.
+ */
+ da->clumpmagic = sa->clumpmagic;
+ memset(buf, 0, sizeof buf);
+ packarena(da, buf);
+ if(ewritepart(dst, end, buf, blocksize) < 0)
+ return;
+ }
+
+ memset(&h, 0, sizeof h);
+ h.version = da->version;
+ strcpy(h.name, da->name);
+ h.blocksize = da->blocksize;
+ h.size = da->size + 2*da->blocksize;	/* arena size plus two extra blocks (head and tail) */
+ h.clumpmagic = da->clumpmagic;
+ memset(buf, 0, sizeof buf);
+ packarenahead(&h, buf);
+ if(ewritepart(dst, base - blocksize, buf, blocksize) < 0)
+ return;
+
+ ds = nil;	/* stays nil unless the source is sealed with a non-zero score */
+ if(sa->diskstats.sealed && scorecmp(sa->score, zeroscore) != 0){
+ /* start sha1 state with header */
+ memset(&xds, 0, sizeof xds);
+ ds = &xds;
+ sha1(buf, blocksize, nil, ds);
+ shaoff = base;	/* shaoff: offset up to which the digest has been fed */
+ }
+
+ if(sa->diskstats.used != da->diskstats.used){
+ di = base+rdown(da->diskstats.used, blocksize);	/* start at the block holding dst's current end of data */
+ si = base+rup(sa->diskstats.used, blocksize);	/* stop at the block boundary after src's end of data */
+ if(ds && asha1(dst, shaoff, di, ds) < 0)	/* hash the part of dst that is not recopied */
+ return;
+ if(copy(di, si, "data", ds) < 0)
+ return;
+ shaoff = si;
+ }
+
+ clumpmax = sa->clumpmax;
+ di = end - da->diskstats.clumps/clumpmax * blocksize;
+ si = end - (sa->diskstats.clumps+clumpmax-1)/clumpmax * blocksize;
+
+ if(sa->diskstats.sealed){
+ /*
+ * might be a small hole between the end of the
+ * data and the beginning of the directory.
+ */
+ v = base+rup(sa->diskstats.used, blocksize);
+ if(ds && asha1(dst, shaoff, v, ds) < 0)
+ return;
+ if(copy(v, si, "hole", ds) < 0)
+ return;
+ shaoff = si;
+ }
+
+ if(da->diskstats.clumps != sa->diskstats.clumps){
+ if(ds && asha1(dst, shaoff, si, ds) < 0)
+ return;
+ if(copy(si, di, "directory", ds) < 0) /* si < di becau…
+ return;
+ shaoff = di;
+ }
+
+ da->ctime = sa->ctime;
+ da->wtime = sa->wtime;
+ da->diskstats = sa->diskstats;
+ da->diskstats.sealed = 0;
+
+ memset(buf, 0, sizeof buf);
+ packarena(da, buf);
+ if(ewritepart(dst, end, buf, blocksize) < 0)
+ return;
+
+ if(ds){
+ asha1(dst, shaoff, end, ds);
+ da->diskstats.sealed = 1;
+ memset(buf, 0, sizeof buf);
+ packarena(da, buf);
+ sha1(buf, blocksize, da->score, ds);
+ if(scorecmp(sa->score, da->score) == 0){
+ if(verbose)
+ print("%T arena %s: %V\n", sa->name, da->score…
+ scorecp(buf+blocksize-VtScoreSize, da->score);
+ if(ewritepart(dst, end, buf, blocksize) < 0)
+ return;
+ }else{
+ print("%T arena %s: sealing dst: score mismatch: %V vs…
+ memset(&xds, 0, sizeof xds);
+ asha1(dst, base-blocksize, end, &xds);
+ sha1(buf, blocksize, da->score, &xds);
+ print("%T reseal: %V\n", da->score);
+ status = "errors";
+ }
+ }
+}
+
+/*
+ * Mirror the arenas selected by range from sp onto dp.
+ *
+ * range == nil mirrors every arena; the string "none" mirrors nothing.
+ * Otherwise range is a comma-separated list whose elements are
+ * "n", "lo-hi", "-hi" (from 0) or "lo-" (through the last arena).
+ * The string is modified in place: commas are overwritten with NULs.
+ *
+ * A malformed or out-of-bounds element is reported and skipped; the
+ * remaining elements are still processed.  Empty elements are ignored.
+ */
+void
+mirrormany(ArenaPart *sp, ArenaPart *dp, char *range)
+{
+	int i, lo, hi;
+	char *s, *t, *elem;
+
+	if(range == nil){
+		for(i=0; i<sp->narenas; i++)
+			mirror(sp->arenas[i], dp->arenas[i]);
+		return;
+	}
+	if(strcmp(range, "none") == 0)
+		return;
+
+	for(s=range; *s; s=t){
+		t = strchr(s, ',');
+		if(t)
+			*t++ = 0;
+		else
+			t = s+strlen(s);
+
+		/* remember the whole element: s is advanced while parsing */
+		elem = s;
+		if(*s == 0)
+			continue;	/* empty element ("1,,2"): nothing was requested */
+
+		if(*s == '-')
+			lo = 0;
+		else
+			lo = strtol(s, &s, 0);
+		hi = lo;
+		if(*s == '-'){
+			s++;
+			if(*s == 0)
+				hi = sp->narenas-1;
+			else
+				hi = strtol(s, &s, 0);
+		}
+		if(*s != 0){
+			print("%T bad arena range: %s\n", elem);
+			continue;
+		}
+		/* never index past the arena tables */
+		if(lo < 0 || hi >= sp->narenas){
+			print("%T arena range out of bounds: %s (have arenas 0-%d)\n",
+				elem, sp->narenas-1);
+			continue;
+		}
+		for(i=lo; i<=hi; i++)
+			mirror(sp->arenas[i], dp->arenas[i]);
+	}
+}
+
+
+void
+threadmain(int argc, char **argv)
+{	/*
+	 * Entry point: parse flags, open the source partition read-only and the
+	 * destination read-write, load both arena tables, insist that the two
+	 * have the same number of arenas with matching version, blocksize, size
+	 * and name, then start the writer proc and mirror the requested arenas.
+	 * Exits with the global status string.
+	 */
+ int i;
+ Arena *sa, *da;
+ ArenaPart *s, *d;
+ char *ranges;
+
+ ventifmtinstall();
+
+ ARGBEGIN{
+ case 'F':	/* accepted here but not listed in the usage message */
+ force = 1;
+ break;
+ case 'v':
+ verbose++;
+ break;
+ default:
+ usage();
+ }ARGEND
+
+ if(argc != 2 && argc != 3)
+ usage();
+ ranges = nil;	/* nil makes mirrormany process every arena */
+ if(argc == 3)
+ ranges = argv[2];
+
+ if((src = initpart(argv[0], OREAD)) == nil)
+ sysfatal("initpart %s: %r", argv[0]);
+ if((dst = initpart(argv[1], ORDWR)) == nil)
+ sysfatal("initpart %s: %r", argv[1]);
+ if((s = initarenapart(src)) == nil)
+ sysfatal("initarenapart %s: %r", argv[0]);
+ for(i=0; i<s->narenas; i++)
+ delarena(s->arenas[i]);	/* NOTE(review): delarena's effect is not visible here; the arenas are still dereferenced below — confirm it does not free them */
+ if((d = initarenapart(dst)) == nil)
+ sysfatal("loadarenapart %s: %r", argv[1]);
+ for(i=0; i<d->narenas; i++)
+ delarena(d->arenas[i]);
+
+ /*
+ * The arena geometries must match or all bets are off.
+ */
+ if(s->narenas != d->narenas)
+ sysfatal("arena count mismatch: %d vs %d", s->narenas, d->nare…
+ for(i=0; i<s->narenas; i++){
+ sa = s->arenas[i];
+ da = d->arenas[i];
+ if(sa->version != da->version)
+ sysfatal("arena %d: version mismatch: %d vs %d", i, sa…
+ if(sa->blocksize != da->blocksize)
+ sysfatal("arena %d: blocksize mismatch: %d vs %d", i, …
+ if(sa->size != da->size)
+ sysfatal("arena %d: size mismatch: %,lld vs %,lld", i,…
+ if(strcmp(sa->name, da->name) != 0)
+ sysfatal("arena %d: name mismatch: %s vs %s", i, sa->n…
+ }
+
+ /*
+ * Mirror one arena at a time.
+ */
+ writechan = chancreate(sizeof(void*), 0);
+ vtproc(writeproc, nil);	/* NOTE(review): return value not checked */
+ mirrormany(s, d, ranges);
+ sendp(writechan, nil);	/* nil makes writeproc leave its receive loop */
+ threadexitsall(status);
+}
diff --git a/src/cmd/venti/srv/mkfile b/src/cmd/venti/srv/mkfile
t@@ -11,6 +11,7 @@ LIBOFILES=\
config.$O\
conv.$O\
dcache.$O\
+ disksched.$O\
dump.$O\
graph.$O\
httpd.$O\
t@@ -52,11 +53,13 @@ TARG=\
fmtbloom\
fmtisect\
fmtindex\
+ fixarenas\
buildindex\
checkarenas\
checkindex\
clumpstats\
findscore\
+ mirrorarenas\
rdarena\
wrarena\
syncindex\
diff --git a/src/cmd/venti/srv/part.c b/src/cmd/venti/srv/part.c
t@@ -145,8 +145,6 @@ initpart(char *name, int mode)
if(hi == 0)
hi = dir->length;
part->size = hi - part->offset;
-fprint(2, "part %s: file %s offset %,lld size %,lld\n",
- name, file, part->offset, part->size);
#ifdef CANBLOCKSIZE
{
struct statfs sfs;
t@@ -203,10 +201,32 @@ prwb(char *name, int fd, int isread, u64int offset, void…
u32int c, delta, icount, opsize;
int r;
+ icount = count;
buf = vbuf;
+
+#ifndef PLAN9PORT
+ op = isread ? "read" : "write";
+ dst = buf;
+ freetmp = nil;
+ while(count > 0){
+ opsize = min(count, 131072 /* blocksize */);
+ if(isread)
+ r = pread(fd, dst, opsize, offset);
+ else
+ r = pwrite(fd, dst, opsize, offset);
+ if(r <= 0)
+ goto Error;
+ offset += r;
+ count -= r;
+ dst += r;
+ if(r != opsize)
+ goto Error;
+ }
+ return icount;
+#endif
+
tmp = nil;
freetmp = nil;
- icount = count;
opsize = blocksize;
if(count == 0){
t@@ -313,7 +333,7 @@ print("FAILED isread=%d r=%d count=%d blocksize=%d\n", isr…
memmove(buf, tmp, count);
else{
memmove(tmp, buf, count);
- if(pwrite(fd, tmp, blocksize, offset) != blocksize){
+ if(pwrite(fd, tmp, opsize, offset) != blocksize){
dst = tmp;
op = "write";
goto Error;
t@@ -332,9 +352,16 @@ Error:
return -1;
}
+#ifndef PLAN9PORT
+static int sdreset(Part*);
+static int reopen(Part*);
+static int threadspawnl(int[3], char*, char*, ...);
+#endif
+
int
rwpart(Part *part, int isread, u64int offset, u8int *buf, u32int count)
{
+ int n, try;
u32int blocksize;
trace(TraceDisk, "%s %s %ud at 0x%llx",
t@@ -351,9 +378,33 @@ rwpart(Part *part, int isread, u64int offset, u8int *buf,…
if(blocksize == 0)
blocksize = 4096;
- return prwb(part->filename, part->fd, isread, part->offset+offset, buf…
-}
+ for(try=0;; try++){
+ n = prwb(part->filename, part->fd, isread, part->offset+offset…
+ if(n >= 0 || try > 10)
+ break;
+#ifndef PLAN9PORT
+ {
+ char err[ERRMAX];
+ /*
+ * This happens with the sdmv disks frustratingly often.
+ * Try to fix things up and continue.
+ */
+ rerrstr(err, sizeof err);
+ if(strstr(err, "i/o timeout") || strstr(err, "i/o error")){
+ if(sdreset(part) >= 0)
+ reopen(part);
+ continue;
+ }else if(strstr(err, "partition has changed")){
+ reopen(part);
+ continue;
+ }
+ }
+#endif
+ break;
+ }
+ return n;
+}
int
readpart(Part *part, u64int offset, u8int *buf, u32int count)
{
t@@ -391,3 +442,200 @@ readfile(char *name)
return b;
}
+
+
+
+
+
+
+
+#ifndef PLAN9PORT
+static int
+sdreset(Part *part)
+{	/*
+	 * Try to bring the disk behind part->filename back to life.
+	 * Works on sibling files in the same directory as part->filename:
+	 * writes "reset" to ctl, and if the partition file is still missing
+	 * runs disk/fdisk -p on data and then disk/prep -p on plan9, feeding
+	 * their output to ctl, polling for part->filename to reappear.
+	 * resetlk serializes concurrent callers.
+	 * Returns 0 once part->filename exists, -1 otherwise.
+	 * NOTE(review): d and dd (from dirstat/dirfstat) are never freed in this
+	 * function, and fd is closed only on the error path, not on the ok path —
+	 * these look like leaks; confirm.
+	 */
+ char *name, *p;
+ int i, fd, xfd[3], rv;
+ static QLock resetlk;
+ Dir *d, *dd;
+
+ fprint(2, "sdreset %s\n", part->name);
+ name = emalloc(strlen(part->filename)+20);	/* room to replace the last path element with ctl/data/plan9 */
+ strcpy(name, part->filename);
+ p = strrchr(name, '/');
+ if(p)
+ p++;
+ else
+ p = name;	/* p: start of the last path element inside name */
+
+ strcpy(p, "ctl");
+ d = dirstat(name);
+ if(d == nil){
+ free(name);
+ return -1;
+ }
+
+ /*
+ * We don't need multiple people resetting the disk.
+ */
+ qlock(&resetlk);
+ if((fd = open(name, OWRITE)) < 0)
+ goto error;
+ dd = dirfstat(fd);
+ if(d && dd && d->qid.vers != dd->qid.vers){	/* ctl's qid version changed between the stat before the lock and this one */
+ fprint(2, "sdreset %s: got scooped\n", part->name);
+ /* Someone else got here first. */
+ if(access(part->filename, AEXIST) >= 0)
+ goto ok;
+ goto error;
+ }
+
+ /*
+ * Write "reset" to the ctl file to cause the chipset
+ * to reinitialize itself (specific to sdmv driver).
+ * Ignore error in case using other disk.
+ */
+ fprint(2, "sdreset %s: reset ctl\n", part->name);
+ write(fd, "reset", 5);
+
+ if(access(part->filename, AEXIST) >= 0)
+ goto ok;
+
+ /*
+ * Re-run fdisk and prep. Don't use threadwaitchan
+ * to avoid coordinating for it. Reopen ctl because
+ * we reset the disk.
+ */
+ strcpy(p, "ctl");
+ close(fd);
+ if((fd = open(name, OWRITE)) < 0)
+ goto error;
+ strcpy(p, "data");
+ xfd[0] = open("/dev/null", OREAD);
+ xfd[1] = dup(fd, -1);	/* the spawned command's standard output goes to ctl */
+ xfd[2] = dup(2, -1);
+ fprint(2, "sdreset %s: run fdisk %s\n", part->name, name);
+ if(threadspawnl(xfd, "/bin/disk/fdisk", "disk/fdisk", "-p", name, nil)…
+ close(xfd[0]);
+ close(xfd[1]);
+ close(xfd[2]);
+ goto error;
+ }
+ strcpy(p, "plan9");
+ for(i=0; i<=20; i++){
+ sleep(i*100);	/* wait a little longer on each attempt */
+ if(access(part->filename, AEXIST) >= 0)
+ goto ok;
+ if(access(name, AEXIST) >= 0)	/* the plan9 file exists: go on to prep */
+ goto prep;
+ }
+ goto error;
+
+prep:
+ strcpy(p, "ctl");
+ close(fd);
+ if((fd = open(name, OWRITE)) < 0)
+ goto error;
+ strcpy(p, "plan9");
+ xfd[0] = open("/dev/null", OREAD);
+ xfd[1] = dup(fd, -1);
+ xfd[2] = dup(2, -1);
+ fprint(2, "sdreset %s: run prep\n", part->name);
+ if(threadspawnl(xfd, "/bin/disk/prep", "disk/prep", "-p", name, nil) <…
+ close(xfd[0]);
+ close(xfd[1]);
+ close(xfd[2]);
+ goto error;
+ }
+ for(i=0; i<=20; i++){
+ sleep(i*100);
+ if(access(part->filename, AEXIST) >= 0)
+ goto ok;
+ }
+
+error:
+ fprint(2, "sdreset %s: error: %r\n", part->name);
+ rv = -1;
+ if(fd >= 0)
+ close(fd);
+ goto out;
+
+ok:
+ fprint(2, "sdreset %s: all okay\n", part->name);
+ rv = 0;
+ goto out;
+
+out:
+ free(name);
+ qunlock(&resetlk);
+ return rv;
+}
+
+/*
+ * Reopen part->filename and move the new descriptor onto part->fd,
+ * so code that already holds the descriptor number keeps working.
+ * Returns 0 on success, -1 if the open or the dup fails; on failure
+ * part->fd is left as it was.
+ */
+static int
+reopen(Part *part)
+{
+	int fd;
+
+	fprint(2, "reopen %s\n", part->filename);
+	if((fd = open(part->filename, ORDWR)) < 0){
+		fprint(2, "reopen %s: %r\n", part->filename);
+		return -1;
+	}
+	if(fd == part->fd)
+		return 0;
+	/* a failed dup used to be reported as success */
+	if(dup(fd, part->fd) < 0){
+		fprint(2, "reopen %s: dup: %r\n", part->filename);
+		close(fd);
+		return -1;
+	}
+	close(fd);
+	return 0;
+}
+
+typedef struct Spawn Spawn;
+struct Spawn	/* arguments handed from threadspawnl to spawnproc */
+{
+ Channel *c;	/* passed to procexec; threadspawnl reads the pid from it with recvul */
+ int fd[3];	/* descriptors that spawnproc dups onto 0, 1 and 2 */
+ char *file;	/* program path given to procexec */
+ char **argv;	/* argument vector given to procexec */
+};
+
+/*
+ * Proc body for threadspawnl: take a private copy of the descriptor
+ * table, install s->fd[0..2] as descriptors 0..2, close the originals
+ * and exec the requested program.
+ */
+static void
+spawnproc(void *v)
+{
+	int i, j, seen, *fd;
+	Spawn *s;
+
+	rfork(RFFDG);
+	s = v;
+	fd = s->fd;
+	for(i=0; i<3; i++)
+		dup(fd[i], i);
+	/* close each original once, leaving 0-2 themselves alone */
+	for(i=0; i<3; i++){
+		if(fd[i] <= 2)
+			continue;
+		seen = 0;
+		for(j=0; j<i; j++)
+			if(fd[j] == fd[i])
+				seen = 1;
+		if(!seen)
+			close(fd[i]);
+	}
+	procexec(s->c, s->file, s->argv);
+}
+
+/*
+ * Run file in a new proc with fd[0..2] as its descriptors 0..2 and
+ * argv0 plus the following nil-terminated arguments as its argument vector.
+ * On success the caller's copies of fd[] are closed and the pid is
+ * returned.  On failure -1 is returned and fd[] is left open for the
+ * caller to close.
+ */
+static int
+threadspawnl(int fd[3], char *file, char *argv0, ...)
+{
+	int pid;
+	Spawn s;
+
+	s.c = chancreate(sizeof(void*), 0);
+	memmove(s.fd, fd, sizeof(s.fd));
+	s.file = file;
+	/* treats the variadic arguments as an in-place argv array */
+	s.argv = &argv0;
+	if(vtproc(spawnproc, &s) < 0){
+		/* no proc was started, so nothing will ever send on s.c */
+		chanfree(s.c);
+		return -1;
+	}
+	pid = recvul(s.c);
+	/* the pid has arrived; the channel is no longer needed */
+	chanfree(s.c);
+	if(pid < 0)
+		return -1;
+	close(fd[0]);
+	if(fd[1] != fd[0])
+		close(fd[1]);
+	if(fd[2] != fd[1] && fd[2] != fd[0])
+		close(fd[2]);
+	return pid;
+}
+
+#endif
diff --git a/src/cmd/venti/srv/printarenapart.c b/src/cmd/venti/srv/printarenap…
t@@ -0,0 +1,160 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+uchar buf[64*1024];
+
+/*
+ * Print the command synopsis on standard error and exit.
+ * threadmain accepts exactly one argument (the arena file), so no
+ * optional offset is advertised.
+ */
+void
+usage(void)
+{
+	fprint(2, "usage: printarenapart arenafile\n");
+	threadexitsall("usage");
+}
+
+static void
+rdarena(Arena *arena, u64int offset)
+{	/*
+	 * Print the arena header to fd 2, then walk its clumps and print one
+	 * line per clump (address, score, type and further clump info).
+	 * The walk starts at offset, or at 0 when offset is ~0, and stops at
+	 * the free-clump magic, at a bad magic number, or when a clump fails
+	 * to load or verify.  The final address reached is printed at the end.
+	 * NOTE(review): nothing in the visible part of this file calls rdarena.
+	 * NOTE(review): the break statements taken after loadclump succeeds
+	 * skip freezblock(lump) — looks like a leak; confirm.
+	 * NOTE(review): aa starts at 0/offset while the loop bound e is
+	 * base+size — confirm which address space aa is meant to be in.
+	 */
+ u64int a, aa, e;
+ u32int magic;
+ Clump cl;
+ uchar score[VtScoreSize];
+ ZBlock *lump;
+
+ printarena(2, arena);
+
+ a = arena->base;
+ e = arena->base + arena->size;
+ if(offset != ~(u64int)0) {	/* ~0 means no starting offset was given */
+ if(offset >= e-a)
+ sysfatal("bad offset %llud >= %llud\n",
+ offset, e-a);
+ aa = offset;
+ } else
+ aa = 0;
+
+ for(; aa < e; aa += ClumpSize+cl.info.size) {
+ magic = clumpmagic(arena, aa);
+ if(magic == ClumpFreeMagic)
+ break;
+ if(magic != arena->clumpmagic) {
+ fprint(2, "illegal clump magic number %#8.8ux offset %…
+ magic, aa);
+ break;
+ }
+ lump = loadclump(arena, aa, 0, &cl, score, 0);
+ if(lump == nil) {
+ fprint(2, "clump %llud failed to read: %r\n", aa);
+ break;
+ }
+ if(cl.info.type != VtCorruptType) {
+ scoremem(score, lump->data, cl.info.uncsize);
+ if(scorecmp(cl.info.score, score) != 0) {
+ fprint(2, "clump %llud has mismatched score\n"…
+ break;
+ }
+ if(vttypevalid(cl.info.type) < 0) {
+ fprint(2, "clump %llud has bad type %d\n", aa,…
+ break;
+ }
+ }
+ print("%22llud %V %3d %5d\n", aa, score, cl.info.type, cl.info…
+ freezblock(lump);
+ }
+ print("end offset %llud\n", aa);
+}
+
+void
+threadmain(int argc, char *argv[])
+{	/*
+	 * Entry point: open the arena partition file read-only, unpack and print
+	 * the arena partition header, read the arena table that sits between
+	 * tabbase and arenabase, and for each table line read the arena's head
+	 * and tail blocks at the listed offset and print a summary of the tail
+	 * (name, offset, clump counts, used/uncompressed sizes, sealed flag,
+	 * creation and modification times).
+	 * Exactly one argument is accepted.
+	 * NOTE(review): the print("A") and print("%p\n", p) calls look like
+	 * leftover debugging output — confirm before relying on the output format.
+	 */
+ char *file, *p, *name;
+ char *table;
+ u64int offset;
+ Part *part;
+ ArenaPart ap;
+ ArenaHead head;
+ Arena tail;
+ char ct[40], mt[40];
+
+ readonly = 1; /* for part.c */
+ ARGBEGIN{
+ default:
+ usage();
+ break;
+ }ARGEND
+
+ switch(argc) {
+ default:
+ usage();
+ case 1:
+ file = argv[0];
+ }
+
+ ventifmtinstall();
+ statsinit();
+
+ part = initpart(file, OREAD|ODIRECT);
+ if(part == nil)
+ sysfatal("can't open file %s: %r", file);
+ if(readpart(part, PartBlank, buf, sizeof buf) < 0)
+ sysfatal("can't read file %s: %r", file);
+
+ if(unpackarenapart(&ap, buf) < 0)
+ sysfatal("corrupted arena part header: %r");
+
+ print("# arena part version=%d blocksize=%d arenabase=%d\n",
+ ap.version, ap.blocksize, ap.arenabase);
+ ap.tabbase = (PartBlank+HeadSize+ap.blocksize-1)&~(ap.blocksize-1);	/* PartBlank+HeadSize rounded up to a multiple of blocksize (assumes a power of two) */
+ ap.tabsize = ap.arenabase - ap.tabbase;
+
+print("A");
+ table = malloc(ap.tabsize+1);	/* NOTE(review): result not checked before use */
+ if(readpart(part, ap.tabbase, (uchar*)table, ap.tabsize) < 0)
+ sysfatal("read %s: %r", file);
+ table[ap.tabsize] = 0;	/* NUL-terminate so the table can be scanned as a string */
+
+print("A");
+ partblocksize(part, ap.blocksize);
+ initdcache(8 * MaxDiskBlock);
+
+print("A");
+/* XXX - read the number of arenas from the first line */
+ for(p=table; p && *p; p=strchr(p, '\n')){
+ if(*p == '\n')
+ p++;
+ name = p;
+ p = strpbrk(p, " \t");	/* skip the first field of the line */
+ if(p == nil){
+ fprint(2, "bad line: %s\n", name);
+ break;
+ }
+print("%p\n", p);
+ offset = strtoull(p, nil, 0);
+ if(readpart(part, offset, buf, sizeof buf) < 0){
+ fprint(2, "%s: read %s: %r\n", argv0, file);
+ continue;
+ }
+ if(unpackarenahead(&head, buf) < 0){
+ fprint(2, "%s: unpackarenahead: %r\n", argv0);
+ continue;
+ }
+ if(readpart(part, offset+head.size-head.blocksize, buf, head.b…
+ fprint(2, "%s: read %s: %r\n", argv0, file);
+ continue;
+ }
+ if(unpackarena(&tail, buf) < 0){
+ fprint(2, "%s: unpackarena: %r\n", argv0);
+ continue;
+ }
+ print("arena %s %lld clumps=%,d cclumps=%,d used=%,lld uncsize…
+ tail.name, offset,
+ tail.diskstats.clumps, tail.diskstats.cclumps,
+ tail.diskstats.used, tail.diskstats.uncsize,
+ tail.diskstats.sealed ? " sealed" : "");
+ strcpy(ct, ctime(tail.ctime));
+ ct[28] = 0;
+ strcpy(mt, ctime(tail.wtime));
+ mt[28] = 0;
+ print("\tctime=%s\n\tmtime=%s\n", ct, mt);
+ }
+ threadexitsall(0);
+}
diff --git a/src/cmd/venti/srv/printarenas.c b/src/cmd/venti/srv/printarenas.c
t@@ -36,7 +36,7 @@ shoulddump(char *name, int argc, char **argv)
enum
{
- ClumpChunks = 32*1024
+ ClumpChunks = 32*1024,
};
void
diff --git a/src/cmd/venti/srv/sortientry.c b/src/cmd/venti/srv/sortientry.c
t@@ -61,7 +61,7 @@ sortrawientries(Index *ix, Part *tmp, u64int *base, Bloom *b…
u32int n;
int i, ok;
-/*ZZZ should allow configuration of bits, bucket size */
+/* ZZZ should allow configuration of bits, bucket size */
ib = initiebucks(tmp, 8, 64*1024);
if(ib == nil){
seterr(EOk, "can't create sorting buckets: %r");
t@@ -116,10 +116,7 @@ readarenainfo(IEBucks *ib, Arena *arena, u64int a, Bloom …
ClumpInfo *ci, *cis;
u32int clump;
int i, n, ok, nskip;
-/* static Biobuf bout; */
-/*ZZZ remove fprint? */
-/*fprint(2, "ra %s %d %d\n", arena->name, arena->memstats.clumps, arena->disks…
if(arena->memstats.clumps)
fprint(2, "\tarena %s: %d entries\n", arena->name, arena->mems…
else
t@@ -129,7 +126,6 @@ readarenainfo(IEBucks *ib, Arena *arena, u64int a, Bloom *…
ok = 0;
nskip = 0;
memset(&ie, 0, sizeof(IEntry));
-/* Binit(&bout, 1, OWRITE); */
for(clump = 0; clump < arena->memstats.clumps; clump += n){
n = ClumpChunks;
if(n > arena->memstats.clumps - clump)
t@@ -148,18 +144,15 @@ readarenainfo(IEBucks *ib, Arena *arena, u64int a, Bloom…
a += ci->size + ClumpSize;
ie.ia.blocks = (ci->size + ClumpSize + (1 << ABlockLog…
scorecp(ie.score, ci->score);
- /* Bprint(&bout, "%22lld %V %3d %5d\n", */
- /* ie.ia.addr, ie.score, ie.ia.type, ie.ia.size…
if(ci->type == VtCorruptType){
- /* print("! %V %22lld %3d %5d %3d\n", */
- /* ie.score, ie.ia.addr, ie.ia.type, ie…
+ if(0) print("! %V %22lld %3d %5d %3d\n",
+ ie.score, ie.ia.addr, ie.ia.type, ie.i…
nskip++;
}else
sprayientry(ib, &ie);
markbloomfilter(b, ie.score);
}
}
-/* Bterm(&bout); */
free(cis);
if(ok < 0)
return TWID32;
t@@ -358,8 +351,8 @@ readiebuck(IEBucks *ib, int b)
m = ib->bucks[b].used;
if(m == 0)
m = ib->usable;
-/* if(ib->bucks[b].total) */
-/* fprint(2, "\tbucket %d: %d entries\n", b, ib->bucks[b].total…
+ if(0) if(ib->bucks[b].total)
+ fprint(2, "\tbucket %d: %d entries\n", b, ib->bucks[b].total/I…
while(head != TWID32){
if(readpart(ib->part, (u64int)head * ib->size, &ib->buf[n], m+…
seterr(EOk, "can't read index sort bucket: %r");
diff --git a/src/cmd/venti/srv/stats.c b/src/cmd/venti/srv/stats.c
t@@ -80,7 +80,7 @@ Statdesc statdesc[NStat] =
{ "isect block write bytes", },
{ "sum reads", },
- { "sum read bytes", }
+ { "sum read bytes", },
};
QLock statslock;
diff --git a/src/cmd/venti/srv/syncarena.c b/src/cmd/venti/srv/syncarena.c
t@@ -30,12 +30,11 @@ syncarena(Arena *arena, u64int start, u32int n, int zok, i…
ZBlock *lump;
Clump cl;
ClumpInfo ci;
- static ClumpInfo zci = { -1 };
+ static ClumpInfo zci = { .type = -1 };
u8int score[VtScoreSize];
u64int uncsize, used, aa;
u32int clump, clumps, cclumps, magic;
int err, flush, broken;
- AState as;
used = arena->memstats.used;
clumps = arena->memstats.clumps;
@@ -133,19 +132,21 @@ syncarena(Arena *arena, u64int start, u32int n, int zok,…
flushdcache();
}
+fprint(2, "arena %s: start=%lld fix=%d flush=%d %lld->%lld %ud->%ud %ud->%ud %…
+ arena->name,
+ start,
+ fix,
+ flush,
+ used, arena->memstats.used,
+ clumps, arena->memstats.clumps,
+ cclumps, arena->memstats.cclumps,
+ uncsize, arena->memstats.uncsize);
+
if(used != arena->memstats.used
|| clumps != arena->memstats.clumps
|| cclumps != arena->memstats.cclumps
|| uncsize != arena->memstats.uncsize)
err |= SyncHeader;
- if(start && (err&SyncHeader)){
- trace(TraceProc, "syncarena setdcachestate");
- as.arena = arena;
- as.aa = start+arena->memstats.used;
- as.stats = arena->memstats;
- setdcachestate(&as);
- }
-
return err;
}
diff --git a/src/cmd/venti/srv/syncindex.c b/src/cmd/venti/srv/syncindex.c
@@ -48,6 +48,8 @@ threadmain(int argc, char *argv[])
ventifmtinstall();
if(initventi(argv[0], &conf) < 0)
sysfatal("can't init venti: %r");
+ if(mainindex->bloom && loadbloom(mainindex->bloom) < 0)
+ sysfatal("can't load bloom filter: %r");
if(bcmem < maxblocksize * (mainindex->narenas + mainindex->nsects * 4 …
bcmem = maxblocksize * (mainindex->narenas + mainindex->nsects…
diff --git a/src/cmd/venti/srv/syncindex0.c b/src/cmd/venti/srv/syncindex0.c
@@ -121,6 +121,7 @@ int
syncindex(Index *ix, int fix, int mustflush, int check)
{
Arena *arena;
+ AState as;
u64int a;
u32int clump;
int i, e, e1, ok, ok1, flush;
@@ -130,7 +131,12 @@ syncindex(Index *ix, int fix, int mustflush, int check)
for(i = 0; i < ix->narenas; i++){
trace(TraceProc, "syncindex start %d", i);
arena = ix->arenas[i];
- clump = arena->memstats.clumps;
+ /*
+ * Syncarena will scan through the arena looking for blocks
+ * that have been forgotten. It will update arena->memstats.u…
+ * so save the current copy as the place to start the
+ * syncarenaindex scan.
+ */
a = arena->memstats.used;
e = syncarena(arena, ix->amap[i].start, TWID32, fix, fix);
e1 = e;
@@ -138,15 +144,23 @@ syncindex(Index *ix, int fix, int mustflush, int check)
e1 &= ~(SyncHeader|SyncCIZero|SyncCIErr);
if(e1 == SyncHeader)
fprint(2, "arena %s: header is out-of-date\n", arena->…
+ clump = arena->diskstats.clumps;
if(e1)
ok = -1;
else{
ok1 = syncarenaindex(ix, arena, clump, a + ix->amap[i]…
if(ok1 < 0)
fprint(2, "syncarenaindex: %r\n");
+fprint(2, "arena %s: wbarena in syncindex\n", arena->name);
if(fix && ok1==0 && (e & SyncHeader) && wbarena(arena)…
fprint(2, "arena=%s header write failed: %r\n"…
ok |= ok1;
+
+fprint(2, "arena %s: setdcachestate\n", arena->name);
+ as.arena = arena;
+ as.aa = ix->amap[i].start + arena->memstats.used;
+ as.stats = arena->memstats;
+ setdcachestate(&as);
}
}
if(missing || wrong)
diff --git a/src/cmd/venti/srv/unwhack.c b/src/cmd/venti/srv/unwhack.c
@@ -23,7 +23,7 @@ static uchar lenval[1 << (DBigLenBits - 1)] =
static uchar lenbits[] =
{
0, 0, 0,
- 2, 3, 5, 5
+ 2, 3, 5, 5,
};
static uchar offbits[16] =
diff --git a/src/cmd/venti/srv/utils.c b/src/cmd/venti/srv/utils.c
@@ -148,6 +148,7 @@ emalloc(ulong n)
sysfatal("out of memory allocating %lud", n);
}
memset(p, 0xa5, n);
+ setmalloctag(p, getcallerpc(&n));
if(0)print("emalloc %p-%p by %lux\n", p, (char*)p+n, getcallerpc(&n));
return p;
}
@@ -164,6 +165,7 @@ ezmalloc(ulong n)
sysfatal("out of memory allocating %lud", n);
}
memset(p, 0, n);
+ setmalloctag(p, getcallerpc(&n));
if(0)print("ezmalloc %p-%p by %lux\n", p, (char*)p+n, getcallerpc(&n));
return p;
}
@@ -177,6 +179,7 @@ erealloc(void *p, ulong n)
abort();
sysfatal("out of memory allocating %lud", n);
}
+ setrealloctag(p, getcallerpc(&p));
if(0)print("erealloc %p-%p by %lux\n", p, (char*)p+n, getcallerpc(&p));
return p;
}
@@ -190,6 +193,7 @@ estrdup(char *s)
n = strlen(s) + 1;
t = emalloc(n);
memmove(t, s, n);
+ setmalloctag(t, getcallerpc(&s));
if(0)print("estrdup %p-%p by %lux\n", t, (char*)t+n, getcallerpc(&s));
return t;
}
@@ -231,6 +235,7 @@ ventifmtinstall(void)
fmtinstall('F', vtfcallfmt);
fmtinstall('H', encodefmt);
fmtinstall('I', ientryfmt);
+ fmtinstall('T', vttimefmt);
fmtinstall('V', vtscorefmt);
}
diff --git a/src/cmd/venti/srv/venti.c b/src/cmd/venti/srv/venti.c
@@ -105,6 +105,8 @@ threadmain(int argc, char *argv[])
fprint(2, "conf...");
if(initventi(configfile, &config) < 0)
sysfatal("can't init server: %r");
+ if(mainindex->bloom && loadbloom(mainindex->bloom) < 0)
+ sysfatal("can't load bloom filter: %r");
if(mem == 0)
mem = config.mem;
@@ -210,8 +212,8 @@ ventiserver(void *v)
trace(TraceRpc, "<- %F", &r->tx);
r->rx.msgtype = r->tx.msgtype+1;
addstat(StatRpcTotal, 1);
- /* print("req (arenas[0]=%p sects[0]=%p) %F\n", */
- /* mainindex->arenas[0], mainindex->sects[0], &r->tx); …
+ if(0) print("req (arenas[0]=%p sects[0]=%p) %F\n",
+ mainindex->arenas[0], mainindex->sects[0], &r->tx);
switch(r->tx.msgtype){
default:
vtrerror(r, "unknown request");
diff --git a/src/cmd/venti/srv/verifyarena.c b/src/cmd/venti/srv/verifyarena.c
@@ -3,65 +3,102 @@
#include "fns.h"
static int verbose;
+static int fd;
+static uchar *data;
+static int blocksize;
+static int sleepms;
void
usage(void)
{
- fprint(2, "usage: verifyarena [-v]\n");
+ fprint(2, "usage: verifyarena [-b blocksize] [-s ms] [-v] [arenapart […
threadexitsall(0);
}
-static void
+static int
+preadblock(uchar *buf, int n, vlong off)
+{
+ int nr, m;
+
+ for(nr = 0; nr < n; nr += m){
+ m = n - nr;
+ m = pread(fd, &buf[nr], m, off+nr);
+ if(m <= 0){
+ if(m == 0)
+ werrstr("early eof");
+ return -1;
+ }
+ }
+ return 0;
+}
+
+static int
readblock(uchar *buf, int n)
{
int nr, m;
for(nr = 0; nr < n; nr += m){
m = n - nr;
- m = read(0, &buf[nr], m);
- if(m <= 0)
- sysfatal("can't read arena from standard input: %r");
+ m = read(fd, &buf[nr], m);
+ if(m <= 0){
+ if(m == 0)
+ werrstr("early eof");
+ return -1;
+ }
}
+ return 0;
}
static void
-verifyarena(void)
+verifyarena(char *name, vlong len)
{
Arena arena;
ArenaHead head;
- ZBlock *b;
DigestState s;
u64int n, e;
u32int bs;
u8int score[VtScoreSize];
- fprint(2, "verify arena from standard input\n");
+ fprint(2, "verify %s\n", name);
memset(&arena, 0, sizeof arena);
memset(&s, 0, sizeof s);
/*
- * read the little bit, which will included the header
+ * read a little bit, which will include the header
*/
- bs = MaxIoSize;
- b = alloczblock(bs, 0, 0);
- readblock(b->data, HeadSize);
- sha1(b->data, HeadSize, nil, &s);
- if(unpackarenahead(&head, b->data) < 0)
- sysfatal("corrupted arena header: %r");
+ if(readblock(data, HeadSize) < 0){
+ fprint(2, "%s: reading header: %r\n", name);
+ return;
+ }
+ sha1(data, HeadSize, nil, &s);
+ if(unpackarenahead(&head, data) < 0){
+ fprint(2, "%s: corrupt arena header: %r\n", name);
+ return;
+ }
if(head.version != ArenaVersion4 && head.version != ArenaVersion5)
- fprint(2, "warning: unknown arena version %d\n", head.version);
+ fprint(2, "%s: warning: unknown arena version %d\n", name, hea…
+ if(len != 0 && len != head.size)
+ fprint(2, "%s: warning: unexpected length %lld != %lld\n", nam…
+ if(strcmp(name, "<stdin>") != 0 && strcmp(head.name, name) != 0)
+ fprint(2, "%s: warning: unexpected name %s\n", name, head.name…
/*
* now we know how much to read
* read everything but the last block, which is special
*/
e = head.size - head.blocksize;
+ bs = blocksize;
for(n = HeadSize; n < e; n += bs){
if(n + bs > e)
bs = e - n;
- readblock(b->data, bs);
- sha1(b->data, bs, nil, &s);
+ if(readblock(data, bs) < 0){
+ fprint(2, "%s: read data: %r\n", name);
+ return;
+ }
+ sha1(data, bs, nil, &s);
+ if(sleepms)
+ sleep(sleepms);
}
/*
@@ -69,8 +106,11 @@ verifyarena(void)
* the sum is calculated assuming the slot for the sum is zero.
*/
bs = head.blocksize;
- readblock(b->data, bs);
- sha1(b->data, bs-VtScoreSize, nil, &s);
+ if(readblock(data, bs) < 0){
+ fprint(2, "%s: read last block: %r\n", name);
+ return;
+ }
+ sha1(data, bs-VtScoreSize, nil, &s);
sha1(zeroscore, VtScoreSize, nil, &s);
sha1(nil, 0, score, &s);
@@ -78,37 +118,73 @@ verifyarena(void)
* validity check on the trailer
*/
arena.blocksize = head.blocksize;
- if(unpackarena(&arena, b->data) < 0)
- sysfatal("corrupted arena trailer: %r");
- scorecp(arena.score, &b->data[arena.blocksize - VtScoreSize]);
-
- if(namecmp(arena.name, head.name) != 0)
- sysfatal("arena header and trailer names clash: %s vs. %s\n", …
- if(arena.version != head.version)
- sysfatal("arena header and trailer versions clash: %d vs. %d\n…
+ if(unpackarena(&arena, data) < 0){
+ fprint(2, "%s: corrupt arena trailer: %r\n", name);
+ return;
+ }
+ scorecp(arena.score, &data[arena.blocksize - VtScoreSize]);
+
+ if(namecmp(arena.name, head.name) != 0){
+ fprint(2, "%s: wrong name in trailer: %s vs. %s\n",
+ name, head.name, arena.name);
+ return;
+ }
+ if(arena.version != head.version){
+ fprint(2, "%s: wrong version in trailer: %d vs. %d\n",
+ name, head.version, arena.version);
+ return;
+ }
arena.size = head.size - 2 * head.blocksize;
/*
* check for no checksum or the same
*/
- if(scorecmp(score, arena.score) != 0){
- if(scorecmp(zeroscore, arena.score) != 0)
- fprint(2, "warning: mismatched checksums for arena=%s,…
- arena.name, arena.score, score);
- scorecp(arena.score, score);
- }else
- fprint(2, "matched score\n");
-
+ if(scorecmp(score, arena.score) == 0)
+ fprint(2, "%s: verified score\n", name);
+ else if(scorecmp(zeroscore, arena.score) == 0)
+ fprint(2, "%s: unsealed\n", name);
+ else{
+ fprint(2, "%s: mismatch checksum - found=%V calculated=%V\n",
+ name, arena.score, score);
+ return;
+ }
printarena(2, &arena);
}
+static int
+shouldcheck(char *name, char **s, int n)
+{
+ int i;
+
+ if(n == 0)
+ return 1;
+
+ for(i=0; i<n; i++){
+ if(s[i] && strcmp(name, s[i]) == 0){
+ s[i] = nil;
+ return 1;
+ }
+ }
+ return 0;
+}
+
void
threadmain(int argc, char *argv[])
{
+ int i, nline;
+ char *p, *q, *table, *f[10], line[256];
+ vlong start, stop;
+ ArenaPart ap;
+
ventifmtinstall();
- statsinit();
-
+ blocksize = MaxIoSize;
ARGBEGIN{
+ case 'b':
+ blocksize = unittoull(EARGF(usage()));
+ break;
+ case 's':
+ sleepms = atoi(EARGF(usage()));
+ break;
case 'v':
verbose++;
break;
@@ -117,11 +193,69 @@ threadmain(int argc, char *argv[])
break;
}ARGEND
- readonly = 1;
+ data = vtmalloc(blocksize);
+ if(argc == 0){
+ fd = 0;
+ verifyarena("<stdin>", 0);
+ threadexitsall(nil);
+ }
+
+ if((fd = open(argv[0], OREAD)) < 0)
+ sysfatal("open %s: %r", argv[0]);
- if(argc != 0)
- usage();
+ if(preadblock(data, 8192, PartBlank) < 0)
+ sysfatal("read arena part header: %r");
+ if(unpackarenapart(&ap, data) < 0)
+ sysfatal("corrupted arena part header: %r");
+ fprint(2, "# arena part version=%d blocksize=%d arenabase=%d\n",
+ ap.version, ap.blocksize, ap.arenabase);
+ ap.tabbase = (PartBlank+HeadSize+ap.blocksize-1)&~(ap.blocksize-1);
+ ap.tabsize = ap.arenabase - ap.tabbase;
+ table = malloc(ap.tabsize+1);
+ if(preadblock((uchar*)table, ap.tabsize, ap.tabbase) < 0)
+ sysfatal("reading arena part directory: %r");
+ table[ap.tabsize] = 0;
+
+ nline = atoi(table);
+ p = strchr(table, '\n');
+ if(p)
+ p++;
+ for(i=0; i<nline; i++){
+ if(p == nil){
+ fprint(2, "warning: unexpected arena table end\n");
+ break;
+ }
+ q = strchr(p, '\n');
+ if(q)
+ *q++ = 0;
+ if(strlen(p) >= sizeof line){
+ fprint(2, "warning: long arena table line: %s\n", p);
+ p = q;
+ continue;
+ }
+ strcpy(line, p);
+ memset(f, 0, sizeof f);
+ if(tokenize(line, f, nelem(f)) < 3){
+ fprint(2, "warning: bad arena table line: %s\n", p);
+ p = q;
+ continue;
+ }
+ p = q;
+ if(shouldcheck(f[0], argv+1, argc-1)){
+ start = strtoull(f[1], 0, 0);
+ stop = strtoull(f[2], 0, 0);
+ if(stop <= start){
+ fprint(2, "%s: bad start,stop %lld,%lld\n", f[…
+ continue;
+ }
+ if(seek(fd, start, 0) < 0)
+ fprint(2, "%s: seek to start: %r\n", f[0]);
+ verifyarena(f[0], stop - start);
+ }
+ }
+ for(i=1; i<argc; i++)
+ if(argv[i] != 0)
+ fprint(2, "%s: did not find arena\n", argv[i]);
- verifyarena();
- threadexitsall(0);
+ threadexitsall(nil);
}
diff --git a/src/cmd/venti/srv/wrarena.c b/src/cmd/venti/srv/wrarena.c
@@ -83,8 +83,8 @@ rdarena(Arena *arena, u64int offset)
if(magic == ClumpFreeMagic)
break;
if(magic != arena->clumpmagic) {
- /* fprint(2, "illegal clump magic number %#8.8ux offset…
- /* magic, aa); */
+ if(0) fprint(2, "illegal clump magic number %#8.8ux of…
+ magic, aa);
break;
}
lump = loadclump(arena, aa, 0, &cl, score, 0);
diff --git a/src/cmd/venti/srv/zblock.c b/src/cmd/venti/srv/zblock.c
@@ -5,11 +5,13 @@
void
fmtzbinit(Fmt *f, ZBlock *b)
{
- memset(f, 0, sizeof *f);
- fmtlocaleinit(f, nil, nil, nil);
+ f->runes = 0;
f->start = b->data;
f->to = f->start;
f->stop = (char*)f->start + b->len;
+ f->flush = nil;
+ f->farg = nil;
+ f->nfmt = 0;
}
#define ROUNDUP(p, n) ((void*)(((uintptr)(p)+(n)-1)&~(uintptr)((n)-1)))
diff --git a/src/cmd/venti/srv/zeropart.c b/src/cmd/venti/srv/zeropart.c
@@ -10,10 +10,6 @@ zeropart(Part *part, int blocksize)
int w;
fprint(2, "clearing the partition\n");
-/*fprint(2, "NOT!\n"); */
-/*return; */
-/*b=alloczblock(MaxIoSize, 1, blocksize); */
-/*freezblock(b); */
b = alloczblock(MaxIoSize, 1, blocksize);
w = 0;
You are viewing proxied material from mx1.adamsgaard.dk. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.