| assorted changes from Plan 9 - plan9port - [fork] Plan 9 from user space | |
| git clone git://src.adamsgaard.dk/plan9port | |
| Log | |
| Files | |
| Refs | |
| README | |
| LICENSE | |
| --- | |
| commit 28b49df3542a635cca788f3de213385f3fcb6334 | |
| parent 686bd37d9d8db5e3b969a3aa2d5b455e0976b262 | |
| Author: rsc <devnull@localhost> | |
| Date: Tue, 18 Jul 2006 15:26:33 +0000 | |
| assorted changes from Plan 9 | |
| Diffstat: | |
| M src/cmd/venti/srv/arena.c | 40 +++++++++++++++++++++++------… | |
| M src/cmd/venti/srv/arenas.c | 6 +++--- | |
| M src/cmd/venti/srv/bloom.c | 62 ++++++++++++++++++++++++-----… | |
| M src/cmd/venti/srv/buildbuck.c | 6 +++--- | |
| M src/cmd/venti/srv/buildindex.c | 1018 +++++++++++++++++++++++++++--… | |
| M src/cmd/venti/srv/checkindex.c | 4 +++- | |
| M src/cmd/venti/srv/clump.c | 7 ++++++- | |
| M src/cmd/venti/srv/conv.c | 66 +++++++++++++++++++++++++++++… | |
| M src/cmd/venti/srv/dat.h | 42 +++++++++++++++++++----------… | |
| M src/cmd/venti/srv/dcache.c | 64 +++++++++++++++++++++--------… | |
| A src/cmd/venti/srv/disksched.c | 88 +++++++++++++++++++++++++++++… | |
| M src/cmd/venti/srv/findscore.c | 2 +- | |
| A src/cmd/venti/srv/fixarenas.c | 1894 +++++++++++++++++++++++++++++… | |
| M src/cmd/venti/srv/fns.h | 9 +++++++++ | |
| M src/cmd/venti/srv/graph.c | 16 ++++++++++------ | |
| M src/cmd/venti/srv/httpd.c | 219 ++++++++++++++++++++++++++++-… | |
| M src/cmd/venti/srv/icache.c | 50 ++++++++++++++++++++++++++---… | |
| M src/cmd/venti/srv/icachewrite.c | 36 +++++++++++++++++++++--------… | |
| M src/cmd/venti/srv/index.c | 22 ++++++++++------------ | |
| M src/cmd/venti/srv/lump.c | 27 ++++++++++++++++++++++++--- | |
| M src/cmd/venti/srv/lumpcache.c | 13 +++++++++++-- | |
| M src/cmd/venti/srv/lumpqueue.c | 16 ---------------- | |
| A src/cmd/venti/srv/mirrorarenas.c | 464 ++++++++++++++++++++++++++++++ | |
| M src/cmd/venti/srv/mkfile | 3 +++ | |
| M src/cmd/venti/srv/part.c | 260 +++++++++++++++++++++++++++++… | |
| A src/cmd/venti/srv/printarenapart.c | 160 +++++++++++++++++++++++++++++… | |
| M src/cmd/venti/srv/printarenas.c | 2 +- | |
| M src/cmd/venti/srv/sortientry.c | 17 +++++------------ | |
| M src/cmd/venti/srv/stats.c | 2 +- | |
| M src/cmd/venti/srv/syncarena.c | 21 +++++++++++---------- | |
| M src/cmd/venti/srv/syncindex.c | 2 ++ | |
| M src/cmd/venti/srv/syncindex0.c | 16 +++++++++++++++- | |
| M src/cmd/venti/srv/unwhack.c | 2 +- | |
| M src/cmd/venti/srv/utils.c | 5 +++++ | |
| M src/cmd/venti/srv/venti.c | 6 ++++-- | |
| M src/cmd/venti/srv/verifyarena.c | 220 +++++++++++++++++++++++++----… | |
| M src/cmd/venti/srv/wrarena.c | 4 ++-- | |
| M src/cmd/venti/srv/zblock.c | 6 ++++-- | |
| M src/cmd/venti/srv/zeropart.c | 4 ---- | |
| 39 files changed, 4540 insertions(+), 361 deletions(-) | |
| --- | |
| diff --git a/src/cmd/venti/srv/arena.c b/src/cmd/venti/srv/arena.c | |
| @@ -20,6 +20,7 @@ static void sumproc(void *); | |
| static QLock sumlock; | |
| static Rendez sumwait; | |
| static ASum *sumq; | |
| +static ASum *sumqtail; | |
| static uchar zero[8192]; | |
| int arenasumsleeptime; | |
| @@ -257,7 +258,6 @@ writearena(Arena *arena, u64int aa, u8int *clbuf, u32int n) | |
| if(m > n - nn) | |
| m = n - nn; | |
| memmove(&b->data[off], &clbuf[nn], m); | |
| - /* ok = writepart(arena->part, a, b->data, blocksize); */ | |
| ok = 0; | |
| putdblock(b); | |
| if(ok < 0){ | |
| @@ -329,7 +329,6 @@ writeaclump(Arena *arena, Clump *c, u8int *clbuf, u64int s… | |
| if(m > n - nn) | |
| m = n - nn; | |
| memmove(&b->data[off], &clbuf[nn], m); | |
| - /* ok = writepart(arena->part, a, b->data, blocksize); */ | |
| ok = 0; | |
| putdblock(b); | |
| if(ok < 0){ | |
| @@ -356,6 +355,7 @@ writeaclump(Arena *arena, Clump *c, u8int *clbuf, u64int s… | |
| arena->ctime = arena->wtime; | |
| writeclumpinfo(arena, clump, &c->info); | |
| + wbarena(arena); | |
| /* set up for call to setdcachestate */ | |
| as.arena = arena; | |
| @@ -410,6 +410,9 @@ setatailstate(AState *as) | |
| trace(0, "setatailstate %s 0x%llux clumps %d", as->arena->name, as->aa… | |
| + /* | |
| + * Look up as->arena to find index. | |
| + */ | |
| ix = mainindex; | |
| for(i=0; i<ix->narenas; i++) | |
| if(ix->arenas[i] == as->arena) | |
| @@ -419,6 +422,9 @@ setatailstate(AState *as) | |
| return; | |
| } | |
| + /* | |
| + * Walk backward until we find the last time these were in sync. | |
| + */ | |
| for(j=i; --j>=0; ){ | |
| a = ix->arenas[j]; | |
| if(atailcmp(&a->diskstats, &a->memstats) == 0) | |
| @@ -464,8 +470,12 @@ backsumarena(Arena *arena) | |
| return; | |
| qlock(&sumlock); | |
| as->arena = arena; | |
| - as->next = sumq; | |
| - sumq = as; | |
| + as->next = nil; | |
| + if(sumq) | |
| + sumqtail->next = as; | |
| + else | |
| + sumq = as; | |
| + sumqtail = as; | |
| rwakeup(&sumwait); | |
| qunlock(&sumlock); | |
| } | |
| @@ -499,6 +509,7 @@ sumarena(Arena *arena) | |
| DigestState s; | |
| u64int a, e; | |
| u32int bs; | |
| + int t; | |
| u8int score[VtScoreSize]; | |
| bs = MaxIoSize; | |
| @@ -512,7 +523,12 @@ sumarena(Arena *arena) | |
| b = alloczblock(bs, 0, arena->part->blocksize); | |
| e = arena->base + arena->size; | |
| for(a = arena->base - arena->blocksize; a + arena->blocksize <= e; a +… | |
| - sleep(arenasumsleeptime); | |
| + disksched(); | |
| + while((t=arenasumsleeptime) == SleepForever){ | |
| + sleep(1000); | |
| + disksched(); | |
| + } | |
| + sleep(t); | |
| if(a + bs > e) | |
| bs = arena->blocksize; | |
| if(readpart(arena->part, a, b->data, bs) < 0) | |
| @@ -595,7 +611,7 @@ wbarenahead(Arena *arena) | |
| b = alloczblock(arena->blocksize, 1, arena->part->blocksize); | |
| if(b == nil){ | |
| logerr(EAdmin, "can't write arena header: %r"); | |
| -/*/ZZZ add error message? */ | |
| +/* ZZZ add error message? */ | |
| return -1; | |
| } | |
| /* | |
| @@ -681,18 +697,22 @@ okarena(Arena *arena) | |
| ok = 0; | |
| dsize = arenadirsize(arena, arena->diskstats.clumps); | |
| if(arena->diskstats.used + dsize > arena->size){ | |
| - seterr(ECorrupt, "arena used > size"); | |
| + seterr(ECorrupt, "arena %s used > size", arena->name); | |
| ok = -1; | |
| } | |
| if(arena->diskstats.cclumps > arena->diskstats.clumps) | |
| - logerr(ECorrupt, "arena has more compressed clumps than total … | |
| + logerr(ECorrupt, "arena %s has more compressed clumps than tot… | |
| + /* | |
| + * This need not be true if some of the disk is corrupted. | |
| + * | |
| if(arena->diskstats.uncsize + arena->diskstats.clumps * ClumpSize + ar… | |
| - logerr(ECorrupt, "arena uncompressed size inconsistent with us… | |
| + logerr(ECorrupt, "arena %s uncompressed size inconsistent with… | |
| + */ | |
| if(arena->ctime > arena->wtime) | |
| - logerr(ECorrupt, "arena creation time after last write time"); | |
| + logerr(ECorrupt, "arena %s creation time after last write time… | |
| return ok; | |
| } | |
| diff --git a/src/cmd/venti/srv/arenas.c b/src/cmd/venti/srv/arenas.c | |
| @@ -214,7 +214,7 @@ wbarenapart(ArenaPart *ap) | |
| return -1; | |
| b = alloczblock(HeadSize, 1, 0); | |
| if(b == nil) | |
| -/*ZZZ set error message? */ | |
| +/* ZZZ set error message? */ | |
| return -1; | |
| if(packarenapart(ap, b->data) < 0){ | |
| @@ -337,8 +337,8 @@ wbarenamap(AMap *am, int n, Part *part, u64int base, u64in… | |
| /* | |
| * amap: n '\n' amapelem * n | |
| * n: u32int | |
| - * amapelem: name '\t' astart '\t' asize '\n' | |
| - * astart, asize: u64int | |
| + * amapelem: name '\t' astart '\t' astop '\n' | |
| + * astart, astop: u64int | |
| */ | |
| int | |
| parseamap(IFile *f, AMapN *amn) | |
| diff --git a/src/cmd/venti/srv/bloom.c b/src/cmd/venti/srv/bloom.c | |
| @@ -7,6 +7,8 @@ | |
| #include "dat.h" | |
| #include "fns.h" | |
| +int ignorebloom; | |
| + | |
| int | |
| bloominit(Bloom *b, vlong vsize, u8int *data) | |
| { | |
| @@ -24,6 +26,7 @@ bloominit(Bloom *b, vlong vsize, u8int *data) | |
| if(unpackbloomhead(b, data) < 0) | |
| return -1; | |
| +fprint(2, "bloom size %lud nhash %d\n", b->size, b->nhash); | |
| b->mask = b->size-1; | |
| b->data = data; | |
| return 0; | |
| @@ -38,11 +41,7 @@ wbbloomhead(Bloom *b) | |
| Bloom* | |
| readbloom(Part *p) | |
| { | |
| - int i, n; | |
| - uint ones; | |
| uchar buf[512]; | |
| - uchar *data; | |
| - u32int *a; | |
| Bloom *b; | |
| b = vtmallocz(sizeof *b); | |
| @@ -52,14 +51,40 @@ readbloom(Part *p) | |
| vtfree(b); | |
| return nil; | |
| } | |
| + b->part = p; | |
| + return b; | |
| +} | |
| + | |
| +int | |
| +resetbloom(Bloom *b) | |
| +{ | |
| + uchar *data; | |
| + | |
| data = vtmallocz(b->size); | |
| - if(readpart(p, 0, data, b->size) < 0){ | |
| +fprint(2, "bloom data %lud\n", b->size); | |
| + b->data = data; | |
| + if(b->size == MaxBloomSize) /* 2^32 overflows ulong */ | |
| + addstat(StatBloomBits, b->size*8-1); | |
| + else | |
| + addstat(StatBloomBits, b->size*8); | |
| + return 0; | |
| +} | |
| + | |
| +int | |
| +loadbloom(Bloom *b) | |
| +{ | |
| + int i, n; | |
| + uint ones; | |
| + uchar *data; | |
| + u32int *a; | |
| + | |
| + data = vtmallocz(b->size); | |
| + if(readpart(b->part, 0, data, b->size) < 0){ | |
| vtfree(b); | |
| vtfree(data); | |
| - return nil; | |
| + return -1; | |
| } | |
| b->data = data; | |
| - b->part = p; | |
| a = (u32int*)b->data; | |
| n = b->size/4; | |
| @@ -73,7 +98,7 @@ readbloom(Part *p) | |
| else | |
| addstat(StatBloomBits, b->size*8); | |
| - return b; | |
| + return 0; | |
| } | |
| int | |
| @@ -101,6 +126,8 @@ gethashes(u8int *score, ulong *h) | |
| a ^= *(u32int*)(score+i); | |
| b ^= *(u32int*)(score+i+4); | |
| } | |
| + if(i+4 <= VtScoreSize) /* 20 is not 4-aligned */ | |
| + a ^= *(u32int*)(score+i); | |
| for(i=0; i<BloomMaxHash; i++, a+=b) | |
| h[i] = a < BloomHeadSize*8 ? BloomHeadSize*8 : a; | |
| } | |
| @@ -154,14 +181,17 @@ inbloomfilter(Bloom *b, u8int *score) | |
| int r; | |
| uint ms; | |
| - if(b == nil) | |
| + if(b == nil || b->data == nil) | |
| return 1; | |
| + if(ignorebloom) | |
| + return 1; | |
| + | |
| ms = msec(); | |
| rlock(&b->lk); | |
| r = _inbloomfilter(b, score); | |
| runlock(&b->lk); | |
| - ms = msec() - ms; | |
| + ms = ms - msec(); | |
| addstat2(StatBloomLookup, 1, StatBloomLookupTime, ms); | |
| if(r) | |
| addstat(StatBloomMiss, 1); | |
| @@ -173,7 +203,7 @@ inbloomfilter(Bloom *b, u8int *score) | |
| void | |
| markbloomfilter(Bloom *b, u8int *score) | |
| { | |
| - if(b == nil) | |
| + if(b == nil || b->data == nil) | |
| return; | |
| rlock(&b->lk); | |
| @@ -186,14 +216,18 @@ markbloomfilter(Bloom *b, u8int *score) | |
| static void | |
| bloomwriteproc(void *v) | |
| { | |
| + int ret; | |
| Bloom *b; | |
| - | |
| + | |
| + threadsetname("bloomwriteproc"); | |
| b = v; | |
| for(;;){ | |
| recv(b->writechan, 0); | |
| - if(writebloom(b) < 0) | |
| + if((ret=writebloom(b)) < 0) | |
| fprint(2, "oops! writing bloom: %r\n"); | |
| - send(b->writedonechan, 0); | |
| + else | |
| + ret = 0; | |
| + sendul(b->writedonechan, ret); | |
| } | |
| } | |
| diff --git a/src/cmd/venti/srv/buildbuck.c b/src/cmd/venti/srv/buildbuck.c | |
| @@ -21,7 +21,7 @@ initiestream(Part *part, u64int off, u64int clumps, u32int s… | |
| { | |
| IEStream *ies; | |
| -/*ZZZ out of memory? */ | |
| +/* out of memory? */ | |
| ies = MKZ(IEStream); | |
| ies->buf = MKN(u8int, size); | |
| ies->epos = ies->buf; | |
| @@ -61,7 +61,7 @@ peekientry(IEStream *ies) | |
| nn -= n; | |
| if(nn == 0) | |
| return nil; | |
| -/*fprint(2, "peek %d from %llud into %p\n", nn, ies->off, ies->epos); */ | |
| +//fprint(2, "peek %d from %llud into %p\n", nn, ies->off, ies->epos); | |
| if(readpart(ies->part, ies->off, ies->epos, nn) < 0){ | |
| seterr(EOk, "can't read sorted index entries: %r"); | |
| return nil; | |
| @@ -101,7 +101,7 @@ buildbucket(Index *ix, IEStream *ies, IBucket *ib, uint ma… | |
| b = peekientry(ies); | |
| if(b == nil) | |
| return TWID32; | |
| -/*fprint(2, "b=%p ies->n=%lld ib.n=%d buck=%d score=%V\n", b, ies->n, ib->n, i… | |
| +/* fprint(2, "b=%p ies->n=%lld ib.n=%d buck=%d score=%V\n", b, ies->n, ib->n, … | |
| if(ib->n == 0) | |
| buck = iebuck(ix, b, ib, ies); | |
| else{ | |
| diff --git a/src/cmd/venti/srv/buildindex.c b/src/cmd/venti/srv/buildindex.c | |
| @@ -1,164 +1,936 @@ | |
| /* | |
| - * Rebuild the Venti index from scratch. | |
| + * Rebuild the index from scratch, in place. | |
| */ | |
| - | |
| #include "stdinc.h" | |
| #include "dat.h" | |
| #include "fns.h" | |
| -/* | |
| - * Write a single bucket. Could profit from a big buffer here | |
| - * so that we can absorb sporadic runs of blocks into one write, | |
| - * avoiding disk seeks. | |
| - */ | |
| -static int | |
| -writebucket(Index *ix, u32int buck, IBucket *ib, ZBlock *b) | |
| +enum | |
| { | |
| - ISect *is; | |
| + MinBufSize = 64*1024, | |
| + MaxBufSize = 4*1024*1024, | |
| +}; | |
| - is = ix->sects[indexsect0(ix, buck)]; | |
| - if(buck < is->start || buck >= is->stop){ | |
| - seterr(EAdmin, "cannot find index section for bucket %lud\n", … | |
| - return -1; | |
| - } | |
| - buck -= is->start; | |
| +int dumb; | |
| +int errors; | |
| +char **isect; | |
| +int nisect; | |
| +int bloom; | |
| +int zero; | |
| -/* | |
| - qlock(&stats.lock); | |
| - stats.indexwrites++; | |
| - qunlock(&stats.lock); | |
| -*/ | |
| - packibucket(ib, b->data, is->bucketmagic); | |
| - return writepart(is->part, is->blockbase + ((u64int)buck << is->blockl… | |
| -} | |
| +u32int isectmem; | |
| +u64int totalbuckets; | |
| +u64int totalclumps; | |
| +Channel *arenadonechan; | |
| +Channel *isectdonechan; | |
| +Index *ix; | |
| -static int | |
| -buildindex(Index *ix, Part *part, u64int off, u64int clumps, int zero) | |
| -{ | |
| - IEStream *ies; | |
| - IBucket ib, zib; | |
| - ZBlock *z, *b; | |
| - u32int next, buck; | |
| - int ok; | |
| - uint nbuck; | |
| - u64int found = 0; | |
| - | |
| -/*ZZZ make buffer size configurable */ | |
| - b = alloczblock(ix->blocksize, 0, ix->blocksize); | |
| - z = alloczblock(ix->blocksize, 1, ix->blocksize); | |
| - ies = initiestream(part, off, clumps, 64*1024); | |
| - if(b == nil || z == nil || ies == nil){ | |
| - ok = 0; | |
| - goto breakout; | |
| - return -1; | |
| - } | |
| - ok = 0; | |
| - next = 0; | |
| - memset(&ib, 0, sizeof ib); | |
| - ib.data = b->data + IBucketSize; | |
| - zib.data = z->data + IBucketSize; | |
| - zib.n = 0; | |
| - nbuck = 0; | |
| - for(;;){ | |
| - buck = buildbucket(ix, ies, &ib, ix->blocksize-IBucketSize); | |
| - found += ib.n; | |
| - if(zero){ | |
| - for(; next != buck; next++){ | |
| - if(next == ix->buckets){ | |
| - if(buck != TWID32){ | |
| - fprint(2, "bucket out of range… | |
| - ok = -1; | |
| - } | |
| - goto breakout; | |
| - } | |
| - if(writebucket(ix, next, &zib, z) < 0){ | |
| - fprint(2, "can't write zero bucket to … | |
| - ok = -1; | |
| - } | |
| - } | |
| - } | |
| - if(buck >= ix->buckets){ | |
| - if(buck == TWID32) | |
| - break; | |
| - fprint(2, "bucket out of range\n"); | |
| - ok = -1; | |
| - goto breakout; | |
| - } | |
| - if(writebucket(ix, buck, &ib, b) < 0){ | |
| - fprint(2, "bad bucket found=%lld: %r\n", found); | |
| - ok = -1; | |
| - } | |
| - next = buck + 1; | |
| - if(++nbuck%10000 == 0) | |
| - fprint(2, "\t%,d buckets written...\n", nbuck); | |
| - } | |
| -breakout:; | |
| - fprint(2, "wrote index with %lld entries\n", found); | |
| - freeiestream(ies); | |
| - freezblock(z); | |
| - freezblock(b); | |
| - return ok; | |
| -} | |
| +u64int arenaentries; | |
| +u64int skipentries; | |
| +u64int indexentries; | |
| + | |
| +static int shouldprocess(ISect*); | |
| +static void isectproc(void*); | |
| +static void arenapartproc(void*); | |
| void | |
| usage(void) | |
| { | |
| - fprint(2, "usage: buildindex [-Z] [-B blockcachesize] config tmppart\n… | |
| - threadexitsall(0); | |
| + fprint(2, "usage: buildindex [-b] [-i isect]... [-M imem] venti.conf\n… | |
| + threadexitsall("usage"); | |
| } | |
| -Config conf; | |
| - | |
| void | |
| threadmain(int argc, char *argv[]) | |
| { | |
| - Part *part; | |
| - u64int clumps, base; | |
| - u32int bcmem; | |
| - int zero; | |
| - | |
| - zero = 1; | |
| - bcmem = 0; | |
| + int fd, i, napart; | |
| + u32int bcmem, imem; | |
| + Config conf; | |
| + Part *p; | |
| + | |
| ventifmtinstall(); | |
| + imem = 256*1024*1024; | |
| ARGBEGIN{ | |
| - case 'B': | |
| - bcmem = unittoull(ARGF()); | |
| + case 'b': | |
| + bloom = 1; | |
| + break; | |
| + case 'i': | |
| + isect = vtrealloc(isect, (nisect+1)*sizeof(isect[0])); | |
| + isect[nisect++] = EARGF(usage()); | |
| break; | |
| - case 'Z': | |
| - zero = 0; | |
| + case 'd': /* debugging - make sure to run all 3 passes */ | |
| + dumb = 1; | |
| + break; | |
| + case 'M': | |
| + imem = unittoull(EARGF(usage())); | |
| break; | |
| default: | |
| usage(); | |
| break; | |
| }ARGEND | |
| - | |
| - if(argc != 2) | |
| + | |
| + if(argc != 1) | |
| usage(); | |
| if(initventi(argv[0], &conf) < 0) | |
| sysfatal("can't init venti: %r"); | |
| + ix = mainindex; | |
| + if(nisect == 0 && ix->bloom) | |
| + bloom = 1; | |
| + if(bloom && ix->bloom && resetbloom(ix->bloom) < 0) | |
| + sysfatal("loadbloom: %r"); | |
| + if(bloom && !ix->bloom) | |
| + sysfatal("-b specified but no bloom filter"); | |
| + if(!bloom) | |
| + ix->bloom = nil; | |
| + isectmem = imem/ix->nsects; | |
| - if(bcmem < maxblocksize * (mainindex->narenas + mainindex->nsects * 4 … | |
| - bcmem = maxblocksize * (mainindex->narenas + mainindex->nsects… | |
| + /* | |
| + * safety first - only need read access to arenas | |
| + */ | |
| + p = nil; | |
| + for(i=0; i<ix->narenas; i++){ | |
| + if(ix->arenas[i]->part != p){ | |
| + p = ix->arenas[i]->part; | |
| + if((fd = open(p->filename, OREAD)) < 0) | |
| + sysfatal("cannot reopen %s: %r", p->filename); | |
| + dup(fd, p->fd); | |
| + close(fd); | |
| + } | |
| + } | |
| + | |
| + /* | |
| + * need a block for every arena | |
| + */ | |
| + bcmem = maxblocksize * (mainindex->narenas + 16); | |
| if(0) fprint(2, "initialize %d bytes of disk block cache\n", bcmem); | |
| initdcache(bcmem); | |
| + | |
| + totalclumps = 0; | |
| + for(i=0; i<ix->narenas; i++) | |
| + totalclumps += ix->arenas[i]->diskstats.clumps; | |
| + | |
| + totalbuckets = 0; | |
| + for(i=0; i<ix->nsects; i++) | |
| + totalbuckets += ix->sects[i]->blocks; | |
| + fprint(2, "%,lld clumps, %,lld buckets\n", totalclumps, totalbuckets); | |
| + | |
| + /* start index procs */ | |
| + fprint(2, "%T read index\n"); | |
| + isectdonechan = chancreate(sizeof(void*), 0); | |
| + for(i=0; i<ix->nsects; i++){ | |
| + if(shouldprocess(ix->sects[i])) | |
| + ix->sects[i]->writechan = chancreate(sizeof(IEntry), 0… | |
| + vtproc(isectproc, ix->sects[i]); | |
| + } | |
| + | |
| + for(i=0; i<nisect; i++) | |
| + if(isect[i]) | |
| + fprint(2, "warning: did not find index section %s\n", … | |
| + | |
| + /* start arena procs */ | |
| + p = nil; | |
| + napart = 0; | |
| + arenadonechan = chancreate(sizeof(void*), 0); | |
| + for(i=0; i<ix->narenas; i++){ | |
| + if(ix->arenas[i]->part != p){ | |
| + p = ix->arenas[i]->part; | |
| + vtproc(arenapartproc, p); | |
| + napart++; | |
| + } | |
| + } | |
| + | |
| + /* wait for arena procs to finish */ | |
| + for(i=0; i<napart; i++) | |
| + recvp(arenadonechan); | |
| + | |
| + /* tell index procs to finish */ | |
| + for(i=0; i<ix->nsects; i++) | |
| + if(ix->sects[i]->writechan) | |
| + send(ix->sects[i]->writechan, nil); | |
| + | |
| + /* wait for index procs to finish */ | |
| + for(i=0; i<ix->nsects; i++) | |
| + if(ix->sects[i]->writechan) | |
| + recvp(isectdonechan); | |
| + | |
| + if(ix->bloom && writebloom(ix->bloom) < 0) | |
| + fprint(2, "writing bloom filter: %r\n"); | |
| + | |
| + fprint(2, "%T done arenaentries=%,lld indexed=%,lld (nskip=%,lld)\n", | |
| + arenaentries, indexentries, skipentries); | |
| + threadexitsall(nil); | |
| +} | |
| + | |
| +static int | |
| +shouldprocess(ISect *is) | |
| +{ | |
| + int i; | |
| + | |
| + if(nisect == 0) | |
| + return 1; | |
| + | |
| + for(i=0; i<nisect; i++) | |
| + if(isect[i] && strcmp(isect[i], is->name) == 0){ | |
| + isect[i] = nil; | |
| + return 1; | |
| + } | |
| + return 0; | |
| +} | |
| + | |
| +static void | |
| +add(u64int *a, u64int n) | |
| +{ | |
| + static Lock l; | |
| + | |
| + lock(&l); | |
| + *a += n; | |
| + unlock(&l); | |
| +} | |
| + | |
| +/* | |
| + * Read through an arena partition and send each of its IEntries | |
| + * to the appropriate index section. When finished, send on | |
| + * arenadonechan. | |
| + */ | |
| +enum | |
| +{ | |
| + ClumpChunks = 32*1024, | |
| +}; | |
| +static void | |
| +arenapartproc(void *v) | |
| +{ | |
| + int i, j, n, nskip, x; | |
| + u32int clump; | |
| + u64int addr, tot; | |
| + Arena *a; | |
| + ClumpInfo *ci, *cis; | |
| + IEntry ie; | |
| + Part *p; | |
| + | |
| + p = v; | |
| + threadsetname("arenaproc %s", p->name); | |
| + | |
| + nskip = 0; | |
| + tot = 0; | |
| + cis = MKN(ClumpInfo, ClumpChunks); | |
| + for(i=0; i<ix->narenas; i++){ | |
| + a = ix->arenas[i]; | |
| + if(a->part != p) | |
| + continue; | |
| + if(a->memstats.clumps) | |
| + fprint(2, "%T arena %s: %d entries\n", | |
| + a->name, a->memstats.clumps); | |
| + addr = ix->amap[i].start; | |
| + for(clump=0; clump<a->memstats.clumps; clump+=n){ | |
| + n = ClumpChunks; | |
| + if(n > a->memstats.clumps - clump) | |
| + n = a->memstats.clumps - clump; | |
| + if(readclumpinfos(a, clump, cis, n) != n){ | |
| + fprint(2, "%T arena %s: directory read: %r\n",… | |
| + errors = 1; | |
| + break; | |
| + } | |
| + for(j=0; j<n; j++){ | |
| + ci = &cis[j]; | |
| + ie.ia.type = ci->type; | |
| + ie.ia.size = ci->uncsize; | |
| + ie.ia.addr = addr; | |
| + addr += ci->size + ClumpSize; | |
| + ie.ia.blocks = (ci->size + ClumpSize + (1<<ABl… | |
| + scorecp(ie.score, ci->score); | |
| + if(ci->type == VtCorruptType) | |
| + nskip++; | |
| + else{ | |
| + tot++; | |
| + x = indexsect(ix, ie.score); | |
| + assert(0 <= x && x < ix->nsects); | |
| + if(ix->sects[x]->writechan) | |
| + send(ix->sects[x]->writechan, … | |
| + if(ix->bloom) | |
| + markbloomfilter(ix->bloom, ie.… | |
| + } | |
| + } | |
| + } | |
| + } | |
| + add(&arenaentries, tot); | |
| + add(&skipentries, nskip); | |
| + sendp(arenadonechan, p); | |
| +} | |
| + | |
| +/* | |
| + * Convert score into relative bucket number in isect. | |
| + * Can pass a packed ientry instead of score - score is first. | |
| + */ | |
| +static u32int | |
| +score2bucket(ISect *is, uchar *score) | |
| +{ | |
| + u32int b; | |
| + | |
| + b = hashbits(score, 32)/ix->div; | |
| + assert(is->start <= b && b < is->stop); | |
| + return b - is->start; | |
| +} | |
| + | |
| +/* | |
| + * Convert offset in index section to bucket number. | |
| + */ | |
| +static u32int | |
| +offset2bucket(ISect *is, u64int offset) | |
| +{ | |
| + u32int b; | |
| + | |
| + assert(is->blockbase <= offset); | |
| + offset -= is->blockbase; | |
| + b = offset/is->blocksize; | |
| + assert(b < is->stop-is->start); | |
| + return b; | |
| +} | |
| + | |
| +/* | |
| + * Convert bucket number to offset. | |
| + */ | |
| +static u64int | |
| +bucket2offset(ISect *is, u32int b) | |
| +{ | |
| + assert(b <= is->stop-is->start); | |
| + return is->blockbase + (u64int)b*is->blocksize; | |
| +} | |
| + | |
| +/* | |
| + * IEntry buffers to hold initial round of spraying. | |
| + */ | |
| +typedef struct Buf Buf; | |
| +struct Buf | |
| +{ | |
| + Part *part; /* partition being written */ | |
| + uchar *bp; /* current block */ | |
| + uchar *ep; /* end of block */ | |
| + uchar *wp; /* write position in block */ | |
| + u64int boffset; /* start offset */ | |
| + u64int woffset; /* next write offset */ | |
| + u64int eoffset; /* end offset */ | |
| + u32int nentry; /* number of entries written */ | |
| +}; | |
| + | |
| +static void | |
| +bflush(Buf *buf) | |
| +{ | |
| + u32int bufsize; | |
| + | |
| + if(buf->woffset >= buf->eoffset) | |
| + sysfatal("buf index chunk overflow - need bufger index"); | |
| + bufsize = buf->ep - buf->bp; | |
| + if(writepart(buf->part, buf->woffset, buf->bp, bufsize) < 0){ | |
| + fprint(2, "write %s: %r\n", buf->part->name); | |
| + errors = 1; | |
| + } | |
| + buf->woffset += bufsize; | |
| + memset(buf->bp, 0, bufsize); | |
| + buf->wp = buf->bp; | |
| +} | |
| + | |
| +static void | |
| +bwrite(Buf *buf, IEntry *ie) | |
| +{ | |
| + if(buf->wp+IEntrySize > buf->ep) | |
| + bflush(buf); | |
| + assert(buf->bp <= buf->wp && buf->wp < buf->ep); | |
| + packientry(ie, buf->wp); | |
| + buf->wp += IEntrySize; | |
| + assert(buf->bp <= buf->wp && buf->wp <= buf->ep); | |
| + buf->nentry++; | |
| +} | |
| + | |
| +/* | |
| + * Minibuffer. In-memory data structure holds our place | |
| + * in the buffer but has no block data. We are writing and | |
| + * reading the minibuffers at the same time. (Careful!) | |
| + */ | |
| +typedef struct Minibuf Minibuf; | |
| +struct Minibuf | |
| +{ | |
| + u64int boffset; /* start offset */ | |
| + u64int roffset; /* read offset */ | |
| + u64int woffset; /* write offset */ | |
| + u64int eoffset; /* end offset */ | |
| + u32int nentry; /* # entries left to read */ | |
| + u32int nwentry; /* # entries written */ | |
| +}; | |
| + | |
| +/* | |
| + * Index entry pool. Used when trying to shuffle around | |
| + * the entries in a big buffer into the corresponding M minibuffers. | |
| + * Sized to hold M*EntriesPerBlock entries, so that there will always | |
| + * either be room in the pool for another block worth of entries | |
| + * or there will be an entire block worth of sorted entries to | |
| + * write out. | |
| + */ | |
| +typedef struct IEntryLink IEntryLink; | |
| +typedef struct IPool IPool; | |
| + | |
| +struct IEntryLink | |
| +{ | |
| + uchar ie[IEntrySize]; /* raw IEntry */ | |
| + IEntryLink *next; /* next in chain */ | |
| +}; | |
| + | |
| +struct IPool | |
| +{ | |
| + ISect *isect; | |
| + u32int buck0; /* first bucket in pool */ | |
| + u32int mbufbuckets; /* buckets per minibuf */ | |
| + IEntryLink *entry; /* all IEntryLinks */ | |
| + u32int nentry; /* # of IEntryLinks */ | |
| + IEntryLink *free; /* free list */ | |
| + u32int nfree; /* # on free list */ | |
| + Minibuf *mbuf; /* all minibufs */ | |
| + u32int nmbuf; /* # of minibufs */ | |
| + IEntryLink **mlist; /* lists for each minibuf */ | |
| + u32int *mcount; /* # on each mlist[i] */ | |
| + u32int bufsize; /* block buffer size */ | |
| + uchar *rbuf; /* read buffer */ | |
| + uchar *wbuf; /* write buffer */ | |
| + u32int epbuf; /* entries per block buffer */ | |
| +}; | |
| + | |
| +/* | |
| +static int | |
| +countsokay(IPool *p) | |
| +{ | |
| + int i; | |
| + u64int n; | |
| + | |
| + n = 0; | |
| + for(i=0; i<p->nmbuf; i++) | |
| + n += p->mcount[i]; | |
| + n += p->nfree; | |
| + if(n != p->nentry){ | |
| + print("free %ud:", p->nfree); | |
| + for(i=0; i<p->nmbuf; i++) | |
| + print(" %ud", p->mcount[i]); | |
| + print(" = %lld nentry: %ud\n", n, p->nentry); | |
| + } | |
| + return n == p->nentry; | |
| +} | |
| +*/ | |
| - fprint(2, "building a new index %s using %s for temporary storage\n", … | |
| +static IPool* | |
| +mkipool(ISect *isect, Minibuf *mbuf, u32int nmbuf, | |
| + u32int mbufbuckets, u32int bufsize) | |
| +{ | |
| + u32int i, nentry; | |
| + uchar *data; | |
| + IPool *p; | |
| + IEntryLink *l; | |
| + | |
| + nentry = (nmbuf+1)*bufsize / IEntrySize; | |
| + p = ezmalloc(sizeof(IPool) | |
| + +nentry*sizeof(IEntry) | |
| + +nmbuf*sizeof(IEntryLink*) | |
| + +nmbuf*sizeof(u32int) | |
| + +3*bufsize); | |
| + | |
| + p->isect = isect; | |
| + p->mbufbuckets = mbufbuckets; | |
| + p->bufsize = bufsize; | |
| + p->entry = (IEntryLink*)(p+1); | |
| + p->nentry = nentry; | |
| + p->mlist = (IEntryLink**)(p->entry+nentry); | |
| + p->mcount = (u32int*)(p->mlist+nmbuf); | |
| + p->nmbuf = nmbuf; | |
| + p->mbuf = mbuf; | |
| + data = (uchar*)(p->mcount+nmbuf); | |
| + data += bufsize - (u32int)data%bufsize; | |
| + p->rbuf = data; | |
| + p->wbuf = data+bufsize; | |
| + p->epbuf = bufsize/IEntrySize; | |
| - part = initpart(argv[1], ORDWR|ODIRECT); | |
| - if(part == nil) | |
| - sysfatal("can't initialize temporary partition: %r"); | |
| + for(i=0; i<p->nentry; i++){ | |
| + l = &p->entry[i]; | |
| + l->next = p->free; | |
| + p->free = l; | |
| + p->nfree++; | |
| + } | |
| + return p; | |
| +} | |
| - clumps = sortrawientries(mainindex, part, &base, mainindex->bloom); | |
| - if(clumps == TWID64) | |
| - sysfatal("can't build sorted index: %r"); | |
| - fprint(2, "found and sorted index entries for clumps=%lld at %lld\n", … | |
| +/* | |
| + * Add the index entry ie to the pool p. | |
| + * Caller must know there is room. | |
| + */ | |
| +static void | |
| +ipoolinsert(IPool *p, uchar *ie) | |
| +{ | |
| + u32int buck, x; | |
| + IEntryLink *l; | |
| + | |
| + assert(p->free != nil); | |
| + | |
| + buck = score2bucket(p->isect, ie); | |
| + x = (buck-p->buck0) / p->mbufbuckets; | |
| + if(x >= p->nmbuf){ | |
| + fprint(2, "buck=%ud mbufbucket=%ud x=%ud\n", | |
| + buck, p->mbufbuckets, x); | |
| + } | |
| + assert(x < p->nmbuf); | |
| - if(buildindex(mainindex, part, base, clumps, zero) < 0) | |
| - sysfatal("can't build new index: %r"); | |
| + l = p->free; | |
| + p->free = l->next; | |
| + p->nfree--; | |
| + memmove(l->ie, ie, IEntrySize); | |
| + l->next = p->mlist[x]; | |
| + p->mlist[x] = l; | |
| + p->mcount[x]++; | |
| +} | |
| + | |
| +/* | |
| + * Pull out a block containing as many | |
| + * entries as possible for minibuffer x. | |
| + */ | |
| +static u32int | |
| +ipoolgetbuf(IPool *p, u32int x) | |
| +{ | |
| + uchar *bp, *ep, *wp; | |
| + IEntryLink *l; | |
| + u32int n; | |
| + | |
| + bp = p->wbuf; | |
| + ep = p->wbuf + p->bufsize; | |
| + n = 0; | |
| + assert(x < p->nmbuf); | |
| + for(wp=bp; wp+IEntrySize<=ep && p->mlist[x]; wp+=IEntrySize){ | |
| + l = p->mlist[x]; | |
| + p->mlist[x] = l->next; | |
| + p->mcount[x]--; | |
| + memmove(wp, l->ie, IEntrySize); | |
| + l->next = p->free; | |
| + p->free = l; | |
| + p->nfree++; | |
| + n++; | |
| + } | |
| + memset(wp, 0, ep-wp); | |
| + return n; | |
| +} | |
| + | |
| +/* | |
| + * Read a block worth of entries from the minibuf | |
| + * into the pool. Caller must know there is room. | |
| + */ | |
| +static void | |
| +ipoolloadblock(IPool *p, Minibuf *mb) | |
| +{ | |
| + u32int i, n; | |
| - if(mainindex->bloom) | |
| - writebloom(mainindex->bloom); | |
| + assert(mb->nentry > 0); | |
| + assert(mb->roffset >= mb->woffset); | |
| + assert(mb->roffset < mb->eoffset); | |
| - threadexitsall(0); | |
| + n = p->bufsize/IEntrySize; | |
| + if(n > mb->nentry) | |
| + n = mb->nentry; | |
| + if(readpart(p->isect->part, mb->roffset, p->rbuf, p->bufsize) < 0) | |
| + fprint(2, "readpart %s: %r\n", p->isect->part->name); | |
| + else{ | |
| + for(i=0; i<n; i++) | |
| + ipoolinsert(p, p->rbuf+i*IEntrySize); | |
| + } | |
| + mb->nentry -= n; | |
| + mb->roffset += p->bufsize; | |
| } | |
| + | |
| +/* | |
| + * Write out a block worth of entries to minibuffer x. | |
| + * If necessary, pick up the data there before overwriting it. | |
| + */ | |
| +static void | |
| +ipoolflush0(IPool *pool, u32int x) | |
| +{ | |
| + u32int bufsize; | |
| + Minibuf *mb; | |
| + | |
| + mb = pool->mbuf+x; | |
| + bufsize = pool->bufsize; | |
| + mb->nwentry += ipoolgetbuf(pool, x); | |
| + if(mb->nentry > 0 && mb->roffset == mb->woffset){ | |
| + assert(pool->nfree >= pool->bufsize/IEntrySize); | |
| + /* | |
| + * There will be room in the pool -- we just | |
| + * removed a block worth. | |
| + */ | |
| + ipoolloadblock(pool, mb); | |
| + } | |
| + if(writepart(pool->isect->part, mb->woffset, pool->wbuf, bufsize) < 0) | |
| + fprint(2, "writepart %s: %r\n", pool->isect->part->name); | |
| + mb->woffset += bufsize; | |
| +} | |
| + | |
| +/* | |
| + * Flush the first minibuffer that has a full block of | |
| + * entries pending (mcount >= epbuf). The pool is nearly | |
| + * full when this is called, so one must exist; if none | |
| + * is found the program dies with sysfatal. | |
| + */ | |
| +static void | |
| +ipoolflush1(IPool *pool) | |
| +{ | |
| + u32int k; | |
| + | |
| + assert(pool->nfree <= pool->epbuf); | |
| + | |
| + k = 0; | |
| + while(k < pool->nmbuf && pool->mcount[k] < pool->epbuf) | |
| + k++; | |
| + if(k == pool->nmbuf){ | |
| + /* can't be reached - someone must be full */ | |
| + sysfatal("ipoolflush1"); | |
| + } | |
| + ipoolflush0(pool, k); | |
| +} | |
| + | |
| +/* | |
| + * Drain the pool: keep flushing each minibuffer until its | |
| + * entry list is empty. Used once there is nothing more | |
| + * to read from disk; afterwards every pool slot must be free. | |
| + */ | |
| +static void | |
| +ipoolflush(IPool *pool) | |
| +{ | |
| + u32int k; | |
| + | |
| + k = 0; | |
| + while(k < pool->nmbuf){ | |
| + if(pool->mlist[k]){ | |
| + ipoolflush0(pool, k); | |
| + continue; | |
| + } | |
| + k++; | |
| + } | |
| + assert(pool->nfree == pool->nentry); | |
| +} | |
| + | |
| +/* | |
| + * Third pass. Pick up each minibuffer from disk into | |
| + * memory and then write out the buckets. | |
| + */ | |
| + | |
| +/* | |
| + * qsort comparison for packed index entries. | |
| + * Orders by ientrycmp; entries that compare equal are | |
| + * ordered by the negated bytewise comparison of the 8 address | |
| + * bytes at IEntryAddrOff, so higher index addresses come | |
| + * first (assumes duplicates are due to corruption in the | |
| + * lower addresses). | |
| + */ | |
| +static int | |
| +ientrycmpaddr(const void *va, const void *vb) | |
| +{ | |
| + uchar *x, *y; | |
| + int order; | |
| + | |
| + x = (uchar*)va; | |
| + y = (uchar*)vb; | |
| + order = ientrycmp(x, y); | |
| + if(order == 0) | |
| + order = -memcmp(x+IEntryAddrOff, y+IEntryAddrOff, 8); | |
| + return order; | |
| +} | |
| + | |
| +/* | |
| + * Write zeros over the byte range [o, e) of partition p, | |
| + * in chunks of at most MaxIoSize bytes. A failed write is | |
| + * reported on fd 2 and the loop moves on to the next chunk. | |
| + */ | |
| +static void | |
| +zerorange(Part *p, u64int o, u64int e) | |
| +{ | |
| + static uchar zeros[MaxIoSize]; | |
| + u32int chunk; | |
| + | |
| + while(o < e){ | |
| + chunk = sizeof zeros; | |
| + if(o+chunk > e) | |
| + chunk = e-o; | |
| + if(writepart(p, o, zeros, chunk) < 0) | |
| + fprint(2, "writepart %s: %r\n", p->name); | |
| + o += chunk; | |
| + } | |
| +} | |
| + | |
| +/* | |
| + * Load a minibuffer into memory and write out the | |
| + * corresponding buckets. | |
| + */ | |
| +static void | |
| +sortminibuffer(ISect *is, Minibuf *mb, uchar *buf, u32int nbuf, u32int bufsize) | |
| +{ | |
| + uchar *buckdata, *p, *q, *ep; | |
| + u32int b, lastb, memsize, n; | |
| + u64int o; | |
| + IBucket ib; | |
| + Part *part; | |
| + | |
| + part = is->part; | |
| + buckdata = emalloc(is->blocksize); | |
| + | |
| + if(mb->nwentry == 0) | |
| + return; | |
| + | |
| + /* | |
| + * read entire buffer. | |
| + */ | |
| + assert(mb->nwentry*IEntrySize <= mb->woffset-mb->boffset); | |
| + assert(mb->woffset-mb->boffset <= nbuf); | |
| + if(readpart(part, mb->boffset, buf, mb->woffset-mb->boffset) < 0){ | |
| + fprint(2, "readpart %s: %r\n", part->name); | |
| + errors = 1; | |
| + return; | |
| + } | |
| + assert(*(uint*)buf != 0xa5a5a5a5); | |
| + | |
| + /* | |
| + * remove fragmentation due to IEntrySize | |
| + * not evenly dividing Bufsize | |
| + */ | |
| + memsize = (bufsize/IEntrySize)*IEntrySize; | |
| + for(o=mb->boffset, p=q=buf; o<mb->woffset; o+=bufsize){ | |
| + memmove(p, q, memsize); | |
| + p += memsize; | |
| + q += bufsize; | |
| + } | |
| + ep = buf + mb->nwentry*IEntrySize; | |
| + assert(ep <= buf+nbuf); | |
| + | |
| + /* | |
| + * sort entries | |
| + */ | |
| + qsort(buf, mb->nwentry, IEntrySize, ientrycmpaddr); | |
| + | |
| + /* | |
| + * write buckets out | |
| + */ | |
| + n = 0; | |
| + lastb = offset2bucket(is, mb->boffset); | |
| + for(p=buf; p<ep; p=q){ | |
| + b = score2bucket(is, p); | |
| + for(q=p; q<ep && score2bucket(is, q)==b; q+=IEntrySize) | |
| + ; | |
| + if(lastb+1 < b && zero) | |
| + zerorange(part, bucket2offset(is, lastb+1), bucket2off… | |
| + if(IBucketSize+(q-p) > is->blocksize) | |
| + sysfatal("bucket overflow - make index bigger"); | |
| + memmove(buckdata+IBucketSize, p, q-p); | |
| + ib.n = (q-p)/IEntrySize; | |
| + n += ib.n; | |
| + packibucket(&ib, buckdata, is->bucketmagic); | |
| + if(writepart(part, bucket2offset(is, b), buckdata, is->blocksi… | |
| + fprint(2, "write %s: %r\n", part->name); | |
| + lastb = b; | |
| + } | |
| + if(lastb+1 < is->stop-is->start && zero) | |
| + zerorange(part, bucket2offset(is, lastb+1), bucket2offset(is, … | |
| + | |
| + if(n != mb->nwentry) | |
| + fprint(2, "sortminibuffer bug: n=%ud nwentry=%ud have=%ld\n", … | |
| + | |
| + free(buckdata); | |
| +} | |
| + | |
| +static void | |
| +isectproc(void *v) | |
| +{ | |
| + u32int buck, bufbuckets, bufsize, epbuf, i, j; | |
| + u32int mbufbuckets, n, nbucket, nn, space; | |
| + u32int nbuf, nminibuf, xminiclump, prod; | |
| + u64int blocksize, offset, xclump; | |
| + uchar *data, *p; | |
| + Buf *buf; | |
| + IEntry ie; | |
| + IPool *ipool; | |
| + ISect *is; | |
| + Minibuf *mbuf, *mb; | |
| + | |
| + is = v; | |
| + blocksize = is->blocksize; | |
| + nbucket = is->stop - is->start; | |
| + | |
| + /* | |
| + * Three passes: | |
| + * pass 1 - write index entries from arenas into | |
| + * large sequential sections on index disk. | |
| + * requires nbuf * bufsize memory. | |
| + * | |
| + * pass 2 - split each section into minibufs. | |
| + * requires nminibuf * bufsize memory. | |
| + * | |
| + * pass 3 - read each minibuf into memory and | |
| + * write buckets out. | |
| + * requires entries/minibuf * IEntrySize memory. | |
| + * | |
| + * The larger we set bufsize the less seeking hurts us. | |
| + * | |
| + * The fewer sections and minibufs we have, the less | |
| + * seeking hurts us. | |
| + * | |
| + * The fewer sections and minibufs we have, the | |
| + * more entries we end up with in each minibuf | |
| + * at the end. | |
| + * | |
| + * Shoot for using half our memory to hold each | |
| + * minibuf. The chance of a random distribution | |
| + * getting off by 2x is quite low. | |
| + * | |
| + * Once that is decided, figure out the smallest | |
| + * nminibuf and nsection/biggest bufsize we can use | |
| + * and still fit in the memory constraints. | |
| + */ | |
| + | |
| + /* expected number of clump index entries we'll see */ | |
| + xclump = nbucket * (double)totalclumps/totalbuckets; | |
| + | |
| + /* number of clumps we want to see in a minibuf */ | |
| + xminiclump = isectmem/2/IEntrySize; | |
| + | |
| + /* total number of minibufs we need */ | |
| + prod = xclump / xminiclump; | |
| + | |
| + /* if possible, skip second pass */ | |
| + if(!dumb && prod*MinBufSize < isectmem){ | |
| + nbuf = prod; | |
| + nminibuf = 1; | |
| + }else{ | |
| + /* otherwise use nsection = sqrt(nmini) */ | |
| + for(nbuf=1; nbuf*nbuf<prod; nbuf++) | |
| + ; | |
| + if(nbuf*MinBufSize > isectmem) | |
| + sysfatal("not enough memory"); | |
| + nminibuf = nbuf; | |
| + } | |
| + /* size buffer to use extra memory */ | |
| + bufsize = MinBufSize; | |
| + while(bufsize*2*nbuf <= isectmem && bufsize < MaxBufSize) | |
| + bufsize *= 2; | |
| + data = emalloc(nbuf*bufsize); | |
| + epbuf = bufsize/IEntrySize; | |
| + | |
| + fprint(2, "%T %s: %,ud buckets, %,ud groups, %,ud minigroups, %,ud buf… | |
| + is->part->name, nbucket, nbuf, nminibuf, bufsize); | |
| + /* | |
| + * Accept index entries from arena procs. | |
| + */ | |
| + buf = MKNZ(Buf, nbuf); | |
| + p = data; | |
| + offset = is->blockbase; | |
| + bufbuckets = (nbucket+nbuf-1)/nbuf; | |
| + for(i=0; i<nbuf; i++){ | |
| + buf[i].part = is->part; | |
| + buf[i].bp = p; | |
| + buf[i].wp = p; | |
| + p += bufsize; | |
| + buf[i].ep = p; | |
| + buf[i].boffset = offset; | |
| + buf[i].woffset = offset; | |
| + if(i < nbuf-1){ | |
| + offset += bufbuckets*blocksize; | |
| + buf[i].eoffset = offset; | |
| + }else{ | |
| + offset = is->blockbase + nbucket*blocksize; | |
| + buf[i].eoffset = offset; | |
| + } | |
| + } | |
| + assert(p == data+nbuf*bufsize); | |
| + | |
| + n = 0; | |
| + while(recv(is->writechan, &ie) == 1){ | |
| + if(ie.ia.addr == 0) | |
| + break; | |
| + buck = score2bucket(is, ie.score); | |
| + i = buck/bufbuckets; | |
| + assert(i < nbuf); | |
| + bwrite(&buf[i], &ie); | |
| + n++; | |
| + } | |
| + add(&indexentries, n); | |
| + | |
| + nn = 0; | |
| + for(i=0; i<nbuf; i++){ | |
| + bflush(&buf[i]); | |
| + buf[i].bp = nil; | |
| + buf[i].ep = nil; | |
| + buf[i].wp = nil; | |
| + nn += buf[i].nentry; | |
| + } | |
| + if(n != nn) | |
| + fprint(2, "isectproc bug: n=%ud nn=%ud\n", n, nn); | |
| + | |
| + free(data); | |
| + | |
| + fprint(2, "%T %s: reordering\n", is->part->name); | |
| + | |
| + /* | |
| + * Rearrange entries into minibuffers and then | |
| + * split each minibuffer into buckets. | |
| + */ | |
| + mbuf = MKN(Minibuf, nminibuf); | |
| + mbufbuckets = (bufbuckets+nminibuf-1)/nminibuf; | |
| + for(i=0; i<nbuf; i++){ | |
| + /* | |
| + * Set up descriptors. | |
| + */ | |
| + n = buf[i].nentry; | |
| + nn = 0; | |
| + offset = buf[i].boffset; | |
| + memset(mbuf, 0, nminibuf*sizeof(mbuf[0])); | |
| + for(j=0; j<nminibuf; j++){ | |
| + mb = &mbuf[j]; | |
| + mb->boffset = offset; | |
| + if(j < nminibuf-1){ | |
| + offset += mbufbuckets*blocksize; | |
| + mb->eoffset = offset; | |
| + }else | |
| + mb->eoffset = buf[i].eoffset; | |
| + mb->roffset = mb->boffset; | |
| + mb->woffset = mb->boffset; | |
| + mb->nentry = epbuf * (mb->eoffset - mb->boffset)/bufsi… | |
| + if(mb->nentry > buf[i].nentry) | |
| + mb->nentry = buf[i].nentry; | |
| + buf[i].nentry -= mb->nentry; | |
| + nn += mb->nentry; | |
| + } | |
| + if(n != nn) | |
| + fprint(2, "isectproc bug2: n=%ud nn=%ud (i=%d)\n", n, … | |
| + /* | |
| + * Rearrange. | |
| + */ | |
| + if(!dumb && nminibuf == 1){ | |
| + mbuf[0].nwentry = mbuf[0].nentry; | |
| + mbuf[0].woffset = buf[i].woffset; | |
| + }else{ | |
| + ipool = mkipool(is, mbuf, nminibuf, mbufbuckets, bufsi… | |
| + ipool->buck0 = bufbuckets*i; | |
| + for(j=0; j<nminibuf; j++){ | |
| + mb = &mbuf[j]; | |
| + while(mb->nentry > 0){ | |
| + if(ipool->nfree < epbuf){ | |
| + ipoolflush1(ipool); | |
| + /* ipoolflush1 might change mb… | |
| + continue; | |
| + } | |
| + assert(ipool->nfree >= epbuf); | |
| + ipoolloadblock(ipool, mb); | |
| + } | |
| + } | |
| + ipoolflush(ipool); | |
| + nn = 0; | |
| + for(j=0; j<nminibuf; j++) | |
| + nn += mbuf[j].nwentry; | |
| + if(n != nn) | |
| + fprint(2, "isectproc bug3: n=%ud nn=%ud (i=%d)… | |
| + free(ipool); | |
| + } | |
| + | |
| + /* | |
| + * Make buckets. | |
| + */ | |
| + space = 0; | |
| + for(j=0; j<nminibuf; j++) | |
| + if(space < mbuf[j].woffset - mbuf[j].boffset) | |
| + space = mbuf[j].woffset - mbuf[j].boffset; | |
| + | |
| + data = emalloc(space); | |
| + for(j=0; j<nminibuf; j++){ | |
| + mb = &mbuf[j]; | |
| + sortminibuffer(is, mb, data, space, bufsize); | |
| + } | |
| + free(data); | |
| + } | |
| + | |
| + sendp(isectdonechan, is); | |
| +} | |
| + | |
| + | |
| + | |
| diff --git a/src/cmd/venti/srv/checkindex.c b/src/cmd/venti/srv/checkindex.c | |
| t@@ -109,7 +109,7 @@ checkindex(Index *ix, Part *part, u64int off, u64int clump… | |
| int ok, bok; | |
| u64int found = 0; | |
| -/*ZZZ make buffer size configurable */ | |
| +/* ZZZ make buffer size configurable */ | |
| b = alloczblock(ix->blocksize, 0, ix->blocksize); | |
| z = alloczblock(ix->blocksize, 1, ix->blocksize); | |
| ies = initiestream(part, off, clumps, 64*1024); | |
| t@@ -260,6 +260,8 @@ threadmain(int argc, char *argv[]) | |
| if(initventi(argv[0], &conf) < 0) | |
| sysfatal("can't init venti: %r"); | |
| + if(mainindex->bloom && loadbloom(mainindex->bloom) < 0) | |
| + sysfatal("can't load bloom filter: %r"); | |
| oldbloom = mainindex->bloom; | |
| newbloom = nil; | |
| if(oldbloom){ | |
| diff --git a/src/cmd/venti/srv/clump.c b/src/cmd/venti/srv/clump.c | |
| t@@ -91,7 +91,7 @@ clumpmagic(Arena *arena, u64int aa) | |
| { | |
| u8int buf[U32Size]; | |
| - if(readarena(arena, aa, buf, U32Size) < 0) | |
| + if(readarena(arena, aa, buf, U32Size) == TWID32) | |
| return TWID32; | |
| return unpackmagic(buf); | |
| } | |
| t@@ -138,6 +138,11 @@ loadclump(Arena *arena, u64int aa, int blocks, Clump *cl,… | |
| freezblock(cb); | |
| return nil; | |
| } | |
| + if(cl->info.type == VtCorruptType){ | |
| + seterr(EOk, "clump is marked corrupt"); | |
| + freezblock(cb); | |
| + return nil; | |
| + } | |
| n -= ClumpSize; | |
| if(n < cl->info.size){ | |
| freezblock(cb); | |
| diff --git a/src/cmd/venti/srv/conv.c b/src/cmd/venti/srv/conv.c | |
| t@@ -23,7 +23,7 @@ static struct { | |
| ArenaHeadMagic, "ArenaHeadMagic", | |
| ArenaMagic, "ArenaMagic", | |
| ISectMagic, "ISectMagic", | |
| - BloomMagic, "BloomMagic" | |
| + BloomMagic, "BloomMagic", | |
| }; | |
| static char* | |
| t@@ -138,9 +138,6 @@ unpackarena(Arena *arena, u8int *buf) | |
| p += U64Size; | |
| arena->diskstats.sealed = U8GET(p); | |
| p += U8Size; | |
| - | |
| - arena->memstats = arena->diskstats; | |
| - | |
| switch(arena->version){ | |
| case ArenaVersion4: | |
| sz = ArenaSize4; | |
| t@@ -153,6 +150,35 @@ unpackarena(Arena *arena, u8int *buf) | |
| seterr(ECorrupt, "arena has bad version number %d", arena->ver… | |
| return -1; | |
| } | |
| + /* | |
| + * Additional fields for the memstats version of the stats. | |
| + * Diskstats reflects what is committed to the index. | |
| + * Memstats reflects what is in the arena. Originally intended | |
| + * this to be a version 5 extension, but might as well use for | |
| + * all the existing version 4 arenas too. | |
| + * | |
| + * To maintain backwards compatibility with existing venti | |
| + * installations using the older format, we define that if | |
| + * memstats == diskstats, then the extension fields are not | |
| + * included (see packarena below). That is, only partially | |
| + * indexed arenas have these fields. Fully indexed arenas | |
| + * (in particular, sealed arenas) do not. | |
| + */ | |
| + if(U8GET(p) == 1){ | |
| + sz += ArenaSize5a-ArenaSize5; | |
| + p += U8Size; | |
| + arena->memstats.clumps = U32GET(p); | |
| + p += U32Size; | |
| + arena->memstats.cclumps = U32GET(p); | |
| + p += U32Size; | |
| + arena->memstats.used = U64GET(p); | |
| + p += U64Size; | |
| + arena->memstats.uncsize = U64GET(p); | |
| + p += U64Size; | |
| + arena->memstats.sealed = U8GET(p); | |
| + p += U8Size; | |
| + }else | |
| + arena->memstats = arena->diskstats; | |
| if(buf + sz != p) | |
| sysfatal("unpackarena unpacked wrong amount"); | |
| t@@ -162,6 +188,12 @@ unpackarena(Arena *arena, u8int *buf) | |
| int | |
| packarena(Arena *arena, u8int *buf) | |
| { | |
| + return _packarena(arena, buf, 0); | |
| +} | |
| + | |
| +int | |
| +_packarena(Arena *arena, u8int *buf, int forceext) | |
| +{ | |
| int sz; | |
| u8int *p; | |
| u32int t32; | |
| t@@ -207,6 +239,30 @@ packarena(Arena *arena, u8int *buf) | |
| p += U64Size; | |
| U8PUT(p, arena->diskstats.sealed); | |
| p += U8Size; | |
| + | |
| + /* | |
| + * Extension fields; see above. | |
| + */ | |
| + if(forceext | |
| + || arena->memstats.clumps != arena->diskstats.clumps | |
| + || arena->memstats.cclumps != arena->diskstats.cclumps | |
| + || arena->memstats.used != arena->diskstats.used | |
| + || arena->memstats.uncsize != arena->diskstats.uncsize | |
| + || arena->memstats.sealed != arena->diskstats.sealed){ | |
| + sz += ArenaSize5a - ArenaSize5; | |
| + U8PUT(p, 1); | |
| + p += U8Size; | |
| + U32PUT(p, arena->memstats.clumps); | |
| + p += U32Size; | |
| + U32PUT(p, arena->memstats.cclumps); | |
| + p += U32Size; | |
| + U64PUT(p, arena->memstats.used, t32); | |
| + p += U64Size; | |
| + U64PUT(p, arena->memstats.uncsize, t32); | |
| + p += U64Size; | |
| + U8PUT(p, arena->memstats.sealed); | |
| + p += U8Size; | |
| + } | |
| if(buf + sz != p) | |
| sysfatal("packarena packed wrong amount"); | |
| t@@ -525,6 +581,8 @@ unpackientry(IEntry *ie, u8int *buf) | |
| p += U32Size; | |
| ie->train = U16GET(p); | |
| p += U16Size; | |
| + if(p - buf != IEntryAddrOff) | |
| + sysfatal("unpackentry bad IEntryAddrOff amount"); | |
| ie->ia.addr = U64GET(p); | |
| if(ie->ia.addr>>56) print("%.8H => %llux\n", p, ie->ia.addr); | |
| p += U64Size; | |
| diff --git a/src/cmd/venti/srv/dat.h b/src/cmd/venti/srv/dat.h | |
| t@@ -75,23 +75,17 @@ enum | |
| /* | |
| * magic numbers on disk | |
| */ | |
| -/* _ClumpMagic = 0xd15cb10cU, / * clump header, d… | |
| -#define _ClumpMagic 0xd15cb10cU | |
| + _ClumpMagic = 0xd15cb10cU, /* clump header, depr… | |
| ClumpFreeMagic = 0, /* free clump; termi… | |
| -/* ArenaPartMagic = 0xa9e4a5e7U, / * arena partit… | |
| -/* ArenaMagic = 0xf2a14eadU, / * arena trailer */ | |
| -/* ArenaHeadMagic = 0xd15c4eadU, / * arena header… | |
| -#define ArenaPartMagic 0xa9e4a5e7U | |
| -#define ArenaMagic 0xf2a14eadU | |
| -#define ArenaHeadMagic 0xd15c4eadU | |
| - | |
| -/* BloomMagic = 0xb1004eadU, / * bloom filter hea… | |
| -#define BloomMagic 0xb1004eadU | |
| + ArenaPartMagic = 0xa9e4a5e7U, /* arena partition… | |
| + ArenaMagic = 0xf2a14eadU, /* arena trailer */ | |
| + ArenaHeadMagic = 0xd15c4eadU, /* arena header */ | |
| + | |
| + BloomMagic = 0xb1004eadU, /* bloom filter header… | |
| BloomMaxHash = 32, | |
| -/* ISectMagic = 0xd15c5ec7U, / * index header */ | |
| -#define ISectMagic 0xd15c5ec7U | |
| + ISectMagic = 0xd15c5ec7U, /* index header */ | |
| ArenaPartVersion = 3, | |
| ArenaVersion4 = 4, | |
| t@@ -120,6 +114,7 @@ enum | |
| ArenaPartSize = 4 * U32Size, | |
| ArenaSize4 = 2 * U64Size + 6 * U32Size + ANameSize + U8… | |
| ArenaSize5 = ArenaSize4 + U32Size, | |
| + ArenaSize5a = ArenaSize5 + 2 * U8Size + 2 * U32Size + 2… | |
| ArenaHeadSize4 = U64Size + 3 * U32Size + ANameSize, | |
| ArenaHeadSize5 = ArenaHeadSize4 + U32Size, | |
| BloomHeadSize = 4 * U32Size, | |
| t@@ -137,10 +132,14 @@ enum | |
| */ | |
| IBucketSize = U32Size + U16Size, | |
| IEntrySize = U64Size + U32Size + 2*U16Size + 2*U8Size +… | |
| - IEntryTypeOff = VtScoreSize + U64Size + U32Size + 2 * U… | |
| + IEntryTypeOff = VtScoreSize + U32Size + U16Size + U64Si… | |
| + IEntryAddrOff = VtScoreSize + U32Size + U16Size, | |
| MaxClumpBlocks = (VtMaxLumpSize + ClumpSize + (1 << AB… | |
| + | |
| + IcacheFrac = 1000000, /* denominator */ | |
| + SleepForever = 1000000000, /* magic value for sl… | |
| /* | |
| * dirty flags - order controls disk write order | |
| */ | |
| t@@ -356,13 +355,11 @@ struct Arena | |
| int blocksize; /* size of block to read … | |
| u64int base; /* base address on … | |
| u64int size; /* total space in t… | |
| - u64int limit; /* storage limit f… | |
| u8int score[VtScoreSize]; /* score of the entire… | |
| int clumpmax; /* ClumpInfos per block */ | |
| AState mem; | |
| int inqueue; | |
| - DigestState sha1; | |
| /* | |
| * fields stored on disk | |
| t@@ -477,6 +474,8 @@ struct ISect | |
| u32int tabsize; /* max. bytes in index c… | |
| Channel *writechan; | |
| Channel *writedonechan; | |
| + void *ig; /* used by buildindex only */ | |
| + int ng; | |
| /* | |
| * fields stored on disk | |
| t@@ -716,7 +715,18 @@ extern int writestodevnull; … | |
| extern int collectstats; | |
| extern QLock memdrawlock; | |
| extern int icachesleeptime; | |
| +extern int minicachesleeptime; | |
| extern int arenasumsleeptime; | |
| +extern int manualscheduling; | |
| +extern int l0quantum; | |
| +extern int l1quantum; | |
| +extern int ignorebloom; | |
| +extern int icacheprefetch; | |
| +extern int syncwrites; | |
| + | |
| +extern Stats *stathist; | |
| +extern int nstathist; | |
| +extern ulong stattime; | |
| #ifndef PLAN9PORT | |
| #pragma varargck type "V" uchar* | |
| diff --git a/src/cmd/venti/srv/dcache.c b/src/cmd/venti/srv/dcache.c | |
| t@@ -34,7 +34,7 @@ enum | |
| { | |
| HashLog = 9, | |
| HashSize = 1<<HashLog, | |
| - HashMask = HashSize - 1 | |
| + HashMask = HashSize - 1, | |
| }; | |
| struct DCache | |
| t@@ -212,8 +212,6 @@ return; | |
| lastmiss.part = part; | |
| lastmiss.addr = addr; | |
| } | |
| - | |
| -/* fprint(2, "%s %llx %s\n", part->name, addr, miss ? "miss" : "hit"); … | |
| } | |
| int | |
| t@@ -230,6 +228,7 @@ rareadpart(Part *part, u64int addr, u8int *buf, uint n, in… | |
| } | |
| if(load != 2 || addr >= part->size){ /* addr >= part->size: let… | |
| runlock(&ralock); | |
| + diskaccess(0); | |
| return readpart(part, addr, buf, n); | |
| } | |
| t@@ -239,6 +238,7 @@ fprint(2, "raread %s %llx\n", part->name, addr); | |
| nn = dcache.ramax; | |
| if(addr+nn > part->size) | |
| nn = part->size - addr; | |
| + diskaccess(0); | |
| if(readpart(part, addr, dcache.rabuf, nn) < 0){ | |
| wunlock(&ralock); | |
| return -1; | |
| t@@ -297,7 +297,6 @@ _getdblock(Part *part, u64int addr, int mode, int load) | |
| /* | |
| * look for the block in the cache | |
| */ | |
| -/*checkdcache(); */ | |
| qlock(&dcache.lock); | |
| again: | |
| for(b = dcache.heads[h]; b != nil; b = b->next){ | |
| t@@ -367,7 +366,6 @@ found: | |
| fixheap(b->heap, b); | |
| qunlock(&dcache.lock); | |
| -/*checkdcache(); */ | |
| trace(TraceBlock, "getdblock lock"); | |
| addstat(StatDblockStall, 1); | |
| t@@ -427,7 +425,6 @@ putdblock(DBlock *b) | |
| else | |
| wunlock(&b->lock); | |
| -/*checkdcache(); */ | |
| qlock(&dcache.lock); | |
| if(--b->ref == 0 && !b->dirty){ | |
| if(b->heap == TWID32) | |
| t@@ -435,7 +432,6 @@ putdblock(DBlock *b) | |
| rwakeupall(&dcache.full); | |
| } | |
| qunlock(&dcache.lock); | |
| -/*checkdcache(); */ | |
| } | |
| void | |
| t@@ -474,6 +470,25 @@ dirtydblock(DBlock *b, int dirty) | |
| qunlock(&dcache.lock); | |
| } | |
| +/* | |
| + * Remove block b from its hash chain in the disk cache. | |
| + * A block with no predecessor must be the head of the chain | |
| + * selected by pbhash(b->addr); anything else is fatal. | |
| + * b's own prev/next pointers are left as they were. | |
| + */ | |
| +static void | |
| +unchain(DBlock *b) | |
| +{ | |
| + ulong h; | |
| + | |
| + if(b->prev != nil) | |
| + b->prev->next = b->next; | |
| + else{ | |
| + /* b claims to be first: check it against the bucket head */ | |
| + h = pbhash(b->addr); | |
| + if(dcache.heads[h] != b) | |
| + sysfatal("bad hash chains in disk cache"); | |
| + dcache.heads[h] = b->next; | |
| + } | |
| + if(b->next != nil) | |
| + b->next->prev = b->prev; | |
| +} | |
| + | |
| /* | |
| * remove some block from use and update the free list and counters | |
| */ | |
| t@@ -481,7 +496,6 @@ static DBlock* | |
| bumpdblock(void) | |
| { | |
| DBlock *b; | |
| - ulong h; | |
| trace(TraceBlock, "bumpdblock enter"); | |
| b = dcache.free; | |
| t@@ -512,22 +526,28 @@ bumpdblock(void) | |
| trace(TraceBlock, "bumpdblock bumping %s 0x%llux", b->part->name, b->a… | |
| - /* | |
| - * unchain the block | |
| - */ | |
| - if(b->prev == nil){ | |
| - h = pbhash(b->addr); | |
| - if(dcache.heads[h] != b) | |
| - sysfatal("bad hash chains in disk cache"); | |
| - dcache.heads[h] = b->next; | |
| - }else | |
| - b->prev->next = b->next; | |
| - if(b->next != nil) | |
| - b->next->prev = b->prev; | |
| - | |
| + unchain(b); | |
| return b; | |
| } | |
| +/* | |
| + * Empty the disk cache heap, holding dcache.lock throughout. | |
| + * Every block is removed from the heap; blocks that are | |
| + * neither referenced nor dirty are also unchained from the | |
| + * hash table and pushed onto the free list. Referenced or | |
| + * dirty blocks stay in their hash chains. | |
| + */ | |
| +void | |
| +emptydcache(void) | |
| +{ | |
| + DBlock *victim; | |
| + | |
| + qlock(&dcache.lock); | |
| + for(; dcache.nheap > 0; ){ | |
| + victim = dcache.heap[0]; | |
| + delheap(victim); | |
| + if(victim->ref || victim->dirty) | |
| + continue; | |
| + unchain(victim); | |
| + victim->next = dcache.free; | |
| + dcache.free = victim; | |
| + } | |
| + qunlock(&dcache.lock); | |
| +} | |
| + | |
| /* | |
| * delete an arbitrary block from the heap | |
| */ | |
| t@@ -683,6 +703,7 @@ static int | |
| parallelwrites(DBlock **b, DBlock **eb, int dirty) | |
| { | |
| DBlock **p, **q; | |
| + | |
| for(p=b; p<eb && (*p)->dirty == dirty; p++){ | |
| assert(b<=p && p<eb); | |
| sendp((*p)->part->writechan, *p); | |
| t@@ -803,6 +824,7 @@ writeproc(void *v) | |
| trace(TraceProc, "wlock %s 0x%llux", p->name, b->addr); | |
| wlock(&b->lock); | |
| trace(TraceProc, "writepart %s 0x%llux", p->name, b->addr); | |
| + diskaccess(0); | |
| if(writepart(p, b->addr, b->data, b->size) < 0) | |
| fprint(2, "write error: %r\n"); /* XXX details! */ | |
| addstat(StatApartWrite, 1); | |
| diff --git a/src/cmd/venti/srv/disksched.c b/src/cmd/venti/srv/disksched.c | |
| t@@ -0,0 +1,88 @@ | |
| +#include "stdinc.h" | |
| +#include "dat.h" | |
| +#include "fns.h" | |
| + | |
| +ulong lasttime[2]; | |
| +int manualscheduling; | |
| +int l0quantum = 120; | |
| +int l1quantum = 120; | |
| +ulong lasticachechange; | |
| + | |
| +void | |
| +disksched(void) | |
| +{ | |
| + int p, nwrite, nflush, ndirty, tdirty, toflush; | |
| + ulong t; | |
| + vlong cflush; | |
| + Stats *prev; | |
| + | |
| + /* | |
| + * no locks because all the data accesses are atomic. | |
| + */ | |
| + t = time(0); | |
| + if(manualscheduling){ | |
| + lasticachechange = t; | |
| + return; | |
| + } | |
| + | |
| + if(t-lasttime[0] < l0quantum){ | |
| + /* level-0 disk access going on */ | |
| + p = icachedirtyfrac(); | |
| + if(p < IcacheFrac*5/10){ /* can wait */ | |
| + icachesleeptime = SleepForever; | |
| + lasticachechange = t; | |
| + }else if(p > IcacheFrac*9/10){ /* can't wait */ | |
| + icachesleeptime = 0; | |
| + lasticachechange = t; | |
| + }else if(t-lasticachechange > 60){ | |
| + /* have minute worth of data for current rate */ | |
| + prev = &stathist[(stattime-60+nstathist)%nstathist]; | |
| + | |
| + /* # entries written to index cache */ | |
| + nwrite = stats.n[StatIcacheWrite] - prev->n[StatIcache… | |
| + | |
| + /* # dirty entries in index cache */ | |
| + ndirty = stats.n[StatIcacheDirty] - prev->n[StatIcache… | |
| + | |
| + /* # entries flushed to disk */ | |
| + nflush = nwrite - ndirty; | |
| + | |
| + /* want to stay around 70% dirty */ | |
| + tdirty = (vlong)stats.n[StatIcacheSize]*700/1000; | |
| + | |
| + /* assume nflush*icachesleeptime is a constant */ | |
| + cflush = (vlong)nflush*(icachesleeptime+1); | |
| + | |
| + /* computer number entries to write in next minute */ | |
| + toflush = nwrite + (stats.n[StatIcacheDirty] - tdirty); | |
| + | |
| + /* schedule for that many */ | |
| + if(toflush <= 0 || cflush/toflush > 100000) | |
| + icachesleeptime = SleepForever; | |
| + else | |
| + icachesleeptime = cflush/toflush; | |
| + } | |
| + arenasumsleeptime = SleepForever; | |
| + return; | |
| + } | |
| + if(t-lasttime[1] < l1quantum){ | |
| + /* level-1 disk access (icache flush) going on */ | |
| + icachesleeptime = 0; | |
| + arenasumsleeptime = SleepForever; | |
| + return; | |
| + } | |
| + /* no disk access going on - no holds barred*/ | |
| + icachesleeptime = 0; | |
| + arenasumsleeptime = 0; | |
| +} | |
| + | |
| +void | |
| +diskaccess(int level) | |
| +{ | |
| + if(level < 0 || level >= nelem(lasttime)){ | |
| + fprint(2, "bad level in diskaccess; caller=%lux\n", getcallerp… | |
| + return; | |
| + } | |
| + lasttime[level] = time(0); | |
| +} | |
| + | |
| diff --git a/src/cmd/venti/srv/findscore.c b/src/cmd/venti/srv/findscore.c | |
| t@@ -27,7 +27,7 @@ findscore(Arena *arena, uchar *score) | |
| u32int clump; | |
| int i, n, found; | |
| -/*ZZZ remove fprint? */ | |
| +//ZZZ remove fprint? | |
| if(arena->memstats.clumps) | |
| fprint(2, "reading directory for arena=%s with %d entries\n", … | |
| diff --git a/src/cmd/venti/srv/fixarenas.c b/src/cmd/venti/srv/fixarenas.c | |
| t@@ -0,0 +1,1894 @@ | |
| +/* | |
| + * Check and fix an arena partition. | |
| + * | |
| + * This is a lot grittier than the rest of Venti because | |
| + * it can't just give up if a byte here or there is wrong. | |
| + * | |
| + * The rule here (hopefully followed!) is that block corruption | |
| + * only ever has a local effect -- there are no blocks that you | |
| + * can wipe out that will cause large portions of | |
| + * uncorrupted data blocks to be useless. | |
| + */ | |
| + | |
| +#include "stdinc.h" | |
| +#include "dat.h" | |
| +#include "fns.h" | |
| +#include "whack.h" | |
| + | |
| +#pragma varargck type "z" uvlong | |
| +#pragma varargck type "z" vlong | |
| +#pragma varargck type "t" uint | |
| + | |
| +/* | |
| + * Binary size units and the block size constant. | |
| + */ | |
| +enum | |
| +{ | |
| + K = 1<<10, | |
| + M = 1<<20, | |
| + G = 1<<30, | |
| + | |
| + Block = 4*K, | |
| +}; | |
| + | |
| +int debugsha1; | |
| + | |
| +int verbose; | |
| +Part *part; | |
| +char *file; | |
| +char *basename; | |
| +char *dumpbase; | |
| +int fix; | |
| +int badreads; | |
| +int unseal; | |
| +uchar zero[MaxDiskBlock]; | |
| + | |
| +Arena lastarena; | |
| +ArenaPart ap; | |
| +uvlong arenasize; | |
| +int nbadread; | |
| +int nbad; | |
| +uvlong partend; | |
| +void checkarena(vlong, int); | |
| + | |
| +void | |
| +usage(void) | |
| +{ | |
| + fprint(2, "usage: fixarenas [-fv] [-a arenasize] [-b blocksize] file [… | |
| + threadexitsall(0); | |
| +} | |
| + | |
| +/* | |
| + * Format verb handler: print a vlong using the largest of | |
| + * the G, M, K suffixes that divides it exactly, or as a | |
| + * plain decimal number otherwise. Zero prints as "0". | |
| + * The output is meant to be acceptable to unittoull. | |
| + */ | |
| +static int | |
| +zfmt(Fmt *fmt) | |
| +{ | |
| + int r; | |
| + vlong v; | |
| + | |
| + v = va_arg(fmt->args, vlong); | |
| + if(v == 0) | |
| + r = fmtstrcpy(fmt, "0"); | |
| + else if(v%G == 0) | |
| + r = fmtprint(fmt, "%lldG", v/G); | |
| + else if(v%M == 0) | |
| + r = fmtprint(fmt, "%lldM", v/M); | |
| + else if(v%K == 0) | |
| + r = fmtprint(fmt, "%lldK", v/K); | |
| + else | |
| + r = fmtprint(fmt, "%lld", v); | |
| + return r; | |
| +} | |
| + | |
| +/* | |
| + * Format verb handler: print a uint time the way ctime | |
| + * does, but without the trailing newline. | |
| + */ | |
| +static int | |
| +tfmt(Fmt *fmt) | |
| +{ | |
| + char stamp[30]; | |
| + uint secs; | |
| + | |
| + secs = va_arg(fmt->args, uint); | |
| + strcpy(stamp, ctime(secs)); | |
| + /* cut the string at index 28, presumably where ctime puts its newline */ | |
| + stamp[28] = '\0'; | |
| + return fmtstrcpy(fmt, stamp); | |
| +} | |
| + | |
| +/* | |
| + * Coalesce messages about unreadable sectors into larger ranges. | |
| + * | |
| + * Each call reports len bad bytes at offset o. A report that | |
| + * starts where the pending range ends, with the same message, | |
| + * extends that range; otherwise the pending range (if not | |
| + * empty) is printed and a new one is started at o. | |
| + * | |
| + * A nil msg means "same message as last time". | |
| + * bad(nil, 0, 0) flushes: it prints any pending range. | |
| + * o == -1 discards the pending state without printing. | |
| + */ | |
| +static void | |
| +bad(char *msg, vlong o, int len) | |
| +{ | |
| + static char *curmsg; | |
| + static vlong start, end; | |
| + int newrange; | |
| + | |
| + if(msg == nil) | |
| + msg = curmsg; | |
| + if(o == -1){ | |
| + curmsg = nil; | |
| + start = 0; | |
| + end = 0; | |
| + return; | |
| + } | |
| + | |
| + /* a gap in offsets or a change of message ends the pending range */ | |
| + newrange = end != o; | |
| + if(!newrange && msg != nil && curmsg != nil) | |
| + newrange = strcmp(msg, curmsg) != 0; | |
| + if(newrange){ | |
| + if(start != end) | |
| + print("%s %#llux+%#llux (%,lld+%,lld)\n", | |
| + curmsg, start, end-start, start, end-start); | |
| + start = o; | |
| + } | |
| + curmsg = msg; | |
| + end = o+len; | |
| +} | |
| + | |
| +/* | |
| + * Read in the len bytes of data at the offset. If can't for whatever reason, | |
| + * fill it with garbage but print an error. | |
| + * | |
| + * Bytes at or beyond partend are filled with 0xFB. | |
| + * Bytes that cannot be read are left as 0xFD; the failing | |
| + * sectors are reported through bad() by disk offset and | |
| + * counted in badreads. Always returns buf. | |
| + */ | |
| +static uchar* | |
| +readdisk(uchar *buf, vlong offset, int len) | |
| +{ | |
| + int i, j, k, n; | |
| + | |
| + /* | |
| + * buf is a pointer, so sizeof buf is the pointer size, | |
| + * not the buffer size: fill len bytes instead. | |
| + */ | |
| + if(offset >= partend){ | |
| + memset(buf, 0xFB, len); | |
| + return buf; | |
| + } | |
| + | |
| + if(offset+len > partend){ | |
| + memset(buf, 0xFB, len); | |
| + len = partend - offset; | |
| + } | |
| + | |
| + if(readpart(part, offset, buf, len) >= 0) | |
| + return buf; | |
| + | |
| + /* | |
| + * The read failed. Clear the buffer to nonsense, and | |
| + * then try reading in smaller pieces. If that fails, | |
| + * read in even smaller pieces. And so on down to sectors. | |
| + */ | |
| + memset(buf, 0xFD, len); | |
| + for(i=0; i<len; i+=64*K){ | |
| + n = 64*K; | |
| + if(i+n > len) | |
| + n = len-i; | |
| + if(readpart(part, offset+i, buf+i, n) >= 0) | |
| + continue; | |
| + for(j=i; j<len && j<i+64*K; j+=4*K){ | |
| + n = 4*K; | |
| + if(j+n > len) | |
| + n = len-j; | |
| + if(readpart(part, offset+j, buf+j, n) >= 0) | |
| + continue; | |
| + for(k=j; k<len && k<j+4*K; k+=512){ | |
| + /* clamp the last sector so the read stays inside buf */ | |
| + n = 512; | |
| + if(k+n > len) | |
| + n = len-k; | |
| + if(readpart(part, offset+k, buf+k, n) >= 0) | |
| + continue; | |
| + /* report the disk offset, not the index into buf */ | |
| + bad("disk read failed at", offset+k, n); | |
| + badreads++; | |
| + } | |
| + } | |
| + } | |
| + /* flush any pending coalesced message */ | |
| + bad(nil, 0, 0); | |
| + return buf; | |
| +} | |
| + | |
| +/* | |
| + * Buffer to support running SHA1 hash of the disk. | |
| + * | |
| + * Data is fed in through sbupdate in disk-offset order. | |
| + * With rollback set, the hash state is saved every 4M so | |
| + * that hashing can be resumed from an earlier point. | |
| + */ | |
| +typedef struct Shabuf Shabuf; | |
| +struct Shabuf | |
| +{ | |
| + int fd; /* debug dump file set by sbdebug; used only when > 0 */ | |
| + vlong offset; /* disk offset of the next byte to be hashed */ | |
| + DigestState state; /* running SHA1 state */ | |
| + int rollback; /* nonzero: save state snapshots in hist */ | |
| + vlong r0; /* disk offset of the first data seen by sbupdate */ | |
| + DigestState *hist; /* saved states, one per 4M past r0 */ | |
| + int nhist; /* number of entries allocated in hist */ | |
| +}; | |
| + | |
| +/* | |
| + * Direct a copy of the hashed data to a newly created file. | |
| + * Any dump file already open is closed first. If the | |
| + * file cannot be created, sb is left without a dump file. | |
| + * Because sb->fd is only used when > 0, a descriptor that | |
| + * lands on 0 is duplicated elsewhere and fd 0 is closed. | |
| + */ | |
| +void | |
| +sbdebug(Shabuf *sb, char *file) | |
| +{ | |
| + int out; | |
| + | |
| + if(sb->fd > 0){ | |
| + close(sb->fd); | |
| + sb->fd = 0; | |
| + } | |
| + out = create(file, OWRITE, 0666); | |
| + if(out < 0) | |
| + return; | |
| + if(out == 0){ | |
| + out = dup(0, -1); | |
| + close(0); | |
| + } | |
| + sb->fd = out; | |
| +} | |
| +/* | |
| + * Feed the len bytes at p, which came from disk offset offset, | |
| + * into the running SHA1 in sb.  Bytes before sb->offset are skipped; | |
| + * if sb->offset is not inside [offset, offset+len) nothing is hashed. | |
| + * When sb->rollback is set, the digest state is saved each time the | |
| + * hashed position reaches a 4M boundary (measured from sb->r0). | |
| + */ | |
| +void | |
| +sbupdate(Shabuf *sb, uchar *p, vlong offset, int len) | |
| +{ | |
| + int n, x; | |
| + vlong o; | |
| + | |
| + if(sb->rollback && !sb->hist){ /* first call in rollback mode: start the history */ | |
| + sb->r0 = offset; | |
| + sb->nhist = 1; | |
| + sb->hist = vtmalloc(sb->nhist*sizeof *sb->hist); | |
| + memset(sb->hist, 0, sizeof sb->hist[0]); /* hist[0] is a zeroed DigestState */ | |
| + } | |
| + if(sb->r0 == 0) /* remember where hashing started */ | |
| + sb->r0 = offset; | |
| + | |
| + if(sb->offset < offset || sb->offset >= offset+len){ /* gap, or nothing new: ignore */ | |
| + if(0) print("sbupdate %p %#llux+%d but offset=%#llux\n", | |
| + p, offset, len, sb->offset); | |
| + return; | |
| + } | |
| + x = sb->offset - offset; /* leading bytes already hashed */ | |
| + if(0) print("sbupdate %p %#llux+%d skip %d\n", | |
| + sb, offset, len, x); | |
| + if(x){ | |
| + p += x; | |
| + offset += x; | |
| + len -= x; | |
| + } | |
| + assert(sb->offset == offset); | |
| + | |
| + if(sb->fd > 0) /* debug copy; result not checked */ | |
| + pwrite(sb->fd, p, len, offset - sb->r0); | |
| + | |
| + if(!sb->rollback){ | |
| + sha1(p, len, nil, &sb->state); | |
| + sb->offset += len; | |
| + return; | |
| + } | |
| + | |
| + /* save state every 4M so we can roll back quickly */ | |
| + o = offset - sb->r0; | |
| + while(len > 0){ | |
| + n = 4*M - o%(4*M); /* bytes up to the next 4M boundary */ | |
| + if(n > len) | |
| + n = len; | |
| + sha1(p, n, nil, &sb->state); | |
| + sb->offset += n; | |
| + o += n; | |
| + p += n; | |
| + len -= n; | |
| + if(o%(4*M) == 0){ | |
| + x = o/(4*M); | |
| + if(x >= sb->nhist){ | |
| + if(x != sb->nhist) | |
| + print("oops! x=%d nhist=%d\n", x, sb->… | |
| + sb->nhist += 32; | |
| + sb->hist = vtrealloc(sb->hist, sb->nhist*sizeo… | |
| + } | |
| + sb->hist[x] = sb->state; /* snapshot taken at boundary x */ | |
| + } | |
| + } | |
| +} | |
| + | |
| +/* | |
| + * Advance the hash in sb up to disk offset eoffset by reading the | |
| + * data in between from disk, at most 4M per read. | |
| + */ | |
| +void | |
| +sbdiskhash(Shabuf *sb, vlong eoffset) | |
| +{ | |
| + static uchar dbuf[4*M]; | |
| + int want; | |
| + | |
| + for(;;){ | |
| + if(sb->offset >= eoffset) | |
| + break; | |
| + want = sizeof dbuf; | |
| + /* last piece may be short */ | |
| + if(sb->offset+want > eoffset) | |
| + want = eoffset - sb->offset; | |
| + readdisk(dbuf, sb->offset, want); | |
| + sbupdate(sb, dbuf, sb->offset, want); | |
| + } | |
| +} | |
| + | |
| +/* | |
| + * Rewind the rolling hash in sb to the latest saved 4M checkpoint at or | |
| + * before disk offset offset, so the data from there on can be hashed | |
| + * again.  Does nothing if offset is not behind the current position. | |
| + * If a debug file is attached, it is truncated to match. | |
| + * Prints a complaint and leaves sb alone if no checkpoint is available. | |
| + */ | |
| +void | |
| +sbrollback(Shabuf *sb, vlong offset) | |
| +{ | |
| + int x; | |
| + vlong o; | |
| + Dir d; | |
| + | |
| + if(!sb->rollback || !sb->r0){ | |
| + print("cannot rollback sha\n"); | |
| + return; | |
| + } | |
| + if(offset >= sb->offset) | |
| + return; | |
| + o = offset - sb->r0; | |
| + /* a target before r0 has no checkpoint; never index hist with it */ | |
| + if(o < 0 || o/(4*M) >= sb->nhist){ | |
| + print("cannot rollback sha\n"); | |
| + return; | |
| + } | |
| + x = o/(4*M); | |
| + sb->state = sb->hist[x]; | |
| + /* widen first: x*4*M is int arithmetic and can overflow in big arenas */ | |
| + sb->offset = sb->r0 + (vlong)x*4*M; | |
| + assert(sb->offset <= offset); | |
| + | |
| + if(sb->fd > 0){ | |
| + /* cut the debug copy back to the checkpoint */ | |
| + nulldir(&d); | |
| + d.length = sb->offset - sb->r0; | |
| + dirfwstat(sb->fd, &d); | |
| + } | |
| +} | |
| + | |
| +/* | |
| + * Finish the hash in sb and store the digest in score. | |
| + * The rollback history, if any, is released. | |
| + */ | |
| +void | |
| +sbscore(Shabuf *sb, uchar *score) | |
| +{ | |
| + DigestState *old; | |
| + | |
| + old = sb->hist; | |
| + if(old != nil){ | |
| + sb->hist = nil; | |
| + free(old); | |
| + } | |
| + sha1(nil, 0, score, &sb->state); | |
| +} | |
| + | |
| +/* | |
| + * If we're fixing arenas, then editing this memory edits the disk! | |
| + * It will be written back out as new data is paged in. | |
| + */ | |
| +uchar buf[4*M]; | |
| +uchar sbuf[4*M]; | |
| +vlong bufoffset; | |
| +int buflen; | |
| + | |
| +static void pageout(void); | |
| +/* | |
| + * Make buf hold the len bytes of the partition starting at offset, | |
| + * after first flushing whatever was paged in before.  sbuf keeps a | |
| + * pristine copy so pageout can tell whether buf was edited. | |
| + * Reads reaching past partend are cut short, with the whole buffer | |
| + * pre-filled with 0xFB; a read starting at or past partend only fills | |
| + * the buffer with 0xFB and leaves nothing paged in. | |
| + */ | |
| +static uchar* | |
| +pagein(vlong offset, int len) | |
| +{ | |
| + pageout(); | |
| + | |
| + if(offset >= partend || offset+len > partend) | |
| + memset(buf, 0xFB, sizeof buf); | |
| + if(offset >= partend) | |
| + return buf; | |
| + if(offset+len > partend) | |
| + len = partend - offset; | |
| + | |
| + readdisk(buf, offset, len); | |
| + memmove(sbuf, buf, len); | |
| + bufoffset = offset; | |
| + buflen = len; | |
| + return buf; | |
| +} | |
| + | |
| +/* | |
| + * Flush the paged-in buffer: if we are fixing and buf no longer | |
| + * matches the copy saved in sbuf, write it back to the partition. | |
| + * Either way, nothing is paged in afterward. | |
| + */ | |
| +static void | |
| +pageout(void) | |
| +{ | |
| + int dirty; | |
| + | |
| + dirty = buflen != 0 && fix && memcmp(buf, sbuf, buflen) != 0; | |
| + if(dirty && writepart(part, bufoffset, buf, buflen) < 0) | |
| + print("disk write failed at %#llux+%#ux (%,lld+%,d)\n", | |
| + bufoffset, buflen, bufoffset, buflen); | |
| + buflen = 0; | |
| +} | |
| + | |
| +/* | |
| + * Zero the len bytes of the partition starting at offset by paging | |
| + * them in (in MinBlock-aligned pieces of at most MaxBlock bytes) and | |
| + * clearing them in buf; the pagein/pageout machinery writes the pieces | |
| + * back when fixing.  The buffer that was paged in on entry is paged | |
| + * back in before returning. | |
| + */ | |
| +static void | |
| +zerorange(vlong offset, int len) | |
| +{ | |
| + int i, n; | |
| + vlong ooff; | |
| + int olen; | |
| + enum { MinBlock = 4*K, MaxBlock = 8*K }; | |
| + | |
| + /* disabled shortcut: zero in place when the range is already paged in */ | |
| + if(0) | |
| + if(bufoffset <= offset && offset+len <= bufoffset+buflen){ | |
| + memset(buf+(offset-bufoffset), 0, len); | |
| + return; | |
| + } | |
| + | |
| + ooff = bufoffset; | |
| + olen = buflen; | |
| + | |
| + i = offset%MinBlock; | |
| + if(i+len < MaxBlock){ | |
| + /* | |
| + * The page must cover the i bytes of lead-in as well as the | |
| + * len bytes being zeroed; otherwise the end of the zeroed | |
| + * span lies past buflen and is never written back. | |
| + */ | |
| + n = (i+len+MinBlock-1)&~(MinBlock-1); | |
| + pagein(offset-i, n); | |
| + memset(buf+i, 0, len); | |
| + }else{ | |
| + pagein(offset-i, MaxBlock); | |
| + memset(buf+i, 0, MaxBlock-i); | |
| + offset += MaxBlock-i; | |
| + len -= MaxBlock-i; | |
| + while(len >= MaxBlock){ | |
| + pagein(offset, MaxBlock); | |
| + memset(buf, 0, MaxBlock); | |
| + offset += MaxBlock; | |
| + len -= MaxBlock; | |
| + } | |
| + /* offset is block-aligned here, so rounding len alone is enough */ | |
| + pagein(offset, (len+MinBlock-1)&~(MinBlock-1)); | |
| + memset(buf, 0, len); | |
| + } | |
| + pagein(ooff, olen); | |
| +} | |
| + | |
| +/* | |
| + * read/write integers | |
| + * | |
| +static void | |
| +p16(uchar *p, u16int u) | |
| +{ | |
| + p[0] = (u>>8) & 0xFF; | |
| + p[1] = u & 0xFF; | |
| +} | |
| +*/ | |
| + | |
| +/* | |
| + * Decode the big-endian 16-bit integer stored at p. | |
| + */ | |
| +static u16int | |
| +u16(uchar *p) | |
| +{ | |
| + int hi, lo; | |
| + | |
| + hi = p[0]; | |
| + lo = p[1]; | |
| + return (hi<<8)|lo; | |
| +} | |
| + | |
| +/* | |
| + * Store u at p as a big-endian 32-bit integer. | |
| + */ | |
| +static void | |
| +p32(uchar *p, u32int u) | |
| +{ | |
| + int i; | |
| + | |
| + /* fill from the low-order byte backward */ | |
| + for(i=3; i>=0; i--){ | |
| + p[i] = u & 0xFF; | |
| + u >>= 8; | |
| + } | |
| +} | |
| + | |
| +/* | |
| + * Decode the big-endian 32-bit integer stored at p. | |
| + * Each byte is widened to u32int before shifting, so a top byte of | |
| + * 0x80 or more is never shifted into the sign bit of an int. | |
| + */ | |
| +static u32int | |
| +u32(uchar *p) | |
| +{ | |
| + return ((u32int)p[0]<<24)|((u32int)p[1]<<16)|((u32int)p[2]<<8)|(u32int)p[3]; | |
| +} | |
| + | |
| +/* | |
| +static void | |
| +p64(uchar *p, u64int u) | |
| +{ | |
| + p32(p, u>>32); | |
| + p32(p+4, u); | |
| +} | |
| +*/ | |
| + | |
| +/* | |
| + * Decode the big-endian 64-bit integer stored at p. | |
| + */ | |
| +static u64int | |
| +u64(uchar *p) | |
| +{ | |
| + u64int hi, lo; | |
| + | |
| + hi = u32(p); | |
| + lo = u32(p+4); | |
| + return (hi<<32) | lo; | |
| +} | |
| + | |
| +/* | |
| + * qsort comparison function for arrays of vlong: | |
| + * returns -1, 0, or 1 as *va is less than, equal to, | |
| + * or greater than *vb. | |
| + */ | |
| +static int | |
| +vlongcmp(const void *va, const void *vb) | |
| +{ | |
| + vlong a, b; | |
| + | |
| + a = *(vlong*)va; | |
| + b = *(vlong*)vb; | |
| + if(a < b) | |
| + return -1; | |
| + /* was "b > a", which repeats the test above and never fires */ | |
| + if(a > b) | |
| + return 1; | |
| + return 0; | |
| +} | |
| + | |
| +/* D and S are in draw.h */ | |
| +#define D VD | |
| +#define S VS | |
| +/* | |
| + * Format flags or'ed into Info.len above the byte count. | |
| + * showdiffs switches on flag|count to choose how a field is printed; | |
| + * an entry with no flag is printed in hex (or as a small integer | |
| + * when it is a single byte). | |
| + */ | |
| +enum | |
| +{ | |
| + D = 0x10000, /* printed in decimal (%,ud or %,lld) */ | |
| + Z = 0x20000, /* printed with %z */ | |
| + S = 0x30000, /* printed as a string */ | |
| + T = 0x40000, /* printed with %t */ | |
| + N = 0xFFFF /* mask selecting the byte count */ | |
| +}; | |
| +typedef struct Info Info; /* one field of an on-disk structure, as walked by showdiffs */ | |
| +struct Info | |
| +{ | |
| + int len; /* width in bytes (len&N), possibly or'ed with a format flag; 0 ends a table */ | |
| + char *name; /* field name used in messages */ | |
| +}; | |
| + | |
| +Info partinfo[] = { | |
| + 4, "magic", | |
| + D|4, "version", | |
| + Z|4, "blocksize", | |
| + 4, "arenabase", | |
| + 0 | |
| +}; | |
| + | |
| +Info headinfo4[] = { | |
| + 4, "magic", | |
| + D|4, "version", | |
| + S|ANameSize, "name", | |
| + Z|4, "blocksize", | |
| + Z|8, "size", | |
| + 0 | |
| +}; | |
| + | |
| +Info headinfo5[] = { | |
| + 4, "magic", | |
| + D|4, "version", | |
| + S|ANameSize, "name", | |
| + Z|4, "blocksize", | |
| + Z|8, "size", | |
| + 4, "clumpmagic", | |
| + 0 | |
| +}; | |
| + | |
| +Info tailinfo4[] = { | |
| + 4, "magic", | |
| + D|4, "version", | |
| + S|ANameSize, "name", | |
| + D|4, "clumps", | |
| + D|4, "cclumps", | |
| + T|4, "ctime", | |
| + T|4, "wtime", | |
| + D|8, "used", | |
| + D|8, "uncsize", | |
| + 1, "sealed", | |
| + 0 | |
| +}; | |
| + | |
| +Info tailinfo4a[] = { | |
| + /* tailinfo 4 */ | |
| + 4, "magic", | |
| + D|4, "version", | |
| + S|ANameSize, "name", | |
| + D|4, "clumps", | |
| + D|4, "cclumps", | |
| + T|4, "ctime", | |
| + T|4, "wtime", | |
| + D|8, "used", | |
| + D|8, "uncsize", | |
| + 1, "sealed", | |
| + | |
| + /* mem stats */ | |
| + 1, "extension", | |
| + D|4, "mem.clumps", | |
| + D|4, "mem.cclumps", | |
| + D|8, "mem.used", | |
| + D|8, "mem.uncsize", | |
| + 1, "mem.sealed", | |
| + 0 | |
| +}; | |
| + | |
| +Info tailinfo5[] = { | |
| + 4, "magic", | |
| + D|4, "version", | |
| + S|ANameSize, "name", | |
| + D|4, "clumps", | |
| + D|4, "cclumps", | |
| + T|4, "ctime", | |
| + T|4, "wtime", | |
| + 4, "clumpmagic", | |
| + D|8, "used", | |
| + D|8, "uncsize", | |
| + 1, "sealed", | |
| + 0 | |
| +}; | |
| + | |
| +Info tailinfo5a[] = { | |
| + /* tailinfo 5 */ | |
| + 4, "magic", | |
| + D|4, "version", | |
| + S|ANameSize, "name", | |
| + D|4, "clumps", | |
| + D|4, "cclumps", | |
| + T|4, "ctime", | |
| + T|4, "wtime", | |
| + 4, "clumpmagic", | |
| + D|8, "used", | |
| + D|8, "uncsize", | |
| + 1, "sealed", | |
| + | |
| + /* mem stats */ | |
| + 1, "extension", | |
| + D|4, "mem.clumps", | |
| + D|4, "mem.cclumps", | |
| + D|8, "mem.used", | |
| + D|8, "mem.uncsize", | |
| + 1, "mem.sealed", | |
| + 0 | |
| +}; | |
| +/* | |
| + * Print, field by field, where the len-byte on-disk structure at have | |
| + * differs from the expected structure at want.  info describes the | |
| + * fields in order and ends with a zero-length entry.  Differences in | |
| + * any bytes after the last described field are reported too. | |
| + */ | |
| +void | |
| +showdiffs(uchar *want, uchar *have, int len, Info *info) | |
| +{ | |
| + int n; | |
| + | |
| + while(len > 0 && (n=info->len&N) > 0){ /* n = width of this field in bytes */ | |
| + if(memcmp(have, want, n) != 0){ | |
| + switch(info->len){ /* format flag and width together pick the layout */ | |
| + case 1: | |
| + print("\t%s: correct=%d disk=%d\n", | |
| + info->name, *want, *have); | |
| + break; | |
| + case 4: | |
| + print("\t%s: correct=%#ux disk=%#ux\n", | |
| + info->name, u32(want), u32(have)); | |
| + break; | |
| + case D|4: | |
| + print("\t%s: correct=%,ud disk=%,ud\n", | |
| + info->name, u32(want), u32(have)); | |
| + break; | |
| + case T|4: | |
| + print("\t%s: correct=%t\n\t\tdisk=%t\n", | |
| + info->name, u32(want), u32(have)); | |
| + break; | |
| + case Z|4: | |
| + print("\t%s: correct=%z disk=%z\n", | |
| + info->name, (uvlong)u32(want), (uvlong… | |
| + break; | |
| + case D|8: | |
| + print("\t%s: correct=%,lld disk=%,lld\n", | |
| + info->name, u64(want), u64(have)); | |
| + break; | |
| + case Z|8: | |
| + print("\t%s: correct=%z disk=%z\n", | |
| + info->name, u64(want), u64(have)); | |
| + break; | |
| + case S|ANameSize: | |
| + print("\t%s: correct=%s disk=%.*s\n", | |
| + info->name, (char*)want, | |
| + utfnlen((char*)have, ANameSize-1), | |
| + (char*)have); | |
| + break; | |
| + default: | |
| + print("\t%s: correct=%.*H disk=%.*H\n", | |
| + info->name, n, want, n, have); | |
| + break; | |
| + } | |
| + } | |
| + have += n; | |
| + want += n; | |
| + len -= n; | |
| + info++; | |
| + } | |
| + if(len > 0 && memcmp(have, want, len) != 0){ /* bytes past the last described field */ | |
| + if(memcmp(want, zero, len) != 0) | |
| + print("!!\textra want data in showdiffs (bug in fixare… | |
| + else | |
| + print("\tnon-zero data on disk after structure\n"); | |
| + if(verbose > 1){ | |
| + print("want: %.*H\n", len, want); | |
| + print("have: %.*H\n", len, have); | |
| + } | |
| + } | |
| +} | |
| + | |
| +static int tabsizes[] = { 64*1024, 512*1024, }; | |
| +/* | |
| + * Poke around on the disk to guess what the ArenaPart numbers are. | |
| + * Sets ap.version, ap.blocksize, ap.arenabase, ap.tabbase, ap.tabsize | |
| + * and arenasize.  The scan for arena heads and tails is made only | |
| + * when arenasize or ap.blocksize is still zero on entry. | |
| + */ | |
| +void | |
| +guessgeometry(void) | |
| +{ | |
| + int i, j, n, bestn, ndiff, nhead, ntail; | |
| + uchar *p, *ep, *sp; | |
| + u64int diff[100], head[20], tail[20]; | |
| + u64int offset, bestdiff; | |
| + | |
| + ap.version = ArenaPartVersion; | |
| + | |
| + if(arenasize == 0 || ap.blocksize == 0){ | |
| + /* | |
| + * The ArenaPart block at offset PartBlank may be corrupt or j… | |
| + * Instead, look for the individual arena headers and tails, w… | |
| + * are many of, and once we've seen enough, infer the spacing. | |
| + * | |
| + * Of course, nothing in the file format requires that arenas … | |
| + * spaced, but fmtarenas always does that for us. | |
| + */ | |
| + nhead = 0; | |
| + ntail = 0; | |
| + for(offset=PartBlank; offset<partend; offset+=4*M){ | |
| + p = pagein(offset, 4*M); | |
| + for(sp=p, ep=p+4*M; p<ep; p+=K){ /* look for a magic number at every K boundary */ | |
| + if(u32(p) == ArenaHeadMagic && nhead < nelem(h… | |
| + if(verbose) | |
| + print("arena head at %#llx\n",… | |
| + head[nhead++] = offset+(p-sp); | |
| + } | |
| + if(u32(p) == ArenaMagic && ntail < nelem(tail)… | |
| + tail[ntail++] = offset+(p-sp); | |
| + if(verbose) | |
| + print("arena tail at %#llx\n",… | |
| + } | |
| + } | |
| + if(nhead == nelem(head) && ntail == nelem(tail)) /* both sample arrays full */ | |
| + break; | |
| + } | |
| + if(nhead < 3 && ntail < 3) | |
| + sysfatal("too few intact arenas: %d heads, %d tails", … | |
| + | |
| + /* | |
| + * Arena size is likely the most common | |
| + * inter-head or inter-tail spacing. | |
| + */ | |
| + ndiff = 0; | |
| + for(i=1; i<nhead; i++) | |
| + diff[ndiff++] = head[i] - head[i-1]; | |
| + for(i=1; i<ntail; i++) | |
| + diff[ndiff++] = tail[i] - tail[i-1]; | |
| + qsort(diff, ndiff, sizeof diff[0], vlongcmp); | |
| + bestn = 0; | |
| + bestdiff = 0; | |
| + for(i=1, n=1; i<=ndiff; i++, n++){ /* longest run of equal values in sorted diff */ | |
| + if(i==ndiff || diff[i] != diff[i-1]){ | |
| + if(n > bestn){ | |
| + bestn = n; | |
| + bestdiff = diff[i-1]; | |
| + } | |
| + n = 0; | |
| + } | |
| + } | |
| + print("arena size likely %z (%d of %d)\n", bestdiff, bestn, nd… | |
| + if(arenasize != 0 && arenasize != bestdiff) | |
| + print("using user-specified size %z instead\n", arenas… | |
| + else | |
| + arenasize = bestdiff; | |
| + | |
| + /* | |
| + * The arena tail for an arena is arenasize-blocksize from the… | |
| + */ | |
| + ndiff = 0; | |
| + for(i=j=0; i<nhead && j<ntail; ){ /* pair each tail with the head whose arena holds it */ | |
| + if(tail[j] < head[i]){ | |
| + j++; | |
| + continue; | |
| + } | |
| + if(tail[j] < head[i]+arenasize){ | |
| + diff[ndiff++] = head[i]+arenasize - tail[j]; | |
| + j++; | |
| + continue; | |
| + } | |
| + i++; | |
| + } | |
| + if(ndiff < 3) | |
| + sysfatal("too few intact arenas: %d head, tail pairs",… | |
| + qsort(diff, ndiff, sizeof diff[0], vlongcmp); | |
| + bestn = 0; | |
| + bestdiff = 0; | |
| + for(i=1, n=1; i<=ndiff; i++, n++){ /* most common value again */ | |
| + if(i==ndiff || diff[i] != diff[i-1]){ | |
| + if(n > bestn){ | |
| + bestn = n; | |
| + bestdiff = diff[i-1]; | |
| + } | |
| + n = 0; | |
| + } | |
| + } | |
| + print("block size likely %z (%d of %d)\n", bestdiff, bestn, nd… | |
| + if(ap.blocksize != 0 && ap.blocksize != bestdiff) | |
| + print("using user-specified size %z instead\n", (vlong… | |
| + else | |
| + ap.blocksize = bestdiff; | |
| + if(ap.blocksize == 0 || ap.blocksize&(ap.blocksize-1)) | |
| + sysfatal("block size not a power of two"); | |
| + if(ap.blocksize > MaxDiskBlock) | |
| + sysfatal("block size too big (max=%d)", MaxDiskBlock); | |
| + | |
| + /* | |
| + * Use head/tail information to deduce arena base. | |
| + */ | |
| + ndiff = 0; | |
| + for(i=0; i<nhead; i++) | |
| + diff[ndiff++] = head[i]%arenasize; | |
| + for(i=0; i<ntail; i++) | |
| + diff[ndiff++] = (tail[i]+ap.blocksize)%arenasize; | |
| + qsort(diff, ndiff, sizeof diff[0], vlongcmp); | |
| + bestn = 0; | |
| + bestdiff = 0; | |
| + for(i=1, n=1; i<=ndiff; i++, n++){ /* most common value again */ | |
| + if(i==ndiff || diff[i] != diff[i-1]){ | |
| + if(n > bestn){ | |
| + bestn = n; | |
| + bestdiff = diff[i-1]; | |
| + } | |
| + n = 0; | |
| + } | |
| + } | |
| + ap.arenabase = bestdiff; | |
| + } | |
| + | |
| + ap.tabbase = (PartBlank+HeadSize+ap.blocksize-1)&~(ap.blocksize-1); /* round up to a block boundary */ | |
| + /* | |
| + * XXX pick up table, check arenabase. | |
| + * XXX pick up table, record base name. | |
| + */ | |
| + | |
| + /* | |
| + * Somewhat standard computation. | |
| + * Fmtarenas used to use 64k tab, now uses 512k tab. | |
| + */ | |
| + if(ap.arenabase == 0){ | |
| + for(i=0; i<nelem(tabsizes); i++){ /* keep the first candidate base that holds an arena head */ | |
| + ap.arenabase = (PartBlank+HeadSize+tabsizes[i]+ap.bloc… | |
| + p = pagein(ap.arenabase, Block); | |
| + if(u32(p) == ArenaHeadMagic) | |
| + break; | |
| + } | |
| + } | |
| + p = pagein(ap.arenabase, Block); | |
| + print("arena base likely %z%s\n", (vlong)ap.arenabase, | |
| + u32(p)!=ArenaHeadMagic ? " (but no arena head there)" : ""); | |
| + | |
| + ap.tabsize = ap.arenabase - ap.tabbase; | |
| + | |
| +} | |
| + | |
| +/* | |
| + * Check the arena partition blocks and then the arenas listed in range. | |
| + * range is nil (check every arena), "none" (check no arenas), or a | |
| + * comma-separated list of arena numbers and lo-hi ranges such as | |
| + * -4,8-9,10- (either end of a range may be omitted). | |
| + * List elements naming arenas outside 0..narena-1 are reported and | |
| + * skipped.  The range string is modified while it is parsed. | |
| + */ | |
| +void | |
| +checkarenas(char *range) | |
| +{ | |
| + char *s, *t; | |
| + int i, lo, hi, narena; | |
| + uchar dbuf[HeadSize]; | |
| + uchar *p; | |
| + | |
| + guessgeometry(); | |
| + | |
| + partend -= partend%ap.blocksize; | |
| + | |
| + /* compare the superblock we would write against the one on disk */ | |
| + memset(dbuf, 0, sizeof dbuf); | |
| + packarenapart(&ap, dbuf); | |
| + p = pagein(PartBlank, Block); | |
| + if(memcmp(p, dbuf, HeadSize) != 0){ | |
| + print("on-disk arena part superblock incorrect\n"); | |
| + showdiffs(dbuf, p, HeadSize, partinfo); | |
| + } | |
| + /* editing the paged-in buffer; it is written back only when fixing */ | |
| + memmove(p, dbuf, HeadSize); | |
| + | |
| + narena = (partend-ap.arenabase + arenasize-1)/arenasize; | |
| + if(range == nil){ | |
| + for(i=0; i<narena; i++) | |
| + checkarena(ap.arenabase+(vlong)i*arenasize, i); | |
| + }else if(strcmp(range, "none") == 0){ | |
| + /* nothing */ | |
| + }else{ | |
| + /* parse, e.g., -4,8-9,10- */ | |
| + for(s=range; *s; s=t){ | |
| + t = strchr(s, ','); | |
| + if(t) | |
| + *t++ = 0; | |
| + else | |
| + t = s+strlen(s); | |
| + if(*s == '-') | |
| + lo = 0; | |
| + else | |
| + lo = strtol(s, &s, 0); | |
| + hi = lo; | |
| + if(*s == '-'){ | |
| + s++; | |
| + if(*s == 0) | |
| + hi = narena-1; | |
| + else | |
| + hi = strtol(s, &s, 0); | |
| + } | |
| + if(*s != 0){ | |
| + print("bad arena range: %s\n", s); | |
| + continue; | |
| + } | |
| + /* don't go looking for arenas that are not on the partition */ | |
| + if(lo < 0 || hi >= narena){ | |
| + print("arena range %d-%d outside 0-%d\n", lo, hi, narena-1); | |
| + continue; | |
| + } | |
| + for(i=lo; i<=hi; i++) | |
| + checkarena(ap.arenabase+(vlong)i*arenasize, i); | |
| + } | |
| + } | |
| +} | |
| + | |
| +/* | |
| + * Is there a clump here at p? | |
| + * Unpacks the clump header at p into cl and accepts it only if the | |
| + * magic is nonzero, the type is valid, the sizes and encoding are | |
| + * consistent, and the data (uncompressed first if necessary) hashes | |
| + * to the score in the header. | |
| + * Returns the number of bytes the clump occupies (header plus data) | |
| + * and stores its magic in *pmagic; returns 0 if there is no clump. | |
| + * cl may be partly filled in even on failure. | |
| + */ | |
| +static int | |
| +isclump(uchar *p, Clump *cl, u32int *pmagic) | |
| +{ | |
| + int n; | |
| + u32int magic; | |
| + uchar score[VtScoreSize], *start, *data; | |
| + Unwhack uw; | |
| + uchar ubuf[70*1024]; | |
| + | |
| + start = p; | |
| + magic = u32(p); | |
| + if(magic == 0) | |
| + return 0; | |
| + p += U32Size; | |
| + | |
| + /* header: type[1] size[2] uncsize[2] score encoding[1] creator[4] time[4] */ | |
| + cl->info.type = vtfromdisktype(*p); | |
| + if(cl->info.type == 0xFF) | |
| + return 0; | |
| + p++; | |
| + cl->info.size = u16(p); | |
| + cl->info.uncsize = u16(p+U16Size); | |
| + if(cl->info.size > cl->info.uncsize) | |
| + return 0; | |
| + p += 2*U16Size; | |
| + scorecp(cl->info.score, p); | |
| + p += VtScoreSize; | |
| + cl->encoding = *p; | |
| + p++; | |
| + cl->creator = u32(p); | |
| + cl->time = u32(p+U32Size); | |
| + p += 2*U32Size; | |
| + | |
| + /* find the uncompressed data that the score should cover */ | |
| + if(cl->encoding == ClumpENone){ | |
| + if(cl->info.size != cl->info.uncsize) | |
| + return 0; | |
| + data = p; | |
| + }else if(cl->encoding == ClumpECompress){ | |
| + if(cl->info.size >= cl->info.uncsize) | |
| + return 0; | |
| + unwhackinit(&uw); | |
| + n = unwhack(&uw, ubuf, cl->info.uncsize, p, cl->info.size); | |
| + if(n != cl->info.uncsize) | |
| + return 0; | |
| + data = ubuf; | |
| + }else | |
| + return 0; | |
| + | |
| + scoremem(score, data, cl->info.uncsize); | |
| + if(scorecmp(score, cl->info.score) != 0) | |
| + return 0; | |
| + | |
| + /* it all worked out in the end */ | |
| + *pmagic = magic; | |
| + return (p + cl->info.size) - start; | |
| +} | |
| + | |
| +/* | |
| + * All ClumpInfos seen in this arena. | |
| + * Kept in binary tree so we can look up by score. | |
| + * Entries live in the array cibuf in the order they were added; | |
| + * the tree links are indices into that array. | |
| + */ | |
| +typedef struct Cit Cit; | |
| +struct Cit | |
| +{ | |
| + int left; /* index of subtree with greater scores, or -1 */ | |
| + int right; /* index of subtree with lesser scores, or -1 */ | |
| + vlong corrupt; /* nonzero: placeholder for a corrupt region (addcicorrupt passes its length), kept out of the tree */ | |
| + ClumpInfo ci; | |
| +}; | |
| +Cit *cibuf; /* the entries; grown by addcibuf */ | |
| +int ciroot; /* index of the tree root, or -1 when the tree is empty */ | |
| +int ncibuf, mcibuf; /* entries in use, entries allocated */ | |
| + | |
| +/* | |
| + * Forget all recorded clump infos: empty tree, no entries in use. | |
| + * The cibuf allocation itself is kept for reuse. | |
| + */ | |
| +void | |
| +resetcibuf(void) | |
| +{ | |
| + ciroot = -1; | |
| + ncibuf = 0; | |
| +} | |
| + | |
| +/* | |
| + * Walk the score tree starting at the link *p. | |
| + * Returns the link that refers to the entry with the given score, | |
| + * or, if there is none, the empty (-1) link where it would go. | |
| + */ | |
| +int* | |
| +ltreewalk(int *p, uchar *score) | |
| +{ | |
| + int cmp; | |
| + Cit *node; | |
| + | |
| + while(*p != -1){ | |
| + node = &cibuf[*p]; | |
| + cmp = scorecmp(node->ci.score, score); | |
| + if(cmp == 0) | |
| + break; | |
| + p = cmp < 0 ? &node->right : &node->left; | |
| + } | |
| + return p; | |
| +} | |
| + | |
| +/* | |
| + * Append a clump info to cibuf.  If corrupt is nonzero the entry is a | |
| + * placeholder for a corrupt region and is not entered in the score | |
| + * tree; otherwise the entry is linked into the tree unless an entry | |
| + * with the same score is already there. | |
| + */ | |
| +void | |
| +addcibuf(ClumpInfo *ci, vlong corrupt) | |
| +{ | |
| + Cit *cit; | |
| + int *slot; | |
| + | |
| + if(ncibuf == mcibuf){ | |
| + mcibuf += 131072; | |
| + cibuf = vtrealloc(cibuf, mcibuf*sizeof cibuf[0]); | |
| + } | |
| + cit = &cibuf[ncibuf]; | |
| + cit->ci = *ci; | |
| + cit->left = -1; | |
| + cit->right = -1; | |
| + cit->corrupt = corrupt; | |
| + if(!corrupt){ | |
| + /* | |
| + * For a duplicate score ltreewalk returns the link to the | |
| + * existing entry.  Overwriting that link with the new, | |
| + * childless entry would cut the old entry's subtrees out | |
| + * of the tree, so only fill links that are empty. | |
| + */ | |
| + slot = ltreewalk(&ciroot, ci->score); | |
| + if(*slot == -1) | |
| + *slot = ncibuf; | |
| + } | |
| + ncibuf++; | |
| +} | |
| + | |
| +/* | |
| + * Record a corrupt region of len bytes as an all-zero clump info. | |
| + */ | |
| +void | |
| +addcicorrupt(vlong len) | |
| +{ | |
| + ClumpInfo blank; | |
| + | |
| + memset(&blank, 0, sizeof blank); | |
| + addcibuf(&blank, len); | |
| +} | |
| + | |
| +/* | |
| + * Report whether a clump with this score is in the score tree: | |
| + * 1 if so, 0 if not. | |
| + */ | |
| +int | |
| +haveclump(uchar *score) | |
| +{ | |
| + int *link; | |
| + | |
| + /* ltreewalk stops on an empty link exactly when the score is absent */ | |
| + link = ltreewalk(&ciroot, score); | |
| + if(*link == -1) | |
| + return 0; | |
| + return 1; | |
| +} | |
| + | |
| +/* | |
| + * Does the packed clump info at p (type[1] size[2] uncsize[2] score) | |
| + * describe the same clump as ci?  Returns 1 if so, 0 if not. | |
| + */ | |
| +int | |
| +matchci(ClumpInfo *ci, uchar *p) | |
| +{ | |
| + return ci->type == vtfromdisktype(p[0]) | |
| + && ci->size == u16(p+1) | |
| + && ci->uncsize == u16(p+3) | |
| + && scorecmp(ci->score, p+5) == 0; | |
| +} | |
| + | |
| +/* | |
| + * Does the arena tail block at p (blocksize bytes) look like that of | |
| + * a sealed arena?  It must have a known version, a sealed byte of 1 | |
| + * at the end of the tail structure, zeros from there up to the score, | |
| + * and a nonzero score in its last VtScoreSize bytes. | |
| + * Returns 1 if so; otherwise 0, after printing the reason unless the | |
| + * version is simply unknown. | |
| + */ | |
| +int | |
| +sealedarena(uchar *p, int blocksize) | |
| +{ | |
| + int version, tailsize; | |
| + uchar *score; | |
| + | |
| + version = u32(p+4); | |
| + if(version == ArenaVersion4) | |
| + tailsize = ArenaSize4; | |
| + else if(version == ArenaVersion5) | |
| + tailsize = ArenaSize5; | |
| + else | |
| + return 0; | |
| + | |
| + if(p[tailsize-1] != 1){ | |
| + print("arena tail says not sealed\n"); | |
| + return 0; | |
| + } | |
| + score = p+blocksize-VtScoreSize; | |
| + if(memcmp(p+tailsize, zero, blocksize-VtScoreSize-tailsize) != 0){ | |
| + print("arena tail followed by non-zero data\n"); | |
| + return 0; | |
| + } | |
| + if(memcmp(score, zero, VtScoreSize) == 0){ | |
| + print("arena score zero\n"); | |
| + return 0; | |
| + } | |
| + return 1; | |
| +} | |
| + | |
| +/* | |
| + * Is name acceptable as the name of arena number n? | |
| + * It must pass nameok and end in the decimal digits of n. | |
| + * Returns 1 if so, 0 if not. | |
| + */ | |
| +int | |
| +okayname(char *name, int n) | |
| +{ | |
| + char suffix[20]; | |
| + int namelen, suffixlen; | |
| + | |
| + if(nameok(name) < 0) | |
| + return 0; | |
| + sprint(suffix, "%d", n); | |
| + namelen = strlen(name); | |
| + suffixlen = strlen(suffix); | |
| + if(namelen < suffixlen) | |
| + return 0; | |
| + return strcmp(name+namelen-suffixlen, suffix) == 0; | |
| +} | |
| + | |
| +/* | |
| + * Order two clump infos by type, then size, then uncsize, then score. | |
| + * Returns zero when all four match and nonzero otherwise, with the | |
| + * sign giving the order. | |
| + */ | |
| +int | |
| +clumpinfocmp(ClumpInfo *a, ClumpInfo *b) | |
| +{ | |
| + int d; | |
| + | |
| + d = a->type - b->type; | |
| + if(d == 0) | |
| + d = a->size - b->size; | |
| + if(d == 0) | |
| + d = a->uncsize - b->uncsize; | |
| + if(d == 0) | |
| + d = scorecmp(a->score, b->score); | |
| + return d; | |
| +} | |
| +/* | |
| + * Read nci clump infos from the directory of the arena that starts | |
| + * at disk offset offset.  Directory blocks are taken starting with | |
| + * the block just below the arena's last block and moving toward lower | |
| + * offsets; each block holds up to blocksize/ClumpInfoSize entries. | |
| + * Returns an array of nci entries allocated with vtmalloc. | |
| + */ | |
| +ClumpInfo* | |
| +loadci(vlong offset, Arena *arena, int nci) | |
| +{ | |
| + int i, j, per; | |
| + uchar *p, *sp; | |
| + ClumpInfo *bci, *ci; | |
| + | |
| + per = arena->blocksize/ClumpInfoSize; /* entries per directory block */ | |
| + bci = vtmalloc(nci*sizeof bci[0]); | |
| + ci = bci; | |
| + offset += arena->size - arena->blocksize; /* start of the arena's last block */ | |
| + p = sp = nil; | |
| + for(i=0; i<nci; i+=per){ | |
| + if(p == sp){ /* used up the window: page in the 4M ending at offset */ | |
| + sp = pagein(offset-4*M, 4*M); | |
| + p = sp+4*M; | |
| + } | |
| + p -= arena->blocksize; | |
| + offset -= arena->blocksize; | |
| + for(j=0; j<per && i+j<nci; j++) | |
| + unpackclumpinfo(ci++, p+j*ClumpInfoSize); | |
| + } | |
| + return bci; | |
| +} | |
| +/* | |
| + * Pack the nci clump infos at ci into the directory of the arena that | |
| + * starts at disk offset offset, using the same block order as loadci: | |
| + * from the block just below the arena's last block toward lower | |
| + * offsets.  Each directory block is zeroed before it is filled. | |
| + * Returns the disk offset of the lowest directory block touched. | |
| + */ | |
| +vlong | |
| +writeci(vlong offset, Arena *arena, ClumpInfo *ci, int nci) | |
| +{ | |
| + int i, j, per; | |
| + uchar *p, *sp; | |
| + | |
| + per = arena->blocksize/ClumpInfoSize; /* entries per directory block */ | |
| + offset += arena->size - arena->blocksize; /* start of the arena's last block */ | |
| + p = sp = nil; | |
| + for(i=0; i<nci; i+=per){ | |
| + if(p == sp){ /* used up the window: page in the 4M ending at offset */ | |
| + sp = pagein(offset-4*M, 4*M); | |
| + p = sp+4*M; | |
| + } | |
| + p -= arena->blocksize; | |
| + offset -= arena->blocksize; | |
| + memset(p, 0, arena->blocksize); | |
| + for(j=0; j<per && i+j<nci; j++) | |
| + packclumpinfo(ci++, p+j*ClumpInfoSize); | |
| + } | |
| + pageout(); /* flush the final window */ | |
| + return offset; | |
| +} | |
| +/* | |
| + * Fill in the basic fields of head and arena for arena number anum | |
| + * at disk offset offset0: size and block size from the partition | |
| + * geometry; version, clump magic, name, clump count and sealed flag | |
| + * from whatever can be unpacked from the existing head and tail blocks. | |
| + * The arena name is chosen, in order of preference, from the global | |
| + * basename, the base name remembered from the previous call, the | |
| + * on-disk head name, and the on-disk tail name; with none of these | |
| + * the program exits via sysfatal. | |
| + */ | |
| +void | |
| +loadarenabasics(vlong offset0, int anum, ArenaHead *head, Arena *arena) | |
| +{ | |
| + char dname[ANameSize]; | |
| + static char lastbase[ANameSize]; /* base name (number stripped) from the previous call */ | |
| + uchar *p; | |
| + Arena oarena; | |
| + ArenaHead ohead; | |
| + | |
| + /* | |
| + * Fmtarenas makes all arenas the same size | |
| + * except the last, which may be smaller. | |
| + * It uses the same block size for arenas as for | |
| + * the arena partition blocks. | |
| + */ | |
| + arena->size = arenasize; | |
| + if(offset0+arena->size > partend) | |
| + arena->size = partend - offset0; | |
| + head->size = arena->size; | |
| + | |
| + arena->blocksize = ap.blocksize; | |
| + head->blocksize = arena->blocksize; | |
| + | |
| + /* | |
| + * Look for clump magic and name in head/tail blocks. | |
| + * All the other info we will reconstruct just in case. | |
| + */ | |
| + p = pagein(offset0, arena->blocksize); | |
| + memset(&ohead, 0, sizeof ohead); | |
| + if(unpackarenahead(&ohead, p) >= 0){ | |
| + head->version = ohead.version; | |
| + head->clumpmagic = ohead.clumpmagic; | |
| + if(okayname(ohead.name, anum)) | |
| + strcpy(head->name, ohead.name); | |
| + } | |
| + | |
| + p = pagein(offset0+arena->size-arena->blocksize, | |
| + arena->blocksize); | |
| + memset(&oarena, 0, sizeof oarena); | |
| + if(unpackarena(&oarena, p) >= 0){ | |
| + arena->version = oarena.version; | |
| + arena->clumpmagic = oarena.clumpmagic; | |
| + if(okayname(oarena.name, anum)) | |
| + strcpy(arena->name, oarena.name); | |
| + arena->diskstats.clumps = oarena.diskstats.clumps; | |
| +print("old arena: sealed=%d\n", oarena.diskstats.sealed); /* NOTE(review): unconditional, unindented print; looks like leftover debugging */ | |
| + arena->diskstats.sealed = oarena.diskstats.sealed; | |
| + } | |
| + | |
| + /* Head trumps arena. */ | |
| + if(head->version){ | |
| + arena->version = head->version; | |
| + arena->clumpmagic = head->clumpmagic; | |
| + } | |
| + if(arena->version == 0) | |
| + arena->version = ArenaVersion5; | |
| + if(basename) | |
| + snprint(arena->name, ANameSize, "%s%d", basename, anum); | |
| + else if(lastbase[0]) | |
| + snprint(arena->name, ANameSize, "%s%d", lastbase, anum); | |
| + else if(head->name[0]) | |
| + strcpy(arena->name, head->name); | |
| + else if(arena->name[0] == 0) | |
| + sysfatal("cannot determine base name for arena; use -n"); | |
| + strcpy(lastbase, arena->name); | |
| + sprint(dname, "%d", anum); | |
| + lastbase[strlen(lastbase)-strlen(dname)] = 0; /* strip the arena number, leaving the base */ | |
| + | |
| + /* Was working in arena, now copy to head. */ | |
| + head->version = arena->version; | |
| + memmove(head->name, arena->name, sizeof head->name); | |
| + head->blocksize = arena->blocksize; | |
| + head->size = arena->size; | |
| +} | |
| + | |
| +/* | |
| + * Start the hash in sb at disk offset offset0 with the arena head | |
| + * block as it would be packed from head (zero-padded to blocksize). | |
| + */ | |
| +void | |
| +shahead(Shabuf *sb, vlong offset0, ArenaHead *head) | |
| +{ | |
| + uchar blk[MaxDiskBlock]; | |
| + | |
| + memset(blk, 0, sizeof blk); | |
| + packarenahead(head, blk); | |
| + | |
| + sb->offset = offset0; | |
| + sbupdate(sb, blk, offset0, head->blocksize); | |
| +} | |
| + | |
| +/* | |
| + * Choose a clump magic number for an arena of the given version: | |
| + * version 4 arenas always use _ClumpMagic; otherwise pick a random | |
| + * value that is neither zero nor _ClumpMagic. | |
| + */ | |
| +u32int | |
| +newclumpmagic(int version) | |
| +{ | |
| + u32int m; | |
| + | |
| + if(version == ArenaVersion4) | |
| + return _ClumpMagic; | |
| + m = fastrand(); | |
| + while(m == 0 || m == _ClumpMagic) | |
| + m = fastrand(); | |
| + return m; | |
| +} | |
| + | |
| +/* | |
| + * Poke around in the arena to find the clump data | |
| + * and compute the relevant statistics. | |
| + */ | |
| +void | |
| +guessarena(vlong offset0, int anum, ArenaHead *head, Arena *arena, | |
| + uchar *oldscore, uchar *score) | |
| +{ | |
| + uchar dbuf[MaxDiskBlock]; | |
| + int needtozero, clumps, nb1, nb2, minclumps; | |
| + int inbad, n, ncib, printed, sealing, smart; | |
| + u32int magic; | |
| + uchar *sp, *ep, *p; | |
| + vlong boffset, eoffset, lastclumpend, leaked; | |
| + vlong offset, toffset, totalcorrupt, v; | |
| + Clump cl; | |
| + ClumpInfo *bci, *ci, *eci, *xci; | |
| + Cit *bcit, *cit, *ecit; | |
| + Shabuf oldsha, newsha; | |
| + | |
| + /* | |
| + * We expect to find an arena, with data, between offset | |
| + * and offset+arenasize. With any luck, the data starts at | |
| + * offset+ap.blocksize. The blocks have variable size and | |
| + * aren't padded at all, which doesn't give us any alignment | |
| + * constraints. The blocks are compressed or high entropy, | |
| + * but the headers are pretty low entropy (except the score): | |
| + * | |
| + * type[1] (range 0 thru 9, 13) | |
| + * size[2] | |
| + * uncsize[2] (<= size) | |
| + * | |
| + * so we can look for these. We check the scores as we go, | |
| + * so we can't make any wrong turns. If we find ourselves | |
| + * in a dead end, scan forward looking for a new start. | |
| + */ | |
| + | |
| + resetcibuf(); | |
| + memset(head, 0, sizeof *head); | |
| + memset(arena, 0, sizeof *arena); | |
| + memset(oldscore, 0, VtScoreSize); | |
| + memset(score, 0, VtScoreSize); | |
| + memset(&oldsha, 0, sizeof oldsha); | |
| + memset(&newsha, 0, sizeof newsha); | |
| + newsha.rollback = 1; | |
| + | |
| + if(0){ | |
| + sbdebug(&oldsha, "old.sha"); | |
| + sbdebug(&newsha, "new.sha"); | |
| + } | |
| + | |
| + loadarenabasics(offset0, anum, head, arena); | |
| + | |
| + /* start the clump hunt */ | |
| + | |
| + clumps = 0; | |
| + totalcorrupt = 0; | |
| + sealing = 1; | |
| + boffset = offset0 + arena->blocksize; | |
| + offset = boffset; | |
| + eoffset = offset0+arena->size - arena->blocksize; | |
| + toffset = eoffset; | |
| + sp = pagein(offset0, 4*M); | |
| + | |
| + if(arena->diskstats.sealed){ | |
| + oldsha.offset = offset0; | |
| + sbupdate(&oldsha, sp, offset0, 4*M); | |
| + } | |
| + ep = sp+4*M; | |
| + p = sp + (boffset - offset0); | |
| + ncib = arena->blocksize / ClumpInfoSize; /* ci per block in ind… | |
| + lastclumpend = offset; | |
| + nbad = 0; | |
| + inbad = 0; | |
| + needtozero = 0; | |
| + minclumps = 0; | |
| + while(offset < eoffset){ | |
| + /* | |
| + * Shift buffer if we're running out of room. | |
| + */ | |
| + if(p+70*K >= ep){ | |
| + /* | |
| + * Start the post SHA1 buffer. By now we should know… | |
| + * clumpmagic and arena version, so we can create a | |
| + * correct head block to get things going. | |
| + */ | |
| + if(sealing && fix && newsha.offset == 0){ | |
| + newsha.offset = offset0; | |
| + if(arena->clumpmagic == 0){ | |
| + if(arena->version == 0) | |
| + arena->version = ArenaVersion5; | |
| + arena->clumpmagic = newclumpmagic(aren… | |
| + } | |
| + head->clumpmagic = arena->clumpmagic; | |
| + shahead(&newsha, offset0, head); | |
| + } | |
| + n = 4*M-256*K; | |
| + if(sealing && fix){ | |
| + sbdiskhash(&newsha, bufoffset); | |
| + sbupdate(&newsha, buf, bufoffset, 4*M-256*K); | |
| + } | |
| + pagein(bufoffset+n, 4*M); | |
| + p -= n; | |
| + if(arena->diskstats.sealed) | |
| + sbupdate(&oldsha, buf, bufoffset, 4*M); | |
| + } | |
| + | |
| + /* | |
| + * Check for a clump at p, which is at offset in the disk. | |
| + * Duplicate clumps happen in corrupted disks | |
| + * (the same pattern gets written many times in a row) | |
| + * and should never happen during regular use. | |
| + */ | |
| + if((n = isclump(p, &cl, &magic)) > 0){ | |
| + /* | |
| + * If we were in the middle of some corrupted data, | |
| + * flush a warning about it and then add any clump | |
| + * info blocks as necessary. | |
| + */ | |
| + if(inbad){ | |
| + inbad = 0; | |
| + v = offset-lastclumpend; | |
| + if(needtozero){ | |
| + zerorange(lastclumpend, v); | |
| + sbrollback(&newsha, lastclumpend); | |
| + print("corrupt clump data - %#llux+%#l… | |
| + lastclumpend, v, v); | |
| + } | |
| + addcicorrupt(v); | |
| + totalcorrupt += v; | |
| + nb1 = (minclumps+ncib-1)/ncib; | |
| + minclumps += (v+ClumpSize+VtMaxLumpSize-1)/(Cl… | |
| + nb2 = (minclumps+ncib-1)/ncib; | |
| + eoffset -= (nb2-nb1)*arena->blocksize; | |
| + } | |
| + | |
| + if(haveclump(cl.info.score)) | |
| + print("warning: duplicate clump %d %V\n", cl.i… | |
| + | |
| + /* | |
| + * If clumps use different magic numbers, we don't car… | |
| + * We'll just use the first one we find and make the o… | |
| + * follow suit. | |
| + */ | |
| + if(arena->clumpmagic == 0){ | |
| + print("clump type %d size %d score %V magic %x… | |
| + cl.info.type, cl.info.size, cl.info.sc… | |
| + arena->clumpmagic = magic; | |
| + if(magic == _ClumpMagic) | |
| + arena->version = ArenaVersion4; | |
| + else | |
| + arena->version = ArenaVersion5; | |
| + } | |
| + if(magic != arena->clumpmagic) | |
| + p32(p, arena->clumpmagic); | |
| + if(clumps == 0) | |
| + arena->ctime = cl.time; | |
| + | |
| + /* | |
| + * Record the clump, update arena stats, | |
| + * grow clump info blocks if needed. | |
| + */ | |
| + if(verbose > 1) | |
| + print("\tclump %d: %d %V at %#llux+%#ux (%d)\n… | |
| + clumps, cl.info.type, cl.info.score, o… | |
| + addcibuf(&cl.info, 0); | |
| + if(minclumps%ncib == 0) | |
| + eoffset -= arena->blocksize; | |
| + minclumps++; | |
| + clumps++; | |
| + if(cl.encoding != ClumpENone) | |
| + arena->diskstats.cclumps++; | |
| + arena->diskstats.uncsize += cl.info.uncsize; | |
| + arena->wtime = cl.time; | |
| + | |
| + /* | |
| + * Move to next clump. | |
| + */ | |
| + offset += n; | |
| + p += n; | |
| + lastclumpend = offset; | |
| + }else{ | |
| + /* | |
| + * Overwrite malformed clump data with zeros later. | |
| + * For now, just record whether it needs to be overwri… | |
| + * Bad regions must be of size at least ClumpSize. | |
| + * Postponing the overwriting keeps us from writing pa… | |
| + * the end of the arena data (which might be directory… | |
| + * with zeros. | |
| + */ | |
| + if(!inbad){ | |
| + inbad = 1; | |
| + needtozero = 0; | |
| + if(memcmp(p, zero, ClumpSize) != 0) | |
| + needtozero = 1; | |
| + p += ClumpSize; | |
| + offset += ClumpSize; | |
| + nbad++; | |
| + }else{ | |
| + if(*p != 0) | |
| + needtozero = 1; | |
| + p++; | |
| + offset++; | |
| + } | |
| + } | |
| + } | |
| + pageout(); | |
| + | |
| + if(verbose) | |
| + print("readable clumps: %d; min. directory entries: %d\n", | |
| + clumps, minclumps); | |
| + arena->diskstats.used = lastclumpend - boffset; | |
| + leaked = eoffset - lastclumpend; | |
| + if(verbose) | |
| + print("used from %#llux to %#llux = %,lld (%,lld unused)\n", | |
| + boffset, lastclumpend, arena->diskstats.used, leaked); | |
| + | |
| + /* | |
| + * Finish the SHA1 of the old data. | |
| + */ | |
| + if(arena->diskstats.sealed){ | |
| + sbdiskhash(&oldsha, toffset); | |
| + readdisk(dbuf, toffset, arena->blocksize); | |
| + scorecp(dbuf+arena->blocksize-VtScoreSize, zero); | |
| + sbupdate(&oldsha, dbuf, toffset, arena->blocksize); | |
| + sbscore(&oldsha, oldscore); | |
| + } | |
| + | |
| + /* | |
| + * If we still don't know the clump magic, the arena | |
| + * must be empty. It still needs a value, so make | |
| + * something up. | |
| + */ | |
| + if(arena->version == 0) | |
| + arena->version = ArenaVersion5; | |
| + if(arena->clumpmagic == 0){ | |
| + if(arena->version == ArenaVersion4) | |
| + arena->clumpmagic = _ClumpMagic; | |
| + else{ | |
| + do | |
| + arena->clumpmagic = fastrand(); | |
| + while(arena->clumpmagic==_ClumpMagic | |
| + ||arena->clumpmagic==0); | |
| + } | |
| + head->clumpmagic = arena->clumpmagic; | |
| + } | |
| + | |
| + /* | |
| + * Guess at number of clumpinfo blocks to load. | |
| + * If we guess high, it's no big deal. If we guess low, | |
| + * we'll be forced into rewriting the whole directory. | |
| + * Still not such a big deal. | |
| + */ | |
| + if(clumps == 0 || arena->diskstats.used == totalcorrupt) | |
| + goto Nocib; | |
| + if(clumps < arena->diskstats.clumps) | |
| + clumps = arena->diskstats.clumps; | |
| + if(clumps < ncibuf) | |
| + clumps = ncibuf; | |
| + clumps += totalcorrupt/ | |
| + ((arena->diskstats.used - totalcorrupt)/clumps); | |
| + clumps += totalcorrupt/2000; | |
| + if(clumps < minclumps) | |
| + clumps = minclumps; | |
| + clumps += ncib-1; | |
| + clumps -= clumps%ncib; | |
| + | |
| + /* | |
| + * Can't write into the actual data. | |
| + */ | |
| + v = offset0 + arena->size - arena->blocksize; | |
| + v -= (clumps+ncib-1)/ncib * arena->blocksize; | |
| + if(v < lastclumpend){ | |
| + v = offset0 + arena->size - arena->blocksize; | |
| + clumps = (v-lastclumpend)/arena->blocksize * ncib; | |
| + } | |
| + | |
| + if(clumps < minclumps) | |
| + print("cannot happen?\n"); | |
| + | |
| + /* | |
| + * Check clumpinfo blocks against directory we created. | |
| + * The tricky part is handling the corrupt sections of arena. | |
| + * If possible, we remark just the affected directory entries | |
| + * rather than slide everything down. | |
| + * | |
| + * Allocate clumps+1 blocks and check that we don't need | |
| + * the last one at the end. | |
| + */ | |
| + bci = loadci(offset0, arena, clumps+1); | |
| + eci = bci+clumps+1; | |
| + bcit = cibuf; | |
| + ecit = cibuf+ncibuf; | |
| + smart = 1; | |
| +Again: | |
| + nbad = 0; | |
| + ci = bci; | |
| + for(cit=bcit; cit<ecit && ci<eci; cit++){ | |
| + if(cit->corrupt){ | |
| + vlong n, m; | |
| + if(smart){ | |
| + /* | |
| + * If we can, just mark existing entries as co… | |
| + */ | |
| + n = cit->corrupt; | |
| + for(xci=ci; n>0 && xci<eci; xci++) | |
| + n -= ClumpSize+xci->size; | |
| + if(n > 0 || xci >= eci) | |
| + goto Dumb; | |
| + printed = 0; | |
| + for(; ci<xci; ci++){ | |
| + if(verbose && ci->type != VtCorruptTyp… | |
| + if(!printed){ | |
| + print("marking directo… | |
| + (int)(ci-bci),… | |
| + printed = 1; | |
| + } | |
| + print("\ttype=%d size=%d uncsi… | |
| + ci->type, ci->size, ci… | |
| + } | |
| + ci->type = VtCorruptType; | |
| + } | |
| + }else{ | |
| + Dumb: | |
| + print("\trewriting clump directory\n"); | |
| + /* | |
| + * Otherwise, blaze a new trail. | |
| + */ | |
| + n = cit->corrupt; | |
| + while(n > 0 && ci < eci){ | |
| + if(n < ClumpSize) | |
| + sysfatal("bad math in clump co… | |
| + if(n <= VtMaxLumpSize+ClumpSize) | |
| + m = n; | |
| + else{ | |
| + m = VtMaxLumpSize+ClumpSize; | |
| + if(n-m < ClumpSize) | |
| + m -= ClumpSize; | |
| + } | |
| + ci->type = VtCorruptType; | |
| + ci->size = m-ClumpSize; | |
| + ci->uncsize = m-ClumpSize; | |
| + memset(ci->score, 0, VtScoreSize); | |
| + ci++; | |
| + n -= m; | |
| + } | |
| + } | |
| + continue; | |
| + } | |
| + if(clumpinfocmp(&cit->ci, ci) != 0){ | |
| + if(verbose && (smart || verbose>1)){ | |
| + print("clumpinfo %d\n", (int)(ci-bci)); | |
| + print("\twant: %d %d %d %V\n", | |
| + cit->ci.type, cit->ci.size, | |
| + cit->ci.uncsize, cit->ci.score); | |
| + print("\thave: %d %d %d %V\n", | |
| + ci->type, ci->size, | |
| + ci->uncsize, ci->score); | |
| + } | |
| + *ci = cit->ci; | |
| + nbad++; | |
| + } | |
| + ci++; | |
| + } | |
| + if(ci >= eci || cit < ecit){ | |
| + print("ran out of space editing existing directory; rewriting\… | |
| + print("# eci %ld ci %ld ecit %ld cit %ld\n", eci-bci, ci-bci, … | |
| + assert(smart); /* can't happen second time thru */ | |
| + smart = 0; | |
| + goto Again; | |
| + } | |
| + | |
| + assert(ci <= eci); | |
| + arena->diskstats.clumps = ci-bci; | |
| + eoffset = writeci(offset0, arena, bci, ci-bci); | |
| + if(sealing && fix) | |
| + sbrollback(&newsha, v); | |
| +print("eoffset=%lld lastclumpend=%lld diff=%lld unseal=%d\n", eoffset, lastclu… | |
| + if(lastclumpend > eoffset) | |
| + print("arena directory overwrote blocks! cannot happen!\n"); | |
| + free(bci); | |
| + if(smart && nbad) | |
| + print("arena directory has %d bad or missing entries\n", nbad); | |
| +Nocib: | |
| + if(eoffset - lastclumpend > 64*1024 && (!arena->diskstats.sealed || un… | |
| + if(arena->diskstats.sealed) | |
| + print("unsealing arena\n"); | |
| + sealing = 0; | |
| + memset(oldscore, 0, VtScoreSize); | |
| + } | |
| + | |
| + /* | |
| + * Finish the SHA1 of the new data - only meaningful | |
| + * if we've been writing to disk (`fix'). | |
| + */ | |
| + arena->diskstats.sealed = sealing; | |
| + arena->memstats = arena->diskstats; | |
| + if(sealing && fix){ | |
| + uchar tbuf[MaxDiskBlock]; | |
| + | |
| + sbdiskhash(&newsha, toffset); | |
| + memset(tbuf, 0, sizeof tbuf); | |
| + packarena(arena, tbuf); | |
| + sbupdate(&newsha, tbuf, toffset, arena->blocksize); | |
| + sbscore(&newsha, score); | |
| + } | |
| +} | |
| + | |
| +/* | |
| + * Dump one arena to the file named dumpbase.anum. | |
| + * | |
| + * Copies arena->size bytes, starting at partition offset `offset', | |
| + * into a newly created file, at most 4*M bytes per pagein/pwrite. | |
| + * A create or write failure is reported on fd 2 and abandons the | |
| + * dump.  Once the output file has been opened, its descriptor is | |
| + * closed on every path out of the function. | |
| + */ | |
| +void | |
| +dumparena(vlong offset, int anum, Arena *arena) | |
| +{ | |
| +	char buf[1000]; | |
| +	vlong o, e; | |
| +	int fd, n; | |
| + | |
| +	snprint(buf, sizeof buf, "%s.%d", dumpbase, anum); | |
| +	if((fd = create(buf, OWRITE, 0666)) < 0){ | |
| +		fprint(2, "create %s: %r\n", buf); | |
| +		return; | |
| +	} | |
| +	e = offset+arena->size; | |
| +	for(o=offset; o<e; o+=n){ | |
| +		/* the final chunk may be shorter than 4*M */ | |
| +		n = 4*M; | |
| +		if(o+n > e) | |
| +			n = e-o; | |
| +		if(pwrite(fd, pagein(o, n), n, o-offset) != n){ | |
| +			fprint(2, "write %s at %#llux: %r\n", buf, o-offset); | |
| +			break; | |
| +		} | |
| +	} | |
| +	/* previously leaked: one descriptor per dumped arena */ | |
| +	close(fd); | |
| +} | |
| + | |
| +void /* checkarena: check one arena's on-disk header and tail blocks against the values rebuilt by guessarena */ | |
| +checkarena(vlong offset, int anum) | |
| +{ | |
| + uchar dbuf[MaxDiskBlock]; | |
| + uchar *p, oldscore[VtScoreSize], score[VtScoreSize]; | |
| + Arena arena, oarena; | |
| + ArenaHead head; | |
| + Info *fmt, *fmta; | |
| + int sz; | |
| + | |
| + print("# arena %d: offset %#llux\n", anum, offset); | |
| + | |
| + if(offset >= partend){ | |
| + print("arena offset out of bounds\n"); | |
| + return; | |
| + } | |
| + | |
| + guessarena(offset, anum, &head, &arena, oldscore, score); /* fills in head, arena, oldscore and score */ | |
| + | |
| + if(verbose){ | |
| + print("#\tversion=%d name=%s blocksize=%d size=%z", | |
| + head.version, head.name, head.blocksize, head.size); | |
| + if(head.clumpmagic) | |
| + print(" clumpmagic=%#.8ux", head.clumpmagic); | |
| + print("\n#\tclumps=%d cclumps=%d used=%,lld uncsize=%,lld\n", | |
| + arena.diskstats.clumps, arena.diskstats.cclumps, | |
| + arena.diskstats.used, arena.diskstats.uncsize); | |
| + print("#\tctime=%t\n", arena.ctime); | |
| + print("#\twtime=%t\n", arena.wtime); | |
| + if(arena.diskstats.sealed) | |
| + print("#\tsealed score=%V\n", score); | |
| + } | |
| + | |
| + if(dumpbase){ /* dump mode: copy the arena out and skip the checks below */ | |
| + dumparena(offset, anum, &arena); | |
| + return; | |
| + } | |
| + | |
| + memset(dbuf, 0, sizeof dbuf); | |
| + packarenahead(&head, dbuf); /* dbuf = expected header block */ | |
| + p = pagein(offset, arena.blocksize); | |
| + if(memcmp(dbuf, p, arena.blocksize) != 0){ | |
| + print("on-disk arena header incorrect\n"); | |
| + showdiffs(dbuf, p, arena.blocksize, | |
| + arena.version==ArenaVersion4 ? headinfo4 : headinfo5); | |
| + } | |
| + memmove(p, dbuf, arena.blocksize); /* paged-in header is replaced with the expected one, even when they already match */ | |
| + | |
| + memset(dbuf, 0, sizeof dbuf); | |
| + packarena(&arena, dbuf); /* dbuf = expected tail block */ | |
| + if(arena.diskstats.sealed) | |
| + scorecp(dbuf+arena.blocksize-VtScoreSize, score); /* seal score goes in the last VtScoreSize bytes of the block */ | |
| + p = pagein(offset+arena.size-arena.blocksize, arena.blocksize); /* the tail is the arena's last block */ | |
| + memset(&oarena, 0, sizeof oarena); | |
| + unpackarena(&oarena, p); /* oarena = what the disk currently claims */ | |
| + if(arena.version == ArenaVersion4){ | |
| + sz = ArenaSize4; | |
| + fmt = tailinfo4; | |
| + fmta = tailinfo4a; | |
| + }else{ | |
| + sz = ArenaSize5; | |
| + fmt = tailinfo5; | |
| + fmta = tailinfo5a; | |
| + } | |
| + if(p[sz] == 1){ /* NOTE(review): the byte just past the fixed-size tail presumably flags the extended tail format - confirm against _packarena */ | |
| + fmt = fmta; | |
| + if(oarena.diskstats.sealed){ | |
| + /* | |
| + * some arenas were sealed with the extension | |
| + * before we adopted the convention that if it didn't | |
| + * add new information it gets dropped. | |
| + */ | |
| + _packarena(&arena, dbuf, 1); | |
| + } | |
| + } | |
| + if(memcmp(dbuf, p, arena.blocksize-VtScoreSize) != 0){ /* score bytes are excluded here and compared separately below */ | |
| + print("on-disk arena tail incorrect\n"); | |
| + showdiffs(dbuf, p, arena.blocksize-VtScoreSize, fmt); | |
| + } | |
| + if(arena.diskstats.sealed){ | |
| + if(oarena.diskstats.sealed) /* disk claims to be sealed: its stored score should equal oldscore */ | |
| + if(scorecmp(p+arena.blocksize-VtScoreSize, oldscore) != 0){ | |
| + print("on-disk arena seal score incorrect\n"); | |
| + print("\tcorrect=%V\n", oldscore); | |
| + print("\t disk=%V\n", p+arena.blocksize-VtScoreSize); | |
| + } | |
| + if(fix && scorecmp(p+arena.blocksize-VtScoreSize, score) != 0){ /* fix mode and the stored score differs from the new one: announce the (re)seal */ | |
| + print("%ssealing arena%s: %V\n", | |
| + oarena.diskstats.sealed ? "re" : "", | |
| + scorecmp(oldscore, score) == 0 ? | |
| + "" : " after changes", score); | |
| + } | |
| + } | |
| + memmove(p, dbuf, arena.blocksize); /* stage the expected tail, including the score when sealed */ | |
| + | |
| + pageout(); /* NOTE(review): presumably writes modified pages back only when fixing - pageout is not visible here, confirm */ | |
| +} | |
| + | |
| +AMapN* /* buildamap: collect an AMap entry for every arena header found on the partition; returns a vtmallocz'ed AMapN */ | |
| +buildamap(void) | |
| +{ | |
| + uchar *p; | |
| + vlong o; | |
| + ArenaHead h; | |
| + AMapN *an; | |
| + AMap *m; | |
| + | |
| + an = vtmallocz(sizeof *an); | |
| + for(o=ap.arenabase; o<partend; o+=arenasize){ /* probe one candidate header every arenasize bytes */ | |
| + p = pagein(o, Block); | |
| + if(unpackarenahead(&h, p) >= 0){ /* offsets whose block does not unpack as a header are skipped */ | |
| + an->map = vtrealloc(an->map, (an->n+1)*sizeof an->map[… | |
| + m = &an->map[an->n++]; | |
| + m->start = o; | |
| + m->stop = o+h.size; /* extent comes from the size recorded in the header, not from arenasize */ | |
| + strcpy(m->name, h.name); /* NOTE(review): unbounded copy - assumes m->name is at least as large as h.name; confirm in dat.h */ | |
| + } | |
| + } | |
| + return an; | |
| +} | |
| + | |
| +/* | |
| + * Rebuild the arena map text from the arena headers found on disk | |
| + * (buildamap) and compare it with the table stored at ap.tabbase. | |
| + * The text is a count line followed by one "name\tstart\tstop" line | |
| + * per arena.  If the stored table differs from the rebuilt text, the | |
| + * paged-in copy is overwritten with it before pageout(). | |
| + * | |
| + * The temporary map and the formatted text are released before | |
| + * returning, and a failed fmtstrflush is fatal rather than being | |
| + * passed to strlen. | |
| + */ | |
| +void | |
| +checkmap(void) | |
| +{ | |
| +	char *s; | |
| +	uchar *p; | |
| +	int i, len; | |
| +	AMapN *an; | |
| +	Fmt fmt; | |
| + | |
| +	an = buildamap(); | |
| +	fmtstrinit(&fmt); | |
| +	fmtprint(&fmt, "%ud\n", an->n); | |
| +	for(i=0; i<an->n; i++) | |
| +		fmtprint(&fmt, "%s\t%lld\t%lld\n", | |
| +			an->map[i].name, an->map[i].start, an->map[i].stop); | |
| +	s = fmtstrflush(&fmt); | |
| + | |
| +	/* only the formatted text is needed from here on */ | |
| +	free(an->map); | |
| +	free(an); | |
| +	if(s == nil) | |
| +		sysfatal("out of memory formatting arena partition map"); | |
| + | |
| +	len = strlen(s); | |
| +	if(len > ap.tabsize){ | |
| +		print("arena partition map too long: need %z bytes have %z\n", | |
| +			(vlong)len, (vlong)ap.tabsize); | |
| +		len = ap.tabsize; | |
| +	} | |
| + | |
| +	if(ap.tabsize >= 4*M){	/* can't happen - max arenas is 2000 */ | |
| +		print("arena partition map *way* too long\n"); | |
| +		free(s); | |
| +		return; | |
| +	} | |
| + | |
| +	p = pagein(ap.tabbase, ap.tabsize); | |
| +	if(memcmp(p, s, len) != 0){ | |
| +		print("arena partition map incorrect; rewriting.\n"); | |
| +		memmove(p, s, len); | |
| +	} | |
| +	pageout(); | |
| +	free(s); | |
| +} | |
| + | |
| +int mainstacksize = 512*1024; | |
| + | |
| +void /* threadmain: parse options, open the partition, then run checkarenas and checkmap */ | |
| +threadmain(int argc, char **argv) | |
| +{ | |
| + int mode; | |
| + | |
| + mode = OREAD; /* default is read-only; only -f switches to ORDWR */ | |
| + readonly = 1; | |
| + ARGBEGIN{ | |
| + case 'U': /* sets unseal */ | |
| + unseal = 1; | |
| + break; | |
| + case 'a': /* arena size, parsed by unittoull */ | |
| + arenasize = unittoull(EARGF(usage())); | |
| + break; | |
| + case 'b': /* block size, parsed by unittoull */ | |
| + ap.blocksize = unittoull(EARGF(usage())); | |
| + break; | |
| + case 'f': /* fix mode: open read-write and clear readonly */ | |
| + fix = 1; | |
| + mode = ORDWR; | |
| + readonly = 0; | |
| + break; | |
| + case 'n': /* sets basename */ | |
| + basename = EARGF(usage()); | |
| + break; | |
| + case 'v': /* each -v raises verbose by one */ | |
| + verbose++; | |
| + break; | |
| + case 'x': /* sets dumpbase, the prefix dumparena uses for its output files */ | |
| + dumpbase = EARGF(usage()); | |
| + break; | |
| + default: | |
| + usage(); | |
| + }ARGEND | |
| + | |
| + if(argc != 1 && argc != 2) /* one required argument (file) plus one optional argument */ | |
| + usage(); | |
| + | |
| + file = argv[0]; | |
| + | |
| + ventifmtinstall(); | |
| + fmtinstall('z', zfmt); /* %z and %t are the custom verbs used by the print calls in this file */ | |
| + fmtinstall('t', tfmt); | |
| + quotefmtinstall(); | |
| + | |
| + part = initpart(file, mode|ODIRECT); | |
| + if(part == nil) | |
| + sysfatal("can't open %s: %r", file); | |
| + partend = part->size; | |
| + | |
| + checkarenas(argc > 1 ? argv[1] : nil); /* optional second argument is handed to checkarenas, nil when absent */ | |
| + checkmap(); | |
| + threadexitsall(nil); | |
| +} | |
| + | |
| diff --git a/src/cmd/venti/srv/fns.h b/src/cmd/venti/srv/fns.h | |
| t@@ -24,8 +24,13 @@ void delaykickicache(void); | |
| void delaykickround(Round*); | |
| void delaykickroundproc(void*); | |
| void dirtydblock(DBlock*, int); | |
| +void diskaccess(int); | |
| +void disksched(void); | |
| AState diskstate(void); | |
| void *emalloc(ulong); | |
| +void emptydcache(void); | |
| +void emptyicache(void); | |
| +void emptylumpcache(void); | |
| void *erealloc(void *, ulong); | |
| char *estrdup(char*); | |
| void *ezmalloc(ulong); | |
| t@@ -49,6 +54,7 @@ u32int hashbits(u8int *score, int nbits); | |
| int httpdinit(char *address, char *webroot); | |
| int iaddrcmp(IAddr *ia1, IAddr *ia2); | |
| IEntry* icachedirty(u32int, u32int, u64int); | |
| +ulong icachedirtyfrac(void); | |
| void icacheclean(IEntry*); | |
| int ientrycmp(const void *vie1, const void *vie2); | |
| char *ifileline(IFile *f); | |
| t@@ -77,6 +83,7 @@ int insertscore(u8int *score, IAddr *ia, int … | |
| void kickdcache(void); | |
| void kickicache(void); | |
| void kickround(Round*, int wait); | |
| +int loadbloom(Bloom*); | |
| ZBlock *loadclump(Arena *arena, u64int aa, int blocks, Clump *c… | |
| DBlock *loadibucket(Index *index, u8int *score, ISect **is, u32int *buc… | |
| int loadientry(Index *index, u8int *score, int type, IEntry *ie… | |
| t@@ -98,6 +105,7 @@ int okamap(AMap *am, int n, u64int start, u6… | |
| int okibucket(IBucket*, ISect*); | |
| int outputamap(Fmt *f, AMap *am, int n); | |
| int outputindex(Fmt *f, Index *ix); | |
| +int _packarena(Arena *arena, u8int *buf, int); | |
| int packarena(Arena *arena, u8int *buf); | |
| int packarenahead(ArenaHead *head, u8int *buf); | |
| int packarenapart(ArenaPart *as, u8int *buf); | |
| t@@ -129,6 +137,7 @@ ZBlock *readfile(char *name); | |
| int readifile(IFile *f, char *name); | |
| Packet *readlump(u8int *score, int type, u32int size, int *cach… | |
| int readpart(Part *part, u64int addr, u8int *buf, u32int n); | |
| +int resetbloom(Bloom*); | |
| int runconfig(char *config, Config*); | |
| int scorecmp(u8int *, u8int *); | |
| void scoremem(u8int *score, u8int *buf, int size); | |
| diff --git a/src/cmd/venti/srv/graph.c b/src/cmd/venti/srv/graph.c | |
| t@@ -55,7 +55,11 @@ ginit(void) | |
| first = 0; | |
| memimageinit(); | |
| +#ifdef PLAN9PORT | |
| smallfont = openmemsubfont(unsharp("#9/font/lucsans/lstr.10")); | |
| +#else | |
| + smallfont = openmemsubfont("/lib/font/bit/lucidasans/lstr.10"); | |
| +#endif | |
| black = memblack; | |
| blue = allocrepl(DBlue); | |
| red = allocrepl(DRed); | |
| t@@ -121,7 +125,7 @@ statgraph(Graph *g) | |
| if(g->wid > nelem(bin)) | |
| g->wid = nelem(bin); | |
| if(g->fill < 0) | |
| - g->fill = ((uint)(uintptr)g->arg>>8)%nelem(lofill); | |
| + g->fill = ((uint)g->arg>>8)%nelem(lofill); | |
| if(g->fill > nelem(lofill)) | |
| g->fill %= nelem(lofill); | |
| t@@ -151,7 +155,7 @@ statgraph(Graph *g) | |
| qlock(&memdrawlock); | |
| ginit(); | |
| if(smallfont==nil || black==nil || blue==nil || red==nil || hifill==ni… | |
| - werrstr("graphics initialization failed"); | |
| + werrstr("graphics initialization failed: %r"); | |
| qunlock(&memdrawlock); | |
| return nil; | |
| } | |
| t@@ -186,12 +190,12 @@ statgraph(Graph *g) | |
| if(0) | |
| if(lastlo != -1){ | |
| if(lastlo < lo) | |
| - memimagedraw(m, Rect(x-1, lastlo, x, lo), hifi… | |
| + memimagedraw(m, Rect(x-1, lastlo, x, lo), hifi… | |
| else if(lastlo > lo) | |
| - memimagedraw(m, Rect(x-1, lo, x, lastlo), hifi… | |
| + memimagedraw(m, Rect(x-1, lo, x, lastlo), hifi… | |
| } | |
| - memimagedraw(m, Rect(x, hi, x+1,lo), hifill[g->fill], ZP, memo… | |
| - memimagedraw(m, Rect(x, lo, x+1, r.max.y), lofill[g->fill], ZP… | |
| + memimagedraw(m, Rect(x, hi, x+1,lo), hifill[g->fill%nelem(hifi… | |
| + memimagedraw(m, Rect(x, lo, x+1, r.max.y), lofill[g->fill%nele… | |
| lastlo = lo; | |
| } | |
| diff --git a/src/cmd/venti/srv/httpd.c b/src/cmd/venti/srv/httpd.c | |
| t@@ -9,7 +9,7 @@ extern QLock memdrawlock; | |
| enum | |
| { | |
| ObjNameSize = 64, | |
| - MaxObjs = 16 | |
| + MaxObjs = 64 | |
| }; | |
| struct HttpObj | |
| t@@ -28,6 +28,12 @@ static int dindex(HConnect *c); | |
| static int xindex(HConnect *c); | |
| static int xlog(HConnect *c); | |
| static int sindex(HConnect *c); | |
| +static int hempty(HConnect *c); | |
| +static int hlcacheempty(HConnect *c); | |
| +static int hdcacheempty(HConnect *c); | |
| +static int hicacheempty(HConnect *c); | |
| +static int hicachekick(HConnect *c); | |
| +static int hdcachekick(HConnect *c); | |
| static int hicacheflush(HConnect *c); | |
| static int hdcacheflush(HConnect *c); | |
| static int notfound(HConnect *c); | |
| t@@ -53,10 +59,17 @@ httpdinit(char *address, char *dir) | |
| httpdobj("/xindex", xindex); | |
| httpdobj("/flushicache", hicacheflush); | |
| httpdobj("/flushdcache", hdcacheflush); | |
| + httpdobj("/kickicache", hicachekick); | |
| + httpdobj("/kickdcache", hdcachekick); | |
| httpdobj("/graph/", xgraph); | |
| + httpdobj("/set", xset); | |
| httpdobj("/set/", xset); | |
| httpdobj("/log", xlog); | |
| httpdobj("/log/", xlog); | |
| + httpdobj("/empty", hempty); | |
| + httpdobj("/emptyicache", hicacheempty); | |
| + httpdobj("/emptylumpcache", hlcacheempty); | |
| + httpdobj("/emptydcache", hdcacheempty); | |
| if(vtproc(listenproc, address) < 0) | |
| return -1; | |
| t@@ -105,8 +118,6 @@ listenproc(void *vaddress) | |
| char *address, ndir[NETPATHLEN], dir[NETPATHLEN]; | |
| int ctl, nctl, data; | |
| -/*sleep(1000); // let strace find us */ | |
| - | |
| address = vaddress; | |
| ctl = announce(address, dir); | |
| if(ctl < 0){ | |
| t@@ -148,7 +159,6 @@ httpproc(void *v) | |
| HConnect *c; | |
| int ok, i, n; | |
| -/*sleep(1000); // let strace find us */ | |
| c = v; | |
| for(;;){ | |
| t@@ -182,7 +192,7 @@ httpproc(void *v) | |
| } | |
| static int | |
| -percent(long v, long total) | |
| +percent(ulong v, ulong total) | |
| { | |
| if(total == 0) | |
| total = 1; | |
| t@@ -240,6 +250,31 @@ preqtext(HConnect *c) | |
| } | |
| static int | |
| +herror(HConnect *c) /* herror: send a "400 Bad Request" reply whose HTML body is formatted into c->xferbuf; returns hflush's result */ | |
| +{ | |
| + int n; | |
| + Hio *hout; | |
| + | |
| + hout = &c->hout; | |
| + n = snprint(c->xferbuf, HBufSize, "<html><head><title>Error</title></h… | |
| + hprint(hout, "%s %s\r\n", hversion, "400 Bad Request"); | |
| + hprint(hout, "Date: %D\r\n", time(nil)); | |
| + hprint(hout, "Server: Venti\r\n"); | |
| + hprint(hout, "Content-Type: text/html\r\n"); | |
| + hprint(hout, "Content-Length: %d\r\n", n); /* n is the body length returned by snprint above */ | |
| + if(c->head.closeit) | |
| + hprint(hout, "Connection: close\r\n"); | |
| + else if(!http11(c)) /* Keep-Alive is sent only when http11(c) is false */ | |
| + hprint(hout, "Connection: Keep-Alive\r\n"); | |
| + hprint(hout, "\r\n"); | |
| + | |
| + if(c->req.meth == nil || strcmp(c->req.meth, "HEAD") != 0) /* HEAD requests get the headers only */ | |
| + hwrite(hout, c->xferbuf, n); | |
| + | |
| + return hflush(hout); | |
| +} | |
| + | |
| +static int | |
| notfound(HConnect *c) | |
| { | |
| int r; | |
| t@@ -325,21 +360,53 @@ static struct | |
| "logging", &ventilogging, | |
| "stats", &collectstats, | |
| "icachesleeptime", &icachesleeptime, | |
| + "minicachesleeptime", &minicachesleeptime, | |
| "arenasumsleeptime", &arenasumsleeptime, | |
| + "l0quantum", &l0quantum, | |
| + "l1quantum", &l1quantum, | |
| + "manualscheduling", &manualscheduling, | |
| + "ignorebloom", &ignorebloom, | |
| + "syncwrites", &syncwrites, | |
| + "icacheprefetch", &icacheprefetch, | |
| 0 | |
| }; | |
| +/* | |
| + * Reply with a text/plain listing of every entry in namedints, | |
| + * one "name = value" line per variable. | |
| + * Returns -1 if preqtype fails, otherwise 0. | |
| + */ | |
| static int | |
| +xsetlist(HConnect *c) | |
| +{ | |
| +	int i; | |
| + | |
| +	if(preqtype(c, "text/plain") < 0) | |
| +		return -1; | |
| +	/* write into the HTTP reply; print() would go to the server's standard output */ | |
| +	for(i=0; namedints[i].name; i++) | |
| +		hprint(&c->hout, "%s = %d\n", namedints[i].name, *namedints[i].p); | |
| +	hflush(&c->hout); | |
| +	return 0; | |
| +} | |
| + | |
| + | |
| + | |
| +static int | |
| xset(HConnect *c) | |
| { | |
| int i, nf, r; | |
| char *f[10], *s; | |
| + if(strcmp(c->req.uri, "/set") == 0 || strcmp(c->req.uri, "/set/") == 0) | |
| + return xsetlist(c); | |
| + | |
| s = estrdup(c->req.uri); | |
| nf = getfields(s+strlen("/set/"), f, nelem(f), 1, "/"); | |
| - if(nf < 1) | |
| - return notfound(c); | |
| + if(nf < 1){ | |
| + r = preqtext(c); | |
| + if(r < 0) | |
| + return r; | |
| + for(i=0; namedints[i].name; i++) | |
| + hprint(&c->hout, "%s = %d\n", namedints[i].name, *name… | |
| + hflush(&c->hout); | |
| + return 0; | |
| + } | |
| for(i=0; namedints[i].name; i++){ | |
| if(strcmp(f[0], namedints[i].name) == 0){ | |
| if(nf >= 2) | |
| t@@ -495,6 +562,108 @@ darena(Hio *hout, Arena *arena) | |
| } | |
| +/* | |
| + * HTTP handler registered for /empty: empty the lump cache, the | |
| + * dcache and the icache, in that order, then confirm in the reply. | |
| + * Returns preqtext's result if it is negative, otherwise 0. | |
| + */ | |
| static int | |
| +hempty(HConnect *c) | |
| +{ | |
| +	int status; | |
| + | |
| +	if((status = preqtext(c)) < 0) | |
| +		return status; | |
| + | |
| +	emptylumpcache(); | |
| +	emptydcache(); | |
| +	emptyicache(); | |
| +	hprint(&c->hout, "emptied all caches\n"); | |
| +	hflush(&c->hout); | |
| +	return 0; | |
| +} | |
| + | |
| +/* | |
| + * HTTP handler registered for /emptylumpcache: empty the lump | |
| + * cache and confirm in the reply. | |
| + * Returns preqtext's result if it is negative, otherwise 0. | |
| + */ | |
| +static int | |
| +hlcacheempty(HConnect *c) | |
| +{ | |
| +	int status; | |
| + | |
| +	if((status = preqtext(c)) < 0) | |
| +		return status; | |
| + | |
| +	emptylumpcache(); | |
| +	hprint(&c->hout, "emptied lumpcache\n"); | |
| +	hflush(&c->hout); | |
| +	return 0; | |
| +} | |
| + | |
| +/* | |
| + * HTTP handler registered for /emptyicache: empty the index | |
| + * cache and confirm in the reply. | |
| + * Returns preqtext's result if it is negative, otherwise 0. | |
| + */ | |
| +static int | |
| +hicacheempty(HConnect *c) | |
| +{ | |
| +	int status; | |
| + | |
| +	if((status = preqtext(c)) < 0) | |
| +		return status; | |
| + | |
| +	emptyicache(); | |
| +	hprint(&c->hout, "emptied icache\n"); | |
| +	hflush(&c->hout); | |
| +	return 0; | |
| +} | |
| + | |
| +/* | |
| + * HTTP handler registered for /emptydcache: empty the disk | |
| + * block cache and confirm in the reply. | |
| + * Returns preqtext's result if it is negative, otherwise 0. | |
| + */ | |
| +static int | |
| +hdcacheempty(HConnect *c) | |
| +{ | |
| +	int status; | |
| + | |
| +	if((status = preqtext(c)) < 0) | |
| +		return status; | |
| + | |
| +	emptydcache(); | |
| +	hprint(&c->hout, "emptied dcache\n"); | |
| +	hflush(&c->hout); | |
| +	return 0; | |
| +} | |
| +/* | |
| + * HTTP handler registered for /kickicache: call kickicache() | |
| + * and confirm in the reply. | |
| + * Returns preqtext's result if it is negative, otherwise 0. | |
| + */ | |
| +static int | |
| +hicachekick(HConnect *c) | |
| +{ | |
| +	int status; | |
| + | |
| +	if((status = preqtext(c)) < 0) | |
| +		return status; | |
| + | |
| +	kickicache(); | |
| +	hprint(&c->hout, "kicked icache\n"); | |
| +	hflush(&c->hout); | |
| +	return 0; | |
| +} | |
| + | |
| +/* | |
| + * HTTP handler registered for /kickdcache: call kickdcache() | |
| + * and confirm in the reply. | |
| + * Returns preqtext's result if it is negative, otherwise 0. | |
| + */ | |
| +static int | |
| +hdcachekick(HConnect *c) | |
| +{ | |
| +	int status; | |
| + | |
| +	if((status = preqtext(c)) < 0) | |
| +		return status; | |
| + | |
| +	kickdcache(); | |
| +	hprint(&c->hout, "kicked dcache\n"); | |
| +	hflush(&c->hout); | |
| +	return 0; | |
| +} | |
| +static int | |
| hicacheflush(HConnect *c) | |
| { | |
| Hio *hout; | |
| t@@ -569,6 +738,7 @@ rawgraph(Stats *s, Stats *t, void *va) | |
| { | |
| Arg *a; | |
| + USED(s); | |
| a = va; | |
| return t->n[a->index]; | |
| } | |
| t@@ -587,6 +757,7 @@ pctgraph(Stats *s, Stats *t, void *va) | |
| { | |
| Arg *a; | |
| + USED(s); | |
| a = va; | |
| return percent(t->n[a->index], t->n[a->index2]); | |
| } | |
| t@@ -722,7 +893,7 @@ static char* graphname[] = | |
| "isectwritebyte", | |
| "sumread", | |
| - "sumreadbyte" | |
| + "sumreadbyte", | |
| }; | |
| static int | |
| t@@ -733,7 +904,6 @@ findname(char *s) | |
| for(i=0; i<nelem(graphname); i++) | |
| if(strcmp(graphname[i], s) == 0) | |
| return i; | |
| -fprint(2, "no name '%s'\n", s); | |
| return -1; | |
| } | |
| t@@ -769,10 +939,14 @@ xgraph(HConnect *c) | |
| if(0) fprint(2, "graph %s\n" ,s); | |
| memset(&g, 0, sizeof g); | |
| nf = getfields(s+strlen("/graph/"), f, nelem(f), 1, "/"); | |
| - if(nf < 1) | |
| - goto notfound; | |
| - if((arg.index = findname(f[0])) == -1 && strcmp(f[0], "*") != 0) | |
| - goto notfound; | |
| + if(nf < 1){ | |
| + werrstr("bad syntax -- not enough fields"); | |
| + goto error; | |
| + } | |
| + if((arg.index = findname(f[0])) == -1 && strcmp(f[0], "*") != 0){ | |
| + werrstr("unknown name %s", f[0]); | |
| + goto error; | |
| + } | |
| g.arg = &arg; | |
| g.t0 = -120; | |
| g.t1 = 0; | |
| t@@ -793,14 +967,18 @@ if(0) fprint(2, "graph %s\n" ,s); | |
| else if(strncmp(f[i], "max=", 4) == 0) | |
| g.max = atoi(f[i]+4); | |
| else if(strncmp(f[i], "pct=", 4) == 0){ | |
| - if((arg.index2 = findname(f[i]+4)) == -1) | |
| - goto notfound; | |
| + if((arg.index2 = findname(f[i]+4)) == -1){ | |
| + werrstr("unknown name %s", f[i]+4); | |
| + goto error; | |
| + } | |
| g.fn = pctgraph; | |
| g.min = 0; | |
| g.max = 100; | |
| }else if(strncmp(f[i], "pctdiff=", 8) == 0){ | |
| - if((arg.index2 = findname(f[i]+8)) == -1) | |
| - goto notfound; | |
| + if((arg.index2 = findname(f[i]+8)) == -1){ | |
| + werrstr("unknown name %s", f[i]+8); | |
| + goto error; | |
| + } | |
| g.fn = pctdiffgraph; | |
| g.min = 0; | |
| g.max = 100; | |
| t@@ -830,7 +1008,7 @@ if(0) fprint(2, "graph %s\n" ,s); | |
| m = statgraph(&g); | |
| if(m == nil) | |
| - goto notfound; | |
| + goto error; | |
| if(preqtype(c, "image/png") < 0) | |
| return -1; | |
| t@@ -843,9 +1021,9 @@ if(0) fprint(2, "graph %s\n" ,s); | |
| free(s); | |
| return 0; | |
| -notfound: | |
| +error: | |
| free(s); | |
| - return notfound(c); | |
| + return herror(c); | |
| } | |
| static int | |
| t@@ -944,7 +1122,6 @@ vtloghdump(Hio *h, VtLog *l) | |
| name = l ? l->name : "<nil>"; | |
| -fprint(2, "hdump xfer %d\n", h->xferenc); | |
| hprint(h, "<html><head>\n"); | |
| hprint(h, "<title>Venti Server Log: %s</title>\n", name); | |
| hprint(h, "</head><body>\n"); | |
| diff --git a/src/cmd/venti/srv/icache.c b/src/cmd/venti/srv/icache.c | |
| t@@ -11,6 +11,7 @@ struct ICache | |
| int bits; /* bits to use for indexing he… | |
| u32int size; /* number of heads; == 1 <<… | |
| IEntry *base; /* all allocated hash tabl… | |
| + IEntry *free; | |
| u32int entries; /* elements in base */ | |
| IEntry *dirty; /* chain of dirty elements */ | |
| u32int ndirty; | |
| t@@ -23,6 +24,8 @@ struct ICache | |
| int nlast; | |
| }; | |
| +int icacheprefetch = 1; | |
| + | |
| static ICache icache; | |
| static IEntry *icachealloc(IAddr *ia, u8int *score); | |
| t@@ -45,6 +48,12 @@ initicache(int bits, int depth) | |
| setstat(StatIcacheSize, icache.entries); | |
| } | |
| +/* | |
| + * Return the dirty share of the index cache, scaled so that a | |
| + * fully dirty cache yields IcacheFrac.  The product is formed in | |
| + * a vlong before dividing by the number of entries. | |
| + */ | |
| +ulong | |
| +icachedirtyfrac(void) | |
| +{ | |
| +	vlong scaled; | |
| + | |
| +	scaled = (vlong)icache.ndirty*IcacheFrac; | |
| +	return scaled / icache.entries; | |
| +} | |
| + | |
| u32int | |
| hashbits(u8int *sc, int bits) | |
| { | |
| t@@ -141,14 +150,16 @@ lookupscore(u8int *score, int type, IAddr *ia, int *rac) | |
| * load the table of contents for that arena into the cache. | |
| */ | |
| ie = icachealloc(&d.ia, score); | |
| - icache.last[icache.nlast++%nelem(icache.last)] = amapitoa(mainindex, i… | |
| - aa = ie->ia.addr - aa; /* compute base addr of arena */ | |
| - for(i=0; i<nelem(icache.last); i++) | |
| - if(icache.last[i] != icache.last[0]) | |
| - break; | |
| - if(i==nelem(icache.last) && icache.lastload != icache.last[0]){ | |
| - load = icache.last[0]; | |
| - icache.lastload = load; | |
| + if(icacheprefetch){ | |
| + icache.last[icache.nlast++%nelem(icache.last)] = amapitoa(main… | |
| + aa = ie->ia.addr - aa; /* compute base addr of arena */ | |
| + for(i=0; i<nelem(icache.last); i++) | |
| + if(icache.last[i] != icache.last[0]) | |
| + break; | |
| + if(i==nelem(icache.last) && icache.lastload != icache.last[0]){ | |
| + load = icache.last[0]; | |
| + icache.lastload = load; | |
| + } | |
| } | |
| found: | |
| t@@ -249,6 +260,11 @@ icachealloc(IAddr *ia, u8int *score) | |
| trace(TraceLump, "icachealloc unused"); | |
| goto Found; | |
| } | |
| + | |
| + if((ie = icache.free) != nil){ | |
| + icache.free = ie->next; | |
| + goto Found; | |
| + } | |
| h = icache.stolen; | |
| for(i=0;; i++){ | |
| t@@ -346,3 +362,21 @@ icacheclean(IEntry *ie) | |
| trace(TraceProc, "icachedirty exit"); | |
| } | |
| +/* | |
| + * Unlink every entry whose dirty field is zero from the hash | |
| + * chains and push it onto icache.free; dirty entries stay where | |
| + * they are.  Runs with icache.lock held. | |
| + */ | |
| +void | |
| +emptyicache(void) | |
| +{ | |
| +	int h; | |
| +	IEntry *e, **link; | |
| + | |
| +	qlock(&icache.lock); | |
| +	for(h=0; h<icache.size; h++){ | |
| +		link = &icache.heads[h]; | |
| +		while((e = *link) != nil){ | |
| +			if(e->dirty != 0){ | |
| +				/* keep it; step to the next link in the chain */ | |
| +				link = &e->next; | |
| +				continue; | |
| +			} | |
| +			/* splice out of the chain, push on the free list */ | |
| +			*link = e->next; | |
| +			e->next = icache.free; | |
| +			icache.free = e; | |
| +		} | |
| +	} | |
| +	qunlock(&icache.lock); | |
| +} | |
| diff --git a/src/cmd/venti/srv/icachewrite.c b/src/cmd/venti/srv/icachewrite.c | |
| t@@ -12,6 +12,7 @@ static void icachewritecoord(void*); | |
| static IEntry *iesort(IEntry*); | |
| int icachesleeptime = 1000; /* milliseconds */ | |
| +int minicachesleeptime = 50; | |
| enum | |
| { | |
| t@@ -74,7 +75,7 @@ nextchunk(Index *ix, ISect *is, IEntry **pie, u64int *paddr,… | |
| static int | |
| icachewritesect(Index *ix, ISect *is, u8int *buf) | |
| { | |
| - int err, h, bsize; | |
| + int err, h, bsize, t; | |
| u32int lo, hi; | |
| u64int addr, naddr; | |
| uint nbuf, off; | |
| t@@ -96,7 +97,14 @@ icachewritesect(Index *ix, ISect *is, u8int *buf) | |
| err = 0; | |
| while(iedirty){ | |
| - sleep(icachesleeptime); | |
| + disksched(); | |
| + while((t=icachesleeptime) == SleepForever){ | |
| + sleep(1000); | |
| + disksched(); | |
| + } | |
| + if(t < minicachesleeptime) | |
| + t = minicachesleeptime; | |
| + sleep(t); | |
| trace(TraceProc, "icachewritesect nextchunk"); | |
| chunk = nextchunk(ix, is, &iedirty, &addr, &nbuf); | |
| t@@ -146,12 +154,15 @@ icachewritesect(Index *ix, ISect *is, u8int *buf) | |
| break; | |
| } | |
| packibucket(&ib, buf+off, is->bucketmagic); | |
| + /* XXX not right - must update cache after writepart */ | |
| if((b = _getdblock(is->part, naddr, ORDWR, 0)) != nil){ | |
| memmove(b->data, buf+off, bsize); | |
| putdblock(b); | |
| } | |
| } | |
| + diskaccess(1); | |
| + | |
| trace(TraceProc, "icachewritesect writepart", addr, nbuf); | |
| if(writepart(is->part, addr, buf, nbuf) < 0){ | |
| /* XXX */ | |
| t@@ -171,6 +182,7 @@ icachewritesect(Index *ix, ISect *is, u8int *buf) | |
| static void | |
| icachewriteproc(void *v) | |
| { | |
| + int ret; | |
| uint bsize; | |
| ISect *is; | |
| Index *ix; | |
| t@@ -188,17 +200,17 @@ icachewriteproc(void *v) | |
| trace(TraceProc, "icachewriteproc recv"); | |
| recv(is->writechan, 0); | |
| trace(TraceWork, "start"); | |
| - icachewritesect(ix, is, buf); | |
| + ret = icachewritesect(ix, is, buf); | |
| trace(TraceProc, "icachewriteproc send"); | |
| trace(TraceWork, "finish"); | |
| - send(is->writedonechan, 0); | |
| + sendul(is->writedonechan, ret); | |
| } | |
| } | |
| static void | |
| icachewritecoord(void *v) | |
| { | |
| - int i; | |
| + int i, err; | |
| Index *ix; | |
| AState as; | |
| @@ -216,9 +228,9 @@ icachewritecoord(void *v) | |
| as = diskstate(); | |
| if(as.arena==iwrite.as.arena && as.aa==iwrite.as.aa){ | |
| /* will not be able to do anything more than last flus… | |
| - trace(TraceProc, "icachewritecoord flush dcache"); | |
| + trace(TraceProc, "icachewritecoord kick dcache"); | |
| kickdcache(); | |
| - trace(TraceProc, "icachewritecoord flushed dcache"); | |
| + trace(TraceProc, "icachewritecoord kicked dcache"); | |
| } | |
| iwrite.as = as; | |
| @@ -229,13 +241,15 @@ icachewritecoord(void *v) | |
| if(ix->bloom) | |
| send(ix->bloom->writechan, 0); | |
| + err = 0; | |
| for(i=0; i<ix->nsects; i++) | |
| - recv(ix->sects[i]->writedonechan, 0); | |
| + err |= recvul(ix->sects[i]->writedonechan); | |
| if(ix->bloom) | |
| - recv(ix->bloom->writedonechan, 0); | |
| + err |= recvul(ix->bloom->writedonechan); | |
| - trace(TraceProc, "icachewritecoord donewrite"); | |
| - setatailstate(&iwrite.as); | |
| + trace(TraceProc, "icachewritecoord donewrite err=%d", … | |
| + if(err == 0) | |
| + setatailstate(&iwrite.as); | |
| } | |
| icacheclean(nil); /* wake up anyone waiting */ | |
| trace(TraceWork, "finish"); | |
| diff --git a/src/cmd/venti/srv/index.c b/src/cmd/venti/srv/index.c | |
| @@ -23,17 +23,11 @@ | |
| #include "dat.h" | |
| #include "fns.h" | |
| -/*static int bucklook(u8int *score, int type, u8int *data, int n); */ | |
| -/*static int writebucket(ISect *is, u32int buck, IBucket *ib, DBlock *b… | |
| -/*static int okibucket(IBucket *ib, ISect *is); */ | |
| static int initindex1(Index*); | |
| static ISect *initisect1(ISect *is); | |
| -/*static int splitiblock(Index *ix, DBlock *b, ISect *is, u32int buck, … | |
| #define KEY(k,d) ((d) ? (k)>>(32-(d)) : 0) | |
| -/*static QLock indexlock; //ZZZ */ | |
| - | |
| static char IndexMagic[] = "venti index configuration"; | |
| Index* | |
| @@ -375,6 +369,8 @@ initisect(Part *part) | |
| seterr(EAdmin, "can't read index section header: %r"); | |
| return nil; | |
| } | |
| +print("read %s at %d: %.2ux %.2ux %.2ux %.2ux\n", | |
| + part->name, PartBlank, b->data[0], b->data[1], b->data[2], b->data[3]); | |
| is = MKZ(ISect); | |
| if(is == nil){ | |
| @@ -457,9 +453,10 @@ initisect1(ISect *is) | |
| v = is->part->size & ~(u64int)(is->blocksize - 1); | |
| if(is->blockbase + (u64int)is->blocks * is->blocksize != v){ | |
| seterr(ECorrupt, "invalid blocks in index section %s", is->nam… | |
| -/*ZZZZZZZZZ */ | |
| -/* freeisect(is); */ | |
| -/* return nil; */ | |
| + /* ZZZ what to do? | |
| + freeisect(is); | |
| + return nil; | |
| + */ | |
| } | |
| if(is->stop - is->start > is->blocks){ | |
| @@ -482,9 +479,10 @@ wbisect(ISect *is) | |
| ZBlock *b; | |
| b = alloczblock(HeadSize, 1, 0); | |
| - if(b == nil) | |
| -/*ZZZ set error? */ | |
| + if(b == nil){ | |
| + /* ZZZ set error? */ | |
| return -1; | |
| + } | |
| if(packisect(is, b->data) < 0){ | |
| seterr(ECorrupt, "can't make index section header: %r"); | |
| @@ -789,7 +787,7 @@ loadibucket0(Index *ix, u32int buck, ISect **pis, u32int *… | |
| /* | |
| * find the number of the index section holding score | |
| */ | |
| -static int | |
| +int | |
| indexsect1(Index *ix, u8int *score) | |
| { | |
| return indexsect0(ix, hashbits(score, 32) / ix->div); | |
| diff --git a/src/cmd/venti/srv/lump.c b/src/cmd/venti/srv/lump.c | |
| @@ -2,6 +2,7 @@ | |
| #include "dat.h" | |
| #include "fns.h" | |
| +int syncwrites = 0; | |
| int queuewrites = 0; | |
| int writestodevnull = 0; | |
| @@ -45,7 +46,7 @@ readlump(u8int *score, int type, u32int size, int *cached) | |
| *cached = 0; | |
| if(lookupscore(score, type, &ia, &rac) < 0){ | |
| - /*ZZZ place to check for someone trying to guess scores */ | |
| + /* ZZZ place to check for someone trying to guess scores */ | |
| seterr(EOk, "no block with score %V/%d exists", score, type); | |
| putlump(u); | |
| @@ -92,7 +93,15 @@ writelump(Packet *p, u8int *score, int type, u32int creator… | |
| if(u->data != nil){ | |
| ok = 0; | |
| if(packetcmp(p, u->data) != 0){ | |
| - seterr(EStrange, "score collision"); | |
| + uchar nscore[VtScoreSize]; | |
| + | |
| + packetsha1(u->data, nscore); | |
| + if(scorecmp(u->score, score) != 0) | |
| + seterr(EStrange, "lookuplump returned bad scor… | |
| + else if(scorecmp(u->score, nscore) != 0) | |
| + seterr(EStrange, "lookuplump returned bad data… | |
| + else | |
| + seterr(EStrange, "score collision %V", score); | |
| ok = -1; | |
| } | |
| packetfree(p); | |
| @@ -138,7 +147,13 @@ writeqlump(Lump *u, Packet *p, int creator, uint ms) | |
| if(old != nil){ | |
| ok = 0; | |
| if(packetcmp(p, old) != 0){ | |
| - seterr(EStrange, "score collision"); | |
| + uchar nscore[VtScoreSize]; | |
| + | |
| + packetsha1(old, nscore); | |
| + if(scorecmp(u->score, nscore) != 0) | |
| + seterr(EStrange, "readilump returned b… | |
| + else | |
| + seterr(EStrange, "score collision %V",… | |
| ok = -1; | |
| } | |
| packetfree(p); | |
| @@ -160,6 +175,12 @@ writeqlump(Lump *u, Packet *p, int creator, uint ms) | |
| insertlump(u, p); | |
| else | |
| packetfree(p); | |
| + | |
| + if(syncwrites){ | |
| + flushdcache(); | |
| + flushicache(); | |
| + flushdcache(); | |
| + } | |
| ms = msec() - ms; | |
| addstat2(StatRpcWriteNew, 1, StatRpcWriteNewTime, ms); | |
| diff --git a/src/cmd/venti/srv/lumpcache.c b/src/cmd/venti/srv/lumpcache.c | |
| @@ -11,7 +11,7 @@ enum | |
| { | |
| HashLog = 9, | |
| HashSize = 1<<HashLog, | |
| - HashMask = HashSize - 1 | |
| + HashMask = HashSize - 1, | |
| }; | |
| struct LumpCache | |
| @@ -175,7 +175,6 @@ again: | |
| * remove it from the heap, and fix up the heap. | |
| */ | |
| size = packetasize(p); | |
| -/*ZZZ */ | |
| while(lumpcache.avail < size){ | |
| trace(TraceLump, "insertlump bump"); | |
| CHECK(checklumpcache()); | |
| @@ -277,6 +276,15 @@ bumplump(void) | |
| return b; | |
| } | |
| +void | |
| +emptylumpcache(void) | |
| +{ | |
| + qlock(&lumpcache.lock); | |
| + while(bumplump()) | |
| + ; | |
| + qunlock(&lumpcache.lock); | |
| +} | |
| + | |
| /* | |
| * delete an arbitrary block from the heap | |
| */ | |
| @@ -415,3 +423,4 @@ checklumpcache(void) | |
| if(lumpcache.nheap + nfree + refed != lumpcache.nblocks) | |
| sysfatal("lc: missing blocks: %d %d %d %d", lumpcache.nheap, r… | |
| } | |
| + | |
| diff --git a/src/cmd/venti/srv/lumpqueue.c b/src/cmd/venti/srv/lumpqueue.c | |
| @@ -58,22 +58,6 @@ initlumpqueues(int nq) | |
| seterr(EOk, "can't start write queue slave: %r"); | |
| return -1; | |
| } | |
| - if(vtproc(queueproc, q) < 0){ | |
| - seterr(EOk, "can't start write queue slave: %r"); | |
| - return -1; | |
| - } | |
| - if(vtproc(queueproc, q) < 0){ | |
| - seterr(EOk, "can't start write queue slave: %r"); | |
| - return -1; | |
| - } | |
| - if(vtproc(queueproc, q) < 0){ | |
| - seterr(EOk, "can't start write queue slave: %r"); | |
| - return -1; | |
| - } | |
| - if(vtproc(queueproc, q) < 0){ | |
| - seterr(EOk, "can't start write queue slave: %r"); | |
| - return -1; | |
| - } | |
| } | |
| return 0; | |
| diff --git a/src/cmd/venti/srv/mirrorarenas.c b/src/cmd/venti/srv/mirrorarenas.c | |
| @@ -0,0 +1,464 @@ | |
| +/* | |
| + * Mirror one arena partition onto another. | |
| + * Be careful to copy only new data. | |
| + */ | |
| + | |
| +#include "stdinc.h" | |
| +#include "dat.h" | |
| +#include "fns.h" | |
| + | |
| +Channel *writechan; | |
| + | |
| +typedef struct Write Write; | |
| +struct Write | |
| +{ | |
| + uchar *p; | |
| + int n; | |
| + uvlong o; | |
| + int error; | |
| +}; | |
| + | |
| +Part *src; | |
| +Part *dst; | |
| +int force; | |
| +int verbose; | |
| +char *status; | |
| +uvlong astart, aend; | |
| + | |
| +void | |
| +usage(void) | |
| +{ | |
| + fprint(2, "usage: mirrorarenas [-v] src dst [ranges]\n"); | |
| + threadexitsall("usage"); | |
| +} | |
| + | |
| +int | |
| +ereadpart(Part *p, u64int offset, u8int *buf, u32int count) | |
| +{ | |
| + if(readpart(p, offset, buf, count) != count){ | |
| + print("%T readpart %s at %#llux+%ud: %r\n", p->name, offset, c… | |
| + return -1; | |
| + } | |
| + return 0; | |
| +} | |
| + | |
| +int | |
| +ewritepart(Part *p, u64int offset, u8int *buf, u32int count) | |
| +{ | |
| + if(writepart(p, offset, buf, count) != count){ | |
| + print("%T writepart %s at %#llux+%ud: %r\n", p->name, offset, … | |
| + return -1; | |
| + } | |
| + return 0; | |
| +} | |
| + | |
| +/* | |
| + * Extra proc to do writes to dst, so that we can overlap reading | |
| + * src with writing dst during copy. This is an easy factor of two | |
| + * (almost) in performance. | |
| + */ | |
| +static void | |
| +writeproc(void *v) | |
| +{ | |
| + Write *w; | |
| + | |
| + USED(v); | |
| + while((w = recvp(writechan)) != nil){ | |
| + if(w->n == 0) | |
| + continue; | |
| + if(ewritepart(dst, w->o, w->p, w->n) < 0) | |
| + w->error = 1; | |
| + } | |
| +} | |
| + | |
| +int | |
| +copy(uvlong start, uvlong end, char *what, DigestState *ds) | |
| +{ | |
| + int i, n; | |
| + uvlong o; | |
| + static uchar tmp[2][1024*1024]; | |
| + Write w[2]; | |
| + | |
| + assert(start <= end); | |
| + assert(astart <= start && start < aend); | |
| + assert(astart <= end && end <= aend); | |
| + | |
| + if(verbose && start != end) | |
| + print("%T copy %,llud-%,llud %s\n", start, end, what); | |
| + | |
| + i = 0; | |
| + memset(w, 0, sizeof w); | |
| + for(o=start; o<end; o+=n){ | |
| + if(w[i].error) | |
| + goto error; | |
| + n = sizeof tmp[i]; | |
| + if(o+n > end) | |
| + n = end - o; | |
| + if(ereadpart(src, o, tmp[i], n) < 0) | |
| + goto error; | |
| + w[i].p = tmp[i]; | |
| + w[i].o = o; | |
| + w[i].n = n; | |
| + w[i].error = 0; | |
| + sendp(writechan, &w[i]); | |
| + if(ds) | |
| + sha1(tmp[i], n, nil, ds); | |
| + i = 1-i; | |
| + } | |
| + if(w[i].error) | |
| + goto error; | |
| + | |
| + /* | |
| + * wait for queued write to finish | |
| + */ | |
| + w[i].p = nil; | |
| + w[i].o = 0; | |
| + w[i].n = 0; | |
| + w[i].error = 0; | |
| + sendp(writechan, &w[i]); | |
| + i = 1-i; | |
| + if(w[i].error) | |
| + return -1; | |
| + return 0; | |
| + | |
| +error: | |
| + /* | |
| + * sync with write proc | |
| + */ | |
| + w[i].p = nil; | |
| + w[i].o = 0; | |
| + w[i].n = 0; | |
| + w[i].error = 0; | |
| + sendp(writechan, &w[i]); | |
| + return -1; | |
| +} | |
| + | |
| +/* single-threaded, for reference */ | |
| +int | |
| +copy1(uvlong start, uvlong end, char *what, DigestState *ds) | |
| +{ | |
| + int n; | |
| + uvlong o; | |
| + static uchar tmp[1024*1024]; | |
| + | |
| + assert(start <= end); | |
| + assert(astart <= start && start < aend); | |
| + assert(astart <= end && end <= aend); | |
| + | |
| + if(verbose && start != end) | |
| + print("%T copy %,llud-%,llud %s\n", start, end, what); | |
| + | |
| + for(o=start; o<end; o+=n){ | |
| + n = sizeof tmp; | |
| + if(o+n > end) | |
| + n = end - o; | |
| + if(ereadpart(src, o, tmp, n) < 0) | |
| + return -1; | |
| + if(ds) | |
| + sha1(tmp, n, nil, ds); | |
| + if(ewritepart(dst, o, tmp, n) < 0) | |
| + return -1; | |
| + } | |
| + return 0; | |
| +} | |
| + | |
| +int | |
| +asha1(Part *p, uvlong start, uvlong end, DigestState *ds) | |
| +{ | |
| + int n; | |
| + uvlong o; | |
| + static uchar tmp[1024*1024]; | |
| + | |
| + if(start == end) | |
| + return 0; | |
| + assert(start < end); | |
| + | |
| + if(verbose) | |
| + print("%T sha1 %,llud-%,llud\n", start, end); | |
| + | |
| + for(o=start; o<end; o+=n){ | |
| + n = sizeof tmp; | |
| + if(o+n > end) | |
| + n = end - o; | |
| + if(ereadpart(p, o, tmp, n) < 0) | |
| + return -1; | |
| + sha1(tmp, n, nil, ds); | |
| + } | |
| + return 0; | |
| +} | |
| + | |
| +uvlong | |
| +rdown(uvlong a, int b) | |
| +{ | |
| + return a-a%b; | |
| +} | |
| + | |
| +uvlong | |
| +rup(uvlong a, int b) | |
| +{ | |
| + if(a%b == 0) | |
| + return a; | |
| + return a+b-a%b; | |
| +} | |
| + | |
| +void | |
| +mirror(Arena *sa, Arena *da) | |
| +{ | |
| + vlong v, si, di, end; | |
| + int clumpmax, blocksize; | |
| + static uchar buf[MaxIoSize]; | |
| + ArenaHead h; | |
| + DigestState xds, *ds; | |
| + vlong shaoff, base; | |
| + | |
| + base = sa->base; | |
| + blocksize = sa->blocksize; | |
| + end = sa->base + sa->size; | |
| + | |
| + astart = base - blocksize; | |
| + aend = end + blocksize; | |
| + | |
| + shaoff = 0; | |
| + | |
| + if(force){ | |
| + copy(astart, aend, "all", nil); | |
| + return; | |
| + } | |
| + | |
| + if(verbose) | |
| + print("%T %s (%,llud-%,llud)\n", sa->name, astart, aend); | |
| + | |
| + if(sa->diskstats.sealed && da->diskstats.sealed && scorecmp(da->score,… | |
| + if(scorecmp(sa->score, da->score) == 0) | |
| + return; | |
| + print("%T arena %s: sealed score mismatch %V vs %V\n", sa->nam… | |
| + status = "errors"; | |
| + return; | |
| + } | |
| + if(da->diskstats.sealed && scorecmp(da->score, zeroscore) != 0){ | |
| + print("%T arena %s: dst is sealed, src is not\n", sa->name); | |
| + status = "errors"; | |
| + return; | |
| + } | |
| + if(sa->diskstats.used < da->diskstats.used){ | |
| + print("%T arena %s: src used %,lld < dst used %,lld\n", sa->na… | |
| + status = "errors"; | |
| + return; | |
| + } | |
| + | |
| + if(da->clumpmagic != sa->clumpmagic){ | |
| + /* | |
| + * Write this now to reduce the window in which | |
| + * the head and tail disagree about clumpmagic. | |
| + */ | |
| + da->clumpmagic = sa->clumpmagic; | |
| + memset(buf, 0, sizeof buf); | |
| + packarena(da, buf); | |
| + if(ewritepart(dst, end, buf, blocksize) < 0) | |
| + return; | |
| + } | |
| + | |
| + memset(&h, 0, sizeof h); | |
| + h.version = da->version; | |
| + strcpy(h.name, da->name); | |
| + h.blocksize = da->blocksize; | |
| + h.size = da->size + 2*da->blocksize; | |
| + h.clumpmagic = da->clumpmagic; | |
| + memset(buf, 0, sizeof buf); | |
| + packarenahead(&h, buf); | |
| + if(ewritepart(dst, base - blocksize, buf, blocksize) < 0) | |
| + return; | |
| + | |
| + ds = nil; | |
| + if(sa->diskstats.sealed && scorecmp(sa->score, zeroscore) != 0){ | |
| + /* start sha1 state with header */ | |
| + memset(&xds, 0, sizeof xds); | |
| + ds = &xds; | |
| + sha1(buf, blocksize, nil, ds); | |
| + shaoff = base; | |
| + } | |
| + | |
| + if(sa->diskstats.used != da->diskstats.used){ | |
| + di = base+rdown(da->diskstats.used, blocksize); | |
| + si = base+rup(sa->diskstats.used, blocksize); | |
| + if(ds && asha1(dst, shaoff, di, ds) < 0) | |
| + return; | |
| + if(copy(di, si, "data", ds) < 0) | |
| + return; | |
| + shaoff = si; | |
| + } | |
| + | |
| + clumpmax = sa->clumpmax; | |
| + di = end - da->diskstats.clumps/clumpmax * blocksize; | |
| + si = end - (sa->diskstats.clumps+clumpmax-1)/clumpmax * blocksize; | |
| + | |
| + if(sa->diskstats.sealed){ | |
| + /* | |
| + * might be a small hole between the end of the | |
| + * data and the beginning of the directory. | |
| + */ | |
| + v = base+rup(sa->diskstats.used, blocksize); | |
| + if(ds && asha1(dst, shaoff, v, ds) < 0) | |
| + return; | |
| + if(copy(v, si, "hole", ds) < 0) | |
| + return; | |
| + shaoff = si; | |
| + } | |
| + | |
| + if(da->diskstats.clumps != sa->diskstats.clumps){ | |
| + if(ds && asha1(dst, shaoff, si, ds) < 0) | |
| + return; | |
| + if(copy(si, di, "directory", ds) < 0) /* si < di becau… | |
| + return; | |
| + shaoff = di; | |
| + } | |
| + | |
| + da->ctime = sa->ctime; | |
| + da->wtime = sa->wtime; | |
| + da->diskstats = sa->diskstats; | |
| + da->diskstats.sealed = 0; | |
| + | |
| + memset(buf, 0, sizeof buf); | |
| + packarena(da, buf); | |
| + if(ewritepart(dst, end, buf, blocksize) < 0) | |
| + return; | |
| + | |
| + if(ds){ | |
| + asha1(dst, shaoff, end, ds); | |
| + da->diskstats.sealed = 1; | |
| + memset(buf, 0, sizeof buf); | |
| + packarena(da, buf); | |
| + sha1(buf, blocksize, da->score, ds); | |
| + if(scorecmp(sa->score, da->score) == 0){ | |
| + if(verbose) | |
| + print("%T arena %s: %V\n", sa->name, da->score… | |
| + scorecp(buf+blocksize-VtScoreSize, da->score); | |
| + if(ewritepart(dst, end, buf, blocksize) < 0) | |
| + return; | |
| + }else{ | |
| + print("%T arena %s: sealing dst: score mismatch: %V vs… | |
| + memset(&xds, 0, sizeof xds); | |
| + asha1(dst, base-blocksize, end, &xds); | |
| + sha1(buf, blocksize, da->score, &xds); | |
| + print("%T reseal: %V\n", da->score); | |
| + status = "errors"; | |
| + } | |
| + } | |
| +} | |
| + | |
| +void | |
| +mirrormany(ArenaPart *sp, ArenaPart *dp, char *range) | |
| +{ | |
| + int i, lo, hi; | |
| + char *s, *t; | |
| + Arena *sa, *da; | |
| + | |
| + if(range == nil){ | |
| + for(i=0; i<sp->narenas; i++){ | |
| + sa = sp->arenas[i]; | |
| + da = dp->arenas[i]; | |
| + mirror(sa, da); | |
| + } | |
| + return; | |
| + } | |
| + if(strcmp(range, "none") == 0) | |
| + return; | |
| + | |
| + for(s=range; *s; s=t){ | |
| + t = strchr(s, ','); | |
| + if(t) | |
| + *t++ = 0; | |
| + else | |
| + t = s+strlen(s); | |
| + if(*s == '-') | |
| + lo = 0; | |
| + else | |
| + lo = strtol(s, &s, 0); | |
| + hi = lo; | |
| + if(*s == '-'){ | |
| + s++; | |
| + if(*s == 0) | |
| + hi = sp->narenas-1; | |
| + else | |
| + hi = strtol(s, &s, 0); | |
| + } | |
| + if(*s != 0){ | |
| + print("%T bad arena range: %s\n", s); | |
| + continue; | |
| + } | |
| + for(i=lo; i<=hi; i++){ | |
| + sa = sp->arenas[i]; | |
| + da = dp->arenas[i]; | |
| + mirror(sa, da); | |
| + } | |
| + } | |
| +} | |
| + | |
| + | |
| +void | |
| +threadmain(int argc, char **argv) | |
| +{ | |
| + int i; | |
| + Arena *sa, *da; | |
| + ArenaPart *s, *d; | |
| + char *ranges; | |
| + | |
| + ventifmtinstall(); | |
| + | |
| + ARGBEGIN{ | |
| + case 'F': | |
| + force = 1; | |
| + break; | |
| + case 'v': | |
| + verbose++; | |
| + break; | |
| + default: | |
| + usage(); | |
| + }ARGEND | |
| + | |
| + if(argc != 2 && argc != 3) | |
| + usage(); | |
| + ranges = nil; | |
| + if(argc == 3) | |
| + ranges = argv[2]; | |
| + | |
| + if((src = initpart(argv[0], OREAD)) == nil) | |
| + sysfatal("initpart %s: %r", argv[0]); | |
| + if((dst = initpart(argv[1], ORDWR)) == nil) | |
| + sysfatal("initpart %s: %r", argv[1]); | |
| + if((s = initarenapart(src)) == nil) | |
| + sysfatal("initarenapart %s: %r", argv[0]); | |
| + for(i=0; i<s->narenas; i++) | |
| + delarena(s->arenas[i]); | |
| + if((d = initarenapart(dst)) == nil) | |
| + sysfatal("loadarenapart %s: %r", argv[1]); | |
| + for(i=0; i<d->narenas; i++) | |
| + delarena(d->arenas[i]); | |
| + | |
| + /* | |
| + * The arena geometries must match or all bets are off. | |
| + */ | |
| + if(s->narenas != d->narenas) | |
| + sysfatal("arena count mismatch: %d vs %d", s->narenas, d->nare… | |
| + for(i=0; i<s->narenas; i++){ | |
| + sa = s->arenas[i]; | |
| + da = d->arenas[i]; | |
| + if(sa->version != da->version) | |
| + sysfatal("arena %d: version mismatch: %d vs %d", i, sa… | |
| + if(sa->blocksize != da->blocksize) | |
| + sysfatal("arena %d: blocksize mismatch: %d vs %d", i, … | |
| + if(sa->size != da->size) | |
| + sysfatal("arena %d: size mismatch: %,lld vs %,lld", i,… | |
| + if(strcmp(sa->name, da->name) != 0) | |
| + sysfatal("arena %d: name mismatch: %s vs %s", i, sa->n… | |
| + } | |
| + | |
| + /* | |
| + * Mirror one arena at a time. | |
| + */ | |
| + writechan = chancreate(sizeof(void*), 0); | |
| + vtproc(writeproc, nil); | |
| + mirrormany(s, d, ranges); | |
| + sendp(writechan, nil); | |
| + threadexitsall(status); | |
| +} | |
| diff --git a/src/cmd/venti/srv/mkfile b/src/cmd/venti/srv/mkfile | |
| @@ -11,6 +11,7 @@ LIBOFILES=\ | |
| config.$O\ | |
| conv.$O\ | |
| dcache.$O\ | |
| + disksched.$O\ | |
| dump.$O\ | |
| graph.$O\ | |
| httpd.$O\ | |
| @@ -52,11 +53,13 @@ TARG=\ | |
| fmtbloom\ | |
| fmtisect\ | |
| fmtindex\ | |
| + fixarenas\ | |
| buildindex\ | |
| checkarenas\ | |
| checkindex\ | |
| clumpstats\ | |
| findscore\ | |
| + mirrorarenas\ | |
| rdarena\ | |
| wrarena\ | |
| syncindex\ | |
| diff --git a/src/cmd/venti/srv/part.c b/src/cmd/venti/srv/part.c | |
| @@ -145,8 +145,6 @@ initpart(char *name, int mode) | |
| if(hi == 0) | |
| hi = dir->length; | |
| part->size = hi - part->offset; | |
| -fprint(2, "part %s: file %s offset %,lld size %,lld\n", | |
| - name, file, part->offset, part->size); | |
| #ifdef CANBLOCKSIZE | |
| { | |
| struct statfs sfs; | |
| @@ -203,10 +201,32 @@ prwb(char *name, int fd, int isread, u64int offset, void… | |
| u32int c, delta, icount, opsize; | |
| int r; | |
| + icount = count; | |
| buf = vbuf; | |
| + | |
| +#ifndef PLAN9PORT | |
| + op = isread ? "read" : "write"; | |
| + dst = buf; | |
| + freetmp = nil; | |
| + while(count > 0){ | |
| + opsize = min(count, 131072 /* blocksize */); | |
| + if(isread) | |
| + r = pread(fd, dst, opsize, offset); | |
| + else | |
| + r = pwrite(fd, dst, opsize, offset); | |
| + if(r <= 0) | |
| + goto Error; | |
| + offset += r; | |
| + count -= r; | |
| + dst += r; | |
| + if(r != opsize) | |
| + goto Error; | |
| + } | |
| + return icount; | |
| +#endif | |
| + | |
| tmp = nil; | |
| freetmp = nil; | |
| - icount = count; | |
| opsize = blocksize; | |
| if(count == 0){ | |
| @@ -313,7 +333,7 @@ print("FAILED isread=%d r=%d count=%d blocksize=%d\n", isr… | |
| memmove(buf, tmp, count); | |
| else{ | |
| memmove(tmp, buf, count); | |
| - if(pwrite(fd, tmp, blocksize, offset) != blocksize){ | |
| + if(pwrite(fd, tmp, opsize, offset) != blocksize){ | |
| dst = tmp; | |
| op = "write"; | |
| goto Error; | |
| @@ -332,9 +352,16 @@ Error: | |
| return -1; | |
| } | |
| +#ifndef PLAN9PORT | |
| +static int sdreset(Part*); | |
| +static int reopen(Part*); | |
| +static int threadspawnl(int[3], char*, char*, ...); | |
| +#endif | |
| + | |
| int | |
| rwpart(Part *part, int isread, u64int offset, u8int *buf, u32int count) | |
| { | |
| + int n, try; | |
| u32int blocksize; | |
| trace(TraceDisk, "%s %s %ud at 0x%llx", | |
| @@ -351,9 +378,33 @@ rwpart(Part *part, int isread, u64int offset, u8int *buf,… | |
| if(blocksize == 0) | |
| blocksize = 4096; | |
| - return prwb(part->filename, part->fd, isread, part->offset+offset, buf… | |
| -} | |
| + for(try=0;; try++){ | |
| + n = prwb(part->filename, part->fd, isread, part->offset+offset… | |
| + if(n >= 0 || try > 10) | |
| + break; | |
| +#ifndef PLAN9PORT | |
| + { | |
| + char err[ERRMAX]; | |
| + /* | |
| + * This happens with the sdmv disks frustratingly often. | |
| + * Try to fix things up and continue. | |
| + */ | |
| + rerrstr(err, sizeof err); | |
| + if(strstr(err, "i/o timeout") || strstr(err, "i/o error")){ | |
| + if(sdreset(part) >= 0) | |
| + reopen(part); | |
| + continue; | |
| + }else if(strstr(err, "partition has changed")){ | |
| + reopen(part); | |
| + continue; | |
| + } | |
| + } | |
| +#endif | |
| + break; | |
| + } | |
| + return n; | |
| +} | |
| int | |
| readpart(Part *part, u64int offset, u8int *buf, u32int count) | |
| { | |
| @@ -391,3 +442,200 @@ readfile(char *name) | |
| return b; | |
| } | |
| + | |
| + | |
| + | |
| + | |
| + | |
| + | |
| + | |
| +#ifndef PLAN9PORT | |
| +static int | |
| +sdreset(Part *part) | |
| +{ | |
| + char *name, *p; | |
| + int i, fd, xfd[3], rv; | |
| + static QLock resetlk; | |
| + Dir *d, *dd; | |
| + | |
| + fprint(2, "sdreset %s\n", part->name); | |
| + name = emalloc(strlen(part->filename)+20); | |
| + strcpy(name, part->filename); | |
| + p = strrchr(name, '/'); | |
| + if(p) | |
| + p++; | |
| + else | |
| + p = name; | |
| + | |
| + strcpy(p, "ctl"); | |
| + d = dirstat(name); | |
| + if(d == nil){ | |
| + free(name); | |
| + return -1; | |
| + } | |
| + | |
| + /* | |
| + * We don't need multiple people resetting the disk. | |
| + */ | |
| + qlock(&resetlk); | |
| + if((fd = open(name, OWRITE)) < 0) | |
| + goto error; | |
| + dd = dirfstat(fd); | |
| + if(d && dd && d->qid.vers != dd->qid.vers){ | |
| + fprint(2, "sdreset %s: got scooped\n", part->name); | |
| + /* Someone else got here first. */ | |
| + if(access(part->filename, AEXIST) >= 0) | |
| + goto ok; | |
| + goto error; | |
| + } | |
| + | |
| + /* | |
| + * Write "reset" to the ctl file to cause the chipset | |
| + * to reinitialize itself (specific to sdmv driver). | |
| + * Ignore error in case using other disk. | |
| + */ | |
| + fprint(2, "sdreset %s: reset ctl\n", part->name); | |
| + write(fd, "reset", 5); | |
| + | |
| + if(access(part->filename, AEXIST) >= 0) | |
| + goto ok; | |
| + | |
| + /* | |
| + * Re-run fdisk and prep. Don't use threadwaitchan | |
| + * to avoid coordinating for it. Reopen ctl because | |
| + * we reset the disk. | |
| + */ | |
| + strcpy(p, "ctl"); | |
| + close(fd); | |
| + if((fd = open(name, OWRITE)) < 0) | |
| + goto error; | |
| + strcpy(p, "data"); | |
| + xfd[0] = open("/dev/null", OREAD); | |
| + xfd[1] = dup(fd, -1); | |
| + xfd[2] = dup(2, -1); | |
| + fprint(2, "sdreset %s: run fdisk %s\n", part->name, name); | |
| + if(threadspawnl(xfd, "/bin/disk/fdisk", "disk/fdisk", "-p", name, nil)… | |
| + close(xfd[0]); | |
| + close(xfd[1]); | |
| + close(xfd[2]); | |
| + goto error; | |
| + } | |
| + strcpy(p, "plan9"); | |
| + for(i=0; i<=20; i++){ | |
| + sleep(i*100); | |
| + if(access(part->filename, AEXIST) >= 0) | |
| + goto ok; | |
| + if(access(name, AEXIST) >= 0) | |
| + goto prep; | |
| + } | |
| + goto error; | |
| + | |
| +prep: | |
| + strcpy(p, "ctl"); | |
| + close(fd); | |
| + if((fd = open(name, OWRITE)) < 0) | |
| + goto error; | |
| + strcpy(p, "plan9"); | |
| + xfd[0] = open("/dev/null", OREAD); | |
| + xfd[1] = dup(fd, -1); | |
| + xfd[2] = dup(2, -1); | |
| + fprint(2, "sdreset %s: run prep\n", part->name); | |
| + if(threadspawnl(xfd, "/bin/disk/prep", "disk/prep", "-p", name, nil) <… | |
| + close(xfd[0]); | |
| + close(xfd[1]); | |
| + close(xfd[2]); | |
| + goto error; | |
| + } | |
| + for(i=0; i<=20; i++){ | |
| + sleep(i*100); | |
| + if(access(part->filename, AEXIST) >= 0) | |
| + goto ok; | |
| + } | |
| + | |
| +error: | |
| + fprint(2, "sdreset %s: error: %r\n", part->name); | |
| + rv = -1; | |
| + if(fd >= 0) | |
| + close(fd); | |
| + goto out; | |
| + | |
| +ok: | |
| + fprint(2, "sdreset %s: all okay\n", part->name); | |
| + rv = 0; | |
| + goto out; | |
| + | |
| +out: | |
| + free(name); | |
| + qunlock(&resetlk); | |
| + return rv; | |
| +} | |
| + | |
| +static int | |
| +reopen(Part *part) | |
| +{ | |
| + int fd; | |
| + | |
| + fprint(2, "reopen %s\n", part->filename); | |
| + if((fd = open(part->filename, ORDWR)) < 0){ | |
| + fprint(2, "reopen %s: %r\n", part->filename); | |
| + return -1; | |
| + } | |
| + if(fd != part->fd){ | |
| + dup(fd, part->fd); | |
| + close(fd); | |
| + } | |
| + return 0; | |
| +} | |
| + | |
| +typedef struct Spawn Spawn; | |
| +struct Spawn | |
| +{ | |
| + Channel *c; | |
| + int fd[3]; | |
| + char *file; | |
| + char **argv; | |
| +}; | |
| + | |
| +static void | |
| +spawnproc(void *v) | |
| +{ | |
| + int i, *fd; | |
| + Spawn *s; | |
| + | |
| + rfork(RFFDG); | |
| + s = v; | |
| + fd = s->fd; | |
| + for(i=0; i<3; i++) | |
| + dup(fd[i], i); | |
| + if(fd[0] > 2) | |
| + close(fd[0]); | |
| + if(fd[1] > 2 && fd[1] != fd[0]) | |
| + close(fd[1]); | |
| + if(fd[2] > 2 && fd[2] != fd[1] && fd[2] != fd[0]) | |
| + close(fd[2]); | |
| + procexec(s->c, s->file, s->argv); | |
| +} | |
| + | |
| +static int | |
| +threadspawnl(int fd[3], char *file, char *argv0, ...) | |
| +{ | |
| + int pid; | |
| + Spawn s; | |
| + | |
| + s.c = chancreate(sizeof(void*), 0); | |
| + memmove(s.fd, fd, sizeof(s.fd)); | |
| + s.file = file; | |
| + s.argv = &argv0; | |
| + vtproc(spawnproc, &s); | |
| + pid = recvul(s.c); | |
| + if(pid < 0) | |
| + return -1; | |
| + close(fd[0]); | |
| + if(fd[1] != fd[0]) | |
| + close(fd[1]); | |
| + if(fd[2] != fd[1] && fd[2] != fd[0]) | |
| + close(fd[2]); | |
| + return pid; | |
| +} | |
| + | |
| +#endif | |
| diff --git a/src/cmd/venti/srv/printarenapart.c b/src/cmd/venti/srv/printarenap… | |
| @@ -0,0 +1,160 @@ | |
| +#include "stdinc.h" | |
| +#include "dat.h" | |
| +#include "fns.h" | |
| + | |
| +uchar buf[64*1024]; | |
| + | |
| +void | |
| +usage(void) | |
| +{ | |
| + fprint(2, "usage: printarenapart arenafile [offset]\n"); | |
| + threadexitsall("usage"); | |
| +} | |
| + | |
| +static void | |
| +rdarena(Arena *arena, u64int offset) | |
| +{ | |
| + u64int a, aa, e; | |
| + u32int magic; | |
| + Clump cl; | |
| + uchar score[VtScoreSize]; | |
| + ZBlock *lump; | |
| + | |
| + printarena(2, arena); | |
| + | |
| + a = arena->base; | |
| + e = arena->base + arena->size; | |
| + if(offset != ~(u64int)0) { | |
| + if(offset >= e-a) | |
| + sysfatal("bad offset %llud >= %llud\n", | |
| + offset, e-a); | |
| + aa = offset; | |
| + } else | |
| + aa = 0; | |
| + | |
| + for(; aa < e; aa += ClumpSize+cl.info.size) { | |
| + magic = clumpmagic(arena, aa); | |
| + if(magic == ClumpFreeMagic) | |
| + break; | |
| + if(magic != arena->clumpmagic) { | |
| + fprint(2, "illegal clump magic number %#8.8ux offset %… | |
| + magic, aa); | |
| + break; | |
| + } | |
| + lump = loadclump(arena, aa, 0, &cl, score, 0); | |
| + if(lump == nil) { | |
| + fprint(2, "clump %llud failed to read: %r\n", aa); | |
| + break; | |
| + } | |
| + if(cl.info.type != VtCorruptType) { | |
| + scoremem(score, lump->data, cl.info.uncsize); | |
| + if(scorecmp(cl.info.score, score) != 0) { | |
| + fprint(2, "clump %llud has mismatched score\n"… | |
| + break; | |
| + } | |
| + if(vttypevalid(cl.info.type) < 0) { | |
| + fprint(2, "clump %llud has bad type %d\n", aa,… | |
| + break; | |
| + } | |
| + } | |
| + print("%22llud %V %3d %5d\n", aa, score, cl.info.type, cl.info… | |
| + freezblock(lump); | |
| + } | |
| + print("end offset %llud\n", aa); | |
| +} | |
| + | |
| +void | |
| +threadmain(int argc, char *argv[]) | |
| +{ | |
| + char *file, *p, *name; | |
| + char *table; | |
| + u64int offset; | |
| + Part *part; | |
| + ArenaPart ap; | |
| + ArenaHead head; | |
| + Arena tail; | |
| + char ct[40], mt[40]; | |
| + | |
| + readonly = 1; /* for part.c */ | |
| + ARGBEGIN{ | |
| + default: | |
| + usage(); | |
| + break; | |
| + }ARGEND | |
| + | |
| + switch(argc) { | |
| + default: | |
| + usage(); | |
| + case 1: | |
| + file = argv[0]; | |
| + } | |
| + | |
| + ventifmtinstall(); | |
| + statsinit(); | |
| + | |
| + part = initpart(file, OREAD|ODIRECT); | |
| + if(part == nil) | |
| + sysfatal("can't open file %s: %r", file); | |
| + if(readpart(part, PartBlank, buf, sizeof buf) < 0) | |
| + sysfatal("can't read file %s: %r", file); | |
| + | |
| + if(unpackarenapart(&ap, buf) < 0) | |
| + sysfatal("corrupted arena part header: %r"); | |
| + | |
| + print("# arena part version=%d blocksize=%d arenabase=%d\n", | |
| + ap.version, ap.blocksize, ap.arenabase); | |
| + ap.tabbase = (PartBlank+HeadSize+ap.blocksize-1)&~(ap.blocksize-1); | |
| + ap.tabsize = ap.arenabase - ap.tabbase; | |
| + | |
| +print("A"); | |
| + table = malloc(ap.tabsize+1); | |
| + if(readpart(part, ap.tabbase, (uchar*)table, ap.tabsize) < 0) | |
| + sysfatal("read %s: %r", file); | |
| + table[ap.tabsize] = 0; | |
| + | |
| +print("A"); | |
| + partblocksize(part, ap.blocksize); | |
| + initdcache(8 * MaxDiskBlock); | |
| + | |
| +print("A"); | |
| +/* XXX - read the number of arenas from the first line */ | |
| + for(p=table; p && *p; p=strchr(p, '\n')){ | |
| + if(*p == '\n') | |
| + p++; | |
| + name = p; | |
| + p = strpbrk(p, " \t"); | |
| + if(p == nil){ | |
| + fprint(2, "bad line: %s\n", name); | |
| + break; | |
| + } | |
| +print("%p\n", p); | |
| + offset = strtoull(p, nil, 0); | |
| + if(readpart(part, offset, buf, sizeof buf) < 0){ | |
| + fprint(2, "%s: read %s: %r\n", argv0, file); | |
| + continue; | |
| + } | |
| + if(unpackarenahead(&head, buf) < 0){ | |
| + fprint(2, "%s: unpackarenahead: %r\n", argv0); | |
| + continue; | |
| + } | |
| + if(readpart(part, offset+head.size-head.blocksize, buf, head.b… | |
| + fprint(2, "%s: read %s: %r\n", argv0, file); | |
| + continue; | |
| + } | |
| + if(unpackarena(&tail, buf) < 0){ | |
| + fprint(2, "%s: unpackarena: %r\n", argv0); | |
| + continue; | |
| + } | |
| + print("arena %s %lld clumps=%,d cclumps=%,d used=%,lld uncsize… | |
| + tail.name, offset, | |
| + tail.diskstats.clumps, tail.diskstats.cclumps, | |
| + tail.diskstats.used, tail.diskstats.uncsize, | |
| + tail.diskstats.sealed ? " sealed" : ""); | |
| + strcpy(ct, ctime(tail.ctime)); | |
| + ct[28] = 0; | |
| + strcpy(mt, ctime(tail.wtime)); | |
| + mt[28] = 0; | |
| + print("\tctime=%s\n\tmtime=%s\n", ct, mt); | |
| + } | |
| + threadexitsall(0); | |
| +} | |
| diff --git a/src/cmd/venti/srv/printarenas.c b/src/cmd/venti/srv/printarenas.c | |
| t@@ -36,7 +36,7 @@ shoulddump(char *name, int argc, char **argv) | |
| enum | |
| { | |
| - ClumpChunks = 32*1024 | |
| + ClumpChunks = 32*1024, | |
| }; | |
| void | |
| diff --git a/src/cmd/venti/srv/sortientry.c b/src/cmd/venti/srv/sortientry.c | |
| t@@ -61,7 +61,7 @@ sortrawientries(Index *ix, Part *tmp, u64int *base, Bloom *b… | |
| u32int n; | |
| int i, ok; | |
| -/*ZZZ should allow configuration of bits, bucket size */ | |
| +/* ZZZ should allow configuration of bits, bucket size */ | |
| ib = initiebucks(tmp, 8, 64*1024); | |
| if(ib == nil){ | |
| seterr(EOk, "can't create sorting buckets: %r"); | |
| t@@ -116,10 +116,7 @@ readarenainfo(IEBucks *ib, Arena *arena, u64int a, Bloom … | |
| ClumpInfo *ci, *cis; | |
| u32int clump; | |
| int i, n, ok, nskip; | |
| -/* static Biobuf bout; */ | |
| -/*ZZZ remove fprint? */ | |
| -/*fprint(2, "ra %s %d %d\n", arena->name, arena->memstats.clumps, arena->disks… | |
| if(arena->memstats.clumps) | |
| fprint(2, "\tarena %s: %d entries\n", arena->name, arena->mems… | |
| else | |
| t@@ -129,7 +126,6 @@ readarenainfo(IEBucks *ib, Arena *arena, u64int a, Bloom *… | |
| ok = 0; | |
| nskip = 0; | |
| memset(&ie, 0, sizeof(IEntry)); | |
| -/* Binit(&bout, 1, OWRITE); */ | |
| for(clump = 0; clump < arena->memstats.clumps; clump += n){ | |
| n = ClumpChunks; | |
| if(n > arena->memstats.clumps - clump) | |
| t@@ -148,18 +144,15 @@ readarenainfo(IEBucks *ib, Arena *arena, u64int a, Bloom… | |
| a += ci->size + ClumpSize; | |
| ie.ia.blocks = (ci->size + ClumpSize + (1 << ABlockLog… | |
| scorecp(ie.score, ci->score); | |
| - /* Bprint(&bout, "%22lld %V %3d %5d\n", */ | |
| - /* ie.ia.addr, ie.score, ie.ia.type, ie.ia.size… | |
| if(ci->type == VtCorruptType){ | |
| - /* print("! %V %22lld %3d %5d %3d\n", */ | |
| - /* ie.score, ie.ia.addr, ie.ia.type, ie… | |
| + if(0) print("! %V %22lld %3d %5d %3d\n", | |
| + ie.score, ie.ia.addr, ie.ia.type, ie.i… | |
| nskip++; | |
| }else | |
| sprayientry(ib, &ie); | |
| markbloomfilter(b, ie.score); | |
| } | |
| } | |
| -/* Bterm(&bout); */ | |
| free(cis); | |
| if(ok < 0) | |
| return TWID32; | |
| t@@ -358,8 +351,8 @@ readiebuck(IEBucks *ib, int b) | |
| m = ib->bucks[b].used; | |
| if(m == 0) | |
| m = ib->usable; | |
| -/* if(ib->bucks[b].total) */ | |
| -/* fprint(2, "\tbucket %d: %d entries\n", b, ib->bucks[b].total… | |
| + if(0) if(ib->bucks[b].total) | |
| + fprint(2, "\tbucket %d: %d entries\n", b, ib->bucks[b].total/I… | |
| while(head != TWID32){ | |
| if(readpart(ib->part, (u64int)head * ib->size, &ib->buf[n], m+… | |
| seterr(EOk, "can't read index sort bucket: %r"); | |
| diff --git a/src/cmd/venti/srv/stats.c b/src/cmd/venti/srv/stats.c | |
| t@@ -80,7 +80,7 @@ Statdesc statdesc[NStat] = | |
| { "isect block write bytes", }, | |
| { "sum reads", }, | |
| - { "sum read bytes", } | |
| + { "sum read bytes", }, | |
| }; | |
| QLock statslock; | |
| diff --git a/src/cmd/venti/srv/syncarena.c b/src/cmd/venti/srv/syncarena.c | |
| t@@ -30,12 +30,11 @@ syncarena(Arena *arena, u64int start, u32int n, int zok, i… | |
| ZBlock *lump; | |
| Clump cl; | |
| ClumpInfo ci; | |
| - static ClumpInfo zci = { -1 }; | |
| + static ClumpInfo zci = { .type = -1 }; | |
| u8int score[VtScoreSize]; | |
| u64int uncsize, used, aa; | |
| u32int clump, clumps, cclumps, magic; | |
| int err, flush, broken; | |
| - AState as; | |
| used = arena->memstats.used; | |
| clumps = arena->memstats.clumps; | |
| t@@ -133,19 +132,21 @@ syncarena(Arena *arena, u64int start, u32int n, int zok,… | |
| flushdcache(); | |
| } | |
| +fprint(2, "arena %s: start=%lld fix=%d flush=%d %lld->%lld %ud->%ud %ud->%ud %… | |
| + arena->name, | |
| + start, | |
| + fix, | |
| + flush, | |
| + used, arena->memstats.used, | |
| + clumps, arena->memstats.clumps, | |
| + cclumps, arena->memstats.cclumps, | |
| + uncsize, arena->memstats.uncsize); | |
| + | |
| if(used != arena->memstats.used | |
| || clumps != arena->memstats.clumps | |
| || cclumps != arena->memstats.cclumps | |
| || uncsize != arena->memstats.uncsize) | |
| err |= SyncHeader; | |
| - if(start && (err&SyncHeader)){ | |
| - trace(TraceProc, "syncarena setdcachestate"); | |
| - as.arena = arena; | |
| - as.aa = start+arena->memstats.used; | |
| - as.stats = arena->memstats; | |
| - setdcachestate(&as); | |
| - } | |
| - | |
| return err; | |
| } | |
| diff --git a/src/cmd/venti/srv/syncindex.c b/src/cmd/venti/srv/syncindex.c | |
| t@@ -48,6 +48,8 @@ threadmain(int argc, char *argv[]) | |
| ventifmtinstall(); | |
| if(initventi(argv[0], &conf) < 0) | |
| sysfatal("can't init venti: %r"); | |
| + if(mainindex->bloom && loadbloom(mainindex->bloom) < 0) | |
| + sysfatal("can't load bloom filter: %r"); | |
| if(bcmem < maxblocksize * (mainindex->narenas + mainindex->nsects * 4 … | |
| bcmem = maxblocksize * (mainindex->narenas + mainindex->nsects… | |
| diff --git a/src/cmd/venti/srv/syncindex0.c b/src/cmd/venti/srv/syncindex0.c | |
| t@@ -121,6 +121,7 @@ int | |
| syncindex(Index *ix, int fix, int mustflush, int check) | |
| { | |
| Arena *arena; | |
| + AState as; | |
| u64int a; | |
| u32int clump; | |
| int i, e, e1, ok, ok1, flush; | |
| t@@ -130,7 +131,12 @@ syncindex(Index *ix, int fix, int mustflush, int check) | |
| for(i = 0; i < ix->narenas; i++){ | |
| trace(TraceProc, "syncindex start %d", i); | |
| arena = ix->arenas[i]; | |
| - clump = arena->memstats.clumps; | |
| + /* | |
| + * Syncarena will scan through the arena looking for blocks | |
| + * that have been forgotten. It will update arena->memstats.u… | |
| + * so save the current copy as the place to start the | |
| + * syncarenaindex scan. | |
| + */ | |
| a = arena->memstats.used; | |
| e = syncarena(arena, ix->amap[i].start, TWID32, fix, fix); | |
| e1 = e; | |
| t@@ -138,15 +144,23 @@ syncindex(Index *ix, int fix, int mustflush, int check) | |
| e1 &= ~(SyncHeader|SyncCIZero|SyncCIErr); | |
| if(e1 == SyncHeader) | |
| fprint(2, "arena %s: header is out-of-date\n", arena->… | |
| + clump = arena->diskstats.clumps; | |
| if(e1) | |
| ok = -1; | |
| else{ | |
| ok1 = syncarenaindex(ix, arena, clump, a + ix->amap[i]… | |
| if(ok1 < 0) | |
| fprint(2, "syncarenaindex: %r\n"); | |
| +fprint(2, "arena %s: wbarena in syncindex\n", arena->name); | |
| if(fix && ok1==0 && (e & SyncHeader) && wbarena(arena)… | |
| fprint(2, "arena=%s header write failed: %r\n"… | |
| ok |= ok1; | |
| + | |
| +fprint(2, "arena %s: setdcachestate\n", arena->name); | |
| + as.arena = arena; | |
| + as.aa = ix->amap[i].start + arena->memstats.used; | |
| + as.stats = arena->memstats; | |
| + setdcachestate(&as); | |
| } | |
| } | |
| if(missing || wrong) | |
| diff --git a/src/cmd/venti/srv/unwhack.c b/src/cmd/venti/srv/unwhack.c | |
| t@@ -23,7 +23,7 @@ static uchar lenval[1 << (DBigLenBits - 1)] = | |
| static uchar lenbits[] = | |
| { | |
| 0, 0, 0, | |
| - 2, 3, 5, 5 | |
| + 2, 3, 5, 5, | |
| }; | |
| static uchar offbits[16] = | |
| diff --git a/src/cmd/venti/srv/utils.c b/src/cmd/venti/srv/utils.c | |
| t@@ -148,6 +148,7 @@ emalloc(ulong n) | |
| sysfatal("out of memory allocating %lud", n); | |
| } | |
| memset(p, 0xa5, n); | |
| + setmalloctag(p, getcallerpc(&n)); | |
| if(0)print("emalloc %p-%p by %lux\n", p, (char*)p+n, getcallerpc(&n)); | |
| return p; | |
| } | |
| t@@ -164,6 +165,7 @@ ezmalloc(ulong n) | |
| sysfatal("out of memory allocating %lud", n); | |
| } | |
| memset(p, 0, n); | |
| + setmalloctag(p, getcallerpc(&n)); | |
| if(0)print("ezmalloc %p-%p by %lux\n", p, (char*)p+n, getcallerpc(&n)); | |
| return p; | |
| } | |
| t@@ -177,6 +179,7 @@ erealloc(void *p, ulong n) | |
| abort(); | |
| sysfatal("out of memory allocating %lud", n); | |
| } | |
| + setrealloctag(p, getcallerpc(&p)); | |
| if(0)print("erealloc %p-%p by %lux\n", p, (char*)p+n, getcallerpc(&p)); | |
| return p; | |
| } | |
| t@@ -190,6 +193,7 @@ estrdup(char *s) | |
| n = strlen(s) + 1; | |
| t = emalloc(n); | |
| memmove(t, s, n); | |
| + setmalloctag(t, getcallerpc(&s)); | |
| if(0)print("estrdup %p-%p by %lux\n", t, (char*)t+n, getcallerpc(&s)); | |
| return t; | |
| } | |
| t@@ -231,6 +235,7 @@ ventifmtinstall(void) | |
| fmtinstall('F', vtfcallfmt); | |
| fmtinstall('H', encodefmt); | |
| fmtinstall('I', ientryfmt); | |
| + fmtinstall('T', vttimefmt); | |
| fmtinstall('V', vtscorefmt); | |
| } | |
| diff --git a/src/cmd/venti/srv/venti.c b/src/cmd/venti/srv/venti.c | |
| t@@ -105,6 +105,8 @@ threadmain(int argc, char *argv[]) | |
| fprint(2, "conf..."); | |
| if(initventi(configfile, &config) < 0) | |
| sysfatal("can't init server: %r"); | |
| + if(mainindex->bloom && loadbloom(mainindex->bloom) < 0) | |
| + sysfatal("can't load bloom filter: %r"); | |
| if(mem == 0) | |
| mem = config.mem; | |
| t@@ -210,8 +212,8 @@ ventiserver(void *v) | |
| trace(TraceRpc, "<- %F", &r->tx); | |
| r->rx.msgtype = r->tx.msgtype+1; | |
| addstat(StatRpcTotal, 1); | |
| - /* print("req (arenas[0]=%p sects[0]=%p) %F\n", */ | |
| - /* mainindex->arenas[0], mainindex->sects[0], &r->tx); … | |
| + if(0) print("req (arenas[0]=%p sects[0]=%p) %F\n", | |
| + mainindex->arenas[0], mainindex->sects[0], &r->tx); | |
| switch(r->tx.msgtype){ | |
| default: | |
| vtrerror(r, "unknown request"); | |
| diff --git a/src/cmd/venti/srv/verifyarena.c b/src/cmd/venti/srv/verifyarena.c | |
| t@@ -3,65 +3,102 @@ | |
| #include "fns.h" | |
| static int verbose; | |
| +static int fd; | |
| +static uchar *data; | |
| +static int blocksize; | |
| +static int sleepms; | |
| void | |
| usage(void) | |
| { | |
| - fprint(2, "usage: verifyarena [-v]\n"); | |
| + fprint(2, "usage: verifyarena [-b blocksize] [-s ms] [-v] [arenapart [… | |
| threadexitsall(0); | |
| } | |
| -static void | |
| +static int | |
| +preadblock(uchar *buf, int n, vlong off) | |
| +{ | |
| + int nr, m; | |
| + | |
| + for(nr = 0; nr < n; nr += m){ | |
| + m = n - nr; | |
| + m = pread(fd, &buf[nr], m, off+nr); | |
| + if(m <= 0){ | |
| + if(m == 0) | |
| + werrstr("early eof"); | |
| + return -1; | |
| + } | |
| + } | |
| + return 0; | |
| +} | |
| + | |
| +static int | |
| readblock(uchar *buf, int n) | |
| { | |
| int nr, m; | |
| for(nr = 0; nr < n; nr += m){ | |
| m = n - nr; | |
| - m = read(0, &buf[nr], m); | |
| - if(m <= 0) | |
| - sysfatal("can't read arena from standard input: %r"); | |
| + m = read(fd, &buf[nr], m); | |
| + if(m <= 0){ | |
| + if(m == 0) | |
| + werrstr("early eof"); | |
| + return -1; | |
| + } | |
| } | |
| + return 0; | |
| } | |
| static void | |
| -verifyarena(void) | |
| +verifyarena(char *name, vlong len) | |
| { | |
| Arena arena; | |
| ArenaHead head; | |
| - ZBlock *b; | |
| DigestState s; | |
| u64int n, e; | |
| u32int bs; | |
| u8int score[VtScoreSize]; | |
| - fprint(2, "verify arena from standard input\n"); | |
| + fprint(2, "verify %s\n", name); | |
| memset(&arena, 0, sizeof arena); | |
| memset(&s, 0, sizeof s); | |
| /* | |
| - * read the little bit, which will included the header | |
| + * read a little bit, which will include the header | |
| */ | |
| - bs = MaxIoSize; | |
| - b = alloczblock(bs, 0, 0); | |
| - readblock(b->data, HeadSize); | |
| - sha1(b->data, HeadSize, nil, &s); | |
| - if(unpackarenahead(&head, b->data) < 0) | |
| - sysfatal("corrupted arena header: %r"); | |
| + if(readblock(data, HeadSize) < 0){ | |
| + fprint(2, "%s: reading header: %r\n", name); | |
| + return; | |
| + } | |
| + sha1(data, HeadSize, nil, &s); | |
| + if(unpackarenahead(&head, data) < 0){ | |
| + fprint(2, "%s: corrupt arena header: %r\n", name); | |
| + return; | |
| + } | |
| if(head.version != ArenaVersion4 && head.version != ArenaVersion5) | |
| - fprint(2, "warning: unknown arena version %d\n", head.version); | |
| + fprint(2, "%s: warning: unknown arena version %d\n", name, hea… | |
| + if(len != 0 && len != head.size) | |
| + fprint(2, "%s: warning: unexpected length %lld != %lld\n", nam… | |
| + if(strcmp(name, "<stdin>") != 0 && strcmp(head.name, name) != 0) | |
| + fprint(2, "%s: warning: unexpected name %s\n", name, head.name… | |
| /* | |
| * now we know how much to read | |
| * read everything but the last block, which is special | |
| */ | |
| e = head.size - head.blocksize; | |
| + bs = blocksize; | |
| for(n = HeadSize; n < e; n += bs){ | |
| if(n + bs > e) | |
| bs = e - n; | |
| - readblock(b->data, bs); | |
| - sha1(b->data, bs, nil, &s); | |
| + if(readblock(data, bs) < 0){ | |
| + fprint(2, "%s: read data: %r\n", name); | |
| + return; | |
| + } | |
| + sha1(data, bs, nil, &s); | |
| + if(sleepms) | |
| + sleep(sleepms); | |
| } | |
| /* | |
| t@@ -69,8 +106,11 @@ verifyarena(void) | |
| * the sum is calculated assuming the slot for the sum is zero. | |
| */ | |
| bs = head.blocksize; | |
| - readblock(b->data, bs); | |
| - sha1(b->data, bs-VtScoreSize, nil, &s); | |
| + if(readblock(data, bs) < 0){ | |
| + fprint(2, "%s: read last block: %r\n", name); | |
| + return; | |
| + } | |
| + sha1(data, bs-VtScoreSize, nil, &s); | |
| sha1(zeroscore, VtScoreSize, nil, &s); | |
| sha1(nil, 0, score, &s); | |
| t@@ -78,37 +118,73 @@ verifyarena(void) | |
| * validity check on the trailer | |
| */ | |
| arena.blocksize = head.blocksize; | |
| - if(unpackarena(&arena, b->data) < 0) | |
| - sysfatal("corrupted arena trailer: %r"); | |
| - scorecp(arena.score, &b->data[arena.blocksize - VtScoreSize]); | |
| - | |
| - if(namecmp(arena.name, head.name) != 0) | |
| - sysfatal("arena header and trailer names clash: %s vs. %s\n", … | |
| - if(arena.version != head.version) | |
| - sysfatal("arena header and trailer versions clash: %d vs. %d\n… | |
| + if(unpackarena(&arena, data) < 0){ | |
| + fprint(2, "%s: corrupt arena trailer: %r\n", name); | |
| + return; | |
| + } | |
| + scorecp(arena.score, &data[arena.blocksize - VtScoreSize]); | |
| + | |
| + if(namecmp(arena.name, head.name) != 0){ | |
| + fprint(2, "%s: wrong name in trailer: %s vs. %s\n", | |
| + name, head.name, arena.name); | |
| + return; | |
| + } | |
| + if(arena.version != head.version){ | |
| + fprint(2, "%s: wrong version in trailer: %d vs. %d\n", | |
| + name, head.version, arena.version); | |
| + return; | |
| + } | |
| arena.size = head.size - 2 * head.blocksize; | |
| /* | |
| * check for no checksum or the same | |
| */ | |
| - if(scorecmp(score, arena.score) != 0){ | |
| - if(scorecmp(zeroscore, arena.score) != 0) | |
| - fprint(2, "warning: mismatched checksums for arena=%s,… | |
| - arena.name, arena.score, score); | |
| - scorecp(arena.score, score); | |
| - }else | |
| - fprint(2, "matched score\n"); | |
| - | |
| + if(scorecmp(score, arena.score) == 0) | |
| + fprint(2, "%s: verified score\n", name); | |
| + else if(scorecmp(zeroscore, arena.score) == 0) | |
| + fprint(2, "%s: unsealed\n", name); | |
| + else{ | |
| + fprint(2, "%s: mismatch checksum - found=%V calculated=%V\n", | |
| + name, arena.score, score); | |
| + return; | |
| + } | |
| printarena(2, &arena); | |
| } | |
| +static int | |
| +shouldcheck(char *name, char **s, int n) | |
| +{ | |
| + int i; | |
| + | |
| + if(n == 0) | |
| + return 1; | |
| + | |
| + for(i=0; i<n; i++){ | |
| + if(s[i] && strcmp(name, s[i]) == 0){ | |
| + s[i] = nil; | |
| + return 1; | |
| + } | |
| + } | |
| + return 0; | |
| +} | |
| + | |
| void | |
| threadmain(int argc, char *argv[]) | |
| { | |
| + int i, nline; | |
| + char *p, *q, *table, *f[10], line[256]; | |
| + vlong start, stop; | |
| + ArenaPart ap; | |
| + | |
| ventifmtinstall(); | |
| - statsinit(); | |
| - | |
| + blocksize = MaxIoSize; | |
| ARGBEGIN{ | |
| + case 'b': | |
| + blocksize = unittoull(EARGF(usage())); | |
| + break; | |
| + case 's': | |
| + sleepms = atoi(EARGF(usage())); | |
| + break; | |
| case 'v': | |
| verbose++; | |
| break; | |
| t@@ -117,11 +193,69 @@ threadmain(int argc, char *argv[]) | |
| break; | |
| }ARGEND | |
| - readonly = 1; | |
| + data = vtmalloc(blocksize); | |
| + if(argc == 0){ | |
| + fd = 0; | |
| + verifyarena("<stdin>", 0); | |
| + threadexitsall(nil); | |
| + } | |
| + | |
| + if((fd = open(argv[0], OREAD)) < 0) | |
| + sysfatal("open %s: %r", argv[0]); | |
| - if(argc != 0) | |
| - usage(); | |
| + if(preadblock(data, 8192, PartBlank) < 0) | |
| + sysfatal("read arena part header: %r"); | |
| + if(unpackarenapart(&ap, data) < 0) | |
| + sysfatal("corrupted arena part header: %r"); | |
| + fprint(2, "# arena part version=%d blocksize=%d arenabase=%d\n", | |
| + ap.version, ap.blocksize, ap.arenabase); | |
| + ap.tabbase = (PartBlank+HeadSize+ap.blocksize-1)&~(ap.blocksize-1); | |
| + ap.tabsize = ap.arenabase - ap.tabbase; | |
| + table = malloc(ap.tabsize+1); | |
| + if(preadblock((uchar*)table, ap.tabsize, ap.tabbase) < 0) | |
| + sysfatal("reading arena part directory: %r"); | |
| + table[ap.tabsize] = 0; | |
| + | |
| + nline = atoi(table); | |
| + p = strchr(table, '\n'); | |
| + if(p) | |
| + p++; | |
| + for(i=0; i<nline; i++){ | |
| + if(p == nil){ | |
| + fprint(2, "warning: unexpected arena table end\n"); | |
| + break; | |
| + } | |
| + q = strchr(p, '\n'); | |
| + if(q) | |
| + *q++ = 0; | |
| + if(strlen(p) >= sizeof line){ | |
| + fprint(2, "warning: long arena table line: %s\n", p); | |
| + p = q; | |
| + continue; | |
| + } | |
| + strcpy(line, p); | |
| + memset(f, 0, sizeof f); | |
| + if(tokenize(line, f, nelem(f)) < 3){ | |
| + fprint(2, "warning: bad arena table line: %s\n", p); | |
| + p = q; | |
| + continue; | |
| + } | |
| + p = q; | |
| + if(shouldcheck(f[0], argv+1, argc-1)){ | |
| + start = strtoull(f[1], 0, 0); | |
| + stop = strtoull(f[2], 0, 0); | |
| + if(stop <= start){ | |
| + fprint(2, "%s: bad start,stop %lld,%lld\n", f[… | |
| + continue; | |
| + } | |
| + if(seek(fd, start, 0) < 0) | |
| + fprint(2, "%s: seek to start: %r\n", f[0]); | |
| + verifyarena(f[0], stop - start); | |
| + } | |
| + } | |
| + for(i=1; i<argc; i++) | |
| + if(argv[i] != 0) | |
| + fprint(2, "%s: did not find arena\n", argv[i]); | |
| - verifyarena(); | |
| - threadexitsall(0); | |
| + threadexitsall(nil); | |
| } | |
| diff --git a/src/cmd/venti/srv/wrarena.c b/src/cmd/venti/srv/wrarena.c | |
| t@@ -83,8 +83,8 @@ rdarena(Arena *arena, u64int offset) | |
| if(magic == ClumpFreeMagic) | |
| break; | |
| if(magic != arena->clumpmagic) { | |
| - /* fprint(2, "illegal clump magic number %#8.8ux offset… | |
| - /* magic, aa); */ | |
| + if(0) fprint(2, "illegal clump magic number %#8.8ux of… | |
| + magic, aa); | |
| break; | |
| } | |
| lump = loadclump(arena, aa, 0, &cl, score, 0); | |
| diff --git a/src/cmd/venti/srv/zblock.c b/src/cmd/venti/srv/zblock.c | |
| t@@ -5,11 +5,13 @@ | |
| void | |
| fmtzbinit(Fmt *f, ZBlock *b) | |
| { | |
| - memset(f, 0, sizeof *f); | |
| - fmtlocaleinit(f, nil, nil, nil); | |
| + f->runes = 0; | |
| f->start = b->data; | |
| f->to = f->start; | |
| f->stop = (char*)f->start + b->len; | |
| + f->flush = nil; | |
| + f->farg = nil; | |
| + f->nfmt = 0; | |
| } | |
| #define ROUNDUP(p, n) ((void*)(((uintptr)(p)+(n)-1)&~(uintptr)((n)-1))) | |
| diff --git a/src/cmd/venti/srv/zeropart.c b/src/cmd/venti/srv/zeropart.c | |
| t@@ -10,10 +10,6 @@ zeropart(Part *part, int blocksize) | |
| int w; | |
| fprint(2, "clearing the partition\n"); | |
| -/*fprint(2, "NOT!\n"); */ | |
| -/*return; */ | |
| -/*b=alloczblock(MaxIoSize, 1, blocksize); */ | |
| -/*freezblock(b); */ | |
| b = alloczblock(MaxIoSize, 1, blocksize); | |
| w = 0; |