/*
* this comment used to say `our target is 4000 names cached, this should
* be larger on large servers'. dns at Bell Labs starts off with
* about 1780 names.
*
* aging seems to corrupt the cache, so raise the trigger from 4000 until we
* figure it out.
*/
/* tuning constants for cache aging */
enum {
/* these settings will trigger frequent aging */
Deftarget = 4000, /* presumably the default for `target' tested in dnageall -- confirm */
Minage = 5*Min, /* dnageall never lets dnvars.oldest drop below this */
Defagefreq = 15*Min, /* age names this often (seconds) */
};
/*
* Hash table for domain names. The hash is based only on the
* first element of the domain name.
*/
DN *ht[HTLEN];
/*
 * bookkeeping shared by the cache and the activity tracking code.
 * the anonymous Lock member is what lock(&dnvars) acquires.
 */
static struct {
Lock;
ulong names; /* names allocated */
ulong oldest; /* longest we'll leave a name around */
int active; /* bumped by every getactivity call */
int mutex; /* while non-zero, non-recursive getactivity callers sleep and retry */
ushort id; /* same size as in packet */
} dnvars;
/*
* purge all records
*/
void
dnpurge(void)
{
DN *dp;
RR *rp, *srp;
int i;
lock(&dnlock);
for(i = 0; i < HTLEN; i++)
for(dp = ht[i]; dp; dp = dp->next){
srp = rp = dp->rr;
dp->rr = nil;
for(; rp != nil; rp = rp->next)
rp->cached = 0;
rrfreelist(srp);
}
unlock(&dnlock);
}
/*
 * unlink the first record of the list *l and release it with rrfree.
 * an empty list is left untouched.
 * the caller must already hold dnlock.
 */
static void
rrdelhead(RR **l)
{
	RR *head;

	/* being able to take dnlock means the caller did not hold it */
	if (canlock(&dnlock))
		abort();

	head = *l;
	if(head != nil){
		*l = head->next;	/* list now starts at the second record */
		head->cached = 0;	/* avoid blowing an assertion in rrfree */
		rrfree(head);
	}
}
/*
 * drop the timed-out resource records hanging off dp.
 * a name with any mark bit set, or one referenced less than
 * Reserved ago, is left alone.  database records (rp->db) are
 * never removed here.
 * the caller must already hold dnlock.
 */
void
dnage(DN *dp)
{
	RR **link, *cur;
	ulong idle;
	int stale;

	/* being able to take dnlock means the caller did not hold it */
	if (canlock(&dnlock))
		abort();

	idle = now - dp->referenced;
	if(dp->mark != 0 || idle < Reserved)
		return;

	for(link = &dp->rr; (cur = *link) != nil; ){
		assert(cur->magic == RRmagic && cur->cached);
		/* expired, or the name has sat unused longer than we allow */
		stale = (long)(cur->expire - now) <= 0 || idle > dnvars.oldest;
		if(cur->db || !stale)
			link = &cur->next;
		else
			rrdelhead(link);	/* *link now refers to cur's successor */
	}
}
/* set the `never age' bit (value 2) in a name's mark, tolerating nil */
#define MARK(dp)	{ if (dp) (dp)->mark |= 2; }

/*
 * mark a domain name and those in its RRs as never to be aged.
 * dnage leaves any name with a non-zero mark alone.
 * takes and releases dnlock itself.
 */
void
dnagenever(DN *dp)
{
	RR *r;

	lock(&dnlock);

	/* the name itself, then every name its records point at */
	MARK(dp);
	for(r = dp->rr; r != nil; r = r->next){
		MARK(r->owner);

		/* negative entries only refer to the soa owner */
		if(r->negative){
			MARK(r->negsoaowner);
			continue;
		}

		switch(r->type){
		case Ttxt:
			/* no domain names inside */
			break;
		case Ta:
		case Taaaa:
			MARK(r->ip);
			break;
		case Tptr:
			MARK(r->ptr);
			break;
		case Tcname:
		case Tmb:
		case Tmd:
		case Tmf:
		case Tns:
		case Tmx:
		case Tsrv:
			MARK(r->host);
			break;
		case Tmg:
		case Tmr:
			MARK(r->mb);
			break;
		case Thinfo:
			MARK(r->cpu);
			MARK(r->os);
			break;
		case Tminfo:
			MARK(r->rmb);
			MARK(r->mb);
			break;
		case Trp:
			MARK(r->rmb);
			MARK(r->rp);
			break;
		case Tsoa:
			MARK(r->host);
			MARK(r->rmb);
			break;
		case Tsig:
			MARK(r->sig->signer);
			break;
		}
	}

	unlock(&dnlock);
}
#define REF(dp) { if (dp) (dp)->mark |= 1; }
/*
* periodically sweep for old records and remove unreferenced domain names
*
* only called when all other threads are locked out
*/
void
dnageall(int doit)
{
DN *dp, **l;
int i;
RR *rp;
static ulong nextage;
if(dnvars.names >= target) {
dnslog("more names (%lud) than target (%lud)", dnvars.names,
target);
dnvars.oldest /= 2;
if (dnvars.oldest < Minage)
dnvars.oldest = Minage; /* don't be silly */
}
if (agefreq > dnvars.oldest / 2)
nextage = now + dnvars.oldest / 2;
else
nextage = now + (ulong)agefreq;
lock(&dnlock);
/* time out all old entries (and set refs to 0) */
for(i = 0; i < HTLEN; i++)
for(dp = ht[i]; dp; dp = dp->next){
dp->mark &= ~1;
dnage(dp);
}
/* mark all referenced domain names */
for(i = 0; i < HTLEN; i++)
for(dp = ht[i]; dp; dp = dp->next)
for(rp = dp->rr; rp; rp = rp->next){
REF(rp->owner);
if(rp->negative){
REF(rp->negsoaowner);
continue;
}
switch(rp->type){
case Thinfo:
REF(rp->cpu);
REF(rp->os);
break;
case Ttxt:
break;
case Tcname:
case Tmb:
case Tmd:
case Tmf:
case Tns:
case Tmx:
case Tsrv:
REF(rp->host);
break;
case Tmg:
case Tmr:
REF(rp->mb);
break;
case Tminfo:
REF(rp->rmb);
REF(rp->mb);
break;
case Trp:
REF(rp->rmb);
REF(rp->rp);
break;
case Ta:
case Taaaa:
REF(rp->ip);
break;
case Tptr:
REF(rp->ptr);
break;
case Tsoa:
REF(rp->host);
REF(rp->rmb);
break;
case Tsig:
REF(rp->sig->signer);
break;
}
}
/*
* timeout all database records (used when rereading db)
*/
void
dnagedb(void)
{
DN *dp;
int i;
RR *rp;
lock(&dnlock);
/* time out all database entries */
for(i = 0; i < HTLEN; i++)
for(dp = ht[i]; dp; dp = dp->next) {
dp->mark = 0;
for(rp = dp->rr; rp; rp = rp->next)
if(rp->db)
rp->expire = 0;
}
unlock(&dnlock);
}
/*
* mark all local db records about my area as authoritative,
* delete timed out ones
*/
void
dnauthdb(void)
{
int i;
ulong minttl;
Area *area;
DN *dp;
RR *rp, **l;
lock(&dnlock);
/* time out all database entries */
for(i = 0; i < HTLEN; i++)
for(dp = ht[i]; dp; dp = dp->next){
area = inmyarea(dp->name);
l = &dp->rr;
for(rp = *l; rp; rp = *l){
if(rp->db){
if(rp->expire == 0){
rrdelhead(l);
continue;
}
if(area){
minttl = area->soarr->soa->minttl;
if(rp->ttl < minttl)
rp->ttl = minttl;
rp->auth = 1;
}
}
l = &rp->next;
}
}
unlock(&dnlock);
}
/*
 * keep track of other processes to know if we can
 * garbage collect.  block while garbage collecting.
 *
 * registers the caller as active, refreshes the global clocks
 * (now, nowns) and gives req a fresh id and a nil aux.
 * a non-recursive caller waits, polling every 100ms, for as long
 * as dnvars.mutex is set.
 *
 * returns the new value of dnvars.active, i.e. the number of
 * active processes including this one.
 */
int
getactivity(Request *req, int recursive)
{
	int rv;

	if(traceactivity)
		dnslog("get: %d active by pid %d from %p",
			dnvars.active, getpid(), getcallerpc(&req));
	lock(&dnvars);
	/*
	 * can't block here if we're already holding one
	 * of the dnvars.active (recursive). will deadlock.
	 */
	while(!recursive && dnvars.mutex){
		unlock(&dnvars);
		sleep(100);	/* tune; was 200 */
		lock(&dnvars);
	}
	rv = ++dnvars.active;
	now = time(nil);
	nowns = nsec();
	req->id = ++dnvars.id;
	req->aux = nil;
	unlock(&dnvars);

	/*
	 * done: dnvars is no longer held, so nothing further may
	 * unlock it or touch dnvars.mutex/needrefresh from here.
	 * that bookkeeping belongs to the release side (putactivity).
	 */
	return rv;
}
/*
 * count the records on the list starting at rp (0 for a nil list).
 */
int
rrlistlen(RR *rp)
{
	int count;

	count = 0;
	while(rp != nil){
		count++;
		rp = rp->next;
	}
	return count;
}
/*
 * Attach a single resource record to a domain name (new->owner).
 *	- Avoid duplicates with already present RR's
 *	- Chain all RR's of the same type adjacent to one another
 *	- chain authoritative RR's ahead of non-authoritative ones
 *	- remove any expired RR's
 * If new is a stale duplicate, rrfree it.
 * Must be called with dnlock held.
 *
 * auth, when non-zero, flags new as authoritative before it is
 * compared with the records already on the name.
 */
static void
rrattach1(RR *new, int auth)
{
	RR **l;
	RR *rp;
	DN *dp;
	ulong ttl;

	/* the list we insert into is the one hanging off new's owner */
	dp = new->owner;
	assert(dp != nil && dp->magic == DNmagic);
	new->auth |= auth;

	/*
	 * try not to let responses expire before we
	 * can use them to complete this query, by extending
	 * past (or nearly past) expiration time.
	 */
	if(new->db)
		ttl = Year;
	else
		ttl = new->ttl;
	if(ttl <= Min)
		ttl = 10*Min;
	new->expire = now + ttl;

	/*
	 * find first rr of the right type
	 */
	l = &dp->rr;
	for(rp = *l; rp; rp = *l){
		assert(rp->magic == RRmagic && rp->cached);
		if(rp->type == new->type)
			break;
		l = &rp->next;
	}

	/*
	 * negative entries replace positive entries
	 * positive entries replace negative entries
	 * newer entries replace older entries with the same fields
	 *
	 * look farther ahead than just the next entry when looking
	 * for duplicates; RRs of a given type can have different rdata
	 * fields (e.g. multiple NS servers).
	 */
	while ((rp = *l) != nil){
		assert(rp->magic == RRmagic && rp->cached);
		if(rp->type != new->type)
			break;

		if(rp->db == new->db && rp->auth == new->auth){
			/* negative drives out positive and vice versa */
			if(rp->negative != new->negative) {
				/* rp == *l before; *l == rp->next after */
				rrdelhead(l);
				continue;
			}
			/* all things equal, pick the newer one */
			else if(rp->arg0 == new->arg0 && rp->arg1 == new->arg1){
				/* old drives out new */
				if((long)(rp->expire - new->expire) > 0) {
					rrfree(new);
					return;
				}
				/* rp == *l before; *l == rp->next after */
				rrdelhead(l);
				continue;
			}
			/*
			 * Hack for pointer records. This makes sure
			 * the ordering in the list reflects the ordering
			 * received or read from the database
			 */
			else if(rp->type == Tptr &&
			    !rp->negative && !new->negative &&
			    rp->ptr->ordinal > new->ptr->ordinal)
				break;
		}
		l = &rp->next;
	}

	if (rronlist(new, rp)) {
		/* should not happen; duplicates were processed above */
		dnslog("adding duplicate %R to list of %R; aborting", new, rp);
		abort();
	}
	/*
	 * add to chain
	 */
	new->cached = 1;
	new->next = rp;
	*l = new;
}
/*
* Attach a list of resource records to a domain name.
* May rrfree any stale duplicate RRs; dismembers the list.
* Upon return, every RR in the list will have been rrfree-d
* or attached to its domain name.
* See rrattach1 for properties preserved.
*/
void
rrattach(RR *rp, int auth)
{
RR *next;
DN *dp;
/*
 * lookup a resource record of a particular type and
 * class attached to a domain name. Return copies.
 *
 * Priority ordering is:
 *	db authoritative
 *	not timed out network authoritative
 *	not timed out network unauthoritative
 *	unauthoritative db
 *
 * if flag NOneg is set, don't return negative cached entries.
 * return nothing instead.
 *
 * returns the list of copies built with rrcopy, or nil when
 * nothing matched.  dnlock is held for the duration of the search
 * and released on every exit path.
 */
RR*
rrlookup(DN *dp, int type, int flag)
{
	RR *rp, *first, **last;

	assert(dp->magic == DNmagic);

	first = nil;
	last = &first;
	lock(&dnlock);

	/* try for an authoritative db entry */
	for(rp = dp->rr; rp; rp = rp->next){
		assert(rp->magic == RRmagic && rp->cached);
		if(rp->db)
		if(rp->auth)
		if(tsame(type, rp->type))
			last = rrcopy(rp, last);
	}
	if(first)
		goto out;

	/* try for a living authoritative network entry */
	for(rp = dp->rr; rp; rp = rp->next){
		if(!rp->db)
		if(rp->auth)
		if((long)(rp->expire - now) > 0)
		if(tsame(type, rp->type)){
			if(flag == NOneg && rp->negative)
				goto out;
			last = rrcopy(rp, last);
		}
	}
	if(first)
		goto out;

	/* try for a living unauthoritative network entry */
	for(rp = dp->rr; rp; rp = rp->next){
		if(!rp->db)
		if((long)(rp->expire - now) > 0)
		if(tsame(type, rp->type)){
			if(flag == NOneg && rp->negative)
				goto out;
			last = rrcopy(rp, last);
		}
	}
	if(first)
		goto out;

	/* try for an unauthoritative db entry */
	for(rp = dp->rr; rp; rp = rp->next){
		if(rp->db)
		if(tsame(type, rp->type))
			last = rrcopy(rp, last);
	}
	if(first)
		goto out;

	/* otherwise, settle for anything we got (except for negative caches) */
	for(rp = dp->rr; rp; rp = rp->next)
		if(tsame(type, rp->type)){
			if(rp->negative)
				goto out;
			last = rrcopy(rp, last);
		}

out:
	/* single exit: always drop dnlock before handing back the copies */
	unlock(&dnlock);
	return first;
}
/*
 * return 1 if type is within 0..Tall and has an entry in
 * rrtname; return 0 if not a supported rr type.
 */
int
rrsupported(int type)
{
	if(type >= 0 && type <= Tall && rrtname[type] != nil)
		return 1;
	return 0;
}
/*
 * print conversion for rr records.
 *
 * takes one RR* from the format's argument list and renders it
 * into a temporary string, which is then copied to f with
 * fmtstrcpy.  returns fmtstrcpy's result.
 * a nil RR prints as "<null>".
 */
int
rrfmt(Fmt *f)
{
	int rv;
	char *strp;
	char buf[Domlen];
	Fmt fstr;
	RR *rp;
	Server *s;
	SOA *soa;
	Srv *srv;
	Txt *t;

	/* fstr must be set up before any fmtprint into it */
	fmtstrinit(&fstr);

	rp = va_arg(f->args, RR*);
	if(rp == nil){
		fmtprint(&fstr, "<null>");
		goto out;
	}

	/*
	 * NOTE(review): rrname() and rp->negrcode are defined outside
	 * this view -- confirm they match the rest of the file.
	 */
	fmtprint(&fstr, "%s %s", dnname(rp->owner),
		rrname(rp->type, buf, sizeof buf));

	/* a negative record has no per-type data to print */
	if(rp->negative){
		fmtprint(&fstr, "\tnegative - rcode %d", rp->negrcode);
		goto out;
	}

	switch(rp->type){
	case Thinfo:
		fmtprint(&fstr, "\t%s %s", dnname(rp->cpu), dnname(rp->os));
		break;
	case Tcname:
	case Tmb:
	case Tmd:
	case Tmf:
	case Tns:
		fmtprint(&fstr, "\t%s", dnname(rp->host));
		break;
	case Tmg:
	case Tmr:
		fmtprint(&fstr, "\t%s", dnname(rp->mb));
		break;
	case Tminfo:
		fmtprint(&fstr, "\t%s %s", dnname(rp->mb), dnname(rp->rmb));
		break;
	case Tmx:
		fmtprint(&fstr, "\t%lud %s", rp->pref, dnname(rp->host));
		break;
	case Ta:
	case Taaaa:
		fmtprint(&fstr, "\t%s", dnname(rp->ip));
		break;
	case Tptr:
		fmtprint(&fstr, "\t%s", dnname(rp->ptr));
		break;
	case Tsoa:
		soa = rp->soa;
		fmtprint(&fstr, "\t%s %s %lud %lud %lud %lud %lud",
			dnname(rp->host), dnname(rp->rmb),
			(soa? soa->serial: 0),
			(soa? soa->refresh: 0), (soa? soa->retry: 0),
			(soa? soa->expire: 0), (soa? soa->minttl: 0));
		if (soa)
			for(s = soa->slaves; s != nil; s = s->next)
				fmtprint(&fstr, " %s", s->name);
		break;
	case Tsrv:
		srv = rp->srv;
		fmtprint(&fstr, "\t%ud %ud %ud %s",
			(srv? srv->pri: 0), (srv? srv->weight: 0),
			rp->port, dnname(rp->host));
		break;
	case Tnull:
		if (rp->null == nil)
			fmtprint(&fstr, "\t<null>");
		else
			fmtprint(&fstr, "\t%.*H", rp->null->dlen,
				rp->null->data);
		break;
	case Ttxt:
		fmtprint(&fstr, "\t");
		for(t = rp->txt; t != nil; t = t->next)
			fmtprint(&fstr, "%s", t->p);
		break;
	case Trp:
		fmtprint(&fstr, "\t%s %s", dnname(rp->rmb), dnname(rp->rp));
		break;
	case Tkey:
		if (rp->key == nil)
			fmtprint(&fstr, "\t<null> <null> <null>");
		else
			fmtprint(&fstr, "\t%d %d %d", rp->key->flags,
				rp->key->proto, rp->key->alg);
		break;
	case Tsig:
		if (rp->sig == nil)
			fmtprint(&fstr,
		   "\t<null> <null> <null> <null> <null> <null> <null> <null>");
		else
			fmtprint(&fstr, "\t%d %d %d %lud %lud %lud %d %s",
				rp->sig->type, rp->sig->alg, rp->sig->labels,
				rp->sig->ttl, rp->sig->exp, rp->sig->incep,
				rp->sig->tag, dnname(rp->sig->signer));
		break;
	case Tcert:
		if (rp->cert == nil)
			fmtprint(&fstr, "\t<null> <null> <null>");
		else
			fmtprint(&fstr, "\t%d %d %d",
				rp->cert->type, rp->cert->tag, rp->cert->alg);
		break;
	}
out:
	/* hand the accumulated text to the caller's Fmt, then release it */
	strp = fmtstrflush(&fstr);
	rv = fmtstrcpy(f, strp);
	free(strp);
	return rv;
}
/*
* print conversion for rr records in attribute value form
*/
int
rravfmt(Fmt *f)
{
int rv, quote;
char buf[Domlen], *strp;
Fmt fstr;
RR *rp;
Server *s;
SOA *soa;
Srv *srv;
Txt *t;
/*
 * create a slave process to handle a request to avoid one request blocking
 * another.
 *
 * on return in the child, req->isslave is set and the child carries on
 * with the request.  the parent does not return from here: it longjmps
 * to req->mret.  if rfork fails, the caller simply continues with the
 * request itself.
 */
void
slave(Request *req)
{
	int ppid;

	if(req->isslave)
		return;		/* we're already a slave process */

	/*
	 * These calls to putactivity cannot block.
	 * After getactivity(), the current process is counted
	 * twice in dnvars.active (one will pass to the child).
	 * If putactivity tries to wait for dnvars.active == 0,
	 * it will never happen.
	 */
	/*
	 * take the extra count the child will inherit; without it the
	 * putactivity(1) on rfork failure has nothing to give back.
	 * recursive, because this process already holds a count.
	 * the returned count is unused: no parallelism cap is applied here.
	 */
	getactivity(req, 1);

	/*
	 * parent returns to main loop, child does the work.
	 * don't change note group.
	 */
	ppid = getpid();
	switch(rfork(RFPROC|RFMEM|RFNOWAIT)){
	case -1:
		/* no child to inherit the extra count; drop it */
		putactivity(1);
		break;
	case 0:
		procsetname("request slave of pid %d", ppid);
		if(traceactivity)
			dnslog("[%d] take activity from %d", getpid(), ppid);
		req->isslave = 1;	/* why not `= getpid()'? */
		break;
	default:
		/*
		 * this relies on rfork producing separate, initially-identical
		 * stacks, thus giving us two copies of `req', one in each
		 * process.
		 */
		alarm(0);
		longjmp(req->mret, 1);
	}
}
/*
 * randomize the order we return items to provide some
 * load balancing for servers.
 *
 * only randomize the first class of entries, i.e. the leading
 * records that share the first record's auth and db flags; the
 * remainder is appended unchanged.  lists containing anything
 * other than A, AAAA, MX or NS records are returned as they are.
 */
RR*
randomize(RR *rp)
{
	RR *head, *tail, *cur, *lead;
	ulong bits;

	if(rp == nil || rp->next == nil)
		return rp;

	/* just randomize addresses, mx's and ns's */
	for(cur = rp; cur != nil; cur = cur->next)
		switch(cur->type){
		case Ta:
		case Taaaa:
		case Tmx:
		case Tns:
			break;
		default:
			return rp;
		}

	lead = rp;
	bits = rand();
	head = tail = nil;

	/* one random bit per record decides head or tail insertion */
	for(; rp != nil; bits >>= 1){
		/* stop randomizing if we've moved past our class */
		if(lead->auth != rp->auth || lead->db != rp->db){
			tail->next = rp;
			break;
		}

		/* unchain */
		cur = rp;
		rp = cur->next;
		cur->next = nil;

		if(bits & 1){
			/* add to tail */
			if(tail == nil)
				head = cur;
			else
				tail->next = cur;
			tail = cur;
		}else{
			/* add to head */
			if(tail == nil)
				tail = cur;
			cur->next = head;
			head = cur;
		}
	}
	return head;
}
static int
sencodefmt(Fmt *f)
{
int i, len, ilen, rv;
char *out, *buf;
uchar *b;
char obuf[64]; /* rsc optimization */
void bytes2nibbles(uchar *nibbles, uchar *bytes, int nbytes);
/*
* look for all ip addresses in this network and make
* pointer records for them.
*/
void
dnptr(uchar *net, uchar *mask, char *dom, int forwtype, int subdoms, int ttl)
{
int i, j, len;
char *p, *e;
char ptr[Domlen];
uchar *ipp;
uchar ip[IPaddrlen], nnet[IPaddrlen];
uchar nibip[IPaddrlen*2];
DN *dp;
RR *rp, *nrp, *first, **l;
ipp = ip;
len = IPaddrlen;
if (forwtype == Taaaa) {
bytes2nibbles(nibip, ip, IPaddrlen);
ipp = nibip;
len = 2*IPaddrlen;
}
p = ptr;
e = ptr+sizeof(ptr);
for(j = len - 1; j >= len - subdoms; j--)
p = seprint(p, e, (forwtype == Ta?
"%d.": "%x."), ipp[j]);
seprint(p, e, "%s", dom);