/*
* ntp_intres.c - Implements a generic blocking worker child or thread,
* initially to provide a nonblocking solution for DNS
* name to address lookups available with getaddrinfo().
*
* This is a new implementation as of 2009 sharing the filename and
* very little else with the prior implementation, which used a
* temporary file to receive a single set of requests from the parent,
* and a NTP mode 7 authenticated request to push back responses.
*
* A primary goal in rewriting this code was the need to support the
* pool configuration directive's requirement to retrieve multiple
* addresses resolving a single name, which has previously been
* satisfied with blocking resolver calls from the ntpd mainline code.
*
* A secondary goal is to provide a generic mechanism for other
* blocking operations to be delegated to a worker using a common
* model for both Unix and Windows ntpd. ntp_worker.c, work_fork.c,
* and work_thread.c implement the generic mechanism. This file
* implements the two current consumers, getaddrinfo_sometime() and the
* presently unused getnameinfo_sometime().
*
* Both routines deliver results to a callback and manage memory
* allocation, meaning there is no freeaddrinfo_sometime().
*
* The initial implementation for Unix uses a pair of unidirectional
* pipes, one each for requests and responses, connecting the forked
* blocking child worker with the ntpd mainline. The threaded code
* uses arrays of pointers to queue requests and responses.
*
* The parent drives the process, including scheduling sleeps between
* retries.
*
* Memory is managed differently for a child process, which mallocs
* request buffers to read from the pipe into, whereas the threaded
* code mallocs a copy of the request to hand off to the worker via
* the queueing array. The resulting request buffer is free()d by
* platform-independent code. A wrinkle is the request needs to be
* available to the requestor during response processing.
*
* Response memory allocation is also platform-dependent. With a
* separate process and pipes, the response is free()d after being
* written to the pipe. With threads, the same memory is handed
* over and the requestor frees it after processing is completed.
*
* The code should be generalized to support threads on Unix using
* much of the same code used for Windows initially.
*
*/
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
/*
* Following are implementations of getaddrinfo_sometime() and
* getnameinfo_sometime(). Each is implemented in three routines:
*
* getaddrinfo_sometime() getnameinfo_sometime()
* blocking_getaddrinfo() blocking_getnameinfo()
* getaddrinfo_sometime_complete() getnameinfo_sometime_complete()
*
* The first runs in the parent and marshalls (or serializes) request
* parameters into a request blob which is processed in the child by
* the second routine, blocking_*(), which serializes the results into
* a response blob unpacked by the third routine, *_complete(), which
* calls the callback routine provided with the request and frees
* _request_ memory allocated by the first routine. Response memory
* is managed by the code which calls the *_complete routines.
*/
/*
* Fixed-size header of the response blob built for a
* getaddrinfo_sometime() request. The variable-length results
* described at the end of the struct follow it in the same buffer.
*/
typedef struct blocking_gai_resp_tag {
size_t octets; /* total response size: this header plus all trailing data */
int retcode; /* resolver result code; 0 is handled as success */
int retry; /* retry value for this query; above INITIAL_DNS_RETRY is
            * treated as "succeeded only after retrying"
            * (presumably copied from the request -- confirm) */
int gai_errno; /* for EAI_SYSTEM case */
int ai_count; /* number of addrinfo results counted from the resolver's list */
/*
* Followed by ai_count struct addrinfo and then ai_count
* sockaddr_u and finally the canonical name strings.
*/
} blocking_gai_resp;
/*
* Fixed-size header of the response blob built for a
* getnameinfo_sometime() request. The host and service strings
* described at the end of the struct follow it in the same buffer.
*/
typedef struct blocking_gni_resp_tag {
size_t octets; /* NOTE(review): presumably the total response size, as in
               * blocking_gai_resp -- confirm where it is set */
int retcode; /* resolver result code; 0 is handled as success */
int gni_errno; /* for EAI_SYSTEM case */
int retry; /* retry value for this query; above INITIAL_DNS_RETRY is
            * treated as "succeeded only after retrying" */
size_t hostoctets; /* strlen(host) + 1 on success, 0 on failure */
size_t servoctets; /* strlen(service) + 1 on success, 0 on failure */
/*
* Followed by hostoctets bytes of null-terminated host,
* then servoctets bytes of null-terminated service.
*/
} blocking_gni_resp;
/*
* per-DNS-worker state in parent
*
* next_dns_timeslot is reset to the current time when a lookup succeeds
* after retrying, and is pushed forward by
* manage_dns_retry_interval() each time a retry is scheduled.
*/
typedef struct dnschild_ctx_tag {
u_int index; /* NOTE(review): presumably this worker's slot number
             * (cf. dns_idx in requests) -- confirm */
time_t next_dns_timeslot; /* earliest time the next retry for this worker may be scheduled */
} dnschild_ctx;
if (0 == gai_resp->retcode) {
ai = ai_res;
while (NULL != ai) {
gai_resp->ai_count++;
if (ai->ai_canonname)
canons_octets += strlen(ai->ai_canonname) + 1;
ai = ai->ai_next;
}
/*
* If this query succeeded only after retrying, DNS may have
* just become responsive. Ignore previously-scheduled
* retry sleeps once for each pending request, similar to
* the way scheduled_sleep() does when its worker_sleep()
* is interrupted.
*/
if (gai_resp->retry > INITIAL_DNS_RETRY) {
time_now = time(NULL);
worker_ctx->ignore_scheduled_before = time_now;
TRACE(1, ("DNS success after retry, ignoring sleeps scheduled before now (%s)\n",
humantime(time_now)));
}
}
/*
* Our response consists of a header, followed by ai_count
* addrinfo structs followed by ai_count sockaddr_u
* structs followed by the canonical names.
*/
gai_resp->octets = sizeof(*gai_resp)
+ gai_resp->ai_count
* (sizeof(gai_req->hints)
+ sizeof(sockaddr_u))
+ canons_octets;
for (i = 0; i < gai_resp->ai_count; i++) {
if (NULL != ai[i].ai_addr)
ai[i].ai_addr = &psau->sa;
psau++;
if (NULL != ai[i].ai_canonname)
ai[i].ai_canonname += (size_t)canon_start;
}
/*
* Some alloca() implementations are fragile regarding
* large allocations. We only need room for the host
* and service names.
*/
REQUIRE(octets < sizeof(host));
service = host + gni_req->hostoctets;
if (0 != gni_resp->retcode) {
gni_resp->hostoctets = 0;
gni_resp->servoctets = 0;
} else {
gni_resp->hostoctets = strlen(host) + 1;
gni_resp->servoctets = strlen(service) + 1;
/*
* If this query succeeded only after retrying, DNS may have
* just become responsive. Ignore previously-scheduled
* retry sleeps once for each pending request, similar to
* the way scheduled_sleep() does when its worker_sleep()
* is interrupted.
*/
if (gni_req->retry > INITIAL_DNS_RETRY) {
time_now = time(NULL);
worker_ctx->ignore_scheduled_before = time_now;
TRACE(1, ("DNS success after retrying, ignoring sleeps scheduled before now (%s)\n",
humantime(time_now)));
}
}
octets = gni_resp->hostoctets + gni_resp->servoctets;
/*
* Our response consists of a header, followed by the host and
* service strings, each null-terminated.
*/
resp_octets = sizeof(*resp) + sizeof(*gni_resp) + octets;
if (0 == gni_resp->retcode) {
/*
* If this query succeeded only after retrying, DNS may have
* just become responsive.
*/
if (gni_resp->retry > INITIAL_DNS_RETRY) {
time_now = time(NULL);
child_ctx->next_dns_timeslot = time_now;
TRACE(1, ("DNS success after retry, %u next_dns_timeslot reset (%s)\n",
gni_req->dns_idx, humantime(time_now)));
}
} else {
again = should_retry_dns(gni_resp->retcode, gni_resp->gni_errno);
/*
* exponential backoff of DNS retries to 64s
*/
if (gni_req->retry > 0)
manage_dns_retry_interval(&gni_req->scheduled,
&gni_req->earliest, &gni_req->retry,
&child_ctx->next_dns_timeslot, FALSE);
if (gni_req->retry > 0 && again) {
if (!queue_blocking_request(
BLOCKING_GETNAMEINFO,
gni_req,
gni_req->octets,
&getnameinfo_sometime_complete,
gni_req))
return;
msyslog(LOG_ERR, "unable to retry reverse lookup of %s", stoa(&gni_req->socku));
}
}
if (!gni_resp->hostoctets) {
host = NULL;
service = NULL;
} else {
host = (char *)gni_resp + sizeof(*gni_resp);
service = (gni_resp->servoctets)
? host + gni_resp->hostoctets
: NULL;
}
/*
* This is ad-hoc. Reload /etc/resolv.conf once per minute
* to pick up on changes from the DHCP client. [Bug 1226]
* When using threads for the workers, this needs to happen
* only once per minute process-wide.
*/
time_now = time(NULL);
# ifdef WORK_THREAD
worker_ctx->next_res_init = next_res_init;
# endif
if (worker_ctx->next_res_init <= time_now) {
if (worker_ctx->next_res_init != 0)
res_init();
worker_ctx->next_res_init = time_now + 60;
# ifdef WORK_THREAD
next_res_init = worker_ctx->next_res_init;
# endif
}
}
#endif /* HAVE_RES_INIT */
if (scheduled < worker_ctx->ignore_scheduled_before) {
TRACE(1, ("ignoring sleep until %s scheduled at %s (before %s)\n",
humantime(earliest), humantime(scheduled),
humantime(worker_ctx->ignore_scheduled_before)));
return;
}
now = time(NULL);
if (now < earliest) {
TRACE(1, ("sleep until %s scheduled at %s (>= %s)\n",
humantime(earliest), humantime(scheduled),
humantime(worker_ctx->ignore_scheduled_before)));
if (-1 == worker_sleep(worker_ctx->c, earliest - now)) {
/* our sleep was interrupted */
now = time(NULL);
worker_ctx->ignore_scheduled_before = now;
#ifdef HAVE_RES_INIT
worker_ctx->next_res_init = now + 60;
next_res_init = worker_ctx->next_res_init;
res_init();
#endif
TRACE(1, ("sleep interrupted by daemon, ignoring sleeps scheduled before now (%s)\n",
humantime(worker_ctx->ignore_scheduled_before)));
}
}
}
/*
* manage_dns_retry_interval is a helper used by
* getaddrinfo_sometime_complete and getnameinfo_sometime_complete
* to calculate the new retry interval and schedule the next query.
*
* pscheduled      NOTE(review): presumably receives the time at which
*                 this retry was scheduled -- confirm against the
*                 stores at the end of the function.
* pwhen           NOTE(review): presumably receives the earliest time
*                 the retry may run (callers pass &req->earliest) --
*                 confirm.
* pretry          in: current retry interval in seconds, added to the
*                 current time to pick the retry time.
* pnext_timeslot  in/out: the chosen retry time is never earlier than
*                 the value found here, and is written back here.
* forever         nonzero selects the 1024 second cap on the retry
*                 interval, zero selects the 64 second cap.
*/
static void
manage_dns_retry_interval(
time_t * pscheduled,
time_t * pwhen,
int * pretry,
time_t * pnext_timeslot,
int forever
)
{
time_t now;
time_t when;
int retry;
int retmax;
now = time(NULL);
retry = *pretry;
/* retry no sooner than the current interval from now, and not before
 * the time slot already recorded for this worker */
when = max(now + retry, *pnext_timeslot);
*pnext_timeslot = when;
/* this exponential backoff is slower than doubling up: The
* sequence goes 2-3-4-6-8-12-16-24-32... and the upper limit is
* 64 seconds for things that should not repeat forever, and
* 1024 when repeated forever.
*/
retmax = forever ? 1024 : 64;
retry <<= 1;
/*
* After doubling: if the value is not a power of two, clear its lowest
* set bit (e.g. 3 -> 6 -> 4); if it is a power of two, take three
* quarters of it (e.g. 4 -> 8 -> 6). Alternating the two steps gives
* the sequence described above.
*/
if (retry & (retry - 1))
retry &= (retry - 1);
else
retry -= (retry >> 2);
/* clamp to the cap selected by 'forever' */
retry = min(retmax, retry);
/*
* should_retry_dns is a helper used by getaddrinfo_sometime_complete
* and getnameinfo_sometime_complete which implements ntpd's DNS retry
* policy.
*/
static int
should_retry_dns(
int rescode,
int res_errno
)
{
static int eai_again_seen;
int again;
#if defined (EAI_SYSTEM) && defined(DEBUG)
char msg[256];
#endif
/*
* If the resolver failed, see if the failure is
* temporary. If so, return success.
*/
again = 0;
switch (rescode) {
case EAI_FAIL:
again = 1;
break;
case EAI_AGAIN:
again = 1;
eai_again_seen = 1; /* [Bug 1178] */
break;
case EAI_NONAME:
#if defined(EAI_NODATA) && (EAI_NODATA != EAI_NONAME)
case EAI_NODATA:
#endif
again = !eai_again_seen; /* [Bug 1178] */
break;
#ifdef EAI_SYSTEM
case EAI_SYSTEM:
/*
* EAI_SYSTEM means the real error is in errno. We should be more
* discriminating about which errno values require retrying, but
* this matches existing behavior.
*/
again = 1;
# ifdef DEBUG
errno_to_str(res_errno, msg, sizeof(msg));
TRACE(1, ("intres: EAI_SYSTEM errno %d (%s) means try again, right?\n",
res_errno, msg));
# endif
break;
#endif
}