/*-
* Copyright (c) 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Andrew Doran.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)kern_ktrace.c 8.5 (Berkeley) 5/14/95
*/
/*
* TODO:
* - need better error reporting?
* - userland utility to sort ktrace.out by timestamp.
* - keep minimal information in ktrace_entry when the rest of the allocation fails.
* - per-trace control of configurable parameters.
*/
struct ktr_desc {
TAILQ_ENTRY(ktr_desc) ktd_list;
int ktd_flags;
#define KTDF_WAIT 0x0001
#define KTDF_DONE 0x0002
#define KTDF_BLOCKING 0x0004
#define KTDF_INTERACTIVE 0x0008
int ktd_error;
#define KTDE_ENOMEM 0x0001
#define KTDE_ENOSPC 0x0002
int ktd_errcnt;
int ktd_ref; /* # of reference */
int ktd_qcount; /* # of entry in the queue */
/*
* Params to control behaviour.
*/
int ktd_delayqcnt; /* # of entry allowed to delay */
int ktd_wakedelay; /* delay of wakeup in *tick* */
int ktd_intrwakdl; /* ditto, but when interactive */
/*
* Patchable variables.
*/
int ktd_maxentry = KTD_MAXENTRY; /* max # of entry in the queue */
int ktd_timeout = KTD_TIMEOUT; /* timeout in seconds */
int ktd_delayqcnt = KTD_DELAYQCNT; /* # of entry allowed to delay */
int ktd_wakedelay = KTD_WAKEDELAY; /* delay of wakeup in *ms* */
int ktd_intrwakdl = KTD_INTRWAKDL; /* ditto, but when interactive */
if (p->p_traceflag & KTRFAC_TRC_EMUL) {
/* Add emulation trace before first entry for this process */
p->p_traceflag &= ~KTRFAC_TRC_EMUL;
mutex_exit(&ktrace_lock);
ktrexit(l);
ktremul();
(void)ktrenter(l);
mutex_enter(&ktrace_lock);
}
/* Tracing may have been cancelled. */
ktd = p->p_tracep;
if (ktd == NULL)
goto freekte;
/*
* Bump the reference count so that the object remains valid while
* we are here. Note that the trace is controlled by another
* process.
*/
ktdref(ktd);
if (ktd->ktd_flags & KTDF_DONE)
goto relktd;
if (ktd->ktd_qcount > ktd_maxentry) {
ktd_logerrl(ktd, KTDE_ENOSPC);
goto relktd;
}
TAILQ_INSERT_TAIL(&ktd->ktd_queue, kte, kte_list);
ktd->ktd_qcount++;
if (ktd->ktd_flags & KTDF_BLOCKING)
goto skip_sync;
if (flags & KTA_WAITOK &&
(/* flags & KTA_LARGE */0 || ktd->ktd_flags & KTDF_WAIT ||
ktd->ktd_qcount > ktd_maxentry >> 1))
/*
* Sync with the writer thread since either this is a rather large
* request or many requests are pending.
*/
do {
ktd->ktd_flags |= KTDF_WAIT;
ktd_wakeup(ktd);
#ifdef DEBUG
getmicrouptime(&t1);
#endif
if (cv_timedwait(&ktd->ktd_sync_cv, &ktrace_lock,
ktd_timeout * hz) != 0) {
ktd->ktd_flags |= KTDF_BLOCKING;
/*
* Maybe the writer thread is blocking
* completely for some reason, but
* don't stop target process forever.
*/
log(LOG_NOTICE, "ktrace timeout\n");
break;
}
#ifdef DEBUG
getmicrouptime(&t2);
timersub(&t2, &t1, &t2);
if (t2.tv_sec > 0)
log(LOG_NOTICE,
"ktrace long wait: %lld.%06ld\n",
(long long)t2.tv_sec, (long)t2.tv_usec);
#endif
} while (p->p_tracep == ktd &&
(ktd->ktd_flags & (KTDF_WAIT | KTDF_DONE)) == KTDF_WAIT);
else {
/* Schedule delayed wakeup */
if (ktd->ktd_qcount > ktd->ktd_delayqcnt)
ktd_wakeup(ktd); /* Wakeup now */
else if (!callout_pending(&ktd->ktd_wakch))
callout_reset(&ktd->ktd_wakch,
ktd->ktd_flags & KTDF_INTERACTIVE ?
ktd->ktd_intrwakdl : ktd->ktd_wakedelay,
ktd_callout, ktd);
}
if (kte->kte_buf != kte->kte_space)
kmem_free(kte->kte_buf, kte->kte_bufsz);
pool_cache_put(kte_cache, kte);
}
/*
* "deep" compare of two files for the purposes of clearing a trace.
* Returns true if they're the same open file, or if they point at the
* same underlying vnode/socket.
*/
/*
* Don't push too many entries at once; doing so would cause a kmem
* map shortage.
*/
ktraddentry(l, kte, KTA_WAITOK | KTA_LARGE);
if (resid > 0) {
if (preempt_needed()) {
(void)ktrenter(l);
preempt();
ktrexit(l);
}
/*
* Don't record context switches resulting from blocking on
* locks; the results are not useful, and the mutex may be in a
* softint, which would lead us to ktealloc in softint context,
* which is forbidden.
*/
if (syncobj == &mutex_syncobj || syncobj == &rw_syncobj)
return;
KASSERT(!cpu_intr_p());
KASSERT(!cpu_softintr_p());
/*
* We can't sleep if we're already going to sleep (if the original
* condition is met during the sleep, we hang).
*
* XXX This is not ideal: it would be better to maintain a pool
* of ktes and actually push this to the kthread when context
* switch happens, however given the points where we are called
* from that is difficult to do.
*/
if (out) {
if (ktrenter(l))
return;
/*
* On the way back in, we need to record twice: once for entry, and
* once for exit.
*/
if ((l->l_pflag & LP_KTRCSW) != 0) {
struct timespec *ts;
l->l_pflag &= ~LP_KTRCSW;
if (ktealloc(&kte, (void *)&kc, l, KTR_CSW, sizeof(*kc)))
return;
/*
* ktrace_common: apply the ktrace operation "ops" with facility mask
* "facs" to a single process (pid >= 0) or to every member of a process
* group (pid < 0, group id is -pid).
*
* Returns 0 on success, otherwise an errno value:
*   EINVAL - "facs" is empty;
*   ESRCH  - the process or process group could not be found;
*   EPERM  - the target was found but every ktrops()/ktrsetchildren()
*            call returned 0.
* On every path *fpp is set to NULL before returning.
*/
int
ktrace_common(lwp_t *curl, int ops, int facs, int pid, file_t **fpp)
{
struct proc *p;
struct pgrp *pg;
/*
* NOTE(review): in the visible body ktd is never assigned after this
* initialization, and nktd and fp are never referenced again --
* confirm against the full function.
*/
struct ktr_desc *ktd = NULL, *nktd;
file_t *fp = *fpp;
int ret = 0;
int error = 0;
/*
* NOTE(review): no assignment to descend is visible before it is read
* below; presumably it is derived from a flag in "ops" -- confirm.
*/
int descend;
/*
* need something to (un)trace (XXX - why is this here?)
*/
if (!facs) {
error = EINVAL;
*fpp = NULL;
goto done;
}
/*
* do it
*/
/* proc_lock is held across the lookup and the whole walk below. */
mutex_enter(&proc_lock);
if (pid < 0) {
/*
* by process group
*/
/* -INT_MIN is not representable, so treat it as "not found". */
if (pid == INT_MIN)
pg = NULL;
else
pg = pgrp_find(-pid);
if (pg == NULL)
error = ESRCH;
else {
/* Accumulate the per-member results; any non-zero bit sticks. */
LIST_FOREACH(p, &pg->pg_members, p_pglist) {
if (descend)
ret |= ktrsetchildren(curl, p, ops,
facs, ktd);
else
ret |= ktrops(curl, p, ops, facs,
ktd);
}
}
} else {
/*
* by pid
*/
p = proc_find(pid);
if (p == NULL)
error = ESRCH;
else if (descend)
ret |= ktrsetchildren(curl, p, ops, facs, ktd);
else
ret |= ktrops(curl, p, ops, facs, ktd);
}
mutex_exit(&proc_lock);
/* A target was found but nothing reported success: report EPERM. */
if (error == 0 && !ret)
error = EPERM;
*fpp = NULL;
done:
if (ktd != NULL) {
mutex_enter(&ktrace_lock);
if (error != 0) {
/*
* Wake up the thread so that it can die if we
* can't trace any process.
*/
ktd_wakeup(ktd);
}
/*
* Only SET and CLEARFILE drop a reference here; presumably these
* are the operations that took one on ktd earlier -- the
* acquisition is not visible in this body.
*/
if (KTROP(ops) == KTROP_SET || KTROP(ops) == KTROP_CLEARFILE)
ktdrel(ktd);
mutex_exit(&ktrace_lock);
}
/* NOTE(review): the matching ktrenter() call is not visible here. */
ktrexit(curl);
return (error);
}
switch (vers) {
case KTRFACv0:
case KTRFACv1:
case KTRFACv2:
break;
default:
error = EINVAL;
goto out;
}
if (KTROP(ops) == KTROP_SET) {
if (p->p_tracep != ktd) {
/*
* if trace file already in use, relinquish
*/
ktrderef(p);
p->p_tracep = ktd;
ktradref(p);
}
p->p_traceflag |= facs;
if (kauth_authorize_process(curl->l_cred, KAUTH_PROCESS_KTRACE,
p, KAUTH_ARG(KAUTH_REQ_PROCESS_KTRACE_PERSISTENT), NULL,
NULL) == 0)
p->p_traceflag |= KTRFAC_PERSISTENT;
} else {
/* KTROP_CLEAR */
if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) {
/* no more tracing */
ktrderef(p);
}
}
if (p->p_traceflag)
p->p_traceflag |= vers;
/*
* Emit an emulation record, every time there is a ktrace
* change/attach request.
*/
if (KTRPOINT(p, KTR_EMUL))
p->p_traceflag |= KTRFAC_TRC_EMUL;
/*
* ktrsetchildren: apply ktrops() to "top" and to every process below it
* in the process tree, visiting parents before their children.
*
* The caller must hold proc_lock.  The return value is the bitwise OR of
* all the ktrops() results.  Siblings of "top" itself are never visited.
*/
static int
ktrsetchildren(lwp_t *curl, struct proc *top, int ops, int facs,
    struct ktr_desc *ktd)
{
	struct proc *cur, *next;
	int result = 0;

	KASSERT(mutex_owned(&proc_lock));

	cur = top;
	while (1) {
		result |= ktrops(curl, cur, ops, facs, ktd);

		/* Go down a level first whenever there is a child. */
		next = LIST_FIRST(&cur->p_children);
		if (next != NULL) {
			cur = next;
			continue;
		}

		/*
		 * Leaf: climb until some ancestor (or the leaf itself) has
		 * an unvisited sibling, but never climb past top.
		 */
		while (cur != top &&
		    (next = LIST_NEXT(cur, p_sibling)) == NULL)
			cur = cur->p_pptr;

		if (cur == top)
			return (result);

		cur = next;
	}
	/*NOTREACHED*/
}
case 0:
if (auio.uio_resid > 0)
goto again;
if (kte != NULL)
goto next;
break;
case EWOULDBLOCK:
kpause("ktrzzz", false, 1, NULL);
goto again;
default:
/*
* If error encountered, give up tracing on this
* vnode. Don't report EPIPE as this can easily
* happen with fktrace()/ktruss.
*/
#ifndef DEBUG
if (error != EPIPE)
#endif
log(LOG_NOTICE,
"ktrace write failed, errno %d, tracing stopped\n",
error);
(void)ktrderefall(ktd, 0);
}
while ((kte = top) != NULL) {
top = TAILQ_NEXT(top, kte_list);
ktefree(kte);
}
}
/*
* ktrace file descriptors can't be watched (they are not visible to
* userspace), so there is no kqueue handling here.
* XXX: The above comment is wrong, because the fktrace file
* descriptor is available in userland.
*/
closef(fp);
/*
* Return true if caller has permission to set the ktracing state
* of target. Essentially, the target can't possess any
* more permissions than the caller. KTRFAC_PERSISTENT signifies that
* the tracing will persist on sugid processes during exec; it is only
* settable by a process with appropriate credentials.
*
* TODO: check groups. use caller effective gid.
*/
static int
ktrcanset(lwp_t *calll, struct proc *targetp)
{
KASSERT(mutex_owned(targetp->p_lock));
KASSERT(mutex_owned(&ktrace_lock));