/*-
* Copyright (c) 1999, 2006, 2007, 2008, 2020, 2023
* The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
* NASA Ames Research Center, and by Andrew Doran.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Copyright (c) 1982, 1986, 1989, 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)kern_proc.c 8.7 (Berkeley) 2/14/95
*/
/*
* pid to lwp/proc lookup is done by indexing the pid_table array.
* Since pid numbers are only allocated when an empty slot
* has been found, there is no need to search any lists ever.
* (an orphaned pgrp will lock the slot, a session will lock
* the pgrp with the same number.)
* If the table is too small it is reallocated with twice the
* previous size and the entries 'unzipped' into the two halves.
* A linked list of free entries is passed through the pt_lwp
* field of 'free' items - set odd to be an invalid ptr. Two
* additional bits are also used to indicate if the slot is
* currently occupied by a proc or lwp, and if the PID is
* hidden from certain kinds of lookups. We thus require a
* minimum alignment for proc and lwp structures (LWPs are
* at least 32-byte aligned).
*/
/*
* Constant copies of the NOFILE and MAXUPRC build-time values.
* NOTE(review): presumably referenced by address elsewhere in the file
* (e.g. as read-only sysctl data) -- confirm against the users.
*/
static const int nofile = NOFILE;
static const int maxuprc = MAXUPRC;
/* Forward declarations of the sysctl handlers defined later in this file. */
static int sysctl_doeproc(SYSCTLFN_PROTO);
static int sysctl_kern_proc_args(SYSCTLFN_PROTO);
static int sysctl_security_expose_address(SYSCTLFN_PROTO);
/*
* Policy knob consulted by the KAUTH_REQ_PROCESS_CANSEE_KPTR check:
* 0 => the request is never allowed,
* 1 => allowed only when the process being checked has PK_KMEM set,
* any other value => allowed.
* Defaults to 0 when the kernel is built with KASLR, 1 otherwise.
*/
#ifdef KASLR
static int kern_expose_address = 0;
#else
static int kern_expose_address = 1;
#endif
/*
* The process list descriptors, used during pid allocation and
* by sysctl. No locking on this data structure is needed since
* it is completely static.
*
* The array is terminated by an entry holding NULL.
*/
const struct proclist_desc proclists[] = {
{ &allproc },
{ &zombproc },
{ NULL },
};
switch (action) {
case KAUTH_PROCESS_CANSEE: {
enum kauth_process_req req;
req = (enum kauth_process_req)(uintptr_t)arg1;
switch (req) {
case KAUTH_REQ_PROCESS_CANSEE_ARGS:
case KAUTH_REQ_PROCESS_CANSEE_ENTRY:
case KAUTH_REQ_PROCESS_CANSEE_OPENFILES:
case KAUTH_REQ_PROCESS_CANSEE_EPROC:
result = KAUTH_RESULT_ALLOW;
break;
case KAUTH_REQ_PROCESS_CANSEE_ENV:
if (kauth_cred_getuid(cred) !=
kauth_cred_getuid(p->p_cred) ||
kauth_cred_getuid(cred) !=
kauth_cred_getsvuid(p->p_cred))
break;
result = KAUTH_RESULT_ALLOW;
break;
case KAUTH_REQ_PROCESS_CANSEE_KPTR:
if (!kern_expose_address)
break;
if (kern_expose_address == 1 && !(p->p_flag & PK_KMEM))
break;
result = KAUTH_RESULT_ALLOW;
break;
default:
break;
}
break;
}
case KAUTH_PROCESS_FORK: {
int lnprocs = (int)(unsigned long)arg2;
/*
* Don't allow a nonprivileged user to use the last few
* processes. The variable lnprocs is the current number of
* processes, maxproc is the limit.
*/
if (__predict_false((lnprocs >= maxproc - 5)))
break;
result = KAUTH_RESULT_ALLOW;
break;
}
case KAUTH_PROCESS_CORENAME:
case KAUTH_PROCESS_STOPFLAG:
if (proc_uidmatch(cred, p->p_cred) == 0)
result = KAUTH_RESULT_ALLOW;
break;
default:
break;
}
return result;
}
static int
proc_ctor(void *arg __unused, void *obj, int flags __unused)
{
struct proc *p = obj;
/* Set free list running through table...
Preset 'use count' above PID_MAX so we allocate pid 1 next. */
for (i = 0; i <= pid_tbl_mask; i++) {
pid_table[i].pt_slot = PT_SET_FREE(LINK_EMPTY + i + 1);
pid_table[i].pt_pgrp = 0;
pid_table[i].pt_pid = 0;
}
/* slot 0 is just grabbed */
next_free_pt = 1;
/* Need to fix last entry. */
last_free_pt = pid_tbl_mask;
pid_table[last_free_pt].pt_slot = PT_SET_FREE(LINK_EMPTY);
/* point at which we grow table - to avoid reusing pids too often */
pid_alloc_lim = pid_tbl_mask - 1;
#undef LINK_EMPTY
/* Reserve PID 1 for init(8). */ /* XXX slightly gross */
mutex_enter(&proc_lock);
if (proc_alloc_pid_slot(&proc0, PT_SET_RESERVED) != 1)
panic("failed to reserve PID 1 for init(8)");
mutex_exit(&proc_lock);
/* Note that default core name has zero length. */
limit0.pl_corename = defcorename;
limit0.pl_cnlen = 0;
limit0.pl_refcnt = 1;
limit0.pl_writeable = false;
limit0.pl_sv_limit = NULL;
/* Configure virtual memory system, set vm rlimits. */
uvm_init_limits(p);
/* Initialize file descriptor table for proc0. */
fd_init(&filedesc0);
/*
* Initialize proc0's vmspace, which uses the kernel pmap.
* All kernel processes (which never have user space mappings)
* share proc0's vmspace, and thus, the kernel pmap.
*/
uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
trunc_page(VM_MAXUSER_ADDRESS),
#ifdef __USE_TOPDOWN_VM
true
#else
false
#endif
);
/* Initialize signal state for proc0. XXX IPL_SCHED */
mutex_init(&p->p_sigacts->sa_mutex, MUTEX_DEFAULT, IPL_SCHED);
siginit(p);
/*
* We keep the pgrp with the same id as the session in order to
* stop a process being given the same pid. Since the pgrp holds
* a reference to the session, it must be a 'zombie' pgrp by now.
*/
if (--ss->s_count == 0) {
pg = pg_remove(ss->s_sid);
} else {
pg = NULL;
ss = NULL;
}
mutex_exit(&proc_lock);
if (pg)
kmem_free(pg, sizeof(struct pgrp));
if (ss)
kmem_free(ss, sizeof(struct session));
}
/*
* Check that the specified process group is in the session of the
* specified process.
* Treats -ve ids as process ids.
* Used to validate TIOCSPGRP requests.
*/
int
pgid_in_session(struct proc *p, pid_t pg_id)
{
struct pgrp *pgrp;
struct session *session;
int error;
/*
 * p_inferior: is p an inferior of q?
 *
 * => Must be called with proc_lock held.
 * => Follows the p_pptr chain upward starting at p.  Returns true as
 *    soon as q is reached (which includes the case p == q) and false
 *    if a process whose p_pid is 0 is reached first.
 */
static inline bool
p_inferior(struct proc *p, struct proc *q)
{
	struct proc *cur = p;

	KASSERT(mutex_owned(&proc_lock));

	while (cur != q) {
		/* Hit the top of the parent chain without meeting q. */
		if (cur->p_pid == 0)
			return false;
		cur = cur->p_pptr;
	}
	return true;
}
/*
* proc_find_lwp: locate an lwp in said proc by the ID.
*
* => Must be called with p::p_lock held.
* => LSIDL lwps are not returned because they are only partially
* constructed while occupying the slot.
* => Callers need to be careful about lwp::l_stat of the returned
* lwp.
* => Returns NULL when the pid_table slot does not hold an LWP, when
* the LWP found belongs to a different process, or when its l_lid
* does not equal pid.
*/
struct lwp *
proc_find_lwp(proc_t *p, pid_t pid)
{
struct pid_table *pt;
unsigned pt_mask;
struct lwp *l = NULL;
uintptr_t slot;
int s;
KASSERT(mutex_owned(p->p_lock));
/*
* Look in the pid_table. This is done unlocked inside a
* pserialize read section covering pid_table's memory
* allocation only, so take care to read things in the correct
* order:
*
* 1. First read the table mask -- this only ever increases, in
* expand_pid_table, so a stale value is safely
* conservative.
*
* 2. Next read the pid table -- this is always set _before_
* the mask increases, so if we see a new table and stale
* mask, the mask is still valid for the table.
*/
s = pserialize_read_enter();
pt_mask = atomic_load_acquire(&pid_tbl_mask);
pt = &atomic_load_consume(&pid_table)[pid & pt_mask];
slot = atomic_load_consume(&pt->pt_slot);
if (__predict_false(!PT_IS_LWP(slot))) {
/* Slot is free or holds something other than an LWP. */
pserialize_read_exit(s);
return NULL;
}
/*
* Check to see if the LWP is from the correct process. We won't
* see entries in pid_table from a prior process that also used "p",
* by virtue of the fact that allocating "p" means all prior updates
* to dependent data structures are visible to this thread.
*/
l = PT_GET_LWP(slot);
if (__predict_false(atomic_load_relaxed(&l->l_proc) != p)) {
pserialize_read_exit(s);
return NULL;
}
/*
* We now know that p->p_lock holds this LWP stable.
*
* If the status is not LSIDL, it means the LWP is intended to be
* findable by LID and l_lid cannot change behind us.
*
* No need to acquire the LWP's lock to check for LSIDL, as
* p->p_lock must be held to transition in and out of LSIDL.
* Any other observed state is of no particular interest.
*/
pserialize_read_exit(s);
/* The final l_stat/l_lid check happens outside the read section. */
return l->l_stat != LSIDL && l->l_lid == pid ? l : NULL;
}
/*
* proc_find_lwp_unlocked: locate an lwp in said proc by the ID.
*
* => Called in a pserialize read section with no locks held.
* => LSIDL lwps are not returned because they are only partially
* constructed while occupying the slot.
* => Callers need to be careful about lwp::l_stat of the returned
* lwp.
* => If an LWP is found, it's returned locked.
*/
struct lwp *
proc_find_lwp_unlocked(proc_t *p, pid_t pid)
{
struct pid_table *pt;
unsigned pt_mask;
struct lwp *l = NULL;
uintptr_t slot;
KASSERT(pserialize_in_read_section());
/*
* Look in the pid_table. This is done unlocked inside a
* pserialize read section covering pid_table's memory
* allocation only, so take care to read things in the correct
* order:
*
* 1. First read the table mask -- this only ever increases, in
* expand_pid_table, so a stale value is safely
* conservative.
*
* 2. Next read the pid table -- this is always set _before_
* the mask increases, so if we see a new table and stale
* mask, the mask is still valid for the table.
*/
pt_mask = atomic_load_acquire(&pid_tbl_mask);
pt = &atomic_load_consume(&pid_table)[pid & pt_mask];
slot = atomic_load_consume(&pt->pt_slot);
if (__predict_false(!PT_IS_LWP(slot))) {
return NULL;
}
/*
* Lock the LWP we found to get it stable. If it's embryonic or
* reaped (LSIDL) then none of the other fields can safely be
* checked.
*/
l = PT_GET_LWP(slot);
lwp_lock(l);
if (__predict_false(l->l_stat == LSIDL)) {
lwp_unlock(l);
return NULL;
}
/*
* l_proc and l_lid are now known stable because the LWP is not
* LSIDL, so check those fields too to make sure we found the
* right thing.
*/
if (__predict_false(l->l_proc != p || l->l_lid != pid)) {
lwp_unlock(l);
return NULL;
}
/*
* proc_find_lwp_acquire_proc: locate an lwp and acquire a lock
* on its containing proc.
*
* => Similar to proc_find_lwp(), but does not require you to have
* the proc a priori.
* => Also returns proc * to caller, with p::p_lock held.
* => Same caveats apply.
*/
struct lwp *
proc_find_lwp_acquire_proc(pid_t pid, struct proc **pp)
{
struct pid_table *pt;
struct proc *p = NULL;
struct lwp *l = NULL;
uintptr_t slot;
/*
 * proc_find_raw_pid_table_locked: locate a process by the ID.
 *
 * => Must be called with proc_lock held (not asserted, see below).
 * => The pid_table entry must record exactly this pid in pt_pid.
 * => If the entry holds an LWP, the LWP's process is returned only
 *    when its p_pid equals pid, unless any_lwpid is true, in which
 *    case the ID of any of the process's LWPs is accepted.
 * => If the entry holds a proc directly, that proc is returned.
 * => Returns NULL when nothing matches.
 */
static proc_t *
proc_find_raw_pid_table_locked(pid_t pid, bool any_lwpid)
{
	struct pid_table *entry;
	uintptr_t slot;
	proc_t *owner;

	/* No - used by DDB.  KASSERT(mutex_owned(&proc_lock)); */

	entry = &pid_table[pid & pid_tbl_mask];
	slot = entry->pt_slot;

	if (__predict_true(PT_IS_LWP(slot) && entry->pt_pid == pid)) {
		/*
		 * When looking up processes, require a direct match
		 * on the PID assigned to the proc, not just one of
		 * its LWPs.
		 *
		 * N.B. We require lwp::l_proc of LSIDL LWPs to be
		 * valid here.
		 */
		owner = PT_GET_LWP(slot)->l_proc;
		if (__predict_false(owner->p_pid != pid && !any_lwpid))
			return NULL;
		return owner;
	}

	if (PT_IS_PROC(slot) && entry->pt_pid == pid)
		return PT_GET_PROC(slot);

	return NULL;
}
p = proc_find_raw_pid_table_locked(pid, any_lwpid);
if (__predict_false(p == NULL)) {
return NULL;
}
/*
* Only allow live processes to be found by PID.
* XXX: p_stat might change, since proc unlocked.
*/
if (__predict_true(p->p_stat == SACTIVE || p->p_stat == SSTOP)) {
return p;
}
return NULL;
}
/*
 * pgrp_find: locate a process group by the ID.
 *
 * => Must be called with proc_lock held.
 * => Returns NULL if the pid_table entry has no process group, if
 *    the group recorded there has a different ID, or if the group
 *    has no members.
 */
struct pgrp *
pgrp_find(pid_t pgid)
{
	struct pgrp *candidate;

	KASSERT(mutex_owned(&proc_lock));

	candidate = pid_table[pgid & pid_tbl_mask].pt_pgrp;

	/* Nothing recorded in this slot. */
	if (candidate == NULL)
		return NULL;

	/* The slot is occupied by a group with some other ID. */
	if (candidate->pg_id != pgid)
		return NULL;

	/*
	 * Cannot look up a process group that only exists because the
	 * session has not died yet (traditional).
	 */
	if (LIST_EMPTY(&candidate->pg_members))
		return NULL;

	return candidate;
}
/* XXX For now. The practical limit is much lower anyway. */
KASSERT(new_pt_mask <= FUTEX_TID_MASK);
mutex_enter(&proc_lock);
if (pt_size != pid_tbl_mask + 1) {
/* Another process beat us to it... */
mutex_exit(&proc_lock);
kmem_free(new_pt, tsz);
goto out;
}
/*
* Copy entries from old table into new one.
* If 'pid' is 'odd' we need to place in the upper half,
* even pid's to the lower half.
* Free items stay in the low half so we don't have to
* fixup the reference to them.
* We stuff free items on the front of the freelist
* because we can't write to unmodified entries.
* Processing the table backwards maintains a semblance
* of issuing pid numbers that increase with time.
*/
i = pt_size - 1;
n_pt = new_pt + i;
for (; ; i--, n_pt--) {
slot = pid_table[i].pt_slot;
pgrp = pid_table[i].pt_pgrp;
if (!PT_VALID(slot)) {
/* Up 'use count' so that link is valid */
pid = (PT_NEXT(slot) + pt_size) & ~pt_size;
rpid = 0;
slot = PT_SET_FREE(pid);
if (pgrp)
pid = pgrp->pg_id;
} else {
pid = pid_table[i].pt_pid;
rpid = pid;
}
/* Save entry in appropriate half of table */
n_pt[pid & pt_size].pt_slot = slot;
n_pt[pid & pt_size].pt_pgrp = pgrp;
n_pt[pid & pt_size].pt_pid = rpid;
/* Put other piece on start of free list */
pid = (pid ^ pt_size) & ~pid_tbl_mask;
n_pt[pid & pt_size].pt_slot =
PT_SET_FREE((pid & ~pt_size) | next_free_pt);
n_pt[pid & pt_size].pt_pgrp = 0;
n_pt[pid & pt_size].pt_pid = 0;
next_free_pt = i | (pid & pt_size);
if (i == 0)
break;
}
/* Save old table size and switch tables */
tsz = pt_size * sizeof(struct pid_table);
n_pt = pid_table;
atomic_store_release(&pid_table, new_pt);
KASSERT(new_pt_mask >= pid_tbl_mask);
atomic_store_release(&pid_tbl_mask, new_pt_mask);
/*
* pid_max starts as PID_MAX (= 30000), once we have 16384
* allocated pids we need it to be larger!
*/
if (pid_tbl_mask > PID_MAX) {
pid_max = pid_tbl_mask * 2 + 1;
pid_alloc_lim |= pid_alloc_lim << 1;
} else
pid_alloc_lim <<= 1; /* doubles number of free slots... */
mutex_exit(&proc_lock);
/*
* Make sure that unlocked access to the old pid_table is complete
* and then free it.
*/
pserialize_perform(proc_psz);
kmem_free(n_pt, tsz);
out: /* Return with proc_lock held again. */
mutex_enter(&proc_lock);
}
/*
 * proc_alloc: allocate a proc structure and a PID for it.
 *
 * => The structure comes from proc_cache with PR_WAITOK.
 * => It is marked SIDL before anything else is set up.
 * => Returns NULL if no PID could be allocated; in that case the
 *    setup done here is undone before returning.
 */
struct proc *
proc_alloc(void)
{
	struct proc *newp = pool_cache_get(proc_cache, PR_WAITOK);

	newp->p_stat = SIDL;		/* protect against others */
	proc_initspecific(newp);
	kdtrace_proc_ctor(NULL, newp);

	/*
	 * Allocate a placeholder in the pid_table.  When we create the
	 * first LWP for this process, it will take ownership of the
	 * slot.
	 */
	if (__predict_false(proc_alloc_pid(newp) == -1)) {
		/* Allocating the PID failed; unwind. */
		proc_finispecific(newp);
		proc_free_mem(newp);
		return NULL;
	}

	return newp;
}
/*
* proc_alloc_pid_slot: allocate PID and record the occupant so that
* proc_find_raw() can find it by the PID.
*/
static pid_t __noinline
proc_alloc_pid_slot(struct proc *p, uintptr_t slot)
{
struct pid_table *pt;
pid_t pid;
int nxt;
KASSERT(mutex_owned(&proc_lock));
for (;;expand_pid_table()) {
if (__predict_false(pid_alloc_cnt >= pid_alloc_lim)) {
/* ensure pids cycle through 2000+ values */
continue;
}
/*
* The first user process *must* be given PID 1.
* it has already been reserved for us. This
* will be coming in from the proc_alloc() call
* above, and the entry will be usurped later when
* the first user LWP is created.
* XXX this is slightly gross.
*/
if (__predict_false(PT_RESERVED(pid_table[1].pt_slot) &&
p != &proc0)) {
KASSERT(PT_IS_PROC(slot));
pt = &pid_table[1];
pt->pt_slot = slot;
return 1;
}
pt = &pid_table[next_free_pt];
#ifdef DIAGNOSTIC
if (__predict_false(PT_VALID(pt->pt_slot) || pt->pt_pgrp))
panic("proc_alloc: slot busy");
#endif
nxt = PT_NEXT(pt->pt_slot);
if (nxt & pid_tbl_mask)
break;
/* Table full - expand (NB last entry not used....) */
}
/*
* For unlocked lookup in proc_find_lwp(), make sure l->l_proc
* is globally visible before the LWP becomes visible via the
* pid_table.
*/
#ifndef __HAVE_ATOMIC_AS_MEMBAR
membar_producer();
#endif
/*
* If the slot for p->p_pid currently points to the proc,
* then we should usurp this ID for the LWP. This happens
* at least once per process (for the first LWP), and can
* happen again if the first LWP for a process exits before the
* process creates another.
*/
mutex_enter(&proc_lock);
pid = p->p_pid;
pt = &pid_table[pid & pid_tbl_mask];
KASSERT(pt->pt_pid == pid);
if (PT_IS_PROC(pt->pt_slot)) {
KASSERT(PT_GET_PROC(pt->pt_slot) == p);
l->l_lid = pid;
pt->pt_slot = PT_SET_LWP(l);
} else {
/* Need to allocate a new slot. */
pid = proc_alloc_pid_slot(p, PT_SET_LWP(l));
if (pid != -1)
l->l_lid = pid;
}
mutex_exit(&proc_lock);
/*
* Free a process id used by an LWP. If this was the process's
* first LWP, we convert the slot to point to the process; the
* entry will get cleaned up later when the process finishes exiting.
*
* If not, then it's the same as proc_free_pid().
*/
void
proc_free_lwpid(struct proc *p, pid_t pid)
{
KASSERT(mutex_owned(&proc_lock));
if (__predict_true(p->p_pid == pid)) {
struct pid_table *pt;
/*
* proc_enterpgrp: move p to a new or existing process group (and session).
*
* If we are creating a new pgrp, the pgid should equal
* the calling process' pid.
* It is only valid to enter a process group that is in the session
* of the process.
* Also mksess should only be set if we are creating a process group
*
* Only called from sys_setsid, sys_setpgid and posix_spawn/spawn_return.
*/
int
proc_enterpgrp(struct proc *curp, pid_t pid, pid_t pgid, bool mksess)
{
struct pgrp *new_pgrp, *pgrp;
struct session *sess;
struct proc *p;
int rval;
pid_t pg_id = NO_PGID;
/* Allocate data areas we might need before doing any validity checks */
sess = mksess ? kmem_alloc(sizeof(*sess), KM_SLEEP) : NULL;
new_pgrp = kmem_alloc(sizeof(*new_pgrp), KM_SLEEP);
mutex_enter(&proc_lock);
/* Check pgrp exists or can be created */
pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp;
if (pgrp != NULL && pgrp->pg_id != pgid)
goto eperm;
/* Can only set another process under restricted circumstances. */
if (pid != curp->p_pid) {
/* Must exist and be one of our children... */
p = proc_find_internal(pid, false);
if (p == NULL || !p_inferior(p, curp)) {
rval = SET_ERROR(ESRCH);
goto done;
}
/* ... in the same session... */
if (sess != NULL || p->p_session != curp->p_session)
goto eperm;
/* ... existing pgid must be in same session ... */
if (pgrp != NULL && pgrp->pg_session != p->p_session)
goto eperm;
/* ... and not done an exec. */
if (p->p_flag & PK_EXEC) {
rval = SET_ERROR(EACCES);
goto done;
}
} else {
/* ... setsid() cannot re-enter a pgrp */
if (mksess && (curp->p_pgid == curp->p_pid ||
pgrp_find(curp->p_pid)))
goto eperm;
p = curp;
}
/* Changing the process group/session of a session
leader is definitely off limits. */
if (SESS_LEADER(p)) {
if (sess == NULL && p->p_pgrp == pgrp) {
/* unless it's a definite noop */
rval = 0;
goto done;
}
goto eperm;
}
/* Can only create a process group with id of process */
if (pgrp == NULL && pgid != pid)
goto eperm;
/* Can only create a session if creating pgrp */
if (sess != NULL && pgrp != NULL)
goto eperm;
/* Check we allocated memory for a pgrp... */
if (pgrp == NULL && new_pgrp == NULL)
goto eperm;
/* Don't attach to 'zombie' pgrp */
if (pgrp != NULL && LIST_EMPTY(&pgrp->pg_members))
goto eperm;
/* Expect to succeed now */
rval = 0;
if (pgrp == p->p_pgrp)
/* nothing to do */
goto done;
pgrp->pg_id = pgid;
LIST_INIT(&pgrp->pg_members);
#ifdef DIAGNOSTIC
if (__predict_false(pid_table[pgid & pid_tbl_mask].pt_pgrp))
panic("enterpgrp: pgrp table slot in use");
if (__predict_false(mksess && p != curp))
panic("enterpgrp: mksession and p != curproc");
#endif
pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp;
pgrp->pg_jobc = 0;
}
/*
* Adjust eligibility of affected pgrps to participate in job control.
* Increment eligibility counts before decrementing, otherwise we
* could reach 0 spuriously during the first call.
*/
fixjobc(p, pgrp, 1);
fixjobc(p, p->p_pgrp, 0);
/* Interlock with ttread(). */
mutex_spin_enter(&tty_lock);
/* Move process to requested group. */
LIST_REMOVE(p, p_pglist);
if (LIST_EMPTY(&p->p_pgrp->pg_members))
/* defer delete until we've dumped the lock */
pg_id = p->p_pgrp->pg_id;
p->p_pgrp = pgrp;
LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);
/* Done with the swap; we can release the tty mutex. */
mutex_spin_exit(&tty_lock);
goto done;
/*
* proc_leavepgrp: remove a process from its process group.
* => must be called with the proc_lock held, which will be released;
*/
void
proc_leavepgrp(struct proc *p)
{
struct pgrp *pgrp;
/*
* pg_remove: remove a process group from the table.
* => must be called with the proc_lock held;
* => returns process group to free;
*/
static struct pgrp *
pg_remove(pid_t pg_id)
{
struct pgrp *pgrp;
struct pid_table *pt;
/*
 * pg_delete: delete and free a process group.
 *
 * => must be called with the proc_lock held, which will be released.
 * => pg_id names the group through its pid_table entry.  If that entry
 *    has no group, holds a group with a different ID, or the group
 *    still has members, nothing is deleted and only the lock is
 *    dropped.
 * => Any reference from the session's controlling tty to the group is
 *    cleared under tty_lock before the group goes away.
 */
static void
pg_delete(pid_t pg_id)
{
	struct pgrp *pg;
	struct tty *ttyp;
	struct session *ss;

	KASSERT(mutex_owned(&proc_lock));

	/*
	 * Resolve the group (and through it the session) before touching
	 * either of them; both are needed by everything below.
	 */
	pg = pid_table[pg_id & pid_tbl_mask].pt_pgrp;
	if (pg == NULL || pg->pg_id != pg_id ||
	    !LIST_EMPTY(&pg->pg_members)) {
		/* Nothing to delete; honour the "lock is released" contract. */
		mutex_exit(&proc_lock);
		return;
	}
	ss = pg->pg_session;

	/* Remove reference (if any) from tty to this process group */
	mutex_spin_enter(&tty_lock);
	ttyp = ss->s_ttyp;
	if (ttyp != NULL && ttyp->t_pgrp == pg) {
		ttyp->t_pgrp = NULL;
		KASSERT(ttyp->t_session == ss);
	}
	mutex_spin_exit(&tty_lock);

	/*
	 * The leading process group in a session is freed by proc_sessrele(),
	 * if last reference.  It will also release the locks.
	 */
	pg = (ss->s_sid != pg->pg_id) ? pg_remove(pg_id) : NULL;
	proc_sessrele(ss);

	if (pg != NULL) {
		/* Free it, if was not done above. */
		kmem_free(pg, sizeof(struct pgrp));
	}
}
/*
 * Adjust pgrp jobc counters when specified process changes process group.
 * We count the number of processes in each process group that "qualify"
 * the group for terminal job control (those with a parent in a different
 * process group of the same session).  If that count reaches zero, the
 * process group becomes orphaned.  Check both the specified process'
 * process group and that of its children.
 *
 *	entering == 0 => p is leaving specified group.
 *	entering != 0 => p is entering specified group.
 *
 * Call with proc_lock held.
 */
void
fixjobc(struct proc *p, struct pgrp *pgrp, int entering)
{
	struct session *sess = pgrp->pg_session;
	struct pgrp *other;
	struct proc *kid;

	KASSERT(mutex_owned(&proc_lock));

	/*
	 * First the parent: if it sits in a different group of the same
	 * session, p counts towards pgrp's job control eligibility.
	 */
	other = p->p_pptr->p_pgrp;
	if (other != pgrp && other->pg_session == sess) {
		if (entering) {
			pgrp->pg_jobc++;
			p->p_lflag &= ~PL_ORPHANPG;
		} else {
			/* KASSERT(pgrp->pg_jobc > 0); */
			pgrp->pg_jobc--;
			if (pgrp->pg_jobc == 0)
				orphanpg(pgrp);
		}
	}

	/*
	 * Then the children: each non-zombie child living in a different
	 * group of the same session has its own group's count adjusted.
	 */
	LIST_FOREACH(kid, &p->p_children, p_sibling) {
		other = kid->p_pgrp;
		if (other == pgrp || other->pg_session != sess ||
		    P_ZOMBIE(kid))
			continue;

		if (entering) {
			kid->p_lflag &= ~PL_ORPHANPG;
			other->pg_jobc++;
		} else {
			KASSERT(other->pg_jobc > 0);
			other->pg_jobc--;
			if (other->pg_jobc == 0)
				orphanpg(other);
		}
	}
}
/*
* A process group has become orphaned;
* if there are any stopped processes in the group,
* hang up all processes in that group.
*
* Call with proc_lock held.
*/
static void
orphanpg(struct pgrp *pg)
{
struct proc *p;
/*
* fill all the stack with magic number
* so that later modification on it can be detected.
*/
ip = (uint32_t *)KSTACK_LOWEST_ADDR(l);
end = (uint32_t *)((char *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
for (; ip < end; ip++) {
*ip = KSTACK_MAGIC;
}
}
if (kstackleftmin > stackleft) {
kstackleftmin = stackleft;
if (stackleft < kstackleftthres)
printf("warning: kernel stack left %d bytes"
"(pid %u:lid %u)\n", stackleft,
(u_int)l->l_proc->p_pid, (u_int)l->l_lid);
}
if (stackleft <= 0) {
panic("magic on the top of kernel stack changed for "
"pid %u, lid %u: maybe kernel stack overflow",
(u_int)l->l_proc->p_pid, (u_int)l->l_lid);
}
}
#endif /* KSTACK_CHECK_MAGIC */
int
proclist_foreach_call(struct proclist *list,
int (*callback)(struct proc *, void *arg), void *arg)
{
struct proc marker;
struct proc *p;
int ret = 0;
marker.p_flag = PK_MARKER;
mutex_enter(&proc_lock);
for (p = LIST_FIRST(list); ret == 0 && p != NULL;) {
if (p->p_flag & PK_MARKER) {
p = LIST_NEXT(p, p_list);
continue;
}
LIST_INSERT_AFTER(p, &marker, p_list);
ret = (*callback)(p, arg);
KASSERT(mutex_owned(&proc_lock));
p = LIST_NEXT(&marker, p_list);
LIST_REMOVE(&marker, p_list);
}
mutex_exit(&proc_lock);
return ret;
}
int
proc_vmspace_getref(struct proc *p, struct vmspace **vm)
{
/*
 * proc_crmod_enter: acquire a write lock on the process credential.
 *
 * => Operates on the current LWP's process.
 * => Returns with p_lock of that process held.
 * => Before taking the lock, the core name in the process limits is
 *    reset to defcorename if it differs.
 * => After taking the lock, the calling LWP's cached credential is
 *    brought in line with the process credential.
 */
void
proc_crmod_enter(void)
{
	struct lwp *self = curlwp;
	struct proc *pr = self->l_proc;
	kauth_cred_t stale;

	/* Reset what needs to be reset in plimit. */
	if (pr->p_limit->pl_corename != defcorename)
		lim_setcorename(pr, defcorename, 0);

	mutex_enter(pr->p_lock);

	/* Ensure the LWP cached credentials are up to date. */
	stale = self->l_cred;
	if (stale != pr->p_cred) {
		self->l_cred = kauth_cred_hold(pr->p_cred);
		kauth_cred_free(stale);
	}
}
/*
 * proc_crmod_leave: set in a new process credential, and drop the
 * write lock.
 *
 * => Must be called with p_lock of the current process held; the lock
 *    is released here.
 * => scred, if not NULL, becomes the process credential.  It must have
 *    a reference already.  Every other LWP in the process is flagged
 *    LW_CACHECRED and made to pass through userret; the calling LWP's
 *    cached credential is switched to scred immediately.
 * => fcred, if not NULL, is a no-longer required credential to free.
 *    It may only be given together with scred (asserted).
 * => sugid, if true, marks the process PK_SUGID.
 */
void
proc_crmod_leave(kauth_cred_t scred, kauth_cred_t fcred, bool sugid)
{
	struct lwp *self = curlwp;
	struct lwp *other;
	struct proc *pr = self->l_proc;
	kauth_cred_t prev = NULL;

	KASSERT(mutex_owned(pr->p_lock));

	/* Is there a new credential to set in? */
	if (scred != NULL) {
		pr->p_cred = scred;

		/* Tell the sibling LWPs to refresh their cached copy. */
		LIST_FOREACH(other, &pr->p_lwps, l_sibling) {
			if (other == self)
				continue;
			lwp_lock(other);
			other->l_flag |= LW_CACHECRED;
			lwp_need_userret(other);
			lwp_unlock(other);
		}

		/* Ensure the LWP cached credentials are up to date. */
		prev = self->l_cred;
		if (prev != scred)
			self->l_cred = kauth_cred_hold(scred);
	}

	/*
	 * Mark process as having changed credentials, stops
	 * tracing etc.
	 */
	if (sugid)
		pr->p_flag |= PK_SUGID;

	mutex_exit(pr->p_lock);

	/* If there is a credential to be released, free it now. */
	if (fcred == NULL)
		return;

	KASSERT(scred != NULL);
	kauth_cred_free(fcred);
	/* Also drop the cached credential this LWP just replaced. */
	if (prev != scred)
		kauth_cred_free(prev);
}
/*
* proc_specific_key_create --
* Create a key for subsystem proc-specific data.
*/
int
proc_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
{
int
proc_uidmatch(kauth_cred_t cred, kauth_cred_t target)
{
if (kauth_cred_getuid(cred) != kauth_cred_getuid(target) ||
kauth_cred_getuid(cred) != kauth_cred_getsvuid(target)) {
/*
* suid proc of ours or proc not ours
*/
return SET_ERROR(EPERM);
} else if (kauth_cred_getgid(target) != kauth_cred_getsvgid(target)) {
/*
* sgid proc has sgid back to us temporarily
*/
return SET_ERROR(EPERM);
} else {
/*
* our rgid must be in target's group list (ie,
* sub-processes started by a sgid process)
*/
int ismember = 0;
mutex_enter(&proc_lock);
/*
* Start with zombies to prevent reporting processes twice, in case they
* are dying and being moved from the list of alive processes to zombies.
*/
mmmbrains = true;
for (p = LIST_FIRST(&zombproc);; p = next) {
if (p == NULL) {
if (mmmbrains) {
p = LIST_FIRST(&allproc);
mmmbrains = false;
}
if (p == NULL)
break;
}
next = LIST_NEXT(p, p_list);
if ((p->p_flag & PK_MARKER) != 0)
continue;
/*
* Handling all the operations in one switch, at the cost of
* algorithmic complexity, is on purpose. Splitting this
* function into several similar copies would add maintenance
* burden and code growth, and the speed boost would be
* negligible in practical systems.
*/
switch (op) {
case KERN_PROC_PID:
match = (p->p_pid == (pid_t)arg);
break;
case KERN_PROC_PGRP:
match = (p->p_pgrp->pg_id == (pid_t)arg);
break;
case KERN_PROC_SESSION:
match = (p->p_session->s_sid == (pid_t)arg);
break;
case KERN_PROC_TTY:
match = true;
if (arg == (int) KERN_PROC_TTY_REVOKE) {
if ((p->p_lflag & PL_CONTROLT) == 0 ||
p->p_session->s_ttyp == NULL ||
p->p_session->s_ttyvp != NULL) {
match = false;
}
} else if ((p->p_lflag & PL_CONTROLT) == 0 ||
p->p_session->s_ttyp == NULL) {
if ((dev_t)arg != KERN_PROC_TTY_NODEV) {
match = false;
}
} else if (p->p_session->s_ttyp->t_dev != (dev_t)arg) {
match = false;
}
break;
case KERN_PROC_UID:
match = (kauth_cred_geteuid(p->p_cred) == (uid_t)arg);
break;
case KERN_PROC_RUID:
match = (kauth_cred_getuid(p->p_cred) == (uid_t)arg);
break;
case KERN_PROC_GID:
match = (kauth_cred_getegid(p->p_cred) == (uid_t)arg);
break;
case KERN_PROC_RGID:
match = (kauth_cred_getgid(p->p_cred) == (uid_t)arg);
break;
case KERN_PROC_ALL:
match = true;
/* allow everything */
break;
/*
* Grab a hold on the process.
*/
if (mmmbrains) {
zombie = true;
} else {
zombie = !rw_tryenter(&p->p_reflock, RW_READER);
}
if (zombie) {
LIST_INSERT_AFTER(p, marker, p_list);
}
if (buflen >= elem_size &&
(type == KERN_PROC || elem_count > 0)) {
ruspace(p); /* Update process vm resource use */
/*
* Zombies don't have a stack, so we can't read their psstrings.
* System processes also don't have a user stack.
*/
if (P_ZOMBIE(p) || (p->p_flag & PK_SYSTEM) != 0) {
error = SET_ERROR(EINVAL);
mutex_exit(p->p_lock);
goto out_locked;
}
int
copy_procargs(struct proc *p, int oid, size_t *limit,
int (*cb)(void *, const void *, size_t, size_t), void *cookie)
{
struct ps_strings pss;
size_t len, i, loaded, entry_len;
struct uio auio;
struct iovec aiov;
int error, argvlen;
char *arg;
char **argv;
vaddr_t user_argv;
struct vmspace *vmspace;
/*
* Allocate a temporary buffer to hold the argument vector and
* the arguments themselves.
*/
arg = kmem_alloc(PAGE_SIZE, KM_SLEEP);
argv = kmem_alloc(PAGE_SIZE, KM_SLEEP);
/*
* Lock the process down in memory.
*/
vmspace = p->p_vmspace;
uvmspace_addref(vmspace);
/*
* Read in the ps_strings structure.
*/
if ((error = copyin_psstrings(p, &pss)) != 0)
goto done;
/*
* Now read the address of the argument vector.
*/
switch (oid) {
case KERN_PROC_ARGV:
user_argv = (uintptr_t)pss.ps_argvstr;
argvlen = pss.ps_nargvstr;
break;
case KERN_PROC_ENV:
user_argv = (uintptr_t)pss.ps_envstr;
argvlen = pss.ps_nenvstr;
break;
default:
error = SET_ERROR(EINVAL);
goto done;
}
/*
* Now copy each string.
*/
len = 0; /* bytes written to user buffer */
loaded = 0; /* bytes from argv already processed */
i = 0; /* To make compiler happy */
entry_len = PROC_PTRSZ(p);
for (; argvlen; --argvlen) {
int finished = 0;
vaddr_t base;
size_t xlen;
int j;
if (loaded == 0) {
size_t rem = entry_len * argvlen;
loaded = MIN(rem, PAGE_SIZE);
error = copyin_vmspace(vmspace,
(const void *)user_argv, argv, loaded);
if (error)
break;
user_argv += loaded;
i = 0;
}
/*
* The program has messed around with its arguments,
* possibly deleting some, and replacing them with
* NULL's. Treat this as the last argument and not
* a failure.
*/
if (base == 0)
break;
while (!finished) {
xlen = PAGE_SIZE - (base & PAGE_MASK);