/* $NetBSD: kern_resource.c,v 1.195 2023/10/04 20:28:06 ad Exp $ */
/*-
* Copyright (c) 1982, 1986, 1991, 1993
* The Regents of the University of California. All rights reserved.
* (c) UNIX System Laboratories, Inc.
* All or some portions of this file are derived from material licensed
* to the University of California by American Telephone and Telegraph
* Co. or Unix System Laboratories, Inc. and are reproduced herein with
* the permission of UNIX System Laboratories, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)kern_resource.c 8.8 (Berkeley) 2/14/95
*/
int
dosetrlimit(struct lwp *l, struct proc *p, int which, struct rlimit *limp)
{
struct rlimit *alimp;
int error;
if ((u_int)which >= RLIM_NLIMITS)
return EINVAL;
if (limp->rlim_cur > limp->rlim_max) {
/*
* This is a programming error. According to SUSv2, we should
* return an error in this case.
*/
return EINVAL;
}
alimp = &p->p_rlimit[which];
/* if we don't change the value, no need to limcopy() */
if (limp->rlim_cur == alimp->rlim_cur &&
limp->rlim_max == alimp->rlim_max)
return 0;
lim_privatise(p);
/* p->p_limit is now unchangeable */
alimp = &p->p_rlimit[which];
switch (which) {
case RLIMIT_DATA:
if (limp->rlim_cur > maxdmap)
limp->rlim_cur = maxdmap;
if (limp->rlim_max > maxdmap)
limp->rlim_max = maxdmap;
break;
case RLIMIT_STACK:
if (limp->rlim_cur > maxsmap)
limp->rlim_cur = maxsmap;
if (limp->rlim_max > maxsmap)
limp->rlim_max = maxsmap;
/*
* Return EINVAL if the new stack size limit is lower than
* current usage. Otherwise, the process would get SIGSEGV the
* moment it would try to access anything on its current stack.
* This conforms to SUSv2.
*/
if (btoc(limp->rlim_cur) < p->p_vmspace->vm_ssize ||
btoc(limp->rlim_max) < p->p_vmspace->vm_ssize) {
return EINVAL;
}
/*
* Stack is allocated to the max at exec time with
* only "rlim_cur" bytes accessible (In other words,
* allocates stack dividing two contiguous regions at
* "rlim_cur" bytes boundary).
*
* Since allocation is done in units of pages, round up
* "rlim_cur" (otherwise, the contiguous regions would
* overlap). If the stack limit is going up, make more
* accessible; if going down, make inaccessible.
*/
limp->rlim_max = round_page(limp->rlim_max);
limp->rlim_cur = round_page(limp->rlim_cur);
if (limp->rlim_cur != alimp->rlim_cur) {
vaddr_t addr;
vsize_t size;
vm_prot_t prot;
char *base, *tmp;
lwp_lock(l);
bintime_add(tm, &l->l_rtime);
if ((l->l_pflag & LP_RUNNING) != 0 &&
(l->l_pflag & (LP_INTR | LP_TIMEINTR)) != LP_INTR) {
struct bintime diff;
/*
* Adjust for the current time slice. This is
* actually fairly important since the error
* here is on the order of a time quantum,
* which is much greater than the sampling
* error.
*/
binuptime(&diff);
membar_consumer(); /* for softint_dispatch() */
bintime_sub(&diff, &l->l_stime);
bintime_add(tm, &diff);
}
lwp_unlock(l);
}
/*
* calcru: transform the running time and tick information in proc p into
* user, system, and interrupt time usage.
*
* The total run time (tm, converted to microseconds in u) is shared out
* between system (st), user (ut) and interrupt (it) time in proportion to
* the respective tick counts.  Each of up, sp, ip and rp may be NULL; a
* result is stored only through the non-NULL ones:
*
*	up - user time
*	sp - system time
*	ip - interrupt time
*	rp - total run time
*
* p->p_xutime and p->p_xstime hold the user/system values handed out by
* the previous call and are used to keep the reported values from going
* backwards; they are refreshed only when up/sp respectively are non-NULL.
*
* Should be called with p->p_lock held unless called from exit1().
*
* NOTE(review): in this view st, ut, it and tm are read without any
* visible assignment, and l is declared but never used.  Confirm that the
* statements which load the tick counts and run time from p are present.
*/
void
calcru(struct proc *p, struct timeval *up, struct timeval *sp,
struct timeval *ip, struct timeval *rp)
{
uint64_t u, st, ut, it, tot, dt;
struct lwp *l;
struct bintime tm;
struct timeval tv;
/* Total number of ticks, used as the denominator when sharing out u. */
tot = st + ut + it;
bintime2timeval(&tm, &tv);
/* Total run time expressed in microseconds. */
u = (uint64_t)tv.tv_sec * 1000000ul + tv.tv_usec;
if (tot == 0) {
/* No ticks, so can't use to share time out, split 50-50 */
st = ut = u / 2;
} else {
/*
* Scale tick counts to microseconds.
* NOTE(review): u * st and u * ut are plain 64-bit products;
* verify they cannot overflow for long-running processes.
*/
st = (u * st) / tot;
ut = (u * ut) / tot;
}
/*
* Try to avoid lying to the users (too much)
*
* Of course, user/sys time are based on sampling (ie: statistics)
* so that would be impossible, but convincing the mark
* that we have used less user or system time this call than we had
* last time, is beyond reasonable... (the con fails!)
*
* Note that since actual used time cannot decrease, either
* utime or stime (or both) must be greater now than last time
* (or both the same) - if one seems to have decreased, hold
* it constant and steal the necessary bump from the other
* which must have increased.
*
* NOTE(review): dt, st and ut are uint64_t; presumably uimin() takes
* unsigned int arguments - verify that no truncation occurs here.
*/
if (p->p_xutime > ut) {
/* User time went backwards: pin it and take the gap from st. */
dt = p->p_xutime - ut;
st -= uimin(dt, st);
ut = p->p_xutime;
} else if (p->p_xstime > st) {
/* System time went backwards: pin it and take the gap from ut. */
dt = p->p_xstime - st;
ut -= uimin(dt, ut);
st = p->p_xstime;
}
if (sp != NULL) {
/* Remember what was reported, then split into sec/usec. */
p->p_xstime = st;
sp->tv_sec = st / 1000000;
sp->tv_usec = st % 1000000;
}
if (up != NULL) {
/* Remember what was reported, then split into sec/usec. */
p->p_xutime = ut;
up->tv_sec = ut / 1000000;
up->tv_usec = ut % 1000000;
}
if (ip != NULL) {
/* The guard also avoids dividing by tot when it could be zero. */
if (it != 0) /* it != 0 --> tot != 0 */
it = (u * it) / tot;
ip->tv_sec = it / 1000000;
ip->tv_usec = it % 1000000;
}
if (rp != NULL) {
/* Total run time, as converted from tm above. */
*rp = tv;
}
}
/*
* lim_copy: make a copy of the plimit structure.
*
* We use copy-on-write after fork, and copy when a limit is changed.
*/
struct plimit *
lim_copy(struct plimit *lim)
{
struct plimit *newlim;
char *corename;
size_t alen, len;
/*
 * lim_privatise: make sure process p owns a writeable plimit structure.
 *
 * If p->p_limit is already marked writeable there is nothing to do.
 * Otherwise a copy is made without holding p->p_lock, and the copy is
 * installed under p->p_lock unless another thread installed a writeable
 * structure in the meantime, in which case the unused copy is freed.
 */
void
lim_privatise(proc_t *p)
{
	struct plimit *shared, *priv;

	shared = p->p_limit;
	if (shared->pl_writeable)
		return;

	/* Build the private copy before taking the lock. */
	priv = lim_copy(shared);

	mutex_enter(p->p_lock);
	if (!p->p_limit->pl_writeable) {
		/*
		 * p->p_limit may be read without the lock held, so the
		 * structure being replaced is kept reachable through
		 * pl_sv_limit instead of being released here.
		 */
		priv->pl_sv_limit = p->p_limit;
		priv->pl_writeable = true;
		p->p_limit = priv;
		priv = NULL;		/* ownership handed to p */
	}
	mutex_exit(p->p_lock);

	/* Another thread won the race: discard our unused copy. */
	if (priv != NULL)
		lim_free(priv);
}
len = (char *)&nps->pstat_endzero - (char *)&nps->pstat_startzero;
memset(&nps->pstat_startzero, 0, len);
len = (char *)&nps->pstat_endcopy - (char *)&nps->pstat_startcopy;
memcpy(&nps->pstat_startcopy, &ps->pstat_startcopy, len);
return nps;
}
void
pstatsfree(struct pstats *ps)
{
kmem_free(ps, sizeof(*ps));
}
/*
 * sysctl_proc_findproc: resolve a PID for the sysctl proc subtree helpers.
 *
 * => pid == PROC_CURPROC selects the calling LWP's own process; any other
 *    value is looked up with proc_find() while proc_lock is held.
 * => Returns ESRCH if the lookup fails (*p2 is left untouched).
 * => Otherwise *p2 is set to the process and a read hold on its p_reflock
 *    is attempted: 0 if it was obtained, EBUSY if not.  The caller must
 *    rw_exit() the p_reflock only in the success case.
 */
static int
sysctl_proc_findproc(lwp_t *l, pid_t pid, proc_t **p2)
{
	const bool self = (pid == PROC_CURPROC);
	proc_t *target;
	int rv;

	if (!self) {
		mutex_enter(&proc_lock);
		target = proc_find(pid);
		if (target == NULL) {
			mutex_exit(&proc_lock);
			return ESRCH;
		}
	} else {
		target = l->l_proc;
	}

	/* Try for the reference; proc_lock (if taken) is still held here. */
	if (rw_tryenter(&target->p_reflock, RW_READER))
		rv = 0;
	else
		rv = EBUSY;

	if (!self)
		mutex_exit(&proc_lock);

	*p2 = target;
	return rv;
}
/*
* sysctl_proc_paxflags: helper routine to get process's paxctl flags
*
* Validates the request, takes a reference on the target process via
* sysctl_proc_findproc(), rejects attempts to write a new value with
* EACCES, and drops the reference again before returning.
*
* NOTE(review): in this view `node' and `paxflags' are declared but never
* used, and nothing is copied out to the caller.  Confirm that the step
* which reads the flags and performs the sysctl lookup is present.
*/
static int
sysctl_proc_paxflags(SYSCTLFN_ARGS)
{
struct proc *p;
struct sysctlnode node;
int paxflags;
int error;
/*
* First, validate the request.
* presumably name, namelen, newp and l come from SYSCTLFN_ARGS - verify.
*/
if (namelen != 0 || name[-1] != PROC_PID_PAXFLAGS)
return EINVAL;
/*
* Find the process. Hold a reference (p_reflock), if found.
* name[-2] is interpreted as the PID of the target process.
*/
error = sysctl_proc_findproc(l, (pid_t)name[-2], &p);
if (error)
return error;
/*
* If attempting to write new value, it's an error.
* (error is always 0 at this point because of the early return above.)
*/
if (error == 0 && newp != NULL)
error = EACCES;
/* Drop the reference taken by sysctl_proc_findproc(). */
rw_exit(&p->p_reflock);
return error;
}
/*
* sysctl_proc_corename: helper routine to get or set the core file name
* for a process specified by PID.
*
* As visible here, the routine validates the request, takes a reference
* on the target process via sysctl_proc_findproc(), and, when a new value
* is supplied (newp != NULL), calls dosetrlimit() before dropping the
* reference.
*
* NOTE(review): `limitno' and `alim' are not declared in this block, the
* locals lim, cnbuf, cname, node and len are never used, and the body
* applies a resource limit rather than touching a core file name.  This
* looks like two handlers joined together - confirm against the full file.
*/
static int
sysctl_proc_corename(SYSCTLFN_ARGS)
{
struct proc *p;
struct plimit *lim;
char *cnbuf, *cname;
struct sysctlnode node;
size_t len;
int error;
/*
* First, validate the request.
* presumably name, namelen, newp and l come from SYSCTLFN_ARGS - verify.
*/
if (namelen != 0 || name[-1] != PROC_PID_CORENAME)
return EINVAL;
/*
* Find the process. Hold a reference (p_reflock), if found.
* name[-2] is interpreted as the PID of the target process.
*/
error = sysctl_proc_findproc(l, (pid_t)name[-2], &p);
if (error)
return error;
/*
* Return if error, or if we are only retrieving the limits.
* (error is always 0 here, so only the newp test is effective.)
*/
if (error || newp == NULL) {
goto out;
}
/* A new value was supplied: apply it to process p. */
error = dosetrlimit(l, p, limitno, &alim);
out:
/* Drop the reference taken by sysctl_proc_findproc(). */
rw_exit(&p->p_reflock);
return error;
}