/*
* Copyright (c) 2008, 2011 Mindaugas Rasiukevicius <rmind at NetBSD org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* System calls relating to the scheduler.
*
* Lock order:
*
* cpu_lock ->
* proc_lock ->
* proc_t::p_lock ->
* lwp_t::lwp_lock
*
* TODO:
* - Handle pthread_setschedprio() as defined by POSIX;
*/
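/*
* For example, sys__sched_setaffinity() below follows this order: it
* takes cpu_lock first, takes proc_lock to look up the process, takes
* proc_t::p_lock before dropping proc_lock, and then locks each LWP
* while holding p_lock.
*/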
/*
* Convert a user priority to the in-kernel priority, or convert the
* current in-kernel priority to the range appropriate for the new
* scheduling policy.
*/
static pri_t
convert_pri(lwp_t *l, int policy, pri_t pri)
{
/* Convert user priority to the in-kernel priority */
if (pri != PRI_NONE) {
/* Only for real-time threads */
KASSERT(pri >= SCHED_PRI_MIN);
KASSERT(pri <= SCHED_PRI_MAX);
KASSERT(policy != SCHED_OTHER);
return PRI_USER_RT + pri;
}
/* Neither the policy nor the priority changes */
if (l->l_class == policy)
return l->l_priority;
/* Real-time -> time-sharing */
if (policy == SCHED_OTHER) {
KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
/*
* this is a bit arbitrary because the priority is dynamic
* for SCHED_OTHER threads and will likely be changed by
* the scheduler soon anyway.
*/
return l->l_priority - PRI_USER_RT;
}
/* Time-sharing -> real-time */
KASSERT(l->l_class == SCHED_OTHER);
return l->l_priority + PRI_USER_RT;
}
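/*
* Illustrative example (numbers are not from the original source): a
* SCHED_FIFO thread given a user priority of 5 (assuming it lies within
* SCHED_PRI_MIN..SCHED_PRI_MAX) runs at the in-kernel priority
* PRI_USER_RT + 5; do_sched_getparam() below subtracts PRI_USER_RT
* again to report 5 back to userland.
*/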
int
do_sched_setparam(pid_t pid, lwpid_t lid, int policy,
const struct sched_param *params)
{
struct proc *p;
struct lwp *t;
pri_t pri;
u_int lcnt;
int error;
error = 0;
pri = params->sched_priority;
/* If no parameters specified, just return (this should not happen) */
if (pri == PRI_NONE && policy == SCHED_NONE)
return 0;
/* Validate scheduling class */
if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
return EINVAL;
/* Validate priority */
if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
return EINVAL;
if (pid != 0) {
/* Find the process */
mutex_enter(&proc_lock);
p = proc_find(pid);
if (p == NULL) {
mutex_exit(&proc_lock);
return ESRCH;
}
mutex_enter(p->p_lock);
mutex_exit(&proc_lock);
/* Disallow modification of system processes */
if ((p->p_flag & PK_SYSTEM) != 0) {
mutex_exit(p->p_lock);
return EPERM;
}
} else {
/* Use the calling process */
p = curlwp->l_proc;
mutex_enter(p->p_lock);
}
/* Find the LWP(s) */
lcnt = 0;
LIST_FOREACH(t, &p->p_lwps, l_sibling) {
pri_t kpri;
int lpolicy;
/*
* do_sched_getparam:
*
* If lid == 0, return the parameters of the first LWP in the process.
*/
int
do_sched_getparam(pid_t pid, lwpid_t lid, int *policy,
struct sched_param *params)
{
struct sched_param lparams;
struct lwp *t;
int error, lpolicy;
if (pid < 0 || lid < 0)
return EINVAL;
t = lwp_find2(pid, lid); /* acquire p_lock */
if (t == NULL)
return ESRCH;
/*
* Convert to the user-visible priority value; this is the inverse
* of convert_pri().
*
* The SCHED_OTHER case is a bit arbitrary given that:
* - we do not allow setting the priority;
* - the priority is dynamic.
*/
switch (lpolicy) {
case SCHED_OTHER:
lparams.sched_priority -= PRI_USER;
break;
case SCHED_RR:
case SCHED_FIFO:
lparams.sched_priority -= PRI_USER_RT;
break;
}
/*
* Allocate the CPU set, and get it from userspace.
*/
static int
genkcpuset(kcpuset_t **dset, const cpuset_t *sset, size_t size)
{
kcpuset_t *kset;
int error;
/*
* Traverse _each_ CPU to:
* - Check that CPUs in the mask have no assigned processor set.
* - Check that at least one CPU from the mask is online.
* - Find the first target CPU to migrate to.
*
* To avoid races with CPU online/offline calls and processor-set
* operations, cpu_lock is held for the entire operation.
*/
ci = NULL;
alloff = false;
mutex_enter(&cpu_lock);
for (CPU_INFO_FOREACH(cii, ici)) {
struct schedstate_percpu *ispc;
if (!kcpuset_isset(kcset, cpu_index(ici))) {
continue;
}
ispc = &ici->ci_schedstate;
/* Check that CPU is not in the processor-set */
if (ispc->spc_psid != PS_NONE) {
error = EPERM;
goto out;
}
/* Skip offline CPUs */
if (ispc->spc_flags & SPCF_OFFLINE) {
alloff = true;
continue;
}
/* Target CPU to migrate */
if (ci == NULL) {
ci = ici;
}
}
if (ci == NULL) {
if (alloff) {
/* All CPUs in the set are offline */
error = EPERM;
goto out;
}
/* Empty set */
kcpuset_unuse(kcset, &kcpulst);
kcset = NULL;
}
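/*
* From this point on a NULL kcset means the affinity is being cleared;
* otherwise each selected LWP below takes its own reference on kcset.
*/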
if (SCARG(uap, pid) != 0) {
/* Find the process */
mutex_enter(&proc_lock);
p = proc_find(SCARG(uap, pid));
if (p == NULL) {
mutex_exit(&proc_lock);
error = ESRCH;
goto out;
}
mutex_enter(p->p_lock);
mutex_exit(&proc_lock);
/* Disallow modification of system processes. */
if ((p->p_flag & PK_SYSTEM) != 0) {
mutex_exit(p->p_lock);
error = EPERM;
goto out;
}
} else {
/* Use the calling process */
p = l->l_proc;
mutex_enter(p->p_lock);
}
/*
* Check the permission.
*/
error = kauth_authorize_process(l->l_cred,
KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL);
if (error != 0) {
mutex_exit(p->p_lock);
goto out;
}
/* Iterate through LWP(s). */
lcnt = 0;
lid = SCARG(uap, lid);
LIST_FOREACH(t, &p->p_lwps, l_sibling) {
if (lid && lid != t->l_lid) {
continue;
}
lwp_lock(t);
/* No affinity for zombie LWPs. */
if (t->l_stat == LSZOMB) {
lwp_unlock(t);
continue;
}
/* First, release existing affinity, if any. */
if (t->l_affinity) {
kcpuset_unuse(t->l_affinity, &kcpulst);
}
if (kcset) {
/*
* Hold a reference on affinity mask, assign mask to
* LWP and migrate it to another CPU (unlocks LWP).
*/
kcpuset_use(kcset);
t->l_affinity = kcset;
lwp_migrate(t, ci);
} else {
/* Old affinity mask is released, just clear. */
t->l_affinity = NULL;
lwp_unlock(t);
}
lcnt++;
}
mutex_exit(p->p_lock);
if (lcnt == 0) {
error = ESRCH;
}
out:
mutex_exit(&cpu_lock);
/*
* Drop the initial reference (the LWPs, if any, now own the set),
* and destroy whatever is in the G/C list, if anything.
*/
if (kcset) {
kcpuset_unuse(kcset, &kcpulst);
}
if (kcpulst) {
kcpuset_destroy(kcpulst);
}
return error;
}
/*
* Priority protection for PTHREAD_PRIO_PROTECT. This is a weak
* analogue of priority inheritance: temporarily raise the priority
* of the caller when accessing a protected resource.
*/
int
sys__sched_protect(struct lwp *l,
const struct sys__sched_protect_args *uap, register_t *retval)
{
/* {
syscallarg(int) priority;
syscallarg(int *) opriority;
} */
int error;
pri_t pri;
pri = SCARG(uap, priority);
error = 0;
lwp_lock(l);
if (pri == -1) {
/* back out priority changes */
switch (l->l_protectdepth) {
case 0:
error = EINVAL;
break;
case 1:
l->l_protectdepth = 0;
l->l_protectprio = -1;
l->l_auxprio = -1;
break;
default:
l->l_protectdepth--;
break;
}
} else if (pri < 0) {
/* Just retrieve the current value, for debugging */
if (l->l_protectprio == -1)
error = ENOENT;
else
*retval = l->l_protectprio - PRI_USER_RT;
} else if (__predict_false(pri < SCHED_PRI_MIN ||
pri > SCHED_PRI_MAX || l->l_priority > pri + PRI_USER_RT)) {
/* must fail if existing priority is higher */
error = EPERM;
} else {
/* Play along, but make no changes if not a real-time LWP. */
l->l_protectdepth++;
pri += PRI_USER_RT;
if (__predict_true(l->l_class != SCHED_OTHER &&
pri > l->l_protectprio)) {
l->l_protectprio = pri;
l->l_auxprio = pri;
}
}
lwp_unlock(l);
return error;
}
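/*
* Usage sketch (an assumption about the caller, not taken from this
* file): a PTHREAD_PRIO_PROTECT mutex implementation would issue this
* syscall with the ceiling priority before entering the critical
* section and with -1 when leaving it; l_protectdepth counts the
* nesting, so only the outermost -1 call clears l_protectprio and
* l_auxprio.
*/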