/*-
* Copyright (c) 1999, 2007 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
* NASA Ames Research Center, and by Andrew Doran.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Implementation of SVID semaphores
*
* Author: Daniel Boulet
*
* This software is provided ``AS IS'' without any warranties of any kind.
*/
/*
* List of active undo structures, total number of semaphores,
* and total number of semop waiters.
*/
static struct sem_undo *semu_list __read_mostly;
static u_int semtot __cacheline_aligned;
static u_int sem_waiters __cacheline_aligned;
/* Macro to find a particular sem_undo vector */
#define SEMU(s, ix) ((struct sem_undo *)(((long)s) + ix * seminfo.semusz))
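/*
 * Illustrative sketch (not part of the original source): the undo
 * structures live in one contiguous allocation of seminfo.semusz-byte
 * records, so SEMU() is plain byte arithmetic rather than array
 * indexing.  It is equivalent to:
 *
 *	(struct sem_undo *)((char *)s + ix * seminfo.semusz)
 */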
#ifdef SEM_DEBUG
#define SEM_PRINTF(a) printf a
#else
#define SEM_PRINTF(a)
#endif
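/*
 * The double parentheses let a full printf()-style argument list pass
 * through the single macro parameter, e.g.:
 *
 *	SEM_PRINTF(("semop: semid %d, value now %d\n", semid,
 *	    semptr->semval));
 */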
void *hook; /* cookie from exithook_establish() */
/* Allocate the wired memory for our structures */
sz = ALIGN(newsemmni * sizeof(struct semid_ds)) +
ALIGN(newsemmns * sizeof(struct __sem)) +
ALIGN(newsemmni * sizeof(kcondvar_t)) +
ALIGN(newsemmnu * seminfo.semusz);
sz = round_page(sz);
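/*
 * A sketch of how the single wired block is presumably carved up (an
 * assumption based on the size computation above; the actual pointer
 * assignments are not part of this excerpt):
 *
 *	new_sema  = (void *)v;
 *	new_sem   = (void *)((uintptr_t)new_sema +
 *	    ALIGN(newsemmni * sizeof(struct semid_ds)));
 *	new_semcv = (void *)((uintptr_t)new_sem +
 *	    ALIGN(newsemmns * sizeof(struct __sem)));
 *	new_semu  = (void *)((uintptr_t)new_semcv +
 *	    ALIGN(newsemmni * sizeof(kcondvar_t)));
 */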
v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
if (v == 0)
return ENOMEM;
mutex_enter(&semlock);
if (sem_realloc_state) {
mutex_exit(&semlock);
uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
return EBUSY;
}
sem_realloc_state = true;
if (sem_waiters) {
/*
 * Reallocation is now pending: wake up all waiters and
 * wait until they have all exited.
 */
for (i = 0; i < seminfo.semmni; i++)
cv_broadcast(&semcv[i]);
while (sem_waiters)
cv_wait(&sem_realloc_cv, &semlock);
}
old_sema = sema;
/* Find the index of the last allocated slot */
lsemid = 0;
for (i = 0; i < seminfo.semmni; i++)
if (sema[i].sem_perm.mode & SEM_ALLOC)
lsemid = i;
/* Get the number of currently used undo structures */
nmnus = 0;
for (i = 0; i < seminfo.semmnu; i++) {
suptr = SEMU(semu, i);
if (suptr->un_proc == NULL)
continue;
nmnus++;
}
/* We cannot reallocate to less memory than is currently in use */
if (lsemid >= newsemmni || semtot > newsemmns || nmnus > newsemmnu) {
mutex_exit(&semlock);
uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
return EBUSY;
}
/* Initialize all semaphore identifiers and condvars */
for (i = 0; i < newsemmni; i++) {
new_sema[i]._sem_base = NULL;
new_sema[i].sem_perm.mode = 0;
cv_init(&new_semcv[i], "semwait");
}
for (i = 0; i < newsemmnu; i++) {
nsuptr = SEMU(new_semu, i);
nsuptr->un_proc = NULL;
}
/*
 * Copy all identifiers, semaphores, and the list of undo
 * structures into the new allocation.
 */
j = 0;
for (i = 0; i <= lsemid; i++) {
if ((sema[i].sem_perm.mode & SEM_ALLOC) == 0)
continue;
memcpy(&new_sema[i], &sema[i], sizeof(struct semid_ds));
new_sema[i]._sem_base = &new_sem[j];
memcpy(new_sema[i]._sem_base, sema[i]._sem_base,
(sizeof(struct __sem) * sema[i].sem_nsems));
j += sema[i].sem_nsems;
}
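/* Every active semaphore must have been accounted for */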
KASSERT(j == semtot);
/*
* Adjust a particular entry for a particular proc
*/
int
semundo_adjust(struct proc *p, struct sem_undo **supptr, int semid, int semnum,
int adjval)
{
struct sem_undo *suptr;
struct sem_undo_entry *sunptr;
int i;
KASSERT(mutex_owned(&semlock));
/*
* Look for and remember the sem_undo if the caller doesn't
* provide it
*/
suptr = *supptr;
if (suptr == NULL) {
for (suptr = semu_list; suptr != NULL; suptr = suptr->un_next)
if (suptr->un_proc == p)
break;
if (suptr == NULL) {
suptr = semu_alloc(p);
if (suptr == NULL)
return (ENOSPC);
}
*supptr = suptr;
}
/*
* Look for the requested entry and adjust it (delete if
* adjval becomes 0).
*/
sunptr = &suptr->un_ent[0];
for (i = 0; i < suptr->un_cnt; i++, sunptr++) {
if (sunptr->un_id != semid || sunptr->un_num != semnum)
continue;
sunptr->un_adjval += adjval;
if (sunptr->un_adjval == 0) {
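/* Compact: move the last entry into the freed slot */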
suptr->un_cnt--;
if (i < suptr->un_cnt)
suptr->un_ent[i] =
suptr->un_ent[suptr->un_cnt];
}
return (0);
}
/* Didn't find the right entry - create it */
if (adjval == 0)
return (0);
if (suptr->un_cnt == SEMUME)
return (EINVAL);
sunptr = &suptr->un_ent[suptr->un_cnt];
suptr->un_cnt++;
sunptr->un_adjval = adjval;
sunptr->un_id = semid;
sunptr->un_num = semnum;
return (0);
}
int
semctl1(struct lwp *l, int semid, int semnum, int cmd, void *v,
register_t *retval)
{
kauth_cred_t cred = l->l_cred;
union __semun *arg = v;
struct semid_ds *sembuf = v, *semaptr;
int i, error, ix;
SEM_PRINTF(("call to semctl(%d, %d, %d, %p)\n",
semid, semnum, cmd, v));
mutex_enter(&semlock);
ix = IPCID_TO_IX(semid);
if (ix < 0 || ix >= seminfo.semmni) {
mutex_exit(&semlock);
return (EINVAL);
}
restart:
mutex_enter(&semlock);
/* If a reallocation is in progress, wait for it to complete */
while (__predict_false(sem_realloc_state))
cv_wait(&sem_realloc_cv, &semlock);
semid = IPCID_TO_IX(usemid); /* Convert back to zero origin */
if (semid < 0 || semid >= seminfo.semmni) {
error = EINVAL;
goto out;
}
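/*
 * (A SysV IPC identifier packs the slot index into its low bits and a
 * reuse sequence number into its high bits; the _seq comparison after
 * sleeping, below, catches stale identifiers from a recycled slot.)
 */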
if (timeout) {
error = ts2timo(CLOCK_MONOTONIC, TIMER_RELTIME, timeout,
&timo, NULL);
if (error)
return error;
}
if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_W))) {
SEM_PRINTF(("error = %d from ipaccess\n", error));
goto out;
}
for (i = 0; i < nsops; i++)
if (sops[i].sem_num >= semaptr->sem_nsems) {
error = EFBIG;
goto out;
}
/*
* Loop trying to satisfy the vector of requests.
* If we reach a point where we must wait, any requests already
* performed are rolled back and we go to sleep until some other
* process wakes us up. At this point, we start all over again.
*
* This ensures that from the perspective of other tasks, a set
* of requests is atomic (never partially satisfied).
*/
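/*
 * Illustrative userland view (an assumption for exposition, not part
 * of this file): a caller that decrements two semaphores in a single
 * semop() call observes either both decrements or neither:
 *
 *	struct sembuf ops[2] = {
 *		{ .sem_num = 0, .sem_op = -1, .sem_flg = 0 },
 *		{ .sem_num = 1, .sem_op = -1, .sem_flg = 0 },
 *	};
 *	if (semop(semid, ops, 2) == -1)
 *		err(1, "semop");
 */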
do_undos = 0;
for (;;) {
do_wakeup = 0;
for (i = 0; i < nsops; i++) {
sopptr = &sops[i];
semptr = &semaptr->_sem_base[sopptr->sem_num];
if (sopptr->sem_op < 0) {
if ((int)(semptr->semval +
sopptr->sem_op) < 0) {
SEM_PRINTF(("semop: "
"can't do it now\n"));
break;
} else {
semptr->semval += sopptr->sem_op;
if (semptr->semval == 0 &&
semptr->semzcnt > 0)
do_wakeup = 1;
}
if (sopptr->sem_flg & SEM_UNDO)
do_undos = 1;
} else if (sopptr->sem_op == 0) {
if (semptr->semval > 0) {
SEM_PRINTF(("semop: not zero now\n"));
break;
}
} else {
if (semptr->semncnt > 0)
do_wakeup = 1;
semptr->semval += sopptr->sem_op;
if (sopptr->sem_flg & SEM_UNDO)
do_undos = 1;
}
}
/*
* Did we get through the entire vector?
*/
if (i >= nsops)
goto done;
/*
 * No ... roll back anything we have already done
 */
SEM_PRINTF(("semop: rollback 0 through %d\n", i - 1));
while (i-- > 0)
semaptr->_sem_base[sops[i].sem_num].semval -=
sops[i].sem_op;
/*
* If the request that we couldn't satisfy has the
* NOWAIT flag set then return with EAGAIN.
*/
if (sopptr->sem_flg & IPC_NOWAIT) {
error = EAGAIN;
goto out;
}
if (sopptr->sem_op == 0)
semptr->semzcnt++;
else
semptr->semncnt++;
sem_waiters++;
SEM_PRINTF(("semop: good night!\n"));
error = cv_timedwait_sig(&semcv[semid], &semlock, timo);
SEM_PRINTF(("semop: good morning (error=%d)!\n", error));
sem_waiters--;
/* Notify reallocator, if it is waiting */
cv_broadcast(&sem_realloc_cv);
/*
* Make sure that the semaphore still exists
*/
if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 ||
semaptr->sem_perm._seq != seq) {
error = EIDRM;
goto out;
}
/*
* The semaphore is still alive. Readjust the count of
* waiting processes.
*/
semptr = &semaptr->_sem_base[sopptr->sem_num];
if (sopptr->sem_op == 0)
semptr->semzcnt--;
else
semptr->semncnt--;
/* If a reallocation began while we slept, restart the call */
if (sem_realloc_state) {
mutex_exit(&semlock);
goto restart;
}
/* Is it really morning, or was our sleep interrupted? */
if (error != 0) {
if (error == ERESTART)
error = EINTR;	/* treat a restart request as an interrupt */
else if (error == EWOULDBLOCK)
error = EAGAIN;	/* the timeout expired */
goto out;
}
SEM_PRINTF(("semop: good morning!\n"));
}
done:
/*
* Process any SEM_UNDO requests.
*/
if (do_undos) {
for (i = 0; i < nsops; i++) {
/*
* We only need to deal with SEM_UNDO's for non-zero
* op's.
*/
int adjval;
if ((sops[i].sem_flg & SEM_UNDO) == 0)
continue;
adjval = sops[i].sem_op;
if (adjval == 0)
continue;
error = semundo_adjust(p, &suptr, semid,
sops[i].sem_num, -adjval);
if (error == 0)
continue;
/*
 * Uh-oh! We ran out of either sem_undo's or undo entries.
 * Roll back the adjustments made so far, then roll back the
 * semaphore ups and downs, so that we can return an error
 * with all structures restored.  We roll the undos back in
 * the exact reverse order in which we applied them; at each
 * step the entry count is exactly what it was when the
 * corresponding forward adjustment finished, which
 * guarantees that we won't run out of space as we roll
 * things back out.
 */
while (i-- > 0) {
if ((sops[i].sem_flg & SEM_UNDO) == 0)
continue;
adjval = sops[i].sem_op;
if (adjval == 0)
continue;
if (semundo_adjust(p, &suptr, semid,
sops[i].sem_num, adjval) != 0)
panic("semop - can't undo undos");
}
for (i = 0; i < nsops; i++)
semaptr->_sem_base[sops[i].sem_num].semval -=
sops[i].sem_op;
SEM_PRINTF(("error = %d from semundo_adjust\n",
error));
goto out;
} /* loop through the sops */
} /* if (do_undos) */
/* We're definitely done - set the sempid's */
for (i = 0; i < nsops; i++) {
sopptr = &sops[i];
semptr = &semaptr->_sem_base[sopptr->sem_num];
semptr->sempid = p->p_pid;
}
/* Do a wakeup if any semaphore was up'd. */
if (do_wakeup) {
SEM_PRINTF(("semop: doing wakeup\n"));
cv_broadcast(&semcv[semid]);
SEM_PRINTF(("semop: back from wakeup\n"));
}
SEM_PRINTF(("semop: done\n"));
*retval = 0;
out:
mutex_exit(&semlock);
return error;
}
static int
do_semop(struct lwp *l, int usemid, struct sembuf *usops,
size_t nsops, struct timespec *utimeout, register_t *retval)
{
struct sembuf small_sops[SMALL_SOPS];
struct sembuf *sops;
struct timespec timeout;
int error;
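/*
 * Sketch of the expected pattern here (an assumption; the body is not
 * part of this excerpt): small operation vectors use the on-stack
 * buffer and larger ones a temporary kmem allocation, before the
 * vector is copied in from userspace:
 *
 *	if (nsops <= SMALL_SOPS)
 *		sops = small_sops;
 *	else
 *		sops = kmem_alloc(nsops * sizeof(*sops), KM_SLEEP);
 *	error = copyin(usops, sops, nsops * sizeof(*sops));
 */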
/*
* Go through the undo structures for this process and apply the
* adjustments to semaphores.
*/
/*ARGSUSED*/
void
semexit(struct proc *p, void *v)
{
struct sem_undo *suptr;
struct sem_undo **supptr;
if ((p->p_flag & PK_SYSVSEM) == 0)
return;
mutex_enter(&semlock);
/*
* Go through the chain of undo vectors looking for one
* associated with this process.
*/
for (supptr = &semu_list; (suptr = *supptr) != NULL;
supptr = &suptr->un_next) {
if (suptr->un_proc == p)
break;
}
/*
* If there is no undo vector, skip to the end.
*/
if (suptr == NULL) {
mutex_exit(&semlock);
return;
}
/*
* We now have an undo vector for this process.
*/
SEM_PRINTF(("proc @%p has undo structure with %d entries\n", p,
suptr->un_cnt));
/*
* If there are any active undo elements then process them.
*/
if (suptr->un_cnt > 0) {
int ix;
for (ix = 0; ix < suptr->un_cnt; ix++) {
int semid = suptr->un_ent[ix].un_id;
int semnum = suptr->un_ent[ix].un_num;
int adjval = suptr->un_ent[ix].un_adjval;
struct semid_ds *semaptr;
semaptr = &sema[semid];
if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0)
panic("semexit - semid not allocated");
if (semnum >= semaptr->sem_nsems)
panic("semexit - semnum out of range");