/*-
* Copyright (c) 1999, 2006, 2007 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
* NASA Ames Research Center, and by Andrew Doran.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Implementation of SVID messages
*
* Author: Daniel Boulet
*
* Copyright 1993 Daniel Boulet and RTMX Inc.
*
* This system call was implemented by Daniel Boulet under contract from RTMX.
*
* Redistribution and use in source forms, with and without modification,
* are permitted provided that this entire comment appears intact.
*
* Redistribution in binary form may occur without any restrictions.
* Obviously, it would be nice if you gave credit where credit is due
* but requiring it would be too onerous.
*
* This software is provided ``AS IS'' without any warranties of any kind.
*/
/*
* File-scope state used to coordinate resizing of the message tables
* with callers that are blocked inside the message system calls.
*/
static u_int msg_waiters = 0; /* total number of msgrcv waiters */
/*
* Set to true by msgrealloc() once it has committed to a reallocation.
* In the code visible here it is read and written with msgmutex held,
* and the system-call paths sleep on msg_realloc_cv while it is set.
*/
static bool msg_realloc_state;
/*
* Condition variable shared by both sides of the handshake:
* msgrealloc() sleeps on it until msg_waiters drops to zero, and the
* system-call paths sleep on it while msg_realloc_state is set.
*/
static kcondvar_t msg_realloc_cv;
/*
* Forward declaration; the definition is elsewhere in this file.
* The receive path calls it on a message header after a failed
* copy-out to user space.
*/
static void msg_freehdr(struct __msg *);
/*
* Defined in another translation unit and not referenced in the
* portion of the file visible here.
* NOTE(review): presumably indicates that SysV message support is
* present in the running kernel -- confirm against its definition.
*/
extern int kern_has_sysvmsg;
/*
* Declares the sysctl setup routine sysctl_ipc_msg_setup via the
* SYSCTL_SETUP_PROTO macro (macro expansion not visible here).
*/
SYSCTL_SETUP_PROTO(sysctl_ipc_msg_setup);
/*
* msginit: check the configured segment size and put the message
* subsystem's tables into their initial, empty state.
*
* Steps visible here:
*  1. Verify that msginfo.msgssz is a power of two in the range
*     8..1024; otherwise print a diagnostic and return EINVAL.
*  2. Chain every msgmaps[] entry into a single free list.
*  3. Chain every msghdrs[] entry into a single free list.
*  4. Initialise each queue's condition variable and mark the queue
*     slot as unused.
*
* NOTE(review): msgmaps, msghdrs and msqs are indexed below, but no
* allocation of them is visible in this view, and the locals `sz` and
* `v` are declared but not used in the visible lines.  The function's
* tail (including its success return) is also outside this view.
* Confirm against the full file before relying on this description.
*/
int
msginit(void)
{
int i, sz;
vaddr_t v;
/*
* msginfo.msgssz should be a power of two for efficiency reasons.
* It is also pretty silly if msginfo.msgssz is less than 8
* or greater than about 256 so ...
*
* NOTE(review): the loop below actually accepts every power of two
* from 8 up to and including 1024, not "about 256".
*/
i = 8;
/* Double i until it matches msgssz or reaches the 1024 ceiling. */
while (i < 1024 && i != msginfo.msgssz)
i <<= 1;
if (i != msginfo.msgssz) {
/* NOTE(review): the diagnostic has no trailing newline. */
printf("msginfo.msgssz = %d, not a small power of 2",
msginfo.msgssz);
return EINVAL;
}
/*
* Build the free list of segment map entries: each entry points at
* the following index and the last one is terminated with -1.
*/
for (i = 0; i < (msginfo.msgseg - 1); i++)
msgmaps[i].next = i + 1;
msgmaps[msginfo.msgseg - 1].next = -1;
/* The list starts at index 0 and every segment is free. */
free_msgmaps = 0;
nfree_msgmaps = msginfo.msgseg;
/*
* Build the free list of message headers: clear msg_type and link
* each header to its successor.
*/
for (i = 0; i < (msginfo.msgtql - 1); i++) {
msghdrs[i].msg_type = 0;
msghdrs[i].msg_next = &msghdrs[i + 1];
}
/* The last header terminates the list. */
i = msginfo.msgtql - 1;
msghdrs[i].msg_type = 0;
msghdrs[i].msg_next = NULL;
free_msghdrs = &msghdrs[0];
/* Prepare every queue slot. */
for (i = 0; i < msginfo.msgmni; i++) {
cv_init(&msqs[i].msq_cv, "msgwait");
/* Implies entry is available */
msqs[i].msq_u.msg_qbytes = 0;
/* Reset to a known value */
msqs[i].msq_u.msg_perm._seq = 0;
}
/*
* msgrealloc: resize the message tables to hold newmsgmni queues and
* newmsgseg segments.
*
* Steps visible here:
*  1. Reject non-positive arguments with EINVAL.
*  2. Allocate one wired, zeroed region big enough for the new
*     message pool, segment map array, message header array and
*     queue array; return ENOMEM if that fails.
*  3. Under msgmutex, return EBUSY if another reallocation is already
*     in progress, otherwise set msg_realloc_state.
*  4. If there are sleepers (msg_waiters != 0), wake every queue's
*     condition variable and sleep on msg_realloc_cv until
*     msg_waiters reaches zero.
*  5. Return EBUSY if the highest in-use queue index or the number of
*     segments currently in use would not fit in the new tables.
*
* NOTE(review): the function body is not complete in this view; see
* the note ahead of the header-copy fragment near the end.
*/
static int
msgrealloc(int newmsgmni, int newmsgseg)
{
struct msgmap *new_msgmaps;
struct __msg *new_msghdrs, *new_free_msghdrs;
char *old_msgpool, *new_msgpool;
kmsq_t *new_msqs;
vaddr_t v;
int i, sz, msqid, newmsgmax, new_nfree_msgmaps;
short new_free_msgmaps;
if (newmsgmni < 1 || newmsgseg < 1)
return EINVAL;
/* Allocate the wired memory for our structures */
/*
* The region is laid out as four ALIGN()ed parts: the message pool
* (msgssz bytes per segment), the segment map array, the message
* header array (still msginfo.msgtql entries; that count is not
* changed here) and the queue array.
*
* NOTE(review): these sizes are computed in `int` and no upper bound
* on newmsgmni/newmsgseg is visible here -- confirm that callers
* bound them so the products cannot overflow.
*/
newmsgmax = msginfo.msgssz * newmsgseg;
sz = ALIGN(newmsgmax) +
ALIGN(newmsgseg * sizeof(struct msgmap)) +
ALIGN(msginfo.msgtql * sizeof(struct __msg)) +
ALIGN(newmsgmni * sizeof(kmsq_t));
sz = round_page(sz);
v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
if (v == 0)
return ENOMEM;
mutex_enter(&msgmutex);
/* Only one reallocation at a time: back out if one is running. */
if (msg_realloc_state) {
mutex_exit(&msgmutex);
uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
return EBUSY;
}
msg_realloc_state = true;
if (msg_waiters) {
/*
* With the reallocation state now marked, wake up all
* waiters and wait until they have all exited.
*/
for (i = 0; i < msginfo.msgmni; i++)
cv_broadcast(&msqs[i].msq_cv);
while (msg_waiters)
cv_wait(&msg_realloc_cv, &msgmutex);
}
old_msgpool = msgpool;
/* We cannot reallocate less memory than we use */
/*
* Find the highest queue index that is in use.  A slot counts as in
* use when msg_qbytes is non-zero or MSG_LOCKED is set in its mode.
* i stays 0 if no slot is in use.
*/
i = 0;
for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
struct msqid_ds *mptr;
kmsq_t *msq;
msq = &msqs[msqid];
mptr = &msq->msq_u;
if (mptr->msg_qbytes || (mptr->msg_perm.mode & MSG_LOCKED))
i = msqid;
}
/*
* Refuse to shrink below current usage: the highest used index must
* fit in newmsgmni and the segments in use must fit in newmsgseg.
*
* NOTE(review): msg_realloc_state was set to true above and is not
* cleared on this return path in the lines visible here; the
* system-call paths sleep while that flag is set.  Confirm whether
* the flag should be reset (and msg_realloc_cv broadcast) before
* returning EBUSY.
*/
if (i >= newmsgmni || (msginfo.msgseg - nfree_msgmaps) > newmsgseg) {
mutex_exit(&msgmutex);
uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
return EBUSY;
}
/*
* NOTE(review): lines appear to be missing from this view between
* the check above and the fragment below.  The fragment uses
* nmsghdr, pmsghdr and msghdr, none of which are declared in the
* visible part of this function; it reads new_free_msghdrs, which
* is never assigned in the visible lines; and it dereferences mptr
* outside the loop body that declared it.  The while loop opened
* here is also not closed within the visible part of msgrealloc.
*/
/*
* Go through the message headers, and copy each one
* by taking the new ones, and thus defragmenting.
*/
nmsghdr = pmsghdr = NULL;
msghdr = mptr->_msg_first;
while (msghdr) {
short nnext = 0, next;
u_short msgsz, segcnt;
/* Take an entry from the new list of free msghdrs */
nmsghdr = new_free_msghdrs;
KASSERT(nmsghdr != NULL);
new_free_msghdrs = nmsghdr->msg_next;
mutex_enter(&msgmutex);
/* In case of reallocation, we will wait for completion */
while (__predict_false(msg_realloc_state))
cv_wait(&msg_realloc_cv, &msgmutex);
if (msqid < 0 || msqid >= msginfo.msgmni) {
MSG_PRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
msginfo.msgmni));
error = EINVAL;
goto unlock;
}
msq = &msqs[msqid];
msqptr = &msq->msq_u;
if (msqptr->msg_qbytes == 0) {
MSG_PRINTF(("no such message queue id\n"));
error = EINVAL;
goto unlock;
}
if (msqptr->msg_perm._seq != IPCID_TO_SEQ(msqidr)) {
MSG_PRINTF(("wrong sequence number\n"));
error = EINVAL;
goto unlock;
}
if ((error = ipcperm(cred, &msqptr->msg_perm, IPC_W))) {
MSG_PRINTF(("requester doesn't have write access\n"));
goto unlock;
}
if (msqptr->msg_perm.mode & MSG_LOCKED) {
MSG_PRINTF(("msqid is locked\n"));
need_more_resources = 1;
}
if (msgsz + msqptr->_msg_cbytes > msqptr->msg_qbytes) {
MSG_PRINTF(("msgsz + msg_cbytes > msg_qbytes\n"));
need_more_resources = 1;
}
if (segs_needed > nfree_msgmaps) {
MSG_PRINTF(("segs_needed > nfree_msgmaps\n"));
need_more_resources = 1;
}
if (free_msghdrs == NULL) {
MSG_PRINTF(("no more msghdrs\n"));
need_more_resources = 1;
}
if (need_more_resources) {
int we_own_it;
if ((msgflg & IPC_NOWAIT) != 0) {
MSG_PRINTF(("need more resources but caller "
"doesn't want to wait\n"));
error = EAGAIN;
goto unlock;
}
if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0) {
MSG_PRINTF(("we don't own the msqid_ds\n"));
we_own_it = 0;
} else {
/* Force later arrivals to wait for our
request */
MSG_PRINTF(("we own the msqid_ds\n"));
msqptr->msg_perm.mode |= MSG_LOCKED;
we_own_it = 1;
}
if (we_own_it)
msqptr->msg_perm.mode &= ~MSG_LOCKED;
/*
* In case of such state, notify reallocator and
* restart the call.
*/
if (msg_realloc_state) {
cv_broadcast(&msg_realloc_cv);
mutex_exit(&msgmutex);
goto restart;
}
if (error != 0) {
MSG_PRINTF(("msgsnd: interrupted system "
"call\n"));
error = EINTR;
goto unlock;
}
/*
* Make sure that the msq queue still exists
*/
if (msqptr->msg_qbytes == 0) {
MSG_PRINTF(("msqid deleted\n"));
error = EIDRM;
goto unlock;
}
} else {
MSG_PRINTF(("got all the resources that we need\n"));
break;
}
}
/*
* We have the resources that we need.
* Make sure!
*/
mutex_enter(&msgmutex);
/* In case of reallocation, we will wait for completion */
while (__predict_false(msg_realloc_state))
cv_wait(&msg_realloc_cv, &msgmutex);
if (msqid < 0 || msqid >= msginfo.msgmni) {
MSG_PRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
msginfo.msgmni));
error = EINVAL;
goto unlock;
}
msq = &msqs[msqid];
msqptr = &msq->msq_u;
if (msqptr->msg_qbytes == 0) {
MSG_PRINTF(("no such message queue id\n"));
error = EINVAL;
goto unlock;
}
if (msqptr->msg_perm._seq != IPCID_TO_SEQ(msqidr)) {
MSG_PRINTF(("wrong sequence number\n"));
error = EINVAL;
goto unlock;
}
if ((error = ipcperm(cred, &msqptr->msg_perm, IPC_R))) {
MSG_PRINTF(("requester doesn't have read access\n"));
goto unlock;
}
msghdr = NULL;
while (msghdr == NULL) {
if (msgtyp == 0) {
msghdr = msqptr->_msg_first;
if (msghdr != NULL) {
if (msgsz < msghdr->msg_ts &&
(msgflg & MSG_NOERROR) == 0) {
MSG_PRINTF(("first msg on the queue "
"is too big (want %lld, got %d)\n",
(long long)msgsz, msghdr->msg_ts));
error = E2BIG;
goto unlock;
}
if (msqptr->_msg_first == msqptr->_msg_last) {
msqptr->_msg_first = NULL;
msqptr->_msg_last = NULL;
} else {
msqptr->_msg_first = msghdr->msg_next;
KASSERT(msqptr->_msg_first != NULL);
}
}
} else {
struct __msg *previous;
struct __msg **prev;
for (previous = NULL, prev = &msqptr->_msg_first;
(msghdr = *prev) != NULL;
previous = msghdr, prev = &msghdr->msg_next) {
/*
* Is this message's type an exact match or is
* this message's type less than or equal to
* the absolute value of a negative msgtyp?
* Note that the second half of this test can
* NEVER be true if msgtyp is positive since
* msg_type is always positive!
*/
MSG_PRINTF(("found message type %ld, requested %ld\n",
msghdr->msg_type, msgtyp));
if (msgsz < msghdr->msg_ts &&
(msgflg & MSG_NOERROR) == 0) {
MSG_PRINTF(("requested message on the queue "
"is too big (want %lld, got %d)\n",
(long long)msgsz, msghdr->msg_ts));
error = E2BIG;
goto unlock;
}
*prev = msghdr->msg_next;
if (msghdr != msqptr->_msg_last)
break;
if (previous == NULL) {
KASSERT(prev == &msqptr->_msg_first);
msqptr->_msg_first = NULL;
msqptr->_msg_last = NULL;
} else {
KASSERT(prev != &msqptr->_msg_first);
msqptr->_msg_last = previous;
}
break;
}
}
/*
* We've either extracted the msghdr for the appropriate
* message or there isn't one.
* If there is one then bail out of this loop.
*/
if (msghdr != NULL)
break;
/*
* Hmph! No message found. Does the user want to wait?
*/
if ((msgflg & IPC_NOWAIT) != 0) {
MSG_PRINTF(("no appropriate message found (msgtyp=%ld)\n",
msgtyp));
error = ENOMSG;
goto unlock;
}
/*
* In case of such state, notify reallocator and
* restart the call.
*/
if (msg_realloc_state) {
cv_broadcast(&msg_realloc_cv);
mutex_exit(&msgmutex);
goto restart;
}
if (error != 0) {
MSG_PRINTF(("msgsnd: interrupted system call\n"));
error = EINTR;
goto unlock;
}
/*
* Make msgsz the actual amount that we'll be returning.
* Note that this effectively truncates the message if it is too long
* (since msgsz is never increased).
*/
MSG_PRINTF(("found a message, msgsz=%lld, msg_ts=%d\n",
(long long)msgsz, msghdr->msg_ts));
if (msgsz > msghdr->msg_ts)
msgsz = msghdr->msg_ts;
/*
* Return the type to the user.
*/
mutex_exit(&msgmutex);
error = (*put_type)(&msghdr->msg_type, user_msgp, typesz);
mutex_enter(&msgmutex);
if (error != 0) {
MSG_PRINTF(("error (%d) copying out message type\n", error));
msg_freehdr(msghdr);
cv_broadcast(&msq->msq_cv);
goto unlock;
}
user_msgp += typesz;
/*
* Return the segments to the user
*/
next = msghdr->msg_spot;
for (len = 0; len < msgsz; len += msginfo.msgssz) {
size_t tlen;
KASSERT(next > -1);
KASSERT(next < msginfo.msgseg);
if (msgsz - len > msginfo.msgssz)
tlen = msginfo.msgssz;
else
tlen = msgsz - len;
mutex_exit(&msgmutex);
error = copyout(&msgpool[next * msginfo.msgssz],
user_msgp, tlen);
mutex_enter(&msgmutex);
if (error != 0) {
MSG_PRINTF(("error (%d) copying out message segment\n",
error));
msg_freehdr(msghdr);
cv_broadcast(&msq->msq_cv);
goto unlock;
}
user_msgp += tlen;
next = msgmaps[next].next;
}
/*
* Done, return the actual number of bytes copied out.
*/