/* Craft a return frame which will cause the child to pop out of
 * the scheduler in user mode with the return register zero
 * (an illustrative machine-dependent sketch follows this function)
 */
forkchild(p, up->dbgreg);
/*
 * Abort the child process on error.  waserror() is the kernel's
 * setjmp-style error handling: it returns non-zero when a later
 * error() unwinds back here, and nexterror() re-raises to the
 * next handler up the stack.
 */
if(waserror()){
p->kp = 1;	/* kernel process */
kprocchild(p, abortion, 0);	/* child cleans itself up and exits */
ready(p);
nexterror();
}
/*
 * Make a new set of memory segments.  With RFMEM set, dupseg
 * shares the data and bss segments with the parent rather than
 * copying them; text is always shared.
 */
n = flag & RFMEM;
qlock(&p->seglock);
if(waserror()){
qunlock(&p->seglock);
nexterror();
}
for(i = 0; i < NSEG; i++)
if(up->seg[i] != nil)
p->seg[i] = dupseg(up->seg, i, n);
qunlock(&p->seglock);
poperror();
/*
* since the bss/data segments are now shareable,
* any mmu info about this process is now stale
* (i.e. has bad properties) and has to be discarded.
*/
flushmmu();
p->basepri = up->basepri;
p->priority = up->basepri;
p->fixedpri = up->fixedpri;
p->mp = up->mp;
wm = up->wired;
if(wm != nil)
procwired(p, wm->machno);
p->psstate = nil;
ready(p);
sched();	/* let the child run first */
return pid;
}
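/*
 * Illustrative sketch, not part of this file: what a machine-dependent
 * forkchild() typically does.  The Ureg layout, the return register r0
 * and the forkret() entry point are hypothetical here; each port
 * supplies its own version.
 */
void
forkchild(Proc *p, Ureg *ureg)
{
	Ureg *cureg;

	/* reserve room for the child's copy of the parent's trap frame */
	p->sched.sp = (uintptr)p->kstack+KSTACK-sizeof(Ureg);
	p->sched.pc = (uintptr)forkret;

	cureg = (Ureg*)p->sched.sp;
	memmove(cureg, ureg, sizeof(Ureg));

	/* the child's syscall return value: 0 */
	cureg->r0 = 0;
}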
data = l2be(exec.data);
bss = l2be(exec.bss);
align--;	/* alignment size -> mask */
t = (UTZERO+sizeof(Exec)+text+align) & ~align;	/* base of data: end of text, rounded up */
align = BY2PG-1;
d = (t + data + align) & ~align;	/* base of bss: end of data, page rounded */
bssend = t + data + bss;
b = (bssend + align) & ~align;	/* end of bss, page rounded */
if(t >= (USTKTOP-USTKSIZE) || d >= (USTKTOP-USTKSIZE) || b >= (USTKTOP-USTKSIZE))
error(Ebadexec);	/* would collide with the user stack */
/*
 * Args: pass 1: count the arguments and the bytes they need.
 * Pass 2, below, copies them onto the new stack.
 */
nbytes = sizeof(Tos); /* hole for profiling clock at top of stack (and more) */
nargs = 0;
if(indir){
argp = progarg;
while(*argp != nil){
a = *argp++;
nbytes += strlen(a) + 1;
nargs++;
}
}
argp = argp0;
while(*argp != nil){
a = *argp++;
if(((uintptr)argp&(BY2PG-1)) < BY2WD)
validaddr((uintptr)argp, BY2WD, 0);	/* argp just crossed into a new page */
validaddr((uintptr)a, 1, 0);
e = vmemchr(a, 0, USTKSIZE);
if(e == nil)
error(Ebadarg);
nbytes += (e - a) + 1;
if(nbytes >= USTKSIZE)
error(Enovmem);
nargs++;
}
/* stack needed: the argv vector (nargs pointers plus a nil) and the strings, word aligned */
ssize = BY2WD*(nargs+1) + ((nbytes+(BY2WD-1)) & ~(BY2WD-1));
/*
* 8-byte align SP for those (e.g. sparc) that need it.
* execregs() will subtract another 4 bytes for argc.
*/
if(BY2WD == 4 && (ssize+4) & 7)
ssize += 4;
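/*
 * Example: with BY2WD==4 and ssize==24, (24+4)&7 is nonzero, so
 * ssize is padded to 28; once execregs() pushes argc the stack
 * pointer sits at USTKTOP-32, which is 8-byte aligned.
 */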
if(PGROUND(ssize) >= USTKSIZE)
error(Enovmem);
/*
* Build the stack segment, putting it in kernel virtual for the moment
*/
qlock(&up->seglock);
if(waserror()){
qunlock(&up->seglock);
nexterror();
}
for(i=0; i<nargs; i++){
if(indir && *argp==nil) {
/* done with the #! indirect args; continue with the real ones */
indir = 0;
argp = argp0;
}
*argv++ = charp + (USTKTOP-tstk);	/* the string's eventual user-space address */
a = *argp++;
if(indir)
e = strchr(a, 0);
else {
if(charp >= (char*)tos)
error(Ebadarg);
validaddr((uintptr)a, 1, 0);
e = vmemchr(a, 0, (char*)tos - charp);
if(e == nil)
error(Ebadarg);
}
n = (e - a) + 1;
memmove(charp, a, n);
charp += n;
}
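/*
 * The new stack as assembled (in kernel virtual for now),
 * from USTKTOP downwards:
 *	Tos structure			(profiling clock etc.)
 *	argument strings
 *	argv[] vector, nil terminated	(pointers into the strings)
 *	argc				(pushed later by execregs())
 */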
/* save a copy of the args (shown by ps via devproc); easiest from the new process's stack */
a = (char*)(tstk - nbytes);
n = charp - a;
if(n > 128) /* don't waste too much space on huge arg lists */
n = 128;
args = smalloc(n);
memmove(args, a, n);
if(n>0 && args[n-1]!='\0'){
/* make sure last arg is NUL-terminated */
/* put NUL at UTF-8 character boundary */
for(i=n-1; i>0; --i)
if(fullrune(args+i, n-i))
break;
args[i] = 0;
n = i+1;
}
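/*
 * Example: if the 128-byte cut lands in the middle of a multi-byte
 * rune, the loop above backs up to the last position where fullrune()
 * sees a complete UTF-8 sequence and truncates there, so the saved
 * string never ends in a partial rune.
 */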
/*
* Committed.
* Free old memory.
* Special segments are maintained across exec
*/
for(i = SSEG; i <= BSEG; i++) {
putseg(up->seg[i]);
/* prevent a second free if we have an error */
up->seg[i] = nil;
}
for(i = ESEG+1; i < NSEG; i++) {
s = up->seg[i];
if(s != nil && (s->type&SG_CEXEC) != 0) {
putseg(s);
up->seg[i] = nil;
}
}
/*
* The implementation of semaphores is complicated by needing
* to avoid rescheduling in syssemrelease, so that it is safe
* to call from real-time processes. This means syssemrelease
* cannot acquire any qlocks, only spin locks.
*
* Semacquire and semrelease must both manipulate the semaphore
* wait list. Lock-free linked lists only exist in theory, not
* in practice, so the wait list is protected by a spin lock.
*
* The semaphore value *addr is stored in user memory, so it
* cannot be read or written while holding spin locks.
*
* Thus, we can access the list only when holding the lock, and
* we can access the semaphore only when not holding the lock.
* This makes things interesting. Note that sleep's condition function
* is called while holding two locks - r and up->rlock - so it cannot
* access the semaphore value either.
*
* An acquirer announces its intention to try for the semaphore
* by putting a Sema structure onto the wait list and then
 * setting Sema.waiting. After one last check of the semaphore,
* the acquirer sleeps until Sema.waiting==0. A releaser of n
* must wake up n acquirers who have Sema.waiting set. It does
* this by clearing Sema.waiting and then calling wakeup.
*
* There are three interesting races here.
* The first is that in this particular sleep/wakeup usage, a single
* wakeup can rouse a process from two consecutive sleeps!
* The ordering is:
*
* (a) set Sema.waiting = 1
* (a) call sleep
* (b) set Sema.waiting = 0
* (a) check Sema.waiting inside sleep, return w/o sleeping
* (a) try for semaphore, fail
* (a) set Sema.waiting = 1
* (a) call sleep
* (b) call wakeup(a)
* (a) wake up again
*
* This is okay - semacquire will just go around the loop
* again. It does mean that at the top of the for(;;) loop in
* semacquire, phore.waiting might already be set to 1.
*
* The second is that a releaser might wake an acquirer who is
* interrupted before he can acquire the lock. Since
* release(n) issues only n wakeup calls -- only n can be used
* anyway -- if the interrupted process is not going to use his
* wakeup call he must pass it on to another acquirer.
*
* The third race is similar to the second but more subtle. An
* acquirer sets waiting=1 and then does a final canacquire()
* before going to sleep. The opposite order would result in
* missing wakeups that happen between canacquire and
* waiting=1. (In fact, the whole point of Sema.waiting is to
* avoid missing wakeups between canacquire() and sleep().) But
* there can be spurious wakeups between a successful
* canacquire() and the following semdequeue(). This wakeup is
* not useful to the acquirer, since he has already acquired
* the semaphore. Like in the previous case, though, the
* acquirer must pass the wakeup call along.
*
* This is all rather subtle. The code below has been verified
* with the spin model /sys/src/9/port/semaphore.p. The
* original code anticipated the second race but not the first
* or third, which were caught only with spin. The first race
* is mentioned in /sys/doc/sleep.ps, but I'd forgotten about it.
* It was lucky that my abstract model of sleep/wakeup still managed
* to preserve that behavior.
*
* I remain slightly concerned about memory coherence
* outside of locks. The spin model does not take
* queued processor writes into account so we have to
* think hard. The only variables accessed outside locks
* are the semaphore value itself and the boolean flag
* Sema.waiting. The value is only accessed with cmpswap,
* whose job description includes doing the right thing as
* far as memory coherence across processors. That leaves
* Sema.waiting. To handle it, we call coherence() before each
* read and after each write. - rsc
*/
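/*
 * Illustrative sketch, not part of this file: the acquire loop the
 * comment above describes, with the waserror/poperror handling around
 * sleep and the final error path elided.  sketchacquire() is a
 * hypothetical stand-in for the real acquire path; canacquire() is
 * assumed to be a cmpswap-based decrement of the user-space value and
 * semawoke() a sleep condition returning !phore.waiting.  The queue
 * helpers are the ones defined below.
 */
static int
sketchacquire(Segment *s, long *addr, int block)
{
	int acquired;
	Sema phore;

	acquired = 0;
	semqueue(s, addr, &phore);
	for(;;){
		/* announce intent first, then check: see the third race above */
		phore.waiting = 1;
		coherence();
		if(canacquire(addr)){
			acquired = 1;
			break;
		}
		if(!block)
			break;
		sleep(&phore, semawoke, &phore);
	}
	semdequeue(s, &phore);
	coherence();
	if(!phore.waiting)	/* received a wakeup we cannot use: pass it on */
		semwakeup(s, addr, 1);
	return acquired;
}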
/* Add semaphore p with addr a to list in seg. */
static void
semqueue(Segment *s, long *a, Sema *p)
{
memset(p, 0, sizeof *p);
p->addr = a;
lock(&s->sema); /* uses s->sema.Rendez.Lock, but no one else is */
/* insert at the tail; &s->sema is the sentinel of the circular list */
p->next = &s->sema;
p->prev = s->sema.prev;
p->next->prev = p;
p->prev->next = p;
unlock(&s->sema);
}
/* Remove semaphore p from list in seg. */
static void
semdequeue(Segment *s, Sema *p)
{
lock(&s->sema);
p->next->prev = p->prev;
p->prev->next = p->next;
unlock(&s->sema);
}
/* Wake up n waiters with addr a on list in seg. */
static void
semwakeup(Segment *s, long *a, long n)
{
Sema *p;