Index: sys/kern/exec_elf_common.c
===================================================================
RCS file: /cvsroot/syssrc/sys/kern/exec_elf_common.c,v
retrieving revision 1.15
diff -u -r1.15 exec_elf_common.c
--- sys/kern/exec_elf_common.c  2002/05/31 16:49:12     1.15
+++ sys/kern/exec_elf_common.c  2002/07/08 00:50:22
@@ -62,6 +62,9 @@
int
exec_elf_setup_stack(struct proc *p, struct exec_package *epp)
{
+       u_long max_stack_size;
+       u_long access_linear_min, access_size;
+       u_long noaccess_linear_min, noaccess_size;

#ifndef        USRSTACK32
#define USRSTACK32     (0x00000000ffffffffL&~PGOFSET)
@@ -69,11 +72,13 @@

       if (epp->ep_flags & EXEC_32) {
               epp->ep_minsaddr = USRSTACK32;
-               epp->ep_maxsaddr = epp->ep_minsaddr - MAXSSIZ;
+               max_stack_size = MAXSSIZ;
       } else {
-               epp->ep_maxsaddr = USRSTACK - MAXSSIZ;
               epp->ep_minsaddr = USRSTACK;
+               max_stack_size = MAXSSIZ;
       }
+       epp->ep_maxsaddr = (u_long)__STACK_GROW(epp->ep_minsaddr,
+               max_stack_size);
       epp->ep_ssize = p->p_rlimit[RLIMIT_STACK].rlim_cur;

       /*
@@ -83,15 +88,17 @@
        *
        * arguably, it could be made into one, but that would require the
        * addition of another mapping proc, which is unnecessary
-        *
-        * note that in memory, things assumed to be: 0 ... ep_maxsaddr
-        * <stack> ep_minsaddr
        */
-       NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero,
-           ((epp->ep_minsaddr - epp->ep_ssize) - epp->ep_maxsaddr),
-           epp->ep_maxsaddr, NULLVP, 0, VM_PROT_NONE);
-       NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, epp->ep_ssize,
-           (epp->ep_minsaddr - epp->ep_ssize), NULLVP, 0,
+       access_size = epp->ep_ssize;
+       access_linear_min = (u_long)__STACK_ALLOC(epp->ep_minsaddr,
+           access_size);
+       noaccess_size = max_stack_size - access_size;
+       noaccess_linear_min = (u_long)__STACK_ALLOC(
+           __STACK_GROW(epp->ep_minsaddr, access_size), noaccess_size);
+       NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, noaccess_size,
+           noaccess_linear_min, NULLVP, 0, VM_PROT_NONE);
+       NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, access_size,
+           access_linear_min, NULLVP, 0,
           VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);

       return 0;
Index: sys/kern/init_main.c
===================================================================
RCS file: /cvsroot/syssrc/sys/kern/init_main.c,v
retrieving revision 1.201
diff -u -r1.201 init_main.c
--- sys/kern/init_main.c        2002/06/17 16:22:50     1.201
+++ sys/kern/init_main.c        2002/07/08 00:50:22
@@ -598,17 +598,17 @@
       /*
        * Need just enough stack to hold the faked-up "execve()" arguments.
        */
-       addr = USRSTACK - PAGE_SIZE;
+       addr = (vaddr_t)__STACK_ALLOC(USRSTACK, PAGE_SIZE);
       if (uvm_map(&p->p_vmspace->vm_map, &addr, PAGE_SIZE,
                    NULL, UVM_UNKNOWN_OFFSET, 0,
                    UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY,
                   UVM_ADV_NORMAL,
                    UVM_FLAG_FIXED|UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW)) != 0)
               panic("init: couldn't allocate argument space");
-       p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
+       p->p_vmspace->vm_maxsaddr = (caddr_t)__STACK_MAX(addr, PAGE_SIZE);

       for (pathp = &initpaths[0]; (path = *pathp) != NULL; pathp++) {
-               ucp = (char *)(addr + PAGE_SIZE);
+               ucp = (char *)USRSTACK;

               /*
                * Construct the boot flag argument.
@@ -637,8 +637,9 @@
#ifdef DEBUG
                       printf("init: copying out flags `%s' %d\n", flags, i);
#endif
-                       (void)copyout((caddr_t)flags, (caddr_t)(ucp -= i), i);
-                       arg1 = ucp;
+                       arg1 = __STACK_ALLOC(ucp, i);
+                       ucp = __STACK_MAX(arg1, i);
+                       (void)copyout((caddr_t)flags, arg1, i);
               }

               /*
@@ -648,29 +649,27 @@
#ifdef DEBUG
               printf("init: copying out path `%s' %d\n", path, i);
#endif
-               (void)copyout((caddr_t)path, (caddr_t)(ucp -= i), i);
-               arg0 = ucp;
+               arg0 = __STACK_ALLOC(ucp, i);
+               ucp = __STACK_MAX(arg0, i);
+               (void)copyout((caddr_t)path, arg0, i);

               /*
                * Move out the arg pointers.
                */
-               uap = (char **)((long)ucp & ~ALIGNBYTES);
-               (void)suword((caddr_t)--uap, 0);        /* terminator */
-               if (options != 0)
-                       (void)suword((caddr_t)--uap, (long)arg1);
+               ucp = (caddr_t)__STACK_ALIGN(ucp, ALIGNBYTES);
+               uap = (char **)__STACK_ALLOC(ucp, sizeof(char *) * 3);
+               SCARG(&args, path) = arg0;
+               SCARG(&args, argp) = uap;
+               SCARG(&args, envp) = NULL;
               slash = strrchr(path, '/');
               if (slash)
-                       (void)suword((caddr_t)--uap,
+                       (void)suword((caddr_t)uap++,
                           (long)arg0 + (slash + 1 - path));
               else
-                       (void)suword((caddr_t)--uap, (long)arg0);
-
-               /*
-                * Point at the arguments.
-                */
-               SCARG(&args, path) = arg0;
-               SCARG(&args, argp) = uap;
-               SCARG(&args, envp) = NULL;
+                       (void)suword((caddr_t)uap++, (long)arg0);
+               if (options != 0)
+                       (void)suword((caddr_t)uap++, (long)arg1);
+               (void)suword((caddr_t)uap++, 0);        /* terminator */

               /*
                * Now try to exec the program.  If can't for any reason
Index: sys/kern/kern_exec.c
===================================================================
RCS file: /cvsroot/syssrc/sys/kern/kern_exec.c,v
retrieving revision 1.152
diff -u -r1.152 kern_exec.c
--- sys/kern/kern_exec.c        2002/04/23 15:11:25     1.152
+++ sys/kern/kern_exec.c        2002/07/08 00:50:22
@@ -545,7 +545,36 @@
       arginfo.ps_nargvstr = argc;
       arginfo.ps_nenvstr = envc;

-       stack = (char *) (vm->vm_minsaddr - len);
+       stack = (char *)__STACK_ALLOC(__STACK_GROW(vm->vm_minsaddr,
+               sizeof(struct ps_strings) + szsigcode),
+               len - (sizeof(struct ps_strings) + szsigcode));
+#ifdef __MACHINE_STACK_GROWS_UP
+       /*
+        * The copyargs call always copies into lower addresses
+        * first, moving towards higher addresses, starting with
+        * the stack pointer that we give.  When the stack grows
+        * down, this puts argc/argv/envp very shallow on the
+        * stack, right at the first user stack pointer, and puts
+        * STACKGAPLEN very deep in the stack.  When the stack
+        * grows up, the situation is reversed.
+        *
+        * Normally, this is no big deal.  But the ld_elf.so _rtld()
+        * function expects to be called with a single pointer to
+        * a region that has a few words it can stash values into,
+        * followed by argc/argv/envp.  When the stack grows down,
+        * it's easy to decrement the stack pointer a little bit to
+        * allocate the space for these few words and pass the new
+        * stack pointer to _rtld.  When the stack grows up, however,
+        * the few words before argc are part of the signal trampoline,
+        * so we have a problem.
+        *
+        * Instead of changing how _rtld works, we take the easy way
+        * out and steal 32 bytes before we call copyargs.  This
+        * space is effectively stolen from STACKGAPLEN.
+        */
+       stack += 32;
+#endif /* __MACHINE_STACK_GROWS_UP */
+
       /* Now copy argc, args & environ to new stack */
       error = (*pack.ep_es->es_copyargs)(&pack, &arginfo, &stack, argp);
       if (error) {
@@ -553,11 +582,11 @@
               goto exec_abort;
       }
       /* Move the stack back to original point */
-       stack = (char *) (vm->vm_minsaddr - len);
+       stack = (char *)__STACK_GROW(vm->vm_minsaddr, len);

       /* fill process ps_strings info */
-       p->p_psstr = (struct ps_strings *)(vm->vm_minsaddr
-               - sizeof(struct ps_strings));
+       p->p_psstr = (struct ps_strings *)__STACK_ALLOC(vm->vm_minsaddr,
+           sizeof(struct ps_strings));
       p->p_psargv = offsetof(struct ps_strings, ps_argvstr);
       p->p_psnargv = offsetof(struct ps_strings, ps_nargvstr);
       p->p_psenv = offsetof(struct ps_strings, ps_envstr);
@@ -573,9 +602,10 @@

       /* copy out the process's signal trapoline code */
       if (szsigcode) {
+               p->p_sigctx.ps_sigcode = __STACK_ALLOC(__STACK_MAX(p->p_psstr,
+                   sizeof(struct ps_strings)), szsigcode);
               if ((error = copyout((char *)pack.ep_es->es_emul->e_sigcode,
-                   p->p_sigctx.ps_sigcode = (char *)p->p_psstr - szsigcode,
-                   szsigcode)) != 0) {
+                   p->p_sigctx.ps_sigcode, szsigcode)) != 0) {
                       DPRINTF(("execve: sig trampoline copyout failed\n"));
                       goto exec_abort;
               }
Index: sys/sys/param.h
===================================================================
RCS file: /cvsroot/syssrc/sys/sys/param.h,v
retrieving revision 1.143
diff -u -r1.143 param.h
--- sys/sys/param.h     2002/07/06 01:31:33     1.143
+++ sys/sys/param.h     2002/07/08 00:50:23
@@ -145,6 +145,35 @@
#include <machine/limits.h>

/*
+ * Stack macros.  On most architectures, the stack grows down,
+ * towards lower addresses; it is the rare architecture where
+ * it grows up, towards higher addresses.
+ *
+ * __STACK_GROW and __STACK_SHRINK adjust a stack pointer by some size,
+ * no questions asked.  __STACK_ALIGN takes an alignment mask, not a size.
+ *
+ * __STACK_ALLOC returns a pointer to allocated stack space of
+ * some size; given such a pointer and a size, __STACK_MAX gives
+ * the maximum (in the "maxsaddr" sense) stack address of the
+ * allocated memory.
+ */
+#ifdef __MACHINE_STACK_GROWS_UP
+#define        __STACK_GROW(sp, _size)         (((caddr_t)(sp)) + (_size))
+#define        __STACK_SHRINK(sp, _size)       (((caddr_t)(sp)) - (_size))
+#define        __STACK_ALIGN(sp, bytes)        \
+       ((caddr_t)((((unsigned long)(sp)) + (bytes)) & ~(bytes))) /* round up */
+#define        __STACK_ALLOC(sp, _size)        ((caddr_t)(sp))
+#define        __STACK_MAX(p, _size)           (((caddr_t)(p)) + (_size))
+#else /* !__MACHINE_STACK_GROWS_UP */
+#define        __STACK_GROW(sp, _size)         (((caddr_t)(sp)) - (_size))
+#define        __STACK_SHRINK(sp, _size)       (((caddr_t)(sp)) + (_size))
+#define        __STACK_ALIGN(sp, bytes)        \
+       ((caddr_t)(((unsigned long)(sp)) & ~(bytes))) /* round down */
+#define        __STACK_ALLOC(sp, _size)        (((caddr_t)(sp)) - (_size))
+#define        __STACK_MAX(p, _size)           ((caddr_t)(p))
+#endif /* __MACHINE_STACK_GROWS_UP */
+
+/*
 * Priorities.  Note that with 32 run queues, differences less than 4 are
 * insignificant.
 */