/* $NetBSD: pthread.c,v 1.187 2025/04/02 14:23:34 riastradh Exp $ */

/* $NetBSD: pthread.c,v 1.187 2025/04/02 14:23:34 riastradh Exp $ */

/*-
* Copyright (c) 2001, 2002, 2003, 2006, 2007, 2008, 2020
* The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Nathan J. Williams and Andrew Doran.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/

#include <sys/cdefs.h>
__RCSID("$NetBSD: pthread.c,v 1.187 2025/04/02 14:23:34 riastradh Exp $");

#define __EXPOSE_STACK 1

/* Need to use libc-private names for atomic operations. */
#include "../../common/lib/libc/atomic/atomic_op_namespace.h"

#include <sys/param.h>
#include <sys/exec_elf.h>
#include <sys/mman.h>
#include <sys/lwp.h>
#include <sys/lwpctl.h>
#include <sys/resource.h>
#include <sys/sysctl.h>
#include <sys/tls.h>
#include <uvm/uvm_param.h>

#include <assert.h>
#include <dlfcn.h>
#include <err.h>
#include <errno.h>
#include <lwp.h>
#include <signal.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <syslog.h>
#include <ucontext.h>
#include <unistd.h>
#include <sched.h>

#include "atexit.h"
#include "pthread.h"
#include "pthread_int.h"
#include "pthread_makelwp.h"
#include "reentrant.h"

#define atomic_load_relaxed(p) \
atomic_load_explicit(p, memory_order_relaxed)

#define atomic_store_relaxed(p, v) \
atomic_store_explicit(p, v, memory_order_relaxed)

#define atomic_store_release(p, v) \
atomic_store_explicit(p, v, memory_order_release)

__BEGIN_DECLS
void _malloc_thread_cleanup(void) __weak;
__END_DECLS

pthread_rwlock_t pthread__alltree_lock = PTHREAD_RWLOCK_INITIALIZER;
static rb_tree_t pthread__alltree;

static signed int pthread__cmp(void *, const void *, const void *);

static const rb_tree_ops_t pthread__alltree_ops = {
.rbto_compare_nodes = pthread__cmp,
.rbto_compare_key = pthread__cmp,
.rbto_node_offset = offsetof(struct __pthread_st, pt_alltree),
.rbto_context = NULL
};

static void pthread__create_tramp(void *);
static void pthread__initthread(pthread_t);
static void pthread__scrubthread(pthread_t, char *, int);
static void pthread__initmain(pthread_t *);
static void pthread__reap(pthread_t);

void pthread__init(void);

int pthread__started;
int __uselibcstub = 1;
pthread_mutex_t pthread__deadqueue_lock = PTHREAD_MUTEX_INITIALIZER;
pthread_queue_t pthread__deadqueue;
pthread_queue_t pthread__allqueue;

static pthread_attr_t pthread_default_attr;
static lwpctl_t pthread__dummy_lwpctl = { .lc_curcpu = LWPCTL_CPU_NONE };

enum {
DIAGASSERT_ABORT = 1<<0,
DIAGASSERT_STDERR = 1<<1,
DIAGASSERT_SYSLOG = 1<<2
};

static int pthread__diagassert;

int pthread__concurrency;
int pthread__nspins;
size_t pthread__unpark_max = PTHREAD__UNPARK_MAX;
int pthread__dbg; /* set by libpthread_dbg if active */

/*
* We have to initialize the pthread_stack* variables here because
* mutexes are used before pthread_init() and thus pthread__initmain()
* are called. Since mutexes only save the stack pointer and not a
* pointer to the thread data, it is safe to change the mapping from
* stack pointer to thread data afterwards.
*/
size_t pthread__stacksize;
size_t pthread__guardsize;
size_t pthread__pagesize;
static struct __pthread_st *pthread__main;
static size_t __pthread_st_size;

int _sys___sigprocmask14(int, const sigset_t *, sigset_t *);

__strong_alias(__libc_thr_self,pthread_self)
__strong_alias(__libc_thr_create,pthread_create)
__strong_alias(__libc_thr_exit,pthread_exit)
__strong_alias(__libc_thr_errno,pthread__errno)
__strong_alias(__libc_thr_setcancelstate,pthread_setcancelstate)
__strong_alias(__libc_thr_equal,pthread_equal)
__strong_alias(__libc_thr_init,pthread__init)

/*
* Static library kludge. Place a reference to a symbol any library
* file which does not already have a reference here.
*/
extern int pthread__cancel_stub_binder;

void *pthread__static_lib_binder[] = {
&pthread__cancel_stub_binder,
pthread_cond_init,
pthread_mutex_init,
pthread_rwlock_init,
pthread_barrier_init,
pthread_key_create,
pthread_setspecific,
};

#define NHASHLOCK 64

static union hashlock {
pthread_mutex_t mutex;
char pad[64];
} hashlocks[NHASHLOCK] __aligned(64);

static void
pthread__prefork(void)
{
pthread_mutex_lock(&pthread__deadqueue_lock);
}

static void
pthread__fork_parent(void)
{
pthread_mutex_unlock(&pthread__deadqueue_lock);
}

static void
pthread__fork_child(void)
{
struct __pthread_st *self = pthread__self();

pthread_mutex_init(&pthread__deadqueue_lock, NULL);

/* lwpctl state is not copied across fork. */
if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &self->pt_lwpctl)) {
err(EXIT_FAILURE, "_lwp_ctl");
}
self->pt_lid = _lwp_self();
}

/*
* This needs to be started by the library loading code, before main()
* gets to run, for various things that use the state of the initial thread
* to work properly (thread-specific data is an application-visible example;
* spinlock counts for mutexes is an internal example).
*/
void
pthread__init(void)
{
pthread_t first;
char *p;
int mib[2];
unsigned int value;
ssize_t slen;
size_t len;
extern int __isthreaded;

/*
* Allocate pthread_keys descriptors before
* resetting __uselibcstub because otherwise
* malloc() will call pthread_keys_create()
* while pthread_keys descriptors are not
* yet allocated.
*/
pthread__main = pthread_tsd_init(&__pthread_st_size);
if (pthread__main == NULL)
err(EXIT_FAILURE, "Cannot allocate pthread storage");

__uselibcstub = 0;

pthread__pagesize = (size_t)sysconf(_SC_PAGESIZE);
pthread__concurrency = (int)sysconf(_SC_NPROCESSORS_CONF);

mib[0] = CTL_VM;
mib[1] = VM_THREAD_GUARD_SIZE;
len = sizeof(value);
if (sysctl(mib, __arraycount(mib), &value, &len, NULL, 0) == 0)
pthread__guardsize = value;
else
pthread__guardsize = pthread__pagesize;

/* Initialize locks first; they're needed elsewhere. */
pthread__lockprim_init();
for (int i = 0; i < NHASHLOCK; i++) {
pthread_mutex_init(&hashlocks[i].mutex, NULL);
}

/* Fetch parameters. */
slen = _lwp_unpark_all(NULL, 0, NULL);
if (slen < 0)
err(EXIT_FAILURE, "_lwp_unpark_all");
if ((size_t)slen < pthread__unpark_max)
pthread__unpark_max = slen;

/* Basic data structure setup */
pthread_attr_init(&pthread_default_attr);
PTQ_INIT(&pthread__allqueue);
PTQ_INIT(&pthread__deadqueue);

rb_tree_init(&pthread__alltree, &pthread__alltree_ops);

/* Create the thread structure corresponding to main() */
pthread__initmain(&first);
pthread__initthread(first);
pthread__scrubthread(first, NULL, 0);

first->pt_lid = _lwp_self();
PTQ_INSERT_HEAD(&pthread__allqueue, first, pt_allq);
(void)rb_tree_insert_node(&pthread__alltree, first);

if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &first->pt_lwpctl) != 0) {
err(EXIT_FAILURE, "_lwp_ctl");
}

/* Start subsystems */
PTHREAD_MD_INIT

for (p = pthread__getenv("PTHREAD_DIAGASSERT"); p && *p; p++) {
switch (*p) {
case 'a':
pthread__diagassert |= DIAGASSERT_ABORT;
break;
case 'A':
pthread__diagassert &= ~DIAGASSERT_ABORT;
break;
case 'e':
pthread__diagassert |= DIAGASSERT_STDERR;
break;
case 'E':
pthread__diagassert &= ~DIAGASSERT_STDERR;
break;
case 'l':
pthread__diagassert |= DIAGASSERT_SYSLOG;
break;
case 'L':
pthread__diagassert &= ~DIAGASSERT_SYSLOG;
break;
}
}

/* Tell libc that we're here and it should role-play accordingly. */
pthread_atfork(pthread__prefork, pthread__fork_parent, pthread__fork_child);
__isthreaded = 1;
}

/* General-purpose thread data structure sanitization. */
/* ARGSUSED */
static void
pthread__initthread(pthread_t t)
{

t->pt_self = t;
t->pt_magic = PT_MAGIC;
t->pt_sleepobj = NULL;
t->pt_havespecific = 0;
t->pt_lwpctl = &pthread__dummy_lwpctl;

memcpy(&t->pt_lockops, pthread__lock_ops, sizeof(t->pt_lockops));
pthread_mutex_init(&t->pt_lock, NULL);
PTQ_INIT(&t->pt_cleanup_stack);
}

static void
pthread__scrubthread(pthread_t t, char *name, int flags)
{

t->pt_state = PT_STATE_RUNNING;
t->pt_exitval = NULL;
t->pt_flags = flags;
t->pt_cancel = 0;
t->pt_errno = 0;
t->pt_name = name;
t->pt_lid = 0;
}

static int
pthread__getstack(pthread_t newthread, const pthread_attr_t *attr)
{
void *stackbase, *stackbase2, *redzone;
size_t stacksize, guardsize;
bool allocated;

if (attr != NULL) {
pthread_attr_getstack(attr, &stackbase, &stacksize);
if (stackbase == NULL)
pthread_attr_getguardsize(attr, &guardsize);
else
guardsize = 0;
} else {
stackbase = NULL;
stacksize = 0;
guardsize = pthread__guardsize;
}
if (stacksize == 0)
stacksize = pthread__stacksize;

if (newthread->pt_stack_allocated) {
if (stackbase == NULL &&
newthread->pt_stack.ss_size == stacksize &&
newthread->pt_guardsize == guardsize)
return 0;
stackbase2 = newthread->pt_stack.ss_sp;
#ifndef __MACHINE_STACK_GROWS_UP
stackbase2 = (char *)stackbase2 - newthread->pt_guardsize;
#endif
munmap(stackbase2,
newthread->pt_stack.ss_size + newthread->pt_guardsize);
newthread->pt_stack.ss_sp = NULL;
newthread->pt_stack.ss_size = 0;
newthread->pt_guardsize = 0;
newthread->pt_stack_allocated = false;
}

newthread->pt_stack_allocated = false;

if (stackbase == NULL) {
stacksize = ((stacksize - 1) | (pthread__pagesize - 1)) + 1;
guardsize = ((guardsize - 1) | (pthread__pagesize - 1)) + 1;
stackbase = mmap(NULL, stacksize + guardsize,
PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, (off_t)0);
if (stackbase == MAP_FAILED)
return ENOMEM;
allocated = true;
} else {
allocated = false;
}
#ifdef __MACHINE_STACK_GROWS_UP
redzone = (char *)stackbase + stacksize;
stackbase2 = (char *)stackbase;
#else
redzone = (char *)stackbase;
stackbase2 = (char *)stackbase + guardsize;
#endif
if (allocated && guardsize &&
mprotect(redzone, guardsize, PROT_NONE) == -1) {
munmap(stackbase, stacksize + guardsize);
return EPERM;
}
newthread->pt_stack.ss_size = stacksize;
newthread->pt_stack.ss_sp = stackbase2;
newthread->pt_guardsize = guardsize;
newthread->pt_stack_allocated = allocated;
return 0;
}

int
pthread_create(pthread_t *thread, const pthread_attr_t *attr,
void *(*startfunc)(void *), void *arg)
{
pthread_t newthread;
pthread_attr_t nattr;
struct pthread_attr_private *p;
char * volatile name;
unsigned long flag;
void *private_area;
int ret;

if (__predict_false(__uselibcstub)) {
pthread__errorfunc(__FILE__, __LINE__, __func__,
"pthread_create() requires linking with -lpthread");
return __libc_thr_create_stub(thread, attr, startfunc, arg);
}

if (attr == NULL)
nattr = pthread_default_attr;
else if (attr->pta_magic == PT_ATTR_MAGIC)
nattr = *attr;
else
return EINVAL;

if (!pthread__started) {
/*
* Force the _lwp_park symbol to be resolved before we
* begin any activity that might rely on concurrent
* wakeups.
*
* This is necessary because rtld itself uses _lwp_park
* and _lwp_unpark internally for its own locking: If
* we wait to resolve _lwp_park until there is an
* _lwp_unpark from another thread pending in the
* current lwp (for example, pthread_mutex_unlock or
* pthread_cond_signal), rtld's internal use of
* _lwp_park might consume the pending unpark. The
* result is a deadlock where libpthread and rtld have
* both correctly used _lwp_park and _lwp_unpark for
* themselves, but rtld has consumed the wakeup meant
* for libpthread so it is lost to libpthread.
*
* For the very first thread, before pthread__started
* is set to true, pthread__self()->pt_lid should have
* been initialized in pthread__init by the time we get
* here to the correct lid so we go to sleep and wake
* ourselves at the same time as a no-op.
*/
_lwp_park(CLOCK_REALTIME, 0, NULL, pthread__self()->pt_lid,
NULL, NULL);
}

pthread__started = 1;

/* Fetch misc. attributes from the attr structure. */
name = NULL;
if ((p = nattr.pta_private) != NULL)
if (p->ptap_name[0] != '\0')
if ((name = strdup(p->ptap_name)) == NULL)
return ENOMEM;

newthread = NULL;

/*
* Try to reclaim a dead thread.
*/
if (!PTQ_EMPTY(&pthread__deadqueue)) {
pthread_mutex_lock(&pthread__deadqueue_lock);
PTQ_FOREACH(newthread, &pthread__deadqueue, pt_deadq) {
/* Still running? */
if (_lwp_kill(newthread->pt_lid, 0) == -1 &&
errno == ESRCH)
break;
}
if (newthread)
PTQ_REMOVE(&pthread__deadqueue, newthread, pt_deadq);
pthread_mutex_unlock(&pthread__deadqueue_lock);
#if defined(__HAVE_TLS_VARIANT_I) || defined(__HAVE_TLS_VARIANT_II)
if (newthread && newthread->pt_tls) {
_rtld_tls_free(newthread->pt_tls);
newthread->pt_tls = NULL;
}
#endif
}

/*
* If necessary set up a stack, allocate space for a pthread_st,
* and initialize it.
*/
if (newthread == NULL) {
newthread = calloc(1, __pthread_st_size);
if (newthread == NULL) {
free(name);
return ENOMEM;
}
newthread->pt_stack_allocated = false;

if (pthread__getstack(newthread, attr)) {
free(newthread);
free(name);
return ENOMEM;
}

#if defined(__HAVE_TLS_VARIANT_I) || defined(__HAVE_TLS_VARIANT_II)
newthread->pt_tls = NULL;
#endif

/* Add to list of all threads. */
pthread_rwlock_wrlock(&pthread__alltree_lock);
PTQ_INSERT_TAIL(&pthread__allqueue, newthread, pt_allq);
(void)rb_tree_insert_node(&pthread__alltree, newthread);
pthread_rwlock_unlock(&pthread__alltree_lock);

/* Will be reset by the thread upon exit. */
pthread__initthread(newthread);
} else {
if (pthread__getstack(newthread, attr)) {
pthread_mutex_lock(&pthread__deadqueue_lock);
PTQ_INSERT_TAIL(&pthread__deadqueue, newthread, pt_deadq);
pthread_mutex_unlock(&pthread__deadqueue_lock);
return ENOMEM;
}
}

/*
* Create the new LWP.
*/
pthread__scrubthread(newthread, name, nattr.pta_flags);
newthread->pt_func = startfunc;
newthread->pt_arg = arg;
#if defined(__HAVE_TLS_VARIANT_I) || defined(__HAVE_TLS_VARIANT_II)
private_area = newthread->pt_tls = _rtld_tls_allocate();
newthread->pt_tls->tcb_pthread = newthread;
#else
private_area = newthread;
#endif

flag = 0;
if ((newthread->pt_flags & PT_FLAG_SUSPENDED) != 0 ||
(nattr.pta_flags & PT_FLAG_EXPLICIT_SCHED) != 0)
flag |= LWP_SUSPENDED;
if ((newthread->pt_flags & PT_FLAG_DETACHED) != 0)
flag |= LWP_DETACHED;

ret = pthread__makelwp(pthread__create_tramp, newthread, private_area,
newthread->pt_stack.ss_sp, newthread->pt_stack.ss_size,
flag, &newthread->pt_lid);
if (ret != 0) {
ret = errno;
pthread_mutex_lock(&newthread->pt_lock);
/* Will unlock and free name. */
pthread__reap(newthread);
return ret;
}

if ((nattr.pta_flags & PT_FLAG_EXPLICIT_SCHED) != 0) {
if (p != NULL) {
(void)pthread_setschedparam(newthread, p->ptap_policy,
&p->ptap_sp);
}
if ((newthread->pt_flags & PT_FLAG_SUSPENDED) == 0) {
(void)_lwp_continue(newthread->pt_lid);
}
}

*thread = newthread;

return 0;
}

__dead static void
pthread__create_tramp(void *cookie)
{
pthread_t self;
void *retval;
void *junk __unused;

self = cookie;

/*
* Throw away some stack in a feeble attempt to reduce cache
* thrash. May help for SMT processors. XXX We should not
* be allocating stacks on fixed 2MB boundaries. Needs a
* thread register or decent thread local storage.
*/
junk = alloca(((unsigned)self->pt_lid & 7) << 8);

if (self->pt_name != NULL) {
pthread_mutex_lock(&self->pt_lock);
if (self->pt_name != NULL)
(void)_lwp_setname(0, self->pt_name);
pthread_mutex_unlock(&self->pt_lock);
}

if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &self->pt_lwpctl)) {
err(EXIT_FAILURE, "_lwp_ctl");
}

retval = (*self->pt_func)(self->pt_arg);

pthread_exit(retval);

/*NOTREACHED*/
pthread__abort();
}

int
pthread_suspend_np(pthread_t thread)
{
pthread_t self;

pthread__error(EINVAL, "Invalid thread",
thread->pt_magic == PT_MAGIC);

self = pthread__self();
if (self == thread) {
return EDEADLK;
}
if (pthread__find(thread) != 0)
return ESRCH;
if (_lwp_suspend(thread->pt_lid) == 0)
return 0;
return errno;
}

int
pthread_resume_np(pthread_t thread)
{

pthread__error(EINVAL, "Invalid thread",
thread->pt_magic == PT_MAGIC);

if (pthread__find(thread) != 0)
return ESRCH;
if (_lwp_continue(thread->pt_lid) == 0)
return 0;
return errno;
}

void
pthread_exit(void *retval)
{
pthread_t self;
struct pt_clean_t *cleanup;

if (__predict_false(__uselibcstub)) {
__libc_thr_exit_stub(retval);
goto out;
}

self = pthread__self();

/* Disable cancellability. */
atomic_store_relaxed(&self->pt_cancel, PT_CANCEL_DISABLED);

/* Call any cancellation cleanup handlers */
if (!PTQ_EMPTY(&self->pt_cleanup_stack)) {
while (!PTQ_EMPTY(&self->pt_cleanup_stack)) {
cleanup = PTQ_FIRST(&self->pt_cleanup_stack);
PTQ_REMOVE(&self->pt_cleanup_stack, cleanup, ptc_next);
(*cleanup->ptc_cleanup)(cleanup->ptc_arg);
}
}

__cxa_thread_run_atexit();

/* Perform cleanup of thread-specific data */
pthread__destroy_tsd(self);

if (_malloc_thread_cleanup)
_malloc_thread_cleanup();

/*
* Signal our exit. Our stack and pthread_t won't be reused until
* pthread_create() can see from kernel info that this LWP is gone.
*/
pthread_mutex_lock(&self->pt_lock);
self->pt_exitval = retval;
if (self->pt_flags & PT_FLAG_DETACHED) {
/* pthread__reap() will drop the lock. */
pthread__reap(self);
_lwp_exit();
} else {
self->pt_state = PT_STATE_ZOMBIE;
pthread_mutex_unlock(&self->pt_lock);
/* Note: name will be freed by the joiner. */
_lwp_exit();
}

out:
/*NOTREACHED*/
pthread__abort();
exit(1);
}

int
pthread_join(pthread_t thread, void **valptr)
{
pthread_t self;

pthread__error(EINVAL, "Invalid thread",
thread->pt_magic == PT_MAGIC);

self = pthread__self();

if (pthread__find(thread) != 0)
return ESRCH;

if (thread == self)
return EDEADLK;

/* IEEE Std 1003.1 says pthread_join() never returns EINTR. */
for (;;) {
pthread__testcancel(self);
if (_lwp_wait(thread->pt_lid, NULL) == 0)
break;
if (errno != EINTR)
return errno;
}

/*
* Don't test for cancellation again. The spec is that if
* cancelled, pthread_join() must not have succeeded.
*/
pthread_mutex_lock(&thread->pt_lock);
if (thread->pt_state != PT_STATE_ZOMBIE) {
pthread__errorfunc(__FILE__, __LINE__, __func__,
"not a zombie");
}
if (valptr != NULL)
*valptr = thread->pt_exitval;

/* pthread__reap() will drop the lock. */
pthread__reap(thread);
return 0;
}

static void
pthread__reap(pthread_t thread)
{
char *name;

name = thread->pt_name;
thread->pt_name = NULL;
thread->pt_state = PT_STATE_DEAD;
pthread_mutex_unlock(&thread->pt_lock);

pthread_mutex_lock(&pthread__deadqueue_lock);
PTQ_INSERT_HEAD(&pthread__deadqueue, thread, pt_deadq);
pthread_mutex_unlock(&pthread__deadqueue_lock);

if (name != NULL)
free(name);
}

int
pthread_equal(pthread_t t1, pthread_t t2)
{

if (__predict_false(__uselibcstub))
return __libc_thr_equal_stub(t1, t2);

pthread__error(0, "Invalid thread",
(t1 != NULL) && (t1->pt_magic == PT_MAGIC));

pthread__error(0, "Invalid thread",
(t2 != NULL) && (t2->pt_magic == PT_MAGIC));

/* Nothing special here. */
return (t1 == t2);
}

int
pthread_detach(pthread_t thread)
{
int error;

pthread__error(EINVAL, "Invalid thread",
thread->pt_magic == PT_MAGIC);

if (pthread__find(thread) != 0)
return ESRCH;

pthread_mutex_lock(&thread->pt_lock);
if ((thread->pt_flags & PT_FLAG_DETACHED) != 0) {
error = EINVAL;
} else {
error = _lwp_detach(thread->pt_lid);
if (error == 0)
thread->pt_flags |= PT_FLAG_DETACHED;
else
error = errno;
}
if (thread->pt_state == PT_STATE_ZOMBIE) {
/* pthread__reap() will drop the lock. */
pthread__reap(thread);
} else
pthread_mutex_unlock(&thread->pt_lock);
return error;
}

int
pthread_getname_np(pthread_t thread, char *name, size_t len)
{

pthread__error(EINVAL, "Invalid thread",
thread->pt_magic == PT_MAGIC);

if (pthread__find(thread) != 0)
return ESRCH;

pthread_mutex_lock(&thread->pt_lock);
if (thread->pt_name == NULL)
name[0] = '\0';
else
strlcpy(name, thread->pt_name, len);
pthread_mutex_unlock(&thread->pt_lock);

return 0;
}

int
pthread_setname_np(pthread_t thread, const char *name, void *arg)
{
char *oldname, *cp, newname[PTHREAD_MAX_NAMELEN_NP];
int namelen;

pthread__error(EINVAL, "Invalid thread",
thread->pt_magic == PT_MAGIC);

if (pthread__find(thread) != 0)
return ESRCH;

namelen = snprintf(newname, sizeof(newname), name, arg);
if (namelen >= PTHREAD_MAX_NAMELEN_NP)
return EINVAL;

cp = strdup(newname);
if (cp == NULL)
return ENOMEM;

pthread_mutex_lock(&thread->pt_lock);
oldname = thread->pt_name;
thread->pt_name = cp;
(void)_lwp_setname(thread->pt_lid, cp);
pthread_mutex_unlock(&thread->pt_lock);

if (oldname != NULL)
free(oldname);

return 0;
}

pthread_t
pthread_self(void)
{
if (__predict_false(__uselibcstub))
return (pthread_t)__libc_thr_self_stub();

return pthread__self();
}

int
pthread_cancel(pthread_t thread)
{
unsigned old, new;
bool wake;

pthread__error(EINVAL, "Invalid thread",
thread->pt_magic == PT_MAGIC);

if (pthread__find(thread) != 0)
return ESRCH;

/*
* membar_release matches membar_acquire in
* pthread_setcancelstate and pthread__testcancel.
*/
membar_release();

do {
old = atomic_load_relaxed(&thread->pt_cancel);
new = old | PT_CANCEL_PENDING;
wake = false;
if ((old & PT_CANCEL_DISABLED) == 0) {
new |= PT_CANCEL_CANCELLED;
wake = true;
}
} while (__predict_false(!atomic_compare_exchange_weak_explicit(
&thread->pt_cancel, &old, new,
memory_order_relaxed, memory_order_relaxed)));

if (wake)
_lwp_wakeup(thread->pt_lid);

return 0;
}

int
pthread_setcancelstate(int state, int *oldstate)
{
pthread_t self;
unsigned flags, old, new;
bool cancelled;

if (__predict_false(__uselibcstub))
return __libc_thr_setcancelstate_stub(state, oldstate);

self = pthread__self();

switch (state) {
case PTHREAD_CANCEL_ENABLE:
flags = 0;
break;
case PTHREAD_CANCEL_DISABLE:
flags = PT_CANCEL_DISABLED;
break;
default:
return EINVAL;
}

do {
old = atomic_load_relaxed(&self->pt_cancel);
new = (old & ~PT_CANCEL_DISABLED) | flags;
/*
* If we disable while cancelled, switch back to
* pending so future cancellation tests will not fire
* until enabled again.
*
* If a cancellation was requested while cancellation
* was disabled, note that fact for future
* cancellation tests.
*/
cancelled = false;
if (__predict_false((flags | (old & PT_CANCEL_CANCELLED)) ==
(PT_CANCEL_DISABLED|PT_CANCEL_CANCELLED))) {
new &= ~PT_CANCEL_CANCELLED;
new |= PT_CANCEL_PENDING;
} else if (__predict_false((flags |
(old & PT_CANCEL_PENDING)) ==
PT_CANCEL_PENDING)) {
new |= PT_CANCEL_CANCELLED;
/* This is not a deferred cancellation point. */
if (__predict_false(old & PT_CANCEL_ASYNC))
cancelled = true;
}
} while (__predict_false(!atomic_compare_exchange_weak_explicit(
&self->pt_cancel, &old, new,
memory_order_relaxed, memory_order_relaxed)));

/*
* If we transitioned from PTHREAD_CANCEL_DISABLED to
* PTHREAD_CANCEL_ENABLED, there was a pending cancel, and we
* are configured with asynchronous cancellation, we are now
* cancelled -- make it happen.
*/
if (__predict_false(cancelled)) {
/*
* membar_acquire matches membar_release in
* pthread_cancel.
*/
membar_acquire();
pthread__cancelled();
}

if (oldstate) {
if (old & PT_CANCEL_DISABLED)
*oldstate = PTHREAD_CANCEL_DISABLE;
else
*oldstate = PTHREAD_CANCEL_ENABLE;
}

return 0;
}

int
pthread_setcanceltype(int type, int *oldtype)
{
pthread_t self;
unsigned flags, old, new;
bool cancelled;

self = pthread__self();

switch (type) {
case PTHREAD_CANCEL_DEFERRED:
flags = 0;
break;
case PTHREAD_CANCEL_ASYNCHRONOUS:
flags = PT_CANCEL_ASYNC;
break;
default:
return EINVAL;
}

do {
old = atomic_load_relaxed(&self->pt_cancel);
new = (old & ~PT_CANCEL_ASYNC) | flags;
cancelled = false;
if (__predict_false((flags | (old & PT_CANCEL_CANCELLED)) ==
(PT_CANCEL_ASYNC|PT_CANCEL_CANCELLED)))
cancelled = true;
} while (__predict_false(!atomic_compare_exchange_weak_explicit(
&self->pt_cancel, &old, new,
memory_order_relaxed, memory_order_relaxed)));

if (__predict_false(cancelled)) {
membar_acquire();
pthread__cancelled();
}

if (oldtype != NULL) {
if (old & PT_CANCEL_ASYNC)
*oldtype = PTHREAD_CANCEL_ASYNCHRONOUS;
else
*oldtype = PTHREAD_CANCEL_DEFERRED;
}

return 0;
}

void
pthread_testcancel(void)
{
pthread_t self;

self = pthread__self();

pthread__testcancel(self);
}

/*
* POSIX requires that certain functions return an error rather than
* invoking undefined behavior even when handed completely bogus
* pthread_t values, e.g. stack garbage.
*/
int
pthread__find(pthread_t id)
{
pthread_t target;
int error;

pthread_rwlock_rdlock(&pthread__alltree_lock);
target = rb_tree_find_node(&pthread__alltree, id);
error = (target && target->pt_state != PT_STATE_DEAD) ? 0 : ESRCH;
pthread_rwlock_unlock(&pthread__alltree_lock);

return error;
}

void
pthread__testcancel(pthread_t self)
{

/*
* We use atomic_load_relaxed and then a conditional
* membar_acquire, rather than atomic_load_acquire, in order to
* avoid incurring the cost of an acquire barrier in the common
* case of not having been cancelled.
*
* membar_acquire matches membar_release in pthread_cancel.
*/
if (__predict_false(atomic_load_relaxed(&self->pt_cancel) &
PT_CANCEL_CANCELLED)) {
membar_acquire();
pthread__cancelled();
}
}

void
pthread__cancelled(void)
{

pthread_exit(PTHREAD_CANCELED);
}

void
pthread__cleanup_push(void (*cleanup)(void *), void *arg, void *store)
{
pthread_t self;
struct pt_clean_t *entry;

self = pthread__self();
entry = store;
entry->ptc_cleanup = cleanup;
entry->ptc_arg = arg;
PTQ_INSERT_HEAD(&self->pt_cleanup_stack, entry, ptc_next);
}

void
pthread__cleanup_pop(int ex, void *store)
{
pthread_t self;
struct pt_clean_t *entry;

self = pthread__self();
entry = store;

PTQ_REMOVE(&self->pt_cleanup_stack, entry, ptc_next);
if (ex)
(*entry->ptc_cleanup)(entry->ptc_arg);
}

int *
pthread__errno(void)
{
pthread_t self;

if (__predict_false(__uselibcstub)) {
pthread__errorfunc(__FILE__, __LINE__, __func__,
"pthread__errno() requires linking with -lpthread");
return __libc_thr_errno_stub();
}

self = pthread__self();

return &(self->pt_errno);
}

ssize_t _sys_write(int, const void *, size_t);

void
pthread__assertfunc(const char *file, int line, const char *function,
const char *expr)
{
char buf[1024];
int len;

/*
* snprintf_ss should not acquire any locks, or we could
* end up deadlocked if the assert caller held locks.
*/
len = snprintf_ss(buf, 1024,
"assertion \"%s\" failed: file \"%s\", line %d%s%s%s\n",
expr, file, line,
function ? ", function \"" : "",
function ? function : "",
function ? "\"" : "");

_sys_write(STDERR_FILENO, buf, (size_t)len);
(void)raise(SIGABRT);
_exit(1);
}

void
pthread__errorfunc(const char *file, int line, const char *function,
const char *msg, ...)
{
char buf[1024];
char buf2[1024];
size_t len;
va_list ap;

if (pthread__diagassert == 0)
return;

va_start(ap, msg);
vsnprintf_ss(buf2, sizeof(buf2), msg, ap);
va_end(ap);

/*
* snprintf_ss should not acquire any locks, or we could
* end up deadlocked if the assert caller held locks.
*/
len = snprintf_ss(buf, sizeof(buf),
"%s: Error detected by libpthread: %s.\n"
"Detected by file \"%s\", line %d%s%s%s.\n"
"See pthread(3) for information.\n",
getprogname(), buf2, file, line,
function ? ", function \"" : "",
function ? function : "",
function ? "\"" : "");

if (pthread__diagassert & DIAGASSERT_STDERR)
_sys_write(STDERR_FILENO, buf, len);

if (pthread__diagassert & DIAGASSERT_SYSLOG)
syslog(LOG_DEBUG | LOG_USER, "%s", buf);

if (pthread__diagassert & DIAGASSERT_ABORT) {
(void)raise(SIGABRT);
_exit(1);
}
}

/*
* Thread park/unpark operations. The kernel operations are
* modelled after a brief description from "Multithreading in
* the Solaris Operating Environment":
*
* http://www.sun.com/software/whitepapers/solaris9/multithread.pdf
*/

int
pthread__park(pthread_t self, pthread_mutex_t *lock,
pthread_queue_t *queue, const struct timespec *abstime,
int cancelpt)
{
int rv, error;

pthread_mutex_unlock(lock);

/*
* Wait until we are awoken by a pending unpark operation,
* a signal, an unpark posted after we have gone asleep,
* or an expired timeout.
*
* It is fine to test the value of pt_sleepobj without
* holding any locks, because:
*
* o Only the blocking thread (this thread) ever sets it
* to a non-NULL value.
*
* o Other threads may set it NULL, but if they do so they
* must also make this thread return from _lwp_park.
*
* o _lwp_park, _lwp_unpark and _lwp_unpark_all are system
* calls and all make use of spinlocks in the kernel. So
* these system calls act as full memory barriers.
*/
rv = 0;
do {
/*
* If we deferred unparking a thread, arrange to
* have _lwp_park() restart it before blocking.
*/
error = _lwp_park(CLOCK_REALTIME, TIMER_ABSTIME,
__UNCONST(abstime), 0, NULL, NULL);
if (error != 0) {
switch (rv = errno) {
case EINTR:
case EALREADY:
rv = 0;
break;
case ETIMEDOUT:
break;
default:
pthread__errorfunc(__FILE__, __LINE__,
__func__, "_lwp_park failed: %d", errno);
break;
}
}
/* Check for cancellation. */
if (cancelpt &&
(atomic_load_relaxed(&self->pt_cancel) &
PT_CANCEL_CANCELLED))
rv = EINTR;
} while (self->pt_sleepobj != NULL && rv == 0);
return rv;
}

void
pthread__unpark(pthread_queue_t *queue, pthread_t self,
pthread_mutex_t *interlock)
{
pthread_t target;

target = PTQ_FIRST(queue);
target->pt_sleepobj = NULL;
PTQ_REMOVE(queue, target, pt_sleep);
(void)_lwp_unpark(target->pt_lid, NULL);
}

void
pthread__unpark_all(pthread_queue_t *queue, pthread_t self,
pthread_mutex_t *interlock)
{
lwpid_t lids[PTHREAD__UNPARK_MAX];
const size_t mlid = pthread__unpark_max;
pthread_t target;
size_t nlid = 0;

PTQ_FOREACH(target, queue, pt_sleep) {
if (nlid == mlid) {
(void)_lwp_unpark_all(lids, nlid, NULL);
nlid = 0;
}
target->pt_sleepobj = NULL;
lids[nlid++] = target->pt_lid;
}
PTQ_INIT(queue);
if (nlid == 1) {
(void)_lwp_unpark(lids[0], NULL);
} else if (nlid > 1) {
(void)_lwp_unpark_all(lids, nlid, NULL);
}
}

#undef OOPS

static void
pthread__initmainstack(void)
{
struct rlimit slimit;
const AuxInfo *aux;
size_t size, len;
int mib[2];
unsigned int value;

_DIAGASSERT(_dlauxinfo() != NULL);

if (getrlimit(RLIMIT_STACK, &slimit) == -1)
err(EXIT_FAILURE,
"Couldn't get stack resource consumption limits");
size = slimit.rlim_cur;
pthread__main->pt_stack.ss_size = size;
pthread__main->pt_guardsize = pthread__pagesize;

mib[0] = CTL_VM;
mib[1] = VM_GUARD_SIZE;
len = sizeof(value);
if (sysctl(mib, __arraycount(mib), &value, &len, NULL, 0) == 0)
pthread__main->pt_guardsize = value;

for (aux = _dlauxinfo(); aux->a_type != AT_NULL; ++aux) {
if (aux->a_type == AT_STACKBASE) {
#ifdef __MACHINE_STACK_GROWS_UP
pthread__main->pt_stack.ss_sp = (void *)aux->a_v;
#else
pthread__main->pt_stack.ss_sp = (char *)aux->a_v - size;
#endif
break;
}
}
pthread__copy_tsd(pthread__main);
}

/*
* Set up the slightly special stack for the "initial" thread, which
* runs on the normal system stack, and thus gets slightly different
* treatment.
*/
static void
pthread__initmain(pthread_t *newt)
{
char *value;

pthread__initmainstack();

value = pthread__getenv("PTHREAD_STACKSIZE");
if (value != NULL) {
pthread__stacksize = atoi(value) * 1024;
if (pthread__stacksize > pthread__main->pt_stack.ss_size)
pthread__stacksize = pthread__main->pt_stack.ss_size;
}
if (pthread__stacksize == 0)
pthread__stacksize = pthread__main->pt_stack.ss_size;
pthread__stacksize += pthread__pagesize - 1;
pthread__stacksize &= ~(pthread__pagesize - 1);
if (pthread__stacksize < 4 * pthread__pagesize)
errx(1, "Stacksize limit is too low, minimum %zd kbyte.",
4 * pthread__pagesize / 1024);

*newt = pthread__main;
#if defined(_PTHREAD_GETTCB_EXT)
pthread__main->pt_tls = _PTHREAD_GETTCB_EXT();
#elif defined(__HAVE___LWP_GETTCB_FAST)
pthread__main->pt_tls = __lwp_gettcb_fast();
#else
pthread__main->pt_tls = _lwp_getprivate();
#endif
pthread__main->pt_tls->tcb_pthread = pthread__main;
}

static signed int
/*ARGSUSED*/
pthread__cmp(void *ctx, const void *n1, const void *n2)
{
const uintptr_t p1 = (const uintptr_t)n1;
const uintptr_t p2 = (const uintptr_t)n2;

if (p1 < p2)
return -1;
if (p1 > p2)
return 1;
return 0;
}

/* Because getenv() wants to use locks. */
char *
pthread__getenv(const char *name)
{
extern char **environ;
size_t l_name, offset;

if (issetugid())
return (NULL);

l_name = strlen(name);
for (offset = 0; environ[offset] != NULL; offset++) {
if (strncmp(name, environ[offset], l_name) == 0 &&
environ[offset][l_name] == '=') {
return environ[offset] + l_name + 1;
}
}

return NULL;
}

pthread_mutex_t *
pthread__hashlock(volatile const void *p)
{
uintptr_t v;

v = (uintptr_t)p;
return &hashlocks[((v >> 9) ^ (v >> 3)) & (NHASHLOCK - 1)].mutex;
}

int
pthread__checkpri(int pri)
{
static int havepri;
static long min, max;

if (!havepri) {
min = sysconf(_SC_SCHED_PRI_MIN);
max = sysconf(_SC_SCHED_PRI_MAX);
havepri = 1;
}
return (pri < min || pri > max) ? EINVAL : 0;
}