/*-
* Copyright (c) 2002, 2006, 2007, 2008, 2019, 2023
* The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Jason R. Thorpe and Andrew Doran.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Kernel mutex implementation, modeled after those found in Solaris,
* a description of which can be found in:
*
* Solaris Internals: Core Kernel Architecture, Jim Mauro and
* Richard McDougall.
*/
#if defined(LOCKDEBUG)
#define MUTEX_DASSERT(mtx, cond) \
do { \
if (__predict_false(!(cond))) \
MUTEX_ABORT(mtx, "assertion failed: " #cond); \
} while (/* CONSTCOND */ 0)
#else /* LOCKDEBUG */
#define MUTEX_DASSERT(mtx, cond) /* nothing */
#endif /* LOCKDEBUG */
#if defined(DIAGNOSTIC)
#define MUTEX_ASSERT(mtx, cond) \
do { \
if (__predict_false(!(cond))) \
MUTEX_ABORT(mtx, "assertion failed: " #cond); \
} while (/* CONSTCOND */ 0)
#else /* DIAGNOSTIC */
#define MUTEX_ASSERT(mtx, cond) /* nothing */
#endif /* DIAGNOSTIC */
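/*
* Illustrative usage (not from the original file): both macros take the
* mutex and a condition, and abort via MUTEX_ABORT() when the condition
* is false. Typical checks elsewhere in this file look like:
*
*     MUTEX_DASSERT(mtx, curthread != 0);
*     MUTEX_ASSERT(mtx, MUTEX_OWNER(mtx->mtx_owner) == curthread);
*
* MUTEX_ASSERT() is enabled by DIAGNOSTIC, MUTEX_DASSERT() only by the
* heavier LOCKDEBUG option; both compile away to nothing otherwise.
*/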
/*
* Some architectures can't use __cpu_simple_lock as-is, so allow them
* to supply alternate definitions of the spin-bit operations below
* (an illustrative override sketch follows the fallback definitions).
*/
#ifndef MUTEX_SPINBIT_LOCK_INIT
#define MUTEX_SPINBIT_LOCK_INIT(mtx) __cpu_simple_lock_init(&(mtx)->mtx_lock)
#endif
#ifndef MUTEX_SPINBIT_LOCKED_P
#define MUTEX_SPINBIT_LOCKED_P(mtx) __SIMPLELOCK_LOCKED_P(&(mtx)->mtx_lock)
#endif
#ifndef MUTEX_SPINBIT_LOCK_TRY
#define MUTEX_SPINBIT_LOCK_TRY(mtx) __cpu_simple_lock_try(&(mtx)->mtx_lock)
#endif
#ifndef MUTEX_SPINBIT_LOCK_UNLOCK
#define MUTEX_SPINBIT_LOCK_UNLOCK(mtx) __cpu_simple_unlock(&(mtx)->mtx_lock)
#endif
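/*
* Illustrative override sketch (hypothetical, not taken from any
* particular port): an architecture whose __cpu_simple_lock_t does not
* map cleanly onto mtx_lock can define all four operations in its
* <machine/mutex.h> (the usual home of the port-specific kmutex layout)
* before this point, e.g. by keeping the spin-held state in a private
* field; mtx_md and the md_lock_*() helpers below are made-up names:
*
*     #define MUTEX_SPINBIT_LOCK_INIT(mtx)   md_lock_init(&(mtx)->mtx_md)
*     #define MUTEX_SPINBIT_LOCKED_P(mtx)    md_lock_held(&(mtx)->mtx_md)
*     #define MUTEX_SPINBIT_LOCK_TRY(mtx)    md_lock_try(&(mtx)->mtx_md)
*     #define MUTEX_SPINBIT_LOCK_UNLOCK(mtx) md_lock_release(&(mtx)->mtx_md)
*
* Whatever the definitions, LOCK_TRY must behave as an atomic
* test-and-set and LOCK_UNLOCK as its release, matching the
* __cpu_simple_lock defaults above.
*/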
/*
* For architectures that provide 'simple' mutexes: the architecture
* supplies a CAS function that is either MP-safe, or (on uniprocessor
* ports) does not need to be. Adaptive mutexes on these architectures
* do not require an additional interlock.
*/
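/*
* Illustrative sketch (assumes the __HAVE_SIMPLE_MUTEXES layout, with
* MUTEX_CAS() being the port's compare-and-swap on the owner word).
* The owner word then doubles as the lock, so acquire and release
* reduce to roughly:
*
*     MUTEX_ACQUIRE(mtx, curthread) ->
*         MUTEX_CAS(&(mtx)->mtx_owner, 0UL, (curthread))
*     MUTEX_RELEASE(mtx) ->
*         release-ordered store of 0 to (mtx)->mtx_owner
*
* which is why adaptive mutexes need no separate interlock word on
* these ports.
*/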
pr("owner field : %#018lx wait/spin: %16d/%d\n",
(long)MUTEX_OWNER(owner), MUTEX_HAS_WAITERS(mtx),
MUTEX_SPIN_P(owner));
}
/*
* mutex_abort:
*
* Dump information about an error and panic the system. This
* generates a lot of machine code in the DIAGNOSTIC case, so
* we ask the compiler to not inline it.
*/
static void __noinline
mutex_abort(const char *func, size_t line, volatile const kmutex_t *mtx,
const char *msg)
{
/*
* mutex_init:
*
* Initialize a mutex for use. Note that adaptive mutexes are in
* essence spin mutexes that can sleep to avoid deadlock and wasting
* CPU time. We can't easily provide a type of mutex that always
* sleeps - see comments in mutex_vector_enter() about releasing
* mutexes unlocked.
*/
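/*
* Illustrative use: callers normally go through the mutex_init()
* wrapper, which supplies the return address for LOCKDEBUG; sc_lock
* and sc_intr_lock below are hypothetical softc members:
*
*     mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);    adaptive
*     mutex_init(&sc->sc_intr_lock, MUTEX_DEFAULT, IPL_VM); spin
*
* With MUTEX_DEFAULT the IPL selects the behaviour: IPL_NONE and the
* IPL_SOFT* levels give an adaptive mutex, higher levels a spin mutex.
*/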
void
_mutex_init(kmutex_t *mtx, kmutex_type_t type, int ipl,
uintptr_t return_address)
{
lockops_t *lockops __unused;
bool dodebug;
#ifdef MULTIPROCESSOR
/*
* mutex_oncpu:
*
* Return true if an adaptive mutex owner is running on a CPU in the
* system. If the target is waiting on the kernel big lock, then we
* must release it. This is necessary to avoid deadlock.
*/
static bool
mutex_oncpu(uintptr_t owner)
{
struct cpu_info *ci;
lwp_t *l;
KASSERT(kpreempt_disabled());
if (!MUTEX_OWNED(owner)) {
return false;
}
/*
* See lwp_dtor() for why dereferencing the LWP pointer is safe.
* We must have kernel preemption disabled for that.
*/
l = (lwp_t *)MUTEX_OWNER(owner);
ci = l->l_cpu;
if (ci && ci->ci_curlwp == l) {
/* Target is running; do we need to block? */
return (atomic_load_relaxed(&ci->ci_biglock_wanted) != l);
}
/* Not running. It may be safe to block now. */
return false;
}
#endif /* MULTIPROCESSOR */
/*
* mutex_vector_enter:
*
* Support routine for mutex_enter() that must handle all cases. In
* the LOCKDEBUG case, mutex_enter() is always aliased here, even if
* fast-path stubs are available. If a mutex_spin_enter() stub is
* not available, then it is also aliased directly here.
*/
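/*
* Illustrative sketch (the exact mechanism is port-dependent): on
* ports without the fast-path stubs, the aliasing described above is
* typically arranged with strong aliases, along the lines of
*
*     __strong_alias(mutex_enter,mutex_vector_enter);
*     __strong_alias(mutex_spin_enter,mutex_vector_enter);
*
* so that mutex_enter()/mutex_spin_enter() resolve directly to this
* function.
*/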
void
mutex_vector_enter(kmutex_t *mtx)
{
uintptr_t owner, curthread;
turnstile_t *ts;
#ifdef MULTIPROCESSOR
u_int count;
#endif
LOCKSTAT_COUNTER(spincnt);
LOCKSTAT_COUNTER(slpcnt);
LOCKSTAT_TIMER(spintime);
LOCKSTAT_TIMER(slptime);
LOCKSTAT_FLAG(lsflag);
/*
* Spin testing the lock word, with exponential backoff
* to reduce cache line ping-ponging between CPUs.
*/
do {
while (MUTEX_SPINBIT_LOCKED_P(mtx)) {
SPINLOCK_SPIN_HOOK;
SPINLOCK_BACKOFF(count);
#ifdef LOCKDEBUG
if (SPINLOCK_SPINOUT(spins))
MUTEX_ABORT(mtx, "spinout");
#endif /* LOCKDEBUG */
}
} while (!MUTEX_SPINBIT_LOCK_TRY(mtx));
if (__predict_true(panicstr == NULL)) {
KDASSERT(pserialize_not_in_read_section());
LOCKDEBUG_BARRIER(&kernel_lock, 1);
}
LOCKSTAT_ENTER(lsflag);
/*
* Adaptive mutex; spin trying to acquire the mutex. If we
* determine that the owner is not running on a processor,
* then we stop spinning, and sleep instead.
*/
for (;;) {
if (!MUTEX_OWNED(owner)) {
/*
* A clear owner field could mean one of two things:
*
* * The mutex has been released.
* * The owner field hasn't been set yet.
*
* Try to acquire it again. If that fails,
* we'll just loop again.
*/
if (MUTEX_ACQUIRE(mtx, curthread))
break;
owner = mtx->mtx_owner;
continue;
}
if (__predict_false(MUTEX_OWNER(owner) == curthread)) {
MUTEX_ABORT(mtx, "locking against myself");
}
#ifdef MULTIPROCESSOR
/*
* Check to see if the owner is running on a processor.
* If so, then we should just spin, as the owner will
* likely release the lock very soon.
*/
if (mutex_oncpu(owner)) {
LOCKSTAT_START_TIMER(lsflag, spintime);
count = SPINLOCK_BACKOFF_MIN;
do {
KPREEMPT_ENABLE(curlwp);
SPINLOCK_BACKOFF(count);
KPREEMPT_DISABLE(curlwp);
owner = mtx->mtx_owner;
} while (mutex_oncpu(owner));
LOCKSTAT_STOP_TIMER(lsflag, spintime);
LOCKSTAT_COUNT(spincnt, 1);
if (!MUTEX_OWNED(owner))
continue;
}
#endif
ts = turnstile_lookup(mtx);
/*
* Once we have the turnstile chain interlock, mark the
* mutex as having waiters. If that fails, spin again:
* chances are that the mutex has been released.
*/
if (!MUTEX_SET_WAITERS(mtx, owner)) {
turnstile_exit(mtx);
owner = mtx->mtx_owner;
continue;
}
#ifdef MULTIPROCESSOR
/*
* mutex_exit() is permitted to release the mutex without
* any interlocking instructions, and the following can
* occur as a result:
*
*  CPU 1: MUTEX_SET_WAITERS()       CPU 2: mutex_exit()
* ----------------------------   ----------------------------
*             ..                  load mtx->mtx_owner
*             ..                  see has-waiters bit clear
*     set has-waiters bit                    ..
*             ..                  store mtx->mtx_owner := 0
*       return success
*
* There is another race that can occur: a third CPU could
* acquire the mutex as soon as it is released. Since
* adaptive mutexes are primarily spin mutexes, this is not
* something that we need to worry about too much. What we
* do need to ensure is that the waiters bit gets set.
*
* To allow the unlocked release, we need to make some
* assumptions here:
*
* o Release is the only non-atomic/unlocked operation
* that can be performed on the mutex. (It must still
* be atomic on the local CPU, e.g. in case it is
* interrupted or preempted.)
*
* o At any given time on each mutex, MUTEX_SET_WAITERS()
* can only ever be in progress on one CPU in the
* system - guaranteed by the turnstile chain lock.
*
* o No other operations other than MUTEX_SET_WAITERS()
* and release can modify a mutex with a non-zero
* owner field.
*
* o If the holding LWP switches away, it posts a store
* fence before changing curlwp, ensuring that any
* overwrite of the mutex waiters flag by mutex_exit()
* completes before the modification of curlwp becomes
* visible to this CPU.
*
* o cpu_switchto() posts a store fence after setting curlwp
* and before resuming execution of an LWP.
*
* o _kernel_lock() posts a store fence before setting
* curcpu()->ci_biglock_wanted, and after clearing it.
* This ensures that any overwrite of the mutex waiters
* flag by mutex_exit() completes before the modification
* of ci_biglock_wanted becomes visible.
*
* MUTEX_SET_WAITERS() does two things at once: it confirms that
* the same LWP has held the mutex since we took the turnstile
* lock, and it notifies that LWP that we are waiting. After it
* succeeds, we check the lock holder's status again. Some of the
* possible outcomes (not an exhaustive list; XXX this should be
* made exhaustive):
*
* 1. The on-CPU check returns true: the holding LWP is
* running again. The lock may be released soon and
* we should spin. Importantly, we can't trust the
* value of the waiters flag.
*
* 2. The on-CPU check returns false: the holding LWP is
* not running. We now have the opportunity to check
* if mutex_exit() has blatted the modifications made
* by MUTEX_SET_WAITERS().
*
* 3. The on-CPU check returns false: the holding LWP may
* or may not be running. It has context switched at
* some point during our check. Again, we have the
* chance to see if the waiters bit is still set or
* has been overwritten.
*
* 4. The on-CPU check returns false: the holding LWP is
* running on a CPU, but wants the big lock. It's OK
* to check the waiters field in this case.
*
* 5. The has-waiters check fails: the mutex has been
* released, the waiters flag cleared and another LWP
* now owns the mutex.
*
* 6. The has-waiters check fails: the mutex has been
* released.
*
* If the waiters bit is not set, it's unsafe to go to sleep,
* as we might never be awoken.
*/
if (mutex_oncpu(owner)) {
turnstile_exit(mtx);
owner = mtx->mtx_owner;
continue;
}
membar_consumer();
if (!MUTEX_HAS_WAITERS(mtx)) {
turnstile_exit(mtx);
owner = mtx->mtx_owner;
continue;
}
#endif /* MULTIPROCESSOR */
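/*
* Illustrative sketch (assumes the simple-mutex layout, where the
* waiters flag is a bit in the owner word and MUTEX_CAS() is the
* port's compare-and-swap). MUTEX_SET_WAITERS() above is then
* roughly:
*
*     MUTEX_CAS(&(mtx)->mtx_owner, (owner), (owner) | MUTEX_BIT_WAITERS)
*
* Because mutex_exit() may store 0 to mtx_owner with no interlock,
* that CAS can succeed and still be overwritten moments later; hence
* the mutex_oncpu() and MUTEX_HAS_WAITERS() re-checks above, with
* membar_consumer() ordering the loads in the on-CPU check before the
* re-read of the waiters flag.
*/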
/*
* mutex_vector_exit:
*
* Support routine for mutex_exit() that handles all cases.
*/
void
mutex_vector_exit(kmutex_t *mtx)
{
turnstile_t *ts;
uintptr_t curthread;
if (MUTEX_SPIN_P(mtx->mtx_owner)) {
#ifdef FULL
if (__predict_false(!MUTEX_SPINBIT_LOCKED_P(mtx))) {
MUTEX_ABORT(mtx, "exiting unheld spin mutex");
}
MUTEX_UNLOCKED(mtx);
MUTEX_SPINBIT_LOCK_UNLOCK(mtx);
#endif
MUTEX_SPIN_SPLRESTORE(mtx);
return;
}
#ifndef __HAVE_MUTEX_STUBS
/*
* On some architectures without mutex stubs, we can enter here to
* release mutexes before interrupts and whatnot are up and running.
* We need this hack to keep them sweet.
*/
if (__predict_false(cold)) {
MUTEX_UNLOCKED(mtx);
MUTEX_RELEASE(mtx);
return;
}
#endif
#ifdef LOCKDEBUG
/*
* Avoid having to take the turnstile chain lock every time
* around. Raise the priority level to splhigh() in order
* to disable preemption and so make the following atomic.
* This also blocks out soft interrupts that could set the
* waiters bit.
*/
{
int s = splhigh();
if (!MUTEX_HAS_WAITERS(mtx)) {
MUTEX_RELEASE(mtx);
splx(s);
return;
}
splx(s);
}
#endif
/*
* Get this lock's turnstile. This gets the interlock on
* the sleep queue. Once we have that, we can clear the
* lock. If there was no turnstile for the lock, there
* were no waiters remaining.
*/
ts = turnstile_lookup(mtx);
#ifndef __HAVE_SIMPLE_MUTEXES
/*
* mutex_wakeup:
*
* Support routine for mutex_exit() that wakes up all waiters.
* We assume that the mutex has been released, but it need not
* be.
*/
void
mutex_wakeup(kmutex_t *mtx)
{
turnstile_t *ts;
/*
* mutex_ownable:
*
* When compiled with DEBUG and LOCKDEBUG defined, ensure that
* the mutex is available. We cannot use !mutex_owned() since
* that won't work correctly for spin mutexes.
*/
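/*
* Illustrative use (hypothetical caller): the return value exists so
* that the check can sit inside a debug assertion, e.g.
*
*     KDASSERT(mutex_ownable(&sc->sc_lock));
*
* placed at a point where the mutex must currently be available to
* the caller.
*/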
int
mutex_ownable(const kmutex_t *mtx)
{
/*
* Spin testing the lock word, with exponential backoff
* to reduce cache line ping-ponging between CPUs.
*/
do {
while (MUTEX_SPINBIT_LOCKED_P(mtx)) {
SPINLOCK_BACKOFF(count);
#ifdef LOCKDEBUG
if (SPINLOCK_SPINOUT(spins))
MUTEX_ABORT(mtx, "spinout");
#endif /* LOCKDEBUG */
}
} while (!MUTEX_SPINBIT_LOCK_TRY(mtx));