/* $NetBSD: subr_workqueue.c,v 1.48 2024/03/01 04:32:38 mrg Exp $ */

/* $NetBSD: subr_workqueue.c,v 1.48 2024/03/01 04:32:38 mrg Exp $ */

/*-
* Copyright (c)2002, 2005, 2006, 2007 YAMAMOTO Takashi,
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_workqueue.c,v 1.48 2024/03/01 04:32:38 mrg Exp $");

#include <sys/param.h>

#include <sys/condvar.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/kthread.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sdt.h>
#include <sys/systm.h>
#include <sys/workqueue.h>

typedef struct work_impl {
SIMPLEQ_ENTRY(work_impl) wk_entry;
} work_impl_t;

SIMPLEQ_HEAD(workqhead, work_impl);

struct workqueue_queue {
kmutex_t q_mutex;
kcondvar_t q_cv;
struct workqhead q_queue_pending;
uint64_t q_gen;
lwp_t *q_worker;
};

struct workqueue {
void (*wq_func)(struct work *, void *);
void *wq_arg;
int wq_flags;

char wq_name[MAXCOMLEN];
pri_t wq_prio;
void *wq_ptr;
};

#define WQ_SIZE (roundup2(sizeof(struct workqueue), coherency_unit))
#define WQ_QUEUE_SIZE (roundup2(sizeof(struct workqueue_queue), coherency_unit))

#define POISON 0xaabbccdd

SDT_PROBE_DEFINE7(sdt, kernel, workqueue, create,
"struct workqueue *"/*wq*/,
"const char *"/*name*/,
"void (*)(struct work *, void *)"/*func*/,
"void *"/*arg*/,
"pri_t"/*prio*/,
"int"/*ipl*/,
"int"/*flags*/);
SDT_PROBE_DEFINE1(sdt, kernel, workqueue, destroy,
"struct workqueue *"/*wq*/);

SDT_PROBE_DEFINE3(sdt, kernel, workqueue, enqueue,
"struct workqueue *"/*wq*/,
"struct work *"/*wk*/,
"struct cpu_info *"/*ci*/);
SDT_PROBE_DEFINE4(sdt, kernel, workqueue, entry,
"struct workqueue *"/*wq*/,
"struct work *"/*wk*/,
"void (*)(struct work *, void *)"/*func*/,
"void *"/*arg*/);
SDT_PROBE_DEFINE4(sdt, kernel, workqueue, return,
"struct workqueue *"/*wq*/,
"struct work *"/*wk*/,
"void (*)(struct work *, void *)"/*func*/,
"void *"/*arg*/);
SDT_PROBE_DEFINE2(sdt, kernel, workqueue, wait__start,
"struct workqueue *"/*wq*/,
"struct work *"/*wk*/);
SDT_PROBE_DEFINE2(sdt, kernel, workqueue, wait__self,
"struct workqueue *"/*wq*/,
"struct work *"/*wk*/);
SDT_PROBE_DEFINE2(sdt, kernel, workqueue, wait__hit,
"struct workqueue *"/*wq*/,
"struct work *"/*wk*/);
SDT_PROBE_DEFINE2(sdt, kernel, workqueue, wait__done,
"struct workqueue *"/*wq*/,
"struct work *"/*wk*/);

SDT_PROBE_DEFINE1(sdt, kernel, workqueue, exit__start,
"struct workqueue *"/*wq*/);
SDT_PROBE_DEFINE1(sdt, kernel, workqueue, exit__done,
"struct workqueue *"/*wq*/);

static size_t
workqueue_size(int flags)
{

return WQ_SIZE
+ ((flags & WQ_PERCPU) != 0 ? ncpu : 1) * WQ_QUEUE_SIZE
+ coherency_unit;
}

static struct workqueue_queue *
workqueue_queue_lookup(struct workqueue *wq, struct cpu_info *ci)
{
u_int idx = 0;

if (wq->wq_flags & WQ_PERCPU) {
idx = ci ? cpu_index(ci) : cpu_index(curcpu());
}

return (void *)((uintptr_t)(wq) + WQ_SIZE + (idx * WQ_QUEUE_SIZE));
}

static void
workqueue_runlist(struct workqueue *wq, struct workqhead *list)
{
work_impl_t *wk;
work_impl_t *next;
struct lwp *l = curlwp;

KASSERTMSG(l->l_nopreempt == 0, "lwp %p nopreempt %d",
l, l->l_nopreempt);

for (wk = SIMPLEQ_FIRST(list); wk != NULL; wk = next) {
next = SIMPLEQ_NEXT(wk, wk_entry);
SDT_PROBE4(sdt, kernel, workqueue, entry,
wq, wk, wq->wq_func, wq->wq_arg);
(*wq->wq_func)((void *)wk, wq->wq_arg);
SDT_PROBE4(sdt, kernel, workqueue, return,
wq, wk, wq->wq_func, wq->wq_arg);
KASSERTMSG(l->l_nopreempt == 0,
"lwp %p nopreempt %d func %p",
l, l->l_nopreempt, wq->wq_func);
}
}

static void
workqueue_worker(void *cookie)
{
struct workqueue *wq = cookie;
struct workqueue_queue *q;
int s, fpu = wq->wq_flags & WQ_FPU;

/* find the workqueue of this kthread */
q = workqueue_queue_lookup(wq, curlwp->l_cpu);

if (fpu)
s = kthread_fpu_enter();
mutex_enter(&q->q_mutex);
for (;;) {
struct workqhead tmp;

SIMPLEQ_INIT(&tmp);

while (SIMPLEQ_EMPTY(&q->q_queue_pending))
cv_wait(&q->q_cv, &q->q_mutex);
SIMPLEQ_CONCAT(&tmp, &q->q_queue_pending);
SIMPLEQ_INIT(&q->q_queue_pending);

/*
* Mark the queue as actively running a batch of work
* by setting the generation number odd.
*/
q->q_gen |= 1;
mutex_exit(&q->q_mutex);

workqueue_runlist(wq, &tmp);

/*
* Notify workqueue_wait that we have completed a batch
* of work by incrementing the generation number.
*/
mutex_enter(&q->q_mutex);
KASSERTMSG(q->q_gen & 1, "q=%p gen=%"PRIu64, q, q->q_gen);
q->q_gen++;
cv_broadcast(&q->q_cv);
}
mutex_exit(&q->q_mutex);
if (fpu)
kthread_fpu_exit(s);
}

static void
workqueue_init(struct workqueue *wq, const char *name,
void (*callback_func)(struct work *, void *), void *callback_arg,
pri_t prio, int ipl)
{

KASSERT(sizeof(wq->wq_name) > strlen(name));
strncpy(wq->wq_name, name, sizeof(wq->wq_name));

wq->wq_prio = prio;
wq->wq_func = callback_func;
wq->wq_arg = callback_arg;
}

static int
workqueue_initqueue(struct workqueue *wq, struct workqueue_queue *q,
int ipl, struct cpu_info *ci)
{
int error, ktf;

KASSERT(q->q_worker == NULL);

mutex_init(&q->q_mutex, MUTEX_DEFAULT, ipl);
cv_init(&q->q_cv, wq->wq_name);
SIMPLEQ_INIT(&q->q_queue_pending);
q->q_gen = 0;
ktf = ((wq->wq_flags & WQ_MPSAFE) != 0 ? KTHREAD_MPSAFE : 0);
if (wq->wq_prio < PRI_KERNEL)
ktf |= KTHREAD_TS;
if (ci) {
error = kthread_create(wq->wq_prio, ktf, ci, workqueue_worker,
wq, &q->q_worker, "%s/%u", wq->wq_name, ci->ci_index);
} else {
error = kthread_create(wq->wq_prio, ktf, ci, workqueue_worker,
wq, &q->q_worker, "%s", wq->wq_name);
}
if (error != 0) {
mutex_destroy(&q->q_mutex);
cv_destroy(&q->q_cv);
KASSERT(q->q_worker == NULL);
}
return error;
}

struct workqueue_exitargs {
work_impl_t wqe_wk;
struct workqueue_queue *wqe_q;
};

static void
workqueue_exit(struct work *wk, void *arg)
{
struct workqueue_exitargs *wqe = (void *)wk;
struct workqueue_queue *q = wqe->wqe_q;

/*
* only competition at this point is workqueue_finiqueue.
*/

KASSERT(q->q_worker == curlwp);
KASSERT(SIMPLEQ_EMPTY(&q->q_queue_pending));
mutex_enter(&q->q_mutex);
q->q_worker = NULL;
cv_broadcast(&q->q_cv);
mutex_exit(&q->q_mutex);
kthread_exit(0);
}

static void
workqueue_finiqueue(struct workqueue *wq, struct workqueue_queue *q)
{
struct workqueue_exitargs wqe;

KASSERT(wq->wq_func == workqueue_exit);

wqe.wqe_q = q;
KASSERT(SIMPLEQ_EMPTY(&q->q_queue_pending));
KASSERT(q->q_worker != NULL);
mutex_enter(&q->q_mutex);
SIMPLEQ_INSERT_TAIL(&q->q_queue_pending, &wqe.wqe_wk, wk_entry);
cv_broadcast(&q->q_cv);
while (q->q_worker != NULL) {
cv_wait(&q->q_cv, &q->q_mutex);
}
mutex_exit(&q->q_mutex);
mutex_destroy(&q->q_mutex);
cv_destroy(&q->q_cv);
}

/* --- */

int
workqueue_create(struct workqueue **wqp, const char *name,
void (*callback_func)(struct work *, void *), void *callback_arg,
pri_t prio, int ipl, int flags)
{
struct workqueue *wq;
struct workqueue_queue *q;
void *ptr;
int error = 0;

CTASSERT(sizeof(work_impl_t) <= sizeof(struct work));

ptr = kmem_zalloc(workqueue_size(flags), KM_SLEEP);
wq = (void *)roundup2((uintptr_t)ptr, coherency_unit);
wq->wq_ptr = ptr;
wq->wq_flags = flags;

workqueue_init(wq, name, callback_func, callback_arg, prio, ipl);

if (flags & WQ_PERCPU) {
struct cpu_info *ci;
CPU_INFO_ITERATOR cii;

/* create the work-queue for each CPU */
for (CPU_INFO_FOREACH(cii, ci)) {
q = workqueue_queue_lookup(wq, ci);
error = workqueue_initqueue(wq, q, ipl, ci);
if (error) {
break;
}
}
} else {
/* initialize a work-queue */
q = workqueue_queue_lookup(wq, NULL);
error = workqueue_initqueue(wq, q, ipl, NULL);
}

if (error != 0) {
workqueue_destroy(wq);
} else {
*wqp = wq;
}

return error;
}

static bool
workqueue_q_wait(struct workqueue *wq, struct workqueue_queue *q,
work_impl_t *wk_target)
{
work_impl_t *wk;
bool found = false;
uint64_t gen;

mutex_enter(&q->q_mutex);

/*
* Avoid a deadlock scenario. We can't guarantee that
* wk_target has completed at this point, but we can't wait for
* it either, so do nothing.
*
* XXX Are there use-cases that require this semantics?
*/
if (q->q_worker == curlwp) {
SDT_PROBE2(sdt, kernel, workqueue, wait__self, wq, wk_target);
goto out;
}

/*
* Wait until the target is no longer pending. If we find it
* on this queue, the caller can stop looking in other queues.
* If we don't find it in this queue, however, we can't skip
* waiting -- it may be hidden in the running queue which we
* have no access to.
*/
again:
SIMPLEQ_FOREACH(wk, &q->q_queue_pending, wk_entry) {
if (wk == wk_target) {
SDT_PROBE2(sdt, kernel, workqueue, wait__hit, wq, wk);
found = true;
cv_wait(&q->q_cv, &q->q_mutex);
goto again;
}
}

/*
* The target may be in the batch of work currently running,
* but we can't touch that queue. So if there's anything
* running, wait until the generation changes.
*/
gen = q->q_gen;
if (gen & 1) {
do
cv_wait(&q->q_cv, &q->q_mutex);
while (gen == q->q_gen);
}

out:
mutex_exit(&q->q_mutex);

return found;
}

/*
* Wait for a specified work to finish. The caller must ensure that no new
* work will be enqueued before calling workqueue_wait. Note that if the
* workqueue is WQ_PERCPU, the caller can enqueue a new work to another queue
* other than the waiting queue.
*/
void
workqueue_wait(struct workqueue *wq, struct work *wk)
{
struct workqueue_queue *q;
bool found;

ASSERT_SLEEPABLE();

SDT_PROBE2(sdt, kernel, workqueue, wait__start, wq, wk);
if (ISSET(wq->wq_flags, WQ_PERCPU)) {
struct cpu_info *ci;
CPU_INFO_ITERATOR cii;
for (CPU_INFO_FOREACH(cii, ci)) {
q = workqueue_queue_lookup(wq, ci);
found = workqueue_q_wait(wq, q, (work_impl_t *)wk);
if (found)
break;
}
} else {
q = workqueue_queue_lookup(wq, NULL);
(void)workqueue_q_wait(wq, q, (work_impl_t *)wk);
}
SDT_PROBE2(sdt, kernel, workqueue, wait__done, wq, wk);
}

void
workqueue_destroy(struct workqueue *wq)
{
struct workqueue_queue *q;
struct cpu_info *ci;
CPU_INFO_ITERATOR cii;

ASSERT_SLEEPABLE();

SDT_PROBE1(sdt, kernel, workqueue, exit__start, wq);
wq->wq_func = workqueue_exit;
for (CPU_INFO_FOREACH(cii, ci)) {
q = workqueue_queue_lookup(wq, ci);
if (q->q_worker != NULL) {
workqueue_finiqueue(wq, q);
}
}
SDT_PROBE1(sdt, kernel, workqueue, exit__done, wq);
kmem_free(wq->wq_ptr, workqueue_size(wq->wq_flags));
}

#ifdef DEBUG
static void
workqueue_check_duplication(struct workqueue_queue *q, work_impl_t *wk)
{
work_impl_t *_wk;

SIMPLEQ_FOREACH(_wk, &q->q_queue_pending, wk_entry) {
if (_wk == wk)
panic("%s: tried to enqueue a queued work", __func__);
}
}
#endif

void
workqueue_enqueue(struct workqueue *wq, struct work *wk0, struct cpu_info *ci)
{
struct workqueue_queue *q;
work_impl_t *wk = (void *)wk0;

SDT_PROBE3(sdt, kernel, workqueue, enqueue, wq, wk0, ci);

KASSERT(wq->wq_flags & WQ_PERCPU || ci == NULL);
q = workqueue_queue_lookup(wq, ci);

mutex_enter(&q->q_mutex);
#ifdef DEBUG
workqueue_check_duplication(q, wk);
#endif
SIMPLEQ_INSERT_TAIL(&q->q_queue_pending, wk, wk_entry);
cv_broadcast(&q->q_cv);
mutex_exit(&q->q_mutex);
}