/* $NetBSD: xenbus_dev.c,v 1.19 2024/02/09 22:08:34 andvar Exp $ */
/*
* xenbus_dev.c
*
* Driver giving user-space access to the kernel's xenbus connection
* to xenstore.
*
* Copyright (c) 2005, Christian Limpach
* Copyright (c) 2005, Rusty Russell, IBM Corporation
*
* This file may be distributed separately from the Linux kernel, or
* incorporated into other software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy, modify,
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: xenbus_dev.c,v 1.19 2024/02/09 22:08:34 andvar Exp $");

#include "opt_xen.h"

#include <sys/types.h>
#include <sys/null.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/dirent.h>
#include <sys/stat.h>
#include <sys/tree.h>
#include <sys/vnode.h>
#include <miscfs/specfs/specdev.h>
#include <miscfs/kernfs/kernfs.h>

#include <xen/kernfs_machdep.h>

#include <xen/intr.h>
#include <xen/hypervisor.h>
#include <xen/xenbus.h>
#include "xenbus_comms.h"

static int xenbus_dev_read(void *);
static int xenbus_dev_write(void *);
static int xenbus_dev_open(void *);
static int xenbus_dev_close(void *);
static int xsd_port_read(void *);

#define DIR_MODE         (S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)
#define PRIVCMD_MODE    (S_IRUSR | S_IWUSR)
static const struct kernfs_fileop xenbus_fileops[] = {
 { .kf_fileop = KERNFS_FILEOP_OPEN, .kf_vop = xenbus_dev_open },
 { .kf_fileop = KERNFS_FILEOP_CLOSE, .kf_vop = xenbus_dev_close },
 { .kf_fileop = KERNFS_FILEOP_READ, .kf_vop = xenbus_dev_read },
 { .kf_fileop = KERNFS_FILEOP_WRITE, .kf_vop = xenbus_dev_write },
};

#define XSD_MODE    (S_IRUSR)
static const struct kernfs_fileop xsd_port_fileops[] = {
   { .kf_fileop = KERNFS_FILEOP_READ, .kf_vop = xsd_port_read },
};

static kmutex_t xenbus_dev_open_mtx;

void
xenbus_kernfs_init(void)
{
       kernfs_entry_t *dkt;
       kfstype kfst;

       kfst = KERNFS_ALLOCTYPE(xenbus_fileops);
       KERNFS_ALLOCENTRY(dkt, KM_SLEEP);
       KERNFS_INITENTRY(dkt, DT_REG, "xenbus", NULL, kfst, VREG,
           PRIVCMD_MODE);
       kernfs_addentry(kernxen_pkt, dkt);

       if (xendomain_is_dom0()) {
               kfst = KERNFS_ALLOCTYPE(xsd_port_fileops);
               KERNFS_ALLOCENTRY(dkt, KM_SLEEP);
               KERNFS_INITENTRY(dkt, DT_REG, "xsd_port", NULL,
                   kfst, VREG, XSD_MODE);
               kernfs_addentry(kernxen_pkt, dkt);
       }
       mutex_init(&xenbus_dev_open_mtx, MUTEX_DEFAULT, IPL_NONE);
}

/*
 * Several processes may open /kern/xen/xenbus in parallel.
 * In a transaction, one or more writes are followed by one or more reads.
 * Unfortunately we don't get a file descriptor identifier down here,
 * which we could use to link a read() to a transaction started in a write().
 * To work around this we keep a list of lwps that opened the xenbus file.
 * This assumes that a single lwp won't open /kern/xen/xenbus more
 * than once, and that a transaction started by one lwp won't be ended
 * by another.
 * Because of this, we also assume that the reply data has always been
 * queued before the read() syscall is made.
 */
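
/*
 * For illustration, a minimal sketch of the userland side of this
 * protocol (a hypothetical example, not part of this file's build; the
 * userland include path is assumed and error handling is trimmed).
 * The client writes a complete request -- a struct xsd_sockmsg header
 * followed by msg.len payload bytes -- then reads the reply back on
 * the same descriptor from the same lwp:
 *
 *	#include <sys/types.h>
 *	#include <fcntl.h>
 *	#include <stdint.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *	#include <xen/io/xs_wire.h>	// assumed userland install path
 *
 *	// Read one xenstore node; returns the payload length or -1.
 *	static ssize_t
 *	xs_read_node(const char *path, void *buf, size_t buflen)
 *	{
 *		struct xsd_sockmsg msg = {
 *			.type = XS_READ, .req_id = 1, .tx_id = 0,
 *			.len = (uint32_t)strlen(path) + 1,
 *		};
 *		ssize_t n = -1;
 *		int fd = open("/kern/xen/xenbus", O_RDWR);
 *
 *		if (fd == -1)
 *			return -1;
 *		// Header and payload may arrive in separate writes; the
 *		// driver buffers until sizeof(msg) + msg.len bytes are in.
 *		if (write(fd, &msg, sizeof(msg)) == (ssize_t)sizeof(msg) &&
 *		    write(fd, path, msg.len) == (ssize_t)msg.len &&
 *		    // the reply was queued during write(): header, payload
 *		    read(fd, &msg, sizeof(msg)) == (ssize_t)sizeof(msg) &&
 *		    msg.type != XS_ERROR && msg.len <= buflen)
 *			n = read(fd, buf, msg.len);
 *		close(fd);
 *		return n;
 *	}
 */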

struct xenbus_dev_transaction {
       SLIST_ENTRY(xenbus_dev_transaction) trans_next;
       struct xenbus_transaction *handle;
};

struct xenbus_dev_lwp {
       SLIST_ENTRY(xenbus_dev_lwp) lwp_next;
       SLIST_HEAD(, xenbus_dev_transaction) transactions;
       lwp_t *lwp;
       /* Response queue. */
#define BUFFER_SIZE (PAGE_SIZE)
#define MASK_READ_IDX(idx) ((idx)&(BUFFER_SIZE-1))
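	/*
	 * read_cons and read_prod are free-running counters;
	 * MASK_READ_IDX() relies on BUFFER_SIZE being a power of two
	 * to fold them into ring offsets.  queue_reply() asserts that
	 * they never drift more than BUFFER_SIZE apart.
	 */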
       char read_buffer[BUFFER_SIZE];
       unsigned int read_cons, read_prod;
       /* Partial request. */
       unsigned int len;
       union {
               struct xsd_sockmsg msg;
               char buffer[BUFFER_SIZE];
       } u;
       kmutex_t mtx;
};

struct xenbus_dev_data {
	/* lwps which opened this device */
       SLIST_HEAD(, xenbus_dev_lwp) lwps;
       kmutex_t mtx;
};


static int
xenbus_dev_read(void *v)
{
       struct vop_read_args /* {
               struct vnode *a_vp;
               struct uio *a_uio;
               int  a_ioflag;
               struct ucred *a_cred;
       } */ *ap = v;
       struct kernfs_node *kfs = VTOKERN(ap->a_vp);
       struct uio *uio = ap->a_uio;
       struct xenbus_dev_data *u;
       struct xenbus_dev_lwp *xlwp;
       int err;
       off_t offset;

       mutex_enter(&xenbus_dev_open_mtx);
       u = kfs->kfs_v;
       if (u == NULL) {
               mutex_exit(&xenbus_dev_open_mtx);
               return EBADF;
       }
       mutex_enter(&u->mtx);
       mutex_exit(&xenbus_dev_open_mtx);
       SLIST_FOREACH(xlwp, &u->lwps, lwp_next) {
               if (xlwp->lwp == curlwp) {
                       break;
               }
       }
       if (xlwp == NULL) {
               mutex_exit(&u->mtx);
               return EBADF;
       }
       mutex_enter(&xlwp->mtx);
       mutex_exit(&u->mtx);

       if (xlwp->read_prod == xlwp->read_cons) {
               err = EWOULDBLOCK;
               goto end;
       }

       offset = uio->uio_offset;
	/*
	 * The pending bytes may wrap around the end of the ring; if so,
	 * copy the contiguous tail first, then continue from the start
	 * of the buffer.
	 */
	if (MASK_READ_IDX(xlwp->read_cons) +
	    (xlwp->read_prod - xlwp->read_cons) > BUFFER_SIZE) {
		err = uiomove(
		    &xlwp->read_buffer[MASK_READ_IDX(xlwp->read_cons)],
		    BUFFER_SIZE - MASK_READ_IDX(xlwp->read_cons), uio);
		if (err)
			goto end;
		xlwp->read_cons += (uio->uio_offset - offset);
		offset = uio->uio_offset;
	}
       err = uiomove(&xlwp->read_buffer[MASK_READ_IDX(xlwp->read_cons)],
           xlwp->read_prod - xlwp->read_cons, uio);
       if (err == 0)
               xlwp->read_cons += (uio->uio_offset - offset);

end:
       mutex_exit(&xlwp->mtx);
       return err;
}

static void
queue_reply(struct xenbus_dev_lwp *xlwp,
                       char *data, unsigned int len)
{
	unsigned int i;
       KASSERT(mutex_owned(&xlwp->mtx));
       for (i = 0; i < len; i++, xlwp->read_prod++)
               xlwp->read_buffer[MASK_READ_IDX(xlwp->read_prod)] = data[i];

       KASSERT((xlwp->read_prod - xlwp->read_cons) <= sizeof(xlwp->read_buffer));
}

static int
xenbus_dev_write(void *v)
{
       struct vop_write_args /* {
               struct vnode *a_vp;
               struct uio *a_uio;
               int  a_ioflag;
               struct ucred *a_cred;
       } */ *ap = v;
       struct kernfs_node *kfs = VTOKERN(ap->a_vp);
       struct uio *uio = ap->a_uio;

       struct xenbus_dev_data *u;
       struct xenbus_dev_lwp *xlwp;
       struct xenbus_dev_transaction *trans;
       void *reply;
       int err;
       size_t size;

       mutex_enter(&xenbus_dev_open_mtx);
       u = kfs->kfs_v;
       if (u == NULL) {
               mutex_exit(&xenbus_dev_open_mtx);
               return EBADF;
       }
       mutex_enter(&u->mtx);
       mutex_exit(&xenbus_dev_open_mtx);
       SLIST_FOREACH(xlwp, &u->lwps, lwp_next) {
               if (xlwp->lwp == curlwp) {
                       break;
               }
       }
       if (xlwp == NULL) {
               mutex_exit(&u->mtx);
               return EBADF;
       }
       mutex_enter(&xlwp->mtx);
       mutex_exit(&u->mtx);

       if (uio->uio_offset < 0) {
               err = EINVAL;
               goto end;
       }
       size = uio->uio_resid;

       if ((size + xlwp->len) > sizeof(xlwp->u.buffer)) {
               err = EINVAL;
               goto end;
       }

       err = uiomove(xlwp->u.buffer + xlwp->len,
                     sizeof(xlwp->u.buffer) -  xlwp->len, uio);
       if (err)
               goto end;

       xlwp->len += size;
       if (xlwp->len < (sizeof(xlwp->u.msg) + xlwp->u.msg.len))
               goto end;

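	/*
	 * A complete request is now in u.buffer: a struct xsd_sockmsg
	 * header immediately followed by msg.len payload bytes (for
	 * XS_READ, for example, the NUL-terminated node path).
	 */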
       switch (xlwp->u.msg.type) {
       case XS_TRANSACTION_START:
       case XS_TRANSACTION_END:
       case XS_DIRECTORY:
       case XS_READ:
       case XS_GET_PERMS:
       case XS_RELEASE:
       case XS_GET_DOMAIN_PATH:
       case XS_WRITE:
       case XS_MKDIR:
       case XS_RM:
       case XS_SET_PERMS:
               err = xenbus_dev_request_and_reply(&xlwp->u.msg, &reply);
               if (err == 0) {
                       if (xlwp->u.msg.type == XS_TRANSACTION_START) {
                               trans = kmem_alloc(sizeof(*trans), KM_SLEEP);
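				/*
				 * The XS_TRANSACTION_START reply carries
				 * the new transaction id as an ASCII
				 * decimal string; remember it so close()
				 * can abort the transaction if this lwp
				 * never ends it.
				 */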
                               trans->handle = (struct xenbus_transaction *)
                                       strtoul(reply, NULL, 0);
                               SLIST_INSERT_HEAD(&xlwp->transactions,
                                   trans, trans_next);
                       } else if (xlwp->u.msg.type == XS_TRANSACTION_END) {
                               SLIST_FOREACH(trans, &xlwp->transactions,
                                                   trans_next) {
                                       if ((unsigned long)trans->handle ==
                                           (unsigned long)xlwp->u.msg.tx_id)
                                               break;
                               }
				if (trans == NULL) {
					/* unknown transaction id */
					err = EINVAL;
					xenbus_dev_reply_free(&xlwp->u.msg,
					    reply);
					goto end;
				}
                               SLIST_REMOVE(&xlwp->transactions, trans,
                                   xenbus_dev_transaction, trans_next);
                               kmem_free(trans, sizeof(*trans));
                       }
                       queue_reply(xlwp, (char *)&xlwp->u.msg,
                                               sizeof(xlwp->u.msg));
                       queue_reply(xlwp, (char *)reply, xlwp->u.msg.len);

                       xenbus_dev_reply_free(&xlwp->u.msg, reply);
               }
               break;

       default:
               err = EINVAL;
               break;
       }

       if (err == 0) {
               xlwp->len = 0;
       }
end:
       mutex_exit(&xlwp->mtx);
       return err;
}

static int
xenbus_dev_open(void *v)
{
       struct vop_open_args /* {
               struct vnode *a_vp;
               int a_mode;
               struct ucred *a_cred;
       } */ *ap = v;
       struct kernfs_node *kfs = VTOKERN(ap->a_vp);
       struct xenbus_dev_data *u;
       struct xenbus_dev_lwp *xlwp;

       if (xen_start_info.store_evtchn == 0)
               return ENOENT;

       mutex_enter(&xenbus_dev_open_mtx);
       u = kfs->kfs_v;
       if (u == NULL) {
               mutex_exit(&xenbus_dev_open_mtx);

               u = kmem_zalloc(sizeof(*u), KM_SLEEP);
               SLIST_INIT(&u->lwps);
               mutex_init(&u->mtx, MUTEX_DEFAULT, IPL_NONE);

               mutex_enter(&xenbus_dev_open_mtx);
		/*
		 * Must re-check: another lwp may have allocated and
		 * installed it while we slept in kmem_zalloc().
		 */
               if (kfs->kfs_v) {
                       kmem_free(u, sizeof(*u));
                       u = kfs->kfs_v;
               } else {
                       kfs->kfs_v = u;
               }
	}
       mutex_exit(&xenbus_dev_open_mtx);

       mutex_enter(&u->mtx);
       SLIST_FOREACH(xlwp, &u->lwps, lwp_next) {
               if (xlwp->lwp == curlwp) {
                       break;
               }
       }
       if (xlwp == NULL) {
               mutex_exit(&u->mtx);

               xlwp = kmem_zalloc(sizeof(*xlwp), KM_SLEEP);
               xlwp->lwp = curlwp;
               SLIST_INIT(&xlwp->transactions);
               mutex_init(&xlwp->mtx, MUTEX_DEFAULT, IPL_NONE);

               mutex_enter(&u->mtx);
               /*
                * While alloc can block, this can't be re-entered with
                * curlwp, so no need to re-check. Also the node can't
                * be closed while we are blocked here.
                */
               SLIST_INSERT_HEAD(&u->lwps, xlwp, lwp_next);
       }
       mutex_exit(&u->mtx);

       return 0;
}

static int
xenbus_dev_close(void *v)
{
       struct vop_close_args /* {
               struct vnode *a_vp;
               int a_fflag;
               struct ucred *a_cred;
       } */ *ap = v;
       struct kernfs_node *kfs = VTOKERN(ap->a_vp);

       struct xenbus_dev_data *u;
       struct xenbus_dev_lwp *xlwp;
       struct xenbus_dev_transaction *trans;

       mutex_enter(&xenbus_dev_open_mtx);
       u = kfs->kfs_v;
       KASSERT(u != NULL);
       mutex_enter(&u->mtx);
       SLIST_FOREACH(xlwp, &u->lwps, lwp_next) {
               if (xlwp->lwp == curlwp) {
                       break;
               }
       }
       if (xlwp == NULL) {
               mutex_exit(&u->mtx);
               mutex_exit(&xenbus_dev_open_mtx);
               return EBADF;
       }
       mutex_enter(&xlwp->mtx);
       while (!SLIST_EMPTY(&xlwp->transactions)) {
               trans = SLIST_FIRST(&xlwp->transactions);
               xenbus_transaction_end(trans->handle, 1);
               SLIST_REMOVE_HEAD(&xlwp->transactions, trans_next);
               kmem_free(trans, sizeof(*trans));
       }
       mutex_exit(&xlwp->mtx);
       SLIST_REMOVE(&u->lwps, xlwp, xenbus_dev_lwp, lwp_next);
       mutex_destroy(&xlwp->mtx);

	if (!SLIST_EMPTY(&u->lwps)) {
		/* other lwps still have the node open */
		mutex_exit(&u->mtx);
		mutex_exit(&xenbus_dev_open_mtx);
		kmem_free(xlwp, sizeof(*xlwp));
		return 0;
	}
       mutex_exit(&u->mtx);
       mutex_destroy(&u->mtx);
       kfs->kfs_v = NULL;
       mutex_exit(&xenbus_dev_open_mtx);
       kmem_free(xlwp, sizeof(*xlwp));
       kmem_free(u, sizeof(*u));
       return 0;
}

#define LD_STRLEN 21 /* a 64-bit integer needs at most 20 digits in base 10 */

static int
xsd_port_read(void *v)
{
       struct vop_read_args /* {
               struct vnode *a_vp;
               struct uio *a_uio;
               int  a_ioflag;
               struct ucred *a_cred;
       } */ *ap = v;
       struct uio *uio = ap->a_uio;
       int off, error;
       size_t len;
       char strbuf[LD_STRLEN], *bf;

       off = (int)uio->uio_offset;
       if (off < 0)
               return EINVAL;

       len  = snprintf(strbuf, sizeof(strbuf), "%ld\n",
           (long)xen_start_info.store_evtchn);
       if (off >= len) {
               bf = strbuf;
               len = 0;
       } else {
               bf = &strbuf[off];
               len -= off;
       }
       error = uiomove(bf, len, uio);
       return error;
}

/*
* Local variables:
*  c-file-style: "linux"
*  indent-tabs-mode: t
*  c-indent-level: 8
*  c-basic-offset: 8
*  tab-width: 8
* End:
*/