/*      $NetBSD: usb_mem.c,v 1.84 2021/12/21 09:51:22 skrll Exp $       */

/*
* Copyright (c) 1998 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Lennart Augustsson ([email protected]) at
* Carlstedt Research & Technology.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/

/*
* USB DMA memory allocation.
* We need to allocate a lot of small (many 8 byte, some larger)
* memory blocks that can be used for DMA.  Using the bus_dma
* routines directly would incur large overheads in space and time.
*/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: usb_mem.c,v 1.84 2021/12/21 09:51:22 skrll Exp $");

#ifdef _KERNEL_OPT
#include "opt_usb.h"
#endif

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/device.h>         /* for usbdivar.h */
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/once.h>
#include <sys/queue.h>
#include <sys/systm.h>

#include <dev/usb/usb.h>
#include <dev/usb/usbdi.h>
#include <dev/usb/usbdivar.h>   /* just for usb_dma_t */
#include <dev/usb/usbhist.h>
#include <dev/usb/usb_mem.h>

#define DPRINTF(FMT,A,B,C,D)    USBHIST_LOG(usbdebug,FMT,A,B,C,D)
#define DPRINTFN(N,FMT,A,B,C,D) USBHIST_LOGN(usbdebug,N,FMT,A,B,C,D)

#define USB_MEM_SMALL roundup(64, CACHE_LINE_SIZE)
#define USB_MEM_CHUNKS 64
#define USB_MEM_BLOCK (USB_MEM_SMALL * USB_MEM_CHUNKS)

/*
 * This struct is overlaid on free fragments: while a fragment sits on
 * usb_frag_freelist its own DMA memory holds this bookkeeping record.
 */
struct usb_frag_dma {
       /* Block this fragment was carved from. */
       usb_dma_block_t         *ufd_block;
       /* Byte offset of the fragment within ufd_block. */
       u_int                   ufd_offs;
       /* Linkage on usb_frag_freelist. */
       LIST_ENTRY(usb_frag_dma) ufd_next;
};

/* Block-level allocator; both expect usb_blk_lock to be held by the caller. */
Static int      usb_block_allocmem(bus_dma_tag_t, size_t, size_t,
                   u_int, usb_dma_block_t **);
Static void     usb_block_freemem(usb_dma_block_t *);

LIST_HEAD(usb_dma_block_qh, usb_dma_block);
/* Blocks that have been released and are available for reuse. */
Static struct usb_dma_block_qh usb_blk_freelist =
       LIST_HEAD_INITIALIZER(usb_blk_freelist);
/* Held while the block and fragment lists below are manipulated. */
kmutex_t usb_blk_lock;

#ifdef DEBUG
/*
 * DEBUG-only tracking lists, consulted through usb_valid_block_p():
 * blocks that were split into fragments, and blocks handed out whole.
 */
Static struct usb_dma_block_qh usb_blk_fraglist =
       LIST_HEAD_INITIALIZER(usb_blk_fraglist);
Static struct usb_dma_block_qh usb_blk_fulllist =
       LIST_HEAD_INITIALIZER(usb_blk_fulllist);
#endif
/* Number of blocks currently on usb_blk_freelist. */
Static u_int usb_blk_nfree = 0;
/* XXX should have different free list for different tags (for speed) */
/* Free fragments; each entry is a struct usb_frag_dma living in the fragment. */
Static LIST_HEAD(, usb_frag_dma) usb_frag_freelist =
       LIST_HEAD_INITIALIZER(usb_frag_freelist);

/* One-time initialiser, run through RUN_ONCE() in usb_allocmem(). */
Static int usb_mem_init(void);

/*
 * One-time setup, invoked via RUN_ONCE() from usb_allocmem(): initialise
 * the mutex that guards the block and fragment lists.  Always returns 0.
 */
Static int
usb_mem_init(void)
{
       mutex_init(&usb_blk_lock, MUTEX_DEFAULT, IPL_NONE);

       return 0;
}

/*
 * Obtain a DMA block of at least `size' bytes and `align' alignment for
 * `tag' and hand it back through *dmap.
 *
 * A matching block on usb_blk_freelist is reused when there is one;
 * otherwise a new block is built with bus_dmamem_alloc/bus_dmamem_map and
 * bus_dmamap_create/bus_dmamap_load.  `flags' are USBMALLOC_* bits:
 * MULTISEG permits more than one segment, COHERENT requests a coherent
 * mapping, ZERO clears the memory before it is returned.
 *
 * usb_blk_lock must be held on entry and is held again on return, but it
 * is dropped while a new block is being set up.
 *
 * Returns 0 on success, or the bus_dma error that aborted the allocation.
 */
Static int
usb_block_allocmem(bus_dma_tag_t tag, size_t size, size_t align,
   u_int flags, usb_dma_block_t **dmap)
{
       usb_dma_block_t *blk;
       int error;

       USBHIST_FUNC();
       USBHIST_CALLARGS(usbdebug, "size=%ju align=%ju flags=%#jx", size, align, flags, 0);

       ASSERT_SLEEPABLE();
       KASSERT(size != 0);
       KASSERT(mutex_owned(&usb_blk_lock));

#ifdef USB_FRAG_DMA_WORKAROUND
       flags |= USBMALLOC_ZERO;
#endif

       const bool want_multiseg = (flags & USBMALLOC_MULTISEG) != 0;
       const bool want_coherent = (flags & USBMALLOC_COHERENT) != 0;
       const bool want_zero = (flags & USBMALLOC_ZERO) != 0;
       const u_int dmaflags = want_coherent ? USB_DMA_COHERENT : 0;
       const int mapflags =
           BUS_DMA_WAITOK | (want_coherent ? BUS_DMA_COHERENT : 0);

       /* Try to recycle a previously released block first. */
       LIST_FOREACH(blk, &usb_blk_freelist, next) {
               /* Multi-segment blocks only go to callers that asked. */
               if (!want_multiseg && blk->nsegs != 1)
                       continue;
               if (blk->tag != tag)
                       continue;
               if (blk->size < size || blk->align < align)
                       continue;
               if ((blk->flags & USB_DMA_COHERENT) != dmaflags)
                       continue;

               LIST_REMOVE(blk, next);
               usb_blk_nfree--;
               *dmap = blk;
               if (want_zero) {
                       memset(blk->kaddr, 0, blk->size);
                       bus_dmamap_sync(blk->tag, blk->map, 0, blk->size,
                           BUS_DMASYNC_PREWRITE);
               }
               DPRINTFN(6, "free list size=%ju", blk->size, 0, 0, 0);
               return 0;
       }

       DPRINTFN(6, "no freelist entry", 0, 0, 0, 0);

       /* Building a new block may sleep; do it without the list lock. */
       mutex_exit(&usb_blk_lock);

       blk = kmem_zalloc(sizeof(*blk), KM_SLEEP);
       blk->tag = tag;
       blk->size = size;
       blk->align = align;
       blk->flags = dmaflags;

       /* One segment unless the caller accepts a page-granular scatter. */
       blk->nsegs = want_multiseg ? howmany(size, PAGE_SIZE) : 1;

       blk->segs = kmem_alloc(blk->nsegs * sizeof(*blk->segs), KM_SLEEP);
       /* bus_dmamem_alloc rewrites nsegs; remember the array size for free. */
       blk->nsegs_alloc = blk->nsegs;

       error = bus_dmamem_alloc(tag, blk->size, align, 0, blk->segs,
           blk->nsegs, &blk->nsegs, BUS_DMA_WAITOK);
       if (error)
               goto fail_descr;

       error = bus_dmamem_map(tag, blk->segs, blk->nsegs, blk->size,
           &blk->kaddr, mapflags);
       if (error)
               goto fail_dmamem;

       error = bus_dmamap_create(tag, blk->size, blk->nsegs, blk->size, 0,
           BUS_DMA_WAITOK, &blk->map);
       if (error)
               goto fail_unmap;

       error = bus_dmamap_load(tag, blk->map, blk->kaddr, blk->size, NULL,
           BUS_DMA_WAITOK);
       if (error)
               goto fail_destroy;

       *dmap = blk;

       if (want_zero) {
               memset(blk->kaddr, 0, blk->size);
               bus_dmamap_sync(blk->tag, blk->map, 0, blk->size,
                   BUS_DMASYNC_PREWRITE);
       }

       /* Success: error is 0 here; skip the unwind and retake the lock. */
       goto relock;

fail_destroy:
       bus_dmamap_destroy(tag, blk->map);
fail_unmap:
       bus_dmamem_unmap(tag, blk->kaddr, blk->size);
fail_dmamem:
       bus_dmamem_free(tag, blk->segs, blk->nsegs);
fail_descr:
       kmem_free(blk->segs, blk->nsegs_alloc * sizeof(*blk->segs));
       kmem_free(blk, sizeof(*blk));
relock:
       mutex_enter(&usb_blk_lock);

       return error;
}

/*
 * Compiled out.  If enabled, this would release a block completely,
 * undoing the steps of usb_block_allocmem() in reverse: unload and destroy
 * the DMA map, unmap and free the DMA memory, then free the segment array
 * and the block descriptor.  Nothing visible in this file calls it; blocks
 * are recycled through usb_blk_freelist by usb_block_freemem() instead.
 */
#if 0
void
usb_block_real_freemem(usb_dma_block_t *b)
{
       ASSERT_SLEEPABLE();

       bus_dmamap_unload(b->tag, b->map);
       bus_dmamap_destroy(b->tag, b->map);
       bus_dmamem_unmap(b->tag, b->kaddr, b->size);
       bus_dmamem_free(b->tag, b->segs, b->nsegs);
       /* nsegs_alloc, not nsegs: it records the size the array was allocated with. */
       kmem_free(b->segs, b->nsegs_alloc * sizeof(*b->segs));
       kmem_free(b, sizeof(*b));
}
#endif

#ifdef DEBUG
/*
 * DEBUG-only membership test: walk list `qh' and report whether block `b'
 * is linked on it.  Used by the KDASSERTMSG checks in usb_allocmem() and
 * usb_freemem().
 */
static bool
usb_valid_block_p(usb_dma_block_t *b, struct usb_dma_block_qh *qh)
{
       usb_dma_block_t *cur;
       bool found = false;

       LIST_FOREACH(cur, qh, next) {
               if (cur == b) {
                       found = true;
                       break;
               }
       }

       return found;
}
#endif

/*
 * Retire a block by parking it on usb_blk_freelist for later reuse.
 *
 * The memory is not freed unconditionally here since we might be called
 * from an interrupt context and that is BAD.
 * XXX when should we really free?
 *
 * The caller must hold usb_blk_lock.
 */
Static void
usb_block_freemem(usb_dma_block_t *b)
{
       USBHIST_FUNC();
       USBHIST_CALLARGS(usbdebug, "size=%ju", b->size, 0, 0, 0);

       KASSERT(mutex_owned(&usb_blk_lock));

#ifdef DEBUG
       /*
        * DEBUG kernels keep allocated blocks on a tracking list
        * (usb_allocmem() links them there); unlink before re-linking.
        */
       LIST_REMOVE(b, next);
#endif
       usb_blk_nfree++;
       LIST_INSERT_HEAD(&usb_blk_freelist, b, next);
}

/*
 * Allocate `size' bytes of DMA memory on `tag' and describe them in *p.
 *
 * Requests whose size or alignment exceeds USB_MEM_SMALL receive whole
 * blocks: the size is rounded up to a multiple of USB_MEM_BLOCK and the
 * block is marked USB_DMA_FULLBLOCK.  All other requests are served from a
 * USB_MEM_SMALL-sized fragment; if no free fragment matches the tag and the
 * requested coherency, a new block is obtained and carved into fragments.
 *
 * `flags' are USBMALLOC_* bits and are passed on to usb_block_allocmem().
 * Returns 0 on success or the error from usb_block_allocmem().
 * Must be called from a context that may sleep.
 */
int
usb_allocmem(bus_dma_tag_t tag, size_t size, size_t align, u_int flags,
   usb_dma_t *p)
{
       struct usb_frag_dma *f;
       usb_dma_block_t *b;
       int err;
       int i;
       static ONCE_DECL(init_control);

       USBHIST_FUNC(); USBHIST_CALLED(usbdebug);

       ASSERT_SLEEPABLE();

       /* The list lock is initialised lazily on the first allocation. */
       RUN_ONCE(&init_control, usb_mem_init);

       u_int dmaflags = (flags & USBMALLOC_COHERENT) ? USB_DMA_COHERENT : 0;

       /* If the request is large then just use a full block. */
       if (size > USB_MEM_SMALL || align > USB_MEM_SMALL) {
               DPRINTFN(1, "large alloc %jd", size, 0, 0, 0);
               size = (size + USB_MEM_BLOCK - 1) & ~(USB_MEM_BLOCK - 1);
               mutex_enter(&usb_blk_lock);
               err = usb_block_allocmem(tag, size, align, flags,
                   &p->udma_block);
               if (!err) {
#ifdef DEBUG
                       LIST_INSERT_HEAD(&usb_blk_fulllist, p->udma_block, next);
#endif
                       p->udma_block->flags = USB_DMA_FULLBLOCK | dmaflags;
                       p->udma_offs = 0;
               }
               mutex_exit(&usb_blk_lock);
               return err;
       }

       mutex_enter(&usb_blk_lock);
       /* Check for free fragments. */
       LIST_FOREACH(f, &usb_frag_freelist, ufd_next) {
               KDASSERTMSG(usb_valid_block_p(f->ufd_block, &usb_blk_fraglist),
                   "%s: usb frag %p: unknown block pointer %p",
                   __func__, f, f->ufd_block);
               if (f->ufd_block->tag == tag &&
                   (f->ufd_block->flags & USB_DMA_COHERENT) == dmaflags)
                       break;
       }
       if (f == NULL) {
               DPRINTFN(1, "adding fragments", 0, 0, 0, 0);

               /* Note: usb_block_allocmem() may drop and retake the lock. */
               err = usb_block_allocmem(tag, USB_MEM_BLOCK, USB_MEM_SMALL,
                   flags, &b);
               if (err) {
                       mutex_exit(&usb_blk_lock);
                       return err;
               }
#ifdef DEBUG
               LIST_INSERT_HEAD(&usb_blk_fraglist, b, next);
#endif
               /*
                * Clear USB_DMA_FULLBLOCK (the block may be a recycled full
                * block) but keep the coherency bit: the fragment search
                * above matches on it, so wiping it would make coherent
                * fragments unfindable for coherent requests and hand them
                * to non-coherent ones instead.
                */
               b->flags = dmaflags;
               for (i = 0; i < USB_MEM_BLOCK; i += USB_MEM_SMALL) {
                       f = (struct usb_frag_dma *)((char *)b->kaddr + i);
                       f->ufd_block = b;
                       f->ufd_offs = i;
                       LIST_INSERT_HEAD(&usb_frag_freelist, f, ufd_next);
#ifdef USB_FRAG_DMA_WORKAROUND
                       /* Only every other slot becomes a fragment. */
                       i += 1 * USB_MEM_SMALL;
#endif
               }
               /* The head of the list is now a fragment of the new block. */
               f = LIST_FIRST(&usb_frag_freelist);
       }
       p->udma_block = f->ufd_block;
       p->udma_offs = f->ufd_offs;
#ifdef USB_FRAG_DMA_WORKAROUND
       /* Hand out the slot after the one holding the bookkeeping record. */
       p->udma_offs += USB_MEM_SMALL;
#endif
       LIST_REMOVE(f, ufd_next);
       mutex_exit(&usb_blk_lock);
       DPRINTFN(5, "use frag=%#jx size=%jd", (uintptr_t)f, size, 0, 0);

       return 0;
}

/*
 * Release memory obtained from usb_allocmem().
 *
 * A USB_DMA_FULLBLOCK allocation returns its block to the block free list;
 * anything else is a fragment, which is turned back into a
 * struct usb_frag_dma record and pushed onto usb_frag_freelist.
 */
void
usb_freemem(usb_dma_t *p)
{
       USBHIST_FUNC(); USBHIST_CALLED(usbdebug);

       mutex_enter(&usb_blk_lock);

       if ((p->udma_block->flags & USB_DMA_FULLBLOCK) != 0) {
               KDASSERTMSG(usb_valid_block_p(p->udma_block, &usb_blk_fulllist),
                   "%s: dma %p: invalid block pointer %p",
                   __func__, p, p->udma_block);
               DPRINTFN(1, "large free", 0, 0, 0, 0);
               usb_block_freemem(p->udma_block);
               mutex_exit(&usb_blk_lock);
               return;
       }

       KDASSERTMSG(usb_valid_block_p(p->udma_block, &usb_blk_fraglist),
           "%s: dma %p: invalid block pointer %p",
           __func__, p, p->udma_block);
       //usb_syncmem(p, 0, USB_MEM_SMALL, BUS_DMASYNC_POSTREAD);

       /* The free-list record is written into the fragment's own memory. */
       struct usb_frag_dma *frag = KERNADDR(p, 0);
       u_int frag_offs = p->udma_offs;
#ifdef USB_FRAG_DMA_WORKAROUND
       /* Step back to the slot that carries the bookkeeping record. */
       frag = (void *)((uintptr_t)frag - USB_MEM_SMALL);
       frag_offs -= USB_MEM_SMALL;
#endif
       frag->ufd_block = p->udma_block;
       frag->ufd_offs = frag_offs;
       LIST_INSERT_HEAD(&usb_frag_freelist, frag, ufd_next);

       mutex_exit(&usb_blk_lock);
       DPRINTFN(5, "frag=%#jx", (uintptr_t)frag, 0, 0, 0);
}

/*
 * Translate `offset' within the allocation `dma' into a bus address.
 *
 * The allocation's own offset inside its block is added first.  For a
 * single-segment block the answer is a direct addition; otherwise the
 * loaded map's segments are walked to find the one containing the offset.
 * An offset outside the block is caught only by the assertions.
 */
bus_addr_t
usb_dmaaddr(usb_dma_t *dma, unsigned int offset)
{
       usb_dma_block_t *blk = dma->udma_block;
       unsigned int i;
       bus_size_t seg_offs;

       /* From here on `offset' is relative to the start of the block. */
       offset += dma->udma_offs;

       /* offset is unsigned, so print it with %u rather than %d. */
       KASSERTMSG(offset < blk->size, "offset %u vs %zu", offset,
           blk->size);

       if (blk->nsegs == 1) {
               KASSERT(blk->map->dm_segs[0].ds_len > offset);
               return blk->map->dm_segs[0].ds_addr + offset;
       }

       /*
        * Search for a bus_segment_t corresponding to this offset. With no
        * record of the offset in the map to a particular dma_segment_t, we
        * have to iterate from the start of the list each time. Could be
        * improved
        */
       seg_offs = 0;
       for (i = 0; i < blk->nsegs; i++) {
               /* Stop at the first segment whose end lies past the offset. */
               if (seg_offs + blk->map->dm_segs[i].ds_len > offset)
                       break;

               seg_offs += blk->map->dm_segs[i].ds_len;
       }

       KASSERT(i != blk->nsegs);
       offset -= seg_offs;
       return blk->map->dm_segs[i].ds_addr + offset;
}

/*
 * Issue bus_dmamap_sync() with `ops' for `len' bytes starting `offset'
 * bytes into the allocation `p'.  The allocation's position inside its
 * block is added so the range is expressed relative to the block's map.
 */
void
usb_syncmem(usb_dma_t *p, bus_addr_t offset, bus_size_t len, int ops)
{
       usb_dma_block_t *blk = p->udma_block;

       bus_dmamap_sync(blk->tag, blk->map, p->udma_offs + offset, len, ops);
}