/*      $NetBSD: radixtree.c,v 1.34 2024/05/04 17:58:24 chs Exp $       */

/*-
* Copyright (c)2011,2012,2013 YAMAMOTO Takashi,
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/

/*
* radixtree.c
*
* Overview:
*
* This is an implementation of radix tree, whose keys are uint64_t and leafs
* are user provided pointers.
*
* Leaf nodes are just void * and this implementation doesn't care about
* what they actually point to.  However, this implementation has an assumption
* about their alignment.  Specifically, this implementation assumes that their
* 2 LSBs are always zero and uses them for internal accounting.
*
* Intermediate nodes and memory allocation:
*
* Intermediate nodes are automatically allocated and freed internally and
* basically users don't need to care about them.  The allocation is done via
* kmem_intr_alloc(9) for _KERNEL, malloc(3) for userland, and alloc() for
* _STANDALONE environment.  Only radix_tree_insert_node function can allocate
* memory for intermediate nodes and thus can fail for ENOMEM.
*
* Memory Efficiency:
*
* It's designed to work efficiently with dense index distribution.
* The memory consumption (number of necessary intermediate nodes) heavily
* depends on the index distribution.  Basically, more dense index distribution
* consumes less nodes per item.  Approximately,
*
*  - the best case: about RADIX_TREE_PTR_PER_NODE items per intermediate node.
*    it would look like the following.
*
*     root (t_height=1)
*      |
*      v
*      [ | | | ]   (intermediate node.  RADIX_TREE_PTR_PER_NODE=4 in this fig)
*       | | | |
*       v v v v
*       p p p p    (items)
*
*  - the worst case: RADIX_TREE_MAX_HEIGHT intermediate nodes per item.
*    it would look like the following if RADIX_TREE_MAX_HEIGHT=3.
*
*     root (t_height=3)
*      |
*      v
*      [ | | | ]
*           |
*           v
*           [ | | | ]
*                |
*                v
*                [ | | | ]
*                   |
*                   v
*                   p
*
* The height of tree (t_height) is dynamic.  It's smaller if only small
* index values are used.  As an extreme case, if only index 0 is used,
* the corresponding value is directly stored in the root of the tree
* (struct radix_tree) without allocating any intermediate nodes.  In that
* case, t_height=0.
*
* Gang lookup:
*
* This implementation provides a way to scan many nodes quickly via
* radix_tree_gang_lookup_node function and its variants.
*
* Tags:
*
* This implementation provides tagging functionality, which allows quick
* scanning of a subset of leaf nodes.  Leaf nodes are untagged when inserted
* into the tree and can be tagged by radix_tree_set_tag function.
* radix_tree_gang_lookup_tagged_node function and its variants return only
* leaf nodes with the given tag.  To reduce the number of nodes to visit for
* these functions, this implementation keeps tagging information in internal
* intermediate nodes and quickly skips uninteresting parts of a tree.
*
* A tree has RADIX_TREE_TAG_ID_MAX independent tag spaces, each of which is
* identified by a zero-origin number, tagid.  For the current implementation,
* RADIX_TREE_TAG_ID_MAX is 2.  A set of tags is described as a bitmask tagmask,
* which is a bitwise OR of (1 << tagid).
*/

#include <sys/cdefs.h>

#if defined(_KERNEL) || defined(_STANDALONE)
__KERNEL_RCSID(0, "$NetBSD: radixtree.c,v 1.34 2024/05/04 17:58:24 chs Exp $");
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/radixtree.h>
#include <lib/libkern/libkern.h>
#if defined(_STANDALONE)
#include <lib/libsa/stand.h>
#endif /* defined(_STANDALONE) */
#else /* defined(_KERNEL) || defined(_STANDALONE) */
__RCSID("$NetBSD: radixtree.c,v 1.34 2024/05/04 17:58:24 chs Exp $");
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#if 1
#define KASSERT assert
#else
#define KASSERT(a)      /* nothing */
#endif
#endif /* defined(_KERNEL) || defined(_STANDALONE) */

#include <sys/radixtree.h>

/*
 * Tree geometry.
 *
 * RADIX_TREE_BITS_PER_HEIGHT: the number of index bits consumed by each
 * level of the tree.
 * RADIX_TREE_PTR_PER_NODE: the number of slots in an intermediate node,
 * one for each possible value of those bits.
 * RADIX_TREE_MAX_HEIGHT: the number of levels needed to cover a whole
 * 64-bit index.
 * RADIX_TREE_INVALID_HEIGHT: an out-of-range height, used as a marker
 * value for radix_tree_path::p_lastidx.
 *
 * The index must split evenly into per-level digits, hence the assertion.
 */
#define RADIX_TREE_BITS_PER_HEIGHT      4       /* XXX tune */
#define RADIX_TREE_PTR_PER_NODE         (1 << RADIX_TREE_BITS_PER_HEIGHT)
#define RADIX_TREE_MAX_HEIGHT           (64 / RADIX_TREE_BITS_PER_HEIGHT)
#define RADIX_TREE_INVALID_HEIGHT       (RADIX_TREE_MAX_HEIGHT + 1)
__CTASSERT((64 % RADIX_TREE_BITS_PER_HEIGHT) == 0);

/*
 * RADIX_TREE_TAG_MASK covers the low-order bits of an entry which hold
 * its tags (one bit per tag id).
 *
 * NOTE(review): the assertion below presumably checks that the tag bits
 * fit in the bits left unused by pointer alignment -- confirm the intent.
 */
__CTASSERT(((1 << RADIX_TREE_TAG_ID_MAX) & (sizeof(int) - 1)) == 0);
#define RADIX_TREE_TAG_MASK     ((1 << RADIX_TREE_TAG_ID_MAX) - 1)

/*
 * entry_ptr:
 *
 * strip the tag bits from an entry and return the bare pointer.
 */
static inline void *
entry_ptr(void *p)
{
       const uintptr_t bits = (uintptr_t)p;

       return (void *)(bits & ~RADIX_TREE_TAG_MASK);
}

/*
 * entry_tagmask:
 *
 * return the tag bits stored in the low-order bits of an entry.
 */
static inline unsigned int
entry_tagmask(void *p)
{
       const uintptr_t bits = (uintptr_t)p;

       return bits & RADIX_TREE_TAG_MASK;
}

/*
 * entry_compose:
 *
 * build an entry by OR-ing the given tag bits into the given pointer.
 */
static inline void *
entry_compose(void *p, unsigned int tagmask)
{
       uintptr_t bits = (uintptr_t)p;

       bits |= tagmask;
       return (void *)bits;
}

/*
 * entry_match_p:
 *
 * return true if the entry is non-NULL and, when tagmask is not zero,
 * has at least one of the tags in tagmask set.  a tagmask of zero
 * matches any non-NULL entry.
 */
static inline bool
entry_match_p(void *p, unsigned int tagmask)
{

       /* an entry without a pointer must not carry tags. */
       KASSERT(entry_ptr(p) != NULL || entry_tagmask(p) == 0);
       if (p == NULL) {
               return false;
       }
       return tagmask == 0 || (entry_tagmask(p) & tagmask) != 0;
}

/*
* radix_tree_node: an intermediate node
*
* we don't care the type of leaf nodes.  they are just void *.
*
* we used to maintain a count of non-NULL nodes in this structure, but it
* prevented it from being aligned to a cache line boundary; the performance
* benefit from being cache friendly is greater than the benefit of having
* a dedicated count value, especially in multi-processor situations where
* we need to avoid intra-pool-page false sharing.
*/

struct radix_tree_node {
       /*
        * one slot per possible value of the index bits handled at this
        * level.  an unused slot is NULL; a used one holds a pointer (to a
        * child node or to a leaf) with its tag bits in the low-order bits.
        */
       void *n_ptrs[RADIX_TREE_PTR_PER_NODE];
};

/*
* p_refs[0].pptr == &t->t_root
*      :
* p_refs[n].pptr == &entry_ptr(*p_refs[n-1].pptr)->n_ptrs[x]
*      :
*      :
* p_refs[t->t_height].pptr == &leaf_pointer
*/

/*
 * radix_tree_path: the slots visited by radix_tree_lookup_ptr, from the
 * root pointer downwards.
 */
struct radix_tree_path {
       struct radix_tree_node_ref {
               /* address of the slot (t_root or an n_ptrs[] element). */
               void **pptr;
       } p_refs[RADIX_TREE_MAX_HEIGHT + 1]; /* +1 for the root ptr */
       /*
        * p_lastidx is either the index of the last valid element of p_refs[]
        * or RADIX_TREE_INVALID_HEIGHT.
        * RADIX_TREE_INVALID_HEIGHT means that radix_tree_lookup_ptr found
        * that the height of the tree is not enough to cover the given index.
        */
       unsigned int p_lastidx;
};

/*
 * path_pptr:
 *
 * return the slot address recorded in the path for the given height.
 * height 0 is the root pointer of the tree.
 */
static inline void **
path_pptr(const struct radix_tree *t, const struct radix_tree_path *p,
   unsigned int height)
{
       void **pptr;

       KASSERT(height <= t->t_height);
       pptr = p->p_refs[height].pptr;
       return pptr;
}

/*
 * path_node:
 *
 * return the node referenced by the slot recorded in the path for the
 * given height, with the tag bits stripped.
 */
static inline struct radix_tree_node *
path_node(const struct radix_tree * t, const struct radix_tree_path *p,
   unsigned int height)
{
       void **pptr;

       KASSERT(height <= t->t_height);
       pptr = path_pptr(t, p, height);
       return entry_ptr(*pptr);
}

/*
* radix_tree_init_tree:
*
* Initialize a tree.
*/

void
radix_tree_init_tree(struct radix_tree *t)
{

       /* an empty tree: no root entry and no levels. */
       t->t_root = NULL;
       t->t_height = 0;
}

/*
* radix_tree_fini_tree:
*
* Finish using a tree.
*/

void
radix_tree_fini_tree(struct radix_tree *t)
{

       /*
        * nothing to release here; just check that the tree is back in
        * the state radix_tree_init_tree left it in.
        */
       KASSERT(t->t_height == 0);
       KASSERT(t->t_root == NULL);
}

/*
* radix_tree_empty_tree_p:
*
* Return if the tree is empty.
*/

bool
radix_tree_empty_tree_p(struct radix_tree *t)
{
       const bool empty = (t->t_root == NULL);

       return empty;
}

/*
* radix_tree_empty_tagged_tree_p:
*
* Return true if the tree has no nodes with the given tag.  Otherwise
* return false.
*
* It's illegal to call this function with tagmask 0.
*/

bool
radix_tree_empty_tagged_tree_p(struct radix_tree *t, unsigned int tagmask)
{
       unsigned int roottags;

       KASSERT(tagmask != 0);
       /* only the tag bits kept on the root entry are examined. */
       roottags = entry_tagmask(t->t_root);
       return (roottags & tagmask) == 0;
}

/*
 * radix_tree_node_init:
 *
 * zero-fill a freshly allocated node.
 */
static void
radix_tree_node_init(struct radix_tree_node *n)
{

       memset(n, 0, sizeof(struct radix_tree_node));
}

#if defined(_KERNEL)
/*
* radix_tree_init:
*
* initialize the subsystem.
*/

void
radix_tree_init(void)
{

       /* there is currently nothing to set up. */
}

/*
* radix_tree_await_memory:
*
* after an insert has failed with ENOMEM, wait for memory to become
* available, so the caller can retry.  this needs to ensure that the
* maximum possible required number of nodes is available.
*/

void
radix_tree_await_memory(void)
{
       struct radix_tree_node *nodes[RADIX_TREE_MAX_HEIGHT];
       const size_t nodesize = sizeof(struct radix_tree_node);
       unsigned int i;

       /*
        * allocate one node for every possible level of the tree with
        * KM_SLEEP ...
        */
       for (i = 0; i < __arraycount(nodes); i++) {
               nodes[i] = kmem_intr_alloc(nodesize, KM_SLEEP);
       }
       /*
        * ... and hand them straight back, last one first.
        */
       while (i > 0) {
               i--;
               kmem_intr_free(nodes[i], nodesize);
       }
}

#endif /* defined(_KERNEL) */

/*
* radix_tree_sum_node:
*
* return the logical sum of all entries in the given node.  used to quickly
* check for tag masks or empty nodes.
*/

static uintptr_t
radix_tree_sum_node(const struct radix_tree_node *n)
{
#if RADIX_TREE_PTR_PER_NODE > 16
       unsigned int i;
       uintptr_t sum;

       for (i = 0, sum = 0; i < RADIX_TREE_PTR_PER_NODE; i++) {
               sum |= (uintptr_t)n->n_ptrs[i];
       }
       return sum;
#else /* RADIX_TREE_PTR_PER_NODE > 16 */
       uintptr_t sum;

       /*
        * Unrolling the above is much better than a tight loop with two
        * test+branch pairs.  On x86 with gcc 5.5.0 this compiles into 19
        * deterministic instructions including the "return" and prologue &
        * epilogue.
        */
       sum = (uintptr_t)n->n_ptrs[0];
       sum |= (uintptr_t)n->n_ptrs[1];
       sum |= (uintptr_t)n->n_ptrs[2];
       sum |= (uintptr_t)n->n_ptrs[3];
#if RADIX_TREE_PTR_PER_NODE > 4
       sum |= (uintptr_t)n->n_ptrs[4];
       sum |= (uintptr_t)n->n_ptrs[5];
       sum |= (uintptr_t)n->n_ptrs[6];
       sum |= (uintptr_t)n->n_ptrs[7];
#endif
#if RADIX_TREE_PTR_PER_NODE > 8
       sum |= (uintptr_t)n->n_ptrs[8];
       sum |= (uintptr_t)n->n_ptrs[9];
       sum |= (uintptr_t)n->n_ptrs[10];
       sum |= (uintptr_t)n->n_ptrs[11];
       sum |= (uintptr_t)n->n_ptrs[12];
       sum |= (uintptr_t)n->n_ptrs[13];
       sum |= (uintptr_t)n->n_ptrs[14];
       sum |= (uintptr_t)n->n_ptrs[15];
#endif
       return sum;
#endif /* RADIX_TREE_PTR_PER_NODE > 16 */
}

/*
 * radix_tree_node_count_ptrs:
 *
 * return the number of non-NULL slots in the given node.
 */
static int __unused
radix_tree_node_count_ptrs(const struct radix_tree_node *n)
{
       unsigned int count = 0;
       unsigned int slot;

       for (slot = 0; slot < RADIX_TREE_PTR_PER_NODE; slot++) {
               if (n->n_ptrs[slot] != NULL) {
                       count++;
               }
       }
       return count;
}

/*
 * radix_tree_alloc_node:
 *
 * allocate a zero-filled intermediate node.  return NULL if the
 * allocation failed.
 */
static struct radix_tree_node *
radix_tree_alloc_node(void)
{
       struct radix_tree_node *n;

#if defined(_KERNEL)
       /*
        * We must not block waiting for memory because this function
        * can be called in contexts where waiting for memory is illegal.
        */
       n = kmem_intr_alloc(sizeof(*n), KM_NOSLEEP);
#elif defined(_STANDALONE)
       n = alloc(sizeof(struct radix_tree_node));
#else /* userland */
       n = malloc(sizeof(struct radix_tree_node));
#endif
       if (n == NULL) {
               return NULL;
       }
       radix_tree_node_init(n);
       KASSERT(radix_tree_sum_node(n) == 0);
       return n;
}

/*
 * radix_tree_free_node:
 *
 * release an intermediate node.  all of its slots must be NULL.
 */
static void
radix_tree_free_node(struct radix_tree_node *n)
{

       KASSERT(radix_tree_sum_node(n) == 0);
#if defined(_KERNEL)
       kmem_intr_free(n, sizeof(*n));
#elif defined(_STANDALONE)
       dealloc(n, sizeof(struct radix_tree_node));
#else /* userland */
       free(n);
#endif
}

/*
* radix_tree_grow:
*
* increase the height of the tree.
*/

static __noinline int
radix_tree_grow(struct radix_tree *t, unsigned int newheight)
{
       const unsigned int tagmask = entry_tagmask(t->t_root);
       struct radix_tree_node *newnodes[RADIX_TREE_MAX_HEIGHT];
       void *root;
       int h;

       KASSERT(newheight <= RADIX_TREE_MAX_HEIGHT);
       if ((root = t->t_root) == NULL) {
               t->t_height = newheight;
               return 0;
       }
       for (h = t->t_height; h < newheight; h++) {
               newnodes[h] = radix_tree_alloc_node();
               if (__predict_false(newnodes[h] == NULL)) {
                       while (--h >= (int)t->t_height) {
                               newnodes[h]->n_ptrs[0] = NULL;
                               radix_tree_free_node(newnodes[h]);
                       }
                       return ENOMEM;
               }
               newnodes[h]->n_ptrs[0] = root;
               root = entry_compose(newnodes[h], tagmask);
       }
       t->t_root = root;
       t->t_height = h;
       return 0;
}

/*
* radix_tree_lookup_ptr:
*
* an internal helper function used for various exported functions.
*
* return the pointer to store the node for the given index.
*
* if alloc is true, try to allocate the storage.  (note for _KERNEL:
* the allocation never sleeps; see radix_tree_alloc_node.)  if the
* allocation failed, or alloc is false and the storage doesn't exist,
* return NULL.
*
* if path is not NULL, fill it for the caller's investigation.
*
* if tagmask is not zero, search only for nodes with the tag set.
* note that, however, this function doesn't check the tagmask for the leaf
* pointer.  it's a caller's responsibility to investigate the value which
* is pointed by the returned pointer if necessary.
*
* while this function is a bit large, as it's called with some constant
* arguments, inlining might have benefits.  anyway, a compiler will decide.
*/

static inline void **
radix_tree_lookup_ptr(struct radix_tree *t, uint64_t idx,
   struct radix_tree_path *path, bool alloc, const unsigned int tagmask)
{
       struct radix_tree_node *n;
       /* number of low-order index bits covered by the current height. */
       int hshift = RADIX_TREE_BITS_PER_HEIGHT * t->t_height;
       int shift;
       void **vpp;
       const uint64_t mask = (UINT64_C(1) << RADIX_TREE_BITS_PER_HEIGHT) - 1;
       struct radix_tree_node_ref *refs = NULL;

       /*
        * check unsupported combinations
        */
       KASSERT(tagmask == 0 || !alloc);
       KASSERT(path == NULL || !alloc);
       /*
        * start from the root pointer, which is also the first element of
        * the path.
        */
       vpp = &t->t_root;
       if (path != NULL) {
               refs = path->p_refs;
               refs->pptr = vpp;
       }
       n = NULL;
       /*
        * consume the index one digit (RADIX_TREE_BITS_PER_HEIGHT bits) at
        * a time, starting from the most significant one.
        */
       for (shift = 64 - RADIX_TREE_BITS_PER_HEIGHT; shift >= 0;) {
               struct radix_tree_node *c;
               void *entry;
               const uint64_t i = (idx >> shift) & mask;

               if (shift >= hshift) {
                       /*
                        * this digit is above what the current height of
                        * the tree covers.
                        */
                       unsigned int newheight;

                       KASSERT(vpp == &t->t_root);
                       if (i == 0) {
                               /*
                                * a zero digit doesn't need a level.
                                */
                               shift -= RADIX_TREE_BITS_PER_HEIGHT;
                               continue;
                       }
                       if (!alloc) {
                               /*
                                * the tree is too short to hold idx.
                                */
                               if (path != NULL) {
                                       KASSERT((refs - path->p_refs) == 0);
                                       path->p_lastidx =
                                           RADIX_TREE_INVALID_HEIGHT;
                               }
                               return NULL;
                       }
                       /*
                        * make the tree tall enough to cover this digit.
                        */
                       newheight = shift / RADIX_TREE_BITS_PER_HEIGHT + 1;
                       if (radix_tree_grow(t, newheight)) {
                               return NULL;
                       }
                       hshift = RADIX_TREE_BITS_PER_HEIGHT * t->t_height;
               }
               entry = *vpp;
               c = entry_ptr(entry);
               if (c == NULL ||
                   (tagmask != 0 &&
                   (entry_tagmask(entry) & tagmask) == 0)) {
                       /*
                        * the child is missing, or it has none of the
                        * requested tags.
                        */
                       if (!alloc) {
                               if (path != NULL) {
                                       /*
                                        * record where the descent stopped.
                                        */
                                       path->p_lastidx = refs - path->p_refs;
                               }
                               return NULL;
                       }
                       /*
                        * tagmask is zero here (see the assertion above),
                        * thus the child is missing.  create it.
                        */
                       c = radix_tree_alloc_node();
                       if (c == NULL) {
                               return NULL;
                       }
                       *vpp = c;
               }
               /*
                * step down into the slot selected by this digit.
                */
               n = c;
               vpp = &n->n_ptrs[i];
               if (path != NULL) {
                       refs++;
                       refs->pptr = vpp;
               }
               shift -= RADIX_TREE_BITS_PER_HEIGHT;
       }
       if (alloc) {
               /*
                * alloc is used only for inserts, where the slot has to be
                * vacant.
                */
               KASSERT(*vpp == NULL);
       }
       if (path != NULL) {
               path->p_lastidx = refs - path->p_refs;
       }
       return vpp;
}

/*
* radix_tree_undo_insert_node:
*
* Undo the effects of a failed insert.  The conditions that led to the
* insert may change and it may not be retried.  If the insert is not
* retried, there will be no corresponding radix_tree_remove_node() for
* this index in the future.  Therefore any adjustments made to the tree
* before memory was exhausted must be reverted.
*/

static __noinline void
radix_tree_undo_insert_node(struct radix_tree *t, uint64_t idx)
{
       struct radix_tree_path path;
       int i;

       (void)radix_tree_lookup_ptr(t, idx, &path, false, 0);
       if (path.p_lastidx == RADIX_TREE_INVALID_HEIGHT) {
               /*
                * no nodes were inserted.
                */
               return;
       }
       for (i = path.p_lastidx - 1; i >= 0; i--) {
               struct radix_tree_node ** const pptr =
                   (struct radix_tree_node **)path_pptr(t, &path, i);
               struct radix_tree_node *n;

               KASSERT(pptr != NULL);
               n = entry_ptr(*pptr);
               KASSERT(n != NULL);
               if (radix_tree_sum_node(n) != 0) {
                       break;
               }
               radix_tree_free_node(n);
               *pptr = NULL;
       }
       /*
        * fix up height
        */
       if (i < 0) {
               KASSERT(t->t_root == NULL);
               t->t_height = 0;
       }
}

/*
* radix_tree_insert_node:
*
* Insert the node at the given index.
*
* It's illegal to insert NULL.  It's illegal to insert a non-aligned pointer.
*
* This function returns ENOMEM if necessary memory allocation failed.
* Otherwise, this function returns 0.
*
* Note that inserting a node can involve memory allocation for intermediate
* nodes.  If _KERNEL, it's done with no-sleep IPL_NONE memory allocation.
*
* For the newly inserted node, all tags are cleared.
*/

int
radix_tree_insert_node(struct radix_tree *t, uint64_t idx, void *p)
{
       void **slot;

       KASSERT(p != NULL);
       /* the low-order bits of p must be free to hold tags. */
       KASSERT(entry_tagmask(entry_compose(p, 0)) == 0);
       slot = radix_tree_lookup_ptr(t, idx, NULL, true, 0);
       if (__predict_false(slot == NULL)) {
               /*
                * ran out of memory.  take back whatever was added to the
                * tree before that.
                */
               radix_tree_undo_insert_node(t, idx);
               return ENOMEM;
       }
       KASSERT(*slot == NULL);
       *slot = p;
       return 0;
}

/*
* radix_tree_replace_node:
*
* Replace a node at the given index with the given node and return the
* replaced one.
*
* It's illegal to try to replace a node which has not been inserted.
*
* This function keeps tags intact.
*/

void *
radix_tree_replace_node(struct radix_tree *t, uint64_t idx, void *p)
{
       void **vpp;
       void *oldentry;

       KASSERT(p != NULL);
       KASSERT(entry_tagmask(entry_compose(p, 0)) == 0);
       vpp = radix_tree_lookup_ptr(t, idx, NULL, false, 0);
       KASSERT(vpp != NULL);
       oldentry = *vpp;
       KASSERT(oldentry != NULL);
       /*
        * store the new pointer together with the tags of the old entry.
        */
       *vpp = entry_compose(p, entry_tagmask(oldentry));
       return entry_ptr(oldentry);
}

/*
* radix_tree_remove_node:
*
* Remove the node at the given index.
*
* It's illegal to try to remove a node which has not been inserted.
*/

void *
radix_tree_remove_node(struct radix_tree *t, uint64_t idx)
{
       struct radix_tree_path path;
       void **vpp;
       void *oldp;
       int i;

       /*
        * find the leaf slot, recording the path leading to it.
        */
       vpp = radix_tree_lookup_ptr(t, idx, &path, false, 0);
       KASSERT(vpp != NULL);
       oldp = *vpp;
       KASSERT(oldp != NULL);
       KASSERT(path.p_lastidx == t->t_height);
       KASSERT(vpp == path_pptr(t, &path, path.p_lastidx));
       *vpp = NULL;
       /*
        * free the intermediate nodes which have become empty, from the
        * bottom of the path upwards.  stop at the first node which still
        * has something in it.
        */
       for (i = t->t_height - 1; i >= 0; i--) {
               void *entry;
               struct radix_tree_node ** const pptr =
                   (struct radix_tree_node **)path_pptr(t, &path, i);
               struct radix_tree_node *n;

               KASSERT(pptr != NULL);
               entry = *pptr;
               n = entry_ptr(entry);
               KASSERT(n != NULL);
               if (radix_tree_sum_node(n) != 0) {
                       break;
               }
               radix_tree_free_node(n);
               *pptr = NULL;
       }
       /*
        * fix up height
        */
       if (i < 0) {
               KASSERT(t->t_root == NULL);
               t->t_height = 0;
       }
       /*
        * update tags
        *
        * continue upwards from the node the loop above stopped at (there
        * is nothing to do if the tree became empty).  the tags of each
        * entry are recomputed from the slots of the node it points to.
        * once an entry turns out to be unchanged, the entries above it
        * don't need to be touched either.
        */
       for (; i >= 0; i--) {
               void *entry;
               struct radix_tree_node ** const pptr =
                   (struct radix_tree_node **)path_pptr(t, &path, i);
               struct radix_tree_node *n;
               unsigned int newmask;

               KASSERT(pptr != NULL);
               entry = *pptr;
               n = entry_ptr(entry);
               KASSERT(n != NULL);
               KASSERT(radix_tree_sum_node(n) != 0);
               newmask = radix_tree_sum_node(n) & RADIX_TREE_TAG_MASK;
               if (newmask == entry_tagmask(entry)) {
                       break;
               }
               *pptr = entry_compose(n, newmask);
       }
       /*
        * XXX is it worth to try to reduce height?
        * if we do that, make radix_tree_grow rollback its change as well.
        */
       return entry_ptr(oldp);
}

/*
* radix_tree_lookup_node:
*
* Returns the node at the given index.
* Returns NULL if nothing is found at the given index.
*/

void *
radix_tree_lookup_node(struct radix_tree *t, uint64_t idx)
{
       void ** const vpp = radix_tree_lookup_ptr(t, idx, NULL, false, 0);

       /* no slot for idx means nothing is stored there. */
       return (vpp != NULL) ? entry_ptr(*vpp) : NULL;
}

/*
 * gang_lookup_init:
 *
 * a helper routine for radix_tree_gang_lookup_node and its variants.
 * fill the path for the given index so that gang_lookup_scan can start
 * from there.
 */
static inline void
gang_lookup_init(struct radix_tree *t, uint64_t idx,
   struct radix_tree_path *path, const unsigned int tagmask)
{
       void ** const vpp __unused =
           radix_tree_lookup_ptr(t, idx, path, false, tagmask);

       /*
        * the rest only checks the state gang_lookup_scan relies on.
        */
       KASSERT(vpp == NULL ||
           vpp == path_pptr(t, path, path->p_lastidx));
       KASSERT(&t->t_root == path_pptr(t, path, 0));
       KASSERT(path->p_lastidx == RADIX_TREE_INVALID_HEIGHT ||
          path->p_lastidx == t->t_height ||
          !entry_match_p(*path_pptr(t, path, path->p_lastidx), tagmask));
}

/*
* gang_lookup_scan:
*
* a helper routine for radix_tree_gang_lookup_node and its variants.
*/

static inline unsigned int
__attribute__((__always_inline__))
gang_lookup_scan(struct radix_tree *t, struct radix_tree_path *path,
   void **results, const unsigned int maxresults, const unsigned int tagmask,
   const bool reverse, const bool dense)
{

       /*
        * we keep the path updated only for lastidx-1.
        * vpp is what path_pptr(t, path, lastidx) would be.
        */
       void **vpp;
       unsigned int nfound;
       unsigned int lastidx;
       /*
        * set up scan direction dependent constants so that we can iterate
        * n_ptrs as the following.
        *
        *      for (i = first; i != guard; i += step)
        *              visit n->n_ptrs[i];
        *
        * note that, for a reverse scan, guard wraps around to the largest
        * unsigned value; the loops compare with it only for equality.
        */
       const int step = reverse ? -1 : 1;
       const unsigned int first = reverse ? RADIX_TREE_PTR_PER_NODE - 1 : 0;
       const unsigned int last = reverse ? 0 : RADIX_TREE_PTR_PER_NODE - 1;
       const unsigned int guard = last + step;

       KASSERT(maxresults > 0);
       KASSERT(&t->t_root == path_pptr(t, path, 0));
       lastidx = path->p_lastidx;
       KASSERT(lastidx == RADIX_TREE_INVALID_HEIGHT ||
          lastidx == t->t_height ||
          !entry_match_p(*path_pptr(t, path, lastidx), tagmask));
       nfound = 0;
       if (lastidx == RADIX_TREE_INVALID_HEIGHT) {
               /*
                * requested idx is beyond the right-most node.
                */
               if (reverse && !dense) {
                       /*
                        * everything in the tree is below idx.  start
                        * from the root and descend along the right-most
                        * entries.
                        */
                       lastidx = 0;
                       vpp = path_pptr(t, path, lastidx);
                       goto descend;
               }
               return 0;
       }
       vpp = path_pptr(t, path, lastidx);
       while (/*CONSTCOND*/true) {
               struct radix_tree_node *n;
               unsigned int i;

               if (entry_match_p(*vpp, tagmask)) {
                       KASSERT(lastidx == t->t_height);
                       /*
                        * record the matching non-NULL leaf.
                        */
                       results[nfound] = entry_ptr(*vpp);
                       nfound++;
                       if (nfound == maxresults) {
                               return nfound;
                       }
               } else if (dense) {
                       /*
                        * a dense scan ends at the first hole.
                        */
                       return nfound;
               }
scan_siblings:
               /*
                * try to find the next matching non-NULL sibling.
                */
               if (lastidx == 0) {
                       /*
                        * the root has no siblings.
                        * we've done.
                        */
                       KASSERT(vpp == &t->t_root);
                       break;
               }
               n = path_node(t, path, lastidx - 1);
               /*
                * vpp - n->n_ptrs is the slot we are currently at.
                */
               for (i = vpp - n->n_ptrs + step; i != guard; i += step) {
                       KASSERT(i < RADIX_TREE_PTR_PER_NODE);
                       if (entry_match_p(n->n_ptrs[i], tagmask)) {
                               vpp = &n->n_ptrs[i];
                               break;
                       } else if (dense) {
                               return nfound;
                       }
               }
               if (i == guard) {
                       /*
                        * not found.  go to parent.
                        */
                       lastidx--;
                       vpp = path_pptr(t, path, lastidx);
                       goto scan_siblings;
               }
descend:
               /*
                * following the left-most (or right-most in the case of
                * reverse scan) child node, descend until reaching the leaf or
                * a non-matching entry.
                */
               while (entry_match_p(*vpp, tagmask) && lastidx < t->t_height) {
                       /*
                        * save vpp in the path so that we can come back to this
                        * node after finishing visiting children.
                        */
                       path->p_refs[lastidx].pptr = vpp;
                       n = entry_ptr(*vpp);
                       vpp = &n->n_ptrs[first];
                       lastidx++;
               }
       }
       return nfound;
}

/*
* radix_tree_gang_lookup_node:
*
* Scan the tree starting from the given index in the ascending order and
* return found nodes.
*
* results should be an array large enough to hold maxresults pointers.
* This function returns the number of nodes found, up to maxresults.
* Returning less than maxresults means there are no more nodes in the tree.
*
* If dense == true, this function stops scanning when it finds a hole of
* indexes.  I.e. an index for which radix_tree_lookup_node would return NULL.
* If dense == false, this function skips holes and continues scanning until
* maxresults nodes are found or it reaches the limit of the index range.
*
* The result of this function is semantically equivalent to what could be
* obtained by repeated calls of radix_tree_lookup_node with increasing index.
* but this function is expected to be computationally cheaper when looking up
* multiple nodes at once.  Especially, it's expected to be much cheaper when
* node indexes are distributed sparsely.
*
* Note that this function doesn't return index values of found nodes.
* Thus, in the case of dense == false, if index values are important for
* a caller, it's the caller's responsibility to check them, typically
* by examining the returned nodes using some caller-specific knowledge
* about them.
* In the case of dense == true, a node returned via results[N] is always for
* the index (idx + N).
*/

unsigned int
radix_tree_gang_lookup_node(struct radix_tree *t, uint64_t idx,
   void **results, unsigned int maxresults, bool dense)
{
       struct radix_tree_path path;
       unsigned int nfound;

       /* tagmask 0: tags are ignored. */
       gang_lookup_init(t, idx, &path, 0);
       /* reverse == false: ascending order. */
       nfound = gang_lookup_scan(t, &path, results, maxresults, 0, false,
           dense);
       return nfound;
}

/*
* radix_tree_gang_lookup_node_reverse:
*
* Same as radix_tree_gang_lookup_node except that this one scans the
* tree in the reverse order.  I.e. descending index values.
*/

unsigned int
radix_tree_gang_lookup_node_reverse(struct radix_tree *t, uint64_t idx,
   void **results, unsigned int maxresults, bool dense)
{
       struct radix_tree_path path;
       unsigned int nfound;

       /* tagmask 0: tags are ignored. */
       gang_lookup_init(t, idx, &path, 0);
       /* reverse == true: descending order. */
       nfound = gang_lookup_scan(t, &path, results, maxresults, 0, true,
           dense);
       return nfound;
}

/*
* radix_tree_gang_lookup_tagged_node:
*
* Same as radix_tree_gang_lookup_node except that this one only returns
* nodes which have at least one of the tags in tagmask set.
*
* It's illegal to call this function with tagmask 0.
*/

unsigned int
radix_tree_gang_lookup_tagged_node(struct radix_tree *t, uint64_t idx,
   void **results, unsigned int maxresults, bool dense, unsigned int tagmask)
{
       struct radix_tree_path path;
       unsigned int nfound;

       KASSERT(tagmask != 0);
       gang_lookup_init(t, idx, &path, tagmask);
       /* reverse == false: ascending order. */
       nfound = gang_lookup_scan(t, &path, results, maxresults, tagmask,
           false, dense);
       return nfound;
}

/*
* radix_tree_gang_lookup_tagged_node_reverse:
*
* Same as radix_tree_gang_lookup_tagged_node except that this one scans the
* tree in the reverse order.  I.e. descending index values.
*/

unsigned int
radix_tree_gang_lookup_tagged_node_reverse(struct radix_tree *t, uint64_t idx,
   void **results, unsigned int maxresults, bool dense, unsigned int tagmask)
{
       struct radix_tree_path path;
       unsigned int nfound;

       KASSERT(tagmask != 0);
       gang_lookup_init(t, idx, &path, tagmask);
       /* reverse == true: descending order. */
       nfound = gang_lookup_scan(t, &path, results, maxresults, tagmask,
           true, dense);
       return nfound;
}

/*
* radix_tree_get_tag:
*
* Return the subset of tagmask which is set for the node at the given index.
*
* It's illegal to call this function for a node which has not been inserted.
*/

unsigned int
radix_tree_get_tag(struct radix_tree *t, uint64_t idx, unsigned int tagmask)
{
       /*
        * two implementations which should behave the same are kept here.
        * the first one is the one in use because it seems faster.
        */
#if 1
       void ** const vpp =
           radix_tree_lookup_ptr(t, idx, NULL, false, tagmask);
       unsigned int tags;

       if (vpp == NULL) {
               /*
                * the tagged lookup stopped before reaching the leaf.
                */
               return 0;
       }
       KASSERT(*vpp != NULL);
       tags = entry_tagmask(*vpp);
       return tags & tagmask;
#else
       void **vpp;

       vpp = radix_tree_lookup_ptr(t, idx, NULL, false, 0);
       KASSERT(vpp != NULL);
       return (entry_tagmask(*vpp) & tagmask);
#endif
}

/*
* radix_tree_set_tag:
*
* Set the tags in tagmask for the node at the given index.
*
* Every tag in tagmask ends up set on the leaf slot and on the slots above
* it along the looked-up path.  A mask with more than one tag is handled
* even when the node already carries only some of the requested tags.
*
* It's illegal to call this function for a node which has not been inserted.
* It's illegal to call this function with tagmask 0.
*/

void
radix_tree_set_tag(struct radix_tree *t, uint64_t idx, unsigned int tagmask)
{
       struct radix_tree_path path;
       void **vpp __unused;
       int i;

       KASSERT(tagmask != 0);
       vpp = radix_tree_lookup_ptr(t, idx, &path, false, 0);
       KASSERT(vpp != NULL);
       KASSERT(*vpp != NULL);
       KASSERT(path.p_lastidx == t->t_height);
       KASSERT(vpp == path_pptr(t, &path, path.p_lastidx));
       /*
        * walk the path from the leaf slot (level t_height) towards level 0.
        */
       for (i = t->t_height; i >= 0; i--) {
               void ** const pptr = (void **)path_pptr(t, &path, i);
               void *entry;

               KASSERT(pptr != NULL);
               entry = *pptr;
               /*
                * stop only when this slot already has every requested tag;
                * the slots above it are then expected to have them too.
                * stopping as soon as any single tag matches would leave
                * the remaining tags of a multi-bit mask unset.
                */
               if ((entry_tagmask(entry) & tagmask) == tagmask) {
                       break;
               }
               *pptr = (void *)((uintptr_t)entry | tagmask);
       }
}

/*
* radix_tree_clear_tag:
*
* Clear the tags in tagmask for the node at the given index.
*
* Each tag is cleared on the leaf slot and then on the slots above it for as
* long as no other child at that level still carries the tag.  The tags are
* tracked individually, so a mask with more than one tag is handled even
* when the node's siblings keep only some of them.
*
* It's illegal to call this function for a node which has not been inserted.
* It's illegal to call this function with tagmask 0.
*/

void
radix_tree_clear_tag(struct radix_tree *t, uint64_t idx, unsigned int tagmask)
{
       struct radix_tree_path path;
       unsigned int pending;
       void **vpp;
       int i;

       KASSERT(tagmask != 0);
       vpp = radix_tree_lookup_ptr(t, idx, &path, false, 0);
       KASSERT(vpp != NULL);
       KASSERT(*vpp != NULL);
       KASSERT(path.p_lastidx == t->t_height);
       KASSERT(vpp == path_pptr(t, &path, path.p_lastidx));
       /*
        * only the tags which are actually set on the leaf need any work.
        * if none of them is set, there is nothing to do.
        */
       pending = entry_tagmask(*vpp) & tagmask;
       if (pending == 0) {
               return;
       }
       /*
        * clear each pending tag level by level, starting at the leaf slot,
        * and keep going up only for the tags no other child has.
        */
       for (i = t->t_height; i >= 0; i--) {
               void ** const pptr = (void **)path_pptr(t, &path, i);
               void *entry;

               KASSERT(pptr != NULL);
               entry = *pptr;
               /*
                * every pending tag was set on the leaf, so it is expected
                * to be set on this slot as well.
                */
               KASSERT((entry_tagmask(entry) & pending) == pending);
               *pptr = entry_compose(entry_ptr(entry),
                   entry_tagmask(entry) & ~pending);
               /*
                * check if we should proceed to process the next level:
                * tags which are still present on some child of the
                * enclosing node must stay on the slots above, so drop them
                * from the pending set.  stop once nothing is left.
                */
               if (0 < i) {
                       struct radix_tree_node *n = path_node(t, &path, i - 1);

                       pending &= ~(unsigned int)radix_tree_sum_node(n);
                       if (pending == 0) {
                               break;
                       }
               }
       }
}

#if defined(UNITTEST)

#include <inttypes.h>
#include <stdio.h>

static void
radix_tree_dump_node(const struct radix_tree *t, void *vp,
   uint64_t offset, unsigned int height)
{
       struct radix_tree_node *n;
       unsigned int i;

       for (i = 0; i < t->t_height - height; i++) {
               printf(" ");
       }
       if (entry_tagmask(vp) == 0) {
               printf("[%" PRIu64 "] %p", offset, entry_ptr(vp));
       } else {
               printf("[%" PRIu64 "] %p (tagmask=0x%x)", offset, entry_ptr(vp),
                   entry_tagmask(vp));
       }
       if (height == 0) {
               printf(" (leaf)\n");
               return;
       }
       n = entry_ptr(vp);
       assert((radix_tree_sum_node(n) & RADIX_TREE_TAG_MASK) ==
           entry_tagmask(vp));
       printf(" (%u children)\n", radix_tree_node_count_ptrs(n));
       for (i = 0; i < __arraycount(n->n_ptrs); i++) {
               void *c;

               c = n->n_ptrs[i];
               if (c == NULL) {
                       continue;
               }
               radix_tree_dump_node(t, c,
                   offset + i * (UINT64_C(1) <<
                   (RADIX_TREE_BITS_PER_HEIGHT * (height - 1))), height - 1);
       }
}

void radix_tree_dump(const struct radix_tree *);

/*
* radix_tree_dump:
*
* Print the address and the height of the tree, followed by a dump of every
* entry reachable from its root.
*/
void
radix_tree_dump(const struct radix_tree *t)
{

       /*
        * %p takes a pointer to void, so convert explicitly.
        */
       printf("tree %p height=%u\n", (const void *)t, t->t_height);
       radix_tree_dump_node(t, t->t_root, 0, t->t_height);
}

static void
test1(void)
{
       struct radix_tree s;
       struct radix_tree *t = &s;
       void *results[3];

       radix_tree_init_tree(t);
       radix_tree_dump(t);
       assert(radix_tree_lookup_node(t, 0) == NULL);
       assert(radix_tree_lookup_node(t, 1000) == NULL);
       assert(radix_tree_gang_lookup_node(t, 0, results, 3, false) == 0);
       assert(radix_tree_gang_lookup_node(t, 0, results, 3, true) == 0);
       assert(radix_tree_gang_lookup_node(t, 1000, results, 3, false) == 0);
       assert(radix_tree_gang_lookup_node(t, 1000, results, 3, true) == 0);
       assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, false) ==
           0);
       assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, true) ==
           0);
       assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, false)
           == 0);
       assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, true)
           == 0);
       assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, false, 1)
           == 0);
       assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, true, 1)
           == 0);
       assert(radix_tree_gang_lookup_tagged_node(t, 1000, results, 3, false, 1)
           == 0);
       assert(radix_tree_gang_lookup_tagged_node(t, 1000, results, 3, true, 1)
           == 0);
       assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
           false, 1) == 0);
       assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
           true, 1) == 0);
       assert(radix_tree_gang_lookup_tagged_node_reverse(t, 1000, results, 3,
           false, 1) == 0);
       assert(radix_tree_gang_lookup_tagged_node_reverse(t, 1000, results, 3,
           true, 1) == 0);
       assert(radix_tree_empty_tree_p(t));
       assert(radix_tree_empty_tagged_tree_p(t, 1));
       assert(radix_tree_empty_tagged_tree_p(t, 2));
       assert(radix_tree_insert_node(t, 0, (void *)0xdeadbea0) == 0);
       assert(!radix_tree_empty_tree_p(t));
       assert(radix_tree_empty_tagged_tree_p(t, 1));
       assert(radix_tree_empty_tagged_tree_p(t, 2));
       assert(radix_tree_lookup_node(t, 0) == (void *)0xdeadbea0);
       assert(radix_tree_lookup_node(t, 1000) == NULL);
       memset(results, 0, sizeof(results));
       assert(radix_tree_gang_lookup_node(t, 0, results, 3, false) == 1);
       assert(results[0] == (void *)0xdeadbea0);
       memset(results, 0, sizeof(results));
       assert(radix_tree_gang_lookup_node(t, 0, results, 3, true) == 1);
       assert(results[0] == (void *)0xdeadbea0);
       assert(radix_tree_gang_lookup_node(t, 1000, results, 3, false) == 0);
       assert(radix_tree_gang_lookup_node(t, 1000, results, 3, true) == 0);
       memset(results, 0, sizeof(results));
       assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, false) ==
           1);
       assert(results[0] == (void *)0xdeadbea0);
       memset(results, 0, sizeof(results));
       assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, true) ==
           1);
       assert(results[0] == (void *)0xdeadbea0);
       memset(results, 0, sizeof(results));
       assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, false)
           == 1);
       assert(results[0] == (void *)0xdeadbea0);
       assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, true)
           == 0);
       assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, false, 1)
           == 0);
       assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, true, 1)
           == 0);
       assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
           false, 1) == 0);
       assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
           true, 1) == 0);
       assert(radix_tree_insert_node(t, 1000, (void *)0xdeadbea0) == 0);
       assert(radix_tree_remove_node(t, 0) == (void *)0xdeadbea0);
       assert(!radix_tree_empty_tree_p(t));
       radix_tree_dump(t);
       assert(radix_tree_lookup_node(t, 0) == NULL);
       assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
       memset(results, 0, sizeof(results));
       assert(radix_tree_gang_lookup_node(t, 0, results, 3, false) == 1);
       assert(results[0] == (void *)0xdeadbea0);
       assert(radix_tree_gang_lookup_node(t, 0, results, 3, true) == 0);
       memset(results, 0, sizeof(results));
       assert(radix_tree_gang_lookup_node(t, 1000, results, 3, false) == 1);
       assert(results[0] == (void *)0xdeadbea0);
       memset(results, 0, sizeof(results));
       assert(radix_tree_gang_lookup_node(t, 1000, results, 3, true) == 1);
       assert(results[0] == (void *)0xdeadbea0);
       assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, false)
           == 0);
       assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, true)
           == 0);
       memset(results, 0, sizeof(results));
       assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, false)
           == 1);
       memset(results, 0, sizeof(results));
       assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, true)
           == 1);
       assert(results[0] == (void *)0xdeadbea0);
       assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, false, 1)
           == 0);
       assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, true, 1)
           == 0);
       assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
           false, 1) == 0);
       assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
           true, 1) == 0);
       assert(!radix_tree_get_tag(t, 1000, 1));
       assert(!radix_tree_get_tag(t, 1000, 2));
       assert(radix_tree_get_tag(t, 1000, 2 | 1) == 0);
       assert(radix_tree_empty_tagged_tree_p(t, 1));
       assert(radix_tree_empty_tagged_tree_p(t, 2));
       radix_tree_set_tag(t, 1000, 2);
       assert(!radix_tree_get_tag(t, 1000, 1));
       assert(radix_tree_get_tag(t, 1000, 2));
       assert(radix_tree_get_tag(t, 1000, 2 | 1) == 2);
       assert(radix_tree_empty_tagged_tree_p(t, 1));
       assert(!radix_tree_empty_tagged_tree_p(t, 2));
       radix_tree_dump(t);
       assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
       assert(radix_tree_insert_node(t, 0, (void *)0xbea0) == 0);
       radix_tree_dump(t);
       assert(radix_tree_lookup_node(t, 0) == (void *)0xbea0);
       assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
       assert(radix_tree_insert_node(t, UINT64_C(10000000000), (void *)0xdea0)
           == 0);
       radix_tree_dump(t);
       assert(radix_tree_lookup_node(t, 0) == (void *)0xbea0);
       assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
       assert(radix_tree_lookup_node(t, UINT64_C(10000000000)) ==
           (void *)0xdea0);
       radix_tree_dump(t);
       assert(!radix_tree_get_tag(t, 0, 2));
       assert(radix_tree_get_tag(t, 1000, 2));
       assert(!radix_tree_get_tag(t, UINT64_C(10000000000), 1));
       radix_tree_set_tag(t, 0, 2);
       radix_tree_set_tag(t, UINT64_C(10000000000), 2);
       radix_tree_dump(t);
       assert(radix_tree_get_tag(t, 0, 2));
       assert(radix_tree_get_tag(t, 1000, 2));
       assert(radix_tree_get_tag(t, UINT64_C(10000000000), 2));
       radix_tree_clear_tag(t, 0, 2);
       radix_tree_clear_tag(t, UINT64_C(10000000000), 2);
       radix_tree_dump(t);
       assert(!radix_tree_get_tag(t, 0, 2));
       assert(radix_tree_get_tag(t, 1000, 2));
       assert(!radix_tree_get_tag(t, UINT64_C(10000000000), 2));
       radix_tree_dump(t);
       assert(radix_tree_replace_node(t, 1000, (void *)0x12345678) ==
           (void *)0xdeadbea0);
       assert(!radix_tree_get_tag(t, 1000, 1));
       assert(radix_tree_get_tag(t, 1000, 2));
       assert(radix_tree_get_tag(t, 1000, 2 | 1) == 2);
       memset(results, 0, sizeof(results));
       assert(radix_tree_gang_lookup_node(t, 0, results, 3, false) == 3);
       assert(results[0] == (void *)0xbea0);
       assert(results[1] == (void *)0x12345678);
       assert(results[2] == (void *)0xdea0);
       memset(results, 0, sizeof(results));
       assert(radix_tree_gang_lookup_node(t, 0, results, 3, true) == 1);
       assert(results[0] == (void *)0xbea0);
       memset(results, 0, sizeof(results));
       assert(radix_tree_gang_lookup_node(t, 1, results, 3, false) == 2);
       assert(results[0] == (void *)0x12345678);
       assert(results[1] == (void *)0xdea0);
       assert(radix_tree_gang_lookup_node(t, 1, results, 3, true) == 0);
       memset(results, 0, sizeof(results));
       assert(radix_tree_gang_lookup_node(t, 1001, results, 3, false) == 1);
       assert(results[0] == (void *)0xdea0);
       assert(radix_tree_gang_lookup_node(t, 1001, results, 3, true) == 0);
       assert(radix_tree_gang_lookup_node(t, UINT64_C(10000000001), results, 3,
           false) == 0);
       assert(radix_tree_gang_lookup_node(t, UINT64_C(10000000001), results, 3,
           true) == 0);
       assert(radix_tree_gang_lookup_node(t, UINT64_C(1000000000000), results,
           3, false) == 0);
       assert(radix_tree_gang_lookup_node(t, UINT64_C(1000000000000), results,
           3, true) == 0);
       memset(results, 0, sizeof(results));
       assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 100, false, 2)
           == 1);
       assert(results[0] == (void *)0x12345678);
       assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 100, true, 2)
           == 0);
       assert(entry_tagmask(t->t_root) != 0);
       assert(radix_tree_remove_node(t, 1000) == (void *)0x12345678);
       assert(entry_tagmask(t->t_root) == 0);
       radix_tree_dump(t);
       assert(radix_tree_insert_node(t, UINT64_C(10000000001), (void *)0xfff0)
           == 0);
       memset(results, 0, sizeof(results));
       assert(radix_tree_gang_lookup_node(t, UINT64_C(10000000000), results, 3,
           false) == 2);
       assert(results[0] == (void *)0xdea0);
       assert(results[1] == (void *)0xfff0);
       memset(results, 0, sizeof(results));
       assert(radix_tree_gang_lookup_node(t, UINT64_C(10000000000), results, 3,
           true) == 2);
       assert(results[0] == (void *)0xdea0);
       assert(results[1] == (void *)0xfff0);
       memset(results, 0, sizeof(results));
       assert(radix_tree_gang_lookup_node_reverse(t, UINT64_C(10000000001),
           results, 3, false) == 3);
       assert(results[0] == (void *)0xfff0);
       assert(results[1] == (void *)0xdea0);
       assert(results[2] == (void *)0xbea0);
       memset(results, 0, sizeof(results));
       assert(radix_tree_gang_lookup_node_reverse(t, UINT64_C(10000000001),
           results, 3, true) == 2);
       assert(results[0] == (void *)0xfff0);
       assert(results[1] == (void *)0xdea0);
       assert(radix_tree_remove_node(t, UINT64_C(10000000000)) ==
           (void *)0xdea0);
       assert(radix_tree_remove_node(t, UINT64_C(10000000001)) ==
           (void *)0xfff0);
       radix_tree_dump(t);
       assert(radix_tree_remove_node(t, 0) == (void *)0xbea0);
       radix_tree_dump(t);
       radix_tree_fini_tree(t);
}

#include <sys/time.h>

/*
* Per-node record used by test2.  A pointer to the record itself is what
* gets stored in the tree.
*/
struct testnode {
       /* index under which the record is inserted into the tree */
       uint64_t idx;
       /* tagged[id] remembers whether the tag with that id was set */
       bool tagged[RADIX_TREE_TAG_ID_MAX];
};

/*
* printops:
*
* Print one benchmark line of the form
* "RESULT <title> <name> <tag> <rate> op/s", where the rate is n operations
* divided by the time elapsed between stv and etv.
*/
static void
printops(const char *title, const char *name, int tag, unsigned int n,
   const struct timeval *stv, const struct timeval *etv)
{
       /*
        * widen to 64 bits before scaling to microseconds, so that the
        * multiplication can't overflow where time_t is a narrow type.
        */
       const uint64_t s = (uint64_t)stv->tv_sec * 1000000 +
           (uint64_t)stv->tv_usec;
       const uint64_t e = (uint64_t)etv->tv_sec * 1000000 +
           (uint64_t)etv->tv_usec;

       printf("RESULT %s %s %d %lf op/s\n", title, name, tag,
           (double)n / (e - s) * 1000000);
}

#define TEST2_GANG_LOOKUP_NODES 16

/*
* test2_should_tag:
*
* Decide whether the i-th test node gets the tag with the given id: every
* 4th node for tag id 0, every 7th node for any other tag id.
*/
static bool
test2_should_tag(unsigned int i, unsigned int tagid)
{

       if (tagid == 0) {
               return (i % 4) == 0;    /* 25% */
       }
       return (i % 7) == 0;    /* 14% */
}

/*
* check_tag_count:
*
* Check a count of nodes matching tagmask against the per-tag totals in
* ntagged.  When tagmask names a single tag the count must equal that tag's
* total; when it names several, the count must be at least the total of
* each tag it names.
*/
static void
check_tag_count(const unsigned int *ntagged, unsigned int tagmask,
   unsigned int count)
{
       /*
        * clearing the lowest set bit leaves nothing behind when at most
        * one bit is set.
        */
       const bool single_tag = (tagmask & (tagmask - 1)) == 0;
       unsigned int id;

       for (id = 0; id < RADIX_TREE_TAG_ID_MAX; id++) {
               if ((tagmask & (1 << id)) != 0) {
                       if (single_tag) {
                               assert(count == ntagged[id]);
                       } else {
                               assert(count >= ntagged[id]);
                       }
               }
       }
}

static void
test2(const char *title, bool dense)
{
       struct radix_tree s;
       struct radix_tree *t = &s;
       struct testnode *n;
       unsigned int i;
       unsigned int nnodes = 100000;
       unsigned int removed;
       unsigned int tag;
       unsigned int tagmask;
       unsigned int ntagged[RADIX_TREE_TAG_ID_MAX];
       struct testnode *nodes;
       struct timeval stv;
       struct timeval etv;

       nodes = malloc(nnodes * sizeof(*nodes));
       for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
               ntagged[tag] = 0;
       }
       radix_tree_init_tree(t);
       for (i = 0; i < nnodes; i++) {
               n = &nodes[i];
               n->idx = random();
               if (sizeof(long) == 4) {
                       n->idx <<= 32;
                       n->idx |= (uint32_t)random();
               }
               if (dense) {
                       n->idx %= nnodes * 2;
               }
               while (radix_tree_lookup_node(t, n->idx) != NULL) {
                       n->idx++;
               }
               radix_tree_insert_node(t, n->idx, n);
               for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
                       tagmask = 1 << tag;

                       n->tagged[tag] = test2_should_tag(i, tag);
                       if (n->tagged[tag]) {
                               radix_tree_set_tag(t, n->idx, tagmask);
                               ntagged[tag]++;
                       }
                       assert((n->tagged[tag] ? tagmask : 0) ==
                           radix_tree_get_tag(t, n->idx, tagmask));
               }
       }

       gettimeofday(&stv, NULL);
       for (i = 0; i < nnodes; i++) {
               n = &nodes[i];
               assert(radix_tree_lookup_node(t, n->idx) == n);
       }
       gettimeofday(&etv, NULL);
       printops(title, "lookup", 0, nnodes, &stv, &etv);

       for (tagmask = 1; tagmask <= RADIX_TREE_TAG_MASK; tagmask ++) {
               unsigned int count = 0;

               gettimeofday(&stv, NULL);
               for (i = 0; i < nnodes; i++) {
                       unsigned int tagged;

                       n = &nodes[i];
                       tagged = radix_tree_get_tag(t, n->idx, tagmask);
                       assert((tagged & ~tagmask) == 0);
                       for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
                               assert((tagmask & (1 << tag)) == 0 ||
                                   n->tagged[tag] == !!(tagged & (1 << tag)));
                       }
                       if (tagged) {
                               count++;
                       }
               }
               gettimeofday(&etv, NULL);
               check_tag_count(ntagged, tagmask, count);
               printops(title, "get_tag", tagmask, nnodes, &stv, &etv);
       }

       gettimeofday(&stv, NULL);
       for (i = 0; i < nnodes; i++) {
               n = &nodes[i];
               radix_tree_remove_node(t, n->idx);
       }
       gettimeofday(&etv, NULL);
       printops(title, "remove", 0, nnodes, &stv, &etv);

       gettimeofday(&stv, NULL);
       for (i = 0; i < nnodes; i++) {
               n = &nodes[i];
               radix_tree_insert_node(t, n->idx, n);
       }
       gettimeofday(&etv, NULL);
       printops(title, "insert", 0, nnodes, &stv, &etv);

       for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
               tagmask = 1 << tag;

               ntagged[tag] = 0;
               gettimeofday(&stv, NULL);
               for (i = 0; i < nnodes; i++) {
                       n = &nodes[i];
                       if (n->tagged[tag]) {
                               radix_tree_set_tag(t, n->idx, tagmask);
                               ntagged[tag]++;
                       }
               }
               gettimeofday(&etv, NULL);
               printops(title, "set_tag", tag, ntagged[tag], &stv, &etv);
       }

       gettimeofday(&stv, NULL);
       {
               struct testnode *results[TEST2_GANG_LOOKUP_NODES];
               uint64_t nextidx;
               unsigned int nfound;
               unsigned int total;

               nextidx = 0;
               total = 0;
               while ((nfound = radix_tree_gang_lookup_node(t, nextidx,
                   (void *)results, __arraycount(results), false)) > 0) {
                       nextidx = results[nfound - 1]->idx + 1;
                       total += nfound;
                       if (nextidx == 0) {
                               break;
                       }
               }
               assert(total == nnodes);
       }
       gettimeofday(&etv, NULL);
       printops(title, "ganglookup", 0, nnodes, &stv, &etv);

       gettimeofday(&stv, NULL);
       {
               struct testnode *results[TEST2_GANG_LOOKUP_NODES];
               uint64_t nextidx;
               unsigned int nfound;
               unsigned int total;

               nextidx = UINT64_MAX;
               total = 0;
               while ((nfound = radix_tree_gang_lookup_node_reverse(t, nextidx,
                   (void *)results, __arraycount(results), false)) > 0) {
                       nextidx = results[nfound - 1]->idx - 1;
                       total += nfound;
                       if (nextidx == UINT64_MAX) {
                               break;
                       }
               }
               assert(total == nnodes);
       }
       gettimeofday(&etv, NULL);
       printops(title, "ganglookup_reverse", 0, nnodes, &stv, &etv);

       for (tagmask = 1; tagmask <= RADIX_TREE_TAG_MASK; tagmask ++) {
               unsigned int total = 0;

               gettimeofday(&stv, NULL);
               {
                       struct testnode *results[TEST2_GANG_LOOKUP_NODES];
                       uint64_t nextidx;
                       unsigned int nfound;

                       nextidx = 0;
                       while ((nfound = radix_tree_gang_lookup_tagged_node(t,
                           nextidx, (void *)results, __arraycount(results),
                           false, tagmask)) > 0) {
                               nextidx = results[nfound - 1]->idx + 1;
                               total += nfound;
                       }
               }
               gettimeofday(&etv, NULL);
               check_tag_count(ntagged, tagmask, total);
               assert(tagmask != 0 || total == 0);
               printops(title, "ganglookup_tag", tagmask, total, &stv, &etv);
       }

       for (tagmask = 1; tagmask <= RADIX_TREE_TAG_MASK; tagmask ++) {
               unsigned int total = 0;

               gettimeofday(&stv, NULL);
               {
                       struct testnode *results[TEST2_GANG_LOOKUP_NODES];
                       uint64_t nextidx;
                       unsigned int nfound;

                       nextidx = UINT64_MAX;
                       while ((nfound =
                           radix_tree_gang_lookup_tagged_node_reverse(t,
                           nextidx, (void *)results, __arraycount(results),
                           false, tagmask)) > 0) {
                               nextidx = results[nfound - 1]->idx - 1;
                               total += nfound;
                               if (nextidx == UINT64_MAX) {
                                       break;
                               }
                       }
               }
               gettimeofday(&etv, NULL);
               check_tag_count(ntagged, tagmask, total);
               assert(tagmask != 0 || total == 0);
               printops(title, "ganglookup_tag_reverse", tagmask, total,
                   &stv, &etv);
       }

       removed = 0;
       for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
               unsigned int total;

               total = 0;
               tagmask = 1 << tag;
               gettimeofday(&stv, NULL);
               {
                       struct testnode *results[TEST2_GANG_LOOKUP_NODES];
                       uint64_t nextidx;
                       unsigned int nfound;

                       nextidx = 0;
                       while ((nfound = radix_tree_gang_lookup_tagged_node(t,
                           nextidx, (void *)results, __arraycount(results),
                           false, tagmask)) > 0) {
                               for (i = 0; i < nfound; i++) {
                                       radix_tree_remove_node(t,
                                           results[i]->idx);
                               }
                               nextidx = results[nfound - 1]->idx + 1;
                               total += nfound;
                               if (nextidx == 0) {
                                       break;
                               }
                       }
               }
               gettimeofday(&etv, NULL);
               if (tag == 0) {
                       check_tag_count(ntagged, tagmask, total);
               } else {
                       assert(total <= ntagged[tag]);
               }
               printops(title, "ganglookup_tag+remove", tagmask, total, &stv,
                   &etv);
               removed += total;
       }

       gettimeofday(&stv, NULL);
       {
               struct testnode *results[TEST2_GANG_LOOKUP_NODES];
               uint64_t nextidx;
               unsigned int nfound;
               unsigned int total;

               nextidx = 0;
               total = 0;
               while ((nfound = radix_tree_gang_lookup_node(t, nextidx,
                   (void *)results, __arraycount(results), false)) > 0) {
                       for (i = 0; i < nfound; i++) {
                               assert(results[i] == radix_tree_remove_node(t,
                                   results[i]->idx));
                       }
                       nextidx = results[nfound - 1]->idx + 1;
                       total += nfound;
                       if (nextidx == 0) {
                               break;
                       }
               }
               assert(total == nnodes - removed);
       }
       gettimeofday(&etv, NULL);
       printops(title, "ganglookup+remove", 0, nnodes - removed, &stv, &etv);

       assert(radix_tree_empty_tree_p(t));
       for (tagmask = 1; tagmask <= RADIX_TREE_TAG_MASK; tagmask ++) {
               assert(radix_tree_empty_tagged_tree_p(t, tagmask));
       }
       radix_tree_fini_tree(t);
       free(nodes);
}

/*
* main:
*
* Entry point of the unit test program: run the scripted test, then the
* randomised test once per configuration.  The command line is ignored.
*/
int
main(int argc, char *argv[])
{
       static const struct {
               const char *title;
               bool dense;
       } runs[] = {
               { "dense", true },
               { "sparse", false },
       };
       unsigned int r;

       test1();
       for (r = 0; r < __arraycount(runs); r++) {
               test2(runs[r].title, runs[r].dense);
       }
       return 0;
}

#endif /* defined(UNITTEST) */