/* $NetBSD: uvm_physseg.c,v 1.20 2024/01/13 09:44:42 tnn Exp $ */

/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
* Copyright (c) 1991, 1993, The Regents of the University of California.
*
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* The Mach Operating System project at Carnegie-Mellon University.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
*    may be used to endorse or promote products derived from this software
*    without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*      @(#)vm_page.h   7.3 (Berkeley) 4/21/91
* from: Id: uvm_page.h,v 1.1.2.6 1998/02/04 02:31:42 chuck Exp
*
*
* Copyright (c) 1987, 1990 Carnegie-Mellon University.
* All rights reserved.
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
*  Software Distribution Coordinator  or  [email protected]
*  School of Computer Science
*  Carnegie Mellon University
*  Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/

/*
* Consolidated API from uvm_page.c and others.
* Consolidated and designed by Cherry G. Mathew <[email protected]>
* rbtree(3) backing implementation by:
* Santhosh N. Raju <[email protected]>
*/

#ifdef _KERNEL_OPT
#include "opt_uvm.h"
#endif

#include <sys/param.h>
#include <sys/types.h>
#include <sys/extent.h>
#include <sys/kmem.h>

#include <uvm/uvm.h>
#include <uvm/uvm_page.h>
#include <uvm/uvm_param.h>
#include <uvm/uvm_pdpolicy.h>
#include <uvm/uvm_physseg.h>

/*
* uvm_physseg: describes one segment of physical memory
*/
struct uvm_physseg {
       /* used during RB tree lookup for PHYS_TO_VM_PAGE(). */
#if defined(UVM_HOTPLUG)
       struct  rb_node rb_node;        /* tree information */
#endif
       paddr_t start;                  /* PF# of first page in segment */
       paddr_t end;                    /* (PF# of last page in segment) + 1 */
       struct  vm_page *pgs;           /* vm_page structures (from start) */

       /* less performance sensitive fields. */
       paddr_t avail_start;            /* PF# of first free page in segment */
       paddr_t avail_end;              /* (PF# of last free page in segment) +1  */
       struct  extent *ext;            /* extent(9) structure to manage pgs[] */
       int     free_list;              /* which free list they belong on */
       u_long  start_hint;             /* start looking for free pages here */
#ifdef __HAVE_PMAP_PHYSSEG
       struct  pmap_physseg pmseg;     /* pmap specific (MD) data */
#endif
};
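
/*
 * Note: start/end and avail_start/avail_end above are page frame
 * numbers (PFNs), not byte addresses.  An illustrative (hypothetical
 * caller) lookup of the vm_page backing a physical address pa:
 *
 *	psize_t off;
 *	uvm_physseg_t upm = uvm_physseg_find(atop(pa), &off);
 *
 *	if (uvm_physseg_valid_p(upm))
 *		pg = uvm_physseg_get_pg(upm, off);
 */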

/*
 * These functions are reserved for uvm(9) internal use and are not
 * exported in the header file uvm_physseg.h
 *
 * Thus they are declared here.
 */
void uvm_physseg_init_seg(uvm_physseg_t, struct vm_page *);
void uvm_physseg_seg_chomp_slab(uvm_physseg_t, struct vm_page *, size_t);

/* returns a pgs array */
struct vm_page *uvm_physseg_seg_alloc_from_slab(uvm_physseg_t, size_t);
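
/*
 * Illustrative call order (a sketch mirroring uvm_physseg_plug()
 * below; see uvm_page.c:uvm_page_init() for the boot time sequence):
 *
 *	uvm_physseg_seg_chomp_slab(upm, slab, slabpages);
 *	pgs = uvm_physseg_seg_alloc_from_slab(upm, pages);
 *	uvm_physseg_init_seg(upm, pgs);
 */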

#if defined(UVM_HOTPLUG) /* rbtree implementation */

#define         HANDLE_TO_PHYSSEG_NODE(h)       ((struct uvm_physseg *)(h))
#define         PHYSSEG_NODE_TO_HANDLE(u)       ((uvm_physseg_t)(u))

struct uvm_physseg_graph {
       struct rb_tree rb_tree;         /* Tree for entries */
       int            nentries;        /* Number of entries */
} __aligned(COHERENCY_UNIT);

static struct uvm_physseg_graph uvm_physseg_graph __read_mostly;

/*
* Note on kmem(9) allocator usage:
* We take the conservative approach that plug/unplug are allowed to
* fail in high memory stress situations.
*
* We want to avoid re-entrant situations in which one plug/unplug
* operation is waiting on a previous one to complete, since this
* makes the design more complicated than necessary.
*
* We may review this and change its behaviour, once the use cases
* become more obvious.
*/
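
/*
 * A hedged caller sketch (not a prescribed contract): hotplug
 * callers should treat a false return from uvm_physseg_plug() as a
 * transient, recoverable failure:
 *
 *	if (uvm_physseg_plug(pfn, npages, &upm) == false)
 *		return ENOMEM;	(back off; retry later)
 */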

/*
* Special alloc()/free() functions for boot time support:
* We assume that alloc() at boot time is only for new 'vm_physseg's
* This allows us to use a static array for memory allocation at boot
* time. Thus we avoid using kmem(9) which is not ready at this point
* in boot.
*
* After kmem(9) is ready, we use it. We currently discard any free()s
* to this static array, since the size is small enough to be a
* trivial waste on all architectures we run on.
*/
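
/*
 * Illustration (hypothetical boot sequence): two early
 * uvm_page_physload() calls each carve one slot out of the static
 * array below:
 *
 *	uvm_physseg_alloc(sizeof(struct uvm_physseg))  => &uvm_physseg[0]
 *	uvm_physseg_alloc(sizeof(struct uvm_physseg))  => &uvm_physseg[1]
 *
 * free()s of these slots are deliberately discarded.
 */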

static size_t nseg = 0;
static struct uvm_physseg uvm_physseg[VM_PHYSSEG_MAX];

static void *
uvm_physseg_alloc(size_t sz)
{
       /*
        * During boot time, we only support allocating vm_physseg
        * entries from the static array.
        * We need to assert for this.
        */

       if (__predict_false(uvm.page_init_done == false)) {
               if (sz % sizeof(struct uvm_physseg))
                       panic("%s: tried to alloc size other than multiple"
                           " of struct uvm_physseg at boot\n", __func__);

               size_t n = sz / sizeof(struct uvm_physseg);
               nseg += n;

               KASSERT(nseg > 0);
               KASSERT(nseg <= VM_PHYSSEG_MAX);

               return &uvm_physseg[nseg - n];
       }

       return kmem_zalloc(sz, KM_NOSLEEP);
}

static void
uvm_physseg_free(void *p, size_t sz)
{
	/*
	 * This is a bit tricky. We do allow simulation of free()
	 * during boot (e.g. when MD code "steal"s memory and the
	 * segment has been exhausted, and thus needs to be free()-ed).
	 * Boot time free()s are leaked, however, so calling code
	 * can't assume that free()-ed memory is available for
	 * alloc() again at boot time.
	 *
	 * Thus we can't explicitly disallow free()s during boot.
	 * However, the same restriction that applies to alloc()
	 * applies to free(): we only allow uvm_physseg related
	 * free()s via this function during boot.
	 */

       if (__predict_false(uvm.page_init_done == false)) {
		if (sz % sizeof(struct uvm_physseg))
			panic("%s: tried to free size other than multiple"
			    " of struct uvm_physseg at boot\n", __func__);
	}

       /*
        * Could have been in a single if(){} block - split for
        * clarity
        */

       if ((struct uvm_physseg *)p >= uvm_physseg &&
           (struct uvm_physseg *)p < (uvm_physseg + VM_PHYSSEG_MAX)) {
               if (sz % sizeof(struct uvm_physseg))
                       panic("%s: tried to free() other than struct uvm_physseg"
                           " from static array\n", __func__);

               if ((sz / sizeof(struct uvm_physseg)) >= VM_PHYSSEG_MAX)
                       panic("%s: tried to free() the entire static array!", __func__);
               return; /* Nothing to free */
       }

       kmem_free(p, sz);
}

/* XXX: Multi page size */
bool
uvm_physseg_plug(paddr_t pfn, size_t pages, uvm_physseg_t *psp)
{
       int preload;
       size_t slabpages;
       struct uvm_physseg *ps, *current_ps = NULL;
       struct vm_page *slab = NULL, *pgs = NULL;

#ifdef DEBUG
       paddr_t off;
       uvm_physseg_t upm;
       upm = uvm_physseg_find(pfn, &off);

       ps = HANDLE_TO_PHYSSEG_NODE(upm);

	if (ps != NULL) /* XXX: do we allow "update" plugs? */
               return false;
#endif

       /*
        * do we have room?
        */

       ps = uvm_physseg_alloc(sizeof (struct uvm_physseg));
       if (ps == NULL) {
               printf("uvm_page_physload: unable to load physical memory "
                   "segment\n");
               printf("\t%d segments allocated, ignoring 0x%"PRIxPADDR" -> 0x%"PRIxPADDR"\n",
                   VM_PHYSSEG_MAX, pfn, pfn + pages + 1);
               printf("\tincrease VM_PHYSSEG_MAX\n");
               return false;
       }

       /* span init */
       ps->start = pfn;
       ps->end = pfn + pages;

	/*
	 * XXX: Ugly hack - uvmexp.npages accounts only for pages
	 * within [avail_start, avail_end), so we mark the whole
	 * segment available here. This should be legacy and removed.
	 */

       ps->avail_start = ps->start;
       ps->avail_end = ps->end;

       /*
        * check to see if this is a "preload" (i.e. uvm_page_init hasn't been
        * called yet, so kmem is not available).
        */

       preload = 1; /* We are going to assume it is a preload */

       RB_TREE_FOREACH(current_ps, &(uvm_physseg_graph.rb_tree)) {
		/* If there are non-NULL pages then we are not in a preload */
               if (current_ps->pgs != NULL) {
                       preload = 0;
                       /* Try to scavenge from earlier unplug()s. */
                       pgs = uvm_physseg_seg_alloc_from_slab(current_ps, pages);

                       if (pgs != NULL) {
                               break;
                       }
               }
	}

	/*
        * if VM is already running, attempt to kmem_alloc vm_page structures
        */

       if (!preload) {
               if (pgs == NULL) { /* Brand new */
                       /* Iteratively try alloc down from uvmexp.npages */
                       for (slabpages = (size_t) uvmexp.npages; slabpages >= pages; slabpages--) {
				slab = kmem_zalloc(sizeof(*pgs) * slabpages, KM_NOSLEEP);
                               if (slab != NULL)
                                       break;
                       }

                       if (slab == NULL) {
                               uvm_physseg_free(ps, sizeof(struct uvm_physseg));
                               return false;
                       }

                       uvm_physseg_seg_chomp_slab(ps, slab, (size_t) slabpages);
                       /* We allocate enough for this plug */
                       pgs = uvm_physseg_seg_alloc_from_slab(ps, pages);

                       if (pgs == NULL) {
                               printf("unable to uvm_physseg_seg_alloc_from_slab() from backend\n");
                               return false;
                       }
               } else {
                       /* Reuse scavenged extent */
                       ps->ext = current_ps->ext;
               }

               physmem += pages;
               uvmpdpol_reinit();
       } else { /* Boot time - see uvm_page.c:uvm_page_init() */
               pgs = NULL;
               ps->pgs = pgs;
       }

       /*
        * now insert us in the proper place in uvm_physseg_graph.rb_tree
        */

       current_ps = rb_tree_insert_node(&(uvm_physseg_graph.rb_tree), ps);
       if (current_ps != ps) {
               panic("uvm_page_physload: Duplicate address range detected!");
       }
       uvm_physseg_graph.nentries++;

	/*
	 * uvm_pagefree() requires that PHYS_TO_VM_PAGE() return the
	 * correct value for the newly allocated pgs[]. This is a bit
	 * of a chicken and egg problem, since it needs
	 * uvm_physseg_find() to succeed. For this, the node needs to
	 * be inserted *before* uvm_physseg_init_seg() happens.
	 *
	 * During boot, this happens anyway, since
	 * uvm_physseg_init_seg() is called later on and separately
	 * from uvm_page.c:uvm_page_init().
	 * In the case of hotplug we need to ensure this.
	 */

       if (__predict_true(!preload))
               uvm_physseg_init_seg(ps, pgs);

       if (psp != NULL)
               *psp = ps;

       return true;
}

static int
uvm_physseg_compare_nodes(void *ctx, const void *nnode1, const void *nnode2)
{
       const struct uvm_physseg *enode1 = nnode1;
       const struct uvm_physseg *enode2 = nnode2;

       KASSERT(enode1->start < enode2->start || enode1->start >= enode2->end);
       KASSERT(enode2->start < enode1->start || enode2->start >= enode1->end);

       if (enode1->start < enode2->start)
               return -1;
       if (enode1->start >= enode2->end)
               return 1;
       return 0;
}

static int
uvm_physseg_compare_key(void *ctx, const void *nnode, const void *pkey)
{
       const struct uvm_physseg *enode = nnode;
       const paddr_t pa = *(const paddr_t *) pkey;

	if (enode->start <= pa && pa < enode->end)
               return 0;
       if (enode->start < pa)
               return -1;
       if (enode->end > pa)
               return 1;

       return 0;
}

static const rb_tree_ops_t uvm_physseg_tree_ops = {
       .rbto_compare_nodes = uvm_physseg_compare_nodes,
       .rbto_compare_key = uvm_physseg_compare_key,
       .rbto_node_offset = offsetof(struct uvm_physseg, rb_node),
       .rbto_context = NULL
};
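
/*
 * With the interval comparators above, a lookup keyed by a bare PFN
 * returns the segment containing it.  Illustrative use (this is
 * what uvm_physseg_find() below does):
 *
 *	paddr_t pframe = ...;
 *	struct uvm_physseg *ps =
 *	    rb_tree_find_node(&uvm_physseg_graph.rb_tree, &pframe);
 */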

/*
* uvm_physseg_init: init the physmem
*
* => physmem unit should not be in use at this point
*/

void
uvm_physseg_init(void)
{
       rb_tree_init(&(uvm_physseg_graph.rb_tree), &uvm_physseg_tree_ops);
       uvm_physseg_graph.nentries = 0;
}

uvm_physseg_t
uvm_physseg_get_next(uvm_physseg_t upm)
{
       /* next of invalid is invalid, not fatal */
       if (uvm_physseg_valid_p(upm) == false)
               return UVM_PHYSSEG_TYPE_INVALID;

       return (uvm_physseg_t) rb_tree_iterate(&(uvm_physseg_graph.rb_tree), upm,
           RB_DIR_RIGHT);
}

uvm_physseg_t
uvm_physseg_get_prev(uvm_physseg_t upm)
{
       /* prev of invalid is invalid, not fatal */
       if (uvm_physseg_valid_p(upm) == false)
               return UVM_PHYSSEG_TYPE_INVALID;

       return (uvm_physseg_t) rb_tree_iterate(&(uvm_physseg_graph.rb_tree), upm,
           RB_DIR_LEFT);
}

uvm_physseg_t
uvm_physseg_get_last(void)
{
       return (uvm_physseg_t) RB_TREE_MAX(&(uvm_physseg_graph.rb_tree));
}

uvm_physseg_t
uvm_physseg_get_first(void)
{
       return (uvm_physseg_t) RB_TREE_MIN(&(uvm_physseg_graph.rb_tree));
}

paddr_t
uvm_physseg_get_highest_frame(void)
{
       struct uvm_physseg *ps =
           (uvm_physseg_t) RB_TREE_MAX(&(uvm_physseg_graph.rb_tree));

       return ps->end - 1;
}

/*
* uvm_page_physunload: unload physical memory and return it to
* caller.
*/
bool
uvm_page_physunload(uvm_physseg_t upm, int freelist, paddr_t *paddrp)
{
       struct uvm_physseg *seg;

       if (__predict_true(uvm.page_init_done == true))
               panic("%s: unload attempted after uvm_page_init()\n", __func__);

       seg = HANDLE_TO_PHYSSEG_NODE(upm);

       if (seg->free_list != freelist) {
               return false;
       }

	/*
	 * During cold boot, what we're about to unplug hasn't been
	 * put on the uvm freelist, nor has uvmexp.npages been
	 * updated. (This happens in uvm_page.c:uvm_page_init())
	 *
	 * For hotplug, we assume here that the pages being unloaded
	 * are completely out of sight of uvm (i.e. not on any uvm
	 * lists), and that uvmexp.npages has been suitably
	 * decremented before we're called.
	 *
	 * XXX: will avail_end == start if avail_start < avail_end?
	 */

       /* try from front */
       if (seg->avail_start == seg->start &&
           seg->avail_start < seg->avail_end) {
               *paddrp = ctob(seg->avail_start);
               return uvm_physseg_unplug(seg->avail_start, 1);
       }

       /* try from rear */
       if (seg->avail_end == seg->end &&
           seg->avail_start < seg->avail_end) {
               *paddrp = ctob(seg->avail_end - 1);
               return uvm_physseg_unplug(seg->avail_end - 1, 1);
       }

       return false;
}

bool
uvm_page_physunload_force(uvm_physseg_t upm, int freelist, paddr_t *paddrp)
{
       struct uvm_physseg *seg;

       seg = HANDLE_TO_PHYSSEG_NODE(upm);

       if (__predict_true(uvm.page_init_done == true))
               panic("%s: unload attempted after uvm_page_init()\n", __func__);
       /* any room in this bank? */
       if (seg->avail_start >= seg->avail_end) {
               return false; /* nope */
       }

       *paddrp = ctob(seg->avail_start);

       /* Always unplug from front */
       return uvm_physseg_unplug(seg->avail_start, 1);
}

/*
 * uvm_physseg_find: find the uvm_physseg structure that a PA belongs to
 */
uvm_physseg_t
uvm_physseg_find(paddr_t pframe, psize_t *offp)
{
       struct uvm_physseg * ps = NULL;

       ps = rb_tree_find_node(&(uvm_physseg_graph.rb_tree), &pframe);

	if (ps != NULL && offp != NULL)
               *offp = pframe - ps->start;

       return ps;
}

#else  /* UVM_HOTPLUG */

/*
* physical memory config is stored in vm_physmem.
*/

#define VM_PHYSMEM_PTR(i)       (&vm_physmem[i])
#if VM_PHYSSEG_MAX == 1
#define VM_PHYSMEM_PTR_SWAP(i, j) /* impossible */
#else
#define VM_PHYSMEM_PTR_SWAP(i, j)                                             \
       do { vm_physmem[(i)] = vm_physmem[(j)]; } while (0)
#endif

#define         HANDLE_TO_PHYSSEG_NODE(h)       (VM_PHYSMEM_PTR((int)h))
#define		PHYSSEG_NODE_TO_HANDLE(u)	((int)((u) - vm_physmem))

/* XXXCDC: uvm.physmem */
static struct uvm_physseg vm_physmem[VM_PHYSSEG_MAX] __read_mostly;
/* XXXCDC: uvm.nphysseg */
static int vm_nphysseg __read_mostly = 0;
#define vm_nphysmem     vm_nphysseg

void
uvm_physseg_init(void)
{
       /* XXX: Provisioning for rb_tree related init(s) */
       return;
}

int
uvm_physseg_get_next(uvm_physseg_t lcv)
{
       /* next of invalid is invalid, not fatal */
       if (uvm_physseg_valid_p(lcv) == false)
               return UVM_PHYSSEG_TYPE_INVALID;

       return (lcv + 1);
}

int
uvm_physseg_get_prev(uvm_physseg_t lcv)
{
       /* prev of invalid is invalid, not fatal */
       if (uvm_physseg_valid_p(lcv) == false)
               return UVM_PHYSSEG_TYPE_INVALID;

       return (lcv - 1);
}

int
uvm_physseg_get_last(void)
{
       return (vm_nphysseg - 1);
}

int
uvm_physseg_get_first(void)
{
       return 0;
}

paddr_t
uvm_physseg_get_highest_frame(void)
{
       int lcv;
       paddr_t last = 0;
       struct uvm_physseg *ps;

       for (lcv = 0; lcv < vm_nphysseg; lcv++) {
               ps = VM_PHYSMEM_PTR(lcv);
               if (last < ps->end)
                       last = ps->end;
       }

       return last;
}


static struct vm_page *
uvm_post_preload_check(void)
{
       int preload, lcv;

       /*
        * check to see if this is a "preload" (i.e. uvm_page_init hasn't been
        * called yet, so kmem is not available).
        */

       for (lcv = 0 ; lcv < vm_nphysmem ; lcv++) {
               if (VM_PHYSMEM_PTR(lcv)->pgs)
                       break;
       }
       preload = (lcv == vm_nphysmem);

       /*
        * if VM is already running, attempt to kmem_alloc vm_page structures
        */

       if (!preload) {
               panic("Tried to add RAM after uvm_page_init");
       }

       return NULL;
}

/*
* uvm_page_physunload: unload physical memory and return it to
* caller.
*/
bool
uvm_page_physunload(uvm_physseg_t psi, int freelist, paddr_t *paddrp)
{
       int x;
       struct uvm_physseg *seg;

       uvm_post_preload_check();

       seg = VM_PHYSMEM_PTR(psi);

       if (seg->free_list != freelist) {
               return false;
       }

       /* try from front */
       if (seg->avail_start == seg->start &&
           seg->avail_start < seg->avail_end) {
               *paddrp = ctob(seg->avail_start);
               seg->avail_start++;
               seg->start++;
               /* nothing left?   nuke it */
               if (seg->avail_start == seg->end) {
                       if (vm_nphysmem == 1)
                               panic("uvm_page_physget: out of memory!");
                       vm_nphysmem--;
                       for (x = psi ; x < vm_nphysmem ; x++)
                               /* structure copy */
                               VM_PHYSMEM_PTR_SWAP(x, x + 1);
               }
               return (true);
       }

       /* try from rear */
       if (seg->avail_end == seg->end &&
           seg->avail_start < seg->avail_end) {
               *paddrp = ctob(seg->avail_end - 1);
               seg->avail_end--;
               seg->end--;
               /* nothing left?   nuke it */
               if (seg->avail_end == seg->start) {
                       if (vm_nphysmem == 1)
                               panic("uvm_page_physget: out of memory!");
                       vm_nphysmem--;
                       for (x = psi ; x < vm_nphysmem ; x++)
                               /* structure copy */
                               VM_PHYSMEM_PTR_SWAP(x, x + 1);
               }
               return (true);
       }

       return false;
}

bool
uvm_page_physunload_force(uvm_physseg_t psi, int freelist, paddr_t *paddrp)
{
       int x;
       struct uvm_physseg *seg;

       uvm_post_preload_check();

       seg = VM_PHYSMEM_PTR(psi);

       /* any room in this bank? */
       if (seg->avail_start >= seg->avail_end) {
               return false; /* nope */
       }

       *paddrp = ctob(seg->avail_start);
       seg->avail_start++;
       /* truncate! */
       seg->start = seg->avail_start;

       /* nothing left?   nuke it */
       if (seg->avail_start == seg->end) {
               if (vm_nphysmem == 1)
                       panic("uvm_page_physget: out of memory!");
               vm_nphysmem--;
               for (x = psi ; x < vm_nphysmem ; x++)
                       /* structure copy */
                       VM_PHYSMEM_PTR_SWAP(x, x + 1);
       }
       return (true);
}

bool
uvm_physseg_plug(paddr_t pfn, size_t pages, uvm_physseg_t *psp)
{
       int lcv;
       struct vm_page *pgs;
       struct uvm_physseg *ps;

#ifdef DEBUG
       paddr_t off;
       uvm_physseg_t upm;
       upm = uvm_physseg_find(pfn, &off);

	if (uvm_physseg_valid_p(upm)) /* XXX: do we allow "update" plugs? */
               return false;
#endif

       paddr_t start = pfn;
       paddr_t end = pfn + pages;
       paddr_t avail_start = start;
       paddr_t avail_end = end;

       if (uvmexp.pagesize == 0)
               panic("uvm_page_physload: page size not set!");

       /*
        * do we have room?
        */

       if (vm_nphysmem == VM_PHYSSEG_MAX) {
               printf("uvm_page_physload: unable to load physical memory "
                   "segment\n");
               printf("\t%d segments allocated, ignoring 0x%llx -> 0x%llx\n",
                   VM_PHYSSEG_MAX, (long long)start, (long long)end);
               printf("\tincrease VM_PHYSSEG_MAX\n");
               if (psp != NULL)
                       *psp = UVM_PHYSSEG_TYPE_INVALID_OVERFLOW;
               return false;
       }

       /*
        * check to see if this is a "preload" (i.e. uvm_page_init hasn't been
        * called yet, so kmem is not available).
        */
       pgs = uvm_post_preload_check();

       /*
        * now insert us in the proper place in vm_physmem[]
        */

#if (VM_PHYSSEG_STRAT == VM_PSTRAT_RANDOM)
       /* random: put it at the end (easy!) */
       ps = VM_PHYSMEM_PTR(vm_nphysmem);
       lcv = vm_nphysmem;
#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
       {
               int x;
               /* sort by address for binary search */
               for (lcv = 0 ; lcv < vm_nphysmem ; lcv++)
                       if (start < VM_PHYSMEM_PTR(lcv)->start)
                               break;
               ps = VM_PHYSMEM_PTR(lcv);
               /* move back other entries, if necessary ... */
               for (x = vm_nphysmem ; x > lcv ; x--)
                       /* structure copy */
                       VM_PHYSMEM_PTR_SWAP(x, x - 1);
       }
#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
       {
               int x;
               /* sort by largest segment first */
               for (lcv = 0 ; lcv < vm_nphysmem ; lcv++)
                       if ((end - start) >
                           (VM_PHYSMEM_PTR(lcv)->end - VM_PHYSMEM_PTR(lcv)->start))
                               break;
               ps = VM_PHYSMEM_PTR(lcv);
               /* move back other entries, if necessary ... */
               for (x = vm_nphysmem ; x > lcv ; x--)
                       /* structure copy */
                       VM_PHYSMEM_PTR_SWAP(x, x - 1);
       }
#else
       panic("uvm_page_physload: unknown physseg strategy selected!");
#endif

       ps->start = start;
       ps->end = end;
       ps->avail_start = avail_start;
       ps->avail_end = avail_end;

       ps->pgs = pgs;

       vm_nphysmem++;

       if (psp != NULL)
               *psp = lcv;

       return true;
}

/*
* when VM_PHYSSEG_MAX is 1, we can simplify these functions
*/

#if VM_PHYSSEG_MAX == 1
static inline int vm_physseg_find_contig(struct uvm_physseg *, int, paddr_t, psize_t *);
#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
static inline int vm_physseg_find_bsearch(struct uvm_physseg *, int, paddr_t, psize_t *);
#else
static inline int vm_physseg_find_linear(struct uvm_physseg *, int, paddr_t, psize_t *);
#endif

/*
 * uvm_physseg_find: find the uvm_physseg structure that a PA belongs to
*/
inline int
uvm_physseg_find(paddr_t pframe, psize_t *offp)
{

#if VM_PHYSSEG_MAX == 1
       return vm_physseg_find_contig(vm_physmem, vm_nphysseg, pframe, offp);
#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
       return vm_physseg_find_bsearch(vm_physmem, vm_nphysseg, pframe, offp);
#else
       return vm_physseg_find_linear(vm_physmem, vm_nphysseg, pframe, offp);
#endif
}

#if VM_PHYSSEG_MAX == 1
static inline int
vm_physseg_find_contig(struct uvm_physseg *segs, int nsegs, paddr_t pframe, psize_t *offp)
{

       /* 'contig' case */
       if (pframe >= segs[0].start && pframe < segs[0].end) {
               if (offp)
                       *offp = pframe - segs[0].start;
		return 0;
	}
	return -1;
}

#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)

static inline int
vm_physseg_find_bsearch(struct uvm_physseg *segs, int nsegs, paddr_t pframe, psize_t *offp)
{
       /* binary search for it */
       int     start, len, guess;

       /*
        * if try is too large (thus target is less than try) we reduce
        * the length to trunc(len/2) [i.e. everything smaller than "try"]
        *
        * if the try is too small (thus target is greater than try) then
        * we set the new start to be (try + 1).   this means we need to
        * reduce the length to (round(len/2) - 1).
        *
        * note "adjust" below which takes advantage of the fact that
        *  (round(len/2) - 1) == trunc((len - 1) / 2)
        * for any value of len we may have
        */
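
	/*
	 * Worked example (illustrative): nsegs = 5, target in segs[3].
	 * pass 1: start = 0, len = 5, guess = 2; pframe >= segs[2].end,
	 *	so start = 3, len-- => 4, then len /= 2 => 2.
	 * pass 2: start = 3, len = 2, guess = 4; pframe < segs[4].start,
	 *	so only len /= 2 => 1.
	 * pass 3: start = 3, len = 1, guess = 3; segs[3] contains pframe,
	 *	so return 3.
	 */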

       for (start = 0, len = nsegs ; len != 0 ; len = len / 2) {
               guess = start + (len / 2);      /* try in the middle */

               /* start past our try? */
               if (pframe >= segs[guess].start) {
                       /* was try correct? */
                       if (pframe < segs[guess].end) {
                               if (offp)
                                       *offp = pframe - segs[guess].start;
                               return guess;            /* got it */
                       }
                       start = guess + 1;      /* next time, start here */
                       len--;                  /* "adjust" */
               } else {
                       /*
                        * pframe before try, just reduce length of
                        * region, done in "for" loop
                        */
               }
       }
	return -1;
}

#else

static inline int
vm_physseg_find_linear(struct uvm_physseg *segs, int nsegs, paddr_t pframe, psize_t *offp)
{
       /* linear search for it */
       int     lcv;

       for (lcv = 0; lcv < nsegs; lcv++) {
               if (pframe >= segs[lcv].start &&
                   pframe < segs[lcv].end) {
                       if (offp)
                               *offp = pframe - segs[lcv].start;
			return lcv;	/* got it */
               }
       }
	return -1;
}
#endif
#endif /* UVM_HOTPLUG */

/*
 * PHYS_TO_VM_PAGE: find the vm_page for a PA.  Used by MI code to get
 * vm_pages back from an I/O mapping (ugh!) and in some MD code as well.
 * It can be prominent in flamegraphs, so optimise it and try to make it
 * easy for the compiler by placing it next to the inline lookup routines.
 */
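/*
 * Illustrative use (a hypothetical caller sketch): translate a
 * physical address from an I/O mapping back to its vm_page; NULL
 * means the address is not managed RAM:
 *
 *	struct vm_page *pg = uvm_phys_to_vm_page(pa);
 *
 *	if (pg == NULL)
 *		return EFAULT;
 */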
struct vm_page *
uvm_phys_to_vm_page(paddr_t pa)
{
#if VM_PHYSSEG_STRAT != VM_PSTRAT_BSEARCH
       /* 'contig' and linear cases */
       KASSERT(vm_nphysseg > 0);
       struct uvm_physseg *ps = &vm_physmem[0];
       struct uvm_physseg *end = &vm_physmem[vm_nphysseg];
       paddr_t pframe = atop(pa);
       do {
               if (pframe >= ps->start && pframe < ps->end) {
                       return &ps->pgs[pframe - ps->start];
               }
       } while (VM_PHYSSEG_MAX > 1 && __predict_false(++ps < end));
       return NULL;
#else
       /* binary search for it */
       paddr_t pf = atop(pa);
       paddr_t off;
       uvm_physseg_t   upm;

       upm = uvm_physseg_find(pf, &off);
       if (upm != UVM_PHYSSEG_TYPE_INVALID)
               return uvm_physseg_get_pg(upm, off);
	return NULL;
#endif
}

bool
uvm_physseg_valid_p(uvm_physseg_t upm)
{
       struct uvm_physseg *ps;

       if (upm == UVM_PHYSSEG_TYPE_INVALID ||
           upm == UVM_PHYSSEG_TYPE_INVALID_EMPTY ||
           upm == UVM_PHYSSEG_TYPE_INVALID_OVERFLOW)
               return false;

	/*
	 * This is the delicate init dance: during boot, segments
	 * are only partially initialised, so treat any handle that
	 * is not an explicit invalid value as valid until
	 * uvm_page_init() has completed.
	 */
       if (uvm.page_init_done != true)
               return true;

       ps = HANDLE_TO_PHYSSEG_NODE(upm);

       /* Extra checks needed only post uvm_page_init() */
       if (ps->pgs == NULL)
               return false;

       /* XXX: etc. */

	return true;
}

/*
* Boot protocol dictates that these must be able to return partially
* initialised segments.
*/
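
/*
 * A hedged caller sketch: (paddr_t) -1 is the only "not valid"
 * indication these getters give:
 *
 *	paddr_t s = uvm_physseg_get_start(upm);
 *
 *	if (s == (paddr_t) -1)
 *		return EINVAL;	(invalid or unplugged handle)
 */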
paddr_t
uvm_physseg_get_start(uvm_physseg_t upm)
{
       if (uvm_physseg_valid_p(upm) == false)
               return (paddr_t) -1;

       return HANDLE_TO_PHYSSEG_NODE(upm)->start;
}

paddr_t
uvm_physseg_get_end(uvm_physseg_t upm)
{
       if (uvm_physseg_valid_p(upm) == false)
               return (paddr_t) -1;

       return HANDLE_TO_PHYSSEG_NODE(upm)->end;
}

paddr_t
uvm_physseg_get_avail_start(uvm_physseg_t upm)
{
       if (uvm_physseg_valid_p(upm) == false)
               return (paddr_t) -1;

       return HANDLE_TO_PHYSSEG_NODE(upm)->avail_start;
}

#if defined(UVM_PHYSSEG_LEGACY)
void
uvm_physseg_set_avail_start(uvm_physseg_t upm, paddr_t avail_start)
{
       struct uvm_physseg *ps = HANDLE_TO_PHYSSEG_NODE(upm);

#if defined(DIAGNOSTIC)
       paddr_t avail_end;
       avail_end = uvm_physseg_get_avail_end(upm);
       KASSERT(uvm_physseg_valid_p(upm));
       KASSERT(avail_start < avail_end);
       KASSERT(avail_start >= ps->start);
#endif

       ps->avail_start = avail_start;
}

void
uvm_physseg_set_avail_end(uvm_physseg_t upm, paddr_t avail_end)
{
       struct uvm_physseg *ps = HANDLE_TO_PHYSSEG_NODE(upm);

#if defined(DIAGNOSTIC)
       paddr_t avail_start;
       avail_start = uvm_physseg_get_avail_start(upm);
       KASSERT(uvm_physseg_valid_p(upm));
       KASSERT(avail_end > avail_start);
       KASSERT(avail_end <= ps->end);
#endif

       ps->avail_end = avail_end;
}

#endif /* UVM_PHYSSEG_LEGACY */

paddr_t
uvm_physseg_get_avail_end(uvm_physseg_t upm)
{
       if (uvm_physseg_valid_p(upm) == false)
               return (paddr_t) -1;

       return HANDLE_TO_PHYSSEG_NODE(upm)->avail_end;
}

inline struct vm_page *
uvm_physseg_get_pg(uvm_physseg_t upm, paddr_t idx)
{
       KASSERT(uvm_physseg_valid_p(upm));
       return &HANDLE_TO_PHYSSEG_NODE(upm)->pgs[idx];
}

#ifdef __HAVE_PMAP_PHYSSEG
struct pmap_physseg *
uvm_physseg_get_pmseg(uvm_physseg_t upm)
{
       KASSERT(uvm_physseg_valid_p(upm));
       return &(HANDLE_TO_PHYSSEG_NODE(upm)->pmseg);
}
#endif

int
uvm_physseg_get_free_list(uvm_physseg_t upm)
{
       KASSERT(uvm_physseg_valid_p(upm));
       return HANDLE_TO_PHYSSEG_NODE(upm)->free_list;
}

u_long
uvm_physseg_get_start_hint(uvm_physseg_t upm)
{
       KASSERT(uvm_physseg_valid_p(upm));
       return HANDLE_TO_PHYSSEG_NODE(upm)->start_hint;
}

bool
uvm_physseg_set_start_hint(uvm_physseg_t upm, u_long start_hint)
{
       if (uvm_physseg_valid_p(upm) == false)
               return false;

       HANDLE_TO_PHYSSEG_NODE(upm)->start_hint = start_hint;
       return true;
}

void
uvm_physseg_init_seg(uvm_physseg_t upm, struct vm_page *pgs)
{
       psize_t i;
       psize_t n;
       paddr_t paddr;
       struct uvm_physseg *seg;
       struct vm_page *pg;

       KASSERT(upm != UVM_PHYSSEG_TYPE_INVALID);
       KASSERT(pgs != NULL);

       seg = HANDLE_TO_PHYSSEG_NODE(upm);
       KASSERT(seg != NULL);
       KASSERT(seg->pgs == NULL);

       n = seg->end - seg->start;
       seg->pgs = pgs;

       /* init and free vm_pages (we've already zeroed them) */
       paddr = ctob(seg->start);
       for (i = 0 ; i < n ; i++, paddr += PAGE_SIZE) {
               pg = &seg->pgs[i];
               pg->phys_addr = paddr;
#ifdef __HAVE_VM_PAGE_MD
               VM_MDPAGE_INIT(pg);
#endif
               if (atop(paddr) >= seg->avail_start &&
                   atop(paddr) < seg->avail_end) {
                       uvmexp.npages++;
                       /* add page to free pool */
                       uvm_page_set_freelist(pg,
                           uvm_page_lookup_freelist(pg));
                       /* Disable LOCKDEBUG: too many and too early. */
                       mutex_init(&pg->interlock, MUTEX_NODEBUG, IPL_NONE);
                       uvm_pagefree(pg);
               }
       }
}

void
uvm_physseg_seg_chomp_slab(uvm_physseg_t upm, struct vm_page *pgs, size_t n)
{
       struct uvm_physseg *seg = HANDLE_TO_PHYSSEG_NODE(upm);

	/* max number of boot time unplug()s allowed */
#define UVM_PHYSSEG_BOOT_UNPLUG_MAX VM_PHYSSEG_MAX

       static char btslab_ex_storage[EXTENT_FIXED_STORAGE_SIZE(UVM_PHYSSEG_BOOT_UNPLUG_MAX)];

       if (__predict_false(uvm.page_init_done == false)) {
               seg->ext = extent_create("Boot time slab", (u_long) pgs, (u_long) (pgs + n),
                   (void *)btslab_ex_storage, sizeof(btslab_ex_storage), 0);
       } else {
               seg->ext = extent_create("Hotplug slab", (u_long) pgs, (u_long) (pgs + n), NULL, 0, 0);
       }

	KASSERT(seg->ext != NULL);
}

struct vm_page *
uvm_physseg_seg_alloc_from_slab(uvm_physseg_t upm, size_t pages)
{
       int err;
       struct uvm_physseg *seg;
       struct vm_page *pgs = NULL;

       KASSERT(pages > 0);

       seg = HANDLE_TO_PHYSSEG_NODE(upm);

       if (__predict_false(seg->ext == NULL)) {
		/*
		 * This is a situation unique to boot time.
		 * It shouldn't happen at any point other than from
		 * the first uvm_page.c:uvm_page_init() call.
		 * Since uvm_page_init() plugs segments in a loop, we
		 * can safely inherit the slab extent created for the
		 * previous (already plugged) segment.
		 */
               KASSERT(uvm.page_init_done != true);

               uvm_physseg_t upmp = uvm_physseg_get_prev(upm);
               KASSERT(upmp != UVM_PHYSSEG_TYPE_INVALID);

               seg->ext = HANDLE_TO_PHYSSEG_NODE(upmp)->ext;

               KASSERT(seg->ext != NULL);
       }

       /* We allocate enough for this segment */
       err = extent_alloc(seg->ext, sizeof(*pgs) * pages, 1, 0, EX_BOUNDZERO, (u_long *)&pgs);

       if (err != 0) {
#ifdef DEBUG
               printf("%s: extent_alloc failed with error: %d \n",
                   __func__, err);
#endif
       }

       return pgs;
}

/*
* uvm_page_physload: load physical memory into VM system
*
* => all args are PFs
* => all pages in start/end get vm_page structures
* => areas marked by avail_start/avail_end get added to the free page pool
* => we are limited to VM_PHYSSEG_MAX physical memory segments
*/
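
/*
 * Typical MD boot time use (an illustrative sketch; the exact
 * arguments vary by port):
 *
 *	uvm_page_physload(atop(seg_start), atop(seg_end),
 *	    atop(avail_start), atop(avail_end), VM_FREELIST_DEFAULT);
 */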

uvm_physseg_t
uvm_page_physload(paddr_t start, paddr_t end, paddr_t avail_start,
   paddr_t avail_end, int free_list)
{
       struct uvm_physseg *ps;
       uvm_physseg_t upm;

       if (__predict_true(uvm.page_init_done == true))
               panic("%s: unload attempted after uvm_page_init()\n", __func__);
       if (uvmexp.pagesize == 0)
               panic("uvm_page_physload: page size not set!");
       if (free_list >= VM_NFREELIST || free_list < VM_FREELIST_DEFAULT)
               panic("uvm_page_physload: bad free list %d", free_list);
       if (start >= end)
               panic("uvm_page_physload: start[%" PRIxPADDR "] >= end[%"
                   PRIxPADDR "]", start, end);

       if (uvm_physseg_plug(start, end - start, &upm) == false) {
               panic("uvm_physseg_plug() failed at boot.");
               /* NOTREACHED */
               return UVM_PHYSSEG_TYPE_INVALID; /* XXX: correct type */
       }

       ps = HANDLE_TO_PHYSSEG_NODE(upm);

       /* Legacy */
       ps->avail_start = avail_start;
       ps->avail_end = avail_end;

	ps->free_list = free_list; /* XXX: */

	return upm;
}

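/*
 * uvm_physseg_unplug: remove the range [pfn, pfn + pages) from the
 * physical memory configuration.
 *
 * Four cases, matching the checks below:
 *	- the range covers the whole segment: remove the segment;
 *	- the range is a middle chunk: split the segment in two
 *	  (UVM_HOTPLUG only);
 *	- the range is a front chunk: truncate the segment's start;
 *	- the range is a back chunk: truncate the segment's end.
 */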
bool
uvm_physseg_unplug(paddr_t pfn, size_t pages)
{
       uvm_physseg_t upm;
       paddr_t off = 0, start __diagused, end;
       struct uvm_physseg *seg;

       upm = uvm_physseg_find(pfn, &off);

       if (!uvm_physseg_valid_p(upm)) {
               printf("%s: Tried to unplug from unknown offset\n", __func__);
               return false;
       }

       seg = HANDLE_TO_PHYSSEG_NODE(upm);

       start = uvm_physseg_get_start(upm);
       end = uvm_physseg_get_end(upm);

       if (end < (pfn + pages)) {
               printf("%s: Tried to unplug oversized span \n", __func__);
               return false;
       }

       KASSERT(pfn == start + off); /* sanity */

       if (__predict_true(uvm.page_init_done == true)) {
               /* XXX: KASSERT() that seg->pgs[] are not on any uvm lists */
               if (extent_free(seg->ext, (u_long)(seg->pgs + off), sizeof(struct vm_page) * pages, EX_MALLOCOK | EX_NOWAIT) != 0)
                       return false;
       }

       if (off == 0 && (pfn + pages) == end) {
#if defined(UVM_HOTPLUG) /* rbtree implementation */
               int segcount = 0;
               struct uvm_physseg *current_ps;
               /* Complete segment */
               if (uvm_physseg_graph.nentries == 1)
                       panic("%s: out of memory!", __func__);

               if (__predict_true(uvm.page_init_done == true)) {
                       RB_TREE_FOREACH(current_ps, &(uvm_physseg_graph.rb_tree)) {
                               if (seg->ext == current_ps->ext)
                                       segcount++;
                       }
                       KASSERT(segcount > 0);

                       if (segcount == 1) {
                               extent_destroy(seg->ext);
                       }

                       /*
                        * We assume that the unplug will succeed from
                        *  this point onwards
                        */
                       uvmexp.npages -= (int) pages;
               }

               rb_tree_remove_node(&(uvm_physseg_graph.rb_tree), upm);
               memset(seg, 0, sizeof(struct uvm_physseg));
               uvm_physseg_free(seg, sizeof(struct uvm_physseg));
               uvm_physseg_graph.nentries--;
#else /* UVM_HOTPLUG */
               int x;
               if (vm_nphysmem == 1)
                       panic("uvm_page_physget: out of memory!");
               vm_nphysmem--;
               for (x = upm ; x < vm_nphysmem ; x++)
                       /* structure copy */
                       VM_PHYSMEM_PTR_SWAP(x, x + 1);
#endif /* UVM_HOTPLUG */
               /* XXX: KASSERT() that seg->pgs[] are not on any uvm lists */
               return true;
       }

       if (off > 0 &&
           (pfn + pages) < end) {
#if defined(UVM_HOTPLUG) /* rbtree implementation */
               /* middle chunk - need a new segment */
               struct uvm_physseg *ps, *current_ps;
               ps = uvm_physseg_alloc(sizeof (struct uvm_physseg));
               if (ps == NULL) {
                       printf("%s: Unable to allocated new fragment vm_physseg \n",
                           __func__);
                       return false;
               }

               /* Remove middle chunk */
               if (__predict_true(uvm.page_init_done == true)) {
                       KASSERT(seg->ext != NULL);
                       ps->ext = seg->ext;

                       /* XXX: KASSERT() that seg->pgs[] are not on any uvm lists */
                       /*
                        * We assume that the unplug will succeed from
                        *  this point onwards
                        */
                       uvmexp.npages -= (int) pages;
               }

               ps->start = pfn + pages;
               ps->avail_start = ps->start; /* XXX: Legacy */

               ps->end = seg->end;
               ps->avail_end = ps->end; /* XXX: Legacy */

               seg->end = pfn;
		seg->avail_end = seg->end; /* XXX: Legacy */

		/*
                * The new pgs array points to the beginning of the
                * tail fragment.
                */
               if (__predict_true(uvm.page_init_done == true))
                       ps->pgs = seg->pgs + off + pages;

               current_ps = rb_tree_insert_node(&(uvm_physseg_graph.rb_tree), ps);
               if (current_ps != ps) {
                       panic("uvm_page_physload: Duplicate address range detected!");
               }
               uvm_physseg_graph.nentries++;
#else /* UVM_HOTPLUG */
               panic("%s: can't unplug() from the middle of a segment without"
                   " UVM_HOTPLUG\n",  __func__);
               /* NOTREACHED */
#endif /* UVM_HOTPLUG */
               return true;
       }

       if (off == 0 && (pfn + pages) < end) {
               /* Remove front chunk */
               if (__predict_true(uvm.page_init_done == true)) {
                       /* XXX: KASSERT() that seg->pgs[] are not on any uvm lists */
                       /*
                        * We assume that the unplug will succeed from
                        *  this point onwards
                        */
                       uvmexp.npages -= (int) pages;
               }

               /* Truncate */
               seg->start = pfn + pages;
               seg->avail_start = seg->start; /* XXX: Legacy */

               /*
                * Move the pgs array start to the beginning of the
                * tail end.
                */
               if (__predict_true(uvm.page_init_done == true))
                       seg->pgs += pages;

               return true;
       }

       if (off > 0 && (pfn + pages) == end) {
		/* back chunk */

		/* Truncate! */
               seg->end = pfn;
               seg->avail_end = seg->end; /* XXX: Legacy */

               uvmexp.npages -= (int) pages;

               return true;
       }

       printf("%s: Tried to unplug unknown range \n", __func__);

       return false;
}