/*      $NetBSD: uvm_loan.c,v 1.104 2020/06/11 22:21:05 ad Exp $        */

/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* from: Id: uvm_loan.c,v 1.1.6.4 1998/02/06 05:08:43 chs Exp
*/

/*
* uvm_loan.c: page loanout handler
*/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.104 2020/06/11 22:21:05 ad Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/mman.h>

#include <uvm/uvm.h>

#ifdef UVMHIST
UVMHIST_DEFINE(loanhist);
#endif

/*
* "loaned" pages are pages which are (read-only, copy-on-write) loaned
* from the VM system to other parts of the kernel.   this allows page
* copying to be avoided (e.g. you can loan pages from objs/anons to
* the mbuf system).
*
* there are 3 types of loans possible:
*  O->K  uvm_object page to wired kernel page (e.g. mbuf data area)
*  A->K  anon page to wired kernel page (e.g. mbuf data area)
*  O->A  uvm_object to anon loan (e.g. vnode page to an anon)
 * note that it is possible to have an O page loaned to both an A and a K
* at the same time.
*
* loans are tracked by pg->loan_count.  an O->A page will have both
* a uvm_object and a vm_anon, but PG_ANON will not be set.   this sort
* of page is considered "owned" by the uvm_object (not the anon).
*
* each loan of a page to the kernel bumps the pg->wire_count.  the
* kernel mappings for these pages will be read-only and wired.  since
* the page will also be wired, it will not be a candidate for pageout,
* and thus will never be pmap_page_protect()'d with VM_PROT_NONE.  a
* write fault in the kernel to one of these pages will not cause
* copy-on-write.  instead, the page fault is considered fatal.  this
* is because the kernel mapping will have no way to look up the
* object/anon which the page is owned by.  this is a good side-effect,
* since a kernel write to a loaned page is an error.
*
* owners that want to free their pages and discover that they are
* loaned out simply "disown" them (the page becomes an orphan).  these
* pages should be freed when the last loan is dropped.   in some cases
* an anon may "adopt" an orphaned page.
*
* locking: to read pg->loan_count either the owner or pg->interlock
* must be locked.   to modify pg->loan_count, both the owner of the page
* and pg->interlock must be locked.   pg->flags is (as always) locked by
* the owner of the page.
*
* note that locking from the "loaned" side is tricky since the object
* getting the loaned page has no reference to the page's owner and thus
* the owner could "die" at any time.   in order to prevent the owner
* from dying pg->interlock should be locked.   this forces us to sometimes
* use "try" locking.
*
* loans are typically broken by the following events:
 *  1. user-level write fault to a loaned page
*  2. pageout of clean+inactive O->A loaned page
*  3. owner frees page (e.g. pager flush)
*
* note that loaning a page causes all mappings of the page to become
* read-only (via pmap_page_protect).   this could have an unexpected
* effect on normal "wired" pages if one is not careful (XXX).
*/
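
/*
 * quick reference for the ownership rules above (a summary of the text,
 * not a new mechanism):
 *
 *	pg->uobject != NULL            => owned by the uvm_object, even if
 *	                                  pg->uanon is also set (O->A loan)
 *	pg->uobject == NULL &&
 *	(pg->flags & PG_ANON) != 0     => owned by the anon
 *	neither                        => "orphaned": the owner disowned the
 *	                                  page; it is freed when the last
 *	                                  loan is dropped
 */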

/*
* local prototypes
*/

static int      uvm_loananon(struct uvm_faultinfo *, void ***,
                            int, struct vm_anon *);
static int      uvm_loanuobj(struct uvm_faultinfo *, void ***,
                            int, vaddr_t);
static int      uvm_loanzero(struct uvm_faultinfo *, void ***, int);
static void     uvm_unloananon(struct vm_anon **, int);
static void     uvm_unloanpage(struct vm_page **, int);
static int      uvm_loanpage(struct vm_page **, int, bool);


/*
* inlines
*/

/*
* uvm_loanentry: loan out pages in a map entry (helper fn for uvm_loan())
*
* => "ufi" is the result of a successful map lookup (meaning that
*      on entry the map is locked by the caller)
* => we may unlock and then relock the map if needed (for I/O)
* => we put our output result in "output"
* => we always return with the map unlocked
* => possible return values:
*      -1 == error, map is unlocked
*       0 == map relock error (try again!), map is unlocked
*      >0 == number of pages we loaned, map is unlocked
*
* NOTE: We can live with this being an inline, because it is only called
* from one place.
*/

static inline int
uvm_loanentry(struct uvm_faultinfo *ufi, void ***output, int flags)
{
       vaddr_t curaddr = ufi->orig_rvaddr;
       vsize_t togo = ufi->size;
       struct vm_aref *aref = &ufi->entry->aref;
       struct uvm_object *uobj = ufi->entry->object.uvm_obj;
       struct vm_anon *anon;
       int rv, result = 0;

       UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

       /*
        * lock us the rest of the way down (we unlock before return)
        */
       if (aref->ar_amap) {
               amap_lock(aref->ar_amap, RW_WRITER);
       }

       /*
        * loop until done
        */
       while (togo) {

               /*
                * find the page we want.   check the anon layer first.
                */

               if (aref->ar_amap) {
                       anon = amap_lookup(aref, curaddr - ufi->entry->start);
               } else {
                       anon = NULL;
               }

               /* locked: map, amap, uobj */
               if (anon) {
                       rv = uvm_loananon(ufi, output, flags, anon);
               } else if (uobj) {
                       rv = uvm_loanuobj(ufi, output, flags, curaddr);
               } else if (UVM_ET_ISCOPYONWRITE(ufi->entry)) {
                       rv = uvm_loanzero(ufi, output, flags);
               } else {
                       uvmfault_unlockall(ufi, aref->ar_amap, uobj);
                       rv = -1;
               }
               /* locked: if (rv > 0) => map, amap, uobj  [o.w. unlocked] */
               KASSERT(rv > 0 || aref->ar_amap == NULL ||
                   !rw_write_held(aref->ar_amap->am_lock));
               KASSERT(rv > 0 || uobj == NULL ||
                   !rw_write_held(uobj->vmobjlock));

               /* total failure */
               if (rv < 0) {
                       UVMHIST_LOG(loanhist, "failure %jd", rv, 0,0,0);
                       return (-1);
               }

               /* relock failed, need to do another lookup */
               if (rv == 0) {
                       UVMHIST_LOG(loanhist, "relock failure %jd", result
                           ,0,0,0);
                       return (result);
               }

               /*
                * got it... advance to next page
                */

               result++;
               togo -= PAGE_SIZE;
               curaddr += PAGE_SIZE;
       }

       /*
        * unlock what we locked, unlock the maps and return
        */

       if (aref->ar_amap) {
               amap_unlock(aref->ar_amap);
       }
       uvmfault_unlockmaps(ufi, false);
       UVMHIST_LOG(loanhist, "done %jd", result, 0,0,0);
       return (result);
}

/*
* normal functions
*/

/*
* uvm_loan: loan pages in a map out to anons or to the kernel
*
* => map should be unlocked
* => start and len should be multiples of PAGE_SIZE
 * => result is either an array of anons or vm_pages (depending on flags)
* => flag values: UVM_LOAN_TOANON - loan to anons
*                 UVM_LOAN_TOPAGE - loan to wired kernel page
*    one and only one of these flags must be set!
* => returns 0 (success), or an appropriate error number
*/

int
uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
{
       struct uvm_faultinfo ufi;
       void **result, **output;
       int rv, error;

       UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

       /*
        * ensure that one and only one of the flags is set
        */

       KASSERT(((flags & UVM_LOAN_TOANON) == 0) ^
               ((flags & UVM_LOAN_TOPAGE) == 0));

       /*
        * "output" is a pointer to the current place to put the loaned page.
        */

       result = v;
       output = &result[0];    /* start at the beginning ... */

       /*
        * while we've got pages to do
        */

       while (len > 0) {

               /*
                * fill in params for a call to uvmfault_lookup
                */

               ufi.orig_map = map;
               ufi.orig_rvaddr = start;
               ufi.orig_size = len;

               /*
                * do the lookup, the only time this will fail is if we hit on
                * an unmapped region (an error)
                */

               if (!uvmfault_lookup(&ufi, false)) {
                       error = ENOENT;
                       goto fail;
               }

               /*
                * map now locked.  now do the loanout...
                */

               rv = uvm_loanentry(&ufi, &output, flags);
               if (rv < 0) {
                       /* all unlocked due to error */
                       error = EINVAL;
                       goto fail;
               }

               /*
                * done!  the map is unlocked.  advance, if possible.
                *
                * XXXCDC: could be recoded to hold the map lock with
                *         smarter code (but it only happens on map entry
                *         boundaries, so it isn't that bad).
                */

               if (rv) {
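                        /* rv is a count of loaned pages; convert to bytes */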
                       rv <<= PAGE_SHIFT;
                       len -= rv;
                       start += rv;
               }
       }
       UVMHIST_LOG(loanhist, "success", 0,0,0,0);
       return 0;

fail:
       /*
        * failed to complete loans.  drop any loans and return failure code.
        * map is already unlocked.
        */

       if (output - result) {
               if (flags & UVM_LOAN_TOANON) {
                       uvm_unloananon((struct vm_anon **)result,
                           output - result);
               } else {
                       uvm_unloanpage((struct vm_page **)result,
                           output - result);
               }
       }
       UVMHIST_LOG(loanhist, "error %jd", error,0,0,0);
       return (error);
}
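
/*
 * example of calling uvm_loan() for a loan to wired kernel pages (an
 * illustrative sketch only: "p", "uva" and the fixed count of 4 pages
 * are hypothetical, "uva" must be page aligned, and error handling
 * beyond the loan itself is omitted):
 *
 *	struct vm_page *pgs[4];
 *	int error;
 *
 *	error = uvm_loan(&p->p_vmspace->vm_map, uva, 4 << PAGE_SHIFT,
 *	    pgs, UVM_LOAN_TOPAGE);
 *	if (error == 0) {
 *		... use the loaned (read-only) pages ...
 *		uvm_unloan(pgs, 4, UVM_LOAN_TOPAGE);
 *	}
 */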

/*
* uvm_loananon: loan a page from an anon out
*
* => called with map, amap, uobj locked
* => return value:
*      -1 = fatal error, everything is unlocked, abort.
*       0 = lookup in ufi went stale, everything unlocked, relookup and
*              try again
*       1 = got it, everything still locked
*/

int
uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags,
   struct vm_anon *anon)
{
       struct vm_page *pg;
       int error;

       UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

       /*
        * if we are loaning to "another" anon then it is easy, we just
        * bump the reference count on the current anon and return a
        * pointer to it (it becomes copy-on-write shared).
        */

       if (flags & UVM_LOAN_TOANON) {
               KASSERT(rw_write_held(anon->an_lock));
               pg = anon->an_page;
               if (pg && (pg->flags & PG_ANON) != 0 && anon->an_ref == 1) {
                       if (pg->wire_count > 0) {
                               UVMHIST_LOG(loanhist, "->A wired %#jx",
                                   (uintptr_t)pg, 0, 0, 0);
                               uvmfault_unlockall(ufi,
                                   ufi->entry->aref.ar_amap,
                                   ufi->entry->object.uvm_obj);
                               return (-1);
                       }
                       pmap_page_protect(pg, VM_PROT_READ);
               }
               anon->an_ref++;
               **output = anon;
               (*output)++;
               UVMHIST_LOG(loanhist, "->A done", 0,0,0,0);
               return (1);
       }

        /*
         * we are loaning the page to the kernel.   we need to get the
         * page resident so we can wire it.   uvmfault_anonget will
         * handle this for us.
         */

       KASSERT(rw_write_held(anon->an_lock));
       error = uvmfault_anonget(ufi, ufi->entry->aref.ar_amap, anon);

       /*
        * if we were unable to get the anon, then uvmfault_anonget has
        * unlocked everything and returned an error code.
        */

       if (error) {
               UVMHIST_LOG(loanhist, "error %jd", error,0,0,0);
               KASSERT(error != ENOLCK);

               /* need to refault (i.e. refresh our lookup) ? */
               if (error == ERESTART) {
                       return (0);
               }

               /* "try again"?   sleep a bit and retry ... */
               if (error == EAGAIN) {
                       kpause("loanagain", false, hz/2, NULL);
                       return (0);
               }

               /* otherwise flag it as an error */
               return (-1);
       }

       /*
        * we have the page and its owner locked: do the loan now.
        */

       pg = anon->an_page;
       if (pg->wire_count > 0) {
               UVMHIST_LOG(loanhist, "->K wired %#jx", (uintptr_t)pg, 0, 0, 0);
               KASSERT(pg->uobject == NULL);
               uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, NULL);
               return (-1);
       }
       if (pg->loan_count == 0) {
               pmap_page_protect(pg, VM_PROT_READ);
       }
       uvm_pagelock(pg);
       pg->loan_count++;
       KASSERT(pg->loan_count > 0);    /* detect wrap-around */
       uvm_pageactivate(pg);
       uvm_pageunlock(pg);
       **output = pg;
       (*output)++;

       /* unlock and return success */
       if (pg->uobject)
               rw_exit(pg->uobject->vmobjlock);
       UVMHIST_LOG(loanhist, "->K done", 0,0,0,0);
       return (1);
}

/*
* uvm_loanpage: loan out pages to kernel (->K)
*
* => pages should be object-owned and the object should be locked.
* => in the case of error, the object might be unlocked and relocked.
* => pages will be unbusied (if busied is true).
 * => fail with EBUSY if we meet a wired page.
*/
static int
uvm_loanpage(struct vm_page **pgpp, int npages, bool busied)
{
       int i;
       int error = 0;

       UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

       for (i = 0; i < npages; i++) {
               struct vm_page *pg = pgpp[i];

               KASSERT(pg->uobject != NULL);
               KASSERT(pg->uobject == pgpp[0]->uobject);
               KASSERT(!(pg->flags & (PG_RELEASED|PG_PAGEOUT)));
               KASSERT(rw_write_held(pg->uobject->vmobjlock));
               KASSERT(busied == ((pg->flags & PG_BUSY) != 0));

               if (pg->wire_count > 0) {
                       UVMHIST_LOG(loanhist, "wired %#jx", (uintptr_t)pg,
                           0, 0, 0);
                       error = EBUSY;
                       break;
               }
               if (pg->loan_count == 0) {
                       pmap_page_protect(pg, VM_PROT_READ);
               }
               uvm_pagelock(pg);
               pg->loan_count++;
               KASSERT(pg->loan_count > 0);    /* detect wrap-around */
               uvm_pageactivate(pg);
               uvm_pageunlock(pg);
       }

       if (busied) {
               uvm_page_unbusy(pgpp, npages);
       }

       if (error) {
               /*
                * backout what we've done
                */
               krwlock_t *slock = pgpp[0]->uobject->vmobjlock;

               rw_exit(slock);
               uvm_unloan(pgpp, i, UVM_LOAN_TOPAGE);
               rw_enter(slock, RW_WRITER);
       }

       UVMHIST_LOG(loanhist, "done %jd", error, 0, 0, 0);
       return error;
}

/*
* XXX UBC temp limit
* number of pages to get at once.
* should be <= MAX_READ_AHEAD in genfs_vnops.c
*/
#define UVM_LOAN_GET_CHUNK      16

/*
* uvm_loanuobjchunk: helper for uvm_loanuobjpages()
*/
static int
uvm_loanuobjchunk(struct uvm_object *uobj, voff_t pgoff, int orignpages,
   struct vm_page **pgpp)
{
       int error, npages;

       rw_enter(uobj->vmobjlock, RW_WRITER);
reget:
       npages = orignpages;
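        /*
         * note: since PGO_LOCKED is not set, pgo_get consumes the object
         * lock; the object comes back unlocked whether or not the get
         * succeeds, which is why the lock is re-taken below.
         */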
       error = (*uobj->pgops->pgo_get)(uobj, pgoff, pgpp, &npages, 0,
           VM_PROT_READ, 0, PGO_SYNCIO);
       switch (error) {
       case 0:
               KASSERT(npages == orignpages);

               /* check for released pages */
               rw_enter(uobj->vmobjlock, RW_WRITER);
               for (int i = 0; i < npages; i++) {
                       KASSERT(pgpp[i]->uobject->vmobjlock == uobj->vmobjlock);
                       if ((pgpp[i]->flags & PG_RELEASED) != 0) {
                               /*
                                * release pages and try again.
                                */
                               uvm_page_unbusy(pgpp, npages);
                               goto reget;
                       }
               }

               /* loan out pages.  they will be unbusied whatever happens. */
               error = uvm_loanpage(pgpp, npages, true);
               rw_exit(uobj->vmobjlock);
               if (error != 0) {
                       memset(pgpp, 0, sizeof(pgpp[0]) * npages);
               }
               return error;

       case EAGAIN:
               kpause("loanuopg", false, hz/2, NULL);
               rw_enter(uobj->vmobjlock, RW_WRITER);
               goto reget;

       default:
               return error;
       }
}

/*
* uvm_loanuobjpages: loan pages from a uobj out (O->K)
*
* => uobj shouldn't be locked.  (we'll lock it)
* => fail with EBUSY if we meet a wired page.
*/
int
uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int npages,
   struct vm_page **pgpp)
{
       int ndone, error, chunk;

       KASSERT(npages > 0);

       memset(pgpp, 0, sizeof(pgpp[0]) * npages);
       for (ndone = 0; ndone < npages; ndone += chunk) {
               chunk = MIN(UVM_LOAN_GET_CHUNK, npages - ndone);
               error = uvm_loanuobjchunk(uobj, pgoff + (ndone << PAGE_SHIFT),
                   chunk, pgpp + ndone);
               if (error != 0) {
                       if (ndone != 0) {
                               uvm_unloan(pgpp, ndone, UVM_LOAN_TOPAGE);
                       }
                       break;
               }
       }

       return error;
}
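
/*
 * example of calling uvm_loanuobjpages() (an illustrative sketch;
 * "vp" is a hypothetical vnode and error handling is abbreviated):
 *
 *	struct vm_page *pgs[2];
 *	int error;
 *
 *	error = uvm_loanuobjpages(&vp->v_uobj, 0, 2, pgs);
 *	if (error == 0) {
 *		... copy from or map the loaned pages ...
 *		uvm_unloan(pgs, 2, UVM_LOAN_TOPAGE);
 *	}
 */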

/*
* uvm_loanuobj: loan a page from a uobj out
*
* => called with map, amap, uobj locked
* => return value:
*      -1 = fatal error, everything is unlocked, abort.
*       0 = lookup in ufi went stale, everything unlocked, relookup and
*              try again
*       1 = got it, everything still locked
*/

static int
uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va)
{
       struct vm_amap *amap = ufi->entry->aref.ar_amap;
       struct uvm_object *uobj = ufi->entry->object.uvm_obj;
       struct vm_page *pg;
       int error, npages;
       bool locked;

       UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

       /*
        * first we must make sure the page is resident.
        *
        * XXXCDC: duplicate code with uvm_fault().
        */

       /* locked: maps(read), amap(if there) */
       rw_enter(uobj->vmobjlock, RW_WRITER);
       /* locked: maps(read), amap(if there), uobj */

       if (uobj->pgops->pgo_get) {     /* try locked pgo_get */
               npages = 1;
               pg = NULL;
               error = (*uobj->pgops->pgo_get)(uobj,
                   va - ufi->entry->start + ufi->entry->offset,
                   &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_LOCKED);
       } else {
               error = EIO;            /* must have pgo_get op */
       }

       /*
        * check the result of the locked pgo_get.  if there is a problem,
        * then we fail the loan.
        */

       if (error && error != EBUSY) {
               uvmfault_unlockall(ufi, amap, uobj);
               return (-1);
       }

       /*
        * if we need to unlock for I/O, do so now.
        */

       if (error == EBUSY) {
               uvmfault_unlockall(ufi, amap, NULL);

               /* locked: uobj */
               npages = 1;
               error = (*uobj->pgops->pgo_get)(uobj,
                   va - ufi->entry->start + ufi->entry->offset,
                   &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_SYNCIO);
               /* locked: <nothing> */

               if (error) {
                       if (error == EAGAIN) {
                               kpause("fltagain2", false, hz/2, NULL);
                               return (0);
                       }
                       return (-1);
               }

               /*
                * pgo_get was a success.   attempt to relock everything.
                */

               locked = uvmfault_relock(ufi);
               if (locked && amap)
                       amap_lock(amap, RW_WRITER);
               uobj = pg->uobject;
               rw_enter(uobj->vmobjlock, RW_WRITER);

               /*
                 * verify that the page has not been released and re-verify
                 * that the amap slot is still free.   if there is a problem
                 * we drop our lock (thus forcing a lookup refresh/retry).
                */

               if ((pg->flags & PG_RELEASED) != 0 ||
                   (locked && amap && amap_lookup(&ufi->entry->aref,
                   ufi->orig_rvaddr - ufi->entry->start))) {
                       if (locked)
                               uvmfault_unlockall(ufi, amap, NULL);
                       locked = false;
               }

               /*
                * unbusy the page.
                */

               if ((pg->flags & PG_RELEASED) == 0) {
                       uvm_pagelock(pg);
                       uvm_pagewakeup(pg);
                       uvm_pageunlock(pg);
                       pg->flags &= ~PG_BUSY;
                       UVM_PAGE_OWN(pg, NULL);
               }

               /*
                * didn't get the lock?   release the page and retry.
                */

               if (locked == false) {
                       if (pg->flags & PG_RELEASED) {
                               uvm_pagefree(pg);
                       }
                       rw_exit(uobj->vmobjlock);
                       return (0);
               }
       }

       /*
        * for tmpfs vnodes, the page will be from a UAO rather than
        * the vnode.  just check the locks match.
        */

       KASSERT(uobj->vmobjlock == pg->uobject->vmobjlock);

       /*
        * at this point we have the page we want ("pg") and we have
         * all data structures locked.  do the loanout.  the page cannot
         * be PG_RELEASED (we caught that above).
        */

       if ((flags & UVM_LOAN_TOANON) == 0) {
               if (uvm_loanpage(&pg, 1, false)) {
                       uvmfault_unlockall(ufi, amap, uobj);
                       return (-1);
               }
               rw_exit(uobj->vmobjlock);
               **output = pg;
               (*output)++;
               return (1);
       }

#ifdef notdef
       /*
        * must be a loan to an anon.   check to see if there is already
        * an anon associated with this page.  if so, then just return
        * a reference to this object.   the page should already be
        * mapped read-only because it is already on loan.
        */

       if (pg->uanon) {
               /* XXX: locking */
               anon = pg->uanon;
               anon->an_ref++;
               uvm_pagelock(pg);
               uvm_pagewakeup(pg);
               uvm_pageunlock(pg);
               pg->flags &= ~PG_BUSY;
               UVM_PAGE_OWN(pg, NULL);
               rw_exit(uobj->vmobjlock);
               **output = anon;
               (*output)++;
               return (1);
       }

       /*
        * need to allocate a new anon
        */

       anon = uvm_analloc();
       if (anon == NULL) {
               goto fail;
       }
       if (pg->wire_count > 0) {
               UVMHIST_LOG(loanhist, "wired %#jx", (uintptr_t)pg, 0, 0, 0);
               goto fail;
       }
       if (pg->loan_count == 0) {
               pmap_page_protect(pg, VM_PROT_READ);
       }
       uvm_pagelock(pg);
       pg->loan_count++;
       KASSERT(pg->loan_count > 0);    /* detect wrap-around */
       pg->uanon = anon;
       anon->an_page = pg;
        anon->an_lock = amap->am_lock;  /* XXX sketch; TODO: share amap lock */
       uvm_pageactivate(pg);
       uvm_pagewakeup(pg);
       uvm_pageunlock(pg);
       pg->flags &= ~PG_BUSY;
       UVM_PAGE_OWN(pg, NULL);
       rw_exit(uobj->vmobjlock);
        rw_exit(anon->an_lock);
       **output = anon;
       (*output)++;
       return (1);

fail:
       UVMHIST_LOG(loanhist, "fail", 0,0,0,0);
       /*
        * unlock everything and bail out.
        */
       uvm_pagelock(pg);
       uvm_pagewakeup(pg);
       uvm_pageunlock(pg);
       pg->flags &= ~PG_BUSY;
       UVM_PAGE_OWN(pg, NULL);
        uvmfault_unlockall(ufi, amap, uobj);
       if (anon) {
               anon->an_ref--;
               uvm_anfree(anon);
       }
#endif  /* notdef */
       return (-1);
}

/*
* uvm_loanzero: loan a zero-fill page out
*
* => called with map, amap, uobj locked
* => return value:
*      -1 = fatal error, everything is unlocked, abort.
*       0 = lookup in ufi went stale, everything unlocked, relookup and
*              try again
*       1 = got it, everything still locked
*/

static struct uvm_object uvm_loanzero_object;
static krwlock_t uvm_loanzero_lock __cacheline_aligned;

static int
uvm_loanzero(struct uvm_faultinfo *ufi, void ***output, int flags)
{
       struct vm_page *pg;
       struct vm_amap *amap = ufi->entry->aref.ar_amap;

       UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);
again:
       rw_enter(uvm_loanzero_object.vmobjlock, RW_WRITER);

       /*
        * first, get ahold of our single zero page.
        */

       pg = uvm_pagelookup(&uvm_loanzero_object, 0);
       if (__predict_false(pg == NULL)) {
               while ((pg = uvm_pagealloc(&uvm_loanzero_object, 0, NULL,
                                          UVM_PGA_ZERO)) == NULL) {
                       rw_exit(uvm_loanzero_object.vmobjlock);
                       uvmfault_unlockall(ufi, amap, NULL);
                       uvm_wait("loanzero");
                       if (!uvmfault_relock(ufi)) {
                               return (0);
                       }
                       if (amap) {
                               amap_lock(amap, RW_WRITER);
                       }
                       goto again;
               }

               /* got a zero'd page. */
               pg->flags &= ~(PG_BUSY|PG_FAKE);
               pg->flags |= PG_RDONLY;
               uvm_pagelock(pg);
               uvm_pageactivate(pg);
               uvm_pagewakeup(pg);
               uvm_pageunlock(pg);
               UVM_PAGE_OWN(pg, NULL);
       }

       if ((flags & UVM_LOAN_TOANON) == 0) {   /* loaning to kernel-page */
               mutex_enter(&pg->interlock);
               pg->loan_count++;
               KASSERT(pg->loan_count > 0);    /* detect wrap-around */
               mutex_exit(&pg->interlock);
               rw_exit(uvm_loanzero_object.vmobjlock);
               **output = pg;
               (*output)++;
               return (1);
       }

#ifdef notdef
       /*
        * loaning to an anon.  check to see if there is already an anon
        * associated with this page.  if so, then just return a reference
        * to this object.
        */

       if (pg->uanon) {
               anon = pg->uanon;
                rw_enter(anon->an_lock, RW_WRITER);
               anon->an_ref++;
                rw_exit(anon->an_lock);
               rw_exit(uvm_loanzero_object.vmobjlock);
               **output = anon;
               (*output)++;
               return (1);
       }

       /*
        * need to allocate a new anon
        */

       anon = uvm_analloc();
       if (anon == NULL) {
               /* out of swap causes us to fail */
               rw_exit(uvm_loanzero_object.vmobjlock);
                uvmfault_unlockall(ufi, amap, NULL);
               return (-1);
       }
       anon->an_page = pg;
       pg->uanon = anon;
       uvm_pagelock(pg);
       pg->loan_count++;
       KASSERT(pg->loan_count > 0);    /* detect wrap-around */
       uvm_pageactivate(pg);
       uvm_pageunlock(pg);
        rw_exit(anon->an_lock);
       rw_exit(uvm_loanzero_object.vmobjlock);
       **output = anon;
       (*output)++;
       return (1);
#else
       return (-1);
#endif
}


/*
* uvm_unloananon: kill loans on anons (basically a normal ref drop)
*
* => we expect all our resources to be unlocked
*/

static void
uvm_unloananon(struct vm_anon **aloans, int nanons)
{
#ifdef notdef
       struct vm_anon *anon, *to_free = NULL;

       /* TODO: locking */
       amap_lock(amap, RW_WRITER);
       while (nanons-- > 0) {
               anon = *aloans++;
               if (--anon->an_ref == 0) {
                       uvm_anfree(anon);
               }
       }
       amap_unlock(amap);
#endif  /* notdef */
}

/*
* uvm_unloanpage: kill loans on pages loaned out to the kernel
*
* => we expect all our resources to be unlocked
*/

static void
uvm_unloanpage(struct vm_page **ploans, int npages)
{
       struct vm_page *pg;
       krwlock_t *slock;

       while (npages-- > 0) {
               pg = *ploans++;

               /*
                * do a little dance to acquire the object or anon lock
                * as appropriate.  we are locking in the wrong order,
                * so we have to do a try-lock here.
                */

               mutex_enter(&pg->interlock);
               slock = NULL;
               while (pg->uobject != NULL || pg->uanon != NULL) {
                       if (pg->uobject != NULL) {
                               slock = pg->uobject->vmobjlock;
                       } else {
                               slock = pg->uanon->an_lock;
                       }
                       if (rw_tryenter(slock, RW_WRITER)) {
                               break;
                       }
                       /* XXX Better than yielding but inadequate. */
                       kpause("livelock", false, 1, &pg->interlock);
                       slock = NULL;
               }

               /*
                * drop our loan.  if page is owned by an anon but
                * PG_ANON is not set, the page was loaned to the anon
                * from an object which dropped ownership, so resolve
                * this by turning the anon's loan into real ownership
                * (ie. decrement loan_count again and set PG_ANON).
                * after all this, if there are no loans left, put the
                 * page back on a paging queue (if the page is owned by
                * an anon) or free it (if the page is now unowned).
                */

               KASSERT(pg->loan_count > 0);
               pg->loan_count--;
               if (pg->uobject == NULL && pg->uanon != NULL &&
                   (pg->flags & PG_ANON) == 0) {
                       KASSERT(pg->loan_count > 0);
                       pg->loan_count--;
                       pg->flags |= PG_ANON;
               }
               mutex_exit(&pg->interlock);
               if (pg->loan_count == 0 && pg->uobject == NULL &&
                   pg->uanon == NULL) {
                       KASSERT((pg->flags & PG_BUSY) == 0);
                       uvm_pagefree(pg);
               }
               if (slock != NULL) {
                       rw_exit(slock);
               }
       }
}

/*
* uvm_unloan: kill loans on pages or anons.
*/

void
uvm_unloan(void *v, int npages, int flags)
{
       if (flags & UVM_LOAN_TOANON) {
               uvm_unloananon(v, npages);
       } else {
               uvm_unloanpage(v, npages);
       }
}

/*
* Minimal pager for uvm_loanzero_object.  We need to provide a "put"
* method, because the page can end up on a paging queue, and the
* page daemon will want to call pgo_put when it encounters the page
* on the inactive list.
*/

static int
ulz_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
       struct vm_page *pg;

       KDASSERT(uobj == &uvm_loanzero_object);

       /*
        * Don't need to do any work here if we're not freeing pages.
        */

       if ((flags & PGO_FREE) == 0) {
               rw_exit(uobj->vmobjlock);
               return 0;
       }

       /*
         * we don't ever actually want to free the page in
         * uvm_loanzero_object, so just reactivate or dequeue it.
        */

       pg = uvm_pagelookup(uobj, 0);
       KASSERT(pg != NULL);

       uvm_pagelock(pg);
       if (pg->uanon) {
               uvm_pageactivate(pg);
       } else {
               uvm_pagedequeue(pg);
       }
       uvm_pageunlock(pg);

       rw_exit(uobj->vmobjlock);
       return 0;
}

static const struct uvm_pagerops ulz_pager = {
       .pgo_put = ulz_put,
};

/*
* uvm_loan_init(): initialize the uvm_loan() facility.
*/

void
uvm_loan_init(void)
{

       rw_init(&uvm_loanzero_lock);
       uvm_obj_init(&uvm_loanzero_object, &ulz_pager, false, 0);
       uvm_obj_setlock(&uvm_loanzero_object, &uvm_loanzero_lock);

       UVMHIST_INIT(loanhist, 300);
}

/*
* uvm_loanbreak: break loan on a uobj page
*
* => called with uobj locked
* => the page may be busy; if it's busy, it will be unbusied
 * => return value:
 *	newly allocated page if succeeded, or NULL if a replacement
 *	page could not be allocated
*/
struct vm_page *
uvm_loanbreak(struct vm_page *uobjpage)
{
       struct vm_page *pg;
       struct uvm_object *uobj __diagused = uobjpage->uobject;

       KASSERT(uobj != NULL);
       KASSERT(rw_write_held(uobj->vmobjlock));

       /* alloc new un-owned page */
       pg = uvm_pagealloc(NULL, 0, NULL, 0);
       if (pg == NULL)
               return NULL;

       /*
         * copy the data from the old page to the new one and clear
         * the PG_FAKE flag on the new page (keep it busy).  force a
         * reload of the old page by clearing it from all pmaps.
         * then rename the pages.
        */

       uvm_pagecopy(uobjpage, pg);     /* old -> new */
       pg->flags &= ~PG_FAKE;
       KASSERT(uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_DIRTY);
       pmap_page_protect(uobjpage, VM_PROT_NONE);
       /* uobj still locked */
       if ((uobjpage->flags & PG_BUSY) != 0) {
               uobjpage->flags &= ~PG_BUSY;
               UVM_PAGE_OWN(uobjpage, NULL);
       }

       /*
        * if the page is no longer referenced by
        * an anon (i.e. we are breaking an O->K
        * loan), then remove it from any pageq's.
        */

       uvm_pagelock2(uobjpage, pg);
       uvm_pagewakeup(uobjpage);
       if (uobjpage->uanon == NULL)
               uvm_pagedequeue(uobjpage);

       /*
        * replace uobjpage with new page.
        */

       uvm_pagereplace(uobjpage, pg);

       /*
        * at this point we have absolutely no
        * control over uobjpage
        */

       uvm_pageactivate(pg);
       uvm_pageunlock2(uobjpage, pg);

       /*
         * done!  the loan is broken and "pg", which is PG_BUSY,
         * has replaced uobjpage in the object.
        */

       return pg;
}
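
/*
 * sketch of the expected uvm_loanbreak() calling pattern (simplified
 * from what a fault handler would do; relocking and retry logic are
 * elided):
 *
 *	(uobj locked; uobjpage is a loaned page we need to write to)
 *	pg = uvm_loanbreak(uobjpage);
 *	if (pg == NULL) {
 *		... unlock everything, uvm_wait() for memory, retry ...
 *	}
 *	"uobjpage" is no longer ours; continue with the new PG_BUSY
 *	page "pg", which has taken its place in the object.
 */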

int
uvm_loanbreak_anon(struct vm_anon *anon, struct uvm_object *uobj)
{
       struct vm_page *newpg, *oldpg;
       unsigned oldstatus;

       KASSERT(rw_write_held(anon->an_lock));
       KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
       KASSERT(anon->an_page->loan_count > 0);

       /* get new un-owned replacement page */
       newpg = uvm_pagealloc(NULL, 0, NULL, 0);
       if (newpg == NULL) {
               return ENOMEM;
       }

       oldpg = anon->an_page;
       /* copy old -> new */
       uvm_pagecopy(oldpg, newpg);
       KASSERT(uvm_pagegetdirty(newpg) == UVM_PAGE_STATUS_DIRTY);

       /* force reload */
       pmap_page_protect(oldpg, VM_PROT_NONE);
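        /* remember the old page's dirty status for the CPU counters below */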
       oldstatus = uvm_pagegetdirty(anon->an_page);

       uvm_pagelock2(oldpg, newpg);
       if (uobj == NULL) {
               /*
                * we were the lender (A->K); need to remove the page from
                * pageq's.
                *
                * PG_ANON is updated by the caller.
                */
               KASSERT((oldpg->flags & PG_ANON) != 0);
               oldpg->flags &= ~PG_ANON;
               uvm_pagedequeue(oldpg);
       }
       oldpg->uanon = NULL;

       if (uobj) {
               /* if we were receiver of loan */
                KASSERT((oldpg->flags & PG_ANON) == 0);
               oldpg->loan_count--;
       }

       /* install new page in anon */
       anon->an_page = newpg;
       newpg->uanon = anon;
       newpg->flags |= PG_ANON;

       uvm_pageactivate(newpg);
       uvm_pageunlock2(oldpg, newpg);

       newpg->flags &= ~(PG_BUSY|PG_FAKE);
       UVM_PAGE_OWN(newpg, NULL);

       if (uobj) {
               rw_exit(uobj->vmobjlock);
       }

       /* done! */
       kpreempt_disable();
       if (uobj == NULL) {
               CPU_COUNT(CPU_COUNT_ANONUNKNOWN + oldstatus, -1);
       }
       CPU_COUNT(CPU_COUNT_ANONDIRTY, 1);
       kpreempt_enable();
       return 0;
}
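
/*
 * sketch of the uvm_loanbreak_anon() calling pattern (illustrative;
 * drawn from the anon-fault path, with the locking context assumed to
 * match the KASSERTs above):
 *
 *	error = uvm_loanbreak_anon(anon, uobj);
 *	if (error) {
 *		... unlock, uvm_wait() for memory, restart the fault ...
 *	}
 *	anon->an_page is now a private, non-loaned page.
 */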