/* $NetBSD: uvm_amap.c,v 1.129 2023/09/10 14:54:34 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* uvm_amap.c: amap operations
*/
/*
* this file contains functions that perform operations on amaps. see
* uvm_amap.h for a brief explanation of the role of amaps in uvm.
*/
/*
* cache for allocation of vm_amap structures. note that in order to
* avoid an endless loop, the amap cache's allocator cannot allocate
* memory from an amap (it currently goes through the kernel uobj, so
* we are ok).
*/
/* Pool cache backing amap allocations. */
static struct pool_cache uvm_amap_cache;
/* NOTE(review): presumably serialises access to amap_list - confirm. */
static kmutex_t amap_list_lock __cacheline_aligned;
/* Head of a list whose elements are struct vm_amap. */
static LIST_HEAD(, vm_amap) amap_list;
#ifdef UVM_AMAP_PPREF
/*
* what is ppref? ppref is an _optional_ amap feature which is used
* to keep track of reference counts on a per-page basis. it is enabled
* when UVM_AMAP_PPREF is defined.
*
* when enabled, an array of ints is allocated for the pprefs. this
* array is allocated only when a partial reference is added to the
* map (either by unmapping part of the amap, or gaining a reference
* to only a part of an amap). if the allocation of the array fails
* (KM_NOSLEEP), then we set the array pointer to PPREF_NONE to indicate
* that we tried to do ppref's but couldn't alloc the array so just
* give up (after all, this is an optional feature!).
*
* the array is divided into page sized "chunks." for chunks of length 1,
* the chunk reference count plus one is stored in that chunk's slot.
* for chunks of length > 1 the first slot contains (the reference count
* plus one) * -1. [the negative value indicates that the length is
* greater than one.] the second slot of the chunk contains the length
* of the chunk. here is an example:
*
* actual REFS:  2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
*       ppref: -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
*              <----------><-><----><-------><----><-><------->
* (x = don't care)
*
* this lets a single int hold the ref count for a whole chunk. note
* that the "plus one" part is needed because a reference count of zero
* is neither positive nor negative (we need a way to tell whether we
* have one zero or a bunch of them).
*
* here are some in-line functions to help us.
*/
/*
* pp_getreflen: get the reference and length for a specific offset
*
* => ppref's amap must be locked
*/
/*
* NOTE(review): the body below does not match this function's signature
* or its void return type. It uses amap, totalslots, kmflags and nowait,
* none of which are parameters or locals here, jumps to a fail1 label that
* is not defined in this function, and ends in "return NULL". None of the
* parameters (ppref, offset, refp, lenp) is touched. It looks like the
* tail of an amap allocation routine; the statements that decode
* ppref[offset] into *refp and *lenp, and the head of the allocation
* routine, appear to be missing - confirm against the original file.
*/
static inline void
pp_getreflen(int *ppref, int offset, int *refp, int *lenp)
{
/*
* Note: since allocations are likely big, we expect to reduce the
* memory fragmentation by allocating them in separate blocks.
*/
amap->am_slots = kmem_alloc(totalslots * sizeof(int), kmflags);
if (amap->am_slots == NULL)
goto fail1;
/*
* XXX hack to tell the pagedaemon how many pages we need,
* since we can need more than it would normally free.
*/
if (nowait) {
extern u_int uvm_extrapages;
/*
* Per slot: two ints plus one anon pointer. The byte total for
* totalslots slots is converted to pages with PAGE_SHIFT.
*/
atomic_add_int(&uvm_extrapages,
((sizeof(int) * 2 + sizeof(struct vm_anon *)) *
totalslots) >> PAGE_SHIFT);
}
return NULL;
}
/*
* amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
*
* => caller should ensure sz is a multiple of PAGE_SIZE
* => reference count to new amap is set to one
* => new amap is returned unlocked
*/
struct vm_amap *
amap_alloc(vaddr_t sz, vaddr_t padsz, int waitf)
{
struct vm_amap *amap;
int slots, padslots;
UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
/*
* amap_free: free an amap
*
* => the amap must be unlocked
* => the amap should have a zero reference count and be empty
*/
void
amap_free(struct vm_amap *amap)
{
int slots;
/*
* amap_extend: extend the size of an amap (if needed)
*
* => called from uvm_map when we want to extend an amap to cover
* a new mapping (rather than allocate a new one)
* => amap should be unlocked (we will lock it)
* => to safely extend an amap it should have a reference count of
* one (thus it can't be shared)
*/
int
amap_extend(struct vm_map_entry *entry, vsize_t addsize, int flags)
{
struct vm_amap *amap = entry->aref.ar_amap;
int slotoff = entry->aref.ar_pageoff;
int slotmapped, slotadd, slotneed, slotadded, slotalloc;
int slotadj, slotarea, slotendoff;
int oldnslots;
#ifdef UVM_AMAP_PPREF
int *newppref, *oldppref;
#endif
int i, *newsl, *newbck, *oldsl, *oldbck;
struct vm_anon **newover, **oldover;
const km_flag_t kmflags =
(flags & AMAP_EXTEND_NOWAIT) ? KM_NOSLEEP : KM_SLEEP;
/*
* first, determine how many slots we need in the amap. don't
* forget that ar_pageoff could be non-zero: this means that
* there are some unused slots before us in the amap.
*/
/*
* Because this amap only has 1 ref, we know that there is
* only one vm_map_entry pointing to it, and the one entry is
* using slots between slotoff and slotoff + slotmapped. If
* we have been using ppref then we know that only slots in
* the one map entry's range can have anons, since ppref
* allowed us to free any anons outside that range as other map
* entries which used this amap were removed. But without ppref,
* we couldn't know which slots were still needed by other map
* entries, so we couldn't free any anons as we removed map
* entries, and so any slot from 0 to am_nslot can have an
* anon. But now that we know there is only one map entry
* left and we know its range, we can free up any anons
* outside that range. This is necessary because the rest of
* this function assumes that there are no anons in the amap
* outside of the one map entry's range.
*/
slotendoff = slotoff + slotmapped;
if (amap->am_ppref == PPREF_NONE) {
amap_wiperange(amap, 0, slotoff);
amap_wiperange(amap, slotendoff, amap->am_nslot - slotendoff);
}
for (i = 0; i < slotoff; i++) {
KASSERT(amap->am_anon[i] == NULL);
}
for (i = slotendoff; i < amap->am_nslot - slotendoff; i++) {
KASSERT(amap->am_anon[i] == NULL);
}
/*
* case 1: we already have enough slots in the map and thus
* only need to bump the reference counts on the slots we are
* adding.
*/
/*
* no need to zero am_anon since that was done at
* alloc time and we never shrink an allocation.
*/
UVMHIST_LOG(maphist,"<- done (case 2f), amap = %#jx, "
"slotneed=%jd", (uintptr_t)amap, slotneed, 0, 0);
return 0;
} else {
#ifdef UVM_AMAP_PPREF
if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
/*
* Slide up the ref counts on the pages that
* are actually in use.
*/
memmove(amap->am_ppref + slotarea,
amap->am_ppref + slotoff,
slotmapped * sizeof(int));
/*
* Mark the (adjusted) gap at the front as
* referenced/not referenced.
*/
pp_setreflen(amap->am_ppref,
0, 0, slotarea - slotadd);
pp_setreflen(amap->am_ppref,
slotarea - slotadd, 1, slotadd);
}
#endif
/*
* Slide the anon pointers up and clear out
* the space we just made.
*/
memmove(amap->am_anon + slotarea,
amap->am_anon + slotoff,
slotmapped * sizeof(struct vm_anon*));
memset(amap->am_anon + slotoff, 0,
(slotarea - slotoff) * sizeof(struct vm_anon *));
/*
* Slide the backpointers up, but don't bother
* wiping out the old slots.
*/
memmove(amap->am_bckptr + slotarea,
amap->am_bckptr + slotoff,
slotmapped * sizeof(int));
/*
* Adjust all the useful active slot numbers.
*/
for (i = 0; i < amap->am_nused; i++)
amap->am_slots[i] += (slotarea - slotoff);
/*
* We just filled all the empty space in the
* front of the amap by activating a few new
* slots.
*/
amap->am_nslot = amap->am_maxslot;
entry->aref.ar_pageoff = slotarea - slotadd;
amap_unlock(amap);
/*
* Case 3: we need to allocate a new amap and copy all the amap
* data over from old amap to the new one. Drop the lock before
* performing allocation.
*
* Note: since allocations are likely big, we expect to reduce the
* memory fragmentation by allocating them in separate blocks.
*/
/*
* amap_share_protect: change protection of anons in a shared amap
*
* for shared amaps, given the current data structure layout, it is
* not possible for us to directly locate all maps referencing the
* shared anon (to change the protection). in order to protect data
* in shared maps we use pmap_page_protect(). [this is useful for IPC
* mechanisms like map entry passing that may want to write-protect
* all mappings of a shared amap.] we traverse am_anon or am_slots
* depending on the current state of the amap.
*
* => entry's map and amap must be locked by the caller
*/
void
amap_share_protect(struct vm_map_entry *entry, vm_prot_t prot)
{
struct vm_amap *amap = entry->aref.ar_amap;
u_int slots, lcv, slot, stop;
struct vm_anon *anon;
/*
* amap_wipeout: wipeout all anon's in an amap; then free the amap!
*
* => Called from amap_unref(), when reference count drops to zero.
* => amap must be locked.
*/
/*
* amap_copy: ensure that a map entry's "needs_copy" flag is false
* by copying the amap if necessary.
*
* => an entry with a null amap pointer will get a new (blank) one.
* => the map that the map entry belongs to must be locked by caller.
* => the amap currently attached to "entry" (if any) must be unlocked.
* => if canchunk is true, then we may clip the entry into a chunk
* => "startva" and "endva" are used only if canchunk is true. they are
* used to limit chunking (e.g. if you have a large space that you
* know you are going to need to allocate amaps for, there is no point
* in allowing that to be chunked)
*/
/*
* Allocate an initialised amap and install it.
* Note: we must update the length after clipping.
*/
len = entry->end - entry->start;
entry->aref.ar_pageoff = 0;
entry->aref.ar_amap = amap_alloc(len, 0, waitf);
if (entry->aref.ar_amap != NULL) {
entry->etype &= ~UVM_ET_NEEDSCOPY;
}
return;
}
/*
* First check and see if we are the only map entry referencing
* the amap we currently have. If so, then just take it over instead
* of copying it. Note that we are reading am_ref without lock held
* as the value can only be one if we have the only reference
* to the amap (via our locked map). If the value is greater than
* one, then allocate amap and re-check the value.
*/
if (srcamap->am_ref == 1) {
entry->etype &= ~UVM_ET_NEEDSCOPY;
UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]",
0, 0, 0, 0);
return;
}
UVMHIST_LOG(maphist," amap=%#jx, ref=%jd, must copy it",
(uintptr_t)srcamap, srcamap->am_ref, 0, 0);
/*
* Allocate a new amap (note: not initialised, etc).
*/
/*
* Make the new amap share the source amap's lock, and then lock
* both. We must do this before we set am_nused != 0, otherwise
* amap_swap_off() can become interested in the amap.
*/
/*
* Re-check the reference count with the lock held. If it has
* dropped to one - we can take over the existing map.
*/
if (srcamap->am_ref == 1) {
/* Just take over the existing amap. */
entry->etype &= ~UVM_ET_NEEDSCOPY;
amap_unlock(srcamap);
/* Destroy the new (unused) amap. */
amap->am_ref--;
amap_free(amap);
return;
}
/*
* Drop our reference to the old amap (srcamap) and unlock.
* Since the reference count on srcamap is greater than one,
* (we checked above), it cannot drop to zero while it is locked.
*/
/*
* amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
*
* called during fork(2) when the parent process has a wired map
* entry. in that case we want to avoid write-protecting pages
* in the parent's map (e.g. like what you'd do for a COW page)
* so we resolve the COW here.
*
* => assume parent's entry was wired, thus all pages are resident.
* => assume pages that are loaned out (loan_count) are already mapped
* read-only in all maps, and thus no need for us to worry about them
* => assume both parent and child vm_map's are locked
* => caller passes child's map/entry in to us
* => if we run out of memory we will unlock the amap and sleep _with_ the
* parent and child vm_map's locked(!). we have to do this since
* we are in the middle of a fork(2) and we can't let the parent
* map change until we are done copying all the map entries.
* => XXXCDC: out of memory should cause fork to fail, but there is
* currently no easy way to do this (needs fix)
*/
/*
* note that if we unlock the amap then we must ReStart the "lcv" for
* loop because some other process could reorder the anon's in the
* am_anon[] array on us while the lock is dropped.
*/
/*
* If anon has only one reference - we must have already
* copied it. This can happen if we needed to sleep waiting
* for memory in a previous run through this loop. The new
* page might even have been paged out, since it is not wired.
*/
/*
* Drop PG_BUSY on new page. Since its owner was write
* locked all this time - it cannot be PG_RELEASED or
* waited on.
*/
uvm_pagelock(npg);
uvm_pageactivate(npg);
uvm_pageunlock(npg);
npg->flags &= ~(PG_BUSY|PG_FAKE);
UVM_PAGE_OWN(npg, NULL);
}
amap_unlock(amap);
}
/*
* amap_splitref: split a single reference into two separate references
*
* => called from uvm_map's clip routines
* => origref's map should be locked
* => origref->ar_amap should be unlocked (we will lock)
*/
/*
* Adds one reference to origref's amap and sets splitref->ar_pageoff to
* origref->ar_pageoff plus leftslots.
*
* NOTE(review): this body looks incomplete - confirm against the
* original file:
*  - leftslots is read below but never assigned in this function;
*  - the "offset" parameter is never used (presumably leftslots is meant
*    to be derived from it);
*  - amap_unlock() is called, but no matching lock acquisition is
*    visible even though the contract says "we will lock";
*  - splitref->ar_amap is not written here.
*/
void
amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset)
{
struct vm_amap *amap = origref->ar_amap;
u_int leftslots;
#ifdef UVM_AMAP_PPREF
/* Establish ppref before we add a duplicate reference to the amap. */
if (amap->am_ppref == NULL) {
/* No ppref array yet: try to create one starting at our page offset. */
amap_pp_establish(amap, origref->ar_pageoff);
}
#endif
/* Note: not a share reference. */
amap->am_ref++;
/* The split-off reference starts leftslots slots after the original. */
splitref->ar_pageoff = origref->ar_pageoff + leftslots;
amap_unlock(amap);
}
#ifdef UVM_AMAP_PPREF
/*
 * amap_pp_establish: try to attach a per-page reference array to an amap.
 *
 * => caller must hold the amap's lock for writing (asserted).
 * => the array has one entry per am_maxslot slot and is requested with
 *    KM_NOSLEEP; if that request fails, am_ppref is set to PPREF_NONE
 *    and the amap simply goes without pprefs.
 * => on success, slots [0, offset) are recorded with a count of zero and
 *    slots [offset, am_nslot) with the amap's current am_ref.
 */
void
amap_pp_establish(struct vm_amap *amap, vaddr_t offset)
{
	size_t nbytes;

	KASSERT(rw_write_held(amap->am_lock));

	/* One counter for every slot the amap has room for. */
	nbytes = sizeof(amap->am_ppref[0]) * amap->am_maxslot;
	amap->am_ppref = kmem_zalloc(nbytes, KM_NOSLEEP);

	if (amap->am_ppref != NULL) {
		/* Leading gap is unreferenced; the rest inherits am_ref. */
		pp_setreflen(amap->am_ppref, 0, 0, offset);
		pp_setreflen(amap->am_ppref, offset, amap->am_ref,
		    amap->am_nslot - offset);
		return;
	}

	/* Could not get memory without sleeping: do not use ppref. */
	amap->am_ppref = PPREF_NONE;
}
/*
 * amap_pp_adjref: adjust reference count to a part of an amap using the
 * per-page reference count array.
 *
 * => caller must check that ppref != PPREF_NONE before calling.
 * => map and amap must be locked.
 * => "curslot" is the first slot to adjust and "slotlen" the number of
 *    slots; "adjval" is added to the count of every chunk in that range.
 * => chunks that straddle either end of the range are split first, so
 *    only slots in [curslot, curslot + slotlen) are changed.
 * => a chunk whose count becomes zero is passed to amap_wiperange().
 */
void
amap_pp_adjref(struct vm_amap *amap, int curslot, vsize_t slotlen, int adjval)
{
	int stopslot, *ppref, lcv, prevlcv;
	int ref, len, prevref, prevlen;

	/*
	 * Set up the walk before anything reads these: the first slot
	 * past the range, the array being edited, and a defined start
	 * for the "previous chunk" (used when the range begins at 0 and
	 * the first loop below never runs).
	 */
	stopslot = curslot + slotlen;
	ppref = amap->am_ppref;
	prevlcv = 0;

	/*
	 * Advance to the correct place in the array, fragment if needed.
	 */
	for (lcv = 0 ; lcv < curslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		if (lcv + len > curslot) {     /* goes past start? */
			pp_setreflen(ppref, lcv, ref, curslot - lcv);
			pp_setreflen(ppref, curslot, ref, len - (curslot -lcv));
			len = curslot - lcv;   /* new length of entry @ lcv */
		}
		prevlcv = lcv;
	}
	if (lcv == 0) {
		/*
		 * Ensure that the "prevref == ref" test below always
		 * fails, since we are starting from the beginning of
		 * the ppref array; that is, there is no previous chunk.
		 */
		prevref = -1;
		prevlen = 0;
	} else {
		pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
	}

	/*
	 * Now adjust reference counts in range. Merge the first
	 * changed entry with the last unchanged entry if possible.
	 */
	KASSERT(lcv == curslot);
	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		if (lcv + len > stopslot) {     /* goes past end? */
			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
			pp_setreflen(ppref, stopslot, ref,
			    len - (stopslot - lcv));
			len = stopslot - lcv;
		}
		ref += adjval;
		/* A per-page count can never go negative or exceed am_ref. */
		KASSERT(ref >= 0);
		KASSERT(ref <= amap->am_ref);
		if (lcv == prevlcv + prevlen && ref == prevref) {
			/* Adjacent to the previous chunk with equal count. */
			pp_setreflen(ppref, prevlcv, ref, prevlen + len);
		} else {
			pp_setreflen(ppref, lcv, ref, len);
		}
		if (ref == 0) {
			/* Nothing references these slots any more. */
			amap_wiperange(amap, lcv, len);
		}
	}
}
/*
* amap_wiperange: wipe out a range of an amap.
* Note: different from amap_wipeout because the amap is kept intact.
*
* => Both map and amap must be locked by caller.
*/
void
amap_wiperange(struct vm_amap *amap, int slotoff, int slots)
{
u_int lcv, stop, slotend;
bool byanon;
KASSERT(rw_write_held(amap->am_lock));
/*
* We can either traverse the amap by am_anon or by am_slots.
* Determine which way is less expensive.
*/
/*
* amap_swap_off: pagein anonymous pages in amaps and drop swap slots.
*
* => called with swap_syscall_lock held.
* => note that we don't always traverse all anons.
* eg. amaps being wiped out, released anons.
* => return true if failed.
*/
/*
* amap_lookups: look up a range of pages in an amap.
*
* => amap should be locked by caller.
*/
/*
* As visible here, this function only sanity-checks (under DIAGNOSTIC)
* the entries already present in anons[0..npages-1] and then logs.
*
* NOTE(review): this body looks incomplete - confirm against the
* original file:
*  - "slot" is declared and "offset" is passed in, but neither is used;
*  - nothing in this body writes to anons[], although the function is
*    described as looking up a range of pages in the amap.
*/
void
amap_lookups(struct vm_aref *aref, vaddr_t offset, struct vm_anon **anons,
int npages)
{
struct vm_amap *amap = aref->ar_amap;
u_int slot;
#if defined(DIAGNOSTIC)
/*
* Every non-NULL anon must still be referenced and must use the same
* lock as the amap.
*/
for (int i = 0; i < npages; i++) {
struct vm_anon * const an = anons[i];
if (an == NULL) {
continue;
}
KASSERT(an->an_ref != 0);
KASSERT(an->an_lock == amap->am_lock);
}
#endif
UVMHIST_LOG(maphist, "<- done", 0, 0, 0, 0);
}
/*
* amap_add: add (or replace) a page to an amap.
*
* => amap should be locked by caller.
* => anon must have the lock associated with this amap.
*/
void
amap_add(struct vm_aref *aref, vaddr_t offset, struct vm_anon *anon,
bool replace)
{
struct vm_amap *amap = aref->ar_amap;
u_int slot;
/*
* amap_ref: gain a reference to an amap.
*
* => amap must not be locked (we will lock).
* => "offset" and "len" are in units of pages.
* => Called at fork time to gain the child's reference.
*/
void
amap_ref(struct vm_amap *amap, vaddr_t offset, vsize_t len, int flags)
{
UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
/*
* amap_unref: remove a reference to an amap.
*
* => All pmap-level references to this amap must be already removed.
* => Called from uvm_unmap_detach(); entry is already removed from the map.
* => We will lock amap, so it must be unlocked.
*/
void
amap_unref(struct vm_amap *amap, vaddr_t offset, vsize_t len, bool all)
{
UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
/*
* If the last reference - wipeout and destroy the amap.
*/
amap->am_ref--;
amap_wipeout(amap);
UVMHIST_LOG(maphist,"<- done (was last ref)!", 0, 0, 0, 0);
return;
}
/*
* Otherwise, drop the reference count(s) on anons.
*/