/*-
* Copyright (c) 1998, 2001 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
* NASA Ames Research Center and by Chris G. Demetriou.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* the Systems Programming Group of the University of Utah Computer
* Science Department and Ralph Campbell.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)pmap.c 8.4 (Berkeley) 1/26/94
*/
/*
* Manages physical address maps.
*
* In addition to hardware address maps, this
* module is called upon to provide software-use-only
* maps which may or may not be stored in the same
* form as hardware maps. These pseudo-maps are
* used to store intermediate results from copy
* operations to and from address spaces.
*
* Since the information managed by this module is
* also stored by the logical address mapping module,
* this module may throw away valid virtual-to-physical
* mappings at almost any time. However, invalidations
* of virtual-to-physical mappings must be done as
* requested.
*
 * In order to cope with hardware architectures which
 * make virtual-to-physical map invalidations expensive,
 * this module may delay invalidation or protection-reduction
 * operations until such time as they are actually
 * necessary.  This module is given full information as
 * to which processors are currently using which maps,
 * and as to when physical maps must be made correct.
*/
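/*
 * For illustration only (not part of this file's interface): callers
 * such as the UVM fault handler drive this module through the
 * machine-independent pmap(9) entry points, roughly
 *
 *	pmap_enter(pmap, va, pa, prot, flags);	// establish va -> pa
 *	pmap_remove(pmap, sva, eva);		// drop mappings in [sva, eva)
 *	pmap_update(pmap);			// complete any delayed operations
 *
 * The precise flags and calling rules are specified by pmap(9).
 */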
#ifndef PMAP_HWPAGEWALKER
/*
 * Check that a seg_ppg[] array is empty.
 *
 * This is used when allocating or freeing a pmap_segtab_t.  The stb
 * should be unused -- that is, all of its seg_ppg[] pointers should be
 * NULL.  The check is applied at each transition of a segtab's life
 * cycle: a segtab freshly allocated from the pmap pool, an allocated
 * but unused page segtab from the SMP case where two CPUs raced to
 * allocate the same underlying segtab, a segtab entry being released
 * to the freelist, and (again for SMP) a freshly allocated but unused
 * entry being freed by the reserve path.
 */
static void
pmap_check_stb(pmap_segtab_t *stb, const char *caller, const char *why)
{
#ifdef DEBUG
for (size_t i = 0; i < PMAP_SEGTABSIZE; i++) {
if (stb->seg_ppg[i] != NULL) {
#define DEBUG_NOISY
#ifdef DEBUG_NOISY
UVMHIST_FUNC(__func__);
UVMHIST_CALLARGS(pmapxtabhist, "stb=%#jx",
(uintptr_t)stb, 0, 0, 0);
for (size_t j = i; j < PMAP_SEGTABSIZE; j++)
if (stb->seg_ppg[j] != NULL)
printf("%s: stb->seg_ppg[%zu] = %p\n",
caller, j, stb->seg_ppg[j]);
#endif
panic("%s: pm_segtab.seg_ppg[%zu] != 0 (%p): %s",
caller, i, stb->seg_ppg[i], why);
}
}
#endif
}
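/*
 * Typical usage, for illustration (actual call sites vary): the caller
 * passes its own name and a short reason string so that a panic
 * identifies where the non-empty segtab came from, e.g.
 *
 *	pmap_check_stb(stb, __func__, "from free list");
 */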
#endif /* PMAP_HWPAGEWALKER */
// Regardless of how many levels deep this page table is, we only
// need to verify the first level PDEs match up.
#ifdef XSEGSHIFT
idx &= va >> XSEGSHIFT;
#else
idx &= va >> SEGSHIFT;
#endif
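	/*
	 * If this pmap's top-level PDE no longer matches the kernel's,
	 * copy the kernel entry in; without PMAP_MAP_PDETABPAGE the KVA
	 * of the page-table page is mirrored in the shadow segtab too.
	 */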
if (uptb->pde_pde[idx] != kptb->pde_pde[idx]) {
pte_pde_set(&uptb->pde_pde[idx], kptb->pde_pde[idx]);
#if !defined(PMAP_MAP_PDETABPAGE)
ustb->seg_seg[idx] = kstb->seg_seg[idx]; // copy KVA of PTP
#endif
return true;
}
return false;
}
#endif /* PMAP_HWPAGEWALKER */
/*
* Now set each vm_page that maps this page to point to the
* pmap and set the offset to what we want.
*/
KASSERTMSG(pg->uobject == NULL, "pg %p pg->uobject %p", pg, pg->uobject);
pg->uobject = uobj;
pg->offset = off;
}
const vaddr_t kva = (vaddr_t)ppg;
/*
* All pte arrays should be page aligned.
*/
if ((kva & PAGE_MASK) != 0) {
panic("%s: pte entry at %p not page aligned", caller, ppg);
}
#ifdef DEBUG
for (size_t j = 0; j < NPTEPG; j++) {
if (ppg->ppg_ptes[j] != 0) {
UVMHIST_LOG(pmapxtabhist,
"pte entry %#jx not 0 (%#jx)",
(uintptr_t)&ppg->ppg_ptes[j],
(uintptr_t)ppg->ppg_ptes[j], 0, 0);
for (size_t i = j + 1; i < NPTEPG; i++)
if (ppg->ppg_ptes[i] != 0)
UVMHIST_LOG(pmapxtabhist,
"pte[%zu] = %#"PRIxPTE,
i, ppg->ppg_ptes[i], 0, 0);
UVMHIST_LOG(pmapxtabhist, "ptb_pg=%#jx",
(uintptr_t)ptb_pg, 0, 0, 0);
if (__predict_false(ptb_pg == NULL)) {
/*
* XXX What else can we do? Could we deadlock here?
*/
uvm_wait("pdetab");
goto again;
}
UVMHIST_LOG(pmapxtabhist, "... ptb %#jx found on freelist %d",
(uintptr_t)ptb, found_on_freelist, 0, 0);
return ptb;
}
#else
/*
 * Allocate and return a segment table for the given pmap.
 *
 * If no page is available to back a fresh segment table, wait for
 * memory and retry, so this routine does not return NULL.
 */
static pmap_segtab_t *
pmap_segtab_alloc(struct pmap *pmap)
{
UVMHIST_FUNC(__func__);
UVMHIST_CALLARGS(pmapxtabhist, "pm %#jx", (uintptr_t)pmap, 0, 0, 0);
if (__predict_false(stb_pg == NULL)) {
/*
* XXX What else can we do? Could we deadlock here?
*/
uvm_wait("segtab");
goto again;
}
SEGTAB_ADD(npage, 1);
const paddr_t stb_pa = VM_PAGE_TO_PHYS(stb_pg);
stb = (pmap_segtab_t *)PMAP_MAP_SEGTABPAGE(stb_pa);
UVMHIST_LOG(pmapxtabhist, "new stb=%#jx", (uintptr_t)stb, 0,
0, 0);
#if 0
CTASSERT(NBPG / sizeof(*stb) == 1);
const size_t n = NBPG / sizeof(*stb);
if (n > 1) {
/*
* link all the segtabs in this page together
*/
for (size_t i = 1; i < n - 1; i++) {
stb[i].seg_next = &stb[i + 1];
}
/*
* Now link the new segtabs into the free segtab list.
*/
mutex_spin_enter(&pmap_segtab_lock);
stb[n - 1].seg_next = pmap_segtab_info.segalloc.free_segtab;
pmap_segtab_info.segalloc.free_segtab = stb + 1;
SEGTAB_ADD(nput, n - 1);
mutex_spin_exit(&pmap_segtab_lock);
}
#endif
}
for (size_t i = (va / vinc) & pdetab_mask;
i < PMAP_PDETABSIZE;
i++, va += vinc) {
#ifdef _LP64
if (vinc > NBSEG) {
if (pte_pde_valid_p(ptb->pde_pde[i])) {
pmap_pdetab_t *nptb =
pmap_pde_to_pdetab(ptb->pde_pde[i]);
UVMHIST_LOG(pmapxtabhist,
" va %#jx ptp->pde_pde[%jd] (*%#jx) = %#jx "
"recursing", va, i, &ptb->pde_pde[i],
ptb->pde_pde[i]);
pmap_pdetab_release(pmap, &nptb, true,
va, vinc / NPDEPG);
ptb->pde_pde[i] = pte_invalid_pde();
KASSERT(nptb == NULL);
}
continue;
}
#endif
KASSERT(vinc == NBSEG);
/* get pointer to PT page */
pmap_ptpage_t *ppg = pmap_pde_to_ptpage(ptb->pde_pde[i]);
UVMHIST_LOG(pmapxtabhist,
" va %#jx ptb->pde_pde[%jd] (*%#jx) = %#jx", va, i,
(uintptr_t)&ptb->pde_pde[i], ptb->pde_pde[i]);
if (ppg == NULL)
continue;
UVMHIST_LOG(pmapxtabhist, " zeroing tab (%#jx)[%jd] (%#jx)",
(uintptr_t)ptb->pde_pde, i, (uintptr_t)&ptb->pde_pde[i], 0);
/*
* Allocate the top segment table for the pmap.
*/
void
pmap_segtab_init(pmap_t pmap)
{
UVMHIST_FUNC(__func__);
UVMHIST_CALLARGS(pmaphist, "pm %#jx", (uintptr_t)pmap, 0, 0, 0);
#if !defined(PMAP_HWPAGEWALKER) || !defined(PMAP_MAP_PDETABPAGE)
	/*
	 * Constantly converting an extracted PA back to a VA is somewhat
	 * expensive on systems with hardware page walkers but without an
	 * inexpensive way to access arbitrary physical addresses, so we
	 * also allocate a root segtab whose entries hold the kernel
	 * virtual addresses of the page-table pages.
	 */
pmap->pm_segtab = pmap_segtab_alloc(pmap);
#endif
#if defined(PMAP_HWPAGEWALKER)
pmap->pm_pdetab = pmap_pdetab_alloc(pmap);
pmap_md_pdetab_init(pmap);
#endif
}
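/*
 * Note (summarizing the conditionals above): a pmap ends up with only
 * pm_segtab when there is no hardware page walker, with only pm_pdetab
 * when the hardware walker's pdetab pages can be mapped directly, or
 * with both, in which case pm_segtab shadows pm_pdetab with kernel
 * virtual addresses.
 */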
/*
* Retire the given physical map from service.
* Should only be called if the map contains
* no valid mappings.
*/
void
pmap_segtab_destroy(pmap_t pmap, pte_callback_t func, uintptr_t flags)
{
KASSERT(pmap != pmap_kernel());
#ifdef _LP64
const vsize_t vinc = NBXSEG;
#else
const vsize_t vinc = NBSEG;
#endif
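	/*
	 * vinc is the span of virtual address space covered by one
	 * top-level entry: an XSEG on _LP64 (where the tree has an
	 * extra level), otherwise a SEG.
	 */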
/*
* Act on the given range of addresses from the specified map.
*
* It is assumed that the start and end are properly rounded to
* the page size.
*/
void
pmap_pte_process(pmap_t pmap, vaddr_t sva, vaddr_t eva,
pte_callback_t callback, uintptr_t flags)
{
#if 0
printf("%s: %p, %"PRIxVADDR", %"PRIxVADDR", %p, %"PRIxPTR"\n",
__func__, pmap, sva, eva, callback, flags);
#endif
while (sva < eva) {
vaddr_t lastseg_va = pmap_trunc_seg(sva) + NBSEG;
if (lastseg_va == 0 || lastseg_va > eva)
lastseg_va = eva;
/*
* If VA belongs to an unallocated segment,
* skip to the next segment boundary.
*/
pt_entry_t * const ptep = pmap_pte_lookup(pmap, sva);
if (ptep != NULL) {
/*
* Callback to deal with the ptes for this segment.
*/
(*callback)(pmap, sva, lastseg_va, ptep, flags);
}
/*
* In theory we could release pages with no entries,
* but that takes more effort than we want here.
*/
sva = lastseg_va;
}
}
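/*
 * For illustration only (a hypothetical callback, not part of this
 * file): each callback receives the first PTE of a segment together
 * with the VA range it covers and typically walks the PTEs one page
 * at a time, e.g.
 *
 *	static void
 *	example_pte_callback(pmap_t pmap, vaddr_t sva, vaddr_t eva,
 *	    pt_entry_t *ptep, uintptr_t flags)
 *	{
 *		for (; sva < eva; sva += PAGE_SIZE, ptep++) {
 *			pt_entry_t pte = *ptep;
 *			if (!pte_valid_p(pte))
 *				continue;
 *			// act on the mapping at sva
 *		}
 *	}
 *
 * which would be invoked as
 *
 *	pmap_pte_process(pmap, sva, eva, example_pte_callback, 0);
 */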
/*
 * Return a pointer to the PTE that corresponds to the specified virtual
 * address (va) in the target physical map, allocating it if needed.
*/
pt_entry_t *
pmap_pte_reserve(pmap_t pmap, vaddr_t va, int flags)
{
UVMHIST_FUNC(__func__);
UVMHIST_CALLARGS(pmaphist, "pm=%#jx va=%#jx flags=%#jx",
(uintptr_t)pmap, (uintptr_t)va, flags, 0);
pmap_ptpage_t *ppg;
paddr_t pa = 0;