/*
 * /usr/web/sources/contrib/rsc/mmu.c - Plan 9 from Bell Labs
 * Copyright © 2009 Alcatel-Lucent.
 * Distributed under the Lucent Public License version 1.02.
 */
/*
* Memory mappings.  Life was easier when 2G of memory was enough.
*
* The kernel memory starts at KZERO, with the text loaded at KZERO+1M
* (9load sits under 1M during the load).  The memory from KZERO to the
* top of memory is mapped 1-1 with physical memory, starting at physical
* address 0.  All kernel memory and data structures (i.e., the entries stored
* into conf.mem) must sit in this physical range: if KZERO is at 0xF0000000,
* then the kernel can only have 256MB of memory for itself.
*
* The 256M below KZERO comprises three parts.  The lowest 4M is the
* virtual page table, a virtual address representation of the current
* page table tree.  The second 4M is used for temporary per-process
* mappings managed by kmap and kunmap.  The remaining 248M is used
* for global (shared by all procs and all processors) device memory
* mappings and managed by vmap and vunmap.  The total amount (256M)
* could probably be reduced somewhat if desired.  The largest device
* mapping is that of the video card, and even though modern video cards
* have embarrassing amounts of memory, the video drivers only use one
* frame buffer worth (at most 16M).  Each is described in more detail below.
*
* The VPT is a 4M frame constructed by inserting the pdb into itself.
* This short-circuits one level of the page tables, with the result that
* the contents of second-level page tables can be accessed at VPT.
* We use the VPT to edit the page tables (see mmu) after inserting them
* into the page directory.  It is a convenient mechanism for mapping what
* might be otherwise-inaccessible pages.  The idea was borrowed from
* the Exokernel.
*
* The VPT doesn't solve all our problems, because we still need to
* prepare page directories before we can install them.  For that, we
* use tmpmap/tmpunmap, which map a single page at TMPADDR.
*/
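
/*
 * For concreteness, a sketch of the self-map arithmetic that the vpt
 * and vpd macros below implement: with the page directory installed
 * in its own slot PDX(VPT), the page table entry governing a virtual
 * address va becomes visible at the virtual address
 *
 *	VPT + 4*(va>>12)		(i.e., vpt[VPTX(va)])
 *
 * and the page directory entry for va at
 *
 *	VPT + 4*(VPT>>12) + 4*(va>>22)	(i.e., vpd[PDX(va)])
 *
 * (4-byte entries, 4K pages, 4M of address space per directory entry).
 */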

#include        "u.h"
#include        "../port/lib.h"
#include        "mem.h"
#include        "dat.h"
#include        "fns.h"
#include        "io.h"

/*
* Simple segment descriptors with no translation.
*/
#define DATASEGM(p)     { 0xFFFF, SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW }
#define EXECSEGM(p)     { 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
#define EXEC16SEGM(p)   { 0xFFFF, SEGG|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
#define TSSSEGM(b,p)    { ((b)<<16)|sizeof(Tss),\
                         ((b)&0xFF000000)|(((b)>>16)&0xFF)|SEGTSS|SEGPL(p)|SEGP }
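
/*
 * For example, DATASEGM(0) reads as a flat 4G kernel data segment:
 * the 0xFFFF low limit plus the 0xF<<16 bits give a 0xFFFFF limit,
 * scaled to 4K pages by SEGG; SEGB selects 32-bit operation; SEGP
 * marks the descriptor present at privilege level SEGPL(0); and
 * SEGDATA|SEGW makes it writable data.  Only the TSS descriptor
 * (TSSSEGM) carries a non-zero base address.
 */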

Segdesc gdt[NGDT] =
{
[NULLSEG]       { 0, 0},                /* null descriptor */
[KDSEG]         DATASEGM(0),            /* kernel data/stack */
[KESEG]         EXECSEGM(0),            /* kernel code */
[UDSEG]         DATASEGM(3),            /* user data/stack */
[UESEG]         EXECSEGM(3),            /* user code */
[TSSSEG]        TSSSEGM(0,0),           /* tss segment */
[KESEG16]       EXEC16SEGM(0),          /* kernel code 16-bit */
};

static int didmmuinit;
static void taskswitch(ulong, ulong);
static void memglobal(void);

#define vpt ((ulong*)VPT)
#define VPTX(va)                (((ulong)(va))>>12)
#define vpd (vpt+VPTX(VPT))

void
mmuinit0(void)
{
        memmove(m->gdt, gdt, sizeof gdt);
}

void
mmuinit(void)
{
       ulong x, *p;
       ushort ptr[3];

       didmmuinit = 1;

       if(0) print("vpt=%#.8ux vpd=%#.8lux kmap=%#.8ux\n",
               VPT, (ulong)vpd, KMAP);

       memglobal();
        m->pdb[PDX(VPT)] = PADDR(m->pdb)|PTEWRITE|PTEVALID;

        m->tss = malloc(sizeof(Tss));
        memset(m->tss, 0, sizeof(Tss));
        m->tss->iomap = 0xDFFF<<16;

       /*
        * We used to keep the GDT in the Mach structure, but it
        * turns out that that slows down access to the rest of the
        * page.  Since the Mach structure is accessed quite often,
        * it pays off anywhere from a factor of 1.25 to 2 on real
        * hardware to separate them (the AMDs are more sensitive
        * than Intels in this regard).  Under VMware it pays off
        * a factor of about 10 to 100.
        */
        memmove(m->gdt, gdt, sizeof gdt);
        x = (ulong)m->tss;
        m->gdt[TSSSEG].d0 = (x<<16)|sizeof(Tss);
        m->gdt[TSSSEG].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP;

        ptr[0] = sizeof(gdt)-1;
        x = (ulong)m->gdt;
        ptr[1] = x & 0xFFFF;
        ptr[2] = (x>>16) & 0xFFFF;
        lgdt(ptr);

        ptr[0] = sizeof(Segdesc)*256-1;
        x = IDTADDR;
        ptr[1] = x & 0xFFFF;
        ptr[2] = (x>>16) & 0xFFFF;
       lidt(ptr);

       /* make kernel text unwritable */
        for(x = KTZERO; x < (ulong)etext; x += BY2PG){
                p = mmuwalk(m->pdb, x, 2, 0);
                if(p == nil)
                        panic("mmuinit");
                *p &= ~PTEWRITE;
        }

        taskswitch(PADDR(m->pdb), (ulong)m + BY2PG);
       ltr(TSSSEL);
}

/*
* On processors that support it, we set the PTEGLOBAL bit in
* page table and page directory entries that map kernel memory.
* Doing this tells the processor not to bother flushing them
* from the TLB when doing the TLB flush associated with a
* context switch (write to CR3).  Since kernel memory mappings
* are never removed, this is safe.  (If we ever remove kernel memory
* mappings, we can do a full flush by turning off the PGE bit in CR4,
* writing to CR3, and then turning the PGE bit back on.)
*
* See also mmukmap below.
*
* Processor support for the PTEGLOBAL bit is enabled in devarch.c.
*/
static void
memglobal(void)
{
       int i, j;
       ulong *pde, *pte;

       /* only need to do this once, on bootstrap processor */
        if(m->machno != 0)
                return;

        if(!m->havepge)
                return;

        pde = m->pdb;
        for(i=PDX(KZERO); i<1024; i++){
                if(pde[i] & PTEVALID){
                        pde[i] |= PTEGLOBAL;
                        if(!(pde[i] & PTESIZE)){
                                pte = KADDR(pde[i]&~(BY2PG-1));
                                for(j=0; j<1024; j++)
                                        if(pte[j] & PTEVALID)
                                                pte[j] |= PTEGLOBAL;
                       }
               }
       }
}
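
/*
 * A minimal sketch of the full flush described above (nothing in
 * this file needs it), assuming the getcr4/putcr4 pair declared in
 * fns.h and the architectural CR4 PGE bit, 0x80:
 *
 *	cr4 = getcr4();
 *	putcr4(cr4 & ~0x80);	turn off PGE: global entries now flushable
 *	putcr3(m->tss->cr3);	reload CR3: flush the entire TLB
 *	putcr4(cr4);		turn PGE back on
 */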

/*
* Flush all the user-space and device-mapping mmu info
* for this process, because something has been deleted.
* It will be paged back in on demand.
*/
void
flushmmu(void)
{
       int s;

       s = splhi();
        up->newtlb = 1;
       mmuswitch(up);
       splx(s);
}

/*
* Flush a single page mapping from the tlb.
*/
void
flushpg(ulong va)
{
        if(X86FAMILY(m->cpuidax) >= 4)
                invlpg(va);
        else
                putcr3(m->tss->cr3);
}

/*
* Allocate a new page for a page directory.
* We keep a small cache of pre-initialized
* page directories in each mach.
*/
static Page*
mmupdballoc(void)
{
       int s;
       Page *page;
       ulong *pdb;

       s = splhi();
        if(m->pdbpool == 0){
                spllo();
                page = newpage(0, 0, 0);
                page->va = (ulong)vpd;
                splhi();
                pdb = tmpmap(page);
                memmove(pdb, m->pdb, BY2PG);
                pdb[PDX(VPT)] = page->pa|PTEWRITE|PTEVALID;  /* set up VPT */
                tmpunmap(pdb);
        }else{
                page = m->pdbpool;
                m->pdbpool = page->next;
                m->pdbcnt--;
       }
       splx(s);
       return page;
}

static void
mmupdbfree(Proc *proc, Page *p)
{
        if(islo())
                panic("mmupdbfree: islo");
        if(m->pdbcnt >= 10){
                p->next = proc->mmufree;
                proc->mmufree = p;
        }else{
                p->next = m->pdbpool;
                m->pdbpool = p;
                m->pdbcnt++;
       }
}

/*
* A user-space memory segment has been deleted, or the
* process is exiting.  Clear all the pde entries for user-space
* memory mappings and device mappings.  Any entries that
* are needed will be paged back in as necessary.
*/
static void
mmuptefree(Proc* proc)
{
       int s;
       ulong *pdb;
       Page **last, *page;

        if(proc->mmupdb == nil || proc->mmuused == nil)
                return;
        s = splhi();
        pdb = tmpmap(proc->mmupdb);
        last = &proc->mmuused;
        for(page = *last; page; page = page->next){
                pdb[page->daddr] = 0;
                last = &page->next;
        }
        tmpunmap(pdb);
        splx(s);
        *last = proc->mmufree;
        proc->mmufree = proc->mmuused;
        proc->mmuused = 0;
}

static void
taskswitch(ulong pdb, ulong stack)
{
       Tss *tss;

        tss = m->tss;
        tss->ss0 = KDSEL;
        tss->esp0 = stack;
        tss->ss1 = KDSEL;
        tss->esp1 = stack;
        tss->ss2 = KDSEL;
        tss->esp2 = stack;
        tss->cr3 = pdb;
       putcr3(pdb);
}

void
mmuswitch(Proc* proc)
{
       ulong *pdb;

        if(proc->newtlb){
                mmuptefree(proc);
                proc->newtlb = 0;
        }

        if(proc->mmupdb){
                pdb = tmpmap(proc->mmupdb);
                pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
                tmpunmap(pdb);
                taskswitch(proc->mmupdb->pa, (ulong)(proc->kstack+KSTACK));
        }else
                taskswitch(PADDR(m->pdb), (ulong)(proc->kstack+KSTACK));
}

/*
* Release any pages allocated for a page directory base or page-tables
* for this process:
*   switch to the prototype pdb for this processor (m->pdb);
*   call mmuptefree() to place all pages used for page-tables (proc->mmuused)
*   onto the process' free list (proc->mmufree). This has the side-effect of
*   cleaning any user entries in the pdb (proc->mmupdb);
*   if there's a pdb put it in the cache of pre-initialized pdb's
*   for this processor (m->pdbpool) or on the process' free list;
*   finally, place any pages freed back into the free pool (palloc).
* This routine is only called from schedinit() with palloc locked.
*/
void
mmurelease(Proc* proc)
{
       int s;
       Page *page, *next;
       ulong *pdb;

        taskswitch(PADDR(m->pdb), (ulong)m + BY2PG);
        if(proc->kmaptable){
                if(proc->mmupdb == nil)
                        panic("mmurelease: no mmupdb");
                if(--proc->kmaptable->ref)
                        panic("mmurelease: kmap ref %d\n", proc->kmaptable->ref);
                if(up->nkmap)
                        panic("mmurelease: nkmap %d\n", up->nkmap);
                /*
                 * remove kmaptable from pdb before putting pdb up for reuse.
                 */
                s = splhi();
                pdb = tmpmap(proc->mmupdb);
                if(PPN(pdb[PDX(KMAP)]) != proc->kmaptable->pa)
                        panic("mmurelease: bad kmap pde %#.8lux kmap %#.8lux",
                                pdb[PDX(KMAP)], proc->kmaptable->pa);
                pdb[PDX(KMAP)] = 0;
                tmpunmap(pdb);
                splx(s);
                /*
                 * move kmaptable to free list.
                 */
                pagechainhead(proc->kmaptable);
                proc->kmaptable = 0;
        }
        if(proc->mmupdb){
                mmuptefree(proc);
                mmupdbfree(proc, proc->mmupdb);
                proc->mmupdb = 0;
        }
        for(page = proc->mmufree; page; page = next){
                next = page->next;
                if(--page->ref)
                        panic("mmurelease: page->ref %d\n", page->ref);
                pagechainhead(page);
        }
        if(proc->mmufree && palloc.r.p)
                wakeup(&palloc.r);
        proc->mmufree = 0;
}

/*
* Allocate and install pdb for the current process.
*/
static void
upallocpdb(void)
{
       int s;
       ulong *pdb;
       Page *page;

       page = mmupdballoc();
       s = splhi();
        pdb = tmpmap(page);
        pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
        tmpunmap(pdb);
        up->mmupdb = page;
        mmuflushtlb(up->mmupdb->pa);
       splx(s);
}

/*
* Update the mmu in response to a user fault.  pa may have PTEWRITE set.
*/
void
putmmu(ulong va, ulong pa, Page*)
{
       int old, s;
       Page *page;

        if(up->mmupdb == nil)
                upallocpdb();

        s = splhi();
        if(!(vpd[PDX(va)]&PTEVALID)){
                if(up->mmufree == 0)
                        page = newpage(0, 0, 0);
                else{
                        page = up->mmufree;
                        up->mmufree = page->next;
                }
                vpd[PDX(va)] = PPN(page->pa)|PTEUSER|PTEWRITE|PTEVALID;
                /* page is now mapped into the VPT - clear it */
                memset((void*)(VPT+PDX(va)*BY2PG), 0, BY2PG);
                page->daddr = PDX(va);
                page->next = up->mmuused;
                up->mmuused = page;
        }
        old = vpt[VPTX(va)];
        vpt[VPTX(va)] = pa|PTEUSER|PTEVALID;
        if(old&PTEVALID)
               flushpg(va);
       splx(s);
}

/*
* Double-check the user MMU.
* Error checking only.
*/
void
checkmmu(ulong va, ulong pa)
{
        if(up->mmupdb == 0)
                return;
        if(!(vpd[PDX(va)]&PTEVALID) || !(vpt[VPTX(va)]&PTEVALID))
                return;
        if(PPN(vpt[VPTX(va)]) != pa)
                print("%ld %s: va=0x%08lux pa=0x%08lux pte=0x%08lux\n",
                        up->pid, up->text,
                       va, pa, vpt[VPTX(va)]);
}

/*
* Walk the page-table pointed to by pdb and return a pointer
* to the entry for virtual address va at the requested level.
* If the entry is invalid and create isn't requested then bail
* out early. Otherwise, for the 2nd level walk, allocate a new
* page-table page and register it in the 1st level.  This is used
* only to edit kernel mappings, which use pages from kernel memory,
* so it's okay to use KADDR to look at the tables.
*/
ulong*
mmuwalk(ulong* pdb, ulong va, int level, int create)
{
       ulong *table;
       void *map;

        table = &pdb[PDX(va)];
        if(!(*table & PTEVALID) && create == 0)
                return 0;

        switch(level){

        default:
                return 0;

        case 1:
                return table;

        case 2:
                if(*table & PTESIZE)
                        panic("mmuwalk2: va %luX entry %luX\n", va, *table);
                if(!(*table & PTEVALID)){
                        /*
                         * Have to call low-level allocator from
                         * memory.c if we haven't set up the xalloc
                         * tables yet.
                         */
                        if(didmmuinit)
                                map = xspanalloc(BY2PG, BY2PG, 0);
                        else
                                map = rampage();
                        if(map == nil)
                                panic("mmuwalk xspanalloc failed");
                        *table = PADDR(map)|PTEWRITE|PTEVALID;
                }
                table = KADDR(PPN(*table));
                return &table[PTX(va)];
       }
}
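
/*
 * For example (a sketch mirroring the loop in mmuinit above), the
 * level-2 entry for the first kernel text page can be fetched,
 * without creating missing tables, as:
 *
 *	ulong *pte;
 *
 *	pte = mmuwalk(m->pdb, KTZERO, 2, 0);
 *	if(pte != nil && (*pte & PTEVALID))
 *		...		inspect or edit *pte
 */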

/*
* Device mappings are shared by all procs and processors and
* live in the virtual range VMAP to VMAP+VMAPSIZE.  The master
* copy of the mappings is stored in mach0->pdb, and they are
* paged in from there as necessary by vmapsync during faults.
*/

static Lock vmaplock;

static int findhole(ulong *a, int n, int count);
static ulong vmapalloc(ulong size);
static void pdbunmap(ulong*, ulong, int);

/*
* Add a device mapping to the vmap range.
*/
void*
vmap(ulong pa, int size)
{
       int osize;
       ulong o, va;

       /*
        * might be asking for less than a page.
        */
       osize = size;
        o = pa & (BY2PG-1);
        pa -= o;
        size += o;

        size = ROUND(size, BY2PG);
        if(pa == 0){
                print("vmap pa=0 pc=%#.8lux\n", getcallerpc(&pa));
                return nil;
        }
        ilock(&vmaplock);
        if((va = vmapalloc(size)) == 0
        || pdbmap(MACHP(0)->pdb, pa|PTEUNCACHED|PTEWRITE, va, size) < 0){
                iunlock(&vmaplock);
                return 0;
        }
        iunlock(&vmaplock);
        /* avoid trap on local processor
        for(i=0; i<size; i+=4*MB)
                vmapsync(va+i);
        */
        USED(osize);
//      print("  vmap %#.8lux %d => %#.8lux\n", pa+o, osize, va+o);
       return (void*)(va + o);
}
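
/*
 * Typical use, as a sketch (the physical address here is purely
 * illustrative, not any particular device):
 *
 *	ulong *regs;
 *
 *	regs = vmap(0xFEC00000, BY2PG);
 *	if(regs == nil)
 *		...		mapping failed
 *	regs[0] = ...;		device registers, uncached
 *	vunmap(regs, BY2PG);
 */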

static int
findhole(ulong *a, int n, int count)
{
       int have, i;

       have = 0;
        for(i=0; i<n; i++){
                if(a[i] == 0)
                        have++;
                else
                        have = 0;
                if(have >= count)
                       return i+1 - have;
       }
       return -1;
}
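
/*
 * For example, with a[] = { 1, 0, 0, 1, 0, 0, 0 } and n = 7,
 * findhole(a, 7, 3) returns 4, the index of the first run of
 * three free (zero) slots; findhole(a, 7, 4) returns -1.
 */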

/*
* Look for free space in the vmap.
*/
static ulong
vmapalloc(ulong size)
{
       int i, n, o;
       ulong *vpdb;
       int vpdbsize;

        vpdb = &MACHP(0)->pdb[PDX(VMAP)];
        vpdbsize = VMAPSIZE/(4*MB);

        if(size >= 4*MB){
                n = (size+4*MB-1) / (4*MB);
                if((o = findhole(vpdb, vpdbsize, n)) != -1)
                        return VMAP + o*4*MB;
                return 0;
        }
        n = (size+BY2PG-1) / BY2PG;
        for(i=0; i<vpdbsize; i++)
                if((vpdb[i]&PTEVALID) && !(vpdb[i]&PTESIZE))
                        if((o = findhole(KADDR(PPN(vpdb[i])), WD2PG, n)) != -1)
                                return VMAP + i*4*MB + o*BY2PG;
        if((o = findhole(vpdb, vpdbsize, 1)) != -1)
                return VMAP + o*4*MB;

       /*
        * could span page directory entries, but not worth the trouble.
        * not going to be very much contention.
        */
       return 0;
}

/*
* Remove a device mapping from the vmap range.
* Since pdbunmap does not remove page tables, just entries,
* the call need not be interlocked with vmap.
*/
void
vunmap(void *v, int size)
{
       int i;
       ulong va, o;
       Mach *nm;
       Proc *p;

       /*
        * might not be aligned
        */
       va = (ulong)v;
        o = va&(BY2PG-1);
        va -= o;
        size += o;
        size = ROUND(size, BY2PG);

        if(size < 0 || va < VMAP || va+size > VMAP+VMAPSIZE)
                panic("vunmap va=%#.8lux size=%#x pc=%#.8lux\n",
                        va, size, getcallerpc(&va));

        pdbunmap(MACHP(0)->pdb, va, size);

       /*
        * Flush mapping from all the tlbs and copied pdbs.
        * This can be (and is) slow, since it is called only rarely.
        */
        for(i=0; i<conf.nproc; i++){
                p = proctab(i);
                if(p->state == Dead)
                        continue;
                if(p != up)
                        p->newtlb = 1;
        }
        for(i=0; i<conf.nmach; i++){
                nm = MACHP(i);
                if(nm != m)
                        nm->flushmmu = 1;
        }
        flushmmu();
        for(i=0; i<conf.nmach; i++){
                nm = MACHP(i);
                if(nm != m)
                        while((active.machs&(1<<nm->machno)) && nm->flushmmu)
                                ;
       }
}

/*
* Add kernel mappings for pa -> va for a section of size bytes.
*/
int
pdbmap(ulong *pdb, ulong pa, ulong va, int size)
{
       int pse;
       ulong pae, pgsz, *pte, *table;
       ulong flag;

        flag = pa&0xFFF;
        pa &= ~0xFFF;

        if((MACHP(0)->cpuiddx & 0x08) && (getcr4() & 0x10))
                pse = 1;
        else
                pse = 0;

        pae = pa + size;
        while(pa < pae){
                table = &pdb[PDX(va)];
                if((*table&PTEVALID) && (*table&PTESIZE))
                        panic("vmap: va=%#.8lux pa=%#.8lux pde=%#.8lux",
                                va, pa, *table);

                /*
                 * Check if it can be mapped using a 4MB page:
                 * va, pa aligned and size >= 4MB and processor can do it.
                 */
                if(pse && pa%(4*MB) == 0 && va%(4*MB) == 0 && (pae >= pa+4*MB)){
                        *table = pa|PTESIZE|flag|PTEVALID;
                        pgsz = 4*MB;
                }else{
                        pte = mmuwalk(pdb, va, 2, 1);
                        if(*pte&PTEVALID)
                                panic("vmap: va=%#.8lux pa=%#.8lux pte=%#.8lux",
                                        va, pa, *pte);
                        *pte = pa|flag|PTEVALID;
                       pgsz = BY2PG;
               }
               pa += pgsz;
               va += pgsz;
       }
       return 0;
}

/*
* Remove mappings.  Must already exist, for sanity.
* Only used for kernel mappings, so okay to use KADDR.
*/
static void
pdbunmap(ulong *pdb, ulong va, int size)
{
       ulong vae;
       ulong *table;

       vae = va+size;
        while(va < vae){
                table = &pdb[PDX(va)];
                if(!(*table & PTEVALID)){
                        panic("vunmap: not mapped");
                        /*
                        va = (va+4*MB-1) & ~(4*MB-1);
                        continue;
                        */
                }
                if(*table & PTESIZE){
                        *table = 0;
                        va = (va+4*MB-1) & ~(4*MB-1);
                        continue;
                }
                table = KADDR(PPN(*table));
                if(!(table[PTX(va)] & PTEVALID))
                        panic("vunmap: not mapped");
                table[PTX(va)] = 0;
               va += BY2PG;
       }
}

/*
* Handle a fault by bringing vmap up to date.
* Only copy pdb entries and they never go away,
* so no locking needed.
*/
int
vmapsync(ulong va)
{
       ulong entry, *table;

        if(va < VMAP || va >= VMAP+VMAPSIZE)
                return 0;

        entry = MACHP(0)->pdb[PDX(va)];
        if(!(entry&PTEVALID))
                return 0;
        if(!(entry&PTESIZE)){
                /* make sure entry will help the fault */
                table = KADDR(PPN(entry));
                if(!(table[PTX(va)]&PTEVALID))
                       return 0;
       }
       vpd[PDX(va)] = entry;
       /*
        * TLB doesn't cache negative results, so no flush needed.
        */
       return 1;
}


/*
* KMap is used to map individual pages into virtual memory.
* It is rare to have more than a few KMaps at a time (in the
* absence of interrupts, only two at a time are ever used,
* but interrupts can stack).  The mappings are local to a process,
* so we can use the same range of virtual address space for
* all processes without any coordination.
*/
#define kpt (vpt+VPTX(KMAP))
#define NKPT (KMAPSIZE/BY2PG)

KMap*
kmap(Page *page)
{
       int i, o, s;

       if(up == nil)
               panic("kmap: up=0 pc=%#.8lux", getcallerpc(&amp;page));
       if(up-&gt;mmupdb == nil)
               upallocpdb();
       up-&gt;nkmap++;
       if(!(vpd[PDX(KMAP)]&amp;PTEVALID)){
               /* allocate page directory */
               if(KMAPSIZE &gt; BY2XPG)
                       panic("bad kmapsize");
               if(up-&gt;kmaptable != nil)
                       panic("kmaptable");
               s = spllo();
               up-&gt;kmaptable = newpage(0, 0, 0);
               splx(s);
               vpd[PDX(KMAP)] = up-&gt;kmaptable-&gt;pa|PTEWRITE|PTEVALID;
               memset(kpt, 0, BY2PG);

               /* might as well finish the job */
               kpt[0] = page-&gt;pa|PTEWRITE|PTEVALID;
               up-&gt;lastkmap = 0;
               return (KMap*)KMAP;
       }
       if(up-&gt;kmaptable == nil)
               panic("no kmaptable");
       o = up-&gt;lastkmap+1;
       for(i=0; i&lt;NKPT; i++){
               if(kpt[(i+o)%NKPT] == 0){
                       o = (i+o)%NKPT;
                       kpt[o] = page-&gt;pa|PTEWRITE|PTEVALID;
                       up-&gt;lastkmap = o;
                       return (KMap*)(KMAP+o*BY2PG);
               }
       }
       panic("out of kmap");
       return nil;
}

void
kunmap(KMap *k)
{
       ulong va;

       va = (ulong)k;
        if(up->mmupdb == nil || !(vpd[PDX(KMAP)]&PTEVALID))
                panic("kunmap: no kmaps");
        if(va < KMAP || va >= KMAP+KMAPSIZE)
                panic("kunmap: bad address %#.8lux pc=%#.8lux", va, getcallerpc(&k));
        if(!(vpt[VPTX(va)]&PTEVALID))
                panic("kunmap: not mapped %#.8lux pc=%#.8lux", va, getcallerpc(&k));
        up->nkmap--;
       vpt[VPTX(va)] = 0;
       flushpg(va);
}
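
/*
 * Typical use, as a sketch (VA is the usual mem.h macro turning a
 * KMap* back into a virtual address):
 *
 *	KMap *k;
 *
 *	k = kmap(page);
 *	memset((void*)VA(k), 0, BY2PG);
 *	kunmap(k);
 */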


/*
* Temporary one-page mapping used to edit page directories.
*
* The fasttmp #define controls whether the code optimizes
* the case where the page is already mapped in the physical
* memory window.
*/
#define fasttmp 1

void*
tmpmap(Page *p)
{
       ulong i;
       ulong *entry;

       if(islo())
               panic("tmpaddr: islo");

        if(fasttmp && p->pa < -KZERO)
                return KADDR(p->pa);

        /*
         * PDX(TMPADDR) == PDX(MACHADDR), so this
         * entry is private to the processor and shared
         * between up->mmupdb (if any) and m->pdb.
         */
        entry = &vpt[VPTX(TMPADDR)];
        if(!(*entry&PTEVALID)){
                for(i=KZERO; i<=CPU0MACH; i+=BY2PG)
                        print("%.8lux: *%.8lux=%.8lux (vpt=%.8lux index=%.8lux)\n", i, &vpt[VPTX(i)], vpt[VPTX(i)], vpt, VPTX(i));
                panic("tmpmap: no entry");
        }
        if(PPN(*entry) != PPN(TMPADDR-KZERO))
                panic("tmpmap: already mapped entry=%#.8lux", *entry);
        *entry = p->pa|PTEWRITE|PTEVALID;
       flushpg(TMPADDR);
       return (void*)TMPADDR;
}

void
tmpunmap(void *v)
{
       ulong *entry;

       if(islo())
               panic("tmpaddr: islo");
        if(fasttmp && (ulong)v >= KZERO && v != (void*)TMPADDR)
                return;
        if(v != (void*)TMPADDR)
                panic("tmpunmap: bad address");
        entry = &vpt[VPTX(TMPADDR)];
        if(!(*entry&PTEVALID) || PPN(*entry) == PPN(PADDR(TMPADDR)))
                panic("tmpunmap: not mapped entry=%#.8lux", *entry);
       *entry = PPN(TMPADDR-KZERO)|PTEWRITE|PTEVALID;
       flushpg(TMPADDR);
}

/*
* These could go back to being macros once the kernel is debugged,
* but the extra checking is nice to have.
*/
void*
kaddr(ulong pa)
{
        if(pa > (ulong)-KZERO)
               panic("kaddr: pa=%#.8lux", pa);
       return (void*)(pa+KZERO);
}

ulong
paddr(void *v)
{
       ulong va;

       va = (ulong)v;
        if(va < KZERO)
               panic("paddr: va=%#.8lux", va);
       return va-KZERO;
}

/*
* More debugging.
*/
void
countpagerefs(ulong *ref, int print)
{
       int i, n;
       Mach *mm;
       Page *pg;
       Proc *p;

       n = 0;
        for(i=0; i<conf.nproc; i++){
                p = proctab(i);
                if(p->mmupdb){
                        if(print){
                                if(ref[pagenumber(p->mmupdb)])
                                        iprint("page %#.8lux is proc %d (pid %lud) pdb\n",
                                                p->mmupdb->pa, i, p->pid);
                                continue;
                        }
                        if(ref[pagenumber(p->mmupdb)]++ == 0)
                                n++;
                        else
                                iprint("page %#.8lux is proc %d (pid %lud) pdb but has other refs!\n",
                                        p->mmupdb->pa, i, p->pid);
                }
                if(p->kmaptable){
                        if(print){
                                if(ref[pagenumber(p->kmaptable)])
                                        iprint("page %#.8lux is proc %d (pid %lud) kmaptable\n",
                                                p->kmaptable->pa, i, p->pid);
                                continue;
                        }
                        if(ref[pagenumber(p->kmaptable)]++ == 0)
                                n++;
                        else
                                iprint("page %#.8lux is proc %d (pid %lud) kmaptable but has other refs!\n",
                                        p->kmaptable->pa, i, p->pid);
                }
                for(pg=p->mmuused; pg; pg=pg->next){
                        if(print){
                                if(ref[pagenumber(pg)])
                                        iprint("page %#.8lux is on proc %d (pid %lud) mmuused\n",
                                                pg->pa, i, p->pid);
                                continue;
                        }
                        if(ref[pagenumber(pg)]++ == 0)
                                n++;
                        else
                                iprint("page %#.8lux is on proc %d (pid %lud) mmuused but has other refs!\n",
                                        pg->pa, i, p->pid);
                }
                for(pg=p->mmufree; pg; pg=pg->next){
                        if(print){
                                if(ref[pagenumber(pg)])
                                        iprint("page %#.8lux is on proc %d (pid %lud) mmufree\n",
                                                pg->pa, i, p->pid);
                                continue;
                        }
                        if(ref[pagenumber(pg)]++ == 0)
                                n++;
                        else
                                iprint("page %#.8lux is on proc %d (pid %lud) mmufree but has other refs!\n",
                                        pg->pa, i, p->pid);
                }
        }
        if(!print)
                iprint("%d pages in proc mmu\n", n);
        n = 0;
        for(i=0; i<conf.nmach; i++){
                mm = MACHP(i);
                for(pg=mm->pdbpool; pg; pg=pg->next){
                        if(print){
                                if(ref[pagenumber(pg)])
                                        iprint("page %#.8lux is in cpu%d pdbpool\n",
                                                pg->pa, i);
                                continue;
                        }
                        if(ref[pagenumber(pg)]++ == 0)
                                n++;
                        else
                                iprint("page %#.8lux is in cpu%d pdbpool but has other refs!\n",
                                        pg->pa, i);
               }
       }
       if(!print)
               iprint("%d pages in mach pdbpools\n", n);
}