--- linux-2.3.48-pre2/Documentation/kiobuf.sample.c.~1~ Fri Feb 25 15:17:17 2000
+++ linux-2.3.48-pre2/Documentation/kiobuf.sample.c     Fri Feb 25 16:26:01 2000
@@ -0,0 +1,216 @@
+/*
+ * Example code for using kiobufs within a device driver for memory
+ * mapping of kernel memory into user space.
+ *
+ * This module creates a device driver which allocates memory in the
+ * kernel from arbitrary pages via vmalloc(), and then uses kiobufs to
+ * map those into user space.
+ *
+ * This module registers a misc char device driver called "kiomap".
+ * Its major number will be 10; the minor number is assigned
+ * dynamically and can be found by looking at /proc/misc (the usual
+ * minor number will be 63 unless other misc devices have already been
+ * registered).
+ *
+ * The device driver here can be opened, but read and write functions
+ * are not declared.  However, the file descriptor to the driver can be
+ * mmap()ed, and the driver will use the kiobuf mmapping routines to map
+ * an area of kernel memory into a process's address space.
+ *
+ * Any number of processes may map the same memory at once, and it will
+ * act as shared memory.  The kiobuf_vmap code will track the number of
+ * references to the memory, and the driver's module reference count is
+ * adjusted to make sure that the driver can be unloaded only when
+ * there are no active users.
+ *
+ * Written by Stephen C. Tweedie, 2000
+ * (C) Red Hat, Inc. 2000
+ */
+
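+/*
+ * A minimal sketch of the intended user-space usage.  The device node
+ * name here is hypothetical --- create it with mknod, using misc
+ * major 10 and the minor found in /proc/misc:
+ *
+ *	fd = open("/dev/kiomap", O_RDWR);
+ *	buf = mmap(NULL, 100 * getpagesize(),
+ *		   PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ *
+ * Every process mapping the device this way shares the same pages.
+ */
+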
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/vmalloc.h>
+#include <linux/miscdevice.h>
+#include <linux/iobuf.h>
+#include <asm/semaphore.h>
+
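+/* Change the "#if 1" below to "#if 0" to enable dprintk debugging output. */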
+#if 1
+#define dprintk(x...)
+#else
+#define dprintk printk
+#endif
+
+static int kiomap_mmap(struct file * file, struct vm_area_struct * vma);
+static int kiomap_open(struct inode * inode, struct file * file);
+static int kiomap_release(struct inode * inode, struct file * file);
+
+
+static struct file_operations kiomap_fops = {
+       mmap:   kiomap_mmap,
+       open:   kiomap_open,
+       release:kiomap_release,
+};
+
+static struct miscdevice kiomap_device = {
+       minor:  MISC_DYNAMIC_MINOR,
+       name:   "kiomap",
+       fops:   &kiomap_fops
+};
+
+
+static void * local_data_area;
+#define DATA_SIZE (100 * PAGE_SIZE)
+
+static struct kiobuf local_kiobuf;
+static struct kiobuf_vmap local_vmap;
+
+
+/* A pointer to the kiomap_finished function will be stored in the
+ * kiobuf_vmap we use for mmap()ing, and this function will be called
+ * once there are no more users of the kvmap.  */
+
+static void kiomap_finished(struct kiobuf_vmap *vmap)
+{
+       MOD_DEC_USE_COUNT;
+       dprintk(KERN_INFO __FUNCTION__ ": decremented module count\n");
+}
+
+
+/* The initialisation function here creates three data structures.
+ * First of all it uses vmalloc() to reserve an area of memory.
+ * Secondly, it initialises a kiobuf into which that vmalloced memory is
+ * mapped.  Finally, it initialises a kiobuf_vmap which can be used to
+ * mmap that kiobuf into user space. */
+
+void __init create_local_heap(void)
+{
+       int err;
+
+       /* Get our kernel memory for the data heap first */
+
+       local_data_area = vmalloc(DATA_SIZE);
+       if (!local_data_area)
+               return;
+
+       /* Now initialise a kiobuf and kiobuf_vmap structure */
+
+       kiobuf_init(&local_kiobuf);
+       kvmap_init(&local_vmap, &local_kiobuf);
+       local_vmap.deref_callback = kiomap_finished;
+
+       /* map_kernel_kiobuf will find all of the physical pages
+        * referred to by the vmalloc()ed virtual memory area, and will
+        * map those physical pages into the kiobuf we have just
+        * prepared. */
+
+       err = map_kernel_kiobuf(&local_kiobuf,
+                               (unsigned long) local_data_area,
+                               DATA_SIZE);
+       if (err) {
+               vfree(local_data_area);
+               local_data_area = NULL;
+               return;
+       }
+
+       /* Initialise the vmalloced area --- we don't want users mapping
+        * this memory and peeking into stale kernel data! */
+
+       memset(local_data_area, 0xff, DATA_SIZE);
+}
+
+
+/* All we have to do on device open/close is to maintain the module
+ * reference counts. */
+
+static int kiomap_open(struct inode * inode, struct file * file)
+{
+       MOD_INC_USE_COUNT;
+       dprintk(KERN_INFO __FUNCTION__ ": incremented module count\n");
+       return 0;
+}
+
+static int kiomap_release(struct inode * inode, struct file * file)
+{
+       MOD_DEC_USE_COUNT;
+       dprintk(KERN_INFO __FUNCTION__ ": decremented module count\n");
+       return 0;
+}
+
+/* This is our example device's mmap function, as declared to the rest
+ * of the VM.  All we have to do in our case is call mmap_kiobuf()
+ * supplying the pre-initialised kiobuf_vmap struct that we created in
+ * create_local_heap() above.
+ *
+ * We have to take the vmap semaphore here before calling mmap_kiobuf
+ * --- every caller of that function must do so to be safe on SMP
+ * systems.  While we hold that semaphore we can also change the module
+ * reference count safely, since the same semaphore protects the call
+ * to MOD_DEC_USE_COUNT in the kiomap_finished() callback above.
+ */
+
+static int kiomap_mmap(struct file * file, struct vm_area_struct * vma)
+{
+       int err;
+
+       dprintk (KERN_INFO __FUNCTION__ ": begin(file %p, vma %p)\n",
+                file, vma);
+
+       /* A quick check to make sure we were initialised properly... */
+
+       if (!local_data_area)
+               return -ENOMEM;
+
+       /* Now we can take the kvmap semaphore and perform the mmap. */
+
+       down(&local_vmap.sem);
+       dprintk (KERN_INFO __FUNCTION__ ": Attempting mmap.\n");
+       err = mmap_kiobuf(&local_vmap, vma);
+       dprintk (KERN_INFO __FUNCTION__ ": mmap_kiobuf returned %d\n", err);
+
+       /* A return value of one means the kvmap was not in use when we
+        * called mmap_kiobuf() */
+
+       if (err == 1) {
+               MOD_INC_USE_COUNT;
+               dprintk(KERN_INFO __FUNCTION__ ": incremented module count\n");
+       }
+       up(&local_vmap.sem);
+
+       /* and a negative return value means an error. */
+
+       if (err < 0)
+               return err;
+
+       return 0;
+}
+
+
+/* The module initialisation and cleanup functions just create and
+ * destroy our local kvmap data structures, and register and deregister
+ * the testing character device driver. */
+
+int kiomap_init_module(void)
+{
+       int err;
+
+       create_local_heap();
+       if (!local_data_area)
+               return -ENOMEM;
+
+       err = misc_register(&kiomap_device);
+       if (err) {
+               unmap_kiobuf(&local_kiobuf);
+               vfree(local_data_area);
+       }
+       return err;
+}
+
+void kiomap_destroy_module(void)
+{
+       /* Drop the kiobuf's references on the pages before freeing them */
+       unmap_kiobuf(&local_kiobuf);
+       vfree(local_data_area);
+       misc_deregister(&kiomap_device);
+}
+
+module_init(kiomap_init_module);
+module_exit(kiomap_destroy_module);
+
+MODULE_DESCRIPTION("kiobuf vmap test driver");
--- linux-2.3.48-pre2/drivers/char/raw.c.~1~    Fri Feb 25 10:08:48 2000
+++ linux-2.3.48-pre2/drivers/char/raw.c        Fri Feb 25 15:18:58 2000
@@ -197,14 +197,17 @@
                       raw_device_bindings[minor] =
                               bdget(kdev_t_to_nr(MKDEV(rq.block_major, rq.block_minor)));
               } else {
+                       struct block_device *bdev;
                       kdev_t dev;
-                       if (!raw_device_bindings[minor]) {
-                               err = -ENODEV;
-                               break;
+
+                       bdev = raw_device_bindings[minor];
+                       if (bdev) {
+                               dev = to_kdev_t(bdev->bd_dev);
+                               rq.block_major = MAJOR(dev);
+                               rq.block_minor = MINOR(dev);
+                       } else {
+                               rq.block_major = rq.block_minor = 0;
                       }
-                       dev = to_kdev_t(raw_device_bindings[minor]->bd_dev);
-                       rq.block_major = MAJOR(dev);
-                       rq.block_minor = MINOR(dev);
                       err = copy_to_user((void *) arg, &rq, sizeof(rq));
               }
               break;
@@ -304,7 +307,12 @@
               err = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize);
               if (err)
                       break;
-
+#if 0
+               err = lock_kiovec(1, &iobuf, 1);
+               if (err)
+                       break;
+#endif
+
               for (i=0; i < blocks; i++)
                       b[i] = blocknr++;

@@ -316,7 +324,7 @@
                       buf += err;
               }

-               unmap_kiobuf(iobuf);
+               unmap_kiobuf(iobuf); /* The unlock_kiovec is implicit here */

               if (err != iosize)
                       break;
--- linux-2.3.48-pre2/fs/buffer.c.~1~   Fri Feb 25 10:08:49 2000
+++ linux-2.3.48-pre2/fs/buffer.c       Fri Feb 25 15:17:17 2000
@@ -1754,10 +1754,10 @@
       mark_buffer_uptodate(bh, uptodate);

       kiobuf = bh->b_kiobuf;
-       if (!uptodate)
-               kiobuf->errno = -EIO;
-       if (atomic_dec_and_test(&kiobuf->io_count))
-               kiobuf->end_io(kiobuf);
+       unlock_buffer(bh);
+
+       /* The bh may be recycled as soon as it is unlocked, so use the
+        * kiobuf pointer saved above. */
+       end_kio_request(kiobuf, uptodate);
}


@@ -1766,8 +1766,7 @@
 * for them to complete.  Clean up the buffer_heads afterwards.
 */

-static int do_kio(struct kiobuf *kiobuf,
-                 int rw, int nr, struct buffer_head *bh[], int size)
+static int do_kio(int rw, int nr, struct buffer_head *bh[], int size)
{
       int iosize;
       int i;
@@ -1778,18 +1777,20 @@

       if (rw == WRITE)
               rw = WRITERAW;
-       atomic_add(nr, &kiobuf->io_count);
-       kiobuf->errno = 0;
       ll_rw_block(rw, nr, bh);

-       kiobuf_wait_for_io(kiobuf);
-
-       spin_lock(&unused_list_lock);
-
       iosize = 0;
+       spin_lock(&unused_list_lock);
+
       for (i = nr; --i >= 0; ) {
               iosize += size;
               tmp = bh[i];
+               if (buffer_locked(tmp)) {
+                       spin_unlock(&unused_list_lock);
+                       wait_on_buffer(tmp);
+                       spin_lock(&unused_list_lock);
+               }
+
               if (!buffer_uptodate(tmp)) {
                       /* We are traversing bh'es in reverse order so
                           clearing iosize on error calculates the
@@ -1801,11 +1802,7 @@

       spin_unlock(&unused_list_lock);

-       if (iosize)
-               return iosize;
-       if (kiobuf->errno)
-               return kiobuf->errno;
-       return -EIO;
+       return iosize;
}

/*
@@ -1847,8 +1844,6 @@
               if ((iobuf->offset & (size-1)) ||
                   (iobuf->length & (size-1)))
                       return -EINVAL;
-               if (!iobuf->locked)
-                       panic("brw_kiovec: iobuf not locked for I/O");
               if (!iobuf->nr_pages)
                       panic("brw_kiovec: iobuf not initialised");
       }
@@ -1861,10 +1856,15 @@
               iobuf = iovec[i];
               offset = iobuf->offset;
               length = iobuf->length;
-
+               iobuf->errno = 0;
+
               for (pageind = 0; pageind < iobuf->nr_pages; pageind++) {
                       map  = iobuf->maplist[pageind];
-
+                       if (!map) {
+                               err = -EFAULT;
+                               goto error;
+                       }
+
                       while (length > 0) {
                               blocknr = b[bufind++];
                               tmp = get_unused_buffer_head(0);
@@ -1893,11 +1893,13 @@
                               length -= size;
                               offset += size;

+                               atomic_inc(&iobuf->io_count);
+
                               /*
                                * Start the IO if we have got too much
                                */
                               if (bhind >= KIO_MAX_SECTORS) {
-                                       err = do_kio(iobuf, rw, bhind, bh, size);
+                                       err = do_kio(rw, bhind, bh, size);
                                       if (err >= 0)
                                               transferred += err;
                                       else
@@ -1915,7 +1917,7 @@

       /* Is there any IO still left to submit? */
       if (bhind) {
-               err = do_kio(iobuf, rw, bhind, bh, size);
+               err = do_kio(rw, bhind, bh, size);
               if (err >= 0)
                       transferred += err;
               else
--- linux-2.3.48-pre2/fs/iobuf.c.~1~    Sun Jan 23 20:34:33 2000
+++ linux-2.3.48-pre2/fs/iobuf.c        Fri Feb 25 15:17:17 2000
@@ -12,18 +12,21 @@

static kmem_cache_t *kiobuf_cachep;

-/*
- * The default IO completion routine for kiobufs: just wake up
- * the kiobuf, nothing more.
- */

-void simple_wakeup_kiobuf(struct kiobuf *kiobuf)
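+/*
+ * End the request against one block of a kiobuf.  Each submitted block
+ * must have incremented io_count beforehand (brw_kiovec() does this
+ * for every buffer_head it submits); the final completion records any
+ * error, runs the optional end_io callback and wakes up anyone
+ * sleeping in kiobuf_wait_for_io().
+ */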
+void end_kio_request(struct kiobuf *kiobuf, int uptodate)
{
-       wake_up(&kiobuf->wait_queue);
+       if ((!uptodate) && !kiobuf->errno)
+               kiobuf->errno = -EIO;
+
+       if (atomic_dec_and_test(&kiobuf->io_count)) {
+               if (kiobuf->end_io)
+                       kiobuf->end_io(kiobuf);
+               wake_up(&kiobuf->wait_queue);
+       }
}


-void __init kiobuf_init(void)
+void __init kiobuf_setup(void)
{
       kiobuf_cachep =  kmem_cache_create("kiobuf",
                                          sizeof(struct kiobuf),
@@ -33,6 +36,13 @@
               panic("Cannot create kernel iobuf cache\n");
}

+void kiobuf_init(struct kiobuf *iobuf)
+{
+       memset(iobuf, 0, sizeof(*iobuf));
+       init_waitqueue_head(&iobuf->wait_queue);
+       iobuf->array_len = KIO_STATIC_PAGES;
+       iobuf->maplist   = iobuf->map_array;
+}

int alloc_kiovec(int nr, struct kiobuf **bufp)
{
@@ -45,12 +55,7 @@
                       free_kiovec(i, bufp);
                       return -ENOMEM;
               }
-
-               memset(iobuf, 0, sizeof(*iobuf));
-               init_waitqueue_head(&iobuf->wait_queue);
-               iobuf->end_io = simple_wakeup_kiobuf;
-               iobuf->array_len = KIO_STATIC_PAGES;
-               iobuf->maplist   = iobuf->map_array;
+               kiobuf_init(iobuf);
               *bufp++ = iobuf;
       }

@@ -64,6 +69,8 @@

       for (i = 0; i < nr; i++) {
               iobuf = bufp[i];
+               if (iobuf->locked)
+                       unlock_kiovec(1, &iobuf);
               if (iobuf->array_len > KIO_STATIC_PAGES)
                       kfree (iobuf->maplist);
               kmem_cache_free(kiobuf_cachep, bufp[i]);
@@ -103,6 +110,9 @@
{
       struct task_struct *tsk = current;
       DECLARE_WAITQUEUE(wait, tsk);
+
+       if (atomic_read(&kiobuf->io_count) == 0)
+               return;

       add_wait_queue(&kiobuf->wait_queue, &wait);
repeat:
--- linux-2.3.48-pre2/include/linux/iobuf.h.~1~ Fri Feb 25 14:59:49 2000
+++ linux-2.3.48-pre2/include/linux/iobuf.h     Fri Feb 25 15:17:17 2000
@@ -29,6 +29,8 @@
#define KIO_STATIC_PAGES       (KIO_MAX_ATOMIC_IO / (PAGE_SIZE >> 10) + 1)
#define KIO_MAX_SECTORS                (KIO_MAX_ATOMIC_IO * 2)

+/* The main kiobuf struct used for all our IO! */
+
struct kiobuf
{
       int             nr_pages;       /* Pages actually referenced */
@@ -46,7 +48,6 @@
       unsigned int    locked : 1;     /* If set, pages has been locked */

       /* Always embed enough struct pages for 64k of IO */
-       unsigned long   page_array[KIO_STATIC_PAGES];
       struct page *   map_array[KIO_STATIC_PAGES];

       /* Dynamic state for IO completion: */
@@ -57,14 +58,51 @@
};


+/* For true mmap() of kiobufs, we need to refcount the vmas accessing
+ * the kiobuf so that we can clean up properly when the entire kiobuf
+ * is no longer being accessed.
+ *
+ * The kvmap semaphore is necessary to synchronise reference counting in
+ * all cases.  It is not sufficient to rely on the mm semaphore for
+ * this, as vmap references are inherited over fork() and we need to do
+ * the right thing for vmaps which end up shared as a result.
+ */
+
+struct kiobuf_vmap;
+typedef void kvmap_deref_fn (struct kiobuf_vmap *);
+
+struct kiobuf_vmap
+{
+       struct kiobuf * kiobuf;
+       void *          private_data;
+
+       struct semaphore sem;
+
+       /* The following are always protected by the semaphore above */
+       int             refcount;
+       kvmap_deref_fn  *deref_callback;
+};
+
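+/* Typical driver usage (illustrative --- see the full version in
+ * Documentation/kiobuf.sample.c):
+ *
+ *	kiobuf_init(&kiobuf);
+ *	err = map_kernel_kiobuf(&kiobuf, (unsigned long) area, len);
+ *	kvmap_init(&vmap, &kiobuf);
+ *	...
+ *	down(&vmap.sem);
+ *	err = mmap_kiobuf(&vmap, vma);	(from the driver's mmap method)
+ *	up(&vmap.sem);
+ */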
+
/* mm/memory.c */

int    map_user_kiobuf(int rw, struct kiobuf *, unsigned long va, size_t len);
+int    map_kernel_kiobuf(struct kiobuf *, unsigned long va, size_t len);
void   unmap_kiobuf(struct kiobuf *iobuf);
+int    lock_kiovec(int nr, struct kiobuf *iovec[], int wait);
+int    unlock_kiovec(int nr, struct kiobuf *iovec[]);
+
+/* mm/iomap.c */
+
+#define KIOMAP_PREFAULT                0x0001
+int mmap_kiobuf(struct kiobuf_vmap *iobuf, struct vm_area_struct * vma);
+void kvmap_init(struct kiobuf_vmap *, struct kiobuf *);

/* fs/iobuf.c */

-void __init kiobuf_init(void);
+void __init kiobuf_setup(void);
+void   kiobuf_init(struct kiobuf *);
+void   end_kio_request(struct kiobuf *, int);
-void   simple_wakeup_kiobuf(struct kiobuf *);
int    alloc_kiovec(int nr, struct kiobuf **);
void   free_kiovec(int nr, struct kiobuf **);
--- linux-2.3.48-pre2/init/main.c.~1~   Thu Feb 17 11:50:55 2000
+++ linux-2.3.48-pre2/init/main.c       Fri Feb 25 15:17:17 2000
@@ -534,7 +534,7 @@
       vma_init();
       buffer_init(mempages);
       page_cache_init(mempages);
-       kiobuf_init();
+       kiobuf_setup();
       signals_init();
       bdev_init();
       inode_init();
--- linux-2.3.48-pre2/kernel/ksyms.c.~1~        Fri Feb 25 10:08:50 2000
+++ linux-2.3.48-pre2/kernel/ksyms.c    Fri Feb 25 15:17:17 2000
@@ -43,6 +43,7 @@
#include <linux/mm.h>
#include <linux/capability.h>
#include <linux/highuid.h>
+#include <linux/iobuf.h>

#if defined(CONFIG_PROC_FS)
#include <linux/proc_fs.h>
@@ -156,11 +157,6 @@
EXPORT_SYMBOL(mark_buffer_dirty);
EXPORT_SYMBOL(__mark_buffer_dirty);
EXPORT_SYMBOL(__mark_inode_dirty);
-EXPORT_SYMBOL(free_kiovec);
-EXPORT_SYMBOL(brw_kiovec);
-EXPORT_SYMBOL(alloc_kiovec);
-EXPORT_SYMBOL(expand_kiobuf);
-EXPORT_SYMBOL(unmap_kiobuf);
EXPORT_SYMBOL(get_empty_filp);
EXPORT_SYMBOL(init_private_file);
EXPORT_SYMBOL(filp_open);
@@ -339,6 +335,23 @@
/* Various random spinlocks we want to export */
EXPORT_SYMBOL(tqueue_lock);
#endif
+
+/* Kiobufs */
+EXPORT_SYMBOL(kiobuf_init);
+EXPORT_SYMBOL(kvmap_init);
+
+EXPORT_SYMBOL(alloc_kiovec);
+EXPORT_SYMBOL(free_kiovec);
+EXPORT_SYMBOL(expand_kiobuf);
+
+EXPORT_SYMBOL(map_user_kiobuf);
+EXPORT_SYMBOL(map_kernel_kiobuf);
+EXPORT_SYMBOL(unmap_kiobuf);
+EXPORT_SYMBOL(mmap_kiobuf);
+
+EXPORT_SYMBOL(lock_kiovec);
+EXPORT_SYMBOL(unlock_kiovec);
+EXPORT_SYMBOL(brw_kiovec);

/* autoirq from  drivers/net/auto_irq.c */
EXPORT_SYMBOL(autoirq_setup);
--- linux-2.3.48-pre2/mm/Makefile.~1~   Fri Dec 10 15:24:41 1999
+++ linux-2.3.48-pre2/mm/Makefile       Fri Feb 25 15:17:17 2000
@@ -10,7 +10,7 @@
O_TARGET := mm.o
O_OBJS  := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \
           vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \
-           page_alloc.o swap_state.o swapfile.o numa.o
+           page_alloc.o swap_state.o swapfile.o numa.o iomap.o

ifeq ($(CONFIG_HIGHMEM),y)
O_OBJS += highmem.o
--- linux-2.3.48-pre2/mm/iomap.c.~1~    Fri Feb 25 15:17:17 2000
+++ linux-2.3.48-pre2/mm/iomap.c        Fri Feb 25 15:17:17 2000
@@ -0,0 +1,144 @@
+/*
+ * iomap.c
+ *
+ * Perform mmap()ing of arbitrary kiobufs.
+ *
+ * Written by Stephen C. Tweedie, 2000
+ * (C) Red Hat, Inc.  2000
+ *
+ * Refer to Documentation/kiobuf* for instructions.
+ *
+ * The kiobuf_vmap structure contains the information necessary to track
+ * the mmap of a kiobuf into user space.  The rest of this file defines
+ * functions necessary to maintain that mapping.
+ */
+
+#include <linux/iobuf.h>
+#include <linux/pagemap.h>
+#include <asm/atomic.h>
+
+#define dprintk(x...)
+
+/*
+ * Open/close methods for the kvmap only need to track the reference counts.
+ *
+ * Both open and close will be called with the vma mm semaphore held,
+ * but without the mm page lock.
+ */
+
+static void kvmap_open(struct vm_area_struct *vma)
+{
+       struct kiobuf_vmap *vmap;
+       vmap = (struct kiobuf_vmap *) vma->vm_private_data;
+
+       /* Just increment the refcount on open: a fork() or a vma split
+        * has increased the number of vmas referencing this kvmap. */
+       down(&vmap->sem);
+       vmap->refcount++;
+       up(&vmap->sem);
+}
+
+static void kvmap_close(struct vm_area_struct *vma)
+{
+       struct kiobuf_vmap *vmap;
+       vmap = (struct kiobuf_vmap *) vma->vm_private_data;
+
+       down(&vmap->sem);
+       if (--vmap->refcount == 0) {
+               if (vmap->deref_callback)
+                       vmap->deref_callback(vmap);
+       }
+       up(&vmap->sem);
+}
+
+
+static struct page * kvmap_nopage(struct vm_area_struct * vma,
+                                 unsigned long address,
+                                 int no_share)
+{
+       unsigned long pgoff;
+       struct kiobuf_vmap *vmap;
+       struct kiobuf *iobuf;
+       struct page *page;
+
+       vmap = (struct kiobuf_vmap *) vma->vm_private_data;
+       iobuf = vmap->kiobuf;
+       pgoff = ((address - vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff;
+       dprintk(KERN_INFO __FUNCTION__ "(%p, %p, %d): offset %lu\n",
+              vma, (void *) address, no_share, pgoff);
+
+       if (no_share)
+               BUG();
+
+       /* mmap() of a larger region than specified by the kiobuf
+        * results in a SIGBUS, as does faulting on a page not present
+        * in the kiobuf. */
+
+       if (pgoff >= iobuf->nr_pages) {
+               dprintk(KERN_INFO __FUNCTION__ ": no such page in iobuf\n");
+               return NULL;
+       }
+
+       page = iobuf->maplist[pgoff];
+       dprintk(KERN_INFO __FUNCTION__ ": found page %p\n", page);
+       if (!page)
+               return NULL;
+
+       /* We need to obey the same rules as copy_page_range() when it
+        * comes to maintaining reference counts on pages in the kvmap.
+        * We only increment the refcount for normal, physically
+        * present, unreserved pages. */
+
+       if ((page - mem_map) < max_mapnr && !PageReserved(page)) {
+               get_page(page);
+               dprintk(KERN_INFO __FUNCTION__ ": page count now %d\n",
+                      atomic_read(&page->count));
+       }
+
+       return page;
+}
+
+
+static struct vm_operations_struct kio_vmops =
+{
+       open:   kvmap_open,
+       close:  kvmap_close,
+       nopage: kvmap_nopage,
+};
+
+void kvmap_init(struct kiobuf_vmap *vmap, struct kiobuf *iobuf)
+{
+       memset(vmap, 0, sizeof(*vmap));
+       vmap->kiobuf = iobuf;
+       init_MUTEX(&vmap->sem);
+}
+
+/*
+ * This routine is intended to be called by the mmap_* methods of other
+ * device drivers.
+ *
+ * Returns <0 on error, 1 if this is the first reference to the vmap, else 0.
+ *
+ * The vmap semaphore must be held before calling this.
+ */
+
+int mmap_kiobuf(struct kiobuf_vmap *vmap, struct vm_area_struct * vma)
+{
+       int retval = 0;
+
+       dprintk(KERN_INFO __FUNCTION__ "(vmap %p, vma %p)\n", vmap, vma);
+
+       /* This is supposed to be a shared map --- reject COW mappings
+        * before we modify the vma at all. */
+       if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED))
+               return -EINVAL;
+
+       vma->vm_ops = &kio_vmops;
+       vma->vm_private_data = vmap;
+       vma->vm_flags |= VM_LOCKED;     /* Don't swap out kvmaps! */
+
+       vmap->refcount++;
+       if (vmap->refcount == 1)
+               retval = 1;
+
+       return retval;
+}
--- linux-2.3.48-pre2/mm/memory.c.~1~   Fri Feb 25 10:08:50 2000
+++ linux-2.3.48-pre2/mm/memory.c       Fri Feb 25 15:17:17 2000
@@ -408,28 +408,25 @@
                       return pte_page(*pte);
       }

-       printk(KERN_ERR "Missing page in follow_page\n");
       return NULL;
}

/*
- * Given a physical address, is there a useful struct page pointing to it?
+ * Given a physical address, is there a useful struct page pointing to
+ * it?  This may become more complex in the future if we start dealing
+ * with IO-aperture pages in kiobufs.
 */

-struct page * get_page_map(struct page *page, unsigned long vaddr)
+static inline struct page * get_page_map(struct page *page)
{
-       if (MAP_NR(vaddr) >= max_mapnr)
-               return 0;
-       if (page == ZERO_PAGE(vaddr))
-               return 0;
-       if (PageReserved(page))
+       if (page >= (mem_map + max_mapnr))
               return 0;
       return page;
}

/*
 * Force in an entire range of pages from the current process's user VA,
- * and pin and lock the pages for IO.
+ * and pin them in physical memory.
 */

#define dprintk(x...)
@@ -440,8 +437,6 @@
       struct mm_struct *      mm;
       struct vm_area_struct * vma = 0;
       struct page *           map;
-       int                     doublepage = 0;
-       int                     repeat = 0;
       int                     i;

       /* Make sure the iobuf is not already mapped somewhere. */
@@ -457,11 +452,10 @@
       if (err)
               return err;

- repeat:
       down(&mm->mmap_sem);

       err = -EFAULT;
-       iobuf->locked = 1;
+       iobuf->locked = 0;
       iobuf->offset = va & ~PAGE_MASK;
       iobuf->length = len;

@@ -481,16 +475,15 @@
               spin_lock(&mm->page_table_lock);
               map = follow_page(ptr);
               if (!map) {
+                       spin_unlock(&mm->page_table_lock);
                       dprintk (KERN_ERR "Missing page in map_user_kiobuf\n");
-                       goto retry;
+                       goto out_unlock;
               }
-               map = get_page_map(map, ptr);
-               if (map) {
-                       if (TryLockPage(map)) {
-                               goto retry;
-                       }
+               map = get_page_map(map);
+               if (map)
                       atomic_inc(&map->count);
-               }
+               else
+                       printk (KERN_INFO "Mapped page missing [%d]\n", i);
               spin_unlock(&mm->page_table_lock);
               iobuf->maplist[i] = map;
               iobuf->nr_pages = ++i;
@@ -507,42 +500,110 @@
       unmap_kiobuf(iobuf);
       dprintk ("map_user_kiobuf: end %d\n", err);
       return err;
+}

- retry:

-       /*
-        * Undo the locking so far, wait on the page we got to, and try again.
-        */
-       spin_unlock(&mm->page_table_lock);
-       unmap_kiobuf(iobuf);
-       up(&mm->mmap_sem);
+/*
+ * Force in an entire range of pages from the current process's kernel
+ * VA.  We do not expect to see unmapped pages here, so no page faults
+ * will be taken.  The map_kernel_kiobuf() routine should work happily
+ * both for normal kernel allocations and for vmalloc()ed regions.
+ */

-       /*
-        * Did the release also unlock the page we got stuck on?
-        */
-       if (map) {
-               if (!PageLocked(map)) {
-                       /* If so, we may well have the page mapped twice
-                        * in the IO address range.  Bad news.  Of
-                        * course, it _might_ * just be a coincidence,
-                        * but if it happens more than * once, chances
-                        * are we have a double-mapped page. */
-                       if (++doublepage >= 3) {
-                               return -EINVAL;
-                       }
+static inline int map_pte_range(struct page **pmap, pmd_t * pmd, unsigned long va, unsigned long end)
+{
+       pte_t * pte;
+       int nr_pages = 0;
+       struct page *page;
+
+       if (pmd_none(*pmd))
+               return 0;
+
+       pte = pte_offset(pmd, va);
+
+       do {
+               if (pte_none(*pte))
+                       page = NULL;
+               else {
+                       page = pte_page(*pte);
+                       atomic_inc(&page->count);
               }
+               *pmap++ = page;
+
+               va += PAGE_SIZE;
+               pte++;
+               nr_pages++;
+       } while (va < end && (va & ~PMD_MASK));

-               /*
-                * Try again...
-                */
-               wait_on_page(map);
-       }
+       return nr_pages;
+}
+
+static inline int map_pmd_range(struct page **pmap, pgd_t * dir, unsigned long va, unsigned long end)
+{
+       pmd_t * pmd;
+       int total_pages = 0;
+       int nr_pages;

-       if (++repeat < 16) {
-               ptr = va & PAGE_MASK;
-               goto repeat;
+       if (pgd_none(*dir))
+               return 0;
+       pmd = pmd_offset(dir, va);
+
+       do {
+               nr_pages = map_pte_range(pmap, pmd, va, end);
+               pmd++;
+               pmap += nr_pages;
+               va += (nr_pages << PAGE_SHIFT);
+               total_pages += nr_pages;
+       } while (nr_pages && va < end && (va & ~PGDIR_MASK));
+       return total_pages;
+}
+
+int map_kernel_kiobuf(struct kiobuf *iobuf, unsigned long va, size_t len)
+{
+       unsigned long           ptr, end;
+       int                     err;
+       struct mm_struct *      mm;
+       struct page **          pmap;
+       int                     nr_pages;
+       pgd_t *                 dir;
+
+       /* Make sure the iobuf is not already mapped somewhere. */
+       if (iobuf->nr_pages)
+               return -EINVAL;
+
+       mm = current->mm;
+       dprintk ("map_kernel_kiobuf: begin\n");
+
+       ptr = va & PAGE_MASK;
+       end = (va + len + PAGE_SIZE - 1) & PAGE_MASK;
+       err = expand_kiobuf(iobuf, (end - ptr) >> PAGE_SHIFT);
+       if (err)
+               return err;
+
+       iobuf->locked = 0;
+       iobuf->nr_pages = 0;
+       iobuf->offset = va & ~PAGE_MASK;
+       iobuf->length = len;
+
+       spin_lock(&mm->page_table_lock);
+
+       pmap = iobuf->maplist;
+
+       while (ptr < end) {
+               /* Recompute the pgd each iteration: map_pmd_range may
+                * stop short at a hole rather than at a pgd boundary. */
+               dir = pgd_offset(mm, ptr);
+               nr_pages = map_pmd_range(pmap, dir, ptr, end);
+               if (!nr_pages)
+                       break;
+               ptr += (nr_pages << PAGE_SHIFT);
+               pmap += nr_pages;
+               iobuf->nr_pages += nr_pages;
       }
-       return -EAGAIN;
+
+       spin_unlock(&mm->page_table_lock);
+
+       /* If we stopped short of the end, part of the range was not
+        * mapped: undo everything rather than return a partial kiobuf. */
+       if (ptr < end) {
+               unmap_kiobuf(iobuf);
+               dprintk ("map_kernel_kiobuf: end -EFAULT\n");
+               return -EFAULT;
+       }
+
+       dprintk ("map_kernel_kiobuf: end OK\n");
+       return 0;
}


@@ -558,9 +619,9 @@

       for (i = 0; i < iobuf->nr_pages; i++) {
               map = iobuf->maplist[i];
-
-               if (map && iobuf->locked) {
-                       UnlockPage(map);
+               if (map) {
+                       if (iobuf->locked)
+                               UnlockPage(map);
                       __free_page(map);
               }
       }
@@ -568,6 +629,109 @@
       iobuf->nr_pages = 0;
       iobuf->locked = 0;
}
+
+/*
+ * Lock down all of the pages of a kiovec for IO.
+ *
+ * If any page is mapped twice in the kiovec, we return the error -EINVAL.
+ *
+ * The optional wait parameter causes the lock call to block until all
+ * pages can be locked if set.  If wait==0, the lock operation is
+ * aborted if any locked pages are found and -EAGAIN is returned.
+ */
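+
+/* A sketch of the intended calling pattern (mirrors the currently
+ * disabled code in drivers/char/raw.c; the iobuf here is assumed to
+ * have been mapped already):
+ *
+ *	err = lock_kiovec(1, &iobuf, 1);
+ *	... perform IO on the pinned pages ...
+ *	unlock_kiovec(1, &iobuf);	(implicit in unmap_kiobuf())
+ */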
+
+int lock_kiovec(int nr, struct kiobuf *iovec[], int wait)
+{
+       struct kiobuf *iobuf;
+       int i, j;
+       struct page *page, **ppage;
+       int doublepage = 0;
+       int repeat = 0;
+
+ repeat:
+
+       for (i = 0; i < nr; i++) {
+               iobuf = iovec[i];
+
+               if (iobuf->locked)
+                       continue;
+
+               ppage = iobuf->maplist;
+               for (j = 0; j < iobuf->nr_pages; ppage++, j++) {
+                       page = *ppage;
+                       if (!page)
+                               continue;
+
+                       if (TryLockPage(page)) {
+                               /* Unlock only the pages we ourselves
+                                * locked on this iobuf before backing
+                                * out. */
+                               while (j--) {
+                                       struct page *tmp = *--ppage;
+                                       if (tmp)
+                                               UnlockPage(tmp);
+                               }
+                               goto retry;
+                       }
+               }
+               iobuf->locked = 1;
+       }
+
+       return 0;
+
+ retry:
+
+       /*
+        * We couldn't lock one of the pages.  Undo the locking so far,
+        * wait on the page we got to, and try again.
+        */
+
+       unlock_kiovec(nr, iovec);
+       if (!wait)
+               return -EAGAIN;
+
+       /*
+        * Did the release also unlock the page we got stuck on?
+        */
+       if (!PageLocked(page)) {
+               /*
+                * If so, we may well have the page mapped twice
+                * in the IO address range.  Bad news.  Of
+                * course, it _might_ just be a coincidence,
+                * but if it happens more than once, chances
+                * are we have a double-mapped page.
+                */
+               if (++doublepage >= 3)
+                       return -EINVAL;
+
+               /* Try again...  */
+               wait_on_page(page);
+       }
+
+       if (++repeat < 16)
+               goto repeat;
+       return -EAGAIN;
+}
+
+/*
+ * Unlock all of the pages of a kiovec after IO.
+ */
+
+int unlock_kiovec(int nr, struct kiobuf *iovec[])
+{
+       struct kiobuf *iobuf;
+       int i, j;
+       struct page *page, **ppage;
+
+       for (i = 0; i < nr; i++) {
+               iobuf = iovec[i];
+
+               if (!iobuf->locked)
+                       continue;
+               iobuf->locked = 0;
+
+               ppage = iobuf->maplist;
+               for (j = 0; j < iobuf->nr_pages; ppage++, j++) {
+                       page = *ppage;
+                       if (!page)
+                               continue;
+                       UnlockPage(page);
+               }
+       }
+       return 0;
+}
+

static inline void zeromap_pte_range(pte_t * pte, unsigned long address,
                                     unsigned long size, pgprot_t prot)
--- linux-2.3.48-pre2/mm/vmscan.c.~1~   Thu Feb 17 11:50:53 2000
+++ linux-2.3.48-pre2/mm/vmscan.c       Fri Feb 25 15:17:17 2000
@@ -259,7 +259,7 @@
       unsigned long end;

       /* Don't swap out areas which are locked down */
-       if (vma->vm_flags & VM_LOCKED)
+       if (vma->vm_flags & (VM_LOCKED | VM_IO))
               return 0;

       pgdir = pgd_offset(vma->vm_mm, address);