/*
* Copyright (c) 1998 Chuck Silvers.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
/* Use a value at least as large as the architecture's maximum page size. */
#define UBC_MAX_WINSHIFT \
((1 << UBC_WINSHIFT) > MAX_PAGE_SIZE ? UBC_WINSHIFT : ilog2(MAX_PAGE_SIZE))
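/*
 * Each UBC mapping window covers ubc_winsize bytes (1 << ubc_winshift,
 * at least one page); ubc_nwins is the number of such windows.
 */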
int ubc_nwins = UBC_NWINS;
const int ubc_winshift = UBC_MAX_WINSHIFT;
const int ubc_winsize = 1 << UBC_MAX_WINSHIFT;
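/*
 * With PMAP_PREFER (virtually indexed caches), the windows are spread
 * over several queues so that mappings can respect the pmap's preferred
 * virtual alignment; ubc_nqueues is computed at initialization time.
 * Otherwise a single queue is enough.
 */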
#if defined(PMAP_PREFER)
int ubc_nqueues;
#define UBC_NQUEUES ubc_nqueues
#else
#define UBC_NQUEUES 1
#endif
/*
 * Note that a page whose backing store is partially allocated
 * is marked as PG_RDONLY.
 *
 * It is the responsibility of ubc_alloc()'s caller to allocate the
 * backing blocks before writing to the window.
 */
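/*
 * ubc_fault: fault routine for ubc mapping
 */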
static int
ubc_fault(struct uvm_faultinfo *ufi, vaddr_t ign1, struct vm_page **ign2,
int ign3, int ign4, vm_prot_t access_type, int flags)
{
struct uvm_object *uobj;
struct ubc_map *umap;
vaddr_t va, eva, ubc_offset, slot_offset;
struct vm_page *pgs[howmany(ubc_winsize, MIN_PAGE_SIZE)];
int i, error, npages;
vm_prot_t prot;
UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist);
/*
* no need to try with PGO_LOCKED...
* we don't need to have the map locked since we know that
* no one will mess with it until our reference is released.
*/
/*
 * Some platforms cannot write individual bytes atomically, so
 * software has to do a read/modify/write of a larger quantity
 * instead.  This means that the access_type for "write" operations
 * can be VM_PROT_READ, which confuses us mightily.
 *
 * Deal with this by resetting access_type based on the info
 * that ubc_alloc() stores for us.
 */
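/*
 * EAGAIN from the pager's get method means "back off and retry":
 * pause briefly and try again.  Any other error fails the fault.
 */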
if (error == EAGAIN) {
kpause("ubc_fault", false, hz >> 2, NULL);
goto again;
}
if (error) {
return error;
}
/*
* For virtually-indexed, virtually-tagged caches we should avoid
* creating writable mappings when we do not absolutely need them,
* since the "compatible alias" trick does not work on such caches.
* Otherwise, we can always map the pages writable.
*/
va = ufi->orig_rvaddr;
eva = ufi->orig_rvaddr + (npages << PAGE_SHIFT);
UVMHIST_LOG(ubchist, "va %#jx eva %#jx", va, eva, 0, 0);
/*
 * Note: normally all returned pages would have the same UVM object.
 * However, layered file systems (and e.g. tmpfs) may return pages
 * that belong to an underlying UVM object.  In that case the lock
 * is shared amongst the objects.
 */
rw_enter(uobj->vmobjlock, RW_WRITER);
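/* Loop over the pages returned by the pager. */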
for (i = 0; va < eva; i++, va += PAGE_SIZE) {
struct vm_page *pg;
#ifdef UBC_USE_PMAP_DIRECT
/*
 * During direct access, pages need to be held busy to prevent them
 * from changing identity; if we read or write an object into a
 * mapped view of that same object, we could therefore deadlock
 * while faulting.
 *
 * Avoid the problem by disallowing direct access if the object
 * might be visible somewhere via mmap().
 *
 * XXX concurrent reads cause thundering herd issues with PG_BUSY.
 * In the future enable by default for writes or if ncpu<=2, and
 * make the toggle override that.
 */
if ((ubc_direct && (flags & UBC_ISMAPPED) == 0) ||
(flags & UBC_FAULTBUSY) != 0) {
return ubc_uiomove_direct(uobj, uio, todo, advice, flags);
}
#endif
off = uio->uio_offset;
error = 0;
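/*
 * Process the transfer one window at a time: map a window over the
 * object with ubc_alloc(), copy through it with uiomove(), then
 * release the window.
 */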
while (todo > 0) {
vsize_t bytelen = todo;
void *win;
npages = __arraycount(pgs);
win = ubc_alloc(uobj, off, &bytelen, advice, flags, pgs,
&npages);
if (error == 0) {
error = uiomove(win, bytelen, uio);
}
if (error != 0 && overwrite) {
/*
* if we haven't initialized the pages yet,
* do it now. it's safe to use memset here
* because we just mapped the pages above.
*/
memset(win, 0, bytelen);
}
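/*
 * Release the window; its mapping may stay cached for reuse by a
 * later request on the same object.
 */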
ubc_release(win, flags, pgs, npages);
off += bytelen;
todo -= bytelen;
if (error != 0 && (flags & UBC_PARTIALOK) != 0) {
break;
}
}
return error;
}
/*
* ubc_zerorange: set a range of bytes in an object to zero.
*/
void
ubc_zerorange(struct uvm_object *uobj, off_t off, size_t len, int flags)
{
struct vm_page *pgs[howmany(ubc_winsize, MIN_PAGE_SIZE)];
int npages;
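/*
 * Zero the range one UBC window at a time; where the pmap direct map
 * can be used, the direct path below avoids mapping windows entirely.
 */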
#ifdef UBC_USE_PMAP_DIRECT
/* Copy data using direct map */
/*
* ubc_alloc_direct: allocate a file mapping window using direct map
*/
static int __noinline
ubc_alloc_direct(struct uvm_object *uobj, voff_t offset, vsize_t *lenp,
int advice, int flags, struct vm_page **pgs, int *npages)
{
voff_t pgoff;
int error;
int gpflags = flags | PGO_NOTIMESTAMP | PGO_SYNCIO;
int access_type = VM_PROT_READ;
UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist);
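/*
 * Pick the pager flags: writes need writable pages and may allocate
 * backing blocks (possibly past EOF), while reads can live with
 * PG_RDONLY pages and unallocated blocks.
 */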
if (flags & UBC_WRITE) {
if (flags & UBC_FAULTBUSY)
gpflags |= PGO_OVERWRITE | PGO_NOBLOCKALLOC;
#if 0
KASSERT(!UVM_OBJ_NEEDS_WRITEFAULT(uobj));
#endif
/*
 * Tell genfs_getpages() that we already hold the journal lock and
 * allow allocation past the current EOF.
 */
gpflags |= PGO_JOURNALLOCKED | PGO_PASTEOF;
access_type |= VM_PROT_WRITE;
} else {
/* We don't need the empty blocks allocated; PG_RDONLY is okay */
gpflags |= PGO_NOBLOCKALLOC;
}
/*
* if we haven't initialized the pages yet due to an
* error above, do it now.
*/
if (error != 0) {
(void) uvm_direct_process(pgs, npages, off,
bytelen, ubc_zerorange_process, NULL);
}
off += bytelen;
todo -= bytelen;
endoff = off & (PAGE_SIZE - 1);
/*
* zero out the remaining portion of the final page
* (if any).
*/
if (todo == 0 && endoff != 0) {
vsize_t zlen = PAGE_SIZE - endoff;
(void) uvm_direct_process(pgs + npages - 1, 1,
off, zlen, ubc_zerorange_process, NULL);
}
} else {
off += bytelen;
todo -= bytelen;
}
/*
* Safe to check without lock held, as ubc_alloc() removes
* the mapping and list entry in the correct order.
*/
if (__predict_true(LIST_EMPTY(&uobj->uo_ubc))) {
return;
}
rw_enter(ubc_object.uobj.vmobjlock, RW_WRITER);
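/*
 * Detach every cached window from the object.  Each entry must
 * already be unreferenced and unmapped from the kernel pmap; all
 * that remains is to unlink it and clear the association.
 */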
while ((umap = LIST_FIRST(&uobj->uo_ubc)) != NULL) {
KASSERT(umap->refcount == 0);
for (va = 0; va < ubc_winsize; va += PAGE_SIZE) {
KASSERT(!pmap_extract(pmap_kernel(),
va + UBC_UMAP_ADDR(umap), NULL));
}
LIST_REMOVE(umap, list);
LIST_REMOVE(umap, hash);
umap->flags &= ~UMAP_MAPPING_CACHED;
umap->uobj = NULL;
}
rw_exit(ubc_object.uobj.vmobjlock);
}