/*
* Copyright (c) 2005, 2006, 2007 Antti Kantee. All Rights Reserved.
*
* Development of this software was supported by the
* Google Summer of Code program and the Ulla Tuominen Foundation.
* The Google SoC project was mentored by Bill Studenmund.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
int puffs_vnop_lookup(void *);
int puffs_vnop_create(void *);
int puffs_vnop_access(void *);
int puffs_vnop_mknod(void *);
int puffs_vnop_open(void *);
int puffs_vnop_close(void *);
int puffs_vnop_getattr(void *);
int puffs_vnop_setattr(void *);
int puffs_vnop_reclaim(void *);
int puffs_vnop_readdir(void *);
int puffs_vnop_poll(void *);
int puffs_vnop_fsync(void *);
int puffs_vnop_seek(void *);
int puffs_vnop_remove(void *);
int puffs_vnop_mkdir(void *);
int puffs_vnop_rmdir(void *);
int puffs_vnop_link(void *);
int puffs_vnop_readlink(void *);
int puffs_vnop_symlink(void *);
int puffs_vnop_rename(void *);
int puffs_vnop_read(void *);
int puffs_vnop_write(void *);
int puffs_vnop_fallocate(void *);
int puffs_vnop_fdiscard(void *);
int puffs_vnop_fcntl(void *);
int puffs_vnop_ioctl(void *);
int puffs_vnop_inactive(void *);
int puffs_vnop_print(void *);
int puffs_vnop_pathconf(void *);
int puffs_vnop_advlock(void *);
int puffs_vnop_strategy(void *);
int puffs_vnop_bmap(void *);
int puffs_vnop_mmap(void *);
int puffs_vnop_getpages(void *);
int puffs_vnop_abortop(void *);
int puffs_vnop_getextattr(void *);
int puffs_vnop_setextattr(void *);
int puffs_vnop_listextattr(void *);
int puffs_vnop_deleteextattr(void *);
int puffs_vnop_spec_read(void *);
int puffs_vnop_spec_write(void *);
int puffs_vnop_fifo_read(void *);
int puffs_vnop_fifo_write(void *);
#define ERROUT(err) \
do { \
error = err; \
goto out; \
} while (/*CONSTCOND*/0)
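/*
 * Usage sketch (illustrative): the caller declares "int error" and a
 * common "out:" cleanup label; ERROUT(err) then records the error and
 * jumps there, as puffs_vnop_getpages() does below with ERROUT(EBUSY).
 */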
/*
* This is a generic vnode operation handler. It checks if the necessary
* operations for the called vnode operation are implemented by userspace
* and either returns a dummy return value or proceeds to call the real
* vnode operation from puffs_msgop_v.
*
* XXX: this should be described elsewhere and autogenerated; the complexity
* of the vnode operations vectors and their interrelationships is also
* getting a bit out of hand. Another problem is that we need this same
* information in the fs server code, so keeping the two in sync manually
* is not a viable (long term) plan.
*/
/* not supported, handle locking protocol */
#define CHECKOP_NOTSUPP(op) \
case VOP_##op##_DESCOFFSET: \
if (pmp->pmp_vnopmask[PUFFS_VN_##op] == 0) \
return genfs_eopnotsupp(v); \
break
/* always succeed, no locking */
#define CHECKOP_SUCCESS(op) \
case VOP_##op##_DESCOFFSET: \
if (pmp->pmp_vnopmask[PUFFS_VN_##op] == 0) \
return 0; \
break
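/*
 * Illustrative sketch (an assumption-laden example, not the in-tree
 * dispatcher verbatim) of how the CHECKOP_* macros above can be consumed:
 * fish the vnode out of the generic argument structure with the stock
 * vnodeop_desc accessors (VOPARG_OFFSETTO(), VOCALL()), consult
 * pmp_vnopmask, and fall through to the real operation vector
 * puffs_msgop_v. The case list is abridged; "example_checkop" is a
 * made-up name for illustration only.
 */
int
example_checkop(void *v)
{
struct vop_generic_args /* {
struct vnodeop_desc *a_desc;
op-specific arguments follow
} */ *ap = v;
struct vnodeop_desc *desc = ap->a_desc;
struct puffs_mount *pmp;
struct vnode *vp;
int offset;
/* locate the first vnode argument of this operation */
offset = desc->vdesc_vp_offsets[0];
vp = *VOPARG_OFFSETTO(struct vnode **, offset, ap);
pmp = MPTOPUFFSMP(vp->v_mount);
switch (desc->vdesc_offset) {
CHECKOP_NOTSUPP(CREATE);	/* EOPNOTSUPP + locking protocol */
CHECKOP_NOTSUPP(MKNOD);
CHECKOP_SUCCESS(ACCESS);	/* dummy success, no locking */
CHECKOP_SUCCESS(SEEK);
default:
break;
}
/* userspace implements the op: call the real vnode operation */
return VOCALL(puffs_msgop_v, desc->vdesc_offset, v);
}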
/*
* Begin vnode operations.
*
* A word from the keymaster about locks: generally we don't want
* to use the vnode locks at all: it creates an ugly dependency between
* the userlandia file server and the kernel. But we'll play along with
* the kernel vnode locks for now. However, even currently we attempt
* to release locks as early as possible. This is possible for some
* operations which a) don't need a locked vnode after the userspace op
* and b) return with the vnode unlocked. Theoretically we could
* unlock-do op-lock for others and order the graph in userspace, but I
* don't want to think of the consequences for the time being.
*/
/*
* If dotdot cache is enabled, add reference to .. and return.
*/
if (PUFFS_USE_DOTDOTCACHE(pmp) && (cnp->cn_flags & ISDOTDOT)) {
vp = VPTOPP(ap->a_dvp)->pn_parent;
vref(vp);
*ap->a_vpp = vp;
return 0;
}
/*
* Check if someone fed it into the cache
*/
if (!isdot && PUFFS_USE_NAMECACHE(pmp)) {
int found, iswhiteout;
found = cache_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
cnp->cn_nameiop, cnp->cn_flags,
&iswhiteout, ap->a_vpp);
if (iswhiteout) {
cnp->cn_flags |= ISWHITEOUT;
}
if (TIMED_OUT(cpn->pn_cn_timeout)) {
cache_purge(cvp);
/*
* cached vnode (cvp) is still referenced
* so that we can reuse it upon a new
* successful lookup.
*/
*ap->a_vpp = NULL;
found = 0;
}
}
/*
* Do not use negative caching, since the filesystem
* provides no TTL for it.
*/
if (found && *ap->a_vpp == NULLVP && PUFFS_USE_FS_TTL(pmp))
found = 0;
/*
* This is what would have been left in ERROR before
* the rearrangement of cache_lookup(). What with all
* the macros, I am not sure if this is a dead value
* below or not.
*/
error = -1;
}
if (isdot) {
/* deal with rename lookup semantics */
if (cnp->cn_nameiop == RENAME && (cnp->cn_flags & ISLASTCN))
return EISDIR;
puffs_msg_setinfo(park_lookup, PUFFSOP_VN,
PUFFS_VN_LOOKUP, VPTOPNC(dvp));
PUFFS_MSG_ENQUEUEWAIT2(pmp, park_lookup, dvp->v_data, NULL, error);
DPRINTF(("puffs_lookup: return of the userspace, part %d\n", error));
/*
* In case of error, there is no new vnode to play with, so be
* happy with the NULL value given to vpp in the beginning.
* Also, check if this really was an error or the target was not
* present. Either treat it as a non-error for CREATE/RENAME or
* enter the component into the negative name cache (if desired).
*/
if (error) {
error = checkerr(pmp, error, __func__);
if (error == ENOENT) {
/* don't allow to create files on r/o fs */
if ((dvp->v_mount->mnt_flag & MNT_RDONLY)
&& cnp->cn_nameiop == CREATE) {
error = EROFS;
/* adjust values if we are creating */
} else if ((cnp->cn_flags & ISLASTCN)
&& (cnp->cn_nameiop == CREATE
|| cnp->cn_nameiop == RENAME)) {
error = EJUSTRETURN;
/*
* Check that we don't get our parent node back, that would cause
* a pretty obvious deadlock.
*/
dpn = dvp->v_data;
if (lookup_msg->pvnr_newnode == dpn->pn_cookie) {
puffs_senderr(pmp, PUFFS_ERR_LOOKUP, EINVAL,
"lookup produced parent cookie", lookup_msg->pvnr_newnode);
error = EPROTO;
goto out;
}
/*
* Check if we looked up the cached vnode
*/
vp = NULL;
if (cvp && (VPTOPP(cvp)->pn_cookie == lookup_msg->pvnr_newnode)) {
int grace;
/*
* Bump the grace time of this node so that it does not get
* reclaimed too fast. We try to extend the lifetime of the
* busiest nodes a bit more, within some limits.
*/
grace = 10 * puffs_sopreq_expire_timeout;
cpn->pn_cn_grace = getticks() + grace;
vp = cvp;
}
/*
* No cached vnode available, or the cached vnode does not
* match the userland cookie anymore: is the node known?
*/
if (vp == NULL) {
error = puffs_getvnode(dvp->v_mount,
lookup_msg->pvnr_newnode, lookup_msg->pvnr_vtype,
lookup_msg->pvnr_size, lookup_msg->pvnr_rdev, &vp);
if (error) {
puffs_abortbutton(pmp, PUFFS_ABORT_LOOKUP,
VPTOPNC(dvp), lookup_msg->pvnr_newnode,
ap->a_cnp);
goto out;
}
if (open_msg->pvnr_oflags & PUFFS_OPEN_IO_DIRECT) {
/*
* Flush cache:
* - we do not want a direct write to discard cached writes
* - read cache is now useless and should be freed
*/
mutex_enter(&pn->pn_sizemtx);
flushvncache(vp, 0, 0, true);
mutex_exit(&pn->pn_sizemtx);
/*
* Don't listen to the file server regarding special device
* size info, the file server doesn't know anything about them.
*/
if (vp->v_type == VBLK || vp->v_type == VCHR)
rvap->va_size = vp->v_size;
/* Ditto for blocksize (ufs comment: this doesn't belong here) */
if (vp->v_type == VBLK)
rvap->va_blocksize = BLKDEV_IOSIZE;
else if (vp->v_type == VCHR)
rvap->va_blocksize = MAXBSIZE;
/*
* A lock is required so that we do not race with
* setattr, write and fsync when changing vp->v_size.
* This is critical, since setting a stale, smaller value
* triggers a file truncate in uvm_vnp_setsize(), which
* most of the time means data corruption (a chunk of
* data is replaced by zeroes). This can be removed if
* we decide one day that VOP_GETATTR must operate on
* a locked vnode.
*
* XXX Should be useless now that VOP_GETATTR has been
* fixed to always require a shared lock at least.
*/
mutex_enter(&pn->pn_sizemtx);
REFPN(pn);
vap = ap->a_vap;
if (PUFFS_USE_FS_TTL(pmp)) {
if (!TIMED_OUT(pn->pn_va_timeout)) {
update_va(vp, vap, pn->pn_va_cache,
NULL, NULL, SETATTR_CHSIZE);
goto out2;
}
}
/*
* Flush metacache first. If we are called with some explicit
* parameters, treat them as information overriding metacache
* information.
*/
if (PUFFS_USE_METAFLUSH(pmp) && pn->pn_stat & PNODE_METACACHE_MASK) {
if ((pn->pn_stat & PNODE_METACACHE_ATIME)
&& vap->va_atime.tv_sec == VNOVAL)
vap->va_atime = pn->pn_mc_atime;
if ((pn->pn_stat & PNODE_METACACHE_CTIME)
&& vap->va_ctime.tv_sec == VNOVAL)
vap->va_ctime = pn->pn_mc_ctime;
if ((pn->pn_stat & PNODE_METACACHE_MTIME)
&& vap->va_mtime.tv_sec == VNOVAL)
vap->va_mtime = pn->pn_mc_mtime;
if ((pn->pn_stat & PNODE_METACACHE_SIZE)
&& vap->va_size == VNOVAL)
vap->va_size = pn->pn_mc_size;
pn->pn_stat &= ~PNODE_METACACHE_MASK;
}
/*
* Flush the attribute cache so that other threads do
* not get a stale value during the operation.
*/
if (PUFFS_USE_FS_TTL(pmp))
pn->pn_va_timeout = 0;
if (vap->va_size != VNOVAL) {
/*
* If we truncated the file, make sure the data beyond
* EOF in the last page does not remain in the cache;
* otherwise, if the file is later extended to a larger size
* (creating a hole), that area will not return zeroes as it
* should.
*/
if ((flags & SETATTR_CHSIZE) && PUFFS_USE_PAGECACHE(pmp) &&
(vap->va_size < oldsize))
zerofill_lastpage(vp, vap->va_size);
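/*
 * Illustrative sketch of what a helper like zerofill_lastpage() can do
 * (hedged: the in-tree version may differ; ubc_zerorange() semantics and
 * a zero flags argument are assumed): zero the cached bytes between the
 * new EOF and the end of its page, so that a later extension of the file
 * reads back zeroes from that area.
 */
static void
example_zerofill_lastpage(struct vnode *vp, voff_t off)
{
size_t len = round_page(off) - off;
if (len != 0)
ubc_zerorange(&vp->v_uobj, off, len, 0);
}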
/*
* When puffs_cookie2vnode() misses an entry, vcache_get()
* creates a new node (puffs_vfsop_loadvnode being called to
* initialize the PUFFS part), then it discovers it is VNON,
* and tries to vrele() it. This leads us here, while the
* cookie was stale and the node likely already reclaimed.
*/
if (vp->v_type == VNON) {
return 0;
}
/*
* file server thinks it's gone? then don't care:
* the node's life was already all it would ever be
*/
if (pnode->pn_stat & PNODE_NOREFS) {
pnode->pn_stat |= PNODE_DYING;
recycle = true;
}
/*
* Handle node TTL.
* If the grace period has already timed out, have the node reclaimed.
* Otherwise, queue its expiration to the sop thread, so
* that it does not linger for ages in the freelist,
* holding memory in userspace, when we would have
* to look it up again anyway.
*/
if (PUFFS_USE_FS_TTL(pmp) && !(vp->v_vflag & VV_ROOT) && !recycle) {
bool incache = !TIMED_OUT(pnode->pn_cn_timeout);
bool ingrace = !TIMED_OUT(pnode->pn_cn_grace);
bool reclaimqueued = pnode->pn_stat & PNODE_SOPEXP;
/*
* If thread has disappeared, just give up. The
* fs is being unmounted and the node will be
* reclaimed anyway.
*
* Otherwise, we queue the request but do not
* immediately signal the thread, as the node
* has not been expired yet.
*/
if (pmp->pmp_sopthrcount == 0) {
kmem_free(psopr, sizeof(*psopr));
} else {
TAILQ_INSERT_TAIL(&pmp->pmp_sopnodereqs,
psopr, psopr_entries);
pnode->pn_stat |= PNODE_SOPEXP;
}
mutex_exit(&pmp->pmp_sopmtx);
}
}
/*
* Wipe direct I/O flags
*/
pnode->pn_stat &= ~(PNODE_RDIRECT|PNODE_WDIRECT);
/*
* always FAF, we don't really care if the server wants to fail to
* reclaim the node or not
*/
int
puffs_vnop_reclaim(void *v)
{
struct vop_reclaim_v2_args /* {
const struct vnodeop_desc *a_desc;
struct vnode *a_vp;
} */ *ap = v;
struct vnode *vp = ap->a_vp;
struct puffs_mount *pmp = MPTOPUFFSMP(vp->v_mount);
bool notifyserver = true;
VOP_UNLOCK(vp);
/*
* first things first: check if someone is trying to reclaim the
* root vnode. do not allow that to travel to userspace.
* Note that we don't need to take the lock similarly to
* puffs_root(), since there is only one of us.
*/
if (vp->v_vflag & VV_ROOT) {
mutex_enter(&pmp->pmp_lock);
KASSERT(pmp->pmp_root != NULL);
pmp->pmp_root = NULL;
mutex_exit(&pmp->pmp_lock);
notifyserver = false;
}
/* See the comment on top of puffs_vnop_inactive(). */
if (vp->v_type == VNON)
notifyserver = false;
/*
* purge info from kernel before issuing FAF, since we
* don't really know when we'll get around to it after
* that and someone might race us into node creation
*/
mutex_enter(&pmp->pmp_lock);
if (PUFFS_USE_NAMECACHE(pmp))
cache_purge(vp);
mutex_exit(&pmp->pmp_lock);
if (notifyserver) {
int nlookup = VPTOPP(vp)->pn_nlookup;
/*
* ok, so we need: resid + cookiemem = maxreq
* => resid + cookiesize * (resid/minsize) = maxreq
* => resid + cookiesize/minsize * resid = maxreq
* => (cookiesize/minsize + 1) * resid = maxreq
* => resid = maxreq / (cookiesize/minsize + 1)
*
* Since cookiesize <= minsize and we're not very big on floats,
* we approximate that to be 1. Therefore:
*
* resid = maxreq / 2;
*
* Well, at least we didn't have to use differential equations
* or the Gram-Schmidt process.
*
* (yes, I'm very afraid of this)
*/
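/*
 * Worked example (illustrative): with maxreq = 64k and cookiesize equal
 * to minsize, the formula gives resid = 64k / 2 = 32k, i.e. half of the
 * request buffer holds dirents and half is reserved for cookies. Since
 * cookiesize <= minsize, rounding the ratio up to 1 only makes resid
 * smaller than strictly necessary, which is the safe direction.
 */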
KASSERT(CSIZE <= _DIRENT_MINSIZE((struct dirent *)0));
/* provide cookies to caller if so desired */
if (ap->a_cookies) {
KASSERT(curlwp != uvm.pagedaemon_lwp);
*ap->a_cookies = malloc(readdir_msg->pvnr_ncookies*CSIZE,
M_TEMP, M_WAITOK);
*ap->a_ncookies = readdir_msg->pvnr_ncookies;
memcpy(*ap->a_cookies, readdir_msg->pvnr_data,
*ap->a_ncookies*CSIZE);
}
/* next readdir starts here */
uio->uio_offset = readdir_msg->pvnr_offset;
/*
* poll works by consuming the bitmask in pn_revents. If there are
* events available, poll returns immediately. If not, it issues a
* poll to userspace, selrecords itself and returns with no available
* events. When the file server returns, it executes puffs_parkdone_poll(),
* where available events are added to the bitmask. selnotify() is
* then also executed by that function causing us to enter here again
* and hopefully find the missing bits (unless someone got them first,
* in which case it starts all over again).
*/
int
puffs_vnop_poll(void *v)
{
struct vop_poll_args /* {
const struct vnodeop_desc *a_desc;
struct vnode *a_vp;
int a_events;
} */ *ap = v;
PUFFS_MSG_VARS(vn, poll);
struct vnode *vp = ap->a_vp;
struct puffs_mount *pmp = MPTOPUFFSMP(vp->v_mount);
struct puffs_node *pn = vp->v_data;
int events;
if (EXISTSOP(pmp, POLL)) {
mutex_enter(&pn->pn_mtx);
events = pn->pn_revents & ap->a_events;
if (events & ap->a_events) {
pn->pn_revents &= ~ap->a_events;
mutex_exit(&pn->pn_mtx);
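/*
 * Illustrative sketch of the completion side described above (hedged
 * assumptions: the real puffs_parkdone_poll() lives in the message code
 * and may differ, and the pn_sel selinfo member name is assumed here):
 * merge the events answered by the file server into pn_revents and wake
 * up the waiters recorded by selrecord().
 */
static void
example_parkdone_poll(struct puffs_node *pn, int revents)
{
mutex_enter(&pn->pn_mtx);
pn->pn_revents |= revents; /* consumed by puffs_vnop_poll() above */
mutex_exit(&pn->pn_mtx);
selnotify(&pn->pn_sel, revents, 0); /* re-runs the poll above */
}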
/*
* HELLO! We exit early here if the user server does not
* support fsync OR if we were asked to fsync a node which
* has references neither in the kernel nor in the fs server.
* Otherwise we go on and forward the fsync() to the server.
*/
error = 0;
if (!EXISTSOP(pmp, FSYNC) || (pn->pn_stat & PNODE_DYING))
goto out;
dofaf = (ap->a_flags & FSYNC_WAIT) == 0 || ap->a_flags == FSYNC_LAZY;
/*
* We abuse VXLOCK to mean "vnode is going to die", so we issue
* only FAFs for those. Otherwise there's a danger of deadlock,
* since the execution context here might be the user server
* doing some operation on another fs, which in turn caused a
* vnode to be reclaimed from the freelist for this fs.
*/
if (dofaf == 0) {
mutex_enter(vp->v_interlock);
if (vdead_check(vp, VDEAD_NOWAIT) != 0)
dofaf = 1;
mutex_exit(vp->v_interlock);
}
PUFFS_MSG_ALLOC(vn, fsync);
if (dofaf)
puffs_msg_setfaf(park_fsync);
/*
* XXX: stay in touch with the cache. I don't like this, but
* don't have a better solution either. See also puffs_rename().
*/
if (error == 0) {
puffs_updatenode(pn, PUFFS_UPDATECTIME, 0);
puffs_updatenode(VPTOPP(dvp),
PUFFS_UPDATECTIME|PUFFS_UPDATEMTIME, 0);
}
/* bad bad user file server */
if (readlink_msg->pvnr_linklen > linklen) {
puffs_senderr(pmp, PUFFS_ERR_READLINK, E2BIG,
"linklen too big", VPTOPNC(ap->a_vp));
error = EPROTO;
goto out;
}
/*
* XXX: stay in touch with the cache. I don't like this, but
* don't have a better solution either. See also puffs_link().
*/
if (error == 0) {
puffs_updatenode(fpn, PUFFS_UPDATECTIME, 0);
puffs_updatenode(VPTOPP(fdvp),
PUFFS_UPDATECTIME|PUFFS_UPDATEMTIME, 0);
if (fdvp != tdvp)
puffs_updatenode(VPTOPP(tdvp),
PUFFS_UPDATECTIME|PUFFS_UPDATEMTIME,
0);
if (PUFFS_USE_DOTDOTCACHE(pmp) &&
(VPTOPP(fvp)->pn_parent != tdvp))
update_parent(fvp, tdvp);
/* XXX Update ap->ctx_vp_new_nlink */
}
out:
if (doabort)
VOP_ABORTOP(tdvp, ap->a_tcnp);
if (tvp != NULL)
vput(tvp);
if (tdvp == tvp)
vrele(tdvp);
else
vput(tdvp);
if (doabort)
VOP_ABORTOP(fdvp, ap->a_fcnp);
vrele(fdvp);
vrele(fvp);
/* std sanity */
if (uio->uio_resid == 0)
return 0;
if (uio->uio_offset < 0)
return EFBIG;
/*
* About reading empty files and the (vp->v_size != 0) check below:
* some filesystems (hint: FUSE and distributed filesystems) still
* expect to see the READ in order to update atime. Reading through
* the page cache would filter out empty files, therefore we prefer
* to bypass the cache for them here.
*/
if (vp->v_type == VREG &&
PUFFS_USE_PAGECACHE(pmp) &&
!(pn->pn_stat & PNODE_RDIRECT) &&
(vp->v_size != 0)) {
const int advice = IO_ADV_DECODE(ap->a_ioflag);
while (uio->uio_resid > 0) {
if (vp->v_size <= uio->uio_offset) {
break;
}
bytelen = MIN(uio->uio_resid,
vp->v_size - uio->uio_offset);
if (bytelen == 0)
break;
if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
puffs_updatenode(VPTOPP(vp), PUFFS_UPDATEATIME, 0);
} else {
/*
* in case it's not a regular file or we're operating
* uncached, do the read in the old-fashioned style,
* i.e. with explicit read operations
*/
/*
* in case the file is out of juice, resid from
* userspace is != 0; and the error case is
* quite obvious
*/
if (error || read_msg->pvnr_resid)
break;
}
puffs_msgmem_release(park_read);
}
return error;
}
/*
* XXX: in case of a failure, this leaves uio in a bad state.
* We could theoretically copy the uio and iovecs and "replay"
* them the right amount after the userspace trip, but don't
* bother for now.
*/
int
puffs_vnop_write(void *v)
{
struct vop_write_args /* {
const struct vnodeop_desc *a_desc;
struct vnode *a_vp;
struct uio *a_uio;
int a_ioflag;
kauth_cred_t a_cred;
} */ *ap = v;
PUFFS_MSG_VARS(vn, write);
struct vnode *vp = ap->a_vp;
struct puffs_node *pn = VPTOPP(vp);
struct puffs_mount *pmp = MPTOPUFFSMP(vp->v_mount);
struct uio *uio = ap->a_uio;
size_t tomove, argsize;
off_t oldoff, newoff, origoff;
vsize_t bytelen;
int error, uflags;
int ubcflags;
error = uflags = 0;
write_msg = NULL;
/* std sanity */
if (uio->uio_resid == 0)
return 0;
if (uio->uio_offset < 0)
return EFBIG;
mutex_enter(&pn->pn_sizemtx);
/*
* userspace *should* be allowed to control this,
* but with UBC it's a bit unclear how to handle it
*/
if (ap->a_ioflag & IO_APPEND)
uio->uio_offset = vp->v_size;
/*
* In case of a ubc_uiomove() error,
* opt to not extend the file at all and
* return an error. Otherwise, if we attempt
* to clear the memory we couldn't fault to,
* we might generate a kernel page fault.
*/
if (vp->v_size < newoff) {
if (error == 0) {
uflags |= PUFFS_UPDATESIZE;
uvm_vnp_setsize(vp, newoff);
} else {
uvm_vnp_setwritesize(vp, vp->v_size);
}
}
if (error)
break;
/*
* If we're writing large files, flush to file server
* every 64k. Otherwise we can very easily exhaust
* kernel and user memory, as the file server cannot
* really keep up with our writing speed.
*
* Note: this does *NOT* honor MNT_ASYNC, because
* that gives userland too much say in the kernel.
*/
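/*
 * Worked example (illustrative): if oldoff was 0x1fff0 and this
 * iteration advanced uio_offset to 0x20010, a 64k boundary was crossed
 * (0x1fff0 >> 16 == 1, 0x20010 >> 16 == 2), so the byte range
 * [0x10000, 0x20000) is flushed below.
 */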
if (oldoff >> 16 != uio->uio_offset >> 16) {
rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
error = VOP_PUTPAGES(vp, oldoff & ~0xffff,
uio->uio_offset & ~0xffff,
PGO_CLEANIT | PGO_SYNCIO);
if (error)
break;
}
}
/* didn't move everything? bad userspace. bail */
if (write_msg->pvnr_resid != 0) {
error = EIO;
break;
}
}
puffs_msgmem_release(park_write);
/*
* Direct I/O on write but not on read: we must
* invalidate the written pages so that we read
* the written data and not the stale cache.
*/
if ((error == 0) &&
(vp->v_type == VREG) && PUFFS_USE_PAGECACHE(pmp) &&
(pn->pn_stat & PNODE_WDIRECT) &&
!(pn->pn_stat & PNODE_RDIRECT)) {
voff_t off_lo = trunc_page(origoff);
voff_t off_hi = round_page(uio->uio_offset);
/*
* If we do not use meta flush, we need to update the
* filesystem now, otherwise we will get a stale value
* on the next GETATTR
*/
if (!PUFFS_USE_METAFLUSH(pmp) && (uflags & PUFFS_UPDATESIZE)) {
struct vattr va;
int ret;
vattr_null(&va);
va.va_size = vp->v_size;
ret = dosetattr(vp, &va, FSCRED, 0);
if (ret) {
DPRINTF(("dosetattr set size to %jd failed: %d\n",
(intmax_t)vp->v_size, ret));
}
}
mutex_exit(&pn->pn_sizemtx);
return error;
}
/*
* Short-circuit optimization: don't flush buffer in between
* VOP_INACTIVE and VOP_RECLAIM in case the node has no references.
*/
if (pn->pn_stat & PNODE_DYING) {
KASSERT(BUF_ISWRITE(bp));
bp->b_resid = 0;
goto out;
}
/*
* See explanation for the necessity of a FAF in puffs_fsync.
*
* Also, do FAF in case we're suspending.
* See puffs_vfsops.c:pageflush()
*/
if (BUF_ISWRITE(bp)) {
mutex_enter(vp->v_interlock);
if (vdead_check(vp, VDEAD_NOWAIT) != 0)
dofaf = 1;
if (pn->pn_stat & PNODE_FAF)
dofaf = 1;
mutex_exit(vp->v_interlock);
}
/*
* FAF moved everything. Frankly, we don't
* really have a choice.
*/
if (dofaf && error == 0)
moved = tomove;
else
moved = tomove - rw_msg->pvnr_resid;
/*
* The rest don't get a free trip to userspace and back, they
* have to stay within the kernel.
*/
/*
* bmap doesn't really make any sense for puffs, so just 1:1 map it.
* well, maybe somehow, somewhere, some day ....
*/
int
puffs_vnop_bmap(void *v)
{
struct vop_bmap_args /* {
const struct vnodeop_desc *a_desc;
struct vnode *a_vp;
daddr_t a_bn;
struct vnode **a_vpp;
daddr_t *a_bnp;
int *a_runp;
} */ *ap = v;
struct puffs_mount *pmp;
pmp = MPTOPUFFSMP(ap->a_vp->v_mount);
if (ap->a_vpp)
*ap->a_vpp = ap->a_vp;
if (ap->a_bnp)
*ap->a_bnp = ap->a_bn;
if (ap->a_runp)
*ap->a_runp
= (PUFFS_TOMOVE(pmp->pmp_msg_maxsize, pmp)>>DEV_BSHIFT) - 1;
return 0;
}
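/*
 * Worked example (illustrative, assuming PUFFS_TOMOVE() yields 64k of
 * transferable payload and DEV_BSHIFT is 9): *ap->a_runp becomes
 * (65536 >> 9) - 1 = 127, i.e. up to 127 further contiguous blocks may
 * be requested in one transfer.
 */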
/*
* Handle getpages faults in puffs. We let genfs_getpages() do most
* of the dirty work, but we come in this route to do accounting tasks.
* If the user server has specified functions for cache notifications
* about reads and/or writes, we record which type of operation we got,
* for which page range, and proceed to issue a FAF notification to the
* server about it.
*/
int
puffs_vnop_getpages(void *v)
{
struct vop_getpages_args /* {
const struct vnodeop_desc *a_desc;
struct vnode *a_vp;
voff_t a_offset;
struct vm_page **a_m;
int *a_count;
int a_centeridx;
vm_prot_t a_access_type;
int a_advice;
int a_flags;
} */ *ap = v;
struct puffs_mount *pmp;
struct puffs_node *pn;
struct vnode *vp;
struct vm_page **pgs;
struct puffs_cacheinfo *pcinfo = NULL;
struct puffs_cacherun *pcrun;
void *parkmem = NULL;
size_t runsizes;
int i, npages, si, streakon;
int error, locked, write;
/*
* Check that we aren't trying to fault in pages which our file
* server doesn't know about. This happens if we extend a file by
* skipping some pages and later try to fault in pages which
* are between pn_serversize and vp_size. This check optimizes
* away the common case where a file is being extended.
*/
if (ap->a_offset >= pn->pn_serversize && ap->a_offset < vp->v_size) {
struct vattr va;
/* try again later when we can block */
if (locked)
ERROUT(EBUSY);
/*
* can't block if we're locked and can't mess up caching
* information for fs server. so come back later, please
*/
if (pcinfo == NULL)
ERROUT(ENOMEM);
parkmem = puffs_park_alloc(locked == 0);
if (parkmem == NULL)
ERROUT(ENOMEM);
/*
* Let's see whose fault it was and inform the user server of
* possibly read/written pages. Map pages from read faults
* strictly read-only, since otherwise we might miss info on
* when the page is actually write-faulted to.
*/
if (!locked)
rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
for (i = 0, si = 0, streakon = 0; i < npages; i++) {
if (pgs[i] == NULL || pgs[i] == PGO_DONTCARE) {
if (streakon && write) {
streakon = 0;
/* end the run at the previous (valid) page */
pcrun[si].pcache_runend
= trunc_page(pgs[i-1]->offset) + PAGE_MASK;
si++;
}
continue;
}
if (streakon == 0 && write) {
streakon = 1;
pcrun[si].pcache_runstart = pgs[i]->offset;
}
if (!write)
pgs[i]->flags |= PG_RDONLY;
}
/* was the last page part of our streak? */
if (streakon) {
pcrun[si].pcache_runend
= trunc_page(pgs[i-1]->offset) + PAGE_MASK;
si++;
}
if (!locked)
rw_exit(vp->v_uobj.vmobjlock);
KASSERT(si <= (npages / 2) + 1);
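/*
 * Example (illustrative): for a write fault with npages == 4 and
 * pgs[] = {P, P, NULL, P}, the loop above closes one run over the first
 * two pages when it meets the NULL slot, and the post-loop check closes
 * a second run for the last page, so si == 2. Alternating present and
 * absent pages is the worst case, hence the si <= npages/2 + 1 bound.
 */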
#ifdef notnowjohn
/* send results to userspace */
if (write)
puffs_cacheop(pmp, parkmem, pcinfo,
sizeof(struct puffs_cacheinfo) + runsizes, VPTOPNC(vp));
#endif
out:
if (error) {
if (pcinfo != NULL)
kmem_free(pcinfo,
sizeof(struct puffs_cacheinfo) + runsizes);
#ifdef notnowjohn
if (parkmem != NULL)
puffs_park_release(parkmem, 1);
#endif
}