/*
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Rick Macklem at The University of Guelph.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)nfs_vnops.c 8.19 (Berkeley) 7/31/95
*/
/*
* vnode op calls for Sun NFS version 2 and 3
*/
/*
* Check access cache first. If this request has been made for this
* uid shortly before, use the cached result.
*/
if (cachevalid) {
if (!np->n_accerror) {
if ((np->n_accmode & ap->a_accmode) == ap->a_accmode)
return np->n_accerror;
} else if ((np->n_accmode & ap->a_accmode) == np->n_accmode)
return np->n_accerror;
}
#ifndef NFS_V2_ONLY
/*
* For nfs v3, do an access rpc, otherwise you are stuck emulating
* ufs_access() locally using the vattr. This may not be correct,
* since the server may apply other access criteria such as
* client uid-->server uid mapping that we do not know about, but
* this is better than just returning anything that is lying about
* in the cache.
*/
if (v3) {
nfsstats.rpccnt[NFSPROC_ACCESS]++;
nfsm_reqhead(np, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
nfsm_fhtom(np, v3);
nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
if (ap->a_accmode & VREAD)
mode = NFSV3ACCESS_READ;
else
mode = 0;
if (vp->v_type != VDIR) {
if (ap->a_accmode & VWRITE)
mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
if (ap->a_accmode & VEXEC)
mode |= NFSV3ACCESS_EXECUTE;
} else {
if (ap->a_accmode & VWRITE)
mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
NFSV3ACCESS_DELETE);
if (ap->a_accmode & VEXEC)
mode |= NFSV3ACCESS_LOOKUP;
}
*tl = txdr_unsigned(mode);
nfsm_request(np, NFSPROC_ACCESS, curlwp, ap->a_cred);
nfsm_postop_attr(vp, attrflag, 0);
if (!error) {
nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
rmode = fxdr_unsigned(u_int32_t, *tl);
/*
* The NFS V3 spec does not clarify whether or not
* the returned access bits can be a superset of
* the ones requested, so...
*/
if ((rmode & mode) != mode)
error = EACCES;
}
nfsm_reqdone;
} else
#endif
return (nfsspec_access(ap));
#ifndef NFS_V2_ONLY
/*
* Disallow write attempts on filesystems mounted read-only;
* unless the file is a socket, fifo, or a block or character
* device resident on the filesystem.
*/
if (!error && (ap->a_accmode & VWRITE) &&
(vp->v_mount->mnt_flag & MNT_RDONLY)) {
switch (vp->v_type) {
case VREG:
case VDIR:
case VLNK:
error = EROFS;
default:
break;
}
}
if (!error || error == EACCES) {
/*
* If we got the same result as for a previous,
* different request, OR it in. Don't update
* the timestamp in that case.
*/
if (cachevalid && np->n_accstamp != -1 &&
error == np->n_accerror) {
if (!error)
np->n_accmode |= ap->a_accmode;
else if ((np->n_accmode & ap->a_accmode) == ap->a_accmode)
np->n_accmode = ap->a_accmode;
} else {
np->n_accstamp = time_uptime;
np->n_accuid = kauth_cred_geteuid(ap->a_cred);
np->n_accmode = ap->a_accmode;
np->n_accerror = error;
}
}
return (error);
#endif
}
/*
* nfs open vnode op
* Check to see if the type is ok
* and that deletion is not in progress.
* For paged in text files, you will need to flush the page cache
* if consistency is lost.
*/
/* ARGSUSED */
int
nfs_open(void *v)
{
struct vop_open_args /* {
struct vnode *a_vp;
int a_mode;
kauth_cred_t a_cred;
} */ *ap = v;
struct vnode *vp = ap->a_vp;
struct nfsnode *np = VTONFS(vp);
int error;
if (ap->a_mode & FREAD) {
if (np->n_rcred != NULL)
kauth_cred_free(np->n_rcred);
np->n_rcred = ap->a_cred;
kauth_cred_hold(np->n_rcred);
}
if (ap->a_mode & FWRITE) {
if (np->n_wcred != NULL)
kauth_cred_free(np->n_wcred);
np->n_wcred = ap->a_cred;
kauth_cred_hold(np->n_wcred);
}
error = nfs_flushstalebuf(vp, ap->a_cred, curlwp, 0);
if (error)
return error;
NFS_INVALIDATE_ATTRCACHE(np); /* For Open/Close consistency */
return (0);
}
/*
* nfs close vnode op
* What an NFS client should do upon close after writing is a debatable issue.
* Most NFS clients push delayed writes to the server upon close, basically for
* two reasons:
* 1 - So that any write errors may be reported back to the client process
* doing the close system call. By far the two most likely errors are
* NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
* 2 - To put a worst case upper bound on cache inconsistency between
* multiple clients for the file.
* There is also a consistency problem for Version 2 of the protocol w.r.t.
* not being able to tell if other clients are writing a file concurrently,
* since there is no way of knowing if the changed modify time in the reply
* is only due to the write for this client.
* (NFS Version 3 provides weak cache consistency data in the reply that
* should be sufficient to detect and handle this case.)
*
* The current code does the following:
* for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
* for NFS Version 3 - flush dirty buffers to the server but don't invalidate
* or commit them (this satisfies 1 and 2 except for the
* case where the server crashes after this close but
* before the commit RPC, which is felt to be "good
* enough"). Changing the last argument to nfs_flush() to
* a 1 would force a commit operation, if it is felt a
* commit is necessary now.
*/
/* ARGSUSED */
int
nfs_close(void *v)
{
struct vop_close_args /* {
struct vnodeop_desc *a_desc;
struct vnode *a_vp;
int a_fflag;
kauth_cred_t a_cred;
} */ *ap = v;
struct vnode *vp = ap->a_vp;
struct nfsnode *np = VTONFS(vp);
int error = 0;
UVMHIST_FUNC("nfs_close"); UVMHIST_CALLED(ubchist);
/*
* RFC1813(nfsv3) 3.2 says clients should handle "." by themselves.
*/
if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred);
if (error)
return error;
if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN))
return EISDIR;
vref(dvp);
*vpp = dvp;
return 0;
}
np = VTONFS(dvp);
/*
* Before performing an RPC, check the name cache to see if
* the directory/name pair we are looking for is known already.
* If the directory/name pair is found in the name cache,
* we have to ensure the directory has not changed from
* the time the cache entry has been created. If it has,
* the cache entry has to be ignored.
*/
cachefound = cache_lookup_raw(dvp, cnp->cn_nameptr, cnp->cn_namelen,
cnp->cn_flags, NULL, vpp);
KASSERT(dvp != *vpp);
KASSERT((cnp->cn_flags & ISWHITEOUT) == 0);
if (cachefound) {
struct vattr vattr;
if (*vpp == NULLVP) {
/* namecache gave us a negative result */
error = ENOENT;
goto noentry;
}
/*
* investigate the vnode returned by cache_lookup_raw.
* if it isn't appropriate, do an rpc.
*/
newvp = *vpp;
if ((flags & ISDOTDOT) != 0) {
VOP_UNLOCK(dvp);
}
error = vn_lock(newvp, LK_SHARED);
if ((flags & ISDOTDOT) != 0) {
vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
}
if (error != 0) {
/* newvp has been reclaimed. */
vrele(newvp);
*vpp = NULLVP;
goto dorpc;
}
if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred)
&& vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) {
nfsstats.lookupcache_hits++;
KASSERT(newvp->v_type != VNON);
VOP_UNLOCK(newvp);
return (0);
}
cache_purge1(newvp, NULL, 0, PURGE_PARENTS);
vput(newvp);
*vpp = NULLVP;
}
dorpc:
#if 0
/*
* because nfsv3 has the same CREATE semantics as ours,
* we don't have to perform LOOKUPs beforehand.
*
* XXX ideally we can do the same for nfsv2 in the case of !O_EXCL.
* XXX although we have no way to know if O_EXCL is requested or not.
*/
/*
* The postop attr handling is duplicated for each if case,
* because it should be done while dvp is locked (unlocking
* dvp is different for each case).
*/
if (NFS_CMPFH(np, fhp, fhsize)) {
/*
* As we handle "." lookup locally, this is
* a broken server.
*/
m_freem(mrep);
return EBADRPC;
} else if (flags & ISDOTDOT) {
/*
* ".." lookup
*/
VOP_UNLOCK(dvp);
error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
if (error) {
m_freem(mrep);
return error;
}
newvp = NFSTOV(np);
#ifndef NFS_V2_ONLY
if (v3) {
nfsm_postop_attr(newvp, attrflag, 0);
nfsm_postop_attr(dvp, attrflag, 0);
} else
#endif
nfsm_loadattr(newvp, (struct vattr *)0, 0);
} else {
/*
* Other lookups.
*/
error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
if (error) {
m_freem(mrep);
return error;
}
newvp = NFSTOV(np);
#ifndef NFS_V2_ONLY
if (v3) {
nfsm_postop_attr(newvp, attrflag, 0);
nfsm_postop_attr(dvp, attrflag, 0);
} else
#endif
nfsm_loadattr(newvp, (struct vattr *)0, 0);
}
if (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN)) {
nfs_cache_enter(dvp, newvp, cnp);
}
*vpp = newvp;
nfsm_reqdone;
if (error) {
/*
* We get here only because of errors returned by
* the RPC. Otherwise we'll have returned above
* (the nfsm_* macros will jump to nfsm_reqdone
* on error).
*/
if (error == ENOENT && cnp->cn_nameiop != CREATE) {
nfs_cache_enter(dvp, NULL, cnp);
}
if (newvp != NULLVP) {
if (newvp == dvp) {
vrele(newvp);
} else {
vput(newvp);
}
}
noentry:
if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
(flags & ISLASTCN) && error == ENOENT) {
if (dvp->v_mount->mnt_flag & MNT_RDONLY) {
error = EROFS;
} else {
error = EJUSTRETURN;
}
}
*vpp = NULL;
return error;
}
validate:
/*
* make sure we have valid type and size.
*/
nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
/* Set both "begin" and "current" to non-garbage. */
x = txdr_unsigned((u_int32_t)uiop->uio_offset);
*tl++ = x; /* "begin offset" */
*tl++ = x; /* "current offset" */
x = txdr_unsigned(len);
*tl++ = x; /* total to this offset */
*tl = x; /* size of this write */
}
if (pageprotected) {
/*
* since we know pages can't be modified during i/o,
* no need to copy them for us.
*/
struct mbuf *m;
struct iovec *iovp = uiop->uio_iov;
m = m_get(M_WAIT, MT_DATA);
MCLAIM(m, &nfs_mowner);
MEXTADD(m, iovp->iov_base, len, M_MBUF,
nfs_writerpc_extfree, &ctx);
m->m_flags |= M_EXT_ROMAP;
m->m_len = len;
mb->m_next = m;
/*
* no need to maintain mb and bpos here
* because nobody uses them later.
*/
#if 0
mb = m;
bpos = mtod(void *, mb) + mb->m_len;
#endif
UIO_ADVANCE(uiop, len);
uiop->uio_offset += len;
mutex_enter(&ctx.nwc_lock);
ctx.nwc_mbufcount++;
mutex_exit(&ctx.nwc_lock);
nfs_zeropad(mb, 0, nfsm_padlen(len));
} else {
nfsm_uiotom(uiop, len);
}
nfsm_request(np, NFSPROC_WRITE, curlwp, np->n_wcred);
#ifndef NFS_V2_ONLY
if (v3) {
wccflag = NFSV3_WCCCHK;
nfsm_wcc_data(vp, wccflag, NAC_NOTRUNC, !error);
if (!error) {
nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED
+ NFSX_V3WRITEVERF);
rlen = fxdr_unsigned(int, *tl++);
if (rlen == 0) {
error = NFSERR_IO;
m_freem(mrep);
break;
} else if (rlen < len) {
backup = len - rlen;
UIO_ADVANCE(uiop, -backup);
uiop->uio_offset -= backup;
len = rlen;
}
commit = fxdr_unsigned(int, *tl++);
/*
* Return the lowest commitment level
* obtained by any of the RPCs.
*/
if (committed == NFSV3WRITE_FILESYNC)
committed = commit;
else if (committed == NFSV3WRITE_DATASYNC &&
commit == NFSV3WRITE_UNSTABLE)
committed = commit;
mutex_enter(&nmp->nm_lock);
if ((nmp->nm_iflag & NFSMNT_HASWRITEVERF) == 0){
memcpy(nmp->nm_writeverf, tl,
NFSX_V3WRITEVERF);
nmp->nm_iflag |= NFSMNT_HASWRITEVERF;
} else if ((nmp->nm_iflag &
NFSMNT_STALEWRITEVERF) ||
memcmp(tl, nmp->nm_writeverf,
NFSX_V3WRITEVERF)) {
memcpy(nmp->nm_writeverf, tl,
NFSX_V3WRITEVERF);
/*
* note NFSMNT_STALEWRITEVERF
* if we're the first thread to
* notice it.
*/
if ((nmp->nm_iflag &
NFSMNT_STALEWRITEVERF) == 0) {
stalewriteverf = true;
nmp->nm_iflag |=
NFSMNT_STALEWRITEVERF;
}
}
mutex_exit(&nmp->nm_lock);
}
} else
#endif
nfsm_loadattr(vp, (struct vattr *)0, NAC_NOTRUNC);
if (wccflag)
VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr->va_mtime;
m_freem(mrep);
if (error)
break;
tsiz -= len;
byte_count += len;
if (stalewriteverf) {
*stalewriteverfp = true;
stalewriteverf = false;
if (committed == NFSV3WRITE_UNSTABLE &&
len != origresid) {
/*
* If our write requests were not atomic and were
* unstable, data written in previous iterations
* might already have been lost.
* In that case, we should resend it to nfsd.
*/
backup = origresid - tsiz;
UIO_ADVANCE(uiop, -backup);
uiop->uio_offset -= backup;
tsiz = origresid;
goto retry;
}
}
}
nfsmout:
iostat_unbusy(nmp->nm_stats, byte_count, 0);
if (pageprotected) {
/*
* wait until mbufs go away.
* retransmitted mbufs can survive longer than rpc requests
* themselves.
*/
mutex_enter(&ctx.nwc_lock);
ctx.nwc_mbufcount--;
while (ctx.nwc_mbufcount > 0) {
cv_wait(&ctx.nwc_cv, &ctx.nwc_lock);
}
mutex_exit(&ctx.nwc_lock);
}
mutex_destroy(&ctx.nwc_lock);
cv_destroy(&ctx.nwc_cv);
*iomode = committed;
if (error)
uiop->uio_resid = tsiz;
return (error);
}
/*
* nfs mknod rpc
* For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
* mode set to specify the file type and the size field for rdev.
*/
int
nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap)
{
struct nfsv2_sattr *sp;
u_int32_t *tl;
char *cp;
int32_t t1, t2;
struct vnode *newvp = (struct vnode *)0;
struct nfsnode *dnp, *np;
char *cp2;
char *bpos, *dpos;
int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0;
struct mbuf *mreq, *mrep, *md, *mb;
u_int32_t rdev;
const int v3 = NFS_ISV3(dvp);
/*
* make sure that we'll update timestamps as
* most server implementations use them to store
* the create verifier.
*
* XXX it's better to use TOSERVER always.
*/
if (vap->va_atime.tv_sec == VNOVAL)
vap->va_atime = ts;
if (vap->va_mtime.tv_sec == VNOVAL)
vap->va_mtime = ts;
/*
 * nfs file remove call
 *
 * To try and make nfs semantics closer to ufs semantics, a file that has
 * other processes using the vnode is renamed instead of removed and then
 * removed later on the last close.
 *
 *	- directories are refused with EPERM
 *	- if vrefcnt(vp) == 1, or the node is already sillyrenamed and its
 *	  attributes can be fetched and still show more than one link,
 *	  purge the name cache, throw away cached buffers and do the
 *	  remove rpc
 *	- otherwise, if a sillyrename is not already in the works, call
 *	  nfs_sillyrename() to set it up
 *	- otherwise nothing is sent and 0 is returned
 *
 * On success, when cached attributes are available, the expected new link
 * count is reported back through ap->ctx_vp_new_nlink and the node is
 * flagged NREMOVED if that was its last link.  The attribute cache of the
 * node is always invalidated and the reference on vp is always dropped
 * (vrele() when vp == dvp, vput() otherwise).
 *
 * Returns 0 or an errno value.
 */
int
nfs_remove(void *v)
{
	struct vop_remove_v3_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode * a_dvp;
		struct vnode * a_vp;
		struct componentname * a_cnp;
		nlink_t ctx_vp_new_nlink;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	struct nfsnode *np = VTONFS(vp);
	int error = 0;
	struct vattr vattr;

	/*
	 * Sanity check for diagnostic kernels only.  This used to be
	 * guarded by "#ifndef DIAGNOSTIC", which compiled the check into
	 * exactly the kernels that did not ask for diagnostics.
	 */
#ifdef DIAGNOSTIC
	if (vrefcnt(vp) < 1)
		panic("nfs_remove: bad vrefcnt(vp)");
#endif
	if (vp->v_type == VDIR)
		error = EPERM;
	else if (vrefcnt(vp) == 1 || (np->n_sillyrename &&
	    VOP_GETATTR(vp, &vattr, cnp->cn_cred) == 0 &&
	    vattr.va_nlink > 1)) {
		/*
		 * Purge the name cache so that the chance of a lookup for
		 * the name succeeding while the remove is in progress is
		 * minimized. Without node locking it can still happen, such
		 * that an I/O op returns ESTALE, but since you get this if
		 * another host removes the file..
		 */
		cache_purge(vp);
		/*
		 * throw away biocache buffers, mainly to avoid
		 * unnecessary delayed writes later.
		 */
		error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, curlwp, 1);
		/* Do the rpc, unless the invalidation was interrupted. */
		if (error != EINTR)
			error = nfs_removerpc(dvp, cnp->cn_nameptr,
			    cnp->cn_namelen, cnp->cn_cred, curlwp);
	} else if (!np->n_sillyrename)
		error = nfs_sillyrename(dvp, vp, cnp, false);

	/*
	 * Report the link count the file is expected to have after the
	 * removal, based on the attributes cached before it.
	 */
	if (error == 0 && nfs_getattrcache(vp, &vattr) == 0) {
		ap->ctx_vp_new_nlink = vattr.va_nlink - 1;
		if (vattr.va_nlink == 1)
			np->n_flag |= NREMOVED;
	}
	NFS_INVALIDATE_ATTRCACHE(np);
	if (dvp == vp)
		vrele(vp);
	else
		vput(vp);
	return (error);
}
/*
* nfs file remove rpc called from nfs_inactive
*/
int
nfs_removeit(struct sillyrename *sp)
{
/*
* Nfs remove rpc, called from nfs_remove() and nfs_removeit().
*
* Sends an NFSPROC_REMOVE request for the entry "name" (namelen bytes)
* in directory dvp, using credential cred on behalf of lwp l.
* Afterwards the directory's nfsnode is flagged NMODIFIED, and its
* attribute cache is invalidated if wccflag ended up zero.
*
* Returns 0 on success or an error value. ENOENT is turned into
* success when rexmit is set (see the comment near the end).
*
* NOTE(review): tl, cp, t1, t2, cp2, bpos, dpos and the mbuf pointers
* are never referenced directly in this function; presumably the
* nfsm_* macros use them by name, so they must not be renamed or
* removed -- confirm against the macro definitions.
*/
int
nfs_removerpc(struct vnode *dvp, const char *name, int namelen, kauth_cred_t cred, struct lwp *l)
{
u_int32_t *tl;
char *cp;
#ifndef NFS_V2_ONLY
int32_t t1;
char *cp2;
#endif
int32_t t2;
char *bpos, *dpos;
int error = 0, wccflag = NFSV3_WCCRATTR;
struct mbuf *mreq, *mrep, *md, *mb;
const int v3 = NFS_ISV3(dvp);
/* Handed to nfsm_request1(); presumably set if the request was retransmitted. */
int rexmit = 0;
struct nfsnode *dnp = VTONFS(dvp);
/* Account for the rpc in the per-procedure statistics. */
nfsstats.rpccnt[NFSPROC_REMOVE]++;
/* Request: directory file handle followed by the entry name. */
nfsm_reqhead(dnp, NFSPROC_REMOVE,
NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen));
nfsm_fhtom(dnp, v3);
nfsm_strtom(name, namelen, NFS_MAXNAMLEN);
nfsm_request1(dnp, NFSPROC_REMOVE, l, cred, &rexmit);
#ifndef NFS_V2_ONLY
/* Only the v3 reply is parsed for wcc data on the directory. */
if (v3)
nfsm_wcc_data(dvp, wccflag, 0, !error);
#endif
nfsm_reqdone;
/* The directory is flagged as modified whether or not the rpc succeeded. */
VTONFS(dvp)->n_flag |= NMODIFIED;
if (!wccflag)
NFS_INVALIDATE_ATTRCACHE(VTONFS(dvp));
/*
* Kludge City: If the first reply to the remove rpc is lost..
* the reply to the retransmitted request will be ENOENT
* since the file was in fact removed
* Therefore, we cheat and return success.
*/
if (rexmit && error == ENOENT)
error = 0;
return (error);
}
/*
* If the tvp exists and is in use, sillyrename it before doing the
* rename of the new file over it.
*
* Have sillyrename use link instead of rename if possible,
* so that we don't lose the file if the rename fails, and so
* that there's no window when the "to" file doesn't exist.
*/
if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename &&
tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp, true)) {
VN_KNOTE(tvp, NOTE_DELETE);
vput(tvp);
tvp = NULL;
}
abrt = 0;
/*
* Push all writes to the server, so that the attribute cache
* doesn't get "out of sync" with the server.
* XXX There should be a better way!
*/
VOP_FSYNC(vp, cnp->cn_cred, FSYNC_WAIT, 0, 0);
if (!error && ap->a_cookies) {
/*
* Only the NFS server and emulations use cookies, and they
* load the directory block into system space, so we can
* just look at it directly.
*/
if (!VMSPACE_IS_KERNEL_P(uio->uio_vmspace) ||
uio->uio_iovcnt != 1)
panic("nfs_readdir: lost in space");
for (nc = 0; ncookies-- &&
base < (char *)uio->uio_iov->iov_base; nc++){
dp = (struct dirent *) base;
if (dp->d_reclen == 0)
break;
if (nmp->nm_flag & NFSMNT_XLATECOOKIE)
*(cookies++) = (off_t)NFS_GETCOOKIE32(dp);
else
*(cookies++) = NFS_GETCOOKIE(dp);
base += dp->d_reclen;
}
uio->uio_resid +=
((char *)uio->uio_iov->iov_base - base);
uio->uio_iov->iov_len +=
((char *)uio->uio_iov->iov_base - base);
uio->uio_iov->iov_base = base;
*ap->a_ncookies = nc;
}
/*
* Readdir rpc call.
* Called from below the buffer cache by nfs_doio().
*/
int
nfs_readdirrpc(struct vnode *vp, struct uio *uiop, kauth_cred_t cred)
{
int len, left;
struct dirent *dp = NULL;
u_int32_t *tl;
char *cp;
int32_t t1, t2;
char *bpos, *dpos, *cp2;
struct mbuf *mreq, *mrep, *md, *mb;
struct nfsmount *nmp = VFSTONFS(vp->v_mount);
struct nfsnode *dnp = VTONFS(vp);
u_quad_t fileno;
int error = 0, more_dirs = 1, blksiz = 0, bigenough = 1;
#ifndef NFS_V2_ONLY
int attrflag;
#endif
int nrpcs = 0, reclen;
const int v3 = NFS_ISV3(vp);
#ifdef DIAGNOSTIC
/*
* Should be called from buffer cache, so only amount of
* NFS_DIRBLKSIZ will be requested.
*/
if (uiop->uio_iovcnt != 1 || uiop->uio_resid != NFS_DIRBLKSIZ)
panic("nfs readdirrpc bad uio");
#endif
/*
* Loop around doing readdir rpc's of size nm_readdirsize
* truncated to a multiple of NFS_DIRFRAGSIZ.
* The stopping criteria is EOF or buffer full.
*/
while (more_dirs && bigenough) {
/*
* Heuristic: don't bother to do another RPC to further
* fill up this block if there is not much room left. (< 50%
* of the readdir RPC size). This wastes some buffer space
* but can save up to 50% in RPC calls.
*/
if (nrpcs > 0 && uiop->uio_resid < (nmp->nm_readdirsize / 2)) {
bigenough = 0;
break;
}
nfsstats.rpccnt[NFSPROC_READDIR]++;
nfsm_reqhead(dnp, NFSPROC_READDIR, NFSX_FH(v3) +
NFSX_READDIR(v3));
nfsm_fhtom(dnp, v3);
#ifndef NFS_V2_ONLY
if (v3) {
nfsm_build(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
if (nmp->nm_iflag & NFSMNT_SWAPCOOKIE) {
txdr_swapcookie3(uiop->uio_offset, tl);
} else {
txdr_cookie3(uiop->uio_offset, tl);
}
tl += 2;
if (uiop->uio_offset == 0) {
*tl++ = 0;
*tl++ = 0;
} else {
*tl++ = dnp->n_cookieverf.nfsuquad[0];
*tl++ = dnp->n_cookieverf.nfsuquad[1];
}
} else
#endif
{
nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
*tl++ = txdr_unsigned(uiop->uio_offset);
}
*tl = txdr_unsigned(nmp->nm_readdirsize);
nfsm_request(dnp, NFSPROC_READDIR, curlwp, cred);
nrpcs++;
#ifndef NFS_V2_ONLY
if (v3) {
nfsm_postop_attr(vp, attrflag, 0);
if (!error) {
nfsm_dissect(tl, u_int32_t *,
2 * NFSX_UNSIGNED);
dnp->n_cookieverf.nfsuquad[0] = *tl++;
dnp->n_cookieverf.nfsuquad[1] = *tl;
} else {
m_freem(mrep);
goto nfsmout;
}
}
#endif
nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
more_dirs = fxdr_unsigned(int, *tl);
/* loop thru the dir entries, doctoring them to 4bsd form */
while (more_dirs && bigenough) {
#ifndef NFS_V2_ONLY
if (v3) {
nfsm_dissect(tl, u_int32_t *,
3 * NFSX_UNSIGNED);
fileno = fxdr_hyper(tl);
len = fxdr_unsigned(int, *(tl + 2));
} else
#endif
{
nfsm_dissect(tl, u_int32_t *,
2 * NFSX_UNSIGNED);
fileno = fxdr_unsigned(u_quad_t, *tl++);
len = fxdr_unsigned(int, *tl);
}
if (len <= 0 || len > NFS_MAXNAMLEN) {
error = EBADRPC;
m_freem(mrep);
goto nfsmout;
}
/* for cookie stashing */
reclen = _DIRENT_RECLEN(dp, len) + 2 * sizeof(off_t);
left = NFS_DIRFRAGSIZ - blksiz;
if (reclen > left) {
memset(uiop->uio_iov->iov_base, 0, left);
dp->d_reclen += left;
UIO_ADVANCE(uiop, left);
blksiz = 0;
NFS_STASHCOOKIE(dp, uiop->uio_offset);
}
if (reclen > uiop->uio_resid)
bigenough = 0;
if (bigenough) {
int tlen;
/*
* kludge: if we got no entries, treat it as EOF.
* Some servers sometimes send a reply without any
* entries or EOF.
* Although it might mean the server has a very long name,
* we can't handle such entries anyway.
*/
if (uiop->uio_resid >= NFS_DIRBLKSIZ)
more_dirs = 0;
}
m_freem(mrep);
}
/*
* Fill last record, iff any, out to a multiple of NFS_DIRFRAGSIZ
* by increasing d_reclen for the last record.
*/
if (blksiz > 0) {
left = NFS_DIRFRAGSIZ - blksiz;
memset(uiop->uio_iov->iov_base, 0, left);
dp->d_reclen += left;
NFS_STASHCOOKIE(dp, uiop->uio_offset);
UIO_ADVANCE(uiop, left);
}
/*
* We are now either at the end of the directory or have filled the
* block.
*/
if (bigenough) {
dnp->n_direofoffset = uiop->uio_offset;
dnp->n_flag |= NEOFVALID;
}
nfsmout:
return (error);
}
/*
* Since the attributes are before the file handle
* (sigh), we must skip over the attributes and then
* come back and get them.
*/
attrflag = fxdr_unsigned(int, *tl);
if (attrflag) {
nfsm_dissect(fp, struct nfs_fattr *, NFSX_V3FATTR);
memcpy(&fattr, fp, NFSX_V3FATTR);
nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
doit = fxdr_unsigned(int, *tl);
if (doit) {
nfsm_getfh(fhp, fhsize, 1);
if (NFS_CMPFH(dnp, fhp, fhsize)) {
vref(vp);
newvp = vp;
np = dnp;
} else {
error = nfs_nget1(vp->v_mount, fhp,
fhsize, &np, LK_NOWAIT);
if (!error)
newvp = NFSTOV(np);
}
if (!error) {
nfs_loadattrcache(&newvp, &fattr, 0, 0);
if (bigenough) {
dp->d_type =
IFTODT(VTTOIF(np->n_vattr->va_type));
ndp->ni_vp = newvp;
nfs_cache_enter(ndp->ni_dvp,
ndp->ni_vp, cnp);
}
}
error = 0;
}
} else {
/* Just skip over the file handle */
nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
i = fxdr_unsigned(int, *tl);
nfsm_adv(nfsm_rndup(i));
}
if (newvp != NULLVP) {
if (newvp == vp)
vrele(newvp);
else
vput(newvp);
newvp = NULLVP;
}
nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
more_dirs = fxdr_unsigned(int, *tl);
}
/*
* If at end of rpc data, get the eof boolean
*/
if (!more_dirs) {
nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
more_dirs = (fxdr_unsigned(int, *tl) == 0);
/*
* kludge: see a comment in nfs_readdirrpc.
*/
if (uiop->uio_resid >= NFS_DIRBLKSIZ)
more_dirs = 0;
}
m_freem(mrep);
}
/*
* Fill last record, iff any, out to a multiple of NFS_DIRFRAGSIZ
* by increasing d_reclen for the last record.
*/
if (blksiz > 0) {
left = NFS_DIRFRAGSIZ - blksiz;
memset(uiop->uio_iov->iov_base, 0, left);
dp->d_reclen += left;
NFS_STASHCOOKIE(dp, uiop->uio_offset);
UIO_ADVANCE(uiop, left);
}
/*
* We are now either at the end of the directory or have filled the
* block.
*/
if (bigenough) {
dnp->n_direofoffset = uiop->uio_offset;
dnp->n_flag |= NEOFVALID;
}
nfsmout:
if (newvp != NULLVP) {
if(newvp == vp)
vrele(newvp);
else
vput(newvp);
}
return (error);
}
#endif
/*
* Silly rename. To make the stateless NFS filesystem look a little
* more like "ufs", a remove of an active vnode is translated to a rename
* to a funny looking filename that is removed by nfs_inactive on the
* nfsnode. There is the potential for another process on a different client
* to create the same funny name between the time nfs_lookitup() fails and
* the time the rename completes, but...
*
* dvp is the directory, vp the file being removed and cnp its current
* name. If dolink is true a link rpc is tried first, and the rename is
* used only when the link rpc reports ENOTSUP.
*
* Returns 0 on success. On failure the sillyrename record is torn down
* (directory reference, credential and memory released) and the error is
* returned.
*/
int
nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, bool dolink)
{
struct sillyrename *sp;
struct nfsnode *np;
int error;
pid_t pid;
/*
* NOTE(review): no assignment to sp, np or pid is visible in this
* function, yet sp is dereferenced immediately below and np is
* dereferenced after the last lookup. The code that allocates and
* fills in sp (s_name, s_namlen, s_cred, s_dvp) and sets np appears to
* be missing here -- confirm against the complete source.
*/
/*
* Try lookitups until we get a name that isn't there, stepping the
* character at index 4 of the name after each hit. Give up with
* EINVAL once that character passes 'z'.
*/
while (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
curlwp, (struct nfsnode **)0) == 0) {
sp->s_name[4]++;
if (sp->s_name[4] > 'z') {
error = EINVAL;
goto bad;
}
}
if (dolink) {
error = nfs_linkrpc(dvp, vp, sp->s_name, sp->s_namlen,
sp->s_cred, curlwp);
/*
* nfs_request maps NFSERR_NOTSUPP to ENOTSUP.
* Fall back to a plain rename in that case.
*/
if (error == ENOTSUP) {
error = nfs_renameit(dvp, cnp, sp);
}
} else {
error = nfs_renameit(dvp, cnp, sp);
}
if (error)
goto bad;
/*
* Look the silly name up again, passing &np this time.
* NOTE(review): the result of this lookup is stored in error but
* never checked; the function returns 0 regardless.
*/
error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
curlwp, &np);
/* Attach the record to the nfsnode. */
np->n_sillyrename = sp;
return (0);
bad:
/* Undo: drop the directory reference and credential, free the record. */
vrele(sp->s_dvp);
kauth_cred_free(sp->s_cred);
kmem_free(sp, sizeof(*sp));
return (error);
}
/*
* Look up a file name and optionally either update the file handle or
* allocate an nfsnode, depending on the value of npp.
* npp == NULL --> just do the lookup
* *npp == NULL --> allocate a new nfsnode and make sure attributes are
* handled too
* *npp != NULL --> update the file handle in the vnode
*/
int
nfs_lookitup(struct vnode *dvp, const char *name, int len, kauth_cred_t cred, struct lwp *l, struct nfsnode **npp)
{
u_int32_t *tl;
char *cp;
int32_t t1, t2;
struct vnode *newvp = (struct vnode *)0;
struct nfsnode *np, *dnp = VTONFS(dvp);
char *bpos, *dpos, *cp2;
int error = 0, ofhlen, fhlen;
#ifndef NFS_V2_ONLY
int attrflag;
#endif
struct mbuf *mreq, *mrep, *md, *mb;
nfsfh_t *ofhp, *nfhp;
const int v3 = NFS_ISV3(dvp);
/*
* Kludge City..
* - make nfs_bmap() essentially a no-op that does no translation
* - do nfs_strategy() by doing I/O with nfs_readrpc/nfs_writerpc
* (Maybe I could use the process's page mapping, but I was concerned that
* Kernel Write might not be enabled and also figured copyout() would do
* a lot more work than memcpy() and also it currently happens in the
* context of the swapper process (2).)
*/
int
nfs_bmap(void *v)
{
struct vop_bmap_args /* {
struct vnode *a_vp;
daddr_t a_bn;
struct vnode **a_vpp;
daddr_t *a_bnp;
int *a_runp;
} */ *ap = v;
struct vnode *vp = ap->a_vp;
int bshift = vp->v_mount->mnt_fs_bshift - vp->v_mount->mnt_dev_bshift;
/*
 * Strategy routine.
 *
 * Asynchronous buffers are first offered to nfs_asyncio(); when that
 * returns 0 there is nothing more to do here.  Synchronous buffers, and
 * asynchronous ones that nfs_asyncio() did not accept, are handled
 * directly by calling nfs_doio().
 *
 * A buffer that is both B_PHYS and B_ASYNC is not supported and panics.
 *
 * Returns 0 when the request was handed to nfs_asyncio(), otherwise the
 * result of nfs_doio().
 */
int
nfs_strategy(void *v)
{
	struct vop_strategy_args *ap = v;
	struct buf *bp = ap->a_bp;
	const int is_async = (bp->b_flags & B_ASYNC) != 0;

	if (is_async && (bp->b_flags & B_PHYS) != 0)
		panic("nfs physio/async");

	/* Asynchronous request accepted by nfs_asyncio(): nothing left to do. */
	if (is_async && nfs_asyncio(bp) == 0)
		return (0);

	/* Synchronous request, or the async hand-off failed: do it ourselves. */
	return (nfs_doio(bp));
}
/*
* Flush all the data associated with a vnode.
*
* NOTE(review): the body below does not match this description or the
* signature. None of cred, waitfor, l, commit, np, error or flushflags is
* used, and the return statement refers to "v", which is not declared in
* this function. The statements look like the tail of an unlock vnode op
* (they call nfs_delayedtruncate() and genfs_unlock()); the real flush
* logic appears to be missing -- confirm against the complete source.
*/
int
nfs_flush(struct vnode *vp, kauth_cred_t cred, int waitfor, struct lwp *l,
int commit)
{
struct nfsnode *np = VTONFS(vp);
int error;
int flushflags = PGO_ALLPAGES|PGO_CLEANIT|PGO_SYNCIO;
UVMHIST_FUNC("nfs_flush"); UVMHIST_CALLED(ubchist);
/*
* VOP_UNLOCK can be called by nfs_loadattrcache
* with v_data == 0.
* Only run the delayed truncate when the vnode still has an nfsnode.
*/
if (VTONFS(vp)) {
nfs_delayedtruncate(vp);
}
return genfs_unlock(v);
}
/*
* nfs special file access vnode op.
* Essentially just get vattr and then imitate iaccess() since the device is
* local to the client.
*/
int
nfsspec_access(void *v)
{
struct vop_access_args /* {
struct vnode *a_vp;
accmode_t a_accmode;
kauth_cred_t a_cred;
struct lwp *a_l;
} */ *ap = v;
struct vattr va;
struct vnode *vp = ap->a_vp;
int error;
error = VOP_GETATTR(vp, &va, ap->a_cred);
if (error)
return (error);
/*
* Disallow write attempts on filesystems mounted read-only;
* unless the file is a socket, fifo, or a block or character
* device resident on the filesystem.
*/
if ((ap->a_accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
switch (vp->v_type) {
case VREG:
case VDIR:
case VLNK:
return (EROFS);
default:
break;
}
}