/*
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Rick Macklem at The University of Guelph.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)nfs_serv.c 8.8 (Berkeley) 7/31/95
*/
/*
* nfs version 2 and 3 server calls to vnode ops
* - these routines generally have 3 phases
* 1 - break down and validate rpc request in mbuf list
* 2 - do the vnode ops for the request
* (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
* 3 - build the rpc reply in an mbuf list
* nb:
* - do not mix the phases, since the nfsm_?? macros can return failures
* on a bad rpc or similar and do not do any vrele() or vput()'s
*
* - the nfsm_reply() macro generates an nfs rpc reply with the nfs
* error number iff error != 0 whereas
* returning an error from the server function implies a fatal error
* such as a badly constructed rpc request that should be dropped without
* a reply.
* For Version 3, nfsm_reply() does not return for the error case, since
* most version 3 rpcs return more than the status for error cases.
*/
/*
 * Module control hook for the NFS server.
 *
 * MODULE_CMD_INIT registers the server system calls and then brings the
 * server subsystems up; MODULE_CMD_FINI unregisters the system calls and
 * then tears the subsystems down.  MODULE_CMD_AUTOUNLOAD is answered with
 * EBUSY while netexport_hasexports() reports exports, and with ENOTTY
 * otherwise.  Any other command yields ENOTTY.
 *
 * Returns 0 on successful init/fini.  If syscall_establish() or
 * syscall_disestablish() fails, its error is returned before any other
 * setup or teardown step has run.
 */
static int
nfsserver_modcmd(modcmd_t cmd, void *arg)
{
	extern struct vfs_hooks nfs_export_hooks;	/* XXX */
	int rv;

	if (cmd == MODULE_CMD_INIT) {
		rv = syscall_establish(NULL, nfsserver_syscalls);
		if (rv != 0)
			return rv;

		nfs_init();		/* XXX for monolithic kernel */
		netexport_init();
		nfsrv_initcache();	/* server request cache */
		nfsrv_init(0);		/* server data structures */
		vfs_hooks_attach(&nfs_export_hooks);
		nfs_timer_srvinit(nfsrv_timer);
		return 0;
	}

	if (cmd == MODULE_CMD_FINI) {
		rv = syscall_disestablish(NULL, nfsserver_syscalls);
		if (rv != 0)
			return rv;

		/*
		 * The export list goes away before the VFS hooks are
		 * detached, so that a concurrent umount() cannot make us
		 * leak state.
		 */
		netexport_fini();
		vfs_hooks_detach(&nfs_export_hooks);

		/* The timer must be stopped before the server goes away. */
		nfs_timer_srvfini();
		nfsrv_fini();

		/* The server uses the request cache, so it is killed last. */
		nfsrv_finicache();
		nfs_fini();
		return 0;
	}

	/* Refuse automatic unload while anything is still exported. */
	if (cmd == MODULE_CMD_AUTOUNLOAD && netexport_hasexports())
		return EBUSY;

	/* Autounload without exports, and every unknown command. */
	return ENOTTY;
}
nfsm_srvmtofh(&nsfh);
vattr_null(&va);
if (v3) {
nfsm_srvsattr(&va);
nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
gcheck = fxdr_unsigned(int, *tl);
if (gcheck) {
nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
fxdr_nfsv3time(tl, &guard);
}
} else {
nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
/*
* Nah nah nah nah na nah
* There is a bug in the Sun client that puts 0xffff in the mode
* field of sattr when it should put in 0xffffffff. The u_short
* doesn't sign extend.
* --> check the low order 2 bytes for 0xffff
*/
if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
va.va_mode = nfstov_mode(sp->sa_mode);
if (sp->sa_uid != nfs_xdrneg1)
va.va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
if (sp->sa_gid != nfs_xdrneg1)
va.va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
if (sp->sa_size != nfs_xdrneg1)
va.va_size = fxdr_unsigned(u_quad_t, sp->sa_size);
if (sp->sa_atime.nfsv2_sec != nfs_xdrneg1) {
#ifdef notyet
fxdr_nfsv2time(&sp->sa_atime, &va.va_atime);
#else
va.va_atime.tv_sec =
fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec);
va.va_atime.tv_nsec = 0;
#endif
}
if (sp->sa_mtime.nfsv2_sec != nfs_xdrneg1)
fxdr_nfsv2time(&sp->sa_mtime, &va.va_mtime);
}
/*
* Now that we have all the fields, lets do it.
*/
error = nfsrv_fhtovp(&nsfh, 1, &vp, cred, slp, nam, &rdonly,
(nfsd->nd_flag & ND_KERBAUTH), false);
if (error) {
nfsm_reply(2 * NFSX_UNSIGNED);
nfsm_srvwcc_data(preat_ret, &preat, postat_ret, &va);
return (0);
}
nqsrv_getl(vp, ND_WRITE);
if (v3) {
error = preat_ret = VOP_GETATTR(vp, &preat, cred);
if (!error && gcheck &&
(preat.va_ctime.tv_sec != guard.tv_sec ||
preat.va_ctime.tv_nsec != guard.tv_nsec))
error = NFSERR_NOT_SYNC;
if (error) {
vput(vp);
nfsm_reply(NFSX_WCCDATA(v3));
nfsm_srvwcc_data(preat_ret, &preat, postat_ret, &va);
return (0);
}
}
/*
* If the size is being changed write access is required, otherwise
* just check for a read only file system.
*/
if (va.va_size == ((u_quad_t)((quad_t) -1))) {
if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) {
error = EROFS;
goto out;
}
} else {
if (vp->v_type == VDIR) {
error = EISDIR;
goto out;
} else if ((error = nfsrv_access(vp, VWRITE, cred, rdonly,
lwp, 0)) != 0)
goto out;
}
error = VOP_SETATTR(vp, &va, cred);
postat_ret = VOP_GETATTR(vp, &va, cred);
if (!error)
error = postat_ret;
out:
vput(vp);
nfsm_reply(NFSX_WCCORFATTR(v3));
if (v3) {
nfsm_srvwcc_data(preat_ret, &preat, postat_ret, &va);
return (0);
} else {
nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
nfsm_srvfillattr(&va, fp);
}
nfsm_srvdone;
}
if (!error && pubflag) {
if (nd.ni_vp->v_type == VDIR && nfs_pub.np_index != NULL &&
(ipb = pathbuf_create(nfs_pub.np_index)) != NULL) {
/*
* Setup call to lookup() to see if we can find
* the index file. Arguably, this doesn't belong
* in a kernel.. Ugh.
*/
ind = nd;
VOP_UNLOCK(nd.ni_vp);
ind.ni_pathbuf = ipb;
error = lookup_for_nfsd_index(&ind, nd.ni_vp);
if (!error) {
/*
* Found an index file. Get rid of
* the old references.
*/
if (dirp)
vrele(dirp);
dirp = nd.ni_vp;
ndp = &ind;
} else
error = 0;
}
/*
* If the public filehandle was used, check that this lookup
* didn't result in a filehandle outside the publicly exported
* filesystem.
*/
/* allocate kva for mbuf data */
lva = sokvaalloc(pgoff, npages << PAGE_SHIFT,
slp->ns_so);
if (lva == 0) {
/* fall back to VOP_READ */
goto loan_fail;
}
if (mrep == NULL) {
*mrq = NULL;
return (0);
}
nfsm_srvmtofh(&nsfh);
if (v3) {
nfsm_dissect(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
off = fxdr_hyper(tl);
tl += 3;
stable = fxdr_unsigned(int, *tl++);
} else {
nfsm_dissect(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
tl += 2;
}
retlen = len = fxdr_unsigned(int32_t, *tl);
cnt = i = 0;
/*
* For NFS Version 2, it is not obvious what a write of zero length
* should do, but I might as well be consistent with Version 3,
* which is to return ok so long as there are no permission problems.
*/
if (len > 0) {
zeroing = 1;
mp = mrep;
while (mp) {
if (mp == md) {
zeroing = 0;
adjust = dpos - mtod(mp, char *);
mp->m_len -= adjust;
if (mp->m_len > 0 && adjust > 0)
NFSMADV(mp, adjust);
}
if (zeroing)
mp->m_len = 0;
else if (mp->m_len > 0) {
i += mp->m_len;
if (i > len) {
mp->m_len -= (i - len);
zeroing = 1;
}
if (mp->m_len > 0)
cnt++;
}
mp = mp->m_next;
}
}
if (len > NFS_MAXDATA || len < 0 || i < len) {
error = EIO;
nfsm_reply(2 * NFSX_UNSIGNED);
nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
return (0);
}
error = nfsrv_fhtovp(&nsfh, 1, &vp, cred, slp, nam,
&rdonly, (nfsd->nd_flag & ND_KERBAUTH), false);
if (error) {
nfsm_reply(2 * NFSX_UNSIGNED);
nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
return (0);
}
if (v3)
forat_ret = VOP_GETATTR(vp, &forat, cred);
if (vp->v_type != VREG) {
if (v3)
error = EINVAL;
else
error = (vp->v_type == VDIR) ? EISDIR : EACCES;
}
if (!error) {
nqsrv_getl(vp, ND_WRITE);
error = nfsrv_access(vp, VWRITE, cred, rdonly, lwp, 1);
}
if (error) {
vput(vp);
nfsm_reply(NFSX_WCCDATA(v3));
nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
return (0);
}
/*
* XXX
* The IO_METASYNC flag indicates that all metadata (and not
* just enough to ensure data integrity) must be written to
* stable storage synchronously.
* (IO_METASYNC is not yet implemented in 4.4BSD-Lite.)
*/
if (stable == NFSV3WRITE_UNSTABLE)
ioflags = IO_NODELOCKED;
else if (stable == NFSV3WRITE_DATASYNC)
ioflags = (IO_SYNC | IO_NODELOCKED);
else
ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
uiop->uio_resid = len;
uiop->uio_rw = UIO_WRITE;
uiop->uio_offset = off;
UIO_SETUP_SYSSPACE(uiop);
error = VOP_WRITE(vp, uiop, ioflags, cred);
nfsstats.srvvop_writes++;
free(iv, M_TEMP);
}
aftat_ret = VOP_GETATTR(vp, &va, cred);
vput(vp);
if (!error)
error = aftat_ret;
nfsm_reply(NFSX_PREOPATTR(v3) + NFSX_POSTOPORFATTR(v3) +
2 * NFSX_UNSIGNED + NFSX_WRITEVERF(v3));
if (v3) {
nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
if (error)
return (0);
nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
*tl++ = txdr_unsigned(retlen);
if (stable == NFSV3WRITE_UNSTABLE)
*tl++ = txdr_unsigned(stable);
else
*tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC);
/*
* Actually, there is no need to txdr these fields,
* but it may make the values more human readable,
* for debugging purposes.
*/
struct timeval btv;
getmicroboottime(&btv);
*tl++ = txdr_unsigned(btv.tv_sec);
*tl = txdr_unsigned(btv.tv_usec);
} else {
nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
nfsm_srvfillattr(&va, fp);
}
nfsm_srvdone;
}
/*
* XXX elad: the original NFSW_SAMECRED() macro also made sure the
* two nd_flag fields of the descriptors contained
* ND_KERBAUTH.
*/
static int
nfsrv_samecred(kauth_cred_t cred1, kauth_cred_t cred2)
{
int i, do_ngroups;
if (kauth_cred_geteuid(cred1) != kauth_cred_geteuid(cred2))
return (0);
if (kauth_cred_ngroups(cred1) != kauth_cred_ngroups(cred2))
return (0);
do_ngroups = kauth_cred_ngroups(cred1);
for (i = 0; i < do_ngroups; i++)
if (kauth_cred_group(cred1, i) !=
kauth_cred_group(cred2, i))
return (0);
/*
* Trim the header out of the mbuf list and trim off any trailing
* junk so that the mbuf list has only the write data.
*/
zeroing = 1;
i = 0;
mp = mrep;
while (mp) {
if (mp == md) {
zeroing = 0;
adjust = dpos - mtod(mp, char *);
mp->m_len -= adjust;
if (mp->m_len > 0 && adjust > 0)
NFSMADV(mp, adjust);
}
if (zeroing)
mp->m_len = 0;
else {
i += mp->m_len;
if (i > len) {
mp->m_len -= (i - len);
zeroing = 1;
}
}
mp = mp->m_next;
}
if (len > NFS_MAXDATA || len < 0 || i < len) {
nfsmout:
m_freem(mrep);
error = EIO;
nfsm_writereply(2 * NFSX_UNSIGNED, v3);
if (v3)
nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
nfsd->nd_mreq = mreq;
nfsd->nd_mrep = NULL;
nfsd->nd_time = 0;
}
/*
* Add this entry to the hash and time queues.
*/
owp = NULL;
mutex_enter(&nfsd_lock);
wp = LIST_FIRST(&slp->ns_tq);
while (wp && wp->nd_time < nfsd->nd_time) {
owp = wp;
wp = LIST_NEXT(wp, nd_tq);
}
if (owp) {
LIST_INSERT_AFTER(owp, nfsd, nd_tq);
} else {
LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
}
if (nfsd->nd_mrep) {
wpp = nfsrv_nwdelayhash(slp, &nfsd->nd_fh);
owp = NULL;
wp = LIST_FIRST(wpp);
while (wp && nfsrv_comparefh(&nfsd->nd_fh, &wp->nd_fh)) {
owp = wp;
wp = LIST_NEXT(wp, nd_hash);
}
while (wp && wp->nd_off < nfsd->nd_off &&
!nfsrv_comparefh(&nfsd->nd_fh, &wp->nd_fh)) {
owp = wp;
wp = LIST_NEXT(wp, nd_hash);
}
if (owp) {
LIST_INSERT_AFTER(owp, nfsd, nd_hash);
/*
* Search the hash list for overlapping entries and
* coalesce.
*/
for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) {
wp = LIST_NEXT(nfsd, nd_hash);
if (nfsrv_samecred(owp->nd_cr, nfsd->nd_cr))
nfsrvw_coalesce(owp, nfsd);
}
} else {
LIST_INSERT_HEAD(wpp, nfsd, nd_hash);
}
}
mutex_exit(&nfsd_lock);
}
/*
* Now, do VOP_WRITE()s for any one(s) that need to be done now
* and generate the associated reply mbuf list(s).
*/
loop1:
getmicrotime(&now);
cur_usec = (u_quad_t)now.tv_sec * 1000000 + (u_quad_t)now.tv_usec;
mutex_enter(&nfsd_lock);
for (nfsd = LIST_FIRST(&slp->ns_tq); nfsd; nfsd = owp) {
owp = LIST_NEXT(nfsd, nd_tq);
if (nfsd->nd_time > cur_usec)
break;
if (nfsd->nd_mreq)
continue;
LIST_REMOVE(nfsd, nd_tq);
LIST_REMOVE(nfsd, nd_hash);
mutex_exit(&nfsd_lock);
/*
* Loop around generating replies for all write rpcs that have
* now been completed.
*/
swp = nfsd;
do {
if (error) {
nfsm_writereply(NFSX_WCCDATA(v3), v3);
if (v3) {
nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
}
} else {
nfsm_writereply(NFSX_PREOPATTR(v3) +
NFSX_POSTOPORFATTR(v3) + 2 * NFSX_UNSIGNED +
NFSX_WRITEVERF(v3), v3);
if (v3) {
nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
*tl++ = txdr_unsigned(nfsd->nd_len);
*tl++ = txdr_unsigned(swp->nd_stable);
/*
* Actually, there is no need to txdr these fields,
* but it may make the values more human readable,
* for debugging purposes.
*/
struct timeval btv;
getmicroboottime(&btv);
*tl++ = txdr_unsigned(btv.tv_sec);
*tl = txdr_unsigned(btv.tv_usec);
} else {
nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
nfsm_srvfillattr(&va, fp);
}
}
nfsd->nd_mreq = mreq;
if (nfsd->nd_mrep)
panic("nfsrv_write: nd_mrep not free");
/*
* Done. Put it at the head of the timer queue so that
* the final phase can return the reply.
*/
mutex_enter(&nfsd_lock);
if (nfsd != swp) {
nfsd->nd_time = 0;
LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
}
nfsd = LIST_FIRST(&swp->nd_coalesce);
if (nfsd) {
LIST_REMOVE(nfsd, nd_tq);
}
mutex_exit(&nfsd_lock);
} while (nfsd);
swp->nd_time = 0;
/*
* Search for a reply to return.
*/
mutex_enter(&nfsd_lock);
LIST_FOREACH(nfsd, &slp->ns_tq, nd_tq) {
if (nfsd->nd_mreq) {
LIST_REMOVE(nfsd, nd_tq);
*mrq = nfsd->nd_mreq;
*ndp = nfsd;
break;
}
}
mutex_exit(&nfsd_lock);
return (0);
}
/*
 * Fold the write request nfsd into the earlier request owp:
 *   - unlink nfsd from the hash and time queues
 *   - append nfsd's data mbufs to owp's chain (dropping any bytes that
 *     overlap what owp already covers), or discard them if owp already
 *     extends at least as far as nfsd
 *   - raise owp's nd_eoff and nd_stable as needed
 *   - hang nfsd, and anything previously coalesced into nfsd, on owp's
 *     nd_coalesce list
 * NB: Must be called with nfsd_lock held (checked by KASSERT).
 */
void
nfsrvw_coalesce(struct nfsrv_descript *owp, struct nfsrv_descript *nfsd)
{
	struct nfsrv_descript *child;
	struct mbuf *tail;
	int trim;

	KASSERT(mutex_owned(&nfsd_lock));

	LIST_REMOVE(nfsd, nd_hash);
	LIST_REMOVE(nfsd, nd_tq);

	if (!(owp->nd_eoff < nfsd->nd_eoff)) {
		/* owp already covers all of nfsd's range; its data is redundant. */
		m_freem(nfsd->nd_mrep);
	} else {
		/* Number of leading bytes of nfsd that owp already holds. */
		trim = owp->nd_eoff - nfsd->nd_off;
		if (trim < 0)
			panic("nfsrv_coalesce: bad off");
		if (trim > 0)
			m_adj(nfsd->nd_mrep, trim);

		/* Chain nfsd's data onto the end of owp's mbuf list. */
		for (tail = owp->nd_mrep; tail->m_next; tail = tail->m_next)
			continue;
		tail->m_next = nfsd->nd_mrep;
		owp->nd_eoff = nfsd->nd_eoff;
	}
	nfsd->nd_mrep = NULL;

	/* The merged request takes the stricter of the two stability levels. */
	if (nfsd->nd_stable == NFSV3WRITE_FILESYNC) {
		owp->nd_stable = NFSV3WRITE_FILESYNC;
	} else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC &&
	    owp->nd_stable == NFSV3WRITE_UNSTABLE) {
		owp->nd_stable = NFSV3WRITE_DATASYNC;
	}

	LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq);

	/*
	 * nfsd may itself carry coalesced requests.  Re-home each of them
	 * on owp; otherwise those requests could be lost and their clients
	 * would be stuck.
	 */
	for (child = LIST_FIRST(&nfsd->nd_coalesce); child != NULL;
	    child = LIST_FIRST(&nfsd->nd_coalesce)) {
		LIST_REMOVE(child, nd_tq);
		LIST_INSERT_HEAD(&owp->nd_coalesce, child, nd_tq);
	}
}
/*
* nfs readdir service
* - mallocs what it thinks is enough to read
* count rounded up to a multiple of NFS_SRVDIRBLKSIZ <= NFS_MAXREADDIR
* - calls VOP_READDIR()
* - loops around building the reply
* if the output generated exceeds count break out of loop
* The nfsm_clget macro is used here so that the reply will be packed
* tightly in mbuf clusters.
* - it only knows that it has encountered eof when the VOP_READDIR()
* reads nothing
* - as such one readdir rpc will return eof false although you are there
* and then the next will return eof
* - it trims out records with d_fileno == 0
* this doesn't matter for Unix clients, but such records might confuse
* clients for other operating systems.
* - it trims out records with d_type == DT_WHT
* these cannot be seen through NFS (unless we extend the protocol)
* NB: It is tempting to set eof to true if the VOP_READDIR() reads less
* than requested, but this may not apply to all filesystems. For
* example, client NFS does not { although it is never remote mounted
* anyhow }
* The alternate call nfsrv_readdirplus() does lookups as well.
* PS: The NFS protocol spec. does not clarify what the "count" byte
* argument is a count of.. just name strings and file id's or the
* entire reply rpc or ...
* I tried just file name and id sizes and it confused the Sun client,
* so I am using the full rpc size now. The "paranoia.." comment refers
* to including the status longwords that are not a part of the dir.
* "entry" structures, but are in the rpc.
*/
/* Block size that directory read counts are rounded up to (see above). */
#define NFS_SRVDIRBLKSIZ 1024
/*
* Fixed-size portion of one directory entry in a reply that also carries
* attributes and a file handle.  The reply loop elsewhere in this file
* fills one of these per entry and then memcpy()s sizeof(struct flrep)
* bytes of it straight into the reply buffer, followed by the file handle
* data.  Because it is copied as raw bytes, the field order and sizes
* appear to be part of the reply layout -- do not reorder or pad.
*/
struct flrep {
/* Entry cookie, stored via txdr_hyper(). */
nfsuint64 fl_off;
/* Flag word; the fill loop sets it to nfs_true. */
u_int32_t fl_postopok;
/* Entry attributes, filled by nfsm_srvfillattr(). */
struct nfs_fattr fl_fattr; /* XXX: must be of fattr3 size */
/* Flag word; the fill loop sets it to nfs_true. */
u_int32_t fl_fhok;
/* File handle length, stored as txdr_unsigned(NFSX_V3FH). */
u_int32_t fl_fhsize;
/* handle comes here, filled in dynamically */
};
len = 3 * NFSX_UNSIGNED; /* paranoia, probably can be 0 */
nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) + siz);
if (v3) {
nfsm_srvpostop_attr(getret, &at);
nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
txdr_hyper(at.va_filerev, tl);
}
mp = mp2 = mb;
bp = bpos;
be = bp + M_TRAILINGSPACE(mp);
/* Loop through the records and build reply */
while (cpos < cend && ncookies > 0) {
if (dp->d_fileno != 0 && dp->d_type != DT_WHT) {
nlen = dp->d_namlen;
rem = nfsm_rndup(nlen)-nlen;
len += (4 * NFSX_UNSIGNED + nlen + rem);
if (v3)
len += 2 * NFSX_UNSIGNED;
if (len > cnt) {
eofflag = 0;
break;
}
/*
* Build the directory record xdr from
* the dirent entry.
*/
nfsm_clget;
*tl = nfs_true;
bp += NFSX_UNSIGNED;
if (v3) {
nfsm_clget;
*tl = txdr_unsigned(dp->d_fileno >> 32);
bp += NFSX_UNSIGNED;
}
nfsm_clget;
*tl = txdr_unsigned(dp->d_fileno);
bp += NFSX_UNSIGNED;
nfsm_clget;
*tl = txdr_unsigned(nlen);
bp += NFSX_UNSIGNED;
/* And loop around copying the name */
xfer = nlen;
cp = dp->d_name;
while (xfer > 0) {
nfsm_clget;
if ((bp+xfer) > be)
tsiz = be-bp;
else
tsiz = xfer;
memcpy(bp, cp, tsiz);
bp += tsiz;
xfer -= tsiz;
if (xfer > 0)
cp += tsiz;
}
/* And null pad to an int32_t boundary */
for (i = 0; i < rem; i++)
*bp++ = '\0';
nfsm_clget;
/* Finish off the record */
txdr_hyper(*cookiep, &jar);
if (v3) {
*tl = jar.nfsuquad[0];
bp += NFSX_UNSIGNED;
nfsm_clget;
}
*tl = jar.nfsuquad[1];
bp += NFSX_UNSIGNED;
}
cpos += dp->d_reclen;
dp = (struct dirent *)cpos;
cookiep++;
ncookies--;
}
vrele(vp);
nfsm_clget;
*tl = nfs_false;
bp += NFSX_UNSIGNED;
nfsm_clget;
if (eofflag)
*tl = nfs_true;
else
*tl = nfs_false;
bp += NFSX_UNSIGNED;
if (mp != mb) {
if (bp < be)
mp->m_len = bp - mtod(mp, char *);
} else
mp->m_len += bp - bpos;
free((void *)rbuf, M_TEMP);
free((void *)cookies, M_TEMP);
nfsm_srvdone;
}
off = (u_quad_t)io.uio_offset;
getret = VOP_GETATTR(vp, &at, cred);
VOP_UNLOCK(vp);
/*
* If the VGET operation doesn't work for this filesystem,
* we can't support readdirplus. Returning NOTSUPP should
* make clients fall back to plain readdir.
* There's no need to check for VPTOFH as well, we wouldn't
* even be here otherwise.
*/
if (!getret) {
if ((getret = VFS_VGET(vp->v_mount, at.va_fileid,
LK_EXCLUSIVE, &nvp)))
getret = (getret == EOPNOTSUPP) ?
NFSERR_NOTSUPP : NFSERR_IO;
else
vput(nvp);
}
if (!cookies && !error)
error = NFSERR_PERM;
if (!error)
error = getret;
if (error) {
vrele(vp);
if (cookies)
free((void *)cookies, M_TEMP);
free((void *)rbuf, M_TEMP);
nfsm_reply(NFSX_V3POSTOPATTR);
nfsm_srvpostop_attr(getret, &at);
return (0);
}
if (io.uio_resid) {
siz -= io.uio_resid;
dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF + 2 * NFSX_UNSIGNED;
nfsm_reply(cnt);
nfsm_srvpostop_attr(getret, &at);
nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
txdr_hyper(at.va_filerev, tl);
mp = mp2 = mb;
bp = bpos;
be = bp + M_TRAILINGSPACE(mp);
/* Loop through the records and build reply */
while (cpos < cend && ncookies > 0) {
if (dp->d_fileno != 0 && dp->d_type != DT_WHT) {
nfsrvfh_t nnsfh;
nlen = dp->d_namlen;
rem = nfsm_rndup(nlen)-nlen;
/*
* For readdir_and_lookup get the vnode using
* the file number.
*/
if (VFS_VGET(vp->v_mount, dp->d_fileno, LK_EXCLUSIVE,
&nvp))
goto invalid;
if (nfsrv_composefh(nvp, &nnsfh, true)) {
vput(nvp);
goto invalid;
}
if (VOP_GETATTR(nvp, vap, cred)) {
vput(nvp);
goto invalid;
}
vput(nvp);
/*
* If either the dircount or maxcount will be
* exceeded, get out now. Both of these lengths
* are calculated conservatively, including all
* XDR overheads.
*/
len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH +
NFSX_V3POSTOPATTR);
dirlen += (6 * NFSX_UNSIGNED + nlen + rem);
if (len > cnt || dirlen > fullsiz) {
eofflag = 0;
break;
}
/*
* Build the directory record xdr from
* the dirent entry.
*/
fp = (struct nfs_fattr *)&fl.fl_fattr;
nfsm_srvfillattr(vap, fp);
fl.fl_fhsize = txdr_unsigned(NFSX_V3FH);
fl.fl_fhok = nfs_true;
fl.fl_postopok = nfs_true;
txdr_hyper(*cookiep, fl.fl_off.nfsuquad);
nfsm_clget;
*tl = nfs_true;
bp += NFSX_UNSIGNED;
nfsm_clget;
*tl = txdr_unsigned(dp->d_fileno >> 32);
bp += NFSX_UNSIGNED;
nfsm_clget;
*tl = txdr_unsigned(dp->d_fileno);
bp += NFSX_UNSIGNED;
nfsm_clget;
*tl = txdr_unsigned(nlen);
bp += NFSX_UNSIGNED;
/* And loop around copying the name */
xfer = nlen;
cp = dp->d_name;
while (xfer > 0) {
nfsm_clget;
if ((bp + xfer) > be)
tsiz = be - bp;
else
tsiz = xfer;
memcpy(bp, cp, tsiz);
bp += tsiz;
xfer -= tsiz;
if (xfer > 0)
cp += tsiz;
}
/* And null pad to an int32_t boundary */
for (i = 0; i < rem; i++)
*bp++ = '\0';
/*
* Now copy the flrep structure out.
*/
xfer = sizeof(struct flrep);
cp = (void *)&fl;
while (xfer > 0) {
nfsm_clget;
if ((bp + xfer) > be)
tsiz = be - bp;
else
tsiz = xfer;
memcpy(bp, cp, tsiz);
bp += tsiz;
xfer -= tsiz;
if (xfer > 0)
cp += tsiz;
}
/*
* ... and filehandle.
*/
xfer = NFSRVFH_SIZE(&nnsfh);
cp = NFSRVFH_DATA(&nnsfh);
while (xfer > 0) {
nfsm_clget;
if ((bp + xfer) > be)
tsiz = be - bp;
else
tsiz = xfer;
memcpy(bp, cp, tsiz);
bp += tsiz;
xfer -= tsiz;
if (xfer > 0)
cp += tsiz;
}
}
invalid:
cpos += dp->d_reclen;
dp = (struct dirent *)cpos;
cookiep++;
ncookies--;
}
vrele(vp);
nfsm_clget;
*tl = nfs_false;
bp += NFSX_UNSIGNED;
nfsm_clget;
if (eofflag)
*tl = nfs_true;
else
*tl = nfs_false;
bp += NFSX_UNSIGNED;
if (mp != mb) {
if (bp < be)
mp->m_len = bp - mtod(mp, char *);
} else
mp->m_len += bp - bpos;
free((void *)cookies, M_TEMP);
free((void *)rbuf, M_TEMP);
nfsm_srvdone;
}
/* XXX Try to make a guess on the max file size. */
sb = malloc(sizeof(*sb), M_TEMP, M_WAITOK);
VFS_STATVFS(vp->v_mount, sb);
maxfsize = (u_quad_t)0x80000000 * sb->f_frsize - 1;
free(sb, M_TEMP);
/*
* These should probably be supported by VOP_PATHCONF(), but
* until msdosfs is exportable (why would you want to?), the
* Unix defaults should be ok.
*/
pc->pc_caseinsensitive = nfs_false;
pc->pc_casepreserving = nfs_true;
nfsm_srvdone;
}
/*
* Null operation, used by clients to ping server.
*
* Presets error to NFSERR_RETVOID and generates the reply through
* nfsm_reply(0).  The only return statement visible in this body
* returns 0.
*/
/* ARGSUSED */
int
nfsrv_null(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
struct lwp *lwp, struct mbuf **mrq)
{
struct mbuf *mrep = nfsd->nd_mrep;
char *bpos;
int error = NFSERR_RETVOID, cache = 0;
struct mbuf *mb, *mreq __unused;
u_quad_t frev;
/*
* NOTE(review): mrep, bpos, cache, mb, mreq and frev are not named
* anywhere else in this body; presumably the nfsm_reply() macro
* expansion refers to them -- confirm before removing any of them.
*/
nfsm_reply(0);
/* Presumably the bail-out label targeted from inside nfsm_reply() -- confirm. */
nfsmout:
return (0);
}
/*
* No operation, used for obsolete procedures
*/
/* ARGSUSED */
int
nfsrv_noop(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
struct lwp *lwp, struct mbuf **mrq)
{
struct mbuf *mrep = nfsd->nd_mrep;
char *bpos;
int error, cache = 0;
struct mbuf *mb, *mreq __unused;
u_quad_t frev;
/*
 * Access check for vnodes that were obtained from file handles and may
 * refer to files a Unix client already has open.  Plain vn_writechk()
 * plus VOP_ACCESS() is not sufficient, for two reasons:
 *   1 - for writes, an export marked read-only (rdonly) must be honoured
 *       in addition to MNT_RDONLY on the mount;
 *   2 - for some operations the owner is granted access regardless of
 *       the mode bits, so that a process which chmod()s a file after
 *       opening it keeps working.  This does open a security hole, but
 *       the NFS server already has one the size of a barn door.
 *
 * Rule 2 never applies to EPERM: VOP_ACCESS() returns EPERM rather than
 * EACCES for an IMMUTABLE file, and EPERM is always reported as an error.
 *
 * Returns 0 if access is granted; otherwise EROFS, ETXTBSY, or the error
 * from VOP_GETATTR() / VOP_ACCESS().  The owner override (rule 2) is only
 * applied when `override' is non-zero.
 */
int
nfsrv_access(struct vnode *vp, int flags, kauth_cred_t cred, int rdonly, struct lwp *lwp, int override)
{
	struct vattr va;
	int rv;

	if ((flags & VWRITE) != 0) {
		/*
		 * This is vn_writechk() with the export's rdonly flag
		 * added: refuse writes to regular files, directories and
		 * symlinks on a read-only file system.  Sockets and
		 * block/character devices living there remain writable.
		 */
		if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) {
			if (vp->v_type == VREG || vp->v_type == VDIR ||
			    vp->v_type == VLNK)
				return (EROFS);
		}

		/* A vnode in use as a process's text cannot be written. */
		if (vp->v_iflag & VI_TEXT)
			return (ETXTBSY);
	}

	/* The owner uid is needed for the override decision below. */
	rv = VOP_GETATTR(vp, &va, cred);
	if (rv != 0)
		return (rv);

	rv = VOP_ACCESS(vp, flags, cred);

	/* Only an EACCES result is eligible for the owner override. */
	if (!override || rv != EACCES)
		return rv;

	/*
	 * The owner is let through, which covers reads and writes on
	 * files that are already open.
	 */
	if (kauth_cred_geteuid(cred) == va.va_uid)
		return 0;

	return rv;
}