/* $NetBSD: genfs_vnops.c,v 1.220 2023/03/03 10:02:51 hannken Exp $ */

/* $NetBSD: genfs_vnops.c,v 1.220 2023/03/03 10:02:51 hannken Exp $ */

/*-
* Copyright (c) 2008 The NetBSD Foundation, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/

/*
* Copyright (c) 1982, 1986, 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: genfs_vnops.c,v 1.220 2023/03/03 10:02:51 hannken Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/fstrans.h>
#include <sys/namei.h>
#include <sys/vnode_impl.h>
#include <sys/fcntl.h>
#include <sys/kmem.h>
#include <sys/poll.h>
#include <sys/mman.h>
#include <sys/file.h>
#include <sys/kauth.h>
#include <sys/stat.h>
#include <sys/extattr.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/genfs/genfs_node.h>
#include <miscfs/specfs/specdev.h>

/*
* Forward declarations for kqueue filter callbacks that are defined
* further down in this file and referenced from the filterops tables.
*/
static void filt_genfsdetach(struct knote *);
static int filt_genfsread(struct knote *, long);
static int filt_genfsvnode(struct knote *, long);

/*
* Find the end of the first path component in NAME and return its
* length.
*/
int
genfs_parsepath(void *v)
{
struct vop_parsepath_args /* {
struct vnode *a_dvp;
const char *a_name;
size_t *a_ret;
} */ *ap = v;
const char *name = ap->a_name;
size_t pos;

(void)ap->a_dvp;

pos = 0;
while (name[pos] != '\0' && name[pos] != '/') {
pos++;
}
*ap->a_retval = pos;
return 0;
}

/*
 * Poll stub: every requested plain read/write event is reported as
 * ready.  The result is a_events restricted to POLLIN, POLLOUT,
 * POLLRDNORM and POLLWRNORM.
 */
int
genfs_poll(void *v)
{
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
		struct lwp *a_l;
	} */ *ap = v;
	int revents;

	revents = ap->a_events;
	revents &= (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM);
	return revents;
}

/*
 * Validate a seek request: a negative new offset is refused with
 * EINVAL, every other offset is accepted.
 */
int
genfs_seek(void *v)
{
	struct vop_seek_args /* {
		struct vnode *a_vp;
		off_t a_oldoff;
		off_t a_newoff;
		kauth_cred_t cred;
	} */ *ap = v;

	return (ap->a_newoff < 0) ? EINVAL : 0;
}

/*
 * Abort-operation stub: nothing is undone here, the arguments are
 * ignored and success is returned.
 */
int
genfs_abortop(void *v)
{
	struct vop_abortop_args /* {
		struct vnode *a_dvp;
		struct componentname *a_cnp;
	} */ *ap = v;

	/* Reference ap only so that it counts as used. */
	(void)ap;
	return 0;
}

/*
 * Generic fcntl handler: F_SETFL is accepted as a no-op, every other
 * command fails with EOPNOTSUPP.
 */
int
genfs_fcntl(void *v)
{
	struct vop_fcntl_args /* {
		struct vnode *a_vp;
		u_int a_command;
		void *a_data;
		int a_fflag;
		kauth_cred_t a_cred;
		struct lwp *a_l;
	} */ *ap = v;

	return (ap->a_command == F_SETFL) ? 0 : EOPNOTSUPP;
}

/*
 * Vnode operation that must never be invoked: calling it panics.
 */
/*ARGSUSED*/
int
genfs_badop(void *v)
{
	(void)v;
	panic("genfs: bad op");
}

/*
 * Vnode operation that does nothing and reports success.
 */
/*ARGSUSED*/
int
genfs_nullop(void *v)
{
	(void)v;
	return 0;
}

/*
 * Vnode operation that unconditionally fails with EINVAL.
 */
/*ARGSUSED*/
int
genfs_einval(void *v)
{
	(void)v;
	return EINVAL;
}

/*
 * Link handler (also used for symlink) that refuses the request:
 * the pending name operation is aborted via VOP_ABORTOP() and EROFS
 * is returned.
 */
int
genfs_erofs_link(void *v)
{
	/* also for symlink */
	struct vop_link_v2_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap = v;
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;

	VOP_ABORTOP(dvp, cnp);
	return EROFS;
}

/*
* Called when an fs doesn't support a particular vop.
* This takes care to vrele or vput the vnodes passed in, as directed
* by the operation's descriptor flags, and calls VOP_ABORTOP for a
* componentname (in non-rename VOP).
*
* Must not be used for VOP_LOOKUP or VOP_ABORTOP (asserted below).
* Always returns EOPNOTSUPP.
*/
int
genfs_eopnotsupp(void *v)
{
struct vop_generic_args /* {
struct vnodeop_desc *a_desc;
/ * other random data follows, presumably * /
} */ *ap = v;
struct vnodeop_desc *desc = ap->a_desc;
struct vnode *vp, *vp_last = NULL;
int flags, i, j, offset_cnp, offset_vp;

KASSERT(desc->vdesc_offset != VOP_LOOKUP_DESCOFFSET);
KASSERT(desc->vdesc_offset != VOP_ABORTOP_DESCOFFSET);

/*
* Abort any componentname that lookup potentially left state in.
*
* As is logical, componentnames for VOP_RENAME are handled by
* the caller of VOP_RENAME. Yay, rename!
*
* The abort is only issued when the descriptor names both a first
* vnode argument and a componentname argument.
*/
if (desc->vdesc_offset != VOP_RENAME_DESCOFFSET &&
(offset_vp = desc->vdesc_vp_offsets[0]) != VDESC_NO_OFFSET &&
(offset_cnp = desc->vdesc_componentname_offset) != VDESC_NO_OFFSET){
struct componentname *cnp;
struct vnode *dvp;

/* Fetch the arguments by their offsets within the args structure. */
dvp = *VOPARG_OFFSETTO(struct vnode **, offset_vp, ap);
cnp = *VOPARG_OFFSETTO(struct componentname **, offset_cnp, ap);

VOP_ABORTOP(dvp, cnp);
}

/*
* Walk the vnode arguments.  'flags' is shifted right by one on every
* iteration so that the VP0 masks always test the bits belonging to
* the i-th vnode.
*/
flags = desc->vdesc_flags;
for (i = 0; i < VDESC_MAX_VPS; flags >>=1, i++) {
if ((offset_vp = desc->vdesc_vp_offsets[i]) == VDESC_NO_OFFSET)
break; /* stop at end of list */
/*
* j keeps whichever of the WILLPUT bits are set for this vnode.
* NOTE(review): the switch below implies VDESC_VP0_WILLRELE is a
* subset of the VDESC_VP0_WILLPUT bits -- confirm in vnode_if.h.
*/
if ((j = flags & VDESC_VP0_WILLPUT)) {
vp = *VOPARG_OFFSETTO(struct vnode **, offset_vp, ap);

/* Skip if NULL */
if (!vp)
continue;

switch (j) {
case VDESC_VP0_WILLPUT:
/*
* Check for dvp == vp cases: a vnode that was already
* vput() in a previous iteration is only vrele()d.
*/
if (vp == vp_last)
vrele(vp);
else {
vput(vp);
vp_last = vp;
}
break;
case VDESC_VP0_WILLRELE:
vrele(vp);
break;
}
}
}

return (EOPNOTSUPP);
}

/*
 * Vnode operation that unconditionally fails with EBADF.
 */
/*ARGSUSED*/
int
genfs_ebadf(void *v)
{
	(void)v;
	return EBADF;
}

/*
 * Ioctl stub: unconditionally returns EPASSTHROUGH.
 */
/* ARGSUSED */
int
genfs_enoioctl(void *v)
{
	(void)v;
	return EPASSTHROUGH;
}

/*
 * Revoke the requested vnode by handing it to vrevoke().
 *
 * Callers are expected to pass REVOKEALL in a_flags; DIAGNOSTIC
 * kernels panic when it is missing.  Always returns 0.
 */
int
genfs_revoke(void *v)
{
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

#ifdef DIAGNOSTIC
	if (!(ap->a_flags & REVOKEALL))
		panic("genfs_revoke: not revokeall");
#endif
	vrevoke(vp);
	return 0;
}

/*
 * Lock the node (for deadfs).
 *
 * Without LK_RETRY the request fails immediately with ENOENT.
 * Otherwise the vnode lock is downgraded, upgraded (LK_NOWAIT is
 * required) or entered shared/exclusive as requested.  A failed
 * try-operation returns EBUSY; success returns 0.
 */
int
genfs_deadlock(void *v)
{
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	vnode_t *vp = ap->a_vp;
	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
	const int flags = ap->a_flags;

	if (!ISSET(flags, LK_RETRY))
		return ENOENT;

	if (ISSET(flags, LK_DOWNGRADE)) {
		rw_downgrade(&vip->vi_lock);
	} else if (ISSET(flags, LK_UPGRADE)) {
		/* Upgrades are only ever attempted, never waited for. */
		KASSERT(ISSET(flags, LK_NOWAIT));
		if (!rw_tryupgrade(&vip->vi_lock))
			return EBUSY;
	} else if ((flags & (LK_EXCLUSIVE | LK_SHARED)) != 0) {
		const krw_t op =
		    ISSET(flags, LK_EXCLUSIVE) ? RW_WRITER : RW_READER;

		if (!ISSET(flags, LK_NOWAIT))
			rw_enter(&vip->vi_lock, op);
		else if (!rw_tryenter(&vip->vi_lock, op))
			return EBUSY;
	}

	VSTATE_ASSERT_UNLOCKED(vp, VS_RECLAIMED);
	return 0;
}

/*
 * Unlock the node (for deadfs): release the vnode lock and return 0.
 */
int
genfs_deadunlock(void *v)
{
	struct vop_unlock_args /* {
		struct vnode *a_vp;
	} */ *ap = v;
	vnode_t *vp = ap->a_vp;
	vnode_impl_t *vip;

	vip = VNODE_TO_VIMPL(vp);
	rw_exit(&vip->vi_lock);
	return 0;
}

/*
 * Lock the node.
 *
 * Depending on a_flags the vnode lock is downgraded, upgraded
 * (LK_NOWAIT is required) or entered shared/exclusive.  A failed
 * try-operation returns EBUSY; success returns 0.
 */
int
genfs_lock(void *v)
{
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	vnode_t *vp = ap->a_vp;
	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
	const int flags = ap->a_flags;

	if (ISSET(flags, LK_DOWNGRADE)) {
		rw_downgrade(&vip->vi_lock);
	} else if (ISSET(flags, LK_UPGRADE)) {
		/* Upgrades are only ever attempted, never waited for. */
		KASSERT(ISSET(flags, LK_NOWAIT));
		if (!rw_tryupgrade(&vip->vi_lock))
			return EBUSY;
	} else if ((flags & (LK_EXCLUSIVE | LK_SHARED)) != 0) {
		const krw_t op =
		    ISSET(flags, LK_EXCLUSIVE) ? RW_WRITER : RW_READER;

		if (!ISSET(flags, LK_NOWAIT))
			rw_enter(&vip->vi_lock, op);
		else if (!rw_tryenter(&vip->vi_lock, op))
			return EBUSY;
	}

	VSTATE_ASSERT_UNLOCKED(vp, VS_ACTIVE);
	return 0;
}

/*
 * Unlock the node: release the vnode lock and return 0.
 */
int
genfs_unlock(void *v)
{
	struct vop_unlock_args /* {
		struct vnode *a_vp;
	} */ *ap = v;
	vnode_t *vp = ap->a_vp;
	vnode_impl_t *vip;

	vip = VNODE_TO_VIMPL(vp);
	rw_exit(&vip->vi_lock);
	return 0;
}

/*
 * Report how the vnode lock is held: LK_EXCLUSIVE when
 * rw_write_held() is true, LK_SHARED when rw_read_held() is true,
 * and 0 otherwise.
 */
int
genfs_islocked(void *v)
{
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap = v;
	vnode_t *vp = ap->a_vp;
	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
	int held = 0;

	if (rw_write_held(&vip->vi_lock))
		held = LK_EXCLUSIVE;
	else if (rw_read_held(&vip->vi_lock))
		held = LK_SHARED;

	return held;
}

/*
 * Mmap stub: the arguments are ignored and success is returned.
 */
int
genfs_mmap(void *v)
{
	(void)v;
	return 0;
}

/*
 * VOP_PUTPAGES() for vnodes which never have pages.
 *
 * Asserts that the vnode's object holds no pages, releases the
 * object's vmobjlock and returns 0.
 */
int
genfs_null_putpages(void *v)
{
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp;

	vp = ap->a_vp;
	KASSERT(vp->v_uobj.uo_npages == 0);
	rw_exit(vp->v_uobj.vmobjlock);
	return 0;
}

/*
 * Initialise the genfs part of a vnode: record the genfs_ops table
 * and set up the node's g_glock.
 */
void
genfs_node_init(struct vnode *vp, const struct genfs_ops *ops)
{
	struct genfs_node *gnode = VTOG(vp);

	gnode->g_op = ops;
	rw_init(&gnode->g_glock);
}

/*
 * Tear down the genfs part of a vnode by destroying its g_glock.
 */
void
genfs_node_destroy(struct vnode *vp)
{
	struct genfs_node *gnode = VTOG(vp);

	rw_destroy(&gnode->g_glock);
}

/*
 * Round 'size' up to a multiple of the mount's block size
 * (1 << mnt_fs_bshift) and store the result in *eobp.
 * The 'flags' argument is not used.
 */
void
genfs_size(struct vnode *vp, off_t size, off_t *eobp, int flags)
{
	const int bsize = 1 << vp->v_mount->mnt_fs_bshift;

	(void)flags;
	*eobp = (size + bsize - 1) & ~(bsize - 1);
}

/*
 * Filter detach callback: remove the knote from the vnode stored in
 * kn_hook.
 */
static void
filt_genfsdetach(struct knote *kn)
{
	struct vnode *vp;

	vp = (struct vnode *)kn->kn_hook;
	vn_knote_detach(vp, kn);
}

/*
 * Read filter event callback.
 *
 * NOTE_REVOKE: mark the knote EOF/one-shot and fire.
 * Otherwise kn_data is set to the distance between the vnode size and
 * the file offset, and the filter fires when that distance is
 * non-zero.  With hint 0 the vnode interlock is taken around the
 * update; for any other hint it is asserted to be held already.
 */
static int
filt_genfsread(struct knote *kn, long hint)
{
	struct vnode *vp = (struct vnode *)kn->kn_hook;
	int fired;

	if (hint == NOTE_REVOKE) {
		/*
		 * filesystem is gone, so set the EOF flag and schedule
		 * the knote for deletion.
		 */
		KASSERT(mutex_owned(vp->v_interlock));
		knote_set_eof(kn, EV_ONESHOT);
		return 1;
	}

	if (hint == 0)
		mutex_enter(vp->v_interlock);
	else
		KASSERT(mutex_owned(vp->v_interlock));

	kn->kn_data = vp->v_size - ((file_t *)kn->kn_obj)->f_offset;
	fired = (kn->kn_data != 0);

	if (hint == 0)
		mutex_exit(vp->v_interlock);

	return fired;
}

/*
 * Write filter event callback.
 *
 * NOTE_REVOKE: mark the knote EOF/one-shot and fire.
 * Otherwise kn_data is reset to 0 and the filter always fires.  With
 * hint 0 the vnode interlock is taken around the update; for any
 * other hint it is asserted to be held already.
 */
static int
filt_genfswrite(struct knote *kn, long hint)
{
	struct vnode *vp = (struct vnode *)kn->kn_hook;

	if (hint == NOTE_REVOKE) {
		/*
		 * filesystem is gone, so set the EOF flag and schedule
		 * the knote for deletion.
		 */
		KASSERT(mutex_owned(vp->v_interlock));
		knote_set_eof(kn, EV_ONESHOT);
		return 1;
	}

	if (hint == 0) {
		mutex_enter(vp->v_interlock);
		kn->kn_data = 0;
		mutex_exit(vp->v_interlock);
	} else {
		KASSERT(mutex_owned(vp->v_interlock));
		kn->kn_data = 0;
	}

	return 1;
}

/*
 * Vnode filter event callback.
 *
 * hint 0: sample kn_fflags under the vnode interlock and fire when any
 * flag is recorded.
 * Any other hint: the interlock is asserted to be held; the hint is
 * recorded in kn_fflags when the knote subscribed to it (kn_sfflags).
 * NOTE_REVOKE additionally marks the knote EOF and always fires; other
 * hints fire when kn_fflags is non-zero.
 */
static int
filt_genfsvnode(struct knote *kn, long hint)
{
	struct vnode *vp = (struct vnode *)kn->kn_hook;
	int fflags;

	if (hint == 0) {
		mutex_enter(vp->v_interlock);
		fflags = kn->kn_fflags;
		mutex_exit(vp->v_interlock);
		return (fflags != 0);
	}

	KASSERT(mutex_owned(vp->v_interlock));
	if (hint == NOTE_REVOKE)
		knote_set_eof(kn, 0);
	if ((kn->kn_sfflags & hint) != 0)
		kn->kn_fflags |= hint;
	if (hint == NOTE_REVOKE)
		return 1;

	fflags = kn->kn_fflags;
	return (fflags != 0);
}

/* Filter operations installed by genfs_kqfilter() for EVFILT_READ. */
static const struct filterops genfsread_filtops = {
	.f_event = filt_genfsread,
	.f_detach = filt_genfsdetach,
	.f_attach = NULL,
	.f_flags = FILTEROP_MPSAFE | FILTEROP_ISFD,
};

/* Filter operations installed by genfs_kqfilter() for EVFILT_WRITE. */
static const struct filterops genfswrite_filtops = {
	.f_event = filt_genfswrite,
	.f_detach = filt_genfsdetach,
	.f_attach = NULL,
	.f_flags = FILTEROP_MPSAFE | FILTEROP_ISFD,
};

/* Filter operations installed by genfs_kqfilter() for EVFILT_VNODE. */
static const struct filterops genfsvnode_filtops = {
	.f_event = filt_genfsvnode,
	.f_detach = filt_genfsdetach,
	.f_attach = NULL,
	.f_flags = FILTEROP_MPSAFE | FILTEROP_ISFD,
};

/*
 * Attach a knote to a vnode.
 *
 * Picks the filterops table matching kn_filter (read, write or
 * vnode), stores the vnode in kn_hook and attaches the knote.
 * Returns EINVAL for any other filter, without touching the knote.
 */
int
genfs_kqfilter(void *v)
{
	struct vop_kqfilter_args /* {
		struct vnode *a_vp;
		struct knote *a_kn;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct knote *kn = ap->a_kn;
	const struct filterops *fops;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		fops = &genfsread_filtops;
		break;
	case EVFILT_WRITE:
		fops = &genfswrite_filtops;
		break;
	case EVFILT_VNODE:
		fops = &genfsvnode_filtops;
		break;
	default:
		return EINVAL;
	}

	kn->kn_fop = fops;
	kn->kn_hook = vp;
	vn_knote_attach(vp, kn);

	return 0;
}

/*
 * Take the genfs node lock (g_glock) as a writer.
 */
void
genfs_node_wrlock(struct vnode *vp)
{
	struct genfs_node *gnode;

	gnode = VTOG(vp);
	rw_enter(&gnode->g_glock, RW_WRITER);
}

/*
 * Take the genfs node lock (g_glock) as a reader.
 */
void
genfs_node_rdlock(struct vnode *vp)
{
	struct genfs_node *gnode;

	gnode = VTOG(vp);
	rw_enter(&gnode->g_glock, RW_READER);
}

/*
 * Try to take the genfs node lock (g_glock) as a reader; the result
 * of rw_tryenter() is passed back to the caller.
 */
int
genfs_node_rdtrylock(struct vnode *vp)
{
	struct genfs_node *gnode;

	gnode = VTOG(vp);
	return rw_tryenter(&gnode->g_glock, RW_READER);
}

/*
 * Release the genfs node lock (g_glock).
 */
void
genfs_node_unlock(struct vnode *vp)
{
	struct genfs_node *gnode;

	gnode = VTOG(vp);
	rw_exit(&gnode->g_glock);
}

/*
 * Report whether the genfs node lock (g_glock) is write-held, as
 * answered by rw_write_held().
 */
int
genfs_node_wrlocked(struct vnode *vp)
{
	struct genfs_node *gnode;

	gnode = VTOG(vp);
	return rw_write_held(&gnode->g_glock);
}

/*
 * Common filesystem object access control check routine.
 *
 * Exactly one permission class of file_mode is consulted: the owner
 * bits when the credential's effective uid equals file_uid, otherwise
 * the group bits when the credential is a member of file_gid,
 * otherwise the "other" bits.  The owner is additionally granted
 * VADMIN.  The vp and acl arguments are not examined here.
 *
 * Returns 0 when every bit in accmode is granted.  Otherwise returns
 * EPERM if VADMIN was requested and EACCES if not.  A positive error
 * from kauth_cred_groupmember() is returned unchanged.
 */
int
genfs_can_access(vnode_t *vp, kauth_cred_t cred, uid_t file_uid, gid_t file_gid,
    mode_t file_mode, struct acl *acl, accmode_t accmode)
{
	accmode_t dac_granted = 0;
	int error;

	KASSERT((accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | VAPPEND)) == 0);
	KASSERT((accmode & VAPPEND) == 0 || (accmode & VWRITE));

	if (kauth_cred_geteuid(cred) == file_uid) {
		/* Owner class. */
		dac_granted |= VADMIN;
		if (file_mode & S_IXUSR)
			dac_granted |= VEXEC;
		if (file_mode & S_IRUSR)
			dac_granted |= VREAD;
		if (file_mode & S_IWUSR)
			dac_granted |= (VWRITE | VAPPEND);
	} else {
		error = kauth_cred_groupmember(cred, file_gid);
		if (error > 0)
			return error;

		if (error == 0) {
			/* Group class. */
			if (file_mode & S_IXGRP)
				dac_granted |= VEXEC;
			if (file_mode & S_IRGRP)
				dac_granted |= VREAD;
			if (file_mode & S_IWGRP)
				dac_granted |= (VWRITE | VAPPEND);
		} else {
			/* Everyone else. */
			if (file_mode & S_IXOTH)
				dac_granted |= VEXEC;
			if (file_mode & S_IROTH)
				dac_granted |= VREAD;
			if (file_mode & S_IWOTH)
				dac_granted |= (VWRITE | VAPPEND);
		}
	}

	if ((accmode & dac_granted) == accmode)
		return 0;

	return (accmode & VADMIN) ? EPERM : EACCES;
}

/*
* Implement a version of genfs_can_access() that understands POSIX.1e ACL
* semantics;
* the access ACL has already been prepared for evaluation by the file system
* and is passed via 'uid', 'gid', and 'acl'. Return 0 on success, else an
* errno value.
*
* Entries are evaluated in this order: ACL_USER_OBJ, ACL_USER, the group
* entries (ACL_GROUP_OBJ and ACL_GROUP), and finally ACL_OTHER.  On a
* mismatch the result is EPERM if VADMIN was requested and EACCES
* otherwise; a positive error from kauth_cred_groupmember() is returned
* unchanged.  The 'vp' and 'file_mode' arguments are not examined here.
*/
int
genfs_can_access_acl_posix1e(vnode_t *vp, kauth_cred_t cred, uid_t file_uid,
gid_t file_gid, mode_t file_mode, struct acl *acl, accmode_t accmode)
{
struct acl_entry *acl_other, *acl_mask;
accmode_t dac_granted;
accmode_t acl_mask_granted;
int group_matched, i;
int error;

KASSERT((accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | VAPPEND)) == 0);
KASSERT((accmode & VAPPEND) == 0 || (accmode & VWRITE));

/*
* The owner matches if the effective uid associated with the
* credential matches that of the ACL_USER_OBJ entry. While we're
* doing the first scan, also cache the location of the ACL_MASK and
* ACL_OTHER entries, preventing some future iterations.
*/
acl_mask = acl_other = NULL;
for (i = 0; i < acl->acl_cnt; i++) {
struct acl_entry *ae = &acl->acl_entry[i];
switch (ae->ae_tag) {
case ACL_USER_OBJ:
if (kauth_cred_geteuid(cred) != file_uid)
break;
/* The owner always gets VADMIN; ACL_MASK is not applied here. */
dac_granted = 0;
dac_granted |= VADMIN;
if (ae->ae_perm & ACL_EXECUTE)
dac_granted |= VEXEC;
if (ae->ae_perm & ACL_READ)
dac_granted |= VREAD;
if (ae->ae_perm & ACL_WRITE)
dac_granted |= (VWRITE | VAPPEND);
goto out;

case ACL_MASK:
acl_mask = ae;
break;

case ACL_OTHER:
acl_other = ae;
break;

default:
break;
}
}

/*
* An ACL_OTHER entry should always exist in a valid access ACL. If
* it doesn't, then generate a serious failure. For now, this means
* a debugging message and EPERM, but in the future should probably
* be a panic.
*/
if (acl_other == NULL) {
/*
* XXX This should never happen
*/
printf("%s: ACL_OTHER missing\n", __func__);
return EPERM;
}

/*
* Checks against ACL_USER, ACL_GROUP_OBJ, and ACL_GROUP fields are
* masked by an ACL_MASK entry, if any. As such, first identify the
* ACL_MASK field, then iterate through identifying potential user
* matches, then group matches. If there is no ACL_MASK, assume that
* the mask allows all requests to succeed.
*/
if (acl_mask != NULL) {
acl_mask_granted = 0;
if (acl_mask->ae_perm & ACL_EXECUTE)
acl_mask_granted |= VEXEC;
if (acl_mask->ae_perm & ACL_READ)
acl_mask_granted |= VREAD;
if (acl_mask->ae_perm & ACL_WRITE)
acl_mask_granted |= (VWRITE | VAPPEND);
} else
acl_mask_granted = VEXEC | VREAD | VWRITE | VAPPEND;

/*
* Check ACL_USER ACL entries. There will either be one or no
* matches; if there is one, we accept or reject based on the
* match; otherwise, we continue on to groups.
*/
for (i = 0; i < acl->acl_cnt; i++) {
struct acl_entry *ae = &acl->acl_entry[i];
switch (ae->ae_tag) {
case ACL_USER:
if (kauth_cred_geteuid(cred) != ae->ae_id)
break;
dac_granted = 0;
if (ae->ae_perm & ACL_EXECUTE)
dac_granted |= VEXEC;
if (ae->ae_perm & ACL_READ)
dac_granted |= VREAD;
if (ae->ae_perm & ACL_WRITE)
dac_granted |= (VWRITE | VAPPEND);
dac_granted &= acl_mask_granted;
goto out;
}
}

/*
* Group match is best-match, not first-match, so find a "best"
* match. Iterate across, testing each potential group match. Make
* sure we keep track of whether we found a match or not, so that we
* know if we should try again with any available privilege, or if we
* should move on to ACL_OTHER.
*/
group_matched = 0;
for (i = 0; i < acl->acl_cnt; i++) {
struct acl_entry *ae = &acl->acl_entry[i];
switch (ae->ae_tag) {
case ACL_GROUP_OBJ:
error = kauth_cred_groupmember(cred, file_gid);
if (error > 0)
return error;
/* Any other non-zero result: not a member, skip this entry. */
if (error)
break;
dac_granted = 0;
if (ae->ae_perm & ACL_EXECUTE)
dac_granted |= VEXEC;
if (ae->ae_perm & ACL_READ)
dac_granted |= VREAD;
if (ae->ae_perm & ACL_WRITE)
dac_granted |= (VWRITE | VAPPEND);
dac_granted &= acl_mask_granted;

/* The first matching group that grants everything wins. */
if ((accmode & dac_granted) == accmode)
return 0;

group_matched = 1;
break;

case ACL_GROUP:
error = kauth_cred_groupmember(cred, ae->ae_id);
if (error > 0)
return error;
/* Any other non-zero result: not a member, skip this entry. */
if (error)
break;
dac_granted = 0;
if (ae->ae_perm & ACL_EXECUTE)
dac_granted |= VEXEC;
if (ae->ae_perm & ACL_READ)
dac_granted |= VREAD;
if (ae->ae_perm & ACL_WRITE)
dac_granted |= (VWRITE | VAPPEND);
dac_granted &= acl_mask_granted;

if ((accmode & dac_granted) == accmode)
return 0;

group_matched = 1;
break;

default:
break;
}
}

if (group_matched == 1) {
/*
* There was a match, but it did not grant rights via pure
* DAC. Try again, this time with privilege.
*
* NOTE(review): no privilege is consulted in this function; the
* loop below only recomputes dac_granted for the first matching
* group entry and jumps to the common check at 'out'.
*/
for (i = 0; i < acl->acl_cnt; i++) {
struct acl_entry *ae = &acl->acl_entry[i];
switch (ae->ae_tag) {
case ACL_GROUP_OBJ:
error = kauth_cred_groupmember(cred, file_gid);
if (error > 0)
return error;
if (error)
break;
dac_granted = 0;
if (ae->ae_perm & ACL_EXECUTE)
dac_granted |= VEXEC;
if (ae->ae_perm & ACL_READ)
dac_granted |= VREAD;
if (ae->ae_perm & ACL_WRITE)
dac_granted |= (VWRITE | VAPPEND);
dac_granted &= acl_mask_granted;
goto out;

case ACL_GROUP:
error = kauth_cred_groupmember(cred, ae->ae_id);
if (error > 0)
return error;
if (error)
break;
dac_granted = 0;
if (ae->ae_perm & ACL_EXECUTE)
dac_granted |= VEXEC;
if (ae->ae_perm & ACL_READ)
dac_granted |= VREAD;
if (ae->ae_perm & ACL_WRITE)
dac_granted |= (VWRITE | VAPPEND);
dac_granted &= acl_mask_granted;

goto out;
default:
break;
}
}
/*
* Even with privilege, group membership was not sufficient.
* Return failure.
*/
dac_granted = 0;
goto out;
}

/*
* Fall back on ACL_OTHER. ACL_MASK is not applied to ACL_OTHER.
*/
dac_granted = 0;
if (acl_other->ae_perm & ACL_EXECUTE)
dac_granted |= VEXEC;
if (acl_other->ae_perm & ACL_READ)
dac_granted |= VREAD;
if (acl_other->ae_perm & ACL_WRITE)
dac_granted |= (VWRITE | VAPPEND);

out:
/* Common verdict: every requested bit must have been granted. */
if ((accmode & dac_granted) == accmode)
return 0;
return (accmode & VADMIN) ? EPERM : EACCES;
}

/*
 * Translation table from accmode_t request bits to the corresponding
 * NFSv4 ACL permission bits.  The table is terminated by an all-zero
 * entry and is never modified at run time, hence const.
 */
static const struct {
	accmode_t accmode;
	int mask;
} accmode2mask[] = {
	{ VREAD, ACL_READ_DATA },
	{ VWRITE, ACL_WRITE_DATA },
	{ VAPPEND, ACL_APPEND_DATA },
	{ VEXEC, ACL_EXECUTE },
	{ VREAD_NAMED_ATTRS, ACL_READ_NAMED_ATTRS },
	{ VWRITE_NAMED_ATTRS, ACL_WRITE_NAMED_ATTRS },
	{ VDELETE_CHILD, ACL_DELETE_CHILD },
	{ VREAD_ATTRIBUTES, ACL_READ_ATTRIBUTES },
	{ VWRITE_ATTRIBUTES, ACL_WRITE_ATTRIBUTES },
	{ VDELETE, ACL_DELETE },
	{ VREAD_ACL, ACL_READ_ACL },
	{ VWRITE_ACL, ACL_WRITE_ACL },
	{ VWRITE_OWNER, ACL_WRITE_OWNER },
	{ VSYNCHRONIZE, ACL_SYNCHRONIZE },
	{ 0, 0 },	/* terminator */
};

/*
 * Convert an accmode_t request into the equivalent ACL permission
 * mask by OR-ing together the mask of every accmode2mask entry whose
 * accmode bit is set in the request.
 */
static int
_access_mask_from_accmode(accmode_t accmode)
{
	int mask = 0;
	int idx = 0;

	while (accmode2mask[idx].accmode != 0) {
		if (accmode & accmode2mask[idx].accmode)
			mask |= accmode2mask[idx].mask;
		idx++;
	}

	/*
	 * VAPPEND is just a modifier for VWRITE; if the caller asked
	 * for 'VAPPEND | VWRITE', we want to check for ACL_APPEND_DATA only.
	 */
	if (mask & ACL_APPEND_DATA)
		mask &= ~ACL_WRITE_DATA;

	return mask;
}

/*
* Evaluate an ACL against a set of requested permission bits.
*
* Return 0 iff access is allowed and 1 if it is denied.  A positive
* error returned by kauth_cred_groupmember() is passed back unchanged,
* so callers must treat any non-zero result as a denial.
*
* If denied_explicitly is not NULL it is cleared on entry and set to 1
* only when a matching DENY entry covers one of the requested bits.
*/
static int
_acl_denies(const struct acl *aclp, int access_mask, kauth_cred_t cred,
int file_uid, int file_gid, int *denied_explicitly)
{
int i, error;
const struct acl_entry *ae;

if (denied_explicitly != NULL)
*denied_explicitly = 0;

KASSERT(aclp->acl_cnt <= ACL_MAX_ENTRIES);

for (i = 0; i < aclp->acl_cnt; i++) {
ae = &(aclp->acl_entry[i]);

/* Only ALLOW and DENY entries take part in the evaluation. */
if (ae->ae_entry_type != ACL_ENTRY_TYPE_ALLOW &&
ae->ae_entry_type != ACL_ENTRY_TYPE_DENY)
continue;
/* Entries flagged inherit-only are skipped. */
if (ae->ae_flags & ACL_ENTRY_INHERIT_ONLY)
continue;
/*
* Decide whether the entry applies to this credential; the
* 'continue' statements skip to the next ACL entry.
*/
switch (ae->ae_tag) {
case ACL_USER_OBJ:
if (kauth_cred_geteuid(cred) != file_uid)
continue;
break;
case ACL_USER:
if (kauth_cred_geteuid(cred) != ae->ae_id)
continue;
break;
case ACL_GROUP_OBJ:
error = kauth_cred_groupmember(cred, file_gid);
if (error > 0)
return error;
if (error != 0)
continue;
break;
case ACL_GROUP:
error = kauth_cred_groupmember(cred, ae->ae_id);
if (error > 0)
return error;
if (error != 0)
continue;
break;
default:
/* Everything else must be an "everyone" entry, which always applies. */
KASSERT(ae->ae_tag == ACL_EVERYONE);
}

/* A DENY entry covering any still-pending bit ends the walk. */
if (ae->ae_entry_type == ACL_ENTRY_TYPE_DENY) {
if (ae->ae_perm & access_mask) {
if (denied_explicitly != NULL)
*denied_explicitly = 1;
return (1);
}
}

/*
* Clear the bits named by this entry from the pending set; once
* nothing is pending, access is allowed.
*/
access_mask &= ~(ae->ae_perm);
if (access_mask == 0)
return (0);
}

/* Covers an empty request mask or an ACL with no entries. */
if (access_mask == 0)
return (0);

/* Some requested bit was never granted: implicit denial. */
return (1);
}

/*
* Access check against an NFSv4 ACL.
*
* Returns 0 when access is allowed.  Otherwise returns EPERM when the
* request includes any of VADMIN_PERMS, VDELETE_CHILD or VDELETE, and
* EACCES when it does not.  The incoming file_mode value is discarded;
* the variable is reused for the mode computed from the ACL.
*/
int
genfs_can_access_acl_nfs4(vnode_t *vp, kauth_cred_t cred, uid_t file_uid,
gid_t file_gid, mode_t file_mode, struct acl *aclp, accmode_t accmode)
{
int denied, explicitly_denied, access_mask, is_directory,
must_be_owner = 0;
/* The caller's mode is ignored; file_mode is refilled from the ACL below. */
file_mode = 0;

KASSERT((accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | VAPPEND |
VEXPLICIT_DENY | VREAD_NAMED_ATTRS | VWRITE_NAMED_ATTRS |
VDELETE_CHILD | VREAD_ATTRIBUTES | VWRITE_ATTRIBUTES | VDELETE |
VREAD_ACL | VWRITE_ACL | VWRITE_OWNER | VSYNCHRONIZE)) == 0);
KASSERT((accmode & VAPPEND) == 0 || (accmode & VWRITE));

/* VADMIN is not looked up in the ACL; it requires file ownership. */
if (accmode & VADMIN)
must_be_owner = 1;

/*
* Ignore VSYNCHRONIZE permission.
*/
accmode &= ~VSYNCHRONIZE;

access_mask = _access_mask_from_accmode(accmode);

/* A NULL vp is treated as a non-directory. */
if (vp && vp->v_type == VDIR)
is_directory = 1;
else
is_directory = 0;

/*
* File owner is always allowed to read and write the ACL
* and basic attributes. This is to prevent a situation
* where user would change ACL in a way that prevents him
* from undoing the change.
*/
if (kauth_cred_geteuid(cred) == file_uid)
access_mask &= ~(ACL_READ_ACL | ACL_WRITE_ACL |
ACL_READ_ATTRIBUTES | ACL_WRITE_ATTRIBUTES);

/*
* Ignore append permission for regular files; use write
* permission instead.
*/
if (!is_directory && (access_mask & ACL_APPEND_DATA)) {
access_mask &= ~ACL_APPEND_DATA;
access_mask |= ACL_WRITE_DATA;
}

/* Any non-zero result means denied; explicitly_denied is always set. */
denied = _acl_denies(aclp, access_mask, cred, file_uid, file_gid,
&explicitly_denied);

if (must_be_owner) {
if (kauth_cred_geteuid(cred) != file_uid)
denied = EPERM;
}

/*
* For VEXEC, ensure that at least one execute bit is set for
* non-directories. We have to check the mode here to stay
* consistent with execve(2). See the test in
* exec_check_permissions().
*
* NOTE(review): __acl_nfs4_sync_mode_from_acl() presumably fills
* file_mode with the mode bits derived from aclp -- confirm.
*/
__acl_nfs4_sync_mode_from_acl(&file_mode, aclp);
if (!denied && !is_directory && (accmode & VEXEC) &&
(file_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)
denied = EACCES;

if (!denied)
return (0);

/*
* Access failed. Iff it was not denied explicitly and
* VEXPLICIT_DENY flag was specified, allow access.
*/
if ((accmode & VEXPLICIT_DENY) && explicitly_denied == 0)
return (0);

accmode &= ~VEXPLICIT_DENY;

/*
* Normalize the error: whatever value 'denied' held so far is
* replaced by EPERM or EACCES depending on the kind of request.
*/
if (accmode & (VADMIN_PERMS | VDELETE_CHILD | VDELETE))
denied = EPERM;
else
denied = EACCES;

return (denied);
}

/*
 * Common routine to check if chmod() is allowed.
 *
 * Policy, checked in this order:
 * - The caller must pass the VOP_ACCESSX() check for VWRITE_ACL;
 *   its error is returned unchanged.
 * - The "sticky" bit (S_ISTXT) may only be set on directories;
 *   otherwise EFTYPE is returned.
 * - To set the SGID bit the caller must be a member of the file's
 *   current group; otherwise EPERM is returned.
 * - To set the SUID bit the caller must be the file's owner;
 *   otherwise EPERM is returned.
 *
 * vp - vnode of the file-system object
 * cred - credentials of the invoker
 * cur_uid, cur_gid - current uid/gid of the file-system object
 * new_mode - new mode for the file-system object
 *
 * Returns 0 if the change is allowed, or an error value otherwise.
 */
int
genfs_can_chmod(vnode_t *vp, kauth_cred_t cred, uid_t cur_uid,
    gid_t cur_gid, mode_t new_mode)
{
	int error, ismember;

	error = VOP_ACCESSX(vp, VWRITE_ACL, cred);
	if (error != 0)
		return error;

	/* Unprivileged users can't set the sticky bit on files. */
	if (vp->v_type != VDIR && (new_mode & S_ISTXT) != 0)
		return EFTYPE;

	/* Setting SGID requires membership in the file's group. */
	if ((new_mode & S_ISGID) != 0) {
		error = kauth_cred_ismember_gid(cred, cur_gid, &ismember);
		if (error != 0 || !ismember)
			return EPERM;
	}

	/* Setting SUID requires being the file owner. */
	if ((new_mode & S_ISUID) != 0 && cur_uid != kauth_cred_geteuid(cred))
		return EPERM;

	return 0;
}

/*
 * Common routine to check if chown() is allowed.
 *
 * Policy:
 * - The caller must pass the VOP_ACCESSX() check for VWRITE_OWNER
 *   (its error is returned unchanged), and
 * - the caller must own the file, and
 * - the owner must stay the same, and
 * - the group must either stay the same, equal the caller's effective
 *   gid, or be a group the caller is a member of.
 *
 * vp - vnode
 * cred - credentials of the invoker
 * cur_uid, cur_gid - current uid/gid of the file-system object
 * new_uid, new_gid - target uid/gid of the file-system object
 *
 * Returns 0 if the change is allowed, or an error value otherwise.
 */
int
genfs_can_chown(vnode_t *vp, kauth_cred_t cred, uid_t cur_uid,
    gid_t cur_gid, uid_t new_uid, gid_t new_gid)
{
	int error, ismember;

	error = VOP_ACCESSX(vp, VWRITE_OWNER, cred);
	if (error != 0)
		return error;

	/* Only the current owner gets any further. */
	if (kauth_cred_geteuid(cred) != cur_uid)
		return EPERM;

	/* Giving the file away is never allowed. */
	if (new_uid != cur_uid)
		return EPERM;

	/* Leaving the group untouched is a no-op. */
	if (new_gid == cur_gid)
		return 0;

	/* The caller's effective gid is always an acceptable target. */
	if (kauth_cred_getegid(cred) == new_gid)
		return 0;

	/* Otherwise the caller must be a member of the new group. */
	ismember = 0;
	error = kauth_cred_ismember_gid(cred, new_gid, &ismember);
	if (error == 0 && ismember)
		return 0;

	return EPERM;
}

/*
 * Common routine to check if changing file times is allowed.
 *
 * Permission is granted when VOP_ACCESSX() allows VWRITE_ATTRIBUTES.
 * If that fails and VA_UTIMES_NULL is set in vaflags, write
 * permission (VOP_ACCESS() with VWRITE) is sufficient as well.
 *
 * From NFSv4.1, draft 21, 6.2.1.3.1, Discussion of Mask Attributes:
 * A user having ACL_WRITE_DATA or ACL_WRITE_ATTRIBUTES
 * will be allowed to set the times [..] to the current
 * server time.
 *
 * Returns 0 when allowed; otherwise EACCES if VA_UTIMES_NULL was set
 * and EPERM if it was not.  The owner_uid argument is not examined.
 */
int
genfs_can_chtimes(vnode_t *vp, kauth_cred_t cred, uid_t owner_uid,
    u_int vaflags)
{
	const bool utimes_null = (vaflags & VA_UTIMES_NULL) != 0;
	int error;

	error = VOP_ACCESSX(vp, VWRITE_ATTRIBUTES, cred);
	if (error != 0 && utimes_null)
		error = VOP_ACCESS(vp, VWRITE, cred);

	if (error == 0)
		return 0;

	return utimes_null ? EACCES : EPERM;
}

/*
 * Common routine to check if chflags() is allowed.
 *
 * Policy:
 * - You must own the file, and
 * - You must not change system flags, and
 * - You must not change flags on character/block devices.
 *
 * vp - vnode
 * cred - credentials of the invoker
 * owner_uid - uid of the file-system object
 * changing_sysflags - true if the invoker wants to change system flags
 *
 * Returns 0 if the change is allowed and EPERM otherwise.
 */
int
genfs_can_chflags(vnode_t *vp, kauth_cred_t cred,
    uid_t owner_uid, bool changing_sysflags)
{

	/* Non-owners and system-flag changes are refused outright. */
	if (kauth_cred_geteuid(cred) != owner_uid || changing_sysflags)
		return EPERM;

	/*
	 * Unprivileged users cannot change the flags on devices, even if they
	 * own them.
	 */
	switch (vp->v_type) {
	case VCHR:
	case VBLK:
		return EPERM;
	default:
		return 0;
	}
}

/*
 * Common "sticky" policy.
 *
 * When a directory is "sticky" (as determined by the caller), a file
 * in it may only be renamed or deleted by the owner of the directory
 * or by the owner of the file itself.
 *
 * Returns 0 when the credential's effective uid equals dir_uid or
 * file_uid, and EPERM otherwise.  The vp argument is not examined.
 */
int
genfs_can_sticky(vnode_t *vp, kauth_cred_t cred, uid_t dir_uid, uid_t file_uid)
{
	if (kauth_cred_geteuid(cred) == dir_uid ||
	    kauth_cred_geteuid(cred) == file_uid)
		return 0;

	return EPERM;
}

/*
 * Common extended-attribute access check.
 *
 * - NOCRED (kernel-invoked) always succeeds.
 * - The system namespace is decided by kauth_authorize_system().
 * - The user namespace is decided by VOP_ACCESS() with accmode.
 * - Any other namespace is refused with EPERM.
 */
int
genfs_can_extattr(vnode_t *vp, kauth_cred_t cred, accmode_t accmode,
    int attrnamespace)
{
	if (cred == NOCRED)
		return 0;

	if (attrnamespace == EXTATTR_NAMESPACE_SYSTEM) {
		return kauth_authorize_system(cred, KAUTH_SYSTEM_FS_EXTATTR,
		    0, vp->v_mount, NULL, NULL);
	}

	if (attrnamespace == EXTATTR_NAMESPACE_USER)
		return VOP_ACCESS(vp, accmode, cred);

	return EPERM;
}

int
genfs_access(void *v)
{
struct vop_access_args *ap = v;

KASSERT((ap->a_accmode & ~(VEXEC | VWRITE | VREAD | VADMIN |
VAPPEND)) == 0);

return VOP_ACCESSX(ap->a_vp, ap->a_accmode, ap->a_cred);
}

/*
 * Generic VOP_ACCESSX implementation: passes a copy of the requested
 * access mode through vfs_unixify_accmode() and forwards whatever
 * remains to VOP_ACCESS().  A conversion error is returned unchanged;
 * an empty resulting mode is granted without calling VOP_ACCESS().
 */
int
genfs_accessx(void *v)
{
	struct vop_accessx_args *ap = v;
	accmode_t accmode = ap->a_accmode;
	int error;

	error = vfs_unixify_accmode(&accmode);
	if (error != 0)
		return error;

	if (accmode != 0)
		return VOP_ACCESS(ap->a_vp, accmode, ap->a_cred);

	return 0;
}

/*
 * genfs_pathconf:
 *
 * Standard implementation of POSIX pathconf, to get information about
 * limits for a filesystem.  Override per filesystem for the case where
 * the filesystem has smaller limits.
 *
 * Answers _PC_PATH_MAX with PATH_MAX and both _PC_ACL_EXTENDED and
 * _PC_ACL_NFS4 with 0; every other name fails with EINVAL and leaves
 * *a_retval untouched.
 */
int
genfs_pathconf(void *v)
{
	struct vop_pathconf_args *ap = v;

	switch (ap->a_name) {
	case _PC_PATH_MAX:
		*ap->a_retval = PATH_MAX;
		break;
	case _PC_ACL_EXTENDED:
	case _PC_ACL_NFS4:
		*ap->a_retval = 0;
		break;
	default:
		return EINVAL;
	}

	return 0;
}