/*-
* Copyright (c) 2010-2011 Emmanuel Dreyfus. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
if (PN_ISDIR(opc)) {
op = FUSE_RELEASEDIR;
mode = FREAD;
} else {
op = FUSE_RELEASE;
}
/*
* Destroy the filehandle before sending the
* request to the FUSE filesystem, otherwise
* we may get a second close() while we wait
* for the reply, and we would end up closing
* the same fh twice instead of closing both.
*/
fh = perfuse_get_fh(opc, mode);
perfuse_destroy_fh(pn, fh);
/*
* release_flags may be set to FUSE_RELEASE_FLUSH
* to flush locks. lock_owner must be set in that case
*
* ps_new_msg() is called with NULL creds, which will
* be interpreted as FUSE superuser. We come here from the
* inactive method, which provides no creds, but obviously
* runs with kernel privilege.
*/
pm = ps->ps_new_msg(pu, opc, op, sizeof(*fri), NULL);
fri = GET_INPAYLOAD(ps, pm, fuse_release_in);
fri->fh = fh;
fri->flags = 0;
fri->release_flags = 0;
fri->lock_owner = pnd->pnd_lock_owner;
fri->flags = (fri->lock_owner != 0) ? FUSE_RELEASE_FLUSH : 0;
/*
* pcr is NULL for self open through fsync or readdir.
* In both cases, access control is useless, as it was
* done before, at open time.
*/
if (pcr == NULL)
return 0;
static int
sticky_access(puffs_cookie_t opc, struct puffs_node *targ,
const struct puffs_cred *pcr)
{
uid_t uid;
int sticky, owner, parent_owner;
/*
* This covers the case where the kernel requests a DELETE
* or RENAME on its own, and where puffs_cred_getuid would
* return -1. While such a situation should not happen,
* we allow it here.
*
* This also allows root to tamper with other users' files
* that have the sticky bit.
*/
if (puffs_cred_isjuggernaut(pcr))
return 0;
if (puffs_cred_getuid(pcr, &uid) != 0)
DERRX(EX_SOFTWARE, "puffs_cred_getuid fails in %s", __func__);
/*
* Starting with ABI 7.4, inode number 0 means ENOENT,
* with entry_valid / entry_valid_nsec giving negative
* cache timeout (which we do not implement yet).
*/
if (feo->attr.ino == 0) {
ps->ps_destroy_msg(pm);
return ENOENT;
}
/*
* Check for a known node, not reclaimed, with another name.
* It may have been moved, or we can lookup ../
*/
if (((oldpnd = perfuse_node_bynodeid(ps, feo->nodeid)) != NULL) &&
!(oldpnd->pnd_flags & PND_RECLAIMED)) {
/*
* Save the new node name if not ..
*/
if (strncmp(path, "..", len) != 0)
(void)strlcpy(oldpnd->pnd_name,
path, MAXPATHLEN);
pn = oldpnd->pnd_pn;
do {
char *ndp;
size_t reclen;
char name[MAXPATHLEN];
reclen = _DIRENT_RECLEN(dents, fd->namelen);
/*
* Check we do not overflow the output buffer
* struct fuse_dirent is bigger than struct dirent,
* so we should always use fd_len and never reallocate
* later.
* If we have to reallocate, try to double the buffer
* each time so that we do not have to do it too often.
*/
if (written + reclen > dents_len) {
if (dents_len == 0)
dents_len = fd_len;
else
dents_len =
MAX(2 * dents_len, written + reclen);
/*
* Filesystem was mounted without -o use_ino
* Perform a lookup to find it.
*/
if (fd->ino == PERFUSE_UNKNOWN_INO) {
struct puffs_node *pn;
struct perfuse_node_data *pnd = PERFUSE_NODE_DATA(opc);
if (strcmp(name, "..") == 0) {
/*
* Avoid breaking out of fs
* by lookup to .. on root
*/
if (pnd->pnd_nodeid == FUSE_ROOT_ID)
fd->ino = FUSE_ROOT_ID;
else
fd->ino = pnd->pnd_parent_nodeid;
} else if (strcmp(name, ".") == 0 ) {
fd->ino = pnd->pnd_nodeid;
} else {
int error;
/*
* Move to the next record.
* fd->off is not the offset, it is an opaque cookie
* given by the filesystem to keep state across multiple
* readdir() operations.
* Use record alignment instead.
*/
len = FUSE_DIRENT_ALIGN(sizeof(*fd) + fd->namelen);
#ifdef PERFUSE_DEBUG
if (perfuse_diagflags & PDF_READDIR)
DPRINTF("%s: record at %"PRId64"/0x%"PRIx64" "
"length = %zd/0x%zx. "
"next record at %"PRId64"/0x%"PRIx64" "
"max %zd/0x%zx\n",
__func__, fd_offset, fd_offset, len, len,
fd_offset + len, fd_offset + len,
fd_len, fd_len);
#endif
fd_offset += len;
/*
* Check if next record is still within the packet
* If it is not, we reached the end of the buffer.
*/
if (fd_offset >= fd_len)
break;
/*
 * Filesystem-wide sync entry point.
 *
 * FUSE does not appear to provide a filesystem-level sync callback, so
 * no request is built here: the arguments are handed unchanged to
 * puffs_fsnop_sync() and its result is returned as-is.  It may not even
 * be worth registering this callback.
 */
int
perfuse_fs_sync(struct puffs_usermount *pu, int waitfor,
	const struct puffs_cred *pcr)
{
	int rv;

	rv = puffs_fsnop_sync(pu, waitfor, pcr);

	return rv;
}
/*
* Kernel would kill us if the filesystem returned the parent
* itself. If we want to live, hide that!
*/
if ((opc == (puffs_cookie_t)pn) && (strcmp(pcn->pcn_name, ".") != 0)) {
DERRX(EX_SOFTWARE, "lookup \"%s\" in \"%s\" returned parent",
pcn->pcn_name, perfuse_node_path(ps, opc));
/* NOTREACHED */
error = ESTALE;
goto out;
}
/*
* Check for sticky bit. Unfortunately there is no way to
* do this before creating the puffs_node, since we require
* this operation to get the node owner.
*/
switch (pcn->pcn_nameiop) {
case NAMEI_DELETE: /* FALLTHROUGH */
case NAMEI_RENAME:
error = sticky_access(opc, pn, pcn->pcn_cred);
if (error != 0) {
(void)perfuse_node_reclaim2(pu, pn, 1);
goto out;
}
break;
default:
break;
}
if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
return ENOENT;
node_ref(opc);
/*
* If create is unimplemented: Check that it does not
* already exist, and if not, do mknod and open
*/
ps = puffs_getspecific(pu);
if (ps->ps_flags & PS_NO_CREAT) {
error = node_lookup_common(pu, opc, NULL, pcn->pcn_name,
pcn->pcn_cred, &pn);
if (error == 0) {
(void)perfuse_node_reclaim2(pu, pn, 1);
error = EEXIST;
goto out;
}
/*
* FUSE does the open at create time, while
* NetBSD will open in a subsequent operation.
* We need to open now, in order to retain FUSE
* semantics. The calling process will not get
* a file descriptor before the kernel sends
* the open operation.
*/
error = perfuse_node_open(pu, (puffs_cookie_t)pn,
FWRITE, pcn->pcn_cred);
goto out;
}
name = pcn->pcn_name;
namelen = pcn->pcn_namelen + 1;
len = sizeof(*fci) + namelen;
/*
* flags should use O_WRONLY instead of O_RDWR, but it
* breaks when the caller tries to read from file.
*
* mode must contain file type (ie: S_IFREG), use VTTOIF(vap->va_type)
*/
pm = ps->ps_new_msg(pu, opc, FUSE_CREATE, len, pcn->pcn_cred);
fci = GET_INPAYLOAD(ps, pm, fuse_create_in);
fci->flags = O_CREAT | O_TRUNC | O_RDWR;
fci->mode = vap->va_mode | VTTOIF(vap->va_type);
fci->umask = 0; /* Seems unused by libfuse */
(void)strlcpy((char*)(void *)(fci + 1), name, namelen);
len = sizeof(*feo) + sizeof(*foo);
if ((error = xchg_msg(pu, opc, pm, len, wait_reply)) != 0) {
/*
* create is unimplemented, remember it for later,
* and start over using mknod and open instead.
*/
if (error == ENOSYS) {
ps->ps_flags |= PS_NO_CREAT;
error = perfuse_node_create(pu, opc, pni, pcn, vap);
}
/*
* Save the file handle and inode in node private data
* so that we can reuse it later
*/
pn = perfuse_new_pn(pu, name, opc);
perfuse_new_fh((puffs_cookie_t)pn, foo->fh, FWRITE);
PERFUSE_NODE_DATA(pn)->pnd_nodeid = feo->nodeid;
PERFUSE_NODE_DATA(pn)->pnd_fuse_nlookup++;
PERFUSE_NODE_DATA(pn)->pnd_puffs_nlookup++;
perfuse_node_cache(ps, pn);
if (PN_ISDIR(opc))
op = FUSE_OPENDIR;
else
op = FUSE_OPEN;
/*
* libfuse docs say
* - O_CREAT and O_EXCL should never be set.
* - O_TRUNC may be used if mount option atomic_o_trunc is used XXX
*
* O_APPEND makes no sense since FUSE always sends
* the file offset for write operations. If the
* filesystem uses pwrite(), O_APPEND would cause
* the offset to be ignored and cause file corruption.
*/
mode &= ~(O_CREAT|O_EXCL|O_APPEND);
/*
* Do not open twice, and do not reopen for reading
* if we already have write handle.
*/
switch (mode & (FREAD|FWRITE)) {
case FREAD:
if (pnd->pnd_flags & (PND_RFH|PND_WFH))
goto out;
break;
case FWRITE:
if (pnd->pnd_flags & PND_WFH)
goto out;
break;
case FREAD|FWRITE:
if (pnd->pnd_flags & PND_WFH)
goto out;
/*
* Corner case: if already open for reading (PND_RFH)
* and re-opening FREAD|FWRITE, we need to reopen,
* but only for writing. Note the change on mode
* will only affect perfuse_new_fh()
*/
if (pnd->pnd_flags & PND_RFH)
mode &= ~FREAD;
break;
default:
DWARNX("open without either FREAD nor FWRITE");
error = EPERM;
goto out;
}
/*
* Queue open on a node so that we do not open
* twice. This would be better with read and
* write distinguished.
*/
while (pnd->pnd_flags & PND_INOPEN)
requeue_request(pu, opc, PCQ_OPEN);
pnd->pnd_flags |= PND_INOPEN;
/*
* Convert PUFFS mode to FUSE mode: convert FREAD/FWRITE
* to O_RDONLY/O_WRONLY while preserving the other options.
*/
fmode = mode & ~(FREAD|FWRITE);
fmode |= (mode & FWRITE) ? O_RDWR : O_RDONLY;
/*
 * Close callback for a node.
 *
 * Returns EBADF when the node is not flagged PND_OPEN, and 0 otherwise.
 * Nothing is released here: the actual close is postponed until
 * inactive time.  Only opc is consulted; pu, flags and pcr are unused.
 */
/* ARGSUSED0 */
int
perfuse_node_close(struct puffs_usermount *pu, puffs_cookie_t opc, int flags,
	const struct puffs_cred *pcr)
{
	struct perfuse_node_data *node = PERFUSE_NODE_DATA(opc);
	int is_open = (node->pnd_flags & PND_OPEN) != 0;

	/* A node that is not open cannot be closed. */
	return is_open ? 0 : EBADF;
}
int
perfuse_node_access(struct puffs_usermount *pu, puffs_cookie_t opc, int mode,
const struct puffs_cred *pcr)
{
perfuse_msg_t *pm;
struct perfuse_state *ps;
struct fuse_access_in *fai;
int error;
if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
return ENOENT;
node_ref(opc);
/*
* If we previously detected the filesystem does not
* implement access(), short-circuit the call and skip
* to libpuffs access() emulation.
*/
ps = puffs_getspecific(pu);
if (ps->ps_flags & PS_NO_ACCESS) {
const struct vattr *vap;
if ((pnd->pnd_flags & PND_REMOVED) && !(pnd->pnd_flags & PND_OPEN))
return ENOENT;
node_ref(opc);
/*
* Serialize size access, see comment in perfuse_node_setattr().
*/
while (pnd->pnd_flags & PND_INRESIZE)
requeue_request(pu, opc, PCQ_RESIZE);
pnd->pnd_flags |= PND_INRESIZE;
ps = puffs_getspecific(pu);
/*
* FUSE_GETATTR_FH must be set in fgi->getattr_flags
* if we use fgi->fh
*/
pm = ps->ps_new_msg(pu, opc, FUSE_GETATTR, sizeof(*fgi), pcr);
fgi = GET_INPAYLOAD(ps, pm, fuse_getattr_in);
fgi->getattr_flags = 0;
fgi->dummy = 0;
fgi->fh = FUSE_UNKNOWN_FH;
/*
* We set birthtime, flags, filerev,vaflags to 0.
* This seems the best bet, since the information is
* not available from filesystem.
*/
fuse_attr_to_vap(ps, vap, &fao->attr);
/*
* The only operation we can do once the file is removed
* is to resize it, and we can do it only if it is open.
* Do not even send the operation to the filesystem: the
* file is not there anymore.
*/
if (pnd->pnd_flags & PND_REMOVED) {
if (!(pnd->pnd_flags & PND_OPEN))
return ENOENT;
/*
* Check for permission to change size
* It is always allowed if we already have a write file handle
*/
if ((vap->va_size != (u_quad_t)PUFFS_VNOVAL) &&
!(pnd->pnd_flags & PND_WFH) &&
(error = mode_access(opc, pcr, PUFFS_VWRITE)) != 0)
return error;
/*
* Check for permission to change dates
*/
if (((vap->va_atime.tv_sec != (time_t)PUFFS_VNOVAL) ||
(vap->va_mtime.tv_sec != (time_t)PUFFS_VNOVAL)) &&
(puffs_access_times(old_va->va_uid, old_va->va_gid,
old_va->va_mode, 0, pcr) != 0))
return EPERM;
/*
* Check for permission to change owner and group
*/
if (((vap->va_uid != (uid_t)PUFFS_VNOVAL) ||
(vap->va_gid != (gid_t)PUFFS_VNOVAL)) &&
(puffs_access_chown(old_va->va_uid, old_va->va_gid,
vap->va_uid, vap->va_gid, pcr)) != 0)
return EPERM;
/*
* Check for sticky bit on non-directory by non root user
*/
if ((vap->va_mode != (mode_t)PUFFS_VNOVAL) &&
(vap->va_mode & S_ISTXT) && (old_va->va_type != VDIR) &&
!puffs_cred_isjuggernaut(pcr))
return EFTYPE;
/*
* Check for permission to change permissions
*/
if ((vap->va_mode != (mode_t)PUFFS_VNOVAL) &&
(puffs_access_chmod(old_va->va_uid, old_va->va_gid,
old_va->va_type, vap->va_mode, pcr)) != 0)
return EPERM;
/*
* fchmod() sets mode and fh, and it may carry
* a resize as well. That may break if the
* filesystem does chmod then resize, and fails
* because it does not have permission anymore.
* We work this around by splitting into two setattr.
*/
if ((vap->va_size != (u_quad_t)PUFFS_VNOVAL) &&
(vap->va_mode != (mode_t)PUFFS_VNOVAL) &&
(fh != FUSE_UNKNOWN_FH)) {
struct vattr resize_va;
/*
* Serialize anything that can touch file size
* to avoid reordered GETATTR and SETATTR.
* Out of order SETATTR can report stale size,
* which will cause the kernel to truncate the file.
* XXX Probably useless now we have a lock on GETATTR
*/
while (pnd->pnd_flags & PND_INRESIZE)
requeue_request(pu, opc, PCQ_RESIZE);
pnd->pnd_flags |= PND_INRESIZE;
}
/*
* When not sending a time field, still fill with
* current value, as the filesystem may just reset
* the field to Epoch even if fsi->valid bit is
* not set (GlusterFS does that).
*/
if (vap->va_atime.tv_sec != (time_t)PUFFS_VNOVAL) {
fsi->atime = vap->va_atime.tv_sec;
fsi->atimensec = (uint32_t)vap->va_atime.tv_nsec;
fsi->valid |= FUSE_FATTR_ATIME;
} else {
fsi->atime = old_va->va_atime.tv_sec;
fsi->atimensec = (uint32_t)old_va->va_atime.tv_nsec;
}
#ifndef PUFFS_KFLAG_NOFLUSH_META
/*
* ftruncate() sends only va_size, and metadata cache
* flush adds va_atime and va_mtime. Some FUSE
* filesystems will attempt to detect ftruncate by
* checking for FATTR_SIZE being set without
* FATTR_UID|FATTR_GID|FATTR_ATIME|FATTR_MTIME|FATTR_MODE
*
* Try to adapt and remove FATTR_ATIME|FATTR_MTIME
* if we suspect a ftruncate().
*/
if ((vap->va_size != (u_quad_t)PUFFS_VNOVAL) &&
((vap->va_mode == (mode_t)PUFFS_VNOVAL) &&
(vap->va_uid == (uid_t)PUFFS_VNOVAL) &&
(vap->va_gid == (gid_t)PUFFS_VNOVAL))) {
fsi->atime = 0;
fsi->atimensec = 0;
fsi->mtime = 0;
fsi->mtimensec = 0;
fsi->valid &= ~(FUSE_FATTR_ATIME|FUSE_FATTR_MTIME);
}
/*
* If only atime is changed, discard the operation: it
* happens after read, and in that case the filesystem
* already updated atime. NB: utimes() also change mtime.
*/
if (fsi->valid == FUSE_FATTR_ATIME)
fsi->valid &= ~FUSE_FATTR_ATIME;
#endif /* PUFFS_KFLAG_NOFLUSH_META */
/*
* If nothing remains, discard the operation.
*/
if (!(fsi->valid & (FUSE_FATTR_SIZE|FUSE_FATTR_ATIME|FUSE_FATTR_MTIME|
FUSE_FATTR_MODE|FUSE_FATTR_UID|FUSE_FATTR_GID))) {
error = 0;
ps->ps_destroy_msg(pm);
goto out;
}
/*
* Do not honour FAF when changing size. How do
* you want such a thing to work?
*/
reply = wait_reply;
#ifdef PUFFS_SETATTR_FAF
if ((xflag & PUFFS_SETATTR_FAF) && !(fsi->valid & FUSE_FATTR_SIZE))
reply = no_reply;
#endif
if ((error = xchg_msg(pu, opc, pm, sizeof(*fao), reply)) != 0)
goto out;
if (reply == no_reply)
goto out;
/*
* Copy back the new values
*/
fao = GET_OUTPAYLOAD(ps, pm, fuse_attr_out);
int
perfuse_node_poll(struct puffs_usermount *pu, puffs_cookie_t opc, int *events)
{
struct perfuse_state *ps;
perfuse_msg_t *pm;
struct fuse_poll_in *fpi;
struct fuse_poll_out *fpo;
int error;
node_ref(opc);
ps = puffs_getspecific(pu);
/*
* kh is set if FUSE_POLL_SCHEDULE_NOTIFY is set.
*
* XXX ps_new_msg() is called with NULL creds, which will
* be interpreted as FUSE superuser. We have no way to
* know the requesting process' credential, but since poll
* is supposed to operate on a file that has been open,
* permission should have already been checked at open time.
* That may still break on filesystems that provide odd
* semantics.
*/
pm = ps->ps_new_msg(pu, opc, FUSE_POLL, sizeof(*fpi), NULL);
fpi = GET_INPAYLOAD(ps, pm, fuse_poll_in);
fpi->fh = PN_ISDIR(opc) ? FUSE_UNKNOWN_FH : perfuse_get_fh(opc, FREAD);
fpi->kh = 0;
fpi->flags = 0;
/*
* No need to sync a removed node
*/
if (pnd->pnd_flags & PND_REMOVED)
return 0;
/*
* We do not sync closed files. They have been
* synced at inactive time already.
*/
if (!(pnd->pnd_flags & PND_OPEN))
return 0;
node_ref(opc);
if (PN_ISDIR(opc))
op = FUSE_FSYNCDIR;
else /* VREG but also other types such as VLNK */
op = FUSE_FSYNC;
/*
* Do not sync if there is no change to sync
* XXX remove that test on files if we implement mmap
*/
#ifdef PERFUSE_DEBUG
if (perfuse_diagflags & PDF_SYNC)
DPRINTF("%s: TEST opc = %p, file = \"%s\" is %sdirty\n",
__func__, (void*)opc, perfuse_node_path(ps, opc),
pnd->pnd_flags & PND_DIRTY ? "" : "not ");
#endif
if (!(pnd->pnd_flags & PND_DIRTY))
goto out;
/*
* It seems NetBSD can call fsync without open first
* glusterfs complains in such a situation:
* "FSYNC() ERR => -1 (Invalid argument)"
* The file will be closed at inactive time.
*
* We open the directory for reading in order to sync.
* This sounds rather counterintuitive, but it works.
*/
if (!(pnd->pnd_flags & PND_WFH)) {
if ((error = perfuse_node_open(pu, opc, FREAD, pcr)) != 0)
goto out;
}
/*
* Wait for all operations on the deleted node to drain,
* as the filesystem may be confused to have it deleted
* during a getattr
*/
while (PERFUSE_NODE_DATA(targ)->pnd_inxchg)
requeue_request(pu, targ, PCQ_AFTERXCHG);
ps = puffs_getspecific(pu);
pnd = PERFUSE_NODE_DATA(opc);
name = pcn->pcn_name;
len = pcn->pcn_namelen + 1;
/*
* Wait for all operations on the deleted node to drain,
* as the filesystem may be confused to have it deleted
* during a getattr
*/
if ((struct puffs_node *)targ != NULL) {
node_ref(targ);
while (PERFUSE_NODE_DATA(targ)->pnd_inxchg)
requeue_request(pu, targ, PCQ_AFTERXCHG);
} else {
while (PERFUSE_NODE_DATA(src)->pnd_inxchg)
requeue_request(pu, src, PCQ_AFTERXCHG);
}
if ((pnd->pnd_flags & PND_REMOVED) ||
(PERFUSE_NODE_DATA(targ)->pnd_flags & PND_REMOVED))
return ENOENT;
/*
* Attempt to rmdir dir/.. should raise ENOTEMPTY
*/
if (PERFUSE_NODE_DATA(targ)->pnd_nodeid == pnd->pnd_parent_nodeid)
return ENOTEMPTY;
node_ref(opc);
node_ref(targ);
/*
* Wait for all operations on the deleted node to drain,
* as the filesystem may be confused to have it deleted
* during a getattr
*/
while (PERFUSE_NODE_DATA(targ)->pnd_inxchg)
requeue_request(pu, targ, PCQ_AFTERXCHG);
ps = puffs_getspecific(pu);
name = pcn->pcn_name;
len = pcn->pcn_namelen + 1;
/*
* readdir state is kept at node level, and several readdir
* requests can be issued at the same time on the same node.
* We need to queue requests so that only one is in readdir
* code at the same time.
*/
pnd = PERFUSE_NODE_DATA(opc);
while (pnd->pnd_flags & PND_INREADDIR)
requeue_request(pu, opc, PCQ_READDIR);
pnd->pnd_flags |= PND_INREADDIR;
#ifdef PERFUSE_DEBUG
if (perfuse_diagflags & PDF_READDIR)
DPRINTF("%s: READDIR opc = %p enter critical section\n",
__func__, (void *)opc);
#endif
/*
* Re-initialize pnd->pnd_fd_cookie on the first readdir for a node
*/
if (*readoff == 0)
pnd->pnd_fd_cookie = 0;
/*
* Do we already have the data buffered?
*/
if (pnd->pnd_dirent != NULL)
goto out;
pnd->pnd_dirent_len = 0;
/*
* It seems NetBSD can call readdir without open first
* libfuse will crash if it is done that way, hence open first.
*/
if (!(pnd->pnd_flags & PND_OPEN)) {
if ((error = perfuse_node_open(pu, opc, FREAD, pcr)) != 0)
goto out;
}
/*
* There are many puffs_framebufs calls later,
* therefore foh will not be valid for a long time.
* Just get the length and forget it.
*/
foh = GET_OUTHDR(ps, pm);
foh_len = foh->len;
/*
* Empty read: we reached the end of the buffer.
*/
if (foh_len == sizeof(*foh)) {
ps->ps_destroy_msg(pm);
*eofflag = 1;
break;
}
/*
* Check for corrupted message.
*/
if (foh_len < sizeof(*foh) + sizeof(*fd)) {
ps->ps_destroy_msg(pm);
DWARNX("readdir reply too short");
error = EIO;
goto out;
}
/*
* The fd->off field is used as a cookie for
* resuming the next readdir() where this one was left.
*/
pnd->pnd_fd_cookie = readdir_last_cookie(fd, fd_len);
ps->ps_destroy_msg(pm);
} while (1 /* CONSTCOND */);
if (pnd->pnd_all_fd != NULL) {
if (fuse_to_dirent(pu, opc, pnd->pnd_all_fd,
pnd->pnd_all_fd_len) == -1)
error = EIO;
}
foh = GET_OUTHDR(ps, pm);
len = foh->len - sizeof(*foh);
if (len > *linklen)
DERRX(EX_PROTOCOL, "path len = %zd too long", len);
if (len == 0)
DERRX(EX_PROTOCOL, "path len = %zd too short", len);
/*
* Never forget the root.
*/
if (pnd->pnd_nodeid == FUSE_ROOT_ID)
return 0;
#ifdef PERFUSE_DEBUG
if (perfuse_diagflags & PDF_RECLAIM)
DPRINTF("%s (nodeid %"PRId64") reclaimed, nlookup = %d/%d\n",
perfuse_node_path(ps, opc), pnd->pnd_nodeid,
nlookup, pnd->pnd_puffs_nlookup);
#endif
/*
* The kernel tells us how many lookups it made, which allows
* us to detect that we have an uncompleted lookup and that the
* node should not disappear.
*/
pnd->pnd_puffs_nlookup -= nlookup;
if (pnd->pnd_puffs_nlookup > 0)
return 0;
/*
* Purge any activity on the node, while checking
* that it remains eligible for a reclaim.
*/
while (pnd->pnd_ref > 1)
requeue_request(pu, opc, PCQ_REF);
#ifdef PERFUSE_DEBUG
if ((pnd->pnd_flags & PND_OPEN) ||
!TAILQ_EMPTY(&pnd->pnd_pcq))
DERRX(EX_SOFTWARE, "%s: opc = %p \"%s\": still open",
__func__, opc, pnd->pnd_name);
/*
* Send the FORGET message
*
* ps_new_msg() is called with NULL creds, which will
* be interpreted as FUSE superuser. This is obviously
* fine since we operate with kernel creds here.
*/
pm = ps->ps_new_msg(pu, opc, FUSE_FORGET,
sizeof(*ffi), NULL);
ffi = GET_INPAYLOAD(ps, pm, fuse_forget_in);
ffi->nlookup = pnd->pnd_fuse_nlookup;
/*
* No reply is expected, pm is freed in xchg_msg
*/
(void)xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, no_reply);
int
perfuse_node_inactive(struct puffs_usermount *pu, puffs_cookie_t opc)
{
struct perfuse_node_data *pnd;
int error;
if (opc == 0)
return 0;
pnd = PERFUSE_NODE_DATA(opc);
if (!(pnd->pnd_flags & (PND_OPEN|PND_REMOVED)))
return 0;
node_ref(opc);
/*
* Make sure all operations are finished.
* There can be an ongoing write. Other
* operations wait for all data before
* the close/inactive.
*/
while (pnd->pnd_flags & PND_INWRITE)
requeue_request(pu, opc, PCQ_AFTERWRITE);
/*
* The inactive operation may be cancelled,
* If no open is in progress, set PND_INOPEN
* so that a new open will be queued.
*/
if (pnd->pnd_flags & PND_INOPEN)
goto out;
pnd->pnd_flags |= PND_INOPEN;
/*
* Sync data
*/
if (pnd->pnd_flags & PND_DIRTY) {
if ((error = perfuse_node_fsync(pu, opc, NULL, 0, 0, 0)) != 0)
DWARN("%s: perfuse_node_fsync failed error = %d",
__func__, error);
}
/*
* Close handles
*/
if (pnd->pnd_flags & PND_WFH) {
if ((error = perfuse_node_close_common(pu, opc, FWRITE)) != 0)
DWARN("%s: close write FH failed error = %d",
__func__, error);
}
if (pnd->pnd_flags & PND_RFH) {
if ((error = perfuse_node_close_common(pu, opc, FREAD)) != 0)
DWARN("%s: close read FH failed error = %d",
__func__, error);
}
/*
* This will cause a reclaim to be sent
*/
if (pnd->pnd_flags & PND_REMOVED)
puffs_setback(puffs_cc_getcc(pu), PUFFS_SETBACK_NOREF_N1);
break;
default:
DWARN("Unimplemented pathconf for name = %d", name);
error = ENOSYS;
break;
}
return error;
}
int
perfuse_node_advlock(struct puffs_usermount *pu, puffs_cookie_t opc,
void *id, int op, struct flock *fl, int flags)
{
struct perfuse_state *ps;
int fop;
perfuse_msg_t *pm;
uint64_t fh;
struct fuse_lk_in *fli;
struct fuse_out_header *foh;
struct fuse_lk_out *flo;
uint32_t owner;
size_t len;
int error;
node_ref(opc);
/*
* Make sure we do have a filehandle, as the FUSE filesystem
* expects one. E.g.: if we provide none, GlusterFS logs an error
* "0-glusterfs-fuse: xl is NULL"
*
* We need the read file handle if the file is open read only,
* in order to support shared locks on read-only files.
* NB: The kernel always sends advlock for read-only
* files at exit time when the process used lock, see
* sys_exit -> exit1 -> fd_free -> fd_close -> VOP_ADVLOCK
*/
if ((fh = perfuse_get_fh(opc, FREAD)) == FUSE_UNKNOWN_FH) {
error = EBADF;
goto out;
}
/*
* XXX ps_new_msg() is called with NULL creds, which will
* be interpreted as FUSE superuser. We have no way to
* know the requesting process' credential, but since advlock()
* is supposed to operate on a file that has been open(),
* permission should have already been checked at open() time.
*/
pm = ps->ps_new_msg(pu, opc, fop, sizeof(*fli), NULL);
fli = GET_INPAYLOAD(ps, pm, fuse_lk_in);
fli->fh = fh;
fli->owner = (uint64_t)(vaddr_t)id;
fli->lk.start = fl->l_start;
fli->lk.end = fl->l_start + fl->l_len;
fli->lk.type = fl->l_type;
fli->lk.pid = fl->l_pid;
fli->lk_flags = (flags & F_FLOCK) ? FUSE_LK_FLOCK : 0;
foh = GET_OUTHDR(ps, pm);
len = foh->len - sizeof(*foh);
/*
* Save or clear the lock
*/
switch (op) {
case F_GETLK:
if (len != sizeof(*flo))
DERRX(EX_SOFTWARE,
"%s: Unexpected lock reply len %zd",
__func__, len);
/*
* We need to queue write requests in order to avoid
* dequeueing PCQ_AFTERWRITE when there are pending writes.
*/
while (pnd->pnd_flags & PND_INWRITE)
requeue_request(pu, opc, PCQ_WRITE);
pnd->pnd_flags |= PND_INWRITE;
/*
* append flag: re-read the file size so that
* we get the latest value.
*/
if (ioflag & PUFFS_IO_APPEND) {
if ((error = perfuse_node_getattr(pu, opc, vap, pcr)) != 0)
goto out;
offset = vap->va_size;
}
/*
* Serialize size access, see comment in perfuse_node_setattr().
*/
if ((u_quad_t)offset + *resid > vap->va_size) {
while (pnd->pnd_flags & PND_INRESIZE)
requeue_request(pu, opc, PCQ_RESIZE);
pnd->pnd_flags |= PND_INRESIZE;
inresize = 1;
}
fh = perfuse_get_fh(opc, FWRITE); /* Cannot be VDIR */
do {
size_t max_write;
/*
* There is a writepage flag when data
* is aligned to page size. Use it for
* everything but the data after the last
* page boundary.
*/
max_write = ps->ps_max_write - sizeof(*fwi);
if (inresize) {
#ifdef PERFUSE_DEBUG
if (!(pnd->pnd_flags & PND_INRESIZE))
DERRX(EX_SOFTWARE, "file write grow without resize");
#endif
pnd->pnd_flags &= ~PND_INRESIZE;
(void)dequeue_requests(opc, PCQ_RESIZE, DEQUEUE_ALL);
}
/*
* VOP_PUTPAGE causes FAF write where kernel does not
* check operation result. At least warn if it failed.
*/
#ifdef PUFFS_WRITE_FAF
if (error && (xflag & PUFFS_WRITE_FAF))
DWARN("Data loss caused by FAF write failed on \"%s\"",
pnd->pnd_name);
#endif /* PUFFS_WRITE_FAF */
/*
* If there are no more queued writes, we can resume
* an operation awaiting write completion.
*/
pnd->pnd_flags &= ~PND_INWRITE;
if (dequeue_requests(opc, PCQ_WRITE, 1) == 0)
(void)dequeue_requests(opc, PCQ_AFTERWRITE, DEQUEUE_ALL);
/* system namespace attrs are not accessible to non root users */
if (attrns == EXTATTR_NAMESPACE_SYSTEM && !puffs_cred_isjuggernaut(pcr))
return EPERM;
/*
* We just get fuse_getxattr_out with list size if we requested
* a null size.
*/
if (resid == NULL) {
fgo = GET_OUTPAYLOAD(ps, pm, fuse_getxattr_out);
if (attrsize != NULL)
*attrsize = fgo->size;
ps->ps_destroy_msg(pm);
error = 0;
goto out;
}
/*
* And with a non null requested size, we get the list just
* after the header
*/
foh = GET_OUTHDR(ps, pm);
np = (char *)(void *)(foh + 1);
len = foh->len - sizeof(*foh);
if (attrsize != NULL)
*attrsize = len;
if (resid != NULL) {
if (*resid < len) {
error = ERANGE;
ps->ps_destroy_msg(pm);
goto out;
}
/* system namespace attrs are not accessible to non root users */
if (attrns == EXTATTR_NAMESPACE_SYSTEM && !puffs_cred_isjuggernaut(pcr))
return EPERM;
ps->ps_destroy_msg(pm);
if (resid)
*resid = 0;
error = 0;
out:
node_rele(opc);
return error;
}
/* ARGSUSED2 */
int
perfuse_node_listextattr(struct puffs_usermount *pu, puffs_cookie_t opc,
int attrns, size_t *attrsize, uint8_t *attrs, size_t *resid, int flag,
const struct puffs_cred *pcr)
{
struct perfuse_state *ps;
perfuse_msg_t *pm;
struct fuse_getxattr_in *fgi;
struct fuse_getxattr_out *fgo;
struct fuse_out_header *foh;
char *np;
size_t len, puffs_len, i, attrlen, outlen;
int error;
/* system namespace attrs are not accessible to non root users */
if (attrns == EXTATTR_NAMESPACE_SYSTEM && !puffs_cred_isjuggernaut(pcr))
return EPERM;
/*
* We just get fuse_getxattr_out with list size if we requested
* a null size.
*/
if (resid == NULL) {
fgo = GET_OUTPAYLOAD(ps, pm, fuse_getxattr_out);
if (attrsize != NULL)
*attrsize = fgo->size;
ps->ps_destroy_msg(pm);
error = 0;
goto out;
}
/*
* And with a non null requested size, we get the list just
* after the header
*/
foh = GET_OUTHDR(ps, pm);
np = (char *)(void *)(foh + 1);
puffs_len = foh->len - sizeof(*foh);
if (attrsize != NULL)
*attrsize = puffs_len;
if (attrs != NULL) {
if (*resid < puffs_len) {
error = ERANGE;
ps->ps_destroy_msg(pm);
goto out;
}
outlen = 0;
for (i = 0; i < puffs_len; i += attrlen + 1) {
attrlen = strlen(np + i);
/*
* Filter attributes per namespace
*/
if (!perfuse_ns_match(attrns, np + i))
continue;
#ifdef PUFFS_EXTATTR_LIST_LENPREFIX
/*
* Convert the FUSE reply to length prefixed strings
* if this is what the kernel wants.
*/
if (flag & PUFFS_EXTATTR_LIST_LENPREFIX) {
(void)memcpy(attrs + outlen + 1,
np + i, attrlen);
*(attrs + outlen) = (uint8_t)attrlen;
} else
#endif /* PUFFS_EXTATTR_LIST_LENPREFIX */
(void)memcpy(attrs + outlen, np + i, attrlen + 1);
outlen += attrlen + 1;
}
/* system namespace attrs are not accessible to non root users */
if (attrns == EXTATTR_NAMESPACE_SYSTEM && !puffs_cred_isjuggernaut(pcr))
return EPERM;