/* $NetBSD: ext2fs_vfsops.c,v 1.229 2025/02/16 16:34:01 joe Exp $ */
/*
* Copyright (c) 1989, 1991, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)ffs_vfsops.c 8.14 (Berkeley) 11/28/94
* Modified for ext2fs by Manuel Bouyer.
*/
/*
* Copyright (c) 1997 Manuel Bouyer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* @(#)ffs_vfsops.c 8.14 (Berkeley) 11/28/94
* Modified for ext2fs by Manuel Bouyer.
*/
sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_PERMANENT,
CTLTYPE_NODE, "ext2fs",
SYSCTL_DESCR("Linux EXT2FS file system"),
NULL, 0, NULL, 0,
CTL_VFS, 17, CTL_EOL);
/*
* XXX the "17" above could be dynamic, thereby eliminating
* one more instance of the "number to vfs" mapping problem,
* but "17" is the order as taken from sys/mount.h
*/
}
/*
 * Validate the device named for a mount request and check that the caller
 * may access it.
 *
 * Visible steps:
 *  - with args->fspec set, resolve it to devvp; for a new mount require a
 *    block device known to bdevsw_lookup(), for an update require that it
 *    is the same device as ump->um_devvp;
 *  - with no fspec, a new mount fails with EINVAL and an update reuses
 *    ump->um_devvp (taking an extra reference);
 *  - ask kauth whether the credential may use the device with the
 *    required access mode;
 *  - flush a modified superblock through ext2fs_cgupdate().
 *
 * Returns 0 on success or an errno value.  On every error exit after devvp
 * has been obtained, the reference on devvp is released.
 *
 * NOTE(review): the body reads `args`, `update`, `mp`, `l`, `ump`, `devvp`,
 * `accessmode` and `fs`, none of which are declared in this block, and it
 * never uses the `cmd`/`arg` parameters -- confirm where those names come
 * from before relying on this function.
 */
static int
ext2fs_modcmd(modcmd_t cmd, void *arg)
{
	/*
	 * Must start at 0: the "update without fspec" path below assigns
	 * nothing to error before it is tested.
	 */
	int error = 0;

	/* Check arguments */
	if (args->fspec != NULL) {
		/*
		 * Look up the name and verify that it's sane.
		 */
		error = namei_simple_user(args->fspec,
		    NSM_FOLLOW_NOEMULROOT, &devvp);
		if (error != 0)
			return error;
		if (!update) {
			/*
			 * Be sure this is a valid block device
			 */
			if (devvp->v_type != VBLK)
				error = ENOTBLK;
			else if (bdevsw_lookup(devvp->v_rdev) == NULL)
				error = ENXIO;
		} else {
			/*
			 * Be sure we're still naming the same device
			 * used for our initial mount
			 */
			ump = VFSTOUFS(mp);
			if (devvp != ump->um_devvp) {
				if (devvp->v_rdev != ump->um_devvp->v_rdev)
					error = EINVAL;
				else {
					/* Same device: switch to the mounted vnode. */
					vrele(devvp);
					devvp = ump->um_devvp;
					vref(devvp);
				}
			}
		}
	} else {
		if (!update) {
			/* New mounts must have a filename for the device */
			return EINVAL;
		} else {
			ump = VFSTOUFS(mp);
			devvp = ump->um_devvp;
			vref(devvp);
		}
	}
	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
	 *
	 * Permission to update a mount is checked higher, so here we presume
	 * updating the mount is okay (for example, as far as securelevel goes)
	 * which leaves us with the normal check.
	 */
	if (error == 0) {
		accessmode = VREAD;
		if (update ?
		    (mp->mnt_iflag & IMNT_WANTRDWR) != 0 :
		    (mp->mnt_flag & MNT_RDONLY) == 0)
			accessmode |= VWRITE;
		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
		error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
		    KAUTH_REQ_SYSTEM_MOUNT_DEVICE, mp, devvp,
		    KAUTH_ARG(accessmode));
		VOP_UNLOCK(devvp);
	}
	/*
	 * Any failure so far leaves us holding a reference on devvp;
	 * drop it instead of returning with the reference leaked.
	 */
	if (error != 0)
		goto fail;

	if (fs->e2fs_fmod != 0) {	/* XXX */
		fs->e2fs_fmod = 0;
		if (fs->e2fs.e2fs_state == 0)
			fs->e2fs.e2fs_wtime = time_second;
		else
			printf("%s: file system not clean; please fsck(8)\n",
			    mp->mnt_stat.f_mntfromname);
		(void) ext2fs_cgupdate(ump, MNT_WAIT);
	}
	return error;

fail:
	vrele(devvp);
	return error;
}
/*
 * Sanity check the on-disk inode content and copy it into the in-core
 * inode structure.
 *
 * fs:  in-core ext2fs superblock state.
 * ino: inode number being loaded.
 * bp:  buffer -- presumably the one holding the on-disk inode; confirm
 *      against the caller.
 * ip:  in-core inode to fill in.
 *
 * NOTE(review): only the signature and the local declarations are visible
 * here; the checks themselves and the return contract cannot be confirmed
 * from this fragment.
 */
static int
ext2fs_loadvnode_content(struct m_ext2fs *fs, ino_t ino, struct buf *bp, struct inode *ip)
{
struct ext2fs_dinode *din;
/* Status accumulator; starts out as "no error". */
int error = 0;
/*
 * Reload all incore data for a filesystem (used after running fsck on
 * the root filesystem and finding things to fix). The filesystem must
 * be mounted read-only.
 *
 * Things to do to update the mount:
 * 1) invalidate all cached meta-data.
 * 2) re-read superblock from disk.
 * 3) re-read summary information from disk.
 * 4) invalidate all inactive vnodes.
 * 5) invalidate all cached file data.
 * 6) re-read inode data for all active vnodes.
 *
 * mp:      mount to reload; must have MNT_RDONLY set, otherwise EINVAL
 *          is returned before anything else is done.
 * cred, l: not referenced in the part of the body visible here.
 *
 * NOTE(review): only the declarations and the read-only guard are visible;
 * the six steps listed above are not shown in this fragment.
 */
int
ext2fs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l)
{
struct vnode *vp, *devvp;
struct inode *ip;
struct buf *bp;
struct m_ext2fs *fs;
struct ext2fs *newfs;
int i, error;
struct ufsmount *ump;
struct vnode_iterator *marker;
/* Refuse to reload a mount that is not read-only. */
if ((mp->mnt_flag & MNT_RDONLY) == 0)
return EINVAL;
/*
 * Common code for mount and mountroot
 *
 * devvp: vnode of the device to mount from.
 * mp:    mount structure; its MNT_RDONLY flag selects read-only operation.
 *
 * Visible part: flush stale buffers on the device, read the superblock,
 * copy it into a freshly allocated in-core structure and validate it.
 *
 * NOTE(review): the `out` label targeted by the gotos below and the rest
 * of the function are not visible in this fragment.
 */
int
ext2fs_mountfs(struct vnode *devvp, struct mount *mp)
{
struct lwp *l = curlwp;
struct ufsmount *ump;
struct buf *bp;
struct ext2fs *fs;
struct m_ext2fs *m_fs;
dev_t dev;
int error, i, ronly;
kauth_cred_t cred;
dev = devvp->v_rdev;
cred = l->l_cred;
/* Flush out any old buffers remaining from a previous use. */
/* The device vnode is held locked only around the invalidation. */
vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
error = vinvalbuf(devvp, V_SAVE, cred, l, 0, 0);
VOP_UNLOCK(devvp);
if (error)
return error;
ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
/* Cleared before the first `goto out` -- presumably so the exit path can test them; confirm at the label. */
bp = NULL;
ump = NULL;
/* Read the superblock from disk, and swap it directly. */
error = bread(devvp, SBLOCK, SBSIZE, 0, &bp);
if (error)
goto out;
fs = (struct ext2fs *)bp->b_data;
m_fs = kmem_zalloc(sizeof(*m_fs), KM_SLEEP);
/* Copy the on-disk superblock out of the buffer into m_fs->e2fs ... */
e2fs_sbload(fs, &m_fs->e2fs);
/* ... after which the buffer is released; `fs` still points into it and is not used again in the visible code. */
brelse(bp, 0);
bp = NULL;
/* Once swapped, validate and fill in the superblock. */
error = ext2fs_sbfill(m_fs, ronly);
if (error) {
/* Validation failed: give back the in-core superblock before bailing out. */
kmem_free(m_fs, sizeof(*m_fs));
goto out;
}
m_fs->e2fs_ronly = ronly;
/*
 * Get file system statistics.
 *
 * mp:  mount to report on.
 * sbp: statvfs structure -- presumably filled in by the part of the body
 *      that is not visible here; confirm.
 *
 * NOTE(review): only the local declarations are visible in this fragment.
 */
int
ext2fs_statvfs(struct mount *mp, struct statvfs *sbp)
{
struct ufsmount *ump;
struct m_ext2fs *fs;
uint32_t overhead, overhead_per_group, ngdb;
int i, ngroups;
/*
 * Go through the disk queues to initiate sandbagged IO;
 * go through the inodes to write those that have been modified;
 * initiate the writing of the super block if it has been modified.
 *
 * mp:      mount to synchronise.
 * waitfor: MNT_WAIT makes the fsync calls synchronous (FSYNC_WAIT);
 *          MNT_LAZY only updates regular-file inodes and skips the
 *          device flush; any other value syncs asynchronously.
 * cred:    credential passed to VOP_FSYNC().
 *
 * Returns 0 if every step succeeded, otherwise the error of the last step
 * that failed.  A vnode that cannot be locked is skipped without being
 * counted as an error.
 */
int
ext2fs_sync(struct mount *mp, int waitfor, kauth_cred_t cred)
{
	struct vnode *vp;
	struct ufsmount *ump = VFSTOUFS(mp);
	/* In-core superblock; needed for the e2fs_fmod test below. */
	struct m_ext2fs *fs = ump->um_e2fs;
	struct vnode_iterator *marker;
	int error, allerror = 0;

	/*
	 * Write back each (modified) inode.
	 */
	vfs_vnode_iterator_init(mp, &marker);
	while ((vp = vfs_vnode_iterator_next(marker, ext2fs_sync_selector,
	    NULL)))
	{
		error = vn_lock(vp, LK_EXCLUSIVE);
		if (error) {
			/* Could not lock it: drop our reference and move on. */
			vrele(vp);
			continue;
		}
		if (vp->v_type == VREG && waitfor == MNT_LAZY)
			error = ext2fs_update(vp, NULL, NULL, 0);
		else
			error = VOP_FSYNC(vp, cred,
			    waitfor == MNT_WAIT ? FSYNC_WAIT : 0, 0, 0);
		if (error)
			allerror = error;
		vput(vp);
	}
	vfs_vnode_iterator_destroy(marker);

	/*
	 * Force stale file system control information to be flushed.
	 */
	if (waitfor != MNT_LAZY) {
		vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
		if ((error = VOP_FSYNC(ump->um_devvp, cred,
		    waitfor == MNT_WAIT ? FSYNC_WAIT : 0, 0, 0)) != 0)
			allerror = error;
		VOP_UNLOCK(ump->um_devvp);
	}

	/*
	 * Write back modified superblock.
	 */
	if (fs->e2fs_fmod != 0) {
		fs->e2fs_fmod = 0;
		fs->e2fs.e2fs_wtime = time_second;
		if ((error = ext2fs_cgupdate(ump, waitfor)))
			allerror = error;
	}
	return allerror;
}
/*
 * Load inode from disk and initialize vnode.
 *
 * ump: ufs mount; supplies the in-core superblock (um_e2fs) and the
 *      device vnode (um_devvp) to read from.
 * vp:  vnode to initialise (not referenced in the visible part).
 * ino: inode number to load.
 *
 * Returns the bread() error if the block holding the inode cannot be read.
 *
 * NOTE(review): the function continues past this fragment; what happens
 * after a successful read is not visible here.
 */
static int
ext2fs_init_vnode(struct ufsmount *ump, struct vnode *vp, ino_t ino)
{
struct m_ext2fs *fs;
struct inode *ip;
struct buf *bp;
int error;
fs = ump->um_e2fs;
/* Read in the disk contents for the inode, copy into the inode. */
/* One file system block (e2fs_bsize bytes) is read, at the device address derived from the inode number. */
error = bread(ump->um_devvp, EXT2_FSBTODB(fs, ino_to_fsba(fs, ino)),
(int)fs->e2fs_bsize, 0, &bp);
if (error)
return error;
/*
 * Read an inode from disk and initialize this vnode / inode pair.
 * Caller assures no other thread will try to load this inode.
 *
 * NOTE(review): the visible statements use `cred` and `mode`, which are
 * neither parameters nor locals declared here, and dereference `ip`
 * without a visible assignment; `key`, `key_len` and `new_key` are not
 * used in this fragment.  Lines appear to be missing -- confirm against
 * the full file before relying on these comments.
 */
int
ext2fs_loadvnode(struct mount *mp, struct vnode *vp,
const void *key, size_t key_len, const void **new_key)
{
ino_t ino;
struct inode *ip;
struct ufsmount *ump;
int error;
/* Authorize setting SGID if needed. */
/* If the credential is not allowed to set it, the ISGID bit is cleared rather than failing the call. */
if (ip->i_e2fs_mode & ISGID) {
error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_SECURITY,
vp, NULL, genfs_can_chmod(vp, cred, ip->i_uid, ip->i_gid,
mode));
if (error)
ip->i_e2fs_mode &= ~ISGID;
}
/* Initialize extra_isize according to what is set in superblock */
/* Only done when the feature flag is set and on-disk inodes are larger than the revision 0 size. */
if (EXT2F_HAS_ROCOMPAT_FEATURE(ip->i_e2fs, EXT2F_ROCOMPAT_EXTRA_ISIZE)
&& EXT2_DINODE_SIZE(ip->i_e2fs) > EXT2_REV0_DINODE_SIZE) {
ip->i_din.e2fs_din->e2di_extra_isize =
ip->i_e2fs->e2fs.e4fs_want_extra_isize;
}
/* Set create time if possible */
/* "Possible" means the e2di_crtime field fits inside this file system's on-disk inode size. */
if (EXT2_DINODE_FITS(ip->i_din.e2fs_din, e2di_crtime,
EXT2_DINODE_SIZE(ip->i_e2fs))) {
struct timespec now;
vfs_timestamp(&now);
EXT2_DINODE_TIME_SET(&now, ip->i_din.e2fs_din, e2di_crtime,
EXT2_DINODE_SIZE(ip->i_e2fs));
}
/* Initialize the vnode from the inode. */
ext2fs_vinit(mp, ext2fs_specop_p, ext2fs_fifoop_p, &vp);
/*
 * File handle to vnode
 *
 * Have to be really careful about stale file handles:
 * - check that the inode number is valid
 * - call ext2fs_vget() to get the locked inode
 * - check for an unallocated inode (i_mode == 0)
 *
 * mp:     mount the handle refers to.
 * fhp:    file handle; its fid_len must equal sizeof(struct ufid),
 *         otherwise EINVAL is returned.
 * lktype: lock type -- presumably applied to the returned vnode; confirm.
 * vpp:    out parameter -- presumably receives the vnode; confirm.
 *
 * NOTE(review): only the length check is visible; the three steps listed
 * above are not shown in this fragment.
 */
int
ext2fs_fhtovp(struct mount *mp, struct fid *fhp, int lktype, struct vnode **vpp)
{
struct inode *ip;
struct vnode *nvp;
int error;
struct ufid ufh;
struct m_ext2fs *fs;
/* Reject handles whose length does not match struct ufid. */
if (fhp->fid_len != sizeof(struct ufid))
return EINVAL;
/*
 * Write a superblock and associated information back to disk.
 *
 * mp:      ufs mount whose in-core superblock (um_e2fs) is written.
 * waitfor: wait mode -- presumably MNT_WAIT selects a synchronous write,
 *          as in ext2fs_cgupdate(); confirm.
 *
 * NOTE(review): only the local declarations are visible in this fragment.
 */
int
ext2fs_sbupdate(struct ufsmount *mp, int waitfor)
{
struct m_ext2fs *fs = mp->um_e2fs;
struct buf *bp;
/* Status to return; starts out as "no error". */
int error = 0;
/*
 * Write the superblock and all group descriptor blocks back to disk.
 *
 * mp:      ufs mount whose in-core superblock and descriptors are written.
 * waitfor: MNT_WAIT writes each descriptor block synchronously with
 *          bwrite(); any other value queues it with bawrite().
 *
 * Returns 0 on success, otherwise the first error encountered: the
 * superblock write is tried first, then each descriptor block in order.
 * All blocks are still written after a failure.  Errors from asynchronous
 * writes are not reported here.
 */
int
ext2fs_cgupdate(struct ufsmount *mp, int waitfor)
{
	struct m_ext2fs *fs = mp->um_e2fs;
	struct buf *bp;
	int i, error, allerror;

	allerror = ext2fs_sbupdate(mp, waitfor);
	for (i = 0; i < fs->e2fs_ngdb; i++) {
		/* Descriptor blocks follow the superblock's block directly. */
		bp = getblk(mp->um_devvp, EXT2_FSBTODB(fs,
		    fs->e2fs.e2fs_first_dblock +
		    1 /* superblock */ + i), fs->e2fs_bsize, 0, 0);
		e2fs_cgsave(&fs->e2fs_gd[i *
		    (fs->e2fs_bsize >> fs->e2fs_group_desc_shift)],
		    bp->b_data, fs->e2fs_bsize, fs->e2fs_group_desc_shift);
		if (waitfor == MNT_WAIT) {
			error = bwrite(bp);
			/*
			 * Record the failure right away so that a later
			 * successful write cannot hide it.
			 */
			if (error != 0 && allerror == 0)
				allerror = error;
		} else
			bawrite(bp);
	}
	return allerror;
}
/*
* Fill in the m_fs structure, and validate the fields of the superblock.
* NOTE: here, the superblock is already swapped.
*/
static int
ext2fs_sbfill(struct m_ext2fs *m_fs, int ronly)
{
uint32_t u32;
struct ext2fs *fs = &m_fs->e2fs;
/*
* General sanity checks
*/
if (fs->e2fs_magic != E2FS_MAGIC)
return EINVAL;
if (fs->e2fs_rev > E2FS_REV1) {
printf("ext2fs: unsupported revision number: %#x\n",
fs->e2fs_rev);
return EINVAL;
}
if (fs->e2fs_log_bsize > 2) {
/* block size = 1024|2048|4096 */
printf("ext2fs: bad block size: %d\n", fs->e2fs_log_bsize);
return EINVAL;
}
if (fs->e2fs_bpg == 0) {
printf("ext2fs: zero blocks per group\n");
return EINVAL;
}
if (fs->e2fs_ipg == 0) {
printf("ext2fs: zero inodes per group\n");
return EINVAL;
}
if (fs->e2fs_first_dblock >= fs->e2fs_bcount) {
printf("ext2fs: invalid first data block\n");
return EINVAL;
}
if (fs->e2fs_rbcount > fs->e2fs_bcount ||
fs->e2fs_fbcount > fs->e2fs_bcount) {
printf("ext2fs: invalid block count\n");
return EINVAL;
}
/*
* Compute the fields of the superblock
*/
u32 = fs->e2fs_bcount - fs->e2fs_first_dblock; /* > 0 */
m_fs->e2fs_ncg = howmany(u32, fs->e2fs_bpg);
if (m_fs->e2fs_ncg == 0) {
printf("ext2fs: invalid number of cylinder groups\n");
return EINVAL;
}