/*-
* Copyright (c) 2007, 2020 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Juergen Hannken-Illjes.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
 * pool_cache constructor for fstrans_lwp_info. Updating the global list
 * produces cache misses on MP. Minimise by keeping free entries on list.
 *
 * arg and flags follow the pool_cache(9) constructor signature; obj is
 * the new fstrans_lwp_info entry to initialise.
 */
int
fstrans_lwp_pcc(void *arg, void *obj, int flags)
{
	struct fstrans_lwp_info *fli = obj;
/*
 * Retrieve the fstrans_mount_info for this mount, or NULL if not found.
 * Caller must hold fstrans_lock.
 */
static inline struct fstrans_mount_info *
fstrans_mount_get(struct mount *mp)
{
	uint32_t indx;
	struct fstrans_mount_info *fmi, *fmi_lower;

	KASSERT(mutex_owned(&fstrans_lock));

	/* Hash the mount and walk its hash chain looking for a match. */
	indx = fstrans_mount_hash(mp);
	SLIST_FOREACH(fmi, &fstrans_mount_hashtab[indx], fmi_hash) {
		if (fmi->fmi_mount == mp) {
			/* Layered mount whose lower info is not yet set up. */
			if (__predict_false(mp->mnt_lower != NULL &&
			    fmi->fmi_lower_info == NULL)) {
				/*
				 * Intern the lower/lowest mount into
				 * this mount info on first lookup.
				 */
				KASSERT(fmi->fmi_ref_cnt == 1);
/*
 * Allocate and return per lwp info for this mount.
 * Returns NULL if the mount has no fstrans_mount_info.
 */
static struct fstrans_lwp_info *
fstrans_alloc_lwp_info(struct mount *mp)
{
	struct fstrans_lwp_info *fli, *fli_lower;
	struct fstrans_mount_info *fmi;

	/* Reuse an existing entry for this mount if we already have one. */
	for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) {
		if (fli->fli_mount == mp)
			return fli;
	}

	/*
	 * Lookup mount info and get lower mount per lwp info.
	 */
	mutex_enter(&fstrans_lock);
	fmi = fstrans_mount_get(mp);
	if (fmi == NULL) {
		mutex_exit(&fstrans_lock);
		return NULL;
	}
	/* Take a reference on the mount info before dropping the lock. */
	fmi->fmi_ref_cnt += 1;
	mutex_exit(&fstrans_lock);

	if (fmi->fmi_lower_info) {
		/* Layered mount: recurse to set up the lower mount first. */
		fli_lower =
		    fstrans_alloc_lwp_info(fmi->fmi_lower_info->fmi_mount);
		if (fli_lower == NULL) {
			/* Drop the reference we took above on failure. */
			mutex_enter(&fstrans_lock);
			fstrans_mount_dtor(fmi);
			mutex_exit(&fstrans_lock);
/*
 * Retrieve the per lwp info for this mount, allocating it if necessary
 * and requested via do_alloc.  Returns NULL when no entry exists and
 * none could (or should) be allocated.
 */
static inline struct fstrans_lwp_info *
fstrans_get_lwp_info(struct mount *mp, bool do_alloc)
{
	struct fstrans_lwp_info *fli;

	/*
	 * Walk this lwp's list looking for an entry for this mount.
	 */
	fli = curlwp->l_fstrans;
	while (fli != NULL && fli->fli_mount != mp)
		fli = fli->fli_succ;

	if (fli != NULL) {
		/* An entry for a layered mount must carry an alias. */
		KASSERT(mp->mnt_lower == NULL || fli->fli_alias != NULL);
		/* Operate on the alias entry when one is present. */
		if (fli->fli_alias != NULL)
			fli = fli->fli_alias;
	} else if (do_alloc) {
		fli = fstrans_alloc_lwp_info(mp);
	}

	return fli;
}
/*
* Check if this lock type is granted at this state.
*/
static bool
grant_lock(const struct fstrans_mount_info *fmi,
const enum fstrans_lock_type type)
{
if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL))
return true;
if (fmi->fmi_owner == curlwp)
return true;
if (fmi->fmi_state == FSTRANS_SUSPENDING && type == FSTRANS_LAZY)
return true;
return false;
}
/*
 * Start a transaction. If this thread already has a transaction on this
 * file system increment the reference counter.
 *
 * lock_type selects the transaction class; wait presumably selects
 * blocking vs. non-blocking behaviour — TODO confirm against the full
 * source, the remainder of the body is not visible here.
 */
static inline int
_fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait)
{
	int s;
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;
/*
 * Check if this thread has an exclusive lock.
 * Returns non-zero if the current lwp owns the exclusive state of mp.
 */
int
fstrans_is_owner(struct mount *mp)
{
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;
/*
 * True, if no thread is in a transaction not granted at the current state.
 */
static bool
state_change_done(const struct fstrans_mount_info *fmi)
{
	struct fstrans_lwp_info *fli;

	KASSERT(mutex_owned(&fstrans_lock));

	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
		/* Ignore other mounts, idle entries and ourselves. */
		if (fli->fli_mountinfo != fmi ||
		    fli->fli_trans_cnt == 0 ||
		    fli->fli_self == curlwp)
			continue;
		/* An active transaction we cannot grant blocks the change. */
		if (!grant_lock(fmi, fli->fli_lock_type))
			return false;
	}

	return true;
}
/*
 * Set new file system state.
 * Returns 0 on success or the error from an interrupted wait.
 */
int
fstrans_setstate(struct mount *mp, enum fstrans_state new_state)
{
	int error;
	enum fstrans_state old_state;
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

	/*
	 * NOTE(review): fli, fmi and old_state are used below but their
	 * initialization (and the publication of new_state into
	 * fmi->fmi_state under fstrans_lock) is not visible in this view —
	 * verify against the full source.
	 */

	/*
	 * All threads see the new state now.
	 * Wait for transactions invalid at this state to leave.
	 */
	error = 0;
	while (! state_change_done(fmi)) {
		/* Interruptible wait; on signal roll back to NORMAL. */
		error = cv_wait_sig(&fstrans_count_cv, &fstrans_lock);
		if (error) {
			new_state = fmi->fmi_state = FSTRANS_NORMAL;
			break;
		}
	}

	/* Track exclusive ownership across the NORMAL <-> suspended edge. */
	if (old_state != new_state) {
		if (old_state == FSTRANS_NORMAL) {
			/* Leaving NORMAL: this lwp becomes the owner. */
			KASSERT(fmi->fmi_owner == NULL);
			fmi->fmi_owner = curlwp;
		}
		if (new_state == FSTRANS_NORMAL) {
			/* Returning to NORMAL: release ownership. */
			KASSERT(fmi->fmi_owner == curlwp);
			fmi->fmi_owner = NULL;
		}
	}

	/* Wake everyone waiting for a state change. */
	cv_broadcast(&fstrans_state_cv);
	mutex_exit(&fstrans_lock);

	return error;
}
/*
 * Get current file system state.
 */
enum fstrans_state
fstrans_getstate(struct mount *mp)
{
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;
/*
 * Add a handler to this mount.
 *
 * func is the copy-on-write handler to register; arg is passed back to
 * it on each invocation.
 */
int
fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool),
    void *arg)
{
	struct fstrans_mount_info *fmi;
	struct fscow_handler *newch;
/*
 * Check for need to copy block that is about to be written.
 * Runs all registered copy-on-write handlers for the mount backing bp,
 * stopping at the first error; marks the buffer B_COWDONE on success.
 */
int
fscow_run(struct buf *bp, bool data_valid)
{
	int error, s;
	struct mount *mp;
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;
	struct fscow_handler *hp;

	/*
	 * First check if we need run the copy-on-write handler.
	 */
	if ((bp->b_flags & B_COWDONE))
		return 0;
	if (bp->b_vp == NULL) {
		/* No vnode attached: nothing to copy. */
		bp->b_flags |= B_COWDONE;
		return 0;
	}
	/* For block devices resolve the mounted file system, if any. */
	if (bp->b_vp->v_type == VBLK)
		mp = spec_node_getmountedfs(bp->b_vp);
	else
		mp = bp->b_vp->v_mount;
	if (mp == NULL || mp == dead_rootmount) {
		bp->b_flags |= B_COWDONE;
		return 0;
	}

	/*
	 * NOTE(review): fli and fmi are used below but their assignment is
	 * not visible in this view — presumably via fstrans_get_lwp_info();
	 * verify against the full source.
	 */

	/*
	 * On non-recursed run check if other threads
	 * want to change the list.
	 */
	if (fli->fli_cow_cnt == 0) {
		s = pserialize_read_enter();
		if (__predict_false(fmi->fmi_cow_change)) {
			/* A list changer is active: fall back to the lock. */
			pserialize_read_exit(s);
			mutex_enter(&fstrans_lock);
			while (fmi->fmi_cow_change)
				cv_wait(&fstrans_state_cv, &fstrans_lock);
			fli->fli_cow_cnt = 1;
			mutex_exit(&fstrans_lock);
		} else {
			/* Fast path: mark ourselves inside the handler list. */
			fli->fli_cow_cnt = 1;
			pserialize_read_exit(s);
		}
	} else
		fli->fli_cow_cnt += 1;

	/*
	 * Run all copy-on-write handlers, stop on error.
	 */
	error = 0;
	LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
		if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0)
			break;
	if (error == 0)
		bp->b_flags |= B_COWDONE;

	/*
	 * Check if other threads want to change the list.
	 */
	if (fli->fli_cow_cnt > 1) {
		/* Recursed run: just drop one level. */
		fli->fli_cow_cnt -= 1;
	} else {
		s = pserialize_read_enter();
		if (__predict_false(fmi->fmi_cow_change)) {
			/* Wake a waiting list changer after leaving. */
			pserialize_read_exit(s);
			mutex_enter(&fstrans_lock);
			fli->fli_cow_cnt = 0;
			cv_signal(&fstrans_count_cv);
			mutex_exit(&fstrans_lock);
		} else {
			fli->fli_cow_cnt = 0;
			pserialize_read_exit(s);
		}
	}