Index: src/share/man/man9/vnode.9
===================================================================
RCS file: /home/chs/netbsd/cvs/src/share/man/man9/vnode.9,v
retrieving revision 1.80
diff -u -p -r1.80 vnode.9
--- src/share/man/man9/vnode.9  28 May 2017 16:39:41 -0000      1.80
+++ src/share/man/man9/vnode.9  14 Jun 2017 16:51:43 -0000
@@ -27,7 +27,7 @@
.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
.\" POSSIBILITY OF SUCH DAMAGE.
.\"
-.Dd May 28, 2017
+.Dd June 14, 2017
.Dt VNODE 9
.Os
.Sh NAME
@@ -35,10 +35,12 @@
.Nm vref ,
.Nm vrele ,
.Nm vrele_async ,
+.Nm vrele_async_cleaner ,
.Nm vput ,
.Nm vhold ,
.Nm holdrele ,
.Nm vcache_get ,
+.Nm vcache_get_cleaner ,
.Nm vcache_new ,
.Nm vcache_rekey_enter ,
.Nm vcache_rekey_exit ,
@@ -68,6 +70,8 @@
.Ft void
.Fn vrele_async "struct vnode *vp"
.Ft void
+.Fn vrele_async_cleaner "struct vnode *vp"
+.Ft void
.Fn vput "struct vnode *vp"
.Ft void
.Fn vhold "struct vnode *vp"
@@ -76,6 +80,8 @@
.Ft int
.Fn vcache_get "struct mount *mp" "const void *key" "size_t key_len" "struct vnode **vpp"
.Ft int
+.Fn vcache_get_cleaner "struct mount *mp" "const void *key" "size_t key_len" "struct vnode **vpp"
+.Ft int
.Fn vcache_new "struct mount *mp" "struct vnode *dvp" "struct vattr *vap" "kauth_cred_t cred" "struct vnode **vpp"
.Ft int
.Fn vcache_rekey_enter "struct mount *mp" "struct vnode *vp" "const void *old_key" "size_t old_key_len" "const void *new_key" "size_t new_key_len"
@@ -513,6 +519,11 @@ are zero, the vnode is cached.
.It Fn vrele_async "vp"
Will asynchronously release the vnode in different context than the caller,
sometime after the call.
+.It Fn vrele_async_cleaner "vp"
+Equivalent to
+.Fn vrele_async
+for references which were taken with
+.Fn vcache_get_cleaner .
.It Fn vput "vp"
Legacy convenience routine for unlocking and releasing
.Fa vp .
@@ -557,6 +568,24 @@ uniquely identify the file in the file s
.Pp
If a vnode is successfully retrieved zero is returned, otherwise an
appropriate error code is returned.
+.It Fn vcache_get_cleaner "mp" "key" "key_len" "vpp"
+Equivalent to
+.Fn vcache_get ,
+but this version must be used if the call is made in the context of cleaning the vnode for reuse, which is within the file system's implementation of
+.Fn VOP_FSYNC
+when
+.Em FSYNC_RECLAIM
+is specified, or
+.Fn VOP_PUTPAGES
+when
+.Em PGO_RECLAIM
+is specified.
+A reference taken with this interface must be released with
+.Fn vrele_async_cleaner ,
+and the reference must be released before another reference can be taken on the same vnode with this interface.
+File systems using this interface must also set
+.Em IMNT_CLEANING
+in mnt_iflag.
.It Fn vcache_new "mp" "dvp" "vap" "cred" "vpp"
Allocate a new vnode with a new file.
The new vnode is returned referenced in the address specified by
Index: src/sys/sys/fstypes.h
===================================================================
RCS file: /home/chs/netbsd/cvs/src/sys/sys/fstypes.h,v
retrieving revision 1.35
diff -u -p -r1.35 fstypes.h
--- src/sys/sys/fstypes.h       1 Mar 2017 10:44:47 -0000       1.35
+++ src/sys/sys/fstypes.h       14 Jun 2017 16:11:02 -0000
@@ -217,6 +217,7 @@ typedef struct fhandle      fhandle_t;
#define        IMNT_UNMOUNT    0x00000002      /* unmount in progress */
#define        IMNT_WANTRDWR   0x00000004      /* upgrade to read/write requested */
#define        IMNT_WANTRDONLY 0x00000008      /* upgrade to readonly requested */
+#define        IMNT_CLEANING   0x00000010      /* fs takes a new ref while cleaning */
#define        IMNT_DTYPE      0x00000040      /* returns d_type fields */
#define        IMNT_HAS_TRANS  0x00000080      /* supports transactions */
#define        IMNT_MPSAFE     0x00000100      /* file system code MP safe */
@@ -271,6 +272,7 @@ typedef struct fhandle      fhandle_t;
       "\11IMNT_MPSAFE" \
       "\10IMNT_HAS_TRANS" \
       "\07IMNT_DTYPE" \
+       "\05IMNT_CLEANING" \
       "\04IMNT_WANTRDONLY" \
       "\03IMNT_WANTRDWR" \
       "\02IMNT_UNMOUNT" \
Index: src/sys/sys/vnode.h
===================================================================
RCS file: /home/chs/netbsd/cvs/src/sys/sys/vnode.h,v
retrieving revision 1.278
diff -u -p -r1.278 vnode.h
--- src/sys/sys/vnode.h 4 Jun 2017 08:02:26 -0000       1.278
+++ src/sys/sys/vnode.h 14 Jun 2017 16:02:25 -0000
@@ -186,6 +186,7 @@ typedef struct vnode vnode_t;
/*
 * The second set are locked by vp->v_interlock.
 */
+#define        VI_CLEANERREF   0x00000001      /* cleaner has a ref */
#define        VI_TEXT         0x00000100      /* vnode is a pure text prototype */
#define        VI_EXECMAP      0x00000200      /* might have PROT_EXEC mappings */
#define        VI_WRMAP        0x00000400      /* might have PROT_WRITE u. mappings */
@@ -511,6 +512,7 @@ void        vput(struct vnode *);
bool   vrecycle(struct vnode *);
void   vrele(struct vnode *);
void   vrele_async(struct vnode *);
+void   vrele_async_cleaner(struct vnode *);
void   vrele_flush(struct mount *);
int    vtruncbuf(struct vnode *, daddr_t, bool, int);
void   vwakeup(struct buf *);
@@ -518,6 +520,7 @@ int vdead_check(struct vnode *, int);
void   vrevoke(struct vnode *);
void   vremfree(struct vnode *);
int    vcache_get(struct mount *, const void *, size_t, struct vnode **);
+int    vcache_get_cleaner(struct mount *, const void *, size_t, struct vnode **);
int    vcache_new(struct mount *, struct vnode *,
           struct vattr *, kauth_cred_t, struct vnode **);
int    vcache_rekey_enter(struct mount *, struct vnode *,
Index: src/sys/sys/vnode_impl.h
===================================================================
RCS file: /home/chs/netbsd/cvs/src/sys/sys/vnode_impl.h,v
retrieving revision 1.15
diff -u -p -r1.15 vnode_impl.h
--- src/sys/sys/vnode_impl.h    4 Jun 2017 08:02:26 -0000       1.15
+++ src/sys/sys/vnode_impl.h    14 Jun 2017 16:44:42 -0000
@@ -42,6 +42,7 @@ enum vnode_state {
       VS_LOADING,     /* Intermediate, initialising the fs node. */
       VS_LOADED,      /* Stable, valid fs node attached. */
       VS_BLOCKED,     /* Intermediate, active, no new references allowed. */
+       VS_CLEANING,    /* Intermediate, cleaning the vnode. */
       VS_RECLAIMING,  /* Intermediate, detaching the fs node. */
       VS_RECLAIMED    /* Stable, no fs node attached. */
};
Index: src/sys/kern/vfs_subr.c
===================================================================
RCS file: /home/chs/netbsd/cvs/src/sys/kern/vfs_subr.c,v
retrieving revision 1.468
diff -u -p -r1.468 vfs_subr.c
--- src/sys/kern/vfs_subr.c     4 Jun 2017 07:58:29 -0000       1.468
+++ src/sys/kern/vfs_subr.c     7 Jun 2017 19:02:56 -0000
@@ -1065,6 +1065,8 @@ vstate_name(enum vnode_state state)
               return "LOADED";
       case VS_BLOCKED:
               return "BLOCKED";
+       case VS_CLEANING:
+               return "CLEANING";
       case VS_RECLAIMING:
               return "RECLAIMING";
       case VS_RECLAIMED:
Index: src/sys/kern/vfs_vnode.c
===================================================================
RCS file: /home/chs/netbsd/cvs/src/sys/kern/vfs_vnode.c,v
retrieving revision 1.96
diff -u -p -r1.96 vfs_vnode.c
--- src/sys/kern/vfs_vnode.c    4 Jun 2017 08:05:42 -0000       1.96
+++ src/sys/kern/vfs_vnode.c    14 Jun 2017 16:53:38 -0000
@@ -103,6 +103,7 @@
 *     - LOADED        Vnode has associated underlying file system and is
 *                     ready to use.
 *     - BLOCKED       Vnode is active but cannot get new references.
+ *     - CLEANING      Vnode is being cleaned in preparation for reclaiming.
 *     - RECLAIMING    Vnode is disassociating from the underlying file
 *                     system.
 *     - RECLAIMED     Vnode has disassociated from underlying file system
@@ -112,7 +113,11 @@
 *     LOADING -> LOADED
 *                     Vnode has been initialised in vcache_get() or
 *                     vcache_new() and is ready to use.
- *     LOADED -> RECLAIMING
+ *     LOADED -> CLEANING
+ *                     Vnode starts being cleaned before being disassociated
+ *                     from the underlying file system in vcache_reclaim().
+ *     LOADED -> RECLAIMING (if the fs does not need separate CLEANING)
+ *     CLEANING -> RECLAIMING
 *                     Vnode starts disassociation from underlying file
 *                     system in vcache_reclaim().
 *     RECLAIMING -> RECLAIMED
@@ -217,6 +222,7 @@ static void         vcache_free(vnode_impl_t *)
static void            vcache_init(void);
static void            vcache_reinit(void);
static void            vcache_reclaim(vnode_t *);
+static bool            vcache_rele_cleaner(vnode_t *);
static void            vrelel(vnode_t *, int);
static void            vdrain_thread(void *);
static void            vnpanic(vnode_t *, const char *, ...)
@@ -229,6 +235,27 @@ extern struct vfsops       dead_vfsops;

/* Vnode state operations and diagnostics. */

+static void
+vstate_wait_stable(vnode_t *vp)
+{
+       vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
+       /* Sleep on v_cv until the state is VS_LOADED or VS_RECLAIMED. */
+       while (vip->vi_state != VS_LOADED && vip->vi_state != VS_RECLAIMED)
+               cv_wait(&vp->v_cv, vp->v_interlock);
+}
+
+static void
+vstate_change(vnode_t *vp, enum vnode_state from, enum vnode_state to)
+{
+       vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
+       /* Set the state; wake vcache_cv and/or v_cv waiters as needed. */
+       vip->vi_state = to;
+       if (from == VS_LOADING)
+               cv_broadcast(&vcache_cv);
+       if (to == VS_LOADED || to == VS_RECLAIMED)
+               cv_broadcast(&vp->v_cv);
+}
+
#if defined(DIAGNOSTIC)

#define VSTATE_VALID(state) \
@@ -280,8 +307,7 @@ vstate_assert_wait_stable(vnode_t *vp, c
               vnpanic(vp, "state is %s at %s:%d",
                   vstate_name(vip->vi_state), func, line);

-       while (vip->vi_state != VS_LOADED && vip->vi_state != VS_RECLAIMED)
-               cv_wait(&vp->v_cv, vp->v_interlock);
+       vstate_wait_stable(vp);

       if (! VSTATE_VALID(vip->vi_state))
               vnpanic(vp, "state is %s at %s:%d",
@@ -312,11 +338,7 @@ vstate_assert_change(vnode_t *vp, enum v
                   vstate_name(from), vstate_name(to), vp->v_usecount,
                   func, line);

-       vip->vi_state = to;
-       if (from == VS_LOADING)
-               cv_broadcast(&vcache_cv);
-       if (to == VS_LOADED || to == VS_RECLAIMED)
-               cv_broadcast(&vp->v_cv);
+       vstate_change(vp, from, to);
}

#else /* defined(DIAGNOSTIC) */
@@ -332,28 +354,6 @@ _vstate_assert(vnode_t *vp, enum vnode_s
{

}
-
-static void
-vstate_wait_stable(vnode_t *vp)
-{
-       vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
-
-       while (vip->vi_state != VS_LOADED && vip->vi_state != VS_RECLAIMED)
-               cv_wait(&vp->v_cv, vp->v_interlock);
-}
-
-static void
-vstate_change(vnode_t *vp, enum vnode_state from, enum vnode_state to)
-{
-       vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
-
-       vip->vi_state = to;
-       if (from == VS_LOADING)
-               cv_broadcast(&vcache_cv);
-       if (to == VS_LOADED || to == VS_RECLAIMED)
-               cv_broadcast(&vp->v_cv);
-}
-
#endif /* defined(DIAGNOSTIC) */

void
@@ -840,6 +840,29 @@ vrele_async(vnode_t *vp)
}

/*
+ * Drop a cleaner reference if one is held, else release asynchronously.
+ */
+void
+vrele_async_cleaner(vnode_t *vp)
+{
+
+       mutex_enter(vp->v_interlock);
+
+       /*
+        * If this vnode has a cleaner ref (VI_CLEANERREF), just clear it.
+        */
+       if (vcache_rele_cleaner(vp)) {
+               mutex_exit(vp->v_interlock);
+               return;
+       }
+
+       /*
+        * Otherwise do a normal async rele; v_interlock is still held here.
+        */
+       vrelel(vp, VRELEL_ASYNC_RELE);
+}
+
+/*
 * Vnode reference, where a reference is already held by some other
 * object (for example, a file structure).
 */
@@ -1241,12 +1264,38 @@ vcache_vget(vnode_t *vp)
       return 0;
}

+static void
+vcache_ref_cleaner(vnode_t *vp)
+{
+       /* Take the single cleaner ref, recorded as VI_CLEANERREF. */
+       KASSERT(mutex_owned(vp->v_interlock));
+       KASSERT((vp->v_mount->mnt_iflag & IMNT_CLEANING) != 0);
+       KASSERTMSG((vp->v_iflag & VI_CLEANERREF) == 0, "vp %p", vp);
+       vp->v_iflag |= VI_CLEANERREF;
+}
+
+
+static bool
+vcache_rele_cleaner(vnode_t *vp)
+{
+       /* Drop the cleaner ref if held; return true if one was dropped. */
+       KASSERT(mutex_owned(vp->v_interlock));
+       KASSERT((vp->v_mount->mnt_iflag & IMNT_CLEANING) != 0);
+       if ((vp->v_iflag & VI_CLEANERREF) != 0) {
+               vp->v_iflag &= ~VI_CLEANERREF;
+               return true;
+       }
+       return false;
+}
+
/*
 * Get a vnode / fs node pair by key and return it referenced through vpp.
+ * If requested and the vnode is being cleaned, take a cleaner reference
+ * rather than waiting for the reclamation process to finish.
 */
-int
-vcache_get(struct mount *mp, const void *key, size_t key_len,
-    struct vnode **vpp)
+static int
+do_vcache_get(struct mount *mp, const void *key, size_t key_len,
+    struct vnode **vpp, bool cleaner)
{
       int error;
       uint32_t hash;
@@ -1269,6 +1318,7 @@ again:

       /* If found, take a reference or retry. */
       if (__predict_true(vip != NULL)) {
+
               /*
                * If the vnode is loading we cannot take the v_interlock
                * here as it might change during load (see uvm_obj_setlock()).
@@ -1284,6 +1334,23 @@ again:
               }
               vp = VIMPL_TO_VNODE(vip);
               mutex_enter(vp->v_interlock);
+
+               /*
+                * Take a cleaner reference if the vnode is being cleaned
+                * and a cleaner reference was requested.
+                */
+               if (__predict_false(vip->vi_state == VS_CLEANING &&
+                   cleaner)) {
+                       vcache_ref_cleaner(vp);
+                       mutex_exit(vp->v_interlock);
+                       mutex_exit(&vcache_lock);
+                       *vpp = VIMPL_TO_VNODE(vip);
+                       return 0;
+               }
+
+               /*
+                * Otherwise take a normal reference.
+                */
               mutex_exit(&vcache_lock);
               error = vcache_vget(vp);
               if (error == ENOENT)
@@ -1350,6 +1417,25 @@ again:
}

/*
+ * Get a vnode / fs node pair by key and return it referenced through vpp.
+ */
+int
+vcache_get(struct mount *mp, const void *key, size_t key_len,
+    struct vnode **vpp)
+{
+       /* cleaner == false: never take a cleaner reference. */
+       return do_vcache_get(mp, key, key_len, vpp, false);
+}
+
+int
+vcache_get_cleaner(struct mount *mp, const void *key, size_t key_len,
+    struct vnode **vpp)
+{
+       /* cleaner == true: a VS_CLEANING vnode yields a cleaner ref. */
+       return do_vcache_get(mp, key, key_len, vpp, true);
+}
+
+/*
 * Create a new vnode / fs node pair and return it referenced through vpp.
 */
int
@@ -1528,7 +1614,7 @@ vcache_reclaim(vnode_t *vp)
       uint32_t hash;
       uint8_t temp_buf[64], *temp_key;
       size_t temp_key_len;
-       bool recycle, active;
+       bool recycle, active, usecleaning;
       int error;

       KASSERT((vp->v_vflag & VV_LOCKSWORK) == 0 ||
@@ -1538,11 +1624,14 @@ vcache_reclaim(vnode_t *vp)

       active = (vp->v_usecount > 1);
       temp_key_len = vip->vi_key.vk_key_len;
+       usecleaning = (vp->v_mount->mnt_iflag & IMNT_CLEANING) != 0;
+
       /*
        * Prevent the vnode from being recycled or brought into use
-        * while we clean it out.
+        * while we clean it out.  Use VS_CLEANING if the fs needs it,
+        * otherwise go straight to VS_RECLAIMING.
        */
-       VSTATE_CHANGE(vp, VS_LOADED, VS_RECLAIMING);
+       VSTATE_CHANGE(vp, VS_LOADED, usecleaning ? VS_CLEANING : VS_RECLAIMING);
       if (vp->v_iflag & VI_EXECMAP) {
               atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages);
               atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages);
@@ -1581,6 +1670,17 @@ vcache_reclaim(vnode_t *vp)
       }

       /*
+        * Cleaning is done, move on to reclaiming.
+        * Change state to VS_RECLAIMING if we weren't already there.
+        */
+       if (usecleaning) {
+               KASSERT((vp->v_iflag & VI_CLEANERREF) == 0);
+               mutex_enter(vp->v_interlock);
+               VSTATE_CHANGE(vp, VS_CLEANING, VS_RECLAIMING);
+               mutex_exit(vp->v_interlock);
+       }
+
+       /*
        * Disassociate the underlying file system from the vnode.
        * VOP_INACTIVE leaves the vnode locked; VOP_RECLAIM unlocks
        * the vnode, and may destroy the vnode so that VOP_UNLOCK