diff --git a/Dump/ufs/ffs/ffs_alloc.c b/Dump/ufs/ffs/ffs_alloc.c new file mode 100644 index 0000000..042d4e6 --- /dev/null +++ b/Dump/ufs/ffs/ffs_alloc.c @@ -0,0 +1,3237 @@ +/*- + * Copyright (c) 2002 Networks Associates Technology, Inc. + * All rights reserved. + * + * This software was developed for the FreeBSD Project by Marshall + * Kirk McKusick and Network Associates Laboratories, the Security + * Research Division of Network Associates, Inc. under DARPA/SPAWAR + * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS + * research program + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ *
+ * @(#)ffs_alloc.c	8.18 (Berkeley) 5/26/95
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: releng/11.2/sys/ufs/ffs/ffs_alloc.c 331722 2018-03-29 02:50:57Z eadler $");
+
+#include "opt_quota.h"
+
+#include <sys/param.h>
+#include <sys/capsicum.h>
+#include <sys/systm.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/conf.h>
+#include <sys/fcntl.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/syscallsubr.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/taskqueue.h>
+
+#include <security/audit/audit.h>
+
+#include <geom/geom.h>
+
+#include <ufs/ufs/dir.h>
+#include <ufs/ufs/extattr.h>
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufs_extern.h>
+#include <ufs/ufs/ufsmount.h>
+
+#include <ufs/ffs/fs.h>
+#include <ufs/ffs/ffs_extern.h>
+#include <ufs/ffs/softdep.h>
+
+typedef ufs2_daddr_t allocfcn_t(struct inode *ip, u_int cg, ufs2_daddr_t bpref,
+		    int size, int rsize);
+
+static ufs2_daddr_t ffs_alloccg(struct inode *, u_int, ufs2_daddr_t, int, int);
+static ufs2_daddr_t
+	    ffs_alloccgblk(struct inode *, struct buf *, ufs2_daddr_t, int);
+static void	ffs_blkfree_cg(struct ufsmount *, struct fs *,
+		    struct vnode *, ufs2_daddr_t, long, ino_t,
+		    struct workhead *);
+static void	ffs_blkfree_trim_completed(struct bio *);
+static void	ffs_blkfree_trim_task(void *ctx, int pending __unused);
+#ifdef INVARIANTS
+static int	ffs_checkblk(struct inode *, ufs2_daddr_t, long);
+#endif
+static ufs2_daddr_t ffs_clusteralloc(struct inode *, u_int, ufs2_daddr_t, int);
+static ino_t	ffs_dirpref(struct inode *);
+static ufs2_daddr_t ffs_fragextend(struct inode *, u_int, ufs2_daddr_t,
+		    int, int);
+static ufs2_daddr_t ffs_hashalloc
+		(struct inode *, u_int, ufs2_daddr_t, int, int, allocfcn_t *);
+static ufs2_daddr_t ffs_nodealloccg(struct inode *, u_int, ufs2_daddr_t, int,
+		    int);
+static ufs1_daddr_t ffs_mapsearch(struct fs *, struct cg *, ufs2_daddr_t, int);
+static int	ffs_reallocblks_ufs1(struct vop_reallocblks_args *);
+static int	ffs_reallocblks_ufs2(struct vop_reallocblks_args *);
+
+/*
+ * Allocate a block in the filesystem.
+ *
+ * The size of the requested block is given, which must be some
+ * multiple of fs_fsize and <= fs_bsize.
+ * A preference may be optionally specified. If a preference is given
+ * the following hierarchy is used to allocate a block:
+ *   1) allocate the requested block.
+ *   2) allocate a rotationally optimal block in the same cylinder.
+ *   3) allocate a block in the same cylinder group.
+ *   4) quadratically rehash into other cylinder groups, until an
+ *      available block is located.
+ * If no block preference is given the following hierarchy is used
+ * to allocate a block:
+ *   1) allocate a block in the cylinder group that contains the
+ *      inode for the file.
+ *   2) quadratically rehash into other cylinder groups, until an
+ *      available block is located.
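+ */
+
+/*
+ * Editor's note: the following stand-alone sketch is not part of the
+ * original file. It is a minimal user-space model of the search order
+ * described above and implemented by ffs_hashalloc() further below:
+ * try the preferred cylinder group, then quadratically rehash, then
+ * fall back to a brute-force scan. The name search_order_example()
+ * and the cg_has_space_t callback are hypothetical simplifications,
+ * not kernel interfaces.
+ */
+typedef int (*cg_has_space_t)(unsigned int cg, void *arg);
+
+static int
+search_order_example(unsigned int ncg, unsigned int prefcg,
+    cg_has_space_t cg_has_space, void *arg)
+{
+	unsigned int cg, i;
+
+	/* 1: preferred cylinder group */
+	if (cg_has_space(prefcg, arg))
+		return ((int)prefcg);
+	/* 2: quadratic rehash: prefcg+1, +3, +7, ... modulo ncg */
+	for (cg = prefcg, i = 1; i < ncg; i *= 2) {
+		cg += i;
+		if (cg >= ncg)
+			cg -= ncg;
+		if (cg_has_space(cg, arg))
+			return ((int)cg);
+	}
+	/* 3: brute force, starting past the groups probed first */
+	for (cg = (prefcg + 2) % ncg, i = 2; i < ncg; i++) {
+		if (cg_has_space(cg, arg))
+			return ((int)cg);
+		if (++cg == ncg)
+			cg = 0;
+	}
+	return (-1);	/* no cylinder group has space */
+}
+
+/*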
+ */ +int +ffs_alloc(ip, lbn, bpref, size, flags, cred, bnp) + struct inode *ip; + ufs2_daddr_t lbn, bpref; + int size, flags; + struct ucred *cred; + ufs2_daddr_t *bnp; +{ + struct fs *fs; + struct ufsmount *ump; + ufs2_daddr_t bno; + u_int cg, reclaimed; + static struct timeval lastfail; + static int curfail; + int64_t delta; +#ifdef QUOTA + int error; +#endif + + *bnp = 0; + ump = ITOUMP(ip); + fs = ump->um_fs; + mtx_assert(UFS_MTX(ump), MA_OWNED); +#ifdef INVARIANTS + if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { + printf("dev = %s, bsize = %ld, size = %d, fs = %s\n", + devtoname(ump->um_dev), (long)fs->fs_bsize, size, + fs->fs_fsmnt); + panic("ffs_alloc: bad size"); + } + if (cred == NOCRED) + panic("ffs_alloc: missing credential"); +#endif /* INVARIANTS */ + reclaimed = 0; +retry: +#ifdef QUOTA + UFS_UNLOCK(ump); + error = chkdq(ip, btodb(size), cred, 0); + if (error) + return (error); + UFS_LOCK(ump); +#endif + if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0) + goto nospace; + if (priv_check_cred(cred, PRIV_VFS_BLOCKRESERVE, 0) && + freespace(fs, fs->fs_minfree) - numfrags(fs, size) < 0) + goto nospace; + if (bpref >= fs->fs_size) + bpref = 0; + if (bpref == 0) + cg = ino_to_cg(fs, ip->i_number); + else + cg = dtog(fs, bpref); + bno = ffs_hashalloc(ip, cg, bpref, size, size, ffs_alloccg); + if (bno > 0) { + delta = btodb(size); + DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + delta); + if (flags & IO_EXT) + ip->i_flag |= IN_CHANGE; + else + ip->i_flag |= IN_CHANGE | IN_UPDATE; + *bnp = bno; + return (0); + } +nospace: +#ifdef QUOTA + UFS_UNLOCK(ump); + /* + * Restore user's disk quota because allocation failed. + */ + (void) chkdq(ip, -btodb(size), cred, FORCE); + UFS_LOCK(ump); +#endif + if (reclaimed == 0 && (flags & IO_BUFLOCKED) == 0) { + reclaimed = 1; + softdep_request_cleanup(fs, ITOV(ip), cred, FLUSH_BLOCKS_WAIT); + goto retry; + } + UFS_UNLOCK(ump); + if (reclaimed > 0 && ppsratecheck(&lastfail, &curfail, 1)) { + ffs_fserr(fs, ip->i_number, "filesystem full"); + uprintf("\n%s: write failed, filesystem is full\n", + fs->fs_fsmnt); + } + return (ENOSPC); +} + +/* + * Reallocate a fragment to a bigger size + * + * The number and size of the old block is given, and a preference + * and new size is also specified. The allocator attempts to extend + * the original block. Failing that, the regular block allocator is + * invoked to get an appropriate block. + */ +int +ffs_realloccg(ip, lbprev, bprev, bpref, osize, nsize, flags, cred, bpp) + struct inode *ip; + ufs2_daddr_t lbprev; + ufs2_daddr_t bprev; + ufs2_daddr_t bpref; + int osize, nsize, flags; + struct ucred *cred; + struct buf **bpp; +{ + struct vnode *vp; + struct fs *fs; + struct buf *bp; + struct ufsmount *ump; + u_int cg, request, reclaimed; + int error, gbflags; + ufs2_daddr_t bno; + static struct timeval lastfail; + static int curfail; + int64_t delta; + + vp = ITOV(ip); + ump = ITOUMP(ip); + fs = ump->um_fs; + bp = NULL; + gbflags = (flags & BA_UNMAPPED) != 0 ? 
GB_UNMAPPED : 0; + + mtx_assert(UFS_MTX(ump), MA_OWNED); +#ifdef INVARIANTS + if (vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) + panic("ffs_realloccg: allocation on suspended filesystem"); + if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 || + (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) { + printf( + "dev = %s, bsize = %ld, osize = %d, nsize = %d, fs = %s\n", + devtoname(ump->um_dev), (long)fs->fs_bsize, osize, + nsize, fs->fs_fsmnt); + panic("ffs_realloccg: bad size"); + } + if (cred == NOCRED) + panic("ffs_realloccg: missing credential"); +#endif /* INVARIANTS */ + reclaimed = 0; +retry: + if (priv_check_cred(cred, PRIV_VFS_BLOCKRESERVE, 0) && + freespace(fs, fs->fs_minfree) - numfrags(fs, nsize - osize) < 0) { + goto nospace; + } + if (bprev == 0) { + printf("dev = %s, bsize = %ld, bprev = %jd, fs = %s\n", + devtoname(ump->um_dev), (long)fs->fs_bsize, (intmax_t)bprev, + fs->fs_fsmnt); + panic("ffs_realloccg: bad bprev"); + } + UFS_UNLOCK(ump); + /* + * Allocate the extra space in the buffer. + */ + error = bread_gb(vp, lbprev, osize, NOCRED, gbflags, &bp); + if (error) { + brelse(bp); + return (error); + } + + if (bp->b_blkno == bp->b_lblkno) { + if (lbprev >= NDADDR) + panic("ffs_realloccg: lbprev out of range"); + bp->b_blkno = fsbtodb(fs, bprev); + } + +#ifdef QUOTA + error = chkdq(ip, btodb(nsize - osize), cred, 0); + if (error) { + brelse(bp); + return (error); + } +#endif + /* + * Check for extension in the existing location. + */ + *bpp = NULL; + cg = dtog(fs, bprev); + UFS_LOCK(ump); + bno = ffs_fragextend(ip, cg, bprev, osize, nsize); + if (bno) { + if (bp->b_blkno != fsbtodb(fs, bno)) + panic("ffs_realloccg: bad blockno"); + delta = btodb(nsize - osize); + DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + delta); + if (flags & IO_EXT) + ip->i_flag |= IN_CHANGE; + else + ip->i_flag |= IN_CHANGE | IN_UPDATE; + allocbuf(bp, nsize); + bp->b_flags |= B_DONE; + vfs_bio_bzero_buf(bp, osize, nsize - osize); + if ((bp->b_flags & (B_MALLOC | B_VMIO)) == B_VMIO) + vfs_bio_set_valid(bp, osize, nsize - osize); + *bpp = bp; + return (0); + } + /* + * Allocate a new disk location. + */ + if (bpref >= fs->fs_size) + bpref = 0; + switch ((int)fs->fs_optim) { + case FS_OPTSPACE: + /* + * Allocate an exact sized fragment. Although this makes + * best use of space, we will waste time relocating it if + * the file continues to grow. If the fragmentation is + * less than half of the minimum free reserve, we choose + * to begin optimizing for time. + */ + request = nsize; + if (fs->fs_minfree <= 5 || + fs->fs_cstotal.cs_nffree > + (off_t)fs->fs_dsize * fs->fs_minfree / (2 * 100)) + break; + log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n", + fs->fs_fsmnt); + fs->fs_optim = FS_OPTTIME; + break; + case FS_OPTTIME: + /* + * At this point we have discovered a file that is trying to + * grow a small fragment to a larger fragment. To save time, + * we allocate a full sized block, then free the unused portion. + * If the file continues to grow, the `ffs_fragextend' call + * above will be able to grow it in place without further + * copying. If aberrant programs cause disk fragmentation to + * grow within 2% of the free reserve, we choose to begin + * optimizing for space. 
+ */ + request = fs->fs_bsize; + if (fs->fs_cstotal.cs_nffree < + (off_t)fs->fs_dsize * (fs->fs_minfree - 2) / 100) + break; + log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n", + fs->fs_fsmnt); + fs->fs_optim = FS_OPTSPACE; + break; + default: + printf("dev = %s, optim = %ld, fs = %s\n", + devtoname(ump->um_dev), (long)fs->fs_optim, fs->fs_fsmnt); + panic("ffs_realloccg: bad optim"); + /* NOTREACHED */ + } + bno = ffs_hashalloc(ip, cg, bpref, request, nsize, ffs_alloccg); + if (bno > 0) { + bp->b_blkno = fsbtodb(fs, bno); + if (!DOINGSOFTDEP(vp)) + ffs_blkfree(ump, fs, ump->um_devvp, bprev, (long)osize, + ip->i_number, vp->v_type, NULL); + delta = btodb(nsize - osize); + DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + delta); + if (flags & IO_EXT) + ip->i_flag |= IN_CHANGE; + else + ip->i_flag |= IN_CHANGE | IN_UPDATE; + allocbuf(bp, nsize); + bp->b_flags |= B_DONE; + vfs_bio_bzero_buf(bp, osize, nsize - osize); + if ((bp->b_flags & (B_MALLOC | B_VMIO)) == B_VMIO) + vfs_bio_set_valid(bp, osize, nsize - osize); + *bpp = bp; + return (0); + } +#ifdef QUOTA + UFS_UNLOCK(ump); + /* + * Restore user's disk quota because allocation failed. + */ + (void) chkdq(ip, -btodb(nsize - osize), cred, FORCE); + UFS_LOCK(ump); +#endif +nospace: + /* + * no space available + */ + if (reclaimed == 0 && (flags & IO_BUFLOCKED) == 0) { + reclaimed = 1; + UFS_UNLOCK(ump); + if (bp) { + brelse(bp); + bp = NULL; + } + UFS_LOCK(ump); + softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT); + goto retry; + } + UFS_UNLOCK(ump); + if (bp) + brelse(bp); + if (reclaimed > 0 && ppsratecheck(&lastfail, &curfail, 1)) { + ffs_fserr(fs, ip->i_number, "filesystem full"); + uprintf("\n%s: write failed, filesystem is full\n", + fs->fs_fsmnt); + } + return (ENOSPC); +} + +/* + * Reallocate a sequence of blocks into a contiguous sequence of blocks. + * + * The vnode and an array of buffer pointers for a range of sequential + * logical blocks to be made contiguous is given. The allocator attempts + * to find a range of sequential blocks starting as close as possible + * from the end of the allocation for the logical block immediately + * preceding the current range. If successful, the physical block numbers + * in the buffer pointers and in the inode are changed to reflect the new + * allocation. If unsuccessful, the allocation is left unchanged. The + * success in doing the reallocation is returned. Note that the error + * return is not reflected back to the user. Rather the previous block + * allocation will be used. + */ + +SYSCTL_NODE(_vfs, OID_AUTO, ffs, CTLFLAG_RW, 0, "FFS filesystem"); + +static int doasyncfree = 1; +SYSCTL_INT(_vfs_ffs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, +"do not force synchronous writes when blocks are reallocated"); + +static int doreallocblks = 1; +SYSCTL_INT(_vfs_ffs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, +"enable block reallocation"); + +static int maxclustersearch = 10; +SYSCTL_INT(_vfs_ffs, OID_AUTO, maxclustersearch, CTLFLAG_RW, &maxclustersearch, +0, "max number of cylinder group to search for contigous blocks"); + +#ifdef DEBUG +static volatile int prtrealloc = 0; +#endif + +int +ffs_reallocblks(ap) + struct vop_reallocblks_args /* { + struct vnode *a_vp; + struct cluster_save *a_buflist; + } */ *ap; +{ + struct ufsmount *ump; + + /* + * If the underlying device can do deletes, then skip reallocating + * the blocks of this file into contiguous sequences. Devices that + * benefit from BIO_DELETE also benefit from not moving the data. 
+ * These devices are flash and therefore work less well with this + * optimization. Also skip if reallocblks has been disabled globally. + */ + ump = ap->a_vp->v_mount->mnt_data; + if (ump->um_candelete || doreallocblks == 0) + return (ENOSPC); + + /* + * We can't wait in softdep prealloc as it may fsync and recurse + * here. Instead we simply fail to reallocate blocks if this + * rare condition arises. + */ + if (DOINGSOFTDEP(ap->a_vp)) + if (softdep_prealloc(ap->a_vp, MNT_NOWAIT) != 0) + return (ENOSPC); + if (ump->um_fstype == UFS1) + return (ffs_reallocblks_ufs1(ap)); + return (ffs_reallocblks_ufs2(ap)); +} + +static int +ffs_reallocblks_ufs1(ap) + struct vop_reallocblks_args /* { + struct vnode *a_vp; + struct cluster_save *a_buflist; + } */ *ap; +{ + struct fs *fs; + struct inode *ip; + struct vnode *vp; + struct buf *sbp, *ebp; + ufs1_daddr_t *bap, *sbap, *ebap; + struct cluster_save *buflist; + struct ufsmount *ump; + ufs_lbn_t start_lbn, end_lbn; + ufs1_daddr_t soff, newblk, blkno; + ufs2_daddr_t pref; + struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp; + int i, cg, len, start_lvl, end_lvl, ssize; + + vp = ap->a_vp; + ip = VTOI(vp); + ump = ITOUMP(ip); + fs = ump->um_fs; + /* + * If we are not tracking block clusters or if we have less than 4% + * free blocks left, then do not attempt to cluster. Running with + * less than 5% free block reserve is not recommended and those that + * choose to do so do not expect to have good file layout. + */ + if (fs->fs_contigsumsize <= 0 || freespace(fs, 4) < 0) + return (ENOSPC); + buflist = ap->a_buflist; + len = buflist->bs_nchildren; + start_lbn = buflist->bs_children[0]->b_lblkno; + end_lbn = start_lbn + len - 1; +#ifdef INVARIANTS + for (i = 0; i < len; i++) + if (!ffs_checkblk(ip, + dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) + panic("ffs_reallocblks: unallocated block 1"); + for (i = 1; i < len; i++) + if (buflist->bs_children[i]->b_lblkno != start_lbn + i) + panic("ffs_reallocblks: non-logical cluster"); + blkno = buflist->bs_children[0]->b_blkno; + ssize = fsbtodb(fs, fs->fs_frag); + for (i = 1; i < len - 1; i++) + if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize)) + panic("ffs_reallocblks: non-physical cluster %d", i); +#endif + /* + * If the cluster crosses the boundary for the first indirect + * block, leave space for the indirect block. Indirect blocks + * are initially laid out in a position after the last direct + * block. Block reallocation would usually destroy locality by + * moving the indirect block out of the way to make room for + * data blocks if we didn't compensate here. We should also do + * this for other indirect block boundaries, but it is only + * important for the first one. + */ + if (start_lbn < NDADDR && end_lbn >= NDADDR) + return (ENOSPC); + /* + * If the latest allocation is in a new cylinder group, assume that + * the filesystem has decided to move and do not force it back to + * the previous cylinder group. + */ + if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) != + dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno))) + return (ENOSPC); + if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) || + ufs_getlbns(vp, end_lbn, end_ap, &end_lvl)) + return (ENOSPC); + /* + * Get the starting offset and block map for the first block. 
+ */ + if (start_lvl == 0) { + sbap = &ip->i_din1->di_db[0]; + soff = start_lbn; + } else { + idp = &start_ap[start_lvl - 1]; + if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) { + brelse(sbp); + return (ENOSPC); + } + sbap = (ufs1_daddr_t *)sbp->b_data; + soff = idp->in_off; + } + /* + * If the block range spans two block maps, get the second map. + */ + ebap = NULL; + if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { + ssize = len; + } else { +#ifdef INVARIANTS + if (start_lvl > 0 && + start_ap[start_lvl - 1].in_lbn == idp->in_lbn) + panic("ffs_reallocblk: start == end"); +#endif + ssize = len - (idp->in_off + 1); + if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp)) + goto fail; + ebap = (ufs1_daddr_t *)ebp->b_data; + } + /* + * Find the preferred location for the cluster. If we have not + * previously failed at this endeavor, then follow our standard + * preference calculation. If we have failed at it, then pick up + * where we last ended our search. + */ + UFS_LOCK(ump); + if (ip->i_nextclustercg == -1) + pref = ffs_blkpref_ufs1(ip, start_lbn, soff, sbap); + else + pref = cgdata(fs, ip->i_nextclustercg); + /* + * Search the block map looking for an allocation of the desired size. + * To avoid wasting too much time, we limit the number of cylinder + * groups that we will search. + */ + cg = dtog(fs, pref); + for (i = min(maxclustersearch, fs->fs_ncg); i > 0; i--) { + if ((newblk = ffs_clusteralloc(ip, cg, pref, len)) != 0) + break; + cg += 1; + if (cg >= fs->fs_ncg) + cg = 0; + } + /* + * If we have failed in our search, record where we gave up for + * next time. Otherwise, fall back to our usual search citerion. + */ + if (newblk == 0) { + ip->i_nextclustercg = cg; + UFS_UNLOCK(ump); + goto fail; + } + ip->i_nextclustercg = -1; + /* + * We have found a new contiguous block. + * + * First we have to replace the old block pointers with the new + * block pointers in the inode and indirect blocks associated + * with the file. + */ +#ifdef DEBUG + if (prtrealloc) + printf("realloc: ino %ju, lbns %jd-%jd\n\told:", + (uintmax_t)ip->i_number, + (intmax_t)start_lbn, (intmax_t)end_lbn); +#endif + blkno = newblk; + for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) { + if (i == ssize) { + bap = ebap; + soff = -i; + } +#ifdef INVARIANTS + if (!ffs_checkblk(ip, + dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) + panic("ffs_reallocblks: unallocated block 2"); + if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap) + panic("ffs_reallocblks: alloc mismatch"); +#endif +#ifdef DEBUG + if (prtrealloc) + printf(" %d,", *bap); +#endif + if (DOINGSOFTDEP(vp)) { + if (sbap == &ip->i_din1->di_db[0] && i < ssize) + softdep_setup_allocdirect(ip, start_lbn + i, + blkno, *bap, fs->fs_bsize, fs->fs_bsize, + buflist->bs_children[i]); + else + softdep_setup_allocindir_page(ip, start_lbn + i, + i < ssize ? sbp : ebp, soff + i, blkno, + *bap, buflist->bs_children[i]); + } + *bap++ = blkno; + } + /* + * Next we must write out the modified inode and indirect blocks. + * For strict correctness, the writes should be synchronous since + * the old block values may have been written to disk. In practise + * they are almost never written, but if we are concerned about + * strict correctness, the `doasyncfree' flag should be set to zero. + * + * The test on `doasyncfree' should be changed to test a flag + * that shows whether the associated buffers and inodes have + * been written. 
The flag should be set when the cluster is + * started and cleared whenever the buffer or inode is flushed. + * We can then check below to see if it is set, and do the + * synchronous write only when it has been cleared. + */ + if (sbap != &ip->i_din1->di_db[0]) { + if (doasyncfree) + bdwrite(sbp); + else + bwrite(sbp); + } else { + ip->i_flag |= IN_CHANGE | IN_UPDATE; + if (!doasyncfree) + ffs_update(vp, 1); + } + if (ssize < len) { + if (doasyncfree) + bdwrite(ebp); + else + bwrite(ebp); + } + /* + * Last, free the old blocks and assign the new blocks to the buffers. + */ +#ifdef DEBUG + if (prtrealloc) + printf("\n\tnew:"); +#endif + for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) { + if (!DOINGSOFTDEP(vp)) + ffs_blkfree(ump, fs, ump->um_devvp, + dbtofsb(fs, buflist->bs_children[i]->b_blkno), + fs->fs_bsize, ip->i_number, vp->v_type, NULL); + buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); +#ifdef INVARIANTS + if (!ffs_checkblk(ip, + dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) + panic("ffs_reallocblks: unallocated block 3"); +#endif +#ifdef DEBUG + if (prtrealloc) + printf(" %d,", blkno); +#endif + } +#ifdef DEBUG + if (prtrealloc) { + prtrealloc--; + printf("\n"); + } +#endif + return (0); + +fail: + if (ssize < len) + brelse(ebp); + if (sbap != &ip->i_din1->di_db[0]) + brelse(sbp); + return (ENOSPC); +} + +static int +ffs_reallocblks_ufs2(ap) + struct vop_reallocblks_args /* { + struct vnode *a_vp; + struct cluster_save *a_buflist; + } */ *ap; +{ + struct fs *fs; + struct inode *ip; + struct vnode *vp; + struct buf *sbp, *ebp; + ufs2_daddr_t *bap, *sbap, *ebap; + struct cluster_save *buflist; + struct ufsmount *ump; + ufs_lbn_t start_lbn, end_lbn; + ufs2_daddr_t soff, newblk, blkno, pref; + struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp; + int i, cg, len, start_lvl, end_lvl, ssize; + + vp = ap->a_vp; + ip = VTOI(vp); + ump = ITOUMP(ip); + fs = ump->um_fs; + /* + * If we are not tracking block clusters or if we have less than 4% + * free blocks left, then do not attempt to cluster. Running with + * less than 5% free block reserve is not recommended and those that + * choose to do so do not expect to have good file layout. + */ + if (fs->fs_contigsumsize <= 0 || freespace(fs, 4) < 0) + return (ENOSPC); + buflist = ap->a_buflist; + len = buflist->bs_nchildren; + start_lbn = buflist->bs_children[0]->b_lblkno; + end_lbn = start_lbn + len - 1; +#ifdef INVARIANTS + for (i = 0; i < len; i++) + if (!ffs_checkblk(ip, + dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) + panic("ffs_reallocblks: unallocated block 1"); + for (i = 1; i < len; i++) + if (buflist->bs_children[i]->b_lblkno != start_lbn + i) + panic("ffs_reallocblks: non-logical cluster"); + blkno = buflist->bs_children[0]->b_blkno; + ssize = fsbtodb(fs, fs->fs_frag); + for (i = 1; i < len - 1; i++) + if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize)) + panic("ffs_reallocblks: non-physical cluster %d", i); +#endif + /* + * If the cluster crosses the boundary for the first indirect + * block, do not move anything in it. Indirect blocks are + * usually initially laid out in a position between the data + * blocks. Block reallocation would usually destroy locality by + * moving the indirect block out of the way to make room for + * data blocks if we didn't compensate here. We should also do + * this for other indirect block boundaries, but it is only + * important for the first one. 
+ */ + if (start_lbn < NDADDR && end_lbn >= NDADDR) + return (ENOSPC); + /* + * If the latest allocation is in a new cylinder group, assume that + * the filesystem has decided to move and do not force it back to + * the previous cylinder group. + */ + if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) != + dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno))) + return (ENOSPC); + if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) || + ufs_getlbns(vp, end_lbn, end_ap, &end_lvl)) + return (ENOSPC); + /* + * Get the starting offset and block map for the first block. + */ + if (start_lvl == 0) { + sbap = &ip->i_din2->di_db[0]; + soff = start_lbn; + } else { + idp = &start_ap[start_lvl - 1]; + if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) { + brelse(sbp); + return (ENOSPC); + } + sbap = (ufs2_daddr_t *)sbp->b_data; + soff = idp->in_off; + } + /* + * If the block range spans two block maps, get the second map. + */ + ebap = NULL; + if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { + ssize = len; + } else { +#ifdef INVARIANTS + if (start_lvl > 0 && + start_ap[start_lvl - 1].in_lbn == idp->in_lbn) + panic("ffs_reallocblk: start == end"); +#endif + ssize = len - (idp->in_off + 1); + if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp)) + goto fail; + ebap = (ufs2_daddr_t *)ebp->b_data; + } + /* + * Find the preferred location for the cluster. If we have not + * previously failed at this endeavor, then follow our standard + * preference calculation. If we have failed at it, then pick up + * where we last ended our search. + */ + UFS_LOCK(ump); + if (ip->i_nextclustercg == -1) + pref = ffs_blkpref_ufs2(ip, start_lbn, soff, sbap); + else + pref = cgdata(fs, ip->i_nextclustercg); + /* + * Search the block map looking for an allocation of the desired size. + * To avoid wasting too much time, we limit the number of cylinder + * groups that we will search. + */ + cg = dtog(fs, pref); + for (i = min(maxclustersearch, fs->fs_ncg); i > 0; i--) { + if ((newblk = ffs_clusteralloc(ip, cg, pref, len)) != 0) + break; + cg += 1; + if (cg >= fs->fs_ncg) + cg = 0; + } + /* + * If we have failed in our search, record where we gave up for + * next time. Otherwise, fall back to our usual search citerion. + */ + if (newblk == 0) { + ip->i_nextclustercg = cg; + UFS_UNLOCK(ump); + goto fail; + } + ip->i_nextclustercg = -1; + /* + * We have found a new contiguous block. + * + * First we have to replace the old block pointers with the new + * block pointers in the inode and indirect blocks associated + * with the file. + */ +#ifdef DEBUG + if (prtrealloc) + printf("realloc: ino %ju, lbns %jd-%jd\n\told:", (uintmax_t)ip->i_number, + (intmax_t)start_lbn, (intmax_t)end_lbn); +#endif + blkno = newblk; + for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) { + if (i == ssize) { + bap = ebap; + soff = -i; + } +#ifdef INVARIANTS + if (!ffs_checkblk(ip, + dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) + panic("ffs_reallocblks: unallocated block 2"); + if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap) + panic("ffs_reallocblks: alloc mismatch"); +#endif +#ifdef DEBUG + if (prtrealloc) + printf(" %jd,", (intmax_t)*bap); +#endif + if (DOINGSOFTDEP(vp)) { + if (sbap == &ip->i_din2->di_db[0] && i < ssize) + softdep_setup_allocdirect(ip, start_lbn + i, + blkno, *bap, fs->fs_bsize, fs->fs_bsize, + buflist->bs_children[i]); + else + softdep_setup_allocindir_page(ip, start_lbn + i, + i < ssize ? 
sbp : ebp, soff + i, blkno, + *bap, buflist->bs_children[i]); + } + *bap++ = blkno; + } + /* + * Next we must write out the modified inode and indirect blocks. + * For strict correctness, the writes should be synchronous since + * the old block values may have been written to disk. In practise + * they are almost never written, but if we are concerned about + * strict correctness, the `doasyncfree' flag should be set to zero. + * + * The test on `doasyncfree' should be changed to test a flag + * that shows whether the associated buffers and inodes have + * been written. The flag should be set when the cluster is + * started and cleared whenever the buffer or inode is flushed. + * We can then check below to see if it is set, and do the + * synchronous write only when it has been cleared. + */ + if (sbap != &ip->i_din2->di_db[0]) { + if (doasyncfree) + bdwrite(sbp); + else + bwrite(sbp); + } else { + ip->i_flag |= IN_CHANGE | IN_UPDATE; + if (!doasyncfree) + ffs_update(vp, 1); + } + if (ssize < len) { + if (doasyncfree) + bdwrite(ebp); + else + bwrite(ebp); + } + /* + * Last, free the old blocks and assign the new blocks to the buffers. + */ +#ifdef DEBUG + if (prtrealloc) + printf("\n\tnew:"); +#endif + for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) { + if (!DOINGSOFTDEP(vp)) + ffs_blkfree(ump, fs, ump->um_devvp, + dbtofsb(fs, buflist->bs_children[i]->b_blkno), + fs->fs_bsize, ip->i_number, vp->v_type, NULL); + buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); +#ifdef INVARIANTS + if (!ffs_checkblk(ip, + dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) + panic("ffs_reallocblks: unallocated block 3"); +#endif +#ifdef DEBUG + if (prtrealloc) + printf(" %jd,", (intmax_t)blkno); +#endif + } +#ifdef DEBUG + if (prtrealloc) { + prtrealloc--; + printf("\n"); + } +#endif + return (0); + +fail: + if (ssize < len) + brelse(ebp); + if (sbap != &ip->i_din2->di_db[0]) + brelse(sbp); + return (ENOSPC); +} + +/* + * Allocate an inode in the filesystem. + * + * If allocating a directory, use ffs_dirpref to select the inode. + * If allocating in a directory, the following hierarchy is followed: + * 1) allocate the preferred inode. + * 2) allocate an inode in the same cylinder group. + * 3) quadradically rehash into other cylinder groups, until an + * available inode is located. + * If no inode preference is given the following hierarchy is used + * to allocate an inode: + * 1) allocate an inode in cylinder group 0. + * 2) quadradically rehash into other cylinder groups, until an + * available inode is located. + */ +int +ffs_valloc(pvp, mode, cred, vpp) + struct vnode *pvp; + int mode; + struct ucred *cred; + struct vnode **vpp; +{ + struct inode *pip; + struct fs *fs; + struct inode *ip; + struct timespec ts; + struct ufsmount *ump; + ino_t ino, ipref; + u_int cg; + int error, error1, reclaimed; + static struct timeval lastfail; + static int curfail; + + *vpp = NULL; + pip = VTOI(pvp); + ump = ITOUMP(pip); + fs = ump->um_fs; + + UFS_LOCK(ump); + reclaimed = 0; +retry: + if (fs->fs_cstotal.cs_nifree == 0) + goto noinodes; + + if ((mode & IFMT) == IFDIR) + ipref = ffs_dirpref(pip); + else + ipref = pip->i_number; + if (ipref >= fs->fs_ncg * fs->fs_ipg) + ipref = 0; + cg = ino_to_cg(fs, ipref); + /* + * Track number of dirs created one after another + * in a same cg without intervening by files. 
+ */ + if ((mode & IFMT) == IFDIR) { + if (fs->fs_contigdirs[cg] < 255) + fs->fs_contigdirs[cg]++; + } else { + if (fs->fs_contigdirs[cg] > 0) + fs->fs_contigdirs[cg]--; + } + ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode, 0, + (allocfcn_t *)ffs_nodealloccg); + if (ino == 0) + goto noinodes; + error = ffs_vget(pvp->v_mount, ino, LK_EXCLUSIVE, vpp); + if (error) { + error1 = ffs_vgetf(pvp->v_mount, ino, LK_EXCLUSIVE, vpp, + FFSV_FORCEINSMQ); + ffs_vfree(pvp, ino, mode); + if (error1 == 0) { + ip = VTOI(*vpp); + if (ip->i_mode) + goto dup_alloc; + ip->i_flag |= IN_MODIFIED; + vput(*vpp); + } + return (error); + } + ip = VTOI(*vpp); + if (ip->i_mode) { +dup_alloc: + printf("mode = 0%o, inum = %ju, fs = %s\n", + ip->i_mode, (uintmax_t)ip->i_number, fs->fs_fsmnt); + panic("ffs_valloc: dup alloc"); + } + if (DIP(ip, i_blocks) && (fs->fs_flags & FS_UNCLEAN) == 0) { /* XXX */ + printf("free inode %s/%lu had %ld blocks\n", + fs->fs_fsmnt, (u_long)ino, (long)DIP(ip, i_blocks)); + DIP_SET(ip, i_blocks, 0); + } + ip->i_flags = 0; + DIP_SET(ip, i_flags, 0); + /* + * Set up a new generation number for this inode. + */ + while (ip->i_gen == 0 || ++ip->i_gen == 0) + ip->i_gen = arc4random(); + DIP_SET(ip, i_gen, ip->i_gen); + if (fs->fs_magic == FS_UFS2_MAGIC) { + vfs_timestamp(&ts); + ip->i_din2->di_birthtime = ts.tv_sec; + ip->i_din2->di_birthnsec = ts.tv_nsec; + } + ufs_prepare_reclaim(*vpp); + ip->i_flag = 0; + (*vpp)->v_vflag = 0; + (*vpp)->v_type = VNON; + if (fs->fs_magic == FS_UFS2_MAGIC) { + (*vpp)->v_op = &ffs_vnodeops2; + ip->i_flag |= IN_UFS2; + } else { + (*vpp)->v_op = &ffs_vnodeops1; + } + return (0); +noinodes: + if (reclaimed == 0) { + reclaimed = 1; + softdep_request_cleanup(fs, pvp, cred, FLUSH_INODES_WAIT); + goto retry; + } + UFS_UNLOCK(ump); + if (ppsratecheck(&lastfail, &curfail, 1)) { + ffs_fserr(fs, pip->i_number, "out of inodes"); + uprintf("\n%s: create/symlink failed, no inodes free\n", + fs->fs_fsmnt); + } + return (ENOSPC); +} + +/* + * Find a cylinder group to place a directory. + * + * The policy implemented by this algorithm is to allocate a + * directory inode in the same cylinder group as its parent + * directory, but also to reserve space for its files inodes + * and data. Restrict the number of directories which may be + * allocated one after another in the same cylinder group + * without intervening allocation of files. + * + * If we allocate a first level directory then force allocation + * in another cylinder group. + */ +static ino_t +ffs_dirpref(pip) + struct inode *pip; +{ + struct fs *fs; + int cg, prefcg, dirsize, cgsize; + u_int avgifree, avgbfree, avgndir, curdirsize; + u_int minifree, minbfree, maxndir; + u_int mincg, minndir; + u_int maxcontigdirs; + + mtx_assert(UFS_MTX(ITOUMP(pip)), MA_OWNED); + fs = ITOFS(pip); + + avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg; + avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; + avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg; + + /* + * Force allocation in another cg if creating a first level dir. 
+ */ + ASSERT_VOP_LOCKED(ITOV(pip), "ffs_dirpref"); + if (ITOV(pip)->v_vflag & VV_ROOT) { + prefcg = arc4random() % fs->fs_ncg; + mincg = prefcg; + minndir = fs->fs_ipg; + for (cg = prefcg; cg < fs->fs_ncg; cg++) + if (fs->fs_cs(fs, cg).cs_ndir < minndir && + fs->fs_cs(fs, cg).cs_nifree >= avgifree && + fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { + mincg = cg; + minndir = fs->fs_cs(fs, cg).cs_ndir; + } + for (cg = 0; cg < prefcg; cg++) + if (fs->fs_cs(fs, cg).cs_ndir < minndir && + fs->fs_cs(fs, cg).cs_nifree >= avgifree && + fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { + mincg = cg; + minndir = fs->fs_cs(fs, cg).cs_ndir; + } + return ((ino_t)(fs->fs_ipg * mincg)); + } + + /* + * Count various limits which used for + * optimal allocation of a directory inode. + */ + maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg); + minifree = avgifree - avgifree / 4; + if (minifree < 1) + minifree = 1; + minbfree = avgbfree - avgbfree / 4; + if (minbfree < 1) + minbfree = 1; + cgsize = fs->fs_fsize * fs->fs_fpg; + dirsize = fs->fs_avgfilesize * fs->fs_avgfpdir; + curdirsize = avgndir ? (cgsize - avgbfree * fs->fs_bsize) / avgndir : 0; + if (dirsize < curdirsize) + dirsize = curdirsize; + if (dirsize <= 0) + maxcontigdirs = 0; /* dirsize overflowed */ + else + maxcontigdirs = min((avgbfree * fs->fs_bsize) / dirsize, 255); + if (fs->fs_avgfpdir > 0) + maxcontigdirs = min(maxcontigdirs, + fs->fs_ipg / fs->fs_avgfpdir); + if (maxcontigdirs == 0) + maxcontigdirs = 1; + + /* + * Limit number of dirs in one cg and reserve space for + * regular files, but only if we have no deficit in + * inodes or space. + * + * We are trying to find a suitable cylinder group nearby + * our preferred cylinder group to place a new directory. + * We scan from our preferred cylinder group forward looking + * for a cylinder group that meets our criterion. If we get + * to the final cylinder group and do not find anything, + * we start scanning forwards from the beginning of the + * filesystem. While it might seem sensible to start scanning + * backwards or even to alternate looking forward and backward, + * this approach fails badly when the filesystem is nearly full. + * Specifically, we first search all the areas that have no space + * and finally try the one preceding that. We repeat this on + * every request and in the case of the final block end up + * searching the entire filesystem. By jumping to the front + * of the filesystem, our future forward searches always look + * in new cylinder groups so finds every possible block after + * one pass over the filesystem. + */ + prefcg = ino_to_cg(fs, pip->i_number); + for (cg = prefcg; cg < fs->fs_ncg; cg++) + if (fs->fs_cs(fs, cg).cs_ndir < maxndir && + fs->fs_cs(fs, cg).cs_nifree >= minifree && + fs->fs_cs(fs, cg).cs_nbfree >= minbfree) { + if (fs->fs_contigdirs[cg] < maxcontigdirs) + return ((ino_t)(fs->fs_ipg * cg)); + } + for (cg = 0; cg < prefcg; cg++) + if (fs->fs_cs(fs, cg).cs_ndir < maxndir && + fs->fs_cs(fs, cg).cs_nifree >= minifree && + fs->fs_cs(fs, cg).cs_nbfree >= minbfree) { + if (fs->fs_contigdirs[cg] < maxcontigdirs) + return ((ino_t)(fs->fs_ipg * cg)); + } + /* + * This is a backstop when we have deficit in space. + */ + for (cg = prefcg; cg < fs->fs_ncg; cg++) + if (fs->fs_cs(fs, cg).cs_nifree >= avgifree) + return ((ino_t)(fs->fs_ipg * cg)); + for (cg = 0; cg < prefcg; cg++) + if (fs->fs_cs(fs, cg).cs_nifree >= avgifree) + break; + return ((ino_t)(fs->fs_ipg * cg)); +} + +/* + * Select the desired position for the next block in a file. 
The file is + * logically divided into sections. The first section is composed of the + * direct blocks and the next fs_maxbpg blocks. Each additional section + * contains fs_maxbpg blocks. + * + * If no blocks have been allocated in the first section, the policy is to + * request a block in the same cylinder group as the inode that describes + * the file. The first indirect is allocated immediately following the last + * direct block and the data blocks for the first indirect immediately + * follow it. + * + * If no blocks have been allocated in any other section, the indirect + * block(s) are allocated in the same cylinder group as its inode in an + * area reserved immediately following the inode blocks. The policy for + * the data blocks is to place them in a cylinder group with a greater than + * average number of free blocks. An appropriate cylinder group is found + * by using a rotor that sweeps the cylinder groups. When a new group of + * blocks is needed, the sweep begins in the cylinder group following the + * cylinder group from which the previous allocation was made. The sweep + * continues until a cylinder group with greater than the average number + * of free blocks is found. If the allocation is for the first block in an + * indirect block or the previous block is a hole, then the information on + * the previous allocation is unavailable; here a best guess is made based + * on the logical block number being allocated. + * + * If a section is already partially allocated, the policy is to + * allocate blocks contiguously within the section if possible. + */ +ufs2_daddr_t +ffs_blkpref_ufs1(ip, lbn, indx, bap) + struct inode *ip; + ufs_lbn_t lbn; + int indx; + ufs1_daddr_t *bap; +{ + struct fs *fs; + u_int cg, inocg; + u_int avgbfree, startcg; + ufs2_daddr_t pref; + + KASSERT(indx <= 0 || bap != NULL, ("need non-NULL bap")); + mtx_assert(UFS_MTX(ITOUMP(ip)), MA_OWNED); + fs = ITOFS(ip); + /* + * Allocation of indirect blocks is indicated by passing negative + * values in indx: -1 for single indirect, -2 for double indirect, + * -3 for triple indirect. As noted below, we attempt to allocate + * the first indirect inline with the file data. For all later + * indirect blocks, the data is often allocated in other cylinder + * groups. However to speed random file access and to speed up + * fsck, the filesystem reserves the first fs_metaspace blocks + * (typically half of fs_minfree) of the data area of each cylinder + * group to hold these later indirect blocks. + */ + inocg = ino_to_cg(fs, ip->i_number); + if (indx < 0) { + /* + * Our preference for indirect blocks is the zone at the + * beginning of the inode's cylinder group data area that + * we try to reserve for indirect blocks. + */ + pref = cgmeta(fs, inocg); + /* + * If we are allocating the first indirect block, try to + * place it immediately following the last direct block. + */ + if (indx == -1 && lbn < NDADDR + NINDIR(fs) && + ip->i_din1->di_db[NDADDR - 1] != 0) + pref = ip->i_din1->di_db[NDADDR - 1] + fs->fs_frag; + return (pref); + } + /* + * If we are allocating the first data block in the first indirect + * block and the indirect has been allocated in the data block area, + * try to place it immediately following the indirect block. 
+ */ + if (lbn == NDADDR) { + pref = ip->i_din1->di_ib[0]; + if (pref != 0 && pref >= cgdata(fs, inocg) && + pref < cgbase(fs, inocg + 1)) + return (pref + fs->fs_frag); + } + /* + * If we are at the beginning of a file, or we have already allocated + * the maximum number of blocks per cylinder group, or we do not + * have a block allocated immediately preceding us, then we need + * to decide where to start allocating new blocks. + */ + if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { + /* + * If we are allocating a directory data block, we want + * to place it in the metadata area. + */ + if ((ip->i_mode & IFMT) == IFDIR) + return (cgmeta(fs, inocg)); + /* + * Until we fill all the direct and all the first indirect's + * blocks, we try to allocate in the data area of the inode's + * cylinder group. + */ + if (lbn < NDADDR + NINDIR(fs)) + return (cgdata(fs, inocg)); + /* + * Find a cylinder with greater than average number of + * unused data blocks. + */ + if (indx == 0 || bap[indx - 1] == 0) + startcg = inocg + lbn / fs->fs_maxbpg; + else + startcg = dtog(fs, bap[indx - 1]) + 1; + startcg %= fs->fs_ncg; + avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; + for (cg = startcg; cg < fs->fs_ncg; cg++) + if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { + fs->fs_cgrotor = cg; + return (cgdata(fs, cg)); + } + for (cg = 0; cg <= startcg; cg++) + if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { + fs->fs_cgrotor = cg; + return (cgdata(fs, cg)); + } + return (0); + } + /* + * Otherwise, we just always try to lay things out contiguously. + */ + return (bap[indx - 1] + fs->fs_frag); +} + +/* + * Same as above, but for UFS2 + */ +ufs2_daddr_t +ffs_blkpref_ufs2(ip, lbn, indx, bap) + struct inode *ip; + ufs_lbn_t lbn; + int indx; + ufs2_daddr_t *bap; +{ + struct fs *fs; + u_int cg, inocg; + u_int avgbfree, startcg; + ufs2_daddr_t pref; + + KASSERT(indx <= 0 || bap != NULL, ("need non-NULL bap")); + mtx_assert(UFS_MTX(ITOUMP(ip)), MA_OWNED); + fs = ITOFS(ip); + /* + * Allocation of indirect blocks is indicated by passing negative + * values in indx: -1 for single indirect, -2 for double indirect, + * -3 for triple indirect. As noted below, we attempt to allocate + * the first indirect inline with the file data. For all later + * indirect blocks, the data is often allocated in other cylinder + * groups. However to speed random file access and to speed up + * fsck, the filesystem reserves the first fs_metaspace blocks + * (typically half of fs_minfree) of the data area of each cylinder + * group to hold these later indirect blocks. + */ + inocg = ino_to_cg(fs, ip->i_number); + if (indx < 0) { + /* + * Our preference for indirect blocks is the zone at the + * beginning of the inode's cylinder group data area that + * we try to reserve for indirect blocks. + */ + pref = cgmeta(fs, inocg); + /* + * If we are allocating the first indirect block, try to + * place it immediately following the last direct block. + */ + if (indx == -1 && lbn < NDADDR + NINDIR(fs) && + ip->i_din2->di_db[NDADDR - 1] != 0) + pref = ip->i_din2->di_db[NDADDR - 1] + fs->fs_frag; + return (pref); + } + /* + * If we are allocating the first data block in the first indirect + * block and the indirect has been allocated in the data block area, + * try to place it immediately following the indirect block. 
+ */ + if (lbn == NDADDR) { + pref = ip->i_din2->di_ib[0]; + if (pref != 0 && pref >= cgdata(fs, inocg) && + pref < cgbase(fs, inocg + 1)) + return (pref + fs->fs_frag); + } + /* + * If we are at the beginning of a file, or we have already allocated + * the maximum number of blocks per cylinder group, or we do not + * have a block allocated immediately preceding us, then we need + * to decide where to start allocating new blocks. + */ + if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { + /* + * If we are allocating a directory data block, we want + * to place it in the metadata area. + */ + if ((ip->i_mode & IFMT) == IFDIR) + return (cgmeta(fs, inocg)); + /* + * Until we fill all the direct and all the first indirect's + * blocks, we try to allocate in the data area of the inode's + * cylinder group. + */ + if (lbn < NDADDR + NINDIR(fs)) + return (cgdata(fs, inocg)); + /* + * Find a cylinder with greater than average number of + * unused data blocks. + */ + if (indx == 0 || bap[indx - 1] == 0) + startcg = inocg + lbn / fs->fs_maxbpg; + else + startcg = dtog(fs, bap[indx - 1]) + 1; + startcg %= fs->fs_ncg; + avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; + for (cg = startcg; cg < fs->fs_ncg; cg++) + if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { + fs->fs_cgrotor = cg; + return (cgdata(fs, cg)); + } + for (cg = 0; cg <= startcg; cg++) + if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { + fs->fs_cgrotor = cg; + return (cgdata(fs, cg)); + } + return (0); + } + /* + * Otherwise, we just always try to lay things out contiguously. + */ + return (bap[indx - 1] + fs->fs_frag); +} + +/* + * Implement the cylinder overflow algorithm. + * + * The policy implemented by this algorithm is: + * 1) allocate the block in its requested cylinder group. + * 2) quadradically rehash on the cylinder group number. + * 3) brute force search for a free block. + * + * Must be called with the UFS lock held. Will release the lock on success + * and return with it held on failure. + */ +/*VARARGS5*/ +static ufs2_daddr_t +ffs_hashalloc(ip, cg, pref, size, rsize, allocator) + struct inode *ip; + u_int cg; + ufs2_daddr_t pref; + int size; /* Search size for data blocks, mode for inodes */ + int rsize; /* Real allocated size. */ + allocfcn_t *allocator; +{ + struct fs *fs; + ufs2_daddr_t result; + u_int i, icg = cg; + + mtx_assert(UFS_MTX(ITOUMP(ip)), MA_OWNED); +#ifdef INVARIANTS + if (ITOV(ip)->v_mount->mnt_kern_flag & MNTK_SUSPENDED) + panic("ffs_hashalloc: allocation on suspended filesystem"); +#endif + fs = ITOFS(ip); + /* + * 1: preferred cylinder group + */ + result = (*allocator)(ip, cg, pref, size, rsize); + if (result) + return (result); + /* + * 2: quadratic rehash + */ + for (i = 1; i < fs->fs_ncg; i *= 2) { + cg += i; + if (cg >= fs->fs_ncg) + cg -= fs->fs_ncg; + result = (*allocator)(ip, cg, 0, size, rsize); + if (result) + return (result); + } + /* + * 3: brute force search + * Note that we start at i == 2, since 0 was checked initially, + * and 1 is always checked in the quadratic rehash. + */ + cg = (icg + 2) % fs->fs_ncg; + for (i = 2; i < fs->fs_ncg; i++) { + result = (*allocator)(ip, cg, 0, size, rsize); + if (result) + return (result); + cg++; + if (cg == fs->fs_ncg) + cg = 0; + } + return (0); +} + +/* + * Determine whether a fragment can be extended. + * + * Check to see if the necessary fragments are available, and + * if they are, allocate them. 
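+ */
+
+/*
+ * Editor's note: the sketch below is not part of the original file.
+ * It isolates the block-boundary test used by ffs_fragextend(): the
+ * old fragments plus the requested extension must stay within one
+ * filesystem block, which is detected by comparing the fragment
+ * offsets of the first and last fragment. frag_fits_example() and its
+ * plain integer parameters are hypothetical simplifications; "frag"
+ * stands for the number of fragments per block.
+ */
+static int
+frag_fits_example(long bprev, int frag, int newfrags)
+{
+	int first = (int)(bprev % frag);		  /* offset of bprev */
+	int last = (int)((bprev + newfrags - 1) % frag);  /* offset of end */
+
+	/* Crossing a block boundary makes the offset wrap around. */
+	return (first <= last);
+}
+
+/*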
+ */ +static ufs2_daddr_t +ffs_fragextend(ip, cg, bprev, osize, nsize) + struct inode *ip; + u_int cg; + ufs2_daddr_t bprev; + int osize, nsize; +{ + struct fs *fs; + struct cg *cgp; + struct buf *bp; + struct ufsmount *ump; + int nffree; + long bno; + int frags, bbase; + int i, error; + u_int8_t *blksfree; + + ump = ITOUMP(ip); + fs = ump->um_fs; + if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize)) + return (0); + frags = numfrags(fs, nsize); + bbase = fragnum(fs, bprev); + if (bbase > fragnum(fs, (bprev + frags - 1))) { + /* cannot extend across a block boundary */ + return (0); + } + UFS_UNLOCK(ump); + error = bread(ump->um_devvp, fsbtodb(fs, cgtod(fs, cg)), + (int)fs->fs_cgsize, NOCRED, &bp); + if (error) + goto fail; + cgp = (struct cg *)bp->b_data; + if (!cg_chkmagic(cgp)) + goto fail; + bp->b_xflags |= BX_BKGRDWRITE; + cgp->cg_old_time = cgp->cg_time = time_second; + bno = dtogd(fs, bprev); + blksfree = cg_blksfree(cgp); + for (i = numfrags(fs, osize); i < frags; i++) + if (isclr(blksfree, bno + i)) + goto fail; + /* + * the current fragment can be extended + * deduct the count on fragment being extended into + * increase the count on the remaining fragment (if any) + * allocate the extended piece + */ + for (i = frags; i < fs->fs_frag - bbase; i++) + if (isclr(blksfree, bno + i)) + break; + cgp->cg_frsum[i - numfrags(fs, osize)]--; + if (i != frags) + cgp->cg_frsum[i - frags]++; + for (i = numfrags(fs, osize), nffree = 0; i < frags; i++) { + clrbit(blksfree, bno + i); + cgp->cg_cs.cs_nffree--; + nffree++; + } + UFS_LOCK(ump); + fs->fs_cstotal.cs_nffree -= nffree; + fs->fs_cs(fs, cg).cs_nffree -= nffree; + fs->fs_fmod = 1; + ACTIVECLEAR(fs, cg); + UFS_UNLOCK(ump); + if (DOINGSOFTDEP(ITOV(ip))) + softdep_setup_blkmapdep(bp, UFSTOVFS(ump), bprev, + frags, numfrags(fs, osize)); + bdwrite(bp); + return (bprev); + +fail: + brelse(bp); + UFS_LOCK(ump); + return (0); + +} + +/* + * Determine whether a block can be allocated. + * + * Check to see if a block of the appropriate size is available, + * and if it is, allocate it. 
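+ */
+
+/*
+ * Editor's note: the sketch below is not part of the original file.
+ * It models the cg_frsum[] lookup done by ffs_alloccg(): frsum[i]
+ * counts the free runs of exactly i fragments in the cylinder group,
+ * so the allocator takes the smallest run size that satisfies the
+ * request and otherwise falls back to carving up a whole block.
+ * pick_run_size_example() is a hypothetical helper.
+ */
+static int
+pick_run_size_example(const int *frsum, int needed, int frags_per_block)
+{
+	int allocsiz;
+
+	for (allocsiz = needed; allocsiz < frags_per_block; allocsiz++)
+		if (frsum[allocsiz] != 0)
+			return (allocsiz);	/* smallest adequate run */
+	return (frags_per_block);	/* no run fits; split a full block */
+}
+
+/*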
+ */ +static ufs2_daddr_t +ffs_alloccg(ip, cg, bpref, size, rsize) + struct inode *ip; + u_int cg; + ufs2_daddr_t bpref; + int size; + int rsize; +{ + struct fs *fs; + struct cg *cgp; + struct buf *bp; + struct ufsmount *ump; + ufs1_daddr_t bno; + ufs2_daddr_t blkno; + int i, allocsiz, error, frags; + u_int8_t *blksfree; + + ump = ITOUMP(ip); + fs = ump->um_fs; + if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize) + return (0); + UFS_UNLOCK(ump); + error = bread(ump->um_devvp, fsbtodb(fs, cgtod(fs, cg)), + (int)fs->fs_cgsize, NOCRED, &bp); + if (error) + goto fail; + cgp = (struct cg *)bp->b_data; + if (!cg_chkmagic(cgp) || + (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) + goto fail; + bp->b_xflags |= BX_BKGRDWRITE; + cgp->cg_old_time = cgp->cg_time = time_second; + if (size == fs->fs_bsize) { + UFS_LOCK(ump); + blkno = ffs_alloccgblk(ip, bp, bpref, rsize); + ACTIVECLEAR(fs, cg); + UFS_UNLOCK(ump); + bdwrite(bp); + return (blkno); + } + /* + * check to see if any fragments are already available + * allocsiz is the size which will be allocated, hacking + * it down to a smaller size if necessary + */ + blksfree = cg_blksfree(cgp); + frags = numfrags(fs, size); + for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++) + if (cgp->cg_frsum[allocsiz] != 0) + break; + if (allocsiz == fs->fs_frag) { + /* + * no fragments were available, so a block will be + * allocated, and hacked up + */ + if (cgp->cg_cs.cs_nbfree == 0) + goto fail; + UFS_LOCK(ump); + blkno = ffs_alloccgblk(ip, bp, bpref, rsize); + ACTIVECLEAR(fs, cg); + UFS_UNLOCK(ump); + bdwrite(bp); + return (blkno); + } + KASSERT(size == rsize, + ("ffs_alloccg: size(%d) != rsize(%d)", size, rsize)); + bno = ffs_mapsearch(fs, cgp, bpref, allocsiz); + if (bno < 0) + goto fail; + for (i = 0; i < frags; i++) + clrbit(blksfree, bno + i); + cgp->cg_cs.cs_nffree -= frags; + cgp->cg_frsum[allocsiz]--; + if (frags != allocsiz) + cgp->cg_frsum[allocsiz - frags]++; + UFS_LOCK(ump); + fs->fs_cstotal.cs_nffree -= frags; + fs->fs_cs(fs, cg).cs_nffree -= frags; + fs->fs_fmod = 1; + blkno = cgbase(fs, cg) + bno; + ACTIVECLEAR(fs, cg); + UFS_UNLOCK(ump); + if (DOINGSOFTDEP(ITOV(ip))) + softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno, frags, 0); + bdwrite(bp); + return (blkno); + +fail: + brelse(bp); + UFS_LOCK(ump); + return (0); +} + +/* + * Allocate a block in a cylinder group. + * + * This algorithm implements the following policy: + * 1) allocate the requested block. + * 2) allocate a rotationally optimal block in the same cylinder. + * 3) allocate the next available block on the block rotor for the + * specified cylinder group. + * Note that this routine only allocates fs_bsize blocks; these + * blocks may be fragmented by the routine that allocates them. 
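+ */
+
+/*
+ * Editor's note: the sketch below is not part of the original file.
+ * It is a toy model of the policy ffs_alloccgblk() implements: hand
+ * out the preferred block if it is free, otherwise take the next free
+ * block found while sweeping from the cylinder group's rotor. The
+ * one-byte-per-block bitmap (1 = free) and pick_block_example() are
+ * hypothetical simplifications of the real packed cg maps and ignore
+ * the metadata/data zone mapping done by the kernel code.
+ */
+static int
+pick_block_example(const unsigned char *blkfree, int nblocks, int pref,
+    int rotor)
+{
+	int i, bno;
+
+	if (pref >= 0 && pref < nblocks && blkfree[pref])
+		return (pref);			/* requested block is free */
+	for (i = 0; i < nblocks; i++) {		/* sweep from the rotor */
+		bno = (rotor + i) % nblocks;
+		if (blkfree[bno])
+			return (bno);
+	}
+	return (-1);				/* cylinder group is full */
+}
+
+/*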
+ */ +static ufs2_daddr_t +ffs_alloccgblk(ip, bp, bpref, size) + struct inode *ip; + struct buf *bp; + ufs2_daddr_t bpref; + int size; +{ + struct fs *fs; + struct cg *cgp; + struct ufsmount *ump; + ufs1_daddr_t bno; + ufs2_daddr_t blkno; + u_int8_t *blksfree; + int i, cgbpref; + + ump = ITOUMP(ip); + fs = ump->um_fs; + mtx_assert(UFS_MTX(ump), MA_OWNED); + cgp = (struct cg *)bp->b_data; + blksfree = cg_blksfree(cgp); + if (bpref == 0) { + bpref = cgbase(fs, cgp->cg_cgx) + cgp->cg_rotor + fs->fs_frag; + } else if ((cgbpref = dtog(fs, bpref)) != cgp->cg_cgx) { + /* map bpref to correct zone in this cg */ + if (bpref < cgdata(fs, cgbpref)) + bpref = cgmeta(fs, cgp->cg_cgx); + else + bpref = cgdata(fs, cgp->cg_cgx); + } + /* + * if the requested block is available, use it + */ + bno = dtogd(fs, blknum(fs, bpref)); + if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno))) + goto gotit; + /* + * Take the next available block in this cylinder group. + */ + bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag); + if (bno < 0) + return (0); + /* Update cg_rotor only if allocated from the data zone */ + if (bno >= dtogd(fs, cgdata(fs, cgp->cg_cgx))) + cgp->cg_rotor = bno; +gotit: + blkno = fragstoblks(fs, bno); + ffs_clrblock(fs, blksfree, (long)blkno); + ffs_clusteracct(fs, cgp, blkno, -1); + cgp->cg_cs.cs_nbfree--; + fs->fs_cstotal.cs_nbfree--; + fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--; + fs->fs_fmod = 1; + blkno = cgbase(fs, cgp->cg_cgx) + bno; + /* + * If the caller didn't want the whole block free the frags here. + */ + size = numfrags(fs, size); + if (size != fs->fs_frag) { + bno = dtogd(fs, blkno); + for (i = size; i < fs->fs_frag; i++) + setbit(blksfree, bno + i); + i = fs->fs_frag - size; + cgp->cg_cs.cs_nffree += i; + fs->fs_cstotal.cs_nffree += i; + fs->fs_cs(fs, cgp->cg_cgx).cs_nffree += i; + fs->fs_fmod = 1; + cgp->cg_frsum[i]++; + } + /* XXX Fixme. */ + UFS_UNLOCK(ump); + if (DOINGSOFTDEP(ITOV(ip))) + softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno, + size, 0); + UFS_LOCK(ump); + return (blkno); +} + +/* + * Determine whether a cluster can be allocated. + * + * We do not currently check for optimal rotational layout if there + * are multiple choices in the same cylinder group. Instead we just + * take the first one that we find following bpref. + */ +static ufs2_daddr_t +ffs_clusteralloc(ip, cg, bpref, len) + struct inode *ip; + u_int cg; + ufs2_daddr_t bpref; + int len; +{ + struct fs *fs; + struct cg *cgp; + struct buf *bp; + struct ufsmount *ump; + int i, run, bit, map, got; + ufs2_daddr_t bno; + u_char *mapp; + int32_t *lp; + u_int8_t *blksfree; + + ump = ITOUMP(ip); + fs = ump->um_fs; + if (fs->fs_maxcluster[cg] < len) + return (0); + UFS_UNLOCK(ump); + if (bread(ump->um_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, + NOCRED, &bp)) + goto fail_lock; + cgp = (struct cg *)bp->b_data; + if (!cg_chkmagic(cgp)) + goto fail_lock; + bp->b_xflags |= BX_BKGRDWRITE; + /* + * Check to see if a cluster of the needed size (or bigger) is + * available in this cylinder group. + */ + lp = &cg_clustersum(cgp)[len]; + for (i = len; i <= fs->fs_contigsumsize; i++) + if (*lp++ > 0) + break; + if (i > fs->fs_contigsumsize) { + /* + * This is the first time looking for a cluster in this + * cylinder group. Update the cluster summary information + * to reflect the true maximum sized cluster so that + * future cluster allocation requests can avoid reading + * the cylinder group map only to find no clusters. 
+ */ + lp = &cg_clustersum(cgp)[len - 1]; + for (i = len - 1; i > 0; i--) + if (*lp-- > 0) + break; + UFS_LOCK(ump); + fs->fs_maxcluster[cg] = i; + goto fail; + } + /* + * Search the cluster map to find a big enough cluster. + * We take the first one that we find, even if it is larger + * than we need as we prefer to get one close to the previous + * block allocation. We do not search before the current + * preference point as we do not want to allocate a block + * that is allocated before the previous one (as we will + * then have to wait for another pass of the elevator + * algorithm before it will be read). We prefer to fail and + * be recalled to try an allocation in the next cylinder group. + */ + if (dtog(fs, bpref) != cg) + bpref = cgdata(fs, cg); + else + bpref = blknum(fs, bpref); + bpref = fragstoblks(fs, dtogd(fs, bpref)); + mapp = &cg_clustersfree(cgp)[bpref / NBBY]; + map = *mapp++; + bit = 1 << (bpref % NBBY); + for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) { + if ((map & bit) == 0) { + run = 0; + } else { + run++; + if (run == len) + break; + } + if ((got & (NBBY - 1)) != (NBBY - 1)) { + bit <<= 1; + } else { + map = *mapp++; + bit = 1; + } + } + if (got >= cgp->cg_nclusterblks) + goto fail_lock; + /* + * Allocate the cluster that we have found. + */ + blksfree = cg_blksfree(cgp); + for (i = 1; i <= len; i++) + if (!ffs_isblock(fs, blksfree, got - run + i)) + panic("ffs_clusteralloc: map mismatch"); + bno = cgbase(fs, cg) + blkstofrags(fs, got - run + 1); + if (dtog(fs, bno) != cg) + panic("ffs_clusteralloc: allocated out of group"); + len = blkstofrags(fs, len); + UFS_LOCK(ump); + for (i = 0; i < len; i += fs->fs_frag) + if (ffs_alloccgblk(ip, bp, bno + i, fs->fs_bsize) != bno + i) + panic("ffs_clusteralloc: lost block"); + ACTIVECLEAR(fs, cg); + UFS_UNLOCK(ump); + bdwrite(bp); + return (bno); + +fail_lock: + UFS_LOCK(ump); +fail: + brelse(bp); + return (0); +} + +static inline struct buf * +getinobuf(struct inode *ip, u_int cg, u_int32_t cginoblk, int gbflags) +{ + struct fs *fs; + + fs = ITOFS(ip); + return (getblk(ITODEVVP(ip), fsbtodb(fs, ino_to_fsba(fs, + cg * fs->fs_ipg + cginoblk)), (int)fs->fs_bsize, 0, 0, + gbflags)); +} + +/* + * Synchronous inode initialization is needed only when barrier writes do not + * work as advertised, and will impose a heavy cost on file creation in a newly + * created filesystem. + */ +static int doasyncinodeinit = 1; +SYSCTL_INT(_vfs_ffs, OID_AUTO, doasyncinodeinit, CTLFLAG_RWTUN, + &doasyncinodeinit, 0, + "Perform inode block initialization using asynchronous writes"); + +/* + * Determine whether an inode can be allocated. + * + * Check to see if an inode is available, and if it is, + * allocate it using the following policy: + * 1) allocate the requested inode. + * 2) allocate the next available inode after the requested + * inode in the specified cylinder group. 
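+ *
+ * For example, if ipref names an inode that is already in use, the code
+ * below falls back to scanning the cg_inosused() bitmap from cg_irotor
+ * with memcchr() for the first byte that is not 0xff, i.e. the first
+ * byte that still contains a free inode.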
+ */ +static ufs2_daddr_t +ffs_nodealloccg(ip, cg, ipref, mode, unused) + struct inode *ip; + u_int cg; + ufs2_daddr_t ipref; + int mode; + int unused; +{ + struct fs *fs; + struct cg *cgp; + struct buf *bp, *ibp; + struct ufsmount *ump; + u_int8_t *inosused, *loc; + struct ufs2_dinode *dp2; + int error, start, len, i; + u_int32_t old_initediblk; + + ump = ITOUMP(ip); + fs = ump->um_fs; +check_nifree: + if (fs->fs_cs(fs, cg).cs_nifree == 0) + return (0); + UFS_UNLOCK(ump); + error = bread(ump->um_devvp, fsbtodb(fs, cgtod(fs, cg)), + (int)fs->fs_cgsize, NOCRED, &bp); + if (error) { + brelse(bp); + UFS_LOCK(ump); + return (0); + } + cgp = (struct cg *)bp->b_data; +restart: + if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) { + brelse(bp); + UFS_LOCK(ump); + return (0); + } + bp->b_xflags |= BX_BKGRDWRITE; + inosused = cg_inosused(cgp); + if (ipref) { + ipref %= fs->fs_ipg; + if (isclr(inosused, ipref)) + goto gotit; + } + start = cgp->cg_irotor / NBBY; + len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY); + loc = memcchr(&inosused[start], 0xff, len); + if (loc == NULL) { + len = start + 1; + start = 0; + loc = memcchr(&inosused[start], 0xff, len); + if (loc == NULL) { + printf("cg = %d, irotor = %ld, fs = %s\n", + cg, (long)cgp->cg_irotor, fs->fs_fsmnt); + panic("ffs_nodealloccg: map corrupted"); + /* NOTREACHED */ + } + } + ipref = (loc - inosused) * NBBY + ffs(~*loc) - 1; +gotit: + /* + * Check to see if we need to initialize more inodes. + */ + if (fs->fs_magic == FS_UFS2_MAGIC && + ipref + INOPB(fs) > cgp->cg_initediblk && + cgp->cg_initediblk < cgp->cg_niblk) { + old_initediblk = cgp->cg_initediblk; + + /* + * Free the cylinder group lock before writing the + * initialized inode block. Entering the + * babarrierwrite() with the cylinder group lock + * causes lock order violation between the lock and + * snaplk. + * + * Another thread can decide to initialize the same + * inode block, but whichever thread first gets the + * cylinder group lock after writing the newly + * allocated inode block will update it and the other + * will realize that it has lost and leave the + * cylinder group unchanged. + */ + ibp = getinobuf(ip, cg, old_initediblk, GB_LOCK_NOWAIT); + brelse(bp); + if (ibp == NULL) { + /* + * The inode block buffer is already owned by + * another thread, which must initialize it. + * Wait on the buffer to allow another thread + * to finish the updates, with dropped cg + * buffer lock, then retry. + */ + ibp = getinobuf(ip, cg, old_initediblk, 0); + brelse(ibp); + UFS_LOCK(ump); + goto check_nifree; + } + bzero(ibp->b_data, (int)fs->fs_bsize); + dp2 = (struct ufs2_dinode *)(ibp->b_data); + for (i = 0; i < INOPB(fs); i++) { + while (dp2->di_gen == 0) + dp2->di_gen = arc4random(); + dp2++; + } + + /* + * Rather than adding a soft updates dependency to ensure + * that the new inode block is written before it is claimed + * by the cylinder group map, we just do a barrier write + * here. The barrier write will ensure that the inode block + * gets written before the updated cylinder group map can be + * written. The barrier write should only slow down bulk + * loading of newly created filesystems. + */ + if (doasyncinodeinit) + babarrierwrite(ibp); + else + bwrite(ibp); + + /* + * After the inode block is written, try to update the + * cg initediblk pointer. If another thread beat us + * to it, then leave it unchanged as the other thread + * has already set it correctly. 
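+ *
+ * Concretely: the cylinder group buffer is re-read below, cg_initediblk
+ * is advanced by INOPB(fs) only if it still equals old_initediblk, and
+ * the allocation is then retried from "restart" with the updated map.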
+ */ + error = bread(ump->um_devvp, fsbtodb(fs, cgtod(fs, cg)), + (int)fs->fs_cgsize, NOCRED, &bp); + UFS_LOCK(ump); + ACTIVECLEAR(fs, cg); + UFS_UNLOCK(ump); + if (error != 0) { + brelse(bp); + return (error); + } + cgp = (struct cg *)bp->b_data; + if (cgp->cg_initediblk == old_initediblk) + cgp->cg_initediblk += INOPB(fs); + goto restart; + } + cgp->cg_old_time = cgp->cg_time = time_second; + cgp->cg_irotor = ipref; + UFS_LOCK(ump); + ACTIVECLEAR(fs, cg); + setbit(inosused, ipref); + cgp->cg_cs.cs_nifree--; + fs->fs_cstotal.cs_nifree--; + fs->fs_cs(fs, cg).cs_nifree--; + fs->fs_fmod = 1; + if ((mode & IFMT) == IFDIR) { + cgp->cg_cs.cs_ndir++; + fs->fs_cstotal.cs_ndir++; + fs->fs_cs(fs, cg).cs_ndir++; + } + UFS_UNLOCK(ump); + if (DOINGSOFTDEP(ITOV(ip))) + softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref, mode); + bdwrite(bp); + return ((ino_t)(cg * fs->fs_ipg + ipref)); +} + +/* + * Free a block or fragment. + * + * The specified block or fragment is placed back in the + * free map. If a fragment is deallocated, a possible + * block reassembly is checked. + */ +static void +ffs_blkfree_cg(ump, fs, devvp, bno, size, inum, dephd) + struct ufsmount *ump; + struct fs *fs; + struct vnode *devvp; + ufs2_daddr_t bno; + long size; + ino_t inum; + struct workhead *dephd; +{ + struct mount *mp; + struct cg *cgp; + struct buf *bp; + ufs1_daddr_t fragno, cgbno; + ufs2_daddr_t cgblkno; + int i, blk, frags, bbase; + u_int cg; + u_int8_t *blksfree; + struct cdev *dev; + + cg = dtog(fs, bno); + if (devvp->v_type == VREG) { + /* devvp is a snapshot */ + MPASS(devvp->v_mount->mnt_data == ump); + dev = ump->um_devvp->v_rdev; + cgblkno = fragstoblks(fs, cgtod(fs, cg)); + } else if (devvp->v_type == VCHR) { + /* devvp is a normal disk device */ + dev = devvp->v_rdev; + cgblkno = fsbtodb(fs, cgtod(fs, cg)); + ASSERT_VOP_LOCKED(devvp, "ffs_blkfree_cg"); + } else + return; +#ifdef INVARIANTS + if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 || + fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) { + printf("dev=%s, bno = %jd, bsize = %ld, size = %ld, fs = %s\n", + devtoname(dev), (intmax_t)bno, (long)fs->fs_bsize, + size, fs->fs_fsmnt); + panic("ffs_blkfree_cg: bad size"); + } +#endif + if ((u_int)bno >= fs->fs_size) { + printf("bad block %jd, ino %lu\n", (intmax_t)bno, + (u_long)inum); + ffs_fserr(fs, inum, "bad block"); + return; + } + if (bread(devvp, cgblkno, (int)fs->fs_cgsize, NOCRED, &bp)) { + brelse(bp); + return; + } + cgp = (struct cg *)bp->b_data; + if (!cg_chkmagic(cgp)) { + brelse(bp); + return; + } + bp->b_xflags |= BX_BKGRDWRITE; + cgp->cg_old_time = cgp->cg_time = time_second; + cgbno = dtogd(fs, bno); + blksfree = cg_blksfree(cgp); + UFS_LOCK(ump); + if (size == fs->fs_bsize) { + fragno = fragstoblks(fs, cgbno); + if (!ffs_isfreeblock(fs, blksfree, fragno)) { + if (devvp->v_type == VREG) { + UFS_UNLOCK(ump); + /* devvp is a snapshot */ + brelse(bp); + return; + } + printf("dev = %s, block = %jd, fs = %s\n", + devtoname(dev), (intmax_t)bno, fs->fs_fsmnt); + panic("ffs_blkfree_cg: freeing free block"); + } + ffs_setblock(fs, blksfree, fragno); + ffs_clusteracct(fs, cgp, fragno, 1); + cgp->cg_cs.cs_nbfree++; + fs->fs_cstotal.cs_nbfree++; + fs->fs_cs(fs, cg).cs_nbfree++; + } else { + bbase = cgbno - fragnum(fs, cgbno); + /* + * decrement the counts associated with the old frags + */ + blk = blkmap(fs, blksfree, bbase); + ffs_fragacct(fs, blk, cgp->cg_frsum, -1); + /* + * deallocate the fragment + */ + frags = numfrags(fs, size); + for (i = 0; i < frags; i++) { + if (isset(blksfree, 
cgbno + i)) { + printf("dev = %s, block = %jd, fs = %s\n", + devtoname(dev), (intmax_t)(bno + i), + fs->fs_fsmnt); + panic("ffs_blkfree_cg: freeing free frag"); + } + setbit(blksfree, cgbno + i); + } + cgp->cg_cs.cs_nffree += i; + fs->fs_cstotal.cs_nffree += i; + fs->fs_cs(fs, cg).cs_nffree += i; + /* + * add back in counts associated with the new frags + */ + blk = blkmap(fs, blksfree, bbase); + ffs_fragacct(fs, blk, cgp->cg_frsum, 1); + /* + * if a complete block has been reassembled, account for it + */ + fragno = fragstoblks(fs, bbase); + if (ffs_isblock(fs, blksfree, fragno)) { + cgp->cg_cs.cs_nffree -= fs->fs_frag; + fs->fs_cstotal.cs_nffree -= fs->fs_frag; + fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag; + ffs_clusteracct(fs, cgp, fragno, 1); + cgp->cg_cs.cs_nbfree++; + fs->fs_cstotal.cs_nbfree++; + fs->fs_cs(fs, cg).cs_nbfree++; + } + } + fs->fs_fmod = 1; + ACTIVECLEAR(fs, cg); + UFS_UNLOCK(ump); + mp = UFSTOVFS(ump); + if (MOUNTEDSOFTDEP(mp) && devvp->v_type == VCHR) + softdep_setup_blkfree(UFSTOVFS(ump), bp, bno, + numfrags(fs, size), dephd); + bdwrite(bp); +} + +struct ffs_blkfree_trim_params { + struct task task; + struct ufsmount *ump; + struct vnode *devvp; + ufs2_daddr_t bno; + long size; + ino_t inum; + struct workhead *pdephd; + struct workhead dephd; +}; + +static void +ffs_blkfree_trim_task(ctx, pending) + void *ctx; + int pending; +{ + struct ffs_blkfree_trim_params *tp; + + tp = ctx; + ffs_blkfree_cg(tp->ump, tp->ump->um_fs, tp->devvp, tp->bno, tp->size, + tp->inum, tp->pdephd); + vn_finished_secondary_write(UFSTOVFS(tp->ump)); + atomic_add_int(&tp->ump->um_trim_inflight, -1); + free(tp, M_TEMP); +} + +static void +ffs_blkfree_trim_completed(bip) + struct bio *bip; +{ + struct ffs_blkfree_trim_params *tp; + + tp = bip->bio_caller2; + g_destroy_bio(bip); + TASK_INIT(&tp->task, 0, ffs_blkfree_trim_task, tp); + taskqueue_enqueue(tp->ump->um_trim_tq, &tp->task); +} + +void +ffs_blkfree(ump, fs, devvp, bno, size, inum, vtype, dephd) + struct ufsmount *ump; + struct fs *fs; + struct vnode *devvp; + ufs2_daddr_t bno; + long size; + ino_t inum; + enum vtype vtype; + struct workhead *dephd; +{ + struct mount *mp; + struct bio *bip; + struct ffs_blkfree_trim_params *tp; + + /* + * Check to see if a snapshot wants to claim the block. + * Check that devvp is a normal disk device, not a snapshot, + * it has a snapshot(s) associated with it, and one of the + * snapshots wants to claim the block. + */ + if (devvp->v_type == VCHR && + (devvp->v_vflag & VV_COPYONWRITE) && + ffs_snapblkfree(fs, devvp, bno, size, inum, vtype, dephd)) { + return; + } + /* + * Nothing to delay if TRIM is disabled, or the operation is + * performed on the snapshot. + */ + if (!ump->um_candelete || devvp->v_type == VREG) { + ffs_blkfree_cg(ump, fs, devvp, bno, size, inum, dephd); + return; + } + + /* + * Postpone the set of the free bit in the cg bitmap until the + * BIO_DELETE is completed. Otherwise, due to disk queue + * reordering, TRIM might be issued after we reuse the block + * and write some new data into it. 
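+ *
+ * The deferral works by packaging the free into a ffs_blkfree_trim_params
+ * structure and issuing a BIO_DELETE; its completion handler
+ * (ffs_blkfree_trim_completed) queues ffs_blkfree_trim_task() on the
+ * mount's trim taskqueue, and only that task calls ffs_blkfree_cg() to
+ * mark the blocks free in the cylinder group map.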
+ */ + atomic_add_int(&ump->um_trim_inflight, 1); + tp = malloc(sizeof(struct ffs_blkfree_trim_params), M_TEMP, M_WAITOK); + tp->ump = ump; + tp->devvp = devvp; + tp->bno = bno; + tp->size = size; + tp->inum = inum; + if (dephd != NULL) { + LIST_INIT(&tp->dephd); + LIST_SWAP(dephd, &tp->dephd, worklist, wk_list); + tp->pdephd = &tp->dephd; + } else + tp->pdephd = NULL; + + bip = g_alloc_bio(); + bip->bio_cmd = BIO_DELETE; + bip->bio_offset = dbtob(fsbtodb(fs, bno)); + bip->bio_done = ffs_blkfree_trim_completed; + bip->bio_length = size; + bip->bio_caller2 = tp; + + mp = UFSTOVFS(ump); + vn_start_secondary_write(NULL, &mp, 0); + g_io_request(bip, (struct g_consumer *)devvp->v_bufobj.bo_private); +} + +#ifdef INVARIANTS +/* + * Verify allocation of a block or fragment. Returns true if block or + * fragment is allocated, false if it is free. + */ +static int +ffs_checkblk(ip, bno, size) + struct inode *ip; + ufs2_daddr_t bno; + long size; +{ + struct fs *fs; + struct cg *cgp; + struct buf *bp; + ufs1_daddr_t cgbno; + int i, error, frags, free; + u_int8_t *blksfree; + + fs = ITOFS(ip); + if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { + printf("bsize = %ld, size = %ld, fs = %s\n", + (long)fs->fs_bsize, size, fs->fs_fsmnt); + panic("ffs_checkblk: bad size"); + } + if ((u_int)bno >= fs->fs_size) + panic("ffs_checkblk: bad block %jd", (intmax_t)bno); + error = bread(ITODEVVP(ip), fsbtodb(fs, cgtod(fs, dtog(fs, bno))), + (int)fs->fs_cgsize, NOCRED, &bp); + if (error) + panic("ffs_checkblk: cg bread failed"); + cgp = (struct cg *)bp->b_data; + if (!cg_chkmagic(cgp)) + panic("ffs_checkblk: cg magic mismatch"); + bp->b_xflags |= BX_BKGRDWRITE; + blksfree = cg_blksfree(cgp); + cgbno = dtogd(fs, bno); + if (size == fs->fs_bsize) { + free = ffs_isblock(fs, blksfree, fragstoblks(fs, cgbno)); + } else { + frags = numfrags(fs, size); + for (free = 0, i = 0; i < frags; i++) + if (isset(blksfree, cgbno + i)) + free++; + if (free != 0 && free != frags) + panic("ffs_checkblk: partially free fragment"); + } + brelse(bp); + return (!free); +} +#endif /* INVARIANTS */ + +/* + * Free an inode. + */ +int +ffs_vfree(pvp, ino, mode) + struct vnode *pvp; + ino_t ino; + int mode; +{ + struct ufsmount *ump; + struct inode *ip; + + if (DOINGSOFTDEP(pvp)) { + softdep_freefile(pvp, ino, mode); + return (0); + } + ip = VTOI(pvp); + ump = VFSTOUFS(pvp->v_mount); + return (ffs_freefile(ump, ump->um_fs, ump->um_devvp, ino, mode, NULL)); +} + +/* + * Do the actual free operation. + * The specified inode is placed back in the free map. 
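+ *
+ * Note that "freeing" here only clears the inode's bit in cg_inosused()
+ * and adjusts the per-cg and filesystem-wide inode counts; releasing the
+ * inode's data blocks is the caller's responsibility.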
+ */ +int +ffs_freefile(ump, fs, devvp, ino, mode, wkhd) + struct ufsmount *ump; + struct fs *fs; + struct vnode *devvp; + ino_t ino; + int mode; + struct workhead *wkhd; +{ + struct cg *cgp; + struct buf *bp; + ufs2_daddr_t cgbno; + int error; + u_int cg; + u_int8_t *inosused; + struct cdev *dev; + + cg = ino_to_cg(fs, ino); + if (devvp->v_type == VREG) { + /* devvp is a snapshot */ + MPASS(devvp->v_mount->mnt_data == ump); + dev = ump->um_devvp->v_rdev; + cgbno = fragstoblks(fs, cgtod(fs, cg)); + } else if (devvp->v_type == VCHR) { + /* devvp is a normal disk device */ + dev = devvp->v_rdev; + cgbno = fsbtodb(fs, cgtod(fs, cg)); + } else { + bp = NULL; + return (0); + } + if (ino >= fs->fs_ipg * fs->fs_ncg) + panic("ffs_freefile: range: dev = %s, ino = %ju, fs = %s", + devtoname(dev), (uintmax_t)ino, fs->fs_fsmnt); + if ((error = bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp))) { + brelse(bp); + return (error); + } + cgp = (struct cg *)bp->b_data; + if (!cg_chkmagic(cgp)) { + brelse(bp); + return (0); + } + bp->b_xflags |= BX_BKGRDWRITE; + cgp->cg_old_time = cgp->cg_time = time_second; + inosused = cg_inosused(cgp); + ino %= fs->fs_ipg; + if (isclr(inosused, ino)) { + printf("dev = %s, ino = %ju, fs = %s\n", devtoname(dev), + (uintmax_t)(ino + cg * fs->fs_ipg), fs->fs_fsmnt); + if (fs->fs_ronly == 0) + panic("ffs_freefile: freeing free inode"); + } + clrbit(inosused, ino); + if (ino < cgp->cg_irotor) + cgp->cg_irotor = ino; + cgp->cg_cs.cs_nifree++; + UFS_LOCK(ump); + fs->fs_cstotal.cs_nifree++; + fs->fs_cs(fs, cg).cs_nifree++; + if ((mode & IFMT) == IFDIR) { + cgp->cg_cs.cs_ndir--; + fs->fs_cstotal.cs_ndir--; + fs->fs_cs(fs, cg).cs_ndir--; + } + fs->fs_fmod = 1; + ACTIVECLEAR(fs, cg); + UFS_UNLOCK(ump); + if (MOUNTEDSOFTDEP(UFSTOVFS(ump)) && devvp->v_type == VCHR) + softdep_setup_inofree(UFSTOVFS(ump), bp, + ino + cg * fs->fs_ipg, wkhd); + bdwrite(bp); + return (0); +} + +/* + * Check to see if a file is free. + */ +int +ffs_checkfreefile(fs, devvp, ino) + struct fs *fs; + struct vnode *devvp; + ino_t ino; +{ + struct cg *cgp; + struct buf *bp; + ufs2_daddr_t cgbno; + int ret; + u_int cg; + u_int8_t *inosused; + + cg = ino_to_cg(fs, ino); + if (devvp->v_type == VREG) { + /* devvp is a snapshot */ + cgbno = fragstoblks(fs, cgtod(fs, cg)); + } else if (devvp->v_type == VCHR) { + /* devvp is a normal disk device */ + cgbno = fsbtodb(fs, cgtod(fs, cg)); + } else { + return (1); + } + if (ino >= fs->fs_ipg * fs->fs_ncg) + return (1); + if (bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp)) { + brelse(bp); + return (1); + } + cgp = (struct cg *)bp->b_data; + if (!cg_chkmagic(cgp)) { + brelse(bp); + return (1); + } + inosused = cg_inosused(cgp); + ino %= fs->fs_ipg; + ret = isclr(inosused, ino); + brelse(bp); + return (ret); +} + +/* + * Find a block of the specified size in the specified cylinder group. + * + * It is a panic if a request is made to find a block if none are + * available. 
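+ *
+ * The search below works in two passes: scanc() with the fragtbl[]
+ * signatures finds a byte of the free map containing a run of at least
+ * allocsiz free fragments, and the bit loop that follows sifts through
+ * that block to return the exact fragment offset.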
+ */ +static ufs1_daddr_t +ffs_mapsearch(fs, cgp, bpref, allocsiz) + struct fs *fs; + struct cg *cgp; + ufs2_daddr_t bpref; + int allocsiz; +{ + ufs1_daddr_t bno; + int start, len, loc, i; + int blk, field, subfield, pos; + u_int8_t *blksfree; + + /* + * find the fragment by searching through the free block + * map for an appropriate bit pattern + */ + if (bpref) + start = dtogd(fs, bpref) / NBBY; + else + start = cgp->cg_frotor / NBBY; + blksfree = cg_blksfree(cgp); + len = howmany(fs->fs_fpg, NBBY) - start; + loc = scanc((u_int)len, (u_char *)&blksfree[start], + fragtbl[fs->fs_frag], + (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); + if (loc == 0) { + len = start + 1; + start = 0; + loc = scanc((u_int)len, (u_char *)&blksfree[0], + fragtbl[fs->fs_frag], + (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); + if (loc == 0) { + printf("start = %d, len = %d, fs = %s\n", + start, len, fs->fs_fsmnt); + panic("ffs_alloccg: map corrupted"); + /* NOTREACHED */ + } + } + bno = (start + len - loc) * NBBY; + cgp->cg_frotor = bno; + /* + * found the byte in the map + * sift through the bits to find the selected frag + */ + for (i = bno + NBBY; bno < i; bno += fs->fs_frag) { + blk = blkmap(fs, blksfree, bno); + blk <<= 1; + field = around[allocsiz]; + subfield = inside[allocsiz]; + for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) { + if ((blk & field) == subfield) + return (bno + pos); + field <<= 1; + subfield <<= 1; + } + } + printf("bno = %lu, fs = %s\n", (u_long)bno, fs->fs_fsmnt); + panic("ffs_alloccg: block not in map"); + return (-1); +} + +/* + * Fserr prints the name of a filesystem with an error diagnostic. + * + * The form of the error message is: + * fs: error message + */ +void +ffs_fserr(fs, inum, cp) + struct fs *fs; + ino_t inum; + char *cp; +{ + struct thread *td = curthread; /* XXX */ + struct proc *p = td->td_proc; + + log(LOG_ERR, "pid %d (%s), uid %d inumber %ju on %s: %s\n", + p->p_pid, p->p_comm, td->td_ucred->cr_uid, (uintmax_t)inum, + fs->fs_fsmnt, cp); +} + +/* + * This function provides the capability for the fsck program to + * update an active filesystem. Fourteen operations are provided: + * + * adjrefcnt(inode, amt) - adjusts the reference count on the + * specified inode by the specified amount. Under normal + * operation the count should always go down. Decrementing + * the count to zero will cause the inode to be freed. + * adjblkcnt(inode, amt) - adjust the number of blocks used by the + * inode by the specified amount. + * adjndir, adjbfree, adjifree, adjffree, adjnumclusters(amt) - + * adjust the superblock summary. + * freedirs(inode, count) - directory inodes [inode..inode + count - 1] + * are marked as free. Inodes should never have to be marked + * as in use. + * freefiles(inode, count) - file inodes [inode..inode + count - 1] + * are marked as free. Inodes should never have to be marked + * as in use. + * freeblks(blockno, size) - blocks [blockno..blockno + size - 1] + * are marked as free. Blocks should never have to be marked + * as in use. + * setflags(flags, set/clear) - the fs_flags field has the specified + * flags set (second parameter +1) or cleared (second parameter -1). + * setcwd(dirinode) - set the current directory to dirinode in the + * filesystem associated with the snapshot. + * setdotdot(oldvalue, newvalue) - Verify that the inode number for ".." + * in the current directory is oldvalue then change it to newvalue. 
+ * unlink(nameptr, oldvalue) - Verify that the inode number associated + * with nameptr in the current directory is oldvalue then unlink it. + * + * The following functions may only be used on a quiescent filesystem + * by the soft updates journal. They are not safe to be run on an active + * filesystem. + * + * setinode(inode, dip) - the specified disk inode is replaced with the + * contents pointed to by dip. + * setbufoutput(fd, flags) - output associated with the specified file + * descriptor (which must reference the character device supporting + * the filesystem) switches from using physio to running through the + * buffer cache when flags is set to 1. The descriptor reverts to + * physio for output when flags is set to zero. + */ + +static int sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS); + +SYSCTL_PROC(_vfs_ffs, FFS_ADJ_REFCNT, adjrefcnt, CTLFLAG_WR|CTLTYPE_STRUCT, + 0, 0, sysctl_ffs_fsck, "S,fsck", "Adjust Inode Reference Count"); + +static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_BLKCNT, adjblkcnt, CTLFLAG_WR, + sysctl_ffs_fsck, "Adjust Inode Used Blocks Count"); + +static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NDIR, adjndir, CTLFLAG_WR, + sysctl_ffs_fsck, "Adjust number of directories"); + +static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NBFREE, adjnbfree, CTLFLAG_WR, + sysctl_ffs_fsck, "Adjust number of free blocks"); + +static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NIFREE, adjnifree, CTLFLAG_WR, + sysctl_ffs_fsck, "Adjust number of free inodes"); + +static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NFFREE, adjnffree, CTLFLAG_WR, + sysctl_ffs_fsck, "Adjust number of free frags"); + +static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NUMCLUSTERS, adjnumclusters, CTLFLAG_WR, + sysctl_ffs_fsck, "Adjust number of free clusters"); + +static SYSCTL_NODE(_vfs_ffs, FFS_DIR_FREE, freedirs, CTLFLAG_WR, + sysctl_ffs_fsck, "Free Range of Directory Inodes"); + +static SYSCTL_NODE(_vfs_ffs, FFS_FILE_FREE, freefiles, CTLFLAG_WR, + sysctl_ffs_fsck, "Free Range of File Inodes"); + +static SYSCTL_NODE(_vfs_ffs, FFS_BLK_FREE, freeblks, CTLFLAG_WR, + sysctl_ffs_fsck, "Free Range of Blocks"); + +static SYSCTL_NODE(_vfs_ffs, FFS_SET_FLAGS, setflags, CTLFLAG_WR, + sysctl_ffs_fsck, "Change Filesystem Flags"); + +static SYSCTL_NODE(_vfs_ffs, FFS_SET_CWD, setcwd, CTLFLAG_WR, + sysctl_ffs_fsck, "Set Current Working Directory"); + +static SYSCTL_NODE(_vfs_ffs, FFS_SET_DOTDOT, setdotdot, CTLFLAG_WR, + sysctl_ffs_fsck, "Change Value of .. 
Entry"); + +static SYSCTL_NODE(_vfs_ffs, FFS_UNLINK, unlink, CTLFLAG_WR, + sysctl_ffs_fsck, "Unlink a Duplicate Name"); + +static SYSCTL_NODE(_vfs_ffs, FFS_SET_INODE, setinode, CTLFLAG_WR, + sysctl_ffs_fsck, "Update an On-Disk Inode"); + +static SYSCTL_NODE(_vfs_ffs, FFS_SET_BUFOUTPUT, setbufoutput, CTLFLAG_WR, + sysctl_ffs_fsck, "Set Buffered Writing for Descriptor"); + +#define DEBUG 1 +#ifdef DEBUG +static int fsckcmds = 0; +SYSCTL_INT(_debug, OID_AUTO, fsckcmds, CTLFLAG_RW, &fsckcmds, 0, ""); +#endif /* DEBUG */ + +static int buffered_write(struct file *, struct uio *, struct ucred *, + int, struct thread *); + +static int +sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS) +{ + struct thread *td = curthread; + struct fsck_cmd cmd; + struct ufsmount *ump; + struct vnode *vp, *dvp, *fdvp; + struct inode *ip, *dp; + struct mount *mp; + struct fs *fs; + ufs2_daddr_t blkno; + long blkcnt, blksize; + struct file *fp, *vfp; + cap_rights_t rights; + int filetype, error; + static struct fileops *origops, bufferedops; + + if (req->newlen > sizeof cmd) + return (EBADRPC); + if ((error = SYSCTL_IN(req, &cmd, sizeof cmd)) != 0) + return (error); + if (cmd.version != FFS_CMD_VERSION) + return (ERPCMISMATCH); + if ((error = getvnode(td, cmd.handle, + cap_rights_init(&rights, CAP_FSCK), &fp)) != 0) + return (error); + vp = fp->f_data; + if (vp->v_type != VREG && vp->v_type != VDIR) { + fdrop(fp, td); + return (EINVAL); + } + vn_start_write(vp, &mp, V_WAIT); + if (mp == NULL || + strncmp(mp->mnt_stat.f_fstypename, "ufs", MFSNAMELEN)) { + vn_finished_write(mp); + fdrop(fp, td); + return (EINVAL); + } + ump = VFSTOUFS(mp); + if ((mp->mnt_flag & MNT_RDONLY) && + ump->um_fsckpid != td->td_proc->p_pid) { + vn_finished_write(mp); + fdrop(fp, td); + return (EROFS); + } + fs = ump->um_fs; + filetype = IFREG; + + switch (oidp->oid_number) { + + case FFS_SET_FLAGS: +#ifdef DEBUG + if (fsckcmds) + printf("%s: %s flags\n", mp->mnt_stat.f_mntonname, + cmd.size > 0 ? "set" : "clear"); +#endif /* DEBUG */ + if (cmd.size > 0) + fs->fs_flags |= (long)cmd.value; + else + fs->fs_flags &= ~(long)cmd.value; + break; + + case FFS_ADJ_REFCNT: +#ifdef DEBUG + if (fsckcmds) { + printf("%s: adjust inode %jd link count by %jd\n", + mp->mnt_stat.f_mntonname, (intmax_t)cmd.value, + (intmax_t)cmd.size); + } +#endif /* DEBUG */ + if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp))) + break; + ip = VTOI(vp); + ip->i_nlink += cmd.size; + DIP_SET(ip, i_nlink, ip->i_nlink); + ip->i_effnlink += cmd.size; + ip->i_flag |= IN_CHANGE | IN_MODIFIED; + error = ffs_update(vp, 1); + if (DOINGSOFTDEP(vp)) + softdep_change_linkcnt(ip); + vput(vp); + break; + + case FFS_ADJ_BLKCNT: +#ifdef DEBUG + if (fsckcmds) { + printf("%s: adjust inode %jd block count by %jd\n", + mp->mnt_stat.f_mntonname, (intmax_t)cmd.value, + (intmax_t)cmd.size); + } +#endif /* DEBUG */ + if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp))) + break; + ip = VTOI(vp); + DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + cmd.size); + ip->i_flag |= IN_CHANGE | IN_MODIFIED; + error = ffs_update(vp, 1); + vput(vp); + break; + + case FFS_DIR_FREE: + filetype = IFDIR; + /* fall through */ + + case FFS_FILE_FREE: +#ifdef DEBUG + if (fsckcmds) { + if (cmd.size == 1) + printf("%s: free %s inode %ju\n", + mp->mnt_stat.f_mntonname, + filetype == IFDIR ? "directory" : "file", + (uintmax_t)cmd.value); + else + printf("%s: free %s inodes %ju-%ju\n", + mp->mnt_stat.f_mntonname, + filetype == IFDIR ? 
"directory" : "file", + (uintmax_t)cmd.value, + (uintmax_t)(cmd.value + cmd.size - 1)); + } +#endif /* DEBUG */ + while (cmd.size > 0) { + if ((error = ffs_freefile(ump, fs, ump->um_devvp, + cmd.value, filetype, NULL))) + break; + cmd.size -= 1; + cmd.value += 1; + } + break; + + case FFS_BLK_FREE: +#ifdef DEBUG + if (fsckcmds) { + if (cmd.size == 1) + printf("%s: free block %jd\n", + mp->mnt_stat.f_mntonname, + (intmax_t)cmd.value); + else + printf("%s: free blocks %jd-%jd\n", + mp->mnt_stat.f_mntonname, + (intmax_t)cmd.value, + (intmax_t)cmd.value + cmd.size - 1); + } +#endif /* DEBUG */ + blkno = cmd.value; + blkcnt = cmd.size; + blksize = fs->fs_frag - (blkno % fs->fs_frag); + while (blkcnt > 0) { + if (blksize > blkcnt) + blksize = blkcnt; + ffs_blkfree(ump, fs, ump->um_devvp, blkno, + blksize * fs->fs_fsize, ROOTINO, VDIR, NULL); + blkno += blksize; + blkcnt -= blksize; + blksize = fs->fs_frag; + } + break; + + /* + * Adjust superblock summaries. fsck(8) is expected to + * submit deltas when necessary. + */ + case FFS_ADJ_NDIR: +#ifdef DEBUG + if (fsckcmds) { + printf("%s: adjust number of directories by %jd\n", + mp->mnt_stat.f_mntonname, (intmax_t)cmd.value); + } +#endif /* DEBUG */ + fs->fs_cstotal.cs_ndir += cmd.value; + break; + + case FFS_ADJ_NBFREE: +#ifdef DEBUG + if (fsckcmds) { + printf("%s: adjust number of free blocks by %+jd\n", + mp->mnt_stat.f_mntonname, (intmax_t)cmd.value); + } +#endif /* DEBUG */ + fs->fs_cstotal.cs_nbfree += cmd.value; + break; + + case FFS_ADJ_NIFREE: +#ifdef DEBUG + if (fsckcmds) { + printf("%s: adjust number of free inodes by %+jd\n", + mp->mnt_stat.f_mntonname, (intmax_t)cmd.value); + } +#endif /* DEBUG */ + fs->fs_cstotal.cs_nifree += cmd.value; + break; + + case FFS_ADJ_NFFREE: +#ifdef DEBUG + if (fsckcmds) { + printf("%s: adjust number of free frags by %+jd\n", + mp->mnt_stat.f_mntonname, (intmax_t)cmd.value); + } +#endif /* DEBUG */ + fs->fs_cstotal.cs_nffree += cmd.value; + break; + + case FFS_ADJ_NUMCLUSTERS: +#ifdef DEBUG + if (fsckcmds) { + printf("%s: adjust number of free clusters by %+jd\n", + mp->mnt_stat.f_mntonname, (intmax_t)cmd.value); + } +#endif /* DEBUG */ + fs->fs_cstotal.cs_numclusters += cmd.value; + break; + + case FFS_SET_CWD: +#ifdef DEBUG + if (fsckcmds) { + printf("%s: set current directory to inode %jd\n", + mp->mnt_stat.f_mntonname, (intmax_t)cmd.value); + } +#endif /* DEBUG */ + if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_SHARED, &vp))) + break; + AUDIT_ARG_VNODE1(vp); + if ((error = change_dir(vp, td)) != 0) { + vput(vp); + break; + } + VOP_UNLOCK(vp, 0); + pwd_chdir(td, vp); + break; + + case FFS_SET_DOTDOT: +#ifdef DEBUG + if (fsckcmds) { + printf("%s: change .. in cwd from %jd to %jd\n", + mp->mnt_stat.f_mntonname, (intmax_t)cmd.value, + (intmax_t)cmd.size); + } +#endif /* DEBUG */ + /* + * First we have to get and lock the parent directory + * to which ".." points. + */ + error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &fdvp); + if (error) + break; + /* + * Now we get and lock the child directory containing "..". 
+ */ + FILEDESC_SLOCK(td->td_proc->p_fd); + dvp = td->td_proc->p_fd->fd_cdir; + FILEDESC_SUNLOCK(td->td_proc->p_fd); + if ((error = vget(dvp, LK_EXCLUSIVE, td)) != 0) { + vput(fdvp); + break; + } + dp = VTOI(dvp); + dp->i_offset = 12; /* XXX mastertemplate.dot_reclen */ + error = ufs_dirrewrite(dp, VTOI(fdvp), (ino_t)cmd.size, + DT_DIR, 0); + cache_purge(fdvp); + cache_purge(dvp); + vput(dvp); + vput(fdvp); + break; + + case FFS_UNLINK: +#ifdef DEBUG + if (fsckcmds) { + char buf[32]; + + if (copyinstr((char *)(intptr_t)cmd.value, buf,32,NULL)) + strncpy(buf, "Name_too_long", 32); + printf("%s: unlink %s (inode %jd)\n", + mp->mnt_stat.f_mntonname, buf, (intmax_t)cmd.size); + } +#endif /* DEBUG */ + /* + * kern_unlinkat will do its own start/finish writes and + * they do not nest, so drop ours here. Setting mp == NULL + * indicates that vn_finished_write is not needed down below. + */ + vn_finished_write(mp); + mp = NULL; + error = kern_unlinkat(td, AT_FDCWD, (char *)(intptr_t)cmd.value, + UIO_USERSPACE, (ino_t)cmd.size); + break; + + case FFS_SET_INODE: + if (ump->um_fsckpid != td->td_proc->p_pid) { + error = EPERM; + break; + } +#ifdef DEBUG + if (fsckcmds) { + printf("%s: update inode %jd\n", + mp->mnt_stat.f_mntonname, (intmax_t)cmd.value); + } +#endif /* DEBUG */ + if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp))) + break; + AUDIT_ARG_VNODE1(vp); + ip = VTOI(vp); + if (I_IS_UFS1(ip)) + error = copyin((void *)(intptr_t)cmd.size, ip->i_din1, + sizeof(struct ufs1_dinode)); + else + error = copyin((void *)(intptr_t)cmd.size, ip->i_din2, + sizeof(struct ufs2_dinode)); + if (error) { + vput(vp); + break; + } + ip->i_flag |= IN_CHANGE | IN_MODIFIED; + error = ffs_update(vp, 1); + vput(vp); + break; + + case FFS_SET_BUFOUTPUT: + if (ump->um_fsckpid != td->td_proc->p_pid) { + error = EPERM; + break; + } + if (ITOUMP(VTOI(vp)) != ump) { + error = EINVAL; + break; + } +#ifdef DEBUG + if (fsckcmds) { + printf("%s: %s buffered output for descriptor %jd\n", + mp->mnt_stat.f_mntonname, + cmd.size == 1 ? "enable" : "disable", + (intmax_t)cmd.value); + } +#endif /* DEBUG */ + if ((error = getvnode(td, cmd.value, + cap_rights_init(&rights, CAP_FSCK), &vfp)) != 0) + break; + if (vfp->f_vnode->v_type != VCHR) { + fdrop(vfp, td); + error = EINVAL; + break; + } + if (origops == NULL) { + origops = vfp->f_ops; + bcopy((void *)origops, (void *)&bufferedops, + sizeof(bufferedops)); + bufferedops.fo_write = buffered_write; + } + if (cmd.size == 1) + atomic_store_rel_ptr((volatile uintptr_t *)&vfp->f_ops, + (uintptr_t)&bufferedops); + else + atomic_store_rel_ptr((volatile uintptr_t *)&vfp->f_ops, + (uintptr_t)origops); + fdrop(vfp, td); + break; + + default: +#ifdef DEBUG + if (fsckcmds) { + printf("Invalid request %d from fsck\n", + oidp->oid_number); + } +#endif /* DEBUG */ + error = EINVAL; + break; + + } + fdrop(fp, td); + vn_finished_write(mp); + return (error); +} + +/* + * Function to switch a descriptor to use the buffer cache to stage + * its I/O. This is needed so that writes to the filesystem device + * will give snapshots a chance to copy modified blocks for which it + * needs to retain copies. + */ +static int +buffered_write(fp, uio, active_cred, flags, td) + struct file *fp; + struct uio *uio; + struct ucred *active_cred; + int flags; + struct thread *td; +{ + struct vnode *devvp, *vp; + struct inode *ip; + struct buf *bp; + struct fs *fs; + struct filedesc *fdp; + int error; + daddr_t lbn; + + /* + * The devvp is associated with the /dev filesystem. 
To discover + * the filesystem with which the device is associated, we depend + * on the application setting the current directory to a location + * within the filesystem being written. Yes, this is an ugly hack. + */ + devvp = fp->f_vnode; + if (!vn_isdisk(devvp, NULL)) + return (EINVAL); + fdp = td->td_proc->p_fd; + FILEDESC_SLOCK(fdp); + vp = fdp->fd_cdir; + vref(vp); + FILEDESC_SUNLOCK(fdp); + vn_lock(vp, LK_SHARED | LK_RETRY); + /* + * Check that the current directory vnode indeed belongs to + * UFS before trying to dereference UFS-specific v_data fields. + */ + if (vp->v_op != &ffs_vnodeops1 && vp->v_op != &ffs_vnodeops2) { + vput(vp); + return (EINVAL); + } + ip = VTOI(vp); + if (ITODEVVP(ip) != devvp) { + vput(vp); + return (EINVAL); + } + fs = ITOFS(ip); + vput(vp); + foffset_lock_uio(fp, uio, flags); + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); +#ifdef DEBUG + if (fsckcmds) { + printf("%s: buffered write for block %jd\n", + fs->fs_fsmnt, (intmax_t)btodb(uio->uio_offset)); + } +#endif /* DEBUG */ + /* + * All I/O must be contained within a filesystem block, start on + * a fragment boundary, and be a multiple of fragments in length. + */ + if (uio->uio_resid > fs->fs_bsize - (uio->uio_offset % fs->fs_bsize) || + fragoff(fs, uio->uio_offset) != 0 || + fragoff(fs, uio->uio_resid) != 0) { + error = EINVAL; + goto out; + } + lbn = numfrags(fs, uio->uio_offset); + bp = getblk(devvp, lbn, uio->uio_resid, 0, 0, 0); + bp->b_flags |= B_RELBUF; + if ((error = uiomove((char *)bp->b_data, uio->uio_resid, uio)) != 0) { + brelse(bp); + goto out; + } + error = bwrite(bp); +out: + VOP_UNLOCK(devvp, 0); + foffset_unlock_uio(fp, uio, flags | FOF_NEXTOFF); + return (error); +} diff --git a/Dump/ufs/ffs/ffs_balloc.c b/Dump/ufs/ffs/ffs_balloc.c new file mode 100644 index 0000000..0aa2f40 --- /dev/null +++ b/Dump/ufs/ffs/ffs_balloc.c @@ -0,0 +1,1151 @@ +/*- + * Copyright (c) 2002 Networks Associates Technology, Inc. + * All rights reserved. + * + * This software was developed for the FreeBSD Project by Marshall + * Kirk McKusick and Network Associates Laboratories, the Security + * Research Division of Network Associates, Inc. under DARPA/SPAWAR + * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS + * research program + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95 + */ + +#include +__FBSDID("$FreeBSD: releng/11.2/sys/ufs/ffs/ffs_balloc.c 331722 2018-03-29 02:50:57Z eadler $"); + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +/* + * Balloc defines the structure of filesystem storage + * by allocating the physical blocks on a device given + * the inode and the logical block number in a file. + * This is the allocation strategy for UFS1. Below is + * the allocation strategy for UFS2. + */ +int +ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size, + struct ucred *cred, int flags, struct buf **bpp) +{ + struct inode *ip; + struct ufs1_dinode *dp; + ufs_lbn_t lbn, lastlbn; + struct fs *fs; + ufs1_daddr_t nb; + struct buf *bp, *nbp; + struct ufsmount *ump; + struct indir indirs[NIADDR + 2]; + int deallocated, osize, nsize, num, i, error; + ufs2_daddr_t newb; + ufs1_daddr_t *bap, pref; + ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; + ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1]; + int unwindidx = -1; + int saved_inbdflush; + static struct timeval lastfail; + static int curfail; + int gbflags, reclaimed; + + ip = VTOI(vp); + dp = ip->i_din1; + fs = ITOFS(ip); + ump = ITOUMP(ip); + lbn = lblkno(fs, startoffset); + size = blkoff(fs, startoffset) + size; + reclaimed = 0; + if (size > fs->fs_bsize) + panic("ffs_balloc_ufs1: blk too big"); + *bpp = NULL; + if (flags & IO_EXT) + return (EOPNOTSUPP); + if (lbn < 0) + return (EFBIG); + gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0; + + if (DOINGSOFTDEP(vp)) + softdep_prealloc(vp, MNT_WAIT); + /* + * If the next write will extend the file into a new block, + * and the file is currently composed of a fragment + * this fragment has to be extended to be a full block. 
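+ *
+ * For example, a file whose last allocated block is a 2KB fragment must
+ * have that fragment reallocated (via ffs_realloccg below) to a full
+ * fs_bsize block before any block beyond it is allocated, since only
+ * the final block of a file may be a fragment.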
+ */ + lastlbn = lblkno(fs, ip->i_size); + if (lastlbn < NDADDR && lastlbn < lbn) { + nb = lastlbn; + osize = blksize(fs, ip, nb); + if (osize < fs->fs_bsize && osize > 0) { + UFS_LOCK(ump); + error = ffs_realloccg(ip, nb, dp->di_db[nb], + ffs_blkpref_ufs1(ip, lastlbn, (int)nb, + &dp->di_db[0]), osize, (int)fs->fs_bsize, flags, + cred, &bp); + if (error) + return (error); + if (DOINGSOFTDEP(vp)) + softdep_setup_allocdirect(ip, nb, + dbtofsb(fs, bp->b_blkno), dp->di_db[nb], + fs->fs_bsize, osize, bp); + ip->i_size = smalllblktosize(fs, nb + 1); + dp->di_size = ip->i_size; + dp->di_db[nb] = dbtofsb(fs, bp->b_blkno); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + if (flags & IO_SYNC) + bwrite(bp); + else + bawrite(bp); + } + } + /* + * The first NDADDR blocks are direct blocks + */ + if (lbn < NDADDR) { + if (flags & BA_METAONLY) + panic("ffs_balloc_ufs1: BA_METAONLY for direct block"); + nb = dp->di_db[lbn]; + if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { + error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp); + if (error) { + brelse(bp); + return (error); + } + bp->b_blkno = fsbtodb(fs, nb); + *bpp = bp; + return (0); + } + if (nb != 0) { + /* + * Consider need to reallocate a fragment. + */ + osize = fragroundup(fs, blkoff(fs, ip->i_size)); + nsize = fragroundup(fs, size); + if (nsize <= osize) { + error = bread(vp, lbn, osize, NOCRED, &bp); + if (error) { + brelse(bp); + return (error); + } + bp->b_blkno = fsbtodb(fs, nb); + } else { + UFS_LOCK(ump); + error = ffs_realloccg(ip, lbn, dp->di_db[lbn], + ffs_blkpref_ufs1(ip, lbn, (int)lbn, + &dp->di_db[0]), osize, nsize, flags, + cred, &bp); + if (error) + return (error); + if (DOINGSOFTDEP(vp)) + softdep_setup_allocdirect(ip, lbn, + dbtofsb(fs, bp->b_blkno), nb, + nsize, osize, bp); + } + } else { + if (ip->i_size < smalllblktosize(fs, lbn + 1)) + nsize = fragroundup(fs, size); + else + nsize = fs->fs_bsize; + UFS_LOCK(ump); + error = ffs_alloc(ip, lbn, + ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]), + nsize, flags, cred, &newb); + if (error) + return (error); + bp = getblk(vp, lbn, nsize, 0, 0, gbflags); + bp->b_blkno = fsbtodb(fs, newb); + if (flags & BA_CLRBUF) + vfs_bio_clrbuf(bp); + if (DOINGSOFTDEP(vp)) + softdep_setup_allocdirect(ip, lbn, newb, 0, + nsize, 0, bp); + } + dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + *bpp = bp; + return (0); + } + /* + * Determine the number of levels of indirection. + */ + pref = 0; + if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) + return(error); +#ifdef INVARIANTS + if (num < 1) + panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block"); +#endif + saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH); + /* + * Fetch the first indirect block allocating if necessary. 
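+ *
+ * Every block allocated from here on is also recorded in allociblk[]
+ * and lbns[] so that, if allocation fails part way through, the fail
+ * path at the bottom of this routine can unwind and free them again.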
+ */ + --num; + nb = dp->di_ib[indirs[0].in_off]; + allocib = NULL; + allocblk = allociblk; + lbns_remfree = lbns; + if (nb == 0) { + UFS_LOCK(ump); + pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1, + (ufs1_daddr_t *)0); + if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, + flags, cred, &newb)) != 0) { + curthread_pflags_restore(saved_inbdflush); + return (error); + } + pref = newb + fs->fs_frag; + nb = newb; + MPASS(allocblk < allociblk + nitems(allociblk)); + MPASS(lbns_remfree < lbns + nitems(lbns)); + *allocblk++ = nb; + *lbns_remfree++ = indirs[1].in_lbn; + bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags); + bp->b_blkno = fsbtodb(fs, nb); + vfs_bio_clrbuf(bp); + if (DOINGSOFTDEP(vp)) { + softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, + newb, 0, fs->fs_bsize, 0, bp); + bdwrite(bp); + } else { + /* + * Write synchronously so that indirect blocks + * never point at garbage. + */ + if (DOINGASYNC(vp)) + bdwrite(bp); + else if ((error = bwrite(bp)) != 0) + goto fail; + } + allocib = &dp->di_ib[indirs[0].in_off]; + *allocib = nb; + ip->i_flag |= IN_CHANGE | IN_UPDATE; + } + /* + * Fetch through the indirect blocks, allocating as necessary. + */ +retry: + for (i = 1;;) { + error = bread(vp, + indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); + if (error) { + brelse(bp); + goto fail; + } + bap = (ufs1_daddr_t *)bp->b_data; + nb = bap[indirs[i].in_off]; + if (i == num) + break; + i += 1; + if (nb != 0) { + bqrelse(bp); + continue; + } + UFS_LOCK(ump); + /* + * If parent indirect has just been allocated, try to cluster + * immediately following it. + */ + if (pref == 0) + pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1, + (ufs1_daddr_t *)0); + if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, + flags | IO_BUFLOCKED, cred, &newb)) != 0) { + brelse(bp); + if (DOINGSOFTDEP(vp) && ++reclaimed == 1) { + UFS_LOCK(ump); + softdep_request_cleanup(fs, vp, cred, + FLUSH_BLOCKS_WAIT); + UFS_UNLOCK(ump); + goto retry; + } + if (ppsratecheck(&lastfail, &curfail, 1)) { + ffs_fserr(fs, ip->i_number, "filesystem full"); + uprintf("\n%s: write failed, filesystem " + "is full\n", fs->fs_fsmnt); + } + goto fail; + } + pref = newb + fs->fs_frag; + nb = newb; + MPASS(allocblk < allociblk + nitems(allociblk)); + MPASS(lbns_remfree < lbns + nitems(lbns)); + *allocblk++ = nb; + *lbns_remfree++ = indirs[i].in_lbn; + nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0); + nbp->b_blkno = fsbtodb(fs, nb); + vfs_bio_clrbuf(nbp); + if (DOINGSOFTDEP(vp)) { + softdep_setup_allocindir_meta(nbp, ip, bp, + indirs[i - 1].in_off, nb); + bdwrite(nbp); + } else { + /* + * Write synchronously so that indirect blocks + * never point at garbage. + */ + if ((error = bwrite(nbp)) != 0) { + brelse(bp); + goto fail; + } + } + bap[indirs[i - 1].in_off] = nb; + if (allocib == NULL && unwindidx < 0) + unwindidx = i - 1; + /* + * If required, write synchronously, otherwise use + * delayed write. + */ + if (flags & IO_SYNC) { + bwrite(bp); + } else { + if (bp->b_bufsize == fs->fs_bsize) + bp->b_flags |= B_CLUSTEROK; + bdwrite(bp); + } + } + /* + * If asked only for the indirect block, then return it. + */ + if (flags & BA_METAONLY) { + curthread_pflags_restore(saved_inbdflush); + *bpp = bp; + return (0); + } + /* + * Get the data block, allocating if necessary. + */ + if (nb == 0) { + UFS_LOCK(ump); + /* + * If allocating metadata at the front of the cylinder + * group and parent indirect block has just been allocated, + * then cluster next to it if it is the first indirect in + * the file. 
Otherwise it has been allocated in the metadata + * area, so we want to find our own place out in the data area. + */ + if (pref == 0 || (lbn > NDADDR && fs->fs_metaspace != 0)) + pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, + &bap[0]); + error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, + flags | IO_BUFLOCKED, cred, &newb); + if (error) { + brelse(bp); + if (DOINGSOFTDEP(vp) && ++reclaimed == 1) { + UFS_LOCK(ump); + softdep_request_cleanup(fs, vp, cred, + FLUSH_BLOCKS_WAIT); + UFS_UNLOCK(ump); + goto retry; + } + if (ppsratecheck(&lastfail, &curfail, 1)) { + ffs_fserr(fs, ip->i_number, "filesystem full"); + uprintf("\n%s: write failed, filesystem " + "is full\n", fs->fs_fsmnt); + } + goto fail; + } + nb = newb; + MPASS(allocblk < allociblk + nitems(allociblk)); + MPASS(lbns_remfree < lbns + nitems(lbns)); + *allocblk++ = nb; + *lbns_remfree++ = lbn; + nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags); + nbp->b_blkno = fsbtodb(fs, nb); + if (flags & BA_CLRBUF) + vfs_bio_clrbuf(nbp); + if (DOINGSOFTDEP(vp)) + softdep_setup_allocindir_page(ip, lbn, bp, + indirs[i].in_off, nb, 0, nbp); + bap[indirs[i].in_off] = nb; + /* + * If required, write synchronously, otherwise use + * delayed write. + */ + if (flags & IO_SYNC) { + bwrite(bp); + } else { + if (bp->b_bufsize == fs->fs_bsize) + bp->b_flags |= B_CLUSTEROK; + bdwrite(bp); + } + curthread_pflags_restore(saved_inbdflush); + *bpp = nbp; + return (0); + } + brelse(bp); + if (flags & BA_CLRBUF) { + int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT; + if (seqcount != 0 && + (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 && + !(vm_page_count_severe() || buf_dirty_count_severe())) { + error = cluster_read(vp, ip->i_size, lbn, + (int)fs->fs_bsize, NOCRED, + MAXBSIZE, seqcount, gbflags, &nbp); + } else { + error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED, + gbflags, &nbp); + } + if (error) { + brelse(nbp); + goto fail; + } + } else { + nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags); + nbp->b_blkno = fsbtodb(fs, nb); + } + curthread_pflags_restore(saved_inbdflush); + *bpp = nbp; + return (0); +fail: + curthread_pflags_restore(saved_inbdflush); + /* + * If we have failed to allocate any blocks, simply return the error. + * This is the usual case and avoids the need to fsync the file. + */ + if (allocblk == allociblk && allocib == NULL && unwindidx == -1) + return (error); + /* + * If we have failed part way through block allocation, we + * have to deallocate any indirect blocks that we have allocated. + * We have to fsync the file before we start to get rid of all + * of its dependencies so that we do not leave them dangling. + * We have to sync it at the end so that the soft updates code + * does not find any untracked changes. Although this is really + * slow, running out of disk space is not expected to be a common + * occurrence. The error return from fsync is ignored as we already + * have an error to return to the user. + * + * XXX Still have to journal the free below + */ + (void) ffs_syncvnode(vp, MNT_WAIT, 0); + for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns; + blkp < allocblk; blkp++, lbns_remfree++) { + /* + * We shall not leave the freed blocks on the vnode + * buffer object lists. 
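+ * getblk() is called with GB_NOCREAT so it only returns a buffer that
+ * already exists for the failed block; any such buffer is marked
+ * B_INVAL | B_RELBUF | B_NOCACHE and released so it is discarded
+ * rather than written out.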
+ */ + bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, + GB_NOCREAT | GB_UNMAPPED); + if (bp != NULL) { + KASSERT(bp->b_blkno == fsbtodb(fs, *blkp), + ("mismatch1 l %jd %jd b %ju %ju", + (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree, + (uintmax_t)bp->b_blkno, + (uintmax_t)fsbtodb(fs, *blkp))); + bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE; + bp->b_flags &= ~(B_ASYNC | B_CACHE); + brelse(bp); + } + deallocated += fs->fs_bsize; + } + if (allocib != NULL) { + *allocib = 0; + } else if (unwindidx >= 0) { + int r; + + r = bread(vp, indirs[unwindidx].in_lbn, + (int)fs->fs_bsize, NOCRED, &bp); + if (r) { + panic("Could not unwind indirect block, error %d", r); + brelse(bp); + } else { + bap = (ufs1_daddr_t *)bp->b_data; + bap[indirs[unwindidx].in_off] = 0; + if (flags & IO_SYNC) { + bwrite(bp); + } else { + if (bp->b_bufsize == fs->fs_bsize) + bp->b_flags |= B_CLUSTEROK; + bdwrite(bp); + } + } + } + if (deallocated) { +#ifdef QUOTA + /* + * Restore user's disk quota because allocation failed. + */ + (void) chkdq(ip, -btodb(deallocated), cred, FORCE); +#endif + dp->di_blocks -= btodb(deallocated); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + } + (void) ffs_syncvnode(vp, MNT_WAIT, 0); + /* + * After the buffers are invalidated and on-disk pointers are + * cleared, free the blocks. + */ + for (blkp = allociblk; blkp < allocblk; blkp++) { +#ifdef INVARIANTS + if (blkp == allociblk) + lbns_remfree = lbns; + bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, + GB_NOCREAT | GB_UNMAPPED); + if (bp != NULL) { + panic("zombie1 %jd %ju %ju", + (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno, + (uintmax_t)fsbtodb(fs, *blkp)); + } + lbns_remfree++; +#endif + ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize, + ip->i_number, vp->v_type, NULL); + } + return (error); +} + +/* + * Balloc defines the structure of file system storage + * by allocating the physical blocks on a device given + * the inode and the logical block number in a file. + * This is the allocation strategy for UFS2. Above is + * the allocation strategy for UFS1. + */ +int +ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, + struct ucred *cred, int flags, struct buf **bpp) +{ + struct inode *ip; + struct ufs2_dinode *dp; + ufs_lbn_t lbn, lastlbn; + struct fs *fs; + struct buf *bp, *nbp; + struct ufsmount *ump; + struct indir indirs[NIADDR + 2]; + ufs2_daddr_t nb, newb, *bap, pref; + ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; + ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1]; + int deallocated, osize, nsize, num, i, error; + int unwindidx = -1; + int saved_inbdflush; + static struct timeval lastfail; + static int curfail; + int gbflags, reclaimed; + + ip = VTOI(vp); + dp = ip->i_din2; + fs = ITOFS(ip); + ump = ITOUMP(ip); + lbn = lblkno(fs, startoffset); + size = blkoff(fs, startoffset) + size; + reclaimed = 0; + if (size > fs->fs_bsize) + panic("ffs_balloc_ufs2: blk too big"); + *bpp = NULL; + if (lbn < 0) + return (EFBIG); + gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0; + + if (DOINGSOFTDEP(vp)) + softdep_prealloc(vp, MNT_WAIT); + + /* + * Check for allocating external data. + */ + if (flags & IO_EXT) { + if (lbn >= NXADDR) + return (EFBIG); + /* + * If the next write will extend the data into a new block, + * and the data is currently composed of a fragment + * this fragment has to be extended to be a full block. 
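+ *
+ * This mirrors the fragment-extension logic used for regular file data
+ * further below, but it operates on the external attribute area
+ * (di_extb[] / di_extsize) and uses negative logical block numbers
+ * (-1 - lbn) with BX_ALTDATA so these buffers stay distinct from the
+ * file's ordinary data buffers.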
+ */ + lastlbn = lblkno(fs, dp->di_extsize); + if (lastlbn < lbn) { + nb = lastlbn; + osize = sblksize(fs, dp->di_extsize, nb); + if (osize < fs->fs_bsize && osize > 0) { + UFS_LOCK(ump); + error = ffs_realloccg(ip, -1 - nb, + dp->di_extb[nb], + ffs_blkpref_ufs2(ip, lastlbn, (int)nb, + &dp->di_extb[0]), osize, + (int)fs->fs_bsize, flags, cred, &bp); + if (error) + return (error); + if (DOINGSOFTDEP(vp)) + softdep_setup_allocext(ip, nb, + dbtofsb(fs, bp->b_blkno), + dp->di_extb[nb], + fs->fs_bsize, osize, bp); + dp->di_extsize = smalllblktosize(fs, nb + 1); + dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno); + bp->b_xflags |= BX_ALTDATA; + ip->i_flag |= IN_CHANGE; + if (flags & IO_SYNC) + bwrite(bp); + else + bawrite(bp); + } + } + /* + * All blocks are direct blocks + */ + if (flags & BA_METAONLY) + panic("ffs_balloc_ufs2: BA_METAONLY for ext block"); + nb = dp->di_extb[lbn]; + if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) { + error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED, + gbflags, &bp); + if (error) { + brelse(bp); + return (error); + } + bp->b_blkno = fsbtodb(fs, nb); + bp->b_xflags |= BX_ALTDATA; + *bpp = bp; + return (0); + } + if (nb != 0) { + /* + * Consider need to reallocate a fragment. + */ + osize = fragroundup(fs, blkoff(fs, dp->di_extsize)); + nsize = fragroundup(fs, size); + if (nsize <= osize) { + error = bread_gb(vp, -1 - lbn, osize, NOCRED, + gbflags, &bp); + if (error) { + brelse(bp); + return (error); + } + bp->b_blkno = fsbtodb(fs, nb); + bp->b_xflags |= BX_ALTDATA; + } else { + UFS_LOCK(ump); + error = ffs_realloccg(ip, -1 - lbn, + dp->di_extb[lbn], + ffs_blkpref_ufs2(ip, lbn, (int)lbn, + &dp->di_extb[0]), osize, nsize, flags, + cred, &bp); + if (error) + return (error); + bp->b_xflags |= BX_ALTDATA; + if (DOINGSOFTDEP(vp)) + softdep_setup_allocext(ip, lbn, + dbtofsb(fs, bp->b_blkno), nb, + nsize, osize, bp); + } + } else { + if (dp->di_extsize < smalllblktosize(fs, lbn + 1)) + nsize = fragroundup(fs, size); + else + nsize = fs->fs_bsize; + UFS_LOCK(ump); + error = ffs_alloc(ip, lbn, + ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]), + nsize, flags, cred, &newb); + if (error) + return (error); + bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags); + bp->b_blkno = fsbtodb(fs, newb); + bp->b_xflags |= BX_ALTDATA; + if (flags & BA_CLRBUF) + vfs_bio_clrbuf(bp); + if (DOINGSOFTDEP(vp)) + softdep_setup_allocext(ip, lbn, newb, 0, + nsize, 0, bp); + } + dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno); + ip->i_flag |= IN_CHANGE; + *bpp = bp; + return (0); + } + /* + * If the next write will extend the file into a new block, + * and the file is currently composed of a fragment + * this fragment has to be extended to be a full block. 
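+ *
+ * Same last-fragment extension as in ffs_balloc_ufs1() above, using the
+ * 64-bit ufs2_daddr_t block pointers of the UFS2 dinode.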
+ */ + lastlbn = lblkno(fs, ip->i_size); + if (lastlbn < NDADDR && lastlbn < lbn) { + nb = lastlbn; + osize = blksize(fs, ip, nb); + if (osize < fs->fs_bsize && osize > 0) { + UFS_LOCK(ump); + error = ffs_realloccg(ip, nb, dp->di_db[nb], + ffs_blkpref_ufs2(ip, lastlbn, (int)nb, + &dp->di_db[0]), osize, (int)fs->fs_bsize, + flags, cred, &bp); + if (error) + return (error); + if (DOINGSOFTDEP(vp)) + softdep_setup_allocdirect(ip, nb, + dbtofsb(fs, bp->b_blkno), + dp->di_db[nb], + fs->fs_bsize, osize, bp); + ip->i_size = smalllblktosize(fs, nb + 1); + dp->di_size = ip->i_size; + dp->di_db[nb] = dbtofsb(fs, bp->b_blkno); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + if (flags & IO_SYNC) + bwrite(bp); + else + bawrite(bp); + } + } + /* + * The first NDADDR blocks are direct blocks + */ + if (lbn < NDADDR) { + if (flags & BA_METAONLY) + panic("ffs_balloc_ufs2: BA_METAONLY for direct block"); + nb = dp->di_db[lbn]; + if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { + error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED, + gbflags, &bp); + if (error) { + brelse(bp); + return (error); + } + bp->b_blkno = fsbtodb(fs, nb); + *bpp = bp; + return (0); + } + if (nb != 0) { + /* + * Consider need to reallocate a fragment. + */ + osize = fragroundup(fs, blkoff(fs, ip->i_size)); + nsize = fragroundup(fs, size); + if (nsize <= osize) { + error = bread_gb(vp, lbn, osize, NOCRED, + gbflags, &bp); + if (error) { + brelse(bp); + return (error); + } + bp->b_blkno = fsbtodb(fs, nb); + } else { + UFS_LOCK(ump); + error = ffs_realloccg(ip, lbn, dp->di_db[lbn], + ffs_blkpref_ufs2(ip, lbn, (int)lbn, + &dp->di_db[0]), osize, nsize, flags, + cred, &bp); + if (error) + return (error); + if (DOINGSOFTDEP(vp)) + softdep_setup_allocdirect(ip, lbn, + dbtofsb(fs, bp->b_blkno), nb, + nsize, osize, bp); + } + } else { + if (ip->i_size < smalllblktosize(fs, lbn + 1)) + nsize = fragroundup(fs, size); + else + nsize = fs->fs_bsize; + UFS_LOCK(ump); + error = ffs_alloc(ip, lbn, + ffs_blkpref_ufs2(ip, lbn, (int)lbn, + &dp->di_db[0]), nsize, flags, cred, &newb); + if (error) + return (error); + bp = getblk(vp, lbn, nsize, 0, 0, gbflags); + bp->b_blkno = fsbtodb(fs, newb); + if (flags & BA_CLRBUF) + vfs_bio_clrbuf(bp); + if (DOINGSOFTDEP(vp)) + softdep_setup_allocdirect(ip, lbn, newb, 0, + nsize, 0, bp); + } + dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + *bpp = bp; + return (0); + } + /* + * Determine the number of levels of indirection. + */ + pref = 0; + if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) + return(error); +#ifdef INVARIANTS + if (num < 1) + panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block"); +#endif + saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH); + /* + * Fetch the first indirect block allocating if necessary. 
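+ * Roughly speaking, ufs_getlbns() has filled indirs[]: indirs[0].in_off
+ * names the di_ib[] slot, and each following entry gives the offset
+ * within the next indirect block on the path; num is the number of
+ * indirect levels.  As a sizing illustration, a 32K-block UFS2
+ * filesystem packs NINDIR(fs) = 32768 / sizeof(ufs2_daddr_t) = 4096
+ * pointers into each indirect block.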
+ */ + --num; + nb = dp->di_ib[indirs[0].in_off]; + allocib = NULL; + allocblk = allociblk; + lbns_remfree = lbns; + if (nb == 0) { + UFS_LOCK(ump); + pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1, + (ufs2_daddr_t *)0); + if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, + flags, cred, &newb)) != 0) { + curthread_pflags_restore(saved_inbdflush); + return (error); + } + pref = newb + fs->fs_frag; + nb = newb; + MPASS(allocblk < allociblk + nitems(allociblk)); + MPASS(lbns_remfree < lbns + nitems(lbns)); + *allocblk++ = nb; + *lbns_remfree++ = indirs[1].in_lbn; + bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, + GB_UNMAPPED); + bp->b_blkno = fsbtodb(fs, nb); + vfs_bio_clrbuf(bp); + if (DOINGSOFTDEP(vp)) { + softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, + newb, 0, fs->fs_bsize, 0, bp); + bdwrite(bp); + } else { + /* + * Write synchronously so that indirect blocks + * never point at garbage. + */ + if (DOINGASYNC(vp)) + bdwrite(bp); + else if ((error = bwrite(bp)) != 0) + goto fail; + } + allocib = &dp->di_ib[indirs[0].in_off]; + *allocib = nb; + ip->i_flag |= IN_CHANGE | IN_UPDATE; + } + /* + * Fetch through the indirect blocks, allocating as necessary. + */ +retry: + for (i = 1;;) { + error = bread(vp, + indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); + if (error) { + brelse(bp); + goto fail; + } + bap = (ufs2_daddr_t *)bp->b_data; + nb = bap[indirs[i].in_off]; + if (i == num) + break; + i += 1; + if (nb != 0) { + bqrelse(bp); + continue; + } + UFS_LOCK(ump); + /* + * If parent indirect has just been allocated, try to cluster + * immediately following it. + */ + if (pref == 0) + pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1, + (ufs2_daddr_t *)0); + if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, + flags | IO_BUFLOCKED, cred, &newb)) != 0) { + brelse(bp); + if (DOINGSOFTDEP(vp) && ++reclaimed == 1) { + UFS_LOCK(ump); + softdep_request_cleanup(fs, vp, cred, + FLUSH_BLOCKS_WAIT); + UFS_UNLOCK(ump); + goto retry; + } + if (ppsratecheck(&lastfail, &curfail, 1)) { + ffs_fserr(fs, ip->i_number, "filesystem full"); + uprintf("\n%s: write failed, filesystem " + "is full\n", fs->fs_fsmnt); + } + goto fail; + } + pref = newb + fs->fs_frag; + nb = newb; + MPASS(allocblk < allociblk + nitems(allociblk)); + MPASS(lbns_remfree < lbns + nitems(lbns)); + *allocblk++ = nb; + *lbns_remfree++ = indirs[i].in_lbn; + nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, + GB_UNMAPPED); + nbp->b_blkno = fsbtodb(fs, nb); + vfs_bio_clrbuf(nbp); + if (DOINGSOFTDEP(vp)) { + softdep_setup_allocindir_meta(nbp, ip, bp, + indirs[i - 1].in_off, nb); + bdwrite(nbp); + } else { + /* + * Write synchronously so that indirect blocks + * never point at garbage. + */ + if ((error = bwrite(nbp)) != 0) { + brelse(bp); + goto fail; + } + } + bap[indirs[i - 1].in_off] = nb; + if (allocib == NULL && unwindidx < 0) + unwindidx = i - 1; + /* + * If required, write synchronously, otherwise use + * delayed write. + */ + if (flags & IO_SYNC) { + bwrite(bp); + } else { + if (bp->b_bufsize == fs->fs_bsize) + bp->b_flags |= B_CLUSTEROK; + bdwrite(bp); + } + } + /* + * If asked only for the indirect block, then return it. + */ + if (flags & BA_METAONLY) { + curthread_pflags_restore(saved_inbdflush); + *bpp = bp; + return (0); + } + /* + * Get the data block, allocating if necessary. 
+ */ + if (nb == 0) { + UFS_LOCK(ump); + /* + * If allocating metadata at the front of the cylinder + * group and parent indirect block has just been allocated, + * then cluster next to it if it is the first indirect in + * the file. Otherwise it has been allocated in the metadata + * area, so we want to find our own place out in the data area. + */ + if (pref == 0 || (lbn > NDADDR && fs->fs_metaspace != 0)) + pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, + &bap[0]); + error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, + flags | IO_BUFLOCKED, cred, &newb); + if (error) { + brelse(bp); + if (DOINGSOFTDEP(vp) && ++reclaimed == 1) { + UFS_LOCK(ump); + softdep_request_cleanup(fs, vp, cred, + FLUSH_BLOCKS_WAIT); + UFS_UNLOCK(ump); + goto retry; + } + if (ppsratecheck(&lastfail, &curfail, 1)) { + ffs_fserr(fs, ip->i_number, "filesystem full"); + uprintf("\n%s: write failed, filesystem " + "is full\n", fs->fs_fsmnt); + } + goto fail; + } + nb = newb; + MPASS(allocblk < allociblk + nitems(allociblk)); + MPASS(lbns_remfree < lbns + nitems(lbns)); + *allocblk++ = nb; + *lbns_remfree++ = lbn; + nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags); + nbp->b_blkno = fsbtodb(fs, nb); + if (flags & BA_CLRBUF) + vfs_bio_clrbuf(nbp); + if (DOINGSOFTDEP(vp)) + softdep_setup_allocindir_page(ip, lbn, bp, + indirs[i].in_off, nb, 0, nbp); + bap[indirs[i].in_off] = nb; + /* + * If required, write synchronously, otherwise use + * delayed write. + */ + if (flags & IO_SYNC) { + bwrite(bp); + } else { + if (bp->b_bufsize == fs->fs_bsize) + bp->b_flags |= B_CLUSTEROK; + bdwrite(bp); + } + curthread_pflags_restore(saved_inbdflush); + *bpp = nbp; + return (0); + } + brelse(bp); + /* + * If requested clear invalid portions of the buffer. If we + * have to do a read-before-write (typical if BA_CLRBUF is set), + * try to do some read-ahead in the sequential case to reduce + * the number of I/O transactions. + */ + if (flags & BA_CLRBUF) { + int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT; + if (seqcount != 0 && + (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 && + !(vm_page_count_severe() || buf_dirty_count_severe())) { + error = cluster_read(vp, ip->i_size, lbn, + (int)fs->fs_bsize, NOCRED, + MAXBSIZE, seqcount, gbflags, &nbp); + } else { + error = bread_gb(vp, lbn, (int)fs->fs_bsize, + NOCRED, gbflags, &nbp); + } + if (error) { + brelse(nbp); + goto fail; + } + } else { + nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags); + nbp->b_blkno = fsbtodb(fs, nb); + } + curthread_pflags_restore(saved_inbdflush); + *bpp = nbp; + return (0); +fail: + curthread_pflags_restore(saved_inbdflush); + /* + * If we have failed to allocate any blocks, simply return the error. + * This is the usual case and avoids the need to fsync the file. + */ + if (allocblk == allociblk && allocib == NULL && unwindidx == -1) + return (error); + /* + * If we have failed part way through block allocation, we + * have to deallocate any indirect blocks that we have allocated. + * We have to fsync the file before we start to get rid of all + * of its dependencies so that we do not leave them dangling. + * We have to sync it at the end so that the soft updates code + * does not find any untracked changes. Although this is really + * slow, running out of disk space is not expected to be a common + * occurrence. The error return from fsync is ignored as we already + * have an error to return to the user. 
+ * + * XXX Still have to journal the free below + */ + (void) ffs_syncvnode(vp, MNT_WAIT, 0); + for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns; + blkp < allocblk; blkp++, lbns_remfree++) { + /* + * We shall not leave the freed blocks on the vnode + * buffer object lists. + */ + bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, + GB_NOCREAT | GB_UNMAPPED); + if (bp != NULL) { + KASSERT(bp->b_blkno == fsbtodb(fs, *blkp), + ("mismatch2 l %jd %jd b %ju %ju", + (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree, + (uintmax_t)bp->b_blkno, + (uintmax_t)fsbtodb(fs, *blkp))); + bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE; + bp->b_flags &= ~(B_ASYNC | B_CACHE); + brelse(bp); + } + deallocated += fs->fs_bsize; + } + if (allocib != NULL) { + *allocib = 0; + } else if (unwindidx >= 0) { + int r; + + r = bread(vp, indirs[unwindidx].in_lbn, + (int)fs->fs_bsize, NOCRED, &bp); + if (r) { + panic("Could not unwind indirect block, error %d", r); + brelse(bp); + } else { + bap = (ufs2_daddr_t *)bp->b_data; + bap[indirs[unwindidx].in_off] = 0; + if (flags & IO_SYNC) { + bwrite(bp); + } else { + if (bp->b_bufsize == fs->fs_bsize) + bp->b_flags |= B_CLUSTEROK; + bdwrite(bp); + } + } + } + if (deallocated) { +#ifdef QUOTA + /* + * Restore user's disk quota because allocation failed. + */ + (void) chkdq(ip, -btodb(deallocated), cred, FORCE); +#endif + dp->di_blocks -= btodb(deallocated); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + } + (void) ffs_syncvnode(vp, MNT_WAIT, 0); + /* + * After the buffers are invalidated and on-disk pointers are + * cleared, free the blocks. + */ + for (blkp = allociblk; blkp < allocblk; blkp++) { +#ifdef INVARIANTS + if (blkp == allociblk) + lbns_remfree = lbns; + bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, + GB_NOCREAT | GB_UNMAPPED); + if (bp != NULL) { + panic("zombie2 %jd %ju %ju", + (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno, + (uintmax_t)fsbtodb(fs, *blkp)); + } + lbns_remfree++; +#endif + ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize, + ip->i_number, vp->v_type, NULL); + } + return (error); +} diff --git a/Dump/ufs/ffs/ffs_extern.h b/Dump/ufs/ffs/ffs_extern.h new file mode 100644 index 0000000..f50b403 --- /dev/null +++ b/Dump/ufs/ffs/ffs_extern.h @@ -0,0 +1,200 @@ +/*- + * Copyright (c) 1991, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ffs_extern.h 8.6 (Berkeley) 3/30/95 + * $FreeBSD: releng/11.2/sys/ufs/ffs/ffs_extern.h 331722 2018-03-29 02:50:57Z eadler $ + */ + +#ifndef _UFS_FFS_EXTERN_H +#define _UFS_FFS_EXTERN_H + +#ifndef _KERNEL +#error "No user-serving parts inside" +#else + +struct buf; +struct cg; +struct fid; +struct fs; +struct inode; +struct malloc_type; +struct mount; +struct thread; +struct sockaddr; +struct statfs; +struct ucred; +struct vnode; +struct vop_fsync_args; +struct vop_reallocblks_args; +struct workhead; + +int ffs_alloc(struct inode *, ufs2_daddr_t, ufs2_daddr_t, int, int, + struct ucred *, ufs2_daddr_t *); +int ffs_balloc_ufs1(struct vnode *a_vp, off_t a_startoffset, int a_size, + struct ucred *a_cred, int a_flags, struct buf **a_bpp); +int ffs_balloc_ufs2(struct vnode *a_vp, off_t a_startoffset, int a_size, + struct ucred *a_cred, int a_flags, struct buf **a_bpp); +int ffs_blkatoff(struct vnode *, off_t, char **, struct buf **); +void ffs_blkfree(struct ufsmount *, struct fs *, struct vnode *, + ufs2_daddr_t, long, ino_t, enum vtype, struct workhead *); +ufs2_daddr_t ffs_blkpref_ufs1(struct inode *, ufs_lbn_t, int, ufs1_daddr_t *); +ufs2_daddr_t ffs_blkpref_ufs2(struct inode *, ufs_lbn_t, int, ufs2_daddr_t *); +int ffs_checkfreefile(struct fs *, struct vnode *, ino_t); +void ffs_clrblock(struct fs *, u_char *, ufs1_daddr_t); +void ffs_clusteracct(struct fs *, struct cg *, ufs1_daddr_t, int); +void ffs_bdflush(struct bufobj *, struct buf *); +int ffs_copyonwrite(struct vnode *, struct buf *); +int ffs_flushfiles(struct mount *, int, struct thread *); +void ffs_fragacct(struct fs *, int, int32_t [], int); +int ffs_freefile(struct ufsmount *, struct fs *, struct vnode *, ino_t, + int, struct workhead *); +void ffs_fserr(struct fs *, ino_t, char *); +int ffs_isblock(struct fs *, u_char *, ufs1_daddr_t); +int ffs_isfreeblock(struct fs *, u_char *, ufs1_daddr_t); +void ffs_load_inode(struct buf *, struct inode *, struct fs *, ino_t); +void ffs_oldfscompat_write(struct fs *, struct ufsmount *); +int ffs_own_mount(const struct mount *mp); +int ffs_reallocblks(struct vop_reallocblks_args *); +int ffs_realloccg(struct inode *, ufs2_daddr_t, ufs2_daddr_t, + ufs2_daddr_t, int, int, int, struct ucred *, struct buf **); +int ffs_reload(struct mount *, struct thread *, int); +int ffs_sbupdate(struct ufsmount *, int, int); +void ffs_setblock(struct fs *, u_char *, ufs1_daddr_t); +int ffs_snapblkfree(struct fs *, struct vnode *, ufs2_daddr_t, long, ino_t, + enum vtype, struct workhead *); +void ffs_snapremove(struct vnode *vp); +int ffs_snapshot(struct mount *mp, char *snapfile); +void ffs_snapshot_mount(struct mount *mp); +void ffs_snapshot_unmount(struct mount *mp); +void process_deferred_inactive(struct mount *mp); +void ffs_sync_snap(struct mount *, int); +int ffs_syncvnode(struct vnode *vp, int waitfor, int flags); +int ffs_truncate(struct vnode *, off_t, int, struct ucred *); +int ffs_update(struct vnode *, int); +int ffs_valloc(struct vnode *, int, struct ucred *, 
struct vnode **); + +int ffs_vfree(struct vnode *, ino_t, int); +vfs_vget_t ffs_vget; +int ffs_vgetf(struct mount *, ino_t, int, struct vnode **, int); +void ffs_susp_initialize(void); +void ffs_susp_uninitialize(void); + +#define FFSV_FORCEINSMQ 0x0001 + +#define FFSR_FORCE 0x0001 +#define FFSR_UNSUSPEND 0x0002 + +extern struct vop_vector ffs_vnodeops1; +extern struct vop_vector ffs_fifoops1; +extern struct vop_vector ffs_vnodeops2; +extern struct vop_vector ffs_fifoops2; + +/* + * Soft update function prototypes. + */ + +int softdep_check_suspend(struct mount *, struct vnode *, + int, int, int, int); +void softdep_get_depcounts(struct mount *, int *, int *); +void softdep_initialize(void); +void softdep_uninitialize(void); +int softdep_mount(struct vnode *, struct mount *, struct fs *, + struct ucred *); +void softdep_unmount(struct mount *); +int softdep_move_dependencies(struct buf *, struct buf *); +int softdep_flushworklist(struct mount *, int *, struct thread *); +int softdep_flushfiles(struct mount *, int, struct thread *); +void softdep_update_inodeblock(struct inode *, struct buf *, int); +void softdep_load_inodeblock(struct inode *); +void softdep_freefile(struct vnode *, ino_t, int); +int softdep_request_cleanup(struct fs *, struct vnode *, + struct ucred *, int); +void softdep_setup_freeblocks(struct inode *, off_t, int); +void softdep_setup_inomapdep(struct buf *, struct inode *, ino_t, int); +void softdep_setup_blkmapdep(struct buf *, struct mount *, ufs2_daddr_t, + int, int); +void softdep_setup_allocdirect(struct inode *, ufs_lbn_t, ufs2_daddr_t, + ufs2_daddr_t, long, long, struct buf *); +void softdep_setup_allocext(struct inode *, ufs_lbn_t, ufs2_daddr_t, + ufs2_daddr_t, long, long, struct buf *); +void softdep_setup_allocindir_meta(struct buf *, struct inode *, + struct buf *, int, ufs2_daddr_t); +void softdep_setup_allocindir_page(struct inode *, ufs_lbn_t, + struct buf *, int, ufs2_daddr_t, ufs2_daddr_t, struct buf *); +void softdep_setup_blkfree(struct mount *, struct buf *, ufs2_daddr_t, int, + struct workhead *); +void softdep_setup_inofree(struct mount *, struct buf *, ino_t, + struct workhead *); +void softdep_setup_sbupdate(struct ufsmount *, struct fs *, struct buf *); +void softdep_fsync_mountdev(struct vnode *); +int softdep_sync_metadata(struct vnode *); +int softdep_sync_buf(struct vnode *, struct buf *, int); +int softdep_fsync(struct vnode *); +int softdep_prealloc(struct vnode *, int); +int softdep_journal_lookup(struct mount *, struct vnode **); +void softdep_journal_freeblocks(struct inode *, struct ucred *, off_t, int); +void softdep_journal_fsync(struct inode *); +void softdep_buf_append(struct buf *, struct workhead *); +void softdep_inode_append(struct inode *, struct ucred *, struct workhead *); +void softdep_freework(struct workhead *); + + +/* + * Things to request flushing in softdep_request_cleanup() + */ +#define FLUSH_INODES 1 +#define FLUSH_INODES_WAIT 2 +#define FLUSH_BLOCKS 3 +#define FLUSH_BLOCKS_WAIT 4 +/* + * Flag to ffs_syncvnode() to request flushing of data only, + * but skip the ffs_update() on the inode itself. Used to avoid + * deadlock when flushing snapshot inodes while holding snaplk. + */ +#define NO_INO_UPDT 0x00000001 +/* + * Request data sync only from ffs_syncvnode(), not touching even more + * metadata than NO_INO_UPDT. 
+ */ +#define DATA_ONLY 0x00000002 + +int ffs_rdonly(struct inode *); + +TAILQ_HEAD(snaphead, inode); + +struct snapdata { + LIST_ENTRY(snapdata) sn_link; + struct snaphead sn_head; + daddr_t sn_listsize; + daddr_t *sn_blklist; + struct lock sn_lock; +}; + +#endif /* _KERNEL */ + +#endif /* !_UFS_FFS_EXTERN_H */ diff --git a/Dump/ufs/ffs/ffs_inode.c b/Dump/ufs/ffs/ffs_inode.c new file mode 100644 index 0000000..1652f51 --- /dev/null +++ b/Dump/ufs/ffs/ffs_inode.c @@ -0,0 +1,765 @@ +/*- + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ffs_inode.c 8.13 (Berkeley) 4/21/95 + */ + +#include +__FBSDID("$FreeBSD: releng/11.2/sys/ufs/ffs/ffs_inode.c 331722 2018-03-29 02:50:57Z eadler $"); + +#include "opt_quota.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +static int ffs_indirtrunc(struct inode *, ufs2_daddr_t, ufs2_daddr_t, + ufs2_daddr_t, int, ufs2_daddr_t *); + +/* + * Update the access, modified, and inode change times as specified by the + * IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively. Write the inode + * to disk if the IN_MODIFIED flag is set (it may be set initially, or by + * the timestamp update). The IN_LAZYMOD flag is set to force a write + * later if not now. The IN_LAZYACCESS is set instead of IN_MODIFIED if the fs + * is currently being suspended (or is suspended) and vnode has been accessed. + * If we write now, then clear IN_MODIFIED, IN_LAZYACCESS and IN_LAZYMOD to + * reflect the presumably successful write, and if waitfor is set, then wait + * for the write to complete. 
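+ * (Concretely, in the code below: with waitfor set on a non-async
+ * mount the inode block goes out via bwrite(); under page or buffer
+ * pressure it is pushed asynchronously with bawrite(); otherwise a
+ * delayed write is scheduled with bdwrite().)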
+ */ +int +ffs_update(vp, waitfor) + struct vnode *vp; + int waitfor; +{ + struct fs *fs; + struct buf *bp; + struct inode *ip; + int flags, error; + + ASSERT_VOP_ELOCKED(vp, "ffs_update"); + ufs_itimes(vp); + ip = VTOI(vp); + if ((ip->i_flag & IN_MODIFIED) == 0 && waitfor == 0) + return (0); + ip->i_flag &= ~(IN_LAZYACCESS | IN_LAZYMOD | IN_MODIFIED); + fs = ITOFS(ip); + if (fs->fs_ronly && ITOUMP(ip)->um_fsckpid == 0) + return (0); + /* + * If we are updating a snapshot and another process is currently + * writing the buffer containing the inode for this snapshot then + * a deadlock can occur when it tries to check the snapshot to see + * if that block needs to be copied. Thus when updating a snapshot + * we check to see if the buffer is already locked, and if it is + * we drop the snapshot lock until the buffer has been written + * and is available to us. We have to grab a reference to the + * snapshot vnode to prevent it from being removed while we are + * waiting for the buffer. + */ + flags = 0; + if (IS_SNAPSHOT(ip)) + flags = GB_LOCK_NOWAIT; +loop: + error = breadn_flags(ITODEVVP(ip), + fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), + (int) fs->fs_bsize, 0, 0, 0, NOCRED, flags, &bp); + if (error != 0) { + if (error != EBUSY) + return (error); + KASSERT((IS_SNAPSHOT(ip)), ("EBUSY from non-snapshot")); + /* + * Wait for our inode block to become available. + * + * Hold a reference to the vnode to protect against + * ffs_snapgone(). Since we hold a reference, it can only + * get reclaimed (VI_DOOMED flag) in a forcible downgrade + * or unmount. For an unmount, the entire filesystem will be + * gone, so we cannot attempt to touch anything associated + * with it while the vnode is unlocked; all we can do is + * pause briefly and try again. If when we relock the vnode + * we discover that it has been reclaimed, updating it is no + * longer necessary and we can just return an error. + */ + vref(vp); + VOP_UNLOCK(vp, 0); + pause("ffsupd", 1); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + vrele(vp); + if ((vp->v_iflag & VI_DOOMED) != 0) + return (ENOENT); + goto loop; + } + if (DOINGSOFTDEP(vp)) + softdep_update_inodeblock(ip, bp, waitfor); + else if (ip->i_effnlink != ip->i_nlink) + panic("ffs_update: bad link cnt"); + if (I_IS_UFS1(ip)) { + *((struct ufs1_dinode *)bp->b_data + + ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1; + /* XXX: FIX? The entropy here is desirable, but the harvesting may be expensive */ + random_harvest_queue(&(ip->i_din1), sizeof(ip->i_din1), 1, RANDOM_FS_ATIME); + } else { + *((struct ufs2_dinode *)bp->b_data + + ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2; + /* XXX: FIX? The entropy here is desirable, but the harvesting may be expensive */ + random_harvest_queue(&(ip->i_din2), sizeof(ip->i_din2), 1, RANDOM_FS_ATIME); + } + if (waitfor && !DOINGASYNC(vp)) + error = bwrite(bp); + else if (vm_page_count_severe() || buf_dirty_count_severe()) { + bawrite(bp); + error = 0; + } else { + if (bp->b_bufsize == fs->fs_bsize) + bp->b_flags |= B_CLUSTEROK; + bdwrite(bp); + error = 0; + } + return (error); +} + +#define SINGLE 0 /* index of single indirect block */ +#define DOUBLE 1 /* index of double indirect block */ +#define TRIPLE 2 /* index of triple indirect block */ +/* + * Truncate the inode ip to at most length size, freeing the + * disk blocks. 
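+ * The flags argument selects what is truncated: IO_EXT for the
+ * external (extended attribute) data, IO_NORMAL for the regular data,
+ * or both.  Blocks released are counted in DEV_BSIZE units via btodb().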
+ */ +int +ffs_truncate(vp, length, flags, cred) + struct vnode *vp; + off_t length; + int flags; + struct ucred *cred; +{ + struct inode *ip; + ufs2_daddr_t bn, lbn, lastblock, lastiblock[NIADDR], indir_lbn[NIADDR]; + ufs2_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR]; + ufs2_daddr_t count, blocksreleased = 0, datablocks, blkno; + struct bufobj *bo; + struct fs *fs; + struct buf *bp; + struct ufsmount *ump; + int softdeptrunc, journaltrunc; + int needextclean, extblocks; + int offset, size, level, nblocks; + int i, error, allerror, indiroff; + off_t osize; + + ip = VTOI(vp); + ump = VFSTOUFS(vp->v_mount); + fs = ump->um_fs; + bo = &vp->v_bufobj; + + ASSERT_VOP_LOCKED(vp, "ffs_truncate"); + + if (length < 0) + return (EINVAL); + if (length > fs->fs_maxfilesize) + return (EFBIG); +#ifdef QUOTA + error = getinoquota(ip); + if (error) + return (error); +#endif + /* + * Historically clients did not have to specify which data + * they were truncating. So, if not specified, we assume + * traditional behavior, e.g., just the normal data. + */ + if ((flags & (IO_EXT | IO_NORMAL)) == 0) + flags |= IO_NORMAL; + if (!DOINGSOFTDEP(vp) && !DOINGASYNC(vp)) + flags |= IO_SYNC; + /* + * If we are truncating the extended-attributes, and cannot + * do it with soft updates, then do it slowly here. If we are + * truncating both the extended attributes and the file contents + * (e.g., the file is being unlinked), then pick it off with + * soft updates below. + */ + allerror = 0; + needextclean = 0; + softdeptrunc = 0; + journaltrunc = DOINGSUJ(vp); + if (journaltrunc == 0 && DOINGSOFTDEP(vp) && length == 0) + softdeptrunc = !softdep_slowdown(vp); + extblocks = 0; + datablocks = DIP(ip, i_blocks); + if (fs->fs_magic == FS_UFS2_MAGIC && ip->i_din2->di_extsize > 0) { + extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize)); + datablocks -= extblocks; + } + if ((flags & IO_EXT) && extblocks > 0) { + if (length != 0) + panic("ffs_truncate: partial trunc of extdata"); + if (softdeptrunc || journaltrunc) { + if ((flags & IO_NORMAL) == 0) + goto extclean; + needextclean = 1; + } else { + if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) + return (error); +#ifdef QUOTA + (void) chkdq(ip, -extblocks, NOCRED, 0); +#endif + vinvalbuf(vp, V_ALT, 0, 0); + vn_pages_remove(vp, + OFF_TO_IDX(lblktosize(fs, -extblocks)), 0); + osize = ip->i_din2->di_extsize; + ip->i_din2->di_blocks -= extblocks; + ip->i_din2->di_extsize = 0; + for (i = 0; i < NXADDR; i++) { + oldblks[i] = ip->i_din2->di_extb[i]; + ip->i_din2->di_extb[i] = 0; + } + ip->i_flag |= IN_CHANGE; + if ((error = ffs_update(vp, !DOINGASYNC(vp)))) + return (error); + for (i = 0; i < NXADDR; i++) { + if (oldblks[i] == 0) + continue; + ffs_blkfree(ump, fs, ITODEVVP(ip), oldblks[i], + sblksize(fs, osize, i), ip->i_number, + vp->v_type, NULL); + } + } + } + if ((flags & IO_NORMAL) == 0) + return (0); + if (vp->v_type == VLNK && + (ip->i_size < vp->v_mount->mnt_maxsymlinklen || + datablocks == 0)) { +#ifdef INVARIANTS + if (length != 0) + panic("ffs_truncate: partial truncate of symlink"); +#endif + bzero(SHORTLINK(ip), (u_int)ip->i_size); + ip->i_size = 0; + DIP_SET(ip, i_size, 0); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + if (needextclean) + goto extclean; + return (ffs_update(vp, !DOINGASYNC(vp))); + } + if (ip->i_size == length) { + ip->i_flag |= IN_CHANGE | IN_UPDATE; + if (needextclean) + goto extclean; + return (ffs_update(vp, 0)); + } + if (fs->fs_ronly) + panic("ffs_truncate: read-only filesystem"); + if (IS_SNAPSHOT(ip)) + ffs_snapremove(vp); + 
vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0; + osize = ip->i_size; + /* + * Lengthen the size of the file. We must ensure that the + * last byte of the file is allocated. Since the smallest + * value of osize is 0, length will be at least 1. + */ + if (osize < length) { + vnode_pager_setsize(vp, length); + flags |= BA_CLRBUF; + error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp); + if (error) { + vnode_pager_setsize(vp, osize); + return (error); + } + ip->i_size = length; + DIP_SET(ip, i_size, length); + if (bp->b_bufsize == fs->fs_bsize) + bp->b_flags |= B_CLUSTEROK; + if (flags & IO_SYNC) + bwrite(bp); + else if (DOINGASYNC(vp)) + bdwrite(bp); + else + bawrite(bp); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + return (ffs_update(vp, !DOINGASYNC(vp))); + } + /* + * Lookup block number for a given offset. Zero length files + * have no blocks, so return a blkno of -1. + */ + lbn = lblkno(fs, length - 1); + if (length == 0) { + blkno = -1; + } else if (lbn < NDADDR) { + blkno = DIP(ip, i_db[lbn]); + } else { + error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), fs->fs_bsize, + cred, BA_METAONLY, &bp); + if (error) + return (error); + indiroff = (lbn - NDADDR) % NINDIR(fs); + if (I_IS_UFS1(ip)) + blkno = ((ufs1_daddr_t *)(bp->b_data))[indiroff]; + else + blkno = ((ufs2_daddr_t *)(bp->b_data))[indiroff]; + /* + * If the block number is non-zero, then the indirect block + * must have been previously allocated and need not be written. + * If the block number is zero, then we may have allocated + * the indirect block and hence need to write it out. + */ + if (blkno != 0) + brelse(bp); + else if (DOINGSOFTDEP(vp) || DOINGASYNC(vp)) + bdwrite(bp); + else + bwrite(bp); + } + /* + * If the block number at the new end of the file is zero, + * then we must allocate it to ensure that the last block of + * the file is allocated. Soft updates does not handle this + * case, so here we have to clean up the soft updates data + * structures describing the allocation past the truncation + * point. Finding and deallocating those structures is a lot of + * work. Since partial truncation with a hole at the end occurs + * rarely, we solve the problem by syncing the file so that it + * will have no soft updates data structures left. + */ + if (blkno == 0 && (error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) + return (error); + if (blkno != 0 && DOINGSOFTDEP(vp)) { + if (softdeptrunc == 0 && journaltrunc == 0) { + /* + * If soft updates cannot handle this truncation, + * clean up soft dependency data structures and + * fall through to the synchronous truncation. + */ + if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) + return (error); + } else { + flags = IO_NORMAL | (needextclean ? IO_EXT: 0); + if (journaltrunc) + softdep_journal_freeblocks(ip, cred, length, + flags); + else + softdep_setup_freeblocks(ip, length, flags); + ASSERT_VOP_LOCKED(vp, "ffs_truncate1"); + if (journaltrunc == 0) { + ip->i_flag |= IN_CHANGE | IN_UPDATE; + error = ffs_update(vp, 0); + } + return (error); + } + } + /* + * Shorten the size of the file. If the last block of the + * shortened file is unallocated, we must allocate it. + * Additionally, if the file is not being truncated to a + * block boundary, the contents of the partial block + * following the end of the file must be zero'ed in + * case it ever becomes accessible again because of + * subsequent file growth. Directories however are not + * zero'ed as they should grow back initialized to empty. 
+ */ + offset = blkoff(fs, length); + if (blkno != 0 && offset == 0) { + ip->i_size = length; + DIP_SET(ip, i_size, length); + } else { + lbn = lblkno(fs, length); + flags |= BA_CLRBUF; + error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp); + if (error) + return (error); + /* + * When we are doing soft updates and the UFS_BALLOC + * above fills in a direct block hole with a full sized + * block that will be truncated down to a fragment below, + * we must flush out the block dependency with an FSYNC + * so that we do not get a soft updates inconsistency + * when we create the fragment below. + */ + if (DOINGSOFTDEP(vp) && lbn < NDADDR && + fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize && + (error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) + return (error); + ip->i_size = length; + DIP_SET(ip, i_size, length); + size = blksize(fs, ip, lbn); + if (vp->v_type != VDIR && offset != 0) + bzero((char *)bp->b_data + offset, + (u_int)(size - offset)); + /* Kirk's code has reallocbuf(bp, size, 1) here */ + allocbuf(bp, size); + if (bp->b_bufsize == fs->fs_bsize) + bp->b_flags |= B_CLUSTEROK; + if (flags & IO_SYNC) + bwrite(bp); + else if (DOINGASYNC(vp)) + bdwrite(bp); + else + bawrite(bp); + } + /* + * Calculate index into inode's block list of + * last direct and indirect blocks (if any) + * which we want to keep. Lastblock is -1 when + * the file is truncated to 0. + */ + lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; + lastiblock[SINGLE] = lastblock - NDADDR; + lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); + lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); + nblocks = btodb(fs->fs_bsize); + /* + * Update file and block pointers on disk before we start freeing + * blocks. If we crash before free'ing blocks below, the blocks + * will be returned to the free list. lastiblock values are also + * normalized to -1 for calls to ffs_indirtrunc below. + */ + for (level = TRIPLE; level >= SINGLE; level--) { + oldblks[NDADDR + level] = DIP(ip, i_ib[level]); + if (lastiblock[level] < 0) { + DIP_SET(ip, i_ib[level], 0); + lastiblock[level] = -1; + } + } + for (i = 0; i < NDADDR; i++) { + oldblks[i] = DIP(ip, i_db[i]); + if (i > lastblock) + DIP_SET(ip, i_db[i], 0); + } + ip->i_flag |= IN_CHANGE | IN_UPDATE; + allerror = ffs_update(vp, !DOINGASYNC(vp)); + + /* + * Having written the new inode to disk, save its new configuration + * and put back the old block pointers long enough to process them. + * Note that we save the new block configuration so we can check it + * when we are done. + */ + for (i = 0; i < NDADDR; i++) { + newblks[i] = DIP(ip, i_db[i]); + DIP_SET(ip, i_db[i], oldblks[i]); + } + for (i = 0; i < NIADDR; i++) { + newblks[NDADDR + i] = DIP(ip, i_ib[i]); + DIP_SET(ip, i_ib[i], oldblks[NDADDR + i]); + } + ip->i_size = osize; + DIP_SET(ip, i_size, osize); + + error = vtruncbuf(vp, cred, length, fs->fs_bsize); + if (error && (allerror == 0)) + allerror = error; + + /* + * Indirect blocks first. 
+ */ + indir_lbn[SINGLE] = -NDADDR; + indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1; + indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1; + for (level = TRIPLE; level >= SINGLE; level--) { + bn = DIP(ip, i_ib[level]); + if (bn != 0) { + error = ffs_indirtrunc(ip, indir_lbn[level], + fsbtodb(fs, bn), lastiblock[level], level, &count); + if (error) + allerror = error; + blocksreleased += count; + if (lastiblock[level] < 0) { + DIP_SET(ip, i_ib[level], 0); + ffs_blkfree(ump, fs, ump->um_devvp, bn, + fs->fs_bsize, ip->i_number, + vp->v_type, NULL); + blocksreleased += nblocks; + } + } + if (lastiblock[level] >= 0) + goto done; + } + + /* + * All whole direct blocks or frags. + */ + for (i = NDADDR - 1; i > lastblock; i--) { + long bsize; + + bn = DIP(ip, i_db[i]); + if (bn == 0) + continue; + DIP_SET(ip, i_db[i], 0); + bsize = blksize(fs, ip, i); + ffs_blkfree(ump, fs, ump->um_devvp, bn, bsize, ip->i_number, + vp->v_type, NULL); + blocksreleased += btodb(bsize); + } + if (lastblock < 0) + goto done; + + /* + * Finally, look for a change in size of the + * last direct block; release any frags. + */ + bn = DIP(ip, i_db[lastblock]); + if (bn != 0) { + long oldspace, newspace; + + /* + * Calculate amount of space we're giving + * back as old block size minus new block size. + */ + oldspace = blksize(fs, ip, lastblock); + ip->i_size = length; + DIP_SET(ip, i_size, length); + newspace = blksize(fs, ip, lastblock); + if (newspace == 0) + panic("ffs_truncate: newspace"); + if (oldspace - newspace > 0) { + /* + * Block number of space to be free'd is + * the old block # plus the number of frags + * required for the storage we're keeping. + */ + bn += numfrags(fs, newspace); + ffs_blkfree(ump, fs, ump->um_devvp, bn, + oldspace - newspace, ip->i_number, vp->v_type, NULL); + blocksreleased += btodb(oldspace - newspace); + } + } +done: +#ifdef INVARIANTS + for (level = SINGLE; level <= TRIPLE; level++) + if (newblks[NDADDR + level] != DIP(ip, i_ib[level])) + panic("ffs_truncate1"); + for (i = 0; i < NDADDR; i++) + if (newblks[i] != DIP(ip, i_db[i])) + panic("ffs_truncate2"); + BO_LOCK(bo); + if (length == 0 && + (fs->fs_magic != FS_UFS2_MAGIC || ip->i_din2->di_extsize == 0) && + (bo->bo_dirty.bv_cnt > 0 || bo->bo_clean.bv_cnt > 0)) + panic("ffs_truncate3"); + BO_UNLOCK(bo); +#endif /* INVARIANTS */ + /* + * Put back the real size. + */ + ip->i_size = length; + DIP_SET(ip, i_size, length); + if (DIP(ip, i_blocks) >= blocksreleased) + DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - blocksreleased); + else /* sanity */ + DIP_SET(ip, i_blocks, 0); + ip->i_flag |= IN_CHANGE; +#ifdef QUOTA + (void) chkdq(ip, -blocksreleased, NOCRED, 0); +#endif + return (allerror); + +extclean: + if (journaltrunc) + softdep_journal_freeblocks(ip, cred, length, IO_EXT); + else + softdep_setup_freeblocks(ip, length, IO_EXT); + return (ffs_update(vp, (flags & IO_SYNC) != 0 || !DOINGASYNC(vp))); +} + +/* + * Release blocks associated with the inode ip and stored in the indirect + * block bn. Blocks are free'd in LIFO order up to (but not including) + * lastbn. If level is greater than SINGLE, the block is an indirect block + * and recursive calls to indirtrunc must be used to cleanse other indirect + * blocks. 
+ */ +static int +ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp) + struct inode *ip; + ufs2_daddr_t lbn, lastbn; + ufs2_daddr_t dbn; + int level; + ufs2_daddr_t *countp; +{ + struct buf *bp; + struct fs *fs; + struct vnode *vp; + caddr_t copy = NULL; + int i, nblocks, error = 0, allerror = 0; + ufs2_daddr_t nb, nlbn, last; + ufs2_daddr_t blkcount, factor, blocksreleased = 0; + ufs1_daddr_t *bap1 = NULL; + ufs2_daddr_t *bap2 = NULL; +#define BAP(ip, i) (I_IS_UFS1(ip) ? bap1[i] : bap2[i]) + + fs = ITOFS(ip); + + /* + * Calculate index in current block of last + * block to be kept. -1 indicates the entire + * block so we need not calculate the index. + */ + factor = lbn_offset(fs, level); + last = lastbn; + if (lastbn > 0) + last /= factor; + nblocks = btodb(fs->fs_bsize); + /* + * Get buffer of block pointers, zero those entries corresponding + * to blocks to be free'd, and update on disk copy first. Since + * double(triple) indirect before single(double) indirect, calls + * to bmap on these blocks will fail. However, we already have + * the on disk address, so we have to set the b_blkno field + * explicitly instead of letting bread do everything for us. + */ + vp = ITOV(ip); + bp = getblk(vp, lbn, (int)fs->fs_bsize, 0, 0, 0); + if ((bp->b_flags & B_CACHE) == 0) { +#ifdef RACCT + if (racct_enable) { + PROC_LOCK(curproc); + racct_add_buf(curproc, bp, 0); + PROC_UNLOCK(curproc); + } +#endif /* RACCT */ + curthread->td_ru.ru_inblock++; /* pay for read */ + bp->b_iocmd = BIO_READ; + bp->b_flags &= ~B_INVAL; + bp->b_ioflags &= ~BIO_ERROR; + if (bp->b_bcount > bp->b_bufsize) + panic("ffs_indirtrunc: bad buffer size"); + bp->b_blkno = dbn; + vfs_busy_pages(bp, 0); + bp->b_iooffset = dbtob(bp->b_blkno); + bstrategy(bp); + error = bufwait(bp); + } + if (error) { + brelse(bp); + *countp = 0; + return (error); + } + + if (I_IS_UFS1(ip)) + bap1 = (ufs1_daddr_t *)bp->b_data; + else + bap2 = (ufs2_daddr_t *)bp->b_data; + if (lastbn != -1) { + copy = malloc(fs->fs_bsize, M_TEMP, M_WAITOK); + bcopy((caddr_t)bp->b_data, copy, (u_int)fs->fs_bsize); + for (i = last + 1; i < NINDIR(fs); i++) + if (I_IS_UFS1(ip)) + bap1[i] = 0; + else + bap2[i] = 0; + if (DOINGASYNC(vp)) { + bdwrite(bp); + } else { + error = bwrite(bp); + if (error) + allerror = error; + } + if (I_IS_UFS1(ip)) + bap1 = (ufs1_daddr_t *)copy; + else + bap2 = (ufs2_daddr_t *)copy; + } + + /* + * Recursively free totally unused blocks. + */ + for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; + i--, nlbn += factor) { + nb = BAP(ip, i); + if (nb == 0) + continue; + if (level > SINGLE) { + if ((error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), + (ufs2_daddr_t)-1, level - 1, &blkcount)) != 0) + allerror = error; + blocksreleased += blkcount; + } + ffs_blkfree(ITOUMP(ip), fs, ITODEVVP(ip), nb, fs->fs_bsize, + ip->i_number, vp->v_type, NULL); + blocksreleased += nblocks; + } + + /* + * Recursively free last partial block. 
+ */ + if (level > SINGLE && lastbn >= 0) { + last = lastbn % factor; + nb = BAP(ip, i); + if (nb != 0) { + error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), + last, level - 1, &blkcount); + if (error) + allerror = error; + blocksreleased += blkcount; + } + } + if (copy != NULL) { + free(copy, M_TEMP); + } else { + bp->b_flags |= B_INVAL | B_NOCACHE; + brelse(bp); + } + + *countp = blocksreleased; + return (allerror); +} + +int +ffs_rdonly(struct inode *ip) +{ + + return (ITOFS(ip)->fs_ronly != 0); +} + diff --git a/Dump/ufs/ffs/ffs_rawread.c b/Dump/ufs/ffs/ffs_rawread.c new file mode 100644 index 0000000..4cb577a --- /dev/null +++ b/Dump/ufs/ffs/ffs_rawread.c @@ -0,0 +1,474 @@ +/*- + * Copyright (c) 2000-2003 Tor Egge + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD: releng/11.2/sys/ufs/ffs/ffs_rawread.c 318266 2017-05-14 11:51:30Z kib $"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +static int ffs_rawread_readahead(struct vnode *vp, + caddr_t udata, + off_t offset, + size_t len, + struct thread *td, + struct buf *bp); +static int ffs_rawread_main(struct vnode *vp, + struct uio *uio); + +static int ffs_rawread_sync(struct vnode *vp); + +int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone); + +SYSCTL_DECL(_vfs_ffs); + +static int ffsrawbufcnt = 4; +SYSCTL_INT(_vfs_ffs, OID_AUTO, ffsrawbufcnt, CTLFLAG_RD, &ffsrawbufcnt, 0, + "Buffers available for raw reads"); + +static int allowrawread = 1; +SYSCTL_INT(_vfs_ffs, OID_AUTO, allowrawread, CTLFLAG_RW, &allowrawread, 0, + "Flag to enable raw reads"); + +static int rawreadahead = 1; +SYSCTL_INT(_vfs_ffs, OID_AUTO, rawreadahead, CTLFLAG_RW, &rawreadahead, 0, + "Flag to enable readahead for long raw reads"); + +static void +ffs_rawread_setup(void *arg __unused) +{ + + ffsrawbufcnt = (nswbuf > 100 ) ? 
(nswbuf - (nswbuf >> 4)) : nswbuf - 8; +} +SYSINIT(ffs_raw, SI_SUB_VM_CONF, SI_ORDER_ANY, ffs_rawread_setup, NULL); + +static int +ffs_rawread_sync(struct vnode *vp) +{ + int error; + int upgraded; + struct bufobj *bo; + struct mount *mp; + vm_object_t obj; + + /* Check for dirty mmap, pending writes and dirty buffers */ + bo = &vp->v_bufobj; + BO_LOCK(bo); + VI_LOCK(vp); + if (bo->bo_numoutput > 0 || + bo->bo_dirty.bv_cnt > 0 || + ((obj = vp->v_object) != NULL && + (obj->flags & OBJ_MIGHTBEDIRTY) != 0)) { + VI_UNLOCK(vp); + BO_UNLOCK(bo); + + if (vn_start_write(vp, &mp, V_NOWAIT) != 0) { + if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) + upgraded = 1; + else + upgraded = 0; + VOP_UNLOCK(vp, 0); + (void) vn_start_write(vp, &mp, V_WAIT); + VOP_LOCK(vp, LK_EXCLUSIVE); + } else if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { + upgraded = 1; + /* Upgrade to exclusive lock, this might block */ + VOP_LOCK(vp, LK_UPGRADE); + } else + upgraded = 0; + + + VI_LOCK(vp); + /* Check if vnode was reclaimed while unlocked. */ + if ((vp->v_iflag & VI_DOOMED) != 0) { + VI_UNLOCK(vp); + if (upgraded != 0) + VOP_LOCK(vp, LK_DOWNGRADE); + vn_finished_write(mp); + return (EIO); + } + /* Attempt to msync mmap() regions to clean dirty mmap */ + if ((obj = vp->v_object) != NULL && + (obj->flags & OBJ_MIGHTBEDIRTY) != 0) { + VI_UNLOCK(vp); + VM_OBJECT_WLOCK(obj); + vm_object_page_clean(obj, 0, 0, OBJPC_SYNC); + VM_OBJECT_WUNLOCK(obj); + } else + VI_UNLOCK(vp); + + /* Wait for pending writes to complete */ + BO_LOCK(bo); + error = bufobj_wwait(&vp->v_bufobj, 0, 0); + if (error != 0) { + /* XXX: can't happen with a zero timeout ??? */ + BO_UNLOCK(bo); + if (upgraded != 0) + VOP_LOCK(vp, LK_DOWNGRADE); + vn_finished_write(mp); + return (error); + } + /* Flush dirty buffers */ + if (bo->bo_dirty.bv_cnt > 0) { + BO_UNLOCK(bo); + if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) { + if (upgraded != 0) + VOP_LOCK(vp, LK_DOWNGRADE); + vn_finished_write(mp); + return (error); + } + BO_LOCK(bo); + if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0) + panic("ffs_rawread_sync: dirty bufs"); + } + BO_UNLOCK(bo); + if (upgraded != 0) + VOP_LOCK(vp, LK_DOWNGRADE); + vn_finished_write(mp); + } else { + VI_UNLOCK(vp); + BO_UNLOCK(bo); + } + return 0; +} + + +static int +ffs_rawread_readahead(struct vnode *vp, + caddr_t udata, + off_t offset, + size_t len, + struct thread *td, + struct buf *bp) +{ + int error; + u_int iolen; + off_t blockno; + int blockoff; + int bsize; + struct vnode *dp; + int bforwards; + struct inode *ip; + ufs2_daddr_t blkno; + + bsize = vp->v_mount->mnt_stat.f_iosize; + + ip = VTOI(vp); + dp = ITODEVVP(ip); + + iolen = ((vm_offset_t) udata) & PAGE_MASK; + bp->b_bcount = len; + if (bp->b_bcount + iolen > bp->b_kvasize) { + bp->b_bcount = bp->b_kvasize; + if (iolen != 0) + bp->b_bcount -= PAGE_SIZE; + } + bp->b_flags = 0; /* XXX necessary ? 
*/ + bp->b_iocmd = BIO_READ; + bp->b_iodone = bdone; + bp->b_data = udata; + blockno = offset / bsize; + blockoff = (offset % bsize) / DEV_BSIZE; + if ((daddr_t) blockno != blockno) { + return EINVAL; /* blockno overflow */ + } + + bp->b_lblkno = bp->b_blkno = blockno; + + error = ufs_bmaparray(vp, bp->b_lblkno, &blkno, NULL, &bforwards, NULL); + if (error != 0) + return error; + if (blkno == -1) { + + /* Fill holes with NULs to preserve semantics */ + + if (bp->b_bcount + blockoff * DEV_BSIZE > bsize) + bp->b_bcount = bsize - blockoff * DEV_BSIZE; + bp->b_bufsize = bp->b_bcount; + + if (vmapbuf(bp, 1) < 0) + return EFAULT; + + maybe_yield(); + bzero(bp->b_data, bp->b_bufsize); + + /* Mark operation completed (similar to bufdone()) */ + + bp->b_resid = 0; + bp->b_flags |= B_DONE; + return 0; + } + bp->b_blkno = blkno + blockoff; + bp->b_offset = bp->b_iooffset = (blkno + blockoff) * DEV_BSIZE; + + if (bp->b_bcount + blockoff * DEV_BSIZE > bsize * (1 + bforwards)) + bp->b_bcount = bsize * (1 + bforwards) - blockoff * DEV_BSIZE; + bp->b_bufsize = bp->b_bcount; + + if (vmapbuf(bp, 1) < 0) + return EFAULT; + + BO_STRATEGY(&dp->v_bufobj, bp); + return 0; +} + + +static int +ffs_rawread_main(struct vnode *vp, + struct uio *uio) +{ + int error, nerror; + struct buf *bp, *nbp, *tbp; + u_int iolen; + caddr_t udata; + long resid; + off_t offset; + struct thread *td; + + td = uio->uio_td ? uio->uio_td : curthread; + udata = uio->uio_iov->iov_base; + resid = uio->uio_resid; + offset = uio->uio_offset; + + /* + * keep the process from being swapped + */ + PHOLD(td->td_proc); + + error = 0; + nerror = 0; + + bp = NULL; + nbp = NULL; + + while (resid > 0) { + + if (bp == NULL) { /* Setup first read */ + /* XXX: Leave some bufs for swap */ + bp = getpbuf(&ffsrawbufcnt); + pbgetvp(vp, bp); + error = ffs_rawread_readahead(vp, udata, offset, + resid, td, bp); + if (error != 0) + break; + + if (resid > bp->b_bufsize) { /* Setup fist readahead */ + /* XXX: Leave bufs for swap */ + if (rawreadahead != 0) + nbp = trypbuf(&ffsrawbufcnt); + else + nbp = NULL; + if (nbp != NULL) { + pbgetvp(vp, nbp); + + nerror = ffs_rawread_readahead(vp, + udata + + bp->b_bufsize, + offset + + bp->b_bufsize, + resid - + bp->b_bufsize, + td, + nbp); + if (nerror) { + pbrelvp(nbp); + relpbuf(nbp, &ffsrawbufcnt); + nbp = NULL; + } + } + } + } + + bwait(bp, PRIBIO, "rawrd"); + vunmapbuf(bp); + + iolen = bp->b_bcount - bp->b_resid; + if (iolen == 0 && (bp->b_ioflags & BIO_ERROR) == 0) { + nerror = 0; /* Ignore possible beyond EOF error */ + break; /* EOF */ + } + + if ((bp->b_ioflags & BIO_ERROR) != 0) { + error = bp->b_error; + break; + } + resid -= iolen; + udata += iolen; + offset += iolen; + if (iolen < bp->b_bufsize) { + /* Incomplete read. 
Try to read remaining part */ + error = ffs_rawread_readahead(vp, + udata, + offset, + bp->b_bufsize - iolen, + td, + bp); + if (error != 0) + break; + } else if (nbp != NULL) { /* Complete read with readahead */ + + tbp = bp; + bp = nbp; + nbp = tbp; + + if (resid <= bp->b_bufsize) { /* No more readaheads */ + pbrelvp(nbp); + relpbuf(nbp, &ffsrawbufcnt); + nbp = NULL; + } else { /* Setup next readahead */ + nerror = ffs_rawread_readahead(vp, + udata + + bp->b_bufsize, + offset + + bp->b_bufsize, + resid - + bp->b_bufsize, + td, + nbp); + if (nerror != 0) { + pbrelvp(nbp); + relpbuf(nbp, &ffsrawbufcnt); + nbp = NULL; + } + } + } else if (nerror != 0) {/* Deferred Readahead error */ + break; + } else if (resid > 0) { /* More to read, no readahead */ + error = ffs_rawread_readahead(vp, udata, offset, + resid, td, bp); + if (error != 0) + break; + } + } + + if (bp != NULL) { + pbrelvp(bp); + relpbuf(bp, &ffsrawbufcnt); + } + if (nbp != NULL) { /* Run down readahead buffer */ + bwait(nbp, PRIBIO, "rawrd"); + vunmapbuf(nbp); + pbrelvp(nbp); + relpbuf(nbp, &ffsrawbufcnt); + } + + if (error == 0) + error = nerror; + PRELE(td->td_proc); + uio->uio_iov->iov_base = udata; + uio->uio_resid = resid; + uio->uio_offset = offset; + return error; +} + + +int +ffs_rawread(struct vnode *vp, + struct uio *uio, + int *workdone) +{ + if (allowrawread != 0 && + uio->uio_iovcnt == 1 && + uio->uio_segflg == UIO_USERSPACE && + uio->uio_resid == uio->uio_iov->iov_len && + (((uio->uio_td != NULL) ? uio->uio_td : curthread)->td_pflags & + TDP_DEADLKTREAT) == 0) { + int secsize; /* Media sector size */ + off_t filebytes; /* Bytes left of file */ + int blockbytes; /* Bytes left of file in full blocks */ + int partialbytes; /* Bytes in last partial block */ + int skipbytes; /* Bytes not to read in ffs_rawread */ + struct inode *ip; + int error; + + + /* Only handle sector aligned reads */ + ip = VTOI(vp); + secsize = ITODEVVP(ip)->v_bufobj.bo_bsize; + if ((uio->uio_offset & (secsize - 1)) == 0 && + (uio->uio_resid & (secsize - 1)) == 0) { + + /* Sync dirty pages and buffers if needed */ + error = ffs_rawread_sync(vp); + if (error != 0) + return error; + + /* Check for end of file */ + if (ip->i_size > uio->uio_offset) { + filebytes = ip->i_size - uio->uio_offset; + + /* No special eof handling needed ? */ + if (uio->uio_resid <= filebytes) { + *workdone = 1; + return ffs_rawread_main(vp, uio); + } + + partialbytes = ((unsigned int) ip->i_size) % + ITOFS(ip)->fs_bsize; + blockbytes = (int) filebytes - partialbytes; + if (blockbytes > 0) { + skipbytes = uio->uio_resid - + blockbytes; + uio->uio_resid = blockbytes; + error = ffs_rawread_main(vp, uio); + uio->uio_resid += skipbytes; + if (error != 0) + return error; + /* Read remaining part using buffer */ + } + } + } + } + *workdone = 0; + return 0; +} diff --git a/Dump/ufs/ffs/ffs_snapshot.c b/Dump/ufs/ffs/ffs_snapshot.c new file mode 100644 index 0000000..f30dfca --- /dev/null +++ b/Dump/ufs/ffs/ffs_snapshot.c @@ -0,0 +1,2699 @@ +/*- + * Copyright 2000 Marshall Kirk McKusick. All Rights Reserved. + * + * Further information about snapshots can be obtained from: + * + * Marshall Kirk McKusick http://www.mckusick.com/softdep/ + * 1614 Oxford Street mckusick@mckusick.com + * Berkeley, CA 94709-1608 +1-510-843-9542 + * USA + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ffs_snapshot.c 8.11 (McKusick) 7/23/00 + */ + +#include +__FBSDID("$FreeBSD: releng/11.2/sys/ufs/ffs/ffs_snapshot.c 322130 2017-08-07 02:17:15Z mckusick $"); + +#include "opt_quota.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +#include +#include + +#define KERNCRED thread0.td_ucred +#define DEBUG 1 + +#include "opt_ffs.h" + +#ifdef NO_FFS_SNAPSHOT +int +ffs_snapshot(mp, snapfile) + struct mount *mp; + char *snapfile; +{ + return (EINVAL); +} + +int +ffs_snapblkfree(fs, devvp, bno, size, inum, vtype, wkhd) + struct fs *fs; + struct vnode *devvp; + ufs2_daddr_t bno; + long size; + ino_t inum; + enum vtype vtype; + struct workhead *wkhd; +{ + return (EINVAL); +} + +void +ffs_snapremove(vp) + struct vnode *vp; +{ +} + +void +ffs_snapshot_mount(mp) + struct mount *mp; +{ +} + +void +ffs_snapshot_unmount(mp) + struct mount *mp; +{ +} + +void +ffs_snapgone(ip) + struct inode *ip; +{ +} + +int +ffs_copyonwrite(devvp, bp) + struct vnode *devvp; + struct buf *bp; +{ + return (EINVAL); +} + +void +ffs_sync_snap(mp, waitfor) + struct mount *mp; + int waitfor; +{ +} + +#else +FEATURE(ffs_snapshot, "FFS snapshot support"); + +LIST_HEAD(, snapdata) snapfree; +static struct mtx snapfree_lock; +MTX_SYSINIT(ffs_snapfree, &snapfree_lock, "snapdata free list", MTX_DEF); + +static int cgaccount(int, struct vnode *, struct buf *, int); +static int expunge_ufs1(struct vnode *, struct inode *, struct fs *, + int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *, + ufs_lbn_t, int), int, int); +static int indiracct_ufs1(struct vnode *, struct vnode *, int, + ufs1_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *, + int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *, + ufs_lbn_t, int), int); +static int fullacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, + struct fs *, ufs_lbn_t, int); +static int snapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, + struct fs *, ufs_lbn_t, int); +static int mapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, + struct fs *, ufs_lbn_t, int); +static int expunge_ufs2(struct vnode *, struct inode *, struct fs *, + int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *, + ufs_lbn_t, int), int, 
int); +static int indiracct_ufs2(struct vnode *, struct vnode *, int, + ufs2_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *, + int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *, + ufs_lbn_t, int), int); +static int fullacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, + struct fs *, ufs_lbn_t, int); +static int snapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, + struct fs *, ufs_lbn_t, int); +static int mapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, + struct fs *, ufs_lbn_t, int); +static int readblock(struct vnode *vp, struct buf *, ufs2_daddr_t); +static void try_free_snapdata(struct vnode *devvp); +static struct snapdata *ffs_snapdata_acquire(struct vnode *devvp); +static int ffs_bp_snapblk(struct vnode *, struct buf *); + +/* + * To ensure the consistency of snapshots across crashes, we must + * synchronously write out copied blocks before allowing the + * originals to be modified. Because of the rather severe speed + * penalty that this imposes, the code normally only ensures + * persistence for the filesystem metadata contained within a + * snapshot. Setting the following flag allows this crash + * persistence to be enabled for file contents. + */ +int dopersistence = 0; + +#ifdef DEBUG +#include +SYSCTL_INT(_debug, OID_AUTO, dopersistence, CTLFLAG_RW, &dopersistence, 0, ""); +static int snapdebug = 0; +SYSCTL_INT(_debug, OID_AUTO, snapdebug, CTLFLAG_RW, &snapdebug, 0, ""); +int collectsnapstats = 0; +SYSCTL_INT(_debug, OID_AUTO, collectsnapstats, CTLFLAG_RW, &collectsnapstats, + 0, ""); +#endif /* DEBUG */ + +/* + * Create a snapshot file and initialize it for the filesystem. + */ +int +ffs_snapshot(mp, snapfile) + struct mount *mp; + char *snapfile; +{ + ufs2_daddr_t numblks, blkno, *blkp, *snapblklist; + int error, cg, snaploc; + int i, size, len, loc; + ufs2_daddr_t blockno; + uint64_t flag; + struct timespec starttime = {0, 0}, endtime; + char saved_nice = 0; + long redo = 0, snaplistsize = 0; + int32_t *lp; + void *space; + struct fs *copy_fs = NULL, *fs; + struct thread *td = curthread; + struct inode *ip, *xp; + struct buf *bp, *nbp, *ibp; + struct nameidata nd; + struct mount *wrtmp; + struct vattr vat; + struct vnode *vp, *xvp, *mvp, *devvp; + struct uio auio; + struct iovec aiov; + struct snapdata *sn; + struct ufsmount *ump; + + ump = VFSTOUFS(mp); + fs = ump->um_fs; + sn = NULL; + /* + * At the moment, journaled soft updates cannot support + * taking snapshots. + */ + if (MOUNTEDSUJ(mp)) { + vfs_mount_error(mp, "%s: Snapshots are not yet supported when " + "running with journaled soft updates", fs->fs_fsmnt); + return (EOPNOTSUPP); + } + MNT_ILOCK(mp); + flag = mp->mnt_flag; + MNT_IUNLOCK(mp); + /* + * Need to serialize access to snapshot code per filesystem. + */ + /* + * Assign a snapshot slot in the superblock. + */ + UFS_LOCK(ump); + for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) + if (fs->fs_snapinum[snaploc] == 0) + break; + UFS_UNLOCK(ump); + if (snaploc == FSMAXSNAP) + return (ENOSPC); + /* + * Create the snapshot file. 
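+ * The file is created exclusively (VA_EXCLUSIVE) with mode S_IRUSR, and + * the resulting vnode is marked VV_SYSTEM below so that it is treated + * as a system file.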
+ */ +restart: + NDINIT(&nd, CREATE, LOCKPARENT | LOCKLEAF | NOCACHE, UIO_SYSSPACE, + snapfile, td); + if ((error = namei(&nd)) != 0) + return (error); + if (nd.ni_vp != NULL) { + vput(nd.ni_vp); + error = EEXIST; + } + if (nd.ni_dvp->v_mount != mp) + error = EXDEV; + if (error) { + NDFREE(&nd, NDF_ONLY_PNBUF); + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + return (error); + } + VATTR_NULL(&vat); + vat.va_type = VREG; + vat.va_mode = S_IRUSR; + vat.va_vaflags |= VA_EXCLUSIVE; + if (VOP_GETWRITEMOUNT(nd.ni_dvp, &wrtmp)) + wrtmp = NULL; + if (wrtmp != mp) + panic("ffs_snapshot: mount mismatch"); + vfs_rel(wrtmp); + if (vn_start_write(NULL, &wrtmp, V_NOWAIT) != 0) { + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); + if ((error = vn_start_write(NULL, &wrtmp, + V_XSLEEP | PCATCH)) != 0) + return (error); + goto restart; + } + error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vat); + VOP_UNLOCK(nd.ni_dvp, 0); + if (error) { + NDFREE(&nd, NDF_ONLY_PNBUF); + vn_finished_write(wrtmp); + vrele(nd.ni_dvp); + return (error); + } + vp = nd.ni_vp; + vp->v_vflag |= VV_SYSTEM; + ip = VTOI(vp); + devvp = ITODEVVP(ip); + /* + * Allocate and copy the last block contents so as to be able + * to set size to that of the filesystem. + */ + numblks = howmany(fs->fs_size, fs->fs_frag); + error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(numblks - 1)), + fs->fs_bsize, KERNCRED, BA_CLRBUF, &bp); + if (error) + goto out; + ip->i_size = lblktosize(fs, (off_t)numblks); + DIP_SET(ip, i_size, ip->i_size); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + error = readblock(vp, bp, numblks - 1); + bawrite(bp); + if (error != 0) + goto out; + /* + * Preallocate critical data structures so that we can copy + * them in without further allocation after we suspend all + * operations on the filesystem. We would like to just release + * the allocated buffers without writing them since they will + * be filled in below once we are ready to go, but this upsets + * the soft update code, so we go ahead and write the new buffers. + * + * Allocate all indirect blocks and mark all of them as not + * needing to be copied. + */ + for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) { + error = UFS_BALLOC(vp, lblktosize(fs, (off_t)blkno), + fs->fs_bsize, td->td_ucred, BA_METAONLY, &ibp); + if (error) + goto out; + bawrite(ibp); + } + /* + * Allocate copies for the superblock and its summary information. + */ + error = UFS_BALLOC(vp, fs->fs_sblockloc, fs->fs_sbsize, KERNCRED, + 0, &nbp); + if (error) + goto out; + bawrite(nbp); + blkno = fragstoblks(fs, fs->fs_csaddr); + len = howmany(fs->fs_cssize, fs->fs_bsize); + for (loc = 0; loc < len; loc++) { + error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(blkno + loc)), + fs->fs_bsize, KERNCRED, 0, &nbp); + if (error) + goto out; + bawrite(nbp); + } + /* + * Allocate all cylinder group blocks. + */ + for (cg = 0; cg < fs->fs_ncg; cg++) { + error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)), + fs->fs_bsize, KERNCRED, 0, &nbp); + if (error) + goto out; + bawrite(nbp); + if (cg % 10 == 0) + ffs_syncvnode(vp, MNT_WAIT, 0); + } + /* + * Copy all the cylinder group maps. Although the + * filesystem is still active, we hope that only a few + * cylinder groups will change between now and when we + * suspend operations. Thus, we will be able to quickly + * touch up the few cylinder groups that changed during + * the suspension period. 
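+ * The fs_active bitmap allocated below records which of these copies + * are still valid; a cylinder group that is modified again has its bit + * cleared and is recopied in the second pass.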
+ */ + len = howmany(fs->fs_ncg, NBBY); + space = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO); + UFS_LOCK(ump); + fs->fs_active = space; + UFS_UNLOCK(ump); + for (cg = 0; cg < fs->fs_ncg; cg++) { + error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)), + fs->fs_bsize, KERNCRED, 0, &nbp); + if (error) + goto out; + error = cgaccount(cg, vp, nbp, 1); + bawrite(nbp); + if (cg % 10 == 0) + ffs_syncvnode(vp, MNT_WAIT, 0); + if (error) + goto out; + } + /* + * Change inode to snapshot type file. + */ + ip->i_flags |= SF_SNAPSHOT; + DIP_SET(ip, i_flags, ip->i_flags); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + /* + * Ensure that the snapshot is completely on disk. + * Since we have marked it as a snapshot it is safe to + * unlock it as no process will be allowed to write to it. + */ + if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) + goto out; + VOP_UNLOCK(vp, 0); + /* + * All allocations are done, so we can now snapshot the system. + * + * Recind nice scheduling while running with the filesystem suspended. + */ + if (td->td_proc->p_nice > 0) { + struct proc *p; + + p = td->td_proc; + PROC_LOCK(p); + saved_nice = p->p_nice; + sched_nice(p, 0); + PROC_UNLOCK(p); + } + /* + * Suspend operation on filesystem. + */ + for (;;) { + vn_finished_write(wrtmp); + if ((error = vfs_write_suspend(vp->v_mount, 0)) != 0) { + vn_start_write(NULL, &wrtmp, V_WAIT); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + goto out; + } + if (mp->mnt_kern_flag & MNTK_SUSPENDED) + break; + vn_start_write(NULL, &wrtmp, V_WAIT); + } + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + if (ip->i_effnlink == 0) { + error = ENOENT; /* Snapshot file unlinked */ + goto out1; + } + if (collectsnapstats) + nanotime(&starttime); + + /* The last block might have changed. Copy it again to be sure. */ + error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(numblks - 1)), + fs->fs_bsize, KERNCRED, BA_CLRBUF, &bp); + if (error != 0) + goto out1; + error = readblock(vp, bp, numblks - 1); + bp->b_flags |= B_VALIDSUSPWRT; + bawrite(bp); + if (error != 0) + goto out1; + /* + * First, copy all the cylinder group maps that have changed. + */ + for (cg = 0; cg < fs->fs_ncg; cg++) { + if ((ACTIVECGNUM(fs, cg) & ACTIVECGOFF(cg)) != 0) + continue; + redo++; + error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)), + fs->fs_bsize, KERNCRED, 0, &nbp); + if (error) + goto out1; + error = cgaccount(cg, vp, nbp, 2); + bawrite(nbp); + if (error) + goto out1; + } + /* + * Grab a copy of the superblock and its summary information. + * We delay writing it until the suspension is released below. + */ + copy_fs = malloc((u_long)fs->fs_bsize, M_UFSMNT, M_WAITOK); + bcopy(fs, copy_fs, fs->fs_sbsize); + if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0) + copy_fs->fs_clean = 1; + size = fs->fs_bsize < SBLOCKSIZE ? fs->fs_bsize : SBLOCKSIZE; + if (fs->fs_sbsize < size) + bzero(&((char *)copy_fs)[fs->fs_sbsize], + size - fs->fs_sbsize); + size = blkroundup(fs, fs->fs_cssize); + if (fs->fs_contigsumsize > 0) + size += fs->fs_ncg * sizeof(int32_t); + space = malloc((u_long)size, M_UFSMNT, M_WAITOK); + copy_fs->fs_csp = space; + bcopy(fs->fs_csp, copy_fs->fs_csp, fs->fs_cssize); + space = (char *)space + fs->fs_cssize; + loc = howmany(fs->fs_cssize, fs->fs_fsize); + i = fs->fs_frag - loc % fs->fs_frag; + len = (i == fs->fs_frag) ? 
0 : i * fs->fs_fsize; + if (len > 0) { + if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + loc), + len, KERNCRED, &bp)) != 0) { + brelse(bp); + free(copy_fs->fs_csp, M_UFSMNT); + free(copy_fs, M_UFSMNT); + copy_fs = NULL; + goto out1; + } + bcopy(bp->b_data, space, (u_int)len); + space = (char *)space + len; + bp->b_flags |= B_INVAL | B_NOCACHE; + brelse(bp); + } + if (fs->fs_contigsumsize > 0) { + copy_fs->fs_maxcluster = lp = space; + for (i = 0; i < fs->fs_ncg; i++) + *lp++ = fs->fs_contigsumsize; + } + /* + * We must check for active files that have been unlinked + * (e.g., with a zero link count). We have to expunge all + * trace of these files from the snapshot so that they are + * not reclaimed prematurely by fsck or unnecessarily dumped. + * We turn off the MNTK_SUSPENDED flag to avoid a panic from + * spec_strategy about writing on a suspended filesystem. + * Note that we skip unlinked snapshot files as they will + * be handled separately below. + * + * We also calculate the needed size for the snapshot list. + */ + snaplistsize = fs->fs_ncg + howmany(fs->fs_cssize, fs->fs_bsize) + + FSMAXSNAP + 1 /* superblock */ + 1 /* last block */ + 1 /* size */; + MNT_ILOCK(mp); + mp->mnt_kern_flag &= ~MNTK_SUSPENDED; + MNT_IUNLOCK(mp); +loop: + MNT_VNODE_FOREACH_ALL(xvp, mp, mvp) { + if ((xvp->v_usecount == 0 && + (xvp->v_iflag & (VI_OWEINACT | VI_DOINGINACT)) == 0) || + xvp->v_type == VNON || + IS_SNAPSHOT(VTOI(xvp))) { + VI_UNLOCK(xvp); + continue; + } + /* + * We can skip parent directory vnode because it must have + * this snapshot file in it. + */ + if (xvp == nd.ni_dvp) { + VI_UNLOCK(xvp); + continue; + } + vholdl(xvp); + if (vn_lock(xvp, LK_EXCLUSIVE | LK_INTERLOCK) != 0) { + MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); + vdrop(xvp); + goto loop; + } + VI_LOCK(xvp); + if (xvp->v_usecount == 0 && + (xvp->v_iflag & (VI_OWEINACT | VI_DOINGINACT)) == 0) { + VI_UNLOCK(xvp); + VOP_UNLOCK(xvp, 0); + vdrop(xvp); + continue; + } + VI_UNLOCK(xvp); + if (snapdebug) + vn_printf(xvp, "ffs_snapshot: busy vnode "); + if (VOP_GETATTR(xvp, &vat, td->td_ucred) == 0 && + vat.va_nlink > 0) { + VOP_UNLOCK(xvp, 0); + vdrop(xvp); + continue; + } + xp = VTOI(xvp); + if (ffs_checkfreefile(copy_fs, vp, xp->i_number)) { + VOP_UNLOCK(xvp, 0); + vdrop(xvp); + continue; + } + /* + * If there is a fragment, clear it here. + */ + blkno = 0; + loc = howmany(xp->i_size, fs->fs_bsize) - 1; + if (loc < NDADDR) { + len = fragroundup(fs, blkoff(fs, xp->i_size)); + if (len != 0 && len < fs->fs_bsize) { + ffs_blkfree(ump, copy_fs, vp, + DIP(xp, i_db[loc]), len, xp->i_number, + xvp->v_type, NULL); + blkno = DIP(xp, i_db[loc]); + DIP_SET(xp, i_db[loc], 0); + } + } + snaplistsize += 1; + if (I_IS_UFS1(xp)) + error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1, + BLK_NOCOPY, 1); + else + error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2, + BLK_NOCOPY, 1); + if (blkno) + DIP_SET(xp, i_db[loc], blkno); + if (!error) + error = ffs_freefile(ump, copy_fs, vp, xp->i_number, + xp->i_mode, NULL); + VOP_UNLOCK(xvp, 0); + vdrop(xvp); + if (error) { + free(copy_fs->fs_csp, M_UFSMNT); + free(copy_fs, M_UFSMNT); + copy_fs = NULL; + MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); + goto out1; + } + } + /* + * Erase the journal file from the snapshot. 
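+ * Its blocks are expunged with BLK_NOCOPY so that the snapshot neither + * claims nor copies them.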
+ */ + if (fs->fs_flags & FS_SUJ) { + error = softdep_journal_lookup(mp, &xvp); + if (error) { + free(copy_fs->fs_csp, M_UFSMNT); + free(copy_fs, M_UFSMNT); + copy_fs = NULL; + goto out1; + } + xp = VTOI(xvp); + if (I_IS_UFS1(xp)) + error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1, + BLK_NOCOPY, 0); + else + error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2, + BLK_NOCOPY, 0); + vput(xvp); + } + /* + * Acquire a lock on the snapdata structure, creating it if necessary. + */ + sn = ffs_snapdata_acquire(devvp); + /* + * Change vnode to use shared snapshot lock instead of the original + * private lock. + */ + vp->v_vnlock = &sn->sn_lock; + lockmgr(&vp->v_lock, LK_RELEASE, NULL); + xp = TAILQ_FIRST(&sn->sn_head); + /* + * If this is the first snapshot on this filesystem, then we need + * to allocate the space for the list of preallocated snapshot blocks. + * This list will be refined below, but this preliminary one will + * keep us out of deadlock until the full one is ready. + */ + if (xp == NULL) { + snapblklist = malloc(snaplistsize * sizeof(daddr_t), + M_UFSMNT, M_WAITOK); + blkp = &snapblklist[1]; + *blkp++ = lblkno(fs, fs->fs_sblockloc); + blkno = fragstoblks(fs, fs->fs_csaddr); + for (cg = 0; cg < fs->fs_ncg; cg++) { + if (fragstoblks(fs, cgtod(fs, cg) > blkno)) + break; + *blkp++ = fragstoblks(fs, cgtod(fs, cg)); + } + len = howmany(fs->fs_cssize, fs->fs_bsize); + for (loc = 0; loc < len; loc++) + *blkp++ = blkno + loc; + for (; cg < fs->fs_ncg; cg++) + *blkp++ = fragstoblks(fs, cgtod(fs, cg)); + snapblklist[0] = blkp - snapblklist; + VI_LOCK(devvp); + if (sn->sn_blklist != NULL) + panic("ffs_snapshot: non-empty list"); + sn->sn_blklist = snapblklist; + sn->sn_listsize = blkp - snapblklist; + VI_UNLOCK(devvp); + } + /* + * Record snapshot inode. Since this is the newest snapshot, + * it must be placed at the end of the list. + */ + VI_LOCK(devvp); + fs->fs_snapinum[snaploc] = ip->i_number; + if (ip->i_nextsnap.tqe_prev != 0) + panic("ffs_snapshot: %ju already on list", + (uintmax_t)ip->i_number); + TAILQ_INSERT_TAIL(&sn->sn_head, ip, i_nextsnap); + devvp->v_vflag |= VV_COPYONWRITE; + VI_UNLOCK(devvp); + ASSERT_VOP_LOCKED(vp, "ffs_snapshot vp"); +out1: + KASSERT((sn != NULL && copy_fs != NULL && error == 0) || + (sn == NULL && copy_fs == NULL && error != 0), + ("email phk@ and mckusick@")); + /* + * Resume operation on filesystem. + */ + vfs_write_resume(vp->v_mount, VR_START_WRITE | VR_NO_SUSPCLR); + if (collectsnapstats && starttime.tv_sec > 0) { + nanotime(&endtime); + timespecsub(&endtime, &starttime); + printf("%s: suspended %ld.%03ld sec, redo %ld of %d\n", + vp->v_mount->mnt_stat.f_mntonname, (long)endtime.tv_sec, + endtime.tv_nsec / 1000000, redo, fs->fs_ncg); + } + if (copy_fs == NULL) + goto out; + /* + * Copy allocation information from all the snapshots in + * this snapshot and then expunge them from its view. + */ + TAILQ_FOREACH(xp, &sn->sn_head, i_nextsnap) { + if (xp == ip) + break; + if (I_IS_UFS1(xp)) + error = expunge_ufs1(vp, xp, fs, snapacct_ufs1, + BLK_SNAP, 0); + else + error = expunge_ufs2(vp, xp, fs, snapacct_ufs2, + BLK_SNAP, 0); + if (error == 0 && xp->i_effnlink == 0) { + error = ffs_freefile(ump, + copy_fs, + vp, + xp->i_number, + xp->i_mode, NULL); + } + if (error) { + fs->fs_snapinum[snaploc] = 0; + goto done; + } + } + /* + * Allocate space for the full list of preallocated snapshot blocks. 
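+ * The completed list is written to the end of the snapshot file below + * and is consulted by ffs_copyonwrite() to skip blocks that never need + * to be copied.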
+ */ + snapblklist = malloc(snaplistsize * sizeof(daddr_t), + M_UFSMNT, M_WAITOK); + ip->i_snapblklist = &snapblklist[1]; + /* + * Expunge the blocks used by the snapshots from the set of + * blocks marked as used in the snapshot bitmaps. Also, collect + * the list of allocated blocks in i_snapblklist. + */ + if (I_IS_UFS1(ip)) + error = expunge_ufs1(vp, ip, copy_fs, mapacct_ufs1, + BLK_SNAP, 0); + else + error = expunge_ufs2(vp, ip, copy_fs, mapacct_ufs2, + BLK_SNAP, 0); + if (error) { + fs->fs_snapinum[snaploc] = 0; + free(snapblklist, M_UFSMNT); + goto done; + } + if (snaplistsize < ip->i_snapblklist - snapblklist) + panic("ffs_snapshot: list too small"); + snaplistsize = ip->i_snapblklist - snapblklist; + snapblklist[0] = snaplistsize; + ip->i_snapblklist = 0; + /* + * Write out the list of allocated blocks to the end of the snapshot. + */ + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + aiov.iov_base = (void *)snapblklist; + aiov.iov_len = snaplistsize * sizeof(daddr_t); + auio.uio_resid = aiov.iov_len; + auio.uio_offset = ip->i_size; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_WRITE; + auio.uio_td = td; + if ((error = VOP_WRITE(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { + fs->fs_snapinum[snaploc] = 0; + free(snapblklist, M_UFSMNT); + goto done; + } + /* + * Write the superblock and its summary information + * to the snapshot. + */ + blkno = fragstoblks(fs, fs->fs_csaddr); + len = howmany(fs->fs_cssize, fs->fs_bsize); + space = copy_fs->fs_csp; + for (loc = 0; loc < len; loc++) { + error = bread(vp, blkno + loc, fs->fs_bsize, KERNCRED, &nbp); + if (error) { + brelse(nbp); + fs->fs_snapinum[snaploc] = 0; + free(snapblklist, M_UFSMNT); + goto done; + } + bcopy(space, nbp->b_data, fs->fs_bsize); + space = (char *)space + fs->fs_bsize; + bawrite(nbp); + } + error = bread(vp, lblkno(fs, fs->fs_sblockloc), fs->fs_bsize, + KERNCRED, &nbp); + if (error) { + brelse(nbp); + } else { + loc = blkoff(fs, fs->fs_sblockloc); + bcopy((char *)copy_fs, &nbp->b_data[loc], (u_int)fs->fs_sbsize); + bawrite(nbp); + } + /* + * As this is the newest list, it is the most inclusive, so + * should replace the previous list. + */ + VI_LOCK(devvp); + space = sn->sn_blklist; + sn->sn_blklist = snapblklist; + sn->sn_listsize = snaplistsize; + VI_UNLOCK(devvp); + if (space != NULL) + free(space, M_UFSMNT); + /* + * Preallocate all the direct blocks in the snapshot inode so + * that we never have to write the inode itself to commit an + * update to the contents of the snapshot. Note that once + * created, the size of the snapshot will never change, so + * there will never be a need to write the inode except to + * update the non-integrity-critical time fields and + * allocated-block count. 
+ */ + for (blockno = 0; blockno < NDADDR; blockno++) { + if (DIP(ip, i_db[blockno]) != 0) + continue; + error = UFS_BALLOC(vp, lblktosize(fs, blockno), + fs->fs_bsize, KERNCRED, BA_CLRBUF, &bp); + if (error) + break; + error = readblock(vp, bp, blockno); + bawrite(bp); + if (error != 0) + break; + } +done: + free(copy_fs->fs_csp, M_UFSMNT); + free(copy_fs, M_UFSMNT); + copy_fs = NULL; +out: + NDFREE(&nd, NDF_ONLY_PNBUF); + if (saved_nice > 0) { + struct proc *p; + + p = td->td_proc; + PROC_LOCK(p); + sched_nice(td->td_proc, saved_nice); + PROC_UNLOCK(td->td_proc); + } + UFS_LOCK(ump); + if (fs->fs_active != 0) { + free(fs->fs_active, M_DEVBUF); + fs->fs_active = 0; + } + UFS_UNLOCK(ump); + MNT_ILOCK(mp); + mp->mnt_flag = (mp->mnt_flag & MNT_QUOTA) | (flag & ~MNT_QUOTA); + MNT_IUNLOCK(mp); + if (error) + (void) ffs_truncate(vp, (off_t)0, 0, NOCRED); + (void) ffs_syncvnode(vp, MNT_WAIT, 0); + if (error) + vput(vp); + else + VOP_UNLOCK(vp, 0); + vrele(nd.ni_dvp); + vn_finished_write(wrtmp); + process_deferred_inactive(mp); + return (error); +} + +/* + * Copy a cylinder group map. All the unallocated blocks are marked + * BLK_NOCOPY so that the snapshot knows that it need not copy them + * if they are later written. If passno is one, then this is a first + * pass, so only setting needs to be done. If passno is 2, then this + * is a revision to a previous pass which must be undone as the + * replacement pass is done. + */ +static int +cgaccount(cg, vp, nbp, passno) + int cg; + struct vnode *vp; + struct buf *nbp; + int passno; +{ + struct buf *bp, *ibp; + struct inode *ip; + struct cg *cgp; + struct fs *fs; + ufs2_daddr_t base, numblks; + int error, len, loc, indiroff; + + ip = VTOI(vp); + fs = ITOFS(ip); + error = bread(ITODEVVP(ip), fsbtodb(fs, cgtod(fs, cg)), + (int)fs->fs_cgsize, KERNCRED, &bp); + if (error) { + brelse(bp); + return (error); + } + cgp = (struct cg *)bp->b_data; + if (!cg_chkmagic(cgp)) { + brelse(bp); + return (EIO); + } + UFS_LOCK(ITOUMP(ip)); + ACTIVESET(fs, cg); + /* + * Recomputation of summary information might not have been performed + * at mount time. Sync up summary information for current cylinder + * group while data is in memory to ensure that result of background + * fsck is slightly more consistent. 
+ */ + fs->fs_cs(fs, cg) = cgp->cg_cs; + UFS_UNLOCK(ITOUMP(ip)); + bcopy(bp->b_data, nbp->b_data, fs->fs_cgsize); + if (fs->fs_cgsize < fs->fs_bsize) + bzero(&nbp->b_data[fs->fs_cgsize], + fs->fs_bsize - fs->fs_cgsize); + cgp = (struct cg *)nbp->b_data; + bqrelse(bp); + if (passno == 2) + nbp->b_flags |= B_VALIDSUSPWRT; + numblks = howmany(fs->fs_size, fs->fs_frag); + len = howmany(fs->fs_fpg, fs->fs_frag); + base = cgbase(fs, cg) / fs->fs_frag; + if (base + len >= numblks) + len = numblks - base - 1; + loc = 0; + if (base < NDADDR) { + for ( ; loc < NDADDR; loc++) { + if (ffs_isblock(fs, cg_blksfree(cgp), loc)) + DIP_SET(ip, i_db[loc], BLK_NOCOPY); + else if (passno == 2 && DIP(ip, i_db[loc])== BLK_NOCOPY) + DIP_SET(ip, i_db[loc], 0); + else if (passno == 1 && DIP(ip, i_db[loc])== BLK_NOCOPY) + panic("ffs_snapshot: lost direct block"); + } + } + error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(base + loc)), + fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); + if (error) { + return (error); + } + indiroff = (base + loc - NDADDR) % NINDIR(fs); + for ( ; loc < len; loc++, indiroff++) { + if (indiroff >= NINDIR(fs)) { + if (passno == 2) + ibp->b_flags |= B_VALIDSUSPWRT; + bawrite(ibp); + error = UFS_BALLOC(vp, + lblktosize(fs, (off_t)(base + loc)), + fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); + if (error) { + return (error); + } + indiroff = 0; + } + if (I_IS_UFS1(ip)) { + if (ffs_isblock(fs, cg_blksfree(cgp), loc)) + ((ufs1_daddr_t *)(ibp->b_data))[indiroff] = + BLK_NOCOPY; + else if (passno == 2 && ((ufs1_daddr_t *)(ibp->b_data)) + [indiroff] == BLK_NOCOPY) + ((ufs1_daddr_t *)(ibp->b_data))[indiroff] = 0; + else if (passno == 1 && ((ufs1_daddr_t *)(ibp->b_data)) + [indiroff] == BLK_NOCOPY) + panic("ffs_snapshot: lost indirect block"); + continue; + } + if (ffs_isblock(fs, cg_blksfree(cgp), loc)) + ((ufs2_daddr_t *)(ibp->b_data))[indiroff] = BLK_NOCOPY; + else if (passno == 2 && + ((ufs2_daddr_t *)(ibp->b_data)) [indiroff] == BLK_NOCOPY) + ((ufs2_daddr_t *)(ibp->b_data))[indiroff] = 0; + else if (passno == 1 && + ((ufs2_daddr_t *)(ibp->b_data)) [indiroff] == BLK_NOCOPY) + panic("ffs_snapshot: lost indirect block"); + } + if (passno == 2) + ibp->b_flags |= B_VALIDSUSPWRT; + bdwrite(ibp); + return (0); +} + +/* + * Before expunging a snapshot inode, note all the + * blocks that it claims with BLK_SNAP so that fsck will + * be able to account for those blocks properly and so + * that this snapshot knows that it need not copy them + * if the other snapshot holding them is freed. This code + * is reproduced once each for UFS1 and UFS2. + */ +static int +expunge_ufs1(snapvp, cancelip, fs, acctfunc, expungetype, clearmode) + struct vnode *snapvp; + struct inode *cancelip; + struct fs *fs; + int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, + struct fs *, ufs_lbn_t, int); + int expungetype; + int clearmode; +{ + int i, error, indiroff; + ufs_lbn_t lbn, rlbn; + ufs2_daddr_t len, blkno, numblks, blksperindir; + struct ufs1_dinode *dip; + struct thread *td = curthread; + struct buf *bp; + + /* + * Prepare to expunge the inode. If its inode block has not + * yet been copied, then allocate and fill the copy. 
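+ * A block number of zero means the inode block has not been copied + * yet; in that case it is allocated in the snapshot and filled with + * readblock().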
+ */ + lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number)); + blkno = 0; + if (lbn < NDADDR) { + blkno = VTOI(snapvp)->i_din1->di_db[lbn]; + } else { + if (DOINGSOFTDEP(snapvp)) + softdep_prealloc(snapvp, MNT_WAIT); + td->td_pflags |= TDP_COWINPROGRESS; + error = ffs_balloc_ufs1(snapvp, lblktosize(fs, (off_t)lbn), + fs->fs_bsize, KERNCRED, BA_METAONLY, &bp); + td->td_pflags &= ~TDP_COWINPROGRESS; + if (error) + return (error); + indiroff = (lbn - NDADDR) % NINDIR(fs); + blkno = ((ufs1_daddr_t *)(bp->b_data))[indiroff]; + bqrelse(bp); + } + if (blkno != 0) { + if ((error = bread(snapvp, lbn, fs->fs_bsize, KERNCRED, &bp))) + return (error); + } else { + error = ffs_balloc_ufs1(snapvp, lblktosize(fs, (off_t)lbn), + fs->fs_bsize, KERNCRED, 0, &bp); + if (error) + return (error); + if ((error = readblock(snapvp, bp, lbn)) != 0) + return (error); + } + /* + * Set a snapshot inode to be a zero length file, regular files + * or unlinked snapshots to be completely unallocated. + */ + dip = (struct ufs1_dinode *)bp->b_data + + ino_to_fsbo(fs, cancelip->i_number); + if (clearmode || cancelip->i_effnlink == 0) + dip->di_mode = 0; + dip->di_size = 0; + dip->di_blocks = 0; + dip->di_flags &= ~SF_SNAPSHOT; + bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs1_daddr_t)); + bdwrite(bp); + /* + * Now go through and expunge all the blocks in the file + * using the function requested. + */ + numblks = howmany(cancelip->i_size, fs->fs_bsize); + if ((error = (*acctfunc)(snapvp, &cancelip->i_din1->di_db[0], + &cancelip->i_din1->di_db[NDADDR], fs, 0, expungetype))) + return (error); + if ((error = (*acctfunc)(snapvp, &cancelip->i_din1->di_ib[0], + &cancelip->i_din1->di_ib[NIADDR], fs, -1, expungetype))) + return (error); + blksperindir = 1; + lbn = -NDADDR; + len = numblks - NDADDR; + rlbn = NDADDR; + for (i = 0; len > 0 && i < NIADDR; i++) { + error = indiracct_ufs1(snapvp, ITOV(cancelip), i, + cancelip->i_din1->di_ib[i], lbn, rlbn, len, + blksperindir, fs, acctfunc, expungetype); + if (error) + return (error); + blksperindir *= NINDIR(fs); + lbn -= blksperindir + 1; + len -= blksperindir; + rlbn += blksperindir; + } + return (0); +} + +/* + * Descend an indirect block chain for vnode cancelvp accounting for all + * its indirect blocks in snapvp. + */ +static int +indiracct_ufs1(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks, + blksperindir, fs, acctfunc, expungetype) + struct vnode *snapvp; + struct vnode *cancelvp; + int level; + ufs1_daddr_t blkno; + ufs_lbn_t lbn; + ufs_lbn_t rlbn; + ufs_lbn_t remblks; + ufs_lbn_t blksperindir; + struct fs *fs; + int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, + struct fs *, ufs_lbn_t, int); + int expungetype; +{ + int error, num, i; + ufs_lbn_t subblksperindir; + struct indir indirs[NIADDR + 2]; + ufs1_daddr_t last, *bap; + struct buf *bp; + + if (blkno == 0) { + if (expungetype == BLK_NOCOPY) + return (0); + panic("indiracct_ufs1: missing indir"); + } + if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0) + return (error); + if (lbn != indirs[num - 1 - level].in_lbn || num < 2) + panic("indiracct_ufs1: botched params"); + /* + * We have to expand bread here since it will deadlock looking + * up the block number for any blocks that are not in the cache. 
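+ * Instead, getblk() obtains the buffer, the physical block number is + * filled in by hand, and readblock() fetches the contents only when + * the buffer is not already valid.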
+ */ + bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0, 0); + bp->b_blkno = fsbtodb(fs, blkno); + if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 && + (error = readblock(cancelvp, bp, fragstoblks(fs, blkno)))) { + brelse(bp); + return (error); + } + /* + * Account for the block pointers in this indirect block. + */ + last = howmany(remblks, blksperindir); + if (last > NINDIR(fs)) + last = NINDIR(fs); + bap = malloc(fs->fs_bsize, M_DEVBUF, M_WAITOK); + bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize); + bqrelse(bp); + error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs, + level == 0 ? rlbn : -1, expungetype); + if (error || level == 0) + goto out; + /* + * Account for the block pointers in each of the indirect blocks + * in the levels below us. + */ + subblksperindir = blksperindir / NINDIR(fs); + for (lbn++, level--, i = 0; i < last; i++) { + error = indiracct_ufs1(snapvp, cancelvp, level, bap[i], lbn, + rlbn, remblks, subblksperindir, fs, acctfunc, expungetype); + if (error) + goto out; + rlbn += blksperindir; + lbn -= blksperindir; + remblks -= blksperindir; + } +out: + free(bap, M_DEVBUF); + return (error); +} + +/* + * Do both snap accounting and map accounting. + */ +static int +fullacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype) + struct vnode *vp; + ufs1_daddr_t *oldblkp, *lastblkp; + struct fs *fs; + ufs_lbn_t lblkno; + int exptype; /* BLK_SNAP or BLK_NOCOPY */ +{ + int error; + + if ((error = snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype))) + return (error); + return (mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype)); +} + +/* + * Identify a set of blocks allocated in a snapshot inode. + */ +static int +snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype) + struct vnode *vp; + ufs1_daddr_t *oldblkp, *lastblkp; + struct fs *fs; + ufs_lbn_t lblkno; + int expungetype; /* BLK_SNAP or BLK_NOCOPY */ +{ + struct inode *ip = VTOI(vp); + ufs1_daddr_t blkno, *blkp; + ufs_lbn_t lbn; + struct buf *ibp; + int error; + + for ( ; oldblkp < lastblkp; oldblkp++) { + blkno = *oldblkp; + if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP) + continue; + lbn = fragstoblks(fs, blkno); + if (lbn < NDADDR) { + blkp = &ip->i_din1->di_db[lbn]; + ip->i_flag |= IN_CHANGE | IN_UPDATE; + } else { + error = ffs_balloc_ufs1(vp, lblktosize(fs, (off_t)lbn), + fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); + if (error) + return (error); + blkp = &((ufs1_daddr_t *)(ibp->b_data)) + [(lbn - NDADDR) % NINDIR(fs)]; + } + /* + * If we are expunging a snapshot vnode and we + * find a block marked BLK_NOCOPY, then it is + * one that has been allocated to this snapshot after + * we took our current snapshot and can be ignored. + */ + if (expungetype == BLK_SNAP && *blkp == BLK_NOCOPY) { + if (lbn >= NDADDR) + brelse(ibp); + } else { + if (*blkp != 0) + panic("snapacct_ufs1: bad block"); + *blkp = expungetype; + if (lbn >= NDADDR) + bdwrite(ibp); + } + } + return (0); +} + +/* + * Account for a set of blocks allocated in a snapshot inode. 
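+ * Pointers still marked BLK_SNAP are freed by their logical block + * number, since claimed blocks always have a block number equal to + * their logical block number; ordinary pointers are freed directly.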
+ */ +static int +mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype) + struct vnode *vp; + ufs1_daddr_t *oldblkp, *lastblkp; + struct fs *fs; + ufs_lbn_t lblkno; + int expungetype; +{ + ufs1_daddr_t blkno; + struct inode *ip; + ino_t inum; + int acctit; + + ip = VTOI(vp); + inum = ip->i_number; + if (lblkno == -1) + acctit = 0; + else + acctit = 1; + for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) { + blkno = *oldblkp; + if (blkno == 0 || blkno == BLK_NOCOPY) + continue; + if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP) + *ip->i_snapblklist++ = lblkno; + if (blkno == BLK_SNAP) + blkno = blkstofrags(fs, lblkno); + ffs_blkfree(ITOUMP(ip), fs, vp, blkno, fs->fs_bsize, inum, + vp->v_type, NULL); + } + return (0); +} + +/* + * Before expunging a snapshot inode, note all the + * blocks that it claims with BLK_SNAP so that fsck will + * be able to account for those blocks properly and so + * that this snapshot knows that it need not copy them + * if the other snapshot holding them is freed. This code + * is reproduced once each for UFS1 and UFS2. + */ +static int +expunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype, clearmode) + struct vnode *snapvp; + struct inode *cancelip; + struct fs *fs; + int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, + struct fs *, ufs_lbn_t, int); + int expungetype; + int clearmode; +{ + int i, error, indiroff; + ufs_lbn_t lbn, rlbn; + ufs2_daddr_t len, blkno, numblks, blksperindir; + struct ufs2_dinode *dip; + struct thread *td = curthread; + struct buf *bp; + + /* + * Prepare to expunge the inode. If its inode block has not + * yet been copied, then allocate and fill the copy. + */ + lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number)); + blkno = 0; + if (lbn < NDADDR) { + blkno = VTOI(snapvp)->i_din2->di_db[lbn]; + } else { + if (DOINGSOFTDEP(snapvp)) + softdep_prealloc(snapvp, MNT_WAIT); + td->td_pflags |= TDP_COWINPROGRESS; + error = ffs_balloc_ufs2(snapvp, lblktosize(fs, (off_t)lbn), + fs->fs_bsize, KERNCRED, BA_METAONLY, &bp); + td->td_pflags &= ~TDP_COWINPROGRESS; + if (error) + return (error); + indiroff = (lbn - NDADDR) % NINDIR(fs); + blkno = ((ufs2_daddr_t *)(bp->b_data))[indiroff]; + bqrelse(bp); + } + if (blkno != 0) { + if ((error = bread(snapvp, lbn, fs->fs_bsize, KERNCRED, &bp))) + return (error); + } else { + error = ffs_balloc_ufs2(snapvp, lblktosize(fs, (off_t)lbn), + fs->fs_bsize, KERNCRED, 0, &bp); + if (error) + return (error); + if ((error = readblock(snapvp, bp, lbn)) != 0) + return (error); + } + /* + * Set a snapshot inode to be a zero length file, regular files + * to be completely unallocated. + */ + dip = (struct ufs2_dinode *)bp->b_data + + ino_to_fsbo(fs, cancelip->i_number); + if (clearmode || cancelip->i_effnlink == 0) + dip->di_mode = 0; + dip->di_size = 0; + dip->di_blocks = 0; + dip->di_flags &= ~SF_SNAPSHOT; + bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs2_daddr_t)); + bdwrite(bp); + /* + * Now go through and expunge all the blocks in the file + * using the function requested. 
+ */ + numblks = howmany(cancelip->i_size, fs->fs_bsize); + if ((error = (*acctfunc)(snapvp, &cancelip->i_din2->di_db[0], + &cancelip->i_din2->di_db[NDADDR], fs, 0, expungetype))) + return (error); + if ((error = (*acctfunc)(snapvp, &cancelip->i_din2->di_ib[0], + &cancelip->i_din2->di_ib[NIADDR], fs, -1, expungetype))) + return (error); + blksperindir = 1; + lbn = -NDADDR; + len = numblks - NDADDR; + rlbn = NDADDR; + for (i = 0; len > 0 && i < NIADDR; i++) { + error = indiracct_ufs2(snapvp, ITOV(cancelip), i, + cancelip->i_din2->di_ib[i], lbn, rlbn, len, + blksperindir, fs, acctfunc, expungetype); + if (error) + return (error); + blksperindir *= NINDIR(fs); + lbn -= blksperindir + 1; + len -= blksperindir; + rlbn += blksperindir; + } + return (0); +} + +/* + * Descend an indirect block chain for vnode cancelvp accounting for all + * its indirect blocks in snapvp. + */ +static int +indiracct_ufs2(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks, + blksperindir, fs, acctfunc, expungetype) + struct vnode *snapvp; + struct vnode *cancelvp; + int level; + ufs2_daddr_t blkno; + ufs_lbn_t lbn; + ufs_lbn_t rlbn; + ufs_lbn_t remblks; + ufs_lbn_t blksperindir; + struct fs *fs; + int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, + struct fs *, ufs_lbn_t, int); + int expungetype; +{ + int error, num, i; + ufs_lbn_t subblksperindir; + struct indir indirs[NIADDR + 2]; + ufs2_daddr_t last, *bap; + struct buf *bp; + + if (blkno == 0) { + if (expungetype == BLK_NOCOPY) + return (0); + panic("indiracct_ufs2: missing indir"); + } + if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0) + return (error); + if (lbn != indirs[num - 1 - level].in_lbn || num < 2) + panic("indiracct_ufs2: botched params"); + /* + * We have to expand bread here since it will deadlock looking + * up the block number for any blocks that are not in the cache. + */ + bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0, 0); + bp->b_blkno = fsbtodb(fs, blkno); + if ((bp->b_flags & B_CACHE) == 0 && + (error = readblock(cancelvp, bp, fragstoblks(fs, blkno)))) { + brelse(bp); + return (error); + } + /* + * Account for the block pointers in this indirect block. + */ + last = howmany(remblks, blksperindir); + if (last > NINDIR(fs)) + last = NINDIR(fs); + bap = malloc(fs->fs_bsize, M_DEVBUF, M_WAITOK); + bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize); + bqrelse(bp); + error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs, + level == 0 ? rlbn : -1, expungetype); + if (error || level == 0) + goto out; + /* + * Account for the block pointers in each of the indirect blocks + * in the levels below us. + */ + subblksperindir = blksperindir / NINDIR(fs); + for (lbn++, level--, i = 0; i < last; i++) { + error = indiracct_ufs2(snapvp, cancelvp, level, bap[i], lbn, + rlbn, remblks, subblksperindir, fs, acctfunc, expungetype); + if (error) + goto out; + rlbn += blksperindir; + lbn -= blksperindir; + remblks -= blksperindir; + } +out: + free(bap, M_DEVBUF); + return (error); +} + +/* + * Do both snap accounting and map accounting. + */ +static int +fullacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype) + struct vnode *vp; + ufs2_daddr_t *oldblkp, *lastblkp; + struct fs *fs; + ufs_lbn_t lblkno; + int exptype; /* BLK_SNAP or BLK_NOCOPY */ +{ + int error; + + if ((error = snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype))) + return (error); + return (mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype)); +} + +/* + * Identify a set of blocks allocated in a snapshot inode. 
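+ * Each block found is recorded in this snapshot's own block map with + * the given expunge type (BLK_SNAP or BLK_NOCOPY).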
+ */ +static int +snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype) + struct vnode *vp; + ufs2_daddr_t *oldblkp, *lastblkp; + struct fs *fs; + ufs_lbn_t lblkno; + int expungetype; /* BLK_SNAP or BLK_NOCOPY */ +{ + struct inode *ip = VTOI(vp); + ufs2_daddr_t blkno, *blkp; + ufs_lbn_t lbn; + struct buf *ibp; + int error; + + for ( ; oldblkp < lastblkp; oldblkp++) { + blkno = *oldblkp; + if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP) + continue; + lbn = fragstoblks(fs, blkno); + if (lbn < NDADDR) { + blkp = &ip->i_din2->di_db[lbn]; + ip->i_flag |= IN_CHANGE | IN_UPDATE; + } else { + error = ffs_balloc_ufs2(vp, lblktosize(fs, (off_t)lbn), + fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); + if (error) + return (error); + blkp = &((ufs2_daddr_t *)(ibp->b_data)) + [(lbn - NDADDR) % NINDIR(fs)]; + } + /* + * If we are expunging a snapshot vnode and we + * find a block marked BLK_NOCOPY, then it is + * one that has been allocated to this snapshot after + * we took our current snapshot and can be ignored. + */ + if (expungetype == BLK_SNAP && *blkp == BLK_NOCOPY) { + if (lbn >= NDADDR) + brelse(ibp); + } else { + if (*blkp != 0) + panic("snapacct_ufs2: bad block"); + *blkp = expungetype; + if (lbn >= NDADDR) + bdwrite(ibp); + } + } + return (0); +} + +/* + * Account for a set of blocks allocated in a snapshot inode. + */ +static int +mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype) + struct vnode *vp; + ufs2_daddr_t *oldblkp, *lastblkp; + struct fs *fs; + ufs_lbn_t lblkno; + int expungetype; +{ + ufs2_daddr_t blkno; + struct inode *ip; + ino_t inum; + int acctit; + + ip = VTOI(vp); + inum = ip->i_number; + if (lblkno == -1) + acctit = 0; + else + acctit = 1; + for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) { + blkno = *oldblkp; + if (blkno == 0 || blkno == BLK_NOCOPY) + continue; + if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP) + *ip->i_snapblklist++ = lblkno; + if (blkno == BLK_SNAP) + blkno = blkstofrags(fs, lblkno); + ffs_blkfree(ITOUMP(ip), fs, vp, blkno, fs->fs_bsize, inum, + vp->v_type, NULL); + } + return (0); +} + +/* + * Decrement extra reference on snapshot when last name is removed. + * It will not be freed until the last open reference goes away. + */ +void +ffs_snapgone(ip) + struct inode *ip; +{ + struct inode *xp; + struct fs *fs; + int snaploc; + struct snapdata *sn; + struct ufsmount *ump; + + /* + * Find snapshot in incore list. + */ + xp = NULL; + sn = ITODEVVP(ip)->v_rdev->si_snapdata; + if (sn != NULL) + TAILQ_FOREACH(xp, &sn->sn_head, i_nextsnap) + if (xp == ip) + break; + if (xp != NULL) + vrele(ITOV(ip)); + else if (snapdebug) + printf("ffs_snapgone: lost snapshot vnode %ju\n", + (uintmax_t)ip->i_number); + /* + * Delete snapshot inode from superblock. Keep list dense. + */ + ump = ITOUMP(ip); + fs = ump->um_fs; + UFS_LOCK(ump); + for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) + if (fs->fs_snapinum[snaploc] == ip->i_number) + break; + if (snaploc < FSMAXSNAP) { + for (snaploc++; snaploc < FSMAXSNAP; snaploc++) { + if (fs->fs_snapinum[snaploc] == 0) + break; + fs->fs_snapinum[snaploc - 1] = fs->fs_snapinum[snaploc]; + } + fs->fs_snapinum[snaploc - 1] = 0; + } + UFS_UNLOCK(ump); +} + +/* + * Prepare a snapshot file for being removed. 
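+ * The vnode is switched from the shared snapshot lock back to its + * private lock, all BLK_NOCOPY and BLK_SNAP pointers are cleared, and + * blocks this snapshot had claimed are offered to the remaining + * snapshots via ffs_snapblkfree().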
+ */ +void +ffs_snapremove(vp) + struct vnode *vp; +{ + struct inode *ip; + struct vnode *devvp; + struct buf *ibp; + struct fs *fs; + ufs2_daddr_t numblks, blkno, dblk; + int error, i, last, loc; + struct snapdata *sn; + + ip = VTOI(vp); + fs = ITOFS(ip); + devvp = ITODEVVP(ip); + /* + * If active, delete from incore list (this snapshot may + * already have been in the process of being deleted, so + * would not have been active). + * + * Clear copy-on-write flag if last snapshot. + */ + VI_LOCK(devvp); + if (ip->i_nextsnap.tqe_prev != 0) { + sn = devvp->v_rdev->si_snapdata; + TAILQ_REMOVE(&sn->sn_head, ip, i_nextsnap); + ip->i_nextsnap.tqe_prev = 0; + VI_UNLOCK(devvp); + lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL); + for (i = 0; i < sn->sn_lock.lk_recurse; i++) + lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL); + KASSERT(vp->v_vnlock == &sn->sn_lock, + ("ffs_snapremove: lost lock mutation")); + vp->v_vnlock = &vp->v_lock; + VI_LOCK(devvp); + while (sn->sn_lock.lk_recurse > 0) + lockmgr(&sn->sn_lock, LK_RELEASE, NULL); + lockmgr(&sn->sn_lock, LK_RELEASE, NULL); + try_free_snapdata(devvp); + } else + VI_UNLOCK(devvp); + /* + * Clear all BLK_NOCOPY fields. Pass any block claims to other + * snapshots that want them (see ffs_snapblkfree below). + */ + for (blkno = 1; blkno < NDADDR; blkno++) { + dblk = DIP(ip, i_db[blkno]); + if (dblk == 0) + continue; + if (dblk == BLK_NOCOPY || dblk == BLK_SNAP) + DIP_SET(ip, i_db[blkno], 0); + else if ((dblk == blkstofrags(fs, blkno) && + ffs_snapblkfree(fs, ITODEVVP(ip), dblk, fs->fs_bsize, + ip->i_number, vp->v_type, NULL))) { + DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - + btodb(fs->fs_bsize)); + DIP_SET(ip, i_db[blkno], 0); + } + } + numblks = howmany(ip->i_size, fs->fs_bsize); + for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) { + error = UFS_BALLOC(vp, lblktosize(fs, (off_t)blkno), + fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); + if (error) + continue; + if (fs->fs_size - blkno > NINDIR(fs)) + last = NINDIR(fs); + else + last = fs->fs_size - blkno; + for (loc = 0; loc < last; loc++) { + if (I_IS_UFS1(ip)) { + dblk = ((ufs1_daddr_t *)(ibp->b_data))[loc]; + if (dblk == 0) + continue; + if (dblk == BLK_NOCOPY || dblk == BLK_SNAP) + ((ufs1_daddr_t *)(ibp->b_data))[loc]= 0; + else if ((dblk == blkstofrags(fs, blkno) && + ffs_snapblkfree(fs, ITODEVVP(ip), dblk, + fs->fs_bsize, ip->i_number, vp->v_type, + NULL))) { + ip->i_din1->di_blocks -= + btodb(fs->fs_bsize); + ((ufs1_daddr_t *)(ibp->b_data))[loc]= 0; + } + continue; + } + dblk = ((ufs2_daddr_t *)(ibp->b_data))[loc]; + if (dblk == 0) + continue; + if (dblk == BLK_NOCOPY || dblk == BLK_SNAP) + ((ufs2_daddr_t *)(ibp->b_data))[loc] = 0; + else if ((dblk == blkstofrags(fs, blkno) && + ffs_snapblkfree(fs, ITODEVVP(ip), dblk, + fs->fs_bsize, ip->i_number, vp->v_type, NULL))) { + ip->i_din2->di_blocks -= btodb(fs->fs_bsize); + ((ufs2_daddr_t *)(ibp->b_data))[loc] = 0; + } + } + bawrite(ibp); + } + /* + * Clear snapshot flag and drop reference. + */ + ip->i_flags &= ~SF_SNAPSHOT; + DIP_SET(ip, i_flags, ip->i_flags); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + /* + * The dirtied indirects must be written out before + * softdep_setup_freeblocks() is called. Otherwise indir_trunc() + * may find indirect pointers using the magic BLK_* values. + */ + if (DOINGSOFTDEP(vp)) + ffs_syncvnode(vp, MNT_WAIT, 0); +#ifdef QUOTA + /* + * Reenable disk quotas for ex-snapshot file. + */ + if (!getinoquota(ip)) + (void) chkdq(ip, DIP(ip, i_blocks), KERNCRED, FORCE); +#endif +} + +/* + * Notification that a block is being freed. 
Return zero if the free + * should be allowed to proceed. Return non-zero if the snapshot file + * wants to claim the block. The block will be claimed if it is an + * uncopied part of one of the snapshots. It will be freed if it is + * either a BLK_NOCOPY or has already been copied in all of the snapshots. + * If a fragment is being freed, then all snapshots that care about + * it must make a copy since a snapshot file can only claim full sized + * blocks. Note that if more than one snapshot file maps the block, + * we can pick one at random to claim it. Since none of the snapshots + * can change, we are assurred that they will all see the same unmodified + * image. When deleting a snapshot file (see ffs_snapremove above), we + * must push any of these claimed blocks to one of the other snapshots + * that maps it. These claimed blocks are easily identified as they will + * have a block number equal to their logical block number within the + * snapshot. A copied block can never have this property because they + * must always have been allocated from a BLK_NOCOPY location. + */ +int +ffs_snapblkfree(fs, devvp, bno, size, inum, vtype, wkhd) + struct fs *fs; + struct vnode *devvp; + ufs2_daddr_t bno; + long size; + ino_t inum; + enum vtype vtype; + struct workhead *wkhd; +{ + struct buf *ibp, *cbp, *savedcbp = NULL; + struct thread *td = curthread; + struct inode *ip; + struct vnode *vp = NULL; + ufs_lbn_t lbn; + ufs2_daddr_t blkno; + int indiroff = 0, error = 0, claimedblk = 0; + struct snapdata *sn; + + lbn = fragstoblks(fs, bno); +retry: + VI_LOCK(devvp); + sn = devvp->v_rdev->si_snapdata; + if (sn == NULL) { + VI_UNLOCK(devvp); + return (0); + } + if (lockmgr(&sn->sn_lock, LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL, + VI_MTX(devvp)) != 0) + goto retry; + TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) { + vp = ITOV(ip); + if (DOINGSOFTDEP(vp)) + softdep_prealloc(vp, MNT_WAIT); + /* + * Lookup block being written. + */ + if (lbn < NDADDR) { + blkno = DIP(ip, i_db[lbn]); + } else { + td->td_pflags |= TDP_COWINPROGRESS; + error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), + fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); + td->td_pflags &= ~TDP_COWINPROGRESS; + if (error) + break; + indiroff = (lbn - NDADDR) % NINDIR(fs); + if (I_IS_UFS1(ip)) + blkno=((ufs1_daddr_t *)(ibp->b_data))[indiroff]; + else + blkno=((ufs2_daddr_t *)(ibp->b_data))[indiroff]; + } + /* + * Check to see if block needs to be copied. + */ + if (blkno == 0) { + /* + * A block that we map is being freed. If it has not + * been claimed yet, we will claim or copy it (below). + */ + claimedblk = 1; + } else if (blkno == BLK_SNAP) { + /* + * No previous snapshot claimed the block, + * so it will be freed and become a BLK_NOCOPY + * (don't care) for us. + */ + if (claimedblk) + panic("snapblkfree: inconsistent block type"); + if (lbn < NDADDR) { + DIP_SET(ip, i_db[lbn], BLK_NOCOPY); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + } else if (I_IS_UFS1(ip)) { + ((ufs1_daddr_t *)(ibp->b_data))[indiroff] = + BLK_NOCOPY; + bdwrite(ibp); + } else { + ((ufs2_daddr_t *)(ibp->b_data))[indiroff] = + BLK_NOCOPY; + bdwrite(ibp); + } + continue; + } else /* BLK_NOCOPY or default */ { + /* + * If the snapshot has already copied the block + * (default), or does not care about the block, + * it is not needed. + */ + if (lbn >= NDADDR) + bqrelse(ibp); + continue; + } + /* + * If this is a full size block, we will just grab it + * and assign it to the snapshot inode. Otherwise we + * will proceed to copy it. 
See explanation for this + * routine as to why only a single snapshot needs to + * claim this block. + */ + if (size == fs->fs_bsize) { +#ifdef DEBUG + if (snapdebug) + printf("%s %ju lbn %jd from inum %ju\n", + "Grabonremove: snapino", + (uintmax_t)ip->i_number, + (intmax_t)lbn, (uintmax_t)inum); +#endif + /* + * If journaling is tracking this write we must add + * the work to the inode or indirect being written. + */ + if (wkhd != NULL) { + if (lbn < NDADDR) + softdep_inode_append(ip, + curthread->td_ucred, wkhd); + else + softdep_buf_append(ibp, wkhd); + } + if (lbn < NDADDR) { + DIP_SET(ip, i_db[lbn], bno); + } else if (I_IS_UFS1(ip)) { + ((ufs1_daddr_t *)(ibp->b_data))[indiroff] = bno; + bdwrite(ibp); + } else { + ((ufs2_daddr_t *)(ibp->b_data))[indiroff] = bno; + bdwrite(ibp); + } + DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + btodb(size)); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + lockmgr(vp->v_vnlock, LK_RELEASE, NULL); + return (1); + } + if (lbn >= NDADDR) + bqrelse(ibp); + /* + * Allocate the block into which to do the copy. Note that this + * allocation will never require any additional allocations for + * the snapshot inode. + */ + td->td_pflags |= TDP_COWINPROGRESS; + error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), + fs->fs_bsize, KERNCRED, 0, &cbp); + td->td_pflags &= ~TDP_COWINPROGRESS; + if (error) + break; +#ifdef DEBUG + if (snapdebug) + printf("%s%ju lbn %jd %s %ju size %ld to blkno %jd\n", + "Copyonremove: snapino ", (uintmax_t)ip->i_number, + (intmax_t)lbn, "for inum", (uintmax_t)inum, size, + (intmax_t)cbp->b_blkno); +#endif + /* + * If we have already read the old block contents, then + * simply copy them to the new block. Note that we need + * to synchronously write snapshots that have not been + * unlinked, and hence will be visible after a crash, + * to ensure their integrity. At a minimum we ensure the + * integrity of the filesystem metadata, but use the + * dopersistence sysctl-setable flag to decide on the + * persistence needed for file content data. + */ + if (savedcbp != NULL) { + bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize); + bawrite(cbp); + if ((vtype == VDIR || dopersistence) && + ip->i_effnlink > 0) + (void) ffs_syncvnode(vp, MNT_WAIT, NO_INO_UPDT); + continue; + } + /* + * Otherwise, read the old block contents into the buffer. + */ + if ((error = readblock(vp, cbp, lbn)) != 0) { + bzero(cbp->b_data, fs->fs_bsize); + bawrite(cbp); + if ((vtype == VDIR || dopersistence) && + ip->i_effnlink > 0) + (void) ffs_syncvnode(vp, MNT_WAIT, NO_INO_UPDT); + break; + } + savedcbp = cbp; + } + /* + * Note that we need to synchronously write snapshots that + * have not been unlinked, and hence will be visible after + * a crash, to ensure their integrity. At a minimum we + * ensure the integrity of the filesystem metadata, but + * use the dopersistence sysctl-setable flag to decide on + * the persistence needed for file content data. + */ + if (savedcbp) { + vp = savedcbp->b_vp; + bawrite(savedcbp); + if ((vtype == VDIR || dopersistence) && + VTOI(vp)->i_effnlink > 0) + (void) ffs_syncvnode(vp, MNT_WAIT, NO_INO_UPDT); + } + /* + * If we have been unable to allocate a block in which to do + * the copy, then return non-zero so that the fragment will + * not be freed. Although space will be lost, the snapshot + * will stay consistent. + */ + if (error != 0 && wkhd != NULL) + softdep_freework(wkhd); + lockmgr(&sn->sn_lock, LK_RELEASE, NULL); + return (error); +} + +/* + * Associate snapshot files when mounting. 
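+ * Each inode recorded in fs_snapinum is loaded, relinked onto the + * active snapshot list, and the preallocated block list is read back + * from the newest snapshot.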
+ */ +void +ffs_snapshot_mount(mp) + struct mount *mp; +{ + struct ufsmount *ump = VFSTOUFS(mp); + struct vnode *devvp = ump->um_devvp; + struct fs *fs = ump->um_fs; + struct thread *td = curthread; + struct snapdata *sn; + struct vnode *vp; + struct vnode *lastvp; + struct inode *ip; + struct uio auio; + struct iovec aiov; + void *snapblklist; + char *reason; + daddr_t snaplistsize; + int error, snaploc, loc; + + /* + * XXX The following needs to be set before ffs_truncate or + * VOP_READ can be called. + */ + mp->mnt_stat.f_iosize = fs->fs_bsize; + /* + * Process each snapshot listed in the superblock. + */ + vp = NULL; + lastvp = NULL; + sn = NULL; + for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) { + if (fs->fs_snapinum[snaploc] == 0) + break; + if ((error = ffs_vget(mp, fs->fs_snapinum[snaploc], + LK_EXCLUSIVE, &vp)) != 0){ + printf("ffs_snapshot_mount: vget failed %d\n", error); + continue; + } + ip = VTOI(vp); + if (!IS_SNAPSHOT(ip) || ip->i_size == + lblktosize(fs, howmany(fs->fs_size, fs->fs_frag))) { + if (!IS_SNAPSHOT(ip)) { + reason = "non-snapshot"; + } else { + reason = "old format snapshot"; + (void)ffs_truncate(vp, (off_t)0, 0, NOCRED); + (void)ffs_syncvnode(vp, MNT_WAIT, 0); + } + printf("ffs_snapshot_mount: %s inode %d\n", + reason, fs->fs_snapinum[snaploc]); + vput(vp); + vp = NULL; + for (loc = snaploc + 1; loc < FSMAXSNAP; loc++) { + if (fs->fs_snapinum[loc] == 0) + break; + fs->fs_snapinum[loc - 1] = fs->fs_snapinum[loc]; + } + fs->fs_snapinum[loc - 1] = 0; + snaploc--; + continue; + } + /* + * Acquire a lock on the snapdata structure, creating it if + * necessary. + */ + sn = ffs_snapdata_acquire(devvp); + /* + * Change vnode to use shared snapshot lock instead of the + * original private lock. + */ + vp->v_vnlock = &sn->sn_lock; + lockmgr(&vp->v_lock, LK_RELEASE, NULL); + /* + * Link it onto the active snapshot list. + */ + VI_LOCK(devvp); + if (ip->i_nextsnap.tqe_prev != 0) + panic("ffs_snapshot_mount: %ju already on list", + (uintmax_t)ip->i_number); + else + TAILQ_INSERT_TAIL(&sn->sn_head, ip, i_nextsnap); + vp->v_vflag |= VV_SYSTEM; + VI_UNLOCK(devvp); + VOP_UNLOCK(vp, 0); + lastvp = vp; + } + vp = lastvp; + /* + * No usable snapshots found. + */ + if (sn == NULL || vp == NULL) + return; + /* + * Allocate the space for the block hints list. We always want to + * use the list from the newest snapshot. 
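+ * The list, with its length stored as the first element, sits at the + * very end of the snapshot file, just past the last filesystem block.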
+ */ + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + aiov.iov_base = (void *)&snaplistsize; + aiov.iov_len = sizeof(snaplistsize); + auio.uio_resid = aiov.iov_len; + auio.uio_offset = + lblktosize(fs, howmany(fs->fs_size, fs->fs_frag)); + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_READ; + auio.uio_td = td; + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + if ((error = VOP_READ(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { + printf("ffs_snapshot_mount: read_1 failed %d\n", error); + VOP_UNLOCK(vp, 0); + return; + } + snapblklist = malloc(snaplistsize * sizeof(daddr_t), + M_UFSMNT, M_WAITOK); + auio.uio_iovcnt = 1; + aiov.iov_base = snapblklist; + aiov.iov_len = snaplistsize * sizeof (daddr_t); + auio.uio_resid = aiov.iov_len; + auio.uio_offset -= sizeof(snaplistsize); + if ((error = VOP_READ(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { + printf("ffs_snapshot_mount: read_2 failed %d\n", error); + VOP_UNLOCK(vp, 0); + free(snapblklist, M_UFSMNT); + return; + } + VOP_UNLOCK(vp, 0); + VI_LOCK(devvp); + ASSERT_VOP_LOCKED(devvp, "ffs_snapshot_mount"); + sn->sn_listsize = snaplistsize; + sn->sn_blklist = (daddr_t *)snapblklist; + devvp->v_vflag |= VV_COPYONWRITE; + VI_UNLOCK(devvp); +} + +/* + * Disassociate snapshot files when unmounting. + */ +void +ffs_snapshot_unmount(mp) + struct mount *mp; +{ + struct vnode *devvp = VFSTOUFS(mp)->um_devvp; + struct snapdata *sn; + struct inode *xp; + struct vnode *vp; + + VI_LOCK(devvp); + sn = devvp->v_rdev->si_snapdata; + while (sn != NULL && (xp = TAILQ_FIRST(&sn->sn_head)) != NULL) { + vp = ITOV(xp); + TAILQ_REMOVE(&sn->sn_head, xp, i_nextsnap); + xp->i_nextsnap.tqe_prev = 0; + lockmgr(&sn->sn_lock, LK_INTERLOCK | LK_EXCLUSIVE, + VI_MTX(devvp)); + lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL); + KASSERT(vp->v_vnlock == &sn->sn_lock, + ("ffs_snapshot_unmount: lost lock mutation")); + vp->v_vnlock = &vp->v_lock; + lockmgr(&vp->v_lock, LK_RELEASE, NULL); + lockmgr(&sn->sn_lock, LK_RELEASE, NULL); + if (xp->i_effnlink > 0) + vrele(vp); + VI_LOCK(devvp); + sn = devvp->v_rdev->si_snapdata; + } + try_free_snapdata(devvp); + ASSERT_VOP_LOCKED(devvp, "ffs_snapshot_unmount"); +} + +/* + * Check the buffer block to be belong to device buffer that shall be + * locked after snaplk. devvp shall be locked on entry, and will be + * leaved locked upon exit. 
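+ * Returns non-zero when the buffer's block is found in the snapshot + * block list, zero otherwise.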
+ */ +static int +ffs_bp_snapblk(devvp, bp) + struct vnode *devvp; + struct buf *bp; +{ + struct snapdata *sn; + struct fs *fs; + ufs2_daddr_t lbn, *snapblklist; + int lower, upper, mid; + + ASSERT_VI_LOCKED(devvp, "ffs_bp_snapblk"); + KASSERT(devvp->v_type == VCHR, ("Not a device %p", devvp)); + sn = devvp->v_rdev->si_snapdata; + if (sn == NULL || TAILQ_FIRST(&sn->sn_head) == NULL) + return (0); + fs = ITOFS(TAILQ_FIRST(&sn->sn_head)); + lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno)); + snapblklist = sn->sn_blklist; + upper = sn->sn_listsize - 1; + lower = 1; + while (lower <= upper) { + mid = (lower + upper) / 2; + if (snapblklist[mid] == lbn) + break; + if (snapblklist[mid] < lbn) + lower = mid + 1; + else + upper = mid - 1; + } + if (lower <= upper) + return (1); + return (0); +} + +void +ffs_bdflush(bo, bp) + struct bufobj *bo; + struct buf *bp; +{ + struct thread *td; + struct vnode *vp, *devvp; + struct buf *nbp; + int bp_bdskip; + + if (bo->bo_dirty.bv_cnt <= dirtybufthresh) + return; + + td = curthread; + vp = bp->b_vp; + devvp = bo->__bo_vnode; + KASSERT(vp == devvp, ("devvp != vp %p %p", bo, bp)); + + VI_LOCK(devvp); + bp_bdskip = ffs_bp_snapblk(devvp, bp); + if (bp_bdskip) + bdwriteskip++; + VI_UNLOCK(devvp); + if (bo->bo_dirty.bv_cnt > dirtybufthresh + 10 && !bp_bdskip) { + (void) VOP_FSYNC(vp, MNT_NOWAIT, td); + altbufferflushes++; + } else { + BO_LOCK(bo); + /* + * Try to find a buffer to flush. + */ + TAILQ_FOREACH(nbp, &bo->bo_dirty.bv_hd, b_bobufs) { + if ((nbp->b_vflags & BV_BKGRDINPROG) || + BUF_LOCK(nbp, + LK_EXCLUSIVE | LK_NOWAIT, NULL)) + continue; + if (bp == nbp) + panic("bdwrite: found ourselves"); + BO_UNLOCK(bo); + /* + * Don't countdeps with the bo lock + * held. + */ + if (buf_countdeps(nbp, 0)) { + BO_LOCK(bo); + BUF_UNLOCK(nbp); + continue; + } + if (bp_bdskip) { + VI_LOCK(devvp); + if (!ffs_bp_snapblk(vp, nbp)) { + VI_UNLOCK(devvp); + BO_LOCK(bo); + BUF_UNLOCK(nbp); + continue; + } + VI_UNLOCK(devvp); + } + if (nbp->b_flags & B_CLUSTEROK) { + vfs_bio_awrite(nbp); + } else { + bremfree(nbp); + bawrite(nbp); + } + dirtybufferflushes++; + break; + } + if (nbp == NULL) + BO_UNLOCK(bo); + } +} + +/* + * Check for need to copy block that is about to be written, + * copying the block if necessary. + */ +int +ffs_copyonwrite(devvp, bp) + struct vnode *devvp; + struct buf *bp; +{ + struct snapdata *sn; + struct buf *ibp, *cbp, *savedcbp = NULL; + struct thread *td = curthread; + struct fs *fs; + struct inode *ip; + struct vnode *vp = NULL; + ufs2_daddr_t lbn, blkno, *snapblklist; + int lower, upper, mid, indiroff, error = 0; + int launched_async_io, prev_norunningbuf; + long saved_runningbufspace; + + if (devvp != bp->b_vp && IS_SNAPSHOT(VTOI(bp->b_vp))) + return (0); /* Update on a snapshot file */ + if (td->td_pflags & TDP_COWINPROGRESS) + panic("ffs_copyonwrite: recursive call"); + /* + * First check to see if it is in the preallocated list. + * By doing this check we avoid several potential deadlocks. 
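+ * The list is sorted by logical block number, so a binary search under + * the vnode interlock suffices and the snapshot lock is not taken in + * the common case.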
+ */ + VI_LOCK(devvp); + sn = devvp->v_rdev->si_snapdata; + if (sn == NULL || + TAILQ_EMPTY(&sn->sn_head)) { + VI_UNLOCK(devvp); + return (0); /* No snapshot */ + } + ip = TAILQ_FIRST(&sn->sn_head); + fs = ITOFS(ip); + lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno)); + snapblklist = sn->sn_blklist; + upper = sn->sn_listsize - 1; + lower = 1; + while (lower <= upper) { + mid = (lower + upper) / 2; + if (snapblklist[mid] == lbn) + break; + if (snapblklist[mid] < lbn) + lower = mid + 1; + else + upper = mid - 1; + } + if (lower <= upper) { + VI_UNLOCK(devvp); + return (0); + } + launched_async_io = 0; + prev_norunningbuf = td->td_pflags & TDP_NORUNNINGBUF; + /* + * Since I/O on bp isn't yet in progress and it may be blocked + * for a long time waiting on snaplk, back it out of + * runningbufspace, possibly waking other threads waiting for space. + */ + saved_runningbufspace = bp->b_runningbufspace; + if (saved_runningbufspace != 0) + runningbufwakeup(bp); + /* + * Not in the precomputed list, so check the snapshots. + */ + while (lockmgr(&sn->sn_lock, LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL, + VI_MTX(devvp)) != 0) { + VI_LOCK(devvp); + sn = devvp->v_rdev->si_snapdata; + if (sn == NULL || + TAILQ_EMPTY(&sn->sn_head)) { + VI_UNLOCK(devvp); + if (saved_runningbufspace != 0) { + bp->b_runningbufspace = saved_runningbufspace; + atomic_add_long(&runningbufspace, + bp->b_runningbufspace); + } + return (0); /* Snapshot gone */ + } + } + TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) { + vp = ITOV(ip); + if (DOINGSOFTDEP(vp)) + softdep_prealloc(vp, MNT_WAIT); + /* + * We ensure that everything of our own that needs to be + * copied will be done at the time that ffs_snapshot is + * called. Thus we can skip the check here which can + * deadlock in doing the lookup in UFS_BALLOC. + */ + if (bp->b_vp == vp) + continue; + /* + * Check to see if block needs to be copied. We do not have + * to hold the snapshot lock while doing this lookup as it + * will never require any additional allocations for the + * snapshot inode. + */ + if (lbn < NDADDR) { + blkno = DIP(ip, i_db[lbn]); + } else { + td->td_pflags |= TDP_COWINPROGRESS | TDP_NORUNNINGBUF; + error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), + fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); + td->td_pflags &= ~TDP_COWINPROGRESS; + if (error) + break; + indiroff = (lbn - NDADDR) % NINDIR(fs); + if (I_IS_UFS1(ip)) + blkno=((ufs1_daddr_t *)(ibp->b_data))[indiroff]; + else + blkno=((ufs2_daddr_t *)(ibp->b_data))[indiroff]; + bqrelse(ibp); + } +#ifdef INVARIANTS + if (blkno == BLK_SNAP && bp->b_lblkno >= 0) + panic("ffs_copyonwrite: bad copy block"); +#endif + if (blkno != 0) + continue; + /* + * Allocate the block into which to do the copy. Since + * multiple processes may all try to copy the same block, + * we have to recheck our need to do a copy if we sleep + * waiting for the lock. + * + * Because all snapshots on a filesystem share a single + * lock, we ensure that we will never be in competition + * with another process to allocate a block. 
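+ *
+ * The TDP_COWINPROGRESS flag set around UFS_BALLOC() below also lets
+ * the recursion check at the top of this function catch any attempt to
+ * re-enter the copy-on-write path from inside the allocation.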
+ */ + td->td_pflags |= TDP_COWINPROGRESS | TDP_NORUNNINGBUF; + error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), + fs->fs_bsize, KERNCRED, 0, &cbp); + td->td_pflags &= ~TDP_COWINPROGRESS; + if (error) + break; +#ifdef DEBUG + if (snapdebug) { + printf("Copyonwrite: snapino %ju lbn %jd for ", + (uintmax_t)ip->i_number, (intmax_t)lbn); + if (bp->b_vp == devvp) + printf("fs metadata"); + else + printf("inum %ju", + (uintmax_t)VTOI(bp->b_vp)->i_number); + printf(" lblkno %jd to blkno %jd\n", + (intmax_t)bp->b_lblkno, (intmax_t)cbp->b_blkno); + } +#endif + /* + * If we have already read the old block contents, then + * simply copy them to the new block. Note that we need + * to synchronously write snapshots that have not been + * unlinked, and hence will be visible after a crash, + * to ensure their integrity. At a minimum we ensure the + * integrity of the filesystem metadata, but use the + * dopersistence sysctl-setable flag to decide on the + * persistence needed for file content data. + */ + if (savedcbp != NULL) { + bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize); + bawrite(cbp); + if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR || + dopersistence) && ip->i_effnlink > 0) + (void) ffs_syncvnode(vp, MNT_WAIT, NO_INO_UPDT); + else + launched_async_io = 1; + continue; + } + /* + * Otherwise, read the old block contents into the buffer. + */ + if ((error = readblock(vp, cbp, lbn)) != 0) { + bzero(cbp->b_data, fs->fs_bsize); + bawrite(cbp); + if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR || + dopersistence) && ip->i_effnlink > 0) + (void) ffs_syncvnode(vp, MNT_WAIT, NO_INO_UPDT); + else + launched_async_io = 1; + break; + } + savedcbp = cbp; + } + /* + * Note that we need to synchronously write snapshots that + * have not been unlinked, and hence will be visible after + * a crash, to ensure their integrity. At a minimum we + * ensure the integrity of the filesystem metadata, but + * use the dopersistence sysctl-setable flag to decide on + * the persistence needed for file content data. + */ + if (savedcbp) { + vp = savedcbp->b_vp; + bawrite(savedcbp); + if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR || + dopersistence) && VTOI(vp)->i_effnlink > 0) + (void) ffs_syncvnode(vp, MNT_WAIT, NO_INO_UPDT); + else + launched_async_io = 1; + } + lockmgr(vp->v_vnlock, LK_RELEASE, NULL); + td->td_pflags = (td->td_pflags & ~TDP_NORUNNINGBUF) | + prev_norunningbuf; + if (launched_async_io && (td->td_pflags & TDP_NORUNNINGBUF) == 0) + waitrunningbufspace(); + /* + * I/O on bp will now be started, so count it in runningbufspace. + */ + if (saved_runningbufspace != 0) { + bp->b_runningbufspace = saved_runningbufspace; + atomic_add_long(&runningbufspace, bp->b_runningbufspace); + } + return (error); +} + +/* + * sync snapshots to force freework records waiting on snapshots to claim + * blocks to free. + */ +void +ffs_sync_snap(mp, waitfor) + struct mount *mp; + int waitfor; +{ + struct snapdata *sn; + struct vnode *devvp; + struct vnode *vp; + struct inode *ip; + + devvp = VFSTOUFS(mp)->um_devvp; + if ((devvp->v_vflag & VV_COPYONWRITE) == 0) + return; + for (;;) { + VI_LOCK(devvp); + sn = devvp->v_rdev->si_snapdata; + if (sn == NULL) { + VI_UNLOCK(devvp); + return; + } + if (lockmgr(&sn->sn_lock, + LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL, + VI_MTX(devvp)) == 0) + break; + } + TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) { + vp = ITOV(ip); + ffs_syncvnode(vp, waitfor, NO_INO_UPDT); + } + lockmgr(&sn->sn_lock, LK_RELEASE, NULL); +} + +/* + * Read the specified block into the given buffer. 
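+ * The read is issued directly to the underlying GEOM consumer as a
+ * BIO_READ request and waited on with biowait(), rather than going
+ * through bread() on the device vnode.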
+ * Much of this boiler-plate comes from bwrite(). + */ +static int +readblock(vp, bp, lbn) + struct vnode *vp; + struct buf *bp; + ufs2_daddr_t lbn; +{ + struct inode *ip = VTOI(vp); + struct bio *bip; + struct fs *fs; + + ip = VTOI(vp); + fs = ITOFS(ip); + + bip = g_alloc_bio(); + bip->bio_cmd = BIO_READ; + bip->bio_offset = dbtob(fsbtodb(fs, blkstofrags(fs, lbn))); + bip->bio_data = bp->b_data; + bip->bio_length = bp->b_bcount; + bip->bio_done = NULL; + + g_io_request(bip, ITODEVVP(ip)->v_bufobj.bo_private); + bp->b_error = biowait(bip, "snaprdb"); + g_destroy_bio(bip); + return (bp->b_error); +} + +#endif + +/* + * Process file deletes that were deferred by ufs_inactive() due to + * the file system being suspended. Transfer IN_LAZYACCESS into + * IN_MODIFIED for vnodes that were accessed during suspension. + */ +void +process_deferred_inactive(struct mount *mp) +{ + struct vnode *vp, *mvp; + struct inode *ip; + struct thread *td; + int error; + + td = curthread; + (void) vn_start_secondary_write(NULL, &mp, V_WAIT); + loop: + MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { + /* + * IN_LAZYACCESS is checked here without holding any + * vnode lock, but this flag is set only while holding + * vnode interlock. + */ + if (vp->v_type == VNON || + ((VTOI(vp)->i_flag & IN_LAZYACCESS) == 0 && + ((vp->v_iflag & VI_OWEINACT) == 0 || vp->v_usecount > 0))) { + VI_UNLOCK(vp); + continue; + } + vholdl(vp); + error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK); + if (error != 0) { + vdrop(vp); + if (error == ENOENT) + continue; /* vnode recycled */ + MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); + goto loop; + } + ip = VTOI(vp); + if ((ip->i_flag & IN_LAZYACCESS) != 0) { + ip->i_flag &= ~IN_LAZYACCESS; + ip->i_flag |= IN_MODIFIED; + } + VI_LOCK(vp); + if ((vp->v_iflag & VI_OWEINACT) == 0 || vp->v_usecount > 0) { + VI_UNLOCK(vp); + VOP_UNLOCK(vp, 0); + vdrop(vp); + continue; + } + vinactive(vp, td); + VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp, + ("process_deferred_inactive: got VI_OWEINACT")); + VI_UNLOCK(vp); + VOP_UNLOCK(vp, 0); + vdrop(vp); + } + vn_finished_secondary_write(mp); +} + +#ifndef NO_FFS_SNAPSHOT + +static struct snapdata * +ffs_snapdata_alloc(void) +{ + struct snapdata *sn; + + /* + * Fetch a snapdata from the free list if there is one available. + */ + mtx_lock(&snapfree_lock); + sn = LIST_FIRST(&snapfree); + if (sn != NULL) + LIST_REMOVE(sn, sn_link); + mtx_unlock(&snapfree_lock); + if (sn != NULL) + return (sn); + /* + * If there were no free snapdatas allocate one. + */ + sn = malloc(sizeof *sn, M_UFSMNT, M_WAITOK | M_ZERO); + TAILQ_INIT(&sn->sn_head); + lockinit(&sn->sn_lock, PVFS, "snaplk", VLKTIMEOUT, + LK_CANRECURSE | LK_NOSHARE); + return (sn); +} + +/* + * The snapdata is never freed because we can not be certain that + * there are no threads sleeping on the snap lock. Persisting + * them permanently avoids costly synchronization in ffs_lock(). 
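+ * Consequently ffs_snapdata_free() below only returns the structure to
+ * the snapfree list, from which ffs_snapdata_alloc() above recycles
+ * entries before falling back to malloc().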
+ */ +static void +ffs_snapdata_free(struct snapdata *sn) +{ + mtx_lock(&snapfree_lock); + LIST_INSERT_HEAD(&snapfree, sn, sn_link); + mtx_unlock(&snapfree_lock); +} + +/* Try to free snapdata associated with devvp */ +static void +try_free_snapdata(struct vnode *devvp) +{ + struct snapdata *sn; + ufs2_daddr_t *snapblklist; + + ASSERT_VI_LOCKED(devvp, "try_free_snapdata"); + sn = devvp->v_rdev->si_snapdata; + + if (sn == NULL || TAILQ_FIRST(&sn->sn_head) != NULL || + (devvp->v_vflag & VV_COPYONWRITE) == 0) { + VI_UNLOCK(devvp); + return; + } + + devvp->v_rdev->si_snapdata = NULL; + devvp->v_vflag &= ~VV_COPYONWRITE; + lockmgr(&sn->sn_lock, LK_DRAIN|LK_INTERLOCK, VI_MTX(devvp)); + snapblklist = sn->sn_blklist; + sn->sn_blklist = NULL; + sn->sn_listsize = 0; + lockmgr(&sn->sn_lock, LK_RELEASE, NULL); + if (snapblklist != NULL) + free(snapblklist, M_UFSMNT); + ffs_snapdata_free(sn); +} + +static struct snapdata * +ffs_snapdata_acquire(struct vnode *devvp) +{ + struct snapdata *nsn, *sn; + int error; + + /* + * Allocate a free snapdata. This is done before acquiring the + * devvp lock to avoid allocation while the devvp interlock is + * held. + */ + nsn = ffs_snapdata_alloc(); + + for (;;) { + VI_LOCK(devvp); + sn = devvp->v_rdev->si_snapdata; + if (sn == NULL) { + /* + * This is the first snapshot on this + * filesystem and we use our pre-allocated + * snapdata. Publish sn with the sn_lock + * owned by us, to avoid the race. + */ + error = lockmgr(&nsn->sn_lock, LK_EXCLUSIVE | + LK_NOWAIT, NULL); + if (error != 0) + panic("leaked sn, lockmgr error %d", error); + sn = devvp->v_rdev->si_snapdata = nsn; + VI_UNLOCK(devvp); + nsn = NULL; + break; + } + + /* + * There is a snapshots which already exists on this + * filesystem, grab a reference to the common lock. + */ + error = lockmgr(&sn->sn_lock, LK_INTERLOCK | + LK_EXCLUSIVE | LK_SLEEPFAIL, VI_MTX(devvp)); + if (error == 0) + break; + } + + /* + * Free any unused snapdata. + */ + if (nsn != NULL) + ffs_snapdata_free(nsn); + + return (sn); +} + +#endif diff --git a/Dump/ufs/ffs/ffs_softdep.c b/Dump/ufs/ffs/ffs_softdep.c new file mode 100644 index 0000000..c154435 --- /dev/null +++ b/Dump/ufs/ffs/ffs_softdep.c @@ -0,0 +1,14469 @@ +/*- + * Copyright 1998, 2000 Marshall Kirk McKusick. + * Copyright 2009, 2010 Jeffrey W. Roberson + * All rights reserved. + * + * The soft updates code is derived from the appendix of a University + * of Michigan technical report (Gregory R. Ganger and Yale N. Patt, + * "Soft Updates: A Solution to the Metadata Update Problem in File + * Systems", CSE-TR-254-95, August 1995). + * + * Further information about soft updates can be obtained from: + * + * Marshall Kirk McKusick http://www.mckusick.com/softdep/ + * 1614 Oxford Street mckusick@mckusick.com + * Berkeley, CA 94709-1608 +1-510-843-9542 + * USA + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR + * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * from: @(#)ffs_softdep.c 9.59 (McKusick) 6/21/00 + */ + +#include +__FBSDID("$FreeBSD: releng/11.2/sys/ufs/ffs/ffs_softdep.c 330446 2018-03-05 06:59:30Z eadler $"); + +#include "opt_ffs.h" +#include "opt_quota.h" +#include "opt_ddb.h" + +/* + * For now we want the safety net that the DEBUG flag provides. + */ +#ifndef DEBUG +#define DEBUG +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include + +#define KTR_SUJ 0 /* Define to KTR_SPARE. */ + +#ifndef SOFTUPDATES + +int +softdep_flushfiles(oldmnt, flags, td) + struct mount *oldmnt; + int flags; + struct thread *td; +{ + + panic("softdep_flushfiles called"); +} + +int +softdep_mount(devvp, mp, fs, cred) + struct vnode *devvp; + struct mount *mp; + struct fs *fs; + struct ucred *cred; +{ + + return (0); +} + +void +softdep_initialize() +{ + + return; +} + +void +softdep_uninitialize() +{ + + return; +} + +void +softdep_unmount(mp) + struct mount *mp; +{ + + panic("softdep_unmount called"); +} + +void +softdep_setup_sbupdate(ump, fs, bp) + struct ufsmount *ump; + struct fs *fs; + struct buf *bp; +{ + + panic("softdep_setup_sbupdate called"); +} + +void +softdep_setup_inomapdep(bp, ip, newinum, mode) + struct buf *bp; + struct inode *ip; + ino_t newinum; + int mode; +{ + + panic("softdep_setup_inomapdep called"); +} + +void +softdep_setup_blkmapdep(bp, mp, newblkno, frags, oldfrags) + struct buf *bp; + struct mount *mp; + ufs2_daddr_t newblkno; + int frags; + int oldfrags; +{ + + panic("softdep_setup_blkmapdep called"); +} + +void +softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) + struct inode *ip; + ufs_lbn_t lbn; + ufs2_daddr_t newblkno; + ufs2_daddr_t oldblkno; + long newsize; + long oldsize; + struct buf *bp; +{ + + panic("softdep_setup_allocdirect called"); +} + +void +softdep_setup_allocext(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) + struct inode *ip; + ufs_lbn_t lbn; + ufs2_daddr_t newblkno; + ufs2_daddr_t oldblkno; + long newsize; + long oldsize; + struct buf *bp; +{ + + panic("softdep_setup_allocext called"); +} + +void +softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, nbp) + struct inode *ip; + ufs_lbn_t lbn; + struct buf *bp; + int ptrno; + ufs2_daddr_t newblkno; + ufs2_daddr_t oldblkno; + struct buf *nbp; +{ + + panic("softdep_setup_allocindir_page called"); +} + +void +softdep_setup_allocindir_meta(nbp, ip, bp, ptrno, newblkno) + struct buf *nbp; + struct inode *ip; + struct buf *bp; + int ptrno; + 
ufs2_daddr_t newblkno; +{ + + panic("softdep_setup_allocindir_meta called"); +} + +void +softdep_journal_freeblocks(ip, cred, length, flags) + struct inode *ip; + struct ucred *cred; + off_t length; + int flags; +{ + + panic("softdep_journal_freeblocks called"); +} + +void +softdep_journal_fsync(ip) + struct inode *ip; +{ + + panic("softdep_journal_fsync called"); +} + +void +softdep_setup_freeblocks(ip, length, flags) + struct inode *ip; + off_t length; + int flags; +{ + + panic("softdep_setup_freeblocks called"); +} + +void +softdep_freefile(pvp, ino, mode) + struct vnode *pvp; + ino_t ino; + int mode; +{ + + panic("softdep_freefile called"); +} + +int +softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk) + struct buf *bp; + struct inode *dp; + off_t diroffset; + ino_t newinum; + struct buf *newdirbp; + int isnewblk; +{ + + panic("softdep_setup_directory_add called"); +} + +void +softdep_change_directoryentry_offset(bp, dp, base, oldloc, newloc, entrysize) + struct buf *bp; + struct inode *dp; + caddr_t base; + caddr_t oldloc; + caddr_t newloc; + int entrysize; +{ + + panic("softdep_change_directoryentry_offset called"); +} + +void +softdep_setup_remove(bp, dp, ip, isrmdir) + struct buf *bp; + struct inode *dp; + struct inode *ip; + int isrmdir; +{ + + panic("softdep_setup_remove called"); +} + +void +softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir) + struct buf *bp; + struct inode *dp; + struct inode *ip; + ino_t newinum; + int isrmdir; +{ + + panic("softdep_setup_directory_change called"); +} + +void +softdep_setup_blkfree(mp, bp, blkno, frags, wkhd) + struct mount *mp; + struct buf *bp; + ufs2_daddr_t blkno; + int frags; + struct workhead *wkhd; +{ + + panic("%s called", __FUNCTION__); +} + +void +softdep_setup_inofree(mp, bp, ino, wkhd) + struct mount *mp; + struct buf *bp; + ino_t ino; + struct workhead *wkhd; +{ + + panic("%s called", __FUNCTION__); +} + +void +softdep_setup_unlink(dp, ip) + struct inode *dp; + struct inode *ip; +{ + + panic("%s called", __FUNCTION__); +} + +void +softdep_setup_link(dp, ip) + struct inode *dp; + struct inode *ip; +{ + + panic("%s called", __FUNCTION__); +} + +void +softdep_revert_link(dp, ip) + struct inode *dp; + struct inode *ip; +{ + + panic("%s called", __FUNCTION__); +} + +void +softdep_setup_rmdir(dp, ip) + struct inode *dp; + struct inode *ip; +{ + + panic("%s called", __FUNCTION__); +} + +void +softdep_revert_rmdir(dp, ip) + struct inode *dp; + struct inode *ip; +{ + + panic("%s called", __FUNCTION__); +} + +void +softdep_setup_create(dp, ip) + struct inode *dp; + struct inode *ip; +{ + + panic("%s called", __FUNCTION__); +} + +void +softdep_revert_create(dp, ip) + struct inode *dp; + struct inode *ip; +{ + + panic("%s called", __FUNCTION__); +} + +void +softdep_setup_mkdir(dp, ip) + struct inode *dp; + struct inode *ip; +{ + + panic("%s called", __FUNCTION__); +} + +void +softdep_revert_mkdir(dp, ip) + struct inode *dp; + struct inode *ip; +{ + + panic("%s called", __FUNCTION__); +} + +void +softdep_setup_dotdot_link(dp, ip) + struct inode *dp; + struct inode *ip; +{ + + panic("%s called", __FUNCTION__); +} + +int +softdep_prealloc(vp, waitok) + struct vnode *vp; + int waitok; +{ + + panic("%s called", __FUNCTION__); +} + +int +softdep_journal_lookup(mp, vpp) + struct mount *mp; + struct vnode **vpp; +{ + + return (ENOENT); +} + +void +softdep_change_linkcnt(ip) + struct inode *ip; +{ + + panic("softdep_change_linkcnt called"); +} + +void +softdep_load_inodeblock(ip) + struct inode *ip; +{ + + 
panic("softdep_load_inodeblock called"); +} + +void +softdep_update_inodeblock(ip, bp, waitfor) + struct inode *ip; + struct buf *bp; + int waitfor; +{ + + panic("softdep_update_inodeblock called"); +} + +int +softdep_fsync(vp) + struct vnode *vp; /* the "in_core" copy of the inode */ +{ + + return (0); +} + +void +softdep_fsync_mountdev(vp) + struct vnode *vp; +{ + + return; +} + +int +softdep_flushworklist(oldmnt, countp, td) + struct mount *oldmnt; + int *countp; + struct thread *td; +{ + + *countp = 0; + return (0); +} + +int +softdep_sync_metadata(struct vnode *vp) +{ + + panic("softdep_sync_metadata called"); +} + +int +softdep_sync_buf(struct vnode *vp, struct buf *bp, int waitfor) +{ + + panic("softdep_sync_buf called"); +} + +int +softdep_slowdown(vp) + struct vnode *vp; +{ + + panic("softdep_slowdown called"); +} + +int +softdep_request_cleanup(fs, vp, cred, resource) + struct fs *fs; + struct vnode *vp; + struct ucred *cred; + int resource; +{ + + return (0); +} + +int +softdep_check_suspend(struct mount *mp, + struct vnode *devvp, + int softdep_depcnt, + int softdep_accdepcnt, + int secondary_writes, + int secondary_accwrites) +{ + struct bufobj *bo; + int error; + + (void) softdep_depcnt, + (void) softdep_accdepcnt; + + bo = &devvp->v_bufobj; + ASSERT_BO_WLOCKED(bo); + + MNT_ILOCK(mp); + while (mp->mnt_secondary_writes != 0) { + BO_UNLOCK(bo); + msleep(&mp->mnt_secondary_writes, MNT_MTX(mp), + (PUSER - 1) | PDROP, "secwr", 0); + BO_LOCK(bo); + MNT_ILOCK(mp); + } + + /* + * Reasons for needing more work before suspend: + * - Dirty buffers on devvp. + * - Secondary writes occurred after start of vnode sync loop + */ + error = 0; + if (bo->bo_numoutput > 0 || + bo->bo_dirty.bv_cnt > 0 || + secondary_writes != 0 || + mp->mnt_secondary_writes != 0 || + secondary_accwrites != mp->mnt_secondary_accwrites) + error = EAGAIN; + BO_UNLOCK(bo); + return (error); +} + +void +softdep_get_depcounts(struct mount *mp, + int *softdepactivep, + int *softdepactiveaccp) +{ + (void) mp; + *softdepactivep = 0; + *softdepactiveaccp = 0; +} + +void +softdep_buf_append(bp, wkhd) + struct buf *bp; + struct workhead *wkhd; +{ + + panic("softdep_buf_appendwork called"); +} + +void +softdep_inode_append(ip, cred, wkhd) + struct inode *ip; + struct ucred *cred; + struct workhead *wkhd; +{ + + panic("softdep_inode_appendwork called"); +} + +void +softdep_freework(wkhd) + struct workhead *wkhd; +{ + + panic("softdep_freework called"); +} + +#else + +FEATURE(softupdates, "FFS soft-updates support"); + +static SYSCTL_NODE(_debug, OID_AUTO, softdep, CTLFLAG_RW, 0, + "soft updates stats"); +static SYSCTL_NODE(_debug_softdep, OID_AUTO, total, CTLFLAG_RW, 0, + "total dependencies allocated"); +static SYSCTL_NODE(_debug_softdep, OID_AUTO, highuse, CTLFLAG_RW, 0, + "high use dependencies allocated"); +static SYSCTL_NODE(_debug_softdep, OID_AUTO, current, CTLFLAG_RW, 0, + "current dependencies allocated"); +static SYSCTL_NODE(_debug_softdep, OID_AUTO, write, CTLFLAG_RW, 0, + "current dependencies written"); + +unsigned long dep_current[D_LAST + 1]; +unsigned long dep_highuse[D_LAST + 1]; +unsigned long dep_total[D_LAST + 1]; +unsigned long dep_write[D_LAST + 1]; + +#define SOFTDEP_TYPE(type, str, long) \ + static MALLOC_DEFINE(M_ ## type, #str, long); \ + SYSCTL_ULONG(_debug_softdep_total, OID_AUTO, str, CTLFLAG_RD, \ + &dep_total[D_ ## type], 0, ""); \ + SYSCTL_ULONG(_debug_softdep_current, OID_AUTO, str, CTLFLAG_RD, \ + &dep_current[D_ ## type], 0, ""); \ + SYSCTL_ULONG(_debug_softdep_highuse, OID_AUTO, str, 
CTLFLAG_RD, \ + &dep_highuse[D_ ## type], 0, ""); \ + SYSCTL_ULONG(_debug_softdep_write, OID_AUTO, str, CTLFLAG_RD, \ + &dep_write[D_ ## type], 0, ""); + +SOFTDEP_TYPE(PAGEDEP, pagedep, "File page dependencies"); +SOFTDEP_TYPE(INODEDEP, inodedep, "Inode dependencies"); +SOFTDEP_TYPE(BMSAFEMAP, bmsafemap, + "Block or frag allocated from cyl group map"); +SOFTDEP_TYPE(NEWBLK, newblk, "New block or frag allocation dependency"); +SOFTDEP_TYPE(ALLOCDIRECT, allocdirect, "Block or frag dependency for an inode"); +SOFTDEP_TYPE(INDIRDEP, indirdep, "Indirect block dependencies"); +SOFTDEP_TYPE(ALLOCINDIR, allocindir, "Block dependency for an indirect block"); +SOFTDEP_TYPE(FREEFRAG, freefrag, "Previously used frag for an inode"); +SOFTDEP_TYPE(FREEBLKS, freeblks, "Blocks freed from an inode"); +SOFTDEP_TYPE(FREEFILE, freefile, "Inode deallocated"); +SOFTDEP_TYPE(DIRADD, diradd, "New directory entry"); +SOFTDEP_TYPE(MKDIR, mkdir, "New directory"); +SOFTDEP_TYPE(DIRREM, dirrem, "Directory entry deleted"); +SOFTDEP_TYPE(NEWDIRBLK, newdirblk, "Unclaimed new directory block"); +SOFTDEP_TYPE(FREEWORK, freework, "free an inode block"); +SOFTDEP_TYPE(FREEDEP, freedep, "track a block free"); +SOFTDEP_TYPE(JADDREF, jaddref, "Journal inode ref add"); +SOFTDEP_TYPE(JREMREF, jremref, "Journal inode ref remove"); +SOFTDEP_TYPE(JMVREF, jmvref, "Journal inode ref move"); +SOFTDEP_TYPE(JNEWBLK, jnewblk, "Journal new block"); +SOFTDEP_TYPE(JFREEBLK, jfreeblk, "Journal free block"); +SOFTDEP_TYPE(JFREEFRAG, jfreefrag, "Journal free frag"); +SOFTDEP_TYPE(JSEG, jseg, "Journal segment"); +SOFTDEP_TYPE(JSEGDEP, jsegdep, "Journal segment complete"); +SOFTDEP_TYPE(SBDEP, sbdep, "Superblock write dependency"); +SOFTDEP_TYPE(JTRUNC, jtrunc, "Journal inode truncation"); +SOFTDEP_TYPE(JFSYNC, jfsync, "Journal fsync complete"); + +static MALLOC_DEFINE(M_SENTINEL, "sentinel", "Worklist sentinel"); + +static MALLOC_DEFINE(M_SAVEDINO, "savedino", "Saved inodes"); +static MALLOC_DEFINE(M_JBLOCKS, "jblocks", "Journal block locations"); +static MALLOC_DEFINE(M_MOUNTDATA, "softdep", "Softdep per-mount data"); + +#define M_SOFTDEP_FLAGS (M_WAITOK) + +/* + * translate from workitem type to memory type + * MUST match the defines above, such that memtype[D_XXX] == M_XXX + */ +static struct malloc_type *memtype[] = { + M_PAGEDEP, + M_INODEDEP, + M_BMSAFEMAP, + M_NEWBLK, + M_ALLOCDIRECT, + M_INDIRDEP, + M_ALLOCINDIR, + M_FREEFRAG, + M_FREEBLKS, + M_FREEFILE, + M_DIRADD, + M_MKDIR, + M_DIRREM, + M_NEWDIRBLK, + M_FREEWORK, + M_FREEDEP, + M_JADDREF, + M_JREMREF, + M_JMVREF, + M_JNEWBLK, + M_JFREEBLK, + M_JFREEFRAG, + M_JSEG, + M_JSEGDEP, + M_SBDEP, + M_JTRUNC, + M_JFSYNC, + M_SENTINEL +}; + +#define DtoM(type) (memtype[type]) + +/* + * Names of malloc types. + */ +#define TYPENAME(type) \ + ((unsigned)(type) <= D_LAST ? memtype[type]->ks_shortdesc : "???") +/* + * End system adaptation definitions. + */ + +#define DOTDOT_OFFSET offsetof(struct dirtemplate, dotdot_ino) +#define DOT_OFFSET offsetof(struct dirtemplate, dot_ino) + +/* + * Internal function prototypes. 
+ */ +static void check_clear_deps(struct mount *); +static void softdep_error(char *, int); +static int softdep_process_worklist(struct mount *, int); +static int softdep_waitidle(struct mount *, int); +static void drain_output(struct vnode *); +static struct buf *getdirtybuf(struct buf *, struct rwlock *, int); +static int check_inodedep_free(struct inodedep *); +static void clear_remove(struct mount *); +static void clear_inodedeps(struct mount *); +static void unlinked_inodedep(struct mount *, struct inodedep *); +static void clear_unlinked_inodedep(struct inodedep *); +static struct inodedep *first_unlinked_inodedep(struct ufsmount *); +static int flush_pagedep_deps(struct vnode *, struct mount *, + struct diraddhd *); +static int free_pagedep(struct pagedep *); +static int flush_newblk_dep(struct vnode *, struct mount *, ufs_lbn_t); +static int flush_inodedep_deps(struct vnode *, struct mount *, ino_t); +static int flush_deplist(struct allocdirectlst *, int, int *); +static int sync_cgs(struct mount *, int); +static int handle_written_filepage(struct pagedep *, struct buf *, int); +static int handle_written_sbdep(struct sbdep *, struct buf *); +static void initiate_write_sbdep(struct sbdep *); +static void diradd_inode_written(struct diradd *, struct inodedep *); +static int handle_written_indirdep(struct indirdep *, struct buf *, + struct buf**, int); +static int handle_written_inodeblock(struct inodedep *, struct buf *, int); +static int jnewblk_rollforward(struct jnewblk *, struct fs *, struct cg *, + uint8_t *); +static int handle_written_bmsafemap(struct bmsafemap *, struct buf *, int); +static void handle_written_jaddref(struct jaddref *); +static void handle_written_jremref(struct jremref *); +static void handle_written_jseg(struct jseg *, struct buf *); +static void handle_written_jnewblk(struct jnewblk *); +static void handle_written_jblkdep(struct jblkdep *); +static void handle_written_jfreefrag(struct jfreefrag *); +static void complete_jseg(struct jseg *); +static void complete_jsegs(struct jseg *); +static void jseg_write(struct ufsmount *ump, struct jseg *, uint8_t *); +static void jaddref_write(struct jaddref *, struct jseg *, uint8_t *); +static void jremref_write(struct jremref *, struct jseg *, uint8_t *); +static void jmvref_write(struct jmvref *, struct jseg *, uint8_t *); +static void jtrunc_write(struct jtrunc *, struct jseg *, uint8_t *); +static void jfsync_write(struct jfsync *, struct jseg *, uint8_t *data); +static void jnewblk_write(struct jnewblk *, struct jseg *, uint8_t *); +static void jfreeblk_write(struct jfreeblk *, struct jseg *, uint8_t *); +static void jfreefrag_write(struct jfreefrag *, struct jseg *, uint8_t *); +static inline void inoref_write(struct inoref *, struct jseg *, + struct jrefrec *); +static void handle_allocdirect_partdone(struct allocdirect *, + struct workhead *); +static struct jnewblk *cancel_newblk(struct newblk *, struct worklist *, + struct workhead *); +static void indirdep_complete(struct indirdep *); +static int indirblk_lookup(struct mount *, ufs2_daddr_t); +static void indirblk_insert(struct freework *); +static void indirblk_remove(struct freework *); +static void handle_allocindir_partdone(struct allocindir *); +static void initiate_write_filepage(struct pagedep *, struct buf *); +static void initiate_write_indirdep(struct indirdep*, struct buf *); +static void handle_written_mkdir(struct mkdir *, int); +static int jnewblk_rollback(struct jnewblk *, struct fs *, struct cg *, + uint8_t *); +static void 
initiate_write_bmsafemap(struct bmsafemap *, struct buf *); +static void initiate_write_inodeblock_ufs1(struct inodedep *, struct buf *); +static void initiate_write_inodeblock_ufs2(struct inodedep *, struct buf *); +static void handle_workitem_freefile(struct freefile *); +static int handle_workitem_remove(struct dirrem *, int); +static struct dirrem *newdirrem(struct buf *, struct inode *, + struct inode *, int, struct dirrem **); +static struct indirdep *indirdep_lookup(struct mount *, struct inode *, + struct buf *); +static void cancel_indirdep(struct indirdep *, struct buf *, + struct freeblks *); +static void free_indirdep(struct indirdep *); +static void free_diradd(struct diradd *, struct workhead *); +static void merge_diradd(struct inodedep *, struct diradd *); +static void complete_diradd(struct diradd *); +static struct diradd *diradd_lookup(struct pagedep *, int); +static struct jremref *cancel_diradd_dotdot(struct inode *, struct dirrem *, + struct jremref *); +static struct jremref *cancel_mkdir_dotdot(struct inode *, struct dirrem *, + struct jremref *); +static void cancel_diradd(struct diradd *, struct dirrem *, struct jremref *, + struct jremref *, struct jremref *); +static void dirrem_journal(struct dirrem *, struct jremref *, struct jremref *, + struct jremref *); +static void cancel_allocindir(struct allocindir *, struct buf *bp, + struct freeblks *, int); +static int setup_trunc_indir(struct freeblks *, struct inode *, + ufs_lbn_t, ufs_lbn_t, ufs2_daddr_t); +static void complete_trunc_indir(struct freework *); +static void trunc_indirdep(struct indirdep *, struct freeblks *, struct buf *, + int); +static void complete_mkdir(struct mkdir *); +static void free_newdirblk(struct newdirblk *); +static void free_jremref(struct jremref *); +static void free_jaddref(struct jaddref *); +static void free_jsegdep(struct jsegdep *); +static void free_jsegs(struct jblocks *); +static void rele_jseg(struct jseg *); +static void free_jseg(struct jseg *, struct jblocks *); +static void free_jnewblk(struct jnewblk *); +static void free_jblkdep(struct jblkdep *); +static void free_jfreefrag(struct jfreefrag *); +static void free_freedep(struct freedep *); +static void journal_jremref(struct dirrem *, struct jremref *, + struct inodedep *); +static void cancel_jnewblk(struct jnewblk *, struct workhead *); +static int cancel_jaddref(struct jaddref *, struct inodedep *, + struct workhead *); +static void cancel_jfreefrag(struct jfreefrag *); +static inline void setup_freedirect(struct freeblks *, struct inode *, + int, int); +static inline void setup_freeext(struct freeblks *, struct inode *, int, int); +static inline void setup_freeindir(struct freeblks *, struct inode *, int, + ufs_lbn_t, int); +static inline struct freeblks *newfreeblks(struct mount *, struct inode *); +static void freeblks_free(struct ufsmount *, struct freeblks *, int); +static void indir_trunc(struct freework *, ufs2_daddr_t, ufs_lbn_t); +static ufs2_daddr_t blkcount(struct fs *, ufs2_daddr_t, off_t); +static int trunc_check_buf(struct buf *, int *, ufs_lbn_t, int, int); +static void trunc_dependencies(struct inode *, struct freeblks *, ufs_lbn_t, + int, int); +static void trunc_pages(struct inode *, off_t, ufs2_daddr_t, int); +static int cancel_pagedep(struct pagedep *, struct freeblks *, int); +static int deallocate_dependencies(struct buf *, struct freeblks *, int); +static void newblk_freefrag(struct newblk*); +static void free_newblk(struct newblk *); +static void cancel_allocdirect(struct allocdirectlst *, 
+ struct allocdirect *, struct freeblks *); +static int check_inode_unwritten(struct inodedep *); +static int free_inodedep(struct inodedep *); +static void freework_freeblock(struct freework *); +static void freework_enqueue(struct freework *); +static int handle_workitem_freeblocks(struct freeblks *, int); +static int handle_complete_freeblocks(struct freeblks *, int); +static void handle_workitem_indirblk(struct freework *); +static void handle_written_freework(struct freework *); +static void merge_inode_lists(struct allocdirectlst *,struct allocdirectlst *); +static struct worklist *jnewblk_merge(struct worklist *, struct worklist *, + struct workhead *); +static struct freefrag *setup_allocindir_phase2(struct buf *, struct inode *, + struct inodedep *, struct allocindir *, ufs_lbn_t); +static struct allocindir *newallocindir(struct inode *, int, ufs2_daddr_t, + ufs2_daddr_t, ufs_lbn_t); +static void handle_workitem_freefrag(struct freefrag *); +static struct freefrag *newfreefrag(struct inode *, ufs2_daddr_t, long, + ufs_lbn_t); +static void allocdirect_merge(struct allocdirectlst *, + struct allocdirect *, struct allocdirect *); +static struct freefrag *allocindir_merge(struct allocindir *, + struct allocindir *); +static int bmsafemap_find(struct bmsafemap_hashhead *, int, + struct bmsafemap **); +static struct bmsafemap *bmsafemap_lookup(struct mount *, struct buf *, + int cg, struct bmsafemap *); +static int newblk_find(struct newblk_hashhead *, ufs2_daddr_t, int, + struct newblk **); +static int newblk_lookup(struct mount *, ufs2_daddr_t, int, struct newblk **); +static int inodedep_find(struct inodedep_hashhead *, ino_t, + struct inodedep **); +static int inodedep_lookup(struct mount *, ino_t, int, struct inodedep **); +static int pagedep_lookup(struct mount *, struct buf *bp, ino_t, ufs_lbn_t, + int, struct pagedep **); +static int pagedep_find(struct pagedep_hashhead *, ino_t, ufs_lbn_t, + struct pagedep **); +static void pause_timer(void *); +static int request_cleanup(struct mount *, int); +static int softdep_request_cleanup_flush(struct mount *, struct ufsmount *); +static void schedule_cleanup(struct mount *); +static void softdep_ast_cleanup_proc(struct thread *); +static struct ufsmount *softdep_bp_to_mp(struct buf *bp); +static int process_worklist_item(struct mount *, int, int); +static void process_removes(struct vnode *); +static void process_truncates(struct vnode *); +static void jwork_move(struct workhead *, struct workhead *); +static void jwork_insert(struct workhead *, struct jsegdep *); +static void add_to_worklist(struct worklist *, int); +static void wake_worklist(struct worklist *); +static void wait_worklist(struct worklist *, char *); +static void remove_from_worklist(struct worklist *); +static void softdep_flush(void *); +static void softdep_flushjournal(struct mount *); +static int softdep_speedup(struct ufsmount *); +static void worklist_speedup(struct mount *); +static int journal_mount(struct mount *, struct fs *, struct ucred *); +static void journal_unmount(struct ufsmount *); +static int journal_space(struct ufsmount *, int); +static void journal_suspend(struct ufsmount *); +static int journal_unsuspend(struct ufsmount *ump); +static void softdep_prelink(struct vnode *, struct vnode *); +static void add_to_journal(struct worklist *); +static void remove_from_journal(struct worklist *); +static bool softdep_excess_items(struct ufsmount *, int); +static void softdep_process_journal(struct mount *, struct worklist *, int); +static struct jremref 
*newjremref(struct dirrem *, struct inode *, + struct inode *ip, off_t, nlink_t); +static struct jaddref *newjaddref(struct inode *, ino_t, off_t, int16_t, + uint16_t); +static inline void newinoref(struct inoref *, ino_t, ino_t, off_t, nlink_t, + uint16_t); +static inline struct jsegdep *inoref_jseg(struct inoref *); +static struct jmvref *newjmvref(struct inode *, ino_t, off_t, off_t); +static struct jfreeblk *newjfreeblk(struct freeblks *, ufs_lbn_t, + ufs2_daddr_t, int); +static void adjust_newfreework(struct freeblks *, int); +static struct jtrunc *newjtrunc(struct freeblks *, off_t, int); +static void move_newblock_dep(struct jaddref *, struct inodedep *); +static void cancel_jfreeblk(struct freeblks *, ufs2_daddr_t); +static struct jfreefrag *newjfreefrag(struct freefrag *, struct inode *, + ufs2_daddr_t, long, ufs_lbn_t); +static struct freework *newfreework(struct ufsmount *, struct freeblks *, + struct freework *, ufs_lbn_t, ufs2_daddr_t, int, int, int); +static int jwait(struct worklist *, int); +static struct inodedep *inodedep_lookup_ip(struct inode *); +static int bmsafemap_backgroundwrite(struct bmsafemap *, struct buf *); +static struct freefile *handle_bufwait(struct inodedep *, struct workhead *); +static void handle_jwork(struct workhead *); +static struct mkdir *setup_newdir(struct diradd *, ino_t, ino_t, struct buf *, + struct mkdir **); +static struct jblocks *jblocks_create(void); +static ufs2_daddr_t jblocks_alloc(struct jblocks *, int, int *); +static void jblocks_free(struct jblocks *, struct mount *, int); +static void jblocks_destroy(struct jblocks *); +static void jblocks_add(struct jblocks *, ufs2_daddr_t, int); + +/* + * Exported softdep operations. + */ +static void softdep_disk_io_initiation(struct buf *); +static void softdep_disk_write_complete(struct buf *); +static void softdep_deallocate_dependencies(struct buf *); +static int softdep_count_dependencies(struct buf *bp, int); + +/* + * Global lock over all of soft updates. + */ +static struct mtx lk; +MTX_SYSINIT(softdep_lock, &lk, "Global Softdep Lock", MTX_DEF); + +#define ACQUIRE_GBLLOCK(lk) mtx_lock(lk) +#define FREE_GBLLOCK(lk) mtx_unlock(lk) +#define GBLLOCK_OWNED(lk) mtx_assert((lk), MA_OWNED) + +/* + * Per-filesystem soft-updates locking. + */ +#define LOCK_PTR(ump) (&(ump)->um_softdep->sd_fslock) +#define TRY_ACQUIRE_LOCK(ump) rw_try_wlock(&(ump)->um_softdep->sd_fslock) +#define ACQUIRE_LOCK(ump) rw_wlock(&(ump)->um_softdep->sd_fslock) +#define FREE_LOCK(ump) rw_wunlock(&(ump)->um_softdep->sd_fslock) +#define LOCK_OWNED(ump) rw_assert(&(ump)->um_softdep->sd_fslock, \ + RA_WLOCKED) + +#define BUF_AREC(bp) lockallowrecurse(&(bp)->b_lock) +#define BUF_NOREC(bp) lockdisablerecurse(&(bp)->b_lock) + +/* + * Worklist queue management. + * These routines require that the lock be held. 
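+ *
+ * With DEBUG defined (which this file currently forces on above), the
+ * insert and remove operations go through worklist_insert() and
+ * worklist_remove(), which also assert lock ownership and a consistent
+ * ONWORKLIST state; otherwise they compile down to the bare list
+ * macros.  A typical, purely illustrative use is attaching the struct
+ * worklist embedded in a dependency structure to a buffer's dependency
+ * list, where wk is that embedded struct worklist pointer:
+ *
+ *	WORKLIST_INSERT(&bp->b_dep, wk);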
+ */ +#ifndef /* NOT */ DEBUG +#define WORKLIST_INSERT(head, item) do { \ + (item)->wk_state |= ONWORKLIST; \ + LIST_INSERT_HEAD(head, item, wk_list); \ +} while (0) +#define WORKLIST_REMOVE(item) do { \ + (item)->wk_state &= ~ONWORKLIST; \ + LIST_REMOVE(item, wk_list); \ +} while (0) +#define WORKLIST_INSERT_UNLOCKED WORKLIST_INSERT +#define WORKLIST_REMOVE_UNLOCKED WORKLIST_REMOVE + +#else /* DEBUG */ +static void worklist_insert(struct workhead *, struct worklist *, int); +static void worklist_remove(struct worklist *, int); + +#define WORKLIST_INSERT(head, item) worklist_insert(head, item, 1) +#define WORKLIST_INSERT_UNLOCKED(head, item) worklist_insert(head, item, 0) +#define WORKLIST_REMOVE(item) worklist_remove(item, 1) +#define WORKLIST_REMOVE_UNLOCKED(item) worklist_remove(item, 0) + +static void +worklist_insert(head, item, locked) + struct workhead *head; + struct worklist *item; + int locked; +{ + + if (locked) + LOCK_OWNED(VFSTOUFS(item->wk_mp)); + if (item->wk_state & ONWORKLIST) + panic("worklist_insert: %p %s(0x%X) already on list", + item, TYPENAME(item->wk_type), item->wk_state); + item->wk_state |= ONWORKLIST; + LIST_INSERT_HEAD(head, item, wk_list); +} + +static void +worklist_remove(item, locked) + struct worklist *item; + int locked; +{ + + if (locked) + LOCK_OWNED(VFSTOUFS(item->wk_mp)); + if ((item->wk_state & ONWORKLIST) == 0) + panic("worklist_remove: %p %s(0x%X) not on list", + item, TYPENAME(item->wk_type), item->wk_state); + item->wk_state &= ~ONWORKLIST; + LIST_REMOVE(item, wk_list); +} +#endif /* DEBUG */ + +/* + * Merge two jsegdeps keeping only the oldest one as newer references + * can't be discarded until after older references. + */ +static inline struct jsegdep * +jsegdep_merge(struct jsegdep *one, struct jsegdep *two) +{ + struct jsegdep *swp; + + if (two == NULL) + return (one); + + if (one->jd_seg->js_seq > two->jd_seg->js_seq) { + swp = one; + one = two; + two = swp; + } + WORKLIST_REMOVE(&two->jd_list); + free_jsegdep(two); + + return (one); +} + +/* + * If two freedeps are compatible free one to reduce list size. + */ +static inline struct freedep * +freedep_merge(struct freedep *one, struct freedep *two) +{ + if (two == NULL) + return (one); + + if (one->fd_freework == two->fd_freework) { + WORKLIST_REMOVE(&two->fd_list); + free_freedep(two); + } + return (one); +} + +/* + * Move journal work from one list to another. Duplicate freedeps and + * jsegdeps are coalesced to keep the lists as small as possible. 
+ */ +static void +jwork_move(dst, src) + struct workhead *dst; + struct workhead *src; +{ + struct freedep *freedep; + struct jsegdep *jsegdep; + struct worklist *wkn; + struct worklist *wk; + + KASSERT(dst != src, + ("jwork_move: dst == src")); + freedep = NULL; + jsegdep = NULL; + LIST_FOREACH_SAFE(wk, dst, wk_list, wkn) { + if (wk->wk_type == D_JSEGDEP) + jsegdep = jsegdep_merge(WK_JSEGDEP(wk), jsegdep); + else if (wk->wk_type == D_FREEDEP) + freedep = freedep_merge(WK_FREEDEP(wk), freedep); + } + + while ((wk = LIST_FIRST(src)) != NULL) { + WORKLIST_REMOVE(wk); + WORKLIST_INSERT(dst, wk); + if (wk->wk_type == D_JSEGDEP) { + jsegdep = jsegdep_merge(WK_JSEGDEP(wk), jsegdep); + continue; + } + if (wk->wk_type == D_FREEDEP) + freedep = freedep_merge(WK_FREEDEP(wk), freedep); + } +} + +static void +jwork_insert(dst, jsegdep) + struct workhead *dst; + struct jsegdep *jsegdep; +{ + struct jsegdep *jsegdepn; + struct worklist *wk; + + LIST_FOREACH(wk, dst, wk_list) + if (wk->wk_type == D_JSEGDEP) + break; + if (wk == NULL) { + WORKLIST_INSERT(dst, &jsegdep->jd_list); + return; + } + jsegdepn = WK_JSEGDEP(wk); + if (jsegdep->jd_seg->js_seq < jsegdepn->jd_seg->js_seq) { + WORKLIST_REMOVE(wk); + free_jsegdep(jsegdepn); + WORKLIST_INSERT(dst, &jsegdep->jd_list); + } else + free_jsegdep(jsegdep); +} + +/* + * Routines for tracking and managing workitems. + */ +static void workitem_free(struct worklist *, int); +static void workitem_alloc(struct worklist *, int, struct mount *); +static void workitem_reassign(struct worklist *, int); + +#define WORKITEM_FREE(item, type) \ + workitem_free((struct worklist *)(item), (type)) +#define WORKITEM_REASSIGN(item, type) \ + workitem_reassign((struct worklist *)(item), (type)) + +static void +workitem_free(item, type) + struct worklist *item; + int type; +{ + struct ufsmount *ump; + +#ifdef DEBUG + if (item->wk_state & ONWORKLIST) + panic("workitem_free: %s(0x%X) still on list", + TYPENAME(item->wk_type), item->wk_state); + if (item->wk_type != type && type != D_NEWBLK) + panic("workitem_free: type mismatch %s != %s", + TYPENAME(item->wk_type), TYPENAME(type)); +#endif + if (item->wk_state & IOWAITING) + wakeup(item); + ump = VFSTOUFS(item->wk_mp); + LOCK_OWNED(ump); + KASSERT(ump->softdep_deps > 0, + ("workitem_free: %s: softdep_deps going negative", + ump->um_fs->fs_fsmnt)); + if (--ump->softdep_deps == 0 && ump->softdep_req) + wakeup(&ump->softdep_deps); + KASSERT(dep_current[item->wk_type] > 0, + ("workitem_free: %s: dep_current[%s] going negative", + ump->um_fs->fs_fsmnt, TYPENAME(item->wk_type))); + KASSERT(ump->softdep_curdeps[item->wk_type] > 0, + ("workitem_free: %s: softdep_curdeps[%s] going negative", + ump->um_fs->fs_fsmnt, TYPENAME(item->wk_type))); + atomic_subtract_long(&dep_current[item->wk_type], 1); + ump->softdep_curdeps[item->wk_type] -= 1; + free(item, DtoM(type)); +} + +static void +workitem_alloc(item, type, mp) + struct worklist *item; + int type; + struct mount *mp; +{ + struct ufsmount *ump; + + item->wk_type = type; + item->wk_mp = mp; + item->wk_state = 0; + + ump = VFSTOUFS(mp); + ACQUIRE_GBLLOCK(&lk); + dep_current[type]++; + if (dep_current[type] > dep_highuse[type]) + dep_highuse[type] = dep_current[type]; + dep_total[type]++; + FREE_GBLLOCK(&lk); + ACQUIRE_LOCK(ump); + ump->softdep_curdeps[type] += 1; + ump->softdep_deps++; + ump->softdep_accdeps++; + FREE_LOCK(ump); +} + +static void +workitem_reassign(item, newtype) + struct worklist *item; + int newtype; +{ + struct ufsmount *ump; + + ump = VFSTOUFS(item->wk_mp); + 
LOCK_OWNED(ump); + KASSERT(ump->softdep_curdeps[item->wk_type] > 0, + ("workitem_reassign: %s: softdep_curdeps[%s] going negative", + VFSTOUFS(item->wk_mp)->um_fs->fs_fsmnt, TYPENAME(item->wk_type))); + ump->softdep_curdeps[item->wk_type] -= 1; + ump->softdep_curdeps[newtype] += 1; + KASSERT(dep_current[item->wk_type] > 0, + ("workitem_reassign: %s: dep_current[%s] going negative", + VFSTOUFS(item->wk_mp)->um_fs->fs_fsmnt, TYPENAME(item->wk_type))); + ACQUIRE_GBLLOCK(&lk); + dep_current[newtype]++; + dep_current[item->wk_type]--; + if (dep_current[newtype] > dep_highuse[newtype]) + dep_highuse[newtype] = dep_current[newtype]; + dep_total[newtype]++; + FREE_GBLLOCK(&lk); + item->wk_type = newtype; +} + +/* + * Workitem queue management + */ +static int max_softdeps; /* maximum number of structs before slowdown */ +static int tickdelay = 2; /* number of ticks to pause during slowdown */ +static int proc_waiting; /* tracks whether we have a timeout posted */ +static int *stat_countp; /* statistic to count in proc_waiting timeout */ +static struct callout softdep_callout; +static int req_clear_inodedeps; /* syncer process flush some inodedeps */ +static int req_clear_remove; /* syncer process flush some freeblks */ +static int softdep_flushcache = 0; /* Should we do BIO_FLUSH? */ + +/* + * runtime statistics + */ +static int stat_flush_threads; /* number of softdep flushing threads */ +static int stat_worklist_push; /* number of worklist cleanups */ +static int stat_blk_limit_push; /* number of times block limit neared */ +static int stat_ino_limit_push; /* number of times inode limit neared */ +static int stat_blk_limit_hit; /* number of times block slowdown imposed */ +static int stat_ino_limit_hit; /* number of times inode slowdown imposed */ +static int stat_sync_limit_hit; /* number of synchronous slowdowns imposed */ +static int stat_indir_blk_ptrs; /* bufs redirtied as indir ptrs not written */ +static int stat_inode_bitmap; /* bufs redirtied as inode bitmap not written */ +static int stat_direct_blk_ptrs;/* bufs redirtied as direct ptrs not written */ +static int stat_dir_entry; /* bufs redirtied as dir entry cannot write */ +static int stat_jaddref; /* bufs redirtied as ino bitmap can not write */ +static int stat_jnewblk; /* bufs redirtied as blk bitmap can not write */ +static int stat_journal_min; /* Times hit journal min threshold */ +static int stat_journal_low; /* Times hit journal low threshold */ +static int stat_journal_wait; /* Times blocked in jwait(). */ +static int stat_jwait_filepage; /* Times blocked in jwait() for filepage. */ +static int stat_jwait_freeblks; /* Times blocked in jwait() for freeblks. */ +static int stat_jwait_inode; /* Times blocked in jwait() for inodes. */ +static int stat_jwait_newblk; /* Times blocked in jwait() for newblks. 
*/ +static int stat_cleanup_high_delay; /* Maximum cleanup delay (in ticks) */ +static int stat_cleanup_blkrequests; /* Number of block cleanup requests */ +static int stat_cleanup_inorequests; /* Number of inode cleanup requests */ +static int stat_cleanup_retries; /* Number of cleanups that needed to flush */ +static int stat_cleanup_failures; /* Number of cleanup requests that failed */ +static int stat_emptyjblocks; /* Number of potentially empty journal blocks */ + +SYSCTL_INT(_debug_softdep, OID_AUTO, max_softdeps, CTLFLAG_RW, + &max_softdeps, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, tickdelay, CTLFLAG_RW, + &tickdelay, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, flush_threads, CTLFLAG_RD, + &stat_flush_threads, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, worklist_push, CTLFLAG_RW, + &stat_worklist_push, 0,""); +SYSCTL_INT(_debug_softdep, OID_AUTO, blk_limit_push, CTLFLAG_RW, + &stat_blk_limit_push, 0,""); +SYSCTL_INT(_debug_softdep, OID_AUTO, ino_limit_push, CTLFLAG_RW, + &stat_ino_limit_push, 0,""); +SYSCTL_INT(_debug_softdep, OID_AUTO, blk_limit_hit, CTLFLAG_RW, + &stat_blk_limit_hit, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, ino_limit_hit, CTLFLAG_RW, + &stat_ino_limit_hit, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, sync_limit_hit, CTLFLAG_RW, + &stat_sync_limit_hit, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, indir_blk_ptrs, CTLFLAG_RW, + &stat_indir_blk_ptrs, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, inode_bitmap, CTLFLAG_RW, + &stat_inode_bitmap, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, direct_blk_ptrs, CTLFLAG_RW, + &stat_direct_blk_ptrs, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, dir_entry, CTLFLAG_RW, + &stat_dir_entry, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, jaddref_rollback, CTLFLAG_RW, + &stat_jaddref, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, jnewblk_rollback, CTLFLAG_RW, + &stat_jnewblk, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, journal_low, CTLFLAG_RW, + &stat_journal_low, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, journal_min, CTLFLAG_RW, + &stat_journal_min, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, journal_wait, CTLFLAG_RW, + &stat_journal_wait, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_filepage, CTLFLAG_RW, + &stat_jwait_filepage, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_freeblks, CTLFLAG_RW, + &stat_jwait_freeblks, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_inode, CTLFLAG_RW, + &stat_jwait_inode, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_newblk, CTLFLAG_RW, + &stat_jwait_newblk, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, cleanup_blkrequests, CTLFLAG_RW, + &stat_cleanup_blkrequests, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, cleanup_inorequests, CTLFLAG_RW, + &stat_cleanup_inorequests, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, cleanup_high_delay, CTLFLAG_RW, + &stat_cleanup_high_delay, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, cleanup_retries, CTLFLAG_RW, + &stat_cleanup_retries, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, cleanup_failures, CTLFLAG_RW, + &stat_cleanup_failures, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, flushcache, CTLFLAG_RW, + &softdep_flushcache, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, emptyjblocks, CTLFLAG_RD, + &stat_emptyjblocks, 0, ""); + +SYSCTL_DECL(_vfs_ffs); + +/* Whether to recompute the summary at mount time */ +static int compute_summary_at_mount = 0; +SYSCTL_INT(_vfs_ffs, OID_AUTO, compute_summary_at_mount, CTLFLAG_RW, + &compute_summary_at_mount, 0, "Recompute summary at mount"); +static int print_threads = 0; 
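+/*
+ * The counters and knobs registered in this block are exported under
+ * the debug.softdep sysctl tree (debug.softdep.total, .highuse,
+ * .current and .write for the per-type counts, plus the flat statistics
+ * above).  As a purely illustrative sketch, a userland tool might read
+ * one of them with sysctlbyname(3):
+ *
+ *	u_long deps;
+ *	size_t len = sizeof(deps);
+ *
+ *	if (sysctlbyname("debug.softdep.current.inodedep", &deps, &len,
+ *	    NULL, 0) == 0)
+ *		printf("inodedep records: %lu\n", deps);
+ */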
+SYSCTL_INT(_debug_softdep, OID_AUTO, print_threads, CTLFLAG_RW, + &print_threads, 0, "Notify flusher thread start/stop"); + +/* List of all filesystems mounted with soft updates */ +static TAILQ_HEAD(, mount_softdeps) softdepmounts; + +/* + * This function cleans the worklist for a filesystem. + * Each filesystem running with soft dependencies gets its own + * thread to run in this function. The thread is started up in + * softdep_mount and shutdown in softdep_unmount. They show up + * as part of the kernel "bufdaemon" process whose process + * entry is available in bufdaemonproc. + */ +static int searchfailed; +extern struct proc *bufdaemonproc; +static void +softdep_flush(addr) + void *addr; +{ + struct mount *mp; + struct thread *td; + struct ufsmount *ump; + + td = curthread; + td->td_pflags |= TDP_NORUNNINGBUF; + mp = (struct mount *)addr; + ump = VFSTOUFS(mp); + atomic_add_int(&stat_flush_threads, 1); + ACQUIRE_LOCK(ump); + ump->softdep_flags &= ~FLUSH_STARTING; + wakeup(&ump->softdep_flushtd); + FREE_LOCK(ump); + if (print_threads) { + if (stat_flush_threads == 1) + printf("Running %s at pid %d\n", bufdaemonproc->p_comm, + bufdaemonproc->p_pid); + printf("Start thread %s\n", td->td_name); + } + for (;;) { + while (softdep_process_worklist(mp, 0) > 0 || + (MOUNTEDSUJ(mp) && + VFSTOUFS(mp)->softdep_jblocks->jb_suspended)) + kthread_suspend_check(); + ACQUIRE_LOCK(ump); + if ((ump->softdep_flags & (FLUSH_CLEANUP | FLUSH_EXIT)) == 0) + msleep(&ump->softdep_flushtd, LOCK_PTR(ump), PVM, + "sdflush", hz / 2); + ump->softdep_flags &= ~FLUSH_CLEANUP; + /* + * Check to see if we are done and need to exit. + */ + if ((ump->softdep_flags & FLUSH_EXIT) == 0) { + FREE_LOCK(ump); + continue; + } + ump->softdep_flags &= ~FLUSH_EXIT; + FREE_LOCK(ump); + wakeup(&ump->softdep_flags); + if (print_threads) + printf("Stop thread %s: searchfailed %d, did cleanups %d\n", td->td_name, searchfailed, ump->um_softdep->sd_cleanups); + atomic_subtract_int(&stat_flush_threads, 1); + kthread_exit(); + panic("kthread_exit failed\n"); + } +} + +static void +worklist_speedup(mp) + struct mount *mp; +{ + struct ufsmount *ump; + + ump = VFSTOUFS(mp); + LOCK_OWNED(ump); + if ((ump->softdep_flags & (FLUSH_CLEANUP | FLUSH_EXIT)) == 0) + ump->softdep_flags |= FLUSH_CLEANUP; + wakeup(&ump->softdep_flushtd); +} + +static int +softdep_speedup(ump) + struct ufsmount *ump; +{ + struct ufsmount *altump; + struct mount_softdeps *sdp; + + LOCK_OWNED(ump); + worklist_speedup(ump->um_mountp); + bd_speedup(); + /* + * If we have global shortages, then we need other + * filesystems to help with the cleanup. Here we wakeup a + * flusher thread for a filesystem that is over its fair + * share of resources. + */ + if (req_clear_inodedeps || req_clear_remove) { + ACQUIRE_GBLLOCK(&lk); + TAILQ_FOREACH(sdp, &softdepmounts, sd_next) { + if ((altump = sdp->sd_ump) == ump) + continue; + if (((req_clear_inodedeps && + altump->softdep_curdeps[D_INODEDEP] > + max_softdeps / stat_flush_threads) || + (req_clear_remove && + altump->softdep_curdeps[D_DIRREM] > + (max_softdeps / 2) / stat_flush_threads)) && + TRY_ACQUIRE_LOCK(altump)) + break; + } + if (sdp == NULL) { + searchfailed++; + FREE_GBLLOCK(&lk); + } else { + /* + * Move to the end of the list so we pick a + * different one on out next try. 
+ */ + TAILQ_REMOVE(&softdepmounts, sdp, sd_next); + TAILQ_INSERT_TAIL(&softdepmounts, sdp, sd_next); + FREE_GBLLOCK(&lk); + if ((altump->softdep_flags & + (FLUSH_CLEANUP | FLUSH_EXIT)) == 0) + altump->softdep_flags |= FLUSH_CLEANUP; + altump->um_softdep->sd_cleanups++; + wakeup(&altump->softdep_flushtd); + FREE_LOCK(altump); + } + } + return (speedup_syncer()); +} + +/* + * Add an item to the end of the work queue. + * This routine requires that the lock be held. + * This is the only routine that adds items to the list. + * The following routine is the only one that removes items + * and does so in order from first to last. + */ + +#define WK_HEAD 0x0001 /* Add to HEAD. */ +#define WK_NODELAY 0x0002 /* Process immediately. */ + +static void +add_to_worklist(wk, flags) + struct worklist *wk; + int flags; +{ + struct ufsmount *ump; + + ump = VFSTOUFS(wk->wk_mp); + LOCK_OWNED(ump); + if (wk->wk_state & ONWORKLIST) + panic("add_to_worklist: %s(0x%X) already on list", + TYPENAME(wk->wk_type), wk->wk_state); + wk->wk_state |= ONWORKLIST; + if (ump->softdep_on_worklist == 0) { + LIST_INSERT_HEAD(&ump->softdep_workitem_pending, wk, wk_list); + ump->softdep_worklist_tail = wk; + } else if (flags & WK_HEAD) { + LIST_INSERT_HEAD(&ump->softdep_workitem_pending, wk, wk_list); + } else { + LIST_INSERT_AFTER(ump->softdep_worklist_tail, wk, wk_list); + ump->softdep_worklist_tail = wk; + } + ump->softdep_on_worklist += 1; + if (flags & WK_NODELAY) + worklist_speedup(wk->wk_mp); +} + +/* + * Remove the item to be processed. If we are removing the last + * item on the list, we need to recalculate the tail pointer. + */ +static void +remove_from_worklist(wk) + struct worklist *wk; +{ + struct ufsmount *ump; + + ump = VFSTOUFS(wk->wk_mp); + if (ump->softdep_worklist_tail == wk) + ump->softdep_worklist_tail = + (struct worklist *)wk->wk_list.le_prev; + WORKLIST_REMOVE(wk); + ump->softdep_on_worklist -= 1; +} + +static void +wake_worklist(wk) + struct worklist *wk; +{ + if (wk->wk_state & IOWAITING) { + wk->wk_state &= ~IOWAITING; + wakeup(wk); + } +} + +static void +wait_worklist(wk, wmesg) + struct worklist *wk; + char *wmesg; +{ + struct ufsmount *ump; + + ump = VFSTOUFS(wk->wk_mp); + wk->wk_state |= IOWAITING; + msleep(wk, LOCK_PTR(ump), PVM, wmesg, 0); +} + +/* + * Process that runs once per second to handle items in the background queue. + * + * Note that we ensure that everything is done in the order in which they + * appear in the queue. The code below depends on this property to ensure + * that blocks of a file are freed before the inode itself is freed. This + * ordering ensures that no new triples will be generated + * until all the old ones have been purged from the dependency lists. + */ +static int +softdep_process_worklist(mp, full) + struct mount *mp; + int full; +{ + int cnt, matchcnt; + struct ufsmount *ump; + long starttime; + + KASSERT(mp != NULL, ("softdep_process_worklist: NULL mp")); + if (MOUNTEDSOFTDEP(mp) == 0) + return (0); + matchcnt = 0; + ump = VFSTOUFS(mp); + ACQUIRE_LOCK(ump); + starttime = time_second; + softdep_process_journal(mp, NULL, full ? MNT_WAIT : 0); + check_clear_deps(mp); + while (ump->softdep_on_worklist > 0) { + if ((cnt = process_worklist_item(mp, 10, LK_NOWAIT)) == 0) + break; + else + matchcnt += cnt; + check_clear_deps(mp); + /* + * We do not generally want to stop for buffer space, but if + * we are really being a buffer hog, we will stop and wait. 
+ */ + if (should_yield()) { + FREE_LOCK(ump); + kern_yield(PRI_USER); + bwillwrite(); + ACQUIRE_LOCK(ump); + } + /* + * Never allow processing to run for more than one + * second. This gives the syncer thread the opportunity + * to pause if appropriate. + */ + if (!full && starttime != time_second) + break; + } + if (full == 0) + journal_unsuspend(ump); + FREE_LOCK(ump); + return (matchcnt); +} + +/* + * Process all removes associated with a vnode if we are running out of + * journal space. Any other process which attempts to flush these will + * be unable as we have the vnodes locked. + */ +static void +process_removes(vp) + struct vnode *vp; +{ + struct inodedep *inodedep; + struct dirrem *dirrem; + struct ufsmount *ump; + struct mount *mp; + ino_t inum; + + mp = vp->v_mount; + ump = VFSTOUFS(mp); + LOCK_OWNED(ump); + inum = VTOI(vp)->i_number; + for (;;) { +top: + if (inodedep_lookup(mp, inum, 0, &inodedep) == 0) + return; + LIST_FOREACH(dirrem, &inodedep->id_dirremhd, dm_inonext) { + /* + * If another thread is trying to lock this vnode + * it will fail but we must wait for it to do so + * before we can proceed. + */ + if (dirrem->dm_state & INPROGRESS) { + wait_worklist(&dirrem->dm_list, "pwrwait"); + goto top; + } + if ((dirrem->dm_state & (COMPLETE | ONWORKLIST)) == + (COMPLETE | ONWORKLIST)) + break; + } + if (dirrem == NULL) + return; + remove_from_worklist(&dirrem->dm_list); + FREE_LOCK(ump); + if (vn_start_secondary_write(NULL, &mp, V_NOWAIT)) + panic("process_removes: suspended filesystem"); + handle_workitem_remove(dirrem, 0); + vn_finished_secondary_write(mp); + ACQUIRE_LOCK(ump); + } +} + +/* + * Process all truncations associated with a vnode if we are running out + * of journal space. This is called when the vnode lock is already held + * and no other process can clear the truncation. This function returns + * a value greater than zero if it did any work. + */ +static void +process_truncates(vp) + struct vnode *vp; +{ + struct inodedep *inodedep; + struct freeblks *freeblks; + struct ufsmount *ump; + struct mount *mp; + ino_t inum; + int cgwait; + + mp = vp->v_mount; + ump = VFSTOUFS(mp); + LOCK_OWNED(ump); + inum = VTOI(vp)->i_number; + for (;;) { + if (inodedep_lookup(mp, inum, 0, &inodedep) == 0) + return; + cgwait = 0; + TAILQ_FOREACH(freeblks, &inodedep->id_freeblklst, fb_next) { + /* Journal entries not yet written. */ + if (!LIST_EMPTY(&freeblks->fb_jblkdephd)) { + jwait(&LIST_FIRST( + &freeblks->fb_jblkdephd)->jb_list, + MNT_WAIT); + break; + } + /* Another thread is executing this item. */ + if (freeblks->fb_state & INPROGRESS) { + wait_worklist(&freeblks->fb_list, "ptrwait"); + break; + } + /* Freeblks is waiting on a inode write. */ + if ((freeblks->fb_state & COMPLETE) == 0) { + FREE_LOCK(ump); + ffs_update(vp, 1); + ACQUIRE_LOCK(ump); + break; + } + if ((freeblks->fb_state & (ALLCOMPLETE | ONWORKLIST)) == + (ALLCOMPLETE | ONWORKLIST)) { + remove_from_worklist(&freeblks->fb_list); + freeblks->fb_state |= INPROGRESS; + FREE_LOCK(ump); + if (vn_start_secondary_write(NULL, &mp, + V_NOWAIT)) + panic("process_truncates: " + "suspended filesystem"); + handle_workitem_freeblocks(freeblks, 0); + vn_finished_secondary_write(mp); + ACQUIRE_LOCK(ump); + break; + } + if (freeblks->fb_cgwait) + cgwait++; + } + if (cgwait) { + FREE_LOCK(ump); + sync_cgs(mp, MNT_WAIT); + ffs_sync_snap(mp, MNT_WAIT); + ACQUIRE_LOCK(ump); + continue; + } + if (freeblks == NULL) + break; + } + return; +} + +/* + * Process one item on the worklist. 
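 *
 * process_worklist_item() below walks the pending list with a movable
 * sentinel so the per-filesystem lock can be dropped while each item
 * is handled: the sentinel stays linked into the list and marks where
 * the scan resumes, while the real item is unlinked and processed
 * unlocked.  Schematically (the list and handler names here are
 * placeholders, not the kernel identifiers):
 *
 *    LIST_INSERT_HEAD(&pending, &sentinel, wk_list);
 *    while ((wk = LIST_NEXT(&sentinel, wk_list)) != NULL) {
 *            if (wk->wk_type == D_SENTINEL) {
 *                    LIST_REMOVE(&sentinel, wk_list);
 *                    LIST_INSERT_AFTER(wk, &sentinel, wk_list);
 *                    continue;            (hop over a peer's sentinel)
 *            }
 *            remove_from_worklist(wk);
 *            FREE_LOCK(ump);
 *            handle(wk);
 *            ACQUIRE_LOCK(ump);
 *    }
 *    LIST_REMOVE(&sentinel, wk_list);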
+ */ +static int +process_worklist_item(mp, target, flags) + struct mount *mp; + int target; + int flags; +{ + struct worklist sentinel; + struct worklist *wk; + struct ufsmount *ump; + int matchcnt; + int error; + + KASSERT(mp != NULL, ("process_worklist_item: NULL mp")); + /* + * If we are being called because of a process doing a + * copy-on-write, then it is not safe to write as we may + * recurse into the copy-on-write routine. + */ + if (curthread->td_pflags & TDP_COWINPROGRESS) + return (-1); + PHOLD(curproc); /* Don't let the stack go away. */ + ump = VFSTOUFS(mp); + LOCK_OWNED(ump); + matchcnt = 0; + sentinel.wk_mp = NULL; + sentinel.wk_type = D_SENTINEL; + LIST_INSERT_HEAD(&ump->softdep_workitem_pending, &sentinel, wk_list); + for (wk = LIST_NEXT(&sentinel, wk_list); wk != NULL; + wk = LIST_NEXT(&sentinel, wk_list)) { + if (wk->wk_type == D_SENTINEL) { + LIST_REMOVE(&sentinel, wk_list); + LIST_INSERT_AFTER(wk, &sentinel, wk_list); + continue; + } + if (wk->wk_state & INPROGRESS) + panic("process_worklist_item: %p already in progress.", + wk); + wk->wk_state |= INPROGRESS; + remove_from_worklist(wk); + FREE_LOCK(ump); + if (vn_start_secondary_write(NULL, &mp, V_NOWAIT)) + panic("process_worklist_item: suspended filesystem"); + switch (wk->wk_type) { + case D_DIRREM: + /* removal of a directory entry */ + error = handle_workitem_remove(WK_DIRREM(wk), flags); + break; + + case D_FREEBLKS: + /* releasing blocks and/or fragments from a file */ + error = handle_workitem_freeblocks(WK_FREEBLKS(wk), + flags); + break; + + case D_FREEFRAG: + /* releasing a fragment when replaced as a file grows */ + handle_workitem_freefrag(WK_FREEFRAG(wk)); + error = 0; + break; + + case D_FREEFILE: + /* releasing an inode when its link count drops to 0 */ + handle_workitem_freefile(WK_FREEFILE(wk)); + error = 0; + break; + + default: + panic("%s_process_worklist: Unknown type %s", + "softdep", TYPENAME(wk->wk_type)); + /* NOTREACHED */ + } + vn_finished_secondary_write(mp); + ACQUIRE_LOCK(ump); + if (error == 0) { + if (++matchcnt == target) + break; + continue; + } + /* + * We have to retry the worklist item later. Wake up any + * waiters who may be able to complete it immediately and + * add the item back to the head so we don't try to execute + * it again. + */ + wk->wk_state &= ~INPROGRESS; + wake_worklist(wk); + add_to_worklist(wk, WK_HEAD); + } + /* Sentinal could've become the tail from remove_from_worklist. */ + if (ump->softdep_worklist_tail == &sentinel) + ump->softdep_worklist_tail = + (struct worklist *)sentinel.wk_list.le_prev; + LIST_REMOVE(&sentinel, wk_list); + PRELE(curproc); + return (matchcnt); +} + +/* + * Move dependencies from one buffer to another. 
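 *
 * softdep_move_dependencies() below must preserve the order of the
 * b_dep worklist while moving it, but a LIST keeps no tail pointer, so
 * it remembers the last item moved and inserts each subsequent item
 * after it.  The same pattern in isolation, with a stand-in element
 * type rather than the kernel's struct worklist:
 *
 *    #include <sys/queue.h>
 *
 *    struct elem { LIST_ENTRY(elem) link; };
 *    LIST_HEAD(elemhead, elem);
 *
 *    static void
 *    move_all(struct elemhead *from, struct elemhead *to)
 *    {
 *            struct elem *e, *tail;
 *
 *            tail = NULL;
 *            while ((e = LIST_FIRST(from)) != NULL) {
 *                    LIST_REMOVE(e, link);
 *                    if (tail == NULL)
 *                            LIST_INSERT_HEAD(to, e, link);
 *                    else
 *                            LIST_INSERT_AFTER(tail, e, link);
 *                    tail = e;
 *            }
 *    }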
+ */ +int +softdep_move_dependencies(oldbp, newbp) + struct buf *oldbp; + struct buf *newbp; +{ + struct worklist *wk, *wktail; + struct ufsmount *ump; + int dirty; + + if ((wk = LIST_FIRST(&oldbp->b_dep)) == NULL) + return (0); + KASSERT(MOUNTEDSOFTDEP(wk->wk_mp) != 0, + ("softdep_move_dependencies called on non-softdep filesystem")); + dirty = 0; + wktail = NULL; + ump = VFSTOUFS(wk->wk_mp); + ACQUIRE_LOCK(ump); + while ((wk = LIST_FIRST(&oldbp->b_dep)) != NULL) { + LIST_REMOVE(wk, wk_list); + if (wk->wk_type == D_BMSAFEMAP && + bmsafemap_backgroundwrite(WK_BMSAFEMAP(wk), newbp)) + dirty = 1; + if (wktail == NULL) + LIST_INSERT_HEAD(&newbp->b_dep, wk, wk_list); + else + LIST_INSERT_AFTER(wktail, wk, wk_list); + wktail = wk; + } + FREE_LOCK(ump); + + return (dirty); +} + +/* + * Purge the work list of all items associated with a particular mount point. + */ +int +softdep_flushworklist(oldmnt, countp, td) + struct mount *oldmnt; + int *countp; + struct thread *td; +{ + struct vnode *devvp; + struct ufsmount *ump; + int count, error; + + /* + * Alternately flush the block device associated with the mount + * point and process any dependencies that the flushing + * creates. We continue until no more worklist dependencies + * are found. + */ + *countp = 0; + error = 0; + ump = VFSTOUFS(oldmnt); + devvp = ump->um_devvp; + while ((count = softdep_process_worklist(oldmnt, 1)) > 0) { + *countp += count; + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); + error = VOP_FSYNC(devvp, MNT_WAIT, td); + VOP_UNLOCK(devvp, 0); + if (error != 0) + break; + } + return (error); +} + +#define SU_WAITIDLE_RETRIES 20 +static int +softdep_waitidle(struct mount *mp, int flags __unused) +{ + struct ufsmount *ump; + struct vnode *devvp; + struct thread *td; + int error, i; + + ump = VFSTOUFS(mp); + devvp = ump->um_devvp; + td = curthread; + error = 0; + ACQUIRE_LOCK(ump); + for (i = 0; i < SU_WAITIDLE_RETRIES && ump->softdep_deps != 0; i++) { + ump->softdep_req = 1; + KASSERT((flags & FORCECLOSE) == 0 || + ump->softdep_on_worklist == 0, + ("softdep_waitidle: work added after flush")); + msleep(&ump->softdep_deps, LOCK_PTR(ump), PVM | PDROP, + "softdeps", 10 * hz); + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); + error = VOP_FSYNC(devvp, MNT_WAIT, td); + VOP_UNLOCK(devvp, 0); + ACQUIRE_LOCK(ump); + if (error != 0) + break; + } + ump->softdep_req = 0; + if (i == SU_WAITIDLE_RETRIES && error == 0 && ump->softdep_deps != 0) { + error = EBUSY; + printf("softdep_waitidle: Failed to flush worklist for %p\n", + mp); + } + FREE_LOCK(ump); + return (error); +} + +/* + * Flush all vnodes and worklist items associated with a specified mount point. + */ +int +softdep_flushfiles(oldmnt, flags, td) + struct mount *oldmnt; + int flags; + struct thread *td; +{ +#ifdef QUOTA + struct ufsmount *ump; + int i; +#endif + int error, early, depcount, loopcnt, retry_flush_count, retry; + int morework; + + KASSERT(MOUNTEDSOFTDEP(oldmnt) != 0, + ("softdep_flushfiles called on non-softdep filesystem")); + loopcnt = 10; + retry_flush_count = 3; +retry_flush: + error = 0; + + /* + * Alternately flush the vnodes associated with the mount + * point and process any dependencies that the flushing + * creates. In theory, this loop can happen at most twice, + * but we give it a few extra just to be sure. + */ + for (; loopcnt > 0; loopcnt--) { + /* + * Do another flush in case any vnodes were brought in + * as part of the cleanup operations. + */ + early = retry_flush_count == 1 || (oldmnt->mnt_kern_flag & + MNTK_UNMOUNT) == 0 ? 
0 : EARLYFLUSH; + if ((error = ffs_flushfiles(oldmnt, flags | early, td)) != 0) + break; + if ((error = softdep_flushworklist(oldmnt, &depcount, td)) != 0 || + depcount == 0) + break; + } + /* + * If we are unmounting then it is an error to fail. If we + * are simply trying to downgrade to read-only, then filesystem + * activity can keep us busy forever, so we just fail with EBUSY. + */ + if (loopcnt == 0) { + if (oldmnt->mnt_kern_flag & MNTK_UNMOUNT) + panic("softdep_flushfiles: looping"); + error = EBUSY; + } + if (!error) + error = softdep_waitidle(oldmnt, flags); + if (!error) { + if (oldmnt->mnt_kern_flag & MNTK_UNMOUNT) { + retry = 0; + MNT_ILOCK(oldmnt); + KASSERT((oldmnt->mnt_kern_flag & MNTK_NOINSMNTQ) != 0, + ("softdep_flushfiles: !MNTK_NOINSMNTQ")); + morework = oldmnt->mnt_nvnodelistsize > 0; +#ifdef QUOTA + ump = VFSTOUFS(oldmnt); + UFS_LOCK(ump); + for (i = 0; i < MAXQUOTAS; i++) { + if (ump->um_quotas[i] != NULLVP) + morework = 1; + } + UFS_UNLOCK(ump); +#endif + if (morework) { + if (--retry_flush_count > 0) { + retry = 1; + loopcnt = 3; + } else + error = EBUSY; + } + MNT_IUNLOCK(oldmnt); + if (retry) + goto retry_flush; + } + } + return (error); +} + +/* + * Structure hashing. + * + * There are four types of structures that can be looked up: + * 1) pagedep structures identified by mount point, inode number, + * and logical block. + * 2) inodedep structures identified by mount point and inode number. + * 3) newblk structures identified by mount point and + * physical block number. + * 4) bmsafemap structures identified by mount point and + * cylinder group number. + * + * The "pagedep" and "inodedep" dependency structures are hashed + * separately from the file blocks and inodes to which they correspond. + * This separation helps when the in-memory copy of an inode or + * file block must be replaced. It also obviates the need to access + * an inode or file page when simply updating (or de-allocating) + * dependency structures. Lookup of newblk structures is needed to + * find newly allocated blocks when trying to associate them with + * their allocdirect or allocindir structure. + * + * The lookup routines optionally create and hash a new instance when + * an existing entry is not found. The bmsafemap lookup routine always + * allocates a new structure if an existing one is not found. + */ +#define DEPALLOC 0x0001 /* allocate structure if lookup fails */ + +/* + * Structures and routines associated with pagedep caching. + */ +#define PAGEDEP_HASH(ump, inum, lbn) \ + (&(ump)->pagedep_hashtbl[((inum) + (lbn)) & (ump)->pagedep_hash_size]) + +static int +pagedep_find(pagedephd, ino, lbn, pagedeppp) + struct pagedep_hashhead *pagedephd; + ino_t ino; + ufs_lbn_t lbn; + struct pagedep **pagedeppp; +{ + struct pagedep *pagedep; + + LIST_FOREACH(pagedep, pagedephd, pd_hash) { + if (ino == pagedep->pd_ino && lbn == pagedep->pd_lbn) { + *pagedeppp = pagedep; + return (1); + } + } + *pagedeppp = NULL; + return (0); +} +/* + * Look up a pagedep. Return 1 if found, 0 otherwise. + * If not found, allocate if DEPALLOC flag is passed. + * Found or allocated entry is returned in pagedeppp. + * This routine must be called with splbio interrupts blocked. 
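 *
 * pagedep_lookup() below, like the other lookup routines that follow,
 * uses a drop-lock-and-retry allocation pattern: search the hash chain
 * under the per-filesystem lock; if the entry is missing and DEPALLOC
 * was requested, drop the lock for the malloc(), then search again
 * after relocking because another thread may have inserted the entry
 * in the meantime, in which case the freshly allocated copy is
 * discarded.  In outline (placeholder names):
 *
 *    if (find(hashhd, key, entrypp))
 *            return (1);
 *    if ((flags & DEPALLOC) == 0)
 *            return (0);
 *    FREE_LOCK(ump);
 *    new = malloc(sizeof(*new), M_TYPE, M_SOFTDEP_FLAGS | M_ZERO);
 *    ACQUIRE_LOCK(ump);
 *    if (find(hashhd, key, entrypp)) {
 *            free(new, M_TYPE);            (lost the race)
 *            return (1);
 *    }
 *    initialize(new, key);
 *    LIST_INSERT_HEAD(hashhd, new, hashlink);
 *    *entrypp = new;
 *    return (0);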
+ */ +static int +pagedep_lookup(mp, bp, ino, lbn, flags, pagedeppp) + struct mount *mp; + struct buf *bp; + ino_t ino; + ufs_lbn_t lbn; + int flags; + struct pagedep **pagedeppp; +{ + struct pagedep *pagedep; + struct pagedep_hashhead *pagedephd; + struct worklist *wk; + struct ufsmount *ump; + int ret; + int i; + + ump = VFSTOUFS(mp); + LOCK_OWNED(ump); + if (bp) { + LIST_FOREACH(wk, &bp->b_dep, wk_list) { + if (wk->wk_type == D_PAGEDEP) { + *pagedeppp = WK_PAGEDEP(wk); + return (1); + } + } + } + pagedephd = PAGEDEP_HASH(ump, ino, lbn); + ret = pagedep_find(pagedephd, ino, lbn, pagedeppp); + if (ret) { + if (((*pagedeppp)->pd_state & ONWORKLIST) == 0 && bp) + WORKLIST_INSERT(&bp->b_dep, &(*pagedeppp)->pd_list); + return (1); + } + if ((flags & DEPALLOC) == 0) + return (0); + FREE_LOCK(ump); + pagedep = malloc(sizeof(struct pagedep), + M_PAGEDEP, M_SOFTDEP_FLAGS|M_ZERO); + workitem_alloc(&pagedep->pd_list, D_PAGEDEP, mp); + ACQUIRE_LOCK(ump); + ret = pagedep_find(pagedephd, ino, lbn, pagedeppp); + if (*pagedeppp) { + /* + * This should never happen since we only create pagedeps + * with the vnode lock held. Could be an assert. + */ + WORKITEM_FREE(pagedep, D_PAGEDEP); + return (ret); + } + pagedep->pd_ino = ino; + pagedep->pd_lbn = lbn; + LIST_INIT(&pagedep->pd_dirremhd); + LIST_INIT(&pagedep->pd_pendinghd); + for (i = 0; i < DAHASHSZ; i++) + LIST_INIT(&pagedep->pd_diraddhd[i]); + LIST_INSERT_HEAD(pagedephd, pagedep, pd_hash); + WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); + *pagedeppp = pagedep; + return (0); +} + +/* + * Structures and routines associated with inodedep caching. + */ +#define INODEDEP_HASH(ump, inum) \ + (&(ump)->inodedep_hashtbl[(inum) & (ump)->inodedep_hash_size]) + +static int +inodedep_find(inodedephd, inum, inodedeppp) + struct inodedep_hashhead *inodedephd; + ino_t inum; + struct inodedep **inodedeppp; +{ + struct inodedep *inodedep; + + LIST_FOREACH(inodedep, inodedephd, id_hash) + if (inum == inodedep->id_ino) + break; + if (inodedep) { + *inodedeppp = inodedep; + return (1); + } + *inodedeppp = NULL; + + return (0); +} +/* + * Look up an inodedep. Return 1 if found, 0 if not found. + * If not found, allocate if DEPALLOC flag is passed. + * Found or allocated entry is returned in inodedeppp. + * This routine must be called with splbio interrupts blocked. + */ +static int +inodedep_lookup(mp, inum, flags, inodedeppp) + struct mount *mp; + ino_t inum; + int flags; + struct inodedep **inodedeppp; +{ + struct inodedep *inodedep; + struct inodedep_hashhead *inodedephd; + struct ufsmount *ump; + struct fs *fs; + + ump = VFSTOUFS(mp); + LOCK_OWNED(ump); + fs = ump->um_fs; + inodedephd = INODEDEP_HASH(ump, inum); + + if (inodedep_find(inodedephd, inum, inodedeppp)) + return (1); + if ((flags & DEPALLOC) == 0) + return (0); + /* + * If the system is over its limit and our filesystem is + * responsible for more than our share of that usage and + * we are not in a rush, request some inodedep cleanup. 
+ */ + if (softdep_excess_items(ump, D_INODEDEP)) + schedule_cleanup(mp); + else + FREE_LOCK(ump); + inodedep = malloc(sizeof(struct inodedep), + M_INODEDEP, M_SOFTDEP_FLAGS); + workitem_alloc(&inodedep->id_list, D_INODEDEP, mp); + ACQUIRE_LOCK(ump); + if (inodedep_find(inodedephd, inum, inodedeppp)) { + WORKITEM_FREE(inodedep, D_INODEDEP); + return (1); + } + inodedep->id_fs = fs; + inodedep->id_ino = inum; + inodedep->id_state = ALLCOMPLETE; + inodedep->id_nlinkdelta = 0; + inodedep->id_savedino1 = NULL; + inodedep->id_savedsize = -1; + inodedep->id_savedextsize = -1; + inodedep->id_savednlink = -1; + inodedep->id_bmsafemap = NULL; + inodedep->id_mkdiradd = NULL; + LIST_INIT(&inodedep->id_dirremhd); + LIST_INIT(&inodedep->id_pendinghd); + LIST_INIT(&inodedep->id_inowait); + LIST_INIT(&inodedep->id_bufwait); + TAILQ_INIT(&inodedep->id_inoreflst); + TAILQ_INIT(&inodedep->id_inoupdt); + TAILQ_INIT(&inodedep->id_newinoupdt); + TAILQ_INIT(&inodedep->id_extupdt); + TAILQ_INIT(&inodedep->id_newextupdt); + TAILQ_INIT(&inodedep->id_freeblklst); + LIST_INSERT_HEAD(inodedephd, inodedep, id_hash); + *inodedeppp = inodedep; + return (0); +} + +/* + * Structures and routines associated with newblk caching. + */ +#define NEWBLK_HASH(ump, inum) \ + (&(ump)->newblk_hashtbl[(inum) & (ump)->newblk_hash_size]) + +static int +newblk_find(newblkhd, newblkno, flags, newblkpp) + struct newblk_hashhead *newblkhd; + ufs2_daddr_t newblkno; + int flags; + struct newblk **newblkpp; +{ + struct newblk *newblk; + + LIST_FOREACH(newblk, newblkhd, nb_hash) { + if (newblkno != newblk->nb_newblkno) + continue; + /* + * If we're creating a new dependency don't match those that + * have already been converted to allocdirects. This is for + * a frag extend. + */ + if ((flags & DEPALLOC) && newblk->nb_list.wk_type != D_NEWBLK) + continue; + break; + } + if (newblk) { + *newblkpp = newblk; + return (1); + } + *newblkpp = NULL; + return (0); +} + +/* + * Look up a newblk. Return 1 if found, 0 if not found. + * If not found, allocate if DEPALLOC flag is passed. + * Found or allocated entry is returned in newblkpp. + */ +static int +newblk_lookup(mp, newblkno, flags, newblkpp) + struct mount *mp; + ufs2_daddr_t newblkno; + int flags; + struct newblk **newblkpp; +{ + struct newblk *newblk; + struct newblk_hashhead *newblkhd; + struct ufsmount *ump; + + ump = VFSTOUFS(mp); + LOCK_OWNED(ump); + newblkhd = NEWBLK_HASH(ump, newblkno); + if (newblk_find(newblkhd, newblkno, flags, newblkpp)) + return (1); + if ((flags & DEPALLOC) == 0) + return (0); + if (softdep_excess_items(ump, D_NEWBLK) || + softdep_excess_items(ump, D_ALLOCDIRECT) || + softdep_excess_items(ump, D_ALLOCINDIR)) + schedule_cleanup(mp); + else + FREE_LOCK(ump); + newblk = malloc(sizeof(union allblk), M_NEWBLK, + M_SOFTDEP_FLAGS | M_ZERO); + workitem_alloc(&newblk->nb_list, D_NEWBLK, mp); + ACQUIRE_LOCK(ump); + if (newblk_find(newblkhd, newblkno, flags, newblkpp)) { + WORKITEM_FREE(newblk, D_NEWBLK); + return (1); + } + newblk->nb_freefrag = NULL; + LIST_INIT(&newblk->nb_indirdeps); + LIST_INIT(&newblk->nb_newdirblk); + LIST_INIT(&newblk->nb_jwork); + newblk->nb_state = ATTACHED; + newblk->nb_newblkno = newblkno; + LIST_INSERT_HEAD(newblkhd, newblk, nb_hash); + *newblkpp = newblk; + return (0); +} + +/* + * Structures and routines associated with freed indirect block caching. + */ +#define INDIR_HASH(ump, blkno) \ + (&(ump)->indir_hashtbl[(blkno) & (ump)->indir_hash_size]) + +/* + * Lookup an indirect block in the indir hash table. 
The freework is + * removed and potentially freed. The caller must do a blocking journal + * write before writing to the blkno. + */ +static int +indirblk_lookup(mp, blkno) + struct mount *mp; + ufs2_daddr_t blkno; +{ + struct freework *freework; + struct indir_hashhead *wkhd; + struct ufsmount *ump; + + ump = VFSTOUFS(mp); + wkhd = INDIR_HASH(ump, blkno); + TAILQ_FOREACH(freework, wkhd, fw_next) { + if (freework->fw_blkno != blkno) + continue; + indirblk_remove(freework); + return (1); + } + return (0); +} + +/* + * Insert an indirect block represented by freework into the indirblk + * hash table so that it may prevent the block from being re-used prior + * to the journal being written. + */ +static void +indirblk_insert(freework) + struct freework *freework; +{ + struct jblocks *jblocks; + struct jseg *jseg; + struct ufsmount *ump; + + ump = VFSTOUFS(freework->fw_list.wk_mp); + jblocks = ump->softdep_jblocks; + jseg = TAILQ_LAST(&jblocks->jb_segs, jseglst); + if (jseg == NULL) + return; + + LIST_INSERT_HEAD(&jseg->js_indirs, freework, fw_segs); + TAILQ_INSERT_HEAD(INDIR_HASH(ump, freework->fw_blkno), freework, + fw_next); + freework->fw_state &= ~DEPCOMPLETE; +} + +static void +indirblk_remove(freework) + struct freework *freework; +{ + struct ufsmount *ump; + + ump = VFSTOUFS(freework->fw_list.wk_mp); + LIST_REMOVE(freework, fw_segs); + TAILQ_REMOVE(INDIR_HASH(ump, freework->fw_blkno), freework, fw_next); + freework->fw_state |= DEPCOMPLETE; + if ((freework->fw_state & ALLCOMPLETE) == ALLCOMPLETE) + WORKITEM_FREE(freework, D_FREEWORK); +} + +/* + * Executed during filesystem system initialization before + * mounting any filesystems. + */ +void +softdep_initialize() +{ + + TAILQ_INIT(&softdepmounts); +#ifdef __LP64__ + max_softdeps = desiredvnodes * 4; +#else + max_softdeps = desiredvnodes * 2; +#endif + + /* initialise bioops hack */ + bioops.io_start = softdep_disk_io_initiation; + bioops.io_complete = softdep_disk_write_complete; + bioops.io_deallocate = softdep_deallocate_dependencies; + bioops.io_countdeps = softdep_count_dependencies; + softdep_ast_cleanup = softdep_ast_cleanup_proc; + + /* Initialize the callout with an mtx. */ + callout_init_mtx(&softdep_callout, &lk, 0); +} + +/* + * Executed after all filesystems have been unmounted during + * filesystem module unload. + */ +void +softdep_uninitialize() +{ + + /* clear bioops hack */ + bioops.io_start = NULL; + bioops.io_complete = NULL; + bioops.io_deallocate = NULL; + bioops.io_countdeps = NULL; + softdep_ast_cleanup = NULL; + + callout_drain(&softdep_callout); +} + +/* + * Called at mount time to notify the dependency code that a + * filesystem wishes to use it. 
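 *
 * softdep_mount() below starts a per-mount flusher and waits for it to
 * acknowledge startup: it sets FLUSH_STARTING, creates the kthread,
 * and msleep()s until softdep_flush() clears the flag and calls
 * wakeup().  The same handshake expressed as a userland analogue with
 * POSIX threads (illustrative only, not kernel code):
 *
 *    static pthread_mutex_t lk = PTHREAD_MUTEX_INITIALIZER;
 *    static pthread_cond_t  cv = PTHREAD_COND_INITIALIZER;
 *    static int starting;
 *
 *    (mounting thread)
 *    pthread_mutex_lock(&lk);
 *    starting = 1;
 *    pthread_create(&tid, NULL, flusher, fsdata);
 *    while (starting)
 *            pthread_cond_wait(&cv, &lk);
 *    pthread_mutex_unlock(&lk);
 *
 *    (first thing the flusher does)
 *    pthread_mutex_lock(&lk);
 *    starting = 0;
 *    pthread_cond_broadcast(&cv);
 *    pthread_mutex_unlock(&lk);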
+ */ +int +softdep_mount(devvp, mp, fs, cred) + struct vnode *devvp; + struct mount *mp; + struct fs *fs; + struct ucred *cred; +{ + struct csum_total cstotal; + struct mount_softdeps *sdp; + struct ufsmount *ump; + struct cg *cgp; + struct buf *bp; + int i, error, cyl; + + sdp = malloc(sizeof(struct mount_softdeps), M_MOUNTDATA, + M_WAITOK | M_ZERO); + MNT_ILOCK(mp); + mp->mnt_flag = (mp->mnt_flag & ~MNT_ASYNC) | MNT_SOFTDEP; + if ((mp->mnt_kern_flag & MNTK_SOFTDEP) == 0) { + mp->mnt_kern_flag = (mp->mnt_kern_flag & ~MNTK_ASYNC) | + MNTK_SOFTDEP | MNTK_NOASYNC; + } + ump = VFSTOUFS(mp); + ump->um_softdep = sdp; + MNT_IUNLOCK(mp); + rw_init(LOCK_PTR(ump), "Per-Filesystem Softdep Lock"); + sdp->sd_ump = ump; + LIST_INIT(&ump->softdep_workitem_pending); + LIST_INIT(&ump->softdep_journal_pending); + TAILQ_INIT(&ump->softdep_unlinked); + LIST_INIT(&ump->softdep_dirtycg); + ump->softdep_worklist_tail = NULL; + ump->softdep_on_worklist = 0; + ump->softdep_deps = 0; + LIST_INIT(&ump->softdep_mkdirlisthd); + ump->pagedep_hashtbl = hashinit(desiredvnodes / 5, M_PAGEDEP, + &ump->pagedep_hash_size); + ump->pagedep_nextclean = 0; + ump->inodedep_hashtbl = hashinit(desiredvnodes, M_INODEDEP, + &ump->inodedep_hash_size); + ump->inodedep_nextclean = 0; + ump->newblk_hashtbl = hashinit(max_softdeps / 2, M_NEWBLK, + &ump->newblk_hash_size); + ump->bmsafemap_hashtbl = hashinit(1024, M_BMSAFEMAP, + &ump->bmsafemap_hash_size); + i = 1 << (ffs(desiredvnodes / 10) - 1); + ump->indir_hashtbl = malloc(i * sizeof(struct indir_hashhead), + M_FREEWORK, M_WAITOK); + ump->indir_hash_size = i - 1; + for (i = 0; i <= ump->indir_hash_size; i++) + TAILQ_INIT(&ump->indir_hashtbl[i]); + ACQUIRE_GBLLOCK(&lk); + TAILQ_INSERT_TAIL(&softdepmounts, sdp, sd_next); + FREE_GBLLOCK(&lk); + if ((fs->fs_flags & FS_SUJ) && + (error = journal_mount(mp, fs, cred)) != 0) { + printf("Failed to start journal: %d\n", error); + softdep_unmount(mp); + return (error); + } + /* + * Start our flushing thread in the bufdaemon process. + */ + ACQUIRE_LOCK(ump); + ump->softdep_flags |= FLUSH_STARTING; + FREE_LOCK(ump); + kproc_kthread_add(&softdep_flush, mp, &bufdaemonproc, + &ump->softdep_flushtd, 0, 0, "softdepflush", "%s worker", + mp->mnt_stat.f_mntonname); + ACQUIRE_LOCK(ump); + while ((ump->softdep_flags & FLUSH_STARTING) != 0) { + msleep(&ump->softdep_flushtd, LOCK_PTR(ump), PVM, "sdstart", + hz / 2); + } + FREE_LOCK(ump); + /* + * When doing soft updates, the counters in the + * superblock may have gotten out of sync. Recomputation + * can take a long time and can be deferred for background + * fsck. However, the old behavior of scanning the cylinder + * groups and recalculating them at mount time is available + * by setting vfs.ffs.compute_summary_at_mount to one. 
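 *
 * For example, to force that scan on the next mount:
 *
 *    sysctl vfs.ffs.compute_summary_at_mount=1
 *
 * The loop that follows then reads every cylinder group and sums its
 * free-fragment, free-block, free-inode and directory counts into
 * cstotal before overwriting fs_cstotal with the recomputed totals.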
+ */ + if (compute_summary_at_mount == 0 || fs->fs_clean != 0) + return (0); + bzero(&cstotal, sizeof cstotal); + for (cyl = 0; cyl < fs->fs_ncg; cyl++) { + if ((error = bread(devvp, fsbtodb(fs, cgtod(fs, cyl)), + fs->fs_cgsize, cred, &bp)) != 0) { + brelse(bp); + softdep_unmount(mp); + return (error); + } + cgp = (struct cg *)bp->b_data; + cstotal.cs_nffree += cgp->cg_cs.cs_nffree; + cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree; + cstotal.cs_nifree += cgp->cg_cs.cs_nifree; + cstotal.cs_ndir += cgp->cg_cs.cs_ndir; + fs->fs_cs(fs, cyl) = cgp->cg_cs; + brelse(bp); + } +#ifdef DEBUG + if (bcmp(&cstotal, &fs->fs_cstotal, sizeof cstotal)) + printf("%s: superblock summary recomputed\n", fs->fs_fsmnt); +#endif + bcopy(&cstotal, &fs->fs_cstotal, sizeof cstotal); + return (0); +} + +void +softdep_unmount(mp) + struct mount *mp; +{ + struct ufsmount *ump; +#ifdef INVARIANTS + int i; +#endif + + KASSERT(MOUNTEDSOFTDEP(mp) != 0, + ("softdep_unmount called on non-softdep filesystem")); + ump = VFSTOUFS(mp); + MNT_ILOCK(mp); + mp->mnt_flag &= ~MNT_SOFTDEP; + if (MOUNTEDSUJ(mp) == 0) { + MNT_IUNLOCK(mp); + } else { + mp->mnt_flag &= ~MNT_SUJ; + MNT_IUNLOCK(mp); + journal_unmount(ump); + } + /* + * Shut down our flushing thread. Check for NULL is if + * softdep_mount errors out before the thread has been created. + */ + if (ump->softdep_flushtd != NULL) { + ACQUIRE_LOCK(ump); + ump->softdep_flags |= FLUSH_EXIT; + wakeup(&ump->softdep_flushtd); + msleep(&ump->softdep_flags, LOCK_PTR(ump), PVM | PDROP, + "sdwait", 0); + KASSERT((ump->softdep_flags & FLUSH_EXIT) == 0, + ("Thread shutdown failed")); + } + /* + * Free up our resources. + */ + ACQUIRE_GBLLOCK(&lk); + TAILQ_REMOVE(&softdepmounts, ump->um_softdep, sd_next); + FREE_GBLLOCK(&lk); + rw_destroy(LOCK_PTR(ump)); + hashdestroy(ump->pagedep_hashtbl, M_PAGEDEP, ump->pagedep_hash_size); + hashdestroy(ump->inodedep_hashtbl, M_INODEDEP, ump->inodedep_hash_size); + hashdestroy(ump->newblk_hashtbl, M_NEWBLK, ump->newblk_hash_size); + hashdestroy(ump->bmsafemap_hashtbl, M_BMSAFEMAP, + ump->bmsafemap_hash_size); + free(ump->indir_hashtbl, M_FREEWORK); +#ifdef INVARIANTS + for (i = 0; i <= D_LAST; i++) + KASSERT(ump->softdep_curdeps[i] == 0, + ("Unmount %s: Dep type %s != 0 (%ld)", ump->um_fs->fs_fsmnt, + TYPENAME(i), ump->softdep_curdeps[i])); +#endif + free(ump->um_softdep, M_MOUNTDATA); +} + +static struct jblocks * +jblocks_create(void) +{ + struct jblocks *jblocks; + + jblocks = malloc(sizeof(*jblocks), M_JBLOCKS, M_WAITOK | M_ZERO); + TAILQ_INIT(&jblocks->jb_segs); + jblocks->jb_avail = 10; + jblocks->jb_extent = malloc(sizeof(struct jextent) * jblocks->jb_avail, + M_JBLOCKS, M_WAITOK | M_ZERO); + + return (jblocks); +} + +static ufs2_daddr_t +jblocks_alloc(jblocks, bytes, actual) + struct jblocks *jblocks; + int bytes; + int *actual; +{ + ufs2_daddr_t daddr; + struct jextent *jext; + int freecnt; + int blocks; + + blocks = bytes / DEV_BSIZE; + jext = &jblocks->jb_extent[jblocks->jb_head]; + freecnt = jext->je_blocks - jblocks->jb_off; + if (freecnt == 0) { + jblocks->jb_off = 0; + if (++jblocks->jb_head > jblocks->jb_used) + jblocks->jb_head = 0; + jext = &jblocks->jb_extent[jblocks->jb_head]; + freecnt = jext->je_blocks; + } + if (freecnt > blocks) + freecnt = blocks; + *actual = freecnt * DEV_BSIZE; + daddr = jext->je_daddr + jblocks->jb_off; + jblocks->jb_off += freecnt; + jblocks->jb_free -= freecnt; + + return (daddr); +} + +static void +jblocks_free(jblocks, mp, bytes) + struct jblocks *jblocks; + struct mount *mp; + int bytes; +{ + + 
LOCK_OWNED(VFSTOUFS(mp)); + jblocks->jb_free += bytes / DEV_BSIZE; + if (jblocks->jb_suspended) + worklist_speedup(mp); + wakeup(jblocks); +} + +static void +jblocks_destroy(jblocks) + struct jblocks *jblocks; +{ + + if (jblocks->jb_extent) + free(jblocks->jb_extent, M_JBLOCKS); + free(jblocks, M_JBLOCKS); +} + +static void +jblocks_add(jblocks, daddr, blocks) + struct jblocks *jblocks; + ufs2_daddr_t daddr; + int blocks; +{ + struct jextent *jext; + + jblocks->jb_blocks += blocks; + jblocks->jb_free += blocks; + jext = &jblocks->jb_extent[jblocks->jb_used]; + /* Adding the first block. */ + if (jext->je_daddr == 0) { + jext->je_daddr = daddr; + jext->je_blocks = blocks; + return; + } + /* Extending the last extent. */ + if (jext->je_daddr + jext->je_blocks == daddr) { + jext->je_blocks += blocks; + return; + } + /* Adding a new extent. */ + if (++jblocks->jb_used == jblocks->jb_avail) { + jblocks->jb_avail *= 2; + jext = malloc(sizeof(struct jextent) * jblocks->jb_avail, + M_JBLOCKS, M_WAITOK | M_ZERO); + memcpy(jext, jblocks->jb_extent, + sizeof(struct jextent) * jblocks->jb_used); + free(jblocks->jb_extent, M_JBLOCKS); + jblocks->jb_extent = jext; + } + jext = &jblocks->jb_extent[jblocks->jb_used]; + jext->je_daddr = daddr; + jext->je_blocks = blocks; + return; +} + +int +softdep_journal_lookup(mp, vpp) + struct mount *mp; + struct vnode **vpp; +{ + struct componentname cnp; + struct vnode *dvp; + ino_t sujournal; + int error; + + error = VFS_VGET(mp, ROOTINO, LK_EXCLUSIVE, &dvp); + if (error) + return (error); + bzero(&cnp, sizeof(cnp)); + cnp.cn_nameiop = LOOKUP; + cnp.cn_flags = ISLASTCN; + cnp.cn_thread = curthread; + cnp.cn_cred = curthread->td_ucred; + cnp.cn_pnbuf = SUJ_FILE; + cnp.cn_nameptr = SUJ_FILE; + cnp.cn_namelen = strlen(SUJ_FILE); + error = ufs_lookup_ino(dvp, NULL, &cnp, &sujournal); + vput(dvp); + if (error != 0) + return (error); + error = VFS_VGET(mp, sujournal, LK_EXCLUSIVE, vpp); + return (error); +} + +/* + * Open and verify the journal file. + */ +static int +journal_mount(mp, fs, cred) + struct mount *mp; + struct fs *fs; + struct ucred *cred; +{ + struct jblocks *jblocks; + struct ufsmount *ump; + struct vnode *vp; + struct inode *ip; + ufs2_daddr_t blkno; + int bcount; + int error; + int i; + + ump = VFSTOUFS(mp); + ump->softdep_journal_tail = NULL; + ump->softdep_on_journal = 0; + ump->softdep_accdeps = 0; + ump->softdep_req = 0; + ump->softdep_jblocks = NULL; + error = softdep_journal_lookup(mp, &vp); + if (error != 0) { + printf("Failed to find journal. Use tunefs to create one\n"); + return (error); + } + ip = VTOI(vp); + if (ip->i_size < SUJ_MIN) { + error = ENOSPC; + goto out; + } + bcount = lblkno(fs, ip->i_size); /* Only use whole blocks. */ + jblocks = jblocks_create(); + for (i = 0; i < bcount; i++) { + error = ufs_bmaparray(vp, i, &blkno, NULL, NULL, NULL); + if (error) + break; + jblocks_add(jblocks, blkno, fsbtodb(fs, fs->fs_frag)); + } + if (error) { + jblocks_destroy(jblocks); + goto out; + } + jblocks->jb_low = jblocks->jb_free / 3; /* Reserve 33%. */ + jblocks->jb_min = jblocks->jb_free / 10; /* Suspend at 10%. */ + ump->softdep_jblocks = jblocks; +out: + if (error == 0) { + MNT_ILOCK(mp); + mp->mnt_flag |= MNT_SUJ; + mp->mnt_flag &= ~MNT_SOFTDEP; + MNT_IUNLOCK(mp); + /* + * Only validate the journal contents if the + * filesystem is clean, otherwise we write the logs + * but they'll never be used. 
If the filesystem was + * still dirty when we mounted it the journal is + * invalid and a new journal can only be valid if it + * starts from a clean mount. + */ + if (fs->fs_clean) { + DIP_SET(ip, i_modrev, fs->fs_mtime); + ip->i_flags |= IN_MODIFIED; + ffs_update(vp, 1); + } + } + vput(vp); + return (error); +} + +static void +journal_unmount(ump) + struct ufsmount *ump; +{ + + if (ump->softdep_jblocks) + jblocks_destroy(ump->softdep_jblocks); + ump->softdep_jblocks = NULL; +} + +/* + * Called when a journal record is ready to be written. Space is allocated + * and the journal entry is created when the journal is flushed to stable + * store. + */ +static void +add_to_journal(wk) + struct worklist *wk; +{ + struct ufsmount *ump; + + ump = VFSTOUFS(wk->wk_mp); + LOCK_OWNED(ump); + if (wk->wk_state & ONWORKLIST) + panic("add_to_journal: %s(0x%X) already on list", + TYPENAME(wk->wk_type), wk->wk_state); + wk->wk_state |= ONWORKLIST | DEPCOMPLETE; + if (LIST_EMPTY(&ump->softdep_journal_pending)) { + ump->softdep_jblocks->jb_age = ticks; + LIST_INSERT_HEAD(&ump->softdep_journal_pending, wk, wk_list); + } else + LIST_INSERT_AFTER(ump->softdep_journal_tail, wk, wk_list); + ump->softdep_journal_tail = wk; + ump->softdep_on_journal += 1; +} + +/* + * Remove an arbitrary item for the journal worklist maintain the tail + * pointer. This happens when a new operation obviates the need to + * journal an old operation. + */ +static void +remove_from_journal(wk) + struct worklist *wk; +{ + struct ufsmount *ump; + + ump = VFSTOUFS(wk->wk_mp); + LOCK_OWNED(ump); +#ifdef SUJ_DEBUG + { + struct worklist *wkn; + + LIST_FOREACH(wkn, &ump->softdep_journal_pending, wk_list) + if (wkn == wk) + break; + if (wkn == NULL) + panic("remove_from_journal: %p is not in journal", wk); + } +#endif + /* + * We emulate a TAILQ to save space in most structures which do not + * require TAILQ semantics. Here we must update the tail position + * when removing the tail which is not the final entry. This works + * only if the worklist linkage are at the beginning of the structure. + */ + if (ump->softdep_journal_tail == wk) + ump->softdep_journal_tail = + (struct worklist *)wk->wk_list.le_prev; + WORKLIST_REMOVE(wk); + ump->softdep_on_journal -= 1; +} + +/* + * Check for journal space as well as dependency limits so the prelink + * code can throttle both journaled and non-journaled filesystems. + * Threshold is 0 for low and 1 for min. + */ +static int +journal_space(ump, thresh) + struct ufsmount *ump; + int thresh; +{ + struct jblocks *jblocks; + int limit, avail; + + jblocks = ump->softdep_jblocks; + if (jblocks == NULL) + return (1); + /* + * We use a tighter restriction here to prevent request_cleanup() + * running in threads from running into locks we currently hold. + * We have to be over the limit and our filesystem has to be + * responsible for more than our share of that usage. 
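 *
 * Concretely (illustrative numbers): each journal record is JREC_SIZE
 * (32) bytes, so the softdep_on_journal records not yet written will
 * consume softdep_on_journal * 32 / DEV_BSIZE journal blocks.  For a
 * 4MB journal, jb_free starts at 8192 DEV_BSIZE blocks, jb_low (the
 * "reserve 33%" mark set at mount) is 2730 and jb_min (the "suspend
 * at 10%" mark) is 819; space is reported available only while
 * jb_free minus the pending-record estimate stays above the requested
 * threshold.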
+ */ + limit = (max_softdeps / 10) * 9; + if (dep_current[D_INODEDEP] > limit && + ump->softdep_curdeps[D_INODEDEP] > limit / stat_flush_threads) + return (0); + if (thresh) + thresh = jblocks->jb_min; + else + thresh = jblocks->jb_low; + avail = (ump->softdep_on_journal * JREC_SIZE) / DEV_BSIZE; + avail = jblocks->jb_free - avail; + + return (avail > thresh); +} + +static void +journal_suspend(ump) + struct ufsmount *ump; +{ + struct jblocks *jblocks; + struct mount *mp; + + mp = UFSTOVFS(ump); + jblocks = ump->softdep_jblocks; + MNT_ILOCK(mp); + if ((mp->mnt_kern_flag & MNTK_SUSPEND) == 0) { + stat_journal_min++; + mp->mnt_kern_flag |= MNTK_SUSPEND; + mp->mnt_susp_owner = ump->softdep_flushtd; + } + jblocks->jb_suspended = 1; + MNT_IUNLOCK(mp); +} + +static int +journal_unsuspend(struct ufsmount *ump) +{ + struct jblocks *jblocks; + struct mount *mp; + + mp = UFSTOVFS(ump); + jblocks = ump->softdep_jblocks; + + if (jblocks != NULL && jblocks->jb_suspended && + journal_space(ump, jblocks->jb_min)) { + jblocks->jb_suspended = 0; + FREE_LOCK(ump); + mp->mnt_susp_owner = curthread; + vfs_write_resume(mp, 0); + ACQUIRE_LOCK(ump); + return (1); + } + return (0); +} + +/* + * Called before any allocation function to be certain that there is + * sufficient space in the journal prior to creating any new records. + * Since in the case of block allocation we may have multiple locked + * buffers at the time of the actual allocation we can not block + * when the journal records are created. Doing so would create a deadlock + * if any of these buffers needed to be flushed to reclaim space. Instead + * we require a sufficiently large amount of available space such that + * each thread in the system could have passed this allocation check and + * still have sufficient free space. With 20% of a minimum journal size + * of 1MB we have 6553 records available. + */ +int +softdep_prealloc(vp, waitok) + struct vnode *vp; + int waitok; +{ + struct ufsmount *ump; + + KASSERT(MOUNTEDSOFTDEP(vp->v_mount) != 0, + ("softdep_prealloc called on non-softdep filesystem")); + /* + * Nothing to do if we are not running journaled soft updates. + * If we currently hold the snapshot lock, we must avoid + * handling other resources that could cause deadlock. Do not + * touch quotas vnode since it is typically recursed with + * other vnode locks held. + */ + if (DOINGSUJ(vp) == 0 || IS_SNAPSHOT(VTOI(vp)) || + (vp->v_vflag & VV_SYSTEM) != 0) + return (0); + ump = VFSTOUFS(vp->v_mount); + ACQUIRE_LOCK(ump); + if (journal_space(ump, 0)) { + FREE_LOCK(ump); + return (0); + } + stat_journal_low++; + FREE_LOCK(ump); + if (waitok == MNT_NOWAIT) + return (ENOSPC); + /* + * Attempt to sync this vnode once to flush any journal + * work attached to it. + */ + if ((curthread->td_pflags & TDP_COWINPROGRESS) == 0) + ffs_syncvnode(vp, waitok, 0); + ACQUIRE_LOCK(ump); + process_removes(vp); + process_truncates(vp); + if (journal_space(ump, 0) == 0) { + softdep_speedup(ump); + if (journal_space(ump, 1) == 0) + journal_suspend(ump); + } + FREE_LOCK(ump); + + return (0); +} + +/* + * Before adjusting a link count on a vnode verify that we have sufficient + * journal space. If not, process operations that depend on the currently + * locked pair of vnodes to try to flush space as the syncer, buf daemon, + * and softdep flush threads can not acquire these locks to reclaim space. 
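 *
 * The "6553 records" figure quoted above softdep_prealloc() follows
 * directly from the record size: 20% of a 1MB journal is 209715 bytes
 * and, at JREC_SIZE == 32 bytes per record, 209715 / 32 == 6553.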
+ */ +static void +softdep_prelink(dvp, vp) + struct vnode *dvp; + struct vnode *vp; +{ + struct ufsmount *ump; + + ump = VFSTOUFS(dvp->v_mount); + LOCK_OWNED(ump); + /* + * Nothing to do if we have sufficient journal space. + * If we currently hold the snapshot lock, we must avoid + * handling other resources that could cause deadlock. + */ + if (journal_space(ump, 0) || (vp && IS_SNAPSHOT(VTOI(vp)))) + return; + stat_journal_low++; + FREE_LOCK(ump); + if (vp) + ffs_syncvnode(vp, MNT_NOWAIT, 0); + ffs_syncvnode(dvp, MNT_WAIT, 0); + ACQUIRE_LOCK(ump); + /* Process vp before dvp as it may create .. removes. */ + if (vp) { + process_removes(vp); + process_truncates(vp); + } + process_removes(dvp); + process_truncates(dvp); + softdep_speedup(ump); + process_worklist_item(UFSTOVFS(ump), 2, LK_NOWAIT); + if (journal_space(ump, 0) == 0) { + softdep_speedup(ump); + if (journal_space(ump, 1) == 0) + journal_suspend(ump); + } +} + +static void +jseg_write(ump, jseg, data) + struct ufsmount *ump; + struct jseg *jseg; + uint8_t *data; +{ + struct jsegrec *rec; + + rec = (struct jsegrec *)data; + rec->jsr_seq = jseg->js_seq; + rec->jsr_oldest = jseg->js_oldseq; + rec->jsr_cnt = jseg->js_cnt; + rec->jsr_blocks = jseg->js_size / ump->um_devvp->v_bufobj.bo_bsize; + rec->jsr_crc = 0; + rec->jsr_time = ump->um_fs->fs_mtime; +} + +static inline void +inoref_write(inoref, jseg, rec) + struct inoref *inoref; + struct jseg *jseg; + struct jrefrec *rec; +{ + + inoref->if_jsegdep->jd_seg = jseg; + rec->jr_ino = inoref->if_ino; + rec->jr_parent = inoref->if_parent; + rec->jr_nlink = inoref->if_nlink; + rec->jr_mode = inoref->if_mode; + rec->jr_diroff = inoref->if_diroff; +} + +static void +jaddref_write(jaddref, jseg, data) + struct jaddref *jaddref; + struct jseg *jseg; + uint8_t *data; +{ + struct jrefrec *rec; + + rec = (struct jrefrec *)data; + rec->jr_op = JOP_ADDREF; + inoref_write(&jaddref->ja_ref, jseg, rec); +} + +static void +jremref_write(jremref, jseg, data) + struct jremref *jremref; + struct jseg *jseg; + uint8_t *data; +{ + struct jrefrec *rec; + + rec = (struct jrefrec *)data; + rec->jr_op = JOP_REMREF; + inoref_write(&jremref->jr_ref, jseg, rec); +} + +static void +jmvref_write(jmvref, jseg, data) + struct jmvref *jmvref; + struct jseg *jseg; + uint8_t *data; +{ + struct jmvrec *rec; + + rec = (struct jmvrec *)data; + rec->jm_op = JOP_MVREF; + rec->jm_ino = jmvref->jm_ino; + rec->jm_parent = jmvref->jm_parent; + rec->jm_oldoff = jmvref->jm_oldoff; + rec->jm_newoff = jmvref->jm_newoff; +} + +static void +jnewblk_write(jnewblk, jseg, data) + struct jnewblk *jnewblk; + struct jseg *jseg; + uint8_t *data; +{ + struct jblkrec *rec; + + jnewblk->jn_jsegdep->jd_seg = jseg; + rec = (struct jblkrec *)data; + rec->jb_op = JOP_NEWBLK; + rec->jb_ino = jnewblk->jn_ino; + rec->jb_blkno = jnewblk->jn_blkno; + rec->jb_lbn = jnewblk->jn_lbn; + rec->jb_frags = jnewblk->jn_frags; + rec->jb_oldfrags = jnewblk->jn_oldfrags; +} + +static void +jfreeblk_write(jfreeblk, jseg, data) + struct jfreeblk *jfreeblk; + struct jseg *jseg; + uint8_t *data; +{ + struct jblkrec *rec; + + jfreeblk->jf_dep.jb_jsegdep->jd_seg = jseg; + rec = (struct jblkrec *)data; + rec->jb_op = JOP_FREEBLK; + rec->jb_ino = jfreeblk->jf_ino; + rec->jb_blkno = jfreeblk->jf_blkno; + rec->jb_lbn = jfreeblk->jf_lbn; + rec->jb_frags = jfreeblk->jf_frags; + rec->jb_oldfrags = 0; +} + +static void +jfreefrag_write(jfreefrag, jseg, data) + struct jfreefrag *jfreefrag; + struct jseg *jseg; + uint8_t *data; +{ + struct jblkrec *rec; + + 
jfreefrag->fr_jsegdep->jd_seg = jseg; + rec = (struct jblkrec *)data; + rec->jb_op = JOP_FREEBLK; + rec->jb_ino = jfreefrag->fr_ino; + rec->jb_blkno = jfreefrag->fr_blkno; + rec->jb_lbn = jfreefrag->fr_lbn; + rec->jb_frags = jfreefrag->fr_frags; + rec->jb_oldfrags = 0; +} + +static void +jtrunc_write(jtrunc, jseg, data) + struct jtrunc *jtrunc; + struct jseg *jseg; + uint8_t *data; +{ + struct jtrncrec *rec; + + jtrunc->jt_dep.jb_jsegdep->jd_seg = jseg; + rec = (struct jtrncrec *)data; + rec->jt_op = JOP_TRUNC; + rec->jt_ino = jtrunc->jt_ino; + rec->jt_size = jtrunc->jt_size; + rec->jt_extsize = jtrunc->jt_extsize; +} + +static void +jfsync_write(jfsync, jseg, data) + struct jfsync *jfsync; + struct jseg *jseg; + uint8_t *data; +{ + struct jtrncrec *rec; + + rec = (struct jtrncrec *)data; + rec->jt_op = JOP_SYNC; + rec->jt_ino = jfsync->jfs_ino; + rec->jt_size = jfsync->jfs_size; + rec->jt_extsize = jfsync->jfs_extsize; +} + +static void +softdep_flushjournal(mp) + struct mount *mp; +{ + struct jblocks *jblocks; + struct ufsmount *ump; + + if (MOUNTEDSUJ(mp) == 0) + return; + ump = VFSTOUFS(mp); + jblocks = ump->softdep_jblocks; + ACQUIRE_LOCK(ump); + while (ump->softdep_on_journal) { + jblocks->jb_needseg = 1; + softdep_process_journal(mp, NULL, MNT_WAIT); + } + FREE_LOCK(ump); +} + +static void softdep_synchronize_completed(struct bio *); +static void softdep_synchronize(struct bio *, struct ufsmount *, void *); + +static void +softdep_synchronize_completed(bp) + struct bio *bp; +{ + struct jseg *oldest; + struct jseg *jseg; + struct ufsmount *ump; + + /* + * caller1 marks the last segment written before we issued the + * synchronize cache. + */ + jseg = bp->bio_caller1; + if (jseg == NULL) { + g_destroy_bio(bp); + return; + } + ump = VFSTOUFS(jseg->js_list.wk_mp); + ACQUIRE_LOCK(ump); + oldest = NULL; + /* + * Mark all the journal entries waiting on the synchronize cache + * as completed so they may continue on. + */ + while (jseg != NULL && (jseg->js_state & COMPLETE) == 0) { + jseg->js_state |= COMPLETE; + oldest = jseg; + jseg = TAILQ_PREV(jseg, jseglst, js_next); + } + /* + * Restart deferred journal entry processing from the oldest + * completed jseg. + */ + if (oldest) + complete_jsegs(oldest); + + FREE_LOCK(ump); + g_destroy_bio(bp); +} + +/* + * Send BIO_FLUSH/SYNCHRONIZE CACHE to the device to enforce write ordering + * barriers. The journal must be written prior to any blocks that depend + * on it and the journal can not be released until the blocks have be + * written. This code handles both barriers simultaneously. + */ +static void +softdep_synchronize(bp, ump, caller1) + struct bio *bp; + struct ufsmount *ump; + void *caller1; +{ + + bp->bio_cmd = BIO_FLUSH; + bp->bio_flags |= BIO_ORDERED; + bp->bio_data = NULL; + bp->bio_offset = ump->um_cp->provider->mediasize; + bp->bio_length = 0; + bp->bio_done = softdep_synchronize_completed; + bp->bio_caller1 = caller1; + g_io_request(bp, + (struct g_consumer *)ump->um_devvp->v_bufobj.bo_private); +} + +/* + * Flush some journal records to disk. + */ +static void +softdep_process_journal(mp, needwk, flags) + struct mount *mp; + struct worklist *needwk; + int flags; +{ + struct jblocks *jblocks; + struct ufsmount *ump; + struct worklist *wk; + struct jseg *jseg; + struct buf *bp; + struct bio *bio; + uint8_t *data; + struct fs *fs; + int shouldflush; + int segwritten; + int jrecmin; /* Minimum records per block. */ + int jrecmax; /* Maximum records per block. 
*/ + int size; + int cnt; + int off; + int devbsize; + + if (MOUNTEDSUJ(mp) == 0) + return; + shouldflush = softdep_flushcache; + bio = NULL; + jseg = NULL; + ump = VFSTOUFS(mp); + LOCK_OWNED(ump); + fs = ump->um_fs; + jblocks = ump->softdep_jblocks; + devbsize = ump->um_devvp->v_bufobj.bo_bsize; + /* + * We write anywhere between a disk block and fs block. The upper + * bound is picked to prevent buffer cache fragmentation and limit + * processing time per I/O. + */ + jrecmin = (devbsize / JREC_SIZE) - 1; /* -1 for seg header */ + jrecmax = (fs->fs_bsize / devbsize) * jrecmin; + segwritten = 0; + for (;;) { + cnt = ump->softdep_on_journal; + /* + * Criteria for writing a segment: + * 1) We have a full block. + * 2) We're called from jwait() and haven't found the + * journal item yet. + * 3) Always write if needseg is set. + * 4) If we are called from process_worklist and have + * not yet written anything we write a partial block + * to enforce a 1 second maximum latency on journal + * entries. + */ + if (cnt < (jrecmax - 1) && needwk == NULL && + jblocks->jb_needseg == 0 && (segwritten || cnt == 0)) + break; + cnt++; + /* + * Verify some free journal space. softdep_prealloc() should + * guarantee that we don't run out so this is indicative of + * a problem with the flow control. Try to recover + * gracefully in any event. + */ + while (jblocks->jb_free == 0) { + if (flags != MNT_WAIT) + break; + printf("softdep: Out of journal space!\n"); + softdep_speedup(ump); + msleep(jblocks, LOCK_PTR(ump), PRIBIO, "jblocks", hz); + } + FREE_LOCK(ump); + jseg = malloc(sizeof(*jseg), M_JSEG, M_SOFTDEP_FLAGS); + workitem_alloc(&jseg->js_list, D_JSEG, mp); + LIST_INIT(&jseg->js_entries); + LIST_INIT(&jseg->js_indirs); + jseg->js_state = ATTACHED; + if (shouldflush == 0) + jseg->js_state |= COMPLETE; + else if (bio == NULL) + bio = g_alloc_bio(); + jseg->js_jblocks = jblocks; + bp = geteblk(fs->fs_bsize, 0); + ACQUIRE_LOCK(ump); + /* + * If there was a race while we were allocating the block + * and jseg the entry we care about was likely written. + * We bail out in both the WAIT and NOWAIT case and assume + * the caller will loop if the entry it cares about is + * not written. + */ + cnt = ump->softdep_on_journal; + if (cnt + jblocks->jb_needseg == 0 || jblocks->jb_free == 0) { + bp->b_flags |= B_INVAL | B_NOCACHE; + WORKITEM_FREE(jseg, D_JSEG); + FREE_LOCK(ump); + brelse(bp); + ACQUIRE_LOCK(ump); + break; + } + /* + * Calculate the disk block size required for the available + * records rounded to the min size. + */ + if (cnt == 0) + size = devbsize; + else if (cnt < jrecmax) + size = howmany(cnt, jrecmin) * devbsize; + else + size = fs->fs_bsize; + /* + * Allocate a disk block for this journal data and account + * for truncation of the requested size if enough contiguous + * space was not available. + */ + bp->b_blkno = jblocks_alloc(jblocks, size, &size); + bp->b_lblkno = bp->b_blkno; + bp->b_offset = bp->b_blkno * DEV_BSIZE; + bp->b_bcount = size; + bp->b_flags &= ~B_INVAL; + bp->b_flags |= B_VALIDSUSPWRT | B_NOCOPY; + /* + * Initialize our jseg with cnt records. Assign the next + * sequence number to it and link it in-order. + */ + cnt = MIN(cnt, (size / devbsize) * jrecmin); + jseg->js_buf = bp; + jseg->js_cnt = cnt; + jseg->js_refs = cnt + 1; /* Self ref. 
*/ + jseg->js_size = size; + jseg->js_seq = jblocks->jb_nextseq++; + if (jblocks->jb_oldestseg == NULL) + jblocks->jb_oldestseg = jseg; + jseg->js_oldseq = jblocks->jb_oldestseg->js_seq; + TAILQ_INSERT_TAIL(&jblocks->jb_segs, jseg, js_next); + if (jblocks->jb_writeseg == NULL) + jblocks->jb_writeseg = jseg; + /* + * Start filling in records from the pending list. + */ + data = bp->b_data; + off = 0; + + /* + * Always put a header on the first block. + * XXX As with below, there might not be a chance to get + * into the loop. Ensure that something valid is written. + */ + jseg_write(ump, jseg, data); + off += JREC_SIZE; + data = bp->b_data + off; + + /* + * XXX Something is wrong here. There's no work to do, + * but we need to perform and I/O and allow it to complete + * anyways. + */ + if (LIST_EMPTY(&ump->softdep_journal_pending)) + stat_emptyjblocks++; + + while ((wk = LIST_FIRST(&ump->softdep_journal_pending)) + != NULL) { + if (cnt == 0) + break; + /* Place a segment header on every device block. */ + if ((off % devbsize) == 0) { + jseg_write(ump, jseg, data); + off += JREC_SIZE; + data = bp->b_data + off; + } + if (wk == needwk) + needwk = NULL; + remove_from_journal(wk); + wk->wk_state |= INPROGRESS; + WORKLIST_INSERT(&jseg->js_entries, wk); + switch (wk->wk_type) { + case D_JADDREF: + jaddref_write(WK_JADDREF(wk), jseg, data); + break; + case D_JREMREF: + jremref_write(WK_JREMREF(wk), jseg, data); + break; + case D_JMVREF: + jmvref_write(WK_JMVREF(wk), jseg, data); + break; + case D_JNEWBLK: + jnewblk_write(WK_JNEWBLK(wk), jseg, data); + break; + case D_JFREEBLK: + jfreeblk_write(WK_JFREEBLK(wk), jseg, data); + break; + case D_JFREEFRAG: + jfreefrag_write(WK_JFREEFRAG(wk), jseg, data); + break; + case D_JTRUNC: + jtrunc_write(WK_JTRUNC(wk), jseg, data); + break; + case D_JFSYNC: + jfsync_write(WK_JFSYNC(wk), jseg, data); + break; + default: + panic("process_journal: Unknown type %s", + TYPENAME(wk->wk_type)); + /* NOTREACHED */ + } + off += JREC_SIZE; + data = bp->b_data + off; + cnt--; + } + + /* Clear any remaining space so we don't leak kernel data */ + if (size > off) + bzero(data, size - off); + + /* + * Write this one buffer and continue. + */ + segwritten = 1; + jblocks->jb_needseg = 0; + WORKLIST_INSERT(&bp->b_dep, &jseg->js_list); + FREE_LOCK(ump); + pbgetvp(ump->um_devvp, bp); + /* + * We only do the blocking wait once we find the journal + * entry we're looking for. + */ + if (needwk == NULL && flags == MNT_WAIT) + bwrite(bp); + else + bawrite(bp); + ACQUIRE_LOCK(ump); + } + /* + * If we wrote a segment issue a synchronize cache so the journal + * is reflected on disk before the data is written. Since reclaiming + * journal space also requires writing a journal record this + * process also enforces a barrier before reclamation. + */ + if (segwritten && shouldflush) { + softdep_synchronize(bio, ump, + TAILQ_LAST(&jblocks->jb_segs, jseglst)); + } else if (bio) + g_destroy_bio(bio); + /* + * If we've suspended the filesystem because we ran out of journal + * space either try to sync it here to make some progress or + * unsuspend it if we already have. + */ + if (flags == 0 && jblocks->jb_suspended) { + if (journal_unsuspend(ump)) + return; + FREE_LOCK(ump); + VFS_SYNC(mp, MNT_NOWAIT); + ffs_sbupdate(ump, MNT_WAIT, 0); + ACQUIRE_LOCK(ump); + } +} + +/* + * Complete a jseg, allowing all dependencies awaiting journal writes + * to proceed. Each journal dependency also attaches a jsegdep to dependent + * structures so that the journal segment can be freed to reclaim space. 
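 *
 * The jseg itself is reference counted: softdep_process_journal() set
 * js_refs to the record count plus one self-reference, each record's
 * reference is released either directly in complete_jseg() (JMVREF,
 * JFSYNC) or later when its jsegdep is freed, and the self-reference
 * is dropped last so the segment cannot go away while its entries are
 * still being dispatched.  The shape of the idiom (placeholder names):
 *
 *    obj->refs = nitems + 1;
 *    for (i = 0; i < nitems; i++)
 *            dispatch(obj, i);            (each item eventually
 *                                          calls rele(obj))
 *    rele(obj);                           (drop the self-reference)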
+ */ +static void +complete_jseg(jseg) + struct jseg *jseg; +{ + struct worklist *wk; + struct jmvref *jmvref; +#ifdef INVARIANTS + int i = 0; +#endif + + while ((wk = LIST_FIRST(&jseg->js_entries)) != NULL) { + WORKLIST_REMOVE(wk); + wk->wk_state &= ~INPROGRESS; + wk->wk_state |= COMPLETE; + KASSERT(i++ < jseg->js_cnt, + ("handle_written_jseg: overflow %d >= %d", + i - 1, jseg->js_cnt)); + switch (wk->wk_type) { + case D_JADDREF: + handle_written_jaddref(WK_JADDREF(wk)); + break; + case D_JREMREF: + handle_written_jremref(WK_JREMREF(wk)); + break; + case D_JMVREF: + rele_jseg(jseg); /* No jsegdep. */ + jmvref = WK_JMVREF(wk); + LIST_REMOVE(jmvref, jm_deps); + if ((jmvref->jm_pagedep->pd_state & ONWORKLIST) == 0) + free_pagedep(jmvref->jm_pagedep); + WORKITEM_FREE(jmvref, D_JMVREF); + break; + case D_JNEWBLK: + handle_written_jnewblk(WK_JNEWBLK(wk)); + break; + case D_JFREEBLK: + handle_written_jblkdep(&WK_JFREEBLK(wk)->jf_dep); + break; + case D_JTRUNC: + handle_written_jblkdep(&WK_JTRUNC(wk)->jt_dep); + break; + case D_JFSYNC: + rele_jseg(jseg); /* No jsegdep. */ + WORKITEM_FREE(wk, D_JFSYNC); + break; + case D_JFREEFRAG: + handle_written_jfreefrag(WK_JFREEFRAG(wk)); + break; + default: + panic("handle_written_jseg: Unknown type %s", + TYPENAME(wk->wk_type)); + /* NOTREACHED */ + } + } + /* Release the self reference so the structure may be freed. */ + rele_jseg(jseg); +} + +/* + * Determine which jsegs are ready for completion processing. Waits for + * synchronize cache to complete as well as forcing in-order completion + * of journal entries. + */ +static void +complete_jsegs(jseg) + struct jseg *jseg; +{ + struct jblocks *jblocks; + struct jseg *jsegn; + + jblocks = jseg->js_jblocks; + /* + * Don't allow out of order completions. If this isn't the first + * block wait for it to write before we're done. + */ + if (jseg != jblocks->jb_writeseg) + return; + /* Iterate through available jsegs processing their entries. */ + while (jseg && (jseg->js_state & ALLCOMPLETE) == ALLCOMPLETE) { + jblocks->jb_oldestwrseq = jseg->js_oldseq; + jsegn = TAILQ_NEXT(jseg, js_next); + complete_jseg(jseg); + jseg = jsegn; + } + jblocks->jb_writeseg = jseg; + /* + * Attempt to free jsegs now that oldestwrseq may have advanced. + */ + free_jsegs(jblocks); +} + +/* + * Mark a jseg as DEPCOMPLETE and throw away the buffer. Attempt to handle + * the final completions. + */ +static void +handle_written_jseg(jseg, bp) + struct jseg *jseg; + struct buf *bp; +{ + + if (jseg->js_refs == 0) + panic("handle_written_jseg: No self-reference on %p", jseg); + jseg->js_state |= DEPCOMPLETE; + /* + * We'll never need this buffer again, set flags so it will be + * discarded. + */ + bp->b_flags |= B_INVAL | B_NOCACHE; + pbrelvp(bp); + complete_jsegs(jseg); +} + +static inline struct jsegdep * +inoref_jseg(inoref) + struct inoref *inoref; +{ + struct jsegdep *jsegdep; + + jsegdep = inoref->if_jsegdep; + inoref->if_jsegdep = NULL; + + return (jsegdep); +} + +/* + * Called once a jremref has made it to stable store. The jremref is marked + * complete and we attempt to free it. Any pagedeps writes sleeping waiting + * for the jremref to complete will be awoken by free_jremref. + */ +static void +handle_written_jremref(jremref) + struct jremref *jremref; +{ + struct inodedep *inodedep; + struct jsegdep *jsegdep; + struct dirrem *dirrem; + + /* Grab the jsegdep. */ + jsegdep = inoref_jseg(&jremref->jr_ref); + /* + * Remove us from the inoref list. 
+ */ + if (inodedep_lookup(jremref->jr_list.wk_mp, jremref->jr_ref.if_ino, + 0, &inodedep) == 0) + panic("handle_written_jremref: Lost inodedep"); + TAILQ_REMOVE(&inodedep->id_inoreflst, &jremref->jr_ref, if_deps); + /* + * Complete the dirrem. + */ + dirrem = jremref->jr_dirrem; + jremref->jr_dirrem = NULL; + LIST_REMOVE(jremref, jr_deps); + jsegdep->jd_state |= jremref->jr_state & MKDIR_PARENT; + jwork_insert(&dirrem->dm_jwork, jsegdep); + if (LIST_EMPTY(&dirrem->dm_jremrefhd) && + (dirrem->dm_state & COMPLETE) != 0) + add_to_worklist(&dirrem->dm_list, 0); + free_jremref(jremref); +} + +/* + * Called once a jaddref has made it to stable store. The dependency is + * marked complete and any dependent structures are added to the inode + * bufwait list to be completed as soon as it is written. If a bitmap write + * depends on this entry we move the inode into the inodedephd of the + * bmsafemap dependency and attempt to remove the jaddref from the bmsafemap. + */ +static void +handle_written_jaddref(jaddref) + struct jaddref *jaddref; +{ + struct jsegdep *jsegdep; + struct inodedep *inodedep; + struct diradd *diradd; + struct mkdir *mkdir; + + /* Grab the jsegdep. */ + jsegdep = inoref_jseg(&jaddref->ja_ref); + mkdir = NULL; + diradd = NULL; + if (inodedep_lookup(jaddref->ja_list.wk_mp, jaddref->ja_ino, + 0, &inodedep) == 0) + panic("handle_written_jaddref: Lost inodedep."); + if (jaddref->ja_diradd == NULL) + panic("handle_written_jaddref: No dependency"); + if (jaddref->ja_diradd->da_list.wk_type == D_DIRADD) { + diradd = jaddref->ja_diradd; + WORKLIST_INSERT(&inodedep->id_bufwait, &diradd->da_list); + } else if (jaddref->ja_state & MKDIR_PARENT) { + mkdir = jaddref->ja_mkdir; + WORKLIST_INSERT(&inodedep->id_bufwait, &mkdir->md_list); + } else if (jaddref->ja_state & MKDIR_BODY) + mkdir = jaddref->ja_mkdir; + else + panic("handle_written_jaddref: Unknown dependency %p", + jaddref->ja_diradd); + jaddref->ja_diradd = NULL; /* also clears ja_mkdir */ + /* + * Remove us from the inode list. + */ + TAILQ_REMOVE(&inodedep->id_inoreflst, &jaddref->ja_ref, if_deps); + /* + * The mkdir may be waiting on the jaddref to clear before freeing. + */ + if (mkdir) { + KASSERT(mkdir->md_list.wk_type == D_MKDIR, + ("handle_written_jaddref: Incorrect type for mkdir %s", + TYPENAME(mkdir->md_list.wk_type))); + mkdir->md_jaddref = NULL; + diradd = mkdir->md_diradd; + mkdir->md_state |= DEPCOMPLETE; + complete_mkdir(mkdir); + } + jwork_insert(&diradd->da_jwork, jsegdep); + if (jaddref->ja_state & NEWBLOCK) { + inodedep->id_state |= ONDEPLIST; + LIST_INSERT_HEAD(&inodedep->id_bmsafemap->sm_inodedephd, + inodedep, id_deps); + } + free_jaddref(jaddref); +} + +/* + * Called once a jnewblk journal is written. The allocdirect or allocindir + * is placed in the bmsafemap to await notification of a written bitmap. If + * the operation was canceled we add the segdep to the appropriate + * dependency to free the journal space once the canceling operation + * completes. + */ +static void +handle_written_jnewblk(jnewblk) + struct jnewblk *jnewblk; +{ + struct bmsafemap *bmsafemap; + struct freefrag *freefrag; + struct freework *freework; + struct jsegdep *jsegdep; + struct newblk *newblk; + + /* Grab the jsegdep. 
*/ + jsegdep = jnewblk->jn_jsegdep; + jnewblk->jn_jsegdep = NULL; + if (jnewblk->jn_dep == NULL) + panic("handle_written_jnewblk: No dependency for the segdep."); + switch (jnewblk->jn_dep->wk_type) { + case D_NEWBLK: + case D_ALLOCDIRECT: + case D_ALLOCINDIR: + /* + * Add the written block to the bmsafemap so it can + * be notified when the bitmap is on disk. + */ + newblk = WK_NEWBLK(jnewblk->jn_dep); + newblk->nb_jnewblk = NULL; + if ((newblk->nb_state & GOINGAWAY) == 0) { + bmsafemap = newblk->nb_bmsafemap; + newblk->nb_state |= ONDEPLIST; + LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, + nb_deps); + } + jwork_insert(&newblk->nb_jwork, jsegdep); + break; + case D_FREEFRAG: + /* + * A newblock being removed by a freefrag when replaced by + * frag extension. + */ + freefrag = WK_FREEFRAG(jnewblk->jn_dep); + freefrag->ff_jdep = NULL; + jwork_insert(&freefrag->ff_jwork, jsegdep); + break; + case D_FREEWORK: + /* + * A direct block was removed by truncate. + */ + freework = WK_FREEWORK(jnewblk->jn_dep); + freework->fw_jnewblk = NULL; + jwork_insert(&freework->fw_freeblks->fb_jwork, jsegdep); + break; + default: + panic("handle_written_jnewblk: Unknown type %d.", + jnewblk->jn_dep->wk_type); + } + jnewblk->jn_dep = NULL; + free_jnewblk(jnewblk); +} + +/* + * Cancel a jfreefrag that won't be needed, probably due to colliding with + * an in-flight allocation that has not yet been committed. Divorce us + * from the freefrag and mark it DEPCOMPLETE so that it may be added + * to the worklist. + */ +static void +cancel_jfreefrag(jfreefrag) + struct jfreefrag *jfreefrag; +{ + struct freefrag *freefrag; + + if (jfreefrag->fr_jsegdep) { + free_jsegdep(jfreefrag->fr_jsegdep); + jfreefrag->fr_jsegdep = NULL; + } + freefrag = jfreefrag->fr_freefrag; + jfreefrag->fr_freefrag = NULL; + free_jfreefrag(jfreefrag); + freefrag->ff_state |= DEPCOMPLETE; + CTR1(KTR_SUJ, "cancel_jfreefrag: blkno %jd", freefrag->ff_blkno); +} + +/* + * Free a jfreefrag when the parent freefrag is rendered obsolete. + */ +static void +free_jfreefrag(jfreefrag) + struct jfreefrag *jfreefrag; +{ + + if (jfreefrag->fr_state & INPROGRESS) + WORKLIST_REMOVE(&jfreefrag->fr_list); + else if (jfreefrag->fr_state & ONWORKLIST) + remove_from_journal(&jfreefrag->fr_list); + if (jfreefrag->fr_freefrag != NULL) + panic("free_jfreefrag: Still attached to a freefrag."); + WORKITEM_FREE(jfreefrag, D_JFREEFRAG); +} + +/* + * Called when the journal write for a jfreefrag completes. The parent + * freefrag is added to the worklist if this completes its dependencies. + */ +static void +handle_written_jfreefrag(jfreefrag) + struct jfreefrag *jfreefrag; +{ + struct jsegdep *jsegdep; + struct freefrag *freefrag; + + /* Grab the jsegdep. */ + jsegdep = jfreefrag->fr_jsegdep; + jfreefrag->fr_jsegdep = NULL; + freefrag = jfreefrag->fr_freefrag; + if (freefrag == NULL) + panic("handle_written_jfreefrag: No freefrag."); + freefrag->ff_state |= DEPCOMPLETE; + freefrag->ff_jdep = NULL; + jwork_insert(&freefrag->ff_jwork, jsegdep); + if ((freefrag->ff_state & ALLCOMPLETE) == ALLCOMPLETE) + add_to_worklist(&freefrag->ff_list, 0); + jfreefrag->fr_freefrag = NULL; + free_jfreefrag(jfreefrag); +} + +/* + * Called when the journal write for a jfreeblk completes. The jfreeblk + * is removed from the freeblks list of pending journal writes and the + * jsegdep is moved to the freeblks jwork to be completed when all blocks + * have been reclaimed. 
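+ * If this was the last outstanding journal dependency and the freeblks
+ * has otherwise completed, the freeblks itself is queued to the worklist
+ * below so that the freed blocks can finally be returned to the bitmaps.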
+ */ +static void +handle_written_jblkdep(jblkdep) + struct jblkdep *jblkdep; +{ + struct freeblks *freeblks; + struct jsegdep *jsegdep; + + /* Grab the jsegdep. */ + jsegdep = jblkdep->jb_jsegdep; + jblkdep->jb_jsegdep = NULL; + freeblks = jblkdep->jb_freeblks; + LIST_REMOVE(jblkdep, jb_deps); + jwork_insert(&freeblks->fb_jwork, jsegdep); + /* + * If the freeblks is all journaled, we can add it to the worklist. + */ + if (LIST_EMPTY(&freeblks->fb_jblkdephd) && + (freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE) + add_to_worklist(&freeblks->fb_list, WK_NODELAY); + + free_jblkdep(jblkdep); +} + +static struct jsegdep * +newjsegdep(struct worklist *wk) +{ + struct jsegdep *jsegdep; + + jsegdep = malloc(sizeof(*jsegdep), M_JSEGDEP, M_SOFTDEP_FLAGS); + workitem_alloc(&jsegdep->jd_list, D_JSEGDEP, wk->wk_mp); + jsegdep->jd_seg = NULL; + + return (jsegdep); +} + +static struct jmvref * +newjmvref(dp, ino, oldoff, newoff) + struct inode *dp; + ino_t ino; + off_t oldoff; + off_t newoff; +{ + struct jmvref *jmvref; + + jmvref = malloc(sizeof(*jmvref), M_JMVREF, M_SOFTDEP_FLAGS); + workitem_alloc(&jmvref->jm_list, D_JMVREF, ITOVFS(dp)); + jmvref->jm_list.wk_state = ATTACHED | DEPCOMPLETE; + jmvref->jm_parent = dp->i_number; + jmvref->jm_ino = ino; + jmvref->jm_oldoff = oldoff; + jmvref->jm_newoff = newoff; + + return (jmvref); +} + +/* + * Allocate a new jremref that tracks the removal of ip from dp with the + * directory entry offset of diroff. Mark the entry as ATTACHED and + * DEPCOMPLETE as we have all the information required for the journal write + * and the directory has already been removed from the buffer. The caller + * is responsible for linking the jremref into the pagedep and adding it + * to the journal to write. The MKDIR_PARENT flag is set if we're doing + * a DOTDOT addition so handle_workitem_remove() can properly assign + * the jsegdep when we're done. + */ +static struct jremref * +newjremref(struct dirrem *dirrem, struct inode *dp, struct inode *ip, + off_t diroff, nlink_t nlink) +{ + struct jremref *jremref; + + jremref = malloc(sizeof(*jremref), M_JREMREF, M_SOFTDEP_FLAGS); + workitem_alloc(&jremref->jr_list, D_JREMREF, ITOVFS(dp)); + jremref->jr_state = ATTACHED; + newinoref(&jremref->jr_ref, ip->i_number, dp->i_number, diroff, + nlink, ip->i_mode); + jremref->jr_dirrem = dirrem; + + return (jremref); +} + +static inline void +newinoref(struct inoref *inoref, ino_t ino, ino_t parent, off_t diroff, + nlink_t nlink, uint16_t mode) +{ + + inoref->if_jsegdep = newjsegdep(&inoref->if_list); + inoref->if_diroff = diroff; + inoref->if_ino = ino; + inoref->if_parent = parent; + inoref->if_nlink = nlink; + inoref->if_mode = mode; +} + +/* + * Allocate a new jaddref to track the addition of ino to dp at diroff. The + * directory offset may not be known until later. The caller is responsible + * adding the entry to the journal when this information is available. nlink + * should be the link count prior to the addition and mode is only required + * to have the correct FMT. + */ +static struct jaddref * +newjaddref(struct inode *dp, ino_t ino, off_t diroff, int16_t nlink, + uint16_t mode) +{ + struct jaddref *jaddref; + + jaddref = malloc(sizeof(*jaddref), M_JADDREF, M_SOFTDEP_FLAGS); + workitem_alloc(&jaddref->ja_list, D_JADDREF, ITOVFS(dp)); + jaddref->ja_state = ATTACHED; + jaddref->ja_mkdir = NULL; + newinoref(&jaddref->ja_ref, ino, dp->i_number, diroff, nlink, mode); + + return (jaddref); +} + +/* + * Create a new free dependency for a freework. 
The caller is responsible + * for adjusting the reference count when it has the lock held. The freedep + * will track an outstanding bitmap write that will ultimately clear the + * freework to continue. + */ +static struct freedep * +newfreedep(struct freework *freework) +{ + struct freedep *freedep; + + freedep = malloc(sizeof(*freedep), M_FREEDEP, M_SOFTDEP_FLAGS); + workitem_alloc(&freedep->fd_list, D_FREEDEP, freework->fw_list.wk_mp); + freedep->fd_freework = freework; + + return (freedep); +} + +/* + * Free a freedep structure once the buffer it is linked to is written. If + * this is the last reference to the freework schedule it for completion. + */ +static void +free_freedep(freedep) + struct freedep *freedep; +{ + struct freework *freework; + + freework = freedep->fd_freework; + freework->fw_freeblks->fb_cgwait--; + if (--freework->fw_ref == 0) + freework_enqueue(freework); + WORKITEM_FREE(freedep, D_FREEDEP); +} + +/* + * Allocate a new freework structure that may be a level in an indirect + * when parent is not NULL or a top level block when it is. The top level + * freework structures are allocated without the per-filesystem lock held + * and before the freeblks is visible outside of softdep_setup_freeblocks(). + */ +static struct freework * +newfreework(ump, freeblks, parent, lbn, nb, frags, off, journal) + struct ufsmount *ump; + struct freeblks *freeblks; + struct freework *parent; + ufs_lbn_t lbn; + ufs2_daddr_t nb; + int frags; + int off; + int journal; +{ + struct freework *freework; + + freework = malloc(sizeof(*freework), M_FREEWORK, M_SOFTDEP_FLAGS); + workitem_alloc(&freework->fw_list, D_FREEWORK, freeblks->fb_list.wk_mp); + freework->fw_state = ATTACHED; + freework->fw_jnewblk = NULL; + freework->fw_freeblks = freeblks; + freework->fw_parent = parent; + freework->fw_lbn = lbn; + freework->fw_blkno = nb; + freework->fw_frags = frags; + freework->fw_indir = NULL; + freework->fw_ref = (MOUNTEDSUJ(UFSTOVFS(ump)) == 0 || lbn >= -NXADDR) + ? 0 : NINDIR(ump->um_fs) + 1; + freework->fw_start = freework->fw_off = off; + if (journal) + newjfreeblk(freeblks, lbn, nb, frags); + if (parent == NULL) { + ACQUIRE_LOCK(ump); + WORKLIST_INSERT(&freeblks->fb_freeworkhd, &freework->fw_list); + freeblks->fb_ref++; + FREE_LOCK(ump); + } + + return (freework); +} + +/* + * Eliminate a jfreeblk for a block that does not need journaling. + */ +static void +cancel_jfreeblk(freeblks, blkno) + struct freeblks *freeblks; + ufs2_daddr_t blkno; +{ + struct jfreeblk *jfreeblk; + struct jblkdep *jblkdep; + + LIST_FOREACH(jblkdep, &freeblks->fb_jblkdephd, jb_deps) { + if (jblkdep->jb_list.wk_type != D_JFREEBLK) + continue; + jfreeblk = WK_JFREEBLK(&jblkdep->jb_list); + if (jfreeblk->jf_blkno == blkno) + break; + } + if (jblkdep == NULL) + return; + CTR1(KTR_SUJ, "cancel_jfreeblk: blkno %jd", blkno); + free_jsegdep(jblkdep->jb_jsegdep); + LIST_REMOVE(jblkdep, jb_deps); + WORKITEM_FREE(jfreeblk, D_JFREEBLK); +} + +/* + * Allocate a new jfreeblk to journal top level block pointer when truncating + * a file. The caller must add this to the worklist when the per-filesystem + * lock is held. 
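+ * The new jfreeblk is linked onto the freeblks' fb_jblkdephd list, where
+ * it remains until handle_written_jblkdep() removes it once the journal
+ * record has reached the disk.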
+ */ +static struct jfreeblk * +newjfreeblk(freeblks, lbn, blkno, frags) + struct freeblks *freeblks; + ufs_lbn_t lbn; + ufs2_daddr_t blkno; + int frags; +{ + struct jfreeblk *jfreeblk; + + jfreeblk = malloc(sizeof(*jfreeblk), M_JFREEBLK, M_SOFTDEP_FLAGS); + workitem_alloc(&jfreeblk->jf_dep.jb_list, D_JFREEBLK, + freeblks->fb_list.wk_mp); + jfreeblk->jf_dep.jb_jsegdep = newjsegdep(&jfreeblk->jf_dep.jb_list); + jfreeblk->jf_dep.jb_freeblks = freeblks; + jfreeblk->jf_ino = freeblks->fb_inum; + jfreeblk->jf_lbn = lbn; + jfreeblk->jf_blkno = blkno; + jfreeblk->jf_frags = frags; + LIST_INSERT_HEAD(&freeblks->fb_jblkdephd, &jfreeblk->jf_dep, jb_deps); + + return (jfreeblk); +} + +/* + * The journal is only prepared to handle full-size block numbers, so we + * have to adjust the record to reflect the change to a full-size block. + * For example, suppose we have a block made up of fragments 8-15 and + * want to free its last two fragments. We are given a request that says: + * FREEBLK ino=5, blkno=14, lbn=0, frags=2, oldfrags=0 + * where frags are the number of fragments to free and oldfrags are the + * number of fragments to keep. To block align it, we have to change it to + * have a valid full-size blkno, so it becomes: + * FREEBLK ino=5, blkno=8, lbn=0, frags=2, oldfrags=6 + */ +static void +adjust_newfreework(freeblks, frag_offset) + struct freeblks *freeblks; + int frag_offset; +{ + struct jfreeblk *jfreeblk; + + KASSERT((LIST_FIRST(&freeblks->fb_jblkdephd) != NULL && + LIST_FIRST(&freeblks->fb_jblkdephd)->jb_list.wk_type == D_JFREEBLK), + ("adjust_newfreework: Missing freeblks dependency")); + + jfreeblk = WK_JFREEBLK(LIST_FIRST(&freeblks->fb_jblkdephd)); + jfreeblk->jf_blkno -= frag_offset; + jfreeblk->jf_frags += frag_offset; +} + +/* + * Allocate a new jtrunc to track a partial truncation. + */ +static struct jtrunc * +newjtrunc(freeblks, size, extsize) + struct freeblks *freeblks; + off_t size; + int extsize; +{ + struct jtrunc *jtrunc; + + jtrunc = malloc(sizeof(*jtrunc), M_JTRUNC, M_SOFTDEP_FLAGS); + workitem_alloc(&jtrunc->jt_dep.jb_list, D_JTRUNC, + freeblks->fb_list.wk_mp); + jtrunc->jt_dep.jb_jsegdep = newjsegdep(&jtrunc->jt_dep.jb_list); + jtrunc->jt_dep.jb_freeblks = freeblks; + jtrunc->jt_ino = freeblks->fb_inum; + jtrunc->jt_size = size; + jtrunc->jt_extsize = extsize; + LIST_INSERT_HEAD(&freeblks->fb_jblkdephd, &jtrunc->jt_dep, jb_deps); + + return (jtrunc); +} + +/* + * If we're canceling a new bitmap we have to search for another ref + * to move into the bmsafemap dep. This might be better expressed + * with another structure. + */ +static void +move_newblock_dep(jaddref, inodedep) + struct jaddref *jaddref; + struct inodedep *inodedep; +{ + struct inoref *inoref; + struct jaddref *jaddrefn; + + jaddrefn = NULL; + for (inoref = TAILQ_NEXT(&jaddref->ja_ref, if_deps); inoref; + inoref = TAILQ_NEXT(inoref, if_deps)) { + if ((jaddref->ja_state & NEWBLOCK) && + inoref->if_list.wk_type == D_JADDREF) { + jaddrefn = (struct jaddref *)inoref; + break; + } + } + if (jaddrefn == NULL) + return; + jaddrefn->ja_state &= ~(ATTACHED | UNDONE); + jaddrefn->ja_state |= jaddref->ja_state & + (ATTACHED | UNDONE | NEWBLOCK); + jaddref->ja_state &= ~(ATTACHED | UNDONE | NEWBLOCK); + jaddref->ja_state |= ATTACHED; + LIST_REMOVE(jaddref, ja_bmdeps); + LIST_INSERT_HEAD(&inodedep->id_bmsafemap->sm_jaddrefhd, jaddrefn, + ja_bmdeps); +} + +/* + * Cancel a jaddref either before it has been written or while it is being + * written. This happens when a link is removed before the add reaches + * the disk. 
The jaddref dependency is kept linked into the bmsafemap + * and inode to prevent the link count or bitmap from reaching the disk + * until handle_workitem_remove() re-adjusts the counts and bitmaps as + * required. + * + * Returns 1 if the canceled addref requires journaling of the remove and + * 0 otherwise. + */ +static int +cancel_jaddref(jaddref, inodedep, wkhd) + struct jaddref *jaddref; + struct inodedep *inodedep; + struct workhead *wkhd; +{ + struct inoref *inoref; + struct jsegdep *jsegdep; + int needsj; + + KASSERT((jaddref->ja_state & COMPLETE) == 0, + ("cancel_jaddref: Canceling complete jaddref")); + if (jaddref->ja_state & (INPROGRESS | COMPLETE)) + needsj = 1; + else + needsj = 0; + if (inodedep == NULL) + if (inodedep_lookup(jaddref->ja_list.wk_mp, jaddref->ja_ino, + 0, &inodedep) == 0) + panic("cancel_jaddref: Lost inodedep"); + /* + * We must adjust the nlink of any reference operation that follows + * us so that it is consistent with the in-memory reference. This + * ensures that inode nlink rollbacks always have the correct link. + */ + if (needsj == 0) { + for (inoref = TAILQ_NEXT(&jaddref->ja_ref, if_deps); inoref; + inoref = TAILQ_NEXT(inoref, if_deps)) { + if (inoref->if_state & GOINGAWAY) + break; + inoref->if_nlink--; + } + } + jsegdep = inoref_jseg(&jaddref->ja_ref); + if (jaddref->ja_state & NEWBLOCK) + move_newblock_dep(jaddref, inodedep); + wake_worklist(&jaddref->ja_list); + jaddref->ja_mkdir = NULL; + if (jaddref->ja_state & INPROGRESS) { + jaddref->ja_state &= ~INPROGRESS; + WORKLIST_REMOVE(&jaddref->ja_list); + jwork_insert(wkhd, jsegdep); + } else { + free_jsegdep(jsegdep); + if (jaddref->ja_state & DEPCOMPLETE) + remove_from_journal(&jaddref->ja_list); + } + jaddref->ja_state |= (GOINGAWAY | DEPCOMPLETE); + /* + * Leave NEWBLOCK jaddrefs on the inodedep so handle_workitem_remove + * can arrange for them to be freed with the bitmap. Otherwise we + * no longer need this addref attached to the inoreflst and it + * will incorrectly adjust nlink if we leave it. + */ + if ((jaddref->ja_state & NEWBLOCK) == 0) { + TAILQ_REMOVE(&inodedep->id_inoreflst, &jaddref->ja_ref, + if_deps); + jaddref->ja_state |= COMPLETE; + free_jaddref(jaddref); + return (needsj); + } + /* + * Leave the head of the list for jsegdeps for fast merging. + */ + if (LIST_FIRST(wkhd) != NULL) { + jaddref->ja_state |= ONWORKLIST; + LIST_INSERT_AFTER(LIST_FIRST(wkhd), &jaddref->ja_list, wk_list); + } else + WORKLIST_INSERT(wkhd, &jaddref->ja_list); + + return (needsj); +} + +/* + * Attempt to free a jaddref structure when some work completes. This + * should only succeed once the entry is written and all dependencies have + * been notified. + */ +static void +free_jaddref(jaddref) + struct jaddref *jaddref; +{ + + if ((jaddref->ja_state & ALLCOMPLETE) != ALLCOMPLETE) + return; + if (jaddref->ja_ref.if_jsegdep) + panic("free_jaddref: segdep attached to jaddref %p(0x%X)\n", + jaddref, jaddref->ja_state); + if (jaddref->ja_state & NEWBLOCK) + LIST_REMOVE(jaddref, ja_bmdeps); + if (jaddref->ja_state & (INPROGRESS | ONWORKLIST)) + panic("free_jaddref: Bad state %p(0x%X)", + jaddref, jaddref->ja_state); + if (jaddref->ja_mkdir != NULL) + panic("free_jaddref: Work pending, 0x%X\n", jaddref->ja_state); + WORKITEM_FREE(jaddref, D_JADDREF); +} + +/* + * Free a jremref structure once it has been written or discarded. 
+ */ +static void +free_jremref(jremref) + struct jremref *jremref; +{ + + if (jremref->jr_ref.if_jsegdep) + free_jsegdep(jremref->jr_ref.if_jsegdep); + if (jremref->jr_state & INPROGRESS) + panic("free_jremref: IO still pending"); + WORKITEM_FREE(jremref, D_JREMREF); +} + +/* + * Free a jnewblk structure. + */ +static void +free_jnewblk(jnewblk) + struct jnewblk *jnewblk; +{ + + if ((jnewblk->jn_state & ALLCOMPLETE) != ALLCOMPLETE) + return; + LIST_REMOVE(jnewblk, jn_deps); + if (jnewblk->jn_dep != NULL) + panic("free_jnewblk: Dependency still attached."); + WORKITEM_FREE(jnewblk, D_JNEWBLK); +} + +/* + * Cancel a jnewblk which has been been made redundant by frag extension. + */ +static void +cancel_jnewblk(jnewblk, wkhd) + struct jnewblk *jnewblk; + struct workhead *wkhd; +{ + struct jsegdep *jsegdep; + + CTR1(KTR_SUJ, "cancel_jnewblk: blkno %jd", jnewblk->jn_blkno); + jsegdep = jnewblk->jn_jsegdep; + if (jnewblk->jn_jsegdep == NULL || jnewblk->jn_dep == NULL) + panic("cancel_jnewblk: Invalid state"); + jnewblk->jn_jsegdep = NULL; + jnewblk->jn_dep = NULL; + jnewblk->jn_state |= GOINGAWAY; + if (jnewblk->jn_state & INPROGRESS) { + jnewblk->jn_state &= ~INPROGRESS; + WORKLIST_REMOVE(&jnewblk->jn_list); + jwork_insert(wkhd, jsegdep); + } else { + free_jsegdep(jsegdep); + remove_from_journal(&jnewblk->jn_list); + } + wake_worklist(&jnewblk->jn_list); + WORKLIST_INSERT(wkhd, &jnewblk->jn_list); +} + +static void +free_jblkdep(jblkdep) + struct jblkdep *jblkdep; +{ + + if (jblkdep->jb_list.wk_type == D_JFREEBLK) + WORKITEM_FREE(jblkdep, D_JFREEBLK); + else if (jblkdep->jb_list.wk_type == D_JTRUNC) + WORKITEM_FREE(jblkdep, D_JTRUNC); + else + panic("free_jblkdep: Unexpected type %s", + TYPENAME(jblkdep->jb_list.wk_type)); +} + +/* + * Free a single jseg once it is no longer referenced in memory or on + * disk. Reclaim journal blocks and dependencies waiting for the segment + * to disappear. + */ +static void +free_jseg(jseg, jblocks) + struct jseg *jseg; + struct jblocks *jblocks; +{ + struct freework *freework; + + /* + * Free freework structures that were lingering to indicate freed + * indirect blocks that forced journal write ordering on reallocate. + */ + while ((freework = LIST_FIRST(&jseg->js_indirs)) != NULL) + indirblk_remove(freework); + if (jblocks->jb_oldestseg == jseg) + jblocks->jb_oldestseg = TAILQ_NEXT(jseg, js_next); + TAILQ_REMOVE(&jblocks->jb_segs, jseg, js_next); + jblocks_free(jblocks, jseg->js_list.wk_mp, jseg->js_size); + KASSERT(LIST_EMPTY(&jseg->js_entries), + ("free_jseg: Freed jseg has valid entries.")); + WORKITEM_FREE(jseg, D_JSEG); +} + +/* + * Free all jsegs that meet the criteria for being reclaimed and update + * oldestseg. + */ +static void +free_jsegs(jblocks) + struct jblocks *jblocks; +{ + struct jseg *jseg; + + /* + * Free only those jsegs which have none allocated before them to + * preserve the journal space ordering. + */ + while ((jseg = TAILQ_FIRST(&jblocks->jb_segs)) != NULL) { + /* + * Only reclaim space when nothing depends on this journal + * set and another set has written that it is no longer + * valid. + */ + if (jseg->js_refs != 0) { + jblocks->jb_oldestseg = jseg; + return; + } + if ((jseg->js_state & ALLCOMPLETE) != ALLCOMPLETE) + break; + if (jseg->js_seq > jblocks->jb_oldestwrseq) + break; + /* + * We can free jsegs that didn't write entries when + * oldestwrseq == js_seq. 
+ */ + if (jseg->js_seq == jblocks->jb_oldestwrseq && + jseg->js_cnt != 0) + break; + free_jseg(jseg, jblocks); + } + /* + * If we exited the loop above we still must discover the + * oldest valid segment. + */ + if (jseg) + for (jseg = jblocks->jb_oldestseg; jseg != NULL; + jseg = TAILQ_NEXT(jseg, js_next)) + if (jseg->js_refs != 0) + break; + jblocks->jb_oldestseg = jseg; + /* + * The journal has no valid records but some jsegs may still be + * waiting on oldestwrseq to advance. We force a small record + * out to permit these lingering records to be reclaimed. + */ + if (jblocks->jb_oldestseg == NULL && !TAILQ_EMPTY(&jblocks->jb_segs)) + jblocks->jb_needseg = 1; +} + +/* + * Release one reference to a jseg and free it if the count reaches 0. This + * should eventually reclaim journal space as well. + */ +static void +rele_jseg(jseg) + struct jseg *jseg; +{ + + KASSERT(jseg->js_refs > 0, + ("free_jseg: Invalid refcnt %d", jseg->js_refs)); + if (--jseg->js_refs != 0) + return; + free_jsegs(jseg->js_jblocks); +} + +/* + * Release a jsegdep and decrement the jseg count. + */ +static void +free_jsegdep(jsegdep) + struct jsegdep *jsegdep; +{ + + if (jsegdep->jd_seg) + rele_jseg(jsegdep->jd_seg); + WORKITEM_FREE(jsegdep, D_JSEGDEP); +} + +/* + * Wait for a journal item to make it to disk. Initiate journal processing + * if required. + */ +static int +jwait(wk, waitfor) + struct worklist *wk; + int waitfor; +{ + + LOCK_OWNED(VFSTOUFS(wk->wk_mp)); + /* + * Blocking journal waits cause slow synchronous behavior. Record + * stats on the frequency of these blocking operations. + */ + if (waitfor == MNT_WAIT) { + stat_journal_wait++; + switch (wk->wk_type) { + case D_JREMREF: + case D_JMVREF: + stat_jwait_filepage++; + break; + case D_JTRUNC: + case D_JFREEBLK: + stat_jwait_freeblks++; + break; + case D_JNEWBLK: + stat_jwait_newblk++; + break; + case D_JADDREF: + stat_jwait_inode++; + break; + default: + break; + } + } + /* + * If IO has not started we process the journal. We can't mark the + * worklist item as IOWAITING because we drop the lock while + * processing the journal and the worklist entry may be freed after + * this point. The caller may call back in and re-issue the request. + */ + if ((wk->wk_state & INPROGRESS) == 0) { + softdep_process_journal(wk->wk_mp, wk, waitfor); + if (waitfor != MNT_WAIT) + return (EBUSY); + return (0); + } + if (waitfor != MNT_WAIT) + return (EBUSY); + wait_worklist(wk, "jwait"); + return (0); +} + +/* + * Lookup an inodedep based on an inode pointer and set the nlinkdelta as + * appropriate. This is a convenience function to reduce duplicate code + * for the setup and revert functions below. + */ +static struct inodedep * +inodedep_lookup_ip(ip) + struct inode *ip; +{ + struct inodedep *inodedep; + + KASSERT(ip->i_nlink >= ip->i_effnlink, + ("inodedep_lookup_ip: bad delta")); + (void) inodedep_lookup(ITOVFS(ip), ip->i_number, DEPALLOC, + &inodedep); + inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink; + KASSERT((inodedep->id_state & UNLINKED) == 0, ("inode unlinked")); + + return (inodedep); +} + +/* + * Called prior to creating a new inode and linking it to a directory. The + * jaddref structure must already be allocated by softdep_setup_inomapdep + * and it is discovered here so we can initialize the mode and update + * nlinkdelta. 
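+ * The jaddref is found at the tail of the inode's id_inoreflst, where
+ * softdep_setup_inomapdep() placed it when the inode was allocated.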
+ */ +void +softdep_setup_create(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + struct vnode *dvp; + + KASSERT(MOUNTEDSOFTDEP(ITOVFS(dp)) != 0, + ("softdep_setup_create called on non-softdep filesystem")); + KASSERT(ip->i_nlink == 1, + ("softdep_setup_create: Invalid link count.")); + dvp = ITOV(dp); + ACQUIRE_LOCK(ITOUMP(dp)); + inodedep = inodedep_lookup_ip(ip); + if (DOINGSUJ(dvp)) { + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref != NULL && jaddref->ja_parent == dp->i_number, + ("softdep_setup_create: No addref structure present.")); + } + softdep_prelink(dvp, NULL); + FREE_LOCK(ITOUMP(dp)); +} + +/* + * Create a jaddref structure to track the addition of a DOTDOT link when + * we are reparenting an inode as part of a rename. This jaddref will be + * found by softdep_setup_directory_change. Adjusts nlinkdelta for + * non-journaling softdep. + */ +void +softdep_setup_dotdot_link(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + struct vnode *dvp; + + KASSERT(MOUNTEDSOFTDEP(ITOVFS(dp)) != 0, + ("softdep_setup_dotdot_link called on non-softdep filesystem")); + dvp = ITOV(dp); + jaddref = NULL; + /* + * We don't set MKDIR_PARENT as this is not tied to a mkdir and + * is used as a normal link would be. + */ + if (DOINGSUJ(dvp)) + jaddref = newjaddref(ip, dp->i_number, DOTDOT_OFFSET, + dp->i_effnlink - 1, dp->i_mode); + ACQUIRE_LOCK(ITOUMP(dp)); + inodedep = inodedep_lookup_ip(dp); + if (jaddref) + TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jaddref->ja_ref, + if_deps); + softdep_prelink(dvp, ITOV(ip)); + FREE_LOCK(ITOUMP(dp)); +} + +/* + * Create a jaddref structure to track a new link to an inode. The directory + * offset is not known until softdep_setup_directory_add or + * softdep_setup_directory_change. Adjusts nlinkdelta for non-journaling + * softdep. + */ +void +softdep_setup_link(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + struct vnode *dvp; + + KASSERT(MOUNTEDSOFTDEP(ITOVFS(dp)) != 0, + ("softdep_setup_link called on non-softdep filesystem")); + dvp = ITOV(dp); + jaddref = NULL; + if (DOINGSUJ(dvp)) + jaddref = newjaddref(dp, ip->i_number, 0, ip->i_effnlink - 1, + ip->i_mode); + ACQUIRE_LOCK(ITOUMP(dp)); + inodedep = inodedep_lookup_ip(ip); + if (jaddref) + TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jaddref->ja_ref, + if_deps); + softdep_prelink(dvp, ITOV(ip)); + FREE_LOCK(ITOUMP(dp)); +} + +/* + * Called to create the jaddref structures to track . and .. references as + * well as lookup and further initialize the incomplete jaddref created + * by softdep_setup_inomapdep when the inode was allocated. Adjusts + * nlinkdelta for non-journaling softdep. 
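+ * The "." reference is marked MKDIR_BODY and the ".." reference is marked
+ * MKDIR_PARENT so that they can later be matched up with the corresponding
+ * mkdir dependencies.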
+ */ +void +softdep_setup_mkdir(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *dotdotaddref; + struct jaddref *dotaddref; + struct jaddref *jaddref; + struct vnode *dvp; + + KASSERT(MOUNTEDSOFTDEP(ITOVFS(dp)) != 0, + ("softdep_setup_mkdir called on non-softdep filesystem")); + dvp = ITOV(dp); + dotaddref = dotdotaddref = NULL; + if (DOINGSUJ(dvp)) { + dotaddref = newjaddref(ip, ip->i_number, DOT_OFFSET, 1, + ip->i_mode); + dotaddref->ja_state |= MKDIR_BODY; + dotdotaddref = newjaddref(ip, dp->i_number, DOTDOT_OFFSET, + dp->i_effnlink - 1, dp->i_mode); + dotdotaddref->ja_state |= MKDIR_PARENT; + } + ACQUIRE_LOCK(ITOUMP(dp)); + inodedep = inodedep_lookup_ip(ip); + if (DOINGSUJ(dvp)) { + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref != NULL, + ("softdep_setup_mkdir: No addref structure present.")); + KASSERT(jaddref->ja_parent == dp->i_number, + ("softdep_setup_mkdir: bad parent %ju", + (uintmax_t)jaddref->ja_parent)); + TAILQ_INSERT_BEFORE(&jaddref->ja_ref, &dotaddref->ja_ref, + if_deps); + } + inodedep = inodedep_lookup_ip(dp); + if (DOINGSUJ(dvp)) + TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, + &dotdotaddref->ja_ref, if_deps); + softdep_prelink(ITOV(dp), NULL); + FREE_LOCK(ITOUMP(dp)); +} + +/* + * Called to track nlinkdelta of the inode and parent directories prior to + * unlinking a directory. + */ +void +softdep_setup_rmdir(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct vnode *dvp; + + KASSERT(MOUNTEDSOFTDEP(ITOVFS(dp)) != 0, + ("softdep_setup_rmdir called on non-softdep filesystem")); + dvp = ITOV(dp); + ACQUIRE_LOCK(ITOUMP(dp)); + (void) inodedep_lookup_ip(ip); + (void) inodedep_lookup_ip(dp); + softdep_prelink(dvp, ITOV(ip)); + FREE_LOCK(ITOUMP(dp)); +} + +/* + * Called to track nlinkdelta of the inode and parent directories prior to + * unlink. + */ +void +softdep_setup_unlink(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct vnode *dvp; + + KASSERT(MOUNTEDSOFTDEP(ITOVFS(dp)) != 0, + ("softdep_setup_unlink called on non-softdep filesystem")); + dvp = ITOV(dp); + ACQUIRE_LOCK(ITOUMP(dp)); + (void) inodedep_lookup_ip(ip); + (void) inodedep_lookup_ip(dp); + softdep_prelink(dvp, ITOV(ip)); + FREE_LOCK(ITOUMP(dp)); +} + +/* + * Called to release the journal structures created by a failed non-directory + * creation. Adjusts nlinkdelta for non-journaling softdep. + */ +void +softdep_revert_create(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + struct vnode *dvp; + + KASSERT(MOUNTEDSOFTDEP(ITOVFS((dp))) != 0, + ("softdep_revert_create called on non-softdep filesystem")); + dvp = ITOV(dp); + ACQUIRE_LOCK(ITOUMP(dp)); + inodedep = inodedep_lookup_ip(ip); + if (DOINGSUJ(dvp)) { + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref->ja_parent == dp->i_number, + ("softdep_revert_create: addref parent mismatch")); + cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait); + } + FREE_LOCK(ITOUMP(dp)); +} + +/* + * Called to release the journal structures created by a failed link + * addition. Adjusts nlinkdelta for non-journaling softdep. 
+ */ +void +softdep_revert_link(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + struct vnode *dvp; + + KASSERT(MOUNTEDSOFTDEP(ITOVFS(dp)) != 0, + ("softdep_revert_link called on non-softdep filesystem")); + dvp = ITOV(dp); + ACQUIRE_LOCK(ITOUMP(dp)); + inodedep = inodedep_lookup_ip(ip); + if (DOINGSUJ(dvp)) { + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref->ja_parent == dp->i_number, + ("softdep_revert_link: addref parent mismatch")); + cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait); + } + FREE_LOCK(ITOUMP(dp)); +} + +/* + * Called to release the journal structures created by a failed mkdir + * attempt. Adjusts nlinkdelta for non-journaling softdep. + */ +void +softdep_revert_mkdir(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + struct jaddref *dotaddref; + struct vnode *dvp; + + KASSERT(MOUNTEDSOFTDEP(ITOVFS(dp)) != 0, + ("softdep_revert_mkdir called on non-softdep filesystem")); + dvp = ITOV(dp); + + ACQUIRE_LOCK(ITOUMP(dp)); + inodedep = inodedep_lookup_ip(dp); + if (DOINGSUJ(dvp)) { + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref->ja_parent == ip->i_number, + ("softdep_revert_mkdir: dotdot addref parent mismatch")); + cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait); + } + inodedep = inodedep_lookup_ip(ip); + if (DOINGSUJ(dvp)) { + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref->ja_parent == dp->i_number, + ("softdep_revert_mkdir: addref parent mismatch")); + dotaddref = (struct jaddref *)TAILQ_PREV(&jaddref->ja_ref, + inoreflst, if_deps); + cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait); + KASSERT(dotaddref->ja_parent == ip->i_number, + ("softdep_revert_mkdir: dot addref parent mismatch")); + cancel_jaddref(dotaddref, inodedep, &inodedep->id_inowait); + } + FREE_LOCK(ITOUMP(dp)); +} + +/* + * Called to correct nlinkdelta after a failed rmdir. + */ +void +softdep_revert_rmdir(dp, ip) + struct inode *dp; + struct inode *ip; +{ + + KASSERT(MOUNTEDSOFTDEP(ITOVFS(dp)) != 0, + ("softdep_revert_rmdir called on non-softdep filesystem")); + ACQUIRE_LOCK(ITOUMP(dp)); + (void) inodedep_lookup_ip(ip); + (void) inodedep_lookup_ip(dp); + FREE_LOCK(ITOUMP(dp)); +} + +/* + * Protecting the freemaps (or bitmaps). + * + * To eliminate the need to execute fsck before mounting a filesystem + * after a power failure, one must (conservatively) guarantee that the + * on-disk copy of the bitmaps never indicate that a live inode or block is + * free. So, when a block or inode is allocated, the bitmap should be + * updated (on disk) before any new pointers. When a block or inode is + * freed, the bitmap should not be updated until all pointers have been + * reset. The latter dependency is handled by the delayed de-allocation + * approach described below for block and inode de-allocation. The former + * dependency is handled by calling the following procedure when a block or + * inode is allocated. When an inode is allocated an "inodedep" is created + * with its DEPCOMPLETE flag cleared until its bitmap is written to disk. + * Each "inodedep" is also inserted into the hash indexing structure so + * that any additional link additions can be made dependent on the inode + * allocation. 
+ *
+ * The ufs filesystem maintains a number of free block counts (e.g., per
+ * cylinder group, per cylinder and per <cylinder, rotational position> pair)
+ * in addition to the bitmaps.  These counts are used to improve efficiency
+ * during allocation and therefore must be consistent with the bitmaps.
+ * There is no convenient way to guarantee post-crash consistency of these
+ * counts with simple update ordering, for two main reasons: (1) The counts
+ * and bitmaps for a single cylinder group block are not in the same disk
+ * sector.  If a disk write is interrupted (e.g., by power failure), one may
+ * be written and the other not.  (2) Some of the counts are located in the
+ * superblock rather than the cylinder group block.  So, we focus our soft
+ * updates implementation on protecting the bitmaps.  When mounting a
+ * filesystem, we recompute the auxiliary counts from the bitmaps.
+ */
+
+/*
+ * Called just after updating the cylinder group block to allocate an inode.
+ */
+void
+softdep_setup_inomapdep(bp, ip, newinum, mode)
+	struct buf *bp;		/* buffer for cylgroup block with inode map */
+	struct inode *ip;	/* inode related to allocation */
+	ino_t newinum;		/* new inode number being allocated */
+	int mode;
+{
+	struct inodedep *inodedep;
+	struct bmsafemap *bmsafemap;
+	struct jaddref *jaddref;
+	struct mount *mp;
+	struct fs *fs;
+
+	mp = ITOVFS(ip);
+	KASSERT(MOUNTEDSOFTDEP(mp) != 0,
+	    ("softdep_setup_inomapdep called on non-softdep filesystem"));
+	fs = VFSTOUFS(mp)->um_fs;
+	jaddref = NULL;
+
+	/*
+	 * Allocate the journal reference add structure so that the bitmap
+	 * can be dependent on it.
+	 */
+	if (MOUNTEDSUJ(mp)) {
+		jaddref = newjaddref(ip, newinum, 0, 0, mode);
+		jaddref->ja_state |= NEWBLOCK;
+	}
+
+	/*
+	 * Create a dependency for the newly allocated inode.
+	 * Panic if it already exists as something is seriously wrong.
+	 * Otherwise add it to the dependency list for the buffer holding
+	 * the cylinder group map from which it was allocated.
+	 *
+	 * We have to preallocate a bmsafemap entry in case it is needed
+	 * in bmsafemap_lookup since once we allocate the inodedep, we
+	 * have to finish initializing it before we can FREE_LOCK().
+	 * By preallocating, we avoid FREE_LOCK() while doing a malloc
+	 * in bmsafemap_lookup. We cannot call bmsafemap_lookup before
+	 * creating the inodedep as it can be freed during the time
+	 * that we FREE_LOCK() while allocating the inodedep. We must
+	 * call workitem_alloc() before entering the locked section as
+	 * it also acquires the lock and we must avoid trying to do so
+	 * recursively.
+	 */
+	bmsafemap = malloc(sizeof(struct bmsafemap),
+	    M_BMSAFEMAP, M_SOFTDEP_FLAGS);
+	workitem_alloc(&bmsafemap->sm_list, D_BMSAFEMAP, mp);
+	ACQUIRE_LOCK(ITOUMP(ip));
+	if ((inodedep_lookup(mp, newinum, DEPALLOC, &inodedep)))
+		panic("softdep_setup_inomapdep: dependency %p for new "
+		    "inode already exists", inodedep);
+	bmsafemap = bmsafemap_lookup(mp, bp, ino_to_cg(fs, newinum), bmsafemap);
+	if (jaddref) {
+		LIST_INSERT_HEAD(&bmsafemap->sm_jaddrefhd, jaddref, ja_bmdeps);
+		TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jaddref->ja_ref,
+		    if_deps);
+	} else {
+		inodedep->id_state |= ONDEPLIST;
+		LIST_INSERT_HEAD(&bmsafemap->sm_inodedephd, inodedep, id_deps);
+	}
+	inodedep->id_bmsafemap = bmsafemap;
+	inodedep->id_state &= ~DEPCOMPLETE;
+	FREE_LOCK(ITOUMP(ip));
+}
+
+/*
+ * Called just after updating the cylinder group block to
+ * allocate block or fragment.
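+ * When journaling, a jnewblk is created here and attached to the newblk;
+ * softdep_setup_allocdirect() or softdep_setup_allocindir_page() later
+ * fills in the inode and logical block numbers and adds it to the journal,
+ * and handle_written_jnewblk() completes it once the record is stable.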
+ */ +void +softdep_setup_blkmapdep(bp, mp, newblkno, frags, oldfrags) + struct buf *bp; /* buffer for cylgroup block with block map */ + struct mount *mp; /* filesystem doing allocation */ + ufs2_daddr_t newblkno; /* number of newly allocated block */ + int frags; /* Number of fragments. */ + int oldfrags; /* Previous number of fragments for extend. */ +{ + struct newblk *newblk; + struct bmsafemap *bmsafemap; + struct jnewblk *jnewblk; + struct ufsmount *ump; + struct fs *fs; + + KASSERT(MOUNTEDSOFTDEP(mp) != 0, + ("softdep_setup_blkmapdep called on non-softdep filesystem")); + ump = VFSTOUFS(mp); + fs = ump->um_fs; + jnewblk = NULL; + /* + * Create a dependency for the newly allocated block. + * Add it to the dependency list for the buffer holding + * the cylinder group map from which it was allocated. + */ + if (MOUNTEDSUJ(mp)) { + jnewblk = malloc(sizeof(*jnewblk), M_JNEWBLK, M_SOFTDEP_FLAGS); + workitem_alloc(&jnewblk->jn_list, D_JNEWBLK, mp); + jnewblk->jn_jsegdep = newjsegdep(&jnewblk->jn_list); + jnewblk->jn_state = ATTACHED; + jnewblk->jn_blkno = newblkno; + jnewblk->jn_frags = frags; + jnewblk->jn_oldfrags = oldfrags; +#ifdef SUJ_DEBUG + { + struct cg *cgp; + uint8_t *blksfree; + long bno; + int i; + + cgp = (struct cg *)bp->b_data; + blksfree = cg_blksfree(cgp); + bno = dtogd(fs, jnewblk->jn_blkno); + for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags; + i++) { + if (isset(blksfree, bno + i)) + panic("softdep_setup_blkmapdep: " + "free fragment %d from %d-%d " + "state 0x%X dep %p", i, + jnewblk->jn_oldfrags, + jnewblk->jn_frags, + jnewblk->jn_state, + jnewblk->jn_dep); + } + } +#endif + } + + CTR3(KTR_SUJ, + "softdep_setup_blkmapdep: blkno %jd frags %d oldfrags %d", + newblkno, frags, oldfrags); + ACQUIRE_LOCK(ump); + if (newblk_lookup(mp, newblkno, DEPALLOC, &newblk) != 0) + panic("softdep_setup_blkmapdep: found block"); + newblk->nb_bmsafemap = bmsafemap = bmsafemap_lookup(mp, bp, + dtog(fs, newblkno), NULL); + if (jnewblk) { + jnewblk->jn_dep = (struct worklist *)newblk; + LIST_INSERT_HEAD(&bmsafemap->sm_jnewblkhd, jnewblk, jn_deps); + } else { + newblk->nb_state |= ONDEPLIST; + LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, nb_deps); + } + newblk->nb_bmsafemap = bmsafemap; + newblk->nb_jnewblk = jnewblk; + FREE_LOCK(ump); +} + +#define BMSAFEMAP_HASH(ump, cg) \ + (&(ump)->bmsafemap_hashtbl[(cg) & (ump)->bmsafemap_hash_size]) + +static int +bmsafemap_find(bmsafemaphd, cg, bmsafemapp) + struct bmsafemap_hashhead *bmsafemaphd; + int cg; + struct bmsafemap **bmsafemapp; +{ + struct bmsafemap *bmsafemap; + + LIST_FOREACH(bmsafemap, bmsafemaphd, sm_hash) + if (bmsafemap->sm_cg == cg) + break; + if (bmsafemap) { + *bmsafemapp = bmsafemap; + return (1); + } + *bmsafemapp = NULL; + + return (0); +} + +/* + * Find the bmsafemap associated with a cylinder group buffer. + * If none exists, create one. The buffer must be locked when + * this routine is called and this routine must be called with + * the softdep lock held. To avoid giving up the lock while + * allocating a new bmsafemap, a preallocated bmsafemap may be + * provided. If it is provided but not needed, it is freed. 
+ */ +static struct bmsafemap * +bmsafemap_lookup(mp, bp, cg, newbmsafemap) + struct mount *mp; + struct buf *bp; + int cg; + struct bmsafemap *newbmsafemap; +{ + struct bmsafemap_hashhead *bmsafemaphd; + struct bmsafemap *bmsafemap, *collision; + struct worklist *wk; + struct ufsmount *ump; + + ump = VFSTOUFS(mp); + LOCK_OWNED(ump); + KASSERT(bp != NULL, ("bmsafemap_lookup: missing buffer")); + LIST_FOREACH(wk, &bp->b_dep, wk_list) { + if (wk->wk_type == D_BMSAFEMAP) { + if (newbmsafemap) + WORKITEM_FREE(newbmsafemap, D_BMSAFEMAP); + return (WK_BMSAFEMAP(wk)); + } + } + bmsafemaphd = BMSAFEMAP_HASH(ump, cg); + if (bmsafemap_find(bmsafemaphd, cg, &bmsafemap) == 1) { + if (newbmsafemap) + WORKITEM_FREE(newbmsafemap, D_BMSAFEMAP); + return (bmsafemap); + } + if (newbmsafemap) { + bmsafemap = newbmsafemap; + } else { + FREE_LOCK(ump); + bmsafemap = malloc(sizeof(struct bmsafemap), + M_BMSAFEMAP, M_SOFTDEP_FLAGS); + workitem_alloc(&bmsafemap->sm_list, D_BMSAFEMAP, mp); + ACQUIRE_LOCK(ump); + } + bmsafemap->sm_buf = bp; + LIST_INIT(&bmsafemap->sm_inodedephd); + LIST_INIT(&bmsafemap->sm_inodedepwr); + LIST_INIT(&bmsafemap->sm_newblkhd); + LIST_INIT(&bmsafemap->sm_newblkwr); + LIST_INIT(&bmsafemap->sm_jaddrefhd); + LIST_INIT(&bmsafemap->sm_jnewblkhd); + LIST_INIT(&bmsafemap->sm_freehd); + LIST_INIT(&bmsafemap->sm_freewr); + if (bmsafemap_find(bmsafemaphd, cg, &collision) == 1) { + WORKITEM_FREE(bmsafemap, D_BMSAFEMAP); + return (collision); + } + bmsafemap->sm_cg = cg; + LIST_INSERT_HEAD(bmsafemaphd, bmsafemap, sm_hash); + LIST_INSERT_HEAD(&ump->softdep_dirtycg, bmsafemap, sm_next); + WORKLIST_INSERT(&bp->b_dep, &bmsafemap->sm_list); + return (bmsafemap); +} + +/* + * Direct block allocation dependencies. + * + * When a new block is allocated, the corresponding disk locations must be + * initialized (with zeros or new data) before the on-disk inode points to + * them. Also, the freemap from which the block was allocated must be + * updated (on disk) before the inode's pointer. These two dependencies are + * independent of each other and are needed for all file blocks and indirect + * blocks that are pointed to directly by the inode. Just before the + * "in-core" version of the inode is updated with a newly allocated block + * number, a procedure (below) is called to setup allocation dependency + * structures. These structures are removed when the corresponding + * dependencies are satisfied or when the block allocation becomes obsolete + * (i.e., the file is deleted, the block is de-allocated, or the block is a + * fragment that gets upgraded). All of these cases are handled in + * procedures described later. + * + * When a file extension causes a fragment to be upgraded, either to a larger + * fragment or to a full block, the on-disk location may change (if the + * previous fragment could not simply be extended). In this case, the old + * fragment must be de-allocated, but not until after the inode's pointer has + * been updated. In most cases, this is handled by later procedures, which + * will construct a "freefrag" structure to be added to the workitem queue + * when the inode update is complete (or obsolete). The main exception to + * this is when an allocation occurs while a pending allocation dependency + * (for the same block pointer) remains. This case is handled in the main + * allocation dependency setup procedure by immediately freeing the + * unreferenced fragments. 
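+ *
+ * As a concrete illustration, extending a file by one full block via
+ * write(2) proceeds roughly as follows: the block is allocated and
+ * softdep_setup_blkmapdep() records it against the cylinder group buffer;
+ * balloc then calls softdep_setup_allocdirect() (below) just before the
+ * in-core inode pointer is updated; and the new pointer is only permitted
+ * to reach the disk once both the bitmap write and the write of the
+ * block's contents have completed.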
+ */ +void +softdep_setup_allocdirect(ip, off, newblkno, oldblkno, newsize, oldsize, bp) + struct inode *ip; /* inode to which block is being added */ + ufs_lbn_t off; /* block pointer within inode */ + ufs2_daddr_t newblkno; /* disk block number being added */ + ufs2_daddr_t oldblkno; /* previous block number, 0 unless frag */ + long newsize; /* size of new block */ + long oldsize; /* size of new block */ + struct buf *bp; /* bp for allocated block */ +{ + struct allocdirect *adp, *oldadp; + struct allocdirectlst *adphead; + struct freefrag *freefrag; + struct inodedep *inodedep; + struct pagedep *pagedep; + struct jnewblk *jnewblk; + struct newblk *newblk; + struct mount *mp; + ufs_lbn_t lbn; + + lbn = bp->b_lblkno; + mp = ITOVFS(ip); + KASSERT(MOUNTEDSOFTDEP(mp) != 0, + ("softdep_setup_allocdirect called on non-softdep filesystem")); + if (oldblkno && oldblkno != newblkno) + freefrag = newfreefrag(ip, oldblkno, oldsize, lbn); + else + freefrag = NULL; + + CTR6(KTR_SUJ, + "softdep_setup_allocdirect: ino %d blkno %jd oldblkno %jd " + "off %jd newsize %ld oldsize %d", + ip->i_number, newblkno, oldblkno, off, newsize, oldsize); + ACQUIRE_LOCK(ITOUMP(ip)); + if (off >= NDADDR) { + if (lbn > 0) + panic("softdep_setup_allocdirect: bad lbn %jd, off %jd", + lbn, off); + /* allocating an indirect block */ + if (oldblkno != 0) + panic("softdep_setup_allocdirect: non-zero indir"); + } else { + if (off != lbn) + panic("softdep_setup_allocdirect: lbn %jd != off %jd", + lbn, off); + /* + * Allocating a direct block. + * + * If we are allocating a directory block, then we must + * allocate an associated pagedep to track additions and + * deletions. + */ + if ((ip->i_mode & IFMT) == IFDIR) + pagedep_lookup(mp, bp, ip->i_number, off, DEPALLOC, + &pagedep); + } + if (newblk_lookup(mp, newblkno, 0, &newblk) == 0) + panic("softdep_setup_allocdirect: lost block"); + KASSERT(newblk->nb_list.wk_type == D_NEWBLK, + ("softdep_setup_allocdirect: newblk already initialized")); + /* + * Convert the newblk to an allocdirect. + */ + WORKITEM_REASSIGN(newblk, D_ALLOCDIRECT); + adp = (struct allocdirect *)newblk; + newblk->nb_freefrag = freefrag; + adp->ad_offset = off; + adp->ad_oldblkno = oldblkno; + adp->ad_newsize = newsize; + adp->ad_oldsize = oldsize; + + /* + * Finish initializing the journal. + */ + if ((jnewblk = newblk->nb_jnewblk) != NULL) { + jnewblk->jn_ino = ip->i_number; + jnewblk->jn_lbn = lbn; + add_to_journal(&jnewblk->jn_list); + } + if (freefrag && freefrag->ff_jdep != NULL && + freefrag->ff_jdep->wk_type == D_JFREEFRAG) + add_to_journal(freefrag->ff_jdep); + inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep); + adp->ad_inodedep = inodedep; + + WORKLIST_INSERT(&bp->b_dep, &newblk->nb_list); + /* + * The list of allocdirects must be kept in sorted and ascending + * order so that the rollback routines can quickly determine the + * first uncommitted block (the size of the file stored on disk + * ends at the end of the lowest committed fragment, or if there + * are no fragments, at the end of the highest committed block). + * Since files generally grow, the typical case is that the new + * block is to be added at the end of the list. We speed this + * special case by checking against the last allocdirect in the + * list before laboriously traversing the list looking for the + * insertion point. 
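+ * (For example, if an inode has committed allocdirects for blocks 0-2
+ * and an uncommitted one for block 3, the sorted order lets the rollback
+ * code stop at the first uncommitted entry rather than scanning the
+ * whole list.)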
+ */ + adphead = &inodedep->id_newinoupdt; + oldadp = TAILQ_LAST(adphead, allocdirectlst); + if (oldadp == NULL || oldadp->ad_offset <= off) { + /* insert at end of list */ + TAILQ_INSERT_TAIL(adphead, adp, ad_next); + if (oldadp != NULL && oldadp->ad_offset == off) + allocdirect_merge(adphead, adp, oldadp); + FREE_LOCK(ITOUMP(ip)); + return; + } + TAILQ_FOREACH(oldadp, adphead, ad_next) { + if (oldadp->ad_offset >= off) + break; + } + if (oldadp == NULL) + panic("softdep_setup_allocdirect: lost entry"); + /* insert in middle of list */ + TAILQ_INSERT_BEFORE(oldadp, adp, ad_next); + if (oldadp->ad_offset == off) + allocdirect_merge(adphead, adp, oldadp); + + FREE_LOCK(ITOUMP(ip)); +} + +/* + * Merge a newer and older journal record to be stored either in a + * newblock or freefrag. This handles aggregating journal records for + * fragment allocation into a second record as well as replacing a + * journal free with an aborted journal allocation. A segment for the + * oldest record will be placed on wkhd if it has been written. If not + * the segment for the newer record will suffice. + */ +static struct worklist * +jnewblk_merge(new, old, wkhd) + struct worklist *new; + struct worklist *old; + struct workhead *wkhd; +{ + struct jnewblk *njnewblk; + struct jnewblk *jnewblk; + + /* Handle NULLs to simplify callers. */ + if (new == NULL) + return (old); + if (old == NULL) + return (new); + /* Replace a jfreefrag with a jnewblk. */ + if (new->wk_type == D_JFREEFRAG) { + if (WK_JNEWBLK(old)->jn_blkno != WK_JFREEFRAG(new)->fr_blkno) + panic("jnewblk_merge: blkno mismatch: %p, %p", + old, new); + cancel_jfreefrag(WK_JFREEFRAG(new)); + return (old); + } + if (old->wk_type != D_JNEWBLK || new->wk_type != D_JNEWBLK) + panic("jnewblk_merge: Bad type: old %d new %d\n", + old->wk_type, new->wk_type); + /* + * Handle merging of two jnewblk records that describe + * different sets of fragments in the same block. + */ + jnewblk = WK_JNEWBLK(old); + njnewblk = WK_JNEWBLK(new); + if (jnewblk->jn_blkno != njnewblk->jn_blkno) + panic("jnewblk_merge: Merging disparate blocks."); + /* + * The record may be rolled back in the cg. + */ + if (jnewblk->jn_state & UNDONE) { + jnewblk->jn_state &= ~UNDONE; + njnewblk->jn_state |= UNDONE; + njnewblk->jn_state &= ~ATTACHED; + } + /* + * We modify the newer addref and free the older so that if neither + * has been written the most up-to-date copy will be on disk. If + * both have been written but rolled back we only temporarily need + * one of them to fix the bits when the cg write completes. + */ + jnewblk->jn_state |= ATTACHED | COMPLETE; + njnewblk->jn_oldfrags = jnewblk->jn_oldfrags; + cancel_jnewblk(jnewblk, wkhd); + WORKLIST_REMOVE(&jnewblk->jn_list); + free_jnewblk(jnewblk); + return (new); +} + +/* + * Replace an old allocdirect dependency with a newer one. + * This routine must be called with splbio interrupts blocked. 
+ */ +static void +allocdirect_merge(adphead, newadp, oldadp) + struct allocdirectlst *adphead; /* head of list holding allocdirects */ + struct allocdirect *newadp; /* allocdirect being added */ + struct allocdirect *oldadp; /* existing allocdirect being checked */ +{ + struct worklist *wk; + struct freefrag *freefrag; + + freefrag = NULL; + LOCK_OWNED(VFSTOUFS(newadp->ad_list.wk_mp)); + if (newadp->ad_oldblkno != oldadp->ad_newblkno || + newadp->ad_oldsize != oldadp->ad_newsize || + newadp->ad_offset >= NDADDR) + panic("%s %jd != new %jd || old size %ld != new %ld", + "allocdirect_merge: old blkno", + (intmax_t)newadp->ad_oldblkno, + (intmax_t)oldadp->ad_newblkno, + newadp->ad_oldsize, oldadp->ad_newsize); + newadp->ad_oldblkno = oldadp->ad_oldblkno; + newadp->ad_oldsize = oldadp->ad_oldsize; + /* + * If the old dependency had a fragment to free or had never + * previously had a block allocated, then the new dependency + * can immediately post its freefrag and adopt the old freefrag. + * This action is done by swapping the freefrag dependencies. + * The new dependency gains the old one's freefrag, and the + * old one gets the new one and then immediately puts it on + * the worklist when it is freed by free_newblk. It is + * not possible to do this swap when the old dependency had a + * non-zero size but no previous fragment to free. This condition + * arises when the new block is an extension of the old block. + * Here, the first part of the fragment allocated to the new + * dependency is part of the block currently claimed on disk by + * the old dependency, so cannot legitimately be freed until the + * conditions for the new dependency are fulfilled. + */ + freefrag = newadp->ad_freefrag; + if (oldadp->ad_freefrag != NULL || oldadp->ad_oldblkno == 0) { + newadp->ad_freefrag = oldadp->ad_freefrag; + oldadp->ad_freefrag = freefrag; + } + /* + * If we are tracking a new directory-block allocation, + * move it from the old allocdirect to the new allocdirect. + */ + if ((wk = LIST_FIRST(&oldadp->ad_newdirblk)) != NULL) { + WORKLIST_REMOVE(wk); + if (!LIST_EMPTY(&oldadp->ad_newdirblk)) + panic("allocdirect_merge: extra newdirblk"); + WORKLIST_INSERT(&newadp->ad_newdirblk, wk); + } + TAILQ_REMOVE(adphead, oldadp, ad_next); + /* + * We need to move any journal dependencies over to the freefrag + * that releases this block if it exists. Otherwise we are + * extending an existing block and we'll wait until that is + * complete to release the journal space and extend the + * new journal to cover this old space as well. + */ + if (freefrag == NULL) { + if (oldadp->ad_newblkno != newadp->ad_newblkno) + panic("allocdirect_merge: %jd != %jd", + oldadp->ad_newblkno, newadp->ad_newblkno); + newadp->ad_block.nb_jnewblk = (struct jnewblk *) + jnewblk_merge(&newadp->ad_block.nb_jnewblk->jn_list, + &oldadp->ad_block.nb_jnewblk->jn_list, + &newadp->ad_block.nb_jwork); + oldadp->ad_block.nb_jnewblk = NULL; + cancel_newblk(&oldadp->ad_block, NULL, + &newadp->ad_block.nb_jwork); + } else { + wk = (struct worklist *) cancel_newblk(&oldadp->ad_block, + &freefrag->ff_list, &freefrag->ff_jwork); + freefrag->ff_jdep = jnewblk_merge(freefrag->ff_jdep, wk, + &freefrag->ff_jwork); + } + free_newblk(&oldadp->ad_block); +} + +/* + * Allocate a jfreefrag structure to journal a single block free. 
+ */ +static struct jfreefrag * +newjfreefrag(freefrag, ip, blkno, size, lbn) + struct freefrag *freefrag; + struct inode *ip; + ufs2_daddr_t blkno; + long size; + ufs_lbn_t lbn; +{ + struct jfreefrag *jfreefrag; + struct fs *fs; + + fs = ITOFS(ip); + jfreefrag = malloc(sizeof(struct jfreefrag), M_JFREEFRAG, + M_SOFTDEP_FLAGS); + workitem_alloc(&jfreefrag->fr_list, D_JFREEFRAG, ITOVFS(ip)); + jfreefrag->fr_jsegdep = newjsegdep(&jfreefrag->fr_list); + jfreefrag->fr_state = ATTACHED | DEPCOMPLETE; + jfreefrag->fr_ino = ip->i_number; + jfreefrag->fr_lbn = lbn; + jfreefrag->fr_blkno = blkno; + jfreefrag->fr_frags = numfrags(fs, size); + jfreefrag->fr_freefrag = freefrag; + + return (jfreefrag); +} + +/* + * Allocate a new freefrag structure. + */ +static struct freefrag * +newfreefrag(ip, blkno, size, lbn) + struct inode *ip; + ufs2_daddr_t blkno; + long size; + ufs_lbn_t lbn; +{ + struct freefrag *freefrag; + struct ufsmount *ump; + struct fs *fs; + + CTR4(KTR_SUJ, "newfreefrag: ino %d blkno %jd size %ld lbn %jd", + ip->i_number, blkno, size, lbn); + ump = ITOUMP(ip); + fs = ump->um_fs; + if (fragnum(fs, blkno) + numfrags(fs, size) > fs->fs_frag) + panic("newfreefrag: frag size"); + freefrag = malloc(sizeof(struct freefrag), + M_FREEFRAG, M_SOFTDEP_FLAGS); + workitem_alloc(&freefrag->ff_list, D_FREEFRAG, UFSTOVFS(ump)); + freefrag->ff_state = ATTACHED; + LIST_INIT(&freefrag->ff_jwork); + freefrag->ff_inum = ip->i_number; + freefrag->ff_vtype = ITOV(ip)->v_type; + freefrag->ff_blkno = blkno; + freefrag->ff_fragsize = size; + + if (MOUNTEDSUJ(UFSTOVFS(ump))) { + freefrag->ff_jdep = (struct worklist *) + newjfreefrag(freefrag, ip, blkno, size, lbn); + } else { + freefrag->ff_state |= DEPCOMPLETE; + freefrag->ff_jdep = NULL; + } + + return (freefrag); +} + +/* + * This workitem de-allocates fragments that were replaced during + * file block allocation. + */ +static void +handle_workitem_freefrag(freefrag) + struct freefrag *freefrag; +{ + struct ufsmount *ump = VFSTOUFS(freefrag->ff_list.wk_mp); + struct workhead wkhd; + + CTR3(KTR_SUJ, + "handle_workitem_freefrag: ino %d blkno %jd size %ld", + freefrag->ff_inum, freefrag->ff_blkno, freefrag->ff_fragsize); + /* + * It would be illegal to add new completion items to the + * freefrag after it was schedule to be done so it must be + * safe to modify the list head here. + */ + LIST_INIT(&wkhd); + ACQUIRE_LOCK(ump); + LIST_SWAP(&freefrag->ff_jwork, &wkhd, worklist, wk_list); + /* + * If the journal has not been written we must cancel it here. + */ + if (freefrag->ff_jdep) { + if (freefrag->ff_jdep->wk_type != D_JNEWBLK) + panic("handle_workitem_freefrag: Unexpected type %d\n", + freefrag->ff_jdep->wk_type); + cancel_jnewblk(WK_JNEWBLK(freefrag->ff_jdep), &wkhd); + } + FREE_LOCK(ump); + ffs_blkfree(ump, ump->um_fs, ump->um_devvp, freefrag->ff_blkno, + freefrag->ff_fragsize, freefrag->ff_inum, freefrag->ff_vtype, &wkhd); + ACQUIRE_LOCK(ump); + WORKITEM_FREE(freefrag, D_FREEFRAG); + FREE_LOCK(ump); +} + +/* + * Set up a dependency structure for an external attributes data block. + * This routine follows much of the structure of softdep_setup_allocdirect. + * See the description of softdep_setup_allocdirect above for details. 
+ */ +void +softdep_setup_allocext(ip, off, newblkno, oldblkno, newsize, oldsize, bp) + struct inode *ip; + ufs_lbn_t off; + ufs2_daddr_t newblkno; + ufs2_daddr_t oldblkno; + long newsize; + long oldsize; + struct buf *bp; +{ + struct allocdirect *adp, *oldadp; + struct allocdirectlst *adphead; + struct freefrag *freefrag; + struct inodedep *inodedep; + struct jnewblk *jnewblk; + struct newblk *newblk; + struct mount *mp; + struct ufsmount *ump; + ufs_lbn_t lbn; + + mp = ITOVFS(ip); + ump = VFSTOUFS(mp); + KASSERT(MOUNTEDSOFTDEP(mp) != 0, + ("softdep_setup_allocext called on non-softdep filesystem")); + KASSERT(off < NXADDR, ("softdep_setup_allocext: lbn %lld > NXADDR", + (long long)off)); + + lbn = bp->b_lblkno; + if (oldblkno && oldblkno != newblkno) + freefrag = newfreefrag(ip, oldblkno, oldsize, lbn); + else + freefrag = NULL; + + ACQUIRE_LOCK(ump); + if (newblk_lookup(mp, newblkno, 0, &newblk) == 0) + panic("softdep_setup_allocext: lost block"); + KASSERT(newblk->nb_list.wk_type == D_NEWBLK, + ("softdep_setup_allocext: newblk already initialized")); + /* + * Convert the newblk to an allocdirect. + */ + WORKITEM_REASSIGN(newblk, D_ALLOCDIRECT); + adp = (struct allocdirect *)newblk; + newblk->nb_freefrag = freefrag; + adp->ad_offset = off; + adp->ad_oldblkno = oldblkno; + adp->ad_newsize = newsize; + adp->ad_oldsize = oldsize; + adp->ad_state |= EXTDATA; + + /* + * Finish initializing the journal. + */ + if ((jnewblk = newblk->nb_jnewblk) != NULL) { + jnewblk->jn_ino = ip->i_number; + jnewblk->jn_lbn = lbn; + add_to_journal(&jnewblk->jn_list); + } + if (freefrag && freefrag->ff_jdep != NULL && + freefrag->ff_jdep->wk_type == D_JFREEFRAG) + add_to_journal(freefrag->ff_jdep); + inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep); + adp->ad_inodedep = inodedep; + + WORKLIST_INSERT(&bp->b_dep, &newblk->nb_list); + /* + * The list of allocdirects must be kept in sorted and ascending + * order so that the rollback routines can quickly determine the + * first uncommitted block (the size of the file stored on disk + * ends at the end of the lowest committed fragment, or if there + * are no fragments, at the end of the highest committed block). + * Since files generally grow, the typical case is that the new + * block is to be added at the end of the list. We speed this + * special case by checking against the last allocdirect in the + * list before laboriously traversing the list looking for the + * insertion point. + */ + adphead = &inodedep->id_newextupdt; + oldadp = TAILQ_LAST(adphead, allocdirectlst); + if (oldadp == NULL || oldadp->ad_offset <= off) { + /* insert at end of list */ + TAILQ_INSERT_TAIL(adphead, adp, ad_next); + if (oldadp != NULL && oldadp->ad_offset == off) + allocdirect_merge(adphead, adp, oldadp); + FREE_LOCK(ump); + return; + } + TAILQ_FOREACH(oldadp, adphead, ad_next) { + if (oldadp->ad_offset >= off) + break; + } + if (oldadp == NULL) + panic("softdep_setup_allocext: lost entry"); + /* insert in middle of list */ + TAILQ_INSERT_BEFORE(oldadp, adp, ad_next); + if (oldadp->ad_offset == off) + allocdirect_merge(adphead, adp, oldadp); + FREE_LOCK(ump); +} + +/* + * Indirect block allocation dependencies. + * + * The same dependencies that exist for a direct block also exist when + * a new block is allocated and pointed to by an entry in a block of + * indirect pointers. The undo/redo states described above are also + * used here. 
Because an indirect block contains many pointers that + * may have dependencies, a second copy of the entire in-memory indirect + * block is kept. The buffer cache copy is always completely up-to-date. + * The second copy, which is used only as a source for disk writes, + * contains only the safe pointers (i.e., those that have no remaining + * update dependencies). The second copy is freed when all pointers + * are safe. The cache is not allowed to replace indirect blocks with + * pending update dependencies. If a buffer containing an indirect + * block with dependencies is written, these routines will mark it + * dirty again. It can only be successfully written once all the + * dependencies are removed. The ffs_fsync routine in conjunction with + * softdep_sync_metadata work together to get all the dependencies + * removed so that a file can be successfully written to disk. Three + * procedures are used when setting up indirect block pointer + * dependencies. The division is necessary because of the organization + * of the "balloc" routine and because of the distinction between file + * pages and file metadata blocks. + */ + +/* + * Allocate a new allocindir structure. + */ +static struct allocindir * +newallocindir(ip, ptrno, newblkno, oldblkno, lbn) + struct inode *ip; /* inode for file being extended */ + int ptrno; /* offset of pointer in indirect block */ + ufs2_daddr_t newblkno; /* disk block number being added */ + ufs2_daddr_t oldblkno; /* previous block number, 0 if none */ + ufs_lbn_t lbn; +{ + struct newblk *newblk; + struct allocindir *aip; + struct freefrag *freefrag; + struct jnewblk *jnewblk; + + if (oldblkno) + freefrag = newfreefrag(ip, oldblkno, ITOFS(ip)->fs_bsize, lbn); + else + freefrag = NULL; + ACQUIRE_LOCK(ITOUMP(ip)); + if (newblk_lookup(ITOVFS(ip), newblkno, 0, &newblk) == 0) + panic("new_allocindir: lost block"); + KASSERT(newblk->nb_list.wk_type == D_NEWBLK, + ("newallocindir: newblk already initialized")); + WORKITEM_REASSIGN(newblk, D_ALLOCINDIR); + newblk->nb_freefrag = freefrag; + aip = (struct allocindir *)newblk; + aip->ai_offset = ptrno; + aip->ai_oldblkno = oldblkno; + aip->ai_lbn = lbn; + if ((jnewblk = newblk->nb_jnewblk) != NULL) { + jnewblk->jn_ino = ip->i_number; + jnewblk->jn_lbn = lbn; + add_to_journal(&jnewblk->jn_list); + } + if (freefrag && freefrag->ff_jdep != NULL && + freefrag->ff_jdep->wk_type == D_JFREEFRAG) + add_to_journal(freefrag->ff_jdep); + return (aip); +} + +/* + * Called just before setting an indirect block pointer + * to a newly allocated file page. 
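+ * When the page being added belongs to a directory, a pagedep is also set
+ * up (see the IFDIR check below) to track entry additions and removals.
+ *
+ * A minimal usage sketch from a balloc-style caller; vp is the file's
+ * vnode, and the names ibp (indirect block buffer) and nbp (new page
+ * buffer) are illustrative assumptions rather than code taken from a
+ * real caller:
+ *
+ *	if (DOINGSOFTDEP(vp))
+ *		softdep_setup_allocindir_page(ip, lbn, ibp, ptrno,
+ *		    newblkno, oldblkno, nbp);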
+ */ +void +softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, nbp) + struct inode *ip; /* inode for file being extended */ + ufs_lbn_t lbn; /* allocated block number within file */ + struct buf *bp; /* buffer with indirect blk referencing page */ + int ptrno; /* offset of pointer in indirect block */ + ufs2_daddr_t newblkno; /* disk block number being added */ + ufs2_daddr_t oldblkno; /* previous block number, 0 if none */ + struct buf *nbp; /* buffer holding allocated page */ +{ + struct inodedep *inodedep; + struct freefrag *freefrag; + struct allocindir *aip; + struct pagedep *pagedep; + struct mount *mp; + struct ufsmount *ump; + + mp = ITOVFS(ip); + ump = VFSTOUFS(mp); + KASSERT(MOUNTEDSOFTDEP(mp) != 0, + ("softdep_setup_allocindir_page called on non-softdep filesystem")); + KASSERT(lbn == nbp->b_lblkno, + ("softdep_setup_allocindir_page: lbn %jd != lblkno %jd", + lbn, bp->b_lblkno)); + CTR4(KTR_SUJ, + "softdep_setup_allocindir_page: ino %d blkno %jd oldblkno %jd " + "lbn %jd", ip->i_number, newblkno, oldblkno, lbn); + ASSERT_VOP_LOCKED(ITOV(ip), "softdep_setup_allocindir_page"); + aip = newallocindir(ip, ptrno, newblkno, oldblkno, lbn); + (void) inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep); + /* + * If we are allocating a directory page, then we must + * allocate an associated pagedep to track additions and + * deletions. + */ + if ((ip->i_mode & IFMT) == IFDIR) + pagedep_lookup(mp, nbp, ip->i_number, lbn, DEPALLOC, &pagedep); + WORKLIST_INSERT(&nbp->b_dep, &aip->ai_block.nb_list); + freefrag = setup_allocindir_phase2(bp, ip, inodedep, aip, lbn); + FREE_LOCK(ump); + if (freefrag) + handle_workitem_freefrag(freefrag); +} + +/* + * Called just before setting an indirect block pointer to a + * newly allocated indirect block. + */ +void +softdep_setup_allocindir_meta(nbp, ip, bp, ptrno, newblkno) + struct buf *nbp; /* newly allocated indirect block */ + struct inode *ip; /* inode for file being extended */ + struct buf *bp; /* indirect block referencing allocated block */ + int ptrno; /* offset of pointer in indirect block */ + ufs2_daddr_t newblkno; /* disk block number being added */ +{ + struct inodedep *inodedep; + struct allocindir *aip; + struct ufsmount *ump; + ufs_lbn_t lbn; + + ump = ITOUMP(ip); + KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0, + ("softdep_setup_allocindir_meta called on non-softdep filesystem")); + CTR3(KTR_SUJ, + "softdep_setup_allocindir_meta: ino %d blkno %jd ptrno %d", + ip->i_number, newblkno, ptrno); + lbn = nbp->b_lblkno; + ASSERT_VOP_LOCKED(ITOV(ip), "softdep_setup_allocindir_meta"); + aip = newallocindir(ip, ptrno, newblkno, 0, lbn); + inodedep_lookup(UFSTOVFS(ump), ip->i_number, DEPALLOC, &inodedep); + WORKLIST_INSERT(&nbp->b_dep, &aip->ai_block.nb_list); + if (setup_allocindir_phase2(bp, ip, inodedep, aip, lbn)) + panic("softdep_setup_allocindir_meta: Block already existed"); + FREE_LOCK(ump); +} + +static void +indirdep_complete(indirdep) + struct indirdep *indirdep; +{ + struct allocindir *aip; + + LIST_REMOVE(indirdep, ir_next); + indirdep->ir_state |= DEPCOMPLETE; + + while ((aip = LIST_FIRST(&indirdep->ir_completehd)) != NULL) { + LIST_REMOVE(aip, ai_next); + free_newblk(&aip->ai_block); + } + /* + * If this indirdep is not attached to a buf it was simply waiting + * on completion to clear completehd. free_indirdep() asserts + * that nothing is dangling. 
+ */ + if ((indirdep->ir_state & ONWORKLIST) == 0) + free_indirdep(indirdep); +} + +static struct indirdep * +indirdep_lookup(mp, ip, bp) + struct mount *mp; + struct inode *ip; + struct buf *bp; +{ + struct indirdep *indirdep, *newindirdep; + struct newblk *newblk; + struct ufsmount *ump; + struct worklist *wk; + struct fs *fs; + ufs2_daddr_t blkno; + + ump = VFSTOUFS(mp); + LOCK_OWNED(ump); + indirdep = NULL; + newindirdep = NULL; + fs = ump->um_fs; + for (;;) { + LIST_FOREACH(wk, &bp->b_dep, wk_list) { + if (wk->wk_type != D_INDIRDEP) + continue; + indirdep = WK_INDIRDEP(wk); + break; + } + /* Found on the buffer worklist, no new structure to free. */ + if (indirdep != NULL && newindirdep == NULL) + return (indirdep); + if (indirdep != NULL && newindirdep != NULL) + panic("indirdep_lookup: simultaneous create"); + /* None found on the buffer and a new structure is ready. */ + if (indirdep == NULL && newindirdep != NULL) + break; + /* None found and no new structure available. */ + FREE_LOCK(ump); + newindirdep = malloc(sizeof(struct indirdep), + M_INDIRDEP, M_SOFTDEP_FLAGS); + workitem_alloc(&newindirdep->ir_list, D_INDIRDEP, mp); + newindirdep->ir_state = ATTACHED; + if (I_IS_UFS1(ip)) + newindirdep->ir_state |= UFS1FMT; + TAILQ_INIT(&newindirdep->ir_trunc); + newindirdep->ir_saveddata = NULL; + LIST_INIT(&newindirdep->ir_deplisthd); + LIST_INIT(&newindirdep->ir_donehd); + LIST_INIT(&newindirdep->ir_writehd); + LIST_INIT(&newindirdep->ir_completehd); + if (bp->b_blkno == bp->b_lblkno) { + ufs_bmaparray(bp->b_vp, bp->b_lblkno, &blkno, bp, + NULL, NULL); + bp->b_blkno = blkno; + } + newindirdep->ir_freeblks = NULL; + newindirdep->ir_savebp = + getblk(ump->um_devvp, bp->b_blkno, bp->b_bcount, 0, 0, 0); + newindirdep->ir_bp = bp; + BUF_KERNPROC(newindirdep->ir_savebp); + bcopy(bp->b_data, newindirdep->ir_savebp->b_data, bp->b_bcount); + ACQUIRE_LOCK(ump); + } + indirdep = newindirdep; + WORKLIST_INSERT(&bp->b_dep, &indirdep->ir_list); + /* + * If the block is not yet allocated we don't set DEPCOMPLETE so + * that we don't free dependencies until the pointers are valid. + * This could search b_dep for D_ALLOCDIRECT/D_ALLOCINDIR rather + * than using the hash. + */ + if (newblk_lookup(mp, dbtofsb(fs, bp->b_blkno), 0, &newblk)) + LIST_INSERT_HEAD(&newblk->nb_indirdeps, indirdep, ir_next); + else + indirdep->ir_state |= DEPCOMPLETE; + return (indirdep); +} + +/* + * Called to finish the allocation of the "aip" allocated + * by one of the two routines above. + */ +static struct freefrag * +setup_allocindir_phase2(bp, ip, inodedep, aip, lbn) + struct buf *bp; /* in-memory copy of the indirect block */ + struct inode *ip; /* inode for file being extended */ + struct inodedep *inodedep; /* Inodedep for ip */ + struct allocindir *aip; /* allocindir allocated by the above routines */ + ufs_lbn_t lbn; /* Logical block number for this block. */ +{ + struct fs *fs; + struct indirdep *indirdep; + struct allocindir *oldaip; + struct freefrag *freefrag; + struct mount *mp; + struct ufsmount *ump; + + mp = ITOVFS(ip); + ump = VFSTOUFS(mp); + LOCK_OWNED(ump); + fs = ump->um_fs; + if (bp->b_lblkno >= 0) + panic("setup_allocindir_phase2: not indir blk"); + KASSERT(aip->ai_offset >= 0 && aip->ai_offset < NINDIR(fs), + ("setup_allocindir_phase2: Bad offset %d", aip->ai_offset)); + indirdep = indirdep_lookup(mp, ip, bp); + KASSERT(indirdep->ir_savebp != NULL, + ("setup_allocindir_phase2 NULL ir_savebp")); + aip->ai_indirdep = indirdep; + /* + * Check for an unwritten dependency for this indirect offset. 
If + * there is, merge the old dependency into the new one. This happens + * as a result of reallocblk only. + */ + freefrag = NULL; + if (aip->ai_oldblkno != 0) { + LIST_FOREACH(oldaip, &indirdep->ir_deplisthd, ai_next) { + if (oldaip->ai_offset == aip->ai_offset) { + freefrag = allocindir_merge(aip, oldaip); + goto done; + } + } + LIST_FOREACH(oldaip, &indirdep->ir_donehd, ai_next) { + if (oldaip->ai_offset == aip->ai_offset) { + freefrag = allocindir_merge(aip, oldaip); + goto done; + } + } + } +done: + LIST_INSERT_HEAD(&indirdep->ir_deplisthd, aip, ai_next); + return (freefrag); +} + +/* + * Merge two allocindirs which refer to the same block. Move newblock + * dependencies and setup the freefrags appropriately. + */ +static struct freefrag * +allocindir_merge(aip, oldaip) + struct allocindir *aip; + struct allocindir *oldaip; +{ + struct freefrag *freefrag; + struct worklist *wk; + + if (oldaip->ai_newblkno != aip->ai_oldblkno) + panic("allocindir_merge: blkno"); + aip->ai_oldblkno = oldaip->ai_oldblkno; + freefrag = aip->ai_freefrag; + aip->ai_freefrag = oldaip->ai_freefrag; + oldaip->ai_freefrag = NULL; + KASSERT(freefrag != NULL, ("setup_allocindir_phase2: No freefrag")); + /* + * If we are tracking a new directory-block allocation, + * move it from the old allocindir to the new allocindir. + */ + if ((wk = LIST_FIRST(&oldaip->ai_newdirblk)) != NULL) { + WORKLIST_REMOVE(wk); + if (!LIST_EMPTY(&oldaip->ai_newdirblk)) + panic("allocindir_merge: extra newdirblk"); + WORKLIST_INSERT(&aip->ai_newdirblk, wk); + } + /* + * We can skip journaling for this freefrag and just complete + * any pending journal work for the allocindir that is being + * removed after the freefrag completes. + */ + if (freefrag->ff_jdep) + cancel_jfreefrag(WK_JFREEFRAG(freefrag->ff_jdep)); + LIST_REMOVE(oldaip, ai_next); + freefrag->ff_jdep = (struct worklist *)cancel_newblk(&oldaip->ai_block, + &freefrag->ff_list, &freefrag->ff_jwork); + free_newblk(&oldaip->ai_block); + + return (freefrag); +} + +static inline void +setup_freedirect(freeblks, ip, i, needj) + struct freeblks *freeblks; + struct inode *ip; + int i; + int needj; +{ + struct ufsmount *ump; + ufs2_daddr_t blkno; + int frags; + + blkno = DIP(ip, i_db[i]); + if (blkno == 0) + return; + DIP_SET(ip, i_db[i], 0); + ump = ITOUMP(ip); + frags = sblksize(ump->um_fs, ip->i_size, i); + frags = numfrags(ump->um_fs, frags); + newfreework(ump, freeblks, NULL, i, blkno, frags, 0, needj); +} + +static inline void +setup_freeext(freeblks, ip, i, needj) + struct freeblks *freeblks; + struct inode *ip; + int i; + int needj; +{ + struct ufsmount *ump; + ufs2_daddr_t blkno; + int frags; + + blkno = ip->i_din2->di_extb[i]; + if (blkno == 0) + return; + ip->i_din2->di_extb[i] = 0; + ump = ITOUMP(ip); + frags = sblksize(ump->um_fs, ip->i_din2->di_extsize, i); + frags = numfrags(ump->um_fs, frags); + newfreework(ump, freeblks, NULL, -1 - i, blkno, frags, 0, needj); +} + +static inline void +setup_freeindir(freeblks, ip, i, lbn, needj) + struct freeblks *freeblks; + struct inode *ip; + int i; + ufs_lbn_t lbn; + int needj; +{ + struct ufsmount *ump; + ufs2_daddr_t blkno; + + blkno = DIP(ip, i_ib[i]); + if (blkno == 0) + return; + DIP_SET(ip, i_ib[i], 0); + ump = ITOUMP(ip); + newfreework(ump, freeblks, NULL, lbn, blkno, ump->um_fs->fs_frag, + 0, needj); +} + +static inline struct freeblks * +newfreeblks(mp, ip) + struct mount *mp; + struct inode *ip; +{ + struct freeblks *freeblks; + + freeblks = malloc(sizeof(struct freeblks), + M_FREEBLKS, M_SOFTDEP_FLAGS|M_ZERO); + 
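+	/*
+	 * M_ZERO above clears the structure, including the quota
+	 * references used under #ifdef QUOTA; only the fields that need
+	 * non-zero values are filled in explicitly below.
+	 */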
workitem_alloc(&freeblks->fb_list, D_FREEBLKS, mp); + LIST_INIT(&freeblks->fb_jblkdephd); + LIST_INIT(&freeblks->fb_jwork); + freeblks->fb_ref = 0; + freeblks->fb_cgwait = 0; + freeblks->fb_state = ATTACHED; + freeblks->fb_uid = ip->i_uid; + freeblks->fb_inum = ip->i_number; + freeblks->fb_vtype = ITOV(ip)->v_type; + freeblks->fb_modrev = DIP(ip, i_modrev); + freeblks->fb_devvp = ITODEVVP(ip); + freeblks->fb_chkcnt = 0; + freeblks->fb_len = 0; + + return (freeblks); +} + +static void +trunc_indirdep(indirdep, freeblks, bp, off) + struct indirdep *indirdep; + struct freeblks *freeblks; + struct buf *bp; + int off; +{ + struct allocindir *aip, *aipn; + + /* + * The first set of allocindirs won't be in savedbp. + */ + LIST_FOREACH_SAFE(aip, &indirdep->ir_deplisthd, ai_next, aipn) + if (aip->ai_offset > off) + cancel_allocindir(aip, bp, freeblks, 1); + LIST_FOREACH_SAFE(aip, &indirdep->ir_donehd, ai_next, aipn) + if (aip->ai_offset > off) + cancel_allocindir(aip, bp, freeblks, 1); + /* + * These will exist in savedbp. + */ + LIST_FOREACH_SAFE(aip, &indirdep->ir_writehd, ai_next, aipn) + if (aip->ai_offset > off) + cancel_allocindir(aip, NULL, freeblks, 0); + LIST_FOREACH_SAFE(aip, &indirdep->ir_completehd, ai_next, aipn) + if (aip->ai_offset > off) + cancel_allocindir(aip, NULL, freeblks, 0); +} + +/* + * Follow the chain of indirects down to lastlbn creating a freework + * structure for each. This will be used to start indir_trunc() at + * the right offset and create the journal records for the parrtial + * truncation. A second step will handle the truncated dependencies. + */ +static int +setup_trunc_indir(freeblks, ip, lbn, lastlbn, blkno) + struct freeblks *freeblks; + struct inode *ip; + ufs_lbn_t lbn; + ufs_lbn_t lastlbn; + ufs2_daddr_t blkno; +{ + struct indirdep *indirdep; + struct indirdep *indirn; + struct freework *freework; + struct newblk *newblk; + struct mount *mp; + struct ufsmount *ump; + struct buf *bp; + uint8_t *start; + uint8_t *end; + ufs_lbn_t lbnadd; + int level; + int error; + int off; + + + freework = NULL; + if (blkno == 0) + return (0); + mp = freeblks->fb_list.wk_mp; + ump = VFSTOUFS(mp); + bp = getblk(ITOV(ip), lbn, mp->mnt_stat.f_iosize, 0, 0, 0); + if ((bp->b_flags & B_CACHE) == 0) { + bp->b_blkno = blkptrtodb(VFSTOUFS(mp), blkno); + bp->b_iocmd = BIO_READ; + bp->b_flags &= ~B_INVAL; + bp->b_ioflags &= ~BIO_ERROR; + vfs_busy_pages(bp, 0); + bp->b_iooffset = dbtob(bp->b_blkno); + bstrategy(bp); +#ifdef RACCT + if (racct_enable) { + PROC_LOCK(curproc); + racct_add_buf(curproc, bp, 0); + PROC_UNLOCK(curproc); + } +#endif /* RACCT */ + curthread->td_ru.ru_inblock++; + error = bufwait(bp); + if (error) { + brelse(bp); + return (error); + } + } + level = lbn_level(lbn); + lbnadd = lbn_offset(ump->um_fs, level); + /* + * Compute the offset of the last block we want to keep. Store + * in the freework the first block we want to completely free. + */ + off = (lastlbn - -(lbn + level)) / lbnadd; + if (off + 1 == NINDIR(ump->um_fs)) + goto nowork; + freework = newfreework(ump, freeblks, NULL, lbn, blkno, 0, off + 1, 0); + /* + * Link the freework into the indirdep. This will prevent any new + * allocations from proceeding until we are finished with the + * truncate and the block is written. 
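+ * The pointer index passed to newfreework() above (off + 1) records where
+ * the zeroed region of this indirect begins; complete_trunc_indir() later
+ * zeroes the saved copy starting from that same offset (fw_start).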
+ */ + ACQUIRE_LOCK(ump); + indirdep = indirdep_lookup(mp, ip, bp); + if (indirdep->ir_freeblks) + panic("setup_trunc_indir: indirdep already truncated."); + TAILQ_INSERT_TAIL(&indirdep->ir_trunc, freework, fw_next); + freework->fw_indir = indirdep; + /* + * Cancel any allocindirs that will not make it to disk. + * We have to do this for all copies of the indirdep that + * live on this newblk. + */ + if ((indirdep->ir_state & DEPCOMPLETE) == 0) { + newblk_lookup(mp, dbtofsb(ump->um_fs, bp->b_blkno), 0, &newblk); + LIST_FOREACH(indirn, &newblk->nb_indirdeps, ir_next) + trunc_indirdep(indirn, freeblks, bp, off); + } else + trunc_indirdep(indirdep, freeblks, bp, off); + FREE_LOCK(ump); + /* + * Creation is protected by the buf lock. The saveddata is only + * needed if a full truncation follows a partial truncation but it + * is difficult to allocate in that case so we fetch it anyway. + */ + if (indirdep->ir_saveddata == NULL) + indirdep->ir_saveddata = malloc(bp->b_bcount, M_INDIRDEP, + M_SOFTDEP_FLAGS); +nowork: + /* Fetch the blkno of the child and the zero start offset. */ + if (I_IS_UFS1(ip)) { + blkno = ((ufs1_daddr_t *)bp->b_data)[off]; + start = (uint8_t *)&((ufs1_daddr_t *)bp->b_data)[off+1]; + } else { + blkno = ((ufs2_daddr_t *)bp->b_data)[off]; + start = (uint8_t *)&((ufs2_daddr_t *)bp->b_data)[off+1]; + } + if (freework) { + /* Zero the truncated pointers. */ + end = bp->b_data + bp->b_bcount; + bzero(start, end - start); + bdwrite(bp); + } else + bqrelse(bp); + if (level == 0) + return (0); + lbn++; /* adjust level */ + lbn -= (off * lbnadd); + return setup_trunc_indir(freeblks, ip, lbn, lastlbn, blkno); +} + +/* + * Complete the partial truncation of an indirect block setup by + * setup_trunc_indir(). This zeros the truncated pointers in the saved + * copy and writes them to disk before the freeblks is allowed to complete. + */ +static void +complete_trunc_indir(freework) + struct freework *freework; +{ + struct freework *fwn; + struct indirdep *indirdep; + struct ufsmount *ump; + struct buf *bp; + uintptr_t start; + int count; + + ump = VFSTOUFS(freework->fw_list.wk_mp); + LOCK_OWNED(ump); + indirdep = freework->fw_indir; + for (;;) { + bp = indirdep->ir_bp; + /* See if the block was discarded. */ + if (bp == NULL) + break; + /* Inline part of getdirtybuf(). We dont want bremfree. */ + if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) == 0) + break; + if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, + LOCK_PTR(ump)) == 0) + BUF_UNLOCK(bp); + ACQUIRE_LOCK(ump); + } + freework->fw_state |= DEPCOMPLETE; + TAILQ_REMOVE(&indirdep->ir_trunc, freework, fw_next); + /* + * Zero the pointers in the saved copy. + */ + if (indirdep->ir_state & UFS1FMT) + start = sizeof(ufs1_daddr_t); + else + start = sizeof(ufs2_daddr_t); + start *= freework->fw_start; + count = indirdep->ir_savebp->b_bcount - start; + start += (uintptr_t)indirdep->ir_savebp->b_data; + bzero((char *)start, count); + /* + * We need to start the next truncation in the list if it has not + * been started yet. + */ + fwn = TAILQ_FIRST(&indirdep->ir_trunc); + if (fwn != NULL) { + if (fwn->fw_freeblks == indirdep->ir_freeblks) + TAILQ_REMOVE(&indirdep->ir_trunc, fwn, fw_next); + if ((fwn->fw_state & ONWORKLIST) == 0) + freework_enqueue(fwn); + } + /* + * If bp is NULL the block was fully truncated, restore + * the saved block list otherwise free it if it is no + * longer needed. 
+ */ + if (TAILQ_EMPTY(&indirdep->ir_trunc)) { + if (bp == NULL) + bcopy(indirdep->ir_saveddata, + indirdep->ir_savebp->b_data, + indirdep->ir_savebp->b_bcount); + free(indirdep->ir_saveddata, M_INDIRDEP); + indirdep->ir_saveddata = NULL; + } + /* + * When bp is NULL there is a full truncation pending. We + * must wait for this full truncation to be journaled before + * we can release this freework because the disk pointers will + * never be written as zero. + */ + if (bp == NULL) { + if (LIST_EMPTY(&indirdep->ir_freeblks->fb_jblkdephd)) + handle_written_freework(freework); + else + WORKLIST_INSERT(&indirdep->ir_freeblks->fb_freeworkhd, + &freework->fw_list); + } else { + /* Complete when the real copy is written. */ + WORKLIST_INSERT(&bp->b_dep, &freework->fw_list); + BUF_UNLOCK(bp); + } +} + +/* + * Calculate the number of blocks we are going to release where datablocks + * is the current total and length is the new file size. + */ +static ufs2_daddr_t +blkcount(fs, datablocks, length) + struct fs *fs; + ufs2_daddr_t datablocks; + off_t length; +{ + off_t totblks, numblks; + + totblks = 0; + numblks = howmany(length, fs->fs_bsize); + if (numblks <= NDADDR) { + totblks = howmany(length, fs->fs_fsize); + goto out; + } + totblks = blkstofrags(fs, numblks); + numblks -= NDADDR; + /* + * Count all single, then double, then triple indirects required. + * Subtracting one indirects worth of blocks for each pass + * acknowledges one of each pointed to by the inode. + */ + for (;;) { + totblks += blkstofrags(fs, howmany(numblks, NINDIR(fs))); + numblks -= NINDIR(fs); + if (numblks <= 0) + break; + numblks = howmany(numblks, NINDIR(fs)); + } +out: + totblks = fsbtodb(fs, totblks); + /* + * Handle sparse files. We can't reclaim more blocks than the inode + * references. We will correct it later in handle_complete_freeblks() + * when we know the real count. + */ + if (totblks > datablocks) + return (0); + return (datablocks - totblks); +} + +/* + * Handle freeblocks for journaled softupdate filesystems. + * + * Contrary to normal softupdates, we must preserve the block pointers in + * indirects until their subordinates are free. This is to avoid journaling + * every block that is freed which may consume more space than the journal + * itself. The recovery program will see the free block journals at the + * base of the truncated area and traverse them to reclaim space. The + * pointers in the inode may be cleared immediately after the journal + * records are written because each direct and indirect pointer in the + * inode is recorded in a journal. This permits full truncation to proceed + * asynchronously. The write order is journal -> inode -> cgs -> indirects. + * + * The algorithm is as follows: + * 1) Traverse the in-memory state and create journal entries to release + * the relevant blocks and full indirect trees. + * 2) Traverse the indirect block chain adding partial truncation freework + * records to indirects in the path to lastlbn. The freework will + * prevent new allocation dependencies from being satisfied in this + * indirect until the truncation completes. + * 3) Read and lock the inode block, performing an update with the new size + * and pointers. This prevents truncated data from becoming valid on + * disk through step 4. + * 4) Reap unsatisfied dependencies that are beyond the truncated area, + * eliminate journal work for those records that do not require it. + * 5) Schedule the journal records to be written followed by the inode block. 
+ * 6) Allocate any necessary frags for the end of file. + * 7) Zero any partially truncated blocks. + * + * From this truncation proceeds asynchronously using the freework and + * indir_trunc machinery. The file will not be extended again into a + * partially truncated indirect block until all work is completed but + * the normal dependency mechanism ensures that it is rolled back/forward + * as appropriate. Further truncation may occur without delay and is + * serialized in indir_trunc(). + */ +void +softdep_journal_freeblocks(ip, cred, length, flags) + struct inode *ip; /* The inode whose length is to be reduced */ + struct ucred *cred; + off_t length; /* The new length for the file */ + int flags; /* IO_EXT and/or IO_NORMAL */ +{ + struct freeblks *freeblks, *fbn; + struct worklist *wk, *wkn; + struct inodedep *inodedep; + struct jblkdep *jblkdep; + struct allocdirect *adp, *adpn; + struct ufsmount *ump; + struct fs *fs; + struct buf *bp; + struct vnode *vp; + struct mount *mp; + ufs2_daddr_t extblocks, datablocks; + ufs_lbn_t tmpval, lbn, lastlbn; + int frags, lastoff, iboff, allocblock, needj, error, i; + + ump = ITOUMP(ip); + mp = UFSTOVFS(ump); + fs = ump->um_fs; + KASSERT(MOUNTEDSOFTDEP(mp) != 0, + ("softdep_journal_freeblocks called on non-softdep filesystem")); + vp = ITOV(ip); + needj = 1; + iboff = -1; + allocblock = 0; + extblocks = 0; + datablocks = 0; + frags = 0; + freeblks = newfreeblks(mp, ip); + ACQUIRE_LOCK(ump); + /* + * If we're truncating a removed file that will never be written + * we don't need to journal the block frees. The canceled journals + * for the allocations will suffice. + */ + inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep); + if ((inodedep->id_state & (UNLINKED | DEPCOMPLETE)) == UNLINKED && + length == 0) + needj = 0; + CTR3(KTR_SUJ, "softdep_journal_freeblks: ip %d length %ld needj %d", + ip->i_number, length, needj); + FREE_LOCK(ump); + /* + * Calculate the lbn that we are truncating to. This results in -1 + * if we're truncating the 0 bytes. So it is the last lbn we want + * to keep, not the first lbn we want to truncate. + */ + lastlbn = lblkno(fs, length + fs->fs_bsize - 1) - 1; + lastoff = blkoff(fs, length); + /* + * Compute frags we are keeping in lastlbn. 0 means all. + */ + if (lastlbn >= 0 && lastlbn < NDADDR) { + frags = fragroundup(fs, lastoff); + /* adp offset of last valid allocdirect. */ + iboff = lastlbn; + } else if (lastlbn > 0) + iboff = NDADDR; + if (fs->fs_magic == FS_UFS2_MAGIC) + extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize)); + /* + * Handle normal data blocks and indirects. This section saves + * values used after the inode update to complete frag and indirect + * truncation. + */ + if ((flags & IO_NORMAL) != 0) { + /* + * Handle truncation of whole direct and indirect blocks. + */ + for (i = iboff + 1; i < NDADDR; i++) + setup_freedirect(freeblks, ip, i, needj); + for (i = 0, tmpval = NINDIR(fs), lbn = NDADDR; i < NIADDR; + i++, lbn += tmpval, tmpval *= NINDIR(fs)) { + /* Release a whole indirect tree. */ + if (lbn > lastlbn) { + setup_freeindir(freeblks, ip, i, -lbn -i, + needj); + continue; + } + iboff = i + NDADDR; + /* + * Traverse partially truncated indirect tree. + */ + if (lbn <= lastlbn && lbn + tmpval - 1 > lastlbn) + setup_trunc_indir(freeblks, ip, -lbn - i, + lastlbn, DIP(ip, i_ib[i])); + } + /* + * Handle partial truncation to a frag boundary. 
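+ * For example, on an 8K/1K filesystem (fs_bsize 8192, fs_fsize 1024) a
+ * file occupying one full block that is truncated to 3000 bytes gives
+ *
+ *	lastlbn = 0
+ *	frags   = fragroundup(fs, blkoff(fs, 3000)) = 3072
+ *
+ * so the code below keeps the first three fragments and hands the
+ * trailing five, starting at blkno + 3, to newfreework().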
+ */ + if (frags) { + ufs2_daddr_t blkno; + long oldfrags; + + oldfrags = blksize(fs, ip, lastlbn); + blkno = DIP(ip, i_db[lastlbn]); + if (blkno && oldfrags != frags) { + oldfrags -= frags; + oldfrags = numfrags(fs, oldfrags); + blkno += numfrags(fs, frags); + newfreework(ump, freeblks, NULL, lastlbn, + blkno, oldfrags, 0, needj); + if (needj) + adjust_newfreework(freeblks, + numfrags(fs, frags)); + } else if (blkno == 0) + allocblock = 1; + } + /* + * Add a journal record for partial truncate if we are + * handling indirect blocks. Non-indirects need no extra + * journaling. + */ + if (length != 0 && lastlbn >= NDADDR) { + ip->i_flag |= IN_TRUNCATED; + newjtrunc(freeblks, length, 0); + } + ip->i_size = length; + DIP_SET(ip, i_size, ip->i_size); + datablocks = DIP(ip, i_blocks) - extblocks; + if (length != 0) + datablocks = blkcount(fs, datablocks, length); + freeblks->fb_len = length; + } + if ((flags & IO_EXT) != 0) { + for (i = 0; i < NXADDR; i++) + setup_freeext(freeblks, ip, i, needj); + ip->i_din2->di_extsize = 0; + datablocks += extblocks; + } +#ifdef QUOTA + /* Reference the quotas in case the block count is wrong in the end. */ + quotaref(vp, freeblks->fb_quota); + (void) chkdq(ip, -datablocks, NOCRED, 0); +#endif + freeblks->fb_chkcnt = -datablocks; + UFS_LOCK(ump); + fs->fs_pendingblocks += datablocks; + UFS_UNLOCK(ump); + DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - datablocks); + /* + * Handle truncation of incomplete alloc direct dependencies. We + * hold the inode block locked to prevent incomplete dependencies + * from reaching the disk while we are eliminating those that + * have been truncated. This is a partially inlined ffs_update(). + */ + ufs_itimes(vp); + ip->i_flag &= ~(IN_LAZYACCESS | IN_LAZYMOD | IN_MODIFIED); + error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), + (int)fs->fs_bsize, cred, &bp); + if (error) { + brelse(bp); + softdep_error("softdep_journal_freeblocks", error); + return; + } + if (bp->b_bufsize == fs->fs_bsize) + bp->b_flags |= B_CLUSTEROK; + softdep_update_inodeblock(ip, bp, 0); + if (ump->um_fstype == UFS1) + *((struct ufs1_dinode *)bp->b_data + + ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1; + else + *((struct ufs2_dinode *)bp->b_data + + ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2; + ACQUIRE_LOCK(ump); + (void) inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep); + if ((inodedep->id_state & IOSTARTED) != 0) + panic("softdep_setup_freeblocks: inode busy"); + /* + * Add the freeblks structure to the list of operations that + * must await the zero'ed inode being written to disk. If we + * still have a bitmap dependency (needj), then the inode + * has never been written to disk, so we can process the + * freeblks below once we have deleted the dependencies. + */ + if (needj) + WORKLIST_INSERT(&bp->b_dep, &freeblks->fb_list); + else + freeblks->fb_state |= COMPLETE; + if ((flags & IO_NORMAL) != 0) { + TAILQ_FOREACH_SAFE(adp, &inodedep->id_inoupdt, ad_next, adpn) { + if (adp->ad_offset > iboff) + cancel_allocdirect(&inodedep->id_inoupdt, adp, + freeblks); + /* + * Truncate the allocdirect. We could eliminate + * or modify journal records as well. + */ + else if (adp->ad_offset == iboff && frags) + adp->ad_newsize = frags; + } + } + if ((flags & IO_EXT) != 0) + while ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != NULL) + cancel_allocdirect(&inodedep->id_extupdt, adp, + freeblks); + /* + * Scan the bufwait list for newblock dependencies that will never + * make it to disk. 
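+ * Each matching allocdirect describes a block in the range being removed,
+ * so its journaled free is canceled (cancel_jfreeblk()) and the newblk is
+ * queued on fb_freeworkhd to be reclaimed along with the truncation.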
+ */ + LIST_FOREACH_SAFE(wk, &inodedep->id_bufwait, wk_list, wkn) { + if (wk->wk_type != D_ALLOCDIRECT) + continue; + adp = WK_ALLOCDIRECT(wk); + if (((flags & IO_NORMAL) != 0 && (adp->ad_offset > iboff)) || + ((flags & IO_EXT) != 0 && (adp->ad_state & EXTDATA))) { + cancel_jfreeblk(freeblks, adp->ad_newblkno); + cancel_newblk(WK_NEWBLK(wk), NULL, &freeblks->fb_jwork); + WORKLIST_INSERT(&freeblks->fb_freeworkhd, wk); + } + } + /* + * Add journal work. + */ + LIST_FOREACH(jblkdep, &freeblks->fb_jblkdephd, jb_deps) + add_to_journal(&jblkdep->jb_list); + FREE_LOCK(ump); + bdwrite(bp); + /* + * Truncate dependency structures beyond length. + */ + trunc_dependencies(ip, freeblks, lastlbn, frags, flags); + /* + * This is only set when we need to allocate a fragment because + * none existed at the end of a frag-sized file. It handles only + * allocating a new, zero filled block. + */ + if (allocblock) { + ip->i_size = length - lastoff; + DIP_SET(ip, i_size, ip->i_size); + error = UFS_BALLOC(vp, length - 1, 1, cred, BA_CLRBUF, &bp); + if (error != 0) { + softdep_error("softdep_journal_freeblks", error); + return; + } + ip->i_size = length; + DIP_SET(ip, i_size, length); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + allocbuf(bp, frags); + ffs_update(vp, 0); + bawrite(bp); + } else if (lastoff != 0 && vp->v_type != VDIR) { + int size; + + /* + * Zero the end of a truncated frag or block. + */ + size = sblksize(fs, length, lastlbn); + error = bread(vp, lastlbn, size, cred, &bp); + if (error) { + softdep_error("softdep_journal_freeblks", error); + return; + } + bzero((char *)bp->b_data + lastoff, size - lastoff); + bawrite(bp); + + } + ACQUIRE_LOCK(ump); + inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep); + TAILQ_INSERT_TAIL(&inodedep->id_freeblklst, freeblks, fb_next); + freeblks->fb_state |= DEPCOMPLETE | ONDEPLIST; + /* + * We zero earlier truncations so they don't erroneously + * update i_blocks. + */ + if (freeblks->fb_len == 0 && (flags & IO_NORMAL) != 0) + TAILQ_FOREACH(fbn, &inodedep->id_freeblklst, fb_next) + fbn->fb_len = 0; + if ((freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE && + LIST_EMPTY(&freeblks->fb_jblkdephd)) + freeblks->fb_state |= INPROGRESS; + else + freeblks = NULL; + FREE_LOCK(ump); + if (freeblks) + handle_workitem_freeblocks(freeblks, 0); + trunc_pages(ip, length, extblocks, flags); + +} + +/* + * Flush a JOP_SYNC to the journal. + */ +void +softdep_journal_fsync(ip) + struct inode *ip; +{ + struct jfsync *jfsync; + struct ufsmount *ump; + + ump = ITOUMP(ip); + KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0, + ("softdep_journal_fsync called on non-softdep filesystem")); + if ((ip->i_flag & IN_TRUNCATED) == 0) + return; + ip->i_flag &= ~IN_TRUNCATED; + jfsync = malloc(sizeof(*jfsync), M_JFSYNC, M_SOFTDEP_FLAGS | M_ZERO); + workitem_alloc(&jfsync->jfs_list, D_JFSYNC, UFSTOVFS(ump)); + jfsync->jfs_size = ip->i_size; + jfsync->jfs_ino = ip->i_number; + ACQUIRE_LOCK(ump); + add_to_journal(&jfsync->jfs_list); + jwait(&jfsync->jfs_list, MNT_WAIT); + FREE_LOCK(ump); +} + +/* + * Block de-allocation dependencies. + * + * When blocks are de-allocated, the on-disk pointers must be nullified before + * the blocks are made available for use by other files. (The true + * requirement is that old pointers must be nullified before new on-disk + * pointers are set. We chose this slightly more stringent requirement to + * reduce complexity.) 
Our implementation handles this dependency by updating + * the inode (or indirect block) appropriately but delaying the actual block + * de-allocation (i.e., freemap and free space count manipulation) until + * after the updated versions reach stable storage. After the disk is + * updated, the blocks can be safely de-allocated whenever it is convenient. + * This implementation handles only the common case of reducing a file's + * length to zero. Other cases are handled by the conventional synchronous + * write approach. + * + * The ffs implementation with which we worked double-checks + * the state of the block pointers and file size as it reduces + * a file's length. Some of this code is replicated here in our + * soft updates implementation. The freeblks->fb_chkcnt field is + * used to transfer a part of this information to the procedure + * that eventually de-allocates the blocks. + * + * This routine should be called from the routine that shortens + * a file's length, before the inode's size or block pointers + * are modified. It will save the block pointer information for + * later release and zero the inode so that the calling routine + * can release it. + */ +void +softdep_setup_freeblocks(ip, length, flags) + struct inode *ip; /* The inode whose length is to be reduced */ + off_t length; /* The new length for the file */ + int flags; /* IO_EXT and/or IO_NORMAL */ +{ + struct ufs1_dinode *dp1; + struct ufs2_dinode *dp2; + struct freeblks *freeblks; + struct inodedep *inodedep; + struct allocdirect *adp; + struct ufsmount *ump; + struct buf *bp; + struct fs *fs; + ufs2_daddr_t extblocks, datablocks; + struct mount *mp; + int i, delay, error; + ufs_lbn_t tmpval; + ufs_lbn_t lbn; + + ump = ITOUMP(ip); + mp = UFSTOVFS(ump); + KASSERT(MOUNTEDSOFTDEP(mp) != 0, + ("softdep_setup_freeblocks called on non-softdep filesystem")); + CTR2(KTR_SUJ, "softdep_setup_freeblks: ip %d length %ld", + ip->i_number, length); + KASSERT(length == 0, ("softdep_setup_freeblocks: non-zero length")); + fs = ump->um_fs; + if ((error = bread(ump->um_devvp, + fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), + (int)fs->fs_bsize, NOCRED, &bp)) != 0) { + brelse(bp); + softdep_error("softdep_setup_freeblocks", error); + return; + } + freeblks = newfreeblks(mp, ip); + extblocks = 0; + datablocks = 0; + if (fs->fs_magic == FS_UFS2_MAGIC) + extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize)); + if ((flags & IO_NORMAL) != 0) { + for (i = 0; i < NDADDR; i++) + setup_freedirect(freeblks, ip, i, 0); + for (i = 0, tmpval = NINDIR(fs), lbn = NDADDR; i < NIADDR; + i++, lbn += tmpval, tmpval *= NINDIR(fs)) + setup_freeindir(freeblks, ip, i, -lbn -i, 0); + ip->i_size = 0; + DIP_SET(ip, i_size, 0); + datablocks = DIP(ip, i_blocks) - extblocks; + } + if ((flags & IO_EXT) != 0) { + for (i = 0; i < NXADDR; i++) + setup_freeext(freeblks, ip, i, 0); + ip->i_din2->di_extsize = 0; + datablocks += extblocks; + } +#ifdef QUOTA + /* Reference the quotas in case the block count is wrong in the end. */ + quotaref(ITOV(ip), freeblks->fb_quota); + (void) chkdq(ip, -datablocks, NOCRED, 0); +#endif + freeblks->fb_chkcnt = -datablocks; + UFS_LOCK(ump); + fs->fs_pendingblocks += datablocks; + UFS_UNLOCK(ump); + DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - datablocks); + /* + * Push the zero'ed inode to its disk buffer so that we are free + * to delete its dependencies below. Once the dependencies are gone + * the buffer can be safely released. 
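+ * The on-disk di_freelink is copied back into the in-core dinode first so
+ * that the unlinked-inode list maintained on disk is not overwritten by a
+ * stale in-core value.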
+ */ + if (ump->um_fstype == UFS1) { + dp1 = ((struct ufs1_dinode *)bp->b_data + + ino_to_fsbo(fs, ip->i_number)); + ip->i_din1->di_freelink = dp1->di_freelink; + *dp1 = *ip->i_din1; + } else { + dp2 = ((struct ufs2_dinode *)bp->b_data + + ino_to_fsbo(fs, ip->i_number)); + ip->i_din2->di_freelink = dp2->di_freelink; + *dp2 = *ip->i_din2; + } + /* + * Find and eliminate any inode dependencies. + */ + ACQUIRE_LOCK(ump); + (void) inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep); + if ((inodedep->id_state & IOSTARTED) != 0) + panic("softdep_setup_freeblocks: inode busy"); + /* + * Add the freeblks structure to the list of operations that + * must await the zero'ed inode being written to disk. If we + * still have a bitmap dependency (delay == 0), then the inode + * has never been written to disk, so we can process the + * freeblks below once we have deleted the dependencies. + */ + delay = (inodedep->id_state & DEPCOMPLETE); + if (delay) + WORKLIST_INSERT(&bp->b_dep, &freeblks->fb_list); + else + freeblks->fb_state |= COMPLETE; + /* + * Because the file length has been truncated to zero, any + * pending block allocation dependency structures associated + * with this inode are obsolete and can simply be de-allocated. + * We must first merge the two dependency lists to get rid of + * any duplicate freefrag structures, then purge the merged list. + * If we still have a bitmap dependency, then the inode has never + * been written to disk, so we can free any fragments without delay. + */ + if (flags & IO_NORMAL) { + merge_inode_lists(&inodedep->id_newinoupdt, + &inodedep->id_inoupdt); + while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != NULL) + cancel_allocdirect(&inodedep->id_inoupdt, adp, + freeblks); + } + if (flags & IO_EXT) { + merge_inode_lists(&inodedep->id_newextupdt, + &inodedep->id_extupdt); + while ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != NULL) + cancel_allocdirect(&inodedep->id_extupdt, adp, + freeblks); + } + FREE_LOCK(ump); + bdwrite(bp); + trunc_dependencies(ip, freeblks, -1, 0, flags); + ACQUIRE_LOCK(ump); + if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) != 0) + (void) free_inodedep(inodedep); + freeblks->fb_state |= DEPCOMPLETE; + /* + * If the inode with zeroed block pointers is now on disk + * we can start freeing blocks. + */ + if ((freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE) + freeblks->fb_state |= INPROGRESS; + else + freeblks = NULL; + FREE_LOCK(ump); + if (freeblks) + handle_workitem_freeblocks(freeblks, 0); + trunc_pages(ip, length, extblocks, flags); +} + +/* + * Eliminate pages from the page cache that back parts of this inode and + * adjust the vnode pager's idea of our size. This prevents stale data + * from hanging around in the page cache. + */ +static void +trunc_pages(ip, length, extblocks, flags) + struct inode *ip; + off_t length; + ufs2_daddr_t extblocks; + int flags; +{ + struct vnode *vp; + struct fs *fs; + ufs_lbn_t lbn; + off_t end, extend; + + vp = ITOV(ip); + fs = ITOFS(ip); + extend = OFF_TO_IDX(lblktosize(fs, -extblocks)); + if ((flags & IO_EXT) != 0) + vn_pages_remove(vp, extend, 0); + if ((flags & IO_NORMAL) == 0) + return; + BO_LOCK(&vp->v_bufobj); + drain_output(vp); + BO_UNLOCK(&vp->v_bufobj); + /* + * The vnode pager eliminates file pages we eliminate indirects + * below. + */ + vnode_pager_setsize(vp, length); + /* + * Calculate the end based on the last indirect we want to keep. If + * the block extends into indirects we can just use the negative of + * its lbn. 
Doubles and triples exist at lower numbers so we must + * be careful not to remove those, if they exist. double and triple + * indirect lbns do not overlap with others so it is not important + * to verify how many levels are required. + */ + lbn = lblkno(fs, length); + if (lbn >= NDADDR) { + /* Calculate the virtual lbn of the triple indirect. */ + lbn = -lbn - (NIADDR - 1); + end = OFF_TO_IDX(lblktosize(fs, lbn)); + } else + end = extend; + vn_pages_remove(vp, OFF_TO_IDX(OFF_MAX), end); +} + +/* + * See if the buf bp is in the range eliminated by truncation. + */ +static int +trunc_check_buf(bp, blkoffp, lastlbn, lastoff, flags) + struct buf *bp; + int *blkoffp; + ufs_lbn_t lastlbn; + int lastoff; + int flags; +{ + ufs_lbn_t lbn; + + *blkoffp = 0; + /* Only match ext/normal blocks as appropriate. */ + if (((flags & IO_EXT) == 0 && (bp->b_xflags & BX_ALTDATA)) || + ((flags & IO_NORMAL) == 0 && (bp->b_xflags & BX_ALTDATA) == 0)) + return (0); + /* ALTDATA is always a full truncation. */ + if ((bp->b_xflags & BX_ALTDATA) != 0) + return (1); + /* -1 is full truncation. */ + if (lastlbn == -1) + return (1); + /* + * If this is a partial truncate we only want those + * blocks and indirect blocks that cover the range + * we're after. + */ + lbn = bp->b_lblkno; + if (lbn < 0) + lbn = -(lbn + lbn_level(lbn)); + if (lbn < lastlbn) + return (0); + /* Here we only truncate lblkno if it's partial. */ + if (lbn == lastlbn) { + if (lastoff == 0) + return (0); + *blkoffp = lastoff; + } + return (1); +} + +/* + * Eliminate any dependencies that exist in memory beyond lblkno:off + */ +static void +trunc_dependencies(ip, freeblks, lastlbn, lastoff, flags) + struct inode *ip; + struct freeblks *freeblks; + ufs_lbn_t lastlbn; + int lastoff; + int flags; +{ + struct bufobj *bo; + struct vnode *vp; + struct buf *bp; + int blkoff; + + /* + * We must wait for any I/O in progress to finish so that + * all potential buffers on the dirty list will be visible. + * Once they are all there, walk the list and get rid of + * any dependencies. + */ + vp = ITOV(ip); + bo = &vp->v_bufobj; + BO_LOCK(bo); + drain_output(vp); + TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) + bp->b_vflags &= ~BV_SCANNED; +restart: + TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) { + if (bp->b_vflags & BV_SCANNED) + continue; + if (!trunc_check_buf(bp, &blkoff, lastlbn, lastoff, flags)) { + bp->b_vflags |= BV_SCANNED; + continue; + } + KASSERT(bp->b_bufobj == bo, ("Wrong object in buffer")); + if ((bp = getdirtybuf(bp, BO_LOCKPTR(bo), MNT_WAIT)) == NULL) + goto restart; + BO_UNLOCK(bo); + if (deallocate_dependencies(bp, freeblks, blkoff)) + bqrelse(bp); + else + brelse(bp); + BO_LOCK(bo); + goto restart; + } + /* + * Now do the work of vtruncbuf while also matching indirect blocks. 
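+ * Unlike vtruncbuf(), this scan runs the buffers through trunc_check_buf()
+ * so that indirect buffers covering the retained range are kept, and a
+ * partially truncated block is shrunk with allocbuf() rather than
+ * invalidated.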
+ */ + TAILQ_FOREACH(bp, &bo->bo_clean.bv_hd, b_bobufs) + bp->b_vflags &= ~BV_SCANNED; +cleanrestart: + TAILQ_FOREACH(bp, &bo->bo_clean.bv_hd, b_bobufs) { + if (bp->b_vflags & BV_SCANNED) + continue; + if (!trunc_check_buf(bp, &blkoff, lastlbn, lastoff, flags)) { + bp->b_vflags |= BV_SCANNED; + continue; + } + if (BUF_LOCK(bp, + LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, + BO_LOCKPTR(bo)) == ENOLCK) { + BO_LOCK(bo); + goto cleanrestart; + } + bp->b_vflags |= BV_SCANNED; + bremfree(bp); + if (blkoff != 0) { + allocbuf(bp, blkoff); + bqrelse(bp); + } else { + bp->b_flags |= B_INVAL | B_NOCACHE | B_RELBUF; + brelse(bp); + } + BO_LOCK(bo); + goto cleanrestart; + } + drain_output(vp); + BO_UNLOCK(bo); +} + +static int +cancel_pagedep(pagedep, freeblks, blkoff) + struct pagedep *pagedep; + struct freeblks *freeblks; + int blkoff; +{ + struct jremref *jremref; + struct jmvref *jmvref; + struct dirrem *dirrem, *tmp; + int i; + + /* + * Copy any directory remove dependencies to the list + * to be processed after the freeblks proceeds. If + * directory entry never made it to disk they + * can be dumped directly onto the work list. + */ + LIST_FOREACH_SAFE(dirrem, &pagedep->pd_dirremhd, dm_next, tmp) { + /* Skip this directory removal if it is intended to remain. */ + if (dirrem->dm_offset < blkoff) + continue; + /* + * If there are any dirrems we wait for the journal write + * to complete and then restart the buf scan as the lock + * has been dropped. + */ + while ((jremref = LIST_FIRST(&dirrem->dm_jremrefhd)) != NULL) { + jwait(&jremref->jr_list, MNT_WAIT); + return (ERESTART); + } + LIST_REMOVE(dirrem, dm_next); + dirrem->dm_dirinum = pagedep->pd_ino; + WORKLIST_INSERT(&freeblks->fb_freeworkhd, &dirrem->dm_list); + } + while ((jmvref = LIST_FIRST(&pagedep->pd_jmvrefhd)) != NULL) { + jwait(&jmvref->jm_list, MNT_WAIT); + return (ERESTART); + } + /* + * When we're partially truncating a pagedep we just want to flush + * journal entries and return. There can not be any adds in the + * truncated portion of the directory and newblk must remain if + * part of the block remains. + */ + if (blkoff != 0) { + struct diradd *dap; + + LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist) + if (dap->da_offset > blkoff) + panic("cancel_pagedep: diradd %p off %d > %d", + dap, dap->da_offset, blkoff); + for (i = 0; i < DAHASHSZ; i++) + LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) + if (dap->da_offset > blkoff) + panic("cancel_pagedep: diradd %p off %d > %d", + dap, dap->da_offset, blkoff); + return (0); + } + /* + * There should be no directory add dependencies present + * as the directory could not be truncated until all + * children were removed. + */ + KASSERT(LIST_FIRST(&pagedep->pd_pendinghd) == NULL, + ("deallocate_dependencies: pendinghd != NULL")); + for (i = 0; i < DAHASHSZ; i++) + KASSERT(LIST_FIRST(&pagedep->pd_diraddhd[i]) == NULL, + ("deallocate_dependencies: diraddhd != NULL")); + if ((pagedep->pd_state & NEWBLOCK) != 0) + free_newdirblk(pagedep->pd_newdirblk); + if (free_pagedep(pagedep) == 0) + panic("Failed to free pagedep %p", pagedep); + return (0); +} + +/* + * Reclaim any dependency structures from a buffer that is about to + * be reallocated to a new vnode. The buffer must be locked, thus, + * no I/O completion operations can occur while we are manipulating + * its associated dependencies. The mutex is held so that other I/O's + * associated with related dependencies do not occur. 
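+ * A non-zero return tells trunc_dependencies() to release the buffer with
+ * bqrelse() instead of invalidating it: either the lock was dropped to
+ * wait on a journal write and the scan must restart, or the buffer was
+ * only partially truncated and keeps its remaining dependencies.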
+ */ +static int +deallocate_dependencies(bp, freeblks, off) + struct buf *bp; + struct freeblks *freeblks; + int off; +{ + struct indirdep *indirdep; + struct pagedep *pagedep; + struct worklist *wk, *wkn; + struct ufsmount *ump; + + ump = softdep_bp_to_mp(bp); + if (ump == NULL) + goto done; + ACQUIRE_LOCK(ump); + LIST_FOREACH_SAFE(wk, &bp->b_dep, wk_list, wkn) { + switch (wk->wk_type) { + case D_INDIRDEP: + indirdep = WK_INDIRDEP(wk); + if (bp->b_lblkno >= 0 || + bp->b_blkno != indirdep->ir_savebp->b_lblkno) + panic("deallocate_dependencies: not indir"); + cancel_indirdep(indirdep, bp, freeblks); + continue; + + case D_PAGEDEP: + pagedep = WK_PAGEDEP(wk); + if (cancel_pagedep(pagedep, freeblks, off)) { + FREE_LOCK(ump); + return (ERESTART); + } + continue; + + case D_ALLOCINDIR: + /* + * Simply remove the allocindir, we'll find it via + * the indirdep where we can clear pointers if + * needed. + */ + WORKLIST_REMOVE(wk); + continue; + + case D_FREEWORK: + /* + * A truncation is waiting for the zero'd pointers + * to be written. It can be freed when the freeblks + * is journaled. + */ + WORKLIST_REMOVE(wk); + wk->wk_state |= ONDEPLIST; + WORKLIST_INSERT(&freeblks->fb_freeworkhd, wk); + break; + + case D_ALLOCDIRECT: + if (off != 0) + continue; + /* FALLTHROUGH */ + default: + panic("deallocate_dependencies: Unexpected type %s", + TYPENAME(wk->wk_type)); + /* NOTREACHED */ + } + } + FREE_LOCK(ump); +done: + /* + * Don't throw away this buf, we were partially truncating and + * some deps may always remain. + */ + if (off) { + allocbuf(bp, off); + bp->b_vflags |= BV_SCANNED; + return (EBUSY); + } + bp->b_flags |= B_INVAL | B_NOCACHE; + + return (0); +} + +/* + * An allocdirect is being canceled due to a truncate. We must make sure + * the journal entry is released in concert with the blkfree that releases + * the storage. Completed journal entries must not be released until the + * space is no longer pointed to by the inode or in the bitmap. + */ +static void +cancel_allocdirect(adphead, adp, freeblks) + struct allocdirectlst *adphead; + struct allocdirect *adp; + struct freeblks *freeblks; +{ + struct freework *freework; + struct newblk *newblk; + struct worklist *wk; + + TAILQ_REMOVE(adphead, adp, ad_next); + newblk = (struct newblk *)adp; + freework = NULL; + /* + * Find the correct freework structure. + */ + LIST_FOREACH(wk, &freeblks->fb_freeworkhd, wk_list) { + if (wk->wk_type != D_FREEWORK) + continue; + freework = WK_FREEWORK(wk); + if (freework->fw_blkno == newblk->nb_newblkno) + break; + } + if (freework == NULL) + panic("cancel_allocdirect: Freework not found"); + /* + * If a newblk exists at all we still have the journal entry that + * initiated the allocation so we do not need to journal the free. + */ + cancel_jfreeblk(freeblks, freework->fw_blkno); + /* + * If the journal hasn't been written the jnewblk must be passed + * to the call to ffs_blkfree that reclaims the space. We accomplish + * this by linking the journal dependency into the freework to be + * freed when freework_freeblock() is called. If the journal has + * been written we can simply reclaim the journal space when the + * freeblks work is complete. + */ + freework->fw_jnewblk = cancel_newblk(newblk, &freework->fw_list, + &freeblks->fb_jwork); + WORKLIST_INSERT(&freeblks->fb_freeworkhd, &newblk->nb_list); +} + + +/* + * Cancel a new block allocation. May be an indirect or direct block. We + * remove it from various lists and return any journal record that needs to + * be resolved by the caller. 
+ * + * A special consideration is made for indirects which were never pointed + * at on disk and will never be found once this block is released. + */ +static struct jnewblk * +cancel_newblk(newblk, wk, wkhd) + struct newblk *newblk; + struct worklist *wk; + struct workhead *wkhd; +{ + struct jnewblk *jnewblk; + + CTR1(KTR_SUJ, "cancel_newblk: blkno %jd", newblk->nb_newblkno); + + newblk->nb_state |= GOINGAWAY; + /* + * Previously we traversed the completedhd on each indirdep + * attached to this newblk to cancel them and gather journal + * work. Since we need only the oldest journal segment and + * the lowest point on the tree will always have the oldest + * journal segment we are free to release the segments + * of any subordinates and may leave the indirdep list to + * indirdep_complete() when this newblk is freed. + */ + if (newblk->nb_state & ONDEPLIST) { + newblk->nb_state &= ~ONDEPLIST; + LIST_REMOVE(newblk, nb_deps); + } + if (newblk->nb_state & ONWORKLIST) + WORKLIST_REMOVE(&newblk->nb_list); + /* + * If the journal entry hasn't been written we save a pointer to + * the dependency that frees it until it is written or the + * superseding operation completes. + */ + jnewblk = newblk->nb_jnewblk; + if (jnewblk != NULL && wk != NULL) { + newblk->nb_jnewblk = NULL; + jnewblk->jn_dep = wk; + } + if (!LIST_EMPTY(&newblk->nb_jwork)) + jwork_move(wkhd, &newblk->nb_jwork); + /* + * When truncating we must free the newdirblk early to remove + * the pagedep from the hash before returning. + */ + if ((wk = LIST_FIRST(&newblk->nb_newdirblk)) != NULL) + free_newdirblk(WK_NEWDIRBLK(wk)); + if (!LIST_EMPTY(&newblk->nb_newdirblk)) + panic("cancel_newblk: extra newdirblk"); + + return (jnewblk); +} + +/* + * Schedule the freefrag associated with a newblk to be released once + * the pointers are written and the previous block is no longer needed. + */ +static void +newblk_freefrag(newblk) + struct newblk *newblk; +{ + struct freefrag *freefrag; + + if (newblk->nb_freefrag == NULL) + return; + freefrag = newblk->nb_freefrag; + newblk->nb_freefrag = NULL; + freefrag->ff_state |= COMPLETE; + if ((freefrag->ff_state & ALLCOMPLETE) == ALLCOMPLETE) + add_to_worklist(&freefrag->ff_list, 0); +} + +/* + * Free a newblk. Generate a new freefrag work request if appropriate. + * This must be called after the inode pointer and any direct block pointers + * are valid or fully removed via truncate or frag extension. + */ +static void +free_newblk(newblk) + struct newblk *newblk; +{ + struct indirdep *indirdep; + struct worklist *wk; + + KASSERT(newblk->nb_jnewblk == NULL, + ("free_newblk: jnewblk %p still attached", newblk->nb_jnewblk)); + KASSERT(newblk->nb_list.wk_type != D_NEWBLK, + ("free_newblk: unclaimed newblk")); + LOCK_OWNED(VFSTOUFS(newblk->nb_list.wk_mp)); + newblk_freefrag(newblk); + if (newblk->nb_state & ONDEPLIST) + LIST_REMOVE(newblk, nb_deps); + if (newblk->nb_state & ONWORKLIST) + WORKLIST_REMOVE(&newblk->nb_list); + LIST_REMOVE(newblk, nb_hash); + if ((wk = LIST_FIRST(&newblk->nb_newdirblk)) != NULL) + free_newdirblk(WK_NEWDIRBLK(wk)); + if (!LIST_EMPTY(&newblk->nb_newdirblk)) + panic("free_newblk: extra newdirblk"); + while ((indirdep = LIST_FIRST(&newblk->nb_indirdeps)) != NULL) + indirdep_complete(indirdep); + handle_jwork(&newblk->nb_jwork); + WORKITEM_FREE(newblk, D_NEWBLK); +} + +/* + * Free a newdirblk. Clear the NEWBLOCK flag on its associated pagedep. + * This routine must be called with splbio interrupts blocked. 
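+ * (In the current code this means the per-filesystem softdep lock must be
+ * held, as asserted by LOCK_OWNED() below; the splbio reference is
+ * historical.)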
+ */ +static void +free_newdirblk(newdirblk) + struct newdirblk *newdirblk; +{ + struct pagedep *pagedep; + struct diradd *dap; + struct worklist *wk; + + LOCK_OWNED(VFSTOUFS(newdirblk->db_list.wk_mp)); + WORKLIST_REMOVE(&newdirblk->db_list); + /* + * If the pagedep is still linked onto the directory buffer + * dependency chain, then some of the entries on the + * pd_pendinghd list may not be committed to disk yet. In + * this case, we will simply clear the NEWBLOCK flag and + * let the pd_pendinghd list be processed when the pagedep + * is next written. If the pagedep is no longer on the buffer + * dependency chain, then all the entries on the pd_pending + * list are committed to disk and we can free them here. + */ + pagedep = newdirblk->db_pagedep; + pagedep->pd_state &= ~NEWBLOCK; + if ((pagedep->pd_state & ONWORKLIST) == 0) { + while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL) + free_diradd(dap, NULL); + /* + * If no dependencies remain, the pagedep will be freed. + */ + free_pagedep(pagedep); + } + /* Should only ever be one item in the list. */ + while ((wk = LIST_FIRST(&newdirblk->db_mkdir)) != NULL) { + WORKLIST_REMOVE(wk); + handle_written_mkdir(WK_MKDIR(wk), MKDIR_BODY); + } + WORKITEM_FREE(newdirblk, D_NEWDIRBLK); +} + +/* + * Prepare an inode to be freed. The actual free operation is not + * done until the zero'ed inode has been written to disk. + */ +void +softdep_freefile(pvp, ino, mode) + struct vnode *pvp; + ino_t ino; + int mode; +{ + struct inode *ip = VTOI(pvp); + struct inodedep *inodedep; + struct freefile *freefile; + struct freeblks *freeblks; + struct ufsmount *ump; + + ump = ITOUMP(ip); + KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0, + ("softdep_freefile called on non-softdep filesystem")); + /* + * This sets up the inode de-allocation dependency. + */ + freefile = malloc(sizeof(struct freefile), + M_FREEFILE, M_SOFTDEP_FLAGS); + workitem_alloc(&freefile->fx_list, D_FREEFILE, pvp->v_mount); + freefile->fx_mode = mode; + freefile->fx_oldinum = ino; + freefile->fx_devvp = ump->um_devvp; + LIST_INIT(&freefile->fx_jwork); + UFS_LOCK(ump); + ump->um_fs->fs_pendinginodes += 1; + UFS_UNLOCK(ump); + + /* + * If the inodedep does not exist, then the zero'ed inode has + * been written to disk. If the allocated inode has never been + * written to disk, then the on-disk inode is zero'ed. In either + * case we can free the file immediately. If the journal was + * canceled before being written the inode will never make it to + * disk and we must send the canceled journal entrys to + * ffs_freefile() to be cleared in conjunction with the bitmap. + * Any blocks waiting on the inode to write can be safely freed + * here as it will never been written. + */ + ACQUIRE_LOCK(ump); + inodedep_lookup(pvp->v_mount, ino, 0, &inodedep); + if (inodedep) { + /* + * Clear out freeblks that no longer need to reference + * this inode. + */ + while ((freeblks = + TAILQ_FIRST(&inodedep->id_freeblklst)) != NULL) { + TAILQ_REMOVE(&inodedep->id_freeblklst, freeblks, + fb_next); + freeblks->fb_state &= ~ONDEPLIST; + } + /* + * Remove this inode from the unlinked list. + */ + if (inodedep->id_state & UNLINKED) { + /* + * Save the journal work to be freed with the bitmap + * before we clear UNLINKED. Otherwise it can be lost + * if the inode block is written. + */ + handle_bufwait(inodedep, &freefile->fx_jwork); + clear_unlinked_inodedep(inodedep); + /* + * Re-acquire inodedep as we've dropped the + * per-filesystem lock in clear_unlinked_inodedep(). 
+ */ + inodedep_lookup(pvp->v_mount, ino, 0, &inodedep); + } + } + if (inodedep == NULL || check_inode_unwritten(inodedep)) { + FREE_LOCK(ump); + handle_workitem_freefile(freefile); + return; + } + if ((inodedep->id_state & DEPCOMPLETE) == 0) + inodedep->id_state |= GOINGAWAY; + WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list); + FREE_LOCK(ump); + if (ip->i_number == ino) + ip->i_flag |= IN_MODIFIED; +} + +/* + * Check to see if an inode has never been written to disk. If + * so free the inodedep and return success, otherwise return failure. + * This routine must be called with splbio interrupts blocked. + * + * If we still have a bitmap dependency, then the inode has never + * been written to disk. Drop the dependency as it is no longer + * necessary since the inode is being deallocated. We set the + * ALLCOMPLETE flags since the bitmap now properly shows that the + * inode is not allocated. Even if the inode is actively being + * written, it has been rolled back to its zero'ed state, so we + * are ensured that a zero inode is what is on the disk. For short + * lived files, this change will usually result in removing all the + * dependencies from the inode so that it can be freed immediately. + */ +static int +check_inode_unwritten(inodedep) + struct inodedep *inodedep; +{ + + LOCK_OWNED(VFSTOUFS(inodedep->id_list.wk_mp)); + + if ((inodedep->id_state & (DEPCOMPLETE | UNLINKED)) != 0 || + !LIST_EMPTY(&inodedep->id_dirremhd) || + !LIST_EMPTY(&inodedep->id_pendinghd) || + !LIST_EMPTY(&inodedep->id_bufwait) || + !LIST_EMPTY(&inodedep->id_inowait) || + !TAILQ_EMPTY(&inodedep->id_inoreflst) || + !TAILQ_EMPTY(&inodedep->id_inoupdt) || + !TAILQ_EMPTY(&inodedep->id_newinoupdt) || + !TAILQ_EMPTY(&inodedep->id_extupdt) || + !TAILQ_EMPTY(&inodedep->id_newextupdt) || + !TAILQ_EMPTY(&inodedep->id_freeblklst) || + inodedep->id_mkdiradd != NULL || + inodedep->id_nlinkdelta != 0) + return (0); + /* + * Another process might be in initiate_write_inodeblock_ufs[12] + * trying to allocate memory without holding "Softdep Lock". + */ + if ((inodedep->id_state & IOSTARTED) != 0 && + inodedep->id_savedino1 == NULL) + return (0); + + if (inodedep->id_state & ONDEPLIST) + LIST_REMOVE(inodedep, id_deps); + inodedep->id_state &= ~ONDEPLIST; + inodedep->id_state |= ALLCOMPLETE; + inodedep->id_bmsafemap = NULL; + if (inodedep->id_state & ONWORKLIST) + WORKLIST_REMOVE(&inodedep->id_list); + if (inodedep->id_savedino1 != NULL) { + free(inodedep->id_savedino1, M_SAVEDINO); + inodedep->id_savedino1 = NULL; + } + if (free_inodedep(inodedep) == 0) + panic("check_inode_unwritten: busy inode"); + return (1); +} + +static int +check_inodedep_free(inodedep) + struct inodedep *inodedep; +{ + + LOCK_OWNED(VFSTOUFS(inodedep->id_list.wk_mp)); + if ((inodedep->id_state & ALLCOMPLETE) != ALLCOMPLETE || + !LIST_EMPTY(&inodedep->id_dirremhd) || + !LIST_EMPTY(&inodedep->id_pendinghd) || + !LIST_EMPTY(&inodedep->id_bufwait) || + !LIST_EMPTY(&inodedep->id_inowait) || + !TAILQ_EMPTY(&inodedep->id_inoreflst) || + !TAILQ_EMPTY(&inodedep->id_inoupdt) || + !TAILQ_EMPTY(&inodedep->id_newinoupdt) || + !TAILQ_EMPTY(&inodedep->id_extupdt) || + !TAILQ_EMPTY(&inodedep->id_newextupdt) || + !TAILQ_EMPTY(&inodedep->id_freeblklst) || + inodedep->id_mkdiradd != NULL || + inodedep->id_nlinkdelta != 0 || + inodedep->id_savedino1 != NULL) + return (0); + return (1); +} + +/* + * Try to free an inodedep structure. Return 1 if it could be freed. 
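+ *
+ * (Illustrative sketch, not from the original source; simplified,
+ * hypothetical types.)  Predicates such as check_inodedep_free() above
+ * are just "is every per-object dependency list drained and every counter
+ * back to zero" checks:
+ *
+ *    #include <sys/queue.h>
+ *
+ *    struct xdep { LIST_ENTRY(xdep) link; };
+ *    LIST_HEAD(xdeplist, xdep);
+ *
+ *    struct xnode {
+ *        struct xdeplist bufwait;
+ *        struct xdeplist inowait;
+ *        int             nlinkdelta;
+ *    };
+ *
+ *    // Freeable only when nothing still hangs off the node.
+ *    static int
+ *    xnode_is_quiescent(const struct xnode *n)
+ *    {
+ *        return (LIST_EMPTY(&n->bufwait) && LIST_EMPTY(&n->inowait) &&
+ *            n->nlinkdelta == 0);
+ *    }
+ *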
+ */ +static int +free_inodedep(inodedep) + struct inodedep *inodedep; +{ + + LOCK_OWNED(VFSTOUFS(inodedep->id_list.wk_mp)); + if ((inodedep->id_state & (ONWORKLIST | UNLINKED)) != 0 || + !check_inodedep_free(inodedep)) + return (0); + if (inodedep->id_state & ONDEPLIST) + LIST_REMOVE(inodedep, id_deps); + LIST_REMOVE(inodedep, id_hash); + WORKITEM_FREE(inodedep, D_INODEDEP); + return (1); +} + +/* + * Free the block referenced by a freework structure. The parent freeblks + * structure is released and completed when the final cg bitmap reaches + * the disk. This routine may be freeing a jnewblk which never made it to + * disk in which case we do not have to wait as the operation is undone + * in memory immediately. + */ +static void +freework_freeblock(freework) + struct freework *freework; +{ + struct freeblks *freeblks; + struct jnewblk *jnewblk; + struct ufsmount *ump; + struct workhead wkhd; + struct fs *fs; + int bsize; + int needj; + + ump = VFSTOUFS(freework->fw_list.wk_mp); + LOCK_OWNED(ump); + /* + * Handle partial truncate separately. + */ + if (freework->fw_indir) { + complete_trunc_indir(freework); + return; + } + freeblks = freework->fw_freeblks; + fs = ump->um_fs; + needj = MOUNTEDSUJ(freeblks->fb_list.wk_mp) != 0; + bsize = lfragtosize(fs, freework->fw_frags); + LIST_INIT(&wkhd); + /* + * DEPCOMPLETE is cleared in indirblk_insert() if the block lives + * on the indirblk hashtable and prevents premature freeing. + */ + freework->fw_state |= DEPCOMPLETE; + /* + * SUJ needs to wait for the segment referencing freed indirect + * blocks to expire so that we know the checker will not confuse + * a re-allocated indirect block with its old contents. + */ + if (needj && freework->fw_lbn <= -NDADDR) + indirblk_insert(freework); + /* + * If we are canceling an existing jnewblk pass it to the free + * routine, otherwise pass the freeblk which will ultimately + * release the freeblks. If we're not journaling, we can just + * free the freeblks immediately. + */ + jnewblk = freework->fw_jnewblk; + if (jnewblk != NULL) { + cancel_jnewblk(jnewblk, &wkhd); + needj = 0; + } else if (needj) { + freework->fw_state |= DELAYEDFREE; + freeblks->fb_cgwait++; + WORKLIST_INSERT(&wkhd, &freework->fw_list); + } + FREE_LOCK(ump); + freeblks_free(ump, freeblks, btodb(bsize)); + CTR4(KTR_SUJ, + "freework_freeblock: ino %d blkno %jd lbn %jd size %ld", + freeblks->fb_inum, freework->fw_blkno, freework->fw_lbn, bsize); + ffs_blkfree(ump, fs, freeblks->fb_devvp, freework->fw_blkno, bsize, + freeblks->fb_inum, freeblks->fb_vtype, &wkhd); + ACQUIRE_LOCK(ump); + /* + * The jnewblk will be discarded and the bits in the map never + * made it to disk. We can immediately free the freeblk. + */ + if (needj == 0) + handle_written_freework(freework); +} + +/* + * We enqueue freework items that need processing back on the freeblks and + * add the freeblks to the worklist. This makes it easier to find all work + * required to flush a truncation in process_truncates(). + */ +static void +freework_enqueue(freework) + struct freework *freework; +{ + struct freeblks *freeblks; + + freeblks = freework->fw_freeblks; + if ((freework->fw_state & INPROGRESS) == 0) + WORKLIST_INSERT(&freeblks->fb_freeworkhd, &freework->fw_list); + if ((freeblks->fb_state & + (ONWORKLIST | INPROGRESS | ALLCOMPLETE)) == ALLCOMPLETE && + LIST_EMPTY(&freeblks->fb_jblkdephd)) + add_to_worklist(&freeblks->fb_list, WK_NODELAY); +} + +/* + * Start, continue, or finish the process of freeing an indirect block tree. 
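+ *
+ * (Illustrative sketch, not from the original source; hypothetical names.)
+ * The pause/resume flow control described below, where fw_off records how
+ * far the pointer array has been processed, is the usual "bounded work per
+ * pass, remember where to restart" loop:
+ *
+ *    // Process at most 'budget' slots, saving the restart point in *offp;
+ *    // returns nonzero once the whole array has been handled.
+ *    static int
+ *    drain_some(int *offp, int nslots, int budget, void (*one)(int))
+ *    {
+ *        int i = *offp;
+ *
+ *        while (i < nslots && budget-- > 0)
+ *            one(i++);
+ *        *offp = i;
+ *        return (i == nslots);
+ *    }
+ *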
+ * The free operation may be paused at any point with fw_off containing the + * offset to restart from. This enables us to implement some flow control + * for large truncates which may fan out and generate a huge number of + * dependencies. + */ +static void +handle_workitem_indirblk(freework) + struct freework *freework; +{ + struct freeblks *freeblks; + struct ufsmount *ump; + struct fs *fs; + + freeblks = freework->fw_freeblks; + ump = VFSTOUFS(freeblks->fb_list.wk_mp); + fs = ump->um_fs; + if (freework->fw_state & DEPCOMPLETE) { + handle_written_freework(freework); + return; + } + if (freework->fw_off == NINDIR(fs)) { + freework_freeblock(freework); + return; + } + freework->fw_state |= INPROGRESS; + FREE_LOCK(ump); + indir_trunc(freework, fsbtodb(fs, freework->fw_blkno), + freework->fw_lbn); + ACQUIRE_LOCK(ump); +} + +/* + * Called when a freework structure attached to a cg buf is written. The + * ref on either the parent or the freeblks structure is released and + * the freeblks is added back to the worklist if there is more work to do. + */ +static void +handle_written_freework(freework) + struct freework *freework; +{ + struct freeblks *freeblks; + struct freework *parent; + + freeblks = freework->fw_freeblks; + parent = freework->fw_parent; + if (freework->fw_state & DELAYEDFREE) + freeblks->fb_cgwait--; + freework->fw_state |= COMPLETE; + if ((freework->fw_state & ALLCOMPLETE) == ALLCOMPLETE) + WORKITEM_FREE(freework, D_FREEWORK); + if (parent) { + if (--parent->fw_ref == 0) + freework_enqueue(parent); + return; + } + if (--freeblks->fb_ref != 0) + return; + if ((freeblks->fb_state & (ALLCOMPLETE | ONWORKLIST | INPROGRESS)) == + ALLCOMPLETE && LIST_EMPTY(&freeblks->fb_jblkdephd)) + add_to_worklist(&freeblks->fb_list, WK_NODELAY); +} + +/* + * This workitem routine performs the block de-allocation. + * The workitem is added to the pending list after the updated + * inode block has been written to disk. As mentioned above, + * checks regarding the number of blocks de-allocated (compared + * to the number of blocks allocated for the file) are also + * performed in this function. 
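+ *
+ * (Illustrative sketch, not from the original source; hypothetical,
+ * simplified names.)  The parent/child accounting used by
+ * handle_written_freework() above is a plain completion refcount: the
+ * parent is only scheduled once its last outstanding child reports in.
+ *
+ *    struct xparent { int ref; };
+ *
+ *    // Called as each child finishes.
+ *    static void
+ *    xchild_done(struct xparent *p, void (*schedule)(struct xparent *))
+ *    {
+ *        if (--p->ref == 0)
+ *            schedule(p);
+ *    }
+ *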
+ */ +static int +handle_workitem_freeblocks(freeblks, flags) + struct freeblks *freeblks; + int flags; +{ + struct freework *freework; + struct newblk *newblk; + struct allocindir *aip; + struct ufsmount *ump; + struct worklist *wk; + + KASSERT(LIST_EMPTY(&freeblks->fb_jblkdephd), + ("handle_workitem_freeblocks: Journal entries not written.")); + ump = VFSTOUFS(freeblks->fb_list.wk_mp); + ACQUIRE_LOCK(ump); + while ((wk = LIST_FIRST(&freeblks->fb_freeworkhd)) != NULL) { + WORKLIST_REMOVE(wk); + switch (wk->wk_type) { + case D_DIRREM: + wk->wk_state |= COMPLETE; + add_to_worklist(wk, 0); + continue; + + case D_ALLOCDIRECT: + free_newblk(WK_NEWBLK(wk)); + continue; + + case D_ALLOCINDIR: + aip = WK_ALLOCINDIR(wk); + freework = NULL; + if (aip->ai_state & DELAYEDFREE) { + FREE_LOCK(ump); + freework = newfreework(ump, freeblks, NULL, + aip->ai_lbn, aip->ai_newblkno, + ump->um_fs->fs_frag, 0, 0); + ACQUIRE_LOCK(ump); + } + newblk = WK_NEWBLK(wk); + if (newblk->nb_jnewblk) { + freework->fw_jnewblk = newblk->nb_jnewblk; + newblk->nb_jnewblk->jn_dep = &freework->fw_list; + newblk->nb_jnewblk = NULL; + } + free_newblk(newblk); + continue; + + case D_FREEWORK: + freework = WK_FREEWORK(wk); + if (freework->fw_lbn <= -NDADDR) + handle_workitem_indirblk(freework); + else + freework_freeblock(freework); + continue; + default: + panic("handle_workitem_freeblocks: Unknown type %s", + TYPENAME(wk->wk_type)); + } + } + if (freeblks->fb_ref != 0) { + freeblks->fb_state &= ~INPROGRESS; + wake_worklist(&freeblks->fb_list); + freeblks = NULL; + } + FREE_LOCK(ump); + if (freeblks) + return handle_complete_freeblocks(freeblks, flags); + return (0); +} + +/* + * Handle completion of block free via truncate. This allows fs_pending + * to track the actual free block count more closely than if we only updated + * it at the end. We must be careful to handle cases where the block count + * on free was incorrect. + */ +static void +freeblks_free(ump, freeblks, blocks) + struct ufsmount *ump; + struct freeblks *freeblks; + int blocks; +{ + struct fs *fs; + ufs2_daddr_t remain; + + UFS_LOCK(ump); + remain = -freeblks->fb_chkcnt; + freeblks->fb_chkcnt += blocks; + if (remain > 0) { + if (remain < blocks) + blocks = remain; + fs = ump->um_fs; + fs->fs_pendingblocks -= blocks; + } + UFS_UNLOCK(ump); +} + +/* + * Once all of the freework workitems are complete we can retire the + * freeblocks dependency and any journal work awaiting completion. This + * can not be called until all other dependencies are stable on disk. + */ +static int +handle_complete_freeblocks(freeblks, flags) + struct freeblks *freeblks; + int flags; +{ + struct inodedep *inodedep; + struct inode *ip; + struct vnode *vp; + struct fs *fs; + struct ufsmount *ump; + ufs2_daddr_t spare; + + ump = VFSTOUFS(freeblks->fb_list.wk_mp); + fs = ump->um_fs; + flags = LK_EXCLUSIVE | flags; + spare = freeblks->fb_chkcnt; + + /* + * If we did not release the expected number of blocks we may have + * to adjust the inode block count here. Only do so if it wasn't + * a truncation to zero and the modrev still matches. + */ + if (spare && freeblks->fb_len != 0) { + if (ffs_vgetf(freeblks->fb_list.wk_mp, freeblks->fb_inum, + flags, &vp, FFSV_FORCEINSMQ) != 0) + return (EBUSY); + ip = VTOI(vp); + if (DIP(ip, i_modrev) == freeblks->fb_modrev) { + DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - spare); + ip->i_flag |= IN_CHANGE; + /* + * We must wait so this happens before the + * journal is reclaimed. 
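+ *
+ * (Illustrative sketch, not from the original source; hypothetical names.)
+ * The accounting done by freeblks_free() above credits the filesystem-wide
+ * pending counter with at most the amount still outstanding for this
+ * truncation, guarding against an over-estimate:
+ *
+ *    static void
+ *    credit_pending(long *pending, long *chkcnt, long blocks)
+ *    {
+ *        long remain = -*chkcnt;        // still expected to be freed
+ *
+ *        *chkcnt += blocks;
+ *        if (remain > 0)
+ *            *pending -= (remain < blocks) ? remain : blocks;
+ *    }
+ *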
+ */ + ffs_update(vp, 1); + } + vput(vp); + } + if (spare < 0) { + UFS_LOCK(ump); + fs->fs_pendingblocks += spare; + UFS_UNLOCK(ump); + } +#ifdef QUOTA + /* Handle spare. */ + if (spare) + quotaadj(freeblks->fb_quota, ump, -spare); + quotarele(freeblks->fb_quota); +#endif + ACQUIRE_LOCK(ump); + if (freeblks->fb_state & ONDEPLIST) { + inodedep_lookup(freeblks->fb_list.wk_mp, freeblks->fb_inum, + 0, &inodedep); + TAILQ_REMOVE(&inodedep->id_freeblklst, freeblks, fb_next); + freeblks->fb_state &= ~ONDEPLIST; + if (TAILQ_EMPTY(&inodedep->id_freeblklst)) + free_inodedep(inodedep); + } + /* + * All of the freeblock deps must be complete prior to this call + * so it's now safe to complete earlier outstanding journal entries. + */ + handle_jwork(&freeblks->fb_jwork); + WORKITEM_FREE(freeblks, D_FREEBLKS); + FREE_LOCK(ump); + return (0); +} + +/* + * Release blocks associated with the freeblks and stored in the indirect + * block dbn. If level is greater than SINGLE, the block is an indirect block + * and recursive calls to indirtrunc must be used to cleanse other indirect + * blocks. + * + * This handles partial and complete truncation of blocks. Partial is noted + * with goingaway == 0. In this case the freework is completed after the + * zero'd indirects are written to disk. For full truncation the freework + * is completed after the block is freed. + */ +static void +indir_trunc(freework, dbn, lbn) + struct freework *freework; + ufs2_daddr_t dbn; + ufs_lbn_t lbn; +{ + struct freework *nfreework; + struct workhead wkhd; + struct freeblks *freeblks; + struct buf *bp; + struct fs *fs; + struct indirdep *indirdep; + struct ufsmount *ump; + ufs1_daddr_t *bap1; + ufs2_daddr_t nb, nnb, *bap2; + ufs_lbn_t lbnadd, nlbn; + int i, nblocks, ufs1fmt; + int freedblocks; + int goingaway; + int freedeps; + int needj; + int level; + int cnt; + + freeblks = freework->fw_freeblks; + ump = VFSTOUFS(freeblks->fb_list.wk_mp); + fs = ump->um_fs; + /* + * Get buffer of block pointers to be freed. There are three cases: + * + * 1) Partial truncate caches the indirdep pointer in the freework + * which provides us a back copy to the save bp which holds the + * pointers we want to clear. When this completes the zero + * pointers are written to the real copy. + * 2) The indirect is being completely truncated, cancel_indirdep() + * eliminated the real copy and placed the indirdep on the saved + * copy. The indirdep and buf are discarded when this completes. + * 3) The indirect was not in memory, we read a copy off of the disk + * using the devvp and drop and invalidate the buffer when we're + * done. + */ + goingaway = 1; + indirdep = NULL; + if (freework->fw_indir != NULL) { + goingaway = 0; + indirdep = freework->fw_indir; + bp = indirdep->ir_savebp; + if (bp == NULL || bp->b_blkno != dbn) + panic("indir_trunc: Bad saved buf %p blkno %jd", + bp, (intmax_t)dbn); + } else if ((bp = incore(&freeblks->fb_devvp->v_bufobj, dbn)) != NULL) { + /* + * The lock prevents the buf dep list from changing and + * indirects on devvp should only ever have one dependency. + */ + indirdep = WK_INDIRDEP(LIST_FIRST(&bp->b_dep)); + if (indirdep == NULL || (indirdep->ir_state & GOINGAWAY) == 0) + panic("indir_trunc: Bad indirdep %p from buf %p", + indirdep, bp); + } else if (bread(freeblks->fb_devvp, dbn, (int)fs->fs_bsize, + NOCRED, &bp) != 0) { + brelse(bp); + return; + } + ACQUIRE_LOCK(ump); + /* Protects against a race with complete_trunc_indir(). 
*/ + freework->fw_state &= ~INPROGRESS; + /* + * If we have an indirdep we need to enforce the truncation order + * and discard it when it is complete. + */ + if (indirdep) { + if (freework != TAILQ_FIRST(&indirdep->ir_trunc) && + !TAILQ_EMPTY(&indirdep->ir_trunc)) { + /* + * Add the complete truncate to the list on the + * indirdep to enforce in-order processing. + */ + if (freework->fw_indir == NULL) + TAILQ_INSERT_TAIL(&indirdep->ir_trunc, + freework, fw_next); + FREE_LOCK(ump); + return; + } + /* + * If we're goingaway, free the indirdep. Otherwise it will + * linger until the write completes. + */ + if (goingaway) + free_indirdep(indirdep); + } + FREE_LOCK(ump); + /* Initialize pointers depending on block size. */ + if (ump->um_fstype == UFS1) { + bap1 = (ufs1_daddr_t *)bp->b_data; + nb = bap1[freework->fw_off]; + ufs1fmt = 1; + bap2 = NULL; + } else { + bap2 = (ufs2_daddr_t *)bp->b_data; + nb = bap2[freework->fw_off]; + ufs1fmt = 0; + bap1 = NULL; + } + level = lbn_level(lbn); + needj = MOUNTEDSUJ(UFSTOVFS(ump)) != 0; + lbnadd = lbn_offset(fs, level); + nblocks = btodb(fs->fs_bsize); + nfreework = freework; + freedeps = 0; + cnt = 0; + /* + * Reclaim blocks. Traverses into nested indirect levels and + * arranges for the current level to be freed when subordinates + * are free when journaling. + */ + for (i = freework->fw_off; i < NINDIR(fs); i++, nb = nnb) { + if (i != NINDIR(fs) - 1) { + if (ufs1fmt) + nnb = bap1[i+1]; + else + nnb = bap2[i+1]; + } else + nnb = 0; + if (nb == 0) + continue; + cnt++; + if (level != 0) { + nlbn = (lbn + 1) - (i * lbnadd); + if (needj != 0) { + nfreework = newfreework(ump, freeblks, freework, + nlbn, nb, fs->fs_frag, 0, 0); + freedeps++; + } + indir_trunc(nfreework, fsbtodb(fs, nb), nlbn); + } else { + struct freedep *freedep; + + /* + * Attempt to aggregate freedep dependencies for + * all blocks being released to the same CG. + */ + LIST_INIT(&wkhd); + if (needj != 0 && + (nnb == 0 || (dtog(fs, nb) != dtog(fs, nnb)))) { + freedep = newfreedep(freework); + WORKLIST_INSERT_UNLOCKED(&wkhd, + &freedep->fd_list); + freedeps++; + } + CTR3(KTR_SUJ, + "indir_trunc: ino %d blkno %jd size %ld", + freeblks->fb_inum, nb, fs->fs_bsize); + ffs_blkfree(ump, fs, freeblks->fb_devvp, nb, + fs->fs_bsize, freeblks->fb_inum, + freeblks->fb_vtype, &wkhd); + } + } + if (goingaway) { + bp->b_flags |= B_INVAL | B_NOCACHE; + brelse(bp); + } + freedblocks = 0; + if (level == 0) + freedblocks = (nblocks * cnt); + if (needj == 0) + freedblocks += nblocks; + freeblks_free(ump, freeblks, freedblocks); + /* + * If we are journaling set up the ref counts and offset so this + * indirect can be completed when its children are free. + */ + if (needj) { + ACQUIRE_LOCK(ump); + freework->fw_off = i; + freework->fw_ref += freedeps; + freework->fw_ref -= NINDIR(fs) + 1; + if (level == 0) + freeblks->fb_cgwait += freedeps; + if (freework->fw_ref == 0) + freework_freeblock(freework); + FREE_LOCK(ump); + return; + } + /* + * If we're not journaling we can free the indirect now. + */ + dbn = dbtofsb(fs, dbn); + CTR3(KTR_SUJ, + "indir_trunc 2: ino %d blkno %jd size %ld", + freeblks->fb_inum, dbn, fs->fs_bsize); + ffs_blkfree(ump, fs, freeblks->fb_devvp, dbn, fs->fs_bsize, + freeblks->fb_inum, freeblks->fb_vtype, NULL); + /* Non SUJ softdep does single-threaded truncations. 
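+ *
+ * (Illustrative sketch, not from the original source; simplified,
+ * hypothetical names, and none of the journaling or freedep aggregation
+ * done above.)  Stripped to its core, the traversal in indir_trunc() walks
+ * an array of block pointers, recursing one level down for each non-zero
+ * entry until it reaches data blocks, and frees the indirect itself last:
+ *
+ *    #include <stdint.h>
+ *
+ *    static void
+ *    walk_indir(int64_t blkno, int level, int nptr,
+ *        const int64_t *ptrs,
+ *        const int64_t *(*readblk)(int64_t),
+ *        void (*release)(int64_t))
+ *    {
+ *        int i;
+ *
+ *        for (i = 0; i < nptr; i++) {
+ *            if (ptrs[i] == 0)
+ *                continue;
+ *            if (level > 0)
+ *                walk_indir(ptrs[i], level - 1, nptr,
+ *                    readblk(ptrs[i]), readblk, release);
+ *            else
+ *                release(ptrs[i]);
+ *        }
+ *        release(blkno);
+ *    }
+ *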
*/ + if (freework->fw_blkno == dbn) { + freework->fw_state |= ALLCOMPLETE; + ACQUIRE_LOCK(ump); + handle_written_freework(freework); + FREE_LOCK(ump); + } + return; +} + +/* + * Cancel an allocindir when it is removed via truncation. When bp is not + * NULL the indirect never appeared on disk and is scheduled to be freed + * independently of the indir so we can more easily track journal work. + */ +static void +cancel_allocindir(aip, bp, freeblks, trunc) + struct allocindir *aip; + struct buf *bp; + struct freeblks *freeblks; + int trunc; +{ + struct indirdep *indirdep; + struct freefrag *freefrag; + struct newblk *newblk; + + newblk = (struct newblk *)aip; + LIST_REMOVE(aip, ai_next); + /* + * We must eliminate the pointer in bp if it must be freed on its + * own due to partial truncate or pending journal work. + */ + if (bp && (trunc || newblk->nb_jnewblk)) { + /* + * Clear the pointer and mark the aip to be freed + * directly if it never existed on disk. + */ + aip->ai_state |= DELAYEDFREE; + indirdep = aip->ai_indirdep; + if (indirdep->ir_state & UFS1FMT) + ((ufs1_daddr_t *)bp->b_data)[aip->ai_offset] = 0; + else + ((ufs2_daddr_t *)bp->b_data)[aip->ai_offset] = 0; + } + /* + * When truncating the previous pointer will be freed via + * savedbp. Eliminate the freefrag which would dup free. + */ + if (trunc && (freefrag = newblk->nb_freefrag) != NULL) { + newblk->nb_freefrag = NULL; + if (freefrag->ff_jdep) + cancel_jfreefrag( + WK_JFREEFRAG(freefrag->ff_jdep)); + jwork_move(&freeblks->fb_jwork, &freefrag->ff_jwork); + WORKITEM_FREE(freefrag, D_FREEFRAG); + } + /* + * If the journal hasn't been written the jnewblk must be passed + * to the call to ffs_blkfree that reclaims the space. We accomplish + * this by leaving the journal dependency on the newblk to be freed + * when a freework is created in handle_workitem_freeblocks(). + */ + cancel_newblk(newblk, NULL, &freeblks->fb_jwork); + WORKLIST_INSERT(&freeblks->fb_freeworkhd, &newblk->nb_list); +} + +/* + * Create the mkdir dependencies for . and .. in a new directory. Link them + * in to a newdirblk so any subsequent additions are tracked properly. The + * caller is responsible for adding the mkdir1 dependency to the journal + * and updating id_mkdiradd. This function returns with the per-filesystem + * lock held. + */ +static struct mkdir * +setup_newdir(dap, newinum, dinum, newdirbp, mkdirp) + struct diradd *dap; + ino_t newinum; + ino_t dinum; + struct buf *newdirbp; + struct mkdir **mkdirp; +{ + struct newblk *newblk; + struct pagedep *pagedep; + struct inodedep *inodedep; + struct newdirblk *newdirblk; + struct mkdir *mkdir1, *mkdir2; + struct worklist *wk; + struct jaddref *jaddref; + struct ufsmount *ump; + struct mount *mp; + + mp = dap->da_list.wk_mp; + ump = VFSTOUFS(mp); + newdirblk = malloc(sizeof(struct newdirblk), M_NEWDIRBLK, + M_SOFTDEP_FLAGS); + workitem_alloc(&newdirblk->db_list, D_NEWDIRBLK, mp); + LIST_INIT(&newdirblk->db_mkdir); + mkdir1 = malloc(sizeof(struct mkdir), M_MKDIR, M_SOFTDEP_FLAGS); + workitem_alloc(&mkdir1->md_list, D_MKDIR, mp); + mkdir1->md_state = ATTACHED | MKDIR_BODY; + mkdir1->md_diradd = dap; + mkdir1->md_jaddref = NULL; + mkdir2 = malloc(sizeof(struct mkdir), M_MKDIR, M_SOFTDEP_FLAGS); + workitem_alloc(&mkdir2->md_list, D_MKDIR, mp); + mkdir2->md_state = ATTACHED | MKDIR_PARENT; + mkdir2->md_diradd = dap; + mkdir2->md_jaddref = NULL; + if (MOUNTEDSUJ(mp) == 0) { + mkdir1->md_state |= DEPCOMPLETE; + mkdir2->md_state |= DEPCOMPLETE; + } + /* + * Dependency on "." and ".." being written to disk. 
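+ *
+ * (Illustrative sketch, not from the original source; hypothetical names.)
+ * Clearing a block pointer in a buffer, as cancel_allocindir() does above,
+ * has to honor the on-disk pointer width, which differs between UFS1
+ * (32-bit) and UFS2 (64-bit):
+ *
+ *    #include <stdint.h>
+ *
+ *    static void
+ *    clear_slot(void *data, int offset, int is_ufs1)
+ *    {
+ *        if (is_ufs1)
+ *            ((uint32_t *)data)[offset] = 0;
+ *        else
+ *            ((uint64_t *)data)[offset] = 0;
+ *    }
+ *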
+ */ + mkdir1->md_buf = newdirbp; + ACQUIRE_LOCK(VFSTOUFS(mp)); + LIST_INSERT_HEAD(&ump->softdep_mkdirlisthd, mkdir1, md_mkdirs); + /* + * We must link the pagedep, allocdirect, and newdirblk for + * the initial file page so the pointer to the new directory + * is not written until the directory contents are live and + * any subsequent additions are not marked live until the + * block is reachable via the inode. + */ + if (pagedep_lookup(mp, newdirbp, newinum, 0, 0, &pagedep) == 0) + panic("setup_newdir: lost pagedep"); + LIST_FOREACH(wk, &newdirbp->b_dep, wk_list) + if (wk->wk_type == D_ALLOCDIRECT) + break; + if (wk == NULL) + panic("setup_newdir: lost allocdirect"); + if (pagedep->pd_state & NEWBLOCK) + panic("setup_newdir: NEWBLOCK already set"); + newblk = WK_NEWBLK(wk); + pagedep->pd_state |= NEWBLOCK; + pagedep->pd_newdirblk = newdirblk; + newdirblk->db_pagedep = pagedep; + WORKLIST_INSERT(&newblk->nb_newdirblk, &newdirblk->db_list); + WORKLIST_INSERT(&newdirblk->db_mkdir, &mkdir1->md_list); + /* + * Look up the inodedep for the parent directory so that we + * can link mkdir2 into the pending dotdot jaddref or + * the inode write if there is none. If the inode is + * ALLCOMPLETE and no jaddref is present all dependencies have + * been satisfied and mkdir2 can be freed. + */ + inodedep_lookup(mp, dinum, 0, &inodedep); + if (MOUNTEDSUJ(mp)) { + if (inodedep == NULL) + panic("setup_newdir: Lost parent."); + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref != NULL && jaddref->ja_parent == newinum && + (jaddref->ja_state & MKDIR_PARENT), + ("setup_newdir: bad dotdot jaddref %p", jaddref)); + LIST_INSERT_HEAD(&ump->softdep_mkdirlisthd, mkdir2, md_mkdirs); + mkdir2->md_jaddref = jaddref; + jaddref->ja_mkdir = mkdir2; + } else if (inodedep == NULL || + (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) { + dap->da_state &= ~MKDIR_PARENT; + WORKITEM_FREE(mkdir2, D_MKDIR); + mkdir2 = NULL; + } else { + LIST_INSERT_HEAD(&ump->softdep_mkdirlisthd, mkdir2, md_mkdirs); + WORKLIST_INSERT(&inodedep->id_bufwait, &mkdir2->md_list); + } + *mkdirp = mkdir2; + + return (mkdir1); +} + +/* + * Directory entry addition dependencies. + * + * When adding a new directory entry, the inode (with its incremented link + * count) must be written to disk before the directory entry's pointer to it. + * Also, if the inode is newly allocated, the corresponding freemap must be + * updated (on disk) before the directory entry's pointer. These requirements + * are met via undo/redo on the directory entry's pointer, which consists + * simply of the inode number. + * + * As directory entries are added and deleted, the free space within a + * directory block can become fragmented. The ufs filesystem will compact + * a fragmented directory block to make space for a new entry. When this + * occurs, the offsets of previously added entries change. Any "diradd" + * dependency structures corresponding to these entries must be updated with + * the new offsets. + */ + +/* + * This routine is called after the in-memory inode's link + * count has been incremented, but before the directory entry's + * pointer to the inode has been set. 
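+ *
+ * (Illustrative sketch, not from the original source; hypothetical,
+ * simplified types.)  The undo/redo described above acts on nothing more
+ * than the entry's inode number: roll it back to zero before the directory
+ * block is written, roll it forward again once the dependencies are safe.
+ *
+ *    #include <stdint.h>
+ *
+ *    struct xdirent { uint32_t d_ino; };
+ *
+ *    // Before the block goes to disk: hide the uncommitted entry.
+ *    static uint32_t
+ *    roll_back(struct xdirent *de)
+ *    {
+ *        uint32_t saved = de->d_ino;
+ *
+ *        de->d_ino = 0;
+ *        return (saved);
+ *    }
+ *
+ *    // Once the inode and freemap dependencies are on disk: restore it.
+ *    static void
+ *    roll_forward(struct xdirent *de, uint32_t saved)
+ *    {
+ *        de->d_ino = saved;
+ *    }
+ *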
+ */ +int +softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk) + struct buf *bp; /* buffer containing directory block */ + struct inode *dp; /* inode for directory */ + off_t diroffset; /* offset of new entry in directory */ + ino_t newinum; /* inode referenced by new directory entry */ + struct buf *newdirbp; /* non-NULL => contents of new mkdir */ + int isnewblk; /* entry is in a newly allocated block */ +{ + int offset; /* offset of new entry within directory block */ + ufs_lbn_t lbn; /* block in directory containing new entry */ + struct fs *fs; + struct diradd *dap; + struct newblk *newblk; + struct pagedep *pagedep; + struct inodedep *inodedep; + struct newdirblk *newdirblk; + struct mkdir *mkdir1, *mkdir2; + struct jaddref *jaddref; + struct ufsmount *ump; + struct mount *mp; + int isindir; + + mp = ITOVFS(dp); + ump = VFSTOUFS(mp); + KASSERT(MOUNTEDSOFTDEP(mp) != 0, + ("softdep_setup_directory_add called on non-softdep filesystem")); + /* + * Whiteouts have no dependencies. + */ + if (newinum == WINO) { + if (newdirbp != NULL) + bdwrite(newdirbp); + return (0); + } + jaddref = NULL; + mkdir1 = mkdir2 = NULL; + fs = ump->um_fs; + lbn = lblkno(fs, diroffset); + offset = blkoff(fs, diroffset); + dap = malloc(sizeof(struct diradd), M_DIRADD, + M_SOFTDEP_FLAGS|M_ZERO); + workitem_alloc(&dap->da_list, D_DIRADD, mp); + dap->da_offset = offset; + dap->da_newinum = newinum; + dap->da_state = ATTACHED; + LIST_INIT(&dap->da_jwork); + isindir = bp->b_lblkno >= NDADDR; + newdirblk = NULL; + if (isnewblk && + (isindir ? blkoff(fs, diroffset) : fragoff(fs, diroffset)) == 0) { + newdirblk = malloc(sizeof(struct newdirblk), + M_NEWDIRBLK, M_SOFTDEP_FLAGS); + workitem_alloc(&newdirblk->db_list, D_NEWDIRBLK, mp); + LIST_INIT(&newdirblk->db_mkdir); + } + /* + * If we're creating a new directory setup the dependencies and set + * the dap state to wait for them. Otherwise it's COMPLETE and + * we can move on. + */ + if (newdirbp == NULL) { + dap->da_state |= DEPCOMPLETE; + ACQUIRE_LOCK(ump); + } else { + dap->da_state |= MKDIR_BODY | MKDIR_PARENT; + mkdir1 = setup_newdir(dap, newinum, dp->i_number, newdirbp, + &mkdir2); + } + /* + * Link into parent directory pagedep to await its being written. + */ + pagedep_lookup(mp, bp, dp->i_number, lbn, DEPALLOC, &pagedep); +#ifdef DEBUG + if (diradd_lookup(pagedep, offset) != NULL) + panic("softdep_setup_directory_add: %p already at off %d\n", + diradd_lookup(pagedep, offset), offset); +#endif + dap->da_pagedep = pagedep; + LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], dap, + da_pdlist); + inodedep_lookup(mp, newinum, DEPALLOC, &inodedep); + /* + * If we're journaling, link the diradd into the jaddref so it + * may be completed after the journal entry is written. Otherwise, + * link the diradd into its inodedep. If the inode is not yet + * written place it on the bufwait list, otherwise do the post-inode + * write processing to put it on the id_pendinghd list. + */ + if (MOUNTEDSUJ(mp)) { + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref != NULL && jaddref->ja_parent == dp->i_number, + ("softdep_setup_directory_add: bad jaddref %p", jaddref)); + jaddref->ja_diroff = diroffset; + jaddref->ja_diradd = dap; + add_to_journal(&jaddref->ja_list); + } else if ((inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) + diradd_inode_written(dap, inodedep); + else + WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list); + /* + * Add the journal entries for . and .. 
links now that the primary + * link is written. + */ + if (mkdir1 != NULL && MOUNTEDSUJ(mp)) { + jaddref = (struct jaddref *)TAILQ_PREV(&jaddref->ja_ref, + inoreflst, if_deps); + KASSERT(jaddref != NULL && + jaddref->ja_ino == jaddref->ja_parent && + (jaddref->ja_state & MKDIR_BODY), + ("softdep_setup_directory_add: bad dot jaddref %p", + jaddref)); + mkdir1->md_jaddref = jaddref; + jaddref->ja_mkdir = mkdir1; + /* + * It is important that the dotdot journal entry + * is added prior to the dot entry since dot writes + * both the dot and dotdot links. These both must + * be added after the primary link for the journal + * to remain consistent. + */ + add_to_journal(&mkdir2->md_jaddref->ja_list); + add_to_journal(&jaddref->ja_list); + } + /* + * If we are adding a new directory remember this diradd so that if + * we rename it we can keep the dot and dotdot dependencies. If + * we are adding a new name for an inode that has a mkdiradd we + * must be in rename and we have to move the dot and dotdot + * dependencies to this new name. The old name is being orphaned + * soon. + */ + if (mkdir1 != NULL) { + if (inodedep->id_mkdiradd != NULL) + panic("softdep_setup_directory_add: Existing mkdir"); + inodedep->id_mkdiradd = dap; + } else if (inodedep->id_mkdiradd) + merge_diradd(inodedep, dap); + if (newdirblk != NULL) { + /* + * There is nothing to do if we are already tracking + * this block. + */ + if ((pagedep->pd_state & NEWBLOCK) != 0) { + WORKITEM_FREE(newdirblk, D_NEWDIRBLK); + FREE_LOCK(ump); + return (0); + } + if (newblk_lookup(mp, dbtofsb(fs, bp->b_blkno), 0, &newblk) + == 0) + panic("softdep_setup_directory_add: lost entry"); + WORKLIST_INSERT(&newblk->nb_newdirblk, &newdirblk->db_list); + pagedep->pd_state |= NEWBLOCK; + pagedep->pd_newdirblk = newdirblk; + newdirblk->db_pagedep = pagedep; + FREE_LOCK(ump); + /* + * If we extended into an indirect signal direnter to sync. + */ + if (isindir) + return (1); + return (0); + } + FREE_LOCK(ump); + return (0); +} + +/* + * This procedure is called to change the offset of a directory + * entry when compacting a directory block which must be owned + * exclusively by the caller. Note that the actual entry movement + * must be done in this procedure to ensure that no I/O completions + * occur while the move is in progress. + */ +void +softdep_change_directoryentry_offset(bp, dp, base, oldloc, newloc, entrysize) + struct buf *bp; /* Buffer holding directory block. */ + struct inode *dp; /* inode for directory */ + caddr_t base; /* address of dp->i_offset */ + caddr_t oldloc; /* address of old directory location */ + caddr_t newloc; /* address of new directory location */ + int entrysize; /* size of directory entry */ +{ + int offset, oldoffset, newoffset; + struct pagedep *pagedep; + struct jmvref *jmvref; + struct diradd *dap; + struct direct *de; + struct mount *mp; + struct ufsmount *ump; + ufs_lbn_t lbn; + int flags; + + mp = ITOVFS(dp); + ump = VFSTOUFS(mp); + KASSERT(MOUNTEDSOFTDEP(mp) != 0, + ("softdep_change_directoryentry_offset called on " + "non-softdep filesystem")); + de = (struct direct *)oldloc; + jmvref = NULL; + flags = 0; + /* + * Moves are always journaled as it would be too complex to + * determine if any affected adds or removes are present in the + * journal. 
+ */ + if (MOUNTEDSUJ(mp)) { + flags = DEPALLOC; + jmvref = newjmvref(dp, de->d_ino, + dp->i_offset + (oldloc - base), + dp->i_offset + (newloc - base)); + } + lbn = lblkno(ump->um_fs, dp->i_offset); + offset = blkoff(ump->um_fs, dp->i_offset); + oldoffset = offset + (oldloc - base); + newoffset = offset + (newloc - base); + ACQUIRE_LOCK(ump); + if (pagedep_lookup(mp, bp, dp->i_number, lbn, flags, &pagedep) == 0) + goto done; + dap = diradd_lookup(pagedep, oldoffset); + if (dap) { + dap->da_offset = newoffset; + newoffset = DIRADDHASH(newoffset); + oldoffset = DIRADDHASH(oldoffset); + if ((dap->da_state & ALLCOMPLETE) != ALLCOMPLETE && + newoffset != oldoffset) { + LIST_REMOVE(dap, da_pdlist); + LIST_INSERT_HEAD(&pagedep->pd_diraddhd[newoffset], + dap, da_pdlist); + } + } +done: + if (jmvref) { + jmvref->jm_pagedep = pagedep; + LIST_INSERT_HEAD(&pagedep->pd_jmvrefhd, jmvref, jm_deps); + add_to_journal(&jmvref->jm_list); + } + bcopy(oldloc, newloc, entrysize); + FREE_LOCK(ump); +} + +/* + * Move the mkdir dependencies and journal work from one diradd to another + * when renaming a directory. The new name must depend on the mkdir deps + * completing as the old name did. Directories can only have one valid link + * at a time so one must be canonical. + */ +static void +merge_diradd(inodedep, newdap) + struct inodedep *inodedep; + struct diradd *newdap; +{ + struct diradd *olddap; + struct mkdir *mkdir, *nextmd; + struct ufsmount *ump; + short state; + + olddap = inodedep->id_mkdiradd; + inodedep->id_mkdiradd = newdap; + if ((olddap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) { + newdap->da_state &= ~DEPCOMPLETE; + ump = VFSTOUFS(inodedep->id_list.wk_mp); + for (mkdir = LIST_FIRST(&ump->softdep_mkdirlisthd); mkdir; + mkdir = nextmd) { + nextmd = LIST_NEXT(mkdir, md_mkdirs); + if (mkdir->md_diradd != olddap) + continue; + mkdir->md_diradd = newdap; + state = mkdir->md_state & (MKDIR_PARENT | MKDIR_BODY); + newdap->da_state |= state; + olddap->da_state &= ~state; + if ((olddap->da_state & + (MKDIR_PARENT | MKDIR_BODY)) == 0) + break; + } + if ((olddap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) + panic("merge_diradd: unfound ref"); + } + /* + * Any mkdir related journal items are not safe to be freed until + * the new name is stable. + */ + jwork_move(&newdap->da_jwork, &olddap->da_jwork); + olddap->da_state |= DEPCOMPLETE; + complete_diradd(olddap); +} + +/* + * Move the diradd to the pending list when all diradd dependencies are + * complete. + */ +static void +complete_diradd(dap) + struct diradd *dap; +{ + struct pagedep *pagedep; + + if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) { + if (dap->da_state & DIRCHG) + pagedep = dap->da_previous->dm_pagedep; + else + pagedep = dap->da_pagedep; + LIST_REMOVE(dap, da_pdlist); + LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); + } +} + +/* + * Cancel a diradd when a dirrem overlaps with it. We must cancel the journal + * add entries and conditonally journal the remove. + */ +static void +cancel_diradd(dap, dirrem, jremref, dotremref, dotdotremref) + struct diradd *dap; + struct dirrem *dirrem; + struct jremref *jremref; + struct jremref *dotremref; + struct jremref *dotdotremref; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + struct inoref *inoref; + struct ufsmount *ump; + struct mkdir *mkdir; + + /* + * If no remove references were allocated we're on a non-journaled + * filesystem and can skip the cancel step. 
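+ *
+ * (Illustrative sketch, not from the original source; hypothetical names
+ * and a toy hash.)  The re-filing that
+ * softdep_change_directoryentry_offset() performs above is the usual
+ * "key changed, move the item to its new hash bucket" step, so later
+ * lookups by the new offset still find the entry:
+ *
+ *    #include <sys/queue.h>
+ *
+ *    #define NBUCKET     8
+ *    #define BUCKET(off) ((off) & (NBUCKET - 1))
+ *
+ *    struct xent {
+ *        int off;
+ *        LIST_ENTRY(xent) link;
+ *    };
+ *    LIST_HEAD(xbucket, xent);
+ *
+ *    static void
+ *    rekey(struct xbucket tab[NBUCKET], struct xent *e, int newoff)
+ *    {
+ *        if (BUCKET(newoff) != BUCKET(e->off)) {
+ *            LIST_REMOVE(e, link);
+ *            LIST_INSERT_HEAD(&tab[BUCKET(newoff)], e, link);
+ *        }
+ *        e->off = newoff;
+ *    }
+ *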
+ */ + if (jremref == NULL) { + free_diradd(dap, NULL); + return; + } + /* + * Cancel the primary name an free it if it does not require + * journaling. + */ + if (inodedep_lookup(dap->da_list.wk_mp, dap->da_newinum, + 0, &inodedep) != 0) { + /* Abort the addref that reference this diradd. */ + TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) { + if (inoref->if_list.wk_type != D_JADDREF) + continue; + jaddref = (struct jaddref *)inoref; + if (jaddref->ja_diradd != dap) + continue; + if (cancel_jaddref(jaddref, inodedep, + &dirrem->dm_jwork) == 0) { + free_jremref(jremref); + jremref = NULL; + } + break; + } + } + /* + * Cancel subordinate names and free them if they do not require + * journaling. + */ + if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) { + ump = VFSTOUFS(dap->da_list.wk_mp); + LIST_FOREACH(mkdir, &ump->softdep_mkdirlisthd, md_mkdirs) { + if (mkdir->md_diradd != dap) + continue; + if ((jaddref = mkdir->md_jaddref) == NULL) + continue; + mkdir->md_jaddref = NULL; + if (mkdir->md_state & MKDIR_PARENT) { + if (cancel_jaddref(jaddref, NULL, + &dirrem->dm_jwork) == 0) { + free_jremref(dotdotremref); + dotdotremref = NULL; + } + } else { + if (cancel_jaddref(jaddref, inodedep, + &dirrem->dm_jwork) == 0) { + free_jremref(dotremref); + dotremref = NULL; + } + } + } + } + + if (jremref) + journal_jremref(dirrem, jremref, inodedep); + if (dotremref) + journal_jremref(dirrem, dotremref, inodedep); + if (dotdotremref) + journal_jremref(dirrem, dotdotremref, NULL); + jwork_move(&dirrem->dm_jwork, &dap->da_jwork); + free_diradd(dap, &dirrem->dm_jwork); +} + +/* + * Free a diradd dependency structure. This routine must be called + * with splbio interrupts blocked. + */ +static void +free_diradd(dap, wkhd) + struct diradd *dap; + struct workhead *wkhd; +{ + struct dirrem *dirrem; + struct pagedep *pagedep; + struct inodedep *inodedep; + struct mkdir *mkdir, *nextmd; + struct ufsmount *ump; + + ump = VFSTOUFS(dap->da_list.wk_mp); + LOCK_OWNED(ump); + LIST_REMOVE(dap, da_pdlist); + if (dap->da_state & ONWORKLIST) + WORKLIST_REMOVE(&dap->da_list); + if ((dap->da_state & DIRCHG) == 0) { + pagedep = dap->da_pagedep; + } else { + dirrem = dap->da_previous; + pagedep = dirrem->dm_pagedep; + dirrem->dm_dirinum = pagedep->pd_ino; + dirrem->dm_state |= COMPLETE; + if (LIST_EMPTY(&dirrem->dm_jremrefhd)) + add_to_worklist(&dirrem->dm_list, 0); + } + if (inodedep_lookup(pagedep->pd_list.wk_mp, dap->da_newinum, + 0, &inodedep) != 0) + if (inodedep->id_mkdiradd == dap) + inodedep->id_mkdiradd = NULL; + if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) { + for (mkdir = LIST_FIRST(&ump->softdep_mkdirlisthd); mkdir; + mkdir = nextmd) { + nextmd = LIST_NEXT(mkdir, md_mkdirs); + if (mkdir->md_diradd != dap) + continue; + dap->da_state &= + ~(mkdir->md_state & (MKDIR_PARENT | MKDIR_BODY)); + LIST_REMOVE(mkdir, md_mkdirs); + if (mkdir->md_state & ONWORKLIST) + WORKLIST_REMOVE(&mkdir->md_list); + if (mkdir->md_jaddref != NULL) + panic("free_diradd: Unexpected jaddref"); + WORKITEM_FREE(mkdir, D_MKDIR); + if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) == 0) + break; + } + if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) + panic("free_diradd: unfound ref"); + } + if (inodedep) + free_inodedep(inodedep); + /* + * Free any journal segments waiting for the directory write. + */ + handle_jwork(&dap->da_jwork); + WORKITEM_FREE(dap, D_DIRADD); +} + +/* + * Directory entry removal dependencies. 
+ * + * When removing a directory entry, the entry's inode pointer must be + * zero'ed on disk before the corresponding inode's link count is decremented + * (possibly freeing the inode for re-use). This dependency is handled by + * updating the directory entry but delaying the inode count reduction until + * after the directory block has been written to disk. After this point, the + * inode count can be decremented whenever it is convenient. + */ + +/* + * This routine should be called immediately after removing + * a directory entry. The inode's link count should not be + * decremented by the calling procedure -- the soft updates + * code will do this task when it is safe. + */ +void +softdep_setup_remove(bp, dp, ip, isrmdir) + struct buf *bp; /* buffer containing directory block */ + struct inode *dp; /* inode for the directory being modified */ + struct inode *ip; /* inode for directory entry being removed */ + int isrmdir; /* indicates if doing RMDIR */ +{ + struct dirrem *dirrem, *prevdirrem; + struct inodedep *inodedep; + struct ufsmount *ump; + int direct; + + ump = ITOUMP(ip); + KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0, + ("softdep_setup_remove called on non-softdep filesystem")); + /* + * Allocate a new dirrem if appropriate and ACQUIRE_LOCK. We want + * newdirrem() to setup the full directory remove which requires + * isrmdir > 1. + */ + dirrem = newdirrem(bp, dp, ip, isrmdir, &prevdirrem); + /* + * Add the dirrem to the inodedep's pending remove list for quick + * discovery later. + */ + if (inodedep_lookup(UFSTOVFS(ump), ip->i_number, 0, &inodedep) == 0) + panic("softdep_setup_remove: Lost inodedep."); + KASSERT((inodedep->id_state & UNLINKED) == 0, ("inode unlinked")); + dirrem->dm_state |= ONDEPLIST; + LIST_INSERT_HEAD(&inodedep->id_dirremhd, dirrem, dm_inonext); + + /* + * If the COMPLETE flag is clear, then there were no active + * entries and we want to roll back to a zeroed entry until + * the new inode is committed to disk. If the COMPLETE flag is + * set then we have deleted an entry that never made it to + * disk. If the entry we deleted resulted from a name change, + * then the old name still resides on disk. We cannot delete + * its inode (returned to us in prevdirrem) until the zeroed + * directory entry gets to disk. The new inode has never been + * referenced on the disk, so can be deleted immediately. + */ + if ((dirrem->dm_state & COMPLETE) == 0) { + LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, dirrem, + dm_next); + FREE_LOCK(ump); + } else { + if (prevdirrem != NULL) + LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, + prevdirrem, dm_next); + dirrem->dm_dirinum = dirrem->dm_pagedep->pd_ino; + direct = LIST_EMPTY(&dirrem->dm_jremrefhd); + FREE_LOCK(ump); + if (direct) + handle_workitem_remove(dirrem, 0); + } +} + +/* + * Check for an entry matching 'offset' on both the pd_dirraddhd list and the + * pd_pendinghd list of a pagedep. + */ +static struct diradd * +diradd_lookup(pagedep, offset) + struct pagedep *pagedep; + int offset; +{ + struct diradd *dap; + + LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(offset)], da_pdlist) + if (dap->da_offset == offset) + return (dap); + LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist) + if (dap->da_offset == offset) + return (dap); + return (NULL); +} + +/* + * Search for a .. diradd dependency in a directory that is being removed. + * If the directory was renamed to a new parent we have a diradd rather + * than a mkdir for the .. entry. We need to cancel it now before + * it is found in truncate(). 
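+ *
+ * (Illustrative sketch, not from the original source; hypothetical,
+ * simplified names.)  A lookup such as diradd_lookup() above has to check
+ * both places a tracked entry can live: its hash bucket while dependencies
+ * are pending, and the pending list once it is ready to be committed.
+ *
+ *    #include <stddef.h>
+ *    #include <sys/queue.h>
+ *
+ *    struct xdad {
+ *        int off;
+ *        LIST_ENTRY(xdad) link;
+ *    };
+ *    LIST_HEAD(xdadlist, xdad);
+ *
+ *    static struct xdad *
+ *    lookup_by_offset(struct xdadlist *bucket, struct xdadlist *pending,
+ *        int off)
+ *    {
+ *        struct xdad *d;
+ *
+ *        LIST_FOREACH(d, bucket, link)
+ *            if (d->off == off)
+ *                return (d);
+ *        LIST_FOREACH(d, pending, link)
+ *            if (d->off == off)
+ *                return (d);
+ *        return (NULL);
+ *    }
+ *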
+ */ +static struct jremref * +cancel_diradd_dotdot(ip, dirrem, jremref) + struct inode *ip; + struct dirrem *dirrem; + struct jremref *jremref; +{ + struct pagedep *pagedep; + struct diradd *dap; + struct worklist *wk; + + if (pagedep_lookup(ITOVFS(ip), NULL, ip->i_number, 0, 0, &pagedep) == 0) + return (jremref); + dap = diradd_lookup(pagedep, DOTDOT_OFFSET); + if (dap == NULL) + return (jremref); + cancel_diradd(dap, dirrem, jremref, NULL, NULL); + /* + * Mark any journal work as belonging to the parent so it is freed + * with the .. reference. + */ + LIST_FOREACH(wk, &dirrem->dm_jwork, wk_list) + wk->wk_state |= MKDIR_PARENT; + return (NULL); +} + +/* + * Cancel the MKDIR_PARENT mkdir component of a diradd when we're going to + * replace it with a dirrem/diradd pair as a result of re-parenting a + * directory. This ensures that we don't simultaneously have a mkdir and + * a diradd for the same .. entry. + */ +static struct jremref * +cancel_mkdir_dotdot(ip, dirrem, jremref) + struct inode *ip; + struct dirrem *dirrem; + struct jremref *jremref; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + struct ufsmount *ump; + struct mkdir *mkdir; + struct diradd *dap; + struct mount *mp; + + mp = ITOVFS(ip); + if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0) + return (jremref); + dap = inodedep->id_mkdiradd; + if (dap == NULL || (dap->da_state & MKDIR_PARENT) == 0) + return (jremref); + ump = VFSTOUFS(inodedep->id_list.wk_mp); + for (mkdir = LIST_FIRST(&ump->softdep_mkdirlisthd); mkdir; + mkdir = LIST_NEXT(mkdir, md_mkdirs)) + if (mkdir->md_diradd == dap && mkdir->md_state & MKDIR_PARENT) + break; + if (mkdir == NULL) + panic("cancel_mkdir_dotdot: Unable to find mkdir\n"); + if ((jaddref = mkdir->md_jaddref) != NULL) { + mkdir->md_jaddref = NULL; + jaddref->ja_state &= ~MKDIR_PARENT; + if (inodedep_lookup(mp, jaddref->ja_ino, 0, &inodedep) == 0) + panic("cancel_mkdir_dotdot: Lost parent inodedep"); + if (cancel_jaddref(jaddref, inodedep, &dirrem->dm_jwork)) { + journal_jremref(dirrem, jremref, inodedep); + jremref = NULL; + } + } + if (mkdir->md_state & ONWORKLIST) + WORKLIST_REMOVE(&mkdir->md_list); + mkdir->md_state |= ALLCOMPLETE; + complete_mkdir(mkdir); + return (jremref); +} + +static void +journal_jremref(dirrem, jremref, inodedep) + struct dirrem *dirrem; + struct jremref *jremref; + struct inodedep *inodedep; +{ + + if (inodedep == NULL) + if (inodedep_lookup(jremref->jr_list.wk_mp, + jremref->jr_ref.if_ino, 0, &inodedep) == 0) + panic("journal_jremref: Lost inodedep"); + LIST_INSERT_HEAD(&dirrem->dm_jremrefhd, jremref, jr_deps); + TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jremref->jr_ref, if_deps); + add_to_journal(&jremref->jr_list); +} + +static void +dirrem_journal(dirrem, jremref, dotremref, dotdotremref) + struct dirrem *dirrem; + struct jremref *jremref; + struct jremref *dotremref; + struct jremref *dotdotremref; +{ + struct inodedep *inodedep; + + + if (inodedep_lookup(jremref->jr_list.wk_mp, jremref->jr_ref.if_ino, 0, + &inodedep) == 0) + panic("dirrem_journal: Lost inodedep"); + journal_jremref(dirrem, jremref, inodedep); + if (dotremref) + journal_jremref(dirrem, dotremref, inodedep); + if (dotdotremref) + journal_jremref(dirrem, dotdotremref, NULL); +} + +/* + * Allocate a new dirrem if appropriate and return it along with + * its associated pagedep. Called without a lock, returns with lock. 
+ */ +static struct dirrem * +newdirrem(bp, dp, ip, isrmdir, prevdirremp) + struct buf *bp; /* buffer containing directory block */ + struct inode *dp; /* inode for the directory being modified */ + struct inode *ip; /* inode for directory entry being removed */ + int isrmdir; /* indicates if doing RMDIR */ + struct dirrem **prevdirremp; /* previously referenced inode, if any */ +{ + int offset; + ufs_lbn_t lbn; + struct diradd *dap; + struct dirrem *dirrem; + struct pagedep *pagedep; + struct jremref *jremref; + struct jremref *dotremref; + struct jremref *dotdotremref; + struct vnode *dvp; + struct ufsmount *ump; + + /* + * Whiteouts have no deletion dependencies. + */ + if (ip == NULL) + panic("newdirrem: whiteout"); + dvp = ITOV(dp); + ump = ITOUMP(dp); + + /* + * If the system is over its limit and our filesystem is + * responsible for more than our share of that usage and + * we are not a snapshot, request some inodedep cleanup. + * Limiting the number of dirrem structures will also limit + * the number of freefile and freeblks structures. + */ + ACQUIRE_LOCK(ump); + if (!IS_SNAPSHOT(ip) && softdep_excess_items(ump, D_DIRREM)) + schedule_cleanup(UFSTOVFS(ump)); + else + FREE_LOCK(ump); + dirrem = malloc(sizeof(struct dirrem), M_DIRREM, M_SOFTDEP_FLAGS | + M_ZERO); + workitem_alloc(&dirrem->dm_list, D_DIRREM, dvp->v_mount); + LIST_INIT(&dirrem->dm_jremrefhd); + LIST_INIT(&dirrem->dm_jwork); + dirrem->dm_state = isrmdir ? RMDIR : 0; + dirrem->dm_oldinum = ip->i_number; + *prevdirremp = NULL; + /* + * Allocate remove reference structures to track journal write + * dependencies. We will always have one for the link and + * when doing directories we will always have one more for dot. + * When renaming a directory we skip the dotdot link change so + * this is not needed. + */ + jremref = dotremref = dotdotremref = NULL; + if (DOINGSUJ(dvp)) { + if (isrmdir) { + jremref = newjremref(dirrem, dp, ip, dp->i_offset, + ip->i_effnlink + 2); + dotremref = newjremref(dirrem, ip, ip, DOT_OFFSET, + ip->i_effnlink + 1); + dotdotremref = newjremref(dirrem, ip, dp, DOTDOT_OFFSET, + dp->i_effnlink + 1); + dotdotremref->jr_state |= MKDIR_PARENT; + } else + jremref = newjremref(dirrem, dp, ip, dp->i_offset, + ip->i_effnlink + 1); + } + ACQUIRE_LOCK(ump); + lbn = lblkno(ump->um_fs, dp->i_offset); + offset = blkoff(ump->um_fs, dp->i_offset); + pagedep_lookup(UFSTOVFS(ump), bp, dp->i_number, lbn, DEPALLOC, + &pagedep); + dirrem->dm_pagedep = pagedep; + dirrem->dm_offset = offset; + /* + * If we're renaming a .. link to a new directory, cancel any + * existing MKDIR_PARENT mkdir. If it has already been canceled + * the jremref is preserved for any potential diradd in this + * location. This can not coincide with a rmdir. + */ + if (dp->i_offset == DOTDOT_OFFSET) { + if (isrmdir) + panic("newdirrem: .. directory change during remove?"); + jremref = cancel_mkdir_dotdot(dp, dirrem, jremref); + } + /* + * If we're removing a directory search for the .. dependency now and + * cancel it. Any pending journal work will be added to the dirrem + * to be completed when the workitem remove completes. + */ + if (isrmdir) + dotdotremref = cancel_diradd_dotdot(ip, dirrem, dotdotremref); + /* + * Check for a diradd dependency for the same directory entry. + * If present, then both dependencies become obsolete and can + * be de-allocated. + */ + dap = diradd_lookup(pagedep, offset); + if (dap == NULL) { + /* + * Link the jremref structures into the dirrem so they are + * written prior to the pagedep. 
+ */ + if (jremref) + dirrem_journal(dirrem, jremref, dotremref, + dotdotremref); + return (dirrem); + } + /* + * Must be ATTACHED at this point. + */ + if ((dap->da_state & ATTACHED) == 0) + panic("newdirrem: not ATTACHED"); + if (dap->da_newinum != ip->i_number) + panic("newdirrem: inum %ju should be %ju", + (uintmax_t)ip->i_number, (uintmax_t)dap->da_newinum); + /* + * If we are deleting a changed name that never made it to disk, + * then return the dirrem describing the previous inode (which + * represents the inode currently referenced from this entry on disk). + */ + if ((dap->da_state & DIRCHG) != 0) { + *prevdirremp = dap->da_previous; + dap->da_state &= ~DIRCHG; + dap->da_pagedep = pagedep; + } + /* + * We are deleting an entry that never made it to disk. + * Mark it COMPLETE so we can delete its inode immediately. + */ + dirrem->dm_state |= COMPLETE; + cancel_diradd(dap, dirrem, jremref, dotremref, dotdotremref); +#ifdef SUJ_DEBUG + if (isrmdir == 0) { + struct worklist *wk; + + LIST_FOREACH(wk, &dirrem->dm_jwork, wk_list) + if (wk->wk_state & (MKDIR_BODY | MKDIR_PARENT)) + panic("bad wk %p (0x%X)\n", wk, wk->wk_state); + } +#endif + + return (dirrem); +} + +/* + * Directory entry change dependencies. + * + * Changing an existing directory entry requires that an add operation + * be completed first followed by a deletion. The semantics for the addition + * are identical to the description of adding a new entry above except + * that the rollback is to the old inode number rather than zero. Once + * the addition dependency is completed, the removal is done as described + * in the removal routine above. + */ + +/* + * This routine should be called immediately after changing + * a directory entry. The inode's link count should not be + * decremented by the calling procedure -- the soft updates + * code will perform this task when it is safe. + */ +void +softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir) + struct buf *bp; /* buffer containing directory block */ + struct inode *dp; /* inode for the directory being modified */ + struct inode *ip; /* inode for directory entry being removed */ + ino_t newinum; /* new inode number for changed entry */ + int isrmdir; /* indicates if doing RMDIR */ +{ + int offset; + struct diradd *dap = NULL; + struct dirrem *dirrem, *prevdirrem; + struct pagedep *pagedep; + struct inodedep *inodedep; + struct jaddref *jaddref; + struct mount *mp; + struct ufsmount *ump; + + mp = ITOVFS(dp); + ump = VFSTOUFS(mp); + offset = blkoff(ump->um_fs, dp->i_offset); + KASSERT(MOUNTEDSOFTDEP(mp) != 0, + ("softdep_setup_directory_change called on non-softdep filesystem")); + + /* + * Whiteouts do not need diradd dependencies. + */ + if (newinum != WINO) { + dap = malloc(sizeof(struct diradd), + M_DIRADD, M_SOFTDEP_FLAGS|M_ZERO); + workitem_alloc(&dap->da_list, D_DIRADD, mp); + dap->da_state = DIRCHG | ATTACHED | DEPCOMPLETE; + dap->da_offset = offset; + dap->da_newinum = newinum; + LIST_INIT(&dap->da_jwork); + } + + /* + * Allocate a new dirrem and ACQUIRE_LOCK. + */ + dirrem = newdirrem(bp, dp, ip, isrmdir, &prevdirrem); + pagedep = dirrem->dm_pagedep; + /* + * The possible values for isrmdir: + * 0 - non-directory file rename + * 1 - directory rename within same directory + * inum - directory rename to new directory of given inode number + * When renaming to a new directory, we are both deleting and + * creating a new directory entry, so the link count on the new + * directory should not change. 
Thus we do not need the followup + * dirrem which is usually done in handle_workitem_remove. We set + * the DIRCHG flag to tell handle_workitem_remove to skip the + * followup dirrem. + */ + if (isrmdir > 1) + dirrem->dm_state |= DIRCHG; + + /* + * Whiteouts have no additional dependencies, + * so just put the dirrem on the correct list. + */ + if (newinum == WINO) { + if ((dirrem->dm_state & COMPLETE) == 0) { + LIST_INSERT_HEAD(&pagedep->pd_dirremhd, dirrem, + dm_next); + } else { + dirrem->dm_dirinum = pagedep->pd_ino; + if (LIST_EMPTY(&dirrem->dm_jremrefhd)) + add_to_worklist(&dirrem->dm_list, 0); + } + FREE_LOCK(ump); + return; + } + /* + * Add the dirrem to the inodedep's pending remove list for quick + * discovery later. A valid nlinkdelta ensures that this lookup + * will not fail. + */ + if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0) + panic("softdep_setup_directory_change: Lost inodedep."); + dirrem->dm_state |= ONDEPLIST; + LIST_INSERT_HEAD(&inodedep->id_dirremhd, dirrem, dm_inonext); + + /* + * If the COMPLETE flag is clear, then there were no active + * entries and we want to roll back to the previous inode until + * the new inode is committed to disk. If the COMPLETE flag is + * set, then we have deleted an entry that never made it to disk. + * If the entry we deleted resulted from a name change, then the old + * inode reference still resides on disk. Any rollback that we do + * needs to be to that old inode (returned to us in prevdirrem). If + * the entry we deleted resulted from a create, then there is + * no entry on the disk, so we want to roll back to zero rather + * than the uncommitted inode. In either of the COMPLETE cases we + * want to immediately free the unwritten and unreferenced inode. + */ + if ((dirrem->dm_state & COMPLETE) == 0) { + dap->da_previous = dirrem; + } else { + if (prevdirrem != NULL) { + dap->da_previous = prevdirrem; + } else { + dap->da_state &= ~DIRCHG; + dap->da_pagedep = pagedep; + } + dirrem->dm_dirinum = pagedep->pd_ino; + if (LIST_EMPTY(&dirrem->dm_jremrefhd)) + add_to_worklist(&dirrem->dm_list, 0); + } + /* + * Lookup the jaddref for this journal entry. We must finish + * initializing it and make the diradd write dependent on it. + * If we're not journaling, put it on the id_bufwait list if the + * inode is not yet written. If it is written, do the post-inode + * write processing to put it on the id_pendinghd list. + */ + inodedep_lookup(mp, newinum, DEPALLOC, &inodedep); + if (MOUNTEDSUJ(mp)) { + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref != NULL && jaddref->ja_parent == dp->i_number, + ("softdep_setup_directory_change: bad jaddref %p", + jaddref)); + jaddref->ja_diroff = dp->i_offset; + jaddref->ja_diradd = dap; + LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], + dap, da_pdlist); + add_to_journal(&jaddref->ja_list); + } else if ((inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) { + dap->da_state |= COMPLETE; + LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); + WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list); + } else { + LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], + dap, da_pdlist); + WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list); + } + /* + * If we're making a new name for a directory that has not been + * committed when need to move the dot and dotdot references to + * this new name. 
+ */ + if (inodedep->id_mkdiradd && dp->i_offset != DOTDOT_OFFSET) + merge_diradd(inodedep, dap); + FREE_LOCK(ump); +} + +/* + * Called whenever the link count on an inode is changed. + * It creates an inode dependency so that the new reference(s) + * to the inode cannot be committed to disk until the updated + * inode has been written. + */ +void +softdep_change_linkcnt(ip) + struct inode *ip; /* the inode with the increased link count */ +{ + struct inodedep *inodedep; + struct ufsmount *ump; + + ump = ITOUMP(ip); + KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0, + ("softdep_change_linkcnt called on non-softdep filesystem")); + ACQUIRE_LOCK(ump); + inodedep_lookup(UFSTOVFS(ump), ip->i_number, DEPALLOC, &inodedep); + if (ip->i_nlink < ip->i_effnlink) + panic("softdep_change_linkcnt: bad delta"); + inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink; + FREE_LOCK(ump); +} + +/* + * Attach a sbdep dependency to the superblock buf so that we can keep + * track of the head of the linked list of referenced but unlinked inodes. + */ +void +softdep_setup_sbupdate(ump, fs, bp) + struct ufsmount *ump; + struct fs *fs; + struct buf *bp; +{ + struct sbdep *sbdep; + struct worklist *wk; + + KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0, + ("softdep_setup_sbupdate called on non-softdep filesystem")); + LIST_FOREACH(wk, &bp->b_dep, wk_list) + if (wk->wk_type == D_SBDEP) + break; + if (wk != NULL) + return; + sbdep = malloc(sizeof(struct sbdep), M_SBDEP, M_SOFTDEP_FLAGS); + workitem_alloc(&sbdep->sb_list, D_SBDEP, UFSTOVFS(ump)); + sbdep->sb_fs = fs; + sbdep->sb_ump = ump; + ACQUIRE_LOCK(ump); + WORKLIST_INSERT(&bp->b_dep, &sbdep->sb_list); + FREE_LOCK(ump); +} + +/* + * Return the first unlinked inodedep which is ready to be the head of the + * list. The inodedep and all those after it must have valid next pointers. + */ +static struct inodedep * +first_unlinked_inodedep(ump) + struct ufsmount *ump; +{ + struct inodedep *inodedep; + struct inodedep *idp; + + LOCK_OWNED(ump); + for (inodedep = TAILQ_LAST(&ump->softdep_unlinked, inodedeplst); + inodedep; inodedep = idp) { + if ((inodedep->id_state & UNLINKNEXT) == 0) + return (NULL); + idp = TAILQ_PREV(inodedep, inodedeplst, id_unlinked); + if (idp == NULL || (idp->id_state & UNLINKNEXT) == 0) + break; + if ((inodedep->id_state & UNLINKPREV) == 0) + break; + } + return (inodedep); +} + +/* + * Set the sujfree unlinked head pointer prior to writing a superblock. + */ +static void +initiate_write_sbdep(sbdep) + struct sbdep *sbdep; +{ + struct inodedep *inodedep; + struct fs *bpfs; + struct fs *fs; + + bpfs = sbdep->sb_fs; + fs = sbdep->sb_ump->um_fs; + inodedep = first_unlinked_inodedep(sbdep->sb_ump); + if (inodedep) { + fs->fs_sujfree = inodedep->id_ino; + inodedep->id_state |= UNLINKPREV; + } else + fs->fs_sujfree = 0; + bpfs->fs_sujfree = fs->fs_sujfree; +} + +/* + * After a superblock is written determine whether it must be written again + * due to a changing unlinked list head. + */ +static int +handle_written_sbdep(sbdep, bp) + struct sbdep *sbdep; + struct buf *bp; +{ + struct inodedep *inodedep; + struct fs *fs; + + LOCK_OWNED(sbdep->sb_ump); + fs = sbdep->sb_fs; + /* + * If the superblock doesn't match the in-memory list start over. 
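+ *
+ * [Editor's note, not part of the original source: the head of the
+ * unlinked list can change between the time this superblock write
+ * was initiated and now, e.g. when another inode has since become
+ * eligible to be the head.  In that case the fs_sujfree value that
+ * just went to disk is stale, so the buffer is re-dirtied and the
+ * sbdep is kept around for another pass.]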
+ */ + inodedep = first_unlinked_inodedep(sbdep->sb_ump); + if ((inodedep && fs->fs_sujfree != inodedep->id_ino) || + (inodedep == NULL && fs->fs_sujfree != 0)) { + bdirty(bp); + return (1); + } + WORKITEM_FREE(sbdep, D_SBDEP); + if (fs->fs_sujfree == 0) + return (0); + /* + * Now that we have a record of this inode in stable store allow it + * to be written to free up pending work. Inodes may see a lot of + * write activity after they are unlinked which we must not hold up. + */ + for (; inodedep != NULL; inodedep = TAILQ_NEXT(inodedep, id_unlinked)) { + if ((inodedep->id_state & UNLINKLINKS) != UNLINKLINKS) + panic("handle_written_sbdep: Bad inodedep %p (0x%X)", + inodedep, inodedep->id_state); + if (inodedep->id_state & UNLINKONLIST) + break; + inodedep->id_state |= DEPCOMPLETE | UNLINKONLIST; + } + + return (0); +} + +/* + * Mark an inodedep as unlinked and insert it into the in-memory unlinked list. + */ +static void +unlinked_inodedep(mp, inodedep) + struct mount *mp; + struct inodedep *inodedep; +{ + struct ufsmount *ump; + + ump = VFSTOUFS(mp); + LOCK_OWNED(ump); + if (MOUNTEDSUJ(mp) == 0) + return; + ump->um_fs->fs_fmod = 1; + if (inodedep->id_state & UNLINKED) + panic("unlinked_inodedep: %p already unlinked\n", inodedep); + inodedep->id_state |= UNLINKED; + TAILQ_INSERT_HEAD(&ump->softdep_unlinked, inodedep, id_unlinked); +} + +/* + * Remove an inodedep from the unlinked inodedep list. This may require + * disk writes if the inode has made it that far. + */ +static void +clear_unlinked_inodedep(inodedep) + struct inodedep *inodedep; +{ + struct ufsmount *ump; + struct inodedep *idp; + struct inodedep *idn; + struct fs *fs; + struct buf *bp; + ino_t ino; + ino_t nino; + ino_t pino; + int error; + + ump = VFSTOUFS(inodedep->id_list.wk_mp); + fs = ump->um_fs; + ino = inodedep->id_ino; + error = 0; + for (;;) { + LOCK_OWNED(ump); + KASSERT((inodedep->id_state & UNLINKED) != 0, + ("clear_unlinked_inodedep: inodedep %p not unlinked", + inodedep)); + /* + * If nothing has yet been written simply remove us from + * the in memory list and return. This is the most common + * case where handle_workitem_remove() loses the final + * reference. + */ + if ((inodedep->id_state & UNLINKLINKS) == 0) + break; + /* + * If we have a NEXT pointer and no PREV pointer we can simply + * clear NEXT's PREV and remove ourselves from the list. Be + * careful not to clear PREV if the superblock points at + * next as well. + */ + idn = TAILQ_NEXT(inodedep, id_unlinked); + if ((inodedep->id_state & UNLINKLINKS) == UNLINKNEXT) { + if (idn && fs->fs_sujfree != idn->id_ino) + idn->id_state &= ~UNLINKPREV; + break; + } + /* + * Here we have an inodedep which is actually linked into + * the list. We must remove it by forcing a write to the + * link before us, whether it be the superblock or an inode. + * Unfortunately the list may change while we're waiting + * on the buf lock for either resource so we must loop until + * we lock the right one. If both the superblock and an + * inode point to this inode we must clear the inode first + * followed by the superblock. 
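+ *
+ * [Editor's illustration, not part of the original source: the
+ * on-disk unlinked list is a singly linked chain rooted in the
+ * superblock, e.g. with hypothetical inode numbers
+ *
+ *     fs_sujfree -> 7 -> 12 -> 31 -> 0     (links are di_freelink)
+ *
+ * Removing 12 means reading 7's inode block and rewriting its
+ * di_freelink to 31; removing 7 means rewriting the superblock so
+ * that fs_sujfree points at 12 instead.]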
+ */ + idp = TAILQ_PREV(inodedep, inodedeplst, id_unlinked); + pino = 0; + if (idp && (idp->id_state & UNLINKNEXT)) + pino = idp->id_ino; + FREE_LOCK(ump); + if (pino == 0) { + bp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc), + (int)fs->fs_sbsize, 0, 0, 0); + } else { + error = bread(ump->um_devvp, + fsbtodb(fs, ino_to_fsba(fs, pino)), + (int)fs->fs_bsize, NOCRED, &bp); + if (error) + brelse(bp); + } + ACQUIRE_LOCK(ump); + if (error) + break; + /* If the list has changed restart the loop. */ + idp = TAILQ_PREV(inodedep, inodedeplst, id_unlinked); + nino = 0; + if (idp && (idp->id_state & UNLINKNEXT)) + nino = idp->id_ino; + if (nino != pino || + (inodedep->id_state & UNLINKPREV) != UNLINKPREV) { + FREE_LOCK(ump); + brelse(bp); + ACQUIRE_LOCK(ump); + continue; + } + nino = 0; + idn = TAILQ_NEXT(inodedep, id_unlinked); + if (idn) + nino = idn->id_ino; + /* + * Remove us from the in memory list. After this we cannot + * access the inodedep. + */ + KASSERT((inodedep->id_state & UNLINKED) != 0, + ("clear_unlinked_inodedep: inodedep %p not unlinked", + inodedep)); + inodedep->id_state &= ~(UNLINKED | UNLINKLINKS | UNLINKONLIST); + TAILQ_REMOVE(&ump->softdep_unlinked, inodedep, id_unlinked); + FREE_LOCK(ump); + /* + * The predecessor's next pointer is manually updated here + * so that the NEXT flag is never cleared for an element + * that is in the list. + */ + if (pino == 0) { + bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); + ffs_oldfscompat_write((struct fs *)bp->b_data, ump); + softdep_setup_sbupdate(ump, (struct fs *)bp->b_data, + bp); + } else if (fs->fs_magic == FS_UFS1_MAGIC) + ((struct ufs1_dinode *)bp->b_data + + ino_to_fsbo(fs, pino))->di_freelink = nino; + else + ((struct ufs2_dinode *)bp->b_data + + ino_to_fsbo(fs, pino))->di_freelink = nino; + /* + * If the bwrite fails we have no recourse to recover. The + * filesystem is corrupted already. + */ + bwrite(bp); + ACQUIRE_LOCK(ump); + /* + * If the superblock pointer still needs to be cleared force + * a write here. + */ + if (fs->fs_sujfree == ino) { + FREE_LOCK(ump); + bp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc), + (int)fs->fs_sbsize, 0, 0, 0); + bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); + ffs_oldfscompat_write((struct fs *)bp->b_data, ump); + softdep_setup_sbupdate(ump, (struct fs *)bp->b_data, + bp); + bwrite(bp); + ACQUIRE_LOCK(ump); + } + + if (fs->fs_sujfree != ino) + return; + panic("clear_unlinked_inodedep: Failed to clear free head"); + } + if (inodedep->id_ino == fs->fs_sujfree) + panic("clear_unlinked_inodedep: Freeing head of free list"); + inodedep->id_state &= ~(UNLINKED | UNLINKLINKS | UNLINKONLIST); + TAILQ_REMOVE(&ump->softdep_unlinked, inodedep, id_unlinked); + return; +} + +/* + * This workitem decrements the inode's link count. + * If the link count reaches zero, the file is removed. 
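+ *
+ * [Editor's worked example, not part of the original source: for a
+ * plain unlink(2) of a file with a single name, the dirrem handled
+ * here takes i_nlink from 1 to 0, places the inodedep on the
+ * unlinked list via unlinked_inodedep(), and leaves the actual
+ * inode de-allocation to the freefile machinery described further
+ * below.  For rmdir(2) the count drops by 2 (the entry and ".")
+ * and a follow-up dirrem is queued against the parent for the lost
+ * "..", unless DIRCHG marks this as a directory rename.]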
+ */ +static int +handle_workitem_remove(dirrem, flags) + struct dirrem *dirrem; + int flags; +{ + struct inodedep *inodedep; + struct workhead dotdotwk; + struct worklist *wk; + struct ufsmount *ump; + struct mount *mp; + struct vnode *vp; + struct inode *ip; + ino_t oldinum; + + if (dirrem->dm_state & ONWORKLIST) + panic("handle_workitem_remove: dirrem %p still on worklist", + dirrem); + oldinum = dirrem->dm_oldinum; + mp = dirrem->dm_list.wk_mp; + ump = VFSTOUFS(mp); + flags |= LK_EXCLUSIVE; + if (ffs_vgetf(mp, oldinum, flags, &vp, FFSV_FORCEINSMQ) != 0) + return (EBUSY); + ip = VTOI(vp); + ACQUIRE_LOCK(ump); + if ((inodedep_lookup(mp, oldinum, 0, &inodedep)) == 0) + panic("handle_workitem_remove: lost inodedep"); + if (dirrem->dm_state & ONDEPLIST) + LIST_REMOVE(dirrem, dm_inonext); + KASSERT(LIST_EMPTY(&dirrem->dm_jremrefhd), + ("handle_workitem_remove: Journal entries not written.")); + + /* + * Move all dependencies waiting on the remove to complete + * from the dirrem to the inode inowait list to be completed + * after the inode has been updated and written to disk. Any + * marked MKDIR_PARENT are saved to be completed when the .. ref + * is removed. + */ + LIST_INIT(&dotdotwk); + while ((wk = LIST_FIRST(&dirrem->dm_jwork)) != NULL) { + WORKLIST_REMOVE(wk); + if (wk->wk_state & MKDIR_PARENT) { + wk->wk_state &= ~MKDIR_PARENT; + WORKLIST_INSERT(&dotdotwk, wk); + continue; + } + WORKLIST_INSERT(&inodedep->id_inowait, wk); + } + LIST_SWAP(&dirrem->dm_jwork, &dotdotwk, worklist, wk_list); + /* + * Normal file deletion. + */ + if ((dirrem->dm_state & RMDIR) == 0) { + ip->i_nlink--; + DIP_SET(ip, i_nlink, ip->i_nlink); + ip->i_flag |= IN_CHANGE; + if (ip->i_nlink < ip->i_effnlink) + panic("handle_workitem_remove: bad file delta"); + if (ip->i_nlink == 0) + unlinked_inodedep(mp, inodedep); + inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink; + KASSERT(LIST_EMPTY(&dirrem->dm_jwork), + ("handle_workitem_remove: worklist not empty. %s", + TYPENAME(LIST_FIRST(&dirrem->dm_jwork)->wk_type))); + WORKITEM_FREE(dirrem, D_DIRREM); + FREE_LOCK(ump); + goto out; + } + /* + * Directory deletion. Decrement reference count for both the + * just deleted parent directory entry and the reference for ".". + * Arrange to have the reference count on the parent decremented + * to account for the loss of "..". + */ + ip->i_nlink -= 2; + DIP_SET(ip, i_nlink, ip->i_nlink); + ip->i_flag |= IN_CHANGE; + if (ip->i_nlink < ip->i_effnlink) + panic("handle_workitem_remove: bad dir delta"); + if (ip->i_nlink == 0) + unlinked_inodedep(mp, inodedep); + inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink; + /* + * Rename a directory to a new parent. Since, we are both deleting + * and creating a new directory entry, the link count on the new + * directory should not change. Thus we skip the followup dirrem. + */ + if (dirrem->dm_state & DIRCHG) { + KASSERT(LIST_EMPTY(&dirrem->dm_jwork), + ("handle_workitem_remove: DIRCHG and worklist not empty.")); + WORKITEM_FREE(dirrem, D_DIRREM); + FREE_LOCK(ump); + goto out; + } + dirrem->dm_state = ONDEPLIST; + dirrem->dm_oldinum = dirrem->dm_dirinum; + /* + * Place the dirrem on the parent's diremhd list. + */ + if (inodedep_lookup(mp, dirrem->dm_oldinum, 0, &inodedep) == 0) + panic("handle_workitem_remove: lost dir inodedep"); + LIST_INSERT_HEAD(&inodedep->id_dirremhd, dirrem, dm_inonext); + /* + * If the allocated inode has never been written to disk, then + * the on-disk inode is zero'ed and we can remove the file + * immediately. 
When journaling if the inode has been marked + * unlinked and not DEPCOMPLETE we know it can never be written. + */ + inodedep_lookup(mp, oldinum, 0, &inodedep); + if (inodedep == NULL || + (inodedep->id_state & (DEPCOMPLETE | UNLINKED)) == UNLINKED || + check_inode_unwritten(inodedep)) { + FREE_LOCK(ump); + vput(vp); + return handle_workitem_remove(dirrem, flags); + } + WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list); + FREE_LOCK(ump); + ip->i_flag |= IN_CHANGE; +out: + ffs_update(vp, 0); + vput(vp); + return (0); +} + +/* + * Inode de-allocation dependencies. + * + * When an inode's link count is reduced to zero, it can be de-allocated. We + * found it convenient to postpone de-allocation until after the inode is + * written to disk with its new link count (zero). At this point, all of the + * on-disk inode's block pointers are nullified and, with careful dependency + * list ordering, all dependencies related to the inode will be satisfied and + * the corresponding dependency structures de-allocated. So, if/when the + * inode is reused, there will be no mixing of old dependencies with new + * ones. This artificial dependency is set up by the block de-allocation + * procedure above (softdep_setup_freeblocks) and completed by the + * following procedure. + */ +static void +handle_workitem_freefile(freefile) + struct freefile *freefile; +{ + struct workhead wkhd; + struct fs *fs; + struct inodedep *idp; + struct ufsmount *ump; + int error; + + ump = VFSTOUFS(freefile->fx_list.wk_mp); + fs = ump->um_fs; +#ifdef DEBUG + ACQUIRE_LOCK(ump); + error = inodedep_lookup(UFSTOVFS(ump), freefile->fx_oldinum, 0, &idp); + FREE_LOCK(ump); + if (error) + panic("handle_workitem_freefile: inodedep %p survived", idp); +#endif + UFS_LOCK(ump); + fs->fs_pendinginodes -= 1; + UFS_UNLOCK(ump); + LIST_INIT(&wkhd); + LIST_SWAP(&freefile->fx_jwork, &wkhd, worklist, wk_list); + if ((error = ffs_freefile(ump, fs, freefile->fx_devvp, + freefile->fx_oldinum, freefile->fx_mode, &wkhd)) != 0) + softdep_error("handle_workitem_freefile", error); + ACQUIRE_LOCK(ump); + WORKITEM_FREE(freefile, D_FREEFILE); + FREE_LOCK(ump); +} + + +/* + * Helper function which unlinks marker element from work list and returns + * the next element on the list. + */ +static __inline struct worklist * +markernext(struct worklist *marker) +{ + struct worklist *next; + + next = LIST_NEXT(marker, wk_list); + LIST_REMOVE(marker, wk_list); + return next; +} + +/* + * Disk writes. + * + * The dependency structures constructed above are most actively used when file + * system blocks are written to disk. No constraints are placed on when a + * block can be written, but unsatisfied update dependencies are made safe by + * modifying (or replacing) the source memory for the duration of the disk + * write. When the disk write completes, the memory block is again brought + * up-to-date. + * + * In-core inode structure reclamation. + * + * Because there are a finite number of "in-core" inode structures, they are + * reused regularly. By transferring all inode-related dependencies to the + * in-memory inode block and indexing them separately (via "inodedep"s), we + * can allow "in-core" inode structures to be reused at any time and avoid + * any increase in contention. + * + * Called just before entering the device driver to initiate a new disk I/O. + * The buffer must be locked, thus, no I/O completion operations can occur + * while we are manipulating its associated dependencies. 
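+ *
+ * [Editor's illustration, not part of the original source: a typical
+ * round trip for a directory page holding a not-yet-safe new entry
+ * looks like
+ *
+ *     softdep_disk_io_initiation() -> initiate_write_filepage()
+ *         rolls the entry's d_ino back to zero (or to the previous
+ *         inode for a DIRCHG rename) and marks the diradd UNDONE;
+ *     the write completes with only safe contents on disk;
+ *     softdep_disk_write_complete() -> handle_written_filepage()
+ *         rolls forward whatever has since become safe and
+ *         re-dirties the buffer if any rollback remains.]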
+ */ +static void +softdep_disk_io_initiation(bp) + struct buf *bp; /* structure describing disk write to occur */ +{ + struct worklist *wk; + struct worklist marker; + struct inodedep *inodedep; + struct freeblks *freeblks; + struct jblkdep *jblkdep; + struct newblk *newblk; + struct ufsmount *ump; + + /* + * We only care about write operations. There should never + * be dependencies for reads. + */ + if (bp->b_iocmd != BIO_WRITE) + panic("softdep_disk_io_initiation: not write"); + + if (bp->b_vflags & BV_BKGRDINPROG) + panic("softdep_disk_io_initiation: Writing buffer with " + "background write in progress: %p", bp); + + ump = softdep_bp_to_mp(bp); + if (ump == NULL) + return; + + marker.wk_type = D_LAST + 1; /* Not a normal workitem */ + PHOLD(curproc); /* Don't swap out kernel stack */ + ACQUIRE_LOCK(ump); + /* + * Do any necessary pre-I/O processing. + */ + for (wk = LIST_FIRST(&bp->b_dep); wk != NULL; + wk = markernext(&marker)) { + LIST_INSERT_AFTER(wk, &marker, wk_list); + switch (wk->wk_type) { + + case D_PAGEDEP: + initiate_write_filepage(WK_PAGEDEP(wk), bp); + continue; + + case D_INODEDEP: + inodedep = WK_INODEDEP(wk); + if (inodedep->id_fs->fs_magic == FS_UFS1_MAGIC) + initiate_write_inodeblock_ufs1(inodedep, bp); + else + initiate_write_inodeblock_ufs2(inodedep, bp); + continue; + + case D_INDIRDEP: + initiate_write_indirdep(WK_INDIRDEP(wk), bp); + continue; + + case D_BMSAFEMAP: + initiate_write_bmsafemap(WK_BMSAFEMAP(wk), bp); + continue; + + case D_JSEG: + WK_JSEG(wk)->js_buf = NULL; + continue; + + case D_FREEBLKS: + freeblks = WK_FREEBLKS(wk); + jblkdep = LIST_FIRST(&freeblks->fb_jblkdephd); + /* + * We have to wait for the freeblks to be journaled + * before we can write an inodeblock with updated + * pointers. Be careful to arrange the marker so + * we revisit the freeblks if it's not removed by + * the first jwait(). + */ + if (jblkdep != NULL) { + LIST_REMOVE(&marker, wk_list); + LIST_INSERT_BEFORE(wk, &marker, wk_list); + jwait(&jblkdep->jb_list, MNT_WAIT); + } + continue; + case D_ALLOCDIRECT: + case D_ALLOCINDIR: + /* + * We have to wait for the jnewblk to be journaled + * before we can write to a block if the contents + * may be confused with an earlier file's indirect + * at recovery time. Handle the marker as described + * above. + */ + newblk = WK_NEWBLK(wk); + if (newblk->nb_jnewblk != NULL && + indirblk_lookup(newblk->nb_list.wk_mp, + newblk->nb_newblkno)) { + LIST_REMOVE(&marker, wk_list); + LIST_INSERT_BEFORE(wk, &marker, wk_list); + jwait(&newblk->nb_jnewblk->jn_list, MNT_WAIT); + } + continue; + + case D_SBDEP: + initiate_write_sbdep(WK_SBDEP(wk)); + continue; + + case D_MKDIR: + case D_FREEWORK: + case D_FREEDEP: + case D_JSEGDEP: + continue; + + default: + panic("handle_disk_io_initiation: Unexpected type %s", + TYPENAME(wk->wk_type)); + /* NOTREACHED */ + } + } + FREE_LOCK(ump); + PRELE(curproc); /* Allow swapout of kernel stack */ +} + +/* + * Called from within the procedure above to deal with unsatisfied + * allocation dependencies in a directory. The buffer must be locked, + * thus, no I/O completion operations can occur while we are + * manipulating its associated dependencies. + */ +static void +initiate_write_filepage(pagedep, bp) + struct pagedep *pagedep; + struct buf *bp; +{ + struct jremref *jremref; + struct jmvref *jmvref; + struct dirrem *dirrem; + struct diradd *dap; + struct direct *ep; + int i; + + if (pagedep->pd_state & IOSTARTED) { + /* + * This can only happen if there is a driver that does not + * understand chaining. 
Here biodone will reissue the call + * to strategy for the incomplete buffers. + */ + printf("initiate_write_filepage: already started\n"); + return; + } + pagedep->pd_state |= IOSTARTED; + /* + * Wait for all journal remove dependencies to hit the disk. + * We can not allow any potentially conflicting directory adds + * to be visible before removes and rollback is too difficult. + * The per-filesystem lock may be dropped and re-acquired, however + * we hold the buf locked so the dependency can not go away. + */ + LIST_FOREACH(dirrem, &pagedep->pd_dirremhd, dm_next) + while ((jremref = LIST_FIRST(&dirrem->dm_jremrefhd)) != NULL) + jwait(&jremref->jr_list, MNT_WAIT); + while ((jmvref = LIST_FIRST(&pagedep->pd_jmvrefhd)) != NULL) + jwait(&jmvref->jm_list, MNT_WAIT); + for (i = 0; i < DAHASHSZ; i++) { + LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) { + ep = (struct direct *) + ((char *)bp->b_data + dap->da_offset); + if (ep->d_ino != dap->da_newinum) + panic("%s: dir inum %ju != new %ju", + "initiate_write_filepage", + (uintmax_t)ep->d_ino, + (uintmax_t)dap->da_newinum); + if (dap->da_state & DIRCHG) + ep->d_ino = dap->da_previous->dm_oldinum; + else + ep->d_ino = 0; + dap->da_state &= ~ATTACHED; + dap->da_state |= UNDONE; + } + } +} + +/* + * Version of initiate_write_inodeblock that handles UFS1 dinodes. + * Note that any bug fixes made to this routine must be done in the + * version found below. + * + * Called from within the procedure above to deal with unsatisfied + * allocation dependencies in an inodeblock. The buffer must be + * locked, thus, no I/O completion operations can occur while we + * are manipulating its associated dependencies. + */ +static void +initiate_write_inodeblock_ufs1(inodedep, bp) + struct inodedep *inodedep; + struct buf *bp; /* The inode block */ +{ + struct allocdirect *adp, *lastadp; + struct ufs1_dinode *dp; + struct ufs1_dinode *sip; + struct inoref *inoref; + struct ufsmount *ump; + struct fs *fs; + ufs_lbn_t i; +#ifdef INVARIANTS + ufs_lbn_t prevlbn = 0; +#endif + int deplist; + + if (inodedep->id_state & IOSTARTED) + panic("initiate_write_inodeblock_ufs1: already started"); + inodedep->id_state |= IOSTARTED; + fs = inodedep->id_fs; + ump = VFSTOUFS(inodedep->id_list.wk_mp); + LOCK_OWNED(ump); + dp = (struct ufs1_dinode *)bp->b_data + + ino_to_fsbo(fs, inodedep->id_ino); + + /* + * If we're on the unlinked list but have not yet written our + * next pointer initialize it here. + */ + if ((inodedep->id_state & (UNLINKED | UNLINKNEXT)) == UNLINKED) { + struct inodedep *inon; + + inon = TAILQ_NEXT(inodedep, id_unlinked); + dp->di_freelink = inon ? inon->id_ino : 0; + } + /* + * If the bitmap is not yet written, then the allocated + * inode cannot be written to disk. + */ + if ((inodedep->id_state & DEPCOMPLETE) == 0) { + if (inodedep->id_savedino1 != NULL) + panic("initiate_write_inodeblock_ufs1: I/O underway"); + FREE_LOCK(ump); + sip = malloc(sizeof(struct ufs1_dinode), + M_SAVEDINO, M_SOFTDEP_FLAGS); + ACQUIRE_LOCK(ump); + inodedep->id_savedino1 = sip; + *inodedep->id_savedino1 = *dp; + bzero((caddr_t)dp, sizeof(struct ufs1_dinode)); + dp->di_gen = inodedep->id_savedino1->di_gen; + dp->di_freelink = inodedep->id_savedino1->di_freelink; + return; + } + /* + * If no dependencies, then there is nothing to roll back. 
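+ *
+ * [Editor's note, not part of the original source: "rolling back"
+ * below means writing the inode as it was before the uncommitted
+ * allocations: busy direct-block allocdirects revert di_db[] to
+ * ad_oldblkno, pending indirect pointers are zeroed, di_size is
+ * trimmed so no unwritten fragment is claimed, and di_nlink reverts
+ * to the count in the first unwritten journal entry.
+ * handle_written_inodeblock() rolls all of this forward again once
+ * the write completes.]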
+ */ + inodedep->id_savedsize = dp->di_size; + inodedep->id_savedextsize = 0; + inodedep->id_savednlink = dp->di_nlink; + if (TAILQ_EMPTY(&inodedep->id_inoupdt) && + TAILQ_EMPTY(&inodedep->id_inoreflst)) + return; + /* + * Revert the link count to that of the first unwritten journal entry. + */ + inoref = TAILQ_FIRST(&inodedep->id_inoreflst); + if (inoref) + dp->di_nlink = inoref->if_nlink; + /* + * Set the dependencies to busy. + */ + for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; + adp = TAILQ_NEXT(adp, ad_next)) { +#ifdef INVARIANTS + if (deplist != 0 && prevlbn >= adp->ad_offset) + panic("softdep_write_inodeblock: lbn order"); + prevlbn = adp->ad_offset; + if (adp->ad_offset < NDADDR && + dp->di_db[adp->ad_offset] != adp->ad_newblkno) + panic("%s: direct pointer #%jd mismatch %d != %jd", + "softdep_write_inodeblock", + (intmax_t)adp->ad_offset, + dp->di_db[adp->ad_offset], + (intmax_t)adp->ad_newblkno); + if (adp->ad_offset >= NDADDR && + dp->di_ib[adp->ad_offset - NDADDR] != adp->ad_newblkno) + panic("%s: indirect pointer #%jd mismatch %d != %jd", + "softdep_write_inodeblock", + (intmax_t)adp->ad_offset - NDADDR, + dp->di_ib[adp->ad_offset - NDADDR], + (intmax_t)adp->ad_newblkno); + deplist |= 1 << adp->ad_offset; + if ((adp->ad_state & ATTACHED) == 0) + panic("softdep_write_inodeblock: Unknown state 0x%x", + adp->ad_state); +#endif /* INVARIANTS */ + adp->ad_state &= ~ATTACHED; + adp->ad_state |= UNDONE; + } + /* + * The on-disk inode cannot claim to be any larger than the last + * fragment that has been written. Otherwise, the on-disk inode + * might have fragments that were not the last block in the file + * which would corrupt the filesystem. + */ + for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; + lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) { + if (adp->ad_offset >= NDADDR) + break; + dp->di_db[adp->ad_offset] = adp->ad_oldblkno; + /* keep going until hitting a rollback to a frag */ + if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize) + continue; + dp->di_size = fs->fs_bsize * adp->ad_offset + adp->ad_oldsize; + for (i = adp->ad_offset + 1; i < NDADDR; i++) { +#ifdef INVARIANTS + if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0) + panic("softdep_write_inodeblock: lost dep1"); +#endif /* INVARIANTS */ + dp->di_db[i] = 0; + } + for (i = 0; i < NIADDR; i++) { +#ifdef INVARIANTS + if (dp->di_ib[i] != 0 && + (deplist & ((1 << NDADDR) << i)) == 0) + panic("softdep_write_inodeblock: lost dep2"); +#endif /* INVARIANTS */ + dp->di_ib[i] = 0; + } + return; + } + /* + * If we have zero'ed out the last allocated block of the file, + * roll back the size to the last currently allocated block. + * We know that this last allocated block is a full-sized as + * we already checked for fragments in the loop above. + */ + if (lastadp != NULL && + dp->di_size <= (lastadp->ad_offset + 1) * fs->fs_bsize) { + for (i = lastadp->ad_offset; i >= 0; i--) + if (dp->di_db[i] != 0) + break; + dp->di_size = (i + 1) * fs->fs_bsize; + } + /* + * The only dependencies are for indirect blocks. + * + * The file size for indirect block additions is not guaranteed. + * Such a guarantee would be non-trivial to achieve. The conventional + * synchronous write implementation also does not make this guarantee. + * Fsck should catch and fix discrepancies. Arguably, the file size + * can be over-estimated without destroying integrity when the file + * moves into the indirect blocks (i.e., is large). If we want to + * postpone fsck, we are stuck with this argument. 
+ */ + for (; adp; adp = TAILQ_NEXT(adp, ad_next)) + dp->di_ib[adp->ad_offset - NDADDR] = 0; +} + +/* + * Version of initiate_write_inodeblock that handles UFS2 dinodes. + * Note that any bug fixes made to this routine must be done in the + * version found above. + * + * Called from within the procedure above to deal with unsatisfied + * allocation dependencies in an inodeblock. The buffer must be + * locked, thus, no I/O completion operations can occur while we + * are manipulating its associated dependencies. + */ +static void +initiate_write_inodeblock_ufs2(inodedep, bp) + struct inodedep *inodedep; + struct buf *bp; /* The inode block */ +{ + struct allocdirect *adp, *lastadp; + struct ufs2_dinode *dp; + struct ufs2_dinode *sip; + struct inoref *inoref; + struct ufsmount *ump; + struct fs *fs; + ufs_lbn_t i; +#ifdef INVARIANTS + ufs_lbn_t prevlbn = 0; +#endif + int deplist; + + if (inodedep->id_state & IOSTARTED) + panic("initiate_write_inodeblock_ufs2: already started"); + inodedep->id_state |= IOSTARTED; + fs = inodedep->id_fs; + ump = VFSTOUFS(inodedep->id_list.wk_mp); + LOCK_OWNED(ump); + dp = (struct ufs2_dinode *)bp->b_data + + ino_to_fsbo(fs, inodedep->id_ino); + + /* + * If we're on the unlinked list but have not yet written our + * next pointer initialize it here. + */ + if ((inodedep->id_state & (UNLINKED | UNLINKNEXT)) == UNLINKED) { + struct inodedep *inon; + + inon = TAILQ_NEXT(inodedep, id_unlinked); + dp->di_freelink = inon ? inon->id_ino : 0; + } + /* + * If the bitmap is not yet written, then the allocated + * inode cannot be written to disk. + */ + if ((inodedep->id_state & DEPCOMPLETE) == 0) { + if (inodedep->id_savedino2 != NULL) + panic("initiate_write_inodeblock_ufs2: I/O underway"); + FREE_LOCK(ump); + sip = malloc(sizeof(struct ufs2_dinode), + M_SAVEDINO, M_SOFTDEP_FLAGS); + ACQUIRE_LOCK(ump); + inodedep->id_savedino2 = sip; + *inodedep->id_savedino2 = *dp; + bzero((caddr_t)dp, sizeof(struct ufs2_dinode)); + dp->di_gen = inodedep->id_savedino2->di_gen; + dp->di_freelink = inodedep->id_savedino2->di_freelink; + return; + } + /* + * If no dependencies, then there is nothing to roll back. + */ + inodedep->id_savedsize = dp->di_size; + inodedep->id_savedextsize = dp->di_extsize; + inodedep->id_savednlink = dp->di_nlink; + if (TAILQ_EMPTY(&inodedep->id_inoupdt) && + TAILQ_EMPTY(&inodedep->id_extupdt) && + TAILQ_EMPTY(&inodedep->id_inoreflst)) + return; + /* + * Revert the link count to that of the first unwritten journal entry. + */ + inoref = TAILQ_FIRST(&inodedep->id_inoreflst); + if (inoref) + dp->di_nlink = inoref->if_nlink; + + /* + * Set the ext data dependencies to busy. + */ + for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_extupdt); adp; + adp = TAILQ_NEXT(adp, ad_next)) { +#ifdef INVARIANTS + if (deplist != 0 && prevlbn >= adp->ad_offset) + panic("softdep_write_inodeblock: lbn order"); + prevlbn = adp->ad_offset; + if (dp->di_extb[adp->ad_offset] != adp->ad_newblkno) + panic("%s: direct pointer #%jd mismatch %jd != %jd", + "softdep_write_inodeblock", + (intmax_t)adp->ad_offset, + (intmax_t)dp->di_extb[adp->ad_offset], + (intmax_t)adp->ad_newblkno); + deplist |= 1 << adp->ad_offset; + if ((adp->ad_state & ATTACHED) == 0) + panic("softdep_write_inodeblock: Unknown state 0x%x", + adp->ad_state); +#endif /* INVARIANTS */ + adp->ad_state &= ~ATTACHED; + adp->ad_state |= UNDONE; + } + /* + * The on-disk inode cannot claim to be any larger than the last + * fragment that has been written. 
Otherwise, the on-disk inode + * might have fragments that were not the last block in the ext + * data which would corrupt the filesystem. + */ + for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_extupdt); adp; + lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) { + dp->di_extb[adp->ad_offset] = adp->ad_oldblkno; + /* keep going until hitting a rollback to a frag */ + if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize) + continue; + dp->di_extsize = fs->fs_bsize * adp->ad_offset + adp->ad_oldsize; + for (i = adp->ad_offset + 1; i < NXADDR; i++) { +#ifdef INVARIANTS + if (dp->di_extb[i] != 0 && (deplist & (1 << i)) == 0) + panic("softdep_write_inodeblock: lost dep1"); +#endif /* INVARIANTS */ + dp->di_extb[i] = 0; + } + lastadp = NULL; + break; + } + /* + * If we have zero'ed out the last allocated block of the ext + * data, roll back the size to the last currently allocated block. + * We know that this last allocated block is a full-sized as + * we already checked for fragments in the loop above. + */ + if (lastadp != NULL && + dp->di_extsize <= (lastadp->ad_offset + 1) * fs->fs_bsize) { + for (i = lastadp->ad_offset; i >= 0; i--) + if (dp->di_extb[i] != 0) + break; + dp->di_extsize = (i + 1) * fs->fs_bsize; + } + /* + * Set the file data dependencies to busy. + */ + for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; + adp = TAILQ_NEXT(adp, ad_next)) { +#ifdef INVARIANTS + if (deplist != 0 && prevlbn >= adp->ad_offset) + panic("softdep_write_inodeblock: lbn order"); + if ((adp->ad_state & ATTACHED) == 0) + panic("inodedep %p and adp %p not attached", inodedep, adp); + prevlbn = adp->ad_offset; + if (adp->ad_offset < NDADDR && + dp->di_db[adp->ad_offset] != adp->ad_newblkno) + panic("%s: direct pointer #%jd mismatch %jd != %jd", + "softdep_write_inodeblock", + (intmax_t)adp->ad_offset, + (intmax_t)dp->di_db[adp->ad_offset], + (intmax_t)adp->ad_newblkno); + if (adp->ad_offset >= NDADDR && + dp->di_ib[adp->ad_offset - NDADDR] != adp->ad_newblkno) + panic("%s indirect pointer #%jd mismatch %jd != %jd", + "softdep_write_inodeblock:", + (intmax_t)adp->ad_offset - NDADDR, + (intmax_t)dp->di_ib[adp->ad_offset - NDADDR], + (intmax_t)adp->ad_newblkno); + deplist |= 1 << adp->ad_offset; + if ((adp->ad_state & ATTACHED) == 0) + panic("softdep_write_inodeblock: Unknown state 0x%x", + adp->ad_state); +#endif /* INVARIANTS */ + adp->ad_state &= ~ATTACHED; + adp->ad_state |= UNDONE; + } + /* + * The on-disk inode cannot claim to be any larger than the last + * fragment that has been written. Otherwise, the on-disk inode + * might have fragments that were not the last block in the file + * which would corrupt the filesystem. 
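+ *
+ * [Editor's worked example, not part of the original source: with a
+ * hypothetical 32K block size, if direct block 2 rolls back to an
+ * old 8K fragment, di_db[2] reverts to that fragment's address and
+ * di_size is clipped to 2 * 32K + 8K = 72K; the remaining di_db[]
+ * slots beyond it and all di_ib[] slots are then zeroed so the
+ * on-disk inode never claims storage past that fragment.]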
+ */ + for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; + lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) { + if (adp->ad_offset >= NDADDR) + break; + dp->di_db[adp->ad_offset] = adp->ad_oldblkno; + /* keep going until hitting a rollback to a frag */ + if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize) + continue; + dp->di_size = fs->fs_bsize * adp->ad_offset + adp->ad_oldsize; + for (i = adp->ad_offset + 1; i < NDADDR; i++) { +#ifdef INVARIANTS + if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0) + panic("softdep_write_inodeblock: lost dep2"); +#endif /* INVARIANTS */ + dp->di_db[i] = 0; + } + for (i = 0; i < NIADDR; i++) { +#ifdef INVARIANTS + if (dp->di_ib[i] != 0 && + (deplist & ((1 << NDADDR) << i)) == 0) + panic("softdep_write_inodeblock: lost dep3"); +#endif /* INVARIANTS */ + dp->di_ib[i] = 0; + } + return; + } + /* + * If we have zero'ed out the last allocated block of the file, + * roll back the size to the last currently allocated block. + * We know that this last allocated block is a full-sized as + * we already checked for fragments in the loop above. + */ + if (lastadp != NULL && + dp->di_size <= (lastadp->ad_offset + 1) * fs->fs_bsize) { + for (i = lastadp->ad_offset; i >= 0; i--) + if (dp->di_db[i] != 0) + break; + dp->di_size = (i + 1) * fs->fs_bsize; + } + /* + * The only dependencies are for indirect blocks. + * + * The file size for indirect block additions is not guaranteed. + * Such a guarantee would be non-trivial to achieve. The conventional + * synchronous write implementation also does not make this guarantee. + * Fsck should catch and fix discrepancies. Arguably, the file size + * can be over-estimated without destroying integrity when the file + * moves into the indirect blocks (i.e., is large). If we want to + * postpone fsck, we are stuck with this argument. + */ + for (; adp; adp = TAILQ_NEXT(adp, ad_next)) + dp->di_ib[adp->ad_offset - NDADDR] = 0; +} + +/* + * Cancel an indirdep as a result of truncation. Release all of the + * children allocindirs and place their journal work on the appropriate + * list. + */ +static void +cancel_indirdep(indirdep, bp, freeblks) + struct indirdep *indirdep; + struct buf *bp; + struct freeblks *freeblks; +{ + struct allocindir *aip; + + /* + * None of the indirect pointers will ever be visible, + * so they can simply be tossed. GOINGAWAY ensures + * that allocated pointers will be saved in the buffer + * cache until they are freed. Note that they will + * only be able to be found by their physical address + * since the inode mapping the logical address will + * be gone. The save buffer used for the safe copy + * was allocated in setup_allocindir_phase2 using + * the physical address so it could be used for this + * purpose. Hence we swap the safe copy with the real + * copy, allowing the safe copy to be freed and holding + * on to the real copy for later use in indir_trunc. + */ + if (indirdep->ir_state & GOINGAWAY) + panic("cancel_indirdep: already gone"); + if ((indirdep->ir_state & DEPCOMPLETE) == 0) { + indirdep->ir_state |= DEPCOMPLETE; + LIST_REMOVE(indirdep, ir_next); + } + indirdep->ir_state |= GOINGAWAY; + /* + * Pass in bp for blocks still have journal writes + * pending so we can cancel them on their own. 
+ */ + while ((aip = LIST_FIRST(&indirdep->ir_deplisthd)) != NULL) + cancel_allocindir(aip, bp, freeblks, 0); + while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != NULL) + cancel_allocindir(aip, NULL, freeblks, 0); + while ((aip = LIST_FIRST(&indirdep->ir_writehd)) != NULL) + cancel_allocindir(aip, NULL, freeblks, 0); + while ((aip = LIST_FIRST(&indirdep->ir_completehd)) != NULL) + cancel_allocindir(aip, NULL, freeblks, 0); + /* + * If there are pending partial truncations we need to keep the + * old block copy around until they complete. This is because + * the current b_data is not a perfect superset of the available + * blocks. + */ + if (TAILQ_EMPTY(&indirdep->ir_trunc)) + bcopy(bp->b_data, indirdep->ir_savebp->b_data, bp->b_bcount); + else + bcopy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount); + WORKLIST_REMOVE(&indirdep->ir_list); + WORKLIST_INSERT(&indirdep->ir_savebp->b_dep, &indirdep->ir_list); + indirdep->ir_bp = NULL; + indirdep->ir_freeblks = freeblks; +} + +/* + * Free an indirdep once it no longer has new pointers to track. + */ +static void +free_indirdep(indirdep) + struct indirdep *indirdep; +{ + + KASSERT(TAILQ_EMPTY(&indirdep->ir_trunc), + ("free_indirdep: Indir trunc list not empty.")); + KASSERT(LIST_EMPTY(&indirdep->ir_completehd), + ("free_indirdep: Complete head not empty.")); + KASSERT(LIST_EMPTY(&indirdep->ir_writehd), + ("free_indirdep: write head not empty.")); + KASSERT(LIST_EMPTY(&indirdep->ir_donehd), + ("free_indirdep: done head not empty.")); + KASSERT(LIST_EMPTY(&indirdep->ir_deplisthd), + ("free_indirdep: deplist head not empty.")); + KASSERT((indirdep->ir_state & DEPCOMPLETE), + ("free_indirdep: %p still on newblk list.", indirdep)); + KASSERT(indirdep->ir_saveddata == NULL, + ("free_indirdep: %p still has saved data.", indirdep)); + if (indirdep->ir_state & ONWORKLIST) + WORKLIST_REMOVE(&indirdep->ir_list); + WORKITEM_FREE(indirdep, D_INDIRDEP); +} + +/* + * Called before a write to an indirdep. This routine is responsible for + * rolling back pointers to a safe state which includes only those + * allocindirs which have been completed. + */ +static void +initiate_write_indirdep(indirdep, bp) + struct indirdep *indirdep; + struct buf *bp; +{ + struct ufsmount *ump; + + indirdep->ir_state |= IOSTARTED; + if (indirdep->ir_state & GOINGAWAY) + panic("disk_io_initiation: indirdep gone"); + /* + * If there are no remaining dependencies, this will be writing + * the real pointers. + */ + if (LIST_EMPTY(&indirdep->ir_deplisthd) && + TAILQ_EMPTY(&indirdep->ir_trunc)) + return; + /* + * Replace up-to-date version with safe version. + */ + if (indirdep->ir_saveddata == NULL) { + ump = VFSTOUFS(indirdep->ir_list.wk_mp); + LOCK_OWNED(ump); + FREE_LOCK(ump); + indirdep->ir_saveddata = malloc(bp->b_bcount, M_INDIRDEP, + M_SOFTDEP_FLAGS); + ACQUIRE_LOCK(ump); + } + indirdep->ir_state &= ~ATTACHED; + indirdep->ir_state |= UNDONE; + bcopy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount); + bcopy(indirdep->ir_savebp->b_data, bp->b_data, + bp->b_bcount); +} + +/* + * Called when an inode has been cleared in a cg bitmap. 
This finally + * eliminates any canceled jaddrefs + */ +void +softdep_setup_inofree(mp, bp, ino, wkhd) + struct mount *mp; + struct buf *bp; + ino_t ino; + struct workhead *wkhd; +{ + struct worklist *wk, *wkn; + struct inodedep *inodedep; + struct ufsmount *ump; + uint8_t *inosused; + struct cg *cgp; + struct fs *fs; + + KASSERT(MOUNTEDSOFTDEP(mp) != 0, + ("softdep_setup_inofree called on non-softdep filesystem")); + ump = VFSTOUFS(mp); + ACQUIRE_LOCK(ump); + fs = ump->um_fs; + cgp = (struct cg *)bp->b_data; + inosused = cg_inosused(cgp); + if (isset(inosused, ino % fs->fs_ipg)) + panic("softdep_setup_inofree: inode %ju not freed.", + (uintmax_t)ino); + if (inodedep_lookup(mp, ino, 0, &inodedep)) + panic("softdep_setup_inofree: ino %ju has existing inodedep %p", + (uintmax_t)ino, inodedep); + if (wkhd) { + LIST_FOREACH_SAFE(wk, wkhd, wk_list, wkn) { + if (wk->wk_type != D_JADDREF) + continue; + WORKLIST_REMOVE(wk); + /* + * We can free immediately even if the jaddref + * isn't attached in a background write as now + * the bitmaps are reconciled. + */ + wk->wk_state |= COMPLETE | ATTACHED; + free_jaddref(WK_JADDREF(wk)); + } + jwork_move(&bp->b_dep, wkhd); + } + FREE_LOCK(ump); +} + + +/* + * Called via ffs_blkfree() after a set of frags has been cleared from a cg + * map. Any dependencies waiting for the write to clear are added to the + * buf's list and any jnewblks that are being canceled are discarded + * immediately. + */ +void +softdep_setup_blkfree(mp, bp, blkno, frags, wkhd) + struct mount *mp; + struct buf *bp; + ufs2_daddr_t blkno; + int frags; + struct workhead *wkhd; +{ + struct bmsafemap *bmsafemap; + struct jnewblk *jnewblk; + struct ufsmount *ump; + struct worklist *wk; + struct fs *fs; +#ifdef SUJ_DEBUG + uint8_t *blksfree; + struct cg *cgp; + ufs2_daddr_t jstart; + ufs2_daddr_t jend; + ufs2_daddr_t end; + long bno; + int i; +#endif + + CTR3(KTR_SUJ, + "softdep_setup_blkfree: blkno %jd frags %d wk head %p", + blkno, frags, wkhd); + + ump = VFSTOUFS(mp); + KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0, + ("softdep_setup_blkfree called on non-softdep filesystem")); + ACQUIRE_LOCK(ump); + /* Lookup the bmsafemap so we track when it is dirty. */ + fs = ump->um_fs; + bmsafemap = bmsafemap_lookup(mp, bp, dtog(fs, blkno), NULL); + /* + * Detach any jnewblks which have been canceled. They must linger + * until the bitmap is cleared again by ffs_blkfree() to prevent + * an unjournaled allocation from hitting the disk. + */ + if (wkhd) { + while ((wk = LIST_FIRST(wkhd)) != NULL) { + CTR2(KTR_SUJ, + "softdep_setup_blkfree: blkno %jd wk type %d", + blkno, wk->wk_type); + WORKLIST_REMOVE(wk); + if (wk->wk_type != D_JNEWBLK) { + WORKLIST_INSERT(&bmsafemap->sm_freehd, wk); + continue; + } + jnewblk = WK_JNEWBLK(wk); + KASSERT(jnewblk->jn_state & GOINGAWAY, + ("softdep_setup_blkfree: jnewblk not canceled.")); +#ifdef SUJ_DEBUG + /* + * Assert that this block is free in the bitmap + * before we discard the jnewblk. + */ + cgp = (struct cg *)bp->b_data; + blksfree = cg_blksfree(cgp); + bno = dtogd(fs, jnewblk->jn_blkno); + for (i = jnewblk->jn_oldfrags; + i < jnewblk->jn_frags; i++) { + if (isset(blksfree, bno + i)) + continue; + panic("softdep_setup_blkfree: not free"); + } +#endif + /* + * Even if it's not attached we can free immediately + * as the new bitmap is correct. + */ + wk->wk_state |= COMPLETE | ATTACHED; + free_jnewblk(jnewblk); + } + } + +#ifdef SUJ_DEBUG + /* + * Assert that we are not freeing a block which has an outstanding + * allocation dependency. 
+ */ + fs = VFSTOUFS(mp)->um_fs; + bmsafemap = bmsafemap_lookup(mp, bp, dtog(fs, blkno), NULL); + end = blkno + frags; + LIST_FOREACH(jnewblk, &bmsafemap->sm_jnewblkhd, jn_deps) { + /* + * Don't match against blocks that will be freed when the + * background write is done. + */ + if ((jnewblk->jn_state & (ATTACHED | COMPLETE | DEPCOMPLETE)) == + (COMPLETE | DEPCOMPLETE)) + continue; + jstart = jnewblk->jn_blkno + jnewblk->jn_oldfrags; + jend = jnewblk->jn_blkno + jnewblk->jn_frags; + if ((blkno >= jstart && blkno < jend) || + (end > jstart && end <= jend)) { + printf("state 0x%X %jd - %d %d dep %p\n", + jnewblk->jn_state, jnewblk->jn_blkno, + jnewblk->jn_oldfrags, jnewblk->jn_frags, + jnewblk->jn_dep); + panic("softdep_setup_blkfree: " + "%jd-%jd(%d) overlaps with %jd-%jd", + blkno, end, frags, jstart, jend); + } + } +#endif + FREE_LOCK(ump); +} + +/* + * Revert a block allocation when the journal record that describes it + * is not yet written. + */ +static int +jnewblk_rollback(jnewblk, fs, cgp, blksfree) + struct jnewblk *jnewblk; + struct fs *fs; + struct cg *cgp; + uint8_t *blksfree; +{ + ufs1_daddr_t fragno; + long cgbno, bbase; + int frags, blk; + int i; + + frags = 0; + cgbno = dtogd(fs, jnewblk->jn_blkno); + /* + * We have to test which frags need to be rolled back. We may + * be operating on a stale copy when doing background writes. + */ + for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags; i++) + if (isclr(blksfree, cgbno + i)) + frags++; + if (frags == 0) + return (0); + /* + * This is mostly ffs_blkfree() sans some validation and + * superblock updates. + */ + if (frags == fs->fs_frag) { + fragno = fragstoblks(fs, cgbno); + ffs_setblock(fs, blksfree, fragno); + ffs_clusteracct(fs, cgp, fragno, 1); + cgp->cg_cs.cs_nbfree++; + } else { + cgbno += jnewblk->jn_oldfrags; + bbase = cgbno - fragnum(fs, cgbno); + /* Decrement the old frags. */ + blk = blkmap(fs, blksfree, bbase); + ffs_fragacct(fs, blk, cgp->cg_frsum, -1); + /* Deallocate the fragment */ + for (i = 0; i < frags; i++) + setbit(blksfree, cgbno + i); + cgp->cg_cs.cs_nffree += frags; + /* Add back in counts associated with the new frags */ + blk = blkmap(fs, blksfree, bbase); + ffs_fragacct(fs, blk, cgp->cg_frsum, 1); + /* If a complete block has been reassembled, account for it. */ + fragno = fragstoblks(fs, bbase); + if (ffs_isblock(fs, blksfree, fragno)) { + cgp->cg_cs.cs_nffree -= fs->fs_frag; + ffs_clusteracct(fs, cgp, fragno, 1); + cgp->cg_cs.cs_nbfree++; + } + } + stat_jnewblk++; + jnewblk->jn_state &= ~ATTACHED; + jnewblk->jn_state |= UNDONE; + + return (frags); +} + +static void +initiate_write_bmsafemap(bmsafemap, bp) + struct bmsafemap *bmsafemap; + struct buf *bp; /* The cg block. */ +{ + struct jaddref *jaddref; + struct jnewblk *jnewblk; + uint8_t *inosused; + uint8_t *blksfree; + struct cg *cgp; + struct fs *fs; + ino_t ino; + + /* + * If this is a background write, we did this at the time that + * the copy was made, so do not need to do it again. + */ + if (bmsafemap->sm_state & IOSTARTED) + return; + bmsafemap->sm_state |= IOSTARTED; + /* + * Clear any inode allocations which are pending journal writes. 
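+ *
+ * [Editor's note, not part of the original source: e.g. an inode
+ * allocated for a new file whose jaddref has not yet reached the
+ * journal is rolled back in the copy of the cg about to be written:
+ * its inosused bit is cleared, cs_nifree (and cs_ndir for
+ * directories) is adjusted, and the jaddref is marked UNDONE so the
+ * completion side can roll it forward after the write.]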
+ */ + if (LIST_FIRST(&bmsafemap->sm_jaddrefhd) != NULL) { + cgp = (struct cg *)bp->b_data; + fs = VFSTOUFS(bmsafemap->sm_list.wk_mp)->um_fs; + inosused = cg_inosused(cgp); + LIST_FOREACH(jaddref, &bmsafemap->sm_jaddrefhd, ja_bmdeps) { + ino = jaddref->ja_ino % fs->fs_ipg; + if (isset(inosused, ino)) { + if ((jaddref->ja_mode & IFMT) == IFDIR) + cgp->cg_cs.cs_ndir--; + cgp->cg_cs.cs_nifree++; + clrbit(inosused, ino); + jaddref->ja_state &= ~ATTACHED; + jaddref->ja_state |= UNDONE; + stat_jaddref++; + } else + panic("initiate_write_bmsafemap: inode %ju " + "marked free", (uintmax_t)jaddref->ja_ino); + } + } + /* + * Clear any block allocations which are pending journal writes. + */ + if (LIST_FIRST(&bmsafemap->sm_jnewblkhd) != NULL) { + cgp = (struct cg *)bp->b_data; + fs = VFSTOUFS(bmsafemap->sm_list.wk_mp)->um_fs; + blksfree = cg_blksfree(cgp); + LIST_FOREACH(jnewblk, &bmsafemap->sm_jnewblkhd, jn_deps) { + if (jnewblk_rollback(jnewblk, fs, cgp, blksfree)) + continue; + panic("initiate_write_bmsafemap: block %jd " + "marked free", jnewblk->jn_blkno); + } + } + /* + * Move allocation lists to the written lists so they can be + * cleared once the block write is complete. + */ + LIST_SWAP(&bmsafemap->sm_inodedephd, &bmsafemap->sm_inodedepwr, + inodedep, id_deps); + LIST_SWAP(&bmsafemap->sm_newblkhd, &bmsafemap->sm_newblkwr, + newblk, nb_deps); + LIST_SWAP(&bmsafemap->sm_freehd, &bmsafemap->sm_freewr, worklist, + wk_list); +} + +/* + * This routine is called during the completion interrupt + * service routine for a disk write (from the procedure called + * by the device driver to inform the filesystem caches of + * a request completion). It should be called early in this + * procedure, before the block is made available to other + * processes or other routines are called. + * + */ +static void +softdep_disk_write_complete(bp) + struct buf *bp; /* describes the completed disk write */ +{ + struct worklist *wk; + struct worklist *owk; + struct ufsmount *ump; + struct workhead reattach; + struct freeblks *freeblks; + struct buf *sbp; + + ump = softdep_bp_to_mp(bp); + if (ump == NULL) + return; + + /* + * If an error occurred while doing the write, then the data + * has not hit the disk and the dependencies cannot be processed. + * But we do have to go through and roll forward any dependencies + * that were rolled back before the disk write. + */ + ACQUIRE_LOCK(ump); + if ((bp->b_ioflags & BIO_ERROR) != 0 && (bp->b_flags & B_INVAL) == 0) { + LIST_FOREACH(wk, &bp->b_dep, wk_list) { + switch (wk->wk_type) { + + case D_PAGEDEP: + handle_written_filepage(WK_PAGEDEP(wk), bp, 0); + continue; + + case D_INODEDEP: + handle_written_inodeblock(WK_INODEDEP(wk), + bp, 0); + continue; + + case D_BMSAFEMAP: + handle_written_bmsafemap(WK_BMSAFEMAP(wk), + bp, 0); + continue; + + case D_INDIRDEP: + handle_written_indirdep(WK_INDIRDEP(wk), + bp, &sbp, 0); + continue; + default: + /* nothing to roll forward */ + continue; + } + } + FREE_LOCK(ump); + return; + } + LIST_INIT(&reattach); + + /* + * Ump SU lock must not be released anywhere in this code segment. 
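+ *
+ * [Editor's note, not part of the original source: each handler
+ * below returns nonzero when it had to leave rolled-back state
+ * behind (for instance handle_written_inodeblock() restoring a
+ * saved dinode); such work items collect on the local "reattach"
+ * list and are hung back on bp->b_dep at the bottom so the
+ * re-dirtied buffer carries them through the next write.]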
+ */ + sbp = NULL; + owk = NULL; + while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) { + WORKLIST_REMOVE(wk); + atomic_add_long(&dep_write[wk->wk_type], 1); + if (wk == owk) + panic("duplicate worklist: %p\n", wk); + owk = wk; + switch (wk->wk_type) { + + case D_PAGEDEP: + if (handle_written_filepage(WK_PAGEDEP(wk), bp, + WRITESUCCEEDED)) + WORKLIST_INSERT(&reattach, wk); + continue; + + case D_INODEDEP: + if (handle_written_inodeblock(WK_INODEDEP(wk), bp, + WRITESUCCEEDED)) + WORKLIST_INSERT(&reattach, wk); + continue; + + case D_BMSAFEMAP: + if (handle_written_bmsafemap(WK_BMSAFEMAP(wk), bp, + WRITESUCCEEDED)) + WORKLIST_INSERT(&reattach, wk); + continue; + + case D_MKDIR: + handle_written_mkdir(WK_MKDIR(wk), MKDIR_BODY); + continue; + + case D_ALLOCDIRECT: + wk->wk_state |= COMPLETE; + handle_allocdirect_partdone(WK_ALLOCDIRECT(wk), NULL); + continue; + + case D_ALLOCINDIR: + wk->wk_state |= COMPLETE; + handle_allocindir_partdone(WK_ALLOCINDIR(wk)); + continue; + + case D_INDIRDEP: + if (handle_written_indirdep(WK_INDIRDEP(wk), bp, &sbp, + WRITESUCCEEDED)) + WORKLIST_INSERT(&reattach, wk); + continue; + + case D_FREEBLKS: + wk->wk_state |= COMPLETE; + freeblks = WK_FREEBLKS(wk); + if ((wk->wk_state & ALLCOMPLETE) == ALLCOMPLETE && + LIST_EMPTY(&freeblks->fb_jblkdephd)) + add_to_worklist(wk, WK_NODELAY); + continue; + + case D_FREEWORK: + handle_written_freework(WK_FREEWORK(wk)); + break; + + case D_JSEGDEP: + free_jsegdep(WK_JSEGDEP(wk)); + continue; + + case D_JSEG: + handle_written_jseg(WK_JSEG(wk), bp); + continue; + + case D_SBDEP: + if (handle_written_sbdep(WK_SBDEP(wk), bp)) + WORKLIST_INSERT(&reattach, wk); + continue; + + case D_FREEDEP: + free_freedep(WK_FREEDEP(wk)); + continue; + + default: + panic("handle_disk_write_complete: Unknown type %s", + TYPENAME(wk->wk_type)); + /* NOTREACHED */ + } + } + /* + * Reattach any requests that must be redone. + */ + while ((wk = LIST_FIRST(&reattach)) != NULL) { + WORKLIST_REMOVE(wk); + WORKLIST_INSERT(&bp->b_dep, wk); + } + FREE_LOCK(ump); + if (sbp) + brelse(sbp); +} + +/* + * Called from within softdep_disk_write_complete above. Note that + * this routine is always called from interrupt level with further + * splbio interrupts blocked. + */ +static void +handle_allocdirect_partdone(adp, wkhd) + struct allocdirect *adp; /* the completed allocdirect */ + struct workhead *wkhd; /* Work to do when inode is writtne. */ +{ + struct allocdirectlst *listhead; + struct allocdirect *listadp; + struct inodedep *inodedep; + long bsize; + + if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE) + return; + /* + * The on-disk inode cannot claim to be any larger than the last + * fragment that has been written. Otherwise, the on-disk inode + * might have fragments that were not the last block in the file + * which would corrupt the filesystem. Thus, we cannot free any + * allocdirects after one whose ad_oldblkno claims a fragment as + * these blocks must be rolled back to zero before writing the inode. + * We check the currently active set of allocdirects in id_inoupdt + * or id_extupdt as appropriate. 
+ */ + inodedep = adp->ad_inodedep; + bsize = inodedep->id_fs->fs_bsize; + if (adp->ad_state & EXTDATA) + listhead = &inodedep->id_extupdt; + else + listhead = &inodedep->id_inoupdt; + TAILQ_FOREACH(listadp, listhead, ad_next) { + /* found our block */ + if (listadp == adp) + break; + /* continue if ad_oldlbn is not a fragment */ + if (listadp->ad_oldsize == 0 || + listadp->ad_oldsize == bsize) + continue; + /* hit a fragment */ + return; + } + /* + * If we have reached the end of the current list without + * finding the just finished dependency, then it must be + * on the future dependency list. Future dependencies cannot + * be freed until they are moved to the current list. + */ + if (listadp == NULL) { +#ifdef DEBUG + if (adp->ad_state & EXTDATA) + listhead = &inodedep->id_newextupdt; + else + listhead = &inodedep->id_newinoupdt; + TAILQ_FOREACH(listadp, listhead, ad_next) + /* found our block */ + if (listadp == adp) + break; + if (listadp == NULL) + panic("handle_allocdirect_partdone: lost dep"); +#endif /* DEBUG */ + return; + } + /* + * If we have found the just finished dependency, then queue + * it along with anything that follows it that is complete. + * Since the pointer has not yet been written in the inode + * as the dependency prevents it, place the allocdirect on the + * bufwait list where it will be freed once the pointer is + * valid. + */ + if (wkhd == NULL) + wkhd = &inodedep->id_bufwait; + for (; adp; adp = listadp) { + listadp = TAILQ_NEXT(adp, ad_next); + if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE) + return; + TAILQ_REMOVE(listhead, adp, ad_next); + WORKLIST_INSERT(wkhd, &adp->ad_block.nb_list); + } +} + +/* + * Called from within softdep_disk_write_complete above. This routine + * completes successfully written allocindirs. + */ +static void +handle_allocindir_partdone(aip) + struct allocindir *aip; /* the completed allocindir */ +{ + struct indirdep *indirdep; + + if ((aip->ai_state & ALLCOMPLETE) != ALLCOMPLETE) + return; + indirdep = aip->ai_indirdep; + LIST_REMOVE(aip, ai_next); + /* + * Don't set a pointer while the buffer is undergoing IO or while + * we have active truncations. + */ + if (indirdep->ir_state & UNDONE || !TAILQ_EMPTY(&indirdep->ir_trunc)) { + LIST_INSERT_HEAD(&indirdep->ir_donehd, aip, ai_next); + return; + } + if (indirdep->ir_state & UFS1FMT) + ((ufs1_daddr_t *)indirdep->ir_savebp->b_data)[aip->ai_offset] = + aip->ai_newblkno; + else + ((ufs2_daddr_t *)indirdep->ir_savebp->b_data)[aip->ai_offset] = + aip->ai_newblkno; + /* + * Await the pointer write before freeing the allocindir. + */ + LIST_INSERT_HEAD(&indirdep->ir_writehd, aip, ai_next); +} + +/* + * Release segments held on a jwork list. + */ +static void +handle_jwork(wkhd) + struct workhead *wkhd; +{ + struct worklist *wk; + + while ((wk = LIST_FIRST(wkhd)) != NULL) { + WORKLIST_REMOVE(wk); + switch (wk->wk_type) { + case D_JSEGDEP: + free_jsegdep(WK_JSEGDEP(wk)); + continue; + case D_FREEDEP: + free_freedep(WK_FREEDEP(wk)); + continue; + case D_FREEFRAG: + rele_jseg(WK_JSEG(WK_FREEFRAG(wk)->ff_jdep)); + WORKITEM_FREE(wk, D_FREEFRAG); + continue; + case D_FREEWORK: + handle_written_freework(WK_FREEWORK(wk)); + continue; + default: + panic("handle_jwork: Unknown type %s\n", + TYPENAME(wk->wk_type)); + } + } +} + +/* + * Handle the bufwait list on an inode when it is safe to release items + * held there. This normally happens after an inode block is written but + * may be delayed and handled later if there are pending journal items that + * are not yet safe to be released. 
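+ *
+ * [Editor's note, not part of the original source: typical residents
+ * of the bufwait list are diradds waiting for this inode to reach
+ * the disk before their directory entry may be written, mkdir
+ * parent references, dirrems and freefrags that may now be queued,
+ * and at most one freefile, which is handed back to the caller so
+ * that it is queued only after everything else.]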
+ */ +static struct freefile * +handle_bufwait(inodedep, refhd) + struct inodedep *inodedep; + struct workhead *refhd; +{ + struct jaddref *jaddref; + struct freefile *freefile; + struct worklist *wk; + + freefile = NULL; + while ((wk = LIST_FIRST(&inodedep->id_bufwait)) != NULL) { + WORKLIST_REMOVE(wk); + switch (wk->wk_type) { + case D_FREEFILE: + /* + * We defer adding freefile to the worklist + * until all other additions have been made to + * ensure that it will be done after all the + * old blocks have been freed. + */ + if (freefile != NULL) + panic("handle_bufwait: freefile"); + freefile = WK_FREEFILE(wk); + continue; + + case D_MKDIR: + handle_written_mkdir(WK_MKDIR(wk), MKDIR_PARENT); + continue; + + case D_DIRADD: + diradd_inode_written(WK_DIRADD(wk), inodedep); + continue; + + case D_FREEFRAG: + wk->wk_state |= COMPLETE; + if ((wk->wk_state & ALLCOMPLETE) == ALLCOMPLETE) + add_to_worklist(wk, 0); + continue; + + case D_DIRREM: + wk->wk_state |= COMPLETE; + add_to_worklist(wk, 0); + continue; + + case D_ALLOCDIRECT: + case D_ALLOCINDIR: + free_newblk(WK_NEWBLK(wk)); + continue; + + case D_JNEWBLK: + wk->wk_state |= COMPLETE; + free_jnewblk(WK_JNEWBLK(wk)); + continue; + + /* + * Save freed journal segments and add references on + * the supplied list which will delay their release + * until the cg bitmap is cleared on disk. + */ + case D_JSEGDEP: + if (refhd == NULL) + free_jsegdep(WK_JSEGDEP(wk)); + else + WORKLIST_INSERT(refhd, wk); + continue; + + case D_JADDREF: + jaddref = WK_JADDREF(wk); + TAILQ_REMOVE(&inodedep->id_inoreflst, &jaddref->ja_ref, + if_deps); + /* + * Transfer any jaddrefs to the list to be freed with + * the bitmap if we're handling a removed file. + */ + if (refhd == NULL) { + wk->wk_state |= COMPLETE; + free_jaddref(jaddref); + } else + WORKLIST_INSERT(refhd, wk); + continue; + + default: + panic("handle_bufwait: Unknown type %p(%s)", + wk, TYPENAME(wk->wk_type)); + /* NOTREACHED */ + } + } + return (freefile); +} +/* + * Called from within softdep_disk_write_complete above to restore + * in-memory inode block contents to their most up-to-date state. Note + * that this routine is always called from interrupt level with further + * interrupts from this device blocked. + * + * If the write did not succeed, we will do all the roll-forward + * operations, but we will not take the actions that will allow its + * dependencies to be processed. + */ +static int +handle_written_inodeblock(inodedep, bp, flags) + struct inodedep *inodedep; + struct buf *bp; /* buffer containing the inode block */ + int flags; +{ + struct freefile *freefile; + struct allocdirect *adp, *nextadp; + struct ufs1_dinode *dp1 = NULL; + struct ufs2_dinode *dp2 = NULL; + struct workhead wkhd; + int hadchanges, fstype; + ino_t freelink; + + LIST_INIT(&wkhd); + hadchanges = 0; + freefile = NULL; + if ((inodedep->id_state & IOSTARTED) == 0) + panic("handle_written_inodeblock: not started"); + inodedep->id_state &= ~IOSTARTED; + if (inodedep->id_fs->fs_magic == FS_UFS1_MAGIC) { + fstype = UFS1; + dp1 = (struct ufs1_dinode *)bp->b_data + + ino_to_fsbo(inodedep->id_fs, inodedep->id_ino); + freelink = dp1->di_freelink; + } else { + fstype = UFS2; + dp2 = (struct ufs2_dinode *)bp->b_data + + ino_to_fsbo(inodedep->id_fs, inodedep->id_ino); + freelink = dp2->di_freelink; + } + /* + * Leave this inodeblock dirty until it's in the list. 
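+ *
+ * [Editor's note, not part of the original source: the check below
+ * verifies that the di_freelink value which just reached the disk
+ * matches this inodedep's in-memory successor; only then are
+ * UNLINKNEXT and the successor's UNLINKPREV set, which is what
+ * later lets first_unlinked_inodedep() treat this inode as part of
+ * the stable on-disk unlinked chain rooted at fs_sujfree.]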
+ */ + if ((inodedep->id_state & (UNLINKED | UNLINKONLIST)) == UNLINKED && + (flags & WRITESUCCEEDED)) { + struct inodedep *inon; + + inon = TAILQ_NEXT(inodedep, id_unlinked); + if ((inon == NULL && freelink == 0) || + (inon && inon->id_ino == freelink)) { + if (inon) + inon->id_state |= UNLINKPREV; + inodedep->id_state |= UNLINKNEXT; + } + hadchanges = 1; + } + /* + * If we had to rollback the inode allocation because of + * bitmaps being incomplete, then simply restore it. + * Keep the block dirty so that it will not be reclaimed until + * all associated dependencies have been cleared and the + * corresponding updates written to disk. + */ + if (inodedep->id_savedino1 != NULL) { + hadchanges = 1; + if (fstype == UFS1) + *dp1 = *inodedep->id_savedino1; + else + *dp2 = *inodedep->id_savedino2; + free(inodedep->id_savedino1, M_SAVEDINO); + inodedep->id_savedino1 = NULL; + if ((bp->b_flags & B_DELWRI) == 0) + stat_inode_bitmap++; + bdirty(bp); + /* + * If the inode is clear here and GOINGAWAY it will never + * be written. Process the bufwait and clear any pending + * work which may include the freefile. + */ + if (inodedep->id_state & GOINGAWAY) + goto bufwait; + return (1); + } + if (flags & WRITESUCCEEDED) + inodedep->id_state |= COMPLETE; + /* + * Roll forward anything that had to be rolled back before + * the inode could be updated. + */ + for (adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; adp = nextadp) { + nextadp = TAILQ_NEXT(adp, ad_next); + if (adp->ad_state & ATTACHED) + panic("handle_written_inodeblock: new entry"); + if (fstype == UFS1) { + if (adp->ad_offset < NDADDR) { + if (dp1->di_db[adp->ad_offset]!=adp->ad_oldblkno) + panic("%s %s #%jd mismatch %d != %jd", + "handle_written_inodeblock:", + "direct pointer", + (intmax_t)adp->ad_offset, + dp1->di_db[adp->ad_offset], + (intmax_t)adp->ad_oldblkno); + dp1->di_db[adp->ad_offset] = adp->ad_newblkno; + } else { + if (dp1->di_ib[adp->ad_offset - NDADDR] != 0) + panic("%s: %s #%jd allocated as %d", + "handle_written_inodeblock", + "indirect pointer", + (intmax_t)adp->ad_offset - NDADDR, + dp1->di_ib[adp->ad_offset - NDADDR]); + dp1->di_ib[adp->ad_offset - NDADDR] = + adp->ad_newblkno; + } + } else { + if (adp->ad_offset < NDADDR) { + if (dp2->di_db[adp->ad_offset]!=adp->ad_oldblkno) + panic("%s: %s #%jd %s %jd != %jd", + "handle_written_inodeblock", + "direct pointer", + (intmax_t)adp->ad_offset, "mismatch", + (intmax_t)dp2->di_db[adp->ad_offset], + (intmax_t)adp->ad_oldblkno); + dp2->di_db[adp->ad_offset] = adp->ad_newblkno; + } else { + if (dp2->di_ib[adp->ad_offset - NDADDR] != 0) + panic("%s: %s #%jd allocated as %jd", + "handle_written_inodeblock", + "indirect pointer", + (intmax_t)adp->ad_offset - NDADDR, + (intmax_t) + dp2->di_ib[adp->ad_offset - NDADDR]); + dp2->di_ib[adp->ad_offset - NDADDR] = + adp->ad_newblkno; + } + } + adp->ad_state &= ~UNDONE; + adp->ad_state |= ATTACHED; + hadchanges = 1; + } + for (adp = TAILQ_FIRST(&inodedep->id_extupdt); adp; adp = nextadp) { + nextadp = TAILQ_NEXT(adp, ad_next); + if (adp->ad_state & ATTACHED) + panic("handle_written_inodeblock: new entry"); + if (dp2->di_extb[adp->ad_offset] != adp->ad_oldblkno) + panic("%s: direct pointers #%jd %s %jd != %jd", + "handle_written_inodeblock", + (intmax_t)adp->ad_offset, "mismatch", + (intmax_t)dp2->di_extb[adp->ad_offset], + (intmax_t)adp->ad_oldblkno); + dp2->di_extb[adp->ad_offset] = adp->ad_newblkno; + adp->ad_state &= ~UNDONE; + adp->ad_state |= ATTACHED; + hadchanges = 1; + } + if (hadchanges && (bp->b_flags & B_DELWRI) == 0) + 
stat_direct_blk_ptrs++;
+ /*
+ * Reset the file size to its most up-to-date value.
+ */
+ if (inodedep->id_savedsize == -1 || inodedep->id_savedextsize == -1)
+ panic("handle_written_inodeblock: bad size");
+ if (inodedep->id_savednlink > LINK_MAX)
+ panic("handle_written_inodeblock: Invalid link count "
+ "%jd for inodedep %p", (uintmax_t)inodedep->id_savednlink,
+ inodedep);
+ if (fstype == UFS1) {
+ if (dp1->di_nlink != inodedep->id_savednlink) {
+ dp1->di_nlink = inodedep->id_savednlink;
+ hadchanges = 1;
+ }
+ if (dp1->di_size != inodedep->id_savedsize) {
+ dp1->di_size = inodedep->id_savedsize;
+ hadchanges = 1;
+ }
+ } else {
+ if (dp2->di_nlink != inodedep->id_savednlink) {
+ dp2->di_nlink = inodedep->id_savednlink;
+ hadchanges = 1;
+ }
+ if (dp2->di_size != inodedep->id_savedsize) {
+ dp2->di_size = inodedep->id_savedsize;
+ hadchanges = 1;
+ }
+ if (dp2->di_extsize != inodedep->id_savedextsize) {
+ dp2->di_extsize = inodedep->id_savedextsize;
+ hadchanges = 1;
+ }
+ }
+ inodedep->id_savedsize = -1;
+ inodedep->id_savedextsize = -1;
+ inodedep->id_savednlink = -1;
+ /*
+ * If there were any rollbacks in the inode block, then it must be
+ * marked dirty so that it will eventually get written back in
+ * its correct form.
+ */
+ if (hadchanges)
+ bdirty(bp);
+bufwait:
+ /*
+ * If the write did not succeed, we have done all the roll-forward
+ * operations, but we cannot take the actions that will allow its
+ * dependencies to be processed.
+ */
+ if ((flags & WRITESUCCEEDED) == 0)
+ return (hadchanges);
+ /*
+ * Process any allocdirects that completed during the update.
+ */
+ if ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != NULL)
+ handle_allocdirect_partdone(adp, &wkhd);
+ if ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != NULL)
+ handle_allocdirect_partdone(adp, &wkhd);
+ /*
+ * Process deallocations that were held pending until the
+ * inode had been written to disk. Freeing of the inode
+ * is delayed until after all blocks have been freed to
+ * avoid creation of new <vfsid, inum, lbn> triples
+ * before the old ones have been deleted. Completely
+ * unlinked inodes are not processed until the unlinked
+ * inode list is written or the last reference is removed.
+ */
+ if ((inodedep->id_state & (UNLINKED | UNLINKONLIST)) != UNLINKED) {
+ freefile = handle_bufwait(inodedep, NULL);
+ if (freefile && !LIST_EMPTY(&wkhd)) {
+ WORKLIST_INSERT(&wkhd, &freefile->fx_list);
+ freefile = NULL;
+ }
+ }
+ /*
+ * Move rolled forward dependency completions to the bufwait list
+ * now that those that were already written have been processed.
+ */
+ if (!LIST_EMPTY(&wkhd) && hadchanges == 0)
+ panic("handle_written_inodeblock: bufwait but no changes");
+ jwork_move(&inodedep->id_bufwait, &wkhd);
+
+ if (freefile != NULL) {
+ /*
+ * If the inode is goingaway it was never written. Fake up
+ * the state here so free_inodedep() can succeed.
+ */
+ if (inodedep->id_state & GOINGAWAY)
+ inodedep->id_state |= COMPLETE | DEPCOMPLETE;
+ if (free_inodedep(inodedep) == 0)
+ panic("handle_written_inodeblock: live inodedep %p",
+ inodedep);
+ add_to_worklist(&freefile->fx_list, 0);
+ return (0);
+ }
+
+ /*
+ * If no outstanding dependencies, free it.
+ */
+ if (free_inodedep(inodedep) ||
+ (TAILQ_FIRST(&inodedep->id_inoreflst) == 0 &&
+ TAILQ_FIRST(&inodedep->id_inoupdt) == 0 &&
+ TAILQ_FIRST(&inodedep->id_extupdt) == 0 &&
+ LIST_FIRST(&inodedep->id_bufwait) == 0))
+ return (0);
+ return (hadchanges);
+}
+
+/*
+ * Perform needed roll-forwards and kick off any dependencies that
+ * can now be processed.
+ * + * If the write did not succeed, we will do all the roll-forward + * operations, but we will not take the actions that will allow its + * dependencies to be processed. + */ +static int +handle_written_indirdep(indirdep, bp, bpp, flags) + struct indirdep *indirdep; + struct buf *bp; + struct buf **bpp; + int flags; +{ + struct allocindir *aip; + struct buf *sbp; + int chgs; + + if (indirdep->ir_state & GOINGAWAY) + panic("handle_written_indirdep: indirdep gone"); + if ((indirdep->ir_state & IOSTARTED) == 0) + panic("handle_written_indirdep: IO not started"); + chgs = 0; + /* + * If there were rollbacks revert them here. + */ + if (indirdep->ir_saveddata) { + bcopy(indirdep->ir_saveddata, bp->b_data, bp->b_bcount); + if (TAILQ_EMPTY(&indirdep->ir_trunc)) { + free(indirdep->ir_saveddata, M_INDIRDEP); + indirdep->ir_saveddata = NULL; + } + chgs = 1; + } + indirdep->ir_state &= ~(UNDONE | IOSTARTED); + indirdep->ir_state |= ATTACHED; + /* + * If the write did not succeed, we have done all the roll-forward + * operations, but we cannot take the actions that will allow its + * dependencies to be processed. + */ + if ((flags & WRITESUCCEEDED) == 0) { + stat_indir_blk_ptrs++; + bdirty(bp); + return (1); + } + /* + * Move allocindirs with written pointers to the completehd if + * the indirdep's pointer is not yet written. Otherwise + * free them here. + */ + while ((aip = LIST_FIRST(&indirdep->ir_writehd)) != NULL) { + LIST_REMOVE(aip, ai_next); + if ((indirdep->ir_state & DEPCOMPLETE) == 0) { + LIST_INSERT_HEAD(&indirdep->ir_completehd, aip, + ai_next); + newblk_freefrag(&aip->ai_block); + continue; + } + free_newblk(&aip->ai_block); + } + /* + * Move allocindirs that have finished dependency processing from + * the done list to the write list after updating the pointers. + */ + if (TAILQ_EMPTY(&indirdep->ir_trunc)) { + while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != NULL) { + handle_allocindir_partdone(aip); + if (aip == LIST_FIRST(&indirdep->ir_donehd)) + panic("disk_write_complete: not gone"); + chgs = 1; + } + } + /* + * Preserve the indirdep if there were any changes or if it is not + * yet valid on disk. + */ + if (chgs) { + stat_indir_blk_ptrs++; + bdirty(bp); + return (1); + } + /* + * If there were no changes we can discard the savedbp and detach + * ourselves from the buf. We are only carrying completed pointers + * in this case. + */ + sbp = indirdep->ir_savebp; + sbp->b_flags |= B_INVAL | B_NOCACHE; + indirdep->ir_savebp = NULL; + indirdep->ir_bp = NULL; + if (*bpp != NULL) + panic("handle_written_indirdep: bp already exists."); + *bpp = sbp; + /* + * The indirdep may not be freed until its parent points at it. + */ + if (indirdep->ir_state & DEPCOMPLETE) + free_indirdep(indirdep); + + return (0); +} + +/* + * Process a diradd entry after its dependent inode has been written. + * This routine must be called with splbio interrupts blocked. + */ +static void +diradd_inode_written(dap, inodedep) + struct diradd *dap; + struct inodedep *inodedep; +{ + + dap->da_state |= COMPLETE; + complete_diradd(dap); + WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list); +} + +/* + * Returns true if the bmsafemap will have rollbacks when written. Must only + * be called with the per-filesystem lock and the buf lock on the cg held. 
+ */ +static int +bmsafemap_backgroundwrite(bmsafemap, bp) + struct bmsafemap *bmsafemap; + struct buf *bp; +{ + int dirty; + + LOCK_OWNED(VFSTOUFS(bmsafemap->sm_list.wk_mp)); + dirty = !LIST_EMPTY(&bmsafemap->sm_jaddrefhd) | + !LIST_EMPTY(&bmsafemap->sm_jnewblkhd); + /* + * If we're initiating a background write we need to process the + * rollbacks as they exist now, not as they exist when IO starts. + * No other consumers will look at the contents of the shadowed + * buf so this is safe to do here. + */ + if (bp->b_xflags & BX_BKGRDMARKER) + initiate_write_bmsafemap(bmsafemap, bp); + + return (dirty); +} + +/* + * Re-apply an allocation when a cg write is complete. + */ +static int +jnewblk_rollforward(jnewblk, fs, cgp, blksfree) + struct jnewblk *jnewblk; + struct fs *fs; + struct cg *cgp; + uint8_t *blksfree; +{ + ufs1_daddr_t fragno; + ufs2_daddr_t blkno; + long cgbno, bbase; + int frags, blk; + int i; + + frags = 0; + cgbno = dtogd(fs, jnewblk->jn_blkno); + for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags; i++) { + if (isclr(blksfree, cgbno + i)) + panic("jnewblk_rollforward: re-allocated fragment"); + frags++; + } + if (frags == fs->fs_frag) { + blkno = fragstoblks(fs, cgbno); + ffs_clrblock(fs, blksfree, (long)blkno); + ffs_clusteracct(fs, cgp, blkno, -1); + cgp->cg_cs.cs_nbfree--; + } else { + bbase = cgbno - fragnum(fs, cgbno); + cgbno += jnewblk->jn_oldfrags; + /* If a complete block had been reassembled, account for it. */ + fragno = fragstoblks(fs, bbase); + if (ffs_isblock(fs, blksfree, fragno)) { + cgp->cg_cs.cs_nffree += fs->fs_frag; + ffs_clusteracct(fs, cgp, fragno, -1); + cgp->cg_cs.cs_nbfree--; + } + /* Decrement the old frags. */ + blk = blkmap(fs, blksfree, bbase); + ffs_fragacct(fs, blk, cgp->cg_frsum, -1); + /* Allocate the fragment */ + for (i = 0; i < frags; i++) + clrbit(blksfree, cgbno + i); + cgp->cg_cs.cs_nffree -= frags; + /* Add back in counts associated with the new frags */ + blk = blkmap(fs, blksfree, bbase); + ffs_fragacct(fs, blk, cgp->cg_frsum, 1); + } + return (frags); +} + +/* + * Complete a write to a bmsafemap structure. Roll forward any bitmap + * changes if it's not a background write. Set all written dependencies + * to DEPCOMPLETE and free the structure if possible. + * + * If the write did not succeed, we will do all the roll-forward + * operations, but we will not take the actions that will allow its + * dependencies to be processed. + */ +static int +handle_written_bmsafemap(bmsafemap, bp, flags) + struct bmsafemap *bmsafemap; + struct buf *bp; + int flags; +{ + struct newblk *newblk; + struct inodedep *inodedep; + struct jaddref *jaddref, *jatmp; + struct jnewblk *jnewblk, *jntmp; + struct ufsmount *ump; + uint8_t *inosused; + uint8_t *blksfree; + struct cg *cgp; + struct fs *fs; + ino_t ino; + int foreground; + int chgs; + + if ((bmsafemap->sm_state & IOSTARTED) == 0) + panic("handle_written_bmsafemap: Not started\n"); + ump = VFSTOUFS(bmsafemap->sm_list.wk_mp); + chgs = 0; + bmsafemap->sm_state &= ~IOSTARTED; + foreground = (bp->b_xflags & BX_BKGRDMARKER) == 0; + /* + * If write was successful, release journal work that was waiting + * on the write. Otherwise move the work back. + */ + if (flags & WRITESUCCEEDED) + handle_jwork(&bmsafemap->sm_freewr); + else + LIST_CONCAT(&bmsafemap->sm_freehd, &bmsafemap->sm_freewr, + worklist, wk_list); + + /* + * Restore unwritten inode allocation pending jaddref writes. 
+ */ + if (!LIST_EMPTY(&bmsafemap->sm_jaddrefhd)) { + cgp = (struct cg *)bp->b_data; + fs = VFSTOUFS(bmsafemap->sm_list.wk_mp)->um_fs; + inosused = cg_inosused(cgp); + LIST_FOREACH_SAFE(jaddref, &bmsafemap->sm_jaddrefhd, + ja_bmdeps, jatmp) { + if ((jaddref->ja_state & UNDONE) == 0) + continue; + ino = jaddref->ja_ino % fs->fs_ipg; + if (isset(inosused, ino)) + panic("handle_written_bmsafemap: " + "re-allocated inode"); + /* Do the roll-forward only if it's a real copy. */ + if (foreground) { + if ((jaddref->ja_mode & IFMT) == IFDIR) + cgp->cg_cs.cs_ndir++; + cgp->cg_cs.cs_nifree--; + setbit(inosused, ino); + chgs = 1; + } + jaddref->ja_state &= ~UNDONE; + jaddref->ja_state |= ATTACHED; + free_jaddref(jaddref); + } + } + /* + * Restore any block allocations which are pending journal writes. + */ + if (LIST_FIRST(&bmsafemap->sm_jnewblkhd) != NULL) { + cgp = (struct cg *)bp->b_data; + fs = VFSTOUFS(bmsafemap->sm_list.wk_mp)->um_fs; + blksfree = cg_blksfree(cgp); + LIST_FOREACH_SAFE(jnewblk, &bmsafemap->sm_jnewblkhd, jn_deps, + jntmp) { + if ((jnewblk->jn_state & UNDONE) == 0) + continue; + /* Do the roll-forward only if it's a real copy. */ + if (foreground && + jnewblk_rollforward(jnewblk, fs, cgp, blksfree)) + chgs = 1; + jnewblk->jn_state &= ~(UNDONE | NEWBLOCK); + jnewblk->jn_state |= ATTACHED; + free_jnewblk(jnewblk); + } + } + /* + * If the write did not succeed, we have done all the roll-forward + * operations, but we cannot take the actions that will allow its + * dependencies to be processed. + */ + if ((flags & WRITESUCCEEDED) == 0) { + LIST_CONCAT(&bmsafemap->sm_newblkhd, &bmsafemap->sm_newblkwr, + newblk, nb_deps); + LIST_CONCAT(&bmsafemap->sm_freehd, &bmsafemap->sm_freewr, + worklist, wk_list); + if (foreground) + bdirty(bp); + return (1); + } + while ((newblk = LIST_FIRST(&bmsafemap->sm_newblkwr))) { + newblk->nb_state |= DEPCOMPLETE; + newblk->nb_state &= ~ONDEPLIST; + newblk->nb_bmsafemap = NULL; + LIST_REMOVE(newblk, nb_deps); + if (newblk->nb_list.wk_type == D_ALLOCDIRECT) + handle_allocdirect_partdone( + WK_ALLOCDIRECT(&newblk->nb_list), NULL); + else if (newblk->nb_list.wk_type == D_ALLOCINDIR) + handle_allocindir_partdone( + WK_ALLOCINDIR(&newblk->nb_list)); + else if (newblk->nb_list.wk_type != D_NEWBLK) + panic("handle_written_bmsafemap: Unexpected type: %s", + TYPENAME(newblk->nb_list.wk_type)); + } + while ((inodedep = LIST_FIRST(&bmsafemap->sm_inodedepwr)) != NULL) { + inodedep->id_state |= DEPCOMPLETE; + inodedep->id_state &= ~ONDEPLIST; + LIST_REMOVE(inodedep, id_deps); + inodedep->id_bmsafemap = NULL; + } + LIST_REMOVE(bmsafemap, sm_next); + if (chgs == 0 && LIST_EMPTY(&bmsafemap->sm_jaddrefhd) && + LIST_EMPTY(&bmsafemap->sm_jnewblkhd) && + LIST_EMPTY(&bmsafemap->sm_newblkhd) && + LIST_EMPTY(&bmsafemap->sm_inodedephd) && + LIST_EMPTY(&bmsafemap->sm_freehd)) { + LIST_REMOVE(bmsafemap, sm_hash); + WORKITEM_FREE(bmsafemap, D_BMSAFEMAP); + return (0); + } + LIST_INSERT_HEAD(&ump->softdep_dirtycg, bmsafemap, sm_next); + if (foreground) + bdirty(bp); + return (1); +} + +/* + * Try to free a mkdir dependency. 
+ */ +static void +complete_mkdir(mkdir) + struct mkdir *mkdir; +{ + struct diradd *dap; + + if ((mkdir->md_state & ALLCOMPLETE) != ALLCOMPLETE) + return; + LIST_REMOVE(mkdir, md_mkdirs); + dap = mkdir->md_diradd; + dap->da_state &= ~(mkdir->md_state & (MKDIR_PARENT | MKDIR_BODY)); + if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) == 0) { + dap->da_state |= DEPCOMPLETE; + complete_diradd(dap); + } + WORKITEM_FREE(mkdir, D_MKDIR); +} + +/* + * Handle the completion of a mkdir dependency. + */ +static void +handle_written_mkdir(mkdir, type) + struct mkdir *mkdir; + int type; +{ + + if ((mkdir->md_state & (MKDIR_PARENT | MKDIR_BODY)) != type) + panic("handle_written_mkdir: bad type"); + mkdir->md_state |= COMPLETE; + complete_mkdir(mkdir); +} + +static int +free_pagedep(pagedep) + struct pagedep *pagedep; +{ + int i; + + if (pagedep->pd_state & NEWBLOCK) + return (0); + if (!LIST_EMPTY(&pagedep->pd_dirremhd)) + return (0); + for (i = 0; i < DAHASHSZ; i++) + if (!LIST_EMPTY(&pagedep->pd_diraddhd[i])) + return (0); + if (!LIST_EMPTY(&pagedep->pd_pendinghd)) + return (0); + if (!LIST_EMPTY(&pagedep->pd_jmvrefhd)) + return (0); + if (pagedep->pd_state & ONWORKLIST) + WORKLIST_REMOVE(&pagedep->pd_list); + LIST_REMOVE(pagedep, pd_hash); + WORKITEM_FREE(pagedep, D_PAGEDEP); + + return (1); +} + +/* + * Called from within softdep_disk_write_complete above. + * A write operation was just completed. Removed inodes can + * now be freed and associated block pointers may be committed. + * Note that this routine is always called from interrupt level + * with further interrupts from this device blocked. + * + * If the write did not succeed, we will do all the roll-forward + * operations, but we will not take the actions that will allow its + * dependencies to be processed. + */ +static int +handle_written_filepage(pagedep, bp, flags) + struct pagedep *pagedep; + struct buf *bp; /* buffer containing the written page */ + int flags; +{ + struct dirrem *dirrem; + struct diradd *dap, *nextdap; + struct direct *ep; + int i, chgs; + + if ((pagedep->pd_state & IOSTARTED) == 0) + panic("handle_written_filepage: not started"); + pagedep->pd_state &= ~IOSTARTED; + if ((flags & WRITESUCCEEDED) == 0) + goto rollforward; + /* + * Process any directory removals that have been committed. + */ + while ((dirrem = LIST_FIRST(&pagedep->pd_dirremhd)) != NULL) { + LIST_REMOVE(dirrem, dm_next); + dirrem->dm_state |= COMPLETE; + dirrem->dm_dirinum = pagedep->pd_ino; + KASSERT(LIST_EMPTY(&dirrem->dm_jremrefhd), + ("handle_written_filepage: Journal entries not written.")); + add_to_worklist(&dirrem->dm_list, 0); + } + /* + * Free any directory additions that have been committed. + * If it is a newly allocated block, we have to wait until + * the on-disk directory inode claims the new block. + */ + if ((pagedep->pd_state & NEWBLOCK) == 0) + while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL) + free_diradd(dap, NULL); +rollforward: + /* + * Uncommitted directory entries must be restored. + */ + for (chgs = 0, i = 0; i < DAHASHSZ; i++) { + for (dap = LIST_FIRST(&pagedep->pd_diraddhd[i]); dap; + dap = nextdap) { + nextdap = LIST_NEXT(dap, da_pdlist); + if (dap->da_state & ATTACHED) + panic("handle_written_filepage: attached"); + ep = (struct direct *) + ((char *)bp->b_data + dap->da_offset); + ep->d_ino = dap->da_newinum; + dap->da_state &= ~UNDONE; + dap->da_state |= ATTACHED; + chgs = 1; + /* + * If the inode referenced by the directory has + * been written out, then the dependency can be + * moved to the pending list. 
+ */
+ if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) {
+ LIST_REMOVE(dap, da_pdlist);
+ LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap,
+ da_pdlist);
+ }
+ }
+ }
+ /*
+ * If there were any rollbacks in the directory, then it must be
+ * marked dirty so that it will eventually get written back in
+ * its correct form.
+ */
+ if (chgs || (flags & WRITESUCCEEDED) == 0) {
+ if ((bp->b_flags & B_DELWRI) == 0)
+ stat_dir_entry++;
+ bdirty(bp);
+ return (1);
+ }
+ /*
+ * If we are not waiting for a new directory block to be
+ * claimed by its inode, then the pagedep will be freed.
+ * Otherwise it will remain to track any new entries on
+ * the page in case they are fsync'ed.
+ */
+ free_pagedep(pagedep);
+ return (0);
+}
+
+/*
+ * Writing back in-core inode structures.
+ *
+ * The filesystem only accesses an inode's contents when it occupies an
+ * "in-core" inode structure. These "in-core" structures are separate from
+ * the page frames used to cache inode blocks. Only the latter are
+ * transferred to/from the disk. So, when the updated contents of the
+ * "in-core" inode structure are copied to the corresponding in-memory inode
+ * block, the dependencies are also transferred. The following procedure is
+ * called when copying a dirty "in-core" inode to a cached inode block.
+ */
+
+/*
+ * Called when an inode is loaded from disk. If the effective link count
+ * differed from the actual link count when it was last flushed, then we
+ * need to ensure that the correct effective link count is put back.
+ */
+void
+softdep_load_inodeblock(ip)
+ struct inode *ip; /* the "in_core" copy of the inode */
+{
+ struct inodedep *inodedep;
+ struct ufsmount *ump;
+
+ ump = ITOUMP(ip);
+ KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0,
+ ("softdep_load_inodeblock called on non-softdep filesystem"));
+ /*
+ * Check for alternate nlink count.
+ */
+ ip->i_effnlink = ip->i_nlink;
+ ACQUIRE_LOCK(ump);
+ if (inodedep_lookup(UFSTOVFS(ump), ip->i_number, 0, &inodedep) == 0) {
+ FREE_LOCK(ump);
+ return;
+ }
+ ip->i_effnlink -= inodedep->id_nlinkdelta;
+ FREE_LOCK(ump);
+}
+
+/*
+ * This routine is called just before the "in-core" inode
+ * information is to be copied to the in-memory inode block.
+ * Recall that an inode block contains several inodes. If
+ * the force flag is set, then the dependencies will be
+ * cleared so that the update can always be made. Note that
+ * the buffer is locked when this routine is called, so we
+ * will never be in the middle of writing the inode block
+ * to disk.
+ */
+void
+softdep_update_inodeblock(ip, bp, waitfor)
+ struct inode *ip; /* the "in_core" copy of the inode */
+ struct buf *bp; /* the buffer containing the inode block */
+ int waitfor; /* nonzero => update must be allowed */
+{
+ struct inodedep *inodedep;
+ struct inoref *inoref;
+ struct ufsmount *ump;
+ struct worklist *wk;
+ struct mount *mp;
+ struct buf *ibp;
+ struct fs *fs;
+ int error;
+
+ ump = ITOUMP(ip);
+ mp = UFSTOVFS(ump);
+ KASSERT(MOUNTEDSOFTDEP(mp) != 0,
+ ("softdep_update_inodeblock called on non-softdep filesystem"));
+ fs = ump->um_fs;
+ /*
+ * Preserve the freelink that is on disk. clear_unlinked_inodedep()
+ * does not have access to the in-core ip so must write directly into
+ * the inode block buffer when setting freelink.
+ */ + if (fs->fs_magic == FS_UFS1_MAGIC) + DIP_SET(ip, i_freelink, ((struct ufs1_dinode *)bp->b_data + + ino_to_fsbo(fs, ip->i_number))->di_freelink); + else + DIP_SET(ip, i_freelink, ((struct ufs2_dinode *)bp->b_data + + ino_to_fsbo(fs, ip->i_number))->di_freelink); + /* + * If the effective link count is not equal to the actual link + * count, then we must track the difference in an inodedep while + * the inode is (potentially) tossed out of the cache. Otherwise, + * if there is no existing inodedep, then there are no dependencies + * to track. + */ + ACQUIRE_LOCK(ump); +again: + if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0) { + FREE_LOCK(ump); + if (ip->i_effnlink != ip->i_nlink) + panic("softdep_update_inodeblock: bad link count"); + return; + } + if (inodedep->id_nlinkdelta != ip->i_nlink - ip->i_effnlink) + panic("softdep_update_inodeblock: bad delta"); + /* + * If we're flushing all dependencies we must also move any waiting + * for journal writes onto the bufwait list prior to I/O. + */ + if (waitfor) { + TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) { + if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY)) + == DEPCOMPLETE) { + jwait(&inoref->if_list, MNT_WAIT); + goto again; + } + } + } + /* + * Changes have been initiated. Anything depending on these + * changes cannot occur until this inode has been written. + */ + inodedep->id_state &= ~COMPLETE; + if ((inodedep->id_state & ONWORKLIST) == 0) + WORKLIST_INSERT(&bp->b_dep, &inodedep->id_list); + /* + * Any new dependencies associated with the incore inode must + * now be moved to the list associated with the buffer holding + * the in-memory copy of the inode. Once merged process any + * allocdirects that are completed by the merger. + */ + merge_inode_lists(&inodedep->id_newinoupdt, &inodedep->id_inoupdt); + if (!TAILQ_EMPTY(&inodedep->id_inoupdt)) + handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_inoupdt), + NULL); + merge_inode_lists(&inodedep->id_newextupdt, &inodedep->id_extupdt); + if (!TAILQ_EMPTY(&inodedep->id_extupdt)) + handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_extupdt), + NULL); + /* + * Now that the inode has been pushed into the buffer, the + * operations dependent on the inode being written to disk + * can be moved to the id_bufwait so that they will be + * processed when the buffer I/O completes. + */ + while ((wk = LIST_FIRST(&inodedep->id_inowait)) != NULL) { + WORKLIST_REMOVE(wk); + WORKLIST_INSERT(&inodedep->id_bufwait, wk); + } + /* + * Newly allocated inodes cannot be written until the bitmap + * that allocates them have been written (indicated by + * DEPCOMPLETE being set in id_state). If we are doing a + * forced sync (e.g., an fsync on a file), we force the bitmap + * to be written so that the update can be done. + */ + if (waitfor == 0) { + FREE_LOCK(ump); + return; + } +retry: + if ((inodedep->id_state & (DEPCOMPLETE | GOINGAWAY)) != 0) { + FREE_LOCK(ump); + return; + } + ibp = inodedep->id_bmsafemap->sm_buf; + ibp = getdirtybuf(ibp, LOCK_PTR(ump), MNT_WAIT); + if (ibp == NULL) { + /* + * If ibp came back as NULL, the dependency could have been + * freed while we slept. Look it up again, and check to see + * that it has completed. 
+ */
+ if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) != 0)
+ goto retry;
+ FREE_LOCK(ump);
+ return;
+ }
+ FREE_LOCK(ump);
+ if ((error = bwrite(ibp)) != 0)
+ softdep_error("softdep_update_inodeblock: bwrite", error);
+}
+
+/*
+ * Merge a new inode dependency list (such as id_newinoupdt) into an
+ * old inode dependency list (such as id_inoupdt). This routine must be
+ * called with splbio interrupts blocked.
+ */
+static void
+merge_inode_lists(newlisthead, oldlisthead)
+ struct allocdirectlst *newlisthead;
+ struct allocdirectlst *oldlisthead;
+{
+ struct allocdirect *listadp, *newadp;
+
+ newadp = TAILQ_FIRST(newlisthead);
+ for (listadp = TAILQ_FIRST(oldlisthead); listadp && newadp;) {
+ if (listadp->ad_offset < newadp->ad_offset) {
+ listadp = TAILQ_NEXT(listadp, ad_next);
+ continue;
+ }
+ TAILQ_REMOVE(newlisthead, newadp, ad_next);
+ TAILQ_INSERT_BEFORE(listadp, newadp, ad_next);
+ if (listadp->ad_offset == newadp->ad_offset) {
+ allocdirect_merge(oldlisthead, newadp,
+ listadp);
+ listadp = newadp;
+ }
+ newadp = TAILQ_FIRST(newlisthead);
+ }
+ while ((newadp = TAILQ_FIRST(newlisthead)) != NULL) {
+ TAILQ_REMOVE(newlisthead, newadp, ad_next);
+ TAILQ_INSERT_TAIL(oldlisthead, newadp, ad_next);
+ }
+}
+
+/*
+ * If we are doing an fsync, then we must ensure that any directory
+ * entries for the inode have been written after the inode gets to disk.
+ */
+int
+softdep_fsync(vp)
+ struct vnode *vp; /* the "in_core" copy of the inode */
+{
+ struct inodedep *inodedep;
+ struct pagedep *pagedep;
+ struct inoref *inoref;
+ struct ufsmount *ump;
+ struct worklist *wk;
+ struct diradd *dap;
+ struct mount *mp;
+ struct vnode *pvp;
+ struct inode *ip;
+ struct buf *bp;
+ struct fs *fs;
+ struct thread *td = curthread;
+ int error, flushparent, pagedep_new_block;
+ ino_t parentino;
+ ufs_lbn_t lbn;
+
+ ip = VTOI(vp);
+ mp = vp->v_mount;
+ ump = VFSTOUFS(mp);
+ fs = ump->um_fs;
+ if (MOUNTEDSOFTDEP(mp) == 0)
+ return (0);
+ ACQUIRE_LOCK(ump);
+restart:
+ if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0) {
+ FREE_LOCK(ump);
+ return (0);
+ }
+ TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) {
+ if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY))
+ == DEPCOMPLETE) {
+ jwait(&inoref->if_list, MNT_WAIT);
+ goto restart;
+ }
+ }
+ if (!LIST_EMPTY(&inodedep->id_inowait) ||
+ !TAILQ_EMPTY(&inodedep->id_extupdt) ||
+ !TAILQ_EMPTY(&inodedep->id_newextupdt) ||
+ !TAILQ_EMPTY(&inodedep->id_inoupdt) ||
+ !TAILQ_EMPTY(&inodedep->id_newinoupdt))
+ panic("softdep_fsync: pending ops %p", inodedep);
+ for (error = 0, flushparent = 0; ; ) {
+ if ((wk = LIST_FIRST(&inodedep->id_pendinghd)) == NULL)
+ break;
+ if (wk->wk_type != D_DIRADD)
+ panic("softdep_fsync: Unexpected type %s",
+ TYPENAME(wk->wk_type));
+ dap = WK_DIRADD(wk);
+ /*
+ * Flush our parent if this directory entry has a MKDIR_PARENT
+ * dependency or is contained in a newly allocated block.
+ */
+ if (dap->da_state & DIRCHG)
+ pagedep = dap->da_previous->dm_pagedep;
+ else
+ pagedep = dap->da_pagedep;
+ parentino = pagedep->pd_ino;
+ lbn = pagedep->pd_lbn;
+ if ((dap->da_state & (MKDIR_BODY | COMPLETE)) != COMPLETE)
+ panic("softdep_fsync: dirty");
+ if ((dap->da_state & MKDIR_PARENT) ||
+ (pagedep->pd_state & NEWBLOCK))
+ flushparent = 1;
+ else
+ flushparent = 0;
+ /*
+ * If we are being fsync'ed as part of vgone'ing this vnode,
+ * then we will not be able to release and recover the
+ * vnode below, so we just have to give up on writing its
+ * directory entry out.
It will eventually be written, just + * not now, but then the user was not asking to have it + * written, so we are not breaking any promises. + */ + if (vp->v_iflag & VI_DOOMED) + break; + /* + * We prevent deadlock by always fetching inodes from the + * root, moving down the directory tree. Thus, when fetching + * our parent directory, we first try to get the lock. If + * that fails, we must unlock ourselves before requesting + * the lock on our parent. See the comment in ufs_lookup + * for details on possible races. + */ + FREE_LOCK(ump); + if (ffs_vgetf(mp, parentino, LK_NOWAIT | LK_EXCLUSIVE, &pvp, + FFSV_FORCEINSMQ)) { + error = vfs_busy(mp, MBF_NOWAIT); + if (error != 0) { + vfs_ref(mp); + VOP_UNLOCK(vp, 0); + error = vfs_busy(mp, 0); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + vfs_rel(mp); + if (error != 0) + return (ENOENT); + if (vp->v_iflag & VI_DOOMED) { + vfs_unbusy(mp); + return (ENOENT); + } + } + VOP_UNLOCK(vp, 0); + error = ffs_vgetf(mp, parentino, LK_EXCLUSIVE, + &pvp, FFSV_FORCEINSMQ); + vfs_unbusy(mp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + if (vp->v_iflag & VI_DOOMED) { + if (error == 0) + vput(pvp); + error = ENOENT; + } + if (error != 0) + return (error); + } + /* + * All MKDIR_PARENT dependencies and all the NEWBLOCK pagedeps + * that are contained in direct blocks will be resolved by + * doing a ffs_update. Pagedeps contained in indirect blocks + * may require a complete sync'ing of the directory. So, we + * try the cheap and fast ffs_update first, and if that fails, + * then we do the slower ffs_syncvnode of the directory. + */ + if (flushparent) { + int locked; + + if ((error = ffs_update(pvp, 1)) != 0) { + vput(pvp); + return (error); + } + ACQUIRE_LOCK(ump); + locked = 1; + if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) != 0) { + if ((wk = LIST_FIRST(&inodedep->id_pendinghd)) != NULL) { + if (wk->wk_type != D_DIRADD) + panic("softdep_fsync: Unexpected type %s", + TYPENAME(wk->wk_type)); + dap = WK_DIRADD(wk); + if (dap->da_state & DIRCHG) + pagedep = dap->da_previous->dm_pagedep; + else + pagedep = dap->da_pagedep; + pagedep_new_block = pagedep->pd_state & NEWBLOCK; + FREE_LOCK(ump); + locked = 0; + if (pagedep_new_block && (error = + ffs_syncvnode(pvp, MNT_WAIT, 0))) { + vput(pvp); + return (error); + } + } + } + if (locked) + FREE_LOCK(ump); + } + /* + * Flush directory page containing the inode's name. + */ + error = bread(pvp, lbn, blksize(fs, VTOI(pvp), lbn), td->td_ucred, + &bp); + if (error == 0) + error = bwrite(bp); + else + brelse(bp); + vput(pvp); + if (error != 0) + return (error); + ACQUIRE_LOCK(ump); + if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0) + break; + } + FREE_LOCK(ump); + return (0); +} + +/* + * Flush all the dirty bitmaps associated with the block device + * before flushing the rest of the dirty blocks so as to reduce + * the number of dependencies that will have to be rolled back. + * + * XXX Unused? + */ +void +softdep_fsync_mountdev(vp) + struct vnode *vp; +{ + struct buf *bp, *nbp; + struct worklist *wk; + struct bufobj *bo; + + if (!vn_isdisk(vp, NULL)) + panic("softdep_fsync_mountdev: vnode not a disk"); + bo = &vp->v_bufobj; +restart: + BO_LOCK(bo); + TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { + /* + * If it is already scheduled, skip to the next buffer. + */ + if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) + continue; + + if ((bp->b_flags & B_DELWRI) == 0) + panic("softdep_fsync_mountdev: not dirty"); + /* + * We are only interested in bitmaps with outstanding + * dependencies. 
+ */
+ if ((wk = LIST_FIRST(&bp->b_dep)) == NULL ||
+ wk->wk_type != D_BMSAFEMAP ||
+ (bp->b_vflags & BV_BKGRDINPROG)) {
+ BUF_UNLOCK(bp);
+ continue;
+ }
+ BO_UNLOCK(bo);
+ bremfree(bp);
+ (void) bawrite(bp);
+ goto restart;
+ }
+ drain_output(vp);
+ BO_UNLOCK(bo);
+}
+
+/*
+ * Sync all cylinder groups that were dirty at the time this function is
+ * called. Newly dirtied cgs will be inserted before the sentinel. This
+ * is used to flush freedep activity that may be holding up writes to an
+ * indirect block.
+ */
+static int
+sync_cgs(mp, waitfor)
+ struct mount *mp;
+ int waitfor;
+{
+ struct bmsafemap *bmsafemap;
+ struct bmsafemap *sentinel;
+ struct ufsmount *ump;
+ struct buf *bp;
+ int error;
+
+ sentinel = malloc(sizeof(*sentinel), M_BMSAFEMAP, M_ZERO | M_WAITOK);
+ sentinel->sm_cg = -1;
+ ump = VFSTOUFS(mp);
+ error = 0;
+ ACQUIRE_LOCK(ump);
+ LIST_INSERT_HEAD(&ump->softdep_dirtycg, sentinel, sm_next);
+ for (bmsafemap = LIST_NEXT(sentinel, sm_next); bmsafemap != NULL;
+ bmsafemap = LIST_NEXT(sentinel, sm_next)) {
+ /* Skip sentinels and cgs with no work to release. */
+ if (bmsafemap->sm_cg == -1 ||
+ (LIST_EMPTY(&bmsafemap->sm_freehd) &&
+ LIST_EMPTY(&bmsafemap->sm_freewr))) {
+ LIST_REMOVE(sentinel, sm_next);
+ LIST_INSERT_AFTER(bmsafemap, sentinel, sm_next);
+ continue;
+ }
+ /*
+ * If we don't get the lock and we're waiting try again, if
+ * not move on to the next buf and try to sync it.
+ */
+ bp = getdirtybuf(bmsafemap->sm_buf, LOCK_PTR(ump), waitfor);
+ if (bp == NULL && waitfor == MNT_WAIT)
+ continue;
+ LIST_REMOVE(sentinel, sm_next);
+ LIST_INSERT_AFTER(bmsafemap, sentinel, sm_next);
+ if (bp == NULL)
+ continue;
+ FREE_LOCK(ump);
+ if (waitfor == MNT_NOWAIT)
+ bawrite(bp);
+ else
+ error = bwrite(bp);
+ ACQUIRE_LOCK(ump);
+ if (error)
+ break;
+ }
+ LIST_REMOVE(sentinel, sm_next);
+ FREE_LOCK(ump);
+ free(sentinel, M_BMSAFEMAP);
+ return (error);
+}
+
+/*
+ * This routine is called when we are trying to synchronously flush a
+ * file. This routine must eliminate any filesystem metadata dependencies
+ * so that the syncing routine can succeed.
+ */
+int
+softdep_sync_metadata(struct vnode *vp)
+{
+ struct inode *ip;
+ int error;
+
+ ip = VTOI(vp);
+ KASSERT(MOUNTEDSOFTDEP(vp->v_mount) != 0,
+ ("softdep_sync_metadata called on non-softdep filesystem"));
+ /*
+ * Ensure that any direct block dependencies have been cleared,
+ * truncations are started, and inode references are journaled.
+ */
+ ACQUIRE_LOCK(VFSTOUFS(vp->v_mount));
+ /*
+ * Write all journal records to prevent rollbacks on devvp.
+ */
+ if (vp->v_type == VCHR)
+ softdep_flushjournal(vp->v_mount);
+ error = flush_inodedep_deps(vp, vp->v_mount, ip->i_number);
+ /*
+ * Ensure that all truncates are written so we won't find deps on
+ * indirect blocks.
+ */
+ process_truncates(vp);
+ FREE_LOCK(VFSTOUFS(vp->v_mount));
+
+ return (error);
+}
+
+/*
+ * This routine is called when we are attempting to sync a buf with
+ * dependencies. If waitfor is MNT_NOWAIT it attempts to schedule any
+ * other IO it can but returns EBUSY if the buffer is not yet able to
+ * be written. Dependencies which will not cause rollbacks will always
+ * return 0.
+ */ +int +softdep_sync_buf(struct vnode *vp, struct buf *bp, int waitfor) +{ + struct indirdep *indirdep; + struct pagedep *pagedep; + struct allocindir *aip; + struct newblk *newblk; + struct ufsmount *ump; + struct buf *nbp; + struct worklist *wk; + int i, error; + + KASSERT(MOUNTEDSOFTDEP(vp->v_mount) != 0, + ("softdep_sync_buf called on non-softdep filesystem")); + /* + * For VCHR we just don't want to force flush any dependencies that + * will cause rollbacks. + */ + if (vp->v_type == VCHR) { + if (waitfor == MNT_NOWAIT && softdep_count_dependencies(bp, 0)) + return (EBUSY); + return (0); + } + ump = VFSTOUFS(vp->v_mount); + ACQUIRE_LOCK(ump); + /* + * As we hold the buffer locked, none of its dependencies + * will disappear. + */ + error = 0; +top: + LIST_FOREACH(wk, &bp->b_dep, wk_list) { + switch (wk->wk_type) { + + case D_ALLOCDIRECT: + case D_ALLOCINDIR: + newblk = WK_NEWBLK(wk); + if (newblk->nb_jnewblk != NULL) { + if (waitfor == MNT_NOWAIT) { + error = EBUSY; + goto out_unlock; + } + jwait(&newblk->nb_jnewblk->jn_list, waitfor); + goto top; + } + if (newblk->nb_state & DEPCOMPLETE || + waitfor == MNT_NOWAIT) + continue; + nbp = newblk->nb_bmsafemap->sm_buf; + nbp = getdirtybuf(nbp, LOCK_PTR(ump), waitfor); + if (nbp == NULL) + goto top; + FREE_LOCK(ump); + if ((error = bwrite(nbp)) != 0) + goto out; + ACQUIRE_LOCK(ump); + continue; + + case D_INDIRDEP: + indirdep = WK_INDIRDEP(wk); + if (waitfor == MNT_NOWAIT) { + if (!TAILQ_EMPTY(&indirdep->ir_trunc) || + !LIST_EMPTY(&indirdep->ir_deplisthd)) { + error = EBUSY; + goto out_unlock; + } + } + if (!TAILQ_EMPTY(&indirdep->ir_trunc)) + panic("softdep_sync_buf: truncation pending."); + restart: + LIST_FOREACH(aip, &indirdep->ir_deplisthd, ai_next) { + newblk = (struct newblk *)aip; + if (newblk->nb_jnewblk != NULL) { + jwait(&newblk->nb_jnewblk->jn_list, + waitfor); + goto restart; + } + if (newblk->nb_state & DEPCOMPLETE) + continue; + nbp = newblk->nb_bmsafemap->sm_buf; + nbp = getdirtybuf(nbp, LOCK_PTR(ump), waitfor); + if (nbp == NULL) + goto restart; + FREE_LOCK(ump); + if ((error = bwrite(nbp)) != 0) + goto out; + ACQUIRE_LOCK(ump); + goto restart; + } + continue; + + case D_PAGEDEP: + /* + * Only flush directory entries in synchronous passes. + */ + if (waitfor != MNT_WAIT) { + error = EBUSY; + goto out_unlock; + } + /* + * While syncing snapshots, we must allow recursive + * lookups. + */ + BUF_AREC(bp); + /* + * We are trying to sync a directory that may + * have dependencies on both its own metadata + * and/or dependencies on the inodes of any + * recently allocated files. We walk its diradd + * lists pushing out the associated inode. + */ + pagedep = WK_PAGEDEP(wk); + for (i = 0; i < DAHASHSZ; i++) { + if (LIST_FIRST(&pagedep->pd_diraddhd[i]) == 0) + continue; + if ((error = flush_pagedep_deps(vp, wk->wk_mp, + &pagedep->pd_diraddhd[i]))) { + BUF_NOREC(bp); + goto out_unlock; + } + } + BUF_NOREC(bp); + continue; + + case D_FREEWORK: + case D_FREEDEP: + case D_JSEGDEP: + case D_JNEWBLK: + continue; + + default: + panic("softdep_sync_buf: Unknown type %s", + TYPENAME(wk->wk_type)); + /* NOTREACHED */ + } + } +out_unlock: + FREE_LOCK(ump); +out: + return (error); +} + +/* + * Flush the dependencies associated with an inodedep. + * Called with splbio blocked. + */ +static int +flush_inodedep_deps(vp, mp, ino) + struct vnode *vp; + struct mount *mp; + ino_t ino; +{ + struct inodedep *inodedep; + struct inoref *inoref; + struct ufsmount *ump; + int error, waitfor; + + /* + * This work is done in two passes. 
The first pass grabs most + * of the buffers and begins asynchronously writing them. The + * only way to wait for these asynchronous writes is to sleep + * on the filesystem vnode which may stay busy for a long time + * if the filesystem is active. So, instead, we make a second + * pass over the dependencies blocking on each write. In the + * usual case we will be blocking against a write that we + * initiated, so when it is done the dependency will have been + * resolved. Thus the second pass is expected to end quickly. + * We give a brief window at the top of the loop to allow + * any pending I/O to complete. + */ + ump = VFSTOUFS(mp); + LOCK_OWNED(ump); + for (error = 0, waitfor = MNT_NOWAIT; ; ) { + if (error) + return (error); + FREE_LOCK(ump); + ACQUIRE_LOCK(ump); +restart: + if (inodedep_lookup(mp, ino, 0, &inodedep) == 0) + return (0); + TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) { + if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY)) + == DEPCOMPLETE) { + jwait(&inoref->if_list, MNT_WAIT); + goto restart; + } + } + if (flush_deplist(&inodedep->id_inoupdt, waitfor, &error) || + flush_deplist(&inodedep->id_newinoupdt, waitfor, &error) || + flush_deplist(&inodedep->id_extupdt, waitfor, &error) || + flush_deplist(&inodedep->id_newextupdt, waitfor, &error)) + continue; + /* + * If pass2, we are done, otherwise do pass 2. + */ + if (waitfor == MNT_WAIT) + break; + waitfor = MNT_WAIT; + } + /* + * Try freeing inodedep in case all dependencies have been removed. + */ + if (inodedep_lookup(mp, ino, 0, &inodedep) != 0) + (void) free_inodedep(inodedep); + return (0); +} + +/* + * Flush an inode dependency list. + * Called with splbio blocked. + */ +static int +flush_deplist(listhead, waitfor, errorp) + struct allocdirectlst *listhead; + int waitfor; + int *errorp; +{ + struct allocdirect *adp; + struct newblk *newblk; + struct ufsmount *ump; + struct buf *bp; + + if ((adp = TAILQ_FIRST(listhead)) == NULL) + return (0); + ump = VFSTOUFS(adp->ad_list.wk_mp); + LOCK_OWNED(ump); + TAILQ_FOREACH(adp, listhead, ad_next) { + newblk = (struct newblk *)adp; + if (newblk->nb_jnewblk != NULL) { + jwait(&newblk->nb_jnewblk->jn_list, MNT_WAIT); + return (1); + } + if (newblk->nb_state & DEPCOMPLETE) + continue; + bp = newblk->nb_bmsafemap->sm_buf; + bp = getdirtybuf(bp, LOCK_PTR(ump), waitfor); + if (bp == NULL) { + if (waitfor == MNT_NOWAIT) + continue; + return (1); + } + FREE_LOCK(ump); + if (waitfor == MNT_NOWAIT) + bawrite(bp); + else + *errorp = bwrite(bp); + ACQUIRE_LOCK(ump); + return (1); + } + return (0); +} + +/* + * Flush dependencies associated with an allocdirect block. + */ +static int +flush_newblk_dep(vp, mp, lbn) + struct vnode *vp; + struct mount *mp; + ufs_lbn_t lbn; +{ + struct newblk *newblk; + struct ufsmount *ump; + struct bufobj *bo; + struct inode *ip; + struct buf *bp; + ufs2_daddr_t blkno; + int error; + + error = 0; + bo = &vp->v_bufobj; + ip = VTOI(vp); + blkno = DIP(ip, i_db[lbn]); + if (blkno == 0) + panic("flush_newblk_dep: Missing block"); + ump = VFSTOUFS(mp); + ACQUIRE_LOCK(ump); + /* + * Loop until all dependencies related to this block are satisfied. + * We must be careful to restart after each sleep in case a write + * completes some part of this process for us. + */ + for (;;) { + if (newblk_lookup(mp, blkno, 0, &newblk) == 0) { + FREE_LOCK(ump); + break; + } + if (newblk->nb_list.wk_type != D_ALLOCDIRECT) + panic("flush_newblk_deps: Bad newblk %p", newblk); + /* + * Flush the journal. 
+ */ + if (newblk->nb_jnewblk != NULL) { + jwait(&newblk->nb_jnewblk->jn_list, MNT_WAIT); + continue; + } + /* + * Write the bitmap dependency. + */ + if ((newblk->nb_state & DEPCOMPLETE) == 0) { + bp = newblk->nb_bmsafemap->sm_buf; + bp = getdirtybuf(bp, LOCK_PTR(ump), MNT_WAIT); + if (bp == NULL) + continue; + FREE_LOCK(ump); + error = bwrite(bp); + if (error) + break; + ACQUIRE_LOCK(ump); + continue; + } + /* + * Write the buffer. + */ + FREE_LOCK(ump); + BO_LOCK(bo); + bp = gbincore(bo, lbn); + if (bp != NULL) { + error = BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | + LK_INTERLOCK, BO_LOCKPTR(bo)); + if (error == ENOLCK) { + ACQUIRE_LOCK(ump); + error = 0; + continue; /* Slept, retry */ + } + if (error != 0) + break; /* Failed */ + if (bp->b_flags & B_DELWRI) { + bremfree(bp); + error = bwrite(bp); + if (error) + break; + } else + BUF_UNLOCK(bp); + } else + BO_UNLOCK(bo); + /* + * We have to wait for the direct pointers to + * point at the newdirblk before the dependency + * will go away. + */ + error = ffs_update(vp, 1); + if (error) + break; + ACQUIRE_LOCK(ump); + } + return (error); +} + +/* + * Eliminate a pagedep dependency by flushing out all its diradd dependencies. + * Called with splbio blocked. + */ +static int +flush_pagedep_deps(pvp, mp, diraddhdp) + struct vnode *pvp; + struct mount *mp; + struct diraddhd *diraddhdp; +{ + struct inodedep *inodedep; + struct inoref *inoref; + struct ufsmount *ump; + struct diradd *dap; + struct vnode *vp; + int error = 0; + struct buf *bp; + ino_t inum; + struct diraddhd unfinished; + + LIST_INIT(&unfinished); + ump = VFSTOUFS(mp); + LOCK_OWNED(ump); +restart: + while ((dap = LIST_FIRST(diraddhdp)) != NULL) { + /* + * Flush ourselves if this directory entry + * has a MKDIR_PARENT dependency. + */ + if (dap->da_state & MKDIR_PARENT) { + FREE_LOCK(ump); + if ((error = ffs_update(pvp, 1)) != 0) + break; + ACQUIRE_LOCK(ump); + /* + * If that cleared dependencies, go on to next. + */ + if (dap != LIST_FIRST(diraddhdp)) + continue; + /* + * All MKDIR_PARENT dependencies and all the + * NEWBLOCK pagedeps that are contained in direct + * blocks were resolved by doing above ffs_update. + * Pagedeps contained in indirect blocks may + * require a complete sync'ing of the directory. + * We are in the midst of doing a complete sync, + * so if they are not resolved in this pass we + * defer them for now as they will be sync'ed by + * our caller shortly. + */ + LIST_REMOVE(dap, da_pdlist); + LIST_INSERT_HEAD(&unfinished, dap, da_pdlist); + continue; + } + /* + * A newly allocated directory must have its "." and + * ".." entries written out before its name can be + * committed in its parent. + */ + inum = dap->da_newinum; + if (inodedep_lookup(UFSTOVFS(ump), inum, 0, &inodedep) == 0) + panic("flush_pagedep_deps: lost inode1"); + /* + * Wait for any pending journal adds to complete so we don't + * cause rollbacks while syncing. + */ + TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) { + if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY)) + == DEPCOMPLETE) { + jwait(&inoref->if_list, MNT_WAIT); + goto restart; + } + } + if (dap->da_state & MKDIR_BODY) { + FREE_LOCK(ump); + if ((error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &vp, + FFSV_FORCEINSMQ))) + break; + error = flush_newblk_dep(vp, mp, 0); + /* + * If we still have the dependency we might need to + * update the vnode to sync the new link count to + * disk. 
+ */ + if (error == 0 && dap == LIST_FIRST(diraddhdp)) + error = ffs_update(vp, 1); + vput(vp); + if (error != 0) + break; + ACQUIRE_LOCK(ump); + /* + * If that cleared dependencies, go on to next. + */ + if (dap != LIST_FIRST(diraddhdp)) + continue; + if (dap->da_state & MKDIR_BODY) { + inodedep_lookup(UFSTOVFS(ump), inum, 0, + &inodedep); + panic("flush_pagedep_deps: MKDIR_BODY " + "inodedep %p dap %p vp %p", + inodedep, dap, vp); + } + } + /* + * Flush the inode on which the directory entry depends. + * Having accounted for MKDIR_PARENT and MKDIR_BODY above, + * the only remaining dependency is that the updated inode + * count must get pushed to disk. The inode has already + * been pushed into its inode buffer (via VOP_UPDATE) at + * the time of the reference count change. So we need only + * locate that buffer, ensure that there will be no rollback + * caused by a bitmap dependency, then write the inode buffer. + */ +retry: + if (inodedep_lookup(UFSTOVFS(ump), inum, 0, &inodedep) == 0) + panic("flush_pagedep_deps: lost inode"); + /* + * If the inode still has bitmap dependencies, + * push them to disk. + */ + if ((inodedep->id_state & (DEPCOMPLETE | GOINGAWAY)) == 0) { + bp = inodedep->id_bmsafemap->sm_buf; + bp = getdirtybuf(bp, LOCK_PTR(ump), MNT_WAIT); + if (bp == NULL) + goto retry; + FREE_LOCK(ump); + if ((error = bwrite(bp)) != 0) + break; + ACQUIRE_LOCK(ump); + if (dap != LIST_FIRST(diraddhdp)) + continue; + } + /* + * If the inode is still sitting in a buffer waiting + * to be written or waiting for the link count to be + * adjusted update it here to flush it to disk. + */ + if (dap == LIST_FIRST(diraddhdp)) { + FREE_LOCK(ump); + if ((error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &vp, + FFSV_FORCEINSMQ))) + break; + error = ffs_update(vp, 1); + vput(vp); + if (error) + break; + ACQUIRE_LOCK(ump); + } + /* + * If we have failed to get rid of all the dependencies + * then something is seriously wrong. + */ + if (dap == LIST_FIRST(diraddhdp)) { + inodedep_lookup(UFSTOVFS(ump), inum, 0, &inodedep); + panic("flush_pagedep_deps: failed to flush " + "inodedep %p ino %ju dap %p", + inodedep, (uintmax_t)inum, dap); + } + } + if (error) + ACQUIRE_LOCK(ump); + while ((dap = LIST_FIRST(&unfinished)) != NULL) { + LIST_REMOVE(dap, da_pdlist); + LIST_INSERT_HEAD(diraddhdp, dap, da_pdlist); + } + return (error); +} + +/* + * A large burst of file addition or deletion activity can drive the + * memory load excessively high. First attempt to slow things down + * using the techniques below. If that fails, this routine requests + * the offending operations to fall back to running synchronously + * until the memory load returns to a reasonable level. + */ +int +softdep_slowdown(vp) + struct vnode *vp; +{ + struct ufsmount *ump; + int jlow; + int max_softdeps_hard; + + KASSERT(MOUNTEDSOFTDEP(vp->v_mount) != 0, + ("softdep_slowdown called on non-softdep filesystem")); + ump = VFSTOUFS(vp->v_mount); + ACQUIRE_LOCK(ump); + jlow = 0; + /* + * Check for journal space if needed. + */ + if (DOINGSUJ(vp)) { + if (journal_space(ump, 0) == 0) + jlow = 1; + } + /* + * If the system is under its limits and our filesystem is + * not responsible for more than our share of the usage and + * we are not low on journal space, then no need to slow down. 
+ */ + max_softdeps_hard = max_softdeps * 11 / 10; + if (dep_current[D_DIRREM] < max_softdeps_hard / 2 && + dep_current[D_INODEDEP] < max_softdeps_hard && + dep_current[D_INDIRDEP] < max_softdeps_hard / 1000 && + dep_current[D_FREEBLKS] < max_softdeps_hard && jlow == 0 && + ump->softdep_curdeps[D_DIRREM] < + (max_softdeps_hard / 2) / stat_flush_threads && + ump->softdep_curdeps[D_INODEDEP] < + max_softdeps_hard / stat_flush_threads && + ump->softdep_curdeps[D_INDIRDEP] < + (max_softdeps_hard / 1000) / stat_flush_threads && + ump->softdep_curdeps[D_FREEBLKS] < + max_softdeps_hard / stat_flush_threads) { + FREE_LOCK(ump); + return (0); + } + /* + * If the journal is low or our filesystem is over its limit + * then speedup the cleanup. + */ + if (ump->softdep_curdeps[D_INDIRDEP] < + (max_softdeps_hard / 1000) / stat_flush_threads || jlow) + softdep_speedup(ump); + stat_sync_limit_hit += 1; + FREE_LOCK(ump); + /* + * We only slow down the rate at which new dependencies are + * generated if we are not using journaling. With journaling, + * the cleanup should always be sufficient to keep things + * under control. + */ + if (DOINGSUJ(vp)) + return (0); + return (1); +} + +/* + * Called by the allocation routines when they are about to fail + * in the hope that we can free up the requested resource (inodes + * or disk space). + * + * First check to see if the work list has anything on it. If it has, + * clean up entries until we successfully free the requested resource. + * Because this process holds inodes locked, we cannot handle any remove + * requests that might block on a locked inode as that could lead to + * deadlock. If the worklist yields none of the requested resource, + * start syncing out vnodes to free up the needed space. + */ +int +softdep_request_cleanup(fs, vp, cred, resource) + struct fs *fs; + struct vnode *vp; + struct ucred *cred; + int resource; +{ + struct ufsmount *ump; + struct mount *mp; + long starttime; + ufs2_daddr_t needed; + int error, failed_vnode; + + /* + * If we are being called because of a process doing a + * copy-on-write, then it is not safe to process any + * worklist items as we will recurse into the copyonwrite + * routine. This will result in an incoherent snapshot. + * If the vnode that we hold is a snapshot, we must avoid + * handling other resources that could cause deadlock. + */ + if ((curthread->td_pflags & TDP_COWINPROGRESS) || IS_SNAPSHOT(VTOI(vp))) + return (0); + + if (resource == FLUSH_BLOCKS_WAIT) + stat_cleanup_blkrequests += 1; + else + stat_cleanup_inorequests += 1; + + mp = vp->v_mount; + ump = VFSTOUFS(mp); + mtx_assert(UFS_MTX(ump), MA_OWNED); + UFS_UNLOCK(ump); + error = ffs_update(vp, 1); + if (error != 0 || MOUNTEDSOFTDEP(mp) == 0) { + UFS_LOCK(ump); + return (0); + } + /* + * If we are in need of resources, start by cleaning up + * any block removals associated with our inode. + */ + ACQUIRE_LOCK(ump); + process_removes(vp); + process_truncates(vp); + FREE_LOCK(ump); + /* + * Now clean up at least as many resources as we will need. + * + * When requested to clean up inodes, the number that are needed + * is set by the number of simultaneous writers (mnt_writeopcount) + * plus a bit of slop (2) in case some more writers show up while + * we are cleaning. + * + * When requested to free up space, the amount of space that + * we need is enough blocks to allocate a full-sized segment + * (fs_contigsumsize). 
The number of such segments that will
+ * be needed is set by the number of simultaneous writers
+ * (mnt_writeopcount) plus a bit of slop (2) in case some more
+ * writers show up while we are cleaning.
+ *
+ * Additionally, if we are unprivileged and allocating space,
+ * we need to ensure that we clean up enough blocks to get the
+ * needed number of blocks over the threshold of the minimum
+ * number of blocks required to be kept free by the filesystem
+ * (fs_minfree).
+ */
+ if (resource == FLUSH_INODES_WAIT) {
+ needed = vp->v_mount->mnt_writeopcount + 2;
+ } else if (resource == FLUSH_BLOCKS_WAIT) {
+ needed = (vp->v_mount->mnt_writeopcount + 2) *
+ fs->fs_contigsumsize;
+ if (priv_check_cred(cred, PRIV_VFS_BLOCKRESERVE, 0))
+ needed += fragstoblks(fs,
+ roundup((fs->fs_dsize * fs->fs_minfree / 100) -
+ fs->fs_cstotal.cs_nffree, fs->fs_frag));
+ } else {
+ UFS_LOCK(ump);
+ printf("softdep_request_cleanup: Unknown resource type %d\n",
+ resource);
+ return (0);
+ }
+ starttime = time_second;
+retry:
+ if ((resource == FLUSH_BLOCKS_WAIT && ump->softdep_on_worklist > 0 &&
+ fs->fs_cstotal.cs_nbfree <= needed) ||
+ (resource == FLUSH_INODES_WAIT && fs->fs_pendinginodes > 0 &&
+ fs->fs_cstotal.cs_nifree <= needed)) {
+ ACQUIRE_LOCK(ump);
+ if (ump->softdep_on_worklist > 0 &&
+ process_worklist_item(UFSTOVFS(ump),
+ ump->softdep_on_worklist, LK_NOWAIT) != 0)
+ stat_worklist_push += 1;
+ FREE_LOCK(ump);
+ }
+ /*
+ * If we still need resources and there are no more worklist
+ * entries to process to obtain them, we have to start flushing
+ * the dirty vnodes to force the release of additional requests
+ * to the worklist that we can then process to reap additional
+ * resources. We walk the vnodes associated with the mount point
+ * until we get the needed worklist requests that we can reap.
+ *
+ * If there are several threads all needing to clean the same
+ * mount point, only one is allowed to walk the mount list.
+ * When several threads all try to walk the same mount list,
+ * they end up competing with each other and often end up in
+ * livelock. This approach ensures that forward progress is
+ * made at the cost of occasional ENOSPC errors being returned
+ * that might otherwise have been avoided.
+ */
+ error = 1;
+ if ((resource == FLUSH_BLOCKS_WAIT &&
+ fs->fs_cstotal.cs_nbfree <= needed) ||
+ (resource == FLUSH_INODES_WAIT && fs->fs_pendinginodes > 0 &&
+ fs->fs_cstotal.cs_nifree <= needed)) {
+ ACQUIRE_LOCK(ump);
+ if ((ump->um_softdep->sd_flags & FLUSH_RC_ACTIVE) == 0) {
+ ump->um_softdep->sd_flags |= FLUSH_RC_ACTIVE;
+ FREE_LOCK(ump);
+ failed_vnode = softdep_request_cleanup_flush(mp, ump);
+ ACQUIRE_LOCK(ump);
+ ump->um_softdep->sd_flags &= ~FLUSH_RC_ACTIVE;
+ FREE_LOCK(ump);
+ if (ump->softdep_on_worklist > 0) {
+ stat_cleanup_retries += 1;
+ if (!failed_vnode)
+ goto retry;
+ }
+ } else {
+ FREE_LOCK(ump);
+ error = 0;
+ }
+ stat_cleanup_failures += 1;
+ }
+ if (time_second - starttime > stat_cleanup_high_delay)
+ stat_cleanup_high_delay = time_second - starttime;
+ UFS_LOCK(ump);
+ return (error);
+}
+
+/*
+ * Scan the vnodes for the specified mount point flushing out any
+ * vnodes that can be locked without waiting. Finally, try to flush
+ * the device associated with the mount point if it can be locked
+ * without waiting.
+ *
+ * We return 0 if we were able to lock every vnode in our scan.
+ * If we had to skip one or more vnodes, we return 1.
+ */ +static int +softdep_request_cleanup_flush(mp, ump) + struct mount *mp; + struct ufsmount *ump; +{ + struct thread *td; + struct vnode *lvp, *mvp; + int failed_vnode; + + failed_vnode = 0; + td = curthread; + MNT_VNODE_FOREACH_ALL(lvp, mp, mvp) { + if (TAILQ_FIRST(&lvp->v_bufobj.bo_dirty.bv_hd) == 0) { + VI_UNLOCK(lvp); + continue; + } + if (vget(lvp, LK_EXCLUSIVE | LK_INTERLOCK | LK_NOWAIT, + td) != 0) { + failed_vnode = 1; + continue; + } + if (lvp->v_vflag & VV_NOSYNC) { /* unlinked */ + vput(lvp); + continue; + } + (void) ffs_syncvnode(lvp, MNT_NOWAIT, 0); + vput(lvp); + } + lvp = ump->um_devvp; + if (vn_lock(lvp, LK_EXCLUSIVE | LK_NOWAIT) == 0) { + VOP_FSYNC(lvp, MNT_NOWAIT, td); + VOP_UNLOCK(lvp, 0); + } + return (failed_vnode); +} + +static bool +softdep_excess_items(struct ufsmount *ump, int item) +{ + + KASSERT(item >= 0 && item < D_LAST, ("item %d", item)); + return (dep_current[item] > max_softdeps && + ump->softdep_curdeps[item] > max_softdeps / + stat_flush_threads); +} + +static void +schedule_cleanup(struct mount *mp) +{ + struct ufsmount *ump; + struct thread *td; + + ump = VFSTOUFS(mp); + LOCK_OWNED(ump); + FREE_LOCK(ump); + td = curthread; + if ((td->td_pflags & TDP_KTHREAD) != 0 && + (td->td_proc->p_flag2 & P2_AST_SU) == 0) { + /* + * No ast is delivered to kernel threads, so nobody + * would deref the mp. Some kernel threads + * explicitely check for AST, e.g. NFS daemon does + * this in the serving loop. + */ + return; + } + if (td->td_su != NULL) + vfs_rel(td->td_su); + vfs_ref(mp); + td->td_su = mp; + thread_lock(td); + td->td_flags |= TDF_ASTPENDING; + thread_unlock(td); +} + +static void +softdep_ast_cleanup_proc(struct thread *td) +{ + struct mount *mp; + struct ufsmount *ump; + int error; + bool req; + + while ((mp = td->td_su) != NULL) { + td->td_su = NULL; + error = vfs_busy(mp, MBF_NOWAIT); + vfs_rel(mp); + if (error != 0) + return; + if (ffs_own_mount(mp) && MOUNTEDSOFTDEP(mp)) { + ump = VFSTOUFS(mp); + for (;;) { + req = false; + ACQUIRE_LOCK(ump); + if (softdep_excess_items(ump, D_INODEDEP)) { + req = true; + request_cleanup(mp, FLUSH_INODES); + } + if (softdep_excess_items(ump, D_DIRREM)) { + req = true; + request_cleanup(mp, FLUSH_BLOCKS); + } + FREE_LOCK(ump); + if (softdep_excess_items(ump, D_NEWBLK) || + softdep_excess_items(ump, D_ALLOCDIRECT) || + softdep_excess_items(ump, D_ALLOCINDIR)) { + error = vn_start_write(NULL, &mp, + V_WAIT); + if (error == 0) { + req = true; + VFS_SYNC(mp, MNT_WAIT); + vn_finished_write(mp); + } + } + if ((td->td_pflags & TDP_KTHREAD) != 0 || !req) + break; + } + } + vfs_unbusy(mp); + } + if ((mp = td->td_su) != NULL) { + td->td_su = NULL; + vfs_rel(mp); + } +} + +/* + * If memory utilization has gotten too high, deliberately slow things + * down and speed up the I/O processing. + */ +static int +request_cleanup(mp, resource) + struct mount *mp; + int resource; +{ + struct thread *td = curthread; + struct ufsmount *ump; + + ump = VFSTOUFS(mp); + LOCK_OWNED(ump); + /* + * We never hold up the filesystem syncer or buf daemon. + */ + if (td->td_pflags & (TDP_SOFTDEP|TDP_NORUNNINGBUF)) + return (0); + /* + * First check to see if the work list has gotten backlogged. + * If it has, co-opt this process to help clean up two entries. + * Because this process may hold inodes locked, we cannot + * handle any remove requests that might block on a locked + * inode as that could lead to deadlock. We set TDP_SOFTDEP + * to avoid recursively processing the worklist. 
+ */ + if (ump->softdep_on_worklist > max_softdeps / 10) { + td->td_pflags |= TDP_SOFTDEP; + process_worklist_item(mp, 2, LK_NOWAIT); + td->td_pflags &= ~TDP_SOFTDEP; + stat_worklist_push += 2; + return(1); + } + /* + * Next, we attempt to speed up the syncer process. If that + * is successful, then we allow the process to continue. + */ + if (softdep_speedup(ump) && + resource != FLUSH_BLOCKS_WAIT && + resource != FLUSH_INODES_WAIT) + return(0); + /* + * If we are resource constrained on inode dependencies, try + * flushing some dirty inodes. Otherwise, we are constrained + * by file deletions, so try accelerating flushes of directories + * with removal dependencies. We would like to do the cleanup + * here, but we probably hold an inode locked at this point and + * that might deadlock against one that we try to clean. So, + * the best that we can do is request the syncer daemon to do + * the cleanup for us. + */ + switch (resource) { + + case FLUSH_INODES: + case FLUSH_INODES_WAIT: + ACQUIRE_GBLLOCK(&lk); + stat_ino_limit_push += 1; + req_clear_inodedeps += 1; + FREE_GBLLOCK(&lk); + stat_countp = &stat_ino_limit_hit; + break; + + case FLUSH_BLOCKS: + case FLUSH_BLOCKS_WAIT: + ACQUIRE_GBLLOCK(&lk); + stat_blk_limit_push += 1; + req_clear_remove += 1; + FREE_GBLLOCK(&lk); + stat_countp = &stat_blk_limit_hit; + break; + + default: + panic("request_cleanup: unknown type"); + } + /* + * Hopefully the syncer daemon will catch up and awaken us. + * We wait at most tickdelay before proceeding in any case. + */ + ACQUIRE_GBLLOCK(&lk); + FREE_LOCK(ump); + proc_waiting += 1; + if (callout_pending(&softdep_callout) == FALSE) + callout_reset(&softdep_callout, tickdelay > 2 ? tickdelay : 2, + pause_timer, 0); + + if ((td->td_pflags & TDP_KTHREAD) == 0) + msleep((caddr_t)&proc_waiting, &lk, PPAUSE, "softupdate", 0); + proc_waiting -= 1; + FREE_GBLLOCK(&lk); + ACQUIRE_LOCK(ump); + return (1); +} + +/* + * Awaken processes pausing in request_cleanup and clear proc_waiting + * to indicate that there is no longer a timer running. Pause_timer + * will be called with the global softdep mutex (&lk) locked. + */ +static void +pause_timer(arg) + void *arg; +{ + + GBLLOCK_OWNED(&lk); + /* + * The callout_ API has acquired mtx and will hold it around this + * function call. + */ + *stat_countp += proc_waiting; + wakeup(&proc_waiting); +} + +/* + * If requested, try removing inode or removal dependencies. + */ +static void +check_clear_deps(mp) + struct mount *mp; +{ + + /* + * If we are suspended, it may be because of our using + * too many inodedeps, so help clear them out. + */ + if (MOUNTEDSUJ(mp) && VFSTOUFS(mp)->softdep_jblocks->jb_suspended) + clear_inodedeps(mp); + /* + * General requests for cleanup of backed up dependencies + */ + ACQUIRE_GBLLOCK(&lk); + if (req_clear_inodedeps) { + req_clear_inodedeps -= 1; + FREE_GBLLOCK(&lk); + clear_inodedeps(mp); + ACQUIRE_GBLLOCK(&lk); + wakeup(&proc_waiting); + } + if (req_clear_remove) { + req_clear_remove -= 1; + FREE_GBLLOCK(&lk); + clear_remove(mp); + ACQUIRE_GBLLOCK(&lk); + wakeup(&proc_waiting); + } + FREE_GBLLOCK(&lk); +} + +/* + * Flush out a directory with at least one removal dependency in an effort to + * reduce the number of dirrem, freefile, and freeblks dependency structures. 
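+ * The scan position (pagedep_nextclean) lives in the ufsmount, so + * successive calls rotate through the pagedep hash buckets instead of + * repeatedly flushing the same one.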
+ */ +static void +clear_remove(mp) + struct mount *mp; +{ + struct pagedep_hashhead *pagedephd; + struct pagedep *pagedep; + struct ufsmount *ump; + struct vnode *vp; + struct bufobj *bo; + int error, cnt; + ino_t ino; + + ump = VFSTOUFS(mp); + LOCK_OWNED(ump); + + for (cnt = 0; cnt <= ump->pagedep_hash_size; cnt++) { + pagedephd = &ump->pagedep_hashtbl[ump->pagedep_nextclean++]; + if (ump->pagedep_nextclean > ump->pagedep_hash_size) + ump->pagedep_nextclean = 0; + LIST_FOREACH(pagedep, pagedephd, pd_hash) { + if (LIST_EMPTY(&pagedep->pd_dirremhd)) + continue; + ino = pagedep->pd_ino; + if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) + continue; + FREE_LOCK(ump); + + /* + * Let unmount clear deps + */ + error = vfs_busy(mp, MBF_NOWAIT); + if (error != 0) + goto finish_write; + error = ffs_vgetf(mp, ino, LK_EXCLUSIVE, &vp, + FFSV_FORCEINSMQ); + vfs_unbusy(mp); + if (error != 0) { + softdep_error("clear_remove: vget", error); + goto finish_write; + } + if ((error = ffs_syncvnode(vp, MNT_NOWAIT, 0))) + softdep_error("clear_remove: fsync", error); + bo = &vp->v_bufobj; + BO_LOCK(bo); + drain_output(vp); + BO_UNLOCK(bo); + vput(vp); + finish_write: + vn_finished_write(mp); + ACQUIRE_LOCK(ump); + return; + } + } +} + +/* + * Clear out a block of dirty inodes in an effort to reduce + * the number of inodedep dependency structures. + */ +static void +clear_inodedeps(mp) + struct mount *mp; +{ + struct inodedep_hashhead *inodedephd; + struct inodedep *inodedep; + struct ufsmount *ump; + struct vnode *vp; + struct fs *fs; + int error, cnt; + ino_t firstino, lastino, ino; + + ump = VFSTOUFS(mp); + fs = ump->um_fs; + LOCK_OWNED(ump); + /* + * Pick a random inode dependency to be cleared. + * We will then gather up all the inodes in its block + * that have dependencies and flush them out. + */ + for (cnt = 0; cnt <= ump->inodedep_hash_size; cnt++) { + inodedephd = &ump->inodedep_hashtbl[ump->inodedep_nextclean++]; + if (ump->inodedep_nextclean > ump->inodedep_hash_size) + ump->inodedep_nextclean = 0; + if ((inodedep = LIST_FIRST(inodedephd)) != NULL) + break; + } + if (inodedep == NULL) + return; + /* + * Find the last inode in the block with dependencies. + */ + firstino = rounddown2(inodedep->id_ino, INOPB(fs)); + for (lastino = firstino + INOPB(fs) - 1; lastino > firstino; lastino--) + if (inodedep_lookup(mp, lastino, 0, &inodedep) != 0) + break; + /* + * Asynchronously push all but the last inode with dependencies. + * Synchronously push the last inode with dependencies to ensure + * that the inode block gets written to free up the inodedeps. 
+ */ + for (ino = firstino; ino <= lastino; ino++) { + if (inodedep_lookup(mp, ino, 0, &inodedep) == 0) + continue; + if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) + continue; + FREE_LOCK(ump); + error = vfs_busy(mp, MBF_NOWAIT); /* Let unmount clear deps */ + if (error != 0) { + vn_finished_write(mp); + ACQUIRE_LOCK(ump); + return; + } + if ((error = ffs_vgetf(mp, ino, LK_EXCLUSIVE, &vp, + FFSV_FORCEINSMQ)) != 0) { + softdep_error("clear_inodedeps: vget", error); + vfs_unbusy(mp); + vn_finished_write(mp); + ACQUIRE_LOCK(ump); + return; + } + vfs_unbusy(mp); + if (ino == lastino) { + if ((error = ffs_syncvnode(vp, MNT_WAIT, 0))) + softdep_error("clear_inodedeps: fsync1", error); + } else { + if ((error = ffs_syncvnode(vp, MNT_NOWAIT, 0))) + softdep_error("clear_inodedeps: fsync2", error); + BO_LOCK(&vp->v_bufobj); + drain_output(vp); + BO_UNLOCK(&vp->v_bufobj); + } + vput(vp); + vn_finished_write(mp); + ACQUIRE_LOCK(ump); + } +} + +void +softdep_buf_append(bp, wkhd) + struct buf *bp; + struct workhead *wkhd; +{ + struct worklist *wk; + struct ufsmount *ump; + + if ((wk = LIST_FIRST(wkhd)) == NULL) + return; + KASSERT(MOUNTEDSOFTDEP(wk->wk_mp) != 0, + ("softdep_buf_append called on non-softdep filesystem")); + ump = VFSTOUFS(wk->wk_mp); + ACQUIRE_LOCK(ump); + while ((wk = LIST_FIRST(wkhd)) != NULL) { + WORKLIST_REMOVE(wk); + WORKLIST_INSERT(&bp->b_dep, wk); + } + FREE_LOCK(ump); + +} + +void +softdep_inode_append(ip, cred, wkhd) + struct inode *ip; + struct ucred *cred; + struct workhead *wkhd; +{ + struct buf *bp; + struct fs *fs; + struct ufsmount *ump; + int error; + + ump = ITOUMP(ip); + KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0, + ("softdep_inode_append called on non-softdep filesystem")); + fs = ump->um_fs; + error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), + (int)fs->fs_bsize, cred, &bp); + if (error) { + bqrelse(bp); + softdep_freework(wkhd); + return; + } + softdep_buf_append(bp, wkhd); + bqrelse(bp); +} + +void +softdep_freework(wkhd) + struct workhead *wkhd; +{ + struct worklist *wk; + struct ufsmount *ump; + + if ((wk = LIST_FIRST(wkhd)) == NULL) + return; + KASSERT(MOUNTEDSOFTDEP(wk->wk_mp) != 0, + ("softdep_freework called on non-softdep filesystem")); + ump = VFSTOUFS(wk->wk_mp); + ACQUIRE_LOCK(ump); + handle_jwork(wkhd); + FREE_LOCK(ump); +} + +static struct ufsmount * +softdep_bp_to_mp(bp) + struct buf *bp; +{ + struct mount *mp; + struct vnode *vp; + + if (LIST_EMPTY(&bp->b_dep)) + return (NULL); + vp = bp->b_vp; + + /* + * The ump mount point is stable after we get a correct + * pointer, since bp is locked and this prevents unmount from + * proceeding. But to get to it, we cannot dereference bp->b_dep + * head wk_mp, because we do not yet own SU ump lock and + * workitem might be freed while dereferenced. + */ +retry: + if (vp->v_type == VCHR) { + VI_LOCK(vp); + mp = vp->v_type == VCHR ? vp->v_rdev->si_mountpt : NULL; + VI_UNLOCK(vp); + if (mp == NULL) + goto retry; + } else if (vp->v_type == VREG || vp->v_type == VDIR || + vp->v_type == VLNK) { + mp = vp->v_mount; + } else { + return (NULL); + } + return (VFSTOUFS(mp)); +} + +/* + * Function to determine if the buffer has outstanding dependencies + * that will cause a roll-back if the buffer is written. If wantcount + * is set, return number of dependencies, otherwise just yes or no. 
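+ * The b_dep list is walked with the per-mount softdep lock held; a + * buffer with nothing on its b_dep list trivially reports zero.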
+ */ +static int +softdep_count_dependencies(bp, wantcount) + struct buf *bp; + int wantcount; +{ + struct worklist *wk; + struct ufsmount *ump; + struct bmsafemap *bmsafemap; + struct freework *freework; + struct inodedep *inodedep; + struct indirdep *indirdep; + struct freeblks *freeblks; + struct allocindir *aip; + struct pagedep *pagedep; + struct dirrem *dirrem; + struct newblk *newblk; + struct mkdir *mkdir; + struct diradd *dap; + int i, retval; + + ump = softdep_bp_to_mp(bp); + if (ump == NULL) + return (0); + retval = 0; + ACQUIRE_LOCK(ump); + LIST_FOREACH(wk, &bp->b_dep, wk_list) { + switch (wk->wk_type) { + + case D_INODEDEP: + inodedep = WK_INODEDEP(wk); + if ((inodedep->id_state & DEPCOMPLETE) == 0) { + /* bitmap allocation dependency */ + retval += 1; + if (!wantcount) + goto out; + } + if (TAILQ_FIRST(&inodedep->id_inoupdt)) { + /* direct block pointer dependency */ + retval += 1; + if (!wantcount) + goto out; + } + if (TAILQ_FIRST(&inodedep->id_extupdt)) { + /* direct block pointer dependency */ + retval += 1; + if (!wantcount) + goto out; + } + if (TAILQ_FIRST(&inodedep->id_inoreflst)) { + /* Add reference dependency. */ + retval += 1; + if (!wantcount) + goto out; + } + continue; + + case D_INDIRDEP: + indirdep = WK_INDIRDEP(wk); + + TAILQ_FOREACH(freework, &indirdep->ir_trunc, fw_next) { + /* indirect truncation dependency */ + retval += 1; + if (!wantcount) + goto out; + } + + LIST_FOREACH(aip, &indirdep->ir_deplisthd, ai_next) { + /* indirect block pointer dependency */ + retval += 1; + if (!wantcount) + goto out; + } + continue; + + case D_PAGEDEP: + pagedep = WK_PAGEDEP(wk); + LIST_FOREACH(dirrem, &pagedep->pd_dirremhd, dm_next) { + if (LIST_FIRST(&dirrem->dm_jremrefhd)) { + /* Journal remove ref dependency. */ + retval += 1; + if (!wantcount) + goto out; + } + } + for (i = 0; i < DAHASHSZ; i++) { + + LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) { + /* directory entry dependency */ + retval += 1; + if (!wantcount) + goto out; + } + } + continue; + + case D_BMSAFEMAP: + bmsafemap = WK_BMSAFEMAP(wk); + if (LIST_FIRST(&bmsafemap->sm_jaddrefhd)) { + /* Add reference dependency. */ + retval += 1; + if (!wantcount) + goto out; + } + if (LIST_FIRST(&bmsafemap->sm_jnewblkhd)) { + /* Allocate block dependency. */ + retval += 1; + if (!wantcount) + goto out; + } + continue; + + case D_FREEBLKS: + freeblks = WK_FREEBLKS(wk); + if (LIST_FIRST(&freeblks->fb_jblkdephd)) { + /* Freeblk journal dependency. */ + retval += 1; + if (!wantcount) + goto out; + } + continue; + + case D_ALLOCDIRECT: + case D_ALLOCINDIR: + newblk = WK_NEWBLK(wk); + if (newblk->nb_jnewblk) { + /* Journal allocate dependency. */ + retval += 1; + if (!wantcount) + goto out; + } + continue; + + case D_MKDIR: + mkdir = WK_MKDIR(wk); + if (mkdir->md_jaddref) { + /* Journal reference dependency. */ + retval += 1; + if (!wantcount) + goto out; + } + continue; + + case D_FREEWORK: + case D_FREEDEP: + case D_JSEGDEP: + case D_JSEG: + case D_SBDEP: + /* never a dependency on these blocks */ + continue; + + default: + panic("softdep_count_dependencies: Unexpected type %s", + TYPENAME(wk->wk_type)); + /* NOTREACHED */ + } + } +out: + FREE_LOCK(ump); + return (retval); +} + +/* + * Acquire exclusive access to a buffer. + * Must be called with a locked mtx parameter. + * Return acquired buffer or NULL on failure. 
+ */ +static struct buf * +getdirtybuf(bp, lock, waitfor) + struct buf *bp; + struct rwlock *lock; + int waitfor; +{ + int error; + + if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0) { + if (waitfor != MNT_WAIT) + return (NULL); + error = BUF_LOCK(bp, + LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, lock); + /* + * Even if we successfully acquire bp here, we have dropped + * lock, which may violates our guarantee. + */ + if (error == 0) + BUF_UNLOCK(bp); + else if (error != ENOLCK) + panic("getdirtybuf: inconsistent lock: %d", error); + rw_wlock(lock); + return (NULL); + } + if ((bp->b_vflags & BV_BKGRDINPROG) != 0) { + if (lock != BO_LOCKPTR(bp->b_bufobj) && waitfor == MNT_WAIT) { + rw_wunlock(lock); + BO_LOCK(bp->b_bufobj); + BUF_UNLOCK(bp); + if ((bp->b_vflags & BV_BKGRDINPROG) != 0) { + bp->b_vflags |= BV_BKGRDWAIT; + msleep(&bp->b_xflags, BO_LOCKPTR(bp->b_bufobj), + PRIBIO | PDROP, "getbuf", 0); + } else + BO_UNLOCK(bp->b_bufobj); + rw_wlock(lock); + return (NULL); + } + BUF_UNLOCK(bp); + if (waitfor != MNT_WAIT) + return (NULL); +#ifdef DEBUG_VFS_LOCKS + if (bp->b_vp->v_type != VCHR) + ASSERT_BO_WLOCKED(bp->b_bufobj); +#endif + bp->b_vflags |= BV_BKGRDWAIT; + rw_sleep(&bp->b_xflags, lock, PRIBIO, "getbuf", 0); + return (NULL); + } + if ((bp->b_flags & B_DELWRI) == 0) { + BUF_UNLOCK(bp); + return (NULL); + } + bremfree(bp); + return (bp); +} + + +/* + * Check if it is safe to suspend the file system now. On entry, + * the vnode interlock for devvp should be held. Return 0 with + * the mount interlock held if the file system can be suspended now, + * otherwise return EAGAIN with the mount interlock held. + */ +int +softdep_check_suspend(struct mount *mp, + struct vnode *devvp, + int softdep_depcnt, + int softdep_accdepcnt, + int secondary_writes, + int secondary_accwrites) +{ + struct bufobj *bo; + struct ufsmount *ump; + struct inodedep *inodedep; + int error, unlinked; + + bo = &devvp->v_bufobj; + ASSERT_BO_WLOCKED(bo); + + /* + * If we are not running with soft updates, then we need only + * deal with secondary writes as we try to suspend. + */ + if (MOUNTEDSOFTDEP(mp) == 0) { + MNT_ILOCK(mp); + while (mp->mnt_secondary_writes != 0) { + BO_UNLOCK(bo); + msleep(&mp->mnt_secondary_writes, MNT_MTX(mp), + (PUSER - 1) | PDROP, "secwr", 0); + BO_LOCK(bo); + MNT_ILOCK(mp); + } + + /* + * Reasons for needing more work before suspend: + * - Dirty buffers on devvp. + * - Secondary writes occurred after start of vnode sync loop + */ + error = 0; + if (bo->bo_numoutput > 0 || + bo->bo_dirty.bv_cnt > 0 || + secondary_writes != 0 || + mp->mnt_secondary_writes != 0 || + secondary_accwrites != mp->mnt_secondary_accwrites) + error = EAGAIN; + BO_UNLOCK(bo); + return (error); + } + + /* + * If we are running with soft updates, then we need to coordinate + * with them as we try to suspend. 
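+ * The loop below retries until the softdep lock can be taken without + * sleeping while no secondary writes are in progress.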
+ */ + ump = VFSTOUFS(mp); + for (;;) { + if (!TRY_ACQUIRE_LOCK(ump)) { + BO_UNLOCK(bo); + ACQUIRE_LOCK(ump); + FREE_LOCK(ump); + BO_LOCK(bo); + continue; + } + MNT_ILOCK(mp); + if (mp->mnt_secondary_writes != 0) { + FREE_LOCK(ump); + BO_UNLOCK(bo); + msleep(&mp->mnt_secondary_writes, + MNT_MTX(mp), + (PUSER - 1) | PDROP, "secwr", 0); + BO_LOCK(bo); + continue; + } + break; + } + + unlinked = 0; + if (MOUNTEDSUJ(mp)) { + for (inodedep = TAILQ_FIRST(&ump->softdep_unlinked); + inodedep != NULL; + inodedep = TAILQ_NEXT(inodedep, id_unlinked)) { + if ((inodedep->id_state & (UNLINKED | UNLINKLINKS | + UNLINKONLIST)) != (UNLINKED | UNLINKLINKS | + UNLINKONLIST) || + !check_inodedep_free(inodedep)) + continue; + unlinked++; + } + } + + /* + * Reasons for needing more work before suspend: + * - Dirty buffers on devvp. + * - Softdep activity occurred after start of vnode sync loop + * - Secondary writes occurred after start of vnode sync loop + */ + error = 0; + if (bo->bo_numoutput > 0 || + bo->bo_dirty.bv_cnt > 0 || + softdep_depcnt != unlinked || + ump->softdep_deps != unlinked || + softdep_accdepcnt != ump->softdep_accdeps || + secondary_writes != 0 || + mp->mnt_secondary_writes != 0 || + secondary_accwrites != mp->mnt_secondary_accwrites) + error = EAGAIN; + FREE_LOCK(ump); + BO_UNLOCK(bo); + return (error); +} + + +/* + * Get the number of dependency structures for the file system, both + * the current number and the total number allocated. These will + * later be used to detect that softdep processing has occurred. + */ +void +softdep_get_depcounts(struct mount *mp, + int *softdep_depsp, + int *softdep_accdepsp) +{ + struct ufsmount *ump; + + if (MOUNTEDSOFTDEP(mp) == 0) { + *softdep_depsp = 0; + *softdep_accdepsp = 0; + return; + } + ump = VFSTOUFS(mp); + ACQUIRE_LOCK(ump); + *softdep_depsp = ump->softdep_deps; + *softdep_accdepsp = ump->softdep_accdeps; + FREE_LOCK(ump); +} + +/* + * Wait for pending output on a vnode to complete. + */ +static void +drain_output(vp) + struct vnode *vp; +{ + + ASSERT_VOP_LOCKED(vp, "drain_output"); + (void)bufobj_wwait(&vp->v_bufobj, 0, 0); +} + +/* + * Called whenever a buffer that is being invalidated or reallocated + * contains dependencies. This should only happen if an I/O error has + * occurred. The routine is called with the buffer locked. + */ +static void +softdep_deallocate_dependencies(bp) + struct buf *bp; +{ + + if ((bp->b_ioflags & BIO_ERROR) == 0) + panic("softdep_deallocate_dependencies: dangling deps"); + if (bp->b_vp != NULL && bp->b_vp->v_mount != NULL) + softdep_error(bp->b_vp->v_mount->mnt_stat.f_mntonname, bp->b_error); + else + printf("softdep_deallocate_dependencies: " + "got error %d while accessing filesystem\n", bp->b_error); + if (bp->b_error != ENXIO) + panic("softdep_deallocate_dependencies: unrecovered I/O error"); +} + +/* + * Function to handle asynchronous write errors in the filesystem. + */ +static void +softdep_error(func, error) + char *func; + int error; +{ + + /* XXX should do something better! 
*/ + printf("%s: got error %d while accessing filesystem\n", func, error); +} + +#ifdef DDB + +static void +inodedep_print(struct inodedep *inodedep, int verbose) +{ + db_printf("%p fs %p st %x ino %jd inoblk %jd delta %jd nlink %jd" + " saveino %p\n", + inodedep, inodedep->id_fs, inodedep->id_state, + (intmax_t)inodedep->id_ino, + (intmax_t)fsbtodb(inodedep->id_fs, + ino_to_fsba(inodedep->id_fs, inodedep->id_ino)), + (intmax_t)inodedep->id_nlinkdelta, + (intmax_t)inodedep->id_savednlink, + inodedep->id_savedino1); + + if (verbose == 0) + return; + + db_printf("\tpendinghd %p, bufwait %p, inowait %p, inoreflst %p, " + "mkdiradd %p\n", + LIST_FIRST(&inodedep->id_pendinghd), + LIST_FIRST(&inodedep->id_bufwait), + LIST_FIRST(&inodedep->id_inowait), + TAILQ_FIRST(&inodedep->id_inoreflst), + inodedep->id_mkdiradd); + db_printf("\tinoupdt %p, newinoupdt %p, extupdt %p, newextupdt %p\n", + TAILQ_FIRST(&inodedep->id_inoupdt), + TAILQ_FIRST(&inodedep->id_newinoupdt), + TAILQ_FIRST(&inodedep->id_extupdt), + TAILQ_FIRST(&inodedep->id_newextupdt)); +} + +DB_SHOW_COMMAND(inodedep, db_show_inodedep) +{ + + if (have_addr == 0) { + db_printf("Address required\n"); + return; + } + inodedep_print((struct inodedep*)addr, 1); +} + +DB_SHOW_COMMAND(inodedeps, db_show_inodedeps) +{ + struct inodedep_hashhead *inodedephd; + struct inodedep *inodedep; + struct ufsmount *ump; + int cnt; + + if (have_addr == 0) { + db_printf("Address required\n"); + return; + } + ump = (struct ufsmount *)addr; + for (cnt = 0; cnt < ump->inodedep_hash_size; cnt++) { + inodedephd = &ump->inodedep_hashtbl[cnt]; + LIST_FOREACH(inodedep, inodedephd, id_hash) { + inodedep_print(inodedep, 0); + } + } +} + +DB_SHOW_COMMAND(worklist, db_show_worklist) +{ + struct worklist *wk; + + if (have_addr == 0) { + db_printf("Address required\n"); + return; + } + wk = (struct worklist *)addr; + printf("worklist: %p type %s state 0x%X\n", + wk, TYPENAME(wk->wk_type), wk->wk_state); +} + +DB_SHOW_COMMAND(workhead, db_show_workhead) +{ + struct workhead *wkhd; + struct worklist *wk; + int i; + + if (have_addr == 0) { + db_printf("Address required\n"); + return; + } + wkhd = (struct workhead *)addr; + wk = LIST_FIRST(wkhd); + for (i = 0; i < 100 && wk != NULL; i++, wk = LIST_NEXT(wk, wk_list)) + db_printf("worklist: %p type %s state 0x%X", + wk, TYPENAME(wk->wk_type), wk->wk_state); + if (i == 100) + db_printf("workhead overflow"); + printf("\n"); +} + + +DB_SHOW_COMMAND(mkdirs, db_show_mkdirs) +{ + struct mkdirlist *mkdirlisthd; + struct jaddref *jaddref; + struct diradd *diradd; + struct mkdir *mkdir; + + if (have_addr == 0) { + db_printf("Address required\n"); + return; + } + mkdirlisthd = (struct mkdirlist *)addr; + LIST_FOREACH(mkdir, mkdirlisthd, md_mkdirs) { + diradd = mkdir->md_diradd; + db_printf("mkdir: %p state 0x%X dap %p state 0x%X", + mkdir, mkdir->md_state, diradd, diradd->da_state); + if ((jaddref = mkdir->md_jaddref) != NULL) + db_printf(" jaddref %p jaddref state 0x%X", + jaddref, jaddref->ja_state); + db_printf("\n"); + } +} + +/* exported to ffs_vfsops.c */ +extern void db_print_ffs(struct ufsmount *ump); +void +db_print_ffs(struct ufsmount *ump) +{ + db_printf("mp %p %s devvp %p fs %p su_wl %d su_deps %d su_req %d\n", + ump->um_mountp, ump->um_mountp->mnt_stat.f_mntonname, + ump->um_devvp, ump->um_fs, ump->softdep_on_worklist, + ump->softdep_deps, ump->softdep_req); +} + +#endif /* DDB */ + +#endif /* SOFTUPDATES */ diff --git a/Dump/ufs/ffs/ffs_subr.c b/Dump/ufs/ffs/ffs_subr.c new file mode 100644 index 0000000..e75b89f --- /dev/null 
+++ b/Dump/ufs/ffs/ffs_subr.c @@ -0,0 +1,379 @@ +/*- + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ffs_subr.c 8.5 (Berkeley) 3/21/95 + */ + +#include +__FBSDID("$FreeBSD: releng/11.2/sys/ufs/ffs/ffs_subr.c 331722 2018-03-29 02:50:57Z eadler $"); + +#include + +#ifndef _KERNEL +#include +#include +#else +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +/* + * Return buffer with the contents of block "offset" from the beginning of + * directory "ip". If "res" is non-zero, fill it in with a pointer to the + * remaining space in the directory. + */ +int +ffs_blkatoff(vp, offset, res, bpp) + struct vnode *vp; + off_t offset; + char **res; + struct buf **bpp; +{ + struct inode *ip; + struct fs *fs; + struct buf *bp; + ufs_lbn_t lbn; + int bsize, error; + + ip = VTOI(vp); + fs = ITOFS(ip); + lbn = lblkno(fs, offset); + bsize = blksize(fs, ip, lbn); + + *bpp = NULL; + error = bread(vp, lbn, bsize, NOCRED, &bp); + if (error) { + brelse(bp); + return (error); + } + if (res) + *res = (char *)bp->b_data + blkoff(fs, offset); + *bpp = bp; + return (0); +} + +/* + * Load up the contents of an inode and copy the appropriate pieces + * to the incore copy. 
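+ * Only a handful of commonly used fields (mode, nlink, size, flags, + * gen, uid and gid) are mirrored into the in-core inode; the complete + * on-disk inode is copied into i_din1 or i_din2.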
+ */ +void +ffs_load_inode(bp, ip, fs, ino) + struct buf *bp; + struct inode *ip; + struct fs *fs; + ino_t ino; +{ + + if (I_IS_UFS1(ip)) { + *ip->i_din1 = + *((struct ufs1_dinode *)bp->b_data + ino_to_fsbo(fs, ino)); + ip->i_mode = ip->i_din1->di_mode; + ip->i_nlink = ip->i_din1->di_nlink; + ip->i_size = ip->i_din1->di_size; + ip->i_flags = ip->i_din1->di_flags; + ip->i_gen = ip->i_din1->di_gen; + ip->i_uid = ip->i_din1->di_uid; + ip->i_gid = ip->i_din1->di_gid; + } else { + *ip->i_din2 = + *((struct ufs2_dinode *)bp->b_data + ino_to_fsbo(fs, ino)); + ip->i_mode = ip->i_din2->di_mode; + ip->i_nlink = ip->i_din2->di_nlink; + ip->i_size = ip->i_din2->di_size; + ip->i_flags = ip->i_din2->di_flags; + ip->i_gen = ip->i_din2->di_gen; + ip->i_uid = ip->i_din2->di_uid; + ip->i_gid = ip->i_din2->di_gid; + } +} +#endif /* KERNEL */ + +/* + * Update the frsum fields to reflect addition or deletion + * of some frags. + */ +void +ffs_fragacct(fs, fragmap, fraglist, cnt) + struct fs *fs; + int fragmap; + int32_t fraglist[]; + int cnt; +{ + int inblk; + int field, subfield; + int siz, pos; + + inblk = (int)(fragtbl[fs->fs_frag][fragmap]) << 1; + fragmap <<= 1; + for (siz = 1; siz < fs->fs_frag; siz++) { + if ((inblk & (1 << (siz + (fs->fs_frag % NBBY)))) == 0) + continue; + field = around[siz]; + subfield = inside[siz]; + for (pos = siz; pos <= fs->fs_frag; pos++) { + if ((fragmap & field) == subfield) { + fraglist[siz] += cnt; + pos += siz; + field <<= siz; + subfield <<= siz; + } + field <<= 1; + subfield <<= 1; + } + } +} + +/* + * block operations + * + * check if a block is available + */ +int +ffs_isblock(fs, cp, h) + struct fs *fs; + unsigned char *cp; + ufs1_daddr_t h; +{ + unsigned char mask; + + switch ((int)fs->fs_frag) { + case 8: + return (cp[h] == 0xff); + case 4: + mask = 0x0f << ((h & 0x1) << 2); + return ((cp[h >> 1] & mask) == mask); + case 2: + mask = 0x03 << ((h & 0x3) << 1); + return ((cp[h >> 2] & mask) == mask); + case 1: + mask = 0x01 << (h & 0x7); + return ((cp[h >> 3] & mask) == mask); + default: +#ifdef _KERNEL + panic("ffs_isblock"); +#endif + break; + } + return (0); +} + +/* + * check if a block is free + */ +int +ffs_isfreeblock(fs, cp, h) + struct fs *fs; + u_char *cp; + ufs1_daddr_t h; +{ + + switch ((int)fs->fs_frag) { + case 8: + return (cp[h] == 0); + case 4: + return ((cp[h >> 1] & (0x0f << ((h & 0x1) << 2))) == 0); + case 2: + return ((cp[h >> 2] & (0x03 << ((h & 0x3) << 1))) == 0); + case 1: + return ((cp[h >> 3] & (0x01 << (h & 0x7))) == 0); + default: +#ifdef _KERNEL + panic("ffs_isfreeblock"); +#endif + break; + } + return (0); +} + +/* + * take a block out of the map + */ +void +ffs_clrblock(fs, cp, h) + struct fs *fs; + u_char *cp; + ufs1_daddr_t h; +{ + + switch ((int)fs->fs_frag) { + case 8: + cp[h] = 0; + return; + case 4: + cp[h >> 1] &= ~(0x0f << ((h & 0x1) << 2)); + return; + case 2: + cp[h >> 2] &= ~(0x03 << ((h & 0x3) << 1)); + return; + case 1: + cp[h >> 3] &= ~(0x01 << (h & 0x7)); + return; + default: +#ifdef _KERNEL + panic("ffs_clrblock"); +#endif + break; + } +} + +/* + * put a block into the map + */ +void +ffs_setblock(fs, cp, h) + struct fs *fs; + unsigned char *cp; + ufs1_daddr_t h; +{ + + switch ((int)fs->fs_frag) { + + case 8: + cp[h] = 0xff; + return; + case 4: + cp[h >> 1] |= (0x0f << ((h & 0x1) << 2)); + return; + case 2: + cp[h >> 2] |= (0x03 << ((h & 0x3) << 1)); + return; + case 1: + cp[h >> 3] |= (0x01 << (h & 0x7)); + return; + default: +#ifdef _KERNEL + panic("ffs_setblock"); +#endif + break; + } +} + +/* + * Update the cluster map 
because of an allocation or free. + * + * Cnt == 1 means free; cnt == -1 means allocating. + */ +void +ffs_clusteracct(fs, cgp, blkno, cnt) + struct fs *fs; + struct cg *cgp; + ufs1_daddr_t blkno; + int cnt; +{ + int32_t *sump; + int32_t *lp; + u_char *freemapp, *mapp; + int i, start, end, forw, back, map, bit; + + if (fs->fs_contigsumsize <= 0) + return; + freemapp = cg_clustersfree(cgp); + sump = cg_clustersum(cgp); + /* + * Allocate or clear the actual block. + */ + if (cnt > 0) + setbit(freemapp, blkno); + else + clrbit(freemapp, blkno); + /* + * Find the size of the cluster going forward. + */ + start = blkno + 1; + end = start + fs->fs_contigsumsize; + if (end >= cgp->cg_nclusterblks) + end = cgp->cg_nclusterblks; + mapp = &freemapp[start / NBBY]; + map = *mapp++; + bit = 1 << (start % NBBY); + for (i = start; i < end; i++) { + if ((map & bit) == 0) + break; + if ((i & (NBBY - 1)) != (NBBY - 1)) { + bit <<= 1; + } else { + map = *mapp++; + bit = 1; + } + } + forw = i - start; + /* + * Find the size of the cluster going backward. + */ + start = blkno - 1; + end = start - fs->fs_contigsumsize; + if (end < 0) + end = -1; + mapp = &freemapp[start / NBBY]; + map = *mapp--; + bit = 1 << (start % NBBY); + for (i = start; i > end; i--) { + if ((map & bit) == 0) + break; + if ((i & (NBBY - 1)) != 0) { + bit >>= 1; + } else { + map = *mapp--; + bit = 1 << (NBBY - 1); + } + } + back = start - i; + /* + * Account for old cluster and the possibly new forward and + * back clusters. + */ + i = back + forw + 1; + if (i > fs->fs_contigsumsize) + i = fs->fs_contigsumsize; + sump[i] += cnt; + if (back > 0) + sump[back] -= cnt; + if (forw > 0) + sump[forw] -= cnt; + /* + * Update cluster summary information. + */ + lp = &sump[fs->fs_contigsumsize]; + for (i = fs->fs_contigsumsize; i > 0; i--) + if (*lp-- > 0) + break; + fs->fs_maxcluster[cgp->cg_cgx] = i; +} diff --git a/Dump/ufs/ffs/ffs_suspend.c b/Dump/ufs/ffs/ffs_suspend.c new file mode 100644 index 0000000..a714c1f --- /dev/null +++ b/Dump/ufs/ffs/ffs_suspend.c @@ -0,0 +1,337 @@ +/*- + * Copyright (c) 2012 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Edward Tomasz Napierala under sponsorship + * from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD: releng/11.2/sys/ufs/ffs/ffs_suspend.c 306165 2016-09-22 08:56:54Z kib $ + */ + +#include +__FBSDID("$FreeBSD: releng/11.2/sys/ufs/ffs/ffs_suspend.c 306165 2016-09-22 08:56:54Z kib $"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include +#include + +static d_open_t ffs_susp_open; +static d_write_t ffs_susp_rdwr; +static d_ioctl_t ffs_susp_ioctl; + +static struct cdevsw ffs_susp_cdevsw = { + .d_version = D_VERSION, + .d_open = ffs_susp_open, + .d_read = ffs_susp_rdwr, + .d_write = ffs_susp_rdwr, + .d_ioctl = ffs_susp_ioctl, + .d_name = "ffs_susp", +}; + +static struct cdev *ffs_susp_dev; +static struct sx ffs_susp_lock; + +static int +ffs_susp_suspended(struct mount *mp) +{ + struct ufsmount *ump; + + sx_assert(&ffs_susp_lock, SA_LOCKED); + + ump = VFSTOUFS(mp); + if (ump->um_writesuspended) + return (1); + return (0); +} + +static int +ffs_susp_open(struct cdev *dev __unused, int flags __unused, + int fmt __unused, struct thread *td __unused) +{ + + return (0); +} + +static int +ffs_susp_rdwr(struct cdev *dev, struct uio *uio, int ioflag) +{ + int error, i; + struct vnode *devvp; + struct mount *mp; + struct ufsmount *ump; + struct buf *bp; + void *base; + size_t len; + ssize_t cnt; + struct fs *fs; + + sx_slock(&ffs_susp_lock); + + error = devfs_get_cdevpriv((void **)&mp); + if (error != 0) { + sx_sunlock(&ffs_susp_lock); + return (ENXIO); + } + + ump = VFSTOUFS(mp); + devvp = ump->um_devvp; + fs = ump->um_fs; + + if (ffs_susp_suspended(mp) == 0) { + sx_sunlock(&ffs_susp_lock); + return (ENXIO); + } + + KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, + ("neither UIO_READ or UIO_WRITE")); + KASSERT(uio->uio_segflg == UIO_USERSPACE, + ("uio->uio_segflg != UIO_USERSPACE")); + + cnt = uio->uio_resid; + + for (i = 0; i < uio->uio_iovcnt; i++) { + while (uio->uio_iov[i].iov_len) { + base = uio->uio_iov[i].iov_base; + len = uio->uio_iov[i].iov_len; + if (len > fs->fs_bsize) + len = fs->fs_bsize; + if (fragoff(fs, uio->uio_offset) != 0 || + fragoff(fs, len) != 0) { + error = EINVAL; + goto out; + } + error = bread(devvp, btodb(uio->uio_offset), len, + NOCRED, &bp); + if (error != 0) + goto out; + if (uio->uio_rw == UIO_WRITE) { + error = copyin(base, bp->b_data, len); + if (error != 0) { + bp->b_flags |= B_INVAL | B_NOCACHE; + brelse(bp); + goto out; + } + error = bwrite(bp); + if (error != 0) + goto out; + } else { + error = copyout(bp->b_data, base, len); + brelse(bp); + if (error != 0) + goto out; + } + uio->uio_iov[i].iov_base = + (char *)uio->uio_iov[i].iov_base + len; + uio->uio_iov[i].iov_len -= len; + uio->uio_resid -= len; + uio->uio_offset += len; + } + } + +out: + sx_sunlock(&ffs_susp_lock); + + if (uio->uio_resid < cnt) + return (0); + + return (error); +} + +static int +ffs_susp_suspend(struct mount *mp) +{ + struct ufsmount *ump; + int error; + + sx_assert(&ffs_susp_lock, SA_XLOCKED); + + if (!ffs_own_mount(mp)) + return (EINVAL); + if (ffs_susp_suspended(mp)) + return (EBUSY); + + ump = VFSTOUFS(mp); + + /* + * Make sure the calling thread is permitted to access the mounted + * device. The permissions can change after we unlock the vnode; + * it's harmless. 
+ */ + vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); + error = VOP_ACCESS(ump->um_devvp, VREAD | VWRITE, + curthread->td_ucred, curthread); + VOP_UNLOCK(ump->um_devvp, 0); + if (error != 0) + return (error); +#ifdef MAC + if (mac_mount_check_stat(curthread->td_ucred, mp) != 0) + return (EPERM); +#endif + + if ((error = vfs_write_suspend(mp, VS_SKIP_UNMOUNT)) != 0) + return (error); + + ump->um_writesuspended = 1; + + return (0); +} + +static void +ffs_susp_dtor(void *data) +{ + struct fs *fs; + struct ufsmount *ump; + struct mount *mp; + int error; + + sx_xlock(&ffs_susp_lock); + + mp = (struct mount *)data; + ump = VFSTOUFS(mp); + fs = ump->um_fs; + + if (ffs_susp_suspended(mp) == 0) { + sx_xunlock(&ffs_susp_lock); + return; + } + + KASSERT((mp->mnt_kern_flag & MNTK_SUSPEND) != 0, + ("MNTK_SUSPEND not set")); + + error = ffs_reload(mp, curthread, FFSR_FORCE | FFSR_UNSUSPEND); + if (error != 0) + panic("failed to unsuspend writes on %s", fs->fs_fsmnt); + + /* + * XXX: The status is kept per-process; the vfs_write_resume() routine + * asserts that the resuming thread is the same one that called + * vfs_write_suspend(). The cdevpriv data, however, is attached + * to the file descriptor, e.g. is inherited during fork. Thus, + * it's possible that the resuming process will be different from + * the one that started the suspension. + * + * Work around by fooling the check in vfs_write_resume(). + */ + mp->mnt_susp_owner = curthread; + + vfs_write_resume(mp, 0); + vfs_unbusy(mp); + ump->um_writesuspended = 0; + + sx_xunlock(&ffs_susp_lock); +} + +static int +ffs_susp_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, + struct thread *td) +{ + struct mount *mp; + fsid_t *fsidp; + int error; + + /* + * No suspend inside the jail. Allowing it would require making + * sure that e.g. the devfs ruleset for that jail permits access + * to the devvp. + */ + if (jailed(td->td_ucred)) + return (EPERM); + + sx_xlock(&ffs_susp_lock); + + switch (cmd) { + case UFSSUSPEND: + fsidp = (fsid_t *)addr; + mp = vfs_getvfs(fsidp); + if (mp == NULL) { + error = ENOENT; + break; + } + error = vfs_busy(mp, 0); + vfs_rel(mp); + if (error != 0) + break; + error = ffs_susp_suspend(mp); + if (error != 0) { + vfs_unbusy(mp); + break; + } + error = devfs_set_cdevpriv(mp, ffs_susp_dtor); + KASSERT(error == 0, ("devfs_set_cdevpriv failed")); + break; + case UFSRESUME: + error = devfs_get_cdevpriv((void **)&mp); + if (error != 0) + break; + /* + * This calls ffs_susp_dtor, which in turn unsuspends the fs. + * The dtor expects to be called without lock held, because + * sometimes it's called from here, and sometimes due to the + * file being closed or process exiting. + */ + sx_xunlock(&ffs_susp_lock); + devfs_clear_cdevpriv(); + return (0); + default: + error = ENXIO; + break; + } + + sx_xunlock(&ffs_susp_lock); + + return (error); +} + +void +ffs_susp_initialize(void) +{ + + sx_init(&ffs_susp_lock, "ffs_susp"); + ffs_susp_dev = make_dev(&ffs_susp_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, + "ufssuspend"); +} + +void +ffs_susp_uninitialize(void) +{ + + destroy_dev(ffs_susp_dev); + sx_destroy(&ffs_susp_lock); +} diff --git a/Dump/ufs/ffs/ffs_tables.c b/Dump/ufs/ffs/ffs_tables.c new file mode 100644 index 0000000..ea4b15b --- /dev/null +++ b/Dump/ufs/ffs/ffs_tables.c @@ -0,0 +1,137 @@ +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ffs_tables.c 8.1 (Berkeley) 6/11/93 + */ + +#include +__FBSDID("$FreeBSD: releng/11.2/sys/ufs/ffs/ffs_tables.c 331722 2018-03-29 02:50:57Z eadler $"); + +#include +#include +#include + +/* + * Bit patterns for identifying fragments in the block map + * used as ((map & around) == inside) + */ +int around[9] = { + 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff, 0x1ff, 0x3ff +}; +int inside[9] = { + 0x0, 0x2, 0x6, 0xe, 0x1e, 0x3e, 0x7e, 0xfe, 0x1fe +}; + +/* + * Given a block map bit pattern, the frag tables tell whether a + * particular size fragment is available. + * + * used as: + * if ((1 << (size - 1)) & fragtbl[fs->fs_frag][map] { + * at least one fragment of the indicated size is available + * } + * + * These tables are used by the scanc instruction on the VAX to + * quickly find an appropriate fragment. 
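+ * For example, with fs_frag == 8 a map byte of 0x07 (three adjacent + * free fragments) yields fragtbl8[0x07] == 0x04, so only the size-3 + * test (1 << 2) succeeds for that byte.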
+ */ +static u_char fragtbl124[256] = { + 0x00, 0x16, 0x16, 0x2a, 0x16, 0x16, 0x26, 0x4e, + 0x16, 0x16, 0x16, 0x3e, 0x2a, 0x3e, 0x4e, 0x8a, + 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, + 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, + 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, + 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, + 0x2a, 0x3e, 0x3e, 0x2a, 0x3e, 0x3e, 0x2e, 0x6e, + 0x3e, 0x3e, 0x3e, 0x3e, 0x2a, 0x3e, 0x6e, 0xaa, + 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, + 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, + 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, + 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, + 0x26, 0x36, 0x36, 0x2e, 0x36, 0x36, 0x26, 0x6e, + 0x36, 0x36, 0x36, 0x3e, 0x2e, 0x3e, 0x6e, 0xae, + 0x4e, 0x5e, 0x5e, 0x6e, 0x5e, 0x5e, 0x6e, 0x4e, + 0x5e, 0x5e, 0x5e, 0x7e, 0x6e, 0x7e, 0x4e, 0xce, + 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, + 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, + 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, + 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, + 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, + 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, + 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e, + 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e, 0xbe, + 0x2a, 0x3e, 0x3e, 0x2a, 0x3e, 0x3e, 0x2e, 0x6e, + 0x3e, 0x3e, 0x3e, 0x3e, 0x2a, 0x3e, 0x6e, 0xaa, + 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e, + 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e, 0xbe, + 0x4e, 0x5e, 0x5e, 0x6e, 0x5e, 0x5e, 0x6e, 0x4e, + 0x5e, 0x5e, 0x5e, 0x7e, 0x6e, 0x7e, 0x4e, 0xce, + 0x8a, 0x9e, 0x9e, 0xaa, 0x9e, 0x9e, 0xae, 0xce, + 0x9e, 0x9e, 0x9e, 0xbe, 0xaa, 0xbe, 0xce, 0x8a, +}; + +static u_char fragtbl8[256] = { + 0x00, 0x01, 0x01, 0x02, 0x01, 0x01, 0x02, 0x04, + 0x01, 0x01, 0x01, 0x03, 0x02, 0x03, 0x04, 0x08, + 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, + 0x02, 0x03, 0x03, 0x02, 0x04, 0x05, 0x08, 0x10, + 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, + 0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09, + 0x02, 0x03, 0x03, 0x02, 0x03, 0x03, 0x02, 0x06, + 0x04, 0x05, 0x05, 0x06, 0x08, 0x09, 0x10, 0x20, + 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, + 0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09, + 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, + 0x03, 0x03, 0x03, 0x03, 0x05, 0x05, 0x09, 0x11, + 0x02, 0x03, 0x03, 0x02, 0x03, 0x03, 0x02, 0x06, + 0x03, 0x03, 0x03, 0x03, 0x02, 0x03, 0x06, 0x0a, + 0x04, 0x05, 0x05, 0x06, 0x05, 0x05, 0x06, 0x04, + 0x08, 0x09, 0x09, 0x0a, 0x10, 0x11, 0x20, 0x40, + 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, + 0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09, + 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, + 0x03, 0x03, 0x03, 0x03, 0x05, 0x05, 0x09, 0x11, + 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, + 0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09, + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x07, + 0x05, 0x05, 0x05, 0x07, 0x09, 0x09, 0x11, 0x21, + 0x02, 0x03, 0x03, 0x02, 0x03, 0x03, 0x02, 0x06, + 0x03, 0x03, 0x03, 0x03, 0x02, 0x03, 0x06, 0x0a, + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x07, + 0x02, 0x03, 0x03, 0x02, 0x06, 0x07, 0x0a, 0x12, + 0x04, 0x05, 0x05, 0x06, 0x05, 0x05, 0x06, 0x04, + 0x05, 0x05, 0x05, 0x07, 0x06, 0x07, 0x04, 0x0c, + 0x08, 0x09, 0x09, 0x0a, 0x09, 0x09, 0x0a, 0x0c, + 0x10, 0x11, 0x11, 0x12, 0x20, 0x21, 0x40, 0x80, +}; + +/* + * The actual fragtbl array. 
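+ * It is indexed by fs_frag: the supported values 1, 2 and 4 share + * fragtbl124, 8 uses fragtbl8, and the remaining slots are never + * consulted.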
+ */ +u_char *fragtbl[MAXFRAG + 1] = { + 0, fragtbl124, fragtbl124, 0, fragtbl124, 0, 0, 0, fragtbl8, +}; diff --git a/Dump/ufs/ffs/ffs_vfsops.c b/Dump/ufs/ffs/ffs_vfsops.c new file mode 100644 index 0000000..b3d822a --- /dev/null +++ b/Dump/ufs/ffs/ffs_vfsops.c @@ -0,0 +1,2289 @@ +/*- + * Copyright (c) 1989, 1991, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95 + */ + +#include +__FBSDID("$FreeBSD: releng/11.2/sys/ufs/ffs/ffs_vfsops.c 331722 2018-03-29 02:50:57Z eadler $"); + +#include "opt_quota.h" +#include "opt_ufs.h" +#include "opt_ffs.h" +#include "opt_ddb.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include + +#include + +static uma_zone_t uma_inode, uma_ufs1, uma_ufs2; + +static int ffs_mountfs(struct vnode *, struct mount *, struct thread *); +static void ffs_oldfscompat_read(struct fs *, struct ufsmount *, + ufs2_daddr_t); +static void ffs_ifree(struct ufsmount *ump, struct inode *ip); +static int ffs_sync_lazy(struct mount *mp); + +static vfs_init_t ffs_init; +static vfs_uninit_t ffs_uninit; +static vfs_extattrctl_t ffs_extattrctl; +static vfs_cmount_t ffs_cmount; +static vfs_unmount_t ffs_unmount; +static vfs_mount_t ffs_mount; +static vfs_statfs_t ffs_statfs; +static vfs_fhtovp_t ffs_fhtovp; +static vfs_sync_t ffs_sync; + +static struct vfsops ufs_vfsops = { + .vfs_extattrctl = ffs_extattrctl, + .vfs_fhtovp = ffs_fhtovp, + .vfs_init = ffs_init, + .vfs_mount = ffs_mount, + .vfs_cmount = ffs_cmount, + .vfs_quotactl = ufs_quotactl, + .vfs_root = ufs_root, + .vfs_statfs = ffs_statfs, + .vfs_sync = ffs_sync, + .vfs_uninit = ffs_uninit, + .vfs_unmount = ffs_unmount, + .vfs_vget = ffs_vget, + .vfs_susp_clean = process_deferred_inactive, +}; + +VFS_SET(ufs_vfsops, ufs, 0); +MODULE_VERSION(ufs, 1); + +static b_strategy_t ffs_geom_strategy; +static b_write_t ffs_bufwrite; + +static struct buf_ops ffs_ops = { + .bop_name = "FFS", + .bop_write = ffs_bufwrite, + .bop_strategy = ffs_geom_strategy, + .bop_sync = bufsync, +#ifdef NO_FFS_SNAPSHOT + .bop_bdflush = bufbdflush, +#else + .bop_bdflush = ffs_bdflush, +#endif +}; + +/* + * Note that userquota and groupquota options are not currently used + * by UFS/FFS code and generally mount(8) does not pass those options + * from userland, but they can be passed by loader(8) via + * vfs.root.mountfrom.options. 
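+ * ffs_mount() simply deletes "userquota" and "groupquota" from the + * option list near the top of the function.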
+ */ +static const char *ffs_opts[] = { "acls", "async", "noatime", "noclusterr", + "noclusterw", "noexec", "export", "force", "from", "groupquota", + "multilabel", "nfsv4acls", "fsckpid", "snapshot", "nosuid", "suiddir", + "nosymfollow", "sync", "union", "userquota", NULL }; + +static int +ffs_mount(struct mount *mp) +{ + struct vnode *devvp; + struct thread *td; + struct ufsmount *ump = NULL; + struct fs *fs; + pid_t fsckpid = 0; + int error, error1, flags; + uint64_t mntorflags; + accmode_t accmode; + struct nameidata ndp; + char *fspec; + + td = curthread; + if (vfs_filteropt(mp->mnt_optnew, ffs_opts)) + return (EINVAL); + if (uma_inode == NULL) { + uma_inode = uma_zcreate("FFS inode", + sizeof(struct inode), NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, 0); + uma_ufs1 = uma_zcreate("FFS1 dinode", + sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, 0); + uma_ufs2 = uma_zcreate("FFS2 dinode", + sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, 0); + } + + vfs_deleteopt(mp->mnt_optnew, "groupquota"); + vfs_deleteopt(mp->mnt_optnew, "userquota"); + + fspec = vfs_getopts(mp->mnt_optnew, "from", &error); + if (error) + return (error); + + mntorflags = 0; + if (vfs_getopt(mp->mnt_optnew, "acls", NULL, NULL) == 0) + mntorflags |= MNT_ACLS; + + if (vfs_getopt(mp->mnt_optnew, "snapshot", NULL, NULL) == 0) { + mntorflags |= MNT_SNAPSHOT; + /* + * Once we have set the MNT_SNAPSHOT flag, do not + * persist "snapshot" in the options list. + */ + vfs_deleteopt(mp->mnt_optnew, "snapshot"); + vfs_deleteopt(mp->mnt_opt, "snapshot"); + } + + if (vfs_getopt(mp->mnt_optnew, "fsckpid", NULL, NULL) == 0 && + vfs_scanopt(mp->mnt_optnew, "fsckpid", "%d", &fsckpid) == 1) { + /* + * Once we have set the restricted PID, do not + * persist "fsckpid" in the options list. + */ + vfs_deleteopt(mp->mnt_optnew, "fsckpid"); + vfs_deleteopt(mp->mnt_opt, "fsckpid"); + if (mp->mnt_flag & MNT_UPDATE) { + if (VFSTOUFS(mp)->um_fs->fs_ronly == 0 && + vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) == 0) { + vfs_mount_error(mp, + "Checker enable: Must be read-only"); + return (EINVAL); + } + } else if (vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) == 0) { + vfs_mount_error(mp, + "Checker enable: Must be read-only"); + return (EINVAL); + } + /* Set to -1 if we are done */ + if (fsckpid == 0) + fsckpid = -1; + } + + if (vfs_getopt(mp->mnt_optnew, "nfsv4acls", NULL, NULL) == 0) { + if (mntorflags & MNT_ACLS) { + vfs_mount_error(mp, + "\"acls\" and \"nfsv4acls\" options " + "are mutually exclusive"); + return (EINVAL); + } + mntorflags |= MNT_NFS4ACLS; + } + + MNT_ILOCK(mp); + mp->mnt_flag |= mntorflags; + MNT_IUNLOCK(mp); + /* + * If updating, check whether changing from read-only to + * read/write; if there is no device name, that's all we do. + */ + if (mp->mnt_flag & MNT_UPDATE) { + ump = VFSTOUFS(mp); + fs = ump->um_fs; + devvp = ump->um_devvp; + if (fsckpid == -1 && ump->um_fsckpid > 0) { + if ((error = ffs_flushfiles(mp, WRITECLOSE, td)) != 0 || + (error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) + return (error); + g_topology_lock(); + /* + * Return to normal read-only mode. + */ + error = g_access(ump->um_cp, 0, -1, 0); + g_topology_unlock(); + ump->um_fsckpid = 0; + } + if (fs->fs_ronly == 0 && + vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) { + /* + * Flush any dirty data and suspend filesystem. 
+ */ + if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0) + return (error); + error = vfs_write_suspend_umnt(mp); + if (error != 0) + return (error); + /* + * Check for and optionally get rid of files open + * for writing. + */ + flags = WRITECLOSE; + if (mp->mnt_flag & MNT_FORCE) + flags |= FORCECLOSE; + if (MOUNTEDSOFTDEP(mp)) { + error = softdep_flushfiles(mp, flags, td); + } else { + error = ffs_flushfiles(mp, flags, td); + } + if (error) { + vfs_write_resume(mp, 0); + return (error); + } + if (fs->fs_pendingblocks != 0 || + fs->fs_pendinginodes != 0) { + printf("WARNING: %s Update error: blocks %jd " + "files %d\n", fs->fs_fsmnt, + (intmax_t)fs->fs_pendingblocks, + fs->fs_pendinginodes); + fs->fs_pendingblocks = 0; + fs->fs_pendinginodes = 0; + } + if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0) + fs->fs_clean = 1; + if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) { + fs->fs_ronly = 0; + fs->fs_clean = 0; + vfs_write_resume(mp, 0); + return (error); + } + if (MOUNTEDSOFTDEP(mp)) + softdep_unmount(mp); + g_topology_lock(); + /* + * Drop our write and exclusive access. + */ + g_access(ump->um_cp, 0, -1, -1); + g_topology_unlock(); + fs->fs_ronly = 1; + MNT_ILOCK(mp); + mp->mnt_flag |= MNT_RDONLY; + MNT_IUNLOCK(mp); + /* + * Allow the writers to note that filesystem + * is ro now. + */ + vfs_write_resume(mp, 0); + } + if ((mp->mnt_flag & MNT_RELOAD) && + (error = ffs_reload(mp, td, 0)) != 0) + return (error); + if (fs->fs_ronly && + !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) { + /* + * If we are running a checker, do not allow upgrade. + */ + if (ump->um_fsckpid > 0) { + vfs_mount_error(mp, + "Active checker, cannot upgrade to write"); + return (EINVAL); + } + /* + * If upgrade to read-write by non-root, then verify + * that user has necessary permissions on the device. + */ + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); + error = VOP_ACCESS(devvp, VREAD | VWRITE, + td->td_ucred, td); + if (error) + error = priv_check(td, PRIV_VFS_MOUNT_PERM); + if (error) { + VOP_UNLOCK(devvp, 0); + return (error); + } + VOP_UNLOCK(devvp, 0); + fs->fs_flags &= ~FS_UNCLEAN; + if (fs->fs_clean == 0) { + fs->fs_flags |= FS_UNCLEAN; + if ((mp->mnt_flag & MNT_FORCE) || + ((fs->fs_flags & + (FS_SUJ | FS_NEEDSFSCK)) == 0 && + (fs->fs_flags & FS_DOSOFTDEP))) { + printf("WARNING: %s was not properly " + "dismounted\n", fs->fs_fsmnt); + } else { + vfs_mount_error(mp, + "R/W mount of %s denied. %s.%s", + fs->fs_fsmnt, + "Filesystem is not clean - run fsck", + (fs->fs_flags & FS_SUJ) == 0 ? "" : + " Forced mount will invalidate" + " journal contents"); + return (EPERM); + } + } + g_topology_lock(); + /* + * Request exclusive write access. + */ + error = g_access(ump->um_cp, 0, 1, 1); + g_topology_unlock(); + if (error) + return (error); + if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0) + return (error); + fs->fs_ronly = 0; + MNT_ILOCK(mp); + mp->mnt_flag &= ~MNT_RDONLY; + MNT_IUNLOCK(mp); + fs->fs_mtime = time_second; + /* check to see if we need to start softdep */ + if ((fs->fs_flags & FS_DOSOFTDEP) && + (error = softdep_mount(devvp, mp, fs, td->td_ucred))){ + vn_finished_write(mp); + return (error); + } + fs->fs_clean = 0; + if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) { + vn_finished_write(mp); + return (error); + } + if (fs->fs_snapinum[0] != 0) + ffs_snapshot_mount(mp); + vn_finished_write(mp); + } + /* + * Soft updates is incompatible with "async", + * so if we are doing softupdates stop the user + * from setting the async flag in an update. 
+ * Softdep_mount() clears it in an initial mount + * or ro->rw remount. + */ + if (MOUNTEDSOFTDEP(mp)) { + /* XXX: Reset too late ? */ + MNT_ILOCK(mp); + mp->mnt_flag &= ~MNT_ASYNC; + MNT_IUNLOCK(mp); + } + /* + * Keep MNT_ACLS flag if it is stored in superblock. + */ + if ((fs->fs_flags & FS_ACLS) != 0) { + /* XXX: Set too late ? */ + MNT_ILOCK(mp); + mp->mnt_flag |= MNT_ACLS; + MNT_IUNLOCK(mp); + } + + if ((fs->fs_flags & FS_NFS4ACLS) != 0) { + /* XXX: Set too late ? */ + MNT_ILOCK(mp); + mp->mnt_flag |= MNT_NFS4ACLS; + MNT_IUNLOCK(mp); + } + /* + * If this is a request from fsck to clean up the filesystem, + * then allow the specified pid to proceed. + */ + if (fsckpid > 0) { + if (ump->um_fsckpid != 0) { + vfs_mount_error(mp, + "Active checker already running on %s", + fs->fs_fsmnt); + return (EINVAL); + } + KASSERT(MOUNTEDSOFTDEP(mp) == 0, + ("soft updates enabled on read-only file system")); + g_topology_lock(); + /* + * Request write access. + */ + error = g_access(ump->um_cp, 0, 1, 0); + g_topology_unlock(); + if (error) { + vfs_mount_error(mp, + "Checker activation failed on %s", + fs->fs_fsmnt); + return (error); + } + ump->um_fsckpid = fsckpid; + if (fs->fs_snapinum[0] != 0) + ffs_snapshot_mount(mp); + fs->fs_mtime = time_second; + fs->fs_fmod = 1; + fs->fs_clean = 0; + (void) ffs_sbupdate(ump, MNT_WAIT, 0); + } + + /* + * If this is a snapshot request, take the snapshot. + */ + if (mp->mnt_flag & MNT_SNAPSHOT) + return (ffs_snapshot(mp, fspec)); + + /* + * Must not call namei() while owning busy ref. + */ + vfs_unbusy(mp); + } + + /* + * Not an update, or updating the name: look up the name + * and verify that it refers to a sensible disk device. + */ + NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td); + error = namei(&ndp); + if ((mp->mnt_flag & MNT_UPDATE) != 0) { + /* + * Unmount does not start if MNT_UPDATE is set. Mount + * update busies mp before setting MNT_UPDATE. We + * must be able to retain our busy ref succesfully, + * without sleep. + */ + error1 = vfs_busy(mp, MBF_NOWAIT); + MPASS(error1 == 0); + } + if (error != 0) + return (error); + NDFREE(&ndp, NDF_ONLY_PNBUF); + devvp = ndp.ni_vp; + if (!vn_isdisk(devvp, &error)) { + vput(devvp); + return (error); + } + + /* + * If mount by non-root, then verify that user has necessary + * permissions on the device. + */ + accmode = VREAD; + if ((mp->mnt_flag & MNT_RDONLY) == 0) + accmode |= VWRITE; + error = VOP_ACCESS(devvp, accmode, td->td_ucred, td); + if (error) + error = priv_check(td, PRIV_VFS_MOUNT_PERM); + if (error) { + vput(devvp); + return (error); + } + + if (mp->mnt_flag & MNT_UPDATE) { + /* + * Update only + * + * If it's not the same vnode, or at least the same device + * then it's not correct. + */ + + if (devvp->v_rdev != ump->um_devvp->v_rdev) + error = EINVAL; /* needs translation */ + vput(devvp); + if (error) + return (error); + } else { + /* + * New mount + * + * We need the name for the mount point (also used for + * "last mounted on") copied in. If an error occurs, + * the mount point is discarded by the upper level code. + * Note that vfs_mount_alloc() populates f_mntonname for us. + */ + if ((error = ffs_mountfs(devvp, mp, td)) != 0) { + vrele(devvp); + return (error); + } + if (fsckpid > 0) { + KASSERT(MOUNTEDSOFTDEP(mp) == 0, + ("soft updates enabled on read-only file system")); + ump = VFSTOUFS(mp); + fs = ump->um_fs; + g_topology_lock(); + /* + * Request write access. 
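+ * Only an additional write reference on the GEOM consumer is taken
+ * here; if it cannot be granted, the checker is skipped with a
+ * warning rather than failing the whole mount.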
+ */ + error = g_access(ump->um_cp, 0, 1, 0); + g_topology_unlock(); + if (error) { + printf("WARNING: %s: Checker activation " + "failed\n", fs->fs_fsmnt); + } else { + ump->um_fsckpid = fsckpid; + if (fs->fs_snapinum[0] != 0) + ffs_snapshot_mount(mp); + fs->fs_mtime = time_second; + fs->fs_clean = 0; + (void) ffs_sbupdate(ump, MNT_WAIT, 0); + } + } + } + vfs_mountedfrom(mp, fspec); + return (0); +} + +/* + * Compatibility with old mount system call. + */ + +static int +ffs_cmount(struct mntarg *ma, void *data, uint64_t flags) +{ + struct ufs_args args; + struct export_args exp; + int error; + + if (data == NULL) + return (EINVAL); + error = copyin(data, &args, sizeof args); + if (error) + return (error); + vfs_oexport_conv(&args.export, &exp); + + ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN); + ma = mount_arg(ma, "export", &exp, sizeof(exp)); + error = kernel_mount(ma, flags); + + return (error); +} + +/* + * Reload all incore data for a filesystem (used after running fsck on + * the root filesystem and finding things to fix). If the 'force' flag + * is 0, the filesystem must be mounted read-only. + * + * Things to do to update the mount: + * 1) invalidate all cached meta-data. + * 2) re-read superblock from disk. + * 3) re-read summary information from disk. + * 4) invalidate all inactive vnodes. + * 5) clear MNTK_SUSPEND2 and MNTK_SUSPENDED flags, allowing secondary + * writers, if requested. + * 6) invalidate all cached file data. + * 7) re-read inode data for all active vnodes. + */ +int +ffs_reload(struct mount *mp, struct thread *td, int flags) +{ + struct vnode *vp, *mvp, *devvp; + struct inode *ip; + void *space; + struct buf *bp; + struct fs *fs, *newfs; + struct ufsmount *ump; + ufs2_daddr_t sblockloc; + int i, blks, error; + u_long size; + int32_t *lp; + + ump = VFSTOUFS(mp); + + MNT_ILOCK(mp); + if ((mp->mnt_flag & MNT_RDONLY) == 0 && (flags & FFSR_FORCE) == 0) { + MNT_IUNLOCK(mp); + return (EINVAL); + } + MNT_IUNLOCK(mp); + + /* + * Step 1: invalidate all cached meta-data. + */ + devvp = VFSTOUFS(mp)->um_devvp; + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); + if (vinvalbuf(devvp, 0, 0, 0) != 0) + panic("ffs_reload: dirty1"); + VOP_UNLOCK(devvp, 0); + + /* + * Step 2: re-read superblock from disk. + */ + fs = VFSTOUFS(mp)->um_fs; + if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize, + NOCRED, &bp)) != 0) + return (error); + newfs = (struct fs *)bp->b_data; + if ((newfs->fs_magic != FS_UFS1_MAGIC && + newfs->fs_magic != FS_UFS2_MAGIC) || + newfs->fs_bsize > MAXBSIZE || + newfs->fs_bsize < sizeof(struct fs)) { + brelse(bp); + return (EIO); /* XXX needs translation */ + } + /* + * Copy pointer fields back into superblock before copying in XXX + * new superblock. These should really be in the ufsmount. XXX + * Note that important parameters (eg fs_ncg) are unchanged. 
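+ * Otherwise the bcopy() below would clobber the in-core fs_csp,
+ * fs_maxcluster, fs_contigdirs and fs_active pointers with whatever
+ * stale values happen to be in the on-disk copy.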
+ */ + newfs->fs_csp = fs->fs_csp; + newfs->fs_maxcluster = fs->fs_maxcluster; + newfs->fs_contigdirs = fs->fs_contigdirs; + newfs->fs_active = fs->fs_active; + newfs->fs_ronly = fs->fs_ronly; + sblockloc = fs->fs_sblockloc; + bcopy(newfs, fs, (u_int)fs->fs_sbsize); + brelse(bp); + mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; + ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc); + UFS_LOCK(ump); + if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { + printf("WARNING: %s: reload pending error: blocks %jd " + "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, + fs->fs_pendinginodes); + fs->fs_pendingblocks = 0; + fs->fs_pendinginodes = 0; + } + UFS_UNLOCK(ump); + + /* + * Step 3: re-read summary information from disk. + */ + size = fs->fs_cssize; + blks = howmany(size, fs->fs_fsize); + if (fs->fs_contigsumsize > 0) + size += fs->fs_ncg * sizeof(int32_t); + size += fs->fs_ncg * sizeof(u_int8_t); + free(fs->fs_csp, M_UFSMNT); + space = malloc(size, M_UFSMNT, M_WAITOK); + fs->fs_csp = space; + for (i = 0; i < blks; i += fs->fs_frag) { + size = fs->fs_bsize; + if (i + fs->fs_frag > blks) + size = (blks - i) * fs->fs_fsize; + error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, + NOCRED, &bp); + if (error) + return (error); + bcopy(bp->b_data, space, (u_int)size); + space = (char *)space + size; + brelse(bp); + } + /* + * We no longer know anything about clusters per cylinder group. + */ + if (fs->fs_contigsumsize > 0) { + fs->fs_maxcluster = lp = space; + for (i = 0; i < fs->fs_ncg; i++) + *lp++ = fs->fs_contigsumsize; + space = lp; + } + size = fs->fs_ncg * sizeof(u_int8_t); + fs->fs_contigdirs = (u_int8_t *)space; + bzero(fs->fs_contigdirs, size); + if ((flags & FFSR_UNSUSPEND) != 0) { + MNT_ILOCK(mp); + mp->mnt_kern_flag &= ~(MNTK_SUSPENDED | MNTK_SUSPEND2); + wakeup(&mp->mnt_flag); + MNT_IUNLOCK(mp); + } + +loop: + MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { + /* + * Skip syncer vnode. + */ + if (vp->v_type == VNON) { + VI_UNLOCK(vp); + continue; + } + /* + * Step 4: invalidate all cached file data. + */ + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { + MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); + goto loop; + } + if (vinvalbuf(vp, 0, 0, 0)) + panic("ffs_reload: dirty2"); + /* + * Step 5: re-read inode data for all active vnodes. + */ + ip = VTOI(vp); + error = + bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), + (int)fs->fs_bsize, NOCRED, &bp); + if (error) { + VOP_UNLOCK(vp, 0); + vrele(vp); + MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); + return (error); + } + ffs_load_inode(bp, ip, fs, ip->i_number); + ip->i_effnlink = ip->i_nlink; + brelse(bp); + VOP_UNLOCK(vp, 0); + vrele(vp); + } + return (0); +} + +/* + * Possible superblock locations ordered from most to least likely. + */ +static int sblock_try[] = SBLOCKSEARCH; + +/* + * Common code for mount and mountroot + */ +static int +ffs_mountfs(devvp, mp, td) + struct vnode *devvp; + struct mount *mp; + struct thread *td; +{ + struct ufsmount *ump; + struct buf *bp; + struct fs *fs; + struct cdev *dev; + void *space; + ufs2_daddr_t sblockloc; + int error, i, blks, len, ronly; + u_long size; + int32_t *lp; + struct ucred *cred; + struct g_consumer *cp; + struct mount *nmp; + + bp = NULL; + ump = NULL; + cred = td ? 
td->td_ucred : NOCRED; + ronly = (mp->mnt_flag & MNT_RDONLY) != 0; + + KASSERT(devvp->v_type == VCHR, ("reclaimed devvp")); + dev = devvp->v_rdev; + if (atomic_cmpset_acq_ptr((uintptr_t *)&dev->si_mountpt, 0, + (uintptr_t)mp) == 0) { + VOP_UNLOCK(devvp, 0); + return (EBUSY); + } + g_topology_lock(); + error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1); + g_topology_unlock(); + if (error != 0) { + atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0); + VOP_UNLOCK(devvp, 0); + return (error); + } + dev_ref(dev); + devvp->v_bufobj.bo_ops = &ffs_ops; + VOP_UNLOCK(devvp, 0); + if (dev->si_iosize_max != 0) + mp->mnt_iosize_max = dev->si_iosize_max; + if (mp->mnt_iosize_max > MAXPHYS) + mp->mnt_iosize_max = MAXPHYS; + + fs = NULL; + sblockloc = 0; + /* + * Try reading the superblock in each of its possible locations. + */ + for (i = 0; sblock_try[i] != -1; i++) { + if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) { + error = EINVAL; + vfs_mount_error(mp, + "Invalid sectorsize %d for superblock size %d", + cp->provider->sectorsize, SBLOCKSIZE); + goto out; + } + if ((error = bread(devvp, btodb(sblock_try[i]), SBLOCKSIZE, + cred, &bp)) != 0) + goto out; + fs = (struct fs *)bp->b_data; + sblockloc = sblock_try[i]; + if ((fs->fs_magic == FS_UFS1_MAGIC || + (fs->fs_magic == FS_UFS2_MAGIC && + (fs->fs_sblockloc == sblockloc || + (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) && + fs->fs_bsize <= MAXBSIZE && + fs->fs_bsize >= sizeof(struct fs)) + break; + brelse(bp); + bp = NULL; + } + if (sblock_try[i] == -1) { + error = EINVAL; /* XXX needs translation */ + goto out; + } + fs->fs_fmod = 0; + fs->fs_flags &= ~FS_INDEXDIRS; /* no support for directory indices */ + fs->fs_flags &= ~FS_UNCLEAN; + if (fs->fs_clean == 0) { + fs->fs_flags |= FS_UNCLEAN; + if (ronly || (mp->mnt_flag & MNT_FORCE) || + ((fs->fs_flags & (FS_SUJ | FS_NEEDSFSCK)) == 0 && + (fs->fs_flags & FS_DOSOFTDEP))) { + printf("WARNING: %s was not properly dismounted\n", + fs->fs_fsmnt); + } else { + vfs_mount_error(mp, "R/W mount of %s denied. %s%s", + fs->fs_fsmnt, "Filesystem is not clean - run fsck.", + (fs->fs_flags & FS_SUJ) == 0 ? "" : + " Forced mount will invalidate journal contents"); + error = EPERM; + goto out; + } + if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) && + (mp->mnt_flag & MNT_FORCE)) { + printf("WARNING: %s: lost blocks %jd files %d\n", + fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, + fs->fs_pendinginodes); + fs->fs_pendingblocks = 0; + fs->fs_pendinginodes = 0; + } + } + if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { + printf("WARNING: %s: mount pending error: blocks %jd " + "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, + fs->fs_pendinginodes); + fs->fs_pendingblocks = 0; + fs->fs_pendinginodes = 0; + } + if ((fs->fs_flags & FS_GJOURNAL) != 0) { +#ifdef UFS_GJOURNAL + /* + * Get journal provider name. 
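+ * The name is obtained through the GEOM attribute interface; if the
+ * query fails, the mount simply proceeds without gjournal support.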
+ */ + len = 1024; + mp->mnt_gjprovider = malloc((u_long)len, M_UFSMNT, M_WAITOK); + if (g_io_getattr("GJOURNAL::provider", cp, &len, + mp->mnt_gjprovider) == 0) { + mp->mnt_gjprovider = realloc(mp->mnt_gjprovider, len, + M_UFSMNT, M_WAITOK); + MNT_ILOCK(mp); + mp->mnt_flag |= MNT_GJOURNAL; + MNT_IUNLOCK(mp); + } else { + printf("WARNING: %s: GJOURNAL flag on fs " + "but no gjournal provider below\n", + mp->mnt_stat.f_mntonname); + free(mp->mnt_gjprovider, M_UFSMNT); + mp->mnt_gjprovider = NULL; + } +#else + printf("WARNING: %s: GJOURNAL flag on fs but no " + "UFS_GJOURNAL support\n", mp->mnt_stat.f_mntonname); +#endif + } else { + mp->mnt_gjprovider = NULL; + } + ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO); + ump->um_cp = cp; + ump->um_bo = &devvp->v_bufobj; + ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK); + if (fs->fs_magic == FS_UFS1_MAGIC) { + ump->um_fstype = UFS1; + ump->um_balloc = ffs_balloc_ufs1; + } else { + ump->um_fstype = UFS2; + ump->um_balloc = ffs_balloc_ufs2; + } + ump->um_blkatoff = ffs_blkatoff; + ump->um_truncate = ffs_truncate; + ump->um_update = ffs_update; + ump->um_valloc = ffs_valloc; + ump->um_vfree = ffs_vfree; + ump->um_ifree = ffs_ifree; + ump->um_rdonly = ffs_rdonly; + ump->um_snapgone = ffs_snapgone; + mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF); + bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize); + if (fs->fs_sbsize < SBLOCKSIZE) + bp->b_flags |= B_INVAL | B_NOCACHE; + brelse(bp); + bp = NULL; + fs = ump->um_fs; + ffs_oldfscompat_read(fs, ump, sblockloc); + fs->fs_ronly = ronly; + size = fs->fs_cssize; + blks = howmany(size, fs->fs_fsize); + if (fs->fs_contigsumsize > 0) + size += fs->fs_ncg * sizeof(int32_t); + size += fs->fs_ncg * sizeof(u_int8_t); + space = malloc(size, M_UFSMNT, M_WAITOK); + fs->fs_csp = space; + for (i = 0; i < blks; i += fs->fs_frag) { + size = fs->fs_bsize; + if (i + fs->fs_frag > blks) + size = (blks - i) * fs->fs_fsize; + if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, + cred, &bp)) != 0) { + free(fs->fs_csp, M_UFSMNT); + goto out; + } + bcopy(bp->b_data, space, (u_int)size); + space = (char *)space + size; + brelse(bp); + bp = NULL; + } + if (fs->fs_contigsumsize > 0) { + fs->fs_maxcluster = lp = space; + for (i = 0; i < fs->fs_ncg; i++) + *lp++ = fs->fs_contigsumsize; + space = lp; + } + size = fs->fs_ncg * sizeof(u_int8_t); + fs->fs_contigdirs = (u_int8_t *)space; + bzero(fs->fs_contigdirs, size); + fs->fs_active = NULL; + mp->mnt_data = ump; + mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0]; + mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1]; + nmp = NULL; + if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 || + (nmp = vfs_getvfs(&mp->mnt_stat.f_fsid))) { + if (nmp) + vfs_rel(nmp); + vfs_getnewfsid(mp); + } + mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; + MNT_ILOCK(mp); + mp->mnt_flag |= MNT_LOCAL; + MNT_IUNLOCK(mp); + if ((fs->fs_flags & FS_MULTILABEL) != 0) { +#ifdef MAC + MNT_ILOCK(mp); + mp->mnt_flag |= MNT_MULTILABEL; + MNT_IUNLOCK(mp); +#else + printf("WARNING: %s: multilabel flag on fs but " + "no MAC support\n", mp->mnt_stat.f_mntonname); +#endif + } + if ((fs->fs_flags & FS_ACLS) != 0) { +#ifdef UFS_ACL + MNT_ILOCK(mp); + + if (mp->mnt_flag & MNT_NFS4ACLS) + printf("WARNING: %s: ACLs flag on fs conflicts with " + "\"nfsv4acls\" mount option; option ignored\n", + mp->mnt_stat.f_mntonname); + mp->mnt_flag &= ~MNT_NFS4ACLS; + mp->mnt_flag |= MNT_ACLS; + + MNT_IUNLOCK(mp); +#else + printf("WARNING: %s: ACLs flag on fs but no ACLs support\n", + mp->mnt_stat.f_mntonname); +#endif + } + if 
((fs->fs_flags & FS_NFS4ACLS) != 0) { +#ifdef UFS_ACL + MNT_ILOCK(mp); + + if (mp->mnt_flag & MNT_ACLS) + printf("WARNING: %s: NFSv4 ACLs flag on fs conflicts " + "with \"acls\" mount option; option ignored\n", + mp->mnt_stat.f_mntonname); + mp->mnt_flag &= ~MNT_ACLS; + mp->mnt_flag |= MNT_NFS4ACLS; + + MNT_IUNLOCK(mp); +#else + printf("WARNING: %s: NFSv4 ACLs flag on fs but no " + "ACLs support\n", mp->mnt_stat.f_mntonname); +#endif + } + if ((fs->fs_flags & FS_TRIM) != 0) { + len = sizeof(int); + if (g_io_getattr("GEOM::candelete", cp, &len, + &ump->um_candelete) == 0) { + if (!ump->um_candelete) + printf("WARNING: %s: TRIM flag on fs but disk " + "does not support TRIM\n", + mp->mnt_stat.f_mntonname); + } else { + printf("WARNING: %s: TRIM flag on fs but disk does " + "not confirm that it supports TRIM\n", + mp->mnt_stat.f_mntonname); + ump->um_candelete = 0; + } + if (ump->um_candelete) { + ump->um_trim_tq = taskqueue_create("trim", M_WAITOK, + taskqueue_thread_enqueue, &ump->um_trim_tq); + taskqueue_start_threads(&ump->um_trim_tq, 1, PVFS, + "%s trim", mp->mnt_stat.f_mntonname); + } + } + + ump->um_mountp = mp; + ump->um_dev = dev; + ump->um_devvp = devvp; + ump->um_nindir = fs->fs_nindir; + ump->um_bptrtodb = fs->fs_fsbtodb; + ump->um_seqinc = fs->fs_frag; + for (i = 0; i < MAXQUOTAS; i++) + ump->um_quotas[i] = NULLVP; +#ifdef UFS_EXTATTR + ufs_extattr_uepm_init(&ump->um_extattr); +#endif + /* + * Set FS local "last mounted on" information (NULL pad) + */ + bzero(fs->fs_fsmnt, MAXMNTLEN); + strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN); + mp->mnt_stat.f_iosize = fs->fs_bsize; + + if (mp->mnt_flag & MNT_ROOTFS) { + /* + * Root mount; update timestamp in mount structure. + * this will be used by the common root mount code + * to update the system clock. + */ + mp->mnt_time = fs->fs_time; + } + + if (ronly == 0) { + fs->fs_mtime = time_second; + if ((fs->fs_flags & FS_DOSOFTDEP) && + (error = softdep_mount(devvp, mp, fs, cred)) != 0) { + free(fs->fs_csp, M_UFSMNT); + ffs_flushfiles(mp, FORCECLOSE, td); + goto out; + } + if (fs->fs_snapinum[0] != 0) + ffs_snapshot_mount(mp); + fs->fs_fmod = 1; + fs->fs_clean = 0; + (void) ffs_sbupdate(ump, MNT_WAIT, 0); + } + /* + * Initialize filesystem state information in mount struct. + */ + MNT_ILOCK(mp); + mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED | + MNTK_NO_IOPF | MNTK_UNMAPPED_BUFS | MNTK_USES_BCACHE; + MNT_IUNLOCK(mp); +#ifdef UFS_EXTATTR +#ifdef UFS_EXTATTR_AUTOSTART + /* + * + * Auto-starting does the following: + * - check for /.attribute in the fs, and extattr_start if so + * - for each file in .attribute, enable that file with + * an attribute of the same name. + * Not clear how to report errors -- probably eat them. + * This would all happen while the filesystem was busy/not + * available, so would effectively be "atomic". 
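+ * Consistent with that, the return value of ufs_extattr_autostart()
+ * below is deliberately discarded.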
+ */ + (void) ufs_extattr_autostart(mp, td); +#endif /* !UFS_EXTATTR_AUTOSTART */ +#endif /* !UFS_EXTATTR */ + return (0); +out: + if (bp) + brelse(bp); + if (cp != NULL) { + g_topology_lock(); + g_vfs_close(cp); + g_topology_unlock(); + } + if (ump) { + mtx_destroy(UFS_MTX(ump)); + if (mp->mnt_gjprovider != NULL) { + free(mp->mnt_gjprovider, M_UFSMNT); + mp->mnt_gjprovider = NULL; + } + free(ump->um_fs, M_UFSMNT); + free(ump, M_UFSMNT); + mp->mnt_data = NULL; + } + atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0); + dev_rel(dev); + return (error); +} + +#include +static int bigcgs = 0; +SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, ""); + +/* + * Sanity checks for loading old filesystem superblocks. + * See ffs_oldfscompat_write below for unwound actions. + * + * XXX - Parts get retired eventually. + * Unfortunately new bits get added. + */ +static void +ffs_oldfscompat_read(fs, ump, sblockloc) + struct fs *fs; + struct ufsmount *ump; + ufs2_daddr_t sblockloc; +{ + off_t maxfilesize; + + /* + * If not yet done, update fs_flags location and value of fs_sblockloc. + */ + if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) { + fs->fs_flags = fs->fs_old_flags; + fs->fs_old_flags |= FS_FLAGS_UPDATED; + fs->fs_sblockloc = sblockloc; + } + /* + * If not yet done, update UFS1 superblock with new wider fields. + */ + if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) { + fs->fs_maxbsize = fs->fs_bsize; + fs->fs_time = fs->fs_old_time; + fs->fs_size = fs->fs_old_size; + fs->fs_dsize = fs->fs_old_dsize; + fs->fs_csaddr = fs->fs_old_csaddr; + fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir; + fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree; + fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree; + fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree; + } + if (fs->fs_magic == FS_UFS1_MAGIC && + fs->fs_old_inodefmt < FS_44INODEFMT) { + fs->fs_maxfilesize = ((uint64_t)1 << 31) - 1; + fs->fs_qbmask = ~fs->fs_bmask; + fs->fs_qfmask = ~fs->fs_fmask; + } + if (fs->fs_magic == FS_UFS1_MAGIC) { + ump->um_savedmaxfilesize = fs->fs_maxfilesize; + maxfilesize = (uint64_t)0x80000000 * fs->fs_bsize - 1; + if (fs->fs_maxfilesize > maxfilesize) + fs->fs_maxfilesize = maxfilesize; + } + /* Compatibility for old filesystems */ + if (fs->fs_avgfilesize <= 0) + fs->fs_avgfilesize = AVFILESIZ; + if (fs->fs_avgfpdir <= 0) + fs->fs_avgfpdir = AFPDIR; + if (bigcgs) { + fs->fs_save_cgsize = fs->fs_cgsize; + fs->fs_cgsize = fs->fs_bsize; + } +} + +/* + * Unwinding superblock updates for old filesystems. + * See ffs_oldfscompat_read above for details. + * + * XXX - Parts get retired eventually. + * Unfortunately new bits get added. + */ +void +ffs_oldfscompat_write(fs, ump) + struct fs *fs; + struct ufsmount *ump; +{ + + /* + * Copy back UFS2 updated fields that UFS1 inspects. 
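+ * This undoes the widening done in ffs_oldfscompat_read() above so
+ * that the superblock image written back to disk keeps valid values
+ * in the old narrow UFS1 fields and its saved fs_maxfilesize.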
+ */ + if (fs->fs_magic == FS_UFS1_MAGIC) { + fs->fs_old_time = fs->fs_time; + fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir; + fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree; + fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree; + fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree; + fs->fs_maxfilesize = ump->um_savedmaxfilesize; + } + if (bigcgs) { + fs->fs_cgsize = fs->fs_save_cgsize; + fs->fs_save_cgsize = 0; + } +} + +/* + * unmount system call + */ +static int +ffs_unmount(mp, mntflags) + struct mount *mp; + int mntflags; +{ + struct thread *td; + struct ufsmount *ump = VFSTOUFS(mp); + struct fs *fs; + int error, flags, susp; +#ifdef UFS_EXTATTR + int e_restart; +#endif + + flags = 0; + td = curthread; + fs = ump->um_fs; + susp = 0; + if (mntflags & MNT_FORCE) { + flags |= FORCECLOSE; + susp = fs->fs_ronly == 0; + } +#ifdef UFS_EXTATTR + if ((error = ufs_extattr_stop(mp, td))) { + if (error != EOPNOTSUPP) + printf("WARNING: unmount %s: ufs_extattr_stop " + "returned errno %d\n", mp->mnt_stat.f_mntonname, + error); + e_restart = 0; + } else { + ufs_extattr_uepm_destroy(&ump->um_extattr); + e_restart = 1; + } +#endif + if (susp) { + error = vfs_write_suspend_umnt(mp); + if (error != 0) + goto fail1; + } + if (MOUNTEDSOFTDEP(mp)) + error = softdep_flushfiles(mp, flags, td); + else + error = ffs_flushfiles(mp, flags, td); + if (error != 0 && error != ENXIO) + goto fail; + + UFS_LOCK(ump); + if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { + printf("WARNING: unmount %s: pending error: blocks %jd " + "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, + fs->fs_pendinginodes); + fs->fs_pendingblocks = 0; + fs->fs_pendinginodes = 0; + } + UFS_UNLOCK(ump); + if (MOUNTEDSOFTDEP(mp)) + softdep_unmount(mp); + if (fs->fs_ronly == 0 || ump->um_fsckpid > 0) { + fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1; + error = ffs_sbupdate(ump, MNT_WAIT, 0); + if (error && error != ENXIO) { + fs->fs_clean = 0; + goto fail; + } + } + if (susp) + vfs_write_resume(mp, VR_START_WRITE); + if (ump->um_trim_tq != NULL) { + while (ump->um_trim_inflight != 0) + pause("ufsutr", hz); + taskqueue_drain_all(ump->um_trim_tq); + taskqueue_free(ump->um_trim_tq); + } + g_topology_lock(); + if (ump->um_fsckpid > 0) { + /* + * Return to normal read-only mode. + */ + error = g_access(ump->um_cp, 0, -1, 0); + ump->um_fsckpid = 0; + } + g_vfs_close(ump->um_cp); + g_topology_unlock(); + atomic_store_rel_ptr((uintptr_t *)&ump->um_dev->si_mountpt, 0); + vrele(ump->um_devvp); + dev_rel(ump->um_dev); + mtx_destroy(UFS_MTX(ump)); + if (mp->mnt_gjprovider != NULL) { + free(mp->mnt_gjprovider, M_UFSMNT); + mp->mnt_gjprovider = NULL; + } + free(fs->fs_csp, M_UFSMNT); + free(fs, M_UFSMNT); + free(ump, M_UFSMNT); + mp->mnt_data = NULL; + MNT_ILOCK(mp); + mp->mnt_flag &= ~MNT_LOCAL; + MNT_IUNLOCK(mp); + if (td->td_su == mp) { + td->td_su = NULL; + vfs_rel(mp); + } + return (error); + +fail: + if (susp) + vfs_write_resume(mp, VR_START_WRITE); +fail1: +#ifdef UFS_EXTATTR + if (e_restart) { + ufs_extattr_uepm_init(&ump->um_extattr); +#ifdef UFS_EXTATTR_AUTOSTART + (void) ufs_extattr_autostart(mp, td); +#endif + } +#endif + + return (error); +} + +/* + * Flush out all the files in a filesystem. 
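+ * Quota files and snapshot vnodes are dealt with before the final
+ * vflush(), and the device vnode is fsynced last to push out the
+ * remaining metadata.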
+ */ +int +ffs_flushfiles(mp, flags, td) + struct mount *mp; + int flags; + struct thread *td; +{ + struct ufsmount *ump; + int qerror, error; + + ump = VFSTOUFS(mp); + qerror = 0; +#ifdef QUOTA + if (mp->mnt_flag & MNT_QUOTA) { + int i; + error = vflush(mp, 0, SKIPSYSTEM|flags, td); + if (error) + return (error); + for (i = 0; i < MAXQUOTAS; i++) { + error = quotaoff(td, mp, i); + if (error != 0) { + if ((flags & EARLYFLUSH) == 0) + return (error); + else + qerror = error; + } + } + + /* + * Here we fall through to vflush again to ensure that + * we have gotten rid of all the system vnodes, unless + * quotas must not be closed. + */ + } +#endif + ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles"); + if (ump->um_devvp->v_vflag & VV_COPYONWRITE) { + if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0) + return (error); + ffs_snapshot_unmount(mp); + flags |= FORCECLOSE; + /* + * Here we fall through to vflush again to ensure + * that we have gotten rid of all the system vnodes. + */ + } + + /* + * Do not close system files if quotas were not closed, to be + * able to sync the remaining dquots. The freeblks softupdate + * workitems might hold a reference on a dquot, preventing + * quotaoff() from completing. Next round of + * softdep_flushworklist() iteration should process the + * blockers, allowing the next run of quotaoff() to finally + * flush held dquots. + * + * Otherwise, flush all the files. + */ + if (qerror == 0 && (error = vflush(mp, 0, flags, td)) != 0) + return (error); + + /* + * Flush filesystem metadata. + */ + vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); + error = VOP_FSYNC(ump->um_devvp, MNT_WAIT, td); + VOP_UNLOCK(ump->um_devvp, 0); + return (error); +} + +/* + * Get filesystem statistics. + */ +static int +ffs_statfs(mp, sbp) + struct mount *mp; + struct statfs *sbp; +{ + struct ufsmount *ump; + struct fs *fs; + + ump = VFSTOUFS(mp); + fs = ump->um_fs; + if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC) + panic("ffs_statfs"); + sbp->f_version = STATFS_VERSION; + sbp->f_bsize = fs->fs_fsize; + sbp->f_iosize = fs->fs_bsize; + sbp->f_blocks = fs->fs_dsize; + UFS_LOCK(ump); + sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag + + fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks); + sbp->f_bavail = freespace(fs, fs->fs_minfree) + + dbtofsb(fs, fs->fs_pendingblocks); + sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO; + sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes; + UFS_UNLOCK(ump); + sbp->f_namemax = NAME_MAX; + return (0); +} + +static bool +sync_doupdate(struct inode *ip) +{ + + return ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | + IN_UPDATE)) != 0); +} + +/* + * For a lazy sync, we only care about access times, quotas and the + * superblock. Other filesystem changes are already converted to + * cylinder group blocks or inode blocks updates and are written to + * disk by syncer. + */ +static int +ffs_sync_lazy(mp) + struct mount *mp; +{ + struct vnode *mvp, *vp; + struct inode *ip; + struct thread *td; + int allerror, error; + + allerror = 0; + td = curthread; + if ((mp->mnt_flag & MNT_NOATIME) != 0) + goto qupdate; + MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) { + if (vp->v_type == VNON) { + VI_UNLOCK(vp); + continue; + } + ip = VTOI(vp); + + /* + * The IN_ACCESS flag is converted to IN_MODIFIED by + * ufs_close() and ufs_getattr() by the calls to + * ufs_itimes_locked(), without subsequent UFS_UPDATE(). + * Test also all the other timestamp flags too, to pick up + * any other cases that could be missed. 
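+ * Vnodes with no timestamp work pending and no deferred inactivation
+ * are skipped below without taking the vnode lock.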
+ */ + if (!sync_doupdate(ip) && (vp->v_iflag & VI_OWEINACT) == 0) { + VI_UNLOCK(vp); + continue; + } + if ((error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, + td)) != 0) + continue; + if (sync_doupdate(ip)) + error = ffs_update(vp, 0); + if (error != 0) + allerror = error; + vput(vp); + } + +qupdate: +#ifdef QUOTA + qsync(mp); +#endif + + if (VFSTOUFS(mp)->um_fs->fs_fmod != 0 && + (error = ffs_sbupdate(VFSTOUFS(mp), MNT_LAZY, 0)) != 0) + allerror = error; + return (allerror); +} + +/* + * Go through the disk queues to initiate sandbagged IO; + * go through the inodes to write those that have been modified; + * initiate the writing of the super block if it has been modified. + * + * Note: we are always called with the filesystem marked busy using + * vfs_busy(). + */ +static int +ffs_sync(mp, waitfor) + struct mount *mp; + int waitfor; +{ + struct vnode *mvp, *vp, *devvp; + struct thread *td; + struct inode *ip; + struct ufsmount *ump = VFSTOUFS(mp); + struct fs *fs; + int error, count, lockreq, allerror = 0; + int suspend; + int suspended; + int secondary_writes; + int secondary_accwrites; + int softdep_deps; + int softdep_accdeps; + struct bufobj *bo; + + suspend = 0; + suspended = 0; + td = curthread; + fs = ump->um_fs; + if (fs->fs_fmod != 0 && fs->fs_ronly != 0 && ump->um_fsckpid == 0) + panic("%s: ffs_sync: modification on read-only filesystem", + fs->fs_fsmnt); + if (waitfor == MNT_LAZY) { + if (!rebooting) + return (ffs_sync_lazy(mp)); + waitfor = MNT_NOWAIT; + } + + /* + * Write back each (modified) inode. + */ + lockreq = LK_EXCLUSIVE | LK_NOWAIT; + if (waitfor == MNT_SUSPEND) { + suspend = 1; + waitfor = MNT_WAIT; + } + if (waitfor == MNT_WAIT) + lockreq = LK_EXCLUSIVE; + lockreq |= LK_INTERLOCK | LK_SLEEPFAIL; +loop: + /* Grab snapshot of secondary write counts */ + MNT_ILOCK(mp); + secondary_writes = mp->mnt_secondary_writes; + secondary_accwrites = mp->mnt_secondary_accwrites; + MNT_IUNLOCK(mp); + + /* Grab snapshot of softdep dependency counts */ + softdep_get_depcounts(mp, &softdep_deps, &softdep_accdeps); + + MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { + /* + * Depend on the vnode interlock to keep things stable enough + * for a quick test. Since there might be hundreds of + * thousands of vnodes, we cannot afford even a subroutine + * call unless there's a good chance that we have work to do. + */ + if (vp->v_type == VNON) { + VI_UNLOCK(vp); + continue; + } + ip = VTOI(vp); + if ((ip->i_flag & + (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && + vp->v_bufobj.bo_dirty.bv_cnt == 0) { + VI_UNLOCK(vp); + continue; + } + if ((error = vget(vp, lockreq, td)) != 0) { + if (error == ENOENT || error == ENOLCK) { + MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); + goto loop; + } + continue; + } + if ((error = ffs_syncvnode(vp, waitfor, 0)) != 0) + allerror = error; + vput(vp); + } + /* + * Force stale filesystem control information to be flushed. 
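+ * For a waiting sync (or while rebooting) the soft updates worklist
+ * is flushed first; if that creates more work, the vnode pass above
+ * is restarted.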
+ */ + if (waitfor == MNT_WAIT || rebooting) { + if ((error = softdep_flushworklist(ump->um_mountp, &count, td))) + allerror = error; + /* Flushed work items may create new vnodes to clean */ + if (allerror == 0 && count) + goto loop; + } +#ifdef QUOTA + qsync(mp); +#endif + + devvp = ump->um_devvp; + bo = &devvp->v_bufobj; + BO_LOCK(bo); + if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0) { + BO_UNLOCK(bo); + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); + error = VOP_FSYNC(devvp, waitfor, td); + VOP_UNLOCK(devvp, 0); + if (MOUNTEDSOFTDEP(mp) && (error == 0 || error == EAGAIN)) + error = ffs_sbupdate(ump, waitfor, 0); + if (error != 0) + allerror = error; + if (allerror == 0 && waitfor == MNT_WAIT) + goto loop; + } else if (suspend != 0) { + if (softdep_check_suspend(mp, + devvp, + softdep_deps, + softdep_accdeps, + secondary_writes, + secondary_accwrites) != 0) { + MNT_IUNLOCK(mp); + goto loop; /* More work needed */ + } + mtx_assert(MNT_MTX(mp), MA_OWNED); + mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED; + MNT_IUNLOCK(mp); + suspended = 1; + } else + BO_UNLOCK(bo); + /* + * Write back modified superblock. + */ + if (fs->fs_fmod != 0 && + (error = ffs_sbupdate(ump, waitfor, suspended)) != 0) + allerror = error; + return (allerror); +} + +int +ffs_vget(mp, ino, flags, vpp) + struct mount *mp; + ino_t ino; + int flags; + struct vnode **vpp; +{ + return (ffs_vgetf(mp, ino, flags, vpp, 0)); +} + +int +ffs_vgetf(mp, ino, flags, vpp, ffs_flags) + struct mount *mp; + ino_t ino; + int flags; + struct vnode **vpp; + int ffs_flags; +{ + struct fs *fs; + struct inode *ip; + struct ufsmount *ump; + struct buf *bp; + struct vnode *vp; + int error; + + error = vfs_hash_get(mp, ino, flags, curthread, vpp, NULL, NULL); + if (error || *vpp != NULL) + return (error); + + /* + * We must promote to an exclusive lock for vnode creation. This + * can happen if lookup is passed LOCKSHARED. + */ + if ((flags & LK_TYPE_MASK) == LK_SHARED) { + flags &= ~LK_TYPE_MASK; + flags |= LK_EXCLUSIVE; + } + + /* + * We do not lock vnode creation as it is believed to be too + * expensive for such rare case as simultaneous creation of vnode + * for same ino by different processes. We just allow them to race + * and check later to decide who wins. Let the race begin! + */ + + ump = VFSTOUFS(mp); + fs = ump->um_fs; + ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO); + + /* Allocate a new vnode/inode. */ + error = getnewvnode("ufs", mp, fs->fs_magic == FS_UFS1_MAGIC ? + &ffs_vnodeops1 : &ffs_vnodeops2, &vp); + if (error) { + *vpp = NULL; + uma_zfree(uma_inode, ip); + return (error); + } + /* + * FFS supports recursive locking. + */ + lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); + VN_LOCK_AREC(vp); + vp->v_data = ip; + vp->v_bufobj.bo_bsize = fs->fs_bsize; + ip->i_vnode = vp; + ip->i_ump = ump; + ip->i_number = ino; + ip->i_ea_refs = 0; + ip->i_nextclustercg = -1; + ip->i_flag = fs->fs_magic == FS_UFS1_MAGIC ? 0 : IN_UFS2; +#ifdef QUOTA + { + int i; + for (i = 0; i < MAXQUOTAS; i++) + ip->i_dquot[i] = NODQUOT; + } +#endif + + if (ffs_flags & FFSV_FORCEINSMQ) + vp->v_vflag |= VV_FORCEINSMQ; + error = insmntque(vp, mp); + if (error != 0) { + uma_zfree(uma_inode, ip); + *vpp = NULL; + return (error); + } + vp->v_vflag &= ~VV_FORCEINSMQ; + error = vfs_hash_insert(vp, ino, flags, curthread, vpp, NULL, NULL); + if (error || *vpp != NULL) + return (error); + + /* Read in the disk contents for the inode, copy into the inode. 
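+ * The bread() below pulls in the entire inode block; ffs_load_inode()
+ * then copies just this inode's dinode into the freshly allocated
+ * in-core structure.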
*/ + error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), + (int)fs->fs_bsize, NOCRED, &bp); + if (error) { + /* + * The inode does not contain anything useful, so it would + * be misleading to leave it on its hash chain. With mode + * still zero, it will be unlinked and returned to the free + * list by vput(). + */ + brelse(bp); + vput(vp); + *vpp = NULL; + return (error); + } + if (I_IS_UFS1(ip)) + ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK); + else + ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK); + ffs_load_inode(bp, ip, fs, ino); + if (DOINGSOFTDEP(vp)) + softdep_load_inodeblock(ip); + else + ip->i_effnlink = ip->i_nlink; + bqrelse(bp); + + /* + * Initialize the vnode from the inode, check for aliases. + * Note that the underlying vnode may have changed. + */ + error = ufs_vinit(mp, I_IS_UFS1(ip) ? &ffs_fifoops1 : &ffs_fifoops2, + &vp); + if (error) { + vput(vp); + *vpp = NULL; + return (error); + } + + /* + * Finish inode initialization. + */ + if (vp->v_type != VFIFO) { + /* FFS supports shared locking for all files except fifos. */ + VN_LOCK_ASHARE(vp); + } + + /* + * Set up a generation number for this inode if it does not + * already have one. This should only happen on old filesystems. + */ + if (ip->i_gen == 0) { + while (ip->i_gen == 0) + ip->i_gen = arc4random(); + if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { + ip->i_flag |= IN_MODIFIED; + DIP_SET(ip, i_gen, ip->i_gen); + } + } +#ifdef MAC + if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) { + /* + * If this vnode is already allocated, and we're running + * multi-label, attempt to perform a label association + * from the extended attributes on the inode. + */ + error = mac_vnode_associate_extattr(mp, vp); + if (error) { + /* ufs_inactive will release ip->i_devvp ref. */ + vput(vp); + *vpp = NULL; + return (error); + } + } +#endif + + *vpp = vp; + return (0); +} + +/* + * File handle to vnode + * + * Have to be really careful about stale file handles: + * - check that the inode number is valid + * - for UFS2 check that the inode number is initialized + * - call ffs_vget() to get the locked inode + * - check for an unallocated inode (i_mode == 0) + * - check that the given client host has export rights and return + * those rights via. exflagsp and credanonp + */ +static int +ffs_fhtovp(mp, fhp, flags, vpp) + struct mount *mp; + struct fid *fhp; + int flags; + struct vnode **vpp; +{ + struct ufid *ufhp; + struct ufsmount *ump; + struct fs *fs; + struct cg *cgp; + struct buf *bp; + ino_t ino; + u_int cg; + int error; + + ufhp = (struct ufid *)fhp; + ino = ufhp->ufid_ino; + ump = VFSTOUFS(mp); + fs = ump->um_fs; + if (ino < ROOTINO || ino >= fs->fs_ncg * fs->fs_ipg) + return (ESTALE); + /* + * Need to check if inode is initialized because UFS2 does lazy + * initialization and nfs_fhtovp can offer arbitrary inode numbers. + */ + if (fs->fs_magic != FS_UFS2_MAGIC) + return (ufs_fhtovp(mp, ufhp, flags, vpp)); + cg = ino_to_cg(fs, ino); + error = bread(ump->um_devvp, fsbtodb(fs, cgtod(fs, cg)), + (int)fs->fs_cgsize, NOCRED, &bp); + if (error) + return (error); + cgp = (struct cg *)bp->b_data; + if (!cg_chkmagic(cgp) || ino >= cg * fs->fs_ipg + cgp->cg_initediblk) { + brelse(bp); + return (ESTALE); + } + brelse(bp); + return (ufs_fhtovp(mp, ufhp, flags, vpp)); +} + +/* + * Initialize the filesystem. + */ +static int +ffs_init(vfsp) + struct vfsconf *vfsp; +{ + + ffs_susp_initialize(); + softdep_initialize(); + return (ufs_init(vfsp)); +} + +/* + * Undo the work of ffs_init(). 
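+ * The subsystems are torn down in the reverse of the order in which
+ * ffs_init() set them up.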
+ */ +static int +ffs_uninit(vfsp) + struct vfsconf *vfsp; +{ + int ret; + + ret = ufs_uninit(vfsp); + softdep_uninitialize(); + ffs_susp_uninitialize(); + return (ret); +} + +/* + * Write a superblock and associated information back to disk. + */ +int +ffs_sbupdate(ump, waitfor, suspended) + struct ufsmount *ump; + int waitfor; + int suspended; +{ + struct fs *fs = ump->um_fs; + struct buf *sbbp; + struct buf *bp; + int blks; + void *space; + int i, size, error, allerror = 0; + + if (fs->fs_ronly == 1 && + (ump->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) != + (MNT_RDONLY | MNT_UPDATE) && ump->um_fsckpid == 0) + panic("ffs_sbupdate: write read-only filesystem"); + /* + * We use the superblock's buf to serialize calls to ffs_sbupdate(). + */ + sbbp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc), + (int)fs->fs_sbsize, 0, 0, 0); + /* + * First write back the summary information. + */ + blks = howmany(fs->fs_cssize, fs->fs_fsize); + space = fs->fs_csp; + for (i = 0; i < blks; i += fs->fs_frag) { + size = fs->fs_bsize; + if (i + fs->fs_frag > blks) + size = (blks - i) * fs->fs_fsize; + bp = getblk(ump->um_devvp, fsbtodb(fs, fs->fs_csaddr + i), + size, 0, 0, 0); + bcopy(space, bp->b_data, (u_int)size); + space = (char *)space + size; + if (suspended) + bp->b_flags |= B_VALIDSUSPWRT; + if (waitfor != MNT_WAIT) + bawrite(bp); + else if ((error = bwrite(bp)) != 0) + allerror = error; + } + /* + * Now write back the superblock itself. If any errors occurred + * up to this point, then fail so that the superblock avoids + * being written out as clean. + */ + if (allerror) { + brelse(sbbp); + return (allerror); + } + bp = sbbp; + if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 && + (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) { + printf("WARNING: %s: correcting fs_sblockloc from %jd to %d\n", + fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1); + fs->fs_sblockloc = SBLOCK_UFS1; + } + if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 && + (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) { + printf("WARNING: %s: correcting fs_sblockloc from %jd to %d\n", + fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2); + fs->fs_sblockloc = SBLOCK_UFS2; + } + fs->fs_fmod = 0; + fs->fs_time = time_second; + if (MOUNTEDSOFTDEP(ump->um_mountp)) + softdep_setup_sbupdate(ump, (struct fs *)bp->b_data, bp); + bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); + ffs_oldfscompat_write((struct fs *)bp->b_data, ump); + if (suspended) + bp->b_flags |= B_VALIDSUSPWRT; + if (waitfor != MNT_WAIT) + bawrite(bp); + else if ((error = bwrite(bp)) != 0) + allerror = error; + return (allerror); +} + +static int +ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp, + int attrnamespace, const char *attrname) +{ + +#ifdef UFS_EXTATTR + return (ufs_extattrctl(mp, cmd, filename_vp, attrnamespace, + attrname)); +#else + return (vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, + attrname)); +#endif +} + +static void +ffs_ifree(struct ufsmount *ump, struct inode *ip) +{ + + if (ump->um_fstype == UFS1 && ip->i_din1 != NULL) + uma_zfree(uma_ufs1, ip->i_din1); + else if (ip->i_din2 != NULL) + uma_zfree(uma_ufs2, ip->i_din2); + uma_zfree(uma_inode, ip); +} + +static int dobkgrdwrite = 1; +SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0, + "Do background writes (honoring the BV_BKGRDWRITE flag)?"); + +/* + * Complete a background write started from bwrite. 
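+ * This runs as the b_iodone handler of the copy created in
+ * ffs_bufwrite() below: unfinished dependencies are handed back to
+ * the original buffer and BV_BKGRDINPROG is cleared so that any
+ * waiter on the original can continue.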
+ */ +static void +ffs_backgroundwritedone(struct buf *bp) +{ + struct bufobj *bufobj; + struct buf *origbp; + + /* + * Find the original buffer that we are writing. + */ + bufobj = bp->b_bufobj; + BO_LOCK(bufobj); + if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL) + panic("backgroundwritedone: lost buffer"); + + /* + * We should mark the cylinder group buffer origbp as + * dirty, to not loose the failed write. + */ + if ((bp->b_ioflags & BIO_ERROR) != 0) + origbp->b_vflags |= BV_BKGRDERR; + BO_UNLOCK(bufobj); + /* + * Process dependencies then return any unfinished ones. + */ + if (!LIST_EMPTY(&bp->b_dep) && (bp->b_ioflags & BIO_ERROR) == 0) + buf_complete(bp); +#ifdef SOFTUPDATES + if (!LIST_EMPTY(&bp->b_dep)) + softdep_move_dependencies(bp, origbp); +#endif + /* + * This buffer is marked B_NOCACHE so when it is released + * by biodone it will be tossed. + */ + bp->b_flags |= B_NOCACHE; + bp->b_flags &= ~B_CACHE; + pbrelvp(bp); + + /* + * Prevent brelse() from trying to keep and re-dirtying bp on + * errors. It causes b_bufobj dereference in + * bdirty()/reassignbuf(), and b_bufobj was cleared in + * pbrelvp() above. + */ + if ((bp->b_ioflags & BIO_ERROR) != 0) + bp->b_flags |= B_INVAL; + bufdone(bp); + BO_LOCK(bufobj); + /* + * Clear the BV_BKGRDINPROG flag in the original buffer + * and awaken it if it is waiting for the write to complete. + * If BV_BKGRDINPROG is not set in the original buffer it must + * have been released and re-instantiated - which is not legal. + */ + KASSERT((origbp->b_vflags & BV_BKGRDINPROG), + ("backgroundwritedone: lost buffer2")); + origbp->b_vflags &= ~BV_BKGRDINPROG; + if (origbp->b_vflags & BV_BKGRDWAIT) { + origbp->b_vflags &= ~BV_BKGRDWAIT; + wakeup(&origbp->b_xflags); + } + BO_UNLOCK(bufobj); +} + + +/* + * Write, release buffer on completion. (Done by iodone + * if async). Do not bother writing anything if the buffer + * is invalid. + * + * Note that we set B_CACHE here, indicating that buffer is + * fully valid and thus cacheable. This is true even of NFS + * now so we set it generally. This could be set either here + * or in biodone() since the I/O is synchronous. We put it + * here. + */ +static int +ffs_bufwrite(struct buf *bp) +{ + struct buf *newbp; + + CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); + if (bp->b_flags & B_INVAL) { + brelse(bp); + return (0); + } + + if (!BUF_ISLOCKED(bp)) + panic("bufwrite: buffer is not busy???"); + /* + * If a background write is already in progress, delay + * writing this block if it is asynchronous. Otherwise + * wait for the background write to complete. + */ + BO_LOCK(bp->b_bufobj); + if (bp->b_vflags & BV_BKGRDINPROG) { + if (bp->b_flags & B_ASYNC) { + BO_UNLOCK(bp->b_bufobj); + bdwrite(bp); + return (0); + } + bp->b_vflags |= BV_BKGRDWAIT; + msleep(&bp->b_xflags, BO_LOCKPTR(bp->b_bufobj), PRIBIO, + "bwrbg", 0); + if (bp->b_vflags & BV_BKGRDINPROG) + panic("bufwrite: still writing"); + } + bp->b_vflags &= ~BV_BKGRDERR; + BO_UNLOCK(bp->b_bufobj); + + /* + * If this buffer is marked for background writing and we + * do not have to wait for it, make a copy and write the + * copy so as to leave this buffer ready for further use. + * + * This optimization eats a lot of memory. If we have a page + * or buffer shortfall we can't do it. 
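+ * It is likewise abandoned when geteblk() cannot supply a copy
+ * buffer without sleeping.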
+ */ + if (dobkgrdwrite && (bp->b_xflags & BX_BKGRDWRITE) && + (bp->b_flags & B_ASYNC) && + !vm_page_count_severe() && + !buf_dirty_count_severe()) { + KASSERT(bp->b_iodone == NULL, + ("bufwrite: needs chained iodone (%p)", bp->b_iodone)); + + /* get a new block */ + newbp = geteblk(bp->b_bufsize, GB_NOWAIT_BD); + if (newbp == NULL) + goto normal_write; + + KASSERT(buf_mapped(bp), ("Unmapped cg")); + memcpy(newbp->b_data, bp->b_data, bp->b_bufsize); + BO_LOCK(bp->b_bufobj); + bp->b_vflags |= BV_BKGRDINPROG; + BO_UNLOCK(bp->b_bufobj); + newbp->b_xflags |= BX_BKGRDMARKER; + newbp->b_lblkno = bp->b_lblkno; + newbp->b_blkno = bp->b_blkno; + newbp->b_offset = bp->b_offset; + newbp->b_iodone = ffs_backgroundwritedone; + newbp->b_flags |= B_ASYNC; + newbp->b_flags &= ~B_INVAL; + pbgetvp(bp->b_vp, newbp); + +#ifdef SOFTUPDATES + /* + * Move over the dependencies. If there are rollbacks, + * leave the parent buffer dirtied as it will need to + * be written again. + */ + if (LIST_EMPTY(&bp->b_dep) || + softdep_move_dependencies(bp, newbp) == 0) + bundirty(bp); +#else + bundirty(bp); +#endif + + /* + * Initiate write on the copy, release the original. The + * BKGRDINPROG flag prevents it from going away until + * the background write completes. + */ + bqrelse(bp); + bp = newbp; + } else + /* Mark the buffer clean */ + bundirty(bp); + + + /* Let the normal bufwrite do the rest for us */ +normal_write: + return (bufwrite(bp)); +} + + +static void +ffs_geom_strategy(struct bufobj *bo, struct buf *bp) +{ + struct vnode *vp; + int error; + struct buf *tbp; + int nocopy; + + vp = bo->__bo_vnode; + if (bp->b_iocmd == BIO_WRITE) { + if ((bp->b_flags & B_VALIDSUSPWRT) == 0 && + bp->b_vp != NULL && bp->b_vp->v_mount != NULL && + (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0) + panic("ffs_geom_strategy: bad I/O"); + nocopy = bp->b_flags & B_NOCOPY; + bp->b_flags &= ~(B_VALIDSUSPWRT | B_NOCOPY); + if ((vp->v_vflag & VV_COPYONWRITE) && nocopy == 0 && + vp->v_rdev->si_snapdata != NULL) { + if ((bp->b_flags & B_CLUSTER) != 0) { + runningbufwakeup(bp); + TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head, + b_cluster.cluster_entry) { + error = ffs_copyonwrite(vp, tbp); + if (error != 0 && + error != EOPNOTSUPP) { + bp->b_error = error; + bp->b_ioflags |= BIO_ERROR; + bufdone(bp); + return; + } + } + bp->b_runningbufspace = bp->b_bufsize; + atomic_add_long(&runningbufspace, + bp->b_runningbufspace); + } else { + error = ffs_copyonwrite(vp, bp); + if (error != 0 && error != EOPNOTSUPP) { + bp->b_error = error; + bp->b_ioflags |= BIO_ERROR; + bufdone(bp); + return; + } + } + } +#ifdef SOFTUPDATES + if ((bp->b_flags & B_CLUSTER) != 0) { + TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head, + b_cluster.cluster_entry) { + if (!LIST_EMPTY(&tbp->b_dep)) + buf_start(tbp); + } + } else { + if (!LIST_EMPTY(&bp->b_dep)) + buf_start(bp); + } + +#endif + } + g_vfs_strategy(bo, bp); +} + +int +ffs_own_mount(const struct mount *mp) +{ + + if (mp->mnt_op == &ufs_vfsops) + return (1); + return (0); +} + +#ifdef DDB +#ifdef SOFTUPDATES + +/* defined in ffs_softdep.c */ +extern void db_print_ffs(struct ufsmount *ump); + +DB_SHOW_COMMAND(ffs, db_show_ffs) +{ + struct mount *mp; + struct ufsmount *ump; + + if (have_addr) { + ump = VFSTOUFS((struct mount *)addr); + db_print_ffs(ump); + return; + } + + TAILQ_FOREACH(mp, &mountlist, mnt_list) { + if (!strcmp(mp->mnt_stat.f_fstypename, ufs_vfsconf.vfc_name)) + db_print_ffs(VFSTOUFS(mp)); + } +} + +#endif /* SOFTUPDATES */ +#endif /* DDB */ diff --git a/Dump/ufs/ffs/ffs_vnops.c 
b/Dump/ufs/ffs/ffs_vnops.c new file mode 100644 index 0000000..50ceebe --- /dev/null +++ b/Dump/ufs/ffs/ffs_vnops.c @@ -0,0 +1,1745 @@ +/*- + * Copyright (c) 2002, 2003 Networks Associates Technology, Inc. + * All rights reserved. + * + * This software was developed for the FreeBSD Project by Marshall + * Kirk McKusick and Network Associates Laboratories, the Security + * Research Division of Network Associates, Inc. under DARPA/SPAWAR + * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS + * research program + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * from: @(#)ufs_readwrite.c 8.11 (Berkeley) 5/8/95 + * from: $FreeBSD: .../ufs/ufs_readwrite.c,v 1.96 2002/08/12 09:22:11 phk ... + * @(#)ffs_vnops.c 8.15 (Berkeley) 5/14/95 + */ + +#include +__FBSDID("$FreeBSD: releng/11.2/sys/ufs/ffs/ffs_vnops.c 331722 2018-03-29 02:50:57Z eadler $"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include "opt_directio.h" +#include "opt_ffs.h" + +#ifdef DIRECTIO +extern int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone); +#endif +static vop_fdatasync_t ffs_fdatasync; +static vop_fsync_t ffs_fsync; +static vop_getpages_t ffs_getpages; +static vop_lock1_t ffs_lock; +static vop_read_t ffs_read; +static vop_write_t ffs_write; +static int ffs_extread(struct vnode *vp, struct uio *uio, int ioflag); +static int ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, + struct ucred *cred); +static vop_strategy_t ffsext_strategy; +static vop_closeextattr_t ffs_closeextattr; +static vop_deleteextattr_t ffs_deleteextattr; +static vop_getextattr_t ffs_getextattr; +static vop_listextattr_t ffs_listextattr; +static vop_openextattr_t ffs_openextattr; +static vop_setextattr_t ffs_setextattr; +static vop_vptofh_t ffs_vptofh; + +/* Global vfs data structures for ufs. */ +struct vop_vector ffs_vnodeops1 = { + .vop_default = &ufs_vnodeops, + .vop_fsync = ffs_fsync, + .vop_fdatasync = ffs_fdatasync, + .vop_getpages = ffs_getpages, + .vop_getpages_async = vnode_pager_local_getpages_async, + .vop_lock1 = ffs_lock, + .vop_read = ffs_read, + .vop_reallocblks = ffs_reallocblks, + .vop_write = ffs_write, + .vop_vptofh = ffs_vptofh, +}; + +struct vop_vector ffs_fifoops1 = { + .vop_default = &ufs_fifoops, + .vop_fsync = ffs_fsync, + .vop_fdatasync = ffs_fdatasync, + .vop_reallocblks = ffs_reallocblks, /* XXX: really ??? */ + .vop_vptofh = ffs_vptofh, +}; + +/* Global vfs data structures for ufs. */ +struct vop_vector ffs_vnodeops2 = { + .vop_default = &ufs_vnodeops, + .vop_fsync = ffs_fsync, + .vop_fdatasync = ffs_fdatasync, + .vop_getpages = ffs_getpages, + .vop_getpages_async = vnode_pager_local_getpages_async, + .vop_lock1 = ffs_lock, + .vop_read = ffs_read, + .vop_reallocblks = ffs_reallocblks, + .vop_write = ffs_write, + .vop_closeextattr = ffs_closeextattr, + .vop_deleteextattr = ffs_deleteextattr, + .vop_getextattr = ffs_getextattr, + .vop_listextattr = ffs_listextattr, + .vop_openextattr = ffs_openextattr, + .vop_setextattr = ffs_setextattr, + .vop_vptofh = ffs_vptofh, +}; + +struct vop_vector ffs_fifoops2 = { + .vop_default = &ufs_fifoops, + .vop_fsync = ffs_fsync, + .vop_fdatasync = ffs_fdatasync, + .vop_lock1 = ffs_lock, + .vop_reallocblks = ffs_reallocblks, + .vop_strategy = ffsext_strategy, + .vop_closeextattr = ffs_closeextattr, + .vop_deleteextattr = ffs_deleteextattr, + .vop_getextattr = ffs_getextattr, + .vop_listextattr = ffs_listextattr, + .vop_openextattr = ffs_openextattr, + .vop_setextattr = ffs_setextattr, + .vop_vptofh = ffs_vptofh, +}; + +/* + * Synch an open file. 
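+ * The real work is done by ffs_syncvnode(); with soft updates and
+ * MNT_WAIT we loop, because softdep_fsync() may drop the vnode lock
+ * and allow new dirty buffers to appear.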
+ */ +/* ARGSUSED */ +static int +ffs_fsync(struct vop_fsync_args *ap) +{ + struct vnode *vp; + struct bufobj *bo; + int error; + + vp = ap->a_vp; + bo = &vp->v_bufobj; +retry: + error = ffs_syncvnode(vp, ap->a_waitfor, 0); + if (error) + return (error); + if (ap->a_waitfor == MNT_WAIT && DOINGSOFTDEP(vp)) { + error = softdep_fsync(vp); + if (error) + return (error); + + /* + * The softdep_fsync() function may drop vp lock, + * allowing for dirty buffers to reappear on the + * bo_dirty list. Recheck and resync as needed. + */ + BO_LOCK(bo); + if ((vp->v_type == VREG || vp->v_type == VDIR) && + (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) { + BO_UNLOCK(bo); + goto retry; + } + BO_UNLOCK(bo); + } + return (0); +} + +int +ffs_syncvnode(struct vnode *vp, int waitfor, int flags) +{ + struct inode *ip; + struct bufobj *bo; + struct buf *bp, *nbp; + ufs_lbn_t lbn; + int error, passes; + bool still_dirty, wait; + + ip = VTOI(vp); + ip->i_flag &= ~IN_NEEDSYNC; + bo = &vp->v_bufobj; + + /* + * When doing MNT_WAIT we must first flush all dependencies + * on the inode. + */ + if (DOINGSOFTDEP(vp) && waitfor == MNT_WAIT && + (error = softdep_sync_metadata(vp)) != 0) + return (error); + + /* + * Flush all dirty buffers associated with a vnode. + */ + error = 0; + passes = 0; + wait = false; /* Always do an async pass first. */ + lbn = lblkno(ITOFS(ip), (ip->i_size + ITOFS(ip)->fs_bsize - 1)); + BO_LOCK(bo); +loop: + TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) + bp->b_vflags &= ~BV_SCANNED; + TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { + /* + * Reasons to skip this buffer: it has already been considered + * on this pass, the buffer has dependencies that will cause + * it to be redirtied and it has not already been deferred, + * or it is already being written. + */ + if ((bp->b_vflags & BV_SCANNED) != 0) + continue; + bp->b_vflags |= BV_SCANNED; + /* + * Flush indirects in order, if requested. + * + * Note that if only datasync is requested, we can + * skip indirect blocks when softupdates are not + * active. Otherwise we must flush them with data, + * since dependencies prevent data block writes. + */ + if (waitfor == MNT_WAIT && bp->b_lblkno <= -NDADDR && + (lbn_level(bp->b_lblkno) >= passes || + ((flags & DATA_ONLY) != 0 && !DOINGSOFTDEP(vp)))) + continue; + if (bp->b_lblkno > lbn) + panic("ffs_syncvnode: syncing truncated data."); + if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) == 0) { + BO_UNLOCK(bo); + } else if (wait) { + if (BUF_LOCK(bp, + LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, + BO_LOCKPTR(bo)) != 0) { + bp->b_vflags &= ~BV_SCANNED; + goto next; + } + } else + continue; + if ((bp->b_flags & B_DELWRI) == 0) + panic("ffs_fsync: not dirty"); + /* + * Check for dependencies and potentially complete them. + */ + if (!LIST_EMPTY(&bp->b_dep) && + (error = softdep_sync_buf(vp, bp, + wait ? MNT_WAIT : MNT_NOWAIT)) != 0) { + /* I/O error. */ + if (error != EBUSY) { + BUF_UNLOCK(bp); + return (error); + } + /* If we deferred once, don't defer again. */ + if ((bp->b_flags & B_DEFERRED) == 0) { + bp->b_flags |= B_DEFERRED; + BUF_UNLOCK(bp); + goto next; + } + } + if (wait) { + bremfree(bp); + if ((error = bwrite(bp)) != 0) + return (error); + } else if ((bp->b_flags & B_CLUSTEROK)) { + (void) vfs_bio_awrite(bp); + } else { + bremfree(bp); + (void) bawrite(bp); + } +next: + /* + * Since we may have slept during the I/O, we need + * to start from a known point. 
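+ * Restarting from the head of the dirty list is safe because buffers
+ * already handled on this pass carry BV_SCANNED and are skipped.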
+ */ + BO_LOCK(bo); + nbp = TAILQ_FIRST(&bo->bo_dirty.bv_hd); + } + if (waitfor != MNT_WAIT) { + BO_UNLOCK(bo); + if ((flags & NO_INO_UPDT) != 0) + return (0); + else + return (ffs_update(vp, 0)); + } + /* Drain IO to see if we're done. */ + bufobj_wwait(bo, 0, 0); + /* + * Block devices associated with filesystems may have new I/O + * requests posted for them even if the vnode is locked, so no + * amount of trying will get them clean. We make several passes + * as a best effort. + * + * Regular files may need multiple passes to flush all dependency + * work as it is possible that we must write once per indirect + * level, once for the leaf, and once for the inode and each of + * these will be done with one sync and one async pass. + */ + if (bo->bo_dirty.bv_cnt > 0) { + if ((flags & DATA_ONLY) == 0) { + still_dirty = true; + } else { + /* + * For data-only sync, dirty indirect buffers + * are ignored. + */ + still_dirty = false; + TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) { + if (bp->b_lblkno > -NDADDR) { + still_dirty = true; + break; + } + } + } + + if (still_dirty) { + /* Write the inode after sync passes to flush deps. */ + if (wait && DOINGSOFTDEP(vp) && + (flags & NO_INO_UPDT) == 0) { + BO_UNLOCK(bo); + ffs_update(vp, 1); + BO_LOCK(bo); + } + /* switch between sync/async. */ + wait = !wait; + if (wait || ++passes < NIADDR + 2) + goto loop; +#ifdef INVARIANTS + if (!vn_isdisk(vp, NULL)) + vn_printf(vp, "ffs_fsync: dirty "); +#endif + } + } + BO_UNLOCK(bo); + error = 0; + if ((flags & DATA_ONLY) == 0) { + if ((flags & NO_INO_UPDT) == 0) + error = ffs_update(vp, 1); + if (DOINGSUJ(vp)) + softdep_journal_fsync(VTOI(vp)); + } + return (error); +} + +static int +ffs_fdatasync(struct vop_fdatasync_args *ap) +{ + + return (ffs_syncvnode(ap->a_vp, MNT_WAIT, DATA_ONLY)); +} + +static int +ffs_lock(ap) + struct vop_lock1_args /* { + struct vnode *a_vp; + int a_flags; + struct thread *a_td; + char *file; + int line; + } */ *ap; +{ +#ifndef NO_FFS_SNAPSHOT + struct vnode *vp; + int flags; + struct lock *lkp; + int result; + + switch (ap->a_flags & LK_TYPE_MASK) { + case LK_SHARED: + case LK_UPGRADE: + case LK_EXCLUSIVE: + vp = ap->a_vp; + flags = ap->a_flags; + for (;;) { +#ifdef DEBUG_VFS_LOCKS + KASSERT(vp->v_holdcnt != 0, + ("ffs_lock %p: zero hold count", vp)); +#endif + lkp = vp->v_vnlock; + result = _lockmgr_args(lkp, flags, VI_MTX(vp), + LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, + ap->a_file, ap->a_line); + if (lkp == vp->v_vnlock || result != 0) + break; + /* + * Apparent success, except that the vnode + * mutated between snapshot file vnode and + * regular file vnode while this process + * slept. The lock currently held is not the + * right lock. Release it, and try to get the + * new lock. + */ + (void) _lockmgr_args(lkp, LK_RELEASE, NULL, + LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, + ap->a_file, ap->a_line); + if ((flags & (LK_INTERLOCK | LK_NOWAIT)) == + (LK_INTERLOCK | LK_NOWAIT)) + return (EBUSY); + if ((flags & LK_TYPE_MASK) == LK_UPGRADE) + flags = (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE; + flags &= ~LK_INTERLOCK; + } + break; + default: + result = VOP_LOCK1_APV(&ufs_vnodeops, ap); + } + return (result); +#else + return (VOP_LOCK1_APV(&ufs_vnodeops, ap)); +#endif +} + +/* + * Vnode op for reading. 
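+ *
+ * Each pass of the loop below transfers at most one filesystem block.
+ * As an illustration (assuming a common 32768-byte fs_bsize), a read
+ * starting at offset 40000 maps to lbn = lblkno(fs, 40000) = 1 and
+ * blkoffset = blkoff(fs, 40000) = 7232, so at most
+ * xfersize = 32768 - 7232 = 25536 bytes are copied out of that block
+ * before the next iteration continues at a block boundary.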
+ */ +static int +ffs_read(ap) + struct vop_read_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + struct vnode *vp; + struct inode *ip; + struct uio *uio; + struct fs *fs; + struct buf *bp; + ufs_lbn_t lbn, nextlbn; + off_t bytesinfile; + long size, xfersize, blkoffset; + ssize_t orig_resid; + int error; + int seqcount; + int ioflag; + + vp = ap->a_vp; + uio = ap->a_uio; + ioflag = ap->a_ioflag; + if (ap->a_ioflag & IO_EXT) +#ifdef notyet + return (ffs_extread(vp, uio, ioflag)); +#else + panic("ffs_read+IO_EXT"); +#endif +#ifdef DIRECTIO + if ((ioflag & IO_DIRECT) != 0) { + int workdone; + + error = ffs_rawread(vp, uio, &workdone); + if (error != 0 || workdone != 0) + return error; + } +#endif + + seqcount = ap->a_ioflag >> IO_SEQSHIFT; + ip = VTOI(vp); + +#ifdef INVARIANTS + if (uio->uio_rw != UIO_READ) + panic("ffs_read: mode"); + + if (vp->v_type == VLNK) { + if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen) + panic("ffs_read: short symlink"); + } else if (vp->v_type != VREG && vp->v_type != VDIR) + panic("ffs_read: type %d", vp->v_type); +#endif + orig_resid = uio->uio_resid; + KASSERT(orig_resid >= 0, ("ffs_read: uio->uio_resid < 0")); + if (orig_resid == 0) + return (0); + KASSERT(uio->uio_offset >= 0, ("ffs_read: uio->uio_offset < 0")); + fs = ITOFS(ip); + if (uio->uio_offset < ip->i_size && + uio->uio_offset >= fs->fs_maxfilesize) + return (EOVERFLOW); + + for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { + if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) + break; + lbn = lblkno(fs, uio->uio_offset); + nextlbn = lbn + 1; + + /* + * size of buffer. The buffer representing the + * end of the file is rounded up to the size of + * the block type ( fragment or full block, + * depending ). + */ + size = blksize(fs, ip, lbn); + blkoffset = blkoff(fs, uio->uio_offset); + + /* + * The amount we want to transfer in this iteration is + * one FS block less the amount of the data before + * our startpoint (duh!) + */ + xfersize = fs->fs_bsize - blkoffset; + + /* + * But if we actually want less than the block, + * or the file doesn't have a whole block more of data, + * then use the lesser number. + */ + if (uio->uio_resid < xfersize) + xfersize = uio->uio_resid; + if (bytesinfile < xfersize) + xfersize = bytesinfile; + + if (lblktosize(fs, nextlbn) >= ip->i_size) { + /* + * Don't do readahead if this is the end of the file. + */ + error = bread_gb(vp, lbn, size, NOCRED, + GB_UNMAPPED, &bp); + } else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { + /* + * Otherwise if we are allowed to cluster, + * grab as much as we can. + * + * XXX This may not be a win if we are not + * doing sequential access. + */ + error = cluster_read(vp, ip->i_size, lbn, + size, NOCRED, blkoffset + uio->uio_resid, + seqcount, GB_UNMAPPED, &bp); + } else if (seqcount > 1) { + /* + * If we are NOT allowed to cluster, then + * if we appear to be acting sequentially, + * fire off a request for a readahead + * as well as a read. Note that the 4th and 5th + * arguments point to arrays of the size specified in + * the 6th argument. + */ + u_int nextsize = blksize(fs, ip, nextlbn); + error = breadn_flags(vp, lbn, size, &nextlbn, + &nextsize, 1, NOCRED, GB_UNMAPPED, &bp); + } else { + /* + * Failing all of the above, just read what the + * user asked for. Interestingly, the same as + * the first option above. 
+ */ + error = bread_gb(vp, lbn, size, NOCRED, + GB_UNMAPPED, &bp); + } + if (error) { + brelse(bp); + bp = NULL; + break; + } + + /* + * We should only get non-zero b_resid when an I/O error + * has occurred, which should cause us to break above. + * However, if the short read did not cause an error, + * then we want to ensure that we do not uiomove bad + * or uninitialized data. + */ + size -= bp->b_resid; + if (size < xfersize) { + if (size == 0) + break; + xfersize = size; + } + + if (buf_mapped(bp)) { + error = vn_io_fault_uiomove((char *)bp->b_data + + blkoffset, (int)xfersize, uio); + } else { + error = vn_io_fault_pgmove(bp->b_pages, blkoffset, + (int)xfersize, uio); + } + if (error) + break; + + vfs_bio_brelse(bp, ioflag); + } + + /* + * This can only happen in the case of an error + * because the loop above resets bp to NULL on each iteration + * and on normal completion has not set a new value into it. + * so it must have come from a 'break' statement + */ + if (bp != NULL) + vfs_bio_brelse(bp, ioflag); + + if ((error == 0 || uio->uio_resid != orig_resid) && + (vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0 && + (ip->i_flag & IN_ACCESS) == 0) { + VI_LOCK(vp); + ip->i_flag |= IN_ACCESS; + VI_UNLOCK(vp); + } + return (error); +} + +/* + * Vnode op for writing. + */ +static int +ffs_write(ap) + struct vop_write_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + struct vnode *vp; + struct uio *uio; + struct inode *ip; + struct fs *fs; + struct buf *bp; + ufs_lbn_t lbn; + off_t osize; + ssize_t resid; + int seqcount; + int blkoffset, error, flags, ioflag, size, xfersize; + + vp = ap->a_vp; + uio = ap->a_uio; + ioflag = ap->a_ioflag; + if (ap->a_ioflag & IO_EXT) +#ifdef notyet + return (ffs_extwrite(vp, uio, ioflag, ap->a_cred)); +#else + panic("ffs_write+IO_EXT"); +#endif + + seqcount = ap->a_ioflag >> IO_SEQSHIFT; + ip = VTOI(vp); + +#ifdef INVARIANTS + if (uio->uio_rw != UIO_WRITE) + panic("ffs_write: mode"); +#endif + + switch (vp->v_type) { + case VREG: + if (ioflag & IO_APPEND) + uio->uio_offset = ip->i_size; + if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) + return (EPERM); + /* FALLTHROUGH */ + case VLNK: + break; + case VDIR: + panic("ffs_write: dir write"); + break; + default: + panic("ffs_write: type %p %d (%d,%d)", vp, (int)vp->v_type, + (int)uio->uio_offset, + (int)uio->uio_resid + ); + } + + KASSERT(uio->uio_resid >= 0, ("ffs_write: uio->uio_resid < 0")); + KASSERT(uio->uio_offset >= 0, ("ffs_write: uio->uio_offset < 0")); + fs = ITOFS(ip); + if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize) + return (EFBIG); + /* + * Maybe this should be above the vnode op call, but so long as + * file servers have no limits, I don't think it matters. + */ + if (vn_rlimit_fsize(vp, uio, uio->uio_td)) + return (EFBIG); + + resid = uio->uio_resid; + osize = ip->i_size; + if (seqcount > BA_SEQMAX) + flags = BA_SEQMAX << BA_SEQSHIFT; + else + flags = seqcount << BA_SEQSHIFT; + if ((ioflag & IO_SYNC) && !DOINGASYNC(vp)) + flags |= IO_SYNC; + flags |= BA_UNMAPPED; + + for (error = 0; uio->uio_resid > 0;) { + lbn = lblkno(fs, uio->uio_offset); + blkoffset = blkoff(fs, uio->uio_offset); + xfersize = fs->fs_bsize - blkoffset; + if (uio->uio_resid < xfersize) + xfersize = uio->uio_resid; + if (uio->uio_offset + xfersize > ip->i_size) + vnode_pager_setsize(vp, uio->uio_offset + xfersize); + + /* + * We must perform a read-before-write if the transfer size + * does not cover the entire buffer. 
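+	 * For example, a 100-byte write into a 32768-byte block only
+	 * dirties part of the buffer, so BA_CLRBUF is passed to
+	 * UFS_BALLOC() below and the existing block contents are read
+	 * in (a newly allocated block is simply zeroed) before
+	 * uiomove() overwrites the affected range.  A write that covers
+	 * the whole block skips that extra work.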
+ */ + if (fs->fs_bsize > xfersize) + flags |= BA_CLRBUF; + else + flags &= ~BA_CLRBUF; +/* XXX is uio->uio_offset the right thing here? */ + error = UFS_BALLOC(vp, uio->uio_offset, xfersize, + ap->a_cred, flags, &bp); + if (error != 0) { + vnode_pager_setsize(vp, ip->i_size); + break; + } + if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL)) + bp->b_flags |= B_NOCACHE; + + if (uio->uio_offset + xfersize > ip->i_size) { + ip->i_size = uio->uio_offset + xfersize; + DIP_SET(ip, i_size, ip->i_size); + } + + size = blksize(fs, ip, lbn) - bp->b_resid; + if (size < xfersize) + xfersize = size; + + if (buf_mapped(bp)) { + error = vn_io_fault_uiomove((char *)bp->b_data + + blkoffset, (int)xfersize, uio); + } else { + error = vn_io_fault_pgmove(bp->b_pages, blkoffset, + (int)xfersize, uio); + } + /* + * If the buffer is not already filled and we encounter an + * error while trying to fill it, we have to clear out any + * garbage data from the pages instantiated for the buffer. + * If we do not, a failed uiomove() during a write can leave + * the prior contents of the pages exposed to a userland mmap. + * + * Note that we need only clear buffers with a transfer size + * equal to the block size because buffers with a shorter + * transfer size were cleared above by the call to UFS_BALLOC() + * with the BA_CLRBUF flag set. + * + * If the source region for uiomove identically mmaps the + * buffer, uiomove() performed the NOP copy, and the buffer + * content remains valid because the page fault handler + * validated the pages. + */ + if (error != 0 && (bp->b_flags & B_CACHE) == 0 && + fs->fs_bsize == xfersize) + vfs_bio_clrbuf(bp); + + vfs_bio_set_flags(bp, ioflag); + + /* + * If IO_SYNC each buffer is written synchronously. Otherwise + * if we have a severe page deficiency write the buffer + * asynchronously. Otherwise try to cluster, and if that + * doesn't do it then either do an async write (if O_DIRECT), + * or a delayed write (if not). + */ + if (ioflag & IO_SYNC) { + (void)bwrite(bp); + } else if (vm_page_count_severe() || + buf_dirty_count_severe() || + (ioflag & IO_ASYNC)) { + bp->b_flags |= B_CLUSTEROK; + bawrite(bp); + } else if (xfersize + blkoffset == fs->fs_bsize) { + if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) { + bp->b_flags |= B_CLUSTEROK; + cluster_write(vp, bp, ip->i_size, seqcount, + GB_UNMAPPED); + } else { + bawrite(bp); + } + } else if (ioflag & IO_DIRECT) { + bp->b_flags |= B_CLUSTEROK; + bawrite(bp); + } else { + bp->b_flags |= B_CLUSTEROK; + bdwrite(bp); + } + if (error || xfersize == 0) + break; + ip->i_flag |= IN_CHANGE | IN_UPDATE; + } + /* + * If we successfully wrote any data, and we are not the superuser + * we clear the setuid and setgid bits as a precaution against + * tampering. + */ + if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && + ap->a_cred) { + if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0)) { + ip->i_mode &= ~(ISUID | ISGID); + DIP_SET(ip, i_mode, ip->i_mode); + } + } + if (error) { + if (ioflag & IO_UNIT) { + (void)ffs_truncate(vp, osize, + IO_NORMAL | (ioflag & IO_SYNC), ap->a_cred); + uio->uio_offset -= resid - uio->uio_resid; + uio->uio_resid = resid; + } + } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) + error = ffs_update(vp, 1); + return (error); +} + +/* + * Extended attribute area reading. 
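+ *
+ * The extended attribute area occupies its own small set of blocks
+ * addressed with negative logical block numbers: block N of the EA
+ * area is read as logical block (-1 - N), which is how the code below
+ * keeps it distinct from the regular data blocks of the vnode.  Its
+ * length is recorded in di_extsize in the UFS2 dinode.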
+ */ +static int +ffs_extread(struct vnode *vp, struct uio *uio, int ioflag) +{ + struct inode *ip; + struct ufs2_dinode *dp; + struct fs *fs; + struct buf *bp; + ufs_lbn_t lbn, nextlbn; + off_t bytesinfile; + long size, xfersize, blkoffset; + ssize_t orig_resid; + int error; + + ip = VTOI(vp); + fs = ITOFS(ip); + dp = ip->i_din2; + +#ifdef INVARIANTS + if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC) + panic("ffs_extread: mode"); + +#endif + orig_resid = uio->uio_resid; + KASSERT(orig_resid >= 0, ("ffs_extread: uio->uio_resid < 0")); + if (orig_resid == 0) + return (0); + KASSERT(uio->uio_offset >= 0, ("ffs_extread: uio->uio_offset < 0")); + + for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { + if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0) + break; + lbn = lblkno(fs, uio->uio_offset); + nextlbn = lbn + 1; + + /* + * size of buffer. The buffer representing the + * end of the file is rounded up to the size of + * the block type ( fragment or full block, + * depending ). + */ + size = sblksize(fs, dp->di_extsize, lbn); + blkoffset = blkoff(fs, uio->uio_offset); + + /* + * The amount we want to transfer in this iteration is + * one FS block less the amount of the data before + * our startpoint (duh!) + */ + xfersize = fs->fs_bsize - blkoffset; + + /* + * But if we actually want less than the block, + * or the file doesn't have a whole block more of data, + * then use the lesser number. + */ + if (uio->uio_resid < xfersize) + xfersize = uio->uio_resid; + if (bytesinfile < xfersize) + xfersize = bytesinfile; + + if (lblktosize(fs, nextlbn) >= dp->di_extsize) { + /* + * Don't do readahead if this is the end of the info. + */ + error = bread(vp, -1 - lbn, size, NOCRED, &bp); + } else { + /* + * If we have a second block, then + * fire off a request for a readahead + * as well as a read. Note that the 4th and 5th + * arguments point to arrays of the size specified in + * the 6th argument. + */ + u_int nextsize = sblksize(fs, dp->di_extsize, nextlbn); + + nextlbn = -1 - nextlbn; + error = breadn(vp, -1 - lbn, + size, &nextlbn, &nextsize, 1, NOCRED, &bp); + } + if (error) { + brelse(bp); + bp = NULL; + break; + } + + /* + * We should only get non-zero b_resid when an I/O error + * has occurred, which should cause us to break above. + * However, if the short read did not cause an error, + * then we want to ensure that we do not uiomove bad + * or uninitialized data. + */ + size -= bp->b_resid; + if (size < xfersize) { + if (size == 0) + break; + xfersize = size; + } + + error = uiomove((char *)bp->b_data + blkoffset, + (int)xfersize, uio); + if (error) + break; + vfs_bio_brelse(bp, ioflag); + } + + /* + * This can only happen in the case of an error + * because the loop above resets bp to NULL on each iteration + * and on normal completion has not set a new value into it. + * so it must have come from a 'break' statement + */ + if (bp != NULL) + vfs_bio_brelse(bp, ioflag); + return (error); +} + +/* + * Extended attribute area writing. 
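+ *
+ * The EA area can occupy at most NXADDR blocks, so writes that would
+ * extend it past NXADDR * fs_bsize fail with EFBIG below.  As a rough
+ * illustration, with NXADDR = 2 and a 32768-byte block size the
+ * per-inode extended attribute area is capped at 64 kB.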
+ */ +static int +ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred) +{ + struct inode *ip; + struct ufs2_dinode *dp; + struct fs *fs; + struct buf *bp; + ufs_lbn_t lbn; + off_t osize; + ssize_t resid; + int blkoffset, error, flags, size, xfersize; + + ip = VTOI(vp); + fs = ITOFS(ip); + dp = ip->i_din2; + +#ifdef INVARIANTS + if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC) + panic("ffs_extwrite: mode"); +#endif + + if (ioflag & IO_APPEND) + uio->uio_offset = dp->di_extsize; + KASSERT(uio->uio_offset >= 0, ("ffs_extwrite: uio->uio_offset < 0")); + KASSERT(uio->uio_resid >= 0, ("ffs_extwrite: uio->uio_resid < 0")); + if ((uoff_t)uio->uio_offset + uio->uio_resid > NXADDR * fs->fs_bsize) + return (EFBIG); + + resid = uio->uio_resid; + osize = dp->di_extsize; + flags = IO_EXT; + if ((ioflag & IO_SYNC) && !DOINGASYNC(vp)) + flags |= IO_SYNC; + + for (error = 0; uio->uio_resid > 0;) { + lbn = lblkno(fs, uio->uio_offset); + blkoffset = blkoff(fs, uio->uio_offset); + xfersize = fs->fs_bsize - blkoffset; + if (uio->uio_resid < xfersize) + xfersize = uio->uio_resid; + + /* + * We must perform a read-before-write if the transfer size + * does not cover the entire buffer. + */ + if (fs->fs_bsize > xfersize) + flags |= BA_CLRBUF; + else + flags &= ~BA_CLRBUF; + error = UFS_BALLOC(vp, uio->uio_offset, xfersize, + ucred, flags, &bp); + if (error != 0) + break; + /* + * If the buffer is not valid we have to clear out any + * garbage data from the pages instantiated for the buffer. + * If we do not, a failed uiomove() during a write can leave + * the prior contents of the pages exposed to a userland + * mmap(). XXX deal with uiomove() errors a better way. + */ + if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize) + vfs_bio_clrbuf(bp); + + if (uio->uio_offset + xfersize > dp->di_extsize) + dp->di_extsize = uio->uio_offset + xfersize; + + size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid; + if (size < xfersize) + xfersize = size; + + error = + uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); + + vfs_bio_set_flags(bp, ioflag); + + /* + * If IO_SYNC each buffer is written synchronously. Otherwise + * if we have a severe page deficiency write the buffer + * asynchronously. Otherwise try to cluster, and if that + * doesn't do it then either do an async write (if O_DIRECT), + * or a delayed write (if not). + */ + if (ioflag & IO_SYNC) { + (void)bwrite(bp); + } else if (vm_page_count_severe() || + buf_dirty_count_severe() || + xfersize + blkoffset == fs->fs_bsize || + (ioflag & (IO_ASYNC | IO_DIRECT))) + bawrite(bp); + else + bdwrite(bp); + if (error || xfersize == 0) + break; + ip->i_flag |= IN_CHANGE; + } + /* + * If we successfully wrote any data, and we are not the superuser + * we clear the setuid and setgid bits as a precaution against + * tampering. + */ + if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && ucred) { + if (priv_check_cred(ucred, PRIV_VFS_RETAINSUGID, 0)) { + ip->i_mode &= ~(ISUID | ISGID); + dp->di_mode = ip->i_mode; + } + } + if (error) { + if (ioflag & IO_UNIT) { + (void)ffs_truncate(vp, osize, + IO_EXT | (ioflag&IO_SYNC), ucred); + uio->uio_offset -= resid - uio->uio_resid; + uio->uio_resid = resid; + } + } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) + error = ffs_update(vp, 1); + return (error); +} + + +/* + * Vnode operating to retrieve a named extended attribute. 
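+ *
+ * Each attribute in the EA area is stored as one variable-length,
+ * 8-byte-aligned record.  Reconstructed from the parsing and building
+ * code below (ffs_findextattr() and ffs_setextattr()), the layout is
+ * roughly:
+ *
+ *	uint32_t length;	total record length, including padding
+ *	uint8_t  namespace;	EXTATTR_NAMESPACE_* value
+ *	uint8_t  contentpadlen;	bytes of padding after the content
+ *	uint8_t  namelen;	length of the name that follows
+ *	char     name[namelen];	attribute name, padded so that the
+ *				header plus name is a multiple of 8
+ *	char     content[];	attribute data, followed by
+ *				contentpadlen bytes of padding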
+ * + * Locate a particular EA (nspace:name) in the area (ptr:length), and return + * the length of the EA, and possibly the pointer to the entry and to the data. + */ +static int +ffs_findextattr(u_char *ptr, u_int length, int nspace, const char *name, u_char **eap, u_char **eac) +{ + u_char *p, *pe, *pn, *p0; + int eapad1, eapad2, ealength, ealen, nlen; + uint32_t ul; + + pe = ptr + length; + nlen = strlen(name); + + for (p = ptr; p < pe; p = pn) { + p0 = p; + bcopy(p, &ul, sizeof(ul)); + pn = p + ul; + /* make sure this entry is complete */ + if (pn > pe) + break; + p += sizeof(uint32_t); + if (*p != nspace) + continue; + p++; + eapad2 = *p++; + if (*p != nlen) + continue; + p++; + if (bcmp(p, name, nlen)) + continue; + ealength = sizeof(uint32_t) + 3 + nlen; + eapad1 = 8 - (ealength % 8); + if (eapad1 == 8) + eapad1 = 0; + ealength += eapad1; + ealen = ul - ealength - eapad2; + p += nlen + eapad1; + if (eap != NULL) + *eap = p0; + if (eac != NULL) + *eac = p; + return (ealen); + } + return(-1); +} + +static int +ffs_rdextattr(u_char **p, struct vnode *vp, struct thread *td, int extra) +{ + struct inode *ip; + struct ufs2_dinode *dp; + struct fs *fs; + struct uio luio; + struct iovec liovec; + u_int easize; + int error; + u_char *eae; + + ip = VTOI(vp); + fs = ITOFS(ip); + dp = ip->i_din2; + easize = dp->di_extsize; + if ((uoff_t)easize + extra > NXADDR * fs->fs_bsize) + return (EFBIG); + + eae = malloc(easize + extra, M_TEMP, M_WAITOK); + + liovec.iov_base = eae; + liovec.iov_len = easize; + luio.uio_iov = &liovec; + luio.uio_iovcnt = 1; + luio.uio_offset = 0; + luio.uio_resid = easize; + luio.uio_segflg = UIO_SYSSPACE; + luio.uio_rw = UIO_READ; + luio.uio_td = td; + + error = ffs_extread(vp, &luio, IO_EXT | IO_SYNC); + if (error) { + free(eae, M_TEMP); + return(error); + } + *p = eae; + return (0); +} + +static void +ffs_lock_ea(struct vnode *vp) +{ + struct inode *ip; + + ip = VTOI(vp); + VI_LOCK(vp); + while (ip->i_flag & IN_EA_LOCKED) { + ip->i_flag |= IN_EA_LOCKWAIT; + msleep(&ip->i_ea_refs, &vp->v_interlock, PINOD + 2, "ufs_ea", + 0); + } + ip->i_flag |= IN_EA_LOCKED; + VI_UNLOCK(vp); +} + +static void +ffs_unlock_ea(struct vnode *vp) +{ + struct inode *ip; + + ip = VTOI(vp); + VI_LOCK(vp); + if (ip->i_flag & IN_EA_LOCKWAIT) + wakeup(&ip->i_ea_refs); + ip->i_flag &= ~(IN_EA_LOCKED | IN_EA_LOCKWAIT); + VI_UNLOCK(vp); +} + +static int +ffs_open_ea(struct vnode *vp, struct ucred *cred, struct thread *td) +{ + struct inode *ip; + struct ufs2_dinode *dp; + int error; + + ip = VTOI(vp); + + ffs_lock_ea(vp); + if (ip->i_ea_area != NULL) { + ip->i_ea_refs++; + ffs_unlock_ea(vp); + return (0); + } + dp = ip->i_din2; + error = ffs_rdextattr(&ip->i_ea_area, vp, td, 0); + if (error) { + ffs_unlock_ea(vp); + return (error); + } + ip->i_ea_len = dp->di_extsize; + ip->i_ea_error = 0; + ip->i_ea_refs++; + ffs_unlock_ea(vp); + return (0); +} + +/* + * Vnode extattr transaction commit/abort + */ +static int +ffs_close_ea(struct vnode *vp, int commit, struct ucred *cred, struct thread *td) +{ + struct inode *ip; + struct uio luio; + struct iovec liovec; + int error; + struct ufs2_dinode *dp; + + ip = VTOI(vp); + + ffs_lock_ea(vp); + if (ip->i_ea_area == NULL) { + ffs_unlock_ea(vp); + return (EINVAL); + } + dp = ip->i_din2; + error = ip->i_ea_error; + if (commit && error == 0) { + ASSERT_VOP_ELOCKED(vp, "ffs_close_ea commit"); + if (cred == NOCRED) + cred = vp->v_mount->mnt_cred; + liovec.iov_base = ip->i_ea_area; + liovec.iov_len = ip->i_ea_len; + luio.uio_iov = &liovec; + luio.uio_iovcnt = 1; + 
luio.uio_offset = 0; + luio.uio_resid = ip->i_ea_len; + luio.uio_segflg = UIO_SYSSPACE; + luio.uio_rw = UIO_WRITE; + luio.uio_td = td; + /* XXX: I'm not happy about truncating to zero size */ + if (ip->i_ea_len < dp->di_extsize) + error = ffs_truncate(vp, 0, IO_EXT, cred); + error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred); + } + if (--ip->i_ea_refs == 0) { + free(ip->i_ea_area, M_TEMP); + ip->i_ea_area = NULL; + ip->i_ea_len = 0; + ip->i_ea_error = 0; + } + ffs_unlock_ea(vp); + return (error); +} + +/* + * Vnode extattr strategy routine for fifos. + * + * We need to check for a read or write of the external attributes. + * Otherwise we just fall through and do the usual thing. + */ +static int +ffsext_strategy(struct vop_strategy_args *ap) +/* +struct vop_strategy_args { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + struct buf *a_bp; +}; +*/ +{ + struct vnode *vp; + daddr_t lbn; + + vp = ap->a_vp; + lbn = ap->a_bp->b_lblkno; + if (I_IS_UFS2(VTOI(vp)) && lbn < 0 && lbn >= -NXADDR) + return (VOP_STRATEGY_APV(&ufs_vnodeops, ap)); + if (vp->v_type == VFIFO) + return (VOP_STRATEGY_APV(&ufs_fifoops, ap)); + panic("spec nodes went here"); +} + +/* + * Vnode extattr transaction commit/abort + */ +static int +ffs_openextattr(struct vop_openextattr_args *ap) +/* +struct vop_openextattr_args { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + IN struct ucred *a_cred; + IN struct thread *a_td; +}; +*/ +{ + + if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) + return (EOPNOTSUPP); + + return (ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td)); +} + + +/* + * Vnode extattr transaction commit/abort + */ +static int +ffs_closeextattr(struct vop_closeextattr_args *ap) +/* +struct vop_closeextattr_args { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + int a_commit; + IN struct ucred *a_cred; + IN struct thread *a_td; +}; +*/ +{ + + if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) + return (EOPNOTSUPP); + + if (ap->a_commit && (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)) + return (EROFS); + + return (ffs_close_ea(ap->a_vp, ap->a_commit, ap->a_cred, ap->a_td)); +} + +/* + * Vnode operation to remove a named attribute. + */ +static int +ffs_deleteextattr(struct vop_deleteextattr_args *ap) +/* +vop_deleteextattr { + IN struct vnode *a_vp; + IN int a_attrnamespace; + IN const char *a_name; + IN struct ucred *a_cred; + IN struct thread *a_td; +}; +*/ +{ + struct inode *ip; + struct fs *fs; + uint32_t ealength, ul; + int ealen, olen, eapad1, eapad2, error, i, easize; + u_char *eae, *p; + + ip = VTOI(ap->a_vp); + fs = ITOFS(ip); + + if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) + return (EOPNOTSUPP); + + if (strlen(ap->a_name) == 0) + return (EINVAL); + + if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); + + error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, + ap->a_cred, ap->a_td, VWRITE); + if (error) { + + /* + * ffs_lock_ea is not needed there, because the vnode + * must be exclusively locked. 
+ */ + if (ip->i_ea_area != NULL && ip->i_ea_error == 0) + ip->i_ea_error = error; + return (error); + } + + error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td); + if (error) + return (error); + + ealength = eapad1 = ealen = eapad2 = 0; + + eae = malloc(ip->i_ea_len, M_TEMP, M_WAITOK); + bcopy(ip->i_ea_area, eae, ip->i_ea_len); + easize = ip->i_ea_len; + + olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name, + &p, NULL); + if (olen == -1) { + /* delete but nonexistent */ + free(eae, M_TEMP); + ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); + return(ENOATTR); + } + bcopy(p, &ul, sizeof ul); + i = p - eae + ul; + if (ul != ealength) { + bcopy(p + ul, p + ealength, easize - i); + easize += (ealength - ul); + } + if (easize > NXADDR * fs->fs_bsize) { + free(eae, M_TEMP); + ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); + if (ip->i_ea_area != NULL && ip->i_ea_error == 0) + ip->i_ea_error = ENOSPC; + return(ENOSPC); + } + p = ip->i_ea_area; + ip->i_ea_area = eae; + ip->i_ea_len = easize; + free(p, M_TEMP); + error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td); + return(error); +} + +/* + * Vnode operation to retrieve a named extended attribute. + */ +static int +ffs_getextattr(struct vop_getextattr_args *ap) +/* +vop_getextattr { + IN struct vnode *a_vp; + IN int a_attrnamespace; + IN const char *a_name; + INOUT struct uio *a_uio; + OUT size_t *a_size; + IN struct ucred *a_cred; + IN struct thread *a_td; +}; +*/ +{ + struct inode *ip; + u_char *eae, *p; + unsigned easize; + int error, ealen; + + ip = VTOI(ap->a_vp); + + if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) + return (EOPNOTSUPP); + + error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, + ap->a_cred, ap->a_td, VREAD); + if (error) + return (error); + + error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td); + if (error) + return (error); + + eae = ip->i_ea_area; + easize = ip->i_ea_len; + + ealen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name, + NULL, &p); + if (ealen >= 0) { + error = 0; + if (ap->a_size != NULL) + *ap->a_size = ealen; + else if (ap->a_uio != NULL) + error = uiomove(p, ealen, ap->a_uio); + } else + error = ENOATTR; + + ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); + return(error); +} + +/* + * Vnode operation to retrieve extended attributes on a vnode. 
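+ *
+ * The list is returned in the form used by extattr_list_file(2): for
+ * each attribute in the requested namespace a single length byte is
+ * copied out, followed by that many bytes of name (no NUL terminator
+ * and no attribute data).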
+ */ +static int +ffs_listextattr(struct vop_listextattr_args *ap) +/* +vop_listextattr { + IN struct vnode *a_vp; + IN int a_attrnamespace; + INOUT struct uio *a_uio; + OUT size_t *a_size; + IN struct ucred *a_cred; + IN struct thread *a_td; +}; +*/ +{ + struct inode *ip; + u_char *eae, *p, *pe, *pn; + unsigned easize; + uint32_t ul; + int error, ealen; + + ip = VTOI(ap->a_vp); + + if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) + return (EOPNOTSUPP); + + error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, + ap->a_cred, ap->a_td, VREAD); + if (error) + return (error); + + error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td); + if (error) + return (error); + eae = ip->i_ea_area; + easize = ip->i_ea_len; + + error = 0; + if (ap->a_size != NULL) + *ap->a_size = 0; + pe = eae + easize; + for(p = eae; error == 0 && p < pe; p = pn) { + bcopy(p, &ul, sizeof(ul)); + pn = p + ul; + if (pn > pe) + break; + p += sizeof(ul); + if (*p++ != ap->a_attrnamespace) + continue; + p++; /* pad2 */ + ealen = *p; + if (ap->a_size != NULL) { + *ap->a_size += ealen + 1; + } else if (ap->a_uio != NULL) { + error = uiomove(p, ealen + 1, ap->a_uio); + } + } + ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); + return(error); +} + +/* + * Vnode operation to set a named attribute. + */ +static int +ffs_setextattr(struct vop_setextattr_args *ap) +/* +vop_setextattr { + IN struct vnode *a_vp; + IN int a_attrnamespace; + IN const char *a_name; + INOUT struct uio *a_uio; + IN struct ucred *a_cred; + IN struct thread *a_td; +}; +*/ +{ + struct inode *ip; + struct fs *fs; + uint32_t ealength, ul; + ssize_t ealen; + int olen, eapad1, eapad2, error, i, easize; + u_char *eae, *p; + + ip = VTOI(ap->a_vp); + fs = ITOFS(ip); + + if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) + return (EOPNOTSUPP); + + if (strlen(ap->a_name) == 0) + return (EINVAL); + + /* XXX Now unsupported API to delete EAs using NULL uio. */ + if (ap->a_uio == NULL) + return (EOPNOTSUPP); + + if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); + + ealen = ap->a_uio->uio_resid; + if (ealen < 0 || ealen > lblktosize(fs, NXADDR)) + return (EINVAL); + + error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, + ap->a_cred, ap->a_td, VWRITE); + if (error) { + + /* + * ffs_lock_ea is not needed there, because the vnode + * must be exclusively locked. 
+ */ + if (ip->i_ea_area != NULL && ip->i_ea_error == 0) + ip->i_ea_error = error; + return (error); + } + + error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td); + if (error) + return (error); + + ealength = sizeof(uint32_t) + 3 + strlen(ap->a_name); + eapad1 = 8 - (ealength % 8); + if (eapad1 == 8) + eapad1 = 0; + eapad2 = 8 - (ealen % 8); + if (eapad2 == 8) + eapad2 = 0; + ealength += eapad1 + ealen + eapad2; + + eae = malloc(ip->i_ea_len + ealength, M_TEMP, M_WAITOK); + bcopy(ip->i_ea_area, eae, ip->i_ea_len); + easize = ip->i_ea_len; + + olen = ffs_findextattr(eae, easize, + ap->a_attrnamespace, ap->a_name, &p, NULL); + if (olen == -1) { + /* new, append at end */ + p = eae + easize; + easize += ealength; + } else { + bcopy(p, &ul, sizeof ul); + i = p - eae + ul; + if (ul != ealength) { + bcopy(p + ul, p + ealength, easize - i); + easize += (ealength - ul); + } + } + if (easize > lblktosize(fs, NXADDR)) { + free(eae, M_TEMP); + ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); + if (ip->i_ea_area != NULL && ip->i_ea_error == 0) + ip->i_ea_error = ENOSPC; + return(ENOSPC); + } + bcopy(&ealength, p, sizeof(ealength)); + p += sizeof(ealength); + *p++ = ap->a_attrnamespace; + *p++ = eapad2; + *p++ = strlen(ap->a_name); + strcpy(p, ap->a_name); + p += strlen(ap->a_name); + bzero(p, eapad1); + p += eapad1; + error = uiomove(p, ealen, ap->a_uio); + if (error) { + free(eae, M_TEMP); + ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); + if (ip->i_ea_area != NULL && ip->i_ea_error == 0) + ip->i_ea_error = error; + return(error); + } + p += ealen; + bzero(p, eapad2); + + p = ip->i_ea_area; + ip->i_ea_area = eae; + ip->i_ea_len = easize; + free(p, M_TEMP); + error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td); + return(error); +} + +/* + * Vnode pointer to File handle + */ +static int +ffs_vptofh(struct vop_vptofh_args *ap) +/* +vop_vptofh { + IN struct vnode *a_vp; + IN struct fid *a_fhp; +}; +*/ +{ + struct inode *ip; + struct ufid *ufhp; + + ip = VTOI(ap->a_vp); + ufhp = (struct ufid *)ap->a_fhp; + ufhp->ufid_len = sizeof(struct ufid); + ufhp->ufid_ino = ip->i_number; + ufhp->ufid_gen = ip->i_gen; + return (0); +} + +SYSCTL_DECL(_vfs_ffs); +static int use_buf_pager = 0; +SYSCTL_INT(_vfs_ffs, OID_AUTO, use_buf_pager, CTLFLAG_RWTUN, &use_buf_pager, 0, + "Always use buffer pager instead of bmap"); + +static daddr_t +ffs_gbp_getblkno(struct vnode *vp, vm_ooffset_t off) +{ + + return (lblkno(VFSTOUFS(vp->v_mount)->um_fs, off)); +} + +static int +ffs_gbp_getblksz(struct vnode *vp, daddr_t lbn) +{ + + return (blksize(VFSTOUFS(vp->v_mount)->um_fs, VTOI(vp), lbn)); +} + +static int +ffs_getpages(struct vop_getpages_args *ap) +{ + struct vnode *vp; + struct ufsmount *um; + + vp = ap->a_vp; + um = VFSTOUFS(vp->v_mount); + + if (!use_buf_pager && um->um_devvp->v_bufobj.bo_bsize <= PAGE_SIZE) + return (vnode_pager_generic_getpages(vp, ap->a_m, ap->a_count, + ap->a_rbehind, ap->a_rahead, NULL, NULL)); + return (vfs_bio_getpages(vp, ap->a_m, ap->a_count, ap->a_rbehind, + ap->a_rahead, ffs_gbp_getblkno, ffs_gbp_getblksz)); +} diff --git a/Dump/ufs/ffs/fs.h b/Dump/ufs/ffs/fs.h new file mode 100644 index 0000000..233b347 --- /dev/null +++ b/Dump/ufs/ffs/fs.h @@ -0,0 +1,792 @@ +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)fs.h 8.13 (Berkeley) 3/21/95 + * $FreeBSD: releng/11.2/sys/ufs/ffs/fs.h 331722 2018-03-29 02:50:57Z eadler $ + */ + +#ifndef _UFS_FFS_FS_H_ +#define _UFS_FFS_FS_H_ + +#include +#include + +/* + * Each disk drive contains some number of filesystems. + * A filesystem consists of a number of cylinder groups. + * Each cylinder group has inodes and data. + * + * A filesystem is described by its super-block, which in turn + * describes the cylinder groups. The super-block is critical + * data and is replicated in each cylinder group to protect against + * catastrophic loss. This is done at `newfs' time and the critical + * super-block data does not change, so the copies need not be + * referenced further unless disaster strikes. + * + * For filesystem fs, the offsets of the various blocks of interest + * are given in the super block as: + * [fs->fs_sblkno] Super-block + * [fs->fs_cblkno] Cylinder group block + * [fs->fs_iblkno] Inode blocks + * [fs->fs_dblkno] Data blocks + * The beginning of cylinder group cg in fs, is given by + * the ``cgbase(fs, cg)'' macro. + * + * Depending on the architecture and the media, the superblock may + * reside in any one of four places. For tiny media where every block + * counts, it is placed at the very front of the partition. Historically, + * UFS1 placed it 8K from the front to leave room for the disk label and + * a small bootstrap. For UFS2 it got moved to 64K from the front to leave + * room for the disk label and a bigger bootstrap, and for really piggy + * systems we check at 256K from the front if the first three fail. In + * all cases the size of the superblock will be SBLOCKSIZE. All values are + * given in byte-offset form, so they do not imply a sector size. The + * SBLOCKSEARCH specifies the order in which the locations should be searched. + */ +#define SBLOCK_FLOPPY 0 +#define SBLOCK_UFS1 8192 +#define SBLOCK_UFS2 65536 +#define SBLOCK_PIGGY 262144 +#define SBLOCKSIZE 8192 +#define SBLOCKSEARCH \ + { SBLOCK_UFS2, SBLOCK_UFS1, SBLOCK_FLOPPY, SBLOCK_PIGGY, -1 } + +/* + * Max number of fragments per block. This value is NOT tweakable. 
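+ *
+ * The block size may therefore be at most 8 times the fragment size;
+ * e.g., the common 32768-byte block / 4096-byte fragment layout uses
+ * the full ratio of 8.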
+ */ +#define MAXFRAG 8 + +/* + * Addresses stored in inodes are capable of addressing fragments + * of `blocks'. File system blocks of at most size MAXBSIZE can + * be optionally broken into 2, 4, or 8 pieces, each of which is + * addressable; these pieces may be DEV_BSIZE, or some multiple of + * a DEV_BSIZE unit. + * + * Large files consist of exclusively large data blocks. To avoid + * undue wasted disk space, the last data block of a small file may be + * allocated as only as many fragments of a large block as are + * necessary. The filesystem format retains only a single pointer + * to such a fragment, which is a piece of a single large block that + * has been divided. The size of such a fragment is determinable from + * information in the inode, using the ``blksize(fs, ip, lbn)'' macro. + * + * The filesystem records space availability at the fragment level; + * to determine block availability, aligned fragments are examined. + */ + +/* + * MINBSIZE is the smallest allowable block size. + * In order to insure that it is possible to create files of size + * 2^32 with only two levels of indirection, MINBSIZE is set to 4096. + * MINBSIZE must be big enough to hold a cylinder group block, + * thus changes to (struct cg) must keep its size within MINBSIZE. + * Note that super blocks are always of size SBLOCKSIZE, + * and that both SBLOCKSIZE and MAXBSIZE must be >= MINBSIZE. + */ +#define MINBSIZE 4096 + +/* + * The path name on which the filesystem is mounted is maintained + * in fs_fsmnt. MAXMNTLEN defines the amount of space allocated in + * the super block for this name. + */ +#define MAXMNTLEN 468 + +/* + * The volume name for this filesystem is maintained in fs_volname. + * MAXVOLLEN defines the length of the buffer allocated. + */ +#define MAXVOLLEN 32 + +/* + * There is a 128-byte region in the superblock reserved for in-core + * pointers to summary information. Originally this included an array + * of pointers to blocks of struct csum; now there are just a few + * pointers and the remaining space is padded with fs_ocsp[]. + * + * NOCSPTRS determines the size of this padding. One pointer (fs_csp) + * is taken away to point to a contiguous array of struct csum for + * all cylinder groups; a second (fs_maxcluster) points to an array + * of cluster sizes that is computed as cylinder groups are inspected, + * and the third points to an array that tracks the creation of new + * directories. A fourth pointer, fs_active, is used when creating + * snapshots; it points to a bitmap of cylinder groups for which the + * free-block bitmap has changed since the snapshot operation began. + */ +#define NOCSPTRS ((128 / sizeof(void *)) - 4) + +/* + * A summary of contiguous blocks of various sizes is maintained + * in each cylinder group. Normally this is set by the initial + * value of fs_maxcontig. To conserve space, a maximum summary size + * is set by FS_MAXCONTIG. + */ +#define FS_MAXCONTIG 16 + +/* + * MINFREE gives the minimum acceptable percentage of filesystem + * blocks which may be free. If the freelist drops below this level + * only the superuser may continue to allocate blocks. This may + * be set to 0 if no reserve of free blocks is deemed necessary, + * however throughput drops by fifty percent if the filesystem + * is run at between 95% and 100% full; thus the minimum default + * value of fs_minfree is 5%. However, to get good clustering + * performance, 10% is a better choice. hence we use 10% as our + * default value. 
With 10% free space, fragmentation is not a + * problem, so we choose to optimize for time. + */ +#define MINFREE 8 +#define DEFAULTOPT FS_OPTTIME + +/* + * Grigoriy Orlov has done some extensive work to fine + * tune the layout preferences for directories within a filesystem. + * His algorithm can be tuned by adjusting the following parameters + * which tell the system the average file size and the average number + * of files per directory. These defaults are well selected for typical + * filesystems, but may need to be tuned for odd cases like filesystems + * being used for squid caches or news spools. + */ +#define AVFILESIZ 16384 /* expected average file size */ +#define AFPDIR 64 /* expected number of files per directory */ + +/* + * The maximum number of snapshot nodes that can be associated + * with each filesystem. This limit affects only the number of + * snapshot files that can be recorded within the superblock so + * that they can be found when the filesystem is mounted. However, + * maintaining too many will slow the filesystem performance, so + * having this limit is a good idea. + */ +#define FSMAXSNAP 20 + +/* + * Used to identify special blocks in snapshots: + * + * BLK_NOCOPY - A block that was unallocated at the time the snapshot + * was taken, hence does not need to be copied when written. + * BLK_SNAP - A block held by another snapshot that is not needed by this + * snapshot. When the other snapshot is freed, the BLK_SNAP entries + * are converted to BLK_NOCOPY. These are needed to allow fsck to + * identify blocks that are in use by other snapshots (which are + * expunged from this snapshot). + */ +#define BLK_NOCOPY ((ufs2_daddr_t)(1)) +#define BLK_SNAP ((ufs2_daddr_t)(2)) + +/* + * Sysctl values for the fast filesystem. + */ +#define FFS_ADJ_REFCNT 1 /* adjust inode reference count */ +#define FFS_ADJ_BLKCNT 2 /* adjust inode used block count */ +#define FFS_BLK_FREE 3 /* free range of blocks in map */ +#define FFS_DIR_FREE 4 /* free specified dir inodes in map */ +#define FFS_FILE_FREE 5 /* free specified file inodes in map */ +#define FFS_SET_FLAGS 6 /* set filesystem flags */ +#define FFS_ADJ_NDIR 7 /* adjust number of directories */ +#define FFS_ADJ_NBFREE 8 /* adjust number of free blocks */ +#define FFS_ADJ_NIFREE 9 /* adjust number of free inodes */ +#define FFS_ADJ_NFFREE 10 /* adjust number of free frags */ +#define FFS_ADJ_NUMCLUSTERS 11 /* adjust number of free clusters */ +#define FFS_SET_CWD 12 /* set current directory */ +#define FFS_SET_DOTDOT 13 /* set inode number for ".." */ +#define FFS_UNLINK 14 /* remove a name in the filesystem */ +#define FFS_SET_INODE 15 /* update an on-disk inode */ +#define FFS_SET_BUFOUTPUT 16 /* set buffered writing on descriptor */ +#define FFS_MAXID 16 /* number of valid ffs ids */ + +/* + * Command structure passed in to the filesystem to adjust filesystem values. + */ +#define FFS_CMD_VERSION 0x19790518 /* version ID */ +struct fsck_cmd { + int32_t version; /* version of command structure */ + int32_t handle; /* reference to filesystem to be changed */ + int64_t value; /* inode or block number to be affected */ + int64_t size; /* amount or range to be adjusted */ + int64_t spare; /* reserved for future use */ +}; + +/* + * A recovery structure placed at the end of the boot block area by newfs + * that can be used by fsck to search for alternate superblocks. 
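+ *
+ * The padding pushes the five 32-bit recovery fields into the last
+ * 20 bytes of a 4096-byte sector (RESID below is 4096 - 20), giving
+ * fsck a fixed place to read fsr_fpg, fsr_ncg and friends from when
+ * the primary superblock cannot be used.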
+ */ +#define RESID (4096 - 20) /* disk sector size minus recovery area size */ +struct fsrecovery { + char block[RESID]; /* unused part of sector */ + int32_t fsr_magic; /* magic number */ + int32_t fsr_fsbtodb; /* fsbtodb and dbtofsb shift constant */ + int32_t fsr_sblkno; /* offset of super-block in filesys */ + int32_t fsr_fpg; /* blocks per group * fs_frag */ + u_int32_t fsr_ncg; /* number of cylinder groups */ +}; + +/* + * Per cylinder group information; summarized in blocks allocated + * from first cylinder group data blocks. These blocks have to be + * read in from fs_csaddr (size fs_cssize) in addition to the + * super block. + */ +struct csum { + int32_t cs_ndir; /* number of directories */ + int32_t cs_nbfree; /* number of free blocks */ + int32_t cs_nifree; /* number of free inodes */ + int32_t cs_nffree; /* number of free frags */ +}; +struct csum_total { + int64_t cs_ndir; /* number of directories */ + int64_t cs_nbfree; /* number of free blocks */ + int64_t cs_nifree; /* number of free inodes */ + int64_t cs_nffree; /* number of free frags */ + int64_t cs_numclusters; /* number of free clusters */ + int64_t cs_spare[3]; /* future expansion */ +}; + +/* + * Super block for an FFS filesystem. + */ +struct fs { + int32_t fs_firstfield; /* historic filesystem linked list, */ + int32_t fs_unused_1; /* used for incore super blocks */ + int32_t fs_sblkno; /* offset of super-block in filesys */ + int32_t fs_cblkno; /* offset of cyl-block in filesys */ + int32_t fs_iblkno; /* offset of inode-blocks in filesys */ + int32_t fs_dblkno; /* offset of first data after cg */ + int32_t fs_old_cgoffset; /* cylinder group offset in cylinder */ + int32_t fs_old_cgmask; /* used to calc mod fs_ntrak */ + int32_t fs_old_time; /* last time written */ + int32_t fs_old_size; /* number of blocks in fs */ + int32_t fs_old_dsize; /* number of data blocks in fs */ + u_int32_t fs_ncg; /* number of cylinder groups */ + int32_t fs_bsize; /* size of basic blocks in fs */ + int32_t fs_fsize; /* size of frag blocks in fs */ + int32_t fs_frag; /* number of frags in a block in fs */ +/* these are configuration parameters */ + int32_t fs_minfree; /* minimum percentage of free blocks */ + int32_t fs_old_rotdelay; /* num of ms for optimal next block */ + int32_t fs_old_rps; /* disk revolutions per second */ +/* these fields can be computed from the others */ + int32_t fs_bmask; /* ``blkoff'' calc of blk offsets */ + int32_t fs_fmask; /* ``fragoff'' calc of frag offsets */ + int32_t fs_bshift; /* ``lblkno'' calc of logical blkno */ + int32_t fs_fshift; /* ``numfrags'' calc number of frags */ +/* these are configuration parameters */ + int32_t fs_maxcontig; /* max number of contiguous blks */ + int32_t fs_maxbpg; /* max number of blks per cyl group */ +/* these fields can be computed from the others */ + int32_t fs_fragshift; /* block to frag shift */ + int32_t fs_fsbtodb; /* fsbtodb and dbtofsb shift constant */ + int32_t fs_sbsize; /* actual size of super block */ + int32_t fs_spare1[2]; /* old fs_csmask */ + /* old fs_csshift */ + int32_t fs_nindir; /* value of NINDIR */ + u_int32_t fs_inopb; /* value of INOPB */ + int32_t fs_old_nspf; /* value of NSPF */ +/* yet another configuration parameter */ + int32_t fs_optim; /* optimization preference, see below */ + int32_t fs_old_npsect; /* # sectors/track including spares */ + int32_t fs_old_interleave; /* hardware sector interleave */ + int32_t fs_old_trackskew; /* sector 0 skew, per track */ + int32_t fs_id[2]; /* unique filesystem id */ +/* sizes determined by 
number of cylinder groups and their sizes */ + int32_t fs_old_csaddr; /* blk addr of cyl grp summary area */ + int32_t fs_cssize; /* size of cyl grp summary area */ + int32_t fs_cgsize; /* cylinder group size */ + int32_t fs_spare2; /* old fs_ntrak */ + int32_t fs_old_nsect; /* sectors per track */ + int32_t fs_old_spc; /* sectors per cylinder */ + int32_t fs_old_ncyl; /* cylinders in filesystem */ + int32_t fs_old_cpg; /* cylinders per group */ + u_int32_t fs_ipg; /* inodes per group */ + int32_t fs_fpg; /* blocks per group * fs_frag */ +/* this data must be re-computed after crashes */ + struct csum fs_old_cstotal; /* cylinder summary information */ +/* these fields are cleared at mount time */ + int8_t fs_fmod; /* super block modified flag */ + int8_t fs_clean; /* filesystem is clean flag */ + int8_t fs_ronly; /* mounted read-only flag */ + int8_t fs_old_flags; /* old FS_ flags */ + u_char fs_fsmnt[MAXMNTLEN]; /* name mounted on */ + u_char fs_volname[MAXVOLLEN]; /* volume name */ + u_int64_t fs_swuid; /* system-wide uid */ + int32_t fs_pad; /* due to alignment of fs_swuid */ +/* these fields retain the current block allocation info */ + int32_t fs_cgrotor; /* last cg searched */ + void *fs_ocsp[NOCSPTRS]; /* padding; was list of fs_cs buffers */ + u_int8_t *fs_contigdirs; /* (u) # of contig. allocated dirs */ + struct csum *fs_csp; /* (u) cg summary info buffer */ + int32_t *fs_maxcluster; /* (u) max cluster in each cyl group */ + u_int *fs_active; /* (u) used by snapshots to track fs */ + int32_t fs_old_cpc; /* cyl per cycle in postbl */ + int32_t fs_maxbsize; /* maximum blocking factor permitted */ + int64_t fs_unrefs; /* number of unreferenced inodes */ + int64_t fs_providersize; /* size of underlying GEOM provider */ + int64_t fs_metaspace; /* size of area reserved for metadata */ + int64_t fs_sparecon64[14]; /* old rotation block list head */ + int64_t fs_sblockloc; /* byte offset of standard superblock */ + struct csum_total fs_cstotal; /* (u) cylinder summary information */ + ufs_time_t fs_time; /* last time written */ + int64_t fs_size; /* number of blocks in fs */ + int64_t fs_dsize; /* number of data blocks in fs */ + ufs2_daddr_t fs_csaddr; /* blk addr of cyl grp summary area */ + int64_t fs_pendingblocks; /* (u) blocks being freed */ + u_int32_t fs_pendinginodes; /* (u) inodes being freed */ + uint32_t fs_snapinum[FSMAXSNAP];/* list of snapshot inode numbers */ + u_int32_t fs_avgfilesize; /* expected average file size */ + u_int32_t fs_avgfpdir; /* expected # of files per directory */ + int32_t fs_save_cgsize; /* save real cg size to use fs_bsize */ + ufs_time_t fs_mtime; /* Last mount or fsck time. */ + int32_t fs_sujfree; /* SUJ free list */ + int32_t fs_sparecon32[23]; /* reserved for future constants */ + int32_t fs_flags; /* see FS_ flags below */ + int32_t fs_contigsumsize; /* size of cluster summary array */ + int32_t fs_maxsymlinklen; /* max length of an internal symlink */ + int32_t fs_old_inodefmt; /* format of on-disk inodes */ + u_int64_t fs_maxfilesize; /* maximum representable file size */ + int64_t fs_qbmask; /* ~fs_bmask for use with 64-bit size */ + int64_t fs_qfmask; /* ~fs_fmask for use with 64-bit size */ + int32_t fs_state; /* validate fs_clean field */ + int32_t fs_old_postblformat; /* format of positional layout tables */ + int32_t fs_old_nrpos; /* number of rotational positions */ + int32_t fs_spare5[2]; /* old fs_postbloff */ + /* old fs_rotbloff */ + int32_t fs_magic; /* magic number */ +}; + +/* Sanity checking. 
*/ +#ifdef CTASSERT +CTASSERT(sizeof(struct fs) == 1376); +#endif + +/* + * Filesystem identification + */ +#define FS_UFS1_MAGIC 0x011954 /* UFS1 fast filesystem magic number */ +#define FS_UFS2_MAGIC 0x19540119 /* UFS2 fast filesystem magic number */ +#define FS_BAD_MAGIC 0x19960408 /* UFS incomplete newfs magic number */ +#define FS_OKAY 0x7c269d38 /* superblock checksum */ +#define FS_42INODEFMT -1 /* 4.2BSD inode format */ +#define FS_44INODEFMT 2 /* 4.4BSD inode format */ + +/* + * Preference for optimization. + */ +#define FS_OPTTIME 0 /* minimize allocation time */ +#define FS_OPTSPACE 1 /* minimize disk fragmentation */ + +/* + * Filesystem flags. + * + * The FS_UNCLEAN flag is set by the kernel when the filesystem was + * mounted with fs_clean set to zero. The FS_DOSOFTDEP flag indicates + * that the filesystem should be managed by the soft updates code. + * Note that the FS_NEEDSFSCK flag is set and cleared only by the + * fsck utility. It is set when background fsck finds an unexpected + * inconsistency which requires a traditional foreground fsck to be + * run. Such inconsistencies should only be found after an uncorrectable + * disk error. A foreground fsck will clear the FS_NEEDSFSCK flag when + * it has successfully cleaned up the filesystem. The kernel uses this + * flag to enforce that inconsistent filesystems be mounted read-only. + * The FS_INDEXDIRS flag when set indicates that the kernel maintains + * on-disk auxiliary indexes (such as B-trees) for speeding directory + * accesses. Kernels that do not support auxiliary indices clear the + * flag to indicate that the indices need to be rebuilt (by fsck) before + * they can be used. + * + * FS_ACLS indicates that POSIX.1e ACLs are administratively enabled + * for the file system, so they should be loaded from extended attributes, + * observed for access control purposes, and be administered by object + * owners. FS_NFS4ACLS indicates that NFSv4 ACLs are administratively + * enabled. This flag is mutually exclusive with FS_ACLS. FS_MULTILABEL + * indicates that the TrustedBSD MAC Framework should attempt to back MAC + * labels into extended attributes on the file system rather than maintain + * a single mount label for all objects. + */ +#define FS_UNCLEAN 0x0001 /* filesystem not clean at mount */ +#define FS_DOSOFTDEP 0x0002 /* filesystem using soft dependencies */ +#define FS_NEEDSFSCK 0x0004 /* filesystem needs sync fsck before mount */ +#define FS_SUJ 0x0008 /* Filesystem using softupdate journal */ +#define FS_ACLS 0x0010 /* file system has POSIX.1e ACLs enabled */ +#define FS_MULTILABEL 0x0020 /* file system is MAC multi-label */ +#define FS_GJOURNAL 0x0040 /* gjournaled file system */ +#define FS_FLAGS_UPDATED 0x0080 /* flags have been moved to new location */ +#define FS_NFS4ACLS 0x0100 /* file system has NFSv4 ACLs enabled */ +#define FS_INDEXDIRS 0x0200 /* kernel supports indexed directories */ +#define FS_TRIM 0x0400 /* issue BIO_DELETE for deleted blocks */ + +/* + * Macros to access bits in the fs_active array. + */ +#define ACTIVECGNUM(fs, cg) ((fs)->fs_active[(cg) / (NBBY * sizeof(int))]) +#define ACTIVECGOFF(cg) (1 << ((cg) % (NBBY * sizeof(int)))) +#define ACTIVESET(fs, cg) do { \ + if ((fs)->fs_active) \ + ACTIVECGNUM((fs), (cg)) |= ACTIVECGOFF((cg)); \ +} while (0) +#define ACTIVECLEAR(fs, cg) do { \ + if ((fs)->fs_active) \ + ACTIVECGNUM((fs), (cg)) &= ~ACTIVECGOFF((cg)); \ +} while (0) + +/* + * The size of a cylinder group is calculated by CGSIZE. 
The maximum size + * is limited by the fact that cylinder groups are at most one block. + * Its size is derived from the size of the maps maintained in the + * cylinder group and the (struct cg) size. + */ +#define CGSIZE(fs) \ + /* base cg */ (sizeof(struct cg) + sizeof(int32_t) + \ + /* old btotoff */ (fs)->fs_old_cpg * sizeof(int32_t) + \ + /* old boff */ (fs)->fs_old_cpg * sizeof(u_int16_t) + \ + /* inode map */ howmany((fs)->fs_ipg, NBBY) + \ + /* block map */ howmany((fs)->fs_fpg, NBBY) +\ + /* if present */ ((fs)->fs_contigsumsize <= 0 ? 0 : \ + /* cluster sum */ (fs)->fs_contigsumsize * sizeof(int32_t) + \ + /* cluster map */ howmany(fragstoblks(fs, (fs)->fs_fpg), NBBY))) + +/* + * The minimal number of cylinder groups that should be created. + */ +#define MINCYLGRPS 4 + +/* + * Convert cylinder group to base address of its global summary info. + */ +#define fs_cs(fs, indx) fs_csp[indx] + +/* + * Cylinder group block for a filesystem. + */ +#define CG_MAGIC 0x090255 +struct cg { + int32_t cg_firstfield; /* historic cyl groups linked list */ + int32_t cg_magic; /* magic number */ + int32_t cg_old_time; /* time last written */ + u_int32_t cg_cgx; /* we are the cgx'th cylinder group */ + int16_t cg_old_ncyl; /* number of cyl's this cg */ + int16_t cg_old_niblk; /* number of inode blocks this cg */ + u_int32_t cg_ndblk; /* number of data blocks this cg */ + struct csum cg_cs; /* cylinder summary information */ + u_int32_t cg_rotor; /* position of last used block */ + u_int32_t cg_frotor; /* position of last used frag */ + u_int32_t cg_irotor; /* position of last used inode */ + u_int32_t cg_frsum[MAXFRAG]; /* counts of available frags */ + int32_t cg_old_btotoff; /* (int32) block totals per cylinder */ + int32_t cg_old_boff; /* (u_int16) free block positions */ + u_int32_t cg_iusedoff; /* (u_int8) used inode map */ + u_int32_t cg_freeoff; /* (u_int8) free block map */ + u_int32_t cg_nextfreeoff; /* (u_int8) next available space */ + u_int32_t cg_clustersumoff; /* (u_int32) counts of avail clusters */ + u_int32_t cg_clusteroff; /* (u_int8) free cluster map */ + u_int32_t cg_nclusterblks; /* number of clusters this cg */ + u_int32_t cg_niblk; /* number of inode blocks this cg */ + u_int32_t cg_initediblk; /* last initialized inode */ + u_int32_t cg_unrefs; /* number of unreferenced inodes */ + int32_t cg_sparecon32[2]; /* reserved for future use */ + ufs_time_t cg_time; /* time last written */ + int64_t cg_sparecon64[3]; /* reserved for future use */ + u_int8_t cg_space[1]; /* space for cylinder group maps */ +/* actually longer */ +}; + +/* + * Macros for access to cylinder group array structures + */ +#define cg_chkmagic(cgp) ((cgp)->cg_magic == CG_MAGIC) +#define cg_inosused(cgp) \ + ((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_iusedoff)) +#define cg_blksfree(cgp) \ + ((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_freeoff)) +#define cg_clustersfree(cgp) \ + ((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_clusteroff)) +#define cg_clustersum(cgp) \ + ((int32_t *)((uintptr_t)(cgp) + (cgp)->cg_clustersumoff)) + +/* + * Turn filesystem block numbers into disk block addresses. + * This maps filesystem blocks to device size blocks. + */ +#define fsbtodb(fs, b) ((daddr_t)(b) << (fs)->fs_fsbtodb) +#define dbtofsb(fs, b) ((b) >> (fs)->fs_fsbtodb) + +/* + * Cylinder group macros to locate things in cylinder groups. + * They calc filesystem addresses of cylinder group data structures. 
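+ *
+ * For a UFS2 filesystem cgstart(fs, c) is simply cgbase(fs, c), i.e.
+ * c * fs_fpg fragments from the start of the filesystem; the cylinder
+ * group block, inode blocks and first data block then sit at the
+ * fixed offsets fs_cblkno, fs_iblkno and fs_dblkno within the group,
+ * which is what cgtod(), cgimin() and cgdmin() below compute.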
+ */ +#define cgbase(fs, c) (((ufs2_daddr_t)(fs)->fs_fpg) * (c)) +#define cgdata(fs, c) (cgdmin(fs, c) + (fs)->fs_metaspace) /* data zone */ +#define cgmeta(fs, c) (cgdmin(fs, c)) /* meta data */ +#define cgdmin(fs, c) (cgstart(fs, c) + (fs)->fs_dblkno) /* 1st data */ +#define cgimin(fs, c) (cgstart(fs, c) + (fs)->fs_iblkno) /* inode blk */ +#define cgsblock(fs, c) (cgstart(fs, c) + (fs)->fs_sblkno) /* super blk */ +#define cgtod(fs, c) (cgstart(fs, c) + (fs)->fs_cblkno) /* cg block */ +#define cgstart(fs, c) \ + ((fs)->fs_magic == FS_UFS2_MAGIC ? cgbase(fs, c) : \ + (cgbase(fs, c) + (fs)->fs_old_cgoffset * ((c) & ~((fs)->fs_old_cgmask)))) + +/* + * Macros for handling inode numbers: + * inode number to filesystem block offset. + * inode number to cylinder group number. + * inode number to filesystem block address. + */ +#define ino_to_cg(fs, x) (((ino_t)(x)) / (fs)->fs_ipg) +#define ino_to_fsba(fs, x) \ + ((ufs2_daddr_t)(cgimin(fs, ino_to_cg(fs, (ino_t)(x))) + \ + (blkstofrags((fs), ((((ino_t)(x)) % (fs)->fs_ipg) / INOPB(fs)))))) +#define ino_to_fsbo(fs, x) (((ino_t)(x)) % INOPB(fs)) + +/* + * Give cylinder group number for a filesystem block. + * Give cylinder group block number for a filesystem block. + */ +#define dtog(fs, d) ((d) / (fs)->fs_fpg) +#define dtogd(fs, d) ((d) % (fs)->fs_fpg) + +/* + * Extract the bits for a block from a map. + * Compute the cylinder and rotational position of a cyl block addr. + */ +#define blkmap(fs, map, loc) \ + (((map)[(loc) / NBBY] >> ((loc) % NBBY)) & (0xff >> (NBBY - (fs)->fs_frag))) + +/* + * The following macros optimize certain frequently calculated + * quantities by using shifts and masks in place of divisions + * modulos and multiplications. + */ +#define blkoff(fs, loc) /* calculates (loc % fs->fs_bsize) */ \ + ((loc) & (fs)->fs_qbmask) +#define fragoff(fs, loc) /* calculates (loc % fs->fs_fsize) */ \ + ((loc) & (fs)->fs_qfmask) +#define lfragtosize(fs, frag) /* calculates ((off_t)frag * fs->fs_fsize) */ \ + (((off_t)(frag)) << (fs)->fs_fshift) +#define lblktosize(fs, blk) /* calculates ((off_t)blk * fs->fs_bsize) */ \ + (((off_t)(blk)) << (fs)->fs_bshift) +/* Use this only when `blk' is known to be small, e.g., < NDADDR. */ +#define smalllblktosize(fs, blk) /* calculates (blk * fs->fs_bsize) */ \ + ((blk) << (fs)->fs_bshift) +#define lblkno(fs, loc) /* calculates (loc / fs->fs_bsize) */ \ + ((loc) >> (fs)->fs_bshift) +#define numfrags(fs, loc) /* calculates (loc / fs->fs_fsize) */ \ + ((loc) >> (fs)->fs_fshift) +#define blkroundup(fs, size) /* calculates roundup(size, fs->fs_bsize) */ \ + (((size) + (fs)->fs_qbmask) & (fs)->fs_bmask) +#define fragroundup(fs, size) /* calculates roundup(size, fs->fs_fsize) */ \ + (((size) + (fs)->fs_qfmask) & (fs)->fs_fmask) +#define fragstoblks(fs, frags) /* calculates (frags / fs->fs_frag) */ \ + ((frags) >> (fs)->fs_fragshift) +#define blkstofrags(fs, blks) /* calculates (blks * fs->fs_frag) */ \ + ((blks) << (fs)->fs_fragshift) +#define fragnum(fs, fsb) /* calculates (fsb % fs->fs_frag) */ \ + ((fsb) & ((fs)->fs_frag - 1)) +#define blknum(fs, fsb) /* calculates rounddown(fsb, fs->fs_frag) */ \ + ((fsb) &~ ((fs)->fs_frag - 1)) + +/* + * Determine the number of available frags given a + * percentage to hold in reserve. + */ +#define freespace(fs, percentreserved) \ + (blkstofrags((fs), (fs)->fs_cstotal.cs_nbfree) + \ + (fs)->fs_cstotal.cs_nffree - \ + (((off_t)((fs)->fs_dsize)) * (percentreserved) / 100)) + +/* + * Determining the size of a file block in the filesystem. 
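+ *
+ * For example (hypothetical sizes): with fs_bsize 32768 and fs_fsize 4096,
+ * a 10000 byte file has blksize(fs, ip, 0) == fragroundup(fs, 10000) ==
+ * 12288; a block wholly contained within the file, or any block at or
+ * beyond NDADDR, is a full fs_bsize.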
+ */ +#define blksize(fs, ip, lbn) \ + (((lbn) >= NDADDR || (ip)->i_size >= smalllblktosize(fs, (lbn) + 1)) \ + ? (fs)->fs_bsize \ + : (fragroundup(fs, blkoff(fs, (ip)->i_size)))) +#define sblksize(fs, size, lbn) \ + (((lbn) >= NDADDR || (size) >= ((lbn) + 1) << (fs)->fs_bshift) \ + ? (fs)->fs_bsize \ + : (fragroundup(fs, blkoff(fs, (size))))) + +/* + * Number of indirects in a filesystem block. + */ +#define NINDIR(fs) ((fs)->fs_nindir) + +/* + * Indirect lbns are aligned on NDADDR addresses where single indirects + * are the negated address of the lowest lbn reachable, double indirects + * are this lbn - 1 and triple indirects are this lbn - 2. This yields + * an unusual bit order to determine level. + */ +static inline int +lbn_level(ufs_lbn_t lbn) +{ + if (lbn >= 0) + return 0; + switch (lbn & 0x3) { + case 0: + return (0); + case 1: + break; + case 2: + return (2); + case 3: + return (1); + default: + break; + } + return (-1); +} + +static inline ufs_lbn_t +lbn_offset(struct fs *fs, int level) +{ + ufs_lbn_t res; + + for (res = 1; level > 0; level--) + res *= NINDIR(fs); + return (res); +} + +/* + * Number of inodes in a secondary storage block/fragment. + */ +#define INOPB(fs) ((fs)->fs_inopb) +#define INOPF(fs) ((fs)->fs_inopb >> (fs)->fs_fragshift) + +/* + * Softdep journal record format. + */ + +#define JOP_ADDREF 1 /* Add a reference to an inode. */ +#define JOP_REMREF 2 /* Remove a reference from an inode. */ +#define JOP_NEWBLK 3 /* Allocate a block. */ +#define JOP_FREEBLK 4 /* Free a block or a tree of blocks. */ +#define JOP_MVREF 5 /* Move a reference from one off to another. */ +#define JOP_TRUNC 6 /* Partial truncation record. */ +#define JOP_SYNC 7 /* fsync() complete record. */ + +#define JREC_SIZE 32 /* Record and segment header size. */ + +#define SUJ_MIN (4 * 1024 * 1024) /* Minimum journal size */ +#define SUJ_MAX (32 * 1024 * 1024) /* Maximum journal size */ +#define SUJ_FILE ".sujournal" /* Journal file name */ + +/* + * Size of the segment record header. There is at most one for each disk + * block in the journal. The segment header is followed by an array of + * records. fsck depends on the first element in each record being 'op' + * and the second being 'ino'. Segments may span multiple disk blocks but + * the header is present on each. + */ +struct jsegrec { + uint64_t jsr_seq; /* Our sequence number */ + uint64_t jsr_oldest; /* Oldest valid sequence number */ + uint16_t jsr_cnt; /* Count of valid records */ + uint16_t jsr_blocks; /* Count of device bsize blocks. */ + uint32_t jsr_crc; /* 32bit crc of the valid space */ + ufs_time_t jsr_time; /* timestamp for mount instance */ +}; + +/* + * Reference record. Records a single link count modification. + */ +struct jrefrec { + uint32_t jr_op; + uint32_t jr_ino; + uint32_t jr_parent; + uint16_t jr_nlink; + uint16_t jr_mode; + int64_t jr_diroff; + uint64_t jr_unused; +}; + +/* + * Move record. Records a reference moving within a directory block. The + * nlink is unchanged but we must search both locations. + */ +struct jmvrec { + uint32_t jm_op; + uint32_t jm_ino; + uint32_t jm_parent; + uint16_t jm_unused; + int64_t jm_oldoff; + int64_t jm_newoff; +}; + +/* + * Block record. A set of frags or tree of blocks starting at an indirect are + * freed or a set of frags are allocated. + */ +struct jblkrec { + uint32_t jb_op; + uint32_t jb_ino; + ufs2_daddr_t jb_blkno; + ufs_lbn_t jb_lbn; + uint16_t jb_frags; + uint16_t jb_oldfrags; + uint32_t jb_unused; +}; + +/* + * Truncation record. 
Records a partial truncation so that it may be + * completed at check time. Also used for sync records. + */ +struct jtrncrec { + uint32_t jt_op; + uint32_t jt_ino; + int64_t jt_size; + uint32_t jt_extsize; + uint32_t jt_pad[3]; +}; + +union jrec { + struct jsegrec rec_jsegrec; + struct jrefrec rec_jrefrec; + struct jmvrec rec_jmvrec; + struct jblkrec rec_jblkrec; + struct jtrncrec rec_jtrncrec; +}; + +#ifdef CTASSERT +CTASSERT(sizeof(struct jsegrec) == JREC_SIZE); +CTASSERT(sizeof(struct jrefrec) == JREC_SIZE); +CTASSERT(sizeof(struct jmvrec) == JREC_SIZE); +CTASSERT(sizeof(struct jblkrec) == JREC_SIZE); +CTASSERT(sizeof(struct jtrncrec) == JREC_SIZE); +CTASSERT(sizeof(union jrec) == JREC_SIZE); +#endif + +extern int inside[], around[]; +extern u_char *fragtbl[]; + +/* + * IOCTLs used for filesystem write suspension. + */ +#define UFSSUSPEND _IOW('U', 1, fsid_t) +#define UFSRESUME _IO('U', 2) + +#endif diff --git a/Dump/ufs/ffs/softdep.h b/Dump/ufs/ffs/softdep.h new file mode 100644 index 0000000..8f5222e --- /dev/null +++ b/Dump/ufs/ffs/softdep.h @@ -0,0 +1,1100 @@ +/*- + * Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved. + * + * The soft updates code is derived from the appendix of a University + * of Michigan technical report (Gregory R. Ganger and Yale N. Patt, + * "Soft Updates: A Solution to the Metadata Update Problem in File + * Systems", CSE-TR-254-95, August 1995). + * + * Further information about soft updates can be obtained from: + * + * Marshall Kirk McKusick http://www.mckusick.com/softdep/ + * 1614 Oxford Street mckusick@mckusick.com + * Berkeley, CA 94709-1608 +1-510-843-9542 + * USA + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)softdep.h 9.7 (McKusick) 6/21/00 + * $FreeBSD: releng/11.2/sys/ufs/ffs/softdep.h 320057 2017-06-17 17:10:50Z kib $ + */ + +#include + +/* + * Allocation dependencies are handled with undo/redo on the in-memory + * copy of the data. A particular data dependency is eliminated when + * it is ALLCOMPLETE: that is ATTACHED, DEPCOMPLETE, and COMPLETE. + * + * The ATTACHED flag means that the data is not currently being written + * to disk. + * + * The UNDONE flag means that the data has been rolled back to a safe + * state for writing to the disk. 
When the I/O completes, the data is + * restored to its current form and the state reverts to ATTACHED. + * The data must be locked throughout the rollback, I/O, and roll + * forward so that the rolled back information is never visible to + * user processes. + * + * The COMPLETE flag indicates that the item has been written. For example, + * a dependency that requires that an inode be written will be marked + * COMPLETE after the inode has been written to disk. + * + * The DEPCOMPLETE flag indicates the completion of any other + * dependencies such as the writing of a cylinder group map has been + * completed. A dependency structure may be freed only when both it + * and its dependencies have completed and any rollbacks that are in + * progress have finished as indicated by the set of ALLCOMPLETE flags + * all being set. + * + * The two MKDIR flags indicate additional dependencies that must be done + * when creating a new directory. MKDIR_BODY is cleared when the directory + * data block containing the "." and ".." entries has been written. + * MKDIR_PARENT is cleared when the parent inode with the increased link + * count for ".." has been written. When both MKDIR flags have been + * cleared, the DEPCOMPLETE flag is set to indicate that the directory + * dependencies have been completed. The writing of the directory inode + * itself sets the COMPLETE flag which then allows the directory entry for + * the new directory to be written to disk. The RMDIR flag marks a dirrem + * structure as representing the removal of a directory rather than a + * file. When the removal dependencies are completed, additional work needs + * to be done* (an additional decrement of the associated inode, and a + * decrement of the parent inode). + * + * The DIRCHG flag marks a diradd structure as representing the changing + * of an existing entry rather than the addition of a new one. When + * the update is complete the dirrem associated with the inode for + * the old name must be added to the worklist to do the necessary + * reference count decrement. + * + * The GOINGAWAY flag indicates that the data structure is frozen from + * further change until its dependencies have been completed and its + * resources freed after which it will be discarded. + * + * The IOSTARTED flag prevents multiple calls to the I/O start routine from + * doing multiple rollbacks. + * + * The NEWBLOCK flag marks pagedep structures that have just been allocated, + * so must be claimed by the inode before all dependencies are complete. + * + * The INPROGRESS flag marks worklist structures that are still on the + * worklist, but are being considered for action by some process. + * + * The UFS1FMT flag indicates that the inode being processed is a ufs1 format. + * + * The EXTDATA flag indicates that the allocdirect describes an + * extended-attributes dependency. + * + * The ONWORKLIST flag shows whether the structure is currently linked + * onto a worklist. + * + * The UNLINK* flags track the progress of updating the on-disk linked + * list of active but unlinked inodes. When an inode is first unlinked + * it is marked as UNLINKED. When its on-disk di_freelink has been + * written its UNLINKNEXT flags is set. When its predecessor in the + * list has its di_freelink pointing at us its UNLINKPREV is set. + * When the on-disk list can reach it from the superblock, its + * UNLINKONLIST flag is set. Once all of these flags are set, it + * is safe to let its last name be removed. 
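+ *
+ * As a rough sketch of how these flags are consumed (not a definitive
+ * code path), a dependency structure is typically retired only once the
+ * aggregate test succeeds:
+ *
+ *	if ((inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE)
+ *		... the inodedep may be freed ...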
+ */ +#define ATTACHED 0x000001 +#define UNDONE 0x000002 +#define COMPLETE 0x000004 +#define DEPCOMPLETE 0x000008 +#define MKDIR_PARENT 0x000010 /* diradd, mkdir, jaddref, jsegdep only */ +#define MKDIR_BODY 0x000020 /* diradd, mkdir, jaddref only */ +#define RMDIR 0x000040 /* dirrem only */ +#define DIRCHG 0x000080 /* diradd, dirrem only */ +#define GOINGAWAY 0x000100 /* indirdep, jremref only */ +#define IOSTARTED 0x000200 /* inodedep, pagedep, bmsafemap only */ +#define DELAYEDFREE 0x000400 /* allocindirect free delayed. */ +#define NEWBLOCK 0x000800 /* pagedep, jaddref only */ +#define INPROGRESS 0x001000 /* dirrem, freeblks, freefrag, freefile only */ +#define UFS1FMT 0x002000 /* indirdep only */ +#define EXTDATA 0x004000 /* allocdirect only */ +#define ONWORKLIST 0x008000 +#define IOWAITING 0x010000 /* Thread is waiting for IO to complete. */ +#define ONDEPLIST 0x020000 /* Structure is on a dependency list. */ +#define UNLINKED 0x040000 /* inodedep has been unlinked. */ +#define UNLINKNEXT 0x080000 /* inodedep has valid di_freelink */ +#define UNLINKPREV 0x100000 /* inodedep is pointed at in the unlink list */ +#define UNLINKONLIST 0x200000 /* inodedep is in the unlinked list on disk */ +#define UNLINKLINKS (UNLINKNEXT | UNLINKPREV) +#define WRITESUCCEEDED 0x400000 /* the disk write completed successfully */ + +#define ALLCOMPLETE (ATTACHED | COMPLETE | DEPCOMPLETE) + +/* + * Values for each of the soft dependency types. + */ +#define D_PAGEDEP 0 +#define D_INODEDEP 1 +#define D_BMSAFEMAP 2 +#define D_NEWBLK 3 +#define D_ALLOCDIRECT 4 +#define D_INDIRDEP 5 +#define D_ALLOCINDIR 6 +#define D_FREEFRAG 7 +#define D_FREEBLKS 8 +#define D_FREEFILE 9 +#define D_DIRADD 10 +#define D_MKDIR 11 +#define D_DIRREM 12 +#define D_NEWDIRBLK 13 +#define D_FREEWORK 14 +#define D_FREEDEP 15 +#define D_JADDREF 16 +#define D_JREMREF 17 +#define D_JMVREF 18 +#define D_JNEWBLK 19 +#define D_JFREEBLK 20 +#define D_JFREEFRAG 21 +#define D_JSEG 22 +#define D_JSEGDEP 23 +#define D_SBDEP 24 +#define D_JTRUNC 25 +#define D_JFSYNC 26 +#define D_SENTINEL 27 +#define D_LAST D_SENTINEL + +/* + * The workitem queue. + * + * It is sometimes useful and/or necessary to clean up certain dependencies + * in the background rather than during execution of an application process + * or interrupt service routine. To realize this, we append dependency + * structures corresponding to such tasks to a "workitem" queue. In a soft + * updates implementation, most pending workitems should not wait for more + * than a couple of seconds, so the filesystem syncer process awakens once + * per second to process the items on the queue. + */ + +/* LIST_HEAD(workhead, worklist); -- declared in buf.h */ + +/* + * Each request can be linked onto a work queue through its worklist structure. + * To avoid the need for a pointer to the structure itself, this structure + * MUST be declared FIRST in each type in which it appears! If more than one + * worklist is needed in the structure, then a wk_data field must be added + * and the macros below changed to use it. 
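+ *
+ * A rough consumer sketch (illustrative only): items are pulled off a
+ * workhead and dispatched on their type, e.g.
+ *
+ *	switch (wk->wk_type) {
+ *	case D_PAGEDEP:
+ *		pagedep = WK_PAGEDEP(wk);
+ *		...
+ *	}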
+ */ +struct worklist { + LIST_ENTRY(worklist) wk_list; /* list of work requests */ + struct mount *wk_mp; /* Mount we live in */ + unsigned int wk_type:8, /* type of request */ + wk_state:24; /* state flags */ +}; +#define WK_DATA(wk) ((void *)(wk)) +#define WK_PAGEDEP(wk) ((struct pagedep *)(wk)) +#define WK_INODEDEP(wk) ((struct inodedep *)(wk)) +#define WK_BMSAFEMAP(wk) ((struct bmsafemap *)(wk)) +#define WK_NEWBLK(wk) ((struct newblk *)(wk)) +#define WK_ALLOCDIRECT(wk) ((struct allocdirect *)(wk)) +#define WK_INDIRDEP(wk) ((struct indirdep *)(wk)) +#define WK_ALLOCINDIR(wk) ((struct allocindir *)(wk)) +#define WK_FREEFRAG(wk) ((struct freefrag *)(wk)) +#define WK_FREEBLKS(wk) ((struct freeblks *)(wk)) +#define WK_FREEWORK(wk) ((struct freework *)(wk)) +#define WK_FREEFILE(wk) ((struct freefile *)(wk)) +#define WK_DIRADD(wk) ((struct diradd *)(wk)) +#define WK_MKDIR(wk) ((struct mkdir *)(wk)) +#define WK_DIRREM(wk) ((struct dirrem *)(wk)) +#define WK_NEWDIRBLK(wk) ((struct newdirblk *)(wk)) +#define WK_JADDREF(wk) ((struct jaddref *)(wk)) +#define WK_JREMREF(wk) ((struct jremref *)(wk)) +#define WK_JMVREF(wk) ((struct jmvref *)(wk)) +#define WK_JSEGDEP(wk) ((struct jsegdep *)(wk)) +#define WK_JSEG(wk) ((struct jseg *)(wk)) +#define WK_JNEWBLK(wk) ((struct jnewblk *)(wk)) +#define WK_JFREEBLK(wk) ((struct jfreeblk *)(wk)) +#define WK_FREEDEP(wk) ((struct freedep *)(wk)) +#define WK_JFREEFRAG(wk) ((struct jfreefrag *)(wk)) +#define WK_SBDEP(wk) ((struct sbdep *)(wk)) +#define WK_JTRUNC(wk) ((struct jtrunc *)(wk)) +#define WK_JFSYNC(wk) ((struct jfsync *)(wk)) + +/* + * Various types of lists + */ +LIST_HEAD(dirremhd, dirrem); +LIST_HEAD(diraddhd, diradd); +LIST_HEAD(newblkhd, newblk); +LIST_HEAD(inodedephd, inodedep); +LIST_HEAD(allocindirhd, allocindir); +LIST_HEAD(allocdirecthd, allocdirect); +TAILQ_HEAD(allocdirectlst, allocdirect); +LIST_HEAD(indirdephd, indirdep); +LIST_HEAD(jaddrefhd, jaddref); +LIST_HEAD(jremrefhd, jremref); +LIST_HEAD(jmvrefhd, jmvref); +LIST_HEAD(jnewblkhd, jnewblk); +LIST_HEAD(jblkdephd, jblkdep); +LIST_HEAD(freeworkhd, freework); +TAILQ_HEAD(freeworklst, freework); +TAILQ_HEAD(jseglst, jseg); +TAILQ_HEAD(inoreflst, inoref); +TAILQ_HEAD(freeblklst, freeblks); + +/* + * The "pagedep" structure tracks the various dependencies related to + * a particular directory page. If a directory page has any dependencies, + * it will have a pagedep linked to its associated buffer. The + * pd_dirremhd list holds the list of dirrem requests which decrement + * inode reference counts. These requests are processed after the + * directory page with the corresponding zero'ed entries has been + * written. The pd_diraddhd list maintains the list of diradd requests + * which cannot be committed until their corresponding inode has been + * written to disk. Because a directory may have many new entries + * being created, several lists are maintained hashed on bits of the + * offset of the entry into the directory page to keep the lists from + * getting too long. Once a new directory entry has been cleared to + * be written, it is moved to the pd_pendinghd list. After the new + * entry has been written to disk it is removed from the pd_pendinghd + * list, any removed operations are done, and the dependency structure + * is freed. 
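+ *
+ * For example, with the DAHASHSZ of 5 defined below, a new entry at
+ * directory offset 512 is linked onto pd_diraddhd[DIRADDHASH(512)],
+ * i.e. bucket (512 >> 2) % 5 == 3.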
+ */ +#define DAHASHSZ 5 +#define DIRADDHASH(offset) (((offset) >> 2) % DAHASHSZ) +struct pagedep { + struct worklist pd_list; /* page buffer */ +# define pd_state pd_list.wk_state /* check for multiple I/O starts */ + LIST_ENTRY(pagedep) pd_hash; /* hashed lookup */ + ino_t pd_ino; /* associated file */ + ufs_lbn_t pd_lbn; /* block within file */ + struct newdirblk *pd_newdirblk; /* associated newdirblk if NEWBLOCK */ + struct dirremhd pd_dirremhd; /* dirrem's waiting for page */ + struct diraddhd pd_diraddhd[DAHASHSZ]; /* diradd dir entry updates */ + struct diraddhd pd_pendinghd; /* directory entries awaiting write */ + struct jmvrefhd pd_jmvrefhd; /* Dependent journal writes. */ +}; + +/* + * The "inodedep" structure tracks the set of dependencies associated + * with an inode. One task that it must manage is delayed operations + * (i.e., work requests that must be held until the inodedep's associated + * inode has been written to disk). Getting an inode from its incore + * state to the disk requires two steps to be taken by the filesystem + * in this order: first the inode must be copied to its disk buffer by + * the VOP_UPDATE operation; second the inode's buffer must be written + * to disk. To ensure that both operations have happened in the required + * order, the inodedep maintains two lists. Delayed operations are + * placed on the id_inowait list. When the VOP_UPDATE is done, all + * operations on the id_inowait list are moved to the id_bufwait list. + * When the buffer is written, the items on the id_bufwait list can be + * safely moved to the work queue to be processed. A second task of the + * inodedep structure is to track the status of block allocation within + * the inode. Each block that is allocated is represented by an + * "allocdirect" structure (see below). It is linked onto the id_newinoupdt + * list until both its contents and its allocation in the cylinder + * group map have been written to disk. Once these dependencies have been + * satisfied, it is removed from the id_newinoupdt list and any followup + * actions such as releasing the previous block or fragment are placed + * on the id_inowait list. When an inode is updated (a VOP_UPDATE is + * done), the "inodedep" structure is linked onto the buffer through + * its worklist. Thus, it will be notified when the buffer is about + * to be written and when it is done. At the update time, all the + * elements on the id_newinoupdt list are moved to the id_inoupdt list + * since those changes are now relevant to the copy of the inode in the + * buffer. Also at update time, the tasks on the id_inowait list are + * moved to the id_bufwait list so that they will be executed when + * the updated inode has been written to disk. When the buffer containing + * the inode is written to disk, any updates listed on the id_inoupdt + * list are rolled back as they are not yet safe. Following the write, + * the changes are once again rolled forward and any actions on the + * id_bufwait list are processed (since those actions are now safe). + * The entries on the id_inoupdt and id_newinoupdt lists must be kept + * sorted by logical block number to speed the calculation of the size + * of the rolled back inode (see explanation in initiate_write_inodeblock). + * When a directory entry is created, it is represented by a diradd. + * The diradd is added to the id_inowait list as it cannot be safely + * written to disk until the inode that it represents is on disk. 
After + * the inode is written, the id_bufwait list is processed and the diradd + * entries are moved to the id_pendinghd list where they remain until + * the directory block containing the name has been written to disk. + * The purpose of keeping the entries on the id_pendinghd list is so that + * the softdep_fsync function can find and push the inode's directory + * name(s) as part of the fsync operation for that file. + */ +struct inodedep { + struct worklist id_list; /* buffer holding inode block */ +# define id_state id_list.wk_state /* inode dependency state */ + LIST_ENTRY(inodedep) id_hash; /* hashed lookup */ + TAILQ_ENTRY(inodedep) id_unlinked; /* Unlinked but ref'd inodes */ + struct fs *id_fs; /* associated filesystem */ + ino_t id_ino; /* dependent inode */ + nlink_t id_nlinkdelta; /* saved effective link count */ + nlink_t id_savednlink; /* Link saved during rollback */ + LIST_ENTRY(inodedep) id_deps; /* bmsafemap's list of inodedep's */ + struct bmsafemap *id_bmsafemap; /* related bmsafemap (if pending) */ + struct diradd *id_mkdiradd; /* diradd for a mkdir. */ + struct inoreflst id_inoreflst; /* Inode reference adjustments. */ + long id_savedextsize; /* ext size saved during rollback */ + off_t id_savedsize; /* file size saved during rollback */ + struct dirremhd id_dirremhd; /* Removals pending. */ + struct workhead id_pendinghd; /* entries awaiting directory write */ + struct workhead id_bufwait; /* operations after inode written */ + struct workhead id_inowait; /* operations waiting inode update */ + struct allocdirectlst id_inoupdt; /* updates before inode written */ + struct allocdirectlst id_newinoupdt; /* updates when inode written */ + struct allocdirectlst id_extupdt; /* extdata updates pre-inode write */ + struct allocdirectlst id_newextupdt; /* extdata updates at ino write */ + struct freeblklst id_freeblklst; /* List of partial truncates. */ + union { + struct ufs1_dinode *idu_savedino1; /* saved ufs1_dinode contents */ + struct ufs2_dinode *idu_savedino2; /* saved ufs2_dinode contents */ + } id_un; +}; +#define id_savedino1 id_un.idu_savedino1 +#define id_savedino2 id_un.idu_savedino2 + +/* + * A "bmsafemap" structure maintains a list of dependency structures + * that depend on the update of a particular cylinder group map. + * It has lists for newblks, allocdirects, allocindirs, and inodedeps. + * It is attached to the buffer of a cylinder group block when any of + * these things are allocated from the cylinder group. It is freed + * after the cylinder group map is written and the state of its + * dependencies are updated with DEPCOMPLETE to indicate that it has + * been processed. + */ +struct bmsafemap { + struct worklist sm_list; /* cylgrp buffer */ +# define sm_state sm_list.wk_state + LIST_ENTRY(bmsafemap) sm_hash; /* Hash links. */ + LIST_ENTRY(bmsafemap) sm_next; /* Mount list. */ + int sm_cg; + struct buf *sm_buf; /* associated buffer */ + struct allocdirecthd sm_allocdirecthd; /* allocdirect deps */ + struct allocdirecthd sm_allocdirectwr; /* writing allocdirect deps */ + struct allocindirhd sm_allocindirhd; /* allocindir deps */ + struct allocindirhd sm_allocindirwr; /* writing allocindir deps */ + struct inodedephd sm_inodedephd; /* inodedep deps */ + struct inodedephd sm_inodedepwr; /* writing inodedep deps */ + struct newblkhd sm_newblkhd; /* newblk deps */ + struct newblkhd sm_newblkwr; /* writing newblk deps */ + struct jaddrefhd sm_jaddrefhd; /* Pending inode allocations. */ + struct jnewblkhd sm_jnewblkhd; /* Pending block allocations. 
*/ + struct workhead sm_freehd; /* Freedep deps. */ + struct workhead sm_freewr; /* Written freedeps. */ +}; + +/* + * A "newblk" structure is attached to a bmsafemap structure when a block + * or fragment is allocated from a cylinder group. Its state is set to + * DEPCOMPLETE when its cylinder group map is written. It is converted to + * an allocdirect or allocindir allocation once the allocator calls the + * appropriate setup function. It will initially be linked onto a bmsafemap + * list. Once converted it can be linked onto the lists described for + * allocdirect or allocindir as described below. + */ +struct newblk { + struct worklist nb_list; /* See comment above. */ +# define nb_state nb_list.wk_state + LIST_ENTRY(newblk) nb_hash; /* Hashed lookup. */ + LIST_ENTRY(newblk) nb_deps; /* Bmsafemap's list of newblks. */ + struct jnewblk *nb_jnewblk; /* New block journal entry. */ + struct bmsafemap *nb_bmsafemap;/* Cylgrp dep (if pending). */ + struct freefrag *nb_freefrag; /* Fragment to be freed (if any). */ + struct indirdephd nb_indirdeps; /* Children indirect blocks. */ + struct workhead nb_newdirblk; /* Dir block to notify when written. */ + struct workhead nb_jwork; /* Journal work pending. */ + ufs2_daddr_t nb_newblkno; /* New value of block pointer. */ +}; + +/* + * An "allocdirect" structure is attached to an "inodedep" when a new block + * or fragment is allocated and pointed to by the inode described by + * "inodedep". The worklist is linked to the buffer that holds the block. + * When the block is first allocated, it is linked to the bmsafemap + * structure associated with the buffer holding the cylinder group map + * from which it was allocated. When the cylinder group map is written + * to disk, ad_state has the DEPCOMPLETE flag set. When the block itself + * is written, the COMPLETE flag is set. Once both the cylinder group map + * and the data itself have been written, it is safe to write the inode + * that claims the block. If there was a previous fragment that had been + * allocated before the file was increased in size, the old fragment may + * be freed once the inode claiming the new block is written to disk. + * This ad_fragfree request is attached to the id_inowait list of the + * associated inodedep (pointed to by ad_inodedep) for processing after + * the inode is written. When a block is allocated to a directory, an + * fsync of a file whose name is within that block must ensure not only + * that the block containing the file name has been written, but also + * that the on-disk inode references that block. When a new directory + * block is created, we allocate a newdirblk structure which is linked + * to the associated allocdirect (on its ad_newdirblk list). When the + * allocdirect has been satisfied, the newdirblk structure is moved to + * the inodedep id_bufwait list of its directory to await the inode + * being written. When the inode is written, the directory entries are + * fully committed and can be deleted from their pagedep->id_pendinghd + * and inodedep->id_pendinghd lists. + */ +struct allocdirect { + struct newblk ad_block; /* Common block logic */ +# define ad_list ad_block.nb_list /* block pointer worklist */ +# define ad_state ad_list.wk_state /* block pointer state */ + TAILQ_ENTRY(allocdirect) ad_next; /* inodedep's list of allocdirect's */ + struct inodedep *ad_inodedep; /* associated inodedep */ + ufs2_daddr_t ad_oldblkno; /* old value of block pointer */ + int ad_offset; /* Pointer offset in parent. 
*/ + long ad_newsize; /* size of new block */ + long ad_oldsize; /* size of old block */ +}; +#define ad_newblkno ad_block.nb_newblkno +#define ad_freefrag ad_block.nb_freefrag +#define ad_newdirblk ad_block.nb_newdirblk + +/* + * A single "indirdep" structure manages all allocation dependencies for + * pointers in an indirect block. The up-to-date state of the indirect + * block is stored in ir_savedata. The set of pointers that may be safely + * written to the disk is stored in ir_safecopy. The state field is used + * only to track whether the buffer is currently being written (in which + * case it is not safe to update ir_safecopy). Ir_deplisthd contains the + * list of allocindir structures, one for each block that needs to be + * written to disk. Once the block and its bitmap allocation have been + * written the safecopy can be updated to reflect the allocation and the + * allocindir structure freed. If ir_state indicates that an I/O on the + * indirect block is in progress when ir_safecopy is to be updated, the + * update is deferred by placing the allocindir on the ir_donehd list. + * When the I/O on the indirect block completes, the entries on the + * ir_donehd list are processed by updating their corresponding ir_safecopy + * pointers and then freeing the allocindir structure. + */ +struct indirdep { + struct worklist ir_list; /* buffer holding indirect block */ +# define ir_state ir_list.wk_state /* indirect block pointer state */ + LIST_ENTRY(indirdep) ir_next; /* alloc{direct,indir} list */ + TAILQ_HEAD(, freework) ir_trunc; /* List of truncations. */ + caddr_t ir_saveddata; /* buffer cache contents */ + struct buf *ir_savebp; /* buffer holding safe copy */ + struct buf *ir_bp; /* buffer holding live copy */ + struct allocindirhd ir_completehd; /* waiting for indirdep complete */ + struct allocindirhd ir_writehd; /* Waiting for the pointer write. */ + struct allocindirhd ir_donehd; /* done waiting to update safecopy */ + struct allocindirhd ir_deplisthd; /* allocindir deps for this block */ + struct freeblks *ir_freeblks; /* Freeblks that frees this indir. */ +}; + +/* + * An "allocindir" structure is attached to an "indirdep" when a new block + * is allocated and pointed to by the indirect block described by the + * "indirdep". The worklist is linked to the buffer that holds the new block. + * When the block is first allocated, it is linked to the bmsafemap + * structure associated with the buffer holding the cylinder group map + * from which it was allocated. When the cylinder group map is written + * to disk, ai_state has the DEPCOMPLETE flag set. When the block itself + * is written, the COMPLETE flag is set. Once both the cylinder group map + * and the data itself have been written, it is safe to write the entry in + * the indirect block that claims the block; the "allocindir" dependency + * can then be freed as it is no longer applicable. + */ +struct allocindir { + struct newblk ai_block; /* Common block area */ +# define ai_state ai_block.nb_list.wk_state /* indirect pointer state */ + LIST_ENTRY(allocindir) ai_next; /* indirdep's list of allocindir's */ + struct indirdep *ai_indirdep; /* address of associated indirdep */ + ufs2_daddr_t ai_oldblkno; /* old value of block pointer */ + ufs_lbn_t ai_lbn; /* Logical block number. */ + int ai_offset; /* Pointer offset in parent. 
*/ +}; +#define ai_newblkno ai_block.nb_newblkno +#define ai_freefrag ai_block.nb_freefrag +#define ai_newdirblk ai_block.nb_newdirblk + +/* + * The allblk union is used to size the newblk structure on allocation so + * that it may be any one of three types. + */ +union allblk { + struct allocindir ab_allocindir; + struct allocdirect ab_allocdirect; + struct newblk ab_newblk; +}; + +/* + * A "freefrag" structure is attached to an "inodedep" when a previously + * allocated fragment is replaced with a larger fragment, rather than extended. + * The "freefrag" structure is constructed and attached when the replacement + * block is first allocated. It is processed after the inode claiming the + * bigger block that replaces it has been written to disk. + */ +struct freefrag { + struct worklist ff_list; /* id_inowait or delayed worklist */ +# define ff_state ff_list.wk_state + struct worklist *ff_jdep; /* Associated journal entry. */ + struct workhead ff_jwork; /* Journal work pending. */ + ufs2_daddr_t ff_blkno; /* fragment physical block number */ + long ff_fragsize; /* size of fragment being deleted */ + ino_t ff_inum; /* owning inode number */ + enum vtype ff_vtype; /* owning inode's file type */ +}; + +/* + * A "freeblks" structure is attached to an "inodedep" when the + * corresponding file's length is reduced to zero. It records all + * the information needed to free the blocks of a file after its + * zero'ed inode has been written to disk. The actual work is done + * by child freework structures which are responsible for individual + * inode pointers while freeblks is responsible for retiring the + * entire operation when it is complete and holding common members. + */ +struct freeblks { + struct worklist fb_list; /* id_inowait or delayed worklist */ +# define fb_state fb_list.wk_state /* inode and dirty block state */ + TAILQ_ENTRY(freeblks) fb_next; /* List of inode truncates. */ + struct jblkdephd fb_jblkdephd; /* Journal entries pending */ + struct workhead fb_freeworkhd; /* Work items pending */ + struct workhead fb_jwork; /* Journal work pending */ + struct vnode *fb_devvp; /* filesystem device vnode */ +#ifdef QUOTA + struct dquot *fb_quota[MAXQUOTAS]; /* quotas to be adjusted */ +#endif + uint64_t fb_modrev; /* Inode revision at start of trunc. */ + off_t fb_len; /* Length we're truncating to. */ + ufs2_daddr_t fb_chkcnt; /* Blocks released. */ + ino_t fb_inum; /* inode owner of blocks */ + enum vtype fb_vtype; /* inode owner's file type */ + uid_t fb_uid; /* uid of previous owner of blocks */ + int fb_ref; /* Children outstanding. */ + int fb_cgwait; /* cg writes outstanding. */ +}; + +/* + * A "freework" structure handles the release of a tree of blocks or a single + * block. Each indirect block in a tree is allocated its own freework + * structure so that the indirect block may be freed only when all of its + * children are freed. In this way we enforce the rule that an allocated + * block must have a valid path to a root that is journaled. Each child + * block acquires a reference and when the ref hits zero the parent ref + * is decremented. If there is no parent the freeblks ref is decremented. + */ +struct freework { + struct worklist fw_list; /* Delayed worklist. */ +# define fw_state fw_list.wk_state + LIST_ENTRY(freework) fw_segs; /* Seg list. */ + TAILQ_ENTRY(freework) fw_next; /* Hash/Trunc list. */ + struct jnewblk *fw_jnewblk; /* Journal entry to cancel. */ + struct freeblks *fw_freeblks; /* Root of operation. */ + struct freework *fw_parent; /* Parent indirect. 
*/ + struct indirdep *fw_indir; /* indirect block. */ + ufs2_daddr_t fw_blkno; /* Our block #. */ + ufs_lbn_t fw_lbn; /* Original lbn before free. */ + uint16_t fw_frags; /* Number of frags. */ + uint16_t fw_ref; /* Number of children out. */ + uint16_t fw_off; /* Current working position. */ + uint16_t fw_start; /* Start of partial truncate. */ +}; + +/* + * A "freedep" structure is allocated to track the completion of a bitmap + * write for a freework. One freedep may cover many freed blocks so long + * as they reside in the same cylinder group. When the cg is written + * the freedep decrements the ref on the freework which may permit it + * to be freed as well. + */ +struct freedep { + struct worklist fd_list; /* Delayed worklist. */ + struct freework *fd_freework; /* Parent freework. */ +}; + +/* + * A "freefile" structure is attached to an inode when its + * link count is reduced to zero. It marks the inode as free in + * the cylinder group map after the zero'ed inode has been written + * to disk and any associated blocks and fragments have been freed. + */ +struct freefile { + struct worklist fx_list; /* id_inowait or delayed worklist */ + mode_t fx_mode; /* mode of inode */ + ino_t fx_oldinum; /* inum of the unlinked file */ + struct vnode *fx_devvp; /* filesystem device vnode */ + struct workhead fx_jwork; /* journal work pending. */ +}; + +/* + * A "diradd" structure is linked to an "inodedep" id_inowait list when a + * new directory entry is allocated that references the inode described + * by "inodedep". When the inode itself is written (either the initial + * allocation for new inodes or with the increased link count for + * existing inodes), the COMPLETE flag is set in da_state. If the entry + * is for a newly allocated inode, the "inodedep" structure is associated + * with a bmsafemap which prevents the inode from being written to disk + * until the cylinder group has been updated. Thus the da_state COMPLETE + * flag cannot be set until the inode bitmap dependency has been removed. + * When creating a new file, it is safe to write the directory entry that + * claims the inode once the referenced inode has been written. Since + * writing the inode clears the bitmap dependencies, the DEPCOMPLETE flag + * in the diradd can be set unconditionally when creating a file. When + * creating a directory, there are two additional dependencies described by + * mkdir structures (see their description below). When these dependencies + * are resolved the DEPCOMPLETE flag is set in the diradd structure. + * If there are multiple links created to the same inode, there will be + * a separate diradd structure created for each link. The diradd is + * linked onto the pg_diraddhd list of the pagedep for the directory + * page that contains the entry. When a directory page is written, + * the pg_diraddhd list is traversed to rollback any entries that are + * not yet ready to be written to disk. If a directory entry is being + * changed (by rename) rather than added, the DIRCHG flag is set and + * the da_previous entry points to the entry that will be "removed" + * once the new entry has been committed. During rollback, entries + * with da_previous are replaced with the previous inode number rather + * than zero. + * + * The overlaying of da_pagedep and da_previous is done to keep the + * structure down. If a da_previous entry is present, the pointer to its + * pagedep is available in the associated dirrem entry. 
If the DIRCHG flag + * is set, the da_previous entry is valid; if not set the da_pagedep entry + * is valid. The DIRCHG flag never changes; it is set when the structure + * is created if appropriate and is never cleared. + */ +struct diradd { + struct worklist da_list; /* id_inowait or id_pendinghd list */ +# define da_state da_list.wk_state /* state of the new directory entry */ + LIST_ENTRY(diradd) da_pdlist; /* pagedep holding directory block */ + doff_t da_offset; /* offset of new dir entry in dir blk */ + ino_t da_newinum; /* inode number for the new dir entry */ + union { + struct dirrem *dau_previous; /* entry being replaced in dir change */ + struct pagedep *dau_pagedep; /* pagedep dependency for addition */ + } da_un; + struct workhead da_jwork; /* Journal work awaiting completion. */ +}; +#define da_previous da_un.dau_previous +#define da_pagedep da_un.dau_pagedep + +/* + * Two "mkdir" structures are needed to track the additional dependencies + * associated with creating a new directory entry. Normally a directory + * addition can be committed as soon as the newly referenced inode has been + * written to disk with its increased link count. When a directory is + * created there are two additional dependencies: writing the directory + * data block containing the "." and ".." entries (MKDIR_BODY) and writing + * the parent inode with the increased link count for ".." (MKDIR_PARENT). + * These additional dependencies are tracked by two mkdir structures that + * reference the associated "diradd" structure. When they have completed, + * they set the DEPCOMPLETE flag on the diradd so that it knows that its + * extra dependencies have been completed. The md_state field is used only + * to identify which type of dependency the mkdir structure is tracking. + * It is not used in the mainline code for any purpose other than consistency + * checking. All the mkdir structures in the system are linked together on + * a list. This list is needed so that a diradd can find its associated + * mkdir structures and deallocate them if it is prematurely freed (as for + * example if a mkdir is immediately followed by a rmdir of the same directory). + * Here, the free of the diradd must traverse the list to find the associated + * mkdir structures that reference it. The deletion would be faster if the + * diradd structure were simply augmented to have two pointers that referenced + * the associated mkdir's. However, this would increase the size of the diradd + * structure to speed a very infrequent operation. + */ +struct mkdir { + struct worklist md_list; /* id_inowait or buffer holding dir */ +# define md_state md_list.wk_state /* type: MKDIR_PARENT or MKDIR_BODY */ + struct diradd *md_diradd; /* associated diradd */ + struct jaddref *md_jaddref; /* dependent jaddref. */ + struct buf *md_buf; /* MKDIR_BODY: buffer holding dir */ + LIST_ENTRY(mkdir) md_mkdirs; /* list of all mkdirs */ +}; + +/* + * A "dirrem" structure describes an operation to decrement the link + * count on an inode. The dirrem structure is attached to the pg_dirremhd + * list of the pagedep for the directory page that contains the entry. + * It is processed after the directory page with the deleted entry has + * been written to disk. 
+ */ +struct dirrem { + struct worklist dm_list; /* delayed worklist */ +# define dm_state dm_list.wk_state /* state of the old directory entry */ + LIST_ENTRY(dirrem) dm_next; /* pagedep's list of dirrem's */ + LIST_ENTRY(dirrem) dm_inonext; /* inodedep's list of dirrem's */ + struct jremrefhd dm_jremrefhd; /* Pending remove reference deps. */ + ino_t dm_oldinum; /* inum of the removed dir entry */ + doff_t dm_offset; /* offset of removed dir entry in blk */ + union { + struct pagedep *dmu_pagedep; /* pagedep dependency for remove */ + ino_t dmu_dirinum; /* parent inode number (for rmdir) */ + } dm_un; + struct workhead dm_jwork; /* Journal work awaiting completion. */ +}; +#define dm_pagedep dm_un.dmu_pagedep +#define dm_dirinum dm_un.dmu_dirinum + +/* + * A "newdirblk" structure tracks the progress of a newly allocated + * directory block from its creation until it is claimed by its on-disk + * inode. When a block is allocated to a directory, an fsync of a file + * whose name is within that block must ensure not only that the block + * containing the file name has been written, but also that the on-disk + * inode references that block. When a new directory block is created, + * we allocate a newdirblk structure which is linked to the associated + * allocdirect (on its ad_newdirblk list). When the allocdirect has been + * satisfied, the newdirblk structure is moved to the inodedep id_bufwait + * list of its directory to await the inode being written. When the inode + * is written, the directory entries are fully committed and can be + * deleted from their pagedep->id_pendinghd and inodedep->id_pendinghd + * lists. Note that we could track directory blocks allocated to indirect + * blocks using a similar scheme with the allocindir structures. Rather + * than adding this level of complexity, we simply write those newly + * allocated indirect blocks synchronously as such allocations are rare. + * In the case of a new directory the . and .. links are tracked with + * a mkdir rather than a pagedep. In this case we track the mkdir + * so it can be released when it is written. A workhead is used + * to simplify canceling a mkdir that is removed by a subsequent dirrem. + */ +struct newdirblk { + struct worklist db_list; /* id_inowait or pg_newdirblk */ +# define db_state db_list.wk_state + struct pagedep *db_pagedep; /* associated pagedep */ + struct workhead db_mkdir; +}; + +/* + * The inoref structure holds the elements common to jaddref and jremref + * so they may easily be queued in-order on the inodedep. + */ +struct inoref { + struct worklist if_list; /* Journal pending or jseg entries. */ +# define if_state if_list.wk_state + TAILQ_ENTRY(inoref) if_deps; /* Links for inodedep. */ + struct jsegdep *if_jsegdep; /* Will track our journal record. */ + off_t if_diroff; /* Directory offset. */ + ino_t if_ino; /* Inode number. */ + ino_t if_parent; /* Parent inode number. */ + nlink_t if_nlink; /* nlink before addition. */ + uint16_t if_mode; /* File mode, needed for IFMT. */ +}; + +/* + * A "jaddref" structure tracks a new reference (link count) on an inode + * and prevents the link count increase and bitmap allocation until a + * journal entry can be written. Once the journal entry is written, + * the inode is put on the pendinghd of the bmsafemap and a diradd or + * mkdir entry is placed on the bufwait list of the inode. The DEPCOMPLETE + * flag is used to indicate that all of the required information for writing + * the journal entry is present. 
MKDIR_BODY and MKDIR_PARENT are used to + * differentiate . and .. links from regular file names. NEWBLOCK indicates + * a bitmap is still pending. If a new reference is canceled by a delete + * prior to writing the journal the jaddref write is canceled and the + * structure persists to prevent any disk-visible changes until it is + * ultimately released when the file is freed or the link is dropped again. + */ +struct jaddref { + struct inoref ja_ref; /* see inoref above. */ +# define ja_list ja_ref.if_list /* Jrnl pending, id_inowait, dm_jwork.*/ +# define ja_state ja_ref.if_list.wk_state + LIST_ENTRY(jaddref) ja_bmdeps; /* Links for bmsafemap. */ + union { + struct diradd *jau_diradd; /* Pending diradd. */ + struct mkdir *jau_mkdir; /* MKDIR_{PARENT,BODY} */ + } ja_un; +}; +#define ja_diradd ja_un.jau_diradd +#define ja_mkdir ja_un.jau_mkdir +#define ja_diroff ja_ref.if_diroff +#define ja_ino ja_ref.if_ino +#define ja_parent ja_ref.if_parent +#define ja_mode ja_ref.if_mode + +/* + * A "jremref" structure tracks a removed reference (unlink) on an + * inode and prevents the directory remove from proceeding until the + * journal entry is written. Once the journal has been written the remove + * may proceed as normal. + */ +struct jremref { + struct inoref jr_ref; /* see inoref above. */ +# define jr_list jr_ref.if_list /* Linked to softdep_journal_pending. */ +# define jr_state jr_ref.if_list.wk_state + LIST_ENTRY(jremref) jr_deps; /* Links for dirrem. */ + struct dirrem *jr_dirrem; /* Back pointer to dirrem. */ +}; + +/* + * A "jmvref" structure tracks a name relocations within the same + * directory block that occur as a result of directory compaction. + * It prevents the updated directory entry from being written to disk + * until the journal entry is written. Once the journal has been + * written the compacted directory may be written to disk. + */ +struct jmvref { + struct worklist jm_list; /* Linked to softdep_journal_pending. */ + LIST_ENTRY(jmvref) jm_deps; /* Jmvref on pagedep. */ + struct pagedep *jm_pagedep; /* Back pointer to pagedep. */ + ino_t jm_parent; /* Containing directory inode number. */ + ino_t jm_ino; /* Inode number of our entry. */ + off_t jm_oldoff; /* Our old offset in directory. */ + off_t jm_newoff; /* Our new offset in directory. */ +}; + +/* + * A "jnewblk" structure tracks a newly allocated block or fragment and + * prevents the direct or indirect block pointer as well as the cg bitmap + * from being written until it is logged. After it is logged the jsegdep + * is attached to the allocdirect or allocindir until the operation is + * completed or reverted. If the operation is reverted prior to the journal + * write the jnewblk structure is maintained to prevent the bitmaps from + * reaching the disk. Ultimately the jnewblk structure will be passed + * to the free routine as the in memory cg is modified back to the free + * state at which time it can be released. It may be held on any of the + * fx_jwork, fw_jwork, fb_jwork, ff_jwork, nb_jwork, or ir_jwork lists. + */ +struct jnewblk { + struct worklist jn_list; /* See lists above. */ +# define jn_state jn_list.wk_state + struct jsegdep *jn_jsegdep; /* Will track our journal record. */ + LIST_ENTRY(jnewblk) jn_deps; /* Jnewblks on sm_jnewblkhd. */ + struct worklist *jn_dep; /* Dependency to ref completed seg. */ + ufs_lbn_t jn_lbn; /* Lbn to which allocated. */ + ufs2_daddr_t jn_blkno; /* Blkno allocated */ + ino_t jn_ino; /* Ino to which allocated. */ + int jn_oldfrags; /* Previous fragments when extended. 
*/ + int jn_frags; /* Number of fragments. */ +}; + +/* + * A "jblkdep" structure tracks jfreeblk and jtrunc records attached to a + * freeblks structure. + */ +struct jblkdep { + struct worklist jb_list; /* For softdep journal pending. */ + struct jsegdep *jb_jsegdep; /* Reference to the jseg. */ + struct freeblks *jb_freeblks; /* Back pointer to freeblks. */ + LIST_ENTRY(jblkdep) jb_deps; /* Dep list on freeblks. */ + +}; + +/* + * A "jfreeblk" structure tracks the journal write for freeing a block + * or tree of blocks. The block pointer must not be cleared in the inode + * or indirect prior to the jfreeblk being written to the journal. + */ +struct jfreeblk { + struct jblkdep jf_dep; /* freeblks linkage. */ + ufs_lbn_t jf_lbn; /* Lbn from which blocks freed. */ + ufs2_daddr_t jf_blkno; /* Blkno being freed. */ + ino_t jf_ino; /* Ino from which blocks freed. */ + int jf_frags; /* Number of frags being freed. */ +}; + +/* + * A "jfreefrag" tracks the freeing of a single block when a fragment is + * extended or an indirect page is replaced. It is not part of a larger + * freeblks operation. + */ +struct jfreefrag { + struct worklist fr_list; /* Linked to softdep_journal_pending. */ +# define fr_state fr_list.wk_state + struct jsegdep *fr_jsegdep; /* Will track our journal record. */ + struct freefrag *fr_freefrag; /* Back pointer to freefrag. */ + ufs_lbn_t fr_lbn; /* Lbn from which frag freed. */ + ufs2_daddr_t fr_blkno; /* Blkno being freed. */ + ino_t fr_ino; /* Ino from which frag freed. */ + int fr_frags; /* Size of frag being freed. */ +}; + +/* + * A "jtrunc" journals the intent to truncate an inode's data or extent area. + */ +struct jtrunc { + struct jblkdep jt_dep; /* freeblks linkage. */ + off_t jt_size; /* Final file size. */ + int jt_extsize; /* Final extent size. */ + ino_t jt_ino; /* Ino being truncated. */ +}; + +/* + * A "jfsync" journals the completion of an fsync which invalidates earlier + * jtrunc records in the journal. + */ +struct jfsync { + struct worklist jfs_list; /* For softdep journal pending. */ + off_t jfs_size; /* Sync file size. */ + int jfs_extsize; /* Sync extent size. */ + ino_t jfs_ino; /* ino being synced. */ +}; + +/* + * A "jsegdep" structure tracks a single reference to a written journal + * segment so the journal space can be reclaimed when all dependencies + * have been written. It can hang off of id_inowait, dm_jwork, da_jwork, + * nb_jwork, ff_jwork, or fb_jwork lists. + */ +struct jsegdep { + struct worklist jd_list; /* See above for lists. */ +# define jd_state jd_list.wk_state + struct jseg *jd_seg; /* Our journal record. */ +}; + +/* + * A "jseg" structure contains all of the journal records written in a + * single disk write. The jaddref and jremref structures are linked into + * js_entries so thay may be completed when the write completes. The + * js_entries also include the write dependency structures: jmvref, + * jnewblk, jfreeblk, jfreefrag, and jtrunc. The js_refs field counts + * the number of entries on the js_entries list. Thus there is a single + * jseg entry to describe each journal write. + */ +struct jseg { + struct worklist js_list; /* b_deps link for journal */ +# define js_state js_list.wk_state + struct workhead js_entries; /* Entries awaiting write */ + LIST_HEAD(, freework) js_indirs;/* List of indirects in this seg. */ + TAILQ_ENTRY(jseg) js_next; /* List of all unfinished segments. 
*/ + struct jblocks *js_jblocks; /* Back pointer to block/seg list */ + struct buf *js_buf; /* Buffer while unwritten */ + uint64_t js_seq; /* Journal record sequence number. */ + uint64_t js_oldseq; /* Oldest valid sequence number. */ + int js_size; /* Size of journal record in bytes. */ + int js_cnt; /* Total items allocated. */ + int js_refs; /* Count of js_entries items. */ +}; + +/* + * A 'sbdep' structure tracks the head of the free inode list and + * superblock writes. This makes sure the superblock is always pointing at + * the first possible unlinked inode for the suj recovery process. If a + * block write completes and we discover a new head is available the buf + * is dirtied and the dep is kept. See the description of the UNLINK* + * flags above for more details. + */ +struct sbdep { + struct worklist sb_list; /* b_dep linkage */ + struct fs *sb_fs; /* Filesystem pointer within buf. */ + struct ufsmount *sb_ump; /* Our mount structure */ +}; + +/* + * Private journaling structures. + */ +struct jblocks { + struct jseglst jb_segs; /* TAILQ of current segments. */ + struct jseg *jb_writeseg; /* Next write to complete. */ + struct jseg *jb_oldestseg; /* Oldest segment with valid entries. */ + struct jextent *jb_extent; /* Extent array. */ + uint64_t jb_nextseq; /* Next sequence number. */ + uint64_t jb_oldestwrseq; /* Oldest written sequence number. */ + uint8_t jb_needseg; /* Need a forced segment. */ + uint8_t jb_suspended; /* Did journal suspend writes? */ + int jb_avail; /* Available extents. */ + int jb_used; /* Last used extent. */ + int jb_head; /* Allocator head. */ + int jb_off; /* Allocator extent offset. */ + int jb_blocks; /* Total disk blocks covered. */ + int jb_free; /* Total disk blocks free. */ + int jb_min; /* Minimum free space. */ + int jb_low; /* Low on space. */ + int jb_age; /* Insertion time of oldest rec. */ +}; + +struct jextent { + ufs2_daddr_t je_daddr; /* Disk block address. */ + int je_blocks; /* Disk block count. */ +}; + +/* + * Hash table declarations. + */ +LIST_HEAD(mkdirlist, mkdir); +LIST_HEAD(pagedep_hashhead, pagedep); +LIST_HEAD(inodedep_hashhead, inodedep); +LIST_HEAD(newblk_hashhead, newblk); +LIST_HEAD(bmsafemap_hashhead, bmsafemap); +TAILQ_HEAD(indir_hashhead, freework); + +/* + * Per-filesystem soft dependency data. + * Allocated at mount and freed at unmount. 
+ */ +struct mount_softdeps { + struct rwlock sd_fslock; /* softdep lock */ + struct workhead sd_workitem_pending; /* softdep work queue */ + struct worklist *sd_worklist_tail; /* Tail pointer for above */ + struct workhead sd_journal_pending; /* journal work queue */ + struct worklist *sd_journal_tail; /* Tail pointer for above */ + struct jblocks *sd_jblocks; /* Journal block information */ + struct inodedeplst sd_unlinked; /* Unlinked inodes */ + struct bmsafemaphd sd_dirtycg; /* Dirty CGs */ + struct mkdirlist sd_mkdirlisthd; /* Track mkdirs */ + struct pagedep_hashhead *sd_pdhash; /* pagedep hash table */ + u_long sd_pdhashsize; /* pagedep hash table size-1 */ + long sd_pdnextclean; /* next hash bucket to clean */ + struct inodedep_hashhead *sd_idhash; /* inodedep hash table */ + u_long sd_idhashsize; /* inodedep hash table size-1 */ + long sd_idnextclean; /* next hash bucket to clean */ + struct newblk_hashhead *sd_newblkhash; /* newblk hash table */ + u_long sd_newblkhashsize; /* newblk hash table size-1 */ + struct bmsafemap_hashhead *sd_bmhash; /* bmsafemap hash table */ + u_long sd_bmhashsize; /* bmsafemap hash table size-1*/ + struct indir_hashhead *sd_indirhash; /* indir hash table */ + u_long sd_indirhashsize; /* indir hash table size-1 */ + int sd_on_journal; /* Items on the journal list */ + int sd_on_worklist; /* Items on the worklist */ + int sd_deps; /* Total dependency count */ + int sd_accdeps; /* accumulated dep count */ + int sd_req; /* Wakeup when deps hits 0. */ + int sd_flags; /* comm with flushing thread */ + int sd_cleanups; /* Calls to cleanup */ + struct thread *sd_flushtd; /* thread handling flushing */ + TAILQ_ENTRY(mount_softdeps) sd_next; /* List of softdep filesystem */ + struct ufsmount *sd_ump; /* our ufsmount structure */ + u_long sd_curdeps[D_LAST + 1]; /* count of current deps */ +}; +/* + * Flags for communicating with the syncer thread. + */ +#define FLUSH_EXIT 0x0001 /* time to exit */ +#define FLUSH_CLEANUP 0x0002 /* need to clear out softdep structures */ +#define FLUSH_STARTING 0x0004 /* flush thread not yet started */ +#define FLUSH_RC_ACTIVE 0x0008 /* a thread is flushing the mount point */ + +/* + * Keep the old names from when these were in the ufsmount structure. 
+ */ +#define softdep_workitem_pending um_softdep->sd_workitem_pending +#define softdep_worklist_tail um_softdep->sd_worklist_tail +#define softdep_journal_pending um_softdep->sd_journal_pending +#define softdep_journal_tail um_softdep->sd_journal_tail +#define softdep_jblocks um_softdep->sd_jblocks +#define softdep_unlinked um_softdep->sd_unlinked +#define softdep_dirtycg um_softdep->sd_dirtycg +#define softdep_mkdirlisthd um_softdep->sd_mkdirlisthd +#define pagedep_hashtbl um_softdep->sd_pdhash +#define pagedep_hash_size um_softdep->sd_pdhashsize +#define pagedep_nextclean um_softdep->sd_pdnextclean +#define inodedep_hashtbl um_softdep->sd_idhash +#define inodedep_hash_size um_softdep->sd_idhashsize +#define inodedep_nextclean um_softdep->sd_idnextclean +#define newblk_hashtbl um_softdep->sd_newblkhash +#define newblk_hash_size um_softdep->sd_newblkhashsize +#define bmsafemap_hashtbl um_softdep->sd_bmhash +#define bmsafemap_hash_size um_softdep->sd_bmhashsize +#define indir_hashtbl um_softdep->sd_indirhash +#define indir_hash_size um_softdep->sd_indirhashsize +#define softdep_on_journal um_softdep->sd_on_journal +#define softdep_on_worklist um_softdep->sd_on_worklist +#define softdep_deps um_softdep->sd_deps +#define softdep_accdeps um_softdep->sd_accdeps +#define softdep_req um_softdep->sd_req +#define softdep_flags um_softdep->sd_flags +#define softdep_flushtd um_softdep->sd_flushtd +#define softdep_curdeps um_softdep->sd_curdeps diff --git a/Dump/ufs/ufs/README.acls b/Dump/ufs/ufs/README.acls new file mode 100644 index 0000000..0e8a9d5 --- /dev/null +++ b/Dump/ufs/ufs/README.acls @@ -0,0 +1,79 @@ +$FreeBSD: releng/11.2/sys/ufs/ufs/README.acls 105456 2002-10-19 16:09:16Z rwatson $ + + UFS Access Control Lists Copyright + +The UFS Access Control Lists implementation is copyright Robert Watson, +and is made available under a Berkeley-style license. + + About UFS Access Control Lists (ACLs) + +Access control lists allow the association of fine-grained discretionary +access control information with files and directories, extending the +base UNIX permission model in a (mostly) compatible way. This +implementation largely follows the POSIX.1e model, and relies on the +availability of extended attributes to store extended components of +the ACL, while maintaining the base permission information in the inode. + + Using UFS Access Control Lists (ACLs) + +Support for UFS access control lists may be enabled by adding: + + options UFS_ACL + +to your kernel configuration. As ACLs rely on the availability of extended +attributes, your file systems must have support for extended attributes. +For UFS2, this is supported natively, so no further configuration is +necessary. For UFS1, you must also enable the optional extended attributes +support documented in README.extattr. A summary of the instructions +and ACL-specific information follows. + +To enable support for ACLs on a file system, the 'acls' mount flag +must be set for the file system. This may be set using the tunefs +'-a' flag: + + tunefs -a enable /dev/md0a + +Or by using the mount-time flag: + + mount -o acls /dev/md0a /mnt + +The flag may also be set in /etc/fstab. Note that mounting a file +system previously configured for ACLs without ACL-support will result +in incorrect application of discretionary protections. Likewise, +mounting an ACL-enabled file system without kernel support for ACLs +will result in incorrect application of discretionary protections. 
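A quick run-time check can catch this kind of misconfiguration. The following userland sketch is illustrative only (it assumes the standard FreeBSD acl(3) routines and the _PC_ACL_EXTENDED pathconf(2) name; the path argument is a placeholder): it verifies that POSIX.1e ACLs are actually in effect on a path before relying on them, and prints the access ACL if they are.

#include <sys/types.h>
#include <sys/acl.h>
#include <stdio.h>
#include <unistd.h>

/*
 * Illustrative sketch only: check whether POSIX.1e ACLs are in effect
 * on a path and, if so, print its access ACL in text form.  The path
 * argument is a placeholder.
 */
int
main(int argc, char *argv[])
{
	const char *path = (argc > 1) ? argv[1] : "/mnt";
	acl_t acl;
	char *text;

	/* _PC_ACL_EXTENDED reports whether POSIX.1e ACLs are honoured here. */
	if (pathconf(path, _PC_ACL_EXTENDED) <= 0) {
		printf("%s: POSIX.1e ACLs not in effect\n", path);
		return (1);
	}
	if ((acl = acl_get_file(path, ACL_TYPE_ACCESS)) == NULL) {
		perror("acl_get_file");
		return (1);
	}
	if ((text = acl_to_text(acl, NULL)) != NULL) {
		printf("%s", text);
		acl_free(text);
	}
	acl_free(acl);
	return (0);
}

A non-positive pathconf() result here suggests that neither the superblock flag nor the mount-time flag described above is in effect.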
If +the kernel is not configured for ACL support, a warning will be +printed by the kernel at mount-time. For reliability purposes, it +is recommended that the superblock flag be used instead of the +mount-time flag, as this will avoid re-mount issues with the root file +system. For reliability and performance reasons, the use of ACLs on +UFS1 is discouraged; UFS2 extended attributes provide a more reliable +storage mechanism for ACLs. + +Currently, support for ACLs on UFS1 requires the use of UFS1 EAs, which may +be enabled by adding: + + options UFS_EXTATTR + +to your kernel configuration file and rebuilding. Because of filesystem +mount atomicity requirements, it is also recommended that: + + options UFS_EXTATTR_AUTOSTART + +be added to the kernel so as to support the atomic enabling of the +required extended attributes with the filesystem mount operation. To +enable ACLs, two extended attributes must be available in the +EXTATTR_NAMESPACE_SYSTEM namespace: "posix1e.acl_access", which holds +the access ACL, and "posix1e.acl_default" which holds the default ACL +for directories. If you're using UFS1 Extended Attributes, the following +commands may be used to create the necessary EA backing files for +ACLs in the filesystem root of each filesystem. In these examples, +the root filesystem is used; see README.extattr for more details. + + mkdir -p /.attribute/system + cd /.attribute/system + extattrctl initattr -p / 388 posix1e.acl_access + extattrctl initattr -p / 388 posix1e.acl_default + +On the next mount of the root filesystem, the attributes will be +automatically started, and ACLs will be enabled. diff --git a/Dump/ufs/ufs/README.extattr b/Dump/ufs/ufs/README.extattr new file mode 100644 index 0000000..eea7628 --- /dev/null +++ b/Dump/ufs/ufs/README.extattr @@ -0,0 +1,91 @@ +$FreeBSD: releng/11.2/sys/ufs/ufs/README.extattr 105417 2002-10-18 21:11:36Z rwatson $ + + UFS Extended Attributes Copyright + +The UFS Extended Attributes implementation is copyright Robert Watson, and +is made available under a Berkeley-style license. + + About UFS Extended Attributes + +Extended attributes allow the association of additional arbitrary +meta-data with files and directories. Extended attributes are defined in +the form name=value, where name is a nul-terminated string in the style +of a filename, and value is a binary blob of zero or more bytes. The UFS +extended attribute service layers support for extended attributes onto a +backing file, in the style of the quota implementation, meaning that it +requires no underlying format changes in the filesystem. This design +choice exchanges simplicity, usability and easy deployment for +performance. When defined, extended attribute names exist in a series of +disjoint namespaces: currently, two namespaces are defined: +EXTATTR_NAMESPACE_SYSTEM and EXTATTR_NAMESPACE_USER. The primary +distinction lies in the protection model: USER EAs are protected using the +normal inode protections, whereas SYSTEM EAs require privilege to access +or modify. + + Using UFS Extended Attributes + +Support for UFS extended attributes is natively available in UFS2, and +requires no special configuration. For reliability, administrative, +and performance reasons, if you plan to use extended attributes, it +is recommended that you use UFS2 in preference to UFS1. + +Support for UFS extended attributes may be enabled for UFS1 by adding: + + options UFS_EXTATTR + +to your kernel configuration file.
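Whichever way support is provided (natively on UFS2, or via UFS_EXTATTR on UFS1), userland programs read and write EAs through the extattr(2) family of system calls. The sketch below is illustrative only; the file path and the attribute name "example.note" are made-up placeholders:

#include <sys/types.h>
#include <sys/extattr.h>
#include <stdio.h>
#include <string.h>

/*
 * Illustrative sketch only: attach a small user-namespace attribute to
 * a file and read it back.  The path and the attribute name
 * "example.note" are made-up placeholders.
 */
int
main(int argc, char *argv[])
{
	const char *path = (argc > 1) ? argv[1] : "somefile";
	const char *note = "hello";
	char buf[64];
	ssize_t len;

	if (extattr_set_file(path, EXTATTR_NAMESPACE_USER, "example.note",
	    note, strlen(note)) < 0) {
		perror("extattr_set_file");
		return (1);
	}
	len = extattr_get_file(path, EXTATTR_NAMESPACE_USER, "example.note",
	    buf, sizeof(buf) - 1);
	if (len < 0) {
		perror("extattr_get_file");
		return (1);
	}
	buf[len] = '\0';
	printf("example.note = %s\n", buf);
	return (0);
}

If extended attribute support has not been started on the target filesystem, these calls simply fail with an error, which also makes them a convenient run-time probe.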
This allows UFS-based filesystems to +support extended attributes, but requires manual administration of EAs +using the extattrctl tool, including the starting of EA support for each +filesystem, and the enabling of individual attributes for the file +system. The extattrctl utility may be used to initialize backing files +before first use, to start and stop EA service on a filesystem, and to +enable and disable named attributes. The command lines for extattrctl +take the following forms: + + extattrctl start [path] + extattrctl stop [path] + extattrctl initattr [-f] [-p path] [attrsize] [attrfile] + extattrctl enable [path] [attrnamespace] [attrname] [attrfile] + extattrctl disable [path] [attrnamespace] [attrname] + +In each case, [path] is used to indicate the mounted filesystem on which +to perform the operation. [attrnamespace] refers to the namespace in +which the attribute is being manipulated, and may be "system" or "user". +The [attrname] is the attribute name to use for the operation. The +[attrfile] argument specifies the attribute backing file to use. When +using the "initattr" function to initialize a backing file, the maximum +size of attribute data must be defined in bytes using the [attrsize] +field. Optionally, the [-p path] argument may be used to indicate to +extattrctl that it should pre-allocate space for EA data, rather than +creating a sparse backing file. This prevents attribute operations from +failing in low disk-space conditions (which can be important when EAs are +used for security purposes), but pre-allocation will consume space +proportional to the product of the defined maximum attribute size and +number of attributes on the specified filesystem. + +Manual configuration increases administrative overhead, but also +introduces the possibility of race conditions during filesystem mount, if +EAs are used to support other features, as starting the EAs manually is +not atomic with the mount operation. To address this problem, an +additional kernel option may be defined to auto-start EAs on a UFS file +system based on special directories at mount-time: + + options UFS_EXTATTR_AUTOSTART + +If this option is defined, UFS will search for a ".attribute" +sub-directory of the filesystem root during the mount operation. If it +is found, EA support will be started for the filesystem. UFS will then +search for "system" and "user" sub-directories of the ".attribute" +directory for any potential backing files, and enable an EA for each valid +backing file with the name of the backing file as the attribute name. +For example, by creating the following tree, the two EAs, +posix1e.acl_access and posix1e.acl_default will be enabled in the system +namespace of the root filesystem, reserving space for attribute data: + + mkdir -p /.attribute/system + cd /.attribute/system + extattrctl initattr -p / 388 posix1e.acl_access + extattrctl initattr -p / 388 posix1e.acl_default + +On the next mount of the root filesystem, the attributes will be +automatically started. diff --git a/Dump/ufs/ufs/acl.h b/Dump/ufs/ufs/acl.h new file mode 100644 index 0000000..63b32dd --- /dev/null +++ b/Dump/ufs/ufs/acl.h @@ -0,0 +1,51 @@ +/*- + * Copyright (c) 1999-2001 Robert N. M. Watson + * All rights reserved. + * + * This software was developed by Robert Watson for the TrustedBSD Project. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: releng/11.2/sys/ufs/ufs/acl.h 200796 2009-12-21 19:39:10Z trasz $ + */ +/* + * Developed by the TrustedBSD Project. + * Support for POSIX.1e access control lists. + */ + +#ifndef _UFS_UFS_ACL_H_ +#define _UFS_UFS_ACL_H_ + +#ifdef _KERNEL + +int ufs_getacl_nfs4_internal(struct vnode *vp, struct acl *aclp, struct thread *td); +int ufs_setacl_nfs4_internal(struct vnode *vp, struct acl *aclp, struct thread *td); +void ufs_sync_acl_from_inode(struct inode *ip, struct acl *acl); +void ufs_sync_inode_from_acl(struct acl *acl, struct inode *ip); + +int ufs_getacl(struct vop_getacl_args *); +int ufs_setacl(struct vop_setacl_args *); +int ufs_aclcheck(struct vop_aclcheck_args *); + +#endif /* !_KERNEL */ + +#endif /* !_UFS_UFS_ACL_H_ */ diff --git a/Dump/ufs/ufs/dinode.h b/Dump/ufs/ufs/dinode.h new file mode 100644 index 0000000..386ac8c --- /dev/null +++ b/Dump/ufs/ufs/dinode.h @@ -0,0 +1,189 @@ +/*- + * Copyright (c) 2002 Networks Associates Technology, Inc. + * All rights reserved. + * + * This software was developed for the FreeBSD Project by Marshall + * Kirk McKusick and Network Associates Laboratories, the Security + * Research Division of Network Associates, Inc. under DARPA/SPAWAR + * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS + * research program + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Copyright (c) 1982, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The names of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)dinode.h 8.3 (Berkeley) 1/21/94 + * $FreeBSD: releng/11.2/sys/ufs/ufs/dinode.h 257029 2013-10-24 00:33:29Z pfg $ + */ + +#ifndef _UFS_UFS_DINODE_H_ +#define _UFS_UFS_DINODE_H_ + +/* + * The root inode is the root of the filesystem. Inode 0 can't be used for + * normal purposes and historically bad blocks were linked to inode 1, thus + * the root inode is 2. (Inode 1 is no longer used for this purpose, however + * numerous dump tapes make this assumption, so we are stuck with it). + */ +#define ROOTINO ((ino_t)2) + +/* + * The Whiteout inode# is a dummy non-zero inode number which will + * never be allocated to a real file. It is used as a place holder + * in the directory entry which has been tagged as a DT_WHT entry. + * See the comments about ROOTINO above. + */ +#define WINO ((ino_t)1) + +/* + * The size of physical and logical block numbers and time fields in UFS. + */ +typedef int32_t ufs1_daddr_t; +typedef int64_t ufs2_daddr_t; +typedef int64_t ufs_lbn_t; +typedef int64_t ufs_time_t; + +/* File permissions. */ +#define IEXEC 0000100 /* Executable. */ +#define IWRITE 0000200 /* Writeable. 
*/ +#define IREAD 0000400 /* Readable. */ +#define ISVTX 0001000 /* Sticky bit. */ +#define ISGID 0002000 /* Set-gid. */ +#define ISUID 0004000 /* Set-uid. */ + +/* File types. */ +#define IFMT 0170000 /* Mask of file type. */ +#define IFIFO 0010000 /* Named pipe (fifo). */ +#define IFCHR 0020000 /* Character device. */ +#define IFDIR 0040000 /* Directory file. */ +#define IFBLK 0060000 /* Block device. */ +#define IFREG 0100000 /* Regular file. */ +#define IFLNK 0120000 /* Symbolic link. */ +#define IFSOCK 0140000 /* UNIX domain socket. */ +#define IFWHT 0160000 /* Whiteout. */ + +/* + * A dinode contains all the meta-data associated with a UFS2 file. + * This structure defines the on-disk format of a dinode. Since + * this structure describes an on-disk structure, all its fields + * are defined by types with precise widths. + */ + +#define NXADDR 2 /* External addresses in inode. */ +#define NDADDR 12 /* Direct addresses in inode. */ +#define NIADDR 3 /* Indirect addresses in inode. */ + +struct ufs2_dinode { + u_int16_t di_mode; /* 0: IFMT, permissions; see below. */ + int16_t di_nlink; /* 2: File link count. */ + u_int32_t di_uid; /* 4: File owner. */ + u_int32_t di_gid; /* 8: File group. */ + u_int32_t di_blksize; /* 12: Inode blocksize. */ + u_int64_t di_size; /* 16: File byte count. */ + u_int64_t di_blocks; /* 24: Blocks actually held. */ + ufs_time_t di_atime; /* 32: Last access time. */ + ufs_time_t di_mtime; /* 40: Last modified time. */ + ufs_time_t di_ctime; /* 48: Last inode change time. */ + ufs_time_t di_birthtime; /* 56: Inode creation time. */ + int32_t di_mtimensec; /* 64: Last modified time. */ + int32_t di_atimensec; /* 68: Last access time. */ + int32_t di_ctimensec; /* 72: Last inode change time. */ + int32_t di_birthnsec; /* 76: Inode creation time. */ + u_int32_t di_gen; /* 80: Generation number. */ + u_int32_t di_kernflags; /* 84: Kernel flags. */ + u_int32_t di_flags; /* 88: Status flags (chflags). */ + u_int32_t di_extsize; /* 92: External attributes size. */ + ufs2_daddr_t di_extb[NXADDR];/* 96: External attributes block. */ + ufs2_daddr_t di_db[NDADDR]; /* 112: Direct disk blocks. */ + ufs2_daddr_t di_ib[NIADDR]; /* 208: Indirect disk blocks. */ + u_int64_t di_modrev; /* 232: i_modrev for NFSv4 */ + uint32_t di_freelink; /* 240: SUJ: Next unlinked inode. */ + uint32_t di_spare[3]; /* 244: Reserved; currently unused */ +}; + +/* + * The di_db fields may be overlaid with other information for + * file types that do not have associated disk storage. Block + * and character devices overlay the first data block with their + * dev_t value. Short symbolic links place their path in the + * di_db area. + */ +#define di_rdev di_db[0] + +/* + * A UFS1 dinode contains all the meta-data associated with a UFS1 file. + * This structure defines the on-disk format of a UFS1 dinode. Since + * this structure describes an on-disk structure, all its fields + * are defined by types with precise widths. + */ +struct ufs1_dinode { + u_int16_t di_mode; /* 0: IFMT, permissions; see below. */ + int16_t di_nlink; /* 2: File link count. */ + uint32_t di_freelink; /* 4: SUJ: Next unlinked inode. */ + u_int64_t di_size; /* 8: File byte count. */ + int32_t di_atime; /* 16: Last access time. */ + int32_t di_atimensec; /* 20: Last access time. */ + int32_t di_mtime; /* 24: Last modified time. */ + int32_t di_mtimensec; /* 28: Last modified time. */ + int32_t di_ctime; /* 32: Last inode change time. */ + int32_t di_ctimensec; /* 36: Last inode change time. 
*/ + ufs1_daddr_t di_db[NDADDR]; /* 40: Direct disk blocks. */ + ufs1_daddr_t di_ib[NIADDR]; /* 88: Indirect disk blocks. */ + u_int32_t di_flags; /* 100: Status flags (chflags). */ + u_int32_t di_blocks; /* 104: Blocks actually held. */ + u_int32_t di_gen; /* 108: Generation number. */ + u_int32_t di_uid; /* 112: File owner. */ + u_int32_t di_gid; /* 116: File group. */ + u_int64_t di_modrev; /* 120: i_modrev for NFSv4 */ +}; + +#endif /* _UFS_UFS_DINODE_H_ */ diff --git a/Dump/ufs/ufs/dir.h b/Dump/ufs/ufs/dir.h new file mode 100644 index 0000000..77aa5c7 --- /dev/null +++ b/Dump/ufs/ufs/dir.h @@ -0,0 +1,155 @@ +/*- + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)dir.h 8.2 (Berkeley) 1/21/94 + * $FreeBSD: releng/11.2/sys/ufs/ufs/dir.h 331722 2018-03-29 02:50:57Z eadler $ + */ + +#ifndef _UFS_UFS_DIR_H_ +#define _UFS_UFS_DIR_H_ + +/* + * Theoretically, directories can be more than 2Gb in length, however, in + * practice this seems unlikely. So, we define the type doff_t as a 32-bit + * quantity to keep down the cost of doing lookup on a 32-bit machine. + */ +#define doff_t int32_t +#define MAXDIRSIZE (0x7fffffff) + +/* + * A directory consists of some number of blocks of DIRBLKSIZ + * bytes, where DIRBLKSIZ is chosen such that it can be transferred + * to disk in a single atomic operation (e.g. 512 bytes on most machines). + * + * Each DIRBLKSIZ byte block contains some number of directory entry + * structures, which are of variable length. Each directory entry has + * a struct direct at the front of it, containing its inode number, + * the length of the entry, and the length of the name contained in + * the entry. 
These are followed by the name padded to a 4 byte boundary + * with null bytes. All names are guaranteed null terminated. + * The maximum length of a name in a directory is MAXNAMLEN. + * + * The macro DIRSIZ(fmt, dp) gives the amount of space required to represent + * a directory entry. Free space in a directory is represented by + * entries which have dp->d_reclen > DIRSIZ(fmt, dp). All DIRBLKSIZ bytes + * in a directory block are claimed by the directory entries. This + * usually results in the last entry in a directory having a large + * dp->d_reclen. When entries are deleted from a directory, the + * space is returned to the previous entry in the same directory + * block by increasing its dp->d_reclen. If the first entry of + * a directory block is free, then its dp->d_ino is set to 0. + * Entries other than the first in a directory do not normally have + * dp->d_ino set to 0. + */ +#define DIRBLKSIZ DEV_BSIZE +#define MAXNAMLEN 255 + +struct direct { + u_int32_t d_ino; /* inode number of entry */ + u_int16_t d_reclen; /* length of this record */ + u_int8_t d_type; /* file type, see below */ + u_int8_t d_namlen; /* length of string in d_name */ + char d_name[MAXNAMLEN + 1];/* name with length <= MAXNAMLEN */ +}; + +/* + * File types + */ +#define DT_UNKNOWN 0 +#define DT_FIFO 1 +#define DT_CHR 2 +#define DT_DIR 4 +#define DT_BLK 6 +#define DT_REG 8 +#define DT_LNK 10 +#define DT_SOCK 12 +#define DT_WHT 14 + +/* + * Convert between stat structure types and directory types. + */ +#define IFTODT(mode) (((mode) & 0170000) >> 12) +#define DTTOIF(dirtype) ((dirtype) << 12) + +/* + * The DIRSIZ macro gives the minimum record length which will hold + * the directory entry. This requires the amount of space in struct direct + * without the d_name field, plus enough space for the name with a terminating + * null byte (dp->d_namlen+1), rounded up to a 4 byte boundary. + * + * + */ +#define DIRECTSIZ(namlen) \ + ((__offsetof(struct direct, d_name) + \ + ((namlen)+1)*sizeof(((struct direct *)0)->d_name[0]) + 3) & ~3) +#if (BYTE_ORDER == LITTLE_ENDIAN) +#define DIRSIZ(oldfmt, dp) \ + ((oldfmt) ? DIRECTSIZ((dp)->d_type) : DIRECTSIZ((dp)->d_namlen)) +#else +#define DIRSIZ(oldfmt, dp) \ + DIRECTSIZ((dp)->d_namlen) +#endif +#define OLDDIRFMT 1 +#define NEWDIRFMT 0 + +/* + * Template for manipulating directories. Should use struct direct's, + * but the name field is MAXNAMLEN - 1, and this just won't do. + */ +struct dirtemplate { + u_int32_t dot_ino; + int16_t dot_reclen; + u_int8_t dot_type; + u_int8_t dot_namlen; + char dot_name[4]; /* must be multiple of 4 */ + u_int32_t dotdot_ino; + int16_t dotdot_reclen; + u_int8_t dotdot_type; + u_int8_t dotdot_namlen; + char dotdot_name[4]; /* ditto */ +}; + +/* + * This is the old format of directories, sanz type element. + */ +struct odirtemplate { + u_int32_t dot_ino; + int16_t dot_reclen; + u_int16_t dot_namlen; + char dot_name[4]; /* must be multiple of 4 */ + u_int32_t dotdot_ino; + int16_t dotdot_reclen; + u_int16_t dotdot_namlen; + char dotdot_name[4]; /* ditto */ +}; +#endif /* !_DIR_H_ */ diff --git a/Dump/ufs/ufs/dirhash.h b/Dump/ufs/ufs/dirhash.h new file mode 100644 index 0000000..f58e2df --- /dev/null +++ b/Dump/ufs/ufs/dirhash.h @@ -0,0 +1,133 @@ +/*- + * Copyright (c) 2001 Ian Dowse. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: releng/11.2/sys/ufs/ufs/dirhash.h 298804 2016-04-29 20:43:51Z pfg $ + */ + +#ifndef _UFS_UFS_DIRHASH_H_ +#define _UFS_UFS_DIRHASH_H_ + +#include +#include + +/* + * For fast operations on large directories, we maintain a hash + * that maps the file name to the offset of the directory entry within + * the directory file. + * + * The hashing uses a dumb spillover to the next free slot on + * collisions, so we must keep the utilisation low to avoid + * long linear searches. Deleted entries that are not the last + * in a chain must be marked DIRHASH_DEL. + * + * We also maintain information about free space in each block + * to speed up creations. + */ +#define DIRHASH_EMPTY (-1) /* entry unused */ +#define DIRHASH_DEL (-2) /* deleted entry; may be part of chain */ + +#define DIRALIGN 4 +#define DH_NFSTATS (DIRECTSIZ(MAXNAMLEN + 1) / DIRALIGN) + /* max DIRALIGN words in a directory entry */ + +/* + * Dirhash uses a score mechanism to achieve a hybrid between a + * least-recently-used and a least-often-used algorithm for entry + * recycling. The score is incremented when a directory is used, and + * decremented when the directory is a candidate for recycling. When + * the score reaches zero, the hash is recycled. Hashes are linked + * together on a TAILQ list, and hashes with higher scores filter + * towards the tail (most recently used) end of the list. + * + * New hash entries are given an initial score of DH_SCOREINIT and are + * placed at the most-recently-used end of the list. This helps a lot + * in the worst-case scenario where every directory access is + * to a directory that is not hashed (i.e. the working set of hash + * candidates is much larger than the configured memory limit). In this + * case it limits the number of hash builds to 1/DH_SCOREINIT of the + * number of accesses. + */ +#define DH_SCOREINIT 8 /* initial dh_score when dirhash built */ +#define DH_SCOREMAX 64 /* max dh_score value */ + +/* + * The main hash table has 2 levels. It is an array of pointers to + * blocks of DH_NBLKOFF offsets.
+ */ +#define DH_BLKOFFSHIFT 8 +#define DH_NBLKOFF (1 << DH_BLKOFFSHIFT) +#define DH_BLKOFFMASK (DH_NBLKOFF - 1) + +#define DH_ENTRY(dh, slot) \ + ((dh)->dh_hash[(slot) >> DH_BLKOFFSHIFT][(slot) & DH_BLKOFFMASK]) + +struct dirhash { + struct sx dh_lock; /* protects all fields except list & score */ + int dh_refcount; + + doff_t **dh_hash; /* the hash array (2-level) */ + int dh_narrays; /* number of entries in dh_hash */ + int dh_hlen; /* total slots in the 2-level hash array */ + int dh_hused; /* entries in use */ + int dh_memreq; /* Memory used. */ + + /* Free space statistics. XXX assumes DIRBLKSIZ is 512. */ + u_int8_t *dh_blkfree; /* free DIRALIGN words in each dir block */ + int dh_nblk; /* size of dh_blkfree array */ + int dh_dirblks; /* number of DIRBLKSIZ blocks in dir */ + int dh_firstfree[DH_NFSTATS + 1]; /* first blk with N words free */ + + doff_t dh_seqoff; /* sequential access optimisation offset */ + + int dh_score; /* access count for this dirhash */ + + int dh_onlist; /* true if on the ufsdirhash_list chain */ + + time_t dh_lastused; /* time the dirhash was last read or written*/ + + /* Protected by ufsdirhash_mtx. */ + TAILQ_ENTRY(dirhash) dh_list; /* chain of all dirhashes */ +}; + + +/* + * Dirhash functions. + */ +void ufsdirhash_init(void); +void ufsdirhash_uninit(void); +int ufsdirhash_build(struct inode *); +doff_t ufsdirhash_findfree(struct inode *, int, int *); +doff_t ufsdirhash_enduseful(struct inode *); +int ufsdirhash_lookup(struct inode *, char *, int, doff_t *, struct buf **, + doff_t *); +void ufsdirhash_newblk(struct inode *, doff_t); +void ufsdirhash_add(struct inode *, struct direct *, doff_t); +void ufsdirhash_remove(struct inode *, struct direct *, doff_t); +void ufsdirhash_move(struct inode *, struct direct *, doff_t, doff_t); +void ufsdirhash_dirtrunc(struct inode *, doff_t); +void ufsdirhash_free(struct inode *); + +void ufsdirhash_checkblock(struct inode *, char *, doff_t); + +#endif /* !_UFS_UFS_DIRHASH_H_ */ diff --git a/Dump/ufs/ufs/extattr.h b/Dump/ufs/ufs/extattr.h new file mode 100644 index 0000000..61a6939 --- /dev/null +++ b/Dump/ufs/ufs/extattr.h @@ -0,0 +1,161 @@ +/*- + * Copyright (c) 1999-2001 Robert N. M. Watson + * All rights reserved. + * + * This software was developed by Robert Watson for the TrustedBSD Project. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: releng/11.2/sys/ufs/ufs/extattr.h 306553 2016-10-01 09:19:43Z kib $ + */ +/* + * Developed by the TrustedBSD Project. + * Support for extended filesystem attributes. + */ + +#ifndef _UFS_UFS_EXTATTR_H_ +#define _UFS_UFS_EXTATTR_H_ + +#define UFS_EXTATTR_MAGIC 0x00b5d5ec +#define UFS_EXTATTR_VERSION 0x00000003 +#define UFS_EXTATTR_FSROOTSUBDIR ".attribute" +#define UFS_EXTATTR_SUBDIR_SYSTEM "system" +#define UFS_EXTATTR_SUBDIR_USER "user" +#define UFS_EXTATTR_MAXEXTATTRNAME 65 /* including null */ + +#define UFS_EXTATTR_ATTR_FLAG_INUSE 0x00000001 /* attr has been set */ +#define UFS_EXTATTR_PERM_KERNEL 0x00000000 +#define UFS_EXTATTR_PERM_ROOT 0x00000001 +#define UFS_EXTATTR_PERM_OWNER 0x00000002 +#define UFS_EXTATTR_PERM_ANYONE 0x00000003 + +#define UFS_EXTATTR_UEPM_INITIALIZED 0x00000001 +#define UFS_EXTATTR_UEPM_STARTED 0x00000002 + +#define UFS_EXTATTR_CMD_START 0x00000001 +#define UFS_EXTATTR_CMD_STOP 0x00000002 +#define UFS_EXTATTR_CMD_ENABLE 0x00000003 +#define UFS_EXTATTR_CMD_DISABLE 0x00000004 + +struct ufs_extattr_fileheader { + u_int uef_magic; /* magic number for sanity checking */ + u_int uef_version; /* version of attribute file */ + u_int uef_size; /* size of attributes, w/o header */ +}; + +struct ufs_extattr_header { + u_int ueh_flags; /* flags for attribute */ + u_int ueh_len; /* local defined length; <= uef_size */ + u_int32_t ueh_i_gen; /* generation number for sanity */ + /* data follows the header */ +}; + +/* + * This structure defines the required fields of an extended-attribute header. + */ +struct extattr { + int32_t ea_length; /* length of this attribute */ + int8_t ea_namespace; /* name space of this attribute */ + int8_t ea_contentpadlen; /* bytes of padding at end of attribute */ + int8_t ea_namelength; /* length of attribute name */ + char ea_name[1]; /* null-terminated attribute name */ + /* extended attribute content follows */ +}; + +/* + * These macros are used to access and manipulate an extended attribute: + * + * EXTATTR_NEXT(eap) returns a pointer to the next extended attribute + * following eap. + * EXTATTR_CONTENT(eap) returns a pointer to the extended attribute + * content referenced by eap. + * EXTATTR_CONTENT_SIZE(eap) returns the size of the extended attribute + * content referenced by eap. + * EXTATTR_SET_LENGTHS(eap, contentsize) called after initializing the + * attribute name to calculate and set the ea_length, ea_namelength, + * and ea_contentpadlen fields of the extended attribute structure. 
+ */ +#define EXTATTR_NEXT(eap) \ + ((struct extattr *)(((void *)(eap)) + (eap)->ea_length)) +#define EXTATTR_CONTENT(eap) (((void *)(eap)) + EXTATTR_BASE_LENGTH(eap)) +#define EXTATTR_CONTENT_SIZE(eap) \ + ((eap)->ea_length - EXTATTR_BASE_LENGTH(eap) - (eap)->ea_contentpadlen) +#define EXTATTR_BASE_LENGTH(eap) \ + ((sizeof(struct extattr) + (eap)->ea_namelength + 7) & ~7) +#define EXTATTR_SET_LENGTHS(eap, contentsize) do { \ + KASSERT(((eap)->ea_name[0] != 0), \ + ("Must initialize name before setting lengths")); \ + (eap)->ea_namelength = strlen((eap)->ea_name); \ + (eap)->ea_contentpadlen = ((contentsize) % 8) ? \ + 8 - ((contentsize) % 8) : 0; \ + (eap)->ea_length = EXTATTR_BASE_LENGTH(eap) + \ + (contentsize) + (eap)->ea_contentpadlen; \ +} while (0) + +#ifdef _KERNEL + +#include + +struct vnode; +LIST_HEAD(ufs_extattr_list_head, ufs_extattr_list_entry); +struct ufs_extattr_list_entry { + LIST_ENTRY(ufs_extattr_list_entry) uele_entries; + struct ufs_extattr_fileheader uele_fileheader; + int uele_attrnamespace; + char uele_attrname[UFS_EXTATTR_MAXEXTATTRNAME]; + struct vnode *uele_backing_vnode; +}; + +struct ucred; +struct ufs_extattr_per_mount { + struct sx uepm_lock; + struct ufs_extattr_list_head uepm_list; + struct ucred *uepm_ucred; + int uepm_flags; +}; + +struct vop_getextattr_args; +struct vop_deleteextattr_args; +struct vop_setextattr_args; + +void ufs_extattr_uepm_init(struct ufs_extattr_per_mount *uepm); +void ufs_extattr_uepm_destroy(struct ufs_extattr_per_mount *uepm); +int ufs_extattr_start(struct mount *mp, struct thread *td); +int ufs_extattr_autostart(struct mount *mp, struct thread *td); +int ufs_extattr_stop(struct mount *mp, struct thread *td); +int ufs_extattrctl(struct mount *mp, int cmd, struct vnode *filename, + int attrnamespace, const char *attrname); +int ufs_getextattr(struct vop_getextattr_args *ap); +int ufs_deleteextattr(struct vop_deleteextattr_args *ap); +int ufs_setextattr(struct vop_setextattr_args *ap); +void ufs_extattr_vnode_inactive(struct vnode *vp, struct thread *td); + +#else + +/* User-level definition of KASSERT for macros above */ +#define KASSERT(cond, str) do { \ + if (!(cond)) { printf("panic: "); printf(str); printf("\n"); exit(1); }\ +} while (0) + +#endif /* !_KERNEL */ + +#endif /* !_UFS_UFS_EXTATTR_H_ */ diff --git a/Dump/ufs/ufs/gjournal.h b/Dump/ufs/ufs/gjournal.h new file mode 100644 index 0000000..cd57fd8 --- /dev/null +++ b/Dump/ufs/ufs/gjournal.h @@ -0,0 +1,37 @@ +/*- + * Copyright (c) 2005-2006 Pawel Jakub Dawidek + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: releng/11.2/sys/ufs/ufs/gjournal.h 262678 2014-03-02 02:52:34Z pfg $ + */ + +#ifndef _UFS_UFS_GJOURNAL_H_ +#define _UFS_UFS_GJOURNAL_H_ + +/* + * GEOM journal function prototypes. + */ +void ufs_gjournal_orphan(struct vnode *fvp); +void ufs_gjournal_close(struct vnode *vp); +#endif /* !_UFS_UFS_GJOURNAL_H_ */ diff --git a/Dump/ufs/ufs/inode.h b/Dump/ufs/ufs/inode.h new file mode 100644 index 0000000..6c9bd06 --- /dev/null +++ b/Dump/ufs/ufs/inode.h @@ -0,0 +1,207 @@ +/*- + * Copyright (c) 1982, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)inode.h 8.9 (Berkeley) 5/14/95 + * $FreeBSD: releng/11.2/sys/ufs/ufs/inode.h 331722 2018-03-29 02:50:57Z eadler $ + */ + +#ifndef _UFS_UFS_INODE_H_ +#define _UFS_UFS_INODE_H_ + +#include +#include +#include + +/* + * This must agree with the definition in . + */ +#define doff_t int32_t + +/* + * The inode is used to describe each active (or recently active) file in the + * UFS filesystem. It is composed of two types of information. The first part + * is the information that is needed only while the file is active (such as + * the identity of the file and linkage to speed its lookup). 
The second part + * is the permanent meta-data associated with the file which is read in + * from the permanent dinode from long term storage when the file becomes + * active, and is put back when the file is no longer being used. + * + * An inode may only be changed while holding either the exclusive + * vnode lock or the shared vnode lock and the vnode interlock. We use + * the latter only for "read" and "get" operations that require + * changing i_flag, or a timestamp. This locking protocol allows executing + * those operations without having to upgrade the vnode lock from shared to + * exclusive. + */ +struct inode { + TAILQ_ENTRY(inode) i_nextsnap; /* snapshot file list. */ + struct vnode *i_vnode;/* Vnode associated with this inode. */ + struct ufsmount *i_ump;/* Ufsmount point associated with this inode. */ + struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */ + union { + struct dirhash *dirhash; /* Hashing for large directories. */ + daddr_t *snapblklist; /* Collect expunged snapshot blocks. */ + } i_un; + /* + * The real copy of the on-disk inode. + */ + union { + struct ufs1_dinode *din1; /* UFS1 on-disk dinode. */ + struct ufs2_dinode *din2; /* UFS2 on-disk dinode. */ + } dinode_u; + + ino_t i_number; /* The identity of the inode. */ + u_int32_t i_flag; /* flags, see below */ + int i_effnlink; /* i_nlink when I/O completes */ + + + /* + * Side effects; used during directory lookup. + */ + int32_t i_count; /* Size of free slot in directory. */ + doff_t i_endoff; /* End of useful stuff in directory. */ + doff_t i_diroff; /* Offset in dir, where we found last entry. */ + doff_t i_offset; /* Offset of free space in directory. */ + + int i_nextclustercg; /* last cg searched for cluster */ + + /* + * Data for extended attribute modification. + */ + u_char *i_ea_area; /* Pointer to malloced copy of EA area */ + unsigned i_ea_len; /* Length of i_ea_area */ + int i_ea_error; /* First errno in transaction */ + int i_ea_refs; /* Number of users of EA area */ + + /* + * Copies from the on-disk dinode itself. + */ + u_int64_t i_size; /* File byte count. */ + u_int64_t i_gen; /* Generation number. */ + u_int32_t i_flags; /* Status flags (chflags). */ + u_int32_t i_uid; /* File owner. */ + u_int32_t i_gid; /* File group. */ + u_int16_t i_mode; /* IFMT, permissions; see below. */ + int16_t i_nlink; /* File link count. */ +}; +/* + * These flags are kept in i_flag. + */ +#define IN_ACCESS 0x0001 /* Access time update request. */ +#define IN_CHANGE 0x0002 /* Inode change time update request. */ +#define IN_UPDATE 0x0004 /* Modification time update request. */ +#define IN_MODIFIED 0x0008 /* Inode has been modified. */ +#define IN_NEEDSYNC 0x0010 /* Inode requires fsync. */ +#define IN_LAZYMOD 0x0020 /* Modified, but don't write yet. */ +#define IN_LAZYACCESS 0x0040 /* Process IN_ACCESS after the + suspension finished */ +#define IN_EA_LOCKED 0x0080 +#define IN_EA_LOCKWAIT 0x0100 + +#define IN_TRUNCATED 0x0200 /* Journaled truncation pending. 
*/ + +#define IN_UFS2 0x0400 /* UFS2 vs UFS1 */ + +#define i_dirhash i_un.dirhash +#define i_snapblklist i_un.snapblklist +#define i_din1 dinode_u.din1 +#define i_din2 dinode_u.din2 + +#ifdef _KERNEL + +#define ITOUMP(ip) ((ip)->i_ump) +#define ITODEV(ip) (ITOUMP(ip)->um_dev) +#define ITODEVVP(ip) (ITOUMP(ip)->um_devvp) +#define ITOFS(ip) (ITOUMP(ip)->um_fs) +#define ITOVFS(ip) ((ip)->i_vnode->v_mount) + +static inline _Bool +I_IS_UFS1(const struct inode *ip) +{ + + return ((ip->i_flag & IN_UFS2) == 0); +} + +static inline _Bool +I_IS_UFS2(const struct inode *ip) +{ + + return ((ip->i_flag & IN_UFS2) != 0); +} + +/* + * The DIP macro is used to access fields in the dinode that are + * not cached in the inode itself. + */ +#define DIP(ip, field) (I_IS_UFS1(ip) ? (ip)->i_din1->d##field : \ + (ip)->i_din2->d##field) +#define DIP_SET(ip, field, val) do { \ + if (I_IS_UFS1(ip)) \ + (ip)->i_din1->d##field = (val); \ + else \ + (ip)->i_din2->d##field = (val); \ + } while (0) + +#define SHORTLINK(ip) (I_IS_UFS1(ip) ? \ + (caddr_t)(ip)->i_din1->di_db : (caddr_t)(ip)->i_din2->di_db) +#define IS_SNAPSHOT(ip) ((ip)->i_flags & SF_SNAPSHOT) + +/* + * Structure used to pass around logical block paths generated by + * ufs_getlbns and used by truncate and bmap code. + */ +struct indir { + ufs2_daddr_t in_lbn; /* Logical block number. */ + int in_off; /* Offset in buffer. */ +}; + +/* Convert between inode pointers and vnode pointers. */ +#define VTOI(vp) ((struct inode *)(vp)->v_data) +#define ITOV(ip) ((ip)->i_vnode) + +/* Determine if soft dependencies are being done */ +#define DOINGSOFTDEP(vp) ((vp)->v_mount->mnt_flag & (MNT_SOFTDEP | MNT_SUJ)) +#define MOUNTEDSOFTDEP(mp) ((mp)->mnt_flag & (MNT_SOFTDEP | MNT_SUJ)) +#define DOINGSUJ(vp) ((vp)->v_mount->mnt_flag & MNT_SUJ) +#define MOUNTEDSUJ(mp) ((mp)->mnt_flag & MNT_SUJ) + +/* This overlays the fid structure (see mount.h). */ +struct ufid { + u_int16_t ufid_len; /* Length of structure. */ + u_int16_t ufid_pad; /* Force 32-bit alignment. */ + uint32_t ufid_ino; /* File number (ino). */ + uint32_t ufid_gen; /* Generation number. */ +}; +#endif /* _KERNEL */ + +#endif /* !_UFS_UFS_INODE_H_ */ diff --git a/Dump/ufs/ufs/quota.h b/Dump/ufs/ufs/quota.h new file mode 100644 index 0000000..71cbb70 --- /dev/null +++ b/Dump/ufs/ufs/quota.h @@ -0,0 +1,259 @@ +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Robert Elz at The University of Melbourne. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)quota.h 8.3 (Berkeley) 8/19/94 + * $FreeBSD: releng/11.2/sys/ufs/ufs/quota.h 331722 2018-03-29 02:50:57Z eadler $ + */ + +#ifndef _UFS_UFS_QUOTA_H_ +#define _UFS_UFS_QUOTA_H_ + +/* + * Definitions for disk quotas imposed on the average user + * (big brother finally hits UNIX). + * + * The following constants define the amount of time given a user before the + * soft limits are treated as hard limits (usually resulting in an allocation + * failure). The timer is started when the user crosses their soft limit, it + * is reset when they go below their soft limit. + */ +#define MAX_IQ_TIME (7*24*60*60) /* seconds in 1 week */ +#define MAX_DQ_TIME (7*24*60*60) /* seconds in 1 week */ + +/* + * The following constants define the usage of the quota file array in the + * ufsmount structure and dquot array in the inode structure. The semantics + * of the elements of these arrays are defined in the routine getinoquota; + * the remainder of the quota code treats them generically and need not be + * inspected when changing the size of the array. + */ +#define MAXQUOTAS 2 +#define USRQUOTA 0 /* element used for user quotas */ +#define GRPQUOTA 1 /* element used for group quotas */ + +/* + * Definitions for the default names of the quotas files. + */ +#define INITQFNAMES { \ + "user", /* USRQUOTA */ \ + "group", /* GRPQUOTA */ \ + "undefined", \ +} +#define QUOTAFILENAME "quota" +#define QUOTAGROUP "operator" + +/* + * Command definitions for the 'quotactl' system call. The commands are + * broken into a main command defined below and a subcommand that is used + * to convey the type of quota that is being manipulated (see above). + */ +#define SUBCMDMASK 0x00ff +#define SUBCMDSHIFT 8 +#define QCMD(cmd, type) (((cmd) << SUBCMDSHIFT) | ((type) & SUBCMDMASK)) + +#define Q_QUOTAON 0x0100 /* enable quotas */ +#define Q_QUOTAOFF 0x0200 /* disable quotas */ +#define Q_GETQUOTA32 0x0300 /* get limits and usage (32-bit version) */ +#define Q_SETQUOTA32 0x0400 /* set limits and usage (32-bit version) */ +#define Q_SETUSE32 0x0500 /* set usage (32-bit version) */ +#define Q_SYNC 0x0600 /* sync disk copy of a filesystems quotas */ +#define Q_GETQUOTA 0x0700 /* get limits and usage (64-bit version) */ +#define Q_SETQUOTA 0x0800 /* set limits and usage (64-bit version) */ +#define Q_SETUSE 0x0900 /* set usage (64-bit version) */ +#define Q_GETQUOTASIZE 0x0A00 /* get bit-size of quota file fields */ + +/* + * The following structure defines the format of the disk quota file + * (as it appears on disk) - the file is an array of these structures + * indexed by user or group number. The setquota system call establishes + * the vnode for each quota file (a pointer is retained in the ufsmount + * structure). 
+ */ +struct dqblk32 { + u_int32_t dqb_bhardlimit; /* absolute limit on disk blks alloc */ + u_int32_t dqb_bsoftlimit; /* preferred limit on disk blks */ + u_int32_t dqb_curblocks; /* current block count */ + u_int32_t dqb_ihardlimit; /* maximum # allocated inodes + 1 */ + u_int32_t dqb_isoftlimit; /* preferred inode limit */ + u_int32_t dqb_curinodes; /* current # allocated inodes */ + int32_t dqb_btime; /* time limit for excessive disk use */ + int32_t dqb_itime; /* time limit for excessive files */ +}; + +struct dqblk64 { + u_int64_t dqb_bhardlimit; /* absolute limit on disk blks alloc */ + u_int64_t dqb_bsoftlimit; /* preferred limit on disk blks */ + u_int64_t dqb_curblocks; /* current block count */ + u_int64_t dqb_ihardlimit; /* maximum # allocated inodes + 1 */ + u_int64_t dqb_isoftlimit; /* preferred inode limit */ + u_int64_t dqb_curinodes; /* current # allocated inodes */ + int64_t dqb_btime; /* time limit for excessive disk use */ + int64_t dqb_itime; /* time limit for excessive files */ +}; + +#define dqblk dqblk64 + +#define Q_DQHDR64_MAGIC "QUOTA64" +#define Q_DQHDR64_VERSION 0x20081104 + +struct dqhdr64 { + char dqh_magic[8]; /* Q_DQHDR64_MAGIC */ + uint32_t dqh_version; /* Q_DQHDR64_VERSION */ + uint32_t dqh_hdrlen; /* header length */ + uint32_t dqh_reclen; /* record length */ + char dqh_unused[44]; /* reserved for future extension */ +}; + +#ifdef _KERNEL + +#include + +/* + * The following structure records disk usage for a user or group on a + * filesystem. There is one allocated for each quota that exists on any + * filesystem for the current user or group. A cache is kept of recently + * used entries. + * (h) protected by dqhlock + */ +struct dquot { + LIST_ENTRY(dquot) dq_hash; /* (h) hash list */ + TAILQ_ENTRY(dquot) dq_freelist; /* (h) free list */ + struct mtx dq_lock; /* lock for concurrency */ + u_int16_t dq_flags; /* flags, see below */ + u_int16_t dq_type; /* quota type of this dquot */ + u_int32_t dq_cnt; /* (h) count of active references */ + u_int32_t dq_id; /* identifier this applies to */ + struct ufsmount *dq_ump; /* (h) filesystem that this is + taken from */ + struct dqblk64 dq_dqb; /* actual usage & quotas */ +}; +/* + * Flag values. + */ +#define DQ_LOCK 0x01 /* this quota locked (no MODS) */ +#define DQ_WANT 0x02 /* wakeup on unlock */ +#define DQ_MOD 0x04 /* this quota modified since read */ +#define DQ_FAKE 0x08 /* no limits here, just usage */ +#define DQ_BLKS 0x10 /* has been warned about blk limit */ +#define DQ_INODS 0x20 /* has been warned about inode limit */ +/* + * Shorthand notation. + */ +#define dq_bhardlimit dq_dqb.dqb_bhardlimit +#define dq_bsoftlimit dq_dqb.dqb_bsoftlimit +#define dq_curblocks dq_dqb.dqb_curblocks +#define dq_ihardlimit dq_dqb.dqb_ihardlimit +#define dq_isoftlimit dq_dqb.dqb_isoftlimit +#define dq_curinodes dq_dqb.dqb_curinodes +#define dq_btime dq_dqb.dqb_btime +#define dq_itime dq_dqb.dqb_itime + +/* + * If the system has never checked for a quota for this file, then it is + * set to NODQUOT. Once a write attempt is made the inode pointer is set + * to reference a dquot structure. + */ +#define NODQUOT NULL + +/* + * Flags to chkdq() and chkiq() + */ +#define FORCE 0x01 /* force usage changes independent of limits */ +#define CHOWN 0x02 /* (advisory) change initiated by chown */ + +/* + * Macros to avoid subroutine calls to trivial functions. 
+ */ +#ifdef DIAGNOSTIC +#define DQREF(dq) dqref(dq) +#else +#define DQREF(dq) (dq)->dq_cnt++ +#endif + +#define DQI_LOCK(dq) mtx_lock(&(dq)->dq_lock) +#define DQI_UNLOCK(dq) mtx_unlock(&(dq)->dq_lock) + +#define DQI_WAIT(dq, prio, msg) do { \ + while ((dq)->dq_flags & DQ_LOCK) { \ + (dq)->dq_flags |= DQ_WANT; \ + (void) msleep((dq), \ + &(dq)->dq_lock, (prio), (msg), 0); \ + } \ +} while (0) + +#define DQI_WAKEUP(dq) do { \ + if ((dq)->dq_flags & DQ_WANT) \ + wakeup((dq)); \ + (dq)->dq_flags &= ~(DQ_WANT|DQ_LOCK); \ +} while (0) + +struct inode; +struct mount; +struct thread; +struct ucred; +struct vnode; + +int chkdq(struct inode *, int64_t, struct ucred *, int); +int chkiq(struct inode *, int, struct ucred *, int); +void dqinit(void); +void dqrele(struct vnode *, struct dquot *); +void dquninit(void); +int getinoquota(struct inode *); +int qsync(struct mount *); +int qsyncvp(struct vnode *); +int quotaoff(struct thread *, struct mount *, int); +int quotaon(struct thread *, struct mount *, int, void *); +int getquota32(struct thread *, struct mount *, u_long, int, void *); +int setquota32(struct thread *, struct mount *, u_long, int, void *); +int setuse32(struct thread *, struct mount *, u_long, int, void *); +int getquota(struct thread *, struct mount *, u_long, int, void *); +int setquota(struct thread *, struct mount *, u_long, int, void *); +int setuse(struct thread *, struct mount *, u_long, int, void *); +int getquotasize(struct thread *, struct mount *, u_long, int, void *); +vfs_quotactl_t ufs_quotactl; + +#ifdef SOFTUPDATES +int quotaref(struct vnode *, struct dquot **); +void quotarele(struct dquot **); +void quotaadj(struct dquot **, struct ufsmount *, int64_t); +#endif /* SOFTUPDATES */ + +#else /* !_KERNEL */ + +#include + +__BEGIN_DECLS +int quotactl(const char *, int, int, void *); +__END_DECLS + +#endif /* _KERNEL */ + +#endif /* !_UFS_UFS_QUOTA_H_ */ diff --git a/Dump/ufs/ufs/ufs_acl.c b/Dump/ufs/ufs/ufs_acl.c new file mode 100644 index 0000000..5c7b11a --- /dev/null +++ b/Dump/ufs/ufs/ufs_acl.c @@ -0,0 +1,698 @@ +/*- + * Copyright (c) 1999-2003 Robert N. M. Watson + * All rights reserved. + * + * This software was developed by Robert Watson for the TrustedBSD Project. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +/* + * Support for POSIX.1e access control lists: UFS-specific support functions. + */ + +#include +__FBSDID("$FreeBSD: releng/11.2/sys/ufs/ufs/ufs_acl.c 306553 2016-10-01 09:19:43Z kib $"); + +#include "opt_ufs.h" +#include "opt_quota.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef UFS_ACL + +FEATURE(ufs_acl, "ACL support for UFS"); + +/* + * Synchronize an ACL and an inode by copying over appropriate inode fields + * to the passed ACL. Assumes an ACL that would satisfy acl_posix1e_check(), + * and may panic if not. + */ +void +ufs_sync_acl_from_inode(struct inode *ip, struct acl *acl) +{ + struct acl_entry *acl_mask, *acl_group_obj; + int i; + + /* + * Update ACL_USER_OBJ, ACL_OTHER, but simply identify ACL_MASK + * and ACL_GROUP_OBJ for use after we know whether ACL_MASK is + * present. + */ + acl_mask = NULL; + acl_group_obj = NULL; + for (i = 0; i < acl->acl_cnt; i++) { + switch (acl->acl_entry[i].ae_tag) { + case ACL_USER_OBJ: + acl->acl_entry[i].ae_perm = acl_posix1e_mode_to_perm( + ACL_USER_OBJ, ip->i_mode); + acl->acl_entry[i].ae_id = ACL_UNDEFINED_ID; + break; + + case ACL_GROUP_OBJ: + acl_group_obj = &acl->acl_entry[i]; + acl->acl_entry[i].ae_id = ACL_UNDEFINED_ID; + break; + + case ACL_OTHER: + acl->acl_entry[i].ae_perm = acl_posix1e_mode_to_perm( + ACL_OTHER, ip->i_mode); + acl->acl_entry[i].ae_id = ACL_UNDEFINED_ID; + break; + + case ACL_MASK: + acl_mask = &acl->acl_entry[i]; + acl->acl_entry[i].ae_id = ACL_UNDEFINED_ID; + break; + + case ACL_USER: + case ACL_GROUP: + break; + + default: + panic("ufs_sync_acl_from_inode(): bad ae_tag"); + } + } + + if (acl_group_obj == NULL) + panic("ufs_sync_acl_from_inode(): no ACL_GROUP_OBJ"); + + if (acl_mask == NULL) { + /* + * There is no ACL_MASK, so update ACL_GROUP_OBJ. + */ + acl_group_obj->ae_perm = acl_posix1e_mode_to_perm( + ACL_GROUP_OBJ, ip->i_mode); + } else { + /* + * Update the ACL_MASK entry instead of ACL_GROUP_OBJ. + */ + acl_mask->ae_perm = acl_posix1e_mode_to_perm(ACL_GROUP_OBJ, + ip->i_mode); + } +} + +/* + * Calculate what the inode mode should look like based on an authoritative + * ACL for the inode. Replace only the fields in the inode that the ACL + * can represent. + */ +void +ufs_sync_inode_from_acl(struct acl *acl, struct inode *ip) +{ + + ip->i_mode &= ACL_PRESERVE_MASK; + ip->i_mode |= acl_posix1e_acl_to_mode(acl); + DIP_SET(ip, i_mode, ip->i_mode); +} + +/* + * Retrieve NFSv4 ACL, skipping access checks. Must be used in UFS code + * instead of VOP_GETACL() when we don't want to be restricted by the user + * not having ACL_READ_ACL permission, e.g. when calculating inherited ACL + * or in ufs_vnops.c:ufs_accessx(). + */ +int +ufs_getacl_nfs4_internal(struct vnode *vp, struct acl *aclp, struct thread *td) +{ + int error, len; + struct inode *ip = VTOI(vp); + + len = sizeof(*aclp); + bzero(aclp, len); + + error = vn_extattr_get(vp, IO_NODELOCKED, + NFS4_ACL_EXTATTR_NAMESPACE, NFS4_ACL_EXTATTR_NAME, + &len, (char *) aclp, td); + aclp->acl_maxcnt = ACL_MAX_ENTRIES; + if (error == ENOATTR) { + /* + * Legitimately no ACL set on object, purely + * emulate it through the inode. + */ + acl_nfs4_sync_acl_from_mode(aclp, ip->i_mode, ip->i_uid); + + return (0); + } + + if (error) + return (error); + + if (len != sizeof(*aclp)) { + /* + * A short (or long) read, meaning that for + * some reason the ACL is corrupted. 
Return + * EPERM since the object DAC protections + * are unsafe. + */ + printf("ufs_getacl_nfs4(): Loaded invalid ACL (" + "%d bytes), inumber %ju on %s\n", len, + (uintmax_t)ip->i_number, ITOFS(ip)->fs_fsmnt); + + return (EPERM); + } + + error = acl_nfs4_check(aclp, vp->v_type == VDIR); + if (error) { + printf("ufs_getacl_nfs4(): Loaded invalid ACL " + "(failed acl_nfs4_check), inumber %ju on %s\n", + (uintmax_t)ip->i_number, ITOFS(ip)->fs_fsmnt); + + return (EPERM); + } + + return (0); +} + +static int +ufs_getacl_nfs4(struct vop_getacl_args *ap) +{ + int error; + + if ((ap->a_vp->v_mount->mnt_flag & MNT_NFS4ACLS) == 0) + return (EINVAL); + + error = VOP_ACCESSX(ap->a_vp, VREAD_ACL, ap->a_td->td_ucred, ap->a_td); + if (error) + return (error); + + error = ufs_getacl_nfs4_internal(ap->a_vp, ap->a_aclp, ap->a_td); + + return (error); +} + +/* + * Read POSIX.1e ACL from an EA. Return error if its not found + * or if any other error has occurred. + */ +static int +ufs_get_oldacl(acl_type_t type, struct oldacl *old, struct vnode *vp, + struct thread *td) +{ + int error, len; + struct inode *ip = VTOI(vp); + + len = sizeof(*old); + + switch (type) { + case ACL_TYPE_ACCESS: + error = vn_extattr_get(vp, IO_NODELOCKED, + POSIX1E_ACL_ACCESS_EXTATTR_NAMESPACE, + POSIX1E_ACL_ACCESS_EXTATTR_NAME, &len, (char *) old, + td); + break; + case ACL_TYPE_DEFAULT: + if (vp->v_type != VDIR) + return (EINVAL); + error = vn_extattr_get(vp, IO_NODELOCKED, + POSIX1E_ACL_DEFAULT_EXTATTR_NAMESPACE, + POSIX1E_ACL_DEFAULT_EXTATTR_NAME, &len, (char *) old, + td); + break; + default: + return (EINVAL); + } + + if (error != 0) + return (error); + + if (len != sizeof(*old)) { + /* + * A short (or long) read, meaning that for some reason + * the ACL is corrupted. Return EPERM since the object + * DAC protections are unsafe. + */ + printf("ufs_get_oldacl(): Loaded invalid ACL " + "(len = %d), inumber %ju on %s\n", len, + (uintmax_t)ip->i_number, ITOFS(ip)->fs_fsmnt); + return (EPERM); + } + + return (0); +} + +/* + * Retrieve the ACL on a file. + * + * As part of the ACL is stored in the inode, and the rest in an EA, + * assemble both into a final ACL product. Right now this is not done + * very efficiently. + */ +static int +ufs_getacl_posix1e(struct vop_getacl_args *ap) +{ + struct inode *ip = VTOI(ap->a_vp); + int error; + struct oldacl *old; + + /* + * XXX: If ufs_getacl() should work on file systems not supporting + * ACLs, remove this check. + */ + if ((ap->a_vp->v_mount->mnt_flag & MNT_ACLS) == 0) + return (EINVAL); + + old = malloc(sizeof(*old), M_ACL, M_WAITOK | M_ZERO); + + /* + * Attempt to retrieve the ACL from the extended attributes. + */ + error = ufs_get_oldacl(ap->a_type, old, ap->a_vp, ap->a_td); + switch (error) { + /* + * XXX: If ufs_getacl() should work on filesystems + * without the EA configured, add case EOPNOTSUPP here. + */ + case ENOATTR: + switch (ap->a_type) { + case ACL_TYPE_ACCESS: + /* + * Legitimately no ACL set on object, purely + * emulate it through the inode. These fields will + * be updated when the ACL is synchronized with + * the inode later. 
+ */ + old->acl_cnt = 3; + old->acl_entry[0].ae_tag = ACL_USER_OBJ; + old->acl_entry[0].ae_id = ACL_UNDEFINED_ID; + old->acl_entry[0].ae_perm = ACL_PERM_NONE; + old->acl_entry[1].ae_tag = ACL_GROUP_OBJ; + old->acl_entry[1].ae_id = ACL_UNDEFINED_ID; + old->acl_entry[1].ae_perm = ACL_PERM_NONE; + old->acl_entry[2].ae_tag = ACL_OTHER; + old->acl_entry[2].ae_id = ACL_UNDEFINED_ID; + old->acl_entry[2].ae_perm = ACL_PERM_NONE; + break; + + case ACL_TYPE_DEFAULT: + /* + * Unlike ACL_TYPE_ACCESS, there is no relationship + * between the inode contents and the ACL, and it is + * therefore possible for the request for the ACL + * to fail since the ACL is undefined. In this + * situation, return success and an empty ACL, + * as required by POSIX.1e. + */ + old->acl_cnt = 0; + break; + } + /* FALLTHROUGH */ + case 0: + error = acl_copy_oldacl_into_acl(old, ap->a_aclp); + if (error != 0) + break; + + if (ap->a_type == ACL_TYPE_ACCESS) + ufs_sync_acl_from_inode(ip, ap->a_aclp); + default: + break; + } + + free(old, M_ACL); + return (error); +} + +int +ufs_getacl(ap) + struct vop_getacl_args /* { + struct vnode *vp; + acl_type_t type; + struct acl *aclp; + struct ucred *cred; + struct thread *td; + } */ *ap; +{ + + if ((ap->a_vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) == 0) + return (EOPNOTSUPP); + + if (ap->a_type == ACL_TYPE_NFS4) + return (ufs_getacl_nfs4(ap)); + + return (ufs_getacl_posix1e(ap)); +} + +/* + * Set NFSv4 ACL without doing any access checking. This is required + * e.g. by the UFS code that implements ACL inheritance, or from + * ufs_vnops.c:ufs_chmod(), as some of the checks have to be skipped + * in that case, and others are redundant. + */ +int +ufs_setacl_nfs4_internal(struct vnode *vp, struct acl *aclp, struct thread *td) +{ + int error; + mode_t mode; + struct inode *ip = VTOI(vp); + + KASSERT(acl_nfs4_check(aclp, vp->v_type == VDIR) == 0, + ("invalid ACL passed to ufs_setacl_nfs4_internal")); + + if (acl_nfs4_is_trivial(aclp, ip->i_uid)) { + error = vn_extattr_rm(vp, IO_NODELOCKED, + NFS4_ACL_EXTATTR_NAMESPACE, NFS4_ACL_EXTATTR_NAME, td); + + /* + * An attempt to remove ACL from a file that didn't have + * any extended entries is not an error. + */ + if (error == ENOATTR) + error = 0; + + } else { + error = vn_extattr_set(vp, IO_NODELOCKED, + NFS4_ACL_EXTATTR_NAMESPACE, NFS4_ACL_EXTATTR_NAME, + sizeof(*aclp), (char *) aclp, td); + } + + /* + * Map lack of attribute definition in UFS_EXTATTR into lack of + * support for ACLs on the filesystem. + */ + if (error == ENOATTR) + return (EOPNOTSUPP); + + if (error) + return (error); + + mode = ip->i_mode; + + acl_nfs4_sync_mode_from_acl(&mode, aclp); + + ip->i_mode &= ACL_PRESERVE_MASK; + ip->i_mode |= mode; + DIP_SET(ip, i_mode, ip->i_mode); + ip->i_flag |= IN_CHANGE; + + VN_KNOTE_UNLOCKED(vp, NOTE_ATTRIB); + + error = UFS_UPDATE(vp, 0); + return (error); +} + +static int +ufs_setacl_nfs4(struct vop_setacl_args *ap) +{ + int error; + struct inode *ip = VTOI(ap->a_vp); + + if ((ap->a_vp->v_mount->mnt_flag & MNT_NFS4ACLS) == 0) + return (EINVAL); + + if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); + + if (ap->a_aclp == NULL) + return (EINVAL); + + error = VOP_ACLCHECK(ap->a_vp, ap->a_type, ap->a_aclp, ap->a_cred, + ap->a_td); + if (error) + return (error); + + /* + * Authorize the ACL operation. + */ + if (ip->i_flags & (IMMUTABLE | APPEND)) + return (EPERM); + + /* + * Must hold VWRITE_ACL or have appropriate privilege. 
+ */ + if ((error = VOP_ACCESSX(ap->a_vp, VWRITE_ACL, ap->a_cred, ap->a_td))) + return (error); + + /* + * With NFSv4 ACLs, chmod(2) may need to add additional entries. + * Make sure it has enough room for that - splitting every entry + * into two and appending "canonical six" entries at the end. + */ + if (ap->a_aclp->acl_cnt > (ACL_MAX_ENTRIES - 6) / 2) + return (ENOSPC); + + error = ufs_setacl_nfs4_internal(ap->a_vp, ap->a_aclp, ap->a_td); + + return (error); +} + +/* + * Set the ACL on a file. + * + * As part of the ACL is stored in the inode, and the rest in an EA, + * this is necessarily non-atomic, and has complex authorization. + * As ufs_setacl() includes elements of ufs_chown() and ufs_chmod(), + * a fair number of different access checks may be required to go ahead + * with the operation at all. + */ +static int +ufs_setacl_posix1e(struct vop_setacl_args *ap) +{ + struct inode *ip = VTOI(ap->a_vp); + int error; + struct oldacl *old; + + if ((ap->a_vp->v_mount->mnt_flag & MNT_ACLS) == 0) + return (EINVAL); + + /* + * If this is a set operation rather than a delete operation, + * invoke VOP_ACLCHECK() on the passed ACL to determine if it is + * valid for the target. This will include a check on ap->a_type. + */ + if (ap->a_aclp != NULL) { + /* + * Set operation. + */ + error = VOP_ACLCHECK(ap->a_vp, ap->a_type, ap->a_aclp, + ap->a_cred, ap->a_td); + if (error != 0) + return (error); + } else { + /* + * Delete operation. + * POSIX.1e allows only deletion of the default ACL on a + * directory (ACL_TYPE_DEFAULT). + */ + if (ap->a_type != ACL_TYPE_DEFAULT) + return (EINVAL); + if (ap->a_vp->v_type != VDIR) + return (ENOTDIR); + } + + if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); + + /* + * Authorize the ACL operation. + */ + if (ip->i_flags & (IMMUTABLE | APPEND)) + return (EPERM); + + /* + * Must hold VADMIN (be file owner) or have appropriate privilege. + */ + if ((error = VOP_ACCESS(ap->a_vp, VADMIN, ap->a_cred, ap->a_td))) + return (error); + + switch(ap->a_type) { + case ACL_TYPE_ACCESS: + old = malloc(sizeof(*old), M_ACL, M_WAITOK | M_ZERO); + error = acl_copy_acl_into_oldacl(ap->a_aclp, old); + if (error == 0) { + error = vn_extattr_set(ap->a_vp, IO_NODELOCKED, + POSIX1E_ACL_ACCESS_EXTATTR_NAMESPACE, + POSIX1E_ACL_ACCESS_EXTATTR_NAME, sizeof(*old), + (char *) old, ap->a_td); + } + free(old, M_ACL); + break; + + case ACL_TYPE_DEFAULT: + if (ap->a_aclp == NULL) { + error = vn_extattr_rm(ap->a_vp, IO_NODELOCKED, + POSIX1E_ACL_DEFAULT_EXTATTR_NAMESPACE, + POSIX1E_ACL_DEFAULT_EXTATTR_NAME, ap->a_td); + /* + * Attempting to delete a non-present default ACL + * will return success for portability purposes. + * (TRIX) + * + * XXX: Note that since we can't distinguish + * "that EA is not supported" from "that EA is not + * defined", the success case here overlaps the + * the ENOATTR->EOPNOTSUPP case below. + */ + if (error == ENOATTR) + error = 0; + } else { + old = malloc(sizeof(*old), M_ACL, M_WAITOK | M_ZERO); + error = acl_copy_acl_into_oldacl(ap->a_aclp, old); + if (error == 0) { + error = vn_extattr_set(ap->a_vp, IO_NODELOCKED, + POSIX1E_ACL_DEFAULT_EXTATTR_NAMESPACE, + POSIX1E_ACL_DEFAULT_EXTATTR_NAME, + sizeof(*old), (char *) old, ap->a_td); + } + free(old, M_ACL); + } + break; + + default: + error = EINVAL; + } + /* + * Map lack of attribute definition in UFS_EXTATTR into lack of + * support for ACLs on the filesystem. 
+ */ + if (error == ENOATTR) + return (EOPNOTSUPP); + if (error != 0) + return (error); + + if (ap->a_type == ACL_TYPE_ACCESS) { + /* + * Now that the EA is successfully updated, update the + * inode and mark it as changed. + */ + ufs_sync_inode_from_acl(ap->a_aclp, ip); + ip->i_flag |= IN_CHANGE; + error = UFS_UPDATE(ap->a_vp, 0); + } + + VN_KNOTE_UNLOCKED(ap->a_vp, NOTE_ATTRIB); + return (error); +} + +int +ufs_setacl(ap) + struct vop_setacl_args /* { + struct vnode *vp; + acl_type_t type; + struct acl *aclp; + struct ucred *cred; + struct thread *td; + } */ *ap; +{ + if ((ap->a_vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) == 0) + return (EOPNOTSUPP); + + if (ap->a_type == ACL_TYPE_NFS4) + return (ufs_setacl_nfs4(ap)); + + return (ufs_setacl_posix1e(ap)); +} + +static int +ufs_aclcheck_nfs4(struct vop_aclcheck_args *ap) +{ + int is_directory = 0; + + if ((ap->a_vp->v_mount->mnt_flag & MNT_NFS4ACLS) == 0) + return (EINVAL); + + /* + * With NFSv4 ACLs, chmod(2) may need to add additional entries. + * Make sure it has enough room for that - splitting every entry + * into two and appending "canonical six" entries at the end. + */ + if (ap->a_aclp->acl_cnt > (ACL_MAX_ENTRIES - 6) / 2) + return (ENOSPC); + + if (ap->a_vp->v_type == VDIR) + is_directory = 1; + + return (acl_nfs4_check(ap->a_aclp, is_directory)); +} + +static int +ufs_aclcheck_posix1e(struct vop_aclcheck_args *ap) +{ + + if ((ap->a_vp->v_mount->mnt_flag & MNT_ACLS) == 0) + return (EINVAL); + + /* + * Verify we understand this type of ACL, and that it applies + * to this kind of object. + * Rely on the acl_posix1e_check() routine to verify the contents. + */ + switch(ap->a_type) { + case ACL_TYPE_ACCESS: + break; + + case ACL_TYPE_DEFAULT: + if (ap->a_vp->v_type != VDIR) + return (EINVAL); + break; + + default: + return (EINVAL); + } + + if (ap->a_aclp->acl_cnt > OLDACL_MAX_ENTRIES) + return (EINVAL); + + return (acl_posix1e_check(ap->a_aclp)); +} + +/* + * Check the validity of an ACL for a file. + */ +int +ufs_aclcheck(ap) + struct vop_aclcheck_args /* { + struct vnode *vp; + acl_type_t type; + struct acl *aclp; + struct ucred *cred; + struct thread *td; + } */ *ap; +{ + + if ((ap->a_vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) == 0) + return (EOPNOTSUPP); + + if (ap->a_type == ACL_TYPE_NFS4) + return (ufs_aclcheck_nfs4(ap)); + + return (ufs_aclcheck_posix1e(ap)); +} + +#endif /* !UFS_ACL */ diff --git a/Dump/ufs/ufs/ufs_bmap.c b/Dump/ufs/ufs/ufs_bmap.c new file mode 100644 index 0000000..501529d --- /dev/null +++ b/Dump/ufs/ufs/ufs_bmap.c @@ -0,0 +1,384 @@ +/*- + * Copyright (c) 1989, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ufs_bmap.c 8.7 (Berkeley) 3/21/95 + */ + +#include +__FBSDID("$FreeBSD: releng/11.2/sys/ufs/ufs/ufs_bmap.c 331722 2018-03-29 02:50:57Z eadler $"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +/* + * Bmap converts the logical block number of a file to its physical block + * number on the disk. The conversion is done by using the logical block + * number to index into the array of block pointers described by the dinode. + */ +int +ufs_bmap(ap) + struct vop_bmap_args /* { + struct vnode *a_vp; + daddr_t a_bn; + struct bufobj **a_bop; + daddr_t *a_bnp; + int *a_runp; + int *a_runb; + } */ *ap; +{ + ufs2_daddr_t blkno; + int error; + + /* + * Check for underlying vnode requests and ensure that logical + * to physical mapping is requested. + */ + if (ap->a_bop != NULL) + *ap->a_bop = &VFSTOUFS(ap->a_vp->v_mount)->um_devvp->v_bufobj; + if (ap->a_bnp == NULL) + return (0); + + error = ufs_bmaparray(ap->a_vp, ap->a_bn, &blkno, NULL, + ap->a_runp, ap->a_runb); + *ap->a_bnp = blkno; + return (error); +} + +/* + * Indirect blocks are now on the vnode for the file. They are given negative + * logical block numbers. Indirect blocks are addressed by the negative + * address of the first data block to which they point. Double indirect blocks + * are addressed by one less than the address of the first indirect block to + * which they point. Triple indirect blocks are addressed by one less than + * the address of the first double indirect block to which they point. + * + * ufs_bmaparray does the bmap conversion, and if requested returns the + * array of logical blocks which must be traversed to get to a block. + * Each entry contains the offset into that block that gets you to the + * next block and the disk address of the block (if it is assigned). 
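+ * For example, with the addressing scheme above the single indirect block,
+ * which covers data blocks NDADDR through NDADDR + MNINDIR(ump) - 1, is
+ * cached at logical block -NDADDR, and the double indirect block at
+ * -(NDADDR + MNINDIR(ump)) - 1.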
+ */ + +int +ufs_bmaparray(vp, bn, bnp, nbp, runp, runb) + struct vnode *vp; + ufs2_daddr_t bn; + ufs2_daddr_t *bnp; + struct buf *nbp; + int *runp; + int *runb; +{ + struct inode *ip; + struct buf *bp; + struct ufsmount *ump; + struct mount *mp; + struct indir a[NIADDR+1], *ap; + ufs2_daddr_t daddr; + ufs_lbn_t metalbn; + int error, num, maxrun = 0; + int *nump; + + ap = NULL; + ip = VTOI(vp); + mp = vp->v_mount; + ump = VFSTOUFS(mp); + + if (runp) { + maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1; + *runp = 0; + } + + if (runb) { + *runb = 0; + } + + + ap = a; + nump = &num; + error = ufs_getlbns(vp, bn, ap, nump); + if (error) + return (error); + + num = *nump; + if (num == 0) { + if (bn >= 0 && bn < NDADDR) { + *bnp = blkptrtodb(ump, DIP(ip, i_db[bn])); + } else if (bn < 0 && bn >= -NXADDR) { + *bnp = blkptrtodb(ump, ip->i_din2->di_extb[-1 - bn]); + if (*bnp == 0) + *bnp = -1; + if (nbp == NULL) + panic("ufs_bmaparray: mapping ext data"); + nbp->b_xflags |= BX_ALTDATA; + return (0); + } else { + panic("ufs_bmaparray: blkno out of range"); + } + /* + * Since this is FFS independent code, we are out of + * scope for the definitions of BLK_NOCOPY and + * BLK_SNAP, but we do know that they will fall in + * the range 1..um_seqinc, so we use that test and + * return a request for a zeroed out buffer if attempts + * are made to read a BLK_NOCOPY or BLK_SNAP block. + */ + if ((ip->i_flags & SF_SNAPSHOT) && DIP(ip, i_db[bn]) > 0 && + DIP(ip, i_db[bn]) < ump->um_seqinc) { + *bnp = -1; + } else if (*bnp == 0) { + if (ip->i_flags & SF_SNAPSHOT) + *bnp = blkptrtodb(ump, bn * ump->um_seqinc); + else + *bnp = -1; + } else if (runp) { + ufs2_daddr_t bnb = bn; + for (++bn; bn < NDADDR && *runp < maxrun && + is_sequential(ump, DIP(ip, i_db[bn - 1]), + DIP(ip, i_db[bn])); + ++bn, ++*runp); + bn = bnb; + if (runb && (bn > 0)) { + for (--bn; (bn >= 0) && (*runb < maxrun) && + is_sequential(ump, DIP(ip, i_db[bn]), + DIP(ip, i_db[bn+1])); + --bn, ++*runb); + } + } + return (0); + } + + + /* Get disk address out of indirect block array */ + daddr = DIP(ip, i_ib[ap->in_off]); + + for (bp = NULL, ++ap; --num; ++ap) { + /* + * Exit the loop if there is no disk address assigned yet and + * the indirect block isn't in the cache, or if we were + * looking for an indirect block and we've found it. + */ + + metalbn = ap->in_lbn; + if ((daddr == 0 && !incore(&vp->v_bufobj, metalbn)) || metalbn == bn) + break; + /* + * If we get here, we've either got the block in the cache + * or we have a disk address for it, go fetch it.
+ */ + if (bp) + bqrelse(bp); + + bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0, 0); + if ((bp->b_flags & B_CACHE) == 0) { +#ifdef INVARIANTS + if (!daddr) + panic("ufs_bmaparray: indirect block not in cache"); +#endif + bp->b_blkno = blkptrtodb(ump, daddr); + bp->b_iocmd = BIO_READ; + bp->b_flags &= ~B_INVAL; + bp->b_ioflags &= ~BIO_ERROR; + vfs_busy_pages(bp, 0); + bp->b_iooffset = dbtob(bp->b_blkno); + bstrategy(bp); +#ifdef RACCT + if (racct_enable) { + PROC_LOCK(curproc); + racct_add_buf(curproc, bp, 0); + PROC_UNLOCK(curproc); + } +#endif /* RACCT */ + curthread->td_ru.ru_inblock++; + error = bufwait(bp); + if (error) { + brelse(bp); + return (error); + } + } + + if (I_IS_UFS1(ip)) { + daddr = ((ufs1_daddr_t *)bp->b_data)[ap->in_off]; + if (num == 1 && daddr && runp) { + for (bn = ap->in_off + 1; + bn < MNINDIR(ump) && *runp < maxrun && + is_sequential(ump, + ((ufs1_daddr_t *)bp->b_data)[bn - 1], + ((ufs1_daddr_t *)bp->b_data)[bn]); + ++bn, ++*runp); + bn = ap->in_off; + if (runb && bn) { + for (--bn; bn >= 0 && *runb < maxrun && + is_sequential(ump, + ((ufs1_daddr_t *)bp->b_data)[bn], + ((ufs1_daddr_t *)bp->b_data)[bn+1]); + --bn, ++*runb); + } + } + continue; + } + daddr = ((ufs2_daddr_t *)bp->b_data)[ap->in_off]; + if (num == 1 && daddr && runp) { + for (bn = ap->in_off + 1; + bn < MNINDIR(ump) && *runp < maxrun && + is_sequential(ump, + ((ufs2_daddr_t *)bp->b_data)[bn - 1], + ((ufs2_daddr_t *)bp->b_data)[bn]); + ++bn, ++*runp); + bn = ap->in_off; + if (runb && bn) { + for (--bn; bn >= 0 && *runb < maxrun && + is_sequential(ump, + ((ufs2_daddr_t *)bp->b_data)[bn], + ((ufs2_daddr_t *)bp->b_data)[bn + 1]); + --bn, ++*runb); + } + } + } + if (bp) + bqrelse(bp); + + /* + * Since this is FFS independent code, we are out of scope for the + * definitions of BLK_NOCOPY and BLK_SNAP, but we do know that they + * will fall in the range 1..um_seqinc, so we use that test and + * return a request for a zeroed out buffer if attempts are made + * to read a BLK_NOCOPY or BLK_SNAP block. + */ + if ((ip->i_flags & SF_SNAPSHOT) && daddr > 0 && daddr < ump->um_seqinc){ + *bnp = -1; + return (0); + } + *bnp = blkptrtodb(ump, daddr); + if (*bnp == 0) { + if (ip->i_flags & SF_SNAPSHOT) + *bnp = blkptrtodb(ump, bn * ump->um_seqinc); + else + *bnp = -1; + } + return (0); +} + +/* + * Create an array of logical block number/offset pairs which represent the + * path of indirect blocks required to access a data block. The first "pair" + * contains the logical block number of the appropriate single, double or + * triple indirect block and the offset into the inode indirect block array. + * Note, the logical block number of the inode single/double/triple indirect + * block appears twice in the array, once with the offset into the i_ib and + * once with the offset into the page itself. + */ +int +ufs_getlbns(vp, bn, ap, nump) + struct vnode *vp; + ufs2_daddr_t bn; + struct indir *ap; + int *nump; +{ + ufs2_daddr_t blockcnt; + ufs_lbn_t metalbn, realbn; + struct ufsmount *ump; + int i, numlevels, off; + + ump = VFSTOUFS(vp->v_mount); + if (nump) + *nump = 0; + numlevels = 0; + realbn = bn; + if (bn < 0) + bn = -bn; + + /* The first NDADDR blocks are direct blocks. */ + if (bn < NDADDR) + return (0); + + /* + * Determine the number of levels of indirection. After this loop + * is done, blockcnt indicates the number of data blocks possible + * at the previous level of indirection, and NIADDR - i is the number + * of levels of indirection needed to locate the requested block. 
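+ * For example, a block that lies under the single indirect block makes the
+ * loop exit on its first pass (i == NIADDR, blockcnt == MNINDIR(ump)), so
+ * the first offset recorded below is NIADDR - i == 0, selecting i_ib[0].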
+ */ + for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) { + if (i == 0) + return (EFBIG); + blockcnt *= MNINDIR(ump); + if (bn < blockcnt) + break; + } + + /* Calculate the address of the first meta-block. */ + if (realbn >= 0) + metalbn = -(realbn - bn + NIADDR - i); + else + metalbn = -(-realbn - bn + NIADDR - i); + + /* + * At each iteration, off is the offset into the bap array which is + * an array of disk addresses at the current level of indirection. + * The logical block number and the offset in that block are stored + * into the argument array. + */ + ap->in_lbn = metalbn; + ap->in_off = off = NIADDR - i; + ap++; + for (++numlevels; i <= NIADDR; i++) { + /* If searching for a meta-data block, quit when found. */ + if (metalbn == realbn) + break; + + blockcnt /= MNINDIR(ump); + off = (bn / blockcnt) % MNINDIR(ump); + + ++numlevels; + ap->in_lbn = metalbn; + ap->in_off = off; + ++ap; + + metalbn -= -1 + off * blockcnt; + } + if (nump) + *nump = numlevels; + return (0); +} diff --git a/Dump/ufs/ufs/ufs_dirhash.c b/Dump/ufs/ufs/ufs_dirhash.c new file mode 100644 index 0000000..18f7cc9 --- /dev/null +++ b/Dump/ufs/ufs/ufs_dirhash.c @@ -0,0 +1,1324 @@ +/*- + * Copyright (c) 2001, 2002 Ian Dowse. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * This implements a hash-based lookup scheme for UFS directories. + */ + +#include +__FBSDID("$FreeBSD: releng/11.2/sys/ufs/ufs/ufs_dirhash.c 326845 2017-12-14 11:41:12Z kib $"); + +#include "opt_ufs.h" + +#ifdef UFS_DIRHASH + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#define WRAPINCR(val, limit) (((val) + 1 == (limit)) ? 0 : ((val) + 1)) +#define WRAPDECR(val, limit) (((val) == 0) ? ((limit) - 1) : ((val) - 1)) +#define OFSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0) +#define BLKFREE2IDX(n) ((n) > DH_NFSTATS ? 
DH_NFSTATS : (n)) + +static MALLOC_DEFINE(M_DIRHASH, "ufs_dirhash", "UFS directory hash tables"); + +static int ufs_mindirhashsize = DIRBLKSIZ * 5; +SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_minsize, CTLFLAG_RW, + &ufs_mindirhashsize, + 0, "minimum directory size in bytes for which to use hashed lookup"); +static int ufs_dirhashmaxmem = 2 * 1024 * 1024; /* NOTE: initial value. It is + tuned in ufsdirhash_init() */ +SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_maxmem, CTLFLAG_RW, &ufs_dirhashmaxmem, + 0, "maximum allowed dirhash memory usage"); +static int ufs_dirhashmem; +SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_mem, CTLFLAG_RD, &ufs_dirhashmem, + 0, "current dirhash memory usage"); +static int ufs_dirhashcheck = 0; +SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_docheck, CTLFLAG_RW, &ufs_dirhashcheck, + 0, "enable extra sanity tests"); +static int ufs_dirhashlowmemcount = 0; +SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_lowmemcount, CTLFLAG_RD, + &ufs_dirhashlowmemcount, 0, "number of times low memory hook called"); +static int ufs_dirhashreclaimpercent = 10; +static int ufsdirhash_set_reclaimpercent(SYSCTL_HANDLER_ARGS); +SYSCTL_PROC(_vfs_ufs, OID_AUTO, dirhash_reclaimpercent, + CTLTYPE_INT | CTLFLAG_RW, 0, 0, ufsdirhash_set_reclaimpercent, "I", + "set percentage of dirhash cache to be removed in low VM events"); + + +static int ufsdirhash_hash(struct dirhash *dh, char *name, int namelen); +static void ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff); +static void ufsdirhash_delslot(struct dirhash *dh, int slot); +static int ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen, + doff_t offset); +static doff_t ufsdirhash_getprev(struct direct *dp, doff_t offset); +static int ufsdirhash_recycle(int wanted); +static void ufsdirhash_lowmem(void); +static void ufsdirhash_free_locked(struct inode *ip); + +static uma_zone_t ufsdirhash_zone; + +#define DIRHASHLIST_LOCK() mtx_lock(&ufsdirhash_mtx) +#define DIRHASHLIST_UNLOCK() mtx_unlock(&ufsdirhash_mtx) +#define DIRHASH_BLKALLOC_WAITOK() uma_zalloc(ufsdirhash_zone, M_WAITOK) +#define DIRHASH_BLKFREE(ptr) uma_zfree(ufsdirhash_zone, (ptr)) +#define DIRHASH_ASSERT_LOCKED(dh) \ + sx_assert(&(dh)->dh_lock, SA_LOCKED) + +/* Dirhash list; recently-used entries are near the tail. */ +static TAILQ_HEAD(, dirhash) ufsdirhash_list; + +/* Protects: ufsdirhash_list, `dh_list' field, ufs_dirhashmem. */ +static struct mtx ufsdirhash_mtx; + +/* + * Locking: + * + * The relationship between inode and dirhash is protected either by an + * exclusive vnode lock or the vnode interlock where a shared vnode lock + * may be used. The dirhash_mtx is acquired after the dirhash lock. To + * handle teardown races, code wishing to lock the dirhash for an inode + * when using a shared vnode lock must obtain a private reference on the + * dirhash while holding the vnode interlock. They can drop it once they + * have obtained the dirhash lock and verified that the dirhash wasn't + * recycled while they waited for the dirhash lock. + * + * ufsdirhash_build() acquires a shared lock on the dirhash when it is + * successful. This lock is released after a call to ufsdirhash_lookup(). + * + * Functions requiring exclusive access use ufsdirhash_acquire() which may + * free a dirhash structure that was recycled by ufsdirhash_recycle(). + * + * The dirhash lock may be held across io operations. + * + * WITNESS reports a lock order reversal between the "bufwait" lock + * and the "dirhash" lock. However, this specific reversal will not + * cause a deadlock. 
To get a deadlock, one would have to lock a + * buffer followed by the dirhash while a second thread locked a + * buffer while holding the dirhash lock. The second order can happen + * under a shared or exclusive vnode lock for the associated directory + * in lookup(). The first order, however, can only happen under an + * exclusive vnode lock (e.g. unlink(), rename(), etc.). Thus, for + * a thread to be doing a "bufwait" -> "dirhash" order, it has to hold + * an exclusive vnode lock. That exclusive vnode lock will prevent + * any other threads from doing a "dirhash" -> "bufwait" order. + */ + +static void +ufsdirhash_hold(struct dirhash *dh) +{ + + refcount_acquire(&dh->dh_refcount); +} + +static void +ufsdirhash_drop(struct dirhash *dh) +{ + + if (refcount_release(&dh->dh_refcount)) { + sx_destroy(&dh->dh_lock); + free(dh, M_DIRHASH); + } +} + +/* + * Release the lock on a dirhash. + */ +static void +ufsdirhash_release(struct dirhash *dh) +{ + + sx_unlock(&dh->dh_lock); +} + +/* + * Either acquire an existing hash locked shared or create a new hash and + * return it exclusively locked. May return NULL if the allocation fails. + * + * The vnode interlock is used to protect the i_dirhash pointer from + * simultaneous access while only a shared vnode lock is held. + */ +static struct dirhash * +ufsdirhash_create(struct inode *ip) +{ + struct dirhash *ndh; + struct dirhash *dh; + struct vnode *vp; + bool excl; + + ndh = dh = NULL; + vp = ip->i_vnode; + excl = false; + for (;;) { + /* Racy check for i_dirhash to prefetch a dirhash structure. */ + if (ip->i_dirhash == NULL && ndh == NULL) { + ndh = malloc(sizeof *dh, M_DIRHASH, + M_NOWAIT | M_ZERO); + if (ndh == NULL) + return (NULL); + refcount_init(&ndh->dh_refcount, 1); + + /* + * The DUPOK is to prevent warnings from the + * sx_slock() a few lines down which is safe + * since the duplicate lock in that case is + * the one for this dirhash we are creating + * now which has no external references until + * after this function returns. + */ + sx_init_flags(&ndh->dh_lock, "dirhash", SX_DUPOK); + sx_xlock(&ndh->dh_lock); + } + /* + * Check i_dirhash. If it's NULL just try to use a + * preallocated structure. If none exists loop and try again. + */ + VI_LOCK(vp); + dh = ip->i_dirhash; + if (dh == NULL) { + ip->i_dirhash = ndh; + VI_UNLOCK(vp); + if (ndh == NULL) + continue; + return (ndh); + } + ufsdirhash_hold(dh); + VI_UNLOCK(vp); + + /* Acquire a lock on existing hashes. */ + if (excl) + sx_xlock(&dh->dh_lock); + else + sx_slock(&dh->dh_lock); + + /* The hash could've been recycled while we were waiting. */ + VI_LOCK(vp); + if (ip->i_dirhash != dh) { + VI_UNLOCK(vp); + ufsdirhash_release(dh); + ufsdirhash_drop(dh); + continue; + } + VI_UNLOCK(vp); + ufsdirhash_drop(dh); + + /* If the hash is still valid we've succeeded. */ + if (dh->dh_hash != NULL) + break; + /* + * If the hash is NULL it has been recycled. Try to upgrade + * so we can recreate it. If we fail the upgrade, drop our + * lock and try again. + */ + if (excl || sx_try_upgrade(&dh->dh_lock)) + break; + sx_sunlock(&dh->dh_lock); + excl = true; + } + /* Free the preallocated structure if it was not necessary. */ + if (ndh) { + ufsdirhash_release(ndh); + ufsdirhash_drop(ndh); + } + return (dh); +} + +/* + * Acquire an exclusive lock on an existing hash. Requires an exclusive + * vnode lock to protect the i_dirhash pointer. hashes that have been + * recycled are reclaimed here and NULL is returned. 
+ */ +static struct dirhash * +ufsdirhash_acquire(struct inode *ip) +{ + struct dirhash *dh; + + ASSERT_VOP_ELOCKED(ip->i_vnode, __FUNCTION__); + + dh = ip->i_dirhash; + if (dh == NULL) + return (NULL); + sx_xlock(&dh->dh_lock); + if (dh->dh_hash != NULL) + return (dh); + ufsdirhash_free_locked(ip); + return (NULL); +} + +/* + * Acquire exclusively and free the hash pointed to by ip. Works with a + * shared or exclusive vnode lock. + */ +void +ufsdirhash_free(struct inode *ip) +{ + struct dirhash *dh; + struct vnode *vp; + + vp = ip->i_vnode; + for (;;) { + /* Grab a reference on this inode's dirhash if it has one. */ + VI_LOCK(vp); + dh = ip->i_dirhash; + if (dh == NULL) { + VI_UNLOCK(vp); + return; + } + ufsdirhash_hold(dh); + VI_UNLOCK(vp); + + /* Exclusively lock the dirhash. */ + sx_xlock(&dh->dh_lock); + + /* If this dirhash still belongs to this inode, then free it. */ + VI_LOCK(vp); + if (ip->i_dirhash == dh) { + VI_UNLOCK(vp); + ufsdirhash_drop(dh); + break; + } + VI_UNLOCK(vp); + + /* + * This inode's dirhash has changed while we were + * waiting for the dirhash lock, so try again. + */ + ufsdirhash_release(dh); + ufsdirhash_drop(dh); + } + ufsdirhash_free_locked(ip); +} + +/* + * Attempt to build up a hash table for the directory contents in + * inode 'ip'. Returns 0 on success, or -1 of the operation failed. + */ +int +ufsdirhash_build(struct inode *ip) +{ + struct dirhash *dh; + struct buf *bp = NULL; + struct direct *ep; + struct vnode *vp; + doff_t bmask, pos; + int dirblocks, i, j, memreqd, nblocks, narrays, nslots, slot; + + /* Take care of a decreased sysctl value. */ + while (ufs_dirhashmem > ufs_dirhashmaxmem) { + if (ufsdirhash_recycle(0) != 0) + return (-1); + /* Recycled enough memory, so unlock the list. */ + DIRHASHLIST_UNLOCK(); + } + + /* Check if we can/should use dirhash. */ + if (ip->i_size < ufs_mindirhashsize || OFSFMT(ip->i_vnode) || + ip->i_effnlink == 0) { + if (ip->i_dirhash) + ufsdirhash_free(ip); + return (-1); + } + dh = ufsdirhash_create(ip); + if (dh == NULL) + return (-1); + if (dh->dh_hash != NULL) + return (0); + + vp = ip->i_vnode; + /* Allocate 50% more entries than this dir size could ever need. */ + KASSERT(ip->i_size >= DIRBLKSIZ, ("ufsdirhash_build size")); + nslots = ip->i_size / DIRECTSIZ(1); + nslots = (nslots * 3 + 1) / 2; + narrays = howmany(nslots, DH_NBLKOFF); + nslots = narrays * DH_NBLKOFF; + dirblocks = howmany(ip->i_size, DIRBLKSIZ); + nblocks = (dirblocks * 3 + 1) / 2; + memreqd = sizeof(*dh) + narrays * sizeof(*dh->dh_hash) + + narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) + + nblocks * sizeof(*dh->dh_blkfree); + DIRHASHLIST_LOCK(); + if (memreqd + ufs_dirhashmem > ufs_dirhashmaxmem) { + DIRHASHLIST_UNLOCK(); + if (memreqd > ufs_dirhashmaxmem / 2) + goto fail; + /* Try to free some space. */ + if (ufsdirhash_recycle(memreqd) != 0) + goto fail; + /* Enough was freed, and list has been locked. */ + } + ufs_dirhashmem += memreqd; + DIRHASHLIST_UNLOCK(); + + /* Initialise the hash table and block statistics. */ + dh->dh_memreq = memreqd; + dh->dh_narrays = narrays; + dh->dh_hlen = nslots; + dh->dh_nblk = nblocks; + dh->dh_dirblks = dirblocks; + for (i = 0; i < DH_NFSTATS; i++) + dh->dh_firstfree[i] = -1; + dh->dh_firstfree[DH_NFSTATS] = 0; + dh->dh_hused = 0; + dh->dh_seqoff = -1; + dh->dh_score = DH_SCOREINIT; + dh->dh_lastused = time_second; + + /* + * Use non-blocking mallocs so that we will revert to a linear + * lookup on failure rather than potentially blocking forever. 
+ */ + dh->dh_hash = malloc(narrays * sizeof(dh->dh_hash[0]), + M_DIRHASH, M_NOWAIT | M_ZERO); + if (dh->dh_hash == NULL) + goto fail; + dh->dh_blkfree = malloc(nblocks * sizeof(dh->dh_blkfree[0]), + M_DIRHASH, M_NOWAIT); + if (dh->dh_blkfree == NULL) + goto fail; + for (i = 0; i < narrays; i++) { + if ((dh->dh_hash[i] = DIRHASH_BLKALLOC_WAITOK()) == NULL) + goto fail; + for (j = 0; j < DH_NBLKOFF; j++) + dh->dh_hash[i][j] = DIRHASH_EMPTY; + } + for (i = 0; i < dirblocks; i++) + dh->dh_blkfree[i] = DIRBLKSIZ / DIRALIGN; + bmask = vp->v_mount->mnt_stat.f_iosize - 1; + pos = 0; + while (pos < ip->i_size) { + /* If necessary, get the next directory block. */ + if ((pos & bmask) == 0) { + if (bp != NULL) + brelse(bp); + if (UFS_BLKATOFF(vp, (off_t)pos, NULL, &bp) != 0) + goto fail; + } + + /* Add this entry to the hash. */ + ep = (struct direct *)((char *)bp->b_data + (pos & bmask)); + if (ep->d_reclen == 0 || ep->d_reclen > + DIRBLKSIZ - (pos & (DIRBLKSIZ - 1))) { + /* Corrupted directory. */ + brelse(bp); + goto fail; + } + if (ep->d_ino != 0) { + /* Add the entry (simplified ufsdirhash_add). */ + slot = ufsdirhash_hash(dh, ep->d_name, ep->d_namlen); + while (DH_ENTRY(dh, slot) != DIRHASH_EMPTY) + slot = WRAPINCR(slot, dh->dh_hlen); + dh->dh_hused++; + DH_ENTRY(dh, slot) = pos; + ufsdirhash_adjfree(dh, pos, -DIRSIZ(0, ep)); + } + pos += ep->d_reclen; + } + + if (bp != NULL) + brelse(bp); + DIRHASHLIST_LOCK(); + TAILQ_INSERT_TAIL(&ufsdirhash_list, dh, dh_list); + dh->dh_onlist = 1; + DIRHASHLIST_UNLOCK(); + sx_downgrade(&dh->dh_lock); + return (0); + +fail: + ufsdirhash_free_locked(ip); + return (-1); +} + +/* + * Free any hash table associated with inode 'ip'. + */ +static void +ufsdirhash_free_locked(struct inode *ip) +{ + struct dirhash *dh; + struct vnode *vp; + int i; + + DIRHASH_ASSERT_LOCKED(ip->i_dirhash); + + /* + * Clear the pointer in the inode to prevent new threads from + * finding the dead structure. + */ + vp = ip->i_vnode; + VI_LOCK(vp); + dh = ip->i_dirhash; + ip->i_dirhash = NULL; + VI_UNLOCK(vp); + + /* + * Remove the hash from the list since we are going to free its + * memory. + */ + DIRHASHLIST_LOCK(); + if (dh->dh_onlist) + TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); + ufs_dirhashmem -= dh->dh_memreq; + DIRHASHLIST_UNLOCK(); + + /* + * At this point, any waiters for the lock should hold their + * own reference on the dirhash structure. They will drop + * that reference once they grab the vnode interlock and see + * that ip->i_dirhash is NULL. + */ + sx_xunlock(&dh->dh_lock); + + /* + * Handle partially recycled as well as fully constructed hashes. + */ + if (dh->dh_hash != NULL) { + for (i = 0; i < dh->dh_narrays; i++) + if (dh->dh_hash[i] != NULL) + DIRHASH_BLKFREE(dh->dh_hash[i]); + free(dh->dh_hash, M_DIRHASH); + if (dh->dh_blkfree != NULL) + free(dh->dh_blkfree, M_DIRHASH); + } + + /* + * Drop the inode's reference to the data structure. + */ + ufsdirhash_drop(dh); +} + +/* + * Find the offset of the specified name within the given inode. + * Returns 0 on success, ENOENT if the entry does not exist, or + * EJUSTRETURN if the caller should revert to a linear search. + * + * If successful, the directory offset is stored in *offp, and a + * pointer to a struct buf containing the entry is stored in *bpp. If + * prevoffp is non-NULL, the offset of the previous entry within + * the DIRBLKSIZ-sized block is stored in *prevoffp (if the entry + * is the first in a block, the start of the block is used). + * + * Must be called with the hash locked. 
Returns with the hash unlocked. + */ +int +ufsdirhash_lookup(struct inode *ip, char *name, int namelen, doff_t *offp, + struct buf **bpp, doff_t *prevoffp) +{ + struct dirhash *dh, *dh_next; + struct direct *dp; + struct vnode *vp; + struct buf *bp; + doff_t blkoff, bmask, offset, prevoff, seqoff; + int i, slot; + int error; + + dh = ip->i_dirhash; + KASSERT(dh != NULL && dh->dh_hash != NULL, + ("ufsdirhash_lookup: Invalid dirhash %p\n", dh)); + DIRHASH_ASSERT_LOCKED(dh); + /* + * Move this dirhash towards the end of the list if it has a + * score higher than the next entry, and acquire the dh_lock. + */ + DIRHASHLIST_LOCK(); + if (TAILQ_NEXT(dh, dh_list) != NULL) { + /* + * If the new score will be greater than that of the next + * entry, then move this entry past it. With both mutexes + * held, dh_next won't go away, but its dh_score could + * change; that's not important since it is just a hint. + */ + if ((dh_next = TAILQ_NEXT(dh, dh_list)) != NULL && + dh->dh_score >= dh_next->dh_score) { + KASSERT(dh->dh_onlist, ("dirhash: not on list")); + TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); + TAILQ_INSERT_AFTER(&ufsdirhash_list, dh_next, dh, + dh_list); + } + } + /* Update the score. */ + if (dh->dh_score < DH_SCOREMAX) + dh->dh_score++; + + /* Update last used time. */ + dh->dh_lastused = time_second; + DIRHASHLIST_UNLOCK(); + + vp = ip->i_vnode; + bmask = vp->v_mount->mnt_stat.f_iosize - 1; + blkoff = -1; + bp = NULL; + seqoff = dh->dh_seqoff; +restart: + slot = ufsdirhash_hash(dh, name, namelen); + + if (seqoff != -1) { + /* + * Sequential access optimisation. seqoff contains the + * offset of the directory entry immediately following + * the last entry that was looked up. Check if this offset + * appears in the hash chain for the name we are looking for. + */ + for (i = slot; (offset = DH_ENTRY(dh, i)) != DIRHASH_EMPTY; + i = WRAPINCR(i, dh->dh_hlen)) + if (offset == seqoff) + break; + if (offset == seqoff) { + /* + * We found an entry with the expected offset. This + * is probably the entry we want, but if not, the + * code below will retry. + */ + slot = i; + } else + seqoff = -1; + } + + for (; (offset = DH_ENTRY(dh, slot)) != DIRHASH_EMPTY; + slot = WRAPINCR(slot, dh->dh_hlen)) { + if (offset == DIRHASH_DEL) + continue; + if (offset < 0 || offset >= ip->i_size) + panic("ufsdirhash_lookup: bad offset in hash array"); + if ((offset & ~bmask) != blkoff) { + if (bp != NULL) + brelse(bp); + blkoff = offset & ~bmask; + if (UFS_BLKATOFF(vp, (off_t)blkoff, NULL, &bp) != 0) { + error = EJUSTRETURN; + goto fail; + } + } + KASSERT(bp != NULL, ("no buffer allocated")); + dp = (struct direct *)(bp->b_data + (offset & bmask)); + if (dp->d_reclen == 0 || dp->d_reclen > + DIRBLKSIZ - (offset & (DIRBLKSIZ - 1))) { + /* Corrupted directory. */ + error = EJUSTRETURN; + goto fail; + } + if (dp->d_namlen == namelen && + bcmp(dp->d_name, name, namelen) == 0) { + /* Found. Get the prev offset if needed. */ + if (prevoffp != NULL) { + if (offset & (DIRBLKSIZ - 1)) { + prevoff = ufsdirhash_getprev(dp, + offset); + if (prevoff == -1) { + error = EJUSTRETURN; + goto fail; + } + } else + prevoff = offset; + *prevoffp = prevoff; + } + + /* Update offset. */ + dh->dh_seqoff = offset + DIRSIZ(0, dp); + *bpp = bp; + *offp = offset; + ufsdirhash_release(dh); + return (0); + } + + /* + * When the name doesn't match in the sequential + * optimization case, go back and search normally. 
+ */ + if (seqoff != -1) { + seqoff = -1; + goto restart; + } + } + error = ENOENT; +fail: + ufsdirhash_release(dh); + if (bp != NULL) + brelse(bp); + return (error); +} + +/* + * Find a directory block with room for 'slotneeded' bytes. Returns + * the offset of the directory entry that begins the free space. + * This will either be the offset of an existing entry that has free + * space at the end, or the offset of an entry with d_ino == 0 at + * the start of a DIRBLKSIZ block. + * + * To use the space, the caller may need to compact existing entries in + * the directory. The total number of bytes in all of the entries involved + * in the compaction is stored in *slotsize. In other words, all of + * the entries that must be compacted are exactly contained in the + * region beginning at the returned offset and spanning *slotsize bytes. + * + * Returns -1 if no space was found, indicating that the directory + * must be extended. + */ +doff_t +ufsdirhash_findfree(struct inode *ip, int slotneeded, int *slotsize) +{ + struct direct *dp; + struct dirhash *dh; + struct buf *bp; + doff_t pos, slotstart; + int dirblock, error, freebytes, i; + + dh = ip->i_dirhash; + KASSERT(dh != NULL && dh->dh_hash != NULL, + ("ufsdirhash_findfree: Invalid dirhash %p\n", dh)); + DIRHASH_ASSERT_LOCKED(dh); + + /* Find a directory block with the desired free space. */ + dirblock = -1; + for (i = howmany(slotneeded, DIRALIGN); i <= DH_NFSTATS; i++) + if ((dirblock = dh->dh_firstfree[i]) != -1) + break; + if (dirblock == -1) + return (-1); + + KASSERT(dirblock < dh->dh_nblk && + dh->dh_blkfree[dirblock] >= howmany(slotneeded, DIRALIGN), + ("ufsdirhash_findfree: bad stats")); + pos = dirblock * DIRBLKSIZ; + error = UFS_BLKATOFF(ip->i_vnode, (off_t)pos, (char **)&dp, &bp); + if (error) + return (-1); + + /* Find the first entry with free space. */ + for (i = 0; i < DIRBLKSIZ; ) { + if (dp->d_reclen == 0) { + brelse(bp); + return (-1); + } + if (dp->d_ino == 0 || dp->d_reclen > DIRSIZ(0, dp)) + break; + i += dp->d_reclen; + dp = (struct direct *)((char *)dp + dp->d_reclen); + } + if (i > DIRBLKSIZ) { + brelse(bp); + return (-1); + } + slotstart = pos + i; + + /* Find the range of entries needed to get enough space */ + freebytes = 0; + while (i < DIRBLKSIZ && freebytes < slotneeded) { + freebytes += dp->d_reclen; + if (dp->d_ino != 0) + freebytes -= DIRSIZ(0, dp); + if (dp->d_reclen == 0) { + brelse(bp); + return (-1); + } + i += dp->d_reclen; + dp = (struct direct *)((char *)dp + dp->d_reclen); + } + if (i > DIRBLKSIZ) { + brelse(bp); + return (-1); + } + if (freebytes < slotneeded) + panic("ufsdirhash_findfree: free mismatch"); + brelse(bp); + *slotsize = pos + i - slotstart; + return (slotstart); +} + +/* + * Return the start of the unused space at the end of a directory, or + * -1 if there are no trailing unused blocks. + */ +doff_t +ufsdirhash_enduseful(struct inode *ip) +{ + + struct dirhash *dh; + int i; + + dh = ip->i_dirhash; + DIRHASH_ASSERT_LOCKED(dh); + KASSERT(dh != NULL && dh->dh_hash != NULL, + ("ufsdirhash_enduseful: Invalid dirhash %p\n", dh)); + + if (dh->dh_blkfree[dh->dh_dirblks - 1] != DIRBLKSIZ / DIRALIGN) + return (-1); + + for (i = dh->dh_dirblks - 1; i >= 0; i--) + if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN) + break; + + return ((doff_t)(i + 1) * DIRBLKSIZ); +} + +/* + * Insert information into the hash about a new directory entry. dirp + * points to a struct direct containing the entry, and offset specifies + * the offset of this entry. 
+ */ +void +ufsdirhash_add(struct inode *ip, struct direct *dirp, doff_t offset) +{ + struct dirhash *dh; + int slot; + + if ((dh = ufsdirhash_acquire(ip)) == NULL) + return; + + KASSERT(offset < dh->dh_dirblks * DIRBLKSIZ, + ("ufsdirhash_add: bad offset")); + /* + * Normal hash usage is < 66%. If the usage gets too high then + * remove the hash entirely and let it be rebuilt later. + */ + if (dh->dh_hused >= (dh->dh_hlen * 3) / 4) { + ufsdirhash_free_locked(ip); + return; + } + + /* Find a free hash slot (empty or deleted), and add the entry. */ + slot = ufsdirhash_hash(dh, dirp->d_name, dirp->d_namlen); + while (DH_ENTRY(dh, slot) >= 0) + slot = WRAPINCR(slot, dh->dh_hlen); + if (DH_ENTRY(dh, slot) == DIRHASH_EMPTY) + dh->dh_hused++; + DH_ENTRY(dh, slot) = offset; + + /* Update last used time. */ + dh->dh_lastused = time_second; + + /* Update the per-block summary info. */ + ufsdirhash_adjfree(dh, offset, -DIRSIZ(0, dirp)); + ufsdirhash_release(dh); +} + +/* + * Remove the specified directory entry from the hash. The entry to remove + * is defined by the name in `dirp', which must exist at the specified + * `offset' within the directory. + */ +void +ufsdirhash_remove(struct inode *ip, struct direct *dirp, doff_t offset) +{ + struct dirhash *dh; + int slot; + + if ((dh = ufsdirhash_acquire(ip)) == NULL) + return; + + KASSERT(offset < dh->dh_dirblks * DIRBLKSIZ, + ("ufsdirhash_remove: bad offset")); + /* Find the entry */ + slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, offset); + + /* Remove the hash entry. */ + ufsdirhash_delslot(dh, slot); + + /* Update the per-block summary info. */ + ufsdirhash_adjfree(dh, offset, DIRSIZ(0, dirp)); + ufsdirhash_release(dh); +} + +/* + * Change the offset associated with a directory entry in the hash. Used + * when compacting directory blocks. + */ +void +ufsdirhash_move(struct inode *ip, struct direct *dirp, doff_t oldoff, + doff_t newoff) +{ + struct dirhash *dh; + int slot; + + if ((dh = ufsdirhash_acquire(ip)) == NULL) + return; + + KASSERT(oldoff < dh->dh_dirblks * DIRBLKSIZ && + newoff < dh->dh_dirblks * DIRBLKSIZ, + ("ufsdirhash_move: bad offset")); + /* Find the entry, and update the offset. */ + slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, oldoff); + DH_ENTRY(dh, slot) = newoff; + ufsdirhash_release(dh); +} + +/* + * Inform dirhash that the directory has grown by one block that + * begins at offset (i.e. the new length is offset + DIRBLKSIZ). + */ +void +ufsdirhash_newblk(struct inode *ip, doff_t offset) +{ + struct dirhash *dh; + int block; + + if ((dh = ufsdirhash_acquire(ip)) == NULL) + return; + + KASSERT(offset == dh->dh_dirblks * DIRBLKSIZ, + ("ufsdirhash_newblk: bad offset")); + block = offset / DIRBLKSIZ; + if (block >= dh->dh_nblk) { + /* Out of space; must rebuild. */ + ufsdirhash_free_locked(ip); + return; + } + dh->dh_dirblks = block + 1; + + /* Account for the new free block. */ + dh->dh_blkfree[block] = DIRBLKSIZ / DIRALIGN; + if (dh->dh_firstfree[DH_NFSTATS] == -1) + dh->dh_firstfree[DH_NFSTATS] = block; + ufsdirhash_release(dh); +} + +/* + * Inform dirhash that the directory is being truncated. 
+ */ +void +ufsdirhash_dirtrunc(struct inode *ip, doff_t offset) +{ + struct dirhash *dh; + int block, i; + + if ((dh = ufsdirhash_acquire(ip)) == NULL) + return; + + KASSERT(offset <= dh->dh_dirblks * DIRBLKSIZ, + ("ufsdirhash_dirtrunc: bad offset")); + block = howmany(offset, DIRBLKSIZ); + /* + * If the directory shrinks to less than 1/8 of dh_nblk blocks + * (about 20% of its original size due to the 50% extra added in + * ufsdirhash_build) then free it, and let the caller rebuild + * if necessary. + */ + if (block < dh->dh_nblk / 8 && dh->dh_narrays > 1) { + ufsdirhash_free_locked(ip); + return; + } + + /* + * Remove any `first free' information pertaining to the + * truncated blocks. All blocks we're removing should be + * completely unused. + */ + if (dh->dh_firstfree[DH_NFSTATS] >= block) + dh->dh_firstfree[DH_NFSTATS] = -1; + for (i = block; i < dh->dh_dirblks; i++) + if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN) + panic("ufsdirhash_dirtrunc: blocks in use"); + for (i = 0; i < DH_NFSTATS; i++) + if (dh->dh_firstfree[i] >= block) + panic("ufsdirhash_dirtrunc: first free corrupt"); + dh->dh_dirblks = block; + ufsdirhash_release(dh); +} + +/* + * Debugging function to check that the dirhash information about + * a directory block matches its actual contents. Panics if a mismatch + * is detected. + * + * On entry, `buf' should point to the start of an in-core + * DIRBLKSIZ-sized directory block, and `offset' should contain the + * offset from the start of the directory of that block. + */ +void +ufsdirhash_checkblock(struct inode *ip, char *buf, doff_t offset) +{ + struct dirhash *dh; + struct direct *dp; + int block, ffslot, i, nfree; + + if (!ufs_dirhashcheck) + return; + if ((dh = ufsdirhash_acquire(ip)) == NULL) + return; + + block = offset / DIRBLKSIZ; + if ((offset & (DIRBLKSIZ - 1)) != 0 || block >= dh->dh_dirblks) + panic("ufsdirhash_checkblock: bad offset"); + + nfree = 0; + for (i = 0; i < DIRBLKSIZ; i += dp->d_reclen) { + dp = (struct direct *)(buf + i); + if (dp->d_reclen == 0 || i + dp->d_reclen > DIRBLKSIZ) + panic("ufsdirhash_checkblock: bad dir"); + + if (dp->d_ino == 0) { +#if 0 + /* + * XXX entries with d_ino == 0 should only occur + * at the start of a DIRBLKSIZ block. However the + * ufs code is tolerant of such entries at other + * offsets, and fsck does not fix them. + */ + if (i != 0) + panic("ufsdirhash_checkblock: bad dir inode"); +#endif + nfree += dp->d_reclen; + continue; + } + + /* Check that the entry exists (will panic if it doesn't). */ + ufsdirhash_findslot(dh, dp->d_name, dp->d_namlen, offset + i); + + nfree += dp->d_reclen - DIRSIZ(0, dp); + } + if (i != DIRBLKSIZ) + panic("ufsdirhash_checkblock: bad dir end"); + + if (dh->dh_blkfree[block] * DIRALIGN != nfree) + panic("ufsdirhash_checkblock: bad free count"); + + ffslot = BLKFREE2IDX(nfree / DIRALIGN); + for (i = 0; i <= DH_NFSTATS; i++) + if (dh->dh_firstfree[i] == block && i != ffslot) + panic("ufsdirhash_checkblock: bad first-free"); + if (dh->dh_firstfree[ffslot] == -1) + panic("ufsdirhash_checkblock: missing first-free entry"); + ufsdirhash_release(dh); +} + +/* + * Hash the specified filename into a dirhash slot. + */ +static int +ufsdirhash_hash(struct dirhash *dh, char *name, int namelen) +{ + u_int32_t hash; + + /* + * We hash the name and then some other bit of data that is + * invariant over the dirhash's lifetime. Otherwise names + * differing only in the last byte are placed close to one + * another in the table, which is bad for linear probing. 
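+ * The invariant data used here is the address of the dirhash structure
+ * itself, folded in by the second fnv_32_buf() call below.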
+ */ + hash = fnv_32_buf(name, namelen, FNV1_32_INIT); + hash = fnv_32_buf(&dh, sizeof(dh), hash); + return (hash % dh->dh_hlen); +} + +/* + * Adjust the number of free bytes in the block containing `offset' + * by the value specified by `diff'. + * + * The caller must ensure we have exclusive access to `dh'; normally + * that means that dh_lock should be held, but this is also called + * from ufsdirhash_build() where exclusive access can be assumed. + */ +static void +ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff) +{ + int block, i, nfidx, ofidx; + + /* Update the per-block summary info. */ + block = offset / DIRBLKSIZ; + KASSERT(block < dh->dh_nblk && block < dh->dh_dirblks, + ("dirhash bad offset")); + ofidx = BLKFREE2IDX(dh->dh_blkfree[block]); + dh->dh_blkfree[block] = (int)dh->dh_blkfree[block] + (diff / DIRALIGN); + nfidx = BLKFREE2IDX(dh->dh_blkfree[block]); + + /* Update the `first free' list if necessary. */ + if (ofidx != nfidx) { + /* If removing, scan forward for the next block. */ + if (dh->dh_firstfree[ofidx] == block) { + for (i = block + 1; i < dh->dh_dirblks; i++) + if (BLKFREE2IDX(dh->dh_blkfree[i]) == ofidx) + break; + dh->dh_firstfree[ofidx] = (i < dh->dh_dirblks) ? i : -1; + } + + /* Make this the new `first free' if necessary */ + if (dh->dh_firstfree[nfidx] > block || + dh->dh_firstfree[nfidx] == -1) + dh->dh_firstfree[nfidx] = block; + } +} + +/* + * Find the specified name which should have the specified offset. + * Returns a slot number, and panics on failure. + * + * `dh' must be locked on entry and remains so on return. + */ +static int +ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen, doff_t offset) +{ + int slot; + + DIRHASH_ASSERT_LOCKED(dh); + + /* Find the entry. */ + KASSERT(dh->dh_hused < dh->dh_hlen, ("dirhash find full")); + slot = ufsdirhash_hash(dh, name, namelen); + while (DH_ENTRY(dh, slot) != offset && + DH_ENTRY(dh, slot) != DIRHASH_EMPTY) + slot = WRAPINCR(slot, dh->dh_hlen); + if (DH_ENTRY(dh, slot) != offset) + panic("ufsdirhash_findslot: '%.*s' not found", namelen, name); + + return (slot); +} + +/* + * Remove the entry corresponding to the specified slot from the hash array. + * + * `dh' must be locked on entry and remains so on return. + */ +static void +ufsdirhash_delslot(struct dirhash *dh, int slot) +{ + int i; + + DIRHASH_ASSERT_LOCKED(dh); + + /* Mark the entry as deleted. */ + DH_ENTRY(dh, slot) = DIRHASH_DEL; + + /* If this is the end of a chain of DIRHASH_DEL slots, remove them. */ + for (i = slot; DH_ENTRY(dh, i) == DIRHASH_DEL; ) + i = WRAPINCR(i, dh->dh_hlen); + if (DH_ENTRY(dh, i) == DIRHASH_EMPTY) { + i = WRAPDECR(i, dh->dh_hlen); + while (DH_ENTRY(dh, i) == DIRHASH_DEL) { + DH_ENTRY(dh, i) = DIRHASH_EMPTY; + dh->dh_hused--; + i = WRAPDECR(i, dh->dh_hlen); + } + KASSERT(dh->dh_hused >= 0, ("ufsdirhash_delslot neg hlen")); + } +} + +/* + * Given a directory entry and its offset, find the offset of the + * previous entry in the same DIRBLKSIZ-sized block. Returns an + * offset, or -1 if there is no previous entry in the block or some + * other problem occurred. + */ +static doff_t +ufsdirhash_getprev(struct direct *dirp, doff_t offset) +{ + struct direct *dp; + char *blkbuf; + doff_t blkoff, prevoff; + int entrypos, i; + + blkoff = rounddown2(offset, DIRBLKSIZ); /* offset of start of block */ + entrypos = offset & (DIRBLKSIZ - 1); /* entry relative to block */ + blkbuf = (char *)dirp - entrypos; + prevoff = blkoff; + + /* If `offset' is the start of a block, there is no previous entry. 
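+    Otherwise the block is scanned from its start, following d_reclen
+    links, until the entry just before `offset' is reached; with made-up
+    sizes, entries at block-relative offsets 0, 12 and 36 mean getprev()
+    of the entry at 36 walks 0 -> 12 and returns blkoff + 12.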
*/ + if (entrypos == 0) + return (-1); + + /* Scan from the start of the block until we get to the entry. */ + for (i = 0; i < entrypos; i += dp->d_reclen) { + dp = (struct direct *)(blkbuf + i); + if (dp->d_reclen == 0 || i + dp->d_reclen > entrypos) + return (-1); /* Corrupted directory. */ + prevoff = blkoff + i; + } + return (prevoff); +} + +/* + * Delete the given dirhash and reclaim its memory. Assumes that + * ufsdirhash_list is locked, and leaves it locked. Also assumes + * that dh is locked. Returns the amount of memory freed. + */ +static int +ufsdirhash_destroy(struct dirhash *dh) +{ + doff_t **hash; + u_int8_t *blkfree; + int i, mem, narrays; + + KASSERT(dh->dh_hash != NULL, ("dirhash: NULL hash on list")); + + /* Remove it from the list and detach its memory. */ + TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); + dh->dh_onlist = 0; + hash = dh->dh_hash; + dh->dh_hash = NULL; + blkfree = dh->dh_blkfree; + dh->dh_blkfree = NULL; + narrays = dh->dh_narrays; + mem = dh->dh_memreq; + dh->dh_memreq = 0; + + /* Unlock dirhash and free the detached memory. */ + ufsdirhash_release(dh); + for (i = 0; i < narrays; i++) + DIRHASH_BLKFREE(hash[i]); + free(hash, M_DIRHASH); + free(blkfree, M_DIRHASH); + + /* Account for the returned memory. */ + ufs_dirhashmem -= mem; + + return (mem); +} + +/* + * Try to free up `wanted' bytes by stealing memory from existing + * dirhashes. Returns zero with list locked if successful. + */ +static int +ufsdirhash_recycle(int wanted) +{ + struct dirhash *dh; + + DIRHASHLIST_LOCK(); + dh = TAILQ_FIRST(&ufsdirhash_list); + while (wanted + ufs_dirhashmem > ufs_dirhashmaxmem) { + /* Decrement the score; only recycle if it becomes zero. */ + if (dh == NULL || --dh->dh_score > 0) { + DIRHASHLIST_UNLOCK(); + return (-1); + } + /* + * If we can't lock it, it's in use and we don't want to + * recycle it anyway. + */ + if (!sx_try_xlock(&dh->dh_lock)) { + dh = TAILQ_NEXT(dh, dh_list); + continue; + } + + ufsdirhash_destroy(dh); + + /* Repeat if necessary. */ + dh = TAILQ_FIRST(&ufsdirhash_list); + } + /* Success; return with list locked. */ + return (0); +} + +/* + * Callback that frees some dirhashes when the system is low on virtual memory. + */ +static void +ufsdirhash_lowmem() +{ + struct dirhash *dh, *dh_temp; + int memfreed, memwanted; + + ufs_dirhashlowmemcount++; + memfreed = 0; + memwanted = ufs_dirhashmem * ufs_dirhashreclaimpercent / 100; + + DIRHASHLIST_LOCK(); + + /* + * Reclaim up to memwanted from the oldest dirhashes. This will allow + * us to make some progress when the system is running out of memory + * without compromising the dynamic nature of the maximum age. If the + * situation does not improve, lowmem will eventually be retriggered + * and free some other entry in the cache. The entries on the head of + * the list should be the oldest. If during list traversal we can't + * get a lock on the dirhash, it will be skipped.
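+ * For example, with 2 MB of dirhash memory in use and
+ * ufs_dirhashreclaimpercent set to 10, roughly 200 kB would be released
+ * here, oldest dirhashes first.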
+ */ + TAILQ_FOREACH_SAFE(dh, &ufsdirhash_list, dh_list, dh_temp) { + if (sx_try_xlock(&dh->dh_lock)) + memfreed += ufsdirhash_destroy(dh); + if (memfreed >= memwanted) + break; + } + DIRHASHLIST_UNLOCK(); +} + +static int +ufsdirhash_set_reclaimpercent(SYSCTL_HANDLER_ARGS) +{ + int error, v; + + v = ufs_dirhashreclaimpercent; + error = sysctl_handle_int(oidp, &v, v, req); + if (error) + return (error); + if (req->newptr == NULL) + return (error); + if (v == ufs_dirhashreclaimpercent) + return (0); + + /* Refuse invalid percentages */ + if (v < 0 || v > 100) + return (EINVAL); + ufs_dirhashreclaimpercent = v; + return (0); +} + +void +ufsdirhash_init() +{ + ufs_dirhashmaxmem = lmax(roundup(hibufspace / 64, PAGE_SIZE), + 2 * 1024 * 1024); + + ufsdirhash_zone = uma_zcreate("DIRHASH", DH_NBLKOFF * sizeof(doff_t), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + mtx_init(&ufsdirhash_mtx, "dirhash list", NULL, MTX_DEF); + TAILQ_INIT(&ufsdirhash_list); + + /* Register a callback function to handle low memory signals */ + EVENTHANDLER_REGISTER(vm_lowmem, ufsdirhash_lowmem, NULL, + EVENTHANDLER_PRI_FIRST); +} + +void +ufsdirhash_uninit() +{ + KASSERT(TAILQ_EMPTY(&ufsdirhash_list), ("ufsdirhash_uninit")); + uma_zdestroy(ufsdirhash_zone); + mtx_destroy(&ufsdirhash_mtx); +} + +#endif /* UFS_DIRHASH */ diff --git a/Dump/ufs/ufs/ufs_extattr.c b/Dump/ufs/ufs/ufs_extattr.c new file mode 100644 index 0000000..bb3bcca --- /dev/null +++ b/Dump/ufs/ufs/ufs_extattr.c @@ -0,0 +1,1300 @@ +/*- + * Copyright (c) 1999-2002 Robert N. M. Watson + * Copyright (c) 2002-2003 Networks Associates Technology, Inc. + * All rights reserved. + * + * This software was developed by Robert Watson for the TrustedBSD Project. + * + * This software was developed for the FreeBSD Project in part by Network + * Associates Laboratories, the Security Research Division of Network + * Associates, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), + * as part of the DARPA CHATS research program. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +/* + * Support for filesystem extended attribute: UFS-specific support functions. 
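+ *
+ * In this UFS1-era scheme each enabled attribute is backed by a regular
+ * file (see ufs_extattr_enable() below): the backing file starts with a
+ * struct ufs_extattr_fileheader and then holds one fixed-size slot per
+ * inode number, each slot being a struct ufs_extattr_header followed by
+ * up to uef_size bytes of attribute data (see ufs_extattr_get() and
+ * ufs_extattr_set() below).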
+ */ + +#include +__FBSDID("$FreeBSD: releng/11.2/sys/ufs/ufs/ufs_extattr.c 298463 2016-04-22 08:09:27Z ngie $"); + +#include "opt_ufs.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +#ifdef UFS_EXTATTR + +FEATURE(ufs_extattr, "ufs extended attribute support"); + +static MALLOC_DEFINE(M_UFS_EXTATTR, "ufs_extattr", "ufs extended attribute"); + +static int ufs_extattr_sync = 0; +SYSCTL_INT(_debug, OID_AUTO, ufs_extattr_sync, CTLFLAG_RW, &ufs_extattr_sync, + 0, ""); + +static int ufs_extattr_valid_attrname(int attrnamespace, + const char *attrname); +static int ufs_extattr_enable_with_open(struct ufsmount *ump, + struct vnode *vp, int attrnamespace, const char *attrname, + struct thread *td); +static int ufs_extattr_enable(struct ufsmount *ump, int attrnamespace, + const char *attrname, struct vnode *backing_vnode, + struct thread *td); +static int ufs_extattr_disable(struct ufsmount *ump, int attrnamespace, + const char *attrname, struct thread *td); +static int ufs_extattr_get(struct vnode *vp, int attrnamespace, + const char *name, struct uio *uio, size_t *size, + struct ucred *cred, struct thread *td); +static int ufs_extattr_set(struct vnode *vp, int attrnamespace, + const char *name, struct uio *uio, struct ucred *cred, + struct thread *td); +static int ufs_extattr_rm(struct vnode *vp, int attrnamespace, + const char *name, struct ucred *cred, struct thread *td); +#ifdef UFS_EXTATTR_AUTOSTART +static int ufs_extattr_autostart_locked(struct mount *mp, + struct thread *td); +#endif +static int ufs_extattr_start_locked(struct ufsmount *ump, + struct thread *td); + +/* + * Per-FS attribute lock protecting attribute operations. + * + * XXXRW: Perhaps something more fine-grained would be appropriate, but at + * the end of the day we're going to contend on the vnode lock for the + * backing file anyway. + */ +static void +ufs_extattr_uepm_lock(struct ufsmount *ump) +{ + + sx_xlock(&ump->um_extattr.uepm_lock); +} + +static void +ufs_extattr_uepm_unlock(struct ufsmount *ump) +{ + + sx_xunlock(&ump->um_extattr.uepm_lock); +} + +/*- + * Determine whether the name passed is a valid name for an actual + * attribute. + * + * Invalid currently consists of: + * NULL pointer for attrname + * zero-length attrname (used to retrieve application attribute list) + */ +static int +ufs_extattr_valid_attrname(int attrnamespace, const char *attrname) +{ + + if (attrname == NULL) + return (0); + if (strlen(attrname) == 0) + return (0); + return (1); +} + +/* + * Locate an attribute given a name and mountpoint. + * Must be holding uepm lock for the mount point. + */ +static struct ufs_extattr_list_entry * +ufs_extattr_find_attr(struct ufsmount *ump, int attrnamespace, + const char *attrname) +{ + struct ufs_extattr_list_entry *search_attribute; + + sx_assert(&ump->um_extattr.uepm_lock, SA_XLOCKED); + + for (search_attribute = LIST_FIRST(&ump->um_extattr.uepm_list); + search_attribute != NULL; + search_attribute = LIST_NEXT(search_attribute, uele_entries)) { + if (!(strncmp(attrname, search_attribute->uele_attrname, + UFS_EXTATTR_MAXEXTATTRNAME)) && + (attrnamespace == search_attribute->uele_attrnamespace)) { + return (search_attribute); + } + } + + return (0); +} + +/* + * Initialize per-FS structures supporting extended attributes. Do not + * start extended attributes yet. 
+ */ +void +ufs_extattr_uepm_init(struct ufs_extattr_per_mount *uepm) +{ + + uepm->uepm_flags = 0; + LIST_INIT(&uepm->uepm_list); + sx_init(&uepm->uepm_lock, "ufs_extattr_sx"); + uepm->uepm_flags |= UFS_EXTATTR_UEPM_INITIALIZED; +} + +/* + * Destroy per-FS structures supporting extended attributes. Assumes + * that EAs have already been stopped, and will panic if not. + */ +void +ufs_extattr_uepm_destroy(struct ufs_extattr_per_mount *uepm) +{ + + if (!(uepm->uepm_flags & UFS_EXTATTR_UEPM_INITIALIZED)) + panic("ufs_extattr_uepm_destroy: not initialized"); + + if ((uepm->uepm_flags & UFS_EXTATTR_UEPM_STARTED)) + panic("ufs_extattr_uepm_destroy: called while still started"); + + /* + * It's not clear that either order for the next two lines is + * ideal, and it should never be a problem if this is only called + * during unmount, and with vfs_busy(). + */ + uepm->uepm_flags &= ~UFS_EXTATTR_UEPM_INITIALIZED; + sx_destroy(&uepm->uepm_lock); +} + +/* + * Start extended attribute support on an FS. + */ +int +ufs_extattr_start(struct mount *mp, struct thread *td) +{ + struct ufsmount *ump; + int error = 0; + + ump = VFSTOUFS(mp); + + ufs_extattr_uepm_lock(ump); + error = ufs_extattr_start_locked(ump, td); + ufs_extattr_uepm_unlock(ump); + return (error); +} + +static int +ufs_extattr_start_locked(struct ufsmount *ump, struct thread *td) +{ + if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_INITIALIZED)) + return (EOPNOTSUPP); + if (ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED) + return (EBUSY); + + ump->um_extattr.uepm_flags |= UFS_EXTATTR_UEPM_STARTED; + ump->um_extattr.uepm_ucred = crhold(td->td_ucred); + return (0); +} + +#ifdef UFS_EXTATTR_AUTOSTART +/* + * Helper routine: given a locked parent directory and filename, return + * the locked vnode of the inode associated with the name. Will not + * follow symlinks, may return any type of vnode. Lock on parent will + * be released even in the event of a failure. In the event that the + * target is the parent (i.e., "."), there will be two references and + * one lock, requiring the caller to possibly special-case. + */ +#define UE_GETDIR_LOCKPARENT 1 +#define UE_GETDIR_LOCKPARENT_DONT 2 +static int +ufs_extattr_lookup(struct vnode *start_dvp, int lockparent, char *dirname, + struct vnode **vp, struct thread *td) +{ + struct vop_cachedlookup_args vargs; + struct componentname cnp; + struct vnode *target_vp; + int error; + + bzero(&cnp, sizeof(cnp)); + cnp.cn_nameiop = LOOKUP; + cnp.cn_flags = ISLASTCN; + if (lockparent == UE_GETDIR_LOCKPARENT) + cnp.cn_flags |= LOCKPARENT; + cnp.cn_lkflags = LK_EXCLUSIVE; + cnp.cn_thread = td; + cnp.cn_cred = td->td_ucred; + cnp.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); + cnp.cn_nameptr = cnp.cn_pnbuf; + error = copystr(dirname, cnp.cn_pnbuf, MAXPATHLEN, + (size_t *) &cnp.cn_namelen); + if (error) { + if (lockparent == UE_GETDIR_LOCKPARENT_DONT) { + VOP_UNLOCK(start_dvp, 0); + } + uma_zfree(namei_zone, cnp.cn_pnbuf); + printf("ufs_extattr_lookup: copystr failed\n"); + return (error); + } + cnp.cn_namelen--; /* trim nul termination */ + vargs.a_gen.a_desc = NULL; + vargs.a_dvp = start_dvp; + vargs.a_vpp = &target_vp; + vargs.a_cnp = &cnp; + error = ufs_lookup(&vargs); + uma_zfree(namei_zone, cnp.cn_pnbuf); + if (error) { + /* + * Error condition, may have to release the lock on the parent + * if ufs_lookup() didn't. + */ + if (lockparent == UE_GETDIR_LOCKPARENT_DONT) + VOP_UNLOCK(start_dvp, 0); + + /* + * Check that ufs_lookup() didn't release the lock when we + * didn't want it to. 
+ */ + if (lockparent == UE_GETDIR_LOCKPARENT) + ASSERT_VOP_LOCKED(start_dvp, "ufs_extattr_lookup"); + + return (error); + } +/* + if (target_vp == start_dvp) + panic("ufs_extattr_lookup: target_vp == start_dvp"); +*/ + + if (target_vp != start_dvp && lockparent == UE_GETDIR_LOCKPARENT_DONT) + VOP_UNLOCK(start_dvp, 0); + + if (lockparent == UE_GETDIR_LOCKPARENT) + ASSERT_VOP_LOCKED(start_dvp, "ufs_extattr_lookup"); + + /* printf("ufs_extattr_lookup: success\n"); */ + *vp = target_vp; + return (0); +} +#endif /* !UFS_EXTATTR_AUTOSTART */ + +/* + * Enable an EA using the passed filesystem, backing vnode, attribute name, + * namespace, and proc. Will perform a VOP_OPEN() on the vp, so expects vp + * to be locked when passed in. The vnode will be returned unlocked, + * regardless of success/failure of the function. As a result, the caller + * will always need to vrele(), but not vput(). + */ +static int +ufs_extattr_enable_with_open(struct ufsmount *ump, struct vnode *vp, + int attrnamespace, const char *attrname, struct thread *td) +{ + int error; + + error = VOP_OPEN(vp, FREAD|FWRITE, td->td_ucred, td, NULL); + if (error) { + printf("ufs_extattr_enable_with_open.VOP_OPEN(): failed " + "with %d\n", error); + VOP_UNLOCK(vp, 0); + return (error); + } + + VOP_ADD_WRITECOUNT(vp, 1); + CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d", __func__, vp, + vp->v_writecount); + + vref(vp); + + VOP_UNLOCK(vp, 0); + + error = ufs_extattr_enable(ump, attrnamespace, attrname, vp, td); + if (error != 0) + vn_close(vp, FREAD|FWRITE, td->td_ucred, td); + return (error); +} + +#ifdef UFS_EXTATTR_AUTOSTART +/* + * Given a locked directory vnode, iterate over the names in the directory + * and use ufs_extattr_lookup() to retrieve locked vnodes of potential + * attribute files. Then invoke ufs_extattr_enable_with_open() on each + * to attempt to start the attribute. Leaves the directory locked on + * exit. 
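+ *
+ * During autostart this is applied to the "system" and "user"
+ * subdirectories of UFS_EXTATTR_FSROOTSUBDIR (conventionally ".attribute")
+ * at the filesystem root, so -- to pick a purely hypothetical name -- a
+ * backing file .attribute/system/md5 would be enabled as a
+ * system-namespace attribute called "md5".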
+ */ +static int +ufs_extattr_iterate_directory(struct ufsmount *ump, struct vnode *dvp, + int attrnamespace, struct thread *td) +{ + struct vop_readdir_args vargs; + struct dirent *dp, *edp; + struct vnode *attr_vp; + struct uio auio; + struct iovec aiov; + char *dirbuf; + int error, eofflag = 0; + + if (dvp->v_type != VDIR) + return (ENOTDIR); + + dirbuf = malloc(DIRBLKSIZ, M_TEMP, M_WAITOK); + + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_rw = UIO_READ; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_td = td; + auio.uio_offset = 0; + + vargs.a_gen.a_desc = NULL; + vargs.a_vp = dvp; + vargs.a_uio = &auio; + vargs.a_cred = td->td_ucred; + vargs.a_eofflag = &eofflag; + vargs.a_ncookies = NULL; + vargs.a_cookies = NULL; + + while (!eofflag) { + auio.uio_resid = DIRBLKSIZ; + aiov.iov_base = dirbuf; + aiov.iov_len = DIRBLKSIZ; + error = ufs_readdir(&vargs); + if (error) { + printf("ufs_extattr_iterate_directory: ufs_readdir " + "%d\n", error); + return (error); + } + + edp = (struct dirent *)&dirbuf[DIRBLKSIZ - auio.uio_resid]; + for (dp = (struct dirent *)dirbuf; dp < edp; ) { + if (dp->d_reclen == 0) + break; + error = ufs_extattr_lookup(dvp, UE_GETDIR_LOCKPARENT, + dp->d_name, &attr_vp, td); + if (error) { + printf("ufs_extattr_iterate_directory: lookup " + "%s %d\n", dp->d_name, error); + } else if (attr_vp == dvp) { + vrele(attr_vp); + } else if (attr_vp->v_type != VREG) { + vput(attr_vp); + } else { + error = ufs_extattr_enable_with_open(ump, + attr_vp, attrnamespace, dp->d_name, td); + vrele(attr_vp); + if (error) { + printf("ufs_extattr_iterate_directory: " + "enable %s %d\n", dp->d_name, + error); + } else if (bootverbose) { + printf("UFS autostarted EA %s\n", + dp->d_name); + } + } + dp = (struct dirent *) ((char *)dp + dp->d_reclen); + if (dp >= edp) + break; + } + } + free(dirbuf, M_TEMP); + + return (0); +} + +/* + * Auto-start of extended attributes, to be executed (optionally) at + * mount-time. + */ +int +ufs_extattr_autostart(struct mount *mp, struct thread *td) +{ + struct ufsmount *ump; + int error; + + ump = VFSTOUFS(mp); + ufs_extattr_uepm_lock(ump); + error = ufs_extattr_autostart_locked(mp, td); + ufs_extattr_uepm_unlock(ump); + return (error); +} + +static int +ufs_extattr_autostart_locked(struct mount *mp, struct thread *td) +{ + struct vnode *rvp, *attr_dvp, *attr_system_dvp, *attr_user_dvp; + struct ufsmount *ump = VFSTOUFS(mp); + int error; + + /* + * UFS_EXTATTR applies only to UFS1, as UFS2 uses native extended + * attributes, so don't autostart. + */ + if (ump->um_fstype != UFS1) + return (0); + + /* + * Does UFS_EXTATTR_FSROOTSUBDIR exist off the filesystem root? + * If so, automatically start EA's. + */ + error = VFS_ROOT(mp, LK_EXCLUSIVE, &rvp); + if (error) { + printf("ufs_extattr_autostart.VFS_ROOT() returned %d\n", + error); + return (error); + } + + error = ufs_extattr_lookup(rvp, UE_GETDIR_LOCKPARENT_DONT, + UFS_EXTATTR_FSROOTSUBDIR, &attr_dvp, td); + if (error) { + /* rvp ref'd but now unlocked */ + vrele(rvp); + return (error); + } + if (rvp == attr_dvp) { + /* Should never happen. 
*/ + vput(rvp); + vrele(attr_dvp); + return (EINVAL); + } + vrele(rvp); + + if (attr_dvp->v_type != VDIR) { + printf("ufs_extattr_autostart: %s != VDIR\n", + UFS_EXTATTR_FSROOTSUBDIR); + goto return_vput_attr_dvp; + } + + error = ufs_extattr_start_locked(ump, td); + if (error) { + printf("ufs_extattr_autostart: ufs_extattr_start failed (%d)\n", + error); + goto return_vput_attr_dvp; + } + + /* + * Look for two subdirectories: UFS_EXTATTR_SUBDIR_SYSTEM, + * UFS_EXTATTR_SUBDIR_USER. For each, iterate over the sub-directory, + * and start with appropriate type. Failures in either don't + * result in an over-all failure. attr_dvp is left locked to + * be cleaned up on exit. + */ + error = ufs_extattr_lookup(attr_dvp, UE_GETDIR_LOCKPARENT, + UFS_EXTATTR_SUBDIR_SYSTEM, &attr_system_dvp, td); + if (!error) { + error = ufs_extattr_iterate_directory(VFSTOUFS(mp), + attr_system_dvp, EXTATTR_NAMESPACE_SYSTEM, td); + if (error) + printf("ufs_extattr_iterate_directory returned %d\n", + error); + vput(attr_system_dvp); + } + + error = ufs_extattr_lookup(attr_dvp, UE_GETDIR_LOCKPARENT, + UFS_EXTATTR_SUBDIR_USER, &attr_user_dvp, td); + if (!error) { + error = ufs_extattr_iterate_directory(VFSTOUFS(mp), + attr_user_dvp, EXTATTR_NAMESPACE_USER, td); + if (error) + printf("ufs_extattr_iterate_directory returned %d\n", + error); + vput(attr_user_dvp); + } + + /* Mask startup failures in sub-directories. */ + error = 0; + +return_vput_attr_dvp: + vput(attr_dvp); + + return (error); +} +#endif /* !UFS_EXTATTR_AUTOSTART */ + +/* + * Stop extended attribute support on an FS. + */ +int +ufs_extattr_stop(struct mount *mp, struct thread *td) +{ + struct ufs_extattr_list_entry *uele; + struct ufsmount *ump = VFSTOUFS(mp); + int error = 0; + + ufs_extattr_uepm_lock(ump); + + if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)) { + error = EOPNOTSUPP; + goto unlock; + } + + while ((uele = LIST_FIRST(&ump->um_extattr.uepm_list)) != NULL) { + ufs_extattr_disable(ump, uele->uele_attrnamespace, + uele->uele_attrname, td); + } + + ump->um_extattr.uepm_flags &= ~UFS_EXTATTR_UEPM_STARTED; + + crfree(ump->um_extattr.uepm_ucred); + ump->um_extattr.uepm_ucred = NULL; + +unlock: + ufs_extattr_uepm_unlock(ump); + + return (error); +} + +/* + * Enable a named attribute on the specified filesystem; provide an + * unlocked backing vnode to hold the attribute data. 
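+ * The backing vnode must already carry a valid struct
+ * ufs_extattr_fileheader (its magic and version are checked below); such
+ * files are normally prepared ahead of time, e.g. with extattrctl(8).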
+ */ +static int +ufs_extattr_enable(struct ufsmount *ump, int attrnamespace, + const char *attrname, struct vnode *backing_vnode, struct thread *td) +{ + struct ufs_extattr_list_entry *attribute; + struct iovec aiov; + struct uio auio; + int error = 0; + + if (!ufs_extattr_valid_attrname(attrnamespace, attrname)) + return (EINVAL); + if (backing_vnode->v_type != VREG) + return (EINVAL); + + attribute = malloc(sizeof(struct ufs_extattr_list_entry), + M_UFS_EXTATTR, M_WAITOK); + + if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)) { + error = EOPNOTSUPP; + goto free_exit; + } + + if (ufs_extattr_find_attr(ump, attrnamespace, attrname)) { + error = EEXIST; + goto free_exit; + } + + strncpy(attribute->uele_attrname, attrname, + UFS_EXTATTR_MAXEXTATTRNAME); + attribute->uele_attrnamespace = attrnamespace; + bzero(&attribute->uele_fileheader, + sizeof(struct ufs_extattr_fileheader)); + + attribute->uele_backing_vnode = backing_vnode; + + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + aiov.iov_base = (caddr_t) &attribute->uele_fileheader; + aiov.iov_len = sizeof(struct ufs_extattr_fileheader); + auio.uio_resid = sizeof(struct ufs_extattr_fileheader); + auio.uio_offset = (off_t) 0; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_READ; + auio.uio_td = td; + + vn_lock(backing_vnode, LK_SHARED | LK_RETRY); + error = VOP_READ(backing_vnode, &auio, IO_NODELOCKED, + ump->um_extattr.uepm_ucred); + + if (error) + goto unlock_free_exit; + + if (auio.uio_resid != 0) { + printf("ufs_extattr_enable: malformed attribute header\n"); + error = EINVAL; + goto unlock_free_exit; + } + + if (attribute->uele_fileheader.uef_magic != UFS_EXTATTR_MAGIC) { + printf("ufs_extattr_enable: invalid attribute header magic\n"); + error = EINVAL; + goto unlock_free_exit; + } + + if (attribute->uele_fileheader.uef_version != UFS_EXTATTR_VERSION) { + printf("ufs_extattr_enable: incorrect attribute header " + "version\n"); + error = EINVAL; + goto unlock_free_exit; + } + + ASSERT_VOP_LOCKED(backing_vnode, "ufs_extattr_enable"); + LIST_INSERT_HEAD(&ump->um_extattr.uepm_list, attribute, + uele_entries); + + VOP_UNLOCK(backing_vnode, 0); + return (0); + +unlock_free_exit: + VOP_UNLOCK(backing_vnode, 0); + +free_exit: + free(attribute, M_UFS_EXTATTR); + return (error); +} + +/* + * Disable extended attribute support on an FS. + */ +static int +ufs_extattr_disable(struct ufsmount *ump, int attrnamespace, + const char *attrname, struct thread *td) +{ + struct ufs_extattr_list_entry *uele; + int error = 0; + + if (!ufs_extattr_valid_attrname(attrnamespace, attrname)) + return (EINVAL); + + uele = ufs_extattr_find_attr(ump, attrnamespace, attrname); + if (!uele) + return (ENOATTR); + + LIST_REMOVE(uele, uele_entries); + + vn_lock(uele->uele_backing_vnode, LK_SHARED | LK_RETRY); + ASSERT_VOP_LOCKED(uele->uele_backing_vnode, "ufs_extattr_disable"); + VOP_UNLOCK(uele->uele_backing_vnode, 0); + error = vn_close(uele->uele_backing_vnode, FREAD|FWRITE, + td->td_ucred, td); + + free(uele, M_UFS_EXTATTR); + + return (error); +} + +/* + * VFS call to manage extended attributes in UFS. If filename_vp is + * non-NULL, it must be passed in locked, and regardless of errors in + * processing, will be unlocked. + */ +int +ufs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp, + int attrnamespace, const char *attrname) +{ + struct ufsmount *ump = VFSTOUFS(mp); + struct thread *td = curthread; + int error; + + /* + * Processes with privilege, but in jail, are not allowed to + * configure extended attributes. 
+ */ + error = priv_check(td, PRIV_UFS_EXTATTRCTL); + if (error) { + if (filename_vp != NULL) + VOP_UNLOCK(filename_vp, 0); + return (error); + } + + /* + * We only allow extattrctl(2) on UFS1 file systems, as UFS2 uses + * native extended attributes. + */ + if (ump->um_fstype != UFS1) { + if (filename_vp != NULL) + VOP_UNLOCK(filename_vp, 0); + return (EOPNOTSUPP); + } + + switch(cmd) { + case UFS_EXTATTR_CMD_START: + if (filename_vp != NULL) { + VOP_UNLOCK(filename_vp, 0); + return (EINVAL); + } + if (attrname != NULL) + return (EINVAL); + + error = ufs_extattr_start(mp, td); + + return (error); + + case UFS_EXTATTR_CMD_STOP: + if (filename_vp != NULL) { + VOP_UNLOCK(filename_vp, 0); + return (EINVAL); + } + if (attrname != NULL) + return (EINVAL); + + error = ufs_extattr_stop(mp, td); + + return (error); + + case UFS_EXTATTR_CMD_ENABLE: + + if (filename_vp == NULL) + return (EINVAL); + if (attrname == NULL) { + VOP_UNLOCK(filename_vp, 0); + return (EINVAL); + } + + /* + * ufs_extattr_enable_with_open() will always unlock the + * vnode, regardless of failure. + */ + ufs_extattr_uepm_lock(ump); + error = ufs_extattr_enable_with_open(ump, filename_vp, + attrnamespace, attrname, td); + ufs_extattr_uepm_unlock(ump); + + return (error); + + case UFS_EXTATTR_CMD_DISABLE: + + if (filename_vp != NULL) { + VOP_UNLOCK(filename_vp, 0); + return (EINVAL); + } + if (attrname == NULL) + return (EINVAL); + + ufs_extattr_uepm_lock(ump); + error = ufs_extattr_disable(ump, attrnamespace, attrname, + td); + ufs_extattr_uepm_unlock(ump); + + return (error); + + default: + return (EINVAL); + } +} + +/* + * Vnode operating to retrieve a named extended attribute. + */ +int +ufs_getextattr(struct vop_getextattr_args *ap) +/* +vop_getextattr { + IN struct vnode *a_vp; + IN int a_attrnamespace; + IN const char *a_name; + INOUT struct uio *a_uio; + OUT size_t *a_size; + IN struct ucred *a_cred; + IN struct thread *a_td; +}; +*/ +{ + struct mount *mp = ap->a_vp->v_mount; + struct ufsmount *ump = VFSTOUFS(mp); + int error; + + ufs_extattr_uepm_lock(ump); + + error = ufs_extattr_get(ap->a_vp, ap->a_attrnamespace, ap->a_name, + ap->a_uio, ap->a_size, ap->a_cred, ap->a_td); + + ufs_extattr_uepm_unlock(ump); + + return (error); +} + +/* + * Real work associated with retrieving a named attribute--assumes that + * the attribute lock has already been grabbed. + */ +static int +ufs_extattr_get(struct vnode *vp, int attrnamespace, const char *name, + struct uio *uio, size_t *size, struct ucred *cred, struct thread *td) +{ + struct ufs_extattr_list_entry *attribute; + struct ufs_extattr_header ueh; + struct iovec local_aiov; + struct uio local_aio; + struct mount *mp = vp->v_mount; + struct ufsmount *ump = VFSTOUFS(mp); + struct inode *ip = VTOI(vp); + off_t base_offset; + size_t len, old_len; + int error = 0; + + if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)) + return (EOPNOTSUPP); + + if (strlen(name) == 0) + return (EINVAL); + + error = extattr_check_cred(vp, attrnamespace, cred, td, VREAD); + if (error) + return (error); + + attribute = ufs_extattr_find_attr(ump, attrnamespace, name); + if (!attribute) + return (ENOATTR); + + /* + * Allow only offsets of zero to encourage the read/replace + * extended attribute semantic. Otherwise we can't guarantee + * atomicity, as we don't provide locks for extended attributes. 
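+ *
+ * The backing file computed just below is laid out as one fixed-size
+ * record per inode number:
+ *
+ *	base_offset = sizeof(struct ufs_extattr_fileheader) +
+ *	    ip->i_number * (sizeof(struct ufs_extattr_header) + uef_size);
+ *
+ * so, purely as an example, with uef_size = 1024 the record for inode 10
+ * starts 10 * (sizeof(struct ufs_extattr_header) + 1024) bytes past the
+ * file header and holds an attribute header followed by up to 1024 bytes
+ * of attribute data.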
+ */ + if (uio != NULL && uio->uio_offset != 0) + return (ENXIO); + + /* + * Find base offset of header in file based on file header size, and + * data header size + maximum data size, indexed by inode number. + */ + base_offset = sizeof(struct ufs_extattr_fileheader) + + ip->i_number * (sizeof(struct ufs_extattr_header) + + attribute->uele_fileheader.uef_size); + + /* + * Read in the data header to see if the data is defined, and if so + * how much. + */ + bzero(&ueh, sizeof(struct ufs_extattr_header)); + local_aiov.iov_base = (caddr_t) &ueh; + local_aiov.iov_len = sizeof(struct ufs_extattr_header); + local_aio.uio_iov = &local_aiov; + local_aio.uio_iovcnt = 1; + local_aio.uio_rw = UIO_READ; + local_aio.uio_segflg = UIO_SYSSPACE; + local_aio.uio_td = td; + local_aio.uio_offset = base_offset; + local_aio.uio_resid = sizeof(struct ufs_extattr_header); + + /* + * Acquire locks. + * + * Don't need to get a lock on the backing file if the getattr is + * being applied to the backing file, as the lock is already held. + */ + if (attribute->uele_backing_vnode != vp) + vn_lock(attribute->uele_backing_vnode, LK_SHARED | LK_RETRY); + + error = VOP_READ(attribute->uele_backing_vnode, &local_aio, + IO_NODELOCKED, ump->um_extattr.uepm_ucred); + if (error) + goto vopunlock_exit; + + /* Defined? */ + if ((ueh.ueh_flags & UFS_EXTATTR_ATTR_FLAG_INUSE) == 0) { + error = ENOATTR; + goto vopunlock_exit; + } + + /* Valid for the current inode generation? */ + if (ueh.ueh_i_gen != ip->i_gen) { + /* + * The inode itself has a different generation number + * than the attribute data. For now, the best solution + * is to coerce this to undefined, and let it get cleaned + * up by the next write or extattrctl clean. + */ + printf("ufs_extattr_get (%s): inode number inconsistency (%d, %ju)\n", + mp->mnt_stat.f_mntonname, ueh.ueh_i_gen, (uintmax_t)ip->i_gen); + error = ENOATTR; + goto vopunlock_exit; + } + + /* Local size consistency check. */ + if (ueh.ueh_len > attribute->uele_fileheader.uef_size) { + error = ENXIO; + goto vopunlock_exit; + } + + /* Return full data size if caller requested it. */ + if (size != NULL) + *size = ueh.ueh_len; + + /* Return data if the caller requested it. */ + if (uio != NULL) { + /* Allow for offset into the attribute data. */ + uio->uio_offset = base_offset + sizeof(struct + ufs_extattr_header); + + /* + * Figure out maximum to transfer -- use buffer size and + * local data limit. + */ + len = MIN(uio->uio_resid, ueh.ueh_len); + old_len = uio->uio_resid; + uio->uio_resid = len; + + error = VOP_READ(attribute->uele_backing_vnode, uio, + IO_NODELOCKED, ump->um_extattr.uepm_ucred); + if (error) + goto vopunlock_exit; + + uio->uio_resid = old_len - (len - uio->uio_resid); + } + +vopunlock_exit: + + if (uio != NULL) + uio->uio_offset = 0; + + if (attribute->uele_backing_vnode != vp) + VOP_UNLOCK(attribute->uele_backing_vnode, 0); + + return (error); +} + +/* + * Vnode operation to remove a named attribute. + */ +int +ufs_deleteextattr(struct vop_deleteextattr_args *ap) +/* +vop_deleteextattr { + IN struct vnode *a_vp; + IN int a_attrnamespace; + IN const char *a_name; + IN struct ucred *a_cred; + IN struct thread *a_td; +}; +*/ +{ + struct mount *mp = ap->a_vp->v_mount; + struct ufsmount *ump = VFSTOUFS(mp); + int error; + + ufs_extattr_uepm_lock(ump); + + error = ufs_extattr_rm(ap->a_vp, ap->a_attrnamespace, ap->a_name, + ap->a_cred, ap->a_td); + + + ufs_extattr_uepm_unlock(ump); + + return (error); +} + +/* + * Vnode operation to set a named attribute. 
+ */ +int +ufs_setextattr(struct vop_setextattr_args *ap) +/* +vop_setextattr { + IN struct vnode *a_vp; + IN int a_attrnamespace; + IN const char *a_name; + INOUT struct uio *a_uio; + IN struct ucred *a_cred; + IN struct thread *a_td; +}; +*/ +{ + struct mount *mp = ap->a_vp->v_mount; + struct ufsmount *ump = VFSTOUFS(mp); + int error; + + /* + * XXX: No longer a supported way to delete extended attributes. + */ + if (ap->a_uio == NULL) + return (EINVAL); + + ufs_extattr_uepm_lock(ump); + + error = ufs_extattr_set(ap->a_vp, ap->a_attrnamespace, ap->a_name, + ap->a_uio, ap->a_cred, ap->a_td); + + ufs_extattr_uepm_unlock(ump); + + return (error); +} + +/* + * Real work associated with setting a vnode's extended attributes; + * assumes that the attribute lock has already been grabbed. + */ +static int +ufs_extattr_set(struct vnode *vp, int attrnamespace, const char *name, + struct uio *uio, struct ucred *cred, struct thread *td) +{ + struct ufs_extattr_list_entry *attribute; + struct ufs_extattr_header ueh; + struct iovec local_aiov; + struct uio local_aio; + struct mount *mp = vp->v_mount; + struct ufsmount *ump = VFSTOUFS(mp); + struct inode *ip = VTOI(vp); + off_t base_offset; + int error = 0, ioflag; + + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); + if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)) + return (EOPNOTSUPP); + if (!ufs_extattr_valid_attrname(attrnamespace, name)) + return (EINVAL); + + error = extattr_check_cred(vp, attrnamespace, cred, td, VWRITE); + if (error) + return (error); + + attribute = ufs_extattr_find_attr(ump, attrnamespace, name); + if (!attribute) + return (ENOATTR); + + /* + * Early rejection of invalid offsets/length. + * Reject: any offset but 0 (replace) + * Any size greater than attribute size limit + */ + if (uio->uio_offset != 0 || + uio->uio_resid > attribute->uele_fileheader.uef_size) + return (ENXIO); + + /* + * Find base offset of header in file based on file header size, and + * data header size + maximum data size, indexed by inode number. + */ + base_offset = sizeof(struct ufs_extattr_fileheader) + + ip->i_number * (sizeof(struct ufs_extattr_header) + + attribute->uele_fileheader.uef_size); + + /* + * Write out a data header for the data. + */ + ueh.ueh_len = uio->uio_resid; + ueh.ueh_flags = UFS_EXTATTR_ATTR_FLAG_INUSE; + ueh.ueh_i_gen = ip->i_gen; + local_aiov.iov_base = (caddr_t) &ueh; + local_aiov.iov_len = sizeof(struct ufs_extattr_header); + local_aio.uio_iov = &local_aiov; + local_aio.uio_iovcnt = 1; + local_aio.uio_rw = UIO_WRITE; + local_aio.uio_segflg = UIO_SYSSPACE; + local_aio.uio_td = td; + local_aio.uio_offset = base_offset; + local_aio.uio_resid = sizeof(struct ufs_extattr_header); + + /* + * Acquire locks. + * + * Don't need to get a lock on the backing file if the setattr is + * being applied to the backing file, as the lock is already held. + */ + if (attribute->uele_backing_vnode != vp) + vn_lock(attribute->uele_backing_vnode, LK_EXCLUSIVE | LK_RETRY); + + ioflag = IO_NODELOCKED; + if (ufs_extattr_sync) + ioflag |= IO_SYNC; + error = VOP_WRITE(attribute->uele_backing_vnode, &local_aio, ioflag, + ump->um_extattr.uepm_ucred); + if (error) + goto vopunlock_exit; + + if (local_aio.uio_resid != 0) { + error = ENXIO; + goto vopunlock_exit; + } + + /* + * Write out user data. 
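+ * (The header above and the data here go out as two separate VOP_WRITE
+ * calls; when the debug.ufs_extattr_sync sysctl is non-zero, both are
+ * issued with IO_SYNC.)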
+ */ + uio->uio_offset = base_offset + sizeof(struct ufs_extattr_header); + + ioflag = IO_NODELOCKED; + if (ufs_extattr_sync) + ioflag |= IO_SYNC; + error = VOP_WRITE(attribute->uele_backing_vnode, uio, ioflag, + ump->um_extattr.uepm_ucred); + +vopunlock_exit: + uio->uio_offset = 0; + + if (attribute->uele_backing_vnode != vp) + VOP_UNLOCK(attribute->uele_backing_vnode, 0); + + return (error); +} + +/* + * Real work associated with removing an extended attribute from a vnode. + * Assumes the attribute lock has already been grabbed. + */ +static int +ufs_extattr_rm(struct vnode *vp, int attrnamespace, const char *name, + struct ucred *cred, struct thread *td) +{ + struct ufs_extattr_list_entry *attribute; + struct ufs_extattr_header ueh; + struct iovec local_aiov; + struct uio local_aio; + struct mount *mp = vp->v_mount; + struct ufsmount *ump = VFSTOUFS(mp); + struct inode *ip = VTOI(vp); + off_t base_offset; + int error = 0, ioflag; + + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); + if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)) + return (EOPNOTSUPP); + if (!ufs_extattr_valid_attrname(attrnamespace, name)) + return (EINVAL); + + error = extattr_check_cred(vp, attrnamespace, cred, td, VWRITE); + if (error) + return (error); + + attribute = ufs_extattr_find_attr(ump, attrnamespace, name); + if (!attribute) + return (ENOATTR); + + /* + * Find base offset of header in file based on file header size, and + * data header size + maximum data size, indexed by inode number. + */ + base_offset = sizeof(struct ufs_extattr_fileheader) + + ip->i_number * (sizeof(struct ufs_extattr_header) + + attribute->uele_fileheader.uef_size); + + /* + * Check to see if currently defined. + */ + bzero(&ueh, sizeof(struct ufs_extattr_header)); + + local_aiov.iov_base = (caddr_t) &ueh; + local_aiov.iov_len = sizeof(struct ufs_extattr_header); + local_aio.uio_iov = &local_aiov; + local_aio.uio_iovcnt = 1; + local_aio.uio_rw = UIO_READ; + local_aio.uio_segflg = UIO_SYSSPACE; + local_aio.uio_td = td; + local_aio.uio_offset = base_offset; + local_aio.uio_resid = sizeof(struct ufs_extattr_header); + + /* + * Don't need to get the lock on the backing vnode if the vnode we're + * modifying is it, as we already hold the lock. + */ + if (attribute->uele_backing_vnode != vp) + vn_lock(attribute->uele_backing_vnode, LK_EXCLUSIVE | LK_RETRY); + + error = VOP_READ(attribute->uele_backing_vnode, &local_aio, + IO_NODELOCKED, ump->um_extattr.uepm_ucred); + if (error) + goto vopunlock_exit; + + /* Defined? */ + if ((ueh.ueh_flags & UFS_EXTATTR_ATTR_FLAG_INUSE) == 0) { + error = ENOATTR; + goto vopunlock_exit; + } + + /* Valid for the current inode generation? */ + if (ueh.ueh_i_gen != ip->i_gen) { + /* + * The inode itself has a different generation number than + * the attribute data. For now, the best solution is to + * coerce this to undefined, and let it get cleaned up by + * the next write or extattrctl clean. + */ + printf("ufs_extattr_rm (%s): inode number inconsistency (%d, %jd)\n", + mp->mnt_stat.f_mntonname, ueh.ueh_i_gen, (intmax_t)ip->i_gen); + error = ENOATTR; + goto vopunlock_exit; + } + + /* Flag it as not in use. 
*/ + ueh.ueh_flags = 0; + ueh.ueh_len = 0; + + local_aiov.iov_base = (caddr_t) &ueh; + local_aiov.iov_len = sizeof(struct ufs_extattr_header); + local_aio.uio_iov = &local_aiov; + local_aio.uio_iovcnt = 1; + local_aio.uio_rw = UIO_WRITE; + local_aio.uio_segflg = UIO_SYSSPACE; + local_aio.uio_td = td; + local_aio.uio_offset = base_offset; + local_aio.uio_resid = sizeof(struct ufs_extattr_header); + + ioflag = IO_NODELOCKED; + if (ufs_extattr_sync) + ioflag |= IO_SYNC; + error = VOP_WRITE(attribute->uele_backing_vnode, &local_aio, ioflag, + ump->um_extattr.uepm_ucred); + if (error) + goto vopunlock_exit; + + if (local_aio.uio_resid != 0) + error = ENXIO; + +vopunlock_exit: + VOP_UNLOCK(attribute->uele_backing_vnode, 0); + + return (error); +} + +/* + * Called by UFS when an inode is no longer active and should have its + * attributes stripped. + */ +void +ufs_extattr_vnode_inactive(struct vnode *vp, struct thread *td) +{ + struct ufs_extattr_list_entry *uele; + struct mount *mp = vp->v_mount; + struct ufsmount *ump = VFSTOUFS(mp); + + /* + * In that case, we cannot lock. We should not have any active vnodes + * on the fs if this is not yet initialized but is going to be, so + * this can go unlocked. + */ + if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_INITIALIZED)) + return; + + ufs_extattr_uepm_lock(ump); + + if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)) { + ufs_extattr_uepm_unlock(ump); + return; + } + + LIST_FOREACH(uele, &ump->um_extattr.uepm_list, uele_entries) + ufs_extattr_rm(vp, uele->uele_attrnamespace, + uele->uele_attrname, NULL, td); + + ufs_extattr_uepm_unlock(ump); +} + +#endif /* !UFS_EXTATTR */ diff --git a/Dump/ufs/ufs/ufs_extern.h b/Dump/ufs/ufs/ufs_extern.h new file mode 100644 index 0000000..ea2ee8a --- /dev/null +++ b/Dump/ufs/ufs/ufs_extern.h @@ -0,0 +1,127 @@ +/*- + * Copyright (c) 1991, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)ufs_extern.h 8.10 (Berkeley) 5/14/95 + * $FreeBSD: releng/11.2/sys/ufs/ufs/ufs_extern.h 331722 2018-03-29 02:50:57Z eadler $ + */ + +#ifndef _UFS_UFS_EXTERN_H_ +#define _UFS_UFS_EXTERN_H_ + +struct componentname; +struct direct; +struct indir; +struct inode; +struct mount; +struct thread; +struct sockaddr; +struct ucred; +struct ufid; +struct vfsconf; +struct vnode; +struct vop_bmap_args; +struct vop_cachedlookup_args; +struct vop_generic_args; +struct vop_inactive_args; +struct vop_reclaim_args; + +extern struct vop_vector ufs_fifoops; +extern struct vop_vector ufs_vnodeops; + +int ufs_bmap(struct vop_bmap_args *); +int ufs_bmaparray(struct vnode *, ufs2_daddr_t, ufs2_daddr_t *, + struct buf *, int *, int *); +int ufs_fhtovp(struct mount *, struct ufid *, int, struct vnode **); +int ufs_checkpath(ino_t, ino_t, struct inode *, struct ucred *, ino_t *); +void ufs_dirbad(struct inode *, doff_t, char *); +int ufs_dirbadentry(struct vnode *, struct direct *, int); +int ufs_dirempty(struct inode *, ino_t, struct ucred *); +int ufs_extread(struct vop_read_args *); +int ufs_extwrite(struct vop_write_args *); +void ufs_makedirentry(struct inode *, struct componentname *, + struct direct *); +int ufs_direnter(struct vnode *, struct vnode *, struct direct *, + struct componentname *, struct buf *, int); +int ufs_dirremove(struct vnode *, struct inode *, int, int); +int ufs_dirrewrite(struct inode *, struct inode *, ino_t, int, int); +int ufs_lookup_ino(struct vnode *, struct vnode **, struct componentname *, + ino_t *); +int ufs_getlbns(struct vnode *, ufs2_daddr_t, struct indir *, int *); +int ufs_inactive(struct vop_inactive_args *); +int ufs_init(struct vfsconf *); +void ufs_itimes(struct vnode *vp); +int ufs_lookup(struct vop_cachedlookup_args *); +void ufs_prepare_reclaim(struct vnode *vp); +int ufs_readdir(struct vop_readdir_args *); +int ufs_reclaim(struct vop_reclaim_args *); +void ffs_snapgone(struct inode *); +vfs_root_t ufs_root; +int ufs_uninit(struct vfsconf *); +int ufs_vinit(struct mount *, struct vop_vector *, struct vnode **); + +#include +SYSCTL_DECL(_vfs_ufs); + +/* + * Soft update function prototypes. + */ +int softdep_setup_directory_add(struct buf *, struct inode *, off_t, + ino_t, struct buf *, int); +void softdep_change_directoryentry_offset(struct buf *, struct inode *, + caddr_t, caddr_t, caddr_t, int); +void softdep_setup_remove(struct buf *,struct inode *, struct inode *, int); +void softdep_setup_directory_change(struct buf *, struct inode *, + struct inode *, ino_t, int); +void softdep_change_linkcnt(struct inode *); +int softdep_slowdown(struct vnode *); +void softdep_setup_create(struct inode *, struct inode *); +void softdep_setup_dotdot_link(struct inode *, struct inode *); +void softdep_setup_link(struct inode *, struct inode *); +void softdep_setup_mkdir(struct inode *, struct inode *); +void softdep_setup_rmdir(struct inode *, struct inode *); +void softdep_setup_unlink(struct inode *, struct inode *); +void softdep_revert_create(struct inode *, struct inode *); +void softdep_revert_link(struct inode *, struct inode *); +void softdep_revert_mkdir(struct inode *, struct inode *); +void softdep_revert_rmdir(struct inode *, struct inode *); + +/* + * Flags to low-level allocation routines. The low 16-bits are reserved + * for IO_ flags from vnode.h. + * + * Note: The general vfs code typically limits the sequential heuristic + * count to 127. 
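+ * For illustration, a caller that has observed seqcount sequential
+ * transfers typically passes (seqcount << BA_SEQSHIFT), clamped to
+ * BA_SEQMAX, in the flags word handed to the low-level allocation
+ * routines, alongside bits such as BA_CLRBUF.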
See sequential_heuristic() in kern/vfs_vnops.c + */ +#define BA_CLRBUF 0x00010000 /* Clear invalid areas of buffer. */ +#define BA_METAONLY 0x00020000 /* Return indirect block buffer. */ +#define BA_UNMAPPED 0x00040000 /* Do not mmap resulted buffer. */ +#define BA_SEQMASK 0x7F000000 /* Bits holding seq heuristic. */ +#define BA_SEQSHIFT 24 +#define BA_SEQMAX 0x7F + +#endif /* !_UFS_UFS_EXTERN_H_ */ diff --git a/Dump/ufs/ufs/ufs_gjournal.c b/Dump/ufs/ufs/ufs_gjournal.c new file mode 100644 index 0000000..fd4c584 --- /dev/null +++ b/Dump/ufs/ufs/ufs_gjournal.c @@ -0,0 +1,141 @@ +/*- + * Copyright (c) 2005-2006 Pawel Jakub Dawidek + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD: releng/11.2/sys/ufs/ufs/ufs_gjournal.c 306627 2016-10-03 09:37:56Z kib $"); + +#include "opt_ufs.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +/* + * Change the number of unreferenced inodes. 
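+ * The count is kept both per cylinder group (cg_unrefs) and
+ * filesystem-wide (fs_unrefs): ufs_gjournal_orphan() below raises it when
+ * a still-open inode loses its last link, and ufs_gjournal_close() lowers
+ * it once that inode is finally released, presumably so the journal
+ * cleanup code can tell how many orphaned inodes may need to be reclaimed
+ * after a crash.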
+ */ +static int +ufs_gjournal_modref(struct vnode *vp, int count) +{ + struct cg *cgp; + struct buf *bp; + ufs2_daddr_t cgbno; + int error, cg; + struct cdev *dev; + struct inode *ip; + struct ufsmount *ump; + struct fs *fs; + struct vnode *devvp; + ino_t ino; + + ip = VTOI(vp); + ump = VFSTOUFS(vp->v_mount); + fs = ump->um_fs; + devvp = ump->um_devvp; + ino = ip->i_number; + + cg = ino_to_cg(fs, ino); + if (devvp->v_type == VREG) { + /* devvp is a snapshot */ + dev = VFSTOUFS(devvp->v_mount)->um_devvp->v_rdev; + cgbno = fragstoblks(fs, cgtod(fs, cg)); + } else if (devvp->v_type == VCHR) { + /* devvp is a normal disk device */ + dev = devvp->v_rdev; + cgbno = fsbtodb(fs, cgtod(fs, cg)); + } else { + bp = NULL; + return (EIO); + } + if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg) + panic("ufs_gjournal_modref: range: dev = %s, ino = %lu, fs = %s", + devtoname(dev), (u_long)ino, fs->fs_fsmnt); + if ((error = bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp))) { + brelse(bp); + return (error); + } + cgp = (struct cg *)bp->b_data; + if (!cg_chkmagic(cgp)) { + brelse(bp); + return (0); + } + bp->b_xflags |= BX_BKGRDWRITE; + cgp->cg_unrefs += count; + UFS_LOCK(ump); + fs->fs_unrefs += count; + fs->fs_fmod = 1; + ACTIVECLEAR(fs, cg); + UFS_UNLOCK(ump); + bdwrite(bp); + return (0); +} + +void +ufs_gjournal_orphan(struct vnode *vp) +{ + struct inode *ip; + + if (vp->v_mount->mnt_gjprovider == NULL) + return; + if (vp->v_usecount < 2 || (vp->v_vflag & VV_DELETED)) + return; + ip = VTOI(vp); + if ((vp->v_type == VDIR && ip->i_nlink > 2) || + (vp->v_type != VDIR && ip->i_nlink > 1)) { + return; + } + vp->v_vflag |= VV_DELETED; + + ufs_gjournal_modref(vp, 1); +} + +void +ufs_gjournal_close(struct vnode *vp) +{ + struct inode *ip; + + if (vp->v_mount->mnt_gjprovider == NULL) + return; + if (!(vp->v_vflag & VV_DELETED)) + return; + ip = VTOI(vp); + if (ip->i_nlink > 0) + return; + ufs_gjournal_modref(vp, -1); +} diff --git a/Dump/ufs/ufs/ufs_inode.c b/Dump/ufs/ufs/ufs_inode.c new file mode 100644 index 0000000..46a11d9 --- /dev/null +++ b/Dump/ufs/ufs/ufs_inode.c @@ -0,0 +1,237 @@ +/*- + * Copyright (c) 1991, 1993, 1995 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ufs_inode.c 8.9 (Berkeley) 5/14/95 + */ + +#include +__FBSDID("$FreeBSD: releng/11.2/sys/ufs/ufs/ufs_inode.c 331722 2018-03-29 02:50:57Z eadler $"); + +#include "opt_quota.h" +#include "opt_ufs.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#ifdef UFS_DIRHASH +#include +#include +#endif +#ifdef UFS_GJOURNAL +#include +#endif + +/* + * Last reference to an inode. If necessary, write or delete it. + */ +int +ufs_inactive(ap) + struct vop_inactive_args /* { + struct vnode *a_vp; + struct thread *a_td; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); + mode_t mode; + int error = 0; + off_t isize; + struct mount *mp; + + mp = NULL; + /* + * Ignore inodes related to stale file handles. + */ + if (ip->i_mode == 0) + goto out; +#ifdef UFS_GJOURNAL + ufs_gjournal_close(vp); +#endif +#ifdef QUOTA + /* + * Before moving off the active list, we must be sure that + * any modified quotas have been pushed since these will no + * longer be checked once the vnode is on the inactive list. + */ + qsyncvp(vp); +#endif + if ((ip->i_effnlink == 0 && DOINGSOFTDEP(vp)) || + (ip->i_nlink <= 0 && !UFS_RDONLY(ip))) { + loop: + if (vn_start_secondary_write(vp, &mp, V_NOWAIT) != 0) { + /* Cannot delete file while file system is suspended */ + if ((vp->v_iflag & VI_DOOMED) != 0) { + /* Cannot return before file is deleted */ + (void) vn_start_secondary_write(vp, &mp, + V_WAIT); + } else { + MNT_ILOCK(mp); + if ((mp->mnt_kern_flag & + (MNTK_SUSPEND2 | MNTK_SUSPENDED)) == 0) { + MNT_IUNLOCK(mp); + goto loop; + } + /* + * Fail to inactivate vnode now and + * let ffs_snapshot() clean up after + * it has resumed the file system. + */ + VI_LOCK(vp); + vp->v_iflag |= VI_OWEINACT; + VI_UNLOCK(vp); + MNT_IUNLOCK(mp); + return (0); + } + } + } + isize = ip->i_size; + if (I_IS_UFS2(ip)) + isize += ip->i_din2->di_extsize; + if (ip->i_effnlink <= 0 && isize && !UFS_RDONLY(ip)) + error = UFS_TRUNCATE(vp, (off_t)0, IO_EXT | IO_NORMAL, NOCRED); + if (ip->i_nlink <= 0 && ip->i_mode && !UFS_RDONLY(ip)) { +#ifdef QUOTA + if (!getinoquota(ip)) + (void)chkiq(ip, -1, NOCRED, FORCE); +#endif +#ifdef UFS_EXTATTR + ufs_extattr_vnode_inactive(vp, ap->a_td); +#endif + /* + * Setting the mode to zero needs to wait for the inode + * to be written just as does a change to the link count. + * So, rather than creating a new entry point to do the + * same thing, we just use softdep_change_linkcnt(). 
+ */ + DIP_SET(ip, i_rdev, 0); + mode = ip->i_mode; + ip->i_mode = 0; + DIP_SET(ip, i_mode, 0); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + if (DOINGSOFTDEP(vp)) + softdep_change_linkcnt(ip); + UFS_VFREE(vp, ip->i_number, mode); + } + if (ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) { + if ((ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) == 0 && + mp == NULL && + vn_start_secondary_write(vp, &mp, V_NOWAIT)) { + mp = NULL; + ip->i_flag &= ~IN_ACCESS; + } else { + if (mp == NULL) + (void) vn_start_secondary_write(vp, &mp, + V_WAIT); + UFS_UPDATE(vp, 0); + } + } +out: + /* + * If we are done with the inode, reclaim it + * so that it can be reused immediately. + */ + if (ip->i_mode == 0) + vrecycle(vp); + if (mp != NULL) + vn_finished_secondary_write(mp); + return (error); +} + +void +ufs_prepare_reclaim(struct vnode *vp) +{ + struct inode *ip; +#ifdef QUOTA + int i; +#endif + + ip = VTOI(vp); + + vnode_destroy_vobject(vp); +#ifdef QUOTA + for (i = 0; i < MAXQUOTAS; i++) { + if (ip->i_dquot[i] != NODQUOT) { + dqrele(vp, ip->i_dquot[i]); + ip->i_dquot[i] = NODQUOT; + } + } +#endif +#ifdef UFS_DIRHASH + if (ip->i_dirhash != NULL) + ufsdirhash_free(ip); +#endif +} + +/* + * Reclaim an inode so that it can be used for other purposes. + */ +int +ufs_reclaim(ap) + struct vop_reclaim_args /* { + struct vnode *a_vp; + struct thread *a_td; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); + + ufs_prepare_reclaim(vp); + + if (ip->i_flag & IN_LAZYMOD) + ip->i_flag |= IN_MODIFIED; + UFS_UPDATE(vp, 0); + /* + * Remove the inode from its hash chain. + */ + vfs_hash_remove(vp); + + /* + * Lock the clearing of v_data so ffs_lock() can inspect it + * prior to obtaining the lock. + */ + VI_LOCK(vp); + vp->v_data = 0; + VI_UNLOCK(vp); + UFS_IFREE(ITOUMP(ip), ip); + return (0); +} diff --git a/Dump/ufs/ufs/ufs_lookup.c b/Dump/ufs/ufs/ufs_lookup.c new file mode 100644 index 0000000..5c9967b --- /dev/null +++ b/Dump/ufs/ufs/ufs_lookup.c @@ -0,0 +1,1486 @@ +/*- + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ufs_lookup.c 8.15 (Berkeley) 6/16/95 + */ + +#include +__FBSDID("$FreeBSD: releng/11.2/sys/ufs/ufs/ufs_lookup.c 331722 2018-03-29 02:50:57Z eadler $"); + +#include "opt_ufs.h" +#include "opt_quota.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#ifdef UFS_DIRHASH +#include +#endif +#include +#include + +#ifdef DIAGNOSTIC +static int dirchk = 1; +#else +static int dirchk = 0; +#endif + +SYSCTL_INT(_debug, OID_AUTO, dircheck, CTLFLAG_RW, &dirchk, 0, ""); + +/* true if old FS format...*/ +#define OFSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0) + +static int +ufs_delete_denied(struct vnode *vdp, struct vnode *tdp, struct ucred *cred, + struct thread *td) +{ + int error; + +#ifdef UFS_ACL + /* + * NFSv4 Minor Version 1, draft-ietf-nfsv4-minorversion1-03.txt + * + * 3.16.2.1. ACE4_DELETE vs. ACE4_DELETE_CHILD + */ + + /* + * XXX: Is this check required? + */ + error = VOP_ACCESS(vdp, VEXEC, cred, td); + if (error) + return (error); + + error = VOP_ACCESSX(tdp, VDELETE, cred, td); + if (error == 0) + return (0); + + error = VOP_ACCESSX(vdp, VDELETE_CHILD, cred, td); + if (error == 0) + return (0); + + error = VOP_ACCESSX(vdp, VEXPLICIT_DENY | VDELETE_CHILD, cred, td); + if (error) + return (error); + +#endif /* !UFS_ACL */ + + /* + * Standard Unix access control - delete access requires VWRITE. + */ + error = VOP_ACCESS(vdp, VWRITE, cred, td); + if (error) + return (error); + + /* + * If directory is "sticky", then user must own + * the directory, or the file in it, else she + * may not delete it (unless she's root). This + * implements append-only directories. + */ + if ((VTOI(vdp)->i_mode & ISVTX) && + VOP_ACCESS(vdp, VADMIN, cred, td) && + VOP_ACCESS(tdp, VADMIN, cred, td)) + return (EPERM); + + return (0); +} + +/* + * Convert a component of a pathname into a pointer to a locked inode. + * This is a very central and rather complicated routine. + * If the filesystem is not maintained in a strict tree hierarchy, + * this can result in a deadlock situation (see comments in code below). + * + * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending + * on whether the name is to be looked up, created, renamed, or deleted. + * When CREATE, RENAME, or DELETE is specified, information usable in + * creating, renaming, or deleting a directory entry may be calculated. + * If flag has LOCKPARENT or'ed into it and the target of the pathname + * exists, lookup returns both the target and its parent directory locked. + * When creating or renaming and LOCKPARENT is specified, the target may + * not be ".". When deleting and LOCKPARENT is specified, the target may + * be "."., but the caller must check to ensure it does an vrele and vput + * instead of two vputs. + * + * This routine is actually used as VOP_CACHEDLOOKUP method, and the + * filesystem employs the generic vfs_cache_lookup() as VOP_LOOKUP + * method. 
+ * + * vfs_cache_lookup() performs the following for us: + * check that it is a directory + * check accessibility of directory + * check for modification attempts on read-only mounts + * if name found in cache + * if at end of path and deleting or creating + * drop it + * else + * return name. + * return VOP_CACHEDLOOKUP() + * + * Overall outline of ufs_lookup: + * + * search for name in directory, to found or notfound + * notfound: + * if creating, return locked directory, leaving info on available slots + * else return error + * found: + * if at end of path and deleting, return information to allow delete + * if at end of path and rewriting (RENAME and LOCKPARENT), lock target + * inode and return info to allow rewrite + * if not at end, add name to cache; if at end and neither creating + * nor deleting, add name to cache + */ +int +ufs_lookup(ap) + struct vop_cachedlookup_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + } */ *ap; +{ + + return (ufs_lookup_ino(ap->a_dvp, ap->a_vpp, ap->a_cnp, NULL)); +} + +int +ufs_lookup_ino(struct vnode *vdp, struct vnode **vpp, struct componentname *cnp, + ino_t *dd_ino) +{ + struct inode *dp; /* inode for directory being searched */ + struct buf *bp; /* a buffer of directory entries */ + struct direct *ep; /* the current directory entry */ + int entryoffsetinblock; /* offset of ep in bp's buffer */ + enum {NONE, COMPACT, FOUND} slotstatus; + doff_t slotoffset; /* offset of area with free space */ + doff_t i_diroff; /* cached i_diroff value. */ + doff_t i_offset; /* cached i_offset value. */ + int slotsize; /* size of area at slotoffset */ + int slotfreespace; /* amount of space free in slot */ + int slotneeded; /* size of the entry we're seeking */ + int numdirpasses; /* strategy for directory search */ + doff_t endsearch; /* offset to end directory search */ + doff_t prevoff; /* prev entry dp->i_offset */ + struct vnode *pdp; /* saved dp during symlink work */ + struct vnode *tdp; /* returned by VFS_VGET */ + doff_t enduseful; /* pointer past last used dir slot */ + u_long bmask; /* block offset mask */ + int namlen, error; + struct ucred *cred = cnp->cn_cred; + int flags = cnp->cn_flags; + int nameiop = cnp->cn_nameiop; + ino_t ino, ino1; + int ltype; + + if (vpp != NULL) + *vpp = NULL; + + dp = VTOI(vdp); + if (dp->i_effnlink == 0) + return (ENOENT); + + /* + * Create a vm object if vmiodirenable is enabled. + * Alternatively we could call vnode_create_vobject + * in VFS_VGET but we could end up creating objects + * that are never used. + */ + vnode_create_vobject(vdp, DIP(dp, i_size), cnp->cn_thread); + + bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; + +#ifdef DEBUG_VFS_LOCKS + /* + * Assert that the directory vnode is locked, and locked + * exclusively for the last component lookup for modifying + * operations. + * + * The directory-modifying operations need to save + * intermediate state in the inode between namei() call and + * actual directory manipulations. See fields in the struct + * inode marked as 'used during directory lookup'. We must + * ensure that upgrade in namei() does not happen, since + * upgrade might need to unlock vdp. If quotas are enabled, + * getinoquota() also requires exclusive lock to modify inode. 
+ */ + ASSERT_VOP_LOCKED(vdp, "ufs_lookup1"); + if ((nameiop == CREATE || nameiop == DELETE || nameiop == RENAME) && + (flags & (LOCKPARENT | ISLASTCN)) == (LOCKPARENT | ISLASTCN)) + ASSERT_VOP_ELOCKED(vdp, "ufs_lookup2"); +#endif + +restart: + bp = NULL; + slotoffset = -1; + + /* + * We now have a segment name to search for, and a directory to search. + * + * Suppress search for slots unless creating + * file and at end of pathname, in which case + * we watch for a place to put the new file in + * case it doesn't already exist. + */ + ino = 0; + i_diroff = dp->i_diroff; + slotstatus = FOUND; + slotfreespace = slotsize = slotneeded = 0; + if ((nameiop == CREATE || nameiop == RENAME) && + (flags & ISLASTCN)) { + slotstatus = NONE; + slotneeded = DIRECTSIZ(cnp->cn_namelen); + } + +#ifdef UFS_DIRHASH + /* + * Use dirhash for fast operations on large directories. The logic + * to determine whether to hash the directory is contained within + * ufsdirhash_build(); a zero return means that it decided to hash + * this directory and it successfully built up the hash table. + */ + if (ufsdirhash_build(dp) == 0) { + /* Look for a free slot if needed. */ + enduseful = dp->i_size; + if (slotstatus != FOUND) { + slotoffset = ufsdirhash_findfree(dp, slotneeded, + &slotsize); + if (slotoffset >= 0) { + slotstatus = COMPACT; + enduseful = ufsdirhash_enduseful(dp); + if (enduseful < 0) + enduseful = dp->i_size; + } + } + /* Look up the component. */ + numdirpasses = 1; + entryoffsetinblock = 0; /* silence compiler warning */ + switch (ufsdirhash_lookup(dp, cnp->cn_nameptr, cnp->cn_namelen, + &i_offset, &bp, nameiop == DELETE ? &prevoff : NULL)) { + case 0: + ep = (struct direct *)((char *)bp->b_data + + (i_offset & bmask)); + goto foundentry; + case ENOENT: + i_offset = roundup2(dp->i_size, DIRBLKSIZ); + goto notfound; + default: + /* Something failed; just do a linear search. */ + break; + } + } +#endif /* UFS_DIRHASH */ + /* + * If there is cached information on a previous search of + * this directory, pick up where we last left off. + * We cache only lookups as these are the most common + * and have the greatest payoff. Caching CREATE has little + * benefit as it usually must search the entire directory + * to determine that the entry does not exist. Caching the + * location of the last DELETE or RENAME has not reduced + * profiling time and hence has been removed in the interest + * of simplicity. + */ + if (nameiop != LOOKUP || i_diroff == 0 || i_diroff >= dp->i_size) { + entryoffsetinblock = 0; + i_offset = 0; + numdirpasses = 1; + } else { + i_offset = i_diroff; + if ((entryoffsetinblock = i_offset & bmask) && + (error = UFS_BLKATOFF(vdp, (off_t)i_offset, NULL, &bp))) + return (error); + numdirpasses = 2; + nchstats.ncs_2passes++; + } + prevoff = i_offset; + endsearch = roundup2(dp->i_size, DIRBLKSIZ); + enduseful = 0; + +searchloop: + while (i_offset < endsearch) { + /* + * If necessary, get the next directory block. + */ + if ((i_offset & bmask) == 0) { + if (bp != NULL) + brelse(bp); + error = + UFS_BLKATOFF(vdp, (off_t)i_offset, NULL, &bp); + if (error) + return (error); + entryoffsetinblock = 0; + } + /* + * If still looking for a slot, and at a DIRBLKSIZE + * boundary, have to start looking for free space again. + */ + if (slotstatus == NONE && + (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) { + slotoffset = -1; + slotfreespace = 0; + } + /* + * Get pointer to next entry. + * Full validation checks are slow, so we only check + * enough to insure forward progress through the + * directory. 
Complete checks can be run by patching + * "dirchk" to be true. + */ + ep = (struct direct *)((char *)bp->b_data + entryoffsetinblock); + if (ep->d_reclen == 0 || ep->d_reclen > + DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || + (dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock))) { + int i; + + ufs_dirbad(dp, i_offset, "mangled entry"); + i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)); + i_offset += i; + entryoffsetinblock += i; + continue; + } + + /* + * If an appropriate sized slot has not yet been found, + * check to see if one is available. Also accumulate space + * in the current block so that we can determine if + * compaction is viable. + */ + if (slotstatus != FOUND) { + int size = ep->d_reclen; + + if (ep->d_ino != 0) + size -= DIRSIZ(OFSFMT(vdp), ep); + if (size > 0) { + if (size >= slotneeded) { + slotstatus = FOUND; + slotoffset = i_offset; + slotsize = ep->d_reclen; + } else if (slotstatus == NONE) { + slotfreespace += size; + if (slotoffset == -1) + slotoffset = i_offset; + if (slotfreespace >= slotneeded) { + slotstatus = COMPACT; + slotsize = i_offset + + ep->d_reclen - slotoffset; + } + } + } + } + + /* + * Check for a name match. + */ + if (ep->d_ino) { +# if (BYTE_ORDER == LITTLE_ENDIAN) + if (OFSFMT(vdp)) + namlen = ep->d_type; + else + namlen = ep->d_namlen; +# else + namlen = ep->d_namlen; +# endif + if (namlen == cnp->cn_namelen && + (cnp->cn_nameptr[0] == ep->d_name[0]) && + !bcmp(cnp->cn_nameptr, ep->d_name, + (unsigned)namlen)) { +#ifdef UFS_DIRHASH +foundentry: +#endif + /* + * Save directory entry's inode number and + * reclen in ndp->ni_ufs area, and release + * directory buffer. + */ + if (vdp->v_mount->mnt_maxsymlinklen > 0 && + ep->d_type == DT_WHT) { + slotstatus = FOUND; + slotoffset = i_offset; + slotsize = ep->d_reclen; + enduseful = dp->i_size; + cnp->cn_flags |= ISWHITEOUT; + numdirpasses--; + goto notfound; + } + ino = ep->d_ino; + goto found; + } + } + prevoff = i_offset; + i_offset += ep->d_reclen; + entryoffsetinblock += ep->d_reclen; + if (ep->d_ino) + enduseful = i_offset; + } +notfound: + /* + * If we started in the middle of the directory and failed + * to find our target, we must check the beginning as well. + */ + if (numdirpasses == 2) { + numdirpasses--; + i_offset = 0; + endsearch = i_diroff; + goto searchloop; + } + if (bp != NULL) + brelse(bp); + /* + * If creating, and at end of pathname and current + * directory has not been removed, then can consider + * allowing file to be created. + */ + if ((nameiop == CREATE || nameiop == RENAME || + (nameiop == DELETE && + (cnp->cn_flags & DOWHITEOUT) && + (cnp->cn_flags & ISWHITEOUT))) && + (flags & ISLASTCN) && dp->i_effnlink != 0) { + /* + * Access for write is interpreted as allowing + * creation of files in the directory. + * + * XXX: Fix the comment above. + */ + if (flags & WILLBEDIR) + error = VOP_ACCESSX(vdp, VWRITE | VAPPEND, cred, cnp->cn_thread); + else + error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread); + if (error) + return (error); + /* + * Return an indication of where the new directory + * entry should be put. If we didn't find a slot, + * then set dp->i_count to 0 indicating + * that the new slot belongs at the end of the + * directory. If we found a slot, then the new entry + * can be put in the range from dp->i_offset to + * dp->i_offset + dp->i_count. 
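+ * For example, if the scan had to add up 24 bytes of reusable space
+ * spread across the entries between offsets 1536 and 1600, it leaves
+ * dp->i_offset = 1536 and dp->i_count = 64, and ufs_direnter() later
+ * compacts that range to make the room actually contiguous.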
+ */ + if (slotstatus == NONE) { + dp->i_offset = roundup2(dp->i_size, DIRBLKSIZ); + dp->i_count = 0; + enduseful = dp->i_offset; + } else if (nameiop == DELETE) { + dp->i_offset = slotoffset; + if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) + dp->i_count = 0; + else + dp->i_count = dp->i_offset - prevoff; + } else { + dp->i_offset = slotoffset; + dp->i_count = slotsize; + if (enduseful < slotoffset + slotsize) + enduseful = slotoffset + slotsize; + } + dp->i_endoff = roundup2(enduseful, DIRBLKSIZ); + /* + * We return with the directory locked, so that + * the parameters we set up above will still be + * valid if we actually decide to do a direnter(). + * We return ni_vp == NULL to indicate that the entry + * does not currently exist; we leave a pointer to + * the (locked) directory inode in ndp->ni_dvp. + * The pathname buffer is saved so that the name + * can be obtained later. + * + * NB - if the directory is unlocked, then this + * information cannot be used. + */ + cnp->cn_flags |= SAVENAME; + return (EJUSTRETURN); + } + /* + * Insert name into cache (as non-existent) if appropriate. + */ + if ((cnp->cn_flags & MAKEENTRY) != 0) + cache_enter(vdp, NULL, cnp); + return (ENOENT); + +found: + if (dd_ino != NULL) + *dd_ino = ino; + if (numdirpasses == 2) + nchstats.ncs_pass2++; + /* + * Check that directory length properly reflects presence + * of this entry. + */ + if (i_offset + DIRSIZ(OFSFMT(vdp), ep) > dp->i_size) { + ufs_dirbad(dp, i_offset, "i_size too small"); + dp->i_size = i_offset + DIRSIZ(OFSFMT(vdp), ep); + DIP_SET(dp, i_size, dp->i_size); + dp->i_flag |= IN_CHANGE | IN_UPDATE; + } + brelse(bp); + + /* + * Found component in pathname. + * If the final component of path name, save information + * in the cache as to where the entry was found. + */ + if ((flags & ISLASTCN) && nameiop == LOOKUP) + dp->i_diroff = rounddown2(i_offset, DIRBLKSIZ); + + /* + * If deleting, and at end of pathname, return + * parameters which can be used to remove file. + */ + if (nameiop == DELETE && (flags & ISLASTCN)) { + if (flags & LOCKPARENT) + ASSERT_VOP_ELOCKED(vdp, __FUNCTION__); + /* + * Return pointer to current entry in dp->i_offset, + * and distance past previous entry (if there + * is a previous entry in this block) in dp->i_count. + * Save directory inode pointer in ndp->ni_dvp for dirremove(). + * + * Technically we shouldn't be setting these in the + * WANTPARENT case (first lookup in rename()), but any + * lookups that will result in directory changes will + * overwrite these. + */ + dp->i_offset = i_offset; + if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) + dp->i_count = 0; + else + dp->i_count = dp->i_offset - prevoff; + if (dd_ino != NULL) + return (0); + if ((error = VFS_VGET(vdp->v_mount, ino, + LK_EXCLUSIVE, &tdp)) != 0) + return (error); + error = ufs_delete_denied(vdp, tdp, cred, cnp->cn_thread); + if (error) { + vput(tdp); + return (error); + } + if (dp->i_number == ino) { + VREF(vdp); + *vpp = vdp; + vput(tdp); + return (0); + } + + *vpp = tdp; + return (0); + } + + /* + * If rewriting (RENAME), return the inode and the + * information required to rewrite the present directory + * Must get inode of directory entry to verify it's a + * regular file, or empty directory. + */ + if (nameiop == RENAME && (flags & ISLASTCN)) { + if (flags & WILLBEDIR) + error = VOP_ACCESSX(vdp, VWRITE | VAPPEND, cred, cnp->cn_thread); + else + error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread); + if (error) + return (error); + /* + * Careful about locking second inode. 
+ * This can only occur if the target is ".". + */ + dp->i_offset = i_offset; + if (dp->i_number == ino) + return (EISDIR); + if (dd_ino != NULL) + return (0); + if ((error = VFS_VGET(vdp->v_mount, ino, + LK_EXCLUSIVE, &tdp)) != 0) + return (error); + + error = ufs_delete_denied(vdp, tdp, cred, cnp->cn_thread); + if (error) { + vput(tdp); + return (error); + } + +#ifdef SunOS_doesnt_do_that + /* + * The only purpose of this check is to return the correct + * error. Assume that we want to rename directory "a" + * to a file "b", and that we have no ACL_WRITE_DATA on + * a containing directory, but we _do_ have ACL_APPEND_DATA. + * In that case, the VOP_ACCESS check above will return 0, + * and the operation will fail with ENOTDIR instead + * of EACCESS. + */ + if (tdp->v_type == VDIR) + error = VOP_ACCESSX(vdp, VWRITE | VAPPEND, cred, cnp->cn_thread); + else + error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread); + if (error) { + vput(tdp); + return (error); + } +#endif + + *vpp = tdp; + cnp->cn_flags |= SAVENAME; + return (0); + } + if (dd_ino != NULL) + return (0); + + /* + * Step through the translation in the name. We do not `vput' the + * directory because we may need it again if a symbolic link + * is relative to the current directory. Instead we save it + * unlocked as "pdp". We must get the target inode before unlocking + * the directory to insure that the inode will not be removed + * before we get it. We prevent deadlock by always fetching + * inodes from the root, moving down the directory tree. Thus + * when following backward pointers ".." we must unlock the + * parent directory before getting the requested directory. + * There is a potential race condition here if both the current + * and parent directories are removed before the VFS_VGET for the + * inode associated with ".." returns. We hope that this occurs + * infrequently since we cannot avoid this race condition without + * implementing a sophisticated deadlock detection algorithm. + * Note also that this simple deadlock detection scheme will not + * work if the filesystem has any hard links other than ".." + * that point backwards in the directory structure. + */ + pdp = vdp; + if (flags & ISDOTDOT) { + error = vn_vget_ino(pdp, ino, cnp->cn_lkflags, &tdp); + if (error) + return (error); + + /* + * Recheck that ".." entry in the vdp directory points + * to the inode we looked up before vdp lock was + * dropped. + */ + error = ufs_lookup_ino(pdp, NULL, cnp, &ino1); + if (error) { + vput(tdp); + return (error); + } + if (ino1 != ino) { + vput(tdp); + goto restart; + } + + *vpp = tdp; + } else if (dp->i_number == ino) { + VREF(vdp); /* we want ourself, ie "." */ + /* + * When we lookup "." we still can be asked to lock it + * differently. + */ + ltype = cnp->cn_lkflags & LK_TYPE_MASK; + if (ltype != VOP_ISLOCKED(vdp)) { + if (ltype == LK_EXCLUSIVE) + vn_lock(vdp, LK_UPGRADE | LK_RETRY); + else /* if (ltype == LK_SHARED) */ + vn_lock(vdp, LK_DOWNGRADE | LK_RETRY); + /* + * Relock for the "." case may left us with + * reclaimed vnode. + */ + if (vdp->v_iflag & VI_DOOMED) { + vrele(vdp); + return (ENOENT); + } + } + *vpp = vdp; + } else { + error = VFS_VGET(pdp->v_mount, ino, cnp->cn_lkflags, &tdp); + if (error) + return (error); + *vpp = tdp; + } + + /* + * Insert name into cache if appropriate. 
+ */ + if (cnp->cn_flags & MAKEENTRY) + cache_enter(vdp, *vpp, cnp); + return (0); +} + +void +ufs_dirbad(ip, offset, how) + struct inode *ip; + doff_t offset; + char *how; +{ + struct mount *mp; + + mp = ITOV(ip)->v_mount; + if ((mp->mnt_flag & MNT_RDONLY) == 0) + panic("ufs_dirbad: %s: bad dir ino %ju at offset %ld: %s", + mp->mnt_stat.f_mntonname, (uintmax_t)ip->i_number, + (long)offset, how); + else + (void)printf("%s: bad dir ino %ju at offset %ld: %s\n", + mp->mnt_stat.f_mntonname, (uintmax_t)ip->i_number, + (long)offset, how); +} + +/* + * Do consistency checking on a directory entry: + * record length must be multiple of 4 + * entry must fit in rest of its DIRBLKSIZ block + * record must be large enough to contain entry + * name is not longer than MAXNAMLEN + * name must be as long as advertised, and null terminated + */ +int +ufs_dirbadentry(dp, ep, entryoffsetinblock) + struct vnode *dp; + struct direct *ep; + int entryoffsetinblock; +{ + int i, namlen; + +# if (BYTE_ORDER == LITTLE_ENDIAN) + if (OFSFMT(dp)) + namlen = ep->d_type; + else + namlen = ep->d_namlen; +# else + namlen = ep->d_namlen; +# endif + if ((ep->d_reclen & 0x3) != 0 || + ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || + ep->d_reclen < DIRSIZ(OFSFMT(dp), ep) || namlen > MAXNAMLEN) { + /*return (1); */ + printf("First bad\n"); + goto bad; + } + if (ep->d_ino == 0) + return (0); + for (i = 0; i < namlen; i++) + if (ep->d_name[i] == '\0') { + /*return (1); */ + printf("Second bad\n"); + goto bad; + } + if (ep->d_name[i]) + goto bad; + return (0); +bad: + return (1); +} + +/* + * Construct a new directory entry after a call to namei, using the + * parameters that it left in the componentname argument cnp. The + * argument ip is the inode to which the new directory entry will refer. + */ +void +ufs_makedirentry(ip, cnp, newdirp) + struct inode *ip; + struct componentname *cnp; + struct direct *newdirp; +{ + +#ifdef INVARIANTS + if ((cnp->cn_flags & SAVENAME) == 0) + panic("ufs_makedirentry: missing name"); +#endif + newdirp->d_ino = ip->i_number; + newdirp->d_namlen = cnp->cn_namelen; + bcopy(cnp->cn_nameptr, newdirp->d_name, (unsigned)cnp->cn_namelen + 1); + if (ITOV(ip)->v_mount->mnt_maxsymlinklen > 0) + newdirp->d_type = IFTODT(ip->i_mode); + else { + newdirp->d_type = 0; +# if (BYTE_ORDER == LITTLE_ENDIAN) + { u_char tmp = newdirp->d_namlen; + newdirp->d_namlen = newdirp->d_type; + newdirp->d_type = tmp; } +# endif + } +} + +/* + * Write a directory entry after a call to namei, using the parameters + * that it left in nameidata. The argument dirp is the new directory + * entry contents. Dvp is a pointer to the directory to be written, + * which was left locked by namei. Remaining parameters (dp->i_offset, + * dp->i_count) indicate how the space for the new entry is to be obtained. + * Non-null bp indicates that a directory is being created (for the + * soft dependency code). 
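+ * Two cases are handled below: when dp->i_count is zero the entry is
+ * written into a fresh DIRBLKSIZ block allocated at dp->i_offset;
+ * otherwise the entries in [dp->i_offset, dp->i_offset + dp->i_count)
+ * are compacted toward the front of that range and the new entry takes
+ * the free space that collects at its end.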
+ */ +int +ufs_direnter(dvp, tvp, dirp, cnp, newdirbp, isrename) + struct vnode *dvp; + struct vnode *tvp; + struct direct *dirp; + struct componentname *cnp; + struct buf *newdirbp; + int isrename; +{ + struct ucred *cr; + struct thread *td; + int newentrysize; + struct inode *dp; + struct buf *bp; + u_int dsize; + struct direct *ep, *nep; + u_int64_t old_isize; + int error, ret, blkoff, loc, spacefree, flags, namlen; + char *dirbuf; + + td = curthread; /* XXX */ + cr = td->td_ucred; + + dp = VTOI(dvp); + newentrysize = DIRSIZ(OFSFMT(dvp), dirp); + + if (dp->i_count == 0) { + /* + * If dp->i_count is 0, then namei could find no + * space in the directory. Here, dp->i_offset will + * be on a directory block boundary and we will write the + * new entry into a fresh block. + */ + if (dp->i_offset & (DIRBLKSIZ - 1)) + panic("ufs_direnter: newblk"); + flags = BA_CLRBUF; + if (!DOINGSOFTDEP(dvp) && !DOINGASYNC(dvp)) + flags |= IO_SYNC; +#ifdef QUOTA + if ((error = getinoquota(dp)) != 0) { + if (DOINGSOFTDEP(dvp) && newdirbp != NULL) + bdwrite(newdirbp); + return (error); + } +#endif + old_isize = dp->i_size; + vnode_pager_setsize(dvp, (u_long)dp->i_offset + DIRBLKSIZ); + if ((error = UFS_BALLOC(dvp, (off_t)dp->i_offset, DIRBLKSIZ, + cr, flags, &bp)) != 0) { + if (DOINGSOFTDEP(dvp) && newdirbp != NULL) + bdwrite(newdirbp); + vnode_pager_setsize(dvp, (u_long)old_isize); + return (error); + } + dp->i_size = dp->i_offset + DIRBLKSIZ; + DIP_SET(dp, i_size, dp->i_size); + dp->i_endoff = dp->i_size; + dp->i_flag |= IN_CHANGE | IN_UPDATE; + dirp->d_reclen = DIRBLKSIZ; + blkoff = dp->i_offset & + (VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1); + bcopy((caddr_t)dirp, (caddr_t)bp->b_data + blkoff,newentrysize); +#ifdef UFS_DIRHASH + if (dp->i_dirhash != NULL) { + ufsdirhash_newblk(dp, dp->i_offset); + ufsdirhash_add(dp, dirp, dp->i_offset); + ufsdirhash_checkblock(dp, (char *)bp->b_data + blkoff, + dp->i_offset); + } +#endif + if (DOINGSOFTDEP(dvp)) { + /* + * Ensure that the entire newly allocated block is a + * valid directory so that future growth within the + * block does not have to ensure that the block is + * written before the inode. + */ + blkoff += DIRBLKSIZ; + while (blkoff < bp->b_bcount) { + ((struct direct *) + (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; + blkoff += DIRBLKSIZ; + } + if (softdep_setup_directory_add(bp, dp, dp->i_offset, + dirp->d_ino, newdirbp, 1)) + dp->i_flag |= IN_NEEDSYNC; + if (newdirbp) + bdwrite(newdirbp); + bdwrite(bp); + if ((dp->i_flag & IN_NEEDSYNC) == 0) + return (UFS_UPDATE(dvp, 0)); + /* + * We have just allocated a directory block in an + * indirect block. We must prevent holes in the + * directory created if directory entries are + * written out of order. To accomplish this we + * fsync when we extend a directory into indirects. + * During rename it's not safe to drop the tvp lock + * so sync must be delayed until it is. + * + * This synchronous step could be removed if fsck and + * the kernel were taught to fill in sparse + * directories rather than panic. 
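+ * (A hole in a directory reads back as zeroes, so a later scan would
+ * see d_reclen == 0 and ufs_dirbad() would panic on a read-write
+ * mount; hence the fsync whenever a directory grows into an indirect
+ * block.)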
+ */ + if (isrename) + return (0); + if (tvp != NULL) + VOP_UNLOCK(tvp, 0); + (void) VOP_FSYNC(dvp, MNT_WAIT, td); + if (tvp != NULL) + vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY); + return (error); + } + if (DOINGASYNC(dvp)) { + bdwrite(bp); + return (UFS_UPDATE(dvp, 0)); + } + error = bwrite(bp); + ret = UFS_UPDATE(dvp, 1); + if (error == 0) + return (ret); + return (error); + } + + /* + * If dp->i_count is non-zero, then namei found space for the new + * entry in the range dp->i_offset to dp->i_offset + dp->i_count + * in the directory. To use this space, we may have to compact + * the entries located there, by copying them together towards the + * beginning of the block, leaving the free space in one usable + * chunk at the end. + */ + + /* + * Increase size of directory if entry eats into new space. + * This should never push the size past a new multiple of + * DIRBLKSIZE. + * + * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. + */ + if (dp->i_offset + dp->i_count > dp->i_size) { + dp->i_size = dp->i_offset + dp->i_count; + DIP_SET(dp, i_size, dp->i_size); + } + /* + * Get the block containing the space for the new directory entry. + */ + error = UFS_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp); + if (error) { + if (DOINGSOFTDEP(dvp) && newdirbp != NULL) + bdwrite(newdirbp); + return (error); + } + /* + * Find space for the new entry. In the simple case, the entry at + * offset base will have the space. If it does not, then namei + * arranged that compacting the region dp->i_offset to + * dp->i_offset + dp->i_count would yield the space. + */ + ep = (struct direct *)dirbuf; + dsize = ep->d_ino ? DIRSIZ(OFSFMT(dvp), ep) : 0; + spacefree = ep->d_reclen - dsize; + for (loc = ep->d_reclen; loc < dp->i_count; ) { + nep = (struct direct *)(dirbuf + loc); + + /* Trim the existing slot (NB: dsize may be zero). */ + ep->d_reclen = dsize; + ep = (struct direct *)((char *)ep + dsize); + + /* Read nep->d_reclen now as the bcopy() may clobber it. */ + loc += nep->d_reclen; + if (nep->d_ino == 0) { + /* + * A mid-block unused entry. Such entries are + * never created by the kernel, but fsck_ffs + * can create them (and it doesn't fix them). + * + * Add up the free space, and initialise the + * relocated entry since we don't bcopy it. + */ + spacefree += nep->d_reclen; + ep->d_ino = 0; + dsize = 0; + continue; + } + dsize = DIRSIZ(OFSFMT(dvp), nep); + spacefree += nep->d_reclen - dsize; +#ifdef UFS_DIRHASH + if (dp->i_dirhash != NULL) + ufsdirhash_move(dp, nep, + dp->i_offset + ((char *)nep - dirbuf), + dp->i_offset + ((char *)ep - dirbuf)); +#endif + if (DOINGSOFTDEP(dvp)) + softdep_change_directoryentry_offset(bp, dp, dirbuf, + (caddr_t)nep, (caddr_t)ep, dsize); + else + bcopy((caddr_t)nep, (caddr_t)ep, dsize); + } + /* + * Here, `ep' points to a directory entry containing `dsize' in-use + * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0, + * then the entry is completely unused (dsize == 0). The value + * of ep->d_reclen is always indeterminate. + * + * Update the pointer fields in the previous entry (if any), + * copy in the new entry, and write out the block. 
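+ * As a sketch with made-up sizes: if the range held three live entries
+ * whose records were 20, 32 and 40 bytes long but whose contents only
+ * needed 16, 12 and 24 bytes, the loop above packs them together and
+ * leaves spacefree = 4 + 20 + 16 = 40 unused bytes after the last one,
+ * which is where the new entry is copied.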
+ */ +# if (BYTE_ORDER == LITTLE_ENDIAN) + if (OFSFMT(dvp)) + namlen = ep->d_type; + else + namlen = ep->d_namlen; +# else + namlen = ep->d_namlen; +# endif + if (ep->d_ino == 0 || + (ep->d_ino == WINO && namlen == dirp->d_namlen && + bcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) { + if (spacefree + dsize < newentrysize) + panic("ufs_direnter: compact1"); + dirp->d_reclen = spacefree + dsize; + } else { + if (spacefree < newentrysize) + panic("ufs_direnter: compact2"); + dirp->d_reclen = spacefree; + ep->d_reclen = dsize; + ep = (struct direct *)((char *)ep + dsize); + } +#ifdef UFS_DIRHASH + if (dp->i_dirhash != NULL && (ep->d_ino == 0 || + dirp->d_reclen == spacefree)) + ufsdirhash_add(dp, dirp, dp->i_offset + ((char *)ep - dirbuf)); +#endif + bcopy((caddr_t)dirp, (caddr_t)ep, (u_int)newentrysize); +#ifdef UFS_DIRHASH + if (dp->i_dirhash != NULL) + ufsdirhash_checkblock(dp, dirbuf - + (dp->i_offset & (DIRBLKSIZ - 1)), + rounddown2(dp->i_offset, DIRBLKSIZ)); +#endif + + if (DOINGSOFTDEP(dvp)) { + (void) softdep_setup_directory_add(bp, dp, + dp->i_offset + (caddr_t)ep - dirbuf, + dirp->d_ino, newdirbp, 0); + if (newdirbp != NULL) + bdwrite(newdirbp); + bdwrite(bp); + } else { + if (DOINGASYNC(dvp)) { + bdwrite(bp); + error = 0; + } else { + error = bwrite(bp); + } + } + dp->i_flag |= IN_CHANGE | IN_UPDATE; + /* + * If all went well, and the directory can be shortened, proceed + * with the truncation. Note that we have to unlock the inode for + * the entry that we just entered, as the truncation may need to + * lock other inodes which can lead to deadlock if we also hold a + * lock on the newly entered node. + */ + if (isrename == 0 && error == 0 && + dp->i_endoff && dp->i_endoff < dp->i_size) { + if (tvp != NULL) + VOP_UNLOCK(tvp, 0); + error = UFS_TRUNCATE(dvp, (off_t)dp->i_endoff, + IO_NORMAL | (DOINGASYNC(dvp) ? 0 : IO_SYNC), cr); + if (error != 0) + vn_printf(dvp, + "ufs_direnter: failed to truncate, error %d\n", + error); +#ifdef UFS_DIRHASH + if (error == 0 && dp->i_dirhash != NULL) + ufsdirhash_dirtrunc(dp, dp->i_endoff); +#endif + error = 0; + if (tvp != NULL) + vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY); + } + return (error); +} + +/* + * Remove a directory entry after a call to namei, using + * the parameters which it left in nameidata. The entry + * dp->i_offset contains the offset into the directory of the + * entry to be eliminated. The dp->i_count field contains the + * size of the previous record in the directory. If this + * is 0, the first entry is being deleted, so we need only + * zero the inode number to mark the entry as free. If the + * entry is not the first in the directory, we must reclaim + * the space of the now empty record by adding the record size + * to the size of the previous entry. + */ +int +ufs_dirremove(dvp, ip, flags, isrmdir) + struct vnode *dvp; + struct inode *ip; + int flags; + int isrmdir; +{ + struct inode *dp; + struct direct *ep, *rep; + struct buf *bp; + int error; + + dp = VTOI(dvp); + + /* + * Adjust the link count early so softdep can block if necessary. + */ + if (ip) { + ip->i_effnlink--; + if (DOINGSOFTDEP(dvp)) { + softdep_setup_unlink(dp, ip); + } else { + ip->i_nlink--; + DIP_SET(ip, i_nlink, ip->i_nlink); + ip->i_flag |= IN_CHANGE; + } + } + if (flags & DOWHITEOUT) { + /* + * Whiteout entry: set d_ino to WINO. 
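+ * (A whiteout keeps the name present but dead: d_ino becomes the
+ * reserved WINO value and d_type becomes DT_WHT, which lets union
+ * mounts hide a name that still exists in a lower layer.)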
+ */ + if ((error = + UFS_BLKATOFF(dvp, (off_t)dp->i_offset, (char **)&ep, &bp)) != 0) + return (error); + ep->d_ino = WINO; + ep->d_type = DT_WHT; + goto out; + } + + if ((error = UFS_BLKATOFF(dvp, + (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp)) != 0) + return (error); + + /* Set 'rep' to the entry being removed. */ + if (dp->i_count == 0) + rep = ep; + else + rep = (struct direct *)((char *)ep + ep->d_reclen); +#ifdef UFS_DIRHASH + /* + * Remove the dirhash entry. This is complicated by the fact + * that `ep' is the previous entry when dp->i_count != 0. + */ + if (dp->i_dirhash != NULL) + ufsdirhash_remove(dp, rep, dp->i_offset); +#endif + if (ip && rep->d_ino != ip->i_number) + panic("ufs_dirremove: ip %ju does not match dirent ino %ju\n", + (uintmax_t)ip->i_number, (uintmax_t)rep->d_ino); + if (dp->i_count == 0) { + /* + * First entry in block: set d_ino to zero. + */ + ep->d_ino = 0; + } else { + /* + * Collapse new free space into previous entry. + */ + ep->d_reclen += rep->d_reclen; + } +#ifdef UFS_DIRHASH + if (dp->i_dirhash != NULL) + ufsdirhash_checkblock(dp, (char *)ep - + ((dp->i_offset - dp->i_count) & (DIRBLKSIZ - 1)), + rounddown2(dp->i_offset, DIRBLKSIZ)); +#endif +out: + error = 0; + if (DOINGSOFTDEP(dvp)) { + if (ip) + softdep_setup_remove(bp, dp, ip, isrmdir); + if (softdep_slowdown(dvp)) + error = bwrite(bp); + else + bdwrite(bp); + } else { + if (flags & DOWHITEOUT) + error = bwrite(bp); + else if (DOINGASYNC(dvp) && dp->i_count != 0) + bdwrite(bp); + else + error = bwrite(bp); + } + dp->i_flag |= IN_CHANGE | IN_UPDATE; + /* + * If the last named reference to a snapshot goes away, + * drop its snapshot reference so that it will be reclaimed + * when last open reference goes away. + */ + if (ip != NULL && (ip->i_flags & SF_SNAPSHOT) != 0 && + ip->i_effnlink == 0) + UFS_SNAPGONE(ip); + return (error); +} + +/* + * Rewrite an existing directory entry to point at the inode + * supplied. The parameters describing the directory entry are + * set up by a call to namei. + */ +int +ufs_dirrewrite(dp, oip, newinum, newtype, isrmdir) + struct inode *dp, *oip; + ino_t newinum; + int newtype; + int isrmdir; +{ + struct buf *bp; + struct direct *ep; + struct vnode *vdp = ITOV(dp); + int error; + + /* + * Drop the link before we lock the buf so softdep can block if + * necessary. + */ + oip->i_effnlink--; + if (DOINGSOFTDEP(vdp)) { + softdep_setup_unlink(dp, oip); + } else { + oip->i_nlink--; + DIP_SET(oip, i_nlink, oip->i_nlink); + oip->i_flag |= IN_CHANGE; + } + + error = UFS_BLKATOFF(vdp, (off_t)dp->i_offset, (char **)&ep, &bp); + if (error) + return (error); + if (ep->d_namlen == 2 && ep->d_name[1] == '.' && ep->d_name[0] == '.' && + ep->d_ino != oip->i_number) { + brelse(bp); + return (EIDRM); + } + ep->d_ino = newinum; + if (!OFSFMT(vdp)) + ep->d_type = newtype; + if (DOINGSOFTDEP(vdp)) { + softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir); + bdwrite(bp); + } else { + if (DOINGASYNC(vdp)) { + bdwrite(bp); + error = 0; + } else { + error = bwrite(bp); + } + } + dp->i_flag |= IN_CHANGE | IN_UPDATE; + /* + * If the last named reference to a snapshot goes away, + * drop its snapshot reference so that it will be reclaimed + * when last open reference goes away. + */ + if ((oip->i_flags & SF_SNAPSHOT) != 0 && oip->i_effnlink == 0) + UFS_SNAPGONE(oip); + return (error); +} + +/* + * Check if a directory is empty or not. + * Inode supplied must be locked. 
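+ * A directory counts as empty when every entry is either unused
+ * (d_ino of zero or WINO) or one of "." and "..".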
+ * + * Using a struct dirtemplate here is not precisely + * what we want, but better than using a struct direct. + * + * NB: does not handle corrupted directories. + */ +int +ufs_dirempty(ip, parentino, cred) + struct inode *ip; + ino_t parentino; + struct ucred *cred; +{ + doff_t off; + struct dirtemplate dbuf; + struct direct *dp = (struct direct *)&dbuf; + int error, namlen; + ssize_t count; +#define MINDIRSIZ (sizeof (struct dirtemplate) / 2) + + for (off = 0; off < ip->i_size; off += dp->d_reclen) { + error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, + off, UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, cred, + NOCRED, &count, (struct thread *)0); + /* + * Since we read MINDIRSIZ, residual must + * be 0 unless we're at end of file. + */ + if (error || count != 0) + return (0); + /* avoid infinite loops */ + if (dp->d_reclen == 0) + return (0); + /* skip empty entries */ + if (dp->d_ino == 0 || dp->d_ino == WINO) + continue; + /* accept only "." and ".." */ +# if (BYTE_ORDER == LITTLE_ENDIAN) + if (OFSFMT(ITOV(ip))) + namlen = dp->d_type; + else + namlen = dp->d_namlen; +# else + namlen = dp->d_namlen; +# endif + if (namlen > 2) + return (0); + if (dp->d_name[0] != '.') + return (0); + /* + * At this point namlen must be 1 or 2. + * 1 implies ".", 2 implies ".." if second + * char is also "." + */ + if (namlen == 1 && dp->d_ino == ip->i_number) + continue; + if (dp->d_name[1] == '.' && dp->d_ino == parentino) + continue; + return (0); + } + return (1); +} + +static int +ufs_dir_dd_ino(struct vnode *vp, struct ucred *cred, ino_t *dd_ino, + struct vnode **dd_vp) +{ + struct dirtemplate dirbuf; + struct vnode *ddvp; + int error, namlen; + + ASSERT_VOP_LOCKED(vp, "ufs_dir_dd_ino"); + if (vp->v_type != VDIR) + return (ENOTDIR); + /* + * First check to see if we have it in the name cache. + */ + if ((ddvp = vn_dir_dd_ino(vp)) != NULL) { + KASSERT(ddvp->v_mount == vp->v_mount, + ("ufs_dir_dd_ino: Unexpected mount point crossing")); + *dd_ino = VTOI(ddvp)->i_number; + *dd_vp = ddvp; + return (0); + } + /* + * Have to read the directory. + */ + error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf, + sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, + IO_NODELOCKED | IO_NOMACCHECK, cred, NOCRED, NULL, NULL); + if (error != 0) + return (error); +#if (BYTE_ORDER == LITTLE_ENDIAN) + if (OFSFMT(vp)) + namlen = dirbuf.dotdot_type; + else + namlen = dirbuf.dotdot_namlen; +#else + namlen = dirbuf.dotdot_namlen; +#endif + if (namlen != 2 || dirbuf.dotdot_name[0] != '.' || + dirbuf.dotdot_name[1] != '.') + return (ENOTDIR); + *dd_ino = dirbuf.dotdot_ino; + *dd_vp = NULL; + return (0); +} + +/* + * Check if source directory is in the path of the target directory. 
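+ * This guards rename(2): moving a directory underneath one of its own
+ * descendants (e.g. renaming /a to /a/b/c) would disconnect the
+ * subtree, so the routine walks ".." upward from the target and fails
+ * with EINVAL if it reaches the source before hitting the root or the
+ * new parent.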
+ */ +int +ufs_checkpath(ino_t source_ino, ino_t parent_ino, struct inode *target, struct ucred *cred, ino_t *wait_ino) +{ + struct mount *mp; + struct vnode *tvp, *vp, *vp1; + int error; + ino_t dd_ino; + + vp = tvp = ITOV(target); + mp = vp->v_mount; + *wait_ino = 0; + if (target->i_number == source_ino) + return (EEXIST); + if (target->i_number == parent_ino) + return (0); + if (target->i_number == ROOTINO) + return (0); + for (;;) { + error = ufs_dir_dd_ino(vp, cred, &dd_ino, &vp1); + if (error != 0) + break; + if (dd_ino == source_ino) { + error = EINVAL; + break; + } + if (dd_ino == ROOTINO) + break; + if (dd_ino == parent_ino) + break; + if (vp1 == NULL) { + error = VFS_VGET(mp, dd_ino, LK_SHARED | LK_NOWAIT, + &vp1); + if (error != 0) { + *wait_ino = dd_ino; + break; + } + } + KASSERT(dd_ino == VTOI(vp1)->i_number, + ("directory %ju reparented\n", + (uintmax_t)VTOI(vp1)->i_number)); + if (vp != tvp) + vput(vp); + vp = vp1; + } + + if (error == ENOTDIR) + panic("checkpath: .. not a directory\n"); + if (vp1 != NULL) + vput(vp1); + if (vp != tvp) + vput(vp); + return (error); +} diff --git a/Dump/ufs/ufs/ufs_quota.c b/Dump/ufs/ufs/ufs_quota.c new file mode 100644 index 0000000..550bb9c --- /dev/null +++ b/Dump/ufs/ufs/ufs_quota.c @@ -0,0 +1,1855 @@ +/*- + * Copyright (c) 1982, 1986, 1990, 1993, 1995 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Robert Elz at The University of Melbourne. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)ufs_quota.c 8.5 (Berkeley) 5/20/95 + */ + +#include +__FBSDID("$FreeBSD: releng/11.2/sys/ufs/ufs/ufs_quota.c 331722 2018-03-29 02:50:57Z eadler $"); + +#include "opt_ffs.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +CTASSERT(sizeof(struct dqblk64) == sizeof(struct dqhdr64)); + +static int unprivileged_get_quota = 0; +SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_get_quota, CTLFLAG_RW, + &unprivileged_get_quota, 0, + "Unprivileged processes may retrieve quotas for other uids and gids"); + +static MALLOC_DEFINE(M_DQUOT, "ufs_quota", "UFS quota entries"); + +/* + * Quota name to error message mapping. + */ +static char *quotatypes[] = INITQFNAMES; + +static int chkdqchg(struct inode *, ufs2_daddr_t, struct ucred *, int, int *); +static int chkiqchg(struct inode *, int, struct ucred *, int, int *); +static int dqopen(struct vnode *, struct ufsmount *, int); +static int dqget(struct vnode *, + u_long, struct ufsmount *, int, struct dquot **); +static int dqsync(struct vnode *, struct dquot *); +static int dqflush(struct vnode *); +static int quotaoff1(struct thread *td, struct mount *mp, int type); +static int quotaoff_inchange(struct thread *td, struct mount *mp, int type); + +/* conversion functions - from_to() */ +static void dqb32_dq(const struct dqblk32 *, struct dquot *); +static void dqb64_dq(const struct dqblk64 *, struct dquot *); +static void dq_dqb32(const struct dquot *, struct dqblk32 *); +static void dq_dqb64(const struct dquot *, struct dqblk64 *); +static void dqb32_dqb64(const struct dqblk32 *, struct dqblk64 *); +static void dqb64_dqb32(const struct dqblk64 *, struct dqblk32 *); + +#ifdef DIAGNOSTIC +static void dqref(struct dquot *); +static void chkdquot(struct inode *); +#endif + +/* + * Set up the quotas for an inode. + * + * This routine completely defines the semantics of quotas. + * If other criterion want to be used to establish quotas, the + * MAXQUOTAS value in quota.h should be increased, and the + * additional dquots set up here. + */ +int +getinoquota(struct inode *ip) +{ + struct ufsmount *ump; + struct vnode *vp; + int error; + + vp = ITOV(ip); + + /* + * Disk quotas must be turned off for system files. Currently + * snapshot and quota files. + */ + if ((vp->v_vflag & VV_SYSTEM) != 0) + return (0); + /* + * XXX: Turn off quotas for files with a negative UID or GID. + * This prevents the creation of 100GB+ quota files. + */ + if ((int)ip->i_uid < 0 || (int)ip->i_gid < 0) + return (0); + ump = VFSTOUFS(vp->v_mount); + /* + * Set up the user quota based on file uid. + * EINVAL means that quotas are not enabled. + */ + if ((error = + dqget(vp, ip->i_uid, ump, USRQUOTA, &ip->i_dquot[USRQUOTA])) && + error != EINVAL) + return (error); + /* + * Set up the group quota based on file gid. + * EINVAL means that quotas are not enabled. + */ + if ((error = + dqget(vp, ip->i_gid, ump, GRPQUOTA, &ip->i_dquot[GRPQUOTA])) && + error != EINVAL) + return (error); + return (0); +} + +/* + * Update disk usage, and take corrective action. + */ +int +chkdq(struct inode *ip, ufs2_daddr_t change, struct ucred *cred, int flags) +{ + struct dquot *dq; + ufs2_daddr_t ncurblocks; + struct vnode *vp = ITOV(ip); + int i, error, warn, do_check; + + /* + * Disk quotas must be turned off for system files. Currently + * snapshot and quota files. 
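+ * (Charging those files would recurse, since growing the quota file
+ * would have to update the very quota it is being charged against.
+ * Quota records are indexed by id, so an id interpreted as a huge
+ * unsigned value would also force dqget() to extend the file
+ * enormously, which is what the negative-id check below avoids.)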
+ */ + if ((vp->v_vflag & VV_SYSTEM) != 0) + return (0); + /* + * XXX: Turn off quotas for files with a negative UID or GID. + * This prevents the creation of 100GB+ quota files. + */ + if ((int)ip->i_uid < 0 || (int)ip->i_gid < 0) + return (0); +#ifdef DIAGNOSTIC + if ((flags & CHOWN) == 0) + chkdquot(ip); +#endif + if (change == 0) + return (0); + if (change < 0) { + for (i = 0; i < MAXQUOTAS; i++) { + if ((dq = ip->i_dquot[i]) == NODQUOT) + continue; + DQI_LOCK(dq); + DQI_WAIT(dq, PINOD+1, "chkdq1"); + ncurblocks = dq->dq_curblocks + change; + if (ncurblocks >= 0) + dq->dq_curblocks = ncurblocks; + else + dq->dq_curblocks = 0; + dq->dq_flags &= ~DQ_BLKS; + dq->dq_flags |= DQ_MOD; + DQI_UNLOCK(dq); + } + return (0); + } + if ((flags & FORCE) == 0 && + priv_check_cred(cred, PRIV_VFS_EXCEEDQUOTA, 0)) + do_check = 1; + else + do_check = 0; + for (i = 0; i < MAXQUOTAS; i++) { + if ((dq = ip->i_dquot[i]) == NODQUOT) + continue; + warn = 0; + DQI_LOCK(dq); + DQI_WAIT(dq, PINOD+1, "chkdq2"); + if (do_check) { + error = chkdqchg(ip, change, cred, i, &warn); + if (error) { + /* + * Roll back user quota changes when + * group quota failed. + */ + while (i > 0) { + --i; + dq = ip->i_dquot[i]; + if (dq == NODQUOT) + continue; + DQI_LOCK(dq); + DQI_WAIT(dq, PINOD+1, "chkdq3"); + ncurblocks = dq->dq_curblocks - change; + if (ncurblocks >= 0) + dq->dq_curblocks = ncurblocks; + else + dq->dq_curblocks = 0; + dq->dq_flags &= ~DQ_BLKS; + dq->dq_flags |= DQ_MOD; + DQI_UNLOCK(dq); + } + return (error); + } + } + /* Reset timer when crossing soft limit */ + if (dq->dq_curblocks + change >= dq->dq_bsoftlimit && + dq->dq_curblocks < dq->dq_bsoftlimit) + dq->dq_btime = time_second + ITOUMP(ip)->um_btime[i]; + dq->dq_curblocks += change; + dq->dq_flags |= DQ_MOD; + DQI_UNLOCK(dq); + if (warn) + uprintf("\n%s: warning, %s disk quota exceeded\n", + ITOVFS(ip)->mnt_stat.f_mntonname, + quotatypes[i]); + } + return (0); +} + +/* + * Check for a valid change to a users allocation. + * Issue an error message if appropriate. + */ +static int +chkdqchg(struct inode *ip, ufs2_daddr_t change, struct ucred *cred, + int type, int *warn) +{ + struct dquot *dq = ip->i_dquot[type]; + ufs2_daddr_t ncurblocks = dq->dq_curblocks + change; + + /* + * If user would exceed their hard limit, disallow space allocation. + */ + if (ncurblocks >= dq->dq_bhardlimit && dq->dq_bhardlimit) { + if ((dq->dq_flags & DQ_BLKS) == 0 && + ip->i_uid == cred->cr_uid) { + dq->dq_flags |= DQ_BLKS; + DQI_UNLOCK(dq); + uprintf("\n%s: write failed, %s disk limit reached\n", + ITOVFS(ip)->mnt_stat.f_mntonname, + quotatypes[type]); + return (EDQUOT); + } + DQI_UNLOCK(dq); + return (EDQUOT); + } + /* + * If user is over their soft limit for too long, disallow space + * allocation. Reset time limit as they cross their soft limit. + */ + if (ncurblocks >= dq->dq_bsoftlimit && dq->dq_bsoftlimit) { + if (dq->dq_curblocks < dq->dq_bsoftlimit) { + dq->dq_btime = time_second + ITOUMP(ip)->um_btime[type]; + if (ip->i_uid == cred->cr_uid) + *warn = 1; + return (0); + } + if (time_second > dq->dq_btime) { + if ((dq->dq_flags & DQ_BLKS) == 0 && + ip->i_uid == cred->cr_uid) { + dq->dq_flags |= DQ_BLKS; + DQI_UNLOCK(dq); + uprintf("\n%s: write failed, %s " + "disk quota exceeded for too long\n", + ITOVFS(ip)->mnt_stat.f_mntonname, + quotatypes[type]); + return (EDQUOT); + } + DQI_UNLOCK(dq); + return (EDQUOT); + } + } + return (0); +} + +/* + * Check the inode limit, applying corrective action. 
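+ * The rules mirror chkdq() above: a hard limit always refuses the
+ * allocation; crossing the soft limit only starts a grace timer
+ * (um_itime, normally MAX_IQ_TIME), and once time_second passes
+ * dq_itime further allocations fail with EDQUOT as well. For example,
+ * with a soft limit of 1000 inodes and a hard limit of 1200, the
+ * allocation that reaches 1000 starts the clock, the one that would
+ * reach 1200 is refused outright, and after the grace period expires
+ * every allocation is refused until usage drops back under 1000.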
+ */ +int +chkiq(struct inode *ip, int change, struct ucred *cred, int flags) +{ + struct dquot *dq; + int i, error, warn, do_check; + +#ifdef DIAGNOSTIC + if ((flags & CHOWN) == 0) + chkdquot(ip); +#endif + if (change == 0) + return (0); + if (change < 0) { + for (i = 0; i < MAXQUOTAS; i++) { + if ((dq = ip->i_dquot[i]) == NODQUOT) + continue; + DQI_LOCK(dq); + DQI_WAIT(dq, PINOD+1, "chkiq1"); + if (dq->dq_curinodes >= -change) + dq->dq_curinodes += change; + else + dq->dq_curinodes = 0; + dq->dq_flags &= ~DQ_INODS; + dq->dq_flags |= DQ_MOD; + DQI_UNLOCK(dq); + } + return (0); + } + if ((flags & FORCE) == 0 && + priv_check_cred(cred, PRIV_VFS_EXCEEDQUOTA, 0)) + do_check = 1; + else + do_check = 0; + for (i = 0; i < MAXQUOTAS; i++) { + if ((dq = ip->i_dquot[i]) == NODQUOT) + continue; + warn = 0; + DQI_LOCK(dq); + DQI_WAIT(dq, PINOD+1, "chkiq2"); + if (do_check) { + error = chkiqchg(ip, change, cred, i, &warn); + if (error) { + /* + * Roll back user quota changes when + * group quota failed. + */ + while (i > 0) { + --i; + dq = ip->i_dquot[i]; + if (dq == NODQUOT) + continue; + DQI_LOCK(dq); + DQI_WAIT(dq, PINOD+1, "chkiq3"); + if (dq->dq_curinodes >= change) + dq->dq_curinodes -= change; + else + dq->dq_curinodes = 0; + dq->dq_flags &= ~DQ_INODS; + dq->dq_flags |= DQ_MOD; + DQI_UNLOCK(dq); + } + return (error); + } + } + /* Reset timer when crossing soft limit */ + if (dq->dq_curinodes + change >= dq->dq_isoftlimit && + dq->dq_curinodes < dq->dq_isoftlimit) + dq->dq_itime = time_second + ITOUMP(ip)->um_itime[i]; + dq->dq_curinodes += change; + dq->dq_flags |= DQ_MOD; + DQI_UNLOCK(dq); + if (warn) + uprintf("\n%s: warning, %s inode quota exceeded\n", + ITOVFS(ip)->mnt_stat.f_mntonname, + quotatypes[i]); + } + return (0); +} + +/* + * Check for a valid change to a users allocation. + * Issue an error message if appropriate. + */ +static int +chkiqchg(struct inode *ip, int change, struct ucred *cred, int type, int *warn) +{ + struct dquot *dq = ip->i_dquot[type]; + ino_t ncurinodes = dq->dq_curinodes + change; + + /* + * If user would exceed their hard limit, disallow inode allocation. + */ + if (ncurinodes >= dq->dq_ihardlimit && dq->dq_ihardlimit) { + if ((dq->dq_flags & DQ_INODS) == 0 && + ip->i_uid == cred->cr_uid) { + dq->dq_flags |= DQ_INODS; + DQI_UNLOCK(dq); + uprintf("\n%s: write failed, %s inode limit reached\n", + ITOVFS(ip)->mnt_stat.f_mntonname, + quotatypes[type]); + return (EDQUOT); + } + DQI_UNLOCK(dq); + return (EDQUOT); + } + /* + * If user is over their soft limit for too long, disallow inode + * allocation. Reset time limit as they cross their soft limit. + */ + if (ncurinodes >= dq->dq_isoftlimit && dq->dq_isoftlimit) { + if (dq->dq_curinodes < dq->dq_isoftlimit) { + dq->dq_itime = time_second + ITOUMP(ip)->um_itime[type]; + if (ip->i_uid == cred->cr_uid) + *warn = 1; + return (0); + } + if (time_second > dq->dq_itime) { + if ((dq->dq_flags & DQ_INODS) == 0 && + ip->i_uid == cred->cr_uid) { + dq->dq_flags |= DQ_INODS; + DQI_UNLOCK(dq); + uprintf("\n%s: write failed, %s " + "inode quota exceeded for too long\n", + ITOVFS(ip)->mnt_stat.f_mntonname, + quotatypes[type]); + return (EDQUOT); + } + DQI_UNLOCK(dq); + return (EDQUOT); + } + } + return (0); +} + +#ifdef DIAGNOSTIC +/* + * On filesystems with quotas enabled, it is an error for a file to change + * size and not to have a dquot structure associated with it. 
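+ * The check below exists only in DIAGNOSTIC kernels and panics, so a
+ * code path that forgot to call getinoquota() is caught immediately
+ * during development rather than silently escaping accounting.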
+ */ +static void +chkdquot(struct inode *ip) +{ + struct ufsmount *ump; + struct vnode *vp; + int i; + + ump = ITOUMP(ip); + vp = ITOV(ip); + + /* + * Disk quotas must be turned off for system files. Currently + * these are snapshots and quota files. + */ + if ((vp->v_vflag & VV_SYSTEM) != 0) + return; + /* + * XXX: Turn off quotas for files with a negative UID or GID. + * This prevents the creation of 100GB+ quota files. + */ + if ((int)ip->i_uid < 0 || (int)ip->i_gid < 0) + return; + + UFS_LOCK(ump); + for (i = 0; i < MAXQUOTAS; i++) { + if (ump->um_quotas[i] == NULLVP || + (ump->um_qflags[i] & (QTF_OPENING|QTF_CLOSING))) + continue; + if (ip->i_dquot[i] == NODQUOT) { + UFS_UNLOCK(ump); + vn_printf(ITOV(ip), "chkdquot: missing dquot "); + panic("chkdquot: missing dquot"); + } + } + UFS_UNLOCK(ump); +} +#endif + +/* + * Code to process quotactl commands. + */ + +/* + * Q_QUOTAON - set up a quota file for a particular filesystem. + */ +int +quotaon(struct thread *td, struct mount *mp, int type, void *fname) +{ + struct ufsmount *ump; + struct vnode *vp, **vpp; + struct vnode *mvp; + struct dquot *dq; + int error, flags; + struct nameidata nd; + + error = priv_check(td, PRIV_UFS_QUOTAON); + if (error != 0) { + vfs_unbusy(mp); + return (error); + } + + if ((mp->mnt_flag & MNT_RDONLY) != 0) { + vfs_unbusy(mp); + return (EROFS); + } + + ump = VFSTOUFS(mp); + dq = NODQUOT; + + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fname, td); + flags = FREAD | FWRITE; + vfs_ref(mp); + vfs_unbusy(mp); + error = vn_open(&nd, &flags, 0, NULL); + if (error != 0) { + vfs_rel(mp); + return (error); + } + NDFREE(&nd, NDF_ONLY_PNBUF); + vp = nd.ni_vp; + error = vfs_busy(mp, MBF_NOWAIT); + vfs_rel(mp); + if (error == 0) { + if (vp->v_type != VREG) { + error = EACCES; + vfs_unbusy(mp); + } + } + if (error != 0) { + VOP_UNLOCK(vp, 0); + (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td); + return (error); + } + + UFS_LOCK(ump); + if ((ump->um_qflags[type] & (QTF_OPENING|QTF_CLOSING)) != 0) { + UFS_UNLOCK(ump); + VOP_UNLOCK(vp, 0); + (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td); + vfs_unbusy(mp); + return (EALREADY); + } + ump->um_qflags[type] |= QTF_OPENING|QTF_CLOSING; + UFS_UNLOCK(ump); + if ((error = dqopen(vp, ump, type)) != 0) { + VOP_UNLOCK(vp, 0); + UFS_LOCK(ump); + ump->um_qflags[type] &= ~(QTF_OPENING|QTF_CLOSING); + UFS_UNLOCK(ump); + (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td); + vfs_unbusy(mp); + return (error); + } + VOP_UNLOCK(vp, 0); + MNT_ILOCK(mp); + mp->mnt_flag |= MNT_QUOTA; + MNT_IUNLOCK(mp); + + vpp = &ump->um_quotas[type]; + if (*vpp != vp) + quotaoff1(td, mp, type); + + /* + * When the directory vnode containing the quota file is + * inactivated, due to the shared lookup of the quota file + * vput()ing the dvp, the qsyncvp() call for the containing + * directory would try to acquire the quota lock exclusive. + * At the same time, lookup already locked the quota vnode + * shared. Mark the quota vnode lock as allowing recursion + * and automatically converting shared locks to exclusive. + * + * Also mark quota vnode as system. + */ + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + vp->v_vflag |= VV_SYSTEM; + VN_LOCK_AREC(vp); + VN_LOCK_DSHARE(vp); + VOP_UNLOCK(vp, 0); + *vpp = vp; + /* + * Save the credential of the process that turned on quotas. + * Set up the time limits for this quota. 
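+ * By convention the id-0 record of the quota file carries the
+ * per-filesystem grace periods: the dqget() of id 0 below overrides
+ * the MAX_DQ_TIME/MAX_IQ_TIME defaults whenever that record holds a
+ * nonzero btime or itime.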
+ */ + ump->um_cred[type] = crhold(td->td_ucred); + ump->um_btime[type] = MAX_DQ_TIME; + ump->um_itime[type] = MAX_IQ_TIME; + if (dqget(NULLVP, 0, ump, type, &dq) == 0) { + if (dq->dq_btime > 0) + ump->um_btime[type] = dq->dq_btime; + if (dq->dq_itime > 0) + ump->um_itime[type] = dq->dq_itime; + dqrele(NULLVP, dq); + } + /* + * Allow the getdq from getinoquota below to read the quota + * from file. + */ + UFS_LOCK(ump); + ump->um_qflags[type] &= ~QTF_CLOSING; + UFS_UNLOCK(ump); + /* + * Search vnodes associated with this mount point, + * adding references to quota file being opened. + * NB: only need to add dquot's for inodes being modified. + */ +again: + MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { + MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); + goto again; + } + if (vp->v_type == VNON || vp->v_writecount == 0) { + VOP_UNLOCK(vp, 0); + vrele(vp); + continue; + } + error = getinoquota(VTOI(vp)); + VOP_UNLOCK(vp, 0); + vrele(vp); + if (error) { + MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); + break; + } + } + + if (error) + quotaoff_inchange(td, mp, type); + UFS_LOCK(ump); + ump->um_qflags[type] &= ~QTF_OPENING; + KASSERT((ump->um_qflags[type] & QTF_CLOSING) == 0, + ("quotaon: leaking flags")); + UFS_UNLOCK(ump); + + vfs_unbusy(mp); + return (error); +} + +/* + * Main code to turn off disk quotas for a filesystem. Does not change + * flags. + */ +static int +quotaoff1(struct thread *td, struct mount *mp, int type) +{ + struct vnode *vp; + struct vnode *qvp, *mvp; + struct ufsmount *ump; + struct dquot *dq; + struct inode *ip; + struct ucred *cr; + int error; + + ump = VFSTOUFS(mp); + + UFS_LOCK(ump); + KASSERT((ump->um_qflags[type] & QTF_CLOSING) != 0, + ("quotaoff1: flags are invalid")); + if ((qvp = ump->um_quotas[type]) == NULLVP) { + UFS_UNLOCK(ump); + return (0); + } + cr = ump->um_cred[type]; + UFS_UNLOCK(ump); + + /* + * Search vnodes associated with this mount point, + * deleting any references to quota file being closed. + */ +again: + MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { + if (vp->v_type == VNON) { + VI_UNLOCK(vp); + continue; + } + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { + MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); + goto again; + } + ip = VTOI(vp); + dq = ip->i_dquot[type]; + ip->i_dquot[type] = NODQUOT; + dqrele(vp, dq); + VOP_UNLOCK(vp, 0); + vrele(vp); + } + + error = dqflush(qvp); + if (error != 0) + return (error); + + /* + * Clear um_quotas before closing the quota vnode to prevent + * access to the closed vnode from dqget/dqsync + */ + UFS_LOCK(ump); + ump->um_quotas[type] = NULLVP; + ump->um_cred[type] = NOCRED; + UFS_UNLOCK(ump); + + vn_lock(qvp, LK_EXCLUSIVE | LK_RETRY); + qvp->v_vflag &= ~VV_SYSTEM; + VOP_UNLOCK(qvp, 0); + error = vn_close(qvp, FREAD|FWRITE, td->td_ucred, td); + crfree(cr); + + return (error); +} + +/* + * Turns off quotas, assumes that ump->um_qflags are already checked + * and QTF_CLOSING is set to indicate operation in progress. Fixes + * ump->um_qflags and mp->mnt_flag after. + */ +int +quotaoff_inchange(struct thread *td, struct mount *mp, int type) +{ + struct ufsmount *ump; + int i; + int error; + + error = quotaoff1(td, mp, type); + + ump = VFSTOUFS(mp); + UFS_LOCK(ump); + ump->um_qflags[type] &= ~QTF_CLOSING; + for (i = 0; i < MAXQUOTAS; i++) + if (ump->um_quotas[i] != NULLVP) + break; + if (i == MAXQUOTAS) { + MNT_ILOCK(mp); + mp->mnt_flag &= ~MNT_QUOTA; + MNT_IUNLOCK(mp); + } + UFS_UNLOCK(ump); + return (error); +} + +/* + * Q_QUOTAOFF - turn off disk quotas for a filesystem. 
+ */ +int +quotaoff(struct thread *td, struct mount *mp, int type) +{ + struct ufsmount *ump; + int error; + + error = priv_check(td, PRIV_UFS_QUOTAOFF); + if (error) + return (error); + + ump = VFSTOUFS(mp); + UFS_LOCK(ump); + if ((ump->um_qflags[type] & (QTF_OPENING|QTF_CLOSING)) != 0) { + UFS_UNLOCK(ump); + return (EALREADY); + } + ump->um_qflags[type] |= QTF_CLOSING; + UFS_UNLOCK(ump); + + return (quotaoff_inchange(td, mp, type)); +} + +/* + * Q_GETQUOTA - return current values in a dqblk structure. + */ +static int +_getquota(struct thread *td, struct mount *mp, u_long id, int type, + struct dqblk64 *dqb) +{ + struct dquot *dq; + int error; + + switch (type) { + case USRQUOTA: + if ((td->td_ucred->cr_uid != id) && !unprivileged_get_quota) { + error = priv_check(td, PRIV_VFS_GETQUOTA); + if (error) + return (error); + } + break; + + case GRPQUOTA: + if (!groupmember(id, td->td_ucred) && + !unprivileged_get_quota) { + error = priv_check(td, PRIV_VFS_GETQUOTA); + if (error) + return (error); + } + break; + + default: + return (EINVAL); + } + + dq = NODQUOT; + error = dqget(NULLVP, id, VFSTOUFS(mp), type, &dq); + if (error) + return (error); + *dqb = dq->dq_dqb; + dqrele(NULLVP, dq); + return (error); +} + +/* + * Q_SETQUOTA - assign an entire dqblk structure. + */ +static int +_setquota(struct thread *td, struct mount *mp, u_long id, int type, + struct dqblk64 *dqb) +{ + struct dquot *dq; + struct dquot *ndq; + struct ufsmount *ump; + struct dqblk64 newlim; + int error; + + error = priv_check(td, PRIV_VFS_SETQUOTA); + if (error) + return (error); + + newlim = *dqb; + + ndq = NODQUOT; + ump = VFSTOUFS(mp); + + error = dqget(NULLVP, id, ump, type, &ndq); + if (error) + return (error); + dq = ndq; + DQI_LOCK(dq); + DQI_WAIT(dq, PINOD+1, "setqta"); + /* + * Copy all but the current values. + * Reset time limit if previously had no soft limit or were + * under it, but now have a soft limit and are over it. + */ + newlim.dqb_curblocks = dq->dq_curblocks; + newlim.dqb_curinodes = dq->dq_curinodes; + if (dq->dq_id != 0) { + newlim.dqb_btime = dq->dq_btime; + newlim.dqb_itime = dq->dq_itime; + } + if (newlim.dqb_bsoftlimit && + dq->dq_curblocks >= newlim.dqb_bsoftlimit && + (dq->dq_bsoftlimit == 0 || dq->dq_curblocks < dq->dq_bsoftlimit)) + newlim.dqb_btime = time_second + ump->um_btime[type]; + if (newlim.dqb_isoftlimit && + dq->dq_curinodes >= newlim.dqb_isoftlimit && + (dq->dq_isoftlimit == 0 || dq->dq_curinodes < dq->dq_isoftlimit)) + newlim.dqb_itime = time_second + ump->um_itime[type]; + dq->dq_dqb = newlim; + if (dq->dq_curblocks < dq->dq_bsoftlimit) + dq->dq_flags &= ~DQ_BLKS; + if (dq->dq_curinodes < dq->dq_isoftlimit) + dq->dq_flags &= ~DQ_INODS; + if (dq->dq_isoftlimit == 0 && dq->dq_bsoftlimit == 0 && + dq->dq_ihardlimit == 0 && dq->dq_bhardlimit == 0) + dq->dq_flags |= DQ_FAKE; + else + dq->dq_flags &= ~DQ_FAKE; + dq->dq_flags |= DQ_MOD; + DQI_UNLOCK(dq); + dqrele(NULLVP, dq); + return (0); +} + +/* + * Q_SETUSE - set current inode and block usage. 
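+ *
+ * Only the dqb_curblocks and dqb_curinodes fields supplied by the
+ * caller are consumed; the existing limits are left untouched (the
+ * grace times may be restarted, as noted in the code below).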
+ */ +static int +_setuse(struct thread *td, struct mount *mp, u_long id, int type, + struct dqblk64 *dqb) +{ + struct dquot *dq; + struct ufsmount *ump; + struct dquot *ndq; + struct dqblk64 usage; + int error; + + error = priv_check(td, PRIV_UFS_SETUSE); + if (error) + return (error); + + usage = *dqb; + + ump = VFSTOUFS(mp); + ndq = NODQUOT; + + error = dqget(NULLVP, id, ump, type, &ndq); + if (error) + return (error); + dq = ndq; + DQI_LOCK(dq); + DQI_WAIT(dq, PINOD+1, "setuse"); + /* + * Reset time limit if have a soft limit and were + * previously under it, but are now over it. + */ + if (dq->dq_bsoftlimit && dq->dq_curblocks < dq->dq_bsoftlimit && + usage.dqb_curblocks >= dq->dq_bsoftlimit) + dq->dq_btime = time_second + ump->um_btime[type]; + if (dq->dq_isoftlimit && dq->dq_curinodes < dq->dq_isoftlimit && + usage.dqb_curinodes >= dq->dq_isoftlimit) + dq->dq_itime = time_second + ump->um_itime[type]; + dq->dq_curblocks = usage.dqb_curblocks; + dq->dq_curinodes = usage.dqb_curinodes; + if (dq->dq_curblocks < dq->dq_bsoftlimit) + dq->dq_flags &= ~DQ_BLKS; + if (dq->dq_curinodes < dq->dq_isoftlimit) + dq->dq_flags &= ~DQ_INODS; + dq->dq_flags |= DQ_MOD; + DQI_UNLOCK(dq); + dqrele(NULLVP, dq); + return (0); +} + +int +getquota32(struct thread *td, struct mount *mp, u_long id, int type, void *addr) +{ + struct dqblk32 dqb32; + struct dqblk64 dqb64; + int error; + + error = _getquota(td, mp, id, type, &dqb64); + if (error) + return (error); + dqb64_dqb32(&dqb64, &dqb32); + error = copyout(&dqb32, addr, sizeof(dqb32)); + return (error); +} + +int +setquota32(struct thread *td, struct mount *mp, u_long id, int type, void *addr) +{ + struct dqblk32 dqb32; + struct dqblk64 dqb64; + int error; + + error = copyin(addr, &dqb32, sizeof(dqb32)); + if (error) + return (error); + dqb32_dqb64(&dqb32, &dqb64); + error = _setquota(td, mp, id, type, &dqb64); + return (error); +} + +int +setuse32(struct thread *td, struct mount *mp, u_long id, int type, void *addr) +{ + struct dqblk32 dqb32; + struct dqblk64 dqb64; + int error; + + error = copyin(addr, &dqb32, sizeof(dqb32)); + if (error) + return (error); + dqb32_dqb64(&dqb32, &dqb64); + error = _setuse(td, mp, id, type, &dqb64); + return (error); +} + +int +getquota(struct thread *td, struct mount *mp, u_long id, int type, void *addr) +{ + struct dqblk64 dqb64; + int error; + + error = _getquota(td, mp, id, type, &dqb64); + if (error) + return (error); + error = copyout(&dqb64, addr, sizeof(dqb64)); + return (error); +} + +int +setquota(struct thread *td, struct mount *mp, u_long id, int type, void *addr) +{ + struct dqblk64 dqb64; + int error; + + error = copyin(addr, &dqb64, sizeof(dqb64)); + if (error) + return (error); + error = _setquota(td, mp, id, type, &dqb64); + return (error); +} + +int +setuse(struct thread *td, struct mount *mp, u_long id, int type, void *addr) +{ + struct dqblk64 dqb64; + int error; + + error = copyin(addr, &dqb64, sizeof(dqb64)); + if (error) + return (error); + error = _setuse(td, mp, id, type, &dqb64); + return (error); +} + +/* + * Q_GETQUOTASIZE - get bit-size of quota file fields + */ +int +getquotasize(struct thread *td, struct mount *mp, u_long id, int type, + void *sizep) +{ + struct ufsmount *ump = VFSTOUFS(mp); + int bitsize; + + UFS_LOCK(ump); + if (ump->um_quotas[type] == NULLVP || + (ump->um_qflags[type] & QTF_CLOSING)) { + UFS_UNLOCK(ump); + return (EINVAL); + } + if ((ump->um_qflags[type] & QTF_64BIT) != 0) + bitsize = 64; + else + bitsize = 32; + UFS_UNLOCK(ump); + return (copyout(&bitsize, sizep, 
sizeof(int))); +} + +/* + * Q_SYNC - sync quota files to disk. + */ +int +qsync(struct mount *mp) +{ + struct ufsmount *ump = VFSTOUFS(mp); + struct thread *td = curthread; /* XXX */ + struct vnode *vp, *mvp; + struct dquot *dq; + int i, error; + + /* + * Check if the mount point has any quotas. + * If not, simply return. + */ + for (i = 0; i < MAXQUOTAS; i++) + if (ump->um_quotas[i] != NULLVP) + break; + if (i == MAXQUOTAS) + return (0); + /* + * Search vnodes associated with this mount point, + * synchronizing any modified dquot structures. + */ +again: + MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) { + if (vp->v_type == VNON) { + VI_UNLOCK(vp); + continue; + } + error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td); + if (error) { + if (error == ENOENT) { + MNT_VNODE_FOREACH_ACTIVE_ABORT(mp, mvp); + goto again; + } + continue; + } + for (i = 0; i < MAXQUOTAS; i++) { + dq = VTOI(vp)->i_dquot[i]; + if (dq != NODQUOT) + dqsync(vp, dq); + } + vput(vp); + } + return (0); +} + +/* + * Sync quota file for given vnode to disk. + */ +int +qsyncvp(struct vnode *vp) +{ + struct ufsmount *ump = VFSTOUFS(vp->v_mount); + struct dquot *dq; + int i; + + /* + * Check if the mount point has any quotas. + * If not, simply return. + */ + for (i = 0; i < MAXQUOTAS; i++) + if (ump->um_quotas[i] != NULLVP) + break; + if (i == MAXQUOTAS) + return (0); + /* + * Search quotas associated with this vnode + * synchronizing any modified dquot structures. + */ + for (i = 0; i < MAXQUOTAS; i++) { + dq = VTOI(vp)->i_dquot[i]; + if (dq != NODQUOT) + dqsync(vp, dq); + } + return (0); +} + +/* + * Code pertaining to management of the in-core dquot data structures. + */ +#define DQHASH(dqvp, id) \ + (&dqhashtbl[((((intptr_t)(dqvp)) >> 8) + id) & dqhash]) +static LIST_HEAD(dqhash, dquot) *dqhashtbl; +static u_long dqhash; + +/* + * Dquot free list. + */ +#define DQUOTINC 5 /* minimum free dquots desired */ +static TAILQ_HEAD(dqfreelist, dquot) dqfreelist; +static long numdquot, desireddquot = DQUOTINC; + +/* + * Lock to protect quota hash, dq free list and dq_cnt ref counters of + * _all_ dqs. + */ +struct mtx dqhlock; + +#define DQH_LOCK() mtx_lock(&dqhlock) +#define DQH_UNLOCK() mtx_unlock(&dqhlock) + +static struct dquot *dqhashfind(struct dqhash *dqh, u_long id, + struct vnode *dqvp); + +/* + * Initialize the quota system. + */ +void +dqinit(void) +{ + + mtx_init(&dqhlock, "dqhlock", NULL, MTX_DEF); + dqhashtbl = hashinit(desiredvnodes, M_DQUOT, &dqhash); + TAILQ_INIT(&dqfreelist); +} + +/* + * Shut down the quota system. + */ +void +dquninit(void) +{ + struct dquot *dq; + + hashdestroy(dqhashtbl, M_DQUOT, dqhash); + while ((dq = TAILQ_FIRST(&dqfreelist)) != NULL) { + TAILQ_REMOVE(&dqfreelist, dq, dq_freelist); + mtx_destroy(&dq->dq_lock); + free(dq, M_DQUOT); + } + mtx_destroy(&dqhlock); +} + +static struct dquot * +dqhashfind(struct dqhash *dqh, u_long id, struct vnode *dqvp) +{ + struct dquot *dq; + + mtx_assert(&dqhlock, MA_OWNED); + LIST_FOREACH(dq, dqh, dq_hash) { + if (dq->dq_id != id || + dq->dq_ump->um_quotas[dq->dq_type] != dqvp) + continue; + /* + * Cache hit with no references. Take + * the structure off the free list. + */ + if (dq->dq_cnt == 0) + TAILQ_REMOVE(&dqfreelist, dq, dq_freelist); + DQREF(dq); + return (dq); + } + return (NODQUOT); +} + +/* + * Determine the quota file type. + * + * A 32-bit quota file is simply an array of struct dqblk32. + * + * A 64-bit quota file is a struct dqhdr64 followed by an array of struct + * dqblk64. 
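+ * As an illustration of the layout (this mirrors the offset
+ * arithmetic used by dqget() and dqsync() below), the record for
+ * id N in a 64-bit quota file starts at byte offset
+ *
+ *      sizeof(struct dqhdr64) + N * sizeof(struct dqblk64)
+ *
+ * while a 32-bit quota file has no header, so the record for id N
+ * simply starts at N * sizeof(struct dqblk32).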
The header contains various magic bits which allow us to be + * reasonably confident that it is indeeda 64-bit quota file and not just + * a 32-bit quota file that just happens to "look right". + * + */ +static int +dqopen(struct vnode *vp, struct ufsmount *ump, int type) +{ + struct dqhdr64 dqh; + struct iovec aiov; + struct uio auio; + int error; + + ASSERT_VOP_LOCKED(vp, "dqopen"); + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + aiov.iov_base = &dqh; + aiov.iov_len = sizeof(dqh); + auio.uio_resid = sizeof(dqh); + auio.uio_offset = 0; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_READ; + auio.uio_td = (struct thread *)0; + error = VOP_READ(vp, &auio, 0, ump->um_cred[type]); + + if (error != 0) + return (error); + if (auio.uio_resid > 0) { + /* assume 32 bits */ + return (0); + } + + UFS_LOCK(ump); + if (strcmp(dqh.dqh_magic, Q_DQHDR64_MAGIC) == 0 && + be32toh(dqh.dqh_version) == Q_DQHDR64_VERSION && + be32toh(dqh.dqh_hdrlen) == (uint32_t)sizeof(struct dqhdr64) && + be32toh(dqh.dqh_reclen) == (uint32_t)sizeof(struct dqblk64)) { + /* XXX: what if the magic matches, but the sizes are wrong? */ + ump->um_qflags[type] |= QTF_64BIT; + } else { + ump->um_qflags[type] &= ~QTF_64BIT; + } + UFS_UNLOCK(ump); + + return (0); +} + +/* + * Obtain a dquot structure for the specified identifier and quota file + * reading the information from the file if necessary. + */ +static int +dqget(struct vnode *vp, u_long id, struct ufsmount *ump, int type, + struct dquot **dqp) +{ + uint8_t buf[sizeof(struct dqblk64)]; + off_t base, recsize; + struct dquot *dq, *dq1; + struct dqhash *dqh; + struct vnode *dqvp; + struct iovec aiov; + struct uio auio; + int dqvplocked, error; + +#ifdef DEBUG_VFS_LOCKS + if (vp != NULLVP) + ASSERT_VOP_ELOCKED(vp, "dqget"); +#endif + + if (vp != NULLVP && *dqp != NODQUOT) { + return (0); + } + + /* XXX: Disallow negative id values to prevent the + * creation of 100GB+ quota data files. + */ + if ((int)id < 0) + return (EINVAL); + + UFS_LOCK(ump); + dqvp = ump->um_quotas[type]; + if (dqvp == NULLVP || (ump->um_qflags[type] & QTF_CLOSING)) { + *dqp = NODQUOT; + UFS_UNLOCK(ump); + return (EINVAL); + } + vref(dqvp); + UFS_UNLOCK(ump); + error = 0; + dqvplocked = 0; + + /* + * Check the cache first. + */ + dqh = DQHASH(dqvp, id); + DQH_LOCK(); + dq = dqhashfind(dqh, id, dqvp); + if (dq != NULL) { + DQH_UNLOCK(); +hfound: DQI_LOCK(dq); + DQI_WAIT(dq, PINOD+1, "dqget"); + DQI_UNLOCK(dq); + if (dq->dq_ump == NULL) { + dqrele(vp, dq); + dq = NODQUOT; + error = EIO; + } + *dqp = dq; + if (dqvplocked) + vput(dqvp); + else + vrele(dqvp); + return (error); + } + + /* + * Quota vnode lock is before DQ_LOCK. Acquire dqvp lock there + * since new dq will appear on the hash chain DQ_LOCKed. + */ + if (vp != dqvp) { + DQH_UNLOCK(); + vn_lock(dqvp, LK_SHARED | LK_RETRY); + dqvplocked = 1; + DQH_LOCK(); + /* + * Recheck the cache after sleep for quota vnode lock. + */ + dq = dqhashfind(dqh, id, dqvp); + if (dq != NULL) { + DQH_UNLOCK(); + goto hfound; + } + } + + /* + * Not in cache, allocate a new one or take it from the + * free list. + */ + if (TAILQ_FIRST(&dqfreelist) == NODQUOT && + numdquot < MAXQUOTAS * desiredvnodes) + desireddquot += DQUOTINC; + if (numdquot < desireddquot) { + numdquot++; + DQH_UNLOCK(); + dq1 = malloc(sizeof *dq1, M_DQUOT, M_WAITOK | M_ZERO); + mtx_init(&dq1->dq_lock, "dqlock", NULL, MTX_DEF); + DQH_LOCK(); + /* + * Recheck the cache after sleep for memory. 
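+ * Another thread may have entered the same (id, dqvp) dquot into the
+ * hash while we slept in malloc(); if so, the freshly allocated
+ * structure is released again and the cached one is used instead.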
+ */ + dq = dqhashfind(dqh, id, dqvp); + if (dq != NULL) { + numdquot--; + DQH_UNLOCK(); + mtx_destroy(&dq1->dq_lock); + free(dq1, M_DQUOT); + goto hfound; + } + dq = dq1; + } else { + if ((dq = TAILQ_FIRST(&dqfreelist)) == NULL) { + DQH_UNLOCK(); + tablefull("dquot"); + *dqp = NODQUOT; + if (dqvplocked) + vput(dqvp); + else + vrele(dqvp); + return (EUSERS); + } + if (dq->dq_cnt || (dq->dq_flags & DQ_MOD)) + panic("dqget: free dquot isn't %p", dq); + TAILQ_REMOVE(&dqfreelist, dq, dq_freelist); + if (dq->dq_ump != NULL) + LIST_REMOVE(dq, dq_hash); + } + + /* + * Dq is put into hash already locked to prevent parallel + * usage while it is being read from file. + */ + dq->dq_flags = DQ_LOCK; + dq->dq_id = id; + dq->dq_type = type; + dq->dq_ump = ump; + LIST_INSERT_HEAD(dqh, dq, dq_hash); + DQREF(dq); + DQH_UNLOCK(); + + /* + * Read the requested quota record from the quota file, performing + * any necessary conversions. + */ + if (ump->um_qflags[type] & QTF_64BIT) { + recsize = sizeof(struct dqblk64); + base = sizeof(struct dqhdr64); + } else { + recsize = sizeof(struct dqblk32); + base = 0; + } + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + aiov.iov_base = buf; + aiov.iov_len = recsize; + auio.uio_resid = recsize; + auio.uio_offset = base + id * recsize; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_READ; + auio.uio_td = (struct thread *)0; + + error = VOP_READ(dqvp, &auio, 0, ump->um_cred[type]); + if (auio.uio_resid == recsize && error == 0) { + bzero(&dq->dq_dqb, sizeof(dq->dq_dqb)); + } else { + if (ump->um_qflags[type] & QTF_64BIT) + dqb64_dq((struct dqblk64 *)buf, dq); + else + dqb32_dq((struct dqblk32 *)buf, dq); + } + if (dqvplocked) + vput(dqvp); + else + vrele(dqvp); + /* + * I/O error in reading quota file, release + * quota structure and reflect problem to caller. + */ + if (error) { + DQH_LOCK(); + dq->dq_ump = NULL; + LIST_REMOVE(dq, dq_hash); + DQH_UNLOCK(); + DQI_LOCK(dq); + if (dq->dq_flags & DQ_WANT) + wakeup(dq); + dq->dq_flags = 0; + DQI_UNLOCK(dq); + dqrele(vp, dq); + *dqp = NODQUOT; + return (error); + } + DQI_LOCK(dq); + /* + * Check for no limit to enforce. + * Initialize time values if necessary. + */ + if (dq->dq_isoftlimit == 0 && dq->dq_bsoftlimit == 0 && + dq->dq_ihardlimit == 0 && dq->dq_bhardlimit == 0) + dq->dq_flags |= DQ_FAKE; + if (dq->dq_id != 0) { + if (dq->dq_btime == 0) { + dq->dq_btime = time_second + ump->um_btime[type]; + if (dq->dq_bsoftlimit && + dq->dq_curblocks >= dq->dq_bsoftlimit) + dq->dq_flags |= DQ_MOD; + } + if (dq->dq_itime == 0) { + dq->dq_itime = time_second + ump->um_itime[type]; + if (dq->dq_isoftlimit && + dq->dq_curinodes >= dq->dq_isoftlimit) + dq->dq_flags |= DQ_MOD; + } + } + DQI_WAKEUP(dq); + DQI_UNLOCK(dq); + *dqp = dq; + return (0); +} + +#ifdef DIAGNOSTIC +/* + * Obtain a reference to a dquot. + */ +static void +dqref(struct dquot *dq) +{ + + dq->dq_cnt++; +} +#endif + +/* + * Release a reference to a dquot. + */ +void +dqrele(struct vnode *vp, struct dquot *dq) +{ + + if (dq == NODQUOT) + return; + DQH_LOCK(); + KASSERT(dq->dq_cnt > 0, ("Lost dq %p reference 1", dq)); + if (dq->dq_cnt > 1) { + dq->dq_cnt--; + DQH_UNLOCK(); + return; + } + DQH_UNLOCK(); +sync: + (void) dqsync(vp, dq); + + DQH_LOCK(); + KASSERT(dq->dq_cnt > 0, ("Lost dq %p reference 2", dq)); + if (--dq->dq_cnt > 0) + { + DQH_UNLOCK(); + return; + } + + /* + * The dq may become dirty after it is synced but before it is + * put to the free list. 
Checking the DQ_MOD there without + * locking dq should be safe since no other references to the + * dq exist. + */ + if ((dq->dq_flags & DQ_MOD) != 0) { + dq->dq_cnt++; + DQH_UNLOCK(); + goto sync; + } + TAILQ_INSERT_TAIL(&dqfreelist, dq, dq_freelist); + DQH_UNLOCK(); +} + +/* + * Update the disk quota in the quota file. + */ +static int +dqsync(struct vnode *vp, struct dquot *dq) +{ + uint8_t buf[sizeof(struct dqblk64)]; + off_t base, recsize; + struct vnode *dqvp; + struct iovec aiov; + struct uio auio; + int error; + struct mount *mp; + struct ufsmount *ump; + +#ifdef DEBUG_VFS_LOCKS + if (vp != NULL) + ASSERT_VOP_ELOCKED(vp, "dqsync"); +#endif + + mp = NULL; + error = 0; + if (dq == NODQUOT) + panic("dqsync: dquot"); + if ((ump = dq->dq_ump) == NULL) + return (0); + UFS_LOCK(ump); + if ((dqvp = ump->um_quotas[dq->dq_type]) == NULLVP) { + if (vp == NULL) { + UFS_UNLOCK(ump); + return (0); + } else + panic("dqsync: file"); + } + vref(dqvp); + UFS_UNLOCK(ump); + + DQI_LOCK(dq); + if ((dq->dq_flags & DQ_MOD) == 0) { + DQI_UNLOCK(dq); + vrele(dqvp); + return (0); + } + DQI_UNLOCK(dq); + + (void) vn_start_secondary_write(dqvp, &mp, V_WAIT); + if (vp != dqvp) + vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY); + + DQI_LOCK(dq); + DQI_WAIT(dq, PINOD+2, "dqsync"); + if ((dq->dq_flags & DQ_MOD) == 0) + goto out; + dq->dq_flags |= DQ_LOCK; + DQI_UNLOCK(dq); + + /* + * Write the quota record to the quota file, performing any + * necessary conversions. See dqget() for additional details. + */ + if (ump->um_qflags[dq->dq_type] & QTF_64BIT) { + dq_dqb64(dq, (struct dqblk64 *)buf); + recsize = sizeof(struct dqblk64); + base = sizeof(struct dqhdr64); + } else { + dq_dqb32(dq, (struct dqblk32 *)buf); + recsize = sizeof(struct dqblk32); + base = 0; + } + + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + aiov.iov_base = buf; + aiov.iov_len = recsize; + auio.uio_resid = recsize; + auio.uio_offset = base + dq->dq_id * recsize; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_WRITE; + auio.uio_td = (struct thread *)0; + error = VOP_WRITE(dqvp, &auio, 0, dq->dq_ump->um_cred[dq->dq_type]); + if (auio.uio_resid && error == 0) + error = EIO; + + DQI_LOCK(dq); + DQI_WAKEUP(dq); + dq->dq_flags &= ~DQ_MOD; +out: + DQI_UNLOCK(dq); + if (vp != dqvp) + vput(dqvp); + else + vrele(dqvp); + vn_finished_secondary_write(mp); + return (error); +} + +/* + * Flush all entries from the cache for a particular vnode. + */ +static int +dqflush(struct vnode *vp) +{ + struct dquot *dq, *nextdq; + struct dqhash *dqh; + int error; + + /* + * Move all dquot's that used to refer to this quota + * file off their hash chains (they will eventually + * fall off the head of the free list and be re-used). + */ + error = 0; + DQH_LOCK(); + for (dqh = &dqhashtbl[dqhash]; dqh >= dqhashtbl; dqh--) { + for (dq = LIST_FIRST(dqh); dq; dq = nextdq) { + nextdq = LIST_NEXT(dq, dq_hash); + if (dq->dq_ump->um_quotas[dq->dq_type] != vp) + continue; + if (dq->dq_cnt) + error = EBUSY; + else { + LIST_REMOVE(dq, dq_hash); + dq->dq_ump = NULL; + } + } + } + DQH_UNLOCK(); + return (error); +} + +/* + * The following three functions are provided for the adjustment of + * quotas by the soft updates code. + */ +#ifdef SOFTUPDATES +/* + * Acquire a reference to the quota structures associated with a vnode. + * Return count of number of quota structures found. 
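+ *
+ * An illustrative usage sketch (not lifted from any particular
+ * caller; "blocks_freed" is a made-up variable):
+ *
+ *      struct dquot *qr[MAXQUOTAS];
+ *
+ *      if (quotaref(vp, qr) > 0)
+ *              quotaadj(qr, VFSTOUFS(vp->v_mount), -blocks_freed);
+ *      quotarele(qr);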
+ */ +int +quotaref(vp, qrp) + struct vnode *vp; + struct dquot **qrp; +{ + struct inode *ip; + struct dquot *dq; + int i, found; + + for (i = 0; i < MAXQUOTAS; i++) + qrp[i] = NODQUOT; + /* + * Disk quotas must be turned off for system files. Currently + * snapshot and quota files. + */ + if ((vp->v_vflag & VV_SYSTEM) != 0) + return (0); + /* + * Iterate through and copy active quotas. + */ + found = 0; + ip = VTOI(vp); + mtx_lock(&dqhlock); + for (i = 0; i < MAXQUOTAS; i++) { + if ((dq = ip->i_dquot[i]) == NODQUOT) + continue; + DQREF(dq); + qrp[i] = dq; + found++; + } + mtx_unlock(&dqhlock); + return (found); +} + +/* + * Release a set of quota structures obtained from a vnode. + */ +void +quotarele(qrp) + struct dquot **qrp; +{ + struct dquot *dq; + int i; + + for (i = 0; i < MAXQUOTAS; i++) { + if ((dq = qrp[i]) == NODQUOT) + continue; + dqrele(NULL, dq); + } +} + +/* + * Adjust the number of blocks associated with a quota. + * Positive numbers when adding blocks; negative numbers when freeing blocks. + */ +void +quotaadj(qrp, ump, blkcount) + struct dquot **qrp; + struct ufsmount *ump; + int64_t blkcount; +{ + struct dquot *dq; + ufs2_daddr_t ncurblocks; + int i; + + if (blkcount == 0) + return; + for (i = 0; i < MAXQUOTAS; i++) { + if ((dq = qrp[i]) == NODQUOT) + continue; + DQI_LOCK(dq); + DQI_WAIT(dq, PINOD+1, "adjqta"); + ncurblocks = dq->dq_curblocks + blkcount; + if (ncurblocks >= 0) + dq->dq_curblocks = ncurblocks; + else + dq->dq_curblocks = 0; + if (blkcount < 0) + dq->dq_flags &= ~DQ_BLKS; + else if (dq->dq_curblocks + blkcount >= dq->dq_bsoftlimit && + dq->dq_curblocks < dq->dq_bsoftlimit) + dq->dq_btime = time_second + ump->um_btime[i]; + dq->dq_flags |= DQ_MOD; + DQI_UNLOCK(dq); + } +} +#endif /* SOFTUPDATES */ + +/* + * 32-bit / 64-bit conversion functions. + * + * 32-bit quota records are stored in native byte order. Attention must + * be paid to overflow issues. + * + * 64-bit quota records are stored in network byte order. + */ + +#define CLIP32(u64) (u64 > UINT32_MAX ? UINT32_MAX : (uint32_t)u64) + +/* + * Convert 32-bit host-order structure to dquot. + */ +static void +dqb32_dq(const struct dqblk32 *dqb32, struct dquot *dq) +{ + + dq->dq_bhardlimit = dqb32->dqb_bhardlimit; + dq->dq_bsoftlimit = dqb32->dqb_bsoftlimit; + dq->dq_curblocks = dqb32->dqb_curblocks; + dq->dq_ihardlimit = dqb32->dqb_ihardlimit; + dq->dq_isoftlimit = dqb32->dqb_isoftlimit; + dq->dq_curinodes = dqb32->dqb_curinodes; + dq->dq_btime = dqb32->dqb_btime; + dq->dq_itime = dqb32->dqb_itime; +} + +/* + * Convert 64-bit network-order structure to dquot. + */ +static void +dqb64_dq(const struct dqblk64 *dqb64, struct dquot *dq) +{ + + dq->dq_bhardlimit = be64toh(dqb64->dqb_bhardlimit); + dq->dq_bsoftlimit = be64toh(dqb64->dqb_bsoftlimit); + dq->dq_curblocks = be64toh(dqb64->dqb_curblocks); + dq->dq_ihardlimit = be64toh(dqb64->dqb_ihardlimit); + dq->dq_isoftlimit = be64toh(dqb64->dqb_isoftlimit); + dq->dq_curinodes = be64toh(dqb64->dqb_curinodes); + dq->dq_btime = be64toh(dqb64->dqb_btime); + dq->dq_itime = be64toh(dqb64->dqb_itime); +} + +/* + * Convert dquot to 32-bit host-order structure. 
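+ * Values too large for 32 bits are clamped to UINT32_MAX by CLIP32()
+ * rather than being truncated.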
+ */ +static void +dq_dqb32(const struct dquot *dq, struct dqblk32 *dqb32) +{ + + dqb32->dqb_bhardlimit = CLIP32(dq->dq_bhardlimit); + dqb32->dqb_bsoftlimit = CLIP32(dq->dq_bsoftlimit); + dqb32->dqb_curblocks = CLIP32(dq->dq_curblocks); + dqb32->dqb_ihardlimit = CLIP32(dq->dq_ihardlimit); + dqb32->dqb_isoftlimit = CLIP32(dq->dq_isoftlimit); + dqb32->dqb_curinodes = CLIP32(dq->dq_curinodes); + dqb32->dqb_btime = CLIP32(dq->dq_btime); + dqb32->dqb_itime = CLIP32(dq->dq_itime); +} + +/* + * Convert dquot to 64-bit network-order structure. + */ +static void +dq_dqb64(const struct dquot *dq, struct dqblk64 *dqb64) +{ + + dqb64->dqb_bhardlimit = htobe64(dq->dq_bhardlimit); + dqb64->dqb_bsoftlimit = htobe64(dq->dq_bsoftlimit); + dqb64->dqb_curblocks = htobe64(dq->dq_curblocks); + dqb64->dqb_ihardlimit = htobe64(dq->dq_ihardlimit); + dqb64->dqb_isoftlimit = htobe64(dq->dq_isoftlimit); + dqb64->dqb_curinodes = htobe64(dq->dq_curinodes); + dqb64->dqb_btime = htobe64(dq->dq_btime); + dqb64->dqb_itime = htobe64(dq->dq_itime); +} + +/* + * Convert 64-bit host-order structure to 32-bit host-order structure. + */ +static void +dqb64_dqb32(const struct dqblk64 *dqb64, struct dqblk32 *dqb32) +{ + + dqb32->dqb_bhardlimit = CLIP32(dqb64->dqb_bhardlimit); + dqb32->dqb_bsoftlimit = CLIP32(dqb64->dqb_bsoftlimit); + dqb32->dqb_curblocks = CLIP32(dqb64->dqb_curblocks); + dqb32->dqb_ihardlimit = CLIP32(dqb64->dqb_ihardlimit); + dqb32->dqb_isoftlimit = CLIP32(dqb64->dqb_isoftlimit); + dqb32->dqb_curinodes = CLIP32(dqb64->dqb_curinodes); + dqb32->dqb_btime = CLIP32(dqb64->dqb_btime); + dqb32->dqb_itime = CLIP32(dqb64->dqb_itime); +} + +/* + * Convert 32-bit host-order structure to 64-bit host-order structure. + */ +static void +dqb32_dqb64(const struct dqblk32 *dqb32, struct dqblk64 *dqb64) +{ + + dqb64->dqb_bhardlimit = dqb32->dqb_bhardlimit; + dqb64->dqb_bsoftlimit = dqb32->dqb_bsoftlimit; + dqb64->dqb_curblocks = dqb32->dqb_curblocks; + dqb64->dqb_ihardlimit = dqb32->dqb_ihardlimit; + dqb64->dqb_isoftlimit = dqb32->dqb_isoftlimit; + dqb64->dqb_curinodes = dqb32->dqb_curinodes; + dqb64->dqb_btime = dqb32->dqb_btime; + dqb64->dqb_itime = dqb32->dqb_itime; +} diff --git a/Dump/ufs/ufs/ufs_vfsops.c b/Dump/ufs/ufs/ufs_vfsops.c new file mode 100644 index 0000000..461cd73 --- /dev/null +++ b/Dump/ufs/ufs/ufs_vfsops.c @@ -0,0 +1,244 @@ +/*- + * Copyright (c) 1991, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ufs_vfsops.c 8.8 (Berkeley) 5/20/95 + */ + +#include +__FBSDID("$FreeBSD: releng/11.2/sys/ufs/ufs/ufs_vfsops.c 331722 2018-03-29 02:50:57Z eadler $"); + +#include "opt_quota.h" +#include "opt_ufs.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#ifdef UFS_DIRHASH +#include +#include +#endif + +MALLOC_DEFINE(M_UFSMNT, "ufs_mount", "UFS mount structure"); + +/* + * Return the root of a filesystem. + */ +int +ufs_root(mp, flags, vpp) + struct mount *mp; + int flags; + struct vnode **vpp; +{ + struct vnode *nvp; + int error; + + error = VFS_VGET(mp, (ino_t)ROOTINO, flags, &nvp); + if (error) + return (error); + *vpp = nvp; + return (0); +} + +/* + * Do operations associated with quotas + */ +int +ufs_quotactl(mp, cmds, id, arg) + struct mount *mp; + int cmds; + uid_t id; + void *arg; +{ +#ifndef QUOTA + if ((cmds >> SUBCMDSHIFT) == Q_QUOTAON) + vfs_unbusy(mp); + + return (EOPNOTSUPP); +#else + struct thread *td; + int cmd, type, error; + + td = curthread; + cmd = cmds >> SUBCMDSHIFT; + type = cmds & SUBCMDMASK; + if (id == -1) { + switch (type) { + + case USRQUOTA: + id = td->td_ucred->cr_ruid; + break; + + case GRPQUOTA: + id = td->td_ucred->cr_rgid; + break; + + default: + if (cmd == Q_QUOTAON) + vfs_unbusy(mp); + return (EINVAL); + } + } + if ((u_int)type >= MAXQUOTAS) { + if (cmd == Q_QUOTAON) + vfs_unbusy(mp); + return (EINVAL); + } + + switch (cmd) { + case Q_QUOTAON: + error = quotaon(td, mp, type, arg); + break; + + case Q_QUOTAOFF: + error = quotaoff(td, mp, type); + break; + + case Q_SETQUOTA32: + error = setquota32(td, mp, id, type, arg); + break; + + case Q_SETUSE32: + error = setuse32(td, mp, id, type, arg); + break; + + case Q_GETQUOTA32: + error = getquota32(td, mp, id, type, arg); + break; + + case Q_SETQUOTA: + error = setquota(td, mp, id, type, arg); + break; + + case Q_SETUSE: + error = setuse(td, mp, id, type, arg); + break; + + case Q_GETQUOTA: + error = getquota(td, mp, id, type, arg); + break; + + case Q_GETQUOTASIZE: + error = getquotasize(td, mp, id, type, arg); + break; + + case Q_SYNC: + error = qsync(mp); + break; + + default: + error = EINVAL; + break; + } + return (error); +#endif +} + +/* + * Initial UFS filesystems, done only once. + */ +int +ufs_init(vfsp) + struct vfsconf *vfsp; +{ + +#ifdef QUOTA + dqinit(); +#endif +#ifdef UFS_DIRHASH + ufsdirhash_init(); +#endif + return (0); +} + +/* + * Uninitialise UFS filesystems, done before module unload. 
+ */ +int +ufs_uninit(vfsp) + struct vfsconf *vfsp; +{ + +#ifdef QUOTA + dquninit(); +#endif +#ifdef UFS_DIRHASH + ufsdirhash_uninit(); +#endif + return (0); +} + +/* + * This is the generic part of fhtovp called after the underlying + * filesystem has validated the file handle. + * + * Call the VFS_CHECKEXP beforehand to verify access. + */ +int +ufs_fhtovp(mp, ufhp, flags, vpp) + struct mount *mp; + struct ufid *ufhp; + int flags; + struct vnode **vpp; +{ + struct inode *ip; + struct vnode *nvp; + int error; + + error = VFS_VGET(mp, ufhp->ufid_ino, flags, &nvp); + if (error) { + *vpp = NULLVP; + return (error); + } + ip = VTOI(nvp); + if (ip->i_mode == 0 || ip->i_gen != ufhp->ufid_gen || + ip->i_effnlink <= 0) { + vput(nvp); + *vpp = NULLVP; + return (ESTALE); + } + *vpp = nvp; + vnode_create_vobject(*vpp, DIP(ip, i_size), curthread); + return (0); +} diff --git a/Dump/ufs/ufs/ufs_vnops.c b/Dump/ufs/ufs/ufs_vnops.c new file mode 100644 index 0000000..66662a6 --- /dev/null +++ b/Dump/ufs/ufs/ufs_vnops.c @@ -0,0 +1,2805 @@ +/*- + * Copyright (c) 1982, 1986, 1989, 1993, 1995 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 + */ + +#include +__FBSDID("$FreeBSD: releng/11.2/sys/ufs/ufs/ufs_vnops.c 332749 2018-04-19 02:47:21Z pfg $"); + +#include "opt_quota.h" +#include "opt_suiddir.h" +#include "opt_ufs.h" +#include "opt_ffs.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include /* XXX */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#ifdef UFS_DIRHASH +#include +#endif +#ifdef UFS_GJOURNAL +#include +FEATURE(ufs_gjournal, "Journaling support through GEOM for UFS"); +#endif + +#ifdef QUOTA +FEATURE(ufs_quota, "UFS disk quotas support"); +FEATURE(ufs_quota64, "64bit UFS disk quotas support"); +#endif + +#ifdef SUIDDIR +FEATURE(suiddir, + "Give all new files in directory the same ownership as the directory"); +#endif + + +#include + +static vop_accessx_t ufs_accessx; +static int ufs_chmod(struct vnode *, int, struct ucred *, struct thread *); +static int ufs_chown(struct vnode *, uid_t, gid_t, struct ucred *, struct thread *); +static vop_close_t ufs_close; +static vop_create_t ufs_create; +static vop_getattr_t ufs_getattr; +static vop_ioctl_t ufs_ioctl; +static vop_link_t ufs_link; +static int ufs_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *, const char *); +static vop_markatime_t ufs_markatime; +static vop_mkdir_t ufs_mkdir; +static vop_mknod_t ufs_mknod; +static vop_open_t ufs_open; +static vop_pathconf_t ufs_pathconf; +static vop_print_t ufs_print; +static vop_readlink_t ufs_readlink; +static vop_remove_t ufs_remove; +static vop_rename_t ufs_rename; +static vop_rmdir_t ufs_rmdir; +static vop_setattr_t ufs_setattr; +static vop_strategy_t ufs_strategy; +static vop_symlink_t ufs_symlink; +static vop_whiteout_t ufs_whiteout; +static vop_close_t ufsfifo_close; +static vop_kqfilter_t ufsfifo_kqfilter; + +SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD, 0, "UFS filesystem"); + +/* + * A virgin directory (no blushing please). + */ +static struct dirtemplate mastertemplate = { + 0, 12, DT_DIR, 1, ".", + 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." +}; +static struct odirtemplate omastertemplate = { + 0, 12, 1, ".", + 0, DIRBLKSIZ - 12, 2, ".." 
+}; + +static void +ufs_itimes_locked(struct vnode *vp) +{ + struct inode *ip; + struct timespec ts; + + ASSERT_VI_LOCKED(vp, __func__); + + ip = VTOI(vp); + if (UFS_RDONLY(ip)) + goto out; + if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0) + return; + + if ((vp->v_type == VBLK || vp->v_type == VCHR) && !DOINGSOFTDEP(vp)) + ip->i_flag |= IN_LAZYMOD; + else if (((vp->v_mount->mnt_kern_flag & + (MNTK_SUSPENDED | MNTK_SUSPEND)) == 0) || + (ip->i_flag & (IN_CHANGE | IN_UPDATE))) + ip->i_flag |= IN_MODIFIED; + else if (ip->i_flag & IN_ACCESS) + ip->i_flag |= IN_LAZYACCESS; + vfs_timestamp(&ts); + if (ip->i_flag & IN_ACCESS) { + DIP_SET(ip, i_atime, ts.tv_sec); + DIP_SET(ip, i_atimensec, ts.tv_nsec); + } + if (ip->i_flag & IN_UPDATE) { + DIP_SET(ip, i_mtime, ts.tv_sec); + DIP_SET(ip, i_mtimensec, ts.tv_nsec); + } + if (ip->i_flag & IN_CHANGE) { + DIP_SET(ip, i_ctime, ts.tv_sec); + DIP_SET(ip, i_ctimensec, ts.tv_nsec); + DIP_SET(ip, i_modrev, DIP(ip, i_modrev) + 1); + } + + out: + ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); +} + +void +ufs_itimes(struct vnode *vp) +{ + + VI_LOCK(vp); + ufs_itimes_locked(vp); + VI_UNLOCK(vp); +} + +/* + * Create a regular file + */ +static int +ufs_create(ap) + struct vop_create_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap; +{ + int error; + + error = + ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), + ap->a_dvp, ap->a_vpp, ap->a_cnp, "ufs_create"); + if (error != 0) + return (error); + if ((ap->a_cnp->cn_flags & MAKEENTRY) != 0) + cache_enter(ap->a_dvp, *ap->a_vpp, ap->a_cnp); + return (0); +} + +/* + * Mknod vnode call + */ +/* ARGSUSED */ +static int +ufs_mknod(ap) + struct vop_mknod_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap; +{ + struct vattr *vap = ap->a_vap; + struct vnode **vpp = ap->a_vpp; + struct inode *ip; + ino_t ino; + int error; + + error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), + ap->a_dvp, vpp, ap->a_cnp, "ufs_mknod"); + if (error) + return (error); + ip = VTOI(*vpp); + ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; + if (vap->va_rdev != VNOVAL) { + /* + * Want to be able to use this to make badblock + * inodes, so don't truncate the dev number. + */ + DIP_SET(ip, i_rdev, vap->va_rdev); + } + /* + * Remove inode, then reload it through VFS_VGET so it is + * checked to see if it is an alias of an existing entry in + * the inode cache. XXX I don't believe this is necessary now. + */ + (*vpp)->v_type = VNON; + ino = ip->i_number; /* Save this before vgone() invalidates ip. */ + vgone(*vpp); + vput(*vpp); + error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp); + if (error) { + *vpp = NULL; + return (error); + } + return (0); +} + +/* + * Open called. + */ +/* ARGSUSED */ +static int +ufs_open(struct vop_open_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct inode *ip; + + if (vp->v_type == VCHR || vp->v_type == VBLK) + return (EOPNOTSUPP); + + ip = VTOI(vp); + /* + * Files marked append-only must be opened for appending. + */ + if ((ip->i_flags & APPEND) && + (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) + return (EPERM); + vnode_create_vobject(vp, DIP(ip, i_size), ap->a_td); + return (0); +} + +/* + * Close called. + * + * Update the times on the inode. 
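+ * The times are only pushed here while other references to the vnode
+ * remain (v_usecount > 1); the final close leaves this to the normal
+ * inactivation path.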
+ */ +/* ARGSUSED */ +static int +ufs_close(ap) + struct vop_close_args /* { + struct vnode *a_vp; + int a_fflag; + struct ucred *a_cred; + struct thread *a_td; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + int usecount; + + VI_LOCK(vp); + usecount = vp->v_usecount; + if (usecount > 1) + ufs_itimes_locked(vp); + VI_UNLOCK(vp); + return (0); +} + +static int +ufs_accessx(ap) + struct vop_accessx_args /* { + struct vnode *a_vp; + accmode_t a_accmode; + struct ucred *a_cred; + struct thread *a_td; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); + accmode_t accmode = ap->a_accmode; + int error; +#ifdef QUOTA + int relocked; +#endif +#ifdef UFS_ACL + struct acl *acl; + acl_type_t type; +#endif + + /* + * Disallow write attempts on read-only filesystems; + * unless the file is a socket, fifo, or a block or + * character device resident on the filesystem. + */ + if (accmode & VMODIFY_PERMS) { + switch (vp->v_type) { + case VDIR: + case VLNK: + case VREG: + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); +#ifdef QUOTA + /* + * Inode is accounted in the quotas only if struct + * dquot is attached to it. VOP_ACCESS() is called + * from vn_open_cred() and provides a convenient + * point to call getinoquota(). + */ + if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { + + /* + * Upgrade vnode lock, since getinoquota() + * requires exclusive lock to modify inode. + */ + relocked = 1; + vhold(vp); + vn_lock(vp, LK_UPGRADE | LK_RETRY); + VI_LOCK(vp); + if (vp->v_iflag & VI_DOOMED) { + vdropl(vp); + error = ENOENT; + goto relock; + } + vdropl(vp); + } else + relocked = 0; + error = getinoquota(ip); +relock: + if (relocked) + vn_lock(vp, LK_DOWNGRADE | LK_RETRY); + if (error != 0) + return (error); +#endif + break; + default: + break; + } + } + + /* + * If immutable bit set, nobody gets to write it. "& ~VADMIN_PERMS" + * permits the owner of the file to remove the IMMUTABLE flag. + */ + if ((accmode & (VMODIFY_PERMS & ~VADMIN_PERMS)) && + (ip->i_flags & (IMMUTABLE | SF_SNAPSHOT))) + return (EPERM); + +#ifdef UFS_ACL + if ((vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) != 0) { + if (vp->v_mount->mnt_flag & MNT_NFS4ACLS) + type = ACL_TYPE_NFS4; + else + type = ACL_TYPE_ACCESS; + + acl = acl_alloc(M_WAITOK); + if (type == ACL_TYPE_NFS4) + error = ufs_getacl_nfs4_internal(vp, acl, ap->a_td); + else + error = VOP_GETACL(vp, type, acl, ap->a_cred, ap->a_td); + switch (error) { + case 0: + if (type == ACL_TYPE_NFS4) { + error = vaccess_acl_nfs4(vp->v_type, ip->i_uid, + ip->i_gid, acl, accmode, ap->a_cred, NULL); + } else { + error = vfs_unixify_accmode(&accmode); + if (error == 0) + error = vaccess_acl_posix1e(vp->v_type, ip->i_uid, + ip->i_gid, acl, accmode, ap->a_cred, NULL); + } + break; + default: + if (error != EOPNOTSUPP) + printf( +"ufs_accessx(): Error retrieving ACL on object (%d).\n", + error); + /* + * XXX: Fall back until debugged. Should + * eventually possibly log an error, and return + * EPERM for safety. 
+ */ + error = vfs_unixify_accmode(&accmode); + if (error == 0) + error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, + ip->i_gid, accmode, ap->a_cred, NULL); + } + acl_free(acl); + + return (error); + } +#endif /* !UFS_ACL */ + error = vfs_unixify_accmode(&accmode); + if (error == 0) + error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid, + accmode, ap->a_cred, NULL); + return (error); +} + +/* ARGSUSED */ +static int +ufs_getattr(ap) + struct vop_getattr_args /* { + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); + struct vattr *vap = ap->a_vap; + + VI_LOCK(vp); + ufs_itimes_locked(vp); + if (I_IS_UFS1(ip)) { + vap->va_atime.tv_sec = ip->i_din1->di_atime; + vap->va_atime.tv_nsec = ip->i_din1->di_atimensec; + } else { + vap->va_atime.tv_sec = ip->i_din2->di_atime; + vap->va_atime.tv_nsec = ip->i_din2->di_atimensec; + } + VI_UNLOCK(vp); + /* + * Copy from inode table + */ + vap->va_fsid = dev2udev(ITOUMP(ip)->um_dev); + vap->va_fileid = ip->i_number; + vap->va_mode = ip->i_mode & ~IFMT; + vap->va_nlink = ip->i_effnlink; + vap->va_uid = ip->i_uid; + vap->va_gid = ip->i_gid; + if (I_IS_UFS1(ip)) { + vap->va_rdev = ip->i_din1->di_rdev; + vap->va_size = ip->i_din1->di_size; + vap->va_mtime.tv_sec = ip->i_din1->di_mtime; + vap->va_mtime.tv_nsec = ip->i_din1->di_mtimensec; + vap->va_ctime.tv_sec = ip->i_din1->di_ctime; + vap->va_ctime.tv_nsec = ip->i_din1->di_ctimensec; + vap->va_bytes = dbtob((u_quad_t)ip->i_din1->di_blocks); + vap->va_filerev = ip->i_din1->di_modrev; + } else { + vap->va_rdev = ip->i_din2->di_rdev; + vap->va_size = ip->i_din2->di_size; + vap->va_mtime.tv_sec = ip->i_din2->di_mtime; + vap->va_mtime.tv_nsec = ip->i_din2->di_mtimensec; + vap->va_ctime.tv_sec = ip->i_din2->di_ctime; + vap->va_ctime.tv_nsec = ip->i_din2->di_ctimensec; + vap->va_birthtime.tv_sec = ip->i_din2->di_birthtime; + vap->va_birthtime.tv_nsec = ip->i_din2->di_birthnsec; + vap->va_bytes = dbtob((u_quad_t)ip->i_din2->di_blocks); + vap->va_filerev = ip->i_din2->di_modrev; + } + vap->va_flags = ip->i_flags; + vap->va_gen = ip->i_gen; + vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; + vap->va_type = IFTOVT(ip->i_mode); + return (0); +} + +/* + * Set attribute vnode op. called from several syscalls + */ +static int +ufs_setattr(ap) + struct vop_setattr_args /* { + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + } */ *ap; +{ + struct vattr *vap = ap->a_vap; + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); + struct ucred *cred = ap->a_cred; + struct thread *td = curthread; + int error; + + /* + * Check for unsettable attributes. + */ + if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || + (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || + (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || + ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { + return (EINVAL); + } + if (vap->va_flags != VNOVAL) { + if ((vap->va_flags & ~(SF_APPEND | SF_ARCHIVED | SF_IMMUTABLE | + SF_NOUNLINK | SF_SNAPSHOT | UF_APPEND | UF_ARCHIVE | + UF_HIDDEN | UF_IMMUTABLE | UF_NODUMP | UF_NOUNLINK | + UF_OFFLINE | UF_OPAQUE | UF_READONLY | UF_REPARSE | + UF_SPARSE | UF_SYSTEM)) != 0) + return (EOPNOTSUPP); + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); + /* + * Callers may only modify the file flags on objects they + * have VADMIN rights for. 
+ */ + if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) + return (error); + /* + * Unprivileged processes are not permitted to unset system + * flags, or modify flags if any system flags are set. + * Privileged non-jail processes may not modify system flags + * if securelevel > 0 and any existing system flags are set. + * Privileged jail processes behave like privileged non-jail + * processes if the security.jail.chflags_allowed sysctl is + * is non-zero; otherwise, they behave like unprivileged + * processes. + */ + if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) { + if (ip->i_flags & + (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) { + error = securelevel_gt(cred, 0); + if (error) + return (error); + } + /* The snapshot flag cannot be toggled. */ + if ((vap->va_flags ^ ip->i_flags) & SF_SNAPSHOT) + return (EPERM); + } else { + if (ip->i_flags & + (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) || + ((vap->va_flags ^ ip->i_flags) & SF_SETTABLE)) + return (EPERM); + } + ip->i_flags = vap->va_flags; + DIP_SET(ip, i_flags, vap->va_flags); + ip->i_flag |= IN_CHANGE; + error = UFS_UPDATE(vp, 0); + if (ip->i_flags & (IMMUTABLE | APPEND)) + return (error); + } + /* + * If immutable or append, no one can change any of its attributes + * except the ones already handled (in some cases, file flags + * including the immutability flags themselves for the superuser). + */ + if (ip->i_flags & (IMMUTABLE | APPEND)) + return (EPERM); + /* + * Go through the fields and update iff not VNOVAL. + */ + if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); + if ((error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, + td)) != 0) + return (error); + } + if (vap->va_size != VNOVAL) { + /* + * XXX most of the following special cases should be in + * callers instead of in N filesystems. The VDIR check + * mostly already is. + */ + switch (vp->v_type) { + case VDIR: + return (EISDIR); + case VLNK: + case VREG: + /* + * Truncation should have an effect in these cases. + * Disallow it if the filesystem is read-only or + * the file is being snapshotted. + */ + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); + if ((ip->i_flags & SF_SNAPSHOT) != 0) + return (EPERM); + break; + default: + /* + * According to POSIX, the result is unspecified + * for file types other than regular files, + * directories and shared memory objects. We + * don't support shared memory objects in the file + * system, and have dubious support for truncating + * symlinks. Just ignore the request in other cases. + */ + return (0); + } + if ((error = UFS_TRUNCATE(vp, vap->va_size, IO_NORMAL | + ((vap->va_vaflags & VA_SYNC) != 0 ? 
IO_SYNC : 0), + cred)) != 0) + return (error); + } + if (vap->va_atime.tv_sec != VNOVAL || + vap->va_mtime.tv_sec != VNOVAL || + vap->va_birthtime.tv_sec != VNOVAL) { + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); + if ((ip->i_flags & SF_SNAPSHOT) != 0) + return (EPERM); + error = vn_utimes_perm(vp, vap, cred, td); + if (error != 0) + return (error); + ip->i_flag |= IN_CHANGE | IN_MODIFIED; + if (vap->va_atime.tv_sec != VNOVAL) { + ip->i_flag &= ~IN_ACCESS; + DIP_SET(ip, i_atime, vap->va_atime.tv_sec); + DIP_SET(ip, i_atimensec, vap->va_atime.tv_nsec); + } + if (vap->va_mtime.tv_sec != VNOVAL) { + ip->i_flag &= ~IN_UPDATE; + DIP_SET(ip, i_mtime, vap->va_mtime.tv_sec); + DIP_SET(ip, i_mtimensec, vap->va_mtime.tv_nsec); + } + if (vap->va_birthtime.tv_sec != VNOVAL && I_IS_UFS2(ip)) { + ip->i_din2->di_birthtime = vap->va_birthtime.tv_sec; + ip->i_din2->di_birthnsec = vap->va_birthtime.tv_nsec; + } + error = UFS_UPDATE(vp, 0); + if (error) + return (error); + } + error = 0; + if (vap->va_mode != (mode_t)VNOVAL) { + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); + if ((ip->i_flags & SF_SNAPSHOT) != 0 && (vap->va_mode & + (S_IXUSR | S_IWUSR | S_IXGRP | S_IWGRP | S_IXOTH | S_IWOTH))) + return (EPERM); + error = ufs_chmod(vp, (int)vap->va_mode, cred, td); + } + return (error); +} + +#ifdef UFS_ACL +static int +ufs_update_nfs4_acl_after_mode_change(struct vnode *vp, int mode, + int file_owner_id, struct ucred *cred, struct thread *td) +{ + int error; + struct acl *aclp; + + aclp = acl_alloc(M_WAITOK); + error = ufs_getacl_nfs4_internal(vp, aclp, td); + /* + * We don't have to handle EOPNOTSUPP here, as the filesystem claims + * it supports ACLs. + */ + if (error) + goto out; + + acl_nfs4_sync_acl_from_mode(aclp, mode, file_owner_id); + error = ufs_setacl_nfs4_internal(vp, aclp, td); + +out: + acl_free(aclp); + return (error); +} +#endif /* UFS_ACL */ + +/* + * Mark this file's access time for update for vfs_mark_atime(). This + * is called from execve() and mmap(). + */ +static int +ufs_markatime(ap) + struct vop_markatime_args /* { + struct vnode *a_vp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); + + VI_LOCK(vp); + ip->i_flag |= IN_ACCESS; + VI_UNLOCK(vp); + /* + * XXXKIB No UFS_UPDATE(ap->a_vp, 0) there. + */ + return (0); +} + +/* + * Change the mode on a file. + * Inode must be locked before calling. + */ +static int +ufs_chmod(vp, mode, cred, td) + struct vnode *vp; + int mode; + struct ucred *cred; + struct thread *td; +{ + struct inode *ip = VTOI(vp); + int error; + + /* + * To modify the permissions on a file, must possess VADMIN + * for that file. + */ + if ((error = VOP_ACCESSX(vp, VWRITE_ACL, cred, td))) + return (error); + /* + * Privileged processes may set the sticky bit on non-directories, + * as well as set the setgid bit on a file with a group that the + * process is not a member of. Both of these are allowed in + * jail(8). + */ + if (vp->v_type != VDIR && (mode & S_ISTXT)) { + if (priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0)) + return (EFTYPE); + } + if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) { + error = priv_check_cred(cred, PRIV_VFS_SETGID, 0); + if (error) + return (error); + } + + /* + * Deny setting setuid if we are not the file owner. 
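+ * (A caller with PRIV_VFS_ADMIN privilege is still permitted to, per
+ * the priv_check_cred() call below.)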
+ */ + if ((mode & ISUID) && ip->i_uid != cred->cr_uid) { + error = priv_check_cred(cred, PRIV_VFS_ADMIN, 0); + if (error) + return (error); + } + + ip->i_mode &= ~ALLPERMS; + ip->i_mode |= (mode & ALLPERMS); + DIP_SET(ip, i_mode, ip->i_mode); + ip->i_flag |= IN_CHANGE; +#ifdef UFS_ACL + if ((vp->v_mount->mnt_flag & MNT_NFS4ACLS) != 0) + error = ufs_update_nfs4_acl_after_mode_change(vp, mode, ip->i_uid, cred, td); +#endif + if (error == 0 && (ip->i_flag & IN_CHANGE) != 0) + error = UFS_UPDATE(vp, 0); + + return (error); +} + +/* + * Perform chown operation on inode ip; + * inode must be locked prior to call. + */ +static int +ufs_chown(vp, uid, gid, cred, td) + struct vnode *vp; + uid_t uid; + gid_t gid; + struct ucred *cred; + struct thread *td; +{ + struct inode *ip = VTOI(vp); + uid_t ouid; + gid_t ogid; + int error = 0; +#ifdef QUOTA + int i; + ufs2_daddr_t change; +#endif + + if (uid == (uid_t)VNOVAL) + uid = ip->i_uid; + if (gid == (gid_t)VNOVAL) + gid = ip->i_gid; + /* + * To modify the ownership of a file, must possess VADMIN for that + * file. + */ + if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td))) + return (error); + /* + * To change the owner of a file, or change the group of a file to a + * group of which we are not a member, the caller must have + * privilege. + */ + if (((uid != ip->i_uid && uid != cred->cr_uid) || + (gid != ip->i_gid && !groupmember(gid, cred))) && + (error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0))) + return (error); + ogid = ip->i_gid; + ouid = ip->i_uid; +#ifdef QUOTA + if ((error = getinoquota(ip)) != 0) + return (error); + if (ouid == uid) { + dqrele(vp, ip->i_dquot[USRQUOTA]); + ip->i_dquot[USRQUOTA] = NODQUOT; + } + if (ogid == gid) { + dqrele(vp, ip->i_dquot[GRPQUOTA]); + ip->i_dquot[GRPQUOTA] = NODQUOT; + } + change = DIP(ip, i_blocks); + (void) chkdq(ip, -change, cred, CHOWN); + (void) chkiq(ip, -1, cred, CHOWN); + for (i = 0; i < MAXQUOTAS; i++) { + dqrele(vp, ip->i_dquot[i]); + ip->i_dquot[i] = NODQUOT; + } +#endif + ip->i_gid = gid; + DIP_SET(ip, i_gid, gid); + ip->i_uid = uid; + DIP_SET(ip, i_uid, uid); +#ifdef QUOTA + if ((error = getinoquota(ip)) == 0) { + if (ouid == uid) { + dqrele(vp, ip->i_dquot[USRQUOTA]); + ip->i_dquot[USRQUOTA] = NODQUOT; + } + if (ogid == gid) { + dqrele(vp, ip->i_dquot[GRPQUOTA]); + ip->i_dquot[GRPQUOTA] = NODQUOT; + } + if ((error = chkdq(ip, change, cred, CHOWN)) == 0) { + if ((error = chkiq(ip, 1, cred, CHOWN)) == 0) + goto good; + else + (void) chkdq(ip, -change, cred, CHOWN|FORCE); + } + for (i = 0; i < MAXQUOTAS; i++) { + dqrele(vp, ip->i_dquot[i]); + ip->i_dquot[i] = NODQUOT; + } + } + ip->i_gid = ogid; + DIP_SET(ip, i_gid, ogid); + ip->i_uid = ouid; + DIP_SET(ip, i_uid, ouid); + if (getinoquota(ip) == 0) { + if (ouid == uid) { + dqrele(vp, ip->i_dquot[USRQUOTA]); + ip->i_dquot[USRQUOTA] = NODQUOT; + } + if (ogid == gid) { + dqrele(vp, ip->i_dquot[GRPQUOTA]); + ip->i_dquot[GRPQUOTA] = NODQUOT; + } + (void) chkdq(ip, change, cred, FORCE|CHOWN); + (void) chkiq(ip, 1, cred, FORCE|CHOWN); + (void) getinoquota(ip); + } + return (error); +good: + if (getinoquota(ip)) + panic("ufs_chown: lost quota"); +#endif /* QUOTA */ + ip->i_flag |= IN_CHANGE; + if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) { + if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0)) { + ip->i_mode &= ~(ISUID | ISGID); + DIP_SET(ip, i_mode, ip->i_mode); + } + } + error = UFS_UPDATE(vp, 0); + return (error); +} + +static int +ufs_remove(ap) + struct vop_remove_args /* { + struct vnode *a_dvp; + struct vnode *a_vp; + struct 
componentname *a_cnp; + } */ *ap; +{ + struct inode *ip; + struct vnode *vp = ap->a_vp; + struct vnode *dvp = ap->a_dvp; + int error; + struct thread *td; + + td = curthread; + ip = VTOI(vp); + if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || + (VTOI(dvp)->i_flags & APPEND)) { + error = EPERM; + goto out; + } +#ifdef UFS_GJOURNAL + ufs_gjournal_orphan(vp); +#endif + error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0); + if (ip->i_nlink <= 0) + vp->v_vflag |= VV_NOSYNC; + if ((ip->i_flags & SF_SNAPSHOT) != 0) { + /* + * Avoid deadlock where another thread is trying to + * update the inodeblock for dvp and is waiting on + * snaplk. Temporary unlock the vnode lock for the + * unlinked file and sync the directory. This should + * allow vput() of the directory to not block later on + * while holding the snapshot vnode locked, assuming + * that the directory hasn't been unlinked too. + */ + VOP_UNLOCK(vp, 0); + (void) VOP_FSYNC(dvp, MNT_WAIT, td); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + } +out: + return (error); +} + +static void +print_bad_link_count(const char *funcname, struct vnode *dvp) +{ + struct inode *dip; + + dip = VTOI(dvp); + uprintf("%s: Bad link count %d on parent inode %jd in file system %s\n", + funcname, dip->i_effnlink, (intmax_t)dip->i_number, + dvp->v_mount->mnt_stat.f_mntonname); +} + +/* + * link vnode call + */ +static int +ufs_link(ap) + struct vop_link_args /* { + struct vnode *a_tdvp; + struct vnode *a_vp; + struct componentname *a_cnp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct vnode *tdvp = ap->a_tdvp; + struct componentname *cnp = ap->a_cnp; + struct inode *ip; + struct direct newdir; + int error; + +#ifdef INVARIANTS + if ((cnp->cn_flags & HASBUF) == 0) + panic("ufs_link: no name"); +#endif + if (VTOI(tdvp)->i_effnlink < 2) { + print_bad_link_count("ufs_link", tdvp); + error = EINVAL; + goto out; + } + ip = VTOI(vp); + if ((nlink_t)ip->i_nlink >= LINK_MAX) { + error = EMLINK; + goto out; + } + /* + * The file may have been removed after namei droped the original + * lock. 
+ */ + if (ip->i_effnlink == 0) { + error = ENOENT; + goto out; + } + if (ip->i_flags & (IMMUTABLE | APPEND)) { + error = EPERM; + goto out; + } + ip->i_effnlink++; + ip->i_nlink++; + DIP_SET(ip, i_nlink, ip->i_nlink); + ip->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(vp)) + softdep_setup_link(VTOI(tdvp), ip); + error = UFS_UPDATE(vp, !(DOINGSOFTDEP(vp) | DOINGASYNC(vp))); + if (!error) { + ufs_makedirentry(ip, cnp, &newdir); + error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL, 0); + } + + if (error) { + ip->i_effnlink--; + ip->i_nlink--; + DIP_SET(ip, i_nlink, ip->i_nlink); + ip->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(vp)) + softdep_revert_link(VTOI(tdvp), ip); + } +out: + return (error); +} + +/* + * whiteout vnode call + */ +static int +ufs_whiteout(ap) + struct vop_whiteout_args /* { + struct vnode *a_dvp; + struct componentname *a_cnp; + int a_flags; + } */ *ap; +{ + struct vnode *dvp = ap->a_dvp; + struct componentname *cnp = ap->a_cnp; + struct direct newdir; + int error = 0; + + switch (ap->a_flags) { + case LOOKUP: + /* 4.4 format directories support whiteout operations */ + if (dvp->v_mount->mnt_maxsymlinklen > 0) + return (0); + return (EOPNOTSUPP); + + case CREATE: + /* create a new directory whiteout */ +#ifdef INVARIANTS + if ((cnp->cn_flags & SAVENAME) == 0) + panic("ufs_whiteout: missing name"); + if (dvp->v_mount->mnt_maxsymlinklen <= 0) + panic("ufs_whiteout: old format filesystem"); +#endif + + newdir.d_ino = WINO; + newdir.d_namlen = cnp->cn_namelen; + bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1); + newdir.d_type = DT_WHT; + error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL, 0); + break; + + case DELETE: + /* remove an existing directory whiteout */ +#ifdef INVARIANTS + if (dvp->v_mount->mnt_maxsymlinklen <= 0) + panic("ufs_whiteout: old format filesystem"); +#endif + + cnp->cn_flags &= ~DOWHITEOUT; + error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0); + break; + default: + panic("ufs_whiteout: unknown op"); + } + return (error); +} + +static volatile int rename_restarts; +SYSCTL_INT(_vfs_ufs, OID_AUTO, rename_restarts, CTLFLAG_RD, + __DEVOLATILE(int *, &rename_restarts), 0, + "Times rename had to restart due to lock contention"); + +/* + * Rename system call. + * rename("foo", "bar"); + * is essentially + * unlink("bar"); + * link("foo", "bar"); + * unlink("foo"); + * but ``atomically''. Can't do full commit without saving state in the + * inode on disk which isn't feasible at this time. Best we can do is + * always guarantee the target exists. + * + * Basic algorithm is: + * + * 1) Bump link count on source while we're linking it to the + * target. This also ensure the inode won't be deleted out + * from underneath us while we work (it may be truncated by + * a concurrent `trunc' or `open' for creation). + * 2) Link source to destination. If destination already exists, + * delete it first. + * 3) Unlink source reference to inode if still around. If a + * directory was moved and the parent of the destination + * is different from the source, patch the ".." entry in the + * directory. 
+ */ +static int +ufs_rename(ap) + struct vop_rename_args /* { + struct vnode *a_fdvp; + struct vnode *a_fvp; + struct componentname *a_fcnp; + struct vnode *a_tdvp; + struct vnode *a_tvp; + struct componentname *a_tcnp; + } */ *ap; +{ + struct vnode *tvp = ap->a_tvp; + struct vnode *tdvp = ap->a_tdvp; + struct vnode *fvp = ap->a_fvp; + struct vnode *fdvp = ap->a_fdvp; + struct vnode *nvp; + struct componentname *tcnp = ap->a_tcnp; + struct componentname *fcnp = ap->a_fcnp; + struct thread *td = fcnp->cn_thread; + struct inode *fip, *tip, *tdp, *fdp; + struct direct newdir; + off_t endoff; + int doingdirectory, newparent; + int error = 0; + struct mount *mp; + ino_t ino; + +#ifdef INVARIANTS + if ((tcnp->cn_flags & HASBUF) == 0 || + (fcnp->cn_flags & HASBUF) == 0) + panic("ufs_rename: no name"); +#endif + endoff = 0; + mp = tdvp->v_mount; + VOP_UNLOCK(tdvp, 0); + if (tvp && tvp != tdvp) + VOP_UNLOCK(tvp, 0); + /* + * Check for cross-device rename. + */ + if ((fvp->v_mount != tdvp->v_mount) || + (tvp && (fvp->v_mount != tvp->v_mount))) { + error = EXDEV; + mp = NULL; + goto releout; + } +relock: + /* + * We need to acquire 2 to 4 locks depending on whether tvp is NULL + * and fdvp and tdvp are the same directory. Subsequently we need + * to double-check all paths and in the directory rename case we + * need to verify that we are not creating a directory loop. To + * handle this we acquire all but fdvp using non-blocking + * acquisitions. If we fail to acquire any lock in the path we will + * drop all held locks, acquire the new lock in a blocking fashion, + * and then release it and restart the rename. This acquire/release + * step ensures that we do not spin on a lock waiting for release. + */ + error = vn_lock(fdvp, LK_EXCLUSIVE); + if (error) + goto releout; + if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) { + VOP_UNLOCK(fdvp, 0); + error = vn_lock(tdvp, LK_EXCLUSIVE); + if (error) + goto releout; + VOP_UNLOCK(tdvp, 0); + atomic_add_int(&rename_restarts, 1); + goto relock; + } + /* + * Re-resolve fvp to be certain it still exists and fetch the + * correct vnode. + */ + error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino); + if (error) { + VOP_UNLOCK(fdvp, 0); + VOP_UNLOCK(tdvp, 0); + goto releout; + } + error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp); + if (error) { + VOP_UNLOCK(fdvp, 0); + VOP_UNLOCK(tdvp, 0); + if (error != EBUSY) + goto releout; + error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp); + if (error != 0) + goto releout; + VOP_UNLOCK(nvp, 0); + vrele(fvp); + fvp = nvp; + atomic_add_int(&rename_restarts, 1); + goto relock; + } + vrele(fvp); + fvp = nvp; + /* + * Re-resolve tvp and acquire the vnode lock if present. + */ + error = ufs_lookup_ino(tdvp, NULL, tcnp, &ino); + if (error != 0 && error != EJUSTRETURN) { + VOP_UNLOCK(fdvp, 0); + VOP_UNLOCK(tdvp, 0); + VOP_UNLOCK(fvp, 0); + goto releout; + } + /* + * If tvp disappeared we just carry on. + */ + if (error == EJUSTRETURN && tvp != NULL) { + vrele(tvp); + tvp = NULL; + } + /* + * Get the tvp ino if the lookup succeeded. We may have to restart + * if the non-blocking acquire fails. 
+ */ + if (error == 0) { + nvp = NULL; + error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp); + if (tvp) + vrele(tvp); + tvp = nvp; + if (error) { + VOP_UNLOCK(fdvp, 0); + VOP_UNLOCK(tdvp, 0); + VOP_UNLOCK(fvp, 0); + if (error != EBUSY) + goto releout; + error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp); + if (error != 0) + goto releout; + vput(nvp); + atomic_add_int(&rename_restarts, 1); + goto relock; + } + } + fdp = VTOI(fdvp); + fip = VTOI(fvp); + tdp = VTOI(tdvp); + tip = NULL; + if (tvp) + tip = VTOI(tvp); + if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || + (VTOI(tdvp)->i_flags & APPEND))) { + error = EPERM; + goto unlockout; + } + /* + * Renaming a file to itself has no effect. The upper layers should + * not call us in that case. However, things could change after + * we drop the locks above. + */ + if (fvp == tvp) { + error = 0; + goto unlockout; + } + doingdirectory = 0; + newparent = 0; + ino = fip->i_number; + if (fip->i_nlink >= LINK_MAX) { + error = EMLINK; + goto unlockout; + } + if ((fip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) + || (fdp->i_flags & APPEND)) { + error = EPERM; + goto unlockout; + } + if ((fip->i_mode & IFMT) == IFDIR) { + /* + * Avoid ".", "..", and aliases of "." for obvious reasons. + */ + if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || + fdp == fip || + (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) { + error = EINVAL; + goto unlockout; + } + if (fdp->i_number != tdp->i_number) + newparent = tdp->i_number; + doingdirectory = 1; + } + if ((fvp->v_type == VDIR && fvp->v_mountedhere != NULL) || + (tvp != NULL && tvp->v_type == VDIR && + tvp->v_mountedhere != NULL)) { + error = EXDEV; + goto unlockout; + } + + /* + * If ".." must be changed (ie the directory gets a new + * parent) then the source directory must not be in the + * directory hierarchy above the target, as this would + * orphan everything below the source directory. Also + * the user must have write permission in the source so + * as to be able to change "..". + */ + if (doingdirectory && newparent) { + error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread); + if (error) + goto unlockout; + error = ufs_checkpath(ino, fdp->i_number, tdp, tcnp->cn_cred, + &ino); + /* + * We encountered a lock that we have to wait for. Unlock + * everything else and VGET before restarting. + */ + if (ino) { + VOP_UNLOCK(fdvp, 0); + VOP_UNLOCK(fvp, 0); + VOP_UNLOCK(tdvp, 0); + if (tvp) + VOP_UNLOCK(tvp, 0); + error = VFS_VGET(mp, ino, LK_SHARED, &nvp); + if (error == 0) + vput(nvp); + atomic_add_int(&rename_restarts, 1); + goto relock; + } + if (error) + goto unlockout; + if ((tcnp->cn_flags & SAVESTART) == 0) + panic("ufs_rename: lost to startdir"); + } + if (fip->i_effnlink == 0 || fdp->i_effnlink == 0 || + tdp->i_effnlink == 0) + panic("Bad effnlink fip %p, fdp %p, tdp %p", fip, fdp, tdp); + + /* + * 1) Bump link count while we're moving stuff + * around. If we crash somewhere before + * completing our work, the link count + * may be wrong, but correctable. + */ + fip->i_effnlink++; + fip->i_nlink++; + DIP_SET(fip, i_nlink, fip->i_nlink); + fip->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(fvp)) + softdep_setup_link(tdp, fip); + error = UFS_UPDATE(fvp, !(DOINGSOFTDEP(fvp) | DOINGASYNC(fvp))); + if (error) + goto bad; + + /* + * 2) If target doesn't exist, link the target + * to the source and unlink the source. + * Otherwise, rewrite the target directory + * entry to reference the source inode and + * expunge the original entry's existence. 
+ */ + if (tip == NULL) { + if (ITODEV(tdp) != ITODEV(fip)) + panic("ufs_rename: EXDEV"); + if (doingdirectory && newparent) { + /* + * Account for ".." in new directory. + * When source and destination have the same + * parent we don't adjust the link count. The + * actual link modification is completed when + * .. is rewritten below. + */ + if ((nlink_t)tdp->i_nlink >= LINK_MAX) { + error = EMLINK; + goto bad; + } + } + ufs_makedirentry(fip, tcnp, &newdir); + error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL, 1); + if (error) + goto bad; + /* Setup tdvp for directory compaction if needed. */ + if (tdp->i_count && tdp->i_endoff && + tdp->i_endoff < tdp->i_size) + endoff = tdp->i_endoff; + } else { + if (ITODEV(tip) != ITODEV(tdp) || ITODEV(tip) != ITODEV(fip)) + panic("ufs_rename: EXDEV"); + /* + * Short circuit rename(foo, foo). + */ + if (tip->i_number == fip->i_number) + panic("ufs_rename: same file"); + /* + * If the parent directory is "sticky", then the caller + * must possess VADMIN for the parent directory, or the + * destination of the rename. This implements append-only + * directories. + */ + if ((tdp->i_mode & S_ISTXT) && + VOP_ACCESS(tdvp, VADMIN, tcnp->cn_cred, td) && + VOP_ACCESS(tvp, VADMIN, tcnp->cn_cred, td)) { + error = EPERM; + goto bad; + } + /* + * Target must be empty if a directory and have no links + * to it. Also, ensure source and target are compatible + * (both directories, or both not directories). + */ + if ((tip->i_mode & IFMT) == IFDIR) { + if ((tip->i_effnlink > 2) || + !ufs_dirempty(tip, tdp->i_number, tcnp->cn_cred)) { + error = ENOTEMPTY; + goto bad; + } + if (!doingdirectory) { + error = ENOTDIR; + goto bad; + } + cache_purge(tdvp); + } else if (doingdirectory) { + error = EISDIR; + goto bad; + } + if (doingdirectory) { + if (!newparent) { + tdp->i_effnlink--; + if (DOINGSOFTDEP(tdvp)) + softdep_change_linkcnt(tdp); + } + tip->i_effnlink--; + if (DOINGSOFTDEP(tvp)) + softdep_change_linkcnt(tip); + } + error = ufs_dirrewrite(tdp, tip, fip->i_number, + IFTODT(fip->i_mode), + (doingdirectory && newparent) ? newparent : doingdirectory); + if (error) { + if (doingdirectory) { + if (!newparent) { + tdp->i_effnlink++; + if (DOINGSOFTDEP(tdvp)) + softdep_change_linkcnt(tdp); + } + tip->i_effnlink++; + if (DOINGSOFTDEP(tvp)) + softdep_change_linkcnt(tip); + } + } + if (doingdirectory && !DOINGSOFTDEP(tvp)) { + /* + * The only stuff left in the directory is "." + * and "..". The "." reference is inconsequential + * since we are quashing it. We have removed the "." + * reference and the reference in the parent directory, + * but there may be other hard links. The soft + * dependency code will arrange to do these operations + * after the parent directory entry has been deleted on + * disk, so when running with that code we avoid doing + * them now. + */ + if (!newparent) { + tdp->i_nlink--; + DIP_SET(tdp, i_nlink, tdp->i_nlink); + tdp->i_flag |= IN_CHANGE; + } + tip->i_nlink--; + DIP_SET(tip, i_nlink, tip->i_nlink); + tip->i_flag |= IN_CHANGE; + } + } + + /* + * 3) Unlink the source. We have to resolve the path again to + * fixup the directory offset and count for ufs_dirremove. 
+ */ + if (fdvp == tdvp) { + error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino); + if (error) + panic("ufs_rename: from entry went away!"); + if (ino != fip->i_number) + panic("ufs_rename: ino mismatch %ju != %ju\n", + (uintmax_t)ino, (uintmax_t)fip->i_number); + } + /* + * If the source is a directory with a + * new parent, the link count of the old + * parent directory must be decremented + * and ".." set to point to the new parent. + */ + if (doingdirectory && newparent) { + /* + * If tip exists we simply use its link, otherwise we must + * add a new one. + */ + if (tip == NULL) { + tdp->i_effnlink++; + tdp->i_nlink++; + DIP_SET(tdp, i_nlink, tdp->i_nlink); + tdp->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(tdvp)) + softdep_setup_dotdot_link(tdp, fip); + error = UFS_UPDATE(tdvp, !(DOINGSOFTDEP(tdvp) | + DOINGASYNC(tdvp))); + /* Don't go to bad here as the new link exists. */ + if (error) + goto unlockout; + } else if (DOINGSUJ(tdvp)) + /* Journal must account for each new link. */ + softdep_setup_dotdot_link(tdp, fip); + fip->i_offset = mastertemplate.dot_reclen; + ufs_dirrewrite(fip, fdp, newparent, DT_DIR, 0); + cache_purge(fdvp); + } + error = ufs_dirremove(fdvp, fip, fcnp->cn_flags, 0); + /* + * The kern_renameat() looks up the fvp using the DELETE flag, which + * causes the removal of the name cache entry for fvp. + * As the relookup of the fvp is done in two steps: + * ufs_lookup_ino() and then VFS_VGET(), another thread might do a + * normal lookup of the from name just before the VFS_VGET() call, + * causing the cache entry to be re-instantiated. + * + * The same issue also applies to tvp if it exists as + * otherwise we may have a stale name cache entry for the new + * name that references the old i-node if it has other links + * or open file descriptors. + */ + cache_purge(fvp); + if (tvp) + cache_purge(tvp); + cache_purge_negative(tdvp); + +unlockout: + vput(fdvp); + vput(fvp); + if (tvp) + vput(tvp); + /* + * If compaction or fsync was requested do it now that other locks + * are no longer needed. + */ + if (error == 0 && endoff != 0) { + error = UFS_TRUNCATE(tdvp, endoff, IO_NORMAL | IO_SYNC, + tcnp->cn_cred); + if (error != 0) + vn_printf(tdvp, + "ufs_rename: failed to truncate, error %d\n", + error); +#ifdef UFS_DIRHASH + else if (tdp->i_dirhash != NULL) + ufsdirhash_dirtrunc(tdp, endoff); +#endif + /* + * Even if the directory compaction failed, rename was + * succesful. Do not propagate a UFS_TRUNCATE() error + * to the caller. + */ + error = 0; + } + if (error == 0 && tdp->i_flag & IN_NEEDSYNC) + error = VOP_FSYNC(tdvp, MNT_WAIT, td); + vput(tdvp); + return (error); + +bad: + fip->i_effnlink--; + fip->i_nlink--; + DIP_SET(fip, i_nlink, fip->i_nlink); + fip->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(fvp)) + softdep_revert_link(tdp, fip); + goto unlockout; + +releout: + vrele(fdvp); + vrele(fvp); + vrele(tdvp); + if (tvp) + vrele(tvp); + + return (error); +} + +#ifdef UFS_ACL +static int +ufs_do_posix1e_acl_inheritance_dir(struct vnode *dvp, struct vnode *tvp, + mode_t dmode, struct ucred *cred, struct thread *td) +{ + int error; + struct inode *ip = VTOI(tvp); + struct acl *dacl, *acl; + + acl = acl_alloc(M_WAITOK); + dacl = acl_alloc(M_WAITOK); + + /* + * Retrieve default ACL from parent, if any. + */ + error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td); + switch (error) { + case 0: + /* + * Retrieved a default ACL, so merge mode and ACL if + * necessary. If the ACL is empty, fall through to + * the "not defined or available" case. 
+ */ + if (acl->acl_cnt != 0) { + dmode = acl_posix1e_newfilemode(dmode, acl); + ip->i_mode = dmode; + DIP_SET(ip, i_mode, dmode); + *dacl = *acl; + ufs_sync_acl_from_inode(ip, acl); + break; + } + /* FALLTHROUGH */ + + case EOPNOTSUPP: + /* + * Just use the mode as-is. + */ + ip->i_mode = dmode; + DIP_SET(ip, i_mode, dmode); + error = 0; + goto out; + + default: + goto out; + } + + /* + * XXX: If we abort now, will Soft Updates notify the extattr + * code that the EAs for the file need to be released? + */ + error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td); + if (error == 0) + error = VOP_SETACL(tvp, ACL_TYPE_DEFAULT, dacl, cred, td); + switch (error) { + case 0: + break; + + case EOPNOTSUPP: + /* + * XXX: This should not happen, as EOPNOTSUPP above + * was supposed to free acl. + */ + printf("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()\n"); + /* + panic("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()"); + */ + break; + + default: + goto out; + } + +out: + acl_free(acl); + acl_free(dacl); + + return (error); +} + +static int +ufs_do_posix1e_acl_inheritance_file(struct vnode *dvp, struct vnode *tvp, + mode_t mode, struct ucred *cred, struct thread *td) +{ + int error; + struct inode *ip = VTOI(tvp); + struct acl *acl; + + acl = acl_alloc(M_WAITOK); + + /* + * Retrieve default ACL for parent, if any. + */ + error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td); + switch (error) { + case 0: + /* + * Retrieved a default ACL, so merge mode and ACL if + * necessary. + */ + if (acl->acl_cnt != 0) { + /* + * Two possible ways for default ACL to not + * be present. First, the EA can be + * undefined, or second, the default ACL can + * be blank. If it's blank, fall through to + * the it's not defined case. + */ + mode = acl_posix1e_newfilemode(mode, acl); + ip->i_mode = mode; + DIP_SET(ip, i_mode, mode); + ufs_sync_acl_from_inode(ip, acl); + break; + } + /* FALLTHROUGH */ + + case EOPNOTSUPP: + /* + * Just use the mode as-is. + */ + ip->i_mode = mode; + DIP_SET(ip, i_mode, mode); + error = 0; + goto out; + + default: + goto out; + } + + /* + * XXX: If we abort now, will Soft Updates notify the extattr + * code that the EAs for the file need to be released? + */ + error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td); + switch (error) { + case 0: + break; + + case EOPNOTSUPP: + /* + * XXX: This should not happen, as EOPNOTSUPP above was + * supposed to free acl. 
+ */ + printf("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() " + "but no VOP_SETACL()\n"); + /* panic("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() " + "but no VOP_SETACL()"); */ + break; + + default: + goto out; + } + +out: + acl_free(acl); + + return (error); +} + +static int +ufs_do_nfs4_acl_inheritance(struct vnode *dvp, struct vnode *tvp, + mode_t child_mode, struct ucred *cred, struct thread *td) +{ + int error; + struct acl *parent_aclp, *child_aclp; + + parent_aclp = acl_alloc(M_WAITOK); + child_aclp = acl_alloc(M_WAITOK | M_ZERO); + + error = ufs_getacl_nfs4_internal(dvp, parent_aclp, td); + if (error) + goto out; + acl_nfs4_compute_inherited_acl(parent_aclp, child_aclp, + child_mode, VTOI(tvp)->i_uid, tvp->v_type == VDIR); + error = ufs_setacl_nfs4_internal(tvp, child_aclp, td); + if (error) + goto out; +out: + acl_free(parent_aclp); + acl_free(child_aclp); + + return (error); +} +#endif + +/* + * Mkdir system call + */ +static int +ufs_mkdir(ap) + struct vop_mkdir_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap; +{ + struct vnode *dvp = ap->a_dvp; + struct vattr *vap = ap->a_vap; + struct componentname *cnp = ap->a_cnp; + struct inode *ip, *dp; + struct vnode *tvp; + struct buf *bp; + struct dirtemplate dirtemplate, *dtp; + struct direct newdir; + int error, dmode; + long blkoff; + +#ifdef INVARIANTS + if ((cnp->cn_flags & HASBUF) == 0) + panic("ufs_mkdir: no name"); +#endif + dp = VTOI(dvp); + if ((nlink_t)dp->i_nlink >= LINK_MAX) { + error = EMLINK; + goto out; + } + dmode = vap->va_mode & 0777; + dmode |= IFDIR; + /* + * Must simulate part of ufs_makeinode here to acquire the inode, + * but not have it entered in the parent directory. The entry is + * made later after writing "." and ".." entries. + */ + if (dp->i_effnlink < 2) { + print_bad_link_count("ufs_mkdir", dvp); + error = EINVAL; + goto out; + } + error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp); + if (error) + goto out; + ip = VTOI(tvp); + ip->i_gid = dp->i_gid; + DIP_SET(ip, i_gid, dp->i_gid); +#ifdef SUIDDIR + { +#ifdef QUOTA + struct ucred ucred, *ucp; + gid_t ucred_group; + ucp = cnp->cn_cred; +#endif + /* + * If we are hacking owners here, (only do this where told to) + * and we are not giving it TO root, (would subvert quotas) + * then go ahead and give it to the other user. + * The new directory also inherits the SUID bit. + * If user's UID and dir UID are the same, + * 'give it away' so that the SUID is still forced on. + */ + if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && + (dp->i_mode & ISUID) && dp->i_uid) { + dmode |= ISUID; + ip->i_uid = dp->i_uid; + DIP_SET(ip, i_uid, dp->i_uid); +#ifdef QUOTA + if (dp->i_uid != cnp->cn_cred->cr_uid) { + /* + * Make sure the correct user gets charged + * for the space. + * Make a dummy credential for the victim. + * XXX This seems to never be accessed out of + * our context so a stack variable is ok. 
+ */ + refcount_init(&ucred.cr_ref, 1); + ucred.cr_uid = ip->i_uid; + ucred.cr_ngroups = 1; + ucred.cr_groups = &ucred_group; + ucred.cr_groups[0] = dp->i_gid; + ucp = &ucred; + } +#endif + } else { + ip->i_uid = cnp->cn_cred->cr_uid; + DIP_SET(ip, i_uid, ip->i_uid); + } +#ifdef QUOTA + if ((error = getinoquota(ip)) || + (error = chkiq(ip, 1, ucp, 0))) { + if (DOINGSOFTDEP(tvp)) + softdep_revert_link(dp, ip); + UFS_VFREE(tvp, ip->i_number, dmode); + vput(tvp); + return (error); + } +#endif + } +#else /* !SUIDDIR */ + ip->i_uid = cnp->cn_cred->cr_uid; + DIP_SET(ip, i_uid, ip->i_uid); +#ifdef QUOTA + if ((error = getinoquota(ip)) || + (error = chkiq(ip, 1, cnp->cn_cred, 0))) { + if (DOINGSOFTDEP(tvp)) + softdep_revert_link(dp, ip); + UFS_VFREE(tvp, ip->i_number, dmode); + vput(tvp); + return (error); + } +#endif +#endif /* !SUIDDIR */ + ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; + ip->i_mode = dmode; + DIP_SET(ip, i_mode, dmode); + tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ + ip->i_effnlink = 2; + ip->i_nlink = 2; + DIP_SET(ip, i_nlink, 2); + + if (cnp->cn_flags & ISWHITEOUT) { + ip->i_flags |= UF_OPAQUE; + DIP_SET(ip, i_flags, ip->i_flags); + } + + /* + * Bump link count in parent directory to reflect work done below. + * Should be done before reference is created so cleanup is + * possible if we crash. + */ + dp->i_effnlink++; + dp->i_nlink++; + DIP_SET(dp, i_nlink, dp->i_nlink); + dp->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(dvp)) + softdep_setup_mkdir(dp, ip); + error = UFS_UPDATE(dvp, !(DOINGSOFTDEP(dvp) | DOINGASYNC(dvp))); + if (error) + goto bad; +#ifdef MAC + if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) { + error = mac_vnode_create_extattr(cnp->cn_cred, dvp->v_mount, + dvp, tvp, cnp); + if (error) + goto bad; + } +#endif +#ifdef UFS_ACL + if (dvp->v_mount->mnt_flag & MNT_ACLS) { + error = ufs_do_posix1e_acl_inheritance_dir(dvp, tvp, dmode, + cnp->cn_cred, cnp->cn_thread); + if (error) + goto bad; + } else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) { + error = ufs_do_nfs4_acl_inheritance(dvp, tvp, dmode, + cnp->cn_cred, cnp->cn_thread); + if (error) + goto bad; + } +#endif /* !UFS_ACL */ + + /* + * Initialize directory with "." and ".." from static template. + */ + if (dvp->v_mount->mnt_maxsymlinklen > 0) + dtp = &mastertemplate; + else + dtp = (struct dirtemplate *)&omastertemplate; + dirtemplate = *dtp; + dirtemplate.dot_ino = ip->i_number; + dirtemplate.dotdot_ino = dp->i_number; + vnode_pager_setsize(tvp, DIRBLKSIZ); + if ((error = UFS_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred, + BA_CLRBUF, &bp)) != 0) + goto bad; + ip->i_size = DIRBLKSIZ; + DIP_SET(ip, i_size, DIRBLKSIZ); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate); + if (DOINGSOFTDEP(tvp)) { + /* + * Ensure that the entire newly allocated block is a + * valid directory so that future growth within the + * block does not have to ensure that the block is + * written before the inode. + */ + blkoff = DIRBLKSIZ; + while (blkoff < bp->b_bcount) { + ((struct direct *) + (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; + blkoff += DIRBLKSIZ; + } + } + if ((error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(tvp) | + DOINGASYNC(tvp)))) != 0) { + (void)bwrite(bp); + goto bad; + } + /* + * Directory set up, now install its entry in the parent directory. + * + * If we are not doing soft dependencies, then we must write out the + * buffer containing the new directory body before entering the new + * name in the parent. 
If we are doing soft dependencies, then the + * buffer containing the new directory body will be passed to and + * released in the soft dependency code after the code has attached + * an appropriate ordering dependency to the buffer which ensures that + * the buffer is written before the new name is written in the parent. + */ + if (DOINGASYNC(dvp)) + bdwrite(bp); + else if (!DOINGSOFTDEP(dvp) && ((error = bwrite(bp)))) + goto bad; + ufs_makedirentry(ip, cnp, &newdir); + error = ufs_direnter(dvp, tvp, &newdir, cnp, bp, 0); + +bad: + if (error == 0) { + *ap->a_vpp = tvp; + } else { + dp->i_effnlink--; + dp->i_nlink--; + DIP_SET(dp, i_nlink, dp->i_nlink); + dp->i_flag |= IN_CHANGE; + /* + * No need to do an explicit VOP_TRUNCATE here, vrele will + * do this for us because we set the link count to 0. + */ + ip->i_effnlink = 0; + ip->i_nlink = 0; + DIP_SET(ip, i_nlink, 0); + ip->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(tvp)) + softdep_revert_mkdir(dp, ip); + + vput(tvp); + } +out: + return (error); +} + +/* + * Rmdir system call. + */ +static int +ufs_rmdir(ap) + struct vop_rmdir_args /* { + struct vnode *a_dvp; + struct vnode *a_vp; + struct componentname *a_cnp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct vnode *dvp = ap->a_dvp; + struct componentname *cnp = ap->a_cnp; + struct inode *ip, *dp; + int error; + + ip = VTOI(vp); + dp = VTOI(dvp); + + /* + * Do not remove a directory that is in the process of being renamed. + * Verify the directory is empty (and valid). Rmdir ".." will not be + * valid since ".." will contain a reference to the current directory + * and thus be non-empty. Do not allow the removal of mounted on + * directories (this can happen when an NFS exported filesystem + * tries to remove a locally mounted on directory). + */ + error = 0; + if (dp->i_effnlink <= 2) { + if (dp->i_effnlink == 2) + print_bad_link_count("ufs_rmdir", dvp); + error = EINVAL; + goto out; + } + if (!ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) { + error = ENOTEMPTY; + goto out; + } + if ((dp->i_flags & APPEND) + || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) { + error = EPERM; + goto out; + } + if (vp->v_mountedhere != 0) { + error = EINVAL; + goto out; + } +#ifdef UFS_GJOURNAL + ufs_gjournal_orphan(vp); +#endif + /* + * Delete reference to directory before purging + * inode. If we crash in between, the directory + * will be reattached to lost+found, + */ + dp->i_effnlink--; + ip->i_effnlink--; + if (DOINGSOFTDEP(vp)) + softdep_setup_rmdir(dp, ip); + error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1); + if (error) { + dp->i_effnlink++; + ip->i_effnlink++; + if (DOINGSOFTDEP(vp)) + softdep_revert_rmdir(dp, ip); + goto out; + } + cache_purge(dvp); + /* + * The only stuff left in the directory is "." and "..". The "." + * reference is inconsequential since we are quashing it. The soft + * dependency code will arrange to do these operations after + * the parent directory entry has been deleted on disk, so + * when running with that code we avoid doing them now. + */ + if (!DOINGSOFTDEP(vp)) { + dp->i_nlink--; + DIP_SET(dp, i_nlink, dp->i_nlink); + dp->i_flag |= IN_CHANGE; + error = UFS_UPDATE(dvp, 0); + ip->i_nlink--; + DIP_SET(ip, i_nlink, ip->i_nlink); + ip->i_flag |= IN_CHANGE; + } + cache_purge(vp); +#ifdef UFS_DIRHASH + /* Kill any active hash; i_effnlink == 0, so it will not come back. 
*/ + if (ip->i_dirhash != NULL) + ufsdirhash_free(ip); +#endif +out: + return (error); +} + +/* + * symlink -- make a symbolic link + */ +static int +ufs_symlink(ap) + struct vop_symlink_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + char *a_target; + } */ *ap; +{ + struct vnode *vp, **vpp = ap->a_vpp; + struct inode *ip; + int len, error; + + error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, + vpp, ap->a_cnp, "ufs_symlink"); + if (error) + return (error); + vp = *vpp; + len = strlen(ap->a_target); + if (len < vp->v_mount->mnt_maxsymlinklen) { + ip = VTOI(vp); + bcopy(ap->a_target, SHORTLINK(ip), len); + ip->i_size = len; + DIP_SET(ip, i_size, len); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + error = UFS_UPDATE(vp, 0); + } else + error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0, + UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, + ap->a_cnp->cn_cred, NOCRED, NULL, NULL); + if (error) + vput(vp); + return (error); +} + +/* + * Vnode op for reading directories. + */ +int +ufs_readdir(ap) + struct vop_readdir_args /* { + struct vnode *a_vp; + struct uio *a_uio; + struct ucred *a_cred; + int *a_eofflag; + int *a_ncookies; + u_long **a_cookies; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct uio *uio = ap->a_uio; + struct buf *bp; + struct inode *ip; + struct direct *dp, *edp; + u_long *cookies; + struct dirent dstdp; + off_t offset, startoffset; + size_t readcnt, skipcnt; + ssize_t startresid; + int ncookies; + int error; + + if (uio->uio_offset < 0) + return (EINVAL); + ip = VTOI(vp); + if (ip->i_effnlink == 0) + return (0); + if (ap->a_ncookies != NULL) { + if (uio->uio_resid < 0) + ncookies = 0; + else + ncookies = uio->uio_resid; + if (uio->uio_offset >= ip->i_size) + ncookies = 0; + else if (ip->i_size - uio->uio_offset < ncookies) + ncookies = ip->i_size - uio->uio_offset; + ncookies = ncookies / (offsetof(struct direct, d_name) + 4) + 1; + cookies = malloc(ncookies * sizeof(*cookies), M_TEMP, M_WAITOK); + *ap->a_ncookies = ncookies; + *ap->a_cookies = cookies; + } else { + ncookies = 0; + cookies = NULL; + } + offset = startoffset = uio->uio_offset; + startresid = uio->uio_resid; + error = 0; + while (error == 0 && uio->uio_resid > 0 && + uio->uio_offset < ip->i_size) { + error = ffs_blkatoff(vp, uio->uio_offset, NULL, &bp); + if (error) + break; + if (bp->b_offset + bp->b_bcount > ip->i_size) + readcnt = ip->i_size - bp->b_offset; + else + readcnt = bp->b_bcount; + skipcnt = (size_t)(uio->uio_offset - bp->b_offset) & + ~(size_t)(DIRBLKSIZ - 1); + offset = bp->b_offset + skipcnt; + dp = (struct direct *)&bp->b_data[skipcnt]; + edp = (struct direct *)&bp->b_data[readcnt]; + while (error == 0 && uio->uio_resid > 0 && dp < edp) { + if (dp->d_reclen <= offsetof(struct direct, d_name) || + (caddr_t)dp + dp->d_reclen > (caddr_t)edp) { + error = EIO; + break; + } +#if BYTE_ORDER == LITTLE_ENDIAN + /* Old filesystem format. 
*/ + if (vp->v_mount->mnt_maxsymlinklen <= 0) { + dstdp.d_namlen = dp->d_type; + dstdp.d_type = dp->d_namlen; + } else +#endif + { + dstdp.d_namlen = dp->d_namlen; + dstdp.d_type = dp->d_type; + } + if (offsetof(struct direct, d_name) + dstdp.d_namlen > + dp->d_reclen) { + error = EIO; + break; + } + if (offset < startoffset || dp->d_ino == 0) + goto nextentry; + dstdp.d_fileno = dp->d_ino; + dstdp.d_reclen = GENERIC_DIRSIZ(&dstdp); + bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); + dstdp.d_name[dstdp.d_namlen] = '\0'; + if (dstdp.d_reclen > uio->uio_resid) { + if (uio->uio_resid == startresid) + error = EINVAL; + else + error = EJUSTRETURN; + break; + } + /* Advance dp. */ + error = uiomove((caddr_t)&dstdp, dstdp.d_reclen, uio); + if (error) + break; + if (cookies != NULL) { + KASSERT(ncookies > 0, + ("ufs_readdir: cookies buffer too small")); + *cookies = offset + dp->d_reclen; + cookies++; + ncookies--; + } +nextentry: + offset += dp->d_reclen; + dp = (struct direct *)((caddr_t)dp + dp->d_reclen); + } + bqrelse(bp); + uio->uio_offset = offset; + } + /* We need to correct uio_offset. */ + uio->uio_offset = offset; + if (error == EJUSTRETURN) + error = 0; + if (ap->a_ncookies != NULL) { + if (error == 0) { + ap->a_ncookies -= ncookies; + } else { + free(*ap->a_cookies, M_TEMP); + *ap->a_ncookies = 0; + *ap->a_cookies = NULL; + } + } + if (error == 0 && ap->a_eofflag) + *ap->a_eofflag = ip->i_size <= uio->uio_offset; + return (error); +} + +/* + * Return target name of a symbolic link + */ +static int +ufs_readlink(ap) + struct vop_readlink_args /* { + struct vnode *a_vp; + struct uio *a_uio; + struct ucred *a_cred; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); + doff_t isize; + + isize = ip->i_size; + if ((isize < vp->v_mount->mnt_maxsymlinklen) || + DIP(ip, i_blocks) == 0) { /* XXX - for old fastlink support */ + return (uiomove(SHORTLINK(ip), isize, ap->a_uio)); + } + return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); +} + +/* + * Calculate the logical to physical mapping if not done already, + * then call the device strategy routine. + * + * In order to be able to swap to a file, the ufs_bmaparray() operation may not + * deadlock on memory. See ufs_bmap() for details. + */ +static int +ufs_strategy(ap) + struct vop_strategy_args /* { + struct vnode *a_vp; + struct buf *a_bp; + } */ *ap; +{ + struct buf *bp = ap->a_bp; + struct vnode *vp = ap->a_vp; + ufs2_daddr_t blkno; + int error; + + if (bp->b_blkno == bp->b_lblkno) { + error = ufs_bmaparray(vp, bp->b_lblkno, &blkno, bp, NULL, NULL); + bp->b_blkno = blkno; + if (error) { + bp->b_error = error; + bp->b_ioflags |= BIO_ERROR; + bufdone(bp); + return (0); + } + if ((long)bp->b_blkno == -1) + vfs_bio_clrbuf(bp); + } + if ((long)bp->b_blkno == -1) { + bufdone(bp); + return (0); + } + bp->b_iooffset = dbtob(bp->b_blkno); + BO_STRATEGY(VFSTOUFS(vp->v_mount)->um_bo, bp); + return (0); +} + +/* + * Print out the contents of an inode. + */ +static int +ufs_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); + + printf("\tino %lu, on dev %s", (u_long)ip->i_number, + devtoname(ITODEV(ip))); + if (vp->v_type == VFIFO) + fifo_printinfo(vp); + printf("\n"); + return (0); +} + +/* + * Close wrapper for fifos. + * + * Update the times on the inode then do device close. 
+ */ +static int +ufsfifo_close(ap) + struct vop_close_args /* { + struct vnode *a_vp; + int a_fflag; + struct ucred *a_cred; + struct thread *a_td; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + int usecount; + + VI_LOCK(vp); + usecount = vp->v_usecount; + if (usecount > 1) + ufs_itimes_locked(vp); + VI_UNLOCK(vp); + return (fifo_specops.vop_close(ap)); +} + +/* + * Kqfilter wrapper for fifos. + * + * Fall through to ufs kqfilter routines if needed + */ +static int +ufsfifo_kqfilter(ap) + struct vop_kqfilter_args *ap; +{ + int error; + + error = fifo_specops.vop_kqfilter(ap); + if (error) + error = vfs_kqfilter(ap); + return (error); +} + +/* + * Return POSIX pathconf information applicable to ufs filesystems. + */ +static int +ufs_pathconf(ap) + struct vop_pathconf_args /* { + struct vnode *a_vp; + int a_name; + int *a_retval; + } */ *ap; +{ + int error; + + error = 0; + switch (ap->a_name) { + case _PC_NAME_MAX: + *ap->a_retval = NAME_MAX; + break; + case _PC_PIPE_BUF: + if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) + *ap->a_retval = PIPE_BUF; + else + error = EINVAL; + break; + case _PC_CHOWN_RESTRICTED: + *ap->a_retval = 1; + break; + case _PC_NO_TRUNC: + *ap->a_retval = 1; + break; + case _PC_ACL_EXTENDED: +#ifdef UFS_ACL + if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS) + *ap->a_retval = 1; + else + *ap->a_retval = 0; +#else + *ap->a_retval = 0; +#endif + break; + + case _PC_ACL_NFS4: +#ifdef UFS_ACL + if (ap->a_vp->v_mount->mnt_flag & MNT_NFS4ACLS) + *ap->a_retval = 1; + else + *ap->a_retval = 0; +#else + *ap->a_retval = 0; +#endif + break; + + case _PC_ACL_PATH_MAX: +#ifdef UFS_ACL + if (ap->a_vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) + *ap->a_retval = ACL_MAX_ENTRIES; + else + *ap->a_retval = 3; +#else + *ap->a_retval = 3; +#endif + break; + case _PC_MAC_PRESENT: +#ifdef MAC + if (ap->a_vp->v_mount->mnt_flag & MNT_MULTILABEL) + *ap->a_retval = 1; + else + *ap->a_retval = 0; +#else + *ap->a_retval = 0; +#endif + break; + case _PC_MIN_HOLE_SIZE: + *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; + break; + case _PC_PRIO_IO: + *ap->a_retval = 0; + break; + case _PC_SYNC_IO: + *ap->a_retval = 0; + break; + case _PC_ALLOC_SIZE_MIN: + *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize; + break; + case _PC_FILESIZEBITS: + *ap->a_retval = 64; + break; + case _PC_REC_INCR_XFER_SIZE: + *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; + break; + case _PC_REC_MAX_XFER_SIZE: + *ap->a_retval = -1; /* means ``unlimited'' */ + break; + case _PC_REC_MIN_XFER_SIZE: + *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; + break; + case _PC_REC_XFER_ALIGN: + *ap->a_retval = PAGE_SIZE; + break; + case _PC_SYMLINK_MAX: + *ap->a_retval = MAXPATHLEN; + break; + + default: + error = vop_stdpathconf(ap); + break; + } + return (error); +} + +/* + * Initialize the vnode associated with a new inode, handle aliased + * vnodes. + */ +int +ufs_vinit(mntp, fifoops, vpp) + struct mount *mntp; + struct vop_vector *fifoops; + struct vnode **vpp; +{ + struct inode *ip; + struct vnode *vp; + + vp = *vpp; + ip = VTOI(vp); + vp->v_type = IFTOVT(ip->i_mode); + if (vp->v_type == VFIFO) + vp->v_op = fifoops; + ASSERT_VOP_LOCKED(vp, "ufs_vinit"); + if (ip->i_number == ROOTINO) + vp->v_vflag |= VV_ROOT; + *vpp = vp; + return (0); +} + +/* + * Allocate a new inode. + * Vnode dvp must be locked. 
+ */ +static int +ufs_makeinode(mode, dvp, vpp, cnp, callfunc) + int mode; + struct vnode *dvp; + struct vnode **vpp; + struct componentname *cnp; + const char *callfunc; +{ + struct inode *ip, *pdir; + struct direct newdir; + struct vnode *tvp; + int error; + + pdir = VTOI(dvp); +#ifdef INVARIANTS + if ((cnp->cn_flags & HASBUF) == 0) + panic("%s: no name", callfunc); +#endif + *vpp = NULL; + if ((mode & IFMT) == 0) + mode |= IFREG; + + if (pdir->i_effnlink < 2) { + print_bad_link_count(callfunc, dvp); + return (EINVAL); + } + error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp); + if (error) + return (error); + ip = VTOI(tvp); + ip->i_gid = pdir->i_gid; + DIP_SET(ip, i_gid, pdir->i_gid); +#ifdef SUIDDIR + { +#ifdef QUOTA + struct ucred ucred, *ucp; + gid_t ucred_group; + ucp = cnp->cn_cred; +#endif + /* + * If we are not the owner of the directory, + * and we are hacking owners here, (only do this where told to) + * and we are not giving it TO root, (would subvert quotas) + * then go ahead and give it to the other user. + * Note that this drops off the execute bits for security. + */ + if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && + (pdir->i_mode & ISUID) && + (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) { + ip->i_uid = pdir->i_uid; + DIP_SET(ip, i_uid, ip->i_uid); + mode &= ~07111; +#ifdef QUOTA + /* + * Make sure the correct user gets charged + * for the space. + * Quickly knock up a dummy credential for the victim. + * XXX This seems to never be accessed out of our + * context so a stack variable is ok. + */ + refcount_init(&ucred.cr_ref, 1); + ucred.cr_uid = ip->i_uid; + ucred.cr_ngroups = 1; + ucred.cr_groups = &ucred_group; + ucred.cr_groups[0] = pdir->i_gid; + ucp = &ucred; +#endif + } else { + ip->i_uid = cnp->cn_cred->cr_uid; + DIP_SET(ip, i_uid, ip->i_uid); + } + +#ifdef QUOTA + if ((error = getinoquota(ip)) || + (error = chkiq(ip, 1, ucp, 0))) { + if (DOINGSOFTDEP(tvp)) + softdep_revert_link(pdir, ip); + UFS_VFREE(tvp, ip->i_number, mode); + vput(tvp); + return (error); + } +#endif + } +#else /* !SUIDDIR */ + ip->i_uid = cnp->cn_cred->cr_uid; + DIP_SET(ip, i_uid, ip->i_uid); +#ifdef QUOTA + if ((error = getinoquota(ip)) || + (error = chkiq(ip, 1, cnp->cn_cred, 0))) { + if (DOINGSOFTDEP(tvp)) + softdep_revert_link(pdir, ip); + UFS_VFREE(tvp, ip->i_number, mode); + vput(tvp); + return (error); + } +#endif +#endif /* !SUIDDIR */ + ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; + ip->i_mode = mode; + DIP_SET(ip, i_mode, mode); + tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ + ip->i_effnlink = 1; + ip->i_nlink = 1; + DIP_SET(ip, i_nlink, 1); + if (DOINGSOFTDEP(tvp)) + softdep_setup_create(VTOI(dvp), ip); + if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) && + priv_check_cred(cnp->cn_cred, PRIV_VFS_SETGID, 0)) { + ip->i_mode &= ~ISGID; + DIP_SET(ip, i_mode, ip->i_mode); + } + + if (cnp->cn_flags & ISWHITEOUT) { + ip->i_flags |= UF_OPAQUE; + DIP_SET(ip, i_flags, ip->i_flags); + } + + /* + * Make sure inode goes to disk before directory entry. 
+ */ + error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(tvp) | DOINGASYNC(tvp))); + if (error) + goto bad; +#ifdef MAC + if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) { + error = mac_vnode_create_extattr(cnp->cn_cred, dvp->v_mount, + dvp, tvp, cnp); + if (error) + goto bad; + } +#endif +#ifdef UFS_ACL + if (dvp->v_mount->mnt_flag & MNT_ACLS) { + error = ufs_do_posix1e_acl_inheritance_file(dvp, tvp, mode, + cnp->cn_cred, cnp->cn_thread); + if (error) + goto bad; + } else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) { + error = ufs_do_nfs4_acl_inheritance(dvp, tvp, mode, + cnp->cn_cred, cnp->cn_thread); + if (error) + goto bad; + } +#endif /* !UFS_ACL */ + ufs_makedirentry(ip, cnp, &newdir); + error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL, 0); + if (error) + goto bad; + *vpp = tvp; + return (0); + +bad: + /* + * Write error occurred trying to update the inode + * or the directory so must deallocate the inode. + */ + ip->i_effnlink = 0; + ip->i_nlink = 0; + DIP_SET(ip, i_nlink, 0); + ip->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(tvp)) + softdep_revert_create(VTOI(dvp), ip); + vput(tvp); + return (error); +} + +static int +ufs_ioctl(struct vop_ioctl_args *ap) +{ + + switch (ap->a_command) { + case FIOSEEKDATA: + case FIOSEEKHOLE: + return (vn_bmap_seekhole(ap->a_vp, ap->a_command, + (off_t *)ap->a_data, ap->a_cred)); + default: + return (ENOTTY); + } +} + +/* Global vfs data structures for ufs. */ +struct vop_vector ufs_vnodeops = { + .vop_default = &default_vnodeops, + .vop_fsync = VOP_PANIC, + .vop_read = VOP_PANIC, + .vop_reallocblks = VOP_PANIC, + .vop_write = VOP_PANIC, + .vop_accessx = ufs_accessx, + .vop_bmap = ufs_bmap, + .vop_cachedlookup = ufs_lookup, + .vop_close = ufs_close, + .vop_create = ufs_create, + .vop_getattr = ufs_getattr, + .vop_inactive = ufs_inactive, + .vop_ioctl = ufs_ioctl, + .vop_link = ufs_link, + .vop_lookup = vfs_cache_lookup, + .vop_markatime = ufs_markatime, + .vop_mkdir = ufs_mkdir, + .vop_mknod = ufs_mknod, + .vop_open = ufs_open, + .vop_pathconf = ufs_pathconf, + .vop_poll = vop_stdpoll, + .vop_print = ufs_print, + .vop_readdir = ufs_readdir, + .vop_readlink = ufs_readlink, + .vop_reclaim = ufs_reclaim, + .vop_remove = ufs_remove, + .vop_rename = ufs_rename, + .vop_rmdir = ufs_rmdir, + .vop_setattr = ufs_setattr, +#ifdef MAC + .vop_setlabel = vop_stdsetlabel_ea, +#endif + .vop_strategy = ufs_strategy, + .vop_symlink = ufs_symlink, + .vop_whiteout = ufs_whiteout, +#ifdef UFS_EXTATTR + .vop_getextattr = ufs_getextattr, + .vop_deleteextattr = ufs_deleteextattr, + .vop_setextattr = ufs_setextattr, +#endif +#ifdef UFS_ACL + .vop_getacl = ufs_getacl, + .vop_setacl = ufs_setacl, + .vop_aclcheck = ufs_aclcheck, +#endif +}; + +struct vop_vector ufs_fifoops = { + .vop_default = &fifo_specops, + .vop_fsync = VOP_PANIC, + .vop_accessx = ufs_accessx, + .vop_close = ufsfifo_close, + .vop_getattr = ufs_getattr, + .vop_inactive = ufs_inactive, + .vop_kqfilter = ufsfifo_kqfilter, + .vop_markatime = ufs_markatime, + .vop_pathconf = ufs_pathconf, + .vop_print = ufs_print, + .vop_read = VOP_PANIC, + .vop_reclaim = ufs_reclaim, + .vop_setattr = ufs_setattr, +#ifdef MAC + .vop_setlabel = vop_stdsetlabel_ea, +#endif + .vop_write = VOP_PANIC, +#ifdef UFS_EXTATTR + .vop_getextattr = ufs_getextattr, + .vop_deleteextattr = ufs_deleteextattr, + .vop_setextattr = ufs_setextattr, +#endif +#ifdef UFS_ACL + .vop_getacl = ufs_getacl, + .vop_setacl = ufs_setacl, + .vop_aclcheck = ufs_aclcheck, +#endif +}; diff --git a/Dump/ufs/ufs/ufsmount.h b/Dump/ufs/ufs/ufsmount.h new file mode 100644 index 
0000000..88ecf09 --- /dev/null +++ b/Dump/ufs/ufs/ufsmount.h @@ -0,0 +1,144 @@ +/*- + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ufsmount.h 8.6 (Berkeley) 3/30/95 + * $FreeBSD: releng/11.2/sys/ufs/ufs/ufsmount.h 331722 2018-03-29 02:50:57Z eadler $ + */ + +#ifndef _UFS_UFS_UFSMOUNT_H_ +#define _UFS_UFS_UFSMOUNT_H_ + +/* + * Arguments to mount UFS-based filesystems + */ +struct ufs_args { + char *fspec; /* block special device to mount */ + struct oexport_args export; /* network export information */ +}; + +#ifdef _KERNEL + +#ifdef MALLOC_DECLARE +MALLOC_DECLARE(M_UFSMNT); +#endif + +struct buf; +struct inode; +struct nameidata; +struct taskqueue; +struct timeval; +struct ucred; +struct uio; +struct vnode; +struct ufs_extattr_per_mount; +struct jblocks; +struct inodedep; + +TAILQ_HEAD(inodedeplst, inodedep); +LIST_HEAD(bmsafemaphd, bmsafemap); + +/* This structure describes the UFS specific mount structure data. 
*/ +struct ufsmount { + struct mount *um_mountp; /* filesystem vfs structure */ + struct cdev *um_dev; /* device mounted */ + struct g_consumer *um_cp; + struct bufobj *um_bo; /* Buffer cache object */ + struct vnode *um_devvp; /* block device mounted vnode */ + u_long um_fstype; /* type of filesystem */ + struct fs *um_fs; /* pointer to superblock */ + struct ufs_extattr_per_mount um_extattr; /* extended attrs */ + u_long um_nindir; /* indirect ptrs per block */ + u_long um_bptrtodb; /* indir ptr to disk block */ + u_long um_seqinc; /* inc between seq blocks */ + struct mtx um_lock; /* Protects ufsmount & fs */ + pid_t um_fsckpid; /* PID permitted fsck sysctls */ + struct mount_softdeps *um_softdep; /* softdep mgmt structure */ + struct vnode *um_quotas[MAXQUOTAS]; /* pointer to quota files */ + struct ucred *um_cred[MAXQUOTAS]; /* quota file access cred */ + time_t um_btime[MAXQUOTAS]; /* block quota time limit */ + time_t um_itime[MAXQUOTAS]; /* inode quota time limit */ + char um_qflags[MAXQUOTAS]; /* quota specific flags */ + int64_t um_savedmaxfilesize; /* XXX - limit maxfilesize */ + int um_candelete; /* devvp supports TRIM */ + int um_writesuspended; /* suspension in progress */ + u_int um_trim_inflight; + struct taskqueue *um_trim_tq; + int (*um_balloc)(struct vnode *, off_t, int, struct ucred *, + int, struct buf **); + int (*um_blkatoff)(struct vnode *, off_t, char **, struct buf **); + int (*um_truncate)(struct vnode *, off_t, int, struct ucred *); + int (*um_update)(struct vnode *, int); + int (*um_valloc)(struct vnode *, int, struct ucred *, + struct vnode **); + int (*um_vfree)(struct vnode *, ino_t, int); + void (*um_ifree)(struct ufsmount *, struct inode *); + int (*um_rdonly)(struct inode *); + void (*um_snapgone)(struct inode *); +}; + +#define UFS_BALLOC(aa, bb, cc, dd, ee, ff) VFSTOUFS((aa)->v_mount)->um_balloc(aa, bb, cc, dd, ee, ff) +#define UFS_BLKATOFF(aa, bb, cc, dd) VFSTOUFS((aa)->v_mount)->um_blkatoff(aa, bb, cc, dd) +#define UFS_TRUNCATE(aa, bb, cc, dd) VFSTOUFS((aa)->v_mount)->um_truncate(aa, bb, cc, dd) +#define UFS_UPDATE(aa, bb) VFSTOUFS((aa)->v_mount)->um_update(aa, bb) +#define UFS_VALLOC(aa, bb, cc, dd) VFSTOUFS((aa)->v_mount)->um_valloc(aa, bb, cc, dd) +#define UFS_VFREE(aa, bb, cc) VFSTOUFS((aa)->v_mount)->um_vfree(aa, bb, cc) +#define UFS_IFREE(aa, bb) ((aa)->um_ifree(aa, bb)) +#define UFS_RDONLY(aa) (ITOUMP(aa)->um_rdonly(aa)) +#define UFS_SNAPGONE(aa) (ITOUMP(aa)->um_snapgone(aa)) + +#define UFS_LOCK(aa) mtx_lock(&(aa)->um_lock) +#define UFS_UNLOCK(aa) mtx_unlock(&(aa)->um_lock) +#define UFS_MTX(aa) (&(aa)->um_lock) + +/* + * Filesystem types + */ +#define UFS1 1 +#define UFS2 2 + +/* + * Flags describing the state of quotas. + */ +#define QTF_OPENING 0x01 /* Q_QUOTAON in progress */ +#define QTF_CLOSING 0x02 /* Q_QUOTAOFF in progress */ +#define QTF_64BIT 0x04 /* 64-bit quota file */ + +/* Convert mount ptr to ufsmount ptr. */ +#define VFSTOUFS(mp) ((struct ufsmount *)((mp)->mnt_data)) +#define UFSTOVFS(ump) (ump)->um_mountp + +/* + * Macros to access filesystem parameters in the ufsmount structure. + * Used by ufs_bmap. 
+ */ +#define MNINDIR(ump) ((ump)->um_nindir) +#define blkptrtodb(ump, b) ((b) << (ump)->um_bptrtodb) +#define is_sequential(ump, a, b) ((b) == (a) + ump->um_seqinc) +#endif /* _KERNEL */ + +#endif diff --git a/sys/include/sys/_types.h b/sys/include/sys/_types.h index c998ff6..2cfb128 100644 --- a/sys/include/sys/_types.h +++ b/sys/include/sys/_types.h @@ -29,6 +29,7 @@ #ifndef _SYS__TYPES_H_ #define _SYS__TYPES_H_ + typedef char __int8_t; typedef unsigned char __uint8_t; typedef short __int16_t; @@ -38,6 +39,10 @@ typedef long long __int64_t; typedef unsigned long long __uint64_t; +typedef __int64_t __rlim_t; /* resource limit - intentionally */ + /* signed, because of legacy code */ + /* that uses -1 for RLIM_INFINITY */ + typedef unsigned long __clock_t; typedef __uint32_t __ino_t; typedef __int32_t __ssize_t;/* stat types */ typedef __uint32_t __dev_t;/* device number */ diff --git a/sys/include/sys/descrip.h b/sys/include/sys/descrip.h index bb4b9a9..a012794 100644 --- a/sys/include/sys/descrip.h +++ b/sys/include/sys/descrip.h @@ -84,10 +84,21 @@ #define O_NDELAY O_NONBLOCK /* compat */ #define FPOSIXSHM O_NOFOLLOW +#define O_DIRECTORY 0x00020000 /* Fail if not directory */ +#define O_EXEC 0x00040000 /* Open for execute only */ + #define FCNTLFLAGS (FAPPEND|FASYNC|FFSYNC|FNONBLOCK|FPOSIXSHM|O_DIRECT) +/* #define FFLAGS(oflags) ((oflags) + 1) #define OFLAGS(fflags) ((fflags) - 1) +*/ + +/* convert from open() flags to/from fflags; convert O_RD/WR to FREAD/FWRITE */ +#define FFLAGS(oflags) ((oflags) & O_EXEC ? (oflags) : (oflags) + 1) +#define OFLAGS(fflags) ((fflags) & O_EXEC ? (fflags) : (fflags) - 1) + +#define FMASK (FREAD|FWRITE|FAPPEND|FASYNC|FFSYNC|FNONBLOCK|O_DIRECT|FEXEC) struct fileOps; struct file; @@ -180,6 +191,8 @@ int ioctl(struct thread *, struct ioctl_args *); int getfd(struct thread *td, struct file **fp, int fd); +int_kern_openat(struct thread *,int, char *,int int); + #endif /*** diff --git a/sys/include/sys/resource.h b/sys/include/sys/resource.h index b831da5..d8dc18f 100644 --- a/sys/include/sys/resource.h +++ b/sys/include/sys/resource.h @@ -29,7 +29,13 @@ #ifndef _SYS_RESOURCE_H_ #define _SYS_RESOURCE_H_ -#include +#include + +#ifndef _RLIM_T_DECLARED +typedef __rlim_t rlim_t; +#define _RLIM_T_DECLARED +#endif + /* * Resource limits diff --git a/sys/kernel/gen_calls.c b/sys/kernel/gen_calls.c index 537409a..098025d 100644 --- a/sys/kernel/gen_calls.c +++ b/sys/kernel/gen_calls.c @@ -376,96 +376,68 @@ int sys_getrlimit(struct thread *thr, struct sys_getrlimit_args *args) { int error = 0; + struct rlimit *rlim = 0x0; + switch (args->which) { case 0: - if (thr->rlim[args->which] == 0) - thr->rlim[args->which] = (struct rlimit *) kmalloc(sizeof(struct rlimit)); - args->rlp->rlim_cur = thr->rlim[args->which]->rlim_cur; - args->rlp->rlim_max = thr->rlim[args->which]->rlim_max; + args->rlp->rlim_cur = thr->rlim[args->which].rlim_cur; + args->rlp->rlim_max = thr->rlim[args->which].rlim_max; break; case 1: - if (thr->rlim[args->which] == 0) - thr->rlim[args->which] = (struct rlimit *) kmalloc(sizeof(struct rlimit)); - args->rlp->rlim_cur = thr->rlim[args->which]->rlim_cur; - args->rlp->rlim_max = thr->rlim[args->which]->rlim_max; + args->rlp->rlim_cur = thr->rlim[args->which].rlim_cur; + args->rlp->rlim_max = thr->rlim[args->which].rlim_max; break; case 2: - if (thr->rlim[args->which] == 0) - thr->rlim[args->which] = (struct rlimit *) kmalloc(sizeof(struct rlimit)); - args->rlp->rlim_cur = thr->rlim[args->which]->rlim_cur; - args->rlp->rlim_max = 
thr->rlim[args->which]->rlim_max; + args->rlp->rlim_cur = thr->rlim[args->which].rlim_cur; + args->rlp->rlim_max = thr->rlim[args->which].rlim_max; break; case 3: - if (thr->rlim[args->which] == 0) - thr->rlim[args->which] = (struct rlimit *) kmalloc(sizeof(struct rlimit)); - args->rlp->rlim_cur = thr->rlim[args->which]->rlim_cur; - args->rlp->rlim_max = thr->rlim[args->which]->rlim_max; + args->rlp->rlim_cur = thr->rlim[args->which].rlim_cur; + args->rlp->rlim_max = thr->rlim[args->which].rlim_max; break; case 4: - if (thr->rlim[args->which] == 0) - thr->rlim[args->which] = (struct rlimit *) kmalloc(sizeof(struct rlimit)); - args->rlp->rlim_cur = thr->rlim[args->which]->rlim_cur; - args->rlp->rlim_max = thr->rlim[args->which]->rlim_max; + args->rlp->rlim_cur = thr->rlim[args->which].rlim_cur; + args->rlp->rlim_max = thr->rlim[args->which].rlim_max; break; case 5: - if (thr->rlim[args->which] == 0) - thr->rlim[args->which] = (struct rlimit *) kmalloc(sizeof(struct rlimit)); - args->rlp->rlim_cur = thr->rlim[args->which]->rlim_cur; - args->rlp->rlim_max = thr->rlim[args->which]->rlim_max; + args->rlp->rlim_cur = thr->rlim[args->which].rlim_cur; + args->rlp->rlim_max = thr->rlim[args->which].rlim_max; break; case 6: - if (thr->rlim[args->which] == 0) - thr->rlim[args->which] = (struct rlimit *) kmalloc(sizeof(struct rlimit)); - args->rlp->rlim_cur = thr->rlim[args->which]->rlim_cur; - args->rlp->rlim_max = thr->rlim[args->which]->rlim_max; + args->rlp->rlim_cur = thr->rlim[args->which].rlim_cur; + args->rlp->rlim_max = thr->rlim[args->which].rlim_max; break; case 7: - if (thr->rlim[args->which] == 0) - thr->rlim[args->which] = (struct rlimit *) kmalloc(sizeof(struct rlimit)); - args->rlp->rlim_cur = thr->rlim[args->which]->rlim_cur; - args->rlp->rlim_max = thr->rlim[args->which]->rlim_max; + args->rlp->rlim_cur = thr->rlim[args->which].rlim_cur; + args->rlp->rlim_max = thr->rlim[args->which].rlim_max; break; case 8: - if (thr->rlim[args->which] == 0) - thr->rlim[args->which] = (struct rlimit *) kmalloc(sizeof(struct rlimit)); - args->rlp->rlim_cur = thr->rlim[args->which]->rlim_cur; - args->rlp->rlim_max = thr->rlim[args->which]->rlim_max; + args->rlp->rlim_cur = thr->rlim[args->which].rlim_cur; + args->rlp->rlim_max = thr->rlim[args->which].rlim_max; break; case 9: - if (thr->rlim[args->which] == 0) - thr->rlim[args->which] = (struct rlimit *) kmalloc(sizeof(struct rlimit)); - args->rlp->rlim_cur = thr->rlim[args->which]->rlim_cur; - args->rlp->rlim_max = thr->rlim[args->which]->rlim_max; + args->rlp->rlim_cur = thr->rlim[args->which].rlim_cur; + args->rlp->rlim_max = thr->rlim[args->which].rlim_max; break; case 10: - if (thr->rlim[args->which] == 0) - thr->rlim[args->which] = (struct rlimit *) kmalloc(sizeof(struct rlimit)); - args->rlp->rlim_cur = thr->rlim[args->which]->rlim_cur; - args->rlp->rlim_max = thr->rlim[args->which]->rlim_max; + args->rlp->rlim_cur = thr->rlim[args->which].rlim_cur; + args->rlp->rlim_max = thr->rlim[args->which].rlim_max; break; case 11: - if (thr->rlim[args->which] == 0) - thr->rlim[args->which] = (struct rlimit *) kmalloc(sizeof(struct rlimit)); - args->rlp->rlim_cur = thr->rlim[args->which]->rlim_cur; - args->rlp->rlim_max = thr->rlim[args->which]->rlim_max; + args->rlp->rlim_cur = thr->rlim[args->which].rlim_cur; + args->rlp->rlim_max = thr->rlim[args->which].rlim_max; break; case 12: - if (thr->rlim[args->which] == 0) - thr->rlim[args->which] = (struct rlimit *) kmalloc(sizeof(struct rlimit)); - args->rlp->rlim_cur = thr->rlim[args->which]->rlim_cur; - 
args->rlp->rlim_max = thr->rlim[args->which]->rlim_max; + args->rlp->rlim_cur = thr->rlim[args->which].rlim_cur; + args->rlp->rlim_max = thr->rlim[args->which].rlim_max; break; case 13: - if (thr->rlim[args->which] == 0) - thr->rlim[args->which] = (struct rlimit *) kmalloc(sizeof(struct rlimit)); - args->rlp->rlim_cur = thr->rlim[args->which]->rlim_cur; - args->rlp->rlim_max = thr->rlim[args->which]->rlim_max; + args->rlp->rlim_cur = thr->rlim[args->which].rlim_cur; + args->rlp->rlim_max = thr->rlim[args->which].rlim_max; break; case 14: - if (thr->rlim[args->which] == 0) - thr->rlim[args->which] = (struct rlimit *) kmalloc(sizeof(struct rlimit)); - args->rlp->rlim_cur = thr->rlim[args->which]->rlim_cur; - args->rlp->rlim_max = thr->rlim[args->which]->rlim_max; + args->rlp->rlim_cur = thr->rlim[args->which].rlim_cur; + args->rlp->rlim_max = thr->rlim[args->which].rlim_max; break; default: error = -1; @@ -480,94 +452,64 @@ switch (args->which) { case 0: - if (thr->rlim[args->which] == 0) - thr->rlim[args->which] = (struct rlimit *) kmalloc(sizeof(struct rlimit)); - thr->rlim[args->which]->rlim_cur = args->rlp->rlim_cur; - thr->rlim[args->which]->rlim_max = args->rlp->rlim_max; + thr->rlim[args->which].rlim_cur = args->rlp->rlim_cur; + thr->rlim[args->which].rlim_max = args->rlp->rlim_max; break; case 1: - if (thr->rlim[args->which] == 0) - thr->rlim[args->which] = (struct rlimit *) kmalloc(sizeof(struct rlimit)); - thr->rlim[args->which]->rlim_cur = args->rlp->rlim_cur; - thr->rlim[args->which]->rlim_max = args->rlp->rlim_max; + thr->rlim[args->which].rlim_cur = args->rlp->rlim_cur; + thr->rlim[args->which].rlim_max = args->rlp->rlim_max; break; case 2: - if (thr->rlim[args->which] == 0) - thr->rlim[args->which] = (struct rlimit *) kmalloc(sizeof(struct rlimit)); - thr->rlim[args->which]->rlim_cur = args->rlp->rlim_cur; - thr->rlim[args->which]->rlim_max = args->rlp->rlim_max; + thr->rlim[args->which].rlim_cur = args->rlp->rlim_cur; + thr->rlim[args->which].rlim_max = args->rlp->rlim_max; break; case 3: - if (thr->rlim[args->which] == 0) - thr->rlim[args->which] = (struct rlimit *) kmalloc(sizeof(struct rlimit)); - thr->rlim[args->which]->rlim_cur = args->rlp->rlim_cur; - thr->rlim[args->which]->rlim_max = args->rlp->rlim_max; + thr->rlim[args->which].rlim_cur = args->rlp->rlim_cur; + thr->rlim[args->which].rlim_max = args->rlp->rlim_max; break; case 4: - if (thr->rlim[args->which] == 0) - thr->rlim[args->which] = (struct rlimit *) kmalloc(sizeof(struct rlimit)); - thr->rlim[args->which]->rlim_cur = args->rlp->rlim_cur; - thr->rlim[args->which]->rlim_max = args->rlp->rlim_max; + thr->rlim[args->which].rlim_cur = args->rlp->rlim_cur; + thr->rlim[args->which].rlim_max = args->rlp->rlim_max; break; case 5: - if (thr->rlim[args->which] == 0) - thr->rlim[args->which] = (struct rlimit *) kmalloc(sizeof(struct rlimit)); - thr->rlim[args->which]->rlim_cur = args->rlp->rlim_cur; - thr->rlim[args->which]->rlim_max = args->rlp->rlim_max; + thr->rlim[args->which].rlim_cur = args->rlp->rlim_cur; + thr->rlim[args->which].rlim_max = args->rlp->rlim_max; break; case 6: - if (thr->rlim[args->which] == 0) - thr->rlim[args->which] = (struct rlimit *) kmalloc(sizeof(struct rlimit)); - thr->rlim[args->which]->rlim_cur = args->rlp->rlim_cur; - thr->rlim[args->which]->rlim_max = args->rlp->rlim_max; + thr->rlim[args->which].rlim_cur = args->rlp->rlim_cur; + thr->rlim[args->which].rlim_max = args->rlp->rlim_max; break; case 7: - if (thr->rlim[args->which] == 0) - thr->rlim[args->which] = (struct rlimit *) 
diff --git a/sys/kernel/vfs_calls.c b/sys/kernel/vfs_calls.c
index 1fe71b4..acc1b86 100644
--- a/sys/kernel/vfs_calls.c
+++ b/sys/kernel/vfs_calls.c
@@ -36,36 +36,13 @@
 #include 
 
 int
 sys_open(struct thread *td, struct sys_open_args *args) {
-	int error = 0x0;
-	int fd = 0x0;
-	struct file *nfp = 0x0;
-	error = falloc(td, &nfp, &fd);
+	return(kern_openat(td, AT_FDCWD, args->path, args->flags, args->mode));
-	if (error) {
-		td->td_retval[0] = -1;
-		return (error);
-	}
-
-
-	nfp->fd = fopen(args->path, "rb");
-
-	if (nfp->fd == 0x0) {
-		fdestroy(td, nfp, fd);
-
-		td->td_retval[0] = -1;
-		error = -1;
-	}
-	else {
-		td->td_retval[0] = fd;
-	}
-
-	//kprintf("sO: 0x%X:%s:", args->mode, args->path, td->td_retval[0]);
-
-	return (error);
 }
 
 int
 sys_openat(struct thread *td, struct sys_openat_args *args) {
+	int error = 0x0;
 	int fd = 0x0;
 	struct file *nfp = 0x0;
@@ -413,3 +390,52 @@
 	thr->td_retval[0] = 2;
 	return (-1);
 }
+
+/* dirfd is the directory descriptor (AT_FDCWD from sys_open); it is not yet used here. */
+int kern_openat(struct thread *thr, int dirfd, char *path, int flags, int mode) {
+	int error = 0x0;
+	int fd = 0x0;
+	struct file *nfp = 0x0;
+
+	/*
+	 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
+	 * may be specified.
+	 */
+	if (flags & O_EXEC) {
+		if (flags & O_ACCMODE)
+			return (EINVAL);
+	}
+	else if ((flags & O_ACCMODE) == O_ACCMODE) {
+		return (EINVAL);
+	}
+	else {
+		flags = FFLAGS(flags);
+	}
+
+	error = falloc(thr, &nfp, &fd);
+
+	if (error) {
+		thr->td_retval[0] = -1;
+		return (error);
+	}
+
+	nfp->f_flag = flags & FMASK;
+
+	nfp->fd = fopen(path, "rb");
+
+	if (nfp->fd == 0x0) {
+		fdestroy(thr, nfp, fd);
+
+		thr->td_retval[0] = -1;
+		error = -1;
+	}
+	else {
+		thr->td_retval[0] = fd;
+	}
+
+	//kprintf("sO: 0x%X:%s:", mode, path, thr->td_retval[0]);
+
+	return (error);
+
+}
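/*
 * Editor's sketch, not part of the diff above: once kern_openat() exists,
 * sys_openat() could delegate to it the same way sys_open() now does,
 * passing its own directory descriptor instead of AT_FDCWD. The
 * sys_openat_args field names (fd, path, flags, mode) are assumed by
 * analogy with sys_open_args and are not confirmed by this dump.
 */
int
sys_openat(struct thread *td, struct sys_openat_args *args)
{
	return (kern_openat(td, args->fd, args->path, args->flags, args->mode));
}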