diff --git a/src/sys/ufs/ffs/README.snapshot b/src/sys/ufs/ffs/README.snapshot deleted file mode 100644 index 2ca5a19..0000000 --- a/src/sys/ufs/ffs/README.snapshot +++ /dev/null @@ -1,110 +0,0 @@ -$FreeBSD: src/sys/ufs/ffs/README.snapshot,v 1.4 2002/12/12 00:31:45 trhodes Exp $ - -Soft Updates Status - -As is detailed in the operational information below, snapshots -are definitely alpha-test code and are NOT yet ready for production -use. Much remains to be done to make them really useful, but I -wanted to let folks get a chance to try it out and start reporting -bugs and other shortcomings. Such reports should be sent to -Kirk McKusick . - - -Snapshot Copyright Restrictions - -Snapshots have been introduced to FreeBSD with a `Berkeley-style' -copyright. The file implementing snapshots resides in the sys/ufs/ffs -directory and is compiled into the generic kernel by default. - - -Using Snapshots - -To create a snapshot of your /var filesystem, run the command: - - mount -u -o snapshot /var/snapshot/snap1 /var - -This command will take a snapshot of your /var filesystem and -leave it in the file /var/snapshot/snap1. Note that snapshot -files must be created in the filesystem that is being snapshotted. -I use the convention of putting a `snapshot' directory at the -root of each filesystem into which I can place snapshots. -You may create up to 20 snapshots per filesystem. Active snapshots -are recorded in the superblock, so they persist across unmount -and remount operations and across system reboots. When you -are done with a snapshot, it can be removed with the `rm' -command. Snapshots may be removed in any order, however you -may not get back all the space contained in the snapshot as -another snapshot may claim some of the blocks that it is releasing. -Note that the `schg' flag is set on snapshots to ensure that -not even the root user can write to them. The unlink command -makes an exception for snapshot files in that it allows them -to be removed even though they have the `schg' flag set, so it -is not necessary to clear the `schg' flag before removing a -snapshot file. - -Once you have taken a snapshot, there are three interesting -things that you can do with it: - -1) Run fsck on the snapshot file. Assuming that the filesystem - was clean when it was mounted, you should always get a clean - (and unchanging) result from running fsck on the snapshot. - If you are running with soft updates and rebooted after a - crash without cleaning up the filesystem, then fsck of the - snapshot may find missing blocks and inodes or inodes with - link counts that are too high. I have not yet added the - system calls to allow fsck to add these missing resources - back to the filesystem - that will be added once the basic - snapshot code is working properly. So, view those reports - as informational for now. - -2) Run dump on the snapshot. You will get a dump that is - consistent with the filesystem as of the timestamp of the - snapshot. - -3) Mount the snapshot as a frozen image of the filesystem. - To mount the snapshot /var/snapshot/snap1: - - mdconfig -a -t vnode -f /var/snapshot/snap1 -u 4 - mount -r /dev/md4 /mnt - - You can now cruise around your frozen /var filesystem - at /mnt. Everything will be in the same state that it - was at the time the snapshot was taken. The one exception - is that any earlier snapshots will appear as zero length - files. 
When you are done with the mounted snapshot: - - umount /mnt - mdconfig -d -u 4 - - Note that under some circumstances, the process accessing - the frozen filesystem may deadlock. I am aware of this - problem, but the solution is not simple. It requires - using buffer read locks rather than exclusive locks when - traversing the inode indirect blocks. Until this problem - is fixed, you should avoid putting mounted snapshots into - production. - - -Performance - -It takes about 30 seconds to create a snapshot of an 8Gb filesystem. -Of that time 25 seconds is spent in preparation; filesystem activity -is only suspended for the final 5 seconds of that period. Snapshot -removal of an 8Gb filesystem takes about two minutes. Filesystem -activity is never suspended during snapshot removal. - -The suspend time may be expanded by several minutes if a process -is in the midst of removing many files as all the soft updates -backlog must be cleared. Generally snapshots do not slow the system -down appreciably except when removing many small files (i.e., any -file less than 96Kb whose last block is a fragment) that are claimed -by a snapshot. Here, the snapshot code must make a copy of every -released fragment which slows the rate of file removal to about -twenty files per second once the soft updates backlog limit is -reached. - - -How Snapshots Work - -For more general information on snapshots, please see: - http://www.mckusick.com/softdep/ diff --git a/src/sys/ufs/ffs/README.softupdates b/src/sys/ufs/ffs/README.softupdates deleted file mode 100644 index a965f4f..0000000 --- a/src/sys/ufs/ffs/README.softupdates +++ /dev/null @@ -1,58 +0,0 @@ -$FreeBSD: src/sys/ufs/ffs/README.softupdates,v 1.9 2000/07/08 02:31:21 mckusick Exp $ - -Using Soft Updates - -To enable the soft updates feature in your kernel, add option -SOFTUPDATES to your kernel configuration. - -Once you are running a kernel with soft update support, you need to enable -it for whichever filesystems you wish to run with the soft update policy. -This is done with the -n option to tunefs(8) on the UNMOUNTED filesystems, -e.g. from single-user mode you'd do something like: - - tunefs -n enable /usr - -To permanently enable soft updates on the /usr filesystem (or at least -until a corresponding ``tunefs -n disable'' is done). - - -Soft Updates Copyright Restrictions - -As of June 2000 the restrictive copyright has been removed and -replaced with a `Berkeley-style' copyright. The files implementing -soft updates now reside in the sys/ufs/ffs directory and are -compiled into the generic kernel by default. - - -Soft Updates Status - -The soft updates code has been running in production on many -systems for the past two years generally quite successfully. -The two current sets of shortcomings are: - -1) On filesystems that are chronically full, the two minute lag - from the time a file is deleted until its free space shows up - will result in premature filesystem full failures. This - failure mode is most evident in small filesystems such as - the root. For this reason, use of soft updates is not - recommended on the root filesystem. - -2) If your system routines runs parallel processes each of which - remove many files, the kernel memory rate limiting code may - not be able to slow removal operations to a level sustainable - by the disk subsystem. The result is that the kernel runs out - of memory and hangs. - -Both of these problems are being addressed, but have not yet -been resolved. There are no other known problems at this time. 
- - -How Soft Updates Work - -For more general information on soft updates, please see: - http://www.mckusick.com/softdep/ - http://www.ece.cmu.edu/~ganger/papers/CSE-TR-254-95/ - --- -Marshall Kirk McKusick -July 2000 diff --git a/src/sys/ufs/ffs/ffs_alloc.c b/src/sys/ufs/ffs/ffs_alloc.c deleted file mode 100644 index 9836d44..0000000 --- a/src/sys/ufs/ffs/ffs_alloc.c +++ /dev/null @@ -1,2364 +0,0 @@ -#if 0 -/* - * Copyright (c) 2002 Networks Associates Technology, Inc. - * All rights reserved. - * - * This software was developed for the FreeBSD Project by Marshall - * Kirk McKusick and Network Associates Laboratories, the Security - * Research Division of Network Associates, Inc. under DARPA/SPAWAR - * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS - * research program - * - * Copyright (c) 1982, 1986, 1989, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * @(#)ffs_alloc.c 8.18 (Berkeley) 5/26/95 - */ - -#include -__FBSDID("$FreeBSD: src/sys/ufs/ffs/ffs_alloc.c,v 1.116 2003/10/31 07:25:06 truckman Exp $"); - -#include "opt_quota.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include - -typedef ufs2_daddr_t allocfcn_t(struct inode *ip, int cg, ufs2_daddr_t bpref, - int size); - -static ufs2_daddr_t ffs_alloccg(struct inode *, int, ufs2_daddr_t, int); -static ufs2_daddr_t - ffs_alloccgblk(struct inode *, struct buf *, ufs2_daddr_t); -#ifdef DIAGNOSTIC -static int ffs_checkblk(struct inode *, ufs2_daddr_t, long); -#endif -static ufs2_daddr_t ffs_clusteralloc(struct inode *, int, ufs2_daddr_t, int); -static ino_t ffs_dirpref(struct inode *); -static ufs2_daddr_t ffs_fragextend(struct inode *, int, ufs2_daddr_t, int, int); -static void ffs_fserr(struct fs *, ino_t, char *); -static ufs2_daddr_t ffs_hashalloc - (struct inode *, int, ufs2_daddr_t, int, allocfcn_t *); -static ufs2_daddr_t ffs_nodealloccg(struct inode *, int, ufs2_daddr_t, int); -static ufs1_daddr_t ffs_mapsearch(struct fs *, struct cg *, ufs2_daddr_t, int); -static int ffs_reallocblks_ufs1(struct vop_reallocblks_args *); -static int ffs_reallocblks_ufs2(struct vop_reallocblks_args *); - -/* - * Allocate a block in the filesystem. - * - * The size of the requested block is given, which must be some - * multiple of fs_fsize and <= fs_bsize. - * A preference may be optionally specified. If a preference is given - * the following hierarchy is used to allocate a block: - * 1) allocate the requested block. - * 2) allocate a rotationally optimal block in the same cylinder. - * 3) allocate a block in the same cylinder group. - * 4) quadradically rehash into other cylinder groups, until an - * available block is located. - * If no block preference is given the following heirarchy is used - * to allocate a block: - * 1) allocate a block in the cylinder group that contains the - * inode for the file. - * 2) quadradically rehash into other cylinder groups, until an - * available block is located. - */ -int -ffs_alloc(ip, lbn, bpref, size, cred, bnp) - struct inode *ip; - ufs2_daddr_t lbn, bpref; - int size; - struct ucred *cred; - ufs2_daddr_t *bnp; -{ - struct fs *fs; - ufs2_daddr_t bno; - int cg, reclaimed; -#ifdef QUOTA - int error; -#endif - - *bnp = 0; - fs = ip->i_fs; -#ifdef DIAGNOSTIC - if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { - printf("dev = %s, bsize = %ld, size = %d, fs = %s\n", - devtoname(ip->i_dev), (long)fs->fs_bsize, size, - fs->fs_fsmnt); - panic("ffs_alloc: bad size"); - } - if (cred == NOCRED) - panic("ffs_alloc: missing credential"); -#endif /* DIAGNOSTIC */ - reclaimed = 0; -retry: - if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0) - goto nospace; - if (suser_cred(cred, PRISON_ROOT) && - freespace(fs, fs->fs_minfree) - numfrags(fs, size) < 0) - goto nospace; -#ifdef QUOTA - error = chkdq(ip, btodb(size), cred, 0); - if (error) - return (error); -#endif - if (bpref >= fs->fs_size) - bpref = 0; - if (bpref == 0) - cg = ino_to_cg(fs, ip->i_number); - else - cg = dtog(fs, bpref); - bno = ffs_hashalloc(ip, cg, bpref, size, ffs_alloccg); - if (bno > 0) { - DIP(ip, i_blocks) += btodb(size); - ip->i_flag |= IN_CHANGE | IN_UPDATE; - *bnp = bno; - return (0); - } -#ifdef QUOTA - /* - * Restore user's disk quota because allocation failed. 
- */ - (void) chkdq(ip, -btodb(size), cred, FORCE); -#endif -nospace: - if (fs->fs_pendingblocks > 0 && reclaimed == 0) { - reclaimed = 1; - softdep_request_cleanup(fs, ITOV(ip)); - goto retry; - } - ffs_fserr(fs, ip->i_number, "filesystem full"); - uprintf("\n%s: write failed, filesystem is full\n", fs->fs_fsmnt); - return (ENOSPC); -} - -/* - * Reallocate a fragment to a bigger size - * - * The number and size of the old block is given, and a preference - * and new size is also specified. The allocator attempts to extend - * the original block. Failing that, the regular block allocator is - * invoked to get an appropriate block. - */ -int -ffs_realloccg(ip, lbprev, bprev, bpref, osize, nsize, cred, bpp) - struct inode *ip; - ufs2_daddr_t lbprev; - ufs2_daddr_t bprev; - ufs2_daddr_t bpref; - int osize, nsize; - struct ucred *cred; - struct buf **bpp; -{ - struct vnode *vp; - struct fs *fs; - struct buf *bp; - int cg, request, error, reclaimed; - ufs2_daddr_t bno; - - *bpp = 0; - vp = ITOV(ip); - fs = ip->i_fs; -#ifdef DIAGNOSTIC - if (vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) - panic("ffs_realloccg: allocation on suspended filesystem"); - if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 || - (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) { - printf( - "dev = %s, bsize = %ld, osize = %d, nsize = %d, fs = %s\n", - devtoname(ip->i_dev), (long)fs->fs_bsize, osize, - nsize, fs->fs_fsmnt); - panic("ffs_realloccg: bad size"); - } - if (cred == NOCRED) - panic("ffs_realloccg: missing credential"); -#endif /* DIAGNOSTIC */ - reclaimed = 0; -retry: - if (suser_cred(cred, PRISON_ROOT) && - freespace(fs, fs->fs_minfree) - numfrags(fs, nsize - osize) < 0) - goto nospace; - if (bprev == 0) { - printf("dev = %s, bsize = %ld, bprev = %jd, fs = %s\n", - devtoname(ip->i_dev), (long)fs->fs_bsize, (intmax_t)bprev, - fs->fs_fsmnt); - panic("ffs_realloccg: bad bprev"); - } - /* - * Allocate the extra space in the buffer. - */ - error = bread(vp, lbprev, osize, NOCRED, &bp); - if (error) { - brelse(bp); - return (error); - } - - if (bp->b_blkno == bp->b_lblkno) { - if (lbprev >= NDADDR) - panic("ffs_realloccg: lbprev out of range"); - bp->b_blkno = fsbtodb(fs, bprev); - } - -#ifdef QUOTA - error = chkdq(ip, btodb(nsize - osize), cred, 0); - if (error) { - brelse(bp); - return (error); - } -#endif - /* - * Check for extension in the existing location. - */ - cg = dtog(fs, bprev); - bno = ffs_fragextend(ip, cg, bprev, osize, nsize); - if (bno) { - if (bp->b_blkno != fsbtodb(fs, bno)) - panic("ffs_realloccg: bad blockno"); - DIP(ip, i_blocks) += btodb(nsize - osize); - ip->i_flag |= IN_CHANGE | IN_UPDATE; - allocbuf(bp, nsize); - bp->b_flags |= B_DONE; - bzero((char *)bp->b_data + osize, (u_int)nsize - osize); - *bpp = bp; - return (0); - } - /* - * Allocate a new disk location. - */ - if (bpref >= fs->fs_size) - bpref = 0; - switch ((int)fs->fs_optim) { - case FS_OPTSPACE: - /* - * Allocate an exact sized fragment. Although this makes - * best use of space, we will waste time relocating it if - * the file continues to grow. If the fragmentation is - * less than half of the minimum free reserve, we choose - * to begin optimizing for time. 
- */ - request = nsize; - if (fs->fs_minfree <= 5 || - fs->fs_cstotal.cs_nffree > - (off_t)fs->fs_dsize * fs->fs_minfree / (2 * 100)) - break; - log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n", - fs->fs_fsmnt); - fs->fs_optim = FS_OPTTIME; - break; - case FS_OPTTIME: - /* - * At this point we have discovered a file that is trying to - * grow a small fragment to a larger fragment. To save time, - * we allocate a full sized block, then free the unused portion. - * If the file continues to grow, the `ffs_fragextend' call - * above will be able to grow it in place without further - * copying. If aberrant programs cause disk fragmentation to - * grow within 2% of the free reserve, we choose to begin - * optimizing for space. - */ - request = fs->fs_bsize; - if (fs->fs_cstotal.cs_nffree < - (off_t)fs->fs_dsize * (fs->fs_minfree - 2) / 100) - break; - log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n", - fs->fs_fsmnt); - fs->fs_optim = FS_OPTSPACE; - break; - default: - printf("dev = %s, optim = %ld, fs = %s\n", - devtoname(ip->i_dev), (long)fs->fs_optim, fs->fs_fsmnt); - panic("ffs_realloccg: bad optim"); - /* NOTREACHED */ - } - bno = ffs_hashalloc(ip, cg, bpref, request, ffs_alloccg); - if (bno > 0) { - bp->b_blkno = fsbtodb(fs, bno); - if (!DOINGSOFTDEP(vp)) - ffs_blkfree(fs, ip->i_devvp, bprev, (long)osize, - ip->i_number); - if (nsize < request) - ffs_blkfree(fs, ip->i_devvp, bno + numfrags(fs, nsize), - (long)(request - nsize), ip->i_number); - DIP(ip, i_blocks) += btodb(nsize - osize); - ip->i_flag |= IN_CHANGE | IN_UPDATE; - allocbuf(bp, nsize); - bp->b_flags |= B_DONE; - bzero((char *)bp->b_data + osize, (u_int)nsize - osize); - *bpp = bp; - return (0); - } -#ifdef QUOTA - /* - * Restore user's disk quota because allocation failed. - */ - (void) chkdq(ip, -btodb(nsize - osize), cred, FORCE); -#endif - brelse(bp); -nospace: - /* - * no space available - */ - if (fs->fs_pendingblocks > 0 && reclaimed == 0) { - reclaimed = 1; - softdep_request_cleanup(fs, vp); - goto retry; - } - ffs_fserr(fs, ip->i_number, "filesystem full"); - uprintf("\n%s: write failed, filesystem is full\n", fs->fs_fsmnt); - return (ENOSPC); -} - -/* - * Reallocate a sequence of blocks into a contiguous sequence of blocks. - * - * The vnode and an array of buffer pointers for a range of sequential - * logical blocks to be made contiguous is given. The allocator attempts - * to find a range of sequential blocks starting as close as possible - * from the end of the allocation for the logical block immediately - * preceding the current range. If successful, the physical block numbers - * in the buffer pointers and in the inode are changed to reflect the new - * allocation. If unsuccessful, the allocation is left unchanged. The - * success in doing the reallocation is returned. Note that the error - * return is not reflected back to the user. Rather the previous block - * allocation will be used. 
- */ - -SYSCTL_NODE(_vfs, OID_AUTO, ffs, CTLFLAG_RW, 0, "FFS filesystem"); - -static int doasyncfree = 1; -SYSCTL_INT(_vfs_ffs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, ""); - -static int doreallocblks = 1; -SYSCTL_INT(_vfs_ffs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, ""); - -#ifdef DEBUG -static volatile int prtrealloc = 0; -#endif - -int -ffs_reallocblks(ap) - struct vop_reallocblks_args /* { - struct vnode *a_vp; - struct cluster_save *a_buflist; - } */ *ap; -{ - - if (doreallocblks == 0) - return (ENOSPC); - if (VTOI(ap->a_vp)->i_ump->um_fstype == UFS1) - return (ffs_reallocblks_ufs1(ap)); - return (ffs_reallocblks_ufs2(ap)); -} - -static int -ffs_reallocblks_ufs1(ap) - struct vop_reallocblks_args /* { - struct vnode *a_vp; - struct cluster_save *a_buflist; - } */ *ap; -{ - struct fs *fs; - struct inode *ip; - struct vnode *vp; - struct buf *sbp, *ebp; - ufs1_daddr_t *bap, *sbap, *ebap = 0; - struct cluster_save *buflist; - ufs_lbn_t start_lbn, end_lbn; - ufs1_daddr_t soff, newblk, blkno; - ufs2_daddr_t pref; - struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp; - int i, len, start_lvl, end_lvl, ssize; - - vp = ap->a_vp; - ip = VTOI(vp); - fs = ip->i_fs; - if (fs->fs_contigsumsize <= 0) - return (ENOSPC); - buflist = ap->a_buflist; - len = buflist->bs_nchildren; - start_lbn = buflist->bs_children[0]->b_lblkno; - end_lbn = start_lbn + len - 1; -#ifdef DIAGNOSTIC - for (i = 0; i < len; i++) - if (!ffs_checkblk(ip, - dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) - panic("ffs_reallocblks: unallocated block 1"); - for (i = 1; i < len; i++) - if (buflist->bs_children[i]->b_lblkno != start_lbn + i) - panic("ffs_reallocblks: non-logical cluster"); - blkno = buflist->bs_children[0]->b_blkno; - ssize = fsbtodb(fs, fs->fs_frag); - for (i = 1; i < len - 1; i++) - if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize)) - panic("ffs_reallocblks: non-physical cluster %d", i); -#endif - /* - * If the latest allocation is in a new cylinder group, assume that - * the filesystem has decided to move and do not force it back to - * the previous cylinder group. - */ - if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) != - dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno))) - return (ENOSPC); - if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) || - ufs_getlbns(vp, end_lbn, end_ap, &end_lvl)) - return (ENOSPC); - /* - * Get the starting offset and block map for the first block. - */ - if (start_lvl == 0) { - sbap = &ip->i_din1->di_db[0]; - soff = start_lbn; - } else { - idp = &start_ap[start_lvl - 1]; - if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) { - brelse(sbp); - return (ENOSPC); - } - sbap = (ufs1_daddr_t *)sbp->b_data; - soff = idp->in_off; - } - /* - * Find the preferred location for the cluster. - */ - pref = ffs_blkpref_ufs1(ip, start_lbn, soff, sbap); - /* - * If the block range spans two block maps, get the second map. - */ - if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { - ssize = len; - } else { -#ifdef DIAGNOSTIC - if (start_ap[start_lvl-1].in_lbn == idp->in_lbn) - panic("ffs_reallocblk: start == end"); -#endif - ssize = len - (idp->in_off + 1); - if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp)) - goto fail; - ebap = (ufs1_daddr_t *)ebp->b_data; - } - /* - * Search the block map looking for an allocation of the desired size. 
- */ - if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref, - len, ffs_clusteralloc)) == 0) - goto fail; - /* - * We have found a new contiguous block. - * - * First we have to replace the old block pointers with the new - * block pointers in the inode and indirect blocks associated - * with the file. - */ -#ifdef DEBUG - if (prtrealloc) - printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number, - (intmax_t)start_lbn, (intmax_t)end_lbn); -#endif - blkno = newblk; - for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) { - if (i == ssize) { - bap = ebap; - soff = -i; - } -#ifdef DIAGNOSTIC - if (!ffs_checkblk(ip, - dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) - panic("ffs_reallocblks: unallocated block 2"); - if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap) - panic("ffs_reallocblks: alloc mismatch"); -#endif -#ifdef DEBUG - if (prtrealloc) - printf(" %d,", *bap); -#endif - if (DOINGSOFTDEP(vp)) { - if (sbap == &ip->i_din1->di_db[0] && i < ssize) - softdep_setup_allocdirect(ip, start_lbn + i, - blkno, *bap, fs->fs_bsize, fs->fs_bsize, - buflist->bs_children[i]); - else - softdep_setup_allocindir_page(ip, start_lbn + i, - i < ssize ? sbp : ebp, soff + i, blkno, - *bap, buflist->bs_children[i]); - } - *bap++ = blkno; - } - /* - * Next we must write out the modified inode and indirect blocks. - * For strict correctness, the writes should be synchronous since - * the old block values may have been written to disk. In practise - * they are almost never written, but if we are concerned about - * strict correctness, the `doasyncfree' flag should be set to zero. - * - * The test on `doasyncfree' should be changed to test a flag - * that shows whether the associated buffers and inodes have - * been written. The flag should be set when the cluster is - * started and cleared whenever the buffer or inode is flushed. - * We can then check below to see if it is set, and do the - * synchronous write only when it has been cleared. - */ - if (sbap != &ip->i_din1->di_db[0]) { - if (doasyncfree) - bdwrite(sbp); - else - bwrite(sbp); - } else { - ip->i_flag |= IN_CHANGE | IN_UPDATE; - if (!doasyncfree) - UFS_UPDATE(vp, 1); - } - if (ssize < len) { - if (doasyncfree) - bdwrite(ebp); - else - bwrite(ebp); - } - /* - * Last, free the old blocks and assign the new blocks to the buffers. 
- */ -#ifdef DEBUG - if (prtrealloc) - printf("\n\tnew:"); -#endif - for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) { - if (!DOINGSOFTDEP(vp)) - ffs_blkfree(fs, ip->i_devvp, - dbtofsb(fs, buflist->bs_children[i]->b_blkno), - fs->fs_bsize, ip->i_number); - buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); -#ifdef DIAGNOSTIC - if (!ffs_checkblk(ip, - dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) - panic("ffs_reallocblks: unallocated block 3"); -#endif -#ifdef DEBUG - if (prtrealloc) - printf(" %d,", blkno); -#endif - } -#ifdef DEBUG - if (prtrealloc) { - prtrealloc--; - printf("\n"); - } -#endif - return (0); - -fail: - if (ssize < len) - brelse(ebp); - if (sbap != &ip->i_din1->di_db[0]) - brelse(sbp); - return (ENOSPC); -} - -static int -ffs_reallocblks_ufs2(ap) - struct vop_reallocblks_args /* { - struct vnode *a_vp; - struct cluster_save *a_buflist; - } */ *ap; -{ - struct fs *fs; - struct inode *ip; - struct vnode *vp; - struct buf *sbp, *ebp; - ufs2_daddr_t *bap, *sbap, *ebap = 0; - struct cluster_save *buflist; - ufs_lbn_t start_lbn, end_lbn; - ufs2_daddr_t soff, newblk, blkno, pref; - struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp; - int i, len, start_lvl, end_lvl, ssize; - - vp = ap->a_vp; - ip = VTOI(vp); - fs = ip->i_fs; - if (fs->fs_contigsumsize <= 0) - return (ENOSPC); - buflist = ap->a_buflist; - len = buflist->bs_nchildren; - start_lbn = buflist->bs_children[0]->b_lblkno; - end_lbn = start_lbn + len - 1; -#ifdef DIAGNOSTIC - for (i = 0; i < len; i++) - if (!ffs_checkblk(ip, - dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) - panic("ffs_reallocblks: unallocated block 1"); - for (i = 1; i < len; i++) - if (buflist->bs_children[i]->b_lblkno != start_lbn + i) - panic("ffs_reallocblks: non-logical cluster"); - blkno = buflist->bs_children[0]->b_blkno; - ssize = fsbtodb(fs, fs->fs_frag); - for (i = 1; i < len - 1; i++) - if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize)) - panic("ffs_reallocblks: non-physical cluster %d", i); -#endif - /* - * If the latest allocation is in a new cylinder group, assume that - * the filesystem has decided to move and do not force it back to - * the previous cylinder group. - */ - if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) != - dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno))) - return (ENOSPC); - if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) || - ufs_getlbns(vp, end_lbn, end_ap, &end_lvl)) - return (ENOSPC); - /* - * Get the starting offset and block map for the first block. - */ - if (start_lvl == 0) { - sbap = &ip->i_din2->di_db[0]; - soff = start_lbn; - } else { - idp = &start_ap[start_lvl - 1]; - if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) { - brelse(sbp); - return (ENOSPC); - } - sbap = (ufs2_daddr_t *)sbp->b_data; - soff = idp->in_off; - } - /* - * Find the preferred location for the cluster. - */ - pref = ffs_blkpref_ufs2(ip, start_lbn, soff, sbap); - /* - * If the block range spans two block maps, get the second map. - */ - if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { - ssize = len; - } else { -#ifdef DIAGNOSTIC - if (start_ap[start_lvl-1].in_lbn == idp->in_lbn) - panic("ffs_reallocblk: start == end"); -#endif - ssize = len - (idp->in_off + 1); - if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp)) - goto fail; - ebap = (ufs2_daddr_t *)ebp->b_data; - } - /* - * Search the block map looking for an allocation of the desired size. 
- */ - if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref, - len, ffs_clusteralloc)) == 0) - goto fail; - /* - * We have found a new contiguous block. - * - * First we have to replace the old block pointers with the new - * block pointers in the inode and indirect blocks associated - * with the file. - */ -#ifdef DEBUG - if (prtrealloc) - printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number, - (intmax_t)start_lbn, (intmax_t)end_lbn); -#endif - blkno = newblk; - for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) { - if (i == ssize) { - bap = ebap; - soff = -i; - } -#ifdef DIAGNOSTIC - if (!ffs_checkblk(ip, - dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) - panic("ffs_reallocblks: unallocated block 2"); - if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap) - panic("ffs_reallocblks: alloc mismatch"); -#endif -#ifdef DEBUG - if (prtrealloc) - printf(" %jd,", (intmax_t)*bap); -#endif - if (DOINGSOFTDEP(vp)) { - if (sbap == &ip->i_din2->di_db[0] && i < ssize) - softdep_setup_allocdirect(ip, start_lbn + i, - blkno, *bap, fs->fs_bsize, fs->fs_bsize, - buflist->bs_children[i]); - else - softdep_setup_allocindir_page(ip, start_lbn + i, - i < ssize ? sbp : ebp, soff + i, blkno, - *bap, buflist->bs_children[i]); - } - *bap++ = blkno; - } - /* - * Next we must write out the modified inode and indirect blocks. - * For strict correctness, the writes should be synchronous since - * the old block values may have been written to disk. In practise - * they are almost never written, but if we are concerned about - * strict correctness, the `doasyncfree' flag should be set to zero. - * - * The test on `doasyncfree' should be changed to test a flag - * that shows whether the associated buffers and inodes have - * been written. The flag should be set when the cluster is - * started and cleared whenever the buffer or inode is flushed. - * We can then check below to see if it is set, and do the - * synchronous write only when it has been cleared. - */ - if (sbap != &ip->i_din2->di_db[0]) { - if (doasyncfree) - bdwrite(sbp); - else - bwrite(sbp); - } else { - ip->i_flag |= IN_CHANGE | IN_UPDATE; - if (!doasyncfree) - UFS_UPDATE(vp, 1); - } - if (ssize < len) { - if (doasyncfree) - bdwrite(ebp); - else - bwrite(ebp); - } - /* - * Last, free the old blocks and assign the new blocks to the buffers. - */ -#ifdef DEBUG - if (prtrealloc) - printf("\n\tnew:"); -#endif - for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) { - if (!DOINGSOFTDEP(vp)) - ffs_blkfree(fs, ip->i_devvp, - dbtofsb(fs, buflist->bs_children[i]->b_blkno), - fs->fs_bsize, ip->i_number); - buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); -#ifdef DIAGNOSTIC - if (!ffs_checkblk(ip, - dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) - panic("ffs_reallocblks: unallocated block 3"); -#endif -#ifdef DEBUG - if (prtrealloc) - printf(" %jd,", (intmax_t)blkno); -#endif - } -#ifdef DEBUG - if (prtrealloc) { - prtrealloc--; - printf("\n"); - } -#endif - return (0); - -fail: - if (ssize < len) - brelse(ebp); - if (sbap != &ip->i_din2->di_db[0]) - brelse(sbp); - return (ENOSPC); -} - -/* - * Allocate an inode in the filesystem. - * - * If allocating a directory, use ffs_dirpref to select the inode. - * If allocating in a directory, the following hierarchy is followed: - * 1) allocate the preferred inode. - * 2) allocate an inode in the same cylinder group. - * 3) quadradically rehash into other cylinder groups, until an - * available inode is located. 
- * If no inode preference is given the following heirarchy is used - * to allocate an inode: - * 1) allocate an inode in cylinder group 0. - * 2) quadradically rehash into other cylinder groups, until an - * available inode is located. - */ -int -ffs_valloc(pvp, mode, cred, vpp) - struct vnode *pvp; - int mode; - struct ucred *cred; - struct vnode **vpp; -{ - struct inode *pip; - struct fs *fs; - struct inode *ip; - struct timespec ts; - ino_t ino, ipref; - int cg, error; - - *vpp = NULL; - pip = VTOI(pvp); - fs = pip->i_fs; - if (fs->fs_cstotal.cs_nifree == 0) - goto noinodes; - - if ((mode & IFMT) == IFDIR) - ipref = ffs_dirpref(pip); - else - ipref = pip->i_number; - if (ipref >= fs->fs_ncg * fs->fs_ipg) - ipref = 0; - cg = ino_to_cg(fs, ipref); - /* - * Track number of dirs created one after another - * in a same cg without intervening by files. - */ - if ((mode & IFMT) == IFDIR) { - if (fs->fs_contigdirs[cg] < 255) - fs->fs_contigdirs[cg]++; - } else { - if (fs->fs_contigdirs[cg] > 0) - fs->fs_contigdirs[cg]--; - } - ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode, - (allocfcn_t *)ffs_nodealloccg); - if (ino == 0) - goto noinodes; - error = VFS_VGET(pvp->v_mount, ino, LK_EXCLUSIVE, vpp); - if (error) { - UFS_VFREE(pvp, ino, mode); - return (error); - } - ip = VTOI(*vpp); - if (ip->i_mode) { - printf("mode = 0%o, inum = %lu, fs = %s\n", - ip->i_mode, (u_long)ip->i_number, fs->fs_fsmnt); - panic("ffs_valloc: dup alloc"); - } - if (DIP(ip, i_blocks) && (fs->fs_flags & FS_UNCLEAN) == 0) { /* XXX */ - printf("free inode %s/%lu had %ld blocks\n", - fs->fs_fsmnt, (u_long)ino, (long)DIP(ip, i_blocks)); - DIP(ip, i_blocks) = 0; - } - ip->i_flags = 0; - DIP(ip, i_flags) = 0; - /* - * Set up a new generation number for this inode. - */ - if (ip->i_gen == 0 || ++ip->i_gen == 0) - ip->i_gen = arc4random() / 2 + 1; - DIP(ip, i_gen) = ip->i_gen; - if (fs->fs_magic == FS_UFS2_MAGIC) { - vfs_timestamp(&ts); - ip->i_din2->di_birthtime = ts.tv_sec; - ip->i_din2->di_birthnsec = ts.tv_nsec; - } - return (0); -noinodes: - ffs_fserr(fs, pip->i_number, "out of inodes"); - uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt); - return (ENOSPC); -} - -/* - * Find a cylinder group to place a directory. - * - * The policy implemented by this algorithm is to allocate a - * directory inode in the same cylinder group as its parent - * directory, but also to reserve space for its files inodes - * and data. Restrict the number of directories which may be - * allocated one after another in the same cylinder group - * without intervening allocation of files. - * - * If we allocate a first level directory then force allocation - * in another cylinder group. - */ -static ino_t -ffs_dirpref(pip) - struct inode *pip; -{ - struct fs *fs; - int cg, prefcg, dirsize, cgsize; - int avgifree, avgbfree, avgndir, curdirsize; - int minifree, minbfree, maxndir; - int mincg, minndir; - int maxcontigdirs; - - fs = pip->i_fs; - - avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg; - avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; - avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg; - - /* - * Force allocation in another cg if creating a first level dir. 
- */ - ASSERT_VOP_LOCKED(ITOV(pip), "ffs_dirpref"); - if (ITOV(pip)->v_vflag & VV_ROOT) { - prefcg = arc4random() % fs->fs_ncg; - mincg = prefcg; - minndir = fs->fs_ipg; - for (cg = prefcg; cg < fs->fs_ncg; cg++) - if (fs->fs_cs(fs, cg).cs_ndir < minndir && - fs->fs_cs(fs, cg).cs_nifree >= avgifree && - fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { - mincg = cg; - minndir = fs->fs_cs(fs, cg).cs_ndir; - } - for (cg = 0; cg < prefcg; cg++) - if (fs->fs_cs(fs, cg).cs_ndir < minndir && - fs->fs_cs(fs, cg).cs_nifree >= avgifree && - fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { - mincg = cg; - minndir = fs->fs_cs(fs, cg).cs_ndir; - } - return ((ino_t)(fs->fs_ipg * mincg)); - } - - /* - * Count various limits which used for - * optimal allocation of a directory inode. - */ - maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg); - minifree = avgifree - avgifree / 4; - if (minifree < 1) - minifree = 1; - minbfree = avgbfree - avgbfree / 4; - if (minbfree < 1) - minbfree = 1; - cgsize = fs->fs_fsize * fs->fs_fpg; - dirsize = fs->fs_avgfilesize * fs->fs_avgfpdir; - curdirsize = avgndir ? (cgsize - avgbfree * fs->fs_bsize) / avgndir : 0; - if (dirsize < curdirsize) - dirsize = curdirsize; - maxcontigdirs = min((avgbfree * fs->fs_bsize) / dirsize, 255); - if (fs->fs_avgfpdir > 0) - maxcontigdirs = min(maxcontigdirs, - fs->fs_ipg / fs->fs_avgfpdir); - if (maxcontigdirs == 0) - maxcontigdirs = 1; - - /* - * Limit number of dirs in one cg and reserve space for - * regular files, but only if we have no deficit in - * inodes or space. - */ - prefcg = ino_to_cg(fs, pip->i_number); - for (cg = prefcg; cg < fs->fs_ncg; cg++) - if (fs->fs_cs(fs, cg).cs_ndir < maxndir && - fs->fs_cs(fs, cg).cs_nifree >= minifree && - fs->fs_cs(fs, cg).cs_nbfree >= minbfree) { - if (fs->fs_contigdirs[cg] < maxcontigdirs) - return ((ino_t)(fs->fs_ipg * cg)); - } - for (cg = 0; cg < prefcg; cg++) - if (fs->fs_cs(fs, cg).cs_ndir < maxndir && - fs->fs_cs(fs, cg).cs_nifree >= minifree && - fs->fs_cs(fs, cg).cs_nbfree >= minbfree) { - if (fs->fs_contigdirs[cg] < maxcontigdirs) - return ((ino_t)(fs->fs_ipg * cg)); - } - /* - * This is a backstop when we have deficit in space. - */ - for (cg = prefcg; cg < fs->fs_ncg; cg++) - if (fs->fs_cs(fs, cg).cs_nifree >= avgifree) - return ((ino_t)(fs->fs_ipg * cg)); - for (cg = 0; cg < prefcg; cg++) - if (fs->fs_cs(fs, cg).cs_nifree >= avgifree) - break; - return ((ino_t)(fs->fs_ipg * cg)); -} - -/* - * Select the desired position for the next block in a file. The file is - * logically divided into sections. The first section is composed of the - * direct blocks. Each additional section contains fs_maxbpg blocks. - * - * If no blocks have been allocated in the first section, the policy is to - * request a block in the same cylinder group as the inode that describes - * the file. If no blocks have been allocated in any other section, the - * policy is to place the section in a cylinder group with a greater than - * average number of free blocks. An appropriate cylinder group is found - * by using a rotor that sweeps the cylinder groups. When a new group of - * blocks is needed, the sweep begins in the cylinder group following the - * cylinder group from which the previous allocation was made. The sweep - * continues until a cylinder group with greater than the average number - * of free blocks is found. 
If the allocation is for the first block in an - * indirect block, the information on the previous allocation is unavailable; - * here a best guess is made based upon the logical block number being - * allocated. - * - * If a section is already partially allocated, the policy is to - * contiguously allocate fs_maxcontig blocks. The end of one of these - * contiguous blocks and the beginning of the next is laid out - * contiguously if possible. - */ -ufs2_daddr_t -ffs_blkpref_ufs1(ip, lbn, indx, bap) - struct inode *ip; - ufs_lbn_t lbn; - int indx; - ufs1_daddr_t *bap; -{ - struct fs *fs; - int cg; - int avgbfree, startcg; - - fs = ip->i_fs; - if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { - if (lbn < NDADDR + NINDIR(fs)) { - cg = ino_to_cg(fs, ip->i_number); - return (fs->fs_fpg * cg + fs->fs_frag); - } - /* - * Find a cylinder with greater than average number of - * unused data blocks. - */ - if (indx == 0 || bap[indx - 1] == 0) - startcg = - ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg; - else - startcg = dtog(fs, bap[indx - 1]) + 1; - startcg %= fs->fs_ncg; - avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; - for (cg = startcg; cg < fs->fs_ncg; cg++) - if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { - fs->fs_cgrotor = cg; - return (fs->fs_fpg * cg + fs->fs_frag); - } - for (cg = 0; cg <= startcg; cg++) - if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { - fs->fs_cgrotor = cg; - return (fs->fs_fpg * cg + fs->fs_frag); - } - return (0); - } - /* - * We just always try to lay things out contiguously. - */ - return (bap[indx - 1] + fs->fs_frag); -} - -/* - * Same as above, but for UFS2 - */ -ufs2_daddr_t -ffs_blkpref_ufs2(ip, lbn, indx, bap) - struct inode *ip; - ufs_lbn_t lbn; - int indx; - ufs2_daddr_t *bap; -{ - struct fs *fs; - int cg; - int avgbfree, startcg; - - fs = ip->i_fs; - if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { - if (lbn < NDADDR + NINDIR(fs)) { - cg = ino_to_cg(fs, ip->i_number); - return (fs->fs_fpg * cg + fs->fs_frag); - } - /* - * Find a cylinder with greater than average number of - * unused data blocks. - */ - if (indx == 0 || bap[indx - 1] == 0) - startcg = - ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg; - else - startcg = dtog(fs, bap[indx - 1]) + 1; - startcg %= fs->fs_ncg; - avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; - for (cg = startcg; cg < fs->fs_ncg; cg++) - if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { - fs->fs_cgrotor = cg; - return (fs->fs_fpg * cg + fs->fs_frag); - } - for (cg = 0; cg <= startcg; cg++) - if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { - fs->fs_cgrotor = cg; - return (fs->fs_fpg * cg + fs->fs_frag); - } - return (0); - } - /* - * We just always try to lay things out contiguously. - */ - return (bap[indx - 1] + fs->fs_frag); -} - -/* - * Implement the cylinder overflow algorithm. - * - * The policy implemented by this algorithm is: - * 1) allocate the block in its requested cylinder group. - * 2) quadradically rehash on the cylinder group number. - * 3) brute force search for a free block. 
- */ -/*VARARGS5*/ -static ufs2_daddr_t -ffs_hashalloc(ip, cg, pref, size, allocator) - struct inode *ip; - int cg; - ufs2_daddr_t pref; - int size; /* size for data blocks, mode for inodes */ - allocfcn_t *allocator; -{ - struct fs *fs; - ufs2_daddr_t result; - int i, icg = cg; - -#ifdef DIAGNOSTIC - if (ITOV(ip)->v_mount->mnt_kern_flag & MNTK_SUSPENDED) - panic("ffs_hashalloc: allocation on suspended filesystem"); -#endif - fs = ip->i_fs; - /* - * 1: preferred cylinder group - */ - result = (*allocator)(ip, cg, pref, size); - if (result) - return (result); - /* - * 2: quadratic rehash - */ - for (i = 1; i < fs->fs_ncg; i *= 2) { - cg += i; - if (cg >= fs->fs_ncg) - cg -= fs->fs_ncg; - result = (*allocator)(ip, cg, 0, size); - if (result) - return (result); - } - /* - * 3: brute force search - * Note that we start at i == 2, since 0 was checked initially, - * and 1 is always checked in the quadratic rehash. - */ - cg = (icg + 2) % fs->fs_ncg; - for (i = 2; i < fs->fs_ncg; i++) { - result = (*allocator)(ip, cg, 0, size); - if (result) - return (result); - cg++; - if (cg == fs->fs_ncg) - cg = 0; - } - return (0); -} - -/* - * Determine whether a fragment can be extended. - * - * Check to see if the necessary fragments are available, and - * if they are, allocate them. - */ -static ufs2_daddr_t -ffs_fragextend(ip, cg, bprev, osize, nsize) - struct inode *ip; - int cg; - ufs2_daddr_t bprev; - int osize, nsize; -{ - struct fs *fs; - struct cg *cgp; - struct buf *bp; - long bno; - int frags, bbase; - int i, error; - u_int8_t *blksfree; - - fs = ip->i_fs; - if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize)) - return (0); - frags = numfrags(fs, nsize); - bbase = fragnum(fs, bprev); - if (bbase > fragnum(fs, (bprev + frags - 1))) { - /* cannot extend across a block boundary */ - return (0); - } - error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), - (int)fs->fs_cgsize, NOCRED, &bp); - if (error) { - brelse(bp); - return (0); - } - cgp = (struct cg *)bp->b_data; - if (!cg_chkmagic(cgp)) { - brelse(bp); - return (0); - } - bp->b_xflags |= BX_BKGRDWRITE; - cgp->cg_old_time = cgp->cg_time = time_second; - bno = dtogd(fs, bprev); - blksfree = cg_blksfree(cgp); - for (i = numfrags(fs, osize); i < frags; i++) - if (isclr(blksfree, bno + i)) { - brelse(bp); - return (0); - } - /* - * the current fragment can be extended - * deduct the count on fragment being extended into - * increase the count on the remaining fragment (if any) - * allocate the extended piece - */ - for (i = frags; i < fs->fs_frag - bbase; i++) - if (isclr(blksfree, bno + i)) - break; - cgp->cg_frsum[i - numfrags(fs, osize)]--; - if (i != frags) - cgp->cg_frsum[i - frags]++; - for (i = numfrags(fs, osize); i < frags; i++) { - clrbit(blksfree, bno + i); - cgp->cg_cs.cs_nffree--; - fs->fs_cstotal.cs_nffree--; - fs->fs_cs(fs, cg).cs_nffree--; - } - fs->fs_fmod = 1; - if (DOINGSOFTDEP(ITOV(ip))) - softdep_setup_blkmapdep(bp, fs, bprev); - if (fs->fs_active != 0) - atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); - bdwrite(bp); - return (bprev); -} - -/* - * Determine whether a block can be allocated. - * - * Check to see if a block of the appropriate size is available, - * and if it is, allocate it. 
- */ -static ufs2_daddr_t -ffs_alloccg(ip, cg, bpref, size) - struct inode *ip; - int cg; - ufs2_daddr_t bpref; - int size; -{ - struct fs *fs; - struct cg *cgp; - struct buf *bp; - ufs1_daddr_t bno; - ufs2_daddr_t blkno; - int i, allocsiz, error, frags; - u_int8_t *blksfree; - - fs = ip->i_fs; - if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize) - return (0); - error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), - (int)fs->fs_cgsize, NOCRED, &bp); - if (error) { - brelse(bp); - return (0); - } - cgp = (struct cg *)bp->b_data; - if (!cg_chkmagic(cgp) || - (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) { - brelse(bp); - return (0); - } - bp->b_xflags |= BX_BKGRDWRITE; - cgp->cg_old_time = cgp->cg_time = time_second; - if (size == fs->fs_bsize) { - blkno = ffs_alloccgblk(ip, bp, bpref); - if (fs->fs_active != 0) - atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); - bdwrite(bp); - return (blkno); - } - /* - * check to see if any fragments are already available - * allocsiz is the size which will be allocated, hacking - * it down to a smaller size if necessary - */ - blksfree = cg_blksfree(cgp); - frags = numfrags(fs, size); - for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++) - if (cgp->cg_frsum[allocsiz] != 0) - break; - if (allocsiz == fs->fs_frag) { - /* - * no fragments were available, so a block will be - * allocated, and hacked up - */ - if (cgp->cg_cs.cs_nbfree == 0) { - brelse(bp); - return (0); - } - blkno = ffs_alloccgblk(ip, bp, bpref); - bno = dtogd(fs, blkno); - for (i = frags; i < fs->fs_frag; i++) - setbit(blksfree, bno + i); - i = fs->fs_frag - frags; - cgp->cg_cs.cs_nffree += i; - fs->fs_cstotal.cs_nffree += i; - fs->fs_cs(fs, cg).cs_nffree += i; - fs->fs_fmod = 1; - cgp->cg_frsum[i]++; - if (fs->fs_active != 0) - atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); - bdwrite(bp); - return (blkno); - } - bno = ffs_mapsearch(fs, cgp, bpref, allocsiz); - if (bno < 0) { - brelse(bp); - return (0); - } - for (i = 0; i < frags; i++) - clrbit(blksfree, bno + i); - cgp->cg_cs.cs_nffree -= frags; - fs->fs_cstotal.cs_nffree -= frags; - fs->fs_cs(fs, cg).cs_nffree -= frags; - fs->fs_fmod = 1; - cgp->cg_frsum[allocsiz]--; - if (frags != allocsiz) - cgp->cg_frsum[allocsiz - frags]++; - blkno = cg * fs->fs_fpg + bno; - if (DOINGSOFTDEP(ITOV(ip))) - softdep_setup_blkmapdep(bp, fs, blkno); - if (fs->fs_active != 0) - atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); - bdwrite(bp); - return (blkno); -} - -/* - * Allocate a block in a cylinder group. - * - * This algorithm implements the following policy: - * 1) allocate the requested block. - * 2) allocate a rotationally optimal block in the same cylinder. - * 3) allocate the next available block on the block rotor for the - * specified cylinder group. - * Note that this routine only allocates fs_bsize blocks; these - * blocks may be fragmented by the routine that allocates them. 
- */ -static ufs2_daddr_t -ffs_alloccgblk(ip, bp, bpref) - struct inode *ip; - struct buf *bp; - ufs2_daddr_t bpref; -{ - struct fs *fs; - struct cg *cgp; - ufs1_daddr_t bno; - ufs2_daddr_t blkno; - u_int8_t *blksfree; - - fs = ip->i_fs; - cgp = (struct cg *)bp->b_data; - blksfree = cg_blksfree(cgp); - if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) { - bpref = cgp->cg_rotor; - } else { - bpref = blknum(fs, bpref); - bno = dtogd(fs, bpref); - /* - * if the requested block is available, use it - */ - if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno))) - goto gotit; - } - /* - * Take the next available block in this cylinder group. - */ - bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag); - if (bno < 0) - return (0); - cgp->cg_rotor = bno; -gotit: - blkno = fragstoblks(fs, bno); - ffs_clrblock(fs, blksfree, (long)blkno); - ffs_clusteracct(fs, cgp, blkno, -1); - cgp->cg_cs.cs_nbfree--; - fs->fs_cstotal.cs_nbfree--; - fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--; - fs->fs_fmod = 1; - blkno = cgp->cg_cgx * fs->fs_fpg + bno; - if (DOINGSOFTDEP(ITOV(ip))) - softdep_setup_blkmapdep(bp, fs, blkno); - return (blkno); -} - -/* - * Determine whether a cluster can be allocated. - * - * We do not currently check for optimal rotational layout if there - * are multiple choices in the same cylinder group. Instead we just - * take the first one that we find following bpref. - */ -static ufs2_daddr_t -ffs_clusteralloc(ip, cg, bpref, len) - struct inode *ip; - int cg; - ufs2_daddr_t bpref; - int len; -{ - struct fs *fs; - struct cg *cgp; - struct buf *bp; - int i, run, bit, map, got; - ufs2_daddr_t bno; - u_char *mapp; - int32_t *lp; - u_int8_t *blksfree; - - fs = ip->i_fs; - if (fs->fs_maxcluster[cg] < len) - return (0); - if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, - NOCRED, &bp)) - goto fail; - cgp = (struct cg *)bp->b_data; - if (!cg_chkmagic(cgp)) - goto fail; - bp->b_xflags |= BX_BKGRDWRITE; - /* - * Check to see if a cluster of the needed size (or bigger) is - * available in this cylinder group. - */ - lp = &cg_clustersum(cgp)[len]; - for (i = len; i <= fs->fs_contigsumsize; i++) - if (*lp++ > 0) - break; - if (i > fs->fs_contigsumsize) { - /* - * This is the first time looking for a cluster in this - * cylinder group. Update the cluster summary information - * to reflect the true maximum sized cluster so that - * future cluster allocation requests can avoid reading - * the cylinder group map only to find no clusters. - */ - lp = &cg_clustersum(cgp)[len - 1]; - for (i = len - 1; i > 0; i--) - if (*lp-- > 0) - break; - fs->fs_maxcluster[cg] = i; - goto fail; - } - /* - * Search the cluster map to find a big enough cluster. - * We take the first one that we find, even if it is larger - * than we need as we prefer to get one close to the previous - * block allocation. We do not search before the current - * preference point as we do not want to allocate a block - * that is allocated before the previous one (as we will - * then have to wait for another pass of the elevator - * algorithm before it will be read). We prefer to fail and - * be recalled to try an allocation in the next cylinder group. 
- */ - if (dtog(fs, bpref) != cg) - bpref = 0; - else - bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref))); - mapp = &cg_clustersfree(cgp)[bpref / NBBY]; - map = *mapp++; - bit = 1 << (bpref % NBBY); - for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) { - if ((map & bit) == 0) { - run = 0; - } else { - run++; - if (run == len) - break; - } - if ((got & (NBBY - 1)) != (NBBY - 1)) { - bit <<= 1; - } else { - map = *mapp++; - bit = 1; - } - } - if (got >= cgp->cg_nclusterblks) - goto fail; - /* - * Allocate the cluster that we have found. - */ - blksfree = cg_blksfree(cgp); - for (i = 1; i <= len; i++) - if (!ffs_isblock(fs, blksfree, got - run + i)) - panic("ffs_clusteralloc: map mismatch"); - bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1); - if (dtog(fs, bno) != cg) - panic("ffs_clusteralloc: allocated out of group"); - len = blkstofrags(fs, len); - for (i = 0; i < len; i += fs->fs_frag) - if (ffs_alloccgblk(ip, bp, bno + i) != bno + i) - panic("ffs_clusteralloc: lost block"); - if (fs->fs_active != 0) - atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); - bdwrite(bp); - return (bno); - -fail: - brelse(bp); - return (0); -} - -/* - * Determine whether an inode can be allocated. - * - * Check to see if an inode is available, and if it is, - * allocate it using the following policy: - * 1) allocate the requested inode. - * 2) allocate the next available inode after the requested - * inode in the specified cylinder group. - */ -static ufs2_daddr_t -ffs_nodealloccg(ip, cg, ipref, mode) - struct inode *ip; - int cg; - ufs2_daddr_t ipref; - int mode; -{ - struct fs *fs; - struct cg *cgp; - struct buf *bp, *ibp; - u_int8_t *inosused; - struct ufs2_dinode *dp2; - int error, start, len, loc, map, i; - - fs = ip->i_fs; - if (fs->fs_cs(fs, cg).cs_nifree == 0) - return (0); - error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), - (int)fs->fs_cgsize, NOCRED, &bp); - if (error) { - brelse(bp); - return (0); - } - cgp = (struct cg *)bp->b_data; - if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) { - brelse(bp); - return (0); - } - bp->b_xflags |= BX_BKGRDWRITE; - cgp->cg_old_time = cgp->cg_time = time_second; - inosused = cg_inosused(cgp); - if (ipref) { - ipref %= fs->fs_ipg; - if (isclr(inosused, ipref)) - goto gotit; - } - start = cgp->cg_irotor / NBBY; - len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY); - loc = skpc(0xff, len, &inosused[start]); - if (loc == 0) { - len = start + 1; - start = 0; - loc = skpc(0xff, len, &inosused[0]); - if (loc == 0) { - printf("cg = %d, irotor = %ld, fs = %s\n", - cg, (long)cgp->cg_irotor, fs->fs_fsmnt); - panic("ffs_nodealloccg: map corrupted"); - /* NOTREACHED */ - } - } - i = start + len - loc; - map = inosused[i]; - ipref = i * NBBY; - for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) { - if ((map & i) == 0) { - cgp->cg_irotor = ipref; - goto gotit; - } - } - printf("fs = %s\n", fs->fs_fsmnt); - panic("ffs_nodealloccg: block not in map"); - /* NOTREACHED */ -gotit: - if (DOINGSOFTDEP(ITOV(ip))) - softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref); - setbit(inosused, ipref); - cgp->cg_cs.cs_nifree--; - fs->fs_cstotal.cs_nifree--; - fs->fs_cs(fs, cg).cs_nifree--; - fs->fs_fmod = 1; - if ((mode & IFMT) == IFDIR) { - cgp->cg_cs.cs_ndir++; - fs->fs_cstotal.cs_ndir++; - fs->fs_cs(fs, cg).cs_ndir++; - } - /* - * Check to see if we need to initialize more inodes. 
- */ - if (fs->fs_magic == FS_UFS2_MAGIC && - ipref + INOPB(fs) > cgp->cg_initediblk && - cgp->cg_initediblk < cgp->cg_niblk) { - ibp = getblk(ip->i_devvp, fsbtodb(fs, - ino_to_fsba(fs, cg * fs->fs_ipg + cgp->cg_initediblk)), - (int)fs->fs_bsize, 0, 0, 0); - bzero(ibp->b_data, (int)fs->fs_bsize); - dp2 = (struct ufs2_dinode *)(ibp->b_data); - for (i = 0; i < INOPB(fs); i++) { - dp2->di_gen = arc4random() / 2 + 1; - dp2++; - } - bawrite(ibp); - cgp->cg_initediblk += INOPB(fs); - } - if (fs->fs_active != 0) - atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); - bdwrite(bp); - return (cg * fs->fs_ipg + ipref); -} - -/* - * check if a block is free - */ -static int -ffs_isfreeblock(struct fs *fs, u_char *cp, ufs1_daddr_t h) -{ - - switch ((int)fs->fs_frag) { - case 8: - return (cp[h] == 0); - case 4: - return ((cp[h >> 1] & (0x0f << ((h & 0x1) << 2))) == 0); - case 2: - return ((cp[h >> 2] & (0x03 << ((h & 0x3) << 1))) == 0); - case 1: - return ((cp[h >> 3] & (0x01 << (h & 0x7))) == 0); - default: - panic("ffs_isfreeblock"); - } - return (0); -} - -/* - * Free a block or fragment. - * - * The specified block or fragment is placed back in the - * free map. If a fragment is deallocated, a possible - * block reassembly is checked. - */ -void -ffs_blkfree(fs, devvp, bno, size, inum) - struct fs *fs; - struct vnode *devvp; - ufs2_daddr_t bno; - long size; - ino_t inum; -{ - struct cg *cgp; - struct buf *bp; - ufs1_daddr_t fragno, cgbno; - ufs2_daddr_t cgblkno; - int i, cg, blk, frags, bbase; - u_int8_t *blksfree; - dev_t dev; - - cg = dtog(fs, bno); - if (devvp->v_type != VCHR) { - /* devvp is a snapshot */ - dev = VTOI(devvp)->i_devvp->v_rdev; - cgblkno = fragstoblks(fs, cgtod(fs, cg)); - } else { - /* devvp is a normal disk device */ - dev = devvp->v_rdev; - cgblkno = fsbtodb(fs, cgtod(fs, cg)); - ASSERT_VOP_LOCKED(devvp, "ffs_blkfree"); - if ((devvp->v_vflag & VV_COPYONWRITE) && - ffs_snapblkfree(fs, devvp, bno, size, inum)) - return; - VOP_FREEBLKS(devvp, fsbtodb(fs, bno), size); - } -#ifdef DIAGNOSTIC - if (dev->si_mountpoint && - (dev->si_mountpoint->mnt_kern_flag & MNTK_SUSPENDED)) - panic("ffs_blkfree: deallocation on suspended filesystem"); - if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 || - fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) { - printf("dev=%s, bno = %jd, bsize = %ld, size = %ld, fs = %s\n", - devtoname(dev), (intmax_t)bno, (long)fs->fs_bsize, - size, fs->fs_fsmnt); - panic("ffs_blkfree: bad size"); - } -#endif - if ((u_int)bno >= fs->fs_size) { - printf("bad block %jd, ino %lu\n", (intmax_t)bno, - (u_long)inum); - ffs_fserr(fs, inum, "bad block"); - return; - } - if (bread(devvp, cgblkno, (int)fs->fs_cgsize, NOCRED, &bp)) { - brelse(bp); - return; - } - cgp = (struct cg *)bp->b_data; - if (!cg_chkmagic(cgp)) { - brelse(bp); - return; - } - bp->b_xflags |= BX_BKGRDWRITE; - cgp->cg_old_time = cgp->cg_time = time_second; - cgbno = dtogd(fs, bno); - blksfree = cg_blksfree(cgp); - if (size == fs->fs_bsize) { - fragno = fragstoblks(fs, cgbno); - if (!ffs_isfreeblock(fs, blksfree, fragno)) { - if (devvp->v_type != VCHR) { - /* devvp is a snapshot */ - brelse(bp); - return; - } - printf("dev = %s, block = %jd, fs = %s\n", - devtoname(dev), (intmax_t)bno, fs->fs_fsmnt); - panic("ffs_blkfree: freeing free block"); - } - ffs_setblock(fs, blksfree, fragno); - ffs_clusteracct(fs, cgp, fragno, 1); - cgp->cg_cs.cs_nbfree++; - fs->fs_cstotal.cs_nbfree++; - fs->fs_cs(fs, cg).cs_nbfree++; - } else { - bbase = cgbno - fragnum(fs, cgbno); - /* - * decrement the counts 
associated with the old frags - */ - blk = blkmap(fs, blksfree, bbase); - ffs_fragacct(fs, blk, cgp->cg_frsum, -1); - /* - * deallocate the fragment - */ - frags = numfrags(fs, size); - for (i = 0; i < frags; i++) { - if (isset(blksfree, cgbno + i)) { - printf("dev = %s, block = %jd, fs = %s\n", - devtoname(dev), (intmax_t)(bno + i), - fs->fs_fsmnt); - panic("ffs_blkfree: freeing free frag"); - } - setbit(blksfree, cgbno + i); - } - cgp->cg_cs.cs_nffree += i; - fs->fs_cstotal.cs_nffree += i; - fs->fs_cs(fs, cg).cs_nffree += i; - /* - * add back in counts associated with the new frags - */ - blk = blkmap(fs, blksfree, bbase); - ffs_fragacct(fs, blk, cgp->cg_frsum, 1); - /* - * if a complete block has been reassembled, account for it - */ - fragno = fragstoblks(fs, bbase); - if (ffs_isblock(fs, blksfree, fragno)) { - cgp->cg_cs.cs_nffree -= fs->fs_frag; - fs->fs_cstotal.cs_nffree -= fs->fs_frag; - fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag; - ffs_clusteracct(fs, cgp, fragno, 1); - cgp->cg_cs.cs_nbfree++; - fs->fs_cstotal.cs_nbfree++; - fs->fs_cs(fs, cg).cs_nbfree++; - } - } - fs->fs_fmod = 1; - if (fs->fs_active != 0) - atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); - bdwrite(bp); -} - -#ifdef DIAGNOSTIC -/* - * Verify allocation of a block or fragment. Returns true if block or - * fragment is allocated, false if it is free. - */ -static int -ffs_checkblk(ip, bno, size) - struct inode *ip; - ufs2_daddr_t bno; - long size; -{ - struct fs *fs; - struct cg *cgp; - struct buf *bp; - ufs1_daddr_t cgbno; - int i, error, frags, free; - u_int8_t *blksfree; - - fs = ip->i_fs; - if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { - printf("bsize = %ld, size = %ld, fs = %s\n", - (long)fs->fs_bsize, size, fs->fs_fsmnt); - panic("ffs_checkblk: bad size"); - } - if ((u_int)bno >= fs->fs_size) - panic("ffs_checkblk: bad block %jd", (intmax_t)bno); - error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))), - (int)fs->fs_cgsize, NOCRED, &bp); - if (error) - panic("ffs_checkblk: cg bread failed"); - cgp = (struct cg *)bp->b_data; - if (!cg_chkmagic(cgp)) - panic("ffs_checkblk: cg magic mismatch"); - bp->b_xflags |= BX_BKGRDWRITE; - blksfree = cg_blksfree(cgp); - cgbno = dtogd(fs, bno); - if (size == fs->fs_bsize) { - free = ffs_isblock(fs, blksfree, fragstoblks(fs, cgbno)); - } else { - frags = numfrags(fs, size); - for (free = 0, i = 0; i < frags; i++) - if (isset(blksfree, cgbno + i)) - free++; - if (free != 0 && free != frags) - panic("ffs_checkblk: partially free fragment"); - } - brelse(bp); - return (!free); -} -#endif /* DIAGNOSTIC */ - -/* - * Free an inode. - */ -int -ffs_vfree(pvp, ino, mode) - struct vnode *pvp; - ino_t ino; - int mode; -{ - if (DOINGSOFTDEP(pvp)) { - softdep_freefile(pvp, ino, mode); - return (0); - } - return (ffs_freefile(VTOI(pvp)->i_fs, VTOI(pvp)->i_devvp, ino, mode)); -} - -/* - * Do the actual free operation. - * The specified inode is placed back in the free map. 
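- * The caller may pass either the filesystem device vnode or a snapshot
- * vnode as devvp; in the snapshot case the cylinder group is addressed
- * by its logical block number within the snapshot file rather than by
- * its physical disk address.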
- */ -int -ffs_freefile(fs, devvp, ino, mode) - struct fs *fs; - struct vnode *devvp; - ino_t ino; - int mode; -{ - struct cg *cgp; - struct buf *bp; - ufs2_daddr_t cgbno; - int error, cg; - u_int8_t *inosused; - dev_t dev; - - cg = ino_to_cg(fs, ino); - if (devvp->v_type != VCHR) { - /* devvp is a snapshot */ - dev = VTOI(devvp)->i_devvp->v_rdev; - cgbno = fragstoblks(fs, cgtod(fs, cg)); - } else { - /* devvp is a normal disk device */ - dev = devvp->v_rdev; - cgbno = fsbtodb(fs, cgtod(fs, cg)); - } - if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg) - panic("ffs_freefile: range: dev = %s, ino = %lu, fs = %s", - devtoname(dev), (u_long)ino, fs->fs_fsmnt); - if ((error = bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp))) { - brelse(bp); - return (error); - } - cgp = (struct cg *)bp->b_data; - if (!cg_chkmagic(cgp)) { - brelse(bp); - return (0); - } - bp->b_xflags |= BX_BKGRDWRITE; - cgp->cg_old_time = cgp->cg_time = time_second; - inosused = cg_inosused(cgp); - ino %= fs->fs_ipg; - if (isclr(inosused, ino)) { - printf("dev = %s, ino = %lu, fs = %s\n", devtoname(dev), - (u_long)ino + cg * fs->fs_ipg, fs->fs_fsmnt); - if (fs->fs_ronly == 0) - panic("ffs_freefile: freeing free inode"); - } - clrbit(inosused, ino); - if (ino < cgp->cg_irotor) - cgp->cg_irotor = ino; - cgp->cg_cs.cs_nifree++; - fs->fs_cstotal.cs_nifree++; - fs->fs_cs(fs, cg).cs_nifree++; - if ((mode & IFMT) == IFDIR) { - cgp->cg_cs.cs_ndir--; - fs->fs_cstotal.cs_ndir--; - fs->fs_cs(fs, cg).cs_ndir--; - } - fs->fs_fmod = 1; - if (fs->fs_active != 0) - atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); - bdwrite(bp); - return (0); -} - -/* - * Check to see if a file is free. - */ -int -ffs_checkfreefile(fs, devvp, ino) - struct fs *fs; - struct vnode *devvp; - ino_t ino; -{ - struct cg *cgp; - struct buf *bp; - ufs2_daddr_t cgbno; - int ret, cg; - u_int8_t *inosused; - - cg = ino_to_cg(fs, ino); - if (devvp->v_type != VCHR) { - /* devvp is a snapshot */ - cgbno = fragstoblks(fs, cgtod(fs, cg)); - } else { - /* devvp is a normal disk device */ - cgbno = fsbtodb(fs, cgtod(fs, cg)); - } - if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg) - return (1); - if (bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp)) { - brelse(bp); - return (1); - } - cgp = (struct cg *)bp->b_data; - if (!cg_chkmagic(cgp)) { - brelse(bp); - return (1); - } - inosused = cg_inosused(cgp); - ino %= fs->fs_ipg; - ret = isclr(inosused, ino); - brelse(bp); - return (ret); -} - -/* - * Find a block of the specified size in the specified cylinder group. - * - * It is a panic if a request is made to find a block if none are - * available. 
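- * The search begins at the preferred block (or at cg_frotor when no
- * preference is given), scans the free map a byte at a time using
- * fragtbl[] to find a byte whose free-fragment pattern can hold the
- * request, and then sifts through that byte to locate the exact
- * fragment offset.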
- */ -static ufs1_daddr_t -ffs_mapsearch(fs, cgp, bpref, allocsiz) - struct fs *fs; - struct cg *cgp; - ufs2_daddr_t bpref; - int allocsiz; -{ - ufs1_daddr_t bno; - int start, len, loc, i; - int blk, field, subfield, pos; - u_int8_t *blksfree; - - /* - * find the fragment by searching through the free block - * map for an appropriate bit pattern - */ - if (bpref) - start = dtogd(fs, bpref) / NBBY; - else - start = cgp->cg_frotor / NBBY; - blksfree = cg_blksfree(cgp); - len = howmany(fs->fs_fpg, NBBY) - start; - loc = scanc((u_int)len, (u_char *)&blksfree[start], - (u_char *)fragtbl[fs->fs_frag], - (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); - if (loc == 0) { - len = start + 1; - start = 0; - loc = scanc((u_int)len, (u_char *)&blksfree[0], - (u_char *)fragtbl[fs->fs_frag], - (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); - if (loc == 0) { - printf("start = %d, len = %d, fs = %s\n", - start, len, fs->fs_fsmnt); - panic("ffs_alloccg: map corrupted"); - /* NOTREACHED */ - } - } - bno = (start + len - loc) * NBBY; - cgp->cg_frotor = bno; - /* - * found the byte in the map - * sift through the bits to find the selected frag - */ - for (i = bno + NBBY; bno < i; bno += fs->fs_frag) { - blk = blkmap(fs, blksfree, bno); - blk <<= 1; - field = around[allocsiz]; - subfield = inside[allocsiz]; - for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) { - if ((blk & field) == subfield) - return (bno + pos); - field <<= 1; - subfield <<= 1; - } - } - printf("bno = %lu, fs = %s\n", (u_long)bno, fs->fs_fsmnt); - panic("ffs_alloccg: block not in map"); - return (-1); -} - -/* - * Update the cluster map because of an allocation or free. - * - * Cnt == 1 means free; cnt == -1 means allocating. - */ -void -ffs_clusteracct(fs, cgp, blkno, cnt) - struct fs *fs; - struct cg *cgp; - ufs1_daddr_t blkno; - int cnt; -{ - int32_t *sump; - int32_t *lp; - u_char *freemapp, *mapp; - int i, start, end, forw, back, map, bit; - - if (fs->fs_contigsumsize <= 0) - return; - freemapp = cg_clustersfree(cgp); - sump = cg_clustersum(cgp); - /* - * Allocate or clear the actual block. - */ - if (cnt > 0) - setbit(freemapp, blkno); - else - clrbit(freemapp, blkno); - /* - * Find the size of the cluster going forward. - */ - start = blkno + 1; - end = start + fs->fs_contigsumsize; - if (end >= cgp->cg_nclusterblks) - end = cgp->cg_nclusterblks; - mapp = &freemapp[start / NBBY]; - map = *mapp++; - bit = 1 << (start % NBBY); - for (i = start; i < end; i++) { - if ((map & bit) == 0) - break; - if ((i & (NBBY - 1)) != (NBBY - 1)) { - bit <<= 1; - } else { - map = *mapp++; - bit = 1; - } - } - forw = i - start; - /* - * Find the size of the cluster going backward. - */ - start = blkno - 1; - end = start - fs->fs_contigsumsize; - if (end < 0) - end = -1; - mapp = &freemapp[start / NBBY]; - map = *mapp--; - bit = 1 << (start % NBBY); - for (i = start; i > end; i--) { - if ((map & bit) == 0) - break; - if ((i & (NBBY - 1)) != 0) { - bit >>= 1; - } else { - map = *mapp--; - bit = 1 << (NBBY - 1); - } - } - back = start - i; - /* - * Account for old cluster and the possibly new forward and - * back clusters. - */ - i = back + forw + 1; - if (i > fs->fs_contigsumsize) - i = fs->fs_contigsumsize; - sump[i] += cnt; - if (back > 0) - sump[back] -= cnt; - if (forw > 0) - sump[forw] -= cnt; - /* - * Update cluster summary information. 
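- * The largest cluster size still available in this cylinder group is
- * recorded in fs_maxcluster[] so that cluster allocation can cheaply
- * skip groups that are too fragmented to satisfy a request.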
- */ - lp = &sump[fs->fs_contigsumsize]; - for (i = fs->fs_contigsumsize; i > 0; i--) - if (*lp-- > 0) - break; - fs->fs_maxcluster[cgp->cg_cgx] = i; -} - -/* - * Fserr prints the name of a filesystem with an error diagnostic. - * - * The form of the error message is: - * fs: error message - */ -static void -ffs_fserr(fs, inum, cp) - struct fs *fs; - ino_t inum; - char *cp; -{ - struct thread *td = curthread; /* XXX */ - struct proc *p = td->td_proc; - - log(LOG_ERR, "pid %d (%s), uid %d inumber %d on %s: %s\n", - p->p_pid, p->p_comm, td->td_ucred->cr_uid, inum, fs->fs_fsmnt, cp); -} - -/* - * This function provides the capability for the fsck program to - * update an active filesystem. Six operations are provided: - * - * adjrefcnt(inode, amt) - adjusts the reference count on the - * specified inode by the specified amount. Under normal - * operation the count should always go down. Decrementing - * the count to zero will cause the inode to be freed. - * adjblkcnt(inode, amt) - adjust the number of blocks used to - * by the specifed amount. - * freedirs(inode, count) - directory inodes [inode..inode + count - 1] - * are marked as free. Inodes should never have to be marked - * as in use. - * freefiles(inode, count) - file inodes [inode..inode + count - 1] - * are marked as free. Inodes should never have to be marked - * as in use. - * freeblks(blockno, size) - blocks [blockno..blockno + size - 1] - * are marked as free. Blocks should never have to be marked - * as in use. - * setflags(flags, set/clear) - the fs_flags field has the specified - * flags set (second parameter +1) or cleared (second parameter -1). - */ - -static int sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS); - -SYSCTL_PROC(_vfs_ffs, FFS_ADJ_REFCNT, adjrefcnt, CTLFLAG_WR|CTLTYPE_STRUCT, - 0, 0, sysctl_ffs_fsck, "S,fsck", "Adjust Inode Reference Count"); - -SYSCTL_NODE(_vfs_ffs, FFS_ADJ_BLKCNT, adjblkcnt, CTLFLAG_WR, - sysctl_ffs_fsck, "Adjust Inode Used Blocks Count"); - -SYSCTL_NODE(_vfs_ffs, FFS_DIR_FREE, freedirs, CTLFLAG_WR, - sysctl_ffs_fsck, "Free Range of Directory Inodes"); - -SYSCTL_NODE(_vfs_ffs, FFS_FILE_FREE, freefiles, CTLFLAG_WR, - sysctl_ffs_fsck, "Free Range of File Inodes"); - -SYSCTL_NODE(_vfs_ffs, FFS_BLK_FREE, freeblks, CTLFLAG_WR, - sysctl_ffs_fsck, "Free Range of Blocks"); - -SYSCTL_NODE(_vfs_ffs, FFS_SET_FLAGS, setflags, CTLFLAG_WR, - sysctl_ffs_fsck, "Change Filesystem Flags"); - -#ifdef DEBUG -static int fsckcmds = 0; -SYSCTL_INT(_debug, OID_AUTO, fsckcmds, CTLFLAG_RW, &fsckcmds, 0, ""); -#endif /* DEBUG */ - -static int -sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS) -{ - struct fsck_cmd cmd; - struct ufsmount *ump; - struct vnode *vp; - struct inode *ip; - struct mount *mp; - struct fs *fs; - ufs2_daddr_t blkno; - long blkcnt, blksize; - struct file *fp; - int filetype, error; - - if (req->newlen > sizeof cmd) - return (EBADRPC); - if ((error = SYSCTL_IN(req, &cmd, sizeof cmd)) != 0) - return (error); - if (cmd.version != FFS_CMD_VERSION) - return (ERPCMISMATCH); - if ((error = getvnode(curproc->p_fd, cmd.handle, &fp)) != 0) - return (error); - vn_start_write(fp->f_data, &mp, V_WAIT); - if (mp == 0 || strncmp(mp->mnt_stat.f_fstypename, "ufs", MFSNAMELEN)) { - vn_finished_write(mp); - fdrop(fp, curthread); - return (EINVAL); - } - if (mp->mnt_flag & MNT_RDONLY) { - vn_finished_write(mp); - fdrop(fp, curthread); - return (EROFS); - } - ump = VFSTOUFS(mp); - fs = ump->um_fs; - filetype = IFREG; - - switch (oidp->oid_number) { - - case FFS_SET_FLAGS: -#ifdef DEBUG - if (fsckcmds) - printf("%s: %s flags\n", 
mp->mnt_stat.f_mntonname, - cmd.size > 0 ? "set" : "clear"); -#endif /* DEBUG */ - if (cmd.size > 0) - fs->fs_flags |= (long)cmd.value; - else - fs->fs_flags &= ~(long)cmd.value; - break; - - case FFS_ADJ_REFCNT: -#ifdef DEBUG - if (fsckcmds) { - printf("%s: adjust inode %jd count by %jd\n", - mp->mnt_stat.f_mntonname, (intmax_t)cmd.value, - (intmax_t)cmd.size); - } -#endif /* DEBUG */ - if ((error = VFS_VGET(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp))) - break; - ip = VTOI(vp); - ip->i_nlink += cmd.size; - DIP(ip, i_nlink) = ip->i_nlink; - ip->i_effnlink += cmd.size; - ip->i_flag |= IN_CHANGE; - if (DOINGSOFTDEP(vp)) - softdep_change_linkcnt(ip); - vput(vp); - break; - - case FFS_ADJ_BLKCNT: -#ifdef DEBUG - if (fsckcmds) { - printf("%s: adjust inode %jd block count by %jd\n", - mp->mnt_stat.f_mntonname, (intmax_t)cmd.value, - (intmax_t)cmd.size); - } -#endif /* DEBUG */ - if ((error = VFS_VGET(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp))) - break; - ip = VTOI(vp); - DIP(ip, i_blocks) += cmd.size; - ip->i_flag |= IN_CHANGE; - vput(vp); - break; - - case FFS_DIR_FREE: - filetype = IFDIR; - /* fall through */ - - case FFS_FILE_FREE: -#ifdef DEBUG - if (fsckcmds) { - if (cmd.size == 1) - printf("%s: free %s inode %d\n", - mp->mnt_stat.f_mntonname, - filetype == IFDIR ? "directory" : "file", - (ino_t)cmd.value); - else - printf("%s: free %s inodes %d-%d\n", - mp->mnt_stat.f_mntonname, - filetype == IFDIR ? "directory" : "file", - (ino_t)cmd.value, - (ino_t)(cmd.value + cmd.size - 1)); - } -#endif /* DEBUG */ - while (cmd.size > 0) { - if ((error = ffs_freefile(fs, ump->um_devvp, cmd.value, - filetype))) - break; - cmd.size -= 1; - cmd.value += 1; - } - break; - - case FFS_BLK_FREE: -#ifdef DEBUG - if (fsckcmds) { - if (cmd.size == 1) - printf("%s: free block %jd\n", - mp->mnt_stat.f_mntonname, - (intmax_t)cmd.value); - else - printf("%s: free blocks %jd-%jd\n", - mp->mnt_stat.f_mntonname, - (intmax_t)cmd.value, - (intmax_t)cmd.value + cmd.size - 1); - } -#endif /* DEBUG */ - blkno = cmd.value; - blkcnt = cmd.size; - blksize = fs->fs_frag - (blkno % fs->fs_frag); - while (blkcnt > 0) { - if (blksize > blkcnt) - blksize = blkcnt; - ffs_blkfree(fs, ump->um_devvp, blkno, - blksize * fs->fs_fsize, ROOTINO); - blkno += blksize; - blkcnt -= blksize; - blksize = fs->fs_frag; - } - break; - - default: -#ifdef DEBUG - if (fsckcmds) { - printf("Invalid request %d from fsck\n", - oidp->oid_number); - } -#endif /* DEBUG */ - error = EINVAL; - break; - - } - fdrop(fp, curthread); - vn_finished_write(mp); - return (error); -} -#endif diff --git a/src/sys/ufs/ffs/ffs_balloc.c b/src/sys/ufs/ffs/ffs_balloc.c deleted file mode 100644 index 1f5b956..0000000 --- a/src/sys/ufs/ffs/ffs_balloc.c +++ /dev/null @@ -1,889 +0,0 @@ -#if 0 -/* - * Copyright (c) 2002 Networks Associates Technology, Inc. - * All rights reserved. - * - * This software was developed for the FreeBSD Project by Marshall - * Kirk McKusick and Network Associates Laboratories, the Security - * Research Division of Network Associates, Inc. under DARPA/SPAWAR - * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS - * research program - * - * Copyright (c) 1982, 1986, 1989, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95 - */ - -#include -__FBSDID("$FreeBSD: src/sys/ufs/ffs/ffs_balloc.c,v 1.43 2003/08/15 20:03:19 phk Exp $"); - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include - -/* - * Balloc defines the structure of filesystem storage - * by allocating the physical blocks on a device given - * the inode and the logical block number in a file. - * This is the allocation strategy for UFS1. Below is - * the allocation strategy for UFS2. - */ -int -ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size, - struct ucred *cred, int flags, struct buf **bpp) -{ - struct inode *ip; - struct ufs1_dinode *dp; - ufs_lbn_t lbn, lastlbn; - struct fs *fs; - ufs1_daddr_t nb; - struct buf *bp, *nbp; - struct indir indirs[NIADDR + 2]; - int deallocated, osize, nsize, num, i, error; - ufs2_daddr_t newb; - ufs1_daddr_t *bap, pref; - ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; - int unwindidx = -1; - struct thread *td = curthread; /* XXX */ - - ip = VTOI(vp); - dp = ip->i_din1; - fs = ip->i_fs; - lbn = lblkno(fs, startoffset); - size = blkoff(fs, startoffset) + size; - if (size > fs->fs_bsize) - panic("ffs_balloc_ufs1: blk too big"); - *bpp = NULL; - if (flags & IO_EXT) - return (EOPNOTSUPP); - if (lbn < 0) - return (EFBIG); - - /* - * If the next write will extend the file into a new block, - * and the file is currently composed of a fragment - * this fragment has to be extended to be a full block. 
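- * The existing fragment is grown (and possibly relocated) with
- * ffs_realloccg() so that a fragment never appears anywhere but in the
- * last block of a file.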
- */ - lastlbn = lblkno(fs, ip->i_size); - if (lastlbn < NDADDR && lastlbn < lbn) { - nb = lastlbn; - osize = blksize(fs, ip, nb); - if (osize < fs->fs_bsize && osize > 0) { - error = ffs_realloccg(ip, nb, dp->di_db[nb], - ffs_blkpref_ufs1(ip, lastlbn, (int)nb, - &dp->di_db[0]), osize, (int)fs->fs_bsize, cred, &bp); - if (error) - return (error); - if (DOINGSOFTDEP(vp)) - softdep_setup_allocdirect(ip, nb, - dbtofsb(fs, bp->b_blkno), dp->di_db[nb], - fs->fs_bsize, osize, bp); - ip->i_size = smalllblktosize(fs, nb + 1); - dp->di_size = ip->i_size; - dp->di_db[nb] = dbtofsb(fs, bp->b_blkno); - ip->i_flag |= IN_CHANGE | IN_UPDATE; - if (flags & IO_SYNC) - bwrite(bp); - else - bawrite(bp); - } - } - /* - * The first NDADDR blocks are direct blocks - */ - if (lbn < NDADDR) { - if (flags & BA_METAONLY) - panic("ffs_balloc_ufs1: BA_METAONLY for direct block"); - nb = dp->di_db[lbn]; - if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { - error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp); - if (error) { - brelse(bp); - return (error); - } - bp->b_blkno = fsbtodb(fs, nb); - *bpp = bp; - return (0); - } - if (nb != 0) { - /* - * Consider need to reallocate a fragment. - */ - osize = fragroundup(fs, blkoff(fs, ip->i_size)); - nsize = fragroundup(fs, size); - if (nsize <= osize) { - error = bread(vp, lbn, osize, NOCRED, &bp); - if (error) { - brelse(bp); - return (error); - } - bp->b_blkno = fsbtodb(fs, nb); - } else { - error = ffs_realloccg(ip, lbn, dp->di_db[lbn], - ffs_blkpref_ufs1(ip, lbn, (int)lbn, - &dp->di_db[0]), osize, nsize, cred, &bp); - if (error) - return (error); - if (DOINGSOFTDEP(vp)) - softdep_setup_allocdirect(ip, lbn, - dbtofsb(fs, bp->b_blkno), nb, - nsize, osize, bp); - } - } else { - if (ip->i_size < smalllblktosize(fs, lbn + 1)) - nsize = fragroundup(fs, size); - else - nsize = fs->fs_bsize; - error = ffs_alloc(ip, lbn, - ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]), - nsize, cred, &newb); - if (error) - return (error); - bp = getblk(vp, lbn, nsize, 0, 0, 0); - bp->b_blkno = fsbtodb(fs, newb); - if (flags & BA_CLRBUF) - vfs_bio_clrbuf(bp); - if (DOINGSOFTDEP(vp)) - softdep_setup_allocdirect(ip, lbn, newb, 0, - nsize, 0, bp); - } - dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno); - ip->i_flag |= IN_CHANGE | IN_UPDATE; - *bpp = bp; - return (0); - } - /* - * Determine the number of levels of indirection. - */ - pref = 0; - if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) - return(error); -#ifdef DIAGNOSTIC - if (num < 1) - panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block"); -#endif - /* - * Fetch the first indirect block allocating if necessary. - */ - --num; - nb = dp->di_ib[indirs[0].in_off]; - allocib = NULL; - allocblk = allociblk; - if (nb == 0) { - pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0); - if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, - cred, &newb)) != 0) - return (error); - nb = newb; - *allocblk++ = nb; - bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0); - bp->b_blkno = fsbtodb(fs, nb); - vfs_bio_clrbuf(bp); - if (DOINGSOFTDEP(vp)) { - softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, - newb, 0, fs->fs_bsize, 0, bp); - bdwrite(bp); - } else { - /* - * Write synchronously so that indirect blocks - * never point at garbage. - */ - if (DOINGASYNC(vp)) - bdwrite(bp); - else if ((error = bwrite(bp)) != 0) - goto fail; - } - allocib = &dp->di_ib[indirs[0].in_off]; - *allocib = nb; - ip->i_flag |= IN_CHANGE | IN_UPDATE; - } - /* - * Fetch through the indirect blocks, allocating as necessary. 
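- * The loop below walks the indirs[] path computed by ufs_getlbns(),
- * reading each level of indirection and allocating any missing
- * intermediate blocks; a new indirect block is written out (or handed
- * to soft updates) before the pointer to it is installed, so an
- * on-disk indirect block never points at uninitialized data.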
- */ - for (i = 1;;) { - error = bread(vp, - indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); - if (error) { - brelse(bp); - goto fail; - } - bap = (ufs1_daddr_t *)bp->b_data; - nb = bap[indirs[i].in_off]; - if (i == num) - break; - i += 1; - if (nb != 0) { - bqrelse(bp); - continue; - } - if (pref == 0) - pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0); - if ((error = - ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) { - brelse(bp); - goto fail; - } - nb = newb; - *allocblk++ = nb; - nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0); - nbp->b_blkno = fsbtodb(fs, nb); - vfs_bio_clrbuf(nbp); - if (DOINGSOFTDEP(vp)) { - softdep_setup_allocindir_meta(nbp, ip, bp, - indirs[i - 1].in_off, nb); - bdwrite(nbp); - } else { - /* - * Write synchronously so that indirect blocks - * never point at garbage. - */ - if ((error = bwrite(nbp)) != 0) { - brelse(bp); - goto fail; - } - } - bap[indirs[i - 1].in_off] = nb; - if (allocib == NULL && unwindidx < 0) - unwindidx = i - 1; - /* - * If required, write synchronously, otherwise use - * delayed write. - */ - if (flags & IO_SYNC) { - bwrite(bp); - } else { - if (bp->b_bufsize == fs->fs_bsize) - bp->b_flags |= B_CLUSTEROK; - bdwrite(bp); - } - } - /* - * If asked only for the indirect block, then return it. - */ - if (flags & BA_METAONLY) { - *bpp = bp; - return (0); - } - /* - * Get the data block, allocating if necessary. - */ - if (nb == 0) { - pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]); - error = ffs_alloc(ip, - lbn, pref, (int)fs->fs_bsize, cred, &newb); - if (error) { - brelse(bp); - goto fail; - } - nb = newb; - *allocblk++ = nb; - nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0); - nbp->b_blkno = fsbtodb(fs, nb); - if (flags & BA_CLRBUF) - vfs_bio_clrbuf(nbp); - if (DOINGSOFTDEP(vp)) - softdep_setup_allocindir_page(ip, lbn, bp, - indirs[i].in_off, nb, 0, nbp); - bap[indirs[i].in_off] = nb; - /* - * If required, write synchronously, otherwise use - * delayed write. - */ - if (flags & IO_SYNC) { - bwrite(bp); - } else { - if (bp->b_bufsize == fs->fs_bsize) - bp->b_flags |= B_CLUSTEROK; - bdwrite(bp); - } - *bpp = nbp; - return (0); - } - brelse(bp); - if (flags & BA_CLRBUF) { - int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT; - if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { - error = cluster_read(vp, ip->i_size, lbn, - (int)fs->fs_bsize, NOCRED, - MAXBSIZE, seqcount, &nbp); - } else { - error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp); - } - if (error) { - brelse(nbp); - goto fail; - } - } else { - nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0); - nbp->b_blkno = fsbtodb(fs, nb); - } - *bpp = nbp; - return (0); -fail: - /* - * If we have failed to allocate any blocks, simply return the error. - * This is the usual case and avoids the need to fsync the file. - */ - if (allocblk == allociblk && allocib == NULL && unwindidx == -1) - return (error); - /* - * If we have failed part way through block allocation, we - * have to deallocate any indirect blocks that we have allocated. - * We have to fsync the file before we start to get rid of all - * of its dependencies so that we do not leave them dangling. - * We have to sync it at the end so that the soft updates code - * does not find any untracked changes. Although this is really - * slow, running out of disk space is not expected to be a common - * occurence. The error return from fsync is ignored as we already - * have an error to return to the user. 
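- * The recovery path below frees every block recorded in allociblk[]
- * and clears whichever pointer referenced the partially built chain:
- * either the inode's indirect pointer (allocib) or the entry in the
- * indirect block remembered in unwindidx.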
- */ - (void) VOP_FSYNC(vp, cred, MNT_WAIT, td); - for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) { - ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number); - deallocated += fs->fs_bsize; - } - if (allocib != NULL) { - *allocib = 0; - } else if (unwindidx >= 0) { - int r; - - r = bread(vp, indirs[unwindidx].in_lbn, - (int)fs->fs_bsize, NOCRED, &bp); - if (r) { - panic("Could not unwind indirect block, error %d", r); - brelse(bp); - } else { - bap = (ufs1_daddr_t *)bp->b_data; - bap[indirs[unwindidx].in_off] = 0; - if (flags & IO_SYNC) { - bwrite(bp); - } else { - if (bp->b_bufsize == fs->fs_bsize) - bp->b_flags |= B_CLUSTEROK; - bdwrite(bp); - } - } - } - if (deallocated) { -#ifdef QUOTA - /* - * Restore user's disk quota because allocation failed. - */ - (void) chkdq(ip, -btodb(deallocated), cred, FORCE); -#endif - dp->di_blocks -= btodb(deallocated); - ip->i_flag |= IN_CHANGE | IN_UPDATE; - } - (void) VOP_FSYNC(vp, cred, MNT_WAIT, td); - return (error); -} - -/* - * Balloc defines the structure of file system storage - * by allocating the physical blocks on a device given - * the inode and the logical block number in a file. - * This is the allocation strategy for UFS2. Above is - * the allocation strategy for UFS1. - */ -int -ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, - struct ucred *cred, int flags, struct buf **bpp) -{ - struct inode *ip; - struct ufs2_dinode *dp; - ufs_lbn_t lbn, lastlbn; - struct fs *fs; - struct buf *bp, *nbp; - struct indir indirs[NIADDR + 2]; - ufs2_daddr_t nb, newb, *bap, pref; - ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; - int deallocated, osize, nsize, num, i, error; - int unwindidx = -1; - struct thread *td = curthread; /* XXX */ - - ip = VTOI(vp); - dp = ip->i_din2; - fs = ip->i_fs; - lbn = lblkno(fs, startoffset); - size = blkoff(fs, startoffset) + size; - if (size > fs->fs_bsize) - panic("ffs_balloc_ufs2: blk too big"); - *bpp = NULL; - if (lbn < 0) - return (EFBIG); - - /* - * Check for allocating external data. - */ - if (flags & IO_EXT) { - if (lbn >= NXADDR) - return (EFBIG); - /* - * If the next write will extend the data into a new block, - * and the data is currently composed of a fragment - * this fragment has to be extended to be a full block. - */ - lastlbn = lblkno(fs, dp->di_extsize); - if (lastlbn < lbn) { - nb = lastlbn; - osize = sblksize(fs, dp->di_extsize, nb); - if (osize < fs->fs_bsize && osize > 0) { - error = ffs_realloccg(ip, -1 - nb, - dp->di_extb[nb], - ffs_blkpref_ufs2(ip, lastlbn, (int)nb, - &dp->di_extb[0]), osize, - (int)fs->fs_bsize, cred, &bp); - if (error) - return (error); - if (DOINGSOFTDEP(vp)) - softdep_setup_allocext(ip, nb, - dbtofsb(fs, bp->b_blkno), - dp->di_extb[nb], - fs->fs_bsize, osize, bp); - dp->di_extsize = smalllblktosize(fs, nb + 1); - dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno); - bp->b_xflags |= BX_ALTDATA; - ip->i_flag |= IN_CHANGE | IN_UPDATE; - if (flags & IO_SYNC) - bwrite(bp); - else - bawrite(bp); - } - } - /* - * All blocks are direct blocks - */ - if (flags & BA_METAONLY) - panic("ffs_balloc_ufs2: BA_METAONLY for ext block"); - nb = dp->di_extb[lbn]; - if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) { - error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp); - if (error) { - brelse(bp); - return (error); - } - bp->b_blkno = fsbtodb(fs, nb); - bp->b_xflags |= BX_ALTDATA; - *bpp = bp; - return (0); - } - if (nb != 0) { - /* - * Consider need to reallocate a fragment. 
- */ - osize = fragroundup(fs, blkoff(fs, dp->di_extsize)); - nsize = fragroundup(fs, size); - if (nsize <= osize) { - error = bread(vp, -1 - lbn, osize, NOCRED, &bp); - if (error) { - brelse(bp); - return (error); - } - bp->b_blkno = fsbtodb(fs, nb); - bp->b_xflags |= BX_ALTDATA; - } else { - error = ffs_realloccg(ip, -1 - lbn, - dp->di_extb[lbn], - ffs_blkpref_ufs2(ip, lbn, (int)lbn, - &dp->di_extb[0]), osize, nsize, cred, &bp); - if (error) - return (error); - bp->b_xflags |= BX_ALTDATA; - if (DOINGSOFTDEP(vp)) - softdep_setup_allocext(ip, lbn, - dbtofsb(fs, bp->b_blkno), nb, - nsize, osize, bp); - } - } else { - if (dp->di_extsize < smalllblktosize(fs, lbn + 1)) - nsize = fragroundup(fs, size); - else - nsize = fs->fs_bsize; - error = ffs_alloc(ip, lbn, - ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]), - nsize, cred, &newb); - if (error) - return (error); - bp = getblk(vp, -1 - lbn, nsize, 0, 0, 0); - bp->b_blkno = fsbtodb(fs, newb); - bp->b_xflags |= BX_ALTDATA; - if (flags & BA_CLRBUF) - vfs_bio_clrbuf(bp); - if (DOINGSOFTDEP(vp)) - softdep_setup_allocext(ip, lbn, newb, 0, - nsize, 0, bp); - } - dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno); - ip->i_flag |= IN_CHANGE | IN_UPDATE; - *bpp = bp; - return (0); - } - /* - * If the next write will extend the file into a new block, - * and the file is currently composed of a fragment - * this fragment has to be extended to be a full block. - */ - lastlbn = lblkno(fs, ip->i_size); - if (lastlbn < NDADDR && lastlbn < lbn) { - nb = lastlbn; - osize = blksize(fs, ip, nb); - if (osize < fs->fs_bsize && osize > 0) { - error = ffs_realloccg(ip, nb, dp->di_db[nb], - ffs_blkpref_ufs2(ip, lastlbn, (int)nb, - &dp->di_db[0]), osize, (int)fs->fs_bsize, - cred, &bp); - if (error) - return (error); - if (DOINGSOFTDEP(vp)) - softdep_setup_allocdirect(ip, nb, - dbtofsb(fs, bp->b_blkno), - dp->di_db[nb], - fs->fs_bsize, osize, bp); - ip->i_size = smalllblktosize(fs, nb + 1); - dp->di_size = ip->i_size; - dp->di_db[nb] = dbtofsb(fs, bp->b_blkno); - ip->i_flag |= IN_CHANGE | IN_UPDATE; - if (flags & IO_SYNC) - bwrite(bp); - else - bawrite(bp); - } - } - /* - * The first NDADDR blocks are direct blocks - */ - if (lbn < NDADDR) { - if (flags & BA_METAONLY) - panic("ffs_balloc_ufs2: BA_METAONLY for direct block"); - nb = dp->di_db[lbn]; - if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { - error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp); - if (error) { - brelse(bp); - return (error); - } - bp->b_blkno = fsbtodb(fs, nb); - *bpp = bp; - return (0); - } - if (nb != 0) { - /* - * Consider need to reallocate a fragment. 
- */ - osize = fragroundup(fs, blkoff(fs, ip->i_size)); - nsize = fragroundup(fs, size); - if (nsize <= osize) { - error = bread(vp, lbn, osize, NOCRED, &bp); - if (error) { - brelse(bp); - return (error); - } - bp->b_blkno = fsbtodb(fs, nb); - } else { - error = ffs_realloccg(ip, lbn, dp->di_db[lbn], - ffs_blkpref_ufs2(ip, lbn, (int)lbn, - &dp->di_db[0]), osize, nsize, cred, &bp); - if (error) - return (error); - if (DOINGSOFTDEP(vp)) - softdep_setup_allocdirect(ip, lbn, - dbtofsb(fs, bp->b_blkno), nb, - nsize, osize, bp); - } - } else { - if (ip->i_size < smalllblktosize(fs, lbn + 1)) - nsize = fragroundup(fs, size); - else - nsize = fs->fs_bsize; - error = ffs_alloc(ip, lbn, - ffs_blkpref_ufs2(ip, lbn, (int)lbn, - &dp->di_db[0]), nsize, cred, &newb); - if (error) - return (error); - bp = getblk(vp, lbn, nsize, 0, 0, 0); - bp->b_blkno = fsbtodb(fs, newb); - if (flags & BA_CLRBUF) - vfs_bio_clrbuf(bp); - if (DOINGSOFTDEP(vp)) - softdep_setup_allocdirect(ip, lbn, newb, 0, - nsize, 0, bp); - } - dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno); - ip->i_flag |= IN_CHANGE | IN_UPDATE; - *bpp = bp; - return (0); - } - /* - * Determine the number of levels of indirection. - */ - pref = 0; - if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) - return(error); -#ifdef DIAGNOSTIC - if (num < 1) - panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block"); -#endif - /* - * Fetch the first indirect block allocating if necessary. - */ - --num; - nb = dp->di_ib[indirs[0].in_off]; - allocib = NULL; - allocblk = allociblk; - if (nb == 0) { - pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0); - if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, - cred, &newb)) != 0) - return (error); - nb = newb; - *allocblk++ = nb; - bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0); - bp->b_blkno = fsbtodb(fs, nb); - vfs_bio_clrbuf(bp); - if (DOINGSOFTDEP(vp)) { - softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, - newb, 0, fs->fs_bsize, 0, bp); - bdwrite(bp); - } else { - /* - * Write synchronously so that indirect blocks - * never point at garbage. - */ - if (DOINGASYNC(vp)) - bdwrite(bp); - else if ((error = bwrite(bp)) != 0) - goto fail; - } - allocib = &dp->di_ib[indirs[0].in_off]; - *allocib = nb; - ip->i_flag |= IN_CHANGE | IN_UPDATE; - } - /* - * Fetch through the indirect blocks, allocating as necessary. - */ - for (i = 1;;) { - error = bread(vp, - indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); - if (error) { - brelse(bp); - goto fail; - } - bap = (ufs2_daddr_t *)bp->b_data; - nb = bap[indirs[i].in_off]; - if (i == num) - break; - i += 1; - if (nb != 0) { - bqrelse(bp); - continue; - } - if (pref == 0) - pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0); - if ((error = - ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) { - brelse(bp); - goto fail; - } - nb = newb; - *allocblk++ = nb; - nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0); - nbp->b_blkno = fsbtodb(fs, nb); - vfs_bio_clrbuf(nbp); - if (DOINGSOFTDEP(vp)) { - softdep_setup_allocindir_meta(nbp, ip, bp, - indirs[i - 1].in_off, nb); - bdwrite(nbp); - } else { - /* - * Write synchronously so that indirect blocks - * never point at garbage. - */ - if ((error = bwrite(nbp)) != 0) { - brelse(bp); - goto fail; - } - } - bap[indirs[i - 1].in_off] = nb; - if (allocib == NULL && unwindidx < 0) - unwindidx = i - 1; - /* - * If required, write synchronously, otherwise use - * delayed write. 
- */ - if (flags & IO_SYNC) { - bwrite(bp); - } else { - if (bp->b_bufsize == fs->fs_bsize) - bp->b_flags |= B_CLUSTEROK; - bdwrite(bp); - } - } - /* - * If asked only for the indirect block, then return it. - */ - if (flags & BA_METAONLY) { - *bpp = bp; - return (0); - } - /* - * Get the data block, allocating if necessary. - */ - if (nb == 0) { - pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, &bap[0]); - error = ffs_alloc(ip, - lbn, pref, (int)fs->fs_bsize, cred, &newb); - if (error) { - brelse(bp); - goto fail; - } - nb = newb; - *allocblk++ = nb; - nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0); - nbp->b_blkno = fsbtodb(fs, nb); - if (flags & BA_CLRBUF) - vfs_bio_clrbuf(nbp); - if (DOINGSOFTDEP(vp)) - softdep_setup_allocindir_page(ip, lbn, bp, - indirs[i].in_off, nb, 0, nbp); - bap[indirs[i].in_off] = nb; - /* - * If required, write synchronously, otherwise use - * delayed write. - */ - if (flags & IO_SYNC) { - bwrite(bp); - } else { - if (bp->b_bufsize == fs->fs_bsize) - bp->b_flags |= B_CLUSTEROK; - bdwrite(bp); - } - *bpp = nbp; - return (0); - } - brelse(bp); - /* - * If requested clear invalid portions of the buffer. If we - * have to do a read-before-write (typical if BA_CLRBUF is set), - * try to do some read-ahead in the sequential case to reduce - * the number of I/O transactions. - */ - if (flags & BA_CLRBUF) { - int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT; - if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { - error = cluster_read(vp, ip->i_size, lbn, - (int)fs->fs_bsize, NOCRED, - MAXBSIZE, seqcount, &nbp); - } else { - error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp); - } - if (error) { - brelse(nbp); - goto fail; - } - } else { - nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0); - nbp->b_blkno = fsbtodb(fs, nb); - } - *bpp = nbp; - return (0); -fail: - /* - * If we have failed to allocate any blocks, simply return the error. - * This is the usual case and avoids the need to fsync the file. - */ - if (allocblk == allociblk && allocib == NULL && unwindidx == -1) - return (error); - /* - * If we have failed part way through block allocation, we - * have to deallocate any indirect blocks that we have allocated. - * We have to fsync the file before we start to get rid of all - * of its dependencies so that we do not leave them dangling. - * We have to sync it at the end so that the soft updates code - * does not find any untracked changes. Although this is really - * slow, running out of disk space is not expected to be a common - * occurence. The error return from fsync is ignored as we already - * have an error to return to the user. - */ - (void) VOP_FSYNC(vp, cred, MNT_WAIT, td); - for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) { - ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number); - deallocated += fs->fs_bsize; - } - if (allocib != NULL) { - *allocib = 0; - } else if (unwindidx >= 0) { - int r; - - r = bread(vp, indirs[unwindidx].in_lbn, - (int)fs->fs_bsize, NOCRED, &bp); - if (r) { - panic("Could not unwind indirect block, error %d", r); - brelse(bp); - } else { - bap = (ufs2_daddr_t *)bp->b_data; - bap[indirs[unwindidx].in_off] = 0; - if (flags & IO_SYNC) { - bwrite(bp); - } else { - if (bp->b_bufsize == fs->fs_bsize) - bp->b_flags |= B_CLUSTEROK; - bdwrite(bp); - } - } - } - if (deallocated) { -#ifdef QUOTA - /* - * Restore user's disk quota because allocation failed. 
- */ - (void) chkdq(ip, -btodb(deallocated), cred, FORCE); -#endif - dp->di_blocks -= btodb(deallocated); - ip->i_flag |= IN_CHANGE | IN_UPDATE; - } - (void) VOP_FSYNC(vp, cred, MNT_WAIT, td); - return (error); -} -#endif diff --git a/src/sys/ufs/ffs/ffs_extern.h b/src/sys/ufs/ffs/ffs_extern.h deleted file mode 100644 index a83660d..0000000 --- a/src/sys/ufs/ffs/ffs_extern.h +++ /dev/null @@ -1,134 +0,0 @@ -#if 0 -/*- - * Copyright (c) 1991, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * @(#)ffs_extern.h 8.6 (Berkeley) 3/30/95 - * $FreeBSD: src/sys/ufs/ffs/ffs_extern.h,v 1.55 2003/02/22 00:29:50 mckusick Exp $ - */ - -#ifndef _UFS_FFS_EXTERN_H -#define _UFS_FFS_EXTERN_H - -struct buf; -struct cg; -struct fid; -struct fs; -struct inode; -struct malloc_type; -struct mount; -struct thread; -struct sockaddr; -struct statfs; -struct ucred; -struct vnode; -struct vop_fsync_args; -struct vop_reallocblks_args; -struct vop_copyonwrite_args; - -int ffs_alloc(struct inode *, - ufs2_daddr_t, ufs2_daddr_t, int, struct ucred *, ufs2_daddr_t *); -int ffs_balloc_ufs1(struct vnode *a_vp, off_t a_startoffset, int a_size, - struct ucred *a_cred, int a_flags, struct buf **a_bpp); -int ffs_balloc_ufs2(struct vnode *a_vp, off_t a_startoffset, int a_size, - struct ucred *a_cred, int a_flags, struct buf **a_bpp); -int ffs_blkatoff(struct vnode *, off_t, char **, struct buf **); -void ffs_blkfree(struct fs *, struct vnode *, ufs2_daddr_t, long, ino_t); -ufs2_daddr_t ffs_blkpref_ufs1(struct inode *, ufs_lbn_t, int, ufs1_daddr_t *); -ufs2_daddr_t ffs_blkpref_ufs2(struct inode *, ufs_lbn_t, int, ufs2_daddr_t *); -int ffs_checkfreefile(struct fs *, struct vnode *, ino_t); -void ffs_clrblock(struct fs *, u_char *, ufs1_daddr_t); -void ffs_clusteracct (struct fs *, struct cg *, ufs1_daddr_t, int); -vfs_fhtovp_t ffs_fhtovp; -int ffs_flushfiles(struct mount *, int, struct thread *); -void ffs_fragacct(struct fs *, int, int32_t [], int); -int ffs_freefile(struct fs *, struct vnode *, ino_t, int); -int ffs_isblock(struct fs *, u_char *, ufs1_daddr_t); -void ffs_load_inode(struct buf *, struct inode *, struct fs *, ino_t); -int ffs_mountroot(void); -vfs_mount_t ffs_mount; -int ffs_reallocblks(struct vop_reallocblks_args *); -int ffs_realloccg(struct inode *, ufs2_daddr_t, ufs2_daddr_t, - ufs2_daddr_t, int, int, struct ucred *, struct buf **); -void ffs_setblock(struct fs *, u_char *, ufs1_daddr_t); -int ffs_snapblkfree(struct fs *, struct vnode *, ufs2_daddr_t, long, ino_t); -void ffs_snapremove(struct vnode *vp); -int ffs_snapshot(struct mount *mp, char *snapfile); -void ffs_snapshot_mount(struct mount *mp); -void ffs_snapshot_unmount(struct mount *mp); -vfs_statfs_t ffs_statfs; -vfs_sync_t ffs_sync; -int ffs_truncate(struct vnode *, off_t, int, struct ucred *, struct thread *); -vfs_unmount_t ffs_unmount; -int ffs_update(struct vnode *, int); -int ffs_valloc(struct vnode *, int, struct ucred *, struct vnode **); - -int ffs_vfree(struct vnode *, ino_t, int); -vfs_vget_t ffs_vget; -vfs_vptofh_t ffs_vptofh; - -extern vop_t **ffs_vnodeop_p; -extern vop_t **ffs_specop_p; -extern vop_t **ffs_fifoop_p; - -/* - * Soft update function prototypes. 
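- * These routines are implemented by the soft updates code in
- * ffs_softdep.c; most calls into them from the rest of FFS are guarded
- * by DOINGSOFTDEP() checks on the vnode.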
- */ -void softdep_initialize(void); -void softdep_uninitialize(void); -int softdep_mount(struct vnode *, struct mount *, struct fs *, - struct ucred *); -int softdep_flushworklist(struct mount *, int *, struct thread *); -int softdep_flushfiles(struct mount *, int, struct thread *); -void softdep_update_inodeblock(struct inode *, struct buf *, int); -void softdep_load_inodeblock(struct inode *); -void softdep_freefile(struct vnode *, ino_t, int); -int softdep_request_cleanup(struct fs *, struct vnode *); -void softdep_setup_freeblocks(struct inode *, off_t, int); -void softdep_setup_inomapdep(struct buf *, struct inode *, ino_t); -void softdep_setup_blkmapdep(struct buf *, struct fs *, ufs2_daddr_t); -void softdep_setup_allocdirect(struct inode *, ufs_lbn_t, ufs2_daddr_t, - ufs2_daddr_t, long, long, struct buf *); -void softdep_setup_allocext(struct inode *, ufs_lbn_t, ufs2_daddr_t, - ufs2_daddr_t, long, long, struct buf *); -void softdep_setup_allocindir_meta(struct buf *, struct inode *, - struct buf *, int, ufs2_daddr_t); -void softdep_setup_allocindir_page(struct inode *, ufs_lbn_t, - struct buf *, int, ufs2_daddr_t, ufs2_daddr_t, struct buf *); -void softdep_fsync_mountdev(struct vnode *); -int softdep_sync_metadata(struct vop_fsync_args *); -/* XXX incorrectly moved to mount.h - should be indirect function */ -#if 0 -int softdep_fsync(struct vnode *vp); -#endif - -#endif /* !_UFS_FFS_EXTERN_H */ -#endif diff --git a/src/sys/ufs/ffs/ffs_inode.c b/src/sys/ufs/ffs/ffs_inode.c deleted file mode 100644 index c4124c9..0000000 --- a/src/sys/ufs/ffs/ffs_inode.c +++ /dev/null @@ -1,643 +0,0 @@ -#if 0 -/* - * Copyright (c) 1982, 1986, 1989, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * @(#)ffs_inode.c 8.13 (Berkeley) 4/21/95 - */ - -#include -__FBSDID("$FreeBSD: src/sys/ufs/ffs/ffs_inode.c,v 1.91 2003/10/18 14:10:27 phk Exp $"); - -#include "opt_quota.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include - -#include -#include - -static int ffs_indirtrunc(struct inode *, ufs2_daddr_t, ufs2_daddr_t, - ufs2_daddr_t, int, ufs2_daddr_t *); - -/* - * Update the access, modified, and inode change times as specified by the - * IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively. Write the inode - * to disk if the IN_MODIFIED flag is set (it may be set initially, or by - * the timestamp update). The IN_LAZYMOD flag is set to force a write - * later if not now. If we write now, then clear both IN_MODIFIED and - * IN_LAZYMOD to reflect the presumably successful write, and if waitfor is - * set, then wait for the write to complete. - */ -int -ffs_update(vp, waitfor) - struct vnode *vp; - int waitfor; -{ - struct fs *fs; - struct buf *bp; - struct inode *ip; - int error; - -#ifdef DEBUG_VFS_LOCKS - if ((vp->v_iflag & VI_XLOCK) == 0) - ASSERT_VOP_LOCKED(vp, "ffs_update"); -#endif - ufs_itimes(vp); - ip = VTOI(vp); - if ((ip->i_flag & IN_MODIFIED) == 0 && waitfor == 0) - return (0); - ip->i_flag &= ~(IN_LAZYMOD | IN_MODIFIED); - fs = ip->i_fs; - if (fs->fs_ronly) - return (0); - /* - * Ensure that uid and gid are correct. This is a temporary - * fix until fsck has been changed to do the update. - */ - if (fs->fs_magic == FS_UFS1_MAGIC && /* XXX */ - fs->fs_old_inodefmt < FS_44INODEFMT) { /* XXX */ - ip->i_din1->di_ouid = ip->i_uid; /* XXX */ - ip->i_din1->di_ogid = ip->i_gid; /* XXX */ - } /* XXX */ - error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), - (int)fs->fs_bsize, NOCRED, &bp); - if (error) { - brelse(bp); - return (error); - } - if (DOINGSOFTDEP(vp)) - softdep_update_inodeblock(ip, bp, waitfor); - else if (ip->i_effnlink != ip->i_nlink) - panic("ffs_update: bad link cnt"); - if (ip->i_ump->um_fstype == UFS1) - *((struct ufs1_dinode *)bp->b_data + - ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1; - else - *((struct ufs2_dinode *)bp->b_data + - ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2; - if (waitfor && !DOINGASYNC(vp)) { - return (bwrite(bp)); - } else if (vm_page_count_severe() || buf_dirty_count_severe()) { - return (bwrite(bp)); - } else { - if (bp->b_bufsize == fs->fs_bsize) - bp->b_flags |= B_CLUSTEROK; - bdwrite(bp); - return (0); - } -} - -#define SINGLE 0 /* index of single indirect block */ -#define DOUBLE 1 /* index of double indirect block */ -#define TRIPLE 2 /* index of triple indirect block */ -/* - * Truncate the inode oip to at most length size, freeing the - * disk blocks. - */ -int -ffs_truncate(vp, length, flags, cred, td) - struct vnode *vp; - off_t length; - int flags; - struct ucred *cred; - struct thread *td; -{ - struct vnode *ovp = vp; - struct inode *oip; - ufs2_daddr_t bn, lbn, lastblock, lastiblock[NIADDR], indir_lbn[NIADDR]; - ufs2_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR]; - ufs2_daddr_t count, blocksreleased = 0, datablocks; - struct fs *fs; - struct buf *bp; - int needextclean, softdepslowdown, extblocks; - int offset, size, level, nblocks; - int i, error, allerror; - off_t osize; - - oip = VTOI(ovp); - fs = oip->i_fs; - if (length < 0) - return (EINVAL); - /* - * Historically clients did not have to specify which data - * they were truncating. 
So, if not specified, we assume - * traditional behavior, e.g., just the normal data. - */ - if ((flags & (IO_EXT | IO_NORMAL)) == 0) - flags |= IO_NORMAL; - /* - * If we are truncating the extended-attributes, and cannot - * do it with soft updates, then do it slowly here. If we are - * truncating both the extended attributes and the file contents - * (e.g., the file is being unlinked), then pick it off with - * soft updates below. - */ - needextclean = 0; - softdepslowdown = DOINGSOFTDEP(ovp) && softdep_slowdown(ovp); - extblocks = 0; - datablocks = DIP(oip, i_blocks); - if (fs->fs_magic == FS_UFS2_MAGIC && oip->i_din2->di_extsize > 0) { - extblocks = btodb(fragroundup(fs, oip->i_din2->di_extsize)); - datablocks -= extblocks; - } - if ((flags & IO_EXT) && extblocks > 0) { - if (DOINGSOFTDEP(ovp) && softdepslowdown == 0 && length == 0) { - if ((flags & IO_NORMAL) == 0) { - softdep_setup_freeblocks(oip, length, IO_EXT); - return (0); - } - needextclean = 1; - } else { - if (length != 0) - panic("ffs_truncate: partial trunc of extdata"); - if ((error = VOP_FSYNC(ovp, cred, MNT_WAIT, td)) != 0) - return (error); - osize = oip->i_din2->di_extsize; - oip->i_din2->di_blocks -= extblocks; -#ifdef QUOTA - (void) chkdq(oip, -extblocks, NOCRED, 0); -#endif - vinvalbuf(ovp, V_ALT, cred, td, 0, 0); - oip->i_din2->di_extsize = 0; - for (i = 0; i < NXADDR; i++) { - oldblks[i] = oip->i_din2->di_extb[i]; - oip->i_din2->di_extb[i] = 0; - } - oip->i_flag |= IN_CHANGE | IN_UPDATE; - if ((error = ffs_update(ovp, 1))) - return (error); - for (i = 0; i < NXADDR; i++) { - if (oldblks[i] == 0) - continue; - ffs_blkfree(fs, oip->i_devvp, oldblks[i], - sblksize(fs, osize, i), oip->i_number); - } - } - } - if ((flags & IO_NORMAL) == 0) - return (0); - if (length > fs->fs_maxfilesize) - return (EFBIG); - if (ovp->v_type == VLNK && - (oip->i_size < ovp->v_mount->mnt_maxsymlinklen || - datablocks == 0)) { -#ifdef DIAGNOSTIC - if (length != 0) - panic("ffs_truncate: partial truncate of symlink"); -#endif - bzero(SHORTLINK(oip), (u_int)oip->i_size); - oip->i_size = 0; - DIP(oip, i_size) = 0; - oip->i_flag |= IN_CHANGE | IN_UPDATE; - if (needextclean) - softdep_setup_freeblocks(oip, length, IO_EXT); - return (UFS_UPDATE(ovp, 1)); - } - if (oip->i_size == length) { - oip->i_flag |= IN_CHANGE | IN_UPDATE; - if (needextclean) - softdep_setup_freeblocks(oip, length, IO_EXT); - return (UFS_UPDATE(ovp, 0)); - } - if (fs->fs_ronly) - panic("ffs_truncate: read-only filesystem"); -#ifdef QUOTA - error = getinoquota(oip); - if (error) - return (error); -#endif - if ((oip->i_flags & SF_SNAPSHOT) != 0) - ffs_snapremove(ovp); - ovp->v_lasta = ovp->v_clen = ovp->v_cstart = ovp->v_lastw = 0; - if (DOINGSOFTDEP(ovp)) { - if (length > 0 || softdepslowdown) { - /* - * If a file is only partially truncated, then - * we have to clean up the data structures - * describing the allocation past the truncation - * point. Finding and deallocating those structures - * is a lot of work. Since partial truncation occurs - * rarely, we solve the problem by syncing the file - * so that it will have no data structures left. - */ - if ((error = VOP_FSYNC(ovp, cred, MNT_WAIT, td)) != 0) - return (error); - if (oip->i_flag & IN_SPACECOUNTED) - fs->fs_pendingblocks -= datablocks; - } else { -#ifdef QUOTA - (void) chkdq(oip, -datablocks, NOCRED, 0); -#endif - softdep_setup_freeblocks(oip, length, needextclean ? - IO_EXT | IO_NORMAL : IO_NORMAL); - vinvalbuf(ovp, needextclean ? 
0 : V_NORMAL, - cred, td, 0, 0); - oip->i_flag |= IN_CHANGE | IN_UPDATE; - return (ffs_update(ovp, 0)); - } - } - osize = oip->i_size; - /* - * Lengthen the size of the file. We must ensure that the - * last byte of the file is allocated. Since the smallest - * value of osize is 0, length will be at least 1. - */ - if (osize < length) { - vnode_pager_setsize(ovp, length); - flags |= BA_CLRBUF; - error = UFS_BALLOC(ovp, length - 1, 1, cred, flags, &bp); - if (error) - return (error); - oip->i_size = length; - DIP(oip, i_size) = length; - if (bp->b_bufsize == fs->fs_bsize) - bp->b_flags |= B_CLUSTEROK; - if (flags & IO_SYNC) - bwrite(bp); - else - bawrite(bp); - oip->i_flag |= IN_CHANGE | IN_UPDATE; - return (UFS_UPDATE(ovp, 1)); - } - /* - * Shorten the size of the file. If the file is not being - * truncated to a block boundary, the contents of the - * partial block following the end of the file must be - * zero'ed in case it ever becomes accessible again because - * of subsequent file growth. Directories however are not - * zero'ed as they should grow back initialized to empty. - */ - offset = blkoff(fs, length); - if (offset == 0) { - oip->i_size = length; - DIP(oip, i_size) = length; - } else { - lbn = lblkno(fs, length); - flags |= BA_CLRBUF; - error = UFS_BALLOC(ovp, length - 1, 1, cred, flags, &bp); - if (error) { - return (error); - } - /* - * When we are doing soft updates and the UFS_BALLOC - * above fills in a direct block hole with a full sized - * block that will be truncated down to a fragment below, - * we must flush out the block dependency with an FSYNC - * so that we do not get a soft updates inconsistency - * when we create the fragment below. - */ - if (DOINGSOFTDEP(ovp) && lbn < NDADDR && - fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize && - (error = VOP_FSYNC(ovp, cred, MNT_WAIT, td)) != 0) - return (error); - oip->i_size = length; - DIP(oip, i_size) = length; - size = blksize(fs, oip, lbn); - if (ovp->v_type != VDIR) - bzero((char *)bp->b_data + offset, - (u_int)(size - offset)); - /* Kirk's code has reallocbuf(bp, size, 1) here */ - allocbuf(bp, size); - if (bp->b_bufsize == fs->fs_bsize) - bp->b_flags |= B_CLUSTEROK; - if (flags & IO_SYNC) - bwrite(bp); - else - bawrite(bp); - } - /* - * Calculate index into inode's block list of - * last direct and indirect blocks (if any) - * which we want to keep. Lastblock is -1 when - * the file is truncated to 0. - */ - lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; - lastiblock[SINGLE] = lastblock - NDADDR; - lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); - lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); - nblocks = btodb(fs->fs_bsize); - /* - * Update file and block pointers on disk before we start freeing - * blocks. If we crash before free'ing blocks below, the blocks - * will be returned to the free list. lastiblock values are also - * normalized to -1 for calls to ffs_indirtrunc below. - */ - for (level = TRIPLE; level >= SINGLE; level--) { - oldblks[NDADDR + level] = DIP(oip, i_ib[level]); - if (lastiblock[level] < 0) { - DIP(oip, i_ib[level]) = 0; - lastiblock[level] = -1; - } - } - for (i = 0; i < NDADDR; i++) { - oldblks[i] = DIP(oip, i_db[i]); - if (i > lastblock) - DIP(oip, i_db[i]) = 0; - } - oip->i_flag |= IN_CHANGE | IN_UPDATE; - allerror = UFS_UPDATE(ovp, 1); - - /* - * Having written the new inode to disk, save its new configuration - * and put back the old block pointers long enough to process them. 
- * Note that we save the new block configuration so we can check it - * when we are done. - */ - for (i = 0; i < NDADDR; i++) { - newblks[i] = DIP(oip, i_db[i]); - DIP(oip, i_db[i]) = oldblks[i]; - } - for (i = 0; i < NIADDR; i++) { - newblks[NDADDR + i] = DIP(oip, i_ib[i]); - DIP(oip, i_ib[i]) = oldblks[NDADDR + i]; - } - oip->i_size = osize; - DIP(oip, i_size) = osize; - - error = vtruncbuf(ovp, cred, td, length, fs->fs_bsize); - if (error && (allerror == 0)) - allerror = error; - - /* - * Indirect blocks first. - */ - indir_lbn[SINGLE] = -NDADDR; - indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1; - indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1; - for (level = TRIPLE; level >= SINGLE; level--) { - bn = DIP(oip, i_ib[level]); - if (bn != 0) { - error = ffs_indirtrunc(oip, indir_lbn[level], - fsbtodb(fs, bn), lastiblock[level], level, &count); - if (error) - allerror = error; - blocksreleased += count; - if (lastiblock[level] < 0) { - DIP(oip, i_ib[level]) = 0; - ffs_blkfree(fs, oip->i_devvp, bn, fs->fs_bsize, - oip->i_number); - blocksreleased += nblocks; - } - } - if (lastiblock[level] >= 0) - goto done; - } - - /* - * All whole direct blocks or frags. - */ - for (i = NDADDR - 1; i > lastblock; i--) { - long bsize; - - bn = DIP(oip, i_db[i]); - if (bn == 0) - continue; - DIP(oip, i_db[i]) = 0; - bsize = blksize(fs, oip, i); - ffs_blkfree(fs, oip->i_devvp, bn, bsize, oip->i_number); - blocksreleased += btodb(bsize); - } - if (lastblock < 0) - goto done; - - /* - * Finally, look for a change in size of the - * last direct block; release any frags. - */ - bn = DIP(oip, i_db[lastblock]); - if (bn != 0) { - long oldspace, newspace; - - /* - * Calculate amount of space we're giving - * back as old block size minus new block size. - */ - oldspace = blksize(fs, oip, lastblock); - oip->i_size = length; - DIP(oip, i_size) = length; - newspace = blksize(fs, oip, lastblock); - if (newspace == 0) - panic("ffs_truncate: newspace"); - if (oldspace - newspace > 0) { - /* - * Block number of space to be free'd is - * the old block # plus the number of frags - * required for the storage we're keeping. - */ - bn += numfrags(fs, newspace); - ffs_blkfree(fs, oip->i_devvp, bn, oldspace - newspace, - oip->i_number); - blocksreleased += btodb(oldspace - newspace); - } - } -done: -#ifdef DIAGNOSTIC - for (level = SINGLE; level <= TRIPLE; level++) - if (newblks[NDADDR + level] != DIP(oip, i_ib[level])) - panic("ffs_truncate1"); - for (i = 0; i < NDADDR; i++) - if (newblks[i] != DIP(oip, i_db[i])) - panic("ffs_truncate2"); - VI_LOCK(ovp); - if (length == 0 && - (fs->fs_magic != FS_UFS2_MAGIC || oip->i_din2->di_extsize == 0) && - (!TAILQ_EMPTY(&ovp->v_dirtyblkhd) || - !TAILQ_EMPTY(&ovp->v_cleanblkhd))) - panic("ffs_truncate3"); - VI_UNLOCK(ovp); -#endif /* DIAGNOSTIC */ - /* - * Put back the real size. - */ - oip->i_size = length; - DIP(oip, i_size) = length; - DIP(oip, i_blocks) -= blocksreleased; - - if (DIP(oip, i_blocks) < 0) /* sanity */ - DIP(oip, i_blocks) = 0; - oip->i_flag |= IN_CHANGE; -#ifdef QUOTA - (void) chkdq(oip, -blocksreleased, NOCRED, 0); -#endif - return (allerror); -} - -/* - * Release blocks associated with the inode ip and stored in the indirect - * block bn. Blocks are free'd in LIFO order up to (but not including) - * lastbn. If level is greater than SINGLE, the block is an indirect block - * and recursive calls to indirtrunc must be used to cleanse other indirect - * blocks. 
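The comment above summarizes how ffs_indirtrunc() walks indirect blocks; the index arithmetic it relies on is easy to state on its own. Each pointer in an indirect block at a given level spans NINDIR(fs) raised to that level file blocks, so the last slot worth keeping is lastbn scaled down by that factor. A minimal stand-alone sketch follows; indir_last_kept() and nindir are hypothetical names, with nindir standing in for NINDIR(fs) and level counted upward from the single-indirect level.

#include <stdint.h>

/*
 * Illustrative sketch (not part of the original driver): the factor and
 * last-slot computation performed at the top of ffs_indirtrunc() below.
 * A negative lastbn means the entire block is to be released, as in the
 * driver, and is passed through unchanged.
 */
static int64_t
indir_last_kept(int64_t nindir, int level, int64_t lastbn)
{
        int64_t factor = 1;
        int i;

        for (i = 0; i < level; i++)
                factor *= nindir;
        return (lastbn > 0 ? lastbn / factor : lastbn);
}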
- */ -static int -ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp) - struct inode *ip; - ufs2_daddr_t lbn, lastbn; - ufs2_daddr_t dbn; - int level; - ufs2_daddr_t *countp; -{ - struct buf *bp; - struct fs *fs = ip->i_fs; - struct vnode *vp; - caddr_t copy = NULL; - int i, nblocks, error = 0, allerror = 0; - ufs2_daddr_t nb, nlbn, last; - ufs2_daddr_t blkcount, factor, blocksreleased = 0; - ufs1_daddr_t *bap1 = NULL; - ufs2_daddr_t *bap2 = NULL; -# define BAP(ip, i) (((ip)->i_ump->um_fstype == UFS1) ? bap1[i] : bap2[i]) - - /* - * Calculate index in current block of last - * block to be kept. -1 indicates the entire - * block so we need not calculate the index. - */ - factor = 1; - for (i = SINGLE; i < level; i++) - factor *= NINDIR(fs); - last = lastbn; - if (lastbn > 0) - last /= factor; - nblocks = btodb(fs->fs_bsize); - /* - * Get buffer of block pointers, zero those entries corresponding - * to blocks to be free'd, and update on disk copy first. Since - * double(triple) indirect before single(double) indirect, calls - * to bmap on these blocks will fail. However, we already have - * the on disk address, so we have to set the b_blkno field - * explicitly instead of letting bread do everything for us. - */ - vp = ITOV(ip); - bp = getblk(vp, lbn, (int)fs->fs_bsize, 0, 0, 0); - if ((bp->b_flags & B_CACHE) == 0) { - curproc->p_stats->p_ru.ru_inblock++; /* pay for read */ - bp->b_iocmd = BIO_READ; - bp->b_flags &= ~B_INVAL; - bp->b_ioflags &= ~BIO_ERROR; - if (bp->b_bcount > bp->b_bufsize) - panic("ffs_indirtrunc: bad buffer size"); - bp->b_blkno = dbn; - vfs_busy_pages(bp, 0); - bp->b_iooffset = dbtob(bp->b_blkno); - VOP_STRATEGY(bp->b_vp, bp); - error = bufwait(bp); - } - if (error) { - brelse(bp); - *countp = 0; - return (error); - } - - if (ip->i_ump->um_fstype == UFS1) - bap1 = (ufs1_daddr_t *)bp->b_data; - else - bap2 = (ufs2_daddr_t *)bp->b_data; - if (lastbn != -1) { - MALLOC(copy, caddr_t, fs->fs_bsize, M_TEMP, M_WAITOK); - bcopy((caddr_t)bp->b_data, copy, (u_int)fs->fs_bsize); - for (i = last + 1; i < NINDIR(fs); i++) - BAP(ip, i) = 0; - if (DOINGASYNC(vp)) { - bawrite(bp); - } else { - error = bwrite(bp); - if (error) - allerror = error; - } - if (ip->i_ump->um_fstype == UFS1) - bap1 = (ufs1_daddr_t *)copy; - else - bap2 = (ufs2_daddr_t *)copy; - } - - /* - * Recursively free totally unused blocks. - */ - for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; - i--, nlbn += factor) { - nb = BAP(ip, i); - if (nb == 0) - continue; - if (level > SINGLE) { - if ((error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), - (ufs2_daddr_t)-1, level - 1, &blkcount)) != 0) - allerror = error; - blocksreleased += blkcount; - } - ffs_blkfree(fs, ip->i_devvp, nb, fs->fs_bsize, ip->i_number); - blocksreleased += nblocks; - } - - /* - * Recursively free last partial block. - */ - if (level > SINGLE && lastbn >= 0) { - last = lastbn % factor; - nb = BAP(ip, i); - if (nb != 0) { - error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), - last, level - 1, &blkcount); - if (error) - allerror = error; - blocksreleased += blkcount; - } - } - if (copy != NULL) { - FREE(copy, M_TEMP); - } else { - bp->b_flags |= B_INVAL | B_NOCACHE; - brelse(bp); - } - - *countp = blocksreleased; - return (allerror); -} -#endif diff --git a/src/sys/ufs/ffs/ffs_rawread.c b/src/sys/ufs/ffs/ffs_rawread.c deleted file mode 100644 index f0f0f95..0000000 --- a/src/sys/ufs/ffs/ffs_rawread.c +++ /dev/null @@ -1,501 +0,0 @@ -#if 0 -/*- - * Copyright (c) 2000-2003 Tor Egge - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -__FBSDID("$FreeBSD: src/sys/ufs/ffs/ffs_rawread.c,v 1.12 2003/11/15 09:28:09 phk Exp $"); - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -static int ffs_rawread_readahead(struct vnode *vp, - caddr_t udata, - off_t offset, - size_t len, - struct thread *td, - struct buf *bp, - caddr_t sa); -static int ffs_rawread_main(struct vnode *vp, - struct uio *uio); - -static int ffs_rawread_sync(struct vnode *vp, struct thread *td); - -int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone); - -void ffs_rawread_setup(void); - -static void ffs_rawreadwakeup(struct buf *bp); - - -SYSCTL_DECL(_vfs_ffs); - -static int ffsrawbufcnt = 4; -SYSCTL_INT(_vfs_ffs, OID_AUTO, ffsrawbufcnt, CTLFLAG_RD, &ffsrawbufcnt, 0, - "Buffers available for raw reads"); - -static int allowrawread = 1; -SYSCTL_INT(_vfs_ffs, OID_AUTO, allowrawread, CTLFLAG_RW, &allowrawread, 0, - "Flag to enable raw reads"); - -static int rawreadahead = 1; -SYSCTL_INT(_vfs_ffs, OID_AUTO, rawreadahead, CTLFLAG_RW, &rawreadahead, 0, - "Flag to enable readahead for long raw reads"); - - -void -ffs_rawread_setup(void) -{ - ffsrawbufcnt = (nswbuf > 100 ) ? 
(nswbuf - (nswbuf >> 4)) : nswbuf - 8; -} - - -static int -ffs_rawread_sync(struct vnode *vp, struct thread *td) -{ - int spl; - int error; - int upgraded; - - GIANT_REQUIRED; - /* Check for dirty mmap, pending writes and dirty buffers */ - spl = splbio(); - VI_LOCK(vp); - if (vp->v_numoutput > 0 || - !TAILQ_EMPTY(&vp->v_dirtyblkhd) || - (vp->v_iflag & VI_OBJDIRTY) != 0) { - splx(spl); - VI_UNLOCK(vp); - - if (VOP_ISLOCKED(vp, td) != LK_EXCLUSIVE) { - upgraded = 1; - /* Upgrade to exclusive lock, this might block */ - VOP_LOCK(vp, LK_UPGRADE | LK_NOPAUSE, td); - } else - upgraded = 0; - - - /* Attempt to msync mmap() regions to clean dirty mmap */ - VI_LOCK(vp); - if ((vp->v_iflag & VI_OBJDIRTY) != 0) { - struct vm_object *obj; - VI_UNLOCK(vp); - if (VOP_GETVOBJECT(vp, &obj) == 0) { - VM_OBJECT_LOCK(obj); - vm_object_page_clean(obj, 0, 0, OBJPC_SYNC); - VM_OBJECT_UNLOCK(obj); - } - VI_LOCK(vp); - } - - /* Wait for pending writes to complete */ - spl = splbio(); - while (vp->v_numoutput) { - vp->v_iflag |= VI_BWAIT; - error = msleep((caddr_t)&vp->v_numoutput, - VI_MTX(vp), - PRIBIO + 1, - "rawrdfls", 0); - if (error != 0) { - splx(spl); - VI_UNLOCK(vp); - if (upgraded != 0) - VOP_LOCK(vp, LK_DOWNGRADE, td); - return (error); - } - } - /* Flush dirty buffers */ - if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) { - splx(spl); - VI_UNLOCK(vp); - if ((error = VOP_FSYNC(vp, NOCRED, MNT_WAIT, td)) != 0) { - if (upgraded != 0) - VOP_LOCK(vp, LK_DOWNGRADE, td); - return (error); - } - VI_LOCK(vp); - spl = splbio(); - if (vp->v_numoutput > 0 || - !TAILQ_EMPTY(&vp->v_dirtyblkhd)) - panic("ffs_rawread_sync: dirty bufs"); - } - splx(spl); - VI_UNLOCK(vp); - if (upgraded != 0) - VOP_LOCK(vp, LK_DOWNGRADE, td); - } else { - splx(spl); - VI_UNLOCK(vp); - } - return 0; -} - - -static int -ffs_rawread_readahead(struct vnode *vp, - caddr_t udata, - off_t offset, - size_t len, - struct thread *td, - struct buf *bp, - caddr_t sa) -{ - int error; - u_int iolen; - off_t blockno; - int blockoff; - int bsize; - struct vnode *dp; - int bforwards; - struct inode *ip; - ufs2_daddr_t blkno; - - GIANT_REQUIRED; - bsize = vp->v_mount->mnt_stat.f_iosize; - - ip = VTOI(vp); - dp = ip->i_devvp; - - iolen = ((vm_offset_t) udata) & PAGE_MASK; - bp->b_bcount = len; - if (bp->b_bcount + iolen > bp->b_kvasize) { - bp->b_bcount = bp->b_kvasize; - if (iolen != 0) - bp->b_bcount -= PAGE_SIZE; - } - bp->b_flags = 0; /* XXX necessary ? 
*/ - bp->b_iocmd = BIO_READ; - bp->b_iodone = ffs_rawreadwakeup; - bp->b_data = udata; - bp->b_saveaddr = sa; - blockno = offset / bsize; - blockoff = (offset % bsize) / DEV_BSIZE; - if ((daddr_t) blockno != blockno) { - return EINVAL; /* blockno overflow */ - } - - bp->b_lblkno = bp->b_blkno = blockno; - - error = ufs_bmaparray(vp, bp->b_lblkno, &blkno, NULL, &bforwards, NULL); - if (error != 0) - return error; - if (blkno == -1) { - - /* Fill holes with NULs to preserve semantics */ - - if (bp->b_bcount + blockoff * DEV_BSIZE > bsize) - bp->b_bcount = bsize - blockoff * DEV_BSIZE; - bp->b_bufsize = bp->b_bcount; - - if (vmapbuf(bp) < 0) - return EFAULT; - - if (ticks - PCPU_GET(switchticks) >= hogticks) - uio_yield(); - bzero(bp->b_data, bp->b_bufsize); - - /* Mark operation completed (similar to bufdone()) */ - - bp->b_resid = 0; - bp->b_flags |= B_DONE; - return 0; - } - bp->b_blkno = blkno + blockoff; - bp->b_offset = bp->b_iooffset = (blkno + blockoff) * DEV_BSIZE; - - if (bp->b_bcount + blockoff * DEV_BSIZE > bsize * (1 + bforwards)) - bp->b_bcount = bsize * (1 + bforwards) - blockoff * DEV_BSIZE; - bp->b_bufsize = bp->b_bcount; - bp->b_dev = dp->v_rdev; - - if (vmapbuf(bp) < 0) - return EFAULT; - - if (dp->v_type == VCHR) - (void) VOP_SPECSTRATEGY(dp, bp); - else - (void) VOP_STRATEGY(dp, bp); - return 0; -} - - -static int -ffs_rawread_main(struct vnode *vp, - struct uio *uio) -{ - int error, nerror; - struct buf *bp, *nbp, *tbp; - caddr_t sa, nsa, tsa; - u_int iolen; - int spl; - caddr_t udata; - long resid; - off_t offset; - struct thread *td; - - GIANT_REQUIRED; - td = uio->uio_td ? uio->uio_td : curthread; - udata = uio->uio_iov->iov_base; - resid = uio->uio_resid; - offset = uio->uio_offset; - - /* - * keep the process from being swapped - */ - PHOLD(td->td_proc); - - error = 0; - nerror = 0; - - bp = NULL; - nbp = NULL; - sa = NULL; - nsa = NULL; - - while (resid > 0) { - - if (bp == NULL) { /* Setup first read */ - /* XXX: Leave some bufs for swap */ - bp = getpbuf(&ffsrawbufcnt); - sa = bp->b_data; - bp->b_vp = vp; - error = ffs_rawread_readahead(vp, udata, offset, - resid, td, bp, sa); - if (error != 0) - break; - - if (resid > bp->b_bufsize) { /* Setup fist readahead */ - /* XXX: Leave bufs for swap */ - if (rawreadahead != 0) - nbp = trypbuf(&ffsrawbufcnt); - else - nbp = NULL; - if (nbp != NULL) { - nsa = nbp->b_data; - nbp->b_vp = vp; - - nerror = ffs_rawread_readahead(vp, - udata + - bp->b_bufsize, - offset + - bp->b_bufsize, - resid - - bp->b_bufsize, - td, - nbp, - nsa); - if (nerror) { - relpbuf(nbp, &ffsrawbufcnt); - nbp = NULL; - } - } - } - } - - spl = splbio(); - bwait(bp, PRIBIO, "rawrd"); - splx(spl); - - vunmapbuf(bp); - - iolen = bp->b_bcount - bp->b_resid; - if (iolen == 0 && (bp->b_ioflags & BIO_ERROR) == 0) { - nerror = 0; /* Ignore possible beyond EOF error */ - break; /* EOF */ - } - - if ((bp->b_ioflags & BIO_ERROR) != 0) { - error = bp->b_error; - break; - } - resid -= iolen; - udata += iolen; - offset += iolen; - if (iolen < bp->b_bufsize) { - /* Incomplete read. 
Try to read remaining part */ - error = ffs_rawread_readahead(vp, - udata, - offset, - bp->b_bufsize - iolen, - td, - bp, - sa); - if (error != 0) - break; - } else if (nbp != NULL) { /* Complete read with readahead */ - - tbp = bp; - bp = nbp; - nbp = tbp; - - tsa = sa; - sa = nsa; - nsa = tsa; - - if (resid <= bp->b_bufsize) { /* No more readaheads */ - relpbuf(nbp, &ffsrawbufcnt); - nbp = NULL; - } else { /* Setup next readahead */ - nerror = ffs_rawread_readahead(vp, - udata + - bp->b_bufsize, - offset + - bp->b_bufsize, - resid - - bp->b_bufsize, - td, - nbp, - nsa); - if (nerror != 0) { - relpbuf(nbp, &ffsrawbufcnt); - nbp = NULL; - } - } - } else if (nerror != 0) {/* Deferred Readahead error */ - break; - } else if (resid > 0) { /* More to read, no readahead */ - error = ffs_rawread_readahead(vp, udata, offset, - resid, td, bp, sa); - if (error != 0) - break; - } - } - - if (bp != NULL) - relpbuf(bp, &ffsrawbufcnt); - if (nbp != NULL) { /* Run down readahead buffer */ - spl = splbio(); - bwait(nbp, PRIBIO, "rawrd"); - splx(spl); - vunmapbuf(nbp); - relpbuf(nbp, &ffsrawbufcnt); - } - - if (error == 0) - error = nerror; - PRELE(td->td_proc); - uio->uio_iov->iov_base = udata; - uio->uio_resid = resid; - uio->uio_offset = offset; - return error; -} - - -int -ffs_rawread(struct vnode *vp, - struct uio *uio, - int *workdone) -{ - if (allowrawread != 0 && - uio->uio_iovcnt == 1 && - uio->uio_segflg == UIO_USERSPACE && - uio->uio_resid == uio->uio_iov->iov_len && - (((uio->uio_td != NULL) ? uio->uio_td : curthread)->td_flags & - TDF_DEADLKTREAT) == 0) { - int secsize; /* Media sector size */ - off_t filebytes; /* Bytes left of file */ - int blockbytes; /* Bytes left of file in full blocks */ - int partialbytes; /* Bytes in last partial block */ - int skipbytes; /* Bytes not to read in ffs_rawread */ - struct inode *ip; - int error; - - - /* Only handle sector aligned reads */ - ip = VTOI(vp); - secsize = ip->i_devvp->v_rdev->si_bsize_phys; - if ((uio->uio_offset & (secsize - 1)) == 0 && - (uio->uio_resid & (secsize - 1)) == 0) { - - /* Sync dirty pages and buffers if needed */ - error = ffs_rawread_sync(vp, - (uio->uio_td != NULL) ? - uio->uio_td : curthread); - if (error != 0) - return error; - - /* Check for end of file */ - if (ip->i_size > uio->uio_offset) { - filebytes = ip->i_size - uio->uio_offset; - - /* No special eof handling needed ? */ - if (uio->uio_resid <= filebytes) { - *workdone = 1; - return ffs_rawread_main(vp, uio); - } - - partialbytes = ((unsigned int) ip->i_size) % - ip->i_fs->fs_bsize; - blockbytes = (int) filebytes - partialbytes; - if (blockbytes > 0) { - skipbytes = uio->uio_resid - - blockbytes; - uio->uio_resid = blockbytes; - error = ffs_rawread_main(vp, uio); - uio->uio_resid += skipbytes; - if (error != 0) - return error; - /* Read remaining part using buffer */ - } - } - } - } - *workdone = 0; - return 0; -} - - -static void -ffs_rawreadwakeup(struct buf *bp) -{ - bdone(bp); -} - -#endif diff --git a/src/sys/ufs/ffs/ffs_snapshot.c b/src/sys/ufs/ffs/ffs_snapshot.c deleted file mode 100644 index 30784a4..0000000 --- a/src/sys/ufs/ffs/ffs_snapshot.c +++ /dev/null @@ -1,2116 +0,0 @@ -#if 0 -/* - * Copyright 2000 Marshall Kirk McKusick. All Rights Reserved. 
- * - * Further information about snapshots can be obtained from: - * - * Marshall Kirk McKusick http://www.mckusick.com/softdep/ - * 1614 Oxford Street mckusick@mckusick.com - * Berkeley, CA 94709-1608 +1-510-843-9542 - * USA - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)ffs_snapshot.c 8.11 (McKusick) 7/23/00 - */ - -#include -__FBSDID("$FreeBSD: src/sys/ufs/ffs/ffs_snapshot.c,v 1.76 2003/11/13 03:56:32 alc Exp $"); - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include - -#define KERNCRED thread0.td_ucred -#define DEBUG 1 - -static int cgaccount(int, struct vnode *, struct buf *, int); -static int expunge_ufs1(struct vnode *, struct inode *, struct fs *, - int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *, - ufs_lbn_t, int), int); -static int indiracct_ufs1(struct vnode *, struct vnode *, int, - ufs1_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *, - int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *, - ufs_lbn_t, int), int); -static int fullacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, - struct fs *, ufs_lbn_t, int); -static int snapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, - struct fs *, ufs_lbn_t, int); -static int mapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, - struct fs *, ufs_lbn_t, int); -static int expunge_ufs2(struct vnode *, struct inode *, struct fs *, - int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *, - ufs_lbn_t, int), int); -static int indiracct_ufs2(struct vnode *, struct vnode *, int, - ufs2_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *, - int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *, - ufs_lbn_t, int), int); -static int fullacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, - struct fs *, ufs_lbn_t, int); -static int snapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, - struct fs *, ufs_lbn_t, int); -static int mapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, - struct fs *, ufs_lbn_t, int); -static int ffs_copyonwrite(struct vnode *, struct buf *); -static int readblock(struct buf *, 
ufs2_daddr_t); - -/* - * To ensure the consistency of snapshots across crashes, we must - * synchronously write out copied blocks before allowing the - * originals to be modified. Because of the rather severe speed - * penalty that this imposes, the following flag allows this - * crash persistence to be disabled. - */ -int dopersistence = 0; - -#ifdef DEBUG -#include -SYSCTL_INT(_debug, OID_AUTO, dopersistence, CTLFLAG_RW, &dopersistence, 0, ""); -static int snapdebug = 0; -SYSCTL_INT(_debug, OID_AUTO, snapdebug, CTLFLAG_RW, &snapdebug, 0, ""); -int collectsnapstats = 0; -SYSCTL_INT(_debug, OID_AUTO, collectsnapstats, CTLFLAG_RW, &collectsnapstats, - 0, ""); -#endif /* DEBUG */ - -/* - * Create a snapshot file and initialize it for the filesystem. - */ -int -ffs_snapshot(mp, snapfile) - struct mount *mp; - char *snapfile; -{ - ufs2_daddr_t numblks, blkno, *blkp, *snapblklist; - int error, cg, snaploc; - int i, size, len, loc; - int flag = mp->mnt_flag; - struct timespec starttime = {0, 0}, endtime; - char saved_nice = 0; - long redo = 0, snaplistsize = 0; - int32_t *lp; - void *space; - struct fs *copy_fs = NULL, *fs = VFSTOUFS(mp)->um_fs; - struct snaphead *snaphead; - struct thread *td = curthread; - struct inode *ip, *xp; - struct buf *bp, *nbp, *ibp, *sbp = NULL; - struct nameidata nd; - struct mount *wrtmp; - struct vattr vat; - struct vnode *vp, *xvp, *nvp, *devvp; - struct uio auio; - struct iovec aiov; - - /* - * Need to serialize access to snapshot code per filesystem. - */ - /* - * Assign a snapshot slot in the superblock. - */ - for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) - if (fs->fs_snapinum[snaploc] == 0) - break; - if (snaploc == FSMAXSNAP) - return (ENOSPC); - /* - * Create the snapshot file. - */ -restart: - NDINIT(&nd, CREATE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, snapfile, td); - if ((error = namei(&nd)) != 0) - return (error); - if (nd.ni_vp != NULL) { - vput(nd.ni_vp); - error = EEXIST; - } - if (nd.ni_dvp->v_mount != mp) - error = EXDEV; - if (error) { - NDFREE(&nd, NDF_ONLY_PNBUF); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - return (error); - } - VATTR_NULL(&vat); - vat.va_type = VREG; - vat.va_mode = S_IRUSR; - vat.va_vaflags |= VA_EXCLUSIVE; - if (VOP_GETWRITEMOUNT(nd.ni_dvp, &wrtmp)) - wrtmp = NULL; - if (wrtmp != mp) - panic("ffs_snapshot: mount mismatch"); - if (vn_start_write(NULL, &wrtmp, V_NOWAIT) != 0) { - NDFREE(&nd, NDF_ONLY_PNBUF); - vput(nd.ni_dvp); - if ((error = vn_start_write(NULL, &wrtmp, - V_XSLEEP | PCATCH)) != 0) - return (error); - goto restart; - } - VOP_LEASE(nd.ni_dvp, td, KERNCRED, LEASE_WRITE); - error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vat); - vput(nd.ni_dvp); - if (error) { - NDFREE(&nd, NDF_ONLY_PNBUF); - vn_finished_write(wrtmp); - return (error); - } - vp = nd.ni_vp; - ip = VTOI(vp); - devvp = ip->i_devvp; - /* - * Allocate and copy the last block contents so as to be able - * to set size to that of the filesystem. - */ - numblks = howmany(fs->fs_size, fs->fs_frag); - error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(numblks - 1)), - fs->fs_bsize, KERNCRED, BA_CLRBUF, &bp); - if (error) - goto out; - ip->i_size = lblktosize(fs, (off_t)numblks); - DIP(ip, i_size) = ip->i_size; - ip->i_flag |= IN_CHANGE | IN_UPDATE; - if ((error = readblock(bp, numblks - 1)) != 0) - goto out; - bawrite(bp); - /* - * Preallocate critical data structures so that we can copy - * them in without further allocation after we suspend all - * operations on the filesystem. 
We would like to just release - * the allocated buffers without writing them since they will - * be filled in below once we are ready to go, but this upsets - * the soft update code, so we go ahead and write the new buffers. - * - * Allocate all indirect blocks and mark all of them as not - * needing to be copied. - */ - for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) { - error = UFS_BALLOC(vp, lblktosize(fs, (off_t)blkno), - fs->fs_bsize, td->td_ucred, BA_METAONLY, &ibp); - if (error) - goto out; - bawrite(ibp); - } - /* - * Allocate copies for the superblock and its summary information. - */ - error = UFS_BALLOC(vp, fs->fs_sblockloc, fs->fs_sbsize, KERNCRED, - 0, &nbp); - if (error) - goto out; - bawrite(nbp); - blkno = fragstoblks(fs, fs->fs_csaddr); - len = howmany(fs->fs_cssize, fs->fs_bsize); - for (loc = 0; loc < len; loc++) { - error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(blkno + loc)), - fs->fs_bsize, KERNCRED, 0, &nbp); - if (error) - goto out; - bawrite(nbp); - } - /* - * Allocate all cylinder group blocks. - */ - for (cg = 0; cg < fs->fs_ncg; cg++) { - error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)), - fs->fs_bsize, KERNCRED, 0, &nbp); - if (error) - goto out; - bawrite(nbp); - } - /* - * Copy all the cylinder group maps. Although the - * filesystem is still active, we hope that only a few - * cylinder groups will change between now and when we - * suspend operations. Thus, we will be able to quickly - * touch up the few cylinder groups that changed during - * the suspension period. - */ - len = howmany(fs->fs_ncg, NBBY); - MALLOC(fs->fs_active, int *, len, M_DEVBUF, M_WAITOK); - bzero(fs->fs_active, len); - for (cg = 0; cg < fs->fs_ncg; cg++) { - error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)), - fs->fs_bsize, KERNCRED, 0, &nbp); - if (error) - goto out; - error = cgaccount(cg, vp, nbp, 1); - bawrite(nbp); - if (error) - goto out; - } - /* - * Change inode to snapshot type file. - */ - ip->i_flags |= SF_SNAPSHOT; - DIP(ip, i_flags) = ip->i_flags; - ip->i_flag |= IN_CHANGE | IN_UPDATE; - /* - * Ensure that the snapshot is completely on disk. - * Since we have marked it as a snapshot it is safe to - * unlock it as no process will be allowed to write to it. - */ - if ((error = VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td)) != 0) - goto out; - VOP_UNLOCK(vp, 0, td); - /* - * All allocations are done, so we can now snapshot the system. - * - * Recind nice scheduling while running with the filesystem suspended. - */ - if (td->td_ksegrp->kg_nice > 0) { - PROC_LOCK(td->td_proc); - mtx_lock_spin(&sched_lock); - saved_nice = td->td_ksegrp->kg_nice; - sched_nice(td->td_ksegrp, 0); - mtx_unlock_spin(&sched_lock); - PROC_UNLOCK(td->td_proc); - } - /* - * Suspend operation on filesystem. - */ - for (;;) { - vn_finished_write(wrtmp); - if ((error = vfs_write_suspend(vp->v_mount)) != 0) { - vn_start_write(NULL, &wrtmp, V_WAIT); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); - goto out; - } - if (mp->mnt_kern_flag & MNTK_SUSPENDED) - break; - vn_start_write(NULL, &wrtmp, V_WAIT); - } - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); - if (collectsnapstats) - nanotime(&starttime); - /* - * First, copy all the cylinder group maps that have changed. 
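Before the changed groups are touched up, it helps to spell out what the fs_active test in the loop below means. fs_active is the per-cylinder-group bitmap allocated earlier (one bit per group, howmany(fs_ncg, NBBY) bytes); cgaccount() sets a group's bit once its map has been copied, and the bit is presumably cleared elsewhere when the group is modified, so only groups whose bit reads as zero need to be copied again. A hedged sketch of that bit test, assuming ordinary int-wide bitmap words and using hypothetical helper names:

#include <limits.h>

/*
 * Illustrative sketch only: one bit per cylinder group, packed into an
 * array of ints.  Returns non-zero when the group's copy is still valid
 * in the snapshot (its bit is set) and the group can be skipped on the
 * second pass.
 */
static int
cg_copy_still_valid(const int *fs_active, int cg)
{
        int word = cg / (int)(sizeof(int) * CHAR_BIT);
        int mask = 1 << (cg % (int)(sizeof(int) * CHAR_BIT));

        return ((fs_active[word] & mask) != 0);
}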
- */ - for (cg = 0; cg < fs->fs_ncg; cg++) { - if ((ACTIVECGNUM(fs, cg) & ACTIVECGOFF(cg)) != 0) - continue; - redo++; - error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)), - fs->fs_bsize, KERNCRED, 0, &nbp); - if (error) - goto out1; - error = cgaccount(cg, vp, nbp, 2); - bawrite(nbp); - if (error) - goto out1; - } - /* - * Grab a copy of the superblock and its summary information. - * We delay writing it until the suspension is released below. - */ - error = bread(vp, lblkno(fs, fs->fs_sblockloc), fs->fs_bsize, - KERNCRED, &sbp); - if (error) { - brelse(sbp); - sbp = NULL; - goto out1; - } - loc = blkoff(fs, fs->fs_sblockloc); - copy_fs = (struct fs *)(sbp->b_data + loc); - bcopy(fs, copy_fs, fs->fs_sbsize); - if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0) - copy_fs->fs_clean = 1; - size = fs->fs_bsize < SBLOCKSIZE ? fs->fs_bsize : SBLOCKSIZE; - if (fs->fs_sbsize < size) - bzero(&sbp->b_data[loc + fs->fs_sbsize], size - fs->fs_sbsize); - size = blkroundup(fs, fs->fs_cssize); - if (fs->fs_contigsumsize > 0) - size += fs->fs_ncg * sizeof(int32_t); - space = malloc((u_long)size, M_UFSMNT, M_WAITOK); - copy_fs->fs_csp = space; - bcopy(fs->fs_csp, copy_fs->fs_csp, fs->fs_cssize); - (char *)space += fs->fs_cssize; - loc = howmany(fs->fs_cssize, fs->fs_fsize); - i = fs->fs_frag - loc % fs->fs_frag; - len = (i == fs->fs_frag) ? 0 : i * fs->fs_fsize; - if (len > 0) { - if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + loc), - len, KERNCRED, &bp)) != 0) { - brelse(bp); - free(copy_fs->fs_csp, M_UFSMNT); - bawrite(sbp); - sbp = NULL; - goto out1; - } - bcopy(bp->b_data, space, (u_int)len); - (char *)space += len; - bp->b_flags |= B_INVAL | B_NOCACHE; - brelse(bp); - } - if (fs->fs_contigsumsize > 0) { - copy_fs->fs_maxcluster = lp = space; - for (i = 0; i < fs->fs_ncg; i++) - *lp++ = fs->fs_contigsumsize; - } - /* - * We must check for active files that have been unlinked - * (e.g., with a zero link count). We have to expunge all - * trace of these files from the snapshot so that they are - * not reclaimed prematurely by fsck or unnecessarily dumped. - * We turn off the MNTK_SUSPENDED flag to avoid a panic from - * spec_strategy about writing on a suspended filesystem. - * Note that we skip unlinked snapshot files as they will - * be handled separately below. - * - * We also calculate the needed size for the snapshot list. - */ - snaplistsize = fs->fs_ncg + howmany(fs->fs_cssize, fs->fs_bsize) + - FSMAXSNAP + 1 /* superblock */ + 1 /* last block */ + 1 /* size */; - mp->mnt_kern_flag &= ~MNTK_SUSPENDED; - MNT_ILOCK(mp); -loop: - for (xvp = TAILQ_FIRST(&mp->mnt_nvnodelist); xvp; xvp = nvp) { - /* - * Make sure this vnode wasn't reclaimed in getnewvnode(). - * Start over if it has (it won't be on the list anymore). - */ - if (xvp->v_mount != mp) - goto loop; - nvp = TAILQ_NEXT(xvp, v_nmntvnodes); - VI_LOCK(xvp); - MNT_IUNLOCK(mp); - if ((xvp->v_iflag & VI_XLOCK) || - xvp->v_usecount == 0 || xvp->v_type == VNON || - (VTOI(xvp)->i_flags & SF_SNAPSHOT)) { - VI_UNLOCK(xvp); - MNT_ILOCK(mp); - continue; - } - if (snapdebug) - vprint("ffs_snapshot: busy vnode", xvp); - if (vn_lock(xvp, LK_EXCLUSIVE | LK_INTERLOCK, td) != 0) { - MNT_ILOCK(mp); - goto loop; - } - if (VOP_GETATTR(xvp, &vat, td->td_ucred, td) == 0 && - vat.va_nlink > 0) { - VOP_UNLOCK(xvp, 0, td); - MNT_ILOCK(mp); - continue; - } - xp = VTOI(xvp); - if (ffs_checkfreefile(copy_fs, vp, xp->i_number)) { - VOP_UNLOCK(xvp, 0, td); - MNT_ILOCK(mp); - continue; - } - /* - * If there is a fragment, clear it here. 
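The fragment test in the code below packs a little arithmetic into one line; a stand-alone restatement may help. The last block of the file is a candidate only if it is a direct block, and it is a fragment when the partial tail of the file, rounded up to whole fragments, is still smaller than a full block. The sketch uses hypothetical names, with bsize, fsize and ndaddr standing in for fs_bsize, fs_fsize and NDADDR:

#include <stdint.h>

/*
 * Illustrative sketch of the idea behind the test below: howmany()-1
 * gives the index of the last block, blkoff() the partial tail, and
 * fragroundup() the tail rounded up to whole fragments.
 */
static int
ends_in_direct_fragment(int64_t size, int64_t bsize, int64_t fsize, int ndaddr)
{
        int64_t lastblk = (size + bsize - 1) / bsize - 1;
        int64_t tail = size % bsize;
        int64_t len = ((tail + fsize - 1) / fsize) * fsize;

        return (size > 0 && lastblk < ndaddr && tail != 0 && len < bsize);
}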
- */ - blkno = 0; - loc = howmany(xp->i_size, fs->fs_bsize) - 1; - if (loc < NDADDR) { - len = fragroundup(fs, blkoff(fs, xp->i_size)); - if (len < fs->fs_bsize) { - ffs_blkfree(copy_fs, vp, DIP(xp, i_db[loc]), - len, xp->i_number); - blkno = DIP(xp, i_db[loc]); - DIP(xp, i_db[loc]) = 0; - } - } - snaplistsize += 1; - if (xp->i_ump->um_fstype == UFS1) - error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1, - BLK_NOCOPY); - else - error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2, - BLK_NOCOPY); - if (blkno) - DIP(xp, i_db[loc]) = blkno; - if (!error) - error = ffs_freefile(copy_fs, vp, xp->i_number, - xp->i_mode); - VOP_UNLOCK(xvp, 0, td); - if (error) { - free(copy_fs->fs_csp, M_UFSMNT); - bawrite(sbp); - sbp = NULL; - goto out1; - } - MNT_ILOCK(mp); - } - MNT_IUNLOCK(mp); - /* - * If there already exist snapshots on this filesystem, grab a - * reference to their shared lock. If this is the first snapshot - * on this filesystem, we need to allocate a lock for the snapshots - * to share. In either case, acquire the snapshot lock and give - * up our original private lock. - */ - VI_LOCK(devvp); - snaphead = &devvp->v_rdev->si_snapshots; - if ((xp = TAILQ_FIRST(snaphead)) != NULL) { - VI_LOCK(vp); - vp->v_vnlock = ITOV(xp)->v_vnlock; - VI_UNLOCK(devvp); - } else { - struct lock *lkp; - - VI_UNLOCK(devvp); - MALLOC(lkp, struct lock *, sizeof(struct lock), M_UFSMNT, - M_WAITOK); - lockinit(lkp, PVFS, "snaplk", VLKTIMEOUT, - LK_CANRECURSE | LK_NOPAUSE); - VI_LOCK(vp); - vp->v_vnlock = lkp; - } - vn_lock(vp, LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY, td); - transferlockers(&vp->v_lock, vp->v_vnlock); - lockmgr(&vp->v_lock, LK_RELEASE, NULL, td); - /* - * If this is the first snapshot on this filesystem, then we need - * to allocate the space for the list of preallocated snapshot blocks. - * This list will be refined below, but this preliminary one will - * keep us out of deadlock until the full one is ready. - */ - if (xp == NULL) { - MALLOC(snapblklist, daddr_t *, snaplistsize * sizeof(daddr_t), - M_UFSMNT, M_WAITOK); - blkp = &snapblklist[1]; - *blkp++ = lblkno(fs, fs->fs_sblockloc); - blkno = fragstoblks(fs, fs->fs_csaddr); - for (cg = 0; cg < fs->fs_ncg; cg++) { - if (fragstoblks(fs, cgtod(fs, cg) > blkno)) - break; - *blkp++ = fragstoblks(fs, cgtod(fs, cg)); - } - len = howmany(fs->fs_cssize, fs->fs_bsize); - for (loc = 0; loc < len; loc++) - *blkp++ = blkno + loc; - for (; cg < fs->fs_ncg; cg++) - *blkp++ = fragstoblks(fs, cgtod(fs, cg)); - snapblklist[0] = blkp - snapblklist; - VI_LOCK(devvp); - if (devvp->v_rdev->si_snapblklist != NULL) - panic("ffs_snapshot: non-empty list"); - devvp->v_rdev->si_snapblklist = snapblklist; - devvp->v_rdev->si_snaplistsize = blkp - snapblklist; - VI_UNLOCK(devvp); - } - /* - * Record snapshot inode. Since this is the newest snapshot, - * it must be placed at the end of the list. - */ - VI_LOCK(devvp); - fs->fs_snapinum[snaploc] = ip->i_number; - if (ip->i_nextsnap.tqe_prev != 0) - panic("ffs_snapshot: %d already on list", ip->i_number); - TAILQ_INSERT_TAIL(snaphead, ip, i_nextsnap); - devvp->v_rdev->si_copyonwrite = ffs_copyonwrite; - devvp->v_vflag |= VV_COPYONWRITE; - VI_UNLOCK(devvp); - ASSERT_VOP_LOCKED(vp, "ffs_snapshot vp"); - vp->v_vflag |= VV_SYSTEM; -out1: - /* - * Resume operation on filesystem. 
- */ - vfs_write_resume(vp->v_mount); - vn_start_write(NULL, &wrtmp, V_WAIT); - if (collectsnapstats && starttime.tv_sec > 0) { - nanotime(&endtime); - timespecsub(&endtime, &starttime); - printf("%s: suspended %ld.%03ld sec, redo %ld of %d\n", - vp->v_mount->mnt_stat.f_mntonname, (long)endtime.tv_sec, - endtime.tv_nsec / 1000000, redo, fs->fs_ncg); - } - if (sbp == NULL) - goto out; - /* - * Copy allocation information from all the snapshots in - * this snapshot and then expunge them from its view. - */ - snaphead = &devvp->v_rdev->si_snapshots; - TAILQ_FOREACH(xp, snaphead, i_nextsnap) { - if (xp == ip) - break; - if (xp->i_ump->um_fstype == UFS1) - error = expunge_ufs1(vp, xp, fs, snapacct_ufs1, - BLK_SNAP); - else - error = expunge_ufs2(vp, xp, fs, snapacct_ufs2, - BLK_SNAP); - if (error) { - fs->fs_snapinum[snaploc] = 0; - goto done; - } - } - /* - * Allocate space for the full list of preallocated snapshot blocks. - */ - MALLOC(snapblklist, daddr_t *, snaplistsize * sizeof(daddr_t), - M_UFSMNT, M_WAITOK); - ip->i_snapblklist = &snapblklist[1]; - /* - * Expunge the blocks used by the snapshots from the set of - * blocks marked as used in the snapshot bitmaps. Also, collect - * the list of allocated blocks in i_snapblklist. - */ - if (ip->i_ump->um_fstype == UFS1) - error = expunge_ufs1(vp, ip, copy_fs, mapacct_ufs1, BLK_SNAP); - else - error = expunge_ufs2(vp, ip, copy_fs, mapacct_ufs2, BLK_SNAP); - if (error) { - fs->fs_snapinum[snaploc] = 0; - FREE(snapblklist, M_UFSMNT); - goto done; - } - if (snaplistsize < ip->i_snapblklist - snapblklist) - panic("ffs_snapshot: list too small"); - snaplistsize = ip->i_snapblklist - snapblklist; - snapblklist[0] = snaplistsize; - ip->i_snapblklist = 0; - /* - * Write out the list of allocated blocks to the end of the snapshot. - */ - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - aiov.iov_base = (void *)snapblklist; - aiov.iov_len = snaplistsize * sizeof(daddr_t); - auio.uio_resid = aiov.iov_len;; - auio.uio_offset = ip->i_size; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_rw = UIO_WRITE; - auio.uio_td = td; - if ((error = VOP_WRITE(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { - fs->fs_snapinum[snaploc] = 0; - FREE(snapblklist, M_UFSMNT); - goto done; - } - /* - * Write the superblock and its summary information - * to the snapshot. - */ - blkno = fragstoblks(fs, fs->fs_csaddr); - len = howmany(fs->fs_cssize, fs->fs_bsize); - space = copy_fs->fs_csp; - for (loc = 0; loc < len; loc++) { - error = bread(vp, blkno + loc, fs->fs_bsize, KERNCRED, &nbp); - if (error) { - brelse(nbp); - fs->fs_snapinum[snaploc] = 0; - FREE(snapblklist, M_UFSMNT); - goto done; - } - bcopy(space, nbp->b_data, fs->fs_bsize); - space = (char *)space + fs->fs_bsize; - bawrite(nbp); - } - /* - * As this is the newest list, it is the most inclusive, so - * should replace the previous list. 
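The list being installed below has a deliberately simple layout so the copy-on-write hook can consult it cheaply: slot 0 holds the total length of the list (itself included) and the remaining slots hold the logical block numbers already preallocated to the snapshot, which therefore never need to be copied. A minimal lookup over that layout, with int64_t standing in for daddr_t and a plain linear scan where the kernel may well search more cleverly:

#include <stdint.h>

/*
 * Illustrative sketch, assuming only the layout built above:
 * snapblklist[0] = length of the list, snapblklist[1..len-1] = logical
 * block numbers preallocated to the snapshot.
 */
static int
snapblk_preallocated(const int64_t *snapblklist, int64_t lbn)
{
        int64_t i, len = snapblklist[0];

        for (i = 1; i < len; i++)
                if (snapblklist[i] == lbn)
                        return (1);
        return (0);
}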
- */ - VI_LOCK(devvp); - space = devvp->v_rdev->si_snapblklist; - devvp->v_rdev->si_snapblklist = snapblklist; - devvp->v_rdev->si_snaplistsize = snaplistsize; - VI_UNLOCK(devvp); - if (space != NULL) - FREE(space, M_UFSMNT); -done: - free(copy_fs->fs_csp, M_UFSMNT); - bawrite(sbp); -out: - if (saved_nice > 0) { - PROC_LOCK(td->td_proc); - mtx_lock_spin(&sched_lock); - sched_nice(td->td_ksegrp, saved_nice); - mtx_unlock_spin(&sched_lock); - PROC_UNLOCK(td->td_proc); - } - if (fs->fs_active != 0) { - FREE(fs->fs_active, M_DEVBUF); - fs->fs_active = 0; - } - mp->mnt_flag = flag; - if (error) - (void) UFS_TRUNCATE(vp, (off_t)0, 0, NOCRED, td); - (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td); - if (error) - vput(vp); - else - VOP_UNLOCK(vp, 0, td); - vn_finished_write(wrtmp); - return (error); -} - -/* - * Copy a cylinder group map. All the unallocated blocks are marked - * BLK_NOCOPY so that the snapshot knows that it need not copy them - * if they are later written. If passno is one, then this is a first - * pass, so only setting needs to be done. If passno is 2, then this - * is a revision to a previous pass which must be undone as the - * replacement pass is done. - */ -static int -cgaccount(cg, vp, nbp, passno) - int cg; - struct vnode *vp; - struct buf *nbp; - int passno; -{ - struct buf *bp, *ibp; - struct inode *ip; - struct cg *cgp; - struct fs *fs; - ufs2_daddr_t base, numblks; - int error, len, loc, indiroff; - - ip = VTOI(vp); - fs = ip->i_fs; - error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), - (int)fs->fs_cgsize, KERNCRED, &bp); - if (error) { - brelse(bp); - return (error); - } - cgp = (struct cg *)bp->b_data; - if (!cg_chkmagic(cgp)) { - brelse(bp); - return (EIO); - } - atomic_set_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); - bcopy(bp->b_data, nbp->b_data, fs->fs_cgsize); - if (fs->fs_cgsize < fs->fs_bsize) - bzero(&nbp->b_data[fs->fs_cgsize], - fs->fs_bsize - fs->fs_cgsize); - if (passno == 2) - nbp->b_flags |= B_VALIDSUSPWRT; - numblks = howmany(fs->fs_size, fs->fs_frag); - len = howmany(fs->fs_fpg, fs->fs_frag); - base = cg * fs->fs_fpg / fs->fs_frag; - if (base + len >= numblks) - len = numblks - base - 1; - loc = 0; - if (base < NDADDR) { - for ( ; loc < NDADDR; loc++) { - if (ffs_isblock(fs, cg_blksfree(cgp), loc)) - DIP(ip, i_db[loc]) = BLK_NOCOPY; - else if (passno == 2 && DIP(ip, i_db[loc])== BLK_NOCOPY) - DIP(ip, i_db[loc]) = 0; - else if (passno == 1 && DIP(ip, i_db[loc])== BLK_NOCOPY) - panic("ffs_snapshot: lost direct block"); - } - } - error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(base + loc)), - fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); - if (error) { - brelse(bp); - return (error); - } - indiroff = (base + loc - NDADDR) % NINDIR(fs); - for ( ; loc < len; loc++, indiroff++) { - if (indiroff >= NINDIR(fs)) { - if (passno == 2) - ibp->b_flags |= B_VALIDSUSPWRT; - bawrite(ibp); - error = UFS_BALLOC(vp, - lblktosize(fs, (off_t)(base + loc)), - fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); - if (error) { - brelse(bp); - return (error); - } - indiroff = 0; - } - if (ip->i_ump->um_fstype == UFS1) { - if (ffs_isblock(fs, cg_blksfree(cgp), loc)) - ((ufs1_daddr_t *)(ibp->b_data))[indiroff] = - BLK_NOCOPY; - else if (passno == 2 && ((ufs1_daddr_t *)(ibp->b_data)) - [indiroff] == BLK_NOCOPY) - ((ufs1_daddr_t *)(ibp->b_data))[indiroff] = 0; - else if (passno == 1 && ((ufs1_daddr_t *)(ibp->b_data)) - [indiroff] == BLK_NOCOPY) - panic("ffs_snapshot: lost indirect block"); - continue; - } - if (ffs_isblock(fs, cg_blksfree(cgp), loc)) - ((ufs2_daddr_t 
*)(ibp->b_data))[indiroff] = BLK_NOCOPY; - else if (passno == 2 && - ((ufs2_daddr_t *)(ibp->b_data)) [indiroff] == BLK_NOCOPY) - ((ufs2_daddr_t *)(ibp->b_data))[indiroff] = 0; - else if (passno == 1 && - ((ufs2_daddr_t *)(ibp->b_data)) [indiroff] == BLK_NOCOPY) - panic("ffs_snapshot: lost indirect block"); - } - bqrelse(bp); - if (passno == 2) - ibp->b_flags |= B_VALIDSUSPWRT; - bdwrite(ibp); - return (0); -} - -/* - * Before expunging a snapshot inode, note all the - * blocks that it claims with BLK_SNAP so that fsck will - * be able to account for those blocks properly and so - * that this snapshot knows that it need not copy them - * if the other snapshot holding them is freed. This code - * is reproduced once each for UFS1 and UFS2. - */ -static int -expunge_ufs1(snapvp, cancelip, fs, acctfunc, expungetype) - struct vnode *snapvp; - struct inode *cancelip; - struct fs *fs; - int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, - struct fs *, ufs_lbn_t, int); - int expungetype; -{ - int i, error, indiroff; - ufs_lbn_t lbn, rlbn; - ufs2_daddr_t len, blkno, numblks, blksperindir; - struct ufs1_dinode *dip; - struct thread *td = curthread; - struct buf *bp; - - /* - * Prepare to expunge the inode. If its inode block has not - * yet been copied, then allocate and fill the copy. - */ - lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number)); - blkno = 0; - if (lbn < NDADDR) { - blkno = VTOI(snapvp)->i_din1->di_db[lbn]; - } else { - td->td_pflags |= TDP_COWINPROGRESS; - error = UFS_BALLOC(snapvp, lblktosize(fs, (off_t)lbn), - fs->fs_bsize, KERNCRED, BA_METAONLY, &bp); - td->td_pflags &= ~TDP_COWINPROGRESS; - if (error) - return (error); - indiroff = (lbn - NDADDR) % NINDIR(fs); - blkno = ((ufs1_daddr_t *)(bp->b_data))[indiroff]; - bqrelse(bp); - } - if (blkno != 0) { - if ((error = bread(snapvp, lbn, fs->fs_bsize, KERNCRED, &bp))) - return (error); - } else { - error = UFS_BALLOC(snapvp, lblktosize(fs, (off_t)lbn), - fs->fs_bsize, KERNCRED, 0, &bp); - if (error) - return (error); - if ((error = readblock(bp, lbn)) != 0) - return (error); - } - /* - * Set a snapshot inode to be a zero length file, regular files - * to be completely unallocated. - */ - dip = (struct ufs1_dinode *)bp->b_data + - ino_to_fsbo(fs, cancelip->i_number); - if (expungetype == BLK_NOCOPY) - dip->di_mode = 0; - dip->di_size = 0; - dip->di_blocks = 0; - dip->di_flags &= ~SF_SNAPSHOT; - bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs1_daddr_t)); - bdwrite(bp); - /* - * Now go through and expunge all the blocks in the file - * using the function requested. - */ - numblks = howmany(cancelip->i_size, fs->fs_bsize); - if ((error = (*acctfunc)(snapvp, &cancelip->i_din1->di_db[0], - &cancelip->i_din1->di_db[NDADDR], fs, 0, expungetype))) - return (error); - if ((error = (*acctfunc)(snapvp, &cancelip->i_din1->di_ib[0], - &cancelip->i_din1->di_ib[NIADDR], fs, -1, expungetype))) - return (error); - blksperindir = 1; - lbn = -NDADDR; - len = numblks - NDADDR; - rlbn = NDADDR; - for (i = 0; len > 0 && i < NIADDR; i++) { - error = indiracct_ufs1(snapvp, ITOV(cancelip), i, - cancelip->i_din1->di_ib[i], lbn, rlbn, len, - blksperindir, fs, acctfunc, expungetype); - if (error) - return (error); - blksperindir *= NINDIR(fs); - lbn -= blksperindir + 1; - len -= blksperindir; - rlbn += blksperindir; - } - return (0); -} - -/* - * Descend an indirect block chain for vnode cancelvp accounting for all - * its indirect blocks in snapvp. 
- */ -static int -indiracct_ufs1(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks, - blksperindir, fs, acctfunc, expungetype) - struct vnode *snapvp; - struct vnode *cancelvp; - int level; - ufs1_daddr_t blkno; - ufs_lbn_t lbn; - ufs_lbn_t rlbn; - ufs_lbn_t remblks; - ufs_lbn_t blksperindir; - struct fs *fs; - int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, - struct fs *, ufs_lbn_t, int); - int expungetype; -{ - int error, num, i; - ufs_lbn_t subblksperindir; - struct indir indirs[NIADDR + 2]; - ufs1_daddr_t last, *bap; - struct buf *bp; - - if (blkno == 0) { - if (expungetype == BLK_NOCOPY) - return (0); - panic("indiracct_ufs1: missing indir"); - } - if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0) - return (error); - if (lbn != indirs[num - 1 - level].in_lbn || num < 2) - panic("indiracct_ufs1: botched params"); - /* - * We have to expand bread here since it will deadlock looking - * up the block number for any blocks that are not in the cache. - */ - bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0, 0); - bp->b_blkno = fsbtodb(fs, blkno); - if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 && - (error = readblock(bp, fragstoblks(fs, blkno)))) { - brelse(bp); - return (error); - } - /* - * Account for the block pointers in this indirect block. - */ - last = howmany(remblks, blksperindir); - if (last > NINDIR(fs)) - last = NINDIR(fs); - MALLOC(bap, ufs1_daddr_t *, fs->fs_bsize, M_DEVBUF, M_WAITOK); - bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize); - bqrelse(bp); - error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs, - level == 0 ? rlbn : -1, expungetype); - if (error || level == 0) - goto out; - /* - * Account for the block pointers in each of the indirect blocks - * in the levels below us. - */ - subblksperindir = blksperindir / NINDIR(fs); - for (lbn++, level--, i = 0; i < last; i++) { - error = indiracct_ufs1(snapvp, cancelvp, level, bap[i], lbn, - rlbn, remblks, subblksperindir, fs, acctfunc, expungetype); - if (error) - goto out; - rlbn += blksperindir; - lbn -= blksperindir; - remblks -= blksperindir; - } -out: - FREE(bap, M_DEVBUF); - return (error); -} - -/* - * Do both snap accounting and map accounting. - */ -static int -fullacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype) - struct vnode *vp; - ufs1_daddr_t *oldblkp, *lastblkp; - struct fs *fs; - ufs_lbn_t lblkno; - int exptype; /* BLK_SNAP or BLK_NOCOPY */ -{ - int error; - - if ((error = snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype))) - return (error); - return (mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype)); -} - -/* - * Identify a set of blocks allocated in a snapshot inode. 
- */ -static int -snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype) - struct vnode *vp; - ufs1_daddr_t *oldblkp, *lastblkp; - struct fs *fs; - ufs_lbn_t lblkno; - int expungetype; /* BLK_SNAP or BLK_NOCOPY */ -{ - struct inode *ip = VTOI(vp); - ufs1_daddr_t blkno, *blkp; - ufs_lbn_t lbn; - struct buf *ibp; - int error; - - for ( ; oldblkp < lastblkp; oldblkp++) { - blkno = *oldblkp; - if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP) - continue; - lbn = fragstoblks(fs, blkno); - if (lbn < NDADDR) { - blkp = &ip->i_din1->di_db[lbn]; - ip->i_flag |= IN_CHANGE | IN_UPDATE; - } else { - error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), - fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); - if (error) - return (error); - blkp = &((ufs1_daddr_t *)(ibp->b_data)) - [(lbn - NDADDR) % NINDIR(fs)]; - } - /* - * If we are expunging a snapshot vnode and we - * find a block marked BLK_NOCOPY, then it is - * one that has been allocated to this snapshot after - * we took our current snapshot and can be ignored. - */ - if (expungetype == BLK_SNAP && *blkp == BLK_NOCOPY) { - if (lbn >= NDADDR) - brelse(ibp); - } else { - if (*blkp != 0) - panic("snapacct_ufs1: bad block"); - *blkp = expungetype; - if (lbn >= NDADDR) - bdwrite(ibp); - } - } - return (0); -} - -/* - * Account for a set of blocks allocated in a snapshot inode. - */ -static int -mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype) - struct vnode *vp; - ufs1_daddr_t *oldblkp, *lastblkp; - struct fs *fs; - ufs_lbn_t lblkno; - int expungetype; -{ - ufs1_daddr_t blkno; - struct inode *ip; - ino_t inum; - int acctit; - - ip = VTOI(vp); - inum = ip->i_number; - if (lblkno == -1) - acctit = 0; - else - acctit = 1; - for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) { - blkno = *oldblkp; - if (blkno == 0 || blkno == BLK_NOCOPY) - continue; - if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP) - *ip->i_snapblklist++ = lblkno; - if (blkno == BLK_SNAP) - blkno = blkstofrags(fs, lblkno); - ffs_blkfree(fs, vp, blkno, fs->fs_bsize, inum); - } - return (0); -} - -/* - * Before expunging a snapshot inode, note all the - * blocks that it claims with BLK_SNAP so that fsck will - * be able to account for those blocks properly and so - * that this snapshot knows that it need not copy them - * if the other snapshot holding them is freed. This code - * is reproduced once each for UFS1 and UFS2. - */ -static int -expunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype) - struct vnode *snapvp; - struct inode *cancelip; - struct fs *fs; - int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, - struct fs *, ufs_lbn_t, int); - int expungetype; -{ - int i, error, indiroff; - ufs_lbn_t lbn, rlbn; - ufs2_daddr_t len, blkno, numblks, blksperindir; - struct ufs2_dinode *dip; - struct thread *td = curthread; - struct buf *bp; - - /* - * Prepare to expunge the inode. If its inode block has not - * yet been copied, then allocate and fill the copy. 
- */ - lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number)); - blkno = 0; - if (lbn < NDADDR) { - blkno = VTOI(snapvp)->i_din2->di_db[lbn]; - } else { - td->td_pflags |= TDP_COWINPROGRESS; - error = UFS_BALLOC(snapvp, lblktosize(fs, (off_t)lbn), - fs->fs_bsize, KERNCRED, BA_METAONLY, &bp); - td->td_pflags &= ~TDP_COWINPROGRESS; - if (error) - return (error); - indiroff = (lbn - NDADDR) % NINDIR(fs); - blkno = ((ufs2_daddr_t *)(bp->b_data))[indiroff]; - bqrelse(bp); - } - if (blkno != 0) { - if ((error = bread(snapvp, lbn, fs->fs_bsize, KERNCRED, &bp))) - return (error); - } else { - error = UFS_BALLOC(snapvp, lblktosize(fs, (off_t)lbn), - fs->fs_bsize, KERNCRED, 0, &bp); - if (error) - return (error); - if ((error = readblock(bp, lbn)) != 0) - return (error); - } - /* - * Set a snapshot inode to be a zero length file, regular files - * to be completely unallocated. - */ - dip = (struct ufs2_dinode *)bp->b_data + - ino_to_fsbo(fs, cancelip->i_number); - if (expungetype == BLK_NOCOPY) - dip->di_mode = 0; - dip->di_size = 0; - dip->di_blocks = 0; - dip->di_flags &= ~SF_SNAPSHOT; - bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs2_daddr_t)); - bdwrite(bp); - /* - * Now go through and expunge all the blocks in the file - * using the function requested. - */ - numblks = howmany(cancelip->i_size, fs->fs_bsize); - if ((error = (*acctfunc)(snapvp, &cancelip->i_din2->di_db[0], - &cancelip->i_din2->di_db[NDADDR], fs, 0, expungetype))) - return (error); - if ((error = (*acctfunc)(snapvp, &cancelip->i_din2->di_ib[0], - &cancelip->i_din2->di_ib[NIADDR], fs, -1, expungetype))) - return (error); - blksperindir = 1; - lbn = -NDADDR; - len = numblks - NDADDR; - rlbn = NDADDR; - for (i = 0; len > 0 && i < NIADDR; i++) { - error = indiracct_ufs2(snapvp, ITOV(cancelip), i, - cancelip->i_din2->di_ib[i], lbn, rlbn, len, - blksperindir, fs, acctfunc, expungetype); - if (error) - return (error); - blksperindir *= NINDIR(fs); - lbn -= blksperindir + 1; - len -= blksperindir; - rlbn += blksperindir; - } - return (0); -} - -/* - * Descend an indirect block chain for vnode cancelvp accounting for all - * its indirect blocks in snapvp. - */ -static int -indiracct_ufs2(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks, - blksperindir, fs, acctfunc, expungetype) - struct vnode *snapvp; - struct vnode *cancelvp; - int level; - ufs2_daddr_t blkno; - ufs_lbn_t lbn; - ufs_lbn_t rlbn; - ufs_lbn_t remblks; - ufs_lbn_t blksperindir; - struct fs *fs; - int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, - struct fs *, ufs_lbn_t, int); - int expungetype; -{ - int error, num, i; - ufs_lbn_t subblksperindir; - struct indir indirs[NIADDR + 2]; - ufs2_daddr_t last, *bap; - struct buf *bp; - - if (blkno == 0) { - if (expungetype == BLK_NOCOPY) - return (0); - panic("indiracct_ufs2: missing indir"); - } - if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0) - return (error); - if (lbn != indirs[num - 1 - level].in_lbn || num < 2) - panic("indiracct_ufs2: botched params"); - /* - * We have to expand bread here since it will deadlock looking - * up the block number for any blocks that are not in the cache. - */ - bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0, 0); - bp->b_blkno = fsbtodb(fs, blkno); - if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 && - (error = readblock(bp, fragstoblks(fs, blkno)))) { - brelse(bp); - return (error); - } - /* - * Account for the block pointers in this indirect block. 
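The loop that follows first works out how many pointers in this indirect block still refer to live file blocks: the remaining block count divided by the span of each pointer, rounded up and clamped to the number of pointers one block can hold. Stated as a tiny helper with hypothetical names (nindir stands in for NINDIR(fs)):

/*
 * Illustrative sketch of the clamp computed below: howmany(remblks,
 * blksperindir) pointers are still live, but an indirect block holds at
 * most nindir of them.
 */
static long
indir_live_pointers(long remblks, long blksperindir, long nindir)
{
        long last = (remblks + blksperindir - 1) / blksperindir;

        return (last > nindir ? nindir : last);
}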
- */ - last = howmany(remblks, blksperindir); - if (last > NINDIR(fs)) - last = NINDIR(fs); - MALLOC(bap, ufs2_daddr_t *, fs->fs_bsize, M_DEVBUF, M_WAITOK); - bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize); - bqrelse(bp); - error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs, - level == 0 ? rlbn : -1, expungetype); - if (error || level == 0) - goto out; - /* - * Account for the block pointers in each of the indirect blocks - * in the levels below us. - */ - subblksperindir = blksperindir / NINDIR(fs); - for (lbn++, level--, i = 0; i < last; i++) { - error = indiracct_ufs2(snapvp, cancelvp, level, bap[i], lbn, - rlbn, remblks, subblksperindir, fs, acctfunc, expungetype); - if (error) - goto out; - rlbn += blksperindir; - lbn -= blksperindir; - remblks -= blksperindir; - } -out: - FREE(bap, M_DEVBUF); - return (error); -} - -/* - * Do both snap accounting and map accounting. - */ -static int -fullacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype) - struct vnode *vp; - ufs2_daddr_t *oldblkp, *lastblkp; - struct fs *fs; - ufs_lbn_t lblkno; - int exptype; /* BLK_SNAP or BLK_NOCOPY */ -{ - int error; - - if ((error = snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype))) - return (error); - return (mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype)); -} - -/* - * Identify a set of blocks allocated in a snapshot inode. - */ -static int -snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype) - struct vnode *vp; - ufs2_daddr_t *oldblkp, *lastblkp; - struct fs *fs; - ufs_lbn_t lblkno; - int expungetype; /* BLK_SNAP or BLK_NOCOPY */ -{ - struct inode *ip = VTOI(vp); - ufs2_daddr_t blkno, *blkp; - ufs_lbn_t lbn; - struct buf *ibp; - int error; - - for ( ; oldblkp < lastblkp; oldblkp++) { - blkno = *oldblkp; - if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP) - continue; - lbn = fragstoblks(fs, blkno); - if (lbn < NDADDR) { - blkp = &ip->i_din2->di_db[lbn]; - ip->i_flag |= IN_CHANGE | IN_UPDATE; - } else { - error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), - fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); - if (error) - return (error); - blkp = &((ufs2_daddr_t *)(ibp->b_data)) - [(lbn - NDADDR) % NINDIR(fs)]; - } - /* - * If we are expunging a snapshot vnode and we - * find a block marked BLK_NOCOPY, then it is - * one that has been allocated to this snapshot after - * we took our current snapshot and can be ignored. - */ - if (expungetype == BLK_SNAP && *blkp == BLK_NOCOPY) { - if (lbn >= NDADDR) - brelse(ibp); - } else { - if (*blkp != 0) - panic("snapacct_ufs2: bad block"); - *blkp = expungetype; - if (lbn >= NDADDR) - bdwrite(ibp); - } - } - return (0); -} - -/* - * Account for a set of blocks allocated in a snapshot inode. - */ -static int -mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype) - struct vnode *vp; - ufs2_daddr_t *oldblkp, *lastblkp; - struct fs *fs; - ufs_lbn_t lblkno; - int expungetype; -{ - ufs2_daddr_t blkno; - struct inode *ip; - ino_t inum; - int acctit; - - ip = VTOI(vp); - inum = ip->i_number; - if (lblkno == -1) - acctit = 0; - else - acctit = 1; - for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) { - blkno = *oldblkp; - if (blkno == 0 || blkno == BLK_NOCOPY) - continue; - if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP) - *ip->i_snapblklist++ = lblkno; - if (blkno == BLK_SNAP) - blkno = blkstofrags(fs, lblkno); - ffs_blkfree(fs, vp, blkno, fs->fs_bsize, inum); - } - return (0); -} - -/* - * Decrement extra reference on snapshot when last name is removed. - * It will not be freed until the last open reference goes away. 
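Besides dropping the extra reference, ffs_snapgone() below has to delete the snapshot's inode number from the fixed-size fs_snapinum array in the superblock while keeping that list dense. The compaction step can be read in isolation; this sketch uses a plain array and hypothetical names, with uint32_t standing in for ino_t and maxsnap for FSMAXSNAP:

#include <stdint.h>

/*
 * Illustrative sketch of the "keep list dense" step: find inum, slide
 * the following non-zero entries down one slot, and zero the slot that
 * is freed up.
 */
static void
snapinum_remove(uint32_t *snapinum, int maxsnap, uint32_t inum)
{
        int loc;

        for (loc = 0; loc < maxsnap; loc++)
                if (snapinum[loc] == inum)
                        break;
        if (loc == maxsnap)
                return;
        for (loc++; loc < maxsnap; loc++) {
                if (snapinum[loc] == 0)
                        break;
                snapinum[loc - 1] = snapinum[loc];
        }
        snapinum[loc - 1] = 0;
}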
- */ -void -ffs_snapgone(ip) - struct inode *ip; -{ - struct inode *xp; - struct fs *fs; - int snaploc; - - /* - * Find snapshot in incore list. - */ - TAILQ_FOREACH(xp, &ip->i_devvp->v_rdev->si_snapshots, i_nextsnap) - if (xp == ip) - break; - if (xp != NULL) - vrele(ITOV(ip)); - else if (snapdebug) - printf("ffs_snapgone: lost snapshot vnode %d\n", - ip->i_number); - /* - * Delete snapshot inode from superblock. Keep list dense. - */ - fs = ip->i_fs; - for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) - if (fs->fs_snapinum[snaploc] == ip->i_number) - break; - if (snaploc < FSMAXSNAP) { - for (snaploc++; snaploc < FSMAXSNAP; snaploc++) { - if (fs->fs_snapinum[snaploc] == 0) - break; - fs->fs_snapinum[snaploc - 1] = fs->fs_snapinum[snaploc]; - } - fs->fs_snapinum[snaploc - 1] = 0; - } -} - -/* - * Prepare a snapshot file for being removed. - */ -void -ffs_snapremove(vp) - struct vnode *vp; -{ - struct inode *ip; - struct vnode *devvp; - struct lock *lkp; - struct buf *ibp; - struct fs *fs; - struct thread *td = curthread; - ufs2_daddr_t numblks, blkno, dblk, *snapblklist; - int error, loc, last; - - ip = VTOI(vp); - fs = ip->i_fs; - devvp = ip->i_devvp; - /* - * If active, delete from incore list (this snapshot may - * already have been in the process of being deleted, so - * would not have been active). - * - * Clear copy-on-write flag if last snapshot. - */ - if (ip->i_nextsnap.tqe_prev != 0) { - VI_LOCK(devvp); - lockmgr(&vp->v_lock, LK_INTERLOCK | LK_EXCLUSIVE, - VI_MTX(devvp), td); - VI_LOCK(devvp); - TAILQ_REMOVE(&devvp->v_rdev->si_snapshots, ip, i_nextsnap); - ip->i_nextsnap.tqe_prev = 0; - lkp = vp->v_vnlock; - vp->v_vnlock = &vp->v_lock; - lockmgr(lkp, LK_RELEASE, NULL, td); - if (TAILQ_FIRST(&devvp->v_rdev->si_snapshots) != 0) { - VI_UNLOCK(devvp); - } else { - snapblklist = devvp->v_rdev->si_snapblklist; - devvp->v_rdev->si_snapblklist = 0; - devvp->v_rdev->si_snaplistsize = 0; - devvp->v_rdev->si_copyonwrite = 0; - devvp->v_vflag &= ~VV_COPYONWRITE; - lockmgr(lkp, LK_DRAIN|LK_INTERLOCK, VI_MTX(devvp), td); - lockmgr(lkp, LK_RELEASE, NULL, td); - lockdestroy(lkp); - FREE(lkp, M_UFSMNT); - FREE(snapblklist, M_UFSMNT); - } - } - /* - * Clear all BLK_NOCOPY fields. Pass any block claims to other - * snapshots that want them (see ffs_snapblkfree below). 
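The superblock keeps its snapshot inode numbers as a dense, zero-terminated array, so removing one entry means sliding the rest down, as ffs_snapgone does above. A small stand-alone sketch of that compaction (FSMAXSNAP and the inode numbers are illustrative values, and the function name is hypothetical):

/* cc -o snaplist snaplist.c */
#include <stdio.h>

#define FSMAXSNAP 20	/* snapshot slots kept in the superblock (assumed) */

/*
 * Remove "inum" from a dense, zero-terminated list of snapshot inode
 * numbers, sliding later entries down so the list stays dense.
 */
static void
snaplist_remove(unsigned snapinum[FSMAXSNAP], unsigned inum)
{
	int snaploc;

	for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++)
		if (snapinum[snaploc] == inum)
			break;
	if (snaploc == FSMAXSNAP)
		return;			/* not found */
	for (snaploc++; snaploc < FSMAXSNAP; snaploc++) {
		if (snapinum[snaploc] == 0)
			break;
		snapinum[snaploc - 1] = snapinum[snaploc];
	}
	snapinum[snaploc - 1] = 0;
}

int
main(void)
{
	unsigned list[FSMAXSNAP] = { 5, 9, 17, 0 };
	int i;

	snaplist_remove(list, 9);
	for (i = 0; i < FSMAXSNAP && list[i] != 0; i++)
		printf("slot %d: inode %u\n", i, list[i]);
	return (0);
}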
- */ - for (blkno = 1; blkno < NDADDR; blkno++) { - dblk = DIP(ip, i_db[blkno]); - if (dblk == BLK_NOCOPY || dblk == BLK_SNAP) - DIP(ip, i_db[blkno]) = 0; - else if ((dblk == blkstofrags(fs, blkno) && - ffs_snapblkfree(fs, ip->i_devvp, dblk, fs->fs_bsize, - ip->i_number))) { - DIP(ip, i_blocks) -= btodb(fs->fs_bsize); - DIP(ip, i_db[blkno]) = 0; - } - } - numblks = howmany(ip->i_size, fs->fs_bsize); - for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) { - error = UFS_BALLOC(vp, lblktosize(fs, (off_t)blkno), - fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); - if (error) - continue; - if (fs->fs_size - blkno > NINDIR(fs)) - last = NINDIR(fs); - else - last = fs->fs_size - blkno; - for (loc = 0; loc < last; loc++) { - if (ip->i_ump->um_fstype == UFS1) { - dblk = ((ufs1_daddr_t *)(ibp->b_data))[loc]; - if (dblk == BLK_NOCOPY || dblk == BLK_SNAP) - ((ufs1_daddr_t *)(ibp->b_data))[loc]= 0; - else if ((dblk == blkstofrags(fs, blkno) && - ffs_snapblkfree(fs, ip->i_devvp, dblk, - fs->fs_bsize, ip->i_number))) { - ip->i_din1->di_blocks -= - btodb(fs->fs_bsize); - ((ufs1_daddr_t *)(ibp->b_data))[loc]= 0; - } - continue; - } - dblk = ((ufs2_daddr_t *)(ibp->b_data))[loc]; - if (dblk == BLK_NOCOPY || dblk == BLK_SNAP) - ((ufs2_daddr_t *)(ibp->b_data))[loc] = 0; - else if ((dblk == blkstofrags(fs, blkno) && - ffs_snapblkfree(fs, ip->i_devvp, dblk, - fs->fs_bsize, ip->i_number))) { - ip->i_din2->di_blocks -= btodb(fs->fs_bsize); - ((ufs2_daddr_t *)(ibp->b_data))[loc] = 0; - } - } - bawrite(ibp); - } - /* - * Clear snapshot flag and drop reference. - */ - ip->i_flags &= ~SF_SNAPSHOT; - DIP(ip, i_flags) = ip->i_flags; - ip->i_flag |= IN_CHANGE | IN_UPDATE; -} - -/* - * Notification that a block is being freed. Return zero if the free - * should be allowed to proceed. Return non-zero if the snapshot file - * wants to claim the block. The block will be claimed if it is an - * uncopied part of one of the snapshots. It will be freed if it is - * either a BLK_NOCOPY or has already been copied in all of the snapshots. - * If a fragment is being freed, then all snapshots that care about - * it must make a copy since a snapshot file can only claim full sized - * blocks. Note that if more than one snapshot file maps the block, - * we can pick one at random to claim it. Since none of the snapshots - * can change, we are assurred that they will all see the same unmodified - * image. When deleting a snapshot file (see ffs_snapremove above), we - * must push any of these claimed blocks to one of the other snapshots - * that maps it. These claimed blocks are easily identified as they will - * have a block number equal to their logical block number within the - * snapshot. A copied block can never have this property because they - * must always have been allocated from a BLK_NOCOPY location. - */ -int -ffs_snapblkfree(fs, devvp, bno, size, inum) - struct fs *fs; - struct vnode *devvp; - ufs2_daddr_t bno; - long size; - ino_t inum; -{ - struct buf *ibp, *cbp, *savedcbp = 0; - struct thread *td = curthread; - struct inode *ip; - struct vnode *vp = NULL; - ufs_lbn_t lbn; - ufs2_daddr_t blkno; - int indiroff = 0, snapshot_locked = 0, error = 0, claimedblk = 0; - struct snaphead *snaphead; - - lbn = fragstoblks(fs, bno); -retry: - VI_LOCK(devvp); - snaphead = &devvp->v_rdev->si_snapshots; - TAILQ_FOREACH(ip, snaphead, i_nextsnap) { - vp = ITOV(ip); - /* - * Lookup block being written. 
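The comment above gives the rule for recognizing a claimed block: its address equals its own logical block number within the snapshot, because a copied block is always allocated fresh and so can never land exactly there. A tiny sketch of that predicate; the sentinel values and the fragments-per-block geometry are illustrative, not the kernel's actual constants.

/* cc -o claimtest claimtest.c */
#include <stdio.h>
#include <stdint.h>

#define FRAGS_PER_BLOCK	8		/* assumed: 16K blocks of 2K fragments */
#define BLK_NOCOPY	((int64_t)-1)	/* illustrative sentinel: never copy */
#define BLK_SNAP	((int64_t)-2)	/* illustrative sentinel: claimed earlier */

/*
 * A snapshot "claims" a freed block by storing the block's own address
 * in the slot for the logical block it backs, so a claimed slot holds
 * lbn scaled to fragments.  A genuinely copied block never does.
 */
static int
is_claimed(int64_t slot, int64_t lbn)
{
	if (slot == 0 || slot == BLK_NOCOPY || slot == BLK_SNAP)
		return (0);
	return (slot == lbn * FRAGS_PER_BLOCK);
}

int
main(void)
{
	printf("slot 800 at lbn 100: %s\n",
	    is_claimed(800, 100) ? "claimed" : "copied/other");
	printf("slot 912 at lbn 100: %s\n",
	    is_claimed(912, 100) ? "claimed" : "copied/other");
	return (0);
}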
- */ - if (lbn < NDADDR) { - blkno = DIP(ip, i_db[lbn]); - } else { - if (snapshot_locked == 0 && - lockmgr(vp->v_vnlock, - LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL, - VI_MTX(devvp), td) != 0) - goto retry; - snapshot_locked = 1; - td->td_pflags |= TDP_COWINPROGRESS; - error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), - fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); - td->td_pflags &= ~TDP_COWINPROGRESS; - if (error) - break; - indiroff = (lbn - NDADDR) % NINDIR(fs); - if (ip->i_ump->um_fstype == UFS1) - blkno=((ufs1_daddr_t *)(ibp->b_data))[indiroff]; - else - blkno=((ufs2_daddr_t *)(ibp->b_data))[indiroff]; - } - /* - * Check to see if block needs to be copied. - */ - if (blkno == 0) { - /* - * A block that we map is being freed. If it has not - * been claimed yet, we will claim or copy it (below). - */ - claimedblk = 1; - } else if (blkno == BLK_SNAP) { - /* - * No previous snapshot claimed the block, - * so it will be freed and become a BLK_NOCOPY - * (don't care) for us. - */ - if (claimedblk) - panic("snapblkfree: inconsistent block type"); - if (snapshot_locked == 0 && - lockmgr(vp->v_vnlock, - LK_INTERLOCK | LK_EXCLUSIVE | LK_NOWAIT, - VI_MTX(devvp), td) != 0) { - if (lbn >= NDADDR) - bqrelse(ibp); - vn_lock(vp, LK_EXCLUSIVE | LK_SLEEPFAIL, td); - goto retry; - } - snapshot_locked = 1; - if (lbn < NDADDR) { - DIP(ip, i_db[lbn]) = BLK_NOCOPY; - ip->i_flag |= IN_CHANGE | IN_UPDATE; - } else if (ip->i_ump->um_fstype == UFS1) { - ((ufs1_daddr_t *)(ibp->b_data))[indiroff] = - BLK_NOCOPY; - bdwrite(ibp); - } else { - ((ufs2_daddr_t *)(ibp->b_data))[indiroff] = - BLK_NOCOPY; - bdwrite(ibp); - } - continue; - } else /* BLK_NOCOPY or default */ { - /* - * If the snapshot has already copied the block - * (default), or does not care about the block, - * it is not needed. - */ - if (lbn >= NDADDR) - bqrelse(ibp); - continue; - } - /* - * If this is a full size block, we will just grab it - * and assign it to the snapshot inode. Otherwise we - * will proceed to copy it. See explanation for this - * routine as to why only a single snapshot needs to - * claim this block. - */ - if (snapshot_locked == 0 && - lockmgr(vp->v_vnlock, - LK_INTERLOCK | LK_EXCLUSIVE | LK_NOWAIT, - VI_MTX(devvp), td) != 0) { - if (lbn >= NDADDR) - bqrelse(ibp); - vn_lock(vp, LK_EXCLUSIVE | LK_SLEEPFAIL, td); - goto retry; - } - snapshot_locked = 1; - if (size == fs->fs_bsize) { -#ifdef DEBUG - if (snapdebug) - printf("%s %d lbn %jd from inum %d\n", - "Grabonremove: snapino", ip->i_number, - (intmax_t)lbn, inum); -#endif - if (lbn < NDADDR) { - DIP(ip, i_db[lbn]) = bno; - } else if (ip->i_ump->um_fstype == UFS1) { - ((ufs1_daddr_t *)(ibp->b_data))[indiroff] = bno; - bdwrite(ibp); - } else { - ((ufs2_daddr_t *)(ibp->b_data))[indiroff] = bno; - bdwrite(ibp); - } - DIP(ip, i_blocks) += btodb(size); - ip->i_flag |= IN_CHANGE | IN_UPDATE; - VOP_UNLOCK(vp, 0, td); - return (1); - } - if (lbn >= NDADDR) - bqrelse(ibp); - /* - * Allocate the block into which to do the copy. Note that this - * allocation will never require any additional allocations for - * the snapshot inode. 
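A much-simplified sketch of the per-snapshot decision that the loop above makes when a live block is freed: a slot of zero means the snapshot still maps the block and must claim or copy it, a BLK_SNAP slot becomes BLK_NOCOPY because an earlier snapshot owns the block, and anything else needs no work. A full-size block is handed to the first interested snapshot and the free is suppressed; the sentinel values and the "recorded" block numbers below are placeholders, not real addresses.

/* cc -o blkfree blkfree.c */
#include <stdio.h>
#include <stdint.h>

#define BLK_NOCOPY	((int64_t)-1)	/* illustrative sentinel */
#define BLK_SNAP	((int64_t)-2)	/* illustrative sentinel */
#define BSIZE		16384L		/* assumed full block size */

/*
 * "slot[i]" is what snapshot i records for the logical block being
 * freed.  Returns nonzero if some snapshot claimed the block, in which
 * case the free must not proceed.
 */
static int
snapblkfree(int64_t slot[], int nsnap, long size)
{
	int i, claimed = 0;

	for (i = 0; i < nsnap; i++) {
		if (slot[i] == BLK_SNAP) {
			slot[i] = BLK_NOCOPY;	/* an earlier snapshot took it */
			continue;
		}
		if (slot[i] != 0)
			continue;		/* already copied, or never mapped */
		if (size == BSIZE) {
			slot[i] = 1;		/* placeholder for the real block number */
			claimed = 1;
			break;			/* one claimant is enough */
		}
		slot[i] = 2;			/* placeholder for a freshly copied block */
	}
	return (claimed);
}

int
main(void)
{
	int64_t slots[3] = { 0, 0, BLK_NOCOPY };

	printf("full block claimed: %d\n", snapblkfree(slots, 3, BSIZE));
	printf("slots now: %lld %lld %lld\n", (long long)slots[0],
	    (long long)slots[1], (long long)slots[2]);
	return (0);
}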
- */ - td->td_pflags |= TDP_COWINPROGRESS; - error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), - fs->fs_bsize, KERNCRED, 0, &cbp); - td->td_pflags &= ~TDP_COWINPROGRESS; - if (error) - break; -#ifdef DEBUG - if (snapdebug) - printf("%s%d lbn %jd %s %d size %ld to blkno %jd\n", - "Copyonremove: snapino ", ip->i_number, - (intmax_t)lbn, "for inum", inum, size, - (intmax_t)cbp->b_blkno); -#endif - /* - * If we have already read the old block contents, then - * simply copy them to the new block. Note that we need - * to synchronously write snapshots that have not been - * unlinked, and hence will be visible after a crash, - * to ensure their integrity. - */ - if (savedcbp != 0) { - bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize); - bawrite(cbp); - if (dopersistence && ip->i_effnlink > 0) - (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td); - continue; - } - /* - * Otherwise, read the old block contents into the buffer. - */ - if ((error = readblock(cbp, lbn)) != 0) { - bzero(cbp->b_data, fs->fs_bsize); - bawrite(cbp); - if (dopersistence && ip->i_effnlink > 0) - (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td); - break; - } - savedcbp = cbp; - } - /* - * Note that we need to synchronously write snapshots that - * have not been unlinked, and hence will be visible after - * a crash, to ensure their integrity. - */ - if (savedcbp) { - vp = savedcbp->b_vp; - bawrite(savedcbp); - if (dopersistence && VTOI(vp)->i_effnlink > 0) - (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td); - } - /* - * If we have been unable to allocate a block in which to do - * the copy, then return non-zero so that the fragment will - * not be freed. Although space will be lost, the snapshot - * will stay consistent. - */ - if (snapshot_locked) - VOP_UNLOCK(vp, 0, td); - else - VI_UNLOCK(devvp); - return (error); -} - -/* - * Associate snapshot files when mounting. - */ -void -ffs_snapshot_mount(mp) - struct mount *mp; -{ - struct ufsmount *ump = VFSTOUFS(mp); - struct vnode *devvp = ump->um_devvp; - struct fs *fs = ump->um_fs; - struct thread *td = curthread; - struct snaphead *snaphead; - struct vnode *vp; - struct inode *ip, *xp; - struct uio auio; - struct iovec aiov; - void *snapblklist; - char *reason; - daddr_t snaplistsize; - int error, snaploc, loc; - - /* - * XXX The following needs to be set before UFS_TRUNCATE or - * VOP_READ can be called. - */ - mp->mnt_stat.f_iosize = fs->fs_bsize; - /* - * Process each snapshot listed in the superblock. - */ - vp = NULL; - snaphead = &devvp->v_rdev->si_snapshots; - for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) { - if (fs->fs_snapinum[snaploc] == 0) - break; - if ((error = VFS_VGET(mp, fs->fs_snapinum[snaploc], - LK_EXCLUSIVE, &vp)) != 0){ - printf("ffs_snapshot_mount: vget failed %d\n", error); - continue; - } - ip = VTOI(vp); - if ((ip->i_flags & SF_SNAPSHOT) == 0 || ip->i_size == - lblktosize(fs, howmany(fs->fs_size, fs->fs_frag))) { - if ((ip->i_flags & SF_SNAPSHOT) == 0) { - reason = "non-snapshot"; - } else { - reason = "old format snapshot"; - (void)UFS_TRUNCATE(vp, (off_t)0, 0, NOCRED, td); - (void)VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td); - } - printf("ffs_snapshot_mount: %s inode %d\n", - reason, fs->fs_snapinum[snaploc]); - vput(vp); - vp = NULL; - for (loc = snaploc + 1; loc < FSMAXSNAP; loc++) { - if (fs->fs_snapinum[loc] == 0) - break; - fs->fs_snapinum[loc - 1] = fs->fs_snapinum[loc]; - } - fs->fs_snapinum[loc - 1] = 0; - snaploc--; - continue; - } - /* - * If there already exist snapshots on this filesystem, grab a - * reference to their shared lock. 
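The savedcbp handling above reads the old contents from disk only once, no matter how many snapshots need a private copy; every later snapshot is satisfied with a memory-to-memory copy of the saved buffer. A user-space sketch of that copy-once pattern (buffer sizes and the fake "disk read" are, of course, stand-ins):

/* cc -o copyonce copyonce.c */
#include <stdio.h>
#include <string.h>

#define BSIZE	16384
#define NSNAP	3

/* Fabricate data in place of a real disk read of the old contents. */
static void
read_old_block(char *buf)
{
	memset(buf, 0xab, BSIZE);
}

int
main(void)
{
	static char snapbuf[NSNAP][BSIZE];
	char *savedcbp = NULL;
	int i, reads = 0;

	for (i = 0; i < NSNAP; i++) {
		if (savedcbp != NULL) {
			/* later snapshots reuse the contents read for the first */
			memcpy(snapbuf[i], savedcbp, BSIZE);
			continue;
		}
		read_old_block(snapbuf[i]);
		reads++;
		savedcbp = snapbuf[i];
	}
	printf("%d snapshots copied with %d disk read(s)\n", NSNAP, reads);
	return (0);
}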
If this is the first snapshot - * on this filesystem, we need to allocate a lock for the - * snapshots to share. In either case, acquire the snapshot - * lock and give up our original private lock. - */ - VI_LOCK(devvp); - if ((xp = TAILQ_FIRST(snaphead)) != NULL) { - VI_LOCK(vp); - vp->v_vnlock = ITOV(xp)->v_vnlock; - VI_UNLOCK(devvp); - } else { - struct lock *lkp; - - VI_UNLOCK(devvp); - MALLOC(lkp, struct lock *, sizeof(struct lock), - M_UFSMNT, M_WAITOK); - lockinit(lkp, PVFS, "snaplk", VLKTIMEOUT, - LK_CANRECURSE | LK_NOPAUSE); - VI_LOCK(vp); - vp->v_vnlock = lkp; - } - vn_lock(vp, LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY, td); - transferlockers(&vp->v_lock, vp->v_vnlock); - lockmgr(&vp->v_lock, LK_RELEASE, NULL, td); - /* - * Link it onto the active snapshot list. - */ - VI_LOCK(devvp); - if (ip->i_nextsnap.tqe_prev != 0) - panic("ffs_snapshot_mount: %d already on list", - ip->i_number); - else - TAILQ_INSERT_TAIL(snaphead, ip, i_nextsnap); - vp->v_vflag |= VV_SYSTEM; - VI_UNLOCK(devvp); - VOP_UNLOCK(vp, 0, td); - } - /* - * No usable snapshots found. - */ - if (vp == NULL) - return; - /* - * Allocate the space for the block hints list. We always want to - * use the list from the newest snapshot. - */ - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - aiov.iov_base = (void *)&snaplistsize; - aiov.iov_len = sizeof(snaplistsize); - auio.uio_resid = aiov.iov_len; - auio.uio_offset = - lblktosize(fs, howmany(fs->fs_size, fs->fs_frag)); - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_rw = UIO_READ; - auio.uio_td = td; - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); - if ((error = VOP_READ(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { - printf("ffs_snapshot_mount: read_1 failed %d\n", error); - VOP_UNLOCK(vp, 0, td); - return; - } - MALLOC(snapblklist, void *, snaplistsize * sizeof(daddr_t), - M_UFSMNT, M_WAITOK); - auio.uio_iovcnt = 1; - aiov.iov_base = snapblklist; - aiov.iov_len = snaplistsize * sizeof (daddr_t); - auio.uio_resid = aiov.iov_len; - auio.uio_offset -= sizeof(snaplistsize); - if ((error = VOP_READ(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { - printf("ffs_snapshot_mount: read_2 failed %d\n", error); - VOP_UNLOCK(vp, 0, td); - FREE(snapblklist, M_UFSMNT); - return; - } - VOP_UNLOCK(vp, 0, td); - VI_LOCK(devvp); - ASSERT_VOP_LOCKED(devvp, "ffs_snapshot_mount"); - devvp->v_rdev->si_snaplistsize = snaplistsize; - devvp->v_rdev->si_snapblklist = (daddr_t *)snapblklist; - devvp->v_rdev->si_copyonwrite = ffs_copyonwrite; - devvp->v_vflag |= VV_COPYONWRITE; - VI_UNLOCK(devvp); -} - -/* - * Disassociate snapshot files when unmounting. 
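The block hint list that ffs_snapshot_mount reads back lives logically just past the last data block of the snapshot file: a count followed by the sorted block numbers, and because the count is re-read as element [0], later searches start at index 1. A small round-trip of that layout through an ordinary file; the daddr_tt typedef and the offsets are simplifications of the kernel's daddr_t and lblktosize() arithmetic.

/* cc -o hintlist hintlist.c */
#include <stdio.h>
#include <stdlib.h>

typedef long long daddr_tt;	/* stand-in for the kernel's daddr_t */

int
main(void)
{
	daddr_tt out[4] = { 4, 100, 250, 777 };	/* [0] is the list size */
	daddr_tt size, *in;
	FILE *fp;

	if ((fp = tmpfile()) == NULL)
		return (1);
	fwrite(out, sizeof(daddr_tt), 4, fp);

	rewind(fp);
	fread(&size, sizeof(size), 1, fp);	/* first read: just the count */
	in = malloc(size * sizeof(daddr_tt));
	rewind(fp);				/* back up over the count... */
	fread(in, sizeof(daddr_tt), size, fp);	/* ...and read count plus entries */

	printf("listsize %lld, last hint %lld\n", (long long)size,
	    (long long)in[size - 1]);
	free(in);
	fclose(fp);
	return (0);
}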
- */ -void -ffs_snapshot_unmount(mp) - struct mount *mp; -{ - struct vnode *devvp = VFSTOUFS(mp)->um_devvp; - struct snaphead *snaphead = &devvp->v_rdev->si_snapshots; - struct lock *lkp = NULL; - struct inode *xp; - struct vnode *vp; - - VI_LOCK(devvp); - while ((xp = TAILQ_FIRST(snaphead)) != 0) { - vp = ITOV(xp); - lkp = vp->v_vnlock; - vp->v_vnlock = &vp->v_lock; - TAILQ_REMOVE(snaphead, xp, i_nextsnap); - xp->i_nextsnap.tqe_prev = 0; - if (xp->i_effnlink > 0) { - VI_UNLOCK(devvp); - vrele(vp); - VI_LOCK(devvp); - } - } - if (devvp->v_rdev->si_snapblklist != NULL) { - FREE(devvp->v_rdev->si_snapblklist, M_UFSMNT); - devvp->v_rdev->si_snapblklist = NULL; - devvp->v_rdev->si_snaplistsize = 0; - } - if (lkp != NULL) { - lockdestroy(lkp); - FREE(lkp, M_UFSMNT); - } - ASSERT_VOP_LOCKED(devvp, "ffs_snapshot_unmount"); - devvp->v_rdev->si_copyonwrite = 0; - devvp->v_vflag &= ~VV_COPYONWRITE; - VI_UNLOCK(devvp); -} - -/* - * Check for need to copy block that is about to be written, - * copying the block if necessary. - */ -static int -ffs_copyonwrite(devvp, bp) - struct vnode *devvp; - struct buf *bp; -{ - struct snaphead *snaphead; - struct buf *ibp, *cbp, *savedcbp = 0; - struct thread *td = curthread; - struct fs *fs; - struct inode *ip; - struct vnode *vp = 0; - ufs2_daddr_t lbn, blkno, *snapblklist; - int lower, upper, mid, indiroff, snapshot_locked = 0, error = 0; - - if (td->td_pflags & TDP_COWINPROGRESS) - panic("ffs_copyonwrite: recursive call"); - /* - * First check to see if it is in the preallocated list. - * By doing this check we avoid several potential deadlocks. - */ - VI_LOCK(devvp); - snaphead = &devvp->v_rdev->si_snapshots; - ip = TAILQ_FIRST(snaphead); - fs = ip->i_fs; - lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno)); - snapblklist = devvp->v_rdev->si_snapblklist; - upper = devvp->v_rdev->si_snaplistsize - 1; - lower = 1; - while (lower <= upper) { - mid = (lower + upper) / 2; - if (snapblklist[mid] == lbn) - break; - if (snapblklist[mid] < lbn) - lower = mid + 1; - else - upper = mid - 1; - } - if (lower <= upper) { - VI_UNLOCK(devvp); - return (0); - } - /* - * Not in the precomputed list, so check the snapshots. - */ -retry: - TAILQ_FOREACH(ip, snaphead, i_nextsnap) { - vp = ITOV(ip); - /* - * We ensure that everything of our own that needs to be - * copied will be done at the time that ffs_snapshot is - * called. Thus we can skip the check here which can - * deadlock in doing the lookup in UFS_BALLOC. - */ - if (bp->b_vp == vp) - continue; - /* - * Check to see if block needs to be copied. We do not have - * to hold the snapshot lock while doing this lookup as it - * will never require any additional allocations for the - * snapshot inode. 
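The fast path of ffs_copyonwrite above is a plain binary search over the sorted, precomputed hint list: a hit means no snapshot needs the block copied, so no snapshot lock is taken at all. Since entry 0 of the list holds its length, the search runs over entries 1..n-1. A stand-alone version of that search (the hint values are made up):

/* cc -o cowhint cowhint.c */
#include <stdio.h>

static int
in_hint_list(const long long *list, int listsize, long long lbn)
{
	int lower = 1, upper = listsize - 1, mid;

	while (lower <= upper) {
		mid = (lower + upper) / 2;
		if (list[mid] == lbn)
			return (1);
		if (list[mid] < lbn)
			lower = mid + 1;
		else
			upper = mid - 1;
	}
	return (0);
}

int
main(void)
{
	long long hints[] = { 5, 12, 40, 97, 350 };	/* hints[0] is the size */

	printf("lbn 97: %s\n",
	    in_hint_list(hints, 5, 97) ? "skip copy" : "check snapshots");
	printf("lbn 98: %s\n",
	    in_hint_list(hints, 5, 98) ? "skip copy" : "check snapshots");
	return (0);
}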
- */ - if (lbn < NDADDR) { - blkno = DIP(ip, i_db[lbn]); - } else { - if (snapshot_locked == 0 && - lockmgr(vp->v_vnlock, - LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL, - VI_MTX(devvp), td) != 0) { - VI_LOCK(devvp); - goto retry; - } - snapshot_locked = 1; - td->td_pflags |= TDP_COWINPROGRESS; - error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), - fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); - td->td_pflags &= ~TDP_COWINPROGRESS; - if (error) - break; - indiroff = (lbn - NDADDR) % NINDIR(fs); - if (ip->i_ump->um_fstype == UFS1) - blkno=((ufs1_daddr_t *)(ibp->b_data))[indiroff]; - else - blkno=((ufs2_daddr_t *)(ibp->b_data))[indiroff]; - bqrelse(ibp); - } -#ifdef DIAGNOSTIC - if (blkno == BLK_SNAP && bp->b_lblkno >= 0) - panic("ffs_copyonwrite: bad copy block"); -#endif - if (blkno != 0) - continue; - /* - * Allocate the block into which to do the copy. Since - * multiple processes may all try to copy the same block, - * we have to recheck our need to do a copy if we sleep - * waiting for the lock. - * - * Because all snapshots on a filesystem share a single - * lock, we ensure that we will never be in competition - * with another process to allocate a block. - */ - if (snapshot_locked == 0 && - lockmgr(vp->v_vnlock, - LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL, - VI_MTX(devvp), td) != 0) { - VI_LOCK(devvp); - goto retry; - } - snapshot_locked = 1; - td->td_pflags |= TDP_COWINPROGRESS; - error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), - fs->fs_bsize, KERNCRED, 0, &cbp); - td->td_pflags &= ~TDP_COWINPROGRESS; - if (error) - break; -#ifdef DEBUG - if (snapdebug) { - printf("Copyonwrite: snapino %d lbn %jd for ", - ip->i_number, (intmax_t)lbn); - if (bp->b_vp == devvp) - printf("fs metadata"); - else - printf("inum %d", VTOI(bp->b_vp)->i_number); - printf(" lblkno %jd to blkno %jd\n", - (intmax_t)bp->b_lblkno, (intmax_t)cbp->b_blkno); - } -#endif - /* - * If we have already read the old block contents, then - * simply copy them to the new block. Note that we need - * to synchronously write snapshots that have not been - * unlinked, and hence will be visible after a crash, - * to ensure their integrity. - */ - if (savedcbp != 0) { - bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize); - bawrite(cbp); - if (dopersistence && ip->i_effnlink > 0) - (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td); - continue; - } - /* - * Otherwise, read the old block contents into the buffer. - */ - if ((error = readblock(cbp, lbn)) != 0) { - bzero(cbp->b_data, fs->fs_bsize); - bawrite(cbp); - if (dopersistence && ip->i_effnlink > 0) - (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td); - break; - } - savedcbp = cbp; - } - /* - * Note that we need to synchronously write snapshots that - * have not been unlinked, and hence will be visible after - * a crash, to ensure their integrity. - */ - if (savedcbp) { - vp = savedcbp->b_vp; - bawrite(savedcbp); - if (dopersistence && VTOI(vp)->i_effnlink > 0) - (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td); - } - if (snapshot_locked) - VOP_UNLOCK(vp, 0, td); - else - VI_UNLOCK(devvp); - return (error); -} - -/* - * Read the specified block into the given buffer. - * Much of this boiler-plate comes from bwrite(). 
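The copy-on-write hook must never re-enter itself: the allocations done on behalf of a copy can themselves trigger device writes, and those writes would call the hook again. The TDP_COWINPROGRESS flag marks the window in which re-entry is forbidden. A single-threaded sketch of that guard pattern (the kernel flag is per thread; a global suffices here, and the function names are invented):

/* cc -o cowguard cowguard.c */
#include <stdio.h>
#include <stdlib.h>

static int cow_in_progress;

static void
copy_on_write(long long lbn)
{
	if (cow_in_progress) {
		fprintf(stderr, "copy_on_write: recursive call\n");
		abort();
	}
	cow_in_progress = 1;
	/* ...allocate and fill the copy; must not re-enter this hook... */
	cow_in_progress = 0;
	printf("copied lbn %lld\n", lbn);
}

static void
write_block(long long lbn)
{
	copy_on_write(lbn);	/* give snapshots a chance to copy first */
	/* ...then issue the real write... */
}

int
main(void)
{
	write_block(42);
	return (0);
}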
- */ -static int -readblock(bp, lbn) - struct buf *bp; - ufs2_daddr_t lbn; -{ - struct uio auio; - struct iovec aiov; - struct thread *td = curthread; - struct inode *ip = VTOI(bp->b_vp); - - aiov.iov_base = bp->b_data; - aiov.iov_len = bp->b_bcount; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_offset = dbtob(fsbtodb(ip->i_fs, blkstofrags(ip->i_fs, lbn))); - auio.uio_resid = bp->b_bcount; - auio.uio_rw = UIO_READ; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_td = td; - return (physio(ip->i_devvp->v_rdev, &auio, 0)); -} -#endif diff --git a/src/sys/ufs/ffs/ffs_softdep.c b/src/sys/ufs/ffs/ffs_softdep.c deleted file mode 100644 index 844008a..0000000 --- a/src/sys/ufs/ffs/ffs_softdep.c +++ /dev/null @@ -1,5935 +0,0 @@ -#if 0 -/* - * Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved. - * - * The soft updates code is derived from the appendix of a University - * of Michigan technical report (Gregory R. Ganger and Yale N. Patt, - * "Soft Updates: A Solution to the Metadata Update Problem in File - * Systems", CSE-TR-254-95, August 1995). - * - * Further information about soft updates can be obtained from: - * - * Marshall Kirk McKusick http://www.mckusick.com/softdep/ - * 1614 Oxford Street mckusick@mckusick.com - * Berkeley, CA 94709-1608 +1-510-843-9542 - * USA - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: @(#)ffs_softdep.c 9.59 (McKusick) 6/21/00 - */ - -#include -__FBSDID("$FreeBSD: src/sys/ufs/ffs/ffs_softdep.c,v 1.149 2003/10/23 21:14:08 jhb Exp $"); - -/* - * For now we want the safety net that the DIAGNOSTIC and DEBUG flags provide. - */ -#ifndef DIAGNOSTIC -#define DIAGNOSTIC -#endif -#ifndef DEBUG -#define DEBUG -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * These definitions need to be adapted to the system to which - * this file is being ported. - */ -/* - * malloc types defined for the softdep system. 
- */ -static MALLOC_DEFINE(M_PAGEDEP, "pagedep","File page dependencies"); -static MALLOC_DEFINE(M_INODEDEP, "inodedep","Inode dependencies"); -static MALLOC_DEFINE(M_NEWBLK, "newblk","New block allocation"); -static MALLOC_DEFINE(M_BMSAFEMAP, "bmsafemap","Block or frag allocated from cyl group map"); -static MALLOC_DEFINE(M_ALLOCDIRECT, "allocdirect","Block or frag dependency for an inode"); -static MALLOC_DEFINE(M_INDIRDEP, "indirdep","Indirect block dependencies"); -static MALLOC_DEFINE(M_ALLOCINDIR, "allocindir","Block dependency for an indirect block"); -static MALLOC_DEFINE(M_FREEFRAG, "freefrag","Previously used frag for an inode"); -static MALLOC_DEFINE(M_FREEBLKS, "freeblks","Blocks freed from an inode"); -static MALLOC_DEFINE(M_FREEFILE, "freefile","Inode deallocated"); -static MALLOC_DEFINE(M_DIRADD, "diradd","New directory entry"); -static MALLOC_DEFINE(M_MKDIR, "mkdir","New directory"); -static MALLOC_DEFINE(M_DIRREM, "dirrem","Directory entry deleted"); -static MALLOC_DEFINE(M_NEWDIRBLK, "newdirblk","Unclaimed new directory block"); - -#define M_SOFTDEP_FLAGS (M_WAITOK | M_USE_RESERVE) - -#define D_PAGEDEP 0 -#define D_INODEDEP 1 -#define D_NEWBLK 2 -#define D_BMSAFEMAP 3 -#define D_ALLOCDIRECT 4 -#define D_INDIRDEP 5 -#define D_ALLOCINDIR 6 -#define D_FREEFRAG 7 -#define D_FREEBLKS 8 -#define D_FREEFILE 9 -#define D_DIRADD 10 -#define D_MKDIR 11 -#define D_DIRREM 12 -#define D_NEWDIRBLK 13 -#define D_LAST D_NEWDIRBLK - -/* - * translate from workitem type to memory type - * MUST match the defines above, such that memtype[D_XXX] == M_XXX - */ -static struct malloc_type *memtype[] = { - M_PAGEDEP, - M_INODEDEP, - M_NEWBLK, - M_BMSAFEMAP, - M_ALLOCDIRECT, - M_INDIRDEP, - M_ALLOCINDIR, - M_FREEFRAG, - M_FREEBLKS, - M_FREEFILE, - M_DIRADD, - M_MKDIR, - M_DIRREM, - M_NEWDIRBLK -}; - -#define DtoM(type) (memtype[type]) - -/* - * Names of malloc types. - */ -#define TYPENAME(type) \ - ((unsigned)(type) < D_LAST ? memtype[type]->ks_shortdesc : "???") -/* - * End system adaptaion definitions. - */ - -/* - * Internal function prototypes. 
- */ -static void softdep_error(char *, int); -static void drain_output(struct vnode *, int); -static struct buf *getdirtybuf(struct buf **, struct mtx *, int); -static void clear_remove(struct thread *); -static void clear_inodedeps(struct thread *); -static int flush_pagedep_deps(struct vnode *, struct mount *, - struct diraddhd *); -static int flush_inodedep_deps(struct fs *, ino_t); -static int flush_deplist(struct allocdirectlst *, int, int *); -static int handle_written_filepage(struct pagedep *, struct buf *); -static void diradd_inode_written(struct diradd *, struct inodedep *); -static int handle_written_inodeblock(struct inodedep *, struct buf *); -static void handle_allocdirect_partdone(struct allocdirect *); -static void handle_allocindir_partdone(struct allocindir *); -static void initiate_write_filepage(struct pagedep *, struct buf *); -static void handle_written_mkdir(struct mkdir *, int); -static void initiate_write_inodeblock_ufs1(struct inodedep *, struct buf *); -static void initiate_write_inodeblock_ufs2(struct inodedep *, struct buf *); -static void handle_workitem_freefile(struct freefile *); -static void handle_workitem_remove(struct dirrem *, struct vnode *); -static struct dirrem *newdirrem(struct buf *, struct inode *, - struct inode *, int, struct dirrem **); -static void free_diradd(struct diradd *); -static void free_allocindir(struct allocindir *, struct inodedep *); -static void free_newdirblk(struct newdirblk *); -static int indir_trunc(struct freeblks *, ufs2_daddr_t, int, ufs_lbn_t, - ufs2_daddr_t *); -static void deallocate_dependencies(struct buf *, struct inodedep *); -static void free_allocdirect(struct allocdirectlst *, - struct allocdirect *, int); -static int check_inode_unwritten(struct inodedep *); -static int free_inodedep(struct inodedep *); -static void handle_workitem_freeblocks(struct freeblks *, int); -static void merge_inode_lists(struct allocdirectlst *,struct allocdirectlst *); -static void setup_allocindir_phase2(struct buf *, struct inode *, - struct allocindir *); -static struct allocindir *newallocindir(struct inode *, int, ufs2_daddr_t, - ufs2_daddr_t); -static void handle_workitem_freefrag(struct freefrag *); -static struct freefrag *newfreefrag(struct inode *, ufs2_daddr_t, long); -static void allocdirect_merge(struct allocdirectlst *, - struct allocdirect *, struct allocdirect *); -static struct bmsafemap *bmsafemap_lookup(struct buf *); -static int newblk_lookup(struct fs *, ufs2_daddr_t, int, struct newblk **); -static int inodedep_lookup(struct fs *, ino_t, int, struct inodedep **); -static int pagedep_lookup(struct inode *, ufs_lbn_t, int, struct pagedep **); -static void pause_timer(void *); -static int request_cleanup(int, int); -static int process_worklist_item(struct mount *, int); -static void add_to_worklist(struct worklist *); - -/* - * Exported softdep operations. - */ -static void softdep_disk_io_initiation(struct buf *); -static void softdep_disk_write_complete(struct buf *); -static void softdep_deallocate_dependencies(struct buf *); -static void softdep_move_dependencies(struct buf *, struct buf *); -static int softdep_count_dependencies(struct buf *bp, int); - -/* - * Locking primitives. - * - * For a uniprocessor, all we need to do is protect against disk - * interrupts. For a multiprocessor, this lock would have to be - * a mutex. A single mutex is used throughout this file, though - * finer grain locking could be used if contention warranted it. 
- * - * For a multiprocessor, the sleep call would accept a lock and - * release it after the sleep processing was complete. In a uniprocessor - * implementation there is no such interlock, so we simple mark - * the places where it needs to be done with the `interlocked' form - * of the lock calls. Since the uniprocessor sleep already interlocks - * the spl, there is nothing that really needs to be done. - */ -#ifndef /* NOT */ DEBUG -static struct lockit { - int lkt_spl; -} lk = { 0 }; -#define ACQUIRE_LOCK(lk) (lk)->lkt_spl = splbio() -#define FREE_LOCK(lk) splx((lk)->lkt_spl) - -#else /* DEBUG */ -#define NOHOLDER ((struct thread *)-1) -#define SPECIAL_FLAG ((struct thread *)-2) -static struct lockit { - int lkt_spl; - struct thread *lkt_held; -} lk = { 0, NOHOLDER }; - -static void acquire_lock(struct lockit *); -static void free_lock(struct lockit *); -void softdep_panic(char *); - -#define ACQUIRE_LOCK(lk) acquire_lock(lk) -#define FREE_LOCK(lk) free_lock(lk) - -static void -acquire_lock(lk) - struct lockit *lk; -{ - struct thread *holder; - - if (lk->lkt_held != NOHOLDER) { - holder = lk->lkt_held; - FREE_LOCK(lk); - if (holder == curthread) - panic("softdep_lock: locking against myself"); - else - panic("softdep_lock: lock held by %p", holder); - } - lk->lkt_spl = splbio(); - lk->lkt_held = curthread; -} - -static void -free_lock(lk) - struct lockit *lk; -{ - - if (lk->lkt_held == NOHOLDER) - panic("softdep_unlock: lock not held"); - lk->lkt_held = NOHOLDER; - splx(lk->lkt_spl); -} - -/* - * Function to release soft updates lock and panic. - */ -void -softdep_panic(msg) - char *msg; -{ - - if (lk.lkt_held != NOHOLDER) - FREE_LOCK(&lk); - panic(msg); -} -#endif /* DEBUG */ - -static int interlocked_sleep(struct lockit *, int, void *, struct mtx *, int, - const char *, int); - -/* - * When going to sleep, we must save our SPL so that it does - * not get lost if some other process uses the lock while we - * are sleeping. We restore it after we have slept. This routine - * wraps the interlocking with functions that sleep. The list - * below enumerates the available set of operations. - */ -#define UNKNOWN 0 -#define SLEEP 1 -#define LOCKBUF 2 - -static int -interlocked_sleep(lk, op, ident, mtx, flags, wmesg, timo) - struct lockit *lk; - int op; - void *ident; - struct mtx *mtx; - int flags; - const char *wmesg; - int timo; -{ - struct thread *holder; - int s, retval; - - s = lk->lkt_spl; -# ifdef DEBUG - if (lk->lkt_held == NOHOLDER) - panic("interlocked_sleep: lock not held"); - lk->lkt_held = NOHOLDER; -# endif /* DEBUG */ - switch (op) { - case SLEEP: - retval = msleep(ident, mtx, flags, wmesg, timo); - break; - case LOCKBUF: - retval = BUF_LOCK((struct buf *)ident, flags, mtx); - break; - default: - panic("interlocked_sleep: unknown operation"); - } -# ifdef DEBUG - if (lk->lkt_held != NOHOLDER) { - holder = lk->lkt_held; - FREE_LOCK(lk); - if (holder == curthread) - panic("interlocked_sleep: locking against self"); - else - panic("interlocked_sleep: lock held by %p", holder); - } - lk->lkt_held = curthread; -# endif /* DEBUG */ - lk->lkt_spl = s; - return (retval); -} - -/* - * Place holder for real semaphores. 
- */ -struct sema { - int value; - struct thread *holder; - char *name; - int prio; - int timo; -}; -static void sema_init(struct sema *, char *, int, int); -static int sema_get(struct sema *, struct lockit *); -static void sema_release(struct sema *); - -static void -sema_init(semap, name, prio, timo) - struct sema *semap; - char *name; - int prio, timo; -{ - - semap->holder = NOHOLDER; - semap->value = 0; - semap->name = name; - semap->prio = prio; - semap->timo = timo; -} - -static int -sema_get(semap, interlock) - struct sema *semap; - struct lockit *interlock; -{ - - if (semap->value++ > 0) { - if (interlock != NULL) { - interlocked_sleep(interlock, SLEEP, (caddr_t)semap, - NULL, semap->prio, semap->name, - semap->timo); - FREE_LOCK(interlock); - } else { - tsleep(semap, semap->prio, semap->name, - semap->timo); - } - return (0); - } - semap->holder = curthread; - if (interlock != NULL) - FREE_LOCK(interlock); - return (1); -} - -static void -sema_release(semap) - struct sema *semap; -{ - - if (semap->value <= 0 || semap->holder != curthread) { - if (lk.lkt_held != NOHOLDER) - FREE_LOCK(&lk); - panic("sema_release: not held"); - } - if (--semap->value > 0) { - semap->value = 0; - wakeup(semap); - } - semap->holder = NOHOLDER; -} - -/* - * Worklist queue management. - * These routines require that the lock be held. - */ -#ifndef /* NOT */ DEBUG -#define WORKLIST_INSERT(head, item) do { \ - (item)->wk_state |= ONWORKLIST; \ - LIST_INSERT_HEAD(head, item, wk_list); \ -} while (0) -#define WORKLIST_REMOVE(item) do { \ - (item)->wk_state &= ~ONWORKLIST; \ - LIST_REMOVE(item, wk_list); \ -} while (0) -#define WORKITEM_FREE(item, type) FREE(item, DtoM(type)) - -#else /* DEBUG */ -static void worklist_insert(struct workhead *, struct worklist *); -static void worklist_remove(struct worklist *); -static void workitem_free(struct worklist *, int); - -#define WORKLIST_INSERT(head, item) worklist_insert(head, item) -#define WORKLIST_REMOVE(item) worklist_remove(item) -#define WORKITEM_FREE(item, type) workitem_free((struct worklist *)item, type) - -static void -worklist_insert(head, item) - struct workhead *head; - struct worklist *item; -{ - - if (lk.lkt_held == NOHOLDER) - panic("worklist_insert: lock not held"); - if (item->wk_state & ONWORKLIST) { - FREE_LOCK(&lk); - panic("worklist_insert: already on list"); - } - item->wk_state |= ONWORKLIST; - LIST_INSERT_HEAD(head, item, wk_list); -} - -static void -worklist_remove(item) - struct worklist *item; -{ - - if (lk.lkt_held == NOHOLDER) - panic("worklist_remove: lock not held"); - if ((item->wk_state & ONWORKLIST) == 0) { - FREE_LOCK(&lk); - panic("worklist_remove: not on list"); - } - item->wk_state &= ~ONWORKLIST; - LIST_REMOVE(item, wk_list); -} - -static void -workitem_free(item, type) - struct worklist *item; - int type; -{ - - if (item->wk_state & ONWORKLIST) { - if (lk.lkt_held != NOHOLDER) - FREE_LOCK(&lk); - panic("workitem_free: still on list"); - } - if (item->wk_type != type) { - if (lk.lkt_held != NOHOLDER) - FREE_LOCK(&lk); - panic("workitem_free: type mismatch"); - } - FREE(item, DtoM(type)); -} -#endif /* DEBUG */ - -/* - * Workitem queue management - */ -static struct workhead softdep_workitem_pending; -static struct worklist *worklist_tail; -static int num_on_worklist; /* number of worklist items to be processed */ -static int softdep_worklist_busy; /* 1 => trying to do unmount */ -static int softdep_worklist_req; /* serialized waiters */ -static int max_softdeps; /* maximum number of structs before slowdown */ -static int 
maxindirdeps = 50; /* max number of indirdeps before slowdown */ -static int tickdelay = 2; /* number of ticks to pause during slowdown */ -static int proc_waiting; /* tracks whether we have a timeout posted */ -static int *stat_countp; /* statistic to count in proc_waiting timeout */ -static struct callout_handle handle; /* handle on posted proc_waiting timeout */ -static struct thread *filesys_syncer; /* proc of filesystem syncer process */ -static int req_clear_inodedeps; /* syncer process flush some inodedeps */ -#define FLUSH_INODES 1 -static int req_clear_remove; /* syncer process flush some freeblks */ -#define FLUSH_REMOVE 2 -#define FLUSH_REMOVE_WAIT 3 -/* - * runtime statistics - */ -static int stat_worklist_push; /* number of worklist cleanups */ -static int stat_blk_limit_push; /* number of times block limit neared */ -static int stat_ino_limit_push; /* number of times inode limit neared */ -static int stat_blk_limit_hit; /* number of times block slowdown imposed */ -static int stat_ino_limit_hit; /* number of times inode slowdown imposed */ -static int stat_sync_limit_hit; /* number of synchronous slowdowns imposed */ -static int stat_indir_blk_ptrs; /* bufs redirtied as indir ptrs not written */ -static int stat_inode_bitmap; /* bufs redirtied as inode bitmap not written */ -static int stat_direct_blk_ptrs;/* bufs redirtied as direct ptrs not written */ -static int stat_dir_entry; /* bufs redirtied as dir entry cannot write */ -#ifdef DEBUG -#include -#include -SYSCTL_INT(_debug, OID_AUTO, max_softdeps, CTLFLAG_RW, &max_softdeps, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, tickdelay, CTLFLAG_RW, &tickdelay, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, maxindirdeps, CTLFLAG_RW, &maxindirdeps, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, worklist_push, CTLFLAG_RW, &stat_worklist_push, 0,""); -SYSCTL_INT(_debug, OID_AUTO, blk_limit_push, CTLFLAG_RW, &stat_blk_limit_push, 0,""); -SYSCTL_INT(_debug, OID_AUTO, ino_limit_push, CTLFLAG_RW, &stat_ino_limit_push, 0,""); -SYSCTL_INT(_debug, OID_AUTO, blk_limit_hit, CTLFLAG_RW, &stat_blk_limit_hit, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, ino_limit_hit, CTLFLAG_RW, &stat_ino_limit_hit, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, sync_limit_hit, CTLFLAG_RW, &stat_sync_limit_hit, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, indir_blk_ptrs, CTLFLAG_RW, &stat_indir_blk_ptrs, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, inode_bitmap, CTLFLAG_RW, &stat_inode_bitmap, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, direct_blk_ptrs, CTLFLAG_RW, &stat_direct_blk_ptrs, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, dir_entry, CTLFLAG_RW, &stat_dir_entry, 0, ""); -#endif /* DEBUG */ - -/* - * Add an item to the end of the work queue. - * This routine requires that the lock be held. - * This is the only routine that adds items to the list. - * The following routine is the only one that removes items - * and does so in order from first to last. - */ -static void -add_to_worklist(wk) - struct worklist *wk; -{ - - if (wk->wk_state & ONWORKLIST) { - if (lk.lkt_held != NOHOLDER) - FREE_LOCK(&lk); - panic("add_to_worklist: already on list"); - } - wk->wk_state |= ONWORKLIST; - if (LIST_FIRST(&softdep_workitem_pending) == NULL) - LIST_INSERT_HEAD(&softdep_workitem_pending, wk, wk_list); - else - LIST_INSERT_AFTER(worklist_tail, wk, wk_list); - worklist_tail = wk; - num_on_worklist += 1; -} - -/* - * Process that runs once per second to handle items in the background queue. - * - * Note that we ensure that everything is done in the order in which they - * appear in the queue. 
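add_to_worklist above keeps FIFO order on a singly-linked LIST, which only supports cheap insertion at the head or after a known element, by remembering the last element inserted and always inserting after it. A self-contained sketch of the same trick using <sys/queue.h> (structure and field names are simplified stand-ins):

/* cc -o worklist worklist.c */
#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

struct worklist {
	int wk_seq;
	LIST_ENTRY(worklist) wk_list;
};

static LIST_HEAD(workhead, worklist) pending = LIST_HEAD_INITIALIZER(pending);
static struct worklist *worklist_tail;

static void
add_to_worklist(struct worklist *wk)
{
	if (LIST_FIRST(&pending) == NULL)
		LIST_INSERT_HEAD(&pending, wk, wk_list);
	else
		LIST_INSERT_AFTER(worklist_tail, wk, wk_list);
	worklist_tail = wk;	/* remember the tail for the next insert */
}

int
main(void)
{
	struct worklist *wk;
	int i;

	for (i = 0; i < 4; i++) {
		wk = malloc(sizeof(*wk));
		wk->wk_seq = i;
		add_to_worklist(wk);
	}
	LIST_FOREACH(wk, &pending, wk_list)	/* prints 0 1 2 3: FIFO order */
		printf("%d ", wk->wk_seq);
	printf("\n");
	return (0);
}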
The code below depends on this property to ensure - * that blocks of a file are freed before the inode itself is freed. This - * ordering ensures that no new triples will be generated - * until all the old ones have been purged from the dependency lists. - */ -int -softdep_process_worklist(matchmnt) - struct mount *matchmnt; -{ - struct thread *td = curthread; - int cnt, matchcnt, loopcount; - long starttime; - - /* - * Record the process identifier of our caller so that we can give - * this process preferential treatment in request_cleanup below. - */ - filesys_syncer = td; - matchcnt = 0; - - /* - * There is no danger of having multiple processes run this - * code, but we have to single-thread it when softdep_flushfiles() - * is in operation to get an accurate count of the number of items - * related to its mount point that are in the list. - */ - if (matchmnt == NULL) { - if (softdep_worklist_busy < 0) - return(-1); - softdep_worklist_busy += 1; - } - - /* - * If requested, try removing inode or removal dependencies. - */ - if (req_clear_inodedeps) { - clear_inodedeps(td); - req_clear_inodedeps -= 1; - wakeup_one(&proc_waiting); - } - if (req_clear_remove) { - clear_remove(td); - req_clear_remove -= 1; - wakeup_one(&proc_waiting); - } - loopcount = 1; - starttime = time_second; - while (num_on_worklist > 0) { - if ((cnt = process_worklist_item(matchmnt, 0)) == -1) - break; - else - matchcnt += cnt; - - /* - * If a umount operation wants to run the worklist - * accurately, abort. - */ - if (softdep_worklist_req && matchmnt == NULL) { - matchcnt = -1; - break; - } - - /* - * If requested, try removing inode or removal dependencies. - */ - if (req_clear_inodedeps) { - clear_inodedeps(td); - req_clear_inodedeps -= 1; - wakeup_one(&proc_waiting); - } - if (req_clear_remove) { - clear_remove(td); - req_clear_remove -= 1; - wakeup_one(&proc_waiting); - } - /* - * We do not generally want to stop for buffer space, but if - * we are really being a buffer hog, we will stop and wait. - */ - if (loopcount++ % 128 == 0) - bwillwrite(); - /* - * Never allow processing to run for more than one - * second. Otherwise the other syncer tasks may get - * excessively backlogged. - */ - if (starttime != time_second && matchmnt == NULL) { - matchcnt = -1; - break; - } - } - if (matchmnt == NULL) { - softdep_worklist_busy -= 1; - if (softdep_worklist_req && softdep_worklist_busy == 0) - wakeup(&softdep_worklist_req); - } - return (matchcnt); -} - -/* - * Process one item on the worklist. - */ -static int -process_worklist_item(matchmnt, flags) - struct mount *matchmnt; - int flags; -{ - struct worklist *wk, *wkend; - struct mount *mp; - struct vnode *vp; - int matchcnt = 0; - - /* - * If we are being called because of a process doing a - * copy-on-write, then it is not safe to write as we may - * recurse into the copy-on-write routine. - */ - if (curthread->td_pflags & TDP_COWINPROGRESS) - return (-1); - ACQUIRE_LOCK(&lk); - /* - * Normally we just process each item on the worklist in order. - * However, if we are in a situation where we cannot lock any - * inodes, we have to skip over any dirrem requests whose - * vnodes are resident and locked. 
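The worklist pass above bounds itself in two ways when no particular mount point is being targeted: it yields to buffer-cache pressure every 128 items (bwillwrite in the kernel) and abandons the pass once the wall-clock second in which it started has passed. A toy drain loop with the same shape; the item processing and the yield are stubs.

/* cc -o drainpass drainpass.c */
#include <stdio.h>
#include <time.h>

static int
drain_pass(int pending)
{
	time_t starttime = time(NULL);
	int done, yields = 0;

	for (done = 0; done < pending; done++) {
		/* ...process one queued item here... */
		if ((done + 1) % 128 == 0)
			yields++;	/* stand-in for bwillwrite() */
		if (time(NULL) != starttime)
			break;		/* never run for more than one second */
	}
	printf("processed %d of %d items, yielded %d times\n",
	    done, pending, yields);
	return (done);
}

int
main(void)
{
	drain_pass(1000);
	return (0);
}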
- */ - vp = NULL; - LIST_FOREACH(wk, &softdep_workitem_pending, wk_list) { - if (wk->wk_state & INPROGRESS) - continue; - if ((flags & LK_NOWAIT) == 0 || wk->wk_type != D_DIRREM) - break; - wk->wk_state |= INPROGRESS; - FREE_LOCK(&lk); - VFS_VGET(WK_DIRREM(wk)->dm_mnt, WK_DIRREM(wk)->dm_oldinum, - LK_NOWAIT | LK_EXCLUSIVE, &vp); - ACQUIRE_LOCK(&lk); - wk->wk_state &= ~INPROGRESS; - if (vp != NULL) - break; - } - if (wk == 0) { - FREE_LOCK(&lk); - return (-1); - } - /* - * Remove the item to be processed. If we are removing the last - * item on the list, we need to recalculate the tail pointer. - * As this happens rarely and usually when the list is short, - * we just run down the list to find it rather than tracking it - * in the above loop. - */ - WORKLIST_REMOVE(wk); - if (wk == worklist_tail) { - LIST_FOREACH(wkend, &softdep_workitem_pending, wk_list) - if (LIST_NEXT(wkend, wk_list) == NULL) - break; - worklist_tail = wkend; - } - num_on_worklist -= 1; - FREE_LOCK(&lk); - switch (wk->wk_type) { - - case D_DIRREM: - /* removal of a directory entry */ - mp = WK_DIRREM(wk)->dm_mnt; - if (vn_write_suspend_wait(NULL, mp, V_NOWAIT)) - panic("%s: dirrem on suspended filesystem", - "process_worklist_item"); - if (mp == matchmnt) - matchcnt += 1; - handle_workitem_remove(WK_DIRREM(wk), vp); - break; - - case D_FREEBLKS: - /* releasing blocks and/or fragments from a file */ - mp = WK_FREEBLKS(wk)->fb_mnt; - if (vn_write_suspend_wait(NULL, mp, V_NOWAIT)) - panic("%s: freeblks on suspended filesystem", - "process_worklist_item"); - if (mp == matchmnt) - matchcnt += 1; - handle_workitem_freeblocks(WK_FREEBLKS(wk), flags & LK_NOWAIT); - break; - - case D_FREEFRAG: - /* releasing a fragment when replaced as a file grows */ - mp = WK_FREEFRAG(wk)->ff_mnt; - if (vn_write_suspend_wait(NULL, mp, V_NOWAIT)) - panic("%s: freefrag on suspended filesystem", - "process_worklist_item"); - if (mp == matchmnt) - matchcnt += 1; - handle_workitem_freefrag(WK_FREEFRAG(wk)); - break; - - case D_FREEFILE: - /* releasing an inode when its link count drops to 0 */ - mp = WK_FREEFILE(wk)->fx_mnt; - if (vn_write_suspend_wait(NULL, mp, V_NOWAIT)) - panic("%s: freefile on suspended filesystem", - "process_worklist_item"); - if (mp == matchmnt) - matchcnt += 1; - handle_workitem_freefile(WK_FREEFILE(wk)); - break; - - default: - panic("%s_process_worklist: Unknown type %s", - "softdep", TYPENAME(wk->wk_type)); - /* NOTREACHED */ - } - return (matchcnt); -} - -/* - * Move dependencies from one buffer to another. - */ -static void -softdep_move_dependencies(oldbp, newbp) - struct buf *oldbp; - struct buf *newbp; -{ - struct worklist *wk, *wktail; - - if (LIST_FIRST(&newbp->b_dep) != NULL) - panic("softdep_move_dependencies: need merge code"); - wktail = 0; - ACQUIRE_LOCK(&lk); - while ((wk = LIST_FIRST(&oldbp->b_dep)) != NULL) { - LIST_REMOVE(wk, wk_list); - if (wktail == 0) - LIST_INSERT_HEAD(&newbp->b_dep, wk, wk_list); - else - LIST_INSERT_AFTER(wktail, wk, wk_list); - wktail = wk; - } - FREE_LOCK(&lk); -} - -/* - * Purge the work list of all items associated with a particular mount point. - */ -int -softdep_flushworklist(oldmnt, countp, td) - struct mount *oldmnt; - int *countp; - struct thread *td; -{ - struct vnode *devvp; - int count, error = 0; - - /* - * Await our turn to clear out the queue, then serialize access. 
- */ - while (softdep_worklist_busy) { - softdep_worklist_req += 1; - tsleep(&softdep_worklist_req, PRIBIO, "softflush", 0); - softdep_worklist_req -= 1; - } - softdep_worklist_busy = -1; - /* - * Alternately flush the block device associated with the mount - * point and process any dependencies that the flushing - * creates. We continue until no more worklist dependencies - * are found. - */ - *countp = 0; - devvp = VFSTOUFS(oldmnt)->um_devvp; - while ((count = softdep_process_worklist(oldmnt)) > 0) { - *countp += count; - vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); - error = VOP_FSYNC(devvp, td->td_ucred, MNT_WAIT, td); - VOP_UNLOCK(devvp, 0, td); - if (error) - break; - } - softdep_worklist_busy = 0; - if (softdep_worklist_req) - wakeup(&softdep_worklist_req); - return (error); -} - -/* - * Flush all vnodes and worklist items associated with a specified mount point. - */ -int -softdep_flushfiles(oldmnt, flags, td) - struct mount *oldmnt; - int flags; - struct thread *td; -{ - int error, count, loopcnt; - - error = 0; - - /* - * Alternately flush the vnodes associated with the mount - * point and process any dependencies that the flushing - * creates. In theory, this loop can happen at most twice, - * but we give it a few extra just to be sure. - */ - for (loopcnt = 10; loopcnt > 0; loopcnt--) { - /* - * Do another flush in case any vnodes were brought in - * as part of the cleanup operations. - */ - if ((error = ffs_flushfiles(oldmnt, flags, td)) != 0) - break; - if ((error = softdep_flushworklist(oldmnt, &count, td)) != 0 || - count == 0) - break; - } - /* - * If we are unmounting then it is an error to fail. If we - * are simply trying to downgrade to read-only, then filesystem - * activity can keep us busy forever, so we just fail with EBUSY. - */ - if (loopcnt == 0) { - if (oldmnt->mnt_kern_flag & MNTK_UNMOUNT) - panic("softdep_flushfiles: looping"); - error = EBUSY; - } - return (error); -} - -/* - * Structure hashing. - * - * There are three types of structures that can be looked up: - * 1) pagedep structures identified by mount point, inode number, - * and logical block. - * 2) inodedep structures identified by mount point and inode number. - * 3) newblk structures identified by mount point and - * physical block number. - * - * The "pagedep" and "inodedep" dependency structures are hashed - * separately from the file blocks and inodes to which they correspond. - * This separation helps when the in-memory copy of an inode or - * file block must be replaced. It also obviates the need to access - * an inode or file page when simply updating (or de-allocating) - * dependency structures. Lookup of newblk structures is needed to - * find newly allocated blocks when trying to associate them with - * their allocdirect or allocindir structure. - * - * The lookup routines optionally create and hash a new instance when - * an existing entry is not found. - */ -#define DEPALLOC 0x0001 /* allocate structure if lookup fails */ -#define NODELAY 0x0002 /* cannot do background work */ - -/* - * Structures and routines associated with pagedep caching. - */ -LIST_HEAD(pagedep_hashhead, pagedep) *pagedep_hashtbl; -u_long pagedep_hash; /* size of hash table - 1 */ -#define PAGEDEP_HASH(mp, inum, lbn) \ - (&pagedep_hashtbl[((((register_t)(mp)) >> 13) + (inum) + (lbn)) & \ - pagedep_hash]) -static struct sema pagedep_in_progress; - -/* - * Look up a pagedep. Return 1 if found, 0 if not found or found - * when asked to allocate but not associated with any buffer. 
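The dependency hash tables below are sized by hashinit() to a power of two, so the returned mask is simply size minus one and bucket selection is a shift-and-add of the identifying fields followed by a mask, as in PAGEDEP_HASH. The mount pointer is shifted right, presumably because its low-order bits vary little between mounts. A user-space rendering of that bucket computation (the mask and the inputs are illustrative):

/* cc -o dephash dephash.c */
#include <stdio.h>
#include <stdint.h>

static unsigned long
pagedep_bucket(const void *mp, unsigned long inum, long lbn, unsigned long mask)
{
	/* mix mount pointer, inode number and logical block, then mask */
	return (((uintptr_t)mp >> 13) + inum + lbn) & mask;
}

int
main(void)
{
	static int mnt;			/* stands in for a struct mount */
	unsigned long mask = 255;	/* table of 256 buckets */

	printf("bucket for inode 7423, lbn 12: %lu\n",
	    pagedep_bucket(&mnt, 7423, 12, mask));
	return (0);
}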
- * If not found, allocate if DEPALLOC flag is passed. - * Found or allocated entry is returned in pagedeppp. - * This routine must be called with splbio interrupts blocked. - */ -static int -pagedep_lookup(ip, lbn, flags, pagedeppp) - struct inode *ip; - ufs_lbn_t lbn; - int flags; - struct pagedep **pagedeppp; -{ - struct pagedep *pagedep; - struct pagedep_hashhead *pagedephd; - struct mount *mp; - int i; - -#ifdef DEBUG - if (lk.lkt_held == NOHOLDER) - panic("pagedep_lookup: lock not held"); -#endif - mp = ITOV(ip)->v_mount; - pagedephd = PAGEDEP_HASH(mp, ip->i_number, lbn); -top: - LIST_FOREACH(pagedep, pagedephd, pd_hash) - if (ip->i_number == pagedep->pd_ino && - lbn == pagedep->pd_lbn && - mp == pagedep->pd_mnt) - break; - if (pagedep) { - *pagedeppp = pagedep; - if ((flags & DEPALLOC) != 0 && - (pagedep->pd_state & ONWORKLIST) == 0) - return (0); - return (1); - } - if ((flags & DEPALLOC) == 0) { - *pagedeppp = NULL; - return (0); - } - if (sema_get(&pagedep_in_progress, &lk) == 0) { - ACQUIRE_LOCK(&lk); - goto top; - } - MALLOC(pagedep, struct pagedep *, sizeof(struct pagedep), M_PAGEDEP, - M_SOFTDEP_FLAGS|M_ZERO); - pagedep->pd_list.wk_type = D_PAGEDEP; - pagedep->pd_mnt = mp; - pagedep->pd_ino = ip->i_number; - pagedep->pd_lbn = lbn; - LIST_INIT(&pagedep->pd_dirremhd); - LIST_INIT(&pagedep->pd_pendinghd); - for (i = 0; i < DAHASHSZ; i++) - LIST_INIT(&pagedep->pd_diraddhd[i]); - ACQUIRE_LOCK(&lk); - LIST_INSERT_HEAD(pagedephd, pagedep, pd_hash); - sema_release(&pagedep_in_progress); - *pagedeppp = pagedep; - return (0); -} - -/* - * Structures and routines associated with inodedep caching. - */ -LIST_HEAD(inodedep_hashhead, inodedep) *inodedep_hashtbl; -static u_long inodedep_hash; /* size of hash table - 1 */ -static long num_inodedep; /* number of inodedep allocated */ -#define INODEDEP_HASH(fs, inum) \ - (&inodedep_hashtbl[((((register_t)(fs)) >> 13) + (inum)) & inodedep_hash]) -static struct sema inodedep_in_progress; - -/* - * Look up an inodedep. Return 1 if found, 0 if not found. - * If not found, allocate if DEPALLOC flag is passed. - * Found or allocated entry is returned in inodedeppp. - * This routine must be called with splbio interrupts blocked. - */ -static int -inodedep_lookup(fs, inum, flags, inodedeppp) - struct fs *fs; - ino_t inum; - int flags; - struct inodedep **inodedeppp; -{ - struct inodedep *inodedep; - struct inodedep_hashhead *inodedephd; - int firsttry; - -#ifdef DEBUG - if (lk.lkt_held == NOHOLDER) - panic("inodedep_lookup: lock not held"); -#endif - firsttry = 1; - inodedephd = INODEDEP_HASH(fs, inum); -top: - LIST_FOREACH(inodedep, inodedephd, id_hash) - if (inum == inodedep->id_ino && fs == inodedep->id_fs) - break; - if (inodedep) { - *inodedeppp = inodedep; - return (1); - } - if ((flags & DEPALLOC) == 0) { - *inodedeppp = NULL; - return (0); - } - /* - * If we are over our limit, try to improve the situation. 
- */ - if (num_inodedep > max_softdeps && firsttry && (flags & NODELAY) == 0 && - request_cleanup(FLUSH_INODES, 1)) { - firsttry = 0; - goto top; - } - if (sema_get(&inodedep_in_progress, &lk) == 0) { - ACQUIRE_LOCK(&lk); - goto top; - } - num_inodedep += 1; - MALLOC(inodedep, struct inodedep *, sizeof(struct inodedep), - M_INODEDEP, M_SOFTDEP_FLAGS); - inodedep->id_list.wk_type = D_INODEDEP; - inodedep->id_fs = fs; - inodedep->id_ino = inum; - inodedep->id_state = ALLCOMPLETE; - inodedep->id_nlinkdelta = 0; - inodedep->id_savedino1 = NULL; - inodedep->id_savedsize = -1; - inodedep->id_savedextsize = -1; - inodedep->id_buf = NULL; - LIST_INIT(&inodedep->id_pendinghd); - LIST_INIT(&inodedep->id_inowait); - LIST_INIT(&inodedep->id_bufwait); - TAILQ_INIT(&inodedep->id_inoupdt); - TAILQ_INIT(&inodedep->id_newinoupdt); - TAILQ_INIT(&inodedep->id_extupdt); - TAILQ_INIT(&inodedep->id_newextupdt); - ACQUIRE_LOCK(&lk); - LIST_INSERT_HEAD(inodedephd, inodedep, id_hash); - sema_release(&inodedep_in_progress); - *inodedeppp = inodedep; - return (0); -} - -/* - * Structures and routines associated with newblk caching. - */ -LIST_HEAD(newblk_hashhead, newblk) *newblk_hashtbl; -u_long newblk_hash; /* size of hash table - 1 */ -#define NEWBLK_HASH(fs, inum) \ - (&newblk_hashtbl[((((register_t)(fs)) >> 13) + (inum)) & newblk_hash]) -static struct sema newblk_in_progress; - -/* - * Look up a newblk. Return 1 if found, 0 if not found. - * If not found, allocate if DEPALLOC flag is passed. - * Found or allocated entry is returned in newblkpp. - */ -static int -newblk_lookup(fs, newblkno, flags, newblkpp) - struct fs *fs; - ufs2_daddr_t newblkno; - int flags; - struct newblk **newblkpp; -{ - struct newblk *newblk; - struct newblk_hashhead *newblkhd; - - newblkhd = NEWBLK_HASH(fs, newblkno); -top: - LIST_FOREACH(newblk, newblkhd, nb_hash) - if (newblkno == newblk->nb_newblkno && fs == newblk->nb_fs) - break; - if (newblk) { - *newblkpp = newblk; - return (1); - } - if ((flags & DEPALLOC) == 0) { - *newblkpp = NULL; - return (0); - } - if (sema_get(&newblk_in_progress, 0) == 0) - goto top; - MALLOC(newblk, struct newblk *, sizeof(struct newblk), - M_NEWBLK, M_SOFTDEP_FLAGS); - newblk->nb_state = 0; - newblk->nb_fs = fs; - newblk->nb_newblkno = newblkno; - LIST_INSERT_HEAD(newblkhd, newblk, nb_hash); - sema_release(&newblk_in_progress); - *newblkpp = newblk; - return (0); -} - -/* - * Executed during filesystem system initialization before - * mounting any filesystems. 
- */ -void -softdep_initialize() -{ - - LIST_INIT(&mkdirlisthd); - LIST_INIT(&softdep_workitem_pending); - max_softdeps = desiredvnodes * 4; - pagedep_hashtbl = hashinit(desiredvnodes / 5, M_PAGEDEP, - &pagedep_hash); - sema_init(&pagedep_in_progress, "pagedep", PRIBIO, 0); - inodedep_hashtbl = hashinit(desiredvnodes, M_INODEDEP, &inodedep_hash); - sema_init(&inodedep_in_progress, "inodedep", PRIBIO, 0); - newblk_hashtbl = hashinit(64, M_NEWBLK, &newblk_hash); - sema_init(&newblk_in_progress, "newblk", PRIBIO, 0); - - /* hooks through which the main kernel code calls us */ - softdep_process_worklist_hook = softdep_process_worklist; - softdep_fsync_hook = softdep_fsync; - - /* initialise bioops hack */ - bioops.io_start = softdep_disk_io_initiation; - bioops.io_complete = softdep_disk_write_complete; - bioops.io_deallocate = softdep_deallocate_dependencies; - bioops.io_movedeps = softdep_move_dependencies; - bioops.io_countdeps = softdep_count_dependencies; -} - -/* - * Executed after all filesystems have been unmounted during - * filesystem module unload. - */ -void -softdep_uninitialize() -{ - - softdep_process_worklist_hook = NULL; - softdep_fsync_hook = NULL; - hashdestroy(pagedep_hashtbl, M_PAGEDEP, pagedep_hash); - hashdestroy(inodedep_hashtbl, M_INODEDEP, inodedep_hash); - hashdestroy(newblk_hashtbl, M_NEWBLK, newblk_hash); -} - -/* - * Called at mount time to notify the dependency code that a - * filesystem wishes to use it. - */ -int -softdep_mount(devvp, mp, fs, cred) - struct vnode *devvp; - struct mount *mp; - struct fs *fs; - struct ucred *cred; -{ - struct csum_total cstotal; - struct cg *cgp; - struct buf *bp; - int error, cyl; - - mp->mnt_flag &= ~MNT_ASYNC; - mp->mnt_flag |= MNT_SOFTDEP; - /* - * When doing soft updates, the counters in the - * superblock may have gotten out of sync, so we have - * to scan the cylinder groups and recalculate them. - */ - if (fs->fs_clean != 0) - return (0); - bzero(&cstotal, sizeof cstotal); - for (cyl = 0; cyl < fs->fs_ncg; cyl++) { - if ((error = bread(devvp, fsbtodb(fs, cgtod(fs, cyl)), - fs->fs_cgsize, cred, &bp)) != 0) { - brelse(bp); - return (error); - } - cgp = (struct cg *)bp->b_data; - cstotal.cs_nffree += cgp->cg_cs.cs_nffree; - cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree; - cstotal.cs_nifree += cgp->cg_cs.cs_nifree; - cstotal.cs_ndir += cgp->cg_cs.cs_ndir; - fs->fs_cs(fs, cyl) = cgp->cg_cs; - brelse(bp); - } -#ifdef DEBUG - if (bcmp(&cstotal, &fs->fs_cstotal, sizeof cstotal)) - printf("%s: superblock summary recomputed\n", fs->fs_fsmnt); -#endif - bcopy(&cstotal, &fs->fs_cstotal, sizeof cstotal); - return (0); -} - -/* - * Protecting the freemaps (or bitmaps). - * - * To eliminate the need to execute fsck before mounting a filesystem - * after a power failure, one must (conservatively) guarantee that the - * on-disk copy of the bitmaps never indicate that a live inode or block is - * free. So, when a block or inode is allocated, the bitmap should be - * updated (on disk) before any new pointers. When a block or inode is - * freed, the bitmap should not be updated until all pointers have been - * reset. The latter dependency is handled by the delayed de-allocation - * approach described below for block and inode de-allocation. The former - * dependency is handled by calling the following procedure when a block or - * inode is allocated. When an inode is allocated an "inodedep" is created - * with its DEPCOMPLETE flag cleared until its bitmap is written to disk. 
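Because soft updates protect only the bitmaps, softdep_mount above rebuilds the filesystem-wide summary counters for an unclean filesystem by summing the per-cylinder-group counters. A stripped-down sketch of that summation; the structure loosely follows the csum fields read from each group, with made-up numbers.

/* cc -o cgsum cgsum.c */
#include <stdio.h>

struct csum {
	long cs_ndir;		/* number of directories */
	long cs_nbfree;		/* number of free blocks */
	long cs_nifree;		/* number of free inodes */
	long cs_nffree;		/* number of free fragments */
};

int
main(void)
{
	struct csum cg[3] = {
		{ 10, 500, 1000, 40 },
		{  3, 200,  800, 12 },
		{  7, 650,  900, 31 },
	};
	struct csum total = { 0, 0, 0, 0 };
	int i;

	/* recompute the summary by summing every cylinder group */
	for (i = 0; i < 3; i++) {
		total.cs_ndir += cg[i].cs_ndir;
		total.cs_nbfree += cg[i].cs_nbfree;
		total.cs_nifree += cg[i].cs_nifree;
		total.cs_nffree += cg[i].cs_nffree;
	}
	printf("ndir %ld nbfree %ld nifree %ld nffree %ld\n",
	    total.cs_ndir, total.cs_nbfree, total.cs_nifree, total.cs_nffree);
	return (0);
}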
- * Each "inodedep" is also inserted into the hash indexing structure so
- * that any additional link additions can be made dependent on the inode
- * allocation.
- *
- * The ufs filesystem maintains a number of free block counts (e.g., per
- * cylinder group, per cylinder and per <cylinder, rotational position> pair)
- * in addition to the bitmaps. These counts are used to improve efficiency
- * during allocation and therefore must be consistent with the bitmaps.
- * There is no convenient way to guarantee post-crash consistency of these
- * counts with simple update ordering, for two main reasons: (1) The counts
- * and bitmaps for a single cylinder group block are not in the same disk
- * sector. If a disk write is interrupted (e.g., by power failure), one may
- * be written and the other not. (2) Some of the counts are located in the
- * superblock rather than the cylinder group block. So, we focus our soft
- * updates implementation on protecting the bitmaps. When mounting a
- * filesystem, we recompute the auxiliary counts from the bitmaps.
- */
-
-/*
- * Called just after updating the cylinder group block to allocate an inode.
- */
-void
-softdep_setup_inomapdep(bp, ip, newinum)
-	struct buf *bp;		/* buffer for cylgroup block with inode map */
-	struct inode *ip;	/* inode related to allocation */
-	ino_t newinum;		/* new inode number being allocated */
-{
-	struct inodedep *inodedep;
-	struct bmsafemap *bmsafemap;
-
-	/*
-	 * Create a dependency for the newly allocated inode.
-	 * Panic if it already exists as something is seriously wrong.
-	 * Otherwise add it to the dependency list for the buffer holding
-	 * the cylinder group map from which it was allocated.
-	 */
-	ACQUIRE_LOCK(&lk);
-	if ((inodedep_lookup(ip->i_fs, newinum, DEPALLOC|NODELAY, &inodedep))) {
-		FREE_LOCK(&lk);
-		panic("softdep_setup_inomapdep: found inode");
-	}
-	inodedep->id_buf = bp;
-	inodedep->id_state &= ~DEPCOMPLETE;
-	bmsafemap = bmsafemap_lookup(bp);
-	LIST_INSERT_HEAD(&bmsafemap->sm_inodedephd, inodedep, id_deps);
-	FREE_LOCK(&lk);
-}
-
-/*
- * Called just after updating the cylinder group block to
- * allocate block or fragment.
- */
-void
-softdep_setup_blkmapdep(bp, fs, newblkno)
-	struct buf *bp;		/* buffer for cylgroup block with block map */
-	struct fs *fs;		/* filesystem doing allocation */
-	ufs2_daddr_t newblkno;	/* number of newly allocated block */
-{
-	struct newblk *newblk;
-	struct bmsafemap *bmsafemap;
-
-	/*
-	 * Create a dependency for the newly allocated block.
-	 * Add it to the dependency list for the buffer holding
-	 * the cylinder group map from which it was allocated.
-	 */
-	if (newblk_lookup(fs, newblkno, DEPALLOC, &newblk) != 0)
-		panic("softdep_setup_blkmapdep: found block");
-	ACQUIRE_LOCK(&lk);
-	newblk->nb_bmsafemap = bmsafemap = bmsafemap_lookup(bp);
-	LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, nb_deps);
-	FREE_LOCK(&lk);
-}
-
-/*
- * Find the bmsafemap associated with a cylinder group buffer.
- * If none exists, create one. The buffer must be locked when
- * this routine is called and this routine must be called with
- * splbio interrupts blocked.
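- * Note that the lock is dropped while a new bmsafemap is allocated and
- * reacquired before the new entry is linked onto the buffer's dependency
- * list.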
- */ -static struct bmsafemap * -bmsafemap_lookup(bp) - struct buf *bp; -{ - struct bmsafemap *bmsafemap; - struct worklist *wk; - -#ifdef DEBUG - if (lk.lkt_held == NOHOLDER) - panic("bmsafemap_lookup: lock not held"); -#endif - LIST_FOREACH(wk, &bp->b_dep, wk_list) - if (wk->wk_type == D_BMSAFEMAP) - return (WK_BMSAFEMAP(wk)); - FREE_LOCK(&lk); - MALLOC(bmsafemap, struct bmsafemap *, sizeof(struct bmsafemap), - M_BMSAFEMAP, M_SOFTDEP_FLAGS); - bmsafemap->sm_list.wk_type = D_BMSAFEMAP; - bmsafemap->sm_list.wk_state = 0; - bmsafemap->sm_buf = bp; - LIST_INIT(&bmsafemap->sm_allocdirecthd); - LIST_INIT(&bmsafemap->sm_allocindirhd); - LIST_INIT(&bmsafemap->sm_inodedephd); - LIST_INIT(&bmsafemap->sm_newblkhd); - ACQUIRE_LOCK(&lk); - WORKLIST_INSERT(&bp->b_dep, &bmsafemap->sm_list); - return (bmsafemap); -} - -/* - * Direct block allocation dependencies. - * - * When a new block is allocated, the corresponding disk locations must be - * initialized (with zeros or new data) before the on-disk inode points to - * them. Also, the freemap from which the block was allocated must be - * updated (on disk) before the inode's pointer. These two dependencies are - * independent of each other and are needed for all file blocks and indirect - * blocks that are pointed to directly by the inode. Just before the - * "in-core" version of the inode is updated with a newly allocated block - * number, a procedure (below) is called to setup allocation dependency - * structures. These structures are removed when the corresponding - * dependencies are satisfied or when the block allocation becomes obsolete - * (i.e., the file is deleted, the block is de-allocated, or the block is a - * fragment that gets upgraded). All of these cases are handled in - * procedures described later. - * - * When a file extension causes a fragment to be upgraded, either to a larger - * fragment or to a full block, the on-disk location may change (if the - * previous fragment could not simply be extended). In this case, the old - * fragment must be de-allocated, but not until after the inode's pointer has - * been updated. In most cases, this is handled by later procedures, which - * will construct a "freefrag" structure to be added to the workitem queue - * when the inode update is complete (or obsolete). The main exception to - * this is when an allocation occurs while a pending allocation dependency - * (for the same block pointer) remains. This case is handled in the main - * allocation dependency setup procedure by immediately freeing the - * unreferenced fragments. 
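- * The merging of a new dependency with a pending one for the same block
- * pointer, and the disposition of the associated freefrag, is handled by
- * allocdirect_merge() below.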
- */
-void
-softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
-	struct inode *ip;	/* inode to which block is being added */
-	ufs_lbn_t lbn;		/* block pointer within inode */
-	ufs2_daddr_t newblkno;	/* disk block number being added */
-	ufs2_daddr_t oldblkno;	/* previous block number, 0 unless frag */
-	long newsize;		/* size of new block */
-	long oldsize;		/* size of old block */
-	struct buf *bp;		/* bp for allocated block */
-{
-	struct allocdirect *adp, *oldadp;
-	struct allocdirectlst *adphead;
-	struct bmsafemap *bmsafemap;
-	struct inodedep *inodedep;
-	struct pagedep *pagedep;
-	struct newblk *newblk;
-
-	MALLOC(adp, struct allocdirect *, sizeof(struct allocdirect),
-	    M_ALLOCDIRECT, M_SOFTDEP_FLAGS|M_ZERO);
-	adp->ad_list.wk_type = D_ALLOCDIRECT;
-	adp->ad_lbn = lbn;
-	adp->ad_newblkno = newblkno;
-	adp->ad_oldblkno = oldblkno;
-	adp->ad_newsize = newsize;
-	adp->ad_oldsize = oldsize;
-	adp->ad_state = ATTACHED;
-	LIST_INIT(&adp->ad_newdirblk);
-	if (newblkno == oldblkno)
-		adp->ad_freefrag = NULL;
-	else
-		adp->ad_freefrag = newfreefrag(ip, oldblkno, oldsize);
-
-	if (newblk_lookup(ip->i_fs, newblkno, 0, &newblk) == 0)
-		panic("softdep_setup_allocdirect: lost block");
-
-	ACQUIRE_LOCK(&lk);
-	inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC | NODELAY, &inodedep);
-	adp->ad_inodedep = inodedep;
-
-	if (newblk->nb_state == DEPCOMPLETE) {
-		adp->ad_state |= DEPCOMPLETE;
-		adp->ad_buf = NULL;
-	} else {
-		bmsafemap = newblk->nb_bmsafemap;
-		adp->ad_buf = bmsafemap->sm_buf;
-		LIST_REMOVE(newblk, nb_deps);
-		LIST_INSERT_HEAD(&bmsafemap->sm_allocdirecthd, adp, ad_deps);
-	}
-	LIST_REMOVE(newblk, nb_hash);
-	FREE(newblk, M_NEWBLK);
-
-	WORKLIST_INSERT(&bp->b_dep, &adp->ad_list);
-	if (lbn >= NDADDR) {
-		/* allocating an indirect block */
-		if (oldblkno != 0) {
-			FREE_LOCK(&lk);
-			panic("softdep_setup_allocdirect: non-zero indir");
-		}
-	} else {
-		/*
-		 * Allocating a direct block.
-		 *
-		 * If we are allocating a directory block, then we must
-		 * allocate an associated pagedep to track additions and
-		 * deletions.
-		 */
-		if ((ip->i_mode & IFMT) == IFDIR &&
-		    pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0)
-			WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list);
-	}
-	/*
-	 * The list of allocdirects must be kept in sorted and ascending
-	 * order so that the rollback routines can quickly determine the
-	 * first uncommitted block (the size of the file stored on disk
-	 * ends at the end of the lowest committed fragment, or if there
-	 * are no fragments, at the end of the highest committed block).
-	 * Since files generally grow, the typical case is that the new
-	 * block is to be added at the end of the list. We speed this
-	 * special case by checking against the last allocdirect in the
-	 * list before laboriously traversing the list looking for the
-	 * insertion point.
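-	 * For example, a file that is simply being extended always takes the
-	 * fast path: each new allocdirect's logical block number is at least
-	 * as large as the tail's, so it is appended directly; the merge below
-	 * handles the case where the tail already covers the same logical
-	 * block (a fragment being upgraded).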
- */ - adphead = &inodedep->id_newinoupdt; - oldadp = TAILQ_LAST(adphead, allocdirectlst); - if (oldadp == NULL || oldadp->ad_lbn <= lbn) { - /* insert at end of list */ - TAILQ_INSERT_TAIL(adphead, adp, ad_next); - if (oldadp != NULL && oldadp->ad_lbn == lbn) - allocdirect_merge(adphead, adp, oldadp); - FREE_LOCK(&lk); - return; - } - TAILQ_FOREACH(oldadp, adphead, ad_next) { - if (oldadp->ad_lbn >= lbn) - break; - } - if (oldadp == NULL) { - FREE_LOCK(&lk); - panic("softdep_setup_allocdirect: lost entry"); - } - /* insert in middle of list */ - TAILQ_INSERT_BEFORE(oldadp, adp, ad_next); - if (oldadp->ad_lbn == lbn) - allocdirect_merge(adphead, adp, oldadp); - FREE_LOCK(&lk); -} - -/* - * Replace an old allocdirect dependency with a newer one. - * This routine must be called with splbio interrupts blocked. - */ -static void -allocdirect_merge(adphead, newadp, oldadp) - struct allocdirectlst *adphead; /* head of list holding allocdirects */ - struct allocdirect *newadp; /* allocdirect being added */ - struct allocdirect *oldadp; /* existing allocdirect being checked */ -{ - struct worklist *wk; - struct freefrag *freefrag; - struct newdirblk *newdirblk; - -#ifdef DEBUG - if (lk.lkt_held == NOHOLDER) - panic("allocdirect_merge: lock not held"); -#endif - if (newadp->ad_oldblkno != oldadp->ad_newblkno || - newadp->ad_oldsize != oldadp->ad_newsize || - newadp->ad_lbn >= NDADDR) { - FREE_LOCK(&lk); - panic("%s %jd != new %jd || old size %ld != new %ld", - "allocdirect_merge: old blkno", - (intmax_t)newadp->ad_oldblkno, - (intmax_t)oldadp->ad_newblkno, - newadp->ad_oldsize, oldadp->ad_newsize); - } - newadp->ad_oldblkno = oldadp->ad_oldblkno; - newadp->ad_oldsize = oldadp->ad_oldsize; - /* - * If the old dependency had a fragment to free or had never - * previously had a block allocated, then the new dependency - * can immediately post its freefrag and adopt the old freefrag. - * This action is done by swapping the freefrag dependencies. - * The new dependency gains the old one's freefrag, and the - * old one gets the new one and then immediately puts it on - * the worklist when it is freed by free_allocdirect. It is - * not possible to do this swap when the old dependency had a - * non-zero size but no previous fragment to free. This condition - * arises when the new block is an extension of the old block. - * Here, the first part of the fragment allocated to the new - * dependency is part of the block currently claimed on disk by - * the old dependency, so cannot legitimately be freed until the - * conditions for the new dependency are fulfilled. - */ - if (oldadp->ad_freefrag != NULL || oldadp->ad_oldblkno == 0) { - freefrag = newadp->ad_freefrag; - newadp->ad_freefrag = oldadp->ad_freefrag; - oldadp->ad_freefrag = freefrag; - } - /* - * If we are tracking a new directory-block allocation, - * move it from the old allocdirect to the new allocdirect. - */ - if ((wk = LIST_FIRST(&oldadp->ad_newdirblk)) != NULL) { - newdirblk = WK_NEWDIRBLK(wk); - WORKLIST_REMOVE(&newdirblk->db_list); - if (LIST_FIRST(&oldadp->ad_newdirblk) != NULL) - panic("allocdirect_merge: extra newdirblk"); - WORKLIST_INSERT(&newadp->ad_newdirblk, &newdirblk->db_list); - } - free_allocdirect(adphead, oldadp, 0); -} - -/* - * Allocate a new freefrag structure if needed. 
- */ -static struct freefrag * -newfreefrag(ip, blkno, size) - struct inode *ip; - ufs2_daddr_t blkno; - long size; -{ - struct freefrag *freefrag; - struct fs *fs; - - if (blkno == 0) - return (NULL); - fs = ip->i_fs; - if (fragnum(fs, blkno) + numfrags(fs, size) > fs->fs_frag) - panic("newfreefrag: frag size"); - MALLOC(freefrag, struct freefrag *, sizeof(struct freefrag), - M_FREEFRAG, M_SOFTDEP_FLAGS); - freefrag->ff_list.wk_type = D_FREEFRAG; - freefrag->ff_state = 0; - freefrag->ff_inum = ip->i_number; - freefrag->ff_mnt = ITOV(ip)->v_mount; - freefrag->ff_blkno = blkno; - freefrag->ff_fragsize = size; - return (freefrag); -} - -/* - * This workitem de-allocates fragments that were replaced during - * file block allocation. - */ -static void -handle_workitem_freefrag(freefrag) - struct freefrag *freefrag; -{ - struct ufsmount *ump = VFSTOUFS(freefrag->ff_mnt); - - ffs_blkfree(ump->um_fs, ump->um_devvp, freefrag->ff_blkno, - freefrag->ff_fragsize, freefrag->ff_inum); - FREE(freefrag, M_FREEFRAG); -} - -/* - * Set up a dependency structure for an external attributes data block. - * This routine follows much of the structure of softdep_setup_allocdirect. - * See the description of softdep_setup_allocdirect above for details. - */ -void -softdep_setup_allocext(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) - struct inode *ip; - ufs_lbn_t lbn; - ufs2_daddr_t newblkno; - ufs2_daddr_t oldblkno; - long newsize; - long oldsize; - struct buf *bp; -{ - struct allocdirect *adp, *oldadp; - struct allocdirectlst *adphead; - struct bmsafemap *bmsafemap; - struct inodedep *inodedep; - struct newblk *newblk; - - MALLOC(adp, struct allocdirect *, sizeof(struct allocdirect), - M_ALLOCDIRECT, M_SOFTDEP_FLAGS|M_ZERO); - adp->ad_list.wk_type = D_ALLOCDIRECT; - adp->ad_lbn = lbn; - adp->ad_newblkno = newblkno; - adp->ad_oldblkno = oldblkno; - adp->ad_newsize = newsize; - adp->ad_oldsize = oldsize; - adp->ad_state = ATTACHED | EXTDATA; - LIST_INIT(&adp->ad_newdirblk); - if (newblkno == oldblkno) - adp->ad_freefrag = NULL; - else - adp->ad_freefrag = newfreefrag(ip, oldblkno, oldsize); - - if (newblk_lookup(ip->i_fs, newblkno, 0, &newblk) == 0) - panic("softdep_setup_allocext: lost block"); - - ACQUIRE_LOCK(&lk); - inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC | NODELAY, &inodedep); - adp->ad_inodedep = inodedep; - - if (newblk->nb_state == DEPCOMPLETE) { - adp->ad_state |= DEPCOMPLETE; - adp->ad_buf = NULL; - } else { - bmsafemap = newblk->nb_bmsafemap; - adp->ad_buf = bmsafemap->sm_buf; - LIST_REMOVE(newblk, nb_deps); - LIST_INSERT_HEAD(&bmsafemap->sm_allocdirecthd, adp, ad_deps); - } - LIST_REMOVE(newblk, nb_hash); - FREE(newblk, M_NEWBLK); - - WORKLIST_INSERT(&bp->b_dep, &adp->ad_list); - if (lbn >= NXADDR) { - FREE_LOCK(&lk); - panic("softdep_setup_allocext: lbn %lld > NXADDR", - (long long)lbn); - } - /* - * The list of allocdirects must be kept in sorted and ascending - * order so that the rollback routines can quickly determine the - * first uncommitted block (the size of the file stored on disk - * ends at the end of the lowest committed fragment, or if there - * are no fragments, at the end of the highest committed block). - * Since files generally grow, the typical case is that the new - * block is to be added at the end of the list. We speed this - * special case by checking against the last allocdirect in the - * list before laboriously traversing the list looking for the - * insertion point. 
- */ - adphead = &inodedep->id_newextupdt; - oldadp = TAILQ_LAST(adphead, allocdirectlst); - if (oldadp == NULL || oldadp->ad_lbn <= lbn) { - /* insert at end of list */ - TAILQ_INSERT_TAIL(adphead, adp, ad_next); - if (oldadp != NULL && oldadp->ad_lbn == lbn) - allocdirect_merge(adphead, adp, oldadp); - FREE_LOCK(&lk); - return; - } - TAILQ_FOREACH(oldadp, adphead, ad_next) { - if (oldadp->ad_lbn >= lbn) - break; - } - if (oldadp == NULL) { - FREE_LOCK(&lk); - panic("softdep_setup_allocext: lost entry"); - } - /* insert in middle of list */ - TAILQ_INSERT_BEFORE(oldadp, adp, ad_next); - if (oldadp->ad_lbn == lbn) - allocdirect_merge(adphead, adp, oldadp); - FREE_LOCK(&lk); -} - -/* - * Indirect block allocation dependencies. - * - * The same dependencies that exist for a direct block also exist when - * a new block is allocated and pointed to by an entry in a block of - * indirect pointers. The undo/redo states described above are also - * used here. Because an indirect block contains many pointers that - * may have dependencies, a second copy of the entire in-memory indirect - * block is kept. The buffer cache copy is always completely up-to-date. - * The second copy, which is used only as a source for disk writes, - * contains only the safe pointers (i.e., those that have no remaining - * update dependencies). The second copy is freed when all pointers - * are safe. The cache is not allowed to replace indirect blocks with - * pending update dependencies. If a buffer containing an indirect - * block with dependencies is written, these routines will mark it - * dirty again. It can only be successfully written once all the - * dependencies are removed. The ffs_fsync routine in conjunction with - * softdep_sync_metadata work together to get all the dependencies - * removed so that a file can be successfully written to disk. Three - * procedures are used when setting up indirect block pointer - * dependencies. The division is necessary because of the organization - * of the "balloc" routine and because of the distinction between file - * pages and file metadata blocks. - */ - -/* - * Allocate a new allocindir structure. - */ -static struct allocindir * -newallocindir(ip, ptrno, newblkno, oldblkno) - struct inode *ip; /* inode for file being extended */ - int ptrno; /* offset of pointer in indirect block */ - ufs2_daddr_t newblkno; /* disk block number being added */ - ufs2_daddr_t oldblkno; /* previous block number, 0 if none */ -{ - struct allocindir *aip; - - MALLOC(aip, struct allocindir *, sizeof(struct allocindir), - M_ALLOCINDIR, M_SOFTDEP_FLAGS|M_ZERO); - aip->ai_list.wk_type = D_ALLOCINDIR; - aip->ai_state = ATTACHED; - aip->ai_offset = ptrno; - aip->ai_newblkno = newblkno; - aip->ai_oldblkno = oldblkno; - aip->ai_freefrag = newfreefrag(ip, oldblkno, ip->i_fs->fs_bsize); - return (aip); -} - -/* - * Called just before setting an indirect block pointer - * to a newly allocated file page. 
- */ -void -softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, nbp) - struct inode *ip; /* inode for file being extended */ - ufs_lbn_t lbn; /* allocated block number within file */ - struct buf *bp; /* buffer with indirect blk referencing page */ - int ptrno; /* offset of pointer in indirect block */ - ufs2_daddr_t newblkno; /* disk block number being added */ - ufs2_daddr_t oldblkno; /* previous block number, 0 if none */ - struct buf *nbp; /* buffer holding allocated page */ -{ - struct allocindir *aip; - struct pagedep *pagedep; - - aip = newallocindir(ip, ptrno, newblkno, oldblkno); - ACQUIRE_LOCK(&lk); - /* - * If we are allocating a directory page, then we must - * allocate an associated pagedep to track additions and - * deletions. - */ - if ((ip->i_mode & IFMT) == IFDIR && - pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0) - WORKLIST_INSERT(&nbp->b_dep, &pagedep->pd_list); - WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list); - FREE_LOCK(&lk); - setup_allocindir_phase2(bp, ip, aip); -} - -/* - * Called just before setting an indirect block pointer to a - * newly allocated indirect block. - */ -void -softdep_setup_allocindir_meta(nbp, ip, bp, ptrno, newblkno) - struct buf *nbp; /* newly allocated indirect block */ - struct inode *ip; /* inode for file being extended */ - struct buf *bp; /* indirect block referencing allocated block */ - int ptrno; /* offset of pointer in indirect block */ - ufs2_daddr_t newblkno; /* disk block number being added */ -{ - struct allocindir *aip; - - aip = newallocindir(ip, ptrno, newblkno, 0); - ACQUIRE_LOCK(&lk); - WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list); - FREE_LOCK(&lk); - setup_allocindir_phase2(bp, ip, aip); -} - -/* - * Called to finish the allocation of the "aip" allocated - * by one of the two routines above. - */ -static void -setup_allocindir_phase2(bp, ip, aip) - struct buf *bp; /* in-memory copy of the indirect block */ - struct inode *ip; /* inode for file being extended */ - struct allocindir *aip; /* allocindir allocated by the above routines */ -{ - struct worklist *wk; - struct indirdep *indirdep, *newindirdep; - struct bmsafemap *bmsafemap; - struct allocindir *oldaip; - struct freefrag *freefrag; - struct newblk *newblk; - ufs2_daddr_t blkno; - - if (bp->b_lblkno >= 0) - panic("setup_allocindir_phase2: not indir blk"); - for (indirdep = NULL, newindirdep = NULL; ; ) { - ACQUIRE_LOCK(&lk); - LIST_FOREACH(wk, &bp->b_dep, wk_list) { - if (wk->wk_type != D_INDIRDEP) - continue; - indirdep = WK_INDIRDEP(wk); - break; - } - if (indirdep == NULL && newindirdep) { - indirdep = newindirdep; - WORKLIST_INSERT(&bp->b_dep, &indirdep->ir_list); - newindirdep = NULL; - } - FREE_LOCK(&lk); - if (indirdep) { - if (newblk_lookup(ip->i_fs, aip->ai_newblkno, 0, - &newblk) == 0) - panic("setup_allocindir: lost block"); - ACQUIRE_LOCK(&lk); - if (newblk->nb_state == DEPCOMPLETE) { - aip->ai_state |= DEPCOMPLETE; - aip->ai_buf = NULL; - } else { - bmsafemap = newblk->nb_bmsafemap; - aip->ai_buf = bmsafemap->sm_buf; - LIST_REMOVE(newblk, nb_deps); - LIST_INSERT_HEAD(&bmsafemap->sm_allocindirhd, - aip, ai_deps); - } - LIST_REMOVE(newblk, nb_hash); - FREE(newblk, M_NEWBLK); - aip->ai_indirdep = indirdep; - /* - * Check to see if there is an existing dependency - * for this block. If there is, merge the old - * dependency into the new one. 
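-			 * In either case the pointer slot in the saved copy
-			 * of the indirect block is rolled back to the old
-			 * block number, so the copy written to disk never
-			 * exposes the uncommitted pointer.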
- */ - if (aip->ai_oldblkno == 0) - oldaip = NULL; - else - - LIST_FOREACH(oldaip, &indirdep->ir_deplisthd, ai_next) - if (oldaip->ai_offset == aip->ai_offset) - break; - freefrag = NULL; - if (oldaip != NULL) { - if (oldaip->ai_newblkno != aip->ai_oldblkno) { - FREE_LOCK(&lk); - panic("setup_allocindir_phase2: blkno"); - } - aip->ai_oldblkno = oldaip->ai_oldblkno; - freefrag = aip->ai_freefrag; - aip->ai_freefrag = oldaip->ai_freefrag; - oldaip->ai_freefrag = NULL; - free_allocindir(oldaip, NULL); - } - LIST_INSERT_HEAD(&indirdep->ir_deplisthd, aip, ai_next); - if (ip->i_ump->um_fstype == UFS1) - ((ufs1_daddr_t *)indirdep->ir_savebp->b_data) - [aip->ai_offset] = aip->ai_oldblkno; - else - ((ufs2_daddr_t *)indirdep->ir_savebp->b_data) - [aip->ai_offset] = aip->ai_oldblkno; - FREE_LOCK(&lk); - if (freefrag != NULL) - handle_workitem_freefrag(freefrag); - } - if (newindirdep) { - brelse(newindirdep->ir_savebp); - WORKITEM_FREE((caddr_t)newindirdep, D_INDIRDEP); - } - if (indirdep) - break; - MALLOC(newindirdep, struct indirdep *, sizeof(struct indirdep), - M_INDIRDEP, M_SOFTDEP_FLAGS); - newindirdep->ir_list.wk_type = D_INDIRDEP; - newindirdep->ir_state = ATTACHED; - if (ip->i_ump->um_fstype == UFS1) - newindirdep->ir_state |= UFS1FMT; - LIST_INIT(&newindirdep->ir_deplisthd); - LIST_INIT(&newindirdep->ir_donehd); - if (bp->b_blkno == bp->b_lblkno) { - ufs_bmaparray(bp->b_vp, bp->b_lblkno, &blkno, bp, - NULL, NULL); - bp->b_blkno = blkno; - } - newindirdep->ir_savebp = - getblk(ip->i_devvp, bp->b_blkno, bp->b_bcount, 0, 0, 0); - BUF_KERNPROC(newindirdep->ir_savebp); - bcopy(bp->b_data, newindirdep->ir_savebp->b_data, bp->b_bcount); - } -} - -/* - * Block de-allocation dependencies. - * - * When blocks are de-allocated, the on-disk pointers must be nullified before - * the blocks are made available for use by other files. (The true - * requirement is that old pointers must be nullified before new on-disk - * pointers are set. We chose this slightly more stringent requirement to - * reduce complexity.) Our implementation handles this dependency by updating - * the inode (or indirect block) appropriately but delaying the actual block - * de-allocation (i.e., freemap and free space count manipulation) until - * after the updated versions reach stable storage. After the disk is - * updated, the blocks can be safely de-allocated whenever it is convenient. - * This implementation handles only the common case of reducing a file's - * length to zero. Other cases are handled by the conventional synchronous - * write approach. - * - * The ffs implementation with which we worked double-checks - * the state of the block pointers and file size as it reduces - * a file's length. Some of this code is replicated here in our - * soft updates implementation. The freeblks->fb_chkcnt field is - * used to transfer a part of this information to the procedure - * that eventually de-allocates the blocks. - * - * This routine should be called from the routine that shortens - * a file's length, before the inode's size or block pointers - * are modified. It will save the block pointer information for - * later release and zero the inode so that the calling routine - * can release it. 
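- * In outline: softdep_setup_freeblocks() below records the inode's block
- * pointers in a freeblks structure and zeroes them in the inode, the
- * zero'ed inode is pushed to its disk buffer, and once that inode is
- * safely on disk (or is found never to have been written at all)
- * handle_workitem_freeblocks() hands the saved blocks back to the free
- * lists.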
- */
-void
-softdep_setup_freeblocks(ip, length, flags)
-	struct inode *ip;	/* The inode whose length is to be reduced */
-	off_t length;		/* The new length for the file */
-	int flags;		/* IO_EXT and/or IO_NORMAL */
-{
-	struct freeblks *freeblks;
-	struct inodedep *inodedep;
-	struct allocdirect *adp;
-	struct vnode *vp;
-	struct buf *bp;
-	struct fs *fs;
-	ufs2_daddr_t extblocks, datablocks;
-	int i, delay, error;
-
-	fs = ip->i_fs;
-	if (length != 0)
-		panic("softdep_setup_freeblocks: non-zero length");
-	MALLOC(freeblks, struct freeblks *, sizeof(struct freeblks),
-	    M_FREEBLKS, M_SOFTDEP_FLAGS|M_ZERO);
-	freeblks->fb_list.wk_type = D_FREEBLKS;
-	freeblks->fb_uid = ip->i_uid;
-	freeblks->fb_previousinum = ip->i_number;
-	freeblks->fb_devvp = ip->i_devvp;
-	freeblks->fb_mnt = ITOV(ip)->v_mount;
-	extblocks = 0;
-	if (fs->fs_magic == FS_UFS2_MAGIC)
-		extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize));
-	datablocks = DIP(ip, i_blocks) - extblocks;
-	if ((flags & IO_NORMAL) == 0) {
-		freeblks->fb_oldsize = 0;
-		freeblks->fb_chkcnt = 0;
-	} else {
-		freeblks->fb_oldsize = ip->i_size;
-		ip->i_size = 0;
-		DIP(ip, i_size) = 0;
-		freeblks->fb_chkcnt = datablocks;
-		for (i = 0; i < NDADDR; i++) {
-			freeblks->fb_dblks[i] = DIP(ip, i_db[i]);
-			DIP(ip, i_db[i]) = 0;
-		}
-		for (i = 0; i < NIADDR; i++) {
-			freeblks->fb_iblks[i] = DIP(ip, i_ib[i]);
-			DIP(ip, i_ib[i]) = 0;
-		}
-		/*
-		 * If the file was removed, then the space being freed was
-		 * accounted for then (see softdep_filereleased()). If the
-		 * file is merely being truncated, then we account for it now.
-		 */
-		if ((ip->i_flag & IN_SPACECOUNTED) == 0)
-			fs->fs_pendingblocks += datablocks;
-	}
-	if ((flags & IO_EXT) == 0) {
-		freeblks->fb_oldextsize = 0;
-	} else {
-		freeblks->fb_oldextsize = ip->i_din2->di_extsize;
-		ip->i_din2->di_extsize = 0;
-		freeblks->fb_chkcnt += extblocks;
-		for (i = 0; i < NXADDR; i++) {
-			freeblks->fb_eblks[i] = ip->i_din2->di_extb[i];
-			ip->i_din2->di_extb[i] = 0;
-		}
-	}
-	DIP(ip, i_blocks) -= freeblks->fb_chkcnt;
-	/*
-	 * Push the zero'ed inode to its disk buffer so that we are free
-	 * to delete its dependencies below. Once the dependencies are gone
-	 * the buffer can be safely released.
-	 */
-	if ((error = bread(ip->i_devvp,
-	    fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
-	    (int)fs->fs_bsize, NOCRED, &bp)) != 0) {
-		brelse(bp);
-		softdep_error("softdep_setup_freeblocks", error);
-	}
-	if (ip->i_ump->um_fstype == UFS1)
-		*((struct ufs1_dinode *)bp->b_data +
-		    ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1;
-	else
-		*((struct ufs2_dinode *)bp->b_data +
-		    ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2;
-	/*
-	 * Find and eliminate any inode dependencies.
-	 */
-	ACQUIRE_LOCK(&lk);
-	(void) inodedep_lookup(fs, ip->i_number, DEPALLOC, &inodedep);
-	if ((inodedep->id_state & IOSTARTED) != 0) {
-		FREE_LOCK(&lk);
-		panic("softdep_setup_freeblocks: inode busy");
-	}
-	/*
-	 * Add the freeblks structure to the list of operations that
-	 * must await the zero'ed inode being written to disk. If we
-	 * still have a bitmap dependency (delay == 0), then the inode
-	 * has never been written to disk, so we can process the
-	 * freeblks below once we have deleted the dependencies.
-	 */
-	delay = (inodedep->id_state & DEPCOMPLETE);
-	if (delay)
-		WORKLIST_INSERT(&inodedep->id_bufwait, &freeblks->fb_list);
-	/*
-	 * Because the file length has been truncated to zero, any
-	 * pending block allocation dependency structures associated
-	 * with this inode are obsolete and can simply be de-allocated.
- * We must first merge the two dependency lists to get rid of - * any duplicate freefrag structures, then purge the merged list. - * If we still have a bitmap dependency, then the inode has never - * been written to disk, so we can free any fragments without delay. - */ - if (flags & IO_NORMAL) { - merge_inode_lists(&inodedep->id_newinoupdt, - &inodedep->id_inoupdt); - while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0) - free_allocdirect(&inodedep->id_inoupdt, adp, delay); - } - if (flags & IO_EXT) { - merge_inode_lists(&inodedep->id_newextupdt, - &inodedep->id_extupdt); - while ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != 0) - free_allocdirect(&inodedep->id_extupdt, adp, delay); - } - FREE_LOCK(&lk); - bdwrite(bp); - /* - * We must wait for any I/O in progress to finish so that - * all potential buffers on the dirty list will be visible. - * Once they are all there, walk the list and get rid of - * any dependencies. - */ - vp = ITOV(ip); - ACQUIRE_LOCK(&lk); - VI_LOCK(vp); - drain_output(vp, 1); -restart: - TAILQ_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) { - if (((flags & IO_EXT) == 0 && (bp->b_xflags & BX_ALTDATA)) || - ((flags & IO_NORMAL) == 0 && - (bp->b_xflags & BX_ALTDATA) == 0)) - continue; - if ((bp = getdirtybuf(&bp, VI_MTX(vp), MNT_WAIT)) == NULL) - goto restart; - (void) inodedep_lookup(fs, ip->i_number, 0, &inodedep); - deallocate_dependencies(bp, inodedep); - bp->b_flags |= B_INVAL | B_NOCACHE; - FREE_LOCK(&lk); - brelse(bp); - ACQUIRE_LOCK(&lk); - VI_LOCK(vp); - goto restart; - } - VI_UNLOCK(vp); - if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) != 0) - (void) free_inodedep(inodedep); - FREE_LOCK(&lk); - /* - * If the inode has never been written to disk (delay == 0), - * then we can process the freeblks now that we have deleted - * the dependencies. - */ - if (!delay) - handle_workitem_freeblocks(freeblks, 0); -} - -/* - * Reclaim any dependency structures from a buffer that is about to - * be reallocated to a new vnode. The buffer must be locked, thus, - * no I/O completion operations can occur while we are manipulating - * its associated dependencies. The mutex is held so that other I/O's - * associated with related dependencies do not occur. - */ -static void -deallocate_dependencies(bp, inodedep) - struct buf *bp; - struct inodedep *inodedep; -{ - struct worklist *wk; - struct indirdep *indirdep; - struct allocindir *aip; - struct pagedep *pagedep; - struct dirrem *dirrem; - struct diradd *dap; - int i; - - while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) { - switch (wk->wk_type) { - - case D_INDIRDEP: - indirdep = WK_INDIRDEP(wk); - /* - * None of the indirect pointers will ever be visible, - * so they can simply be tossed. GOINGAWAY ensures - * that allocated pointers will be saved in the buffer - * cache until they are freed. Note that they will - * only be able to be found by their physical address - * since the inode mapping the logical address will - * be gone. The save buffer used for the safe copy - * was allocated in setup_allocindir_phase2 using - * the physical address so it could be used for this - * purpose. Hence we swap the safe copy with the real - * copy, allowing the safe copy to be freed and holding - * on to the real copy for later use in indir_trunc. 
- */ - if (indirdep->ir_state & GOINGAWAY) { - FREE_LOCK(&lk); - panic("deallocate_dependencies: already gone"); - } - indirdep->ir_state |= GOINGAWAY; - VFSTOUFS(bp->b_vp->v_mount)->um_numindirdeps += 1; - while ((aip = LIST_FIRST(&indirdep->ir_deplisthd)) != 0) - free_allocindir(aip, inodedep); - if (bp->b_lblkno >= 0 || - bp->b_blkno != indirdep->ir_savebp->b_lblkno) { - FREE_LOCK(&lk); - panic("deallocate_dependencies: not indir"); - } - bcopy(bp->b_data, indirdep->ir_savebp->b_data, - bp->b_bcount); - WORKLIST_REMOVE(wk); - WORKLIST_INSERT(&indirdep->ir_savebp->b_dep, wk); - continue; - - case D_PAGEDEP: - pagedep = WK_PAGEDEP(wk); - /* - * None of the directory additions will ever be - * visible, so they can simply be tossed. - */ - for (i = 0; i < DAHASHSZ; i++) - while ((dap = - LIST_FIRST(&pagedep->pd_diraddhd[i]))) - free_diradd(dap); - while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != 0) - free_diradd(dap); - /* - * Copy any directory remove dependencies to the list - * to be processed after the zero'ed inode is written. - * If the inode has already been written, then they - * can be dumped directly onto the work list. - */ - LIST_FOREACH(dirrem, &pagedep->pd_dirremhd, dm_next) { - LIST_REMOVE(dirrem, dm_next); - dirrem->dm_dirinum = pagedep->pd_ino; - if (inodedep == NULL || - (inodedep->id_state & ALLCOMPLETE) == - ALLCOMPLETE) - add_to_worklist(&dirrem->dm_list); - else - WORKLIST_INSERT(&inodedep->id_bufwait, - &dirrem->dm_list); - } - if ((pagedep->pd_state & NEWBLOCK) != 0) { - LIST_FOREACH(wk, &inodedep->id_bufwait, wk_list) - if (wk->wk_type == D_NEWDIRBLK && - WK_NEWDIRBLK(wk)->db_pagedep == - pagedep) - break; - if (wk != NULL) { - WORKLIST_REMOVE(wk); - free_newdirblk(WK_NEWDIRBLK(wk)); - } else { - FREE_LOCK(&lk); - panic("deallocate_dependencies: " - "lost pagedep"); - } - } - WORKLIST_REMOVE(&pagedep->pd_list); - LIST_REMOVE(pagedep, pd_hash); - WORKITEM_FREE(pagedep, D_PAGEDEP); - continue; - - case D_ALLOCINDIR: - free_allocindir(WK_ALLOCINDIR(wk), inodedep); - continue; - - case D_ALLOCDIRECT: - case D_INODEDEP: - FREE_LOCK(&lk); - panic("deallocate_dependencies: Unexpected type %s", - TYPENAME(wk->wk_type)); - /* NOTREACHED */ - - default: - FREE_LOCK(&lk); - panic("deallocate_dependencies: Unknown type %s", - TYPENAME(wk->wk_type)); - /* NOTREACHED */ - } - } -} - -/* - * Free an allocdirect. Generate a new freefrag work request if appropriate. - * This routine must be called with splbio interrupts blocked. - */ -static void -free_allocdirect(adphead, adp, delay) - struct allocdirectlst *adphead; - struct allocdirect *adp; - int delay; -{ - struct newdirblk *newdirblk; - struct worklist *wk; - -#ifdef DEBUG - if (lk.lkt_held == NOHOLDER) - panic("free_allocdirect: lock not held"); -#endif - if ((adp->ad_state & DEPCOMPLETE) == 0) - LIST_REMOVE(adp, ad_deps); - TAILQ_REMOVE(adphead, adp, ad_next); - if ((adp->ad_state & COMPLETE) == 0) - WORKLIST_REMOVE(&adp->ad_list); - if (adp->ad_freefrag != NULL) { - if (delay) - WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait, - &adp->ad_freefrag->ff_list); - else - add_to_worklist(&adp->ad_freefrag->ff_list); - } - if ((wk = LIST_FIRST(&adp->ad_newdirblk)) != NULL) { - newdirblk = WK_NEWDIRBLK(wk); - WORKLIST_REMOVE(&newdirblk->db_list); - if (LIST_FIRST(&adp->ad_newdirblk) != NULL) - panic("free_allocdirect: extra newdirblk"); - if (delay) - WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait, - &newdirblk->db_list); - else - free_newdirblk(newdirblk); - } - WORKITEM_FREE(adp, D_ALLOCDIRECT); -} - -/* - * Free a newdirblk. 
Clear the NEWBLOCK flag on its associated pagedep. - * This routine must be called with splbio interrupts blocked. - */ -static void -free_newdirblk(newdirblk) - struct newdirblk *newdirblk; -{ - struct pagedep *pagedep; - struct diradd *dap; - int i; - -#ifdef DEBUG - if (lk.lkt_held == NOHOLDER) - panic("free_newdirblk: lock not held"); -#endif - /* - * If the pagedep is still linked onto the directory buffer - * dependency chain, then some of the entries on the - * pd_pendinghd list may not be committed to disk yet. In - * this case, we will simply clear the NEWBLOCK flag and - * let the pd_pendinghd list be processed when the pagedep - * is next written. If the pagedep is no longer on the buffer - * dependency chain, then all the entries on the pd_pending - * list are committed to disk and we can free them here. - */ - pagedep = newdirblk->db_pagedep; - pagedep->pd_state &= ~NEWBLOCK; - if ((pagedep->pd_state & ONWORKLIST) == 0) - while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL) - free_diradd(dap); - /* - * If no dependencies remain, the pagedep will be freed. - */ - for (i = 0; i < DAHASHSZ; i++) - if (LIST_FIRST(&pagedep->pd_diraddhd[i]) != NULL) - break; - if (i == DAHASHSZ && (pagedep->pd_state & ONWORKLIST) == 0) { - LIST_REMOVE(pagedep, pd_hash); - WORKITEM_FREE(pagedep, D_PAGEDEP); - } - WORKITEM_FREE(newdirblk, D_NEWDIRBLK); -} - -/* - * Prepare an inode to be freed. The actual free operation is not - * done until the zero'ed inode has been written to disk. - */ -void -softdep_freefile(pvp, ino, mode) - struct vnode *pvp; - ino_t ino; - int mode; -{ - struct inode *ip = VTOI(pvp); - struct inodedep *inodedep; - struct freefile *freefile; - - /* - * This sets up the inode de-allocation dependency. - */ - MALLOC(freefile, struct freefile *, sizeof(struct freefile), - M_FREEFILE, M_SOFTDEP_FLAGS); - freefile->fx_list.wk_type = D_FREEFILE; - freefile->fx_list.wk_state = 0; - freefile->fx_mode = mode; - freefile->fx_oldinum = ino; - freefile->fx_devvp = ip->i_devvp; - freefile->fx_mnt = ITOV(ip)->v_mount; - if ((ip->i_flag & IN_SPACECOUNTED) == 0) - ip->i_fs->fs_pendinginodes += 1; - - /* - * If the inodedep does not exist, then the zero'ed inode has - * been written to disk. If the allocated inode has never been - * written to disk, then the on-disk inode is zero'ed. In either - * case we can free the file immediately. - */ - ACQUIRE_LOCK(&lk); - if (inodedep_lookup(ip->i_fs, ino, 0, &inodedep) == 0 || - check_inode_unwritten(inodedep)) { - FREE_LOCK(&lk); - handle_workitem_freefile(freefile); - return; - } - WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list); - FREE_LOCK(&lk); -} - -/* - * Check to see if an inode has never been written to disk. If - * so free the inodedep and return success, otherwise return failure. - * This routine must be called with splbio interrupts blocked. - * - * If we still have a bitmap dependency, then the inode has never - * been written to disk. Drop the dependency as it is no longer - * necessary since the inode is being deallocated. We set the - * ALLCOMPLETE flags since the bitmap now properly shows that the - * inode is not allocated. Even if the inode is actively being - * written, it has been rolled back to its zero'ed state, so we - * are ensured that a zero inode is what is on the disk. For short - * lived files, this change will usually result in removing all the - * dependencies from the inode so that it can be freed immediately. 
- */
-static int
-check_inode_unwritten(inodedep)
-	struct inodedep *inodedep;
-{
-
-	if ((inodedep->id_state & DEPCOMPLETE) != 0 ||
-	    LIST_FIRST(&inodedep->id_pendinghd) != NULL ||
-	    LIST_FIRST(&inodedep->id_bufwait) != NULL ||
-	    LIST_FIRST(&inodedep->id_inowait) != NULL ||
-	    TAILQ_FIRST(&inodedep->id_inoupdt) != NULL ||
-	    TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL ||
-	    TAILQ_FIRST(&inodedep->id_extupdt) != NULL ||
-	    TAILQ_FIRST(&inodedep->id_newextupdt) != NULL ||
-	    inodedep->id_nlinkdelta != 0)
-		return (0);
-	inodedep->id_state |= ALLCOMPLETE;
-	LIST_REMOVE(inodedep, id_deps);
-	inodedep->id_buf = NULL;
-	if (inodedep->id_state & ONWORKLIST)
-		WORKLIST_REMOVE(&inodedep->id_list);
-	if (inodedep->id_savedino1 != NULL) {
-		FREE(inodedep->id_savedino1, M_INODEDEP);
-		inodedep->id_savedino1 = NULL;
-	}
-	if (free_inodedep(inodedep) == 0) {
-		FREE_LOCK(&lk);
-		panic("check_inode_unwritten: busy inode");
-	}
-	return (1);
-}
-
-/*
- * Try to free an inodedep structure. Return 1 if it could be freed.
- */
-static int
-free_inodedep(inodedep)
-	struct inodedep *inodedep;
-{
-
-	if ((inodedep->id_state & ONWORKLIST) != 0 ||
-	    (inodedep->id_state & ALLCOMPLETE) != ALLCOMPLETE ||
-	    LIST_FIRST(&inodedep->id_pendinghd) != NULL ||
-	    LIST_FIRST(&inodedep->id_bufwait) != NULL ||
-	    LIST_FIRST(&inodedep->id_inowait) != NULL ||
-	    TAILQ_FIRST(&inodedep->id_inoupdt) != NULL ||
-	    TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL ||
-	    TAILQ_FIRST(&inodedep->id_extupdt) != NULL ||
-	    TAILQ_FIRST(&inodedep->id_newextupdt) != NULL ||
-	    inodedep->id_nlinkdelta != 0 || inodedep->id_savedino1 != NULL)
-		return (0);
-	LIST_REMOVE(inodedep, id_hash);
-	WORKITEM_FREE(inodedep, D_INODEDEP);
-	num_inodedep -= 1;
-	return (1);
-}
-
-/*
- * This workitem routine performs the block de-allocation.
- * The workitem is added to the pending list after the updated
- * inode block has been written to disk. As mentioned above,
- * checks regarding the number of blocks de-allocated (compared
- * to the number of blocks allocated for the file) are also
- * performed in this function.
- */
-static void
-handle_workitem_freeblocks(freeblks, flags)
-	struct freeblks *freeblks;
-	int flags;
-{
-	struct inode *ip;
-	struct vnode *vp;
-	struct fs *fs;
-	int i, nblocks, level, bsize;
-	ufs2_daddr_t bn, blocksreleased = 0;
-	int error, allerror = 0;
-	ufs_lbn_t baselbns[NIADDR], tmpval;
-
-	fs = VFSTOUFS(freeblks->fb_mnt)->um_fs;
-	tmpval = 1;
-	baselbns[0] = NDADDR;
-	for (i = 1; i < NIADDR; i++) {
-		tmpval *= NINDIR(fs);
-		baselbns[i] = baselbns[i - 1] + tmpval;
-	}
-	nblocks = btodb(fs->fs_bsize);
-	blocksreleased = 0;
-	/*
-	 * Release all extended attribute blocks or frags.
-	 */
-	if (freeblks->fb_oldextsize > 0) {
-		for (i = (NXADDR - 1); i >= 0; i--) {
-			if ((bn = freeblks->fb_eblks[i]) == 0)
-				continue;
-			bsize = sblksize(fs, freeblks->fb_oldextsize, i);
-			ffs_blkfree(fs, freeblks->fb_devvp, bn, bsize,
-			    freeblks->fb_previousinum);
-			blocksreleased += btodb(bsize);
-		}
-	}
-	/*
-	 * Release all data blocks or frags.
-	 */
-	if (freeblks->fb_oldsize > 0) {
-		/*
-		 * Indirect blocks first.
-		 */
-		for (level = (NIADDR - 1); level >= 0; level--) {
-			if ((bn = freeblks->fb_iblks[level]) == 0)
-				continue;
-			if ((error = indir_trunc(freeblks, fsbtodb(fs, bn),
-			    level, baselbns[level], &blocksreleased)) != 0)
-				allerror = error;
-			ffs_blkfree(fs, freeblks->fb_devvp, bn, fs->fs_bsize,
-			    freeblks->fb_previousinum);
-			fs->fs_pendingblocks -= nblocks;
-			blocksreleased += nblocks;
-		}
-		/*
-		 * All direct blocks or frags.
- */ - for (i = (NDADDR - 1); i >= 0; i--) { - if ((bn = freeblks->fb_dblks[i]) == 0) - continue; - bsize = sblksize(fs, freeblks->fb_oldsize, i); - ffs_blkfree(fs, freeblks->fb_devvp, bn, bsize, - freeblks->fb_previousinum); - fs->fs_pendingblocks -= btodb(bsize); - blocksreleased += btodb(bsize); - } - } - /* - * If we still have not finished background cleanup, then check - * to see if the block count needs to be adjusted. - */ - if (freeblks->fb_chkcnt != blocksreleased && - (fs->fs_flags & FS_UNCLEAN) != 0 && - VFS_VGET(freeblks->fb_mnt, freeblks->fb_previousinum, - (flags & LK_NOWAIT) | LK_EXCLUSIVE, &vp) == 0) { - ip = VTOI(vp); - DIP(ip, i_blocks) += freeblks->fb_chkcnt - blocksreleased; - ip->i_flag |= IN_CHANGE; - vput(vp); - } - -#ifdef DIAGNOSTIC - if (freeblks->fb_chkcnt != blocksreleased && - ((fs->fs_flags & FS_UNCLEAN) == 0 || (flags & LK_NOWAIT) != 0)) - printf("handle_workitem_freeblocks: block count\n"); - if (allerror) - softdep_error("handle_workitem_freeblks", allerror); -#endif /* DIAGNOSTIC */ - - WORKITEM_FREE(freeblks, D_FREEBLKS); -} - -/* - * Release blocks associated with the inode ip and stored in the indirect - * block dbn. If level is greater than SINGLE, the block is an indirect block - * and recursive calls to indirtrunc must be used to cleanse other indirect - * blocks. - */ -static int -indir_trunc(freeblks, dbn, level, lbn, countp) - struct freeblks *freeblks; - ufs2_daddr_t dbn; - int level; - ufs_lbn_t lbn; - ufs2_daddr_t *countp; -{ - struct buf *bp; - struct fs *fs; - struct worklist *wk; - struct indirdep *indirdep; - ufs1_daddr_t *bap1 = 0; - ufs2_daddr_t nb, *bap2 = 0; - ufs_lbn_t lbnadd; - int i, nblocks, ufs1fmt; - int error, allerror = 0; - - fs = VFSTOUFS(freeblks->fb_mnt)->um_fs; - lbnadd = 1; - for (i = level; i > 0; i--) - lbnadd *= NINDIR(fs); - /* - * Get buffer of block pointers to be freed. This routine is not - * called until the zero'ed inode has been written, so it is safe - * to free blocks as they are encountered. Because the inode has - * been zero'ed, calls to bmap on these blocks will fail. So, we - * have to use the on-disk address and the block device for the - * filesystem to look them up. If the file was deleted before its - * indirect blocks were all written to disk, the routine that set - * us up (deallocate_dependencies) will have arranged to leave - * a complete copy of the indirect block in memory for our use. - * Otherwise we have to read the blocks in from the disk. - */ -#ifdef notyet - bp = getblk(freeblks->fb_devvp, dbn, (int)fs->fs_bsize, 0, 0, - GB_NOCREAT); -#else - bp = incore(freeblks->fb_devvp, dbn); -#endif - ACQUIRE_LOCK(&lk); - if (bp != NULL && (wk = LIST_FIRST(&bp->b_dep)) != NULL) { - if (wk->wk_type != D_INDIRDEP || - (indirdep = WK_INDIRDEP(wk))->ir_savebp != bp || - (indirdep->ir_state & GOINGAWAY) == 0) { - FREE_LOCK(&lk); - panic("indir_trunc: lost indirdep"); - } - WORKLIST_REMOVE(wk); - WORKITEM_FREE(indirdep, D_INDIRDEP); - if (LIST_FIRST(&bp->b_dep) != NULL) { - FREE_LOCK(&lk); - panic("indir_trunc: dangling dep"); - } - VFSTOUFS(freeblks->fb_mnt)->um_numindirdeps -= 1; - FREE_LOCK(&lk); - } else { -#ifdef notyet - if (bp) - brelse(bp); -#endif - FREE_LOCK(&lk); - error = bread(freeblks->fb_devvp, dbn, (int)fs->fs_bsize, - NOCRED, &bp); - if (error) { - brelse(bp); - return (error); - } - } - /* - * Recursively free indirect blocks. 
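-	 * Each non-zero entry that is itself an indirect block (level > 0)
-	 * has the blocks it maps released by a recursive call before the
-	 * entry's own block is freed; *countp accumulates the total number
-	 * of disk blocks released.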
- */ - if (VFSTOUFS(freeblks->fb_mnt)->um_fstype == UFS1) { - ufs1fmt = 1; - bap1 = (ufs1_daddr_t *)bp->b_data; - } else { - ufs1fmt = 0; - bap2 = (ufs2_daddr_t *)bp->b_data; - } - nblocks = btodb(fs->fs_bsize); - for (i = NINDIR(fs) - 1; i >= 0; i--) { - if (ufs1fmt) - nb = bap1[i]; - else - nb = bap2[i]; - if (nb == 0) - continue; - if (level != 0) { - if ((error = indir_trunc(freeblks, fsbtodb(fs, nb), - level - 1, lbn + (i * lbnadd), countp)) != 0) - allerror = error; - } - ffs_blkfree(fs, freeblks->fb_devvp, nb, fs->fs_bsize, - freeblks->fb_previousinum); - fs->fs_pendingblocks -= nblocks; - *countp += nblocks; - } - bp->b_flags |= B_INVAL | B_NOCACHE; - brelse(bp); - return (allerror); -} - -/* - * Free an allocindir. - * This routine must be called with splbio interrupts blocked. - */ -static void -free_allocindir(aip, inodedep) - struct allocindir *aip; - struct inodedep *inodedep; -{ - struct freefrag *freefrag; - -#ifdef DEBUG - if (lk.lkt_held == NOHOLDER) - panic("free_allocindir: lock not held"); -#endif - if ((aip->ai_state & DEPCOMPLETE) == 0) - LIST_REMOVE(aip, ai_deps); - if (aip->ai_state & ONWORKLIST) - WORKLIST_REMOVE(&aip->ai_list); - LIST_REMOVE(aip, ai_next); - if ((freefrag = aip->ai_freefrag) != NULL) { - if (inodedep == NULL) - add_to_worklist(&freefrag->ff_list); - else - WORKLIST_INSERT(&inodedep->id_bufwait, - &freefrag->ff_list); - } - WORKITEM_FREE(aip, D_ALLOCINDIR); -} - -/* - * Directory entry addition dependencies. - * - * When adding a new directory entry, the inode (with its incremented link - * count) must be written to disk before the directory entry's pointer to it. - * Also, if the inode is newly allocated, the corresponding freemap must be - * updated (on disk) before the directory entry's pointer. These requirements - * are met via undo/redo on the directory entry's pointer, which consists - * simply of the inode number. - * - * As directory entries are added and deleted, the free space within a - * directory block can become fragmented. The ufs filesystem will compact - * a fragmented directory block to make space for a new entry. When this - * occurs, the offsets of previously added entries change. Any "diradd" - * dependency structures corresponding to these entries must be updated with - * the new offsets. - */ - -/* - * This routine is called after the in-memory inode's link - * count has been incremented, but before the directory entry's - * pointer to the inode has been set. - */ -int -softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk) - struct buf *bp; /* buffer containing directory block */ - struct inode *dp; /* inode for directory */ - off_t diroffset; /* offset of new entry in directory */ - ino_t newinum; /* inode referenced by new directory entry */ - struct buf *newdirbp; /* non-NULL => contents of new mkdir */ - int isnewblk; /* entry is in a newly allocated block */ -{ - int offset; /* offset of new entry within directory block */ - ufs_lbn_t lbn; /* block in directory containing new entry */ - struct fs *fs; - struct diradd *dap; - struct allocdirect *adp; - struct pagedep *pagedep; - struct inodedep *inodedep; - struct newdirblk *newdirblk = 0; - struct mkdir *mkdir1, *mkdir2; - - /* - * Whiteouts have no dependencies. 
- */ - if (newinum == WINO) { - if (newdirbp != NULL) - bdwrite(newdirbp); - return (0); - } - - fs = dp->i_fs; - lbn = lblkno(fs, diroffset); - offset = blkoff(fs, diroffset); - MALLOC(dap, struct diradd *, sizeof(struct diradd), M_DIRADD, - M_SOFTDEP_FLAGS|M_ZERO); - dap->da_list.wk_type = D_DIRADD; - dap->da_offset = offset; - dap->da_newinum = newinum; - dap->da_state = ATTACHED; - if (isnewblk && lbn < NDADDR && fragoff(fs, diroffset) == 0) { - MALLOC(newdirblk, struct newdirblk *, sizeof(struct newdirblk), - M_NEWDIRBLK, M_SOFTDEP_FLAGS); - newdirblk->db_list.wk_type = D_NEWDIRBLK; - newdirblk->db_state = 0; - } - if (newdirbp == NULL) { - dap->da_state |= DEPCOMPLETE; - ACQUIRE_LOCK(&lk); - } else { - dap->da_state |= MKDIR_BODY | MKDIR_PARENT; - MALLOC(mkdir1, struct mkdir *, sizeof(struct mkdir), M_MKDIR, - M_SOFTDEP_FLAGS); - mkdir1->md_list.wk_type = D_MKDIR; - mkdir1->md_state = MKDIR_BODY; - mkdir1->md_diradd = dap; - MALLOC(mkdir2, struct mkdir *, sizeof(struct mkdir), M_MKDIR, - M_SOFTDEP_FLAGS); - mkdir2->md_list.wk_type = D_MKDIR; - mkdir2->md_state = MKDIR_PARENT; - mkdir2->md_diradd = dap; - /* - * Dependency on "." and ".." being written to disk. - */ - mkdir1->md_buf = newdirbp; - ACQUIRE_LOCK(&lk); - LIST_INSERT_HEAD(&mkdirlisthd, mkdir1, md_mkdirs); - WORKLIST_INSERT(&newdirbp->b_dep, &mkdir1->md_list); - FREE_LOCK(&lk); - bdwrite(newdirbp); - /* - * Dependency on link count increase for parent directory - */ - ACQUIRE_LOCK(&lk); - if (inodedep_lookup(fs, dp->i_number, 0, &inodedep) == 0 - || (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) { - dap->da_state &= ~MKDIR_PARENT; - WORKITEM_FREE(mkdir2, D_MKDIR); - } else { - LIST_INSERT_HEAD(&mkdirlisthd, mkdir2, md_mkdirs); - WORKLIST_INSERT(&inodedep->id_bufwait,&mkdir2->md_list); - } - } - /* - * Link into parent directory pagedep to await its being written. - */ - if (pagedep_lookup(dp, lbn, DEPALLOC, &pagedep) == 0) - WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); - dap->da_pagedep = pagedep; - LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], dap, - da_pdlist); - /* - * Link into its inodedep. Put it on the id_bufwait list if the inode - * is not yet written. If it is written, do the post-inode write - * processing to put it on the id_pendinghd list. - */ - (void) inodedep_lookup(fs, newinum, DEPALLOC, &inodedep); - if ((inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) - diradd_inode_written(dap, inodedep); - else - WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list); - if (isnewblk) { - /* - * Directories growing into indirect blocks are rare - * enough and the frequency of new block allocation - * in those cases even more rare, that we choose not - * to bother tracking them. Rather we simply force the - * new directory entry to disk. - */ - if (lbn >= NDADDR) { - FREE_LOCK(&lk); - /* - * We only have a new allocation when at the - * beginning of a new block, not when we are - * expanding into an existing block. - */ - if (blkoff(fs, diroffset) == 0) - return (1); - return (0); - } - /* - * We only have a new allocation when at the beginning - * of a new fragment, not when we are expanding into an - * existing fragment. Also, there is nothing to do if we - * are already tracking this block. - */ - if (fragoff(fs, diroffset) != 0) { - FREE_LOCK(&lk); - return (0); - } - if ((pagedep->pd_state & NEWBLOCK) != 0) { - WORKITEM_FREE(newdirblk, D_NEWDIRBLK); - FREE_LOCK(&lk); - return (0); - } - /* - * Find our associated allocdirect and have it track us. 
- */ - if (inodedep_lookup(fs, dp->i_number, 0, &inodedep) == 0) - panic("softdep_setup_directory_add: lost inodedep"); - adp = TAILQ_LAST(&inodedep->id_newinoupdt, allocdirectlst); - if (adp == NULL || adp->ad_lbn != lbn) { - FREE_LOCK(&lk); - panic("softdep_setup_directory_add: lost entry"); - } - pagedep->pd_state |= NEWBLOCK; - newdirblk->db_pagedep = pagedep; - WORKLIST_INSERT(&adp->ad_newdirblk, &newdirblk->db_list); - } - FREE_LOCK(&lk); - return (0); -} - -/* - * This procedure is called to change the offset of a directory - * entry when compacting a directory block which must be owned - * exclusively by the caller. Note that the actual entry movement - * must be done in this procedure to ensure that no I/O completions - * occur while the move is in progress. - */ -void -softdep_change_directoryentry_offset(dp, base, oldloc, newloc, entrysize) - struct inode *dp; /* inode for directory */ - caddr_t base; /* address of dp->i_offset */ - caddr_t oldloc; /* address of old directory location */ - caddr_t newloc; /* address of new directory location */ - int entrysize; /* size of directory entry */ -{ - int offset, oldoffset, newoffset; - struct pagedep *pagedep; - struct diradd *dap; - ufs_lbn_t lbn; - - ACQUIRE_LOCK(&lk); - lbn = lblkno(dp->i_fs, dp->i_offset); - offset = blkoff(dp->i_fs, dp->i_offset); - if (pagedep_lookup(dp, lbn, 0, &pagedep) == 0) - goto done; - oldoffset = offset + (oldloc - base); - newoffset = offset + (newloc - base); - - LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(oldoffset)], da_pdlist) { - if (dap->da_offset != oldoffset) - continue; - dap->da_offset = newoffset; - if (DIRADDHASH(newoffset) == DIRADDHASH(oldoffset)) - break; - LIST_REMOVE(dap, da_pdlist); - LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(newoffset)], - dap, da_pdlist); - break; - } - if (dap == NULL) { - - LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist) { - if (dap->da_offset == oldoffset) { - dap->da_offset = newoffset; - break; - } - } - } -done: - bcopy(oldloc, newloc, entrysize); - FREE_LOCK(&lk); -} - -/* - * Free a diradd dependency structure. This routine must be called - * with splbio interrupts blocked. - */ -static void -free_diradd(dap) - struct diradd *dap; -{ - struct dirrem *dirrem; - struct pagedep *pagedep; - struct inodedep *inodedep; - struct mkdir *mkdir, *nextmd; - -#ifdef DEBUG - if (lk.lkt_held == NOHOLDER) - panic("free_diradd: lock not held"); -#endif - WORKLIST_REMOVE(&dap->da_list); - LIST_REMOVE(dap, da_pdlist); - if ((dap->da_state & DIRCHG) == 0) { - pagedep = dap->da_pagedep; - } else { - dirrem = dap->da_previous; - pagedep = dirrem->dm_pagedep; - dirrem->dm_dirinum = pagedep->pd_ino; - add_to_worklist(&dirrem->dm_list); - } - if (inodedep_lookup(VFSTOUFS(pagedep->pd_mnt)->um_fs, dap->da_newinum, - 0, &inodedep) != 0) - (void) free_inodedep(inodedep); - if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) { - for (mkdir = LIST_FIRST(&mkdirlisthd); mkdir; mkdir = nextmd) { - nextmd = LIST_NEXT(mkdir, md_mkdirs); - if (mkdir->md_diradd != dap) - continue; - dap->da_state &= ~mkdir->md_state; - WORKLIST_REMOVE(&mkdir->md_list); - LIST_REMOVE(mkdir, md_mkdirs); - WORKITEM_FREE(mkdir, D_MKDIR); - } - if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) { - FREE_LOCK(&lk); - panic("free_diradd: unfound ref"); - } - } - WORKITEM_FREE(dap, D_DIRADD); -} - -/* - * Directory entry removal dependencies. 
- * - * When removing a directory entry, the entry's inode pointer must be - * zero'ed on disk before the corresponding inode's link count is decremented - * (possibly freeing the inode for re-use). This dependency is handled by - * updating the directory entry but delaying the inode count reduction until - * after the directory block has been written to disk. After this point, the - * inode count can be decremented whenever it is convenient. - */ - -/* - * This routine should be called immediately after removing - * a directory entry. The inode's link count should not be - * decremented by the calling procedure -- the soft updates - * code will do this task when it is safe. - */ -void -softdep_setup_remove(bp, dp, ip, isrmdir) - struct buf *bp; /* buffer containing directory block */ - struct inode *dp; /* inode for the directory being modified */ - struct inode *ip; /* inode for directory entry being removed */ - int isrmdir; /* indicates if doing RMDIR */ -{ - struct dirrem *dirrem, *prevdirrem; - - /* - * Allocate a new dirrem if appropriate and ACQUIRE_LOCK. - */ - dirrem = newdirrem(bp, dp, ip, isrmdir, &prevdirrem); - - /* - * If the COMPLETE flag is clear, then there were no active - * entries and we want to roll back to a zeroed entry until - * the new inode is committed to disk. If the COMPLETE flag is - * set then we have deleted an entry that never made it to - * disk. If the entry we deleted resulted from a name change, - * then the old name still resides on disk. We cannot delete - * its inode (returned to us in prevdirrem) until the zeroed - * directory entry gets to disk. The new inode has never been - * referenced on the disk, so can be deleted immediately. - */ - if ((dirrem->dm_state & COMPLETE) == 0) { - LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, dirrem, - dm_next); - FREE_LOCK(&lk); - } else { - if (prevdirrem != NULL) - LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, - prevdirrem, dm_next); - dirrem->dm_dirinum = dirrem->dm_pagedep->pd_ino; - FREE_LOCK(&lk); - handle_workitem_remove(dirrem, NULL); - } -} - -/* - * Allocate a new dirrem if appropriate and return it along with - * its associated pagedep. Called without a lock, returns with lock. - */ -static long num_dirrem; /* number of dirrem allocated */ -static struct dirrem * -newdirrem(bp, dp, ip, isrmdir, prevdirremp) - struct buf *bp; /* buffer containing directory block */ - struct inode *dp; /* inode for the directory being modified */ - struct inode *ip; /* inode for directory entry being removed */ - int isrmdir; /* indicates if doing RMDIR */ - struct dirrem **prevdirremp; /* previously referenced inode, if any */ -{ - int offset; - ufs_lbn_t lbn; - struct diradd *dap; - struct dirrem *dirrem; - struct pagedep *pagedep; - - /* - * Whiteouts have no deletion dependencies. - */ - if (ip == NULL) - panic("newdirrem: whiteout"); - /* - * If we are over our limit, try to improve the situation. - * Limiting the number of dirrem structures will also limit - * the number of freefile and freeblks structures. - */ - if (num_dirrem > max_softdeps / 2) - (void) request_cleanup(FLUSH_REMOVE, 0); - num_dirrem += 1; - MALLOC(dirrem, struct dirrem *, sizeof(struct dirrem), - M_DIRREM, M_SOFTDEP_FLAGS|M_ZERO); - dirrem->dm_list.wk_type = D_DIRREM; - dirrem->dm_state = isrmdir ? 
RMDIR : 0; - dirrem->dm_mnt = ITOV(ip)->v_mount; - dirrem->dm_oldinum = ip->i_number; - *prevdirremp = NULL; - - ACQUIRE_LOCK(&lk); - lbn = lblkno(dp->i_fs, dp->i_offset); - offset = blkoff(dp->i_fs, dp->i_offset); - if (pagedep_lookup(dp, lbn, DEPALLOC, &pagedep) == 0) - WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); - dirrem->dm_pagedep = pagedep; - /* - * Check for a diradd dependency for the same directory entry. - * If present, then both dependencies become obsolete and can - * be de-allocated. Check for an entry on both the pd_dirraddhd - * list and the pd_pendinghd list. - */ - - LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(offset)], da_pdlist) - if (dap->da_offset == offset) - break; - if (dap == NULL) { - - LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist) - if (dap->da_offset == offset) - break; - if (dap == NULL) - return (dirrem); - } - /* - * Must be ATTACHED at this point. - */ - if ((dap->da_state & ATTACHED) == 0) { - FREE_LOCK(&lk); - panic("newdirrem: not ATTACHED"); - } - if (dap->da_newinum != ip->i_number) { - FREE_LOCK(&lk); - panic("newdirrem: inum %d should be %d", - ip->i_number, dap->da_newinum); - } - /* - * If we are deleting a changed name that never made it to disk, - * then return the dirrem describing the previous inode (which - * represents the inode currently referenced from this entry on disk). - */ - if ((dap->da_state & DIRCHG) != 0) { - *prevdirremp = dap->da_previous; - dap->da_state &= ~DIRCHG; - dap->da_pagedep = pagedep; - } - /* - * We are deleting an entry that never made it to disk. - * Mark it COMPLETE so we can delete its inode immediately. - */ - dirrem->dm_state |= COMPLETE; - free_diradd(dap); - return (dirrem); -} - -/* - * Directory entry change dependencies. - * - * Changing an existing directory entry requires that an add operation - * be completed first followed by a deletion. The semantics for the addition - * are identical to the description of adding a new entry above except - * that the rollback is to the old inode number rather than zero. Once - * the addition dependency is completed, the removal is done as described - * in the removal routine above. - */ - -/* - * This routine should be called immediately after changing - * a directory entry. The inode's link count should not be - * decremented by the calling procedure -- the soft updates - * code will perform this task when it is safe. - */ -void -softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir) - struct buf *bp; /* buffer containing directory block */ - struct inode *dp; /* inode for the directory being modified */ - struct inode *ip; /* inode for directory entry being removed */ - ino_t newinum; /* new inode number for changed entry */ - int isrmdir; /* indicates if doing RMDIR */ -{ - int offset; - struct diradd *dap = NULL; - struct dirrem *dirrem, *prevdirrem; - struct pagedep *pagedep; - struct inodedep *inodedep; - - offset = blkoff(dp->i_fs, dp->i_offset); - - /* - * Whiteouts do not need diradd dependencies. - */ - if (newinum != WINO) { - MALLOC(dap, struct diradd *, sizeof(struct diradd), - M_DIRADD, M_SOFTDEP_FLAGS|M_ZERO); - dap->da_list.wk_type = D_DIRADD; - dap->da_state = DIRCHG | ATTACHED | DEPCOMPLETE; - dap->da_offset = offset; - dap->da_newinum = newinum; - } - - /* - * Allocate a new dirrem and ACQUIRE_LOCK. 
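An aside on the cancellation performed in newdirrem above: a removal that finds a still-pending, uncommitted addition at the same directory offset makes both dependencies obsolete, so neither ever needs to reach the disk. The userland sketch below models only that pairing; the pending_add list and the record_add/record_remove names are invented stand-ins for the pd_diraddhd/pd_pendinghd chains and are not kernel interfaces.

/*
 * Toy model of the diradd/dirrem cancellation in newdirrem(): a removal
 * at an offset that still has an uncommitted addition cancels that
 * addition instead of creating a removal dependency of its own.
 * Hypothetical userland sketch; not the kernel data structures.
 */
#include <stdio.h>
#include <stdlib.h>

struct pending_add {
        int offset;                     /* offset of entry in directory block */
        struct pending_add *next;
};

static struct pending_add *adds;        /* uncommitted additions */

static void
record_add(int offset)
{
        struct pending_add *ap = malloc(sizeof(*ap));

        ap->offset = offset;
        ap->next = adds;
        adds = ap;
}

/* Returns 1 if the removal cancelled a pending addition, 0 otherwise. */
static int
record_remove(int offset)
{
        struct pending_add **app, *ap;

        for (app = &adds; (ap = *app) != NULL; app = &ap->next) {
                if (ap->offset != offset)
                        continue;
                *app = ap->next;        /* both operations become obsolete */
                free(ap);
                return (1);
        }
        return (0);                     /* must queue a real removal */
}

int
main(void)
{
        record_add(512);
        printf("remove at 512 cancelled add: %d\n", record_remove(512));
        printf("remove at 24 cancelled add: %d\n", record_remove(24));
        return (0);
}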
- */ - dirrem = newdirrem(bp, dp, ip, isrmdir, &prevdirrem); - pagedep = dirrem->dm_pagedep; - /* - * The possible values for isrmdir: - * 0 - non-directory file rename - * 1 - directory rename within same directory - * inum - directory rename to new directory of given inode number - * When renaming to a new directory, we are both deleting and - * creating a new directory entry, so the link count on the new - * directory should not change. Thus we do not need the followup - * dirrem which is usually done in handle_workitem_remove. We set - * the DIRCHG flag to tell handle_workitem_remove to skip the - * followup dirrem. - */ - if (isrmdir > 1) - dirrem->dm_state |= DIRCHG; - - /* - * Whiteouts have no additional dependencies, - * so just put the dirrem on the correct list. - */ - if (newinum == WINO) { - if ((dirrem->dm_state & COMPLETE) == 0) { - LIST_INSERT_HEAD(&pagedep->pd_dirremhd, dirrem, - dm_next); - } else { - dirrem->dm_dirinum = pagedep->pd_ino; - add_to_worklist(&dirrem->dm_list); - } - FREE_LOCK(&lk); - return; - } - - /* - * If the COMPLETE flag is clear, then there were no active - * entries and we want to roll back to the previous inode until - * the new inode is committed to disk. If the COMPLETE flag is - * set, then we have deleted an entry that never made it to disk. - * If the entry we deleted resulted from a name change, then the old - * inode reference still resides on disk. Any rollback that we do - * needs to be to that old inode (returned to us in prevdirrem). If - * the entry we deleted resulted from a create, then there is - * no entry on the disk, so we want to roll back to zero rather - * than the uncommitted inode. In either of the COMPLETE cases we - * want to immediately free the unwritten and unreferenced inode. - */ - if ((dirrem->dm_state & COMPLETE) == 0) { - dap->da_previous = dirrem; - } else { - if (prevdirrem != NULL) { - dap->da_previous = prevdirrem; - } else { - dap->da_state &= ~DIRCHG; - dap->da_pagedep = pagedep; - } - dirrem->dm_dirinum = pagedep->pd_ino; - add_to_worklist(&dirrem->dm_list); - } - /* - * Link into its inodedep. Put it on the id_bufwait list if the inode - * is not yet written. If it is written, do the post-inode write - * processing to put it on the id_pendinghd list. - */ - if (inodedep_lookup(dp->i_fs, newinum, DEPALLOC, &inodedep) == 0 || - (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) { - dap->da_state |= COMPLETE; - LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); - WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list); - } else { - LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], - dap, da_pdlist); - WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list); - } - FREE_LOCK(&lk); -} - -/* - * Called whenever the link count on an inode is changed. - * It creates an inode dependency so that the new reference(s) - * to the inode cannot be committed to disk until the updated - * inode has been written. - */ -void -softdep_change_linkcnt(ip) - struct inode *ip; /* the inode with the increased link count */ -{ - struct inodedep *inodedep; - - ACQUIRE_LOCK(&lk); - (void) inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC, &inodedep); - if (ip->i_nlink < ip->i_effnlink) { - FREE_LOCK(&lk); - panic("softdep_change_linkcnt: bad delta"); - } - inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink; - FREE_LOCK(&lk); -} - -/* - * Called when the effective link count and the reference count - * on an inode drops to zero. 
At this point there are no names - * referencing the file in the filesystem and no active file - * references. The space associated with the file will be freed - * as soon as the necessary soft dependencies are cleared. - */ -void -softdep_releasefile(ip) - struct inode *ip; /* inode with the zero effective link count */ -{ - struct inodedep *inodedep; - struct fs *fs; - int extblocks; - - if (ip->i_effnlink > 0) - panic("softdep_filerelease: file still referenced"); - /* - * We may be called several times as the real reference count - * drops to zero. We only want to account for the space once. - */ - if (ip->i_flag & IN_SPACECOUNTED) - return; - /* - * We have to deactivate a snapshot otherwise copyonwrites may - * add blocks and the cleanup may remove blocks after we have - * tried to account for them. - */ - if ((ip->i_flags & SF_SNAPSHOT) != 0) - ffs_snapremove(ITOV(ip)); - /* - * If we are tracking an nlinkdelta, we have to also remember - * whether we accounted for the freed space yet. - */ - ACQUIRE_LOCK(&lk); - if ((inodedep_lookup(ip->i_fs, ip->i_number, 0, &inodedep))) - inodedep->id_state |= SPACECOUNTED; - FREE_LOCK(&lk); - fs = ip->i_fs; - extblocks = 0; - if (fs->fs_magic == FS_UFS2_MAGIC) - extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize)); - ip->i_fs->fs_pendingblocks += DIP(ip, i_blocks) - extblocks; - ip->i_fs->fs_pendinginodes += 1; - ip->i_flag |= IN_SPACECOUNTED; -} - -/* - * This workitem decrements the inode's link count. - * If the link count reaches zero, the file is removed. - */ -static void -handle_workitem_remove(dirrem, xp) - struct dirrem *dirrem; - struct vnode *xp; -{ - struct thread *td = curthread; - struct inodedep *inodedep; - struct vnode *vp; - struct inode *ip; - ino_t oldinum; - int error; - - if ((vp = xp) == NULL && - (error = VFS_VGET(dirrem->dm_mnt, dirrem->dm_oldinum, LK_EXCLUSIVE, - &vp)) != 0) { - softdep_error("handle_workitem_remove: vget", error); - return; - } - ip = VTOI(vp); - ACQUIRE_LOCK(&lk); - if ((inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, 0, &inodedep)) == 0){ - FREE_LOCK(&lk); - panic("handle_workitem_remove: lost inodedep"); - } - /* - * Normal file deletion. - */ - if ((dirrem->dm_state & RMDIR) == 0) { - ip->i_nlink--; - DIP(ip, i_nlink) = ip->i_nlink; - ip->i_flag |= IN_CHANGE; - if (ip->i_nlink < ip->i_effnlink) { - FREE_LOCK(&lk); - panic("handle_workitem_remove: bad file delta"); - } - inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink; - FREE_LOCK(&lk); - vput(vp); - num_dirrem -= 1; - WORKITEM_FREE(dirrem, D_DIRREM); - return; - } - /* - * Directory deletion. Decrement reference count for both the - * just deleted parent directory entry and the reference for ".". - * Next truncate the directory to length zero. When the - * truncation completes, arrange to have the reference count on - * the parent decremented to account for the loss of "..". - */ - ip->i_nlink -= 2; - DIP(ip, i_nlink) = ip->i_nlink; - ip->i_flag |= IN_CHANGE; - if (ip->i_nlink < ip->i_effnlink) { - FREE_LOCK(&lk); - panic("handle_workitem_remove: bad dir delta"); - } - inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink; - FREE_LOCK(&lk); - if ((error = UFS_TRUNCATE(vp, (off_t)0, 0, td->td_ucred, td)) != 0) - softdep_error("handle_workitem_remove: truncate", error); - /* - * Rename a directory to a new parent. Since, we are both deleting - * and creating a new directory entry, the link count on the new - * directory should not change. Thus we skip the followup dirrem. 
- */ - if (dirrem->dm_state & DIRCHG) { - vput(vp); - num_dirrem -= 1; - WORKITEM_FREE(dirrem, D_DIRREM); - return; - } - /* - * If the inodedep does not exist, then the zero'ed inode has - * been written to disk. If the allocated inode has never been - * written to disk, then the on-disk inode is zero'ed. In either - * case we can remove the file immediately. - */ - ACQUIRE_LOCK(&lk); - dirrem->dm_state = 0; - oldinum = dirrem->dm_oldinum; - dirrem->dm_oldinum = dirrem->dm_dirinum; - if (inodedep_lookup(ip->i_fs, oldinum, 0, &inodedep) == 0 || - check_inode_unwritten(inodedep)) { - FREE_LOCK(&lk); - vput(vp); - handle_workitem_remove(dirrem, NULL); - return; - } - WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list); - FREE_LOCK(&lk); - vput(vp); -} - -/* - * Inode de-allocation dependencies. - * - * When an inode's link count is reduced to zero, it can be de-allocated. We - * found it convenient to postpone de-allocation until after the inode is - * written to disk with its new link count (zero). At this point, all of the - * on-disk inode's block pointers are nullified and, with careful dependency - * list ordering, all dependencies related to the inode will be satisfied and - * the corresponding dependency structures de-allocated. So, if/when the - * inode is reused, there will be no mixing of old dependencies with new - * ones. This artificial dependency is set up by the block de-allocation - * procedure above (softdep_setup_freeblocks) and completed by the - * following procedure. - */ -static void -handle_workitem_freefile(freefile) - struct freefile *freefile; -{ - struct fs *fs; - struct inodedep *idp; - int error; - - fs = VFSTOUFS(freefile->fx_mnt)->um_fs; -#ifdef DEBUG - ACQUIRE_LOCK(&lk); - error = inodedep_lookup(fs, freefile->fx_oldinum, 0, &idp); - FREE_LOCK(&lk); - if (error) - panic("handle_workitem_freefile: inodedep survived"); -#endif - fs->fs_pendinginodes -= 1; - if ((error = ffs_freefile(fs, freefile->fx_devvp, freefile->fx_oldinum, - freefile->fx_mode)) != 0) - softdep_error("handle_workitem_freefile", error); - WORKITEM_FREE(freefile, D_FREEFILE); -} - -/* - * Disk writes. - * - * The dependency structures constructed above are most actively used when file - * system blocks are written to disk. No constraints are placed on when a - * block can be written, but unsatisfied update dependencies are made safe by - * modifying (or replacing) the source memory for the duration of the disk - * write. When the disk write completes, the memory block is again brought - * up-to-date. - * - * In-core inode structure reclamation. - * - * Because there are a finite number of "in-core" inode structures, they are - * reused regularly. By transferring all inode-related dependencies to the - * in-memory inode block and indexing them separately (via "inodedep"s), we - * can allow "in-core" inode structures to be reused at any time and avoid - * any increase in contention. - * - * Called just before entering the device driver to initiate a new disk I/O. - * The buffer must be locked, thus, no I/O completion operations can occur - * while we are manipulating its associated dependencies. - */ -static void -softdep_disk_io_initiation(bp) - struct buf *bp; /* structure describing disk write to occur */ -{ - struct worklist *wk, *nextwk; - struct indirdep *indirdep; - struct inodedep *inodedep; - - /* - * We only care about write operations. There should never - * be dependencies for reads. 
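The "Disk writes" comment above is the heart of the scheme: a block with unresolved dependencies is never held back; its in-memory contents are temporarily replaced with safe values for the duration of the write and restored at completion (the D_INDIRDEP case below does exactly this via ir_saveddata). A minimal userland sketch of that save/substitute/restore swap follows; struct toybuf and the two function names are invented for illustration and are not the kernel API.

/*
 * Minimal model of the save/substitute/restore dance performed on an
 * indirect block: write_initiate() stashes the up-to-date data and
 * substitutes the last-known-safe copy; write_complete() puts the
 * up-to-date data back. Illustrative only.
 */
#include <stdio.h>
#include <string.h>

#define BLKSIZE 8

struct toybuf {
        int data[BLKSIZE];      /* what would go to disk */
        int saved[BLKSIZE];     /* parked up-to-date copy (ir_saveddata role) */
        int undone;             /* rollback currently in effect? */
};

static void
write_initiate(struct toybuf *bp, const int *safecopy)
{
        memcpy(bp->saved, bp->data, sizeof(bp->data));  /* park new pointers */
        memcpy(bp->data, safecopy, sizeof(bp->data));   /* write safe values */
        bp->undone = 1;
}

static void
write_complete(struct toybuf *bp)
{
        memcpy(bp->data, bp->saved, sizeof(bp->data));  /* roll forward */
        bp->undone = 0;
}

int
main(void)
{
        struct toybuf bp = { .data = { 100, 101, 102 } };
        int safe[BLKSIZE] = { 0 };      /* pointers not yet safe to expose */

        write_initiate(&bp, safe);
        printf("on-disk image starts with %d (rolled back)\n", bp.data[0]);
        write_complete(&bp);
        printf("in-core image restored to %d\n", bp.data[0]);
        return (0);
}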
- */ - if (bp->b_iocmd == BIO_READ) - panic("softdep_disk_io_initiation: read"); - /* - * Do any necessary pre-I/O processing. - */ - for (wk = LIST_FIRST(&bp->b_dep); wk; wk = nextwk) { - nextwk = LIST_NEXT(wk, wk_list); - switch (wk->wk_type) { - - case D_PAGEDEP: - initiate_write_filepage(WK_PAGEDEP(wk), bp); - continue; - - case D_INODEDEP: - inodedep = WK_INODEDEP(wk); - if (inodedep->id_fs->fs_magic == FS_UFS1_MAGIC) - initiate_write_inodeblock_ufs1(inodedep, bp); - else - initiate_write_inodeblock_ufs2(inodedep, bp); - continue; - - case D_INDIRDEP: - indirdep = WK_INDIRDEP(wk); - if (indirdep->ir_state & GOINGAWAY) - panic("disk_io_initiation: indirdep gone"); - /* - * If there are no remaining dependencies, this - * will be writing the real pointers, so the - * dependency can be freed. - */ - if (LIST_FIRST(&indirdep->ir_deplisthd) == NULL) { - indirdep->ir_savebp->b_flags |= - B_INVAL | B_NOCACHE; - brelse(indirdep->ir_savebp); - /* inline expand WORKLIST_REMOVE(wk); */ - wk->wk_state &= ~ONWORKLIST; - LIST_REMOVE(wk, wk_list); - WORKITEM_FREE(indirdep, D_INDIRDEP); - continue; - } - /* - * Replace up-to-date version with safe version. - */ - MALLOC(indirdep->ir_saveddata, caddr_t, bp->b_bcount, - M_INDIRDEP, M_SOFTDEP_FLAGS); - ACQUIRE_LOCK(&lk); - indirdep->ir_state &= ~ATTACHED; - indirdep->ir_state |= UNDONE; - bcopy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount); - bcopy(indirdep->ir_savebp->b_data, bp->b_data, - bp->b_bcount); - FREE_LOCK(&lk); - continue; - - case D_MKDIR: - case D_BMSAFEMAP: - case D_ALLOCDIRECT: - case D_ALLOCINDIR: - continue; - - default: - panic("handle_disk_io_initiation: Unexpected type %s", - TYPENAME(wk->wk_type)); - /* NOTREACHED */ - } - } -} - -/* - * Called from within the procedure above to deal with unsatisfied - * allocation dependencies in a directory. The buffer must be locked, - * thus, no I/O completion operations can occur while we are - * manipulating its associated dependencies. - */ -static void -initiate_write_filepage(pagedep, bp) - struct pagedep *pagedep; - struct buf *bp; -{ - struct diradd *dap; - struct direct *ep; - int i; - - if (pagedep->pd_state & IOSTARTED) { - /* - * This can only happen if there is a driver that does not - * understand chaining. Here biodone will reissue the call - * to strategy for the incomplete buffers. - */ - printf("initiate_write_filepage: already started\n"); - return; - } - pagedep->pd_state |= IOSTARTED; - ACQUIRE_LOCK(&lk); - for (i = 0; i < DAHASHSZ; i++) { - LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) { - ep = (struct direct *) - ((char *)bp->b_data + dap->da_offset); - if (ep->d_ino != dap->da_newinum) { - FREE_LOCK(&lk); - panic("%s: dir inum %d != new %d", - "initiate_write_filepage", - ep->d_ino, dap->da_newinum); - } - if (dap->da_state & DIRCHG) - ep->d_ino = dap->da_previous->dm_oldinum; - else - ep->d_ino = 0; - dap->da_state &= ~ATTACHED; - dap->da_state |= UNDONE; - } - } - FREE_LOCK(&lk); -} - -/* - * Version of initiate_write_inodeblock that handles UFS1 dinodes. - * Note that any bug fixes made to this routine must be done in the - * version found below. - * - * Called from within the procedure above to deal with unsatisfied - * allocation dependencies in an inodeblock. The buffer must be - * locked, thus, no I/O completion operations can occur while we - * are manipulating its associated dependencies. 
- */ -static void -initiate_write_inodeblock_ufs1(inodedep, bp) - struct inodedep *inodedep; - struct buf *bp; /* The inode block */ -{ - struct allocdirect *adp, *lastadp; - struct ufs1_dinode *dp; - struct fs *fs; - ufs_lbn_t i, prevlbn = 0; - int deplist; - - if (inodedep->id_state & IOSTARTED) - panic("initiate_write_inodeblock_ufs1: already started"); - inodedep->id_state |= IOSTARTED; - fs = inodedep->id_fs; - dp = (struct ufs1_dinode *)bp->b_data + - ino_to_fsbo(fs, inodedep->id_ino); - /* - * If the bitmap is not yet written, then the allocated - * inode cannot be written to disk. - */ - if ((inodedep->id_state & DEPCOMPLETE) == 0) { - if (inodedep->id_savedino1 != NULL) - panic("initiate_write_inodeblock_ufs1: I/O underway"); - MALLOC(inodedep->id_savedino1, struct ufs1_dinode *, - sizeof(struct ufs1_dinode), M_INODEDEP, M_SOFTDEP_FLAGS); - *inodedep->id_savedino1 = *dp; - bzero((caddr_t)dp, sizeof(struct ufs1_dinode)); - return; - } - /* - * If no dependencies, then there is nothing to roll back. - */ - inodedep->id_savedsize = dp->di_size; - inodedep->id_savedextsize = 0; - if (TAILQ_FIRST(&inodedep->id_inoupdt) == NULL) - return; - /* - * Set the dependencies to busy. - */ - ACQUIRE_LOCK(&lk); - for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; - adp = TAILQ_NEXT(adp, ad_next)) { -#ifdef DIAGNOSTIC - if (deplist != 0 && prevlbn >= adp->ad_lbn) { - FREE_LOCK(&lk); - panic("softdep_write_inodeblock: lbn order"); - } - prevlbn = adp->ad_lbn; - if (adp->ad_lbn < NDADDR && - dp->di_db[adp->ad_lbn] != adp->ad_newblkno) { - FREE_LOCK(&lk); - panic("%s: direct pointer #%jd mismatch %d != %jd", - "softdep_write_inodeblock", - (intmax_t)adp->ad_lbn, - dp->di_db[adp->ad_lbn], - (intmax_t)adp->ad_newblkno); - } - if (adp->ad_lbn >= NDADDR && - dp->di_ib[adp->ad_lbn - NDADDR] != adp->ad_newblkno) { - FREE_LOCK(&lk); - panic("%s: indirect pointer #%jd mismatch %d != %jd", - "softdep_write_inodeblock", - (intmax_t)adp->ad_lbn - NDADDR, - dp->di_ib[adp->ad_lbn - NDADDR], - (intmax_t)adp->ad_newblkno); - } - deplist |= 1 << adp->ad_lbn; - if ((adp->ad_state & ATTACHED) == 0) { - FREE_LOCK(&lk); - panic("softdep_write_inodeblock: Unknown state 0x%x", - adp->ad_state); - } -#endif /* DIAGNOSTIC */ - adp->ad_state &= ~ATTACHED; - adp->ad_state |= UNDONE; - } - /* - * The on-disk inode cannot claim to be any larger than the last - * fragment that has been written. Otherwise, the on-disk inode - * might have fragments that were not the last block in the file - * which would corrupt the filesystem. 
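That constraint reduces to a single line of arithmetic in the rollback loop that follows: if the last safely written allocation at logical block lbn was an oldsize-byte fragment, the on-disk inode may claim at most fs_bsize * lbn + oldsize bytes. A hypothetical numeric sketch (block size and fragment size chosen arbitrarily):

/*
 * Safe on-disk file size when the allocation at logical block `lbn'
 * rolls back to an `oldsize'-byte fragment (see the dp->di_size
 * assignment in initiate_write_inodeblock_ufs1/ufs2). Numbers invented.
 */
#include <stdio.h>
#include <stdint.h>

static uint64_t
safe_disk_size(uint64_t bsize, uint64_t lbn, uint64_t oldsize)
{
        return (bsize * lbn + oldsize);
}

int
main(void)
{
        /* 16K blocks, rollback to a 2K fragment at logical block 2. */
        printf("safe size: %ju\n",
            (uintmax_t)safe_disk_size(16384, 2, 2048));  /* prints 34816 */
        return (0);
}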
- */ - for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; - lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) { - if (adp->ad_lbn >= NDADDR) - break; - dp->di_db[adp->ad_lbn] = adp->ad_oldblkno; - /* keep going until hitting a rollback to a frag */ - if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize) - continue; - dp->di_size = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize; - for (i = adp->ad_lbn + 1; i < NDADDR; i++) { -#ifdef DIAGNOSTIC - if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0) { - FREE_LOCK(&lk); - panic("softdep_write_inodeblock: lost dep1"); - } -#endif /* DIAGNOSTIC */ - dp->di_db[i] = 0; - } - for (i = 0; i < NIADDR; i++) { -#ifdef DIAGNOSTIC - if (dp->di_ib[i] != 0 && - (deplist & ((1 << NDADDR) << i)) == 0) { - FREE_LOCK(&lk); - panic("softdep_write_inodeblock: lost dep2"); - } -#endif /* DIAGNOSTIC */ - dp->di_ib[i] = 0; - } - FREE_LOCK(&lk); - return; - } - /* - * If we have zero'ed out the last allocated block of the file, - * roll back the size to the last currently allocated block. - * We know that this last allocated block is a full-sized as - * we already checked for fragments in the loop above. - */ - if (lastadp != NULL && - dp->di_size <= (lastadp->ad_lbn + 1) * fs->fs_bsize) { - for (i = lastadp->ad_lbn; i >= 0; i--) - if (dp->di_db[i] != 0) - break; - dp->di_size = (i + 1) * fs->fs_bsize; - } - /* - * The only dependencies are for indirect blocks. - * - * The file size for indirect block additions is not guaranteed. - * Such a guarantee would be non-trivial to achieve. The conventional - * synchronous write implementation also does not make this guarantee. - * Fsck should catch and fix discrepancies. Arguably, the file size - * can be over-estimated without destroying integrity when the file - * moves into the indirect blocks (i.e., is large). If we want to - * postpone fsck, we are stuck with this argument. - */ - for (; adp; adp = TAILQ_NEXT(adp, ad_next)) - dp->di_ib[adp->ad_lbn - NDADDR] = 0; - FREE_LOCK(&lk); -} - -/* - * Version of initiate_write_inodeblock that handles UFS2 dinodes. - * Note that any bug fixes made to this routine must be done in the - * version found above. - * - * Called from within the procedure above to deal with unsatisfied - * allocation dependencies in an inodeblock. The buffer must be - * locked, thus, no I/O completion operations can occur while we - * are manipulating its associated dependencies. - */ -static void -initiate_write_inodeblock_ufs2(inodedep, bp) - struct inodedep *inodedep; - struct buf *bp; /* The inode block */ -{ - struct allocdirect *adp, *lastadp; - struct ufs2_dinode *dp; - struct fs *fs; - ufs_lbn_t i, prevlbn = 0; - int deplist; - - if (inodedep->id_state & IOSTARTED) - panic("initiate_write_inodeblock_ufs2: already started"); - inodedep->id_state |= IOSTARTED; - fs = inodedep->id_fs; - dp = (struct ufs2_dinode *)bp->b_data + - ino_to_fsbo(fs, inodedep->id_ino); - /* - * If the bitmap is not yet written, then the allocated - * inode cannot be written to disk. - */ - if ((inodedep->id_state & DEPCOMPLETE) == 0) { - if (inodedep->id_savedino2 != NULL) - panic("initiate_write_inodeblock_ufs2: I/O underway"); - MALLOC(inodedep->id_savedino2, struct ufs2_dinode *, - sizeof(struct ufs2_dinode), M_INODEDEP, M_SOFTDEP_FLAGS); - *inodedep->id_savedino2 = *dp; - bzero((caddr_t)dp, sizeof(struct ufs2_dinode)); - return; - } - /* - * If no dependencies, then there is nothing to roll back. 
- */ - inodedep->id_savedsize = dp->di_size; - inodedep->id_savedextsize = dp->di_extsize; - if (TAILQ_FIRST(&inodedep->id_inoupdt) == NULL && - TAILQ_FIRST(&inodedep->id_extupdt) == NULL) - return; - /* - * Set the ext data dependencies to busy. - */ - ACQUIRE_LOCK(&lk); - for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_extupdt); adp; - adp = TAILQ_NEXT(adp, ad_next)) { -#ifdef DIAGNOSTIC - if (deplist != 0 && prevlbn >= adp->ad_lbn) { - FREE_LOCK(&lk); - panic("softdep_write_inodeblock: lbn order"); - } - prevlbn = adp->ad_lbn; - if (dp->di_extb[adp->ad_lbn] != adp->ad_newblkno) { - FREE_LOCK(&lk); - panic("%s: direct pointer #%jd mismatch %jd != %jd", - "softdep_write_inodeblock", - (intmax_t)adp->ad_lbn, - (intmax_t)dp->di_extb[adp->ad_lbn], - (intmax_t)adp->ad_newblkno); - } - deplist |= 1 << adp->ad_lbn; - if ((adp->ad_state & ATTACHED) == 0) { - FREE_LOCK(&lk); - panic("softdep_write_inodeblock: Unknown state 0x%x", - adp->ad_state); - } -#endif /* DIAGNOSTIC */ - adp->ad_state &= ~ATTACHED; - adp->ad_state |= UNDONE; - } - /* - * The on-disk inode cannot claim to be any larger than the last - * fragment that has been written. Otherwise, the on-disk inode - * might have fragments that were not the last block in the ext - * data which would corrupt the filesystem. - */ - for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_extupdt); adp; - lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) { - dp->di_extb[adp->ad_lbn] = adp->ad_oldblkno; - /* keep going until hitting a rollback to a frag */ - if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize) - continue; - dp->di_extsize = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize; - for (i = adp->ad_lbn + 1; i < NXADDR; i++) { -#ifdef DIAGNOSTIC - if (dp->di_extb[i] != 0 && (deplist & (1 << i)) == 0) { - FREE_LOCK(&lk); - panic("softdep_write_inodeblock: lost dep1"); - } -#endif /* DIAGNOSTIC */ - dp->di_extb[i] = 0; - } - lastadp = NULL; - break; - } - /* - * If we have zero'ed out the last allocated block of the ext - * data, roll back the size to the last currently allocated block. - * We know that this last allocated block is a full-sized as - * we already checked for fragments in the loop above. - */ - if (lastadp != NULL && - dp->di_extsize <= (lastadp->ad_lbn + 1) * fs->fs_bsize) { - for (i = lastadp->ad_lbn; i >= 0; i--) - if (dp->di_extb[i] != 0) - break; - dp->di_extsize = (i + 1) * fs->fs_bsize; - } - /* - * Set the file data dependencies to busy. 
- */ - for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; - adp = TAILQ_NEXT(adp, ad_next)) { -#ifdef DIAGNOSTIC - if (deplist != 0 && prevlbn >= adp->ad_lbn) { - FREE_LOCK(&lk); - panic("softdep_write_inodeblock: lbn order"); - } - prevlbn = adp->ad_lbn; - if (adp->ad_lbn < NDADDR && - dp->di_db[adp->ad_lbn] != adp->ad_newblkno) { - FREE_LOCK(&lk); - panic("%s: direct pointer #%jd mismatch %jd != %jd", - "softdep_write_inodeblock", - (intmax_t)adp->ad_lbn, - (intmax_t)dp->di_db[adp->ad_lbn], - (intmax_t)adp->ad_newblkno); - } - if (adp->ad_lbn >= NDADDR && - dp->di_ib[adp->ad_lbn - NDADDR] != adp->ad_newblkno) { - FREE_LOCK(&lk); - panic("%s indirect pointer #%jd mismatch %jd != %jd", - "softdep_write_inodeblock:", - (intmax_t)adp->ad_lbn - NDADDR, - (intmax_t)dp->di_ib[adp->ad_lbn - NDADDR], - (intmax_t)adp->ad_newblkno); - } - deplist |= 1 << adp->ad_lbn; - if ((adp->ad_state & ATTACHED) == 0) { - FREE_LOCK(&lk); - panic("softdep_write_inodeblock: Unknown state 0x%x", - adp->ad_state); - } -#endif /* DIAGNOSTIC */ - adp->ad_state &= ~ATTACHED; - adp->ad_state |= UNDONE; - } - /* - * The on-disk inode cannot claim to be any larger than the last - * fragment that has been written. Otherwise, the on-disk inode - * might have fragments that were not the last block in the file - * which would corrupt the filesystem. - */ - for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; - lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) { - if (adp->ad_lbn >= NDADDR) - break; - dp->di_db[adp->ad_lbn] = adp->ad_oldblkno; - /* keep going until hitting a rollback to a frag */ - if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize) - continue; - dp->di_size = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize; - for (i = adp->ad_lbn + 1; i < NDADDR; i++) { -#ifdef DIAGNOSTIC - if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0) { - FREE_LOCK(&lk); - panic("softdep_write_inodeblock: lost dep2"); - } -#endif /* DIAGNOSTIC */ - dp->di_db[i] = 0; - } - for (i = 0; i < NIADDR; i++) { -#ifdef DIAGNOSTIC - if (dp->di_ib[i] != 0 && - (deplist & ((1 << NDADDR) << i)) == 0) { - FREE_LOCK(&lk); - panic("softdep_write_inodeblock: lost dep3"); - } -#endif /* DIAGNOSTIC */ - dp->di_ib[i] = 0; - } - FREE_LOCK(&lk); - return; - } - /* - * If we have zero'ed out the last allocated block of the file, - * roll back the size to the last currently allocated block. - * We know that this last allocated block is a full-sized as - * we already checked for fragments in the loop above. - */ - if (lastadp != NULL && - dp->di_size <= (lastadp->ad_lbn + 1) * fs->fs_bsize) { - for (i = lastadp->ad_lbn; i >= 0; i--) - if (dp->di_db[i] != 0) - break; - dp->di_size = (i + 1) * fs->fs_bsize; - } - /* - * The only dependencies are for indirect blocks. - * - * The file size for indirect block additions is not guaranteed. - * Such a guarantee would be non-trivial to achieve. The conventional - * synchronous write implementation also does not make this guarantee. - * Fsck should catch and fix discrepancies. Arguably, the file size - * can be over-estimated without destroying integrity when the file - * moves into the indirect blocks (i.e., is large). If we want to - * postpone fsck, we are stuck with this argument. 
- */ - for (; adp; adp = TAILQ_NEXT(adp, ad_next)) - dp->di_ib[adp->ad_lbn - NDADDR] = 0; - FREE_LOCK(&lk); -} - -/* - * This routine is called during the completion interrupt - * service routine for a disk write (from the procedure called - * by the device driver to inform the filesystem caches of - * a request completion). It should be called early in this - * procedure, before the block is made available to other - * processes or other routines are called. - */ -static void -softdep_disk_write_complete(bp) - struct buf *bp; /* describes the completed disk write */ -{ - struct worklist *wk; - struct workhead reattach; - struct newblk *newblk; - struct allocindir *aip; - struct allocdirect *adp; - struct indirdep *indirdep; - struct inodedep *inodedep; - struct bmsafemap *bmsafemap; - - /* - * If an error occurred while doing the write, then the data - * has not hit the disk and the dependencies cannot be unrolled. - */ - if ((bp->b_ioflags & BIO_ERROR) != 0 && (bp->b_flags & B_INVAL) == 0) - return; -#ifdef DEBUG - if (lk.lkt_held != NOHOLDER) - panic("softdep_disk_write_complete: lock is held"); - lk.lkt_held = SPECIAL_FLAG; -#endif - LIST_INIT(&reattach); - while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) { - WORKLIST_REMOVE(wk); - switch (wk->wk_type) { - - case D_PAGEDEP: - if (handle_written_filepage(WK_PAGEDEP(wk), bp)) - WORKLIST_INSERT(&reattach, wk); - continue; - - case D_INODEDEP: - if (handle_written_inodeblock(WK_INODEDEP(wk), bp)) - WORKLIST_INSERT(&reattach, wk); - continue; - - case D_BMSAFEMAP: - bmsafemap = WK_BMSAFEMAP(wk); - while ((newblk = LIST_FIRST(&bmsafemap->sm_newblkhd))) { - newblk->nb_state |= DEPCOMPLETE; - newblk->nb_bmsafemap = NULL; - LIST_REMOVE(newblk, nb_deps); - } - while ((adp = - LIST_FIRST(&bmsafemap->sm_allocdirecthd))) { - adp->ad_state |= DEPCOMPLETE; - adp->ad_buf = NULL; - LIST_REMOVE(adp, ad_deps); - handle_allocdirect_partdone(adp); - } - while ((aip = - LIST_FIRST(&bmsafemap->sm_allocindirhd))) { - aip->ai_state |= DEPCOMPLETE; - aip->ai_buf = NULL; - LIST_REMOVE(aip, ai_deps); - handle_allocindir_partdone(aip); - } - while ((inodedep = - LIST_FIRST(&bmsafemap->sm_inodedephd)) != NULL) { - inodedep->id_state |= DEPCOMPLETE; - LIST_REMOVE(inodedep, id_deps); - inodedep->id_buf = NULL; - } - WORKITEM_FREE(bmsafemap, D_BMSAFEMAP); - continue; - - case D_MKDIR: - handle_written_mkdir(WK_MKDIR(wk), MKDIR_BODY); - continue; - - case D_ALLOCDIRECT: - adp = WK_ALLOCDIRECT(wk); - adp->ad_state |= COMPLETE; - handle_allocdirect_partdone(adp); - continue; - - case D_ALLOCINDIR: - aip = WK_ALLOCINDIR(wk); - aip->ai_state |= COMPLETE; - handle_allocindir_partdone(aip); - continue; - - case D_INDIRDEP: - indirdep = WK_INDIRDEP(wk); - if (indirdep->ir_state & GOINGAWAY) { - lk.lkt_held = NOHOLDER; - panic("disk_write_complete: indirdep gone"); - } - bcopy(indirdep->ir_saveddata, bp->b_data, bp->b_bcount); - FREE(indirdep->ir_saveddata, M_INDIRDEP); - indirdep->ir_saveddata = 0; - indirdep->ir_state &= ~UNDONE; - indirdep->ir_state |= ATTACHED; - while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != 0) { - handle_allocindir_partdone(aip); - if (aip == LIST_FIRST(&indirdep->ir_donehd)) { - lk.lkt_held = NOHOLDER; - panic("disk_write_complete: not gone"); - } - } - WORKLIST_INSERT(&reattach, wk); - if ((bp->b_flags & B_DELWRI) == 0) - stat_indir_blk_ptrs++; - bdirty(bp); - continue; - - default: - lk.lkt_held = NOHOLDER; - panic("handle_disk_write_complete: Unknown type %s", - TYPENAME(wk->wk_type)); - /* NOTREACHED */ - } - } - /* - * Reattach any requests that 
must be redone. - */ - while ((wk = LIST_FIRST(&reattach)) != NULL) { - WORKLIST_REMOVE(wk); - WORKLIST_INSERT(&bp->b_dep, wk); - } -#ifdef DEBUG - if (lk.lkt_held != SPECIAL_FLAG) - panic("softdep_disk_write_complete: lock lost"); - lk.lkt_held = NOHOLDER; -#endif -} - -/* - * Called from within softdep_disk_write_complete above. Note that - * this routine is always called from interrupt level with further - * splbio interrupts blocked. - */ -static void -handle_allocdirect_partdone(adp) - struct allocdirect *adp; /* the completed allocdirect */ -{ - struct allocdirectlst *listhead; - struct allocdirect *listadp; - struct inodedep *inodedep; - long bsize, delay; - - if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE) - return; - if (adp->ad_buf != NULL) { - lk.lkt_held = NOHOLDER; - panic("handle_allocdirect_partdone: dangling dep"); - } - /* - * The on-disk inode cannot claim to be any larger than the last - * fragment that has been written. Otherwise, the on-disk inode - * might have fragments that were not the last block in the file - * which would corrupt the filesystem. Thus, we cannot free any - * allocdirects after one whose ad_oldblkno claims a fragment as - * these blocks must be rolled back to zero before writing the inode. - * We check the currently active set of allocdirects in id_inoupdt - * or id_extupdt as appropriate. - */ - inodedep = adp->ad_inodedep; - bsize = inodedep->id_fs->fs_bsize; - if (adp->ad_state & EXTDATA) - listhead = &inodedep->id_extupdt; - else - listhead = &inodedep->id_inoupdt; - TAILQ_FOREACH(listadp, listhead, ad_next) { - /* found our block */ - if (listadp == adp) - break; - /* continue if ad_oldlbn is not a fragment */ - if (listadp->ad_oldsize == 0 || - listadp->ad_oldsize == bsize) - continue; - /* hit a fragment */ - return; - } - /* - * If we have reached the end of the current list without - * finding the just finished dependency, then it must be - * on the future dependency list. Future dependencies cannot - * be freed until they are moved to the current list. - */ - if (listadp == NULL) { -#ifdef DEBUG - if (adp->ad_state & EXTDATA) - listhead = &inodedep->id_newextupdt; - else - listhead = &inodedep->id_newinoupdt; - TAILQ_FOREACH(listadp, listhead, ad_next) - /* found our block */ - if (listadp == adp) - break; - if (listadp == NULL) { - lk.lkt_held = NOHOLDER; - panic("handle_allocdirect_partdone: lost dep"); - } -#endif /* DEBUG */ - return; - } - /* - * If we have found the just finished dependency, then free - * it along with anything that follows it that is complete. - * If the inode still has a bitmap dependency, then it has - * never been written to disk, hence the on-disk inode cannot - * reference the old fragment so we can free it without delay. - */ - delay = (inodedep->id_state & DEPCOMPLETE); - for (; adp; adp = listadp) { - listadp = TAILQ_NEXT(adp, ad_next); - if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE) - return; - free_allocdirect(listhead, adp, delay); - } -} - -/* - * Called from within softdep_disk_write_complete above. Note that - * this routine is always called from interrupt level with further - * splbio interrupts blocked. 
- */ -static void -handle_allocindir_partdone(aip) - struct allocindir *aip; /* the completed allocindir */ -{ - struct indirdep *indirdep; - - if ((aip->ai_state & ALLCOMPLETE) != ALLCOMPLETE) - return; - if (aip->ai_buf != NULL) { - lk.lkt_held = NOHOLDER; - panic("handle_allocindir_partdone: dangling dependency"); - } - indirdep = aip->ai_indirdep; - if (indirdep->ir_state & UNDONE) { - LIST_REMOVE(aip, ai_next); - LIST_INSERT_HEAD(&indirdep->ir_donehd, aip, ai_next); - return; - } - if (indirdep->ir_state & UFS1FMT) - ((ufs1_daddr_t *)indirdep->ir_savebp->b_data)[aip->ai_offset] = - aip->ai_newblkno; - else - ((ufs2_daddr_t *)indirdep->ir_savebp->b_data)[aip->ai_offset] = - aip->ai_newblkno; - LIST_REMOVE(aip, ai_next); - if (aip->ai_freefrag != NULL) - add_to_worklist(&aip->ai_freefrag->ff_list); - WORKITEM_FREE(aip, D_ALLOCINDIR); -} - -/* - * Called from within softdep_disk_write_complete above to restore - * in-memory inode block contents to their most up-to-date state. Note - * that this routine is always called from interrupt level with further - * splbio interrupts blocked. - */ -static int -handle_written_inodeblock(inodedep, bp) - struct inodedep *inodedep; - struct buf *bp; /* buffer containing the inode block */ -{ - struct worklist *wk, *filefree; - struct allocdirect *adp, *nextadp; - struct ufs1_dinode *dp1 = NULL; - struct ufs2_dinode *dp2 = NULL; - int hadchanges, fstype; - - if ((inodedep->id_state & IOSTARTED) == 0) { - lk.lkt_held = NOHOLDER; - panic("handle_written_inodeblock: not started"); - } - inodedep->id_state &= ~IOSTARTED; - inodedep->id_state |= COMPLETE; - if (inodedep->id_fs->fs_magic == FS_UFS1_MAGIC) { - fstype = UFS1; - dp1 = (struct ufs1_dinode *)bp->b_data + - ino_to_fsbo(inodedep->id_fs, inodedep->id_ino); - } else { - fstype = UFS2; - dp2 = (struct ufs2_dinode *)bp->b_data + - ino_to_fsbo(inodedep->id_fs, inodedep->id_ino); - } - /* - * If we had to rollback the inode allocation because of - * bitmaps being incomplete, then simply restore it. - * Keep the block dirty so that it will not be reclaimed until - * all associated dependencies have been cleared and the - * corresponding updates written to disk. - */ - if (inodedep->id_savedino1 != NULL) { - if (fstype == UFS1) - *dp1 = *inodedep->id_savedino1; - else - *dp2 = *inodedep->id_savedino2; - FREE(inodedep->id_savedino1, M_INODEDEP); - inodedep->id_savedino1 = NULL; - if ((bp->b_flags & B_DELWRI) == 0) - stat_inode_bitmap++; - bdirty(bp); - return (1); - } - /* - * Roll forward anything that had to be rolled back before - * the inode could be updated. 
- */ - hadchanges = 0; - for (adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; adp = nextadp) { - nextadp = TAILQ_NEXT(adp, ad_next); - if (adp->ad_state & ATTACHED) { - lk.lkt_held = NOHOLDER; - panic("handle_written_inodeblock: new entry"); - } - if (fstype == UFS1) { - if (adp->ad_lbn < NDADDR) { - if (dp1->di_db[adp->ad_lbn]!=adp->ad_oldblkno) { - lk.lkt_held = NOHOLDER; - panic("%s %s #%jd mismatch %d != %jd", - "handle_written_inodeblock:", - "direct pointer", - (intmax_t)adp->ad_lbn, - dp1->di_db[adp->ad_lbn], - (intmax_t)adp->ad_oldblkno); - } - dp1->di_db[adp->ad_lbn] = adp->ad_newblkno; - } else { - if (dp1->di_ib[adp->ad_lbn - NDADDR] != 0) { - lk.lkt_held = NOHOLDER; - panic("%s: %s #%jd allocated as %d", - "handle_written_inodeblock", - "indirect pointer", - (intmax_t)adp->ad_lbn - NDADDR, - dp1->di_ib[adp->ad_lbn - NDADDR]); - } - dp1->di_ib[adp->ad_lbn - NDADDR] = - adp->ad_newblkno; - } - } else { - if (adp->ad_lbn < NDADDR) { - if (dp2->di_db[adp->ad_lbn]!=adp->ad_oldblkno) { - lk.lkt_held = NOHOLDER; - panic("%s: %s #%jd %s %jd != %jd", - "handle_written_inodeblock", - "direct pointer", - (intmax_t)adp->ad_lbn, "mismatch", - (intmax_t)dp2->di_db[adp->ad_lbn], - (intmax_t)adp->ad_oldblkno); - } - dp2->di_db[adp->ad_lbn] = adp->ad_newblkno; - } else { - if (dp2->di_ib[adp->ad_lbn - NDADDR] != 0) { - lk.lkt_held = NOHOLDER; - panic("%s: %s #%jd allocated as %jd", - "handle_written_inodeblock", - "indirect pointer", - (intmax_t)adp->ad_lbn - NDADDR, - (intmax_t) - dp2->di_ib[adp->ad_lbn - NDADDR]); - } - dp2->di_ib[adp->ad_lbn - NDADDR] = - adp->ad_newblkno; - } - } - adp->ad_state &= ~UNDONE; - adp->ad_state |= ATTACHED; - hadchanges = 1; - } - for (adp = TAILQ_FIRST(&inodedep->id_extupdt); adp; adp = nextadp) { - nextadp = TAILQ_NEXT(adp, ad_next); - if (adp->ad_state & ATTACHED) { - lk.lkt_held = NOHOLDER; - panic("handle_written_inodeblock: new entry"); - } - if (dp2->di_extb[adp->ad_lbn] != adp->ad_oldblkno) { - lk.lkt_held = NOHOLDER; - panic("%s: direct pointers #%jd %s %jd != %jd", - "handle_written_inodeblock", - (intmax_t)adp->ad_lbn, "mismatch", - (intmax_t)dp2->di_extb[adp->ad_lbn], - (intmax_t)adp->ad_oldblkno); - } - dp2->di_extb[adp->ad_lbn] = adp->ad_newblkno; - adp->ad_state &= ~UNDONE; - adp->ad_state |= ATTACHED; - hadchanges = 1; - } - if (hadchanges && (bp->b_flags & B_DELWRI) == 0) - stat_direct_blk_ptrs++; - /* - * Reset the file size to its most up-to-date value. - */ - if (inodedep->id_savedsize == -1 || inodedep->id_savedextsize == -1) { - lk.lkt_held = NOHOLDER; - panic("handle_written_inodeblock: bad size"); - } - if (fstype == UFS1) { - if (dp1->di_size != inodedep->id_savedsize) { - dp1->di_size = inodedep->id_savedsize; - hadchanges = 1; - } - } else { - if (dp2->di_size != inodedep->id_savedsize) { - dp2->di_size = inodedep->id_savedsize; - hadchanges = 1; - } - if (dp2->di_extsize != inodedep->id_savedextsize) { - dp2->di_extsize = inodedep->id_savedextsize; - hadchanges = 1; - } - } - inodedep->id_savedsize = -1; - inodedep->id_savedextsize = -1; - /* - * If there were any rollbacks in the inode block, then it must be - * marked dirty so that its will eventually get written back in - * its correct form. - */ - if (hadchanges) - bdirty(bp); - /* - * Process any allocdirects that completed during the update. 
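The calls that follow hand the now-complete head of each list to handle_allocdirect_partdone (earlier in this file), which frees a completed dependency only when no earlier block in the lbn-ordered list still rolls back to a fragment. A toy version of just that check; the structures and values below are invented for illustration:

/*
 * Toy version of the check in handle_allocdirect_partdone(): a completed
 * allocation may be freed only if no earlier block in the lbn-ordered
 * list still rolls back to a fragment. Hypothetical structures.
 */
#include <stdio.h>

struct toydep {
        long oldsize;           /* previous allocation size at this lbn */
};

static int
can_free(const struct toydep *list, int idx, long bsize)
{
        int i;

        for (i = 0; i < idx; i++)
                if (list[i].oldsize != 0 && list[i].oldsize != bsize)
                        return (0);     /* earlier fragment: must wait */
        return (1);
}

int
main(void)
{
        struct toydep deps[] = {
                { 16384 },      /* lbn 0: full block */
                { 2048 },       /* lbn 1: fragment still to be rolled back */
                { 0 },          /* lbn 2: newly allocated */
        };

        printf("free lbn 1 now? %d\n", can_free(deps, 1, 16384)); /* 1 */
        printf("free lbn 2 now? %d\n", can_free(deps, 2, 16384)); /* 0 */
        return (0);
}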
- */ - if ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != NULL) - handle_allocdirect_partdone(adp); - if ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != NULL) - handle_allocdirect_partdone(adp); - /* - * Process deallocations that were held pending until the - * inode had been written to disk. Freeing of the inode - * is delayed until after all blocks have been freed to - * avoid creation of new triples - * before the old ones have been deleted. - */ - filefree = NULL; - while ((wk = LIST_FIRST(&inodedep->id_bufwait)) != NULL) { - WORKLIST_REMOVE(wk); - switch (wk->wk_type) { - - case D_FREEFILE: - /* - * We defer adding filefree to the worklist until - * all other additions have been made to ensure - * that it will be done after all the old blocks - * have been freed. - */ - if (filefree != NULL) { - lk.lkt_held = NOHOLDER; - panic("handle_written_inodeblock: filefree"); - } - filefree = wk; - continue; - - case D_MKDIR: - handle_written_mkdir(WK_MKDIR(wk), MKDIR_PARENT); - continue; - - case D_DIRADD: - diradd_inode_written(WK_DIRADD(wk), inodedep); - continue; - - case D_FREEBLKS: - case D_FREEFRAG: - case D_DIRREM: - add_to_worklist(wk); - continue; - - case D_NEWDIRBLK: - free_newdirblk(WK_NEWDIRBLK(wk)); - continue; - - default: - lk.lkt_held = NOHOLDER; - panic("handle_written_inodeblock: Unknown type %s", - TYPENAME(wk->wk_type)); - /* NOTREACHED */ - } - } - if (filefree != NULL) { - if (free_inodedep(inodedep) == 0) { - lk.lkt_held = NOHOLDER; - panic("handle_written_inodeblock: live inodedep"); - } - add_to_worklist(filefree); - return (0); - } - - /* - * If no outstanding dependencies, free it. - */ - if (free_inodedep(inodedep) || - (TAILQ_FIRST(&inodedep->id_inoupdt) == 0 && - TAILQ_FIRST(&inodedep->id_extupdt) == 0)) - return (0); - return (hadchanges); -} - -/* - * Process a diradd entry after its dependent inode has been written. - * This routine must be called with splbio interrupts blocked. - */ -static void -diradd_inode_written(dap, inodedep) - struct diradd *dap; - struct inodedep *inodedep; -{ - struct pagedep *pagedep; - - dap->da_state |= COMPLETE; - if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) { - if (dap->da_state & DIRCHG) - pagedep = dap->da_previous->dm_pagedep; - else - pagedep = dap->da_pagedep; - LIST_REMOVE(dap, da_pdlist); - LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); - } - WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list); -} - -/* - * Handle the completion of a mkdir dependency. - */ -static void -handle_written_mkdir(mkdir, type) - struct mkdir *mkdir; - int type; -{ - struct diradd *dap; - struct pagedep *pagedep; - - if (mkdir->md_state != type) { - lk.lkt_held = NOHOLDER; - panic("handle_written_mkdir: bad type"); - } - dap = mkdir->md_diradd; - dap->da_state &= ~type; - if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) == 0) - dap->da_state |= DEPCOMPLETE; - if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) { - if (dap->da_state & DIRCHG) - pagedep = dap->da_previous->dm_pagedep; - else - pagedep = dap->da_pagedep; - LIST_REMOVE(dap, da_pdlist); - LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); - } - LIST_REMOVE(mkdir, md_mkdirs); - WORKITEM_FREE(mkdir, D_MKDIR); -} - -/* - * Called from within softdep_disk_write_complete above. - * A write operation was just completed. Removed inodes can - * now be freed and associated block pointers may be committed. - * Note that this routine is always called from interrupt level - * with further splbio interrupts blocked. 
- */ -static int -handle_written_filepage(pagedep, bp) - struct pagedep *pagedep; - struct buf *bp; /* buffer containing the written page */ -{ - struct dirrem *dirrem; - struct diradd *dap, *nextdap; - struct direct *ep; - int i, chgs; - - if ((pagedep->pd_state & IOSTARTED) == 0) { - lk.lkt_held = NOHOLDER; - panic("handle_written_filepage: not started"); - } - pagedep->pd_state &= ~IOSTARTED; - /* - * Process any directory removals that have been committed. - */ - while ((dirrem = LIST_FIRST(&pagedep->pd_dirremhd)) != NULL) { - LIST_REMOVE(dirrem, dm_next); - dirrem->dm_dirinum = pagedep->pd_ino; - add_to_worklist(&dirrem->dm_list); - } - /* - * Free any directory additions that have been committed. - * If it is a newly allocated block, we have to wait until - * the on-disk directory inode claims the new block. - */ - if ((pagedep->pd_state & NEWBLOCK) == 0) - while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL) - free_diradd(dap); - /* - * Uncommitted directory entries must be restored. - */ - for (chgs = 0, i = 0; i < DAHASHSZ; i++) { - for (dap = LIST_FIRST(&pagedep->pd_diraddhd[i]); dap; - dap = nextdap) { - nextdap = LIST_NEXT(dap, da_pdlist); - if (dap->da_state & ATTACHED) { - lk.lkt_held = NOHOLDER; - panic("handle_written_filepage: attached"); - } - ep = (struct direct *) - ((char *)bp->b_data + dap->da_offset); - ep->d_ino = dap->da_newinum; - dap->da_state &= ~UNDONE; - dap->da_state |= ATTACHED; - chgs = 1; - /* - * If the inode referenced by the directory has - * been written out, then the dependency can be - * moved to the pending list. - */ - if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) { - LIST_REMOVE(dap, da_pdlist); - LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, - da_pdlist); - } - } - } - /* - * If there were any rollbacks in the directory, then it must be - * marked dirty so that its will eventually get written back in - * its correct form. - */ - if (chgs) { - if ((bp->b_flags & B_DELWRI) == 0) - stat_dir_entry++; - bdirty(bp); - return (1); - } - /* - * If we are not waiting for a new directory block to be - * claimed by its inode, then the pagedep will be freed. - * Otherwise it will remain to track any new entries on - * the page in case they are fsync'ed. - */ - if ((pagedep->pd_state & NEWBLOCK) == 0) { - LIST_REMOVE(pagedep, pd_hash); - WORKITEM_FREE(pagedep, D_PAGEDEP); - } - return (0); -} - -/* - * Writing back in-core inode structures. - * - * The filesystem only accesses an inode's contents when it occupies an - * "in-core" inode structure. These "in-core" structures are separate from - * the page frames used to cache inode blocks. Only the latter are - * transferred to/from the disk. So, when the updated contents of the - * "in-core" inode structure are copied to the corresponding in-memory inode - * block, the dependencies are also transferred. The following procedure is - * called when copying a dirty "in-core" inode to a cached inode block. - */ - -/* - * Called when an inode is loaded from disk. If the effective link count - * differed from the actual link count when it was last flushed, then we - * need to ensure that the correct effective link count is put back. - */ -void -softdep_load_inodeblock(ip) - struct inode *ip; /* the "in_core" copy of the inode */ -{ - struct inodedep *inodedep; - - /* - * Check for alternate nlink count. 
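The assignment that follows undoes the delta recorded by softdep_change_linkcnt: the on-disk link count lags the effective count by id_nlinkdelta, and reloading the inode subtracts that delta again. A small numeric sketch with invented values:

/*
 * The nlink bookkeeping in miniature: the on-disk count lags the
 * effective count by id_nlinkdelta, and softdep_load_inodeblock()
 * re-derives the effective count from that delta. Values invented.
 */
#include <stdio.h>

int
main(void)
{
        int nlink = 3;          /* count as last written to disk */
        int effnlink = 1;       /* two removals not yet committed */
        int nlinkdelta;

        /* softdep_change_linkcnt(): remember the gap. */
        nlinkdelta = nlink - effnlink;          /* 2 */

        /* softdep_load_inodeblock(): rebuild the effective count. */
        printf("reloaded effnlink = %d\n", nlink - nlinkdelta); /* 1 */
        return (0);
}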
- */ - ip->i_effnlink = ip->i_nlink; - ACQUIRE_LOCK(&lk); - if (inodedep_lookup(ip->i_fs, ip->i_number, 0, &inodedep) == 0) { - FREE_LOCK(&lk); - return; - } - ip->i_effnlink -= inodedep->id_nlinkdelta; - if (inodedep->id_state & SPACECOUNTED) - ip->i_flag |= IN_SPACECOUNTED; - FREE_LOCK(&lk); -} - -/* - * This routine is called just before the "in-core" inode - * information is to be copied to the in-memory inode block. - * Recall that an inode block contains several inodes. If - * the force flag is set, then the dependencies will be - * cleared so that the update can always be made. Note that - * the buffer is locked when this routine is called, so we - * will never be in the middle of writing the inode block - * to disk. - */ -void -softdep_update_inodeblock(ip, bp, waitfor) - struct inode *ip; /* the "in_core" copy of the inode */ - struct buf *bp; /* the buffer containing the inode block */ - int waitfor; /* nonzero => update must be allowed */ -{ - struct inodedep *inodedep; - struct worklist *wk; - struct buf *ibp; - int error; - - /* - * If the effective link count is not equal to the actual link - * count, then we must track the difference in an inodedep while - * the inode is (potentially) tossed out of the cache. Otherwise, - * if there is no existing inodedep, then there are no dependencies - * to track. - */ - ACQUIRE_LOCK(&lk); - if (inodedep_lookup(ip->i_fs, ip->i_number, 0, &inodedep) == 0) { - FREE_LOCK(&lk); - if (ip->i_effnlink != ip->i_nlink) - panic("softdep_update_inodeblock: bad link count"); - return; - } - if (inodedep->id_nlinkdelta != ip->i_nlink - ip->i_effnlink) { - FREE_LOCK(&lk); - panic("softdep_update_inodeblock: bad delta"); - } - /* - * Changes have been initiated. Anything depending on these - * changes cannot occur until this inode has been written. - */ - inodedep->id_state &= ~COMPLETE; - if ((inodedep->id_state & ONWORKLIST) == 0) - WORKLIST_INSERT(&bp->b_dep, &inodedep->id_list); - /* - * Any new dependencies associated with the incore inode must - * now be moved to the list associated with the buffer holding - * the in-memory copy of the inode. Once merged process any - * allocdirects that are completed by the merger. - */ - merge_inode_lists(&inodedep->id_newinoupdt, &inodedep->id_inoupdt); - if (TAILQ_FIRST(&inodedep->id_inoupdt) != NULL) - handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_inoupdt)); - merge_inode_lists(&inodedep->id_newextupdt, &inodedep->id_extupdt); - if (TAILQ_FIRST(&inodedep->id_extupdt) != NULL) - handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_extupdt)); - /* - * Now that the inode has been pushed into the buffer, the - * operations dependent on the inode being written to disk - * can be moved to the id_bufwait so that they will be - * processed when the buffer I/O completes. - */ - while ((wk = LIST_FIRST(&inodedep->id_inowait)) != NULL) { - WORKLIST_REMOVE(wk); - WORKLIST_INSERT(&inodedep->id_bufwait, wk); - } - /* - * Newly allocated inodes cannot be written until the bitmap - * that allocates them have been written (indicated by - * DEPCOMPLETE being set in id_state). If we are doing a - * forced sync (e.g., an fsync on a file), we force the bitmap - * to be written so that the update can be done. 
- */ - if ((inodedep->id_state & DEPCOMPLETE) != 0 || waitfor == 0) { - FREE_LOCK(&lk); - return; - } - ibp = inodedep->id_buf; - ibp = getdirtybuf(&ibp, NULL, MNT_WAIT); - FREE_LOCK(&lk); - if (ibp && (error = BUF_WRITE(ibp)) != 0) - softdep_error("softdep_update_inodeblock: bwrite", error); - if ((inodedep->id_state & DEPCOMPLETE) == 0) - panic("softdep_update_inodeblock: update failed"); -} - -/* - * Merge the a new inode dependency list (such as id_newinoupdt) into an - * old inode dependency list (such as id_inoupdt). This routine must be - * called with splbio interrupts blocked. - */ -static void -merge_inode_lists(newlisthead, oldlisthead) - struct allocdirectlst *newlisthead; - struct allocdirectlst *oldlisthead; -{ - struct allocdirect *listadp, *newadp; - - newadp = TAILQ_FIRST(newlisthead); - for (listadp = TAILQ_FIRST(oldlisthead); listadp && newadp;) { - if (listadp->ad_lbn < newadp->ad_lbn) { - listadp = TAILQ_NEXT(listadp, ad_next); - continue; - } - TAILQ_REMOVE(newlisthead, newadp, ad_next); - TAILQ_INSERT_BEFORE(listadp, newadp, ad_next); - if (listadp->ad_lbn == newadp->ad_lbn) { - allocdirect_merge(oldlisthead, newadp, - listadp); - listadp = newadp; - } - newadp = TAILQ_FIRST(newlisthead); - } - while ((newadp = TAILQ_FIRST(newlisthead)) != NULL) { - TAILQ_REMOVE(newlisthead, newadp, ad_next); - TAILQ_INSERT_TAIL(oldlisthead, newadp, ad_next); - } -} - -/* - * If we are doing an fsync, then we must ensure that any directory - * entries for the inode have been written after the inode gets to disk. - */ -int -softdep_fsync(vp) - struct vnode *vp; /* the "in_core" copy of the inode */ -{ - struct inodedep *inodedep; - struct pagedep *pagedep; - struct worklist *wk; - struct diradd *dap; - struct mount *mnt; - struct vnode *pvp; - struct inode *ip; - struct buf *bp; - struct fs *fs; - struct thread *td = curthread; - int error, flushparent; - ino_t parentino; - ufs_lbn_t lbn; - - ip = VTOI(vp); - fs = ip->i_fs; - ACQUIRE_LOCK(&lk); - if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) == 0) { - FREE_LOCK(&lk); - return (0); - } - if (LIST_FIRST(&inodedep->id_inowait) != NULL || - LIST_FIRST(&inodedep->id_bufwait) != NULL || - TAILQ_FIRST(&inodedep->id_extupdt) != NULL || - TAILQ_FIRST(&inodedep->id_newextupdt) != NULL || - TAILQ_FIRST(&inodedep->id_inoupdt) != NULL || - TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL) { - FREE_LOCK(&lk); - panic("softdep_fsync: pending ops"); - } - for (error = 0, flushparent = 0; ; ) { - if ((wk = LIST_FIRST(&inodedep->id_pendinghd)) == NULL) - break; - if (wk->wk_type != D_DIRADD) { - FREE_LOCK(&lk); - panic("softdep_fsync: Unexpected type %s", - TYPENAME(wk->wk_type)); - } - dap = WK_DIRADD(wk); - /* - * Flush our parent if this directory entry has a MKDIR_PARENT - * dependency or is contained in a newly allocated block. - */ - if (dap->da_state & DIRCHG) - pagedep = dap->da_previous->dm_pagedep; - else - pagedep = dap->da_pagedep; - mnt = pagedep->pd_mnt; - parentino = pagedep->pd_ino; - lbn = pagedep->pd_lbn; - if ((dap->da_state & (MKDIR_BODY | COMPLETE)) != COMPLETE) { - FREE_LOCK(&lk); - panic("softdep_fsync: dirty"); - } - if ((dap->da_state & MKDIR_PARENT) || - (pagedep->pd_state & NEWBLOCK)) - flushparent = 1; - else - flushparent = 0; - /* - * If we are being fsync'ed as part of vgone'ing this vnode, - * then we will not be able to release and recover the - * vnode below, so we just have to give up on writing its - * directory entry out. 
It will eventually be written, just - * not now, but then the user was not asking to have it - * written, so we are not breaking any promises. - */ - if (vp->v_iflag & VI_XLOCK) - break; - /* - * We prevent deadlock by always fetching inodes from the - * root, moving down the directory tree. Thus, when fetching - * our parent directory, we first try to get the lock. If - * that fails, we must unlock ourselves before requesting - * the lock on our parent. See the comment in ufs_lookup - * for details on possible races. - */ - FREE_LOCK(&lk); - if (VFS_VGET(mnt, parentino, LK_NOWAIT | LK_EXCLUSIVE, &pvp)) { - VOP_UNLOCK(vp, 0, td); - error = VFS_VGET(mnt, parentino, LK_EXCLUSIVE, &pvp); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); - if (error != 0) - return (error); - } - /* - * All MKDIR_PARENT dependencies and all the NEWBLOCK pagedeps - * that are contained in direct blocks will be resolved by - * doing a UFS_UPDATE. Pagedeps contained in indirect blocks - * may require a complete sync'ing of the directory. So, we - * try the cheap and fast UFS_UPDATE first, and if that fails, - * then we do the slower VOP_FSYNC of the directory. - */ - if (flushparent) { - if ((error = UFS_UPDATE(pvp, 1)) != 0) { - vput(pvp); - return (error); - } - if ((pagedep->pd_state & NEWBLOCK) && - (error = VOP_FSYNC(pvp, td->td_ucred, MNT_WAIT, td))) { - vput(pvp); - return (error); - } - } - /* - * Flush directory page containing the inode's name. - */ - error = bread(pvp, lbn, blksize(fs, VTOI(pvp), lbn), td->td_ucred, - &bp); - if (error == 0) - error = BUF_WRITE(bp); - else - brelse(bp); - vput(pvp); - if (error != 0) - return (error); - ACQUIRE_LOCK(&lk); - if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) == 0) - break; - } - FREE_LOCK(&lk); - return (0); -} - -/* - * Flush all the dirty bitmaps associated with the block device - * before flushing the rest of the dirty blocks so as to reduce - * the number of dependencies that will have to be rolled back. - */ -void -softdep_fsync_mountdev(vp) - struct vnode *vp; -{ - struct buf *bp, *nbp; - struct worklist *wk; - - if (!vn_isdisk(vp, NULL)) - panic("softdep_fsync_mountdev: vnode not a disk"); - ACQUIRE_LOCK(&lk); - VI_LOCK(vp); - for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { - nbp = TAILQ_NEXT(bp, b_vnbufs); - /* - * If it is already scheduled, skip to the next buffer. - */ - if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) - continue; - - if ((bp->b_flags & B_DELWRI) == 0) { - FREE_LOCK(&lk); - panic("softdep_fsync_mountdev: not dirty"); - } - /* - * We are only interested in bitmaps with outstanding - * dependencies. - */ - if ((wk = LIST_FIRST(&bp->b_dep)) == NULL || - wk->wk_type != D_BMSAFEMAP || - (bp->b_vflags & BV_BKGRDINPROG)) { - BUF_UNLOCK(bp); - continue; - } - VI_UNLOCK(vp); - bremfree(bp); - FREE_LOCK(&lk); - (void) bawrite(bp); - ACQUIRE_LOCK(&lk); - /* - * Since we may have slept during the I/O, we need - * to start from a known point. - */ - VI_LOCK(vp); - nbp = TAILQ_FIRST(&vp->v_dirtyblkhd); - } - drain_output(vp, 1); - VI_UNLOCK(vp); - FREE_LOCK(&lk); -} - -/* - * This routine is called when we are trying to synchronously flush a - * file. This routine must eliminate any filesystem metadata dependencies - * so that the syncing routine can succeed by pushing the dirty blocks - * associated with the file. If any I/O errors occur, they are returned. 
- */ -int -softdep_sync_metadata(ap) - struct vop_fsync_args /* { - struct vnode *a_vp; - struct ucred *a_cred; - int a_waitfor; - struct thread *a_td; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - struct pagedep *pagedep; - struct allocdirect *adp; - struct allocindir *aip; - struct buf *bp, *nbp; - struct worklist *wk; - int i, error, waitfor; - - /* - * Check whether this vnode is involved in a filesystem - * that is doing soft dependency processing. - */ - if (!vn_isdisk(vp, NULL)) { - if (!DOINGSOFTDEP(vp)) - return (0); - } else - if (vp->v_rdev->si_mountpoint == NULL || - (vp->v_rdev->si_mountpoint->mnt_flag & MNT_SOFTDEP) == 0) - return (0); - /* - * Ensure that any direct block dependencies have been cleared. - */ - ACQUIRE_LOCK(&lk); - if ((error = flush_inodedep_deps(VTOI(vp)->i_fs, VTOI(vp)->i_number))) { - FREE_LOCK(&lk); - return (error); - } - /* - * For most files, the only metadata dependencies are the - * cylinder group maps that allocate their inode or blocks. - * The block allocation dependencies can be found by traversing - * the dependency lists for any buffers that remain on their - * dirty buffer list. The inode allocation dependency will - * be resolved when the inode is updated with MNT_WAIT. - * This work is done in two passes. The first pass grabs most - * of the buffers and begins asynchronously writing them. The - * only way to wait for these asynchronous writes is to sleep - * on the filesystem vnode which may stay busy for a long time - * if the filesystem is active. So, instead, we make a second - * pass over the dependencies blocking on each write. In the - * usual case we will be blocking against a write that we - * initiated, so when it is done the dependency will have been - * resolved. Thus the second pass is expected to end quickly. - */ - waitfor = MNT_NOWAIT; -top: - /* - * We must wait for any I/O in progress to finish so that - * all potential buffers on the dirty list will be visible. - */ - VI_LOCK(vp); - drain_output(vp, 1); - bp = getdirtybuf(&TAILQ_FIRST(&vp->v_dirtyblkhd), - VI_MTX(vp), MNT_WAIT); - if (bp == NULL) { - VI_UNLOCK(vp); - FREE_LOCK(&lk); - return (0); - } - /* While syncing snapshots, we must allow recursive lookups */ - bp->b_lock.lk_flags |= LK_CANRECURSE; -loop: - /* - * As we hold the buffer locked, none of its dependencies - * will disappear. 
- */ - LIST_FOREACH(wk, &bp->b_dep, wk_list) { - switch (wk->wk_type) { - - case D_ALLOCDIRECT: - adp = WK_ALLOCDIRECT(wk); - if (adp->ad_state & DEPCOMPLETE) - continue; - nbp = adp->ad_buf; - nbp = getdirtybuf(&nbp, NULL, waitfor); - if (nbp == NULL) - continue; - FREE_LOCK(&lk); - if (waitfor == MNT_NOWAIT) { - bawrite(nbp); - } else if ((error = BUF_WRITE(nbp)) != 0) { - break; - } - ACQUIRE_LOCK(&lk); - continue; - - case D_ALLOCINDIR: - aip = WK_ALLOCINDIR(wk); - if (aip->ai_state & DEPCOMPLETE) - continue; - nbp = aip->ai_buf; - nbp = getdirtybuf(&nbp, NULL, waitfor); - if (nbp == NULL) - continue; - FREE_LOCK(&lk); - if (waitfor == MNT_NOWAIT) { - bawrite(nbp); - } else if ((error = BUF_WRITE(nbp)) != 0) { - break; - } - ACQUIRE_LOCK(&lk); - continue; - - case D_INDIRDEP: - restart: - - LIST_FOREACH(aip, &WK_INDIRDEP(wk)->ir_deplisthd, ai_next) { - if (aip->ai_state & DEPCOMPLETE) - continue; - nbp = aip->ai_buf; - nbp = getdirtybuf(&nbp, NULL, MNT_WAIT); - if (nbp == NULL) - goto restart; - FREE_LOCK(&lk); - if ((error = BUF_WRITE(nbp)) != 0) { - break; - } - ACQUIRE_LOCK(&lk); - goto restart; - } - continue; - - case D_INODEDEP: - if ((error = flush_inodedep_deps(WK_INODEDEP(wk)->id_fs, - WK_INODEDEP(wk)->id_ino)) != 0) { - FREE_LOCK(&lk); - break; - } - continue; - - case D_PAGEDEP: - /* - * We are trying to sync a directory that may - * have dependencies on both its own metadata - * and/or dependencies on the inodes of any - * recently allocated files. We walk its diradd - * lists pushing out the associated inode. - */ - pagedep = WK_PAGEDEP(wk); - for (i = 0; i < DAHASHSZ; i++) { - if (LIST_FIRST(&pagedep->pd_diraddhd[i]) == 0) - continue; - if ((error = - flush_pagedep_deps(vp, pagedep->pd_mnt, - &pagedep->pd_diraddhd[i]))) { - FREE_LOCK(&lk); - break; - } - } - continue; - - case D_MKDIR: - /* - * This case should never happen if the vnode has - * been properly sync'ed. However, if this function - * is used at a place where the vnode has not yet - * been sync'ed, this dependency can show up. So, - * rather than panic, just flush it. - */ - nbp = WK_MKDIR(wk)->md_buf; - nbp = getdirtybuf(&nbp, NULL, waitfor); - if (nbp == NULL) - continue; - FREE_LOCK(&lk); - if (waitfor == MNT_NOWAIT) { - bawrite(nbp); - } else if ((error = BUF_WRITE(nbp)) != 0) { - break; - } - ACQUIRE_LOCK(&lk); - continue; - - case D_BMSAFEMAP: - /* - * This case should never happen if the vnode has - * been properly sync'ed. However, if this function - * is used at a place where the vnode has not yet - * been sync'ed, this dependency can show up. So, - * rather than panic, just flush it. 
- */ - nbp = WK_BMSAFEMAP(wk)->sm_buf; - nbp = getdirtybuf(&nbp, NULL, waitfor); - if (nbp == NULL) - continue; - FREE_LOCK(&lk); - if (waitfor == MNT_NOWAIT) { - bawrite(nbp); - } else if ((error = BUF_WRITE(nbp)) != 0) { - break; - } - ACQUIRE_LOCK(&lk); - continue; - - default: - FREE_LOCK(&lk); - panic("softdep_sync_metadata: Unknown type %s", - TYPENAME(wk->wk_type)); - /* NOTREACHED */ - } - /* We reach here only in error and unlocked */ - if (error == 0) - panic("softdep_sync_metadata: zero error"); - bp->b_lock.lk_flags &= ~LK_CANRECURSE; - bawrite(bp); - return (error); - } - VI_LOCK(vp); - nbp = getdirtybuf(&TAILQ_NEXT(bp, b_vnbufs), VI_MTX(vp), MNT_WAIT); - if (nbp == NULL) - VI_UNLOCK(vp); - FREE_LOCK(&lk); - bp->b_lock.lk_flags &= ~LK_CANRECURSE; - bawrite(bp); - ACQUIRE_LOCK(&lk); - if (nbp != NULL) { - bp = nbp; - goto loop; - } - /* - * The brief unlock is to allow any pent up dependency - * processing to be done. Then proceed with the second pass. - */ - if (waitfor == MNT_NOWAIT) { - waitfor = MNT_WAIT; - FREE_LOCK(&lk); - ACQUIRE_LOCK(&lk); - goto top; - } - - /* - * If we have managed to get rid of all the dirty buffers, - * then we are done. For certain directories and block - * devices, we may need to do further work. - * - * We must wait for any I/O in progress to finish so that - * all potential buffers on the dirty list will be visible. - */ - VI_LOCK(vp); - drain_output(vp, 1); - if (TAILQ_FIRST(&vp->v_dirtyblkhd) == NULL) { - VI_UNLOCK(vp); - FREE_LOCK(&lk); - return (0); - } - VI_UNLOCK(vp); - - FREE_LOCK(&lk); - /* - * If we are trying to sync a block device, some of its buffers may - * contain metadata that cannot be written until the contents of some - * partially written files have been written to disk. The only easy - * way to accomplish this is to sync the entire filesystem (luckily - * this happens rarely). - */ - if (vn_isdisk(vp, NULL) && - vp->v_rdev->si_mountpoint && !VOP_ISLOCKED(vp, NULL) && - (error = VFS_SYNC(vp->v_rdev->si_mountpoint, MNT_WAIT, ap->a_cred, - ap->a_td)) != 0) - return (error); - return (0); -} - -/* - * Flush the dependencies associated with an inodedep. - * Called with splbio blocked. - */ -static int -flush_inodedep_deps(fs, ino) - struct fs *fs; - ino_t ino; -{ - struct inodedep *inodedep; - int error, waitfor; - - /* - * This work is done in two passes. The first pass grabs most - * of the buffers and begins asynchronously writing them. The - * only way to wait for these asynchronous writes is to sleep - * on the filesystem vnode which may stay busy for a long time - * if the filesystem is active. So, instead, we make a second - * pass over the dependencies blocking on each write. In the - * usual case we will be blocking against a write that we - * initiated, so when it is done the dependency will have been - * resolved. Thus the second pass is expected to end quickly. - * We give a brief window at the top of the loop to allow - * any pending I/O to complete. - */ - for (error = 0, waitfor = MNT_NOWAIT; ; ) { - if (error) - return (error); - FREE_LOCK(&lk); - ACQUIRE_LOCK(&lk); - if (inodedep_lookup(fs, ino, 0, &inodedep) == 0) - return (0); - if (flush_deplist(&inodedep->id_inoupdt, waitfor, &error) || - flush_deplist(&inodedep->id_newinoupdt, waitfor, &error) || - flush_deplist(&inodedep->id_extupdt, waitfor, &error) || - flush_deplist(&inodedep->id_newextupdt, waitfor, &error)) - continue; - /* - * If pass2, we are done, otherwise do pass 2. 
- */ - if (waitfor == MNT_WAIT) - break; - waitfor = MNT_WAIT; - } - /* - * Try freeing inodedep in case all dependencies have been removed. - */ - if (inodedep_lookup(fs, ino, 0, &inodedep) != 0) - (void) free_inodedep(inodedep); - return (0); -} - -/* - * Flush an inode dependency list. - * Called with splbio blocked. - */ -static int -flush_deplist(listhead, waitfor, errorp) - struct allocdirectlst *listhead; - int waitfor; - int *errorp; -{ - struct allocdirect *adp; - struct buf *bp; - - TAILQ_FOREACH(adp, listhead, ad_next) { - if (adp->ad_state & DEPCOMPLETE) - continue; - bp = adp->ad_buf; - bp = getdirtybuf(&bp, NULL, waitfor); - if (bp == NULL) { - if (waitfor == MNT_NOWAIT) - continue; - return (1); - } - FREE_LOCK(&lk); - if (waitfor == MNT_NOWAIT) { - bawrite(bp); - } else if ((*errorp = BUF_WRITE(bp)) != 0) { - ACQUIRE_LOCK(&lk); - return (1); - } - ACQUIRE_LOCK(&lk); - return (1); - } - return (0); -} - -/* - * Eliminate a pagedep dependency by flushing out all its diradd dependencies. - * Called with splbio blocked. - */ -static int -flush_pagedep_deps(pvp, mp, diraddhdp) - struct vnode *pvp; - struct mount *mp; - struct diraddhd *diraddhdp; -{ - struct thread *td = curthread; - struct inodedep *inodedep; - struct ufsmount *ump; - struct diradd *dap; - struct vnode *vp; - int error = 0; - struct buf *bp; - ino_t inum; - - ump = VFSTOUFS(mp); - while ((dap = LIST_FIRST(diraddhdp)) != NULL) { - /* - * Flush ourselves if this directory entry - * has a MKDIR_PARENT dependency. - */ - if (dap->da_state & MKDIR_PARENT) { - FREE_LOCK(&lk); - if ((error = UFS_UPDATE(pvp, 1)) != 0) - break; - ACQUIRE_LOCK(&lk); - /* - * If that cleared dependencies, go on to next. - */ - if (dap != LIST_FIRST(diraddhdp)) - continue; - if (dap->da_state & MKDIR_PARENT) { - FREE_LOCK(&lk); - panic("flush_pagedep_deps: MKDIR_PARENT"); - } - } - /* - * A newly allocated directory must have its "." and - * ".." entries written out before its name can be - * committed in its parent. We do not want or need - * the full semantics of a synchronous VOP_FSYNC as - * that may end up here again, once for each directory - * level in the filesystem. Instead, we push the blocks - * and wait for them to clear. We have to fsync twice - * because the first call may choose to defer blocks - * that still have dependencies, but deferral will - * happen at most once. - */ - inum = dap->da_newinum; - if (dap->da_state & MKDIR_BODY) { - FREE_LOCK(&lk); - if ((error = VFS_VGET(mp, inum, LK_EXCLUSIVE, &vp))) - break; - if ((error=VOP_FSYNC(vp, td->td_ucred, MNT_NOWAIT, td)) || - (error=VOP_FSYNC(vp, td->td_ucred, MNT_NOWAIT, td))) { - vput(vp); - break; - } - VI_LOCK(vp); - drain_output(vp, 0); - VI_UNLOCK(vp); - vput(vp); - ACQUIRE_LOCK(&lk); - /* - * If that cleared dependencies, go on to next. - */ - if (dap != LIST_FIRST(diraddhdp)) - continue; - if (dap->da_state & MKDIR_BODY) { - FREE_LOCK(&lk); - panic("flush_pagedep_deps: MKDIR_BODY"); - } - } - /* - * Flush the inode on which the directory entry depends. - * Having accounted for MKDIR_PARENT and MKDIR_BODY above, - * the only remaining dependency is that the updated inode - * count must get pushed to disk. The inode has already - * been pushed into its inode buffer (via VOP_UPDATE) at - * the time of the reference count change. So we need only - * locate that buffer, ensure that there will be no rollback - * caused by a bitmap dependency, then write the inode buffer. 
- */ - if (inodedep_lookup(ump->um_fs, inum, 0, &inodedep) == 0) { - FREE_LOCK(&lk); - panic("flush_pagedep_deps: lost inode"); - } - /* - * If the inode still has bitmap dependencies, - * push them to disk. - */ - if ((inodedep->id_state & DEPCOMPLETE) == 0) { - bp = inodedep->id_buf; - bp = getdirtybuf(&bp, NULL, MNT_WAIT); - FREE_LOCK(&lk); - if (bp && (error = BUF_WRITE(bp)) != 0) - break; - ACQUIRE_LOCK(&lk); - if (dap != LIST_FIRST(diraddhdp)) - continue; - } - /* - * If the inode is still sitting in a buffer waiting - * to be written, push it to disk. - */ - FREE_LOCK(&lk); - if ((error = bread(ump->um_devvp, - fsbtodb(ump->um_fs, ino_to_fsba(ump->um_fs, inum)), - (int)ump->um_fs->fs_bsize, NOCRED, &bp)) != 0) { - brelse(bp); - break; - } - if ((error = BUF_WRITE(bp)) != 0) - break; - ACQUIRE_LOCK(&lk); - /* - * If we have failed to get rid of all the dependencies - * then something is seriously wrong. - */ - if (dap == LIST_FIRST(diraddhdp)) { - FREE_LOCK(&lk); - panic("flush_pagedep_deps: flush failed"); - } - } - if (error) - ACQUIRE_LOCK(&lk); - return (error); -} - -/* - * A large burst of file addition or deletion activity can drive the - * memory load excessively high. First attempt to slow things down - * using the techniques below. If that fails, this routine requests - * the offending operations to fall back to running synchronously - * until the memory load returns to a reasonable level. - */ -int -softdep_slowdown(vp) - struct vnode *vp; -{ - int max_softdeps_hard; - - max_softdeps_hard = max_softdeps * 11 / 10; - if (num_dirrem < max_softdeps_hard / 2 && - num_inodedep < max_softdeps_hard && - VFSTOUFS(vp->v_mount)->um_numindirdeps < maxindirdeps) - return (0); - if (VFSTOUFS(vp->v_mount)->um_numindirdeps >= maxindirdeps) - speedup_syncer(); - stat_sync_limit_hit += 1; - return (1); -} - -/* - * Called by the allocation routines when they are about to fail - * in the hope that we can free up some disk space. - * - * First check to see if the work list has anything on it. If it has, - * clean up entries until we successfully free some space. Because this - * process holds inodes locked, we cannot handle any remove requests - * that might block on a locked inode as that could lead to deadlock. - * If the worklist yields no free space, encourage the syncer daemon - * to help us. In no event will we try for longer than tickdelay seconds. - */ -int -softdep_request_cleanup(fs, vp) - struct fs *fs; - struct vnode *vp; -{ - long starttime; - ufs2_daddr_t needed; - - needed = fs->fs_cstotal.cs_nbfree + fs->fs_contigsumsize; - starttime = time_second + tickdelay; - /* - * If we are being called because of a process doing a - * copy-on-write, then it is not safe to update the vnode - * as we may recurse into the copy-on-write routine. - */ - if (!(curthread->td_pflags & TDP_COWINPROGRESS) && - UFS_UPDATE(vp, 1) != 0) - return (0); - while (fs->fs_pendingblocks > 0 && fs->fs_cstotal.cs_nbfree <= needed) { - if (time_second > starttime) - return (0); - if (num_on_worklist > 0 && - process_worklist_item(NULL, LK_NOWAIT) != -1) { - stat_worklist_push += 1; - continue; - } - request_cleanup(FLUSH_REMOVE_WAIT, 0); - } - return (1); -} - -/* - * If memory utilization has gotten too high, deliberately slow things - * down and speed up the I/O processing. - */ -static int -request_cleanup(resource, islocked) - int resource; - int islocked; -{ - struct thread *td = curthread; - - /* - * We never hold up the filesystem syncer process. 
- */ - if (td == filesys_syncer) - return (0); - /* - * First check to see if the work list has gotten backlogged. - * If it has, co-opt this process to help clean up two entries. - * Because this process may hold inodes locked, we cannot - * handle any remove requests that might block on a locked - * inode as that could lead to deadlock. - */ - if (num_on_worklist > max_softdeps / 10) { - if (islocked) - FREE_LOCK(&lk); - process_worklist_item(NULL, LK_NOWAIT); - process_worklist_item(NULL, LK_NOWAIT); - stat_worklist_push += 2; - if (islocked) - ACQUIRE_LOCK(&lk); - return(1); - } - /* - * Next, we attempt to speed up the syncer process. If that - * is successful, then we allow the process to continue. - */ - if (speedup_syncer() && resource != FLUSH_REMOVE_WAIT) - return(0); - /* - * If we are resource constrained on inode dependencies, try - * flushing some dirty inodes. Otherwise, we are constrained - * by file deletions, so try accelerating flushes of directories - * with removal dependencies. We would like to do the cleanup - * here, but we probably hold an inode locked at this point and - * that might deadlock against one that we try to clean. So, - * the best that we can do is request the syncer daemon to do - * the cleanup for us. - */ - switch (resource) { - - case FLUSH_INODES: - stat_ino_limit_push += 1; - req_clear_inodedeps += 1; - stat_countp = &stat_ino_limit_hit; - break; - - case FLUSH_REMOVE: - case FLUSH_REMOVE_WAIT: - stat_blk_limit_push += 1; - req_clear_remove += 1; - stat_countp = &stat_blk_limit_hit; - break; - - default: - if (islocked) - FREE_LOCK(&lk); - panic("request_cleanup: unknown type"); - } - /* - * Hopefully the syncer daemon will catch up and awaken us. - * We wait at most tickdelay before proceeding in any case. - */ - if (islocked == 0) - ACQUIRE_LOCK(&lk); - proc_waiting += 1; - if (handle.callout == NULL) - handle = timeout(pause_timer, 0, tickdelay > 2 ? tickdelay : 2); - interlocked_sleep(&lk, SLEEP, (caddr_t)&proc_waiting, NULL, PPAUSE, - "softupdate", 0); - proc_waiting -= 1; - if (islocked == 0) - FREE_LOCK(&lk); - return (1); -} - -/* - * Awaken processes pausing in request_cleanup and clear proc_waiting - * to indicate that there is no longer a timer running. - */ -static void -pause_timer(arg) - void *arg; -{ - - *stat_countp += 1; - wakeup_one(&proc_waiting); - if (proc_waiting > 0) - handle = timeout(pause_timer, 0, tickdelay > 2 ? tickdelay : 2); - else - handle.callout = NULL; -} - -/* - * Flush out a directory with at least one removal dependency in an effort to - * reduce the number of dirrem, freefile, and freeblks dependency structures. 
- */ -static void -clear_remove(td) - struct thread *td; -{ - struct pagedep_hashhead *pagedephd; - struct pagedep *pagedep; - static int next = 0; - struct mount *mp; - struct vnode *vp; - int error, cnt; - ino_t ino; - - ACQUIRE_LOCK(&lk); - for (cnt = 0; cnt < pagedep_hash; cnt++) { - pagedephd = &pagedep_hashtbl[next++]; - if (next >= pagedep_hash) - next = 0; - LIST_FOREACH(pagedep, pagedephd, pd_hash) { - if (LIST_FIRST(&pagedep->pd_dirremhd) == NULL) - continue; - mp = pagedep->pd_mnt; - ino = pagedep->pd_ino; - if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) - continue; - FREE_LOCK(&lk); - if ((error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &vp))) { - softdep_error("clear_remove: vget", error); - vn_finished_write(mp); - return; - } - if ((error = VOP_FSYNC(vp, td->td_ucred, MNT_NOWAIT, td))) - softdep_error("clear_remove: fsync", error); - VI_LOCK(vp); - drain_output(vp, 0); - VI_UNLOCK(vp); - vput(vp); - vn_finished_write(mp); - return; - } - } - FREE_LOCK(&lk); -} - -/* - * Clear out a block of dirty inodes in an effort to reduce - * the number of inodedep dependency structures. - */ -static void -clear_inodedeps(td) - struct thread *td; -{ - struct inodedep_hashhead *inodedephd; - struct inodedep *inodedep; - static int next = 0; - struct mount *mp; - struct vnode *vp; - struct fs *fs; - int error, cnt; - ino_t firstino, lastino, ino; - - ACQUIRE_LOCK(&lk); - /* - * Pick a random inode dependency to be cleared. - * We will then gather up all the inodes in its block - * that have dependencies and flush them out. - */ - for (cnt = 0; cnt < inodedep_hash; cnt++) { - inodedephd = &inodedep_hashtbl[next++]; - if (next >= inodedep_hash) - next = 0; - if ((inodedep = LIST_FIRST(inodedephd)) != NULL) - break; - } - if (inodedep == NULL) - return; - /* - * Ugly code to find mount point given pointer to superblock. - */ - fs = inodedep->id_fs; - TAILQ_FOREACH(mp, &mountlist, mnt_list) - if ((mp->mnt_flag & MNT_SOFTDEP) && fs == VFSTOUFS(mp)->um_fs) - break; - /* - * Find the last inode in the block with dependencies. - */ - firstino = inodedep->id_ino & ~(INOPB(fs) - 1); - for (lastino = firstino + INOPB(fs) - 1; lastino > firstino; lastino--) - if (inodedep_lookup(fs, lastino, 0, &inodedep) != 0) - break; - /* - * Asynchronously push all but the last inode with dependencies. - * Synchronously push the last inode with dependencies to ensure - * that the inode block gets written to free up the inodedeps. - */ - for (ino = firstino; ino <= lastino; ino++) { - if (inodedep_lookup(fs, ino, 0, &inodedep) == 0) - continue; - FREE_LOCK(&lk); - if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) - continue; - if ((error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &vp)) != 0) { - softdep_error("clear_inodedeps: vget", error); - vn_finished_write(mp); - return; - } - if (ino == lastino) { - if ((error = VOP_FSYNC(vp, td->td_ucred, MNT_WAIT, td))) - softdep_error("clear_inodedeps: fsync1", error); - } else { - if ((error = VOP_FSYNC(vp, td->td_ucred, MNT_NOWAIT, td))) - softdep_error("clear_inodedeps: fsync2", error); - VI_LOCK(vp); - drain_output(vp, 0); - VI_UNLOCK(vp); - } - vput(vp); - vn_finished_write(mp); - ACQUIRE_LOCK(&lk); - } - FREE_LOCK(&lk); -} - -/* - * Function to determine if the buffer has outstanding dependencies - * that will cause a roll-back if the buffer is written. If wantcount - * is set, return number of dependencies, otherwise just yes or no. 
- */ -static int -softdep_count_dependencies(bp, wantcount) - struct buf *bp; - int wantcount; -{ - struct worklist *wk; - struct inodedep *inodedep; - struct indirdep *indirdep; - struct allocindir *aip; - struct pagedep *pagedep; - struct diradd *dap; - int i, retval; - - retval = 0; - ACQUIRE_LOCK(&lk); - LIST_FOREACH(wk, &bp->b_dep, wk_list) { - switch (wk->wk_type) { - - case D_INODEDEP: - inodedep = WK_INODEDEP(wk); - if ((inodedep->id_state & DEPCOMPLETE) == 0) { - /* bitmap allocation dependency */ - retval += 1; - if (!wantcount) - goto out; - } - if (TAILQ_FIRST(&inodedep->id_inoupdt)) { - /* direct block pointer dependency */ - retval += 1; - if (!wantcount) - goto out; - } - if (TAILQ_FIRST(&inodedep->id_extupdt)) { - /* direct block pointer dependency */ - retval += 1; - if (!wantcount) - goto out; - } - continue; - - case D_INDIRDEP: - indirdep = WK_INDIRDEP(wk); - - LIST_FOREACH(aip, &indirdep->ir_deplisthd, ai_next) { - /* indirect block pointer dependency */ - retval += 1; - if (!wantcount) - goto out; - } - continue; - - case D_PAGEDEP: - pagedep = WK_PAGEDEP(wk); - for (i = 0; i < DAHASHSZ; i++) { - - LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) { - /* directory entry dependency */ - retval += 1; - if (!wantcount) - goto out; - } - } - continue; - - case D_BMSAFEMAP: - case D_ALLOCDIRECT: - case D_ALLOCINDIR: - case D_MKDIR: - /* never a dependency on these blocks */ - continue; - - default: - FREE_LOCK(&lk); - panic("softdep_check_for_rollback: Unexpected type %s", - TYPENAME(wk->wk_type)); - /* NOTREACHED */ - } - } -out: - FREE_LOCK(&lk); - return retval; -} - -/* - * Acquire exclusive access to a buffer. - * Must be called with splbio blocked. - * Return acquired buffer or NULL on failure. mtx, if provided, will be - * released on success but held on failure. - */ -static struct buf * -getdirtybuf(bpp, mtx, waitfor) - struct buf **bpp; - struct mtx *mtx; - int waitfor; -{ - struct buf *bp; - int error; - - /* - * XXX This code and the code that calls it need to be reviewed to - * verify its use of the vnode interlock. - */ - - for (;;) { - if ((bp = *bpp) == NULL) - return (0); - if (bp->b_vp == NULL) - backtrace(); - if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) == 0) { - if ((bp->b_vflags & BV_BKGRDINPROG) == 0) - break; - BUF_UNLOCK(bp); - if (waitfor != MNT_WAIT) - return (NULL); - /* - * The mtx argument must be bp->b_vp's mutex in - * this case. - */ -#ifdef DEBUG_VFS_LOCKS - if (bp->b_vp->v_type != VCHR) - ASSERT_VI_LOCKED(bp->b_vp, "getdirtybuf"); -#endif - bp->b_vflags |= BV_BKGRDWAIT; - interlocked_sleep(&lk, SLEEP, &bp->b_xflags, mtx, - PRIBIO, "getbuf", 0); - continue; - } - if (waitfor != MNT_WAIT) - return (NULL); - if (mtx) { - error = interlocked_sleep(&lk, LOCKBUF, bp, mtx, - LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, 0, 0); - mtx_lock(mtx); - } else - error = interlocked_sleep(&lk, LOCKBUF, bp, NULL, - LK_EXCLUSIVE | LK_SLEEPFAIL, 0, 0); - if (error != ENOLCK) { - FREE_LOCK(&lk); - panic("getdirtybuf: inconsistent lock"); - } - } - if ((bp->b_flags & B_DELWRI) == 0) { - BUF_UNLOCK(bp); - return (NULL); - } - if (mtx) - mtx_unlock(mtx); - bremfree(bp); - return (bp); -} - -/* - * Wait for pending output on a vnode to complete. - * Must be called with vnode lock and interlock locked. 
- */ -static void -drain_output(vp, islocked) - struct vnode *vp; - int islocked; -{ - ASSERT_VOP_LOCKED(vp, "drain_output"); - ASSERT_VI_LOCKED(vp, "drain_output"); - - if (!islocked) - ACQUIRE_LOCK(&lk); - while (vp->v_numoutput) { - vp->v_iflag |= VI_BWAIT; - interlocked_sleep(&lk, SLEEP, (caddr_t)&vp->v_numoutput, - VI_MTX(vp), PRIBIO + 1, "drainvp", 0); - } - if (!islocked) - FREE_LOCK(&lk); -} - -/* - * Called whenever a buffer that is being invalidated or reallocated - * contains dependencies. This should only happen if an I/O error has - * occurred. The routine is called with the buffer locked. - */ -static void -softdep_deallocate_dependencies(bp) - struct buf *bp; -{ - - if ((bp->b_ioflags & BIO_ERROR) == 0) - panic("softdep_deallocate_dependencies: dangling deps"); - softdep_error(bp->b_vp->v_mount->mnt_stat.f_mntonname, bp->b_error); - panic("softdep_deallocate_dependencies: unrecovered I/O error"); -} - -/* - * Function to handle asynchronous write errors in the filesystem. - */ -static void -softdep_error(func, error) - char *func; - int error; -{ - - /* XXX should do something better! */ - printf("%s: got error %d while accessing filesystem\n", func, error); -} -#endif diff --git a/src/sys/ufs/ffs/ffs_softdep_stub.c b/src/sys/ufs/ffs/ffs_softdep_stub.c deleted file mode 100644 index 2e65208..0000000 --- a/src/sys/ufs/ffs/ffs_softdep_stub.c +++ /dev/null @@ -1,319 +0,0 @@ -#if 0 -/* - * Copyright 1998 Marshall Kirk McKusick. All Rights Reserved. - * - * The soft updates code is derived from the appendix of a University - * of Michigan technical report (Gregory R. Ganger and Yale N. Patt, - * "Soft Updates: A Solution to the Metadata Update Problem in File - * Systems", CSE-TR-254-95, August 1995). - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. None of the names of McKusick, Ganger, or the University of Michigan - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: @(#)ffs_softdep_stub.c 9.1 (McKusick) 7/10/97 - */ - -#include -__FBSDID("$FreeBSD: src/sys/ufs/ffs/ffs_softdep_stub.c,v 1.27 2003/06/11 06:31:28 obrien Exp $"); - -/* - * Use this file as ffs_softdep.c if you do not wish the real ffs_softdep.c - * to be included in your system. 
(e.g., for legal reasons.)
- * The real files are in /usr/src/contrib/sys/softupdates.
- * You must copy them here before you can use soft updates.
- * Read the README for legal and technical information.
- */
-
-#include "opt_ffs.h"
-#if (SOFTUPDATES == 0) /* SOFTUPDATES not configured in, use these stubs. */
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-int
-softdep_flushfiles(oldmnt, flags, td)
- struct mount *oldmnt;
- int flags;
- struct thread *td;
-{
-
- panic("softdep_flushfiles called");
-}
-
-int
-softdep_mount(devvp, mp, fs, cred)
- struct vnode *devvp;
- struct mount *mp;
- struct fs *fs;
- struct ucred *cred;
-{
-
- return (0);
-}
-
-void
-softdep_initialize()
-{
-
- return;
-}
-
-void
-softdep_uninitialize()
-{
-
- return;
-}
-
-void
-softdep_setup_inomapdep(bp, ip, newinum)
- struct buf *bp;
- struct inode *ip;
- ino_t newinum;
-{
-
- panic("softdep_setup_inomapdep called");
-}
-
-void
-softdep_setup_blkmapdep(bp, fs, newblkno)
- struct buf *bp;
- struct fs *fs;
- ufs2_daddr_t newblkno;
-{
-
- panic("softdep_setup_blkmapdep called");
-}
-
-void
-softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
- struct inode *ip;
- ufs_lbn_t lbn;
- ufs2_daddr_t newblkno;
- ufs2_daddr_t oldblkno;
- long newsize;
- long oldsize;
- struct buf *bp;
-{
-
- panic("softdep_setup_allocdirect called");
-}
-
-void
-softdep_setup_allocext(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
- struct inode *ip;
- ufs_lbn_t lbn;
- ufs2_daddr_t newblkno;
- ufs2_daddr_t oldblkno;
- long newsize;
- long oldsize;
- struct buf *bp;
-{
-
- panic("softdep_setup_allocext called");
-}
-
-void
-softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, nbp)
- struct inode *ip;
- ufs_lbn_t lbn;
- struct buf *bp;
- int ptrno;
- ufs2_daddr_t newblkno;
- ufs2_daddr_t oldblkno;
- struct buf *nbp;
-{
-
- panic("softdep_setup_allocindir_page called");
-}
-
-void
-softdep_setup_allocindir_meta(nbp, ip, bp, ptrno, newblkno)
- struct buf *nbp;
- struct inode *ip;
- struct buf *bp;
- int ptrno;
- ufs2_daddr_t newblkno;
-{
-
- panic("softdep_setup_allocindir_meta called");
-}
-
-void
-softdep_setup_freeblocks(ip, length, flags)
- struct inode *ip;
- off_t length;
- int flags;
-{
-
- panic("softdep_setup_freeblocks called");
-}
-
-void
-softdep_freefile(pvp, ino, mode)
- struct vnode *pvp;
- ino_t ino;
- int mode;
-{
-
- panic("softdep_freefile called");
-}
-
-int
-softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk)
- struct buf *bp;
- struct inode *dp;
- off_t diroffset;
- ino_t newinum;
- struct buf *newdirbp;
- int isnewblk;
-{
-
- panic("softdep_setup_directory_add called");
-}
-
-void
-softdep_change_directoryentry_offset(dp, base, oldloc, newloc, entrysize)
- struct inode *dp;
- caddr_t base;
- caddr_t oldloc;
- caddr_t newloc;
- int entrysize;
-{
-
- panic("softdep_change_directoryentry_offset called");
-}
-
-void
-softdep_setup_remove(bp, dp, ip, isrmdir)
- struct buf *bp;
- struct inode *dp;
- struct inode *ip;
- int isrmdir;
-{
-
- panic("softdep_setup_remove called");
-}
-
-void
-softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir)
- struct buf *bp;
- struct inode *dp;
- struct inode *ip;
- ino_t newinum;
- int isrmdir;
-{
-
- panic("softdep_setup_directory_change called");
-}
-
-void
-softdep_change_linkcnt(ip)
- struct inode *ip;
-{
-
- panic("softdep_change_linkcnt called");
-}
-
-void
-softdep_load_inodeblock(ip)
- struct inode *ip;
-{
-
- panic("softdep_load_inodeblock called");
-}
-
-void
-softdep_update_inodeblock(ip, bp, waitfor) - struct inode *ip; - struct buf *bp; - int waitfor; -{ - - panic("softdep_update_inodeblock called"); -} - -void -softdep_fsync_mountdev(vp) - struct vnode *vp; -{ - - return; -} - -int -softdep_flushworklist(oldmnt, countp, td) - struct mount *oldmnt; - int *countp; - struct thread *td; -{ - - *countp = 0; - return (0); -} - -int -softdep_sync_metadata(ap) - struct vop_fsync_args /* { - struct vnode *a_vp; - struct ucred *a_cred; - int a_waitfor; - struct thread *a_td; - } */ *ap; -{ - - return (0); -} - -int -softdep_slowdown(vp) - struct vnode *vp; -{ - - panic("softdep_slowdown called"); -} - -void -softdep_releasefile(ip) - struct inode *ip; /* inode with the zero effective link count */ -{ - - panic("softdep_releasefile called"); -} - -int -softdep_request_cleanup(fs, vp) - struct fs *fs; - struct vnode *vp; -{ - - return (0); -} -#endif /* SOFTUPDATES not configured in */ -#endif diff --git a/src/sys/ufs/ffs/ffs_subr.c b/src/sys/ufs/ffs/ffs_subr.c deleted file mode 100644 index e304131..0000000 --- a/src/sys/ufs/ffs/ffs_subr.c +++ /dev/null @@ -1,294 +0,0 @@ -#if 0 -/* - * Copyright (c) 1982, 1986, 1989, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * @(#)ffs_subr.c 8.5 (Berkeley) 3/21/95 - */ - -#include -__FBSDID("$FreeBSD: src/sys/ufs/ffs/ffs_subr.c,v 1.37 2003/06/11 06:31:28 obrien Exp $"); - -#include - -#ifndef _KERNEL -#include -#include -#include "fsck.h" -#else -#include "opt_ddb.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#ifdef DDB -void ffs_checkoverlap(struct buf *, struct inode *); -#endif - -/* - * Return buffer with the contents of block "offset" from the beginning of - * directory "ip". If "res" is non-zero, fill it in with a pointer to the - * remaining space in the directory. - */ -int -ffs_blkatoff(vp, offset, res, bpp) - struct vnode *vp; - off_t offset; - char **res; - struct buf **bpp; -{ - struct inode *ip; - struct fs *fs; - struct buf *bp; - ufs_lbn_t lbn; - int bsize, error; - - ip = VTOI(vp); - fs = ip->i_fs; - lbn = lblkno(fs, offset); - bsize = blksize(fs, ip, lbn); - - *bpp = NULL; - error = bread(vp, lbn, bsize, NOCRED, &bp); - if (error) { - brelse(bp); - return (error); - } - if (res) - *res = (char *)bp->b_data + blkoff(fs, offset); - *bpp = bp; - return (0); -} - -/* - * Load up the contents of an inode and copy the appropriate pieces - * to the incore copy. - */ -void -ffs_load_inode(bp, ip, fs, ino) - struct buf *bp; - struct inode *ip; - struct fs *fs; - ino_t ino; -{ - - if (ip->i_ump->um_fstype == UFS1) { - *ip->i_din1 = - *((struct ufs1_dinode *)bp->b_data + ino_to_fsbo(fs, ino)); - ip->i_mode = ip->i_din1->di_mode; - ip->i_nlink = ip->i_din1->di_nlink; - ip->i_size = ip->i_din1->di_size; - ip->i_flags = ip->i_din1->di_flags; - ip->i_gen = ip->i_din1->di_gen; - ip->i_uid = ip->i_din1->di_uid; - ip->i_gid = ip->i_din1->di_gid; - } else { - *ip->i_din2 = - *((struct ufs2_dinode *)bp->b_data + ino_to_fsbo(fs, ino)); - ip->i_mode = ip->i_din2->di_mode; - ip->i_nlink = ip->i_din2->di_nlink; - ip->i_size = ip->i_din2->di_size; - ip->i_flags = ip->i_din2->di_flags; - ip->i_gen = ip->i_din2->di_gen; - ip->i_uid = ip->i_din2->di_uid; - ip->i_gid = ip->i_din2->di_gid; - } -} -#endif /* KERNEL */ - -/* - * Update the frsum fields to reflect addition or deletion - * of some frags. 
- */ -void -ffs_fragacct(fs, fragmap, fraglist, cnt) - struct fs *fs; - int fragmap; - int32_t fraglist[]; - int cnt; -{ - int inblk; - int field, subfield; - int siz, pos; - - inblk = (int)(fragtbl[fs->fs_frag][fragmap]) << 1; - fragmap <<= 1; - for (siz = 1; siz < fs->fs_frag; siz++) { - if ((inblk & (1 << (siz + (fs->fs_frag % NBBY)))) == 0) - continue; - field = around[siz]; - subfield = inside[siz]; - for (pos = siz; pos <= fs->fs_frag; pos++) { - if ((fragmap & field) == subfield) { - fraglist[siz] += cnt; - pos += siz; - field <<= siz; - subfield <<= siz; - } - field <<= 1; - subfield <<= 1; - } - } -} - -#ifdef DDB -void -ffs_checkoverlap(bp, ip) - struct buf *bp; - struct inode *ip; -{ - struct buf *ebp, *ep; - ufs2_daddr_t start, last; - struct vnode *vp; - - ebp = &buf[nbuf]; - start = bp->b_blkno; - last = start + btodb(bp->b_bcount) - 1; - for (ep = buf; ep < ebp; ep++) { - if (ep == bp || (ep->b_flags & B_INVAL) || - ep->b_vp == NULLVP) - continue; - vp = ip->i_devvp; - /* look for overlap */ - if (ep->b_bcount == 0 || ep->b_blkno > last || - ep->b_blkno + btodb(ep->b_bcount) <= start) - continue; - vprint("Disk overlap", vp); - printf("\tstart %jd, end %jd overlap start %jd, end %jd\n", - (intmax_t)start, (intmax_t)last, (intmax_t)ep->b_blkno, - (intmax_t)(ep->b_blkno + btodb(ep->b_bcount) - 1)); - panic("ffs_checkoverlap: Disk buffer overlap"); - } -} -#endif /* DDB */ - -/* - * block operations - * - * check if a block is available - */ -int -ffs_isblock(fs, cp, h) - struct fs *fs; - unsigned char *cp; - ufs1_daddr_t h; -{ - unsigned char mask; - - switch ((int)fs->fs_frag) { - case 8: - return (cp[h] == 0xff); - case 4: - mask = 0x0f << ((h & 0x1) << 2); - return ((cp[h >> 1] & mask) == mask); - case 2: - mask = 0x03 << ((h & 0x3) << 1); - return ((cp[h >> 2] & mask) == mask); - case 1: - mask = 0x01 << (h & 0x7); - return ((cp[h >> 3] & mask) == mask); - default: - panic("ffs_isblock"); - } - return (0); -} - -/* - * take a block out of the map - */ -void -ffs_clrblock(fs, cp, h) - struct fs *fs; - u_char *cp; - ufs1_daddr_t h; -{ - - switch ((int)fs->fs_frag) { - case 8: - cp[h] = 0; - return; - case 4: - cp[h >> 1] &= ~(0x0f << ((h & 0x1) << 2)); - return; - case 2: - cp[h >> 2] &= ~(0x03 << ((h & 0x3) << 1)); - return; - case 1: - cp[h >> 3] &= ~(0x01 << (h & 0x7)); - return; - default: - panic("ffs_clrblock"); - } -} - -/* - * put a block into the map - */ -void -ffs_setblock(fs, cp, h) - struct fs *fs; - unsigned char *cp; - ufs1_daddr_t h; -{ - - switch ((int)fs->fs_frag) { - - case 8: - cp[h] = 0xff; - return; - case 4: - cp[h >> 1] |= (0x0f << ((h & 0x1) << 2)); - return; - case 2: - cp[h >> 2] |= (0x03 << ((h & 0x3) << 1)); - return; - case 1: - cp[h >> 3] |= (0x01 << (h & 0x7)); - return; - default: - panic("ffs_setblock"); - } -} -#endif diff --git a/src/sys/ufs/ffs/ffs_tables.c b/src/sys/ufs/ffs/ffs_tables.c deleted file mode 100644 index af35c3c..0000000 --- a/src/sys/ufs/ffs/ffs_tables.c +++ /dev/null @@ -1,143 +0,0 @@ -#if 0 -/* - * Copyright (c) 1982, 1986, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)ffs_tables.c 8.1 (Berkeley) 6/11/93
- */
-
-#include
-__FBSDID("$FreeBSD: src/sys/ufs/ffs/ffs_tables.c,v 1.10 2003/06/11 06:31:28 obrien Exp $");
-
-#include
-#include
-#include
-
-/*
- * Bit patterns for identifying fragments in the block map
- * used as ((map & around) == inside)
- */
-int around[9] = {
- 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff, 0x1ff, 0x3ff
-};
-int inside[9] = {
- 0x0, 0x2, 0x6, 0xe, 0x1e, 0x3e, 0x7e, 0xfe, 0x1fe
-};
-
-/*
- * Given a block map bit pattern, the frag tables tell whether a
- * particular size fragment is available.
- *
- * used as:
- * if ((1 << (size - 1)) & fragtbl[fs->fs_frag][map]) {
- * at least one fragment of the indicated size is available
- * }
- *
- * These tables are used by the scanc instruction on the VAX to
- * quickly find an appropriate fragment.
- */ -static u_char fragtbl124[256] = { - 0x00, 0x16, 0x16, 0x2a, 0x16, 0x16, 0x26, 0x4e, - 0x16, 0x16, 0x16, 0x3e, 0x2a, 0x3e, 0x4e, 0x8a, - 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, - 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, - 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, - 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, - 0x2a, 0x3e, 0x3e, 0x2a, 0x3e, 0x3e, 0x2e, 0x6e, - 0x3e, 0x3e, 0x3e, 0x3e, 0x2a, 0x3e, 0x6e, 0xaa, - 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, - 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, - 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, - 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, - 0x26, 0x36, 0x36, 0x2e, 0x36, 0x36, 0x26, 0x6e, - 0x36, 0x36, 0x36, 0x3e, 0x2e, 0x3e, 0x6e, 0xae, - 0x4e, 0x5e, 0x5e, 0x6e, 0x5e, 0x5e, 0x6e, 0x4e, - 0x5e, 0x5e, 0x5e, 0x7e, 0x6e, 0x7e, 0x4e, 0xce, - 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, - 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, - 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, - 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, - 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, - 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, - 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e, - 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e, 0xbe, - 0x2a, 0x3e, 0x3e, 0x2a, 0x3e, 0x3e, 0x2e, 0x6e, - 0x3e, 0x3e, 0x3e, 0x3e, 0x2a, 0x3e, 0x6e, 0xaa, - 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e, - 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e, 0xbe, - 0x4e, 0x5e, 0x5e, 0x6e, 0x5e, 0x5e, 0x6e, 0x4e, - 0x5e, 0x5e, 0x5e, 0x7e, 0x6e, 0x7e, 0x4e, 0xce, - 0x8a, 0x9e, 0x9e, 0xaa, 0x9e, 0x9e, 0xae, 0xce, - 0x9e, 0x9e, 0x9e, 0xbe, 0xaa, 0xbe, 0xce, 0x8a, -}; - -static u_char fragtbl8[256] = { - 0x00, 0x01, 0x01, 0x02, 0x01, 0x01, 0x02, 0x04, - 0x01, 0x01, 0x01, 0x03, 0x02, 0x03, 0x04, 0x08, - 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, - 0x02, 0x03, 0x03, 0x02, 0x04, 0x05, 0x08, 0x10, - 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, - 0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09, - 0x02, 0x03, 0x03, 0x02, 0x03, 0x03, 0x02, 0x06, - 0x04, 0x05, 0x05, 0x06, 0x08, 0x09, 0x10, 0x20, - 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, - 0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09, - 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, - 0x03, 0x03, 0x03, 0x03, 0x05, 0x05, 0x09, 0x11, - 0x02, 0x03, 0x03, 0x02, 0x03, 0x03, 0x02, 0x06, - 0x03, 0x03, 0x03, 0x03, 0x02, 0x03, 0x06, 0x0a, - 0x04, 0x05, 0x05, 0x06, 0x05, 0x05, 0x06, 0x04, - 0x08, 0x09, 0x09, 0x0a, 0x10, 0x11, 0x20, 0x40, - 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, - 0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09, - 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, - 0x03, 0x03, 0x03, 0x03, 0x05, 0x05, 0x09, 0x11, - 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, - 0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09, - 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x07, - 0x05, 0x05, 0x05, 0x07, 0x09, 0x09, 0x11, 0x21, - 0x02, 0x03, 0x03, 0x02, 0x03, 0x03, 0x02, 0x06, - 0x03, 0x03, 0x03, 0x03, 0x02, 0x03, 0x06, 0x0a, - 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x07, - 0x02, 0x03, 0x03, 0x02, 0x06, 0x07, 0x0a, 0x12, - 0x04, 0x05, 0x05, 0x06, 0x05, 0x05, 0x06, 0x04, - 0x05, 0x05, 0x05, 0x07, 0x06, 0x07, 0x04, 0x0c, - 0x08, 0x09, 0x09, 0x0a, 0x09, 0x09, 0x0a, 0x0c, - 0x10, 0x11, 0x11, 0x12, 0x20, 0x21, 0x40, 0x80, -}; - -/* - * The actual fragtbl array. 
- */ -u_char *fragtbl[MAXFRAG + 1] = { - 0, fragtbl124, fragtbl124, 0, fragtbl124, 0, 0, 0, fragtbl8, -}; -#endif diff --git a/src/sys/ufs/ffs/ffs_vfsops.c b/src/sys/ufs/ffs/ffs_vfsops.c deleted file mode 100644 index e3db44c..0000000 --- a/src/sys/ufs/ffs/ffs_vfsops.c +++ /dev/null @@ -1,1562 +0,0 @@ -/* - * Copyright (c) 1989, 1991, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95 - */ - -#include -__FBSDID("$FreeBSD: src/sys/ufs/ffs/ffs_vfsops.c,v 1.225.2.1 2003/12/12 02:23:22 truckman Exp $"); - -#include "opt_mac.h" -#include "opt_quota.h" -#include "opt_ufs.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -uma_zone_t uma_inode, uma_ufs1, uma_ufs2; - -static int ffs_sbupdate(struct ufsmount *, int); - int ffs_reload(struct mount *,struct ucred *,struct thread *); -static int ffs_mountfs(struct vnode *, struct mount *, struct thread *); -static void ffs_oldfscompat_read(struct fs *, struct ufsmount *, - ufs2_daddr_t); -static void ffs_oldfscompat_write(struct fs *, struct ufsmount *); -static void ffs_ifree(struct ufsmount *ump, struct inode *ip); -static vfs_init_t ffs_init; -static vfs_uninit_t ffs_uninit; -static vfs_extattrctl_t ffs_extattrctl; - -static struct vfsops ufs_vfsops = { - .vfs_extattrctl = ffs_extattrctl, - .vfs_fhtovp = ffs_fhtovp, - .vfs_init = ffs_init, - .vfs_mount = ffs_mount, - .vfs_quotactl = ufs_quotactl, - .vfs_root = ufs_root, - .vfs_start = ufs_start, - .vfs_statfs = ffs_statfs, - .vfs_sync = ffs_sync, - .vfs_uninit = ffs_uninit, - .vfs_unmount = ffs_unmount, - .vfs_vget = ffs_vget, - .vfs_vptofh = ffs_vptofh, -}; - -VFS_SET(ufs_vfsops, ufs, 0); - -/* - * ffs_mount - * - * Called when mounting local physical media - * - * PARAMETERS: - * mountroot - * mp mount point structure - * path NULL (flag for root mount!!!) - * data - * ndp - * p process (user credentials check [statfs]) - * - * mount - * mp mount point structure - * path path to mount point - * data pointer to argument struct in user space - * ndp mount point namei() return (used for - * credentials on reload), reused to look - * up block device. - * p process (user credentials check) - * - * RETURNS: 0 Success - * !0 error number (errno.h) - * - * LOCK STATE: - * - * ENTRY - * mount point is locked - * EXIT - * mount point is locked - * - * NOTES: - * A NULL path can be used for a flag since the mount - * system call will fail with EFAULT in copyinstr in - * namei() if it is a genuine NULL from the user. - */ -int -ffs_mount(mp, path, data, ndp, td) - struct mount *mp; /* mount struct pointer*/ - char *path; /* path to mount point*/ - caddr_t data; /* arguments to FS specific mount*/ - struct nameidata *ndp; /* mount point credentials*/ - struct thread *td; /* process requesting mount*/ -{ - size_t size; - struct vnode *devvp; - struct ufs_args args; - struct ufsmount *ump = 0; - struct fs *fs; - int error, flags; - mode_t accessmode; - - if (uma_inode == NULL) { - uma_inode = uma_zcreate("FFS inode", - sizeof(struct inode), NULL, NULL, NULL, NULL, - UMA_ALIGN_PTR, 0); - uma_ufs1 = uma_zcreate("FFS1 dinode", - sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL, - UMA_ALIGN_PTR, 0); - uma_ufs2 = uma_zcreate("FFS2 dinode", - sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL, - UMA_ALIGN_PTR, 0); - } - /* - * Use NULL path to indicate we are mounting the root filesystem. 
- */ - if (path == NULL) { - if ((error = bdevvp(rootdev, &rootvp))) { - printf("ffs_mountroot: can't find rootvp\n"); - return (error); - } - - if ((error = ffs_mountfs(rootvp, mp, td)) != 0) - return (error); - (void)VFS_STATFS(mp, &mp->mnt_stat, td); - return (0); - } - - /* - * Mounting non-root filesystem or updating a filesystem - */ - if ((error = copyin(data, (caddr_t)&args, sizeof(struct ufs_args)))!= 0) - return (error); - - /* - * If updating, check whether changing from read-only to - * read/write; if there is no device name, that's all we do. - */ - if (mp->mnt_flag & MNT_UPDATE) { - ump = VFSTOUFS(mp); - fs = ump->um_fs; - devvp = ump->um_devvp; - if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { - if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0) - return (error); - /* - * Flush any dirty data. - */ - if ((error = VFS_SYNC(mp, MNT_WAIT, - td->td_ucred, td)) != 0) { - vn_finished_write(mp); - return (error); - } - /* - * Check for and optionally get rid of files open - * for writing. - */ - flags = WRITECLOSE; - if (mp->mnt_flag & MNT_FORCE) - flags |= FORCECLOSE; - if (mp->mnt_flag & MNT_SOFTDEP) { - error = softdep_flushfiles(mp, flags, td); - } else { - error = ffs_flushfiles(mp, flags, td); - } - if (error) { - vn_finished_write(mp); - return (error); - } - if (fs->fs_pendingblocks != 0 || - fs->fs_pendinginodes != 0) { - printf("%s: %s: blocks %jd files %d\n", - fs->fs_fsmnt, "update error", - (intmax_t)fs->fs_pendingblocks, - fs->fs_pendinginodes); - fs->fs_pendingblocks = 0; - fs->fs_pendinginodes = 0; - } - fs->fs_ronly = 1; - if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0) - fs->fs_clean = 1; - if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) { - fs->fs_ronly = 0; - fs->fs_clean = 0; - vn_finished_write(mp); - return (error); - } - vn_finished_write(mp); - } - if ((mp->mnt_flag & MNT_RELOAD) && - (error = ffs_reload(mp, ndp->ni_cnd.cn_cred, td)) != 0) - return (error); - if (fs->fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { - /* - * If upgrade to read-write by non-root, then verify - * that user has necessary permissions on the device. - */ - if (suser(td)) { - vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); - if ((error = VOP_ACCESS(devvp, VREAD | VWRITE, - td->td_ucred, td)) != 0) { - VOP_UNLOCK(devvp, 0, td); - return (error); - } - VOP_UNLOCK(devvp, 0, td); - } - fs->fs_flags &= ~FS_UNCLEAN; - if (fs->fs_clean == 0) { - fs->fs_flags |= FS_UNCLEAN; - if ((mp->mnt_flag & MNT_FORCE) || - ((fs->fs_flags & FS_NEEDSFSCK) == 0 && - (fs->fs_flags & FS_DOSOFTDEP))) { - printf("WARNING: %s was not %s\n", - fs->fs_fsmnt, "properly dismounted"); - } else { - printf( -"WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", - fs->fs_fsmnt); - return (EPERM); - } - } - if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0) - return (error); - fs->fs_ronly = 0; - fs->fs_clean = 0; - if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) { - vn_finished_write(mp); - return (error); - } - /* check to see if we need to start softdep */ - if ((fs->fs_flags & FS_DOSOFTDEP) && - (error = softdep_mount(devvp, mp, fs, td->td_ucred))){ - vn_finished_write(mp); - return (error); - } - if (fs->fs_snapinum[0] != 0) - ffs_snapshot_mount(mp); - vn_finished_write(mp); - } - /* - * Soft updates is incompatible with "async", - * so if we are doing softupdates stop the user - * from setting the async flag in an update. - * Softdep_mount() clears it in an initial mount - * or ro->rw remount. 
- */ - if (mp->mnt_flag & MNT_SOFTDEP) - mp->mnt_flag &= ~MNT_ASYNC; - /* - * If not updating name, process export requests. - */ - if (args.fspec == 0) - return (vfs_export(mp, &args.export)); - /* - * If this is a snapshot request, take the snapshot. - */ - if (mp->mnt_flag & MNT_SNAPSHOT) - return (ffs_snapshot(mp, args.fspec)); - } - - /* - * Not an update, or updating the name: look up the name - * and verify that it refers to a sensible block device. - */ - NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, td); - if ((error = namei(ndp)) != 0) - return (error); - NDFREE(ndp, NDF_ONLY_PNBUF); - devvp = ndp->ni_vp; - if (!vn_isdisk(devvp, &error)) { - vrele(devvp); - return (error); - } - - /* - * If mount by non-root, then verify that user has necessary - * permissions on the device. - */ - if (suser(td)) { - accessmode = VREAD; - if ((mp->mnt_flag & MNT_RDONLY) == 0) - accessmode |= VWRITE; - vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); - if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td))!= 0){ - vput(devvp); - return (error); - } - VOP_UNLOCK(devvp, 0, td); - } - - if (mp->mnt_flag & MNT_UPDATE) { - /* - * Update only - * - * If it's not the same vnode, or at least the same device - * then it's not correct. - */ - - if (devvp != ump->um_devvp && - devvp->v_rdev != ump->um_devvp->v_rdev) - error = EINVAL; /* needs translation */ - vrele(devvp); - if (error) - return (error); - } else { - /* - * New mount - * - * We need the name for the mount point (also used for - * "last mounted on") copied in. If an error occurs, - * the mount point is discarded by the upper level code. - * Note that vfs_mount() populates f_mntonname for us. - */ - if ((error = ffs_mountfs(devvp, mp, td)) != 0) { - vrele(devvp); - return (error); - } - } - /* - * Save "mounted from" device name info for mount point (NULL pad). - */ - copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); - bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); - /* - * Initialize filesystem stat information in mount struct. - */ - (void)VFS_STATFS(mp, &mp->mnt_stat, td); - return (0); -} - -/* - * Reload all incore data for a filesystem (used after running fsck on - * the root filesystem and finding things to fix). The filesystem must - * be mounted read-only. - * - * Things to do to update the mount: - * 1) invalidate all cached meta-data. - * 2) re-read superblock from disk. - * 3) re-read summary information from disk. - * 4) invalidate all inactive vnodes. - * 5) invalidate all cached file data. - * 6) re-read inode data for all active vnodes. - */ -int -ffs_reload(mp, cred, td) - struct mount *mp; - struct ucred *cred; - struct thread *td; -{ - struct vnode *vp, *nvp, *devvp; - struct inode *ip; - void *space; - struct buf *bp; - struct fs *fs, *newfs; - ufs2_daddr_t sblockloc; - int i, blks, size, error; - int32_t *lp; - - if ((mp->mnt_flag & MNT_RDONLY) == 0) - return (EINVAL); - /* - * Step 1: invalidate all cached meta-data. - */ - devvp = VFSTOUFS(mp)->um_devvp; - vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); - error = vinvalbuf(devvp, 0, cred, td, 0, 0); - VOP_UNLOCK(devvp, 0, td); - if (error) - panic("ffs_reload: dirty1"); - - /* - * Only VMIO the backing device if the backing device is a real - * block device. - */ - if (vn_isdisk(devvp, NULL)) { - vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); - vfs_object_create(devvp, td, td->td_ucred); - VOP_UNLOCK(devvp, 0, td); - } - - /* - * Step 2: re-read superblock from disk. 
- */ - fs = VFSTOUFS(mp)->um_fs; - if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize, - NOCRED, &bp)) != 0) - return (error); - newfs = (struct fs *)bp->b_data; - if ((newfs->fs_magic != FS_UFS1_MAGIC && - newfs->fs_magic != FS_UFS2_MAGIC) || - newfs->fs_bsize > MAXBSIZE || - newfs->fs_bsize < sizeof(struct fs)) { - brelse(bp); - return (EIO); /* XXX needs translation */ - } - /* - * Copy pointer fields back into superblock before copying in XXX - * new superblock. These should really be in the ufsmount. XXX - * Note that important parameters (eg fs_ncg) are unchanged. - */ - newfs->fs_csp = fs->fs_csp; - newfs->fs_maxcluster = fs->fs_maxcluster; - newfs->fs_contigdirs = fs->fs_contigdirs; - newfs->fs_active = fs->fs_active; - /* The file system is still read-only. */ - newfs->fs_ronly = 1; - sblockloc = fs->fs_sblockloc; - bcopy(newfs, fs, (u_int)fs->fs_sbsize); - brelse(bp); - mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; - ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc); - if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { - printf("%s: reload pending error: blocks %jd files %d\n", - fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, - fs->fs_pendinginodes); - fs->fs_pendingblocks = 0; - fs->fs_pendinginodes = 0; - } - - /* - * Step 3: re-read summary information from disk. - */ - blks = howmany(fs->fs_cssize, fs->fs_fsize); - space = fs->fs_csp; - for (i = 0; i < blks; i += fs->fs_frag) { - size = fs->fs_bsize; - if (i + fs->fs_frag > blks) - size = (blks - i) * fs->fs_fsize; - error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, - NOCRED, &bp); - if (error) - return (error); - bcopy(bp->b_data, space, (u_int)size); - space = (char *)space + size; - brelse(bp); - } - /* - * We no longer know anything about clusters per cylinder group. - */ - if (fs->fs_contigsumsize > 0) { - lp = fs->fs_maxcluster; - for (i = 0; i < fs->fs_ncg; i++) - *lp++ = fs->fs_contigsumsize; - } - -loop: - MNT_ILOCK(mp); - for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) { - if (vp->v_mount != mp) { - MNT_IUNLOCK(mp); - goto loop; - } - nvp = TAILQ_NEXT(vp, v_nmntvnodes); - VI_LOCK(vp); - if (vp->v_iflag & VI_XLOCK) { - VI_UNLOCK(vp); - continue; - } - MNT_IUNLOCK(mp); - /* - * Step 4: invalidate all inactive vnodes. - */ - if (vp->v_usecount == 0) { - vgonel(vp, td); - goto loop; - } - /* - * Step 5: invalidate all cached file data. - */ - if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { - goto loop; - } - if (vinvalbuf(vp, 0, cred, td, 0, 0)) - panic("ffs_reload: dirty2"); - /* - * Step 6: re-read inode data for all active vnodes. - */ - ip = VTOI(vp); - error = - bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), - (int)fs->fs_bsize, NOCRED, &bp); - if (error) { - VOP_UNLOCK(vp, 0, td); - vrele(vp); - return (error); - } - ffs_load_inode(bp, ip, fs, ip->i_number); - ip->i_effnlink = ip->i_nlink; - brelse(bp); - VOP_UNLOCK(vp, 0, td); - vrele(vp); - MNT_ILOCK(mp); - } - MNT_IUNLOCK(mp); - return (0); -} - -/* - * Possible superblock locations ordered from most to least likely. - */ -static int sblock_try[] = SBLOCKSEARCH; - -/* - * Common code for mount and mountroot - */ -static int -ffs_mountfs(devvp, mp, td) - struct vnode *devvp; - struct mount *mp; - struct thread *td; -{ - struct ufsmount *ump; - struct buf *bp; - struct fs *fs; - dev_t dev; - void *space; - ufs2_daddr_t sblockloc; - int error, i, blks, size, ronly; - int32_t *lp; - struct ucred *cred; - size_t strsize; - int ncount; - - dev = devvp->v_rdev; - cred = td ? 
td->td_ucred : NOCRED; - /* - * Disallow multiple mounts of the same device. - * Disallow mounting of a device that is currently in use - * (except for root, which might share swap device for miniroot). - * Flush out any old buffers remaining from a previous use. - */ - error = vfs_mountedon(devvp); - if (error) - return (error); - ncount = vcount(devvp); - - if (ncount > 1 && devvp != rootvp) - return (EBUSY); - vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); - error = vinvalbuf(devvp, V_SAVE, cred, td, 0, 0); - VOP_UNLOCK(devvp, 0, td); - if (error) - return (error); - - /* - * Only VMIO the backing device if the backing device is a real - * block device. - * Note that it is optional that the backing device be VMIOed. This - * increases the opportunity for metadata caching. - */ - if (vn_isdisk(devvp, NULL)) { - vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); - vfs_object_create(devvp, td, cred); - VOP_UNLOCK(devvp, 0, td); - } - - ronly = (mp->mnt_flag & MNT_RDONLY) != 0; - vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); - /* - * XXX: We don't re-VOP_OPEN in FREAD|FWRITE mode if the filesystem - * XXX: is subsequently remounted, so open it FREAD|FWRITE from the - * XXX: start to avoid getting trashed later on. - */ -#ifdef notyet - error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, td, -1); -#else - error = VOP_OPEN(devvp, FREAD|FWRITE, FSCRED, td, -1); -#endif - VOP_UNLOCK(devvp, 0, td); - if (error) - return (error); - if (devvp->v_rdev->si_iosize_max != 0) - mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; - if (mp->mnt_iosize_max > MAXPHYS) - mp->mnt_iosize_max = MAXPHYS; - - bp = NULL; - ump = NULL; - fs = NULL; - sblockloc = 0; - /* - * Try reading the superblock in each of its possible locations. - */ - for (i = 0; sblock_try[i] != -1; i++) { - if ((error = bread(devvp, sblock_try[i] / DEV_BSIZE, SBLOCKSIZE, - cred, &bp)) != 0) - goto out; - fs = (struct fs *)bp->b_data; - sblockloc = sblock_try[i]; - if ((fs->fs_magic == FS_UFS1_MAGIC || - (fs->fs_magic == FS_UFS2_MAGIC && - (fs->fs_sblockloc == sblockloc || - (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) && - fs->fs_bsize <= MAXBSIZE && - fs->fs_bsize >= sizeof(struct fs)) - break; - brelse(bp); - bp = NULL; - } - if (sblock_try[i] == -1) { - error = EINVAL; /* XXX needs translation */ - goto out; - } - fs->fs_fmod = 0; - fs->fs_flags &= ~FS_INDEXDIRS; /* no support for directory indicies */ - fs->fs_flags &= ~FS_UNCLEAN; - if (fs->fs_clean == 0) { - fs->fs_flags |= FS_UNCLEAN; - if (ronly || (mp->mnt_flag & MNT_FORCE) || - ((fs->fs_flags & FS_NEEDSFSCK) == 0 && - (fs->fs_flags & FS_DOSOFTDEP))) { - printf( -"WARNING: %s was not properly dismounted\n", - fs->fs_fsmnt); - } else { - printf( -"WARNING: R/W mount of %s denied. 
Filesystem is not clean - run fsck\n", - fs->fs_fsmnt); - error = EPERM; - goto out; - } - if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) && - (mp->mnt_flag & MNT_FORCE)) { - printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt, - (intmax_t)fs->fs_pendingblocks, - fs->fs_pendinginodes); - fs->fs_pendingblocks = 0; - fs->fs_pendinginodes = 0; - } - } - if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { - printf("%s: mount pending error: blocks %jd files %d\n", - fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, - fs->fs_pendinginodes); - fs->fs_pendingblocks = 0; - fs->fs_pendinginodes = 0; - } - ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO); - ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, - M_WAITOK); - if (fs->fs_magic == FS_UFS1_MAGIC) { - ump->um_fstype = UFS1; - ump->um_balloc = ffs_balloc_ufs1; - } else { - ump->um_fstype = UFS2; - ump->um_balloc = ffs_balloc_ufs2; - } - ump->um_blkatoff = ffs_blkatoff; - ump->um_truncate = ffs_truncate; - ump->um_update = ffs_update; - ump->um_valloc = ffs_valloc; - ump->um_vfree = ffs_vfree; - ump->um_ifree = ffs_ifree; - bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize); - if (fs->fs_sbsize < SBLOCKSIZE) - bp->b_flags |= B_INVAL | B_NOCACHE; - brelse(bp); - bp = NULL; - fs = ump->um_fs; - ffs_oldfscompat_read(fs, ump, sblockloc); - fs->fs_ronly = ronly; - size = fs->fs_cssize; - blks = howmany(size, fs->fs_fsize); - if (fs->fs_contigsumsize > 0) - size += fs->fs_ncg * sizeof(int32_t); - size += fs->fs_ncg * sizeof(u_int8_t); - space = malloc((u_long)size, M_UFSMNT, M_WAITOK); - fs->fs_csp = space; - for (i = 0; i < blks; i += fs->fs_frag) { - size = fs->fs_bsize; - if (i + fs->fs_frag > blks) - size = (blks - i) * fs->fs_fsize; - if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, - cred, &bp)) != 0) { - free(fs->fs_csp, M_UFSMNT); - goto out; - } - bcopy(bp->b_data, space, (u_int)size); - space = (char *)space + size; - brelse(bp); - bp = NULL; - } - if (fs->fs_contigsumsize > 0) { - fs->fs_maxcluster = lp = space; - for (i = 0; i < fs->fs_ncg; i++) - *lp++ = fs->fs_contigsumsize; - space = lp; - } - size = fs->fs_ncg * sizeof(u_int8_t); - fs->fs_contigdirs = (u_int8_t *)space; - bzero(fs->fs_contigdirs, size); - fs->fs_active = NULL; - mp->mnt_data = (qaddr_t)ump; - mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0]; - mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1]; - if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 || - vfs_getvfs(&mp->mnt_stat.f_fsid)) - vfs_getnewfsid(mp); - mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; - mp->mnt_flag |= MNT_LOCAL; - if ((fs->fs_flags & FS_MULTILABEL) != 0) -#ifdef MAC - mp->mnt_flag |= MNT_MULTILABEL; -#else - printf( -"WARNING: %s: multilabel flag on fs but no MAC support\n", - fs->fs_fsmnt); -#endif - if ((fs->fs_flags & FS_ACLS) != 0) -#ifdef UFS_ACL - mp->mnt_flag |= MNT_ACLS; -#else - printf( -"WARNING: %s: ACLs flag on fs but no ACLs support\n", - fs->fs_fsmnt); -#endif - ump->um_mountp = mp; - ump->um_dev = dev; - ump->um_devvp = devvp; - ump->um_nindir = fs->fs_nindir; - ump->um_bptrtodb = fs->fs_fsbtodb; - ump->um_seqinc = fs->fs_frag; - for (i = 0; i < MAXQUOTAS; i++) - ump->um_quotas[i] = NULLVP; -#ifdef UFS_EXTATTR - ufs_extattr_uepm_init(&ump->um_extattr); -#endif - devvp->v_rdev->si_mountpoint = mp; - - /* - * Set FS local "last mounted on" information (NULL pad) - */ - copystr( mp->mnt_stat.f_mntonname, /* mount point*/ - fs->fs_fsmnt, /* copy area*/ - sizeof(fs->fs_fsmnt) - 1, /* max size*/ - &strsize); /* real size*/ - bzero( fs->fs_fsmnt + strsize, 
sizeof(fs->fs_fsmnt) - strsize); - - if( mp->mnt_flag & MNT_ROOTFS) { - /* - * Root mount; update timestamp in mount structure. - * this will be used by the common root mount code - * to update the system clock. - */ - mp->mnt_time = fs->fs_time; - } - - if (ronly == 0) { - if ((fs->fs_flags & FS_DOSOFTDEP) && - (error = softdep_mount(devvp, mp, fs, cred)) != 0) { - free(fs->fs_csp, M_UFSMNT); - goto out; - } - if (fs->fs_snapinum[0] != 0) - ffs_snapshot_mount(mp); - fs->fs_fmod = 1; - fs->fs_clean = 0; - (void) ffs_sbupdate(ump, MNT_WAIT); - } -#ifdef UFS_EXTATTR -#ifdef UFS_EXTATTR_AUTOSTART - /* - * - * Auto-starting does the following: - * - check for /.attribute in the fs, and extattr_start if so - * - for each file in .attribute, enable that file with - * an attribute of the same name. - * Not clear how to report errors -- probably eat them. - * This would all happen while the filesystem was busy/not - * available, so would effectively be "atomic". - */ - (void) ufs_extattr_autostart(mp, td); -#endif /* !UFS_EXTATTR_AUTOSTART */ -#endif /* !UFS_EXTATTR */ - return (0); -out: - devvp->v_rdev->si_mountpoint = NULL; - if (bp) - brelse(bp); - /* XXX: see comment above VOP_OPEN */ -#ifdef notyet - (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, td); -#else - (void)VOP_CLOSE(devvp, FREAD|FWRITE, cred, td); -#endif - if (ump) { - free(ump->um_fs, M_UFSMNT); - free(ump, M_UFSMNT); - mp->mnt_data = (qaddr_t)0; - } - return (error); -} - -#include -int bigcgs = 0; -SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, ""); - -/* - * Sanity checks for loading old filesystem superblocks. - * See ffs_oldfscompat_write below for unwound actions. - * - * XXX - Parts get retired eventually. - * Unfortunately new bits get added. - */ -static void -ffs_oldfscompat_read(fs, ump, sblockloc) - struct fs *fs; - struct ufsmount *ump; - ufs2_daddr_t sblockloc; -{ - off_t maxfilesize; - - /* - * If not yet done, update fs_flags location and value of fs_sblockloc. - */ - if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) { - fs->fs_flags = fs->fs_old_flags; - fs->fs_old_flags |= FS_FLAGS_UPDATED; - fs->fs_sblockloc = sblockloc; - } - /* - * If not yet done, update UFS1 superblock with new wider fields. - */ - if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) { - fs->fs_maxbsize = fs->fs_bsize; - fs->fs_time = fs->fs_old_time; - fs->fs_size = fs->fs_old_size; - fs->fs_dsize = fs->fs_old_dsize; - fs->fs_csaddr = fs->fs_old_csaddr; - fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir; - fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree; - fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree; - fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree; - } - if (fs->fs_magic == FS_UFS1_MAGIC && - fs->fs_old_inodefmt < FS_44INODEFMT) { - fs->fs_maxfilesize = (u_quad_t) 1LL << 39; - fs->fs_qbmask = ~fs->fs_bmask; - fs->fs_qfmask = ~fs->fs_fmask; - } - if (fs->fs_magic == FS_UFS1_MAGIC) { - ump->um_savedmaxfilesize = fs->fs_maxfilesize; - maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1; - if (fs->fs_maxfilesize > maxfilesize) - fs->fs_maxfilesize = maxfilesize; - } - /* Compatibility for old filesystems */ - if (fs->fs_avgfilesize <= 0) - fs->fs_avgfilesize = AVFILESIZ; - if (fs->fs_avgfpdir <= 0) - fs->fs_avgfpdir = AFPDIR; - if (bigcgs) { - fs->fs_save_cgsize = fs->fs_cgsize; - fs->fs_cgsize = fs->fs_bsize; - } -} - -/* - * Unwinding superblock updates for old filesystems. - * See ffs_oldfscompat_read above for details. - * - * XXX - Parts get retired eventually. 
- * Unfortunately new bits get added. - */ -static void -ffs_oldfscompat_write(fs, ump) - struct fs *fs; - struct ufsmount *ump; -{ - - /* - * Copy back UFS2 updated fields that UFS1 inspects. - */ - if (fs->fs_magic == FS_UFS1_MAGIC) { - fs->fs_old_time = fs->fs_time; - fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir; - fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree; - fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree; - fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree; - fs->fs_maxfilesize = ump->um_savedmaxfilesize; - } - if (bigcgs) { - fs->fs_cgsize = fs->fs_save_cgsize; - fs->fs_save_cgsize = 0; - } -} - -/* - * unmount system call - */ -int -ffs_unmount(mp, mntflags, td) - struct mount *mp; - int mntflags; - struct thread *td; -{ - struct ufsmount *ump = VFSTOUFS(mp); - struct fs *fs; - int error, flags; - - flags = 0; - if (mntflags & MNT_FORCE) { - flags |= FORCECLOSE; - } -#ifdef UFS_EXTATTR - if ((error = ufs_extattr_stop(mp, td))) { - if (error != EOPNOTSUPP) - printf("ffs_unmount: ufs_extattr_stop returned %d\n", - error); - } else { - ufs_extattr_uepm_destroy(&ump->um_extattr); - } -#endif - if (mp->mnt_flag & MNT_SOFTDEP) { - if ((error = softdep_flushfiles(mp, flags, td)) != 0) - return (error); - } else { - if ((error = ffs_flushfiles(mp, flags, td)) != 0) - return (error); - } - fs = ump->um_fs; - if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { - printf("%s: unmount pending error: blocks %jd files %d\n", - fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, - fs->fs_pendinginodes); - fs->fs_pendingblocks = 0; - fs->fs_pendinginodes = 0; - } - if (fs->fs_ronly == 0) { - fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1; - error = ffs_sbupdate(ump, MNT_WAIT); - if (error) { - fs->fs_clean = 0; - return (error); - } - } - ump->um_devvp->v_rdev->si_mountpoint = NULL; - - vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, td, 0, 0); - /* XXX: see comment above VOP_OPEN */ -#ifdef notyet - error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE, - NOCRED, td); -#else - error = VOP_CLOSE(ump->um_devvp, FREAD|FWRITE, NOCRED, td); -#endif - - vrele(ump->um_devvp); - - free(fs->fs_csp, M_UFSMNT); - free(fs, M_UFSMNT); - free(ump, M_UFSMNT); - mp->mnt_data = (qaddr_t)0; - mp->mnt_flag &= ~MNT_LOCAL; - return (error); -} - -/* - * Flush out all the files in a filesystem. - */ -int -ffs_flushfiles(mp, flags, td) - struct mount *mp; - int flags; - struct thread *td; -{ - struct ufsmount *ump; - int error; - - ump = VFSTOUFS(mp); -#ifdef QUOTA - if (mp->mnt_flag & MNT_QUOTA) { - int i; - error = vflush(mp, 0, SKIPSYSTEM|flags); - if (error) - return (error); - for (i = 0; i < MAXQUOTAS; i++) { - if (ump->um_quotas[i] == NULLVP) - continue; - quotaoff(td, mp, i); - } - /* - * Here we fall through to vflush again to ensure - * that we have gotten rid of all the system vnodes. - */ - } -#endif - ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles"); - if (ump->um_devvp->v_vflag & VV_COPYONWRITE) { - if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0) - return (error); - ffs_snapshot_unmount(mp); - /* - * Here we fall through to vflush again to ensure - * that we have gotten rid of all the system vnodes. - */ - } - /* - * Flush all the files. - */ - if ((error = vflush(mp, 0, flags)) != 0) - return (error); - /* - * Flush filesystem metadata. 
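One detail worth pulling out of the ffs_oldfscompat_read()/ffs_oldfscompat_write() pair above is the UFS1 maximum-file-size clamp: a UFS1 inode addresses at most 2^30 blocks, so the in-core fs_maxfilesize is capped at 0x40000000 * fs_bsize - 1, and the saved on-disk value is put back before the superblock is written out. A small stand-alone computation, assuming a hypothetical 16K block size:

    #include <stdio.h>
    #include <stdint.h>

    /*
     * UFS1 compatibility clamp: 2^30 addressable blocks times the block
     * size, minus one byte.
     */
    int
    main(void)
    {
        uint64_t fs_bsize = 16384;          /* assumed block size */
        uint64_t maxfilesize = (uint64_t)0x40000000 * fs_bsize - 1;

        printf("UFS1 max file size with 16K blocks: %llu bytes\n",
            (unsigned long long)maxfilesize);
        return (0);
    }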
- */ - vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td); - error = VOP_FSYNC(ump->um_devvp, td->td_ucred, MNT_WAIT, td); - VOP_UNLOCK(ump->um_devvp, 0, td); - return (error); -} - -/* - * Get filesystem statistics. - */ -int -ffs_statfs(mp, sbp, td) - struct mount *mp; - struct statfs *sbp; - struct thread *td; -{ - struct ufsmount *ump; - struct fs *fs; - - ump = VFSTOUFS(mp); - fs = ump->um_fs; - if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC) - panic("ffs_statfs"); - sbp->f_version = STATFS_VERSION; - sbp->f_bsize = fs->fs_fsize; - sbp->f_iosize = fs->fs_bsize; - sbp->f_blocks = fs->fs_dsize; - sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag + - fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks); - sbp->f_bavail = freespace(fs, fs->fs_minfree) + - dbtofsb(fs, fs->fs_pendingblocks); - sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO; - sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes; - sbp->f_namemax = NAME_MAX; - if (sbp != &mp->mnt_stat) { - sbp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; - sbp->f_type = mp->mnt_vfc->vfc_typenum; - sbp->f_syncwrites = mp->mnt_stat.f_syncwrites; - sbp->f_asyncwrites = mp->mnt_stat.f_asyncwrites; - sbp->f_syncreads = mp->mnt_stat.f_syncreads; - sbp->f_asyncreads = mp->mnt_stat.f_asyncreads; - sbp->f_owner = mp->mnt_stat.f_owner; - sbp->f_fsid = mp->mnt_stat.f_fsid; - bcopy((caddr_t)mp->mnt_stat.f_fstypename, - (caddr_t)&sbp->f_fstypename[0], MFSNAMELEN); - bcopy((caddr_t)mp->mnt_stat.f_mntonname, - (caddr_t)&sbp->f_mntonname[0], MNAMELEN); - bcopy((caddr_t)mp->mnt_stat.f_mntfromname, - (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); - } - return (0); -} - -/* - * Go through the disk queues to initiate sandbagged IO; - * go through the inodes to write those that have been modified; - * initiate the writing of the super block if it has been modified. - * - * Note: we are always called with the filesystem marked `MPBUSY'. - */ -int -ffs_sync(mp, waitfor, cred, td) - struct mount *mp; - int waitfor; - struct ucred *cred; - struct thread *td; -{ - struct vnode *nvp, *vp, *devvp; - struct inode *ip; - struct ufsmount *ump = VFSTOUFS(mp); - struct fs *fs; - int error, count, wait, lockreq, allerror = 0; - - fs = ump->um_fs; - if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */ - printf("fs = %s\n", fs->fs_fsmnt); - panic("ffs_sync: rofs mod"); - } - /* - * Write back each (modified) inode. - */ - wait = 0; - lockreq = LK_EXCLUSIVE | LK_NOWAIT; - if (waitfor == MNT_WAIT) { - wait = 1; - lockreq = LK_EXCLUSIVE; - } - lockreq |= LK_INTERLOCK; - MNT_ILOCK(mp); -loop: - for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) { - /* - * If the vnode that we are about to sync is no longer - * associated with this mount point, start over. - */ - if (vp->v_mount != mp) - goto loop; - - /* - * Depend on the mntvnode_slock to keep things stable enough - * for a quick test. Since there might be hundreds of - * thousands of vnodes, we cannot afford even a subroutine - * call unless there's a good chance that we have work to do. 
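The statistics reported by ffs_statfs() above are straight arithmetic on the superblock summary counters. A userland sketch with made-up numbers; the dbtofsb() conversion of pending blocks and the freespace() reserve calculation for f_bavail are elided.

    #include <stdio.h>
    #include <stdint.h>

    #define ROOTINO 2   /* first usable inode number, as in FFS */

    int
    main(void)
    {
        /* Made-up summary values for an imaginary filesystem. */
        int64_t cs_nbfree = 1000;   /* free full blocks */
        int64_t cs_nffree = 250;    /* free fragments */
        int64_t cs_nifree = 5000;   /* free inodes */
        int64_t pendingfrags = 40;  /* space soft updates has not yet returned */
        int64_t pendinginodes = 3;
        int frag = 8;               /* fragments per block */
        int ncg = 10, ipg = 2048;   /* cylinder groups, inodes per group */

        /* f_bfree counts fragments: whole blocks, loose fragments, and
         * blocks still tied up in the soft updates backlog. */
        int64_t f_bfree = cs_nbfree * frag + cs_nffree + pendingfrags;
        int64_t f_files = (int64_t)ncg * ipg - ROOTINO;
        int64_t f_ffree = cs_nifree + pendinginodes;

        printf("f_bfree = %lld frags\n", (long long)f_bfree);
        printf("f_files = %lld, f_ffree = %lld\n",
            (long long)f_files, (long long)f_ffree);
        return (0);
    }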
- */ - nvp = TAILQ_NEXT(vp, v_nmntvnodes); - VI_LOCK(vp); - if (vp->v_iflag & VI_XLOCK) { - VI_UNLOCK(vp); - continue; - } - ip = VTOI(vp); - if (vp->v_type == VNON || ((ip->i_flag & - (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && - TAILQ_EMPTY(&vp->v_dirtyblkhd))) { - VI_UNLOCK(vp); - continue; - } - MNT_IUNLOCK(mp); - if ((error = vget(vp, lockreq, td)) != 0) { - MNT_ILOCK(mp); - if (error == ENOENT) - goto loop; - continue; - } - if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0) - allerror = error; - VOP_UNLOCK(vp, 0, td); - vrele(vp); - MNT_ILOCK(mp); - if (TAILQ_NEXT(vp, v_nmntvnodes) != nvp) - goto loop; - } - MNT_IUNLOCK(mp); - /* - * Force stale filesystem control information to be flushed. - */ - if (waitfor == MNT_WAIT) { - if ((error = softdep_flushworklist(ump->um_mountp, &count, td))) - allerror = error; - /* Flushed work items may create new vnodes to clean */ - if (allerror == 0 && count) { - MNT_ILOCK(mp); - goto loop; - } - } -#ifdef QUOTA - qsync(mp); -#endif - devvp = ump->um_devvp; - VI_LOCK(devvp); - if (waitfor != MNT_LAZY && - (devvp->v_numoutput > 0 || TAILQ_FIRST(&devvp->v_dirtyblkhd))) { - vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, td); - if ((error = VOP_FSYNC(devvp, cred, waitfor, td)) != 0) - allerror = error; - VOP_UNLOCK(devvp, 0, td); - if (allerror == 0 && waitfor == MNT_WAIT) { - MNT_ILOCK(mp); - goto loop; - } - } else - VI_UNLOCK(devvp); - /* - * Write back modified superblock. - */ - if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0) - allerror = error; - return (allerror); -} - -int -ffs_vget(mp, ino, flags, vpp) - struct mount *mp; - ino_t ino; - int flags; - struct vnode **vpp; -{ - struct thread *td = curthread; /* XXX */ - struct fs *fs; - struct inode *ip; - struct ufsmount *ump; - struct buf *bp; - struct vnode *vp; - dev_t dev; - int error; - - ump = VFSTOUFS(mp); - dev = ump->um_dev; - - /* - * We do not lock vnode creation as it is believed to be too - * expensive for such rare case as simultaneous creation of vnode - * for same ino by different processes. We just allow them to race - * and check later to decide who wins. Let the race begin! - */ - if ((error = ufs_ihashget(dev, ino, flags, vpp)) != 0) - return (error); - if (*vpp != NULL) - return (0); - - /* - * If this MALLOC() is performed after the getnewvnode() - * it might block, leaving a vnode with a NULL v_data to be - * found by ffs_sync() if a sync happens to fire right then, - * which will cause a panic because ffs_sync() blindly - * dereferences vp->v_data (as well it should). - */ - ip = uma_zalloc(uma_inode, M_WAITOK); - - /* Allocate a new vnode/inode. */ - error = getnewvnode("ufs", mp, ffs_vnodeop_p, &vp); - if (error) { - *vpp = NULL; - uma_zfree(uma_inode, ip); - return (error); - } - bzero((caddr_t)ip, sizeof(struct inode)); - /* - * FFS supports recursive locking. - */ - vp->v_vnlock->lk_flags |= LK_CANRECURSE; - vp->v_data = ip; - ip->i_vnode = vp; - ip->i_ump = ump; - ip->i_fs = fs = ump->um_fs; - ip->i_dev = dev; - ip->i_number = ino; -#ifdef QUOTA - { - int i; - for (i = 0; i < MAXQUOTAS; i++) - ip->i_dquot[i] = NODQUOT; - } -#endif - /* - * Exclusively lock the vnode before adding to hash. Note, that we - * must not release nor downgrade the lock (despite flags argument - * says) till it is fully initialized. - */ - lockmgr(vp->v_vnlock, LK_EXCLUSIVE, (struct mtx *)0, td); - - /* - * Atomicaly (in terms of ufs_hash operations) check the hash for - * duplicate of vnode being created and add it to the hash. 
If a - * duplicate vnode was found, it will be vget()ed from hash for us. - */ - if ((error = ufs_ihashins(ip, flags, vpp)) != 0) { - vput(vp); - *vpp = NULL; - return (error); - } - - /* We lost the race, then throw away our vnode and return existing */ - if (*vpp != NULL) { - vput(vp); - return (0); - } - - /* Read in the disk contents for the inode, copy into the inode. */ - error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), - (int)fs->fs_bsize, NOCRED, &bp); - if (error) { - /* - * The inode does not contain anything useful, so it would - * be misleading to leave it on its hash chain. With mode - * still zero, it will be unlinked and returned to the free - * list by vput(). - */ - brelse(bp); - vput(vp); - *vpp = NULL; - return (error); - } - if (ip->i_ump->um_fstype == UFS1) - ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK); - else - ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK); - ffs_load_inode(bp, ip, fs, ino); - if (DOINGSOFTDEP(vp)) - softdep_load_inodeblock(ip); - else - ip->i_effnlink = ip->i_nlink; - bqrelse(bp); - - /* - * Initialize the vnode from the inode, check for aliases. - * Note that the underlying vnode may have changed. - */ - error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp); - if (error) { - vput(vp); - *vpp = NULL; - return (error); - } - /* - * Finish inode initialization. - */ - VREF(ip->i_devvp); - /* - * Set up a generation number for this inode if it does not - * already have one. This should only happen on old filesystems. - */ - if (ip->i_gen == 0) { - ip->i_gen = arc4random() / 2 + 1; - if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { - ip->i_flag |= IN_MODIFIED; - DIP(ip, i_gen) = ip->i_gen; - } - } - /* - * Ensure that uid and gid are correct. This is a temporary - * fix until fsck has been changed to do the update. - */ - if (fs->fs_magic == FS_UFS1_MAGIC && /* XXX */ - fs->fs_old_inodefmt < FS_44INODEFMT) { /* XXX */ - ip->i_uid = ip->i_din1->di_ouid; /* XXX */ - ip->i_gid = ip->i_din1->di_ogid; /* XXX */ - } /* XXX */ - -#ifdef MAC - if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) { - /* - * If this vnode is already allocated, and we're running - * multi-label, attempt to perform a label association - * from the extended attributes on the inode. - */ - error = mac_associate_vnode_extattr(mp, vp); - if (error) { - /* ufs_inactive will release ip->i_devvp ref. */ - vput(vp); - *vpp = NULL; - return (error); - } - } -#endif - - *vpp = vp; - return (0); -} - -/* - * File handle to vnode - * - * Have to be really careful about stale file handles: - * - check that the inode number is valid - * - call ffs_vget() to get the locked inode - * - check for an unallocated inode (i_mode == 0) - * - check that the given client host has export rights and return - * those rights via. exflagsp and credanonp - */ -int -ffs_fhtovp(mp, fhp, vpp) - struct mount *mp; - struct fid *fhp; - struct vnode **vpp; -{ - struct ufid *ufhp; - struct fs *fs; - - ufhp = (struct ufid *)fhp; - fs = VFSTOUFS(mp)->um_fs; - if (ufhp->ufid_ino < ROOTINO || - ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg) - return (ESTALE); - return (ufs_fhtovp(mp, ufhp, vpp)); -} - -/* - * Vnode pointer to File handle - */ -/* ARGSUSED */ -int -ffs_vptofh(vp, fhp) - struct vnode *vp; - struct fid *fhp; -{ - struct inode *ip; - struct ufid *ufhp; - - ip = VTOI(vp); - ufhp = (struct ufid *)fhp; - ufhp->ufid_len = sizeof(struct ufid); - ufhp->ufid_ino = ip->i_number; - ufhp->ufid_gen = ip->i_gen; - return (0); -} - -/* - * Initialize the filesystem. 
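The stale-handle test in ffs_fhtovp() above needs only the superblock: an inode number from an NFS file handle is plausible when it lies between ROOTINO and ncg * ipg. A trivial stand-alone version of that check:

    #include <stdio.h>

    #define ROOTINO 2

    /*
     * An inode number is only valid if it falls inside the inode space
     * described by the superblock (ncg cylinder groups, ipg inodes each).
     */
    static int
    fh_ino_plausible(unsigned long ino, unsigned long ncg, unsigned long ipg)
    {
        return (ino >= ROOTINO && ino < ncg * ipg);
    }

    int
    main(void)
    {
        printf("%d %d\n", fh_ino_plausible(100, 10, 2048),
            fh_ino_plausible(999999, 10, 2048));    /* prints: 1 0 */
        return (0);
    }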
- */ -static int -ffs_init(vfsp) - struct vfsconf *vfsp; -{ - - softdep_initialize(); - return (ufs_init(vfsp)); -} - -/* - * Undo the work of ffs_init(). - */ -static int -ffs_uninit(vfsp) - struct vfsconf *vfsp; -{ - int ret; - - ret = ufs_uninit(vfsp); - softdep_uninitialize(); - return (ret); -} - -/* - * Write a superblock and associated information back to disk. - */ -static int -ffs_sbupdate(mp, waitfor) - struct ufsmount *mp; - int waitfor; -{ - struct fs *fs = mp->um_fs; - struct buf *bp; - int blks; - void *space; - int i, size, error, allerror = 0; - - if (fs->fs_ronly == 1 && - (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) != - (MNT_RDONLY | MNT_UPDATE)) - panic("ffs_sbupdate: write read-only filesystem"); - /* - * First write back the summary information. - */ - blks = howmany(fs->fs_cssize, fs->fs_fsize); - space = fs->fs_csp; - for (i = 0; i < blks; i += fs->fs_frag) { - size = fs->fs_bsize; - if (i + fs->fs_frag > blks) - size = (blks - i) * fs->fs_fsize; - bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i), - size, 0, 0, 0); - bcopy(space, bp->b_data, (u_int)size); - space = (char *)space + size; - if (waitfor != MNT_WAIT) - bawrite(bp); - else if ((error = bwrite(bp)) != 0) - allerror = error; - } - /* - * Now write back the superblock itself. If any errors occurred - * up to this point, then fail so that the superblock avoids - * being written out as clean. - */ - if (allerror) - return (allerror); - if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 && - (fs->fs_flags & FS_FLAGS_UPDATED) == 0) { - printf("%s: correcting fs_sblockloc from %jd to %d\n", - fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1); - fs->fs_sblockloc = SBLOCK_UFS1; - } - if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 && - (fs->fs_flags & FS_FLAGS_UPDATED) == 0) { - printf("%s: correcting fs_sblockloc from %jd to %d\n", - fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2); - fs->fs_sblockloc = SBLOCK_UFS2; - } - bp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize, - 0, 0, 0); - fs->fs_fmod = 0; - fs->fs_time = time_second; - bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); - ffs_oldfscompat_write((struct fs *)bp->b_data, mp); - if (waitfor != MNT_WAIT) - bawrite(bp); - else if ((error = bwrite(bp)) != 0) - allerror = error; - return (allerror); -} - -static int -ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp, - int attrnamespace, const char *attrname, struct thread *td) -{ - -#ifdef UFS_EXTATTR - return (ufs_extattrctl(mp, cmd, filename_vp, attrnamespace, - attrname, td)); -#else - return (vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, - attrname, td)); -#endif -} - -static void -ffs_ifree(struct ufsmount *ump, struct inode *ip) -{ - - if (ump->um_fstype == UFS1 && ip->i_din1 != NULL) - uma_zfree(uma_ufs1, ip->i_din1); - else if (ip->i_din2 != NULL) - uma_zfree(uma_ufs2, ip->i_din2); - uma_zfree(uma_inode, ip); -} diff --git a/src/sys/ufs/ffs/ffs_vnops.c b/src/sys/ufs/ffs/ffs_vnops.c deleted file mode 100644 index 401cacd..0000000 --- a/src/sys/ufs/ffs/ffs_vnops.c +++ /dev/null @@ -1,1817 +0,0 @@ -#if 0 -/* - * Copyright (c) 2002, 2003 Networks Associates Technology, Inc. - * All rights reserved. - * - * This software was developed for the FreeBSD Project by Marshall - * Kirk McKusick and Network Associates Laboratories, the Security - * Research Division of Network Associates, Inc. 
under DARPA/SPAWAR - * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS - * research program - * - * Copyright (c) 1982, 1986, 1989, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)ffs_vnops.c 8.15 (Berkeley) 5/14/95 - */ - -#include -__FBSDID("$FreeBSD: src/sys/ufs/ffs/ffs_vnops.c,v 1.119 2003/10/04 20:38:32 alc Exp $"); - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include "opt_directio.h" - -#ifdef DIRECTIO -extern int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone); -#endif -static int ffs_fsync(struct vop_fsync_args *); -static int ffs_getpages(struct vop_getpages_args *); -static int ffs_read(struct vop_read_args *); -static int ffs_write(struct vop_write_args *); -static int ffs_extread(struct vnode *vp, struct uio *uio, int ioflag); -static int ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, - struct ucred *cred); -static int ffsext_strategy(struct vop_strategy_args *); -static int ffs_closeextattr(struct vop_closeextattr_args *); -static int ffs_deleteextattr(struct vop_deleteextattr_args *); -static int ffs_getextattr(struct vop_getextattr_args *); -static int ffs_listextattr(struct vop_listextattr_args *); -static int ffs_openextattr(struct vop_openextattr_args *); -static int ffs_setextattr(struct vop_setextattr_args *); - - -/* Global vfs data structures for ufs. 
*/ -vop_t **ffs_vnodeop_p; -static struct vnodeopv_entry_desc ffs_vnodeop_entries[] = { - { &vop_default_desc, (vop_t *) ufs_vnoperate }, - { &vop_fsync_desc, (vop_t *) ffs_fsync }, - { &vop_getpages_desc, (vop_t *) ffs_getpages }, - { &vop_read_desc, (vop_t *) ffs_read }, - { &vop_reallocblks_desc, (vop_t *) ffs_reallocblks }, - { &vop_write_desc, (vop_t *) ffs_write }, - { &vop_closeextattr_desc, (vop_t *) ffs_closeextattr }, - { &vop_deleteextattr_desc, (vop_t *) ffs_deleteextattr }, - { &vop_getextattr_desc, (vop_t *) ffs_getextattr }, - { &vop_listextattr_desc, (vop_t *) ffs_listextattr }, - { &vop_openextattr_desc, (vop_t *) ffs_openextattr }, - { &vop_setextattr_desc, (vop_t *) ffs_setextattr }, - { NULL, NULL } -}; -static struct vnodeopv_desc ffs_vnodeop_opv_desc = - { &ffs_vnodeop_p, ffs_vnodeop_entries }; - -vop_t **ffs_specop_p; -static struct vnodeopv_entry_desc ffs_specop_entries[] = { - { &vop_default_desc, (vop_t *) ufs_vnoperatespec }, - { &vop_fsync_desc, (vop_t *) ffs_fsync }, - { &vop_reallocblks_desc, (vop_t *) ffs_reallocblks }, - { &vop_strategy_desc, (vop_t *) ffsext_strategy }, - { &vop_closeextattr_desc, (vop_t *) ffs_closeextattr }, - { &vop_deleteextattr_desc, (vop_t *) ffs_deleteextattr }, - { &vop_getextattr_desc, (vop_t *) ffs_getextattr }, - { &vop_listextattr_desc, (vop_t *) ffs_listextattr }, - { &vop_openextattr_desc, (vop_t *) ffs_openextattr }, - { &vop_setextattr_desc, (vop_t *) ffs_setextattr }, - { NULL, NULL } -}; -static struct vnodeopv_desc ffs_specop_opv_desc = - { &ffs_specop_p, ffs_specop_entries }; - -vop_t **ffs_fifoop_p; -static struct vnodeopv_entry_desc ffs_fifoop_entries[] = { - { &vop_default_desc, (vop_t *) ufs_vnoperatefifo }, - { &vop_fsync_desc, (vop_t *) ffs_fsync }, - { &vop_reallocblks_desc, (vop_t *) ffs_reallocblks }, - { &vop_strategy_desc, (vop_t *) ffsext_strategy }, - { &vop_closeextattr_desc, (vop_t *) ffs_closeextattr }, - { &vop_deleteextattr_desc, (vop_t *) ffs_deleteextattr }, - { &vop_getextattr_desc, (vop_t *) ffs_getextattr }, - { &vop_listextattr_desc, (vop_t *) ffs_listextattr }, - { &vop_openextattr_desc, (vop_t *) ffs_openextattr }, - { &vop_setextattr_desc, (vop_t *) ffs_setextattr }, - { NULL, NULL } -}; -static struct vnodeopv_desc ffs_fifoop_opv_desc = - { &ffs_fifoop_p, ffs_fifoop_entries }; - -VNODEOP_SET(ffs_vnodeop_opv_desc); -VNODEOP_SET(ffs_specop_opv_desc); -VNODEOP_SET(ffs_fifoop_opv_desc); - -/* - * Synch an open file. - */ -/* ARGSUSED */ -static int -ffs_fsync(ap) - struct vop_fsync_args /* { - struct vnode *a_vp; - struct ucred *a_cred; - int a_waitfor; - struct thread *a_td; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - struct inode *ip = VTOI(vp); - struct buf *bp; - struct buf *nbp; - int s, error, wait, passes, skipmeta; - ufs_lbn_t lbn; - - wait = (ap->a_waitfor == MNT_WAIT); - if (vn_isdisk(vp, NULL)) { - lbn = INT_MAX; - if (vp->v_rdev->si_mountpoint != NULL && - (vp->v_rdev->si_mountpoint->mnt_flag & MNT_SOFTDEP)) - softdep_fsync_mountdev(vp); - } else { - lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1)); - } - - /* - * Flush all dirty buffers associated with a vnode. 
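The lbn computed at the top of ffs_fsync() above marks the first logical block past the end of the file; later in the flush loop, dirty buffers of a regular file at or beyond that block are treated as truncated-off data and discarded instead of written. A simplified computation, with lblkno() reduced to a plain division:

    #include <stdio.h>
    #include <stdint.h>

    int
    main(void)
    {
        uint64_t i_size = 100000;   /* assumed current file size in bytes */
        uint64_t fs_bsize = 16384;  /* assumed filesystem block size */

        /* First logical block with no file data behind it. */
        uint64_t lbn = (i_size + fs_bsize - 1) / fs_bsize;

        printf("buffers with lblkno >= %llu cover truncated data\n",
            (unsigned long long)lbn);
        return (0);
    }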
- */ - passes = NIADDR + 1; - skipmeta = 0; - if (wait) - skipmeta = 1; - s = splbio(); - VI_LOCK(vp); -loop: - TAILQ_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) - bp->b_vflags &= ~BV_SCANNED; - for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { - nbp = TAILQ_NEXT(bp, b_vnbufs); - /* - * Reasons to skip this buffer: it has already been considered - * on this pass, this pass is the first time through on a - * synchronous flush request and the buffer being considered - * is metadata, the buffer has dependencies that will cause - * it to be redirtied and it has not already been deferred, - * or it is already being written. - */ - if ((bp->b_vflags & BV_SCANNED) != 0) - continue; - bp->b_vflags |= BV_SCANNED; - if ((skipmeta == 1 && bp->b_lblkno < 0)) - continue; - if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) - continue; - if (!wait && LIST_FIRST(&bp->b_dep) != NULL && - (bp->b_flags & B_DEFERRED) == 0 && - buf_countdeps(bp, 0)) { - bp->b_flags |= B_DEFERRED; - BUF_UNLOCK(bp); - continue; - } - VI_UNLOCK(vp); - if ((bp->b_flags & B_DELWRI) == 0) - panic("ffs_fsync: not dirty"); - if (vp != bp->b_vp) - panic("ffs_fsync: vp != vp->b_vp"); - /* - * If this is a synchronous flush request, or it is not a - * file or device, start the write on this buffer immediatly. - */ - if (wait || (vp->v_type != VREG && vp->v_type != VBLK)) { - - /* - * On our final pass through, do all I/O synchronously - * so that we can find out if our flush is failing - * because of write errors. - */ - if (passes > 0 || !wait) { - if ((bp->b_flags & B_CLUSTEROK) && !wait) { - (void) vfs_bio_awrite(bp); - } else { - bremfree(bp); - splx(s); - (void) bawrite(bp); - s = splbio(); - } - } else { - bremfree(bp); - splx(s); - if ((error = bwrite(bp)) != 0) - return (error); - s = splbio(); - } - } else if ((vp->v_type == VREG) && (bp->b_lblkno >= lbn)) { - /* - * If the buffer is for data that has been truncated - * off the file, then throw it away. - */ - bremfree(bp); - bp->b_flags |= B_INVAL | B_NOCACHE; - splx(s); - brelse(bp); - s = splbio(); - } else - vfs_bio_awrite(bp); - - /* - * Since we may have slept during the I/O, we need - * to start from a known point. - */ - VI_LOCK(vp); - nbp = TAILQ_FIRST(&vp->v_dirtyblkhd); - } - /* - * If we were asked to do this synchronously, then go back for - * another pass, this time doing the metadata. - */ - if (skipmeta) { - skipmeta = 0; - goto loop; - } - - if (wait) { - while (vp->v_numoutput) { - vp->v_iflag |= VI_BWAIT; - msleep((caddr_t)&vp->v_numoutput, VI_MTX(vp), - PRIBIO + 4, "ffsfsn", 0); - } - VI_UNLOCK(vp); - - /* - * Ensure that any filesystem metatdata associated - * with the vnode has been written. - */ - splx(s); - if ((error = softdep_sync_metadata(ap)) != 0) - return (error); - s = splbio(); - - VI_LOCK(vp); - if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) { - /* - * Block devices associated with filesystems may - * have new I/O requests posted for them even if - * the vnode is locked, so no amount of trying will - * get them clean. Thus we give block devices a - * good effort, then just give up. For all other file - * types, go around and try again until it is clean. - */ - if (passes > 0) { - passes -= 1; - goto loop; - } -#ifdef DIAGNOSTIC - if (!vn_isdisk(vp, NULL)) - vprint("ffs_fsync: dirty", vp); -#endif - } - } - VI_UNLOCK(vp); - splx(s); - return (UFS_UPDATE(vp, wait)); -} - - -/* - * Vnode op for reading. 
- */ -/* ARGSUSED */ -static int -ffs_read(ap) - struct vop_read_args /* { - struct vnode *a_vp; - struct uio *a_uio; - int a_ioflag; - struct ucred *a_cred; - } */ *ap; -{ - struct vnode *vp; - struct inode *ip; - struct uio *uio; - struct fs *fs; - struct buf *bp; - ufs_lbn_t lbn, nextlbn; - off_t bytesinfile; - long size, xfersize, blkoffset; - int error, orig_resid; - int seqcount; - int ioflag; - vm_object_t object; - - vp = ap->a_vp; - uio = ap->a_uio; - ioflag = ap->a_ioflag; - if (ap->a_ioflag & IO_EXT) -#ifdef notyet - return (ffs_extread(vp, uio, ioflag)); -#else - panic("ffs_read+IO_EXT"); -#endif -#ifdef DIRECTIO - if ((ioflag & IO_DIRECT) != 0) { - int workdone; - - error = ffs_rawread(vp, uio, &workdone); - if (error != 0 || workdone != 0) - return error; - } -#endif - - GIANT_REQUIRED; - - seqcount = ap->a_ioflag >> 16; - ip = VTOI(vp); - -#ifdef DIAGNOSTIC - if (uio->uio_rw != UIO_READ) - panic("ffs_read: mode"); - - if (vp->v_type == VLNK) { - if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen) - panic("ffs_read: short symlink"); - } else if (vp->v_type != VREG && vp->v_type != VDIR) - panic("ffs_read: type %d", vp->v_type); -#endif - fs = ip->i_fs; - if ((u_int64_t)uio->uio_offset > fs->fs_maxfilesize) - return (EFBIG); - - orig_resid = uio->uio_resid; - if (orig_resid <= 0) - return (0); - - object = vp->v_object; - - bytesinfile = ip->i_size - uio->uio_offset; - if (bytesinfile <= 0) { - if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0) - ip->i_flag |= IN_ACCESS; - return 0; - } - - if (object) { - vm_object_reference(object); - } - - /* - * Ok so we couldn't do it all in one vm trick... - * so cycle around trying smaller bites.. - */ - for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { - if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) - break; - - lbn = lblkno(fs, uio->uio_offset); - nextlbn = lbn + 1; - - /* - * size of buffer. The buffer representing the - * end of the file is rounded up to the size of - * the block type ( fragment or full block, - * depending ). - */ - size = blksize(fs, ip, lbn); - blkoffset = blkoff(fs, uio->uio_offset); - - /* - * The amount we want to transfer in this iteration is - * one FS block less the amount of the data before - * our startpoint (duh!) - */ - xfersize = fs->fs_bsize - blkoffset; - - /* - * But if we actually want less than the block, - * or the file doesn't have a whole block more of data, - * then use the lesser number. - */ - if (uio->uio_resid < xfersize) - xfersize = uio->uio_resid; - if (bytesinfile < xfersize) - xfersize = bytesinfile; - - if (lblktosize(fs, nextlbn) >= ip->i_size) { - /* - * Don't do readahead if this is the end of the file. - */ - error = bread(vp, lbn, size, NOCRED, &bp); - } else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { - /* - * Otherwise if we are allowed to cluster, - * grab as much as we can. - * - * XXX This may not be a win if we are not - * doing sequential access. - */ - error = cluster_read(vp, ip->i_size, lbn, - size, NOCRED, uio->uio_resid, seqcount, &bp); - } else if (seqcount > 1) { - /* - * If we are NOT allowed to cluster, then - * if we appear to be acting sequentially, - * fire off a request for a readahead - * as well as a read. Note that the 4th and 5th - * arguments point to arrays of the size specified in - * the 6th argument. - */ - int nextsize = blksize(fs, ip, nextlbn); - error = breadn(vp, lbn, - size, &nextlbn, &nextsize, 1, NOCRED, &bp); - } else { - /* - * Failing all of the above, just read what the - * user asked for. 
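The per-iteration transfer size in the read loop above is one block minus the offset into that block, clamped by what the caller still wants and by what the file still holds. A stand-alone sketch of that arithmetic, with blkoff() reduced to power-of-two masking:

    #include <stdio.h>
    #include <stdint.h>

    /*
     * Transfer size for one pass of the read loop: a full block less the
     * offset into it, never more than the remaining request or the
     * remaining bytes in the file.
     */
    static long
    read_xfersize(long fs_bsize, int64_t uio_offset, long uio_resid,
        int64_t bytesinfile)
    {
        long blkoffset = (long)(uio_offset & (fs_bsize - 1));
        long xfersize = fs_bsize - blkoffset;

        if (uio_resid < xfersize)
            xfersize = uio_resid;
        if (bytesinfile < xfersize)
            xfersize = (long)bytesinfile;
        return (xfersize);
    }

    int
    main(void)
    {
        /* 16K blocks, read starting 4K into a block, 100K left in file. */
        printf("%ld\n", read_xfersize(16384, 4096, 65536, 102400)); /* 12288 */
        printf("%ld\n", read_xfersize(16384, 4096, 1000, 102400));  /* 1000 */
        return (0);
    }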
Interestingly, the same as - * the first option above. - */ - error = bread(vp, lbn, size, NOCRED, &bp); - } - if (error) { - brelse(bp); - bp = NULL; - break; - } - - /* - * If IO_DIRECT then set B_DIRECT for the buffer. This - * will cause us to attempt to release the buffer later on - * and will cause the buffer cache to attempt to free the - * underlying pages. - */ - if (ioflag & IO_DIRECT) - bp->b_flags |= B_DIRECT; - - /* - * We should only get non-zero b_resid when an I/O error - * has occurred, which should cause us to break above. - * However, if the short read did not cause an error, - * then we want to ensure that we do not uiomove bad - * or uninitialized data. - */ - size -= bp->b_resid; - if (size < xfersize) { - if (size == 0) - break; - xfersize = size; - } - - { - /* - * otherwise use the general form - */ - error = - uiomove((char *)bp->b_data + blkoffset, - (int)xfersize, uio); - } - - if (error) - break; - - if ((ioflag & (IO_VMIO|IO_DIRECT)) && - (LIST_FIRST(&bp->b_dep) == NULL)) { - /* - * If there are no dependencies, and it's VMIO, - * then we don't need the buf, mark it available - * for freeing. The VM has the data. - */ - bp->b_flags |= B_RELBUF; - brelse(bp); - } else { - /* - * Otherwise let whoever - * made the request take care of - * freeing it. We just queue - * it onto another list. - */ - bqrelse(bp); - } - } - - /* - * This can only happen in the case of an error - * because the loop above resets bp to NULL on each iteration - * and on normal completion has not set a new value into it. - * so it must have come from a 'break' statement - */ - if (bp != NULL) { - if ((ioflag & (IO_VMIO|IO_DIRECT)) && - (LIST_FIRST(&bp->b_dep) == NULL)) { - bp->b_flags |= B_RELBUF; - brelse(bp); - } else { - bqrelse(bp); - } - } - - if (object) { - VM_OBJECT_LOCK(object); - vm_object_vndeallocate(object); - } - if ((error == 0 || uio->uio_resid != orig_resid) && - (vp->v_mount->mnt_flag & MNT_NOATIME) == 0) - ip->i_flag |= IN_ACCESS; - return (error); -} - -/* - * Vnode op for writing. 
- */ -static int -ffs_write(ap) - struct vop_write_args /* { - struct vnode *a_vp; - struct uio *a_uio; - int a_ioflag; - struct ucred *a_cred; - } */ *ap; -{ - struct vnode *vp; - struct uio *uio; - struct inode *ip; - struct fs *fs; - struct buf *bp; - struct thread *td; - ufs_lbn_t lbn; - off_t osize; - int seqcount; - int blkoffset, error, extended, flags, ioflag, resid, size, xfersize; - vm_object_t object; - - vp = ap->a_vp; - uio = ap->a_uio; - ioflag = ap->a_ioflag; - if (ap->a_ioflag & IO_EXT) -#ifdef notyet - return (ffs_extwrite(vp, uio, ioflag, ap->a_cred)); -#else - panic("ffs_read+IO_EXT"); -#endif - - GIANT_REQUIRED; - - extended = 0; - seqcount = ap->a_ioflag >> 16; - ip = VTOI(vp); - - object = vp->v_object; - if (object) { - vm_object_reference(object); - } - -#ifdef DIAGNOSTIC - if (uio->uio_rw != UIO_WRITE) - panic("ffswrite: mode"); -#endif - - switch (vp->v_type) { - case VREG: - if (ioflag & IO_APPEND) - uio->uio_offset = ip->i_size; - if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) { - if (object) { - VM_OBJECT_LOCK(object); - vm_object_vndeallocate(object); - } - return (EPERM); - } - /* FALLTHROUGH */ - case VLNK: - break; - case VDIR: - panic("ffswrite: dir write"); - break; - default: - panic("ffswrite: type %p %d (%d,%d)", vp, (int)vp->v_type, - (int)uio->uio_offset, - (int)uio->uio_resid - ); - } - - fs = ip->i_fs; - if (uio->uio_offset < 0 || - (u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize) { - if (object) { - VM_OBJECT_LOCK(object); - vm_object_vndeallocate(object); - } - return (EFBIG); - } - /* - * Maybe this should be above the vnode op call, but so long as - * file servers have no limits, I don't think it matters. - */ - td = uio->uio_td; - if (vp->v_type == VREG && td && - uio->uio_offset + uio->uio_resid > - td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) { - PROC_LOCK(td->td_proc); - psignal(td->td_proc, SIGXFSZ); - PROC_UNLOCK(td->td_proc); - if (object) { - VM_OBJECT_LOCK(object); - vm_object_vndeallocate(object); - } - return (EFBIG); - } - - resid = uio->uio_resid; - osize = ip->i_size; - if (seqcount > BA_SEQMAX) - flags = BA_SEQMAX << BA_SEQSHIFT; - else - flags = seqcount << BA_SEQSHIFT; - if ((ioflag & IO_SYNC) && !DOINGASYNC(vp)) - flags |= IO_SYNC; - - for (error = 0; uio->uio_resid > 0;) { - lbn = lblkno(fs, uio->uio_offset); - blkoffset = blkoff(fs, uio->uio_offset); - xfersize = fs->fs_bsize - blkoffset; - if (uio->uio_resid < xfersize) - xfersize = uio->uio_resid; - - if (uio->uio_offset + xfersize > ip->i_size) - vnode_pager_setsize(vp, uio->uio_offset + xfersize); - - /* - * We must perform a read-before-write if the transfer size - * does not cover the entire buffer. - */ - if (fs->fs_bsize > xfersize) - flags |= BA_CLRBUF; - else - flags &= ~BA_CLRBUF; -/* XXX is uio->uio_offset the right thing here? */ - error = UFS_BALLOC(vp, uio->uio_offset, xfersize, - ap->a_cred, flags, &bp); - if (error != 0) - break; - /* - * If the buffer is not valid we have to clear out any - * garbage data from the pages instantiated for the buffer. - * If we do not, a failed uiomove() during a write can leave - * the prior contents of the pages exposed to a userland - * mmap(). XXX deal with uiomove() errors a better way. 
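The BA_CLRBUF handling above boils down to one test: whenever the transfer does not cover a whole block, the rest of the block's old contents must survive, so the buffer has to be read (or cleared) before the partial write. A minimal expression of that decision:

    #include <stdio.h>

    /*
     * xfersize already accounts for the offset into the block and the
     * bytes remaining in the request, so any shortfall means part of the
     * block's prior contents must be preserved across the write.
     */
    static int
    partial_block_write(long fs_bsize, long xfersize)
    {
        return (fs_bsize > xfersize);
    }

    int
    main(void)
    {
        printf("%d %d\n", partial_block_write(16384, 16384),
            partial_block_write(16384, 512));   /* prints: 0 1 */
        return (0);
    }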
- */ - if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize) - vfs_bio_clrbuf(bp); - if (ioflag & IO_DIRECT) - bp->b_flags |= B_DIRECT; - - if (uio->uio_offset + xfersize > ip->i_size) { - ip->i_size = uio->uio_offset + xfersize; - DIP(ip, i_size) = ip->i_size; - extended = 1; - } - - size = blksize(fs, ip, lbn) - bp->b_resid; - if (size < xfersize) - xfersize = size; - - error = - uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); - if ((ioflag & (IO_VMIO|IO_DIRECT)) && - (LIST_FIRST(&bp->b_dep) == NULL)) { - bp->b_flags |= B_RELBUF; - } - - /* - * If IO_SYNC each buffer is written synchronously. Otherwise - * if we have a severe page deficiency write the buffer - * asynchronously. Otherwise try to cluster, and if that - * doesn't do it then either do an async write (if O_DIRECT), - * or a delayed write (if not). - */ - if (ioflag & IO_SYNC) { - (void)bwrite(bp); - } else if (vm_page_count_severe() || - buf_dirty_count_severe() || - (ioflag & IO_ASYNC)) { - bp->b_flags |= B_CLUSTEROK; - bawrite(bp); - } else if (xfersize + blkoffset == fs->fs_bsize) { - if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) { - bp->b_flags |= B_CLUSTEROK; - cluster_write(bp, ip->i_size, seqcount); - } else { - bawrite(bp); - } - } else if (ioflag & IO_DIRECT) { - bp->b_flags |= B_CLUSTEROK; - bawrite(bp); - } else { - bp->b_flags |= B_CLUSTEROK; - bdwrite(bp); - } - if (error || xfersize == 0) - break; - ip->i_flag |= IN_CHANGE | IN_UPDATE; - } - /* - * If we successfully wrote any data, and we are not the superuser - * we clear the setuid and setgid bits as a precaution against - * tampering. - */ - if (resid > uio->uio_resid && ap->a_cred && - suser_cred(ap->a_cred, PRISON_ROOT)) { - ip->i_mode &= ~(ISUID | ISGID); - DIP(ip, i_mode) = ip->i_mode; - } - if (resid > uio->uio_resid) - VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0)); - if (error) { - if (ioflag & IO_UNIT) { - (void)UFS_TRUNCATE(vp, osize, - IO_NORMAL | (ioflag & IO_SYNC), - ap->a_cred, uio->uio_td); - uio->uio_offset -= resid - uio->uio_resid; - uio->uio_resid = resid; - } - } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) - error = UFS_UPDATE(vp, 1); - - if (object) { - VM_OBJECT_LOCK(object); - vm_object_vndeallocate(object); - } - - return (error); -} - -/* - * get page routine - */ -static int -ffs_getpages(ap) - struct vop_getpages_args *ap; -{ - off_t foff, physoffset; - int i, size, bsize; - struct vnode *dp, *vp; - vm_object_t obj; - vm_pindex_t pindex; - vm_page_t mreq; - int bbackwards, bforwards; - int pbackwards, pforwards; - int firstpage; - ufs2_daddr_t reqblkno, reqlblkno; - int poff; - int pcount; - int rtval; - int pagesperblock; - - GIANT_REQUIRED; - - pcount = round_page(ap->a_count) / PAGE_SIZE; - mreq = ap->a_m[ap->a_reqpage]; - - /* - * if ANY DEV_BSIZE blocks are valid on a large filesystem block, - * then the entire page is valid. Since the page may be mapped, - * user programs might reference data beyond the actual end of file - * occuring within the page. We have to zero that data. 
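The cascade of bwrite()/bawrite()/cluster_write()/bdwrite() calls in the write loop above follows a fixed priority: synchronous if IO_SYNC, asynchronous under memory pressure or IO_ASYNC, clustered for full blocks when clustering is allowed, asynchronous for IO_DIRECT, delayed otherwise. A rough sketch of that policy as a pure function; the flag values are invented and memory pressure is reduced to a boolean.

    #include <stdio.h>

    enum wb { WB_SYNC, WB_ASYNC, WB_CLUSTER, WB_DELAYED };

    #define IO_SYNC   0x1   /* hypothetical flag values */
    #define IO_ASYNC  0x2
    #define IO_DIRECT 0x4

    static enum wb
    writeback_policy(int ioflag, int mem_pressure, int full_block)
    {
        if (ioflag & IO_SYNC)
            return (WB_SYNC);       /* bwrite() */
        if (mem_pressure || (ioflag & IO_ASYNC))
            return (WB_ASYNC);      /* bawrite() */
        if (full_block)
            return (WB_CLUSTER);    /* cluster_write() */
        if (ioflag & IO_DIRECT)
            return (WB_ASYNC);      /* bawrite() */
        return (WB_DELAYED);        /* bdwrite() */
    }

    int
    main(void)
    {
        printf("%d\n", writeback_policy(IO_SYNC, 0, 1)); /* 0: sync */
        printf("%d\n", writeback_policy(0, 0, 1));       /* 2: cluster */
        printf("%d\n", writeback_policy(0, 0, 0));       /* 3: delayed */
        return (0);
    }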
- */ - VM_OBJECT_LOCK(mreq->object); - if (mreq->valid) { - if (mreq->valid != VM_PAGE_BITS_ALL) - vm_page_zero_invalid(mreq, TRUE); - vm_page_lock_queues(); - for (i = 0; i < pcount; i++) { - if (i != ap->a_reqpage) { - vm_page_free(ap->a_m[i]); - } - } - vm_page_unlock_queues(); - VM_OBJECT_UNLOCK(mreq->object); - return VM_PAGER_OK; - } - VM_OBJECT_UNLOCK(mreq->object); - vp = ap->a_vp; - obj = vp->v_object; - bsize = vp->v_mount->mnt_stat.f_iosize; - pindex = mreq->pindex; - foff = IDX_TO_OFF(pindex) /* + ap->a_offset should be zero */; - - if (bsize < PAGE_SIZE) - return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, - ap->a_count, - ap->a_reqpage); - - /* - * foff is the file offset of the required page - * reqlblkno is the logical block that contains the page - * poff is the index of the page into the logical block - */ - reqlblkno = foff / bsize; - poff = (foff % bsize) / PAGE_SIZE; - - dp = VTOI(vp)->i_devvp; - if (ufs_bmaparray(vp, reqlblkno, &reqblkno, 0, &bforwards, &bbackwards) - || (reqblkno == -1)) { - VM_OBJECT_LOCK(obj); - vm_page_lock_queues(); - for(i = 0; i < pcount; i++) { - if (i != ap->a_reqpage) - vm_page_free(ap->a_m[i]); - } - vm_page_unlock_queues(); - if (reqblkno == -1) { - if ((mreq->flags & PG_ZERO) == 0) - pmap_zero_page(mreq); - vm_page_undirty(mreq); - mreq->valid = VM_PAGE_BITS_ALL; - VM_OBJECT_UNLOCK(obj); - return VM_PAGER_OK; - } else { - VM_OBJECT_UNLOCK(obj); - return VM_PAGER_ERROR; - } - } - - physoffset = (off_t)reqblkno * DEV_BSIZE + poff * PAGE_SIZE; - pagesperblock = bsize / PAGE_SIZE; - /* - * find the first page that is contiguous... - * note that pbackwards is the number of pages that are contiguous - * backwards. - */ - firstpage = 0; - if (ap->a_count) { - pbackwards = poff + bbackwards * pagesperblock; - if (ap->a_reqpage > pbackwards) { - firstpage = ap->a_reqpage - pbackwards; - VM_OBJECT_LOCK(obj); - vm_page_lock_queues(); - for(i=0;ia_m[i]); - vm_page_unlock_queues(); - VM_OBJECT_UNLOCK(obj); - } - - /* - * pforwards is the number of pages that are contiguous - * after the current page. - */ - pforwards = (pagesperblock - (poff + 1)) + - bforwards * pagesperblock; - if (pforwards < (pcount - (ap->a_reqpage + 1))) { - VM_OBJECT_LOCK(obj); - vm_page_lock_queues(); - for( i = ap->a_reqpage + pforwards + 1; i < pcount; i++) - vm_page_free(ap->a_m[i]); - vm_page_unlock_queues(); - VM_OBJECT_UNLOCK(obj); - pcount = ap->a_reqpage + pforwards + 1; - } - - /* - * number of pages for I/O corrected for the non-contig pages at - * the beginning of the array. - */ - pcount -= firstpage; - } - - /* - * calculate the size of the transfer - */ - - size = pcount * PAGE_SIZE; - - if ((IDX_TO_OFF(ap->a_m[firstpage]->pindex) + size) > - obj->un_pager.vnp.vnp_size) - size = obj->un_pager.vnp.vnp_size - - IDX_TO_OFF(ap->a_m[firstpage]->pindex); - - physoffset -= foff; - rtval = VOP_GETPAGES(dp, &ap->a_m[firstpage], size, - (ap->a_reqpage - firstpage), physoffset); - - return (rtval); -} - -/* - * Extended attribute area reading. 
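The block/page bookkeeping in ffs_getpages() above reduces to three divisions: which logical filesystem block holds the faulting offset, which page of that block it is, and how many VM pages one block spans. Worked out for one hypothetical offset:

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SIZE 4096

    int
    main(void)
    {
        int64_t foff = 45056;   /* assumed file offset of the wanted page */
        int bsize = 16384;      /* assumed filesystem block size */

        int64_t reqlblkno = foff / bsize;               /* logical block */
        int poff = (int)((foff % bsize) / PAGE_SIZE);   /* page in block */
        int pagesperblock = bsize / PAGE_SIZE;

        printf("block %lld, page %d of %d in that block\n",
            (long long)reqlblkno, poff, pagesperblock); /* block 2, page 3 of 4 */
        return (0);
    }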
- */ -static int -ffs_extread(struct vnode *vp, struct uio *uio, int ioflag) -{ - struct inode *ip; - struct ufs2_dinode *dp; - struct fs *fs; - struct buf *bp; - ufs_lbn_t lbn, nextlbn; - off_t bytesinfile; - long size, xfersize, blkoffset; - int error, orig_resid; - - GIANT_REQUIRED; - - ip = VTOI(vp); - fs = ip->i_fs; - dp = ip->i_din2; - -#ifdef DIAGNOSTIC - if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC) - panic("ffs_extread: mode"); - -#endif - orig_resid = uio->uio_resid; - if (orig_resid <= 0) - return (0); - - bytesinfile = dp->di_extsize - uio->uio_offset; - if (bytesinfile <= 0) { - if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0) - ip->i_flag |= IN_ACCESS; - return 0; - } - - for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { - if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0) - break; - - lbn = lblkno(fs, uio->uio_offset); - nextlbn = lbn + 1; - - /* - * size of buffer. The buffer representing the - * end of the file is rounded up to the size of - * the block type ( fragment or full block, - * depending ). - */ - size = sblksize(fs, dp->di_extsize, lbn); - blkoffset = blkoff(fs, uio->uio_offset); - - /* - * The amount we want to transfer in this iteration is - * one FS block less the amount of the data before - * our startpoint (duh!) - */ - xfersize = fs->fs_bsize - blkoffset; - - /* - * But if we actually want less than the block, - * or the file doesn't have a whole block more of data, - * then use the lesser number. - */ - if (uio->uio_resid < xfersize) - xfersize = uio->uio_resid; - if (bytesinfile < xfersize) - xfersize = bytesinfile; - - if (lblktosize(fs, nextlbn) >= dp->di_extsize) { - /* - * Don't do readahead if this is the end of the info. - */ - error = bread(vp, -1 - lbn, size, NOCRED, &bp); - } else { - /* - * If we have a second block, then - * fire off a request for a readahead - * as well as a read. Note that the 4th and 5th - * arguments point to arrays of the size specified in - * the 6th argument. - */ - int nextsize = sblksize(fs, dp->di_extsize, nextlbn); - - nextlbn = -1 - nextlbn; - error = breadn(vp, -1 - lbn, - size, &nextlbn, &nextsize, 1, NOCRED, &bp); - } - if (error) { - brelse(bp); - bp = NULL; - break; - } - - /* - * If IO_DIRECT then set B_DIRECT for the buffer. This - * will cause us to attempt to release the buffer later on - * and will cause the buffer cache to attempt to free the - * underlying pages. - */ - if (ioflag & IO_DIRECT) - bp->b_flags |= B_DIRECT; - - /* - * We should only get non-zero b_resid when an I/O error - * has occurred, which should cause us to break above. - * However, if the short read did not cause an error, - * then we want to ensure that we do not uiomove bad - * or uninitialized data. - */ - size -= bp->b_resid; - if (size < xfersize) { - if (size == 0) - break; - xfersize = size; - } - - error = uiomove((char *)bp->b_data + blkoffset, - (int)xfersize, uio); - if (error) - break; - - if ((ioflag & (IO_VMIO|IO_DIRECT)) && - (LIST_FIRST(&bp->b_dep) == NULL)) { - /* - * If there are no dependencies, and it's VMIO, - * then we don't need the buf, mark it available - * for freeing. The VM has the data. - */ - bp->b_flags |= B_RELBUF; - brelse(bp); - } else { - /* - * Otherwise let whoever - * made the request take care of - * freeing it. We just queue - * it onto another list. - */ - bqrelse(bp); - } - } - - /* - * This can only happen in the case of an error - * because the loop above resets bp to NULL on each iteration - * and on normal completion has not set a new value into it. 
- * so it must have come from a 'break' statement - */ - if (bp != NULL) { - if ((ioflag & (IO_VMIO|IO_DIRECT)) && - (LIST_FIRST(&bp->b_dep) == NULL)) { - bp->b_flags |= B_RELBUF; - brelse(bp); - } else { - bqrelse(bp); - } - } - - if ((error == 0 || uio->uio_resid != orig_resid) && - (vp->v_mount->mnt_flag & MNT_NOATIME) == 0) - ip->i_flag |= IN_ACCESS; - return (error); -} - -/* - * Extended attribute area writing. - */ -static int -ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred) -{ - struct inode *ip; - struct ufs2_dinode *dp; - struct fs *fs; - struct buf *bp; - ufs_lbn_t lbn; - off_t osize; - int blkoffset, error, flags, resid, size, xfersize; - - GIANT_REQUIRED; - - ip = VTOI(vp); - fs = ip->i_fs; - dp = ip->i_din2; - -#ifdef DIAGNOSTIC - if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC) - panic("ext_write: mode"); -#endif - - if (ioflag & IO_APPEND) - uio->uio_offset = dp->di_extsize; - - if (uio->uio_offset < 0 || - (u_int64_t)uio->uio_offset + uio->uio_resid > NXADDR * fs->fs_bsize) - return (EFBIG); - - resid = uio->uio_resid; - osize = dp->di_extsize; - flags = IO_EXT; - if ((ioflag & IO_SYNC) && !DOINGASYNC(vp)) - flags |= IO_SYNC; - - for (error = 0; uio->uio_resid > 0;) { - lbn = lblkno(fs, uio->uio_offset); - blkoffset = blkoff(fs, uio->uio_offset); - xfersize = fs->fs_bsize - blkoffset; - if (uio->uio_resid < xfersize) - xfersize = uio->uio_resid; - - /* - * We must perform a read-before-write if the transfer size - * does not cover the entire buffer. - */ - if (fs->fs_bsize > xfersize) - flags |= BA_CLRBUF; - else - flags &= ~BA_CLRBUF; - error = UFS_BALLOC(vp, uio->uio_offset, xfersize, - ucred, flags, &bp); - if (error != 0) - break; - /* - * If the buffer is not valid we have to clear out any - * garbage data from the pages instantiated for the buffer. - * If we do not, a failed uiomove() during a write can leave - * the prior contents of the pages exposed to a userland - * mmap(). XXX deal with uiomove() errors a better way. - */ - if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize) - vfs_bio_clrbuf(bp); - if (ioflag & IO_DIRECT) - bp->b_flags |= B_DIRECT; - - if (uio->uio_offset + xfersize > dp->di_extsize) - dp->di_extsize = uio->uio_offset + xfersize; - - size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid; - if (size < xfersize) - xfersize = size; - - error = - uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); - if ((ioflag & (IO_VMIO|IO_DIRECT)) && - (LIST_FIRST(&bp->b_dep) == NULL)) { - bp->b_flags |= B_RELBUF; - } - - /* - * If IO_SYNC each buffer is written synchronously. Otherwise - * if we have a severe page deficiency write the buffer - * asynchronously. Otherwise try to cluster, and if that - * doesn't do it then either do an async write (if O_DIRECT), - * or a delayed write (if not). - */ - if (ioflag & IO_SYNC) { - (void)bwrite(bp); - } else if (vm_page_count_severe() || - buf_dirty_count_severe() || - xfersize + blkoffset == fs->fs_bsize || - (ioflag & (IO_ASYNC | IO_DIRECT))) - bawrite(bp); - else - bdwrite(bp); - if (error || xfersize == 0) - break; - ip->i_flag |= IN_CHANGE | IN_UPDATE; - } - /* - * If we successfully wrote any data, and we are not the superuser - * we clear the setuid and setgid bits as a precaution against - * tampering. 
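Both extended-attribute loops above size each transfer the same way: start from one filesystem block minus the offset into that block, then clamp to what the caller still wants and, for reads, to what the attribute area still holds. A stand-alone sketch of that clamping, with made-up sizes:

    #include <stdio.h>
    #include <stdint.h>

    /* one iteration's transfer size; all arguments are byte counts */
    static int64_t
    xfer_size(int64_t bsize, int64_t offset, int64_t resid, int64_t bytesinfile)
    {
            int64_t blkoffset = offset % bsize;     /* blkoff(fs, offset) */
            int64_t xfersize = bsize - blkoffset;   /* rest of this block */

            if (resid < xfersize)                   /* caller wants less */
                    xfersize = resid;
            if (bytesinfile < xfersize)             /* attribute area ends sooner */
                    xfersize = bytesinfile;
            return (xfersize);
    }

    int
    main(void)
    {
            /* assumed 16 KB blocks, 10000-byte read at offset 30000 of a 50000-byte area */
            printf("%lld\n", (long long)xfer_size(16384, 30000, 10000, 50000 - 30000));
            return (0);
    }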
- */ - if (resid > uio->uio_resid && ucred && - suser_cred(ucred, PRISON_ROOT)) { - ip->i_mode &= ~(ISUID | ISGID); - dp->di_mode = ip->i_mode; - } - if (error) { - if (ioflag & IO_UNIT) { - (void)UFS_TRUNCATE(vp, osize, - IO_EXT | (ioflag&IO_SYNC), ucred, uio->uio_td); - uio->uio_offset -= resid - uio->uio_resid; - uio->uio_resid = resid; - } - } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) - error = UFS_UPDATE(vp, 1); - return (error); -} - - -/* - * Vnode operating to retrieve a named extended attribute. - * - * Locate a particular EA (nspace:name) in the area (ptr:length), and return - * the length of the EA, and possibly the pointer to the entry and to the data. - */ -static int -ffs_findextattr(u_char *ptr, u_int length, int nspace, const char *name, u_char **eap, u_char **eac) -{ - u_char *p, *pe, *pn, *p0; - int eapad1, eapad2, ealength, ealen, nlen; - uint32_t ul; - - pe = ptr + length; - nlen = strlen(name); - - for (p = ptr; p < pe; p = pn) { - p0 = p; - bcopy(p, &ul, sizeof(ul)); - pn = p + ul; - /* make sure this entry is complete */ - if (pn > pe) - break; - p += sizeof(uint32_t); - if (*p != nspace) - continue; - p++; - eapad2 = *p++; - if (*p != nlen) - continue; - p++; - if (bcmp(p, name, nlen)) - continue; - ealength = sizeof(uint32_t) + 3 + nlen; - eapad1 = 8 - (ealength % 8); - if (eapad1 == 8) - eapad1 = 0; - ealength += eapad1; - ealen = ul - ealength - eapad2; - p += nlen + eapad1; - if (eap != NULL) - *eap = p0; - if (eac != NULL) - *eac = p; - return (ealen); - } - return(-1); -} - -static int -ffs_rdextattr(u_char **p, struct vnode *vp, struct thread *td, int extra) -{ - struct inode *ip; - struct ufs2_dinode *dp; - struct uio luio; - struct iovec liovec; - int easize, error; - u_char *eae; - - ip = VTOI(vp); - dp = ip->i_din2; - easize = dp->di_extsize; - - eae = malloc(easize + extra, M_TEMP, M_WAITOK); - - liovec.iov_base = eae; - liovec.iov_len = easize; - luio.uio_iov = &liovec; - luio.uio_iovcnt = 1; - luio.uio_offset = 0; - luio.uio_resid = easize; - luio.uio_segflg = UIO_SYSSPACE; - luio.uio_rw = UIO_READ; - luio.uio_td = td; - - error = ffs_extread(vp, &luio, IO_EXT | IO_SYNC); - if (error) { - free(eae, M_TEMP); - return(error); - } - *p = eae; - return (0); -} - -static int -ffs_open_ea(struct vnode *vp, struct ucred *cred, struct thread *td) -{ - struct inode *ip; - struct ufs2_dinode *dp; - int error; - - ip = VTOI(vp); - - if (ip->i_ea_area != NULL) - return (EBUSY); - dp = ip->i_din2; - error = ffs_rdextattr(&ip->i_ea_area, vp, td, 0); - if (error) - return (error); - ip->i_ea_len = dp->di_extsize; - ip->i_ea_error = 0; - return (0); -} - -/* - * Vnode extattr transaction commit/abort - */ -static int -ffs_close_ea(struct vnode *vp, int commit, struct ucred *cred, struct thread *td) -{ - struct inode *ip; - struct uio luio; - struct iovec liovec; - int error; - struct ufs2_dinode *dp; - - ip = VTOI(vp); - if (ip->i_ea_area == NULL) - return (EINVAL); - dp = ip->i_din2; - error = ip->i_ea_error; - if (commit && error == 0) { - if (cred == NOCRED) - cred = vp->v_mount->mnt_cred; - liovec.iov_base = ip->i_ea_area; - liovec.iov_len = ip->i_ea_len; - luio.uio_iov = &liovec; - luio.uio_iovcnt = 1; - luio.uio_offset = 0; - luio.uio_resid = ip->i_ea_len; - luio.uio_segflg = UIO_SYSSPACE; - luio.uio_rw = UIO_WRITE; - luio.uio_td = td; - /* XXX: I'm not happy about truncating to zero size */ - if (ip->i_ea_len < dp->di_extsize) - error = ffs_truncate(vp, 0, IO_EXT, cred, td); - error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred); - } - 
free(ip->i_ea_area, M_TEMP); - ip->i_ea_area = NULL; - ip->i_ea_len = 0; - ip->i_ea_error = 0; - return (error); -} - -/* - * Vnode extattr strategy routine for special devices and fifos. - * - * We need to check for a read or write of the external attributes. - * Otherwise we just fall through and do the usual thing. - */ -static int -ffsext_strategy(struct vop_strategy_args *ap) -/* -struct vop_strategy_args { - struct vnodeop_desc *a_desc; - struct vnode *a_vp; - struct buf *a_bp; -}; -*/ -{ - struct vnode *vp; - daddr_t lbn; - - KASSERT(ap->a_vp == ap->a_bp->b_vp, ("%s(%p != %p)", - __func__, ap->a_vp, ap->a_bp->b_vp)); - vp = ap->a_vp; - lbn = ap->a_bp->b_lblkno; - if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC && - lbn < 0 && lbn >= -NXADDR) - return (ufs_vnoperate((struct vop_generic_args *)ap)); - if (vp->v_type == VFIFO) - return (ufs_vnoperatefifo((struct vop_generic_args *)ap)); - return (ufs_vnoperatespec((struct vop_generic_args *)ap)); -} - -/* - * Vnode extattr transaction commit/abort - */ -static int -ffs_openextattr(struct vop_openextattr_args *ap) -/* -struct vop_openextattr_args { - struct vnodeop_desc *a_desc; - struct vnode *a_vp; - IN struct ucred *a_cred; - IN struct thread *a_td; -}; -*/ -{ - struct inode *ip; - struct fs *fs; - - ip = VTOI(ap->a_vp); - fs = ip->i_fs; - if (fs->fs_magic == FS_UFS1_MAGIC) - return (ufs_vnoperate((struct vop_generic_args *)ap)); - - if (ap->a_vp->v_type == VCHR) - return (EOPNOTSUPP); - - return (ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td)); -} - - -/* - * Vnode extattr transaction commit/abort - */ -static int -ffs_closeextattr(struct vop_closeextattr_args *ap) -/* -struct vop_closeextattr_args { - struct vnodeop_desc *a_desc; - struct vnode *a_vp; - int a_commit; - IN struct ucred *a_cred; - IN struct thread *a_td; -}; -*/ -{ - struct inode *ip; - struct fs *fs; - - ip = VTOI(ap->a_vp); - fs = ip->i_fs; - if (fs->fs_magic == FS_UFS1_MAGIC) - return (ufs_vnoperate((struct vop_generic_args *)ap)); - - if (ap->a_vp->v_type == VCHR) - return (EOPNOTSUPP); - - return (ffs_close_ea(ap->a_vp, ap->a_commit, ap->a_cred, ap->a_td)); -} - -/* - * Vnode operation to remove a named attribute. 
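ffsext_strategy above decides whether a buffer belongs to the extended-attribute area purely from its logical block number: on UFS2 the NXADDR external-attribute blocks are addressed with the negative logical block numbers -1 .. -NXADDR. A toy predicate for that convention, taking NXADDR to be 2, its usual UFS2 value (an assumption here, since the define lives in another header):

    #include <stdbool.h>
    #include <stdio.h>

    #define NXADDR 2        /* external-attribute block pointers (assumed UFS2 value) */

    /* true if a logical block number addresses the extended-attribute area */
    static bool
    is_extattr_lbn(long long lbn)
    {
            return (lbn < 0 && lbn >= -NXADDR);
    }

    int
    main(void)
    {
            printf("%d %d %d\n", is_extattr_lbn(-1), is_extattr_lbn(-2), is_extattr_lbn(0));
            return (0);
    }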
- */ -static int -ffs_deleteextattr(struct vop_deleteextattr_args *ap) -/* -vop_deleteextattr { - IN struct vnode *a_vp; - IN int a_attrnamespace; - IN const char *a_name; - IN struct ucred *a_cred; - IN struct thread *a_td; -}; -*/ -{ - struct inode *ip; - struct fs *fs; - uint32_t ealength, ul; - int ealen, olen, eapad1, eapad2, error, i, easize; - u_char *eae, *p; - int stand_alone; - - ip = VTOI(ap->a_vp); - fs = ip->i_fs; - - if (fs->fs_magic == FS_UFS1_MAGIC) - return (ufs_vnoperate((struct vop_generic_args *)ap)); - - if (ap->a_vp->v_type == VCHR) - return (EOPNOTSUPP); - - if (strlen(ap->a_name) == 0) - return (EINVAL); - - error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, - ap->a_cred, ap->a_td, IWRITE); - if (error) { - if (ip->i_ea_area != NULL && ip->i_ea_error == 0) - ip->i_ea_error = error; - return (error); - } - - if (ip->i_ea_area == NULL) { - error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td); - if (error) - return (error); - stand_alone = 1; - } else { - stand_alone = 0; - } - - ealength = eapad1 = ealen = eapad2 = 0; - - eae = malloc(ip->i_ea_len, M_TEMP, M_WAITOK); - bcopy(ip->i_ea_area, eae, ip->i_ea_len); - easize = ip->i_ea_len; - - olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name, - &p, NULL); - if (olen == -1) { - /* delete but nonexistent */ - free(eae, M_TEMP); - if (stand_alone) - ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); - return(ENOATTR); - } - bcopy(p, &ul, sizeof ul); - i = p - eae + ul; - if (ul != ealength) { - bcopy(p + ul, p + ealength, easize - i); - easize += (ealength - ul); - } - if (easize > NXADDR * fs->fs_bsize) { - free(eae, M_TEMP); - if (stand_alone) - ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); - else if (ip->i_ea_error == 0) - ip->i_ea_error = ENOSPC; - return(ENOSPC); - } - p = ip->i_ea_area; - ip->i_ea_area = eae; - ip->i_ea_len = easize; - free(p, M_TEMP); - if (stand_alone) - error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td); - return(error); -} - -/* - * Vnode operation to retrieve a named extended attribute. - */ -static int -ffs_getextattr(struct vop_getextattr_args *ap) -/* -vop_getextattr { - IN struct vnode *a_vp; - IN int a_attrnamespace; - IN const char *a_name; - INOUT struct uio *a_uio; - OUT size_t *a_size; - IN struct ucred *a_cred; - IN struct thread *a_td; -}; -*/ -{ - struct inode *ip; - struct fs *fs; - u_char *eae, *p; - unsigned easize; - int error, ealen, stand_alone; - - ip = VTOI(ap->a_vp); - fs = ip->i_fs; - - if (fs->fs_magic == FS_UFS1_MAGIC) - return (ufs_vnoperate((struct vop_generic_args *)ap)); - - if (ap->a_vp->v_type == VCHR) - return (EOPNOTSUPP); - - error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, - ap->a_cred, ap->a_td, IREAD); - if (error) - return (error); - - if (ip->i_ea_area == NULL) { - error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td); - if (error) - return (error); - stand_alone = 1; - } else { - stand_alone = 0; - } - eae = ip->i_ea_area; - easize = ip->i_ea_len; - - ealen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name, - NULL, &p); - if (ealen >= 0) { - error = 0; - if (ap->a_size != NULL) - *ap->a_size = ealen; - else if (ap->a_uio != NULL) - error = uiomove(p, ealen, ap->a_uio); - } else - error = ENOATTR; - if (stand_alone) - ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); - return(error); -} - -/* - * Vnode operation to retrieve extended attributes on a vnode. 
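ffs_findextattr and the list/get routines above walk the extended-attribute area as a sequence of variable-length records: a 32-bit total record length, a namespace byte, a content-padding byte, a name-length byte, the name (padded so the value starts on an 8-byte boundary), then the value and its padding. A userland sketch of the same walk over an in-memory image of the area (the layout is reproduced from the parsing code above; error handling is minimal):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* print the namespace and name of every record in an EA area image */
    static void
    walk_ea_area(const unsigned char *ptr, size_t length)
    {
            const unsigned char *p = ptr, *pe = ptr + length, *pn;
            uint32_t ul;

            while (p < pe) {
                    memcpy(&ul, p, sizeof(ul));     /* total record length */
                    pn = p + ul;
                    if (ul == 0 || pn > pe)         /* truncated or corrupt record */
                            break;
                    p += sizeof(ul);
                    int nspace = p[0];              /* attribute namespace */
                    int nlen = p[2];                /* length of the name */
                    printf("namespace %d name %.*s\n", nspace, nlen, (const char *)&p[3]);
                    p = pn;                         /* next record */
            }
    }

    int
    main(void)
    {
            unsigned char area[24];
            uint32_t ul = sizeof(area);     /* 4 + 1 + 1 + 1 + "foo" + 6 pad + 4 value + 4 pad */

            memset(area, 0, sizeof(area));
            memcpy(area, &ul, sizeof(ul));
            area[4] = 1;                    /* namespace (sample value) */
            area[5] = 4;                    /* value padding */
            area[6] = 3;                    /* name length */
            memcpy(&area[7], "foo", 3);
            memcpy(&area[16], "DATA", 4);   /* value begins 8-byte aligned after the header */
            walk_ea_area(area, sizeof(area));
            return (0);
    }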
- */ -static int -ffs_listextattr(struct vop_listextattr_args *ap) -/* -vop_listextattr { - IN struct vnode *a_vp; - IN int a_attrnamespace; - INOUT struct uio *a_uio; - OUT size_t *a_size; - IN struct ucred *a_cred; - IN struct thread *a_td; -}; -*/ -{ - struct inode *ip; - struct fs *fs; - u_char *eae, *p, *pe, *pn; - unsigned easize; - uint32_t ul; - int error, ealen, stand_alone; - - ip = VTOI(ap->a_vp); - fs = ip->i_fs; - - if (fs->fs_magic == FS_UFS1_MAGIC) - return (ufs_vnoperate((struct vop_generic_args *)ap)); - - if (ap->a_vp->v_type == VCHR) - return (EOPNOTSUPP); - - error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, - ap->a_cred, ap->a_td, IREAD); - if (error) - return (error); - - if (ip->i_ea_area == NULL) { - error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td); - if (error) - return (error); - stand_alone = 1; - } else { - stand_alone = 0; - } - eae = ip->i_ea_area; - easize = ip->i_ea_len; - - error = 0; - if (ap->a_size != NULL) - *ap->a_size = 0; - pe = eae + easize; - for(p = eae; error == 0 && p < pe; p = pn) { - bcopy(p, &ul, sizeof(ul)); - pn = p + ul; - if (pn > pe) - break; - p += sizeof(ul); - if (*p++ != ap->a_attrnamespace) - continue; - p++; /* pad2 */ - ealen = *p; - if (ap->a_size != NULL) { - *ap->a_size += ealen + 1; - } else if (ap->a_uio != NULL) { - error = uiomove(p, ealen + 1, ap->a_uio); - } - } - if (stand_alone) - ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); - return(error); -} - -/* - * Vnode operation to set a named attribute. - */ -static int -ffs_setextattr(struct vop_setextattr_args *ap) -/* -vop_setextattr { - IN struct vnode *a_vp; - IN int a_attrnamespace; - IN const char *a_name; - INOUT struct uio *a_uio; - IN struct ucred *a_cred; - IN struct thread *a_td; -}; -*/ -{ - struct inode *ip; - struct fs *fs; - uint32_t ealength, ul; - int ealen, olen, eapad1, eapad2, error, i, easize; - u_char *eae, *p; - int stand_alone; - - ip = VTOI(ap->a_vp); - fs = ip->i_fs; - - if (fs->fs_magic == FS_UFS1_MAGIC) - return (ufs_vnoperate((struct vop_generic_args *)ap)); - - if (ap->a_vp->v_type == VCHR) - return (EOPNOTSUPP); - - if (strlen(ap->a_name) == 0) - return (EINVAL); - - /* XXX Now unsupported API to delete EAs using NULL uio. 
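ffs_setextattr, which continues below, sizes the new record before copying it in: the fixed header (the length word plus three bytes) and the name are padded out to an 8-byte boundary, and the value is padded to an 8-byte boundary as well. A stand-alone sketch of that size calculation:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* total on-disk size of one extended-attribute record */
    static uint32_t
    ea_record_size(const char *name, size_t valuelen)
    {
            uint32_t ealength = sizeof(uint32_t) + 3 + strlen(name);
            uint32_t eapad1 = 8 - (ealength % 8);   /* pad header + name to 8 bytes */
            uint32_t eapad2 = 8 - (valuelen % 8);   /* pad the value to 8 bytes */

            if (eapad1 == 8)
                    eapad1 = 0;
            if (eapad2 == 8)
                    eapad2 = 0;
            return (ealength + eapad1 + (uint32_t)valuelen + eapad2);
    }

    int
    main(void)
    {
            /* "foo" with a 4-byte value: 7 + 3 = 10, padded to 16, plus 4 + 4 = 24 */
            printf("%u\n", (unsigned)ea_record_size("foo", 4));
            return (0);
    }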
*/ - if (ap->a_uio == NULL) - return (EOPNOTSUPP); - - error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, - ap->a_cred, ap->a_td, IWRITE); - if (error) { - if (ip->i_ea_area != NULL && ip->i_ea_error == 0) - ip->i_ea_error = error; - return (error); - } - - if (ip->i_ea_area == NULL) { - error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td); - if (error) - return (error); - stand_alone = 1; - } else { - stand_alone = 0; - } - - ealen = ap->a_uio->uio_resid; - ealength = sizeof(uint32_t) + 3 + strlen(ap->a_name); - eapad1 = 8 - (ealength % 8); - if (eapad1 == 8) - eapad1 = 0; - eapad2 = 8 - (ealen % 8); - if (eapad2 == 8) - eapad2 = 0; - ealength += eapad1 + ealen + eapad2; - - eae = malloc(ip->i_ea_len + ealength, M_TEMP, M_WAITOK); - bcopy(ip->i_ea_area, eae, ip->i_ea_len); - easize = ip->i_ea_len; - - olen = ffs_findextattr(eae, easize, - ap->a_attrnamespace, ap->a_name, &p, NULL); - if (olen == -1) { - /* new, append at end */ - p = eae + easize; - easize += ealength; - } else { - bcopy(p, &ul, sizeof ul); - i = p - eae + ul; - if (ul != ealength) { - bcopy(p + ul, p + ealength, easize - i); - easize += (ealength - ul); - } - } - if (easize > NXADDR * fs->fs_bsize) { - free(eae, M_TEMP); - if (stand_alone) - ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); - else if (ip->i_ea_error == 0) - ip->i_ea_error = ENOSPC; - return(ENOSPC); - } - bcopy(&ealength, p, sizeof(ealength)); - p += sizeof(ealength); - *p++ = ap->a_attrnamespace; - *p++ = eapad2; - *p++ = strlen(ap->a_name); - strcpy(p, ap->a_name); - p += strlen(ap->a_name); - bzero(p, eapad1); - p += eapad1; - error = uiomove(p, ealen, ap->a_uio); - if (error) { - free(eae, M_TEMP); - if (stand_alone) - ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); - else if (ip->i_ea_error == 0) - ip->i_ea_error = error; - return(error); - } - p += ealen; - bzero(p, eapad2); - - p = ip->i_ea_area; - ip->i_ea_area = eae; - ip->i_ea_len = easize; - free(p, M_TEMP); - if (stand_alone) - error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td); - return(error); -} -#endif diff --git a/src/sys/ufs/ffs/fs.h b/src/sys/ufs/ffs/fs.h deleted file mode 100644 index 96e18e1..0000000 --- a/src/sys/ufs/ffs/fs.h +++ /dev/null @@ -1,606 +0,0 @@ -#if 0 -/* - * Copyright (c) 1982, 1986, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)fs.h 8.13 (Berkeley) 3/21/95 - * $FreeBSD: src/sys/ufs/ffs/fs.h,v 1.40 2003/11/16 07:08:27 wes Exp $ - */ - -#ifndef _UFS_FFS_FS_H_ -#define _UFS_FFS_FS_H_ - -/* - * Each disk drive contains some number of filesystems. - * A filesystem consists of a number of cylinder groups. - * Each cylinder group has inodes and data. - * - * A filesystem is described by its super-block, which in turn - * describes the cylinder groups. The super-block is critical - * data and is replicated in each cylinder group to protect against - * catastrophic loss. This is done at `newfs' time and the critical - * super-block data does not change, so the copies need not be - * referenced further unless disaster strikes. - * - * For filesystem fs, the offsets of the various blocks of interest - * are given in the super block as: - * [fs->fs_sblkno] Super-block - * [fs->fs_cblkno] Cylinder group block - * [fs->fs_iblkno] Inode blocks - * [fs->fs_dblkno] Data blocks - * The beginning of cylinder group cg in fs, is given by - * the ``cgbase(fs, cg)'' macro. - * - * Depending on the architecture and the media, the superblock may - * reside in any one of four places. For tiny media where every block - * counts, it is placed at the very front of the partition. Historically, - * UFS1 placed it 8K from the front to leave room for the disk label and - * a small bootstrap. For UFS2 it got moved to 64K from the front to leave - * room for the disk label and a bigger bootstrap, and for really piggy - * systems we check at 256K from the front if the first three fail. In - * all cases the size of the superblock will be SBLOCKSIZE. All values are - * given in byte-offset form, so they do not imply a sector size. The - * SBLOCKSEARCH specifies the order in which the locations should be searched. - */ -#define SBLOCK_FLOPPY 0 -#define SBLOCK_UFS1 8192 -#define SBLOCK_UFS2 65536 -#define SBLOCK_PIGGY 262144 -#define SBLOCKSIZE 8192 -#define SBLOCKSEARCH \ - { SBLOCK_UFS2, SBLOCK_UFS1, SBLOCK_FLOPPY, SBLOCK_PIGGY, -1 } - -/* - * Max number of fragments per block. This value is NOT tweakable. - */ -#define MAXFRAG 8 - -/* - * Addresses stored in inodes are capable of addressing fragments - * of `blocks'. File system blocks of at most size MAXBSIZE can - * be optionally broken into 2, 4, or 8 pieces, each of which is - * addressable; these pieces may be DEV_BSIZE, or some multiple of - * a DEV_BSIZE unit. - * - * Large files consist of exclusively large data blocks. To avoid - * undue wasted disk space, the last data block of a small file may be - * allocated as only as many fragments of a large block as are - * necessary. The filesystem format retains only a single pointer - * to such a fragment, which is a piece of a single large block that - * has been divided. The size of such a fragment is determinable from - * information in the inode, using the ``blksize(fs, ip, lbn)'' macro. 
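A short numeric sketch of the fragment rule just described, using sample 16 KB block / 2 KB fragment geometry: a file whose size does not fill its last block gets only as many whole fragments as the tail needs. This simplified helper ignores the NDADDR cutoff that the real blksize()/sblksize() macros also check.

    #include <stdio.h>
    #include <stdint.h>

    /* bytes allocated for logical block lbn of a file of the given size */
    static int64_t
    tail_block_size(int64_t bsize, int64_t fsize, int64_t filesize, int64_t lbn)
    {
            if (filesize >= (lbn + 1) * bsize)      /* block is fully populated */
                    return (bsize);
            int64_t tail = filesize % bsize;        /* bytes used in the last block */
            return ((tail + fsize - 1) / fsize * fsize);    /* fragroundup */
    }

    int
    main(void)
    {
            /* a 20000-byte file: block 0 is full, block 1 holds 3616 bytes -> two fragments */
            printf("%lld %lld\n",
                (long long)tail_block_size(16384, 2048, 20000, 0),
                (long long)tail_block_size(16384, 2048, 20000, 1));
            return (0);
    }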
- * - * The filesystem records space availability at the fragment level; - * to determine block availability, aligned fragments are examined. - */ - -/* - * MINBSIZE is the smallest allowable block size. - * In order to insure that it is possible to create files of size - * 2^32 with only two levels of indirection, MINBSIZE is set to 4096. - * MINBSIZE must be big enough to hold a cylinder group block, - * thus changes to (struct cg) must keep its size within MINBSIZE. - * Note that super blocks are always of size SBSIZE, - * and that both SBSIZE and MAXBSIZE must be >= MINBSIZE. - */ -#define MINBSIZE 4096 - -/* - * The path name on which the filesystem is mounted is maintained - * in fs_fsmnt. MAXMNTLEN defines the amount of space allocated in - * the super block for this name. - */ -#define MAXMNTLEN 468 - -/* - * The volume name for this filesystem is maintained in fs_volname. - * MAXVOLLEN defines the length of the buffer allocated. - */ -#define MAXVOLLEN 32 - -/* - * There is a 128-byte region in the superblock reserved for in-core - * pointers to summary information. Originally this included an array - * of pointers to blocks of struct csum; now there are just a few - * pointers and the remaining space is padded with fs_ocsp[]. - * - * NOCSPTRS determines the size of this padding. One pointer (fs_csp) - * is taken away to point to a contiguous array of struct csum for - * all cylinder groups; a second (fs_maxcluster) points to an array - * of cluster sizes that is computed as cylinder groups are inspected, - * and the third points to an array that tracks the creation of new - * directories. A fourth pointer, fs_active, is used when creating - * snapshots; it points to a bitmap of cylinder groups for which the - * free-block bitmap has changed since the snapshot operation began. - */ -#define NOCSPTRS ((128 / sizeof(void *)) - 4) - -/* - * A summary of contiguous blocks of various sizes is maintained - * in each cylinder group. Normally this is set by the initial - * value of fs_maxcontig. To conserve space, a maximum summary size - * is set by FS_MAXCONTIG. - */ -#define FS_MAXCONTIG 16 - -/* - * MINFREE gives the minimum acceptable percentage of filesystem - * blocks which may be free. If the freelist drops below this level - * only the superuser may continue to allocate blocks. This may - * be set to 0 if no reserve of free blocks is deemed necessary, - * however throughput drops by fifty percent if the filesystem - * is run at between 95% and 100% full; thus the minimum default - * value of fs_minfree is 5%. However, to get good clustering - * performance, 10% is a better choice. hence we use 10% as our - * default value. With 10% free space, fragmentation is not a - * problem, so we choose to optimize for time. - */ -#define MINFREE 8 -#define DEFAULTOPT FS_OPTTIME - -/* - * Grigoriy Orlov has done some extensive work to fine - * tune the layout preferences for directories within a filesystem. - * His algorithm can be tuned by adjusting the following parameters - * which tell the system the average file size and the average number - * of files per directory. These defaults are well selected for typical - * filesystems, but may need to be tuned for odd cases like filesystems - * being used for sqiud caches or news spools. - */ -#define AVFILESIZ 16384 /* expected average file size */ -#define AFPDIR 64 /* expected number of files per directory */ - -/* - * The maximum number of snapshot nodes that can be associated - * with each filesystem. 
This limit affects only the number of - * snapshot files that can be recorded within the superblock so - * that they can be found when the filesystem is mounted. However, - * maintaining too many will slow the filesystem performance, so - * having this limit is a good idea. - */ -#define FSMAXSNAP 20 - -/* - * Used to identify special blocks in snapshots: - * - * BLK_NOCOPY - A block that was unallocated at the time the snapshot - * was taken, hence does not need to be copied when written. - * BLK_SNAP - A block held by another snapshot that is not needed by this - * snapshot. When the other snapshot is freed, the BLK_SNAP entries - * are converted to BLK_NOCOPY. These are needed to allow fsck to - * identify blocks that are in use by other snapshots (which are - * expunged from this snapshot). - */ -#define BLK_NOCOPY ((ufs2_daddr_t)(1)) -#define BLK_SNAP ((ufs2_daddr_t)(2)) - -/* - * Sysctl values for the fast filesystem. - */ -#define FFS_ADJ_REFCNT 1 /* adjust inode reference count */ -#define FFS_ADJ_BLKCNT 2 /* adjust inode used block count */ -#define FFS_BLK_FREE 3 /* free range of blocks in map */ -#define FFS_DIR_FREE 4 /* free specified dir inodes in map */ -#define FFS_FILE_FREE 5 /* free specified file inodes in map */ -#define FFS_SET_FLAGS 6 /* set filesystem flags */ -#define FFS_MAXID 7 /* number of valid ffs ids */ - -/* - * Command structure passed in to the filesystem to adjust filesystem values. - */ -#define FFS_CMD_VERSION 0x19790518 /* version ID */ -struct fsck_cmd { - int32_t version; /* version of command structure */ - int32_t handle; /* reference to filesystem to be changed */ - int64_t value; /* inode or block number to be affected */ - int64_t size; /* amount or range to be adjusted */ - int64_t spare; /* reserved for future use */ -}; - -/* - * Per cylinder group information; summarized in blocks allocated - * from first cylinder group data blocks. These blocks have to be - * read in from fs_csaddr (size fs_cssize) in addition to the - * super block. - */ -struct csum { - int32_t cs_ndir; /* number of directories */ - int32_t cs_nbfree; /* number of free blocks */ - int32_t cs_nifree; /* number of free inodes */ - int32_t cs_nffree; /* number of free frags */ -}; -struct csum_total { - int64_t cs_ndir; /* number of directories */ - int64_t cs_nbfree; /* number of free blocks */ - int64_t cs_nifree; /* number of free inodes */ - int64_t cs_nffree; /* number of free frags */ - int64_t cs_numclusters; /* number of free clusters */ - int64_t cs_spare[3]; /* future expansion */ -}; - -/* - * Super block for an FFS filesystem. 
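The fsck_cmd structure above is how individual corrections are handed to the kernel; each request carries the command version, a handle identifying the filesystem, and the value/size pair being adjusted. A sketch of filling one request (how the request is actually delivered is outside this header, so no delivery call is shown, and the handle and adjustment values here are hypothetical):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* mirrors struct fsck_cmd from the header above */
    struct fsck_cmd {
            int32_t version;        /* FFS_CMD_VERSION */
            int32_t handle;         /* which mounted filesystem */
            int64_t value;          /* inode or block number affected */
            int64_t size;           /* amount or range to adjust */
            int64_t spare;
    };

    #define FFS_CMD_VERSION 0x19790518

    int
    main(void)
    {
            struct fsck_cmd cmd;

            memset(&cmd, 0, sizeof(cmd));
            cmd.version = FFS_CMD_VERSION;
            cmd.handle = 0;         /* filesystem reference (hypothetical value) */
            cmd.value = 12345;      /* e.g. the inode whose count needs adjusting */
            cmd.size = -1;          /* e.g. the amount of the adjustment */
            printf("%zu-byte request for inode %lld\n", sizeof(cmd), (long long)cmd.value);
            return (0);
    }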
- */ -struct fs { - int32_t fs_firstfield; /* historic filesystem linked list, */ - int32_t fs_unused_1; /* used for incore super blocks */ - int32_t fs_sblkno; /* offset of super-block in filesys */ - int32_t fs_cblkno; /* offset of cyl-block in filesys */ - int32_t fs_iblkno; /* offset of inode-blocks in filesys */ - int32_t fs_dblkno; /* offset of first data after cg */ - int32_t fs_old_cgoffset; /* cylinder group offset in cylinder */ - int32_t fs_old_cgmask; /* used to calc mod fs_ntrak */ - int32_t fs_old_time; /* last time written */ - int32_t fs_old_size; /* number of blocks in fs */ - int32_t fs_old_dsize; /* number of data blocks in fs */ - int32_t fs_ncg; /* number of cylinder groups */ - int32_t fs_bsize; /* size of basic blocks in fs */ - int32_t fs_fsize; /* size of frag blocks in fs */ - int32_t fs_frag; /* number of frags in a block in fs */ -/* these are configuration parameters */ - int32_t fs_minfree; /* minimum percentage of free blocks */ - int32_t fs_old_rotdelay; /* num of ms for optimal next block */ - int32_t fs_old_rps; /* disk revolutions per second */ -/* these fields can be computed from the others */ - int32_t fs_bmask; /* ``blkoff'' calc of blk offsets */ - int32_t fs_fmask; /* ``fragoff'' calc of frag offsets */ - int32_t fs_bshift; /* ``lblkno'' calc of logical blkno */ - int32_t fs_fshift; /* ``numfrags'' calc number of frags */ -/* these are configuration parameters */ - int32_t fs_maxcontig; /* max number of contiguous blks */ - int32_t fs_maxbpg; /* max number of blks per cyl group */ -/* these fields can be computed from the others */ - int32_t fs_fragshift; /* block to frag shift */ - int32_t fs_fsbtodb; /* fsbtodb and dbtofsb shift constant */ - int32_t fs_sbsize; /* actual size of super block */ - int32_t fs_spare1[2]; /* old fs_csmask */ - /* old fs_csshift */ - int32_t fs_nindir; /* value of NINDIR */ - int32_t fs_inopb; /* value of INOPB */ - int32_t fs_old_nspf; /* value of NSPF */ -/* yet another configuration parameter */ - int32_t fs_optim; /* optimization preference, see below */ - int32_t fs_old_npsect; /* # sectors/track including spares */ - int32_t fs_old_interleave; /* hardware sector interleave */ - int32_t fs_old_trackskew; /* sector 0 skew, per track */ - int32_t fs_id[2]; /* unique filesystem id */ -/* sizes determined by number of cylinder groups and their sizes */ - int32_t fs_old_csaddr; /* blk addr of cyl grp summary area */ - int32_t fs_cssize; /* size of cyl grp summary area */ - int32_t fs_cgsize; /* cylinder group size */ - int32_t fs_spare2; /* old fs_ntrak */ - int32_t fs_old_nsect; /* sectors per track */ - int32_t fs_old_spc; /* sectors per cylinder */ - int32_t fs_old_ncyl; /* cylinders in filesystem */ - int32_t fs_old_cpg; /* cylinders per group */ - int32_t fs_ipg; /* inodes per group */ - int32_t fs_fpg; /* blocks per group * fs_frag */ -/* this data must be re-computed after crashes */ - struct csum fs_old_cstotal; /* cylinder summary information */ -/* these fields are cleared at mount time */ - int8_t fs_fmod; /* super block modified flag */ - int8_t fs_clean; /* filesystem is clean flag */ - int8_t fs_ronly; /* mounted read-only flag */ - int8_t fs_old_flags; /* old FS_ flags */ - u_char fs_fsmnt[MAXMNTLEN]; /* name mounted on */ - u_char fs_volname[MAXVOLLEN]; /* volume name */ - u_int64_t fs_swuid; /* system-wide uid */ - int32_t fs_pad; /* due to alignment of fs_swuid */ -/* these fields retain the current block allocation info */ - int32_t fs_cgrotor; /* last cg searched */ - void *fs_ocsp[NOCSPTRS]; /* 
padding; was list of fs_cs buffers */ - u_int8_t *fs_contigdirs; /* # of contiguously allocated dirs */ - struct csum *fs_csp; /* cg summary info buffer for fs_cs */ - int32_t *fs_maxcluster; /* max cluster in each cyl group */ - u_int *fs_active; /* used by snapshots to track fs */ - int32_t fs_old_cpc; /* cyl per cycle in postbl */ - int32_t fs_maxbsize; /* maximum blocking factor permitted */ - int64_t fs_sparecon64[17]; /* old rotation block list head */ - int64_t fs_sblockloc; /* byte offset of standard superblock */ - struct csum_total fs_cstotal; /* cylinder summary information */ - ufs_time_t fs_time; /* last time written */ - int64_t fs_size; /* number of blocks in fs */ - int64_t fs_dsize; /* number of data blocks in fs */ - ufs2_daddr_t fs_csaddr; /* blk addr of cyl grp summary area */ - int64_t fs_pendingblocks; /* blocks in process of being freed */ - int32_t fs_pendinginodes; /* inodes in process of being freed */ - int32_t fs_snapinum[FSMAXSNAP];/* list of snapshot inode numbers */ - int32_t fs_avgfilesize; /* expected average file size */ - int32_t fs_avgfpdir; /* expected # of files per directory */ - int32_t fs_save_cgsize; /* save real cg size to use fs_bsize */ - int32_t fs_sparecon32[26]; /* reserved for future constants */ - int32_t fs_flags; /* see FS_ flags below */ - int32_t fs_contigsumsize; /* size of cluster summary array */ - int32_t fs_maxsymlinklen; /* max length of an internal symlink */ - int32_t fs_old_inodefmt; /* format of on-disk inodes */ - u_int64_t fs_maxfilesize; /* maximum representable file size */ - int64_t fs_qbmask; /* ~fs_bmask for use with 64-bit size */ - int64_t fs_qfmask; /* ~fs_fmask for use with 64-bit size */ - int32_t fs_state; /* validate fs_clean field */ - int32_t fs_old_postblformat; /* format of positional layout tables */ - int32_t fs_old_nrpos; /* number of rotational positions */ - int32_t fs_spare5[2]; /* old fs_postbloff */ - /* old fs_rotbloff */ - int32_t fs_magic; /* magic number */ -}; - -/* Sanity checking. */ -#ifdef CTASSERT -CTASSERT(sizeof(struct fs) == 1376); -#endif - -/* - * Filesystem identification - */ -#define FS_UFS1_MAGIC 0x011954 /* UFS1 fast filesystem magic number */ -#define FS_UFS2_MAGIC 0x19540119 /* UFS2 fast filesystem magic number */ -#define FS_BAD2_MAGIC 0x19960408 /* UFS2 incomplete newfs magic number */ -#define FS_OKAY 0x7c269d38 /* superblock checksum */ -#define FS_42INODEFMT -1 /* 4.2BSD inode format */ -#define FS_44INODEFMT 2 /* 4.4BSD inode format */ - -/* - * Preference for optimization. - */ -#define FS_OPTTIME 0 /* minimize allocation time */ -#define FS_OPTSPACE 1 /* minimize disk fragmentation */ - -/* - * Filesystem flags. - * - * The FS_UNCLEAN flag is set by the kernel when the filesystem was - * mounted with fs_clean set to zero. The FS_DOSOFTDEP flag indicates - * that the filesystem should be managed by the soft updates code. - * Note that the FS_NEEDSFSCK flag is set and cleared only by the - * fsck utility. It is set when background fsck finds an unexpected - * inconsistency which requires a traditional foreground fsck to be - * run. Such inconsistencies should only be found after an uncorrectable - * disk error. A foreground fsck will clear the FS_NEEDSFSCK flag when - * it has successfully cleaned up the filesystem. The kernel uses this - * flag to enforce that inconsistent filesystems be mounted read-only. - * The FS_INDEXDIRS flag when set indicates that the kernel maintains - * on-disk auxiliary indexes (such as B-trees) for speeding directory - * accesses. 
Kernels that do not support auxiliary indicies clear the - * flag to indicate that the indicies need to be rebuilt (by fsck) before - * they can be used. - * - * FS_ACLS indicates that ACLs are administratively enabled for the - * file system, so they should be loaded from extended attributes, - * observed for access control purposes, and be administered by object - * owners. FS_MULTILABEL indicates that the TrustedBSD MAC Framework - * should attempt to back MAC labels into extended attributes on the - * file system rather than maintain a single mount label for all - * objects. - */ -#define FS_UNCLEAN 0x01 /* filesystem not clean at mount */ -#define FS_DOSOFTDEP 0x02 /* filesystem using soft dependencies */ -#define FS_NEEDSFSCK 0x04 /* filesystem needs sync fsck before mount */ -#define FS_INDEXDIRS 0x08 /* kernel supports indexed directories */ -#define FS_ACLS 0x10 /* file system has ACLs enabled */ -#define FS_MULTILABEL 0x20 /* file system is MAC multi-label */ -#define FS_FLAGS_UPDATED 0x80 /* flags have been moved to new location */ - -/* - * Macros to access bits in the fs_active array. - */ -#define ACTIVECGNUM(fs, cg) ((fs)->fs_active[(cg) / (NBBY * sizeof(int))]) -#define ACTIVECGOFF(cg) (1 << ((cg) % (NBBY * sizeof(int)))) - -/* - * The size of a cylinder group is calculated by CGSIZE. The maximum size - * is limited by the fact that cylinder groups are at most one block. - * Its size is derived from the size of the maps maintained in the - * cylinder group and the (struct cg) size. - */ -#define CGSIZE(fs) \ - /* base cg */ (sizeof(struct cg) + sizeof(int32_t) + \ - /* old btotoff */ (fs)->fs_old_cpg * sizeof(int32_t) + \ - /* old boff */ (fs)->fs_old_cpg * sizeof(u_int16_t) + \ - /* inode map */ howmany((fs)->fs_ipg, NBBY) + \ - /* block map */ howmany((fs)->fs_fpg, NBBY) +\ - /* if present */ ((fs)->fs_contigsumsize <= 0 ? 0 : \ - /* cluster sum */ (fs)->fs_contigsumsize * sizeof(int32_t) + \ - /* cluster map */ howmany(fragstoblks(fs, (fs)->fs_fpg), NBBY))) - -/* - * The minimal number of cylinder groups that should be created. - */ -#define MINCYLGRPS 4 - -/* - * Convert cylinder group to base address of its global summary info. - */ -#define fs_cs(fs, indx) fs_csp[indx] - -/* - * Cylinder group block for a filesystem. 
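The ACTIVECGNUM/ACTIVECGOFF macros above treat fs_active as a flat bitmap with one bit per cylinder group, addressed one int-sized word at a time. The same indexing in a stand-alone form (NBBY is 8 bits per byte; the array size here is just a sample):

    #include <stdio.h>

    typedef unsigned int u_int;

    #define NBBY 8                                          /* bits per byte */
    #define CGWORD(cg) ((cg) / (NBBY * sizeof(u_int)))      /* word holding the cg's bit */
    #define CGBIT(cg)  (1u << ((cg) % (NBBY * sizeof(u_int))))

    int
    main(void)
    {
            u_int active[4] = { 0 };        /* room for 4 words' worth of cylinder groups */
            int cg = 37;

            active[CGWORD(cg)] |= CGBIT(cg);        /* mark cg 37 as modified */
            printf("cg %d set: %d\n", cg, (active[CGWORD(cg)] & CGBIT(cg)) != 0);
            return (0);
    }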
- */ -#define CG_MAGIC 0x090255 -struct cg { - int32_t cg_firstfield; /* historic cyl groups linked list */ - int32_t cg_magic; /* magic number */ - int32_t cg_old_time; /* time last written */ - int32_t cg_cgx; /* we are the cgx'th cylinder group */ - int16_t cg_old_ncyl; /* number of cyl's this cg */ - int16_t cg_old_niblk; /* number of inode blocks this cg */ - int32_t cg_ndblk; /* number of data blocks this cg */ - struct csum cg_cs; /* cylinder summary information */ - int32_t cg_rotor; /* position of last used block */ - int32_t cg_frotor; /* position of last used frag */ - int32_t cg_irotor; /* position of last used inode */ - int32_t cg_frsum[MAXFRAG]; /* counts of available frags */ - int32_t cg_old_btotoff; /* (int32) block totals per cylinder */ - int32_t cg_old_boff; /* (u_int16) free block positions */ - int32_t cg_iusedoff; /* (u_int8) used inode map */ - int32_t cg_freeoff; /* (u_int8) free block map */ - int32_t cg_nextfreeoff; /* (u_int8) next available space */ - int32_t cg_clustersumoff; /* (u_int32) counts of avail clusters */ - int32_t cg_clusteroff; /* (u_int8) free cluster map */ - int32_t cg_nclusterblks; /* number of clusters this cg */ - int32_t cg_niblk; /* number of inode blocks this cg */ - int32_t cg_initediblk; /* last initialized inode */ - int32_t cg_sparecon32[3]; /* reserved for future use */ - ufs_time_t cg_time; /* time last written */ - int64_t cg_sparecon64[3]; /* reserved for future use */ - u_int8_t cg_space[1]; /* space for cylinder group maps */ -/* actually longer */ -}; - -/* - * Macros for access to cylinder group array structures - */ -#define cg_chkmagic(cgp) ((cgp)->cg_magic == CG_MAGIC) -#define cg_inosused(cgp) \ - ((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_iusedoff)) -#define cg_blksfree(cgp) \ - ((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_freeoff)) -#define cg_clustersfree(cgp) \ - ((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_clusteroff)) -#define cg_clustersum(cgp) \ - ((int32_t *)((u_int8_t *)(cgp) + (cgp)->cg_clustersumoff)) - -/* - * Turn filesystem block numbers into disk block addresses. - * This maps filesystem blocks to device size blocks. - */ -#define fsbtodb(fs, b) ((b) << (fs)->fs_fsbtodb) -#define dbtofsb(fs, b) ((b) >> (fs)->fs_fsbtodb) - -/* - * Cylinder group macros to locate things in cylinder groups. - * They calc filesystem addresses of cylinder group data structures. - */ -#define cgbase(fs, c) (((ufs2_daddr_t)(fs)->fs_fpg) * (c)) -#define cgdmin(fs, c) (cgstart(fs, c) + (fs)->fs_dblkno) /* 1st data */ -#define cgimin(fs, c) (cgstart(fs, c) + (fs)->fs_iblkno) /* inode blk */ -#define cgsblock(fs, c) (cgstart(fs, c) + (fs)->fs_sblkno) /* super blk */ -#define cgtod(fs, c) (cgstart(fs, c) + (fs)->fs_cblkno) /* cg block */ -#define cgstart(fs, c) \ - ((fs)->fs_magic == FS_UFS2_MAGIC ? cgbase(fs, c) : \ - (cgbase(fs, c) + (fs)->fs_old_cgoffset * ((c) & ~((fs)->fs_old_cgmask)))) - -/* - * Macros for handling inode numbers: - * inode number to filesystem block offset. - * inode number to cylinder group number. - * inode number to filesystem block address. - */ -#define ino_to_cg(fs, x) ((x) / (fs)->fs_ipg) -#define ino_to_fsba(fs, x) \ - ((ufs2_daddr_t)(cgimin(fs, ino_to_cg(fs, x)) + \ - (blkstofrags((fs), (((x) % (fs)->fs_ipg) / INOPB(fs)))))) -#define ino_to_fsbo(fs, x) ((x) % INOPB(fs)) - -/* - * Give cylinder group number for a filesystem block. - * Give cylinder group block number for a filesystem block. 
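The ino_to_* macros above locate an inode from nothing but its number: divide by the inodes per group to get the cylinder group, and use the remainder to find the inode block within the group and the slot within that block. A simplified arithmetic sketch with sample geometry (it leaves out the cgimin()/blkstofrags() address translation that the full ino_to_fsba macro performs):

    #include <stdio.h>
    #include <stdint.h>

    int
    main(void)
    {
            int64_t ipg = 11520;    /* inodes per cylinder group (sample value) */
            int64_t inopb = 64;     /* inodes per filesystem block (sample value) */
            int64_t ino = 123456;

            int64_t cg = ino / ipg;                         /* ino_to_cg() */
            int64_t inblock = (ino % ipg) / inopb;          /* inode block within the group */
            int64_t slot = ino % inopb;                     /* ino_to_fsbo(): slot in that block */

            printf("inode %lld: cg %lld, block %lld, slot %lld\n",
                (long long)ino, (long long)cg, (long long)inblock, (long long)slot);
            return (0);
    }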
- */ -#define dtog(fs, d) ((d) / (fs)->fs_fpg) -#define dtogd(fs, d) ((d) % (fs)->fs_fpg) - -/* - * Extract the bits for a block from a map. - * Compute the cylinder and rotational position of a cyl block addr. - */ -#define blkmap(fs, map, loc) \ - (((map)[(loc) / NBBY] >> ((loc) % NBBY)) & (0xff >> (NBBY - (fs)->fs_frag))) - -/* - * The following macros optimize certain frequently calculated - * quantities by using shifts and masks in place of divisions - * modulos and multiplications. - */ -#define blkoff(fs, loc) /* calculates (loc % fs->fs_bsize) */ \ - ((loc) & (fs)->fs_qbmask) -#define fragoff(fs, loc) /* calculates (loc % fs->fs_fsize) */ \ - ((loc) & (fs)->fs_qfmask) -#define lfragtosize(fs, frag) /* calculates ((off_t)frag * fs->fs_fsize) */ \ - (((off_t)(frag)) << (fs)->fs_fshift) -#define lblktosize(fs, blk) /* calculates ((off_t)blk * fs->fs_bsize) */ \ - (((off_t)(blk)) << (fs)->fs_bshift) -/* Use this only when `blk' is known to be small, e.g., < NDADDR. */ -#define smalllblktosize(fs, blk) /* calculates (blk * fs->fs_bsize) */ \ - ((blk) << (fs)->fs_bshift) -#define lblkno(fs, loc) /* calculates (loc / fs->fs_bsize) */ \ - ((loc) >> (fs)->fs_bshift) -#define numfrags(fs, loc) /* calculates (loc / fs->fs_fsize) */ \ - ((loc) >> (fs)->fs_fshift) -#define blkroundup(fs, size) /* calculates roundup(size, fs->fs_bsize) */ \ - (((size) + (fs)->fs_qbmask) & (fs)->fs_bmask) -#define fragroundup(fs, size) /* calculates roundup(size, fs->fs_fsize) */ \ - (((size) + (fs)->fs_qfmask) & (fs)->fs_fmask) -#define fragstoblks(fs, frags) /* calculates (frags / fs->fs_frag) */ \ - ((frags) >> (fs)->fs_fragshift) -#define blkstofrags(fs, blks) /* calculates (blks * fs->fs_frag) */ \ - ((blks) << (fs)->fs_fragshift) -#define fragnum(fs, fsb) /* calculates (fsb % fs->fs_frag) */ \ - ((fsb) & ((fs)->fs_frag - 1)) -#define blknum(fs, fsb) /* calculates rounddown(fsb, fs->fs_frag) */ \ - ((fsb) &~ ((fs)->fs_frag - 1)) - -/* - * Determine the number of available frags given a - * percentage to hold in reserve. - */ -#define freespace(fs, percentreserved) \ - (blkstofrags((fs), (fs)->fs_cstotal.cs_nbfree) + \ - (fs)->fs_cstotal.cs_nffree - \ - (((off_t)((fs)->fs_dsize)) * (percentreserved) / 100)) - -/* - * Determining the size of a file block in the filesystem. - */ -#define blksize(fs, ip, lbn) \ - (((lbn) >= NDADDR || (ip)->i_size >= smalllblktosize(fs, (lbn) + 1)) \ - ? (fs)->fs_bsize \ - : (fragroundup(fs, blkoff(fs, (ip)->i_size)))) -#define sblksize(fs, size, lbn) \ - (((lbn) >= NDADDR || (size) >= ((lbn) + 1) << (fs)->fs_bshift) \ - ? (fs)->fs_bsize \ - : (fragroundup(fs, blkoff(fs, (size))))) - - -/* - * Number of inodes in a secondary storage block/fragment. - */ -#define INOPB(fs) ((fs)->fs_inopb) -#define INOPF(fs) ((fs)->fs_inopb >> (fs)->fs_fragshift) - -/* - * Number of indirects in a filesystem block. - */ -#define NINDIR(fs) ((fs)->fs_nindir) - -extern int inside[], around[]; -extern u_char *fragtbl[]; - -#endif -#endif diff --git a/src/sys/ufs/ffs/softdep.h b/src/sys/ufs/ffs/softdep.h deleted file mode 100644 index 97813c3..0000000 --- a/src/sys/ufs/ffs/softdep.h +++ /dev/null @@ -1,591 +0,0 @@ -#if 0 -/* - * Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved. - * - * The soft updates code is derived from the appendix of a University - * of Michigan technical report (Gregory R. Ganger and Yale N. Patt, - * "Soft Updates: A Solution to the Metadata Update Problem in File - * Systems", CSE-TR-254-95, August 1995). 
- * - * Further information about soft updates can be obtained from: - * - * Marshall Kirk McKusick http://www.mckusick.com/softdep/ - * 1614 Oxford Street mckusick@mckusick.com - * Berkeley, CA 94709-1608 +1-510-843-9542 - * USA - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)softdep.h 9.7 (McKusick) 6/21/00 - * $FreeBSD: src/sys/ufs/ffs/softdep.h,v 1.16 2002/07/19 07:29:38 mckusick Exp $ - */ - -#include - -/* - * Allocation dependencies are handled with undo/redo on the in-memory - * copy of the data. A particular data dependency is eliminated when - * it is ALLCOMPLETE: that is ATTACHED, DEPCOMPLETE, and COMPLETE. - * - * ATTACHED means that the data is not currently being written to - * disk. UNDONE means that the data has been rolled back to a safe - * state for writing to the disk. When the I/O completes, the data is - * restored to its current form and the state reverts to ATTACHED. - * The data must be locked throughout the rollback, I/O, and roll - * forward so that the rolled back information is never visible to - * user processes. The COMPLETE flag indicates that the item has been - * written. For example, a dependency that requires that an inode be - * written will be marked COMPLETE after the inode has been written - * to disk. The DEPCOMPLETE flag indicates the completion of any other - * dependencies such as the writing of a cylinder group map has been - * completed. A dependency structure may be freed only when both it - * and its dependencies have completed and any rollbacks that are in - * progress have finished as indicated by the set of ALLCOMPLETE flags - * all being set. The two MKDIR flags indicate additional dependencies - * that must be done when creating a new directory. MKDIR_BODY is - * cleared when the directory data block containing the "." and ".." - * entries has been written. MKDIR_PARENT is cleared when the parent - * inode with the increased link count for ".." has been written. When - * both MKDIR flags have been cleared, the DEPCOMPLETE flag is set to - * indicate that the directory dependencies have been completed. The - * writing of the directory inode itself sets the COMPLETE flag which - * then allows the directory entry for the new directory to be written - * to disk. 
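The remaining flags are described next, and the bit values themselves follow that description. The test that matters most throughout the soft-updates code is whether a dependency has reached ALLCOMPLETE; a minimal illustration of that bit test, using the values defined below:

    #include <stdio.h>

    /* the three state bits that make up ALLCOMPLETE (values as defined below) */
    #define ATTACHED        0x0001
    #define COMPLETE        0x0004
    #define DEPCOMPLETE     0x0008
    #define ALLCOMPLETE     (ATTACHED | COMPLETE | DEPCOMPLETE)

    int
    main(void)
    {
            unsigned short wk_state = ATTACHED | DEPCOMPLETE;       /* inode not yet written */

            printf("may free: %d\n", (wk_state & ALLCOMPLETE) == ALLCOMPLETE);
            wk_state |= COMPLETE;                                   /* inode write finished */
            printf("may free: %d\n", (wk_state & ALLCOMPLETE) == ALLCOMPLETE);
            return (0);
    }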
The RMDIR flag marks a dirrem structure as representing - * the removal of a directory rather than a file. When the removal - * dependencies are completed, additional work needs to be done - * (truncation of the "." and ".." entries, an additional decrement - * of the associated inode, and a decrement of the parent inode). The - * DIRCHG flag marks a diradd structure as representing the changing - * of an existing entry rather than the addition of a new one. When - * the update is complete the dirrem associated with the inode for - * the old name must be added to the worklist to do the necessary - * reference count decrement. The GOINGAWAY flag indicates that the - * data structure is frozen from further change until its dependencies - * have been completed and its resources freed after which it will be - * discarded. The IOSTARTED flag prevents multiple calls to the I/O - * start routine from doing multiple rollbacks. The SPACECOUNTED flag - * says that the files space has been accounted to the pending free - * space count. The NEWBLOCK flag marks pagedep structures that have - * just been allocated, so must be claimed by the inode before all - * dependencies are complete. The INPROGRESS flag marks worklist - * structures that are still on the worklist, but are being considered - * for action by some process. The UFS1FMT flag indicates that the - * inode being processed is a ufs1 format. The EXTDATA flag indicates - * that the allocdirect describes an extended-attributes dependency. - * The ONWORKLIST flag shows whether the structure is currently linked - * onto a worklist. - */ -#define ATTACHED 0x0001 -#define UNDONE 0x0002 -#define COMPLETE 0x0004 -#define DEPCOMPLETE 0x0008 -#define MKDIR_PARENT 0x0010 /* diradd & mkdir only */ -#define MKDIR_BODY 0x0020 /* diradd & mkdir only */ -#define RMDIR 0x0040 /* dirrem only */ -#define DIRCHG 0x0080 /* diradd & dirrem only */ -#define GOINGAWAY 0x0100 /* indirdep only */ -#define IOSTARTED 0x0200 /* inodedep & pagedep only */ -#define SPACECOUNTED 0x0400 /* inodedep only */ -#define NEWBLOCK 0x0800 /* pagedep only */ -#define INPROGRESS 0x1000 /* dirrem, freeblks, freefrag, freefile only */ -#define UFS1FMT 0x2000 /* indirdep only */ -#define EXTDATA 0x4000 /* allocdirect only */ -#define ONWORKLIST 0x8000 - -#define ALLCOMPLETE (ATTACHED | COMPLETE | DEPCOMPLETE) - -/* - * The workitem queue. - * - * It is sometimes useful and/or necessary to clean up certain dependencies - * in the background rather than during execution of an application process - * or interrupt service routine. To realize this, we append dependency - * structures corresponding to such tasks to a "workitem" queue. In a soft - * updates implementation, most pending workitems should not wait for more - * than a couple of seconds, so the filesystem syncer process awakens once - * per second to process the items on the queue. - */ - -/* LIST_HEAD(workhead, worklist); -- declared in buf.h */ - -/* - * Each request can be linked onto a work queue through its worklist structure. - * To avoid the need for a pointer to the structure itself, this structure - * MUST be declared FIRST in each type in which it appears! If more than one - * worklist is needed in the structure, then a wk_data field must be added - * and the macros below changed to use it. 
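The requirement spelled out above, that the worklist structure be declared first in every dependency type, is what makes the WK_*() casts defined below safe: a pointer to the embedded worklist is also a pointer to the containing structure. A compact illustration of the pattern, using a toy stand-in for one dependency type:

    #include <stdio.h>

    struct worklist {                       /* must be first in every dependency type */
            unsigned short wk_type;
            unsigned short wk_state;
    };

    struct freefrag {                       /* toy stand-in for one dependency type */
            struct worklist ff_list;        /* worklist MUST be the first member */
            long ff_blkno;
    };

    #define WK_FREEFRAG(wk) ((struct freefrag *)(wk))

    int
    main(void)
    {
            struct freefrag ff = { { 0, 0 }, 1234 };
            struct worklist *wk = &ff.ff_list;      /* what a work queue would hold */

            printf("%ld\n", WK_FREEFRAG(wk)->ff_blkno);     /* prints 1234 */
            return (0);
    }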
- */ -struct worklist { - LIST_ENTRY(worklist) wk_list; /* list of work requests */ - unsigned short wk_type; /* type of request */ - unsigned short wk_state; /* state flags */ -}; -#define WK_DATA(wk) ((void *)(wk)) -#define WK_PAGEDEP(wk) ((struct pagedep *)(wk)) -#define WK_INODEDEP(wk) ((struct inodedep *)(wk)) -#define WK_NEWBLK(wk) ((struct newblk *)(wk)) -#define WK_BMSAFEMAP(wk) ((struct bmsafemap *)(wk)) -#define WK_ALLOCDIRECT(wk) ((struct allocdirect *)(wk)) -#define WK_INDIRDEP(wk) ((struct indirdep *)(wk)) -#define WK_ALLOCINDIR(wk) ((struct allocindir *)(wk)) -#define WK_FREEFRAG(wk) ((struct freefrag *)(wk)) -#define WK_FREEBLKS(wk) ((struct freeblks *)(wk)) -#define WK_FREEFILE(wk) ((struct freefile *)(wk)) -#define WK_DIRADD(wk) ((struct diradd *)(wk)) -#define WK_MKDIR(wk) ((struct mkdir *)(wk)) -#define WK_DIRREM(wk) ((struct dirrem *)(wk)) -#define WK_NEWDIRBLK(wk) ((struct newdirblk *)(wk)) - -/* - * Various types of lists - */ -LIST_HEAD(dirremhd, dirrem); -LIST_HEAD(diraddhd, diradd); -LIST_HEAD(newblkhd, newblk); -LIST_HEAD(inodedephd, inodedep); -LIST_HEAD(allocindirhd, allocindir); -LIST_HEAD(allocdirecthd, allocdirect); -TAILQ_HEAD(allocdirectlst, allocdirect); - -/* - * The "pagedep" structure tracks the various dependencies related to - * a particular directory page. If a directory page has any dependencies, - * it will have a pagedep linked to its associated buffer. The - * pd_dirremhd list holds the list of dirrem requests which decrement - * inode reference counts. These requests are processed after the - * directory page with the corresponding zero'ed entries has been - * written. The pd_diraddhd list maintains the list of diradd requests - * which cannot be committed until their corresponding inode has been - * written to disk. Because a directory may have many new entries - * being created, several lists are maintained hashed on bits of the - * offset of the entry into the directory page to keep the lists from - * getting too long. Once a new directory entry has been cleared to - * be written, it is moved to the pd_pendinghd list. After the new - * entry has been written to disk it is removed from the pd_pendinghd - * list, any removed operations are done, and the dependency structure - * is freed. - */ -#define DAHASHSZ 5 -#define DIRADDHASH(offset) (((offset) >> 2) % DAHASHSZ) -struct pagedep { - struct worklist pd_list; /* page buffer */ -# define pd_state pd_list.wk_state /* check for multiple I/O starts */ - LIST_ENTRY(pagedep) pd_hash; /* hashed lookup */ - struct mount *pd_mnt; /* associated mount point */ - ino_t pd_ino; /* associated file */ - ufs_lbn_t pd_lbn; /* block within file */ - struct dirremhd pd_dirremhd; /* dirrem's waiting for page */ - struct diraddhd pd_diraddhd[DAHASHSZ]; /* diradd dir entry updates */ - struct diraddhd pd_pendinghd; /* directory entries awaiting write */ -}; - -/* - * The "inodedep" structure tracks the set of dependencies associated - * with an inode. One task that it must manage is delayed operations - * (i.e., work requests that must be held until the inodedep's associated - * inode has been written to disk). Getting an inode from its incore - * state to the disk requires two steps to be taken by the filesystem - * in this order: first the inode must be copied to its disk buffer by - * the VOP_UPDATE operation; second the inode's buffer must be written - * to disk. To ensure that both operations have happened in the required - * order, the inodedep maintains two lists. 
Delayed operations are - * placed on the id_inowait list. When the VOP_UPDATE is done, all - * operations on the id_inowait list are moved to the id_bufwait list. - * When the buffer is written, the items on the id_bufwait list can be - * safely moved to the work queue to be processed. A second task of the - * inodedep structure is to track the status of block allocation within - * the inode. Each block that is allocated is represented by an - * "allocdirect" structure (see below). It is linked onto the id_newinoupdt - * list until both its contents and its allocation in the cylinder - * group map have been written to disk. Once these dependencies have been - * satisfied, it is removed from the id_newinoupdt list and any followup - * actions such as releasing the previous block or fragment are placed - * on the id_inowait list. When an inode is updated (a VOP_UPDATE is - * done), the "inodedep" structure is linked onto the buffer through - * its worklist. Thus, it will be notified when the buffer is about - * to be written and when it is done. At the update time, all the - * elements on the id_newinoupdt list are moved to the id_inoupdt list - * since those changes are now relevant to the copy of the inode in the - * buffer. Also at update time, the tasks on the id_inowait list are - * moved to the id_bufwait list so that they will be executed when - * the updated inode has been written to disk. When the buffer containing - * the inode is written to disk, any updates listed on the id_inoupdt - * list are rolled back as they are not yet safe. Following the write, - * the changes are once again rolled forward and any actions on the - * id_bufwait list are processed (since those actions are now safe). - * The entries on the id_inoupdt and id_newinoupdt lists must be kept - * sorted by logical block number to speed the calculation of the size - * of the rolled back inode (see explanation in initiate_write_inodeblock). - * When a directory entry is created, it is represented by a diradd. - * The diradd is added to the id_inowait list as it cannot be safely - * written to disk until the inode that it represents is on disk. After - * the inode is written, the id_bufwait list is processed and the diradd - * entries are moved to the id_pendinghd list where they remain until - * the directory block containing the name has been written to disk. - * The purpose of keeping the entries on the id_pendinghd list is so that - * the softdep_fsync function can find and push the inode's directory - * name(s) as part of the fsync operation for that file. 
- */ -struct inodedep { - struct worklist id_list; /* buffer holding inode block */ -# define id_state id_list.wk_state /* inode dependency state */ - LIST_ENTRY(inodedep) id_hash; /* hashed lookup */ - struct fs *id_fs; /* associated filesystem */ - ino_t id_ino; /* dependent inode */ - nlink_t id_nlinkdelta; /* saved effective link count */ - LIST_ENTRY(inodedep) id_deps; /* bmsafemap's list of inodedep's */ - struct buf *id_buf; /* related bmsafemap (if pending) */ - long id_savedextsize; /* ext size saved during rollback */ - off_t id_savedsize; /* file size saved during rollback */ - struct workhead id_pendinghd; /* entries awaiting directory write */ - struct workhead id_bufwait; /* operations after inode written */ - struct workhead id_inowait; /* operations waiting inode update */ - struct allocdirectlst id_inoupdt; /* updates before inode written */ - struct allocdirectlst id_newinoupdt; /* updates when inode written */ - struct allocdirectlst id_extupdt; /* extdata updates pre-inode write */ - struct allocdirectlst id_newextupdt; /* extdata updates at ino write */ - union { - struct ufs1_dinode *idu_savedino1; /* saved ufs1_dinode contents */ - struct ufs2_dinode *idu_savedino2; /* saved ufs2_dinode contents */ - } id_un; -}; -#define id_savedino1 id_un.idu_savedino1 -#define id_savedino2 id_un.idu_savedino2 - -/* - * A "newblk" structure is attached to a bmsafemap structure when a block - * or fragment is allocated from a cylinder group. Its state is set to - * DEPCOMPLETE when its cylinder group map is written. It is consumed by - * an associated allocdirect or allocindir allocation which will attach - * themselves to the bmsafemap structure if the newblk's DEPCOMPLETE flag - * is not set (i.e., its cylinder group map has not been written). - */ -struct newblk { - LIST_ENTRY(newblk) nb_hash; /* hashed lookup */ - struct fs *nb_fs; /* associated filesystem */ - int nb_state; /* state of bitmap dependency */ - ufs2_daddr_t nb_newblkno; /* allocated block number */ - LIST_ENTRY(newblk) nb_deps; /* bmsafemap's list of newblk's */ - struct bmsafemap *nb_bmsafemap; /* associated bmsafemap */ -}; - -/* - * A "bmsafemap" structure maintains a list of dependency structures - * that depend on the update of a particular cylinder group map. - * It has lists for newblks, allocdirects, allocindirs, and inodedeps. - * It is attached to the buffer of a cylinder group block when any of - * these things are allocated from the cylinder group. It is freed - * after the cylinder group map is written and the state of its - * dependencies are updated with DEPCOMPLETE to indicate that it has - * been processed. - */ -struct bmsafemap { - struct worklist sm_list; /* cylgrp buffer */ - struct buf *sm_buf; /* associated buffer */ - struct allocdirecthd sm_allocdirecthd; /* allocdirect deps */ - struct allocindirhd sm_allocindirhd; /* allocindir deps */ - struct inodedephd sm_inodedephd; /* inodedep deps */ - struct newblkhd sm_newblkhd; /* newblk deps */ -}; - -/* - * An "allocdirect" structure is attached to an "inodedep" when a new block - * or fragment is allocated and pointed to by the inode described by - * "inodedep". The worklist is linked to the buffer that holds the block. - * When the block is first allocated, it is linked to the bmsafemap - * structure associated with the buffer holding the cylinder group map - * from which it was allocated. When the cylinder group map is written - * to disk, ad_state has the DEPCOMPLETE flag set. When the block itself - * is written, the COMPLETE flag is set. 
Once both the cylinder group map - * and the data itself have been written, it is safe to write the inode - * that claims the block. If there was a previous fragment that had been - * allocated before the file was increased in size, the old fragment may - * be freed once the inode claiming the new block is written to disk. - * This ad_fragfree request is attached to the id_inowait list of the - * associated inodedep (pointed to by ad_inodedep) for processing after - * the inode is written. When a block is allocated to a directory, an - * fsync of a file whose name is within that block must ensure not only - * that the block containing the file name has been written, but also - * that the on-disk inode references that block. When a new directory - * block is created, we allocate a newdirblk structure which is linked - * to the associated allocdirect (on its ad_newdirblk list). When the - * allocdirect has been satisfied, the newdirblk structure is moved to - * the inodedep id_bufwait list of its directory to await the inode - * being written. When the inode is written, the directory entries are - * fully committed and can be deleted from their pagedep->id_pendinghd - * and inodedep->id_pendinghd lists. - */ -struct allocdirect { - struct worklist ad_list; /* buffer holding block */ -# define ad_state ad_list.wk_state /* block pointer state */ - TAILQ_ENTRY(allocdirect) ad_next; /* inodedep's list of allocdirect's */ - ufs_lbn_t ad_lbn; /* block within file */ - ufs2_daddr_t ad_newblkno; /* new value of block pointer */ - ufs2_daddr_t ad_oldblkno; /* old value of block pointer */ - long ad_newsize; /* size of new block */ - long ad_oldsize; /* size of old block */ - LIST_ENTRY(allocdirect) ad_deps; /* bmsafemap's list of allocdirect's */ - struct buf *ad_buf; /* cylgrp buffer (if pending) */ - struct inodedep *ad_inodedep; /* associated inodedep */ - struct freefrag *ad_freefrag; /* fragment to be freed (if any) */ - struct workhead ad_newdirblk; /* dir block to notify when written */ -}; - -/* - * A single "indirdep" structure manages all allocation dependencies for - * pointers in an indirect block. The up-to-date state of the indirect - * block is stored in ir_savedata. The set of pointers that may be safely - * written to the disk is stored in ir_safecopy. The state field is used - * only to track whether the buffer is currently being written (in which - * case it is not safe to update ir_safecopy). Ir_deplisthd contains the - * list of allocindir structures, one for each block that needs to be - * written to disk. Once the block and its bitmap allocation have been - * written the safecopy can be updated to reflect the allocation and the - * allocindir structure freed. If ir_state indicates that an I/O on the - * indirect block is in progress when ir_safecopy is to be updated, the - * update is deferred by placing the allocindir on the ir_donehd list. - * When the I/O on the indirect block completes, the entries on the - * ir_donehd list are processed by updating their corresponding ir_safecopy - * pointers and then freeing the allocindir structure. 
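The deferral rule in that last step is the subtle part: a completed allocindir may only be published into the safe copy when no write of the indirect block is in flight, otherwise the update has to wait on ir_donehd. A self-contained sketch of that rule, with plain arrays standing in for the safe copy and the done list (all _sketch names are illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    #define NPTR 8                       /* tiny stand-in indirect block */

    struct indir_sketch {
            long saved_safecopy[NPTR];   /* pointers known to be on disk */
            bool io_in_progress;         /* analogue of the ir_state check */
            int  done_off[NPTR];         /* crude analogue of ir_donehd */
            long done_blk[NPTR];
            int  ndone;
    };

    /* A block and its bitmap are on disk: try to publish the pointer. */
    static void
    allocindir_complete(struct indir_sketch *ir, int off, long newblk)
    {
            if (ir->io_in_progress) {
                    /* Updating the safe copy now would race the write
                       in flight, so remember the update for later. */
                    ir->done_off[ir->ndone] = off;
                    ir->done_blk[ir->ndone] = newblk;
                    ir->ndone++;
            } else {
                    ir->saved_safecopy[off] = newblk;
            }
    }

    /* The write of the indirect block finished: apply deferred updates. */
    static void
    indir_write_done(struct indir_sketch *ir)
    {
            ir->io_in_progress = false;
            for (int i = 0; i < ir->ndone; i++)
                    ir->saved_safecopy[ir->done_off[i]] = ir->done_blk[i];
            ir->ndone = 0;
    }

    int
    main(void)
    {
            struct indir_sketch ir = { .io_in_progress = true };

            allocindir_complete(&ir, 2, 1234);  /* deferred: write in flight */
            indir_write_done(&ir);              /* now published */
            printf("slot 2 -> %ld\n", ir.saved_safecopy[2]);
            return (0);
    }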
- */ -struct indirdep { - struct worklist ir_list; /* buffer holding indirect block */ -# define ir_state ir_list.wk_state /* indirect block pointer state */ - caddr_t ir_saveddata; /* buffer cache contents */ - struct buf *ir_savebp; /* buffer holding safe copy */ - struct allocindirhd ir_donehd; /* done waiting to update safecopy */ - struct allocindirhd ir_deplisthd; /* allocindir deps for this block */ -}; - -/* - * An "allocindir" structure is attached to an "indirdep" when a new block - * is allocated and pointed to by the indirect block described by the - * "indirdep". The worklist is linked to the buffer that holds the new block. - * When the block is first allocated, it is linked to the bmsafemap - * structure associated with the buffer holding the cylinder group map - * from which it was allocated. When the cylinder group map is written - * to disk, ai_state has the DEPCOMPLETE flag set. When the block itself - * is written, the COMPLETE flag is set. Once both the cylinder group map - * and the data itself have been written, it is safe to write the entry in - * the indirect block that claims the block; the "allocindir" dependency - * can then be freed as it is no longer applicable. - */ -struct allocindir { - struct worklist ai_list; /* buffer holding indirect block */ -# define ai_state ai_list.wk_state /* indirect block pointer state */ - LIST_ENTRY(allocindir) ai_next; /* indirdep's list of allocindir's */ - int ai_offset; /* pointer offset in indirect block */ - ufs2_daddr_t ai_newblkno; /* new block pointer value */ - ufs2_daddr_t ai_oldblkno; /* old block pointer value */ - struct freefrag *ai_freefrag; /* block to be freed when complete */ - struct indirdep *ai_indirdep; /* address of associated indirdep */ - LIST_ENTRY(allocindir) ai_deps; /* bmsafemap's list of allocindir's */ - struct buf *ai_buf; /* cylgrp buffer (if pending) */ -}; - -/* - * A "freefrag" structure is attached to an "inodedep" when a previously - * allocated fragment is replaced with a larger fragment, rather than extended. - * The "freefrag" structure is constructed and attached when the replacement - * block is first allocated. It is processed after the inode claiming the - * bigger block that replaces it has been written to disk. Note that the - * ff_state field is is used to store the uid, so may lose data. However, - * the uid is used only in printing an error message, so is not critical. - * Keeping it in a short keeps the data structure down to 32 bytes. - */ -struct freefrag { - struct worklist ff_list; /* id_inowait or delayed worklist */ -# define ff_state ff_list.wk_state /* owning user; should be uid_t */ - struct mount *ff_mnt; /* associated mount point */ - ufs2_daddr_t ff_blkno; /* fragment physical block number */ - long ff_fragsize; /* size of fragment being deleted */ - ino_t ff_inum; /* owning inode number */ -}; - -/* - * A "freeblks" structure is attached to an "inodedep" when the - * corresponding file's length is reduced to zero. It records all - * the information needed to free the blocks of a file after its - * zero'ed inode has been written to disk. 
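In other words, before the truncated inode goes to disk, every block pointer it holds is copied aside and then cleared, so the zero'ed inode can be written first and the recorded blocks released afterwards. A toy sketch of that capture step, using simplified stand-in structures rather than the kernel's setup routine:

    #include <stdio.h>
    #include <string.h>

    #define NDIRECT 12
    #define NINDIR  3

    struct dinode_sketch {
            long db[NDIRECT];            /* direct block pointers */
            long ib[NINDIR];             /* indirect block pointers */
            long size;
    };

    struct freeblks_sketch {
            long fb_dblks[NDIRECT];
            long fb_iblks[NINDIR];
            long fb_oldsize;
    };

    /*
     * Record everything needed to release the blocks later, then clear
     * the in-core copy so the zero'ed inode can be written first.
     */
    static void
    capture_freeblocks(struct dinode_sketch *dp, struct freeblks_sketch *fb)
    {
            memcpy(fb->fb_dblks, dp->db, sizeof(fb->fb_dblks));
            memcpy(fb->fb_iblks, dp->ib, sizeof(fb->fb_iblks));
            fb->fb_oldsize = dp->size;

            memset(dp->db, 0, sizeof(dp->db));
            memset(dp->ib, 0, sizeof(dp->ib));
            dp->size = 0;
    }

    int
    main(void)
    {
            struct dinode_sketch dp = { .db = { 100, 101 }, .size = 8192 };
            struct freeblks_sketch fb;

            capture_freeblocks(&dp, &fb);
            /* ...the zero'ed inode is written, then fb is processed... */
            printf("will free block %ld (old size %ld)\n",
                fb.fb_dblks[0], fb.fb_oldsize);
            return (0);
    }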
- */ -struct freeblks { - struct worklist fb_list; /* id_inowait or delayed worklist */ - ino_t fb_previousinum; /* inode of previous owner of blocks */ - uid_t fb_uid; /* uid of previous owner of blocks */ - struct vnode *fb_devvp; /* filesystem device vnode */ - struct mount *fb_mnt; /* associated mount point */ - long fb_oldextsize; /* previous ext data size */ - off_t fb_oldsize; /* previous file size */ - ufs2_daddr_t fb_chkcnt; /* used to check cnt of blks released */ - ufs2_daddr_t fb_dblks[NDADDR]; /* direct blk ptrs to deallocate */ - ufs2_daddr_t fb_iblks[NIADDR]; /* indirect blk ptrs to deallocate */ - ufs2_daddr_t fb_eblks[NXADDR]; /* indirect blk ptrs to deallocate */ -}; - -/* - * A "freefile" structure is attached to an inode when its - * link count is reduced to zero. It marks the inode as free in - * the cylinder group map after the zero'ed inode has been written - * to disk and any associated blocks and fragments have been freed. - */ -struct freefile { - struct worklist fx_list; /* id_inowait or delayed worklist */ - mode_t fx_mode; /* mode of inode */ - ino_t fx_oldinum; /* inum of the unlinked file */ - struct vnode *fx_devvp; /* filesystem device vnode */ - struct mount *fx_mnt; /* associated mount point */ -}; - -/* - * A "diradd" structure is linked to an "inodedep" id_inowait list when a - * new directory entry is allocated that references the inode described - * by "inodedep". When the inode itself is written (either the initial - * allocation for new inodes or with the increased link count for - * existing inodes), the COMPLETE flag is set in da_state. If the entry - * is for a newly allocated inode, the "inodedep" structure is associated - * with a bmsafemap which prevents the inode from being written to disk - * until the cylinder group has been updated. Thus the da_state COMPLETE - * flag cannot be set until the inode bitmap dependency has been removed. - * When creating a new file, it is safe to write the directory entry that - * claims the inode once the referenced inode has been written. Since - * writing the inode clears the bitmap dependencies, the DEPCOMPLETE flag - * in the diradd can be set unconditionally when creating a file. When - * creating a directory, there are two additional dependencies described by - * mkdir structures (see their description below). When these dependencies - * are resolved the DEPCOMPLETE flag is set in the diradd structure. - * If there are multiple links created to the same inode, there will be - * a separate diradd structure created for each link. The diradd is - * linked onto the pg_diraddhd list of the pagedep for the directory - * page that contains the entry. When a directory page is written, - * the pg_diraddhd list is traversed to rollback any entries that are - * not yet ready to be written to disk. If a directory entry is being - * changed (by rename) rather than added, the DIRCHG flag is set and - * the da_previous entry points to the entry that will be "removed" - * once the new entry has been committed. During rollback, entries - * with da_previous are replaced with the previous inode number rather - * than zero. - * - * The overlaying of da_pagedep and da_previous is done to keep the - * structure down to 32 bytes in size on a 32-bit machine. If a - * da_previous entry is present, the pointer to its pagedep is available - * in the associated dirrem entry. If the DIRCHG flag is set, the - * da_previous entry is valid; if not set the da_pagedep entry is valid. 
- * The DIRCHG flag never changes; it is set when the structure is created - * if appropriate and is never cleared. - */ -struct diradd { - struct worklist da_list; /* id_inowait or id_pendinghd list */ -# define da_state da_list.wk_state /* state of the new directory entry */ - LIST_ENTRY(diradd) da_pdlist; /* pagedep holding directory block */ - doff_t da_offset; /* offset of new dir entry in dir blk */ - ino_t da_newinum; /* inode number for the new dir entry */ - union { - struct dirrem *dau_previous; /* entry being replaced in dir change */ - struct pagedep *dau_pagedep; /* pagedep dependency for addition */ - } da_un; -}; -#define da_previous da_un.dau_previous -#define da_pagedep da_un.dau_pagedep - -/* - * Two "mkdir" structures are needed to track the additional dependencies - * associated with creating a new directory entry. Normally a directory - * addition can be committed as soon as the newly referenced inode has been - * written to disk with its increased link count. When a directory is - * created there are two additional dependencies: writing the directory - * data block containing the "." and ".." entries (MKDIR_BODY) and writing - * the parent inode with the increased link count for ".." (MKDIR_PARENT). - * These additional dependencies are tracked by two mkdir structures that - * reference the associated "diradd" structure. When they have completed, - * they set the DEPCOMPLETE flag on the diradd so that it knows that its - * extra dependencies have been completed. The md_state field is used only - * to identify which type of dependency the mkdir structure is tracking. - * It is not used in the mainline code for any purpose other than consistency - * checking. All the mkdir structures in the system are linked together on - * a list. This list is needed so that a diradd can find its associated - * mkdir structures and deallocate them if it is prematurely freed (as for - * example if a mkdir is immediately followed by a rmdir of the same directory). - * Here, the free of the diradd must traverse the list to find the associated - * mkdir structures that reference it. The deletion would be faster if the - * diradd structure were simply augmented to have two pointers that referenced - * the associated mkdir's. However, this would increase the size of the diradd - * structure from 32 to 64-bits to speed a very infrequent operation. - */ -struct mkdir { - struct worklist md_list; /* id_inowait or buffer holding dir */ -# define md_state md_list.wk_state /* type: MKDIR_PARENT or MKDIR_BODY */ - struct diradd *md_diradd; /* associated diradd */ - struct buf *md_buf; /* MKDIR_BODY: buffer holding dir */ - LIST_ENTRY(mkdir) md_mkdirs; /* list of all mkdirs */ -}; -LIST_HEAD(mkdirlist, mkdir) mkdirlisthd; - -/* - * A "dirrem" structure describes an operation to decrement the link - * count on an inode. The dirrem structure is attached to the pg_dirremhd - * list of the pagedep for the directory page that contains the entry. - * It is processed after the directory page with the deleted entry has - * been written to disk. - * - * The overlaying of dm_pagedep and dm_dirinum is done to keep the - * structure down to 32 bytes in size on a 32-bit machine. It works - * because they are never used concurrently. 
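Both diradd and dirrem therefore follow the same discipline: a union holds either the pagedep pointer or the replacement/parent information, and a flag set once at creation time says which arm is meaningful. A compact illustration of that discipline with stand-in types (the _sketch names and DIRCHG_SKETCH constant are illustrative only):

    #include <stdio.h>

    #define DIRCHG_SKETCH 0x0001        /* entry replaces an existing one */

    struct pagedep_sketch { int pd_dummy; };
    struct dirrem_sketch  { int dm_dummy; };

    struct diradd_sketch {
            int da_state;
            union {
                    struct dirrem_sketch  *dau_previous; /* valid if DIRCHG set */
                    struct pagedep_sketch *dau_pagedep;  /* valid otherwise */
            } da_un;
    };

    static void
    show(struct diradd_sketch *da)
    {
            /* The flag, never changed after creation, selects the arm. */
            if (da->da_state & DIRCHG_SKETCH)
                    printf("rename: roll back to the entry at %p\n",
                        (void *)da->da_un.dau_previous);
            else
                    printf("plain addition tracked by pagedep %p\n",
                        (void *)da->da_un.dau_pagedep);
    }

    int
    main(void)
    {
            struct pagedep_sketch pd;
            struct dirrem_sketch  dm;
            struct diradd_sketch plain = { 0, { .dau_pagedep = &pd } };
            struct diradd_sketch chg = { DIRCHG_SKETCH, { .dau_previous = &dm } };

            show(&plain);
            show(&chg);
            return (0);
    }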
- */ -struct dirrem { - struct worklist dm_list; /* delayed worklist */ -# define dm_state dm_list.wk_state /* state of the old directory entry */ - LIST_ENTRY(dirrem) dm_next; /* pagedep's list of dirrem's */ - struct mount *dm_mnt; /* associated mount point */ - ino_t dm_oldinum; /* inum of the removed dir entry */ - union { - struct pagedep *dmu_pagedep; /* pagedep dependency for remove */ - ino_t dmu_dirinum; /* parent inode number (for rmdir) */ - } dm_un; -}; -#define dm_pagedep dm_un.dmu_pagedep -#define dm_dirinum dm_un.dmu_dirinum - -/* - * A "newdirblk" structure tracks the progress of a newly allocated - * directory block from its creation until it is claimed by its on-disk - * inode. When a block is allocated to a directory, an fsync of a file - * whose name is within that block must ensure not only that the block - * containing the file name has been written, but also that the on-disk - * inode references that block. When a new directory block is created, - * we allocate a newdirblk structure which is linked to the associated - * allocdirect (on its ad_newdirblk list). When the allocdirect has been - * satisfied, the newdirblk structure is moved to the inodedep id_bufwait - * list of its directory to await the inode being written. When the inode - * is written, the directory entries are fully committed and can be - * deleted from their pagedep->id_pendinghd and inodedep->id_pendinghd - * lists. Note that we could track directory blocks allocated to indirect - * blocks using a similar scheme with the allocindir structures. Rather - * than adding this level of complexity, we simply write those newly - * allocated indirect blocks synchronously as such allocations are rare. - */ -struct newdirblk { - struct worklist db_list; /* id_inowait or pg_newdirblk */ -# define db_state db_list.wk_state /* unused */ - struct pagedep *db_pagedep; /* associated pagedep */ -}; -#endif diff --git a/src/sys/ufs/ufs/Makefile b/src/sys/ufs/ufs/Makefile deleted file mode 100644 index ae863f1..0000000 --- a/src/sys/ufs/ufs/Makefile +++ /dev/null @@ -1,28 +0,0 @@ -# (C) 2002 The UbixOS Project -# $Id$ - -# Include Global 'Source' Options -include ../../../Makefile.inc -include ../../Makefile.inc - -# Objects -OBJS = ufs_acl.o ufs_bmap.o ufs_dirhash.o ufs_extattr.o ufs_ihash.o ufs_inode.o ufs_lookup.o ufs_quota.o ufs_vfsops.o ufs_vnops.o - -all: $(OBJS) - -# Compile Types -.cc.o: - $(CXX) -Wall -O $(CFLAGS) $(INCLUDES) -c -o $@ $< -.cc.s: - $(CXX) -Wall -O $(CFLAGS) $(INCLUDES) -S -o $@ $< -.c.o: - $(CC) -Wall -O $(CFLAGS) $(INCLUDES) -c -o $@ $< -.c.s: - $(CC) -Wall -O $(CFLAGS) $(INCLUDES) -S -o $@ $< -.S.o: - $(CC) -Wall $(CFLAGS) $(INCLUDES) -c -o $@ $< - -# Clean up the junk -clean: - $(REMOVE) $(OBJS) - diff --git a/src/sys/ufs/ufs/README.acls b/src/sys/ufs/ufs/README.acls deleted file mode 100644 index ef752c7..0000000 --- a/src/sys/ufs/ufs/README.acls +++ /dev/null @@ -1,79 +0,0 @@ -$FreeBSD: src/sys/ufs/ufs/README.acls,v 1.6 2002/10/19 16:09:16 rwatson Exp $ - - UFS Access Control Lists Copyright - -The UFS Access Control Lists implementation is copyright Robert Watson, -and is made available under a Berkeley-style license. - - About UFS Access Control Lists (ACLs) - -Access control lists allow the association of fine-grained discretionary -access control information with files and directories, extending the -base UNIX permission model in a (mostly) compatible way. 
This -implementation largely follows the POSIX.1e model, and relies on the -availability of extended attributes to store extended components of -the ACL, while maintaining the base permission information in the inode. - - Using UFS Access Control Lists (ACLs) - -Support for UFS access control lists may be enabled by adding: - - options UFS_ACL - -to your kernel configuration. As ACLs rely on the availability of extended -attributes, your file systems must have support for extended attributes. -For UFS2, this is supported natively, so no further configuration is -necessary. For UFS1, you must also enable the optional extended attributes -support documented in README.extattr. A summary of the instructions -and ACL-specific information follows. - -To enable support for ACLs on a file system, the 'acls' mount flag -must be set for the file system. This may be set using the tunefs -'-a' flag: - - tunefs -a enable /dev/md0a - -Or by using the mount-time flag: - - mount -o acls /dev/md0a /mnt - -The flag may also be set in /etc/fstab. Note that mounting a file -system previously configured for ACLs without ACL-support will result -in incorrect application of discretionary protections. Likewise, -mounting an ACL-enabled file system without kernel support for ACLs -will result in incorrect application of discretionary protections. If -the kernel is not configured for ACL support, a warning will be -printed by the kernel at mount-time. For reliability purposes, it -is recommended that the superblock flag be used instead of the -mount-time flag, as this will avoid re-mount isses with the root file -system. For reliability and performance reasons, the use of ACLs on -UFS1 is discouraged; UFS2 extended attributes provide a more reliable -storage mechanism for ACLs. - -Currently, support for ACLs on UFS1 requires the use of UFS1 EAs, which may -be enabled by adding: - - options UFS_EXTATTR - -to your kernel configuration file and rebuilding. Because of filesystem -mount atomicity requirements, it is also recommended that: - - options UFS_EXTATTR_AUTOSTART - -be added to the kernel so as to support the atomic enabling of the -required extended attributes with the filesystem mount operation. To -enable ACLs, two extended attributes must be available in the -EXTATTR_NAMESPACE_SYSTEM namespace: "posix1e.acl_access", which holds -the access ACL, and "posix1e.acl_default" which holds the default ACL -for directories. If you're using UFS1 Extended Attributes, the following -commands may be used to create the necessary EA backing files for -ACLs in the filesystem root of each filesystem. In these examples, -the root filesystem is used; see README.extattr for more details. - - mkdir -p /.attribute/system - cd /.attribute/system - extattrctl initattr -p / 388 posix1e.acl_access - extattrctl initattr -p / 388 posix1e.acl_default - -On the next mount of the root filesystem, the attributes will be -automatically started, and ACLs will be enabled. diff --git a/src/sys/ufs/ufs/README.extattr b/src/sys/ufs/ufs/README.extattr deleted file mode 100644 index a6e07d0..0000000 --- a/src/sys/ufs/ufs/README.extattr +++ /dev/null @@ -1,91 +0,0 @@ -$FreeBSD: src/sys/ufs/ufs/README.extattr,v 1.5 2002/10/18 21:11:36 rwatson Exp $ - - UFS Extended Attributes Copyright - -The UFS Extended Attributes implementation is copyright Robert Watson, and -is made available under a Berkeley-style license. 
- - About UFS Extended Attributes - -Extended attributes allow the association of additional arbitrary -meta-data with files and directories. Extended attributes are defined in -the form name=value, where name is an nul-terminated string in the style -of a filename, and value is a binary blob of zero or more bytes. The UFS -extended attribute service layers support for extended attributes onto a -backing file, in the style of the quota implementation, meaning that it -requires no underlying format changes in the filesystem. This design -choice exchanges simplicity, usability and easy deployment for -performance. When defined, extended attribute names exist in a series of -disjoint namespaces: currently, two namespaces are defined: -EXTATTR_NAMESPACE_SYSTEM and EXTATTR_NAMESPACE_USER. The primary -distinction lies in the protection model: USER EAs are protected using the -normal inode protections, whereas SYSTEM EAs require privilege to access -or modify. - - Using UFS Extended Attributes - -Support for UFS extended attributes is natively available in UFS2, and -requires no special configuration. For reliability, administrative, -and performance reasons, if you plan to use extended attributes, it -is recommended that you use UFS2 in preference to UFS1. - -Support for UFS extended attributes may be enabled for UFS1 by adding: - - options UFS_EXTATTR - -to your kernel configuration file. This allows UFS-based filesystems to -support extended attributes, but requires manual administration of EAs -using the extattrctl tool, including the starting of EA support for each -filesystem, and the enabling of individual attributes for the file -system. The extattrctl utility may be used to initialize backing files -before first use, to start and stop EA service on a filesystem, and to -enable and disable named attributes. The command lines for extattrctl -take the following forms: - - extattrctl start [path] - extattrctl stop [path] - extattrctl initattr [-f] [-p path] [attrsize] [attrfile] - extattrctl enable [path] [attrnamespace] [attrname] [attrfile] - extattrctl disable [path] [attrnamespace] [attrname] - -In each case, [path] is used to indicate the mounted filesystem on which -to perform the operation. [attrnamespace] refers to the namespace in -which the attribute is being manipulated, and may be "system" or "user". -The [attrname] is the attribute name to use for the operation. The -[attrfile] argument specifies the attribute backing file to use. When -using the "initattr" function to initialize a backing file, the maximum -size of attribute data must be defined in bytes using the [attrsize] -field. Optionally, the [-p path] argument may be used to indicate to -extattrctl that it should pre-allocate space for EA data, rather than -creating a sparse backing file. This prevents attribute operations from -failing in low disk-space conditions (which can be important when EAs are -used for security purposes), but pre-allocation will consume space -proportional to the product of the defined maximum attribute size and -number of attributes on the specified filesystem. - -Manual configuration increases administrative overhead, but also -introduces the possibility of race conditions during filesystem mount, if -EAs are used to support other features, as starting the EAs manually is -not atomic with the mount operation. 
To address this problem, an -additional kernel option may be defined to auto-start EAs on a UFS file -system based on special directories at mount-time: - - options UFS_EXTATTR_AUTOSTART - -If this option is defined, UFS will search for a ".attribute" -sub-directory of the filesystem root during the mount operation. If it -is found, EA support will be started for the filesystem. UFS will then -search for "system" and "user" sub-directories of the ".attribute" -directory for any potential backing files, and enable an EA for each valid -backing file with the name of the backing file as the attribute name. -For example, by creating the following tree, the two EAs, -posix1e.acl_access and posix1e.acl_default will be enabled in the system -namespace of the root filesystem, reserving space for attribute data: - - mkdir -p /.attribute/system - cd /.attribute/system - extattrctl initattr -p / 388 posix1e.acl_access - extattrctl initattr -p / 388 posix1e.acl_default - -On the next mount of the root filesystem, the attributes will be -automatically started. diff --git a/src/sys/ufs/ufs/acl.h b/src/sys/ufs/ufs/acl.h deleted file mode 100644 index 1eabf29..0000000 --- a/src/sys/ufs/ufs/acl.h +++ /dev/null @@ -1,51 +0,0 @@ -#if 0 -/*- - * Copyright (c) 1999-2001 Robert N. M. Watson - * All rights reserved. - * - * This software was developed by Robert Watson for the TrustedBSD Project. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD: src/sys/ufs/ufs/acl.h,v 1.5 2003/08/04 03:29:13 rwatson Exp $ - */ -/* - * Developed by the TrustedBSD Project. - * Support for POSIX.1e access control lists. - */ - -#ifndef _UFS_UFS_ACL_H_ -#define _UFS_UFS_ACL_H_ - -#ifdef _KERNEL - -void ufs_sync_acl_from_inode(struct inode *ip, struct acl *acl); -void ufs_sync_inode_from_acl(struct acl *acl, struct inode *ip); - -int ufs_getacl(struct vop_getacl_args *); -int ufs_setacl(struct vop_setacl_args *); -int ufs_aclcheck(struct vop_aclcheck_args *); - -#endif /* !_KERNEL */ - -#endif /* !_UFS_UFS_ACL_H_ */ -#endif diff --git a/src/sys/ufs/ufs/dinode.h b/src/sys/ufs/ufs/dinode.h deleted file mode 100644 index af3fb6e..0000000 --- a/src/sys/ufs/ufs/dinode.h +++ /dev/null @@ -1,172 +0,0 @@ -#if 0 -/* - * Copyright (c) 2002 Networks Associates Technology, Inc. - * All rights reserved. 
- * - * This software was developed for the FreeBSD Project by Marshall - * Kirk McKusick and Network Associates Laboratories, the Security - * Research Division of Network Associates, Inc. under DARPA/SPAWAR - * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS - * research program - * - * Copyright (c) 1982, 1989, 1993 - * The Regents of the University of California. All rights reserved. - * (c) UNIX System Laboratories, Inc. - * All or some portions of this file are derived from material licensed - * to the University of California by American Telephone and Telegraph - * Co. or Unix System Laboratories, Inc. and are reproduced herein with - * the permission of UNIX System Laboratories, Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The names of the authors may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)dinode.h 8.3 (Berkeley) 1/21/94 - * $FreeBSD: src/sys/ufs/ufs/dinode.h,v 1.11 2002/07/16 22:36:00 mckusick Exp $ - */ - -#ifndef _UFS_UFS_DINODE_H_ -#define _UFS_UFS_DINODE_H_ - -/* - * The root inode is the root of the filesystem. Inode 0 can't be used for - * normal purposes and historically bad blocks were linked to inode 1, thus - * the root inode is 2. (Inode 1 is no longer used for this purpose, however - * numerous dump tapes make this assumption, so we are stuck with it). - */ -#define ROOTINO ((ino_t)2) - -/* - * The Whiteout inode# is a dummy non-zero inode number which will - * never be allocated to a real file. It is used as a place holder - * in the directory entry which has been tagged as a DT_W entry. - * See the comments about ROOTINO above. - */ -#define WINO ((ino_t)1) - -/* - * The size of physical and logical block numbers and time fields in UFS. - */ -typedef int32_t ufs1_daddr_t; -typedef int64_t ufs2_daddr_t; -typedef int64_t ufs_lbn_t; -typedef int64_t ufs_time_t; - -/* File permissions. */ -#define IEXEC 0000100 /* Executable. */ -#define IWRITE 0000200 /* Writeable. */ -#define IREAD 0000400 /* Readable. */ -#define ISVTX 0001000 /* Sticky bit. */ -#define ISGID 0002000 /* Set-gid. */ -#define ISUID 0004000 /* Set-uid. */ - -/* File types. */ -#define IFMT 0170000 /* Mask of file type. 
*/ -#define IFIFO 0010000 /* Named pipe (fifo). */ -#define IFCHR 0020000 /* Character device. */ -#define IFDIR 0040000 /* Directory file. */ -#define IFBLK 0060000 /* Block device. */ -#define IFREG 0100000 /* Regular file. */ -#define IFLNK 0120000 /* Symbolic link. */ -#define IFSOCK 0140000 /* UNIX domain socket. */ -#define IFWHT 0160000 /* Whiteout. */ - -/* - * A dinode contains all the meta-data associated with a UFS2 file. - * This structure defines the on-disk format of a dinode. Since - * this structure describes an on-disk structure, all its fields - * are defined by types with precise widths. - */ - -#define NXADDR 2 /* External addresses in inode. */ -#define NDADDR 12 /* Direct addresses in inode. */ -#define NIADDR 3 /* Indirect addresses in inode. */ - -struct ufs2_dinode { - u_int16_t di_mode; /* 0: IFMT, permissions; see below. */ - int16_t di_nlink; /* 2: File link count. */ - u_int32_t di_uid; /* 4: File owner. */ - u_int32_t di_gid; /* 8: File group. */ - u_int32_t di_blksize; /* 12: Inode blocksize. */ - u_int64_t di_size; /* 16: File byte count. */ - u_int64_t di_blocks; /* 24: Bytes actually held. */ - ufs_time_t di_atime; /* 32: Last access time. */ - ufs_time_t di_mtime; /* 40: Last modified time. */ - ufs_time_t di_ctime; /* 48: Last inode change time. */ - ufs_time_t di_birthtime; /* 56: Inode creation time. */ - int32_t di_mtimensec; /* 64: Last modified time. */ - int32_t di_atimensec; /* 68: Last access time. */ - int32_t di_ctimensec; /* 72: Last inode change time. */ - int32_t di_birthnsec; /* 76: Inode creation time. */ - int32_t di_gen; /* 80: Generation number. */ - u_int32_t di_kernflags; /* 84: Kernel flags. */ - u_int32_t di_flags; /* 88: Status flags (chflags). */ - int32_t di_extsize; /* 92: External attributes block. */ - ufs2_daddr_t di_extb[NXADDR];/* 96: External attributes block. */ - ufs2_daddr_t di_db[NDADDR]; /* 112: Direct disk blocks. */ - ufs2_daddr_t di_ib[NIADDR]; /* 208: Indirect disk blocks. */ - int64_t di_spare[3]; /* 232: Reserved; currently unused */ -}; - -/* - * The di_db fields may be overlaid with other information for - * file types that do not have associated disk storage. Block - * and character devices overlay the first data block with their - * dev_t value. Short symbolic links place their path in the - * di_db area. - */ -#define di_rdev di_db[0] - -/* - * A UFS1 dinode contains all the meta-data associated with a UFS1 file. - * This structure defines the on-disk format of a UFS1 dinode. Since - * this structure describes an on-disk structure, all its fields - * are defined by types with precise widths. - */ -struct ufs1_dinode { - u_int16_t di_mode; /* 0: IFMT, permissions; see below. */ - int16_t di_nlink; /* 2: File link count. */ - union { - u_int16_t oldids[2]; /* 4: Ffs: old user and group ids. */ - } di_u; - u_int64_t di_size; /* 8: File byte count. */ - int32_t di_atime; /* 16: Last access time. */ - int32_t di_atimensec; /* 20: Last access time. */ - int32_t di_mtime; /* 24: Last modified time. */ - int32_t di_mtimensec; /* 28: Last modified time. */ - int32_t di_ctime; /* 32: Last inode change time. */ - int32_t di_ctimensec; /* 36: Last inode change time. */ - ufs1_daddr_t di_db[NDADDR]; /* 40: Direct disk blocks. */ - ufs1_daddr_t di_ib[NIADDR]; /* 88: Indirect disk blocks. */ - u_int32_t di_flags; /* 100: Status flags (chflags). */ - int32_t di_blocks; /* 104: Blocks actually held. */ - int32_t di_gen; /* 108: Generation number. */ - u_int32_t di_uid; /* 112: File owner. 
*/ - u_int32_t di_gid; /* 116: File group. */ - int32_t di_spare[2]; /* 120: Reserved; currently unused */ -}; -#define di_ogid di_u.oldids[1] -#define di_ouid di_u.oldids[0] - -#endif /* _UFS_UFS_DINODE_H_ */ -#endif diff --git a/src/sys/ufs/ufs/dir.h b/src/sys/ufs/ufs/dir.h deleted file mode 100644 index 289c18b..0000000 --- a/src/sys/ufs/ufs/dir.h +++ /dev/null @@ -1,161 +0,0 @@ -#if 0 -/* - * Copyright (c) 1982, 1986, 1989, 1993 - * The Regents of the University of California. All rights reserved. - * (c) UNIX System Laboratories, Inc. - * All or some portions of this file are derived from material licensed - * to the University of California by American Telephone and Telegraph - * Co. or Unix System Laboratories, Inc. and are reproduced herein with - * the permission of UNIX System Laboratories, Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)dir.h 8.2 (Berkeley) 1/21/94 - * $FreeBSD: src/sys/ufs/ufs/dir.h,v 1.9 1999/08/28 00:52:27 peter Exp $ - */ - -#ifndef _UFS_UFS_DIR_H_ -#define _UFS_UFS_DIR_H_ - -/* - * Theoretically, directories can be more than 2Gb in length, however, in - * practice this seems unlikely. So, we define the type doff_t as a 32-bit - * quantity to keep down the cost of doing lookup on a 32-bit machine. - */ -#define doff_t int32_t -#define MAXDIRSIZE (0x7fffffff) - -/* - * A directory consists of some number of blocks of DIRBLKSIZ - * bytes, where DIRBLKSIZ is chosen such that it can be transferred - * to disk in a single atomic operation (e.g. 512 bytes on most machines). - * - * Each DIRBLKSIZ byte block contains some number of directory entry - * structures, which are of variable length. Each directory entry has - * a struct direct at the front of it, containing its inode number, - * the length of the entry, and the length of the name contained in - * the entry. 
These are followed by the name padded to a 4 byte boundary - * with null bytes. All names are guaranteed null terminated. - * The maximum length of a name in a directory is MAXNAMLEN. - * - * The macro DIRSIZ(fmt, dp) gives the amount of space required to represent - * a directory entry. Free space in a directory is represented by - * entries which have dp->d_reclen > DIRSIZ(fmt, dp). All DIRBLKSIZ bytes - * in a directory block are claimed by the directory entries. This - * usually results in the last entry in a directory having a large - * dp->d_reclen. When entries are deleted from a directory, the - * space is returned to the previous entry in the same directory - * block by increasing its dp->d_reclen. If the first entry of - * a directory block is free, then its dp->d_ino is set to 0. - * Entries other than the first in a directory do not normally have - * dp->d_ino set to 0. - */ -#define DIRBLKSIZ DEV_BSIZE -#define MAXNAMLEN 255 - -struct direct { - u_int32_t d_ino; /* inode number of entry */ - u_int16_t d_reclen; /* length of this record */ - u_int8_t d_type; /* file type, see below */ - u_int8_t d_namlen; /* length of string in d_name */ - char d_name[MAXNAMLEN + 1];/* name with length <= MAXNAMLEN */ -}; - -/* - * File types - */ -#define DT_UNKNOWN 0 -#define DT_FIFO 1 -#define DT_CHR 2 -#define DT_DIR 4 -#define DT_BLK 6 -#define DT_REG 8 -#define DT_LNK 10 -#define DT_SOCK 12 -#define DT_WHT 14 - -/* - * Convert between stat structure types and directory types. - */ -#define IFTODT(mode) (((mode) & 0170000) >> 12) -#define DTTOIF(dirtype) ((dirtype) << 12) - -/* - * The DIRSIZ macro gives the minimum record length which will hold - * the directory entry. This requires the amount of space in struct direct - * without the d_name field, plus enough space for the name with a terminating - * null byte (dp->d_namlen+1), rounded up to a 4 byte boundary. - * - * - */ -#define DIRECTSIZ(namlen) \ - (((int)&((struct direct *)0)->d_name + \ - ((namlen)+1)*sizeof(((struct direct *)0)->d_name[0]) + 3) & ~3) -#if (BYTE_ORDER == LITTLE_ENDIAN) -#define DIRSIZ(oldfmt, dp) \ - ((oldfmt) ? DIRECTSIZ((dp)->d_type) : DIRECTSIZ((dp)->d_namlen)) -#else -#define DIRSIZ(oldfmt, dp) \ - DIRECTSIZ((dp)->d_namlen) -#endif -#define OLDDIRFMT 1 -#define NEWDIRFMT 0 - -/* - * Template for manipulating directories. Should use struct direct's, - * but the name field is MAXNAMLEN - 1, and this just won't do. - */ -struct dirtemplate { - u_int32_t dot_ino; - int16_t dot_reclen; - u_int8_t dot_type; - u_int8_t dot_namlen; - char dot_name[4]; /* must be multiple of 4 */ - u_int32_t dotdot_ino; - int16_t dotdot_reclen; - u_int8_t dotdot_type; - u_int8_t dotdot_namlen; - char dotdot_name[4]; /* ditto */ -}; - -/* - * This is the old format of directories, sanz type element. - */ -struct odirtemplate { - u_int32_t dot_ino; - int16_t dot_reclen; - u_int16_t dot_namlen; - char dot_name[4]; /* must be multiple of 4 */ - u_int32_t dotdot_ino; - int16_t dotdot_reclen; - u_int16_t dotdot_namlen; - char dotdot_name[4]; /* ditto */ -}; -#endif /* !_DIR_H_ */ -#endif diff --git a/src/sys/ufs/ufs/dirhash.h b/src/sys/ufs/ufs/dirhash.h deleted file mode 100644 index 1073afc..0000000 --- a/src/sys/ufs/ufs/dirhash.h +++ /dev/null @@ -1,129 +0,0 @@ -#if 0 -/* - * Copyright (c) 2001 Ian Dowse. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD: src/sys/ufs/ufs/dirhash.h,v 1.4 2003/01/01 18:48:59 schweikh Exp $ - */ - -#ifndef _UFS_UFS_DIRHASH_H_ -#define _UFS_UFS_DIRHASH_H_ - -/* - * For fast operations on large directories, we maintain a hash - * that maps the file name to the offset of the directory entry within - * the directory file. - * - * The hashing uses a dumb spillover to the next free slot on - * collisions, so we must keep the utilisation low to avoid - * long linear searches. Deleted entries that are not the last - * in a chain must be marked DIRHASH_DEL. - * - * We also maintain information about free space in each block - * to speed up creations. - */ -#define DIRHASH_EMPTY (-1) /* entry unused */ -#define DIRHASH_DEL (-2) /* deleted entry; may be part of chain */ - -#define DIRALIGN 4 -#define DH_NFSTATS (DIRECTSIZ(MAXNAMLEN + 1) / DIRALIGN) - /* max DIRALIGN words in a directory entry */ - -/* - * Dirhash uses a score mechanism to achieve a hybrid between a - * least-recently-used and a least-often-used algorithm for entry - * recycling. The score is incremented when a directory is used, and - * decremented when the directory is a candidate for recycling. When - * the score reaches zero, the hash is recycled. Hashes are linked - * together on a TAILQ list, and hashes with higher scores filter - * towards the tail (most recently used) end of the list. - * - * New hash entries are given an inital score of DH_SCOREINIT and are - * placed at the most-recently-used end of the list. This helps a lot - * in the worst-case case scenario where every directory access is - * to a directory that is not hashed (i.e. the working set of hash - * candidates is much larger than the configured memry limit). In this - * case it limits the number of hash builds to 1/DH_SCOREINIT of the - * number of accesses. - */ -#define DH_SCOREINIT 8 /* initial dh_score when dirhash built */ -#define DH_SCOREMAX 64 /* max dh_score value */ - -/* - * The main hash table has 2 levels. It is an array of pointers to - * blocks of DH_NBLKOFF offsets. 
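Concretely, a lookup slot number is split into an array index and an offset within that array, exactly as the DH_ENTRY() macro below does with DH_BLKOFFSHIFT and DH_BLKOFFMASK. A small stand-alone sketch of the same two-level indexing (local constants mirror the DH_* values; error handling and cleanup are kept minimal):

    #include <stdio.h>
    #include <stdlib.h>

    #define BLKOFFSHIFT 8                  /* mirrors DH_BLKOFFSHIFT */
    #define NBLKOFF     (1 << BLKOFFSHIFT) /* slots per second-level block */
    #define BLKOFFMASK  (NBLKOFF - 1)

    /* Allocate narrays second-level blocks, every slot marked empty (-1). */
    static int **
    table_alloc(int narrays)
    {
            int **hash = malloc(narrays * sizeof(*hash));

            if (hash == NULL)
                    exit(1);
            for (int i = 0; i < narrays; i++) {
                    hash[i] = malloc(NBLKOFF * sizeof(**hash));
                    if (hash[i] == NULL)
                            exit(1);
                    for (int j = 0; j < NBLKOFF; j++)
                            hash[i][j] = -1;   /* like DIRHASH_EMPTY */
            }
            return (hash);
    }

    /* The same split DH_ENTRY() performs: block index, then offset. */
    static int *
    table_slot(int **hash, int slot)
    {
            return (&hash[slot >> BLKOFFSHIFT][slot & BLKOFFMASK]);
    }

    int
    main(void)
    {
            int **hash = table_alloc(2);       /* 2 * 256 = 512 slots */

            *table_slot(hash, 300) = 4096;     /* entry found at offset 4096 */
            printf("slot 300 holds offset %d\n", *table_slot(hash, 300));
            return (0);                        /* cleanup omitted in sketch */
    }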
- */ -#define DH_BLKOFFSHIFT 8 -#define DH_NBLKOFF (1 << DH_BLKOFFSHIFT) -#define DH_BLKOFFMASK (DH_NBLKOFF - 1) - -#define DH_ENTRY(dh, slot) \ - ((dh)->dh_hash[(slot) >> DH_BLKOFFSHIFT][(slot) & DH_BLKOFFMASK]) - -struct dirhash { - struct mtx dh_mtx; /* protects all fields except dh_list */ - - doff_t **dh_hash; /* the hash array (2-level) */ - int dh_narrays; /* number of entries in dh_hash */ - int dh_hlen; /* total slots in the 2-level hash array */ - int dh_hused; /* entries in use */ - - /* Free space statistics. XXX assumes DIRBLKSIZ is 512. */ - u_int8_t *dh_blkfree; /* free DIRALIGN words in each dir block */ - int dh_nblk; /* size of dh_blkfree array */ - int dh_dirblks; /* number of DIRBLKSIZ blocks in dir */ - int dh_firstfree[DH_NFSTATS + 1]; /* first blk with N words free */ - - int dh_seqopt; /* sequential access optimisation enabled */ - doff_t dh_seqoff; /* sequential access optimisation offset */ - - int dh_score; /* access count for this dirhash */ - - int dh_onlist; /* true if on the ufsdirhash_list chain */ - - /* Protected by ufsdirhash_mtx. */ - TAILQ_ENTRY(dirhash) dh_list; /* chain of all dirhashes */ -}; - - -/* - * Dirhash functions. - */ -void ufsdirhash_init(void); -void ufsdirhash_uninit(void); -int ufsdirhash_build(struct inode *); -doff_t ufsdirhash_findfree(struct inode *, int, int *); -doff_t ufsdirhash_enduseful(struct inode *); -int ufsdirhash_lookup(struct inode *, char *, int, doff_t *, struct buf **, - doff_t *); -void ufsdirhash_newblk(struct inode *, doff_t); -void ufsdirhash_add(struct inode *, struct direct *, doff_t); -void ufsdirhash_remove(struct inode *, struct direct *, doff_t); -void ufsdirhash_move(struct inode *, struct direct *, doff_t, doff_t); -void ufsdirhash_dirtrunc(struct inode *, doff_t); -void ufsdirhash_free(struct inode *); - -void ufsdirhash_checkblock(struct inode *, char *, doff_t); - -#endif /* !_UFS_UFS_DIRHASH_H_ */ -#endif diff --git a/src/sys/ufs/ufs/extattr.h b/src/sys/ufs/ufs/extattr.h deleted file mode 100644 index 37321fe..0000000 --- a/src/sys/ufs/ufs/extattr.h +++ /dev/null @@ -1,113 +0,0 @@ -#if 0 -/*- - * Copyright (c) 1999-2001 Robert N. M. Watson - * All rights reserved. - * - * This software was developed by Robert Watson for the TrustedBSD Project. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD: src/sys/ufs/ufs/extattr.h,v 1.18 2003/07/28 18:53:28 rwatson Exp $ - */ -/* - * Developed by the TrustedBSD Project. - * Support for extended filesystem attributes. - */ - -#ifndef _UFS_UFS_EXTATTR_H_ -#define _UFS_UFS_EXTATTR_H_ - -#define UFS_EXTATTR_MAGIC 0x00b5d5ec -#define UFS_EXTATTR_VERSION 0x00000003 -#define UFS_EXTATTR_FSROOTSUBDIR ".attribute" -#define UFS_EXTATTR_SUBDIR_SYSTEM "system" -#define UFS_EXTATTR_SUBDIR_USER "user" -#define UFS_EXTATTR_MAXEXTATTRNAME 65 /* including null */ - -#define UFS_EXTATTR_ATTR_FLAG_INUSE 0x00000001 /* attr has been set */ -#define UFS_EXTATTR_PERM_KERNEL 0x00000000 -#define UFS_EXTATTR_PERM_ROOT 0x00000001 -#define UFS_EXTATTR_PERM_OWNER 0x00000002 -#define UFS_EXTATTR_PERM_ANYONE 0x00000003 - -#define UFS_EXTATTR_UEPM_INITIALIZED 0x00000001 -#define UFS_EXTATTR_UEPM_STARTED 0x00000002 - -#define UFS_EXTATTR_CMD_START 0x00000001 -#define UFS_EXTATTR_CMD_STOP 0x00000002 -#define UFS_EXTATTR_CMD_ENABLE 0x00000003 -#define UFS_EXTATTR_CMD_DISABLE 0x00000004 - -struct ufs_extattr_fileheader { - u_int uef_magic; /* magic number for sanity checking */ - u_int uef_version; /* version of attribute file */ - u_int uef_size; /* size of attributes, w/o header */ -}; - -struct ufs_extattr_header { - u_int ueh_flags; /* flags for attribute */ - u_int ueh_len; /* local defined length; <= uef_size */ - u_int32_t ueh_i_gen; /* generation number for sanity */ - /* data follows the header */ -}; - -#ifdef _KERNEL - -#ifdef MALLOC_DECLARE -MALLOC_DECLARE(M_EXTATTR); -#endif - -struct vnode; -LIST_HEAD(ufs_extattr_list_head, ufs_extattr_list_entry); -struct ufs_extattr_list_entry { - LIST_ENTRY(ufs_extattr_list_entry) uele_entries; - struct ufs_extattr_fileheader uele_fileheader; - int uele_attrnamespace; - char uele_attrname[UFS_EXTATTR_MAXEXTATTRNAME]; - struct vnode *uele_backing_vnode; -}; - -struct lock; -struct ucred; -struct ufs_extattr_per_mount { - struct lock uepm_lock; - struct ufs_extattr_list_head uepm_list; - struct ucred *uepm_ucred; - int uepm_flags; -}; - -void ufs_extattr_uepm_init(struct ufs_extattr_per_mount *uepm); -void ufs_extattr_uepm_destroy(struct ufs_extattr_per_mount *uepm); -int ufs_extattr_start(struct mount *mp, struct thread *td); -int ufs_extattr_autostart(struct mount *mp, struct thread *td); -int ufs_extattr_stop(struct mount *mp, struct thread *td); -int ufs_extattrctl(struct mount *mp, int cmd, struct vnode *filename, - int attrnamespace, const char *attrname, struct thread *td); -int ufs_getextattr(struct vop_getextattr_args *ap); -int ufs_deleteextattr(struct vop_deleteextattr_args *ap); -int ufs_setextattr(struct vop_setextattr_args *ap); -void ufs_extattr_vnode_inactive(struct vnode *vp, struct thread *td); - -#endif /* !_KERNEL */ - -#endif /* !_UFS_UFS_EXTATTR_H_ */ -#endif diff --git a/src/sys/ufs/ufs/inode.h b/src/sys/ufs/ufs/inode.h deleted file mode 100644 index df6df2c..0000000 --- a/src/sys/ufs/ufs/inode.h +++ /dev/null @@ -1,181 +0,0 @@ -#if 0 -/* - * Copyright (c) 1982, 
1989, 1993 - * The Regents of the University of California. All rights reserved. - * (c) UNIX System Laboratories, Inc. - * All or some portions of this file are derived from material licensed - * to the University of California by American Telephone and Telegraph - * Co. or Unix System Laboratories, Inc. and are reproduced herein with - * the permission of UNIX System Laboratories, Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)inode.h 8.9 (Berkeley) 5/14/95 - * $FreeBSD: src/sys/ufs/ufs/inode.h,v 1.44 2003/08/15 20:03:19 phk Exp $ - */ - -#ifndef _UFS_UFS_INODE_H_ -#define _UFS_UFS_INODE_H_ - -#include -#include -#include - -/* - * This must agree with the definition in . - */ -#define doff_t int32_t - -/* - * The inode is used to describe each active (or recently active) file in the - * UFS filesystem. It is composed of two types of information. The first part - * is the information that is needed only while the file is active (such as - * the identity of the file and linkage to speed its lookup). The second part - * is the permanent meta-data associated with the file which is read in - * from the permanent dinode from long term storage when the file becomes - * active, and is put back when the file is no longer being used. - */ -struct inode { - LIST_ENTRY(inode) i_hash;/* Hash chain. */ - TAILQ_ENTRY(inode) i_nextsnap; /* snapshot file list. */ - struct vnode *i_vnode;/* Vnode associated with this inode. */ - struct ufsmount *i_ump;/* Ufsmount point associated with this inode. */ - u_int32_t i_flag; /* flags, see below */ - struct cdev *i_dev; /* Device associated with the inode. */ - ino_t i_number; /* The identity of the inode. */ - int i_effnlink; /* i_nlink when I/O completes */ - - struct fs *i_fs; /* Associated filesystem superblock. */ - struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. 
*/ - u_quad_t i_modrev; /* Revision level for NFS lease. */ - struct lockf *i_lockf;/* Head of byte-level lock list. */ - /* - * Side effects; used during directory lookup. - */ - int32_t i_count; /* Size of free slot in directory. */ - doff_t i_endoff; /* End of useful stuff in directory. */ - doff_t i_diroff; /* Offset in dir, where we found last entry. */ - doff_t i_offset; /* Offset of free space in directory. */ - ino_t i_ino; /* Inode number of found directory. */ - u_int32_t i_reclen; /* Size of found directory entry. */ - - union { - struct dirhash *dirhash; /* Hashing for large directories. */ - daddr_t *snapblklist; /* Collect expunged snapshot blocks. */ - } i_un; - - /* - * Data for extended attribute modification. - */ - u_char *i_ea_area; /* Pointer to malloced copy of EA area */ - unsigned i_ea_len; /* Length of i_ea_area */ - int i_ea_error; /* First errno in transaction */ - - /* - * Copies from the on-disk dinode itself. - */ - u_int16_t i_mode; /* IFMT, permissions; see below. */ - int16_t i_nlink; /* File link count. */ - u_int64_t i_size; /* File byte count. */ - u_int32_t i_flags; /* Status flags (chflags). */ - int64_t i_gen; /* Generation number. */ - u_int32_t i_uid; /* File owner. */ - u_int32_t i_gid; /* File group. */ - /* - * The real copy of the on-disk inode. - */ - union { - struct ufs1_dinode *din1; /* UFS1 on-disk dinode. */ - struct ufs2_dinode *din2; /* UFS2 on-disk dinode. */ - } dinode_u; -}; -/* - * These flags are kept in i_flag. - */ -#define IN_ACCESS 0x0001 /* Access time update request. */ -#define IN_CHANGE 0x0002 /* Inode change time update request. */ -#define IN_UPDATE 0x0004 /* Modification time update request. */ -#define IN_MODIFIED 0x0008 /* Inode has been modified. */ -#define IN_RENAME 0x0010 /* Inode is being renamed. */ -#define IN_HASHED 0x0020 /* Inode is on hash list */ -#define IN_LAZYMOD 0x0040 /* Modified, but don't write yet. */ -#define IN_SPACECOUNTED 0x0080 /* Blocks to be freed in free count. */ - -#define i_devvp i_ump->um_devvp -#define i_dirhash i_un.dirhash -#define i_snapblklist i_un.snapblklist -#define i_din1 dinode_u.din1 -#define i_din2 dinode_u.din2 - -#ifdef _KERNEL -/* - * The DIP macro is used to access fields in the dinode that are - * not cached in the inode itself. - */ -#define DIP(ip, field) \ - (((ip)->i_ump->um_fstype == UFS1) ? \ - (ip)->i_din1->d##field : (ip)->i_din2->d##field) - -#define MAXSYMLINKLEN(ip) \ - ((ip)->i_ump->um_fstype == UFS1) ? \ - ((NDADDR + NIADDR) * sizeof(ufs1_daddr_t)) : \ - ((NDADDR + NIADDR) * sizeof(ufs2_daddr_t)) -#define SHORTLINK(ip) \ - (((ip)->i_ump->um_fstype == UFS1) ? \ - (caddr_t)(ip)->i_din1->di_db : (caddr_t)(ip)->i_din2->di_db) - -/* - * Structure used to pass around logical block paths generated by - * ufs_getlbns and used by truncate and bmap code. - */ -struct indir { - ufs2_daddr_t in_lbn; /* Logical block number. */ - int in_off; /* Offset in buffer. */ - int in_exists; /* Flag if the block exists. */ -}; - -/* Convert between inode pointers and vnode pointers. */ -#define VTOI(vp) ((struct inode *)(vp)->v_data) -#define ITOV(ip) ((ip)->i_vnode) - -/* Determine if soft dependencies are being done */ -#define DOINGSOFTDEP(vp) ((vp)->v_mount->mnt_flag & MNT_SOFTDEP) -#define DOINGASYNC(vp) ((vp)->v_mount->mnt_flag & MNT_ASYNC) - -/* This overlays the fid structure (see mount.h). */ -struct ufid { - u_int16_t ufid_len; /* Length of structure. */ - u_int16_t ufid_pad; /* Force 32-bit alignment. */ - ino_t ufid_ino; /* File number (ino). 
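[Editor's aside] The DIP() macro above hides whether an inode is backed by a UFS1 or a UFS2 on-disk dinode by dispatching on um_fstype. The following standalone sketch shows the same dispatch pattern with toy structures; the names (toy_dinode1, TOY_DIP) are illustrative and the layouts are not the real dinode formats.

#include <stdint.h>
#include <stdio.h>

/* Toy stand-ins for the two on-disk dinode formats (not the real layouts). */
struct toy_dinode1 { uint32_t di_size; };
struct toy_dinode2 { uint64_t di_size; };

struct toy_inode {
    int fstype;                     /* 1 for UFS1, 2 for UFS2 */
    union {
        struct toy_dinode1 *din1;
        struct toy_dinode2 *din2;
    } dinode_u;
};

/* Same shape as DIP(): pick the field from whichever dinode is live. */
#define TOY_DIP(ip, field) \
    (((ip)->fstype == 1) ? (ip)->dinode_u.din1->field \
                         : (ip)->dinode_u.din2->field)

int
main(void)
{
    struct toy_dinode2 d2 = { .di_size = 123456789ULL };
    struct toy_inode ip = { .fstype = 2, .dinode_u.din2 = &d2 };

    printf("size = %llu\n", (unsigned long long)TOY_DIP(&ip, di_size));
    return (0);
}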
*/ - int32_t ufid_gen; /* Generation number. */ -}; -#endif /* _KERNEL */ - -#endif /* !_UFS_UFS_INODE_H_ */ -#endif diff --git a/src/sys/ufs/ufs/quota.h b/src/sys/ufs/ufs/quota.h deleted file mode 100644 index b3de955..0000000 --- a/src/sys/ufs/ufs/quota.h +++ /dev/null @@ -1,206 +0,0 @@ -#if 0 -/* - * Copyright (c) 1982, 1986, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Robert Elz at The University of Melbourne. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)quota.h 8.3 (Berkeley) 8/19/94 - * $FreeBSD: src/sys/ufs/ufs/quota.h,v 1.25 2003/06/15 06:36:19 rwatson Exp $ - */ - -#ifndef _UFS_UFS_QUOTA_H_ -#define _UFS_UFS_QUOTA_H_ - -/* - * Definitions for disk quotas imposed on the average user - * (big brother finally hits UNIX). - * - * The following constants define the amount of time given a user before the - * soft limits are treated as hard limits (usually resulting in an allocation - * failure). The timer is started when the user crosses their soft limit, it - * is reset when they go below their soft limit. - */ -#define MAX_IQ_TIME (7*24*60*60) /* seconds in 1 week */ -#define MAX_DQ_TIME (7*24*60*60) /* seconds in 1 week */ - -/* - * The following constants define the usage of the quota file array in the - * ufsmount structure and dquot array in the inode structure. The semantics - * of the elements of these arrays are defined in the routine getinoquota; - * the remainder of the quota code treats them generically and need not be - * inspected when changing the size of the array. - */ -#define MAXQUOTAS 2 -#define USRQUOTA 0 /* element used for user quotas */ -#define GRPQUOTA 1 /* element used for group quotas */ - -/* - * Definitions for the default names of the quotas files. 
- */ -#define INITQFNAMES { \ - "user", /* USRQUOTA */ \ - "group", /* GRPQUOTA */ \ - "undefined", \ -} -#define QUOTAFILENAME "quota" -#define QUOTAGROUP "operator" - -/* - * Command definitions for the 'quotactl' system call. The commands are - * broken into a main command defined below and a subcommand that is used - * to convey the type of quota that is being manipulated (see above). - */ -#define SUBCMDMASK 0x00ff -#define SUBCMDSHIFT 8 -#define QCMD(cmd, type) (((cmd) << SUBCMDSHIFT) | ((type) & SUBCMDMASK)) - -#define Q_QUOTAON 0x0100 /* enable quotas */ -#define Q_QUOTAOFF 0x0200 /* disable quotas */ -#define Q_GETQUOTA 0x0300 /* get limits and usage */ -#define Q_SETQUOTA 0x0400 /* set limits and usage */ -#define Q_SETUSE 0x0500 /* set usage */ -#define Q_SYNC 0x0600 /* sync disk copy of a filesystems quotas */ - -/* - * The following structure defines the format of the disk quota file - * (as it appears on disk) - the file is an array of these structures - * indexed by user or group number. The setquota system call establishes - * the vnode for each quota file (a pointer is retained in the ufsmount - * structure). - */ -struct dqblk { - u_int32_t dqb_bhardlimit; /* absolute limit on disk blks alloc */ - u_int32_t dqb_bsoftlimit; /* preferred limit on disk blks */ - u_int32_t dqb_curblocks; /* current block count */ - u_int32_t dqb_ihardlimit; /* maximum # allocated inodes + 1 */ - u_int32_t dqb_isoftlimit; /* preferred inode limit */ - u_int32_t dqb_curinodes; /* current # allocated inodes */ - int32_t dqb_btime; /* time limit for excessive disk use */ - int32_t dqb_itime; /* time limit for excessive files */ -}; - -#ifdef _KERNEL - -#include - -/* - * The following structure records disk usage for a user or group on a - * filesystem. There is one allocated for each quota that exists on any - * filesystem for the current user or group. A cache is kept of recently - * used entries. - */ -struct dquot { - LIST_ENTRY(dquot) dq_hash; /* hash list */ - TAILQ_ENTRY(dquot) dq_freelist; /* free list */ - u_int16_t dq_flags; /* flags, see below */ - u_int16_t dq_type; /* quota type of this dquot */ - u_int32_t dq_cnt; /* count of active references */ - u_int32_t dq_id; /* identifier this applies to */ - struct ufsmount *dq_ump; /* filesystem that this is taken from */ - struct dqblk dq_dqb; /* actual usage & quotas */ -}; -/* - * Flag values. - */ -#define DQ_LOCK 0x01 /* this quota locked (no MODS) */ -#define DQ_WANT 0x02 /* wakeup on unlock */ -#define DQ_MOD 0x04 /* this quota modified since read */ -#define DQ_FAKE 0x08 /* no limits here, just usage */ -#define DQ_BLKS 0x10 /* has been warned about blk limit */ -#define DQ_INODS 0x20 /* has been warned about inode limit */ -/* - * Shorthand notation. - */ -#define dq_bhardlimit dq_dqb.dqb_bhardlimit -#define dq_bsoftlimit dq_dqb.dqb_bsoftlimit -#define dq_curblocks dq_dqb.dqb_curblocks -#define dq_ihardlimit dq_dqb.dqb_ihardlimit -#define dq_isoftlimit dq_dqb.dqb_isoftlimit -#define dq_curinodes dq_dqb.dqb_curinodes -#define dq_btime dq_dqb.dqb_btime -#define dq_itime dq_dqb.dqb_itime - -/* - * If the system has never checked for a quota for this file, then it is - * set to NODQUOT. Once a write attempt is made the inode pointer is set - * to reference a dquot structure. 
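[Editor's aside] The QCMD() macro above packs a main command and a quota type into the single cmd argument of quotactl(2). A hedged userland sketch that fetches the invoking user's usage on one filesystem, assuming the quotactl() wrapper declared later in this header and a kernel built with quota support; the mount point is only an example.

#include <sys/types.h>
#include <ufs/ufs/quota.h>  /* QCMD, Q_GETQUOTA, struct dqblk, quotactl() */
#include <stdio.h>
#include <unistd.h>

int
main(int argc, char *argv[])
{
    struct dqblk dq;
    const char *fs = (argc > 1) ? argv[1] : "/usr";  /* example mount point */

    /* Ask for the invoking user's quota record on `fs'. */
    if (quotactl(fs, QCMD(Q_GETQUOTA, USRQUOTA), getuid(), &dq) == -1) {
        perror("quotactl");
        return (1);
    }
    printf("%s: %u blocks used (soft %u, hard %u)\n", fs,
        dq.dqb_curblocks, dq.dqb_bsoftlimit, dq.dqb_bhardlimit);
    return (0);
}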
- */ -#define NODQUOT NULL - -/* - * Flags to chkdq() and chkiq() - */ -#define FORCE 0x01 /* force usage changes independent of limits */ -#define CHOWN 0x02 /* (advisory) change initiated by chown */ - -/* - * Macros to avoid subroutine calls to trivial functions. - */ -#ifdef DIAGNOSTIC -#define DQREF(dq) dqref(dq) -#else -#define DQREF(dq) (dq)->dq_cnt++ -#endif - -struct inode; -struct mount; -struct thread; -struct ucred; -struct vnode; - -int chkdq(struct inode *, int64_t, struct ucred *, int); -int chkiq(struct inode *, ino_t, struct ucred *, int); -void dqinit(void); -void dqrele(struct vnode *, struct dquot *); -void dquninit(void); -int getinoquota(struct inode *); -int getquota(struct thread *, struct mount *, u_long, int, caddr_t); -int qsync(struct mount *mp); -int quotaoff(struct thread *td, struct mount *, int); -int quotaon(struct thread *td, struct mount *, int, caddr_t); -int setquota(struct thread *, struct mount *, u_long, int, caddr_t); -int setuse(struct thread *, struct mount *, u_long, int, caddr_t); -vfs_quotactl_t ufs_quotactl; - -#else /* !_KERNEL */ - -#include - -__BEGIN_DECLS -int quotactl(const char *, int, int, void *); -__END_DECLS - -#endif /* _KERNEL */ - -#endif /* !_UFS_UFS_QUOTA_H_ */ -#endif diff --git a/src/sys/ufs/ufs/ufs_acl.c b/src/sys/ufs/ufs/ufs_acl.c deleted file mode 100644 index 5645987..0000000 --- a/src/sys/ufs/ufs/ufs_acl.c +++ /dev/null @@ -1,443 +0,0 @@ -#if 0 -/*- - * Copyright (c) 1999-2001, 2003 Robert N. M. Watson - * All rights reserved. - * - * This software was developed by Robert Watson for the TrustedBSD Project. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * Support for POSIX.1e access control lists: UFS-specific support functions. - */ - -#include -__FBSDID("$FreeBSD: src/sys/ufs/ufs/ufs_acl.c,v 1.18 2003/08/04 03:29:13 rwatson Exp $"); - -#include "opt_ufs.h" -#include "opt_quota.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#ifdef UFS_ACL - -/* - * Synchronize an ACL and an inode by copying over appropriate inode fields - * to the passed ACL. Assumes an ACL that would satisfy acl_posix1e_check(), - * and may panic if not. 
- */ -void -ufs_sync_acl_from_inode(struct inode *ip, struct acl *acl) -{ - struct acl_entry *acl_mask, *acl_group_obj; - int i; - - /* - * Update ACL_USER_OBJ, ACL_OTHER, but simply identify ACL_MASK - * and ACL_GROUP_OBJ for use after we know whether ACL_MASK is - * present. - */ - acl_mask = NULL; - acl_group_obj = NULL; - for (i = 0; i < acl->acl_cnt; i++) { - switch (acl->acl_entry[i].ae_tag) { - case ACL_USER_OBJ: - acl->acl_entry[i].ae_perm = acl_posix1e_mode_to_perm( - ACL_USER_OBJ, ip->i_mode); - acl->acl_entry[i].ae_id = ACL_UNDEFINED_ID; - break; - - case ACL_GROUP_OBJ: - acl_group_obj = &acl->acl_entry[i]; - acl->acl_entry[i].ae_id = ACL_UNDEFINED_ID; - break; - - case ACL_OTHER: - acl->acl_entry[i].ae_perm = acl_posix1e_mode_to_perm( - ACL_OTHER, ip->i_mode); - acl->acl_entry[i].ae_id = ACL_UNDEFINED_ID; - break; - - case ACL_MASK: - acl_mask = &acl->acl_entry[i]; - acl->acl_entry[i].ae_id = ACL_UNDEFINED_ID; - break; - - case ACL_USER: - case ACL_GROUP: - break; - - default: - panic("ufs_sync_acl_from_inode(): bad ae_tag"); - } - } - - if (acl_group_obj == NULL) - panic("ufs_sync_acl_from_inode(): no ACL_GROUP_OBJ"); - - if (acl_mask == NULL) { - /* - * There is no ACL_MASK, so update ACL_GROUP_OBJ. - */ - acl_group_obj->ae_perm = acl_posix1e_mode_to_perm( - ACL_GROUP_OBJ, ip->i_mode); - } else { - /* - * Update the ACL_MASK entry instead of ACL_GROUP_OBJ. - */ - acl_mask->ae_perm = acl_posix1e_mode_to_perm(ACL_GROUP_OBJ, - ip->i_mode); - } -} - -/* - * Calculate what the inode mode should look like based on an authoritative - * ACL for the inode. Replace only the fields in the inode that the ACL - * can represent. - */ -void -ufs_sync_inode_from_acl(struct acl *acl, struct inode *ip) -{ - - ip->i_mode &= ACL_PRESERVE_MASK; - ip->i_mode |= acl_posix1e_acl_to_mode(acl); - DIP(ip, i_mode) = ip->i_mode; -} - -/* - * Retrieve the ACL on a file. - * - * As part of the ACL is stored in the inode, and the rest in an EA, - * assemble both into a final ACL product. Right now this is not done - * very efficiently. - */ -int -ufs_getacl(ap) - struct vop_getacl_args /* { - struct vnode *vp; - struct acl_type_t type; - struct acl *aclp; - struct ucred *cred; - struct thread *td; - } */ *ap; -{ - struct inode *ip = VTOI(ap->a_vp); - int error, len; - - /* - * XXX: If ufs_getacl() should work on file systems not supporting - * ACLs, remove this check. - */ - if ((ap->a_vp->v_mount->mnt_flag & MNT_ACLS) == 0) - return (EOPNOTSUPP); - - /* - * Attempt to retrieve the ACL based on the ACL type. - */ - bzero(ap->a_aclp, sizeof(*ap->a_aclp)); - len = sizeof(*ap->a_aclp); - switch(ap->a_type) { - case ACL_TYPE_ACCESS: - /* - * ACL_TYPE_ACCESS ACLs may or may not be stored in the - * EA, as they are in fact a combination of the inode - * ownership/permissions and the EA contents. If the - * EA is present, merge the two in a temporary ACL - * storage, otherwise just return the inode contents. - */ - error = vn_extattr_get(ap->a_vp, IO_NODELOCKED, - POSIX1E_ACL_ACCESS_EXTATTR_NAMESPACE, - POSIX1E_ACL_ACCESS_EXTATTR_NAME, &len, (char *) ap->a_aclp, - ap->a_td); - switch (error) { - /* XXX: If ufs_getacl() should work on filesystems without - * the EA configured, add case EOPNOTSUPP here. */ - case ENOATTR: - /* - * Legitimately no ACL set on object, purely - * emulate it through the inode. These fields will - * be updated when the ACL is synchronized with - * the inode later. 
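[Editor's aside] In the ENOATTR case described above, the access ACL is emulated purely from the inode: three entries whose permissions mirror the file mode. Below is a small standalone sketch of that mode-to-ACL mapping (cf. acl_posix1e_mode_to_perm()); the toy_* types are illustrative stand-ins, not the real acl structures.

#include <sys/types.h>
#include <stdio.h>

/* Minimal stand-ins for the ACL types used above (not the real definitions). */
enum toy_tag { TOY_USER_OBJ, TOY_GROUP_OBJ, TOY_OTHER };
struct toy_acl_entry { enum toy_tag tag; int perm; /* rwx bits, 0..7 */ };
struct toy_acl { int cnt; struct toy_acl_entry entry[3]; };

/*
 * Emulate a mode-only access ACL, the way the ENOATTR case above does:
 * three entries whose permissions come straight from the file mode.
 */
static void
acl_from_mode(mode_t mode, struct toy_acl *acl)
{
    acl->cnt = 3;
    acl->entry[0] = (struct toy_acl_entry){ TOY_USER_OBJ,  (mode >> 6) & 7 };
    acl->entry[1] = (struct toy_acl_entry){ TOY_GROUP_OBJ, (mode >> 3) & 7 };
    acl->entry[2] = (struct toy_acl_entry){ TOY_OTHER,      mode       & 7 };
}

int
main(void)
{
    struct toy_acl acl;

    acl_from_mode(0644, &acl);
    printf("user=%o group=%o other=%o\n",
        acl.entry[0].perm, acl.entry[1].perm, acl.entry[2].perm);
    return (0);
}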
- */ - ap->a_aclp->acl_cnt = 3; - ap->a_aclp->acl_entry[0].ae_tag = ACL_USER_OBJ; - ap->a_aclp->acl_entry[0].ae_id = ACL_UNDEFINED_ID; - ap->a_aclp->acl_entry[0].ae_perm = ACL_PERM_NONE; - ap->a_aclp->acl_entry[1].ae_tag = ACL_GROUP_OBJ; - ap->a_aclp->acl_entry[1].ae_id = ACL_UNDEFINED_ID; - ap->a_aclp->acl_entry[1].ae_perm = ACL_PERM_NONE; - ap->a_aclp->acl_entry[2].ae_tag = ACL_OTHER; - ap->a_aclp->acl_entry[2].ae_id = ACL_UNDEFINED_ID; - ap->a_aclp->acl_entry[2].ae_perm = ACL_PERM_NONE; - ufs_sync_acl_from_inode(ip, ap->a_aclp); - error = 0; - break; - - case 0: - if (len != sizeof(*ap->a_aclp)) { - /* - * A short (or long) read, meaning that for - * some reason the ACL is corrupted. Return - * EPERM since the object DAC protections - * are unsafe. - */ - printf("ufs_getacl(): Loaded invalid ACL (" - "%d bytes)\n", len); - return (EPERM); - } - ufs_sync_acl_from_inode(ip, ap->a_aclp); - break; - - default: - break; - } - break; - - case ACL_TYPE_DEFAULT: - if (ap->a_vp->v_type != VDIR) { - error = EINVAL; - break; - } - error = vn_extattr_get(ap->a_vp, IO_NODELOCKED, - POSIX1E_ACL_DEFAULT_EXTATTR_NAMESPACE, - POSIX1E_ACL_DEFAULT_EXTATTR_NAME, &len, - (char *) ap->a_aclp, ap->a_td); - /* - * Unlike ACL_TYPE_ACCESS, there is no relationship between - * the inode contents and the ACL, and it is therefore - * possible for the request for the ACL to fail since the - * ACL is undefined. In this situation, return success - * and an empty ACL, as required by POSIX.1e. - */ - switch (error) { - /* XXX: If ufs_getacl() should work on filesystems without - * the EA configured, add case EOPNOTSUPP here. */ - case ENOATTR: - bzero(ap->a_aclp, sizeof(*ap->a_aclp)); - ap->a_aclp->acl_cnt = 0; - error = 0; - break; - - case 0: - if (len != sizeof(*ap->a_aclp)) { - /* - * A short (or long) read, meaning that for - * some reason the ACL is corrupted. Return - * EPERM since the object default DAC - * protections are unsafe. - */ - printf("ufs_getacl(): Loaded invalid ACL (" - "%d bytes)\n", len); - return (EPERM); - } - break; - - default: - break; - } - break; - - default: - error = EINVAL; - } - - return (error); -} - -/* - * Set the ACL on a file. - * - * As part of the ACL is stored in the inode, and the rest in an EA, - * this is necessarily non-atomic, and has complex authorization. - * As ufs_setacl() includes elements of ufs_chown() and ufs_chmod(), - * a fair number of different access checks may be required to go ahead - * with the operation at all. - */ -int -ufs_setacl(ap) - struct vop_setacl_args /* { - struct vnode *vp; - acl_type_t type; - struct acl *aclp; - struct ucred *cred; - struct proc *p; - } */ *ap; -{ - struct inode *ip = VTOI(ap->a_vp); - int error; - - if ((ap->a_vp->v_mount->mnt_flag & MNT_ACLS) == 0) - return (EOPNOTSUPP); - - /* - * If this is a set operation rather than a delete operation, - * invoke VOP_ACLCHECK() on the passed ACL to determine if it is - * valid for the target. This will include a check on ap->a_type. - */ - if (ap->a_aclp != NULL) { - /* - * Set operation. - */ - error = VOP_ACLCHECK(ap->a_vp, ap->a_type, ap->a_aclp, - ap->a_cred, ap->a_td); - if (error != 0) - return (error); - } else { - /* - * Delete operation. - * POSIX.1e allows only deletion of the default ACL on a - * directory (ACL_TYPE_DEFAULT). - */ - if (ap->a_type != ACL_TYPE_DEFAULT) - return (EINVAL); - if (ap->a_vp->v_type != VDIR) - return (ENOTDIR); - } - - if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) - return (EROFS); - - /* - * Authorize the ACL operation. 
- */ - if (ip->i_flags & (IMMUTABLE | APPEND)) - return (EPERM); - - /* - * Must hold VADMIN (be file owner) or have appropriate privilege. - */ - if ((error = VOP_ACCESS(ap->a_vp, VADMIN, ap->a_cred, ap->a_td))) - return (error); - - switch(ap->a_type) { - case ACL_TYPE_ACCESS: - error = vn_extattr_set(ap->a_vp, IO_NODELOCKED, - POSIX1E_ACL_ACCESS_EXTATTR_NAMESPACE, - POSIX1E_ACL_ACCESS_EXTATTR_NAME, sizeof(*ap->a_aclp), - (char *) ap->a_aclp, ap->a_td); - break; - - case ACL_TYPE_DEFAULT: - if (ap->a_aclp == NULL) { - error = vn_extattr_rm(ap->a_vp, IO_NODELOCKED, - POSIX1E_ACL_DEFAULT_EXTATTR_NAMESPACE, - POSIX1E_ACL_DEFAULT_EXTATTR_NAME, ap->a_td); - /* - * Attempting to delete a non-present default ACL - * will return success for portability purposes. - * (TRIX) - * - * XXX: Note that since we can't distinguish - * "that EA is not supported" from "that EA is not - * defined", the success case here overlaps the - * the ENOATTR->EOPNOTSUPP case below. - */ - if (error == ENOATTR) - error = 0; - } else - error = vn_extattr_set(ap->a_vp, IO_NODELOCKED, - POSIX1E_ACL_DEFAULT_EXTATTR_NAMESPACE, - POSIX1E_ACL_DEFAULT_EXTATTR_NAME, - sizeof(*ap->a_aclp), (char *) ap->a_aclp, ap->a_td); - break; - - default: - error = EINVAL; - } - /* - * Map lack of attribute definition in UFS_EXTATTR into lack of - * support for ACLs on the filesystem. - */ - if (error == ENOATTR) - return (EOPNOTSUPP); - if (error != 0) - return (error); - - if (ap->a_type == ACL_TYPE_ACCESS) { - /* - * Now that the EA is successfully updated, update the - * inode and mark it as changed. - */ - ufs_sync_inode_from_acl(ap->a_aclp, ip); - ip->i_flag |= IN_CHANGE; - } - - VN_KNOTE(ap->a_vp, NOTE_ATTRIB); - return (0); -} - -/* - * Check the validity of an ACL for a file. - */ -int -ufs_aclcheck(ap) - struct vop_aclcheck_args /* { - struct vnode *vp; - acl_type_t type; - struct acl *aclp; - struct ucred *cred; - struct thread *td; - } */ *ap; -{ - - if ((ap->a_vp->v_mount->mnt_flag & MNT_ACLS) == 0) - return (EOPNOTSUPP); - - /* - * Verify we understand this type of ACL, and that it applies - * to this kind of object. - * Rely on the acl_posix1e_check() routine to verify the contents. - */ - switch(ap->a_type) { - case ACL_TYPE_ACCESS: - break; - - case ACL_TYPE_DEFAULT: - if (ap->a_vp->v_type != VDIR) - return (EINVAL); - break; - - default: - return (EINVAL); - } - return (acl_posix1e_check(ap->a_aclp)); -} - -#endif /* !UFS_ACL */ -#endif diff --git a/src/sys/ufs/ufs/ufs_bmap.c b/src/sys/ufs/ufs/ufs_bmap.c deleted file mode 100644 index d38f9fd..0000000 --- a/src/sys/ufs/ufs/ufs_bmap.c +++ /dev/null @@ -1,387 +0,0 @@ -#if 0 -/* - * Copyright (c) 1989, 1991, 1993 - * The Regents of the University of California. All rights reserved. - * (c) UNIX System Laboratories, Inc. - * All or some portions of this file are derived from material licensed - * to the University of California by American Telephone and Telegraph - * Co. or Unix System Laboratories, Inc. and are reproduced herein with - * the permission of UNIX System Laboratories, Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)ufs_bmap.c 8.7 (Berkeley) 3/21/95 - */ - -#include -__FBSDID("$FreeBSD: src/sys/ufs/ufs/ufs_bmap.c,v 1.59 2003/10/18 14:10:27 phk Exp $"); - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -/* - * Bmap converts a the logical block number of a file to its physical block - * number on the disk. The conversion is done by using the logical block - * number to index into the array of block pointers described by the dinode. - */ -int -ufs_bmap(ap) - struct vop_bmap_args /* { - struct vnode *a_vp; - daddr_t a_bn; - struct vnode **a_vpp; - daddr_t *a_bnp; - int *a_runp; - int *a_runb; - } */ *ap; -{ - ufs2_daddr_t blkno; - int error; - - /* - * Check for underlying vnode requests and ensure that logical - * to physical mapping is requested. - */ - if (ap->a_vpp != NULL) - *ap->a_vpp = VTOI(ap->a_vp)->i_devvp; - if (ap->a_bnp == NULL) - return (0); - - error = ufs_bmaparray(ap->a_vp, ap->a_bn, &blkno, NULL, - ap->a_runp, ap->a_runb); - *ap->a_bnp = blkno; - return (error); -} - -/* - * Indirect blocks are now on the vnode for the file. They are given negative - * logical block numbers. Indirect blocks are addressed by the negative - * address of the first data block to which they point. Double indirect blocks - * are addressed by one less than the address of the first indirect block to - * which they point. Triple indirect blocks are addressed by one less than - * the address of the first double indirect block to which they point. - * - * ufs_bmaparray does the bmap conversion, and if requested returns the - * array of logical blocks which must be traversed to get to a block. - * Each entry contains the offset into that block that gets you to the - * next block and the disk address of the block (if it is assigned). 
- */ - -int -ufs_bmaparray(vp, bn, bnp, nbp, runp, runb) - struct vnode *vp; - ufs2_daddr_t bn; - ufs2_daddr_t *bnp; - struct buf *nbp; - int *runp; - int *runb; -{ - struct inode *ip; - struct buf *bp; - struct ufsmount *ump; - struct mount *mp; - struct vnode *devvp; - struct indir a[NIADDR+1], *ap; - ufs2_daddr_t daddr; - ufs_lbn_t metalbn; - int error, num, maxrun = 0; - int *nump; - - ap = NULL; - ip = VTOI(vp); - mp = vp->v_mount; - ump = VFSTOUFS(mp); - devvp = ump->um_devvp; - - if (runp) { - maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1; - *runp = 0; - } - - if (runb) { - *runb = 0; - } - - - ap = a; - nump = # - error = ufs_getlbns(vp, bn, ap, nump); - if (error) - return (error); - - num = *nump; - if (num == 0) { - if (bn >= 0 && bn < NDADDR) { - *bnp = blkptrtodb(ump, DIP(ip, i_db[bn])); - } else if (bn < 0 && bn >= -NXADDR) { - *bnp = blkptrtodb(ump, ip->i_din2->di_extb[-1 - bn]); - if (*bnp == 0) - *bnp = -1; - if (nbp == NULL) - panic("ufs_bmaparray: mapping ext data"); - nbp->b_xflags |= BX_ALTDATA; - return (0); - } else { - panic("ufs_bmaparray: blkno out of range"); - } - /* - * Since this is FFS independent code, we are out of - * scope for the definitions of BLK_NOCOPY and - * BLK_SNAP, but we do know that they will fall in - * the range 1..um_seqinc, so we use that test and - * return a request for a zeroed out buffer if attempts - * are made to read a BLK_NOCOPY or BLK_SNAP block. - */ - if ((ip->i_flags & SF_SNAPSHOT) && DIP(ip, i_db[bn]) > 0 && - DIP(ip, i_db[bn]) < ump->um_seqinc) { - *bnp = -1; - } else if (*bnp == 0) { - if (ip->i_flags & SF_SNAPSHOT) - *bnp = blkptrtodb(ump, bn * ump->um_seqinc); - else - *bnp = -1; - } else if (runp) { - ufs2_daddr_t bnb = bn; - for (++bn; bn < NDADDR && *runp < maxrun && - is_sequential(ump, DIP(ip, i_db[bn - 1]), - DIP(ip, i_db[bn])); - ++bn, ++*runp); - bn = bnb; - if (runb && (bn > 0)) { - for (--bn; (bn >= 0) && (*runb < maxrun) && - is_sequential(ump, DIP(ip, i_db[bn]), - DIP(ip, i_db[bn+1])); - --bn, ++*runb); - } - } - return (0); - } - - - /* Get disk address out of indirect block array */ - daddr = DIP(ip, i_ib[ap->in_off]); - - for (bp = NULL, ++ap; --num; ++ap) { - /* - * Exit the loop if there is no disk address assigned yet and - * the indirect block isn't in the cache, or if we were - * looking for an indirect block and we've found it. - */ - - metalbn = ap->in_lbn; - if ((daddr == 0 && !incore(vp, metalbn)) || metalbn == bn) - break; - /* - * If we get here, we've either got the block in the cache - * or we have a disk address for it, go fetch it. 
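[Editor's aside] ufs_bmaparray() below fills *runp/*runb by counting how many neighbouring logical blocks are physically contiguous. A standalone sketch of that run-length test follows, with toy block pointers expressed in fragments; `seqinc' plays the role of um_seqinc (one block's worth of fragments) and the values are made up for illustration.

#include <stdint.h>
#include <stdio.h>

/*
 * Count how many blocks after `start' are physically contiguous, the test
 * the bmap code uses to fill in *runp: two neighbouring logical blocks are
 * "sequential" when their pointers differ by exactly `seqinc' fragments.
 */
static int
count_run(const int64_t *blkptr, int nblks, int start, int64_t seqinc, int maxrun)
{
    int run = 0;

    for (int bn = start + 1;
        bn < nblks && run < maxrun &&
        blkptr[bn] == blkptr[bn - 1] + seqinc;
        bn++)
        run++;
    return (run);
}

int
main(void)
{
    /* Toy direct-block pointers, in fragments; 8 fragments per block. */
    int64_t db[] = { 1000, 1008, 1016, 1024, 5000, 5008 };

    printf("run after lbn 0: %d extra block(s)\n", count_run(db, 6, 0, 8, 16));
    printf("run after lbn 4: %d extra block(s)\n", count_run(db, 6, 4, 8, 16));
    return (0);
}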
- */ - if (bp) - bqrelse(bp); - - ap->in_exists = 1; - bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0, 0); - if ((bp->b_flags & B_CACHE) == 0) { -#ifdef DIAGNOSTIC - if (!daddr) - panic("ufs_bmaparray: indirect block not in cache"); -#endif - bp->b_blkno = blkptrtodb(ump, daddr); - bp->b_iocmd = BIO_READ; - bp->b_flags &= ~B_INVAL; - bp->b_ioflags &= ~BIO_ERROR; - vfs_busy_pages(bp, 0); - bp->b_iooffset = dbtob(bp->b_blkno); - VOP_STRATEGY(bp->b_vp, bp); - curproc->p_stats->p_ru.ru_inblock++; /* XXX */ - error = bufwait(bp); - if (error) { - brelse(bp); - return (error); - } - } - - if (ip->i_ump->um_fstype == UFS1) { - daddr = ((ufs1_daddr_t *)bp->b_data)[ap->in_off]; - if (num == 1 && daddr && runp) { - for (bn = ap->in_off + 1; - bn < MNINDIR(ump) && *runp < maxrun && - is_sequential(ump, - ((ufs1_daddr_t *)bp->b_data)[bn - 1], - ((ufs1_daddr_t *)bp->b_data)[bn]); - ++bn, ++*runp); - bn = ap->in_off; - if (runb && bn) { - for (--bn; bn >= 0 && *runb < maxrun && - is_sequential(ump, - ((ufs1_daddr_t *)bp->b_data)[bn], - ((ufs1_daddr_t *)bp->b_data)[bn+1]); - --bn, ++*runb); - } - } - continue; - } - daddr = ((ufs2_daddr_t *)bp->b_data)[ap->in_off]; - if (num == 1 && daddr && runp) { - for (bn = ap->in_off + 1; - bn < MNINDIR(ump) && *runp < maxrun && - is_sequential(ump, - ((ufs2_daddr_t *)bp->b_data)[bn - 1], - ((ufs2_daddr_t *)bp->b_data)[bn]); - ++bn, ++*runp); - bn = ap->in_off; - if (runb && bn) { - for (--bn; bn >= 0 && *runb < maxrun && - is_sequential(ump, - ((ufs2_daddr_t *)bp->b_data)[bn], - ((ufs2_daddr_t *)bp->b_data)[bn + 1]); - --bn, ++*runb); - } - } - } - if (bp) - bqrelse(bp); - - /* - * Since this is FFS independent code, we are out of scope for the - * definitions of BLK_NOCOPY and BLK_SNAP, but we do know that they - * will fall in the range 1..um_seqinc, so we use that test and - * return a request for a zeroed out buffer if attempts are made - * to read a BLK_NOCOPY or BLK_SNAP block. - */ - if ((ip->i_flags & SF_SNAPSHOT) && daddr > 0 && daddr < ump->um_seqinc){ - *bnp = -1; - return (0); - } - *bnp = blkptrtodb(ump, daddr); - if (*bnp == 0) { - if (ip->i_flags & SF_SNAPSHOT) - *bnp = blkptrtodb(ump, bn * ump->um_seqinc); - else - *bnp = -1; - } - return (0); -} - -/* - * Create an array of logical block number/offset pairs which represent the - * path of indirect blocks required to access a data block. The first "pair" - * contains the logical block number of the appropriate single, double or - * triple indirect block and the offset into the inode indirect block array. - * Note, the logical block number of the inode single/double/triple indirect - * block appears twice in the array, once with the offset into the i_ib and - * once with the offset into the page itself. - */ -int -ufs_getlbns(vp, bn, ap, nump) - struct vnode *vp; - ufs2_daddr_t bn; - struct indir *ap; - int *nump; -{ - ufs2_daddr_t blockcnt; - ufs_lbn_t metalbn, realbn; - struct ufsmount *ump; - int i, numlevels, off; - - ump = VFSTOUFS(vp->v_mount); - if (nump) - *nump = 0; - numlevels = 0; - realbn = bn; - if (bn < 0) - bn = -bn; - - /* The first NDADDR blocks are direct blocks. */ - if (bn < NDADDR) - return (0); - - /* - * Determine the number of levels of indirection. After this loop - * is done, blockcnt indicates the number of data blocks possible - * at the previous level of indirection, and NIADDR - i is the number - * of levels of indirection needed to locate the requested block. 
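[Editor's aside] The level-counting loop described above (and implemented in ufs_getlbns() below) decides how many levels of indirection a logical block needs. The following standalone sketch mirrors that computation; TOY_NDADDR and TOY_NIADDR are illustrative stand-ins for NDADDR and NIADDR, and the pointer-per-block count is an example value.

#include <stdint.h>
#include <stdio.h>

#define TOY_NDADDR  12  /* direct block pointers in the dinode */
#define TOY_NIADDR  3   /* single, double, triple indirect pointers */

/*
 * Return the number of indirection levels (0..3) needed to reach logical
 * block `bn', when each indirect block holds `nindir' pointers.
 * Mirrors the level-counting loop in ufs_getlbns(); returns -1 for EFBIG.
 */
static int
indir_levels(int64_t bn, int64_t nindir)
{
    int64_t blockcnt = 1;
    int i;

    if (bn < TOY_NDADDR)
        return (0);
    bn -= TOY_NDADDR;
    for (i = TOY_NIADDR; ; i--, bn -= blockcnt) {
        if (i == 0)
            return (-1);        /* past triple indirect: EFBIG */
        blockcnt *= nindir;
        if (bn < blockcnt)
            break;
    }
    return (TOY_NIADDR - i + 1);
}

int
main(void)
{
    int64_t nindir = 2048;      /* e.g. 16K blocks / 8-byte UFS2 pointers */
    int64_t bns[] = { 5, 100, 5000, 10000000 };

    for (size_t j = 0; j < sizeof(bns) / sizeof(bns[0]); j++)
        printf("lbn %lld -> %d level(s) of indirection\n",
            (long long)bns[j], indir_levels(bns[j], nindir));
    return (0);
}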
- */ - for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) { - if (i == 0) - return (EFBIG); - blockcnt *= MNINDIR(ump); - if (bn < blockcnt) - break; - } - - /* Calculate the address of the first meta-block. */ - if (realbn >= 0) - metalbn = -(realbn - bn + NIADDR - i); - else - metalbn = -(-realbn - bn + NIADDR - i); - - /* - * At each iteration, off is the offset into the bap array which is - * an array of disk addresses at the current level of indirection. - * The logical block number and the offset in that block are stored - * into the argument array. - */ - ap->in_lbn = metalbn; - ap->in_off = off = NIADDR - i; - ap->in_exists = 0; - ap++; - for (++numlevels; i <= NIADDR; i++) { - /* If searching for a meta-data block, quit when found. */ - if (metalbn == realbn) - break; - - blockcnt /= MNINDIR(ump); - off = (bn / blockcnt) % MNINDIR(ump); - - ++numlevels; - ap->in_lbn = metalbn; - ap->in_off = off; - ap->in_exists = 0; - ++ap; - - metalbn -= -1 + off * blockcnt; - } - if (nump) - *nump = numlevels; - return (0); -} -#endif diff --git a/src/sys/ufs/ufs/ufs_dirhash.c b/src/sys/ufs/ufs/ufs_dirhash.c deleted file mode 100644 index 6080a7d..0000000 --- a/src/sys/ufs/ufs/ufs_dirhash.c +++ /dev/null @@ -1,1086 +0,0 @@ -#if 0 -/* - * Copyright (c) 2001, 2002 Ian Dowse. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * This implements a hash-based lookup scheme for UFS directories. - */ - -#include -__FBSDID("$FreeBSD: src/sys/ufs/ufs/ufs_dirhash.c,v 1.17 2003/06/11 06:34:30 obrien Exp $"); - -#include "opt_ufs.h" - -#ifdef UFS_DIRHASH - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#define WRAPINCR(val, limit) (((val) + 1 == (limit)) ? 0 : ((val) + 1)) -#define WRAPDECR(val, limit) (((val) == 0) ? ((limit) - 1) : ((val) - 1)) -#define OFSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0) -#define BLKFREE2IDX(n) ((n) > DH_NFSTATS ? 
DH_NFSTATS : (n)) - -static MALLOC_DEFINE(M_DIRHASH, "UFS dirhash", "UFS directory hash tables"); - -SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD, 0, "UFS filesystem"); - -static int ufs_mindirhashsize = DIRBLKSIZ * 5; -SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_minsize, CTLFLAG_RW, - &ufs_mindirhashsize, - 0, "minimum directory size in bytes for which to use hashed lookup"); -static int ufs_dirhashmaxmem = 2 * 1024 * 1024; -SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_maxmem, CTLFLAG_RW, &ufs_dirhashmaxmem, - 0, "maximum allowed dirhash memory usage"); -static int ufs_dirhashmem; -SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_mem, CTLFLAG_RD, &ufs_dirhashmem, - 0, "current dirhash memory usage"); -static int ufs_dirhashcheck = 0; -SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_docheck, CTLFLAG_RW, &ufs_dirhashcheck, - 0, "enable extra sanity tests"); - - -static int ufsdirhash_hash(struct dirhash *dh, char *name, int namelen); -static void ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff); -static void ufsdirhash_delslot(struct dirhash *dh, int slot); -static int ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen, - doff_t offset); -static doff_t ufsdirhash_getprev(struct direct *dp, doff_t offset); -static int ufsdirhash_recycle(int wanted); - -static uma_zone_t ufsdirhash_zone; - -/* Dirhash list; recently-used entries are near the tail. */ -static TAILQ_HEAD(, dirhash) ufsdirhash_list; - -/* Protects: ufsdirhash_list, `dh_list' field, ufs_dirhashmem. */ -static struct mtx ufsdirhash_mtx; - -/* - * Locking order: - * ufsdirhash_mtx - * dh_mtx - * - * The dh_mtx mutex should be acquired either via the inode lock, or via - * ufsdirhash_mtx. Only the owner of the inode may free the associated - * dirhash, but anything can steal its memory and set dh_hash to NULL. - */ - -/* - * Attempt to build up a hash table for the directory contents in - * inode 'ip'. Returns 0 on success, or -1 of the operation failed. - */ -int -ufsdirhash_build(struct inode *ip) -{ - struct dirhash *dh; - struct buf *bp = NULL; - struct direct *ep; - struct vnode *vp; - doff_t bmask, pos; - int dirblocks, i, j, memreqd, nblocks, narrays, nslots, slot; - - /* Check if we can/should use dirhash. */ - if (ip->i_dirhash == NULL) { - if (ip->i_size < ufs_mindirhashsize || OFSFMT(ip->i_vnode)) - return (-1); - } else { - /* Hash exists, but sysctls could have changed. */ - if (ip->i_size < ufs_mindirhashsize || - ufs_dirhashmem > ufs_dirhashmaxmem) { - ufsdirhash_free(ip); - return (-1); - } - /* Check if hash exists and is intact (note: unlocked read). */ - if (ip->i_dirhash->dh_hash != NULL) - return (0); - /* Free the old, recycled hash and build a new one. */ - ufsdirhash_free(ip); - } - - /* Don't hash removed directories. */ - if (ip->i_effnlink == 0) - return (-1); - - vp = ip->i_vnode; - /* Allocate 50% more entries than this dir size could ever need. */ - KASSERT(ip->i_size >= DIRBLKSIZ, ("ufsdirhash_build size")); - nslots = ip->i_size / DIRECTSIZ(1); - nslots = (nslots * 3 + 1) / 2; - narrays = howmany(nslots, DH_NBLKOFF); - nslots = narrays * DH_NBLKOFF; - dirblocks = howmany(ip->i_size, DIRBLKSIZ); - nblocks = (dirblocks * 3 + 1) / 2; - - memreqd = sizeof(*dh) + narrays * sizeof(*dh->dh_hash) + - narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) + - nblocks * sizeof(*dh->dh_blkfree); - mtx_lock(&ufsdirhash_mtx); - if (memreqd + ufs_dirhashmem > ufs_dirhashmaxmem) { - mtx_unlock(&ufsdirhash_mtx); - if (memreqd > ufs_dirhashmaxmem / 2) - return (-1); - - /* Try to free some space. 
*/ - if (ufsdirhash_recycle(memreqd) != 0) - return (-1); - /* Enough was freed, and ufsdirhash_mtx has been locked. */ - } - ufs_dirhashmem += memreqd; - mtx_unlock(&ufsdirhash_mtx); - - /* - * Use non-blocking mallocs so that we will revert to a linear - * lookup on failure rather than potentially blocking forever. - */ - MALLOC(dh, struct dirhash *, sizeof *dh, M_DIRHASH, M_NOWAIT | M_ZERO); - if (dh == NULL) { - mtx_lock(&ufsdirhash_mtx); - ufs_dirhashmem -= memreqd; - mtx_unlock(&ufsdirhash_mtx); - return (-1); - } - MALLOC(dh->dh_hash, doff_t **, narrays * sizeof(dh->dh_hash[0]), - M_DIRHASH, M_NOWAIT | M_ZERO); - MALLOC(dh->dh_blkfree, u_int8_t *, nblocks * sizeof(dh->dh_blkfree[0]), - M_DIRHASH, M_NOWAIT); - if (dh->dh_hash == NULL || dh->dh_blkfree == NULL) - goto fail; - for (i = 0; i < narrays; i++) { - if ((dh->dh_hash[i] = uma_zalloc(ufsdirhash_zone, - M_WAITOK)) == NULL) - goto fail; - for (j = 0; j < DH_NBLKOFF; j++) - dh->dh_hash[i][j] = DIRHASH_EMPTY; - } - - /* Initialise the hash table and block statistics. */ - mtx_init(&dh->dh_mtx, "dirhash", NULL, MTX_DEF); - dh->dh_narrays = narrays; - dh->dh_hlen = nslots; - dh->dh_nblk = nblocks; - dh->dh_dirblks = dirblocks; - for (i = 0; i < dirblocks; i++) - dh->dh_blkfree[i] = DIRBLKSIZ / DIRALIGN; - for (i = 0; i < DH_NFSTATS; i++) - dh->dh_firstfree[i] = -1; - dh->dh_firstfree[DH_NFSTATS] = 0; - dh->dh_seqopt = 0; - dh->dh_seqoff = 0; - dh->dh_score = DH_SCOREINIT; - ip->i_dirhash = dh; - - bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; - pos = 0; - while (pos < ip->i_size) { - /* If necessary, get the next directory block. */ - if ((pos & bmask) == 0) { - if (bp != NULL) - brelse(bp); - if (UFS_BLKATOFF(vp, (off_t)pos, NULL, &bp) != 0) - goto fail; - } - - /* Add this entry to the hash. */ - ep = (struct direct *)((char *)bp->b_data + (pos & bmask)); - if (ep->d_reclen == 0 || ep->d_reclen > - DIRBLKSIZ - (pos & (DIRBLKSIZ - 1))) { - /* Corrupted directory. */ - brelse(bp); - goto fail; - } - if (ep->d_ino != 0) { - /* Add the entry (simplified ufsdirhash_add). */ - slot = ufsdirhash_hash(dh, ep->d_name, ep->d_namlen); - while (DH_ENTRY(dh, slot) != DIRHASH_EMPTY) - slot = WRAPINCR(slot, dh->dh_hlen); - dh->dh_hused++; - DH_ENTRY(dh, slot) = pos; - ufsdirhash_adjfree(dh, pos, -DIRSIZ(0, ep)); - } - pos += ep->d_reclen; - } - - if (bp != NULL) - brelse(bp); - mtx_lock(&ufsdirhash_mtx); - TAILQ_INSERT_TAIL(&ufsdirhash_list, dh, dh_list); - dh->dh_onlist = 1; - mtx_unlock(&ufsdirhash_mtx); - return (0); - -fail: - if (dh->dh_hash != NULL) { - for (i = 0; i < narrays; i++) - if (dh->dh_hash[i] != NULL) - uma_zfree(ufsdirhash_zone, dh->dh_hash[i]); - FREE(dh->dh_hash, M_DIRHASH); - } - if (dh->dh_blkfree != NULL) - FREE(dh->dh_blkfree, M_DIRHASH); - FREE(dh, M_DIRHASH); - ip->i_dirhash = NULL; - mtx_lock(&ufsdirhash_mtx); - ufs_dirhashmem -= memreqd; - mtx_unlock(&ufsdirhash_mtx); - return (-1); -} - -/* - * Free any hash table associated with inode 'ip'. - */ -void -ufsdirhash_free(struct inode *ip) -{ - struct dirhash *dh; - int i, mem; - - if ((dh = ip->i_dirhash) == NULL) - return; - mtx_lock(&ufsdirhash_mtx); - mtx_lock(&dh->dh_mtx); - if (dh->dh_onlist) - TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); - mtx_unlock(&dh->dh_mtx); - mtx_unlock(&ufsdirhash_mtx); - - /* The dirhash pointed to by 'dh' is exclusively ours now. 
*/ - - mem = sizeof(*dh); - if (dh->dh_hash != NULL) { - for (i = 0; i < dh->dh_narrays; i++) - uma_zfree(ufsdirhash_zone, dh->dh_hash[i]); - FREE(dh->dh_hash, M_DIRHASH); - FREE(dh->dh_blkfree, M_DIRHASH); - mem += dh->dh_narrays * sizeof(*dh->dh_hash) + - dh->dh_narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) + - dh->dh_nblk * sizeof(*dh->dh_blkfree); - } - mtx_destroy(&dh->dh_mtx); - FREE(dh, M_DIRHASH); - ip->i_dirhash = NULL; - - mtx_lock(&ufsdirhash_mtx); - ufs_dirhashmem -= mem; - mtx_unlock(&ufsdirhash_mtx); -} - -/* - * Find the offset of the specified name within the given inode. - * Returns 0 on success, ENOENT if the entry does not exist, or - * EJUSTRETURN if the caller should revert to a linear search. - * - * If successful, the directory offset is stored in *offp, and a - * pointer to a struct buf containing the entry is stored in *bpp. If - * prevoffp is non-NULL, the offset of the previous entry within - * the DIRBLKSIZ-sized block is stored in *prevoffp (if the entry - * is the first in a block, the start of the block is used). - */ -int -ufsdirhash_lookup(struct inode *ip, char *name, int namelen, doff_t *offp, - struct buf **bpp, doff_t *prevoffp) -{ - struct dirhash *dh, *dh_next; - struct direct *dp; - struct vnode *vp; - struct buf *bp; - doff_t blkoff, bmask, offset, prevoff; - int i, slot; - - if ((dh = ip->i_dirhash) == NULL) - return (EJUSTRETURN); - /* - * Move this dirhash towards the end of the list if it has a - * score higher than the next entry, and acquire the dh_mtx. - * Optimise the case where it's already the last by performing - * an unlocked read of the TAILQ_NEXT pointer. - * - * In both cases, end up holding just dh_mtx. - */ - if (TAILQ_NEXT(dh, dh_list) != NULL) { - mtx_lock(&ufsdirhash_mtx); - mtx_lock(&dh->dh_mtx); - /* - * If the new score will be greater than that of the next - * entry, then move this entry past it. With both mutexes - * held, dh_next won't go away, but its dh_score could - * change; that's not important since it is just a hint. - */ - if (dh->dh_hash != NULL && - (dh_next = TAILQ_NEXT(dh, dh_list)) != NULL && - dh->dh_score >= dh_next->dh_score) { - KASSERT(dh->dh_onlist, ("dirhash: not on list")); - TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); - TAILQ_INSERT_AFTER(&ufsdirhash_list, dh_next, dh, - dh_list); - } - mtx_unlock(&ufsdirhash_mtx); - } else { - /* Already the last, though that could change as we wait. */ - mtx_lock(&dh->dh_mtx); - } - if (dh->dh_hash == NULL) { - mtx_unlock(&dh->dh_mtx); - ufsdirhash_free(ip); - return (EJUSTRETURN); - } - - /* Update the score. */ - if (dh->dh_score < DH_SCOREMAX) - dh->dh_score++; - - vp = ip->i_vnode; - bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; - blkoff = -1; - bp = NULL; -restart: - slot = ufsdirhash_hash(dh, name, namelen); - - if (dh->dh_seqopt) { - /* - * Sequential access optimisation. dh_seqoff contains the - * offset of the directory entry immediately following - * the last entry that was looked up. Check if this offset - * appears in the hash chain for the name we are looking for. - */ - for (i = slot; (offset = DH_ENTRY(dh, i)) != DIRHASH_EMPTY; - i = WRAPINCR(i, dh->dh_hlen)) - if (offset == dh->dh_seqoff) - break; - if (offset == dh->dh_seqoff) { - /* - * We found an entry with the expected offset. This - * is probably the entry we want, but if not, the - * code below will turn off seqoff and retry. 
- */ - slot = i; - } else - dh->dh_seqopt = 0; - } - - for (; (offset = DH_ENTRY(dh, slot)) != DIRHASH_EMPTY; - slot = WRAPINCR(slot, dh->dh_hlen)) { - if (offset == DIRHASH_DEL) - continue; - mtx_unlock(&dh->dh_mtx); - - if (offset < 0 || offset >= ip->i_size) - panic("ufsdirhash_lookup: bad offset in hash array"); - if ((offset & ~bmask) != blkoff) { - if (bp != NULL) - brelse(bp); - blkoff = offset & ~bmask; - if (UFS_BLKATOFF(vp, (off_t)blkoff, NULL, &bp) != 0) - return (EJUSTRETURN); - } - dp = (struct direct *)(bp->b_data + (offset & bmask)); - if (dp->d_reclen == 0 || dp->d_reclen > - DIRBLKSIZ - (offset & (DIRBLKSIZ - 1))) { - /* Corrupted directory. */ - brelse(bp); - return (EJUSTRETURN); - } - if (dp->d_namlen == namelen && - bcmp(dp->d_name, name, namelen) == 0) { - /* Found. Get the prev offset if needed. */ - if (prevoffp != NULL) { - if (offset & (DIRBLKSIZ - 1)) { - prevoff = ufsdirhash_getprev(dp, - offset); - if (prevoff == -1) { - brelse(bp); - return (EJUSTRETURN); - } - } else - prevoff = offset; - *prevoffp = prevoff; - } - - /* Check for sequential access, and update offset. */ - if (dh->dh_seqopt == 0 && dh->dh_seqoff == offset) - dh->dh_seqopt = 1; - dh->dh_seqoff = offset + DIRSIZ(0, dp); - - *bpp = bp; - *offp = offset; - return (0); - } - - mtx_lock(&dh->dh_mtx); - if (dh->dh_hash == NULL) { - mtx_unlock(&dh->dh_mtx); - if (bp != NULL) - brelse(bp); - ufsdirhash_free(ip); - return (EJUSTRETURN); - } - /* - * When the name doesn't match in the seqopt case, go back - * and search normally. - */ - if (dh->dh_seqopt) { - dh->dh_seqopt = 0; - goto restart; - } - } - mtx_unlock(&dh->dh_mtx); - if (bp != NULL) - brelse(bp); - return (ENOENT); -} - -/* - * Find a directory block with room for 'slotneeded' bytes. Returns - * the offset of the directory entry that begins the free space. - * This will either be the offset of an existing entry that has free - * space at the end, or the offset of an entry with d_ino == 0 at - * the start of a DIRBLKSIZ block. - * - * To use the space, the caller may need to compact existing entries in - * the directory. The total number of bytes in all of the entries involved - * in the compaction is stored in *slotsize. In other words, all of - * the entries that must be compacted are exactly contained in the - * region beginning at the returned offset and spanning *slotsize bytes. - * - * Returns -1 if no space was found, indicating that the directory - * must be extended. - */ -doff_t -ufsdirhash_findfree(struct inode *ip, int slotneeded, int *slotsize) -{ - struct direct *dp; - struct dirhash *dh; - struct buf *bp; - doff_t pos, slotstart; - int dirblock, error, freebytes, i; - - if ((dh = ip->i_dirhash) == NULL) - return (-1); - mtx_lock(&dh->dh_mtx); - if (dh->dh_hash == NULL) { - mtx_unlock(&dh->dh_mtx); - ufsdirhash_free(ip); - return (-1); - } - - /* Find a directory block with the desired free space. */ - dirblock = -1; - for (i = howmany(slotneeded, DIRALIGN); i <= DH_NFSTATS; i++) - if ((dirblock = dh->dh_firstfree[i]) != -1) - break; - if (dirblock == -1) { - mtx_unlock(&dh->dh_mtx); - return (-1); - } - - KASSERT(dirblock < dh->dh_nblk && - dh->dh_blkfree[dirblock] >= howmany(slotneeded, DIRALIGN), - ("ufsdirhash_findfree: bad stats")); - mtx_unlock(&dh->dh_mtx); - pos = dirblock * DIRBLKSIZ; - error = UFS_BLKATOFF(ip->i_vnode, (off_t)pos, (char **)&dp, &bp); - if (error) - return (-1); - - /* Find the first entry with free space. 
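[Editor's aside] ufsdirhash_findfree() below locates a run of entries whose combined slack can hold a new directory entry. Here is a standalone sketch of that scan over one directory block, using a toy entry record (record length versus bytes actually used) in place of struct direct; names and sizes are illustrative.

#include <stdio.h>

/* Toy directory entry bookkeeping: bytes the record occupies (d_reclen)
 * and bytes it actually needs (DIRSIZ), with used == 0 for unused records. */
struct toy_dirent { int reclen; int used; };

/*
 * Scan one directory block the way ufsdirhash_findfree() does: find the
 * first entry with slack, then extend the region entry by entry until the
 * accumulated free bytes can hold a new entry of `slotneeded' bytes.
 * Returns the starting byte offset within the block, or -1 if it won't fit.
 */
static int
find_free_slot(const struct toy_dirent *ents, int nents, int slotneeded,
    int *slotsize)
{
    int i, e, start = -1, freebytes = 0;

    for (i = 0, e = 0; e < nents; i += ents[e].reclen, e++) {
        if (start < 0) {
            if (ents[e].used != 0 && ents[e].reclen <= ents[e].used)
                continue;       /* no slack in this entry */
            start = i;          /* first entry with free space */
        }
        freebytes += ents[e].reclen - ents[e].used;
        if (freebytes >= slotneeded) {
            *slotsize = i + ents[e].reclen - start;
            return (start);
        }
    }
    return (-1);
}

int
main(void)
{
    /* Three entries: full, 20 bytes of slack, unused 100-byte record. */
    struct toy_dirent blk[] = { { 100, 100 }, { 60, 40 }, { 100, 0 } };
    int size, off;

    off = find_free_slot(blk, 3, 64, &size);
    printf("new 64-byte entry fits at offset %d (compacting %d bytes)\n",
        off, size);
    return (0);
}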
*/ - for (i = 0; i < DIRBLKSIZ; ) { - if (dp->d_reclen == 0) { - brelse(bp); - return (-1); - } - if (dp->d_ino == 0 || dp->d_reclen > DIRSIZ(0, dp)) - break; - i += dp->d_reclen; - dp = (struct direct *)((char *)dp + dp->d_reclen); - } - if (i > DIRBLKSIZ) { - brelse(bp); - return (-1); - } - slotstart = pos + i; - - /* Find the range of entries needed to get enough space */ - freebytes = 0; - while (i < DIRBLKSIZ && freebytes < slotneeded) { - freebytes += dp->d_reclen; - if (dp->d_ino != 0) - freebytes -= DIRSIZ(0, dp); - if (dp->d_reclen == 0) { - brelse(bp); - return (-1); - } - i += dp->d_reclen; - dp = (struct direct *)((char *)dp + dp->d_reclen); - } - if (i > DIRBLKSIZ) { - brelse(bp); - return (-1); - } - if (freebytes < slotneeded) - panic("ufsdirhash_findfree: free mismatch"); - brelse(bp); - *slotsize = pos + i - slotstart; - return (slotstart); -} - -/* - * Return the start of the unused space at the end of a directory, or - * -1 if there are no trailing unused blocks. - */ -doff_t -ufsdirhash_enduseful(struct inode *ip) -{ - - struct dirhash *dh; - int i; - - if ((dh = ip->i_dirhash) == NULL) - return (-1); - mtx_lock(&dh->dh_mtx); - if (dh->dh_hash == NULL) { - mtx_unlock(&dh->dh_mtx); - ufsdirhash_free(ip); - return (-1); - } - - if (dh->dh_blkfree[dh->dh_dirblks - 1] != DIRBLKSIZ / DIRALIGN) { - mtx_unlock(&dh->dh_mtx); - return (-1); - } - - for (i = dh->dh_dirblks - 1; i >= 0; i--) - if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN) - break; - mtx_unlock(&dh->dh_mtx); - return ((doff_t)(i + 1) * DIRBLKSIZ); -} - -/* - * Insert information into the hash about a new directory entry. dirp - * points to a struct direct containing the entry, and offset specifies - * the offset of this entry. - */ -void -ufsdirhash_add(struct inode *ip, struct direct *dirp, doff_t offset) -{ - struct dirhash *dh; - int slot; - - if ((dh = ip->i_dirhash) == NULL) - return; - mtx_lock(&dh->dh_mtx); - if (dh->dh_hash == NULL) { - mtx_unlock(&dh->dh_mtx); - ufsdirhash_free(ip); - return; - } - - KASSERT(offset < dh->dh_dirblks * DIRBLKSIZ, - ("ufsdirhash_add: bad offset")); - /* - * Normal hash usage is < 66%. If the usage gets too high then - * remove the hash entirely and let it be rebuilt later. - */ - if (dh->dh_hused >= (dh->dh_hlen * 3) / 4) { - mtx_unlock(&dh->dh_mtx); - ufsdirhash_free(ip); - return; - } - - /* Find a free hash slot (empty or deleted), and add the entry. */ - slot = ufsdirhash_hash(dh, dirp->d_name, dirp->d_namlen); - while (DH_ENTRY(dh, slot) >= 0) - slot = WRAPINCR(slot, dh->dh_hlen); - if (DH_ENTRY(dh, slot) == DIRHASH_EMPTY) - dh->dh_hused++; - DH_ENTRY(dh, slot) = offset; - - /* Update the per-block summary info. */ - ufsdirhash_adjfree(dh, offset, -DIRSIZ(0, dirp)); - mtx_unlock(&dh->dh_mtx); -} - -/* - * Remove the specified directory entry from the hash. The entry to remove - * is defined by the name in `dirp', which must exist at the specified - * `offset' within the directory. - */ -void -ufsdirhash_remove(struct inode *ip, struct direct *dirp, doff_t offset) -{ - struct dirhash *dh; - int slot; - - if ((dh = ip->i_dirhash) == NULL) - return; - mtx_lock(&dh->dh_mtx); - if (dh->dh_hash == NULL) { - mtx_unlock(&dh->dh_mtx); - ufsdirhash_free(ip); - return; - } - - KASSERT(offset < dh->dh_dirblks * DIRBLKSIZ, - ("ufsdirhash_remove: bad offset")); - /* Find the entry */ - slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, offset); - - /* Remove the hash entry. */ - ufsdirhash_delslot(dh, slot); - - /* Update the per-block summary info. 
*/ - ufsdirhash_adjfree(dh, offset, DIRSIZ(0, dirp)); - mtx_unlock(&dh->dh_mtx); -} - -/* - * Change the offset associated with a directory entry in the hash. Used - * when compacting directory blocks. - */ -void -ufsdirhash_move(struct inode *ip, struct direct *dirp, doff_t oldoff, - doff_t newoff) -{ - struct dirhash *dh; - int slot; - - if ((dh = ip->i_dirhash) == NULL) - return; - mtx_lock(&dh->dh_mtx); - if (dh->dh_hash == NULL) { - mtx_unlock(&dh->dh_mtx); - ufsdirhash_free(ip); - return; - } - - KASSERT(oldoff < dh->dh_dirblks * DIRBLKSIZ && - newoff < dh->dh_dirblks * DIRBLKSIZ, - ("ufsdirhash_move: bad offset")); - /* Find the entry, and update the offset. */ - slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, oldoff); - DH_ENTRY(dh, slot) = newoff; - mtx_unlock(&dh->dh_mtx); -} - -/* - * Inform dirhash that the directory has grown by one block that - * begins at offset (i.e. the new length is offset + DIRBLKSIZ). - */ -void -ufsdirhash_newblk(struct inode *ip, doff_t offset) -{ - struct dirhash *dh; - int block; - - if ((dh = ip->i_dirhash) == NULL) - return; - mtx_lock(&dh->dh_mtx); - if (dh->dh_hash == NULL) { - mtx_unlock(&dh->dh_mtx); - ufsdirhash_free(ip); - return; - } - - KASSERT(offset == dh->dh_dirblks * DIRBLKSIZ, - ("ufsdirhash_newblk: bad offset")); - block = offset / DIRBLKSIZ; - if (block >= dh->dh_nblk) { - /* Out of space; must rebuild. */ - mtx_unlock(&dh->dh_mtx); - ufsdirhash_free(ip); - return; - } - dh->dh_dirblks = block + 1; - - /* Account for the new free block. */ - dh->dh_blkfree[block] = DIRBLKSIZ / DIRALIGN; - if (dh->dh_firstfree[DH_NFSTATS] == -1) - dh->dh_firstfree[DH_NFSTATS] = block; - mtx_unlock(&dh->dh_mtx); -} - -/* - * Inform dirhash that the directory is being truncated. - */ -void -ufsdirhash_dirtrunc(struct inode *ip, doff_t offset) -{ - struct dirhash *dh; - int block, i; - - if ((dh = ip->i_dirhash) == NULL) - return; - mtx_lock(&dh->dh_mtx); - if (dh->dh_hash == NULL) { - mtx_unlock(&dh->dh_mtx); - ufsdirhash_free(ip); - return; - } - - KASSERT(offset <= dh->dh_dirblks * DIRBLKSIZ, - ("ufsdirhash_dirtrunc: bad offset")); - block = howmany(offset, DIRBLKSIZ); - /* - * If the directory shrinks to less than 1/8 of dh_nblk blocks - * (about 20% of its original size due to the 50% extra added in - * ufsdirhash_build) then free it, and let the caller rebuild - * if necessary. - */ - if (block < dh->dh_nblk / 8 && dh->dh_narrays > 1) { - mtx_unlock(&dh->dh_mtx); - ufsdirhash_free(ip); - return; - } - - /* - * Remove any `first free' information pertaining to the - * truncated blocks. All blocks we're removing should be - * completely unused. - */ - if (dh->dh_firstfree[DH_NFSTATS] >= block) - dh->dh_firstfree[DH_NFSTATS] = -1; - for (i = block; i < dh->dh_dirblks; i++) - if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN) - panic("ufsdirhash_dirtrunc: blocks in use"); - for (i = 0; i < DH_NFSTATS; i++) - if (dh->dh_firstfree[i] >= block) - panic("ufsdirhash_dirtrunc: first free corrupt"); - dh->dh_dirblks = block; - mtx_unlock(&dh->dh_mtx); -} - -/* - * Debugging function to check that the dirhash information about - * a directory block matches its actual contents. Panics if a mismatch - * is detected. - * - * On entry, `buf' should point to the start of an in-core - * DIRBLKSIZ-sized directory block, and `offset' should contain the - * offset from the start of the directory of that block. 
- */ -void -ufsdirhash_checkblock(struct inode *ip, char *buf, doff_t offset) -{ - struct dirhash *dh; - struct direct *dp; - int block, ffslot, i, nfree; - - if (!ufs_dirhashcheck) - return; - if ((dh = ip->i_dirhash) == NULL) - return; - mtx_lock(&dh->dh_mtx); - if (dh->dh_hash == NULL) { - mtx_unlock(&dh->dh_mtx); - ufsdirhash_free(ip); - return; - } - - block = offset / DIRBLKSIZ; - if ((offset & (DIRBLKSIZ - 1)) != 0 || block >= dh->dh_dirblks) - panic("ufsdirhash_checkblock: bad offset"); - - nfree = 0; - for (i = 0; i < DIRBLKSIZ; i += dp->d_reclen) { - dp = (struct direct *)(buf + i); - if (dp->d_reclen == 0 || i + dp->d_reclen > DIRBLKSIZ) - panic("ufsdirhash_checkblock: bad dir"); - - if (dp->d_ino == 0) { -#if 0 - /* - * XXX entries with d_ino == 0 should only occur - * at the start of a DIRBLKSIZ block. However the - * ufs code is tolerant of such entries at other - * offsets, and fsck does not fix them. - */ - if (i != 0) - panic("ufsdirhash_checkblock: bad dir inode"); -#endif - nfree += dp->d_reclen; - continue; - } - - /* Check that the entry exists (will panic if it doesn't). */ - ufsdirhash_findslot(dh, dp->d_name, dp->d_namlen, offset + i); - - nfree += dp->d_reclen - DIRSIZ(0, dp); - } - if (i != DIRBLKSIZ) - panic("ufsdirhash_checkblock: bad dir end"); - - if (dh->dh_blkfree[block] * DIRALIGN != nfree) - panic("ufsdirhash_checkblock: bad free count"); - - ffslot = BLKFREE2IDX(nfree / DIRALIGN); - for (i = 0; i <= DH_NFSTATS; i++) - if (dh->dh_firstfree[i] == block && i != ffslot) - panic("ufsdirhash_checkblock: bad first-free"); - if (dh->dh_firstfree[ffslot] == -1) - panic("ufsdirhash_checkblock: missing first-free entry"); - mtx_unlock(&dh->dh_mtx); -} - -/* - * Hash the specified filename into a dirhash slot. - */ -static int -ufsdirhash_hash(struct dirhash *dh, char *name, int namelen) -{ - u_int32_t hash; - - /* - * We hash the name and then some other bit of data that is - * invariant over the dirhash's lifetime. Otherwise names - * differing only in the last byte are placed close to one - * another in the table, which is bad for linear probing. - */ - hash = fnv_32_buf(name, namelen, FNV1_32_INIT); - hash = fnv_32_buf(dh, sizeof(dh), hash); - return (hash % dh->dh_hlen); -} - -/* - * Adjust the number of free bytes in the block containing `offset' - * by the value specified by `diff'. - * - * The caller must ensure we have exclusive access to `dh'; normally - * that means that dh_mtx should be held, but this is also called - * from ufsdirhash_build() where exclusive access can be assumed. - */ -static void -ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff) -{ - int block, i, nfidx, ofidx; - - /* Update the per-block summary info. */ - block = offset / DIRBLKSIZ; - KASSERT(block < dh->dh_nblk && block < dh->dh_dirblks, - ("dirhash bad offset")); - ofidx = BLKFREE2IDX(dh->dh_blkfree[block]); - dh->dh_blkfree[block] = (int)dh->dh_blkfree[block] + (diff / DIRALIGN); - nfidx = BLKFREE2IDX(dh->dh_blkfree[block]); - - /* Update the `first free' list if necessary. */ - if (ofidx != nfidx) { - /* If removing, scan forward for the next block. */ - if (dh->dh_firstfree[ofidx] == block) { - for (i = block + 1; i < dh->dh_dirblks; i++) - if (BLKFREE2IDX(dh->dh_blkfree[i]) == ofidx) - break; - dh->dh_firstfree[ofidx] = (i < dh->dh_dirblks) ? 
i : -1; - } - - /* Make this the new `first free' if necessary */ - if (dh->dh_firstfree[nfidx] > block || - dh->dh_firstfree[nfidx] == -1) - dh->dh_firstfree[nfidx] = block; - } -} - -/* - * Find the specified name which should have the specified offset. - * Returns a slot number, and panics on failure. - * - * `dh' must be locked on entry and remains so on return. - */ -static int -ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen, doff_t offset) -{ - int slot; - - mtx_assert(&dh->dh_mtx, MA_OWNED); - - /* Find the entry. */ - KASSERT(dh->dh_hused < dh->dh_hlen, ("dirhash find full")); - slot = ufsdirhash_hash(dh, name, namelen); - while (DH_ENTRY(dh, slot) != offset && - DH_ENTRY(dh, slot) != DIRHASH_EMPTY) - slot = WRAPINCR(slot, dh->dh_hlen); - if (DH_ENTRY(dh, slot) != offset) - panic("ufsdirhash_findslot: '%.*s' not found", namelen, name); - - return (slot); -} - -/* - * Remove the entry corresponding to the specified slot from the hash array. - * - * `dh' must be locked on entry and remains so on return. - */ -static void -ufsdirhash_delslot(struct dirhash *dh, int slot) -{ - int i; - - mtx_assert(&dh->dh_mtx, MA_OWNED); - - /* Mark the entry as deleted. */ - DH_ENTRY(dh, slot) = DIRHASH_DEL; - - /* If this is the end of a chain of DIRHASH_DEL slots, remove them. */ - for (i = slot; DH_ENTRY(dh, i) == DIRHASH_DEL; ) - i = WRAPINCR(i, dh->dh_hlen); - if (DH_ENTRY(dh, i) == DIRHASH_EMPTY) { - i = WRAPDECR(i, dh->dh_hlen); - while (DH_ENTRY(dh, i) == DIRHASH_DEL) { - DH_ENTRY(dh, i) = DIRHASH_EMPTY; - dh->dh_hused--; - i = WRAPDECR(i, dh->dh_hlen); - } - KASSERT(dh->dh_hused >= 0, ("ufsdirhash_delslot neg hlen")); - } -} - -/* - * Given a directory entry and its offset, find the offset of the - * previous entry in the same DIRBLKSIZ-sized block. Returns an - * offset, or -1 if there is no previous entry in the block or some - * other problem occurred. - */ -static doff_t -ufsdirhash_getprev(struct direct *dirp, doff_t offset) -{ - struct direct *dp; - char *blkbuf; - doff_t blkoff, prevoff; - int entrypos, i; - - blkoff = offset & ~(DIRBLKSIZ - 1); /* offset of start of block */ - entrypos = offset & (DIRBLKSIZ - 1); /* entry relative to block */ - blkbuf = (char *)dirp - entrypos; - prevoff = blkoff; - - /* If `offset' is the start of a block, there is no previous entry. */ - if (entrypos == 0) - return (-1); - - /* Scan from the start of the block until we get to the entry. */ - for (i = 0; i < entrypos; i += dp->d_reclen) { - dp = (struct direct *)(blkbuf + i); - if (dp->d_reclen == 0 || i + dp->d_reclen > entrypos) - return (-1); /* Corrupted directory. */ - prevoff = blkoff + i; - } - return (prevoff); -} - -/* - * Try to free up `wanted' bytes by stealing memory from existing - * dirhashes. Returns zero with ufsdirhash_mtx locked if successful. - */ -static int -ufsdirhash_recycle(int wanted) -{ - struct dirhash *dh; - doff_t **hash; - u_int8_t *blkfree; - int i, mem, narrays; - - mtx_lock(&ufsdirhash_mtx); - while (wanted + ufs_dirhashmem > ufs_dirhashmaxmem) { - /* Find a dirhash, and lock it. */ - if ((dh = TAILQ_FIRST(&ufsdirhash_list)) == NULL) { - mtx_unlock(&ufsdirhash_mtx); - return (-1); - } - mtx_lock(&dh->dh_mtx); - KASSERT(dh->dh_hash != NULL, ("dirhash: NULL hash on list")); - - /* Decrement the score; only recycle if it becomes zero. */ - if (--dh->dh_score > 0) { - mtx_unlock(&dh->dh_mtx); - mtx_unlock(&ufsdirhash_mtx); - return (-1); - } - - /* Remove it from the list and detach its memory. 
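ufsdirhash_getprev() above is a plain walk of variable-length records chained by their reclen fields: start at the beginning of the block and keep stepping until the next step would reach or pass the target offset. The sketch below shows the same traversal over a toy record format (a single length byte per record, an assumption for the example rather than struct direct).

    /*
     * Illustrative sketch: records chained by a length field, scanned from
     * the start of the block to find the predecessor of a given offset,
     * as ufsdirhash_getprev() does for a DIRBLKSIZ directory block.
     */
    #include <stdio.h>

    #define BLKSIZE 64

    /* Return the offset of the record before 'offset', or -1 on error. */
    static int
    prev_offset(const unsigned char *blk, int offset)
    {
        int i, prev, reclen;

        if (offset == 0)                    /* first record: no predecessor */
            return (-1);
        prev = 0;
        for (i = 0; i < offset; i += reclen) {
            reclen = blk[i];                /* first byte = record length */
            if (reclen == 0 || i + reclen > offset)
                return (-1);                /* corrupt chain */
            prev = i;
        }
        return (prev);
    }

    int
    main(void)
    {
        unsigned char blk[BLKSIZE] = { 0 };

        blk[0] = 12;                        /* record of length 12 at 0 */
        blk[12] = 20;                       /* record of length 20 at 12 */
        blk[32] = 32;                       /* record of length 32 at 32 */
        printf("prev of 32 is %d\n", prev_offset(blk, 32));    /* 12 */
        return (0);
    }

Because a record only knows the distance to its successor, finding a predecessor always costs a scan from the start, which is why the scan is bounded to a single DIRBLKSIZ block.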
*/ - TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); - dh->dh_onlist = 0; - hash = dh->dh_hash; - dh->dh_hash = NULL; - blkfree = dh->dh_blkfree; - dh->dh_blkfree = NULL; - narrays = dh->dh_narrays; - mem = narrays * sizeof(*dh->dh_hash) + - narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) + - dh->dh_nblk * sizeof(*dh->dh_blkfree); - - /* Unlock everything, free the detached memory. */ - mtx_unlock(&dh->dh_mtx); - mtx_unlock(&ufsdirhash_mtx); - for (i = 0; i < narrays; i++) - uma_zfree(ufsdirhash_zone, hash[i]); - FREE(hash, M_DIRHASH); - FREE(blkfree, M_DIRHASH); - - /* Account for the returned memory, and repeat if necessary. */ - mtx_lock(&ufsdirhash_mtx); - ufs_dirhashmem -= mem; - } - /* Success; return with ufsdirhash_mtx locked. */ - return (0); -} - - -void -ufsdirhash_init() -{ - ufsdirhash_zone = uma_zcreate("DIRHASH", DH_NBLKOFF * sizeof(doff_t), - NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); - mtx_init(&ufsdirhash_mtx, "dirhash list", NULL, MTX_DEF); - TAILQ_INIT(&ufsdirhash_list); -} - -void -ufsdirhash_uninit() -{ - KASSERT(TAILQ_EMPTY(&ufsdirhash_list), ("ufsdirhash_uninit")); - uma_zdestroy(ufsdirhash_zone); - mtx_destroy(&ufsdirhash_mtx); -} - -#endif /* UFS_DIRHASH */ -#endif diff --git a/src/sys/ufs/ufs/ufs_extattr.c b/src/sys/ufs/ufs/ufs_extattr.c deleted file mode 100644 index ebccd86..0000000 --- a/src/sys/ufs/ufs/ufs_extattr.c +++ /dev/null @@ -1,1303 +0,0 @@ -#if 0 -/*- - * Copyright (c) 1999, 2000, 2001, 2002 Robert N. M. Watson - * Copyright (c) 2002, 2003 Networks Associates Technology, Inc. - * All rights reserved. - * - * This software was developed by Robert Watson for the TrustedBSD Project. - * - * This software was developed for the FreeBSD Project in part by Network - * Associates Laboratories, the Security Research Division of Network - * Associates, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), - * as part of the DARPA CHATS research program. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - */ - -/* - * Support for filesystem extended attribute: UFS-specific support functions. 
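ufsdirhash_recycle() above also illustrates a common reclaim ordering: unlink the structure and detach its memory while the locks are held, release every lock, and only then do the expensive free. A minimal pthreads sketch of that ordering follows; the list, locks and names are assumptions for the example, not the kernel primitives.

    /*
     * Illustrative pthreads sketch of the reclaim ordering in
     * ufsdirhash_recycle(): unlink and detach under the locks,
     * drop the locks, then free.
     */
    #include <pthread.h>
    #include <stdlib.h>

    struct entry {
        struct entry    *next;
        void            *mem;               /* bulk memory owned by entry */
        pthread_mutex_t  lock;
    };

    static struct entry    *list_head;
    static pthread_mutex_t  list_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Reclaim the first entry's memory; 0 on success, -1 if list empty. */
    static int
    recycle_one(void)
    {
        struct entry *e;
        void *mem;

        pthread_mutex_lock(&list_lock);
        if ((e = list_head) == NULL) {
            pthread_mutex_unlock(&list_lock);
            return (-1);
        }
        pthread_mutex_lock(&e->lock);
        list_head = e->next;                /* unlink from shared list */
        mem = e->mem;                       /* detach the memory ... */
        e->mem = NULL;                      /* ... so others see it gone */
        pthread_mutex_unlock(&e->lock);
        pthread_mutex_unlock(&list_lock);

        free(mem);                          /* expensive work, unlocked */
        return (0);
    }

    int
    main(void)
    {
        struct entry *e = calloc(1, sizeof(*e));

        if (e == NULL)
            return (1);
        e->mem = malloc(128);
        pthread_mutex_init(&e->lock, NULL);
        list_head = e;
        (void)recycle_one();
        pthread_mutex_destroy(&e->lock);
        free(e);
        return (0);
    }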
- */ - -#include -__FBSDID("$FreeBSD: src/sys/ufs/ufs/ufs_extattr.c,v 1.67 2003/07/28 18:53:28 rwatson Exp $"); - -#include "opt_ufs.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include - -#ifdef UFS_EXTATTR - -static MALLOC_DEFINE(M_UFS_EXTATTR, "ufs_extattr", "ufs extended attribute"); - -static int ufs_extattr_sync = 0; -SYSCTL_INT(_debug, OID_AUTO, ufs_extattr_sync, CTLFLAG_RW, &ufs_extattr_sync, - 0, ""); - -static int ufs_extattr_valid_attrname(int attrnamespace, - const char *attrname); -static int ufs_extattr_enable_with_open(struct ufsmount *ump, - struct vnode *vp, int attrnamespace, const char *attrname, - struct thread *td); -static int ufs_extattr_enable(struct ufsmount *ump, int attrnamespace, - const char *attrname, struct vnode *backing_vnode, - struct thread *td); -static int ufs_extattr_disable(struct ufsmount *ump, int attrnamespace, - const char *attrname, struct thread *td); -static int ufs_extattr_get(struct vnode *vp, int attrnamespace, - const char *name, struct uio *uio, size_t *size, - struct ucred *cred, struct thread *td); -static int ufs_extattr_set(struct vnode *vp, int attrnamespace, - const char *name, struct uio *uio, struct ucred *cred, - struct thread *td); -static int ufs_extattr_rm(struct vnode *vp, int attrnamespace, - const char *name, struct ucred *cred, struct thread *td); - -/* - * Per-FS attribute lock protecting attribute operations. - * XXX Right now there is a lot of lock contention due to having a single - * lock per-FS; really, this should be far more fine-grained. - */ -static void -ufs_extattr_uepm_lock(struct ufsmount *ump, struct thread *td) -{ - - /* Ideally, LK_CANRECURSE would not be used, here. */ - lockmgr(&ump->um_extattr.uepm_lock, LK_EXCLUSIVE | LK_RETRY | - LK_CANRECURSE, 0, td); -} - -static void -ufs_extattr_uepm_unlock(struct ufsmount *ump, struct thread *td) -{ - - lockmgr(&ump->um_extattr.uepm_lock, LK_RELEASE, 0, td); -} - -/* - * Determine whether the name passed is a valid name for an actual - * attribute. - * - * Invalid currently consists of: - * NULL pointer for attrname - * zero-length attrname (used to retrieve application attribute list) - */ -static int -ufs_extattr_valid_attrname(int attrnamespace, const char *attrname) -{ - - if (attrname == NULL) - return (0); - if (strlen(attrname) == 0) - return (0); - return (1); -} - -/* - * Locate an attribute given a name and mountpoint. - * Must be holding uepm lock for the mount point. - */ -static struct ufs_extattr_list_entry * -ufs_extattr_find_attr(struct ufsmount *ump, int attrnamespace, - const char *attrname) -{ - struct ufs_extattr_list_entry *search_attribute; - - for (search_attribute = LIST_FIRST(&ump->um_extattr.uepm_list); - search_attribute; - search_attribute = LIST_NEXT(search_attribute, uele_entries)) { - if (!(strncmp(attrname, search_attribute->uele_attrname, - UFS_EXTATTR_MAXEXTATTRNAME)) && - (attrnamespace == search_attribute->uele_attrnamespace)) { - return (search_attribute); - } - } - - return (0); -} - -/* - * Initialize per-FS structures supporting extended attributes. Do not - * start extended attributes yet. - */ -void -ufs_extattr_uepm_init(struct ufs_extattr_per_mount *uepm) -{ - - uepm->uepm_flags = 0; - - LIST_INIT(&uepm->uepm_list); - /* XXX is PVFS right, here? 
*/ - lockinit(&uepm->uepm_lock, PVFS, "extattr", 0, 0); - uepm->uepm_flags |= UFS_EXTATTR_UEPM_INITIALIZED; -} - -/* - * Destroy per-FS structures supporting extended attributes. Assumes - * that EAs have already been stopped, and will panic if not. - */ -void -ufs_extattr_uepm_destroy(struct ufs_extattr_per_mount *uepm) -{ - - if (!(uepm->uepm_flags & UFS_EXTATTR_UEPM_INITIALIZED)) - panic("ufs_extattr_uepm_destroy: not initialized"); - - if ((uepm->uepm_flags & UFS_EXTATTR_UEPM_STARTED)) - panic("ufs_extattr_uepm_destroy: called while still started"); - - /* - * It's not clear that either order for the next two lines is - * ideal, and it should never be a problem if this is only called - * during unmount, and with vfs_busy(). - */ - uepm->uepm_flags &= ~UFS_EXTATTR_UEPM_INITIALIZED; - lockdestroy(&uepm->uepm_lock); -} - -/* - * Start extended attribute support on an FS. - */ -int -ufs_extattr_start(struct mount *mp, struct thread *td) -{ - struct ufsmount *ump; - int error = 0; - - ump = VFSTOUFS(mp); - - ufs_extattr_uepm_lock(ump, td); - - if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_INITIALIZED)) { - error = EOPNOTSUPP; - goto unlock; - } - if (ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED) { - error = EBUSY; - goto unlock; - } - - ump->um_extattr.uepm_flags |= UFS_EXTATTR_UEPM_STARTED; - - ump->um_extattr.uepm_ucred = crhold(td->td_ucred); - -unlock: - ufs_extattr_uepm_unlock(ump, td); - - return (error); -} - -#ifdef UFS_EXTATTR_AUTOSTART -/* - * Helper routine: given a locked parent directory and filename, return - * the locked vnode of the inode associated with the name. Will not - * follow symlinks, may return any type of vnode. Lock on parent will - * be released even in the event of a failure. In the event that the - * target is the parent (i.e., "."), there will be two references and - * one lock, requiring the caller to possibly special-case. - */ -#define UE_GETDIR_LOCKPARENT 1 -#define UE_GETDIR_LOCKPARENT_DONT 2 -static int -ufs_extattr_lookup(struct vnode *start_dvp, int lockparent, char *dirname, - struct vnode **vp, struct thread *td) -{ - struct vop_cachedlookup_args vargs; - struct componentname cnp; - struct vnode *target_vp; - int error; - - bzero(&cnp, sizeof(cnp)); - cnp.cn_nameiop = LOOKUP; - cnp.cn_flags = ISLASTCN; - if (lockparent == UE_GETDIR_LOCKPARENT) - cnp.cn_flags |= LOCKPARENT; - cnp.cn_thread = td; - cnp.cn_cred = td->td_ucred; - cnp.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); - cnp.cn_nameptr = cnp.cn_pnbuf; - error = copystr(dirname, cnp.cn_pnbuf, MAXPATHLEN, - (size_t *) &cnp.cn_namelen); - if (error) { - if (lockparent == UE_GETDIR_LOCKPARENT_DONT) { - VOP_UNLOCK(start_dvp, 0, td); - } - uma_zfree(namei_zone, cnp.cn_pnbuf); - printf("ufs_extattr_lookup: copystr failed\n"); - return (error); - } - cnp.cn_namelen--; /* trim nul termination */ - vargs.a_desc = NULL; - vargs.a_dvp = start_dvp; - vargs.a_vpp = &target_vp; - vargs.a_cnp = &cnp; - error = ufs_lookup(&vargs); - uma_zfree(namei_zone, cnp.cn_pnbuf); - if (error) { - /* - * Error condition, may have to release the lock on the parent - * if ufs_lookup() didn't. - */ - if (!(cnp.cn_flags & PDIRUNLOCK) && - (lockparent == UE_GETDIR_LOCKPARENT_DONT)) - VOP_UNLOCK(start_dvp, 0, td); - - /* - * Check that ufs_lookup() didn't release the lock when we - * didn't want it to. 
- */ - if ((cnp.cn_flags & PDIRUNLOCK) && - (lockparent == UE_GETDIR_LOCKPARENT)) - panic("ufs_extattr_lookup: lockparent but PDIRUNLOCK"); - - return (error); - } -/* - if (target_vp == start_dvp) - panic("ufs_extattr_lookup: target_vp == start_dvp"); -*/ - - if (target_vp != start_dvp && - !(cnp.cn_flags & PDIRUNLOCK) && - (lockparent == UE_GETDIR_LOCKPARENT_DONT)) - panic("ufs_extattr_lookup: !lockparent but !PDIRUNLOCK"); - - if ((cnp.cn_flags & PDIRUNLOCK) && - (lockparent == UE_GETDIR_LOCKPARENT)) - panic("ufs_extattr_lookup: lockparent but PDIRUNLOCK"); - - /* printf("ufs_extattr_lookup: success\n"); */ - *vp = target_vp; - return (0); -} -#endif /* !UFS_EXTATTR_AUTOSTART */ - -/* - * Enable an EA using the passed filesystem, backing vnode, attribute name, - * namespace, and proc. Will perform a VOP_OPEN() on the vp, so expects vp - * to be locked when passed in. The vnode will be returned unlocked, - * regardless of success/failure of the function. As a result, the caller - * will always need to vrele(), but not vput(). - */ -static int -ufs_extattr_enable_with_open(struct ufsmount *ump, struct vnode *vp, - int attrnamespace, const char *attrname, struct thread *td) -{ - int error; - - error = VOP_OPEN(vp, FREAD|FWRITE, td->td_ucred, td, -1); - if (error) { - printf("ufs_extattr_enable_with_open.VOP_OPEN(): failed " - "with %d\n", error); - VOP_UNLOCK(vp, 0, td); - return (error); - } - - /* - * XXX: Note, should VOP_CLOSE() if vfs_object_create() fails, but due - * to a similar piece of code in vn_open(), we don't. - */ - if (vn_canvmio(vp) == TRUE) - if ((error = vfs_object_create(vp, td, - td->td_ucred)) != 0) { - /* - * XXX: bug replicated from vn_open(): should - * VOP_CLOSE() here. - */ - VOP_UNLOCK(vp, 0, td); - return (error); - } - - vp->v_writecount++; - - vref(vp); - - VOP_UNLOCK(vp, 0, td); - - error = ufs_extattr_enable(ump, attrnamespace, attrname, vp, td); - if (error != 0) - vn_close(vp, FREAD|FWRITE, td->td_ucred, td); - return (error); -} - -#ifdef UFS_EXTATTR_AUTOSTART -/* - * Given a locked directory vnode, iterate over the names in the directory - * and use ufs_extattr_lookup() to retrieve locked vnodes of potential - * attribute files. Then invoke ufs_extattr_enable_with_open() on each - * to attempt to start the attribute. Leaves the directory locked on - * exit. 
- */ -static int -ufs_extattr_iterate_directory(struct ufsmount *ump, struct vnode *dvp, - int attrnamespace, struct thread *td) -{ - struct vop_readdir_args vargs; - struct dirent *dp, *edp; - struct vnode *attr_vp; - struct uio auio; - struct iovec aiov; - char *dirbuf; - int error, eofflag = 0; - - if (dvp->v_type != VDIR) - return (ENOTDIR); - - MALLOC(dirbuf, char *, DIRBLKSIZ, M_TEMP, M_WAITOK); - - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_rw = UIO_READ; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_td = td; - auio.uio_offset = 0; - - vargs.a_desc = NULL; - vargs.a_vp = dvp; - vargs.a_uio = &auio; - vargs.a_cred = td->td_ucred; - vargs.a_eofflag = &eofflag; - vargs.a_ncookies = NULL; - vargs.a_cookies = NULL; - - while (!eofflag) { - auio.uio_resid = DIRBLKSIZ; - aiov.iov_base = dirbuf; - aiov.iov_len = DIRBLKSIZ; - error = ufs_readdir(&vargs); - if (error) { - printf("ufs_extattr_iterate_directory: ufs_readdir " - "%d\n", error); - return (error); - } - - edp = (struct dirent *)&dirbuf[DIRBLKSIZ]; - for (dp = (struct dirent *)dirbuf; dp < edp; ) { -#if (BYTE_ORDER == LITTLE_ENDIAN) - dp->d_type = dp->d_namlen; - dp->d_namlen = 0; -#else - dp->d_type = 0; -#endif - if (dp->d_reclen == 0) - break; - error = ufs_extattr_lookup(dvp, UE_GETDIR_LOCKPARENT, - dp->d_name, &attr_vp, td); - if (error) { - printf("ufs_extattr_iterate_directory: lookup " - "%s %d\n", dp->d_name, error); - } else if (attr_vp == dvp) { - vrele(attr_vp); - } else if (attr_vp->v_type != VREG) { - vput(attr_vp); - } else { - error = ufs_extattr_enable_with_open(ump, - attr_vp, attrnamespace, dp->d_name, td); - vrele(attr_vp); - if (error) { - printf("ufs_extattr_iterate_directory: " - "enable %s %d\n", dp->d_name, - error); - } else if (bootverbose) { - printf("UFS autostarted EA %s\n", - dp->d_name); - } - } - dp = (struct dirent *) ((char *)dp + dp->d_reclen); - if (dp >= edp) - break; - } - } - FREE(dirbuf, M_TEMP); - - return (0); -} - -/* - * Auto-start of extended attributes, to be executed (optionally) at - * mount-time. - */ -int -ufs_extattr_autostart(struct mount *mp, struct thread *td) -{ - struct vnode *rvp, *attr_dvp, *attr_system_dvp, *attr_user_dvp; - int error; - - /* - * Does UFS_EXTATTR_FSROOTSUBDIR exist off the filesystem root? - * If so, automatically start EA's. - */ - error = VFS_ROOT(mp, &rvp); - if (error) { - printf("ufs_extattr_autostart.VFS_ROOT() returned %d\n", - error); - return (error); - } - - error = ufs_extattr_lookup(rvp, UE_GETDIR_LOCKPARENT_DONT, - UFS_EXTATTR_FSROOTSUBDIR, &attr_dvp, td); - if (error) { - /* rvp ref'd but now unlocked */ - vrele(rvp); - return (error); - } - if (rvp == attr_dvp) { - /* Should never happen. */ - vrele(attr_dvp); - vput(rvp); - return (EINVAL); - } - vrele(rvp); - - if (attr_dvp->v_type != VDIR) { - printf("ufs_extattr_autostart: %s != VDIR\n", - UFS_EXTATTR_FSROOTSUBDIR); - goto return_vput_attr_dvp; - } - - error = ufs_extattr_start(mp, td); - if (error) { - printf("ufs_extattr_autostart: ufs_extattr_start failed (%d)\n", - error); - goto return_vput_attr_dvp; - } - - /* - * Look for two subdirectories: UFS_EXTATTR_SUBDIR_SYSTEM, - * UFS_EXTATTR_SUBDIR_USER. For each, iterate over the sub-directory, - * and start with appropriate type. Failures in either don't - * result in an over-all failure. attr_dvp is left locked to - * be cleaned up on exit. 
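The directory walk above (read a block of entries, step through them by d_reclen, and try to open each name that turns out to be a regular file) has a direct userland analog. The sketch below uses opendir()/readdir() and fstatat() in place of the raw ufs_readdir() buffer walk; the directory path and the printf stand in for the real enable step and are assumptions for the example.

    /*
     * Userland analog of the walk above: visit each name in a directory
     * and act only on the regular files, as the autostart code does when
     * opening candidate attribute backing files.
     */
    #include <dirent.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/stat.h>

    static int
    visit_regular_files(const char *dirpath)
    {
        DIR *dir;
        struct dirent *dp;
        struct stat sb;

        if ((dir = opendir(dirpath)) == NULL)
            return (-1);
        while ((dp = readdir(dir)) != NULL) {
            if (strcmp(dp->d_name, ".") == 0 ||
                strcmp(dp->d_name, "..") == 0)
                continue;                   /* skip the directory itself */
            if (fstatat(dirfd(dir), dp->d_name, &sb, 0) != 0)
                continue;
            if (!S_ISREG(sb.st_mode))
                continue;                   /* only plain files qualify */
            printf("would enable attribute file %s\n", dp->d_name);
        }
        closedir(dir);
        return (0);
    }

    int
    main(void)
    {
        return (visit_regular_files("/tmp") == 0 ? 0 : 1);
    }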
- */ - error = ufs_extattr_lookup(attr_dvp, UE_GETDIR_LOCKPARENT, - UFS_EXTATTR_SUBDIR_SYSTEM, &attr_system_dvp, td); - if (!error) { - error = ufs_extattr_iterate_directory(VFSTOUFS(mp), - attr_system_dvp, EXTATTR_NAMESPACE_SYSTEM, td); - if (error) - printf("ufs_extattr_iterate_directory returned %d\n", - error); - vput(attr_system_dvp); - } - - error = ufs_extattr_lookup(attr_dvp, UE_GETDIR_LOCKPARENT, - UFS_EXTATTR_SUBDIR_USER, &attr_user_dvp, td); - if (!error) { - error = ufs_extattr_iterate_directory(VFSTOUFS(mp), - attr_user_dvp, EXTATTR_NAMESPACE_USER, td); - if (error) - printf("ufs_extattr_iterate_directory returned %d\n", - error); - vput(attr_user_dvp); - } - - /* Mask startup failures in sub-directories. */ - error = 0; - -return_vput_attr_dvp: - vput(attr_dvp); - - return (error); -} -#endif /* !UFS_EXTATTR_AUTOSTART */ - -/* - * Stop extended attribute support on an FS. - */ -int -ufs_extattr_stop(struct mount *mp, struct thread *td) -{ - struct ufs_extattr_list_entry *uele; - struct ufsmount *ump = VFSTOUFS(mp); - int error = 0; - - ufs_extattr_uepm_lock(ump, td); - - if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)) { - error = EOPNOTSUPP; - goto unlock; - } - - while (LIST_FIRST(&ump->um_extattr.uepm_list) != NULL) { - uele = LIST_FIRST(&ump->um_extattr.uepm_list); - ufs_extattr_disable(ump, uele->uele_attrnamespace, - uele->uele_attrname, td); - } - - ump->um_extattr.uepm_flags &= ~UFS_EXTATTR_UEPM_STARTED; - - crfree(ump->um_extattr.uepm_ucred); - ump->um_extattr.uepm_ucred = NULL; - -unlock: - ufs_extattr_uepm_unlock(ump, td); - - return (error); -} - -/* - * Enable a named attribute on the specified filesystem; provide an - * unlocked backing vnode to hold the attribute data. - */ -static int -ufs_extattr_enable(struct ufsmount *ump, int attrnamespace, - const char *attrname, struct vnode *backing_vnode, struct thread *td) -{ - struct ufs_extattr_list_entry *attribute; - struct iovec aiov; - struct uio auio; - int error = 0; - - if (!ufs_extattr_valid_attrname(attrnamespace, attrname)) - return (EINVAL); - if (backing_vnode->v_type != VREG) - return (EINVAL); - - MALLOC(attribute, struct ufs_extattr_list_entry *, - sizeof(struct ufs_extattr_list_entry), M_UFS_EXTATTR, M_WAITOK); - if (attribute == NULL) - return (ENOMEM); - - if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)) { - error = EOPNOTSUPP; - goto free_exit; - } - - if (ufs_extattr_find_attr(ump, attrnamespace, attrname)) { - error = EEXIST; - goto free_exit; - } - - strncpy(attribute->uele_attrname, attrname, - UFS_EXTATTR_MAXEXTATTRNAME); - attribute->uele_attrnamespace = attrnamespace; - bzero(&attribute->uele_fileheader, - sizeof(struct ufs_extattr_fileheader)); - - attribute->uele_backing_vnode = backing_vnode; - - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - aiov.iov_base = (caddr_t) &attribute->uele_fileheader; - aiov.iov_len = sizeof(struct ufs_extattr_fileheader); - auio.uio_resid = sizeof(struct ufs_extattr_fileheader); - auio.uio_offset = (off_t) 0; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_rw = UIO_READ; - auio.uio_td = td; - - VOP_LEASE(backing_vnode, td, td->td_ucred, LEASE_WRITE); - vn_lock(backing_vnode, LK_SHARED | LK_NOPAUSE | LK_RETRY, td); - error = VOP_READ(backing_vnode, &auio, IO_NODELOCKED, - ump->um_extattr.uepm_ucred); - - if (error) - goto unlock_free_exit; - - if (auio.uio_resid != 0) { - printf("ufs_extattr_enable: malformed attribute header\n"); - error = EINVAL; - goto unlock_free_exit; - } - - if (attribute->uele_fileheader.uef_magic != 
UFS_EXTATTR_MAGIC) { - printf("ufs_extattr_enable: invalid attribute header magic\n"); - error = EINVAL; - goto unlock_free_exit; - } - - if (attribute->uele_fileheader.uef_version != UFS_EXTATTR_VERSION) { - printf("ufs_extattr_enable: incorrect attribute header " - "version\n"); - error = EINVAL; - goto unlock_free_exit; - } - - ASSERT_VOP_LOCKED(backing_vnode, "ufs_extattr_enable"); - backing_vnode->v_vflag |= VV_SYSTEM; - LIST_INSERT_HEAD(&ump->um_extattr.uepm_list, attribute, - uele_entries); - - VOP_UNLOCK(backing_vnode, 0, td); - return (0); - -unlock_free_exit: - VOP_UNLOCK(backing_vnode, 0, td); - -free_exit: - FREE(attribute, M_UFS_EXTATTR); - return (error); -} - -/* - * Disable extended attribute support on an FS. - */ -static int -ufs_extattr_disable(struct ufsmount *ump, int attrnamespace, - const char *attrname, struct thread *td) -{ - struct ufs_extattr_list_entry *uele; - int error = 0; - - if (!ufs_extattr_valid_attrname(attrnamespace, attrname)) - return (EINVAL); - - uele = ufs_extattr_find_attr(ump, attrnamespace, attrname); - if (!uele) - return (ENOATTR); - - LIST_REMOVE(uele, uele_entries); - - vn_lock(uele->uele_backing_vnode, LK_SHARED | LK_NOPAUSE | LK_RETRY, - td); - ASSERT_VOP_LOCKED(uele->uele_backing_vnode, "ufs_extattr_disable"); - uele->uele_backing_vnode->v_vflag &= ~VV_SYSTEM; - VOP_UNLOCK(uele->uele_backing_vnode, 0, td); - error = vn_close(uele->uele_backing_vnode, FREAD|FWRITE, - td->td_ucred, td); - - FREE(uele, M_UFS_EXTATTR); - - return (error); -} - -/* - * VFS call to manage extended attributes in UFS. If filename_vp is - * non-NULL, it must be passed in locked, and regardless of errors in - * processing, will be unlocked. - */ -int -ufs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp, - int attrnamespace, const char *attrname, struct thread *td) -{ - struct ufsmount *ump = VFSTOUFS(mp); - int error; - - /* - * Processes with privilege, but in jail, are not allowed to - * configure extended attributes. - */ - if ((error = suser(td))) { - if (filename_vp != NULL) - VOP_UNLOCK(filename_vp, 0, td); - return (error); - } - - switch(cmd) { - case UFS_EXTATTR_CMD_START: - if (filename_vp != NULL) { - VOP_UNLOCK(filename_vp, 0, td); - return (EINVAL); - } - if (attrname != NULL) - return (EINVAL); - - error = ufs_extattr_start(mp, td); - - return (error); - - case UFS_EXTATTR_CMD_STOP: - if (filename_vp != NULL) { - VOP_UNLOCK(filename_vp, 0, td); - return (EINVAL); - } - if (attrname != NULL) - return (EINVAL); - - error = ufs_extattr_stop(mp, td); - - return (error); - - case UFS_EXTATTR_CMD_ENABLE: - - if (filename_vp == NULL) - return (EINVAL); - if (attrname == NULL) { - VOP_UNLOCK(filename_vp, 0, td); - return (EINVAL); - } - - /* - * ufs_extattr_enable_with_open() will always unlock the - * vnode, regardless of failure. - */ - ufs_extattr_uepm_lock(ump, td); - error = ufs_extattr_enable_with_open(ump, filename_vp, - attrnamespace, attrname, td); - ufs_extattr_uepm_unlock(ump, td); - - return (error); - - case UFS_EXTATTR_CMD_DISABLE: - - if (filename_vp != NULL) { - VOP_UNLOCK(filename_vp, 0, td); - return (EINVAL); - } - if (attrname == NULL) - return (EINVAL); - - ufs_extattr_uepm_lock(ump, td); - error = ufs_extattr_disable(ump, attrnamespace, attrname, - td); - ufs_extattr_uepm_unlock(ump, td); - - return (error); - - default: - return (EINVAL); - } -} - -/* - * Vnode operating to retrieve a named extended attribute. 
- */ -int -ufs_getextattr(struct vop_getextattr_args *ap) -/* -vop_getextattr { - IN struct vnode *a_vp; - IN int a_attrnamespace; - IN const char *a_name; - INOUT struct uio *a_uio; - OUT size_t *a_size; - IN struct ucred *a_cred; - IN struct thread *a_td; -}; -*/ -{ - struct mount *mp = ap->a_vp->v_mount; - struct ufsmount *ump = VFSTOUFS(mp); - int error; - - ufs_extattr_uepm_lock(ump, ap->a_td); - - error = ufs_extattr_get(ap->a_vp, ap->a_attrnamespace, ap->a_name, - ap->a_uio, ap->a_size, ap->a_cred, ap->a_td); - - ufs_extattr_uepm_unlock(ump, ap->a_td); - - return (error); -} - -/* - * Real work associated with retrieving a named attribute--assumes that - * the attribute lock has already been grabbed. - */ -static int -ufs_extattr_get(struct vnode *vp, int attrnamespace, const char *name, - struct uio *uio, size_t *size, struct ucred *cred, struct thread *td) -{ - struct ufs_extattr_list_entry *attribute; - struct ufs_extattr_header ueh; - struct iovec local_aiov; - struct uio local_aio; - struct mount *mp = vp->v_mount; - struct ufsmount *ump = VFSTOUFS(mp); - struct inode *ip = VTOI(vp); - off_t base_offset; - size_t len, old_len; - int error = 0; - - if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)) - return (EOPNOTSUPP); - - if (strlen(name) == 0) - return (EINVAL); - - error = extattr_check_cred(vp, attrnamespace, cred, td, IREAD); - if (error) - return (error); - - attribute = ufs_extattr_find_attr(ump, attrnamespace, name); - if (!attribute) - return (ENOATTR); - - /* - * Allow only offsets of zero to encourage the read/replace - * extended attribute semantic. Otherwise we can't guarantee - * atomicity, as we don't provide locks for extended attributes. - */ - if (uio != NULL && uio->uio_offset != 0) - return (ENXIO); - - /* - * Find base offset of header in file based on file header size, and - * data header size + maximum data size, indexed by inode number. - */ - base_offset = sizeof(struct ufs_extattr_fileheader) + - ip->i_number * (sizeof(struct ufs_extattr_header) + - attribute->uele_fileheader.uef_size); - - /* - * Read in the data header to see if the data is defined, and if so - * how much. - */ - bzero(&ueh, sizeof(struct ufs_extattr_header)); - local_aiov.iov_base = (caddr_t) &ueh; - local_aiov.iov_len = sizeof(struct ufs_extattr_header); - local_aio.uio_iov = &local_aiov; - local_aio.uio_iovcnt = 1; - local_aio.uio_rw = UIO_READ; - local_aio.uio_segflg = UIO_SYSSPACE; - local_aio.uio_td = td; - local_aio.uio_offset = base_offset; - local_aio.uio_resid = sizeof(struct ufs_extattr_header); - - /* - * Acquire locks. - */ - VOP_LEASE(attribute->uele_backing_vnode, td, cred, LEASE_READ); - /* - * Don't need to get a lock on the backing file if the getattr is - * being applied to the backing file, as the lock is already held. - */ - if (attribute->uele_backing_vnode != vp) - vn_lock(attribute->uele_backing_vnode, LK_SHARED | - LK_NOPAUSE | LK_RETRY, td); - - error = VOP_READ(attribute->uele_backing_vnode, &local_aio, - IO_NODELOCKED, ump->um_extattr.uepm_ucred); - if (error) - goto vopunlock_exit; - - /* Defined? */ - if ((ueh.ueh_flags & UFS_EXTATTR_ATTR_FLAG_INUSE) == 0) { - error = ENOATTR; - goto vopunlock_exit; - } - - /* Valid for the current inode generation? */ - if (ueh.ueh_i_gen != ip->i_gen) { - /* - * The inode itself has a different generation number - * than the attribute data. For now, the best solution - * is to coerce this to undefined, and let it get cleaned - * up by the next write or extattrctl clean. 
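The base_offset arithmetic above is the whole on-disk layout of a backing file: one file header, then a fixed-size slot per inode number, each slot being a small data header followed by up to uef_size bytes of attribute data. A toy version of the calculation, with invented struct definitions standing in for the real UFS headers:

    /*
     * Illustrative sketch of the backing-file layout: one file header,
     * then a fixed-size slot per inode number, each slot holding a data
     * header followed by up to 'maxsize' bytes of attribute data.
     */
    #include <stdio.h>
    #include <stdint.h>
    #include <sys/types.h>

    struct toy_fileheader {                 /* once, at offset 0 */
        uint32_t magic;
        uint32_t version;
        uint32_t maxsize;                   /* plays the role of uef_size */
    };

    struct toy_header {                     /* precedes each data area */
        uint32_t flags;
        uint32_t gen;
        uint32_t len;
    };

    /* Same arithmetic as base_offset in ufs_extattr_get()/_set()/_rm(). */
    static off_t
    attr_base_offset(uint64_t inum, uint32_t maxsize)
    {
        return (sizeof(struct toy_fileheader) +
            (off_t)inum * (sizeof(struct toy_header) + maxsize));
    }

    int
    main(void)
    {
        printf("inode 10 -> record at offset %lld\n",
            (long long)attr_base_offset(10, 64));
        return (0);
    }

Since every inode's slot sits at a computable offset, get, set and remove can all seek straight to it with no index structure; the trade-off is that a value can never exceed the uef_size fixed in the file header.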
- */ - printf("ufs_extattr_get (%s): inode number inconsistency (%d, %jd)\n", - mp->mnt_stat.f_mntonname, ueh.ueh_i_gen, (intmax_t)ip->i_gen); - error = ENOATTR; - goto vopunlock_exit; - } - - /* Local size consistency check. */ - if (ueh.ueh_len > attribute->uele_fileheader.uef_size) { - error = ENXIO; - goto vopunlock_exit; - } - - /* Return full data size if caller requested it. */ - if (size != NULL) - *size = ueh.ueh_len; - - /* Return data if the caller requested it. */ - if (uio != NULL) { - /* Allow for offset into the attribute data. */ - uio->uio_offset = base_offset + sizeof(struct - ufs_extattr_header); - - /* - * Figure out maximum to transfer -- use buffer size and - * local data limit. - */ - len = MIN(uio->uio_resid, ueh.ueh_len); - old_len = uio->uio_resid; - uio->uio_resid = len; - - error = VOP_READ(attribute->uele_backing_vnode, uio, - IO_NODELOCKED, ump->um_extattr.uepm_ucred); - if (error) - goto vopunlock_exit; - - uio->uio_resid = old_len - (len - uio->uio_resid); - } - -vopunlock_exit: - - if (uio != NULL) - uio->uio_offset = 0; - - if (attribute->uele_backing_vnode != vp) - VOP_UNLOCK(attribute->uele_backing_vnode, 0, td); - - return (error); -} - -/* - * Vnode operation to remove a named attribute. - */ -int -ufs_deleteextattr(struct vop_deleteextattr_args *ap) -/* -vop_deleteextattr { - IN struct vnode *a_vp; - IN int a_attrnamespace; - IN const char *a_name; - IN struct ucred *a_cred; - IN struct thread *a_td; -}; -*/ -{ - struct mount *mp = ap->a_vp->v_mount; - struct ufsmount *ump = VFSTOUFS(mp); - - int error; - - ufs_extattr_uepm_lock(ump, ap->a_td); - - error = ufs_extattr_rm(ap->a_vp, ap->a_attrnamespace, ap->a_name, - ap->a_cred, ap->a_td); - - - ufs_extattr_uepm_unlock(ump, ap->a_td); - - return (error); -} - -/* - * Vnode operation to set a named attribute. - */ -int -ufs_setextattr(struct vop_setextattr_args *ap) -/* -vop_setextattr { - IN struct vnode *a_vp; - IN int a_attrnamespace; - IN const char *a_name; - INOUT struct uio *a_uio; - IN struct ucred *a_cred; - IN struct thread *a_td; -}; -*/ -{ - struct mount *mp = ap->a_vp->v_mount; - struct ufsmount *ump = VFSTOUFS(mp); - - int error; - - ufs_extattr_uepm_lock(ump, ap->a_td); - - /* - * XXX: No longer a supported way to delete extended attributes. - */ - if (ap->a_uio == NULL) - return (EINVAL); - - error = ufs_extattr_set(ap->a_vp, ap->a_attrnamespace, ap->a_name, - ap->a_uio, ap->a_cred, ap->a_td); - - ufs_extattr_uepm_unlock(ump, ap->a_td); - - return (error); -} - -/* - * Real work associated with setting a vnode's extended attributes; - * assumes that the attribute lock has already been grabbed. - */ -static int -ufs_extattr_set(struct vnode *vp, int attrnamespace, const char *name, - struct uio *uio, struct ucred *cred, struct thread *td) -{ - struct ufs_extattr_list_entry *attribute; - struct ufs_extattr_header ueh; - struct iovec local_aiov; - struct uio local_aio; - struct mount *mp = vp->v_mount; - struct ufsmount *ump = VFSTOUFS(mp); - struct inode *ip = VTOI(vp); - off_t base_offset; - int error = 0, ioflag; - - if (vp->v_mount->mnt_flag & MNT_RDONLY) - return (EROFS); - if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)) - return (EOPNOTSUPP); - if (!ufs_extattr_valid_attrname(attrnamespace, name)) - return (EINVAL); - - error = extattr_check_cred(vp, attrnamespace, cred, td, IWRITE); - if (error) - return (error); - - attribute = ufs_extattr_find_attr(ump, attrnamespace, name); - if (!attribute) - return (ENOATTR); - - /* - * Early rejection of invalid offsets/length. 
- * Reject: any offset but 0 (replace) - * Any size greater than attribute size limit - */ - if (uio->uio_offset != 0 || - uio->uio_resid > attribute->uele_fileheader.uef_size) - return (ENXIO); - - /* - * Find base offset of header in file based on file header size, and - * data header size + maximum data size, indexed by inode number. - */ - base_offset = sizeof(struct ufs_extattr_fileheader) + - ip->i_number * (sizeof(struct ufs_extattr_header) + - attribute->uele_fileheader.uef_size); - - /* - * Write out a data header for the data. - */ - ueh.ueh_len = uio->uio_resid; - ueh.ueh_flags = UFS_EXTATTR_ATTR_FLAG_INUSE; - ueh.ueh_i_gen = ip->i_gen; - local_aiov.iov_base = (caddr_t) &ueh; - local_aiov.iov_len = sizeof(struct ufs_extattr_header); - local_aio.uio_iov = &local_aiov; - local_aio.uio_iovcnt = 1; - local_aio.uio_rw = UIO_WRITE; - local_aio.uio_segflg = UIO_SYSSPACE; - local_aio.uio_td = td; - local_aio.uio_offset = base_offset; - local_aio.uio_resid = sizeof(struct ufs_extattr_header); - - /* - * Acquire locks. - */ - VOP_LEASE(attribute->uele_backing_vnode, td, cred, LEASE_WRITE); - - /* - * Don't need to get a lock on the backing file if the setattr is - * being applied to the backing file, as the lock is already held. - */ - if (attribute->uele_backing_vnode != vp) - vn_lock(attribute->uele_backing_vnode, - LK_EXCLUSIVE | LK_NOPAUSE | LK_RETRY, td); - - ioflag = IO_NODELOCKED; - if (ufs_extattr_sync) - ioflag |= IO_SYNC; - error = VOP_WRITE(attribute->uele_backing_vnode, &local_aio, ioflag, - ump->um_extattr.uepm_ucred); - if (error) - goto vopunlock_exit; - - if (local_aio.uio_resid != 0) { - error = ENXIO; - goto vopunlock_exit; - } - - /* - * Write out user data. - */ - uio->uio_offset = base_offset + sizeof(struct ufs_extattr_header); - - ioflag = IO_NODELOCKED; - if (ufs_extattr_sync) - ioflag |= IO_SYNC; - error = VOP_WRITE(attribute->uele_backing_vnode, uio, ioflag, - ump->um_extattr.uepm_ucred); - -vopunlock_exit: - uio->uio_offset = 0; - - if (attribute->uele_backing_vnode != vp) - VOP_UNLOCK(attribute->uele_backing_vnode, 0, td); - - return (error); -} - -/* - * Real work associated with removing an extended attribute from a vnode. - * Assumes the attribute lock has already been grabbed. - */ -static int -ufs_extattr_rm(struct vnode *vp, int attrnamespace, const char *name, - struct ucred *cred, struct thread *td) -{ - struct ufs_extattr_list_entry *attribute; - struct ufs_extattr_header ueh; - struct iovec local_aiov; - struct uio local_aio; - struct mount *mp = vp->v_mount; - struct ufsmount *ump = VFSTOUFS(mp); - struct inode *ip = VTOI(vp); - off_t base_offset; - int error = 0, ioflag; - - if (vp->v_mount->mnt_flag & MNT_RDONLY) - return (EROFS); - if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)) - return (EOPNOTSUPP); - if (!ufs_extattr_valid_attrname(attrnamespace, name)) - return (EINVAL); - - error = extattr_check_cred(vp, attrnamespace, cred, td, IWRITE); - if (error) - return (error); - - attribute = ufs_extattr_find_attr(ump, attrnamespace, name); - if (!attribute) - return (ENOATTR); - - /* - * Find base offset of header in file based on file header size, and - * data header size + maximum data size, indexed by inode number. - */ - base_offset = sizeof(struct ufs_extattr_fileheader) + - ip->i_number * (sizeof(struct ufs_extattr_header) + - attribute->uele_fileheader.uef_size); - - /* - * Check to see if currently defined. 
- */ - bzero(&ueh, sizeof(struct ufs_extattr_header)); - - local_aiov.iov_base = (caddr_t) &ueh; - local_aiov.iov_len = sizeof(struct ufs_extattr_header); - local_aio.uio_iov = &local_aiov; - local_aio.uio_iovcnt = 1; - local_aio.uio_rw = UIO_READ; - local_aio.uio_segflg = UIO_SYSSPACE; - local_aio.uio_td = td; - local_aio.uio_offset = base_offset; - local_aio.uio_resid = sizeof(struct ufs_extattr_header); - - VOP_LEASE(attribute->uele_backing_vnode, td, cred, LEASE_WRITE); - - /* - * Don't need to get the lock on the backing vnode if the vnode we're - * modifying is it, as we already hold the lock. - */ - if (attribute->uele_backing_vnode != vp) - vn_lock(attribute->uele_backing_vnode, - LK_EXCLUSIVE | LK_NOPAUSE | LK_RETRY, td); - - error = VOP_READ(attribute->uele_backing_vnode, &local_aio, - IO_NODELOCKED, ump->um_extattr.uepm_ucred); - if (error) - goto vopunlock_exit; - - /* Defined? */ - if ((ueh.ueh_flags & UFS_EXTATTR_ATTR_FLAG_INUSE) == 0) { - error = ENOATTR; - goto vopunlock_exit; - } - - /* Valid for the current inode generation? */ - if (ueh.ueh_i_gen != ip->i_gen) { - /* - * The inode itself has a different generation number than - * the attribute data. For now, the best solution is to - * coerce this to undefined, and let it get cleaned up by - * the next write or extattrctl clean. - */ - printf("ufs_extattr_rm (%s): inode number inconsistency (%d, %jd)\n", - mp->mnt_stat.f_mntonname, ueh.ueh_i_gen, (intmax_t)ip->i_gen); - error = ENOATTR; - goto vopunlock_exit; - } - - /* Flag it as not in use. */ - ueh.ueh_flags = 0; - ueh.ueh_len = 0; - - local_aiov.iov_base = (caddr_t) &ueh; - local_aiov.iov_len = sizeof(struct ufs_extattr_header); - local_aio.uio_iov = &local_aiov; - local_aio.uio_iovcnt = 1; - local_aio.uio_rw = UIO_WRITE; - local_aio.uio_segflg = UIO_SYSSPACE; - local_aio.uio_td = td; - local_aio.uio_offset = base_offset; - local_aio.uio_resid = sizeof(struct ufs_extattr_header); - - ioflag = IO_NODELOCKED; - if (ufs_extattr_sync) - ioflag |= IO_SYNC; - error = VOP_WRITE(attribute->uele_backing_vnode, &local_aio, ioflag, - ump->um_extattr.uepm_ucred); - if (error) - goto vopunlock_exit; - - if (local_aio.uio_resid != 0) - error = ENXIO; - -vopunlock_exit: - VOP_UNLOCK(attribute->uele_backing_vnode, 0, td); - - return (error); -} - -/* - * Called by UFS when an inode is no longer active and should have its - * attributes stripped. - */ -void -ufs_extattr_vnode_inactive(struct vnode *vp, struct thread *td) -{ - struct ufs_extattr_list_entry *uele; - struct mount *mp = vp->v_mount; - struct ufsmount *ump = VFSTOUFS(mp); - - /* - * In that case, we cannot lock. We should not have any active vnodes - * on the fs if this is not yet initialized but is going to be, so - * this can go unlocked. - */ - if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_INITIALIZED)) - return; - - ufs_extattr_uepm_lock(ump, td); - - if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)) { - ufs_extattr_uepm_unlock(ump, td); - return; - } - - LIST_FOREACH(uele, &ump->um_extattr.uepm_list, uele_entries) - ufs_extattr_rm(vp, uele->uele_attrnamespace, - uele->uele_attrname, NULL, td); - - ufs_extattr_uepm_unlock(ump, td); -} - -#endif /* !UFS_EXTATTR */ -#endif diff --git a/src/sys/ufs/ufs/ufs_extern.h b/src/sys/ufs/ufs/ufs_extern.h deleted file mode 100644 index e8dac02..0000000 --- a/src/sys/ufs/ufs/ufs_extern.h +++ /dev/null @@ -1,127 +0,0 @@ -#if 0 -/*- - * Copyright (c) 1991, 1993, 1994 - * The Regents of the University of California. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)ufs_extern.h 8.10 (Berkeley) 5/14/95 - * $FreeBSD: src/sys/ufs/ufs/ufs_extern.h,v 1.48 2002/10/18 22:52:41 dillon Exp $ - */ - -#ifndef _UFS_UFS_EXTERN_H_ -#define _UFS_UFS_EXTERN_H_ - -struct componentname; -struct direct; -struct indir; -struct inode; -struct mount; -struct netcred; -struct thread; -struct sockaddr; -struct ucred; -struct ufid; -struct vfsconf; -struct vnode; -struct vop_bmap_args; -struct vop_cachedlookup_args; -struct vop_generic_args; -struct vop_inactive_args; -struct vop_reclaim_args; - -int ufs_vnoperate(struct vop_generic_args *); -int ufs_vnoperatefifo(struct vop_generic_args *); -int ufs_vnoperatespec(struct vop_generic_args *); - -int ufs_bmap(struct vop_bmap_args *); -int ufs_bmaparray(struct vnode *, ufs2_daddr_t, ufs2_daddr_t *, - struct buf *, int *, int *); -int ufs_fhtovp(struct mount *, struct ufid *, struct vnode **); -int ufs_checkpath(struct inode *, struct inode *, struct ucred *); -void ufs_dirbad(struct inode *, doff_t, char *); -int ufs_dirbadentry(struct vnode *, struct direct *, int); -int ufs_dirempty(struct inode *, ino_t, struct ucred *); -int ufs_extread(struct vop_read_args *); -int ufs_extwrite(struct vop_write_args *); -void ufs_makedirentry(struct inode *, struct componentname *, - struct direct *); -int ufs_direnter(struct vnode *, struct vnode *, struct direct *, - struct componentname *, struct buf *); -int ufs_dirremove(struct vnode *, struct inode *, int, int); -int ufs_dirrewrite(struct inode *, struct inode *, ino_t, int, int); -int ufs_getlbns(struct vnode *, ufs2_daddr_t, struct indir *, int *); -int ufs_ihashget(dev_t, ino_t, int, struct vnode **); -void ufs_ihashinit(void); -int ufs_ihashins(struct inode *, int, struct vnode **); -struct vnode * - ufs_ihashlookup(dev_t, ino_t); -void ufs_ihashrem(struct inode *); 
-void ufs_ihashuninit(void); -int ufs_inactive(struct vop_inactive_args *); -int ufs_init(struct vfsconf *); -void ufs_itimes(struct vnode *vp); -int ufs_lookup(struct vop_cachedlookup_args *); -int ufs_readdir(struct vop_readdir_args *); -int ufs_reclaim(struct vop_reclaim_args *); -void ffs_snapgone(struct inode *); -vfs_root_t ufs_root; -vfs_start_t ufs_start; -int ufs_uninit(struct vfsconf *); -int ufs_vinit(struct mount *, vop_t **, vop_t **, struct vnode **); - -/* - * Soft update function prototypes. - */ -int softdep_setup_directory_add(struct buf *, struct inode *, off_t, - ino_t, struct buf *, int); -void softdep_change_directoryentry_offset(struct inode *, caddr_t, - caddr_t, caddr_t, int); -void softdep_setup_remove(struct buf *,struct inode *, struct inode *, int); -void softdep_setup_directory_change(struct buf *, struct inode *, - struct inode *, ino_t, int); -void softdep_change_linkcnt(struct inode *); -void softdep_releasefile(struct inode *); -int softdep_slowdown(struct vnode *); - -/* - * Flags to low-level allocation routines. The low 16-bits are reserved - * for IO_ flags from vnode.h. - * - * Note: The general vfs code typically limits the sequential heuristic - * count to 127. See sequential_heuristic() in kern/vfs_vnops.c - */ -#define BA_CLRBUF 0x00010000 /* Clear invalid areas of buffer. */ -#define BA_METAONLY 0x00020000 /* Return indirect block buffer. */ -#define BA_SEQMASK 0x7F000000 /* Bits holding seq heuristic. */ -#define BA_SEQSHIFT 24 -#define BA_SEQMAX 0x7F - -#endif /* !_UFS_UFS_EXTERN_H_ */ -#endif diff --git a/src/sys/ufs/ufs/ufs_ihash.c b/src/sys/ufs/ufs/ufs_ihash.c deleted file mode 100644 index 84b4194..0000000 --- a/src/sys/ufs/ufs/ufs_ihash.c +++ /dev/null @@ -1,198 +0,0 @@ -#if 0 -/* - * Copyright (c) 1982, 1986, 1989, 1991, 1993, 1995 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
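The BA_* allocation flags defined in ufs_extern.h above pack two things into one word: option bits sitting above the low 16 bits reserved for IO_* flags, and a small sequential-read heuristic count in bits 24-30, capped at 127 as the note about sequential_heuristic() says. A small sketch of the packing and unpacking, with invented helper names:

    /*
     * Illustrative sketch of the BA_* packing: option bits above the low
     * 16 bits reserved for IO_* flags, seq heuristic count in bits 24-30.
     * The constants mirror the defines above; the helpers are invented.
     */
    #include <stdio.h>

    #define BA_CLRBUF   0x00010000          /* clear invalid buffer areas */
    #define BA_SEQMASK  0x7F000000          /* bits holding seq heuristic */
    #define BA_SEQSHIFT 24
    #define BA_SEQMAX   0x7F

    static int
    ba_pack_seq(int flags, int seqcount)
    {
        if (seqcount > BA_SEQMAX)
            seqcount = BA_SEQMAX;           /* the count saturates at 127 */
        return (flags | (seqcount << BA_SEQSHIFT));
    }

    static int
    ba_unpack_seq(int flags)
    {
        return ((flags & BA_SEQMASK) >> BA_SEQSHIFT);
    }

    int
    main(void)
    {
        int flags = ba_pack_seq(BA_CLRBUF, 9);

        printf("seqcount = %d\n", ba_unpack_seq(flags));    /* prints 9 */
        return (0);
    }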
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)ufs_ihash.c 8.7 (Berkeley) 5/17/95 - */ - -#include -__FBSDID("$FreeBSD: src/sys/ufs/ufs/ufs_ihash.c,v 1.37 2003/10/04 14:03:28 jeff Exp $"); - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -static MALLOC_DEFINE(M_UFSIHASH, "UFS ihash", "UFS Inode hash tables"); -/* - * Structures associated with inode cacheing. - */ -static LIST_HEAD(ihashhead, inode) *ihashtbl; -static u_long ihash; /* size of hash table - 1 */ -#define INOHASH(device, inum) (&ihashtbl[(minor(device) + (inum)) & ihash]) -static struct mtx ufs_ihash_mtx; - -/* - * Initialize inode hash table. - */ -void -ufs_ihashinit() -{ - - ihashtbl = hashinit(desiredvnodes, M_UFSIHASH, &ihash); - mtx_init(&ufs_ihash_mtx, "ufs ihash", NULL, MTX_DEF); -} - -/* - * Destroy the inode hash table. - */ -void -ufs_ihashuninit() -{ - - hashdestroy(ihashtbl, M_UFSIHASH, ihash); - mtx_destroy(&ufs_ihash_mtx); -} - -/* - * Use the device/inum pair to find the incore inode, and return a pointer - * to it. If it is in core, return it, even if it is locked. - */ -struct vnode * -ufs_ihashlookup(dev, inum) - dev_t dev; - ino_t inum; -{ - struct inode *ip; - - mtx_lock(&ufs_ihash_mtx); - LIST_FOREACH(ip, INOHASH(dev, inum), i_hash) - if (inum == ip->i_number && dev == ip->i_dev) - break; - mtx_unlock(&ufs_ihash_mtx); - - if (ip) - return (ITOV(ip)); - return (NULLVP); -} - -/* - * Use the device/inum pair to find the incore inode, and return a pointer - * to it. If it is in core, but locked, wait for it. - */ -int -ufs_ihashget(dev, inum, flags, vpp) - dev_t dev; - ino_t inum; - int flags; - struct vnode **vpp; -{ - struct thread *td = curthread; /* XXX */ - struct inode *ip; - struct vnode *vp; - int error; - - *vpp = NULL; -loop: - mtx_lock(&ufs_ihash_mtx); - LIST_FOREACH(ip, INOHASH(dev, inum), i_hash) { - if (inum == ip->i_number && dev == ip->i_dev) { - vp = ITOV(ip); - VI_LOCK(vp); - mtx_unlock(&ufs_ihash_mtx); - error = vget(vp, flags | LK_INTERLOCK, td); - if (error == ENOENT) - goto loop; - if (error) - return (error); - *vpp = vp; - return (0); - } - } - mtx_unlock(&ufs_ihash_mtx); - return (0); -} - -/* - * Check hash for duplicate of passed inode, and add if there is no one. - * if there is a duplicate, vget() it and return to the caller. 
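ufs_ihashget() above keys its cache on the (device, inode number) pair: the INOHASH macro folds the two into a bucket index with a power-of-two mask, and the chain is scanned for an exact match. A stripped-down userland sketch of the same lookup follows; the structures and table size are invented, and the kernel's locking and vget() retry loop are deliberately omitted.

    /*
     * Illustrative userland sketch of the inode-hash lookup: bucket chains
     * indexed by a power-of-two mask over the (device, inode number) pair,
     * scanned for an exact match.
     */
    #include <stdio.h>
    #include <stdint.h>

    #define NBUCKETS 64                     /* power of two; mask is N-1 */
    #define INOHASH(dev, inum) (((dev) + (inum)) & (NBUCKETS - 1))

    struct toy_inode {
        struct toy_inode *next;             /* hash chain link */
        uint32_t          dev;
        uint64_t          inum;
    };

    static struct toy_inode *buckets[NBUCKETS];

    static void
    ihash_insert(struct toy_inode *ip)
    {
        struct toy_inode **head = &buckets[INOHASH(ip->dev, ip->inum)];

        ip->next = *head;                   /* push onto the chain head */
        *head = ip;
    }

    static struct toy_inode *
    ihash_lookup(uint32_t dev, uint64_t inum)
    {
        struct toy_inode *ip;

        for (ip = buckets[INOHASH(dev, inum)]; ip != NULL; ip = ip->next)
            if (ip->dev == dev && ip->inum == inum)
                return (ip);                /* exact (dev, inum) match */
        return (NULL);
    }

    int
    main(void)
    {
        struct toy_inode a = { NULL, 5, 1234 };

        ihash_insert(&a);
        printf("found: %s\n", ihash_lookup(5, 1234) ? "yes" : "no");
        return (0);
    }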
- */ -int -ufs_ihashins(ip, flags, ovpp) - struct inode *ip; - int flags; - struct vnode **ovpp; -{ - struct thread *td = curthread; /* XXX */ - struct ihashhead *ipp; - struct inode *oip; - struct vnode *ovp; - int error; - -loop: - mtx_lock(&ufs_ihash_mtx); - ipp = INOHASH(ip->i_dev, ip->i_number); - LIST_FOREACH(oip, ipp, i_hash) { - if (ip->i_number == oip->i_number && ip->i_dev == oip->i_dev) { - ovp = ITOV(oip); - VI_LOCK(ovp); - mtx_unlock(&ufs_ihash_mtx); - error = vget(ovp, flags | LK_INTERLOCK, td); - if (error == ENOENT) - goto loop; - if (error) - return (error); - *ovpp = ovp; - return (0); - } - } - LIST_INSERT_HEAD(ipp, ip, i_hash); - ip->i_flag |= IN_HASHED; - mtx_unlock(&ufs_ihash_mtx); - *ovpp = NULL; - return (0); -} - -/* - * Remove the inode from the hash table. - */ -void -ufs_ihashrem(ip) - struct inode *ip; -{ - mtx_lock(&ufs_ihash_mtx); - if (ip->i_flag & IN_HASHED) { - ip->i_flag &= ~IN_HASHED; - LIST_REMOVE(ip, i_hash); - } - mtx_unlock(&ufs_ihash_mtx); -} -#endif diff --git a/src/sys/ufs/ufs/ufs_inode.c b/src/sys/ufs/ufs/ufs_inode.c deleted file mode 100644 index 93e9858..0000000 --- a/src/sys/ufs/ufs/ufs_inode.c +++ /dev/null @@ -1,189 +0,0 @@ -#if 0 -/* - * Copyright (c) 1991, 1993, 1995 - * The Regents of the University of California. All rights reserved. - * (c) UNIX System Laboratories, Inc. - * All or some portions of this file are derived from material licensed - * to the University of California by American Telephone and Telegraph - * Co. or Unix System Laboratories, Inc. and are reproduced herein with - * the permission of UNIX System Laboratories, Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * @(#)ufs_inode.c 8.9 (Berkeley) 5/14/95 - */ - -#include -__FBSDID("$FreeBSD: src/sys/ufs/ufs/ufs_inode.c,v 1.52 2003/10/05 02:45:00 jeff Exp $"); - -#include "opt_quota.h" -#include "opt_ufs.h" - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#ifdef UFS_DIRHASH -#include -#include -#endif - -/* - * Last reference to an inode. If necessary, write or delete it. - */ -int -ufs_inactive(ap) - struct vop_inactive_args /* { - struct vnode *a_vp; - struct thread *a_td; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - struct inode *ip = VTOI(vp); - struct thread *td = ap->a_td; - mode_t mode; - int error = 0; - - VI_LOCK(vp); - if (prtactive && vp->v_usecount != 0) - vprint("ufs_inactive: pushing active", vp); - VI_UNLOCK(vp); - - /* - * Ignore inodes related to stale file handles. - */ - if (ip->i_mode == 0) - goto out; - if (ip->i_effnlink == 0 && DOINGSOFTDEP(vp)) - softdep_releasefile(ip); - if (ip->i_nlink <= 0) { - (void) vn_write_suspend_wait(vp, NULL, V_WAIT); -#ifdef QUOTA - if (!getinoquota(ip)) - (void)chkiq(ip, -1, NOCRED, FORCE); -#endif -#ifdef UFS_EXTATTR - ufs_extattr_vnode_inactive(vp, td); -#endif - error = UFS_TRUNCATE(vp, (off_t)0, IO_EXT | IO_NORMAL, - NOCRED, td); - /* - * Setting the mode to zero needs to wait for the inode - * to be written just as does a change to the link count. - * So, rather than creating a new entry point to do the - * same thing, we just use softdep_change_linkcnt(). - */ - DIP(ip, i_rdev) = 0; - mode = ip->i_mode; - ip->i_mode = 0; - DIP(ip, i_mode) = 0; - ip->i_flag |= IN_CHANGE | IN_UPDATE; - if (DOINGSOFTDEP(vp)) - softdep_change_linkcnt(ip); - UFS_VFREE(vp, ip->i_number, mode); - } - if (ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) { - if ((ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) == 0 && - vn_write_suspend_wait(vp, NULL, V_NOWAIT)) { - ip->i_flag &= ~IN_ACCESS; - } else { - (void) vn_write_suspend_wait(vp, NULL, V_WAIT); - UFS_UPDATE(vp, 0); - } - } -out: - VOP_UNLOCK(vp, 0, td); - /* - * If we are done with the inode, reclaim it - * so that it can be reused immediately. - */ - if (ip->i_mode == 0) - vrecycle(vp, NULL, td); - return (error); -} - -/* - * Reclaim an inode so that it can be used for other purposes. - */ -int -ufs_reclaim(ap) - struct vop_reclaim_args /* { - struct vnode *a_vp; - struct thread *a_td; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - struct inode *ip = VTOI(vp); - struct ufsmount *ump = ip->i_ump; -#ifdef QUOTA - int i; -#endif - - VI_LOCK(vp); - if (prtactive && vp->v_usecount != 0) - vprint("ufs_reclaim: pushing active", vp); - VI_UNLOCK(vp); - if (ip->i_flag & IN_LAZYMOD) { - ip->i_flag |= IN_MODIFIED; - UFS_UPDATE(vp, 0); - } - /* - * Remove the inode from its hash chain. - */ - ufs_ihashrem(ip); - /* - * Purge old data structures associated with the inode. - */ - vrele(ip->i_devvp); -#ifdef QUOTA - for (i = 0; i < MAXQUOTAS; i++) { - if (ip->i_dquot[i] != NODQUOT) { - dqrele(vp, ip->i_dquot[i]); - ip->i_dquot[i] = NODQUOT; - } - } -#endif -#ifdef UFS_DIRHASH - if (ip->i_dirhash != NULL) - ufsdirhash_free(ip); -#endif - UFS_IFREE(ump, ip); - vp->v_data = 0; - return (0); -} -#endif diff --git a/src/sys/ufs/ufs/ufs_lookup.c b/src/sys/ufs/ufs/ufs_lookup.c deleted file mode 100644 index 085a6c1..0000000 --- a/src/sys/ufs/ufs/ufs_lookup.c +++ /dev/null @@ -1,1271 +0,0 @@ -#if 0 -/* - * Copyright (c) 1989, 1993 - * The Regents of the University of California. All rights reserved. 
- * (c) UNIX System Laboratories, Inc. - * All or some portions of this file are derived from material licensed - * to the University of California by American Telephone and Telegraph - * Co. or Unix System Laboratories, Inc. and are reproduced herein with - * the permission of UNIX System Laboratories, Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)ufs_lookup.c 8.15 (Berkeley) 6/16/95 - */ - -#include -__FBSDID("$FreeBSD: src/sys/ufs/ufs/ufs_lookup.c,v 1.68 2003/06/11 06:34:30 obrien Exp $"); - -#include "opt_ffs_broken_fixme.h" -#include "opt_ufs.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#ifdef UFS_DIRHASH -#include -#endif -#include -#include - -#ifdef DIAGNOSTIC -static int dirchk = 1; -#else -static int dirchk = 0; -#endif - -SYSCTL_INT(_debug, OID_AUTO, dircheck, CTLFLAG_RW, &dirchk, 0, ""); - -/* true if old FS format...*/ -#define OFSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0) - -/* - * Convert a component of a pathname into a pointer to a locked inode. - * This is a very central and rather complicated routine. - * If the filesystem is not maintained in a strict tree hierarchy, - * this can result in a deadlock situation (see comments in code below). - * - * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending - * on whether the name is to be looked up, created, renamed, or deleted. - * When CREATE, RENAME, or DELETE is specified, information usable in - * creating, renaming, or deleting a directory entry may be calculated. - * If flag has LOCKPARENT or'ed into it and the target of the pathname - * exists, lookup returns both the target and its parent directory locked. 
- * When creating or renaming and LOCKPARENT is specified, the target may - * not be ".". When deleting and LOCKPARENT is specified, the target may - * be "."., but the caller must check to ensure it does an vrele and vput - * instead of two vputs. - * - * This routine is actually used as VOP_CACHEDLOOKUP method, and the - * filesystem employs the generic vfs_cache_lookup() as VOP_LOOKUP - * method. - * - * vfs_cache_lookup() performs the following for us: - * check that it is a directory - * check accessibility of directory - * check for modification attempts on read-only mounts - * if name found in cache - * if at end of path and deleting or creating - * drop it - * else - * return name. - * return VOP_CACHEDLOOKUP() - * - * Overall outline of ufs_lookup: - * - * search for name in directory, to found or notfound - * notfound: - * if creating, return locked directory, leaving info on available slots - * else return error - * found: - * if at end of path and deleting, return information to allow delete - * if at end of path and rewriting (RENAME and LOCKPARENT), lock target - * inode and return info to allow rewrite - * if not at end, add name to cache; if at end and neither creating - * nor deleting, add name to cache - */ -int -ufs_lookup(ap) - struct vop_cachedlookup_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - } */ *ap; -{ - struct vnode *vdp; /* vnode for directory being searched */ - struct inode *dp; /* inode for directory being searched */ - struct buf *bp; /* a buffer of directory entries */ - struct direct *ep; /* the current directory entry */ - int entryoffsetinblock; /* offset of ep in bp's buffer */ - enum {NONE, COMPACT, FOUND} slotstatus; - doff_t slotoffset; /* offset of area with free space */ - int slotsize; /* size of area at slotoffset */ - int slotfreespace; /* amount of space free in slot */ - int slotneeded; /* size of the entry we're seeking */ - int numdirpasses; /* strategy for directory search */ - doff_t endsearch; /* offset to end directory search */ - doff_t prevoff; /* prev entry dp->i_offset */ - struct vnode *pdp; /* saved dp during symlink work */ - struct vnode *tdp; /* returned by VFS_VGET */ - doff_t enduseful; /* pointer past last used dir slot */ - u_long bmask; /* block offset mask */ - int lockparent; /* 1 => lockparent flag is set */ - int wantparent; /* 1 => wantparent or lockparent flag */ - int namlen, error; - struct vnode **vpp = ap->a_vpp; - struct componentname *cnp = ap->a_cnp; - struct ucred *cred = cnp->cn_cred; - int flags = cnp->cn_flags; - int nameiop = cnp->cn_nameiop; - struct thread *td = cnp->cn_thread; - - bp = NULL; - slotoffset = -1; - cnp->cn_flags &= ~PDIRUNLOCK; -/* - * XXX there was a soft-update diff about this I couldn't merge. - * I think this was the equiv. - */ - *vpp = NULL; - - vdp = ap->a_dvp; - dp = VTOI(vdp); - lockparent = flags & LOCKPARENT; - wantparent = flags & (LOCKPARENT|WANTPARENT); - - /* - * We now have a segment name to search for, and a directory to search. - * - * Suppress search for slots unless creating - * file and at end of pathname, in which case - * we watch for a place to put the new file in - * case it doesn't already exist. 
- */ - slotstatus = FOUND; - slotfreespace = slotsize = slotneeded = 0; - if ((nameiop == CREATE || nameiop == RENAME) && - (flags & ISLASTCN)) { - slotstatus = NONE; - slotneeded = DIRECTSIZ(cnp->cn_namelen); - } - bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; - -#ifdef UFS_DIRHASH - /* - * Use dirhash for fast operations on large directories. The logic - * to determine whether to hash the directory is contained within - * ufsdirhash_build(); a zero return means that it decided to hash - * this directory and it successfully built up the hash table. - */ - if (ufsdirhash_build(dp) == 0) { - /* Look for a free slot if needed. */ - enduseful = dp->i_size; - if (slotstatus != FOUND) { - slotoffset = ufsdirhash_findfree(dp, slotneeded, - &slotsize); - if (slotoffset >= 0) { - slotstatus = COMPACT; - enduseful = ufsdirhash_enduseful(dp); - if (enduseful < 0) - enduseful = dp->i_size; - } - } - /* Look up the component. */ - numdirpasses = 1; - entryoffsetinblock = 0; /* silence compiler warning */ - switch (ufsdirhash_lookup(dp, cnp->cn_nameptr, cnp->cn_namelen, - &dp->i_offset, &bp, nameiop == DELETE ? &prevoff : NULL)) { - case 0: - ep = (struct direct *)((char *)bp->b_data + - (dp->i_offset & bmask)); - goto foundentry; - case ENOENT: - dp->i_offset = roundup2(dp->i_size, DIRBLKSIZ); - goto notfound; - default: - /* Something failed; just do a linear search. */ - break; - } - } -#endif /* UFS_DIRHASH */ - /* - * If there is cached information on a previous search of - * this directory, pick up where we last left off. - * We cache only lookups as these are the most common - * and have the greatest payoff. Caching CREATE has little - * benefit as it usually must search the entire directory - * to determine that the entry does not exist. Caching the - * location of the last DELETE or RENAME has not reduced - * profiling time and hence has been removed in the interest - * of simplicity. - */ - if (nameiop != LOOKUP || dp->i_diroff == 0 || - dp->i_diroff >= dp->i_size) { - entryoffsetinblock = 0; - dp->i_offset = 0; - numdirpasses = 1; - } else { - dp->i_offset = dp->i_diroff; - if ((entryoffsetinblock = dp->i_offset & bmask) && - (error = UFS_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp))) - return (error); - numdirpasses = 2; - nchstats.ncs_2passes++; - } - prevoff = dp->i_offset; - endsearch = roundup2(dp->i_size, DIRBLKSIZ); - enduseful = 0; - -searchloop: - while (dp->i_offset < endsearch) { - /* - * If necessary, get the next directory block. - */ - if ((dp->i_offset & bmask) == 0) { - if (bp != NULL) - brelse(bp); - error = - UFS_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp); - if (error) - return (error); - entryoffsetinblock = 0; - } - /* - * If still looking for a slot, and at a DIRBLKSIZE - * boundary, have to start looking for free space again. - */ - if (slotstatus == NONE && - (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) { - slotoffset = -1; - slotfreespace = 0; - } - /* - * Get pointer to next entry. - * Full validation checks are slow, so we only check - * enough to insure forward progress through the - * directory. Complete checks can be run by patching - * "dirchk" to be true. 
- */ - ep = (struct direct *)((char *)bp->b_data + entryoffsetinblock); - if (ep->d_reclen == 0 || ep->d_reclen > - DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || - (dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock))) { - int i; - - ufs_dirbad(dp, dp->i_offset, "mangled entry"); - i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)); - dp->i_offset += i; - entryoffsetinblock += i; - continue; - } - - /* - * If an appropriate sized slot has not yet been found, - * check to see if one is available. Also accumulate space - * in the current block so that we can determine if - * compaction is viable. - */ - if (slotstatus != FOUND) { - int size = ep->d_reclen; - - if (ep->d_ino != 0) - size -= DIRSIZ(OFSFMT(vdp), ep); - if (size > 0) { - if (size >= slotneeded) { - slotstatus = FOUND; - slotoffset = dp->i_offset; - slotsize = ep->d_reclen; - } else if (slotstatus == NONE) { - slotfreespace += size; - if (slotoffset == -1) - slotoffset = dp->i_offset; - if (slotfreespace >= slotneeded) { - slotstatus = COMPACT; - slotsize = dp->i_offset + - ep->d_reclen - slotoffset; - } - } - } - } - - /* - * Check for a name match. - */ - if (ep->d_ino) { -# if (BYTE_ORDER == LITTLE_ENDIAN) - if (OFSFMT(vdp)) - namlen = ep->d_type; - else - namlen = ep->d_namlen; -# else - namlen = ep->d_namlen; -# endif - if (namlen == cnp->cn_namelen && - (cnp->cn_nameptr[0] == ep->d_name[0]) && - !bcmp(cnp->cn_nameptr, ep->d_name, - (unsigned)namlen)) { -#ifdef UFS_DIRHASH -foundentry: -#endif - /* - * Save directory entry's inode number and - * reclen in ndp->ni_ufs area, and release - * directory buffer. - */ - if (vdp->v_mount->mnt_maxsymlinklen > 0 && - ep->d_type == DT_WHT) { - slotstatus = FOUND; - slotoffset = dp->i_offset; - slotsize = ep->d_reclen; - dp->i_reclen = slotsize; - enduseful = dp->i_size; - ap->a_cnp->cn_flags |= ISWHITEOUT; - numdirpasses--; - goto notfound; - } - dp->i_ino = ep->d_ino; - dp->i_reclen = ep->d_reclen; - goto found; - } - } - prevoff = dp->i_offset; - dp->i_offset += ep->d_reclen; - entryoffsetinblock += ep->d_reclen; - if (ep->d_ino) - enduseful = dp->i_offset; - } -notfound: - /* - * If we started in the middle of the directory and failed - * to find our target, we must check the beginning as well. - */ - if (numdirpasses == 2) { - numdirpasses--; - dp->i_offset = 0; - endsearch = dp->i_diroff; - goto searchloop; - } - if (bp != NULL) - brelse(bp); - /* - * If creating, and at end of pathname and current - * directory has not been removed, then can consider - * allowing file to be created. - */ - if ((nameiop == CREATE || nameiop == RENAME || - (nameiop == DELETE && - (ap->a_cnp->cn_flags & DOWHITEOUT) && - (ap->a_cnp->cn_flags & ISWHITEOUT))) && - (flags & ISLASTCN) && dp->i_effnlink != 0) { - /* - * Access for write is interpreted as allowing - * creation of files in the directory. - */ - error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread); - if (error) - return (error); - /* - * Return an indication of where the new directory - * entry should be put. If we didn't find a slot, - * then set dp->i_count to 0 indicating - * that the new slot belongs at the end of the - * directory. If we found a slot, then the new entry - * can be put in the range from dp->i_offset to - * dp->i_offset + dp->i_count. 
- */ - if (slotstatus == NONE) { - dp->i_offset = roundup2(dp->i_size, DIRBLKSIZ); - dp->i_count = 0; - enduseful = dp->i_offset; - } else if (nameiop == DELETE) { - dp->i_offset = slotoffset; - if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) - dp->i_count = 0; - else - dp->i_count = dp->i_offset - prevoff; - } else { - dp->i_offset = slotoffset; - dp->i_count = slotsize; - if (enduseful < slotoffset + slotsize) - enduseful = slotoffset + slotsize; - } - dp->i_endoff = roundup2(enduseful, DIRBLKSIZ); - dp->i_flag |= IN_CHANGE | IN_UPDATE; - /* - * We return with the directory locked, so that - * the parameters we set up above will still be - * valid if we actually decide to do a direnter(). - * We return ni_vp == NULL to indicate that the entry - * does not currently exist; we leave a pointer to - * the (locked) directory inode in ndp->ni_dvp. - * The pathname buffer is saved so that the name - * can be obtained later. - * - * NB - if the directory is unlocked, then this - * information cannot be used. - */ - cnp->cn_flags |= SAVENAME; - if (!lockparent) { - VOP_UNLOCK(vdp, 0, td); - cnp->cn_flags |= PDIRUNLOCK; - } - return (EJUSTRETURN); - } - /* - * Insert name into cache (as non-existent) if appropriate. - */ - if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) - cache_enter(vdp, *vpp, cnp); - return (ENOENT); - -found: - if (numdirpasses == 2) - nchstats.ncs_pass2++; - /* - * Check that directory length properly reflects presence - * of this entry. - */ - if (dp->i_offset + DIRSIZ(OFSFMT(vdp), ep) > dp->i_size) { - ufs_dirbad(dp, dp->i_offset, "i_size too small"); - dp->i_size = dp->i_offset + DIRSIZ(OFSFMT(vdp), ep); - DIP(dp, i_size) = dp->i_size; - dp->i_flag |= IN_CHANGE | IN_UPDATE; - } - brelse(bp); - - /* - * Found component in pathname. - * If the final component of path name, save information - * in the cache as to where the entry was found. - */ - if ((flags & ISLASTCN) && nameiop == LOOKUP) - dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1); - - /* - * If deleting, and at end of pathname, return - * parameters which can be used to remove file. - * If the wantparent flag isn't set, we return only - * the directory (in ndp->ni_dvp), otherwise we go - * on and lock the inode, being careful with ".". - */ - if (nameiop == DELETE && (flags & ISLASTCN)) { - /* - * Write access to directory required to delete files. - */ - error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread); - if (error) - return (error); - /* - * Return pointer to current entry in dp->i_offset, - * and distance past previous entry (if there - * is a previous entry in this block) in dp->i_count. - * Save directory inode pointer in ndp->ni_dvp for dirremove(). - */ - if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) - dp->i_count = 0; - else - dp->i_count = dp->i_offset - prevoff; - if (dp->i_number == dp->i_ino) { - VREF(vdp); - *vpp = vdp; - return (0); - } - if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, - LK_EXCLUSIVE, &tdp)) != 0) - return (error); - /* - * If directory is "sticky", then user must own - * the directory, or the file in it, else she - * may not delete it (unless she's root). This - * implements append-only directories. 
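The ISVTX test that follows implements the rule just described. Purely as an illustration, here is a hedged user-space restatement (sticky_delete_allowed is invented for this sketch; the uid comparisons stand in for the kernel's two VOP_ACCESS(..., VADMIN, ...) checks and "privileged" for the root exemption):

#include <stdbool.h>
#include <sys/types.h>

/*
 * Sticky-directory deletion rule, restated: in an ISVTX directory the
 * caller must own either the directory or the entry being removed,
 * unless privileged; otherwise the delete is refused (EPERM).
 */
static bool
sticky_delete_allowed(uid_t caller, uid_t dir_owner, uid_t entry_owner,
    bool privileged)
{
	if (privileged)
		return (true);
	return (caller == dir_owner || caller == entry_owner);
}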
- */ - if ((dp->i_mode & ISVTX) && - VOP_ACCESS(vdp, VADMIN, cred, cnp->cn_thread) && - VOP_ACCESS(tdp, VADMIN, cred, cnp->cn_thread)) { - vput(tdp); - return (EPERM); - } - *vpp = tdp; - if (!lockparent) { - VOP_UNLOCK(vdp, 0, td); - cnp->cn_flags |= PDIRUNLOCK; - } - return (0); - } - - /* - * If rewriting (RENAME), return the inode and the - * information required to rewrite the present directory - * Must get inode of directory entry to verify it's a - * regular file, or empty directory. - */ - if (nameiop == RENAME && wantparent && (flags & ISLASTCN)) { - if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread))) - return (error); - /* - * Careful about locking second inode. - * This can only occur if the target is ".". - */ - if (dp->i_number == dp->i_ino) - return (EISDIR); - if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, - LK_EXCLUSIVE, &tdp)) != 0) - return (error); - *vpp = tdp; - cnp->cn_flags |= SAVENAME; - if (!lockparent) { - VOP_UNLOCK(vdp, 0, td); - cnp->cn_flags |= PDIRUNLOCK; - } - return (0); - } - - /* - * Step through the translation in the name. We do not `vput' the - * directory because we may need it again if a symbolic link - * is relative to the current directory. Instead we save it - * unlocked as "pdp". We must get the target inode before unlocking - * the directory to insure that the inode will not be removed - * before we get it. We prevent deadlock by always fetching - * inodes from the root, moving down the directory tree. Thus - * when following backward pointers ".." we must unlock the - * parent directory before getting the requested directory. - * There is a potential race condition here if both the current - * and parent directories are removed before the VFS_VGET for the - * inode associated with ".." returns. We hope that this occurs - * infrequently since we cannot avoid this race condition without - * implementing a sophisticated deadlock detection algorithm. - * Note also that this simple deadlock detection scheme will not - * work if the filesystem has any hard links other than ".." - * that point backwards in the directory structure. - */ - pdp = vdp; - if (flags & ISDOTDOT) { - if ((VFS_VGET(pdp->v_mount, dp->i_ino, LK_NOWAIT | LK_EXCLUSIVE, - &tdp)) != 0) { - VOP_UNLOCK(pdp, 0, td); /* race to get the inode */ - error = VFS_VGET(pdp->v_mount, dp->i_ino, - LK_EXCLUSIVE, &tdp); - vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, td); - if (error) - return (error); - } - if (!lockparent || !(flags & ISLASTCN)) { - VOP_UNLOCK(pdp, 0, td); - cnp->cn_flags |= PDIRUNLOCK; - } - *vpp = tdp; - } else if (dp->i_number == dp->i_ino) { - VREF(vdp); /* we want ourself, ie "." */ - *vpp = vdp; - } else { - error = VFS_VGET(pdp->v_mount, dp->i_ino, LK_EXCLUSIVE, &tdp); - if (error) - return (error); - if (!lockparent || !(flags & ISLASTCN)) { - VOP_UNLOCK(pdp, 0, td); - cnp->cn_flags |= PDIRUNLOCK; - } - *vpp = tdp; - } - - /* - * Insert name into cache if appropriate. 
- */ - if (cnp->cn_flags & MAKEENTRY) - cache_enter(vdp, *vpp, cnp); - return (0); -} - -void -ufs_dirbad(ip, offset, how) - struct inode *ip; - doff_t offset; - char *how; -{ - struct mount *mp; - - mp = ITOV(ip)->v_mount; - (void)printf("%s: bad dir ino %lu at offset %ld: %s\n", - mp->mnt_stat.f_mntonname, (u_long)ip->i_number, (long)offset, how); - if ((mp->mnt_flag & MNT_RDONLY) == 0) - panic("ufs_dirbad: bad dir"); -} - -/* - * Do consistency checking on a directory entry: - * record length must be multiple of 4 - * entry must fit in rest of its DIRBLKSIZ block - * record must be large enough to contain entry - * name is not longer than MAXNAMLEN - * name must be as long as advertised, and null terminated - */ -int -ufs_dirbadentry(dp, ep, entryoffsetinblock) - struct vnode *dp; - struct direct *ep; - int entryoffsetinblock; -{ - int i, namlen; - -# if (BYTE_ORDER == LITTLE_ENDIAN) - if (OFSFMT(dp)) - namlen = ep->d_type; - else - namlen = ep->d_namlen; -# else - namlen = ep->d_namlen; -# endif - if ((ep->d_reclen & 0x3) != 0 || - ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || - ep->d_reclen < DIRSIZ(OFSFMT(dp), ep) || namlen > MAXNAMLEN) { - /*return (1); */ - printf("First bad\n"); - goto bad; - } - if (ep->d_ino == 0) - return (0); - for (i = 0; i < namlen; i++) - if (ep->d_name[i] == '\0') { - /*return (1); */ - printf("Second bad\n"); - goto bad; - } - if (ep->d_name[i]) - goto bad; - return (0); -bad: - return (1); -} - -/* - * Construct a new directory entry after a call to namei, using the - * parameters that it left in the componentname argument cnp. The - * argument ip is the inode to which the new directory entry will refer. - */ -void -ufs_makedirentry(ip, cnp, newdirp) - struct inode *ip; - struct componentname *cnp; - struct direct *newdirp; -{ - -#ifdef DIAGNOSTIC - if ((cnp->cn_flags & SAVENAME) == 0) - panic("ufs_makedirentry: missing name"); -#endif - newdirp->d_ino = ip->i_number; - newdirp->d_namlen = cnp->cn_namelen; - bcopy(cnp->cn_nameptr, newdirp->d_name, (unsigned)cnp->cn_namelen + 1); - if (ITOV(ip)->v_mount->mnt_maxsymlinklen > 0) - newdirp->d_type = IFTODT(ip->i_mode); - else { - newdirp->d_type = 0; -# if (BYTE_ORDER == LITTLE_ENDIAN) - { u_char tmp = newdirp->d_namlen; - newdirp->d_namlen = newdirp->d_type; - newdirp->d_type = tmp; } -# endif - } -} - -/* - * Write a directory entry after a call to namei, using the parameters - * that it left in nameidata. The argument dirp is the new directory - * entry contents. Dvp is a pointer to the directory to be written, - * which was left locked by namei. Remaining parameters (dp->i_offset, - * dp->i_count) indicate how the space for the new entry is to be obtained. - * Non-null bp indicates that a directory is being created (for the - * soft dependency code). - */ -int -ufs_direnter(dvp, tvp, dirp, cnp, newdirbp) - struct vnode *dvp; - struct vnode *tvp; - struct direct *dirp; - struct componentname *cnp; - struct buf *newdirbp; -{ - struct ucred *cr; - struct thread *td; - int newentrysize; - struct inode *dp; - struct buf *bp; - u_int dsize; - struct direct *ep, *nep; - int error, ret, blkoff, loc, spacefree, flags; - char *dirbuf; - - td = curthread; /* XXX */ - cr = td->td_ucred; - - dp = VTOI(dvp); - newentrysize = DIRSIZ(OFSFMT(dvp), dirp); - - if (dp->i_count == 0) { - /* - * If dp->i_count is 0, then namei could find no - * space in the directory. Here, dp->i_offset will - * be on a directory block boundary and we will write the - * new entry into a fresh block. 
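The consistency rules listed for ufs_dirbadentry() above can be restated compactly. The following is only a sketch over a simplified layout: struct dir_entry, entry_size, entry_is_sane and the SKETCH_* constants are invented here, and the old-format byte swap of d_namlen/d_type is omitted.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define SKETCH_DIRBLKSIZE	512	/* directory block size assumed for the sketch */
#define SKETCH_MAXNAMELEN	255	/* maximum name length assumed for the sketch */

/* Simplified stand-in for struct direct; not the on-disk layout. */
struct dir_entry {
	uint32_t	ino;
	uint16_t	reclen;
	uint16_t	namlen;
	char		name[SKETCH_MAXNAMELEN + 1];
};

/* Smallest record that can hold a name of the given length (4-byte aligned). */
static size_t
entry_size(uint16_t namlen)
{
	return ((offsetof(struct dir_entry, name) + namlen + 1 + 3) & ~(size_t)3);
}

/* Returns true if the entry obeys the consistency rules listed above. */
static bool
entry_is_sane(const struct dir_entry *ep, size_t offset_in_block)
{
	size_t left = SKETCH_DIRBLKSIZE -
	    (offset_in_block & (SKETCH_DIRBLKSIZE - 1));

	if ((ep->reclen & 3) != 0)
		return (false);		/* record length not a multiple of 4 */
	if (ep->reclen > left)
		return (false);		/* entry overruns its directory block */
	if (ep->reclen < entry_size(ep->namlen))
		return (false);		/* record too small to contain the entry */
	if (ep->namlen > SKETCH_MAXNAMELEN)
		return (false);		/* name longer than allowed */
	for (size_t i = 0; i < ep->namlen; i++)
		if (ep->name[i] == '\0')
			return (false);	/* name shorter than advertised */
	return (ep->name[ep->namlen] == '\0');	/* and NUL terminated */
}

ufs_dirbadentry() additionally accepts an entry whose d_ino is zero once the record-length checks pass, since free slots are legitimate.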
- */ - if (dp->i_offset & (DIRBLKSIZ - 1)) - panic("ufs_direnter: newblk"); - flags = BA_CLRBUF; - if (!DOINGSOFTDEP(dvp) && !DOINGASYNC(dvp)) - flags |= IO_SYNC; - if ((error = UFS_BALLOC(dvp, (off_t)dp->i_offset, DIRBLKSIZ, - cr, flags, &bp)) != 0) { - if (DOINGSOFTDEP(dvp) && newdirbp != NULL) - bdwrite(newdirbp); - return (error); - } - dp->i_size = dp->i_offset + DIRBLKSIZ; - DIP(dp, i_size) = dp->i_size; - dp->i_flag |= IN_CHANGE | IN_UPDATE; - vnode_pager_setsize(dvp, (u_long)dp->i_size); - dirp->d_reclen = DIRBLKSIZ; - blkoff = dp->i_offset & - (VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1); - bcopy((caddr_t)dirp, (caddr_t)bp->b_data + blkoff,newentrysize); -#ifdef UFS_DIRHASH - if (dp->i_dirhash != NULL) { - ufsdirhash_newblk(dp, dp->i_offset); - ufsdirhash_add(dp, dirp, dp->i_offset); - ufsdirhash_checkblock(dp, (char *)bp->b_data + blkoff, - dp->i_offset); - } -#endif - if (DOINGSOFTDEP(dvp)) { - /* - * Ensure that the entire newly allocated block is a - * valid directory so that future growth within the - * block does not have to ensure that the block is - * written before the inode. - */ - blkoff += DIRBLKSIZ; - while (blkoff < bp->b_bcount) { - ((struct direct *) - (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; - blkoff += DIRBLKSIZ; - } - if (softdep_setup_directory_add(bp, dp, dp->i_offset, - dirp->d_ino, newdirbp, 1) == 0) { - bdwrite(bp); - return (UFS_UPDATE(dvp, 0)); - } - /* We have just allocated a directory block in an - * indirect block. Rather than tracking when it gets - * claimed by the inode, we simply do a VOP_FSYNC - * now to ensure that it is there (in case the user - * does a future fsync). Note that we have to unlock - * the inode for the entry that we just entered, as - * the VOP_FSYNC may need to lock other inodes which - * can lead to deadlock if we also hold a lock on - * the newly entered node. - */ - if ((error = BUF_WRITE(bp))) - return (error); - if (tvp != NULL) - VOP_UNLOCK(tvp, 0, td); - error = VOP_FSYNC(dvp, td->td_ucred, MNT_WAIT, td); - if (tvp != NULL) - vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, td); - return (error); - } - if (DOINGASYNC(dvp)) { - bdwrite(bp); - return (UFS_UPDATE(dvp, 0)); - } - error = BUF_WRITE(bp); - ret = UFS_UPDATE(dvp, 1); - if (error == 0) - return (ret); - return (error); - } - - /* - * If dp->i_count is non-zero, then namei found space for the new - * entry in the range dp->i_offset to dp->i_offset + dp->i_count - * in the directory. To use this space, we may have to compact - * the entries located there, by copying them together towards the - * beginning of the block, leaving the free space in one usable - * chunk at the end. - */ - - /* - * Increase size of directory if entry eats into new space. - * This should never push the size past a new multiple of - * DIRBLKSIZE. - * - * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. - */ - if (dp->i_offset + dp->i_count > dp->i_size) { - dp->i_size = dp->i_offset + dp->i_count; - DIP(dp, i_size) = dp->i_size; - } - /* - * Get the block containing the space for the new directory entry. - */ - error = UFS_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp); - if (error) { - if (DOINGSOFTDEP(dvp) && newdirbp != NULL) - bdwrite(newdirbp); - return (error); - } - /* - * Find space for the new entry. In the simple case, the entry at - * offset base will have the space. If it does not, then namei - * arranged that compacting the region dp->i_offset to - * dp->i_offset + dp->i_count would yield the space. - */ - ep = (struct direct *)dirbuf; - dsize = ep->d_ino ? 
DIRSIZ(OFSFMT(dvp), ep) : 0; - spacefree = ep->d_reclen - dsize; - for (loc = ep->d_reclen; loc < dp->i_count; ) { - nep = (struct direct *)(dirbuf + loc); - - /* Trim the existing slot (NB: dsize may be zero). */ - ep->d_reclen = dsize; - ep = (struct direct *)((char *)ep + dsize); - - /* Read nep->d_reclen now as the bcopy() may clobber it. */ - loc += nep->d_reclen; - if (nep->d_ino == 0) { - /* - * A mid-block unused entry. Such entries are - * never created by the kernel, but fsck_ffs - * can create them (and it doesn't fix them). - * - * Add up the free space, and initialise the - * relocated entry since we don't bcopy it. - */ - spacefree += nep->d_reclen; - ep->d_ino = 0; - dsize = 0; - continue; - } - dsize = DIRSIZ(OFSFMT(dvp), nep); - spacefree += nep->d_reclen - dsize; -#ifdef UFS_DIRHASH - if (dp->i_dirhash != NULL) - ufsdirhash_move(dp, nep, - dp->i_offset + ((char *)nep - dirbuf), - dp->i_offset + ((char *)ep - dirbuf)); -#endif - if (DOINGSOFTDEP(dvp)) - softdep_change_directoryentry_offset(dp, dirbuf, - (caddr_t)nep, (caddr_t)ep, dsize); - else - bcopy((caddr_t)nep, (caddr_t)ep, dsize); - } - /* - * Here, `ep' points to a directory entry containing `dsize' in-use - * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0, - * then the entry is completely unused (dsize == 0). The value - * of ep->d_reclen is always indeterminate. - * - * Update the pointer fields in the previous entry (if any), - * copy in the new entry, and write out the block. - */ - if (ep->d_ino == 0 || - (ep->d_ino == WINO && - bcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) { - if (spacefree + dsize < newentrysize) - panic("ufs_direnter: compact1"); - dirp->d_reclen = spacefree + dsize; - } else { - if (spacefree < newentrysize) - panic("ufs_direnter: compact2"); - dirp->d_reclen = spacefree; - ep->d_reclen = dsize; - ep = (struct direct *)((char *)ep + dsize); - } -#ifdef UFS_DIRHASH - if (dp->i_dirhash != NULL && (ep->d_ino == 0 || - dirp->d_reclen == spacefree)) - ufsdirhash_add(dp, dirp, dp->i_offset + ((char *)ep - dirbuf)); -#endif - bcopy((caddr_t)dirp, (caddr_t)ep, (u_int)newentrysize); -#ifdef UFS_DIRHASH - if (dp->i_dirhash != NULL) - ufsdirhash_checkblock(dp, dirbuf - - (dp->i_offset & (DIRBLKSIZ - 1)), - dp->i_offset & ~(DIRBLKSIZ - 1)); -#endif - - if (DOINGSOFTDEP(dvp)) { - (void) softdep_setup_directory_add(bp, dp, - dp->i_offset + (caddr_t)ep - dirbuf, - dirp->d_ino, newdirbp, 0); - bdwrite(bp); - } else { - if (DOINGASYNC(dvp)) { - bdwrite(bp); - error = 0; - } else { - error = BUF_WRITE(bp); - } - } - dp->i_flag |= IN_CHANGE | IN_UPDATE; - /* - * If all went well, and the directory can be shortened, proceed - * with the truncation. Note that we have to unlock the inode for - * the entry that we just entered, as the truncation may need to - * lock other inodes which can lead to deadlock if we also hold a - * lock on the newly entered node. - */ - if (error == 0 && dp->i_endoff && dp->i_endoff < dp->i_size) { - if (tvp != NULL) - VOP_UNLOCK(tvp, 0, td); -#ifdef UFS_DIRHASH - if (dp->i_dirhash != NULL) - ufsdirhash_dirtrunc(dp, dp->i_endoff); -#endif - (void) UFS_TRUNCATE(dvp, (off_t)dp->i_endoff, - IO_NORMAL | IO_SYNC, cr, td); - if (tvp != NULL) - vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, td); - } - return (error); -} - -/* - * Remove a directory entry after a call to namei, using - * the parameters which it left in nameidata. The entry - * dp->i_offset contains the offset into the directory of the - * entry to be eliminated. 
The dp->i_count field contains the - * size of the previous record in the directory. If this - * is 0, the first entry is being deleted, so we need only - * zero the inode number to mark the entry as free. If the - * entry is not the first in the directory, we must reclaim - * the space of the now empty record by adding the record size - * to the size of the previous entry. - */ -int -ufs_dirremove(dvp, ip, flags, isrmdir) - struct vnode *dvp; - struct inode *ip; - int flags; - int isrmdir; -{ - struct inode *dp; - struct direct *ep; - struct buf *bp; - int error; - - dp = VTOI(dvp); - - if (flags & DOWHITEOUT) { - /* - * Whiteout entry: set d_ino to WINO. - */ - if ((error = - UFS_BLKATOFF(dvp, (off_t)dp->i_offset, (char **)&ep, &bp)) != 0) - return (error); - ep->d_ino = WINO; - ep->d_type = DT_WHT; - goto out; - } - - if ((error = UFS_BLKATOFF(dvp, - (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp)) != 0) - return (error); -#ifdef UFS_DIRHASH - /* - * Remove the dirhash entry. This is complicated by the fact - * that `ep' is the previous entry when dp->i_count != 0. - */ - if (dp->i_dirhash != NULL) - ufsdirhash_remove(dp, (dp->i_count == 0) ? ep : - (struct direct *)((char *)ep + ep->d_reclen), dp->i_offset); -#endif - if (dp->i_count == 0) { - /* - * First entry in block: set d_ino to zero. - */ - ep->d_ino = 0; - } else { - /* - * Collapse new free space into previous entry. - */ - ep->d_reclen += dp->i_reclen; - } -#ifdef UFS_DIRHASH - if (dp->i_dirhash != NULL) - ufsdirhash_checkblock(dp, (char *)ep - - ((dp->i_offset - dp->i_count) & (DIRBLKSIZ - 1)), - dp->i_offset & ~(DIRBLKSIZ - 1)); -#endif -out: - if (DOINGSOFTDEP(dvp)) { - if (ip) { - ip->i_effnlink--; - softdep_change_linkcnt(ip); - softdep_setup_remove(bp, dp, ip, isrmdir); - } - if (softdep_slowdown(dvp)) { - error = BUF_WRITE(bp); - } else { - bdwrite(bp); - error = 0; - } - } else { - if (ip) { - ip->i_effnlink--; - ip->i_nlink--; - DIP(ip, i_nlink) = ip->i_nlink; - ip->i_flag |= IN_CHANGE; - } - if (flags & DOWHITEOUT) - error = BUF_WRITE(bp); - else if (DOINGASYNC(dvp) && dp->i_count != 0) { - bdwrite(bp); - error = 0; - } else - error = BUF_WRITE(bp); - } - dp->i_flag |= IN_CHANGE | IN_UPDATE; - /* - * If the last named reference to a snapshot goes away, - * drop its snapshot reference so that it will be reclaimed - * when last open reference goes away. - */ -#if defined(FFS) || defined(IFS) - if (ip != 0 && (ip->i_flags & SF_SNAPSHOT) != 0 && ip->i_effnlink == 0) - ffs_snapgone(ip); -#endif - return (error); -} - -/* - * Rewrite an existing directory entry to point at the inode - * supplied. The parameters describing the directory entry are - * set up by a call to namei. 
- */ -int -ufs_dirrewrite(dp, oip, newinum, newtype, isrmdir) - struct inode *dp, *oip; - ino_t newinum; - int newtype; - int isrmdir; -{ - struct buf *bp; - struct direct *ep; - struct vnode *vdp = ITOV(dp); - int error; - - error = UFS_BLKATOFF(vdp, (off_t)dp->i_offset, (char **)&ep, &bp); - if (error) - return (error); - ep->d_ino = newinum; - if (!OFSFMT(vdp)) - ep->d_type = newtype; - oip->i_effnlink--; - if (DOINGSOFTDEP(vdp)) { - softdep_change_linkcnt(oip); - softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir); - bdwrite(bp); - } else { - oip->i_nlink--; - DIP(oip, i_nlink) = oip->i_nlink; - oip->i_flag |= IN_CHANGE; - if (DOINGASYNC(vdp)) { - bdwrite(bp); - error = 0; - } else { - error = BUF_WRITE(bp); - } - } - dp->i_flag |= IN_CHANGE | IN_UPDATE; - /* - * If the last named reference to a snapshot goes away, - * drop its snapshot reference so that it will be reclaimed - * when last open reference goes away. - */ -#if defined(FFS) || defined(IFS) - if ((oip->i_flags & SF_SNAPSHOT) != 0 && oip->i_effnlink == 0) - ffs_snapgone(oip); -#endif - return (error); -} - -/* - * Check if a directory is empty or not. - * Inode supplied must be locked. - * - * Using a struct dirtemplate here is not precisely - * what we want, but better than using a struct direct. - * - * NB: does not handle corrupted directories. - */ -int -ufs_dirempty(ip, parentino, cred) - struct inode *ip; - ino_t parentino; - struct ucred *cred; -{ - doff_t off; - struct dirtemplate dbuf; - struct direct *dp = (struct direct *)&dbuf; - int error, count, namlen; -#define MINDIRSIZ (sizeof (struct dirtemplate) / 2) - - for (off = 0; off < ip->i_size; off += dp->d_reclen) { - error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, - off, UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, cred, - NOCRED, &count, (struct thread *)0); - /* - * Since we read MINDIRSIZ, residual must - * be 0 unless we're at end of file. - */ - if (error || count != 0) - return (0); - /* avoid infinite loops */ - if (dp->d_reclen == 0) - return (0); - /* skip empty entries */ - if (dp->d_ino == 0 || dp->d_ino == WINO) - continue; - /* accept only "." and ".." */ -# if (BYTE_ORDER == LITTLE_ENDIAN) - if (OFSFMT(ITOV(ip))) - namlen = dp->d_type; - else - namlen = dp->d_namlen; -# else - namlen = dp->d_namlen; -# endif - if (namlen > 2) - return (0); - if (dp->d_name[0] != '.') - return (0); - /* - * At this point namlen must be 1 or 2. - * 1 implies ".", 2 implies ".." if second - * char is also "." - */ - if (namlen == 1 && dp->d_ino == ip->i_number) - continue; - if (dp->d_name[1] == '.' && dp->d_ino == parentino) - continue; - return (0); - } - return (1); -} - -/* - * Check if source directory is in the path of the target directory. - * Target is supplied locked, source is unlocked. - * The target is always vput before returning. 
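ufs_dirempty() above accepts only "." and ".." as live entries. A minimal sketch of that name filter (entry_allowed_in_empty_dir is invented here; reading the on-disk records, whiteout entries, the old-format byte swap, and the inode-number checks against the directory and its parent are all omitted):

#include <stdbool.h>
#include <stddef.h>

/*
 * Name filter used when deciding whether a directory is empty: the only
 * live entries allowed are "." and "..".  Anything else means the
 * directory still has contents.
 */
static bool
entry_allowed_in_empty_dir(const char *name, size_t namlen)
{
	if (namlen == 1 && name[0] == '.')
		return (true);		/* "." - the directory itself */
	if (namlen == 2 && name[0] == '.' && name[1] == '.')
		return (true);		/* ".." - the parent */
	return (false);
}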
- */ -int -ufs_checkpath(source, target, cred) - struct inode *source, *target; - struct ucred *cred; -{ - struct vnode *vp; - int error, namlen; - ino_t rootino; - struct dirtemplate dirbuf; - - vp = ITOV(target); - if (target->i_number == source->i_number) { - error = EEXIST; - goto out; - } - rootino = ROOTINO; - error = 0; - if (target->i_number == rootino) - goto out; - - for (;;) { - if (vp->v_type != VDIR) { - error = ENOTDIR; - break; - } - error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf, - sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, - IO_NODELOCKED | IO_NOMACCHECK, cred, NOCRED, (int *)0, - (struct thread *)0); - if (error != 0) - break; -# if (BYTE_ORDER == LITTLE_ENDIAN) - if (OFSFMT(vp)) - namlen = dirbuf.dotdot_type; - else - namlen = dirbuf.dotdot_namlen; -# else - namlen = dirbuf.dotdot_namlen; -# endif - if (namlen != 2 || - dirbuf.dotdot_name[0] != '.' || - dirbuf.dotdot_name[1] != '.') { - error = ENOTDIR; - break; - } - if (dirbuf.dotdot_ino == source->i_number) { - error = EINVAL; - break; - } - if (dirbuf.dotdot_ino == rootino) - break; - vput(vp); - error = VFS_VGET(vp->v_mount, dirbuf.dotdot_ino, - LK_EXCLUSIVE, &vp); - if (error) { - vp = NULL; - break; - } - } - -out: - if (error == ENOTDIR) - printf("checkpath: .. not a directory\n"); - if (vp != NULL) - vput(vp); - return (error); -} -#endif diff --git a/src/sys/ufs/ufs/ufs_quota.c b/src/sys/ufs/ufs/ufs_quota.c deleted file mode 100644 index 395e42a..0000000 --- a/src/sys/ufs/ufs/ufs_quota.c +++ /dev/null @@ -1,1063 +0,0 @@ -#if 0 -/* - * Copyright (c) 1982, 1986, 1990, 1993, 1995 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Robert Elz at The University of Melbourne. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * @(#)ufs_quota.c 8.5 (Berkeley) 5/20/95 - */ - -#include -__FBSDID("$FreeBSD: src/sys/ufs/ufs/ufs_quota.c,v 1.70 2003/11/05 04:30:08 kan Exp $"); - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -SYSCTL_DECL(_security_bsd); - -static int unprivileged_get_quota = 0; -SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_get_quota, CTLFLAG_RW, - &unprivileged_get_quota, 0, - "Unprivileged processes may retrieve quotas for other uids and gids"); - -static MALLOC_DEFINE(M_DQUOT, "UFS quota", "UFS quota entries"); - -/* - * Quota name to error message mapping. - */ -static char *quotatypes[] = INITQFNAMES; - -static int chkdqchg(struct inode *, ufs2_daddr_t, struct ucred *, int); -static int chkiqchg(struct inode *, ino_t, struct ucred *, int); -static int dqget(struct vnode *, - u_long, struct ufsmount *, int, struct dquot **); -static int dqsync(struct vnode *, struct dquot *); -static void dqflush(struct vnode *); - -#ifdef DIAGNOSTIC -static void dqref(struct dquot *); -static void chkdquot(struct inode *); -#endif - -/* - * Set up the quotas for an inode. - * - * This routine completely defines the semantics of quotas. - * If other criterion want to be used to establish quotas, the - * MAXQUOTAS value in quotas.h should be increased, and the - * additional dquots set up here. - */ -int -getinoquota(ip) - struct inode *ip; -{ - struct ufsmount *ump; - struct vnode *vp = ITOV(ip); - int error; - - ump = VFSTOUFS(vp->v_mount); - /* - * Set up the user quota based on file uid. - * EINVAL means that quotas are not enabled. - */ - if (ip->i_dquot[USRQUOTA] == NODQUOT && - (error = - dqget(vp, ip->i_uid, ump, USRQUOTA, &ip->i_dquot[USRQUOTA])) && - error != EINVAL) - return (error); - /* - * Set up the group quota based on file gid. - * EINVAL means that quotas are not enabled. - */ - if (ip->i_dquot[GRPQUOTA] == NODQUOT && - (error = - dqget(vp, ip->i_gid, ump, GRPQUOTA, &ip->i_dquot[GRPQUOTA])) && - error != EINVAL) - return (error); - return (0); -} - -/* - * Update disk usage, and take corrective action. 
- */ -int -chkdq(ip, change, cred, flags) - struct inode *ip; - ufs2_daddr_t change; - struct ucred *cred; - int flags; -{ - struct dquot *dq; - ufs2_daddr_t ncurblocks; - int i, error; - -#ifdef DIAGNOSTIC - if ((flags & CHOWN) == 0) - chkdquot(ip); -#endif - if (change == 0) - return (0); - if (change < 0) { - for (i = 0; i < MAXQUOTAS; i++) { - if ((dq = ip->i_dquot[i]) == NODQUOT) - continue; - while (dq->dq_flags & DQ_LOCK) { - dq->dq_flags |= DQ_WANT; - (void) tsleep(dq, PINOD+1, "chkdq1", 0); - } - ncurblocks = dq->dq_curblocks + change; - if (ncurblocks >= 0) - dq->dq_curblocks = ncurblocks; - else - dq->dq_curblocks = 0; - dq->dq_flags &= ~DQ_BLKS; - dq->dq_flags |= DQ_MOD; - } - return (0); - } - if ((flags & FORCE) == 0 && suser_cred(cred, 0)) { - for (i = 0; i < MAXQUOTAS; i++) { - if ((dq = ip->i_dquot[i]) == NODQUOT) - continue; - error = chkdqchg(ip, change, cred, i); - if (error) - return (error); - } - } - for (i = 0; i < MAXQUOTAS; i++) { - if ((dq = ip->i_dquot[i]) == NODQUOT) - continue; - while (dq->dq_flags & DQ_LOCK) { - dq->dq_flags |= DQ_WANT; - (void) tsleep(dq, PINOD+1, "chkdq2", 0); - } - /* Reset timer when crossing soft limit */ - if (dq->dq_curblocks + change >= dq->dq_bsoftlimit && - dq->dq_curblocks < dq->dq_bsoftlimit) - dq->dq_btime = time_second + - VFSTOUFS(ITOV(ip)->v_mount)->um_btime[i]; - dq->dq_curblocks += change; - dq->dq_flags |= DQ_MOD; - } - return (0); -} - -/* - * Check for a valid change to a users allocation. - * Issue an error message if appropriate. - */ -static int -chkdqchg(ip, change, cred, type) - struct inode *ip; - ufs2_daddr_t change; - struct ucred *cred; - int type; -{ - struct dquot *dq = ip->i_dquot[type]; - ufs2_daddr_t ncurblocks = dq->dq_curblocks + change; - - /* - * If user would exceed their hard limit, disallow space allocation. - */ - if (ncurblocks >= dq->dq_bhardlimit && dq->dq_bhardlimit) { - if ((dq->dq_flags & DQ_BLKS) == 0 && - ip->i_uid == cred->cr_uid) { - uprintf("\n%s: write failed, %s disk limit reached\n", - ITOV(ip)->v_mount->mnt_stat.f_mntonname, - quotatypes[type]); - dq->dq_flags |= DQ_BLKS; - } - return (EDQUOT); - } - /* - * If user is over their soft limit for too long, disallow space - * allocation. Reset time limit as they cross their soft limit. - */ - if (ncurblocks >= dq->dq_bsoftlimit && dq->dq_bsoftlimit) { - if (dq->dq_curblocks < dq->dq_bsoftlimit) { - dq->dq_btime = time_second + - VFSTOUFS(ITOV(ip)->v_mount)->um_btime[type]; - if (ip->i_uid == cred->cr_uid) - uprintf("\n%s: warning, %s %s\n", - ITOV(ip)->v_mount->mnt_stat.f_mntonname, - quotatypes[type], "disk quota exceeded"); - return (0); - } - if (time_second > dq->dq_btime) { - if ((dq->dq_flags & DQ_BLKS) == 0 && - ip->i_uid == cred->cr_uid) { - uprintf("\n%s: write failed, %s %s\n", - ITOV(ip)->v_mount->mnt_stat.f_mntonname, - quotatypes[type], - "disk quota exceeded for too long"); - dq->dq_flags |= DQ_BLKS; - } - return (EDQUOT); - } - } - return (0); -} - -/* - * Check the inode limit, applying corrective action. 
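The inode-limit path below follows the same pattern as the block checks above (chkdq/chkdqchg): exceeding a hard limit refuses the allocation outright, crossing a soft limit starts a grace timer, and an expired timer turns further allocation into EDQUOT. As a self-contained sketch of that shared pattern (struct quota_limits and quota_allows are invented stand-ins, with no locking and a zero limit meaning "no limit"):

#include <stdbool.h>
#include <stdint.h>
#include <time.h>

/*
 * Shared quota decision, restated: hard limit refuses outright; crossing
 * the soft limit starts a grace timer; once that timer expires, further
 * allocation is refused as well.
 */
struct quota_limits {
	int64_t	hard;		/* hard limit (0 = none) */
	int64_t	soft;		/* soft limit (0 = none) */
	int64_t	cur;		/* current usage */
	time_t	grace_ends;	/* deadline set when the soft limit is crossed */
};

static bool
quota_allows(struct quota_limits *q, int64_t change, time_t now,
    time_t grace_period)
{
	int64_t next = q->cur + change;

	if (q->hard != 0 && next >= q->hard)
		return (false);			/* hard limit reached */
	if (q->soft != 0 && next >= q->soft) {
		if (q->cur < q->soft) {
			/* just crossed the soft limit: start the grace timer */
			q->grace_ends = now + grace_period;
			return (true);
		}
		if (now > q->grace_ends)
			return (false);		/* over the soft limit too long */
	}
	return (true);
}

chkiqchg() below makes the same decision for dq_curinodes against the inode limits and dq_itime.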
- */ -int -chkiq(ip, change, cred, flags) - struct inode *ip; - ino_t change; - struct ucred *cred; - int flags; -{ - struct dquot *dq; - ino_t ncurinodes; - int i, error; - -#ifdef DIAGNOSTIC - if ((flags & CHOWN) == 0) - chkdquot(ip); -#endif - if (change == 0) - return (0); - /* XXX: change is unsigned */ - if (change < 0) { - for (i = 0; i < MAXQUOTAS; i++) { - if ((dq = ip->i_dquot[i]) == NODQUOT) - continue; - while (dq->dq_flags & DQ_LOCK) { - dq->dq_flags |= DQ_WANT; - (void) tsleep(dq, PINOD+1, "chkiq1", 0); - } - ncurinodes = dq->dq_curinodes + change; - /* XXX: ncurinodes is unsigned */ - if (ncurinodes >= 0) - dq->dq_curinodes = ncurinodes; - else - dq->dq_curinodes = 0; - dq->dq_flags &= ~DQ_INODS; - dq->dq_flags |= DQ_MOD; - } - return (0); - } - if ((flags & FORCE) == 0 && suser_cred(cred, 0)) { - for (i = 0; i < MAXQUOTAS; i++) { - if ((dq = ip->i_dquot[i]) == NODQUOT) - continue; - error = chkiqchg(ip, change, cred, i); - if (error) - return (error); - } - } - for (i = 0; i < MAXQUOTAS; i++) { - if ((dq = ip->i_dquot[i]) == NODQUOT) - continue; - while (dq->dq_flags & DQ_LOCK) { - dq->dq_flags |= DQ_WANT; - (void) tsleep(dq, PINOD+1, "chkiq2", 0); - } - /* Reset timer when crossing soft limit */ - if (dq->dq_curinodes + change >= dq->dq_isoftlimit && - dq->dq_curinodes < dq->dq_isoftlimit) - dq->dq_itime = time_second + - VFSTOUFS(ITOV(ip)->v_mount)->um_itime[i]; - dq->dq_curinodes += change; - dq->dq_flags |= DQ_MOD; - } - return (0); -} - -/* - * Check for a valid change to a users allocation. - * Issue an error message if appropriate. - */ -static int -chkiqchg(ip, change, cred, type) - struct inode *ip; - ino_t change; - struct ucred *cred; - int type; -{ - struct dquot *dq = ip->i_dquot[type]; - ino_t ncurinodes = dq->dq_curinodes + change; - - /* - * If user would exceed their hard limit, disallow inode allocation. - */ - if (ncurinodes >= dq->dq_ihardlimit && dq->dq_ihardlimit) { - if ((dq->dq_flags & DQ_INODS) == 0 && - ip->i_uid == cred->cr_uid) { - uprintf("\n%s: write failed, %s inode limit reached\n", - ITOV(ip)->v_mount->mnt_stat.f_mntonname, - quotatypes[type]); - dq->dq_flags |= DQ_INODS; - } - return (EDQUOT); - } - /* - * If user is over their soft limit for too long, disallow inode - * allocation. Reset time limit as they cross their soft limit. - */ - if (ncurinodes >= dq->dq_isoftlimit && dq->dq_isoftlimit) { - if (dq->dq_curinodes < dq->dq_isoftlimit) { - dq->dq_itime = time_second + - VFSTOUFS(ITOV(ip)->v_mount)->um_itime[type]; - if (ip->i_uid == cred->cr_uid) - uprintf("\n%s: warning, %s %s\n", - ITOV(ip)->v_mount->mnt_stat.f_mntonname, - quotatypes[type], "inode quota exceeded"); - return (0); - } - if (time_second > dq->dq_itime) { - if ((dq->dq_flags & DQ_INODS) == 0 && - ip->i_uid == cred->cr_uid) { - uprintf("\n%s: write failed, %s %s\n", - ITOV(ip)->v_mount->mnt_stat.f_mntonname, - quotatypes[type], - "inode quota exceeded for too long"); - dq->dq_flags |= DQ_INODS; - } - return (EDQUOT); - } - } - return (0); -} - -#ifdef DIAGNOSTIC -/* - * On filesystems with quotas enabled, it is an error for a file to change - * size and not to have a dquot structure associated with it. 
- */ -static void -chkdquot(ip) - struct inode *ip; -{ - struct ufsmount *ump = VFSTOUFS(ITOV(ip)->v_mount); - int i; - - for (i = 0; i < MAXQUOTAS; i++) { - if (ump->um_quotas[i] == NULLVP || - (ump->um_qflags[i] & (QTF_OPENING|QTF_CLOSING))) - continue; - if (ip->i_dquot[i] == NODQUOT) { - vprint("chkdquot: missing dquot", ITOV(ip)); - panic("chkdquot: missing dquot"); - } - } -} -#endif - -/* - * Code to process quotactl commands. - */ - -/* - * Q_QUOTAON - set up a quota file for a particular filesystem. - */ -int -quotaon(td, mp, type, fname) - struct thread *td; - struct mount *mp; - int type; - caddr_t fname; -{ - struct ufsmount *ump = VFSTOUFS(mp); - struct vnode *vp, **vpp; - struct vnode *nextvp; - struct dquot *dq; - int error, flags; - struct nameidata nd; - - error = suser_cred(td->td_ucred, PRISON_ROOT); - if (error) - return (error); - - vpp = &ump->um_quotas[type]; - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fname, td); - flags = FREAD | FWRITE; - error = vn_open(&nd, &flags, 0, -1); - if (error) - return (error); - NDFREE(&nd, NDF_ONLY_PNBUF); - vp = nd.ni_vp; - VOP_UNLOCK(vp, 0, td); - if (vp->v_type != VREG) { - (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td); - return (EACCES); - } - if (*vpp != vp) - quotaoff(td, mp, type); - ump->um_qflags[type] |= QTF_OPENING; - mp->mnt_flag |= MNT_QUOTA; - ASSERT_VOP_LOCKED(vp, "quotaon"); - vp->v_vflag |= VV_SYSTEM; - *vpp = vp; - /* - * Save the credential of the process that turned on quotas. - * Set up the time limits for this quota. - */ - ump->um_cred[type] = crhold(td->td_ucred); - ump->um_btime[type] = MAX_DQ_TIME; - ump->um_itime[type] = MAX_IQ_TIME; - if (dqget(NULLVP, 0, ump, type, &dq) == 0) { - if (dq->dq_btime > 0) - ump->um_btime[type] = dq->dq_btime; - if (dq->dq_itime > 0) - ump->um_itime[type] = dq->dq_itime; - dqrele(NULLVP, dq); - } - /* - * Search vnodes associated with this mount point, - * adding references to quota file being opened. - * NB: only need to add dquot's for inodes being modified. - */ - MNT_ILOCK(mp); -again: - for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nextvp) { - if (vp->v_mount != mp) - goto again; - nextvp = TAILQ_NEXT(vp, v_nmntvnodes); - VI_LOCK(vp); - MNT_IUNLOCK(mp); - if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { - MNT_ILOCK(mp); - goto again; - } - if (vp->v_type == VNON || vp->v_writecount == 0) { - VOP_UNLOCK(vp, 0, td); - vrele(vp); - MNT_ILOCK(mp); - continue; - } - error = getinoquota(VTOI(vp)); - VOP_UNLOCK(vp, 0, td); - vrele(vp); - MNT_ILOCK(mp); - if (error) - break; - if (TAILQ_NEXT(vp, v_nmntvnodes) != nextvp) - goto again; - } - MNT_IUNLOCK(mp); - ump->um_qflags[type] &= ~QTF_OPENING; - if (error) - quotaoff(td, mp, type); - return (error); -} - -/* - * Q_QUOTAOFF - turn off disk quotas for a filesystem. - */ -int -quotaoff(td, mp, type) - struct thread *td; - struct mount *mp; - int type; -{ - struct vnode *vp; - struct vnode *qvp, *nextvp; - struct ufsmount *ump = VFSTOUFS(mp); - struct dquot *dq; - struct inode *ip; - int error; - - error = suser_cred(td->td_ucred, PRISON_ROOT); - if (error) - return (error); - - if ((qvp = ump->um_quotas[type]) == NULLVP) - return (0); - ump->um_qflags[type] |= QTF_CLOSING; - /* - * Search vnodes associated with this mount point, - * deleting any references to quota file being closed. 
- */ - MNT_ILOCK(mp); -again: - for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nextvp) { - if (vp->v_mount != mp) - goto again; - nextvp = TAILQ_NEXT(vp, v_nmntvnodes); - - VI_LOCK(vp); - MNT_IUNLOCK(mp); - if (vp->v_type == VNON) { - VI_UNLOCK(vp); - MNT_ILOCK(mp); - continue; - } - if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { - MNT_ILOCK(mp); - goto again; - } - ip = VTOI(vp); - dq = ip->i_dquot[type]; - ip->i_dquot[type] = NODQUOT; - dqrele(vp, dq); - VOP_UNLOCK(vp, 0, td); - vrele(vp); - MNT_ILOCK(mp); - if (TAILQ_NEXT(vp, v_nmntvnodes) != nextvp) - goto again; - } - MNT_IUNLOCK(mp); - dqflush(qvp); - ASSERT_VOP_LOCKED(qvp, "quotaoff"); - qvp->v_vflag &= ~VV_SYSTEM; - error = vn_close(qvp, FREAD|FWRITE, td->td_ucred, td); - ump->um_quotas[type] = NULLVP; - crfree(ump->um_cred[type]); - ump->um_cred[type] = NOCRED; - ump->um_qflags[type] &= ~QTF_CLOSING; - for (type = 0; type < MAXQUOTAS; type++) - if (ump->um_quotas[type] != NULLVP) - break; - if (type == MAXQUOTAS) - mp->mnt_flag &= ~MNT_QUOTA; - return (error); -} - -/* - * Q_GETQUOTA - return current values in a dqblk structure. - */ -int -getquota(td, mp, id, type, addr) - struct thread *td; - struct mount *mp; - u_long id; - int type; - caddr_t addr; -{ - struct dquot *dq; - int error; - - switch (type) { - case USRQUOTA: - if ((td->td_ucred->cr_uid != id) && !unprivileged_get_quota) { - error = suser_cred(td->td_ucred, PRISON_ROOT); - if (error) - return (error); - } - break; - - case GRPQUOTA: - if (!groupmember(id, td->td_ucred) && !unprivileged_get_quota) { - error = suser_cred(td->td_ucred, PRISON_ROOT); - if (error) - return (error); - } - break; - - default: - return (EINVAL); - } - - error = dqget(NULLVP, id, VFSTOUFS(mp), type, &dq); - if (error) - return (error); - error = copyout((caddr_t)&dq->dq_dqb, addr, sizeof (struct dqblk)); - dqrele(NULLVP, dq); - return (error); -} - -/* - * Q_SETQUOTA - assign an entire dqblk structure. - */ -int -setquota(td, mp, id, type, addr) - struct thread *td; - struct mount *mp; - u_long id; - int type; - caddr_t addr; -{ - struct dquot *dq; - struct dquot *ndq; - struct ufsmount *ump = VFSTOUFS(mp); - struct dqblk newlim; - int error; - - error = suser_cred(td->td_ucred, PRISON_ROOT); - if (error) - return (error); - - error = copyin(addr, (caddr_t)&newlim, sizeof (struct dqblk)); - if (error) - return (error); - error = dqget(NULLVP, id, ump, type, &ndq); - if (error) - return (error); - dq = ndq; - while (dq->dq_flags & DQ_LOCK) { - dq->dq_flags |= DQ_WANT; - (void) tsleep(dq, PINOD+1, "setqta", 0); - } - /* - * Copy all but the current values. - * Reset time limit if previously had no soft limit or were - * under it, but now have a soft limit and are over it. 
- */ - newlim.dqb_curblocks = dq->dq_curblocks; - newlim.dqb_curinodes = dq->dq_curinodes; - if (dq->dq_id != 0) { - newlim.dqb_btime = dq->dq_btime; - newlim.dqb_itime = dq->dq_itime; - } - if (newlim.dqb_bsoftlimit && - dq->dq_curblocks >= newlim.dqb_bsoftlimit && - (dq->dq_bsoftlimit == 0 || dq->dq_curblocks < dq->dq_bsoftlimit)) - newlim.dqb_btime = time_second + ump->um_btime[type]; - if (newlim.dqb_isoftlimit && - dq->dq_curinodes >= newlim.dqb_isoftlimit && - (dq->dq_isoftlimit == 0 || dq->dq_curinodes < dq->dq_isoftlimit)) - newlim.dqb_itime = time_second + ump->um_itime[type]; - dq->dq_dqb = newlim; - if (dq->dq_curblocks < dq->dq_bsoftlimit) - dq->dq_flags &= ~DQ_BLKS; - if (dq->dq_curinodes < dq->dq_isoftlimit) - dq->dq_flags &= ~DQ_INODS; - if (dq->dq_isoftlimit == 0 && dq->dq_bsoftlimit == 0 && - dq->dq_ihardlimit == 0 && dq->dq_bhardlimit == 0) - dq->dq_flags |= DQ_FAKE; - else - dq->dq_flags &= ~DQ_FAKE; - dq->dq_flags |= DQ_MOD; - dqrele(NULLVP, dq); - return (0); -} - -/* - * Q_SETUSE - set current inode and block usage. - */ -int -setuse(td, mp, id, type, addr) - struct thread *td; - struct mount *mp; - u_long id; - int type; - caddr_t addr; -{ - struct dquot *dq; - struct ufsmount *ump = VFSTOUFS(mp); - struct dquot *ndq; - struct dqblk usage; - int error; - - error = suser_cred(td->td_ucred, PRISON_ROOT); - if (error) - return (error); - - error = copyin(addr, (caddr_t)&usage, sizeof (struct dqblk)); - if (error) - return (error); - error = dqget(NULLVP, id, ump, type, &ndq); - if (error) - return (error); - dq = ndq; - while (dq->dq_flags & DQ_LOCK) { - dq->dq_flags |= DQ_WANT; - (void) tsleep(dq, PINOD+1, "setuse", 0); - } - /* - * Reset time limit if have a soft limit and were - * previously under it, but are now over it. - */ - if (dq->dq_bsoftlimit && dq->dq_curblocks < dq->dq_bsoftlimit && - usage.dqb_curblocks >= dq->dq_bsoftlimit) - dq->dq_btime = time_second + ump->um_btime[type]; - if (dq->dq_isoftlimit && dq->dq_curinodes < dq->dq_isoftlimit && - usage.dqb_curinodes >= dq->dq_isoftlimit) - dq->dq_itime = time_second + ump->um_itime[type]; - dq->dq_curblocks = usage.dqb_curblocks; - dq->dq_curinodes = usage.dqb_curinodes; - if (dq->dq_curblocks < dq->dq_bsoftlimit) - dq->dq_flags &= ~DQ_BLKS; - if (dq->dq_curinodes < dq->dq_isoftlimit) - dq->dq_flags &= ~DQ_INODS; - dq->dq_flags |= DQ_MOD; - dqrele(NULLVP, dq); - return (0); -} - -/* - * Q_SYNC - sync quota files to disk. - */ -int -qsync(mp) - struct mount *mp; -{ - struct ufsmount *ump = VFSTOUFS(mp); - struct thread *td = curthread; /* XXX */ - struct vnode *vp, *nextvp; - struct dquot *dq; - int i, error; - - /* - * Check if the mount point has any quotas. - * If not, simply return. - */ - for (i = 0; i < MAXQUOTAS; i++) - if (ump->um_quotas[i] != NULLVP) - break; - if (i == MAXQUOTAS) - return (0); - /* - * Search vnodes associated with this mount point, - * synchronizing any modified dquot structures. 
- */ - MNT_ILOCK(mp); -again: - for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nextvp) { - if (vp->v_mount != mp) - goto again; - nextvp = TAILQ_NEXT(vp, v_nmntvnodes); - VI_LOCK(vp); - MNT_IUNLOCK(mp); - if (vp->v_type == VNON) { - VI_UNLOCK(vp); - MNT_ILOCK(mp); - continue; - } - error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td); - if (error) { - MNT_ILOCK(mp); - if (error == ENOENT) - goto again; - continue; - } - for (i = 0; i < MAXQUOTAS; i++) { - dq = VTOI(vp)->i_dquot[i]; - if (dq != NODQUOT && (dq->dq_flags & DQ_MOD)) - dqsync(vp, dq); - } - vput(vp); - MNT_ILOCK(mp); - if (TAILQ_NEXT(vp, v_nmntvnodes) != nextvp) - goto again; - } - MNT_IUNLOCK(mp); - return (0); -} - -/* - * Code pertaining to management of the in-core dquot data structures. - */ -#define DQHASH(dqvp, id) \ - (&dqhashtbl[((((intptr_t)(dqvp)) >> 8) + id) & dqhash]) -static LIST_HEAD(dqhash, dquot) *dqhashtbl; -static u_long dqhash; - -/* - * Dquot free list. - */ -#define DQUOTINC 5 /* minimum free dquots desired */ -static TAILQ_HEAD(dqfreelist, dquot) dqfreelist; -static long numdquot, desireddquot = DQUOTINC; - -/* - * Initialize the quota system. - */ -void -dqinit() -{ - - dqhashtbl = hashinit(desiredvnodes, M_DQUOT, &dqhash); - TAILQ_INIT(&dqfreelist); -} - -/* - * Shut down the quota system. - */ -void -dquninit() -{ - struct dquot *dq; - - hashdestroy(dqhashtbl, M_DQUOT, dqhash); - while ((dq = TAILQ_FIRST(&dqfreelist)) != NULL) { - TAILQ_REMOVE(&dqfreelist, dq, dq_freelist); - free(dq, M_DQUOT); - } -} - -/* - * Obtain a dquot structure for the specified identifier and quota file - * reading the information from the file if necessary. - */ -static int -dqget(vp, id, ump, type, dqp) - struct vnode *vp; - u_long id; - struct ufsmount *ump; - int type; - struct dquot **dqp; -{ - struct thread *td = curthread; /* XXX */ - struct dquot *dq; - struct dqhash *dqh; - struct vnode *dqvp; - struct iovec aiov; - struct uio auio; - int error; - - dqvp = ump->um_quotas[type]; - if (dqvp == NULLVP || (ump->um_qflags[type] & QTF_CLOSING)) { - *dqp = NODQUOT; - return (EINVAL); - } - /* - * Check the cache first. - */ - dqh = DQHASH(dqvp, id); - LIST_FOREACH(dq, dqh, dq_hash) { - if (dq->dq_id != id || - dq->dq_ump->um_quotas[dq->dq_type] != dqvp) - continue; - /* - * Cache hit with no references. Take - * the structure off the free list. - */ - if (dq->dq_cnt == 0) - TAILQ_REMOVE(&dqfreelist, dq, dq_freelist); - DQREF(dq); - *dqp = dq; - return (0); - } - /* - * Not in cache, allocate a new one. - */ - if (TAILQ_FIRST(&dqfreelist) == NODQUOT && - numdquot < MAXQUOTAS * desiredvnodes) - desireddquot += DQUOTINC; - if (numdquot < desireddquot) { - dq = (struct dquot *)malloc(sizeof *dq, M_DQUOT, - M_WAITOK | M_ZERO); - numdquot++; - } else { - if ((dq = TAILQ_FIRST(&dqfreelist)) == NULL) { - tablefull("dquot"); - *dqp = NODQUOT; - return (EUSERS); - } - if (dq->dq_cnt || (dq->dq_flags & DQ_MOD)) - panic("dqget: free dquot isn't"); - TAILQ_REMOVE(&dqfreelist, dq, dq_freelist); - if (dq->dq_ump != NULL) - LIST_REMOVE(dq, dq_hash); - } - /* - * Initialize the contents of the dquot structure. 
- */ - if (vp != dqvp) - vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY, td); - LIST_INSERT_HEAD(dqh, dq, dq_hash); - DQREF(dq); - dq->dq_flags = DQ_LOCK; - dq->dq_id = id; - dq->dq_ump = ump; - dq->dq_type = type; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - aiov.iov_base = (caddr_t)&dq->dq_dqb; - aiov.iov_len = sizeof (struct dqblk); - auio.uio_resid = sizeof (struct dqblk); - auio.uio_offset = (off_t)(id * sizeof (struct dqblk)); - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_rw = UIO_READ; - auio.uio_td = (struct thread *)0; - error = VOP_READ(dqvp, &auio, 0, ump->um_cred[type]); - if (auio.uio_resid == sizeof(struct dqblk) && error == 0) - bzero((caddr_t)&dq->dq_dqb, sizeof(struct dqblk)); - if (vp != dqvp) - VOP_UNLOCK(dqvp, 0, td); - if (dq->dq_flags & DQ_WANT) - wakeup(dq); - dq->dq_flags = 0; - /* - * I/O error in reading quota file, release - * quota structure and reflect problem to caller. - */ - if (error) { - LIST_REMOVE(dq, dq_hash); - dqrele(vp, dq); - *dqp = NODQUOT; - return (error); - } - /* - * Check for no limit to enforce. - * Initialize time values if necessary. - */ - if (dq->dq_isoftlimit == 0 && dq->dq_bsoftlimit == 0 && - dq->dq_ihardlimit == 0 && dq->dq_bhardlimit == 0) - dq->dq_flags |= DQ_FAKE; - if (dq->dq_id != 0) { - if (dq->dq_btime == 0) - dq->dq_btime = time_second + ump->um_btime[type]; - if (dq->dq_itime == 0) - dq->dq_itime = time_second + ump->um_itime[type]; - } - *dqp = dq; - return (0); -} - -#ifdef DIAGNOSTIC -/* - * Obtain a reference to a dquot. - */ -static void -dqref(dq) - struct dquot *dq; -{ - - dq->dq_cnt++; -} -#endif - -/* - * Release a reference to a dquot. - */ -void -dqrele(vp, dq) - struct vnode *vp; - struct dquot *dq; -{ - - if (dq == NODQUOT) - return; - if (dq->dq_cnt > 1) { - dq->dq_cnt--; - return; - } - if (dq->dq_flags & DQ_MOD) - (void) dqsync(vp, dq); - if (--dq->dq_cnt > 0) - return; - TAILQ_INSERT_TAIL(&dqfreelist, dq, dq_freelist); -} - -/* - * Update the disk quota in the quota file. - */ -static int -dqsync(vp, dq) - struct vnode *vp; - struct dquot *dq; -{ - struct thread *td = curthread; /* XXX */ - struct vnode *dqvp; - struct iovec aiov; - struct uio auio; - int error; - - if (dq == NODQUOT) - panic("dqsync: dquot"); - if ((dq->dq_flags & DQ_MOD) == 0) - return (0); - if ((dqvp = dq->dq_ump->um_quotas[dq->dq_type]) == NULLVP) - panic("dqsync: file"); - (void) vn_write_suspend_wait(dqvp, NULL, V_WAIT); - if (vp != dqvp) - vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY, td); - while (dq->dq_flags & DQ_LOCK) { - dq->dq_flags |= DQ_WANT; - (void) tsleep(dq, PINOD+2, "dqsync", 0); - if ((dq->dq_flags & DQ_MOD) == 0) { - if (vp != dqvp) - VOP_UNLOCK(dqvp, 0, td); - return (0); - } - } - dq->dq_flags |= DQ_LOCK; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - aiov.iov_base = (caddr_t)&dq->dq_dqb; - aiov.iov_len = sizeof (struct dqblk); - auio.uio_resid = sizeof (struct dqblk); - auio.uio_offset = (off_t)(dq->dq_id * sizeof (struct dqblk)); - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_rw = UIO_WRITE; - auio.uio_td = (struct thread *)0; - error = VOP_WRITE(dqvp, &auio, 0, dq->dq_ump->um_cred[dq->dq_type]); - if (auio.uio_resid && error == 0) - error = EIO; - if (dq->dq_flags & DQ_WANT) - wakeup(dq); - dq->dq_flags &= ~(DQ_MOD|DQ_LOCK|DQ_WANT); - if (vp != dqvp) - VOP_UNLOCK(dqvp, 0, td); - return (error); -} - -/* - * Flush all entries from the cache for a particular vnode. 
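Both dqget() and dqsync() above treat the quota file as a flat array of struct dqblk records indexed by id: each transfer is sizeof(struct dqblk) bytes at offset id * sizeof(struct dqblk), and a read that comes back entirely short is taken to mean "no record yet", so the limits are zeroed. A userland sketch of the same record addressing, with a hypothetical record type in place of struct dqblk, might be:

#include <sys/types.h>
#include <stdio.h>
#include <stdint.h>

/* Hypothetical stand-in for struct dqblk; only the layout idea matters. */
struct quota_record {
	uint64_t hardlimit, softlimit, curusage;
	int64_t  timelimit;
};

/* Read the record for `id', treating a missing record as all zeroes. */
int
read_quota_record(FILE *qf, unsigned long id, struct quota_record *qr)
{
	struct quota_record zero = { 0 };

	if (fseeko(qf, (off_t)(id * sizeof(*qr)), SEEK_SET) != 0)
		return (-1);
	if (fread(qr, sizeof(*qr), 1, qf) != 1)
		*qr = zero;	/* no record on disk yet */
	return (0);
}
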
- */ -static void -dqflush(vp) - struct vnode *vp; -{ - struct dquot *dq, *nextdq; - struct dqhash *dqh; - - /* - * Move all dquot's that used to refer to this quota - * file off their hash chains (they will eventually - * fall off the head of the free list and be re-used). - */ - for (dqh = &dqhashtbl[dqhash]; dqh >= dqhashtbl; dqh--) { - for (dq = LIST_FIRST(dqh); dq; dq = nextdq) { - nextdq = LIST_NEXT(dq, dq_hash); - if (dq->dq_ump->um_quotas[dq->dq_type] != vp) - continue; - if (dq->dq_cnt) - panic("dqflush: stray dquot"); - LIST_REMOVE(dq, dq_hash); - dq->dq_ump = (struct ufsmount *)0; - } - } -} -#endif diff --git a/src/sys/ufs/ufs/ufs_vfsops.c b/src/sys/ufs/ufs/ufs_vfsops.c deleted file mode 100644 index 3c064a5..0000000 --- a/src/sys/ufs/ufs/ufs_vfsops.c +++ /dev/null @@ -1,229 +0,0 @@ -#if 0 -/* - * Copyright (c) 1991, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * (c) UNIX System Laboratories, Inc. - * All or some portions of this file are derived from material licensed - * to the University of California by American Telephone and Telegraph - * Co. or Unix System Laboratories, Inc. and are reproduced herein with - * the permission of UNIX System Laboratories, Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)ufs_vfsops.c 8.8 (Berkeley) 5/20/95 - */ - -#include -__FBSDID("$FreeBSD: src/sys/ufs/ufs/ufs_vfsops.c,v 1.37 2003/06/15 06:36:19 rwatson Exp $"); - -#include "opt_quota.h" -#include "opt_ufs.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#ifdef UFS_DIRHASH -#include -#include -#endif - -MALLOC_DEFINE(M_UFSMNT, "UFS mount", "UFS mount structure"); -/* - * Make a filesystem operational. - * Nothing to do at the moment. 
- */ -/* ARGSUSED */ -int -ufs_start(mp, flags, td) - struct mount *mp; - int flags; - struct thread *td; -{ - - return (0); -} - -/* - * Return the root of a filesystem. - */ -int -ufs_root(mp, vpp) - struct mount *mp; - struct vnode **vpp; -{ - struct vnode *nvp; - int error; - - error = VFS_VGET(mp, (ino_t)ROOTINO, LK_EXCLUSIVE, &nvp); - if (error) - return (error); - *vpp = nvp; - return (0); -} - -/* - * Do operations associated with quotas - */ -int -ufs_quotactl(mp, cmds, uid, arg, td) - struct mount *mp; - int cmds; - uid_t uid; - caddr_t arg; - struct thread *td; -{ -#ifndef QUOTA - return (EOPNOTSUPP); -#else - int cmd, type, error; - - if (uid == -1) - uid = td->td_ucred->cr_ruid; - cmd = cmds >> SUBCMDSHIFT; - type = cmds & SUBCMDMASK; - if ((u_int)type >= MAXQUOTAS) - return (EINVAL); - - if (vfs_busy(mp, LK_NOWAIT, 0, td)) - return (0); - - switch (cmd) { - case Q_QUOTAON: - error = quotaon(td, mp, type, arg); - break; - - case Q_QUOTAOFF: - error = quotaoff(td, mp, type); - break; - - case Q_SETQUOTA: - error = setquota(td, mp, uid, type, arg); - break; - - case Q_SETUSE: - error = setuse(td, mp, uid, type, arg); - break; - - case Q_GETQUOTA: - error = getquota(td, mp, uid, type, arg); - break; - - case Q_SYNC: - error = qsync(mp); - break; - - default: - error = EINVAL; - break; - } - vfs_unbusy(mp, td); - return (error); -#endif -} - -/* - * Initial UFS filesystems, done only once. - */ -int -ufs_init(vfsp) - struct vfsconf *vfsp; -{ - - ufs_ihashinit(); -#ifdef QUOTA - dqinit(); -#endif -#ifdef UFS_DIRHASH - ufsdirhash_init(); -#endif - return (0); -} - -/* - * Uninitialise UFS filesystems, done before module unload. - */ -int -ufs_uninit(vfsp) - struct vfsconf *vfsp; -{ - - ufs_ihashuninit(); -#ifdef QUOTA - dquninit(); -#endif -#ifdef UFS_DIRHASH - ufsdirhash_uninit(); -#endif - return (0); -} - -/* - * This is the generic part of fhtovp called after the underlying - * filesystem has validated the file handle. - * - * Call the VFS_CHECKEXP beforehand to verify access. - */ -int -ufs_fhtovp(mp, ufhp, vpp) - struct mount *mp; - struct ufid *ufhp; - struct vnode **vpp; -{ - struct inode *ip; - struct vnode *nvp; - int error; - - error = VFS_VGET(mp, ufhp->ufid_ino, LK_EXCLUSIVE, &nvp); - if (error) { - *vpp = NULLVP; - return (error); - } - ip = VTOI(nvp); - if (ip->i_mode == 0 || ip->i_gen != ufhp->ufid_gen || - ip->i_effnlink <= 0) { - vput(nvp); - *vpp = NULLVP; - return (ESTALE); - } - *vpp = nvp; - return (0); -} -#endif diff --git a/src/sys/ufs/ufs/ufs_vnops.c b/src/sys/ufs/ufs/ufs_vnops.c deleted file mode 100644 index 56ca91d..0000000 --- a/src/sys/ufs/ufs/ufs_vnops.c +++ /dev/null @@ -1,2816 +0,0 @@ -#if 0 -/* - * Copyright (c) 1982, 1986, 1989, 1993, 1995 - * The Regents of the University of California. All rights reserved. - * (c) UNIX System Laboratories, Inc. - * All or some portions of this file are derived from material licensed - * to the University of California by American Telephone and Telegraph - * Co. or Unix System Laboratories, Inc. and are reproduced herein with - * the permission of UNIX System Laboratories, Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 - */ - -#include -__FBSDID("$FreeBSD: src/sys/ufs/ufs/ufs_vnops.c,v 1.234 2003/10/18 14:10:27 phk Exp $"); - -#include "opt_mac.h" -#include "opt_quota.h" -#include "opt_suiddir.h" -#include "opt_ufs.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include /* XXX */ - -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#ifdef UFS_DIRHASH -#include -#endif - -static int ufs_access(struct vop_access_args *); -static int ufs_advlock(struct vop_advlock_args *); -static int ufs_chmod(struct vnode *, int, struct ucred *, struct thread *); -static int ufs_chown(struct vnode *, uid_t, gid_t, struct ucred *, struct thread *); -static int ufs_close(struct vop_close_args *); -static int ufs_create(struct vop_create_args *); -static int ufs_getattr(struct vop_getattr_args *); -static int ufs_link(struct vop_link_args *); -static int ufs_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *); -static int ufs_mkdir(struct vop_mkdir_args *); -static int ufs_mknod(struct vop_mknod_args *); -static int ufs_pathconf(struct vop_pathconf_args *); -static int ufs_print(struct vop_print_args *); -static int ufs_readlink(struct vop_readlink_args *); -static int ufs_remove(struct vop_remove_args *); -static int ufs_rename(struct vop_rename_args *); -static int ufs_rmdir(struct vop_rmdir_args *); -static int ufs_setattr(struct vop_setattr_args *); -static int ufs_strategy(struct vop_strategy_args *); -static int ufs_symlink(struct vop_symlink_args *); -static int ufs_whiteout(struct vop_whiteout_args *); -static int ufsfifo_close(struct vop_close_args *); -static int ufsfifo_kqfilter(struct vop_kqfilter_args *); -static int ufsfifo_read(struct vop_read_args *); -static int ufsfifo_write(struct vop_write_args *); -static int ufsspec_close(struct vop_close_args *); -static int ufsspec_read(struct vop_read_args *); -static int 
ufsspec_write(struct vop_write_args *); -static int filt_ufsread(struct knote *kn, long hint); -static int filt_ufswrite(struct knote *kn, long hint); -static int filt_ufsvnode(struct knote *kn, long hint); -static void filt_ufsdetach(struct knote *kn); -static int ufs_kqfilter(struct vop_kqfilter_args *ap); - -union _qcvt { - int64_t qcvt; - int32_t val[2]; -}; -#define SETHIGH(q, h) { \ - union _qcvt tmp; \ - tmp.qcvt = (q); \ - tmp.val[_QUAD_HIGHWORD] = (h); \ - (q) = tmp.qcvt; \ -} -#define SETLOW(q, l) { \ - union _qcvt tmp; \ - tmp.qcvt = (q); \ - tmp.val[_QUAD_LOWWORD] = (l); \ - (q) = tmp.qcvt; \ -} - -/* - * A virgin directory (no blushing please). - */ -static struct dirtemplate mastertemplate = { - 0, 12, DT_DIR, 1, ".", - 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." -}; -static struct odirtemplate omastertemplate = { - 0, 12, 1, ".", - 0, DIRBLKSIZ - 12, 2, ".." -}; - -void -ufs_itimes(vp) - struct vnode *vp; -{ - struct inode *ip; - struct timespec ts; - - ip = VTOI(vp); - if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0) - return; - if ((vp->v_type == VBLK || vp->v_type == VCHR) && !DOINGSOFTDEP(vp)) - ip->i_flag |= IN_LAZYMOD; - else - ip->i_flag |= IN_MODIFIED; - if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { - vfs_timestamp(&ts); - if (ip->i_flag & IN_ACCESS) { - DIP(ip, i_atime) = ts.tv_sec; - DIP(ip, i_atimensec) = ts.tv_nsec; - } - if (ip->i_flag & IN_UPDATE) { - DIP(ip, i_mtime) = ts.tv_sec; - DIP(ip, i_mtimensec) = ts.tv_nsec; - ip->i_modrev++; - } - if (ip->i_flag & IN_CHANGE) { - DIP(ip, i_ctime) = ts.tv_sec; - DIP(ip, i_ctimensec) = ts.tv_nsec; - } - } - ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); -} - -/* - * Create a regular file - */ -static int -ufs_create(ap) - struct vop_create_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - struct vattr *a_vap; - } */ *ap; -{ - int error; - - error = - ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), - ap->a_dvp, ap->a_vpp, ap->a_cnp); - if (error) - return (error); - VN_KNOTE(ap->a_dvp, NOTE_WRITE); - return (0); -} - -/* - * Mknod vnode call - */ -/* ARGSUSED */ -static int -ufs_mknod(ap) - struct vop_mknod_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - struct vattr *a_vap; - } */ *ap; -{ - struct vattr *vap = ap->a_vap; - struct vnode **vpp = ap->a_vpp; - struct inode *ip; - ino_t ino; - int error; - - error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), - ap->a_dvp, vpp, ap->a_cnp); - if (error) - return (error); - VN_KNOTE(ap->a_dvp, NOTE_WRITE); - ip = VTOI(*vpp); - ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; - if (vap->va_rdev != VNOVAL) { - /* - * Want to be able to use this to make badblock - * inodes, so don't truncate the dev number. - */ - DIP(ip, i_rdev) = vap->va_rdev; - } - /* - * Remove inode, then reload it through VFS_VGET so it is - * checked to see if it is an alias of an existing entry in - * the inode cache. - */ - vput(*vpp); - (*vpp)->v_type = VNON; - ino = ip->i_number; /* Save this before vgone() invalidates ip. */ - vgone(*vpp); - error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp); - if (error) { - *vpp = NULL; - return (error); - } - return (0); -} - - -/* - * Close called. - * - * Update the times on the inode. 
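ufs_itimes() above funnels the pending IN_ACCESS, IN_UPDATE and IN_CHANGE flags into atime, mtime and ctime updates in one place, and only touches the timestamps when the filesystem is writable. A small userland sketch of that flag-to-timestamp mapping, with hypothetical names in place of the inode flags and DIP() macros, might read:

#include <time.h>

#define WANT_ACCESS 0x01	/* like IN_ACCESS: update atime */
#define WANT_UPDATE 0x02	/* like IN_UPDATE: update mtime */
#define WANT_CHANGE 0x04	/* like IN_CHANGE: update ctime */

struct simple_times {
	time_t atime, mtime, ctime;
};

/* Fold the pending flags into the timestamps, then clear the flags. */
void
apply_time_flags(struct simple_times *t, int *flags, int readonly)
{
	time_t now = time(NULL);

	if (!readonly) {
		if (*flags & WANT_ACCESS)
			t->atime = now;
		if (*flags & WANT_UPDATE)
			t->mtime = now;
		if (*flags & WANT_CHANGE)
			t->ctime = now;
	}
	*flags &= ~(WANT_ACCESS | WANT_UPDATE | WANT_CHANGE);
}
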
- */ -/* ARGSUSED */ -static int -ufs_close(ap) - struct vop_close_args /* { - struct vnode *a_vp; - int a_fflag; - struct ucred *a_cred; - struct thread *a_td; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - struct mount *mp; - - VI_LOCK(vp); - if (vp->v_usecount > 1) { - ufs_itimes(vp); - VI_UNLOCK(vp); - } else { - VI_UNLOCK(vp); - /* - * If we are closing the last reference to an unlinked - * file, then it will be freed by the inactive routine. - * Because the freeing causes a the filesystem to be - * modified, it must be held up during periods when the - * filesystem is suspended. - * - * XXX - EAGAIN is returned to prevent vn_close from - * repeating the vrele operation. - */ - if (vp->v_type == VREG && VTOI(vp)->i_effnlink == 0) { - (void) vn_start_write(vp, &mp, V_WAIT); - vrele(vp); - vn_finished_write(mp); - return (EAGAIN); - } - } - return (0); -} - -static int -ufs_access(ap) - struct vop_access_args /* { - struct vnode *a_vp; - int a_mode; - struct ucred *a_cred; - struct thread *a_td; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - struct inode *ip = VTOI(vp); - mode_t mode = ap->a_mode; - int error; -#ifdef UFS_ACL - struct acl *acl; -#endif - - /* - * Disallow write attempts on read-only filesystems; - * unless the file is a socket, fifo, or a block or - * character device resident on the filesystem. - */ - if (mode & VWRITE) { - switch (vp->v_type) { - case VDIR: - case VLNK: - case VREG: - if (vp->v_mount->mnt_flag & MNT_RDONLY) - return (EROFS); -#ifdef QUOTA - if ((error = getinoquota(ip)) != 0) - return (error); -#endif - break; - default: - break; - } - } - - /* If immutable bit set, nobody gets to write it. */ - if ((mode & VWRITE) && (ip->i_flags & (IMMUTABLE | SF_SNAPSHOT))) - return (EPERM); - -#ifdef UFS_ACL - if ((vp->v_mount->mnt_flag & MNT_ACLS) != 0) { - MALLOC(acl, struct acl *, sizeof(*acl), M_ACL, M_WAITOK); - error = VOP_GETACL(vp, ACL_TYPE_ACCESS, acl, ap->a_cred, - ap->a_td); - switch (error) { - case EOPNOTSUPP: - error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, - ip->i_gid, ap->a_mode, ap->a_cred, NULL); - break; - case 0: - error = vaccess_acl_posix1e(vp->v_type, ip->i_uid, - ip->i_gid, acl, ap->a_mode, ap->a_cred, NULL); - break; - default: - printf( -"ufs_access(): Error retrieving ACL on object (%d).\n", - error); - /* - * XXX: Fall back until debugged. Should - * eventually possibly log an error, and return - * EPERM for safety. 
- */ - error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, - ip->i_gid, ap->a_mode, ap->a_cred, NULL); - } - FREE(acl, M_ACL); - } else -#endif /* !UFS_ACL */ - error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid, - ap->a_mode, ap->a_cred, NULL); - return (error); -} - -/* ARGSUSED */ -static int -ufs_getattr(ap) - struct vop_getattr_args /* { - struct vnode *a_vp; - struct vattr *a_vap; - struct ucred *a_cred; - struct thread *a_td; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - struct inode *ip = VTOI(vp); - struct vattr *vap = ap->a_vap; - - ufs_itimes(vp); - /* - * Copy from inode table - */ - vap->va_fsid = dev2udev(ip->i_dev); - vap->va_fileid = ip->i_number; - vap->va_mode = ip->i_mode & ~IFMT; - vap->va_nlink = ip->i_effnlink; - vap->va_uid = ip->i_uid; - vap->va_gid = ip->i_gid; - if (ip->i_ump->um_fstype == UFS1) { - vap->va_rdev = ip->i_din1->di_rdev; - vap->va_size = ip->i_din1->di_size; - vap->va_atime.tv_sec = ip->i_din1->di_atime; - vap->va_atime.tv_nsec = ip->i_din1->di_atimensec; - vap->va_mtime.tv_sec = ip->i_din1->di_mtime; - vap->va_mtime.tv_nsec = ip->i_din1->di_mtimensec; - vap->va_ctime.tv_sec = ip->i_din1->di_ctime; - vap->va_ctime.tv_nsec = ip->i_din1->di_ctimensec; - vap->va_birthtime.tv_sec = 0; - vap->va_birthtime.tv_nsec = 0; - vap->va_bytes = dbtob((u_quad_t)ip->i_din1->di_blocks); - } else { - vap->va_rdev = ip->i_din2->di_rdev; - vap->va_size = ip->i_din2->di_size; - vap->va_atime.tv_sec = ip->i_din2->di_atime; - vap->va_atime.tv_nsec = ip->i_din2->di_atimensec; - vap->va_mtime.tv_sec = ip->i_din2->di_mtime; - vap->va_mtime.tv_nsec = ip->i_din2->di_mtimensec; - vap->va_ctime.tv_sec = ip->i_din2->di_ctime; - vap->va_ctime.tv_nsec = ip->i_din2->di_ctimensec; - vap->va_birthtime.tv_sec = ip->i_din2->di_birthtime; - vap->va_birthtime.tv_nsec = ip->i_din2->di_birthnsec; - vap->va_bytes = dbtob((u_quad_t)ip->i_din2->di_blocks); - } - vap->va_flags = ip->i_flags; - vap->va_gen = ip->i_gen; - vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; - vap->va_type = IFTOVT(ip->i_mode); - vap->va_filerev = ip->i_modrev; - return (0); -} - -/* - * Set attribute vnode op. called from several syscalls - */ -static int -ufs_setattr(ap) - struct vop_setattr_args /* { - struct vnode *a_vp; - struct vattr *a_vap; - struct ucred *a_cred; - struct thread *a_td; - } */ *ap; -{ - struct vattr *vap = ap->a_vap; - struct vnode *vp = ap->a_vp; - struct inode *ip = VTOI(vp); - struct ucred *cred = ap->a_cred; - struct thread *td = ap->a_td; - int error; - - /* - * Check for unsettable attributes. - */ - if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || - (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || - (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || - ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { - return (EINVAL); - } - if (vap->va_flags != VNOVAL) { - if (vp->v_mount->mnt_flag & MNT_RDONLY) - return (EROFS); - /* - * Callers may only modify the file flags on objects they - * have VADMIN rights for. - */ - if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) - return (error); - /* - * Unprivileged processes and privileged processes in - * jail() are not permitted to unset system flags, or - * modify flags if any system flags are set. - * Privileged non-jail processes may not modify system flags - * if securelevel > 0 and any existing system flags are set. 
- */ - if (!suser_cred(cred, PRISON_ROOT)) { - if (ip->i_flags - & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) { - error = securelevel_gt(cred, 0); - if (error) - return (error); - } - /* Snapshot flag cannot be set or cleared */ - if (((vap->va_flags & SF_SNAPSHOT) != 0 && - (ip->i_flags & SF_SNAPSHOT) == 0) || - ((vap->va_flags & SF_SNAPSHOT) == 0 && - (ip->i_flags & SF_SNAPSHOT) != 0)) - return (EPERM); - ip->i_flags = vap->va_flags; - DIP(ip, i_flags) = vap->va_flags; - } else { - if (ip->i_flags - & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) || - (vap->va_flags & UF_SETTABLE) != vap->va_flags) - return (EPERM); - ip->i_flags &= SF_SETTABLE; - ip->i_flags |= (vap->va_flags & UF_SETTABLE); - DIP(ip, i_flags) = ip->i_flags; - } - ip->i_flag |= IN_CHANGE; - if (vap->va_flags & (IMMUTABLE | APPEND)) - return (0); - } - if (ip->i_flags & (IMMUTABLE | APPEND)) - return (EPERM); - /* - * Go through the fields and update iff not VNOVAL. - */ - if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { - if (vp->v_mount->mnt_flag & MNT_RDONLY) - return (EROFS); - if ((error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, - td)) != 0) - return (error); - } - if (vap->va_size != VNOVAL) { - /* - * Disallow write attempts on read-only filesystems; - * unless the file is a socket, fifo, or a block or - * character device resident on the filesystem. - */ - switch (vp->v_type) { - case VDIR: - return (EISDIR); - case VLNK: - case VREG: - if (vp->v_mount->mnt_flag & MNT_RDONLY) - return (EROFS); - if ((ip->i_flags & SF_SNAPSHOT) != 0) - return (EPERM); - break; - default: - break; - } - if ((error = UFS_TRUNCATE(vp, vap->va_size, IO_NORMAL, - cred, td)) != 0) - return (error); - } - if (vap->va_atime.tv_sec != VNOVAL || - vap->va_mtime.tv_sec != VNOVAL || - vap->va_birthtime.tv_sec != VNOVAL) { - if (vp->v_mount->mnt_flag & MNT_RDONLY) - return (EROFS); - if ((ip->i_flags & SF_SNAPSHOT) != 0) - return (EPERM); - /* - * From utimes(2): - * If times is NULL, ... The caller must be the owner of - * the file, have permission to write the file, or be the - * super-user. - * If times is non-NULL, ... The caller must be the owner of - * the file or be the super-user. - */ - if ((error = VOP_ACCESS(vp, VADMIN, cred, td)) && - ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || - (error = VOP_ACCESS(vp, VWRITE, cred, td)))) - return (error); - if (vap->va_atime.tv_sec != VNOVAL) - ip->i_flag |= IN_ACCESS; - if (vap->va_mtime.tv_sec != VNOVAL) - ip->i_flag |= IN_CHANGE | IN_UPDATE; - if (vap->va_birthtime.tv_sec != VNOVAL && - ip->i_ump->um_fstype == UFS2) - ip->i_flag |= IN_MODIFIED; - ufs_itimes(vp); - if (vap->va_atime.tv_sec != VNOVAL) { - DIP(ip, i_atime) = vap->va_atime.tv_sec; - DIP(ip, i_atimensec) = vap->va_atime.tv_nsec; - } - if (vap->va_mtime.tv_sec != VNOVAL) { - DIP(ip, i_mtime) = vap->va_mtime.tv_sec; - DIP(ip, i_mtimensec) = vap->va_mtime.tv_nsec; - } - if (vap->va_birthtime.tv_sec != VNOVAL && - ip->i_ump->um_fstype == UFS2) { - ip->i_din2->di_birthtime = vap->va_birthtime.tv_sec; - ip->i_din2->di_birthnsec = vap->va_birthtime.tv_nsec; - } - error = UFS_UPDATE(vp, 0); - if (error) - return (error); - } - error = 0; - if (vap->va_mode != (mode_t)VNOVAL) { - if (vp->v_mount->mnt_flag & MNT_RDONLY) - return (EROFS); - if ((ip->i_flags & SF_SNAPSHOT) != 0 && (vap->va_mode & - (S_IXUSR | S_IWUSR | S_IXGRP | S_IWGRP | S_IXOTH | S_IWOTH))) - return (EPERM); - error = ufs_chmod(vp, (int)vap->va_mode, cred, td); - } - VN_KNOTE(vp, NOTE_ATTRIB); - return (error); -} - -/* - * Change the mode on a file. 
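The time-setting branch of ufs_setattr() above encodes the utimes(2) rule quoted in its comment: with a NULL times pointer the caller may either own the file (VADMIN) or merely hold write permission, while explicit times require ownership. A condensed sketch of that decision, with hypothetical predicate functions standing in for the VOP_ACCESS() checks, might be:

#include <errno.h>
#include <stdbool.h>

/* Hypothetical stand-ins for the VADMIN / VWRITE access checks. */
bool caller_is_owner(void);
bool caller_can_write(void);

/*
 * Return 0 if the caller may set the timestamps: NULL times means
 * owner or write access suffices; explicit times require ownership.
 */
int
check_utimes_permission(bool times_is_null)
{
	if (caller_is_owner())
		return (0);
	if (times_is_null && caller_can_write())
		return (0);
	return (EPERM);
}
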
- * Inode must be locked before calling. - */ -static int -ufs_chmod(vp, mode, cred, td) - struct vnode *vp; - int mode; - struct ucred *cred; - struct thread *td; -{ - struct inode *ip = VTOI(vp); - int error; - - /* - * To modify the permissions on a file, must possess VADMIN - * for that file. - */ - if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) - return (error); - /* - * Privileged processes may set the sticky bit on non-directories, - * as well as set the setgid bit on a file with a group that the - * process is not a member of. Both of these are allowed in - * jail(8). - */ - if (vp->v_type != VDIR && (mode & S_ISTXT)) { - if (suser_cred(cred, PRISON_ROOT)) - return (EFTYPE); - } - if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) { - error = suser_cred(cred, PRISON_ROOT); - if (error) - return (error); - } - ip->i_mode &= ~ALLPERMS; - ip->i_mode |= (mode & ALLPERMS); - DIP(ip, i_mode) = ip->i_mode; - ip->i_flag |= IN_CHANGE; - return (0); -} - -/* - * Perform chown operation on inode ip; - * inode must be locked prior to call. - */ -static int -ufs_chown(vp, uid, gid, cred, td) - struct vnode *vp; - uid_t uid; - gid_t gid; - struct ucred *cred; - struct thread *td; -{ - struct inode *ip = VTOI(vp); - uid_t ouid; - gid_t ogid; - int error = 0; -#ifdef QUOTA - int i; - ufs2_daddr_t change; -#endif - - if (uid == (uid_t)VNOVAL) - uid = ip->i_uid; - if (gid == (gid_t)VNOVAL) - gid = ip->i_gid; - /* - * To modify the ownership of a file, must possess VADMIN - * for that file. - */ - if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) - return (error); - /* - * To change the owner of a file, or change the group of a file - * to a group of which we are not a member, the caller must - * have privilege. - */ - if ((uid != ip->i_uid || - (gid != ip->i_gid && !groupmember(gid, cred))) && - (error = suser_cred(cred, PRISON_ROOT))) - return (error); - ogid = ip->i_gid; - ouid = ip->i_uid; -#ifdef QUOTA - if ((error = getinoquota(ip)) != 0) - return (error); - if (ouid == uid) { - dqrele(vp, ip->i_dquot[USRQUOTA]); - ip->i_dquot[USRQUOTA] = NODQUOT; - } - if (ogid == gid) { - dqrele(vp, ip->i_dquot[GRPQUOTA]); - ip->i_dquot[GRPQUOTA] = NODQUOT; - } - change = DIP(ip, i_blocks); - (void) chkdq(ip, -change, cred, CHOWN); - (void) chkiq(ip, -1, cred, CHOWN); - for (i = 0; i < MAXQUOTAS; i++) { - dqrele(vp, ip->i_dquot[i]); - ip->i_dquot[i] = NODQUOT; - } -#endif - ip->i_gid = gid; - DIP(ip, i_gid) = gid; - ip->i_uid = uid; - DIP(ip, i_uid) = uid; -#ifdef QUOTA - if ((error = getinoquota(ip)) == 0) { - if (ouid == uid) { - dqrele(vp, ip->i_dquot[USRQUOTA]); - ip->i_dquot[USRQUOTA] = NODQUOT; - } - if (ogid == gid) { - dqrele(vp, ip->i_dquot[GRPQUOTA]); - ip->i_dquot[GRPQUOTA] = NODQUOT; - } - if ((error = chkdq(ip, change, cred, CHOWN)) == 0) { - if ((error = chkiq(ip, 1, cred, CHOWN)) == 0) - goto good; - else - (void) chkdq(ip, -change, cred, CHOWN|FORCE); - } - for (i = 0; i < MAXQUOTAS; i++) { - dqrele(vp, ip->i_dquot[i]); - ip->i_dquot[i] = NODQUOT; - } - } - ip->i_gid = ogid; - DIP(ip, i_gid) = ogid; - ip->i_uid = ouid; - DIP(ip, i_uid) = ouid; - if (getinoquota(ip) == 0) { - if (ouid == uid) { - dqrele(vp, ip->i_dquot[USRQUOTA]); - ip->i_dquot[USRQUOTA] = NODQUOT; - } - if (ogid == gid) { - dqrele(vp, ip->i_dquot[GRPQUOTA]); - ip->i_dquot[GRPQUOTA] = NODQUOT; - } - (void) chkdq(ip, change, cred, FORCE|CHOWN); - (void) chkiq(ip, 1, cred, FORCE|CHOWN); - (void) getinoquota(ip); - } - return (error); -good: - if (getinoquota(ip)) - panic("ufs_chown: lost quota"); -#endif /* QUOTA */ - ip->i_flag 
|= IN_CHANGE; - if (suser_cred(cred, PRISON_ROOT) && (ouid != uid || ogid != gid)) { - ip->i_mode &= ~(ISUID | ISGID); - DIP(ip, i_mode) = ip->i_mode; - } - return (0); -} - -static int -ufs_remove(ap) - struct vop_remove_args /* { - struct vnode *a_dvp; - struct vnode *a_vp; - struct componentname *a_cnp; - } */ *ap; -{ - struct inode *ip; - struct vnode *vp = ap->a_vp; - struct vnode *dvp = ap->a_dvp; - int error; - - ip = VTOI(vp); - if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || - (VTOI(dvp)->i_flags & APPEND)) { - error = EPERM; - goto out; - } - error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0); - if (ip->i_nlink <= 0) - vp->v_vflag |= VV_NOSYNC; - VN_KNOTE(vp, NOTE_DELETE); - VN_KNOTE(dvp, NOTE_WRITE); -out: - return (error); -} - -/* - * link vnode call - */ -static int -ufs_link(ap) - struct vop_link_args /* { - struct vnode *a_tdvp; - struct vnode *a_vp; - struct componentname *a_cnp; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - struct vnode *tdvp = ap->a_tdvp; - struct componentname *cnp = ap->a_cnp; - struct inode *ip; - struct direct newdir; - int error; - -#ifdef DIAGNOSTIC - if ((cnp->cn_flags & HASBUF) == 0) - panic("ufs_link: no name"); -#endif - if (tdvp->v_mount != vp->v_mount) { - error = EXDEV; - goto out; - } - ip = VTOI(vp); - if ((nlink_t)ip->i_nlink >= LINK_MAX) { - error = EMLINK; - goto out; - } - if (ip->i_flags & (IMMUTABLE | APPEND)) { - error = EPERM; - goto out; - } - ip->i_effnlink++; - ip->i_nlink++; - DIP(ip, i_nlink) = ip->i_nlink; - ip->i_flag |= IN_CHANGE; - if (DOINGSOFTDEP(vp)) - softdep_change_linkcnt(ip); - error = UFS_UPDATE(vp, !(DOINGSOFTDEP(vp) | DOINGASYNC(vp))); - if (!error) { - ufs_makedirentry(ip, cnp, &newdir); - error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL); - } - - if (error) { - ip->i_effnlink--; - ip->i_nlink--; - DIP(ip, i_nlink) = ip->i_nlink; - ip->i_flag |= IN_CHANGE; - if (DOINGSOFTDEP(vp)) - softdep_change_linkcnt(ip); - } -out: - VN_KNOTE(vp, NOTE_LINK); - VN_KNOTE(tdvp, NOTE_WRITE); - return (error); -} - -/* - * whiteout vnode call - */ -static int -ufs_whiteout(ap) - struct vop_whiteout_args /* { - struct vnode *a_dvp; - struct componentname *a_cnp; - int a_flags; - } */ *ap; -{ - struct vnode *dvp = ap->a_dvp; - struct componentname *cnp = ap->a_cnp; - struct direct newdir; - int error = 0; - - switch (ap->a_flags) { - case LOOKUP: - /* 4.4 format directories support whiteout operations */ - if (dvp->v_mount->mnt_maxsymlinklen > 0) - return (0); - return (EOPNOTSUPP); - - case CREATE: - /* create a new directory whiteout */ -#ifdef DIAGNOSTIC - if ((cnp->cn_flags & SAVENAME) == 0) - panic("ufs_whiteout: missing name"); - if (dvp->v_mount->mnt_maxsymlinklen <= 0) - panic("ufs_whiteout: old format filesystem"); -#endif - - newdir.d_ino = WINO; - newdir.d_namlen = cnp->cn_namelen; - bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1); - newdir.d_type = DT_WHT; - error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL); - break; - - case DELETE: - /* remove an existing directory whiteout */ -#ifdef DIAGNOSTIC - if (dvp->v_mount->mnt_maxsymlinklen <= 0) - panic("ufs_whiteout: old format filesystem"); -#endif - - cnp->cn_flags &= ~DOWHITEOUT; - error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0); - break; - default: - panic("ufs_whiteout: unknown op"); - } - return (error); -} - -/* - * Rename system call. - * rename("foo", "bar"); - * is essentially - * unlink("bar"); - * link("foo", "bar"); - * unlink("foo"); - * but ``atomically''. 
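The comparison drawn above, rename("foo", "bar") behaving like unlink/link/unlink performed atomically, can be made concrete with a short userland program that runs the non-atomic sequence by hand; unlike the in-kernel implementation that follows, this version can leave both names (or a stale link count) behind if it is interrupted part-way:

#include <stdio.h>
#include <unistd.h>

/*
 * Non-atomic stand-in for rename(from, to): remove the target,
 * link the source under the new name, then drop the old name.
 */
static int
rename_by_hand(const char *from, const char *to)
{
	(void)unlink(to);		/* ignore "target did not exist" */
	if (link(from, to) == -1) {
		perror("link");
		return (-1);
	}
	if (unlink(from) == -1) {
		perror("unlink");
		return (-1);
	}
	return (0);
}

int
main(void)
{
	return (rename_by_hand("foo", "bar") == 0 ? 0 : 1);
}
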
Can't do full commit without saving state in the - * inode on disk which isn't feasible at this time. Best we can do is - * always guarantee the target exists. - * - * Basic algorithm is: - * - * 1) Bump link count on source while we're linking it to the - * target. This also ensure the inode won't be deleted out - * from underneath us while we work (it may be truncated by - * a concurrent `trunc' or `open' for creation). - * 2) Link source to destination. If destination already exists, - * delete it first. - * 3) Unlink source reference to inode if still around. If a - * directory was moved and the parent of the destination - * is different from the source, patch the ".." entry in the - * directory. - */ -static int -ufs_rename(ap) - struct vop_rename_args /* { - struct vnode *a_fdvp; - struct vnode *a_fvp; - struct componentname *a_fcnp; - struct vnode *a_tdvp; - struct vnode *a_tvp; - struct componentname *a_tcnp; - } */ *ap; -{ - struct vnode *tvp = ap->a_tvp; - struct vnode *tdvp = ap->a_tdvp; - struct vnode *fvp = ap->a_fvp; - struct vnode *fdvp = ap->a_fdvp; - struct componentname *tcnp = ap->a_tcnp; - struct componentname *fcnp = ap->a_fcnp; - struct thread *td = fcnp->cn_thread; - struct inode *ip, *xp, *dp; - struct direct newdir; - int doingdirectory = 0, oldparent = 0, newparent = 0; - int error = 0, ioflag; - -#ifdef DIAGNOSTIC - if ((tcnp->cn_flags & HASBUF) == 0 || - (fcnp->cn_flags & HASBUF) == 0) - panic("ufs_rename: no name"); -#endif - /* - * Check for cross-device rename. - */ - if ((fvp->v_mount != tdvp->v_mount) || - (tvp && (fvp->v_mount != tvp->v_mount))) { - error = EXDEV; -abortit: - if (tdvp == tvp) - vrele(tdvp); - else - vput(tdvp); - if (tvp) - vput(tvp); - vrele(fdvp); - vrele(fvp); - return (error); - } - - if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || - (VTOI(tdvp)->i_flags & APPEND))) { - error = EPERM; - goto abortit; - } - - /* - * Renaming a file to itself has no effect. The upper layers should - * not call us in that case. Temporarily just warn if they do. - */ - if (fvp == tvp) { - printf("ufs_rename: fvp == tvp (can't happen)\n"); - error = 0; - goto abortit; - } - - if ((error = vn_lock(fvp, LK_EXCLUSIVE, td)) != 0) - goto abortit; - dp = VTOI(fdvp); - ip = VTOI(fvp); - if (ip->i_nlink >= LINK_MAX) { - VOP_UNLOCK(fvp, 0, td); - error = EMLINK; - goto abortit; - } - if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) - || (dp->i_flags & APPEND)) { - VOP_UNLOCK(fvp, 0, td); - error = EPERM; - goto abortit; - } - if ((ip->i_mode & IFMT) == IFDIR) { - /* - * Avoid ".", "..", and aliases of "." for obvious reasons. - */ - if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || - dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT || - (ip->i_flag & IN_RENAME)) { - VOP_UNLOCK(fvp, 0, td); - error = EINVAL; - goto abortit; - } - ip->i_flag |= IN_RENAME; - oldparent = dp->i_number; - doingdirectory = 1; - } - VN_KNOTE(fdvp, NOTE_WRITE); /* XXX right place? */ - vrele(fdvp); - - /* - * When the target exists, both the directory - * and target vnodes are returned locked. - */ - dp = VTOI(tdvp); - xp = NULL; - if (tvp) - xp = VTOI(tvp); - - /* - * 1) Bump link count while we're moving stuff - * around. If we crash somewhere before - * completing our work, the link count - * may be wrong, but correctable. 
- */ - ip->i_effnlink++; - ip->i_nlink++; - DIP(ip, i_nlink) = ip->i_nlink; - ip->i_flag |= IN_CHANGE; - if (DOINGSOFTDEP(fvp)) - softdep_change_linkcnt(ip); - if ((error = UFS_UPDATE(fvp, !(DOINGSOFTDEP(fvp) | - DOINGASYNC(fvp)))) != 0) { - VOP_UNLOCK(fvp, 0, td); - goto bad; - } - - /* - * If ".." must be changed (ie the directory gets a new - * parent) then the source directory must not be in the - * directory heirarchy above the target, as this would - * orphan everything below the source directory. Also - * the user must have write permission in the source so - * as to be able to change "..". We must repeat the call - * to namei, as the parent directory is unlocked by the - * call to checkpath(). - */ - error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread); - VOP_UNLOCK(fvp, 0, td); - if (oldparent != dp->i_number) - newparent = dp->i_number; - if (doingdirectory && newparent) { - if (error) /* write access check above */ - goto bad; - if (xp != NULL) - vput(tvp); - error = ufs_checkpath(ip, dp, tcnp->cn_cred); - if (error) - goto out; - if ((tcnp->cn_flags & SAVESTART) == 0) - panic("ufs_rename: lost to startdir"); - VREF(tdvp); - error = relookup(tdvp, &tvp, tcnp); - if (error) - goto out; - vrele(tdvp); - dp = VTOI(tdvp); - xp = NULL; - if (tvp) - xp = VTOI(tvp); - } - /* - * 2) If target doesn't exist, link the target - * to the source and unlink the source. - * Otherwise, rewrite the target directory - * entry to reference the source inode and - * expunge the original entry's existence. - */ - if (xp == NULL) { - if (dp->i_dev != ip->i_dev) - panic("ufs_rename: EXDEV"); - /* - * Account for ".." in new directory. - * When source and destination have the same - * parent we don't fool with the link count. - */ - if (doingdirectory && newparent) { - if ((nlink_t)dp->i_nlink >= LINK_MAX) { - error = EMLINK; - goto bad; - } - dp->i_effnlink++; - dp->i_nlink++; - DIP(dp, i_nlink) = dp->i_nlink; - dp->i_flag |= IN_CHANGE; - if (DOINGSOFTDEP(tdvp)) - softdep_change_linkcnt(dp); - error = UFS_UPDATE(tdvp, !(DOINGSOFTDEP(tdvp) | - DOINGASYNC(tdvp))); - if (error) - goto bad; - } - ufs_makedirentry(ip, tcnp, &newdir); - error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL); - if (error) { - if (doingdirectory && newparent) { - dp->i_effnlink--; - dp->i_nlink--; - DIP(dp, i_nlink) = dp->i_nlink; - dp->i_flag |= IN_CHANGE; - if (DOINGSOFTDEP(tdvp)) - softdep_change_linkcnt(dp); - (void)UFS_UPDATE(tdvp, 1); - } - goto bad; - } - VN_KNOTE(tdvp, NOTE_WRITE); - vput(tdvp); - } else { - if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev) - panic("ufs_rename: EXDEV"); - /* - * Short circuit rename(foo, foo). - */ - if (xp->i_number == ip->i_number) - panic("ufs_rename: same file"); - /* - * If the parent directory is "sticky", then the caller - * must possess VADMIN for the parent directory, or the - * destination of the rename. This implements append-only - * directories. - */ - if ((dp->i_mode & S_ISTXT) && - VOP_ACCESS(tdvp, VADMIN, tcnp->cn_cred, td) && - VOP_ACCESS(tvp, VADMIN, tcnp->cn_cred, td)) { - error = EPERM; - goto bad; - } - /* - * Target must be empty if a directory and have no links - * to it. Also, ensure source and target are compatible - * (both directories, or both not directories). 
- */ - if ((xp->i_mode&IFMT) == IFDIR) { - if ((xp->i_effnlink > 2) || - !ufs_dirempty(xp, dp->i_number, tcnp->cn_cred)) { - error = ENOTEMPTY; - goto bad; - } - if (!doingdirectory) { - error = ENOTDIR; - goto bad; - } - cache_purge(tdvp); - } else if (doingdirectory) { - error = EISDIR; - goto bad; - } - error = ufs_dirrewrite(dp, xp, ip->i_number, - IFTODT(ip->i_mode), - (doingdirectory && newparent) ? newparent : doingdirectory); - if (error) - goto bad; - if (doingdirectory) { - if (!newparent) { - dp->i_effnlink--; - if (DOINGSOFTDEP(tdvp)) - softdep_change_linkcnt(dp); - } - xp->i_effnlink--; - if (DOINGSOFTDEP(tvp)) - softdep_change_linkcnt(xp); - } - if (doingdirectory && !DOINGSOFTDEP(tvp)) { - /* - * Truncate inode. The only stuff left in the directory - * is "." and "..". The "." reference is inconsequential - * since we are quashing it. We have removed the "." - * reference and the reference in the parent directory, - * but there may be other hard links. The soft - * dependency code will arrange to do these operations - * after the parent directory entry has been deleted on - * disk, so when running with that code we avoid doing - * them now. - */ - if (!newparent) { - dp->i_nlink--; - DIP(dp, i_nlink) = dp->i_nlink; - dp->i_flag |= IN_CHANGE; - } - xp->i_nlink--; - DIP(xp, i_nlink) = xp->i_nlink; - xp->i_flag |= IN_CHANGE; - ioflag = IO_NORMAL; - if (DOINGASYNC(tvp)) - ioflag |= IO_SYNC; - if ((error = UFS_TRUNCATE(tvp, (off_t)0, ioflag, - tcnp->cn_cred, tcnp->cn_thread)) != 0) - goto bad; - } - VN_KNOTE(tdvp, NOTE_WRITE); - vput(tdvp); - VN_KNOTE(tvp, NOTE_DELETE); - vput(tvp); - xp = NULL; - } - - /* - * 3) Unlink the source. - */ - fcnp->cn_flags &= ~MODMASK; - fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; - if ((fcnp->cn_flags & SAVESTART) == 0) - panic("ufs_rename: lost from startdir"); - VREF(fdvp); - error = relookup(fdvp, &fvp, fcnp); - if (error == 0) - vrele(fdvp); - if (fvp != NULL) { - xp = VTOI(fvp); - dp = VTOI(fdvp); - } else { - /* - * From name has disappeared. IN_RENAME is not sufficient - * to protect against directory races due to timing windows, - * so we have to remove the panic. XXX the only real way - * to solve this issue is at a much higher level. By the - * time we hit ufs_rename() it's too late. - */ -#if 0 - if (doingdirectory) - panic("ufs_rename: lost dir entry"); -#endif - vrele(ap->a_fvp); - return (0); - } - /* - * Ensure that the directory entry still exists and has not - * changed while the new name has been entered. If the source is - * a file then the entry may have been unlinked or renamed. In - * either case there is no further work to be done. If the source - * is a directory then it cannot have been rmdir'ed; the IN_RENAME - * flag ensures that it cannot be moved by another rename or removed - * by a rmdir. - */ - if (xp != ip) { - /* - * From name resolves to a different inode. IN_RENAME is - * not sufficient protection against timing window races - * so we can't panic here. XXX the only real way - * to solve this issue is at a much higher level. By the - * time we hit ufs_rename() it's too late. - */ -#if 0 - if (doingdirectory) - panic("ufs_rename: lost dir entry"); -#endif - } else { - /* - * If the source is a directory with a - * new parent, the link count of the old - * parent directory must be decremented - * and ".." set to point to the new parent. 
- */ - if (doingdirectory && newparent) { - xp->i_offset = mastertemplate.dot_reclen; - ufs_dirrewrite(xp, dp, newparent, DT_DIR, 0); - cache_purge(fdvp); - } - error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0); - xp->i_flag &= ~IN_RENAME; - } - VN_KNOTE(fvp, NOTE_RENAME); - if (dp) - vput(fdvp); - if (xp) - vput(fvp); - vrele(ap->a_fvp); - return (error); - -bad: - if (xp) - vput(ITOV(xp)); - vput(ITOV(dp)); -out: - if (doingdirectory) - ip->i_flag &= ~IN_RENAME; - if (vn_lock(fvp, LK_EXCLUSIVE, td) == 0) { - ip->i_effnlink--; - ip->i_nlink--; - DIP(ip, i_nlink) = ip->i_nlink; - ip->i_flag |= IN_CHANGE; - ip->i_flag &= ~IN_RENAME; - if (DOINGSOFTDEP(fvp)) - softdep_change_linkcnt(ip); - vput(fvp); - } else - vrele(fvp); - return (error); -} - -/* - * Mkdir system call - */ -static int -ufs_mkdir(ap) - struct vop_mkdir_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - struct vattr *a_vap; - } */ *ap; -{ - struct vnode *dvp = ap->a_dvp; - struct vattr *vap = ap->a_vap; - struct componentname *cnp = ap->a_cnp; - struct inode *ip, *dp; - struct vnode *tvp; - struct buf *bp; - struct dirtemplate dirtemplate, *dtp; - struct direct newdir; -#ifdef UFS_ACL - struct acl *acl, *dacl; -#endif - int error, dmode; - long blkoff; - -#ifdef DIAGNOSTIC - if ((cnp->cn_flags & HASBUF) == 0) - panic("ufs_mkdir: no name"); -#endif - dp = VTOI(dvp); - if ((nlink_t)dp->i_nlink >= LINK_MAX) { - error = EMLINK; - goto out; - } - dmode = vap->va_mode & 0777; - dmode |= IFDIR; - /* - * Must simulate part of ufs_makeinode here to acquire the inode, - * but not have it entered in the parent directory. The entry is - * made later after writing "." and ".." entries. - */ - error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp); - if (error) - goto out; - ip = VTOI(tvp); - ip->i_gid = dp->i_gid; - DIP(ip, i_gid) = dp->i_gid; -#ifdef SUIDDIR - { -#ifdef QUOTA - struct ucred ucred, *ucp; - ucp = cnp->cn_cred; -#endif - /* - * If we are hacking owners here, (only do this where told to) - * and we are not giving it TO root, (would subvert quotas) - * then go ahead and give it to the other user. - * The new directory also inherits the SUID bit. - * If user's UID and dir UID are the same, - * 'give it away' so that the SUID is still forced on. - */ - if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && - (dp->i_mode & ISUID) && dp->i_uid) { - dmode |= ISUID; - ip->i_uid = dp->i_uid; - DIP(ip, i_uid) = dp->i_uid; -#ifdef QUOTA - if (dp->i_uid != cnp->cn_cred->cr_uid) { - /* - * Make sure the correct user gets charged - * for the space. - * Make a dummy credential for the victim. - * XXX This seems to never be accessed out of - * our context so a stack variable is ok. 
- */ - ucred.cr_ref = 1; - ucred.cr_uid = ip->i_uid; - ucred.cr_ngroups = 1; - ucred.cr_groups[0] = dp->i_gid; - ucp = &ucred; - } -#endif - } else { - ip->i_uid = cnp->cn_cred->cr_uid; - DIP(ip, i_uid) = ip->i_uid; - } -#ifdef QUOTA - if ((error = getinoquota(ip)) || - (error = chkiq(ip, 1, ucp, 0))) { - UFS_VFREE(tvp, ip->i_number, dmode); - vput(tvp); - return (error); - } -#endif - } -#else /* !SUIDDIR */ - ip->i_uid = cnp->cn_cred->cr_uid; - DIP(ip, i_uid) = ip->i_uid; -#ifdef QUOTA - if ((error = getinoquota(ip)) || - (error = chkiq(ip, 1, cnp->cn_cred, 0))) { - UFS_VFREE(tvp, ip->i_number, dmode); - vput(tvp); - return (error); - } -#endif -#endif /* !SUIDDIR */ - ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; -#ifdef UFS_ACL - acl = dacl = NULL; - if ((dvp->v_mount->mnt_flag & MNT_ACLS) != 0) { - MALLOC(acl, struct acl *, sizeof(*acl), M_ACL, M_WAITOK); - MALLOC(dacl, struct acl *, sizeof(*dacl), M_ACL, M_WAITOK); - - /* - * Retrieve default ACL from parent, if any. - */ - error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cnp->cn_cred, - cnp->cn_thread); - switch (error) { - case 0: - /* - * Retrieved a default ACL, so merge mode and ACL if - * necessary. If the ACL is empty, fall through to - * the "not defined or available" case. - */ - if (acl->acl_cnt != 0) { - dmode = acl_posix1e_newfilemode(dmode, acl); - ip->i_mode = dmode; - DIP(ip, i_mode) = dmode; - *dacl = *acl; - ufs_sync_acl_from_inode(ip, acl); - break; - } - /* FALLTHROUGH */ - - case EOPNOTSUPP: - /* - * Just use the mode as-is. - */ - ip->i_mode = dmode; - DIP(ip, i_mode) = dmode; - FREE(acl, M_ACL); - FREE(dacl, M_ACL); - dacl = acl = NULL; - break; - - default: - UFS_VFREE(tvp, ip->i_number, dmode); - vput(tvp); - FREE(acl, M_ACL); - FREE(dacl, M_ACL); - return (error); - } - } else { -#endif /* !UFS_ACL */ - ip->i_mode = dmode; - DIP(ip, i_mode) = dmode; -#ifdef UFS_ACL - } -#endif - tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ - ip->i_effnlink = 2; - ip->i_nlink = 2; - DIP(ip, i_nlink) = 2; - if (DOINGSOFTDEP(tvp)) - softdep_change_linkcnt(ip); - if (cnp->cn_flags & ISWHITEOUT) { - ip->i_flags |= UF_OPAQUE; - DIP(ip, i_flags) = ip->i_flags; - } - - /* - * Bump link count in parent directory to reflect work done below. - * Should be done before reference is created so cleanup is - * possible if we crash. - */ - dp->i_effnlink++; - dp->i_nlink++; - DIP(dp, i_nlink) = dp->i_nlink; - dp->i_flag |= IN_CHANGE; - if (DOINGSOFTDEP(dvp)) - softdep_change_linkcnt(dp); - error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(dvp) | DOINGASYNC(dvp))); - if (error) - goto bad; -#ifdef MAC - if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) { - error = mac_create_vnode_extattr(cnp->cn_cred, dvp->v_mount, - dvp, tvp, cnp); - if (error) - goto bad; - } -#endif -#ifdef UFS_ACL - if (acl != NULL) { - /* - * XXX: If we abort now, will Soft Updates notify the extattr - * code that the EAs for the file need to be released? - */ - error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cnp->cn_cred, - cnp->cn_thread); - if (error == 0) - error = VOP_SETACL(tvp, ACL_TYPE_DEFAULT, dacl, - cnp->cn_cred, cnp->cn_thread); - switch (error) { - case 0: - break; - - case EOPNOTSUPP: - /* - * XXX: This should not happen, as EOPNOTSUPP above - * was supposed to free acl. 
- */ - printf("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()\n"); - /* - panic("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()"); - */ - break; - - default: - FREE(acl, M_ACL); - FREE(dacl, M_ACL); - goto bad; - } - FREE(acl, M_ACL); - FREE(dacl, M_ACL); - } -#endif /* !UFS_ACL */ - - /* - * Initialize directory with "." and ".." from static template. - */ - if (dvp->v_mount->mnt_maxsymlinklen > 0 - ) - dtp = &mastertemplate; - else - dtp = (struct dirtemplate *)&omastertemplate; - dirtemplate = *dtp; - dirtemplate.dot_ino = ip->i_number; - dirtemplate.dotdot_ino = dp->i_number; - if ((error = UFS_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred, - BA_CLRBUF, &bp)) != 0) - goto bad; - ip->i_size = DIRBLKSIZ; - DIP(ip, i_size) = DIRBLKSIZ; - ip->i_flag |= IN_CHANGE | IN_UPDATE; - vnode_pager_setsize(tvp, (u_long)ip->i_size); - bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate); - if (DOINGSOFTDEP(tvp)) { - /* - * Ensure that the entire newly allocated block is a - * valid directory so that future growth within the - * block does not have to ensure that the block is - * written before the inode. - */ - blkoff = DIRBLKSIZ; - while (blkoff < bp->b_bcount) { - ((struct direct *) - (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; - blkoff += DIRBLKSIZ; - } - } - if ((error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(tvp) | - DOINGASYNC(tvp)))) != 0) { - (void)BUF_WRITE(bp); - goto bad; - } - /* - * Directory set up, now install its entry in the parent directory. - * - * If we are not doing soft dependencies, then we must write out the - * buffer containing the new directory body before entering the new - * name in the parent. If we are doing soft dependencies, then the - * buffer containing the new directory body will be passed to and - * released in the soft dependency code after the code has attached - * an appropriate ordering dependency to the buffer which ensures that - * the buffer is written before the new name is written in the parent. - */ - if (DOINGASYNC(dvp)) - bdwrite(bp); - else if (!DOINGSOFTDEP(dvp) && ((error = BUF_WRITE(bp)))) - goto bad; - ufs_makedirentry(ip, cnp, &newdir); - error = ufs_direnter(dvp, tvp, &newdir, cnp, bp); - -bad: - if (error == 0) { - VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK); - *ap->a_vpp = tvp; - } else { - dp->i_effnlink--; - dp->i_nlink--; - DIP(dp, i_nlink) = dp->i_nlink; - dp->i_flag |= IN_CHANGE; - if (DOINGSOFTDEP(dvp)) - softdep_change_linkcnt(dp); - /* - * No need to do an explicit VOP_TRUNCATE here, vrele will - * do this for us because we set the link count to 0. - */ - ip->i_effnlink = 0; - ip->i_nlink = 0; - DIP(ip, i_nlink) = 0; - ip->i_flag |= IN_CHANGE; - if (DOINGSOFTDEP(tvp)) - softdep_change_linkcnt(ip); - vput(tvp); - } -out: - return (error); -} - -/* - * Rmdir system call. - */ -static int -ufs_rmdir(ap) - struct vop_rmdir_args /* { - struct vnode *a_dvp; - struct vnode *a_vp; - struct componentname *a_cnp; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - struct vnode *dvp = ap->a_dvp; - struct componentname *cnp = ap->a_cnp; - struct inode *ip, *dp; - int error, ioflag; - - ip = VTOI(vp); - dp = VTOI(dvp); - - /* - * Do not remove a directory that is in the process of being renamed. - * Verify the directory is empty (and valid). Rmdir ".." will not be - * valid since ".." will contain a reference to the current directory - * and thus be non-empty. Do not allow the removal of mounted on - * directories (this can happen when an NFS exported filesystem - * tries to remove a locally mounted on directory). 
- */ - error = 0; - if (ip->i_flag & IN_RENAME) { - error = EINVAL; - goto out; - } - if (ip->i_effnlink != 2 || - !ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) { - error = ENOTEMPTY; - goto out; - } - if ((dp->i_flags & APPEND) - || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) { - error = EPERM; - goto out; - } - if (vp->v_mountedhere != 0) { - error = EINVAL; - goto out; - } - /* - * Delete reference to directory before purging - * inode. If we crash in between, the directory - * will be reattached to lost+found, - */ - dp->i_effnlink--; - ip->i_effnlink--; - if (DOINGSOFTDEP(vp)) { - softdep_change_linkcnt(dp); - softdep_change_linkcnt(ip); - } - error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1); - if (error) { - dp->i_effnlink++; - ip->i_effnlink++; - if (DOINGSOFTDEP(vp)) { - softdep_change_linkcnt(dp); - softdep_change_linkcnt(ip); - } - goto out; - } - VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK); - cache_purge(dvp); - /* - * Truncate inode. The only stuff left in the directory is "." and - * "..". The "." reference is inconsequential since we are quashing - * it. The soft dependency code will arrange to do these operations - * after the parent directory entry has been deleted on disk, so - * when running with that code we avoid doing them now. - */ - if (!DOINGSOFTDEP(vp)) { - dp->i_nlink--; - DIP(dp, i_nlink) = dp->i_nlink; - dp->i_flag |= IN_CHANGE; - ip->i_nlink--; - DIP(ip, i_nlink) = ip->i_nlink; - ip->i_flag |= IN_CHANGE; - ioflag = IO_NORMAL; - if (DOINGASYNC(vp)) - ioflag |= IO_SYNC; - error = UFS_TRUNCATE(vp, (off_t)0, ioflag, cnp->cn_cred, - cnp->cn_thread); - } - cache_purge(vp); -#ifdef UFS_DIRHASH - /* Kill any active hash; i_effnlink == 0, so it will not come back. */ - if (ip->i_dirhash != NULL) - ufsdirhash_free(ip); -#endif -out: - VN_KNOTE(vp, NOTE_DELETE); - return (error); -} - -/* - * symlink -- make a symbolic link - */ -static int -ufs_symlink(ap) - struct vop_symlink_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - struct vattr *a_vap; - char *a_target; - } */ *ap; -{ - struct vnode *vp, **vpp = ap->a_vpp; - struct inode *ip; - int len, error; - - error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, - vpp, ap->a_cnp); - if (error) - return (error); - VN_KNOTE(ap->a_dvp, NOTE_WRITE); - vp = *vpp; - len = strlen(ap->a_target); - if (len < vp->v_mount->mnt_maxsymlinklen) { - ip = VTOI(vp); - bcopy(ap->a_target, SHORTLINK(ip), len); - ip->i_size = len; - DIP(ip, i_size) = len; - ip->i_flag |= IN_CHANGE | IN_UPDATE; - } else - error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0, - UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, - ap->a_cnp->cn_cred, NOCRED, (int *)0, (struct thread *)0); - if (error) - vput(vp); - return (error); -} - -/* - * Vnode op for reading directories. - * - * The routine below assumes that the on-disk format of a directory - * is the same as that defined by . If the on-disk - * format changes, then it will be necessary to do a conversion - * from the on-disk format that read returns to the format defined - * by . - */ -int -ufs_readdir(ap) - struct vop_readdir_args /* { - struct vnode *a_vp; - struct uio *a_uio; - struct ucred *a_cred; - int *a_eofflag; - int *ncookies; - u_long **a_cookies; - } */ *ap; -{ - struct uio *uio = ap->a_uio; - int error; - size_t count, lost; - off_t off; - - if (ap->a_ncookies != NULL) - /* - * Ensure that the block is aligned. The caller can use - * the cookies to determine where in the block to start. 
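ufs_symlink() above applies the fast-symlink optimization: a target string shorter than mnt_maxsymlinklen is stored directly in the inode's block-pointer area (SHORTLINK), and only longer targets get a real data block written through vn_rdwr(). A tiny userland sketch of that decision, with hypothetical sizes and names, could be:

#include <string.h>

#define INLINE_LINK_AREA 120	/* stand-in for mnt_maxsymlinklen */

struct simple_symlink {
	char	inline_target[INLINE_LINK_AREA]; /* like SHORTLINK(ip) */
	size_t	size;
	int	uses_data_block;
};

/* Store short targets inline; flag longer ones for external storage. */
void
store_symlink_target(struct simple_symlink *sl, const char *target)
{
	size_t len = strlen(target);

	sl->size = len;
	if (len < INLINE_LINK_AREA) {
		memcpy(sl->inline_target, target, len);
		sl->uses_data_block = 0;
	} else {
		sl->uses_data_block = 1;	/* would go through vn_rdwr() */
	}
}
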
- */ - uio->uio_offset &= ~(DIRBLKSIZ - 1); - off = uio->uio_offset; - count = uio->uio_resid; - /* Make sure we don't return partial entries. */ - if (count <= ((uio->uio_offset + count) & (DIRBLKSIZ -1))) - return (EINVAL); - count -= (uio->uio_offset + count) & (DIRBLKSIZ -1); - lost = uio->uio_resid - count; - uio->uio_resid = count; - uio->uio_iov->iov_len = count; -# if (BYTE_ORDER == LITTLE_ENDIAN) - if (ap->a_vp->v_mount->mnt_maxsymlinklen > 0) { - error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred); - } else { - struct dirent *dp, *edp; - struct uio auio; - struct iovec aiov; - caddr_t dirbuf; - int readcnt; - u_char tmp; - - auio = *uio; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_segflg = UIO_SYSSPACE; - aiov.iov_len = count; - MALLOC(dirbuf, caddr_t, count, M_TEMP, M_WAITOK); - aiov.iov_base = dirbuf; - error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred); - if (error == 0) { - readcnt = count - auio.uio_resid; - edp = (struct dirent *)&dirbuf[readcnt]; - for (dp = (struct dirent *)dirbuf; dp < edp; ) { - tmp = dp->d_namlen; - dp->d_namlen = dp->d_type; - dp->d_type = tmp; - if (dp->d_reclen > 0) { - dp = (struct dirent *) - ((char *)dp + dp->d_reclen); - } else { - error = EIO; - break; - } - } - if (dp >= edp) - error = uiomove(dirbuf, readcnt, uio); - } - FREE(dirbuf, M_TEMP); - } -# else - error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred); -# endif - if (!error && ap->a_ncookies != NULL) { - struct dirent* dpStart; - struct dirent* dpEnd; - struct dirent* dp; - int ncookies; - u_long *cookies; - u_long *cookiep; - - if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) - panic("ufs_readdir: unexpected uio from NFS server"); - dpStart = (struct dirent *) - ((char *)uio->uio_iov->iov_base - (uio->uio_offset - off)); - dpEnd = (struct dirent *) uio->uio_iov->iov_base; - for (dp = dpStart, ncookies = 0; - dp < dpEnd; - dp = (struct dirent *)((caddr_t) dp + dp->d_reclen)) - ncookies++; - MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP, - M_WAITOK); - for (dp = dpStart, cookiep = cookies; - dp < dpEnd; - dp = (struct dirent *)((caddr_t) dp + dp->d_reclen)) { - off += dp->d_reclen; - *cookiep++ = (u_long) off; - } - *ap->a_ncookies = ncookies; - *ap->a_cookies = cookies; - } - uio->uio_resid += lost; - if (ap->a_eofflag) - *ap->a_eofflag = VTOI(ap->a_vp)->i_size <= uio->uio_offset; - return (error); -} - -/* - * Return target name of a symbolic link - */ -static int -ufs_readlink(ap) - struct vop_readlink_args /* { - struct vnode *a_vp; - struct uio *a_uio; - struct ucred *a_cred; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - struct inode *ip = VTOI(vp); - doff_t isize; - - isize = ip->i_size; - if ((isize < vp->v_mount->mnt_maxsymlinklen) || - DIP(ip, i_blocks) == 0) { /* XXX - for old fastlink support */ - uiomove(SHORTLINK(ip), isize, ap->a_uio); - return (0); - } - return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); -} - -/* - * Calculate the logical to physical mapping if not done already, - * then call the device strategy routine. - * - * In order to be able to swap to a file, the ufs_bmaparray() operation may not - * deadlock on memory. See ufs_bmap() for details. 
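On little-endian machines the readdir loop above rewrites old-format directory entries in place, swapping the bytes now known as d_type and d_namlen so that pre-4.4BSD directories (the mnt_maxsymlinklen == 0 case) come back in the modern struct dirent layout. A standalone sketch of just that swap over a buffer of simplified records, with hypothetical field names, might be:

#include <stdint.h>
#include <stddef.h>

/* Hypothetical minimal header shared by the old and new entry formats. */
struct entry_head {
	uint32_t ino;
	uint16_t reclen;
	uint8_t  type;		/* these two bytes are stored in the   */
	uint8_t  namelen;	/* ...opposite order in the old format */
};

/* Swap type/namelen in every record of an old-format buffer. */
int
convert_old_entries(char *buf, size_t len)
{
	size_t off = 0;

	while (off + sizeof(struct entry_head) <= len) {
		struct entry_head *e = (struct entry_head *)(buf + off);
		uint8_t tmp = e->namelen;

		e->namelen = e->type;
		e->type = tmp;
		if (e->reclen == 0)
			return (-1);	/* corrupt entry, like the EIO case */
		off += e->reclen;
	}
	return (0);
}
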
- */ -static int -ufs_strategy(ap) - struct vop_strategy_args /* { - struct vnode *a_vp; - struct buf *a_bp; - } */ *ap; -{ - struct buf *bp = ap->a_bp; - struct vnode *vp = ap->a_vp; - struct inode *ip; - ufs2_daddr_t blkno; - int error; - - KASSERT(ap->a_vp == ap->a_bp->b_vp, ("%s(%p != %p)", - __func__, ap->a_vp, ap->a_bp->b_vp)); - ip = VTOI(vp); - if (bp->b_blkno == bp->b_lblkno) { - error = ufs_bmaparray(vp, bp->b_lblkno, &blkno, bp, NULL, NULL); - bp->b_blkno = blkno; - if (error) { - bp->b_error = error; - bp->b_ioflags |= BIO_ERROR; - bufdone(bp); - return (error); - } - if ((long)bp->b_blkno == -1) - vfs_bio_clrbuf(bp); - } - if ((long)bp->b_blkno == -1) { - bufdone(bp); - return (0); - } - vp = ip->i_devvp; - bp->b_dev = vp->v_rdev; - bp->b_iooffset = dbtob(bp->b_blkno); - VOP_SPECSTRATEGY(vp, bp); - return (0); -} - -/* - * Print out the contents of an inode. - */ -static int -ufs_print(ap) - struct vop_print_args /* { - struct vnode *a_vp; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - struct inode *ip = VTOI(vp); - - printf("\tino %lu, on dev %s (%d, %d)", (u_long)ip->i_number, - devtoname(ip->i_dev), major(ip->i_dev), minor(ip->i_dev)); - if (vp->v_type == VFIFO) - fifo_printinfo(vp); - printf("\n"); - return (0); -} - -/* - * Read wrapper for special devices. - */ -static int -ufsspec_read(ap) - struct vop_read_args /* { - struct vnode *a_vp; - struct uio *a_uio; - int a_ioflag; - struct ucred *a_cred; - } */ *ap; -{ - int error, resid; - struct inode *ip; - struct uio *uio; - - uio = ap->a_uio; - resid = uio->uio_resid; - error = VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap); - /* - * The inode may have been revoked during the call, so it must not - * be accessed blindly here or in the other wrapper functions. - */ - ip = VTOI(ap->a_vp); - if (ip != NULL && (uio->uio_resid != resid || (error == 0 && resid != 0))) - ip->i_flag |= IN_ACCESS; - return (error); -} - -/* - * Write wrapper for special devices. - */ -static int -ufsspec_write(ap) - struct vop_write_args /* { - struct vnode *a_vp; - struct uio *a_uio; - int a_ioflag; - struct ucred *a_cred; - } */ *ap; -{ - int error, resid; - struct inode *ip; - struct uio *uio; - - uio = ap->a_uio; - resid = uio->uio_resid; - error = VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap); - ip = VTOI(ap->a_vp); - if (ip != NULL && (uio->uio_resid != resid || (error == 0 && resid != 0))) - VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE; - return (error); -} - -/* - * Close wrapper for special devices. - * - * Update the times on the inode then do device close. - */ -static int -ufsspec_close(ap) - struct vop_close_args /* { - struct vnode *a_vp; - int a_fflag; - struct ucred *a_cred; - struct thread *a_td; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - - VI_LOCK(vp); - if (vp->v_usecount > 1) - ufs_itimes(vp); - VI_UNLOCK(vp); - return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap)); -} - -/* - * Read wrapper for fifos. - */ -static int -ufsfifo_read(ap) - struct vop_read_args /* { - struct vnode *a_vp; - struct uio *a_uio; - int a_ioflag; - struct ucred *a_cred; - } */ *ap; -{ - int error, resid; - struct inode *ip; - struct uio *uio; - - uio = ap->a_uio; - resid = uio->uio_resid; - error = VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap); - ip = VTOI(ap->a_vp); - if ((ap->a_vp->v_mount->mnt_flag & MNT_NOATIME) == 0 && ip != NULL && - (uio->uio_resid != resid || (error == 0 && resid != 0))) - ip->i_flag |= IN_ACCESS; - return (error); -} - -/* - * Write wrapper for fifos. 
- */ -static int -ufsfifo_write(ap) - struct vop_write_args /* { - struct vnode *a_vp; - struct uio *a_uio; - int a_ioflag; - struct ucred *a_cred; - } */ *ap; -{ - int error, resid; - struct inode *ip; - struct uio *uio; - - uio = ap->a_uio; - resid = uio->uio_resid; - error = VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap); - ip = VTOI(ap->a_vp); - if (ip != NULL && (uio->uio_resid != resid || (error == 0 && resid != 0))) - ip->i_flag |= IN_CHANGE | IN_UPDATE; - return (error); -} - -/* - * Close wrapper for fifos. - * - * Update the times on the inode then do device close. - */ -static int -ufsfifo_close(ap) - struct vop_close_args /* { - struct vnode *a_vp; - int a_fflag; - struct ucred *a_cred; - struct thread *a_td; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - - VI_LOCK(vp); - if (vp->v_usecount > 1) - ufs_itimes(vp); - VI_UNLOCK(vp); - return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap)); -} - -/* - * Kqfilter wrapper for fifos. - * - * Fall through to ufs kqfilter routines if needed - */ -static int -ufsfifo_kqfilter(ap) - struct vop_kqfilter_args *ap; -{ - int error; - - error = VOCALL(fifo_vnodeop_p, VOFFSET(vop_kqfilter), ap); - if (error) - error = ufs_kqfilter(ap); - return (error); -} - -/* - * Return POSIX pathconf information applicable to ufs filesystems. - */ -static int -ufs_pathconf(ap) - struct vop_pathconf_args /* { - struct vnode *a_vp; - int a_name; - int *a_retval; - } */ *ap; -{ - int error; - - error = 0; - switch (ap->a_name) { - case _PC_LINK_MAX: - *ap->a_retval = LINK_MAX; - break; - case _PC_NAME_MAX: - *ap->a_retval = NAME_MAX; - break; - case _PC_PATH_MAX: - *ap->a_retval = PATH_MAX; - break; - case _PC_PIPE_BUF: - *ap->a_retval = PIPE_BUF; - break; - case _PC_CHOWN_RESTRICTED: - *ap->a_retval = 1; - break; - case _PC_NO_TRUNC: - *ap->a_retval = 1; - break; - case _PC_ACL_EXTENDED: -#ifdef UFS_ACL - if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS) - *ap->a_retval = 1; - else - *ap->a_retval = 0; -#else - *ap->a_retval = 0; -#endif - break; - case _PC_ACL_PATH_MAX: -#ifdef UFS_ACL - if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS) - *ap->a_retval = ACL_MAX_ENTRIES; - else - *ap->a_retval = 3; -#else - *ap->a_retval = 3; -#endif - break; - case _PC_MAC_PRESENT: -#ifdef MAC - if (ap->a_vp->v_mount->mnt_flag & MNT_MULTILABEL) - *ap->a_retval = 1; - else - *ap->a_retval = 0; -#else - *ap->a_retval = 0; -#endif - break; - case _PC_ASYNC_IO: - /* _PC_ASYNC_IO should have been handled by upper layers. 
*/ - KASSERT(0, ("_PC_ASYNC_IO should not get here")); - error = EINVAL; - break; - case _PC_PRIO_IO: - *ap->a_retval = 0; - break; - case _PC_SYNC_IO: - *ap->a_retval = 0; - break; - case _PC_ALLOC_SIZE_MIN: - *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize; - break; - case _PC_FILESIZEBITS: - *ap->a_retval = 64; - break; - case _PC_REC_INCR_XFER_SIZE: - *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; - break; - case _PC_REC_MAX_XFER_SIZE: - *ap->a_retval = -1; /* means ``unlimited'' */ - break; - case _PC_REC_MIN_XFER_SIZE: - *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; - break; - case _PC_REC_XFER_ALIGN: - *ap->a_retval = PAGE_SIZE; - break; - case _PC_SYMLINK_MAX: - *ap->a_retval = MAXPATHLEN; - break; - - default: - error = EINVAL; - break; - } - return (error); -} - -/* - * Advisory record locking support - */ -static int -ufs_advlock(ap) - struct vop_advlock_args /* { - struct vnode *a_vp; - caddr_t a_id; - int a_op; - struct flock *a_fl; - int a_flags; - } */ *ap; -{ - struct inode *ip = VTOI(ap->a_vp); - - return (lf_advlock(ap, &(ip->i_lockf), ip->i_size)); -} - -/* - * Initialize the vnode associated with a new inode, handle aliased - * vnodes. - */ -int -ufs_vinit(mntp, specops, fifoops, vpp) - struct mount *mntp; - vop_t **specops; - vop_t **fifoops; - struct vnode **vpp; -{ - struct inode *ip; - struct vnode *vp; - struct timeval tv; - - vp = *vpp; - ip = VTOI(vp); - switch(vp->v_type = IFTOVT(ip->i_mode)) { - case VCHR: - case VBLK: - vp->v_op = specops; - vp = addaliasu(vp, DIP(ip, i_rdev)); - ip->i_vnode = vp; - break; - case VFIFO: - vp->v_op = fifoops; - break; - default: - break; - - } - ASSERT_VOP_LOCKED(vp, "ufs_vinit"); - if (ip->i_number == ROOTINO) - vp->v_vflag |= VV_ROOT; - /* - * Initialize modrev times - */ - getmicrouptime(&tv); - SETHIGH(ip->i_modrev, tv.tv_sec); - SETLOW(ip->i_modrev, tv.tv_usec * 4294); - *vpp = vp; - return (0); -} - -/* - * Allocate a new inode. - * Vnode dvp must be locked. - */ -static int -ufs_makeinode(mode, dvp, vpp, cnp) - int mode; - struct vnode *dvp; - struct vnode **vpp; - struct componentname *cnp; -{ - struct inode *ip, *pdir; - struct direct newdir; - struct vnode *tvp; -#ifdef UFS_ACL - struct acl *acl; -#endif - int error; - - pdir = VTOI(dvp); -#ifdef DIAGNOSTIC - if ((cnp->cn_flags & HASBUF) == 0) - panic("ufs_makeinode: no name"); -#endif - *vpp = NULL; - if ((mode & IFMT) == 0) - mode |= IFREG; - - error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp); - if (error) - return (error); - ip = VTOI(tvp); - ip->i_gid = pdir->i_gid; - DIP(ip, i_gid) = pdir->i_gid; -#ifdef SUIDDIR - { -#ifdef QUOTA - struct ucred ucred, *ucp; - ucp = cnp->cn_cred; -#endif - /* - * If we are not the owner of the directory, - * and we are hacking owners here, (only do this where told to) - * and we are not giving it TO root, (would subvert quotas) - * then go ahead and give it to the other user. - * Note that this drops off the execute bits for security. - */ - if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && - (pdir->i_mode & ISUID) && - (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) { - ip->i_uid = pdir->i_uid; - DIP(ip, i_uid) = ip->i_uid; - mode &= ~07111; -#ifdef QUOTA - /* - * Make sure the correct user gets charged - * for the space. - * Quickly knock up a dummy credential for the victim. - * XXX This seems to never be accessed out of our - * context so a stack variable is ok. 
- */ - ucred.cr_ref = 1; - ucred.cr_uid = ip->i_uid; - ucred.cr_ngroups = 1; - ucred.cr_groups[0] = pdir->i_gid; - ucp = &ucred; -#endif - } else { - ip->i_uid = cnp->cn_cred->cr_uid; - DIP(ip, i_uid) = ip->i_uid; - } - -#ifdef QUOTA - if ((error = getinoquota(ip)) || - (error = chkiq(ip, 1, ucp, 0))) { - UFS_VFREE(tvp, ip->i_number, mode); - vput(tvp); - return (error); - } -#endif - } -#else /* !SUIDDIR */ - ip->i_uid = cnp->cn_cred->cr_uid; - DIP(ip, i_uid) = ip->i_uid; -#ifdef QUOTA - if ((error = getinoquota(ip)) || - (error = chkiq(ip, 1, cnp->cn_cred, 0))) { - UFS_VFREE(tvp, ip->i_number, mode); - vput(tvp); - return (error); - } -#endif -#endif /* !SUIDDIR */ - ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; -#ifdef UFS_ACL - acl = NULL; - if ((dvp->v_mount->mnt_flag & MNT_ACLS) != 0) { - MALLOC(acl, struct acl *, sizeof(*acl), M_ACL, M_WAITOK); - - /* - * Retrieve default ACL for parent, if any. - */ - error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cnp->cn_cred, - cnp->cn_thread); - switch (error) { - case 0: - /* - * Retrieved a default ACL, so merge mode and ACL if - * necessary. - */ - if (acl->acl_cnt != 0) { - /* - * Two possible ways for default ACL to not - * be present. First, the EA can be - * undefined, or second, the default ACL can - * be blank. If it's blank, fall through to - * the it's not defined case. - */ - mode = acl_posix1e_newfilemode(mode, acl); - ip->i_mode = mode; - DIP(ip, i_mode) = mode; - ufs_sync_acl_from_inode(ip, acl); - break; - } - /* FALLTHROUGH */ - - case EOPNOTSUPP: - /* - * Just use the mode as-is. - */ - ip->i_mode = mode; - DIP(ip, i_mode) = mode; - FREE(acl, M_ACL); - acl = NULL; - break; - - default: - UFS_VFREE(tvp, ip->i_number, mode); - vput(tvp); - FREE(acl, M_ACL); - acl = NULL; - return (error); - } - } else { -#endif - ip->i_mode = mode; - DIP(ip, i_mode) = mode; -#ifdef UFS_ACL - } -#endif - tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ - ip->i_effnlink = 1; - ip->i_nlink = 1; - DIP(ip, i_nlink) = 1; - if (DOINGSOFTDEP(tvp)) - softdep_change_linkcnt(ip); - if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) && - suser_cred(cnp->cn_cred, PRISON_ROOT)) { - ip->i_mode &= ~ISGID; - DIP(ip, i_mode) = ip->i_mode; - } - - if (cnp->cn_flags & ISWHITEOUT) { - ip->i_flags |= UF_OPAQUE; - DIP(ip, i_flags) = ip->i_flags; - } - - /* - * Make sure inode goes to disk before directory entry. - */ - error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(tvp) | DOINGASYNC(tvp))); - if (error) - goto bad; -#ifdef MAC - if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) { - error = mac_create_vnode_extattr(cnp->cn_cred, dvp->v_mount, - dvp, tvp, cnp); - if (error) - goto bad; - } -#endif -#ifdef UFS_ACL - if (acl != NULL) { - /* - * XXX: If we abort now, will Soft Updates notify the extattr - * code that the EAs for the file need to be released? - */ - error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cnp->cn_cred, - cnp->cn_thread); - switch (error) { - case 0: - break; - - case EOPNOTSUPP: - /* - * XXX: This should not happen, as EOPNOTSUPP above was - * supposed to free acl. 
- */ - printf("ufs_makeinode: VOP_GETACL() but no " - "VOP_SETACL()\n"); - /* panic("ufs_makeinode: VOP_GETACL() but no " - "VOP_SETACL()"); */ - break; - - default: - FREE(acl, M_ACL); - goto bad; - } - FREE(acl, M_ACL); - } -#endif /* !UFS_ACL */ - ufs_makedirentry(ip, cnp, &newdir); - error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL); - if (error) - goto bad; - *vpp = tvp; - return (0); - -bad: - /* - * Write error occurred trying to update the inode - * or the directory so must deallocate the inode. - */ - ip->i_effnlink = 0; - ip->i_nlink = 0; - DIP(ip, i_nlink) = 0; - ip->i_flag |= IN_CHANGE; - if (DOINGSOFTDEP(tvp)) - softdep_change_linkcnt(ip); - vput(tvp); - return (error); -} - -static struct filterops ufsread_filtops = - { 1, NULL, filt_ufsdetach, filt_ufsread }; -static struct filterops ufswrite_filtops = - { 1, NULL, filt_ufsdetach, filt_ufswrite }; -static struct filterops ufsvnode_filtops = - { 1, NULL, filt_ufsdetach, filt_ufsvnode }; - -static int -ufs_kqfilter(ap) - struct vop_kqfilter_args /* { - struct vnode *a_vp; - struct knote *a_kn; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - struct knote *kn = ap->a_kn; - - switch (kn->kn_filter) { - case EVFILT_READ: - kn->kn_fop = &ufsread_filtops; - break; - case EVFILT_WRITE: - kn->kn_fop = &ufswrite_filtops; - break; - case EVFILT_VNODE: - kn->kn_fop = &ufsvnode_filtops; - break; - default: - return (1); - } - - kn->kn_hook = (caddr_t)vp; - - if (vp->v_pollinfo == NULL) - v_addpollinfo(vp); - mtx_lock(&vp->v_pollinfo->vpi_lock); - SLIST_INSERT_HEAD(&vp->v_pollinfo->vpi_selinfo.si_note, kn, kn_selnext); - mtx_unlock(&vp->v_pollinfo->vpi_lock); - - return (0); -} - -static void -filt_ufsdetach(struct knote *kn) -{ - struct vnode *vp = (struct vnode *)kn->kn_hook; - - KASSERT(vp->v_pollinfo != NULL, ("Mising v_pollinfo")); - mtx_lock(&vp->v_pollinfo->vpi_lock); - SLIST_REMOVE(&vp->v_pollinfo->vpi_selinfo.si_note, - kn, knote, kn_selnext); - mtx_unlock(&vp->v_pollinfo->vpi_lock); -} - -/*ARGSUSED*/ -static int -filt_ufsread(struct knote *kn, long hint) -{ - struct vnode *vp = (struct vnode *)kn->kn_hook; - struct inode *ip = VTOI(vp); - - /* - * filesystem is gone, so set the EOF flag and schedule - * the knote for deletion. - */ - if (hint == NOTE_REVOKE) { - kn->kn_flags |= (EV_EOF | EV_ONESHOT); - return (1); - } - - kn->kn_data = ip->i_size - kn->kn_fp->f_offset; - return (kn->kn_data != 0); -} - -/*ARGSUSED*/ -static int -filt_ufswrite(struct knote *kn, long hint) -{ - - /* - * filesystem is gone, so set the EOF flag and schedule - * the knote for deletion. - */ - if (hint == NOTE_REVOKE) - kn->kn_flags |= (EV_EOF | EV_ONESHOT); - - kn->kn_data = 0; - return (1); -} - -static int -filt_ufsvnode(struct knote *kn, long hint) -{ - - if (kn->kn_sfflags & hint) - kn->kn_fflags |= hint; - if (hint == NOTE_REVOKE) { - kn->kn_flags |= EV_EOF; - return (1); - } - return (kn->kn_fflags != 0); -} - -/* Global vfs data structures for ufs. 
*/ -static vop_t **ufs_vnodeop_p; -static struct vnodeopv_entry_desc ufs_vnodeop_entries[] = { - { &vop_default_desc, (vop_t *) vop_defaultop }, - { &vop_fsync_desc, (vop_t *) vop_panic }, - { &vop_read_desc, (vop_t *) vop_panic }, - { &vop_reallocblks_desc, (vop_t *) vop_panic }, - { &vop_write_desc, (vop_t *) vop_panic }, - { &vop_access_desc, (vop_t *) ufs_access }, - { &vop_advlock_desc, (vop_t *) ufs_advlock }, - { &vop_bmap_desc, (vop_t *) ufs_bmap }, - { &vop_cachedlookup_desc, (vop_t *) ufs_lookup }, - { &vop_close_desc, (vop_t *) ufs_close }, - { &vop_create_desc, (vop_t *) ufs_create }, - { &vop_getattr_desc, (vop_t *) ufs_getattr }, - { &vop_inactive_desc, (vop_t *) ufs_inactive }, - { &vop_link_desc, (vop_t *) ufs_link }, - { &vop_lookup_desc, (vop_t *) vfs_cache_lookup }, - { &vop_mkdir_desc, (vop_t *) ufs_mkdir }, - { &vop_mknod_desc, (vop_t *) ufs_mknod }, - { &vop_open_desc, (vop_t *) ufs_open }, - { &vop_pathconf_desc, (vop_t *) ufs_pathconf }, - { &vop_poll_desc, (vop_t *) vop_stdpoll }, - { &vop_kqfilter_desc, (vop_t *) ufs_kqfilter }, - { &vop_getwritemount_desc, (vop_t *) vop_stdgetwritemount }, - { &vop_print_desc, (vop_t *) ufs_print }, - { &vop_readdir_desc, (vop_t *) ufs_readdir }, - { &vop_readlink_desc, (vop_t *) ufs_readlink }, - { &vop_reclaim_desc, (vop_t *) ufs_reclaim }, - { &vop_remove_desc, (vop_t *) ufs_remove }, - { &vop_rename_desc, (vop_t *) ufs_rename }, - { &vop_rmdir_desc, (vop_t *) ufs_rmdir }, - { &vop_setattr_desc, (vop_t *) ufs_setattr }, -#ifdef MAC - { &vop_setlabel_desc, (vop_t *) vop_stdsetlabel_ea }, -#endif - { &vop_strategy_desc, (vop_t *) ufs_strategy }, - { &vop_symlink_desc, (vop_t *) ufs_symlink }, - { &vop_whiteout_desc, (vop_t *) ufs_whiteout }, -#ifdef UFS_EXTATTR - { &vop_getextattr_desc, (vop_t *) ufs_getextattr }, - { &vop_deleteextattr_desc, (vop_t *) ufs_deleteextattr }, - { &vop_setextattr_desc, (vop_t *) ufs_setextattr }, -#endif -#ifdef UFS_ACL - { &vop_getacl_desc, (vop_t *) ufs_getacl }, - { &vop_setacl_desc, (vop_t *) ufs_setacl }, - { &vop_aclcheck_desc, (vop_t *) ufs_aclcheck }, -#endif - { NULL, NULL } -}; -static struct vnodeopv_desc ufs_vnodeop_opv_desc = - { &ufs_vnodeop_p, ufs_vnodeop_entries }; - -static vop_t **ufs_specop_p; -static struct vnodeopv_entry_desc ufs_specop_entries[] = { - { &vop_default_desc, (vop_t *) spec_vnoperate }, - { &vop_fsync_desc, (vop_t *) vop_panic }, - { &vop_access_desc, (vop_t *) ufs_access }, - { &vop_close_desc, (vop_t *) ufsspec_close }, - { &vop_getattr_desc, (vop_t *) ufs_getattr }, - { &vop_inactive_desc, (vop_t *) ufs_inactive }, - { &vop_print_desc, (vop_t *) ufs_print }, - { &vop_read_desc, (vop_t *) ufsspec_read }, - { &vop_reclaim_desc, (vop_t *) ufs_reclaim }, - { &vop_setattr_desc, (vop_t *) ufs_setattr }, -#ifdef MAC - { &vop_setlabel_desc, (vop_t *) vop_stdsetlabel_ea }, -#endif - { &vop_write_desc, (vop_t *) ufsspec_write }, -#ifdef UFS_EXTATTR - { &vop_getextattr_desc, (vop_t *) ufs_getextattr }, - { &vop_deleteextattr_desc, (vop_t *) ufs_deleteextattr }, - { &vop_setextattr_desc, (vop_t *) ufs_setextattr }, -#endif -#ifdef UFS_ACL - { &vop_getacl_desc, (vop_t *) ufs_getacl }, - { &vop_setacl_desc, (vop_t *) ufs_setacl }, - { &vop_aclcheck_desc, (vop_t *) ufs_aclcheck }, -#endif - {NULL, NULL} -}; -static struct vnodeopv_desc ufs_specop_opv_desc = - { &ufs_specop_p, ufs_specop_entries }; - -static vop_t **ufs_fifoop_p; -static struct vnodeopv_entry_desc ufs_fifoop_entries[] = { - { &vop_default_desc, (vop_t *) fifo_vnoperate }, - { &vop_fsync_desc, (vop_t *) 
vop_panic }, - { &vop_access_desc, (vop_t *) ufs_access }, - { &vop_close_desc, (vop_t *) ufsfifo_close }, - { &vop_getattr_desc, (vop_t *) ufs_getattr }, - { &vop_inactive_desc, (vop_t *) ufs_inactive }, - { &vop_kqfilter_desc, (vop_t *) ufsfifo_kqfilter }, - { &vop_print_desc, (vop_t *) ufs_print }, - { &vop_read_desc, (vop_t *) ufsfifo_read }, - { &vop_reclaim_desc, (vop_t *) ufs_reclaim }, - { &vop_setattr_desc, (vop_t *) ufs_setattr }, -#ifdef MAC - { &vop_setlabel_desc, (vop_t *) vop_stdsetlabel_ea }, -#endif - { &vop_write_desc, (vop_t *) ufsfifo_write }, -#ifdef UFS_EXTATTR - { &vop_getextattr_desc, (vop_t *) ufs_getextattr }, - { &vop_deleteextattr_desc, (vop_t *) ufs_deleteextattr }, - { &vop_setextattr_desc, (vop_t *) ufs_setextattr }, -#endif -#ifdef UFS_ACL - { &vop_getacl_desc, (vop_t *) ufs_getacl }, - { &vop_setacl_desc, (vop_t *) ufs_setacl }, - { &vop_aclcheck_desc, (vop_t *) ufs_aclcheck }, -#endif - { NULL, NULL } -}; -static struct vnodeopv_desc ufs_fifoop_opv_desc = - { &ufs_fifoop_p, ufs_fifoop_entries }; - -VNODEOP_SET(ufs_vnodeop_opv_desc); -VNODEOP_SET(ufs_specop_opv_desc); -VNODEOP_SET(ufs_fifoop_opv_desc); - -int -ufs_vnoperate(ap) - struct vop_generic_args /* { - struct vnodeop_desc *a_desc; - } */ *ap; -{ - return (VOCALL(ufs_vnodeop_p, ap->a_desc->vdesc_offset, ap)); -} - -int -ufs_vnoperatefifo(ap) - struct vop_generic_args /* { - struct vnodeop_desc *a_desc; - } */ *ap; -{ - return (VOCALL(ufs_fifoop_p, ap->a_desc->vdesc_offset, ap)); -} - -int -ufs_vnoperatespec(ap) - struct vop_generic_args /* { - struct vnodeop_desc *a_desc; - } */ *ap; -{ - return (VOCALL(ufs_specop_p, ap->a_desc->vdesc_offset, ap)); -} -#endif - -static int ufs_open(struct vop_open_args *); - -/* - * Open called. - * - * Nothing to do. - */ -/* ARGSUSED */ -static int -ufs_open(ap) - struct vop_open_args /* { - struct vnode *a_vp; - int a_mode; - struct ucred *a_cred; - struct thread *a_td; - } */ *ap; -{ - - /* - * Files marked append-only must be opened for appending. - */ - if ((VTOI(ap->a_vp)->i_flags & APPEND) && - (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) - return (EPERM); - return (0); -} diff --git a/src/sys/ufs/ufs/ufsmount.h b/src/sys/ufs/ufs/ufsmount.h deleted file mode 100644 index 69c5330..0000000 --- a/src/sys/ufs/ufs/ufsmount.h +++ /dev/null @@ -1,124 +0,0 @@ -#if 0 -/* - * Copyright (c) 1982, 1986, 1989, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)ufsmount.h 8.6 (Berkeley) 3/30/95 - * $FreeBSD: src/sys/ufs/ufs/ufsmount.h,v 1.28 2003/01/07 18:23:50 mckusick Exp $ - */ - -#ifndef _UFS_UFS_UFSMOUNT_H_ -#define _UFS_UFS_UFSMOUNT_H_ - -/* - * Arguments to mount UFS-based filesystems - */ -struct ufs_args { - char *fspec; /* block special device to mount */ - struct export_args export; /* network export information */ -}; - -#ifdef _KERNEL - -#ifdef MALLOC_DECLARE -MALLOC_DECLARE(M_UFSMNT); -#endif - -struct buf; -struct inode; -struct nameidata; -struct timeval; -struct ucred; -struct uio; -struct vnode; -struct ufs_extattr_per_mount; - -/* This structure describes the UFS specific mount structure data. */ -struct ufsmount { - struct mount *um_mountp; /* filesystem vfs structure */ - dev_t um_dev; /* device mounted */ - struct vnode *um_devvp; /* block device mounted vnode */ - u_long um_fstype; /* type of filesystem */ - struct fs *um_fs; /* pointer to superblock */ - struct vnode *um_quotas[MAXQUOTAS]; /* pointer to quota files */ - struct ucred *um_cred[MAXQUOTAS]; /* quota file access cred */ - struct ufs_extattr_per_mount um_extattr; /* extended attrs */ - u_long um_nindir; /* indirect ptrs per block */ - u_long um_bptrtodb; /* indir ptr to disk block */ - u_long um_seqinc; /* inc between seq blocks */ - long um_numindirdeps; /* indirdeps for this filesys */ - time_t um_btime[MAXQUOTAS]; /* block quota time limit */ - time_t um_itime[MAXQUOTAS]; /* inode quota time limit */ - char um_qflags[MAXQUOTAS]; /* quota specific flags */ - int64_t um_savedmaxfilesize; /* XXX - limit maxfilesize */ - int (*um_balloc)(struct vnode *, off_t, int, struct ucred *, int, struct buf **); - int (*um_blkatoff)(struct vnode *, off_t, char **, struct buf **); - int (*um_truncate)(struct vnode *, off_t, int, struct ucred *, struct thread *); - int (*um_update)(struct vnode *, int); - int (*um_valloc)(struct vnode *, int, struct ucred *, struct vnode **); - int (*um_vfree)(struct vnode *, ino_t, int); - void (*um_ifree)(struct ufsmount *, struct inode *); -}; - -#define UFS_BALLOC(aa, bb, cc, dd, ee, ff) VFSTOUFS((aa)->v_mount)->um_balloc(aa, bb, cc, dd, ee, ff) -#define UFS_BLKATOFF(aa, bb, cc, dd) VFSTOUFS((aa)->v_mount)->um_blkatoff(aa, bb, cc, dd) -#define UFS_TRUNCATE(aa, bb, cc, dd, ee) VFSTOUFS((aa)->v_mount)->um_truncate(aa, bb, cc, dd, ee) -#define UFS_UPDATE(aa, bb) VFSTOUFS((aa)->v_mount)->um_update(aa, bb) -#define UFS_VALLOC(aa, bb, cc, dd) VFSTOUFS((aa)->v_mount)->um_valloc(aa, bb, cc, dd) -#define UFS_VFREE(aa, bb, cc) VFSTOUFS((aa)->v_mount)->um_vfree(aa, bb, cc) -#define UFS_IFREE(aa, bb) ((aa)->um_ifree(aa, bb)) - -/* - * Filesystem types - */ -#define UFS1 1 -#define UFS2 2 - -/* - * Flags describing the state of quotas. 
- */ -#define QTF_OPENING 0x01 /* Q_QUOTAON in progress */ -#define QTF_CLOSING 0x02 /* Q_QUOTAOFF in progress */ - -/* Convert mount ptr to ufsmount ptr. */ -#define VFSTOUFS(mp) ((struct ufsmount *)((mp)->mnt_data)) - -/* - * Macros to access filesystem parameters in the ufsmount structure. - * Used by ufs_bmap. - */ -#define MNINDIR(ump) ((ump)->um_nindir) -#define blkptrtodb(ump, b) ((b) << (ump)->um_bptrtodb) -#define is_sequential(ump, a, b) ((b) == (a) + ump->um_seqinc) -#endif /* _KERNEL */ - -#endif -#endif
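
The bmap macros that close this header are pure arithmetic: blkptrtodb() shifts a block pointer left by um_bptrtodb to turn it into a device-block address, and is_sequential() asks whether two pointers are exactly um_seqinc apart, i.e. whether the underlying blocks sit next to each other on disk. The following is a minimal userspace sketch of that arithmetic; the demo_* names and the constants (a shift of 2 and an increment of 8, as for an assumed 2 KB fragment / 16 KB block layout on 512-byte sectors) are illustrative assumptions, not values taken from this header.

#include <stdio.h>
#include <stdint.h>

/* Illustrative stand-in for the bmap-related fields of struct ufsmount. */
struct demo_ump {
	unsigned long um_bptrtodb;	/* log2(fragment size / device sector size) */
	unsigned long um_seqinc;	/* pointer increment between adjacent blocks */
};

/* Same shape as the macros in the header above. */
#define demo_blkptrtodb(ump, b)		((b) << (ump)->um_bptrtodb)
#define demo_is_sequential(ump, a, b)	((b) == (a) + (ump)->um_seqinc)

int
main(void)
{
	/* Assumed layout: 2 KB fragments on 512-byte sectors (shift of 2),
	 * 8 fragments per 16 KB block.  Purely illustrative numbers. */
	struct demo_ump ump = { .um_bptrtodb = 2, .um_seqinc = 8 };
	int64_t bp1 = 1000, bp2 = 1008, bp3 = 1024;

	printf("block pointer %lld -> device block %lld\n",
	    (long long)bp1, (long long)demo_blkptrtodb(&ump, bp1));
	printf("%lld then %lld sequential? %s\n", (long long)bp1,
	    (long long)bp2, demo_is_sequential(&ump, bp1, bp2) ? "yes" : "no");
	printf("%lld then %lld sequential? %s\n", (long long)bp1,
	    (long long)bp3, demo_is_sequential(&ump, bp1, bp3) ? "yes" : "no");
	return (0);
}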
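The UFS_BALLOC/UFS_TRUNCATE/UFS_UPDATE family of macros higher up in this header dispatches through the um_* function pointers in struct ufsmount, so the generic UFS layer can call whatever implementation the mounted filesystem installed (FFS supplies them for both UFS1 and UFS2). Below is a minimal userspace sketch of that indirection pattern; the demo_* names stand in for the real ufsmount fields and FFS routines and are not part of the kernel API.

#include <stdio.h>

/* Per-mount operations table, analogous to the um_* function pointers
 * in struct ufsmount.  All names here are illustrative. */
struct demo_mount {
	const char *dm_fsname;
	int (*dm_update)(struct demo_mount *, int waitfor);
	int (*dm_truncate)(struct demo_mount *, long length);
};

/* Macros in the style of UFS_UPDATE()/UFS_TRUNCATE(): callers name the
 * operation, the mount supplies the implementation. */
#define DEMO_UPDATE(mp, waitfor)	((mp)->dm_update((mp), (waitfor)))
#define DEMO_TRUNCATE(mp, length)	((mp)->dm_truncate((mp), (length)))

/* One concrete backend, standing in for the routines a real ufsmount
 * would point at (for example ffs_update and ffs_truncate). */
static int
demo_ffs_update(struct demo_mount *mp, int waitfor)
{
	printf("%s: update, %s\n", mp->dm_fsname,
	    waitfor ? "synchronous" : "asynchronous");
	return (0);
}

static int
demo_ffs_truncate(struct demo_mount *mp, long length)
{
	printf("%s: truncate to %ld bytes\n", mp->dm_fsname, length);
	return (0);
}

int
main(void)
{
	struct demo_mount mp = {
		.dm_fsname = "demo-ffs",
		.dm_update = demo_ffs_update,
		.dm_truncate = demo_ffs_truncate,
	};

	/* Generic code never names the backend directly. */
	DEMO_TRUNCATE(&mp, 0);
	return (DEMO_UPDATE(&mp, 1));
}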
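Earlier in this diff, ufs_symlink() stores a short target directly inside the inode (via SHORTLINK) whenever its length is below mnt_maxsymlinklen, and only falls back to writing ordinary file data with vn_rdwr() for longer targets; ufs_readlink() applies the same test when reading the link back. The sketch below reproduces just that decision in userspace; the demo_* structure and the 120-byte limit are illustrative assumptions, not the on-disk format.

#include <stdio.h>
#include <string.h>

/* Illustrative short-link store: in the real inode the target reuses the
 * block-pointer area; here a plain char array stands in for it. */
#define DEMO_MAXSYMLINKLEN 120	/* assumed limit, analogous to mnt_maxsymlinklen */

struct demo_inode {
	size_t	di_size;
	char	di_shortlink[DEMO_MAXSYMLINKLEN];
	int	di_uses_data_block;	/* set when the target needs file data */
};

/* Store a symlink target the way ufs_symlink() decides to:
 * short targets live inside the inode, long ones become file data. */
static void
demo_store_symlink(struct demo_inode *ip, const char *target)
{
	size_t len = strlen(target);

	if (len < DEMO_MAXSYMLINKLEN) {
		memcpy(ip->di_shortlink, target, len);	/* bcopy() in the original */
		ip->di_size = len;
		ip->di_uses_data_block = 0;
	} else {
		/* The kernel would write the target with vn_rdwr() here. */
		ip->di_size = len;
		ip->di_uses_data_block = 1;
	}
}

int
main(void)
{
	struct demo_inode ip = { 0 };

	demo_store_symlink(&ip, "/var/tmp");
	printf("target of %zu bytes stored %s\n", ip.di_size,
	    ip.di_uses_data_block ? "as file data" : "in the inode");
	return (0);
}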
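When ufs_readdir() is asked for cookies (as it is by the NFS server), it walks the entries it has just returned and records, for each one, the directory offset immediately past that entry, so the caller can later resume the listing from any point. Below is a small userspace sketch of that cookie loop under simplifying assumptions: the demo_* names are illustrative, and unlike a real dirent stream the example iterates an array and uses d_reclen only to advance the offset.

#include <stdio.h>
#include <stdlib.h>

/* Cut-down directory entry: only the field the cookie loop needs. */
struct demo_dirent {
	unsigned short	d_reclen;	/* bytes this entry occupies on disk */
	char		d_name[30];
};

/* Build one cookie per entry: the directory offset just past that entry. */
static unsigned long *
demo_make_cookies(const struct demo_dirent *dp, int nentries,
    unsigned long off, int *ncookiesp)
{
	unsigned long *cookies, *cookiep;
	int i;

	cookies = malloc(nentries * sizeof(*cookies));
	if (cookies == NULL)
		return (NULL);
	for (i = 0, cookiep = cookies; i < nentries; i++) {
		off += dp[i].d_reclen;
		*cookiep++ = off;
	}
	*ncookiesp = nentries;
	return (cookies);
}

int
main(void)
{
	struct demo_dirent dir[3] = { { 12, "." }, { 12, ".." }, { 20, "kernel" } };
	unsigned long *cookies;
	int i, ncookies;

	cookies = demo_make_cookies(dir, 3, 0, &ncookies);
	if (cookies == NULL)
		return (1);
	for (i = 0; i < ncookies; i++)
		printf("entry %d resumes at offset %lu\n", i, cookies[i]);
	free(cookies);
	return (0);
}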