diff --git a/src/include.new/ufs/ffs/ffs_extern.h b/src/include.new/ufs/ffs/ffs_extern.h new file mode 100644 index 0000000..95eed62 --- /dev/null +++ b/src/include.new/ufs/ffs/ffs_extern.h @@ -0,0 +1,129 @@ +/*- + * Copyright (c) 1991, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ffs_extern.h 8.6 (Berkeley) 3/30/95 + * $FreeBSD: src/sys/ufs/ffs/ffs_extern.h,v 1.69.2.1 2006/03/13 03:07:37 jeff Exp $ + */ + +#ifndef _UFS_FFS_EXTERN_H +#define _UFS_FFS_EXTERN_H + +struct buf; +struct cg; +struct fid; +struct fs; +struct inode; +struct malloc_type; +struct mount; +struct thread; +struct sockaddr; +struct statfs; +struct ucred; +struct vnode; +struct vop_fsync_args; +struct vop_reallocblks_args; + +int ffs_alloc(struct inode *, + ufs2_daddr_t, ufs2_daddr_t, int, struct ucred *, ufs2_daddr_t *); +int ffs_balloc_ufs1(struct vnode *a_vp, off_t a_startoffset, int a_size, + struct ucred *a_cred, int a_flags, struct buf **a_bpp); +int ffs_balloc_ufs2(struct vnode *a_vp, off_t a_startoffset, int a_size, + struct ucred *a_cred, int a_flags, struct buf **a_bpp); +int ffs_blkatoff(struct vnode *, off_t, char **, struct buf **); +void ffs_blkfree(struct ufsmount *, struct fs *, struct vnode *, + ufs2_daddr_t, long, ino_t); +ufs2_daddr_t ffs_blkpref_ufs1(struct inode *, ufs_lbn_t, int, ufs1_daddr_t *); +ufs2_daddr_t ffs_blkpref_ufs2(struct inode *, ufs_lbn_t, int, ufs2_daddr_t *); +int ffs_checkfreefile(struct fs *, struct vnode *, ino_t); +void ffs_clrblock(struct fs *, u_char *, ufs1_daddr_t); +int ffs_copyonwrite(struct vnode *, struct buf *); +int ffs_flushfiles(struct mount *, int, struct thread *); +void ffs_fragacct(struct fs *, int, int32_t [], int); +int ffs_freefile(struct ufsmount *, struct fs *, struct vnode *, ino_t, + int); +int ffs_isblock(struct fs *, u_char *, ufs1_daddr_t); +void ffs_load_inode(struct buf *, struct inode *, struct fs *, ino_t); +int ffs_mountroot(void); +int ffs_reallocblks(struct vop_reallocblks_args *); +int ffs_realloccg(struct inode *, ufs2_daddr_t, ufs2_daddr_t, + ufs2_daddr_t, int, int, struct ucred *, struct buf **); +void ffs_setblock(struct fs *, u_char *, ufs1_daddr_t); +int ffs_snapblkfree(struct fs *, struct vnode *, ufs2_daddr_t, long, ino_t); +void ffs_snapremove(struct vnode *vp); +int ffs_snapshot(struct mount *mp, char *snapfile); +void ffs_snapshot_mount(struct mount *mp); +void ffs_snapshot_unmount(struct mount *mp); +int ffs_syncvnode(struct vnode *vp, int waitfor); +int ffs_truncate(struct vnode *, off_t, int, struct ucred *, struct thread *); +int ffs_update(struct vnode *, int); +int ffs_valloc(struct vnode *, int, struct ucred *, struct vnode **); + +int ffs_vfree(struct vnode *, ino_t, int); +vfs_vget_t ffs_vget; + +extern struct vop_vector ffs_vnodeops1; +extern struct vop_vector ffs_fifoops1; +extern struct vop_vector ffs_vnodeops2; +extern struct vop_vector ffs_fifoops2; + +/* + * Soft update function prototypes. + */ + +int softdep_check_suspend(struct mount *, struct vnode *, + int, int, int, int); +void softdep_get_depcounts(struct mount *, int *, int *); +void softdep_initialize(void); +void softdep_uninitialize(void); +int softdep_mount(struct vnode *, struct mount *, struct fs *, + struct ucred *); +void softdep_move_dependencies(struct buf *, struct buf *); +int softdep_flushworklist(struct mount *, int *, struct thread *); +int softdep_flushfiles(struct mount *, int, struct thread *); +void softdep_update_inodeblock(struct inode *, struct buf *, int); +void softdep_load_inodeblock(struct inode *); +void softdep_freefile(struct vnode *, ino_t, int); +int softdep_request_cleanup(struct fs *, struct vnode *); +void softdep_setup_freeblocks(struct inode *, off_t, int); +void softdep_setup_inomapdep(struct buf *, struct inode *, ino_t); +void softdep_setup_blkmapdep(struct buf *, struct mount *, ufs2_daddr_t); +void softdep_setup_allocdirect(struct inode *, ufs_lbn_t, ufs2_daddr_t, + ufs2_daddr_t, long, long, struct buf *); +void softdep_setup_allocext(struct inode *, ufs_lbn_t, ufs2_daddr_t, + ufs2_daddr_t, long, long, struct buf *); +void softdep_setup_allocindir_meta(struct buf *, struct inode *, + struct buf *, int, ufs2_daddr_t); +void softdep_setup_allocindir_page(struct inode *, ufs_lbn_t, + struct buf *, int, ufs2_daddr_t, ufs2_daddr_t, struct buf *); +void softdep_fsync_mountdev(struct vnode *); +int softdep_sync_metadata(struct vnode *); +int softdep_process_worklist(struct mount *, int); +int softdep_fsync(struct vnode *); +int softdep_waitidle(struct mount *); + +#endif /* !_UFS_FFS_EXTERN_H */ diff --git a/src/include.new/ufs/ffs/fs.h b/src/include.new/ufs/ffs/fs.h new file mode 100644 index 0000000..c309829 --- /dev/null +++ b/src/include.new/ufs/ffs/fs.h @@ -0,0 +1,613 @@ +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)fs.h 8.13 (Berkeley) 3/21/95 + * $FreeBSD: src/sys/ufs/ffs/fs.h,v 1.48 2005/02/20 08:02:15 delphij Exp $ + */ + +#ifndef _UFS_FFS_FS_H_ +#define _UFS_FFS_FS_H_ + +/* + * Each disk drive contains some number of filesystems. + * A filesystem consists of a number of cylinder groups. + * Each cylinder group has inodes and data. + * + * A filesystem is described by its super-block, which in turn + * describes the cylinder groups. The super-block is critical + * data and is replicated in each cylinder group to protect against + * catastrophic loss. This is done at `newfs' time and the critical + * super-block data does not change, so the copies need not be + * referenced further unless disaster strikes. + * + * For filesystem fs, the offsets of the various blocks of interest + * are given in the super block as: + * [fs->fs_sblkno] Super-block + * [fs->fs_cblkno] Cylinder group block + * [fs->fs_iblkno] Inode blocks + * [fs->fs_dblkno] Data blocks + * The beginning of cylinder group cg in fs, is given by + * the ``cgbase(fs, cg)'' macro. + * + * Depending on the architecture and the media, the superblock may + * reside in any one of four places. For tiny media where every block + * counts, it is placed at the very front of the partition. Historically, + * UFS1 placed it 8K from the front to leave room for the disk label and + * a small bootstrap. For UFS2 it got moved to 64K from the front to leave + * room for the disk label and a bigger bootstrap, and for really piggy + * systems we check at 256K from the front if the first three fail. In + * all cases the size of the superblock will be SBLOCKSIZE. All values are + * given in byte-offset form, so they do not imply a sector size. The + * SBLOCKSEARCH specifies the order in which the locations should be searched. + */ +#define SBLOCK_FLOPPY 0 +#define SBLOCK_UFS1 8192 +#define SBLOCK_UFS2 65536 +#define SBLOCK_PIGGY 262144 +#define SBLOCKSIZE 8192 +#define SBLOCKSEARCH \ + { SBLOCK_UFS2, SBLOCK_UFS1, SBLOCK_FLOPPY, SBLOCK_PIGGY, -1 } + +/* + * Max number of fragments per block. This value is NOT tweakable. + */ +#define MAXFRAG 8 + +/* + * Addresses stored in inodes are capable of addressing fragments + * of `blocks'. File system blocks of at most size MAXBSIZE can + * be optionally broken into 2, 4, or 8 pieces, each of which is + * addressable; these pieces may be DEV_BSIZE, or some multiple of + * a DEV_BSIZE unit. + * + * Large files consist of exclusively large data blocks. To avoid + * undue wasted disk space, the last data block of a small file may be + * allocated as only as many fragments of a large block as are + * necessary. The filesystem format retains only a single pointer + * to such a fragment, which is a piece of a single large block that + * has been divided. The size of such a fragment is determinable from + * information in the inode, using the ``blksize(fs, ip, lbn)'' macro. + * + * The filesystem records space availability at the fragment level; + * to determine block availability, aligned fragments are examined. + */ + +/* + * MINBSIZE is the smallest allowable block size. + * In order to insure that it is possible to create files of size + * 2^32 with only two levels of indirection, MINBSIZE is set to 4096. + * MINBSIZE must be big enough to hold a cylinder group block, + * thus changes to (struct cg) must keep its size within MINBSIZE. + * Note that super blocks are always of size SBSIZE, + * and that both SBSIZE and MAXBSIZE must be >= MINBSIZE. + */ +#define MINBSIZE 4096 + +/* + * The path name on which the filesystem is mounted is maintained + * in fs_fsmnt. MAXMNTLEN defines the amount of space allocated in + * the super block for this name. + */ +#define MAXMNTLEN 468 + +/* + * The volume name for this filesystem is maintained in fs_volname. + * MAXVOLLEN defines the length of the buffer allocated. + */ +#define MAXVOLLEN 32 + +/* + * There is a 128-byte region in the superblock reserved for in-core + * pointers to summary information. Originally this included an array + * of pointers to blocks of struct csum; now there are just a few + * pointers and the remaining space is padded with fs_ocsp[]. + * + * NOCSPTRS determines the size of this padding. One pointer (fs_csp) + * is taken away to point to a contiguous array of struct csum for + * all cylinder groups; a second (fs_maxcluster) points to an array + * of cluster sizes that is computed as cylinder groups are inspected, + * and the third points to an array that tracks the creation of new + * directories. A fourth pointer, fs_active, is used when creating + * snapshots; it points to a bitmap of cylinder groups for which the + * free-block bitmap has changed since the snapshot operation began. + */ +#define NOCSPTRS ((128 / sizeof(void *)) - 4) + +/* + * A summary of contiguous blocks of various sizes is maintained + * in each cylinder group. Normally this is set by the initial + * value of fs_maxcontig. To conserve space, a maximum summary size + * is set by FS_MAXCONTIG. + */ +#define FS_MAXCONTIG 16 + +/* + * MINFREE gives the minimum acceptable percentage of filesystem + * blocks which may be free. If the freelist drops below this level + * only the superuser may continue to allocate blocks. This may + * be set to 0 if no reserve of free blocks is deemed necessary, + * however throughput drops by fifty percent if the filesystem + * is run at between 95% and 100% full; thus the minimum default + * value of fs_minfree is 5%. However, to get good clustering + * performance, 10% is a better choice. hence we use 10% as our + * default value. With 10% free space, fragmentation is not a + * problem, so we choose to optimize for time. + */ +#define MINFREE 8 +#define DEFAULTOPT FS_OPTTIME + +/* + * Grigoriy Orlov has done some extensive work to fine + * tune the layout preferences for directories within a filesystem. + * His algorithm can be tuned by adjusting the following parameters + * which tell the system the average file size and the average number + * of files per directory. These defaults are well selected for typical + * filesystems, but may need to be tuned for odd cases like filesystems + * being used for squid caches or news spools. + */ +#define AVFILESIZ 16384 /* expected average file size */ +#define AFPDIR 64 /* expected number of files per directory */ + +/* + * The maximum number of snapshot nodes that can be associated + * with each filesystem. This limit affects only the number of + * snapshot files that can be recorded within the superblock so + * that they can be found when the filesystem is mounted. However, + * maintaining too many will slow the filesystem performance, so + * having this limit is a good idea. + */ +#define FSMAXSNAP 20 + +/* + * Used to identify special blocks in snapshots: + * + * BLK_NOCOPY - A block that was unallocated at the time the snapshot + * was taken, hence does not need to be copied when written. + * BLK_SNAP - A block held by another snapshot that is not needed by this + * snapshot. When the other snapshot is freed, the BLK_SNAP entries + * are converted to BLK_NOCOPY. These are needed to allow fsck to + * identify blocks that are in use by other snapshots (which are + * expunged from this snapshot). + */ +#define BLK_NOCOPY ((ufs2_daddr_t)(1)) +#define BLK_SNAP ((ufs2_daddr_t)(2)) + +/* + * Sysctl values for the fast filesystem. + */ +#define FFS_ADJ_REFCNT 1 /* adjust inode reference count */ +#define FFS_ADJ_BLKCNT 2 /* adjust inode used block count */ +#define FFS_BLK_FREE 3 /* free range of blocks in map */ +#define FFS_DIR_FREE 4 /* free specified dir inodes in map */ +#define FFS_FILE_FREE 5 /* free specified file inodes in map */ +#define FFS_SET_FLAGS 6 /* set filesystem flags */ +#define FFS_ADJ_NDIR 7 /* adjust number of directories */ +#define FFS_ADJ_NBFREE 8 /* adjust number of free blocks */ +#define FFS_ADJ_NIFREE 9 /* adjust number of free inodes */ +#define FFS_ADJ_NFFREE 10 /* adjust number of free frags */ +#define FFS_ADJ_NUMCLUSTERS 11 /* adjust number of free clusters */ +#define FFS_MAXID 12 /* number of valid ffs ids */ + +/* + * Command structure passed in to the filesystem to adjust filesystem values. + */ +#define FFS_CMD_VERSION 0x19790518 /* version ID */ +struct fsck_cmd { + int32_t version; /* version of command structure */ + int32_t handle; /* reference to filesystem to be changed */ + int64_t value; /* inode or block number to be affected */ + int64_t size; /* amount or range to be adjusted */ + int64_t spare; /* reserved for future use */ +}; + +/* + * Per cylinder group information; summarized in blocks allocated + * from first cylinder group data blocks. These blocks have to be + * read in from fs_csaddr (size fs_cssize) in addition to the + * super block. + */ +struct csum { + int32_t cs_ndir; /* number of directories */ + int32_t cs_nbfree; /* number of free blocks */ + int32_t cs_nifree; /* number of free inodes */ + int32_t cs_nffree; /* number of free frags */ +}; +struct csum_total { + int64_t cs_ndir; /* number of directories */ + int64_t cs_nbfree; /* number of free blocks */ + int64_t cs_nifree; /* number of free inodes */ + int64_t cs_nffree; /* number of free frags */ + int64_t cs_numclusters; /* number of free clusters */ + int64_t cs_spare[3]; /* future expansion */ +}; + +/* + * Super block for an FFS filesystem. + */ +struct fs { + int32_t fs_firstfield; /* historic filesystem linked list, */ + int32_t fs_unused_1; /* used for incore super blocks */ + int32_t fs_sblkno; /* offset of super-block in filesys */ + int32_t fs_cblkno; /* offset of cyl-block in filesys */ + int32_t fs_iblkno; /* offset of inode-blocks in filesys */ + int32_t fs_dblkno; /* offset of first data after cg */ + int32_t fs_old_cgoffset; /* cylinder group offset in cylinder */ + int32_t fs_old_cgmask; /* used to calc mod fs_ntrak */ + int32_t fs_old_time; /* last time written */ + int32_t fs_old_size; /* number of blocks in fs */ + int32_t fs_old_dsize; /* number of data blocks in fs */ + int32_t fs_ncg; /* number of cylinder groups */ + int32_t fs_bsize; /* size of basic blocks in fs */ + int32_t fs_fsize; /* size of frag blocks in fs */ + int32_t fs_frag; /* number of frags in a block in fs */ +/* these are configuration parameters */ + int32_t fs_minfree; /* minimum percentage of free blocks */ + int32_t fs_old_rotdelay; /* num of ms for optimal next block */ + int32_t fs_old_rps; /* disk revolutions per second */ +/* these fields can be computed from the others */ + int32_t fs_bmask; /* ``blkoff'' calc of blk offsets */ + int32_t fs_fmask; /* ``fragoff'' calc of frag offsets */ + int32_t fs_bshift; /* ``lblkno'' calc of logical blkno */ + int32_t fs_fshift; /* ``numfrags'' calc number of frags */ +/* these are configuration parameters */ + int32_t fs_maxcontig; /* max number of contiguous blks */ + int32_t fs_maxbpg; /* max number of blks per cyl group */ +/* these fields can be computed from the others */ + int32_t fs_fragshift; /* block to frag shift */ + int32_t fs_fsbtodb; /* fsbtodb and dbtofsb shift constant */ + int32_t fs_sbsize; /* actual size of super block */ + int32_t fs_spare1[2]; /* old fs_csmask */ + /* old fs_csshift */ + int32_t fs_nindir; /* value of NINDIR */ + int32_t fs_inopb; /* value of INOPB */ + int32_t fs_old_nspf; /* value of NSPF */ +/* yet another configuration parameter */ + int32_t fs_optim; /* optimization preference, see below */ + int32_t fs_old_npsect; /* # sectors/track including spares */ + int32_t fs_old_interleave; /* hardware sector interleave */ + int32_t fs_old_trackskew; /* sector 0 skew, per track */ + int32_t fs_id[2]; /* unique filesystem id */ +/* sizes determined by number of cylinder groups and their sizes */ + int32_t fs_old_csaddr; /* blk addr of cyl grp summary area */ + int32_t fs_cssize; /* size of cyl grp summary area */ + int32_t fs_cgsize; /* cylinder group size */ + int32_t fs_spare2; /* old fs_ntrak */ + int32_t fs_old_nsect; /* sectors per track */ + int32_t fs_old_spc; /* sectors per cylinder */ + int32_t fs_old_ncyl; /* cylinders in filesystem */ + int32_t fs_old_cpg; /* cylinders per group */ + int32_t fs_ipg; /* inodes per group */ + int32_t fs_fpg; /* blocks per group * fs_frag */ +/* this data must be re-computed after crashes */ + struct csum fs_old_cstotal; /* cylinder summary information */ +/* these fields are cleared at mount time */ + int8_t fs_fmod; /* super block modified flag */ + int8_t fs_clean; /* filesystem is clean flag */ + int8_t fs_ronly; /* mounted read-only flag */ + int8_t fs_old_flags; /* old FS_ flags */ + u_char fs_fsmnt[MAXMNTLEN]; /* name mounted on */ + u_char fs_volname[MAXVOLLEN]; /* volume name */ + u_int64_t fs_swuid; /* system-wide uid */ + int32_t fs_pad; /* due to alignment of fs_swuid */ +/* these fields retain the current block allocation info */ + int32_t fs_cgrotor; /* last cg searched */ + void *fs_ocsp[NOCSPTRS]; /* padding; was list of fs_cs buffers */ + u_int8_t *fs_contigdirs; /* (u) # of contig. allocated dirs */ + struct csum *fs_csp; /* (u) cg summary info buffer */ + int32_t *fs_maxcluster; /* (u) max cluster in each cyl group */ + u_int *fs_active; /* (u) used by snapshots to track fs */ + int32_t fs_old_cpc; /* cyl per cycle in postbl */ + int32_t fs_maxbsize; /* maximum blocking factor permitted */ + int64_t fs_sparecon64[17]; /* old rotation block list head */ + int64_t fs_sblockloc; /* byte offset of standard superblock */ + struct csum_total fs_cstotal; /* (u) cylinder summary information */ + ufs_time_t fs_time; /* last time written */ + int64_t fs_size; /* number of blocks in fs */ + int64_t fs_dsize; /* number of data blocks in fs */ + ufs2_daddr_t fs_csaddr; /* blk addr of cyl grp summary area */ + int64_t fs_pendingblocks; /* (u) blocks being freed */ + int32_t fs_pendinginodes; /* (u) inodes being freed */ + int32_t fs_snapinum[FSMAXSNAP];/* list of snapshot inode numbers */ + int32_t fs_avgfilesize; /* expected average file size */ + int32_t fs_avgfpdir; /* expected # of files per directory */ + int32_t fs_save_cgsize; /* save real cg size to use fs_bsize */ + int32_t fs_sparecon32[26]; /* reserved for future constants */ + int32_t fs_flags; /* see FS_ flags below */ + int32_t fs_contigsumsize; /* size of cluster summary array */ + int32_t fs_maxsymlinklen; /* max length of an internal symlink */ + int32_t fs_old_inodefmt; /* format of on-disk inodes */ + u_int64_t fs_maxfilesize; /* maximum representable file size */ + int64_t fs_qbmask; /* ~fs_bmask for use with 64-bit size */ + int64_t fs_qfmask; /* ~fs_fmask for use with 64-bit size */ + int32_t fs_state; /* validate fs_clean field */ + int32_t fs_old_postblformat; /* format of positional layout tables */ + int32_t fs_old_nrpos; /* number of rotational positions */ + int32_t fs_spare5[2]; /* old fs_postbloff */ + /* old fs_rotbloff */ + int32_t fs_magic; /* magic number */ +}; + +/* Sanity checking. */ +#ifdef CTASSERT +CTASSERT(sizeof(struct fs) == 1376); +#endif + +/* + * Filesystem identification + */ +#define FS_UFS1_MAGIC 0x011954 /* UFS1 fast filesystem magic number */ +#define FS_UFS2_MAGIC 0x19540119 /* UFS2 fast filesystem magic number */ +#define FS_BAD_MAGIC 0x19960408 /* UFS incomplete newfs magic number */ +#define FS_OKAY 0x7c269d38 /* superblock checksum */ +#define FS_42INODEFMT -1 /* 4.2BSD inode format */ +#define FS_44INODEFMT 2 /* 4.4BSD inode format */ + +/* + * Preference for optimization. + */ +#define FS_OPTTIME 0 /* minimize allocation time */ +#define FS_OPTSPACE 1 /* minimize disk fragmentation */ + +/* + * Filesystem flags. + * + * The FS_UNCLEAN flag is set by the kernel when the filesystem was + * mounted with fs_clean set to zero. The FS_DOSOFTDEP flag indicates + * that the filesystem should be managed by the soft updates code. + * Note that the FS_NEEDSFSCK flag is set and cleared only by the + * fsck utility. It is set when background fsck finds an unexpected + * inconsistency which requires a traditional foreground fsck to be + * run. Such inconsistencies should only be found after an uncorrectable + * disk error. A foreground fsck will clear the FS_NEEDSFSCK flag when + * it has successfully cleaned up the filesystem. The kernel uses this + * flag to enforce that inconsistent filesystems be mounted read-only. + * The FS_INDEXDIRS flag when set indicates that the kernel maintains + * on-disk auxiliary indexes (such as B-trees) for speeding directory + * accesses. Kernels that do not support auxiliary indicies clear the + * flag to indicate that the indicies need to be rebuilt (by fsck) before + * they can be used. + * + * FS_ACLS indicates that ACLs are administratively enabled for the + * file system, so they should be loaded from extended attributes, + * observed for access control purposes, and be administered by object + * owners. FS_MULTILABEL indicates that the TrustedBSD MAC Framework + * should attempt to back MAC labels into extended attributes on the + * file system rather than maintain a single mount label for all + * objects. + */ +#define FS_UNCLEAN 0x01 /* filesystem not clean at mount */ +#define FS_DOSOFTDEP 0x02 /* filesystem using soft dependencies */ +#define FS_NEEDSFSCK 0x04 /* filesystem needs sync fsck before mount */ +#define FS_INDEXDIRS 0x08 /* kernel supports indexed directories */ +#define FS_ACLS 0x10 /* file system has ACLs enabled */ +#define FS_MULTILABEL 0x20 /* file system is MAC multi-label */ +#define FS_FLAGS_UPDATED 0x80 /* flags have been moved to new location */ + +/* + * Macros to access bits in the fs_active array. + */ +#define ACTIVECGNUM(fs, cg) ((fs)->fs_active[(cg) / (NBBY * sizeof(int))]) +#define ACTIVECGOFF(cg) (1 << ((cg) % (NBBY * sizeof(int)))) +#define ACTIVESET(fs, cg) do { \ + if ((fs)->fs_active) \ + ACTIVECGNUM((fs), (cg)) |= ACTIVECGOFF((cg)); \ +} while (0) +#define ACTIVECLEAR(fs, cg) do { \ + if ((fs)->fs_active) \ + ACTIVECGNUM((fs), (cg)) &= ~ACTIVECGOFF((cg)); \ +} while (0) + +/* + * The size of a cylinder group is calculated by CGSIZE. The maximum size + * is limited by the fact that cylinder groups are at most one block. + * Its size is derived from the size of the maps maintained in the + * cylinder group and the (struct cg) size. + */ +#define CGSIZE(fs) \ + /* base cg */ (sizeof(struct cg) + sizeof(int32_t) + \ + /* old btotoff */ (fs)->fs_old_cpg * sizeof(int32_t) + \ + /* old boff */ (fs)->fs_old_cpg * sizeof(u_int16_t) + \ + /* inode map */ howmany((fs)->fs_ipg, NBBY) + \ + /* block map */ howmany((fs)->fs_fpg, NBBY) +\ + /* if present */ ((fs)->fs_contigsumsize <= 0 ? 0 : \ + /* cluster sum */ (fs)->fs_contigsumsize * sizeof(int32_t) + \ + /* cluster map */ howmany(fragstoblks(fs, (fs)->fs_fpg), NBBY))) + +/* + * The minimal number of cylinder groups that should be created. + */ +#define MINCYLGRPS 4 + +/* + * Convert cylinder group to base address of its global summary info. + */ +#define fs_cs(fs, indx) fs_csp[indx] + +/* + * Cylinder group block for a filesystem. + */ +#define CG_MAGIC 0x090255 +struct cg { + int32_t cg_firstfield; /* historic cyl groups linked list */ + int32_t cg_magic; /* magic number */ + int32_t cg_old_time; /* time last written */ + int32_t cg_cgx; /* we are the cgx'th cylinder group */ + int16_t cg_old_ncyl; /* number of cyl's this cg */ + int16_t cg_old_niblk; /* number of inode blocks this cg */ + int32_t cg_ndblk; /* number of data blocks this cg */ + struct csum cg_cs; /* cylinder summary information */ + int32_t cg_rotor; /* position of last used block */ + int32_t cg_frotor; /* position of last used frag */ + int32_t cg_irotor; /* position of last used inode */ + int32_t cg_frsum[MAXFRAG]; /* counts of available frags */ + int32_t cg_old_btotoff; /* (int32) block totals per cylinder */ + int32_t cg_old_boff; /* (u_int16) free block positions */ + int32_t cg_iusedoff; /* (u_int8) used inode map */ + int32_t cg_freeoff; /* (u_int8) free block map */ + int32_t cg_nextfreeoff; /* (u_int8) next available space */ + int32_t cg_clustersumoff; /* (u_int32) counts of avail clusters */ + int32_t cg_clusteroff; /* (u_int8) free cluster map */ + int32_t cg_nclusterblks; /* number of clusters this cg */ + int32_t cg_niblk; /* number of inode blocks this cg */ + int32_t cg_initediblk; /* last initialized inode */ + int32_t cg_sparecon32[3]; /* reserved for future use */ + ufs_time_t cg_time; /* time last written */ + int64_t cg_sparecon64[3]; /* reserved for future use */ + u_int8_t cg_space[1]; /* space for cylinder group maps */ +/* actually longer */ +}; + +/* + * Macros for access to cylinder group array structures + */ +#define cg_chkmagic(cgp) ((cgp)->cg_magic == CG_MAGIC) +#define cg_inosused(cgp) \ + ((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_iusedoff)) +#define cg_blksfree(cgp) \ + ((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_freeoff)) +#define cg_clustersfree(cgp) \ + ((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_clusteroff)) +#define cg_clustersum(cgp) \ + ((int32_t *)((uintptr_t)(cgp) + (cgp)->cg_clustersumoff)) + +/* + * Turn filesystem block numbers into disk block addresses. + * This maps filesystem blocks to device size blocks. + */ +#define fsbtodb(fs, b) ((daddr_t)(b) << (fs)->fs_fsbtodb) +#define dbtofsb(fs, b) ((b) >> (fs)->fs_fsbtodb) + +/* + * Cylinder group macros to locate things in cylinder groups. + * They calc filesystem addresses of cylinder group data structures. + */ +#define cgbase(fs, c) (((ufs2_daddr_t)(fs)->fs_fpg) * (c)) +#define cgdmin(fs, c) (cgstart(fs, c) + (fs)->fs_dblkno) /* 1st data */ +#define cgimin(fs, c) (cgstart(fs, c) + (fs)->fs_iblkno) /* inode blk */ +#define cgsblock(fs, c) (cgstart(fs, c) + (fs)->fs_sblkno) /* super blk */ +#define cgtod(fs, c) (cgstart(fs, c) + (fs)->fs_cblkno) /* cg block */ +#define cgstart(fs, c) \ + ((fs)->fs_magic == FS_UFS2_MAGIC ? cgbase(fs, c) : \ + (cgbase(fs, c) + (fs)->fs_old_cgoffset * ((c) & ~((fs)->fs_old_cgmask)))) + +/* + * Macros for handling inode numbers: + * inode number to filesystem block offset. + * inode number to cylinder group number. + * inode number to filesystem block address. + */ +#define ino_to_cg(fs, x) ((x) / (fs)->fs_ipg) +#define ino_to_fsba(fs, x) \ + ((ufs2_daddr_t)(cgimin(fs, ino_to_cg(fs, x)) + \ + (blkstofrags((fs), (((x) % (fs)->fs_ipg) / INOPB(fs)))))) +#define ino_to_fsbo(fs, x) ((x) % INOPB(fs)) + +/* + * Give cylinder group number for a filesystem block. + * Give cylinder group block number for a filesystem block. + */ +#define dtog(fs, d) ((d) / (fs)->fs_fpg) +#define dtogd(fs, d) ((d) % (fs)->fs_fpg) + +/* + * Extract the bits for a block from a map. + * Compute the cylinder and rotational position of a cyl block addr. + */ +#define blkmap(fs, map, loc) \ + (((map)[(loc) / NBBY] >> ((loc) % NBBY)) & (0xff >> (NBBY - (fs)->fs_frag))) + +/* + * The following macros optimize certain frequently calculated + * quantities by using shifts and masks in place of divisions + * modulos and multiplications. + */ +#define blkoff(fs, loc) /* calculates (loc % fs->fs_bsize) */ \ + ((loc) & (fs)->fs_qbmask) +#define fragoff(fs, loc) /* calculates (loc % fs->fs_fsize) */ \ + ((loc) & (fs)->fs_qfmask) +#define lfragtosize(fs, frag) /* calculates ((off_t)frag * fs->fs_fsize) */ \ + (((off_t)(frag)) << (fs)->fs_fshift) +#define lblktosize(fs, blk) /* calculates ((off_t)blk * fs->fs_bsize) */ \ + (((off_t)(blk)) << (fs)->fs_bshift) +/* Use this only when `blk' is known to be small, e.g., < NDADDR. */ +#define smalllblktosize(fs, blk) /* calculates (blk * fs->fs_bsize) */ \ + ((blk) << (fs)->fs_bshift) +#define lblkno(fs, loc) /* calculates (loc / fs->fs_bsize) */ \ + ((loc) >> (fs)->fs_bshift) +#define numfrags(fs, loc) /* calculates (loc / fs->fs_fsize) */ \ + ((loc) >> (fs)->fs_fshift) +#define blkroundup(fs, size) /* calculates roundup(size, fs->fs_bsize) */ \ + (((size) + (fs)->fs_qbmask) & (fs)->fs_bmask) +#define fragroundup(fs, size) /* calculates roundup(size, fs->fs_fsize) */ \ + (((size) + (fs)->fs_qfmask) & (fs)->fs_fmask) +#define fragstoblks(fs, frags) /* calculates (frags / fs->fs_frag) */ \ + ((frags) >> (fs)->fs_fragshift) +#define blkstofrags(fs, blks) /* calculates (blks * fs->fs_frag) */ \ + ((blks) << (fs)->fs_fragshift) +#define fragnum(fs, fsb) /* calculates (fsb % fs->fs_frag) */ \ + ((fsb) & ((fs)->fs_frag - 1)) +#define blknum(fs, fsb) /* calculates rounddown(fsb, fs->fs_frag) */ \ + ((fsb) &~ ((fs)->fs_frag - 1)) + +/* + * Determine the number of available frags given a + * percentage to hold in reserve. + */ +#define freespace(fs, percentreserved) \ + (blkstofrags((fs), (fs)->fs_cstotal.cs_nbfree) + \ + (fs)->fs_cstotal.cs_nffree - \ + (((off_t)((fs)->fs_dsize)) * (percentreserved) / 100)) + +/* + * Determining the size of a file block in the filesystem. + */ +#define blksize(fs, ip, lbn) \ + (((lbn) >= NDADDR || (ip)->i_size >= smalllblktosize(fs, (lbn) + 1)) \ + ? (fs)->fs_bsize \ + : (fragroundup(fs, blkoff(fs, (ip)->i_size)))) +#define sblksize(fs, size, lbn) \ + (((lbn) >= NDADDR || (size) >= ((lbn) + 1) << (fs)->fs_bshift) \ + ? (fs)->fs_bsize \ + : (fragroundup(fs, blkoff(fs, (size))))) + + +/* + * Number of inodes in a secondary storage block/fragment. + */ +#define INOPB(fs) ((fs)->fs_inopb) +#define INOPF(fs) ((fs)->fs_inopb >> (fs)->fs_fragshift) + +/* + * Number of indirects in a filesystem block. + */ +#define NINDIR(fs) ((fs)->fs_nindir) + +extern int inside[], around[]; +extern u_char *fragtbl[]; + +#endif diff --git a/src/include.new/ufs/ffs/softdep.h b/src/include.new/ufs/ffs/softdep.h new file mode 100644 index 0000000..c2f41d0 --- /dev/null +++ b/src/include.new/ufs/ffs/softdep.h @@ -0,0 +1,585 @@ +/*- + * Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved. + * + * The soft updates code is derived from the appendix of a University + * of Michigan technical report (Gregory R. Ganger and Yale N. Patt, + * "Soft Updates: A Solution to the Metadata Update Problem in File + * Systems", CSE-TR-254-95, August 1995). + * + * Further information about soft updates can be obtained from: + * + * Marshall Kirk McKusick http://www.mckusick.com/softdep/ + * 1614 Oxford Street mckusick@mckusick.com + * Berkeley, CA 94709-1608 +1-510-843-9542 + * USA + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)softdep.h 9.7 (McKusick) 6/21/00 + * $FreeBSD: src/sys/ufs/ffs/softdep.h,v 1.17.2.2 2006/03/13 03:08:00 jeff Exp $ + */ + +#include + +/* + * Allocation dependencies are handled with undo/redo on the in-memory + * copy of the data. A particular data dependency is eliminated when + * it is ALLCOMPLETE: that is ATTACHED, DEPCOMPLETE, and COMPLETE. + * + * ATTACHED means that the data is not currently being written to + * disk. UNDONE means that the data has been rolled back to a safe + * state for writing to the disk. When the I/O completes, the data is + * restored to its current form and the state reverts to ATTACHED. + * The data must be locked throughout the rollback, I/O, and roll + * forward so that the rolled back information is never visible to + * user processes. The COMPLETE flag indicates that the item has been + * written. For example, a dependency that requires that an inode be + * written will be marked COMPLETE after the inode has been written + * to disk. The DEPCOMPLETE flag indicates the completion of any other + * dependencies such as the writing of a cylinder group map has been + * completed. A dependency structure may be freed only when both it + * and its dependencies have completed and any rollbacks that are in + * progress have finished as indicated by the set of ALLCOMPLETE flags + * all being set. The two MKDIR flags indicate additional dependencies + * that must be done when creating a new directory. MKDIR_BODY is + * cleared when the directory data block containing the "." and ".." + * entries has been written. MKDIR_PARENT is cleared when the parent + * inode with the increased link count for ".." has been written. When + * both MKDIR flags have been cleared, the DEPCOMPLETE flag is set to + * indicate that the directory dependencies have been completed. The + * writing of the directory inode itself sets the COMPLETE flag which + * then allows the directory entry for the new directory to be written + * to disk. The RMDIR flag marks a dirrem structure as representing + * the removal of a directory rather than a file. When the removal + * dependencies are completed, additional work needs to be done + * (truncation of the "." and ".." entries, an additional decrement + * of the associated inode, and a decrement of the parent inode). The + * DIRCHG flag marks a diradd structure as representing the changing + * of an existing entry rather than the addition of a new one. When + * the update is complete the dirrem associated with the inode for + * the old name must be added to the worklist to do the necessary + * reference count decrement. The GOINGAWAY flag indicates that the + * data structure is frozen from further change until its dependencies + * have been completed and its resources freed after which it will be + * discarded. The IOSTARTED flag prevents multiple calls to the I/O + * start routine from doing multiple rollbacks. The SPACECOUNTED flag + * says that the files space has been accounted to the pending free + * space count. The NEWBLOCK flag marks pagedep structures that have + * just been allocated, so must be claimed by the inode before all + * dependencies are complete. The INPROGRESS flag marks worklist + * structures that are still on the worklist, but are being considered + * for action by some process. The UFS1FMT flag indicates that the + * inode being processed is a ufs1 format. The EXTDATA flag indicates + * that the allocdirect describes an extended-attributes dependency. + * The ONWORKLIST flag shows whether the structure is currently linked + * onto a worklist. + */ +#define ATTACHED 0x0001 +#define UNDONE 0x0002 +#define COMPLETE 0x0004 +#define DEPCOMPLETE 0x0008 +#define MKDIR_PARENT 0x0010 /* diradd & mkdir only */ +#define MKDIR_BODY 0x0020 /* diradd & mkdir only */ +#define RMDIR 0x0040 /* dirrem only */ +#define DIRCHG 0x0080 /* diradd & dirrem only */ +#define GOINGAWAY 0x0100 /* indirdep only */ +#define IOSTARTED 0x0200 /* inodedep & pagedep only */ +#define SPACECOUNTED 0x0400 /* inodedep only */ +#define NEWBLOCK 0x0800 /* pagedep only */ +#define INPROGRESS 0x1000 /* dirrem, freeblks, freefrag, freefile only */ +#define UFS1FMT 0x2000 /* indirdep only */ +#define EXTDATA 0x4000 /* allocdirect only */ +#define ONWORKLIST 0x8000 + +#define ALLCOMPLETE (ATTACHED | COMPLETE | DEPCOMPLETE) + +/* + * The workitem queue. + * + * It is sometimes useful and/or necessary to clean up certain dependencies + * in the background rather than during execution of an application process + * or interrupt service routine. To realize this, we append dependency + * structures corresponding to such tasks to a "workitem" queue. In a soft + * updates implementation, most pending workitems should not wait for more + * than a couple of seconds, so the filesystem syncer process awakens once + * per second to process the items on the queue. + */ + +/* LIST_HEAD(workhead, worklist); -- declared in buf.h */ + +/* + * Each request can be linked onto a work queue through its worklist structure. + * To avoid the need for a pointer to the structure itself, this structure + * MUST be declared FIRST in each type in which it appears! If more than one + * worklist is needed in the structure, then a wk_data field must be added + * and the macros below changed to use it. + */ +struct worklist { + struct mount *wk_mp; /* Mount we live in */ + LIST_ENTRY(worklist) wk_list; /* list of work requests */ + unsigned short wk_type; /* type of request */ + unsigned short wk_state; /* state flags */ +}; +#define WK_DATA(wk) ((void *)(wk)) +#define WK_PAGEDEP(wk) ((struct pagedep *)(wk)) +#define WK_INODEDEP(wk) ((struct inodedep *)(wk)) +#define WK_BMSAFEMAP(wk) ((struct bmsafemap *)(wk)) +#define WK_ALLOCDIRECT(wk) ((struct allocdirect *)(wk)) +#define WK_INDIRDEP(wk) ((struct indirdep *)(wk)) +#define WK_ALLOCINDIR(wk) ((struct allocindir *)(wk)) +#define WK_FREEFRAG(wk) ((struct freefrag *)(wk)) +#define WK_FREEBLKS(wk) ((struct freeblks *)(wk)) +#define WK_FREEFILE(wk) ((struct freefile *)(wk)) +#define WK_DIRADD(wk) ((struct diradd *)(wk)) +#define WK_MKDIR(wk) ((struct mkdir *)(wk)) +#define WK_DIRREM(wk) ((struct dirrem *)(wk)) +#define WK_NEWDIRBLK(wk) ((struct newdirblk *)(wk)) + +/* + * Various types of lists + */ +LIST_HEAD(dirremhd, dirrem); +LIST_HEAD(diraddhd, diradd); +LIST_HEAD(newblkhd, newblk); +LIST_HEAD(inodedephd, inodedep); +LIST_HEAD(allocindirhd, allocindir); +LIST_HEAD(allocdirecthd, allocdirect); +TAILQ_HEAD(allocdirectlst, allocdirect); + +/* + * The "pagedep" structure tracks the various dependencies related to + * a particular directory page. If a directory page has any dependencies, + * it will have a pagedep linked to its associated buffer. The + * pd_dirremhd list holds the list of dirrem requests which decrement + * inode reference counts. These requests are processed after the + * directory page with the corresponding zero'ed entries has been + * written. The pd_diraddhd list maintains the list of diradd requests + * which cannot be committed until their corresponding inode has been + * written to disk. Because a directory may have many new entries + * being created, several lists are maintained hashed on bits of the + * offset of the entry into the directory page to keep the lists from + * getting too long. Once a new directory entry has been cleared to + * be written, it is moved to the pd_pendinghd list. After the new + * entry has been written to disk it is removed from the pd_pendinghd + * list, any removed operations are done, and the dependency structure + * is freed. + */ +#define DAHASHSZ 5 +#define DIRADDHASH(offset) (((offset) >> 2) % DAHASHSZ) +struct pagedep { + struct worklist pd_list; /* page buffer */ +# define pd_state pd_list.wk_state /* check for multiple I/O starts */ + LIST_ENTRY(pagedep) pd_hash; /* hashed lookup */ + ino_t pd_ino; /* associated file */ + ufs_lbn_t pd_lbn; /* block within file */ + struct dirremhd pd_dirremhd; /* dirrem's waiting for page */ + struct diraddhd pd_diraddhd[DAHASHSZ]; /* diradd dir entry updates */ + struct diraddhd pd_pendinghd; /* directory entries awaiting write */ +}; + +/* + * The "inodedep" structure tracks the set of dependencies associated + * with an inode. One task that it must manage is delayed operations + * (i.e., work requests that must be held until the inodedep's associated + * inode has been written to disk). Getting an inode from its incore + * state to the disk requires two steps to be taken by the filesystem + * in this order: first the inode must be copied to its disk buffer by + * the VOP_UPDATE operation; second the inode's buffer must be written + * to disk. To ensure that both operations have happened in the required + * order, the inodedep maintains two lists. Delayed operations are + * placed on the id_inowait list. When the VOP_UPDATE is done, all + * operations on the id_inowait list are moved to the id_bufwait list. + * When the buffer is written, the items on the id_bufwait list can be + * safely moved to the work queue to be processed. A second task of the + * inodedep structure is to track the status of block allocation within + * the inode. Each block that is allocated is represented by an + * "allocdirect" structure (see below). It is linked onto the id_newinoupdt + * list until both its contents and its allocation in the cylinder + * group map have been written to disk. Once these dependencies have been + * satisfied, it is removed from the id_newinoupdt list and any followup + * actions such as releasing the previous block or fragment are placed + * on the id_inowait list. When an inode is updated (a VOP_UPDATE is + * done), the "inodedep" structure is linked onto the buffer through + * its worklist. Thus, it will be notified when the buffer is about + * to be written and when it is done. At the update time, all the + * elements on the id_newinoupdt list are moved to the id_inoupdt list + * since those changes are now relevant to the copy of the inode in the + * buffer. Also at update time, the tasks on the id_inowait list are + * moved to the id_bufwait list so that they will be executed when + * the updated inode has been written to disk. When the buffer containing + * the inode is written to disk, any updates listed on the id_inoupdt + * list are rolled back as they are not yet safe. Following the write, + * the changes are once again rolled forward and any actions on the + * id_bufwait list are processed (since those actions are now safe). + * The entries on the id_inoupdt and id_newinoupdt lists must be kept + * sorted by logical block number to speed the calculation of the size + * of the rolled back inode (see explanation in initiate_write_inodeblock). + * When a directory entry is created, it is represented by a diradd. + * The diradd is added to the id_inowait list as it cannot be safely + * written to disk until the inode that it represents is on disk. After + * the inode is written, the id_bufwait list is processed and the diradd + * entries are moved to the id_pendinghd list where they remain until + * the directory block containing the name has been written to disk. + * The purpose of keeping the entries on the id_pendinghd list is so that + * the softdep_fsync function can find and push the inode's directory + * name(s) as part of the fsync operation for that file. + */ +struct inodedep { + struct worklist id_list; /* buffer holding inode block */ +# define id_state id_list.wk_state /* inode dependency state */ + LIST_ENTRY(inodedep) id_hash; /* hashed lookup */ + struct fs *id_fs; /* associated filesystem */ + ino_t id_ino; /* dependent inode */ + nlink_t id_nlinkdelta; /* saved effective link count */ + LIST_ENTRY(inodedep) id_deps; /* bmsafemap's list of inodedep's */ + struct buf *id_buf; /* related bmsafemap (if pending) */ + long id_savedextsize; /* ext size saved during rollback */ + off_t id_savedsize; /* file size saved during rollback */ + struct workhead id_pendinghd; /* entries awaiting directory write */ + struct workhead id_bufwait; /* operations after inode written */ + struct workhead id_inowait; /* operations waiting inode update */ + struct allocdirectlst id_inoupdt; /* updates before inode written */ + struct allocdirectlst id_newinoupdt; /* updates when inode written */ + struct allocdirectlst id_extupdt; /* extdata updates pre-inode write */ + struct allocdirectlst id_newextupdt; /* extdata updates at ino write */ + union { + struct ufs1_dinode *idu_savedino1; /* saved ufs1_dinode contents */ + struct ufs2_dinode *idu_savedino2; /* saved ufs2_dinode contents */ + } id_un; +}; +#define id_savedino1 id_un.idu_savedino1 +#define id_savedino2 id_un.idu_savedino2 + +/* + * A "newblk" structure is attached to a bmsafemap structure when a block + * or fragment is allocated from a cylinder group. Its state is set to + * DEPCOMPLETE when its cylinder group map is written. It is consumed by + * an associated allocdirect or allocindir allocation which will attach + * themselves to the bmsafemap structure if the newblk's DEPCOMPLETE flag + * is not set (i.e., its cylinder group map has not been written). + */ +struct newblk { + LIST_ENTRY(newblk) nb_hash; /* hashed lookup */ + struct fs *nb_fs; /* associated filesystem */ + int nb_state; /* state of bitmap dependency */ + ufs2_daddr_t nb_newblkno; /* allocated block number */ + LIST_ENTRY(newblk) nb_deps; /* bmsafemap's list of newblk's */ + struct bmsafemap *nb_bmsafemap; /* associated bmsafemap */ +}; + +/* + * A "bmsafemap" structure maintains a list of dependency structures + * that depend on the update of a particular cylinder group map. + * It has lists for newblks, allocdirects, allocindirs, and inodedeps. + * It is attached to the buffer of a cylinder group block when any of + * these things are allocated from the cylinder group. It is freed + * after the cylinder group map is written and the state of its + * dependencies are updated with DEPCOMPLETE to indicate that it has + * been processed. + */ +struct bmsafemap { + struct worklist sm_list; /* cylgrp buffer */ + struct buf *sm_buf; /* associated buffer */ + struct allocdirecthd sm_allocdirecthd; /* allocdirect deps */ + struct allocindirhd sm_allocindirhd; /* allocindir deps */ + struct inodedephd sm_inodedephd; /* inodedep deps */ + struct newblkhd sm_newblkhd; /* newblk deps */ +}; + +/* + * An "allocdirect" structure is attached to an "inodedep" when a new block + * or fragment is allocated and pointed to by the inode described by + * "inodedep". The worklist is linked to the buffer that holds the block. + * When the block is first allocated, it is linked to the bmsafemap + * structure associated with the buffer holding the cylinder group map + * from which it was allocated. When the cylinder group map is written + * to disk, ad_state has the DEPCOMPLETE flag set. When the block itself + * is written, the COMPLETE flag is set. Once both the cylinder group map + * and the data itself have been written, it is safe to write the inode + * that claims the block. If there was a previous fragment that had been + * allocated before the file was increased in size, the old fragment may + * be freed once the inode claiming the new block is written to disk. + * This ad_fragfree request is attached to the id_inowait list of the + * associated inodedep (pointed to by ad_inodedep) for processing after + * the inode is written. When a block is allocated to a directory, an + * fsync of a file whose name is within that block must ensure not only + * that the block containing the file name has been written, but also + * that the on-disk inode references that block. When a new directory + * block is created, we allocate a newdirblk structure which is linked + * to the associated allocdirect (on its ad_newdirblk list). When the + * allocdirect has been satisfied, the newdirblk structure is moved to + * the inodedep id_bufwait list of its directory to await the inode + * being written. When the inode is written, the directory entries are + * fully committed and can be deleted from their pagedep->id_pendinghd + * and inodedep->id_pendinghd lists. + */ +struct allocdirect { + struct worklist ad_list; /* buffer holding block */ +# define ad_state ad_list.wk_state /* block pointer state */ + TAILQ_ENTRY(allocdirect) ad_next; /* inodedep's list of allocdirect's */ + ufs_lbn_t ad_lbn; /* block within file */ + ufs2_daddr_t ad_newblkno; /* new value of block pointer */ + ufs2_daddr_t ad_oldblkno; /* old value of block pointer */ + long ad_newsize; /* size of new block */ + long ad_oldsize; /* size of old block */ + LIST_ENTRY(allocdirect) ad_deps; /* bmsafemap's list of allocdirect's */ + struct buf *ad_buf; /* cylgrp buffer (if pending) */ + struct inodedep *ad_inodedep; /* associated inodedep */ + struct freefrag *ad_freefrag; /* fragment to be freed (if any) */ + struct workhead ad_newdirblk; /* dir block to notify when written */ +}; + +/* + * A single "indirdep" structure manages all allocation dependencies for + * pointers in an indirect block. The up-to-date state of the indirect + * block is stored in ir_savedata. The set of pointers that may be safely + * written to the disk is stored in ir_safecopy. The state field is used + * only to track whether the buffer is currently being written (in which + * case it is not safe to update ir_safecopy). Ir_deplisthd contains the + * list of allocindir structures, one for each block that needs to be + * written to disk. Once the block and its bitmap allocation have been + * written the safecopy can be updated to reflect the allocation and the + * allocindir structure freed. If ir_state indicates that an I/O on the + * indirect block is in progress when ir_safecopy is to be updated, the + * update is deferred by placing the allocindir on the ir_donehd list. + * When the I/O on the indirect block completes, the entries on the + * ir_donehd list are processed by updating their corresponding ir_safecopy + * pointers and then freeing the allocindir structure. + */ +struct indirdep { + struct worklist ir_list; /* buffer holding indirect block */ +# define ir_state ir_list.wk_state /* indirect block pointer state */ + caddr_t ir_saveddata; /* buffer cache contents */ + struct buf *ir_savebp; /* buffer holding safe copy */ + struct allocindirhd ir_donehd; /* done waiting to update safecopy */ + struct allocindirhd ir_deplisthd; /* allocindir deps for this block */ +}; + +/* + * An "allocindir" structure is attached to an "indirdep" when a new block + * is allocated and pointed to by the indirect block described by the + * "indirdep". The worklist is linked to the buffer that holds the new block. + * When the block is first allocated, it is linked to the bmsafemap + * structure associated with the buffer holding the cylinder group map + * from which it was allocated. When the cylinder group map is written + * to disk, ai_state has the DEPCOMPLETE flag set. When the block itself + * is written, the COMPLETE flag is set. Once both the cylinder group map + * and the data itself have been written, it is safe to write the entry in + * the indirect block that claims the block; the "allocindir" dependency + * can then be freed as it is no longer applicable. + */ +struct allocindir { + struct worklist ai_list; /* buffer holding indirect block */ +# define ai_state ai_list.wk_state /* indirect block pointer state */ + LIST_ENTRY(allocindir) ai_next; /* indirdep's list of allocindir's */ + int ai_offset; /* pointer offset in indirect block */ + ufs2_daddr_t ai_newblkno; /* new block pointer value */ + ufs2_daddr_t ai_oldblkno; /* old block pointer value */ + struct freefrag *ai_freefrag; /* block to be freed when complete */ + struct indirdep *ai_indirdep; /* address of associated indirdep */ + LIST_ENTRY(allocindir) ai_deps; /* bmsafemap's list of allocindir's */ + struct buf *ai_buf; /* cylgrp buffer (if pending) */ +}; + +/* + * A "freefrag" structure is attached to an "inodedep" when a previously + * allocated fragment is replaced with a larger fragment, rather than extended. + * The "freefrag" structure is constructed and attached when the replacement + * block is first allocated. It is processed after the inode claiming the + * bigger block that replaces it has been written to disk. Note that the + * ff_state field is is used to store the uid, so may lose data. However, + * the uid is used only in printing an error message, so is not critical. + * Keeping it in a short keeps the data structure down to 32 bytes. + */ +struct freefrag { + struct worklist ff_list; /* id_inowait or delayed worklist */ +# define ff_state ff_list.wk_state /* owning user; should be uid_t */ + ufs2_daddr_t ff_blkno; /* fragment physical block number */ + long ff_fragsize; /* size of fragment being deleted */ + ino_t ff_inum; /* owning inode number */ +}; + +/* + * A "freeblks" structure is attached to an "inodedep" when the + * corresponding file's length is reduced to zero. It records all + * the information needed to free the blocks of a file after its + * zero'ed inode has been written to disk. + */ +struct freeblks { + struct worklist fb_list; /* id_inowait or delayed worklist */ +# define fb_state fb_list.wk_state /* inode and dirty block state */ + ino_t fb_previousinum; /* inode of previous owner of blocks */ + uid_t fb_uid; /* uid of previous owner of blocks */ + struct vnode *fb_devvp; /* filesystem device vnode */ + long fb_oldextsize; /* previous ext data size */ + off_t fb_oldsize; /* previous file size */ + ufs2_daddr_t fb_chkcnt; /* used to check cnt of blks released */ + ufs2_daddr_t fb_dblks[NDADDR]; /* direct blk ptrs to deallocate */ + ufs2_daddr_t fb_iblks[NIADDR]; /* indirect blk ptrs to deallocate */ + ufs2_daddr_t fb_eblks[NXADDR]; /* indirect blk ptrs to deallocate */ +}; + +/* + * A "freefile" structure is attached to an inode when its + * link count is reduced to zero. It marks the inode as free in + * the cylinder group map after the zero'ed inode has been written + * to disk and any associated blocks and fragments have been freed. + */ +struct freefile { + struct worklist fx_list; /* id_inowait or delayed worklist */ + mode_t fx_mode; /* mode of inode */ + ino_t fx_oldinum; /* inum of the unlinked file */ + struct vnode *fx_devvp; /* filesystem device vnode */ +}; + +/* + * A "diradd" structure is linked to an "inodedep" id_inowait list when a + * new directory entry is allocated that references the inode described + * by "inodedep". When the inode itself is written (either the initial + * allocation for new inodes or with the increased link count for + * existing inodes), the COMPLETE flag is set in da_state. If the entry + * is for a newly allocated inode, the "inodedep" structure is associated + * with a bmsafemap which prevents the inode from being written to disk + * until the cylinder group has been updated. Thus the da_state COMPLETE + * flag cannot be set until the inode bitmap dependency has been removed. + * When creating a new file, it is safe to write the directory entry that + * claims the inode once the referenced inode has been written. Since + * writing the inode clears the bitmap dependencies, the DEPCOMPLETE flag + * in the diradd can be set unconditionally when creating a file. When + * creating a directory, there are two additional dependencies described by + * mkdir structures (see their description below). When these dependencies + * are resolved the DEPCOMPLETE flag is set in the diradd structure. + * If there are multiple links created to the same inode, there will be + * a separate diradd structure created for each link. The diradd is + * linked onto the pg_diraddhd list of the pagedep for the directory + * page that contains the entry. When a directory page is written, + * the pg_diraddhd list is traversed to rollback any entries that are + * not yet ready to be written to disk. If a directory entry is being + * changed (by rename) rather than added, the DIRCHG flag is set and + * the da_previous entry points to the entry that will be "removed" + * once the new entry has been committed. During rollback, entries + * with da_previous are replaced with the previous inode number rather + * than zero. + * + * The overlaying of da_pagedep and da_previous is done to keep the + * structure down to 32 bytes in size on a 32-bit machine. If a + * da_previous entry is present, the pointer to its pagedep is available + * in the associated dirrem entry. If the DIRCHG flag is set, the + * da_previous entry is valid; if not set the da_pagedep entry is valid. + * The DIRCHG flag never changes; it is set when the structure is created + * if appropriate and is never cleared. + */ +struct diradd { + struct worklist da_list; /* id_inowait or id_pendinghd list */ +# define da_state da_list.wk_state /* state of the new directory entry */ + LIST_ENTRY(diradd) da_pdlist; /* pagedep holding directory block */ + doff_t da_offset; /* offset of new dir entry in dir blk */ + ino_t da_newinum; /* inode number for the new dir entry */ + union { + struct dirrem *dau_previous; /* entry being replaced in dir change */ + struct pagedep *dau_pagedep; /* pagedep dependency for addition */ + } da_un; +}; +#define da_previous da_un.dau_previous +#define da_pagedep da_un.dau_pagedep + +/* + * Two "mkdir" structures are needed to track the additional dependencies + * associated with creating a new directory entry. Normally a directory + * addition can be committed as soon as the newly referenced inode has been + * written to disk with its increased link count. When a directory is + * created there are two additional dependencies: writing the directory + * data block containing the "." and ".." entries (MKDIR_BODY) and writing + * the parent inode with the increased link count for ".." (MKDIR_PARENT). + * These additional dependencies are tracked by two mkdir structures that + * reference the associated "diradd" structure. When they have completed, + * they set the DEPCOMPLETE flag on the diradd so that it knows that its + * extra dependencies have been completed. The md_state field is used only + * to identify which type of dependency the mkdir structure is tracking. + * It is not used in the mainline code for any purpose other than consistency + * checking. All the mkdir structures in the system are linked together on + * a list. This list is needed so that a diradd can find its associated + * mkdir structures and deallocate them if it is prematurely freed (as for + * example if a mkdir is immediately followed by a rmdir of the same directory). + * Here, the free of the diradd must traverse the list to find the associated + * mkdir structures that reference it. The deletion would be faster if the + * diradd structure were simply augmented to have two pointers that referenced + * the associated mkdir's. However, this would increase the size of the diradd + * structure from 32 to 64-bits to speed a very infrequent operation. + */ +struct mkdir { + struct worklist md_list; /* id_inowait or buffer holding dir */ +# define md_state md_list.wk_state /* type: MKDIR_PARENT or MKDIR_BODY */ + struct diradd *md_diradd; /* associated diradd */ + struct buf *md_buf; /* MKDIR_BODY: buffer holding dir */ + LIST_ENTRY(mkdir) md_mkdirs; /* list of all mkdirs */ +}; +LIST_HEAD(mkdirlist, mkdir) mkdirlisthd; + +/* + * A "dirrem" structure describes an operation to decrement the link + * count on an inode. The dirrem structure is attached to the pg_dirremhd + * list of the pagedep for the directory page that contains the entry. + * It is processed after the directory page with the deleted entry has + * been written to disk. + * + * The overlaying of dm_pagedep and dm_dirinum is done to keep the + * structure down to 32 bytes in size on a 32-bit machine. It works + * because they are never used concurrently. + */ +struct dirrem { + struct worklist dm_list; /* delayed worklist */ +# define dm_state dm_list.wk_state /* state of the old directory entry */ + LIST_ENTRY(dirrem) dm_next; /* pagedep's list of dirrem's */ + ino_t dm_oldinum; /* inum of the removed dir entry */ + union { + struct pagedep *dmu_pagedep; /* pagedep dependency for remove */ + ino_t dmu_dirinum; /* parent inode number (for rmdir) */ + } dm_un; +}; +#define dm_pagedep dm_un.dmu_pagedep +#define dm_dirinum dm_un.dmu_dirinum + +/* + * A "newdirblk" structure tracks the progress of a newly allocated + * directory block from its creation until it is claimed by its on-disk + * inode. When a block is allocated to a directory, an fsync of a file + * whose name is within that block must ensure not only that the block + * containing the file name has been written, but also that the on-disk + * inode references that block. When a new directory block is created, + * we allocate a newdirblk structure which is linked to the associated + * allocdirect (on its ad_newdirblk list). When the allocdirect has been + * satisfied, the newdirblk structure is moved to the inodedep id_bufwait + * list of its directory to await the inode being written. When the inode + * is written, the directory entries are fully committed and can be + * deleted from their pagedep->id_pendinghd and inodedep->id_pendinghd + * lists. Note that we could track directory blocks allocated to indirect + * blocks using a similar scheme with the allocindir structures. Rather + * than adding this level of complexity, we simply write those newly + * allocated indirect blocks synchronously as such allocations are rare. + */ +struct newdirblk { + struct worklist db_list; /* id_inowait or pg_newdirblk */ +# define db_state db_list.wk_state /* unused */ + struct pagedep *db_pagedep; /* associated pagedep */ +}; diff --git a/src/include.new/ufs/ufs/acl.h b/src/include.new/ufs/ufs/acl.h new file mode 100644 index 0000000..6c3cc99 --- /dev/null +++ b/src/include.new/ufs/ufs/acl.h @@ -0,0 +1,49 @@ +/*- + * Copyright (c) 1999-2001 Robert N. M. Watson + * All rights reserved. + * + * This software was developed by Robert Watson for the TrustedBSD Project. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/ufs/ufs/acl.h,v 1.5 2003/08/04 03:29:13 rwatson Exp $ + */ +/* + * Developed by the TrustedBSD Project. + * Support for POSIX.1e access control lists. + */ + +#ifndef _UFS_UFS_ACL_H_ +#define _UFS_UFS_ACL_H_ + +#ifdef _KERNEL + +void ufs_sync_acl_from_inode(struct inode *ip, struct acl *acl); +void ufs_sync_inode_from_acl(struct acl *acl, struct inode *ip); + +int ufs_getacl(struct vop_getacl_args *); +int ufs_setacl(struct vop_setacl_args *); +int ufs_aclcheck(struct vop_aclcheck_args *); + +#endif /* !_KERNEL */ + +#endif /* !_UFS_UFS_ACL_H_ */ diff --git a/src/include.new/ufs/ufs/dinode.h b/src/include.new/ufs/ufs/dinode.h new file mode 100644 index 0000000..f9a9046 --- /dev/null +++ b/src/include.new/ufs/ufs/dinode.h @@ -0,0 +1,191 @@ +/*- + * Copyright (c) 2002 Networks Associates Technology, Inc. + * All rights reserved. + * + * This software was developed for the FreeBSD Project by Marshall + * Kirk McKusick and Network Associates Laboratories, the Security + * Research Division of Network Associates, Inc. under DARPA/SPAWAR + * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS + * research program + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Copyright (c) 1982, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The names of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)dinode.h 8.3 (Berkeley) 1/21/94 + * $FreeBSD: src/sys/ufs/ufs/dinode.h,v 1.15 2005/01/07 02:29:26 imp Exp $ + */ + +#ifndef _UFS_UFS_DINODE_H_ +#define _UFS_UFS_DINODE_H_ + +/* + * The root inode is the root of the filesystem. Inode 0 can't be used for + * normal purposes and historically bad blocks were linked to inode 1, thus + * the root inode is 2. (Inode 1 is no longer used for this purpose, however + * numerous dump tapes make this assumption, so we are stuck with it). + */ +#define ROOTINO ((ino_t)2) + +/* + * The Whiteout inode# is a dummy non-zero inode number which will + * never be allocated to a real file. It is used as a place holder + * in the directory entry which has been tagged as a DT_W entry. + * See the comments about ROOTINO above. + */ +#define WINO ((ino_t)1) + +/* + * The size of physical and logical block numbers and time fields in UFS. + */ +typedef int32_t ufs1_daddr_t; +typedef int64_t ufs2_daddr_t; +typedef int64_t ufs_lbn_t; +typedef int64_t ufs_time_t; + +/* File permissions. */ +#define IEXEC 0000100 /* Executable. */ +#define IWRITE 0000200 /* Writeable. */ +#define IREAD 0000400 /* Readable. */ +#define ISVTX 0001000 /* Sticky bit. */ +#define ISGID 0002000 /* Set-gid. */ +#define ISUID 0004000 /* Set-uid. */ + +/* File types. */ +#define IFMT 0170000 /* Mask of file type. */ +#define IFIFO 0010000 /* Named pipe (fifo). */ +#define IFCHR 0020000 /* Character device. */ +#define IFDIR 0040000 /* Directory file. */ +#define IFBLK 0060000 /* Block device. */ +#define IFREG 0100000 /* Regular file. */ +#define IFLNK 0120000 /* Symbolic link. */ +#define IFSOCK 0140000 /* UNIX domain socket. */ +#define IFWHT 0160000 /* Whiteout. */ + +/* + * A dinode contains all the meta-data associated with a UFS2 file. + * This structure defines the on-disk format of a dinode. Since + * this structure describes an on-disk structure, all its fields + * are defined by types with precise widths. + */ + +#define NXADDR 2 /* External addresses in inode. */ +#define NDADDR 12 /* Direct addresses in inode. */ +#define NIADDR 3 /* Indirect addresses in inode. */ + +struct ufs2_dinode { + u_int16_t di_mode; /* 0: IFMT, permissions; see below. */ + int16_t di_nlink; /* 2: File link count. */ + u_int32_t di_uid; /* 4: File owner. */ + u_int32_t di_gid; /* 8: File group. */ + u_int32_t di_blksize; /* 12: Inode blocksize. */ + u_int64_t di_size; /* 16: File byte count. */ + u_int64_t di_blocks; /* 24: Bytes actually held. */ + ufs_time_t di_atime; /* 32: Last access time. */ + ufs_time_t di_mtime; /* 40: Last modified time. */ + ufs_time_t di_ctime; /* 48: Last inode change time. */ + ufs_time_t di_birthtime; /* 56: Inode creation time. */ + int32_t di_mtimensec; /* 64: Last modified time. */ + int32_t di_atimensec; /* 68: Last access time. */ + int32_t di_ctimensec; /* 72: Last inode change time. */ + int32_t di_birthnsec; /* 76: Inode creation time. */ + int32_t di_gen; /* 80: Generation number. */ + u_int32_t di_kernflags; /* 84: Kernel flags. */ + u_int32_t di_flags; /* 88: Status flags (chflags). */ + int32_t di_extsize; /* 92: External attributes block. */ + ufs2_daddr_t di_extb[NXADDR];/* 96: External attributes block. */ + ufs2_daddr_t di_db[NDADDR]; /* 112: Direct disk blocks. */ + ufs2_daddr_t di_ib[NIADDR]; /* 208: Indirect disk blocks. */ + int64_t di_spare[3]; /* 232: Reserved; currently unused */ +}; + +/* + * The di_db fields may be overlaid with other information for + * file types that do not have associated disk storage. Block + * and character devices overlay the first data block with their + * dev_t value. Short symbolic links place their path in the + * di_db area. + */ +#define di_rdev di_db[0] + +/* + * A UFS1 dinode contains all the meta-data associated with a UFS1 file. + * This structure defines the on-disk format of a UFS1 dinode. Since + * this structure describes an on-disk structure, all its fields + * are defined by types with precise widths. + */ +struct ufs1_dinode { + u_int16_t di_mode; /* 0: IFMT, permissions; see below. */ + int16_t di_nlink; /* 2: File link count. */ + union { + u_int16_t oldids[2]; /* 4: Ffs: old user and group ids. */ + } di_u; + u_int64_t di_size; /* 8: File byte count. */ + int32_t di_atime; /* 16: Last access time. */ + int32_t di_atimensec; /* 20: Last access time. */ + int32_t di_mtime; /* 24: Last modified time. */ + int32_t di_mtimensec; /* 28: Last modified time. */ + int32_t di_ctime; /* 32: Last inode change time. */ + int32_t di_ctimensec; /* 36: Last inode change time. */ + ufs1_daddr_t di_db[NDADDR]; /* 40: Direct disk blocks. */ + ufs1_daddr_t di_ib[NIADDR]; /* 88: Indirect disk blocks. */ + u_int32_t di_flags; /* 100: Status flags (chflags). */ + int32_t di_blocks; /* 104: Blocks actually held. */ + int32_t di_gen; /* 108: Generation number. */ + u_int32_t di_uid; /* 112: File owner. */ + u_int32_t di_gid; /* 116: File group. */ + int32_t di_spare[2]; /* 120: Reserved; currently unused */ +}; +#define di_ogid di_u.oldids[1] +#define di_ouid di_u.oldids[0] + +#endif /* _UFS_UFS_DINODE_H_ */ diff --git a/src/include.new/ufs/ufs/dir.h b/src/include.new/ufs/ufs/dir.h new file mode 100644 index 0000000..61ca819 --- /dev/null +++ b/src/include.new/ufs/ufs/dir.h @@ -0,0 +1,155 @@ +/*- + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)dir.h 8.2 (Berkeley) 1/21/94 + * $FreeBSD: src/sys/ufs/ufs/dir.h,v 1.11 2005/01/07 02:29:26 imp Exp $ + */ + +#ifndef _UFS_UFS_DIR_H_ +#define _UFS_UFS_DIR_H_ + +/* + * Theoretically, directories can be more than 2Gb in length, however, in + * practice this seems unlikely. So, we define the type doff_t as a 32-bit + * quantity to keep down the cost of doing lookup on a 32-bit machine. + */ +#define doff_t int32_t +#define MAXDIRSIZE (0x7fffffff) + +/* + * A directory consists of some number of blocks of DIRBLKSIZ + * bytes, where DIRBLKSIZ is chosen such that it can be transferred + * to disk in a single atomic operation (e.g. 512 bytes on most machines). + * + * Each DIRBLKSIZ byte block contains some number of directory entry + * structures, which are of variable length. Each directory entry has + * a struct direct at the front of it, containing its inode number, + * the length of the entry, and the length of the name contained in + * the entry. These are followed by the name padded to a 4 byte boundary + * with null bytes. All names are guaranteed null terminated. + * The maximum length of a name in a directory is MAXNAMLEN. + * + * The macro DIRSIZ(fmt, dp) gives the amount of space required to represent + * a directory entry. Free space in a directory is represented by + * entries which have dp->d_reclen > DIRSIZ(fmt, dp). All DIRBLKSIZ bytes + * in a directory block are claimed by the directory entries. This + * usually results in the last entry in a directory having a large + * dp->d_reclen. When entries are deleted from a directory, the + * space is returned to the previous entry in the same directory + * block by increasing its dp->d_reclen. If the first entry of + * a directory block is free, then its dp->d_ino is set to 0. + * Entries other than the first in a directory do not normally have + * dp->d_ino set to 0. + */ +#define DIRBLKSIZ DEV_BSIZE +#define MAXNAMLEN 255 + +struct direct { + u_int32_t d_ino; /* inode number of entry */ + u_int16_t d_reclen; /* length of this record */ + u_int8_t d_type; /* file type, see below */ + u_int8_t d_namlen; /* length of string in d_name */ + char d_name[MAXNAMLEN + 1];/* name with length <= MAXNAMLEN */ +}; + +/* + * File types + */ +#define DT_UNKNOWN 0 +#define DT_FIFO 1 +#define DT_CHR 2 +#define DT_DIR 4 +#define DT_BLK 6 +#define DT_REG 8 +#define DT_LNK 10 +#define DT_SOCK 12 +#define DT_WHT 14 + +/* + * Convert between stat structure types and directory types. + */ +#define IFTODT(mode) (((mode) & 0170000) >> 12) +#define DTTOIF(dirtype) ((dirtype) << 12) + +/* + * The DIRSIZ macro gives the minimum record length which will hold + * the directory entry. This requires the amount of space in struct direct + * without the d_name field, plus enough space for the name with a terminating + * null byte (dp->d_namlen+1), rounded up to a 4 byte boundary. + * + * + */ +#define DIRECTSIZ(namlen) \ + (((int)&((struct direct *)0)->d_name + \ + ((namlen)+1)*sizeof(((struct direct *)0)->d_name[0]) + 3) & ~3) +#if (BYTE_ORDER == LITTLE_ENDIAN) +#define DIRSIZ(oldfmt, dp) \ + ((oldfmt) ? DIRECTSIZ((dp)->d_type) : DIRECTSIZ((dp)->d_namlen)) +#else +#define DIRSIZ(oldfmt, dp) \ + DIRECTSIZ((dp)->d_namlen) +#endif +#define OLDDIRFMT 1 +#define NEWDIRFMT 0 + +/* + * Template for manipulating directories. Should use struct direct's, + * but the name field is MAXNAMLEN - 1, and this just won't do. + */ +struct dirtemplate { + u_int32_t dot_ino; + int16_t dot_reclen; + u_int8_t dot_type; + u_int8_t dot_namlen; + char dot_name[4]; /* must be multiple of 4 */ + u_int32_t dotdot_ino; + int16_t dotdot_reclen; + u_int8_t dotdot_type; + u_int8_t dotdot_namlen; + char dotdot_name[4]; /* ditto */ +}; + +/* + * This is the old format of directories, sanz type element. + */ +struct odirtemplate { + u_int32_t dot_ino; + int16_t dot_reclen; + u_int16_t dot_namlen; + char dot_name[4]; /* must be multiple of 4 */ + u_int32_t dotdot_ino; + int16_t dotdot_reclen; + u_int16_t dotdot_namlen; + char dotdot_name[4]; /* ditto */ +}; +#endif /* !_DIR_H_ */ diff --git a/src/include.new/ufs/ufs/dirhash.h b/src/include.new/ufs/ufs/dirhash.h new file mode 100644 index 0000000..d194671 --- /dev/null +++ b/src/include.new/ufs/ufs/dirhash.h @@ -0,0 +1,127 @@ +/*- + * Copyright (c) 2001 Ian Dowse. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/ufs/ufs/dirhash.h,v 1.5 2005/01/07 02:29:26 imp Exp $ + */ + +#ifndef _UFS_UFS_DIRHASH_H_ +#define _UFS_UFS_DIRHASH_H_ + +/* + * For fast operations on large directories, we maintain a hash + * that maps the file name to the offset of the directory entry within + * the directory file. + * + * The hashing uses a dumb spillover to the next free slot on + * collisions, so we must keep the utilisation low to avoid + * long linear searches. Deleted entries that are not the last + * in a chain must be marked DIRHASH_DEL. + * + * We also maintain information about free space in each block + * to speed up creations. + */ +#define DIRHASH_EMPTY (-1) /* entry unused */ +#define DIRHASH_DEL (-2) /* deleted entry; may be part of chain */ + +#define DIRALIGN 4 +#define DH_NFSTATS (DIRECTSIZ(MAXNAMLEN + 1) / DIRALIGN) + /* max DIRALIGN words in a directory entry */ + +/* + * Dirhash uses a score mechanism to achieve a hybrid between a + * least-recently-used and a least-often-used algorithm for entry + * recycling. The score is incremented when a directory is used, and + * decremented when the directory is a candidate for recycling. When + * the score reaches zero, the hash is recycled. Hashes are linked + * together on a TAILQ list, and hashes with higher scores filter + * towards the tail (most recently used) end of the list. + * + * New hash entries are given an inital score of DH_SCOREINIT and are + * placed at the most-recently-used end of the list. This helps a lot + * in the worst-case case scenario where every directory access is + * to a directory that is not hashed (i.e. the working set of hash + * candidates is much larger than the configured memry limit). In this + * case it limits the number of hash builds to 1/DH_SCOREINIT of the + * number of accesses. + */ +#define DH_SCOREINIT 8 /* initial dh_score when dirhash built */ +#define DH_SCOREMAX 64 /* max dh_score value */ + +/* + * The main hash table has 2 levels. It is an array of pointers to + * blocks of DH_NBLKOFF offsets. + */ +#define DH_BLKOFFSHIFT 8 +#define DH_NBLKOFF (1 << DH_BLKOFFSHIFT) +#define DH_BLKOFFMASK (DH_NBLKOFF - 1) + +#define DH_ENTRY(dh, slot) \ + ((dh)->dh_hash[(slot) >> DH_BLKOFFSHIFT][(slot) & DH_BLKOFFMASK]) + +struct dirhash { + struct mtx dh_mtx; /* protects all fields except dh_list */ + + doff_t **dh_hash; /* the hash array (2-level) */ + int dh_narrays; /* number of entries in dh_hash */ + int dh_hlen; /* total slots in the 2-level hash array */ + int dh_hused; /* entries in use */ + + /* Free space statistics. XXX assumes DIRBLKSIZ is 512. */ + u_int8_t *dh_blkfree; /* free DIRALIGN words in each dir block */ + int dh_nblk; /* size of dh_blkfree array */ + int dh_dirblks; /* number of DIRBLKSIZ blocks in dir */ + int dh_firstfree[DH_NFSTATS + 1]; /* first blk with N words free */ + + int dh_seqopt; /* sequential access optimisation enabled */ + doff_t dh_seqoff; /* sequential access optimisation offset */ + + int dh_score; /* access count for this dirhash */ + + int dh_onlist; /* true if on the ufsdirhash_list chain */ + + /* Protected by ufsdirhash_mtx. */ + TAILQ_ENTRY(dirhash) dh_list; /* chain of all dirhashes */ +}; + + +/* + * Dirhash functions. + */ +void ufsdirhash_init(void); +void ufsdirhash_uninit(void); +int ufsdirhash_build(struct inode *); +doff_t ufsdirhash_findfree(struct inode *, int, int *); +doff_t ufsdirhash_enduseful(struct inode *); +int ufsdirhash_lookup(struct inode *, char *, int, doff_t *, struct buf **, + doff_t *); +void ufsdirhash_newblk(struct inode *, doff_t); +void ufsdirhash_add(struct inode *, struct direct *, doff_t); +void ufsdirhash_remove(struct inode *, struct direct *, doff_t); +void ufsdirhash_move(struct inode *, struct direct *, doff_t, doff_t); +void ufsdirhash_dirtrunc(struct inode *, doff_t); +void ufsdirhash_free(struct inode *); + +void ufsdirhash_checkblock(struct inode *, char *, doff_t); + +#endif /* !_UFS_UFS_DIRHASH_H_ */ diff --git a/src/include.new/ufs/ufs/extattr.h b/src/include.new/ufs/ufs/extattr.h new file mode 100644 index 0000000..a1e5707 --- /dev/null +++ b/src/include.new/ufs/ufs/extattr.h @@ -0,0 +1,111 @@ +/*- + * Copyright (c) 1999-2001 Robert N. M. Watson + * All rights reserved. + * + * This software was developed by Robert Watson for the TrustedBSD Project. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/ufs/ufs/extattr.h,v 1.20 2005/01/31 08:16:45 imp Exp $ + */ +/* + * Developed by the TrustedBSD Project. + * Support for extended filesystem attributes. + */ + +#ifndef _UFS_UFS_EXTATTR_H_ +#define _UFS_UFS_EXTATTR_H_ + +#define UFS_EXTATTR_MAGIC 0x00b5d5ec +#define UFS_EXTATTR_VERSION 0x00000003 +#define UFS_EXTATTR_FSROOTSUBDIR ".attribute" +#define UFS_EXTATTR_SUBDIR_SYSTEM "system" +#define UFS_EXTATTR_SUBDIR_USER "user" +#define UFS_EXTATTR_MAXEXTATTRNAME 65 /* including null */ + +#define UFS_EXTATTR_ATTR_FLAG_INUSE 0x00000001 /* attr has been set */ +#define UFS_EXTATTR_PERM_KERNEL 0x00000000 +#define UFS_EXTATTR_PERM_ROOT 0x00000001 +#define UFS_EXTATTR_PERM_OWNER 0x00000002 +#define UFS_EXTATTR_PERM_ANYONE 0x00000003 + +#define UFS_EXTATTR_UEPM_INITIALIZED 0x00000001 +#define UFS_EXTATTR_UEPM_STARTED 0x00000002 + +#define UFS_EXTATTR_CMD_START 0x00000001 +#define UFS_EXTATTR_CMD_STOP 0x00000002 +#define UFS_EXTATTR_CMD_ENABLE 0x00000003 +#define UFS_EXTATTR_CMD_DISABLE 0x00000004 + +struct ufs_extattr_fileheader { + u_int uef_magic; /* magic number for sanity checking */ + u_int uef_version; /* version of attribute file */ + u_int uef_size; /* size of attributes, w/o header */ +}; + +struct ufs_extattr_header { + u_int ueh_flags; /* flags for attribute */ + u_int ueh_len; /* local defined length; <= uef_size */ + u_int32_t ueh_i_gen; /* generation number for sanity */ + /* data follows the header */ +}; + +#ifdef _KERNEL + +#ifdef MALLOC_DECLARE +MALLOC_DECLARE(M_EXTATTR); +#endif + +struct vnode; +LIST_HEAD(ufs_extattr_list_head, ufs_extattr_list_entry); +struct ufs_extattr_list_entry { + LIST_ENTRY(ufs_extattr_list_entry) uele_entries; + struct ufs_extattr_fileheader uele_fileheader; + int uele_attrnamespace; + char uele_attrname[UFS_EXTATTR_MAXEXTATTRNAME]; + struct vnode *uele_backing_vnode; +}; + +struct lock; +struct ucred; +struct ufs_extattr_per_mount { + struct lock uepm_lock; + struct ufs_extattr_list_head uepm_list; + struct ucred *uepm_ucred; + int uepm_flags; +}; + +void ufs_extattr_uepm_init(struct ufs_extattr_per_mount *uepm); +void ufs_extattr_uepm_destroy(struct ufs_extattr_per_mount *uepm); +int ufs_extattr_start(struct mount *mp, struct thread *td); +int ufs_extattr_autostart(struct mount *mp, struct thread *td); +int ufs_extattr_stop(struct mount *mp, struct thread *td); +int ufs_extattrctl(struct mount *mp, int cmd, struct vnode *filename, + int attrnamespace, const char *attrname, struct thread *td); +int ufs_getextattr(struct vop_getextattr_args *ap); +int ufs_deleteextattr(struct vop_deleteextattr_args *ap); +int ufs_setextattr(struct vop_setextattr_args *ap); +void ufs_extattr_vnode_inactive(struct vnode *vp, struct thread *td); + +#endif /* !_KERNEL */ + +#endif /* !_UFS_UFS_EXTATTR_H_ */ diff --git a/src/include.new/ufs/ufs/inode.h b/src/include.new/ufs/ufs/inode.h new file mode 100644 index 0000000..7904bc3 --- /dev/null +++ b/src/include.new/ufs/ufs/inode.h @@ -0,0 +1,180 @@ +/*- + * Copyright (c) 1982, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)inode.h 8.9 (Berkeley) 5/14/95 + * $FreeBSD: src/sys/ufs/ufs/inode.h,v 1.49 2005/03/14 10:21:16 phk Exp $ + */ + +#ifndef _UFS_UFS_INODE_H_ +#define _UFS_UFS_INODE_H_ + +#include +#include +#include + +/* + * This must agree with the definition in . + */ +#define doff_t int32_t + +/* + * The inode is used to describe each active (or recently active) file in the + * UFS filesystem. It is composed of two types of information. The first part + * is the information that is needed only while the file is active (such as + * the identity of the file and linkage to speed its lookup). The second part + * is the permanent meta-data associated with the file which is read in + * from the permanent dinode from long term storage when the file becomes + * active, and is put back when the file is no longer being used. + */ +struct inode { + TAILQ_ENTRY(inode) i_nextsnap; /* snapshot file list. */ + struct vnode *i_vnode;/* Vnode associated with this inode. */ + struct ufsmount *i_ump;/* Ufsmount point associated with this inode. */ + u_int32_t i_flag; /* flags, see below */ + struct cdev *i_dev; /* Device associated with the inode. */ + ino_t i_number; /* The identity of the inode. */ + int i_effnlink; /* i_nlink when I/O completes */ + + struct fs *i_fs; /* Associated filesystem superblock. */ + struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */ + u_quad_t i_modrev; /* Revision level for NFS lease. */ + struct lockf *i_lockf;/* Head of byte-level lock list. */ + /* + * Side effects; used during directory lookup. + */ + int32_t i_count; /* Size of free slot in directory. */ + doff_t i_endoff; /* End of useful stuff in directory. */ + doff_t i_diroff; /* Offset in dir, where we found last entry. */ + doff_t i_offset; /* Offset of free space in directory. */ + ino_t i_ino; /* Inode number of found directory. */ + u_int32_t i_reclen; /* Size of found directory entry. */ + + union { + struct dirhash *dirhash; /* Hashing for large directories. */ + daddr_t *snapblklist; /* Collect expunged snapshot blocks. */ + } i_un; + + /* + * Data for extended attribute modification. + */ + u_char *i_ea_area; /* Pointer to malloced copy of EA area */ + unsigned i_ea_len; /* Length of i_ea_area */ + int i_ea_error; /* First errno in transaction */ + + /* + * Copies from the on-disk dinode itself. + */ + u_int16_t i_mode; /* IFMT, permissions; see below. */ + int16_t i_nlink; /* File link count. */ + u_int64_t i_size; /* File byte count. */ + u_int32_t i_flags; /* Status flags (chflags). */ + int64_t i_gen; /* Generation number. */ + u_int32_t i_uid; /* File owner. */ + u_int32_t i_gid; /* File group. */ + /* + * The real copy of the on-disk inode. + */ + union { + struct ufs1_dinode *din1; /* UFS1 on-disk dinode. */ + struct ufs2_dinode *din2; /* UFS2 on-disk dinode. */ + } dinode_u; +}; +/* + * These flags are kept in i_flag. + */ +#define IN_ACCESS 0x0001 /* Access time update request. */ +#define IN_CHANGE 0x0002 /* Inode change time update request. */ +#define IN_UPDATE 0x0004 /* Modification time update request. */ +#define IN_MODIFIED 0x0008 /* Inode has been modified. */ +#define IN_RENAME 0x0010 /* Inode is being renamed. */ +#define IN_LAZYMOD 0x0040 /* Modified, but don't write yet. */ +#define IN_SPACECOUNTED 0x0080 /* Blocks to be freed in free count. */ + +#define i_devvp i_ump->um_devvp +#define i_umbufobj i_ump->um_bo +#define i_dirhash i_un.dirhash +#define i_snapblklist i_un.snapblklist +#define i_din1 dinode_u.din1 +#define i_din2 dinode_u.din2 + +#ifdef _KERNEL +/* + * The DIP macro is used to access fields in the dinode that are + * not cached in the inode itself. + */ +#define DIP(ip, field) \ + (((ip)->i_ump->um_fstype == UFS1) ? \ + (ip)->i_din1->d##field : (ip)->i_din2->d##field) +#define DIP_SET(ip, field, val) do { \ + if ((ip)->i_ump->um_fstype == UFS1) \ + (ip)->i_din1->d##field = (val); \ + else \ + (ip)->i_din2->d##field = (val); \ + } while (0) + +#define MAXSYMLINKLEN(ip) \ + ((ip)->i_ump->um_fstype == UFS1) ? \ + ((NDADDR + NIADDR) * sizeof(ufs1_daddr_t)) : \ + ((NDADDR + NIADDR) * sizeof(ufs2_daddr_t)) +#define SHORTLINK(ip) \ + (((ip)->i_ump->um_fstype == UFS1) ? \ + (caddr_t)(ip)->i_din1->di_db : (caddr_t)(ip)->i_din2->di_db) + +/* + * Structure used to pass around logical block paths generated by + * ufs_getlbns and used by truncate and bmap code. + */ +struct indir { + ufs2_daddr_t in_lbn; /* Logical block number. */ + int in_off; /* Offset in buffer. */ + int in_exists; /* Flag if the block exists. */ +}; + +/* Convert between inode pointers and vnode pointers. */ +#define VTOI(vp) ((struct inode *)(vp)->v_data) +#define ITOV(ip) ((ip)->i_vnode) + +/* Determine if soft dependencies are being done */ +#define DOINGSOFTDEP(vp) ((vp)->v_mount->mnt_flag & MNT_SOFTDEP) +#define DOINGASYNC(vp) ((vp)->v_mount->mnt_flag & MNT_ASYNC) + +/* This overlays the fid structure (see mount.h). */ +struct ufid { + u_int16_t ufid_len; /* Length of structure. */ + u_int16_t ufid_pad; /* Force 32-bit alignment. */ + ino_t ufid_ino; /* File number (ino). */ + int32_t ufid_gen; /* Generation number. */ +}; +#endif /* _KERNEL */ + +#endif /* !_UFS_UFS_INODE_H_ */ diff --git a/src/include.new/ufs/ufs/quota.h b/src/include.new/ufs/ufs/quota.h new file mode 100644 index 0000000..b584188 --- /dev/null +++ b/src/include.new/ufs/ufs/quota.h @@ -0,0 +1,200 @@ +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Robert Elz at The University of Melbourne. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)quota.h 8.3 (Berkeley) 8/19/94 + * $FreeBSD: src/sys/ufs/ufs/quota.h,v 1.27 2005/01/07 02:29:26 imp Exp $ + */ + +#ifndef _UFS_UFS_QUOTA_H_ +#define _UFS_UFS_QUOTA_H_ + +/* + * Definitions for disk quotas imposed on the average user + * (big brother finally hits UNIX). + * + * The following constants define the amount of time given a user before the + * soft limits are treated as hard limits (usually resulting in an allocation + * failure). The timer is started when the user crosses their soft limit, it + * is reset when they go below their soft limit. + */ +#define MAX_IQ_TIME (7*24*60*60) /* seconds in 1 week */ +#define MAX_DQ_TIME (7*24*60*60) /* seconds in 1 week */ + +/* + * The following constants define the usage of the quota file array in the + * ufsmount structure and dquot array in the inode structure. The semantics + * of the elements of these arrays are defined in the routine getinoquota; + * the remainder of the quota code treats them generically and need not be + * inspected when changing the size of the array. + */ +#define MAXQUOTAS 2 +#define USRQUOTA 0 /* element used for user quotas */ +#define GRPQUOTA 1 /* element used for group quotas */ + +/* + * Definitions for the default names of the quotas files. + */ +#define INITQFNAMES { \ + "user", /* USRQUOTA */ \ + "group", /* GRPQUOTA */ \ + "undefined", \ +} +#define QUOTAFILENAME "quota" +#define QUOTAGROUP "operator" + +/* + * Command definitions for the 'quotactl' system call. The commands are + * broken into a main command defined below and a subcommand that is used + * to convey the type of quota that is being manipulated (see above). + */ +#define SUBCMDMASK 0x00ff +#define SUBCMDSHIFT 8 +#define QCMD(cmd, type) (((cmd) << SUBCMDSHIFT) | ((type) & SUBCMDMASK)) + +#define Q_QUOTAON 0x0100 /* enable quotas */ +#define Q_QUOTAOFF 0x0200 /* disable quotas */ +#define Q_GETQUOTA 0x0300 /* get limits and usage */ +#define Q_SETQUOTA 0x0400 /* set limits and usage */ +#define Q_SETUSE 0x0500 /* set usage */ +#define Q_SYNC 0x0600 /* sync disk copy of a filesystems quotas */ + +/* + * The following structure defines the format of the disk quota file + * (as it appears on disk) - the file is an array of these structures + * indexed by user or group number. The setquota system call establishes + * the vnode for each quota file (a pointer is retained in the ufsmount + * structure). + */ +struct dqblk { + u_int32_t dqb_bhardlimit; /* absolute limit on disk blks alloc */ + u_int32_t dqb_bsoftlimit; /* preferred limit on disk blks */ + u_int32_t dqb_curblocks; /* current block count */ + u_int32_t dqb_ihardlimit; /* maximum # allocated inodes + 1 */ + u_int32_t dqb_isoftlimit; /* preferred inode limit */ + u_int32_t dqb_curinodes; /* current # allocated inodes */ + int32_t dqb_btime; /* time limit for excessive disk use */ + int32_t dqb_itime; /* time limit for excessive files */ +}; + +#ifdef _KERNEL + +#include + +/* + * The following structure records disk usage for a user or group on a + * filesystem. There is one allocated for each quota that exists on any + * filesystem for the current user or group. A cache is kept of recently + * used entries. + */ +struct dquot { + LIST_ENTRY(dquot) dq_hash; /* hash list */ + TAILQ_ENTRY(dquot) dq_freelist; /* free list */ + u_int16_t dq_flags; /* flags, see below */ + u_int16_t dq_type; /* quota type of this dquot */ + u_int32_t dq_cnt; /* count of active references */ + u_int32_t dq_id; /* identifier this applies to */ + struct ufsmount *dq_ump; /* filesystem that this is taken from */ + struct dqblk dq_dqb; /* actual usage & quotas */ +}; +/* + * Flag values. + */ +#define DQ_LOCK 0x01 /* this quota locked (no MODS) */ +#define DQ_WANT 0x02 /* wakeup on unlock */ +#define DQ_MOD 0x04 /* this quota modified since read */ +#define DQ_FAKE 0x08 /* no limits here, just usage */ +#define DQ_BLKS 0x10 /* has been warned about blk limit */ +#define DQ_INODS 0x20 /* has been warned about inode limit */ +/* + * Shorthand notation. + */ +#define dq_bhardlimit dq_dqb.dqb_bhardlimit +#define dq_bsoftlimit dq_dqb.dqb_bsoftlimit +#define dq_curblocks dq_dqb.dqb_curblocks +#define dq_ihardlimit dq_dqb.dqb_ihardlimit +#define dq_isoftlimit dq_dqb.dqb_isoftlimit +#define dq_curinodes dq_dqb.dqb_curinodes +#define dq_btime dq_dqb.dqb_btime +#define dq_itime dq_dqb.dqb_itime + +/* + * If the system has never checked for a quota for this file, then it is + * set to NODQUOT. Once a write attempt is made the inode pointer is set + * to reference a dquot structure. + */ +#define NODQUOT NULL + +/* + * Flags to chkdq() and chkiq() + */ +#define FORCE 0x01 /* force usage changes independent of limits */ +#define CHOWN 0x02 /* (advisory) change initiated by chown */ + +/* + * Macros to avoid subroutine calls to trivial functions. + */ +#ifdef DIAGNOSTIC +#define DQREF(dq) dqref(dq) +#else +#define DQREF(dq) (dq)->dq_cnt++ +#endif + +struct inode; +struct mount; +struct thread; +struct ucred; +struct vnode; + +int chkdq(struct inode *, int64_t, struct ucred *, int); +int chkiq(struct inode *, ino_t, struct ucred *, int); +void dqinit(void); +void dqrele(struct vnode *, struct dquot *); +void dquninit(void); +int getinoquota(struct inode *); +int getquota(struct thread *, struct mount *, u_long, int, caddr_t); +int qsync(struct mount *mp); +int quotaoff(struct thread *td, struct mount *, int); +int quotaon(struct thread *td, struct mount *, int, caddr_t); +int setquota(struct thread *, struct mount *, u_long, int, caddr_t); +int setuse(struct thread *, struct mount *, u_long, int, caddr_t); +vfs_quotactl_t ufs_quotactl; + +#else /* !_KERNEL */ + +#include + +__BEGIN_DECLS +int quotactl(const char *, int, int, void *); +__END_DECLS + +#endif /* _KERNEL */ + +#endif /* !_UFS_UFS_QUOTA_H_ */ diff --git a/src/include.new/ufs/ufs/ufs_extern.h b/src/include.new/ufs/ufs/ufs_extern.h new file mode 100644 index 0000000..1de0bc9 --- /dev/null +++ b/src/include.new/ufs/ufs/ufs_extern.h @@ -0,0 +1,112 @@ +/*- + * Copyright (c) 1991, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ufs_extern.h 8.10 (Berkeley) 5/14/95 + * $FreeBSD: src/sys/ufs/ufs/ufs_extern.h,v 1.55 2005/03/14 10:21:16 phk Exp $ + */ + +#ifndef _UFS_UFS_EXTERN_H_ +#define _UFS_UFS_EXTERN_H_ + +struct componentname; +struct direct; +struct indir; +struct inode; +struct mount; +struct netcred; +struct thread; +struct sockaddr; +struct ucred; +struct ufid; +struct vfsconf; +struct vnode; +struct vop_bmap_args; +struct vop_cachedlookup_args; +struct vop_generic_args; +struct vop_inactive_args; +struct vop_reclaim_args; + +extern struct vop_vector ufs_fifoops; +extern struct vop_vector ufs_vnodeops; + +int ufs_bmap(struct vop_bmap_args *); +int ufs_bmaparray(struct vnode *, ufs2_daddr_t, ufs2_daddr_t *, + struct buf *, int *, int *); +int ufs_fhtovp(struct mount *, struct ufid *, struct vnode **); +int ufs_checkpath(struct inode *, struct inode *, struct ucred *); +void ufs_dirbad(struct inode *, doff_t, char *); +int ufs_dirbadentry(struct vnode *, struct direct *, int); +int ufs_dirempty(struct inode *, ino_t, struct ucred *); +int ufs_extread(struct vop_read_args *); +int ufs_extwrite(struct vop_write_args *); +void ufs_makedirentry(struct inode *, struct componentname *, + struct direct *); +int ufs_direnter(struct vnode *, struct vnode *, struct direct *, + struct componentname *, struct buf *); +int ufs_dirremove(struct vnode *, struct inode *, int, int); +int ufs_dirrewrite(struct inode *, struct inode *, ino_t, int, int); +int ufs_getlbns(struct vnode *, ufs2_daddr_t, struct indir *, int *); +int ufs_inactive(struct vop_inactive_args *); +int ufs_init(struct vfsconf *); +void ufs_itimes(struct vnode *vp); +int ufs_lookup(struct vop_cachedlookup_args *); +int ufs_readdir(struct vop_readdir_args *); +int ufs_reclaim(struct vop_reclaim_args *); +void ffs_snapgone(struct inode *); +vfs_root_t ufs_root; +int ufs_uninit(struct vfsconf *); +int ufs_vinit(struct mount *, struct vop_vector *, struct vnode **); + +/* + * Soft update function prototypes. + */ +int softdep_setup_directory_add(struct buf *, struct inode *, off_t, + ino_t, struct buf *, int); +void softdep_change_directoryentry_offset(struct inode *, caddr_t, + caddr_t, caddr_t, int); +void softdep_setup_remove(struct buf *,struct inode *, struct inode *, int); +void softdep_setup_directory_change(struct buf *, struct inode *, + struct inode *, ino_t, int); +void softdep_change_linkcnt(struct inode *); +void softdep_releasefile(struct inode *); +int softdep_slowdown(struct vnode *); + +/* + * Flags to low-level allocation routines. The low 16-bits are reserved + * for IO_ flags from vnode.h. + * + * Note: The general vfs code typically limits the sequential heuristic + * count to 127. See sequential_heuristic() in kern/vfs_vnops.c + */ +#define BA_CLRBUF 0x00010000 /* Clear invalid areas of buffer. */ +#define BA_METAONLY 0x00020000 /* Return indirect block buffer. */ +#define BA_SEQMASK 0x7F000000 /* Bits holding seq heuristic. */ +#define BA_SEQSHIFT 24 +#define BA_SEQMAX 0x7F + +#endif /* !_UFS_UFS_EXTERN_H_ */ diff --git a/src/include.new/ufs/ufs/ufsmount.h b/src/include.new/ufs/ufs/ufsmount.h new file mode 100644 index 0000000..4183756 --- /dev/null +++ b/src/include.new/ufs/ufs/ufsmount.h @@ -0,0 +1,135 @@ +/*- + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ufsmount.h 8.6 (Berkeley) 3/30/95 + * $FreeBSD: src/sys/ufs/ufs/ufsmount.h,v 1.34.2.2 2006/04/04 18:14:31 tegge Exp $ + */ + +#ifndef _UFS_UFS_UFSMOUNT_H_ +#define _UFS_UFS_UFSMOUNT_H_ + +#include /* XXX For struct workhead. */ + +/* + * Arguments to mount UFS-based filesystems + */ +struct ufs_args { + char *fspec; /* block special device to mount */ + struct export_args export; /* network export information */ +}; + +#ifdef _KERNEL + +#ifdef MALLOC_DECLARE +MALLOC_DECLARE(M_UFSMNT); +#endif + +struct buf; +struct inode; +struct nameidata; +struct timeval; +struct ucred; +struct uio; +struct vnode; +struct ufs_extattr_per_mount; + +/* This structure describes the UFS specific mount structure data. */ +struct ufsmount { + struct mount *um_mountp; /* filesystem vfs structure */ + struct cdev *um_dev; /* device mounted */ + struct g_consumer *um_cp; + struct bufobj *um_bo; /* Buffer cache object */ + struct vnode *um_devvp; /* block device mounted vnode */ + u_long um_fstype; /* type of filesystem */ + struct fs *um_fs; /* pointer to superblock */ + struct ufs_extattr_per_mount um_extattr; /* extended attrs */ + u_long um_nindir; /* indirect ptrs per block */ + u_long um_bptrtodb; /* indir ptr to disk block */ + u_long um_seqinc; /* inc between seq blocks */ + struct mtx um_lock; /* Protects ufsmount & fs */ + long um_numindirdeps; /* outstanding indirdeps */ + struct workhead softdep_workitem_pending; /* softdep work queue */ + struct worklist *softdep_worklist_tail; /* Tail pointer for above */ + int softdep_on_worklist; /* Items on the worklist */ + int softdep_on_worklist_inprogress; /* Busy items on worklist */ + int softdep_deps; /* Total dependency count */ + int softdep_accdeps; /* accumulated dep count */ + int softdep_req; /* Wakeup when deps hits 0. */ + struct vnode *um_quotas[MAXQUOTAS]; /* pointer to quota files */ + struct ucred *um_cred[MAXQUOTAS]; /* quota file access cred */ + time_t um_btime[MAXQUOTAS]; /* block quota time limit */ + time_t um_itime[MAXQUOTAS]; /* inode quota time limit */ + char um_qflags[MAXQUOTAS]; /* quota specific flags */ + int64_t um_savedmaxfilesize; /* XXX - limit maxfilesize */ + int (*um_balloc)(struct vnode *, off_t, int, struct ucred *, int, struct buf **); + int (*um_blkatoff)(struct vnode *, off_t, char **, struct buf **); + int (*um_truncate)(struct vnode *, off_t, int, struct ucred *, struct thread *); + int (*um_update)(struct vnode *, int); + int (*um_valloc)(struct vnode *, int, struct ucred *, struct vnode **); + int (*um_vfree)(struct vnode *, ino_t, int); + void (*um_ifree)(struct ufsmount *, struct inode *); +}; + +#define UFS_BALLOC(aa, bb, cc, dd, ee, ff) VFSTOUFS((aa)->v_mount)->um_balloc(aa, bb, cc, dd, ee, ff) +#define UFS_BLKATOFF(aa, bb, cc, dd) VFSTOUFS((aa)->v_mount)->um_blkatoff(aa, bb, cc, dd) +#define UFS_TRUNCATE(aa, bb, cc, dd, ee) VFSTOUFS((aa)->v_mount)->um_truncate(aa, bb, cc, dd, ee) +#define UFS_UPDATE(aa, bb) VFSTOUFS((aa)->v_mount)->um_update(aa, bb) +#define UFS_VALLOC(aa, bb, cc, dd) VFSTOUFS((aa)->v_mount)->um_valloc(aa, bb, cc, dd) +#define UFS_VFREE(aa, bb, cc) VFSTOUFS((aa)->v_mount)->um_vfree(aa, bb, cc) +#define UFS_IFREE(aa, bb) ((aa)->um_ifree(aa, bb)) + +#define UFS_LOCK(aa) mtx_lock(&(aa)->um_lock) +#define UFS_UNLOCK(aa) mtx_unlock(&(aa)->um_lock) +#define UFS_MTX(aa) (&(aa)->um_lock) + +/* + * Filesystem types + */ +#define UFS1 1 +#define UFS2 2 + +/* + * Flags describing the state of quotas. + */ +#define QTF_OPENING 0x01 /* Q_QUOTAON in progress */ +#define QTF_CLOSING 0x02 /* Q_QUOTAOFF in progress */ + +/* Convert mount ptr to ufsmount ptr. */ +#define VFSTOUFS(mp) ((struct ufsmount *)((mp)->mnt_data)) +#define UFSTOVFS(ump) (ump)->um_mountp + +/* + * Macros to access filesystem parameters in the ufsmount structure. + * Used by ufs_bmap. + */ +#define MNINDIR(ump) ((ump)->um_nindir) +#define blkptrtodb(ump, b) ((b) << (ump)->um_bptrtodb) +#define is_sequential(ump, a, b) ((b) == (a) + ump->um_seqinc) +#endif /* _KERNEL */ + +#endif