From 6f9524e9e118929f1de02840dffe858f99685aea Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Mon, 21 Feb 2011 20:16:21 -0500 Subject: ext4: update ext4 documentation Add documentation for mount options and ioctls to Documentation/filesystem/ext4.txt, which has not been udpated for some time. Also add for ext4 sysfs tunables to the Documentation/ABI/testing/sysfs-fs-ext4 file, and fix a few typographical errors in that file. https://bugzilla.kernel.org/show_bug.cgi?id=9423 Signed-off-by: Lukas Czerner Signed-off-by: "Theodore Ts'o" --- Documentation/filesystems/ext4.txt | 207 ++++++++++++++++++++++++++++++++++++- 1 file changed, 206 insertions(+), 1 deletion(-) (limited to 'Documentation/filesystems') diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index 6ab9442d7ee..6b050464a90 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -367,12 +367,47 @@ init_itable=n The lazy itable init code will wait n times the minimizes the impact on the systme performance while file system's inode table is being initialized. -discard Controls whether ext4 should issue discard/TRIM +discard Controls whether ext4 should issue discard/TRIM nodiscard(*) commands to the underlying block device when blocks are freed. This is useful for SSD devices and sparse/thinly-provisioned LUNs, but it is off by default until sufficient testing has been done. +nouid32 Disables 32-bit UIDs and GIDs. This is for + interoperability with older kernels which only + store and expect 16-bit values. + +resize Allows to resize filesystem to the end of the last + existing block group, further resize has to be done + with resize2fs either online, or offline. It can be + used only with conjunction with remount. + +block_validity This options allows to enables/disables the in-kernel +noblock_validity facility for tracking filesystem metadata blocks + within internal data structures. This allows multi- + block allocator and other routines to quickly locate + extents which might overlap with filesystem metadata + blocks. This option is intended for debugging + purposes and since it negatively affects the + performance, it is off by default. + +dioread_lock Controls whether or not ext4 should use the DIO read +dioread_nolock locking. If the dioread_nolock option is specified + ext4 will allocate uninitialized extent before buffer + write and convert the extent to initialized after IO + completes. This approach allows ext4 code to avoid + using inode mutex, which improves scalability on high + speed storages. However this does not work with nobh + option and the mount will fail. Nor does it work with + data journaling and dioread_nolock option will be + ignored with kernel warning. Note that dioread_nolock + code path is only used for extent-based files. + Because of the restrictions this options comprises + it is off by default (e.g. dioread_lock). + +i_version Enable 64-bit inode version support. This option is + off by default. + Data Mode ========= There are 3 different data modes: @@ -400,6 +435,176 @@ needs to be read from and written to disk at the same time where it outperforms all others modes. Currently ext4 does not have delayed allocation support if this data journalling mode is selected. +/proc entries +============= + +Information about mounted ext4 file systems can be found in +/proc/fs/ext4. Each mounted filesystem will have a directory in +/proc/fs/ext4 based on its device name (i.e., /proc/fs/ext4/hdc or +/proc/fs/ext4/dm-0). The files in each per-device directory are shown +in table below. + +Files in /proc/fs/ext4/ +.............................................................................. + File Content + mb_groups details of multiblock allocator buddy cache of free blocks +.............................................................................. + +/sys entries +============ + +Information about mounted ext4 file systems can be found in +/sys/fs/ext4. Each mounted filesystem will have a directory in +/sys/fs/ext4 based on its device name (i.e., /sys/fs/ext4/hdc or +/sys/fs/ext4/dm-0). The files in each per-device directory are shown +in table below. + +Files in /sys/fs/ext4/ +(see also Documentation/ABI/testing/sysfs-fs-ext4) +.............................................................................. + File Content + + delayed_allocation_blocks This file is read-only and shows the number of + blocks that are dirty in the page cache, but + which do not have their location in the + filesystem allocated yet. + + inode_goal Tuning parameter which (if non-zero) controls + the goal inode used by the inode allocator in + preference to all other allocation heuristics. + This is intended for debugging use only, and + should be 0 on production systems. + + inode_readahead_blks Tuning parameter which controls the maximum + number of inode table blocks that ext4's inode + table readahead algorithm will pre-read into + the buffer cache + + lifetime_write_kbytes This file is read-only and shows the number of + kilobytes of data that have been written to this + filesystem since it was created. + + max_writeback_mb_bump The maximum number of megabytes the writeback + code will try to write out before move on to + another inode. + + mb_group_prealloc The multiblock allocator will round up allocation + requests to a multiple of this tuning parameter if + the stripe size is not set in the ext4 superblock + + mb_max_to_scan The maximum number of extents the multiblock + allocator will search to find the best extent + + mb_min_to_scan The minimum number of extents the multiblock + allocator will search to find the best extent + + mb_order2_req Tuning parameter which controls the minimum size + for requests (as a power of 2) where the buddy + cache is used + + mb_stats Controls whether the multiblock allocator should + collect statistics, which are shown during the + unmount. 1 means to collect statistics, 0 means + not to collect statistics + + mb_stream_req Files which have fewer blocks than this tunable + parameter will have their blocks allocated out + of a block group specific preallocation pool, so + that small files are packed closely together. + Each large file will have its blocks allocated + out of its own unique preallocation pool. + + session_write_kbytes This file is read-only and shows the number of + kilobytes of data that have been written to this + filesystem since it was mounted. +.............................................................................. + +Ioctls +====== + +There is some Ext4 specific functionality which can be accessed by applications +through the system call interfaces. The list of all Ext4 specific ioctls are +shown in the table below. + +Table of Ext4 specific ioctls +.............................................................................. + Ioctl Description + EXT4_IOC_GETFLAGS Get additional attributes associated with inode. + The ioctl argument is an integer bitfield, with + bit values described in ext4.h. This ioctl is an + alias for FS_IOC_GETFLAGS. + + EXT4_IOC_SETFLAGS Set additional attributes associated with inode. + The ioctl argument is an integer bitfield, with + bit values described in ext4.h. This ioctl is an + alias for FS_IOC_SETFLAGS. + + EXT4_IOC_GETVERSION + EXT4_IOC_GETVERSION_OLD + Get the inode i_generation number stored for + each inode. The i_generation number is normally + changed only when new inode is created and it is + particularly useful for network filesystems. The + '_OLD' version of this ioctl is an alias for + FS_IOC_GETVERSION. + + EXT4_IOC_SETVERSION + EXT4_IOC_SETVERSION_OLD + Set the inode i_generation number stored for + each inode. The '_OLD' version of this ioctl + is an alias for FS_IOC_SETVERSION. + + EXT4_IOC_GROUP_EXTEND This ioctl has the same purpose as the resize + mount option. It allows to resize filesystem + to the end of the last existing block group, + further resize has to be done with resize2fs, + either online, or offline. The argument points + to the unsigned logn number representing the + filesystem new block count. + + EXT4_IOC_MOVE_EXT Move the block extents from orig_fd (the one + this ioctl is pointing to) to the donor_fd (the + one specified in move_extent structure passed + as an argument to this ioctl). Then, exchange + inode metadata between orig_fd and donor_fd. + This is especially useful for online + defragmentation, because the allocator has the + opportunity to allocate moved blocks better, + ideally into one contiguous extent. + + EXT4_IOC_GROUP_ADD Add a new group descriptor to an existing or + new group descriptor block. The new group + descriptor is described by ext4_new_group_input + structure, which is passed as an argument to + this ioctl. This is especially useful in + conjunction with EXT4_IOC_GROUP_EXTEND, + which allows online resize of the filesystem + to the end of the last existing block group. + Those two ioctls combined is used in userspace + online resize tool (e.g. resize2fs). + + EXT4_IOC_MIGRATE This ioctl operates on the filesystem itself. + It converts (migrates) ext3 indirect block mapped + inode to ext4 extent mapped inode by walking + through indirect block mapping of the original + inode and converting contiguous block ranges + into ext4 extents of the temporary inode. Then, + inodes are swapped. This ioctl might help, when + migrating from ext3 to ext4 filesystem, however + suggestion is to create fresh ext4 filesystem + and copy data from the backup. Note, that + filesystem has to support extents for this ioctl + to work. + + EXT4_IOC_ALLOC_DA_BLKS Force all of the delay allocated blocks to be + allocated to preserve application-expected ext3 + behaviour. Note that this will also start + triggering a write of the data blocks, but this + behaviour may change in the future as it is + not necessary and has been done this way only + for sake of simplicity. +.............................................................................. + References ========== -- cgit v1.2.3-70-g09d2 From 20ad9ea9becd34a3c16252ca9d815f2c74f8f30f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 13 Feb 2011 12:06:34 +0000 Subject: xfs: enable delaylog by default Signed-off-by: Christoph Hellwig Signed-off-by: Dave Chinner Signed-off-by: Alex Elder --- Documentation/filesystems/xfs-delayed-logging-design.txt | 7 ------- fs/xfs/linux-2.6/xfs_super.c | 1 + 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'Documentation/filesystems') diff --git a/Documentation/filesystems/xfs-delayed-logging-design.txt b/Documentation/filesystems/xfs-delayed-logging-design.txt index 7445bf335da..5282e3e5141 100644 --- a/Documentation/filesystems/xfs-delayed-logging-design.txt +++ b/Documentation/filesystems/xfs-delayed-logging-design.txt @@ -791,10 +791,3 @@ mount option. Fundamentally, there is no reason why the log manager would not be able to swap methods automatically and transparently depending on load characteristics, but this should not be necessary if delayed logging works as designed. - -Roadmap: - -2.6.39 Switch default mount option to use delayed logging - => should be roughly 12 months after initial merge - => enough time to shake out remaining problems before next round of - enterprise distro kernel rebases diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 9731898083a..7ec1fb8c131 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -189,6 +189,7 @@ xfs_parseargs( mp->m_flags |= XFS_MOUNT_BARRIER; mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; mp->m_flags |= XFS_MOUNT_SMALL_INUMS; + mp->m_flags |= XFS_MOUNT_DELAYLOG; /* * These can be overridden by the mount option parsing. -- cgit v1.2.3-70-g09d2 From 4c1d204cde1470863e78aa0e6f55246c57f15038 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Mon, 28 Feb 2011 16:32:39 +0000 Subject: Squashfs: Update documentation to include compression options Signed-off-by: Phillip Lougher --- Documentation/filesystems/squashfs.txt | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) (limited to 'Documentation/filesystems') diff --git a/Documentation/filesystems/squashfs.txt b/Documentation/filesystems/squashfs.txt index 66699afd66c..2d78f191184 100644 --- a/Documentation/filesystems/squashfs.txt +++ b/Documentation/filesystems/squashfs.txt @@ -59,12 +59,15 @@ obtained from this site also. 3. SQUASHFS FILESYSTEM DESIGN ----------------------------- -A squashfs filesystem consists of a maximum of eight parts, packed together on a byte -alignment: +A squashfs filesystem consists of a maximum of nine parts, packed together on a +byte alignment: --------------- | superblock | |---------------| + | compression | + | options | + |---------------| | datablocks | | & fragments | |---------------| @@ -91,7 +94,14 @@ the source directory, and checked for duplicates. Once all file data has been written the completed inode, directory, fragment, export and uid/gid lookup tables are written. -3.1 Inodes +3.1 Compression options +----------------------- + +Compressors can optionally support compression specific options (e.g. +dictionary size). If non-default compression options have been used, then +these are stored here. + +3.2 Inodes ---------- Metadata (inodes and directories) are compressed in 8Kbyte blocks. Each @@ -114,7 +124,7 @@ directory inode are defined: inodes optimised for frequently occurring regular files and directories, and extended types where extra information has to be stored. -3.2 Directories +3.3 Directories --------------- Like inodes, directories are packed into compressed metadata blocks, stored @@ -144,7 +154,7 @@ decompressed to do a lookup irrespective of the length of the directory. This scheme has the advantage that it doesn't require extra memory overhead and doesn't require much extra storage on disk. -3.3 File data +3.4 File data ------------- Regular files consist of a sequence of contiguous compressed blocks, and/or a @@ -163,7 +173,7 @@ Larger files use multiple slots, with 1.75 TiB files using all 8 slots. The index cache is designed to be memory efficient, and by default uses 16 KiB. -3.4 Fragment lookup table +3.5 Fragment lookup table ------------------------- Regular files can contain a fragment index which is mapped to a fragment @@ -173,7 +183,7 @@ A second index table is used to locate these. This second index table for speed of access (and because it is small) is read at mount time and cached in memory. -3.5 Uid/gid lookup table +3.6 Uid/gid lookup table ------------------------ For space efficiency regular files store uid and gid indexes, which are @@ -182,7 +192,7 @@ stored compressed into metadata blocks. A second index table is used to locate these. This second index table for speed of access (and because it is small) is read at mount time and cached in memory. -3.6 Export table +3.7 Export table ---------------- To enable Squashfs filesystems to be exportable (via NFS etc.) filesystems @@ -196,7 +206,7 @@ This table is stored compressed into metadata blocks. A second index table is used to locate these. This second index table for speed of access (and because it is small) is read at mount time and cached in memory. -3.7 Xattr table +3.8 Xattr table --------------- The xattr table contains extended attributes for each inode. The xattrs -- cgit v1.2.3-70-g09d2 From 9ed96484311b89360b80a4181d856cbdb21630fd Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Mon, 31 Jan 2011 14:32:14 +0200 Subject: exofs: Add option to mount by osdname If /dev/osd* devices are shuffled because more devices where added, and/or login order has changed. It is hard to mount the FS you want. Add an option to mount by osdname. osdname is any osd-device's osdname as specified to the mkfs.exofs command when formatting the osd-devices. The new mount format is: OPT="osdname=$UUID0,pid=$PID,_netdev" mount -t exofs -o $OPT $DEV_OSD0 $MOUNTDIR if "osdname=" is specified in options above $DEV_OSD0 is ignored and can be empty. Also while at it: Removed some old unused Opt_* enums. Signed-off-by: Boaz Harrosh --- Documentation/filesystems/exofs.txt | 10 +++++++++- fs/exofs/super.c | 31 +++++++++++++++++++++++++++---- 2 files changed, 36 insertions(+), 5 deletions(-) (limited to 'Documentation/filesystems') diff --git a/Documentation/filesystems/exofs.txt b/Documentation/filesystems/exofs.txt index abd2a9b5b78..23583a13697 100644 --- a/Documentation/filesystems/exofs.txt +++ b/Documentation/filesystems/exofs.txt @@ -104,7 +104,15 @@ Where: exofs specific options: Options are separated by commas (,) pid= - The partition number to mount/create as container of the filesystem. - This option is mandatory. + This option is mandatory. integer can be + Hex by pre-pending an 0x to the number. + osdname= - Mount by a device's osdname. + osdname is usually a 36 character uuid of the + form "d2683732-c906-4ee1-9dbd-c10c27bb40df". + It is one of the device's uuid specified in the + mkfs.exofs format command. + If this option is specified then the /dev/osdX + above can be empty and is ignored. to= - Timeout in ticks for a single command. default is (60 * HZ) [for debugging only] diff --git a/fs/exofs/super.c b/fs/exofs/super.c index e87510f4749..474989eeb7d 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -48,6 +48,7 @@ * struct to hold what we get from mount options */ struct exofs_mountopt { + bool is_osdname; const char *dev_name; uint64_t pid; int timeout; @@ -56,7 +57,7 @@ struct exofs_mountopt { /* * exofs-specific mount-time options. */ -enum { Opt_pid, Opt_to, Opt_mkfs, Opt_format, Opt_err }; +enum { Opt_name, Opt_pid, Opt_to, Opt_err }; /* * Our mount-time options. These should ideally be 64-bit unsigned, but the @@ -64,6 +65,7 @@ enum { Opt_pid, Opt_to, Opt_mkfs, Opt_format, Opt_err }; * sufficient for most applications now. */ static match_table_t tokens = { + {Opt_name, "osdname=%s"}, {Opt_pid, "pid=%u"}, {Opt_to, "to=%u"}, {Opt_err, NULL} @@ -94,6 +96,14 @@ static int parse_options(char *options, struct exofs_mountopt *opts) token = match_token(p, tokens, args); switch (token) { + case Opt_name: + opts->dev_name = match_strdup(&args[0]); + if (unlikely(!opts->dev_name)) { + EXOFS_ERR("Error allocating dev_name"); + return -ENOMEM; + } + opts->is_osdname = true; + break; case Opt_pid: if (0 == match_strlcpy(str, &args[0], sizeof(str))) return -EINVAL; @@ -575,9 +585,17 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) goto free_bdi; /* use mount options to fill superblock */ - od = osduld_path_lookup(opts->dev_name); + if (opts->is_osdname) { + struct osd_dev_info odi = {.systemid_len = 0}; + + odi.osdname_len = strlen(opts->dev_name); + odi.osdname = (u8 *)opts->dev_name; + od = osduld_info_lookup(&odi); + } else { + od = osduld_path_lookup(opts->dev_name); + } if (IS_ERR(od)) { - ret = PTR_ERR(od); + ret = -EINVAL; goto free_sbi; } @@ -670,6 +688,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) _exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0], sbi->layout.s_pid); + if (opts->is_osdname) + kfree(opts->dev_name); return 0; free_sbi: @@ -678,6 +698,8 @@ free_bdi: EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", opts->dev_name, sbi->layout.s_pid, ret); exofs_free_sbi(sbi); + if (opts->is_osdname) + kfree(opts->dev_name); return ret; } @@ -695,7 +717,8 @@ static struct dentry *exofs_mount(struct file_system_type *type, if (ret) return ERR_PTR(ret); - opts.dev_name = dev_name; + if (!opts.dev_name) + opts.dev_name = dev_name; return mount_nodev(type, flags, &opts, exofs_fill_super); } -- cgit v1.2.3-70-g09d2 From da23ef0549d4205ca9b576cf6cce9a80d0c3e43a Mon Sep 17 00:00:00 2001 From: Stuart Swales Date: Tue, 22 Mar 2011 16:35:06 -0700 Subject: adfs: add hexadecimal filetype suffix option ADFS (FileCore) storage complies with the RISC OS filetype specification (12 bits of file type information is stored in the file load address, rather than using a file extension). The existing driver largely ignores this information and does not present it to the end user. It is desirable that stored filetypes be made visible to the end user to facilitate a precise copy of data and metadata from a hard disc (or image thereof) into a RISC OS emulator (such as RPCEmu) or to a network share which can be accessed by real Acorn systems. This patch implements a per-mount filetype suffix option (use -o ftsuffix=1) to present any filetype as a ,xyz hexadecimal suffix on each file. This type suffix is compatible with that used by RISC OS systems that access network servers using NFS client software and by RPCemu's host filing system. Signed-off-by: Stuart Swales Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/adfs.txt | 18 ++++++++++++++++++ fs/adfs/adfs.h | 21 +++++++++++++++++++-- fs/adfs/dir_f.c | 23 ++++++++++++++++++++--- fs/adfs/dir_fplus.c | 18 ++++++++++++++++++ fs/adfs/inode.c | 22 ++++------------------ fs/adfs/super.c | 23 ++++++++++++++++++++--- 6 files changed, 99 insertions(+), 26 deletions(-) (limited to 'Documentation/filesystems') diff --git a/Documentation/filesystems/adfs.txt b/Documentation/filesystems/adfs.txt index 9e8811f92b8..5949766353f 100644 --- a/Documentation/filesystems/adfs.txt +++ b/Documentation/filesystems/adfs.txt @@ -9,6 +9,9 @@ Mount options for ADFS will be nnn. Default 0700. othmask=nnn The permission mask for ADFS 'other' permissions will be nnn. Default 0077. + ftsuffix=n When ftsuffix=0, no file type suffix will be applied. + When ftsuffix=1, a hexadecimal suffix corresponding to + the RISC OS file type will be added. Default 0. Mapping of ADFS permissions to Linux permissions ------------------------------------------------ @@ -55,3 +58,18 @@ Mapping of ADFS permissions to Linux permissions You can therefore tailor the permission translation to whatever you desire the permissions should be under Linux. + +RISC OS file type suffix +------------------------ + + RISC OS file types are stored in bits 19..8 of the file load address. + + To enable non-RISC OS systems to be used to store files without losing + file type information, a file naming convention was devised (initially + for use with NFS) such that a hexadecimal suffix of the form ,xyz + denoted the file type: e.g. BasicFile,ffb is a BASIC (0xffb) file. This + naming convention is now also used by RISC OS emulators such as RPCEmu. + + Mounting an ADFS disc with option ftsuffix=1 will cause appropriate file + type suffixes to be appended to file names read from a directory. If the + ftsuffix option is zero or omitted, no file type suffixes will be added. diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h index 58588ddb178..a8a58d864f9 100644 --- a/fs/adfs/adfs.h +++ b/fs/adfs/adfs.h @@ -50,6 +50,7 @@ struct adfs_sb_info { gid_t s_gid; /* owner gid */ umode_t s_owner_mask; /* ADFS owner perm -> unix perm */ umode_t s_other_mask; /* ADFS other perm -> unix perm */ + int s_ftsuffix; /* ,xyz hex filetype suffix option */ __u32 s_ids_per_zone; /* max. no ids in one zone */ __u32 s_idlen; /* length of ID in map */ @@ -93,7 +94,7 @@ struct adfs_dir { /* * This is the overall maximum name length */ -#define ADFS_MAX_NAME_LEN 256 +#define ADFS_MAX_NAME_LEN (256 + 4) /* +4 for ,xyz hex filetype suffix */ struct object_info { __u32 parent_id; /* parent object id */ __u32 file_id; /* object id */ @@ -101,10 +102,26 @@ struct object_info { __u32 execaddr; /* execution address */ __u32 size; /* size */ __u8 attr; /* RISC OS attributes */ - unsigned char name_len; /* name length */ + unsigned int name_len; /* name length */ char name[ADFS_MAX_NAME_LEN];/* file name */ + + /* RISC OS file type (12-bit: derived from loadaddr) */ + __u16 filetype; }; +/* RISC OS 12-bit filetype converts to ,xyz hex filename suffix */ +static inline int append_filetype_suffix(char *buf, __u16 filetype) +{ + if (filetype == -1) + return 0; + + *buf++ = ','; + *buf++ = hex_asc_lo(filetype >> 8); + *buf++ = hex_asc_lo(filetype >> 4); + *buf++ = hex_asc_lo(filetype >> 0); + return 4; +} + struct adfs_dir_ops { int (*read)(struct super_block *sb, unsigned int id, unsigned int sz, struct adfs_dir *dir); int (*setpos)(struct adfs_dir *dir, unsigned int fpos); diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c index bafc71222e2..4bbe853ee50 100644 --- a/fs/adfs/dir_f.c +++ b/fs/adfs/dir_f.c @@ -52,7 +52,6 @@ static inline int adfs_readname(char *buf, char *ptr, int maxlen) *buf++ = *ptr; ptr++; } - *buf = '\0'; return buf - old_buf; } @@ -208,7 +207,8 @@ release_buffers: * convert a disk-based directory entry to a Linux ADFS directory entry */ static inline void -adfs_dir2obj(struct object_info *obj, struct adfs_direntry *de) +adfs_dir2obj(struct adfs_dir *dir, struct object_info *obj, + struct adfs_direntry *de) { obj->name_len = adfs_readname(obj->name, de->dirobname, ADFS_F_NAME_LEN); obj->file_id = adfs_readval(de->dirinddiscadd, 3); @@ -216,6 +216,23 @@ adfs_dir2obj(struct object_info *obj, struct adfs_direntry *de) obj->execaddr = adfs_readval(de->direxec, 4); obj->size = adfs_readval(de->dirlen, 4); obj->attr = de->newdiratts; + obj->filetype = -1; + + /* + * object is a file and is filetyped and timestamped? + * RISC OS 12-bit filetype is stored in load_address[19:8] + */ + if ((0 == (obj->attr & ADFS_NDA_DIRECTORY)) && + (0xfff00000 == (0xfff00000 & obj->loadaddr))) { + obj->filetype = (__u16) ((0x000fff00 & obj->loadaddr) >> 8); + + /* optionally append the ,xyz hex filetype suffix */ + if (ADFS_SB(dir->sb)->s_ftsuffix) + obj->name_len += + append_filetype_suffix( + &obj->name[obj->name_len], + obj->filetype); + } } /* @@ -260,7 +277,7 @@ __adfs_dir_get(struct adfs_dir *dir, int pos, struct object_info *obj) if (!de.dirobname[0]) return -ENOENT; - adfs_dir2obj(obj, &de); + adfs_dir2obj(dir, obj, &de); return 0; } diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c index a7f41da8115..d9e3bee4e65 100644 --- a/fs/adfs/dir_fplus.c +++ b/fs/adfs/dir_fplus.c @@ -197,6 +197,24 @@ adfs_fplus_getnext(struct adfs_dir *dir, struct object_info *obj) if (obj->name[i] == '/') obj->name[i] = '.'; + obj->filetype = -1; + + /* + * object is a file and is filetyped and timestamped? + * RISC OS 12-bit filetype is stored in load_address[19:8] + */ + if ((0 == (obj->attr & ADFS_NDA_DIRECTORY)) && + (0xfff00000 == (0xfff00000 & obj->loadaddr))) { + obj->filetype = (__u16) ((0x000fff00 & obj->loadaddr) >> 8); + + /* optionally append the ,xyz hex filetype suffix */ + if (ADFS_SB(dir->sb)->s_ftsuffix) + obj->name_len += + append_filetype_suffix( + &obj->name[obj->name_len], + obj->filetype); + } + dir->pos += 1; ret = 0; out: diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 16d7ef2dffe..92444e94f84 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -78,26 +78,13 @@ static const struct address_space_operations adfs_aops = { .bmap = _adfs_bmap }; -static inline unsigned int -adfs_filetype(struct inode *inode) -{ - unsigned int type; - - if (ADFS_I(inode)->stamped) - type = (ADFS_I(inode)->loadaddr >> 8) & 0xfff; - else - type = (unsigned int) -1; - - return type; -} - /* * Convert ADFS attributes and filetype to Linux permission. */ static umode_t adfs_atts2mode(struct super_block *sb, struct inode *inode) { - unsigned int filetype, attr = ADFS_I(inode)->attr; + unsigned int attr = ADFS_I(inode)->attr; umode_t mode, rmask; struct adfs_sb_info *asb = ADFS_SB(sb); @@ -106,9 +93,7 @@ adfs_atts2mode(struct super_block *sb, struct inode *inode) return S_IFDIR | S_IXUGO | mode; } - filetype = adfs_filetype(inode); - - switch (filetype) { + switch (ADFS_I(inode)->filetype) { case 0xfc0: /* LinkFS */ return S_IFLNK|S_IRWXUGO; @@ -277,7 +262,8 @@ adfs_iget(struct super_block *sb, struct object_info *obj) ADFS_I(inode)->loadaddr = obj->loadaddr; ADFS_I(inode)->execaddr = obj->execaddr; ADFS_I(inode)->attr = obj->attr; - ADFS_I(inode)->stamped = ((obj->loadaddr & 0xfff00000) == 0xfff00000); + ADFS_I(inode)->filetype = obj->filetype; + ADFS_I(inode)->stamped = ((obj->loadaddr & 0xfff00000) == 0xfff00000); inode->i_mode = adfs_atts2mode(sb, inode); adfs_adfs2unix_time(&inode->i_mtime, inode); diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 06d7388b477..c8bf36a1996 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -138,17 +138,20 @@ static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt) seq_printf(seq, ",ownmask=%o", asb->s_owner_mask); if (asb->s_other_mask != ADFS_DEFAULT_OTHER_MASK) seq_printf(seq, ",othmask=%o", asb->s_other_mask); + if (asb->s_ftsuffix != 0) + seq_printf(seq, ",ftsuffix=%u", asb->s_ftsuffix); return 0; } -enum {Opt_uid, Opt_gid, Opt_ownmask, Opt_othmask, Opt_err}; +enum {Opt_uid, Opt_gid, Opt_ownmask, Opt_othmask, Opt_ftsuffix, Opt_err}; static const match_table_t tokens = { {Opt_uid, "uid=%u"}, {Opt_gid, "gid=%u"}, {Opt_ownmask, "ownmask=%o"}, {Opt_othmask, "othmask=%o"}, + {Opt_ftsuffix, "ftsuffix=%u"}, {Opt_err, NULL} }; @@ -189,6 +192,11 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; asb->s_other_mask = option; break; + case Opt_ftsuffix: + if (match_int(args, &option)) + return -EINVAL; + asb->s_ftsuffix = option; + break; default: printk("ADFS-fs: unrecognised mount option \"%s\" " "or missing value\n", p); @@ -366,6 +374,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent) asb->s_gid = 0; asb->s_owner_mask = ADFS_DEFAULT_OWNER_MASK; asb->s_other_mask = ADFS_DEFAULT_OTHER_MASK; + asb->s_ftsuffix = 0; if (parse_options(sb, data)) goto error; @@ -445,11 +454,13 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent) root_obj.parent_id = root_obj.file_id = le32_to_cpu(dr->root); root_obj.name_len = 0; - root_obj.loadaddr = 0; - root_obj.execaddr = 0; + /* Set root object date as 01 Jan 1987 00:00:00 */ + root_obj.loadaddr = 0xfff0003f; + root_obj.execaddr = 0xec22c000; root_obj.size = ADFS_NEWDIR_SIZE; root_obj.attr = ADFS_NDA_DIRECTORY | ADFS_NDA_OWNER_READ | ADFS_NDA_OWNER_WRITE | ADFS_NDA_PUBLIC_READ; + root_obj.filetype = -1; /* * If this is a F+ disk with variable length directories, @@ -463,6 +474,12 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent) asb->s_dir = &adfs_f_dir_ops; asb->s_namelen = ADFS_F_NAME_LEN; } + /* + * ,xyz hex filetype suffix may be added by driver + * to files that have valid RISC OS filetype + */ + if (asb->s_ftsuffix) + asb->s_namelen += 4; sb->s_d_op = &adfs_dentry_operations; root = adfs_iget(sb, &root_obj); -- cgit v1.2.3-70-g09d2 From f283c86afe6aa70b733d1ecebad5d9464943b774 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 22 Mar 2011 22:23:39 +1100 Subject: fs: remove inode_lock from iput_final and prune_icache Now that inode state changes are protected by the inode->i_lock and the inode LRU manipulations by the inode_lru_lock, we can remove the inode_lock from prune_icache and the initial part of iput_final(). instead of using the inode_lock to protect the inode during iput_final, use the inode->i_lock instead. This protects the inode against new references being taken while we change the inode state to I_FREEING, as well as preventing prune_icache from grabbing the inode while we are manipulating it. Hence we no longer need the inode_lock in iput_final prior to setting I_FREEING on the inode. For prune_icache, we no longer need the inode_lock to protect the LRU list, and the inodes themselves are protected against freeing races by the inode->i_lock. Hence we can lift the inode_lock from prune_icache as well. Signed-off-by: Dave Chinner Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 2 +- Documentation/filesystems/porting | 16 +++++++++++----- Documentation/filesystems/vfs.txt | 2 +- fs/inode.c | 17 +++-------------- fs/logfs/inode.c | 2 +- 5 files changed, 17 insertions(+), 22 deletions(-) (limited to 'Documentation/filesystems') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 2e994efe12c..61b31acb917 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -128,7 +128,7 @@ alloc_inode: destroy_inode: dirty_inode: (must not sleep) write_inode: -drop_inode: !!!inode_lock!!! +drop_inode: !!!inode->i_lock!!! evict_inode: put_super: write write_super: read diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 0c986c9e851..6e29954851a 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -298,11 +298,14 @@ be used instead. It gets called whenever the inode is evicted, whether it has remaining links or not. Caller does *not* evict the pagecache or inode-associated metadata buffers; getting rid of those is responsibility of method, as it had been for ->delete_inode(). - ->drop_inode() returns int now; it's called on final iput() with inode_lock -held and it returns true if filesystems wants the inode to be dropped. As before, -generic_drop_inode() is still the default and it's been updated appropriately. -generic_delete_inode() is also alive and it consists simply of return 1. Note that -all actual eviction work is done by caller after ->drop_inode() returns. + + ->drop_inode() returns int now; it's called on final iput() with +inode->i_lock held and it returns true if filesystems wants the inode to be +dropped. As before, generic_drop_inode() is still the default and it's been +updated appropriately. generic_delete_inode() is also alive and it consists +simply of return 1. Note that all actual eviction work is done by caller after +->drop_inode() returns. + clear_inode() is gone; use end_writeback() instead. As before, it must be called exactly once on each call of ->evict_inode() (as it used to be for each call of ->delete_inode()). Unlike before, if you are using inode-associated @@ -395,6 +398,9 @@ Currently you can only have FALLOC_FL_PUNCH_HOLE with FALLOC_FL_KEEP_SIZE set, so the i_size should not change when hole punching, even when puching the end of a file off. +-- +[mandatory] + -- [mandatory] ->get_sb() is gone. Switch to use of ->mount(). Typically it's just diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 306f0ae8df0..80815ed654c 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -254,7 +254,7 @@ or bottom half). should be synchronous or not, not all filesystems check this flag. drop_inode: called when the last access to the inode is dropped, - with the inode_lock spinlock held. + with the inode->i_lock spinlock held. This method should be either NULL (normal UNIX filesystem semantics) or "generic_delete_inode" (for filesystems that do not diff --git a/fs/inode.c b/fs/inode.c index b19cb6ee6ca..389f5a24759 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -650,7 +650,6 @@ static void prune_icache(int nr_to_scan) unsigned long reap = 0; down_read(&iprune_sem); - spin_lock(&inode_lock); spin_lock(&inode_lru_lock); for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { struct inode *inode; @@ -676,8 +675,8 @@ static void prune_icache(int nr_to_scan) */ if (atomic_read(&inode->i_count) || (inode->i_state & ~I_REFERENCED)) { - spin_unlock(&inode->i_lock); list_del_init(&inode->i_lru); + spin_unlock(&inode->i_lock); inodes_stat.nr_unused--; continue; } @@ -685,20 +684,18 @@ static void prune_icache(int nr_to_scan) /* recently referenced inodes get one more pass */ if (inode->i_state & I_REFERENCED) { inode->i_state &= ~I_REFERENCED; - spin_unlock(&inode->i_lock); list_move(&inode->i_lru, &inode_lru); + spin_unlock(&inode->i_lock); continue; } if (inode_has_buffers(inode) || inode->i_data.nrpages) { __iget(inode); spin_unlock(&inode->i_lock); spin_unlock(&inode_lru_lock); - spin_unlock(&inode_lock); if (remove_inode_buffers(inode)) reap += invalidate_mapping_pages(&inode->i_data, 0, -1); iput(inode); - spin_lock(&inode_lock); spin_lock(&inode_lru_lock); if (inode != list_entry(inode_lru.next, @@ -724,7 +721,6 @@ static void prune_icache(int nr_to_scan) else __count_vm_events(PGINODESTEAL, reap); spin_unlock(&inode_lru_lock); - spin_unlock(&inode_lock); dispose_list(&freeable); up_read(&iprune_sem); @@ -1082,7 +1078,6 @@ EXPORT_SYMBOL(iunique); struct inode *igrab(struct inode *inode) { - spin_lock(&inode_lock); spin_lock(&inode->i_lock); if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) { __iget(inode); @@ -1096,7 +1091,6 @@ struct inode *igrab(struct inode *inode) */ inode = NULL; } - spin_unlock(&inode_lock); return inode; } EXPORT_SYMBOL(igrab); @@ -1439,7 +1433,6 @@ static void iput_final(struct inode *inode) const struct super_operations *op = inode->i_sb->s_op; int drop; - spin_lock(&inode->i_lock); WARN_ON(inode->i_state & I_NEW); if (op && op->drop_inode) @@ -1452,16 +1445,13 @@ static void iput_final(struct inode *inode) if (!(inode->i_state & (I_DIRTY|I_SYNC))) inode_lru_list_add(inode); spin_unlock(&inode->i_lock); - spin_unlock(&inode_lock); return; } if (!drop) { inode->i_state |= I_WILL_FREE; spin_unlock(&inode->i_lock); - spin_unlock(&inode_lock); write_inode_now(inode, 1); - spin_lock(&inode_lock); spin_lock(&inode->i_lock); WARN_ON(inode->i_state & I_NEW); inode->i_state &= ~I_WILL_FREE; @@ -1470,7 +1460,6 @@ static void iput_final(struct inode *inode) inode->i_state |= I_FREEING; inode_lru_list_del(inode); spin_unlock(&inode->i_lock); - spin_unlock(&inode_lock); evict(inode); } @@ -1489,7 +1478,7 @@ void iput(struct inode *inode) if (inode) { BUG_ON(inode->i_state & I_CLEAR); - if (atomic_dec_and_lock(&inode->i_count, &inode_lock)) + if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) iput_final(inode); } } diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index 03b8c240aed..edfea7a3a74 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c @@ -293,7 +293,7 @@ static int logfs_write_inode(struct inode *inode, struct writeback_control *wbc) return ret; } -/* called with inode_lock held */ +/* called with inode->i_lock held */ static int logfs_drop_inode(struct inode *inode) { struct logfs_super *super = logfs_super(inode->i_sb); -- cgit v1.2.3-70-g09d2