diff options
Diffstat (limited to 'Documentation/filesystems')
-rw-r--r-- | Documentation/filesystems/Locking | 34 | ||||
-rw-r--r-- | Documentation/filesystems/debugfs.txt | 4 | ||||
-rw-r--r-- | Documentation/filesystems/ext3.txt | 13 | ||||
-rw-r--r-- | Documentation/filesystems/ext4.txt | 23 | ||||
-rw-r--r-- | Documentation/filesystems/nfs/Exporting | 9 | ||||
-rw-r--r-- | Documentation/filesystems/nfs/nfs41-server.txt | 33 | ||||
-rw-r--r-- | Documentation/filesystems/nfs/nfsroot.txt | 2 | ||||
-rw-r--r-- | Documentation/filesystems/porting | 28 | ||||
-rw-r--r-- | Documentation/filesystems/squashfs.txt | 4 | ||||
-rw-r--r-- | Documentation/filesystems/ubifs.txt | 28 | ||||
-rw-r--r-- | Documentation/filesystems/vfs.txt | 30 |
11 files changed, 100 insertions, 108 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 57d827d6071..653380793a6 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -52,7 +52,7 @@ ata *); void (*put_link) (struct dentry *, struct nameidata *, void *); void (*truncate) (struct inode *); int (*permission) (struct inode *, int, unsigned int); - int (*check_acl)(struct inode *, int, unsigned int); + int (*get_acl)(struct inode *, int); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *); int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); @@ -80,7 +80,7 @@ put_link: no truncate: yes (see below) setattr: yes permission: no (may not block if called in rcu-walk mode) -check_acl: no +get_acl: no getattr: no setxattr: yes getxattr: no @@ -338,21 +338,21 @@ fl_release_private: maybe no ----------------------- lock_manager_operations --------------------------- prototypes: - int (*fl_compare_owner)(struct file_lock *, struct file_lock *); - void (*fl_notify)(struct file_lock *); /* unblock callback */ - int (*fl_grant)(struct file_lock *, struct file_lock *, int); - void (*fl_release_private)(struct file_lock *); - void (*fl_break)(struct file_lock *); /* break_lease callback */ - int (*fl_change)(struct file_lock **, int); + int (*lm_compare_owner)(struct file_lock *, struct file_lock *); + void (*lm_notify)(struct file_lock *); /* unblock callback */ + int (*lm_grant)(struct file_lock *, struct file_lock *, int); + void (*lm_release_private)(struct file_lock *); + void (*lm_break)(struct file_lock *); /* break_lease callback */ + int (*lm_change)(struct file_lock **, int); locking rules: file_lock_lock may block -fl_compare_owner: yes no -fl_notify: yes no -fl_grant: no no -fl_release_private: maybe no -fl_break: yes no -fl_change yes no +lm_compare_owner: yes no +lm_notify: yes no +lm_grant: no no +lm_release_private: maybe no +lm_break: yes no +lm_change yes no --------------------------- buffer_head ----------------------------------- prototypes: @@ -412,7 +412,7 @@ prototypes: int (*open) (struct inode *, struct file *); int (*flush) (struct file *); int (*release) (struct inode *, struct file *); - int (*fsync) (struct file *, int datasync); + int (*fsync) (struct file *, loff_t start, loff_t end, int datasync); int (*aio_fsync) (struct kiocb *, int datasync); int (*fasync) (int, struct file *, int); int (*lock) (struct file *, int, struct file_lock *); @@ -438,9 +438,7 @@ prototypes: locking rules: All may block except for ->setlease. - No VFS locks held on entry except for ->fsync and ->setlease. - -->fsync() has i_mutex on inode. + No VFS locks held on entry except for ->setlease. ->setlease has the file_list_lock held and must not sleep. diff --git a/Documentation/filesystems/debugfs.txt b/Documentation/filesystems/debugfs.txt index ed52af60c2d..742cc06e138 100644 --- a/Documentation/filesystems/debugfs.txt +++ b/Documentation/filesystems/debugfs.txt @@ -73,8 +73,8 @@ the following functions can be used instead: struct dentry *parent, u16 *value); struct dentry *debugfs_create_x32(const char *name, mode_t mode, struct dentry *parent, u32 *value); - -Note that there is no debugfs_create_x64(). + struct dentry *debugfs_create_x64(const char *name, mode_t mode, + struct dentry *parent, u64 *value); These functions are useful as long as the developer knows the size of the value to be exported. Some types can have different widths on different diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt index 272f80d5f96..22f3a0eda1d 100644 --- a/Documentation/filesystems/ext3.txt +++ b/Documentation/filesystems/ext3.txt @@ -147,15 +147,6 @@ grpjquota=<file> during journal replay. They replace the above package for more details (http://sourceforge.net/projects/linuxquota). -bh (*) ext3 associates buffer heads to data pages to -nobh (a) cache disk block mapping information - (b) link pages into transaction to provide - ordering guarantees. - "bh" option forces use of buffer heads. - "nobh" option tries to avoid associating buffer - heads (supported only for "writeback" mode). - - Specification ============= Ext3 shares all disk implementation with the ext2 filesystem, and adds @@ -227,5 +218,5 @@ kernel source: <file:fs/ext3/> programs: http://e2fsprogs.sourceforge.net/ http://ext2resize.sourceforge.net -useful links: http://www.ibm.com/developerworks/library/l-fs7.html - http://www.ibm.com/developerworks/library/l-fs8.html +useful links: http://www.ibm.com/developerworks/library/l-fs7/index.html + http://www.ibm.com/developerworks/library/l-fs8/index.html diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index 3ae9bc94352..232a575a0c4 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -68,12 +68,12 @@ Note: More extensive information for getting started with ext4 can be '-o barriers=[0|1]' mount option for both ext3 and ext4 filesystems for a fair comparison. When tuning ext3 for best benchmark numbers, it is often worthwhile to try changing the data journaling mode; '-o - data=writeback,nobh' can be faster for some workloads. (Note - however that running mounted with data=writeback can potentially - leave stale data exposed in recently written files in case of an - unclean shutdown, which could be a security exposure in some - situations.) Configuring the filesystem with a large journal can - also be helpful for metadata-intensive workloads. + data=writeback' can be faster for some workloads. (Note however that + running mounted with data=writeback can potentially leave stale data + exposed in recently written files in case of an unclean shutdown, + which could be a security exposure in some situations.) Configuring + the filesystem with a large journal can also be helpful for + metadata-intensive workloads. 2. Features =========== @@ -272,14 +272,6 @@ grpjquota=<file> during journal replay. They replace the above package for more details (http://sourceforge.net/projects/linuxquota). -bh (*) ext4 associates buffer heads to data pages to -nobh (a) cache disk block mapping information - (b) link pages into transaction to provide - ordering guarantees. - "bh" option forces use of buffer heads. - "nobh" option tries to avoid associating buffer - heads (supported only for "writeback" mode). - stripe=n Number of filesystem blocks that mballoc will try to use for allocation size and alignment. For RAID5/6 systems this should be the number of data @@ -393,8 +385,7 @@ dioread_nolock locking. If the dioread_nolock option is specified write and convert the extent to initialized after IO completes. This approach allows ext4 code to avoid using inode mutex, which improves scalability on high - speed storages. However this does not work with nobh - option and the mount will fail. Nor does it work with + speed storages. However this does not work with data journaling and dioread_nolock option will be ignored with kernel warning. Note that dioread_nolock code path is only used for extent-based files. diff --git a/Documentation/filesystems/nfs/Exporting b/Documentation/filesystems/nfs/Exporting index 87019d2b598..09994c24728 100644 --- a/Documentation/filesystems/nfs/Exporting +++ b/Documentation/filesystems/nfs/Exporting @@ -92,7 +92,14 @@ For a filesystem to be exportable it must: 1/ provide the filehandle fragment routines described below. 2/ make sure that d_splice_alias is used rather than d_add when ->lookup finds an inode for a given parent and name. - Typically the ->lookup routine will end with a: + + If inode is NULL, d_splice_alias(inode, dentry) is eqivalent to + + d_add(dentry, inode), NULL + + Similarly, d_splice_alias(ERR_PTR(err), dentry) = ERR_PTR(err) + + Typically the ->lookup routine will simply end with a: return d_splice_alias(inode, dentry); } diff --git a/Documentation/filesystems/nfs/nfs41-server.txt b/Documentation/filesystems/nfs/nfs41-server.txt index 04884914a1c..092fad92a3f 100644 --- a/Documentation/filesystems/nfs/nfs41-server.txt +++ b/Documentation/filesystems/nfs/nfs41-server.txt @@ -39,27 +39,17 @@ interoperability problems with future clients. Known issues: from a linux client are possible, but we aren't really conformant with the spec (for example, we don't use kerberos on the backchannel correctly). - - no trunking support: no clients currently take advantage of - trunking, but this is a mandatory feature, and its use is - recommended to clients in a number of places. (E.g. to ensure - timely renewal in case an existing connection's retry timeouts - have gotten too long; see section 8.3 of the RFC.) - Therefore, lack of this feature may cause future clients to - fail. - Incomplete backchannel support: incomplete backchannel gss support and no support for BACKCHANNEL_CTL mean that callbacks (hence delegations and layouts) may not be available and clients confused by the incomplete implementation may fail. - - Server reboot recovery is unsupported; if the server reboots, - clients may fail. - We do not support SSV, which provides security for shared client-server state (thus preventing unauthorized tampering with locks and opens, for example). It is mandatory for servers to support this, though no clients use it yet. - Mandatory operations which we do not support, such as - DESTROY_CLIENTID, FREE_STATEID, SECINFO_NO_NAME, and - TEST_STATEID, are not currently used by clients, but will be + DESTROY_CLIENTID, are not currently used by clients, but will be (and the spec recommends their uses in common cases), and clients should not be expected to know how to recover from the case where they are not supported. This will eventually cause @@ -69,8 +59,9 @@ In addition, some limitations are inherited from the current NFSv4 implementation: - Incomplete delegation enforcement: if a file is renamed or - unlinked, a client holding a delegation may continue to - indefinitely allow opens of the file under the old name. + unlinked by a local process, a client holding a delegation may + continue to indefinitely allow opens of the file under the old + name. The table below, taken from the NFSv4.1 document, lists the operations that are mandatory to implement (REQ), optional @@ -99,7 +90,7 @@ Operations +----------------------+------------+--------------+----------------+ | ACCESS | REQ | | Section 18.1 | NS | BACKCHANNEL_CTL | REQ | | Section 18.33 | -NS | BIND_CONN_TO_SESSION | REQ | | Section 18.34 | +I | BIND_CONN_TO_SESSION | REQ | | Section 18.34 | | CLOSE | REQ | | Section 18.2 | | COMMIT | REQ | | Section 18.3 | | CREATE | REQ | | Section 18.4 | @@ -111,7 +102,7 @@ NS*| DELEGPURGE | OPT | FDELG (REQ) | Section 18.5 | NS | DESTROY_CLIENTID | REQ | | Section 18.50 | I | DESTROY_SESSION | REQ | | Section 18.37 | I | EXCHANGE_ID | REQ | | Section 18.35 | -NS | FREE_STATEID | REQ | | Section 18.38 | +I | FREE_STATEID | REQ | | Section 18.38 | | GETATTR | REQ | | Section 18.7 | P | GETDEVICEINFO | OPT | pNFS (REQ) | Section 18.40 | P | GETDEVICELIST | OPT | pNFS (OPT) | Section 18.41 | @@ -145,14 +136,14 @@ NS*| OPENATTR | OPT | | Section 18.17 | | RESTOREFH | REQ | | Section 18.27 | | SAVEFH | REQ | | Section 18.28 | | SECINFO | REQ | | Section 18.29 | -NS | SECINFO_NO_NAME | REC | pNFS files | Section 18.45, | +I | SECINFO_NO_NAME | REC | pNFS files | Section 18.45, | | | | layout (REQ) | Section 13.12 | I | SEQUENCE | REQ | | Section 18.46 | | SETATTR | REQ | | Section 18.30 | | SETCLIENTID | MNI | | N/A | | SETCLIENTID_CONFIRM | MNI | | N/A | NS | SET_SSV | REQ | | Section 18.47 | -NS | TEST_STATEID | REQ | | Section 18.48 | +I | TEST_STATEID | REQ | | Section 18.48 | | VERIFY | REQ | | Section 18.31 | NS*| WANT_DELEGATION | OPT | FDELG (OPT) | Section 18.49 | | WRITE | REQ | | Section 18.32 | @@ -206,12 +197,6 @@ CREATE_SESSION: SEQUENCE: * no support for dynamic slot table renegotiation (optional) -nfsv4.1 COMPOUND rules: -The following cases aren't supported yet: -* Enforcing of NFS4ERR_NOT_ONLY_OP for: BIND_CONN_TO_SESSION, CREATE_SESSION, - DESTROY_CLIENTID, DESTROY_SESSION, EXCHANGE_ID. -* DESTROY_SESSION MUST be the final operation in the COMPOUND request. - Nonstandard compound limitations: * No support for a sessions fore channel RPC compound that requires both a ca_maxrequestsize request and a ca_maxresponsesize reply, so we may @@ -219,3 +204,5 @@ Nonstandard compound limitations: negotiation. * No more than one IO operation (read, write, readdir) allowed per compound. + +See also http://wiki.linux-nfs.org/wiki/index.php/Server_4.0_and_4.1_issues. diff --git a/Documentation/filesystems/nfs/nfsroot.txt b/Documentation/filesystems/nfs/nfsroot.txt index 90c71c6f0d0..ffdd9d866ad 100644 --- a/Documentation/filesystems/nfs/nfsroot.txt +++ b/Documentation/filesystems/nfs/nfsroot.txt @@ -226,7 +226,7 @@ They depend on various facilities being available: cdrecord. e.g. - cdrecord dev=ATAPI:1,0,0 arch/i386/boot/image.iso + cdrecord dev=ATAPI:1,0,0 arch/x86/boot/image.iso For more information on isolinux, including how to create bootdisks for prebuilt kernels, see http://syslinux.zytor.com/ diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 6e29954851a..b4a3d765ff9 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -400,10 +400,32 @@ a file off. -- [mandatory] - --- -[mandatory] ->get_sb() is gone. Switch to use of ->mount(). Typically it's just a matter of switching from calling get_sb_... to mount_... and changing the function type. If you were doing it manually, just switch from setting ->mnt_root to some pointer to returning that pointer. On errors return ERR_PTR(...). + +-- +[mandatory] + ->permission() and generic_permission()have lost flags +argument; instead of passing IPERM_FLAG_RCU we add MAY_NOT_BLOCK into mask. + generic_permission() has also lost the check_acl argument; ACL checking +has been taken to VFS and filesystems need to provide a non-NULL ->i_op->get_acl +to read an ACL from disk. + +-- +[mandatory] + If you implement your own ->llseek() you must handle SEEK_HOLE and +SEEK_DATA. You can hanle this by returning -EINVAL, but it would be nicer to +support it in some way. The generic handler assumes that the entire file is +data and there is a virtual hole at the end of the file. So if the provided +offset is less than i_size and SEEK_DATA is specified, return the same offset. +If the above is true for the offset and you are given SEEK_HOLE, return the end +of the file. If the offset is i_size or greater return -ENXIO in either case. + +[mandatory] + If you have your own ->fsync() you must make sure to call +filemap_write_and_wait_range() so that all dirty pages are synced out properly. +You must also keep in mind that ->fsync() is not called with i_mutex held +anymore, so if you require i_mutex locking you must make sure to take it and +release it yourself. diff --git a/Documentation/filesystems/squashfs.txt b/Documentation/filesystems/squashfs.txt index d4d41465a0b..7db3ebda5a4 100644 --- a/Documentation/filesystems/squashfs.txt +++ b/Documentation/filesystems/squashfs.txt @@ -2,7 +2,7 @@ SQUASHFS 4.0 FILESYSTEM ======================= Squashfs is a compressed read-only filesystem for Linux. -It uses zlib/lzo compression to compress files, inodes and directories. +It uses zlib/lzo/xz compression to compress files, inodes and directories. Inodes in the system are very small and all blocks are packed to minimise data overhead. Block sizes greater than 4K are supported up to a maximum of 1Mbytes (default block size 128K). @@ -55,6 +55,8 @@ create populated squashfs filesystems. This and other squashfs utilities can be obtained from http://www.squashfs.org. Usage instructions can be obtained from this site also. +The squashfs-tools development tree is now located on kernel.org + git://git.kernel.org/pub/scm/fs/squashfs/squashfs-tools.git 3. SQUASHFS FILESYSTEM DESIGN ----------------------------- diff --git a/Documentation/filesystems/ubifs.txt b/Documentation/filesystems/ubifs.txt index 8e4fab639d9..a0a61d2f389 100644 --- a/Documentation/filesystems/ubifs.txt +++ b/Documentation/filesystems/ubifs.txt @@ -111,34 +111,6 @@ The following is an example of the kernel boot arguments to attach mtd0 to UBI and mount volume "rootfs": ubi.mtd=0 root=ubi0:rootfs rootfstype=ubifs - -Module Parameters for Debugging -=============================== - -When UBIFS has been compiled with debugging enabled, there are 2 module -parameters that are available to control aspects of testing and debugging. - -debug_chks Selects extra checks that UBIFS can do while running: - - Check Flag value - - General checks 1 - Check Tree Node Cache (TNC) 2 - Check indexing tree size 4 - Check orphan area 8 - Check old indexing tree 16 - Check LEB properties (lprops) 32 - Check leaf nodes and inodes 64 - -debug_tsts Selects a mode of testing, as follows: - - Test mode Flag value - - Failure mode for recovery testing 4 - -For example, set debug_chks to 3 to enable general and TNC checks. - - References ========== diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 88b9f5519af..52d8fb81cff 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -229,6 +229,8 @@ struct super_operations { ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); + int (*nr_cached_objects)(struct super_block *); + void (*free_cached_objects)(struct super_block *, int); }; All methods are called without any locks being held, unless otherwise @@ -301,6 +303,26 @@ or bottom half). quota_write: called by the VFS to write to filesystem quota file. + nr_cached_objects: called by the sb cache shrinking function for the + filesystem to return the number of freeable cached objects it contains. + Optional. + + free_cache_objects: called by the sb cache shrinking function for the + filesystem to scan the number of objects indicated to try to free them. + Optional, but any filesystem implementing this method needs to also + implement ->nr_cached_objects for it to be called correctly. + + We can't do anything with any errors that the filesystem might + encountered, hence the void return type. This will never be called if + the VM is trying to reclaim under GFP_NOFS conditions, hence this + method does not need to handle that situation itself. + + Implementations must include conditional reschedule calls inside any + scanning loop that is done. This allows the VFS to determine + appropriate scan batch sizes without having to worry about whether + implementations will cause holdoff problems due to large scan batch + sizes. + Whoever sets up the inode is responsible for filling in the "i_op" field. This is a pointer to a "struct inode_operations" which describes the methods that can be performed on individual inodes. @@ -333,8 +355,8 @@ struct inode_operations { void * (*follow_link) (struct dentry *, struct nameidata *); void (*put_link) (struct dentry *, struct nameidata *, void *); void (*truncate) (struct inode *); - int (*permission) (struct inode *, int, unsigned int); - int (*check_acl)(struct inode *, int, unsigned int); + int (*permission) (struct inode *, int); + int (*get_acl)(struct inode *, int); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); @@ -423,7 +445,7 @@ otherwise noted. permission: called by the VFS to check for access rights on a POSIX-like filesystem. - May be called in rcu-walk mode (flags & IPERM_FLAG_RCU). If in rcu-walk + May be called in rcu-walk mode (mask & MAY_NOT_BLOCK). If in rcu-walk mode, the filesystem must check the permission without blocking or storing to the inode. @@ -755,7 +777,7 @@ struct file_operations { int (*open) (struct inode *, struct file *); int (*flush) (struct file *); int (*release) (struct inode *, struct file *); - int (*fsync) (struct file *, int datasync); + int (*fsync) (struct file *, loff_t, loff_t, int datasync); int (*aio_fsync) (struct kiocb *, int datasync); int (*fasync) (int, struct file *, int); int (*lock) (struct file *, int, struct file_lock *); |