diff options
Diffstat (limited to 'Documentation/filesystems')
-rw-r--r-- | Documentation/filesystems/Locking | 15 | ||||
-rw-r--r-- | Documentation/filesystems/nfs/pnfs.txt | 7 | ||||
-rw-r--r-- | Documentation/filesystems/ntfs.txt | 2 | ||||
-rw-r--r-- | Documentation/filesystems/porting | 15 | ||||
-rw-r--r-- | Documentation/filesystems/proc.txt | 7 | ||||
-rw-r--r-- | Documentation/filesystems/sysfs.txt | 16 | ||||
-rw-r--r-- | Documentation/filesystems/vfs.txt | 103 |
7 files changed, 120 insertions, 45 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 977d8919cc6..2e994efe12c 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -19,6 +19,8 @@ prototypes: void (*d_release)(struct dentry *); void (*d_iput)(struct dentry *, struct inode *); char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen); + struct vfsmount *(*d_automount)(struct path *path); + int (*d_manage)(struct dentry *, bool); locking rules: rename_lock ->d_lock may block rcu-walk @@ -29,6 +31,8 @@ d_delete: no yes no no d_release: no no yes no d_iput: no no yes no d_dname: no no no no +d_automount: no no yes no +d_manage: no no yes (ref-walk) maybe --------------------------- inode_operations --------------------------- prototypes: @@ -56,7 +60,6 @@ ata *); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); void (*truncate_range)(struct inode *, loff_t, loff_t); - long (*fallocate)(struct inode *inode, int mode, loff_t offset, loff_t len); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); locking rules: @@ -84,7 +87,6 @@ getxattr: no listxattr: no removexattr: yes truncate_range: yes -fallocate: no fiemap: no Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on victim. @@ -164,13 +166,11 @@ prototypes: void (*kill_sb) (struct super_block *); locking rules: may block -get_sb yes mount yes kill_sb yes -->get_sb() returns error or 0 with locked superblock attached to the vfsmount -(exclusive on ->s_umount). -->mount() returns ERR_PTR or the root dentry. +->mount() returns ERR_PTR or the root dentry; its superblock should be locked +on return. ->kill_sb() takes a write-locked superblock, does all shutdown work on it, unlocks and drops the reference. @@ -343,7 +343,6 @@ prototypes: int (*fl_grant)(struct file_lock *, struct file_lock *, int); void (*fl_release_private)(struct file_lock *); void (*fl_break)(struct file_lock *); /* break_lease callback */ - int (*fl_mylease)(struct file_lock *, struct file_lock *); int (*fl_change)(struct file_lock **, int); locking rules: @@ -353,7 +352,6 @@ fl_notify: yes no fl_grant: no no fl_release_private: maybe no fl_break: yes no -fl_mylease: yes no fl_change yes no --------------------------- buffer_head ----------------------------------- @@ -435,6 +433,7 @@ prototypes: ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); int (*setlease)(struct file *, long, struct file_lock **); + long (*fallocate)(struct file *, int, loff_t, loff_t); }; locking rules: diff --git a/Documentation/filesystems/nfs/pnfs.txt b/Documentation/filesystems/nfs/pnfs.txt index bc0b9cfe095..983e14abe7e 100644 --- a/Documentation/filesystems/nfs/pnfs.txt +++ b/Documentation/filesystems/nfs/pnfs.txt @@ -46,3 +46,10 @@ data server cache file driver devices refer to data servers, which are kept in a module level cache. Its reference is held over the lifetime of the deviceid pointing to it. + +lseg +---- +lseg maintains an extra reference corresponding to the NFS_LSEG_VALID +bit which holds it in the pnfs_layout_hdr's list. When the final lseg +is removed from the pnfs_layout_hdr's list, the NFS_LAYOUT_DESTROYED +bit is set, preventing any new lsegs from being added. diff --git a/Documentation/filesystems/ntfs.txt b/Documentation/filesystems/ntfs.txt index 6ef8cf3bc9a..933bc66ccff 100644 --- a/Documentation/filesystems/ntfs.txt +++ b/Documentation/filesystems/ntfs.txt @@ -460,6 +460,8 @@ Note, a technical ChangeLog aimed at kernel hackers is in fs/ntfs/ChangeLog. 2.1.30: - Fix writev() (it kept writing the first segment over and over again instead of moving onto subsequent segments). + - Fix crash in ntfs_mft_record_alloc() when mapping the new extent mft + record failed. 2.1.29: - Fix a deadlock when mounting read-write. 2.1.28: diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 266d2059b9b..0c986c9e851 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -365,8 +365,8 @@ must be done in the RCU callback. [recommended] vfs now tries to do path walking in "rcu-walk mode", which avoids atomic operations and scalability hazards on dentries and inodes (see -Documentation/filesystems/path-walk.txt). d_hash and d_compare changes (above) -are examples of the changes required to support this. For more complex +Documentation/filesystems/path-lookup.txt). d_hash and d_compare changes +(above) are examples of the changes required to support this. For more complex filesystem callbacks, the vfs drops out of rcu-walk mode before the fs call, so no changes are required to the filesystem. However, this is costly and loses the benefits of rcu-walk mode. We will begin to add filesystem callbacks that @@ -383,8 +383,8 @@ Documentation/filesystems/vfs.txt for more details. permission and check_acl are inode permission checks that are called on many or all directory inodes on the way down a path walk (to check for -exec permission). These must now be rcu-walk aware (flags & IPERM_RCU). See -Documentation/filesystems/vfs.txt for more details. +exec permission). These must now be rcu-walk aware (flags & IPERM_FLAG_RCU). +See Documentation/filesystems/vfs.txt for more details. -- [mandatory] @@ -394,3 +394,10 @@ file) you must return -EOPNOTSUPP if FALLOC_FL_PUNCH_HOLE is set in mode. Currently you can only have FALLOC_FL_PUNCH_HOLE with FALLOC_FL_KEEP_SIZE set, so the i_size should not change when hole punching, even when puching the end of a file off. + +-- +[mandatory] + ->get_sb() is gone. Switch to use of ->mount(). Typically it's just +a matter of switching from calling get_sb_... to mount_... and changing the +function type. If you were doing it manually, just switch from setting ->mnt_root +to some pointer to returning that pointer. On errors return ERR_PTR(...). diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 9471225212c..23cae6548d3 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -375,6 +375,7 @@ Anonymous: 0 kB Swap: 0 kB KernelPageSize: 4 kB MMUPageSize: 4 kB +Locked: 374 kB The first of these lines shows the same information as is displayed for the mapping in /proc/PID/maps. The remaining lines show the size of the mapping @@ -670,6 +671,8 @@ varies by architecture and compile options. The following is from a > cat /proc/meminfo +The "Locked" indicates whether the mapping is locked in memory or not. + MemTotal: 16344972 kB MemFree: 13634064 kB @@ -1320,6 +1323,10 @@ scaled linearly with /proc/<pid>/oom_score_adj. Writing to /proc/<pid>/oom_score_adj or /proc/<pid>/oom_adj will change the other with its scaled value. +The value of /proc/<pid>/oom_score_adj may be reduced no lower than the last +value set by a CAP_SYS_RESOURCE process. To reduce the value any lower +requires CAP_SYS_RESOURCE. + NOTICE: /proc/<pid>/oom_adj is deprecated and will be removed, please see Documentation/feature-removal-schedule.txt. diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt index 5d1335faec2..f806e50aaa6 100644 --- a/Documentation/filesystems/sysfs.txt +++ b/Documentation/filesystems/sysfs.txt @@ -39,10 +39,12 @@ userspace. Top-level directories in sysfs represent the common ancestors of object hierarchies; i.e. the subsystems the objects belong to. -Sysfs internally stores the kobject that owns the directory in the -->d_fsdata pointer of the directory's dentry. This allows sysfs to do -reference counting directly on the kobject when the file is opened and -closed. +Sysfs internally stores a pointer to the kobject that implements a +directory in the sysfs_dirent object associated with the directory. In +the past this kobject pointer has been used by sysfs to do reference +counting directly on the kobject whenever the file is opened or closed. +With the current sysfs implementation the kobject reference count is +only modified directly by the function sysfs_schedule_callback(). Attributes @@ -208,9 +210,9 @@ Other notes: is 4096. - show() methods should return the number of bytes printed into the - buffer. This is the return value of snprintf(). + buffer. This is the return value of scnprintf(). -- show() should always use snprintf(). +- show() should always use scnprintf(). - store() should return the number of bytes used from the buffer. If the entire buffer has been used, just return the count argument. @@ -229,7 +231,7 @@ A very simple (and naive) implementation of a device attribute is: static ssize_t show_name(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%s\n", dev->name); + return scnprintf(buf, PAGE_SIZE, "%s\n", dev->name); } static ssize_t store_name(struct device *dev, struct device_attribute *attr, diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index fbb324e2bd4..ef0714aa8e4 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -95,10 +95,11 @@ functions: extern int unregister_filesystem(struct file_system_type *); The passed struct file_system_type describes your filesystem. When a -request is made to mount a device onto a directory in your filespace, -the VFS will call the appropriate get_sb() method for the specific -filesystem. The dentry for the mount point will then be updated to -point to the root inode for the new filesystem. +request is made to mount a filesystem onto a directory in your namespace, +the VFS will call the appropriate mount() method for the specific +filesystem. New vfsmount refering to the tree returned by ->mount() +will be attached to the mountpoint, so that when pathname resolution +reaches the mountpoint it will jump into the root of that vfsmount. You can see all filesystems that are registered to the kernel in the file /proc/filesystems. @@ -107,14 +108,14 @@ file /proc/filesystems. struct file_system_type ----------------------- -This describes the filesystem. As of kernel 2.6.22, the following +This describes the filesystem. As of kernel 2.6.39, the following members are defined: struct file_system_type { const char *name; int fs_flags; - int (*get_sb) (struct file_system_type *, int, - const char *, void *, struct vfsmount *); + struct dentry (*mount) (struct file_system_type *, int, + const char *, void *); void (*kill_sb) (struct super_block *); struct module *owner; struct file_system_type * next; @@ -128,11 +129,11 @@ struct file_system_type { fs_flags: various flags (i.e. FS_REQUIRES_DEV, FS_NO_DCACHE, etc.) - get_sb: the method to call when a new instance of this + mount: the method to call when a new instance of this filesystem should be mounted kill_sb: the method to call when an instance of this filesystem - should be unmounted + should be shut down owner: for internal VFS use: you should initialize this to THIS_MODULE in most cases. @@ -141,7 +142,7 @@ struct file_system_type { s_lock_key, s_umount_key: lockdep-specific -The get_sb() method has the following arguments: +The mount() method has the following arguments: struct file_system_type *fs_type: describes the filesystem, partly initialized by the specific filesystem code @@ -153,32 +154,39 @@ The get_sb() method has the following arguments: void *data: arbitrary mount options, usually comes as an ASCII string (see "Mount Options" section) - struct vfsmount *mnt: a vfs-internal representation of a mount point +The mount() method must return the root dentry of the tree requested by +caller. An active reference to its superblock must be grabbed and the +superblock must be locked. On failure it should return ERR_PTR(error). -The get_sb() method must determine if the block device specified -in the dev_name and fs_type contains a filesystem of the type the method -supports. If it succeeds in opening the named block device, it initializes a -struct super_block descriptor for the filesystem contained by the block device. -On failure it returns an error. +The arguments match those of mount(2) and their interpretation +depends on filesystem type. E.g. for block filesystems, dev_name is +interpreted as block device name, that device is opened and if it +contains a suitable filesystem image the method creates and initializes +struct super_block accordingly, returning its root dentry to caller. + +->mount() may choose to return a subtree of existing filesystem - it +doesn't have to create a new one. The main result from the caller's +point of view is a reference to dentry at the root of (sub)tree to +be attached; creation of new superblock is a common side effect. The most interesting member of the superblock structure that the -get_sb() method fills in is the "s_op" field. This is a pointer to +mount() method fills in is the "s_op" field. This is a pointer to a "struct super_operations" which describes the next level of the filesystem implementation. -Usually, a filesystem uses one of the generic get_sb() implementations -and provides a fill_super() method instead. The generic methods are: +Usually, a filesystem uses one of the generic mount() implementations +and provides a fill_super() callback instead. The generic variants are: - get_sb_bdev: mount a filesystem residing on a block device + mount_bdev: mount a filesystem residing on a block device - get_sb_nodev: mount a filesystem that is not backed by a device + mount_nodev: mount a filesystem that is not backed by a device - get_sb_single: mount a filesystem which shares the instance between + mount_single: mount a filesystem which shares the instance between all mounts -A fill_super() method implementation has the following arguments: +A fill_super() callback implementation has the following arguments: - struct super_block *sb: the superblock structure. The method fill_super() + struct super_block *sb: the superblock structure. The callback must initialize this properly. void *data: arbitrary mount options, usually comes as an ASCII @@ -415,8 +423,8 @@ otherwise noted. permission: called by the VFS to check for access rights on a POSIX-like filesystem. - May be called in rcu-walk mode (flags & IPERM_RCU). If in rcu-walk - mode, the filesystem must check the permission without blocking or + May be called in rcu-walk mode (flags & IPERM_FLAG_RCU). If in rcu-walk + mode, the filesystem must check the permission without blocking or storing to the inode. If a situation is encountered that rcu-walk cannot handle, return @@ -864,6 +872,8 @@ struct dentry_operations { void (*d_release)(struct dentry *); void (*d_iput)(struct dentry *, struct inode *); char *(*d_dname)(struct dentry *, char *, int); + struct vfsmount *(*d_automount)(struct path *); + int (*d_manage)(struct dentry *, bool, bool); }; d_revalidate: called when the VFS needs to revalidate a dentry. This @@ -930,6 +940,47 @@ struct dentry_operations { at the end of the buffer, and returns a pointer to the first char. dynamic_dname() helper function is provided to take care of this. + d_automount: called when an automount dentry is to be traversed (optional). + This should create a new VFS mount record and return the record to the + caller. The caller is supplied with a path parameter giving the + automount directory to describe the automount target and the parent + VFS mount record to provide inheritable mount parameters. NULL should + be returned if someone else managed to make the automount first. If + the vfsmount creation failed, then an error code should be returned. + If -EISDIR is returned, then the directory will be treated as an + ordinary directory and returned to pathwalk to continue walking. + + If a vfsmount is returned, the caller will attempt to mount it on the + mountpoint and will remove the vfsmount from its expiration list in + the case of failure. The vfsmount should be returned with 2 refs on + it to prevent automatic expiration - the caller will clean up the + additional ref. + + This function is only used if DCACHE_NEED_AUTOMOUNT is set on the + dentry. This is set by __d_instantiate() if S_AUTOMOUNT is set on the + inode being added. + + d_manage: called to allow the filesystem to manage the transition from a + dentry (optional). This allows autofs, for example, to hold up clients + waiting to explore behind a 'mountpoint' whilst letting the daemon go + past and construct the subtree there. 0 should be returned to let the + calling process continue. -EISDIR can be returned to tell pathwalk to + use this directory as an ordinary directory and to ignore anything + mounted on it and not to check the automount flag. Any other error + code will abort pathwalk completely. + + If the 'mounting_here' parameter is true, then namespace_sem is being + held by the caller and the function should not initiate any mounts or + unmounts that it will then wait for. + + If the 'rcu_walk' parameter is true, then the caller is doing a + pathwalk in RCU-walk mode. Sleeping is not permitted in this mode, + and the caller can be asked to leave it and call again by returing + -ECHILD. + + This function is only used if DCACHE_MANAGE_TRANSIT is set on the + dentry being transited from. + Example : static char *pipefs_dname(struct dentry *dent, char *buffer, int buflen) |