summaryrefslogtreecommitdiffstats
path: root/Documentation/filesystems
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation/filesystems')
-rw-r--r--Documentation/filesystems/Locking15
-rw-r--r--Documentation/filesystems/nfs/pnfs.txt7
-rw-r--r--Documentation/filesystems/ntfs.txt2
-rw-r--r--Documentation/filesystems/porting15
-rw-r--r--Documentation/filesystems/proc.txt7
-rw-r--r--Documentation/filesystems/sysfs.txt16
-rw-r--r--Documentation/filesystems/vfs.txt103
7 files changed, 120 insertions, 45 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 977d8919cc6..2e994efe12c 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -19,6 +19,8 @@ prototypes:
void (*d_release)(struct dentry *);
void (*d_iput)(struct dentry *, struct inode *);
char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen);
+ struct vfsmount *(*d_automount)(struct path *path);
+ int (*d_manage)(struct dentry *, bool);
locking rules:
rename_lock ->d_lock may block rcu-walk
@@ -29,6 +31,8 @@ d_delete: no yes no no
d_release: no no yes no
d_iput: no no yes no
d_dname: no no no no
+d_automount: no no yes no
+d_manage: no no yes (ref-walk) maybe
--------------------------- inode_operations ---------------------------
prototypes:
@@ -56,7 +60,6 @@ ata *);
ssize_t (*listxattr) (struct dentry *, char *, size_t);
int (*removexattr) (struct dentry *, const char *);
void (*truncate_range)(struct inode *, loff_t, loff_t);
- long (*fallocate)(struct inode *inode, int mode, loff_t offset, loff_t len);
int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
locking rules:
@@ -84,7 +87,6 @@ getxattr: no
listxattr: no
removexattr: yes
truncate_range: yes
-fallocate: no
fiemap: no
Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
victim.
@@ -164,13 +166,11 @@ prototypes:
void (*kill_sb) (struct super_block *);
locking rules:
may block
-get_sb yes
mount yes
kill_sb yes
-->get_sb() returns error or 0 with locked superblock attached to the vfsmount
-(exclusive on ->s_umount).
-->mount() returns ERR_PTR or the root dentry.
+->mount() returns ERR_PTR or the root dentry; its superblock should be locked
+on return.
->kill_sb() takes a write-locked superblock, does all shutdown work on it,
unlocks and drops the reference.
@@ -343,7 +343,6 @@ prototypes:
int (*fl_grant)(struct file_lock *, struct file_lock *, int);
void (*fl_release_private)(struct file_lock *);
void (*fl_break)(struct file_lock *); /* break_lease callback */
- int (*fl_mylease)(struct file_lock *, struct file_lock *);
int (*fl_change)(struct file_lock **, int);
locking rules:
@@ -353,7 +352,6 @@ fl_notify: yes no
fl_grant: no no
fl_release_private: maybe no
fl_break: yes no
-fl_mylease: yes no
fl_change yes no
--------------------------- buffer_head -----------------------------------
@@ -435,6 +433,7 @@ prototypes:
ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *,
size_t, unsigned int);
int (*setlease)(struct file *, long, struct file_lock **);
+ long (*fallocate)(struct file *, int, loff_t, loff_t);
};
locking rules:
diff --git a/Documentation/filesystems/nfs/pnfs.txt b/Documentation/filesystems/nfs/pnfs.txt
index bc0b9cfe095..983e14abe7e 100644
--- a/Documentation/filesystems/nfs/pnfs.txt
+++ b/Documentation/filesystems/nfs/pnfs.txt
@@ -46,3 +46,10 @@ data server cache
file driver devices refer to data servers, which are kept in a module
level cache. Its reference is held over the lifetime of the deviceid
pointing to it.
+
+lseg
+----
+lseg maintains an extra reference corresponding to the NFS_LSEG_VALID
+bit which holds it in the pnfs_layout_hdr's list. When the final lseg
+is removed from the pnfs_layout_hdr's list, the NFS_LAYOUT_DESTROYED
+bit is set, preventing any new lsegs from being added.
diff --git a/Documentation/filesystems/ntfs.txt b/Documentation/filesystems/ntfs.txt
index 6ef8cf3bc9a..933bc66ccff 100644
--- a/Documentation/filesystems/ntfs.txt
+++ b/Documentation/filesystems/ntfs.txt
@@ -460,6 +460,8 @@ Note, a technical ChangeLog aimed at kernel hackers is in fs/ntfs/ChangeLog.
2.1.30:
- Fix writev() (it kept writing the first segment over and over again
instead of moving onto subsequent segments).
+ - Fix crash in ntfs_mft_record_alloc() when mapping the new extent mft
+ record failed.
2.1.29:
- Fix a deadlock when mounting read-write.
2.1.28:
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 266d2059b9b..0c986c9e851 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -365,8 +365,8 @@ must be done in the RCU callback.
[recommended]
vfs now tries to do path walking in "rcu-walk mode", which avoids
atomic operations and scalability hazards on dentries and inodes (see
-Documentation/filesystems/path-walk.txt). d_hash and d_compare changes (above)
-are examples of the changes required to support this. For more complex
+Documentation/filesystems/path-lookup.txt). d_hash and d_compare changes
+(above) are examples of the changes required to support this. For more complex
filesystem callbacks, the vfs drops out of rcu-walk mode before the fs call, so
no changes are required to the filesystem. However, this is costly and loses
the benefits of rcu-walk mode. We will begin to add filesystem callbacks that
@@ -383,8 +383,8 @@ Documentation/filesystems/vfs.txt for more details.
permission and check_acl are inode permission checks that are called
on many or all directory inodes on the way down a path walk (to check for
-exec permission). These must now be rcu-walk aware (flags & IPERM_RCU). See
-Documentation/filesystems/vfs.txt for more details.
+exec permission). These must now be rcu-walk aware (flags & IPERM_FLAG_RCU).
+See Documentation/filesystems/vfs.txt for more details.
--
[mandatory]
@@ -394,3 +394,10 @@ file) you must return -EOPNOTSUPP if FALLOC_FL_PUNCH_HOLE is set in mode.
Currently you can only have FALLOC_FL_PUNCH_HOLE with FALLOC_FL_KEEP_SIZE set,
so the i_size should not change when hole punching, even when puching the end of
a file off.
+
+--
+[mandatory]
+ ->get_sb() is gone. Switch to use of ->mount(). Typically it's just
+a matter of switching from calling get_sb_... to mount_... and changing the
+function type. If you were doing it manually, just switch from setting ->mnt_root
+to some pointer to returning that pointer. On errors return ERR_PTR(...).
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 9471225212c..23cae6548d3 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -375,6 +375,7 @@ Anonymous: 0 kB
Swap: 0 kB
KernelPageSize: 4 kB
MMUPageSize: 4 kB
+Locked: 374 kB
The first of these lines shows the same information as is displayed for the
mapping in /proc/PID/maps. The remaining lines show the size of the mapping
@@ -670,6 +671,8 @@ varies by architecture and compile options. The following is from a
> cat /proc/meminfo
+The "Locked" indicates whether the mapping is locked in memory or not.
+
MemTotal: 16344972 kB
MemFree: 13634064 kB
@@ -1320,6 +1323,10 @@ scaled linearly with /proc/<pid>/oom_score_adj.
Writing to /proc/<pid>/oom_score_adj or /proc/<pid>/oom_adj will change the
other with its scaled value.
+The value of /proc/<pid>/oom_score_adj may be reduced no lower than the last
+value set by a CAP_SYS_RESOURCE process. To reduce the value any lower
+requires CAP_SYS_RESOURCE.
+
NOTICE: /proc/<pid>/oom_adj is deprecated and will be removed, please see
Documentation/feature-removal-schedule.txt.
diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt
index 5d1335faec2..f806e50aaa6 100644
--- a/Documentation/filesystems/sysfs.txt
+++ b/Documentation/filesystems/sysfs.txt
@@ -39,10 +39,12 @@ userspace. Top-level directories in sysfs represent the common
ancestors of object hierarchies; i.e. the subsystems the objects
belong to.
-Sysfs internally stores the kobject that owns the directory in the
-->d_fsdata pointer of the directory's dentry. This allows sysfs to do
-reference counting directly on the kobject when the file is opened and
-closed.
+Sysfs internally stores a pointer to the kobject that implements a
+directory in the sysfs_dirent object associated with the directory. In
+the past this kobject pointer has been used by sysfs to do reference
+counting directly on the kobject whenever the file is opened or closed.
+With the current sysfs implementation the kobject reference count is
+only modified directly by the function sysfs_schedule_callback().
Attributes
@@ -208,9 +210,9 @@ Other notes:
is 4096.
- show() methods should return the number of bytes printed into the
- buffer. This is the return value of snprintf().
+ buffer. This is the return value of scnprintf().
-- show() should always use snprintf().
+- show() should always use scnprintf().
- store() should return the number of bytes used from the buffer. If the
entire buffer has been used, just return the count argument.
@@ -229,7 +231,7 @@ A very simple (and naive) implementation of a device attribute is:
static ssize_t show_name(struct device *dev, struct device_attribute *attr,
char *buf)
{
- return snprintf(buf, PAGE_SIZE, "%s\n", dev->name);
+ return scnprintf(buf, PAGE_SIZE, "%s\n", dev->name);
}
static ssize_t store_name(struct device *dev, struct device_attribute *attr,
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index fbb324e2bd4..ef0714aa8e4 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -95,10 +95,11 @@ functions:
extern int unregister_filesystem(struct file_system_type *);
The passed struct file_system_type describes your filesystem. When a
-request is made to mount a device onto a directory in your filespace,
-the VFS will call the appropriate get_sb() method for the specific
-filesystem. The dentry for the mount point will then be updated to
-point to the root inode for the new filesystem.
+request is made to mount a filesystem onto a directory in your namespace,
+the VFS will call the appropriate mount() method for the specific
+filesystem. New vfsmount refering to the tree returned by ->mount()
+will be attached to the mountpoint, so that when pathname resolution
+reaches the mountpoint it will jump into the root of that vfsmount.
You can see all filesystems that are registered to the kernel in the
file /proc/filesystems.
@@ -107,14 +108,14 @@ file /proc/filesystems.
struct file_system_type
-----------------------
-This describes the filesystem. As of kernel 2.6.22, the following
+This describes the filesystem. As of kernel 2.6.39, the following
members are defined:
struct file_system_type {
const char *name;
int fs_flags;
- int (*get_sb) (struct file_system_type *, int,
- const char *, void *, struct vfsmount *);
+ struct dentry (*mount) (struct file_system_type *, int,
+ const char *, void *);
void (*kill_sb) (struct super_block *);
struct module *owner;
struct file_system_type * next;
@@ -128,11 +129,11 @@ struct file_system_type {
fs_flags: various flags (i.e. FS_REQUIRES_DEV, FS_NO_DCACHE, etc.)
- get_sb: the method to call when a new instance of this
+ mount: the method to call when a new instance of this
filesystem should be mounted
kill_sb: the method to call when an instance of this filesystem
- should be unmounted
+ should be shut down
owner: for internal VFS use: you should initialize this to THIS_MODULE in
most cases.
@@ -141,7 +142,7 @@ struct file_system_type {
s_lock_key, s_umount_key: lockdep-specific
-The get_sb() method has the following arguments:
+The mount() method has the following arguments:
struct file_system_type *fs_type: describes the filesystem, partly initialized
by the specific filesystem code
@@ -153,32 +154,39 @@ The get_sb() method has the following arguments:
void *data: arbitrary mount options, usually comes as an ASCII
string (see "Mount Options" section)
- struct vfsmount *mnt: a vfs-internal representation of a mount point
+The mount() method must return the root dentry of the tree requested by
+caller. An active reference to its superblock must be grabbed and the
+superblock must be locked. On failure it should return ERR_PTR(error).
-The get_sb() method must determine if the block device specified
-in the dev_name and fs_type contains a filesystem of the type the method
-supports. If it succeeds in opening the named block device, it initializes a
-struct super_block descriptor for the filesystem contained by the block device.
-On failure it returns an error.
+The arguments match those of mount(2) and their interpretation
+depends on filesystem type. E.g. for block filesystems, dev_name is
+interpreted as block device name, that device is opened and if it
+contains a suitable filesystem image the method creates and initializes
+struct super_block accordingly, returning its root dentry to caller.
+
+->mount() may choose to return a subtree of existing filesystem - it
+doesn't have to create a new one. The main result from the caller's
+point of view is a reference to dentry at the root of (sub)tree to
+be attached; creation of new superblock is a common side effect.
The most interesting member of the superblock structure that the
-get_sb() method fills in is the "s_op" field. This is a pointer to
+mount() method fills in is the "s_op" field. This is a pointer to
a "struct super_operations" which describes the next level of the
filesystem implementation.
-Usually, a filesystem uses one of the generic get_sb() implementations
-and provides a fill_super() method instead. The generic methods are:
+Usually, a filesystem uses one of the generic mount() implementations
+and provides a fill_super() callback instead. The generic variants are:
- get_sb_bdev: mount a filesystem residing on a block device
+ mount_bdev: mount a filesystem residing on a block device
- get_sb_nodev: mount a filesystem that is not backed by a device
+ mount_nodev: mount a filesystem that is not backed by a device
- get_sb_single: mount a filesystem which shares the instance between
+ mount_single: mount a filesystem which shares the instance between
all mounts
-A fill_super() method implementation has the following arguments:
+A fill_super() callback implementation has the following arguments:
- struct super_block *sb: the superblock structure. The method fill_super()
+ struct super_block *sb: the superblock structure. The callback
must initialize this properly.
void *data: arbitrary mount options, usually comes as an ASCII
@@ -415,8 +423,8 @@ otherwise noted.
permission: called by the VFS to check for access rights on a POSIX-like
filesystem.
- May be called in rcu-walk mode (flags & IPERM_RCU). If in rcu-walk
- mode, the filesystem must check the permission without blocking or
+ May be called in rcu-walk mode (flags & IPERM_FLAG_RCU). If in rcu-walk
+ mode, the filesystem must check the permission without blocking or
storing to the inode.
If a situation is encountered that rcu-walk cannot handle, return
@@ -864,6 +872,8 @@ struct dentry_operations {
void (*d_release)(struct dentry *);
void (*d_iput)(struct dentry *, struct inode *);
char *(*d_dname)(struct dentry *, char *, int);
+ struct vfsmount *(*d_automount)(struct path *);
+ int (*d_manage)(struct dentry *, bool, bool);
};
d_revalidate: called when the VFS needs to revalidate a dentry. This
@@ -930,6 +940,47 @@ struct dentry_operations {
at the end of the buffer, and returns a pointer to the first char.
dynamic_dname() helper function is provided to take care of this.
+ d_automount: called when an automount dentry is to be traversed (optional).
+ This should create a new VFS mount record and return the record to the
+ caller. The caller is supplied with a path parameter giving the
+ automount directory to describe the automount target and the parent
+ VFS mount record to provide inheritable mount parameters. NULL should
+ be returned if someone else managed to make the automount first. If
+ the vfsmount creation failed, then an error code should be returned.
+ If -EISDIR is returned, then the directory will be treated as an
+ ordinary directory and returned to pathwalk to continue walking.
+
+ If a vfsmount is returned, the caller will attempt to mount it on the
+ mountpoint and will remove the vfsmount from its expiration list in
+ the case of failure. The vfsmount should be returned with 2 refs on
+ it to prevent automatic expiration - the caller will clean up the
+ additional ref.
+
+ This function is only used if DCACHE_NEED_AUTOMOUNT is set on the
+ dentry. This is set by __d_instantiate() if S_AUTOMOUNT is set on the
+ inode being added.
+
+ d_manage: called to allow the filesystem to manage the transition from a
+ dentry (optional). This allows autofs, for example, to hold up clients
+ waiting to explore behind a 'mountpoint' whilst letting the daemon go
+ past and construct the subtree there. 0 should be returned to let the
+ calling process continue. -EISDIR can be returned to tell pathwalk to
+ use this directory as an ordinary directory and to ignore anything
+ mounted on it and not to check the automount flag. Any other error
+ code will abort pathwalk completely.
+
+ If the 'mounting_here' parameter is true, then namespace_sem is being
+ held by the caller and the function should not initiate any mounts or
+ unmounts that it will then wait for.
+
+ If the 'rcu_walk' parameter is true, then the caller is doing a
+ pathwalk in RCU-walk mode. Sleeping is not permitted in this mode,
+ and the caller can be asked to leave it and call again by returing
+ -ECHILD.
+
+ This function is only used if DCACHE_MANAGE_TRANSIT is set on the
+ dentry being transited from.
+
Example :
static char *pipefs_dname(struct dentry *dent, char *buffer, int buflen)