7 files changed, 120 insertions, 45 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 977d8919cc6..2e994efe12c 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -19,6 +19,8 @@ prototypes:
 	void (*d_release)(struct dentry *);
 	void (*d_iput)(struct dentry *, struct inode *);
 	char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen);
+	struct vfsmount *(*d_automount)(struct path *path);
+	int (*d_manage)(struct dentry *, bool, bool);
 
 locking rules:
 		rename_lock	->d_lock	may block	rcu-walk
@@ -29,6 +31,8 @@ d_delete:	no		yes		no		no
 d_release:	no		no		yes		no
 d_iput:		no		no		yes		no
 d_dname:	no		no		no		no
+d_automount:	no		no		yes		no
+d_manage:	no		no		yes (ref-walk)	maybe
 
 --------------------------- inode_operations --------------------------- 
 prototypes:
@@ -56,7 +60,6 @@ ata *);
 	ssize_t (*listxattr) (struct dentry *, char *, size_t);
 	int (*removexattr) (struct dentry *, const char *);
 	void (*truncate_range)(struct inode *, loff_t, loff_t);
-	long (*fallocate)(struct inode *inode, int mode, loff_t offset, loff_t len);
 	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
 
 locking rules:
@@ -84,7 +87,6 @@ getxattr:	no
 listxattr:	no
 removexattr:	yes
 truncate_range:	yes
-fallocate:	no
 fiemap:		no
 	Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
 victim.
@@ -164,13 +166,11 @@ prototypes:
 	void (*kill_sb) (struct super_block *);
 locking rules:
 		may block
-get_sb		yes
 mount		yes
 kill_sb		yes
 
-->get_sb() returns error or 0 with locked superblock attached to the vfsmount
-(exclusive on ->s_umount).
-->mount() returns ERR_PTR or the root dentry.
+->mount() returns ERR_PTR or the root dentry; its superblock should be locked
+on return.
 ->kill_sb() takes a write-locked superblock, does all shutdown work on it,
 unlocks and drops the reference.
 
@@ -343,7 +343,6 @@ prototypes:
 	int (*fl_grant)(struct file_lock *, struct file_lock *, int);
 	void (*fl_release_private)(struct file_lock *);
 	void (*fl_break)(struct file_lock *); /* break_lease callback */
-	int (*fl_mylease)(struct file_lock *, struct file_lock *);
 	int (*fl_change)(struct file_lock **, int);
 
 locking rules:
@@ -353,7 +352,6 @@ fl_notify:		yes		no
 fl_grant:		no		no
 fl_release_private:	maybe		no
 fl_break:		yes		no
-fl_mylease:		yes		no
 fl_change		yes		no
 
 --------------------------- buffer_head -----------------------------------
@@ -435,6 +433,7 @@ prototypes:
 	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *,
 			size_t, unsigned int);
 	int (*setlease)(struct file *, long, struct file_lock **);
+	long (*fallocate)(struct file *, int, loff_t, loff_t);
 };
 
 locking rules:
diff --git a/Documentation/filesystems/nfs/pnfs.txt b/Documentation/filesystems/nfs/pnfs.txt
index bc0b9cfe095..983e14abe7e 100644
--- a/Documentation/filesystems/nfs/pnfs.txt
+++ b/Documentation/filesystems/nfs/pnfs.txt
@@ -46,3 +46,10 @@ data server cache
 file driver devices refer to data servers, which are kept in a module
 level cache.  Its reference is held over the lifetime of the deviceid
 pointing to it.
+
+lseg
+----
+lseg maintains an extra reference corresponding to the NFS_LSEG_VALID
+bit which holds it in the pnfs_layout_hdr's list.  When the final lseg
+is removed from the pnfs_layout_hdr's list, the NFS_LAYOUT_DESTROYED
+bit is set, preventing any new lsegs from being added.
diff --git a/Documentation/filesystems/ntfs.txt b/Documentation/filesystems/ntfs.txt
index 6ef8cf3bc9a..933bc66ccff 100644
--- a/Documentation/filesystems/ntfs.txt
+++ b/Documentation/filesystems/ntfs.txt
@@ -460,6 +460,8 @@ Note, a technical ChangeLog aimed at kernel hackers is in fs/ntfs/ChangeLog.
 2.1.30:
 	- Fix writev() (it kept writing the first segment over and over again
 	  instead of moving onto subsequent segments).
+	- Fix crash in ntfs_mft_record_alloc() when mapping the new extent mft
+	  record failed.
 2.1.29:
 	- Fix a deadlock when mounting read-write.
 2.1.28:
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 266d2059b9b..0c986c9e851 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -365,8 +365,8 @@ must be done in the RCU callback.
 [recommended]
 	vfs now tries to do path walking in "rcu-walk mode", which avoids
 atomic operations and scalability hazards on dentries and inodes (see
-Documentation/filesystems/path-walk.txt). d_hash and d_compare changes (above)
-are examples of the changes required to support this. For more complex
+Documentation/filesystems/path-lookup.txt). d_hash and d_compare changes
+(above) are examples of the changes required to support this. For more complex
 filesystem callbacks, the vfs drops out of rcu-walk mode before the fs call, so
 no changes are required to the filesystem. However, this is costly and loses
 the benefits of rcu-walk mode. We will begin to add filesystem callbacks that
@@ -383,8 +383,8 @@ Documentation/filesystems/vfs.txt for more details.
 
 	permission and check_acl are inode permission checks that are called
 on many or all directory inodes on the way down a path walk (to check for
-exec permission). These must now be rcu-walk aware (flags & IPERM_RCU). See
-Documentation/filesystems/vfs.txt for more details.
+exec permission). These must now be rcu-walk aware (flags & IPERM_FLAG_RCU).
+See Documentation/filesystems/vfs.txt for more details.
  
 --
 [mandatory]
@@ -394,3 +394,10 @@ file) you must return -EOPNOTSUPP if FALLOC_FL_PUNCH_HOLE is set in mode.
 Currently you can only have FALLOC_FL_PUNCH_HOLE with FALLOC_FL_KEEP_SIZE set,
 so the i_size should not change when hole punching, even when puching the end of
 a file off.
+
+--
+[mandatory]
+	->get_sb() is gone.  Switch to use of ->mount().  Typically it's just
+a matter of switching from calling get_sb_... to mount_... and changing the
+function type.  If you were doing it manually, just switch from setting ->mnt_root
+to some pointer to returning that pointer.  On errors return ERR_PTR(...).
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 9471225212c..23cae6548d3 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -375,6 +375,7 @@ Anonymous:             0 kB
 Swap:                  0 kB
 KernelPageSize:        4 kB
 MMUPageSize:           4 kB
+Locked:              374 kB
 
 The first of these lines shows the same information as is displayed for the
 mapping in /proc/PID/maps.  The remaining lines show the size of the mapping
@@ -670,6 +671,8 @@ varies by architecture and compile options.  The following is from a
 
 > cat /proc/meminfo
 
+The "Locked" field indicates whether the mapping is locked in memory or not.
+
 
 MemTotal:     16344972 kB
 MemFree:      13634064 kB
@@ -1320,6 +1323,10 @@ scaled linearly with /proc/<pid>/oom_score_adj.
 Writing to /proc/<pid>/oom_score_adj or /proc/<pid>/oom_adj will change the
 other with its scaled value.
 
+The value of /proc/<pid>/oom_score_adj may be reduced no lower than the last
+value set by a CAP_SYS_RESOURCE process. To reduce the value any lower
+requires CAP_SYS_RESOURCE.
+
 NOTICE: /proc/<pid>/oom_adj is deprecated and will be removed, please see
 Documentation/feature-removal-schedule.txt.
 
diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt
index 5d1335faec2..f806e50aaa6 100644
--- a/Documentation/filesystems/sysfs.txt
+++ b/Documentation/filesystems/sysfs.txt
@@ -39,10 +39,12 @@ userspace. Top-level directories in sysfs represent the common
 ancestors of object hierarchies; i.e. the subsystems the objects
 belong to. 
 
-Sysfs internally stores the kobject that owns the directory in the
-->d_fsdata pointer of the directory's dentry. This allows sysfs to do
-reference counting directly on the kobject when the file is opened and
-closed. 
+Sysfs internally stores a pointer to the kobject that implements a
+directory in the sysfs_dirent object associated with the directory. In
+the past this kobject pointer has been used by sysfs to do reference
+counting directly on the kobject whenever the file is opened or closed.
+With the current sysfs implementation the kobject reference count is
+only modified directly by the function sysfs_schedule_callback().
 
 
 Attributes
@@ -208,9 +210,9 @@ Other notes:
   is 4096. 
 
 - show() methods should return the number of bytes printed into the
-  buffer. This is the return value of snprintf().
+  buffer. This is the return value of scnprintf().
 
-- show() should always use snprintf(). 
+- show() should always use scnprintf().
 
 - store() should return the number of bytes used from the buffer. If the
   entire buffer has been used, just return the count argument.
@@ -229,7 +231,7 @@ A very simple (and naive) implementation of a device attribute is:
 static ssize_t show_name(struct device *dev, struct device_attribute *attr,
                          char *buf)
 {
-	return snprintf(buf, PAGE_SIZE, "%s\n", dev->name);
+	return scnprintf(buf, PAGE_SIZE, "%s\n", dev->name);
 }
 
 static ssize_t store_name(struct device *dev, struct device_attribute *attr,
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index fbb324e2bd4..ef0714aa8e4 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -95,10 +95,11 @@ functions:
    extern int unregister_filesystem(struct file_system_type *);
 
 The passed struct file_system_type describes your filesystem. When a
-request is made to mount a device onto a directory in your filespace,
-the VFS will call the appropriate get_sb() method for the specific
-filesystem. The dentry for the mount point will then be updated to
-point to the root inode for the new filesystem.
+request is made to mount a filesystem onto a directory in your namespace,
+the VFS will call the appropriate mount() method for the specific
+filesystem.  New vfsmount referring to the tree returned by ->mount()
+will be attached to the mountpoint, so that when pathname resolution
+reaches the mountpoint it will jump into the root of that vfsmount.
 
 You can see all filesystems that are registered to the kernel in the
 file /proc/filesystems.
@@ -107,14 +108,14 @@ file /proc/filesystems.
 struct file_system_type
 -----------------------
 
-This describes the filesystem. As of kernel 2.6.22, the following
+This describes the filesystem. As of kernel 2.6.39, the following
 members are defined:
 
 struct file_system_type {
 	const char *name;
 	int fs_flags;
-        int (*get_sb) (struct file_system_type *, int,
-                       const char *, void *, struct vfsmount *);
+        struct dentry *(*mount) (struct file_system_type *, int,
+                       const char *, void *);
         void (*kill_sb) (struct super_block *);
         struct module *owner;
         struct file_system_type * next;
@@ -128,11 +129,11 @@ struct file_system_type {
 
   fs_flags: various flags (i.e. FS_REQUIRES_DEV, FS_NO_DCACHE, etc.)
 
-  get_sb: the method to call when a new instance of this
+  mount: the method to call when a new instance of this
 	filesystem should be mounted
 
   kill_sb: the method to call when an instance of this filesystem
-	should be unmounted
+	should be shut down
 
   owner: for internal VFS use: you should initialize this to THIS_MODULE in
   	most cases.
@@ -141,7 +142,7 @@ struct file_system_type {
 
   s_lock_key, s_umount_key: lockdep-specific
 
-The get_sb() method has the following arguments:
+The mount() method has the following arguments:
 
   struct file_system_type *fs_type: describes the filesystem, partly initialized
   	by the specific filesystem code
@@ -153,32 +154,39 @@ The get_sb() method has the following arguments:
   void *data: arbitrary mount options, usually comes as an ASCII
 	string (see "Mount Options" section)
 
-  struct vfsmount *mnt: a vfs-internal representation of a mount point
+The mount() method must return the root dentry of the tree requested by
+caller.  An active reference to its superblock must be grabbed and the
+superblock must be locked.  On failure it should return ERR_PTR(error).
 
-The get_sb() method must determine if the block device specified
-in the dev_name and fs_type contains a filesystem of the type the method
-supports. If it succeeds in opening the named block device, it initializes a
-struct super_block descriptor for the filesystem contained by the block device.
-On failure it returns an error.
+The arguments match those of mount(2) and their interpretation
+depends on filesystem type.  E.g. for block filesystems, dev_name is
+interpreted as block device name, that device is opened and if it
+contains a suitable filesystem image the method creates and initializes
+struct super_block accordingly, returning its root dentry to caller.
+
+->mount() may choose to return a subtree of existing filesystem - it
+doesn't have to create a new one.  The main result from the caller's
+point of view is a reference to dentry at the root of (sub)tree to
+be attached; creation of new superblock is a common side effect.
 
 The most interesting member of the superblock structure that the
-get_sb() method fills in is the "s_op" field. This is a pointer to
+mount() method fills in is the "s_op" field. This is a pointer to
 a "struct super_operations" which describes the next level of the
 filesystem implementation.
 
-Usually, a filesystem uses one of the generic get_sb() implementations
-and provides a fill_super() method instead. The generic methods are:
+Usually, a filesystem uses one of the generic mount() implementations
+and provides a fill_super() callback instead. The generic variants are:
 
-  get_sb_bdev: mount a filesystem residing on a block device
+  mount_bdev: mount a filesystem residing on a block device
 
-  get_sb_nodev: mount a filesystem that is not backed by a device
+  mount_nodev: mount a filesystem that is not backed by a device
 
-  get_sb_single: mount a filesystem which shares the instance between
+  mount_single: mount a filesystem which shares the instance between
   	all mounts
 
-A fill_super() method implementation has the following arguments:
+A fill_super() callback implementation has the following arguments:
 
-  struct super_block *sb: the superblock structure. The method fill_super()
+  struct super_block *sb: the superblock structure. The callback
   	must initialize this properly.
 
   void *data: arbitrary mount options, usually comes as an ASCII
@@ -415,8 +423,8 @@ otherwise noted.
   permission: called by the VFS to check for access rights on a POSIX-like
   	filesystem.
 
-	May be called in rcu-walk mode (flags & IPERM_RCU). If in rcu-walk
-	mode, the filesystem must check the permission without blocking or
+	May be called in rcu-walk mode (flags & IPERM_FLAG_RCU). If in rcu-walk
+	mode, the filesystem must check the permission without blocking or
 	storing to the inode.
 
 	If a situation is encountered that rcu-walk cannot handle, return
@@ -864,6 +872,8 @@ struct dentry_operations {
 	void (*d_release)(struct dentry *);
 	void (*d_iput)(struct dentry *, struct inode *);
 	char *(*d_dname)(struct dentry *, char *, int);
+	struct vfsmount *(*d_automount)(struct path *);
+	int (*d_manage)(struct dentry *, bool, bool);
 };
 
   d_revalidate: called when the VFS needs to revalidate a dentry. This
@@ -930,6 +940,47 @@ struct dentry_operations {
 	at the end of the buffer, and returns a pointer to the first char.
 	dynamic_dname() helper function is provided to take care of this.
 
+  d_automount: called when an automount dentry is to be traversed (optional).
+	This should create a new VFS mount record and return the record to the
+	caller.  The caller is supplied with a path parameter giving the
+	automount directory to describe the automount target and the parent
+	VFS mount record to provide inheritable mount parameters.  NULL should
+	be returned if someone else managed to make the automount first.  If
+	the vfsmount creation failed, then an error code should be returned.
+	If -EISDIR is returned, then the directory will be treated as an
+	ordinary directory and returned to pathwalk to continue walking.
+
+	If a vfsmount is returned, the caller will attempt to mount it on the
+	mountpoint and will remove the vfsmount from its expiration list in
+	the case of failure.  The vfsmount should be returned with 2 refs on
+	it to prevent automatic expiration - the caller will clean up the
+	additional ref.
+
+	This function is only used if DCACHE_NEED_AUTOMOUNT is set on the
+	dentry.  This is set by __d_instantiate() if S_AUTOMOUNT is set on the
+	inode being added.
+
+  d_manage: called to allow the filesystem to manage the transition from a
+	dentry (optional).  This allows autofs, for example, to hold up clients
+	waiting to explore behind a 'mountpoint' whilst letting the daemon go
+	past and construct the subtree there.  0 should be returned to let the
+	calling process continue.  -EISDIR can be returned to tell pathwalk to
+	use this directory as an ordinary directory and to ignore anything
+	mounted on it and not to check the automount flag.  Any other error
+	code will abort pathwalk completely.
+
+	If the 'mounting_here' parameter is true, then namespace_sem is being
+	held by the caller and the function should not initiate any mounts or
+	unmounts that it will then wait for.
+
+	If the 'rcu_walk' parameter is true, then the caller is doing a
+	pathwalk in RCU-walk mode.  Sleeping is not permitted in this mode,
+	and the caller can be asked to leave it and call again by returning
+	-ECHILD.
+
+	This function is only used if DCACHE_MANAGE_TRANSIT is set on the
+	dentry being transited from.
+
 Example :
 
 static char *pipefs_dname(struct dentry *dent, char *buffer, int buflen)