4 files changed, 531 insertions, 277 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index f1997e9da61..94d93b1f8b5 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -464,15 +464,12 @@ prototypes:
 			size_t, unsigned int);
 	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *,
 			size_t, unsigned int);
-	int (*setlease)(struct file *, long, struct file_lock **);
+	int (*setlease)(struct file *, long, struct file_lock **, void **);
 	long (*fallocate)(struct file *, int, loff_t, loff_t);
 };
 
 locking rules:
-	All may block except for ->setlease.
-	No VFS locks held on entry except for ->setlease.
-
-->setlease has the file_list_lock held and must not sleep.
+	All may block.
 
 ->llseek() locking has moved from llseek to the individual llseek
 implementations.  If your fs is not using generic_file_llseek, you
@@ -496,6 +493,10 @@ components. And there are other reasons why the current interface is a mess...
 ->read on directories probably must go away - we should just enforce -EISDIR
 in sys_read() and friends.
 
+->setlease operations should call generic_setlease() before or after setting
+the lease within the individual filesystem to record the result of the
+operation
+
 --------------------------- dquot_operations -------------------------------
 prototypes:
 	int (*write_dquot) (struct dquot *);
diff --git a/Documentation/filesystems/autofs4.txt b/Documentation/filesystems/autofs4.txt
new file mode 100644
index 00000000000..39d02e19fb6
--- /dev/null
+++ b/Documentation/filesystems/autofs4.txt
@@ -0,0 +1,520 @@
+<head>
+<style> p { max-width:50em} ol, ul {max-width: 40em}</style>
+</head>
+
+autofs - how it works
+=====================
+
+Purpose
+-------
+
+The goal of autofs is to provide on-demand mounting and race free
+automatic unmounting of various other filesystems.  This provides two
+key advantages:
+
+1. There is no need to delay boot until all filesystems that
+   might be needed are mounted.  Processes that try to access those
+   slow filesystems might be delayed but other processes can
+   continue freely.  This is particularly important for
+   network filesystems (e.g. NFS) or filesystems stored on
+   media with a media-changing robot.
+
+2. The names and locations of filesystems can be stored in
+   a remote database and can change at any time.  The content
+   in that data base at the time of access will be used to provide
+   a target for the access.  The interpretation of names in the
+   filesystem can even be programmatic rather than database-backed,
+   allowing wildcards for example, and can vary based on the user who
+   first accessed a name.
+
+Context
+-------
+
+The "autofs4" filesystem module is only one part of an autofs system.
+There also needs to be a user-space program which looks up names
+and mounts filesystems.  This will often be the "automount" program,
+though other tools including "systemd" can make use of "autofs4".
+This document describes only the kernel module and the interactions
+required with any user-space program.  Subsequent text refers to this
+as the "automount daemon" or simply "the daemon".
+
+"autofs4" is a Linux kernel module with provides the "autofs"
+filesystem type.  Several "autofs" filesystems can be mounted and they
+can each be managed separately, or all managed by the same daemon.
+
+Content
+-------
+
+An autofs filesystem can contain 3 sorts of objects: directories,
+symbolic links and mount traps.  Mount traps are directories with
+extra properties as described in the next section.
+
+Objects can only be created by the automount daemon: symlinks are
+created with a regular `symlink` system call, while directories and
+mount traps are created with `mkdir`.  The determination of whether a
+directory should be a mount trap or not is quite _ad hoc_, largely for
+historical reasons, and is determined in part by the
+*direct*/*indirect*/*offset* mount options, and the *maxproto* mount option.
+
+If neither the *direct* or *offset* mount options are given (so the
+mount is considered to be *indirect*), then the root directory is
+always a regular directory, otherwise it is a mount trap when it is
+empty and a regular directory when not empty.  Note that *direct* and
+*offset* are treated identically so a concise summary is that the root
+directory is a mount trap only if the filesystem is mounted *direct*
+and the root is empty.
+
+Directories created in the root directory are mount traps only if the
+filesystem is mounted  *indirect* and they are empty.
+
+Directories further down the tree depend on the *maxproto* mount
+option and particularly whether it is less than five or not.
+When *maxproto* is five, no directories further down the
+tree are ever mount traps, they are always regular directories.  When
+the *maxproto* is four (or three), these directories are mount traps
+precisely when they are empty.
+
+So: non-empty (i.e. non-leaf) directories are never mount traps. Empty
+directories are sometimes mount traps, and sometimes not depending on
+where in the tree they are (root, top level, or lower), the *maxproto*,
+and whether the mount was *indirect* or not.
+
+Mount Traps
+---------------
+
+A core element of the implementation of autofs is the Mount Traps
+which are provided by the Linux VFS.  Any directory provided by a
+filesystem can be designated as a trap.  This involves two separate
+features that work together to allow autofs to do its job.
+
+**DCACHE_NEED_AUTOMOUNT**
+
+If a dentry has the DCACHE_NEED_AUTOMOUNT flag set (which gets set if
+the inode has S_AUTOMOUNT set, or can be set directly) then it is
+(potentially) a mount trap.  Any access to this directory beyond a
+"`stat`" will (normally) cause the `d_op->d_automount()` dentry operation
+to be called. The task of this method is to find the filesystem that
+should be mounted on the directory and to return it.  The VFS is
+responsible for actually mounting the root of this filesystem on the
+directory.
+
+autofs doesn't find the filesystem itself but sends a message to the
+automount daemon asking it to find and mount the filesystem.  The
+autofs `d_automount` method then waits for the daemon to report that
+everything is ready.  It will then return "`NULL`" indicating that the
+mount has already happened.  The VFS doesn't try to mount anything but
+follows down the mount that is already there.
+
+This functionality is sufficient for some users of mount traps such
+as NFS which creates traps so that mountpoints on the server can be
+reflected on the client.  However it is not sufficient for autofs.  As
+mounting onto a directory is considered to be "beyond a `stat`", the
+automount daemon would not be able to mount a filesystem on the 'trap'
+directory without some way to avoid getting caught in the trap.  For
+that purpose there is another flag.
+
+**DCACHE_MANAGE_TRANSIT**
+
+If a dentry has DCACHE_MANAGE_TRANSIT set then two very different but
+related behaviors are invoked, both using the `d_op->d_manage()`
+dentry operation.
+
+Firstly, before checking to see if any filesystem is mounted on the
+directory, d_manage() will be called with the `rcu_walk` parameter set
+to `false`.  It may return one of three things:
+
+-  A return value of zero indicates that there is nothing special
+   about this dentry and normal checks for mounts and automounts
+   should proceed.
+
+   autofs normally returns zero, but first waits for any
+   expiry (automatic unmounting of the mounted filesystem) to
+   complete.  This avoids races.
+
+-  A return value of `-EISDIR` tells the VFS to ignore any mounts
+   on the directory and to not consider calling `->d_automount()`.
+   This effectively disables the **DCACHE_NEED_AUTOMOUNT** flag
+   causing the directory not be a mount trap after all.
+
+   autofs returns this if it detects that the process performing the
+   lookup is the automount daemon and that the mount has been
+   requested but has not yet completed.  How it determines this is
+   discussed later.  This allows the automount daemon not to get
+   caught in the mount trap.
+
+   There is a subtlety here.  It is possible that a second autofs
+   filesystem can be mounted below the first and for both of them to
+   be managed by the same daemon.  For the daemon to be able to mount
+   something on the second it must be able to "walk" down past the
+   first.  This means that d_manage cannot *always* return -EISDIR for
+   the automount daemon.  It must only return it when a mount has
+   been requested, but has not yet completed.
+
+   `d_manage` also returns `-EISDIR` if the dentry shouldn't be a
+   mount trap, either because it is a symbolic link or because it is
+   not empty.
+
+-  Any other negative value is treated as an error and returned
+   to the caller.
+
+   autofs can return
+
+   - -ENOENT if the automount daemon failed to mount anything,
+   - -ENOMEM if it ran out of memory,
+   - -EINTR if a signal arrived while waiting for expiry to
+     complete
+   - or any other error sent down by the automount daemon.
+
+
+The second use case only occurs during an "RCU-walk" and so `rcu_walk`
+will be set.
+
+An RCU-walk is a fast and lightweight process for walking down a
+filename path (i.e. it is like running on tip-toes).  RCU-walk cannot
+cope with all situations so when it finds a difficulty it falls back
+to "REF-walk", which is slower but more robust.
+
+RCU-walk will never call `->d_automount`; the filesystems must already
+be mounted or RCU-walk cannot handle the path.
+To determine if a mount-trap is safe for RCU-walk mode it calls
+`->d_manage()` with `rcu_walk` set to `true`.
+
+In this case `d_manage()` must avoid blocking and should avoid taking
+spinlocks if at all possible.  Its sole purpose is to determine if it
+would be safe to follow down into any mounted directory and the only
+reason that it might not be is if an expiry of the mount is
+underway.
+
+In the `rcu_walk` case, `d_manage()` cannot return -EISDIR to tell the
+VFS that this is a directory that doesn't require d_automount.  If
+`rcu_walk` sees a dentry with DCACHE_NEED_AUTOMOUNT set but nothing
+mounted, it *will* fall back to REF-walk.  `d_manage()` cannot make the
+VFS remain in RCU-walk mode, but can only tell it to get out of
+RCU-walk mode by returning `-ECHILD`.
+
+So `d_manage()`, when called with `rcu_walk` set, should either return
+-ECHILD if there is any reason to believe it is unsafe to end the
+mounted filesystem, and otherwise should return 0.
+
+autofs will return `-ECHILD` if an expiry of the filesystem has been
+initiated or is being considered, otherwise it returns 0.
+
+
+Mountpoint expiry
+-----------------
+
+The VFS has a mechansim for automatically expiring unused mounts,
+much as it can expire any unused dentry information from the dcache.
+This is guided by the MNT_SHRINKABLE flag.  This  only applies to
+mounts that were created by `d_automount()` returning a filesystem to be
+mounted.  As autofs doesn't return such a filesystem but leaves the
+mounting to the automount daemon, it must involve the automount daemon
+in unmounting as well.  This also means that autofs has more control
+of expiry.
+
+The VFS also supports "expiry" of mounts using the MNT_EXPIRE flag to
+the `umount` system call.  Unmounting with MNT_EXPIRE will fail unless
+a previous attempt had been made, and the filesystem has been inactive
+and untouched since that previous attempt.  autofs4 does not depend on
+this but has its own internal tracking of whether filesystems were
+recently used.  This allows individual names in the autofs directory
+to expire separately.
+
+With version 4 of the protocol, the automount daemon can try to
+unmount any filesystems mounted on the autofs filesystem or remove any
+symbolic links or empty directories any time it likes.  If the unmount
+or removal is successful the filesystem will be returned to the state
+it was before the mount or creation, so that any access of the name
+will trigger normal auto-mount processing.  In particlar, `rmdir` and
+`unlink` do not leave negative entries in the dcache as a normal
+filesystem would, so an attempt to access a recently-removed object is
+passed to autofs for handling.
+
+With version 5, this is not safe except for unmounting from top-level
+directories.  As lower-level directories are never mount traps, other
+processes will see an empty directory as soon as the filesystem is
+unmounted.  So it is generally safest to use the autofs expiry
+protocol described below.
+
+Normally the daemon only wants to remove entries which haven't been
+used for a while.  For this purpose autofs maintains a "`last_used`"
+time stamp on each directory or symlink.  For symlinks it genuinely
+does record the last time the symlink was "used" or followed to find
+out where it points to.  For directories the field is a slight
+misnomer.  It actually records the last time that autofs checked if
+the directory or one of its descendents was busy and found that it
+was.  This is just as useful and doesn't require updating the field so
+often.
+
+The daemon is able to ask autofs if anything is due to be expired,
+using an `ioctl` as discussed later.  For a *direct* mount, autofs
+considers if the entire mount-tree can be unmounted or not.  For an
+*indirect* mount, autofs considers each of the names in the top level
+directory to determine if any of those can be unmounted and cleaned
+up.
+
+There is an option with indirect mounts to consider each of the leaves
+that has been mounted on instead of considering the top-level names.
+This is intended for compatability with version 4 of autofs and should
+be considered as deprecated.
+
+When autofs considers a directory it checks the `last_used` time and
+compares it with the "timeout" value set when the filesystem was
+mounted, though this check is ignored in some cases. It also checks if
+the directory or anything below it is in use.  For symbolic links,
+only the `last_used` time is ever considered.
+
+If both appear to support expiring the directory or symlink, an action
+is taken.
+
+There are two ways to ask autofs to consider expiry.  The first is to
+use the **AUTOFS_IOC_EXPIRE** ioctl.  This only works for indirect
+mounts.  If it finds something in the root directory to expire it will
+return the name of that thing.  Once a name has been returned the
+automount daemon needs to unmount any filesystems mounted below the
+name normally.  As described above, this is unsafe for non-toplevel
+mounts in a version-5 autofs.  For this reason the current `automountd`
+does not use this ioctl.
+
+The second mechanism uses either the **AUTOFS_DEV_IOCTL_EXPIRE_CMD** or
+the **AUTOFS_IOC_EXPIRE_MULTI** ioctl.  This will work for both direct and
+indirect mounts.  If it selects an object to expire, it will notify
+the daemon using the notification mechanism described below.  This
+will block until the daemon acknowledges the expiry notification.
+This implies that the "`EXPIRE`" ioctl must be sent from a different
+thread than the one which handles notification.
+
+While the ioctl is blocking, the entry is marked as "expiring" and
+`d_manage` will block until the daemon affirms that the unmount has
+completed (together with removing any directories that might have been
+necessary), or has been aborted.
+
+Communicating with autofs: detecting the daemon
+-----------------------------------------------
+
+There are several forms of communication between the automount daemon
+and the filesystem.  As we have already seen, the daemon can create and
+remove directories and symlinks using normal filesystem operations.
+autofs knows whether a process requesting some operation is the daemon
+or not based on its process-group id number (see getpgid(1)).
+
+When an autofs filesystem it mounted the pgid of the mounting
+processes is recorded unless the "pgrp=" option is given, in which
+case that number is recorded instead.  Any request arriving from a
+process in that process group is considered to come from the daemon.
+If the daemon ever has to be stopped and restarted a new pgid can be
+provided through an ioctl as will be described below.
+
+Communicating with autofs: the event pipe
+-----------------------------------------
+
+When an autofs filesystem is mounted, the 'write' end of a pipe must
+be passed using the 'fd=' mount option.  autofs will write
+notification messages to this pipe for the daemon to respond to.
+For version 5, the format of the message is:
+
+        struct autofs_v5_packet {
+                int proto_version;                /* Protocol version */
+                int type;                        /* Type of packet */
+                autofs_wqt_t wait_queue_token;
+                __u32 dev;
+                __u64 ino;
+                __u32 uid;
+                __u32 gid;
+                __u32 pid;
+                __u32 tgid;
+                __u32 len;
+                char name[NAME_MAX+1];
+        };
+
+where the type is one of
+
+        autofs_ptype_missing_indirect
+        autofs_ptype_expire_indirect
+        autofs_ptype_missing_direct
+        autofs_ptype_expire_direct
+
+so messages can indicate that a name is missing (something tried to
+access it but it isn't there) or that it has been selected for expiry.
+
+The pipe will be set to "packet mode" (equivalent to passing
+`O_DIRECT`) to _pipe2(2)_ so that a read from the pipe will return at
+most one packet, and any unread portion of a packet will be discarded.
+
+The `wait_queue_token` is a unique number which can identify a
+particular request to be acknowledged.  When a message is sent over
+the pipe the affected dentry is marked as either "active" or
+"expiring" and other accesses to it block until the message is
+acknowledged using one of the ioctls below and the relevant
+`wait_queue_token`.
+
+Communicating with autofs: root directory ioctls
+------------------------------------------------
+
+The root directory of an autofs filesystem will respond to a number of
+ioctls.   The process issuing the ioctl must have the CAP_SYS_ADMIN
+capability, or must be the automount daemon.
+
+The available ioctl commands are:
+
+- **AUTOFS_IOC_READY**: a notification has been handled.  The argument
+    to the ioctl command is the "wait_queue_token" number
+    corresponding to the notification being acknowledged.
+- **AUTOFS_IOC_FAIL**: similar to above, but indicates failure with
+    the error code `ENOENT`.
+- **AUTOFS_IOC_CATATONIC**: Causes the autofs to enter "catatonic"
+    mode meaning that it stops sending notifications to the daemon.
+    This mode is also entered if a write to the pipe fails.
+- **AUTOFS_IOC_PROTOVER**:  This returns the protocol version in use.
+- **AUTOFS_IOC_PROTOSUBVER**: Returns the protocol sub-version which
+    is really a version number for the implementation.  It is
+    currently 2.
+- **AUTOFS_IOC_SETTIMEOUT**:  This passes a pointer to an unsigned
+    long.  The value is used to set the timeout for expiry, and
+    the current timeout value is stored back through the pointer.
+- **AUTOFS_IOC_ASKUMOUNT**:  Returns, in the pointed-to `int`, 1 if
+    the filesystem could be unmounted.  This is only a hint as
+    the situation could change at any instant.  This call can be
+    use to avoid a more expensive full unmount attempt.
+- **AUTOFS_IOC_EXPIRE**: as described above, this asks if there is
+    anything suitable to expire.  A pointer to a packet:
+
+        struct autofs_packet_expire_multi {
+                int proto_version;              /* Protocol version */
+                int type;                       /* Type of packet */
+                autofs_wqt_t wait_queue_token;
+                int len;
+                char name[NAME_MAX+1];
+        };
+
+     is required.  This is filled in with the name of something
+     that can be unmounted or removed.  If nothing can be expired,
+     `errno` is set to `EAGAIN`.  Even though a `wait_queue_token`
+     is present in the structure, no "wait queue" is established
+     and no acknowledgment is needed.
+- **AUTOFS_IOC_EXPIRE_MULTI**:  This is similar to
+     **AUTOFS_IOC_EXPIRE** except that it causes notification to be
+     sent to the daemon, and it blocks until the daemon acknowledges.
+     The argument is an integer which can contain two different flags.
+
+     **AUTOFS_EXP_IMMEDIATE** causes `last_used` time to be ignored
+     and objects are expired if the are not in use.
+
+     **AUTOFS_EXP_LEAVES** will select a leaf rather than a top-level
+     name to expire.  This is only safe when *maxproto* is 4.
+
+Communicating with autofs: char-device ioctls
+---------------------------------------------
+
+It is not always possible to open the root of an autofs filesystem,
+particularly a *direct* mounted filesystem.  If the automount daemon
+is restarted there is no way for it to regain control of existing
+mounts using any of the above communication channels.  To address this
+need there is a "miscellaneous" character device (major 10, minor 235)
+which can be used to communicate directly with the autofs filesystem.
+It requires CAP_SYS_ADMIN for access.
+
+The `ioctl`s that can be used on this device are described in a separate
+document `autofs4-mount-control.txt`, and are summarized briefly here.
+Each ioctl is passed a pointer to an `autofs_dev_ioctl` structure:
+
+        struct autofs_dev_ioctl {
+                __u32 ver_major;
+                __u32 ver_minor;
+                __u32 size;             /* total size of data passed in
+                                         * including this struct */
+                __s32 ioctlfd;          /* automount command fd */
+
+                __u32 arg1;             /* Command parameters */
+                __u32 arg2;
+
+                char path[0];
+        };
+
+For the **OPEN_MOUNT** and **IS_MOUNTPOINT** commands, the target
+filesystem is identified by the `path`.  All other commands identify
+the filesystem by the `ioctlfd` which is a file descriptor open on the
+root, and which can be returned by **OPEN_MOUNT**.
+
+The `ver_major` and `ver_minor` are in/out parameters which check that
+the requested version is supported, and report the maximum version
+that the kernel module can support.
+
+Commands are:
+
+- **AUTOFS_DEV_IOCTL_VERSION_CMD**: does nothing, except validate and
+    set version numbers.
+- **AUTOFS_DEV_IOCTL_OPENMOUNT_CMD**: return an open file descriptor
+    on the root of an autofs filesystem.  The filesystem is identified
+    by name and device number, which is stored in `arg1`.  Device
+    numbers for existing filesystems can be found in
+    `/proc/self/mountinfo`.
+- **AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD**: same as `close(ioctlfd)`.
+- **AUTOFS_DEV_IOCTL_SETPIPEFD_CMD**: if the  filesystem is in
+    catatonic mode, this can provide the write end of a new pipe
+    in `arg1` to re-establish communication with a daemon.  The
+    process group of the calling process is used to identify the
+    daemon.
+- **AUTOFS_DEV_IOCTL_REQUESTER_CMD**: `path` should be a
+    name within the filesystem that has been auto-mounted on.
+    arg1 is the dev number of the underlying autofs.  On successful
+    return, `arg1` and `arg2` will be the UID and GID of the process
+    which triggered that mount.
+
+- **AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD**: Check if path is a
+    mountpoint of a particular type - see separate documentation for
+    details.
+
+- **AUTOFS_DEV_IOCTL_PROTOVER_CMD**:
+- **AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD**:
+- **AUTOFS_DEV_IOCTL_READY_CMD**:
+- **AUTOFS_DEV_IOCTL_FAIL_CMD**:
+- **AUTOFS_DEV_IOCTL_CATATONIC_CMD**:
+- **AUTOFS_DEV_IOCTL_TIMEOUT_CMD**:
+- **AUTOFS_DEV_IOCTL_EXPIRE_CMD**:
+- **AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD**:  These all have the same
+    function as the similarly named **AUTOFS_IOC** ioctls, except
+    that **FAIL** can be given an explicit error number in `arg1`
+    instead of assuming `ENOENT`, and this **EXPIRE** command
+    corresponds to **AUTOFS_IOC_EXPIRE_MULTI**.
+
+Catatonic mode
+--------------
+
+As mentioned, an autofs mount can enter "catatonic" mode.  This
+happens if a write to the notification pipe fails, or if it is
+explicitly requested by an `ioctl`.
+
+When entering catatonic mode, the pipe is closed and any pending
+notifications are acknowledged with the error `ENOENT`.
+
+Once in catatonic mode attempts to access non-existing names will
+result in `ENOENT` while attempts to access existing directories will
+be treated in the same way as if they came from the daemon, so mount
+traps will not fire.
+
+When the filesystem is mounted a _uid_ and _gid_ can be given which
+set the ownership of directories and symbolic links.  When the
+filesystem is in catatonic mode, any process with a matching UID can
+create directories or symlinks in the root directory, but not in other
+directories.
+
+Catatonic mode can only be left via the
+**AUTOFS_DEV_IOCTL_OPENMOUNT_CMD** ioctl on the `/dev/autofs`.
+
+autofs, name spaces, and shared mounts
+--------------------------------------
+
+With bind mounts and name spaces it is possible for an autofs
+filesystem to appear at multiple places in one or more filesystem
+name spaces.  For this to work sensibly, the autofs filesystem should
+always be mounted "shared". e.g.
+
+> `mount --make-shared /autofs/mount/point`
+
+The automount daemon is only able to mange a single mount location for
+an autofs filesystem and if mounts on that are not 'shared', other
+locations will not behave as expected.  In particular access to those
+other locations will likely result in the `ELOOP` error
+
+> Too many levels of symbolic links
diff --git a/Documentation/filesystems/ntfs.txt b/Documentation/filesystems/ntfs.txt
index 61947facfc0..553f10d0307 100644
--- a/Documentation/filesystems/ntfs.txt
+++ b/Documentation/filesystems/ntfs.txt
@@ -14,7 +14,6 @@ Table of contents
   - The Device-Mapper driver
   - The Software RAID / MD driver
   - Limitations when using the MD driver
-- ChangeLog
 
 
 Overview
@@ -450,270 +449,3 @@ number of sectors BEFORE attempting to use it.  You have been warned!
 
 Even better is to simply use the Device-Mapper for linear raid and then you do
 not have this problem with odd numbers of sectors.
-
-
-ChangeLog
-=========
-
-2.1.30:
-	- Fix writev() (it kept writing the first segment over and over again
-	  instead of moving onto subsequent segments).
-	- Fix crash in ntfs_mft_record_alloc() when mapping the new extent mft
-	  record failed.
-2.1.29:
-	- Fix a deadlock when mounting read-write.
-2.1.28:
-	- Fix a deadlock.
-2.1.27:
-	- Implement page migration support so the kernel can move memory used
-	  by NTFS files and directories around for management purposes.
-	- Add support for writing to sparse files created with Windows XP SP2.
-	- Many minor improvements and bug fixes.
-2.1.26:
-	- Implement support for sector sizes above 512 bytes (up to the maximum
-	  supported by NTFS which is 4096 bytes).
-	- Enhance support for NTFS volumes which were supported by Windows but
-	  not by Linux due to invalid attribute list attribute flags.
-	- A few minor updates and bug fixes.
-2.1.25:
-	- Write support is now extended with write(2) being able to both
-	  overwrite existing file data and to extend files.  Also, if a write
-	  to a sparse region occurs, write(2) will fill in the hole.  Note,
-	  mmap(2) based writes still do not support writing into holes or
-	  writing beyond the initialized size.
-	- Write support has a new feature and that is that truncate(2) and
-	  open(2) with O_TRUNC are now implemented thus files can be both made
-	  smaller and larger.
-	- Note: Both write(2) and truncate(2)/open(2) with O_TRUNC still have
-	  limitations in that they
-	  - only provide limited support for highly fragmented files.
-	  - only work on regular, i.e. uncompressed and unencrypted files.
-	  - never create sparse files although this will change once directory
-	    operations are implemented.
-	- Lots of bug fixes and enhancements across the board.
-2.1.24:
-	- Support journals ($LogFile) which have been modified by chkdsk.  This
-	  means users can boot into Windows after we marked the volume dirty.
-	  The Windows boot will run chkdsk and then reboot.  The user can then
-	  immediately boot into Linux rather than having to do a full Windows
-	  boot first before rebooting into Linux and we will recognize such a
-	  journal and empty it as it is clean by definition.
-	- Support journals ($LogFile) with only one restart page as well as
-	  journals with two different restart pages.  We sanity check both and
-	  either use the only sane one or the more recent one of the two in the
-	  case that both are valid.
-	- Lots of bug fixes and enhancements across the board.
-2.1.23:
-	- Stamp the user space journal, aka transaction log, aka $UsnJrnl, if
-	  it is present and active thus telling Windows and applications using
-	  the transaction log that changes can have happened on the volume
-	  which are not recorded in $UsnJrnl.
-	- Detect the case when Windows has been hibernated (suspended to disk)
-	  and if this is the case do not allow (re)mounting read-write to
-	  prevent data corruption when you boot back into the suspended
-	  Windows session.
-	- Implement extension of resident files using the normal file write
-	  code paths, i.e. most very small files can be extended to be a little
-	  bit bigger but not by much.
-	- Add new mount option "disable_sparse".  (See list of mount options
-	  above for details.)
-	- Improve handling of ntfs volumes with errors and strange boot sectors
-	  in particular.
-	- Fix various bugs including a nasty deadlock that appeared in recent
-	  kernels (around 2.6.11-2.6.12 timeframe).
-2.1.22:
-	- Improve handling of ntfs volumes with errors.
-	- Fix various bugs and race conditions.
-2.1.21:
-	- Fix several race conditions and various other bugs.
-	- Many internal cleanups, code reorganization, optimizations, and mft
-	  and index record writing code rewritten to fit in with the changes.
-	- Update Documentation/filesystems/ntfs.txt with instructions on how to
-	  use the Device-Mapper driver with NTFS ftdisk/LDM raid.
-2.1.20:
-	- Fix two stupid bugs introduced in 2.1.18 release.
-2.1.19:
-	- Minor bugfix in handling of the default upcase table.
-	- Many internal cleanups and improvements.  Many thanks to Linus
-	  Torvalds and Al Viro for the help and advice with the sparse
-	  annotations and cleanups.
-2.1.18:
-	- Fix scheduling latencies at mount time.  (Ingo Molnar)
-	- Fix endianness bug in a little traversed portion of the attribute
-	  lookup code.
-2.1.17:
-	- Fix bugs in mount time error code paths.
-2.1.16:
-	- Implement access time updates (including mtime and ctime).
-	- Implement fsync(2), fdatasync(2), and msync(2) system calls.
-	- Enable the readv(2) and writev(2) system calls.
-	- Enable access via the asynchronous io (aio) API by adding support for
-	  the aio_read(3) and aio_write(3) functions.
-2.1.15:
-	- Invalidate quotas when (re)mounting read-write.
-	  NOTE:  This now only leave user space journalling on the side.  (See
-	  note for version 2.1.13, below.)
-2.1.14:
-	- Fix an NFSd caused deadlock reported by several users.
-2.1.13:
-	- Implement writing of inodes (access time updates are not implemented
-	  yet so mounting with -o noatime,nodiratime is enforced).
-	- Enable writing out of resident files so you can now overwrite any
-	  uncompressed, unencrypted, nonsparse file as long as you do not
-	  change the file size.
-	- Add housekeeping of ntfs system files so that ntfsfix no longer needs
-	  to be run after writing to an NTFS volume.
-	  NOTE:  This still leaves quota tracking and user space journalling on
-	  the side but they should not cause data corruption.  In the worst
-	  case the charged quotas will be out of date ($Quota) and some
-	  userspace applications might get confused due to the out of date
-	  userspace journal ($UsnJrnl).
-2.1.12:
-	- Fix the second fix to the decompression engine from the 2.1.9 release
-	  and some further internals cleanups.
-2.1.11:
-	- Driver internal cleanups.
-2.1.10:
-	- Force read-only (re)mounting of volumes with unsupported volume
-	  flags and various cleanups.
-2.1.9:
-	- Fix two bugs in handling of corner cases in the decompression engine.
-2.1.8:
-	- Read the $MFT mirror and compare it to the $MFT and if the two do not
-	  match, force a read-only mount and do not allow read-write remounts.
-	- Read and parse the $LogFile journal and if it indicates that the
-	  volume was not shutdown cleanly, force a read-only mount and do not
-	  allow read-write remounts.  If the $LogFile indicates a clean
-	  shutdown and a read-write (re)mount is requested, empty $LogFile to
-	  ensure that Windows cannot cause data corruption by replaying a stale
-	  journal after Linux has written to the volume.
-	- Improve time handling so that the NTFS time is fully preserved when
-	  converted to kernel time and only up to 99 nano-seconds are lost when
-	  kernel time is converted to NTFS time.
-2.1.7:
-	- Enable NFS exporting of mounted NTFS volumes.
-2.1.6:
-	- Fix minor bug in handling of compressed directories that fixes the
-	  erroneous "du" and "stat" output people reported.
-2.1.5:
-	- Minor bug fix in attribute list attribute handling that fixes the
-	  I/O errors on "ls" of certain fragmented files found by at least two
-	  people running Windows XP.
-2.1.4:
-	- Minor update allowing compilation with all gcc versions (well, the
-	  ones the kernel can be compiled with anyway).
-2.1.3:
-	- Major bug fixes for reading files and volumes in corner cases which
-	  were being hit by Windows 2k/XP users.
-2.1.2:
-	- Major bug fixes alleviating the hangs in statfs experienced by some
-	  users.
-2.1.1:
-	- Update handling of compressed files so people no longer get the
-	  frequently reported warning messages about initialized_size !=
-	  data_size.
-2.1.0:
-	- Add configuration option for developmental write support.
-	- Initial implementation of file overwriting. (Writes to resident files
-	  are not written out to disk yet, so avoid writing to files smaller
-	  than about 1kiB.)
-	- Intercept/abort changes in file size as they are not implemented yet.
-2.0.25:
-	- Minor bugfixes in error code paths and small cleanups.
-2.0.24:
-	- Small internal cleanups.
-	- Support for sendfile system call. (Christoph Hellwig)
-2.0.23:
-	- Massive internal locking changes to mft record locking. Fixes
-	  various race conditions and deadlocks.
-	- Fix ntfs over loopback for compressed files by adding an
-	  optimization barrier. (gcc was screwing up otherwise ?)
-	Thanks go to Christoph Hellwig for pointing these two out:
-	- Remove now unused function fs/ntfs/malloc.h::vmalloc_nofs().
-	- Fix ntfs_free() for ia64 and parisc.
-2.0.22:
-	- Small internal cleanups.
-2.0.21:
-	These only affect 32-bit architectures:
-	- Check for, and refuse to mount too large volumes (maximum is 2TiB).
-	- Check for, and refuse to open too large files and directories
-	  (maximum is 16TiB).
-2.0.20:
-	- Support non-resident directory index bitmaps. This means we now cope
-	  with huge directories without problems.
-	- Fix a page leak that manifested itself in some cases when reading
-	  directory contents.
-	- Internal cleanups.
-2.0.19:
-	- Fix race condition and improvements in block i/o interface.
-	- Optimization when reading compressed files.
-2.0.18:
-	- Fix race condition in reading of compressed files.
-2.0.17:
-	- Cleanups and optimizations.
-2.0.16:
-	- Fix stupid bug introduced in 2.0.15 in new attribute inode API.
-	- Big internal cleanup replacing the mftbmp access hacks by using the
-	  new attribute inode API instead.
-2.0.15:
-	- Bug fix in parsing of remount options.
-	- Internal changes implementing attribute (fake) inodes allowing all
-	  attribute i/o to go via the page cache and to use all the normal
-	  vfs/mm functionality.
-2.0.14:
-	- Internal changes improving run list merging code and minor locking
-	  change to not rely on BKL in ntfs_statfs().
-2.0.13:
-	- Internal changes towards using iget5_locked() in preparation for
-	  fake inodes and small cleanups to ntfs_volume structure.
-2.0.12:
-	- Internal cleanups in address space operations made possible by the
-	  changes introduced in the previous release.
-2.0.11:
-	- Internal updates and cleanups introducing the first step towards
-	  fake inode based attribute i/o.
-2.0.10:
-	- Microsoft says that the maximum number of inodes is 2^32 - 1. Update
-	  the driver accordingly to only use 32-bits to store inode numbers on
-	  32-bit architectures. This improves the speed of the driver a little.
-2.0.9:
-	- Change decompression engine to use a single buffer. This should not
-	  affect performance except perhaps on the most heavy i/o on SMP
-	  systems when accessing multiple compressed files from multiple
-	  devices simultaneously.
-	- Minor updates and cleanups.
-2.0.8:
-	- Remove now obsolete show_inodes and posix mount option(s).
-	- Restore show_sys_files mount option.
-	- Add new mount option case_sensitive, to determine if the driver
-	  treats file names as case sensitive or not.
-	- Mostly drop support for short file names (for backwards compatibility
-	  we only support accessing files via their short file name if one
-	  exists).
-	- Fix dcache aliasing issues wrt short/long file names.
-	- Cleanups and minor fixes.
-2.0.7:
-	- Just cleanups.
-2.0.6:
-	- Major bugfix to make compatible with other kernel changes. This fixes
-	  the hangs/oopses on umount.
-	- Locking cleanup in directory operations (remove BKL usage).
-2.0.5:
-	- Major buffer overflow bug fix.
-	- Minor cleanups and updates for kernel 2.5.12.
-2.0.4:
-	- Cleanups and updates for kernel 2.5.11.
-2.0.3:
-	- Small bug fixes, cleanups, and performance improvements.
-2.0.2:
-	- Use default fmask of 0177 so that files are no executable by default.
-	  If you want owner executable files, just use fmask=0077.
-	- Update for kernel 2.5.9 but preserve backwards compatibility with
-	  kernel 2.5.7.
-	- Minor bug fixes, cleanups, and updates.
-2.0.1:
-	- Minor updates, primarily set the executable bit by default on files
-	  so they can be executed.
-2.0.0:
-	- Started ChangeLog.
-
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 61d65cc65c5..fceff7c00a3 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -237,7 +237,7 @@ noted. This means that most methods can block safely. All methods are
 only called from a process context (i.e. not from an interrupt handler
 or bottom half).
 
-  alloc_inode: this method is called by inode_alloc() to allocate memory
+  alloc_inode: this method is called by alloc_inode() to allocate memory
  	for struct inode and initialize it.  If this function is not
  	defined, a simple 'struct inode' is allocated.  Normally
  	alloc_inode will be used to allocate a larger structure which
@@ -826,7 +826,7 @@ struct file_operations {
 	int (*flock) (struct file *, int, struct file_lock *);
 	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned int);
 	ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned int);
-	int (*setlease)(struct file *, long arg, struct file_lock **);
+	int (*setlease)(struct file *, long arg, struct file_lock **, void **);
 	long (*fallocate)(struct file *, int mode, loff_t offset, loff_t len);
 	int (*show_fdinfo)(struct seq_file *m, struct file *f);
 };
@@ -895,8 +895,9 @@ otherwise noted.
   splice_read: called by the VFS to splice data from file to a pipe. This
 	       method is used by the splice(2) system call
 
-  setlease: called by the VFS to set or release a file lock lease.
-	    setlease has the file_lock_lock held and must not sleep.
+  setlease: called by the VFS to set or release a file lock lease. setlease
+	    implementations should call generic_setlease to record or remove
+	    the lease in the inode after setting it.
 
   fallocate: called by the VFS to preallocate blocks or punch a hole.