summaryrefslogtreecommitdiffstats
path: root/Documentation/filesystems
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation/filesystems')
-rw-r--r--Documentation/filesystems/Locking4
-rw-r--r--Documentation/filesystems/devpts.txt132
-rw-r--r--Documentation/filesystems/files.txt6
-rw-r--r--Documentation/filesystems/ocfs2.txt3
-rw-r--r--Documentation/filesystems/proc.txt36
-rw-r--r--Documentation/filesystems/ubifs.txt3
-rw-r--r--Documentation/filesystems/vfs.txt5
-rw-r--r--Documentation/filesystems/xfs.txt4
8 files changed, 174 insertions, 19 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 23d2f4460de..cfbfa15a46b 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -394,11 +394,10 @@ prototypes:
unsigned long (*get_unmapped_area)(struct file *, unsigned long,
unsigned long, unsigned long, unsigned long);
int (*check_flags)(int);
- int (*dir_notify)(struct file *, unsigned long);
};
locking rules:
- All except ->poll() may block.
+ All may block.
BKL
llseek: no (see below)
read: no
@@ -424,7 +423,6 @@ sendfile: no
sendpage: no
get_unmapped_area: no
check_flags: no
-dir_notify: no
->llseek() locking has moved from llseek to the individual llseek
implementations. If your fs is not using generic_file_llseek, you
diff --git a/Documentation/filesystems/devpts.txt b/Documentation/filesystems/devpts.txt
new file mode 100644
index 00000000000..68dffd87f9b
--- /dev/null
+++ b/Documentation/filesystems/devpts.txt
@@ -0,0 +1,132 @@
+
+To support containers, we now allow multiple instances of devpts filesystem,
+such that indices of ptys allocated in one instance are independent of indices
+allocated in other instances of devpts.
+
+To preserve backward compatibility, this support for multiple instances is
+enabled only if:
+
+ - CONFIG_DEVPTS_MULTIPLE_INSTANCES=y, and
+ - '-o newinstance' mount option is specified while mounting devpts
+
+IOW, devpts now supports both single-instance and multi-instance semantics.
+
+If CONFIG_DEVPTS_MULTIPLE_INSTANCES=n, there is no change in behavior and
+this referred to as the "legacy" mode. In this mode, the new mount options
+(-o newinstance and -o ptmxmode) will be ignored with a 'bogus option' message
+on console.
+
+If CONFIG_DEVPTS_MULTIPLE_INSTANCES=y and devpts is mounted without the
+'newinstance' option (as in current start-up scripts) the new mount binds
+to the initial kernel mount of devpts. This mode is referred to as the
+'single-instance' mode and the current, single-instance semantics are
+preserved, i.e PTYs are common across the system.
+
+The only difference between this single-instance mode and the legacy mode
+is the presence of new, '/dev/pts/ptmx' node with permissions 0000, which
+can safely be ignored.
+
+If CONFIG_DEVPTS_MULTIPLE_INSTANCES=y and 'newinstance' option is specified,
+the mount is considered to be in the multi-instance mode and a new instance
+of the devpts fs is created. Any ptys created in this instance are independent
+of ptys in other instances of devpts. Like in the single-instance mode, the
+/dev/pts/ptmx node is present. To effectively use the multi-instance mode,
+open of /dev/ptmx must be a redirected to '/dev/pts/ptmx' using a symlink or
+bind-mount.
+
+Eg: A container startup script could do the following:
+
+ $ chmod 0666 /dev/pts/ptmx
+ $ rm /dev/ptmx
+ $ ln -s pts/ptmx /dev/ptmx
+ $ ns_exec -cm /bin/bash
+
+ # We are now in new container
+
+ $ umount /dev/pts
+ $ mount -t devpts -o newinstance lxcpts /dev/pts
+ $ sshd -p 1234
+
+where 'ns_exec -cm /bin/bash' calls clone() with CLONE_NEWNS flag and execs
+/bin/bash in the child process. A pty created by the sshd is not visible in
+the original mount of /dev/pts.
+
+User-space changes
+------------------
+
+In multi-instance mode (i.e '-o newinstance' mount option is specified at least
+once), following user-space issues should be noted.
+
+1. If -o newinstance mount option is never used, /dev/pts/ptmx can be ignored
+ and no change is needed to system-startup scripts.
+
+2. To effectively use multi-instance mode (i.e -o newinstance is specified)
+ administrators or startup scripts should "redirect" open of /dev/ptmx to
+ /dev/pts/ptmx using either a bind mount or symlink.
+
+ $ mount -t devpts -o newinstance devpts /dev/pts
+
+ followed by either
+
+ $ rm /dev/ptmx
+ $ ln -s pts/ptmx /dev/ptmx
+ $ chmod 666 /dev/pts/ptmx
+ or
+ $ mount -o bind /dev/pts/ptmx /dev/ptmx
+
+3. The '/dev/ptmx -> pts/ptmx' symlink is the preferred method since it
+ enables better error-reporting and treats both single-instance and
+ multi-instance mounts similarly.
+
+ But this method requires that system-startup scripts set the mode of
+ /dev/pts/ptmx correctly (default mode is 0000). The scripts can set the
+ mode by, either
+
+ - adding ptmxmode mount option to devpts entry in /etc/fstab, or
+ - using 'chmod 0666 /dev/pts/ptmx'
+
+4. If multi-instance mode mount is needed for containers, but the system
+ startup scripts have not yet been updated, container-startup scripts
+ should bind mount /dev/ptmx to /dev/pts/ptmx to avoid breaking single-
+ instance mounts.
+
+ Or, in general, container-startup scripts should use:
+
+ mount -t devpts -o newinstance -o ptmxmode=0666 devpts /dev/pts
+ if [ ! -L /dev/ptmx ]; then
+ mount -o bind /dev/pts/ptmx /dev/ptmx
+ fi
+
+ When all devpts mounts are multi-instance, /dev/ptmx can permanently be
+ a symlink to pts/ptmx and the bind mount can be ignored.
+
+5. A multi-instance mount that is not accompanied by the /dev/ptmx to
+ /dev/pts/ptmx redirection would result in an unusable/unreachable pty.
+
+ mount -t devpts -o newinstance lxcpts /dev/pts
+
+ immediately followed by:
+
+ open("/dev/ptmx")
+
+ would create a pty, say /dev/pts/7, in the initial kernel mount.
+ But /dev/pts/7 would be invisible in the new mount.
+
+6. The permissions for /dev/pts/ptmx node should be specified when mounting
+ /dev/pts, using the '-o ptmxmode=%o' mount option (default is 0000).
+
+ mount -t devpts -o newinstance -o ptmxmode=0644 devpts /dev/pts
+
+ The permissions can be later be changed as usual with 'chmod'.
+
+ chmod 666 /dev/pts/ptmx
+
+7. A mount of devpts without the 'newinstance' option results in binding to
+ initial kernel mount. This behavior while preserving legacy semantics,
+ does not provide strict isolation in a container environment. i.e by
+ mounting devpts without the 'newinstance' option, a container could
+ get visibility into the 'host' or root container's devpts.
+
+ To workaround this and have strict isolation, all mounts of devpts,
+ including the mount in the root container, should use the newinstance
+ option.
diff --git a/Documentation/filesystems/files.txt b/Documentation/filesystems/files.txt
index bb0142f6108..ac2facc50d2 100644
--- a/Documentation/filesystems/files.txt
+++ b/Documentation/filesystems/files.txt
@@ -76,13 +76,13 @@ the fdtable structure -
5. Handling of the file structures is special. Since the look-up
of the fd (fget()/fget_light()) are lock-free, it is possible
that look-up may race with the last put() operation on the
- file structure. This is avoided using atomic_inc_not_zero()
+ file structure. This is avoided using atomic_long_inc_not_zero()
on ->f_count :
rcu_read_lock();
file = fcheck_files(files, fd);
if (file) {
- if (atomic_inc_not_zero(&file->f_count))
+ if (atomic_long_inc_not_zero(&file->f_count))
*fput_needed = 1;
else
/* Didn't get the reference, someone's freed */
@@ -92,7 +92,7 @@ the fdtable structure -
....
return file;
- atomic_inc_not_zero() detects if refcounts is already zero or
+ atomic_long_inc_not_zero() detects if refcounts is already zero or
goes to zero during increment. If it does, we fail
fget()/fget_light().
diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt
index 67310fbbb7d..c2a0871280a 100644
--- a/Documentation/filesystems/ocfs2.txt
+++ b/Documentation/filesystems/ocfs2.txt
@@ -31,7 +31,6 @@ Features which OCFS2 does not support yet:
- quotas
- Directory change notification (F_NOTIFY)
- Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease)
- - POSIX ACLs
Mount options
=============
@@ -79,3 +78,5 @@ inode64 Indicates that Ocfs2 is allowed to create inodes at
bits of significance.
user_xattr (*) Enables Extended User Attributes.
nouser_xattr Disables Extended User Attributes.
+acl Enables POSIX Access Control Lists support.
+noacl (*) Disables POSIX Access Control Lists support.
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index bb1b0dd3bfc..d105eb45282 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -140,6 +140,7 @@ Table 1-1: Process specific entries in /proc
statm Process memory status information
status Process status in human readable form
wchan If CONFIG_KALLSYMS is set, a pre-decoded wchan
+ stack Report full stack trace, enable via CONFIG_STACKTRACE
smaps Extension based on maps, the rss size for each mapped file
..............................................................................
@@ -1339,10 +1340,13 @@ nmi_watchdog
Enables/Disables the NMI watchdog on x86 systems. When the value is non-zero
the NMI watchdog is enabled and will continuously test all online cpus to
-determine whether or not they are still functioning properly.
+determine whether or not they are still functioning properly. Currently,
+passing "nmi_watchdog=" parameter at boot time is required for this function
+to work.
-Because the NMI watchdog shares registers with oprofile, by disabling the NMI
-watchdog, oprofile may have more registers to utilize.
+If LAPIC NMI watchdog method is in use (nmi_watchdog=2 kernel parameter), the
+NMI watchdog shares registers with oprofile. By disabling the NMI watchdog,
+oprofile may have more registers to utilize.
msgmni
------
@@ -1382,6 +1386,15 @@ swapcache reclaim. Decreasing vfs_cache_pressure causes the kernel to prefer
to retain dentry and inode caches. Increasing vfs_cache_pressure beyond 100
causes the kernel to prefer to reclaim dentries and inodes.
+dirty_background_bytes
+----------------------
+
+Contains the amount of dirty memory at which the pdflush background writeback
+daemon will start writeback.
+
+If dirty_background_bytes is written, dirty_background_ratio becomes a function
+of its value (dirty_background_bytes / the amount of dirtyable system memory).
+
dirty_background_ratio
----------------------
@@ -1390,14 +1403,29 @@ pages + file cache, not including locked pages and HugePages), the number of
pages at which the pdflush background writeback daemon will start writing out
dirty data.
+If dirty_background_ratio is written, dirty_background_bytes becomes a function
+of its value (dirty_background_ratio * the amount of dirtyable system memory).
+
+dirty_bytes
+-----------
+
+Contains the amount of dirty memory at which a process generating disk writes
+will itself start writeback.
+
+If dirty_bytes is written, dirty_ratio becomes a function of its value
+(dirty_bytes / the amount of dirtyable system memory).
+
dirty_ratio
------------------
+-----------
Contains, as a percentage of the dirtyable system memory (free pages + mapped
pages + file cache, not including locked pages and HugePages), the number of
pages at which a process which is generating disk writes will itself start
writing out dirty data.
+If dirty_ratio is written, dirty_bytes becomes a function of its value
+(dirty_ratio * the amount of dirtyable system memory).
+
dirty_writeback_centisecs
-------------------------
diff --git a/Documentation/filesystems/ubifs.txt b/Documentation/filesystems/ubifs.txt
index dd84ea3c10d..84da2a4ba25 100644
--- a/Documentation/filesystems/ubifs.txt
+++ b/Documentation/filesystems/ubifs.txt
@@ -95,6 +95,9 @@ no_chk_data_crc skip checking of CRCs on data nodes in order to
of this option is that corruption of the contents
of a file can go unnoticed.
chk_data_crc (*) do not skip checking CRCs on data nodes
+compr=none override default compressor and set it to "none"
+compr=lzo override default compressor and set it to "lzo"
+compr=zlib override default compressor and set it to "zlib"
Quick usage instructions
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 5579bda58a6..ef19afa186a 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -733,7 +733,6 @@ struct file_operations {
ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
int (*check_flags)(int);
- int (*dir_notify)(struct file *filp, unsigned long arg);
int (*flock) (struct file *, int, struct file_lock *);
ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned int);
ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned int);
@@ -800,8 +799,6 @@ otherwise noted.
check_flags: called by the fcntl(2) system call for F_SETFL command
- dir_notify: called by the fcntl(2) system call for F_NOTIFY command
-
flock: called by the flock(2) system call
splice_write: called by the VFS to splice data from a pipe to a file. This
@@ -931,7 +928,7 @@ manipulate dentries:
d_lookup: look up a dentry given its parent and path name component
It looks up the child of that given name from the dcache
hash table. If it is found, the reference count is incremented
- and the dentry is returned. The caller must use d_put()
+ and the dentry is returned. The caller must use dput()
to free the dentry when it finishes using it.
For further information on dentry locking, please refer to the document
diff --git a/Documentation/filesystems/xfs.txt b/Documentation/filesystems/xfs.txt
index 0a1668ba260..9878f50d6ed 100644
--- a/Documentation/filesystems/xfs.txt
+++ b/Documentation/filesystems/xfs.txt
@@ -229,10 +229,6 @@ The following sysctls are available for the XFS filesystem:
ISGID bit is cleared if the irix_sgid_inherit compatibility sysctl
is set.
- fs.xfs.restrict_chown (Min: 0 Default: 1 Max: 1)
- Controls whether unprivileged users can use chown to "give away"
- a file to another user.
-
fs.xfs.inherit_sync (Min: 0 Default: 1 Max: 1)
Setting this to "1" will cause the "sync" flag set
by the xfs_io(8) chattr command on a directory to be