summaryrefslogtreecommitdiffstats
path: root/Documentation
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/ABI/testing/debugfs-olpc16
-rw-r--r--Documentation/ABI/testing/sysfs-block-dm25
-rw-r--r--Documentation/ABI/testing/sysfs-driver-samsung-laptop18
-rw-r--r--Documentation/Makefile2
-rw-r--r--Documentation/clk.txt233
-rw-r--r--Documentation/device-mapper/thin-provisioning.txt65
-rw-r--r--Documentation/device-mapper/verity.txt194
-rw-r--r--Documentation/devicetree/bindings/arm/atmel-at91.txt60
-rw-r--r--Documentation/devicetree/bindings/arm/atmel-pmc.txt11
-rw-r--r--Documentation/devicetree/bindings/arm/spear.txt8
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-omap.txt36
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-twl4030.txt23
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio_i2c.txt32
-rw-r--r--Documentation/devicetree/bindings/gpio/sodaville.txt48
-rw-r--r--Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt33
-rw-r--r--Documentation/devicetree/bindings/mtd/arm-versatile.txt4
-rw-r--r--Documentation/devicetree/bindings/mtd/atmel-dataflash.txt3
-rw-r--r--Documentation/devicetree/bindings/mtd/atmel-nand.txt41
-rw-r--r--Documentation/devicetree/bindings/mtd/fsl-upm-nand.txt4
-rw-r--r--Documentation/devicetree/bindings/mtd/gpio-control-nand.txt3
-rw-r--r--Documentation/devicetree/bindings/mtd/mtd-physmap.txt23
-rw-r--r--Documentation/devicetree/bindings/mtd/nand.txt7
-rw-r--r--Documentation/devicetree/bindings/mtd/partition.txt38
-rw-r--r--Documentation/devicetree/bindings/usb/atmel-usb.txt49
-rw-r--r--Documentation/devicetree/bindings/usb/tegra-usb.txt13
-rw-r--r--Documentation/devicetree/usage-model.txt412
-rw-r--r--Documentation/dma-buf-sharing.txt120
-rw-r--r--Documentation/filesystems/ext4.txt8
-rw-r--r--Documentation/filesystems/files.txt4
-rw-r--r--Documentation/gpio.txt40
-rw-r--r--Documentation/i2c/busses/i2c-i8011
-rw-r--r--Documentation/kernel-parameters.txt8
-rw-r--r--Documentation/laptops/asus-laptop.txt2
-rw-r--r--Documentation/laptops/sony-laptop.txt5
-rw-r--r--Documentation/virtual/kvm/api.txt259
-rw-r--r--Documentation/virtual/kvm/ppc-pv.txt24
-rw-r--r--Documentation/vm/Makefile8
-rw-r--r--Documentation/vm/hugepage-mmap.c91
-rw-r--r--Documentation/vm/hugepage-shm.c98
-rw-r--r--Documentation/vm/map_hugetlb.c77
-rw-r--r--Documentation/vm/page-types.c1102
-rw-r--r--Documentation/watchdog/00-INDEX19
-rw-r--r--Documentation/watchdog/convert_drivers_to_kernel_api.txt4
-rw-r--r--Documentation/watchdog/watchdog-kernel-api.txt11
44 files changed, 1802 insertions, 1480 deletions
diff --git a/Documentation/ABI/testing/debugfs-olpc b/Documentation/ABI/testing/debugfs-olpc
new file mode 100644
index 00000000000..bd76cc6d55f
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-olpc
@@ -0,0 +1,16 @@
+What: /sys/kernel/debug/olpc-ec/cmd
+Date: Dec 2011
+KernelVersion: 3.4
+Contact: devel@lists.laptop.org
+Description:
+
+A generic interface for executing OLPC Embedded Controller commands and
+reading their responses.
+
+To execute a command, write data with the format: CC:N A A A A
+CC is the (hex) command, N is the count of expected reply bytes, and A A A A
+are optional (hex) arguments.
+
+To read the response (if any), read from the generic node after executing
+a command. Hex reply bytes will be returned, *whether or not* they came from
+the immediately previous command.
diff --git a/Documentation/ABI/testing/sysfs-block-dm b/Documentation/ABI/testing/sysfs-block-dm
new file mode 100644
index 00000000000..87ca5691e29
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-block-dm
@@ -0,0 +1,25 @@
+What: /sys/block/dm-<num>/dm/name
+Date: January 2009
+KernelVersion: 2.6.29
+Contact: dm-devel@redhat.com
+Description: Device-mapper device name.
+ Read-only string containing mapped device name.
+Users: util-linux, device-mapper udev rules
+
+What: /sys/block/dm-<num>/dm/uuid
+Date: January 2009
+KernelVersion: 2.6.29
+Contact: dm-devel@redhat.com
+Description: Device-mapper device UUID.
+ Read-only string containing DM-UUID or empty string
+ if DM-UUID is not set.
+Users: util-linux, device-mapper udev rules
+
+What: /sys/block/dm-<num>/dm/suspended
+Date: June 2009
+KernelVersion: 2.6.31
+Contact: dm-devel@redhat.com
+Description: Device-mapper device suspend state.
+ Contains the value 1 while the device is suspended.
+ Otherwise it contains 0. Read-only attribute.
+Users: util-linux, device-mapper udev rules
diff --git a/Documentation/ABI/testing/sysfs-driver-samsung-laptop b/Documentation/ABI/testing/sysfs-driver-samsung-laptop
index e82e7c2b8f8..678819a3f8b 100644
--- a/Documentation/ABI/testing/sysfs-driver-samsung-laptop
+++ b/Documentation/ABI/testing/sysfs-driver-samsung-laptop
@@ -17,3 +17,21 @@ Description: Some Samsung laptops have different "performance levels"
Specifically, not all support the "overclock" option,
and it's still unknown if this value even changes
anything, other than making the user feel a bit better.
+
+What: /sys/devices/platform/samsung/battery_life_extender
+Date: December 1, 2011
+KernelVersion: 3.3
+Contact: Corentin Chary <corentin.chary@gmail.com>
+Description: Max battery charge level can be modified, battery cycle
+ life can be extended by reducing the max battery charge
+ level.
+ 0 means normal battery mode (100% charge)
+ 1 means battery life extender mode (80% charge)
+
+What: /sys/devices/platform/samsung/usb_charge
+Date: December 1, 2011
+KernelVersion: 3.3
+Contact: Corentin Chary <corentin.chary@gmail.com>
+Description: Use your USB ports to charge devices, even
+ when your laptop is powered off.
+ 1 means enabled, 0 means disabled.
diff --git a/Documentation/Makefile b/Documentation/Makefile
index 9b4bc5c76f3..30b656ece7a 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -1,3 +1,3 @@
obj-m := DocBook/ accounting/ auxdisplay/ connector/ \
filesystems/ filesystems/configfs/ ia64/ laptops/ networking/ \
- pcmcia/ spi/ timers/ vm/ watchdog/src/
+ pcmcia/ spi/ timers/ watchdog/src/
diff --git a/Documentation/clk.txt b/Documentation/clk.txt
new file mode 100644
index 00000000000..1943fae014f
--- /dev/null
+++ b/Documentation/clk.txt
@@ -0,0 +1,233 @@
+ The Common Clk Framework
+ Mike Turquette <mturquette@ti.com>
+
+This document endeavours to explain the common clk framework details,
+and how to port a platform over to this framework. It is not yet a
+detailed explanation of the clock api in include/linux/clk.h, but
+perhaps someday it will include that information.
+
+ Part 1 - introduction and interface split
+
+The common clk framework is an interface to control the clock nodes
+available on various devices today. This may come in the form of clock
+gating, rate adjustment, muxing or other operations. This framework is
+enabled with the CONFIG_COMMON_CLK option.
+
+The interface itself is divided into two halves, each shielded from the
+details of its counterpart. First is the common definition of struct
+clk which unifies the framework-level accounting and infrastructure that
+has traditionally been duplicated across a variety of platforms. Second
+is a common implementation of the clk.h api, defined in
+drivers/clk/clk.c. Finally there is struct clk_ops, whose operations
+are invoked by the clk api implementation.
+
+The second half of the interface is comprised of the hardware-specific
+callbacks registered with struct clk_ops and the corresponding
+hardware-specific structures needed to model a particular clock. For
+the remainder of this document any reference to a callback in struct
+clk_ops, such as .enable or .set_rate, implies the hardware-specific
+implementation of that code. Likewise, references to struct clk_foo
+serve as a convenient shorthand for the implementation of the
+hardware-specific bits for the hypothetical "foo" hardware.
+
+Tying the two halves of this interface together is struct clk_hw, which
+is defined in struct clk_foo and pointed to within struct clk. This
+allows easy for navigation between the two discrete halves of the common
+clock interface.
+
+ Part 2 - common data structures and api
+
+Below is the common struct clk definition from
+include/linux/clk-private.h, modified for brevity:
+
+ struct clk {
+ const char *name;
+ const struct clk_ops *ops;
+ struct clk_hw *hw;
+ char **parent_names;
+ struct clk **parents;
+ struct clk *parent;
+ struct hlist_head children;
+ struct hlist_node child_node;
+ ...
+ };
+
+The members above make up the core of the clk tree topology. The clk
+api itself defines several driver-facing functions which operate on
+struct clk. That api is documented in include/linux/clk.h.
+
+Platforms and devices utilizing the common struct clk use the struct
+clk_ops pointer in struct clk to perform the hardware-specific parts of
+the operations defined in clk.h:
+
+ struct clk_ops {
+ int (*prepare)(struct clk_hw *hw);
+ void (*unprepare)(struct clk_hw *hw);
+ int (*enable)(struct clk_hw *hw);
+ void (*disable)(struct clk_hw *hw);
+ int (*is_enabled)(struct clk_hw *hw);
+ unsigned long (*recalc_rate)(struct clk_hw *hw,
+ unsigned long parent_rate);
+ long (*round_rate)(struct clk_hw *hw, unsigned long,
+ unsigned long *);
+ int (*set_parent)(struct clk_hw *hw, u8 index);
+ u8 (*get_parent)(struct clk_hw *hw);
+ int (*set_rate)(struct clk_hw *hw, unsigned long);
+ void (*init)(struct clk_hw *hw);
+ };
+
+ Part 3 - hardware clk implementations
+
+The strength of the common struct clk comes from its .ops and .hw pointers
+which abstract the details of struct clk from the hardware-specific bits, and
+vice versa. To illustrate consider the simple gateable clk implementation in
+drivers/clk/clk-gate.c:
+
+struct clk_gate {
+ struct clk_hw hw;
+ void __iomem *reg;
+ u8 bit_idx;
+ ...
+};
+
+struct clk_gate contains struct clk_hw hw as well as hardware-specific
+knowledge about which register and bit controls this clk's gating.
+Nothing about clock topology or accounting, such as enable_count or
+notifier_count, is needed here. That is all handled by the common
+framework code and struct clk.
+
+Let's walk through enabling this clk from driver code:
+
+ struct clk *clk;
+ clk = clk_get(NULL, "my_gateable_clk");
+
+ clk_prepare(clk);
+ clk_enable(clk);
+
+The call graph for clk_enable is very simple:
+
+clk_enable(clk);
+ clk->ops->enable(clk->hw);
+ [resolves to...]
+ clk_gate_enable(hw);
+ [resolves struct clk gate with to_clk_gate(hw)]
+ clk_gate_set_bit(gate);
+
+And the definition of clk_gate_set_bit:
+
+static void clk_gate_set_bit(struct clk_gate *gate)
+{
+ u32 reg;
+
+ reg = __raw_readl(gate->reg);
+ reg |= BIT(gate->bit_idx);
+ writel(reg, gate->reg);
+}
+
+Note that to_clk_gate is defined as:
+
+#define to_clk_gate(_hw) container_of(_hw, struct clk_gate, clk)
+
+This pattern of abstraction is used for every clock hardware
+representation.
+
+ Part 4 - supporting your own clk hardware
+
+When implementing support for a new type of clock it only necessary to
+include the following header:
+
+#include <linux/clk-provider.h>
+
+include/linux/clk.h is included within that header and clk-private.h
+must never be included from the code which implements the operations for
+a clock. More on that below in Part 5.
+
+To construct a clk hardware structure for your platform you must define
+the following:
+
+struct clk_foo {
+ struct clk_hw hw;
+ ... hardware specific data goes here ...
+};
+
+To take advantage of your data you'll need to support valid operations
+for your clk:
+
+struct clk_ops clk_foo_ops {
+ .enable = &clk_foo_enable;
+ .disable = &clk_foo_disable;
+};
+
+Implement the above functions using container_of:
+
+#define to_clk_foo(_hw) container_of(_hw, struct clk_foo, hw)
+
+int clk_foo_enable(struct clk_hw *hw)
+{
+ struct clk_foo *foo;
+
+ foo = to_clk_foo(hw);
+
+ ... perform magic on foo ...
+
+ return 0;
+};
+
+Below is a matrix detailing which clk_ops are mandatory based upon the
+hardware capbilities of that clock. A cell marked as "y" means
+mandatory, a cell marked as "n" implies that either including that
+callback is invalid or otherwise uneccesary. Empty cells are either
+optional or must be evaluated on a case-by-case basis.
+
+ clock hardware characteristics
+ -----------------------------------------------------------
+ | gate | change rate | single parent | multiplexer | root |
+ |------|-------------|---------------|-------------|------|
+.prepare | | | | | |
+.unprepare | | | | | |
+ | | | | | |
+.enable | y | | | | |
+.disable | y | | | | |
+.is_enabled | y | | | | |
+ | | | | | |
+.recalc_rate | | y | | | |
+.round_rate | | y | | | |
+.set_rate | | y | | | |
+ | | | | | |
+.set_parent | | | n | y | n |
+.get_parent | | | n | y | n |
+ | | | | | |
+.init | | | | | |
+ -----------------------------------------------------------
+
+Finally, register your clock at run-time with a hardware-specific
+registration function. This function simply populates struct clk_foo's
+data and then passes the common struct clk parameters to the framework
+with a call to:
+
+clk_register(...)
+
+See the basic clock types in drivers/clk/clk-*.c for examples.
+
+ Part 5 - static initialization of clock data
+
+For platforms with many clocks (often numbering into the hundreds) it
+may be desirable to statically initialize some clock data. This
+presents a problem since the definition of struct clk should be hidden
+from everyone except for the clock core in drivers/clk/clk.c.
+
+To get around this problem struct clk's definition is exposed in
+include/linux/clk-private.h along with some macros for more easily
+initializing instances of the basic clock types. These clocks must
+still be initialized with the common clock framework via a call to
+__clk_init.
+
+clk-private.h must NEVER be included by code which implements struct
+clk_ops callbacks, nor must it be included by any logic which pokes
+around inside of struct clk at run-time. To do so is a layering
+violation.
+
+To better enforce this policy, always follow this simple rule: any
+statically initialized clock data MUST be defined in a separate file
+from the logic that implements its ops. Basically separate the logic
+from the data and all is well.
diff --git a/Documentation/device-mapper/thin-provisioning.txt b/Documentation/device-mapper/thin-provisioning.txt
index 1ff044d87ca..3370bc4d7b9 100644
--- a/Documentation/device-mapper/thin-provisioning.txt
+++ b/Documentation/device-mapper/thin-provisioning.txt
@@ -75,10 +75,12 @@ less sharing than average you'll need a larger-than-average metadata device.
As a guide, we suggest you calculate the number of bytes to use in the
metadata device as 48 * $data_dev_size / $data_block_size but round it up
-to 2MB if the answer is smaller. The largest size supported is 16GB.
+to 2MB if the answer is smaller. If you're creating large numbers of
+snapshots which are recording large amounts of change, you may find you
+need to increase this.
-If you're creating large numbers of snapshots which are recording large
-amounts of change, you may need find you need to increase this.
+The largest size supported is 16GB: If the device is larger,
+a warning will be issued and the excess space will not be used.
Reloading a pool table
----------------------
@@ -167,6 +169,38 @@ ii) Using an internal snapshot.
dmsetup create snap --table "0 2097152 thin /dev/mapper/pool 1"
+External snapshots
+------------------
+
+You can use an external _read only_ device as an origin for a
+thinly-provisioned volume. Any read to an unprovisioned area of the
+thin device will be passed through to the origin. Writes trigger
+the allocation of new blocks as usual.
+
+One use case for this is VM hosts that want to run guests on
+thinly-provisioned volumes but have the base image on another device
+(possibly shared between many VMs).
+
+You must not write to the origin device if you use this technique!
+Of course, you may write to the thin device and take internal snapshots
+of the thin volume.
+
+i) Creating a snapshot of an external device
+
+ This is the same as creating a thin device.
+ You don't mention the origin at this stage.
+
+ dmsetup message /dev/mapper/pool 0 "create_thin 0"
+
+ii) Using a snapshot of an external device.
+
+ Append an extra parameter to the thin target specifying the origin:
+
+ dmsetup create snap --table "0 2097152 thin /dev/mapper/pool 0 /dev/image"
+
+ N.B. All descendants (internal snapshots) of this snapshot require the
+ same extra origin parameter.
+
Deactivation
------------
@@ -189,7 +223,13 @@ i) Constructor
<low water mark (blocks)> [<number of feature args> [<arg>]*]
Optional feature arguments:
- - 'skip_block_zeroing': skips the zeroing of newly-provisioned blocks.
+
+ skip_block_zeroing: Skip the zeroing of newly-provisioned blocks.
+
+ ignore_discard: Disable discard support.
+
+ no_discard_passdown: Don't pass discards down to the underlying
+ data device, but just remove the mapping.
Data block size must be between 64KB (128 sectors) and 1GB
(2097152 sectors) inclusive.
@@ -237,16 +277,6 @@ iii) Messages
Deletes a thin device. Irreversible.
- trim <dev id> <new size in sectors>
-
- Delete mappings from the end of a thin device. Irreversible.
- You might want to use this if you're reducing the size of
- your thinly-provisioned device. In many cases, due to the
- sharing of blocks between devices, it is not possible to
- determine in advance how much space 'trim' will release. (In
- future a userspace tool might be able to perform this
- calculation.)
-
set_transaction_id <current id> <new id>
Userland volume managers, such as LVM, need a way to
@@ -262,7 +292,7 @@ iii) Messages
i) Constructor
- thin <pool dev> <dev id>
+ thin <pool dev> <dev id> [<external origin dev>]
pool dev:
the thin-pool device, e.g. /dev/mapper/my_pool or 253:0
@@ -271,6 +301,11 @@ i) Constructor
the internal device identifier of the device to be
activated.
+ external origin dev:
+ an optional block device outside the pool to be treated as a
+ read-only snapshot origin: reads to unprovisioned areas of the
+ thin target will be mapped to this device.
+
The pool doesn't store any size against the thin devices. If you
load a thin target that is smaller than you've been using previously,
then you'll have no access to blocks mapped beyond the end. If you
diff --git a/Documentation/device-mapper/verity.txt b/Documentation/device-mapper/verity.txt
new file mode 100644
index 00000000000..32e48797a14
--- /dev/null
+++ b/Documentation/device-mapper/verity.txt
@@ -0,0 +1,194 @@
+dm-verity
+==========
+
+Device-Mapper's "verity" target provides transparent integrity checking of
+block devices using a cryptographic digest provided by the kernel crypto API.
+This target is read-only.
+
+Construction Parameters
+=======================
+ <version> <dev> <hash_dev> <hash_start>
+ <data_block_size> <hash_block_size>
+ <num_data_blocks> <hash_start_block>
+ <algorithm> <digest> <salt>
+
+<version>
+ This is the version number of the on-disk format.
+
+ 0 is the original format used in the Chromium OS.
+ The salt is appended when hashing, digests are stored continuously and
+ the rest of the block is padded with zeros.
+
+ 1 is the current format that should be used for new devices.
+ The salt is prepended when hashing and each digest is
+ padded with zeros to the power of two.
+
+<dev>
+ This is the device containing the data the integrity of which needs to be
+ checked. It may be specified as a path, like /dev/sdaX, or a device number,
+ <major>:<minor>.
+
+<hash_dev>
+ This is the device that that supplies the hash tree data. It may be
+ specified similarly to the device path and may be the same device. If the
+ same device is used, the hash_start should be outside of the dm-verity
+ configured device size.
+
+<data_block_size>
+ The block size on a data device. Each block corresponds to one digest on
+ the hash device.
+
+<hash_block_size>
+ The size of a hash block.
+
+<num_data_blocks>
+ The number of data blocks on the data device. Additional blocks are
+ inaccessible. You can place hashes to the same partition as data, in this
+ case hashes are placed after <num_data_blocks>.
+
+<hash_start_block>
+ This is the offset, in <hash_block_size>-blocks, from the start of hash_dev
+ to the root block of the hash tree.
+
+<algorithm>
+ The cryptographic hash algorithm used for this device. This should
+ be the name of the algorithm, like "sha1".
+
+<digest>
+ The hexadecimal encoding of the cryptographic hash of the root hash block
+ and the salt. This hash should be trusted as there is no other authenticity
+ beyond this point.
+
+<salt>
+ The hexadecimal encoding of the salt value.
+
+Theory of operation
+===================
+
+dm-verity is meant to be setup as part of a verified boot path. This
+may be anything ranging from a boot using tboot or trustedgrub to just
+booting from a known-good device (like a USB drive or CD).
+
+When a dm-verity device is configured, it is expected that the caller
+has been authenticated in some way (cryptographic signatures, etc).
+After instantiation, all hashes will be verified on-demand during
+disk access. If they cannot be verified up to the root node of the
+tree, the root hash, then the I/O will fail. This should identify
+tampering with any data on the device and the hash data.
+
+Cryptographic hashes are used to assert the integrity of the device on a
+per-block basis. This allows for a lightweight hash computation on first read
+into the page cache. Block hashes are stored linearly-aligned to the nearest
+block the size of a page.
+
+Hash Tree
+---------
+
+Each node in the tree is a cryptographic hash. If it is a leaf node, the hash
+is of some block data on disk. If it is an intermediary node, then the hash is
+of a number of child nodes.
+
+Each entry in the tree is a collection of neighboring nodes that fit in one
+block. The number is determined based on block_size and the size of the
+selected cryptographic digest algorithm. The hashes are linearly-ordered in
+this entry and any unaligned trailing space is ignored but included when
+calculating the parent node.
+
+The tree looks something like:
+
+alg = sha256, num_blocks = 32768, block_size = 4096
+
+ [ root ]
+ / . . . \
+ [entry_0] [entry_1]
+ / . . . \ . . . \
+ [entry_0_0] . . . [entry_0_127] . . . . [entry_1_127]
+ / ... \ / . . . \ / \
+ blk_0 ... blk_127 blk_16256 blk_16383 blk_32640 . . . blk_32767
+
+
+On-disk format
+==============
+
+Below is the recommended on-disk format. The verity kernel code does not
+read the on-disk header. It only reads the hash blocks which directly
+follow the header. It is expected that a user-space tool will verify the
+integrity of the verity_header and then call dmsetup with the correct
+parameters. Alternatively, the header can be omitted and the dmsetup
+parameters can be passed via the kernel command-line in a rooted chain
+of trust where the command-line is verified.
+
+The on-disk format is especially useful in cases where the hash blocks
+are on a separate partition. The magic number allows easy identification
+of the partition contents. Alternatively, the hash blocks can be stored
+in the same partition as the data to be verified. In such a configuration
+the filesystem on the partition would be sized a little smaller than
+the full-partition, leaving room for the hash blocks.
+
+struct superblock {
+ uint8_t signature[8]
+ "verity\0\0";
+
+ uint8_t version;
+ 1 - current format
+
+ uint8_t data_block_bits;
+ log2(data block size)
+
+ uint8_t hash_block_bits;
+ log2(hash block size)
+
+ uint8_t pad1[1];
+ zero padding
+
+ uint16_t salt_size;
+ big-endian salt size
+
+ uint8_t pad2[2];
+ zero padding
+
+ uint32_t data_blocks_hi;
+ big-endian high 32 bits of the 64-bit number of data blocks
+
+ uint32_t data_blocks_lo;
+ big-endian low 32 bits of the 64-bit number of data blocks
+
+ uint8_t algorithm[16];
+ cryptographic algorithm
+
+ uint8_t salt[384];
+ salt (the salt size is specified above)
+
+ uint8_t pad3[88];
+ zero padding to 512-byte boundary
+}
+
+Directly following the header (and with sector number padded to the next hash
+block boundary) are the hash blocks which are stored a depth at a time
+(starting from the root), sorted in order of increasing index.
+
+Status
+======
+V (for Valid) is returned if every check performed so far was valid.
+If any check failed, C (for Corruption) is returned.
+
+Example
+=======
+
+Setup a device:
+ dmsetup create vroot --table \
+ "0 2097152 "\
+ "verity 1 /dev/sda1 /dev/sda2 4096 4096 2097152 1 "\
+ "4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076 "\
+ "1234000000000000000000000000000000000000000000000000000000000000"
+
+A command line tool veritysetup is available to compute or verify
+the hash tree or activate the kernel driver. This is available from
+the LVM2 upstream repository and may be supplied as a package called
+device-mapper-verity-tools:
+ git://sources.redhat.com/git/lvm2
+ http://sourceware.org/git/?p=lvm2.git
+ http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/verity?cvsroot=lvm2
+
+veritysetup -a vroot /dev/sda1 /dev/sda2 \
+ 4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076
diff --git a/Documentation/devicetree/bindings/arm/atmel-at91.txt b/Documentation/devicetree/bindings/arm/atmel-at91.txt
index 1aeaf6f2a1b..ecc81e36871 100644
--- a/Documentation/devicetree/bindings/arm/atmel-at91.txt
+++ b/Documentation/devicetree/bindings/arm/atmel-at91.txt
@@ -30,3 +30,63 @@ One interrupt per TC channel in a TC block:
reg = <0xfffdc000 0x100>;
interrupts = <26 4 27 4 28 4>;
};
+
+RSTC Reset Controller required properties:
+- compatible: Should be "atmel,<chip>-rstc".
+ <chip> can be "at91sam9260" or "at91sam9g45"
+- reg: Should contain registers location and length
+
+Example:
+
+ rstc@fffffd00 {
+ compatible = "atmel,at91sam9260-rstc";
+ reg = <0xfffffd00 0x10>;
+ };
+
+RAMC SDRAM/DDR Controller required properties:
+- compatible: Should be "atmel,at91sam9260-sdramc",
+ "atmel,at91sam9g45-ddramc",
+- reg: Should contain registers location and length
+ For at91sam9263 and at91sam9g45 you must specify 2 entries.
+
+Examples:
+
+ ramc0: ramc@ffffe800 {
+ compatible = "atmel,at91sam9g45-ddramc";
+ reg = <0xffffe800 0x200>;
+ };
+
+ ramc0: ramc@ffffe400 {
+ compatible = "atmel,at91sam9g45-ddramc";
+ reg = <0xffffe400 0x200
+ 0xffffe600 0x200>;
+ };
+
+SHDWC Shutdown Controller
+
+required properties:
+- compatible: Should be "atmel,<chip>-shdwc".
+ <chip> can be "at91sam9260", "at91sam9rl" or "at91sam9x5".
+- reg: Should contain registers location and length
+
+optional properties:
+- atmel,wakeup-mode: String, operation mode of the wakeup mode.
+ Supported values are: "none", "high", "low", "any".
+- atmel,wakeup-counter: Counter on Wake-up 0 (between 0x0 and 0xf).
+
+optional at91sam9260 properties:
+- atmel,wakeup-rtt-timer: boolean to enable Real-time Timer Wake-up.
+
+optional at91sam9rl properties:
+- atmel,wakeup-rtc-timer: boolean to enable Real-time Clock Wake-up.
+- atmel,wakeup-rtt-timer: boolean to enable Real-time Timer Wake-up.
+
+optional at91sam9x5 properties:
+- atmel,wakeup-rtc-timer: boolean to enable Real-time Clock Wake-up.
+
+Example:
+
+ rstc@fffffd00 {
+ compatible = "atmel,at91sam9260-rstc";
+ reg = <0xfffffd00 0x10>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/atmel-pmc.txt b/Documentation/devicetree/bindings/arm/atmel-pmc.txt
new file mode 100644
index 00000000000..389bed5056e
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/atmel-pmc.txt
@@ -0,0 +1,11 @@
+* Power Management Controller (PMC)
+
+Required properties:
+- compatible: Should be "atmel,at91rm9200-pmc"
+- reg: Should contain PMC registers location and length
+
+Examples:
+ pmc: pmc@fffffc00 {
+ compatible = "atmel,at91rm9200-pmc";
+ reg = <0xfffffc00 0x100>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/spear.txt b/Documentation/devicetree/bindings/arm/spear.txt
new file mode 100644
index 00000000000..f8e54f09232
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/spear.txt
@@ -0,0 +1,8 @@
+ST SPEAr Platforms Device Tree Bindings
+---------------------------------------
+
+Boards with the ST SPEAr600 SoC shall have the following properties:
+
+Required root node property:
+
+compatible = "st,spear600";
diff --git a/Documentation/devicetree/bindings/gpio/gpio-omap.txt b/Documentation/devicetree/bindings/gpio/gpio-omap.txt
new file mode 100644
index 00000000000..bff51a2fee1
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-omap.txt
@@ -0,0 +1,36 @@
+OMAP GPIO controller bindings
+
+Required properties:
+- compatible:
+ - "ti,omap2-gpio" for OMAP2 controllers
+ - "ti,omap3-gpio" for OMAP3 controllers
+ - "ti,omap4-gpio" for OMAP4 controllers
+- #gpio-cells : Should be two.
+ - first cell is the pin number
+ - second cell is used to specify optional parameters (unused)
+- gpio-controller : Marks the device node as a GPIO controller.
+- #interrupt-cells : Should be 2.
+- interrupt-controller: Mark the device node as an interrupt controller
+ The first cell is the GPIO number.
+ The second cell is used to specify flags:
+ bits[3:0] trigger type and level flags:
+ 1 = low-to-high edge triggered.
+ 2 = high-to-low edge triggered.
+ 4 = active high level-sensitive.
+ 8 = active low level-sensitive.
+
+OMAP specific properties:
+- ti,hwmods: Name of the hwmod associated to the GPIO:
+ "gpio<X>", <X> being the 1-based instance number from the HW spec
+
+
+Example:
+
+gpio4: gpio4 {
+ compatible = "ti,omap4-gpio";
+ ti,hwmods = "gpio4";
+ #gpio-cells = <2>;
+ gpio-controller;
+ #interrupt-cells = <2>;
+ interrupt-controller;
+};
diff --git a/Documentation/devicetree/bindings/gpio/gpio-twl4030.txt b/Documentation/devicetree/bindings/gpio/gpio-twl4030.txt
new file mode 100644
index 00000000000..16695d9cf1e
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-twl4030.txt
@@ -0,0 +1,23 @@
+twl4030 GPIO controller bindings
+
+Required properties:
+- compatible:
+ - "ti,twl4030-gpio" for twl4030 GPIO controller
+- #gpio-cells : Should be two.
+ - first cell is the pin number
+ - second cell is used to specify optional parameters (unused)
+- gpio-controller : Marks the device node as a GPIO controller.
+- #interrupt-cells : Should be 2.
+- interrupt-controller: Mark the device node as an interrupt controller
+ The first cell is the GPIO number.
+ The second cell is not used.
+
+Example:
+
+twl_gpio: gpio {
+ compatible = "ti,twl4030-gpio";
+ #gpio-cells = <2>;
+ gpio-controller;
+ #interrupt-cells = <2>;
+ interrupt-controller;
+};
diff --git a/Documentation/devicetree/bindings/gpio/gpio_i2c.txt b/Documentation/devicetree/bindings/gpio/gpio_i2c.txt
new file mode 100644
index 00000000000..4f8ec947c6b
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio_i2c.txt
@@ -0,0 +1,32 @@
+Device-Tree bindings for i2c gpio driver
+
+Required properties:
+ - compatible = "i2c-gpio";
+ - gpios: sda and scl gpio
+
+
+Optional properties:
+ - i2c-gpio,sda-open-drain: sda as open drain
+ - i2c-gpio,scl-open-drain: scl as open drain
+ - i2c-gpio,scl-output-only: scl as output only
+ - i2c-gpio,delay-us: delay between GPIO operations (may depend on each platform)
+ - i2c-gpio,timeout-ms: timeout to get data
+
+Example nodes:
+
+i2c@0 {
+ compatible = "i2c-gpio";
+ gpios = <&pioA 23 0 /* sda */
+ &pioA 24 0 /* scl */
+ >;
+ i2c-gpio,sda-open-drain;
+ i2c-gpio,scl-open-drain;
+ i2c-gpio,delay-us = <2>; /* ~100 kHz */
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ rv3029c2@56 {
+ compatible = "rv3029c2";
+ reg = <0x56>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/gpio/sodaville.txt b/Documentation/devicetree/bindings/gpio/sodaville.txt
new file mode 100644
index 00000000000..563eff22b97
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/sodaville.txt
@@ -0,0 +1,48 @@
+GPIO controller on CE4100 / Sodaville SoCs
+==========================================
+
+The bindings for CE4100's GPIO controller match the generic description
+which is covered by the gpio.txt file in this folder.
+
+The only additional property is the intel,muxctl property which holds the
+value which is written into the MUXCNTL register.
+
+There is no compatible property for now because the driver is probed via
+PCI id (vendor 0x8086 device 0x2e67).
+
+The interrupt specifier consists of two cells encoded as follows:
+ - <1st cell>: The interrupt-number that identifies the interrupt source.
+ - <2nd cell>: The level-sense information, encoded as follows:
+ 4 - active high level-sensitive
+ 8 - active low level-sensitive
+
+Example of the GPIO device and one user:
+
+ pcigpio: gpio@b,1 {
+ /* two cells for GPIO and interrupt */
+ #gpio-cells = <2>;
+ #interrupt-cells = <2>;
+ compatible = "pci8086,2e67.2",
+ "pci8086,2e67",
+ "pciclassff0000",
+ "pciclassff00";
+
+ reg = <0x15900 0x0 0x0 0x0 0x0>;
+ /* Interrupt line of the gpio device */
+ interrupts = <15 1>;
+ /* It is an interrupt and GPIO controller itself */
+ interrupt-controller;
+ gpio-controller;
+ intel,muxctl = <0>;
+ };
+
+ testuser@20 {
+ compatible = "example,testuser";
+ /* User the 11th GPIO line as an active high triggered
+ * level interrupt
+ */
+ interrupts = <11 8>;
+ interrupt-parent = <&pcigpio>;
+ /* Use this GPIO also with the gpio functions */
+ gpios = <&pcigpio 11 0>;
+ };
diff --git a/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt b/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt
new file mode 100644
index 00000000000..dbd4368ab8c
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt
@@ -0,0 +1,33 @@
+* TI Highspeed MMC host controller for OMAP
+
+The Highspeed MMC Host Controller on TI OMAP family
+provides an interface for MMC, SD, and SDIO types of memory cards.
+
+Required properties:
+- compatible:
+ Should be "ti,omap2-hsmmc", for OMAP2 controllers
+ Should be "ti,omap3-hsmmc", for OMAP3 controllers
+ Should be "ti,omap4-hsmmc", for OMAP4 controllers
+- ti,hwmods: Must be "mmc<n>", n is controller instance starting 1
+- reg : should contain hsmmc registers location and length
+
+Optional properties:
+ti,dual-volt: boolean, supports dual voltage cards
+<supply-name>-supply: phandle to the regulator device tree node
+"supply-name" examples are "vmmc", "vmmc_aux" etc
+ti,bus-width: Number of data lines, default assumed is 1 if the property is missing.
+cd-gpios: GPIOs for card detection
+wp-gpios: GPIOs for write protection
+ti,non-removable: non-removable slot (like eMMC)
+ti,needs-special-reset: Requires a special softreset sequence
+
+Example:
+ mmc1: mmc@0x4809c000 {
+ compatible = "ti,omap4-hsmmc";
+ reg = <0x4809c000 0x400>;
+ ti,hwmods = "mmc1";
+ ti,dual-volt;
+ ti,bus-width = <4>;
+ vmmc-supply = <&vmmc>; /* phandle to regulator node */
+ ti,non-removable;
+ };
diff --git a/Documentation/devicetree/bindings/mtd/arm-versatile.txt b/Documentation/devicetree/bindings/mtd/arm-versatile.txt
index 476845db94d..beace4b89da 100644
--- a/Documentation/devicetree/bindings/mtd/arm-versatile.txt
+++ b/Documentation/devicetree/bindings/mtd/arm-versatile.txt
@@ -4,5 +4,5 @@ Required properties:
- compatible : must be "arm,versatile-flash";
- bank-width : width in bytes of flash interface.
-Optional properties:
-- Subnode partition map from mtd flash binding
+The device tree may optionally contain sub-nodes describing partitions of the
+address space. See partition.txt for more detail.
diff --git a/Documentation/devicetree/bindings/mtd/atmel-dataflash.txt b/Documentation/devicetree/bindings/mtd/atmel-dataflash.txt
index ef66ddd01da..1889a4db5b7 100644
--- a/Documentation/devicetree/bindings/mtd/atmel-dataflash.txt
+++ b/Documentation/devicetree/bindings/mtd/atmel-dataflash.txt
@@ -3,6 +3,9 @@
Required properties:
- compatible : "atmel,<model>", "atmel,<series>", "atmel,dataflash".
+The device tree may optionally contain sub-nodes describing partitions of the
+address space. See partition.txt for more detail.
+
Example:
flash@1 {
diff --git a/Documentation/devicetree/bindings/mtd/atmel-nand.txt b/Documentation/devicetree/bindings/mtd/atmel-nand.txt
new file mode 100644
index 00000000000..5903ecf6e89
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/atmel-nand.txt
@@ -0,0 +1,41 @@
+Atmel NAND flash
+
+Required properties:
+- compatible : "atmel,at91rm9200-nand".
+- reg : should specify localbus address and size used for the chip,
+ and if availlable the ECC.
+- atmel,nand-addr-offset : offset for the address latch.
+- atmel,nand-cmd-offset : offset for the command latch.
+- #address-cells, #size-cells : Must be present if the device has sub-nodes
+ representing partitions.
+
+- gpios : specifies the gpio pins to control the NAND device. detect is an
+ optional gpio and may be set to 0 if not present.
+
+Optional properties:
+- nand-ecc-mode : String, operation mode of the NAND ecc mode, soft by default.
+ Supported values are: "none", "soft", "hw", "hw_syndrome", "hw_oob_first",
+ "soft_bch".
+- nand-bus-width : 8 or 16 bus width if not present 8
+- nand-on-flash-bbt: boolean to enable on flash bbt option if not present false
+
+Examples:
+nand0: nand@40000000,0 {
+ compatible = "atmel,at91rm9200-nand";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x40000000 0x10000000
+ 0xffffe800 0x200
+ >;
+ atmel,nand-addr-offset = <21>;
+ atmel,nand-cmd-offset = <22>;
+ nand-on-flash-bbt;
+ nand-ecc-mode = "soft";
+ gpios = <&pioC 13 0
+ &pioC 14 0
+ 0
+ >;
+ partition@0 {
+ ...
+ };
+};
diff --git a/Documentation/devicetree/bindings/mtd/fsl-upm-nand.txt b/Documentation/devicetree/bindings/mtd/fsl-upm-nand.txt
index 00f1f546b32..fce4894f5a9 100644
--- a/Documentation/devicetree/bindings/mtd/fsl-upm-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/fsl-upm-nand.txt
@@ -19,6 +19,10 @@ Optional properties:
read registers (tR). Required if property "gpios" is not used
(R/B# pins not connected).
+Each flash chip described may optionally contain additional sub-nodes
+describing partitions of the address space. See partition.txt for more
+detail.
+
Examples:
upm@1,0 {
diff --git a/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt b/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt
index 719f4dc58df..36ef07d3c90 100644
--- a/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt
@@ -25,6 +25,9 @@ Optional properties:
GPIO state and before and after command byte writes, this register will be
read to ensure that the GPIO accesses have completed.
+The device tree may optionally contain sub-nodes describing partitions of the
+address space. See partition.txt for more detail.
+
Examples:
gpio-nand@1,0 {
diff --git a/Documentation/devicetree/bindings/mtd/mtd-physmap.txt b/Documentation/devicetree/bindings/mtd/mtd-physmap.txt
index 80152cb567d..a63c2bd7de2 100644
--- a/Documentation/devicetree/bindings/mtd/mtd-physmap.txt
+++ b/Documentation/devicetree/bindings/mtd/mtd-physmap.txt
@@ -23,27 +23,8 @@ are defined:
- vendor-id : Contains the flash chip's vendor id (1 byte).
- device-id : Contains the flash chip's device id (1 byte).
-In addition to the information on the mtd bank itself, the
-device tree may optionally contain additional information
-describing partitions of the address space. This can be
-used on platforms which have strong conventions about which
-portions of a flash are used for what purposes, but which don't
-use an on-flash partition table such as RedBoot.
-
-Each partition is represented as a sub-node of the mtd device.
-Each node's name represents the name of the corresponding
-partition of the mtd device.
-
-Flash partitions
- - reg : The partition's offset and size within the mtd bank.
- - label : (optional) The label / name for this partition.
- If omitted, the label is taken from the node name (excluding
- the unit address).
- - read-only : (optional) This parameter, if present, is a hint to
- Linux that this partition should only be mounted
- read-only. This is usually used for flash partitions
- containing early-boot firmware images or data which should not
- be clobbered.
+The device tree may optionally contain sub-nodes describing partitions of the
+address space. See partition.txt for more detail.
Example:
diff --git a/Documentation/devicetree/bindings/mtd/nand.txt b/Documentation/devicetree/bindings/mtd/nand.txt
new file mode 100644
index 00000000000..03855c8c492
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/nand.txt
@@ -0,0 +1,7 @@
+* MTD generic binding
+
+- nand-ecc-mode : String, operation mode of the NAND ecc mode.
+ Supported values are: "none", "soft", "hw", "hw_syndrome", "hw_oob_first",
+ "soft_bch".
+- nand-bus-width : 8 or 16 bus width if not present 8
+- nand-on-flash-bbt: boolean to enable on flash bbt option if not present false
diff --git a/Documentation/devicetree/bindings/mtd/partition.txt b/Documentation/devicetree/bindings/mtd/partition.txt
new file mode 100644
index 00000000000..f114ce1657c
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/partition.txt
@@ -0,0 +1,38 @@
+Representing flash partitions in devicetree
+
+Partitions can be represented by sub-nodes of an mtd device. This can be used
+on platforms which have strong conventions about which portions of a flash are
+used for what purposes, but which don't use an on-flash partition table such
+as RedBoot.
+
+#address-cells & #size-cells must both be present in the mtd device and be
+equal to 1.
+
+Required properties:
+- reg : The partition's offset and size within the mtd bank.
+
+Optional properties:
+- label : The label / name for this partition. If omitted, the label is taken
+ from the node name (excluding the unit address).
+- read-only : This parameter, if present, is a hint to Linux that this
+ partition should only be mounted read-only. This is usually used for flash
+ partitions containing early-boot firmware images or data which should not be
+ clobbered.
+
+Examples:
+
+
+flash@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ partition@0 {
+ label = "u-boot";
+ reg = <0x0000000 0x100000>;
+ read-only;
+ };
+
+ uimage@100000 {
+ reg = <0x0100000 0x200000>;
+ };
+];
diff --git a/Documentation/devicetree/bindings/usb/atmel-usb.txt b/Documentation/devicetree/bindings/usb/atmel-usb.txt
new file mode 100644
index 00000000000..60bd2150a3e
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/atmel-usb.txt
@@ -0,0 +1,49 @@
+Atmel SOC USB controllers
+
+OHCI
+
+Required properties:
+ - compatible: Should be "atmel,at91rm9200-ohci" for USB controllers
+ used in host mode.
+ - num-ports: Number of ports.
+ - atmel,vbus-gpio: If present, specifies a gpio that needs to be
+ activated for the bus to be powered.
+ - atmel,oc-gpio: If present, specifies a gpio that needs to be
+ activated for the overcurrent detection.
+
+usb0: ohci@00500000 {
+ compatible = "atmel,at91rm9200-ohci", "usb-ohci";
+ reg = <0x00500000 0x100000>;
+ interrupts = <20 4>;
+ num-ports = <2>;
+};
+
+EHCI
+
+Required properties:
+ - compatible: Should be "atmel,at91sam9g45-ehci" for USB controllers
+ used in host mode.
+
+usb1: ehci@00800000 {
+ compatible = "atmel,at91sam9g45-ehci", "usb-ehci";
+ reg = <0x00800000 0x100000>;
+ interrupts = <22 4>;
+};
+
+AT91 USB device controller
+
+Required properties:
+ - compatible: Should be "atmel,at91rm9200-udc"
+ - reg: Address and length of the register set for the device
+ - interrupts: Should contain macb interrupt
+
+Optional properties:
+ - atmel,vbus-gpio: If present, specifies a gpio that needs to be
+ activated for the bus to be powered.
+
+usb1: gadget@fffa4000 {
+ compatible = "atmel,at91rm9200-udc";
+ reg = <0xfffa4000 0x4000>;
+ interrupts = <10 4>;
+ atmel,vbus-gpio = <&pioC 5 0>;
+};
diff --git a/Documentation/devicetree/bindings/usb/tegra-usb.txt b/Documentation/devicetree/bindings/usb/tegra-usb.txt
index 035d63d5646..007005ddbe1 100644
--- a/Documentation/devicetree/bindings/usb/tegra-usb.txt
+++ b/Documentation/devicetree/bindings/usb/tegra-usb.txt
@@ -11,3 +11,16 @@ Required properties :
- phy_type : Should be one of "ulpi" or "utmi".
- nvidia,vbus-gpio : If present, specifies a gpio that needs to be
activated for the bus to be powered.
+
+Optional properties:
+ - dr_mode : dual role mode. Indicates the working mode for
+ nvidia,tegra20-ehci compatible controllers. Can be "host", "peripheral",
+ or "otg". Default to "host" if not defined for backward compatibility.
+ host means this is a host controller
+ peripheral means it is device controller
+ otg means it can operate as either ("on the go")
+ - nvidia,has-legacy-mode : boolean indicates whether this controller can
+ operate in legacy mode (as APX 2500 / 2600). In legacy mode some
+ registers are accessed through the APB_MISC base address instead of
+ the USB controller. Since this is a legacy issue it probably does not
+ warrant a compatible string of its own.
diff --git a/Documentation/devicetree/usage-model.txt b/Documentation/devicetree/usage-model.txt
new file mode 100644
index 00000000000..c5a80099b71
--- /dev/null
+++ b/Documentation/devicetree/usage-model.txt
@@ -0,0 +1,412 @@
+Linux and the Device Tree
+-------------------------
+The Linux usage model for device tree data
+
+Author: Grant Likely <grant.likely@secretlab.ca>
+
+This article describes how Linux uses the device tree. An overview of
+the device tree data format can be found on the device tree usage page
+at devicetree.org[1].
+
+[1] http://devicetree.org/Device_Tree_Usage
+
+The "Open Firmware Device Tree", or simply Device Tree (DT), is a data
+structure and language for describing hardware. More specifically, it
+is a description of hardware that is readable by an operating system
+so that the operating system doesn't need to hard code details of the
+machine.
+
+Structurally, the DT is a tree, or acyclic graph with named nodes, and
+nodes may have an arbitrary number of named properties encapsulating
+arbitrary data. A mechanism also exists to create arbitrary
+links from one node to another outside of the natural tree structure.
+
+Conceptually, a common set of usage conventions, called 'bindings',
+is defined for how data should appear in the tree to describe typical
+hardware characteristics including data busses, interrupt lines, GPIO
+connections, and peripheral devices.
+
+As much as possible, hardware is described using existing bindings to
+maximize use of existing support code, but since property and node
+names are simply text strings, it is easy to extend existing bindings
+or create new ones by defining new nodes and properties. Be wary,
+however, of creating a new binding without first doing some homework
+about what already exists. There are currently two different,
+incompatible, bindings for i2c busses that came about because the new
+binding was created without first investigating how i2c devices were
+already being enumerated in existing systems.
+
+1. History
+----------
+The DT was originally created by Open Firmware as part of the
+communication method for passing data from Open Firmware to a client
+program (like to an operating system). An operating system used the
+Device Tree to discover the topology of the hardware at runtime, and
+thereby support a majority of available hardware without hard coded
+information (assuming drivers were available for all devices).
+
+Since Open Firmware is commonly used on PowerPC and SPARC platforms,
+the Linux support for those architectures has for a long time used the
+Device Tree.
+
+In 2005, when PowerPC Linux began a major cleanup and to merge 32-bit
+and 64-bit support, the decision was made to require DT support on all
+powerpc platforms, regardless of whether or not they used Open
+Firmware. To do this, a DT representation called the Flattened Device
+Tree (FDT) was created which could be passed to the kernel as a binary
+blob without requiring a real Open Firmware implementation. U-Boot,
+kexec, and other bootloaders were modified to support both passing a
+Device Tree Binary (dtb) and to modify a dtb at boot time. DT was
+also added to the PowerPC boot wrapper (arch/powerpc/boot/*) so that
+a dtb could be wrapped up with the kernel image to support booting
+existing non-DT aware firmware.
+
+Some time later, FDT infrastructure was generalized to be usable by
+all architectures. At the time of this writing, 6 mainlined
+architectures (arm, microblaze, mips, powerpc, sparc, and x86) and 1
+out of mainline (nios) have some level of DT support.
+
+2. Data Model
+-------------
+If you haven't already read the Device Tree Usage[1] page,
+then go read it now. It's okay, I'll wait....
+
+2.1 High Level View
+-------------------
+The most important thing to understand is that the DT is simply a data
+structure that describes the hardware. There is nothing magical about
+it, and it doesn't magically make all hardware configuration problems
+go away. What it does do is provide a language for decoupling the
+hardware configuration from the board and device driver support in the
+Linux kernel (or any other operating system for that matter). Using
+it allows board and device support to become data driven; to make
+setup decisions based on data passed into the kernel instead of on
+per-machine hard coded selections.
+
+Ideally, data driven platform setup should result in less code
+duplication and make it easier to support a wide range of hardware
+with a single kernel image.
+
+Linux uses DT data for three major purposes:
+1) platform identification,
+2) runtime configuration, and
+3) device population.
+
+2.2 Platform Identification
+---------------------------
+First and foremost, the kernel will use data in the DT to identify the
+specific machine. In a perfect world, the specific platform shouldn't
+matter to the kernel because all platform details would be described
+perfectly by the device tree in a consistent and reliable manner.
+Hardware is not perfect though, and so the kernel must identify the
+machine during early boot so that it has the opportunity to run
+machine-specific fixups.
+
+In the majority of cases, the machine identity is irrelevant, and the
+kernel will instead select setup code based on the machine's core
+CPU or SoC. On ARM for example, setup_arch() in
+arch/arm/kernel/setup.c will call setup_machine_fdt() in
+arch/arm/kernel/devicetree.c which searches through the machine_desc
+table and selects the machine_desc which best matches the device tree
+data. It determines the best match by looking at the 'compatible'
+property in the root device tree node, and comparing it with the
+dt_compat list in struct machine_desc.
+
+The 'compatible' property contains a sorted list of strings starting
+with the exact name of the machine, followed by an optional list of
+boards it is compatible with sorted from most compatible to least. For
+example, the root compatible properties for the TI BeagleBoard and its
+successor, the BeagleBoard xM board might look like:
+
+ compatible = "ti,omap3-beagleboard", "ti,omap3450", "ti,omap3";
+ compatible = "ti,omap3-beagleboard-xm", "ti,omap3450", "ti,omap3";
+
+Where "ti,omap3-beagleboard-xm" specifies the exact model, it also
+claims that it compatible with the OMAP 3450 SoC, and the omap3 family
+of SoCs in general. You'll notice that the list is sorted from most
+specific (exact board) to least specific (SoC family).
+
+Astute readers might point out that the Beagle xM could also claim
+compatibility with the original Beagle board. However, one should be
+cautioned about doing so at the board level since there is typically a
+high level of change from one board to another, even within the same
+product line, and it is hard to nail down exactly what is meant when one
+board claims to be compatible with another. For the top level, it is
+better to err on the side of caution and not claim one board is
+compatible with another. The notable exception would be when one
+board is a carrier for another, such as a CPU module attached to a
+carrier board.
+
+One more note on compatible values. Any string used in a compatible
+property must be documented as to what it indicates. Add
+documentation for compatible strings in Documentation/devicetree/bindings.
+
+Again on ARM, for each machine_desc, the kernel looks to see if
+any of the dt_compat list entries appear in the compatible property.
+If one does, then that machine_desc is a candidate for driving the
+machine. After searching the entire table of machine_descs,
+setup_machine_fdt() returns the 'most compatible' machine_desc based
+on which entry in the compatible property each machine_desc matches
+against. If no matching machine_desc is found, then it returns NULL.
+
+The reasoning behind this scheme is the observation that in the majority
+of cases, a single machine_desc can support a large number of boards
+if they all use the same SoC, or same family of SoCs. However,
+invariably there will be some exceptions where a specific board will
+require special setup code that is not useful in the generic case.
+Special cases could be handled by explicitly checking for the
+troublesome board(s) in generic setup code, but doing so very quickly
+becomes ugly and/or unmaintainable if it is more than just a couple of
+cases.
+
+Instead, the compatible list allows a generic machine_desc to provide
+support for a wide common set of boards by specifying "less
+compatible" value in the dt_compat list. In the example above,
+generic board support can claim compatibility with "ti,omap3" or
+"ti,omap3450". If a bug was discovered on the original beagleboard
+that required special workaround code during early boot, then a new
+machine_desc could be added which implements the workarounds and only
+matches on "ti,omap3-beagleboard".
+
+PowerPC uses a slightly different scheme where it calls the .probe()
+hook from each machine_desc, and the first one returning TRUE is used.
+However, this approach does not take into account the priority of the
+compatible list, and probably should be avoided for new architecture
+support.
+
+2.3 Runtime configuration
+-------------------------
+In most cases, a DT will be the sole method of communicating data from
+firmware to the kernel, so also gets used to pass in runtime and
+configuration data like the kernel parameters string and the location
+of an initrd image.
+
+Most of this data is contained in the /chosen node, and when booting
+Linux it will look something like this:
+
+ chosen {
+ bootargs = "console=ttyS0,115200 loglevel=8";
+ initrd-start = <0xc8000000>;
+ initrd-end = <0xc8200000>;
+ };
+
+The bootargs property contains the kernel arguments, and the initrd-*
+properties define the address and size of an initrd blob. The
+chosen node may also optionally contain an arbitrary number of
+additional properties for platform-specific configuration data.
+
+During early boot, the architecture setup code calls of_scan_flat_dt()
+several times with different helper callbacks to parse device tree
+data before paging is setup. The of_scan_flat_dt() code scans through
+the device tree and uses the helpers to extract information required
+during early boot. Typically the early_init_dt_scan_chosen() helper
+is used to parse the chosen node including kernel parameters,
+early_init_dt_scan_root() to initialize the DT address space model,
+and early_init_dt_scan_memory() to determine the size and
+location of usable RAM.
+
+On ARM, the function setup_machine_fdt() is responsible for early
+scanning of the device tree after selecting the correct machine_desc
+that supports the board.
+
+2.4 Device population
+---------------------
+After the board has been identified, and after the early configuration data
+has been parsed, then kernel initialization can proceed in the normal
+way. At some point in this process, unflatten_device_tree() is called
+to convert the data into a more efficient runtime representation.
+This is also when machine-specific setup hooks will get called, like
+the machine_desc .init_early(), .init_irq() and .init_machine() hooks
+on ARM. The remainder of this section uses examples from the ARM
+implementation, but all architectures will do pretty much the same
+thing when using a DT.
+
+As can be guessed by the names, .init_early() is used for any machine-
+specific setup that needs to be executed early in the boot process,
+and .init_irq() is used to set up interrupt handling. Using a DT
+doesn't materially change the behaviour of either of these functions.
+If a DT is provided, then both .init_early() and .init_irq() are able
+to call any of the DT query functions (of_* in include/linux/of*.h) to
+get additional data about the platform.
+
+The most interesting hook in the DT context is .init_machine() which
+is primarily responsible for populating the Linux device model with
+data about the platform. Historically this has been implemented on
+embedded platforms by defining a set of static clock structures,
+platform_devices, and other data in the board support .c file, and
+registering it en-masse in .init_machine(). When DT is used, then
+instead of hard coding static devices for each platform, the list of
+devices can be obtained by parsing the DT, and allocating device
+structures dynamically.
+
+The simplest case is when .init_machine() is only responsible for
+registering a block of platform_devices. A platform_device is a concept
+used by Linux for memory or I/O mapped devices which cannot be detected
+by hardware, and for 'composite' or 'virtual' devices (more on those
+later). While there is no 'platform device' terminology for the DT,
+platform devices roughly correspond to device nodes at the root of the
+tree and children of simple memory mapped bus nodes.
+
+About now is a good time to lay out an example. Here is part of the
+device tree for the NVIDIA Tegra board.
+
+/{
+ compatible = "nvidia,harmony", "nvidia,tegra20";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ interrupt-parent = <&intc>;
+
+ chosen { };
+ aliases { };
+
+ memory {
+ device_type = "memory";
+ reg = <0x00000000 0x40000000>;
+ };
+
+ soc {
+ compatible = "nvidia,tegra20-soc", "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ intc: interrupt-controller@50041000 {
+ compatible = "nvidia,tegra20-gic";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ reg = <0x50041000 0x1000>, < 0x50040100 0x0100 >;
+ };
+
+ serial@70006300 {
+ compatible = "nvidia,tegra20-uart";
+ reg = <0x70006300 0x100>;
+ interrupts = <122>;
+ };
+
+ i2s1: i2s@70002800 {
+ compatible = "nvidia,tegra20-i2s";
+ reg = <0x70002800 0x100>;
+ interrupts = <77>;
+ codec = <&wm8903>;
+ };
+
+ i2c@7000c000 {
+ compatible = "nvidia,tegra20-i2c";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x7000c000 0x100>;
+ interrupts = <70>;
+
+ wm8903: codec@1a {
+ compatible = "wlf,wm8903";
+ reg = <0x1a>;
+ interrupts = <347>;
+ };
+ };
+ };
+
+ sound {
+ compatible = "nvidia,harmony-sound";
+ i2s-controller = <&i2s1>;
+ i2s-codec = <&wm8903>;
+ };
+};
+
+At .machine_init() time, Tegra board support code will need to look at
+this DT and decide which nodes to create platform_devices for.
+However, looking at the tree, it is not immediately obvious what kind
+of device each node represents, or even if a node represents a device
+at all. The /chosen, /aliases, and /memory nodes are informational
+nodes that don't describe devices (although arguably memory could be
+considered a device). The children of the /soc node are memory mapped
+devices, but the codec@1a is an i2c device, and the sound node
+represents not a device, but rather how other devices are connected
+together to create the audio subsystem. I know what each device is
+because I'm familiar with the board design, but how does the kernel
+know what to do with each node?
+
+The trick is that the kernel starts at the root of the tree and looks
+for nodes that have a 'compatible' property. First, it is generally
+assumed that any node with a 'compatible' property represents a device
+of some kind, and second, it can be assumed that any node at the root
+of the tree is either directly attached to the processor bus, or is a
+miscellaneous system device that cannot be described any other way.
+For each of these nodes, Linux allocates and registers a
+platform_device, which in turn may get bound to a platform_driver.
+
+Why is using a platform_device for these nodes a safe assumption?
+Well, for the way that Linux models devices, just about all bus_types
+assume that its devices are children of a bus controller. For
+example, each i2c_client is a child of an i2c_master. Each spi_device
+is a child of an SPI bus. Similarly for USB, PCI, MDIO, etc. The
+same hierarchy is also found in the DT, where I2C device nodes only
+ever appear as children of an I2C bus node. Ditto for SPI, MDIO, USB,
+etc. The only devices which do not require a specific type of parent
+device are platform_devices (and amba_devices, but more on that
+later), which will happily live at the base of the Linux /sys/devices
+tree. Therefore, if a DT node is at the root of the tree, then it
+really probably is best registered as a platform_device.
+
+Linux board support code calls of_platform_populate(NULL, NULL, NULL)
+to kick off discovery of devices at the root of the tree. The
+parameters are all NULL because when starting from the root of the
+tree, there is no need to provide a starting node (the first NULL), a
+parent struct device (the last NULL), and we're not using a match
+table (yet). For a board that only needs to register devices,
+.init_machine() can be completely empty except for the
+of_platform_populate() call.
+
+In the Tegra example, this accounts for the /soc and /sound nodes, but
+what about the children of the SoC node? Shouldn't they be registered
+as platform devices too? For Linux DT support, the generic behaviour
+is for child devices to be registered by the parent's device driver at
+driver .probe() time. So, an i2c bus device driver will register a
+i2c_client for each child node, an SPI bus driver will register
+its spi_device children, and similarly for other bus_types.
+According to that model, a driver could be written that binds to the
+SoC node and simply registers platform_devices for each of its
+children. The board support code would allocate and register an SoC
+device, a (theoretical) SoC device driver could bind to the SoC device,
+and register platform_devices for /soc/interrupt-controller, /soc/serial,
+/soc/i2s, and /soc/i2c in its .probe() hook. Easy, right?
+
+Actually, it turns out that registering children of some
+platform_devices as more platform_devices is a common pattern, and the
+device tree support code reflects that and makes the above example
+simpler. The second argument to of_platform_populate() is an
+of_device_id table, and any node that matches an entry in that table
+will also get its child nodes registered. In the tegra case, the code
+can look something like this:
+
+static void __init harmony_init_machine(void)
+{
+ /* ... */
+ of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
+}
+
+"simple-bus" is defined in the ePAPR 1.0 specification as a property
+meaning a simple memory mapped bus, so the of_platform_populate() code
+could be written to just assume simple-bus compatible nodes will
+always be traversed. However, we pass it in as an argument so that
+board support code can always override the default behaviour.
+
+[Need to add discussion of adding i2c/spi/etc child devices]
+
+Appendix A: AMBA devices
+------------------------
+
+ARM Primecells are a certain kind of device attached to the ARM AMBA
+bus which include some support for hardware detection and power
+management. In Linux, struct amba_device and the amba_bus_type is
+used to represent Primecell devices. However, the fiddly bit is that
+not all devices on an AMBA bus are Primecells, and for Linux it is
+typical for both amba_device and platform_device instances to be
+siblings of the same bus segment.
+
+When using the DT, this creates problems for of_platform_populate()
+because it must decide whether to register each node as either a
+platform_device or an amba_device. This unfortunately complicates the
+device creation model a little bit, but the solution turns out not to
+be too invasive. If a node is compatible with "arm,amba-primecell", then
+of_platform_populate() will register it as an amba_device instead of a
+platform_device.
diff --git a/Documentation/dma-buf-sharing.txt b/Documentation/dma-buf-sharing.txt
index 225f96d88f5..3bbd5c51605 100644
--- a/Documentation/dma-buf-sharing.txt
+++ b/Documentation/dma-buf-sharing.txt
@@ -32,8 +32,12 @@ The buffer-user
*IMPORTANT*: [see https://lkml.org/lkml/2011/12/20/211 for more details]
For this first version, A buffer shared using the dma_buf sharing API:
- *may* be exported to user space using "mmap" *ONLY* by exporter, outside of
- this framework.
-- may be used *ONLY* by importers that do not need CPU access to the buffer.
+ this framework.
+- with this new iteration of the dma-buf api cpu access from the kernel has been
+ enable, see below for the details.
+
+dma-buf operations for device dma only
+--------------------------------------
The dma_buf buffer sharing API usage contains the following steps:
@@ -219,10 +223,120 @@ NOTES:
If the exporter chooses not to allow an attach() operation once a
map_dma_buf() API has been called, it simply returns an error.
-Miscellaneous notes:
+Kernel cpu access to a dma-buf buffer object
+--------------------------------------------
+
+The motivation to allow cpu access from the kernel to a dma-buf object from the
+importers side are:
+- fallback operations, e.g. if the devices is connected to a usb bus and the
+ kernel needs to shuffle the data around first before sending it away.
+- full transparency for existing users on the importer side, i.e. userspace
+ should not notice the difference between a normal object from that subsystem
+ and an imported one backed by a dma-buf. This is really important for drm
+ opengl drivers that expect to still use all the existing upload/download
+ paths.
+
+Access to a dma_buf from the kernel context involves three steps:
+
+1. Prepare access, which invalidate any necessary caches and make the object
+ available for cpu access.
+2. Access the object page-by-page with the dma_buf map apis
+3. Finish access, which will flush any necessary cpu caches and free reserved
+ resources.
+
+1. Prepare access
+
+ Before an importer can access a dma_buf object with the cpu from the kernel
+ context, it needs to notify the exporter of the access that is about to
+ happen.
+
+ Interface:
+ int dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
+ size_t start, size_t len,
+ enum dma_data_direction direction)
+
+ This allows the exporter to ensure that the memory is actually available for
+ cpu access - the exporter might need to allocate or swap-in and pin the
+ backing storage. The exporter also needs to ensure that cpu access is
+ coherent for the given range and access direction. The range and access
+ direction can be used by the exporter to optimize the cache flushing, i.e.
+ access outside of the range or with a different direction (read instead of
+ write) might return stale or even bogus data (e.g. when the exporter needs to
+ copy the data to temporary storage).
+
+ This step might fail, e.g. in oom conditions.
+
+2. Accessing the buffer
+
+ To support dma_buf objects residing in highmem cpu access is page-based using
+ an api similar to kmap. Accessing a dma_buf is done in aligned chunks of
+ PAGE_SIZE size. Before accessing a chunk it needs to be mapped, which returns
+ a pointer in kernel virtual address space. Afterwards the chunk needs to be
+ unmapped again. There is no limit on how often a given chunk can be mapped
+ and unmapped, i.e. the importer does not need to call begin_cpu_access again
+ before mapping the same chunk again.
+
+ Interfaces:
+ void *dma_buf_kmap(struct dma_buf *, unsigned long);
+ void dma_buf_kunmap(struct dma_buf *, unsigned long, void *);
+
+ There are also atomic variants of these interfaces. Like for kmap they
+ facilitate non-blocking fast-paths. Neither the importer nor the exporter (in
+ the callback) is allowed to block when using these.
+
+ Interfaces:
+ void *dma_buf_kmap_atomic(struct dma_buf *, unsigned long);
+ void dma_buf_kunmap_atomic(struct dma_buf *, unsigned long, void *);
+
+ For importers all the restrictions of using kmap apply, like the limited
+ supply of kmap_atomic slots. Hence an importer shall only hold onto at most 2
+ atomic dma_buf kmaps at the same time (in any given process context).
+
+ dma_buf kmap calls outside of the range specified in begin_cpu_access are
+ undefined. If the range is not PAGE_SIZE aligned, kmap needs to succeed on
+ the partial chunks at the beginning and end but may return stale or bogus
+ data outside of the range (in these partial chunks).
+
+ Note that these calls need to always succeed. The exporter needs to complete
+ any preparations that might fail in begin_cpu_access.
+
+3. Finish access
+
+ When the importer is done accessing the range specified in begin_cpu_access,
+ it needs to announce this to the exporter (to facilitate cache flushing and
+ unpinning of any pinned resources). The result of of any dma_buf kmap calls
+ after end_cpu_access is undefined.
+
+ Interface:
+ void dma_buf_end_cpu_access(struct dma_buf *dma_buf,
+ size_t start, size_t len,
+ enum dma_data_direction dir);
+
+
+Miscellaneous notes
+-------------------
+
- Any exporters or users of the dma-buf buffer sharing framework must have
a 'select DMA_SHARED_BUFFER' in their respective Kconfigs.
+- In order to avoid fd leaks on exec, the FD_CLOEXEC flag must be set
+ on the file descriptor. This is not just a resource leak, but a
+ potential security hole. It could give the newly exec'd application
+ access to buffers, via the leaked fd, to which it should otherwise
+ not be permitted access.
+
+ The problem with doing this via a separate fcntl() call, versus doing it
+ atomically when the fd is created, is that this is inherently racy in a
+ multi-threaded app[3]. The issue is made worse when it is library code
+ opening/creating the file descriptor, as the application may not even be
+ aware of the fd's.
+
+ To avoid this problem, userspace must have a way to request O_CLOEXEC
+ flag be set when the dma-buf fd is created. So any API provided by
+ the exporting driver to create a dmabuf fd must provide a way to let
+ userspace control setting of O_CLOEXEC flag passed in to dma_buf_fd().
+
References:
[1] struct dma_buf_ops in include/linux/dma-buf.h
[2] All interfaces mentioned above defined in include/linux/dma-buf.h
+[3] https://lwn.net/Articles/236486/
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index 8c10bf375c7..1b7f9acbcbb 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -144,9 +144,6 @@ journal_async_commit Commit block can be written to disk without waiting
mount the device. This will enable 'journal_checksum'
internally.
-journal=update Update the ext4 file system's journal to the current
- format.
-
journal_dev=devnum When the external journal device's major/minor numbers
have changed, this option allows the user to specify
the new journal location. The journal device is
@@ -356,11 +353,6 @@ nouid32 Disables 32-bit UIDs and GIDs. This is for
interoperability with older kernels which only
store and expect 16-bit values.
-resize Allows to resize filesystem to the end of the last
- existing block group, further resize has to be done
- with resize2fs either online, or offline. It can be
- used only with conjunction with remount.
-
block_validity This options allows to enables/disables the in-kernel
noblock_validity facility for tracking filesystem metadata blocks
within internal data structures. This allows multi-
diff --git a/Documentation/filesystems/files.txt b/Documentation/filesystems/files.txt
index ac2facc50d2..46dfc6b038c 100644
--- a/Documentation/filesystems/files.txt
+++ b/Documentation/filesystems/files.txt
@@ -113,8 +113,8 @@ the fdtable structure -
if (fd >= 0) {
/* locate_fd() may have expanded fdtable, load the ptr */
fdt = files_fdtable(files);
- FD_SET(fd, fdt->open_fds);
- FD_CLR(fd, fdt->close_on_exec);
+ __set_open_fd(fd, fdt);
+ __clear_close_on_exec(fd, fdt);
spin_unlock(&files->file_lock);
.....
diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt
index 792faa3c06c..620a07844e8 100644
--- a/Documentation/gpio.txt
+++ b/Documentation/gpio.txt
@@ -271,9 +271,26 @@ Some platforms may also use knowledge about what GPIOs are active for
power management, such as by powering down unused chip sectors and, more
easily, gating off unused clocks.
-Note that requesting a GPIO does NOT cause it to be configured in any
-way; it just marks that GPIO as in use. Separate code must handle any
-pin setup (e.g. controlling which pin the GPIO uses, pullup/pulldown).
+For GPIOs that use pins known to the pinctrl subsystem, that subsystem should
+be informed of their use; a gpiolib driver's .request() operation may call
+pinctrl_request_gpio(), and a gpiolib driver's .free() operation may call
+pinctrl_free_gpio(). The pinctrl subsystem allows a pinctrl_request_gpio()
+to succeed concurrently with a pin or pingroup being "owned" by a device for
+pin multiplexing.
+
+Any programming of pin multiplexing hardware that is needed to route the
+GPIO signal to the appropriate pin should occur within a GPIO driver's
+.direction_input() or .direction_output() operations, and occur after any
+setup of an output GPIO's value. This allows a glitch-free migration from a
+pin's special function to GPIO. This is sometimes required when using a GPIO
+to implement a workaround on signals typically driven by a non-GPIO HW block.
+
+Some platforms allow some or all GPIO signals to be routed to different pins.
+Similarly, other aspects of the GPIO or pin may need to be configured, such as
+pullup/pulldown. Platform software should arrange that any such details are
+configured prior to gpio_request() being called for those GPIOs, e.g. using
+the pinctrl subsystem's mapping table, so that GPIO users need not be aware
+of these details.
Also note that it's your responsibility to have stopped using a GPIO
before you free it.
@@ -302,6 +319,8 @@ where 'flags' is currently defined to specify the following properties:
* GPIOF_INIT_LOW - as output, set initial level to LOW
* GPIOF_INIT_HIGH - as output, set initial level to HIGH
+ * GPIOF_OPEN_DRAIN - gpio pin is open drain type.
+ * GPIOF_OPEN_SOURCE - gpio pin is open source type.
since GPIOF_INIT_* are only valid when configured as output, so group valid
combinations as:
@@ -310,8 +329,19 @@ combinations as:
* GPIOF_OUT_INIT_LOW - configured as output, initial level LOW
* GPIOF_OUT_INIT_HIGH - configured as output, initial level HIGH
-In the future, these flags can be extended to support more properties such
-as open-drain status.
+When setting the flag as GPIOF_OPEN_DRAIN then it will assume that pins is
+open drain type. Such pins will not be driven to 1 in output mode. It is
+require to connect pull-up on such pins. By enabling this flag, gpio lib will
+make the direction to input when it is asked to set value of 1 in output mode
+to make the pin HIGH. The pin is make to LOW by driving value 0 in output mode.
+
+When setting the flag as GPIOF_OPEN_SOURCE then it will assume that pins is
+open source type. Such pins will not be driven to 0 in output mode. It is
+require to connect pull-down on such pin. By enabling this flag, gpio lib will
+make the direction to input when it is asked to set value of 0 in output mode
+to make the pin LOW. The pin is make to HIGH by driving value 1 in output mode.
+
+In the future, these flags can be extended to support more properties.
Further more, to ease the claim/release of multiple GPIOs, 'struct gpio' is
introduced to encapsulate all three fields as:
diff --git a/Documentation/i2c/busses/i2c-i801 b/Documentation/i2c/busses/i2c-i801
index 2871fd50034..71f55bbcefc 100644
--- a/Documentation/i2c/busses/i2c-i801
+++ b/Documentation/i2c/busses/i2c-i801
@@ -20,6 +20,7 @@ Supported adapters:
* Intel Patsburg (PCH)
* Intel DH89xxCC (PCH)
* Intel Panther Point (PCH)
+ * Intel Lynx Point (PCH)
Datasheets: Publicly available at the Intel website
On Intel Patsburg and later chipsets, both the normal host SMBus controller
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 58eac231fe6..c1601e5a8b7 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1699,6 +1699,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
The default is to send the implementation identification
information.
+ nfsd.nfs4_disable_idmapping=
+ [NFSv4] When set to the default of '1', the NFSv4
+ server will return only numeric uids and gids to
+ clients using auth_sys, and will accept numeric uids
+ and gids from such clients. This is intended to ease
+ migration from NFSv2/v3.
objlayoutdriver.osd_login_prog=
[NFS] [OBJLAYOUT] sets the pathname to the program which
@@ -1869,6 +1875,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
shutdown the other cpus. Instead use the REBOOT_VECTOR
irq.
+ nomodule Disable module load
+
nopat [X86] Disable PAT (page attribute table extension of
pagetables) support.
diff --git a/Documentation/laptops/asus-laptop.txt b/Documentation/laptops/asus-laptop.txt
index 803e51f6768..a1e04d67928 100644
--- a/Documentation/laptops/asus-laptop.txt
+++ b/Documentation/laptops/asus-laptop.txt
@@ -45,7 +45,7 @@ Status
Usage
-----
- Try "modprobe asus_acpi". Check your dmesg (simply type dmesg). You should
+ Try "modprobe asus-laptop". Check your dmesg (simply type dmesg). You should
see some lines like this :
Asus Laptop Extras version 0.42
diff --git a/Documentation/laptops/sony-laptop.txt b/Documentation/laptops/sony-laptop.txt
index 2bd4e82e5d9..0d5ac7f5287 100644
--- a/Documentation/laptops/sony-laptop.txt
+++ b/Documentation/laptops/sony-laptop.txt
@@ -17,6 +17,11 @@ subsystem. See the logs of acpid or /proc/acpi/event and
devices are created by the driver. Additionally, loading the driver with the
debug option will report all events in the kernel log.
+The "scancodes" passed to the input system (that can be remapped with udev)
+are indexes to the table "sony_laptop_input_keycode_map" in the sony-laptop.c
+module. For example the "FN/E" key combination (EJECTCD on some models)
+generates the scancode 20 (0x14).
+
Backlight control:
------------------
If your laptop model supports it, you will find sysfs files in the
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index e1d94bf4056..6386f8c0482 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -95,7 +95,7 @@ described as 'basic' will be available.
Capability: basic
Architectures: all
Type: system ioctl
-Parameters: none
+Parameters: machine type identifier (KVM_VM_*)
Returns: a VM fd that can be used to control the new virtual machine.
The new VM has no virtual cpus and no memory. An mmap() of a VM fd
@@ -103,6 +103,11 @@ will access the virtual machine's physical address space; offset zero
corresponds to guest physical address zero. Use of mmap() on a VM fd
is discouraged if userspace memory allocation (KVM_CAP_USER_MEMORY) is
available.
+You most certainly want to use 0 as machine type.
+
+In order to create user controlled virtual machines on S390, check
+KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL as
+privileged user (CAP_SYS_ADMIN).
4.3 KVM_GET_MSR_INDEX_LIST
@@ -213,6 +218,11 @@ allocation of vcpu ids. For example, if userspace wants
single-threaded guest vcpus, it should make all vcpu ids be a multiple
of the number of vcpus per vcore.
+For virtual cpus that have been created with S390 user controlled virtual
+machines, the resulting vcpu fd can be memory mapped at page offset
+KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of the virtual
+cpu's hardware control block.
+
4.8 KVM_GET_DIRTY_LOG (vm ioctl)
Capability: basic
@@ -1159,6 +1169,14 @@ following flags are specified:
/* Depends on KVM_CAP_IOMMU */
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
+/* The following two depend on KVM_CAP_PCI_2_3 */
+#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
+#define KVM_DEV_ASSIGN_MASK_INTX (1 << 2)
+
+If KVM_DEV_ASSIGN_PCI_2_3 is set, the kernel will manage legacy INTx interrupts
+via the PCI-2.3-compliant device-level mask, thus enable IRQ sharing with other
+assigned devices or host devices. KVM_DEV_ASSIGN_MASK_INTX specifies the
+guest's view on the INTx mask, see KVM_ASSIGN_SET_INTX_MASK for details.
The KVM_DEV_ASSIGN_ENABLE_IOMMU flag is a mandatory option to ensure
isolation of the device. Usages not specifying this flag are deprecated.
@@ -1399,6 +1417,71 @@ The following flags are defined:
If datamatch flag is set, the event will be signaled only if the written value
to the registered address is equal to datamatch in struct kvm_ioeventfd.
+4.59 KVM_DIRTY_TLB
+
+Capability: KVM_CAP_SW_TLB
+Architectures: ppc
+Type: vcpu ioctl
+Parameters: struct kvm_dirty_tlb (in)
+Returns: 0 on success, -1 on error
+
+struct kvm_dirty_tlb {
+ __u64 bitmap;
+ __u32 num_dirty;
+};
+
+This must be called whenever userspace has changed an entry in the shared
+TLB, prior to calling KVM_RUN on the associated vcpu.
+
+The "bitmap" field is the userspace address of an array. This array
+consists of a number of bits, equal to the total number of TLB entries as
+determined by the last successful call to KVM_CONFIG_TLB, rounded up to the
+nearest multiple of 64.
+
+Each bit corresponds to one TLB entry, ordered the same as in the shared TLB
+array.
+
+The array is little-endian: the bit 0 is the least significant bit of the
+first byte, bit 8 is the least significant bit of the second byte, etc.
+This avoids any complications with differing word sizes.
+
+The "num_dirty" field is a performance hint for KVM to determine whether it
+should skip processing the bitmap and just invalidate everything. It must
+be set to the number of set bits in the bitmap.
+
+4.60 KVM_ASSIGN_SET_INTX_MASK
+
+Capability: KVM_CAP_PCI_2_3
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_assigned_pci_dev (in)
+Returns: 0 on success, -1 on error
+
+Allows userspace to mask PCI INTx interrupts from the assigned device. The
+kernel will not deliver INTx interrupts to the guest between setting and
+clearing of KVM_ASSIGN_SET_INTX_MASK via this interface. This enables use of
+and emulation of PCI 2.3 INTx disable command register behavior.
+
+This may be used for both PCI 2.3 devices supporting INTx disable natively and
+older devices lacking this support. Userspace is responsible for emulating the
+read value of the INTx disable bit in the guest visible PCI command register.
+When modifying the INTx disable state, userspace should precede updating the
+physical device command register by calling this ioctl to inform the kernel of
+the new intended INTx mask state.
+
+Note that the kernel uses the device INTx disable bit to internally manage the
+device interrupt state for PCI 2.3 devices. Reads of this register may
+therefore not match the expected value. Writes should always use the guest
+intended INTx disable value rather than attempting to read-copy-update the
+current physical device state. Races between user and kernel updates to the
+INTx disable bit are handled lazily in the kernel. It's possible the device
+may generate unintended interrupts, but they will not be injected into the
+guest.
+
+See KVM_ASSIGN_DEV_IRQ for the data structure. The target device is specified
+by assigned_dev_id. In the flags field, only KVM_DEV_ASSIGN_MASK_INTX is
+evaluated.
+
4.62 KVM_CREATE_SPAPR_TCE
Capability: KVM_CAP_SPAPR_TCE
@@ -1491,6 +1574,101 @@ following algorithm:
Some guests configure the LINT1 NMI input to cause a panic, aiding in
debugging.
+4.65 KVM_S390_UCAS_MAP
+
+Capability: KVM_CAP_S390_UCONTROL
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_ucas_mapping (in)
+Returns: 0 in case of success
+
+The parameter is defined like this:
+ struct kvm_s390_ucas_mapping {
+ __u64 user_addr;
+ __u64 vcpu_addr;
+ __u64 length;
+ };
+
+This ioctl maps the memory at "user_addr" with the length "length" to
+the vcpu's address space starting at "vcpu_addr". All parameters need to
+be alligned by 1 megabyte.
+
+4.66 KVM_S390_UCAS_UNMAP
+
+Capability: KVM_CAP_S390_UCONTROL
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_ucas_mapping (in)
+Returns: 0 in case of success
+
+The parameter is defined like this:
+ struct kvm_s390_ucas_mapping {
+ __u64 user_addr;
+ __u64 vcpu_addr;
+ __u64 length;
+ };
+
+This ioctl unmaps the memory in the vcpu's address space starting at
+"vcpu_addr" with the length "length". The field "user_addr" is ignored.
+All parameters need to be alligned by 1 megabyte.
+
+4.67 KVM_S390_VCPU_FAULT
+
+Capability: KVM_CAP_S390_UCONTROL
+Architectures: s390
+Type: vcpu ioctl
+Parameters: vcpu absolute address (in)
+Returns: 0 in case of success
+
+This call creates a page table entry on the virtual cpu's address space
+(for user controlled virtual machines) or the virtual machine's address
+space (for regular virtual machines). This only works for minor faults,
+thus it's recommended to access subject memory page via the user page
+table upfront. This is useful to handle validity intercepts for user
+controlled virtual machines to fault in the virtual cpu's lowcore pages
+prior to calling the KVM_RUN ioctl.
+
+4.68 KVM_SET_ONE_REG
+
+Capability: KVM_CAP_ONE_REG
+Architectures: all
+Type: vcpu ioctl
+Parameters: struct kvm_one_reg (in)
+Returns: 0 on success, negative value on failure
+
+struct kvm_one_reg {
+ __u64 id;
+ __u64 addr;
+};
+
+Using this ioctl, a single vcpu register can be set to a specific value
+defined by user space with the passed in struct kvm_one_reg, where id
+refers to the register identifier as described below and addr is a pointer
+to a variable with the respective size. There can be architecture agnostic
+and architecture specific registers. Each have their own range of operation
+and their own constants and width. To keep track of the implemented
+registers, find a list below:
+
+ Arch | Register | Width (bits)
+ | |
+ PPC | KVM_REG_PPC_HIOR | 64
+
+4.69 KVM_GET_ONE_REG
+
+Capability: KVM_CAP_ONE_REG
+Architectures: all
+Type: vcpu ioctl
+Parameters: struct kvm_one_reg (in and out)
+Returns: 0 on success, negative value on failure
+
+This ioctl allows to receive the value of a single register implemented
+in a vcpu. The register to read is indicated by the "id" field of the
+kvm_one_reg struct passed in. On success, the register value can be found
+at the memory location pointed to by "addr".
+
+The list of registers accessible using this interface is identical to the
+list in 4.64.
+
5. The kvm_run structure
Application code obtains a pointer to the kvm_run structure by
@@ -1651,6 +1829,20 @@ s390 specific.
s390 specific.
+ /* KVM_EXIT_S390_UCONTROL */
+ struct {
+ __u64 trans_exc_code;
+ __u32 pgm_code;
+ } s390_ucontrol;
+
+s390 specific. A page fault has occurred for a user controlled virtual
+machine (KVM_VM_S390_UNCONTROL) on it's host page table that cannot be
+resolved by the kernel.
+The program code and the translation exception code that were placed
+in the cpu's lowcore are presented here as defined by the z Architecture
+Principles of Operation Book in the Chapter for Dynamic Address Translation
+(DAT)
+
/* KVM_EXIT_DCR */
struct {
__u32 dcrn;
@@ -1693,6 +1885,29 @@ developer registration required to access it).
/* Fix the size of the union. */
char padding[256];
};
+
+ /*
+ * shared registers between kvm and userspace.
+ * kvm_valid_regs specifies the register classes set by the host
+ * kvm_dirty_regs specified the register classes dirtied by userspace
+ * struct kvm_sync_regs is architecture specific, as well as the
+ * bits for kvm_valid_regs and kvm_dirty_regs
+ */
+ __u64 kvm_valid_regs;
+ __u64 kvm_dirty_regs;
+ union {
+ struct kvm_sync_regs regs;
+ char padding[1024];
+ } s;
+
+If KVM_CAP_SYNC_REGS is defined, these fields allow userspace to access
+certain guest registers without having to call SET/GET_*REGS. Thus we can
+avoid some system call overhead if userspace has to handle the exit.
+Userspace can query the validity of the structure by checking
+kvm_valid_regs for specific bits. These bits are architecture specific
+and usually define the validity of a groups of registers. (e.g. one bit
+ for general purpose registers)
+
};
6. Capabilities that can be enabled
@@ -1741,3 +1956,45 @@ HTAB address part of SDR1 contains an HVA instead of a GPA, as PAPR keeps the
HTAB invisible to the guest.
When this capability is enabled, KVM_EXIT_PAPR_HCALL can occur.
+
+6.3 KVM_CAP_SW_TLB
+
+Architectures: ppc
+Parameters: args[0] is the address of a struct kvm_config_tlb
+Returns: 0 on success; -1 on error
+
+struct kvm_config_tlb {
+ __u64 params;
+ __u64 array;
+ __u32 mmu_type;
+ __u32 array_len;
+};
+
+Configures the virtual CPU's TLB array, establishing a shared memory area
+between userspace and KVM. The "params" and "array" fields are userspace
+addresses of mmu-type-specific data structures. The "array_len" field is an
+safety mechanism, and should be set to the size in bytes of the memory that
+userspace has reserved for the array. It must be at least the size dictated
+by "mmu_type" and "params".
+
+While KVM_RUN is active, the shared region is under control of KVM. Its
+contents are undefined, and any modification by userspace results in
+boundedly undefined behavior.
+
+On return from KVM_RUN, the shared region will reflect the current state of
+the guest's TLB. If userspace makes any changes, it must call KVM_DIRTY_TLB
+to tell KVM which entries have been changed, prior to calling KVM_RUN again
+on this vcpu.
+
+For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV:
+ - The "params" field is of type "struct kvm_book3e_206_tlb_params".
+ - The "array" field points to an array of type "struct
+ kvm_book3e_206_tlb_entry".
+ - The array consists of all entries in the first TLB, followed by all
+ entries in the second TLB.
+ - Within a TLB, entries are ordered first by increasing set number. Within a
+ set, entries are ordered by way (increasing ESEL).
+ - The hash for determining set number in TLB0 is: (MAS2 >> 12) & (num_sets - 1)
+ where "num_sets" is the tlb_sizes[] value divided by the tlb_ways[] value.
+ - The tsize field of mas1 shall be set to 4K on TLB0, even though the
+ hardware ignores this value for TLB0.
diff --git a/Documentation/virtual/kvm/ppc-pv.txt b/Documentation/virtual/kvm/ppc-pv.txt
index 2b7ce190cde..6e7c3705093 100644
--- a/Documentation/virtual/kvm/ppc-pv.txt
+++ b/Documentation/virtual/kvm/ppc-pv.txt
@@ -81,28 +81,8 @@ additional registers to the magic page. If you add fields to the magic page,
also define a new hypercall feature to indicate that the host can give you more
registers. Only if the host supports the additional features, make use of them.
-The magic page has the following layout as described in
-arch/powerpc/include/asm/kvm_para.h:
-
-struct kvm_vcpu_arch_shared {
- __u64 scratch1;
- __u64 scratch2;
- __u64 scratch3;
- __u64 critical; /* Guest may not get interrupts if == r1 */
- __u64 sprg0;
- __u64 sprg1;
- __u64 sprg2;
- __u64 sprg3;
- __u64 srr0;
- __u64 srr1;
- __u64 dar;
- __u64 msr;
- __u32 dsisr;
- __u32 int_pending; /* Tells the guest if we have an interrupt */
-};
-
-Additions to the page must only occur at the end. Struct fields are always 32
-or 64 bit aligned, depending on them being 32 or 64 bit wide respectively.
+The magic page layout is described by struct kvm_vcpu_arch_shared
+in arch/powerpc/include/asm/kvm_para.h.
Magic page features
===================
diff --git a/Documentation/vm/Makefile b/Documentation/vm/Makefile
deleted file mode 100644
index 3fa4d066886..00000000000
--- a/Documentation/vm/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
-# kbuild trick to avoid linker error. Can be omitted if a module is built.
-obj- := dummy.o
-
-# List of programs to build
-hostprogs-y := page-types hugepage-mmap hugepage-shm map_hugetlb
-
-# Tell kbuild to always build the programs
-always := $(hostprogs-y)
diff --git a/Documentation/vm/hugepage-mmap.c b/Documentation/vm/hugepage-mmap.c
deleted file mode 100644
index db0dd9a33d5..00000000000
--- a/Documentation/vm/hugepage-mmap.c
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * hugepage-mmap:
- *
- * Example of using huge page memory in a user application using the mmap
- * system call. Before running this application, make sure that the
- * administrator has mounted the hugetlbfs filesystem (on some directory
- * like /mnt) using the command mount -t hugetlbfs nodev /mnt. In this
- * example, the app is requesting memory of size 256MB that is backed by
- * huge pages.
- *
- * For the ia64 architecture, the Linux kernel reserves Region number 4 for
- * huge pages. That means that if one requires a fixed address, a huge page
- * aligned address starting with 0x800000... will be required. If a fixed
- * address is not required, the kernel will select an address in the proper
- * range.
- * Other architectures, such as ppc64, i386 or x86_64 are not so constrained.
- */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <sys/mman.h>
-#include <fcntl.h>
-
-#define FILE_NAME "/mnt/hugepagefile"
-#define LENGTH (256UL*1024*1024)
-#define PROTECTION (PROT_READ | PROT_WRITE)
-
-/* Only ia64 requires this */
-#ifdef __ia64__
-#define ADDR (void *)(0x8000000000000000UL)
-#define FLAGS (MAP_SHARED | MAP_FIXED)
-#else
-#define ADDR (void *)(0x0UL)
-#define FLAGS (MAP_SHARED)
-#endif
-
-static void check_bytes(char *addr)
-{
- printf("First hex is %x\n", *((unsigned int *)addr));
-}
-
-static void write_bytes(char *addr)
-{
- unsigned long i;
-
- for (i = 0; i < LENGTH; i++)
- *(addr + i) = (char)i;
-}
-
-static void read_bytes(char *addr)
-{
- unsigned long i;
-
- check_bytes(addr);
- for (i = 0; i < LENGTH; i++)
- if (*(addr + i) != (char)i) {
- printf("Mismatch at %lu\n", i);
- break;
- }
-}
-
-int main(void)
-{
- void *addr;
- int fd;
-
- fd = open(FILE_NAME, O_CREAT | O_RDWR, 0755);
- if (fd < 0) {
- perror("Open failed");
- exit(1);
- }
-
- addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, fd, 0);
- if (addr == MAP_FAILED) {
- perror("mmap");
- unlink(FILE_NAME);
- exit(1);
- }
-
- printf("Returned address is %p\n", addr);
- check_bytes(addr);
- write_bytes(addr);
- read_bytes(addr);
-
- munmap(addr, LENGTH);
- close(fd);
- unlink(FILE_NAME);
-
- return 0;
-}
diff --git a/Documentation/vm/hugepage-shm.c b/Documentation/vm/hugepage-shm.c
deleted file mode 100644
index 07956d8592c..00000000000
--- a/Documentation/vm/hugepage-shm.c
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * hugepage-shm:
- *
- * Example of using huge page memory in a user application using Sys V shared
- * memory system calls. In this example the app is requesting 256MB of
- * memory that is backed by huge pages. The application uses the flag
- * SHM_HUGETLB in the shmget system call to inform the kernel that it is
- * requesting huge pages.
- *
- * For the ia64 architecture, the Linux kernel reserves Region number 4 for
- * huge pages. That means that if one requires a fixed address, a huge page
- * aligned address starting with 0x800000... will be required. If a fixed
- * address is not required, the kernel will select an address in the proper
- * range.
- * Other architectures, such as ppc64, i386 or x86_64 are not so constrained.
- *
- * Note: The default shared memory limit is quite low on many kernels,
- * you may need to increase it via:
- *
- * echo 268435456 > /proc/sys/kernel/shmmax
- *
- * This will increase the maximum size per shared memory segment to 256MB.
- * The other limit that you will hit eventually is shmall which is the
- * total amount of shared memory in pages. To set it to 16GB on a system
- * with a 4kB pagesize do:
- *
- * echo 4194304 > /proc/sys/kernel/shmall
- */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <sys/types.h>
-#include <sys/ipc.h>
-#include <sys/shm.h>
-#include <sys/mman.h>
-
-#ifndef SHM_HUGETLB
-#define SHM_HUGETLB 04000
-#endif
-
-#define LENGTH (256UL*1024*1024)
-
-#define dprintf(x) printf(x)
-
-/* Only ia64 requires this */
-#ifdef __ia64__
-#define ADDR (void *)(0x8000000000000000UL)
-#define SHMAT_FLAGS (SHM_RND)
-#else
-#define ADDR (void *)(0x0UL)
-#define SHMAT_FLAGS (0)
-#endif
-
-int main(void)
-{
- int shmid;
- unsigned long i;
- char *shmaddr;
-
- if ((shmid = shmget(2, LENGTH,
- SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) {
- perror("shmget");
- exit(1);
- }
- printf("shmid: 0x%x\n", shmid);
-
- shmaddr = shmat(shmid, ADDR, SHMAT_FLAGS);
- if (shmaddr == (char *)-1) {
- perror("Shared memory attach failure");
- shmctl(shmid, IPC_RMID, NULL);
- exit(2);
- }
- printf("shmaddr: %p\n", shmaddr);
-
- dprintf("Starting the writes:\n");
- for (i = 0; i < LENGTH; i++) {
- shmaddr[i] = (char)(i);
- if (!(i % (1024 * 1024)))
- dprintf(".");
- }
- dprintf("\n");
-
- dprintf("Starting the Check...");
- for (i = 0; i < LENGTH; i++)
- if (shmaddr[i] != (char)i)
- printf("\nIndex %lu mismatched\n", i);
- dprintf("Done.\n");
-
- if (shmdt((const void *)shmaddr) != 0) {
- perror("Detach failure");
- shmctl(shmid, IPC_RMID, NULL);
- exit(3);
- }
-
- shmctl(shmid, IPC_RMID, NULL);
-
- return 0;
-}
diff --git a/Documentation/vm/map_hugetlb.c b/Documentation/vm/map_hugetlb.c
deleted file mode 100644
index eda1a6d3578..00000000000
--- a/Documentation/vm/map_hugetlb.c
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Example of using hugepage memory in a user application using the mmap
- * system call with MAP_HUGETLB flag. Before running this program make
- * sure the administrator has allocated enough default sized huge pages
- * to cover the 256 MB allocation.
- *
- * For ia64 architecture, Linux kernel reserves Region number 4 for hugepages.
- * That means the addresses starting with 0x800000... will need to be
- * specified. Specifying a fixed address is not required on ppc64, i386
- * or x86_64.
- */
-#include <stdlib.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <sys/mman.h>
-#include <fcntl.h>
-
-#define LENGTH (256UL*1024*1024)
-#define PROTECTION (PROT_READ | PROT_WRITE)
-
-#ifndef MAP_HUGETLB
-#define MAP_HUGETLB 0x40000 /* arch specific */
-#endif
-
-/* Only ia64 requires this */
-#ifdef __ia64__
-#define ADDR (void *)(0x8000000000000000UL)
-#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED)
-#else
-#define ADDR (void *)(0x0UL)
-#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB)
-#endif
-
-static void check_bytes(char *addr)
-{
- printf("First hex is %x\n", *((unsigned int *)addr));
-}
-
-static void write_bytes(char *addr)
-{
- unsigned long i;
-
- for (i = 0; i < LENGTH; i++)
- *(addr + i) = (char)i;
-}
-
-static void read_bytes(char *addr)
-{
- unsigned long i;
-
- check_bytes(addr);
- for (i = 0; i < LENGTH; i++)
- if (*(addr + i) != (char)i) {
- printf("Mismatch at %lu\n", i);
- break;
- }
-}
-
-int main(void)
-{
- void *addr;
-
- addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, 0, 0);
- if (addr == MAP_FAILED) {
- perror("mmap");
- exit(1);
- }
-
- printf("Returned address is %p\n", addr);
- check_bytes(addr);
- write_bytes(addr);
- read_bytes(addr);
-
- munmap(addr, LENGTH);
-
- return 0;
-}
diff --git a/Documentation/vm/page-types.c b/Documentation/vm/page-types.c
deleted file mode 100644
index 0b13f02d405..00000000000
--- a/Documentation/vm/page-types.c
+++ /dev/null
@@ -1,1102 +0,0 @@
-/*
- * page-types: Tool for querying page flags
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; version 2.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should find a copy of v2 of the GNU General Public License somewhere on
- * your Linux system; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place, Suite 330, Boston, MA 02111-1307 USA.
- *
- * Copyright (C) 2009 Intel corporation
- *
- * Authors: Wu Fengguang <fengguang.wu@intel.com>
- */
-
-#define _LARGEFILE64_SOURCE
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <stdint.h>
-#include <stdarg.h>
-#include <string.h>
-#include <getopt.h>
-#include <limits.h>
-#include <assert.h>
-#include <sys/types.h>
-#include <sys/errno.h>
-#include <sys/fcntl.h>
-#include <sys/mount.h>
-#include <sys/statfs.h>
-#include "../../include/linux/magic.h"
-
-
-#ifndef MAX_PATH
-# define MAX_PATH 256
-#endif
-
-#ifndef STR
-# define _STR(x) #x
-# define STR(x) _STR(x)
-#endif
-
-/*
- * pagemap kernel ABI bits
- */
-
-#define PM_ENTRY_BYTES sizeof(uint64_t)
-#define PM_STATUS_BITS 3
-#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
-#define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
-#define PM_STATUS(nr) (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK)
-#define PM_PSHIFT_BITS 6
-#define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
-#define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
-#define PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
-#define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1)
-#define PM_PFRAME(x) ((x) & PM_PFRAME_MASK)
-
-#define PM_PRESENT PM_STATUS(4LL)
-#define PM_SWAP PM_STATUS(2LL)
-
-
-/*
- * kernel page flags
- */
-
-#define KPF_BYTES 8
-#define PROC_KPAGEFLAGS "/proc/kpageflags"
-
-/* copied from kpageflags_read() */
-#define KPF_LOCKED 0
-#define KPF_ERROR 1
-#define KPF_REFERENCED 2
-#define KPF_UPTODATE 3
-#define KPF_DIRTY 4
-#define KPF_LRU 5
-#define KPF_ACTIVE 6
-#define KPF_SLAB 7
-#define KPF_WRITEBACK 8
-#define KPF_RECLAIM 9
-#define KPF_BUDDY 10
-
-/* [11-20] new additions in 2.6.31 */
-#define KPF_MMAP 11
-#define KPF_ANON 12
-#define KPF_SWAPCACHE 13
-#define KPF_SWAPBACKED 14
-#define KPF_COMPOUND_HEAD 15
-#define KPF_COMPOUND_TAIL 16
-#define KPF_HUGE 17
-#define KPF_UNEVICTABLE 18
-#define KPF_HWPOISON 19
-#define KPF_NOPAGE 20
-#define KPF_KSM 21
-#define KPF_THP 22
-
-/* [32-] kernel hacking assistances */
-#define KPF_RESERVED 32
-#define KPF_MLOCKED 33
-#define KPF_MAPPEDTODISK 34
-#define KPF_PRIVATE 35
-#define KPF_PRIVATE_2 36
-#define KPF_OWNER_PRIVATE 37
-#define KPF_ARCH 38
-#define KPF_UNCACHED 39
-
-/* [48-] take some arbitrary free slots for expanding overloaded flags
- * not part of kernel API
- */
-#define KPF_READAHEAD 48
-#define KPF_SLOB_FREE 49
-#define KPF_SLUB_FROZEN 50
-#define KPF_SLUB_DEBUG 51
-
-#define KPF_ALL_BITS ((uint64_t)~0ULL)
-#define KPF_HACKERS_BITS (0xffffULL << 32)
-#define KPF_OVERLOADED_BITS (0xffffULL << 48)
-#define BIT(name) (1ULL << KPF_##name)
-#define BITS_COMPOUND (BIT(COMPOUND_HEAD) | BIT(COMPOUND_TAIL))
-
-static const char *page_flag_names[] = {
- [KPF_LOCKED] = "L:locked",
- [KPF_ERROR] = "E:error",
- [KPF_REFERENCED] = "R:referenced",
- [KPF_UPTODATE] = "U:uptodate",
- [KPF_DIRTY] = "D:dirty",
- [KPF_LRU] = "l:lru",
- [KPF_ACTIVE] = "A:active",
- [KPF_SLAB] = "S:slab",
- [KPF_WRITEBACK] = "W:writeback",
- [KPF_RECLAIM] = "I:reclaim",
- [KPF_BUDDY] = "B:buddy",
-
- [KPF_MMAP] = "M:mmap",
- [KPF_ANON] = "a:anonymous",
- [KPF_SWAPCACHE] = "s:swapcache",
- [KPF_SWAPBACKED] = "b:swapbacked",
- [KPF_COMPOUND_HEAD] = "H:compound_head",
- [KPF_COMPOUND_TAIL] = "T:compound_tail",
- [KPF_HUGE] = "G:huge",
- [KPF_UNEVICTABLE] = "u:unevictable",
- [KPF_HWPOISON] = "X:hwpoison",
- [KPF_NOPAGE] = "n:nopage",
- [KPF_KSM] = "x:ksm",
- [KPF_THP] = "t:thp",
-
- [KPF_RESERVED] = "r:reserved",
- [KPF_MLOCKED] = "m:mlocked",
- [KPF_MAPPEDTODISK] = "d:mappedtodisk",
- [KPF_PRIVATE] = "P:private",
- [KPF_PRIVATE_2] = "p:private_2",
- [KPF_OWNER_PRIVATE] = "O:owner_private",
- [KPF_ARCH] = "h:arch",
- [KPF_UNCACHED] = "c:uncached",
-
- [KPF_READAHEAD] = "I:readahead",
- [KPF_SLOB_FREE] = "P:slob_free",
- [KPF_SLUB_FROZEN] = "A:slub_frozen",
- [KPF_SLUB_DEBUG] = "E:slub_debug",
-};
-
-
-static const char *debugfs_known_mountpoints[] = {
- "/sys/kernel/debug",
- "/debug",
- 0,
-};
-
-/*
- * data structures
- */
-
-static int opt_raw; /* for kernel developers */
-static int opt_list; /* list pages (in ranges) */
-static int opt_no_summary; /* don't show summary */
-static pid_t opt_pid; /* process to walk */
-
-#define MAX_ADDR_RANGES 1024
-static int nr_addr_ranges;
-static unsigned long opt_offset[MAX_ADDR_RANGES];
-static unsigned long opt_size[MAX_ADDR_RANGES];
-
-#define MAX_VMAS 10240
-static int nr_vmas;
-static unsigned long pg_start[MAX_VMAS];
-static unsigned long pg_end[MAX_VMAS];
-
-#define MAX_BIT_FILTERS 64
-static int nr_bit_filters;
-static uint64_t opt_mask[MAX_BIT_FILTERS];
-static uint64_t opt_bits[MAX_BIT_FILTERS];
-
-static int page_size;
-
-static int pagemap_fd;
-static int kpageflags_fd;
-
-static int opt_hwpoison;
-static int opt_unpoison;
-
-static char hwpoison_debug_fs[MAX_PATH+1];
-static int hwpoison_inject_fd;
-static int hwpoison_forget_fd;
-
-#define HASH_SHIFT 13
-#define HASH_SIZE (1 << HASH_SHIFT)
-#define HASH_MASK (HASH_SIZE - 1)
-#define HASH_KEY(flags) (flags & HASH_MASK)
-
-static unsigned long total_pages;
-static unsigned long nr_pages[HASH_SIZE];
-static uint64_t page_flags[HASH_SIZE];
-
-
-/*
- * helper functions
- */
-
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-
-#define min_t(type, x, y) ({ \
- type __min1 = (x); \
- type __min2 = (y); \
- __min1 < __min2 ? __min1 : __min2; })
-
-#define max_t(type, x, y) ({ \
- type __max1 = (x); \
- type __max2 = (y); \
- __max1 > __max2 ? __max1 : __max2; })
-
-static unsigned long pages2mb(unsigned long pages)
-{
- return (pages * page_size) >> 20;
-}
-
-static void fatal(const char *x, ...)
-{
- va_list ap;
-
- va_start(ap, x);
- vfprintf(stderr, x, ap);
- va_end(ap);
- exit(EXIT_FAILURE);
-}
-
-static int checked_open(const char *pathname, int flags)
-{
- int fd = open(pathname, flags);
-
- if (fd < 0) {
- perror(pathname);
- exit(EXIT_FAILURE);
- }
-
- return fd;
-}
-
-/*
- * pagemap/kpageflags routines
- */
-
-static unsigned long do_u64_read(int fd, char *name,
- uint64_t *buf,
- unsigned long index,
- unsigned long count)
-{
- long bytes;
-
- if (index > ULONG_MAX / 8)
- fatal("index overflow: %lu\n", index);
-
- if (lseek(fd, index * 8, SEEK_SET) < 0) {
- perror(name);
- exit(EXIT_FAILURE);
- }
-
- bytes = read(fd, buf, count * 8);
- if (bytes < 0) {
- perror(name);
- exit(EXIT_FAILURE);
- }
- if (bytes % 8)
- fatal("partial read: %lu bytes\n", bytes);
-
- return bytes / 8;
-}
-
-static unsigned long kpageflags_read(uint64_t *buf,
- unsigned long index,
- unsigned long pages)
-{
- return do_u64_read(kpageflags_fd, PROC_KPAGEFLAGS, buf, index, pages);
-}
-
-static unsigned long pagemap_read(uint64_t *buf,
- unsigned long index,
- unsigned long pages)
-{
- return do_u64_read(pagemap_fd, "/proc/pid/pagemap", buf, index, pages);
-}
-
-static unsigned long pagemap_pfn(uint64_t val)
-{
- unsigned long pfn;
-
- if (val & PM_PRESENT)
- pfn = PM_PFRAME(val);
- else
- pfn = 0;
-
- return pfn;
-}
-
-
-/*
- * page flag names
- */
-
-static char *page_flag_name(uint64_t flags)
-{
- static char buf[65];
- int present;
- int i, j;
-
- for (i = 0, j = 0; i < ARRAY_SIZE(page_flag_names); i++) {
- present = (flags >> i) & 1;
- if (!page_flag_names[i]) {
- if (present)
- fatal("unknown flag bit %d\n", i);
- continue;
- }
- buf[j++] = present ? page_flag_names[i][0] : '_';
- }
-
- return buf;
-}
-
-static char *page_flag_longname(uint64_t flags)
-{
- static char buf[1024];
- int i, n;
-
- for (i = 0, n = 0; i < ARRAY_SIZE(page_flag_names); i++) {
- if (!page_flag_names[i])
- continue;
- if ((flags >> i) & 1)
- n += snprintf(buf + n, sizeof(buf) - n, "%s,",
- page_flag_names[i] + 2);
- }
- if (n)
- n--;
- buf[n] = '\0';
-
- return buf;
-}
-
-
-/*
- * page list and summary
- */
-
-static void show_page_range(unsigned long voffset,
- unsigned long offset, uint64_t flags)
-{
- static uint64_t flags0;
- static unsigned long voff;
- static unsigned long index;
- static unsigned long count;
-
- if (flags == flags0 && offset == index + count &&
- (!opt_pid || voffset == voff + count)) {
- count++;
- return;
- }
-
- if (count) {
- if (opt_pid)
- printf("%lx\t", voff);
- printf("%lx\t%lx\t%s\n",
- index, count, page_flag_name(flags0));
- }
-
- flags0 = flags;
- index = offset;
- voff = voffset;
- count = 1;
-}
-
-static void show_page(unsigned long voffset,
- unsigned long offset, uint64_t flags)
-{
- if (opt_pid)
- printf("%lx\t", voffset);
- printf("%lx\t%s\n", offset, page_flag_name(flags));
-}
-
-static void show_summary(void)
-{
- int i;
-
- printf(" flags\tpage-count MB"
- " symbolic-flags\t\t\tlong-symbolic-flags\n");
-
- for (i = 0; i < ARRAY_SIZE(nr_pages); i++) {
- if (nr_pages[i])
- printf("0x%016llx\t%10lu %8lu %s\t%s\n",
- (unsigned long long)page_flags[i],
- nr_pages[i],
- pages2mb(nr_pages[i]),
- page_flag_name(page_flags[i]),
- page_flag_longname(page_flags[i]));
- }
-
- printf(" total\t%10lu %8lu\n",
- total_pages, pages2mb(total_pages));
-}
-
-
-/*
- * page flag filters
- */
-
-static int bit_mask_ok(uint64_t flags)
-{
- int i;
-
- for (i = 0; i < nr_bit_filters; i++) {
- if (opt_bits[i] == KPF_ALL_BITS) {
- if ((flags & opt_mask[i]) == 0)
- return 0;
- } else {
- if ((flags & opt_mask[i]) != opt_bits[i])
- return 0;
- }
- }
-
- return 1;
-}
-
-static uint64_t expand_overloaded_flags(uint64_t flags)
-{
- /* SLOB/SLUB overload several page flags */
- if (flags & BIT(SLAB)) {
- if (flags & BIT(PRIVATE))
- flags ^= BIT(PRIVATE) | BIT(SLOB_FREE);
- if (flags & BIT(ACTIVE))
- flags ^= BIT(ACTIVE) | BIT(SLUB_FROZEN);
- if (flags & BIT(ERROR))
- flags ^= BIT(ERROR) | BIT(SLUB_DEBUG);
- }
-
- /* PG_reclaim is overloaded as PG_readahead in the read path */
- if ((flags & (BIT(RECLAIM) | BIT(WRITEBACK))) == BIT(RECLAIM))
- flags ^= BIT(RECLAIM) | BIT(READAHEAD);
-
- return flags;
-}
-
-static uint64_t well_known_flags(uint64_t flags)
-{
- /* hide flags intended only for kernel hacker */
- flags &= ~KPF_HACKERS_BITS;
-
- /* hide non-hugeTLB compound pages */
- if ((flags & BITS_COMPOUND) && !(flags & BIT(HUGE)))
- flags &= ~BITS_COMPOUND;
-
- return flags;
-}
-
-static uint64_t kpageflags_flags(uint64_t flags)
-{
- flags = expand_overloaded_flags(flags);
-
- if (!opt_raw)
- flags = well_known_flags(flags);
-
- return flags;
-}
-
-/* verify that a mountpoint is actually a debugfs instance */
-static int debugfs_valid_mountpoint(const char *debugfs)
-{
- struct statfs st_fs;
-
- if (statfs(debugfs, &st_fs) < 0)
- return -ENOENT;
- else if (st_fs.f_type != (long) DEBUGFS_MAGIC)
- return -ENOENT;
-
- return 0;
-}
-
-/* find the path to the mounted debugfs */
-static const char *debugfs_find_mountpoint(void)
-{
- const char **ptr;
- char type[100];
- FILE *fp;
-
- ptr = debugfs_known_mountpoints;
- while (*ptr) {
- if (debugfs_valid_mountpoint(*ptr) == 0) {
- strcpy(hwpoison_debug_fs, *ptr);
- return hwpoison_debug_fs;
- }
- ptr++;
- }
-
- /* give up and parse /proc/mounts */
- fp = fopen("/proc/mounts", "r");
- if (fp == NULL)
- perror("Can't open /proc/mounts for read");
-
- while (fscanf(fp, "%*s %"
- STR(MAX_PATH)
- "s %99s %*s %*d %*d\n",
- hwpoison_debug_fs, type) == 2) {
- if (strcmp(type, "debugfs") == 0)
- break;
- }
- fclose(fp);
-
- if (strcmp(type, "debugfs") != 0)
- return NULL;
-
- return hwpoison_debug_fs;
-}
-
-/* mount the debugfs somewhere if it's not mounted */
-
-static void debugfs_mount(void)
-{
- const char **ptr;
-
- /* see if it's already mounted */
- if (debugfs_find_mountpoint())
- return;
-
- ptr = debugfs_known_mountpoints;
- while (*ptr) {
- if (mount(NULL, *ptr, "debugfs", 0, NULL) == 0) {
- /* save the mountpoint */
- strcpy(hwpoison_debug_fs, *ptr);
- break;
- }
- ptr++;
- }
-
- if (*ptr == NULL) {
- perror("mount debugfs");
- exit(EXIT_FAILURE);
- }
-}
-
-/*
- * page actions
- */
-
-static void prepare_hwpoison_fd(void)
-{
- char buf[MAX_PATH + 1];
-
- debugfs_mount();
-
- if (opt_hwpoison && !hwpoison_inject_fd) {
- snprintf(buf, MAX_PATH, "%s/hwpoison/corrupt-pfn",
- hwpoison_debug_fs);
- hwpoison_inject_fd = checked_open(buf, O_WRONLY);
- }
-
- if (opt_unpoison && !hwpoison_forget_fd) {
- snprintf(buf, MAX_PATH, "%s/hwpoison/unpoison-pfn",
- hwpoison_debug_fs);
- hwpoison_forget_fd = checked_open(buf, O_WRONLY);
- }
-}
-
-static int hwpoison_page(unsigned long offset)
-{
- char buf[100];
- int len;
-
- len = sprintf(buf, "0x%lx\n", offset);
- len = write(hwpoison_inject_fd, buf, len);
- if (len < 0) {
- perror("hwpoison inject");
- return len;
- }
- return 0;
-}
-
-static int unpoison_page(unsigned long offset)
-{
- char buf[100];
- int len;
-
- len = sprintf(buf, "0x%lx\n", offset);
- len = write(hwpoison_forget_fd, buf, len);
- if (len < 0) {
- perror("hwpoison forget");
- return len;
- }
- return 0;
-}
-
-/*
- * page frame walker
- */
-
-static int hash_slot(uint64_t flags)
-{
- int k = HASH_KEY(flags);
- int i;
-
- /* Explicitly reserve slot 0 for flags 0: the following logic
- * cannot distinguish an unoccupied slot from slot (flags==0).
- */
- if (flags == 0)
- return 0;
-
- /* search through the remaining (HASH_SIZE-1) slots */
- for (i = 1; i < ARRAY_SIZE(page_flags); i++, k++) {
- if (!k || k >= ARRAY_SIZE(page_flags))
- k = 1;
- if (page_flags[k] == 0) {
- page_flags[k] = flags;
- return k;
- }
- if (page_flags[k] == flags)
- return k;
- }
-
- fatal("hash table full: bump up HASH_SHIFT?\n");
- exit(EXIT_FAILURE);
-}
-
-static void add_page(unsigned long voffset,
- unsigned long offset, uint64_t flags)
-{
- flags = kpageflags_flags(flags);
-
- if (!bit_mask_ok(flags))
- return;
-
- if (opt_hwpoison)
- hwpoison_page(offset);
- if (opt_unpoison)
- unpoison_page(offset);
-
- if (opt_list == 1)
- show_page_range(voffset, offset, flags);
- else if (opt_list == 2)
- show_page(voffset, offset, flags);
-
- nr_pages[hash_slot(flags)]++;
- total_pages++;
-}
-
-#define KPAGEFLAGS_BATCH (64 << 10) /* 64k pages */
-static void walk_pfn(unsigned long voffset,
- unsigned long index,
- unsigned long count)
-{
- uint64_t buf[KPAGEFLAGS_BATCH];
- unsigned long batch;
- long pages;
- unsigned long i;
-
- while (count) {
- batch = min_t(unsigned long, count, KPAGEFLAGS_BATCH);
- pages = kpageflags_read(buf, index, batch);
- if (pages == 0)
- break;
-
- for (i = 0; i < pages; i++)
- add_page(voffset + i, index + i, buf[i]);
-
- index += pages;
- count -= pages;
- }
-}
-
-#define PAGEMAP_BATCH (64 << 10)
-static void walk_vma(unsigned long index, unsigned long count)
-{
- uint64_t buf[PAGEMAP_BATCH];
- unsigned long batch;
- unsigned long pages;
- unsigned long pfn;
- unsigned long i;
-
- while (count) {
- batch = min_t(unsigned long, count, PAGEMAP_BATCH);
- pages = pagemap_read(buf, index, batch);
- if (pages == 0)
- break;
-
- for (i = 0; i < pages; i++) {
- pfn = pagemap_pfn(buf[i]);
- if (pfn)
- walk_pfn(index + i, pfn, 1);
- }
-
- index += pages;
- count -= pages;
- }
-}
-
-static void walk_task(unsigned long index, unsigned long count)
-{
- const unsigned long end = index + count;
- unsigned long start;
- int i = 0;
-
- while (index < end) {
-
- while (pg_end[i] <= index)
- if (++i >= nr_vmas)
- return;
- if (pg_start[i] >= end)
- return;
-
- start = max_t(unsigned long, pg_start[i], index);
- index = min_t(unsigned long, pg_end[i], end);
-
- assert(start < index);
- walk_vma(start, index - start);
- }
-}
-
-static void add_addr_range(unsigned long offset, unsigned long size)
-{
- if (nr_addr_ranges >= MAX_ADDR_RANGES)
- fatal("too many addr ranges\n");
-
- opt_offset[nr_addr_ranges] = offset;
- opt_size[nr_addr_ranges] = min_t(unsigned long, size, ULONG_MAX-offset);
- nr_addr_ranges++;
-}
-
-static void walk_addr_ranges(void)
-{
- int i;
-
- kpageflags_fd = checked_open(PROC_KPAGEFLAGS, O_RDONLY);
-
- if (!nr_addr_ranges)
- add_addr_range(0, ULONG_MAX);
-
- for (i = 0; i < nr_addr_ranges; i++)
- if (!opt_pid)
- walk_pfn(0, opt_offset[i], opt_size[i]);
- else
- walk_task(opt_offset[i], opt_size[i]);
-
- close(kpageflags_fd);
-}
-
-
-/*
- * user interface
- */
-
-static const char *page_flag_type(uint64_t flag)
-{
- if (flag & KPF_HACKERS_BITS)
- return "(r)";
- if (flag & KPF_OVERLOADED_BITS)
- return "(o)";
- return " ";
-}
-
-static void usage(void)
-{
- int i, j;
-
- printf(
-"page-types [options]\n"
-" -r|--raw Raw mode, for kernel developers\n"
-" -d|--describe flags Describe flags\n"
-" -a|--addr addr-spec Walk a range of pages\n"
-" -b|--bits bits-spec Walk pages with specified bits\n"
-" -p|--pid pid Walk process address space\n"
-#if 0 /* planned features */
-" -f|--file filename Walk file address space\n"
-#endif
-" -l|--list Show page details in ranges\n"
-" -L|--list-each Show page details one by one\n"
-" -N|--no-summary Don't show summary info\n"
-" -X|--hwpoison hwpoison pages\n"
-" -x|--unpoison unpoison pages\n"
-" -h|--help Show this usage message\n"
-"flags:\n"
-" 0x10 bitfield format, e.g.\n"
-" anon bit-name, e.g.\n"
-" 0x10,anon comma-separated list, e.g.\n"
-"addr-spec:\n"
-" N one page at offset N (unit: pages)\n"
-" N+M pages range from N to N+M-1\n"
-" N,M pages range from N to M-1\n"
-" N, pages range from N to end\n"
-" ,M pages range from 0 to M-1\n"
-"bits-spec:\n"
-" bit1,bit2 (flags & (bit1|bit2)) != 0\n"
-" bit1,bit2=bit1 (flags & (bit1|bit2)) == bit1\n"
-" bit1,~bit2 (flags & (bit1|bit2)) == bit1\n"
-" =bit1,bit2 flags == (bit1|bit2)\n"
-"bit-names:\n"
- );
-
- for (i = 0, j = 0; i < ARRAY_SIZE(page_flag_names); i++) {
- if (!page_flag_names[i])
- continue;
- printf("%16s%s", page_flag_names[i] + 2,
- page_flag_type(1ULL << i));
- if (++j > 3) {
- j = 0;
- putchar('\n');
- }
- }
- printf("\n "
- "(r) raw mode bits (o) overloaded bits\n");
-}
-
-static unsigned long long parse_number(const char *str)
-{
- unsigned long long n;
-
- n = strtoll(str, NULL, 0);
-
- if (n == 0 && str[0] != '0')
- fatal("invalid name or number: %s\n", str);
-
- return n;
-}
-
-static void parse_pid(const char *str)
-{
- FILE *file;
- char buf[5000];
-
- opt_pid = parse_number(str);
-
- sprintf(buf, "/proc/%d/pagemap", opt_pid);
- pagemap_fd = checked_open(buf, O_RDONLY);
-
- sprintf(buf, "/proc/%d/maps", opt_pid);
- file = fopen(buf, "r");
- if (!file) {
- perror(buf);
- exit(EXIT_FAILURE);
- }
-
- while (fgets(buf, sizeof(buf), file) != NULL) {
- unsigned long vm_start;
- unsigned long vm_end;
- unsigned long long pgoff;
- int major, minor;
- char r, w, x, s;
- unsigned long ino;
- int n;
-
- n = sscanf(buf, "%lx-%lx %c%c%c%c %llx %x:%x %lu",
- &vm_start,
- &vm_end,
- &r, &w, &x, &s,
- &pgoff,
- &major, &minor,
- &ino);
- if (n < 10) {
- fprintf(stderr, "unexpected line: %s\n", buf);
- continue;
- }
- pg_start[nr_vmas] = vm_start / page_size;
- pg_end[nr_vmas] = vm_end / page_size;
- if (++nr_vmas >= MAX_VMAS) {
- fprintf(stderr, "too many VMAs\n");
- break;
- }
- }
- fclose(file);
-}
-
-static void parse_file(const char *name)
-{
-}
-
-static void parse_addr_range(const char *optarg)
-{
- unsigned long offset;
- unsigned long size;
- char *p;
-
- p = strchr(optarg, ',');
- if (!p)
- p = strchr(optarg, '+');
-
- if (p == optarg) {
- offset = 0;
- size = parse_number(p + 1);
- } else if (p) {
- offset = parse_number(optarg);
- if (p[1] == '\0')
- size = ULONG_MAX;
- else {
- size = parse_number(p + 1);
- if (*p == ',') {
- if (size < offset)
- fatal("invalid range: %lu,%lu\n",
- offset, size);
- size -= offset;
- }
- }
- } else {
- offset = parse_number(optarg);
- size = 1;
- }
-
- add_addr_range(offset, size);
-}
-
-static void add_bits_filter(uint64_t mask, uint64_t bits)
-{
- if (nr_bit_filters >= MAX_BIT_FILTERS)
- fatal("too much bit filters\n");
-
- opt_mask[nr_bit_filters] = mask;
- opt_bits[nr_bit_filters] = bits;
- nr_bit_filters++;
-}
-
-static uint64_t parse_flag_name(const char *str, int len)
-{
- int i;
-
- if (!*str || !len)
- return 0;
-
- if (len <= 8 && !strncmp(str, "compound", len))
- return BITS_COMPOUND;
-
- for (i = 0; i < ARRAY_SIZE(page_flag_names); i++) {
- if (!page_flag_names[i])
- continue;
- if (!strncmp(str, page_flag_names[i] + 2, len))
- return 1ULL << i;
- }
-
- return parse_number(str);
-}
-
-static uint64_t parse_flag_names(const char *str, int all)
-{
- const char *p = str;
- uint64_t flags = 0;
-
- while (1) {
- if (*p == ',' || *p == '=' || *p == '\0') {
- if ((*str != '~') || (*str == '~' && all && *++str))
- flags |= parse_flag_name(str, p - str);
- if (*p != ',')
- break;
- str = p + 1;
- }
- p++;
- }
-
- return flags;
-}
-
-static void parse_bits_mask(const char *optarg)
-{
- uint64_t mask;
- uint64_t bits;
- const char *p;
-
- p = strchr(optarg, '=');
- if (p == optarg) {
- mask = KPF_ALL_BITS;
- bits = parse_flag_names(p + 1, 0);
- } else if (p) {
- mask = parse_flag_names(optarg, 0);
- bits = parse_flag_names(p + 1, 0);
- } else if (strchr(optarg, '~')) {
- mask = parse_flag_names(optarg, 1);
- bits = parse_flag_names(optarg, 0);
- } else {
- mask = parse_flag_names(optarg, 0);
- bits = KPF_ALL_BITS;
- }
-
- add_bits_filter(mask, bits);
-}
-
-static void describe_flags(const char *optarg)
-{
- uint64_t flags = parse_flag_names(optarg, 0);
-
- printf("0x%016llx\t%s\t%s\n",
- (unsigned long long)flags,
- page_flag_name(flags),
- page_flag_longname(flags));
-}
-
-static const struct option opts[] = {
- { "raw" , 0, NULL, 'r' },
- { "pid" , 1, NULL, 'p' },
- { "file" , 1, NULL, 'f' },
- { "addr" , 1, NULL, 'a' },
- { "bits" , 1, NULL, 'b' },
- { "describe" , 1, NULL, 'd' },
- { "list" , 0, NULL, 'l' },
- { "list-each" , 0, NULL, 'L' },
- { "no-summary", 0, NULL, 'N' },
- { "hwpoison" , 0, NULL, 'X' },
- { "unpoison" , 0, NULL, 'x' },
- { "help" , 0, NULL, 'h' },
- { NULL , 0, NULL, 0 }
-};
-
-int main(int argc, char *argv[])
-{
- int c;
-
- page_size = getpagesize();
-
- while ((c = getopt_long(argc, argv,
- "rp:f:a:b:d:lLNXxh", opts, NULL)) != -1) {
- switch (c) {
- case 'r':
- opt_raw = 1;
- break;
- case 'p':
- parse_pid(optarg);
- break;
- case 'f':
- parse_file(optarg);
- break;
- case 'a':
- parse_addr_range(optarg);
- break;
- case 'b':
- parse_bits_mask(optarg);
- break;
- case 'd':
- describe_flags(optarg);
- exit(0);
- case 'l':
- opt_list = 1;
- break;
- case 'L':
- opt_list = 2;
- break;
- case 'N':
- opt_no_summary = 1;
- break;
- case 'X':
- opt_hwpoison = 1;
- prepare_hwpoison_fd();
- break;
- case 'x':
- opt_unpoison = 1;
- prepare_hwpoison_fd();
- break;
- case 'h':
- usage();
- exit(0);
- default:
- usage();
- exit(1);
- }
- }
-
- if (opt_list && opt_pid)
- printf("voffset\t");
- if (opt_list == 1)
- printf("offset\tlen\tflags\n");
- if (opt_list == 2)
- printf("offset\tflags\n");
-
- walk_addr_ranges();
-
- if (opt_list == 1)
- show_page_range(0, 0, 0); /* drain the buffer */
-
- if (opt_no_summary)
- return 0;
-
- if (opt_list)
- printf("\n\n");
-
- show_summary();
-
- return 0;
-}
diff --git a/Documentation/watchdog/00-INDEX b/Documentation/watchdog/00-INDEX
deleted file mode 100644
index fc9082a1477..00000000000
--- a/Documentation/watchdog/00-INDEX
+++ /dev/null
@@ -1,19 +0,0 @@
-00-INDEX
- - this file.
-convert_drivers_to_kernel_api.txt
- - how-to for converting old watchdog drivers to the new kernel API.
-hpwdt.txt
- - information on the HP iLO2 NMI watchdog
-pcwd-watchdog.txt
- - documentation for Berkshire Products PC Watchdog ISA cards.
-src/
- - directory holding watchdog related example programs.
-watchdog-api.txt
- - description of the Linux Watchdog driver API.
-watchdog-kernel-api.txt
- - description of the Linux WatchDog Timer Driver Core kernel API.
-watchdog-parameters.txt
- - information on driver parameters (for drivers other than
- the ones that have driver-specific files here)
-wdt.txt
- - description of the Watchdog Timer Interfaces for Linux.
diff --git a/Documentation/watchdog/convert_drivers_to_kernel_api.txt b/Documentation/watchdog/convert_drivers_to_kernel_api.txt
index be8119bb15d..271b8850dde 100644
--- a/Documentation/watchdog/convert_drivers_to_kernel_api.txt
+++ b/Documentation/watchdog/convert_drivers_to_kernel_api.txt
@@ -59,6 +59,10 @@ Here is a overview of the functions and probably needed actions:
WDIOC_GETTIMEOUT:
No preparations needed
+ WDIOC_GETTIMELEFT:
+ It needs get_timeleft() callback to be defined. Otherwise it
+ will return EOPNOTSUPP
+
Other IOCTLs can be served using the ioctl-callback. Note that this is mainly
intended for porting old drivers; new drivers should not invent private IOCTLs.
Private IOCTLs are processed first. When the callback returns with
diff --git a/Documentation/watchdog/watchdog-kernel-api.txt b/Documentation/watchdog/watchdog-kernel-api.txt
index 9e162465b0c..227f6cd0e5f 100644
--- a/Documentation/watchdog/watchdog-kernel-api.txt
+++ b/Documentation/watchdog/watchdog-kernel-api.txt
@@ -1,6 +1,6 @@
The Linux WatchDog Timer Driver Core kernel API.
===============================================
-Last reviewed: 29-Nov-2011
+Last reviewed: 16-Mar-2012
Wim Van Sebroeck <wim@iguana.be>
@@ -77,6 +77,7 @@ struct watchdog_ops {
int (*ping)(struct watchdog_device *);
unsigned int (*status)(struct watchdog_device *);
int (*set_timeout)(struct watchdog_device *, unsigned int);
+ unsigned int (*get_timeleft)(struct watchdog_device *);
long (*ioctl)(struct watchdog_device *, unsigned int, unsigned long);
};
@@ -117,11 +118,13 @@ they are supported. These optional routines/operations are:
status of the device is reported with watchdog WDIOF_* status flags/bits.
* set_timeout: this routine checks and changes the timeout of the watchdog
timer device. It returns 0 on success, -EINVAL for "parameter out of range"
- and -EIO for "could not write value to the watchdog". On success the timeout
- value of the watchdog_device will be changed to the value that was just used
- to re-program the watchdog timer device.
+ and -EIO for "could not write value to the watchdog". On success this
+ routine should set the timeout value of the watchdog_device to the
+ achieved timeout value (which may be different from the requested one
+ because the watchdog does not necessarily has a 1 second resolution).
(Note: the WDIOF_SETTIMEOUT needs to be set in the options field of the
watchdog's info structure).
+* get_timeleft: this routines returns the time that's left before a reset.
* ioctl: if this routine is present then it will be called first before we do
our own internal ioctl call handling. This routine should return -ENOIOCTLCMD
if a command is not supported. The parameters that are passed to the ioctl