summaryrefslogtreecommitdiffstats
path: root/Documentation
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/ABI/stable/firewire-cdev103
-rw-r--r--Documentation/ABI/stable/sysfs-bus-firewire122
-rw-r--r--Documentation/ABI/testing/sysfs-class-backlight-driver-adp887056
-rw-r--r--Documentation/Changes43
-rw-r--r--Documentation/CodingStyle4
-rw-r--r--Documentation/DocBook/80211.tmpl5
-rw-r--r--Documentation/accounting/cgroupstats.txt4
-rw-r--r--Documentation/cgroups/blkio-controller.txt41
-rw-r--r--Documentation/cgroups/cgroups.txt60
-rw-r--r--Documentation/cgroups/cpuacct.txt21
-rw-r--r--Documentation/cgroups/cpusets.txt28
-rw-r--r--Documentation/cgroups/devices.txt6
-rw-r--r--Documentation/cgroups/freezer-subsystem.txt20
-rw-r--r--Documentation/cgroups/memory.txt58
-rw-r--r--Documentation/devicetree/bindings/gpio/fsl-imx-gpio.txt22
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio.txt46
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio_nvidia.txt8
-rw-r--r--Documentation/feature-removal-schedule.txt39
-rw-r--r--Documentation/filesystems/caching/netfs-api.txt16
-rw-r--r--Documentation/filesystems/nilfs2.txt1
-rw-r--r--Documentation/filesystems/proc.txt1
-rw-r--r--Documentation/filesystems/ubifs.txt28
-rw-r--r--Documentation/hwmon/f71882fg4
-rw-r--r--Documentation/hwmon/k10temp8
-rw-r--r--Documentation/kernel-parameters.txt2
-rw-r--r--Documentation/kmemleak.txt4
-rw-r--r--Documentation/laptops/thinkpad-acpi.txt5
-rw-r--r--Documentation/md.txt2
-rw-r--r--Documentation/mmc/00-INDEX2
-rw-r--r--Documentation/mmc/mmc-async-req.txt87
-rw-r--r--Documentation/networking/ifenslave.c18
-rw-r--r--Documentation/networking/ip-sysctl.txt31
-rw-r--r--Documentation/networking/netdev-features.txt154
-rw-r--r--Documentation/networking/nfc.txt128
-rw-r--r--Documentation/networking/stmmac.txt200
-rw-r--r--Documentation/power/devices.txt67
-rw-r--r--Documentation/power/runtime_pm.txt31
-rw-r--r--Documentation/printk-formats.txt119
-rw-r--r--Documentation/scheduler/sched-design-CFS.txt7
-rw-r--r--Documentation/scheduler/sched-rt-group.txt7
-rw-r--r--Documentation/spinlocks.txt45
-rw-r--r--Documentation/usb/error-codes.txt9
-rw-r--r--Documentation/virtual/lguest/lguest.c47
-rw-r--r--Documentation/vm/hwpoison.txt6
-rw-r--r--Documentation/x86/boot.txt2
45 files changed, 1291 insertions, 426 deletions
diff --git a/Documentation/ABI/stable/firewire-cdev b/Documentation/ABI/stable/firewire-cdev
new file mode 100644
index 00000000000..16d03082736
--- /dev/null
+++ b/Documentation/ABI/stable/firewire-cdev
@@ -0,0 +1,103 @@
+What: /dev/fw[0-9]+
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: linux1394-devel@lists.sourceforge.net
+Description:
+ The character device files /dev/fw* are the interface between
+ firewire-core and IEEE 1394 device drivers implemented in
+ userspace. The ioctl(2)- and read(2)-based ABI is defined and
+ documented in <linux/firewire-cdev.h>.
+
+ This ABI offers most of the features which firewire-core also
+ exposes to kernelspace IEEE 1394 drivers.
+
+ Each /dev/fw* is associated with one IEEE 1394 node, which can
+ be remote or local nodes. Operations on a /dev/fw* file have
+ different scope:
+ - The 1394 node which is associated with the file:
+ - Asynchronous request transmission
+ - Get the Configuration ROM
+ - Query node ID
+ - Query maximum speed of the path between this node
+ and local node
+ - The 1394 bus (i.e. "card") to which the node is attached to:
+ - Isochronous stream transmission and reception
+ - Asynchronous stream transmission and reception
+ - Asynchronous broadcast request transmission
+ - PHY packet transmission and reception
+ - Allocate, reallocate, deallocate isochronous
+ resources (channels, bandwidth) at the bus's IRM
+ - Query node IDs of local node, root node, IRM, bus
+ manager
+ - Query cycle time
+ - Bus reset initiation, bus reset event reception
+ - All 1394 buses:
+ - Allocation of IEEE 1212 address ranges on the local
+ link layers, reception of inbound requests to such
+ an address range, asynchronous response transmission
+ to inbound requests
+ - Addition of descriptors or directories to the local
+ nodes' Configuration ROM
+
+ Due to the different scope of operations and in order to let
+ userland implement different access permission models, some
+ operations are restricted to /dev/fw* files that are associated
+ with a local node:
+ - Addition of descriptors or directories to the local
+ nodes' Configuration ROM
+ - PHY packet transmission and reception
+
+ A /dev/fw* file remains associated with one particular node
+ during its entire life time. Bus topology changes, and hence
+ node ID changes, are tracked by firewire-core. ABI users do not
+ need to be aware of topology.
+
+ The following file operations are supported:
+
+ open(2)
+ Currently the only useful flags are O_RDWR.
+
+ ioctl(2)
+ Initiate various actions. Some take immediate effect, others
+ are performed asynchronously while or after the ioctl returns.
+ See the inline documentation in <linux/firewire-cdev.h> for
+ descriptions of all ioctls.
+
+ poll(2), select(2), epoll_wait(2) etc.
+ Watch for events to become available to be read.
+
+ read(2)
+ Receive various events. There are solicited events like
+ outbound asynchronous transaction completion or isochronous
+ buffer completion, and unsolicited events such as bus resets,
+ request reception, or PHY packet reception. Always use a read
+ buffer which is large enough to receive the largest event that
+ could ever arrive. See <linux/firewire-cdev.h> for descriptions
+ of all event types and for which ioctls affect reception of
+ events.
+
+ mmap(2)
+ Allocate a DMA buffer for isochronous reception or transmission
+ and map it into the process address space. The arguments should
+ be used as follows: addr = NULL, length = the desired buffer
+ size, i.e. number of packets times size of largest packet,
+ prot = at least PROT_READ for reception and at least PROT_WRITE
+ for transmission, flags = MAP_SHARED, fd = the handle to the
+ /dev/fw*, offset = 0.
+
+ Isochronous reception works in packet-per-buffer fashion except
+ for multichannel reception which works in buffer-fill mode.
+
+ munmap(2)
+ Unmap the isochronous I/O buffer from the process address space.
+
+ close(2)
+ Besides stopping and freeing I/O contexts that were associated
+ with the file descriptor, back out any changes to the local
+ nodes' Configuration ROM. Deallocate isochronous channels and
+ bandwidth at the IRM that were marked for kernel-assisted
+ re- and deallocation.
+
+Users: libraw1394
+ libdc1394
+ tools like jujuutils, fwhack, ...
diff --git a/Documentation/ABI/stable/sysfs-bus-firewire b/Documentation/ABI/stable/sysfs-bus-firewire
new file mode 100644
index 00000000000..3d484e5dc84
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-bus-firewire
@@ -0,0 +1,122 @@
+What: /sys/bus/firewire/devices/fw[0-9]+/
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: linux1394-devel@lists.sourceforge.net
+Description:
+ IEEE 1394 node device attributes.
+ Read-only. Mutable during the node device's lifetime.
+ See IEEE 1212 for semantic definitions.
+
+ config_rom
+ Contents of the Configuration ROM register.
+ Binary attribute; an array of host-endian u32.
+
+ guid
+ The node's EUI-64 in the bus information block of
+ Configuration ROM.
+ Hexadecimal string representation of an u64.
+
+
+What: /sys/bus/firewire/devices/fw[0-9]+/units
+Date: June 2009
+KernelVersion: 2.6.31
+Contact: linux1394-devel@lists.sourceforge.net
+Description:
+ IEEE 1394 node device attribute.
+ Read-only. Mutable during the node device's lifetime.
+ See IEEE 1212 for semantic definitions.
+
+ units
+ Summary of all units present in an IEEE 1394 node.
+ Contains space-separated tuples of specifier_id and
+ version of each unit present in the node. Specifier_id
+ and version are hexadecimal string representations of
+ u24 of the respective unit directory entries.
+ Specifier_id and version within each tuple are separated
+ by a colon.
+
+Users: udev rules to set ownership and access permissions or ACLs of
+ /dev/fw[0-9]+ character device files
+
+
+What: /sys/bus/firewire/devices/fw[0-9]+[.][0-9]+/
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: linux1394-devel@lists.sourceforge.net
+Description:
+ IEEE 1394 unit device attributes.
+ Read-only. Immutable during the unit device's lifetime.
+ See IEEE 1212 for semantic definitions.
+
+ modalias
+ Same as MODALIAS in the uevent at device creation.
+
+ rom_index
+ Offset of the unit directory within the parent device's
+ (node device's) Configuration ROM, in quadlets.
+ Decimal string representation.
+
+
+What: /sys/bus/firewire/devices/*/
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: linux1394-devel@lists.sourceforge.net
+Description:
+ Attributes common to IEEE 1394 node devices and unit devices.
+ Read-only. Mutable during the node device's lifetime.
+ Immutable during the unit device's lifetime.
+ See IEEE 1212 for semantic definitions.
+
+ These attributes are only created if the root directory of an
+ IEEE 1394 node or the unit directory of an IEEE 1394 unit
+ actually contains according entries.
+
+ hardware_version
+ Hexadecimal string representation of an u24.
+
+ hardware_version_name
+ Contents of a respective textual descriptor leaf.
+
+ model
+ Hexadecimal string representation of an u24.
+
+ model_name
+ Contents of a respective textual descriptor leaf.
+
+ specifier_id
+ Hexadecimal string representation of an u24.
+ Mandatory in unit directories according to IEEE 1212.
+
+ vendor
+ Hexadecimal string representation of an u24.
+ Mandatory in the root directory according to IEEE 1212.
+
+ vendor_name
+ Contents of a respective textual descriptor leaf.
+
+ version
+ Hexadecimal string representation of an u24.
+ Mandatory in unit directories according to IEEE 1212.
+
+
+What: /sys/bus/firewire/drivers/sbp2/fw*/host*/target*/*:*:*:*/ieee1394_id
+ formerly
+ /sys/bus/ieee1394/drivers/sbp2/fw*/host*/target*/*:*:*:*/ieee1394_id
+Date: Feb 2004
+KernelVersion: 2.6.4
+Contact: linux1394-devel@lists.sourceforge.net
+Description:
+ SCSI target port identifier and logical unit identifier of a
+ logical unit of an SBP-2 target. The identifiers are specified
+ in SAM-2...SAM-4 annex A. They are persistent and world-wide
+ unique properties the SBP-2 attached target.
+
+ Read-only attribute, immutable during the target's lifetime.
+ Format, as exposed by firewire-sbp2 since 2.6.22, May 2007:
+ Colon-separated hexadecimal string representations of
+ u64 EUI-64 : u24 directory_ID : u16 LUN
+ without 0x prefixes, without whitespace. The former sbp2 driver
+ (removed in 2.6.37 after being superseded by firewire-sbp2) used
+ a somewhat shorter format which was not as close to SAM.
+
+Users: udev rules to create /dev/disk/by-id/ symlinks
diff --git a/Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870 b/Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870
new file mode 100644
index 00000000000..aa11dbdd794
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870
@@ -0,0 +1,56 @@
+What: /sys/class/backlight/<backlight>/<ambient light zone>_max
+What: /sys/class/backlight/<backlight>/l1_daylight_max
+What: /sys/class/backlight/<backlight>/l2_bright_max
+What: /sys/class/backlight/<backlight>/l3_office_max
+What: /sys/class/backlight/<backlight>/l4_indoor_max
+What: /sys/class/backlight/<backlight>/l5_dark_max
+Date: Mai 2011
+KernelVersion: 2.6.40
+Contact: device-drivers-devel@blackfin.uclinux.org
+Description:
+ Control the maximum brightness for <ambient light zone>
+ on this <backlight>. Values are between 0 and 127. This file
+ will also show the brightness level stored for this
+ <ambient light zone>.
+
+What: /sys/class/backlight/<backlight>/<ambient light zone>_dim
+What: /sys/class/backlight/<backlight>/l2_bright_dim
+What: /sys/class/backlight/<backlight>/l3_office_dim
+What: /sys/class/backlight/<backlight>/l4_indoor_dim
+What: /sys/class/backlight/<backlight>/l5_dark_dim
+Date: Mai 2011
+KernelVersion: 2.6.40
+Contact: device-drivers-devel@blackfin.uclinux.org
+Description:
+ Control the dim brightness for <ambient light zone>
+ on this <backlight>. Values are between 0 and 127, typically
+ set to 0. Full off when the backlight is disabled.
+ This file will also show the dim brightness level stored for
+ this <ambient light zone>.
+
+What: /sys/class/backlight/<backlight>/ambient_light_level
+Date: Mai 2011
+KernelVersion: 2.6.40
+Contact: device-drivers-devel@blackfin.uclinux.org
+Description:
+ Get conversion value of the light sensor.
+ This value is updated every 80 ms (when the light sensor
+ is enabled). Returns integer between 0 (dark) and
+ 8000 (max ambient brightness)
+
+What: /sys/class/backlight/<backlight>/ambient_light_zone
+Date: Mai 2011
+KernelVersion: 2.6.40
+Contact: device-drivers-devel@blackfin.uclinux.org
+Description:
+ Get/Set current ambient light zone. Reading returns
+ integer between 1..5 (1 = daylight, 2 = bright, ..., 5 = dark).
+ Writing a value between 1..5 forces the backlight controller
+ to enter the corresponding ambient light zone.
+ Writing 0 returns to normal/automatic ambient light level
+ operation. The ambient light sensing feature on these devices
+ is an extension to the API documented in
+ Documentation/ABI/stable/sysfs-class-backlight.
+ It can be enabled by writing the value stored in
+ /sys/class/backlight/<backlight>/max_brightness to
+ /sys/class/backlight/<backlight>/brightness. \ No newline at end of file
diff --git a/Documentation/Changes b/Documentation/Changes
index 5f4828a034e..b1758088527 100644
--- a/Documentation/Changes
+++ b/Documentation/Changes
@@ -2,13 +2,7 @@ Intro
=====
This document is designed to provide a list of the minimum levels of
-software necessary to run the 2.6 kernels, as well as provide brief
-instructions regarding any other "Gotchas" users may encounter when
-trying life on the Bleeding Edge. If upgrading from a pre-2.4.x
-kernel, please consult the Changes file included with 2.4.x kernels for
-additional information; most of that information will not be repeated
-here. Basically, this document assumes that your system is already
-functional and running at least 2.4.x kernels.
+software necessary to run the 3.0 kernels.
This document is originally based on my "Changes" file for 2.0.x kernels
and therefore owes credit to the same people as that file (Jared Mauch,
@@ -22,11 +16,10 @@ Upgrade to at *least* these software revisions before thinking you've
encountered a bug! If you're unsure what version you're currently
running, the suggested command should tell you.
-Again, keep in mind that this list assumes you are already
-functionally running a Linux 2.4 kernel. Also, not all tools are
-necessary on all systems; obviously, if you don't have any ISDN
-hardware, for example, you probably needn't concern yourself with
-isdn4k-utils.
+Again, keep in mind that this list assumes you are already functionally
+running a Linux kernel. Also, not all tools are necessary on all
+systems; obviously, if you don't have any ISDN hardware, for example,
+you probably needn't concern yourself with isdn4k-utils.
o Gnu C 3.2 # gcc --version
o Gnu make 3.80 # make --version
@@ -114,12 +107,12 @@ Ksymoops
If the unthinkable happens and your kernel oopses, you may need the
ksymoops tool to decode it, but in most cases you don't.
-In the 2.6 kernel it is generally preferred to build the kernel with
-CONFIG_KALLSYMS so that it produces readable dumps that can be used as-is
-(this also produces better output than ksymoops).
-If for some reason your kernel is not build with CONFIG_KALLSYMS and
-you have no way to rebuild and reproduce the Oops with that option, then
-you can still decode that Oops with ksymoops.
+It is generally preferred to build the kernel with CONFIG_KALLSYMS so
+that it produces readable dumps that can be used as-is (this also
+produces better output than ksymoops). If for some reason your kernel
+is not build with CONFIG_KALLSYMS and you have no way to rebuild and
+reproduce the Oops with that option, then you can still decode that Oops
+with ksymoops.
Module-Init-Tools
-----------------
@@ -261,8 +254,8 @@ needs to be recompiled or (preferably) upgraded.
NFS-utils
---------
-In 2.4 and earlier kernels, the nfs server needed to know about any
-client that expected to be able to access files via NFS. This
+In ancient (2.4 and earlier) kernels, the nfs server needed to know
+about any client that expected to be able to access files via NFS. This
information would be given to the kernel by "mountd" when the client
mounted the filesystem, or by "exportfs" at system startup. exportfs
would take information about active clients from /var/lib/nfs/rmtab.
@@ -272,11 +265,11 @@ which is not always easy, particularly when trying to implement
fail-over. Even when the system is working well, rmtab suffers from
getting lots of old entries that never get removed.
-With 2.6 we have the option of having the kernel tell mountd when it
-gets a request from an unknown host, and mountd can give appropriate
-export information to the kernel. This removes the dependency on
-rmtab and means that the kernel only needs to know about currently
-active clients.
+With modern kernels we have the option of having the kernel tell mountd
+when it gets a request from an unknown host, and mountd can give
+appropriate export information to the kernel. This removes the
+dependency on rmtab and means that the kernel only needs to know about
+currently active clients.
To enable this new functionality, you need to:
diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle
index 58b0bf91783..fa6e25b94a5 100644
--- a/Documentation/CodingStyle
+++ b/Documentation/CodingStyle
@@ -680,8 +680,8 @@ ones already enabled by DEBUG.
Chapter 14: Allocating memory
The kernel provides the following general purpose memory allocators:
-kmalloc(), kzalloc(), kcalloc(), and vmalloc(). Please refer to the API
-documentation for further information about them.
+kmalloc(), kzalloc(), kcalloc(), vmalloc(), and vzalloc(). Please refer to
+the API documentation for further information about them.
The preferred form for passing a size of a struct is the following:
diff --git a/Documentation/DocBook/80211.tmpl b/Documentation/DocBook/80211.tmpl
index 8906648f962..445289cd0e6 100644
--- a/Documentation/DocBook/80211.tmpl
+++ b/Documentation/DocBook/80211.tmpl
@@ -402,8 +402,9 @@
!Finclude/net/mac80211.h set_key_cmd
!Finclude/net/mac80211.h ieee80211_key_conf
!Finclude/net/mac80211.h ieee80211_key_flags
-!Finclude/net/mac80211.h ieee80211_tkip_key_type
-!Finclude/net/mac80211.h ieee80211_get_tkip_key
+!Finclude/net/mac80211.h ieee80211_get_tkip_p1k
+!Finclude/net/mac80211.h ieee80211_get_tkip_p1k_iv
+!Finclude/net/mac80211.h ieee80211_get_tkip_p2k
!Finclude/net/mac80211.h ieee80211_key_removed
</chapter>
diff --git a/Documentation/accounting/cgroupstats.txt b/Documentation/accounting/cgroupstats.txt
index eda40fd39ca..d16a9849e60 100644
--- a/Documentation/accounting/cgroupstats.txt
+++ b/Documentation/accounting/cgroupstats.txt
@@ -21,7 +21,7 @@ information will not be available.
To extract cgroup statistics a utility very similar to getdelays.c
has been developed, the sample output of the utility is shown below
-~/balbir/cgroupstats # ./getdelays -C "/cgroup/a"
+~/balbir/cgroupstats # ./getdelays -C "/sys/fs/cgroup/a"
sleeping 1, blocked 0, running 1, stopped 0, uninterruptible 0
-~/balbir/cgroupstats # ./getdelays -C "/cgroup"
+~/balbir/cgroupstats # ./getdelays -C "/sys/fs/cgroup"
sleeping 155, blocked 0, running 1, stopped 0, uninterruptible 2
diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt
index 465351d4cf8..84f0a15fc21 100644
--- a/Documentation/cgroups/blkio-controller.txt
+++ b/Documentation/cgroups/blkio-controller.txt
@@ -28,16 +28,19 @@ cgroups. Here is what you can do.
- Enable group scheduling in CFQ
CONFIG_CFQ_GROUP_IOSCHED=y
-- Compile and boot into kernel and mount IO controller (blkio).
+- Compile and boot into kernel and mount IO controller (blkio); see
+ cgroups.txt, Why are cgroups needed?.
- mount -t cgroup -o blkio none /cgroup
+ mount -t tmpfs cgroup_root /sys/fs/cgroup
+ mkdir /sys/fs/cgroup/blkio
+ mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
- Create two cgroups
- mkdir -p /cgroup/test1/ /cgroup/test2
+ mkdir -p /sys/fs/cgroup/blkio/test1/ /sys/fs/cgroup/blkio/test2
- Set weights of group test1 and test2
- echo 1000 > /cgroup/test1/blkio.weight
- echo 500 > /cgroup/test2/blkio.weight
+ echo 1000 > /sys/fs/cgroup/blkio/test1/blkio.weight
+ echo 500 > /sys/fs/cgroup/blkio/test2/blkio.weight
- Create two same size files (say 512MB each) on same disk (file1, file2) and
launch two dd threads in different cgroup to read those files.
@@ -46,12 +49,12 @@ cgroups. Here is what you can do.
echo 3 > /proc/sys/vm/drop_caches
dd if=/mnt/sdb/zerofile1 of=/dev/null &
- echo $! > /cgroup/test1/tasks
- cat /cgroup/test1/tasks
+ echo $! > /sys/fs/cgroup/blkio/test1/tasks
+ cat /sys/fs/cgroup/blkio/test1/tasks
dd if=/mnt/sdb/zerofile2 of=/dev/null &
- echo $! > /cgroup/test2/tasks
- cat /cgroup/test2/tasks
+ echo $! > /sys/fs/cgroup/blkio/test2/tasks
+ cat /sys/fs/cgroup/blkio/test2/tasks
- At macro level, first dd should finish first. To get more precise data, keep
on looking at (with the help of script), at blkio.disk_time and
@@ -68,13 +71,13 @@ Throttling/Upper Limit policy
- Enable throttling in block layer
CONFIG_BLK_DEV_THROTTLING=y
-- Mount blkio controller
- mount -t cgroup -o blkio none /cgroup/blkio
+- Mount blkio controller (see cgroups.txt, Why are cgroups needed?)
+ mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
- Specify a bandwidth rate on particular device for root group. The format
for policy is "<major>:<minor> <byes_per_second>".
- echo "8:16 1048576" > /cgroup/blkio/blkio.read_bps_device
+ echo "8:16 1048576" > /sys/fs/cgroup/blkio/blkio.throttle.read_bps_device
Above will put a limit of 1MB/second on reads happening for root group
on device having major/minor number 8:16.
@@ -87,7 +90,7 @@ Throttling/Upper Limit policy
1024+0 records out
4194304 bytes (4.2 MB) copied, 4.0001 s, 1.0 MB/s
- Limits for writes can be put using blkio.write_bps_device file.
+ Limits for writes can be put using blkio.throttle.write_bps_device file.
Hierarchical Cgroups
====================
@@ -108,7 +111,7 @@ Hierarchical Cgroups
CFQ and throttling will practically treat all groups at same level.
pivot
- / | \ \
+ / / \ \
root test1 test2 test3
Down the line we can implement hierarchical accounting/control support
@@ -149,7 +152,7 @@ Proportional weight policy files
Following is the format.
- #echo dev_maj:dev_minor weight > /path/to/cgroup/blkio.weight_device
+ # echo dev_maj:dev_minor weight > blkio.weight_device
Configure weight=300 on /dev/sdb (8:16) in this cgroup
# echo 8:16 300 > blkio.weight_device
# cat blkio.weight_device
@@ -283,28 +286,28 @@ Throttling/Upper limit policy files
specified in bytes per second. Rules are per deivce. Following is
the format.
- echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.read_bps_device
+ echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.throttle.read_bps_device
- blkio.throttle.write_bps_device
- Specifies upper limit on WRITE rate to the device. IO rate is
specified in bytes per second. Rules are per deivce. Following is
the format.
- echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.write_bps_device
+ echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.throttle.write_bps_device
- blkio.throttle.read_iops_device
- Specifies upper limit on READ rate from the device. IO rate is
specified in IO per second. Rules are per deivce. Following is
the format.
- echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.read_iops_device
+ echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.throttle.read_iops_device
- blkio.throttle.write_iops_device
- Specifies upper limit on WRITE rate to the device. IO rate is
specified in io per second. Rules are per deivce. Following is
the format.
- echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.write_iops_device
+ echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.throttle.write_iops_device
Note: If both BW and IOPS rules are specified for a device, then IO is
subjectd to both the constraints.
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index 0ed99f08f1f..cd67e90003c 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -138,11 +138,11 @@ With the ability to classify tasks differently for different resources
the admin can easily set up a script which receives exec notifications
and depending on who is launching the browser he can
- # echo browser_pid > /mnt/<restype>/<userclass>/tasks
+ # echo browser_pid > /sys/fs/cgroup/<restype>/<userclass>/tasks
With only a single hierarchy, he now would potentially have to create
a separate cgroup for every browser launched and associate it with
-approp network and other resource class. This may lead to
+appropriate network and other resource class. This may lead to
proliferation of such cgroups.
Also lets say that the administrator would like to give enhanced network
@@ -153,9 +153,9 @@ apps enhanced CPU power,
With ability to write pids directly to resource classes, it's just a
matter of :
- # echo pid > /mnt/network/<new_class>/tasks
+ # echo pid > /sys/fs/cgroup/network/<new_class>/tasks
(after some time)
- # echo pid > /mnt/network/<orig_class>/tasks
+ # echo pid > /sys/fs/cgroup/network/<orig_class>/tasks
Without this ability, he would have to split the cgroup into
multiple separate ones and then associate the new cgroups with the
@@ -310,21 +310,24 @@ subsystem, this is the case for the cpuset.
To start a new job that is to be contained within a cgroup, using
the "cpuset" cgroup subsystem, the steps are something like:
- 1) mkdir /dev/cgroup
- 2) mount -t cgroup -ocpuset cpuset /dev/cgroup
- 3) Create the new cgroup by doing mkdir's and write's (or echo's) in
- the /dev/cgroup virtual file system.
- 4) Start a task that will be the "founding father" of the new job.
- 5) Attach that task to the new cgroup by writing its pid to the
- /dev/cgroup tasks file for that cgroup.
- 6) fork, exec or clone the job tasks from this founding father task.
+ 1) mount -t tmpfs cgroup_root /sys/fs/cgroup
+ 2) mkdir /sys/fs/cgroup/cpuset
+ 3) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
+ 4) Create the new cgroup by doing mkdir's and write's (or echo's) in
+ the /sys/fs/cgroup virtual file system.
+ 5) Start a task that will be the "founding father" of the new job.
+ 6) Attach that task to the new cgroup by writing its pid to the
+ /sys/fs/cgroup/cpuset/tasks file for that cgroup.
+ 7) fork, exec or clone the job tasks from this founding father task.
For example, the following sequence of commands will setup a cgroup
named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
and then start a subshell 'sh' in that cgroup:
- mount -t cgroup cpuset -ocpuset /dev/cgroup
- cd /dev/cgroup
+ mount -t tmpfs cgroup_root /sys/fs/cgroup
+ mkdir /sys/fs/cgroup/cpuset
+ mount -t cgroup cpuset -ocpuset /sys/fs/cgroup/cpuset
+ cd /sys/fs/cgroup/cpuset
mkdir Charlie
cd Charlie
/bin/echo 2-3 > cpuset.cpus
@@ -345,7 +348,7 @@ Creating, modifying, using the cgroups can be done through the cgroup
virtual filesystem.
To mount a cgroup hierarchy with all available subsystems, type:
-# mount -t cgroup xxx /dev/cgroup
+# mount -t cgroup xxx /sys/fs/cgroup
The "xxx" is not interpreted by the cgroup code, but will appear in
/proc/mounts so may be any useful identifying string that you like.
@@ -354,23 +357,32 @@ Note: Some subsystems do not work without some user input first. For instance,
if cpusets are enabled the user will have to populate the cpus and mems files
for each new cgroup created before that group can be used.
+As explained in section `1.2 Why are cgroups needed?' you should create
+different hierarchies of cgroups for each single resource or group of
+resources you want to control. Therefore, you should mount a tmpfs on
+/sys/fs/cgroup and create directories for each cgroup resource or resource
+group.
+
+# mount -t tmpfs cgroup_root /sys/fs/cgroup
+# mkdir /sys/fs/cgroup/rg1
+
To mount a cgroup hierarchy with just the cpuset and memory
subsystems, type:
-# mount -t cgroup -o cpuset,memory hier1 /dev/cgroup
+# mount -t cgroup -o cpuset,memory hier1 /sys/fs/cgroup/rg1
To change the set of subsystems bound to a mounted hierarchy, just
remount with different options:
-# mount -o remount,cpuset,blkio hier1 /dev/cgroup
+# mount -o remount,cpuset,blkio hier1 /sys/fs/cgroup/rg1
Now memory is removed from the hierarchy and blkio is added.
Note this will add blkio to the hierarchy but won't remove memory or
cpuset, because the new options are appended to the old ones:
-# mount -o remount,blkio /dev/cgroup
+# mount -o remount,blkio /sys/fs/cgroup/rg1
To Specify a hierarchy's release_agent:
# mount -t cgroup -o cpuset,release_agent="/sbin/cpuset_release_agent" \
- xxx /dev/cgroup
+ xxx /sys/fs/cgroup/rg1
Note that specifying 'release_agent' more than once will return failure.
@@ -379,17 +391,17 @@ when the hierarchy consists of a single (root) cgroup. Supporting
the ability to arbitrarily bind/unbind subsystems from an existing
cgroup hierarchy is intended to be implemented in the future.
-Then under /dev/cgroup you can find a tree that corresponds to the
-tree of the cgroups in the system. For instance, /dev/cgroup
+Then under /sys/fs/cgroup/rg1 you can find a tree that corresponds to the
+tree of the cgroups in the system. For instance, /sys/fs/cgroup/rg1
is the cgroup that holds the whole system.
If you want to change the value of release_agent:
-# echo "/sbin/new_release_agent" > /dev/cgroup/release_agent
+# echo "/sbin/new_release_agent" > /sys/fs/cgroup/rg1/release_agent
It can also be changed via remount.
-If you want to create a new cgroup under /dev/cgroup:
-# cd /dev/cgroup
+If you want to create a new cgroup under /sys/fs/cgroup/rg1:
+# cd /sys/fs/cgroup/rg1
# mkdir my_cgroup
Now you want to do something with this cgroup.
diff --git a/Documentation/cgroups/cpuacct.txt b/Documentation/cgroups/cpuacct.txt
index 8b930946c52..9ad85df4b98 100644
--- a/Documentation/cgroups/cpuacct.txt
+++ b/Documentation/cgroups/cpuacct.txt
@@ -10,26 +10,25 @@ directly present in its group.
Accounting groups can be created by first mounting the cgroup filesystem.
-# mkdir /cgroups
-# mount -t cgroup -ocpuacct none /cgroups
-
-With the above step, the initial or the parent accounting group
-becomes visible at /cgroups. At bootup, this group includes all the
-tasks in the system. /cgroups/tasks lists the tasks in this cgroup.
-/cgroups/cpuacct.usage gives the CPU time (in nanoseconds) obtained by
-this group which is essentially the CPU time obtained by all the tasks
+# mount -t cgroup -ocpuacct none /sys/fs/cgroup
+
+With the above step, the initial or the parent accounting group becomes
+visible at /sys/fs/cgroup. At bootup, this group includes all the tasks in
+the system. /sys/fs/cgroup/tasks lists the tasks in this cgroup.
+/sys/fs/cgroup/cpuacct.usage gives the CPU time (in nanoseconds) obtained
+by this group which is essentially the CPU time obtained by all the tasks
in the system.
-New accounting groups can be created under the parent group /cgroups.
+New accounting groups can be created under the parent group /sys/fs/cgroup.
-# cd /cgroups
+# cd /sys/fs/cgroup
# mkdir g1
# echo $$ > g1
The above steps create a new group g1 and move the current shell
process (bash) into it. CPU time consumed by this bash and its children
can be obtained from g1/cpuacct.usage and the same is accumulated in
-/cgroups/cpuacct.usage also.
+/sys/fs/cgroup/cpuacct.usage also.
cpuacct.stat file lists a few statistics which further divide the
CPU time obtained by the cgroup into user and system times. Currently
diff --git a/Documentation/cgroups/cpusets.txt b/Documentation/cgroups/cpusets.txt
index 98a30829af7..5b0d78e55cc 100644
--- a/Documentation/cgroups/cpusets.txt
+++ b/Documentation/cgroups/cpusets.txt
@@ -661,21 +661,21 @@ than stress the kernel.
To start a new job that is to be contained within a cpuset, the steps are:
- 1) mkdir /dev/cpuset
- 2) mount -t cgroup -ocpuset cpuset /dev/cpuset
+ 1) mkdir /sys/fs/cgroup/cpuset
+ 2) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
3) Create the new cpuset by doing mkdir's and write's (or echo's) in
- the /dev/cpuset virtual file system.
+ the /sys/fs/cgroup/cpuset virtual file system.
4) Start a task that will be the "founding father" of the new job.
5) Attach that task to the new cpuset by writing its pid to the
- /dev/cpuset tasks file for that cpuset.
+ /sys/fs/cgroup/cpuset tasks file for that cpuset.
6) fork, exec or clone the job tasks from this founding father task.
For example, the following sequence of commands will setup a cpuset
named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
and then start a subshell 'sh' in that cpuset:
- mount -t cgroup -ocpuset cpuset /dev/cpuset
- cd /dev/cpuset
+ mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
+ cd /sys/fs/cgroup/cpuset
mkdir Charlie
cd Charlie
/bin/echo 2-3 > cpuset.cpus
@@ -710,14 +710,14 @@ Creating, modifying, using the cpusets can be done through the cpuset
virtual filesystem.
To mount it, type:
-# mount -t cgroup -o cpuset cpuset /dev/cpuset
+# mount -t cgroup -o cpuset cpuset /sys/fs/cgroup/cpuset
-Then under /dev/cpuset you can find a tree that corresponds to the
-tree of the cpusets in the system. For instance, /dev/cpuset
+Then under /sys/fs/cgroup/cpuset you can find a tree that corresponds to the
+tree of the cpusets in the system. For instance, /sys/fs/cgroup/cpuset
is the cpuset that holds the whole system.
-If you want to create a new cpuset under /dev/cpuset:
-# cd /dev/cpuset
+If you want to create a new cpuset under /sys/fs/cgroup/cpuset:
+# cd /sys/fs/cgroup/cpuset
# mkdir my_cpuset
Now you want to do something with this cpuset.
@@ -765,12 +765,12 @@ wrapper around the cgroup filesystem.
The command
-mount -t cpuset X /dev/cpuset
+mount -t cpuset X /sys/fs/cgroup/cpuset
is equivalent to
-mount -t cgroup -ocpuset,noprefix X /dev/cpuset
-echo "/sbin/cpuset_release_agent" > /dev/cpuset/release_agent
+mount -t cgroup -ocpuset,noprefix X /sys/fs/cgroup/cpuset
+echo "/sbin/cpuset_release_agent" > /sys/fs/cgroup/cpuset/release_agent
2.2 Adding/removing cpus
------------------------
diff --git a/Documentation/cgroups/devices.txt b/Documentation/cgroups/devices.txt
index 57ca4c89fe5..16624a7f822 100644
--- a/Documentation/cgroups/devices.txt
+++ b/Documentation/cgroups/devices.txt
@@ -22,16 +22,16 @@ removed from the child(ren).
An entry is added using devices.allow, and removed using
devices.deny. For instance
- echo 'c 1:3 mr' > /cgroups/1/devices.allow
+ echo 'c 1:3 mr' > /sys/fs/cgroup/1/devices.allow
allows cgroup 1 to read and mknod the device usually known as
/dev/null. Doing
- echo a > /cgroups/1/devices.deny
+ echo a > /sys/fs/cgroup/1/devices.deny
will remove the default 'a *:* rwm' entry. Doing
- echo a > /cgroups/1/devices.allow
+ echo a > /sys/fs/cgroup/1/devices.allow
will add the 'a *:* rwm' entry to the whitelist.
diff --git a/Documentation/cgroups/freezer-subsystem.txt b/Documentation/cgroups/freezer-subsystem.txt
index 41f37fea127..c21d77742a0 100644
--- a/Documentation/cgroups/freezer-subsystem.txt
+++ b/Documentation/cgroups/freezer-subsystem.txt
@@ -59,28 +59,28 @@ is non-freezable.
* Examples of usage :
- # mkdir /containers
- # mount -t cgroup -ofreezer freezer /containers
- # mkdir /containers/0
- # echo $some_pid > /containers/0/tasks
+ # mkdir /sys/fs/cgroup/freezer
+ # mount -t cgroup -ofreezer freezer /sys/fs/cgroup/freezer
+ # mkdir /sys/fs/cgroup/freezer/0
+ # echo $some_pid > /sys/fs/cgroup/freezer/0/tasks
to get status of the freezer subsystem :
- # cat /containers/0/freezer.state
+ # cat /sys/fs/cgroup/freezer/0/freezer.state
THAWED
to freeze all tasks in the container :
- # echo FROZEN > /containers/0/freezer.state
- # cat /containers/0/freezer.state
+ # echo FROZEN > /sys/fs/cgroup/freezer/0/freezer.state
+ # cat /sys/fs/cgroup/freezer/0/freezer.state
FREEZING
- # cat /containers/0/freezer.state
+ # cat /sys/fs/cgroup/freezer/0/freezer.state
FROZEN
to unfreeze all tasks in the container :
- # echo THAWED > /containers/0/freezer.state
- # cat /containers/0/freezer.state
+ # echo THAWED > /sys/fs/cgroup/freezer/0/freezer.state
+ # cat /sys/fs/cgroup/freezer/0/freezer.state
THAWED
This is the basic mechanism which should do the right thing for user space task
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index 7c163477fcd..06eb6d957c8 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -1,8 +1,8 @@
Memory Resource Controller
-NOTE: The Memory Resource Controller has been generically been referred
- to as the memory controller in this document. Do not confuse memory
- controller used here with the memory controller that is used in hardware.
+NOTE: The Memory Resource Controller has generically been referred to as the
+ memory controller in this document. Do not confuse memory controller
+ used here with the memory controller that is used in hardware.
(For editors)
In this document:
@@ -70,6 +70,7 @@ Brief summary of control files.
(See sysctl's vm.swappiness)
memory.move_charge_at_immigrate # set/show controls of moving charges
memory.oom_control # set/show oom controls.
+ memory.numa_stat # show the number of memory usage per numa node
1. History
@@ -181,7 +182,7 @@ behind this approach is that a cgroup that aggressively uses a shared
page will eventually get charged for it (once it is uncharged from
the cgroup that brought it in -- this will happen on memory pressure).
-Exception: If CONFIG_CGROUP_CGROUP_MEM_RES_CTLR_SWAP is not used..
+Exception: If CONFIG_CGROUP_CGROUP_MEM_RES_CTLR_SWAP is not used.
When you do swapoff and make swapped-out pages of shmem(tmpfs) to
be backed into memory in force, charges for pages are accounted against the
caller of swapoff rather than the users of shmem.
@@ -213,7 +214,7 @@ affecting global LRU, memory+swap limit is better than just limiting swap from
OS point of view.
* What happens when a cgroup hits memory.memsw.limit_in_bytes
-When a cgroup his memory.memsw.limit_in_bytes, it's useless to do swap-out
+When a cgroup hits memory.memsw.limit_in_bytes, it's useless to do swap-out
in this cgroup. Then, swap-out will not be done by cgroup routine and file
caches are dropped. But as mentioned above, global LRU can do swapout memory
from it for sanity of the system's memory management state. You can't forbid
@@ -263,16 +264,17 @@ b. Enable CONFIG_RESOURCE_COUNTERS
c. Enable CONFIG_CGROUP_MEM_RES_CTLR
d. Enable CONFIG_CGROUP_MEM_RES_CTLR_SWAP (to use swap extension)
-1. Prepare the cgroups
-# mkdir -p /cgroups
-# mount -t cgroup none /cgroups -o memory
+1. Prepare the cgroups (see cgroups.txt, Why are cgroups needed?)
+# mount -t tmpfs none /sys/fs/cgroup
+# mkdir /sys/fs/cgroup/memory
+# mount -t cgroup none /sys/fs/cgroup/memory -o memory
2. Make the new group and move bash into it
-# mkdir /cgroups/0
-# echo $$ > /cgroups/0/tasks
+# mkdir /sys/fs/cgroup/memory/0
+# echo $$ > /sys/fs/cgroup/memory/0/tasks
Since now we're in the 0 cgroup, we can alter the memory limit:
-# echo 4M > /cgroups/0/memory.limit_in_bytes
+# echo 4M > /sys/fs/cgroup/memory/0/memory.limit_in_bytes
NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo,
mega or gigabytes. (Here, Kilo, Mega, Giga are Kibibytes, Mebibytes, Gibibytes.)
@@ -280,11 +282,11 @@ mega or gigabytes. (Here, Kilo, Mega, Giga are Kibibytes, Mebibytes, Gibibytes.)
NOTE: We can write "-1" to reset the *.limit_in_bytes(unlimited).
NOTE: We cannot set limits on the root cgroup any more.
-# cat /cgroups/0/memory.limit_in_bytes
+# cat /sys/fs/cgroup/memory/0/memory.limit_in_bytes
4194304
We can check the usage:
-# cat /cgroups/0/memory.usage_in_bytes
+# cat /sys/fs/cgroup/memory/0/memory.usage_in_bytes
1216512
A successful write to this file does not guarantee a successful set of
@@ -464,6 +466,24 @@ value for efficient access. (Of course, when necessary, it's synchronized.)
If you want to know more exact memory usage, you should use RSS+CACHE(+SWAP)
value in memory.stat(see 5.2).
+5.6 numa_stat
+
+This is similar to numa_maps but operates on a per-memcg basis. This is
+useful for providing visibility into the numa locality information within
+an memcg since the pages are allowed to be allocated from any physical
+node. One of the usecases is evaluating application performance by
+combining this information with the application's cpu allocation.
+
+We export "total", "file", "anon" and "unevictable" pages per-node for
+each memcg. The ouput format of memory.numa_stat is:
+
+total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
+file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
+anon=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
+unevictable=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
+
+And we have total = file + anon + unevictable.
+
6. Hierarchy support
The memory controller supports a deep hierarchy and hierarchical accounting.
@@ -471,13 +491,13 @@ The hierarchy is created by creating the appropriate cgroups in the
cgroup filesystem. Consider for example, the following cgroup filesystem
hierarchy
- root
+ root
/ | \
- / | \
- a b c
- | \
- | \
- d e
+ / | \
+ a b c
+ | \
+ | \
+ d e
In the diagram above, with hierarchical accounting enabled, all memory
usage of e, is accounted to its ancestors up until the root (i.e, c and root),
diff --git a/Documentation/devicetree/bindings/gpio/fsl-imx-gpio.txt b/Documentation/devicetree/bindings/gpio/fsl-imx-gpio.txt
new file mode 100644
index 00000000000..4363ae4b3c1
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/fsl-imx-gpio.txt
@@ -0,0 +1,22 @@
+* Freescale i.MX/MXC GPIO controller
+
+Required properties:
+- compatible : Should be "fsl,<soc>-gpio"
+- reg : Address and length of the register set for the device
+- interrupts : Should be the port interrupt shared by all 32 pins, if
+ one number. If two numbers, the first one is the interrupt shared
+ by low 16 pins and the second one is for high 16 pins.
+- gpio-controller : Marks the device node as a gpio controller.
+- #gpio-cells : Should be two. The first cell is the pin number and
+ the second cell is used to specify optional parameters (currently
+ unused).
+
+Example:
+
+gpio0: gpio@73f84000 {
+ compatible = "fsl,imx51-gpio", "fsl,imx31-gpio";
+ reg = <0x73f84000 0x4000>;
+ interrupts = <50 51>;
+ gpio-controller;
+ #gpio-cells = <2>;
+};
diff --git a/Documentation/devicetree/bindings/gpio/gpio.txt b/Documentation/devicetree/bindings/gpio/gpio.txt
index edaa84d288a..4e16ba4feab 100644
--- a/Documentation/devicetree/bindings/gpio/gpio.txt
+++ b/Documentation/devicetree/bindings/gpio/gpio.txt
@@ -4,17 +4,45 @@ Specifying GPIO information for devices
1) gpios property
-----------------
-Nodes that makes use of GPIOs should define them using `gpios' property,
-format of which is: <&gpio-controller1-phandle gpio1-specifier
- &gpio-controller2-phandle gpio2-specifier
- 0 /* holes are permitted, means no GPIO 3 */
- &gpio-controller4-phandle gpio4-specifier
- ...>;
+Nodes that makes use of GPIOs should specify them using one or more
+properties, each containing a 'gpio-list':
-Note that gpio-specifier length is controller dependent.
+ gpio-list ::= <single-gpio> [gpio-list]
+ single-gpio ::= <gpio-phandle> <gpio-specifier>
+ gpio-phandle : phandle to gpio controller node
+ gpio-specifier : Array of #gpio-cells specifying specific gpio
+ (controller specific)
+
+GPIO properties should be named "[<name>-]gpios". Exact
+meaning of each gpios property must be documented in the device tree
+binding for each device.
+
+For example, the following could be used to describe gpios pins to use
+as chip select lines; with chip selects 0, 1 and 3 populated, and chip
+select 2 left empty:
+
+ gpio1: gpio1 {
+ gpio-controller
+ #gpio-cells = <2>;
+ };
+ gpio2: gpio2 {
+ gpio-controller
+ #gpio-cells = <1>;
+ };
+ [...]
+ chipsel-gpios = <&gpio1 12 0>,
+ <&gpio1 13 0>,
+ <0>, /* holes are permitted, means no GPIO 2 */
+ <&gpio2 2>;
+
+Note that gpio-specifier length is controller dependent. In the
+above example, &gpio1 uses 2 cells to specify a gpio, while &gpio2
+only uses one.
gpio-specifier may encode: bank, pin position inside the bank,
whether pin is open-drain and whether pin is logically inverted.
+Exact meaning of each specifier cell is controller specific, and must
+be documented in the device tree binding for the device.
Example of the node using GPIOs:
@@ -28,8 +56,8 @@ and empty GPIO flags as accepted by the "qe_pio_e" gpio-controller.
2) gpio-controller nodes
------------------------
-Every GPIO controller node must have #gpio-cells property defined,
-this information will be used to translate gpio-specifiers.
+Every GPIO controller node must both an empty "gpio-controller"
+property, and have #gpio-cells contain the size of the gpio-specifier.
Example of two SOC GPIO banks defined as gpio-controller nodes:
diff --git a/Documentation/devicetree/bindings/gpio/gpio_nvidia.txt b/Documentation/devicetree/bindings/gpio/gpio_nvidia.txt
new file mode 100644
index 00000000000..eb4b530d64e
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio_nvidia.txt
@@ -0,0 +1,8 @@
+NVIDIA Tegra 2 GPIO controller
+
+Required properties:
+- compatible : "nvidia,tegra20-gpio"
+- #gpio-cells : Should be two. The first cell is the pin number and the
+ second cell is used to specify optional parameters:
+ - bit 0 specifies polarity (0 for normal, 1 for inverted)
+- gpio-controller : Marks the device node as a GPIO controller.
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 1a9446b5915..b1c921c2751 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -481,23 +481,6 @@ Who: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
----------------------------
-What: namespace cgroup (ns_cgroup)
-When: 2.6.38
-Why: The ns_cgroup leads to some problems:
- * cgroup creation is out-of-control
- * cgroup name can conflict when pids are looping
- * it is not possible to have a single process handling
- a lot of namespaces without falling in a exponential creation time
- * we may want to create a namespace without creating a cgroup
-
- The ns_cgroup is replaced by a compatibility flag 'clone_children',
- where a newly created cgroup will copy the parent cgroup values.
- The userspace has to manually create a cgroup and add a task to
- the 'tasks' file.
-Who: Daniel Lezcano <daniel.lezcano@free.fr>
-
-----------------------------
-
What: iwlwifi disable_hw_scan module parameters
When: 2.6.40
Why: Hareware scan is the prefer method for iwlwifi devices for
@@ -600,3 +583,25 @@ Why: Superseded by the UVCIOC_CTRL_QUERY ioctl.
Who: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
----------------------------
+
+What: For VIDIOC_S_FREQUENCY the type field must match the device node's type.
+ If not, return -EINVAL.
+When: 3.2
+Why: It makes no sense to switch the tuner to radio mode by calling
+ VIDIOC_S_FREQUENCY on a video node, or to switch the tuner to tv mode by
+ calling VIDIOC_S_FREQUENCY on a radio node. This is the first step of a
+ move to more consistent handling of tv and radio tuners.
+Who: Hans Verkuil <hans.verkuil@cisco.com>
+
+----------------------------
+
+What: Opening a radio device node will no longer automatically switch the
+ tuner mode from tv to radio.
+When: 3.3
+Why: Just opening a V4L device should not change the state of the hardware
+ like that. It's very unexpected and against the V4L spec. Instead, you
+ switch to radio mode by calling VIDIOC_S_FREQUENCY. This is the second
+ and last step of the move to consistent handling of tv and radio tuners.
+Who: Hans Verkuil <hans.verkuil@cisco.com>
+
+----------------------------
diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt
index a167ab876c3..7cc6bf2871e 100644
--- a/Documentation/filesystems/caching/netfs-api.txt
+++ b/Documentation/filesystems/caching/netfs-api.txt
@@ -673,6 +673,22 @@ storage request to complete, or it may attempt to cancel the storage request -
in which case the page will not be stored in the cache this time.
+BULK INODE PAGE UNCACHE
+-----------------------
+
+A convenience routine is provided to perform an uncache on all the pages
+attached to an inode. This assumes that the pages on the inode correspond on a
+1:1 basis with the pages in the cache.
+
+ void fscache_uncache_all_inode_pages(struct fscache_cookie *cookie,
+ struct inode *inode);
+
+This takes the netfs cookie that the pages were cached with and the inode that
+the pages are attached to. This function will wait for pages to finish being
+written to the cache and for the cache to finish with the page generally. No
+error is returned.
+
+
==========================
INDEX AND DATA FILE UPDATE
==========================
diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.txt
index d5c0cef38a7..873a2ab2e9f 100644
--- a/Documentation/filesystems/nilfs2.txt
+++ b/Documentation/filesystems/nilfs2.txt
@@ -40,7 +40,6 @@ Features which NILFS2 does not support yet:
- POSIX ACLs
- quotas
- fsck
- - resize
- defragmentation
Mount options
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index f4817802406..db3b1aba32a 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -843,6 +843,7 @@ Provides counts of softirq handlers serviced since boot time, for each cpu.
TASKLET: 0 0 0 290
SCHED: 27035 26983 26971 26746
HRTIMER: 0 0 0 0
+ RCU: 1678 1769 2178 2250
1.3 IDE devices in /proc/ide
diff --git a/Documentation/filesystems/ubifs.txt b/Documentation/filesystems/ubifs.txt
index 8e4fab639d9..a0a61d2f389 100644
--- a/Documentation/filesystems/ubifs.txt
+++ b/Documentation/filesystems/ubifs.txt
@@ -111,34 +111,6 @@ The following is an example of the kernel boot arguments to attach mtd0
to UBI and mount volume "rootfs":
ubi.mtd=0 root=ubi0:rootfs rootfstype=ubifs
-
-Module Parameters for Debugging
-===============================
-
-When UBIFS has been compiled with debugging enabled, there are 2 module
-parameters that are available to control aspects of testing and debugging.
-
-debug_chks Selects extra checks that UBIFS can do while running:
-
- Check Flag value
-
- General checks 1
- Check Tree Node Cache (TNC) 2
- Check indexing tree size 4
- Check orphan area 8
- Check old indexing tree 16
- Check LEB properties (lprops) 32
- Check leaf nodes and inodes 64
-
-debug_tsts Selects a mode of testing, as follows:
-
- Test mode Flag value
-
- Failure mode for recovery testing 4
-
-For example, set debug_chks to 3 to enable general and TNC checks.
-
-
References
==========
diff --git a/Documentation/hwmon/f71882fg b/Documentation/hwmon/f71882fg
index 84d2623810f..de91c0db584 100644
--- a/Documentation/hwmon/f71882fg
+++ b/Documentation/hwmon/f71882fg
@@ -22,6 +22,10 @@ Supported chips:
Prefix: 'f71869'
Addresses scanned: none, address read from Super I/O config space
Datasheet: Available from the Fintek website
+ * Fintek F71869A
+ Prefix: 'f71869a'
+ Addresses scanned: none, address read from Super I/O config space
+ Datasheet: Not public
* Fintek F71882FG and F71883FG
Prefix: 'f71882fg'
Addresses scanned: none, address read from Super I/O config space
diff --git a/Documentation/hwmon/k10temp b/Documentation/hwmon/k10temp
index 0393c89277c..a10f73624ad 100644
--- a/Documentation/hwmon/k10temp
+++ b/Documentation/hwmon/k10temp
@@ -9,8 +9,8 @@ Supported chips:
Socket S1G3: Athlon II, Sempron, Turion II
* AMD Family 11h processors:
Socket S1G2: Athlon (X2), Sempron (X2), Turion X2 (Ultra)
-* AMD Family 12h processors: "Llano"
-* AMD Family 14h processors: "Brazos" (C/E/G-Series)
+* AMD Family 12h processors: "Llano" (E2/A4/A6/A8-Series)
+* AMD Family 14h processors: "Brazos" (C/E/G/Z-Series)
* AMD Family 15h processors: "Bulldozer"
Prefix: 'k10temp'
@@ -20,12 +20,16 @@ Supported chips:
http://support.amd.com/us/Processor_TechDocs/31116.pdf
BIOS and Kernel Developer's Guide (BKDG) for AMD Family 11h Processors:
http://support.amd.com/us/Processor_TechDocs/41256.pdf
+ BIOS and Kernel Developer's Guide (BKDG) for AMD Family 12h Processors:
+ http://support.amd.com/us/Processor_TechDocs/41131.pdf
BIOS and Kernel Developer's Guide (BKDG) for AMD Family 14h Models 00h-0Fh Processors:
http://support.amd.com/us/Processor_TechDocs/43170.pdf
Revision Guide for AMD Family 10h Processors:
http://support.amd.com/us/Processor_TechDocs/41322.pdf
Revision Guide for AMD Family 11h Processors:
http://support.amd.com/us/Processor_TechDocs/41788.pdf
+ Revision Guide for AMD Family 12h Processors:
+ http://support.amd.com/us/Processor_TechDocs/44739.pdf
Revision Guide for AMD Family 14h Models 00h-0Fh Processors:
http://support.amd.com/us/Processor_TechDocs/47534.pdf
AMD Family 11h Processor Power and Thermal Data Sheet for Notebooks:
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index fd248a31821..aa47be71df4 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2015,6 +2015,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
the default.
off: Turn ECRC off
on: Turn ECRC on.
+ realloc reallocate PCI resources if allocations done by BIOS
+ are erroneous.
pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power
Management.
diff --git a/Documentation/kmemleak.txt b/Documentation/kmemleak.txt
index 090e6ee0453..51063e681ca 100644
--- a/Documentation/kmemleak.txt
+++ b/Documentation/kmemleak.txt
@@ -11,7 +11,9 @@ with the difference that the orphan objects are not freed but only
reported via /sys/kernel/debug/kmemleak. A similar method is used by the
Valgrind tool (memcheck --leak-check) to detect the memory leaks in
user-space applications.
-Kmemleak is supported on x86, arm, powerpc, sparc, sh, microblaze and tile.
+
+Please check DEBUG_KMEMLEAK dependencies in lib/Kconfig.debug for supported
+architectures.
Usage
-----
diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt
index 1565eefd6fd..61815483efa 100644
--- a/Documentation/laptops/thinkpad-acpi.txt
+++ b/Documentation/laptops/thinkpad-acpi.txt
@@ -534,6 +534,8 @@ Events that are never propagated by the driver:
0x2404 System is waking up from hibernation to undock
0x2405 System is waking up from hibernation to eject bay
0x5010 Brightness level changed/control event
+0x6000 KEYBOARD: Numlock key pressed
+0x6005 KEYBOARD: Fn key pressed (TO BE VERIFIED)
Events that are propagated by the driver to userspace:
@@ -545,6 +547,8 @@ Events that are propagated by the driver to userspace:
0x3006 Bay hotplug request (hint to power up SATA link when
the optical drive tray is ejected)
0x4003 Undocked (see 0x2x04), can sleep again
+0x4010 Docked into hotplug port replicator (non-ACPI dock)
+0x4011 Undocked from hotplug port replicator (non-ACPI dock)
0x500B Tablet pen inserted into its storage bay
0x500C Tablet pen removed from its storage bay
0x6011 ALARM: battery is too hot
@@ -552,6 +556,7 @@ Events that are propagated by the driver to userspace:
0x6021 ALARM: a sensor is too hot
0x6022 ALARM: a sensor is extremely hot
0x6030 System thermal table changed
+0x6040 Nvidia Optimus/AC adapter related (TO BE VERIFIED)
Battery nearly empty alarms are a last resort attempt to get the
operating system to hibernate or shutdown cleanly (0x2313), or shutdown
diff --git a/Documentation/md.txt b/Documentation/md.txt
index 2366b1c8cf1..f0eee83ff78 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -555,7 +555,7 @@ also have
sync_min
sync_max
The two values, given as numbers of sectors, indicate a range
- withing the array where 'check'/'repair' will operate. Must be
+ within the array where 'check'/'repair' will operate. Must be
a multiple of chunk_size. When it reaches "sync_max" it will
pause, rather than complete.
You can use 'select' or 'poll' on "sync_completed" to wait for
diff --git a/Documentation/mmc/00-INDEX b/Documentation/mmc/00-INDEX
index 93dd7a71407..a9ba6720ffd 100644
--- a/Documentation/mmc/00-INDEX
+++ b/Documentation/mmc/00-INDEX
@@ -4,3 +4,5 @@ mmc-dev-attrs.txt
- info on SD and MMC device attributes
mmc-dev-parts.txt
- info on SD and MMC device partitions
+mmc-async-req.txt
+ - info on mmc asynchronous requests
diff --git a/Documentation/mmc/mmc-async-req.txt b/Documentation/mmc/mmc-async-req.txt
new file mode 100644
index 00000000000..ae1907b10e4
--- /dev/null
+++ b/Documentation/mmc/mmc-async-req.txt
@@ -0,0 +1,87 @@
+Rationale
+=========
+
+How significant is the cache maintenance overhead?
+It depends. Fast eMMC and multiple cache levels with speculative cache
+pre-fetch makes the cache overhead relatively significant. If the DMA
+preparations for the next request are done in parallel with the current
+transfer, the DMA preparation overhead would not affect the MMC performance.
+The intention of non-blocking (asynchronous) MMC requests is to minimize the
+time between when an MMC request ends and another MMC request begins.
+Using mmc_wait_for_req(), the MMC controller is idle while dma_map_sg and
+dma_unmap_sg are processing. Using non-blocking MMC requests makes it
+possible to prepare the caches for next job in parallel with an active
+MMC request.
+
+MMC block driver
+================
+
+The mmc_blk_issue_rw_rq() in the MMC block driver is made non-blocking.
+The increase in throughput is proportional to the time it takes to
+prepare (major part of preparations are dma_map_sg() and dma_unmap_sg())
+a request and how fast the memory is. The faster the MMC/SD is the
+more significant the prepare request time becomes. Roughly the expected
+performance gain is 5% for large writes and 10% on large reads on a L2 cache
+platform. In power save mode, when clocks run on a lower frequency, the DMA
+preparation may cost even more. As long as these slower preparations are run
+in parallel with the transfer performance won't be affected.
+
+Details on measurements from IOZone and mmc_test
+================================================
+
+https://wiki.linaro.org/WorkingGroups/Kernel/Specs/StoragePerfMMC-async-req
+
+MMC core API extension
+======================
+
+There is one new public function mmc_start_req().
+It starts a new MMC command request for a host. The function isn't
+truly non-blocking. If there is an ongoing async request it waits
+for completion of that request and starts the new one and returns. It
+doesn't wait for the new request to complete. If there is no ongoing
+request it starts the new request and returns immediately.
+
+MMC host extensions
+===================
+
+There are two optional members in the mmc_host_ops -- pre_req() and
+post_req() -- that the host driver may implement in order to move work
+to before and after the actual mmc_host_ops.request() function is called.
+In the DMA case pre_req() may do dma_map_sg() and prepare the DMA
+descriptor, and post_req() runs the dma_unmap_sg().
+
+Optimize for the first request
+==============================
+
+The first request in a series of requests can't be prepared in parallel
+with the previous transfer, since there is no previous request.
+The argument is_first_req in pre_req() indicates that there is no previous
+request. The host driver may optimize for this scenario to minimize
+the performance loss. A way to optimize for this is to split the current
+request in two chunks, prepare the first chunk and start the request,
+and finally prepare the second chunk and start the transfer.
+
+Pseudocode to handle is_first_req scenario with minimal prepare overhead:
+
+if (is_first_req && req->size > threshold)
+ /* start MMC transfer for the complete transfer size */
+ mmc_start_command(MMC_CMD_TRANSFER_FULL_SIZE);
+
+ /*
+ * Begin to prepare DMA while cmd is being processed by MMC.
+ * The first chunk of the request should take the same time
+ * to prepare as the "MMC process command time".
+ * If prepare time exceeds MMC cmd time
+ * the transfer is delayed, guesstimate max 4k as first chunk size.
+ */
+ prepare_1st_chunk_for_dma(req);
+ /* flush pending desc to the DMAC (dmaengine.h) */
+ dma_issue_pending(req->dma_desc);
+
+ prepare_2nd_chunk_for_dma(req);
+ /*
+ * The second issue_pending should be called before MMC runs out
+ * of the first chunk. If the MMC runs out of the first data chunk
+ * before this call, the transfer is delayed.
+ */
+ dma_issue_pending(req->dma_desc);
diff --git a/Documentation/networking/ifenslave.c b/Documentation/networking/ifenslave.c
index 2bac9618c34..65968fbf1e4 100644
--- a/Documentation/networking/ifenslave.c
+++ b/Documentation/networking/ifenslave.c
@@ -260,7 +260,7 @@ int main(int argc, char *argv[])
case 'V': opt_V++; exclusive++; break;
case '?':
- fprintf(stderr, usage_msg);
+ fprintf(stderr, "%s", usage_msg);
res = 2;
goto out;
}
@@ -268,13 +268,13 @@ int main(int argc, char *argv[])
/* options check */
if (exclusive > 1) {
- fprintf(stderr, usage_msg);
+ fprintf(stderr, "%s", usage_msg);
res = 2;
goto out;
}
if (opt_v || opt_V) {
- printf(version);
+ printf("%s", version);
if (opt_V) {
res = 0;
goto out;
@@ -282,14 +282,14 @@ int main(int argc, char *argv[])
}
if (opt_u) {
- printf(usage_msg);
+ printf("%s", usage_msg);
res = 0;
goto out;
}
if (opt_h) {
- printf(usage_msg);
- printf(help_msg);
+ printf("%s", usage_msg);
+ printf("%s", help_msg);
res = 0;
goto out;
}
@@ -309,7 +309,7 @@ int main(int argc, char *argv[])
goto out;
} else {
/* Just show usage */
- fprintf(stderr, usage_msg);
+ fprintf(stderr, "%s", usage_msg);
res = 2;
goto out;
}
@@ -320,7 +320,7 @@ int main(int argc, char *argv[])
master_ifname = *spp++;
if (master_ifname == NULL) {
- fprintf(stderr, usage_msg);
+ fprintf(stderr, "%s", usage_msg);
res = 2;
goto out;
}
@@ -339,7 +339,7 @@ int main(int argc, char *argv[])
if (slave_ifname == NULL) {
if (opt_d || opt_c) {
- fprintf(stderr, usage_msg);
+ fprintf(stderr, "%s", usage_msg);
res = 2;
goto out;
}
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index d3d653a5f9b..db2a4067013 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -106,16 +106,6 @@ inet_peer_maxttl - INTEGER
when the number of entries in the pool is very small).
Measured in seconds.
-inet_peer_gc_mintime - INTEGER
- Minimum interval between garbage collection passes. This interval is
- in effect under high memory pressure on the pool.
- Measured in seconds.
-
-inet_peer_gc_maxtime - INTEGER
- Minimum interval between garbage collection passes. This interval is
- in effect under low (or absent) memory pressure on the pool.
- Measured in seconds.
-
TCP variables:
somaxconn - INTEGER
@@ -346,7 +336,7 @@ tcp_orphan_retries - INTEGER
when RTO retransmissions remain unacknowledged.
See tcp_retries2 for more details.
- The default value is 7.
+ The default value is 8.
If your machine is a loaded WEB server,
you should think about lowering this value, such sockets
may consume significant resources. Cf. tcp_max_orphans.
@@ -394,7 +384,7 @@ tcp_rmem - vector of 3 INTEGERs: min, default, max
min: Minimal size of receive buffer used by TCP sockets.
It is guaranteed to each TCP socket, even under moderate memory
pressure.
- Default: 8K
+ Default: 1 page
default: initial size of receive buffer used by TCP sockets.
This value overrides net.core.rmem_default used by other protocols.
@@ -483,7 +473,7 @@ tcp_window_scaling - BOOLEAN
tcp_wmem - vector of 3 INTEGERs: min, default, max
min: Amount of memory reserved for send buffers for TCP sockets.
Each TCP socket has rights to use it due to fact of its birth.
- Default: 4K
+ Default: 1 page
default: initial size of send buffer used by TCP sockets. This
value overrides net.core.wmem_default used by other protocols.
@@ -553,13 +543,13 @@ udp_rmem_min - INTEGER
Minimal size of receive buffer used by UDP sockets in moderation.
Each UDP socket is able to use the size for receiving data, even if
total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
- Default: 4096
+ Default: 1 page
udp_wmem_min - INTEGER
Minimal size of send buffer used by UDP sockets in moderation.
Each UDP socket is able to use the size for sending data, even if
total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
- Default: 4096
+ Default: 1 page
CIPSOv4 Variables:
@@ -1465,10 +1455,17 @@ sctp_mem - vector of 3 INTEGERs: min, pressure, max
Default is calculated at boot time from amount of available memory.
sctp_rmem - vector of 3 INTEGERs: min, default, max
- See tcp_rmem for a description.
+ Only the first value ("min") is used, "default" and "max" are
+ ignored.
+
+ min: Minimal size of receive buffer used by SCTP socket.
+ It is guaranteed to each SCTP socket (but not association) even
+ under moderate memory pressure.
+
+ Default: 1 page
sctp_wmem - vector of 3 INTEGERs: min, default, max
- See tcp_wmem for a description.
+ Currently this tunable has no effect.
addr_scope_policy - INTEGER
Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00
diff --git a/Documentation/networking/netdev-features.txt b/Documentation/networking/netdev-features.txt
new file mode 100644
index 00000000000..4b1c0dcef84
--- /dev/null
+++ b/Documentation/networking/netdev-features.txt
@@ -0,0 +1,154 @@
+Netdev features mess and how to get out from it alive
+=====================================================
+
+Author:
+ Michał Mirosław <mirq-linux@rere.qmqm.pl>
+
+
+
+ Part I: Feature sets
+======================
+
+Long gone are the days when a network card would just take and give packets
+verbatim. Today's devices add multiple features and bugs (read: offloads)
+that relieve an OS of various tasks like generating and checking checksums,
+splitting packets, classifying them. Those capabilities and their state
+are commonly referred to as netdev features in Linux kernel world.
+
+There are currently three sets of features relevant to the driver, and
+one used internally by network core:
+
+ 1. netdev->hw_features set contains features whose state may possibly
+ be changed (enabled or disabled) for a particular device by user's
+ request. This set should be initialized in ndo_init callback and not
+ changed later.
+
+ 2. netdev->features set contains features which are currently enabled
+ for a device. This should be changed only by network core or in
+ error paths of ndo_set_features callback.
+
+ 3. netdev->vlan_features set contains features whose state is inherited
+ by child VLAN devices (limits netdev->features set). This is currently
+ used for all VLAN devices whether tags are stripped or inserted in
+ hardware or software.
+
+ 4. netdev->wanted_features set contains feature set requested by user.
+ This set is filtered by ndo_fix_features callback whenever it or
+ some device-specific conditions change. This set is internal to
+ networking core and should not be referenced in drivers.
+
+
+
+ Part II: Controlling enabled features
+=======================================
+
+When current feature set (netdev->features) is to be changed, new set
+is calculated and filtered by calling ndo_fix_features callback
+and netdev_fix_features(). If the resulting set differs from current
+set, it is passed to ndo_set_features callback and (if the callback
+returns success) replaces value stored in netdev->features.
+NETDEV_FEAT_CHANGE notification is issued after that whenever current
+set might have changed.
+
+The following events trigger recalculation:
+ 1. device's registration, after ndo_init returned success
+ 2. user requested changes in features state
+ 3. netdev_update_features() is called
+
+ndo_*_features callbacks are called with rtnl_lock held. Missing callbacks
+are treated as always returning success.
+
+A driver that wants to trigger recalculation must do so by calling
+netdev_update_features() while holding rtnl_lock. This should not be done
+from ndo_*_features callbacks. netdev->features should not be modified by
+driver except by means of ndo_fix_features callback.
+
+
+
+ Part III: Implementation hints
+================================
+
+ * ndo_fix_features:
+
+All dependencies between features should be resolved here. The resulting
+set can be reduced further by networking core imposed limitations (as coded
+in netdev_fix_features()). For this reason it is safer to disable a feature
+when its dependencies are not met instead of forcing the dependency on.
+
+This callback should not modify hardware nor driver state (should be
+stateless). It can be called multiple times between successive
+ndo_set_features calls.
+
+Callback must not alter features contained in NETIF_F_SOFT_FEATURES or
+NETIF_F_NEVER_CHANGE sets. The exception is NETIF_F_VLAN_CHALLENGED but
+care must be taken as the change won't affect already configured VLANs.
+
+ * ndo_set_features:
+
+Hardware should be reconfigured to match passed feature set. The set
+should not be altered unless some error condition happens that can't
+be reliably detected in ndo_fix_features. In this case, the callback
+should update netdev->features to match resulting hardware state.
+Errors returned are not (and cannot be) propagated anywhere except dmesg.
+(Note: successful return is zero, >0 means silent error.)
+
+
+
+ Part IV: Features
+===================
+
+For current list of features, see include/linux/netdev_features.h.
+This section describes semantics of some of them.
+
+ * Transmit checksumming
+
+For complete description, see comments near the top of include/linux/skbuff.h.
+
+Note: NETIF_F_HW_CSUM is a superset of NETIF_F_IP_CSUM + NETIF_F_IPV6_CSUM.
+It means that device can fill TCP/UDP-like checksum anywhere in the packets
+whatever headers there might be.
+
+ * Transmit TCP segmentation offload
+
+NETIF_F_TSO_ECN means that hardware can properly split packets with CWR bit
+set, be it TCPv4 (when NETIF_F_TSO is enabled) or TCPv6 (NETIF_F_TSO6).
+
+ * Transmit DMA from high memory
+
+On platforms where this is relevant, NETIF_F_HIGHDMA signals that
+ndo_start_xmit can handle skbs with frags in high memory.
+
+ * Transmit scatter-gather
+
+Those features say that ndo_start_xmit can handle fragmented skbs:
+NETIF_F_SG --- paged skbs (skb_shinfo()->frags), NETIF_F_FRAGLIST ---
+chained skbs (skb->next/prev list).
+
+ * Software features
+
+Features contained in NETIF_F_SOFT_FEATURES are features of networking
+stack. Driver should not change behaviour based on them.
+
+ * LLTX driver (deprecated for hardware drivers)
+
+NETIF_F_LLTX should be set in drivers that implement their own locking in
+transmit path or don't need locking at all (e.g. software tunnels).
+In ndo_start_xmit, it is recommended to use a try_lock and return
+NETDEV_TX_LOCKED when the spin lock fails. The locking should also properly
+protect against other callbacks (the rules you need to find out).
+
+Don't use it for new drivers.
+
+ * netns-local device
+
+NETIF_F_NETNS_LOCAL is set for devices that are not allowed to move between
+network namespaces (e.g. loopback).
+
+Don't use it in drivers.
+
+ * VLAN challenged
+
+NETIF_F_VLAN_CHALLENGED should be set for devices which can't cope with VLAN
+headers. Some drivers set this because the cards can't handle the bigger MTU.
+[FIXME: Those cases could be fixed in VLAN code by allowing only reduced-MTU
+VLANs. This may be not useful, though.]
diff --git a/Documentation/networking/nfc.txt b/Documentation/networking/nfc.txt
new file mode 100644
index 00000000000..b24c29bdae2
--- /dev/null
+++ b/Documentation/networking/nfc.txt
@@ -0,0 +1,128 @@
+Linux NFC subsystem
+===================
+
+The Near Field Communication (NFC) subsystem is required to standardize the
+NFC device drivers development and to create an unified userspace interface.
+
+This document covers the architecture overview, the device driver interface
+description and the userspace interface description.
+
+Architecture overview
+---------------------
+
+The NFC subsystem is responsible for:
+ - NFC adapters management;
+ - Polling for targets;
+ - Low-level data exchange;
+
+The subsystem is divided in some parts. The 'core' is responsible for
+providing the device driver interface. On the other side, it is also
+responsible for providing an interface to control operations and low-level
+data exchange.
+
+The control operations are available to userspace via generic netlink.
+
+The low-level data exchange interface is provided by the new socket family
+PF_NFC. The NFC_SOCKPROTO_RAW performs raw communication with NFC targets.
+
+
+ +--------------------------------------+
+ | USER SPACE |
+ +--------------------------------------+
+ ^ ^
+ | low-level | control
+ | data exchange | operations
+ | |
+ | v
+ | +-----------+
+ | AF_NFC | netlink |
+ | socket +-----------+
+ | raw ^
+ | |
+ v v
+ +---------+ +-----------+
+ | rawsock | <--------> | core |
+ +---------+ +-----------+
+ ^
+ |
+ v
+ +-----------+
+ | driver |
+ +-----------+
+
+Device Driver Interface
+-----------------------
+
+When registering on the NFC subsystem, the device driver must inform the core
+of the set of supported NFC protocols and the set of ops callbacks. The ops
+callbacks that must be implemented are the following:
+
+* start_poll - setup the device to poll for targets
+* stop_poll - stop on progress polling operation
+* activate_target - select and initialize one of the targets found
+* deactivate_target - deselect and deinitialize the selected target
+* data_exchange - send data and receive the response (transceive operation)
+
+Userspace interface
+--------------------
+
+The userspace interface is divided in control operations and low-level data
+exchange operation.
+
+CONTROL OPERATIONS:
+
+Generic netlink is used to implement the interface to the control operations.
+The operations are composed by commands and events, all listed below:
+
+* NFC_CMD_GET_DEVICE - get specific device info or dump the device list
+* NFC_CMD_START_POLL - setup a specific device to polling for targets
+* NFC_CMD_STOP_POLL - stop the polling operation in a specific device
+* NFC_CMD_GET_TARGET - dump the list of targets found by a specific device
+
+* NFC_EVENT_DEVICE_ADDED - reports an NFC device addition
+* NFC_EVENT_DEVICE_REMOVED - reports an NFC device removal
+* NFC_EVENT_TARGETS_FOUND - reports START_POLL results when 1 or more targets
+are found
+
+The user must call START_POLL to poll for NFC targets, passing the desired NFC
+protocols through NFC_ATTR_PROTOCOLS attribute. The device remains in polling
+state until it finds any target. However, the user can stop the polling
+operation by calling STOP_POLL command. In this case, it will be checked if
+the requester of STOP_POLL is the same of START_POLL.
+
+If the polling operation finds one or more targets, the event TARGETS_FOUND is
+sent (including the device id). The user must call GET_TARGET to get the list of
+all targets found by such device. Each reply message has target attributes with
+relevant information such as the supported NFC protocols.
+
+All polling operations requested through one netlink socket are stopped when
+it's closed.
+
+LOW-LEVEL DATA EXCHANGE:
+
+The userspace must use PF_NFC sockets to perform any data communication with
+targets. All NFC sockets use AF_NFC:
+
+struct sockaddr_nfc {
+ sa_family_t sa_family;
+ __u32 dev_idx;
+ __u32 target_idx;
+ __u32 nfc_protocol;
+};
+
+To establish a connection with one target, the user must create an
+NFC_SOCKPROTO_RAW socket and call the 'connect' syscall with the sockaddr_nfc
+struct correctly filled. All information comes from NFC_EVENT_TARGETS_FOUND
+netlink event. As a target can support more than one NFC protocol, the user
+must inform which protocol it wants to use.
+
+Internally, 'connect' will result in an activate_target call to the driver.
+When the socket is closed, the target is deactivated.
+
+The data format exchanged through the sockets is NFC protocol dependent. For
+instance, when communicating with MIFARE tags, the data exchanged are MIFARE
+commands and their responses.
+
+The first received package is the response to the first sent package and so
+on. In order to allow valid "empty" responses, every data received has a NULL
+header of 1 byte.
diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt
index 80a7a345490..57a24108b84 100644
--- a/Documentation/networking/stmmac.txt
+++ b/Documentation/networking/stmmac.txt
@@ -7,7 +7,7 @@ This is the driver for the MAC 10/100/1000 on-chip Ethernet controllers
(Synopsys IP blocks); it has been fully tested on STLinux platforms.
Currently this network device driver is for all STM embedded MAC/GMAC
-(7xxx SoCs). Other platforms start using it i.e. ARM SPEAr.
+(i.e. 7xxx/5xxx SoCs) and it's known working on other platforms i.e. ARM SPEAr.
DWC Ether MAC 10/100/1000 Universal version 3.41a and DWC Ether MAC 10/100
Universal version 4.0 have been used for developing the first code
@@ -71,7 +71,7 @@ Several performance tests on STM platforms showed this optimisation allows to sp
the CPU while having the maximum throughput.
4.4) WOL
-Wake up on Lan feature through Magic Frame is only supported for the GMAC
+Wake up on Lan feature through Magic and Unicast frames are supported for the GMAC
core.
4.5) DMA descriptors
@@ -91,11 +91,15 @@ LRO is not supported.
The driver is compatible with PAL to work with PHY and GPHY devices.
4.9) Platform information
-Several information came from the platform; please refer to the
-driver's Header file in include/linux directory.
+Several driver's information can be passed through the platform
+These are included in the include/linux/stmmac.h header file
+and detailed below as well:
-struct plat_stmmacenet_data {
+ struct plat_stmmacenet_data {
int bus_id;
+ int phy_addr;
+ int interface;
+ struct stmmac_mdio_bus_data *mdio_bus_data;
int pbl;
int clk_csr;
int has_gmac;
@@ -103,67 +107,135 @@ struct plat_stmmacenet_data {
int tx_coe;
int bugged_jumbo;
int pmt;
- void (*fix_mac_speed)(void *priv, unsigned int speed);
- void (*bus_setup)(unsigned long ioaddr);
-#ifdef CONFIG_STM_DRIVERS
- struct stm_pad_config *pad_config;
-#endif
- void *bsp_priv;
-};
+ int force_sf_dma_mode;
+ void (*fix_mac_speed)(void *priv, unsigned int speed);
+ void (*bus_setup)(void __iomem *ioaddr);
+ int (*init)(struct platform_device *pdev);
+ void (*exit)(struct platform_device *pdev);
+ void *bsp_priv;
+ };
Where:
-- pbl (Programmable Burst Length) is maximum number of
- beats to be transferred in one DMA transaction.
- GMAC also enables the 4xPBL by default.
-- fix_mac_speed and bus_setup are used to configure internal target
- registers (on STM platforms);
-- has_gmac: GMAC core is on board (get it at run-time in the next step);
-- bus_id: bus identifier.
-- tx_coe: core is able to perform the tx csum in HW.
-- enh_desc: if sets the MAC will use the enhanced descriptor structure.
-- clk_csr: CSR Clock range selection.
-- bugged_jumbo: some HWs are not able to perform the csum in HW for
- over-sized frames due to limited buffer sizes. Setting this
- flag the csum will be done in SW on JUMBO frames.
-
-struct plat_stmmacphy_data {
- int bus_id;
- int phy_addr;
- unsigned int phy_mask;
- int interface;
- int (*phy_reset)(void *priv);
- void *priv;
-};
+ o bus_id: bus identifier.
+ o phy_addr: the physical address can be passed from the platform.
+ If it is set to -1 the driver will automatically
+ detect it at run-time by probing all the 32 addresses.
+ o interface: PHY device's interface.
+ o mdio_bus_data: specific platform fields for the MDIO bus.
+ o pbl: the Programmable Burst Length is maximum number of beats to
+ be transferred in one DMA transaction.
+ GMAC also enables the 4xPBL by default.
+ o clk_csr: CSR Clock range selection.
+ o has_gmac: uses the GMAC core.
+ o enh_desc: if sets the MAC will use the enhanced descriptor structure.
+ o tx_coe: core is able to perform the tx csum in HW.
+ o bugged_jumbo: some HWs are not able to perform the csum in HW for
+ over-sized frames due to limited buffer sizes.
+ Setting this flag the csum will be done in SW on
+ JUMBO frames.
+ o pmt: core has the embedded power module (optional).
+ o force_sf_dma_mode: force DMA to use the Store and Forward mode
+ instead of the Threshold.
+ o fix_mac_speed: this callback is used for modifying some syscfg registers
+ (on ST SoCs) according to the link speed negotiated by the
+ physical layer .
+ o bus_setup: perform HW setup of the bus. For example, on some ST platforms
+ this field is used to configure the AMBA bridge to generate more
+ efficient STBus traffic.
+ o init/exit: callbacks used for calling a custom initialisation;
+ this is sometime necessary on some platforms (e.g. ST boxes)
+ where the HW needs to have set some PIO lines or system cfg
+ registers.
+ o custom_cfg: this is a custom configuration that can be passed while
+ initialising the resources.
+
+The we have:
+
+ struct stmmac_mdio_bus_data {
+ int bus_id;
+ int (*phy_reset)(void *priv);
+ unsigned int phy_mask;
+ int *irqs;
+ int probed_phy_irq;
+ };
Where:
-- bus_id: bus identifier;
-- phy_addr: physical address used for the attached phy device;
- set it to -1 to get it at run-time;
-- interface: physical MII interface mode;
-- phy_reset: hook to reset HW function.
-
-SOURCES:
-- Kconfig
-- Makefile
-- stmmac_main.c: main network device driver;
-- stmmac_mdio.c: mdio functions;
-- stmmac_ethtool.c: ethtool support;
-- stmmac_timer.[ch]: timer code used for mitigating the driver dma interrupts
- Only tested on ST40 platforms based.
-- stmmac.h: private driver structure;
-- common.h: common definitions and VFTs;
-- descs.h: descriptor structure definitions;
-- dwmac1000_core.c: GMAC core functions;
-- dwmac1000_dma.c: dma functions for the GMAC chip;
-- dwmac1000.h: specific header file for the GMAC;
-- dwmac100_core: MAC 100 core and dma code;
-- dwmac100_dma.c: dma funtions for the MAC chip;
-- dwmac1000.h: specific header file for the MAC;
-- dwmac_lib.c: generic DMA functions shared among chips
-- enh_desc.c: functions for handling enhanced descriptors
-- norm_desc.c: functions for handling normal descriptors
-
-TODO:
-- XGMAC controller is not supported.
-- Review the timer optimisation code to use an embedded device that seems to be
+ o bus_id: bus identifier;
+ o phy_reset: hook to reset the phy device attached to the bus.
+ o phy_mask: phy mask passed when register the MDIO bus within the driver.
+ o irqs: list of IRQs, one per PHY.
+ o probed_phy_irq: if irqs is NULL, use this for probed PHY.
+
+Below an example how the structures above are using on ST platforms.
+
+ static struct plat_stmmacenet_data stxYYY_ethernet_platform_data = {
+ .pbl = 32,
+ .has_gmac = 0,
+ .enh_desc = 0,
+ .fix_mac_speed = stxYYY_ethernet_fix_mac_speed,
+ |
+ |-> to write an internal syscfg
+ | on this platform when the
+ | link speed changes from 10 to
+ | 100 and viceversa
+ .init = &stmmac_claim_resource,
+ |
+ |-> On ST SoC this calls own "PAD"
+ | manager framework to claim
+ | all the resources necessary
+ | (GPIO ...). The .custom_cfg field
+ | is used to pass a custom config.
+};
+
+Below the usage of the stmmac_mdio_bus_data: on this SoC, in fact,
+there are two MAC cores: one MAC is for MDIO Bus/PHY emulation
+with fixed_link support.
+
+static struct stmmac_mdio_bus_data stmmac1_mdio_bus = {
+ .bus_id = 1,
+ |
+ |-> phy device on the bus_id 1
+ .phy_reset = phy_reset;
+ |
+ |-> function to provide the phy_reset on this board
+ .phy_mask = 0,
+};
+
+static struct fixed_phy_status stmmac0_fixed_phy_status = {
+ .link = 1,
+ .speed = 100,
+ .duplex = 1,
+};
+
+During the board's device_init we can configure the first
+MAC for fixed_link by calling:
+ fixed_phy_add(PHY_POLL, 1, &stmmac0_fixed_phy_status));)
+and the second one, with a real PHY device attached to the bus,
+by using the stmmac_mdio_bus_data structure (to provide the id, the
+reset procedure etc).
+
+4.10) List of source files:
+ o Kconfig
+ o Makefile
+ o stmmac_main.c: main network device driver;
+ o stmmac_mdio.c: mdio functions;
+ o stmmac_ethtool.c: ethtool support;
+ o stmmac_timer.[ch]: timer code used for mitigating the driver dma interrupts
+ Only tested on ST40 platforms based.
+ o stmmac.h: private driver structure;
+ o common.h: common definitions and VFTs;
+ o descs.h: descriptor structure definitions;
+ o dwmac1000_core.c: GMAC core functions;
+ o dwmac1000_dma.c: dma functions for the GMAC chip;
+ o dwmac1000.h: specific header file for the GMAC;
+ o dwmac100_core: MAC 100 core and dma code;
+ o dwmac100_dma.c: dma funtions for the MAC chip;
+ o dwmac1000.h: specific header file for the MAC;
+ o dwmac_lib.c: generic DMA functions shared among chips
+ o enh_desc.c: functions for handling enhanced descriptors
+ o norm_desc.c: functions for handling normal descriptors
+
+5) TODO:
+ o XGMAC is not supported.
+ o Review the timer optimisation code to use an embedded device that will be
available in new chip generations.
diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt
index 88880839ece..64565aac6e4 100644
--- a/Documentation/power/devices.txt
+++ b/Documentation/power/devices.txt
@@ -520,59 +520,20 @@ Support for power domains is provided through the pwr_domain field of struct
device. This field is a pointer to an object of type struct dev_power_domain,
defined in include/linux/pm.h, providing a set of power management callbacks
analogous to the subsystem-level and device driver callbacks that are executed
-for the given device during all power transitions, in addition to the respective
-subsystem-level callbacks. Specifically, the power domain "suspend" callbacks
-(i.e. ->runtime_suspend(), ->suspend(), ->freeze(), ->poweroff(), etc.) are
-executed after the analogous subsystem-level callbacks, while the power domain
-"resume" callbacks (i.e. ->runtime_resume(), ->resume(), ->thaw(), ->restore,
-etc.) are executed before the analogous subsystem-level callbacks. Error codes
-returned by the "suspend" and "resume" power domain callbacks are ignored.
-
-Power domain ->runtime_idle() callback is executed before the subsystem-level
-->runtime_idle() callback and the result returned by it is not ignored. Namely,
-if it returns error code, the subsystem-level ->runtime_idle() callback will not
-be called and the helper function rpm_idle() executing it will return error
-code. This mechanism is intended to help platforms where saving device state
-is a time consuming operation and should only be carried out if all devices
-in the power domain are idle, before turning off the shared power resource(s).
-Namely, the power domain ->runtime_idle() callback may return error code until
-the pm_runtime_idle() helper (or its asychronous version) has been called for
-all devices in the power domain (it is recommended that the returned error code
-be -EBUSY in those cases), preventing the subsystem-level ->runtime_idle()
-callback from being run prematurely.
-
-The support for device power domains is only relevant to platforms needing to
-use the same subsystem-level (e.g. platform bus type) and device driver power
-management callbacks in many different power domain configurations and wanting
-to avoid incorporating the support for power domains into the subsystem-level
-callbacks. The other platforms need not implement it or take it into account
-in any way.
-
-
-System Devices
---------------
-System devices (sysdevs) follow a slightly different API, which can be found in
-
- include/linux/sysdev.h
- drivers/base/sys.c
-
-System devices will be suspended with interrupts disabled, and after all other
-devices have been suspended. On resume, they will be resumed before any other
-devices, and also with interrupts disabled. These things occur in special
-"sysdev_driver" phases, which affect only system devices.
-
-Thus, after the suspend_noirq (or freeze_noirq or poweroff_noirq) phase, when
-the non-boot CPUs are all offline and IRQs are disabled on the remaining online
-CPU, then a sysdev_driver.suspend phase is carried out, and the system enters a
-sleep state (or a system image is created). During resume (or after the image
-has been created or loaded) a sysdev_driver.resume phase is carried out, IRQs
-are enabled on the only online CPU, the non-boot CPUs are enabled, and the
-resume_noirq (or thaw_noirq or restore_noirq) phase begins.
-
-Code to actually enter and exit the system-wide low power state sometimes
-involves hardware details that are only known to the boot firmware, and
-may leave a CPU running software (from SRAM or flash memory) that monitors
-the system and manages its wakeup sequence.
+for the given device during all power transitions, instead of the respective
+subsystem-level callbacks. Specifically, if a device's pm_domain pointer is
+not NULL, the ->suspend() callback from the object pointed to by it will be
+executed instead of its subsystem's (e.g. bus type's) ->suspend() callback and
+anlogously for all of the remaining callbacks. In other words, power management
+domain callbacks, if defined for the given device, always take precedence over
+the callbacks provided by the device's subsystem (e.g. bus type).
+
+The support for device power management domains is only relevant to platforms
+needing to use the same device driver power management callbacks in many
+different power domain configurations and wanting to avoid incorporating the
+support for power domains into subsystem-level callbacks, for example by
+modifying the platform bus type. Other platforms need not implement it or take
+it into account in any way.
Device Low Power (suspend) States
diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
index 654097b130b..b24875b1ced 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -501,13 +501,29 @@ helper functions described in Section 4. In that case, pm_runtime_resume()
should be used. Of course, for this purpose the device's run-time PM has to be
enabled earlier by calling pm_runtime_enable().
-If the device bus type's or driver's ->probe() or ->remove() callback runs
+If the device bus type's or driver's ->probe() callback runs
pm_runtime_suspend() or pm_runtime_idle() or their asynchronous counterparts,
they will fail returning -EAGAIN, because the device's usage counter is
-incremented by the core before executing ->probe() and ->remove(). Still, it
-may be desirable to suspend the device as soon as ->probe() or ->remove() has
-finished, so the PM core uses pm_runtime_idle_sync() to invoke the
-subsystem-level idle callback for the device at that time.
+incremented by the driver core before executing ->probe(). Still, it may be
+desirable to suspend the device as soon as ->probe() has finished, so the driver
+core uses pm_runtime_put_sync() to invoke the subsystem-level idle callback for
+the device at that time.
+
+Moreover, the driver core prevents runtime PM callbacks from racing with the bus
+notifier callback in __device_release_driver(), which is necessary, because the
+notifier is used by some subsystems to carry out operations affecting the
+runtime PM functionality. It does so by calling pm_runtime_get_sync() before
+driver_sysfs_remove() and the BUS_NOTIFY_UNBIND_DRIVER notifications. This
+resumes the device if it's in the suspended state and prevents it from
+being suspended again while those routines are being executed.
+
+To allow bus types and drivers to put devices into the suspended state by
+calling pm_runtime_suspend() from their ->remove() routines, the driver core
+executes pm_runtime_put_sync() after running the BUS_NOTIFY_UNBIND_DRIVER
+notifications in __device_release_driver(). This requires bus types and
+drivers to make their ->remove() callbacks avoid races with runtime PM directly,
+but also it allows of more flexibility in the handling of devices during the
+removal of their drivers.
The user space can effectively disallow the driver of the device to power manage
it at run time by changing the value of its /sys/devices/.../power/control
@@ -566,11 +582,6 @@ to do this is:
pm_runtime_set_active(dev);
pm_runtime_enable(dev);
-The PM core always increments the run-time usage counter before calling the
-->prepare() callback and decrements it after calling the ->complete() callback.
-Hence disabling run-time PM temporarily like this will not cause any run-time
-suspend callbacks to be lost.
-
7. Generic subsystem callbacks
Subsystems may wish to conserve code space by using the set of generic power
diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt
index 1b5a5ddbc3e..5df176ed59b 100644
--- a/Documentation/printk-formats.txt
+++ b/Documentation/printk-formats.txt
@@ -9,7 +9,121 @@ If variable is of Type, use printk format specifier:
size_t %zu or %zx
ssize_t %zd or %zx
-Raw pointer value SHOULD be printed with %p.
+Raw pointer value SHOULD be printed with %p. The kernel supports
+the following extended format specifiers for pointer types:
+
+Symbols/Function Pointers:
+
+ %pF versatile_init+0x0/0x110
+ %pf versatile_init
+ %pS versatile_init+0x0/0x110
+ %ps versatile_init
+ %pB prev_fn_of_versatile_init+0x88/0x88
+
+ For printing symbols and function pointers. The 'S' and 's' specifiers
+ result in the symbol name with ('S') or without ('s') offsets. Where
+ this is used on a kernel without KALLSYMS - the symbol address is
+ printed instead.
+
+ The 'B' specifier results in the symbol name with offsets and should be
+ used when printing stack backtraces. The specifier takes into
+ consideration the effect of compiler optimisations which may occur
+ when tail-call's are used and marked with the noreturn GCC attribute.
+
+ On ia64, ppc64 and parisc64 architectures function pointers are
+ actually function descriptors which must first be resolved. The 'F' and
+ 'f' specifiers perform this resolution and then provide the same
+ functionality as the 'S' and 's' specifiers.
+
+Kernel Pointers:
+
+ %pK 0x01234567 or 0x0123456789abcdef
+
+ For printing kernel pointers which should be hidden from unprivileged
+ users. The behaviour of %pK depends on the kptr_restrict sysctl - see
+ Documentation/sysctl/kernel.txt for more details.
+
+Struct Resources:
+
+ %pr [mem 0x60000000-0x6fffffff flags 0x2200] or
+ [mem 0x0000000060000000-0x000000006fffffff flags 0x2200]
+ %pR [mem 0x60000000-0x6fffffff pref] or
+ [mem 0x0000000060000000-0x000000006fffffff pref]
+
+ For printing struct resources. The 'R' and 'r' specifiers result in a
+ printed resource with ('R') or without ('r') a decoded flags member.
+
+MAC/FDDI addresses:
+
+ %pM 00:01:02:03:04:05
+ %pMF 00-01-02-03-04-05
+ %pm 000102030405
+
+ For printing 6-byte MAC/FDDI addresses in hex notation. The 'M' and 'm'
+ specifiers result in a printed address with ('M') or without ('m') byte
+ separators. The default byte separator is the colon (':').
+
+ Where FDDI addresses are concerned the 'F' specifier can be used after
+ the 'M' specifier to use dash ('-') separators instead of the default
+ separator.
+
+IPv4 addresses:
+
+ %pI4 1.2.3.4
+ %pi4 001.002.003.004
+ %p[Ii][hnbl]
+
+ For printing IPv4 dot-separated decimal addresses. The 'I4' and 'i4'
+ specifiers result in a printed address with ('i4') or without ('I4')
+ leading zeros.
+
+ The additional 'h', 'n', 'b', and 'l' specifiers are used to specify
+ host, network, big or little endian order addresses respectively. Where
+ no specifier is provided the default network/big endian order is used.
+
+IPv6 addresses:
+
+ %pI6 0001:0002:0003:0004:0005:0006:0007:0008
+ %pi6 00010002000300040005000600070008
+ %pI6c 1:2:3:4:5:6:7:8
+
+ For printing IPv6 network-order 16-bit hex addresses. The 'I6' and 'i6'
+ specifiers result in a printed address with ('I6') or without ('i6')
+ colon-separators. Leading zeros are always used.
+
+ The additional 'c' specifier can be used with the 'I' specifier to
+ print a compressed IPv6 address as described by
+ http://tools.ietf.org/html/rfc5952
+
+UUID/GUID addresses:
+
+ %pUb 00010203-0405-0607-0809-0a0b0c0d0e0f
+ %pUB 00010203-0405-0607-0809-0A0B0C0D0E0F
+ %pUl 03020100-0504-0706-0809-0a0b0c0e0e0f
+ %pUL 03020100-0504-0706-0809-0A0B0C0E0E0F
+
+ For printing 16-byte UUID/GUIDs addresses. The additional 'l', 'L',
+ 'b' and 'B' specifiers are used to specify a little endian order in
+ lower ('l') or upper case ('L') hex characters - and big endian order
+ in lower ('b') or upper case ('B') hex characters.
+
+ Where no additional specifiers are used the default little endian
+ order with lower case hex characters will be printed.
+
+struct va_format:
+
+ %pV
+
+ For printing struct va_format structures. These contain a format string
+ and va_list as follows:
+
+ struct va_format {
+ const char *fmt;
+ va_list *va;
+ };
+
+ Do not use this feature without some mechanism to verify the
+ correctness of the format string and va_list arguments.
u64 SHOULD be printed with %llu/%llx, (unsigned long long):
@@ -32,4 +146,5 @@ Reminder: sizeof() result is of type size_t.
Thank you for your cooperation and attention.
-By Randy Dunlap <rdunlap@xenotime.net>
+By Randy Dunlap <rdunlap@xenotime.net> and
+Andrew Murray <amurray@mpc-data.co.uk>
diff --git a/Documentation/scheduler/sched-design-CFS.txt b/Documentation/scheduler/sched-design-CFS.txt
index 99961993257..91ecff07ced 100644
--- a/Documentation/scheduler/sched-design-CFS.txt
+++ b/Documentation/scheduler/sched-design-CFS.txt
@@ -223,9 +223,10 @@ When CONFIG_FAIR_GROUP_SCHED is defined, a "cpu.shares" file is created for each
group created using the pseudo filesystem. See example steps below to create
task groups and modify their CPU share using the "cgroups" pseudo filesystem.
- # mkdir /dev/cpuctl
- # mount -t cgroup -ocpu none /dev/cpuctl
- # cd /dev/cpuctl
+ # mount -t tmpfs cgroup_root /sys/fs/cgroup
+ # mkdir /sys/fs/cgroup/cpu
+ # mount -t cgroup -ocpu none /sys/fs/cgroup/cpu
+ # cd /sys/fs/cgroup/cpu
# mkdir multimedia # create "multimedia" group of tasks
# mkdir browser # create "browser" group of tasks
diff --git a/Documentation/scheduler/sched-rt-group.txt b/Documentation/scheduler/sched-rt-group.txt
index 605b0d40329..71b54d54998 100644
--- a/Documentation/scheduler/sched-rt-group.txt
+++ b/Documentation/scheduler/sched-rt-group.txt
@@ -129,9 +129,8 @@ priority!
Enabling CONFIG_RT_GROUP_SCHED lets you explicitly allocate real
CPU bandwidth to task groups.
-This uses the /cgroup virtual file system and
-"/cgroup/<cgroup>/cpu.rt_runtime_us" to control the CPU time reserved for each
-control group.
+This uses the cgroup virtual file system and "<cgroup>/cpu.rt_runtime_us"
+to control the CPU time reserved for each control group.
For more information on working with control groups, you should read
Documentation/cgroups/cgroups.txt as well.
@@ -150,7 +149,7 @@ For now, this can be simplified to just the following (but see Future plans):
===============
There is work in progress to make the scheduling period for each group
-("/cgroup/<cgroup>/cpu.rt_period_us") configurable as well.
+("<cgroup>/cpu.rt_period_us") configurable as well.
The constraint on the period is that a subgroup must have a smaller or
equal period to its parent. But realistically its not very useful _yet_
diff --git a/Documentation/spinlocks.txt b/Documentation/spinlocks.txt
index 2e3c64b1a6a..9dbe885ecd8 100644
--- a/Documentation/spinlocks.txt
+++ b/Documentation/spinlocks.txt
@@ -13,18 +13,8 @@ static DEFINE_SPINLOCK(xxx_lock);
The above is always safe. It will disable interrupts _locally_, but the
spinlock itself will guarantee the global lock, so it will guarantee that
there is only one thread-of-control within the region(s) protected by that
-lock. This works well even under UP. The above sequence under UP
-essentially is just the same as doing
-
- unsigned long flags;
-
- save_flags(flags); cli();
- ... critical section ...
- restore_flags(flags);
-
-so the code does _not_ need to worry about UP vs SMP issues: the spinlocks
-work correctly under both (and spinlocks are actually more efficient on
-architectures that allow doing the "save_flags + cli" in one operation).
+lock. This works well even under UP also, so the code does _not_ need to
+worry about UP vs SMP issues: the spinlocks work correctly under both.
NOTE! Implications of spin_locks for memory are further described in:
@@ -36,27 +26,7 @@ The above is usually pretty simple (you usually need and want only one
spinlock for most things - using more than one spinlock can make things a
lot more complex and even slower and is usually worth it only for
sequences that you _know_ need to be split up: avoid it at all cost if you
-aren't sure). HOWEVER, it _does_ mean that if you have some code that does
-
- cli();
- .. critical section ..
- sti();
-
-and another sequence that does
-
- spin_lock_irqsave(flags);
- .. critical section ..
- spin_unlock_irqrestore(flags);
-
-then they are NOT mutually exclusive, and the critical regions can happen
-at the same time on two different CPU's. That's fine per se, but the
-critical regions had better be critical for different things (ie they
-can't stomp on each other).
-
-The above is a problem mainly if you end up mixing code - for example the
-routines in ll_rw_block() tend to use cli/sti to protect the atomicity of
-their actions, and if a driver uses spinlocks instead then you should
-think about issues like the above.
+aren't sure).
This is really the only really hard part about spinlocks: once you start
using spinlocks they tend to expand to areas you might not have noticed
@@ -120,11 +90,10 @@ Lesson 3: spinlocks revisited.
The single spin-lock primitives above are by no means the only ones. They
are the most safe ones, and the ones that work under all circumstances,
-but partly _because_ they are safe they are also fairly slow. They are
-much faster than a generic global cli/sti pair, but slower than they'd
-need to be, because they do have to disable interrupts (which is just a
-single instruction on a x86, but it's an expensive one - and on other
-architectures it can be worse).
+but partly _because_ they are safe they are also fairly slow. They are slower
+than they'd need to be, because they do have to disable interrupts
+(which is just a single instruction on a x86, but it's an expensive one -
+and on other architectures it can be worse).
If you have a case where you have to protect a data structure across
several CPU's and you want to use spinlocks you can potentially use
diff --git a/Documentation/usb/error-codes.txt b/Documentation/usb/error-codes.txt
index d83703ea74b..b3f606b81a0 100644
--- a/Documentation/usb/error-codes.txt
+++ b/Documentation/usb/error-codes.txt
@@ -76,6 +76,13 @@ A transfer's actual_length may be positive even when an error has been
reported. That's because transfers often involve several packets, so that
one or more packets could finish before an error stops further endpoint I/O.
+For isochronous URBs, the urb status value is non-zero only if the URB is
+unlinked, the device is removed, the host controller is disabled, or the total
+transferred length is less than the requested length and the URB_SHORT_NOT_OK
+flag is set. Completion handlers for isochronous URBs should only see
+urb->status set to zero, -ENOENT, -ECONNRESET, -ESHUTDOWN, or -EREMOTEIO.
+Individual frame descriptor status fields may report more status codes.
+
0 Transfer completed successfully
@@ -132,7 +139,7 @@ one or more packets could finish before an error stops further endpoint I/O.
device removal events immediately.
-EXDEV ISO transfer only partially completed
- look at individual frame status for details
+ (only set in iso_frame_desc[n].status, not urb->status)
-EINVAL ISO madness, if this happens: Log off and go home
diff --git a/Documentation/virtual/lguest/lguest.c b/Documentation/virtual/lguest/lguest.c
index cd9d6af61d0..043bd7df313 100644
--- a/Documentation/virtual/lguest/lguest.c
+++ b/Documentation/virtual/lguest/lguest.c
@@ -51,7 +51,7 @@
#include <asm/bootparam.h>
#include "../../../include/linux/lguest_launcher.h"
/*L:110
- * We can ignore the 42 include files we need for this program, but I do want
+ * We can ignore the 43 include files we need for this program, but I do want
* to draw attention to the use of kernel-style types.
*
* As Linus said, "C is a Spartan language, and so should your naming be." I
@@ -65,7 +65,6 @@ typedef uint16_t u16;
typedef uint8_t u8;
/*:*/
-#define PAGE_PRESENT 0x7 /* Present, RW, Execute */
#define BRIDGE_PFX "bridge:"
#ifndef SIOCBRADDIF
#define SIOCBRADDIF 0x89a2 /* add interface to bridge */
@@ -861,8 +860,10 @@ static void console_output(struct virtqueue *vq)
/* writev can return a partial write, so we loop here. */
while (!iov_empty(iov, out)) {
int len = writev(STDOUT_FILENO, iov, out);
- if (len <= 0)
- err(1, "Write to stdout gave %i", len);
+ if (len <= 0) {
+ warn("Write to stdout gave %i (%d)", len, errno);
+ break;
+ }
iov_consume(iov, out, len);
}
@@ -898,7 +899,7 @@ static void net_output(struct virtqueue *vq)
* same format: what a coincidence!
*/
if (writev(net_info->tunfd, iov, out) < 0)
- errx(1, "Write to tun failed?");
+ warnx("Write to tun failed (%d)?", errno);
/*
* Done with that one; wait_for_vq_desc() will send the interrupt if
@@ -955,7 +956,7 @@ static void net_input(struct virtqueue *vq)
*/
len = readv(net_info->tunfd, iov, in);
if (len <= 0)
- err(1, "Failed to read from tun.");
+ warn("Failed to read from tun (%d).", errno);
/*
* Mark that packet buffer as used, but don't interrupt here. We want
@@ -1093,9 +1094,10 @@ static void update_device_status(struct device *dev)
warnx("Device %s configuration FAILED", dev->name);
if (dev->running)
reset_device(dev);
- } else if (dev->desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
- if (!dev->running)
- start_device(dev);
+ } else {
+ if (dev->running)
+ err(1, "Device %s features finalized twice", dev->name);
+ start_device(dev);
}
}
@@ -1120,25 +1122,11 @@ static void handle_output(unsigned long addr)
return;
}
- /*
- * Devices *can* be used before status is set to DRIVER_OK.
- * The original plan was that they would never do this: they
- * would always finish setting up their status bits before
- * actually touching the virtqueues. In practice, we allowed
- * them to, and they do (eg. the disk probes for partition
- * tables as part of initialization).
- *
- * If we see this, we start the device: once it's running, we
- * expect the device to catch all the notifications.
- */
+ /* Devices should not be used before features are finalized. */
for (vq = i->vq; vq; vq = vq->next) {
if (addr != vq->config.pfn*getpagesize())
continue;
- if (i->running)
- errx(1, "Notification on running %s", i->name);
- /* This just calls create_thread() for each virtqueue */
- start_device(i);
- return;
+ errx(1, "Notification on %s before setup!", i->name);
}
}
@@ -1370,7 +1358,7 @@ static void setup_console(void)
* --sharenet=<name> option which opens or creates a named pipe. This can be
* used to send packets to another guest in a 1:1 manner.
*
- * More sopisticated is to use one of the tools developed for project like UML
+ * More sophisticated is to use one of the tools developed for project like UML
* to do networking.
*
* Faster is to do virtio bonding in kernel. Doing this 1:1 would be
@@ -1380,7 +1368,7 @@ static void setup_console(void)
* multiple inter-guest channels behind one interface, although it would
* require some manner of hotplugging new virtio channels.
*
- * Finally, we could implement a virtio network switch in the kernel.
+ * Finally, we could use a virtio network switch in the kernel, ie. vhost.
:*/
static u32 str2ip(const char *ipaddr)
@@ -2017,10 +2005,7 @@ int main(int argc, char *argv[])
/* Tell the entry path not to try to reload segment registers. */
boot->hdr.loadflags |= KEEP_SEGMENTS;
- /*
- * We tell the kernel to initialize the Guest: this returns the open
- * /dev/lguest file descriptor.
- */
+ /* We tell the kernel to initialize the Guest. */
tell_kernel(start);
/* Ensure that we terminate if a device-servicing child dies. */
diff --git a/Documentation/vm/hwpoison.txt b/Documentation/vm/hwpoison.txt
index 12f9ba20ccb..55006846660 100644
--- a/Documentation/vm/hwpoison.txt
+++ b/Documentation/vm/hwpoison.txt
@@ -129,12 +129,12 @@ Limit injection to pages owned by memgroup. Specified by inode number
of the memcg.
Example:
- mkdir /cgroup/hwpoison
+ mkdir /sys/fs/cgroup/mem/hwpoison
usemem -m 100 -s 1000 &
- echo `jobs -p` > /cgroup/hwpoison/tasks
+ echo `jobs -p` > /sys/fs/cgroup/mem/hwpoison/tasks
- memcg_ino=$(ls -id /cgroup/hwpoison | cut -f1 -d' ')
+ memcg_ino=$(ls -id /sys/fs/cgroup/mem/hwpoison | cut -f1 -d' ')
echo $memcg_ino > /debug/hwpoison/corrupt-filter-memcg
page-types -p `pidof init` --hwpoison # shall do nothing
diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt
index 9b7221a86df..7c3a8801b7c 100644
--- a/Documentation/x86/boot.txt
+++ b/Documentation/x86/boot.txt
@@ -674,7 +674,7 @@ Protocol: 2.10+
Field name: init_size
Type: read
-Offset/size: 0x25c/4
+Offset/size: 0x260/4
This field indicates the amount of linear contiguous memory starting
at the kernel runtime start address that the kernel needs before it