diff options
Diffstat (limited to 'Documentation')
78 files changed, 3264 insertions, 771 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss index 0a92a7c93a6..4f29e5f1ebf 100644 --- a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss +++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss @@ -31,3 +31,31 @@ Date: March 2009 Kernel Version: 2.6.30 Contact: iss_storagedev@hp.com Description: A symbolic link to /sys/block/cciss!cXdY + +Where: /sys/bus/pci/devices/<dev>/ccissX/rescan +Date: August 2009 +Kernel Version: 2.6.31 +Contact: iss_storagedev@hp.com +Description: Kicks of a rescan of the controller to discover logical + drive topology changes. + +Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/lunid +Date: August 2009 +Kernel Version: 2.6.31 +Contact: iss_storagedev@hp.com +Description: Displays the 8-byte LUN ID used to address logical + drive Y of controller X. + +Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/raid_level +Date: August 2009 +Kernel Version: 2.6.31 +Contact: iss_storagedev@hp.com +Description: Displays the RAID level of logical drive Y of + controller X. + +Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/usage_count +Date: August 2009 +Kernel Version: 2.6.31 +Contact: iss_storagedev@hp.com +Description: Displays the usage count (number of opens) of logical drive Y + of controller X. diff --git a/Documentation/ABI/testing/sysfs-class-usb_host b/Documentation/ABI/testing/sysfs-class-uwb_rc-wusbhc index 46b66ad1f1b..4e8106f7cfd 100644 --- a/Documentation/ABI/testing/sysfs-class-usb_host +++ b/Documentation/ABI/testing/sysfs-class-uwb_rc-wusbhc @@ -1,4 +1,4 @@ -What: /sys/class/usb_host/usb_hostN/wusb_chid +What: /sys/class/uwb_rc/uwbN/wusbhc/wusb_chid Date: July 2008 KernelVersion: 2.6.27 Contact: David Vrabel <david.vrabel@csr.com> @@ -9,7 +9,7 @@ Description: Set an all zero CHID to stop the host controller. -What: /sys/class/usb_host/usb_hostN/wusb_trust_timeout +What: /sys/class/uwb_rc/uwbN/wusbhc/wusb_trust_timeout Date: July 2008 KernelVersion: 2.6.27 Contact: David Vrabel <david.vrabel@csr.com> diff --git a/Documentation/ABI/testing/sysfs-devices-cache_disable b/Documentation/ABI/testing/sysfs-devices-cache_disable deleted file mode 100644 index 175bb4f7051..00000000000 --- a/Documentation/ABI/testing/sysfs-devices-cache_disable +++ /dev/null @@ -1,18 +0,0 @@ -What: /sys/devices/system/cpu/cpu*/cache/index*/cache_disable_X -Date: August 2008 -KernelVersion: 2.6.27 -Contact: mark.langsdorf@amd.com -Description: These files exist in every cpu's cache index directories. - There are currently 2 cache_disable_# files in each - directory. Reading from these files on a supported - processor will return that cache disable index value - for that processor and node. Writing to one of these - files will cause the specificed cache index to be disabled. - - Currently, only AMD Family 10h Processors support cache index - disable, and only for their L3 caches. See the BIOS and - Kernel Developer's Guide at - http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/31116-Public-GH-BKDG_3.20_2-4-09.pdf - for formatting information and other details on the - cache index disable. -Users: joachim.deguara@amd.com diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu new file mode 100644 index 00000000000..a703b9e9aeb --- /dev/null +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -0,0 +1,156 @@ +What: /sys/devices/system/cpu/ +Date: pre-git history +Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> +Description: + A collection of both global and individual CPU attributes + + Individual CPU attributes are contained in subdirectories + named by the kernel's logical CPU number, e.g.: + + /sys/devices/system/cpu/cpu#/ + +What: /sys/devices/system/cpu/sched_mc_power_savings + /sys/devices/system/cpu/sched_smt_power_savings +Date: June 2006 +Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> +Description: Discover and adjust the kernel's multi-core scheduler support. + + Possible values are: + + 0 - No power saving load balance (default value) + 1 - Fill one thread/core/package first for long running threads + 2 - Also bias task wakeups to semi-idle cpu package for power + savings + + sched_mc_power_savings is dependent upon SCHED_MC, which is + itself architecture dependent. + + sched_smt_power_savings is dependent upon SCHED_SMT, which + is itself architecture dependent. + + The two files are independent of each other. It is possible + that one file may be present without the other. + + Introduced by git commit 5c45bf27. + + +What: /sys/devices/system/cpu/kernel_max + /sys/devices/system/cpu/offline + /sys/devices/system/cpu/online + /sys/devices/system/cpu/possible + /sys/devices/system/cpu/present +Date: December 2008 +Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> +Description: CPU topology files that describe kernel limits related to + hotplug. Briefly: + + kernel_max: the maximum cpu index allowed by the kernel + configuration. + + offline: cpus that are not online because they have been + HOTPLUGGED off or exceed the limit of cpus allowed by the + kernel configuration (kernel_max above). + + online: cpus that are online and being scheduled. + + possible: cpus that have been allocated resources and can be + brought online if they are present. + + present: cpus that have been identified as being present in + the system. + + See Documentation/cputopology.txt for more information. + + + +What: /sys/devices/system/cpu/cpu#/node +Date: October 2009 +Contact: Linux memory management mailing list <linux-mm@kvack.org> +Description: Discover NUMA node a CPU belongs to + + When CONFIG_NUMA is enabled, a symbolic link that points + to the corresponding NUMA node directory. + + For example, the following symlink is created for cpu42 + in NUMA node 2: + + /sys/devices/system/cpu/cpu42/node2 -> ../../node/node2 + + +What: /sys/devices/system/cpu/cpu#/topology/core_id + /sys/devices/system/cpu/cpu#/topology/core_siblings + /sys/devices/system/cpu/cpu#/topology/core_siblings_list + /sys/devices/system/cpu/cpu#/topology/physical_package_id + /sys/devices/system/cpu/cpu#/topology/thread_siblings + /sys/devices/system/cpu/cpu#/topology/thread_siblings_list +Date: December 2008 +Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> +Description: CPU topology files that describe a logical CPU's relationship + to other cores and threads in the same physical package. + + One cpu# directory is created per logical CPU in the system, + e.g. /sys/devices/system/cpu/cpu42/. + + Briefly, the files above are: + + core_id: the CPU core ID of cpu#. Typically it is the + hardware platform's identifier (rather than the kernel's). + The actual value is architecture and platform dependent. + + core_siblings: internal kernel map of cpu#'s hardware threads + within the same physical_package_id. + + core_siblings_list: human-readable list of the logical CPU + numbers within the same physical_package_id as cpu#. + + physical_package_id: physical package id of cpu#. Typically + corresponds to a physical socket number, but the actual value + is architecture and platform dependent. + + thread_siblings: internel kernel map of cpu#'s hardware + threads within the same core as cpu# + + thread_siblings_list: human-readable list of cpu#'s hardware + threads within the same core as cpu# + + See Documentation/cputopology.txt for more information. + + +What: /sys/devices/system/cpu/cpuidle/current_driver + /sys/devices/system/cpu/cpuidle/current_governer_ro +Date: September 2007 +Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> +Description: Discover cpuidle policy and mechanism + + Various CPUs today support multiple idle levels that are + differentiated by varying exit latencies and power + consumption during idle. + + Idle policy (governor) is differentiated from idle mechanism + (driver) + + current_driver: displays current idle mechanism + + current_governor_ro: displays current idle policy + + See files in Documentation/cpuidle/ for more information. + + +What: /sys/devices/system/cpu/cpu*/cache/index*/cache_disable_X +Date: August 2008 +KernelVersion: 2.6.27 +Contact: mark.langsdorf@amd.com +Description: These files exist in every cpu's cache index directories. + There are currently 2 cache_disable_# files in each + directory. Reading from these files on a supported + processor will return that cache disable index value + for that processor and node. Writing to one of these + files will cause the specificed cache index to be disabled. + + Currently, only AMD Family 10h Processors support cache index + disable, and only for their L3 caches. See the BIOS and + Kernel Developer's Guide at + http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/31116-Public-GH-BKDG_3.20_2-4-09.pdf + for formatting information and other details on the + cache index disable. +Users: joachim.deguara@amd.com diff --git a/Documentation/DocBook/tracepoint.tmpl b/Documentation/DocBook/tracepoint.tmpl index b0756d0fd57..8bca1d5cec0 100644 --- a/Documentation/DocBook/tracepoint.tmpl +++ b/Documentation/DocBook/tracepoint.tmpl @@ -86,4 +86,9 @@ !Iinclude/trace/events/irq.h </chapter> + <chapter id="signal"> + <title>SIGNAL</title> +!Iinclude/trace/events/signal.h + </chapter> + </book> diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index 187bbf10c92..8608fd85e92 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt @@ -1,185 +1,10 @@ CONFIG_RCU_TRACE debugfs Files and Formats -The rcupreempt and rcutree implementations of RCU provide debugfs trace -output that summarizes counters and state. This information is useful for -debugging RCU itself, and can sometimes also help to debug abuses of RCU. -Note that the rcuclassic implementation of RCU does not provide debugfs -trace output. - -The following sections describe the debugfs files and formats for -preemptable RCU (rcupreempt) and hierarchical RCU (rcutree). - - -Preemptable RCU debugfs Files and Formats - -This implementation of RCU provides three debugfs files under the -top-level directory RCU: rcu/rcuctrs (which displays the per-CPU -counters used by preemptable RCU) rcu/rcugp (which displays grace-period -counters), and rcu/rcustats (which internal counters for debugging RCU). - -The output of "cat rcu/rcuctrs" looks as follows: - -CPU last cur F M - 0 5 -5 0 0 - 1 -1 0 0 0 - 2 0 1 0 0 - 3 0 1 0 0 - 4 0 1 0 0 - 5 0 1 0 0 - 6 0 2 0 0 - 7 0 -1 0 0 - 8 0 1 0 0 -ggp = 26226, state = waitzero - -The per-CPU fields are as follows: - -o "CPU" gives the CPU number. Offline CPUs are not displayed. - -o "last" gives the value of the counter that is being decremented - for the current grace period phase. In the example above, - the counters sum to 4, indicating that there are still four - RCU read-side critical sections still running that started - before the last counter flip. - -o "cur" gives the value of the counter that is currently being - both incremented (by rcu_read_lock()) and decremented (by - rcu_read_unlock()). In the example above, the counters sum to - 1, indicating that there is only one RCU read-side critical section - still running that started after the last counter flip. - -o "F" indicates whether RCU is waiting for this CPU to acknowledge - a counter flip. In the above example, RCU is not waiting on any, - which is consistent with the state being "waitzero" rather than - "waitack". - -o "M" indicates whether RCU is waiting for this CPU to execute a - memory barrier. In the above example, RCU is not waiting on any, - which is consistent with the state being "waitzero" rather than - "waitmb". - -o "ggp" is the global grace-period counter. - -o "state" is the RCU state, which can be one of the following: - - o "idle": there is no grace period in progress. - - o "waitack": RCU just incremented the global grace-period - counter, which has the effect of reversing the roles of - the "last" and "cur" counters above, and is waiting for - all the CPUs to acknowledge the flip. Once the flip has - been acknowledged, CPUs will no longer be incrementing - what are now the "last" counters, so that their sum will - decrease monotonically down to zero. - - o "waitzero": RCU is waiting for the sum of the "last" counters - to decrease to zero. - - o "waitmb": RCU is waiting for each CPU to execute a memory - barrier, which ensures that instructions from a given CPU's - last RCU read-side critical section cannot be reordered - with instructions following the memory-barrier instruction. - -The output of "cat rcu/rcugp" looks as follows: - -oldggp=48870 newggp=48873 - -Note that reading from this file provokes a synchronize_rcu(). The -"oldggp" value is that of "ggp" from rcu/rcuctrs above, taken before -executing the synchronize_rcu(), and the "newggp" value is also the -"ggp" value, but taken after the synchronize_rcu() command returns. - - -The output of "cat rcu/rcugp" looks as follows: - -na=1337955 nl=40 wa=1337915 wl=44 da=1337871 dl=0 dr=1337871 di=1337871 -1=50989 e1=6138 i1=49722 ie1=82 g1=49640 a1=315203 ae1=265563 a2=49640 -z1=1401244 ze1=1351605 z2=49639 m1=5661253 me1=5611614 m2=49639 - -These are counters tracking internal preemptable-RCU events, however, -some of them may be useful for debugging algorithms using RCU. In -particular, the "nl", "wl", and "dl" values track the number of RCU -callbacks in various states. The fields are as follows: - -o "na" is the total number of RCU callbacks that have been enqueued - since boot. - -o "nl" is the number of RCU callbacks waiting for the previous - grace period to end so that they can start waiting on the next - grace period. - -o "wa" is the total number of RCU callbacks that have started waiting - for a grace period since boot. "na" should be roughly equal to - "nl" plus "wa". - -o "wl" is the number of RCU callbacks currently waiting for their - grace period to end. - -o "da" is the total number of RCU callbacks whose grace periods - have completed since boot. "wa" should be roughly equal to - "wl" plus "da". - -o "dr" is the total number of RCU callbacks that have been removed - from the list of callbacks ready to invoke. "dr" should be roughly - equal to "da". - -o "di" is the total number of RCU callbacks that have been invoked - since boot. "di" should be roughly equal to "da", though some - early versions of preemptable RCU had a bug so that only the - last CPU's count of invocations was displayed, rather than the - sum of all CPU's counts. - -o "1" is the number of calls to rcu_try_flip(). This should be - roughly equal to the sum of "e1", "i1", "a1", "z1", and "m1" - described below. In other words, the number of times that - the state machine is visited should be equal to the sum of the - number of times that each state is visited plus the number of - times that the state-machine lock acquisition failed. - -o "e1" is the number of times that rcu_try_flip() was unable to - acquire the fliplock. - -o "i1" is the number of calls to rcu_try_flip_idle(). - -o "ie1" is the number of times rcu_try_flip_idle() exited early - due to the calling CPU having no work for RCU. - -o "g1" is the number of times that rcu_try_flip_idle() decided - to start a new grace period. "i1" should be roughly equal to - "ie1" plus "g1". - -o "a1" is the number of calls to rcu_try_flip_waitack(). - -o "ae1" is the number of times that rcu_try_flip_waitack() found - that at least one CPU had not yet acknowledge the new grace period - (AKA "counter flip"). - -o "a2" is the number of time rcu_try_flip_waitack() found that - all CPUs had acknowledged. "a1" should be roughly equal to - "ae1" plus "a2". (This particular output was collected on - a 128-CPU machine, hence the smaller-than-usual fraction of - calls to rcu_try_flip_waitack() finding all CPUs having already - acknowledged.) - -o "z1" is the number of calls to rcu_try_flip_waitzero(). - -o "ze1" is the number of times that rcu_try_flip_waitzero() found - that not all of the old RCU read-side critical sections had - completed. - -o "z2" is the number of times that rcu_try_flip_waitzero() finds - the sum of the counters equal to zero, in other words, that - all of the old RCU read-side critical sections had completed. - The value of "z1" should be roughly equal to "ze1" plus - "z2". - -o "m1" is the number of calls to rcu_try_flip_waitmb(). - -o "me1" is the number of times that rcu_try_flip_waitmb() finds - that at least one CPU has not yet executed a memory barrier. - -o "m2" is the number of times that rcu_try_flip_waitmb() finds that - all CPUs have executed a memory barrier. +The rcutree implementation of RCU provides debugfs trace output that +summarizes counters and state. This information is useful for debugging +RCU itself, and can sometimes also help to debug abuses of RCU. +The following sections describe the debugfs files and formats. Hierarchical RCU debugfs Files and Formats @@ -210,9 +35,10 @@ rcu_bh: 6 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=859/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 7 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3761/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 -The first section lists the rcu_data structures for rcu, the second for -rcu_bh. Each section has one line per CPU, or eight for this 8-CPU system. -The fields are as follows: +The first section lists the rcu_data structures for rcu_sched, the second +for rcu_bh. Note that CONFIG_TREE_PREEMPT_RCU kernels will have an +additional section for rcu_preempt. Each section has one line per CPU, +or eight for this 8-CPU system. The fields are as follows: o The number at the beginning of each line is the CPU number. CPUs numbers followed by an exclamation mark are offline, @@ -223,9 +49,9 @@ o The number at the beginning of each line is the CPU number. o "c" is the count of grace periods that this CPU believes have completed. CPUs in dynticks idle mode may lag quite a ways - behind, for example, CPU 4 under "rcu" above, which has slept - through the past 25 RCU grace periods. It is not unusual to - see CPUs lagging by thousands of grace periods. + behind, for example, CPU 4 under "rcu_sched" above, which has + slept through the past 25 RCU grace periods. It is not unusual + to see CPUs lagging by thousands of grace periods. o "g" is the count of grace periods that this CPU believes have started. Again, CPUs in dynticks idle mode may lag behind. @@ -308,8 +134,10 @@ The output of "cat rcu/rcugp" looks as follows: rcu_sched: completed=33062 gpnum=33063 rcu_bh: completed=464 gpnum=464 -Again, this output is for both "rcu" and "rcu_bh". The fields are -taken from the rcu_state structure, and are as follows: +Again, this output is for both "rcu_sched" and "rcu_bh". Note that +kernels built with CONFIG_TREE_PREEMPT_RCU will have an additional +"rcu_preempt" line. The fields are taken from the rcu_state structure, +and are as follows: o "completed" is the number of grace periods that have completed. It is comparable to the "c" field from rcu/rcudata in that a @@ -324,23 +152,24 @@ o "gpnum" is the number of grace periods that have started. It is If these two fields are equal (as they are for "rcu_bh" above), then there is no grace period in progress, in other words, RCU is idle. On the other hand, if the two fields differ (as they - do for "rcu" above), then an RCU grace period is in progress. + do for "rcu_sched" above), then an RCU grace period is in progress. The output of "cat rcu/rcuhier" looks as follows, with very long lines: -c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6 -1/1 0:127 ^0 -3/3 0:35 ^0 0/0 36:71 ^1 0/0 72:107 ^2 0/0 108:127 ^3 -3/3f 0:5 ^0 2/3 6:11 ^1 0/0 12:17 ^2 0/0 18:23 ^3 0/0 24:29 ^4 0/0 30:35 ^5 0/0 36:41 ^0 0/0 42:47 ^1 0/0 48:53 ^2 0/0 54:59 ^3 0/0 60:65 ^4 0/0 66:71 ^5 0/0 72:77 ^0 0/0 78:83 ^1 0/0 84:89 ^2 0/0 90:95 ^3 0/0 96:101 ^4 0/0 102:107 ^5 0/0 108:113 ^0 0/0 114:119 ^1 0/0 120:125 ^2 0/0 126:127 ^3 +c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6 oqlen=0 +1/1 .>. 0:127 ^0 +3/3 .>. 0:35 ^0 0/0 .>. 36:71 ^1 0/0 .>. 72:107 ^2 0/0 .>. 108:127 ^3 +3/3f .>. 0:5 ^0 2/3 .>. 6:11 ^1 0/0 .>. 12:17 ^2 0/0 .>. 18:23 ^3 0/0 .>. 24:29 ^4 0/0 .>. 30:35 ^5 0/0 .>. 36:41 ^0 0/0 .>. 42:47 ^1 0/0 .>. 48:53 ^2 0/0 .>. 54:59 ^3 0/0 .>. 60:65 ^4 0/0 .>. 66:71 ^5 0/0 .>. 72:77 ^0 0/0 .>. 78:83 ^1 0/0 .>. 84:89 ^2 0/0 .>. 90:95 ^3 0/0 .>. 96:101 ^4 0/0 .>. 102:107 ^5 0/0 .>. 108:113 ^0 0/0 .>. 114:119 ^1 0/0 .>. 120:125 ^2 0/0 .>. 126:127 ^3 rcu_bh: -c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0 -0/1 0:127 ^0 -0/3 0:35 ^0 0/0 36:71 ^1 0/0 72:107 ^2 0/0 108:127 ^3 -0/3f 0:5 ^0 0/3 6:11 ^1 0/0 12:17 ^2 0/0 18:23 ^3 0/0 24:29 ^4 0/0 30:35 ^5 0/0 36:41 ^0 0/0 42:47 ^1 0/0 48:53 ^2 0/0 54:59 ^3 0/0 60:65 ^4 0/0 66:71 ^5 0/0 72:77 ^0 0/0 78:83 ^1 0/0 84:89 ^2 0/0 90:95 ^3 0/0 96:101 ^4 0/0 102:107 ^5 0/0 108:113 ^0 0/0 114:119 ^1 0/0 120:125 ^2 0/0 126:127 ^3 +c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0 oqlen=0 +0/1 .>. 0:127 ^0 +0/3 .>. 0:35 ^0 0/0 .>. 36:71 ^1 0/0 .>. 72:107 ^2 0/0 .>. 108:127 ^3 +0/3f .>. 0:5 ^0 0/3 .>. 6:11 ^1 0/0 .>. 12:17 ^2 0/0 .>. 18:23 ^3 0/0 .>. 24:29 ^4 0/0 .>. 30:35 ^5 0/0 .>. 36:41 ^0 0/0 .>. 42:47 ^1 0/0 .>. 48:53 ^2 0/0 .>. 54:59 ^3 0/0 .>. 60:65 ^4 0/0 .>. 66:71 ^5 0/0 .>. 72:77 ^0 0/0 .>. 78:83 ^1 0/0 .>. 84:89 ^2 0/0 .>. 90:95 ^3 0/0 .>. 96:101 ^4 0/0 .>. 102:107 ^5 0/0 .>. 108:113 ^0 0/0 .>. 114:119 ^1 0/0 .>. 120:125 ^2 0/0 .>. 126:127 ^3 -This is once again split into "rcu" and "rcu_bh" portions. The fields are -as follows: +This is once again split into "rcu_sched" and "rcu_bh" portions, +and CONFIG_TREE_PREEMPT_RCU kernels will again have an additional +"rcu_preempt" section. The fields are as follows: o "c" is exactly the same as "completed" under rcu/rcugp. @@ -372,6 +201,11 @@ o "fqlh" is the number of calls to force_quiescent_state() that exited immediately (without even being counted in nfqs above) due to contention on ->fqslock. +o "oqlen" is the number of callbacks on the "orphan" callback + list. RCU callbacks are placed on this list by CPUs going + offline, and are "adopted" either by the CPU helping the outgoing + CPU or by the next rcu_barrier*() call, whichever comes first. + o Each element of the form "1/1 0:127 ^0" represents one struct rcu_node. Each line represents one level of the hierarchy, from root to leaves. It is best to think of the rcu_data structures @@ -379,7 +213,7 @@ o Each element of the form "1/1 0:127 ^0" represents one struct might be either one, two, or three levels of rcu_node structures, depending on the relationship between CONFIG_RCU_FANOUT and CONFIG_NR_CPUS. - + o The numbers separated by the "/" are the qsmask followed by the qsmaskinit. The qsmask will have one bit set for each entity in the next lower level that @@ -389,10 +223,19 @@ o Each element of the form "1/1 0:127 ^0" represents one struct The value of qsmaskinit is assigned to that of qsmask at the beginning of each grace period. - For example, for "rcu", the qsmask of the first entry - of the lowest level is 0x14, meaning that we are still - waiting for CPUs 2 and 4 to check in for the current - grace period. + For example, for "rcu_sched", the qsmask of the first + entry of the lowest level is 0x14, meaning that we + are still waiting for CPUs 2 and 4 to check in for the + current grace period. + + o The characters separated by the ">" indicate the state + of the blocked-tasks lists. A "T" preceding the ">" + indicates that at least one task blocked in an RCU + read-side critical section blocks the current grace + period, while a "." preceding the ">" indicates otherwise. + The character following the ">" indicates similarly for + the next grace period. A "T" should appear in this + field only for rcu-preempt. o The numbers separated by the ":" are the range of CPUs served by this struct rcu_node. This can be helpful @@ -431,8 +274,9 @@ rcu_bh: 6 np=120834 qsp=9902 cbr=0 cng=0 gpc=6 gps=3 nf=2 nn=110921 7 np=144888 qsp=26336 cbr=0 cng=0 gpc=8 gps=2 nf=0 nn=118542 -As always, this is once again split into "rcu" and "rcu_bh" portions. -The fields are as follows: +As always, this is once again split into "rcu_sched" and "rcu_bh" +portions, with CONFIG_TREE_PREEMPT_RCU kernels having an additional +"rcu_preempt" section. The fields are as follows: o "np" is the number of times that __rcu_pending() has been invoked for the corresponding flavor of RCU. diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index e41a7fecf0d..d542ca243b8 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -830,7 +830,7 @@ sched: Critical sections Grace period Barrier SRCU: Critical sections Grace period Barrier srcu_read_lock synchronize_srcu N/A - srcu_read_unlock + srcu_read_unlock synchronize_srcu_expedited SRCU: Initialization/cleanup init_srcu_struct diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches index b7f9d3b4bbf..72651f788f4 100644 --- a/Documentation/SubmittingPatches +++ b/Documentation/SubmittingPatches @@ -232,7 +232,7 @@ your e-mail client so that it sends your patches untouched. When sending patches to Linus, always follow step #7. Large changes are not appropriate for mailing lists, and some -maintainers. If your patch, uncompressed, exceeds 40 kB in size, +maintainers. If your patch, uncompressed, exceeds 300 kB in size, it is preferred that you store your patch on an Internet-accessible server, and provide instead a URL (link) pointing to your patch. diff --git a/Documentation/arm/Samsung-S3C24XX/EB2410ITX.txt b/Documentation/arm/Samsung-S3C24XX/EB2410ITX.txt index 26422f0f908..b87292e05f2 100644 --- a/Documentation/arm/Samsung-S3C24XX/EB2410ITX.txt +++ b/Documentation/arm/Samsung-S3C24XX/EB2410ITX.txt @@ -55,4 +55,4 @@ Maintainers This board is maintained by Simtec Electronics. -(c) 2004 Ben Dooks, Simtec Electronics +Copyright 2004 Ben Dooks, Simtec Electronics diff --git a/Documentation/arm/Samsung-S3C24XX/GPIO.txt b/Documentation/arm/Samsung-S3C24XX/GPIO.txt index 948c8718d96..2af2cf39915 100644 --- a/Documentation/arm/Samsung-S3C24XX/GPIO.txt +++ b/Documentation/arm/Samsung-S3C24XX/GPIO.txt @@ -134,4 +134,4 @@ Authour Ben Dooks, 03 October 2004 -(c) 2004 Ben Dooks, Simtec Electronics +Copyright 2004 Ben Dooks, Simtec Electronics diff --git a/Documentation/arm/Samsung-S3C24XX/Overview.txt b/Documentation/arm/Samsung-S3C24XX/Overview.txt index cff6227b448..081892df4fd 100644 --- a/Documentation/arm/Samsung-S3C24XX/Overview.txt +++ b/Documentation/arm/Samsung-S3C24XX/Overview.txt @@ -299,4 +299,4 @@ Port Contributors Document Author --------------- -Ben Dooks, (c) 2004-2005,2006 Simtec Electronics +Ben Dooks, Copyright 2004-2006 Simtec Electronics diff --git a/Documentation/arm/Samsung-S3C24XX/S3C2412.txt b/Documentation/arm/Samsung-S3C24XX/S3C2412.txt index 295d971a15e..f057876b920 100644 --- a/Documentation/arm/Samsung-S3C24XX/S3C2412.txt +++ b/Documentation/arm/Samsung-S3C24XX/S3C2412.txt @@ -117,4 +117,4 @@ ATA Document Author --------------- -Ben Dooks, (c) 2006 Simtec Electronics +Ben Dooks, Copyright 2006 Simtec Electronics diff --git a/Documentation/arm/Samsung-S3C24XX/S3C2413.txt b/Documentation/arm/Samsung-S3C24XX/S3C2413.txt index ab2a88858f1..909bdc7dd7b 100644 --- a/Documentation/arm/Samsung-S3C24XX/S3C2413.txt +++ b/Documentation/arm/Samsung-S3C24XX/S3C2413.txt @@ -18,4 +18,4 @@ Camera Interface Document Author --------------- -Ben Dooks, (c) 2006 Simtec Electronics +Ben Dooks, Copyright 2006 Simtec Electronics diff --git a/Documentation/arm/Samsung-S3C24XX/Suspend.txt b/Documentation/arm/Samsung-S3C24XX/Suspend.txt index a30fe510572..7edd0e2e6c5 100644 --- a/Documentation/arm/Samsung-S3C24XX/Suspend.txt +++ b/Documentation/arm/Samsung-S3C24XX/Suspend.txt @@ -133,5 +133,5 @@ Configuration Document Author --------------- -Ben Dooks, (c) 2004 Simtec Electronics +Ben Dooks, Copyright 2004 Simtec Electronics diff --git a/Documentation/arm/Samsung-S3C24XX/USB-Host.txt b/Documentation/arm/Samsung-S3C24XX/USB-Host.txt index 67671eba423..f82b1faefad 100644 --- a/Documentation/arm/Samsung-S3C24XX/USB-Host.txt +++ b/Documentation/arm/Samsung-S3C24XX/USB-Host.txt @@ -90,4 +90,4 @@ Platform Data Document Author --------------- -Ben Dooks, (c) 2005 Simtec Electronics +Ben Dooks, Copyright 2005 Simtec Electronics diff --git a/Documentation/arm/tcm.txt b/Documentation/arm/tcm.txt index 074f4be6667..77fd9376e6d 100644 --- a/Documentation/arm/tcm.txt +++ b/Documentation/arm/tcm.txt @@ -29,11 +29,13 @@ TCM location and size. Notice that this is not a MMU table: you actually move the physical location of the TCM around. At the place you put it, it will mask any underlying RAM from the CPU so it is usually wise not to overlap any physical RAM with -the TCM. The TCM memory exists totally outside the MMU and will -override any MMU mappings. +the TCM. -Code executing inside the ITCM does not "see" any MMU mappings -and e.g. register accesses must be made to physical addresses. +The TCM memory can then be remapped to another address again using +the MMU, but notice that the TCM if often used in situations where +the MMU is turned off. To avoid confusion the current Linux +implementation will map the TCM 1 to 1 from physical to virtual +memory in the location specified by the machine. TCM is used for a few things: diff --git a/Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg b/Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg new file mode 100644 index 00000000000..f87cfa0dc2f --- /dev/null +++ b/Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg @@ -0,0 +1,588 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<!-- Created with Inkscape (http://www.inkscape.org/) --> +<svg + xmlns:svg="http://www.w3.org/2000/svg" + xmlns="http://www.w3.org/2000/svg" + version="1.0" + width="210mm" + height="297mm" + viewBox="0 0 21000 29700" + id="svg2" + style="fill-rule:evenodd"> + <defs + id="defs4" /> + <g + id="Default" + style="visibility:visible"> + <desc + id="desc180">Master slide</desc> + </g> + <path + d="M 11999,8601 L 11899,8301 L 12099,8301 L 11999,8601 z" + id="path193" + style="fill:#008000;visibility:visible" /> + <path + d="M 11999,7801 L 11999,8361" + id="path197" + style="fill:none;stroke:#008000;visibility:visible" /> + <path + d="M 7999,10401 L 7899,10101 L 8099,10101 L 7999,10401 z" + id="path209" + style="fill:#008000;visibility:visible" /> + <path + d="M 7999,9601 L 7999,10161" + id="path213" + style="fill:none;stroke:#008000;visibility:visible" /> + <path + d="M 11999,7801 L 11685,7840 L 11724,7644 L 11999,7801 z" + id="path225" + style="fill:#008000;visibility:visible" /> + <path + d="M 7999,7001 L 11764,7754" + id="path229" + style="fill:none;stroke:#008000;visibility:visible" /> + <g + transform="matrix(0.9895258,-0.1443562,0.1443562,0.9895258,-1244.4792,1416.5139)" + id="g245" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <text + id="text247"> + <tspan + x="9139 9368 9579 9808 9986 10075 10252 10481 10659 10837 10909" + y="9284" + id="tspan249">RSDataReply</tspan> + </text> + </g> + <path + d="M 7999,9601 L 8281,9458 L 8311,9655 L 7999,9601 z" + id="path259" + style="fill:#008000;visibility:visible" /> + <path + d="M 11999,9001 L 8236,9565" + id="path263" + style="fill:none;stroke:#008000;visibility:visible" /> + <g + transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,1620.9382,-1639.4947)" + id="g279" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <text + id="text281"> + <tspan + x="8743 8972 9132 9310 9573 9801 10013 10242 10419 10597 10775 10953 11114" + y="7023" + id="tspan283">CsumRSRequest</tspan> + </text> + </g> + <text + id="text297" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="4034 4263 4440 4703 4881 5042 5219 5397 5503 5681 5842 6003 6180 6341 6519 6625 6803 6980 7158 7336 7497 7586 7692" + y="5707" + id="tspan299">w_make_resync_request()</tspan> + </text> + <text + id="text313" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="12199 12305 12483 12644 12821 12893 13054 13232 13410 13638 13816 13905 14083 14311 14489 14667 14845 15023 15184 15272 15378" + y="7806" + id="tspan315">receive_DataRequest()</tspan> + </text> + <text + id="text329" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="12199 12377 12483 12660 12838 13016 13194 13372 13549 13621 13799 13977 14083 14261 14438 14616 14794 14955 15133 15294 15399" + y="8606" + id="tspan331">drbd_endio_read_sec()</tspan> + </text> + <text + id="text345" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="12191 12420 12597 12775 12953 13131 13309 13486 13664 13825 13986 14164 14426 14604 14710 14871 15049 15154 15332 15510 15616" + y="9007" + id="tspan347">w_e_end_csum_rs_req()</tspan> + </text> + <text + id="text361" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="4444 4550 4728 4889 5066 5138 5299 5477 5655 5883 6095 6324 6501 6590 6768 6997 7175 7352 7424 7585 7691" + y="9507" + id="tspan363">receive_RSDataReply()</tspan> + </text> + <text + id="text377" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="4457 4635 4741 4918 5096 5274 5452 5630 5807 5879 6057 6235 6464 6569 6641 6730 6908 7086 7247 7425 7585 7691" + y="10407" + id="tspan379">drbd_endio_write_sec()</tspan> + </text> + <text + id="text393" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="4647 4825 5003 5180 5358 5536 5714 5820 5997 6158 6319 6497 6658 6836 7013 7085 7263 7424 7585 7691" + y="10907" + id="tspan395">e_end_resync_block()</tspan> + </text> + <path + d="M 11999,11601 L 11685,11640 L 11724,11444 L 11999,11601 z" + id="path405" + style="fill:#000080;visibility:visible" /> + <path + d="M 7999,10801 L 11764,11554" + id="path409" + style="fill:none;stroke:#000080;visibility:visible" /> + <g + transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,2434.7562,-1674.649)" + id="g425" + style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded"> + <text + id="text427"> + <tspan + x="9320 9621 9726 9798 9887 10065 10277 10438" + y="10943" + id="tspan429">WriteAck</tspan> + </text> + </g> + <text + id="text443" + style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="12199 12377 12555 12644 12821 13033 13105 13283 13444 13604 13816 13977 14138 14244" + y="11559" + id="tspan445">got_BlockAck()</tspan> + </text> + <text + id="text459" + style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="7999 8304 8541 8778 8990 9201 9413 9650 10001 10120 10357 10594 10806 11043 11280 11398 11703 11940 12152 12364 12601 12812 12931 13049 13261 13498 13710 13947 14065 14302 14540 14658 14777 14870 15107 15225 15437 15649 15886" + y="4877" + id="tspan461">Checksum based Resync, case not in sync</tspan> + </text> + <text + id="text475" + style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="6961 7266 7571 7854 8159 8299 8536 8654 8891 9010 9247 9484 9603 9840 9958 10077 10170 10407" + y="2806" + id="tspan477">DRBD-8.3 data flow</tspan> + </text> + <text + id="text491" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="5190 5419 5596 5774 5952 6113 6291 6468 6646 6824 6985 7146 7324 7586 7692" + y="7005" + id="tspan493">w_e_send_csum()</tspan> + </text> + <path + d="M 11999,17601 L 11899,17301 L 12099,17301 L 11999,17601 z" + id="path503" + style="fill:#008000;visibility:visible" /> + <path + d="M 11999,16801 L 11999,17361" + id="path507" + style="fill:none;stroke:#008000;visibility:visible" /> + <path + d="M 11999,16801 L 11685,16840 L 11724,16644 L 11999,16801 z" + id="path519" + style="fill:#008000;visibility:visible" /> + <path + d="M 7999,16001 L 11764,16754" + id="path523" + style="fill:none;stroke:#008000;visibility:visible" /> + <g + transform="matrix(0.9895258,-0.1443562,0.1443562,0.9895258,-2539.5806,1529.3491)" + id="g539" + style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded"> + <text + id="text541"> + <tspan + x="9269 9498 9709 9798 9959 10048 10226 10437 10598 10776" + y="18265" + id="tspan543">RSIsInSync</tspan> + </text> + </g> + <path + d="M 7999,18601 L 8281,18458 L 8311,18655 L 7999,18601 z" + id="path553" + style="fill:#000080;visibility:visible" /> + <path + d="M 11999,18001 L 8236,18565" + id="path557" + style="fill:none;stroke:#000080;visibility:visible" /> + <g + transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,3461.4027,-1449.3012)" + id="g573" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <text + id="text575"> + <tspan + x="8743 8972 9132 9310 9573 9801 10013 10242 10419 10597 10775 10953 11114" + y="16023" + id="tspan577">CsumRSRequest</tspan> + </text> + </g> + <text + id="text591" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="12199 12305 12483 12644 12821 12893 13054 13232 13410 13638 13816 13905 14083 14311 14489 14667 14845 15023 15184 15272 15378" + y="16806" + id="tspan593">receive_DataRequest()</tspan> + </text> + <text + id="text607" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="12199 12377 12483 12660 12838 13016 13194 13372 13549 13621 13799 13977 14083 14261 14438 14616 14794 14955 15133 15294 15399" + y="17606" + id="tspan609">drbd_endio_read_sec()</tspan> + </text> + <text + id="text623" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="12191 12420 12597 12775 12953 13131 13309 13486 13664 13825 13986 14164 14426 14604 14710 14871 15049 15154 15332 15510 15616" + y="18007" + id="tspan625">w_e_end_csum_rs_req()</tspan> + </text> + <text + id="text639" + style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="5735 5913 6091 6180 6357 6446 6607 6696 6874 7085 7246 7424 7585 7691" + y="18507" + id="tspan641">got_IsInSync()</tspan> + </text> + <text + id="text655" + style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="7999 8304 8541 8778 8990 9201 9413 9650 10001 10120 10357 10594 10806 11043 11280 11398 11703 11940 12152 12364 12601 12812 12931 13049 13261 13498 13710 13947 14065 14159 14396 14514 14726 14937 15175" + y="13877" + id="tspan657">Checksum based Resync, case in sync</tspan> + </text> + <path + d="M 12000,24601 L 11900,24301 L 12100,24301 L 12000,24601 z" + id="path667" + style="fill:#008000;visibility:visible" /> + <path + d="M 12000,23801 L 12000,24361" + id="path671" + style="fill:none;stroke:#008000;visibility:visible" /> + <path + d="M 8000,26401 L 7900,26101 L 8100,26101 L 8000,26401 z" + id="path683" + style="fill:#008000;visibility:visible" /> + <path + d="M 8000,25601 L 8000,26161" + id="path687" + style="fill:none;stroke:#008000;visibility:visible" /> + <path + d="M 12000,23801 L 11686,23840 L 11725,23644 L 12000,23801 z" + id="path699" + style="fill:#008000;visibility:visible" /> + <path + d="M 8000,23001 L 11765,23754" + id="path703" + style="fill:none;stroke:#008000;visibility:visible" /> + <g + transform="matrix(0.9895258,-0.1443562,0.1443562,0.9895258,-3543.8452,1630.5143)" + id="g719" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <text + id="text721"> + <tspan + x="9464 9710 9921 10150 10328 10505 10577" + y="25236" + id="tspan723">OVReply</tspan> + </text> + </g> + <path + d="M 8000,25601 L 8282,25458 L 8312,25655 L 8000,25601 z" + id="path733" + style="fill:#008000;visibility:visible" /> + <path + d="M 12000,25001 L 8237,25565" + id="path737" + style="fill:none;stroke:#008000;visibility:visible" /> + <g + transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,4918.2801,-1381.2128)" + id="g753" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <text + id="text755"> + <tspan + x="9142 9388 9599 9828 10006 10183 10361 10539 10700" + y="23106" + id="tspan757">OVRequest</tspan> + </text> + </g> + <text + id="text771" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="12200 12306 12484 12645 12822 12894 13055 13233 13411 13656 13868 14097 14274 14452 14630 14808 14969 15058 15163" + y="23806" + id="tspan773">receive_OVRequest()</tspan> + </text> + <text + id="text787" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="12200 12378 12484 12661 12839 13017 13195 13373 13550 13622 13800 13978 14084 14262 14439 14617 14795 14956 15134 15295 15400" + y="24606" + id="tspan789">drbd_endio_read_sec()</tspan> + </text> + <text + id="text803" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="12192 12421 12598 12776 12954 13132 13310 13487 13665 13843 14004 14182 14288 14465 14643 14749" + y="25007" + id="tspan805">w_e_end_ov_req()</tspan> + </text> + <text + id="text819" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="5101 5207 5385 5546 5723 5795 5956 6134 6312 6557 6769 6998 7175 7353 7425 7586 7692" + y="25507" + id="tspan821">receive_OVReply()</tspan> + </text> + <text + id="text835" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="4492 4670 4776 4953 5131 5309 5487 5665 5842 5914 6092 6270 6376 6554 6731 6909 7087 7248 7426 7587 7692" + y="26407" + id="tspan837">drbd_endio_read_sec()</tspan> + </text> + <text + id="text851" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="4902 5131 5308 5486 5664 5842 6020 6197 6375 6553 6714 6892 6998 7175 7353 7425 7586 7692" + y="26907" + id="tspan853">w_e_end_ov_reply()</tspan> + </text> + <path + d="M 12000,27601 L 11686,27640 L 11725,27444 L 12000,27601 z" + id="path863" + style="fill:#000080;visibility:visible" /> + <path + d="M 8000,26801 L 11765,27554" + id="path867" + style="fill:none;stroke:#000080;visibility:visible" /> + <g + transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,5704.1907,-1328.312)" + id="g883" + style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded"> + <text + id="text885"> + <tspan + x="9279 9525 9736 9965 10143 10303 10481 10553" + y="26935" + id="tspan887">OVResult</tspan> + </text> + </g> + <text + id="text901" + style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="12200 12378 12556 12645 12822 13068 13280 13508 13686 13847 14025 14097 14185 14291" + y="27559" + id="tspan903">got_OVResult()</tspan> + </text> + <text + id="text917" + style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="8000 8330 8567 8660 8754 8991 9228 9346 9558 9795 9935 10028 10146" + y="21877" + id="tspan919">Online verify</tspan> + </text> + <text + id="text933" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="4641 4870 5047 5310 5488 5649 5826 6004 6182 6343 6521 6626 6804 6982 7160 7338 7499 7587 7693" + y="23005" + id="tspan935">w_make_ov_request()</tspan> + </text> + <path + d="M 8000,6500 L 7900,6200 L 8100,6200 L 8000,6500 z" + id="path945" + style="fill:#008000;visibility:visible" /> + <path + d="M 8000,5700 L 8000,6260" + id="path949" + style="fill:none;stroke:#008000;visibility:visible" /> + <path + d="M 3900,5500 L 3700,5500 L 3700,11000 L 3900,11000" + id="path961" + style="fill:none;stroke:#000000;visibility:visible" /> + <path + d="M 3900,14500 L 3700,14500 L 3700,18600 L 3900,18600" + id="path973" + style="fill:none;stroke:#000000;visibility:visible" /> + <path + d="M 3900,22800 L 3700,22800 L 3700,26900 L 3900,26900" + id="path985" + style="fill:none;stroke:#000000;visibility:visible" /> + <text + id="text1001" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="4492 4670 4776 4953 5131 5309 5487 5665 5842 5914 6092 6270 6376 6554 6731 6909 7087 7248 7426 7587 7692" + y="6506" + id="tspan1003">drbd_endio_read_sec()</tspan> + </text> + <text + id="text1017" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="4034 4263 4440 4703 4881 5042 5219 5397 5503 5681 5842 6003 6180 6341 6519 6625 6803 6980 7158 7336 7497 7586 7692" + y="14708" + id="tspan1019">w_make_resync_request()</tspan> + </text> + <text + id="text1033" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="5190 5419 5596 5774 5952 6113 6291 6468 6646 6824 6985 7146 7324 7586 7692" + y="16006" + id="tspan1035">w_e_send_csum()</tspan> + </text> + <path + d="M 8000,15501 L 7900,15201 L 8100,15201 L 8000,15501 z" + id="path1045" + style="fill:#008000;visibility:visible" /> + <path + d="M 8000,14701 L 8000,15261" + id="path1049" + style="fill:none;stroke:#008000;visibility:visible" /> + <text + id="text1065" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="4492 4670 4776 4953 5131 5309 5487 5665 5842 5914 6092 6270 6376 6554 6731 6909 7087 7248 7426 7587 7692" + y="15507" + id="tspan1067">drbd_endio_read_sec()</tspan> + </text> + <path + d="M 16100,9000 L 16300,9000 L 16300,7500 L 16100,7500" + id="path1077" + style="fill:none;stroke:#000000;visibility:visible" /> + <path + d="M 16100,18000 L 16300,18000 L 16300,16500 L 16100,16500" + id="path1089" + style="fill:none;stroke:#000000;visibility:visible" /> + <path + d="M 16100,25000 L 16300,25000 L 16300,23500 L 16100,23500" + id="path1101" + style="fill:none;stroke:#000000;visibility:visible" /> + <text + id="text1117" + style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="2026 2132 2293 2471 2648 2826 3004 3076 3254 3431 3503 3681 3787" + y="5402" + id="tspan1119">rs_begin_io()</tspan> + </text> + <text + id="text1133" + style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="2027 2133 2294 2472 2649 2827 3005 3077 3255 3432 3504 3682 3788" + y="14402" + id="tspan1135">rs_begin_io()</tspan> + </text> + <text + id="text1149" + style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="2026 2132 2293 2471 2648 2826 3004 3076 3254 3431 3503 3681 3787" + y="22602" + id="tspan1151">rs_begin_io()</tspan> + </text> + <text + id="text1165" + style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="1426 1532 1693 1871 2031 2209 2472 2649 2721 2899 2988 3166 3344 3416 3593 3699" + y="11302" + id="tspan1167">rs_complete_io()</tspan> + </text> + <text + id="text1181" + style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="1526 1632 1793 1971 2131 2309 2572 2749 2821 2999 3088 3266 3444 3516 3693 3799" + y="18931" + id="tspan1183">rs_complete_io()</tspan> + </text> + <text + id="text1197" + style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="1526 1632 1793 1971 2131 2309 2572 2749 2821 2999 3088 3266 3444 3516 3693 3799" + y="27231" + id="tspan1199">rs_complete_io()</tspan> + </text> + <text + id="text1213" + style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="16126 16232 16393 16571 16748 16926 17104 17176 17354 17531 17603 17781 17887" + y="7402" + id="tspan1215">rs_begin_io()</tspan> + </text> + <text + id="text1229" + style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="16127 16233 16394 16572 16749 16927 17105 17177 17355 17532 17604 17782 17888" + y="16331" + id="tspan1231">rs_begin_io()</tspan> + </text> + <text + id="text1245" + style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="16127 16233 16394 16572 16749 16927 17105 17177 17355 17532 17604 17782 17888" + y="23302" + id="tspan1247">rs_begin_io()</tspan> + </text> + <text + id="text1261" + style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="16115 16221 16382 16560 16720 16898 17161 17338 17410 17588 17677 17855 18033 18105 18282 18388" + y="9302" + id="tspan1263">rs_complete_io()</tspan> + </text> + <text + id="text1277" + style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="16115 16221 16382 16560 16720 16898 17161 17338 17410 17588 17677 17855 18033 18105 18282 18388" + y="18331" + id="tspan1279">rs_complete_io()</tspan> + </text> + <text + id="text1293" + style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="16126 16232 16393 16571 16731 16909 17172 17349 17421 17599 17688 17866 18044 18116 18293 18399" + y="25302" + id="tspan1295">rs_complete_io()</tspan> + </text> +</svg> diff --git a/Documentation/blockdev/drbd/DRBD-data-packets.svg b/Documentation/blockdev/drbd/DRBD-data-packets.svg new file mode 100644 index 00000000000..48a1e2165fe --- /dev/null +++ b/Documentation/blockdev/drbd/DRBD-data-packets.svg @@ -0,0 +1,459 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<!-- Created with Inkscape (http://www.inkscape.org/) --> +<svg + xmlns:svg="http://www.w3.org/2000/svg" + xmlns="http://www.w3.org/2000/svg" + version="1.0" + width="210mm" + height="297mm" + viewBox="0 0 21000 29700" + id="svg2" + style="fill-rule:evenodd"> + <defs + id="defs4" /> + <g + id="Default" + style="visibility:visible"> + <desc + id="desc176">Master slide</desc> + </g> + <path + d="M 11999,19601 L 11899,19301 L 12099,19301 L 11999,19601 z" + id="path189" + style="fill:#008000;visibility:visible" /> + <path + d="M 11999,18801 L 11999,19361" + id="path193" + style="fill:none;stroke:#008000;visibility:visible" /> + <path + d="M 7999,21401 L 7899,21101 L 8099,21101 L 7999,21401 z" + id="path205" + style="fill:#008000;visibility:visible" /> + <path + d="M 7999,20601 L 7999,21161" + id="path209" + style="fill:none;stroke:#008000;visibility:visible" /> + <path + d="M 11999,18801 L 11685,18840 L 11724,18644 L 11999,18801 z" + id="path221" + style="fill:#008000;visibility:visible" /> + <path + d="M 7999,18001 L 11764,18754" + id="path225" + style="fill:none;stroke:#008000;visibility:visible" /> + <text + x="-3023.845" + y="1106.8124" + transform="matrix(0.9895258,-0.1443562,0.1443562,0.9895258,0,0)" + id="text243" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="6115.1553 6344.1553 6555.1553 6784.1553 6962.1553 7051.1553 7228.1553 7457.1553 7635.1553 7813.1553 7885.1553" + y="21390.812" + id="tspan245">RSDataReply</tspan> + </text> + <path + d="M 7999,20601 L 8281,20458 L 8311,20655 L 7999,20601 z" + id="path255" + style="fill:#008000;visibility:visible" /> + <path + d="M 11999,20001 L 8236,20565" + id="path259" + style="fill:none;stroke:#008000;visibility:visible" /> + <text + x="3502.5356" + y="-2184.6621" + transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,0,0)" + id="text277" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="12321.536 12550.536 12761.536 12990.536 13168.536 13257.536 13434.536 13663.536 13841.536 14019.536 14196.536 14374.536 14535.536" + y="15854.338" + id="tspan279">RSDataRequest</tspan> + </text> + <text + id="text293" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="4034 4263 4440 4703 4881 5042 5219 5397 5503 5681 5842 6003 6180 6341 6519 6625 6803 6980 7158 7336 7497 7586 7692" + y="17807" + id="tspan295">w_make_resync_request()</tspan> + </text> + <text + id="text309" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="12199 12305 12483 12644 12821 12893 13054 13232 13410 13638 13816 13905 14083 14311 14489 14667 14845 15023 15184 15272 15378" + y="18806" + id="tspan311">receive_DataRequest()</tspan> + </text> + <text + id="text325" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="12199 12377 12483 12660 12838 13016 13194 13372 13549 13621 13799 13977 14083 14261 14438 14616 14794 14955 15133 15294 15399" + y="19606" + id="tspan327">drbd_endio_read_sec()</tspan> + </text> + <text + id="text341" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="12191 12420 12597 12775 12953 13131 13309 13486 13664 13770 13931 14109 14287 14375 14553 14731 14837 15015 15192 15298" + y="20007" + id="tspan343">w_e_end_rsdata_req()</tspan> + </text> + <text + id="text357" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="4444 4550 4728 4889 5066 5138 5299 5477 5655 5883 6095 6324 6501 6590 6768 6997 7175 7352 7424 7585 7691" + y="20507" + id="tspan359">receive_RSDataReply()</tspan> + </text> + <text + id="text373" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="4457 4635 4741 4918 5096 5274 5452 5630 5807 5879 6057 6235 6464 6569 6641 6730 6908 7086 7247 7425 7585 7691" + y="21407" + id="tspan375">drbd_endio_write_sec()</tspan> + </text> + <text + id="text389" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="4647 4825 5003 5180 5358 5536 5714 5820 5997 6158 6319 6497 6658 6836 7013 7085 7263 7424 7585 7691" + y="21907" + id="tspan391">e_end_resync_block()</tspan> + </text> + <path + d="M 11999,22601 L 11685,22640 L 11724,22444 L 11999,22601 z" + id="path401" + style="fill:#000080;visibility:visible" /> + <path + d="M 7999,21801 L 11764,22554" + id="path405" + style="fill:none;stroke:#000080;visibility:visible" /> + <text + x="4290.3008" + y="-2369.6162" + transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,0,0)" + id="text423" + style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="13610.301 13911.301 14016.301 14088.301 14177.301 14355.301 14567.301 14728.301" + y="19573.385" + id="tspan425">WriteAck</tspan> + </text> + <text + id="text439" + style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="12199 12377 12555 12644 12821 13033 13105 13283 13444 13604 13816 13977 14138 14244" + y="22559" + id="tspan441">got_BlockAck()</tspan> + </text> + <text + id="text455" + style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="7999 8304 8541 8753 8964 9201 9413 9531 9769 9862 10099 10310 10522 10734 10852 10971 11208 11348 11585 11822" + y="16877" + id="tspan457">Resync blocks, 4-32K</tspan> + </text> + <path + d="M 12000,7601 L 11900,7301 L 12100,7301 L 12000,7601 z" + id="path467" + style="fill:#008000;visibility:visible" /> + <path + d="M 12000,6801 L 12000,7361" + id="path471" + style="fill:none;stroke:#008000;visibility:visible" /> + <path + d="M 12000,6801 L 11686,6840 L 11725,6644 L 12000,6801 z" + id="path483" + style="fill:#008000;visibility:visible" /> + <path + d="M 8000,6001 L 11765,6754" + id="path487" + style="fill:none;stroke:#008000;visibility:visible" /> + <text + x="-1288.1796" + y="1279.7666" + transform="matrix(0.9895258,-0.1443562,0.1443562,0.9895258,0,0)" + id="text505" + style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="8174.8208 8475.8203 8580.8203 8652.8203 8741.8203 8919.8203 9131.8203 9292.8203" + y="9516.7666" + id="tspan507">WriteAck</tspan> + </text> + <path + d="M 8000,8601 L 8282,8458 L 8312,8655 L 8000,8601 z" + id="path517" + style="fill:#000080;visibility:visible" /> + <path + d="M 12000,8001 L 8237,8565" + id="path521" + style="fill:none;stroke:#000080;visibility:visible" /> + <text + x="1065.6655" + y="-2097.7664" + transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,0,0)" + id="text539" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="10682.666 10911.666 11088.666 11177.666" + y="4107.2339" + id="tspan541">Data</tspan> + </text> + <text + id="text555" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="4746 4924 5030 5207 5385 5563 5826 6003 6164 6342 6520 6626 6803 6981 7159 7337 7498 7587 7692" + y="5505" + id="tspan557">drbd_make_request()</tspan> + </text> + <text + id="text571" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="12200 12306 12484 12645 12822 12894 13055 13233 13411 13639 13817 13906 14084 14190" + y="6806" + id="tspan573">receive_Data()</tspan> + </text> + <text + id="text587" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="12200 12378 12484 12661 12839 13017 13195 13373 13550 13622 13800 13978 14207 14312 14384 14473 14651 14829 14990 15168 15328 15434" + y="7606" + id="tspan589">drbd_endio_write_sec()</tspan> + </text> + <text + id="text603" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="12192 12370 12548 12725 12903 13081 13259 13437 13509 13686 13847 14008 14114" + y="8007" + id="tspan605">e_end_block()</tspan> + </text> + <text + id="text619" + style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="5647 5825 6003 6092 6269 6481 6553 6731 6892 7052 7264 7425 7586 7692" + y="8606" + id="tspan621">got_BlockAck()</tspan> + </text> + <text + id="text635" + style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="8000 8305 8542 8779 9016 9109 9346 9486 9604 9956 10049 10189 10328 10565 10705 10942 11179 11298 11603 11742 11835 11954 12191 12310 12428 12665 12902 13139 13279 13516 13753" + y="4877" + id="tspan637">Regular mirrored write, 512-32K</tspan> + </text> + <text + id="text651" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="5381 5610 5787 5948 6126 6304 6482 6659 6837 7015 7087 7265 7426 7587 7692" + y="6003" + id="tspan653">w_send_dblock()</tspan> + </text> + <path + d="M 8000,6800 L 7900,6500 L 8100,6500 L 8000,6800 z" + id="path663" + style="fill:#008000;visibility:visible" /> + <path + d="M 8000,6000 L 8000,6560" + id="path667" + style="fill:none;stroke:#008000;visibility:visible" /> + <text + id="text683" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="4602 4780 4886 5063 5241 5419 5597 5775 5952 6024 6202 6380 6609 6714 6786 6875 7053 7231 7409 7515 7587 7692" + y="6905" + id="tspan685">drbd_endio_write_pri()</tspan> + </text> + <path + d="M 12000,13602 L 11900,13302 L 12100,13302 L 12000,13602 z" + id="path695" + style="fill:#008000;visibility:visible" /> + <path + d="M 12000,12802 L 12000,13362" + id="path699" + style="fill:none;stroke:#008000;visibility:visible" /> + <path + d="M 12000,12802 L 11686,12841 L 11725,12645 L 12000,12802 z" + id="path711" + style="fill:#008000;visibility:visible" /> + <path + d="M 8000,12002 L 11765,12755" + id="path715" + style="fill:none;stroke:#008000;visibility:visible" /> + <text + x="-2155.5266" + y="1201.5964" + transform="matrix(0.9895258,-0.1443562,0.1443562,0.9895258,0,0)" + id="text733" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="7202.4736 7431.4736 7608.4736 7697.4736 7875.4736 8104.4736 8282.4736 8459.4736 8531.4736" + y="15454.597" + id="tspan735">DataReply</tspan> + </text> + <path + d="M 8000,14602 L 8282,14459 L 8312,14656 L 8000,14602 z" + id="path745" + style="fill:#008000;visibility:visible" /> + <path + d="M 12000,14002 L 8237,14566" + id="path749" + style="fill:none;stroke:#008000;visibility:visible" /> + <text + x="2280.3804" + y="-2103.2141" + transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,0,0)" + id="text767" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="11316.381 11545.381 11722.381 11811.381 11989.381 12218.381 12396.381 12573.381 12751.381 12929.381 13090.381" + y="9981.7861" + id="tspan769">DataRequest</tspan> + </text> + <text + id="text783" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="4746 4924 5030 5207 5385 5563 5826 6003 6164 6342 6520 6626 6803 6981 7159 7337 7498 7587 7692" + y="11506" + id="tspan785">drbd_make_request()</tspan> + </text> + <text + id="text799" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="12200 12306 12484 12645 12822 12894 13055 13233 13411 13639 13817 13906 14084 14312 14490 14668 14846 15024 15185 15273 15379" + y="12807" + id="tspan801">receive_DataRequest()</tspan> + </text> + <text + id="text815" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="12200 12378 12484 12661 12839 13017 13195 13373 13550 13622 13800 13978 14084 14262 14439 14617 14795 14956 15134 15295 15400" + y="13607" + id="tspan817">drbd_endio_read_sec()</tspan> + </text> + <text + id="text831" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="12192 12421 12598 12776 12954 13132 13310 13487 13665 13843 14021 14110 14288 14465 14571 14749 14927 15033" + y="14008" + id="tspan833">w_e_end_data_req()</tspan> + </text> + <g + id="g835" + style="visibility:visible"> + <desc + id="desc837">Drawing</desc> + <text + id="text847" + style="font-size:318px;font-weight:400;fill:#008000;font-family:Helvetica embedded"> + <tspan + x="4885 4991 5169 5330 5507 5579 5740 5918 6096 6324 6502 6591 6769 6997 7175 7353 7425 7586 7692" + y="14607" + id="tspan849">receive_DataReply()</tspan> + </text> + </g> + <text + id="text863" + style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="8000 8305 8398 8610 8821 8914 9151 9363 9575 9693 9833 10070 10307 10544 10663 10781 11018 11255 11493 11632 11869 12106" + y="10878" + id="tspan865">Diskless read, 512-32K</tspan> + </text> + <text + id="text879" + style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="5029 5258 5435 5596 5774 5952 6130 6307 6413 6591 6769 6947 7125 7230 7408 7586 7692" + y="12004" + id="tspan881">w_send_read_req()</tspan> + </text> + <text + id="text895" + style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="6961 7266 7571 7854 8159 8278 8515 8633 8870 9107 9226 9463 9581 9700 9793 10030" + y="2806" + id="tspan897">DRBD 8 data flow</tspan> + </text> + <path + d="M 3900,5300 L 3700,5300 L 3700,7000 L 3900,7000" + id="path907" + style="fill:none;stroke:#000000;visibility:visible" /> + <path + d="M 3900,17600 L 3700,17600 L 3700,22000 L 3900,22000" + id="path919" + style="fill:none;stroke:#000000;visibility:visible" /> + <path + d="M 16100,20000 L 16300,20000 L 16300,18500 L 16100,18500" + id="path931" + style="fill:none;stroke:#000000;visibility:visible" /> + <text + id="text947" + style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="2126 2304 2376 2554 2731 2909 3087 3159 3337 3515 3587 3764 3870" + y="5202" + id="tspan949">al_begin_io()</tspan> + </text> + <text + id="text963" + style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="1632 1810 1882 2060 2220 2398 2661 2839 2910 3088 3177 3355 3533 3605 3783 3888" + y="7331" + id="tspan965">al_complete_io()</tspan> + </text> + <text + id="text979" + style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="2126 2232 2393 2571 2748 2926 3104 3176 3354 3531 3603 3781 3887" + y="17431" + id="tspan981">rs_begin_io()</tspan> + </text> + <text + id="text995" + style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="1626 1732 1893 2071 2231 2409 2672 2849 2921 3099 3188 3366 3544 3616 3793 3899" + y="22331" + id="tspan997">rs_complete_io()</tspan> + </text> + <text + id="text1011" + style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="16027 16133 16294 16472 16649 16827 17005 17077 17255 17432 17504 17682 17788" + y="18402" + id="tspan1013">rs_begin_io()</tspan> + </text> + <text + id="text1027" + style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded"> + <tspan + x="16115 16221 16382 16560 16720 16898 17161 17338 17410 17588 17677 17855 18033 18105 18282 18388" + y="20331" + id="tspan1029">rs_complete_io()</tspan> + </text> +</svg> diff --git a/Documentation/blockdev/drbd/README.txt b/Documentation/blockdev/drbd/README.txt new file mode 100644 index 00000000000..627b0a1bf35 --- /dev/null +++ b/Documentation/blockdev/drbd/README.txt @@ -0,0 +1,16 @@ +Description + + DRBD is a shared-nothing, synchronously replicated block device. It + is designed to serve as a building block for high availability + clusters and in this context, is a "drop-in" replacement for shared + storage. Simplistically, you could see it as a network RAID 1. + + Please visit http://www.drbd.org to find out more. + +The here included files are intended to help understand the implementation + +DRBD-8.3-data-packets.svg, DRBD-data-packets.svg + relates some functions, and write packets. + +conn-states-8.dot, disk-states-8.dot, node-states-8.dot + The sub graphs of DRBD's state transitions diff --git a/Documentation/blockdev/drbd/conn-states-8.dot b/Documentation/blockdev/drbd/conn-states-8.dot new file mode 100644 index 00000000000..025e8cf5e64 --- /dev/null +++ b/Documentation/blockdev/drbd/conn-states-8.dot @@ -0,0 +1,18 @@ +digraph conn_states { + StandAllone -> WFConnection [ label = "ioctl_set_net()" ] + WFConnection -> Unconnected [ label = "unable to bind()" ] + WFConnection -> WFReportParams [ label = "in connect() after accept" ] + WFReportParams -> StandAllone [ label = "checks in receive_param()" ] + WFReportParams -> Connected [ label = "in receive_param()" ] + WFReportParams -> WFBitMapS [ label = "sync_handshake()" ] + WFReportParams -> WFBitMapT [ label = "sync_handshake()" ] + WFBitMapS -> SyncSource [ label = "receive_bitmap()" ] + WFBitMapT -> SyncTarget [ label = "receive_bitmap()" ] + SyncSource -> Connected + SyncTarget -> Connected + SyncSource -> PausedSyncS + SyncTarget -> PausedSyncT + PausedSyncS -> SyncSource + PausedSyncT -> SyncTarget + Connected -> WFConnection [ label = "* on network error" ] +} diff --git a/Documentation/blockdev/drbd/disk-states-8.dot b/Documentation/blockdev/drbd/disk-states-8.dot new file mode 100644 index 00000000000..d06cfb46fb9 --- /dev/null +++ b/Documentation/blockdev/drbd/disk-states-8.dot @@ -0,0 +1,16 @@ +digraph disk_states { + Diskless -> Inconsistent [ label = "ioctl_set_disk()" ] + Diskless -> Consistent [ label = "ioctl_set_disk()" ] + Diskless -> Outdated [ label = "ioctl_set_disk()" ] + Consistent -> Outdated [ label = "receive_param()" ] + Consistent -> UpToDate [ label = "receive_param()" ] + Consistent -> Inconsistent [ label = "start resync" ] + Outdated -> Inconsistent [ label = "start resync" ] + UpToDate -> Inconsistent [ label = "ioctl_replicate" ] + Inconsistent -> UpToDate [ label = "resync completed" ] + Consistent -> Failed [ label = "io completion error" ] + Outdated -> Failed [ label = "io completion error" ] + UpToDate -> Failed [ label = "io completion error" ] + Inconsistent -> Failed [ label = "io completion error" ] + Failed -> Diskless [ label = "sending notify to peer" ] +} diff --git a/Documentation/blockdev/drbd/drbd-connection-state-overview.dot b/Documentation/blockdev/drbd/drbd-connection-state-overview.dot new file mode 100644 index 00000000000..6d9cf0a7b11 --- /dev/null +++ b/Documentation/blockdev/drbd/drbd-connection-state-overview.dot @@ -0,0 +1,85 @@ +// vim: set sw=2 sts=2 : +digraph { + rankdir=BT + bgcolor=white + + node [shape=plaintext] + node [fontcolor=black] + + StandAlone [ style=filled,fillcolor=gray,label=StandAlone ] + + node [fontcolor=lightgray] + + Unconnected [ label=Unconnected ] + + CommTrouble [ shape=record, + label="{communication loss|{Timeout|BrokenPipe|NetworkFailure}}" ] + + node [fontcolor=gray] + + subgraph cluster_try_connect { + label="try to connect, handshake" + rank=max + WFConnection [ label=WFConnection ] + WFReportParams [ label=WFReportParams ] + } + + TearDown [ label=TearDown ] + + Connected [ label=Connected,style=filled,fillcolor=green,fontcolor=black ] + + node [fontcolor=lightblue] + + StartingSyncS [ label=StartingSyncS ] + StartingSyncT [ label=StartingSyncT ] + + subgraph cluster_bitmap_exchange { + node [fontcolor=red] + fontcolor=red + label="new application (WRITE?) requests blocked\lwhile bitmap is exchanged" + + WFBitMapT [ label=WFBitMapT ] + WFSyncUUID [ label=WFSyncUUID ] + WFBitMapS [ label=WFBitMapS ] + } + + node [fontcolor=blue] + + cluster_resync [ shape=record,label="{<any>resynchronisation process running\l'concurrent' application requests allowed|{{<T>PausedSyncT\nSyncTarget}|{<S>PausedSyncS\nSyncSource}}}" ] + + node [shape=box,fontcolor=black] + + // drbdadm [label="drbdadm connect"] + // handshake [label="drbd_connect()\ndrbd_do_handshake\ndrbd_sync_handshake() etc."] + // comm_error [label="communication trouble"] + + // + // edges + // -------------------------------------- + + StandAlone -> Unconnected [ label="drbdadm connect" ] + Unconnected -> StandAlone [ label="drbdadm disconnect\lor serious communication trouble" ] + Unconnected -> WFConnection [ label="receiver thread is started" ] + WFConnection -> WFReportParams [ headlabel="accept()\land/or \lconnect()\l" ] + + WFReportParams -> StandAlone [ label="during handshake\lpeers do not agree\labout something essential" ] + WFReportParams -> Connected [ label="data identical\lno sync needed",color=green,fontcolor=green ] + + WFReportParams -> WFBitMapS + WFReportParams -> WFBitMapT + WFBitMapT -> WFSyncUUID [minlen=0.1,constraint=false] + + WFBitMapS -> cluster_resync:S + WFSyncUUID -> cluster_resync:T + + edge [color=green] + cluster_resync:any -> Connected [ label="resnyc done",fontcolor=green ] + + edge [color=red] + WFReportParams -> CommTrouble + Connected -> CommTrouble + cluster_resync:any -> CommTrouble + edge [color=black] + CommTrouble -> Unconnected [label="receiver thread is stopped" ] + +} diff --git a/Documentation/blockdev/drbd/node-states-8.dot b/Documentation/blockdev/drbd/node-states-8.dot new file mode 100644 index 00000000000..4a2b00c2354 --- /dev/null +++ b/Documentation/blockdev/drbd/node-states-8.dot @@ -0,0 +1,14 @@ +digraph node_states { + Secondary -> Primary [ label = "ioctl_set_state()" ] + Primary -> Secondary [ label = "ioctl_set_state()" ] +} + +digraph peer_states { + Secondary -> Primary [ label = "recv state packet" ] + Primary -> Secondary [ label = "recv state packet" ] + Primary -> Unknown [ label = "connection lost" ] + Secondary -> Unknown [ label = "connection lost" ] + Unknown -> Primary [ label = "connected" ] + Unknown -> Secondary [ label = "connected" ] +} + diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt new file mode 100644 index 00000000000..630879cd9a4 --- /dev/null +++ b/Documentation/cgroups/blkio-controller.txt @@ -0,0 +1,135 @@ + Block IO Controller + =================== +Overview +======== +cgroup subsys "blkio" implements the block io controller. There seems to be +a need of various kinds of IO control policies (like proportional BW, max BW) +both at leaf nodes as well as at intermediate nodes in a storage hierarchy. +Plan is to use the same cgroup based management interface for blkio controller +and based on user options switch IO policies in the background. + +In the first phase, this patchset implements proportional weight time based +division of disk policy. It is implemented in CFQ. Hence this policy takes +effect only on leaf nodes when CFQ is being used. + +HOWTO +===== +You can do a very simple testing of running two dd threads in two different +cgroups. Here is what you can do. + +- Enable group scheduling in CFQ + CONFIG_CFQ_GROUP_IOSCHED=y + +- Compile and boot into kernel and mount IO controller (blkio). + + mount -t cgroup -o blkio none /cgroup + +- Create two cgroups + mkdir -p /cgroup/test1/ /cgroup/test2 + +- Set weights of group test1 and test2 + echo 1000 > /cgroup/test1/blkio.weight + echo 500 > /cgroup/test2/blkio.weight + +- Create two same size files (say 512MB each) on same disk (file1, file2) and + launch two dd threads in different cgroup to read those files. + + sync + echo 3 > /proc/sys/vm/drop_caches + + dd if=/mnt/sdb/zerofile1 of=/dev/null & + echo $! > /cgroup/test1/tasks + cat /cgroup/test1/tasks + + dd if=/mnt/sdb/zerofile2 of=/dev/null & + echo $! > /cgroup/test2/tasks + cat /cgroup/test2/tasks + +- At macro level, first dd should finish first. To get more precise data, keep + on looking at (with the help of script), at blkio.disk_time and + blkio.disk_sectors files of both test1 and test2 groups. This will tell how + much disk time (in milli seconds), each group got and how many secotors each + group dispatched to the disk. We provide fairness in terms of disk time, so + ideally io.disk_time of cgroups should be in proportion to the weight. + +Various user visible config options +=================================== +CONFIG_CFQ_GROUP_IOSCHED + - Enables group scheduling in CFQ. Currently only 1 level of group + creation is allowed. + +CONFIG_DEBUG_CFQ_IOSCHED + - Enables some debugging messages in blktrace. Also creates extra + cgroup file blkio.dequeue. + +Config options selected automatically +===================================== +These config options are not user visible and are selected/deselected +automatically based on IO scheduler configuration. + +CONFIG_BLK_CGROUP + - Block IO controller. Selected by CONFIG_CFQ_GROUP_IOSCHED. + +CONFIG_DEBUG_BLK_CGROUP + - Debug help. Selected by CONFIG_DEBUG_CFQ_IOSCHED. + +Details of cgroup files +======================= +- blkio.weight + - Specifies per cgroup weight. + + Currently allowed range of weights is from 100 to 1000. + +- blkio.time + - disk time allocated to cgroup per device in milliseconds. First + two fields specify the major and minor number of the device and + third field specifies the disk time allocated to group in + milliseconds. + +- blkio.sectors + - number of sectors transferred to/from disk by the group. First + two fields specify the major and minor number of the device and + third field specifies the number of sectors transferred by the + group to/from the device. + +- blkio.dequeue + - Debugging aid only enabled if CONFIG_DEBUG_CFQ_IOSCHED=y. This + gives the statistics about how many a times a group was dequeued + from service tree of the device. First two fields specify the major + and minor number of the device and third field specifies the number + of times a group was dequeued from a particular device. + +CFQ sysfs tunable +================= +/sys/block/<disk>/queue/iosched/group_isolation + +If group_isolation=1, it provides stronger isolation between groups at the +expense of throughput. By default group_isolation is 0. In general that +means that if group_isolation=0, expect fairness for sequential workload +only. Set group_isolation=1 to see fairness for random IO workload also. + +Generally CFQ will put random seeky workload in sync-noidle category. CFQ +will disable idling on these queues and it does a collective idling on group +of such queues. Generally these are slow moving queues and if there is a +sync-noidle service tree in each group, that group gets exclusive access to +disk for certain period. That means it will bring the throughput down if +group does not have enough IO to drive deeper queue depths and utilize disk +capacity to the fullest in the slice allocated to it. But the flip side is +that even a random reader should get better latencies and overall throughput +if there are lots of sequential readers/sync-idle workload running in the +system. + +If group_isolation=0, then CFQ automatically moves all the random seeky queues +in the root group. That means there will be no service differentiation for +that kind of workload. This leads to better throughput as we do collective +idling on root sync-noidle tree. + +By default one should run with group_isolation=0. If that is not sufficient +and one wants stronger isolation between groups, then set group_isolation=1 +but this will come at cost of reduced throughput. + +What works +========== +- Currently only sync IO queues are support. All the buffered writes are + still system wide and not per group. Hence we will not see service + differentiation between buffered writes between groups. diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index 455d4e6d346..0b33bfe7dde 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt @@ -227,7 +227,14 @@ as the path relative to the root of the cgroup file system. Each cgroup is represented by a directory in the cgroup file system containing the following files describing that cgroup: - - tasks: list of tasks (by pid) attached to that cgroup + - tasks: list of tasks (by pid) attached to that cgroup. This list + is not guaranteed to be sorted. Writing a thread id into this file + moves the thread into this cgroup. + - cgroup.procs: list of tgids in the cgroup. This list is not + guaranteed to be sorted or free of duplicate tgids, and userspace + should sort/uniquify the list if this property is required. + Writing a tgid into this file moves all threads with that tgid into + this cgroup. - notify_on_release flag: run the release agent on exit? - release_agent: the path to use for release notifications (this file exists in the top cgroup only) @@ -374,7 +381,7 @@ Now you want to do something with this cgroup. In this directory you can find several files: # ls -notify_on_release tasks +cgroup.procs notify_on_release tasks (plus whatever files added by the attached subsystems) Now attach your shell to this cgroup: diff --git a/Documentation/connector/cn_test.c b/Documentation/connector/cn_test.c index 1711adc3337..b07add3467f 100644 --- a/Documentation/connector/cn_test.c +++ b/Documentation/connector/cn_test.c @@ -34,7 +34,7 @@ static char cn_test_name[] = "cn_test"; static struct sock *nls; static struct timer_list cn_test_timer; -static void cn_test_callback(struct cn_msg *msg) +static void cn_test_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp) { pr_info("%s: %lu: idx=%x, val=%x, seq=%u, ack=%u, len=%d: %s.\n", __func__, jiffies, msg->id.idx, msg->id.val, diff --git a/Documentation/connector/connector.txt b/Documentation/connector/connector.txt index 81e6bf6ead5..78c9466a9aa 100644 --- a/Documentation/connector/connector.txt +++ b/Documentation/connector/connector.txt @@ -23,7 +23,7 @@ handling, etc... The Connector driver allows any kernelspace agents to use netlink based networking for inter-process communication in a significantly easier way: -int cn_add_callback(struct cb_id *id, char *name, void (*callback) (void *)); +int cn_add_callback(struct cb_id *id, char *name, void (*callback) (struct cn_msg *, struct netlink_skb_parms *)); void cn_netlink_send(struct cn_msg *msg, u32 __group, int gfp_mask); struct cb_id @@ -53,15 +53,15 @@ struct cn_msg Connector interfaces. /*****************************************/ -int cn_add_callback(struct cb_id *id, char *name, void (*callback) (void *)); +int cn_add_callback(struct cb_id *id, char *name, void (*callback) (struct cn_msg *, struct netlink_skb_parms *)); Registers new callback with connector core. struct cb_id *id - unique connector's user identifier. It must be registered in connector.h for legal in-kernel users. char *name - connector's callback symbolic name. - void (*callback) (void *) - connector's callback. - Argument must be dereferenced to struct cn_msg *. + void (*callback) (struct cn..) - connector's callback. + cn_msg and the sender's credentials void cn_del_callback(struct cb_id *id); diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt index b41f3e58aef..f1c5c4bccd3 100644 --- a/Documentation/cputopology.txt +++ b/Documentation/cputopology.txt @@ -1,15 +1,28 @@ -Export cpu topology info via sysfs. Items (attributes) are similar +Export CPU topology info via sysfs. Items (attributes) are similar to /proc/cpuinfo. 1) /sys/devices/system/cpu/cpuX/topology/physical_package_id: -represent the physical package id of cpu X; + + physical package id of cpuX. Typically corresponds to a physical + socket number, but the actual value is architecture and platform + dependent. + 2) /sys/devices/system/cpu/cpuX/topology/core_id: -represent the cpu core id to cpu X; + + the CPU core ID of cpuX. Typically it is the hardware platform's + identifier (rather than the kernel's). The actual value is + architecture and platform dependent. + 3) /sys/devices/system/cpu/cpuX/topology/thread_siblings: -represent the thread siblings to cpu X in the same core; + + internel kernel map of cpuX's hardware threads within the same + core as cpuX + 4) /sys/devices/system/cpu/cpuX/topology/core_siblings: -represent the thread siblings to cpu X in the same physical package; + + internal kernel map of cpuX's hardware threads within the same + physical_package_id. To implement it in an architecture-neutral way, a new source file, drivers/base/topology.c, is to export the 4 attributes. @@ -32,32 +45,32 @@ not defined by include/asm-XXX/topology.h: 3) thread_siblings: just the given CPU 4) core_siblings: just the given CPU -Additionally, cpu topology information is provided under +Additionally, CPU topology information is provided under /sys/devices/system/cpu and includes these files. The internal source for the output is in brackets ("[]"). - kernel_max: the maximum cpu index allowed by the kernel configuration. + kernel_max: the maximum CPU index allowed by the kernel configuration. [NR_CPUS-1] - offline: cpus that are not online because they have been + offline: CPUs that are not online because they have been HOTPLUGGED off (see cpu-hotplug.txt) or exceed the limit - of cpus allowed by the kernel configuration (kernel_max + of CPUs allowed by the kernel configuration (kernel_max above). [~cpu_online_mask + cpus >= NR_CPUS] - online: cpus that are online and being scheduled [cpu_online_mask] + online: CPUs that are online and being scheduled [cpu_online_mask] - possible: cpus that have been allocated resources and can be + possible: CPUs that have been allocated resources and can be brought online if they are present. [cpu_possible_mask] - present: cpus that have been identified as being present in the + present: CPUs that have been identified as being present in the system. [cpu_present_mask] The format for the above output is compatible with cpulist_parse() [see <linux/cpumask.h>]. Some examples follow. -In this example, there are 64 cpus in the system but cpus 32-63 exceed +In this example, there are 64 CPUs in the system but cpus 32-63 exceed the kernel max which is limited to 0..31 by the NR_CPUS config option -being 32. Note also that cpus 2 and 4-31 are not online but could be +being 32. Note also that CPUs 2 and 4-31 are not online but could be brought online as they are both present and possible. kernel_max: 31 @@ -67,8 +80,8 @@ brought online as they are both present and possible. present: 0-31 In this example, the NR_CPUS config option is 128, but the kernel was -started with possible_cpus=144. There are 4 cpus in the system and cpu2 -was manually taken offline (and is the only cpu that can be brought +started with possible_cpus=144. There are 4 CPUs in the system and cpu2 +was manually taken offline (and is the only CPU that can be brought online.) kernel_max: 127 @@ -78,4 +91,4 @@ online.) present: 0-3 See cpu-hotplug.txt for the possible_cpus=NUM kernel start parameter -as well as more information on the various cpumask's. +as well as more information on the various cpumasks. diff --git a/Documentation/debugging-via-ohci1394.txt b/Documentation/debugging-via-ohci1394.txt index 59a91e5c690..611f5a5499b 100644 --- a/Documentation/debugging-via-ohci1394.txt +++ b/Documentation/debugging-via-ohci1394.txt @@ -64,14 +64,14 @@ be used to view the printk buffer of a remote machine, even with live update. Bernhard Kaindl enhanced firescope to support accessing 64-bit machines from 32-bit firescope and vice versa: -- ftp://ftp.suse.de/private/bk/firewire/tools/firescope-0.2.2.tar.bz2 +- http://halobates.de/firewire/firescope-0.2.2.tar.bz2 and he implemented fast system dump (alpha version - read README.txt): -- ftp://ftp.suse.de/private/bk/firewire/tools/firedump-0.1.tar.bz2 +- http://halobates.de/firewire/firedump-0.1.tar.bz2 There is also a gdb proxy for firewire which allows to use gdb to access data which can be referenced from symbols found by gdb in vmlinux: -- ftp://ftp.suse.de/private/bk/firewire/tools/fireproxy-0.33.tar.bz2 +- http://halobates.de/firewire/fireproxy-0.33.tar.bz2 The latest version of this gdb proxy (fireproxy-0.34) can communicate (not yet stable) with kgdb over an memory-based communication module (kgdbom). @@ -178,7 +178,7 @@ Step-by-step instructions for using firescope with early OHCI initialization: Notes ----- -Documentation and specifications: ftp://ftp.suse.de/private/bk/firewire/docs +Documentation and specifications: http://halobates.de/firewire/ FireWire is a trademark of Apple Inc. - for more information please refer to: http://en.wikipedia.org/wiki/FireWire diff --git a/Documentation/dontdiff b/Documentation/dontdiff index e1efc400bed..e151b2a3626 100644 --- a/Documentation/dontdiff +++ b/Documentation/dontdiff @@ -65,6 +65,7 @@ aicdb.h* asm-offsets.h asm_offsets.h autoconf.h* +av_permissions.h bbootsect bin2c binkernel.spec @@ -95,12 +96,14 @@ docproc elf2ecoff elfconfig.h* fixdep +flask.h fore200e_mkfirm fore200e_pca_fw.c* gconf gen-devlist gen_crc32table gen_init_cpio +genheaders genksyms *_gray256.c ihex2fw diff --git a/Documentation/fb/framebuffer.txt b/Documentation/fb/framebuffer.txt index b3e3a035683..fe79e3c8847 100644 --- a/Documentation/fb/framebuffer.txt +++ b/Documentation/fb/framebuffer.txt @@ -312,10 +312,8 @@ and to the following documentation: 8. Mailing list --------------- -There are several frame buffer device related mailing lists at SourceForge: - - linux-fbdev-announce@lists.sourceforge.net, for announcements, - - linux-fbdev-user@lists.sourceforge.net, for generic user support, - - linux-fbdev-devel@lists.sourceforge.net, for project developers. +There is a frame buffer device related mailing list at kernel.org: +linux-fbdev@vger.kernel.org. Point your web browser to http://sourceforge.net/projects/linux-fbdev/ for subscription information and archive browsing. diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 89a47b5aff0..591e94448e6 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -6,6 +6,21 @@ be removed from this file. --------------------------- +What: USER_SCHED +When: 2.6.34 + +Why: USER_SCHED was implemented as a proof of concept for group scheduling. + The effect of USER_SCHED can already be achieved from userspace with + the help of libcgroup. The removal of USER_SCHED will also simplify + the scheduler code with the removal of one major ifdef. There are also + issues USER_SCHED has with USER_NS. A decision was taken not to fix + those and instead remove USER_SCHED. Also new group scheduling + features will not be implemented for USER_SCHED. + +Who: Dhaval Giani <dhaval@linux.vnet.ibm.com> + +--------------------------- + What: PRISM54 When: 2.6.34 @@ -302,18 +317,6 @@ Who: ocfs2-devel@oss.oracle.com --------------------------- -What: SCTP_GET_PEER_ADDRS_NUM_OLD, SCTP_GET_PEER_ADDRS_OLD, - SCTP_GET_LOCAL_ADDRS_NUM_OLD, SCTP_GET_LOCAL_ADDRS_OLD -When: June 2009 -Why: A newer version of the options have been introduced in 2005 that - removes the limitions of the old API. The sctp library has been - converted to use these new options at the same time. Any user - space app that directly uses the old options should convert to using - the new options. -Who: Vlad Yasevich <vladislav.yasevich@hp.com> - ---------------------------- - What: Ability for non root users to shm_get hugetlb pages based on mlock resource limits When: 2.6.31 @@ -404,20 +407,19 @@ Who: Alex Chiang <achiang@hp.com> --------------------------- -What: i2c-voodoo3 driver -When: October 2009 -Why: Superseded by tdfxfb. I2C/DDC support used to live in a separate - driver but this caused driver conflicts. -Who: Jean Delvare <khali@linux-fr.org> - Krzysztof Helt <krzysztof.h1@wp.pl> - ---------------------------- - What: CONFIG_RFKILL_INPUT When: 2.6.33 Why: Should be implemented in userspace, policy daemon. Who: Johannes Berg <johannes@sipsolutions.net> +--------------------------- + +What: CONFIG_INOTIFY +When: 2.6.33 +Why: last user (audit) will be converted to the newer more generic + and more easily maintained fsnotify subsystem +Who: Eric Paris <eparis@redhat.com> + ---------------------------- What: lock_policy_rwsem_* and unlock_policy_rwsem_* will not be @@ -451,3 +453,33 @@ Why: OSS sound_core grabs all legacy minors (0-255) of SOUND_MAJOR will also allow making ALSA OSS emulation independent of sound_core. The dependency will be broken then too. Who: Tejun Heo <tj@kernel.org> + +---------------------------- + +What: Support for VMware's guest paravirtuliazation technique [VMI] will be + dropped. +When: 2.6.37 or earlier. +Why: With the recent innovations in CPU hardware acceleration technologies + from Intel and AMD, VMware ran a few experiments to compare these + techniques to guest paravirtualization technique on VMware's platform. + These hardware assisted virtualization techniques have outperformed the + performance benefits provided by VMI in most of the workloads. VMware + expects that these hardware features will be ubiquitous in a couple of + years, as a result, VMware has started a phased retirement of this + feature from the hypervisor. We will be removing this feature from the + Kernel too. Right now we are targeting 2.6.37 but can retire earlier if + technical reasons (read opportunity to remove major chunk of pvops) + arise. + + Please note that VMI has always been an optimization and non-VMI kernels + still work fine on VMware's platform. + Latest versions of VMware's product which support VMI are, + Workstation 7.0 and VSphere 4.0 on ESX side, future maintainence + releases for these products will continue supporting VMI. + + For more details about VMI retirement take a look at this, + http://blogs.vmware.com/guestosguide/2009/09/vmi-retirement.html + +Who: Alok N Kataria <akataria@vmware.com> + +---------------------------- diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt index 9e94b9491d8..a91e2e2095b 100644 --- a/Documentation/filesystems/caching/fscache.txt +++ b/Documentation/filesystems/caching/fscache.txt @@ -235,6 +235,7 @@ proc files. neg=N Number of negative lookups made pos=N Number of positive lookups made crt=N Number of objects created by lookup + tmo=N Number of lookups timed out and requeued Updates n=N Number of update cookie requests seen nul=N Number of upd reqs given a NULL parent run=N Number of upd reqs granted CPU time @@ -250,8 +251,10 @@ proc files. ok=N Number of successful alloc reqs wt=N Number of alloc reqs that waited on lookup completion nbf=N Number of alloc reqs rejected -ENOBUFS + int=N Number of alloc reqs aborted -ERESTARTSYS ops=N Number of alloc reqs submitted owt=N Number of alloc reqs waited for CPU time + abt=N Number of alloc reqs aborted due to object death Retrvls n=N Number of retrieval (read) requests seen ok=N Number of successful retr reqs wt=N Number of retr reqs that waited on lookup completion @@ -261,6 +264,7 @@ proc files. oom=N Number of retr reqs failed -ENOMEM ops=N Number of retr reqs submitted owt=N Number of retr reqs waited for CPU time + abt=N Number of retr reqs aborted due to object death Stores n=N Number of storage (write) requests seen ok=N Number of successful store reqs agn=N Number of store reqs on a page already pending storage @@ -268,12 +272,37 @@ proc files. oom=N Number of store reqs failed -ENOMEM ops=N Number of store reqs submitted run=N Number of store reqs granted CPU time + pgs=N Number of pages given store req processing time + rxd=N Number of store reqs deleted from tracking tree + olm=N Number of store reqs over store limit + VmScan nos=N Number of release reqs against pages with no pending store + gon=N Number of release reqs against pages stored by time lock granted + bsy=N Number of release reqs ignored due to in-progress store + can=N Number of page stores cancelled due to release req Ops pend=N Number of times async ops added to pending queues run=N Number of times async ops given CPU time enq=N Number of times async ops queued for processing + can=N Number of async ops cancelled + rej=N Number of async ops rejected due to object lookup/create failure dfr=N Number of async ops queued for deferred release rel=N Number of async ops released gc=N Number of deferred-release async ops garbage collected + CacheOp alo=N Number of in-progress alloc_object() cache ops + luo=N Number of in-progress lookup_object() cache ops + luc=N Number of in-progress lookup_complete() cache ops + gro=N Number of in-progress grab_object() cache ops + upo=N Number of in-progress update_object() cache ops + dro=N Number of in-progress drop_object() cache ops + pto=N Number of in-progress put_object() cache ops + syn=N Number of in-progress sync_cache() cache ops + atc=N Number of in-progress attr_changed() cache ops + rap=N Number of in-progress read_or_alloc_page() cache ops + ras=N Number of in-progress read_or_alloc_pages() cache ops + alp=N Number of in-progress allocate_page() cache ops + als=N Number of in-progress allocate_pages() cache ops + wrp=N Number of in-progress write_page() cache ops + ucp=N Number of in-progress uncache_page() cache ops + dsp=N Number of in-progress dissociate_pages() cache ops (*) /proc/fs/fscache/histogram @@ -299,6 +328,87 @@ proc files. jiffy range covered, and the SECS field the equivalent number of seconds. +=========== +OBJECT LIST +=========== + +If CONFIG_FSCACHE_OBJECT_LIST is enabled, the FS-Cache facility will maintain a +list of all the objects currently allocated and allow them to be viewed +through: + + /proc/fs/fscache/objects + +This will look something like: + + [root@andromeda ~]# head /proc/fs/fscache/objects + OBJECT PARENT STAT CHLDN OPS OOP IPR EX READS EM EV F S | NETFS_COOKIE_DEF TY FL NETFS_DATA OBJECT_KEY, AUX_DATA + ======== ======== ==== ===== === === === == ===== == == = = | ================ == == ================ ================ + 17e4b 2 ACTV 0 0 0 0 0 0 7b 4 0 8 | NFS.fh DT 0 ffff88001dd82820 010006017edcf8bbc93b43298fdfbe71e50b57b13a172c0117f38472, e567634700000000000000000000000063f2404a000000000000000000000000c9030000000000000000000063f2404a + 1693a 2 ACTV 0 0 0 0 0 0 7b 4 0 8 | NFS.fh DT 0 ffff88002db23380 010006017edcf8bbc93b43298fdfbe71e50b57b1e0162c01a2df0ea6, 420ebc4a000000000000000000000000420ebc4a0000000000000000000000000e1801000000000000000000420ebc4a + +where the first set of columns before the '|' describe the object: + + COLUMN DESCRIPTION + ======= =============================================================== + OBJECT Object debugging ID (appears as OBJ%x in some debug messages) + PARENT Debugging ID of parent object + STAT Object state + CHLDN Number of child objects of this object + OPS Number of outstanding operations on this object + OOP Number of outstanding child object management operations + IPR + EX Number of outstanding exclusive operations + READS Number of outstanding read operations + EM Object's event mask + EV Events raised on this object + F Object flags + S Object slow-work work item flags + +and the second set of columns describe the object's cookie, if present: + + COLUMN DESCRIPTION + =============== ======================================================= + NETFS_COOKIE_DEF Name of netfs cookie definition + TY Cookie type (IX - index, DT - data, hex - special) + FL Cookie flags + NETFS_DATA Netfs private data stored in the cookie + OBJECT_KEY Object key } 1 column, with separating comma + AUX_DATA Object aux data } presence may be configured + +The data shown may be filtered by attaching the a key to an appropriate keyring +before viewing the file. Something like: + + keyctl add user fscache:objlist <restrictions> @s + +where <restrictions> are a selection of the following letters: + + K Show hexdump of object key (don't show if not given) + A Show hexdump of object aux data (don't show if not given) + +and the following paired letters: + + C Show objects that have a cookie + c Show objects that don't have a cookie + B Show objects that are busy + b Show objects that aren't busy + W Show objects that have pending writes + w Show objects that don't have pending writes + R Show objects that have outstanding reads + r Show objects that don't have outstanding reads + S Show objects that have slow work queued + s Show objects that don't have slow work queued + +If neither side of a letter pair is given, then both are implied. For example: + + keyctl add user fscache:objlist KB @s + +shows objects that are busy, and lists their object keys, but does not dump +their auxiliary data. It also implies "CcWwRrSs", but as 'B' is given, 'b' is +not implied. + +By default all objects and all fields will be shown. + + ========= DEBUGGING ========= diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt index 2666b1ed5e9..1902c57b72e 100644 --- a/Documentation/filesystems/caching/netfs-api.txt +++ b/Documentation/filesystems/caching/netfs-api.txt @@ -641,7 +641,7 @@ data file must be retired (see the relinquish cookie function below). Furthermore, note that this does not cancel the asynchronous read or write operation started by the read/alloc and write functions, so the page -invalidation and release functions must use: +invalidation functions must use: bool fscache_check_page_write(struct fscache_cookie *cookie, struct page *page); @@ -654,6 +654,25 @@ to see if a page is being written to the cache, and: to wait for it to finish if it is. +When releasepage() is being implemented, a special FS-Cache function exists to +manage the heuristics of coping with vmscan trying to eject pages, which may +conflict with the cache trying to write pages to the cache (which may itself +need to allocate memory): + + bool fscache_maybe_release_page(struct fscache_cookie *cookie, + struct page *page, + gfp_t gfp); + +This takes the netfs cookie, and the page and gfp arguments as supplied to +releasepage(). It will return false if the page cannot be released yet for +some reason and if it returns true, the page has been uncached and can now be +released. + +To make a page available for release, this function may wait for an outstanding +storage request to complete, or it may attempt to cancel the storage request - +in which case the page will not be stored in the cache this time. + + ========================== INDEX AND DATA FILE UPDATE ========================== diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt index 570f9bd9be2..05d5cf1d743 100644 --- a/Documentation/filesystems/ext3.txt +++ b/Documentation/filesystems/ext3.txt @@ -123,10 +123,18 @@ resuid=n The user ID which may use the reserved blocks. sb=n Use alternate superblock at this location. -quota -noquota -grpquota -usrquota +quota These options are ignored by the filesystem. They +noquota are used only by quota tools to recognize volumes +grpquota where quota should be turned on. See documentation +usrquota in the quota-tools package for more details + (http://sourceforge.net/projects/linuxquota). + +jqfmt=<quota type> These options tell filesystem details about quota +usrjquota=<file> so that quota information can be properly updated +grpjquota=<file> during journal replay. They replace the above + quota options. See documentation in the quota-tools + package for more details + (http://sourceforge.net/projects/linuxquota). bh (*) ext3 associates buffer heads to data pages to nobh (a) cache disk block mapping information diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index bf4f4b7e11b..6d94e0696f8 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -134,9 +134,15 @@ ro Mount filesystem read only. Note that ext4 will mount options "ro,noload" can be used to prevent writes to the filesystem. +journal_checksum Enable checksumming of the journal transactions. + This will allow the recovery code in e2fsck and the + kernel to detect corruption in the kernel. It is a + compatible change and will be ignored by older kernels. + journal_async_commit Commit block can be written to disk without waiting for descriptor blocks. If enabled older kernels cannot - mount the device. + mount the device. This will enable 'journal_checksum' + internally. journal=update Update the ext4 file system's journal to the current format. diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt index c2a0871280a..c58b9f5ba00 100644 --- a/Documentation/filesystems/ocfs2.txt +++ b/Documentation/filesystems/ocfs2.txt @@ -20,15 +20,16 @@ Lots of code taken from ext3 and other projects. Authors in alphabetical order: Joel Becker <joel.becker@oracle.com> Zach Brown <zach.brown@oracle.com> -Mark Fasheh <mark.fasheh@oracle.com> +Mark Fasheh <mfasheh@suse.com> Kurt Hackel <kurt.hackel@oracle.com> +Tao Ma <tao.ma@oracle.com> Sunil Mushran <sunil.mushran@oracle.com> Manish Singh <manish.singh@oracle.com> +Tiger Yang <tiger.yang@oracle.com> Caveats ======= Features which OCFS2 does not support yet: - - quotas - Directory change notification (F_NOTIFY) - Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease) @@ -70,7 +71,6 @@ commit=nrsec (*) Ocfs2 can be told to sync all its data and metadata performance. localalloc=8(*) Allows custom localalloc size in MB. If the value is too large, the fs will silently revert it to the default. - Localalloc is not enabled for local mounts. localflocks This disables cluster aware flock. inode64 Indicates that Ocfs2 is allowed to create inodes at any location in the filesystem, including those which diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 2c48f945546..4af0018533f 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -1072,7 +1072,8 @@ second). The meanings of the columns are as follows, from left to right: - irq: servicing interrupts - softirq: servicing softirqs - steal: involuntary wait -- guest: running a guest +- guest: running a normal guest +- guest_nice: running a niced guest The "intr" line gives counts of interrupts serviced since boot time, for each of the possible system interrupts. The first column is the total of all diff --git a/Documentation/flexible-arrays.txt b/Documentation/flexible-arrays.txt index 84eb26808de..cb8a3a00cc9 100644 --- a/Documentation/flexible-arrays.txt +++ b/Documentation/flexible-arrays.txt @@ -1,5 +1,5 @@ Using flexible arrays in the kernel -Last updated for 2.6.31 +Last updated for 2.6.32 Jonathan Corbet <corbet@lwn.net> Large contiguous memory allocations can be unreliable in the Linux kernel. @@ -40,6 +40,13 @@ argument is passed directly to the internal memory allocation calls. With the current code, using flags to ask for high memory is likely to lead to notably unpleasant side effects. +It is also possible to define flexible arrays at compile time with: + + DEFINE_FLEX_ARRAY(name, element_size, total); + +This macro will result in a definition of an array with the given name; the +element size and total will be checked for validity at compile time. + Storing data into a flexible array is accomplished with a call to: int flex_array_put(struct flex_array *array, unsigned int element_nr, @@ -76,16 +83,30 @@ particular element has never been allocated. Note that it is possible to get back a valid pointer for an element which has never been stored in the array. Memory for array elements is allocated one page at a time; a single allocation could provide memory for several -adjacent elements. The flexible array code does not know if a specific -element has been written; it only knows if the associated memory is -present. So a flex_array_get() call on an element which was never stored -in the array has the potential to return a pointer to random data. If the -caller does not have a separate way to know which elements were actually -stored, it might be wise, at least, to add GFP_ZERO to the flags argument -to ensure that all elements are zeroed. - -There is no way to remove a single element from the array. It is possible, -though, to remove all elements with a call to: +adjacent elements. Flexible array elements are normally initialized to the +value FLEX_ARRAY_FREE (defined as 0x6c in <linux/poison.h>), so errors +involving that number probably result from use of unstored array entries. +Note that, if array elements are allocated with __GFP_ZERO, they will be +initialized to zero and this poisoning will not happen. + +Individual elements in the array can be cleared with: + + int flex_array_clear(struct flex_array *array, unsigned int element_nr); + +This function will set the given element to FLEX_ARRAY_FREE and return +zero. If storage for the indicated element is not allocated for the array, +flex_array_clear() will return -EINVAL instead. Note that clearing an +element does not release the storage associated with it; to reduce the +allocated size of an array, call: + + int flex_array_shrink(struct flex_array *array); + +The return value will be the number of pages of memory actually freed. +This function works by scanning the array for pages containing nothing but +FLEX_ARRAY_FREE bytes, so (1) it can be expensive, and (2) it will not work +if the array's pages are allocated with __GFP_ZERO. + +It is possible to remove all elements of an array with a call to: void flex_array_free_parts(struct flex_array *array); diff --git a/Documentation/hwmon/ltc4215 b/Documentation/hwmon/ltc4215 index 2e6a21eb656..c196a184625 100644 --- a/Documentation/hwmon/ltc4215 +++ b/Documentation/hwmon/ltc4215 @@ -22,12 +22,13 @@ Usage Notes ----------- This driver does not probe for LTC4215 devices, due to the fact that some -of the possible addresses are unfriendly to probing. You will need to use -the "force" parameter to tell the driver where to find the device. +of the possible addresses are unfriendly to probing. You will have to +instantiate the devices explicitly. Example: the following will load the driver for an LTC4215 at address 0x44 on I2C bus #0: -$ modprobe ltc4215 force=0,0x44 +$ modprobe ltc4215 +$ echo ltc4215 0x44 > /sys/bus/i2c/devices/i2c-0/new_device Sysfs entries diff --git a/Documentation/hwmon/ltc4245 b/Documentation/hwmon/ltc4245 index bae7a3adc5d..02838a47d86 100644 --- a/Documentation/hwmon/ltc4245 +++ b/Documentation/hwmon/ltc4245 @@ -23,12 +23,13 @@ Usage Notes ----------- This driver does not probe for LTC4245 devices, due to the fact that some -of the possible addresses are unfriendly to probing. You will need to use -the "force" parameter to tell the driver where to find the device. +of the possible addresses are unfriendly to probing. You will have to +instantiate the devices explicitly. Example: the following will load the driver for an LTC4245 at address 0x23 on I2C bus #1: -$ modprobe ltc4245 force=1,0x23 +$ modprobe ltc4245 +$ echo ltc4245 0x23 > /sys/bus/i2c/devices/i2c-1/new_device Sysfs entries diff --git a/Documentation/hwmon/sysfs-interface b/Documentation/hwmon/sysfs-interface index dcbd502c879..82def883361 100644 --- a/Documentation/hwmon/sysfs-interface +++ b/Documentation/hwmon/sysfs-interface @@ -353,10 +353,20 @@ power[1-*]_average Average power use Unit: microWatt RO -power[1-*]_average_interval Power use averaging interval +power[1-*]_average_interval Power use averaging interval. A poll + notification is sent to this file if the + hardware changes the averaging interval. Unit: milliseconds RW +power[1-*]_average_interval_max Maximum power use averaging interval + Unit: milliseconds + RO + +power[1-*]_average_interval_min Minimum power use averaging interval + Unit: milliseconds + RO + power[1-*]_average_highest Historical average maximum power use Unit: microWatt RO @@ -365,6 +375,18 @@ power[1-*]_average_lowest Historical average minimum power use Unit: microWatt RO +power[1-*]_average_max A poll notification is sent to + power[1-*]_average when power use + rises above this value. + Unit: microWatt + RW + +power[1-*]_average_min A poll notification is sent to + power[1-*]_average when power use + sinks below this value. + Unit: microWatt + RW + power[1-*]_input Instantaneous power use Unit: microWatt RO @@ -381,6 +403,39 @@ power[1-*]_reset_history Reset input_highest, input_lowest, average_highest and average_lowest. WO +power[1-*]_accuracy Accuracy of the power meter. + Unit: Percent + RO + +power[1-*]_alarm 1 if the system is drawing more power than the + cap allows; 0 otherwise. A poll notification is + sent to this file when the power use exceeds the + cap. This file only appears if the cap is known + to be enforced by hardware. + RO + +power[1-*]_cap If power use rises above this limit, the + system should take action to reduce power use. + A poll notification is sent to this file if the + cap is changed by the hardware. The *_cap + files only appear if the cap is known to be + enforced by hardware. + Unit: microWatt + RW + +power[1-*]_cap_hyst Margin of hysteresis built around capping and + notification. + Unit: microWatt + RW + +power[1-*]_cap_max Maximum cap that can be set. + Unit: microWatt + RO + +power[1-*]_cap_min Minimum cap that can be set. + Unit: microWatt + RO + ********** * Energy * ********** diff --git a/Documentation/i2c/busses/i2c-piix4 b/Documentation/i2c/busses/i2c-piix4 index c5b37c57055..ac540c71c7e 100644 --- a/Documentation/i2c/busses/i2c-piix4 +++ b/Documentation/i2c/busses/i2c-piix4 @@ -8,7 +8,7 @@ Supported adapters: Datasheet: Only available via NDA from ServerWorks * ATI IXP200, IXP300, IXP400, SB600, SB700 and SB800 southbridges Datasheet: Not publicly available - * AMD SB900 + * AMD Hudson-2 Datasheet: Not publicly available * Standard Microsystems (SMSC) SLC90E66 (Victory66) southbridge Datasheet: Publicly available at the SMSC website http://www.smsc.com diff --git a/Documentation/i2c/busses/i2c-voodoo3 b/Documentation/i2c/busses/i2c-voodoo3 deleted file mode 100644 index 62d90a454d3..00000000000 --- a/Documentation/i2c/busses/i2c-voodoo3 +++ /dev/null @@ -1,62 +0,0 @@ -Kernel driver i2c-voodoo3 - -Supported adapters: - * 3dfx Voodoo3 based cards - * Voodoo Banshee based cards - -Authors: - Frodo Looijaard <frodol@dds.nl>, - Philip Edelbrock <phil@netroedge.com>, - Ralph Metzler <rjkm@thp.uni-koeln.de>, - Mark D. Studebaker <mdsxyz123@yahoo.com> - -Main contact: Philip Edelbrock <phil@netroedge.com> - -The code is based upon Ralph's test code (he did the hard stuff ;') - -Description ------------ - -The 3dfx Voodoo3 chip contains two I2C interfaces (aka a I2C 'master' or -'host'). - -The first interface is used for DDC (Data Display Channel) which is a -serial channel through the VGA monitor connector to a DDC-compliant -monitor. This interface is defined by the Video Electronics Standards -Association (VESA). The standards are available for purchase at -http://www.vesa.org . - -The second interface is a general-purpose I2C bus. The intent by 3dfx was -to allow manufacturers to add extra chips to the video card such as a -TV-out chip such as the BT869 or possibly even I2C based temperature -sensors like the ADM1021 or LM75. - -Stability ---------- - -Seems to be stable on the test machine, but needs more testing on other -machines. Simultaneous accesses of the DDC and I2C busses may cause errors. - -Supported Devices ------------------ - -Specifically, this driver was written and tested on the '3dfx Voodoo3 AGP -3000' which has a tv-out feature (s-video or composite). According to the -docs and discussions, this code should work for any Voodoo3 based cards as -well as Voodoo Banshee based cards. The DDC interface has been tested on a -Voodoo Banshee card. - -Issues ------- - -Probably many, but it seems to work OK on my system. :') - - -External Device Connection --------------------------- - -The digital video input jumpers give availability to the I2C bus. -Specifically, pins 13 and 25 (bottom row middle, and bottom right-end) are -the I2C clock and I2C data lines, respectively. +5V and GND are probably -also easily available making the addition of extra I2C/SMBus devices easy -to implement. diff --git a/Documentation/i2c/i2c-stub b/Documentation/i2c/i2c-stub index 0d8be1c20c1..fa4b669c166 100644 --- a/Documentation/i2c/i2c-stub +++ b/Documentation/i2c/i2c-stub @@ -2,9 +2,9 @@ MODULE: i2c-stub DESCRIPTION: -This module is a very simple fake I2C/SMBus driver. It implements four -types of SMBus commands: write quick, (r/w) byte, (r/w) byte data, and -(r/w) word data. +This module is a very simple fake I2C/SMBus driver. It implements five +types of SMBus commands: write quick, (r/w) byte, (r/w) byte data, (r/w) +word data, and (r/w) I2C block data. You need to provide chip addresses as a module parameter when loading this driver, which will then only react to SMBus commands to these addresses. @@ -21,8 +21,8 @@ EEPROMs, among others. The typical use-case is like this: 1. load this module - 2. use i2cset (from lm_sensors project) to pre-load some data - 3. load the target sensors chip driver module + 2. use i2cset (from the i2c-tools project) to pre-load some data + 3. load the target chip driver module 4. observe its behavior in the kernel log There's a script named i2c-stub-from-dump in the i2c-tools package which @@ -33,6 +33,12 @@ PARAMETERS: int chip_addr[10]: The SMBus addresses to emulate chips at. +unsigned long functionality: + Functionality override, to disable some commands. See I2C_FUNC_* + constants in <linux/i2c.h> for the suitable values. For example, + value 0x1f0000 would only enable the quick, byte and byte data + commands. + CAVEATS: If your target driver polls some byte or word waiting for it to change, the diff --git a/Documentation/i2c/instantiating-devices b/Documentation/i2c/instantiating-devices index c740b7b4108..e89490270ab 100644 --- a/Documentation/i2c/instantiating-devices +++ b/Documentation/i2c/instantiating-devices @@ -188,7 +188,7 @@ segment, the address is sufficient to uniquely identify the device to be deleted. Example: -# echo eeprom 0x50 > /sys/class/i2c-adapter/i2c-3/new_device +# echo eeprom 0x50 > /sys/bus/i2c/devices/i2c-3/new_device While this interface should only be used when in-kernel device declaration can't be done, there is a variety of cases where it can be helpful: diff --git a/Documentation/i2c/old-module-parameters b/Documentation/i2c/old-module-parameters new file mode 100644 index 00000000000..8e2b629d533 --- /dev/null +++ b/Documentation/i2c/old-module-parameters @@ -0,0 +1,44 @@ +I2C device driver binding control from user-space +================================================= + +Up to kernel 2.6.32, many i2c drivers used helper macros provided by +<linux/i2c.h> which created standard module parameters to let the user +control how the driver would probe i2c buses and attach to devices. These +parameters were known as "probe" (to let the driver probe for an extra +address), "force" (to forcibly attach the driver to a given device) and +"ignore" (to prevent a driver from probing a given address). + +With the conversion of the i2c subsystem to the standard device driver +binding model, it became clear that these per-module parameters were no +longer needed, and that a centralized implementation was possible. The new, +sysfs-based interface is described in the documentation file +"instantiating-devices", section "Method 4: Instantiate from user-space". + +Below is a mapping from the old module parameters to the new interface. + +Attaching a driver to an I2C device +----------------------------------- + +Old method (module parameters): +# modprobe <driver> probe=1,0x2d +# modprobe <driver> force=1,0x2d +# modprobe <driver> force_<device>=1,0x2d + +New method (sysfs interface): +# echo <device> 0x2d > /sys/bus/i2c/devices/i2c-1/new_device + +Preventing a driver from attaching to an I2C device +--------------------------------------------------- + +Old method (module parameters): +# modprobe <driver> ignore=1,0x2f + +New method (sysfs interface): +# echo dummy 0x2f > /sys/bus/i2c/devices/i2c-1/new_device +# modprobe <driver> + +Of course, it is important to instantiate the "dummy" device before loading +the driver. The dummy device will be handled by i2c-core itself, preventing +other drivers from binding to it later on. If there is a real device at the +problematic address, and you want another driver to bind to it, then simply +pass the name of the device in question instead of "dummy". diff --git a/Documentation/infiniband/user_mad.txt b/Documentation/infiniband/user_mad.txt index 744687dd195..8a366959f5c 100644 --- a/Documentation/infiniband/user_mad.txt +++ b/Documentation/infiniband/user_mad.txt @@ -128,8 +128,8 @@ Setting IsSM Capability Bit To create the appropriate character device files automatically with udev, a rule like - KERNEL="umad*", NAME="infiniband/%k" - KERNEL="issm*", NAME="infiniband/%k" + KERNEL=="umad*", NAME="infiniband/%k" + KERNEL=="issm*", NAME="infiniband/%k" can be used. This will create device nodes named diff --git a/Documentation/infiniband/user_verbs.txt b/Documentation/infiniband/user_verbs.txt index f847501e50b..afe3f8da901 100644 --- a/Documentation/infiniband/user_verbs.txt +++ b/Documentation/infiniband/user_verbs.txt @@ -58,7 +58,7 @@ Memory pinning To create the appropriate character device files automatically with udev, a rule like - KERNEL="uverbs*", NAME="infiniband/%k" + KERNEL=="uverbs*", NAME="infiniband/%k" can be used. This will create device nodes named diff --git a/Documentation/isdn/INTERFACE.CAPI b/Documentation/isdn/INTERFACE.CAPI index 686e107923e..5fe8de5cc72 100644 --- a/Documentation/isdn/INTERFACE.CAPI +++ b/Documentation/isdn/INTERFACE.CAPI @@ -60,10 +60,9 @@ open() operation on regular files or character devices. After a successful return from register_appl(), CAPI messages from the application may be passed to the driver for the device via calls to the -send_message() callback function. The CAPI message to send is stored in the -data portion of an skb. Conversely, the driver may call Kernel CAPI's -capi_ctr_handle_message() function to pass a received CAPI message to Kernel -CAPI for forwarding to an application, specifying its ApplID. +send_message() callback function. Conversely, the driver may call Kernel +CAPI's capi_ctr_handle_message() function to pass a received CAPI message to +Kernel CAPI for forwarding to an application, specifying its ApplID. Deregistration requests (CAPI operation CAPI_RELEASE) from applications are forwarded as calls to the release_appl() callback function, passing the same @@ -142,6 +141,7 @@ u16 (*send_message)(struct capi_ctr *ctrlr, struct sk_buff *skb) to accepting or queueing the message. Errors occurring during the actual processing of the message should be signaled with an appropriate reply message. + May be called in process or interrupt context. Calls to this function are not serialized by Kernel CAPI, ie. it must be prepared to be re-entered. @@ -154,7 +154,8 @@ read_proc_t *ctr_read_proc system entry, /proc/capi/controllers/<n>; will be called with a pointer to the device's capi_ctr structure as the last (data) argument -Note: Callback functions are never called in interrupt context. +Note: Callback functions except send_message() are never called in interrupt +context. - to be filled in before calling capi_ctr_ready(): @@ -171,14 +172,40 @@ u8 serial[CAPI_SERIAL_LEN] value to return for CAPI_GET_SERIAL -4.3 The _cmsg Structure +4.3 SKBs + +CAPI messages are passed between Kernel CAPI and the driver via send_message() +and capi_ctr_handle_message(), stored in the data portion of a socket buffer +(skb). Each skb contains a single CAPI message coded according to the CAPI 2.0 +standard. + +For the data transfer messages, DATA_B3_REQ and DATA_B3_IND, the actual +payload data immediately follows the CAPI message itself within the same skb. +The Data and Data64 parameters are not used for processing. The Data64 +parameter may be omitted by setting the length field of the CAPI message to 22 +instead of 30. + + +4.4 The _cmsg Structure (declared in <linux/isdn/capiutil.h>) The _cmsg structure stores the contents of a CAPI 2.0 message in an easily -accessible form. It contains members for all possible CAPI 2.0 parameters, of -which only those appearing in the message type currently being processed are -actually used. Unused members should be set to zero. +accessible form. It contains members for all possible CAPI 2.0 parameters, +including subparameters of the Additional Info and B Protocol structured +parameters, with the following exceptions: + +* second Calling party number (CONNECT_IND) + +* Data64 (DATA_B3_REQ and DATA_B3_IND) + +* Sending complete (subparameter of Additional Info, CONNECT_REQ and INFO_REQ) + +* Global Configuration (subparameter of B Protocol, CONNECT_REQ, CONNECT_RESP + and SELECT_B_PROTOCOL_REQ) + +Only those parameters appearing in the message type currently being processed +are actually used. Unused members should be set to zero. Members are named after the CAPI 2.0 standard names of the parameters they represent. See <linux/isdn/capiutil.h> for the exact spelling. Member data @@ -190,18 +217,19 @@ u16 for CAPI parameters of type 'word' u32 for CAPI parameters of type 'dword' -_cstruct for CAPI parameters of type 'struct' not containing any - variably-sized (struct) subparameters (eg. 'Called Party Number') +_cstruct for CAPI parameters of type 'struct' The member is a pointer to a buffer containing the parameter in CAPI encoding (length + content). It may also be NULL, which will be taken to represent an empty (zero length) parameter. + Subparameters are stored in encoded form within the content part. -_cmstruct for CAPI parameters of type 'struct' containing 'struct' - subparameters ('Additional Info' and 'B Protocol') +_cmstruct alternative representation for CAPI parameters of type 'struct' + (used only for the 'Additional Info' and 'B Protocol' parameters) The representation is a single byte containing one of the values: - CAPI_DEFAULT: the parameter is empty - CAPI_COMPOSE: the values of the subparameters are stored - individually in the corresponding _cmsg structure members + CAPI_DEFAULT: The parameter is empty/absent. + CAPI_COMPOSE: The parameter is present. + Subparameter values are stored individually in the corresponding + _cmsg structure members. Functions capi_cmsg2message() and capi_message2cmsg() are provided to convert messages between their transport encoding described in the CAPI 2.0 standard @@ -297,3 +325,26 @@ char *capi_cmd2str(u8 Command, u8 Subcommand) be NULL if the command/subcommand is not one of those defined in the CAPI 2.0 standard. + +7. Debugging + +The module kernelcapi has a module parameter showcapimsgs controlling some +debugging output produced by the module. It can only be set when the module is +loaded, via a parameter "showcapimsgs=<n>" to the modprobe command, either on +the command line or in the configuration file. + +If the lowest bit of showcapimsgs is set, kernelcapi logs controller and +application up and down events. + +In addition, every registered CAPI controller has an associated traceflag +parameter controlling how CAPI messages sent from and to tha controller are +logged. The traceflag parameter is initialized with the value of the +showcapimsgs parameter when the controller is registered, but can later be +changed via the MANUFACTURER_REQ command KCAPI_CMD_TRACE. + +If the value of traceflag is non-zero, CAPI messages are logged. +DATA_B3 messages are only logged if the value of traceflag is > 2. + +If the lowest bit of traceflag is set, only the command/subcommand and message +length are logged. Otherwise, kernelcapi logs a readable representation of +the entire message. diff --git a/Documentation/isdn/README.gigaset b/Documentation/isdn/README.gigaset index f9963103ae3..0fc9831d7ec 100644 --- a/Documentation/isdn/README.gigaset +++ b/Documentation/isdn/README.gigaset @@ -5,7 +5,7 @@ GigaSet 307x Device Driver ------------ 1.1. Hardware -------- - This release supports the connection of the Gigaset 307x/417x family of + This driver supports the connection of the Gigaset 307x/417x family of ISDN DECT bases via Gigaset M101 Data, Gigaset M105 Data or direct USB connection. The following devices are reported to be compatible: @@ -33,7 +33,7 @@ GigaSet 307x Device Driver http://gigaset307x.sourceforge.net/ We had also reports from users of Gigaset M105 who could use the drivers - with SX 100 and CX 100 ISDN bases (only in unimodem mode, see section 2.4.) + with SX 100 and CX 100 ISDN bases (only in unimodem mode, see section 2.5.) If you have another device that works with our driver, please let us know. Chances of getting an USB device to work are good if the output of @@ -49,7 +49,7 @@ GigaSet 307x Device Driver -------- The driver works with ISDN4linux and so can be used with any software which is able to use ISDN4linux for ISDN connections (voice or data). - CAPI4Linux support is planned but not yet available. + Experimental Kernel CAPI support is available as a compilation option. There are some user space tools available at http://sourceforge.net/projects/gigaset307x/ @@ -102,20 +102,28 @@ GigaSet 307x Device Driver 2.3. ISDN4linux ---------- This is the "normal" mode of operation. After loading the module you can - set up the ISDN system just as you'd do with any ISDN card. - Your distribution should provide some configuration utility. - If not, you can use some HOWTOs like + set up the ISDN system just as you'd do with any ISDN card supported by + the ISDN4Linux subsystem. Most distributions provide some configuration + utility. If not, you can use some HOWTOs like http://www.linuxhaven.de/dlhp/HOWTO/DE-ISDN-HOWTO-5.html - If this doesn't work, because you have some recent device like SX100 where + If this doesn't work, because you have some device like SX100 where debug output (see section 3.2.) shows something like this when dialing CMD Received: ERROR Available Params: 0 Connection State: 0, Response: -1 gigaset_process_response: resp_code -1 in ConState 0 ! Timeout occurred - you might need to use unimodem mode: + you might need to use unimodem mode. (see section 2.5.) -2.4. Unimodem mode +2.4. CAPI + ---- + If the driver is compiled with CAPI support (kernel configuration option + GIGASET_CAPI, experimental) it can also be used with CAPI 2.0 kernel and + user space applications. ISDN4Linux is supported in this configuration + via the capidrv compatibility driver. The kernel module capidrv.ko must + be loaded explicitly ("modprobe capidrv") if needed. + +2.5. Unimodem mode ------------- This is needed for some devices [e.g. SX100] as they have problems with the "normal" commands. @@ -160,7 +168,7 @@ GigaSet 307x Device Driver configuration file like /etc/modprobe.conf.local, using that should be preferred. -2.5. Call-ID (CID) mode +2.6. Call-ID (CID) mode ------------------ Call-IDs are numbers used to tag commands to, and responses from, the Gigaset base in order to support the simultaneous handling of multiple @@ -188,7 +196,7 @@ GigaSet 307x Device Driver You can also use /sys/class/tty/ttyGxy/cidmode for changing the CID mode setting (ttyGxy is ttyGU0 or ttyGB0). -2.6. Unregistered Wireless Devices (M101/M105) +2.7. Unregistered Wireless Devices (M101/M105) ----------------------------------------- The main purpose of the ser_gigaset and usb_gigaset drivers is to allow the M101 and M105 wireless devices to be used as ISDN devices for ISDN @@ -228,7 +236,7 @@ GigaSet 307x Device Driver You have two or more DECT data adapters (M101/M105) and only the first one you turn on works. Solution: - Select Unimodem mode for all DECT data adapters. (see section 2.4.) + Select Unimodem mode for all DECT data adapters. (see section 2.5.) Problem: Messages like this: @@ -236,7 +244,7 @@ GigaSet 307x Device Driver appear in your syslog. Solution: Check whether your M10x wireless device is correctly registered to the - Gigaset base. (see section 2.6.) + Gigaset base. (see section 2.7.) 3.2. Telling the driver to provide more information ---------------------------------------------- diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 6fa7292947e..495a39a7734 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -85,7 +85,6 @@ parameter is applicable: PPT Parallel port support is enabled. PS2 Appropriate PS/2 support is enabled. RAM RAM disk support is enabled. - ROOTPLUG The example Root Plug LSM is enabled. S390 S390 architecture is enabled. SCSI Appropriate SCSI support is enabled. A lot of drivers has their options described inside of @@ -345,6 +344,15 @@ and is between 256 and 4096 characters. It is defined in the file Change the amount of debugging information output when initialising the APIC and IO-APIC components. + show_lapic= [APIC,X86] Advanced Programmable Interrupt Controller + Limit apic dumping. The parameter defines the maximal + number of local apics being dumped. Also it is possible + to set it to "all" by meaning -- no limit here. + Format: { 1 (default) | 2 | ... | all }. + The parameter valid if only apic=debug or + apic=verbose is specified. + Example: apic=debug show_lapic=all + apm= [APM] Advanced Power Management See header of arch/x86/kernel/apm_32.c. @@ -671,6 +679,7 @@ and is between 256 and 4096 characters. It is defined in the file earlyprintk= [X86,SH,BLACKFIN] earlyprintk=vga earlyprintk=serial[,ttySn[,baudrate]] + earlyprintk=ttySn[,baudrate] earlyprintk=dbgp[debugController#] Append ",keep" to not disable it when the real console @@ -778,6 +787,13 @@ and is between 256 and 4096 characters. It is defined in the file by the set_ftrace_notrace file in the debugfs tracing directory. + ftrace_graph_filter=[function-list] + [FTRACE] Limit the top level callers functions traced + by the function graph tracer at boot up. + function-list is a comma separated list of functions + that can be changed at run time by the + set_graph_function file in the debugfs tracing directory. + gamecon.map[2|3]= [HW,JOY] Multisystem joystick and NES/SNES/PSX pad support via parallel port (up to 5 devices per port) @@ -2031,8 +2047,15 @@ and is between 256 and 4096 characters. It is defined in the file print-fatal-signals= [KNL] debug: print fatal signals - print-fatal-signals=1: print segfault info to - the kernel console. + + If enabled, warn about various signal handling + related application anomalies: too many signals, + too many POSIX.1 timers, fatal signals causing a + coredump - etc. + + If you hit the warning due to signal overflow, + you might want to try "ulimit -i unlimited". + default: off. printk.time= Show timing data prefixed to each printk message line @@ -2163,15 +2186,6 @@ and is between 256 and 4096 characters. It is defined in the file Useful for devices that are detected asynchronously (e.g. USB and MMC devices). - root_plug.vendor_id= - [ROOTPLUG] Override the default vendor ID - - root_plug.product_id= - [ROOTPLUG] Override the default product ID - - root_plug.debug= - [ROOTPLUG] Enable debugging output - rw [KNL] Mount root device read-write on boot S [KNL] Run init in single mode @@ -2181,6 +2195,8 @@ and is between 256 and 4096 characters. It is defined in the file sbni= [NET] Granch SBNI12 leased line adapter + sched_debug [KNL] Enables verbose scheduler debug messages. + sc1200wdt= [HW,WDT] SC1200 WDT (watchdog) driver Format: <io>[,<timeout>[,<isapnp>]] @@ -2589,6 +2605,9 @@ and is between 256 and 4096 characters. It is defined in the file uart6850= [HW,OSS] Format: <io>,<irq> + uhash_entries= [KNL,NET] + Set number of hash buckets for UDP/UDP-Lite connections + uhci-hcd.ignore_oc= [USB] Ignore overcurrent events (default N). Some badly-designed motherboards generate lots of diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index 5a4bc8cf6d0..e1a11416102 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -593,6 +593,115 @@ struct kvm_irqchip { } chip; }; +4.27 KVM_XEN_HVM_CONFIG + +Capability: KVM_CAP_XEN_HVM +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_xen_hvm_config (in) +Returns: 0 on success, -1 on error + +Sets the MSR that the Xen HVM guest uses to initialize its hypercall +page, and provides the starting address and size of the hypercall +blobs in userspace. When the guest writes the MSR, kvm copies one +page of a blob (32- or 64-bit, depending on the vcpu mode) to guest +memory. + +struct kvm_xen_hvm_config { + __u32 flags; + __u32 msr; + __u64 blob_addr_32; + __u64 blob_addr_64; + __u8 blob_size_32; + __u8 blob_size_64; + __u8 pad2[30]; +}; + +4.27 KVM_GET_CLOCK + +Capability: KVM_CAP_ADJUST_CLOCK +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_clock_data (out) +Returns: 0 on success, -1 on error + +Gets the current timestamp of kvmclock as seen by the current guest. In +conjunction with KVM_SET_CLOCK, it is used to ensure monotonicity on scenarios +such as migration. + +struct kvm_clock_data { + __u64 clock; /* kvmclock current value */ + __u32 flags; + __u32 pad[9]; +}; + +4.28 KVM_SET_CLOCK + +Capability: KVM_CAP_ADJUST_CLOCK +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_clock_data (in) +Returns: 0 on success, -1 on error + +Sets the current timestamp of kvmclock to the valued specific in its parameter. +In conjunction with KVM_GET_CLOCK, it is used to ensure monotonicity on scenarios +such as migration. + +struct kvm_clock_data { + __u64 clock; /* kvmclock current value */ + __u32 flags; + __u32 pad[9]; +}; + +4.29 KVM_GET_VCPU_EVENTS + +Capability: KVM_CAP_VCPU_EVENTS +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_vcpu_event (out) +Returns: 0 on success, -1 on error + +Gets currently pending exceptions, interrupts, and NMIs as well as related +states of the vcpu. + +struct kvm_vcpu_events { + struct { + __u8 injected; + __u8 nr; + __u8 has_error_code; + __u8 pad; + __u32 error_code; + } exception; + struct { + __u8 injected; + __u8 nr; + __u8 soft; + __u8 pad; + } interrupt; + struct { + __u8 injected; + __u8 pending; + __u8 masked; + __u8 pad; + } nmi; + __u32 sipi_vector; + __u32 flags; /* must be zero */ +}; + +4.30 KVM_SET_VCPU_EVENTS + +Capability: KVM_CAP_VCPU_EVENTS +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_vcpu_event (in) +Returns: 0 on success, -1 on error + +Set pending exceptions, interrupts, and NMIs as well as related states of the +vcpu. + +See KVM_GET_VCPU_EVENTS for the data structure. + + 5. The kvm_run structure Application code obtains a pointer to the kvm_run structure by diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index ba9373f82ab..098de5bce00 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -42,7 +42,6 @@ #include <signal.h> #include "linux/lguest_launcher.h" #include "linux/virtio_config.h" -#include <linux/virtio_ids.h> #include "linux/virtio_net.h" #include "linux/virtio_blk.h" #include "linux/virtio_console.h" diff --git a/Documentation/i2c/chips/eeprom b/Documentation/misc-devices/eeprom index f7e8104b576..f7e8104b576 100644 --- a/Documentation/i2c/chips/eeprom +++ b/Documentation/misc-devices/eeprom diff --git a/Documentation/i2c/chips/max6875 b/Documentation/misc-devices/max6875 index 10ca43cd1a7..1e89ee3ccc1 100644 --- a/Documentation/i2c/chips/max6875 +++ b/Documentation/misc-devices/max6875 @@ -42,10 +42,12 @@ General Remarks Valid addresses for the MAX6875 are 0x50 and 0x52. Valid addresses for the MAX6874 are 0x50, 0x52, 0x54 and 0x56. -The driver does not probe any address, so you must force the address. +The driver does not probe any address, so you explicitly instantiate the +devices. Example: -$ modprobe max6875 force=0,0x50 +$ modprobe max6875 +$ echo max6875 0x50 > /sys/bus/i2c/devices/i2c-0/new_device The MAX6874/MAX6875 ignores address bit 0, so this driver attaches to multiple addresses. For example, for address 0x50, it also reserves 0x51. diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index d5181ce9ff6..61f516b135b 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt @@ -1,7 +1,7 @@ Linux Ethernet Bonding Driver HOWTO - Latest update: 12 November 2007 + Latest update: 23 September 2009 Initial release : Thomas Davis <tadavis at lbl.gov> Corrections, HA extensions : 2000/10/03-15 : @@ -614,6 +614,46 @@ primary The primary option is only valid for active-backup mode. +primary_reselect + + Specifies the reselection policy for the primary slave. This + affects how the primary slave is chosen to become the active slave + when failure of the active slave or recovery of the primary slave + occurs. This option is designed to prevent flip-flopping between + the primary slave and other slaves. Possible values are: + + always or 0 (default) + + The primary slave becomes the active slave whenever it + comes back up. + + better or 1 + + The primary slave becomes the active slave when it comes + back up, if the speed and duplex of the primary slave is + better than the speed and duplex of the current active + slave. + + failure or 2 + + The primary slave becomes the active slave only if the + current active slave fails and the primary slave is up. + + The primary_reselect setting is ignored in two cases: + + If no slaves are active, the first slave to recover is + made the active slave. + + When initially enslaved, the primary slave is always made + the active slave. + + Changing the primary_reselect policy via sysfs will cause an + immediate selection of the best active slave according to the new + policy. This may or may not result in a change of the active + slave, depending upon the circumstances. + + This option was added for bonding version 3.6.0. + updelay Specifies the time, in milliseconds, to wait before enabling a diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index fbe427a6580..006b39dec87 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -164,6 +164,14 @@ tcp_congestion_control - STRING additional choices may be available based on kernel configuration. Default is set as part of kernel configuration. +tcp_cookie_size - INTEGER + Default size of TCP Cookie Transactions (TCPCT) option, that may be + overridden on a per socket basis by the TCPCT socket option. + Values greater than the maximum (16) are interpreted as the maximum. + Values greater than zero and less than the minimum (8) are interpreted + as the minimum. Odd values are interpreted as the next even value. + Default: 0 (off). + tcp_dsack - BOOLEAN Allows TCP to send "duplicate" SACKs. @@ -723,6 +731,12 @@ accept_source_route - BOOLEAN default TRUE (router) FALSE (host) +accept_local - BOOLEAN + Accept packets with local source addresses. In combination with + suitable routing, this can be used to direct packets between two + local interfaces over the wire and have them accepted properly. + default FALSE + rp_filter - INTEGER 0 - No source validation. 1 - Strict mode as defined in RFC3704 Strict Reverse Path @@ -738,8 +752,8 @@ rp_filter - INTEGER to prevent IP spoofing from DDos attacks. If using asymmetric routing or other complicated routing, then loose mode is recommended. - conf/all/rp_filter must also be set to non-zero to do source validation - on the interface + The max value from conf/{all,interface}/rp_filter is used + when doing source validation on the {interface}. Default value is 0. Note that some distributions enable it in startup scripts. @@ -1086,6 +1100,24 @@ accept_dad - INTEGER 2: Enable DAD, and disable IPv6 operation if MAC-based duplicate link-local address has been found. +force_tllao - BOOLEAN + Enable sending the target link-layer address option even when + responding to a unicast neighbor solicitation. + Default: FALSE + + Quoting from RFC 2461, section 4.4, Target link-layer address: + + "The option MUST be included for multicast solicitations in order to + avoid infinite Neighbor Solicitation "recursion" when the peer node + does not have a cache entry to return a Neighbor Advertisements + message. When responding to unicast solicitations, the option can be + omitted since the sender of the solicitation has the correct link- + layer address; otherwise it would not have be able to send the unicast + solicitation in the first place. However, including the link-layer + address in this case adds little overhead and eliminates a potential + race condition where the sender deletes the cached link-layer address + prior to receiving a response to a previous solicitation." + icmp/*: ratelimit - INTEGER Limit the maximal rates for sending ICMPv6 packets. diff --git a/Documentation/networking/pktgen.txt b/Documentation/networking/pktgen.txt index c6cf4a3c16e..61bb645d50e 100644 --- a/Documentation/networking/pktgen.txt +++ b/Documentation/networking/pktgen.txt @@ -90,6 +90,11 @@ Examples: pgset "dstmac 00:00:00:00:00:00" sets MAC destination address pgset "srcmac 00:00:00:00:00:00" sets MAC source address + pgset "queue_map_min 0" Sets the min value of tx queue interval + pgset "queue_map_max 7" Sets the max value of tx queue interval, for multiqueue devices + To select queue 1 of a given device, + use queue_map_min=1 and queue_map_max=1 + pgset "src_mac_count 1" Sets the number of MACs we'll range through. The 'minimum' MAC is what you set with srcmac. @@ -101,6 +106,9 @@ Examples: IPDST_RND, UDPSRC_RND, UDPDST_RND, MACSRC_RND, MACDST_RND MPLS_RND, VID_RND, SVID_RND + QUEUE_MAP_RND # queue map random + QUEUE_MAP_CPU # queue map mirrors smp_processor_id() + pgset "udp_src_min 9" set UDP source port min, If < udp_src_max, then cycle through the port range. diff --git a/Documentation/networking/timestamping/timestamping.c b/Documentation/networking/timestamping/timestamping.c index 43d14310421..a7936fe8444 100644 --- a/Documentation/networking/timestamping/timestamping.c +++ b/Documentation/networking/timestamping/timestamping.c @@ -381,7 +381,7 @@ int main(int argc, char **argv) memset(&hwtstamp, 0, sizeof(hwtstamp)); strncpy(hwtstamp.ifr_name, interface, sizeof(hwtstamp.ifr_name)); hwtstamp.ifr_data = (void *)&hwconfig; - memset(&hwconfig, 0, sizeof(&hwconfig)); + memset(&hwconfig, 0, sizeof(hwconfig)); hwconfig.tx_type = (so_timestamping_flags & SOF_TIMESTAMPING_TX_HARDWARE) ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; diff --git a/Documentation/pcmcia/driver-changes.txt b/Documentation/pcmcia/driver-changes.txt index 059934363ca..446f43b309d 100644 --- a/Documentation/pcmcia/driver-changes.txt +++ b/Documentation/pcmcia/driver-changes.txt @@ -1,5 +1,17 @@ This file details changes in 2.6 which affect PCMCIA card driver authors: +* no cs_error / CS_CHECK / CONFIG_PCMCIA_DEBUG (as of 2.6.33) + Instead of the cs_error() callback or the CS_CHECK() macro, please use + Linux-style checking of return values, and -- if necessary -- debug + messages using "dev_dbg()" or "pr_debug()". + +* New CIS tuple access (as of 2.6.33) + Instead of pcmcia_get_{first,next}_tuple(), pcmcia_get_tuple_data() and + pcmcia_parse_tuple(), a driver shall use "pcmcia_get_tuple()" if it is + only interested in one (raw) tuple, or "pcmcia_loop_tuple()" if it is + interested in all tuples of one type. To decode the MAC from CISTPL_FUNCE, + a new helper "pcmcia_get_mac_from_cis()" was added. + * New configuration loop helper (as of 2.6.28) By calling pcmcia_loop_config(), a driver can iterate over all available configuration options. During a driver's probe() phase, one doesn't need diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index f49a33b704d..4a3109b2884 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt @@ -38,7 +38,7 @@ struct dev_pm_ops { ... int (*runtime_suspend)(struct device *dev); int (*runtime_resume)(struct device *dev); - void (*runtime_idle)(struct device *dev); + int (*runtime_idle)(struct device *dev); ... }; @@ -71,9 +71,9 @@ what to do to handle the device). purpose). In particular, if the driver requires remote wakeup capability for proper -functioning and device_may_wakeup() returns 'false' for the device, then +functioning and device_run_wake() returns 'false' for the device, then ->runtime_suspend() should return -EBUSY. On the other hand, if -device_may_wakeup() returns 'true' for the device and the device is put +device_run_wake() returns 'true' for the device and the device is put into a low power state during the execution of its bus type's ->runtime_suspend(), it is expected that remote wake-up (i.e. hardware mechanism allowing the device to request a change of its power state, such as PCI PME) @@ -114,7 +114,8 @@ The action performed by a bus type's ->runtime_idle() callback is totally dependent on the bus type in question, but the expected and recommended action is to check if the device can be suspended (i.e. if all of the conditions necessary for suspending the device are satisfied) and to queue up a suspend -request for the device in that case. +request for the device in that case. The value returned by this callback is +ignored by the PM core. The helper functions provided by the PM core, described in Section 4, guarantee that the following constraints are met with respect to the bus type's run-time @@ -214,6 +215,9 @@ defined in include/linux/pm.h: being executed for that device and it is not practical to wait for the suspend to complete; means "start a resume as soon as you've suspended" + unsigned int run_wake; + - set if the device is capable of generating run-time wake-up events + enum rpm_status runtime_status; - the run-time PM status of the device; this field's initial value is RPM_SUSPENDED, which means that each device is initially regarded by the diff --git a/Documentation/powerpc/dts-bindings/fsl/mpc5200.txt b/Documentation/powerpc/dts-bindings/fsl/mpc5200.txt index 8447fd7090d..cabc780f725 100644 --- a/Documentation/powerpc/dts-bindings/fsl/mpc5200.txt +++ b/Documentation/powerpc/dts-bindings/fsl/mpc5200.txt @@ -178,3 +178,13 @@ External interrupts: external irq3: interrupts = <1 3 n>; 'n' is sense (0: level high, 1: edge rising, 2: edge falling 3: level low) +fsl,mpc5200-mscan nodes +----------------------- +In addition to the required compatible-, reg- and interrupt-properites, you can +also specify which clock source shall be used for the controller: + +- fsl,mscan-clock-source- a string describing the clock source. Valid values + are: "ip" for ip bus clock + "ref" for reference clock (XTAL) + "ref" is default in case this property is not + present. diff --git a/Documentation/scsi/hptiop.txt b/Documentation/scsi/hptiop.txt index a6eb4add1be..9605179711f 100644 --- a/Documentation/scsi/hptiop.txt +++ b/Documentation/scsi/hptiop.txt @@ -3,6 +3,25 @@ HIGHPOINT ROCKETRAID 3xxx/4xxx ADAPTER DRIVER (hptiop) Controller Register Map ------------------------- +For RR44xx Intel IOP based adapters, the controller IOP is accessed via PCI BAR0 and BAR2: + + BAR0 offset Register + 0x11C5C Link Interface IRQ Set + 0x11C60 Link Interface IRQ Clear + + BAR2 offset Register + 0x10 Inbound Message Register 0 + 0x14 Inbound Message Register 1 + 0x18 Outbound Message Register 0 + 0x1C Outbound Message Register 1 + 0x20 Inbound Doorbell Register + 0x24 Inbound Interrupt Status Register + 0x28 Inbound Interrupt Mask Register + 0x30 Outbound Interrupt Status Register + 0x34 Outbound Interrupt Mask Register + 0x40 Inbound Queue Port + 0x44 Outbound Queue Port + For Intel IOP based adapters, the controller IOP is accessed via PCI BAR0: BAR0 offset Register @@ -93,7 +112,7 @@ The driver exposes following sysfs attributes: ----------------------------------------------------------------------------- -Copyright (C) 2006-2007 HighPoint Technologies, Inc. All Rights Reserved. +Copyright (C) 2006-2009 HighPoint Technologies, Inc. All Rights Reserved. This file is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt index ebc50f808ea..9dbf4470c7e 100644 --- a/Documentation/slow-work.txt +++ b/Documentation/slow-work.txt @@ -41,6 +41,13 @@ expand files, provided the time taken to do so isn't too long. Operations of both types may sleep during execution, thus tying up the thread loaned to it. +A further class of work item is available, based on the slow work item class: + + (*) Delayed slow work items. + +These are slow work items that have a timer to defer queueing of the item for +a while. + THREAD-TO-CLASS ALLOCATION -------------------------- @@ -64,9 +71,11 @@ USING SLOW WORK ITEMS Firstly, a module or subsystem wanting to make use of slow work items must register its interest: - int ret = slow_work_register_user(); + int ret = slow_work_register_user(struct module *module); -This will return 0 if successful, or a -ve error upon failure. +This will return 0 if successful, or a -ve error upon failure. The module +pointer should be the module interested in using this facility (almost +certainly THIS_MODULE). Slow work items may then be set up by: @@ -93,6 +102,10 @@ Slow work items may then be set up by: or: + delayed_slow_work_init(&myitem, &myitem_ops); + + or: + vslow_work_init(&myitem, &myitem_ops); depending on its class. @@ -102,15 +115,92 @@ A suitably set up work item can then be enqueued for processing: int ret = slow_work_enqueue(&myitem); This will return a -ve error if the thread pool is unable to gain a reference -on the item, 0 otherwise. +on the item, 0 otherwise, or (for delayed work): + + int ret = delayed_slow_work_enqueue(&myitem, my_jiffy_delay); The items are reference counted, so there ought to be no need for a flush -operation. When all a module's slow work items have been processed, and the +operation. But as the reference counting is optional, means to cancel +existing work items are also included: + + cancel_slow_work(&myitem); + cancel_delayed_slow_work(&myitem); + +can be used to cancel pending work. The above cancel function waits for +existing work to have been executed (or prevent execution of them, depending +on timing). + + +When all a module's slow work items have been processed, and the module has no further interest in the facility, it should unregister its interest: - slow_work_unregister_user(); + slow_work_unregister_user(struct module *module); + +The module pointer is used to wait for all outstanding work items for that +module before completing the unregistration. This prevents the put_ref() code +from being taken away before it completes. module should almost certainly be +THIS_MODULE. + + +================ +HELPER FUNCTIONS +================ + +The slow-work facility provides a function by which it can be determined +whether or not an item is queued for later execution: + + bool queued = slow_work_is_queued(struct slow_work *work); + +If it returns false, then the item is not on the queue (it may be executing +with a requeue pending). This can be used to work out whether an item on which +another depends is on the queue, thus allowing a dependent item to be queued +after it. + +If the above shows an item on which another depends not to be queued, then the +owner of the dependent item might need to wait. However, to avoid locking up +the threads unnecessarily be sleeping in them, it can make sense under some +circumstances to return the work item to the queue, thus deferring it until +some other items have had a chance to make use of the yielded thread. + +To yield a thread and defer an item, the work function should simply enqueue +the work item again and return. However, this doesn't work if there's nothing +actually on the queue, as the thread just vacated will jump straight back into +the item's work function, thus busy waiting on a CPU. + +Instead, the item should use the thread to wait for the dependency to go away, +but rather than using schedule() or schedule_timeout() to sleep, it should use +the following function: + + bool requeue = slow_work_sleep_till_thread_needed( + struct slow_work *work, + signed long *_timeout); + +This will add a second wait and then sleep, such that it will be woken up if +either something appears on the queue that could usefully make use of the +thread - and behind which this item can be queued, or if the event the caller +set up to wait for happens. True will be returned if something else appeared +on the queue and this work function should perhaps return, of false if +something else woke it up. The timeout is as for schedule_timeout(). + +For example: + + wq = bit_waitqueue(&my_flags, MY_BIT); + init_wait(&wait); + requeue = false; + do { + prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE); + if (!test_bit(MY_BIT, &my_flags)) + break; + requeue = slow_work_sleep_till_thread_needed(&my_work, + &timeout); + } while (timeout > 0 && !requeue); + finish_wait(wq, &wait); + if (!test_bit(MY_BIT, &my_flags) + goto do_my_thing; + if (requeue) + return; // to slow_work =============== @@ -118,7 +208,8 @@ ITEM OPERATIONS =============== Each work item requires a table of operations of type struct slow_work_ops. -All members are required: +Only ->execute() is required; the getting and putting of a reference and the +describing of an item are all optional. (*) Get a reference on an item: @@ -148,6 +239,16 @@ All members are required: This should perform the work required of the item. It may sleep, it may perform disk I/O and it may wait for locks. + (*) View an item through /proc: + + void (*desc)(struct slow_work *work, struct seq_file *m); + + If supplied, this should print to 'm' a small string describing the work + the item is to do. This should be no more than about 40 characters, and + shouldn't include a newline character. + + See the 'Viewing executing and queued items' section below. + ================== POOL CONFIGURATION @@ -172,3 +273,50 @@ The slow-work thread pool has a number of configurables: is bounded to between 1 and one fewer than the number of active threads. This ensures there is always at least one thread that can process very slow work items, and always at least one thread that won't. + + +================================== +VIEWING EXECUTING AND QUEUED ITEMS +================================== + +If CONFIG_SLOW_WORK_DEBUG is enabled, a debugfs file is made available: + + /sys/kernel/debug/slow_work/runqueue + +through which the list of work items being executed and the queues of items to +be executed may be viewed. The owner of a work item is given the chance to +add some information of its own. + +The contents look something like the following: + + THR PID ITEM ADDR FL MARK DESC + === ===== ================ == ===== ========== + 0 3005 ffff880023f52348 a 952ms FSC: OBJ17d3: LOOK + 1 3006 ffff880024e33668 2 160ms FSC: OBJ17e5 OP60d3b: Write1/Store fl=2 + 2 3165 ffff8800296dd180 a 424ms FSC: OBJ17e4: LOOK + 3 4089 ffff8800262c8d78 a 212ms FSC: OBJ17ea: CRTN + 4 4090 ffff88002792bed8 2 388ms FSC: OBJ17e8 OP60d36: Write1/Store fl=2 + 5 4092 ffff88002a0ef308 2 388ms FSC: OBJ17e7 OP60d2e: Write1/Store fl=2 + 6 4094 ffff88002abaf4b8 2 132ms FSC: OBJ17e2 OP60d4e: Write1/Store fl=2 + 7 4095 ffff88002bb188e0 a 388ms FSC: OBJ17e9: CRTN + vsq - ffff880023d99668 1 308ms FSC: OBJ17e0 OP60f91: Write1/EnQ fl=2 + vsq - ffff8800295d1740 1 212ms FSC: OBJ16be OP4d4b6: Write1/EnQ fl=2 + vsq - ffff880025ba3308 1 160ms FSC: OBJ179a OP58dec: Write1/EnQ fl=2 + vsq - ffff880024ec83e0 1 160ms FSC: OBJ17ae OP599f2: Write1/EnQ fl=2 + vsq - ffff880026618e00 1 160ms FSC: OBJ17e6 OP60d33: Write1/EnQ fl=2 + vsq - ffff880025a2a4b8 1 132ms FSC: OBJ16a2 OP4d583: Write1/EnQ fl=2 + vsq - ffff880023cbe6d8 9 212ms FSC: OBJ17eb: LOOK + vsq - ffff880024d37590 9 212ms FSC: OBJ17ec: LOOK + vsq - ffff880027746cb0 9 212ms FSC: OBJ17ed: LOOK + vsq - ffff880024d37ae8 9 212ms FSC: OBJ17ee: LOOK + vsq - ffff880024d37cb0 9 212ms FSC: OBJ17ef: LOOK + vsq - ffff880025036550 9 212ms FSC: OBJ17f0: LOOK + vsq - ffff8800250368e0 9 212ms FSC: OBJ17f1: LOOK + vsq - ffff880025036aa8 9 212ms FSC: OBJ17f2: LOOK + +In the 'THR' column, executing items show the thread they're occupying and +queued threads indicate which queue they're on. 'PID' shows the process ID of +a slow-work thread that's executing something. 'FL' shows the work item flags. +'MARK' indicates how long since an item was queued or began executing. Lastly, +the 'DESC' column permits the owner of an item to give some information. + diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index 1c8eb4518ce..8923597bd2b 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt @@ -522,7 +522,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. pcm_devs - Number of PCM devices assigned to each card (default = 1, up to 4) pcm_substreams - Number of PCM substreams assigned to each PCM - (default = 8, up to 16) + (default = 8, up to 128) hrtimer - Use hrtimer (=1, default) or system timer (=0) fake_buffer - Fake buffer allocations (default = 1) @@ -798,6 +798,9 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. setup before initializing the codecs. This option is available only when CONFIG_SND_HDA_PATCH_LOADER=y is set. See HD-Audio.txt for details. + beep_mode - Selects the beep registration mode (0=off, 1=on, 2= + dynamic registration via mute switch on/off); the default + value is set via CONFIG_SND_HDA_INPUT_BEEP_MODE kconfig. [Single (global) options] single_cmd - Use single immediate commands to communicate with @@ -1454,6 +1457,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. Module for internal PC-Speaker. + nopcm - Disable PC-Speaker PCM sound. Only beeps remain. nforce_wa - enable NForce chipset workaround. Expect bad sound. This module supports system beeps, some kind of PCM playback and @@ -1631,7 +1635,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. Module snd-sscape ----------------- - Module for ENSONIQ SoundScape PnP cards. + Module for ENSONIQ SoundScape cards. port - Port # (PnP setup) wss_port - WSS Port # (PnP setup) @@ -1639,10 +1643,11 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. mpu_irq - MPU-401 IRQ # (PnP setup) dma - DMA # (PnP setup) dma2 - 2nd DMA # (PnP setup, -1 to disable) + joystick - Enable gameport - 0 = disable (default), 1 = enable + + This module supports multiple cards. - This module supports multiple cards. ISA PnP must be enabled. - You need sscape_ctl tool in alsa-tools package for loading - the microcode. + The driver requires the firmware loader support on kernel. Module snd-sun-amd7930 (on sparc only) -------------------------------------- diff --git a/Documentation/sound/alsa/ControlNames.txt b/Documentation/sound/alsa/ControlNames.txt index 5b18298e949..fea65bb6269 100644 --- a/Documentation/sound/alsa/ControlNames.txt +++ b/Documentation/sound/alsa/ControlNames.txt @@ -18,8 +18,9 @@ SOURCE: Master Master Mono Hardware Master + Speaker (internal speaker) Headphone - PC Speaker + Beep (beep generator) Phone Phone Input Phone Output diff --git a/Documentation/sound/alsa/HD-Audio-Models.txt b/Documentation/sound/alsa/HD-Audio-Models.txt index f1708b79f96..9000cd84d07 100644 --- a/Documentation/sound/alsa/HD-Audio-Models.txt +++ b/Documentation/sound/alsa/HD-Audio-Models.txt @@ -209,6 +209,7 @@ AD1884A / AD1883 / AD1984A / AD1984B laptop laptop with HP jack sensing mobile mobile devices with HP jack sensing thinkpad Lenovo Thinkpad X300 + touchsmart HP Touchsmart AD1884 ====== @@ -358,6 +359,7 @@ STAC9227/9228/9229/927x 5stack-no-fp D965 5stack without front panel dell-3stack Dell Dimension E520 dell-bios Fixes with Dell BIOS setup + volknob Fixes with volume-knob widget 0x24 auto BIOS setup (default) STAC92HD71B* @@ -389,6 +391,7 @@ STAC92HD83* ref Reference board mic-ref Reference board with power management for ports dell-s14 Dell laptop + hp HP laptops with (inverted) mute-LED auto BIOS setup (default) STAC9872 diff --git a/Documentation/sysctl/ctl_unnumbered.txt b/Documentation/sysctl/ctl_unnumbered.txt deleted file mode 100644 index 23003a8ea3e..00000000000 --- a/Documentation/sysctl/ctl_unnumbered.txt +++ /dev/null @@ -1,22 +0,0 @@ - -Except for a few extremely rare exceptions user space applications do not use -the binary sysctl interface. Instead everyone uses /proc/sys/... with -readable ascii names. - -Recently the kernel has started supporting setting the binary sysctl value to -CTL_UNNUMBERED so we no longer need to assign a binary sysctl path to allow -sysctls to show up in /proc/sys. - -Assigning binary sysctl numbers is an endless source of conflicts in sysctl.h, -breaking of the user space ABI (because of those conflicts), and maintenance -problems. A complete pass through all of the sysctl users revealed multiple -instances where the sysctl binary interface was broken and had gone undetected -for years. - -So please do not add new binary sysctl numbers. They are unneeded and -problematic. - -If you really need a new binary sysctl number please first merge your sysctl -into the kernel and then as a separate patch allocate a binary sysctl number. - -(ebiederm@xmission.com, June 2007) diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt index 70d68ce8640..a87dc277a5c 100644 --- a/Documentation/thermal/sysfs-api.txt +++ b/Documentation/thermal/sysfs-api.txt @@ -1,5 +1,5 @@ Generic Thermal Sysfs driver How To -========================= +=================================== Written by Sujith Thomas <sujith.thomas@intel.com>, Zhang Rui <rui.zhang@intel.com> @@ -10,20 +10,20 @@ Copyright (c) 2008 Intel Corporation 0. Introduction -The generic thermal sysfs provides a set of interfaces for thermal zone devices (sensors) -and thermal cooling devices (fan, processor...) to register with the thermal management -solution and to be a part of it. +The generic thermal sysfs provides a set of interfaces for thermal zone +devices (sensors) and thermal cooling devices (fan, processor...) to register +with the thermal management solution and to be a part of it. -This how-to focuses on enabling new thermal zone and cooling devices to participate -in thermal management. -This solution is platform independent and any type of thermal zone devices and -cooling devices should be able to make use of the infrastructure. +This how-to focuses on enabling new thermal zone and cooling devices to +participate in thermal management. +This solution is platform independent and any type of thermal zone devices +and cooling devices should be able to make use of the infrastructure. -The main task of the thermal sysfs driver is to expose thermal zone attributes as well -as cooling device attributes to the user space. -An intelligent thermal management application can make decisions based on inputs -from thermal zone attributes (the current temperature and trip point temperature) -and throttle appropriate devices. +The main task of the thermal sysfs driver is to expose thermal zone attributes +as well as cooling device attributes to the user space. +An intelligent thermal management application can make decisions based on +inputs from thermal zone attributes (the current temperature and trip point +temperature) and throttle appropriate devices. [0-*] denotes any positive number starting from 0 [1-*] denotes any positive number starting from 1 @@ -31,77 +31,77 @@ and throttle appropriate devices. 1. thermal sysfs driver interface functions 1.1 thermal zone device interface -1.1.1 struct thermal_zone_device *thermal_zone_device_register(char *name, int trips, - void *devdata, struct thermal_zone_device_ops *ops) - - This interface function adds a new thermal zone device (sensor) to - /sys/class/thermal folder as thermal_zone[0-*]. - It tries to bind all the thermal cooling devices registered at the same time. - - name: the thermal zone name. - trips: the total number of trip points this thermal zone supports. - devdata: device private data - ops: thermal zone device call-backs. - .bind: bind the thermal zone device with a thermal cooling device. - .unbind: unbind the thermal zone device with a thermal cooling device. - .get_temp: get the current temperature of the thermal zone. - .get_mode: get the current mode (user/kernel) of the thermal zone. - "kernel" means thermal management is done in kernel. - "user" will prevent kernel thermal driver actions upon trip points - so that user applications can take charge of thermal management. - .set_mode: set the mode (user/kernel) of the thermal zone. - .get_trip_type: get the type of certain trip point. - .get_trip_temp: get the temperature above which the certain trip point - will be fired. +1.1.1 struct thermal_zone_device *thermal_zone_device_register(char *name, + int trips, void *devdata, struct thermal_zone_device_ops *ops) + + This interface function adds a new thermal zone device (sensor) to + /sys/class/thermal folder as thermal_zone[0-*]. It tries to bind all the + thermal cooling devices registered at the same time. + + name: the thermal zone name. + trips: the total number of trip points this thermal zone supports. + devdata: device private data + ops: thermal zone device call-backs. + .bind: bind the thermal zone device with a thermal cooling device. + .unbind: unbind the thermal zone device with a thermal cooling device. + .get_temp: get the current temperature of the thermal zone. + .get_mode: get the current mode (user/kernel) of the thermal zone. + - "kernel" means thermal management is done in kernel. + - "user" will prevent kernel thermal driver actions upon trip points + so that user applications can take charge of thermal management. + .set_mode: set the mode (user/kernel) of the thermal zone. + .get_trip_type: get the type of certain trip point. + .get_trip_temp: get the temperature above which the certain trip point + will be fired. 1.1.2 void thermal_zone_device_unregister(struct thermal_zone_device *tz) - This interface function removes the thermal zone device. - It deletes the corresponding entry form /sys/class/thermal folder and unbind all - the thermal cooling devices it uses. + This interface function removes the thermal zone device. + It deletes the corresponding entry form /sys/class/thermal folder and + unbind all the thermal cooling devices it uses. 1.2 thermal cooling device interface 1.2.1 struct thermal_cooling_device *thermal_cooling_device_register(char *name, - void *devdata, struct thermal_cooling_device_ops *) - - This interface function adds a new thermal cooling device (fan/processor/...) to - /sys/class/thermal/ folder as cooling_device[0-*]. - It tries to bind itself to all the thermal zone devices register at the same time. - name: the cooling device name. - devdata: device private data. - ops: thermal cooling devices call-backs. - .get_max_state: get the Maximum throttle state of the cooling device. - .get_cur_state: get the Current throttle state of the cooling device. - .set_cur_state: set the Current throttle state of the cooling device. + void *devdata, struct thermal_cooling_device_ops *) + + This interface function adds a new thermal cooling device (fan/processor/...) + to /sys/class/thermal/ folder as cooling_device[0-*]. It tries to bind itself + to all the thermal zone devices register at the same time. + name: the cooling device name. + devdata: device private data. + ops: thermal cooling devices call-backs. + .get_max_state: get the Maximum throttle state of the cooling device. + .get_cur_state: get the Current throttle state of the cooling device. + .set_cur_state: set the Current throttle state of the cooling device. 1.2.2 void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev) - This interface function remove the thermal cooling device. - It deletes the corresponding entry form /sys/class/thermal folder and unbind - itself from all the thermal zone devices using it. + This interface function remove the thermal cooling device. + It deletes the corresponding entry form /sys/class/thermal folder and + unbind itself from all the thermal zone devices using it. 1.3 interface for binding a thermal zone device with a thermal cooling device 1.3.1 int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, - int trip, struct thermal_cooling_device *cdev); + int trip, struct thermal_cooling_device *cdev); - This interface function bind a thermal cooling device to the certain trip point - of a thermal zone device. - This function is usually called in the thermal zone device .bind callback. - tz: the thermal zone device - cdev: thermal cooling device - trip: indicates which trip point the cooling devices is associated with - in this thermal zone. + This interface function bind a thermal cooling device to the certain trip + point of a thermal zone device. + This function is usually called in the thermal zone device .bind callback. + tz: the thermal zone device + cdev: thermal cooling device + trip: indicates which trip point the cooling devices is associated with + in this thermal zone. 1.3.2 int thermal_zone_unbind_cooling_device(struct thermal_zone_device *tz, - int trip, struct thermal_cooling_device *cdev); + int trip, struct thermal_cooling_device *cdev); - This interface function unbind a thermal cooling device from the certain trip point - of a thermal zone device. - This function is usually called in the thermal zone device .unbind callback. - tz: the thermal zone device - cdev: thermal cooling device - trip: indicates which trip point the cooling devices is associated with - in this thermal zone. + This interface function unbind a thermal cooling device from the certain + trip point of a thermal zone device. This function is usually called in + the thermal zone device .unbind callback. + tz: the thermal zone device + cdev: thermal cooling device + trip: indicates which trip point the cooling devices is associated with + in this thermal zone. 2. sysfs attributes structure @@ -114,153 +114,166 @@ if hwmon is compiled in or built as a module. Thermal zone device sys I/F, created once it's registered: /sys/class/thermal/thermal_zone[0-*]: - |-----type: Type of the thermal zone - |-----temp: Current temperature - |-----mode: Working mode of the thermal zone - |-----trip_point_[0-*]_temp: Trip point temperature - |-----trip_point_[0-*]_type: Trip point type + |---type: Type of the thermal zone + |---temp: Current temperature + |---mode: Working mode of the thermal zone + |---trip_point_[0-*]_temp: Trip point temperature + |---trip_point_[0-*]_type: Trip point type Thermal cooling device sys I/F, created once it's registered: /sys/class/thermal/cooling_device[0-*]: - |-----type : Type of the cooling device(processor/fan/...) - |-----max_state: Maximum cooling state of the cooling device - |-----cur_state: Current cooling state of the cooling device + |---type: Type of the cooling device(processor/fan/...) + |---max_state: Maximum cooling state of the cooling device + |---cur_state: Current cooling state of the cooling device -These two dynamic attributes are created/removed in pairs. -They represent the relationship between a thermal zone and its associated cooling device. -They are created/removed for each -thermal_zone_bind_cooling_device/thermal_zone_unbind_cooling_device successful execution. +Then next two dynamic attributes are created/removed in pairs. They represent +the relationship between a thermal zone and its associated cooling device. +They are created/removed for each successful execution of +thermal_zone_bind_cooling_device/thermal_zone_unbind_cooling_device. -/sys/class/thermal/thermal_zone[0-*] - |-----cdev[0-*]: The [0-*]th cooling device in the current thermal zone - |-----cdev[0-*]_trip_point: Trip point that cdev[0-*] is associated with +/sys/class/thermal/thermal_zone[0-*]: + |---cdev[0-*]: [0-*]th cooling device in current thermal zone + |---cdev[0-*]_trip_point: Trip point that cdev[0-*] is associated with Besides the thermal zone device sysfs I/F and cooling device sysfs I/F, -the generic thermal driver also creates a hwmon sysfs I/F for each _type_ of -thermal zone device. E.g. the generic thermal driver registers one hwmon class device -and build the associated hwmon sysfs I/F for all the registered ACPI thermal zones. +the generic thermal driver also creates a hwmon sysfs I/F for each _type_ +of thermal zone device. E.g. the generic thermal driver registers one hwmon +class device and build the associated hwmon sysfs I/F for all the registered +ACPI thermal zones. + /sys/class/hwmon/hwmon[0-*]: - |-----name: The type of the thermal zone devices. - |-----temp[1-*]_input: The current temperature of thermal zone [1-*]. - |-----temp[1-*]_critical: The critical trip point of thermal zone [1-*]. + |---name: The type of the thermal zone devices + |---temp[1-*]_input: The current temperature of thermal zone [1-*] + |---temp[1-*]_critical: The critical trip point of thermal zone [1-*] + Please read Documentation/hwmon/sysfs-interface for additional information. *************************** * Thermal zone attributes * *************************** -type Strings which represent the thermal zone type. - This is given by thermal zone driver as part of registration. - Eg: "acpitz" indicates it's an ACPI thermal device. - In order to keep it consistent with hwmon sys attribute, - this should be a short, lowercase string, - not containing spaces nor dashes. - RO - Required - -temp Current temperature as reported by thermal zone (sensor) - Unit: millidegree Celsius - RO - Required - -mode One of the predefined values in [kernel, user] - This file gives information about the algorithm - that is currently managing the thermal zone. - It can be either default kernel based algorithm - or user space application. - RW - Optional - kernel = Thermal management in kernel thermal zone driver. - user = Preventing kernel thermal zone driver actions upon - trip points so that user application can take full - charge of the thermal management. - -trip_point_[0-*]_temp The temperature above which trip point will be fired - Unit: millidegree Celsius - RO - Optional - -trip_point_[0-*]_type Strings which indicate the type of the trip point - E.g. it can be one of critical, hot, passive, - active[0-*] for ACPI thermal zone. - RO - Optional - -cdev[0-*] Sysfs link to the thermal cooling device node where the sys I/F - for cooling device throttling control represents. - RO - Optional - -cdev[0-*]_trip_point The trip point with which cdev[0-*] is associated in this thermal zone - -1 means the cooling device is not associated with any trip point. - RO - Optional - -****************************** -* Cooling device attributes * -****************************** - -type String which represents the type of device - eg: For generic ACPI: this should be "Fan", - "Processor" or "LCD" - eg. For memory controller device on intel_menlow platform: - this should be "Memory controller" - RO - Required - -max_state The maximum permissible cooling state of this cooling device. - RO - Required - -cur_state The current cooling state of this cooling device. - the value can any integer numbers between 0 and max_state, - cur_state == 0 means no cooling - cur_state == max_state means the maximum cooling. - RW - Required +type + Strings which represent the thermal zone type. + This is given by thermal zone driver as part of registration. + E.g: "acpitz" indicates it's an ACPI thermal device. + In order to keep it consistent with hwmon sys attribute; this should + be a short, lowercase string, not containing spaces nor dashes. + RO, Required + +temp + Current temperature as reported by thermal zone (sensor). + Unit: millidegree Celsius + RO, Required + +mode + One of the predefined values in [kernel, user]. + This file gives information about the algorithm that is currently + managing the thermal zone. It can be either default kernel based + algorithm or user space application. + kernel = Thermal management in kernel thermal zone driver. + user = Preventing kernel thermal zone driver actions upon + trip points so that user application can take full + charge of the thermal management. + RW, Optional + +trip_point_[0-*]_temp + The temperature above which trip point will be fired. + Unit: millidegree Celsius + RO, Optional + +trip_point_[0-*]_type + Strings which indicate the type of the trip point. + E.g. it can be one of critical, hot, passive, active[0-*] for ACPI + thermal zone. + RO, Optional + +cdev[0-*] + Sysfs link to the thermal cooling device node where the sys I/F + for cooling device throttling control represents. + RO, Optional + +cdev[0-*]_trip_point + The trip point with which cdev[0-*] is associated in this thermal + zone; -1 means the cooling device is not associated with any trip + point. + RO, Optional + +passive + Attribute is only present for zones in which the passive cooling + policy is not supported by native thermal driver. Default is zero + and can be set to a temperature (in millidegrees) to enable a + passive trip point for the zone. Activation is done by polling with + an interval of 1 second. + Unit: millidegrees Celsius + RW, Optional + +***************************** +* Cooling device attributes * +***************************** + +type + String which represents the type of device, e.g: + - for generic ACPI: should be "Fan", "Processor" or "LCD" + - for memory controller device on intel_menlow platform: + should be "Memory controller". + RO, Required + +max_state + The maximum permissible cooling state of this cooling device. + RO, Required + +cur_state + The current cooling state of this cooling device. + The value can any integer numbers between 0 and max_state: + - cur_state == 0 means no cooling + - cur_state == max_state means the maximum cooling. + RW, Required 3. A simple implementation -ACPI thermal zone may support multiple trip points like critical/hot/passive/active. -If an ACPI thermal zone supports critical, passive, active[0] and active[1] at the same time, -it may register itself as a thermal_zone_device (thermal_zone1) with 4 trip points in all. -It has one processor and one fan, which are both registered as thermal_cooling_device. -If the processor is listed in _PSL method, and the fan is listed in _AL0 method, -the sys I/F structure will be built like this: +ACPI thermal zone may support multiple trip points like critical, hot, +passive, active. If an ACPI thermal zone supports critical, passive, +active[0] and active[1] at the same time, it may register itself as a +thermal_zone_device (thermal_zone1) with 4 trip points in all. +It has one processor and one fan, which are both registered as +thermal_cooling_device. + +If the processor is listed in _PSL method, and the fan is listed in _AL0 +method, the sys I/F structure will be built like this: /sys/class/thermal: |thermal_zone1: - |-----type: acpitz - |-----temp: 37000 - |-----mode: kernel - |-----trip_point_0_temp: 100000 - |-----trip_point_0_type: critical - |-----trip_point_1_temp: 80000 - |-----trip_point_1_type: passive - |-----trip_point_2_temp: 70000 - |-----trip_point_2_type: active0 - |-----trip_point_3_temp: 60000 - |-----trip_point_3_type: active1 - |-----cdev0: --->/sys/class/thermal/cooling_device0 - |-----cdev0_trip_point: 1 /* cdev0 can be used for passive */ - |-----cdev1: --->/sys/class/thermal/cooling_device3 - |-----cdev1_trip_point: 2 /* cdev1 can be used for active[0]*/ + |---type: acpitz + |---temp: 37000 + |---mode: kernel + |---trip_point_0_temp: 100000 + |---trip_point_0_type: critical + |---trip_point_1_temp: 80000 + |---trip_point_1_type: passive + |---trip_point_2_temp: 70000 + |---trip_point_2_type: active0 + |---trip_point_3_temp: 60000 + |---trip_point_3_type: active1 + |---cdev0: --->/sys/class/thermal/cooling_device0 + |---cdev0_trip_point: 1 /* cdev0 can be used for passive */ + |---cdev1: --->/sys/class/thermal/cooling_device3 + |---cdev1_trip_point: 2 /* cdev1 can be used for active[0]*/ |cooling_device0: - |-----type: Processor - |-----max_state: 8 - |-----cur_state: 0 + |---type: Processor + |---max_state: 8 + |---cur_state: 0 |cooling_device3: - |-----type: Fan - |-----max_state: 2 - |-----cur_state: 0 + |---type: Fan + |---max_state: 2 + |---cur_state: 0 /sys/class/hwmon: |hwmon0: - |-----name: acpitz - |-----temp1_input: 37000 - |-----temp1_crit: 100000 + |---name: acpitz + |---temp1_input: 37000 + |---temp1_crit: 100000 diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt index 7003e10f10f..641a1ef2a7f 100644 --- a/Documentation/trace/ftrace-design.txt +++ b/Documentation/trace/ftrace-design.txt @@ -213,10 +213,19 @@ If you can't trace NMI functions, then skip this option. <details to be filled> -HAVE_FTRACE_SYSCALLS +HAVE_SYSCALL_TRACEPOINTS --------------------- -<details to be filled> +You need very few things to get the syscalls tracing in an arch. + +- Have a NR_syscalls variable in <asm/unistd.h> that provides the number + of syscalls supported by the arch. +- Implement arch_syscall_addr() that resolves a syscall address from a + syscall number. +- Support the TIF_SYSCALL_TRACEPOINT thread flags +- Put the trace_sys_enter() and trace_sys_exit() tracepoints calls from ptrace + in the ptrace syscalls tracing path. +- Tag this arch as HAVE_SYSCALL_TRACEPOINTS. HAVE_FTRACE_MCOUNT_RECORD diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index 957b22fde2d..8179692fbb9 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -1231,6 +1231,7 @@ something like this simple program: #include <sys/stat.h> #include <fcntl.h> #include <unistd.h> +#include <string.h> #define _STR(x) #x #define STR(x) _STR(x) @@ -1265,6 +1266,7 @@ const char *find_debugfs(void) return NULL; } + strcat(debugfs, "/tracing/"); debugfs_found = 1; return debugfs; diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt new file mode 100644 index 00000000000..47aabeebbdf --- /dev/null +++ b/Documentation/trace/kprobetrace.txt @@ -0,0 +1,149 @@ + Kprobe-based Event Tracing + ========================== + + Documentation is written by Masami Hiramatsu + + +Overview +-------- +These events are similar to tracepoint based events. Instead of Tracepoint, +this is based on kprobes (kprobe and kretprobe). So it can probe wherever +kprobes can probe (this means, all functions body except for __kprobes +functions). Unlike the Tracepoint based event, this can be added and removed +dynamically, on the fly. + +To enable this feature, build your kernel with CONFIG_KPROBE_TRACING=y. + +Similar to the events tracer, this doesn't need to be activated via +current_tracer. Instead of that, add probe points via +/sys/kernel/debug/tracing/kprobe_events, and enable it via +/sys/kernel/debug/tracing/events/kprobes/<EVENT>/enabled. + + +Synopsis of kprobe_events +------------------------- + p[:[GRP/]EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS] : Set a probe + r[:[GRP/]EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe + + GRP : Group name. If omitted, use "kprobes" for it. + EVENT : Event name. If omitted, the event name is generated + based on SYMBOL+offs or MEMADDR. + SYMBOL[+offs] : Symbol+offset where the probe is inserted. + MEMADDR : Address where the probe is inserted. + + FETCHARGS : Arguments. Each probe can have up to 128 args. + %REG : Fetch register REG + @ADDR : Fetch memory at ADDR (ADDR should be in kernel) + @SYM[+|-offs] : Fetch memory at SYM +|- offs (SYM should be a data symbol) + $stackN : Fetch Nth entry of stack (N >= 0) + $stack : Fetch stack address. + $argN : Fetch function argument. (N >= 0)(*) + $retval : Fetch return value.(**) + +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(***) + NAME=FETCHARG: Set NAME as the argument name of FETCHARG. + + (*) aN may not correct on asmlinkaged functions and at the middle of + function body. + (**) only for return probe. + (***) this is useful for fetching a field of data structures. + + +Per-Probe Event Filtering +------------------------- + Per-probe event filtering feature allows you to set different filter on each +probe and gives you what arguments will be shown in trace buffer. If an event +name is specified right after 'p:' or 'r:' in kprobe_events, it adds an event +under tracing/events/kprobes/<EVENT>, at the directory you can see 'id', +'enabled', 'format' and 'filter'. + +enabled: + You can enable/disable the probe by writing 1 or 0 on it. + +format: + This shows the format of this probe event. + +filter: + You can write filtering rules of this event. + +id: + This shows the id of this probe event. + + +Event Profiling +--------------- + You can check the total number of probe hits and probe miss-hits via +/sys/kernel/debug/tracing/kprobe_profile. + The first column is event name, the second is the number of probe hits, +the third is the number of probe miss-hits. + + +Usage examples +-------------- +To add a probe as a new event, write a new definition to kprobe_events +as below. + + echo p:myprobe do_sys_open dfd=$arg0 filename=$arg1 flags=$arg2 mode=$arg3 > /sys/kernel/debug/tracing/kprobe_events + + This sets a kprobe on the top of do_sys_open() function with recording +1st to 4th arguments as "myprobe" event. As this example shows, users can +choose more familiar names for each arguments. + + echo r:myretprobe do_sys_open $retval >> /sys/kernel/debug/tracing/kprobe_events + + This sets a kretprobe on the return point of do_sys_open() function with +recording return value as "myretprobe" event. + You can see the format of these events via +/sys/kernel/debug/tracing/events/kprobes/<EVENT>/format. + + cat /sys/kernel/debug/tracing/events/kprobes/myprobe/format +name: myprobe +ID: 75 +format: + field:unsigned short common_type; offset:0; size:2; + field:unsigned char common_flags; offset:2; size:1; + field:unsigned char common_preempt_count; offset:3; size:1; + field:int common_pid; offset:4; size:4; + field:int common_tgid; offset:8; size:4; + + field: unsigned long ip; offset:16;tsize:8; + field: int nargs; offset:24;tsize:4; + field: unsigned long dfd; offset:32;tsize:8; + field: unsigned long filename; offset:40;tsize:8; + field: unsigned long flags; offset:48;tsize:8; + field: unsigned long mode; offset:56;tsize:8; + +print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->ip, REC->dfd, REC->filename, REC->flags, REC->mode + + + You can see that the event has 4 arguments as in the expressions you specified. + + echo > /sys/kernel/debug/tracing/kprobe_events + + This clears all probe points. + + Right after definition, each event is disabled by default. For tracing these +events, you need to enable it. + + echo 1 > /sys/kernel/debug/tracing/events/kprobes/myprobe/enable + echo 1 > /sys/kernel/debug/tracing/events/kprobes/myretprobe/enable + + And you can see the traced information via /sys/kernel/debug/tracing/trace. + + cat /sys/kernel/debug/tracing/trace +# tracer: nop +# +# TASK-PID CPU# TIMESTAMP FUNCTION +# | | | | | + <...>-1447 [001] 1038282.286875: myprobe: (do_sys_open+0x0/0xd6) dfd=3 filename=7fffd1ec4440 flags=8000 mode=0 + <...>-1447 [001] 1038282.286878: myretprobe: (sys_openat+0xc/0xe <- do_sys_open) $retval=fffffffffffffffe + <...>-1447 [001] 1038282.286885: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=40413c flags=8000 mode=1b6 + <...>-1447 [001] 1038282.286915: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) $retval=3 + <...>-1447 [001] 1038282.286969: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=4041c6 flags=98800 mode=10 + <...>-1447 [001] 1038282.286976: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) $retval=3 + + + Each line shows when the kernel hits an event, and <- SYMBOL means kernel +returns from SYMBOL(e.g. "sys_open+0x1b/0x1d <- do_sys_open" means kernel +returns from do_sys_open to sys_open+0x1b). + + diff --git a/Documentation/vm/hwpoison.txt b/Documentation/vm/hwpoison.txt new file mode 100644 index 00000000000..3ffadf8da61 --- /dev/null +++ b/Documentation/vm/hwpoison.txt @@ -0,0 +1,136 @@ +What is hwpoison? + +Upcoming Intel CPUs have support for recovering from some memory errors +(``MCA recovery''). This requires the OS to declare a page "poisoned", +kill the processes associated with it and avoid using it in the future. + +This patchkit implements the necessary infrastructure in the VM. + +To quote the overview comment: + + * High level machine check handler. Handles pages reported by the + * hardware as being corrupted usually due to a 2bit ECC memory or cache + * failure. + * + * This focusses on pages detected as corrupted in the background. + * When the current CPU tries to consume corruption the currently + * running process can just be killed directly instead. This implies + * that if the error cannot be handled for some reason it's safe to + * just ignore it because no corruption has been consumed yet. Instead + * when that happens another machine check will happen. + * + * Handles page cache pages in various states. The tricky part + * here is that we can access any page asynchronous to other VM + * users, because memory failures could happen anytime and anywhere, + * possibly violating some of their assumptions. This is why this code + * has to be extremely careful. Generally it tries to use normal locking + * rules, as in get the standard locks, even if that means the + * error handling takes potentially a long time. + * + * Some of the operations here are somewhat inefficient and have non + * linear algorithmic complexity, because the data structures have not + * been optimized for this case. This is in particular the case + * for the mapping from a vma to a process. Since this case is expected + * to be rare we hope we can get away with this. + +The code consists of a the high level handler in mm/memory-failure.c, +a new page poison bit and various checks in the VM to handle poisoned +pages. + +The main target right now is KVM guests, but it works for all kinds +of applications. KVM support requires a recent qemu-kvm release. + +For the KVM use there was need for a new signal type so that +KVM can inject the machine check into the guest with the proper +address. This in theory allows other applications to handle +memory failures too. The expection is that near all applications +won't do that, but some very specialized ones might. + +--- + +There are two (actually three) modi memory failure recovery can be in: + +vm.memory_failure_recovery sysctl set to zero: + All memory failures cause a panic. Do not attempt recovery. + (on x86 this can be also affected by the tolerant level of the + MCE subsystem) + +early kill + (can be controlled globally and per process) + Send SIGBUS to the application as soon as the error is detected + This allows applications who can process memory errors in a gentle + way (e.g. drop affected object) + This is the mode used by KVM qemu. + +late kill + Send SIGBUS when the application runs into the corrupted page. + This is best for memory error unaware applications and default + Note some pages are always handled as late kill. + +--- + +User control: + +vm.memory_failure_recovery + See sysctl.txt + +vm.memory_failure_early_kill + Enable early kill mode globally + +PR_MCE_KILL + Set early/late kill mode/revert to system default + arg1: PR_MCE_KILL_CLEAR: Revert to system default + arg1: PR_MCE_KILL_SET: arg2 defines thread specific mode + PR_MCE_KILL_EARLY: Early kill + PR_MCE_KILL_LATE: Late kill + PR_MCE_KILL_DEFAULT: Use system global default +PR_MCE_KILL_GET + return current mode + + +--- + +Testing: + +madvise(MADV_POISON, ....) + (as root) + Poison a page in the process for testing + + +hwpoison-inject module through debugfs + /sys/debug/hwpoison/corrupt-pfn + +Inject hwpoison fault at PFN echoed into this file + + +Architecture specific MCE injector + +x86 has mce-inject, mce-test + +Some portable hwpoison test programs in mce-test, see blow. + +--- + +References: + +http://halobates.de/mce-lc09-2.pdf + Overview presentation from LinuxCon 09 + +git://git.kernel.org/pub/scm/utils/cpu/mce/mce-test.git + Test suite (hwpoison specific portable tests in tsrc) + +git://git.kernel.org/pub/scm/utils/cpu/mce/mce-inject.git + x86 specific injector + + +--- + +Limitations: + +- Not all page types are supported and never will. Most kernel internal +objects cannot be recovered, only LRU pages for now. +- Right now hugepage support is missing. + +--- +Andi Kleen, Oct 2009 + diff --git a/Documentation/vm/ksm.txt b/Documentation/vm/ksm.txt index 72a22f65960..262d8e6793a 100644 --- a/Documentation/vm/ksm.txt +++ b/Documentation/vm/ksm.txt @@ -52,15 +52,15 @@ The KSM daemon is controlled by sysfs files in /sys/kernel/mm/ksm/, readable by all but writable only by root: max_kernel_pages - set to maximum number of kernel pages that KSM may use - e.g. "echo 2000 > /sys/kernel/mm/ksm/max_kernel_pages" + e.g. "echo 100000 > /sys/kernel/mm/ksm/max_kernel_pages" Value 0 imposes no limit on the kernel pages KSM may use; but note that any process using MADV_MERGEABLE can cause KSM to allocate these pages, unswappable until it exits. - Default: 2000 (chosen for demonstration purposes) + Default: quarter of memory (chosen to not pin too much) pages_to_scan - how many present pages to scan before ksmd goes to sleep - e.g. "echo 200 > /sys/kernel/mm/ksm/pages_to_scan" - Default: 200 (chosen for demonstration purposes) + e.g. "echo 100 > /sys/kernel/mm/ksm/pages_to_scan" + Default: 100 (chosen for demonstration purposes) sleep_millisecs - how many milliseconds ksmd should sleep before next scan e.g. "echo 20 > /sys/kernel/mm/ksm/sleep_millisecs" @@ -70,7 +70,8 @@ run - set 0 to stop ksmd from running but keep merged pages, set 1 to run ksmd e.g. "echo 1 > /sys/kernel/mm/ksm/run", set 2 to stop ksmd and unmerge all pages currently merged, but leave mergeable areas registered for next run - Default: 1 (for immediate use by apps which register) + Default: 0 (must be changed to 1 to activate KSM, + except if CONFIG_SYSFS is disabled) The effectiveness of KSM and MADV_MERGEABLE is shown in /sys/kernel/mm/ksm/: @@ -86,4 +87,4 @@ pages_volatile embraces several different kinds of activity, but a high proportion there would also indicate poor use of madvise MADV_MERGEABLE. Izik Eidus, -Hugh Dickins, 30 July 2009 +Hugh Dickins, 24 Sept 2009 diff --git a/Documentation/vm/page-types.c b/Documentation/vm/page-types.c index fa1a30d9e9d..4793c6aac73 100644 --- a/Documentation/vm/page-types.c +++ b/Documentation/vm/page-types.c @@ -2,7 +2,10 @@ * page-types: Tool for querying page flags * * Copyright (C) 2009 Intel corporation - * Copyright (C) 2009 Wu Fengguang <fengguang.wu@intel.com> + * + * Authors: Wu Fengguang <fengguang.wu@intel.com> + * + * Released under the General Public License (GPL). */ #define _LARGEFILE64_SOURCE @@ -69,7 +72,9 @@ #define KPF_COMPOUND_TAIL 16 #define KPF_HUGE 17 #define KPF_UNEVICTABLE 18 +#define KPF_HWPOISON 19 #define KPF_NOPAGE 20 +#define KPF_KSM 21 /* [32-] kernel hacking assistances */ #define KPF_RESERVED 32 @@ -116,7 +121,9 @@ static char *page_flag_names[] = { [KPF_COMPOUND_TAIL] = "T:compound_tail", [KPF_HUGE] = "G:huge", [KPF_UNEVICTABLE] = "u:unevictable", + [KPF_HWPOISON] = "X:hwpoison", [KPF_NOPAGE] = "n:nopage", + [KPF_KSM] = "x:ksm", [KPF_RESERVED] = "r:reserved", [KPF_MLOCKED] = "m:mlocked", @@ -152,9 +159,6 @@ static unsigned long opt_size[MAX_ADDR_RANGES]; static int nr_vmas; static unsigned long pg_start[MAX_VMAS]; static unsigned long pg_end[MAX_VMAS]; -static unsigned long voffset; - -static int pagemap_fd; #define MAX_BIT_FILTERS 64 static int nr_bit_filters; @@ -163,9 +167,16 @@ static uint64_t opt_bits[MAX_BIT_FILTERS]; static int page_size; -#define PAGES_BATCH (64 << 10) /* 64k pages */ +static int pagemap_fd; static int kpageflags_fd; +static int opt_hwpoison; +static int opt_unpoison; + +static char *hwpoison_debug_fs = "/debug/hwpoison"; +static int hwpoison_inject_fd; +static int hwpoison_forget_fd; + #define HASH_SHIFT 13 #define HASH_SIZE (1 << HASH_SHIFT) #define HASH_MASK (HASH_SIZE - 1) @@ -207,6 +218,74 @@ static void fatal(const char *x, ...) exit(EXIT_FAILURE); } +static int checked_open(const char *pathname, int flags) +{ + int fd = open(pathname, flags); + + if (fd < 0) { + perror(pathname); + exit(EXIT_FAILURE); + } + + return fd; +} + +/* + * pagemap/kpageflags routines + */ + +static unsigned long do_u64_read(int fd, char *name, + uint64_t *buf, + unsigned long index, + unsigned long count) +{ + long bytes; + + if (index > ULONG_MAX / 8) + fatal("index overflow: %lu\n", index); + + if (lseek(fd, index * 8, SEEK_SET) < 0) { + perror(name); + exit(EXIT_FAILURE); + } + + bytes = read(fd, buf, count * 8); + if (bytes < 0) { + perror(name); + exit(EXIT_FAILURE); + } + if (bytes % 8) + fatal("partial read: %lu bytes\n", bytes); + + return bytes / 8; +} + +static unsigned long kpageflags_read(uint64_t *buf, + unsigned long index, + unsigned long pages) +{ + return do_u64_read(kpageflags_fd, PROC_KPAGEFLAGS, buf, index, pages); +} + +static unsigned long pagemap_read(uint64_t *buf, + unsigned long index, + unsigned long pages) +{ + return do_u64_read(pagemap_fd, "/proc/pid/pagemap", buf, index, pages); +} + +static unsigned long pagemap_pfn(uint64_t val) +{ + unsigned long pfn; + + if (val & PM_PRESENT) + pfn = PM_PFRAME(val); + else + pfn = 0; + + return pfn; +} + /* * page flag names @@ -255,7 +334,8 @@ static char *page_flag_longname(uint64_t flags) * page list and summary */ -static void show_page_range(unsigned long offset, uint64_t flags) +static void show_page_range(unsigned long voffset, + unsigned long offset, uint64_t flags) { static uint64_t flags0; static unsigned long voff; @@ -281,7 +361,8 @@ static void show_page_range(unsigned long offset, uint64_t flags) count = 1; } -static void show_page(unsigned long offset, uint64_t flags) +static void show_page(unsigned long voffset, + unsigned long offset, uint64_t flags) { if (opt_pid) printf("%lx\t", voffset); @@ -362,6 +443,62 @@ static uint64_t well_known_flags(uint64_t flags) return flags; } +static uint64_t kpageflags_flags(uint64_t flags) +{ + flags = expand_overloaded_flags(flags); + + if (!opt_raw) + flags = well_known_flags(flags); + + return flags; +} + +/* + * page actions + */ + +static void prepare_hwpoison_fd(void) +{ + char buf[100]; + + if (opt_hwpoison && !hwpoison_inject_fd) { + sprintf(buf, "%s/corrupt-pfn", hwpoison_debug_fs); + hwpoison_inject_fd = checked_open(buf, O_WRONLY); + } + + if (opt_unpoison && !hwpoison_forget_fd) { + sprintf(buf, "%s/renew-pfn", hwpoison_debug_fs); + hwpoison_forget_fd = checked_open(buf, O_WRONLY); + } +} + +static int hwpoison_page(unsigned long offset) +{ + char buf[100]; + int len; + + len = sprintf(buf, "0x%lx\n", offset); + len = write(hwpoison_inject_fd, buf, len); + if (len < 0) { + perror("hwpoison inject"); + return len; + } + return 0; +} + +static int unpoison_page(unsigned long offset) +{ + char buf[100]; + int len; + + len = sprintf(buf, "0x%lx\n", offset); + len = write(hwpoison_forget_fd, buf, len); + if (len < 0) { + perror("hwpoison forget"); + return len; + } + return 0; +} /* * page frame walker @@ -394,104 +531,83 @@ static int hash_slot(uint64_t flags) exit(EXIT_FAILURE); } -static void add_page(unsigned long offset, uint64_t flags) +static void add_page(unsigned long voffset, + unsigned long offset, uint64_t flags) { - flags = expand_overloaded_flags(flags); - - if (!opt_raw) - flags = well_known_flags(flags); + flags = kpageflags_flags(flags); if (!bit_mask_ok(flags)) return; + if (opt_hwpoison) + hwpoison_page(offset); + if (opt_unpoison) + unpoison_page(offset); + if (opt_list == 1) - show_page_range(offset, flags); + show_page_range(voffset, offset, flags); else if (opt_list == 2) - show_page(offset, flags); + show_page(voffset, offset, flags); nr_pages[hash_slot(flags)]++; total_pages++; } -static void walk_pfn(unsigned long index, unsigned long count) +#define KPAGEFLAGS_BATCH (64 << 10) /* 64k pages */ +static void walk_pfn(unsigned long voffset, + unsigned long index, + unsigned long count) { + uint64_t buf[KPAGEFLAGS_BATCH]; unsigned long batch; - unsigned long n; + unsigned long pages; unsigned long i; - if (index > ULONG_MAX / KPF_BYTES) - fatal("index overflow: %lu\n", index); - - lseek(kpageflags_fd, index * KPF_BYTES, SEEK_SET); - while (count) { - uint64_t kpageflags_buf[KPF_BYTES * PAGES_BATCH]; - - batch = min_t(unsigned long, count, PAGES_BATCH); - n = read(kpageflags_fd, kpageflags_buf, batch * KPF_BYTES); - if (n == 0) + batch = min_t(unsigned long, count, KPAGEFLAGS_BATCH); + pages = kpageflags_read(buf, index, batch); + if (pages == 0) break; - if (n < 0) { - perror(PROC_KPAGEFLAGS); - exit(EXIT_FAILURE); - } - if (n % KPF_BYTES != 0) - fatal("partial read: %lu bytes\n", n); - n = n / KPF_BYTES; + for (i = 0; i < pages; i++) + add_page(voffset + i, index + i, buf[i]); - for (i = 0; i < n; i++) - add_page(index + i, kpageflags_buf[i]); - - index += batch; - count -= batch; + index += pages; + count -= pages; } } - -#define PAGEMAP_BATCH 4096 -static unsigned long task_pfn(unsigned long pgoff) +#define PAGEMAP_BATCH (64 << 10) +static void walk_vma(unsigned long index, unsigned long count) { - static uint64_t buf[PAGEMAP_BATCH]; - static unsigned long start; - static long count; - uint64_t pfn; + uint64_t buf[PAGEMAP_BATCH]; + unsigned long batch; + unsigned long pages; + unsigned long pfn; + unsigned long i; - if (pgoff < start || pgoff >= start + count) { - if (lseek64(pagemap_fd, - (uint64_t)pgoff * PM_ENTRY_BYTES, - SEEK_SET) < 0) { - perror("pagemap seek"); - exit(EXIT_FAILURE); - } - count = read(pagemap_fd, buf, sizeof(buf)); - if (count == 0) - return 0; - if (count < 0) { - perror("pagemap read"); - exit(EXIT_FAILURE); - } - if (count % PM_ENTRY_BYTES) { - fatal("pagemap read not aligned.\n"); - exit(EXIT_FAILURE); - } - count /= PM_ENTRY_BYTES; - start = pgoff; - } + while (count) { + batch = min_t(unsigned long, count, PAGEMAP_BATCH); + pages = pagemap_read(buf, index, batch); + if (pages == 0) + break; - pfn = buf[pgoff - start]; - if (pfn & PM_PRESENT) - pfn = PM_PFRAME(pfn); - else - pfn = 0; + for (i = 0; i < pages; i++) { + pfn = pagemap_pfn(buf[i]); + if (pfn) + walk_pfn(index + i, pfn, 1); + } - return pfn; + index += pages; + count -= pages; + } } static void walk_task(unsigned long index, unsigned long count) { - int i = 0; const unsigned long end = index + count; + unsigned long start; + int i = 0; while (index < end) { @@ -501,15 +617,11 @@ static void walk_task(unsigned long index, unsigned long count) if (pg_start[i] >= end) return; - voffset = max_t(unsigned long, pg_start[i], index); - index = min_t(unsigned long, pg_end[i], end); + start = max_t(unsigned long, pg_start[i], index); + index = min_t(unsigned long, pg_end[i], end); - assert(voffset < index); - for (; voffset < index; voffset++) { - unsigned long pfn = task_pfn(voffset); - if (pfn) - walk_pfn(pfn, 1); - } + assert(start < index); + walk_vma(start, index - start); } } @@ -527,18 +639,14 @@ static void walk_addr_ranges(void) { int i; - kpageflags_fd = open(PROC_KPAGEFLAGS, O_RDONLY); - if (kpageflags_fd < 0) { - perror(PROC_KPAGEFLAGS); - exit(EXIT_FAILURE); - } + kpageflags_fd = checked_open(PROC_KPAGEFLAGS, O_RDONLY); if (!nr_addr_ranges) add_addr_range(0, ULONG_MAX); for (i = 0; i < nr_addr_ranges; i++) if (!opt_pid) - walk_pfn(opt_offset[i], opt_size[i]); + walk_pfn(0, opt_offset[i], opt_size[i]); else walk_task(opt_offset[i], opt_size[i]); @@ -575,6 +683,8 @@ static void usage(void) " -l|--list Show page details in ranges\n" " -L|--list-each Show page details one by one\n" " -N|--no-summary Don't show summay info\n" +" -X|--hwpoison hwpoison pages\n" +" -x|--unpoison unpoison pages\n" " -h|--help Show this usage message\n" "addr-spec:\n" " N one page at offset N (unit: pages)\n" @@ -624,11 +734,7 @@ static void parse_pid(const char *str) opt_pid = parse_number(str); sprintf(buf, "/proc/%d/pagemap", opt_pid); - pagemap_fd = open(buf, O_RDONLY); - if (pagemap_fd < 0) { - perror(buf); - exit(EXIT_FAILURE); - } + pagemap_fd = checked_open(buf, O_RDONLY); sprintf(buf, "/proc/%d/maps", opt_pid); file = fopen(buf, "r"); @@ -788,6 +894,8 @@ static struct option opts[] = { { "list" , 0, NULL, 'l' }, { "list-each" , 0, NULL, 'L' }, { "no-summary", 0, NULL, 'N' }, + { "hwpoison" , 0, NULL, 'X' }, + { "unpoison" , 0, NULL, 'x' }, { "help" , 0, NULL, 'h' }, { NULL , 0, NULL, 0 } }; @@ -799,7 +907,7 @@ int main(int argc, char *argv[]) page_size = getpagesize(); while ((c = getopt_long(argc, argv, - "rp:f:a:b:lLNh", opts, NULL)) != -1) { + "rp:f:a:b:lLNXxh", opts, NULL)) != -1) { switch (c) { case 'r': opt_raw = 1; @@ -825,6 +933,14 @@ int main(int argc, char *argv[]) case 'N': opt_no_summary = 1; break; + case 'X': + opt_hwpoison = 1; + prepare_hwpoison_fd(); + break; + case 'x': + opt_unpoison = 1; + prepare_hwpoison_fd(); + break; case 'h': usage(); exit(0); @@ -844,7 +960,7 @@ int main(int argc, char *argv[]) walk_addr_ranges(); if (opt_list == 1) - show_page_range(0, 0); /* drain the buffer */ + show_page_range(0, 0, 0); /* drain the buffer */ if (opt_no_summary) return 0; diff --git a/Documentation/vm/pagemap.txt b/Documentation/vm/pagemap.txt index 600a304a828..df09b9650a8 100644 --- a/Documentation/vm/pagemap.txt +++ b/Documentation/vm/pagemap.txt @@ -57,7 +57,9 @@ There are three components to pagemap: 16. COMPOUND_TAIL 16. HUGE 18. UNEVICTABLE + 19. HWPOISON 20. NOPAGE + 21. KSM Short descriptions to the page flags: @@ -86,9 +88,15 @@ Short descriptions to the page flags: 17. HUGE this is an integral part of a HugeTLB page +19. HWPOISON + hardware detected memory corruption on this page: don't touch the data! + 20. NOPAGE no page frame exists at the requested address +21. KSM + identical memory pages dynamically shared between one or more processes + [IO related page flags] 1. ERROR IO error occurred 3. UPTODATE page has up-to-date data diff --git a/Documentation/w1/masters/ds2482 b/Documentation/w1/masters/ds2482 index 9210d6fa502..299b91c7609 100644 --- a/Documentation/w1/masters/ds2482 +++ b/Documentation/w1/masters/ds2482 @@ -24,8 +24,8 @@ General Remarks Valid addresses are 0x18, 0x19, 0x1a, and 0x1b. However, the device cannot be detected without writing to the i2c bus, so no -detection is done. -You should force the device address. +detection is done. You should instantiate the device explicitly. -$ modprobe ds2482 force=0,0x18 +$ modprobe ds2482 +$ echo ds2482 0x18 > /sys/bus/i2c/devices/i2c-0/new_device |