diff options
Diffstat (limited to 'Documentation')
93 files changed, 2173 insertions, 787 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio index 2f06d40fe07..2e33dc6b234 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio +++ b/Documentation/ABI/testing/sysfs-bus-iio @@ -189,6 +189,14 @@ Description: A computed peak value based on the sum squared magnitude of the underlying value in the specified directions. +What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_raw +What: /sys/bus/iio/devices/iio:deviceX/in_pressure_raw +KernelVersion: 3.8 +Contact: linux-iio@vger.kernel.org +Description: + Raw pressure measurement from channel Y. Units after + application of scale and offset are kilopascal. + What: /sys/bus/iio/devices/iio:deviceX/in_accel_offset What: /sys/bus/iio/devices/iio:deviceX/in_accel_x_offset What: /sys/bus/iio/devices/iio:deviceX/in_accel_y_offset @@ -197,6 +205,8 @@ What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_offset What: /sys/bus/iio/devices/iio:deviceX/in_voltage_offset What: /sys/bus/iio/devices/iio:deviceX/in_tempY_offset What: /sys/bus/iio/devices/iio:deviceX/in_temp_offset +What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_offset +What: /sys/bus/iio/devices/iio:deviceX/in_pressure_offset KernelVersion: 2.6.35 Contact: linux-iio@vger.kernel.org Description: @@ -226,6 +236,8 @@ What: /sys/bus/iio/devices/iio:deviceX/in_magn_scale What: /sys/bus/iio/devices/iio:deviceX/in_magn_x_scale What: /sys/bus/iio/devices/iio:deviceX/in_magn_y_scale What: /sys/bus/iio/devices/iio:deviceX/in_magn_z_scale +What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_scale +What: /sys/bus/iio/devices/iio:deviceX/in_pressure_scale KernelVersion: 2.6.35 Contact: linux-iio@vger.kernel.org Description: @@ -245,6 +257,8 @@ What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_y_calibbias What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_z_calibbias What: /sys/bus/iio/devices/iio:deviceX/in_illuminance0_calibbias What: /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibbias +What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibbias +What: /sys/bus/iio/devices/iio:deviceX/in_pressure_calibbias KernelVersion: 2.6.35 Contact: linux-iio@vger.kernel.org Description: @@ -262,6 +276,8 @@ What /sys/bus/iio/devices/iio:deviceX/in_anglvel_y_calibscale What /sys/bus/iio/devices/iio:deviceX/in_anglvel_z_calibscale what /sys/bus/iio/devices/iio:deviceX/in_illuminance0_calibscale what /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibscale +What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibscale +What: /sys/bus/iio/devices/iio:deviceX/in_pressure_calibscale KernelVersion: 2.6.35 Contact: linux-iio@vger.kernel.org Description: @@ -275,6 +291,8 @@ What: /sys/.../iio:deviceX/in_voltage-voltage_scale_available What: /sys/.../iio:deviceX/out_voltageX_scale_available What: /sys/.../iio:deviceX/out_altvoltageX_scale_available What: /sys/.../iio:deviceX/in_capacitance_scale_available +What: /sys/.../iio:deviceX/in_pressure_scale_available +What: /sys/.../iio:deviceX/in_pressureY_scale_available KernelVersion: 2.6.35 Contact: linux-iio@vger.kernel.org Description: @@ -694,6 +712,8 @@ What: /sys/.../buffer/scan_elements/in_voltageY_en What: /sys/.../buffer/scan_elements/in_voltageY-voltageZ_en What: /sys/.../buffer/scan_elements/in_incli_x_en What: /sys/.../buffer/scan_elements/in_incli_y_en +What: /sys/.../buffer/scan_elements/in_pressureY_en +What: /sys/.../buffer/scan_elements/in_pressure_en KernelVersion: 2.6.37 Contact: linux-iio@vger.kernel.org Description: @@ -707,6 +727,8 @@ What: /sys/.../buffer/scan_elements/in_voltageY_type What: /sys/.../buffer/scan_elements/in_voltage_type What: /sys/.../buffer/scan_elements/in_voltageY_supply_type What: /sys/.../buffer/scan_elements/in_timestamp_type +What: /sys/.../buffer/scan_elements/in_pressureY_type +What: /sys/.../buffer/scan_elements/in_pressure_type KernelVersion: 2.6.37 Contact: linux-iio@vger.kernel.org Description: @@ -751,6 +773,8 @@ What: /sys/.../buffer/scan_elements/in_magn_z_index What: /sys/.../buffer/scan_elements/in_incli_x_index What: /sys/.../buffer/scan_elements/in_incli_y_index What: /sys/.../buffer/scan_elements/in_timestamp_index +What: /sys/.../buffer/scan_elements/in_pressureY_index +What: /sys/.../buffer/scan_elements/in_pressure_index KernelVersion: 2.6.37 Contact: linux-iio@vger.kernel.org Description: diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq index 23d78b5aab1..0ba6ea2f89d 100644 --- a/Documentation/ABI/testing/sysfs-class-devfreq +++ b/Documentation/ABI/testing/sysfs-class-devfreq @@ -11,7 +11,7 @@ What: /sys/class/devfreq/.../governor Date: September 2011 Contact: MyungJoo Ham <myungjoo.ham@samsung.com> Description: - The /sys/class/devfreq/.../governor shows the name of the + The /sys/class/devfreq/.../governor show or set the name of the governor used by the corresponding devfreq object. What: /sys/class/devfreq/.../cur_freq @@ -19,15 +19,16 @@ Date: September 2011 Contact: MyungJoo Ham <myungjoo.ham@samsung.com> Description: The /sys/class/devfreq/.../cur_freq shows the current - frequency of the corresponding devfreq object. + frequency of the corresponding devfreq object. Same as + target_freq when get_cur_freq() is not implemented by + devfreq driver. -What: /sys/class/devfreq/.../central_polling -Date: September 2011 -Contact: MyungJoo Ham <myungjoo.ham@samsung.com> +What: /sys/class/devfreq/.../target_freq +Date: September 2012 +Contact: Rajagopal Venkat <rajagopal.venkat@linaro.org> Description: - The /sys/class/devfreq/.../central_polling shows whether - the devfreq ojbect is using devfreq-provided central - polling mechanism or not. + The /sys/class/devfreq/.../target_freq shows the next governor + predicted target frequency of the corresponding devfreq object. What: /sys/class/devfreq/.../polling_interval Date: September 2011 @@ -43,6 +44,17 @@ Description: (/sys/class/devfreq/.../central_polling is 0), this value may be useless. +What: /sys/class/devfreq/.../trans_stat +Date: October 2012 +Contact: MyungJoo Ham <myungjoo.ham@samsung.com> +Descrtiption: + This ABI shows the statistics of devfreq behavior on a + specific device. It shows the time spent in each state and + the number of transitions between states. + In order to activate this ABI, the devfreq target device + driver should provide the list of available frequencies + with its profile. + What: /sys/class/devfreq/.../userspace/set_freq Date: September 2011 Contact: MyungJoo Ham <myungjoo.ham@samsung.com> @@ -50,3 +62,19 @@ Description: The /sys/class/devfreq/.../userspace/set_freq shows and sets the requested frequency for the devfreq object if userspace governor is in effect. + +What: /sys/class/devfreq/.../available_frequencies +Date: October 2012 +Contact: Nishanth Menon <nm@ti.com> +Description: + The /sys/class/devfreq/.../available_frequencies shows + the available frequencies of the corresponding devfreq object. + This is a snapshot of available frequencies and not limited + by the min/max frequency restrictions. + +What: /sys/class/devfreq/.../available_governors +Date: October 2012 +Contact: Nishanth Menon <nm@ti.com> +Description: + The /sys/class/devfreq/.../available_governors shows + currently available governors in the system. diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power index 45000f0db4d..7fc2997b23a 100644 --- a/Documentation/ABI/testing/sysfs-devices-power +++ b/Documentation/ABI/testing/sysfs-devices-power @@ -204,3 +204,34 @@ Description: This attribute has no effect on system-wide suspend/resume and hibernation. + +What: /sys/devices/.../power/pm_qos_no_power_off +Date: September 2012 +Contact: Rafael J. Wysocki <rjw@sisk.pl> +Description: + The /sys/devices/.../power/pm_qos_no_power_off attribute + is used for manipulating the PM QoS "no power off" flag. If + set, this flag indicates to the kernel that power should not + be removed entirely from the device. + + Not all drivers support this attribute. If it isn't supported, + it is not present. + + This attribute has no effect on system-wide suspend/resume and + hibernation. + +What: /sys/devices/.../power/pm_qos_remote_wakeup +Date: September 2012 +Contact: Rafael J. Wysocki <rjw@sisk.pl> +Description: + The /sys/devices/.../power/pm_qos_remote_wakeup attribute + is used for manipulating the PM QoS "remote wakeup required" + flag. If set, this flag indicates to the kernel that the + device is a source of user events that have to be signaled from + its low-power states. + + Not all drivers support this attribute. If it isn't supported, + it is not present. + + This attribute has no effect on system-wide suspend/resume and + hibernation. diff --git a/Documentation/ABI/testing/sysfs-devices-sun b/Documentation/ABI/testing/sysfs-devices-sun new file mode 100644 index 00000000000..86be9848a77 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-devices-sun @@ -0,0 +1,14 @@ +Whatt: /sys/devices/.../sun +Date: October 2012 +Contact: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com> +Description: + The file contains a Slot-unique ID which provided by the _SUN + method in the ACPI namespace. The value is written in Advanced + Configuration and Power Interface Specification as follows: + + "The _SUN value is required to be unique among the slots of + the same type. It is also recommended that this number match + the slot number printed on the physical slot whenever possible." + + So reading the sysfs file, we can identify a physical position + of the slot in the system. diff --git a/Documentation/ABI/testing/sysfs-tty b/Documentation/ABI/testing/sysfs-tty index 0c430150d92..ad22fb0ee76 100644 --- a/Documentation/ABI/testing/sysfs-tty +++ b/Documentation/ABI/testing/sysfs-tty @@ -26,3 +26,115 @@ Description: UART port in serial_core, that is bound to TTY like ttyS0. uartclk = 16 * baud_base + These sysfs values expose the TIOCGSERIAL interface via + sysfs rather than via ioctls. + +What: /sys/class/tty/ttyS0/type +Date: October 2012 +Contact: Alan Cox <alan@linux.intel.com> +Description: + Shows the current tty type for this port. + + These sysfs values expose the TIOCGSERIAL interface via + sysfs rather than via ioctls. + +What: /sys/class/tty/ttyS0/line +Date: October 2012 +Contact: Alan Cox <alan@linux.intel.com> +Description: + Shows the current tty line number for this port. + + These sysfs values expose the TIOCGSERIAL interface via + sysfs rather than via ioctls. + +What: /sys/class/tty/ttyS0/port +Date: October 2012 +Contact: Alan Cox <alan@linux.intel.com> +Description: + Shows the current tty port I/O address for this port. + + These sysfs values expose the TIOCGSERIAL interface via + sysfs rather than via ioctls. + +What: /sys/class/tty/ttyS0/irq +Date: October 2012 +Contact: Alan Cox <alan@linux.intel.com> +Description: + Shows the current primary interrupt for this port. + + These sysfs values expose the TIOCGSERIAL interface via + sysfs rather than via ioctls. + +What: /sys/class/tty/ttyS0/flags +Date: October 2012 +Contact: Alan Cox <alan@linux.intel.com> +Description: + Show the tty port status flags for this port. + + These sysfs values expose the TIOCGSERIAL interface via + sysfs rather than via ioctls. + +What: /sys/class/tty/ttyS0/xmit_fifo_size +Date: October 2012 +Contact: Alan Cox <alan@linux.intel.com> +Description: + Show the transmit FIFO size for this port. + + These sysfs values expose the TIOCGSERIAL interface via + sysfs rather than via ioctls. + +What: /sys/class/tty/ttyS0/close_delay +Date: October 2012 +Contact: Alan Cox <alan@linux.intel.com> +Description: + Show the closing delay time for this port in ms. + + These sysfs values expose the TIOCGSERIAL interface via + sysfs rather than via ioctls. + +What: /sys/class/tty/ttyS0/closing_wait +Date: October 2012 +Contact: Alan Cox <alan@linux.intel.com> +Description: + Show the close wait time for this port in ms. + + These sysfs values expose the TIOCGSERIAL interface via + sysfs rather than via ioctls. + +What: /sys/class/tty/ttyS0/custom_divisor +Date: October 2012 +Contact: Alan Cox <alan@linux.intel.com> +Description: + Show the custom divisor if any that is set on this port. + + These sysfs values expose the TIOCGSERIAL interface via + sysfs rather than via ioctls. + +What: /sys/class/tty/ttyS0/io_type +Date: October 2012 +Contact: Alan Cox <alan@linux.intel.com> +Description: + Show the I/O type that is to be used with the iomem base + address. + + These sysfs values expose the TIOCGSERIAL interface via + sysfs rather than via ioctls. + +What: /sys/class/tty/ttyS0/iomem_base +Date: October 2012 +Contact: Alan Cox <alan@linux.intel.com> +Description: + The I/O memory base for this port. + + These sysfs values expose the TIOCGSERIAL interface via + sysfs rather than via ioctls. + +What: /sys/class/tty/ttyS0/iomem_reg_shift +Date: October 2012 +Contact: Alan Cox <alan@linux.intel.com> +Description: + Show the register shift indicating the spacing to be used + for accesses on this iomem address. + + These sysfs values expose the TIOCGSERIAL interface via + sysfs rather than via ioctls. diff --git a/Documentation/DocBook/gadget.tmpl b/Documentation/DocBook/gadget.tmpl index 6ef2f0073e5..4017f147ba2 100644 --- a/Documentation/DocBook/gadget.tmpl +++ b/Documentation/DocBook/gadget.tmpl @@ -671,7 +671,7 @@ than a kernel driver. <para>There's a USB Mass Storage class driver, which provides a different solution for interoperability with systems such as MS-Windows and MacOS. -That <emphasis>File-backed Storage</emphasis> driver uses a +That <emphasis>Mass Storage</emphasis> driver uses a file or block device as backing store for a drive, like the <filename>loop</filename> driver. The USB host uses the BBB, CB, or CBI versions of the mass diff --git a/Documentation/DocBook/uio-howto.tmpl b/Documentation/DocBook/uio-howto.tmpl index ac3d0018140..ddb05e98af0 100644 --- a/Documentation/DocBook/uio-howto.tmpl +++ b/Documentation/DocBook/uio-howto.tmpl @@ -719,6 +719,62 @@ framework to set up sysfs files for this region. Simply leave it alone. </para> </sect1> +<sect1 id="using uio_dmem_genirq"> +<title>Using uio_dmem_genirq for platform devices</title> + <para> + In addition to statically allocated memory ranges, they may also be + a desire to use dynamically allocated regions in a user space driver. + In particular, being able to access memory made available through the + dma-mapping API, may be particularly useful. The + <varname>uio_dmem_genirq</varname> driver provides a way to accomplish + this. + </para> + <para> + This driver is used in a similar manner to the + <varname>"uio_pdrv_genirq"</varname> driver with respect to interrupt + configuration and handling. + </para> + <para> + Set the <varname>.name</varname> element of + <varname>struct platform_device</varname> to + <varname>"uio_dmem_genirq"</varname> to use this driver. + </para> + <para> + When using this driver, fill in the <varname>.platform_data</varname> + element of <varname>struct platform_device</varname>, which is of type + <varname>struct uio_dmem_genirq_pdata</varname> and which contains the + following elements: + </para> + <itemizedlist> + <listitem><varname>struct uio_info uioinfo</varname>: The same + structure used as the <varname>uio_pdrv_genirq</varname> platform + data</listitem> + <listitem><varname>unsigned int *dynamic_region_sizes</varname>: + Pointer to list of sizes of dynamic memory regions to be mapped into + user space. + </listitem> + <listitem><varname>unsigned int num_dynamic_regions</varname>: + Number of elements in <varname>dynamic_region_sizes</varname> array. + </listitem> + </itemizedlist> + <para> + The dynamic regions defined in the platform data will be appended to + the <varname> mem[] </varname> array after the platform device + resources, which implies that the total number of static and dynamic + memory regions cannot exceed <varname>MAX_UIO_MAPS</varname>. + </para> + <para> + The dynamic memory regions will be allocated when the UIO device file, + <varname>/dev/uioX</varname> is opened. + Simiar to static memory resources, the memory region information for + dynamic regions is then visible via sysfs at + <varname>/sys/class/uio/uioX/maps/mapY/*</varname>. + The dynmaic memory regions will be freed when the UIO device file is + closed. When no processes are holding the device file open, the address + returned to userspace is ~0. + </para> +</sect1> + </chapter> <chapter id="userspace_driver" xreflabel="Writing a driver in user space"> diff --git a/Documentation/IRQ-domain.txt b/Documentation/IRQ-domain.txt index 1401cece745..9bc95942ec2 100644 --- a/Documentation/IRQ-domain.txt +++ b/Documentation/IRQ-domain.txt @@ -7,6 +7,21 @@ systems with multiple interrupt controllers the kernel must ensure that each one gets assigned non-overlapping allocations of Linux IRQ numbers. +The number of interrupt controllers registered as unique irqchips +show a rising tendency: for example subdrivers of different kinds +such as GPIO controllers avoid reimplementing identical callback +mechanisms as the IRQ core system by modelling their interrupt +handlers as irqchips, i.e. in effect cascading interrupt controllers. + +Here the interrupt number loose all kind of correspondence to +hardware interrupt numbers: whereas in the past, IRQ numbers could +be chosen so they matched the hardware IRQ line into the root +interrupt controller (i.e. the component actually fireing the +interrupt line to the CPU) nowadays this number is just a number. + +For this reason we need a mechanism to separate controller-local +interrupt numbers, called hardware irq's, from Linux IRQ numbers. + The irq_alloc_desc*() and irq_free_desc*() APIs provide allocation of irq numbers, but they don't provide any support for reverse mapping of the controller-local IRQ (hwirq) number into the Linux IRQ number @@ -40,6 +55,10 @@ required hardware setup. When an interrupt is received, irq_find_mapping() function should be used to find the Linux IRQ number from the hwirq number. +The irq_create_mapping() function must be called *atleast once* +before any call to irq_find_mapping(), lest the descriptor will not +be allocated. + If the driver has the Linux IRQ number or the irq_data pointer, and needs to know the associated hwirq number (such as in the irq_chip callbacks) then it can be directly obtained from irq_data->hwirq. @@ -119,4 +138,17 @@ numbers. Most users of legacy mappings should use irq_domain_add_simple() which will use a legacy domain only if an IRQ range is supplied by the -system and will otherwise use a linear domain mapping. +system and will otherwise use a linear domain mapping. The semantics +of this call are such that if an IRQ range is specified then +descriptors will be allocated on-the-fly for it, and if no range is +specified it will fall through to irq_domain_add_linear() which meand +*no* irq descriptors will be allocated. + +A typical use case for simple domains is where an irqchip provider +is supporting both dynamic and static IRQ assignments. + +In order to avoid ending up in a situation where a linear domain is +used and no descriptor gets allocated it is very important to make sure +that the driver using the simple domain call irq_create_mapping() +before any irq_find_mapping() since the latter will actually work +for the static IRQ assignment case. diff --git a/Documentation/RCU/RTFP.txt b/Documentation/RCU/RTFP.txt index 7c1dfb19fc4..7f40c72a9c5 100644 --- a/Documentation/RCU/RTFP.txt +++ b/Documentation/RCU/RTFP.txt @@ -186,7 +186,7 @@ Bibtex Entries @article{Kung80 ,author="H. T. Kung and Q. Lehman" -,title="Concurrent Maintenance of Binary Search Trees" +,title="Concurrent Manipulation of Binary Search Trees" ,Year="1980" ,Month="September" ,journal="ACM Transactions on Database Systems" diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index cdb20d41a44..31ef8fe07f8 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt @@ -271,15 +271,14 @@ over a rather long period of time, but improvements are always welcome! The same cautions apply to call_rcu_bh() and call_rcu_sched(). 9. All RCU list-traversal primitives, which include - rcu_dereference(), list_for_each_entry_rcu(), - list_for_each_continue_rcu(), and list_for_each_safe_rcu(), - must be either within an RCU read-side critical section or - must be protected by appropriate update-side locks. RCU - read-side critical sections are delimited by rcu_read_lock() - and rcu_read_unlock(), or by similar primitives such as - rcu_read_lock_bh() and rcu_read_unlock_bh(), in which case - the matching rcu_dereference() primitive must be used in order - to keep lockdep happy, in this case, rcu_dereference_bh(). + rcu_dereference(), list_for_each_entry_rcu(), and + list_for_each_safe_rcu(), must be either within an RCU read-side + critical section or must be protected by appropriate update-side + locks. RCU read-side critical sections are delimited by + rcu_read_lock() and rcu_read_unlock(), or by similar primitives + such as rcu_read_lock_bh() and rcu_read_unlock_bh(), in which + case the matching rcu_dereference() primitive must be used in + order to keep lockdep happy, in this case, rcu_dereference_bh(). The reason that it is permissible to use RCU list-traversal primitives when the update-side lock is held is that doing so diff --git a/Documentation/RCU/listRCU.txt b/Documentation/RCU/listRCU.txt index 4349c1487e9..adb5a378284 100644 --- a/Documentation/RCU/listRCU.txt +++ b/Documentation/RCU/listRCU.txt @@ -205,7 +205,7 @@ RCU ("read-copy update") its name. The RCU code is as follows: audit_copy_rule(&ne->rule, &e->rule); ne->rule.action = newaction; ne->rule.file_count = newfield_count; - list_replace_rcu(e, ne); + list_replace_rcu(&e->list, &ne->list); call_rcu(&e->rcu, audit_free_rule); return 0; } diff --git a/Documentation/RCU/rcuref.txt b/Documentation/RCU/rcuref.txt index 4202ad09313..141d531aa14 100644 --- a/Documentation/RCU/rcuref.txt +++ b/Documentation/RCU/rcuref.txt @@ -20,7 +20,7 @@ release_referenced() delete() { { ... write_lock(&list_lock); atomic_dec(&el->rc, relfunc) ... - ... delete_element + ... remove_element } write_unlock(&list_lock); ... if (atomic_dec_and_test(&el->rc)) @@ -52,7 +52,7 @@ release_referenced() delete() { { ... spin_lock(&list_lock); if (atomic_dec_and_test(&el->rc)) ... - call_rcu(&el->head, el_free); delete_element + call_rcu(&el->head, el_free); remove_element ... spin_unlock(&list_lock); } ... if (atomic_dec_and_test(&el->rc)) @@ -64,3 +64,60 @@ Sometimes, a reference to the element needs to be obtained in the update (write) stream. In such cases, atomic_inc_not_zero() might be overkill, since we hold the update-side spinlock. One might instead use atomic_inc() in such cases. + +It is not always convenient to deal with "FAIL" in the +search_and_reference() code path. In such cases, the +atomic_dec_and_test() may be moved from delete() to el_free() +as follows: + +1. 2. +add() search_and_reference() +{ { + alloc_object rcu_read_lock(); + ... search_for_element + atomic_set(&el->rc, 1); atomic_inc(&el->rc); + spin_lock(&list_lock); ... + + add_element rcu_read_unlock(); + ... } + spin_unlock(&list_lock); 4. +} delete() +3. { +release_referenced() spin_lock(&list_lock); +{ ... + ... remove_element + if (atomic_dec_and_test(&el->rc)) spin_unlock(&list_lock); + kfree(el); ... + ... call_rcu(&el->head, el_free); +} ... +5. } +void el_free(struct rcu_head *rhp) +{ + release_referenced(); +} + +The key point is that the initial reference added by add() is not removed +until after a grace period has elapsed following removal. This means that +search_and_reference() cannot find this element, which means that the value +of el->rc cannot increase. Thus, once it reaches zero, there are no +readers that can or ever will be able to reference the element. The +element can therefore safely be freed. This in turn guarantees that if +any reader finds the element, that reader may safely acquire a reference +without checking the value of the reference counter. + +In cases where delete() can sleep, synchronize_rcu() can be called from +delete(), so that el_free() can be subsumed into delete as follows: + +4. +delete() +{ + spin_lock(&list_lock); + ... + remove_element + spin_unlock(&list_lock); + ... + synchronize_rcu(); + if (atomic_dec_and_test(&el->rc)) + kfree(el); + ... +} diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index 672d1908325..c776968f446 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt @@ -10,51 +10,63 @@ for rcutree and next for rcutiny. CONFIG_TREE_RCU and CONFIG_TREE_PREEMPT_RCU debugfs Files and Formats -These implementations of RCU provides several debugfs files under the +These implementations of RCU provide several debugfs directories under the top-level directory "rcu": -rcu/rcudata: +rcu/rcu_bh +rcu/rcu_preempt +rcu/rcu_sched + +Each directory contains files for the corresponding flavor of RCU. +Note that rcu/rcu_preempt is only present for CONFIG_TREE_PREEMPT_RCU. +For CONFIG_TREE_RCU, the RCU flavor maps onto the RCU-sched flavor, +so that activity for both appears in rcu/rcu_sched. + +In addition, the following file appears in the top-level directory: +rcu/rcutorture. This file displays rcutorture test progress. The output +of "cat rcu/rcutorture" looks as follows: + +rcutorture test sequence: 0 (test in progress) +rcutorture update version number: 615 + +The first line shows the number of rcutorture tests that have completed +since boot. If a test is currently running, the "(test in progress)" +string will appear as shown above. The second line shows the number of +update cycles that the current test has started, or zero if there is +no test in progress. + + +Within each flavor directory (rcu/rcu_bh, rcu/rcu_sched, and possibly +also rcu/rcu_preempt) the following files will be present: + +rcudata: Displays fields in struct rcu_data. -rcu/rcudata.csv: - Comma-separated values spreadsheet version of rcudata. -rcu/rcugp: +rcuexp: + Displays statistics for expedited grace periods. +rcugp: Displays grace-period counters. -rcu/rcuhier: +rcuhier: Displays the struct rcu_node hierarchy. -rcu/rcu_pending: +rcu_pending: Displays counts of the reasons rcu_pending() decided that RCU had work to do. -rcu/rcutorture: - Displays rcutorture test progress. -rcu/rcuboost: +rcuboost: Displays RCU boosting statistics. Only present if CONFIG_RCU_BOOST=y. -The output of "cat rcu/rcudata" looks as follows: - -rcu_sched: - 0 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=545/1/0 df=50 of=0 ql=163 qs=NRW. kt=0/W/0 ktl=ebc3 b=10 ci=153737 co=0 ca=0 - 1 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=967/1/0 df=58 of=0 ql=634 qs=NRW. kt=0/W/1 ktl=58c b=10 ci=191037 co=0 ca=0 - 2 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=1081/1/0 df=175 of=0 ql=74 qs=N.W. kt=0/W/2 ktl=da94 b=10 ci=75991 co=0 ca=0 - 3 c=20942 g=20943 pq=1 pgp=20942 qp=1 dt=1846/0/0 df=404 of=0 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=72261 co=0 ca=0 - 4 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=369/1/0 df=83 of=0 ql=48 qs=N.W. kt=0/W/4 ktl=e0e7 b=10 ci=128365 co=0 ca=0 - 5 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=381/1/0 df=64 of=0 ql=169 qs=NRW. kt=0/W/5 ktl=fb2f b=10 ci=164360 co=0 ca=0 - 6 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=1037/1/0 df=183 of=0 ql=62 qs=N.W. kt=0/W/6 ktl=d2ad b=10 ci=65663 co=0 ca=0 - 7 c=20897 g=20897 pq=1 pgp=20896 qp=0 dt=1572/0/0 df=382 of=0 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=75006 co=0 ca=0 -rcu_bh: - 0 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=545/1/0 df=6 of=0 ql=0 qs=.... kt=0/W/0 ktl=ebc3 b=10 ci=0 co=0 ca=0 - 1 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=967/1/0 df=3 of=0 ql=0 qs=.... kt=0/W/1 ktl=58c b=10 ci=151 co=0 ca=0 - 2 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1081/1/0 df=6 of=0 ql=0 qs=.... kt=0/W/2 ktl=da94 b=10 ci=0 co=0 ca=0 - 3 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1846/0/0 df=8 of=0 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=0 co=0 ca=0 - 4 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=369/1/0 df=6 of=0 ql=0 qs=.... kt=0/W/4 ktl=e0e7 b=10 ci=0 co=0 ca=0 - 5 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=381/1/0 df=4 of=0 ql=0 qs=.... kt=0/W/5 ktl=fb2f b=10 ci=0 co=0 ca=0 - 6 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1037/1/0 df=6 of=0 ql=0 qs=.... kt=0/W/6 ktl=d2ad b=10 ci=0 co=0 ca=0 - 7 c=1474 g=1474 pq=1 pgp=1473 qp=0 dt=1572/0/0 df=8 of=0 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=0 co=0 ca=0 - -The first section lists the rcu_data structures for rcu_sched, the second -for rcu_bh. Note that CONFIG_TREE_PREEMPT_RCU kernels will have an -additional section for rcu_preempt. Each section has one line per CPU, -or eight for this 8-CPU system. The fields are as follows: +The output of "cat rcu/rcu_preempt/rcudata" looks as follows: + + 0!c=30455 g=30456 pq=1 qp=1 dt=126535/140000000000000/0 df=2002 of=4 ql=0/0 qs=N... b=10 ci=74572 nci=0 co=1131 ca=716 + 1!c=30719 g=30720 pq=1 qp=0 dt=132007/140000000000000/0 df=1874 of=10 ql=0/0 qs=N... b=10 ci=123209 nci=0 co=685 ca=982 + 2!c=30150 g=30151 pq=1 qp=1 dt=138537/140000000000000/0 df=1707 of=8 ql=0/0 qs=N... b=10 ci=80132 nci=0 co=1328 ca=1458 + 3 c=31249 g=31250 pq=1 qp=0 dt=107255/140000000000000/0 df=1749 of=6 ql=0/450 qs=NRW. b=10 ci=151700 nci=0 co=509 ca=622 + 4!c=29502 g=29503 pq=1 qp=1 dt=83647/140000000000000/0 df=965 of=5 ql=0/0 qs=N... b=10 ci=65643 nci=0 co=1373 ca=1521 + 5 c=31201 g=31202 pq=1 qp=1 dt=70422/0/0 df=535 of=7 ql=0/0 qs=.... b=10 ci=58500 nci=0 co=764 ca=698 + 6!c=30253 g=30254 pq=1 qp=1 dt=95363/140000000000000/0 df=780 of=5 ql=0/0 qs=N... b=10 ci=100607 nci=0 co=1414 ca=1353 + 7 c=31178 g=31178 pq=1 qp=0 dt=91536/0/0 df=547 of=4 ql=0/0 qs=.... b=10 ci=109819 nci=0 co=1115 ca=969 + +This file has one line per CPU, or eight for this 8-CPU system. +The fields are as follows: o The number at the beginning of each line is the CPU number. CPUs numbers followed by an exclamation mark are offline, @@ -64,11 +76,13 @@ o The number at the beginning of each line is the CPU number. substantially larger than the number of actual CPUs. o "c" is the count of grace periods that this CPU believes have - completed. Offlined CPUs and CPUs in dynticks idle mode may - lag quite a ways behind, for example, CPU 6 under "rcu_sched" - above, which has been offline through not quite 40,000 RCU grace - periods. It is not unusual to see CPUs lagging by thousands of - grace periods. + completed. Offlined CPUs and CPUs in dynticks idle mode may lag + quite a ways behind, for example, CPU 4 under "rcu_sched" above, + which has been offline through 16 RCU grace periods. It is not + unusual to see offline CPUs lagging by thousands of grace periods. + Note that although the grace-period number is an unsigned long, + it is printed out as a signed long to allow more human-friendly + representation near boot time. o "g" is the count of grace periods that this CPU believes have started. Again, offlined CPUs and CPUs in dynticks idle mode @@ -84,30 +98,25 @@ o "pq" indicates that this CPU has passed through a quiescent state CPU has not yet reported that fact, (2) some other CPU has not yet reported for this grace period, or (3) both. -o "pgp" indicates which grace period the last-observed quiescent - state for this CPU corresponds to. This is important for handling - the race between CPU 0 reporting an extended dynticks-idle - quiescent state for CPU 1 and CPU 1 suddenly waking up and - reporting its own quiescent state. If CPU 1 was the last CPU - for the current grace period, then the CPU that loses this race - will attempt to incorrectly mark CPU 1 as having checked in for - the next grace period! - o "qp" indicates that RCU still expects a quiescent state from this CPU. Offlined CPUs and CPUs in dyntick idle mode might well have qp=1, which is OK: RCU is still ignoring them. o "dt" is the current value of the dyntick counter that is incremented - when entering or leaving dynticks idle state, either by the - scheduler or by irq. This number is even if the CPU is in - dyntick idle mode and odd otherwise. The number after the first - "/" is the interrupt nesting depth when in dyntick-idle state, - or one greater than the interrupt-nesting depth otherwise. - The number after the second "/" is the NMI nesting depth. + when entering or leaving idle, either due to a context switch or + due to an interrupt. This number is even if the CPU is in idle + from RCU's viewpoint and odd otherwise. The number after the + first "/" is the interrupt nesting depth when in idle state, + or a large number added to the interrupt-nesting depth when + running a non-idle task. Some architectures do not accurately + count interrupt nesting when running in non-idle kernel context, + which can result in interesting anomalies such as negative + interrupt-nesting levels. The number after the second "/" + is the NMI nesting depth. o "df" is the number of times that some other CPU has forced a quiescent state on behalf of this CPU due to this CPU being in - dynticks-idle state. + idle state. o "of" is the number of times that some other CPU has forced a quiescent state on behalf of this CPU due to this CPU being @@ -120,9 +129,13 @@ o "of" is the number of times that some other CPU has forced a error, so it makes sense to err conservatively. o "ql" is the number of RCU callbacks currently residing on - this CPU. This is the total number of callbacks, regardless - of what state they are in (new, waiting for grace period to - start, waiting for grace period to end, ready to invoke). + this CPU. The first number is the number of "lazy" callbacks + that are known to RCU to only be freeing memory, and the number + after the "/" is the total number of callbacks, lazy or not. + These counters count callbacks regardless of what phase of + grace-period processing that they are in (new, waiting for + grace period to start, waiting for grace period to end, ready + to invoke). o "qs" gives an indication of the state of the callback queue with four characters: @@ -150,6 +163,43 @@ o "qs" gives an indication of the state of the callback queue If there are no callbacks in a given one of the above states, the corresponding character is replaced by ".". +o "b" is the batch limit for this CPU. If more than this number + of RCU callbacks is ready to invoke, then the remainder will + be deferred. + +o "ci" is the number of RCU callbacks that have been invoked for + this CPU. Note that ci+nci+ql is the number of callbacks that have + been registered in absence of CPU-hotplug activity. + +o "nci" is the number of RCU callbacks that have been offloaded from + this CPU. This will always be zero unless the kernel was built + with CONFIG_RCU_NOCB_CPU=y and the "rcu_nocbs=" kernel boot + parameter was specified. + +o "co" is the number of RCU callbacks that have been orphaned due to + this CPU going offline. These orphaned callbacks have been moved + to an arbitrarily chosen online CPU. + +o "ca" is the number of RCU callbacks that have been adopted by this + CPU due to other CPUs going offline. Note that ci+co-ca+ql is + the number of RCU callbacks registered on this CPU. + + +Kernels compiled with CONFIG_RCU_BOOST=y display the following from +/debug/rcu/rcu_preempt/rcudata: + + 0!c=12865 g=12866 pq=1 qp=1 dt=83113/140000000000000/0 df=288 of=11 ql=0/0 qs=N... kt=0/O ktl=944 b=10 ci=60709 nci=0 co=748 ca=871 + 1 c=14407 g=14408 pq=1 qp=0 dt=100679/140000000000000/0 df=378 of=7 ql=0/119 qs=NRW. kt=0/W ktl=9b6 b=10 ci=109740 nci=0 co=589 ca=485 + 2 c=14407 g=14408 pq=1 qp=0 dt=105486/0/0 df=90 of=9 ql=0/89 qs=NRW. kt=0/W ktl=c0c b=10 ci=83113 nci=0 co=533 ca=490 + 3 c=14407 g=14408 pq=1 qp=0 dt=107138/0/0 df=142 of=8 ql=0/188 qs=NRW. kt=0/W ktl=b96 b=10 ci=121114 nci=0 co=426 ca=290 + 4 c=14405 g=14406 pq=1 qp=1 dt=50238/0/0 df=706 of=7 ql=0/0 qs=.... kt=0/W ktl=812 b=10 ci=34929 nci=0 co=643 ca=114 + 5!c=14168 g=14169 pq=1 qp=0 dt=45465/140000000000000/0 df=161 of=11 ql=0/0 qs=N... kt=0/O ktl=b4d b=10 ci=47712 nci=0 co=677 ca=722 + 6 c=14404 g=14405 pq=1 qp=0 dt=59454/0/0 df=94 of=6 ql=0/0 qs=.... kt=0/W ktl=e57 b=10 ci=55597 nci=0 co=701 ca=811 + 7 c=14407 g=14408 pq=1 qp=1 dt=68850/0/0 df=31 of=8 ql=0/0 qs=.... kt=0/W ktl=14bd b=10 ci=77475 nci=0 co=508 ca=1042 + +This is similar to the output discussed above, but contains the following +additional fields: + o "kt" is the per-CPU kernel-thread state. The digit preceding the first slash is zero if there is no work pending and 1 otherwise. The character between the first pair of slashes is @@ -184,35 +234,51 @@ o "ktl" is the low-order 16 bits (in hexadecimal) of the count of This field is displayed only for CONFIG_RCU_BOOST kernels. -o "b" is the batch limit for this CPU. If more than this number - of RCU callbacks is ready to invoke, then the remainder will - be deferred. -o "ci" is the number of RCU callbacks that have been invoked for - this CPU. Note that ci+ql is the number of callbacks that have - been registered in absence of CPU-hotplug activity. +The output of "cat rcu/rcu_preempt/rcuexp" looks as follows: -o "co" is the number of RCU callbacks that have been orphaned due to - this CPU going offline. These orphaned callbacks have been moved - to an arbitrarily chosen online CPU. +s=21872 d=21872 w=0 tf=0 wd1=0 wd2=0 n=0 sc=21872 dt=21872 dl=0 dx=21872 + +These fields are as follows: + +o "s" is the starting sequence number. -o "ca" is the number of RCU callbacks that have been adopted due to - other CPUs going offline. Note that ci+co-ca+ql is the number of - RCU callbacks registered on this CPU. +o "d" is the ending sequence number. When the starting and ending + numbers differ, there is an expedited grace period in progress. -There is also an rcu/rcudata.csv file with the same information in -comma-separated-variable spreadsheet format. +o "w" is the number of times that the sequence numbers have been + in danger of wrapping. +o "tf" is the number of times that contention has resulted in a + failure to begin an expedited grace period. -The output of "cat rcu/rcugp" looks as follows: +o "wd1" and "wd2" are the number of times that an attempt to + start an expedited grace period found that someone else had + completed an expedited grace period that satisfies the + attempted request. "Our work is done." -rcu_sched: completed=33062 gpnum=33063 -rcu_bh: completed=464 gpnum=464 +o "n" is number of times that contention was so great that + the request was demoted from an expedited grace period to + a normal grace period. + +o "sc" is the number of times that the attempt to start a + new expedited grace period succeeded. + +o "dt" is the number of times that we attempted to update + the "d" counter. + +o "dl" is the number of times that we failed to update the "d" + counter. + +o "dx" is the number of times that we succeeded in updating + the "d" counter. -Again, this output is for both "rcu_sched" and "rcu_bh". Note that -kernels built with CONFIG_TREE_PREEMPT_RCU will have an additional -"rcu_preempt" line. The fields are taken from the rcu_state structure, -and are as follows: + +The output of "cat rcu/rcu_preempt/rcugp" looks as follows: + +completed=31249 gpnum=31250 age=1 max=18 + +These fields are taken from the rcu_state structure, and are as follows: o "completed" is the number of grace periods that have completed. It is comparable to the "c" field from rcu/rcudata in that a @@ -220,44 +286,42 @@ o "completed" is the number of grace periods that have completed. that the corresponding RCU grace period has completed. o "gpnum" is the number of grace periods that have started. It is - comparable to the "g" field from rcu/rcudata in that a CPU - whose "g" field matches the value of "gpnum" is aware that the - corresponding RCU grace period has started. + similarly comparable to the "g" field from rcu/rcudata in that + a CPU whose "g" field matches the value of "gpnum" is aware that + the corresponding RCU grace period has started. + + If these two fields are equal, then there is no grace period + in progress, in other words, RCU is idle. On the other hand, + if the two fields differ (as they are above), then an RCU grace + period is in progress. - If these two fields are equal (as they are for "rcu_bh" above), - then there is no grace period in progress, in other words, RCU - is idle. On the other hand, if the two fields differ (as they - do for "rcu_sched" above), then an RCU grace period is in progress. +o "age" is the number of jiffies that the current grace period + has extended for, or zero if there is no grace period currently + in effect. +o "max" is the age in jiffies of the longest-duration grace period + thus far. -The output of "cat rcu/rcuhier" looks as follows, with very long lines: +The output of "cat rcu/rcu_preempt/rcuhier" looks as follows: -c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6 -1/1 ..>. 0:127 ^0 -3/3 ..>. 0:35 ^0 0/0 ..>. 36:71 ^1 0/0 ..>. 72:107 ^2 0/0 ..>. 108:127 ^3 -3/3f ..>. 0:5 ^0 2/3 ..>. 6:11 ^1 0/0 ..>. 12:17 ^2 0/0 ..>. 18:23 ^3 0/0 ..>. 24:29 ^4 0/0 ..>. 30:35 ^5 0/0 ..>. 36:41 ^0 0/0 ..>. 42:47 ^1 0/0 ..>. 48:53 ^2 0/0 ..>. 54:59 ^3 0/0 ..>. 60:65 ^4 0/0 ..>. 66:71 ^5 0/0 ..>. 72:77 ^0 0/0 ..>. 78:83 ^1 0/0 ..>. 84:89 ^2 0/0 ..>. 90:95 ^3 0/0 ..>. 96:101 ^4 0/0 ..>. 102:107 ^5 0/0 ..>. 108:113 ^0 0/0 ..>. 114:119 ^1 0/0 ..>. 120:125 ^2 0/0 ..>. 126:127 ^3 -rcu_bh: -c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0 -0/1 ..>. 0:127 ^0 -0/3 ..>. 0:35 ^0 0/0 ..>. 36:71 ^1 0/0 ..>. 72:107 ^2 0/0 ..>. 108:127 ^3 -0/3f ..>. 0:5 ^0 0/3 ..>. 6:11 ^1 0/0 ..>. 12:17 ^2 0/0 ..>. 18:23 ^3 0/0 ..>. 24:29 ^4 0/0 ..>. 30:35 ^5 0/0 ..>. 36:41 ^0 0/0 ..>. 42:47 ^1 0/0 ..>. 48:53 ^2 0/0 ..>. 54:59 ^3 0/0 ..>. 60:65 ^4 0/0 ..>. 66:71 ^5 0/0 ..>. 72:77 ^0 0/0 ..>. 78:83 ^1 0/0 ..>. 84:89 ^2 0/0 ..>. 90:95 ^3 0/0 ..>. 96:101 ^4 0/0 ..>. 102:107 ^5 0/0 ..>. 108:113 ^0 0/0 ..>. 114:119 ^1 0/0 ..>. 120:125 ^2 0/0 ..>. 126:127 ^3 +c=14407 g=14408 s=0 jfq=2 j=c863 nfqs=12040/nfqsng=0(12040) fqlh=1051 oqlen=0/0 +3/3 ..>. 0:7 ^0 +e/e ..>. 0:3 ^0 d/d ..>. 4:7 ^1 -This is once again split into "rcu_sched" and "rcu_bh" portions, -and CONFIG_TREE_PREEMPT_RCU kernels will again have an additional -"rcu_preempt" section. The fields are as follows: +The fields are as follows: -o "c" is exactly the same as "completed" under rcu/rcugp. +o "c" is exactly the same as "completed" under rcu/rcu_preempt/rcugp. -o "g" is exactly the same as "gpnum" under rcu/rcugp. +o "g" is exactly the same as "gpnum" under rcu/rcu_preempt/rcugp. -o "s" is the "signaled" state that drives force_quiescent_state()'s +o "s" is the current state of the force_quiescent_state() state machine. o "jfq" is the number of jiffies remaining for this grace period before force_quiescent_state() is invoked to help push things - along. Note that CPUs in dyntick-idle mode throughout the grace - period will not report on their own, but rather must be check by - some other CPU via force_quiescent_state(). + along. Note that CPUs in idle mode throughout the grace period + will not report on their own, but rather must be check by some + other CPU via force_quiescent_state(). o "j" is the low-order four hex digits of the jiffies counter. Yes, Paul did run into a number of problems that turned out to @@ -268,7 +332,8 @@ o "nfqs" is the number of calls to force_quiescent_state() since o "nfqsng" is the number of useless calls to force_quiescent_state(), where there wasn't actually a grace period active. This can - happen due to races. The number in parentheses is the difference + no longer happen due to grace-period processing being pushed + into a kthread. The number in parentheses is the difference between "nfqs" and "nfqsng", or the number of times that force_quiescent_state() actually did some real work. @@ -276,28 +341,27 @@ o "fqlh" is the number of calls to force_quiescent_state() that exited immediately (without even being counted in nfqs above) due to contention on ->fqslock. -o Each element of the form "1/1 0:127 ^0" represents one struct - rcu_node. Each line represents one level of the hierarchy, from - root to leaves. It is best to think of the rcu_data structures - as forming yet another level after the leaves. Note that there - might be either one, two, or three levels of rcu_node structures, - depending on the relationship between CONFIG_RCU_FANOUT and - CONFIG_NR_CPUS. +o Each element of the form "3/3 ..>. 0:7 ^0" represents one rcu_node + structure. Each line represents one level of the hierarchy, + from root to leaves. It is best to think of the rcu_data + structures as forming yet another level after the leaves. + Note that there might be either one, two, three, or even four + levels of rcu_node structures, depending on the relationship + between CONFIG_RCU_FANOUT, CONFIG_RCU_FANOUT_LEAF (possibly + adjusted using the rcu_fanout_leaf kernel boot parameter), and + CONFIG_NR_CPUS (possibly adjusted using the nr_cpu_ids count of + possible CPUs for the booting hardware). o The numbers separated by the "/" are the qsmask followed by the qsmaskinit. The qsmask will have one bit - set for each entity in the next lower level that - has not yet checked in for the current grace period. + set for each entity in the next lower level that has + not yet checked in for the current grace period ("e" + indicating CPUs 5, 6, and 7 in the example above). The qsmaskinit will have one bit for each entity that is currently expected to check in during each grace period. The value of qsmaskinit is assigned to that of qsmask at the beginning of each grace period. - For example, for "rcu_sched", the qsmask of the first - entry of the lowest level is 0x14, meaning that we - are still waiting for CPUs 2 and 4 to check in for the - current grace period. - o The characters separated by the ">" indicate the state of the blocked-tasks lists. A "G" preceding the ">" indicates that at least one task blocked in an RCU @@ -312,48 +376,39 @@ o Each element of the form "1/1 0:127 ^0" represents one struct A "." character appears if the corresponding condition does not hold, so that "..>." indicates that no tasks are blocked. In contrast, "GE>T" indicates maximal - inconvenience from blocked tasks. + inconvenience from blocked tasks. CONFIG_TREE_RCU + builds of the kernel will always show "..>.". o The numbers separated by the ":" are the range of CPUs served by this struct rcu_node. This can be helpful in working out how the hierarchy is wired together. - For example, the first entry at the lowest level shows - "0:5", indicating that it covers CPUs 0 through 5. + For example, the example rcu_node structure shown above + has "0:7", indicating that it covers CPUs 0 through 7. o The number after the "^" indicates the bit in the - next higher level rcu_node structure that this - rcu_node structure corresponds to. - - For example, the first entry at the lowest level shows - "^0", indicating that it corresponds to bit zero in - the first entry at the middle level. - - -The output of "cat rcu/rcu_pending" looks as follows: - -rcu_sched: - 0 np=255892 qsp=53936 rpq=85 cbr=0 cng=14417 gpc=10033 gps=24320 nn=146741 - 1 np=261224 qsp=54638 rpq=33 cbr=0 cng=25723 gpc=16310 gps=2849 nn=155792 - 2 np=237496 qsp=49664 rpq=23 cbr=0 cng=2762 gpc=45478 gps=1762 nn=136629 - 3 np=236249 qsp=48766 rpq=98 cbr=0 cng=286 gpc=48049 gps=1218 nn=137723 - 4 np=221310 qsp=46850 rpq=7 cbr=0 cng=26 gpc=43161 gps=4634 nn=123110 - 5 np=237332 qsp=48449 rpq=9 cbr=0 cng=54 gpc=47920 gps=3252 nn=137456 - 6 np=219995 qsp=46718 rpq=12 cbr=0 cng=50 gpc=42098 gps=6093 nn=120834 - 7 np=249893 qsp=49390 rpq=42 cbr=0 cng=72 gpc=38400 gps=17102 nn=144888 -rcu_bh: - 0 np=146741 qsp=1419 rpq=6 cbr=0 cng=6 gpc=0 gps=0 nn=145314 - 1 np=155792 qsp=12597 rpq=3 cbr=0 cng=0 gpc=4 gps=8 nn=143180 - 2 np=136629 qsp=18680 rpq=1 cbr=0 cng=0 gpc=7 gps=6 nn=117936 - 3 np=137723 qsp=2843 rpq=0 cbr=0 cng=0 gpc=10 gps=7 nn=134863 - 4 np=123110 qsp=12433 rpq=0 cbr=0 cng=0 gpc=4 gps=2 nn=110671 - 5 np=137456 qsp=4210 rpq=1 cbr=0 cng=0 gpc=6 gps=5 nn=133235 - 6 np=120834 qsp=9902 rpq=2 cbr=0 cng=0 gpc=6 gps=3 nn=110921 - 7 np=144888 qsp=26336 rpq=0 cbr=0 cng=0 gpc=8 gps=2 nn=118542 - -As always, this is once again split into "rcu_sched" and "rcu_bh" -portions, with CONFIG_TREE_PREEMPT_RCU kernels having an additional -"rcu_preempt" section. The fields are as follows: + next higher level rcu_node structure that this rcu_node + structure corresponds to. For example, the "d/d ..>. 4:7 + ^1" has a "1" in this position, indicating that it + corresponds to the "1" bit in the "3" shown in the + "3/3 ..>. 0:7 ^0" entry on the next level up. + + +The output of "cat rcu/rcu_sched/rcu_pending" looks as follows: + + 0!np=26111 qsp=29 rpq=5386 cbr=1 cng=570 gpc=3674 gps=577 nn=15903 + 1!np=28913 qsp=35 rpq=6097 cbr=1 cng=448 gpc=3700 gps=554 nn=18113 + 2!np=32740 qsp=37 rpq=6202 cbr=0 cng=476 gpc=4627 gps=546 nn=20889 + 3 np=23679 qsp=22 rpq=5044 cbr=1 cng=415 gpc=3403 gps=347 nn=14469 + 4!np=30714 qsp=4 rpq=5574 cbr=0 cng=528 gpc=3931 gps=639 nn=20042 + 5 np=28910 qsp=2 rpq=5246 cbr=0 cng=428 gpc=4105 gps=709 nn=18422 + 6!np=38648 qsp=5 rpq=7076 cbr=0 cng=840 gpc=4072 gps=961 nn=25699 + 7 np=37275 qsp=2 rpq=6873 cbr=0 cng=868 gpc=3416 gps=971 nn=25147 + +The fields are as follows: + +o The leading number is the CPU number, with "!" indicating + an offline CPU. o "np" is the number of times that __rcu_pending() has been invoked for the corresponding flavor of RCU. @@ -377,38 +432,23 @@ o "gpc" is the number of times that an old grace period had o "gps" is the number of times that a new grace period had started, but this CPU was not yet aware of it. -o "nn" is the number of times that this CPU needed nothing. Alert - readers will note that the rcu "nn" number for a given CPU very - closely matches the rcu_bh "np" number for that same CPU. This - is due to short-circuit evaluation in rcu_pending(). - - -The output of "cat rcu/rcutorture" looks as follows: - -rcutorture test sequence: 0 (test in progress) -rcutorture update version number: 615 - -The first line shows the number of rcutorture tests that have completed -since boot. If a test is currently running, the "(test in progress)" -string will appear as shown above. The second line shows the number of -update cycles that the current test has started, or zero if there is -no test in progress. +o "nn" is the number of times that this CPU needed nothing. The output of "cat rcu/rcuboost" looks as follows: -0:5 tasks=.... kt=W ntb=0 neb=0 nnb=0 j=2f95 bt=300f - balk: nt=0 egt=989 bt=0 nb=0 ny=0 nos=16 -6:7 tasks=.... kt=W ntb=0 neb=0 nnb=0 j=2f95 bt=300f - balk: nt=0 egt=225 bt=0 nb=0 ny=0 nos=6 +0:3 tasks=.... kt=W ntb=0 neb=0 nnb=0 j=c864 bt=c894 + balk: nt=0 egt=4695 bt=0 nb=0 ny=56 nos=0 +4:7 tasks=.... kt=W ntb=0 neb=0 nnb=0 j=c864 bt=c894 + balk: nt=0 egt=6541 bt=0 nb=0 ny=126 nos=0 This information is output only for rcu_preempt. Each two-line entry corresponds to a leaf rcu_node strcuture. The fields are as follows: o "n:m" is the CPU-number range for the corresponding two-line entry. In the sample output above, the first entry covers - CPUs zero through five and the second entry covers CPUs 6 - and 7. + CPUs zero through three and the second entry covers CPUs four + through seven. o "tasks=TNEB" gives the state of the various segments of the rnp->blocked_tasks list: diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index bf0f6de2aa0..0cc7820967f 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -499,6 +499,8 @@ The foo_reclaim() function might appear as follows: { struct foo *fp = container_of(rp, struct foo, rcu); + foo_cleanup(fp->a); + kfree(fp); } @@ -521,6 +523,12 @@ o Use call_rcu() -after- removing a data element from an read-side critical sections that might be referencing that data item. +If the callback for call_rcu() is not doing anything more than calling +kfree() on the structure, you can use kfree_rcu() instead of call_rcu() +to avoid having to write your own callback: + + kfree_rcu(old_fp, rcu); + Again, see checklist.txt for additional rules governing the use of RCU. @@ -773,8 +781,8 @@ a single atomic update, converting to RCU will require special care. Also, the presence of synchronize_rcu() means that the RCU version of delete() can now block. If this is a problem, there is a callback-based -mechanism that never blocks, namely call_rcu(), that can be used in -place of synchronize_rcu(). +mechanism that never blocks, namely call_rcu() or kfree_rcu(), that can +be used in place of synchronize_rcu(). 7. FULL LIST OF RCU APIs @@ -789,9 +797,7 @@ RCU list traversal: list_for_each_entry_rcu hlist_for_each_entry_rcu hlist_nulls_for_each_entry_rcu - - list_for_each_continue_rcu (to be deprecated in favor of new - list_for_each_entry_continue_rcu) + list_for_each_entry_continue_rcu RCU pointer/list update: @@ -813,6 +819,7 @@ RCU: Critical sections Grace period Barrier rcu_read_unlock synchronize_rcu rcu_dereference synchronize_rcu_expedited call_rcu + kfree_rcu bh: Critical sections Grace period Barrier diff --git a/Documentation/acpi/enumeration.txt b/Documentation/acpi/enumeration.txt new file mode 100644 index 00000000000..4f27785ca0c --- /dev/null +++ b/Documentation/acpi/enumeration.txt @@ -0,0 +1,227 @@ +ACPI based device enumeration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +ACPI 5 introduced a set of new resources (UartTSerialBus, I2cSerialBus, +SpiSerialBus, GpioIo and GpioInt) which can be used in enumerating slave +devices behind serial bus controllers. + +In addition we are starting to see peripherals integrated in the +SoC/Chipset to appear only in ACPI namespace. These are typically devices +that are accessed through memory-mapped registers. + +In order to support this and re-use the existing drivers as much as +possible we decided to do following: + + o Devices that have no bus connector resource are represented as + platform devices. + + o Devices behind real busses where there is a connector resource + are represented as struct spi_device or struct i2c_device + (standard UARTs are not busses so there is no struct uart_device). + +As both ACPI and Device Tree represent a tree of devices (and their +resources) this implementation follows the Device Tree way as much as +possible. + +The ACPI implementation enumerates devices behind busses (platform, SPI and +I2C), creates the physical devices and binds them to their ACPI handle in +the ACPI namespace. + +This means that when ACPI_HANDLE(dev) returns non-NULL the device was +enumerated from ACPI namespace. This handle can be used to extract other +device-specific configuration. There is an example of this below. + +Platform bus support +~~~~~~~~~~~~~~~~~~~~ +Since we are using platform devices to represent devices that are not +connected to any physical bus we only need to implement a platform driver +for the device and add supported ACPI IDs. If this same IP-block is used on +some other non-ACPI platform, the driver might work out of the box or needs +some minor changes. + +Adding ACPI support for an existing driver should be pretty +straightforward. Here is the simplest example: + + #ifdef CONFIG_ACPI + static struct acpi_device_id mydrv_acpi_match[] = { + /* ACPI IDs here */ + { } + }; + MODULE_DEVICE_TABLE(acpi, mydrv_acpi_match); + #endif + + static struct platform_driver my_driver = { + ... + .driver = { + .acpi_match_table = ACPI_PTR(mydrv_acpi_match), + }, + }; + +If the driver needs to perform more complex initialization like getting and +configuring GPIOs it can get its ACPI handle and extract this information +from ACPI tables. + +Currently the kernel is not able to automatically determine from which ACPI +device it should make the corresponding platform device so we need to add +the ACPI device explicitly to acpi_platform_device_ids list defined in +drivers/acpi/scan.c. This limitation is only for the platform devices, SPI +and I2C devices are created automatically as described below. + +SPI serial bus support +~~~~~~~~~~~~~~~~~~~~~~ +Slave devices behind SPI bus have SpiSerialBus resource attached to them. +This is extracted automatically by the SPI core and the slave devices are +enumerated once spi_register_master() is called by the bus driver. + +Here is what the ACPI namespace for a SPI slave might look like: + + Device (EEP0) + { + Name (_ADR, 1) + Name (_CID, Package() { + "ATML0025", + "AT25", + }) + ... + Method (_CRS, 0, NotSerialized) + { + SPISerialBus(1, PolarityLow, FourWireMode, 8, + ControllerInitiated, 1000000, ClockPolarityLow, + ClockPhaseFirst, "\\_SB.PCI0.SPI1",) + } + ... + +The SPI device drivers only need to add ACPI IDs in a similar way than with +the platform device drivers. Below is an example where we add ACPI support +to at25 SPI eeprom driver (this is meant for the above ACPI snippet): + + #ifdef CONFIG_ACPI + static struct acpi_device_id at25_acpi_match[] = { + { "AT25", 0 }, + { }, + }; + MODULE_DEVICE_TABLE(acpi, at25_acpi_match); + #endif + + static struct spi_driver at25_driver = { + .driver = { + ... + .acpi_match_table = ACPI_PTR(at25_acpi_match), + }, + }; + +Note that this driver actually needs more information like page size of the +eeprom etc. but at the time writing this there is no standard way of +passing those. One idea is to return this in _DSM method like: + + Device (EEP0) + { + ... + Method (_DSM, 4, NotSerialized) + { + Store (Package (6) + { + "byte-len", 1024, + "addr-mode", 2, + "page-size, 32 + }, Local0) + + // Check UUIDs etc. + + Return (Local0) + } + +Then the at25 SPI driver can get this configation by calling _DSM on its +ACPI handle like: + + struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_object_list input; + acpi_status status; + + /* Fill in the input buffer */ + + status = acpi_evaluate_object(ACPI_HANDLE(&spi->dev), "_DSM", + &input, &output); + if (ACPI_FAILURE(status)) + /* Handle the error */ + + /* Extract the data here */ + + kfree(output.pointer); + +I2C serial bus support +~~~~~~~~~~~~~~~~~~~~~~ +The slaves behind I2C bus controller only need to add the ACPI IDs like +with the platform and SPI drivers. However the I2C bus controller driver +needs to call acpi_i2c_register_devices() after it has added the adapter. + +An I2C bus (controller) driver does: + + ... + ret = i2c_add_numbered_adapter(adapter); + if (ret) + /* handle error */ + + of_i2c_register_devices(adapter); + /* Enumerate the slave devices behind this bus via ACPI */ + acpi_i2c_register_devices(adapter); + +Below is an example of how to add ACPI support to the existing mpu3050 +input driver: + + #ifdef CONFIG_ACPI + static struct acpi_device_id mpu3050_acpi_match[] = { + { "MPU3050", 0 }, + { }, + }; + MODULE_DEVICE_TABLE(acpi, mpu3050_acpi_match); + #endif + + static struct i2c_driver mpu3050_i2c_driver = { + .driver = { + .name = "mpu3050", + .owner = THIS_MODULE, + .pm = &mpu3050_pm, + .of_match_table = mpu3050_of_match, + .acpi_match_table ACPI_PTR(mpu3050_acpi_match), + }, + .probe = mpu3050_probe, + .remove = __devexit_p(mpu3050_remove), + .id_table = mpu3050_ids, + }; + +GPIO support +~~~~~~~~~~~~ +ACPI 5 introduced two new resources to describe GPIO connections: GpioIo +and GpioInt. These resources are used be used to pass GPIO numbers used by +the device to the driver. For example: + + Method (_CRS, 0, NotSerialized) + { + Name (SBUF, ResourceTemplate() + { + GpioIo (Exclusive, PullDefault, 0x0000, 0x0000, + IoRestrictionOutputOnly, "\\_SB.PCI0.GPI0", + 0x00, ResourceConsumer,,) + { + // Pin List + 0x0055 + } + ... + + Return (SBUF) + } + } + +These GPIO numbers are controller relative and path "\\_SB.PCI0.GPI0" +specifies the path to the controller. In order to use these GPIOs in Linux +we need to translate them to the Linux GPIO numbers. + +The driver can do this by including <linux/acpi_gpio.h> and then calling +acpi_get_gpio(path, gpio). This will return the Linux GPIO number or +negative errno if there was no translation found. + +Other GpioIo parameters must be converted first by the driver to be +suitable to the gpiolib before passing them. + +In case of GpioInt resource an additional call to gpio_to_irq() must be +done before calling request_irq(). diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt index 4110cca96bd..d758702fc03 100644 --- a/Documentation/arm64/memory.txt +++ b/Documentation/arm64/memory.txt @@ -41,7 +41,7 @@ ffffffbbffff0000 ffffffbcffffffff ~2MB [guard] ffffffbffc000000 ffffffbfffffffff 64MB modules -ffffffc000000000 ffffffffffffffff 256GB memory +ffffffc000000000 ffffffffffffffff 256GB kernel logical memory map Translation table lookup with 4KB pages: diff --git a/Documentation/cgroups/00-INDEX b/Documentation/cgroups/00-INDEX index 3f58fa3d6d0..f78b90a35ad 100644 --- a/Documentation/cgroups/00-INDEX +++ b/Documentation/cgroups/00-INDEX @@ -1,7 +1,11 @@ 00-INDEX - this file +blkio-controller.txt + - Description for Block IO Controller, implementation and usage details. cgroups.txt - Control Groups definition, implementation details, examples and API. +cgroup_event_listener.c + - A user program for cgroup listener. cpuacct.txt - CPU Accounting Controller; account CPU usage for groups of tasks. cpusets.txt @@ -10,9 +14,13 @@ devices.txt - Device Whitelist Controller; description, interface and security. freezer-subsystem.txt - checkpointing; rationale to not use signals, interface. +hugetlb.txt + - HugeTLB Controller implementation and usage details. memcg_test.txt - Memory Resource Controller; implementation details. memory.txt - Memory Resource Controller; design, accounting, interface, testing. +net_prio.txt + - Network priority cgroups details and usages. resource_counter.txt - Resource Counter API. diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index 9e04196c4d7..bcf1a00b06a 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt @@ -299,11 +299,9 @@ a cgroup hierarchy's release_agent path is empty. 1.5 What does clone_children do ? --------------------------------- -If the clone_children flag is enabled (1) in a cgroup, then all -cgroups created beneath will call the post_clone callbacks for each -subsystem of the newly created cgroup. Usually when this callback is -implemented for a subsystem, it copies the values of the parent -subsystem, this is the case for the cpuset. +This flag only affects the cpuset controller. If the clone_children +flag is enabled (1) in a cgroup, a new cpuset cgroup will copy its +configuration from the parent during initialization. 1.6 How do I use cgroups ? -------------------------- @@ -553,16 +551,16 @@ call to cgroup_unload_subsys(). It should also set its_subsys.module = THIS_MODULE in its .c file. Each subsystem may export the following methods. The only mandatory -methods are create/destroy. Any others that are null are presumed to +methods are css_alloc/free. Any others that are null are presumed to be successful no-ops. -struct cgroup_subsys_state *create(struct cgroup *cgrp) +struct cgroup_subsys_state *css_alloc(struct cgroup *cgrp) (cgroup_mutex held by caller) -Called to create a subsystem state object for a cgroup. The +Called to allocate a subsystem state object for a cgroup. The subsystem should allocate its subsystem state object for the passed cgroup, returning a pointer to the new object on success or a -negative error code. On success, the subsystem pointer should point to +ERR_PTR() value. On success, the subsystem pointer should point to a structure of type cgroup_subsys_state (typically embedded in a larger subsystem-specific object), which will be initialized by the cgroup system. Note that this will be called at initialization to @@ -571,24 +569,33 @@ identified by the passed cgroup object having a NULL parent (since it's the root of the hierarchy) and may be an appropriate place for initialization code. -void destroy(struct cgroup *cgrp) +int css_online(struct cgroup *cgrp) (cgroup_mutex held by caller) -The cgroup system is about to destroy the passed cgroup; the subsystem -should do any necessary cleanup and free its subsystem state -object. By the time this method is called, the cgroup has already been -unlinked from the file system and from the child list of its parent; -cgroup->parent is still valid. (Note - can also be called for a -newly-created cgroup if an error occurs after this subsystem's -create() method has been called for the new cgroup). +Called after @cgrp successfully completed all allocations and made +visible to cgroup_for_each_child/descendant_*() iterators. The +subsystem may choose to fail creation by returning -errno. This +callback can be used to implement reliable state sharing and +propagation along the hierarchy. See the comment on +cgroup_for_each_descendant_pre() for details. -int pre_destroy(struct cgroup *cgrp); +void css_offline(struct cgroup *cgrp); -Called before checking the reference count on each subsystem. This may -be useful for subsystems which have some extra references even if -there are not tasks in the cgroup. If pre_destroy() returns error code, -rmdir() will fail with it. From this behavior, pre_destroy() can be -called multiple times against a cgroup. +This is the counterpart of css_online() and called iff css_online() +has succeeded on @cgrp. This signifies the beginning of the end of +@cgrp. @cgrp is being removed and the subsystem should start dropping +all references it's holding on @cgrp. When all references are dropped, +cgroup removal will proceed to the next step - css_free(). After this +callback, @cgrp should be considered dead to the subsystem. + +void css_free(struct cgroup *cgrp) +(cgroup_mutex held by caller) + +The cgroup system is about to free @cgrp; the subsystem should free +its subsystem state object. By the time this method is called, @cgrp +is completely unused; @cgrp->parent is still valid. (Note - can also +be called for a newly-created cgroup if an error occurs after this +subsystem's create() method has been called for the new cgroup). int can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) (cgroup_mutex held by caller) @@ -635,14 +642,6 @@ void exit(struct task_struct *task) Called during task exit. -void post_clone(struct cgroup *cgrp) -(cgroup_mutex held by caller) - -Called during cgroup_create() to do any parameter -initialization which might be required before a task could attach. For -example, in cpusets, no task may attach before 'cpus' and 'mems' are set -up. - void bind(struct cgroup *root) (cgroup_mutex held by caller) diff --git a/Documentation/cgroups/freezer-subsystem.txt b/Documentation/cgroups/freezer-subsystem.txt index 7e62de1e59f..c96a72cbb30 100644 --- a/Documentation/cgroups/freezer-subsystem.txt +++ b/Documentation/cgroups/freezer-subsystem.txt @@ -49,13 +49,49 @@ prevent the freeze/unfreeze cycle from becoming visible to the tasks being frozen. This allows the bash example above and gdb to run as expected. -The freezer subsystem in the container filesystem defines a file named -freezer.state. Writing "FROZEN" to the state file will freeze all tasks in the -cgroup. Subsequently writing "THAWED" will unfreeze the tasks in the cgroup. -Reading will return the current state. +The cgroup freezer is hierarchical. Freezing a cgroup freezes all +tasks beloning to the cgroup and all its descendant cgroups. Each +cgroup has its own state (self-state) and the state inherited from the +parent (parent-state). Iff both states are THAWED, the cgroup is +THAWED. -Note freezer.state doesn't exist in root cgroup, which means root cgroup -is non-freezable. +The following cgroupfs files are created by cgroup freezer. + +* freezer.state: Read-write. + + When read, returns the effective state of the cgroup - "THAWED", + "FREEZING" or "FROZEN". This is the combined self and parent-states. + If any is freezing, the cgroup is freezing (FREEZING or FROZEN). + + FREEZING cgroup transitions into FROZEN state when all tasks + belonging to the cgroup and its descendants become frozen. Note that + a cgroup reverts to FREEZING from FROZEN after a new task is added + to the cgroup or one of its descendant cgroups until the new task is + frozen. + + When written, sets the self-state of the cgroup. Two values are + allowed - "FROZEN" and "THAWED". If FROZEN is written, the cgroup, + if not already freezing, enters FREEZING state along with all its + descendant cgroups. + + If THAWED is written, the self-state of the cgroup is changed to + THAWED. Note that the effective state may not change to THAWED if + the parent-state is still freezing. If a cgroup's effective state + becomes THAWED, all its descendants which are freezing because of + the cgroup also leave the freezing state. + +* freezer.self_freezing: Read only. + + Shows the self-state. 0 if the self-state is THAWED; otherwise, 1. + This value is 1 iff the last write to freezer.state was "FROZEN". + +* freezer.parent_freezing: Read only. + + Shows the parent-state. 0 if none of the cgroup's ancestors is + frozen; otherwise, 1. + +The root cgroup is non-freezable and the above interface files don't +exist. * Examples of usage : @@ -85,18 +121,3 @@ to unfreeze all tasks in the container : This is the basic mechanism which should do the right thing for user space task in a simple scenario. - -It's important to note that freezing can be incomplete. In that case we return -EBUSY. This means that some tasks in the cgroup are busy doing something that -prevents us from completely freezing the cgroup at this time. After EBUSY, -the cgroup will remain partially frozen -- reflected by freezer.state reporting -"FREEZING" when read. The state will remain "FREEZING" until one of these -things happens: - - 1) Userspace cancels the freezing operation by writing "THAWED" to - the freezer.state file - 2) Userspace retries the freezing operation by writing "FROZEN" to - the freezer.state file (writing "FREEZING" is not legal - and returns EINVAL) - 3) The tasks that blocked the cgroup from entering the "FROZEN" - state disappear from the cgroup's set of tasks. diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index 71c4da41344..a25cb3fafeb 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt @@ -144,9 +144,9 @@ Figure 1 shows the important aspects of the controller 3. Each page has a pointer to the page_cgroup, which in turn knows the cgroup it belongs to -The accounting is done as follows: mem_cgroup_charge() is invoked to set up -the necessary data structures and check if the cgroup that is being charged -is over its limit. If it is, then reclaim is invoked on the cgroup. +The accounting is done as follows: mem_cgroup_charge_common() is invoked to +set up the necessary data structures and check if the cgroup that is being +charged is over its limit. If it is, then reclaim is invoked on the cgroup. More details can be found in the reclaim section of this document. If everything goes well, a page meta-data-structure called page_cgroup is updated. page_cgroup has its own LRU on cgroup. diff --git a/Documentation/cgroups/net_prio.txt b/Documentation/cgroups/net_prio.txt index 01b32263559..a82cbd28ea8 100644 --- a/Documentation/cgroups/net_prio.txt +++ b/Documentation/cgroups/net_prio.txt @@ -51,3 +51,5 @@ One usage for the net_prio cgroup is with mqprio qdisc allowing application traffic to be steered to hardware/driver based traffic classes. These mappings can then be managed by administrators or other networking protocols such as DCBX. + +A new net_prio cgroup inherits the parent's configuration. diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt index 66ef8f35613..9f401350f50 100644 --- a/Documentation/cpu-hotplug.txt +++ b/Documentation/cpu-hotplug.txt @@ -207,6 +207,30 @@ by making it not-removable. In such cases you will also notice that the online file is missing under cpu0. +Q: Is CPU0 removable on X86? +A: Yes. If kernel is compiled with CONFIG_BOOTPARAM_HOTPLUG_CPU0=y, CPU0 is +removable by default. Otherwise, CPU0 is also removable by kernel option +cpu0_hotplug. + +But some features depend on CPU0. Two known dependencies are: + +1. Resume from hibernate/suspend depends on CPU0. Hibernate/suspend will fail if +CPU0 is offline and you need to online CPU0 before hibernate/suspend can +continue. +2. PIC interrupts also depend on CPU0. CPU0 can't be removed if a PIC interrupt +is detected. + +It's said poweroff/reboot may depend on CPU0 on some machines although I haven't +seen any poweroff/reboot failure so far after CPU0 is offline on a few tested +machines. + +Please let me know if you know or see any other dependencies of CPU0. + +If the dependencies are under your control, you can turn on CPU0 hotplug feature +either by CONFIG_BOOTPARAM_HOTPLUG_CPU0 or by kernel parameter cpu0_hotplug. + +--Fenghua Yu <fenghua.yu@intel.com> + Q: How do i find out if a particular CPU is not removable? A: Depending on the implementation, some architectures may show this by the absence of the "online" file. This is done if it can be determined ahead of diff --git a/Documentation/devices.txt b/Documentation/devices.txt index b6251cca926..08f01e79c41 100644 --- a/Documentation/devices.txt +++ b/Documentation/devices.txt @@ -2561,9 +2561,6 @@ Your cooperation is appreciated. 192 = /dev/usb/yurex1 First USB Yurex device ... 209 = /dev/usb/yurex16 16th USB Yurex device - 240 = /dev/usb/dabusb0 First daubusb device - ... - 243 = /dev/usb/dabusb3 Fourth dabusb device 180 block USB block devices 0 = /dev/uba First USB block device diff --git a/Documentation/devicetree/bindings/arm/arm-boards b/Documentation/devicetree/bindings/arm/arm-boards index fc81a7d6b0f..db5858e32d3 100644 --- a/Documentation/devicetree/bindings/arm/arm-boards +++ b/Documentation/devicetree/bindings/arm/arm-boards @@ -9,6 +9,10 @@ Required properties (in root node): FPGA type interrupt controllers, see the versatile-fpga-irq binding doc. +In the root node the Integrator/CP must have a /cpcon node pointing +to the CP control registers, and the Integrator/AP must have a +/syscon node pointing to the Integrator/AP system controller. + ARM Versatile Application and Platform Baseboards ------------------------------------------------- diff --git a/Documentation/devicetree/bindings/arm/atmel-at91.txt b/Documentation/devicetree/bindings/arm/atmel-at91.txt index d187e9f7cf1..1196290082d 100644 --- a/Documentation/devicetree/bindings/arm/atmel-at91.txt +++ b/Documentation/devicetree/bindings/arm/atmel-at91.txt @@ -7,6 +7,12 @@ PIT Timer required properties: - interrupts: Should contain interrupt for the PIT which is the IRQ line shared across all System Controller members. +System Timer (ST) required properties: +- compatible: Should be "atmel,at91rm9200-st" +- reg: Should contain registers location and length +- interrupts: Should contain interrupt for the ST which is the IRQ line + shared across all System Controller members. + TC/TCLIB Timer required properties: - compatible: Should be "atmel,<chip>-tcb". <chip> can be "at91rm9200" or "at91sam9x5" diff --git a/Documentation/devicetree/bindings/arm/cpus.txt b/Documentation/devicetree/bindings/arm/cpus.txt new file mode 100644 index 00000000000..f32494dbfe1 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/cpus.txt @@ -0,0 +1,77 @@ +* ARM CPUs binding description + +The device tree allows to describe the layout of CPUs in a system through +the "cpus" node, which in turn contains a number of subnodes (ie "cpu") +defining properties for every cpu. + +Bindings for CPU nodes follow the ePAPR standard, available from: + +http://devicetree.org + +For the ARM architecture every CPU node must contain the following properties: + +- device_type: must be "cpu" +- reg: property matching the CPU MPIDR[23:0] register bits + reg[31:24] bits must be set to 0 +- compatible: should be one of: + "arm,arm1020" + "arm,arm1020e" + "arm,arm1022" + "arm,arm1026" + "arm,arm720" + "arm,arm740" + "arm,arm7tdmi" + "arm,arm920" + "arm,arm922" + "arm,arm925" + "arm,arm926" + "arm,arm940" + "arm,arm946" + "arm,arm9tdmi" + "arm,cortex-a5" + "arm,cortex-a7" + "arm,cortex-a8" + "arm,cortex-a9" + "arm,cortex-a15" + "arm,arm1136" + "arm,arm1156" + "arm,arm1176" + "arm,arm11mpcore" + "faraday,fa526" + "intel,sa110" + "intel,sa1100" + "marvell,feroceon" + "marvell,mohawk" + "marvell,xsc3" + "marvell,xscale" + +Example: + + cpus { + #size-cells = <0>; + #address-cells = <1>; + + CPU0: cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0x0>; + }; + + CPU1: cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0x1>; + }; + + CPU2: cpu@100 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + reg = <0x100>; + }; + + CPU3: cpu@101 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + reg = <0x101>; + }; + }; diff --git a/Documentation/devicetree/bindings/arm/davinci/nand.txt b/Documentation/devicetree/bindings/arm/davinci/nand.txt index e37241f1fdd..49fc7ada929 100644 --- a/Documentation/devicetree/bindings/arm/davinci/nand.txt +++ b/Documentation/devicetree/bindings/arm/davinci/nand.txt @@ -23,29 +23,16 @@ Recommended properties : - ti,davinci-nand-buswidth: buswidth 8 or 16 - ti,davinci-nand-use-bbt: use flash based bad block table support. -Example (enbw_cmc board): -aemif@60000000 { - compatible = "ti,davinci-aemif"; - #address-cells = <2>; - #size-cells = <1>; - reg = <0x68000000 0x80000>; - ranges = <2 0 0x60000000 0x02000000 - 3 0 0x62000000 0x02000000 - 4 0 0x64000000 0x02000000 - 5 0 0x66000000 0x02000000 - 6 0 0x68000000 0x02000000>; - nand@3,0 { - compatible = "ti,davinci-nand"; - reg = <3 0x0 0x807ff - 6 0x0 0x8000>; - #address-cells = <1>; - #size-cells = <1>; - ti,davinci-chipselect = <1>; - ti,davinci-mask-ale = <0>; - ti,davinci-mask-cle = <0>; - ti,davinci-mask-chipsel = <0>; - ti,davinci-ecc-mode = "hw"; - ti,davinci-ecc-bits = <4>; - ti,davinci-nand-use-bbt; - }; +Example(da850 EVM ): +nand_cs3@62000000 { + compatible = "ti,davinci-nand"; + reg = <0x62000000 0x807ff + 0x68000000 0x8000>; + ti,davinci-chipselect = <1>; + ti,davinci-mask-ale = <0>; + ti,davinci-mask-cle = <0>; + ti,davinci-mask-chipsel = <0>; + ti,davinci-ecc-mode = "hw"; + ti,davinci-ecc-bits = <4>; + ti,davinci-nand-use-bbt; }; diff --git a/Documentation/devicetree/bindings/arm/l2cc.txt b/Documentation/devicetree/bindings/arm/l2cc.txt index 7ca52161e7a..7c3ee3aeb7b 100644 --- a/Documentation/devicetree/bindings/arm/l2cc.txt +++ b/Documentation/devicetree/bindings/arm/l2cc.txt @@ -37,7 +37,7 @@ L2: cache-controller { reg = <0xfff12000 0x1000>; arm,data-latency = <1 1 1>; arm,tag-latency = <2 2 2>; - arm,filter-latency = <0x80000000 0x8000000>; + arm,filter-ranges = <0x80000000 0x8000000>; cache-unified; cache-level = <2>; interrupts = <45>; diff --git a/Documentation/devicetree/bindings/arm/omap/counter.txt b/Documentation/devicetree/bindings/arm/omap/counter.txt new file mode 100644 index 00000000000..5bd8aa09131 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/omap/counter.txt @@ -0,0 +1,15 @@ +OMAP Counter-32K bindings + +Required properties: +- compatible: Must be "ti,omap-counter32k" for OMAP controllers +- reg: Contains timer register address range (base address and length) +- ti,hwmods: Name of the hwmod associated to the counter, which is typically + "counter_32k" + +Example: + +counter32k: counter@4a304000 { + compatible = "ti,omap-counter32k"; + reg = <0x4a304000 0x20>; + ti,hwmods = "counter_32k"; +}; diff --git a/Documentation/devicetree/bindings/arm/omap/timer.txt b/Documentation/devicetree/bindings/arm/omap/timer.txt new file mode 100644 index 00000000000..8732d4d41f8 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/omap/timer.txt @@ -0,0 +1,31 @@ +OMAP Timer bindings + +Required properties: +- compatible: Must be "ti,omap2-timer" for OMAP2+ controllers. +- reg: Contains timer register address range (base address and + length). +- interrupts: Contains the interrupt information for the timer. The + format is being dependent on which interrupt controller + the OMAP device uses. +- ti,hwmods: Name of the hwmod associated to the timer, "timer<X>", + where <X> is the instance number of the timer from the + HW spec. + +Optional properties: +- ti,timer-alwon: Indicates the timer is in an alway-on power domain. +- ti,timer-dsp: Indicates the timer can interrupt the on-chip DSP in + addition to the ARM CPU. +- ti,timer-pwm: Indicates the timer can generate a PWM output. +- ti,timer-secure: Indicates the timer is reserved on a secure OMAP device + and therefore cannot be used by the kernel. + +Example: + +timer12: timer@48304000 { + compatible = "ti,omap2-timer"; + reg = <0x48304000 0x400>; + interrupts = <95>; + ti,hwmods = "timer12" + ti,timer-alwon; + ti,timer-secure; +}; diff --git a/Documentation/devicetree/bindings/bus/omap-ocp2scp.txt b/Documentation/devicetree/bindings/bus/omap-ocp2scp.txt index d2fe064a828..63dd8051521 100644 --- a/Documentation/devicetree/bindings/bus/omap-ocp2scp.txt +++ b/Documentation/devicetree/bindings/bus/omap-ocp2scp.txt @@ -2,9 +2,27 @@ properties: - compatible : Should be "ti,omap-ocp2scp" +- reg : Address and length of the register set for the device - #address-cells, #size-cells : Must be present if the device has sub-nodes - ranges : the child address space are mapped 1:1 onto the parent address space - ti,hwmods : must be "ocp2scp_usb_phy" Sub-nodes: All the devices connected to ocp2scp are described using sub-node to ocp2scp + +ocp2scp@4a0ad000 { + compatible = "ti,omap-ocp2scp"; + reg = <0x4a0ad000 0x1f>; + #address-cells = <1>; + #size-cells = <1>; + ranges; + ti,hwmods = "ocp2scp_usb_phy"; + + subnode1 { + ... + }; + + subnode2 { + ... + }; +}; diff --git a/Documentation/devicetree/bindings/clock/imx23-clock.txt b/Documentation/devicetree/bindings/clock/imx23-clock.txt index a0b867ef8d9..baadbb11fe9 100644 --- a/Documentation/devicetree/bindings/clock/imx23-clock.txt +++ b/Documentation/devicetree/bindings/clock/imx23-clock.txt @@ -52,7 +52,7 @@ clocks and IDs. lcdif 38 etm 39 usb 40 - usb_pwr 41 + usb_phy 41 Examples: diff --git a/Documentation/devicetree/bindings/clock/imx28-clock.txt b/Documentation/devicetree/bindings/clock/imx28-clock.txt index aa2af2866fe..52a49a4a50b 100644 --- a/Documentation/devicetree/bindings/clock/imx28-clock.txt +++ b/Documentation/devicetree/bindings/clock/imx28-clock.txt @@ -73,8 +73,8 @@ clocks and IDs. can1 59 usb0 60 usb1 61 - usb0_pwr 62 - usb1_pwr 63 + usb0_phy 62 + usb1_phy 63 enet_out 64 Examples: diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-spear.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-spear.txt new file mode 100644 index 00000000000..f3d44984d91 --- /dev/null +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-spear.txt @@ -0,0 +1,42 @@ +SPEAr cpufreq driver +------------------- + +SPEAr SoC cpufreq driver for CPU frequency scaling. +It supports both uniprocessor (UP) and symmetric multiprocessor (SMP) systems +which share clock across all CPUs. + +Required properties: +- cpufreq_tbl: Table of frequencies CPU could be transitioned into, in the + increasing order. + +Optional properties: +- clock-latency: Specify the possible maximum transition latency for clock, in + unit of nanoseconds. + +Both required and optional properties listed above must be defined under node +/cpus/cpu@0. + +Examples: +-------- +cpus { + + <...> + + cpu@0 { + compatible = "arm,cortex-a9"; + reg = <0>; + + <...> + + cpufreq_tbl = < 166000 + 200000 + 250000 + 300000 + 400000 + 500000 + 600000 >; + }; + + <...> + +}; diff --git a/Documentation/devicetree/bindings/gpio/gpio-stmpe.txt b/Documentation/devicetree/bindings/gpio/gpio-stmpe.txt new file mode 100644 index 00000000000..a0e4cf88521 --- /dev/null +++ b/Documentation/devicetree/bindings/gpio/gpio-stmpe.txt @@ -0,0 +1,18 @@ +STMPE gpio +---------- + +Required properties: + - compatible: "st,stmpe-gpio" + +Optional properties: + - st,norequest-mask: bitmask specifying which GPIOs should _not_ be requestable + due to different usage (e.g. touch, keypad) + +Node name must be stmpe_gpio and should be child node of stmpe node to which it +belongs. + +Example: + stmpe_gpio { + compatible = "st,stmpe-gpio"; + st,norequest-mask = <0x20>; //gpio 5 can't be used + }; diff --git a/Documentation/devicetree/bindings/gpio/gpio.txt b/Documentation/devicetree/bindings/gpio/gpio.txt index 4e16ba4feab..a33628759d3 100644 --- a/Documentation/devicetree/bindings/gpio/gpio.txt +++ b/Documentation/devicetree/bindings/gpio/gpio.txt @@ -75,4 +75,40 @@ Example of two SOC GPIO banks defined as gpio-controller nodes: gpio-controller; }; +2.1) gpio-controller and pinctrl subsystem +------------------------------------------ +gpio-controller on a SOC might be tightly coupled with the pinctrl +subsystem, in the sense that the pins can be used by other functions +together with optional gpio feature. + +While the pin allocation is totally managed by the pin ctrl subsystem, +gpio (under gpiolib) is still maintained by gpio drivers. It may happen +that different pin ranges in a SoC is managed by different gpio drivers. + +This makes it logical to let gpio drivers announce their pin ranges to +the pin ctrl subsystem and call 'pinctrl_request_gpio' in order to +request the corresponding pin before any gpio usage. + +For this, the gpio controller can use a pinctrl phandle and pins to +announce the pinrange to the pin ctrl subsystem. For example, + + qe_pio_e: gpio-controller@1460 { + #gpio-cells = <2>; + compatible = "fsl,qe-pario-bank-e", "fsl,qe-pario-bank"; + reg = <0x1460 0x18>; + gpio-controller; + gpio-ranges = <&pinctrl1 20 10>, <&pinctrl2 50 20>; + + } + +where, + &pinctrl1 and &pinctrl2 is the phandle to the pinctrl DT node. + + Next values specify the base pin and number of pins for the range + handled by 'qe_pio_e' gpio. In the given example from base pin 20 to + pin 29 under pinctrl1 and pin 50 to pin 69 under pinctrl2 is handled + by this gpio controller. + +The pinctrl node must have "#gpio-range-cells" property to show number of +arguments to pass with phandle from gpio controllers node. diff --git a/Documentation/devicetree/bindings/gpio/gpio_atmel.txt b/Documentation/devicetree/bindings/gpio/gpio_atmel.txt index 66efc804806..85f8c0d084f 100644 --- a/Documentation/devicetree/bindings/gpio/gpio_atmel.txt +++ b/Documentation/devicetree/bindings/gpio/gpio_atmel.txt @@ -9,6 +9,10 @@ Required properties: unused). - gpio-controller: Marks the device node as a GPIO controller. +optional properties: +- #gpio-lines: Number of gpio if absent 32. + + Example: pioA: gpio@fffff200 { compatible = "atmel,at91rm9200-gpio"; @@ -16,5 +20,6 @@ Example: interrupts = <2 4>; #gpio-cells = <2>; gpio-controller; + #gpio-lines = <19>; }; diff --git a/Documentation/devicetree/bindings/i2c/atmel-i2c.txt b/Documentation/devicetree/bindings/i2c/i2c-at91.txt index b689a0d9441..b689a0d9441 100644 --- a/Documentation/devicetree/bindings/i2c/atmel-i2c.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-at91.txt diff --git a/Documentation/devicetree/bindings/i2c/davinci.txt b/Documentation/devicetree/bindings/i2c/i2c-davinci.txt index 2dc935b4113..2dc935b4113 100644 --- a/Documentation/devicetree/bindings/i2c/davinci.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-davinci.txt diff --git a/Documentation/devicetree/bindings/i2c/gpio-i2c.txt b/Documentation/devicetree/bindings/i2c/i2c-gpio.txt index 4f8ec947c6b..4f8ec947c6b 100644 --- a/Documentation/devicetree/bindings/i2c/gpio-i2c.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-gpio.txt diff --git a/Documentation/devicetree/bindings/i2c/fsl-imx-i2c.txt b/Documentation/devicetree/bindings/i2c/i2c-imx.txt index 3614242e773..3614242e773 100644 --- a/Documentation/devicetree/bindings/i2c/fsl-imx-i2c.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-imx.txt diff --git a/Documentation/devicetree/bindings/i2c/fsl-i2c.txt b/Documentation/devicetree/bindings/i2c/i2c-mpc.txt index 1eacd6b20ed..1eacd6b20ed 100644 --- a/Documentation/devicetree/bindings/i2c/fsl-i2c.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-mpc.txt diff --git a/Documentation/devicetree/bindings/i2c/mux.txt b/Documentation/devicetree/bindings/i2c/i2c-mux.txt index af84cce5cd7..af84cce5cd7 100644 --- a/Documentation/devicetree/bindings/i2c/mux.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-mux.txt diff --git a/Documentation/devicetree/bindings/i2c/i2c-mv64xxx.txt b/Documentation/devicetree/bindings/i2c/i2c-mv64xxx.txt new file mode 100644 index 00000000000..f46d928aa73 --- /dev/null +++ b/Documentation/devicetree/bindings/i2c/i2c-mv64xxx.txt @@ -0,0 +1,18 @@ + +* Marvell MV64XXX I2C controller + +Required properties : + + - reg : Offset and length of the register set for the device + - compatible : Should be "marvell,mv64xxx-i2c" + - interrupts : The interrupt number + - clock-frequency : Desired I2C bus clock frequency in Hz. + +Examples: + + i2c@11000 { + compatible = "marvell,mv64xxx-i2c"; + reg = <0x11000 0x20>; + interrupts = <29>; + clock-frequency = <100000>; + }; diff --git a/Documentation/devicetree/bindings/i2c/nomadik.txt b/Documentation/devicetree/bindings/i2c/i2c-nomadik.txt index 72065b0ff68..72065b0ff68 100644 --- a/Documentation/devicetree/bindings/i2c/nomadik.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-nomadik.txt diff --git a/Documentation/devicetree/bindings/i2c/cavium-i2c.txt b/Documentation/devicetree/bindings/i2c/i2c-octeon.txt index dced82ebe31..dced82ebe31 100644 --- a/Documentation/devicetree/bindings/i2c/cavium-i2c.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-octeon.txt diff --git a/Documentation/devicetree/bindings/i2c/omap-i2c.txt b/Documentation/devicetree/bindings/i2c/i2c-omap.txt index 56564aa4b44..56564aa4b44 100644 --- a/Documentation/devicetree/bindings/i2c/omap-i2c.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-omap.txt diff --git a/Documentation/devicetree/bindings/i2c/pnx.txt b/Documentation/devicetree/bindings/i2c/i2c-pnx.txt index fe98ada33ee..fe98ada33ee 100644 --- a/Documentation/devicetree/bindings/i2c/pnx.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-pnx.txt diff --git a/Documentation/devicetree/bindings/i2c/ce4100-i2c.txt b/Documentation/devicetree/bindings/i2c/i2c-pxa-pci-ce4100.txt index 569b1624851..569b1624851 100644 --- a/Documentation/devicetree/bindings/i2c/ce4100-i2c.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-pxa-pci-ce4100.txt diff --git a/Documentation/devicetree/bindings/i2c/mrvl-i2c.txt b/Documentation/devicetree/bindings/i2c/i2c-pxa.txt index 0f7945019f6..12b78ac507e 100644 --- a/Documentation/devicetree/bindings/i2c/mrvl-i2c.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-pxa.txt @@ -31,21 +31,3 @@ Examples: reg = <0xd4025000 0x1000>; interrupts = <58>; }; - -* Marvell MV64XXX I2C controller - -Required properties : - - - reg : Offset and length of the register set for the device - - compatible : Should be "marvell,mv64xxx-i2c" - - interrupts : The interrupt number - - clock-frequency : Desired I2C bus clock frequency in Hz. - -Examples: - - i2c@11000 { - compatible = "marvell,mv64xxx-i2c"; - reg = <0x11000 0x20>; - interrupts = <29>; - clock-frequency = <100000>; - }; diff --git a/Documentation/devicetree/bindings/i2c/samsung-i2c.txt b/Documentation/devicetree/bindings/i2c/i2c-s3c2410.txt index b6cb5a12c67..b6cb5a12c67 100644 --- a/Documentation/devicetree/bindings/i2c/samsung-i2c.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-s3c2410.txt diff --git a/Documentation/devicetree/bindings/i2c/sirf-i2c.txt b/Documentation/devicetree/bindings/i2c/i2c-sirf.txt index 7baf9e133fa..7baf9e133fa 100644 --- a/Documentation/devicetree/bindings/i2c/sirf-i2c.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-sirf.txt diff --git a/Documentation/devicetree/bindings/i2c/arm-versatile.txt b/Documentation/devicetree/bindings/i2c/i2c-versatile.txt index 361d31c51b6..361d31c51b6 100644 --- a/Documentation/devicetree/bindings/i2c/arm-versatile.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-versatile.txt diff --git a/Documentation/devicetree/bindings/i2c/xiic.txt b/Documentation/devicetree/bindings/i2c/i2c-xiic.txt index ceabbe91ae4..ceabbe91ae4 100644 --- a/Documentation/devicetree/bindings/i2c/xiic.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-xiic.txt diff --git a/Documentation/devicetree/bindings/i2c/trivial-devices.txt b/Documentation/devicetree/bindings/i2c/trivial-devices.txt index 2f5322b119e..446859fcdca 100644 --- a/Documentation/devicetree/bindings/i2c/trivial-devices.txt +++ b/Documentation/devicetree/bindings/i2c/trivial-devices.txt @@ -55,5 +55,7 @@ st-micro,24c256 i2c serial eeprom (24cxx) stm,m41t00 Serial Access TIMEKEEPER stm,m41t62 Serial real-time clock (RTC) with alarm stm,m41t80 M41T80 - SERIAL ACCESS RTC WITH ALARMS +taos,tsl2550 Ambient Light Sensor with SMBUS/Two Wire Serial Interface ti,tsc2003 I2C Touch-Screen Controller ti,tmp102 Low Power Digital Temperature Sensor with SMBUS/Two Wire Serial Interface +ti,tmp275 Digital Temperature Sensor diff --git a/Documentation/devicetree/bindings/leds/common.txt b/Documentation/devicetree/bindings/leds/common.txt new file mode 100644 index 00000000000..2d88816dd55 --- /dev/null +++ b/Documentation/devicetree/bindings/leds/common.txt @@ -0,0 +1,23 @@ +Common leds properties. + +Optional properties for child nodes: +- label : The label for this LED. If omitted, the label is + taken from the node name (excluding the unit address). + +- linux,default-trigger : This parameter, if present, is a + string defining the trigger assigned to the LED. Current triggers are: + "backlight" - LED will act as a back-light, controlled by the framebuffer + system + "default-on" - LED will turn on (but for leds-gpio see "default-state" + property in Documentation/devicetree/bindings/gpio/led.txt) + "heartbeat" - LED "double" flashes at a load average based rate + "ide-disk" - LED indicates disk activity + "timer" - LED flashes at a fixed, configurable rate + +Examples: + +system-status { + label = "Status"; + linux,default-trigger = "heartbeat"; + ... +}; diff --git a/Documentation/devicetree/bindings/gpio/led.txt b/Documentation/devicetree/bindings/leds/leds-gpio.txt index edc83c1c0d5..df1b3080f6b 100644 --- a/Documentation/devicetree/bindings/gpio/led.txt +++ b/Documentation/devicetree/bindings/leds/leds-gpio.txt @@ -10,16 +10,10 @@ LED sub-node properties: - gpios : Should specify the LED's GPIO, see "gpios property" in Documentation/devicetree/bindings/gpio/gpio.txt. Active low LEDs should be indicated using flags in the GPIO specifier. -- label : (optional) The label for this LED. If omitted, the label is - taken from the node name (excluding the unit address). -- linux,default-trigger : (optional) This parameter, if present, is a - string defining the trigger assigned to the LED. Current triggers are: - "backlight" - LED will act as a back-light, controlled by the framebuffer - system - "default-on" - LED will turn on, but see "default-state" below - "heartbeat" - LED "double" flashes at a load average based rate - "ide-disk" - LED indicates disk activity - "timer" - LED flashes at a fixed, configurable rate +- label : (optional) + see Documentation/devicetree/bindings/leds/common.txt +- linux,default-trigger : (optional) + see Documentation/devicetree/bindings/leds/common.txt - default-state: (optional) The initial state of the LED. Valid values are "on", "off", and "keep". If the LED is already on or off and the default-state property is set the to same value, then no diff --git a/Documentation/devicetree/bindings/mmc/mmc.txt b/Documentation/devicetree/bindings/mmc/mmc.txt index 8e2e0ba2f48..a591c6741d7 100644 --- a/Documentation/devicetree/bindings/mmc/mmc.txt +++ b/Documentation/devicetree/bindings/mmc/mmc.txt @@ -21,6 +21,12 @@ Optional properties: - cd-inverted: when present, polarity on the cd gpio line is inverted - wp-inverted: when present, polarity on the wp gpio line is inverted - max-frequency: maximum operating clock frequency +- no-1-8-v: when present, denotes that 1.8v card voltage is not supported on + this system, even if the controller claims it is. + +Optional SDIO properties: +- keep-power-in-suspend: Preserves card power during a suspend/resume cycle +- enable-sdio-wakeup: Enables wake up of host system on SDIO IRQ assertion Example: @@ -33,4 +39,6 @@ sdhci@ab000000 { cd-inverted; wp-gpios = <&gpio 70 0>; max-frequency = <50000000>; + keep-power-in-suspend; + enable-sdio-wakeup; } diff --git a/Documentation/devicetree/bindings/mmc/samsung-sdhci.txt b/Documentation/devicetree/bindings/mmc/samsung-sdhci.txt index 630a7d7f471..97e9e315400 100644 --- a/Documentation/devicetree/bindings/mmc/samsung-sdhci.txt +++ b/Documentation/devicetree/bindings/mmc/samsung-sdhci.txt @@ -12,10 +12,6 @@ is used. The Samsung's SDHCI controller bindings extends this as listed below. [A] The property "samsung,cd-pinmux-gpio" can be used as stated in the "Optional Board Specific Properties" section below. -[B] If core card-detect bindings and "samsung,cd-pinmux-gpio" property - is not specified, it is assumed that there is no card detection - mechanism used. - Required SoC Specific Properties: - compatible: should be one of the following - "samsung,s3c6410-sdhci": For controllers compatible with s3c6410 sdhci @@ -24,14 +20,18 @@ Required SoC Specific Properties: controller. Required Board Specific Properties: -- gpios: Should specify the gpios used for clock, command and data lines. The - gpio specifier format depends on the gpio controller. +- Samsung GPIO variant (will be completely replaced by pinctrl): + - gpios: Should specify the gpios used for clock, command and data lines. The + gpio specifier format depends on the gpio controller. +- Pinctrl variant (preferred if available): + - pinctrl-0: Should specify pin control groups used for this controller. + - pinctrl-names: Should contain only one value - "default". Optional Board Specific Properties: - samsung,cd-pinmux-gpio: Specifies the card detect line that is routed through a pinmux to the card-detect pin of the card slot. This property should be used only if none of the mmc core card-detect properties are - used. + used. Only for Samsung GPIO variant. Example: sdhci@12530000 { @@ -40,12 +40,18 @@ Example: interrupts = <0 75 0>; bus-width = <4>; cd-gpios = <&gpk2 2 2 3 3>; + + /* Samsung GPIO variant */ gpios = <&gpk2 0 2 0 3>, /* clock line */ <&gpk2 1 2 0 3>, /* command line */ <&gpk2 3 2 3 3>, /* data line 0 */ <&gpk2 4 2 3 3>, /* data line 1 */ <&gpk2 5 2 3 3>, /* data line 2 */ <&gpk2 6 2 3 3>; /* data line 3 */ + + /* Pinctrl variant */ + pinctrl-0 = <&sd0_clk &sd0_cmd &sd0_bus4>; + pinctrl-names = "default"; }; Note: This example shows both SoC specific and board specific properties diff --git a/Documentation/devicetree/bindings/mmc/synposis-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/synopsis-dw-mshc.txt index 06cd32d0805..06cd32d0805 100644 --- a/Documentation/devicetree/bindings/mmc/synposis-dw-mshc.txt +++ b/Documentation/devicetree/bindings/mmc/synopsis-dw-mshc.txt diff --git a/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt b/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt index be76a23b34c..ed271fc255b 100644 --- a/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt +++ b/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt @@ -19,6 +19,7 @@ ti,dual-volt: boolean, supports dual voltage cards "supply-name" examples are "vmmc", "vmmc_aux" etc ti,non-removable: non-removable slot (like eMMC) ti,needs-special-reset: Requires a special softreset sequence +ti,needs-special-hs-handling: HSMMC IP needs special setting for handling High Speed Example: mmc1: mmc@0x4809c000 { diff --git a/Documentation/devicetree/bindings/mmc/vt8500-sdmmc.txt b/Documentation/devicetree/bindings/mmc/vt8500-sdmmc.txt new file mode 100644 index 00000000000..d7fb6abb3eb --- /dev/null +++ b/Documentation/devicetree/bindings/mmc/vt8500-sdmmc.txt @@ -0,0 +1,23 @@ +* Wondermedia WM8505/WM8650 SD/MMC Host Controller + +This file documents differences between the core properties described +by mmc.txt and the properties used by the wmt-sdmmc driver. + +Required properties: +- compatible: Should be "wm,wm8505-sdhc". +- interrupts: Two interrupts are required - regular irq and dma irq. + +Optional properties: +- sdon-inverted: SD_ON bit is inverted on the controller + +Examples: + +sdhc@d800a000 { + compatible = "wm,wm8505-sdhc"; + reg = <0xd800a000 0x1000>; + interrupts = <20 21>; + clocks = <&sdhc>; + bus-width = <4>; + sdon-inverted; +}; + diff --git a/Documentation/devicetree/bindings/net/mdio-gpio.txt b/Documentation/devicetree/bindings/net/mdio-gpio.txt index bc954952901..c79bab02536 100644 --- a/Documentation/devicetree/bindings/net/mdio-gpio.txt +++ b/Documentation/devicetree/bindings/net/mdio-gpio.txt @@ -8,9 +8,16 @@ gpios property as described in section VIII.1 in the following order: MDC, MDIO. +Note: Each gpio-mdio bus should have an alias correctly numbered in "aliases" +node. + Example: -mdio { +aliases { + mdio-gpio0 = <&mdio0>; +}; + +mdio0: mdio { compatible = "virtual,mdio-gpio"; #address-cells = <1>; #size-cells = <0>; diff --git a/Documentation/devicetree/bindings/pinctrl/atmel,at91-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/atmel,at91-pinctrl.txt new file mode 100644 index 00000000000..3a268127b05 --- /dev/null +++ b/Documentation/devicetree/bindings/pinctrl/atmel,at91-pinctrl.txt @@ -0,0 +1,141 @@ +* Atmel AT91 Pinmux Controller + +The AT91 Pinmux Controler, enables the IC +to share one PAD to several functional blocks. The sharing is done by +multiplexing the PAD input/output signals. For each PAD there are up to +8 muxing options (called periph modes). Since different modules require +different PAD settings (like pull up, keeper, etc) the contoller controls +also the PAD settings parameters. + +Please refer to pinctrl-bindings.txt in this directory for details of the +common pinctrl bindings used by client devices, including the meaning of the +phrase "pin configuration node". + +Atmel AT91 pin configuration node is a node of a group of pins which can be +used for a specific device or function. This node represents both mux and config +of the pins in that group. The 'pins' selects the function mode(also named pin +mode) this pin can work on and the 'config' configures various pad settings +such as pull-up, multi drive, etc. + +Required properties for iomux controller: +- compatible: "atmel,at91rm9200-pinctrl" +- atmel,mux-mask: array of mask (periph per bank) to describe if a pin can be + configured in this periph mode. All the periph and bank need to be describe. + +How to create such array: + +Each column will represent the possible peripheral of the pinctrl +Each line will represent a pio bank + +Take an example on the 9260 +Peripheral: 2 ( A and B) +Bank: 3 (A, B and C) +=> + + /* A B */ + 0xffffffff 0xffc00c3b /* pioA */ + 0xffffffff 0x7fff3ccf /* pioB */ + 0xffffffff 0x007fffff /* pioC */ + +For each peripheral/bank we will descibe in a u32 if a pin can can be +configured in it by putting 1 to the pin bit (1 << pin) + +Let's take the pioA on peripheral B +From the datasheet Table 10-2. +Peripheral B +PA0 MCDB0 +PA1 MCCDB +PA2 +PA3 MCDB3 +PA4 MCDB2 +PA5 MCDB1 +PA6 +PA7 +PA8 +PA9 +PA10 ETX2 +PA11 ETX3 +PA12 +PA13 +PA14 +PA15 +PA16 +PA17 +PA18 +PA19 +PA20 +PA21 +PA22 ETXER +PA23 ETX2 +PA24 ETX3 +PA25 ERX2 +PA26 ERX3 +PA27 ERXCK +PA28 ECRS +PA29 ECOL +PA30 RXD4 +PA31 TXD4 + +=> 0xffc00c3b + +Required properties for pin configuration node: +- atmel,pins: 4 integers array, represents a group of pins mux and config + setting. The format is atmel,pins = <PIN_BANK PIN_BANK_NUM PERIPH CONFIG>. + The PERIPH 0 means gpio. + +Bits used for CONFIG: +PULL_UP (1 << 0): indicate this pin need a pull up. +MULTIDRIVE (1 << 1): indicate this pin need to be configured as multidrive. +DEGLITCH (1 << 2): indicate this pin need deglitch. +PULL_DOWN (1 << 3): indicate this pin need a pull down. +DIS_SCHMIT (1 << 4): indicate this pin need to disable schmit trigger. +DEBOUNCE (1 << 16): indicate this pin need debounce. +DEBOUNCE_VAL (0x3fff << 17): debounce val. + +NOTE: +Some requirements for using atmel,at91rm9200-pinctrl binding: +1. We have pin function node defined under at91 controller node to represent + what pinmux functions this SoC supports. +2. The driver can use the function node's name and pin configuration node's + name describe the pin function and group hierarchy. + For example, Linux at91 pinctrl driver takes the function node's name + as the function name and pin configuration node's name as group name to + create the map table. +3. Each pin configuration node should have a phandle, devices can set pins + configurations by referring to the phandle of that pin configuration node. +4. The gpio controller must be describe in the pinctrl simple-bus. + +Examples: + +pinctrl@fffff400 { + #address-cells = <1>; + #size-cells = <1>; + ranges; + compatible = "atmel,at91rm9200-pinctrl", "simple-bus"; + reg = <0xfffff400 0x600>; + + atmel,mux-mask = < + /* A B */ + 0xffffffff 0xffc00c3b /* pioA */ + 0xffffffff 0x7fff3ccf /* pioB */ + 0xffffffff 0x007fffff /* pioC */ + >; + + /* shared pinctrl settings */ + dbgu { + pinctrl_dbgu: dbgu-0 { + atmel,pins = + <1 14 0x1 0x0 /* PB14 periph A */ + 1 15 0x1 0x1>; /* PB15 periph with pullup */ + }; + }; +}; + +dbgu: serial@fffff200 { + compatible = "atmel,at91sam9260-usart"; + reg = <0xfffff200 0x200>; + interrupts = <1 4 7>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_dbgu>; + status = "disabled"; +}; diff --git a/Documentation/devicetree/bindings/rtc/orion-rtc.txt b/Documentation/devicetree/bindings/rtc/orion-rtc.txt new file mode 100644 index 00000000000..3bf63ffa516 --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/orion-rtc.txt @@ -0,0 +1,18 @@ +* Mvebu Real Time Clock + +RTC controller for the Kirkwood, the Dove, the Armada 370 and the +Armada XP SoCs + +Required properties: +- compatible : Should be "marvell,orion-rtc" +- reg: physical base address of the controller and length of memory mapped + region. +- interrupts: IRQ line for the RTC. + +Example: + +rtc@10300 { + compatible = "marvell,orion-rtc"; + reg = <0xd0010300 0x20>; + interrupts = <50>; +}; diff --git a/Documentation/devicetree/bindings/thermal/db8500-thermal.txt b/Documentation/devicetree/bindings/thermal/db8500-thermal.txt new file mode 100644 index 00000000000..2e1c06fad81 --- /dev/null +++ b/Documentation/devicetree/bindings/thermal/db8500-thermal.txt @@ -0,0 +1,44 @@ +* ST-Ericsson DB8500 Thermal + +** Thermal node properties: + +- compatible : "stericsson,db8500-thermal"; +- reg : address range of the thermal sensor registers; +- interrupts : interrupts generated from PRCMU; +- interrupt-names : "IRQ_HOTMON_LOW" and "IRQ_HOTMON_HIGH"; +- num-trips : number of total trip points, this is required, set it 0 if none, + if greater than 0, the following properties must be defined; +- tripN-temp : temperature of trip point N, should be in ascending order; +- tripN-type : type of trip point N, should be one of "active" "passive" "hot" + "critical"; +- tripN-cdev-num : number of the cooling devices which can be bound to trip + point N, this is required if trip point N is defined, set it 0 if none, + otherwise the following cooling device names must be defined; +- tripN-cdev-nameM : name of the No. M cooling device of trip point N; + +Usually the num-trips and tripN-*** are separated in board related dts files. + +Example: +thermal@801573c0 { + compatible = "stericsson,db8500-thermal"; + reg = <0x801573c0 0x40>; + interrupts = <21 0x4>, <22 0x4>; + interrupt-names = "IRQ_HOTMON_LOW", "IRQ_HOTMON_HIGH"; + + num-trips = <3>; + + trip0-temp = <75000>; + trip0-type = "active"; + trip0-cdev-num = <1>; + trip0-cdev-name0 = "thermal-cpufreq-0"; + + trip1-temp = <80000>; + trip1-type = "active"; + trip1-cdev-num = <2>; + trip1-cdev-name0 = "thermal-cpufreq-0"; + trip1-cdev-name1 = "thermal-fan"; + + trip2-temp = <85000>; + trip2-type = "critical"; + trip2-cdev-num = <0>; +} diff --git a/Documentation/devicetree/bindings/tty/serial/fsl-mxs-auart.txt b/Documentation/devicetree/bindings/tty/serial/fsl-mxs-auart.txt index 2ee903fad25..273a8d5b330 100644 --- a/Documentation/devicetree/bindings/tty/serial/fsl-mxs-auart.txt +++ b/Documentation/devicetree/bindings/tty/serial/fsl-mxs-auart.txt @@ -6,11 +6,19 @@ Required properties: - reg : Address and length of the register set for the device - interrupts : Should contain the auart interrupt numbers +Optional properties: +- fsl,auart-dma-channel : The DMA channels, the first is for RX, the other + is for TX. If you add this property, it also means that you + will enable the DMA support for the auart. + Note: due to the hardware bug in imx23(see errata : 2836), + only the imx28 can enable the DMA support for the auart. + Example: auart0: serial@8006a000 { compatible = "fsl,imx28-auart", "fsl,imx23-auart"; reg = <0x8006a000 0x2000>; interrupts = <112 70 71>; + fsl,auart-dma-channel = <8 9>; }; Note: Each auart port should have an alias correctly numbered in "aliases" diff --git a/Documentation/devicetree/bindings/tty/serial/of-serial.txt b/Documentation/devicetree/bindings/tty/serial/of-serial.txt index ba385f2e0dd..1e1145ca4f3 100644 --- a/Documentation/devicetree/bindings/tty/serial/of-serial.txt +++ b/Documentation/devicetree/bindings/tty/serial/of-serial.txt @@ -14,7 +14,10 @@ Required properties: - "serial" if the port type is unknown. - reg : offset and length of the register set for the device. - interrupts : should contain uart interrupt. -- clock-frequency : the input clock frequency for the UART. +- clock-frequency : the input clock frequency for the UART + or + clocks phandle to refer to the clk used as per Documentation/devicetree + /bindings/clock/clock-bindings.txt Optional properties: - current-speed : the current active speed of the UART. diff --git a/Documentation/devicetree/bindings/usb/am33xx-usb.txt b/Documentation/devicetree/bindings/usb/am33xx-usb.txt index ca8fa56e9f0..ea840f7f925 100644 --- a/Documentation/devicetree/bindings/usb/am33xx-usb.txt +++ b/Documentation/devicetree/bindings/usb/am33xx-usb.txt @@ -1,14 +1,35 @@ AM33XX MUSB GLUE - compatible : Should be "ti,musb-am33xx" + - reg : offset and length of register sets, first usbss, then for musb instances + - interrupts : usbss, musb instance interrupts in order - ti,hwmods : must be "usb_otg_hs" - multipoint : Should be "1" indicating the musb controller supports multipoint. This is a MUSB configuration-specific setting. - - num_eps : Specifies the number of endpoints. This is also a + - num-eps : Specifies the number of endpoints. This is also a MUSB configuration-specific setting. Should be set to "16" - - ram_bits : Specifies the ram address size. Should be set to "12" - - port0_mode : Should be "3" to represent OTG. "1" signifies HOST and "2" + - ram-bits : Specifies the ram address size. Should be set to "12" + - port0-mode : Should be "3" to represent OTG. "1" signifies HOST and "2" represents PERIPHERAL. - - port1_mode : Should be "1" to represent HOST. "3" signifies OTG and "2" + - port1-mode : Should be "1" to represent HOST. "3" signifies OTG and "2" represents PERIPHERAL. - power : Should be "250". This signifies the controller can supply upto 500mA when operating in host mode. + +Example: + +usb@47400000 { + compatible = "ti,musb-am33xx"; + reg = <0x47400000 0x1000 /* usbss */ + 0x47401000 0x800 /* musb instance 0 */ + 0x47401800 0x800>; /* musb instance 1 */ + interrupts = <17 /* usbss */ + 18 /* musb instance 0 */ + 19>; /* musb instance 1 */ + multipoint = <1>; + num-eps = <16>; + ram-bits = <12>; + port0-mode = <3>; + port1-mode = <3>; + power = <250>; + ti,hwmods = "usb_otg_hs"; +}; diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index 9de2b9ff9d6..770a0193ca1 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -5,6 +5,7 @@ using them to avoid name-space collisions. ad Avionic Design GmbH adi Analog Devices, Inc. +ak Asahi Kasei Corp. amcc Applied Micro Circuits Corporation (APM, formally AMCC) apm Applied Micro Circuits Corporation (APM) arm ARM Ltd. @@ -25,6 +26,7 @@ gef GE Fanuc Intelligent Platforms Embedded Systems, Inc. hp Hewlett Packard ibm International Business Machines (IBM) idt Integrated Device Technologies, Inc. +img Imagination Technologies Ltd. intercontrol Inter Control Group linux Linux-specific binding marvell Marvell Technology Group Ltd. @@ -34,8 +36,9 @@ national National Semiconductor nintendo Nintendo nvidia NVIDIA nxp NXP Semiconductors +onnn ON Semiconductor Corp. picochip Picochip Ltd -powervr Imagination Technologies +powervr PowerVR (deprecated, use img) qcom Qualcomm, Inc. ramtron Ramtron International realtek Realtek Semiconductor Corp. @@ -45,6 +48,7 @@ schindler Schindler sil Silicon Image simtek sirf SiRF Technology, Inc. +snps Synopsys, Inc. st STMicroelectronics stericsson ST-Ericsson ti Texas Instruments diff --git a/Documentation/devicetree/usage-model.txt b/Documentation/devicetree/usage-model.txt index dca90fe22a9..ef9d06c9f8f 100644 --- a/Documentation/devicetree/usage-model.txt +++ b/Documentation/devicetree/usage-model.txt @@ -347,7 +347,7 @@ later), which will happily live at the base of the Linux /sys/devices tree. Therefore, if a DT node is at the root of the tree, then it really probably is best registered as a platform_device. -Linux board support code calls of_platform_populate(NULL, NULL, NULL) +Linux board support code calls of_platform_populate(NULL, NULL, NULL, NULL) to kick off discovery of devices at the root of the tree. The parameters are all NULL because when starting from the root of the tree, there is no need to provide a starting node (the first NULL), a diff --git a/Documentation/filesystems/xfs.txt b/Documentation/filesystems/xfs.txt index 3fc0c31a6f5..3e4b3dd1e04 100644 --- a/Documentation/filesystems/xfs.txt +++ b/Documentation/filesystems/xfs.txt @@ -43,7 +43,7 @@ When mounting an XFS filesystem, the following options are accepted. Issue command to let the block device reclaim space freed by the filesystem. This is useful for SSD devices, thinly provisioned LUNs and virtual machine images, but may have a performance - impact. This option is incompatible with the nodelaylog option. + impact. dmapi Enable the DMAPI (Data Management API) event callouts. @@ -72,8 +72,15 @@ When mounting an XFS filesystem, the following options are accepted. Indicates that XFS is allowed to create inodes at any location in the filesystem, including those which will result in inode numbers occupying more than 32 bits of significance. This is - provided for backwards compatibility, but causes problems for - backup applications that cannot handle large inode numbers. + the default allocation option. Applications which do not handle + inode numbers bigger than 32 bits, should use inode32 option. + + inode32 + Indicates that XFS is limited to create inodes at locations which + will not result in inode numbers with more than 32 bits of + significance. This is provided for backwards compatibility, since + 64 bits inode numbers might cause problems for some applications + that cannot handle large inode numbers. largeio/nolargeio If "nolargeio" is specified, the optimal I/O reported in diff --git a/Documentation/firmware_class/README b/Documentation/firmware_class/README index 815b711bcd8..43fada989e6 100644 --- a/Documentation/firmware_class/README +++ b/Documentation/firmware_class/README @@ -22,12 +22,17 @@ - calls request_firmware(&fw_entry, $FIRMWARE, device) - kernel searchs the fimware image with name $FIRMWARE directly in the below search path of root filesystem: + User customized search path by module parameter 'path'[1] "/lib/firmware/updates/" UTS_RELEASE, "/lib/firmware/updates", "/lib/firmware/" UTS_RELEASE, "/lib/firmware" - If found, goto 7), else goto 2) + [1], the 'path' is a string parameter which length should be less + than 256, user should pass 'firmware_class.path=$CUSTOMIZED_PATH' + if firmware_class is built in kernel(the general situation) + 2), userspace: - /sys/class/firmware/xxx/{loading,data} appear. - hotplug gets called with a firmware identifier in $FIRMWARE @@ -114,3 +119,10 @@ on the setup, so I think that the choice on what firmware to make persistent should be left to userspace. + about firmware cache: + -------------------- + After firmware cache mechanism is introduced during system sleep, + request_firmware can be called safely inside device's suspend and + resume callback, and callers need't cache the firmware by + themselves any more for dealing with firmware loss during system + resume. diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt index e08a883de36..77a1d11af72 100644 --- a/Documentation/gpio.txt +++ b/Documentation/gpio.txt @@ -439,6 +439,48 @@ slower clock delays the rising edge of SCK, and the I2C master adjusts its signaling rate accordingly. +GPIO controllers and the pinctrl subsystem +------------------------------------------ + +A GPIO controller on a SOC might be tightly coupled with the pinctrl +subsystem, in the sense that the pins can be used by other functions +together with an optional gpio feature. We have already covered the +case where e.g. a GPIO controller need to reserve a pin or set the +direction of a pin by calling any of: + +pinctrl_request_gpio() +pinctrl_free_gpio() +pinctrl_gpio_direction_input() +pinctrl_gpio_direction_output() + +But how does the pin control subsystem cross-correlate the GPIO +numbers (which are a global business) to a certain pin on a certain +pin controller? + +This is done by registering "ranges" of pins, which are essentially +cross-reference tables. These are described in +Documentation/pinctrl.txt + +While the pin allocation is totally managed by the pinctrl subsystem, +gpio (under gpiolib) is still maintained by gpio drivers. It may happen +that different pin ranges in a SoC is managed by different gpio drivers. + +This makes it logical to let gpio drivers announce their pin ranges to +the pin ctrl subsystem before it will call 'pinctrl_request_gpio' in order +to request the corresponding pin to be prepared by the pinctrl subsystem +before any gpio usage. + +For this, the gpio controller can register its pin range with pinctrl +subsystem. There are two ways of doing it currently: with or without DT. + +For with DT support refer to Documentation/devicetree/bindings/gpio/gpio.txt. + +For non-DT support, user can call gpiochip_add_pin_range() with appropriate +parameters to register a range of gpio pins with a pinctrl driver. For this +exact name string of pinctrl device has to be passed as one of the +argument to this routine. + + What do these conventions omit? =============================== One of the biggest things these conventions omit is pin multiplexing, since diff --git a/Documentation/hwmon/ads7828 b/Documentation/hwmon/ads7828 index 2bbebe6f771..f6e263e0f60 100644 --- a/Documentation/hwmon/ads7828 +++ b/Documentation/hwmon/ads7828 @@ -4,29 +4,47 @@ Kernel driver ads7828 Supported chips: * Texas Instruments/Burr-Brown ADS7828 Prefix: 'ads7828' - Addresses scanned: I2C 0x48, 0x49, 0x4a, 0x4b - Datasheet: Publicly available at the Texas Instruments website : + Datasheet: Publicly available at the Texas Instruments website: http://focus.ti.com/lit/ds/symlink/ads7828.pdf + * Texas Instruments ADS7830 + Prefix: 'ads7830' + Datasheet: Publicly available at the Texas Instruments website: + http://focus.ti.com/lit/ds/symlink/ads7830.pdf + Authors: Steve Hardy <shardy@redhat.com> + Vivien Didelot <vivien.didelot@savoirfairelinux.com> + Guillaume Roguez <guillaume.roguez@savoirfairelinux.com> + +Platform data +------------- + +The ads7828 driver accepts an optional ads7828_platform_data structure (defined +in include/linux/platform_data/ads7828.h). The structure fields are: -Module Parameters ------------------ +* diff_input: (bool) Differential operation + set to true for differential mode, false for default single ended mode. -* se_input: bool (default Y) - Single ended operation - set to N for differential mode -* int_vref: bool (default Y) - Operate with the internal 2.5V reference - set to N for external reference -* vref_mv: int (default 2500) - If using an external reference, set this to the reference voltage in mV +* ext_vref: (bool) External reference + set to true if it operates with an external reference, false for default + internal reference. + +* vref_mv: (unsigned int) Voltage reference + if using an external reference, set this to the reference voltage in mV, + otherwise it will default to the internal value (2500mV). This value will be + bounded with limits accepted by the chip, described in the datasheet. + + If no structure is provided, the configuration defaults to single ended + operation and internal voltage reference (2.5V). Description ----------- -This driver implements support for the Texas Instruments ADS7828. +This driver implements support for the Texas Instruments ADS7828 and ADS7830. -This device is a 12-bit 8-channel A-D converter. +The ADS7828 device is a 12-bit 8-channel A/D converter, while the ADS7830 does +8-bit sampling. It can operate in single ended mode (8 +ve inputs) or in differential mode, where 4 differential pairs can be measured. @@ -34,3 +52,7 @@ where 4 differential pairs can be measured. The chip also has the facility to use an external voltage reference. This may be required if your hardware supplies the ADS7828 from a 5V supply, see the datasheet for more details. + +There is no reliable way to identify this chip, so the driver will not scan +some addresses to try to auto-detect it. That means that you will have to +statically declare the device in the platform support code. diff --git a/Documentation/hwmon/coretemp b/Documentation/hwmon/coretemp index f17256f069b..3374c085678 100644 --- a/Documentation/hwmon/coretemp +++ b/Documentation/hwmon/coretemp @@ -98,8 +98,10 @@ Process Processor TjMax(C) 45nm Atom Processors D525/510/425/410 100 + Z670/650 90 Z560/550/540/530P/530/520PT/520/515/510PT/510P 90 Z510/500 90 + N570/550 100 N475/470/455/450 100 N280/270 90 330/230 125 diff --git a/Documentation/hwmon/da9055 b/Documentation/hwmon/da9055 new file mode 100644 index 00000000000..855c3f536e0 --- /dev/null +++ b/Documentation/hwmon/da9055 @@ -0,0 +1,47 @@ +Supported chips: + * Dialog Semiconductors DA9055 PMIC + Prefix: 'da9055' + Datasheet: Datasheet is not publicly available. + +Authors: David Dajun Chen <dchen@diasemi.com> + +Description +----------- + +The DA9055 provides an Analogue to Digital Converter (ADC) with 10 bits +resolution and track and hold circuitry combined with an analogue input +multiplexer. The analogue input multiplexer will allow conversion of up to 5 +different inputs. The track and hold circuit ensures stable input voltages at +the input of the ADC during the conversion. + +The ADC is used to measure the following inputs: +Channel 0: VDDOUT - measurement of the system voltage +Channel 1: ADC_IN1 - high impedance input (0 - 2.5V) +Channel 2: ADC_IN2 - high impedance input (0 - 2.5V) +Channel 3: ADC_IN3 - high impedance input (0 - 2.5V) +Channel 4: Internal Tjunc. - sense (internal temp. sensor) + +By using sysfs attributes we can measure the system voltage VDDOUT, +chip junction temperature and auxiliary channels voltages. + +Voltage Monitoring +------------------ + +Voltages are sampled in a AUTO mode it can be manually sampled too and results +are stored in a 10 bit ADC. + +The system voltage is calculated as: + Milli volt = ((ADC value * 1000) / 85) + 2500 + +The voltages on ADC channels 1, 2 and 3 are calculated as: + Milli volt = (ADC value * 1000) / 102 + +Temperature Monitoring +---------------------- + +Temperatures are sampled by a 10 bit ADC. Junction temperatures +are monitored by the ADC channels. + +The junction temperature is calculated: + Degrees celsius = -0.4084 * (ADC_RES - T_OFFSET) + 307.6332 +The junction temperature attribute is supported by the driver. diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt index ec9ae670869..14c3f4f1b61 100644 --- a/Documentation/kbuild/makefiles.txt +++ b/Documentation/kbuild/makefiles.txt @@ -1175,15 +1175,16 @@ When kbuild executes, the following steps are followed (roughly): in an init section in the image. Platform code *must* copy the blob to non-init memory prior to calling unflatten_device_tree(). - Example: - #arch/x86/platform/ce4100/Makefile - clean-files := *dtb.S + To use this command, simply add *.dtb into obj-y or targets, or make + some other target depend on %.dtb - DTC_FLAGS := -p 1024 - obj-y += foo.dtb.o + A central rule exists to create $(obj)/%.dtb from $(src)/%.dts; + architecture Makefiles do no need to explicitly write out that rule. - $(obj)/%.dtb: $(src)/%.dts - $(call cmd,dtc) + Example: + targets += $(dtb-y) + clean-files += *.dtb + DTC_FLAGS ?= -p 1024 --- 6.8 Custom kbuild commands diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 9776f068306..28bd0f0e32c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1304,6 +1304,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. lapic [X86-32,APIC] Enable the local APIC even if BIOS disabled it. + lapic= [x86,APIC] "notscdeadline" Do not use TSC deadline + value for LAPIC timer one-shot implementation. Default + back to the programmable timer unit in the LAPIC. + lapic_timer_c2_ok [X86,APIC] trust the local apic timer in C2 power state. @@ -1984,6 +1988,20 @@ bytes respectively. Such letter suffixes can also be entirely omitted. nox2apic [X86-64,APIC] Do not enable x2APIC mode. + cpu0_hotplug [X86] Turn on CPU0 hotplug feature when + CONFIG_BOOTPARAM_HOTPLUG_CPU0 is off. + Some features depend on CPU0. Known dependencies are: + 1. Resume from suspend/hibernate depends on CPU0. + Suspend/hibernate will fail if CPU0 is offline and you + need to online CPU0 before suspend/hibernate. + 2. PIC interrupts also depend on CPU0. CPU0 can't be + removed if a PIC interrupt is detected. + It's said poweroff/reboot may depend on CPU0 on some + machines although I haven't seen such issues so far + after CPU0 is offline on a few tested machines. + If the dependencies are under your control, you can + turn on cpu0_hotplug. + nptcg= [IA-64] Override max number of concurrent global TLB purges which is reported from either PAL_VM_SUMMARY or SAL PALO. @@ -2394,6 +2412,27 @@ bytes respectively. Such letter suffixes can also be entirely omitted. ramdisk_size= [RAM] Sizes of RAM disks in kilobytes See Documentation/blockdev/ramdisk.txt. + rcu_nocbs= [KNL,BOOT] + In kernels built with CONFIG_RCU_NOCB_CPU=y, set + the specified list of CPUs to be no-callback CPUs. + Invocation of these CPUs' RCU callbacks will + be offloaded to "rcuoN" kthreads created for + that purpose. This reduces OS jitter on the + offloaded CPUs, which can be useful for HPC and + real-time workloads. It can also improve energy + efficiency for asymmetric multiprocessors. + + rcu_nocbs_poll [KNL,BOOT] + Rather than requiring that offloaded CPUs + (specified by rcu_nocbs= above) explicitly + awaken the corresponding "rcuoN" kthreads, + make these kthreads poll for callbacks. + This improves the real-time response for the + offloaded CPUs by relieving them of the need to + wake up the corresponding kthread, but degrades + energy efficiency by requiring that the kthreads + periodically wake up to do the polling. + rcutree.blimit= [KNL,BOOT] Set maximum number of finished RCU callbacks to process in one batch. @@ -2859,6 +2898,22 @@ bytes respectively. Such letter suffixes can also be entirely omitted. to facilitate early boot debugging. See also Documentation/trace/events.txt + trace_options=[option-list] + [FTRACE] Enable or disable tracer options at boot. + The option-list is a comma delimited list of options + that can be enabled or disabled just as if you were + to echo the option name into + + /sys/kernel/debug/tracing/trace_options + + For example, to enable stacktrace option (to dump the + stack trace of each event), add to the command line: + + trace_options=stacktrace + + See also Documentation/trace/ftrace.txt "trace options" + section. + transparent_hugepage= [KNL] Format: [always|madvise|never] diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 2759f7c188f..3c4e1b3b80a 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -251,12 +251,13 @@ And there are a number of things that _must_ or _must_not_ be assumed: And for: - *A = X; Y = *A; + *A = X; *(A + 4) = Y; - we may get either of: + we may get any of: - STORE *A = X; Y = LOAD *A; - STORE *A = Y = X; + STORE *A = X; STORE *(A + 4) = Y; + STORE *(A + 4) = Y; STORE *A = X; + STORE {*A, *(A + 4) } = {X, Y}; ========================= diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt index 6d0c2519cf4..c6f993d491b 100644 --- a/Documentation/memory-hotplug.txt +++ b/Documentation/memory-hotplug.txt @@ -161,7 +161,8 @@ a recent addition and not present on older kernels. in the memory block. 'state' : read-write at read: contains online/offline state of memory. - at write: user can specify "online", "offline" command + at write: user can specify "online_kernel", + "online_movable", "online", "offline" command which will be performed on al sections in the block. 'phys_device' : read-only: designed to show the name of physical memory device. This is not well implemented now. @@ -255,6 +256,17 @@ For onlining, you have to write "online" to the section's state file as: % echo online > /sys/devices/system/memory/memoryXXX/state +This onlining will not change the ZONE type of the target memory section, +If the memory section is in ZONE_NORMAL, you can change it to ZONE_MOVABLE: + +% echo online_movable > /sys/devices/system/memory/memoryXXX/state +(NOTE: current limit: this memory section must be adjacent to ZONE_MOVABLE) + +And if the memory section is in ZONE_MOVABLE, you can change it to ZONE_NORMAL: + +% echo online_kernel > /sys/devices/system/memory/memoryXXX/state +(NOTE: current limit: this memory section must be adjacent to ZONE_NORMAL) + After this, section memoryXXX's state will be 'online' and the amount of available memory will be increased. @@ -377,15 +389,18 @@ The third argument is passed by pointer of struct memory_notify. struct memory_notify { unsigned long start_pfn; unsigned long nr_pages; + int status_change_nid_normal; int status_change_nid; } start_pfn is start_pfn of online/offline memory. nr_pages is # of pages of online/offline memory. +status_change_nid_normal is set node id when N_NORMAL_MEMORY of nodemask +is (will be) set/clear, if this is -1, then nodemask status is not changed. status_change_nid is set node id when N_HIGH_MEMORY of nodemask is (will be) set/clear. It means a new(memoryless) node gets new memory by online and a node loses all memory. If this is -1, then nodemask status is not changed. -If status_changed_nid >= 0, callback should create/discard structures for the +If status_changed_nid* >= 0, callback should create/discard structures for the node if necessary. -------------- diff --git a/Documentation/mmc/mmc-dev-attrs.txt b/Documentation/mmc/mmc-dev-attrs.txt index 22ae8441489..0d98fac8893 100644 --- a/Documentation/mmc/mmc-dev-attrs.txt +++ b/Documentation/mmc/mmc-dev-attrs.txt @@ -25,6 +25,8 @@ All attributes are read-only. serial Product Serial Number (from CID Register) erase_size Erase group size preferred_erase_size Preferred erase size + raw_rpmb_size_mult RPMB partition size + rel_sectors Reliable write sector count Note on Erase Size and Preferred Erase Size: @@ -65,6 +67,11 @@ Note on Erase Size and Preferred Erase Size: "preferred_erase_size" is in bytes. +Note on raw_rpmb_size_mult: + "raw_rpmb_size_mult" is a mutliple of 128kB block. + RPMB size in byte is calculated by using the following equation: + RPMB partition size = 128kB x raw_rpmb_size_mult + SD/MMC/SDIO Clock Gating Attribute ================================== diff --git a/Documentation/networking/vxlan.txt b/Documentation/networking/vxlan.txt index 5b34b762d7d..6d993510f09 100644 --- a/Documentation/networking/vxlan.txt +++ b/Documentation/networking/vxlan.txt @@ -32,7 +32,7 @@ no entry is in the forwarding table. # ip link delete vxlan0 3. Show vxlan info - # ip -d show vxlan0 + # ip -d link show vxlan0 It is possible to create, destroy and display the vxlan forwarding table using the new bridge command. @@ -41,7 +41,7 @@ forwarding table using the new bridge command. # bridge fdb add to 00:17:42:8a:b4:05 dst 192.19.0.2 dev vxlan0 2. Delete forwarding table entry - # bridge fdb delete 00:17:42:8a:b4:05 + # bridge fdb delete 00:17:42:8a:b4:05 dev vxlan0 3. Show forwarding table # bridge fdb show dev vxlan0 diff --git a/Documentation/pinctrl.txt b/Documentation/pinctrl.txt index 3b4ee532886..da40efbef6e 100644 --- a/Documentation/pinctrl.txt +++ b/Documentation/pinctrl.txt @@ -364,6 +364,9 @@ will get an pin number into its handled number range. Further it is also passed the range ID value, so that the pin controller knows which range it should deal with. +Calling pinctrl_add_gpio_range from pinctrl driver is DEPRECATED. Please see +section 2.1 of Documentation/devicetree/bindings/gpio/gpio.txt on how to bind +pinctrl and gpio drivers. PINMUX interfaces ================= @@ -1193,4 +1196,6 @@ foo_switch() ... } -The above has to be done from process context. +The above has to be done from process context. The reservation of the pins +will be done when the state is activated, so in effect one specific pin +can be used by different functions at different times on a running system. diff --git a/Documentation/power/pm_qos_interface.txt b/Documentation/power/pm_qos_interface.txt index 17e130a8034..79a2a58425e 100644 --- a/Documentation/power/pm_qos_interface.txt +++ b/Documentation/power/pm_qos_interface.txt @@ -99,7 +99,7 @@ reading the aggregated value does not require any locking mechanism. From kernel mode the use of this interface is the following: -int dev_pm_qos_add_request(device, handle, value): +int dev_pm_qos_add_request(device, handle, type, value): Will insert an element into the list for that identified device with the target value. Upon change to this list the new target is recomputed and any registered notifiers are called only if the target value is now different. diff --git a/Documentation/telephony/00-INDEX b/Documentation/telephony/00-INDEX deleted file mode 100644 index 4ffe0ed5b6f..00000000000 --- a/Documentation/telephony/00-INDEX +++ /dev/null @@ -1,4 +0,0 @@ -00-INDEX - - this file. -ixj.txt - - document describing the Quicknet drivers. diff --git a/Documentation/telephony/ixj.txt b/Documentation/telephony/ixj.txt deleted file mode 100644 index db94fb6c567..00000000000 --- a/Documentation/telephony/ixj.txt +++ /dev/null @@ -1,394 +0,0 @@ -Linux Quicknet-Drivers-Howto -Quicknet Technologies, Inc. (www.quicknet.net) -Version 0.3.4 December 18, 1999 - -1.0 Introduction - -This document describes the first GPL release version of the Linux -driver for the Quicknet Internet PhoneJACK and Internet LineJACK -cards. More information about these cards is available at -www.quicknet.net. The driver version discussed in this document is -0.3.4. - -These cards offer nice telco style interfaces to use your standard -telephone/key system/PBX as the user interface for VoIP applications. -The Internet LineJACK also offers PSTN connectivity for a single line -Internet to PSTN gateway. Of course, you can add more than one card -to a system to obtain multi-line functionality. At this time, the -driver supports the POTS port on both the Internet PhoneJACK and the -Internet LineJACK, but the PSTN port on the latter card is not yet -supported. - -This document, and the drivers for the cards, are intended for a -limited audience that includes technically capable programmers who -would like to experiment with Quicknet cards. The drivers are -considered in ALPHA status and are not yet considered stable enough -for general, widespread use in an unlimited audience. - -That's worth saying again: - -THE LINUX DRIVERS FOR QUICKNET CARDS ARE PRESENTLY IN A ALPHA STATE -AND SHOULD NOT BE CONSIDERED AS READY FOR NORMAL WIDESPREAD USE. - -They are released early in the spirit of Internet development and to -make this technology available to innovators who would benefit from -early exposure. - -When we promote the device driver to "beta" level it will be -considered ready for non-programmer, non-technical users. Until then, -please be aware that these drivers may not be stable and may affect -the performance of your system. - - -1.1 Latest Additions/Improvements - -The 0.3.4 version of the driver is the first GPL release. Several -features had to be removed from the prior binary only module, mostly -for reasons of Intellectual Property rights. We can't release -information that is not ours - so certain aspects of the driver had to -be removed to protect the rights of others. - -Specifically, very old Internet PhoneJACK cards have non-standard -G.723.1 codecs (due to the early nature of the DSPs in those days). -The auto-conversion code to bring those cards into compliance with -today's standards is available as a binary only module to those people -needing it. If you bought your card after 1997 or so, you are OK - -it's only the very old cards that are affected. - -Also, the code to download G.728/G.729/G.729a codecs to the DSP is -available as a binary only module as well. This IP is not ours to -release. - -Hooks are built into the GPL driver to allow it to work with other -companion modules that are completely separate from this module. - -1.2 Copyright, Trademarks, Disclaimer, & Credits - -Copyright - -Copyright (c) 1999 Quicknet Technologies, Inc. Permission is granted -to freely copy and distribute this document provided you preserve it -in its original form. For corrections and minor changes contact the -maintainer at linux@quicknet.net. - -Trademarks - -Internet PhoneJACK and Internet LineJACK are registered trademarks of -Quicknet Technologies, Inc. - -Disclaimer - -Much of the info in this HOWTO is early information released by -Quicknet Technologies, Inc. for the express purpose of allowing early -testing and use of the Linux drivers developed for their products. -While every attempt has been made to be thorough, complete and -accurate, the information contained here may be unreliable and there -are likely a number of errors in this document. Please let the -maintainer know about them. Since this is free documentation, it -should be obvious that neither I nor previous authors can be held -legally responsible for any errors. - -Credits - -This HOWTO was written by: - - Greg Herlein <gherlein@quicknet.net> - Ed Okerson <eokerson@quicknet.net> - -1.3 Future Plans: You Can Help - -Please let the maintainer know of any errors in facts, opinions, -logic, spelling, grammar, clarity, links, etc. But first, if the date -is over a month old, check to see that you have the latest -version. Please send any info that you think belongs in this document. - -You can also contribute code and/or bug-fixes for the sample -applications. - - -1.4 Where to get things - -Info on latest versions of the driver are here: - -http://web.archive.org/web/*/http://www.quicknet.net/develop.htm - -1.5 Mailing List - -Quicknet operates a mailing list to provide a public forum on using -these drivers. - -To subscribe to the linux-sdk mailing list, send an email to: - - majordomo@linux.quicknet.net - -In the body of the email, type: - - subscribe linux-sdk <your-email-address> - -Please delete any signature block that you would normally add to the -bottom of your email - it tends to confuse majordomo. - -To send mail to the list, address your mail to - - linux-sdk@linux.quicknet.net - -Your message will go out to everyone on the list. - -To unsubscribe to the linux-sdk mailing list, send an email to: - - majordomo@linux.quicknet.net - -In the body of the email, type: - - unsubscribe linux-sdk <your-email-address> - - - -2.0 Requirements - -2.1 Quicknet Card(s) - -You will need at least one Internet PhoneJACK or Internet LineJACK -cards. These are ISA or PCI bus devices that use Plug-n-Play for -configuration, and use no IRQs. The driver will support up to 16 -cards in any one system, of any mix between the two types. - -Note that you will need two cards to do any useful testing alone, since -you will need a card on both ends of the connection. Of course, if -you are doing collaborative work, perhaps your friends or coworkers -have cards too. If not, we'll gladly sell them some! - - -2.2 ISAPNP - -Since the Quicknet cards are Plug-n-Play devices, you will need the -isapnp tools package to configure the cards, or you can use the isapnp -module to autoconfigure them. The former package probably came with -your Linux distribution. Documentation on this package is available -online at: - -http://mailer.wiwi.uni-marburg.de/linux/LDP/HOWTO/Plug-and-Play-HOWTO.html - -The isapnp autoconfiguration is available on the Quicknet website at: - - http://www.quicknet.net/develop.htm - -though it may be in the kernel by the time you read this. - - -3.0 Card Configuration - -If you did not get your drivers as part of the linux kernel, do the -following to install them: - - a. untar the distribution file. We use the following command: - tar -xvzf ixj-0.x.x.tgz - -This creates a subdirectory holding all the necessary files. Go to that -subdirectory. - - b. run the "ixj_dev_create" script to remove any stray device -files left in the /dev directory, and to create the new officially -designated device files. Note that the old devices were called -/dev/ixj, and the new method uses /dev/phone. - - c. type "make;make install" - this will compile and install the -module. - - d. type "depmod -av" to rebuild all your kernel version dependencies. - - e. if you are using the isapnp module to configure the cards - automatically, then skip to step f. Otherwise, ensure that you - have run the isapnp configuration utility to properly configure - the cards. - - e1. The Internet PhoneJACK has one configuration register that - requires 16 IO ports. The Internet LineJACK card has two - configuration registers and isapnp reports that IO 0 - requires 16 IO ports and IO 1 requires 8. The Quicknet - driver assumes that these registers are configured to be - contiguous, i.e. if IO 0 is set to 0x340 then IO 1 should - be set to 0x350. - - Make sure that none of the cards overlap if you have - multiple cards in the system. - - If you are new to the isapnp tools, you can jumpstart - yourself by doing the following: - - e2. go to the /etc directory and run pnpdump to get a blank - isapnp.conf file. - - pnpdump > /etc/isapnp.conf - - e3. edit the /etc/isapnp.conf file to set the IO warnings and - the register IO addresses. The IO warnings means that you - should find the line in the file that looks like this: - - (CONFLICT (IO FATAL)(IRQ FATAL)(DMA FATAL)(MEM FATAL)) # or WARNING - - and you should edit the line to look like this: - - (CONFLICT (IO WARNING)(IRQ FATAL)(DMA FATAL)(MEM FATAL)) # - or WARNING - - The next step is to set the IO port addresses. The issue - here is that isapnp does not identify all of the ports out - there. Specifically any device that does not have a driver - or module loaded by Linux will not be registered. This - includes older sound cards and network cards. We have - found that the IO port 0x300 is often used even though - isapnp claims that no-one is using those ports. We - recommend that for a single card installation that port - 0x340 (and 0x350) be used. The IO port line should change - from this: - - (IO 0 (SIZE 16) (BASE 0x0300) (CHECK)) - - to this: - - (IO 0 (SIZE 16) (BASE 0x0340) ) - - e4. if you have multiple Quicknet cards, make sure that you do - not have any overlaps. Be especially careful if you are - mixing Internet PhoneJACK and Internet LineJACK cards in - the same system. In these cases we recommend moving the - IO port addresses to the 0x400 block. Please note that on - a few machines the 0x400 series are used. Feel free to - experiment with other addresses. Our cards have been - proven to work using IO addresses of up to 0xFF0. - - e5. the last step is to uncomment the activation line so the - drivers will be associated with the port. This means the - line (immediately below) the IO line should go from this: - - # (ACT Y) - - to this: - - (ACT Y) - - Once you have finished editing the isapnp.conf file you - must submit it into the pnp driverconfigure the cards. - This is done using the following command: - - isapnp isapnp.conf - - If this works you should see a line that identifies the - Quicknet device, the IO port(s) chosen, and a message - "Enabled OK". - - f. if you are loading the module by hand, use insmod. An example -of this would look like this: - - insmod phonedev - insmod ixj dspio=0x320,0x310 xio=0,0x330 - -Then verify the module loaded by running lsmod. If you are not using a -module that matches your kernel version, you may need to "force" the -load using the -f option in the insmod command. - - insmod phonedev - insmod -f ixj dspio=0x320,0x310 xio=0,0x330 - - -If you are using isapnp to autoconfigure your card, then you do NOT -need any of the above, though you need to use depmod to load the -driver, like this: - - depmod ixj - -which will result in the needed drivers getting loaded automatically. - - g. if you are planning on having the kernel automatically request -the module for you, then you need to edit /etc/conf.modules and add the -following lines: - - options ixj dspio=0x340 xio=0x330 ixjdebug=0 - -If you do this, then when you execute an application that uses the -module the kernel will request that it is loaded. - - h. if you want non-root users to be able to read and write to the -ixj devices (this is a good idea!) you should do the following: - - - decide upon a group name to use and create that group if - needed. Add the user names to that group that you wish to - have access to the device. For example, we typically will - create a group named "ixj" in /etc/group and add all users - to that group that we want to run software that can use the - ixjX devices. - - - change the permissions on the device files, like this: - - chgrp ixj /dev/ixj* - chmod 660 /dev/ixj* - -Once this is done, then non-root users should be able to use the -devices. If you have enabled autoloading of modules, then the user -should be able to open the device and have the module loaded -automatically for them. - - -4.0 Driver Installation problems. - -We have tested these drivers on the 2.2.9, 2.2.10, 2.2.12, and 2.2.13 kernels -and in all cases have eventually been able to get the drivers to load and -run. We have found four types of problems that prevent this from happening. -The problems and solutions are: - - a. A step was missed in the installation. Go back and use section 3 -as a checklist. Many people miss running the ixj_dev_create script and thus -never load the device names into the filesystem. - - b. The kernel is inconsistently linked. We have found this problem in -the Out Of the Box installation of several distributions. The symptoms -are that neither driver will load, and that the unknown symbols include "jiffy" -and "kmalloc". The solution is to recompile both the kernel and the -modules. The command string for the final compile looks like this: - - In the kernel directory: - 1. cp .config /tmp - 2. make mrproper - 3. cp /tmp/.config . - 4. make clean;make bzImage;make modules;make modules_install - -This rebuilds both the kernel and all the modules and makes sure they all -have the same linkages. This generally solves the problem once the new -kernel is installed and the system rebooted. - - c. The kernel has been patched, then unpatched. This happens when -someone decides to use an earlier kernel after they load a later kernel. -The symptoms are proceeding through all three above steps and still not -being able to load the driver. What has happened is that the generated -header files are out of sync with the kernel itself. The solution is -to recompile (again) using "make mrproper". This will remove and then -regenerate all the necessary header files. Once this is done, then you -need to install and reboot the kernel. We have not seen any problem -loading one of our drivers after this treatment. - -5.0 Known Limitations - -We cannot currently play "dial-tone" and listen for DTMF digits at the -same time using the ISA PhoneJACK. This is a bug in the 8020 DSP chip -used on that product. All other Quicknet products function normally -in this regard. We have a work-around, but it's not done yet. Until -then, if you want dial-tone, you can always play a recorded dial-tone -sound into the audio until you have gathered the DTMF digits. - - - - - - - - - - - - - - - - - diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt index ca1a1a34970..88c02334e35 100644 --- a/Documentation/thermal/sysfs-api.txt +++ b/Documentation/thermal/sysfs-api.txt @@ -112,6 +112,29 @@ temperature) and throttle appropriate devices. trip: indicates which trip point the cooling devices is associated with in this thermal zone. +1.4 Thermal Zone Parameters +1.4.1 struct thermal_bind_params + This structure defines the following parameters that are used to bind + a zone with a cooling device for a particular trip point. + .cdev: The cooling device pointer + .weight: The 'influence' of a particular cooling device on this zone. + This is on a percentage scale. The sum of all these weights + (for a particular zone) cannot exceed 100. + .trip_mask:This is a bit mask that gives the binding relation between + this thermal zone and cdev, for a particular trip point. + If nth bit is set, then the cdev and thermal zone are bound + for trip point n. + .match: This call back returns success(0) if the 'tz and cdev' need to + be bound, as per platform data. +1.4.2 struct thermal_zone_params + This structure defines the platform level parameters for a thermal zone. + This data, for each thermal zone should come from the platform layer. + This is an optional feature where some platforms can choose not to + provide this data. + .governor_name: Name of the thermal governor used for this zone + .num_tbps: Number of thermal_bind_params entries for this zone + .tbp: thermal_bind_params entries + 2. sysfs attributes structure RO read only value @@ -126,6 +149,7 @@ Thermal zone device sys I/F, created once it's registered: |---type: Type of the thermal zone |---temp: Current temperature |---mode: Working mode of the thermal zone + |---policy: Thermal governor used for this zone |---trip_point_[0-*]_temp: Trip point temperature |---trip_point_[0-*]_type: Trip point type |---trip_point_[0-*]_hyst: Hysteresis value for this trip point @@ -187,6 +211,10 @@ mode charge of the thermal management. RW, Optional +policy + One of the various thermal governors used for a particular zone. + RW, Required + trip_point_[0-*]_temp The temperature above which trip point will be fired. Unit: millidegree Celsius @@ -264,6 +292,7 @@ method, the sys I/F structure will be built like this: |---type: acpitz |---temp: 37000 |---mode: enabled + |---policy: step_wise |---trip_point_0_temp: 100000 |---trip_point_0_type: critical |---trip_point_1_temp: 80000 @@ -305,3 +334,38 @@ to a thermal_zone_device when it registers itself with the framework. The event will be one of:{THERMAL_AUX0, THERMAL_AUX1, THERMAL_CRITICAL, THERMAL_DEV_FAULT}. Notification can be sent when the current temperature crosses any of the configured thresholds. + +5. Export Symbol APIs: + +5.1: get_tz_trend: +This function returns the trend of a thermal zone, i.e the rate of change +of temperature of the thermal zone. Ideally, the thermal sensor drivers +are supposed to implement the callback. If they don't, the thermal +framework calculated the trend by comparing the previous and the current +temperature values. + +5.2:get_thermal_instance: +This function returns the thermal_instance corresponding to a given +{thermal_zone, cooling_device, trip_point} combination. Returns NULL +if such an instance does not exist. + +5.3:notify_thermal_framework: +This function handles the trip events from sensor drivers. It starts +throttling the cooling devices according to the policy configured. +For CRITICAL and HOT trip points, this notifies the respective drivers, +and does actual throttling for other trip points i.e ACTIVE and PASSIVE. +The throttling policy is based on the configured platform data; if no +platform data is provided, this uses the step_wise throttling policy. + +5.4:thermal_cdev_update: +This function serves as an arbitrator to set the state of a cooling +device. It sets the cooling device to the deepest cooling state if +possible. + +5.5:thermal_register_governor: +This function lets the various thermal governors to register themselves +with the Thermal framework. At run time, depending on a zone's platform +data, a particular governor is used for throttling. + +5.6:thermal_unregister_governor: +This function unregisters a governor from the thermal framework. diff --git a/Documentation/usb/error-codes.txt b/Documentation/usb/error-codes.txt index b3f606b81a0..9c3eb845ebe 100644 --- a/Documentation/usb/error-codes.txt +++ b/Documentation/usb/error-codes.txt @@ -21,6 +21,8 @@ Non-USB-specific: USB-specific: +-EBUSY The URB is already active. + -ENODEV specified USB-device or bus doesn't exist -ENOENT specified interface or endpoint does not exist or @@ -35,9 +37,8 @@ USB-specific: d) ISO: number_of_packets is < 0 e) various other cases --EAGAIN a) specified ISO start frame too early - b) (using ISO-ASAP) too much scheduled for the future - wait some time and try again. +-EXDEV ISO: URB_ISO_ASAP wasn't specified and all the frames + the URB would be scheduled in have already expired. -EFBIG Host controller driver can't schedule that many ISO frames. diff --git a/Documentation/usb/mass-storage.txt b/Documentation/usb/mass-storage.txt index e9b9334627b..59063ad7a60 100644 --- a/Documentation/usb/mass-storage.txt +++ b/Documentation/usb/mass-storage.txt @@ -20,9 +20,9 @@ This document describes how to use the gadget from user space, its relation to mass storage function (or MSF) and different gadgets - using it, and how it differs from File Storage Gadget (or FSG). It - will talk only briefly about how to use MSF within composite - gadgets. + using it, and how it differs from File Storage Gadget (or FSG) + (which is no longer included in Linux). It will talk only briefly + about how to use MSF within composite gadgets. * Module parameters @@ -198,16 +198,15 @@ The Mass Storage Function and thus the Mass Storage Gadget has been based on the File Storage Gadget. The difference between the two is that MSG is a composite gadget (ie. uses the composite framework) - while file storage gadget is a traditional gadget. From userspace + while file storage gadget was a traditional gadget. From userspace point of view this distinction does not really matter, but from kernel hacker's point of view, this means that (i) MSG does not duplicate code needed for handling basic USB protocol commands and (ii) MSF can be used in any other composite gadget. - Because of that, File Storage Gadget has been deprecated and - scheduled to be removed in Linux 3.8. All users need to transition - to the Mass Storage Gadget by that time. The two gadgets behave - mostly the same from the outside except: + Because of that, File Storage Gadget has been removed in Linux 3.8. + All users need to transition to the Mass Storage Gadget. The two + gadgets behave mostly the same from the outside except: 1. In FSG the “removable” and “cdrom” module parameters set the flag for all logical units whereas in MSG they accept a list of y/n diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt index 9efceff51bf..f15cb74c4f7 100644 --- a/Documentation/x86/boot.txt +++ b/Documentation/x86/boot.txt @@ -1013,7 +1013,7 @@ boot_params as that of 16-bit boot protocol, the boot loader should also fill the additional fields of the struct boot_params as that described in zero-page.txt. -After setupping the struct boot_params, the boot loader can load the +After setting up the struct boot_params, the boot loader can load the 32/64-bit kernel in the same way as that of 16-bit boot protocol. In 32-bit boot protocol, the kernel is started by jumping to the @@ -1023,7 +1023,7 @@ In 32-bit boot protocol, the kernel is started by jumping to the At entry, the CPU must be in 32-bit protected mode with paging disabled; a GDT must be loaded with the descriptors for selectors __BOOT_CS(0x10) and __BOOT_DS(0x18); both descriptors must be 4G flat -segment; __BOOS_CS must have execute/read permission, and __BOOT_DS +segment; __BOOT_CS must have execute/read permission, and __BOOT_DS must have read/write permission; CS must be __BOOT_CS and DS, ES, SS must be __BOOT_DS; interrupt must be disabled; %esi must hold the base address of the struct boot_params; %ebp, %edi and %ebx must be zero. diff --git a/Documentation/zh_CN/arm/kernel_user_helpers.txt b/Documentation/zh_CN/arm/kernel_user_helpers.txt new file mode 100644 index 00000000000..cd7fc8f34cf --- /dev/null +++ b/Documentation/zh_CN/arm/kernel_user_helpers.txt @@ -0,0 +1,284 @@ +Chinese translated version of Documentation/arm/kernel_user_helpers.txt + +If you have any comment or update to the content, please contact the +original document maintainer directly. However, if you have a problem +communicating in English you can also ask the Chinese maintainer for +help. Contact the Chinese maintainer if this translation is outdated +or if there is a problem with the translation. + +Maintainer: Nicolas Pitre <nicolas.pitre@linaro.org> + Dave Martin <dave.martin@linaro.org> +Chinese maintainer: Fu Wei <tekkamanninja@gmail.com> +--------------------------------------------------------------------- +Documentation/arm/kernel_user_helpers.txt 的中文翻译 + +如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文 +交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻 +译存在问题,请联系中文版维护者。 +英文版维护者: Nicolas Pitre <nicolas.pitre@linaro.org> + Dave Martin <dave.martin@linaro.org> +中文版维护者: 傅炜 Fu Wei <tekkamanninja@gmail.com> +中文版翻译者: 傅炜 Fu Wei <tekkamanninja@gmail.com> +中文版校译者: 宋冬生 Dongsheng Song <dongshneg.song@gmail.com> + 傅炜 Fu Wei <tekkamanninja@gmail.com> + + +以下为正文 +--------------------------------------------------------------------- +内核提供的用户空间辅助代码 +========================= + +在内核内存空间的固定地址处,有一个由内核提供并可从用户空间访问的代码 +段。它用于向用户空间提供因在许多 ARM CPU 中未实现的特性和/或指令而需 +内核提供帮助的某些操作。这些代码直接在用户模式下执行的想法是为了获得 +最佳效率,但那些与内核计数器联系过于紧密的部分,则被留给了用户库实现。 +事实上,此代码甚至可能因不同的 CPU 而异,这取决于其可用的指令集或它 +是否为 SMP 系统。换句话说,内核保留在不作出警告的情况下根据需要更改 +这些代码的权利。只有本文档描述的入口及其结果是保证稳定的。 + +这与完全成熟的 VDSO 实现不同(但两者并不冲突),尽管如此,VDSO 可阻止 +某些通过常量高效跳转到那些代码段的汇编技巧。且由于那些代码段在返回用户 +代码前仅使用少量的代码周期,则一个 VDSO 间接远程调用将会在这些简单的 +操作上增加一个可测量的开销。 + +在对那些拥有原生支持的新型处理器进行代码优化时,仅在已为其他操作使用 +了类似的新增指令,而导致二进制结果已与早期 ARM 处理器不兼容的情况下, +用户空间才应绕过这些辅助代码,并在内联函数中实现这些操作(无论是通过 +编译器在代码中直接放置,还是作为库函数调用实现的一部分)。也就是说, +如果你编译的代码不会为了其他目的使用新指令,则不要仅为了避免使用这些 +内核辅助代码,导致二进制程序无法在早期处理器上运行。 + +新的辅助代码可能随着时间的推移而增加,所以新内核中的某些辅助代码在旧 +内核中可能不存在。因此,程序必须在对任何辅助代码调用假设是安全之前, +检测 __kuser_helper_version 的值(见下文)。理想情况下,这种检测应该 +只在进程启动时执行一次;如果内核版本不支持所需辅助代码,则该进程可尽早 +中止执行。 + +kuser_helper_version +-------------------- + +位置: 0xffff0ffc + +参考声明: + + extern int32_t __kuser_helper_version; + +定义: + + 这个区域包含了当前运行内核实现的辅助代码版本号。用户空间可以通过读 + 取此版本号以确定特定的辅助代码是否存在。 + +使用范例: + +#define __kuser_helper_version (*(int32_t *)0xffff0ffc) + +void check_kuser_version(void) +{ + if (__kuser_helper_version < 2) { + fprintf(stderr, "can't do atomic operations, kernel too old\n"); + abort(); + } +} + +注意: + + 用户空间可以假设这个域的值不会在任何单个进程的生存期内改变。也就 + 是说,这个域可以仅在库的初始化阶段或进程启动阶段读取一次。 + +kuser_get_tls +------------- + +位置: 0xffff0fe0 + +参考原型: + + void * __kuser_get_tls(void); + +输入: + + lr = 返回地址 + +输出: + + r0 = TLS 值 + +被篡改的寄存器: + + 无 + +定义: + + 获取之前通过 __ARM_NR_set_tls 系统调用设置的 TLS 值。 + +使用范例: + +typedef void * (__kuser_get_tls_t)(void); +#define __kuser_get_tls (*(__kuser_get_tls_t *)0xffff0fe0) + +void foo() +{ + void *tls = __kuser_get_tls(); + printf("TLS = %p\n", tls); +} + +注意: + + - 仅在 __kuser_helper_version >= 1 时,此辅助代码存在 + (从内核版本 2.6.12 开始)。 + +kuser_cmpxchg +------------- + +位置: 0xffff0fc0 + +参考原型: + + int __kuser_cmpxchg(int32_t oldval, int32_t newval, volatile int32_t *ptr); + +输入: + + r0 = oldval + r1 = newval + r2 = ptr + lr = 返回地址 + +输出: + + r0 = 成功代码 (零或非零) + C flag = 如果 r0 == 0 则置 1,如果 r0 != 0 则清零。 + +被篡改的寄存器: + + r3, ip, flags + +定义: + + 仅在 *ptr 为 oldval 时原子保存 newval 于 *ptr 中。 + 如果 *ptr 被改变,则返回值为零,否则为非零值。 + 如果 *ptr 被改变,则 C flag 也会被置 1,以实现调用代码中的汇编 + 优化。 + +使用范例: + +typedef int (__kuser_cmpxchg_t)(int oldval, int newval, volatile int *ptr); +#define __kuser_cmpxchg (*(__kuser_cmpxchg_t *)0xffff0fc0) + +int atomic_add(volatile int *ptr, int val) +{ + int old, new; + + do { + old = *ptr; + new = old + val; + } while(__kuser_cmpxchg(old, new, ptr)); + + return new; +} + +注意: + + - 这个例程已根据需要包含了内存屏障。 + + - 仅在 __kuser_helper_version >= 2 时,此辅助代码存在 + (从内核版本 2.6.12 开始)。 + +kuser_memory_barrier +-------------------- + +位置: 0xffff0fa0 + +参考原型: + + void __kuser_memory_barrier(void); + +输入: + + lr = 返回地址 + +输出: + + 无 + +被篡改的寄存器: + + 无 + +定义: + + 应用于任何需要内存屏障以防止手动数据修改带来的一致性问题,以及 + __kuser_cmpxchg 中。 + +使用范例: + +typedef void (__kuser_dmb_t)(void); +#define __kuser_dmb (*(__kuser_dmb_t *)0xffff0fa0) + +注意: + + - 仅在 __kuser_helper_version >= 3 时,此辅助代码存在 + (从内核版本 2.6.15 开始)。 + +kuser_cmpxchg64 +--------------- + +位置: 0xffff0f60 + +参考原型: + + int __kuser_cmpxchg64(const int64_t *oldval, + const int64_t *newval, + volatile int64_t *ptr); + +输入: + + r0 = 指向 oldval + r1 = 指向 newval + r2 = 指向目标值 + lr = 返回地址 + +输出: + + r0 = 成功代码 (零或非零) + C flag = 如果 r0 == 0 则置 1,如果 r0 != 0 则清零。 + +被篡改的寄存器: + + r3, lr, flags + +定义: + + 仅在 *ptr 等于 *oldval 指向的 64 位值时,原子保存 *newval + 指向的 64 位值于 *ptr 中。如果 *ptr 被改变,则返回值为零, + 否则为非零值。 + + 如果 *ptr 被改变,则 C flag 也会被置 1,以实现调用代码中的汇编 + 优化。 + +使用范例: + +typedef int (__kuser_cmpxchg64_t)(const int64_t *oldval, + const int64_t *newval, + volatile int64_t *ptr); +#define __kuser_cmpxchg64 (*(__kuser_cmpxchg64_t *)0xffff0f60) + +int64_t atomic_add64(volatile int64_t *ptr, int64_t val) +{ + int64_t old, new; + + do { + old = *ptr; + new = old + val; + } while(__kuser_cmpxchg64(&old, &new, ptr)); + + return new; +} + +注意: + + - 这个例程已根据需要包含了内存屏障。 + + - 由于这个过程的代码长度(此辅助代码跨越 2 个常规的 kuser “槽”), + 因此 0xffff0f80 不被作为有效的入口点。 + + - 仅在 __kuser_helper_version >= 5 时,此辅助代码存在 + (从内核版本 3.1 开始)。 diff --git a/Documentation/zh_CN/arm64/memory.txt b/Documentation/zh_CN/arm64/memory.txt index 83b51931470..a5f6283829f 100644 --- a/Documentation/zh_CN/arm64/memory.txt +++ b/Documentation/zh_CN/arm64/memory.txt @@ -47,21 +47,21 @@ AArch64 Linux 内存布局: ----------------------------------------------------------------------- 0000000000000000 0000007fffffffff 512GB 用户空间 -ffffff8000000000 ffffffbbfffcffff ~240GB vmalloc +ffffff8000000000 ffffffbbfffeffff ~240GB vmalloc -ffffffbbfffd0000 ffffffbcfffdffff 64KB [防护页] +ffffffbbffff0000 ffffffbbffffffff 64KB [防护页] -ffffffbbfffe0000 ffffffbcfffeffff 64KB PCI I/O 空间 +ffffffbc00000000 ffffffbdffffffff 8GB vmemmap -ffffffbbffff0000 ffffffbcffffffff 64KB [防护页] +ffffffbe00000000 ffffffbffbbfffff ~8GB [防护页,未来用于 vmmemap] -ffffffbc00000000 ffffffbdffffffff 8GB vmemmap +ffffffbffbe00000 ffffffbffbe0ffff 64KB PCI I/O 空间 -ffffffbe00000000 ffffffbffbffffff ~8GB [防护页,未来用于 vmmemap] +ffffffbbffff0000 ffffffbcffffffff ~2MB [防护页] ffffffbffc000000 ffffffbfffffffff 64MB 模块 -ffffffc000000000 ffffffffffffffff 256GB 内存空间 +ffffffc000000000 ffffffffffffffff 256GB 内核逻辑内存映射 4KB 页大小的转换表查找: |