diff options
Diffstat (limited to 'Documentation')
65 files changed, 1707 insertions, 265 deletions
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index 40ac7759c3b..6e9c4050a41 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX @@ -14,6 +14,7 @@ Following translations are available on the WWW: - this file. ABI/ - info on kernel <-> userspace ABI and relative interface stability. + BUG-HUNTING - brute force method of doing binary search of patches to find bug. Changes @@ -66,6 +67,8 @@ VGA-softcursor.txt - how to change your VGA cursor from a blinking underscore. accounting/ - documentation on accounting and taskstats. +acpi/ + - info on ACPI-specific hooks in the kernel. aoe/ - description of AoE (ATA over Ethernet) along with config examples. applying-patches.txt @@ -126,18 +129,16 @@ devices.txt - plain ASCII listing of all the nodes in /dev/ with major minor #'s. digiepca.txt - info on Digi Intl. {PC,PCI,EISA}Xx and Xem series cards. -dnotify.txt - - info about directory notification in Linux. dontdiff - file containing a list of files that should never be diff'ed. driver-model/ - directory with info about Linux driver model. -drivers/ - - directory with driver documentation (currently only EDAC). dvb/ - info on Linux Digital Video Broadcast (DVB) subsystem. early-userspace/ - info about initramfs, klibc, and userspace early during boot. +edac.txt + - information on EDAC - Error Detection And Correction eisa.txt - info on EISA bus support. exception.txt @@ -334,20 +335,8 @@ rtc.txt - notes on how to use the Real Time Clock (aka CMOS clock) driver. s390/ - directory with info on using Linux on the IBM S390. -sched-arch.txt - - CPU Scheduler implementation hints for architecture specific code. -sched-coding.txt - - reference for various scheduler-related methods in the O(1) scheduler. -sched-design.txt - - goals, design and implementation of the Linux O(1) scheduler. -sched-design-CFS.txt - - goals, design and implementation of the Complete Fair Scheduler. -sched-domains.txt - - information on scheduling domains. -sched-nice-design.txt - - How and why the scheduler's nice levels are implemented. -sched-stats.txt - - information on schedstats (Linux Scheduler Statistics). +scheduler/ + - directory with info on the scheduler. scsi/ - directory with info on Linux scsi support. serial/ @@ -360,8 +349,6 @@ sgi-visws.txt - short blurb on the SGI Visual Workstations. sh/ - directory with info on porting Linux to a new architecture. -sharedsubtree.txt - - a description of shared subtrees for namespaces. smart-config.txt - description of the Smart Config makefile feature. sony-laptop.txt diff --git a/Documentation/ABI/testing/procfs-diskstats b/Documentation/ABI/testing/procfs-diskstats new file mode 100644 index 00000000000..99233902e09 --- /dev/null +++ b/Documentation/ABI/testing/procfs-diskstats @@ -0,0 +1,22 @@ +What: /proc/diskstats +Date: February 2008 +Contact: Jerome Marchand <jmarchan@redhat.com> +Description: + The /proc/diskstats file displays the I/O statistics + of block devices. Each line contains the following 14 + fields: + 1 - major number + 2 - minor mumber + 3 - device name + 4 - reads completed succesfully + 5 - reads merged + 6 - sectors read + 7 - time spent reading (ms) + 8 - writes completed + 9 - writes merged + 10 - sectors written + 11 - time spent writing (ms) + 12 - I/Os currently in progress + 13 - time spent doing I/Os (ms) + 14 - weighted time spent doing I/Os (ms) + For more details refer to Documentation/iostats.txt diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block new file mode 100644 index 00000000000..4bd9ea53912 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-block @@ -0,0 +1,28 @@ +What: /sys/block/<disk>/stat +Date: February 2008 +Contact: Jerome Marchand <jmarchan@redhat.com> +Description: + The /sys/block/<disk>/stat files displays the I/O + statistics of disk <disk>. They contain 11 fields: + 1 - reads completed succesfully + 2 - reads merged + 3 - sectors read + 4 - time spent reading (ms) + 5 - writes completed + 6 - writes merged + 7 - sectors written + 8 - time spent writing (ms) + 9 - I/Os currently in progress + 10 - time spent doing I/Os (ms) + 11 - weighted time spent doing I/Os (ms) + For more details refer Documentation/iostats.txt + + +What: /sys/block/<disk>/<part>/stat +Date: February 2008 +Contact: Jerome Marchand <jmarchan@redhat.com> +Description: + The /sys/block/<disk>/<part>/stat files display the + I/O statistics of partition <part>. The format is the + same as the above-written /sys/block/<disk>/stat + format. diff --git a/Documentation/ABI/testing/sysfs-firmware-acpi b/Documentation/ABI/testing/sysfs-firmware-acpi new file mode 100644 index 00000000000..9470ed9afcc --- /dev/null +++ b/Documentation/ABI/testing/sysfs-firmware-acpi @@ -0,0 +1,99 @@ +What: /sys/firmware/acpi/interrupts/ +Date: February 2008 +Contact: Len Brown <lenb@kernel.org> +Description: + All ACPI interrupts are handled via a single IRQ, + the System Control Interrupt (SCI), which appears + as "acpi" in /proc/interrupts. + + However, one of the main functions of ACPI is to make + the platform understand random hardware without + special driver support. So while the SCI handles a few + well known (fixed feature) interrupts sources, such + as the power button, it can also handle a variable + number of a "General Purpose Events" (GPE). + + A GPE vectors to a specified handler in AML, which + can do a anything the BIOS writer wants from + OS context. GPE 0x12, for example, would vector + to a level or edge handler called _L12 or _E12. + The handler may do its business and return. + Or the handler may send send a Notify event + to a Linux device driver registered on an ACPI device, + such as a battery, or a processor. + + To figure out where all the SCI's are coming from, + /sys/firmware/acpi/interrupts contains a file listing + every possible source, and the count of how many + times it has triggered. + + $ cd /sys/firmware/acpi/interrupts + $ grep . * + error:0 + ff_gbl_lock:0 + ff_pmtimer:0 + ff_pwr_btn:0 + ff_rt_clk:0 + ff_slp_btn:0 + gpe00:0 + gpe01:0 + gpe02:0 + gpe03:0 + gpe04:0 + gpe05:0 + gpe06:0 + gpe07:0 + gpe08:0 + gpe09:174 + gpe0A:0 + gpe0B:0 + gpe0C:0 + gpe0D:0 + gpe0E:0 + gpe0F:0 + gpe10:0 + gpe11:60 + gpe12:0 + gpe13:0 + gpe14:0 + gpe15:0 + gpe16:0 + gpe17:0 + gpe18:0 + gpe19:7 + gpe1A:0 + gpe1B:0 + gpe1C:0 + gpe1D:0 + gpe1E:0 + gpe1F:0 + gpe_all:241 + sci:241 + + sci - The total number of times the ACPI SCI + has claimed an interrupt. + + gpe_all - count of SCI caused by GPEs. + + gpeXX - count for individual GPE source + + ff_gbl_lock - Global Lock + + ff_pmtimer - PM Timer + + ff_pwr_btn - Power Button + + ff_rt_clk - Real Time Clock + + ff_slp_btn - Sleep Button + + error - an interrupt that can't be accounted for above. + + Root has permission to clear any of these counters. Eg. + # echo 0 > gpe11 + + All counters can be cleared by clearing the total "sci": + # echo 0 > sci + + None of these counters has an effect on the function + of the system, they are simply statistics. diff --git a/Documentation/ABI/testing/sysfs-kernel-uids b/Documentation/ABI/testing/sysfs-kernel-uids index 648d65dbc0e..28f14695a85 100644 --- a/Documentation/ABI/testing/sysfs-kernel-uids +++ b/Documentation/ABI/testing/sysfs-kernel-uids @@ -11,4 +11,4 @@ Description: example would be, if User A has shares = 1024 and user B has shares = 2048, User B will get twice the CPU bandwidth user A will. For more details refer - Documentation/sched-design-CFS.txt + Documentation/scheduler/sched-design-CFS.txt diff --git a/Documentation/DocBook/genericirq.tmpl b/Documentation/DocBook/genericirq.tmpl index 4215f69ce7e..3a882d9a90a 100644 --- a/Documentation/DocBook/genericirq.tmpl +++ b/Documentation/DocBook/genericirq.tmpl @@ -172,7 +172,7 @@ <listitem><para>Chiplevel hardware encapsulation</para></listitem> </orderedlist> </para> - <sect1> + <sect1 id="Interrupt_control_flow"> <title>Interrupt control flow</title> <para> Each interrupt is described by an interrupt descriptor structure @@ -190,7 +190,7 @@ referenced by the assigned chip descriptor structure. </para> </sect1> - <sect1> + <sect1 id="Highlevel_Driver_API"> <title>Highlevel Driver API</title> <para> The highlevel Driver API consists of following functions: @@ -210,7 +210,7 @@ See the autogenerated function documentation for details. </para> </sect1> - <sect1> + <sect1 id="Highlevel_IRQ_flow_handlers"> <title>Highlevel IRQ flow handlers</title> <para> The generic layer provides a set of pre-defined irq-flow methods: @@ -224,9 +224,9 @@ specific) are assigned to specific interrupts by the architecture either during bootup or during device initialization. </para> - <sect2> + <sect2 id="Default_flow_implementations"> <title>Default flow implementations</title> - <sect3> + <sect3 id="Helper_functions"> <title>Helper functions</title> <para> The helper functions call the chip primitives and @@ -267,9 +267,9 @@ noop(irq) </para> </sect3> </sect2> - <sect2> + <sect2 id="Default_flow_handler_implementations"> <title>Default flow handler implementations</title> - <sect3> + <sect3 id="Default_Level_IRQ_flow_handler"> <title>Default Level IRQ flow handler</title> <para> handle_level_irq provides a generic implementation @@ -284,7 +284,7 @@ desc->chip->end(); </programlisting> </para> </sect3> - <sect3> + <sect3 id="Default_Edge_IRQ_flow_handler"> <title>Default Edge IRQ flow handler</title> <para> handle_edge_irq provides a generic implementation @@ -311,7 +311,7 @@ desc->chip->end(); </programlisting> </para> </sect3> - <sect3> + <sect3 id="Default_simple_IRQ_flow_handler"> <title>Default simple IRQ flow handler</title> <para> handle_simple_irq provides a generic implementation @@ -328,7 +328,7 @@ handle_IRQ_event(desc->action); </programlisting> </para> </sect3> - <sect3> + <sect3 id="Default_per_CPU_flow_handler"> <title>Default per CPU flow handler</title> <para> handle_percpu_irq provides a generic implementation @@ -349,7 +349,7 @@ desc->chip->end(); </para> </sect3> </sect2> - <sect2> + <sect2 id="Quirks_and_optimizations"> <title>Quirks and optimizations</title> <para> The generic functions are intended for 'clean' architectures and chips, @@ -358,7 +358,7 @@ desc->chip->end(); overriding the highlevel irq-flow handler. </para> </sect2> - <sect2> + <sect2 id="Delayed_interrupt_disable"> <title>Delayed interrupt disable</title> <para> This per interrupt selectable feature, which was introduced by Russell @@ -380,7 +380,7 @@ desc->chip->end(); </para> </sect2> </sect1> - <sect1> + <sect1 id="Chiplevel_hardware_encapsulation"> <title>Chiplevel hardware encapsulation</title> <para> The chip level hardware descriptor structure irq_chip diff --git a/Documentation/DocBook/lsm.tmpl b/Documentation/DocBook/lsm.tmpl index f6382219587..fe7664ce966 100644 --- a/Documentation/DocBook/lsm.tmpl +++ b/Documentation/DocBook/lsm.tmpl @@ -33,7 +33,7 @@ </authorgroup> </articleinfo> -<sect1><title>Introduction</title> +<sect1 id="Introduction"><title>Introduction</title> <para> In March 2001, the National Security Agency (NSA) gave a presentation diff --git a/Documentation/DocBook/mtdnand.tmpl b/Documentation/DocBook/mtdnand.tmpl index 957cf5c2683..8e145857fc9 100644 --- a/Documentation/DocBook/mtdnand.tmpl +++ b/Documentation/DocBook/mtdnand.tmpl @@ -80,7 +80,7 @@ struct member has a short description which is marked with an [XXX] identifier. The following chapters explain the meaning of those identifiers. </para> - <sect1> + <sect1 id="Function_identifiers_XXX"> <title>Function identifiers [XXX]</title> <para> The functions are marked with [XXX] identifiers in the short @@ -115,7 +115,7 @@ </para></listitem> </itemizedlist> </sect1> - <sect1> + <sect1 id="Struct_member_identifiers_XXX"> <title>Struct member identifiers [XXX]</title> <para> The struct members are marked with [XXX] identifiers in the @@ -159,7 +159,7 @@ basic functions and fill out some really board dependent members in the nand chip description structure. </para> - <sect1> + <sect1 id="Basic_defines"> <title>Basic defines</title> <para> At least you have to provide a mtd structure and @@ -185,7 +185,7 @@ static struct nand_chip board_chip; static unsigned long baseaddr; </programlisting> </sect1> - <sect1> + <sect1 id="Partition_defines"> <title>Partition defines</title> <para> If you want to divide your device into partitions, then @@ -204,7 +204,7 @@ static struct mtd_partition partition_info[] = { }; </programlisting> </sect1> - <sect1> + <sect1 id="Hardware_control_functions"> <title>Hardware control function</title> <para> The hardware control function provides access to the @@ -246,7 +246,7 @@ static void board_hwcontrol(struct mtd_info *mtd, int cmd) } </programlisting> </sect1> - <sect1> + <sect1 id="Device_ready_function"> <title>Device ready function</title> <para> If the hardware interface has the ready busy pin of the NAND chip connected to a @@ -257,7 +257,7 @@ static void board_hwcontrol(struct mtd_info *mtd, int cmd) the function must not be defined and the function pointer this->dev_ready is set to NULL. </para> </sect1> - <sect1> + <sect1 id="Init_function"> <title>Init function</title> <para> The init function allocates memory and sets up all the board @@ -325,7 +325,7 @@ out: module_init(board_init); </programlisting> </sect1> - <sect1> + <sect1 id="Exit_function"> <title>Exit function</title> <para> The exit function is only neccecary if the driver is @@ -359,7 +359,7 @@ module_exit(board_cleanup); driver. For a list of functions which can be overridden by the board driver see the documentation of the nand_chip structure. </para> - <sect1> + <sect1 id="Multiple_chip_control"> <title>Multiple chip control</title> <para> The nand driver can control chip arrays. Therefor the @@ -419,9 +419,9 @@ static void board_select_chip (struct mtd_info *mtd, int chip) } </programlisting> </sect1> - <sect1> + <sect1 id="Hardware_ECC_support"> <title>Hardware ECC support</title> - <sect2> + <sect2 id="Functions_and_constants"> <title>Functions and constants</title> <para> The nand driver supports three different types of @@ -475,7 +475,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip) </itemizedlist> </para> </sect2> - <sect2> + <sect2 id="Hardware_ECC_with_syndrome_calculation"> <title>Hardware ECC with syndrome calculation</title> <para> Many hardware ECC implementations provide Reed-Solomon @@ -500,7 +500,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip) </para> </sect2> </sect1> - <sect1> + <sect1 id="Bad_Block_table_support"> <title>Bad block table support</title> <para> Most NAND chips mark the bad blocks at a defined @@ -552,7 +552,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip) allows faster access than always checking the bad block information on the flash chip itself. </para> - <sect2> + <sect2 id="Flash_based_tables"> <title>Flash based tables</title> <para> It may be desired or neccecary to keep a bad block table in FLASH. @@ -587,7 +587,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip) </itemizedlist> </para> </sect2> - <sect2> + <sect2 id="User_defined_tables"> <title>User defined tables</title> <para> User defined tables are created by filling out a @@ -676,7 +676,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip) </para> </sect2> </sect1> - <sect1> + <sect1 id="Spare_area_placement"> <title>Spare area (auto)placement</title> <para> The nand driver implements different possibilities for @@ -730,7 +730,7 @@ struct nand_oobinfo { </para></listitem> </itemizedlist> </para> - <sect2> + <sect2 id="Placement_defined_by_fs_driver"> <title>Placement defined by fs driver</title> <para> The calling function provides a pointer to a nand_oobinfo @@ -760,7 +760,7 @@ struct nand_oobinfo { done according to the given scheme in the nand_oobinfo structure. </para> </sect2> - <sect2> + <sect2 id="Automatic_placement"> <title>Automatic placement</title> <para> Automatic placement uses the built in defaults to place the @@ -774,7 +774,7 @@ struct nand_oobinfo { done according to the default builtin scheme. </para> </sect2> - <sect2> + <sect2 id="User_space_placement_selection"> <title>User space placement selection</title> <para> All non ecc functions like mtd->read and mtd->write use an internal @@ -789,9 +789,9 @@ struct nand_oobinfo { </para> </sect2> </sect1> - <sect1> + <sect1 id="Spare_area_autoplacement_default"> <title>Spare area autoplacement default schemes</title> - <sect2> + <sect2 id="pagesize_256"> <title>256 byte pagesize</title> <informaltable><tgroup cols="3"><tbody> <row> @@ -843,7 +843,7 @@ pages this byte is reserved</entry> </row> </tbody></tgroup></informaltable> </sect2> - <sect2> + <sect2 id="pagesize_512"> <title>512 byte pagesize</title> <informaltable><tgroup cols="3"><tbody> <row> @@ -906,7 +906,7 @@ in this page</entry> </row> </tbody></tgroup></informaltable> </sect2> - <sect2> + <sect2 id="pagesize_2048"> <title>2048 byte pagesize</title> <informaltable><tgroup cols="3"><tbody> <row> @@ -1126,9 +1126,9 @@ in this page</entry> <para> This chapter describes the constants which might be relevant for a driver developer. </para> - <sect1> + <sect1 id="Chip_option_constants"> <title>Chip option constants</title> - <sect2> + <sect2 id="Constants_for_chip_id_table"> <title>Constants for chip id table</title> <para> These constants are defined in nand.h. They are ored together to describe @@ -1153,7 +1153,7 @@ in this page</entry> </programlisting> </para> </sect2> - <sect2> + <sect2 id="Constants_for_runtime_options"> <title>Constants for runtime options</title> <para> These constants are defined in nand.h. They are ored together to describe @@ -1171,7 +1171,7 @@ in this page</entry> </sect2> </sect1> - <sect1> + <sect1 id="EEC_selection_constants"> <title>ECC selection constants</title> <para> Use these constants to select the ECC algorithm. @@ -1192,7 +1192,7 @@ in this page</entry> </para> </sect1> - <sect1> + <sect1 id="Hardware_control_related_constants"> <title>Hardware control related constants</title> <para> These constants describe the requested hardware access function when @@ -1218,7 +1218,7 @@ in this page</entry> </para> </sect1> - <sect1> + <sect1 id="Bad_block_table_constants"> <title>Bad block table related constants</title> <para> These constants describe the options used for bad block diff --git a/Documentation/DocBook/procfs-guide.tmpl b/Documentation/DocBook/procfs-guide.tmpl index 2de84dc195a..1fd6a1ec759 100644 --- a/Documentation/DocBook/procfs-guide.tmpl +++ b/Documentation/DocBook/procfs-guide.tmpl @@ -85,7 +85,7 @@ - <preface> + <preface id="Preface"> <title>Preface</title> <para> @@ -230,7 +230,7 @@ - <sect1> + <sect1 id="Creating_a_symlink"> <title>Creating a symlink</title> <funcsynopsis> @@ -254,7 +254,7 @@ </para> </sect1> - <sect1> + <sect1 id="Creating_a_directory"> <title>Creating a directory</title> <funcsynopsis> @@ -274,7 +274,7 @@ - <sect1> + <sect1 id="Removing_an_entry"> <title>Removing an entry</title> <funcsynopsis> @@ -340,7 +340,7 @@ entry->write_proc = write_proc_foo; - <sect1> + <sect1 id="Reading_data"> <title>Reading data</title> <para> @@ -448,7 +448,7 @@ entry->write_proc = write_proc_foo; - <sect1> + <sect1 id="Writing_data"> <title>Writing data</title> <para> @@ -579,7 +579,7 @@ int foo_read_func(char *page, char **start, off_t off, - <sect1> + <sect1 id="Modules"> <title>Modules</title> <para> @@ -599,7 +599,7 @@ entry->owner = THIS_MODULE; - <sect1> + <sect1 id="Mode_and_ownership"> <title>Mode and ownership</title> <para> diff --git a/Documentation/DocBook/rapidio.tmpl b/Documentation/DocBook/rapidio.tmpl index a8b88c47e80..b9e143e28c6 100644 --- a/Documentation/DocBook/rapidio.tmpl +++ b/Documentation/DocBook/rapidio.tmpl @@ -77,11 +77,11 @@ <chapter id="bugs"> <title>Known Bugs and Limitations</title> - <sect1> + <sect1 id="known_bugs"> <title>Bugs</title> <para>None. ;)</para> </sect1> - <sect1> + <sect1 id="Limitations"> <title>Limitations</title> <para> <orderedlist> @@ -100,7 +100,7 @@ on devices, request/map memory region resources, and manage mailboxes/doorbells. </para> - <sect1> + <sect1 id="Functions"> <title>Functions</title> !Iinclude/linux/rio_drv.h !Edrivers/rapidio/rio-driver.c @@ -116,23 +116,23 @@ subsystem. </para> - <sect1><title>Structures</title> + <sect1 id="Structures"><title>Structures</title> !Iinclude/linux/rio.h </sect1> - <sect1><title>Enumeration and Discovery</title> + <sect1 id="Enumeration_and_Discovery"><title>Enumeration and Discovery</title> !Idrivers/rapidio/rio-scan.c </sect1> - <sect1><title>Driver functionality</title> + <sect1 id="Driver_functionality"><title>Driver functionality</title> !Idrivers/rapidio/rio.c !Idrivers/rapidio/rio-access.c </sect1> - <sect1><title>Device model support</title> + <sect1 id="Device_model_support"><title>Device model support</title> !Idrivers/rapidio/rio-driver.c </sect1> - <sect1><title>Sysfs support</title> + <sect1 id="Sysfs_support"><title>Sysfs support</title> !Idrivers/rapidio/rio-sysfs.c </sect1> - <sect1><title>PPC32 support</title> + <sect1 id="PPC32_support"><title>PPC32 support</title> !Iarch/powerpc/kernel/rio.c !Earch/powerpc/sysdev/fsl_rio.c !Iarch/powerpc/sysdev/fsl_rio.c diff --git a/Documentation/DocBook/scsi.tmpl b/Documentation/DocBook/scsi.tmpl index f299ab182bb..10a150ae2a7 100644 --- a/Documentation/DocBook/scsi.tmpl +++ b/Documentation/DocBook/scsi.tmpl @@ -12,7 +12,7 @@ <surname>Bottomley</surname> <affiliation> <address> - <email>James.Bottomley@steeleye.com</email> + <email>James.Bottomley@hansenpartnership.com</email> </address> </affiliation> </author> diff --git a/Documentation/DocBook/videobook.tmpl b/Documentation/DocBook/videobook.tmpl index b3d93ee2769..89817795e66 100644 --- a/Documentation/DocBook/videobook.tmpl +++ b/Documentation/DocBook/videobook.tmpl @@ -170,7 +170,7 @@ int __init myradio_init(struct video_init *v) <para> The types available are </para> - <table frame="all"><title>Device Types</title> + <table frame="all" id="Device_Types"><title>Device Types</title> <tgroup cols="3" align="left"> <tbody> <row> @@ -291,7 +291,7 @@ static int radio_ioctl(struct video_device *dev, unsigned int cmd, void *arg) allows the applications to find out what sort of a card they have found and to figure out what they want to do about it. The fields in the structure are </para> - <table frame="all"><title>struct video_capability fields</title> + <table frame="all" id="video_capability_fields"><title>struct video_capability fields</title> <tgroup cols="2" align="left"> <tbody> <row> @@ -365,7 +365,7 @@ static int radio_ioctl(struct video_device *dev, unsigned int cmd, void *arg) <para> The video_tuner structure has the following fields </para> - <table frame="all"><title>struct video_tuner fields</title> + <table frame="all" id="video_tuner_fields"><title>struct video_tuner fields</title> <tgroup cols="2" align="left"> <tbody> <row> @@ -398,7 +398,7 @@ static int radio_ioctl(struct video_device *dev, unsigned int cmd, void *arg) </tgroup> </table> - <table frame="all"><title>struct video_tuner flags</title> + <table frame="all" id="video_tuner_flags"><title>struct video_tuner flags</title> <tgroup cols="2" align="left"> <tbody> <row> @@ -421,7 +421,7 @@ static int radio_ioctl(struct video_device *dev, unsigned int cmd, void *arg) </tgroup> </table> - <table frame="all"><title>struct video_tuner modes</title> + <table frame="all" id="video_tuner_modes"><title>struct video_tuner modes</title> <tgroup cols="2" align="left"> <tbody> <row> @@ -572,7 +572,7 @@ static int current_volume=0; <para> Then we fill in the video_audio structure. This has the following format </para> - <table frame="all"><title>struct video_audio fields</title> + <table frame="all" id="video_audio_fields"><title>struct video_audio fields</title> <tgroup cols="2" align="left"> <tbody> <row> @@ -607,7 +607,7 @@ static int current_volume=0; </tgroup> </table> - <table frame="all"><title>struct video_audio flags</title> + <table frame="all" id="video_audio_flags"><title>struct video_audio flags</title> <tgroup cols="2" align="left"> <tbody> <row> @@ -625,7 +625,7 @@ static int current_volume=0; </tgroup> </table> - <table frame="all"><title>struct video_audio modes</title> + <table frame="all" id="video_audio_modes"><title>struct video_audio modes</title> <tgroup cols="2" align="left"> <tbody> <row> @@ -775,7 +775,7 @@ module_exit(cleanup); </para> </sect1> </chapter> - <chapter> + <chapter id="Video_Capture_Devices"> <title>Video Capture Devices</title> <sect1 id="introvid"> <title>Video Capture Device Types</title> @@ -855,7 +855,7 @@ static struct video_device my_camera We use the extra video capability flags that did not apply to the radio interface. The video related flags are </para> - <table frame="all"><title>Capture Capabilities</title> + <table frame="all" id="Capture_Capabilities"><title>Capture Capabilities</title> <tgroup cols="2" align="left"> <tbody> <row> @@ -1195,7 +1195,7 @@ static int camera_ioctl(struct video_device *dev, unsigned int cmd, void *arg) inputs to the video card). Our example card has a single camera input. The fields in the structure are </para> - <table frame="all"><title>struct video_channel fields</title> + <table frame="all" id="video_channel_fields"><title>struct video_channel fields</title> <tgroup cols="2" align="left"> <tbody> <row> @@ -1218,7 +1218,7 @@ static int camera_ioctl(struct video_device *dev, unsigned int cmd, void *arg) </tbody> </tgroup> </table> - <table frame="all"><title>struct video_channel flags</title> + <table frame="all" id="video_channel_flags"><title>struct video_channel flags</title> <tgroup cols="2" align="left"> <tbody> <row> @@ -1229,7 +1229,7 @@ static int camera_ioctl(struct video_device *dev, unsigned int cmd, void *arg) </tbody> </tgroup> </table> - <table frame="all"><title>struct video_channel types</title> + <table frame="all" id="video_channel_types"><title>struct video_channel types</title> <tgroup cols="2" align="left"> <tbody> <row> @@ -1242,7 +1242,7 @@ static int camera_ioctl(struct video_device *dev, unsigned int cmd, void *arg) </tbody> </tgroup> </table> - <table frame="all"><title>struct video_channel norms</title> + <table frame="all" id="video_channel_norms"><title>struct video_channel norms</title> <tgroup cols="2" align="left"> <tbody> <row> @@ -1328,7 +1328,7 @@ static int camera_ioctl(struct video_device *dev, unsigned int cmd, void *arg) for every other pixel in the image. The other common formats the interface defines are </para> - <table frame="all"><title>Framebuffer Encodings</title> + <table frame="all" id="Framebuffer_Encodings"><title>Framebuffer Encodings</title> <tgroup cols="2" align="left"> <tbody> <row> @@ -1466,7 +1466,7 @@ static struct video_buffer capture_fb; display. The video_window structure is used to describe the way the image should be displayed. </para> - <table frame="all"><title>struct video_window fields</title> + <table frame="all" id="video_window_fields"><title>struct video_window fields</title> <tgroup cols="2" align="left"> <tbody> <row> @@ -1503,7 +1503,7 @@ static struct video_buffer capture_fb; <para> Each clip is a struct video_clip which has the following fields </para> - <table frame="all"><title>video_clip fields</title> + <table frame="all" id="video_clip_fields"><title>video_clip fields</title> <tgroup cols="2" align="left"> <tbody> <row> diff --git a/Documentation/DocBook/z8530book.tmpl b/Documentation/DocBook/z8530book.tmpl index a507876447a..42c75ba71ba 100644 --- a/Documentation/DocBook/z8530book.tmpl +++ b/Documentation/DocBook/z8530book.tmpl @@ -77,7 +77,7 @@ </para> </chapter> - <chapter> + <chapter id="Driver_Modes"> <title>Driver Modes</title> <para> The Z85230 driver layer can drive Z8530, Z85C30 and Z85230 devices @@ -108,7 +108,7 @@ </para> </chapter> - <chapter> + <chapter id="Using_the_Z85230_driver"> <title>Using the Z85230 driver</title> <para> The Z85230 driver provides the back end interface to your board. To @@ -174,7 +174,7 @@ </para> </chapter> - <chapter> + <chapter id="Attaching_Network_Interfaces"> <title>Attaching Network Interfaces</title> <para> If you wish to use the network interface facilities of the driver, @@ -216,7 +216,7 @@ </para> </chapter> - <chapter> + <chapter id="Configuring_And_Activating_The_Port"> <title>Configuring And Activating The Port</title> <para> The Z85230 driver provides helper functions and tables to load the @@ -300,7 +300,7 @@ </para> </chapter> - <chapter> + <chapter id="Network_Layer_Functions"> <title>Network Layer Functions</title> <para> The Z8530 layer provides functions to queue packets for @@ -327,7 +327,7 @@ </para> </chapter> - <chapter> + <chapter id="Porting_The_Z8530_Driver"> <title>Porting The Z8530 Driver</title> <para> The Z8530 driver is written to be portable. In DMA mode it makes diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c index d6cb1a86fd6..40121b5cca1 100644 --- a/Documentation/accounting/getdelays.c +++ b/Documentation/accounting/getdelays.c @@ -168,7 +168,7 @@ int get_family_id(int sd) char buf[256]; } ans; - int id, rc; + int id = 0, rc; struct nlattr *na; int rep_len; @@ -209,7 +209,7 @@ void print_delayacct(struct taskstats *t) void task_context_switch_counts(struct taskstats *t) { printf("\n\nTask %15s%15s\n" - " %15lu%15lu\n", + " %15llu%15llu\n", "voluntary", "nonvoluntary", t->nvcsw, t->nivcsw); } @@ -399,7 +399,7 @@ int main(int argc, char *argv[]) goto done; } - PRINTF("nlmsghdr size=%d, nlmsg_len=%d, rep_len=%d\n", + PRINTF("nlmsghdr size=%zu, nlmsg_len=%d, rep_len=%d\n", sizeof(struct nlmsghdr), msg.n.nlmsg_len, rep_len); diff --git a/Documentation/acpi/dsdt-override.txt b/Documentation/acpi/dsdt-override.txt new file mode 100644 index 00000000000..5008f256a2d --- /dev/null +++ b/Documentation/acpi/dsdt-override.txt @@ -0,0 +1,15 @@ +Linux supports two methods of overriding the BIOS DSDT: + +CONFIG_ACPI_CUSTOM_DSDT builds the image into the kernel. + +CONFIG_ACPI_CUSTOM_DSDT_INITRD adds the image to the initrd. + +When to use these methods is described in detail on the +Linux/ACPI home page: +http://www.lesswatts.org/projects/acpi/overridingDSDT.php + +Note that if both options are used, the DSDT supplied +by the INITRD method takes precedence. + +Documentation/initramfs-add-dsdt.sh is provided for convenience +for use with the CONFIG_ACPI_CUSTOM_DSDT_INITRD method. diff --git a/Documentation/acpi/initramfs-add-dsdt.sh b/Documentation/acpi/initramfs-add-dsdt.sh new file mode 100755 index 00000000000..17ef6e838e1 --- /dev/null +++ b/Documentation/acpi/initramfs-add-dsdt.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# Adds a DSDT file to the initrd (if it's an initramfs) +# first argument is the name of archive +# second argument is the name of the file to add +# The file will be copied as /DSDT.aml + +# 20060126: fix "Premature end of file" with some old cpio (Roland Robic) +# 20060205: this time it should really work + +# check the arguments +if [ $# -ne 2 ]; then + program_name=$(basename $0) + echo "\ +$program_name: too few arguments +Usage: $program_name initrd-name.img DSDT-to-add.aml +Adds a DSDT file to an initrd (in initramfs format) + + initrd-name.img: filename of the initrd in initramfs format + DSDT-to-add.aml: filename of the DSDT file to add + " 1>&2 + exit 1 +fi + +# we should check it's an initramfs + +tempcpio=$(mktemp -d) +# cleanup on exit, hangup, interrupt, quit, termination +trap 'rm -rf $tempcpio' 0 1 2 3 15 + +# extract the archive +gunzip -c "$1" > "$tempcpio"/initramfs.cpio || exit 1 + +# copy the DSDT file at the root of the directory so that we can call it "/DSDT.aml" +cp -f "$2" "$tempcpio"/DSDT.aml + +# add the file +cd "$tempcpio" +(echo DSDT.aml | cpio --quiet -H newc -o -A -O "$tempcpio"/initramfs.cpio) || exit 1 +cd "$OLDPWD" + +# re-compress the archive +gzip -c "$tempcpio"/initramfs.cpio > "$1" + diff --git a/Documentation/acpi/method-tracing.txt b/Documentation/acpi/method-tracing.txt new file mode 100644 index 00000000000..f6efb1ea559 --- /dev/null +++ b/Documentation/acpi/method-tracing.txt @@ -0,0 +1,26 @@ +/sys/module/acpi/parameters/: + +trace_method_name + The AML method name that the user wants to trace + +trace_debug_layer + The temporary debug_layer used when tracing the method. + Using 0xffffffff by default if it is 0. + +trace_debug_level + The temporary debug_level used when tracing the method. + Using 0x00ffffff by default if it is 0. + +trace_state + The status of the tracing feature. + + "enabled" means this feature is enabled + and the AML method is traced every time it's executed. + + "1" means this feature is enabled and the AML method + will only be traced during the next execution. + + "disabled" means this feature is disabled. + Users can enable/disable this debug tracing feature by + "echo string > /sys/module/acpi/parameters/trace_state". + "string" should be one of "enable", "disable" and "1". diff --git a/Documentation/aoe/mkdevs.sh b/Documentation/aoe/mkdevs.sh index 97374aacacb..44c0ab70243 100644 --- a/Documentation/aoe/mkdevs.sh +++ b/Documentation/aoe/mkdevs.sh @@ -29,6 +29,8 @@ rm -f $dir/interfaces mknod -m 0200 $dir/interfaces c $MAJOR 4 rm -f $dir/revalidate mknod -m 0200 $dir/revalidate c $MAJOR 5 +rm -f $dir/flush +mknod -m 0200 $dir/flush c $MAJOR 6 export n_partitions mkshelf=`echo $0 | sed 's!mkdevs!mkshelf!'` diff --git a/Documentation/aoe/udev-install.sh b/Documentation/aoe/udev-install.sh index 6449911c6a7..15e86f58c03 100644 --- a/Documentation/aoe/udev-install.sh +++ b/Documentation/aoe/udev-install.sh @@ -23,7 +23,10 @@ fi # /etc/udev/rules.d # rules_d="`sed -n '/^udev_rules=/{ s!udev_rules=!!; s!\"!!g; p; }' $conf`" -if test -z "$rules_d" || test ! -d "$rules_d"; then +if test -z "$rules_d" ; then + rules_d=/etc/udev/rules.d +fi +if test ! -d "$rules_d"; then echo "$me Error: cannot find udev rules directory" 1>&2 exit 1 fi diff --git a/Documentation/aoe/udev.txt b/Documentation/aoe/udev.txt index a7ed1dc4f33..8686e789542 100644 --- a/Documentation/aoe/udev.txt +++ b/Documentation/aoe/udev.txt @@ -1,6 +1,7 @@ # These rules tell udev what device nodes to create for aoe support. -# They may be installed along the following lines (adjusted to what -# you see on your system). +# They may be installed along the following lines. Check the section +# 8 udev manpage to see whether your udev supports SUBSYSTEM, and +# whether it uses one or two equal signs for SUBSYSTEM and KERNEL. # # ecashin@makki ~$ su # Password: @@ -15,10 +16,11 @@ # # aoe char devices -SUBSYSTEM="aoe", KERNEL="discover", NAME="etherd/%k", GROUP="disk", MODE="0220" -SUBSYSTEM="aoe", KERNEL="err", NAME="etherd/%k", GROUP="disk", MODE="0440" -SUBSYSTEM="aoe", KERNEL="interfaces", NAME="etherd/%k", GROUP="disk", MODE="0220" -SUBSYSTEM="aoe", KERNEL="revalidate", NAME="etherd/%k", GROUP="disk", MODE="0220" +SUBSYSTEM=="aoe", KERNEL=="discover", NAME="etherd/%k", GROUP="disk", MODE="0220" +SUBSYSTEM=="aoe", KERNEL=="err", NAME="etherd/%k", GROUP="disk", MODE="0440" +SUBSYSTEM=="aoe", KERNEL=="interfaces", NAME="etherd/%k", GROUP="disk", MODE="0220" +SUBSYSTEM=="aoe", KERNEL=="revalidate", NAME="etherd/%k", GROUP="disk", MODE="0220" +SUBSYSTEM=="aoe", KERNEL=="flush", NAME="etherd/%k", GROUP="disk", MODE="0220" # aoe block devices -KERNEL="etherd*", NAME="%k", GROUP="disk" +KERNEL=="etherd*", NAME="%k", GROUP="disk" diff --git a/Documentation/cgroups.txt b/Documentation/cgroups.txt index 98a26f81fa7..42d7c4cb39c 100644 --- a/Documentation/cgroups.txt +++ b/Documentation/cgroups.txt @@ -456,7 +456,7 @@ methods are create/destroy. Any others that are null are presumed to be successful no-ops. struct cgroup_subsys_state *create(struct cgroup *cont) -LL=cgroup_mutex +(cgroup_mutex held by caller) Called to create a subsystem state object for a cgroup. The subsystem should allocate its subsystem state object for the passed @@ -471,14 +471,19 @@ it's the root of the hierarchy) and may be an appropriate place for initialization code. void destroy(struct cgroup *cont) -LL=cgroup_mutex +(cgroup_mutex held by caller) -The cgroup system is about to destroy the passed cgroup; the -subsystem should do any necessary cleanup +The cgroup system is about to destroy the passed cgroup; the subsystem +should do any necessary cleanup and free its subsystem state +object. By the time this method is called, the cgroup has already been +unlinked from the file system and from the child list of its parent; +cgroup->parent is still valid. (Note - can also be called for a +newly-created cgroup if an error occurs after this subsystem's +create() method has been called for the new cgroup). int can_attach(struct cgroup_subsys *ss, struct cgroup *cont, struct task_struct *task) -LL=cgroup_mutex +(cgroup_mutex held by caller) Called prior to moving a task into a cgroup; if the subsystem returns an error, this will abort the attach operation. If a NULL @@ -489,25 +494,20 @@ remain valid while the caller holds cgroup_mutex. void attach(struct cgroup_subsys *ss, struct cgroup *cont, struct cgroup *old_cont, struct task_struct *task) -LL=cgroup_mutex - Called after the task has been attached to the cgroup, to allow any post-attachment activity that requires memory allocations or blocking. void fork(struct cgroup_subsy *ss, struct task_struct *task) -LL=callback_mutex, maybe read_lock(tasklist_lock) Called when a task is forked into a cgroup. Also called during registration for all existing tasks. void exit(struct cgroup_subsys *ss, struct task_struct *task) -LL=callback_mutex Called during task exit int populate(struct cgroup_subsys *ss, struct cgroup *cont) -LL=none Called after creation of a cgroup to allow a subsystem to populate the cgroup directory with file entries. The subsystem should make @@ -524,7 +524,7 @@ example in cpusets, no task may attach before 'cpus' and 'mems' are set up. void bind(struct cgroup_subsys *ss, struct cgroup *root) -LL=callback_mutex +(cgroup_mutex held by caller) Called when a cgroup subsystem is rebound to a different hierarchy and root cgroup. Currently this will only involve movement between diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt new file mode 100644 index 00000000000..b5bbea92a61 --- /dev/null +++ b/Documentation/controllers/memory.txt @@ -0,0 +1,279 @@ +Memory Controller + +Salient features + +a. Enable control of both RSS (mapped) and Page Cache (unmapped) pages +b. The infrastructure allows easy addition of other types of memory to control +c. Provides *zero overhead* for non memory controller users +d. Provides a double LRU: global memory pressure causes reclaim from the + global LRU; a cgroup on hitting a limit, reclaims from the per + cgroup LRU + +NOTE: Swap Cache (unmapped) is not accounted now. + +Benefits and Purpose of the memory controller + +The memory controller isolates the memory behaviour of a group of tasks +from the rest of the system. The article on LWN [12] mentions some probable +uses of the memory controller. The memory controller can be used to + +a. Isolate an application or a group of applications + Memory hungry applications can be isolated and limited to a smaller + amount of memory. +b. Create a cgroup with limited amount of memory, this can be used + as a good alternative to booting with mem=XXXX. +c. Virtualization solutions can control the amount of memory they want + to assign to a virtual machine instance. +d. A CD/DVD burner could control the amount of memory used by the + rest of the system to ensure that burning does not fail due to lack + of available memory. +e. There are several other use cases, find one or use the controller just + for fun (to learn and hack on the VM subsystem). + +1. History + +The memory controller has a long history. A request for comments for the memory +controller was posted by Balbir Singh [1]. At the time the RFC was posted +there were several implementations for memory control. The goal of the +RFC was to build consensus and agreement for the minimal features required +for memory control. The first RSS controller was posted by Balbir Singh[2] +in Feb 2007. Pavel Emelianov [3][4][5] has since posted three versions of the +RSS controller. At OLS, at the resource management BoF, everyone suggested +that we handle both page cache and RSS together. Another request was raised +to allow user space handling of OOM. The current memory controller is +at version 6; it combines both mapped (RSS) and unmapped Page +Cache Control [11]. + +2. Memory Control + +Memory is a unique resource in the sense that it is present in a limited +amount. If a task requires a lot of CPU processing, the task can spread +its processing over a period of hours, days, months or years, but with +memory, the same physical memory needs to be reused to accomplish the task. + +The memory controller implementation has been divided into phases. These +are: + +1. Memory controller +2. mlock(2) controller +3. Kernel user memory accounting and slab control +4. user mappings length controller + +The memory controller is the first controller developed. + +2.1. Design + +The core of the design is a counter called the res_counter. The res_counter +tracks the current memory usage and limit of the group of processes associated +with the controller. Each cgroup has a memory controller specific data +structure (mem_cgroup) associated with it. + +2.2. Accounting + + +--------------------+ + | mem_cgroup | + | (res_counter) | + +--------------------+ + / ^ \ + / | \ + +---------------+ | +---------------+ + | mm_struct | |.... | mm_struct | + | | | | | + +---------------+ | +---------------+ + | + + --------------+ + | + +---------------+ +------+--------+ + | page +----------> page_cgroup| + | | | | + +---------------+ +---------------+ + + (Figure 1: Hierarchy of Accounting) + + +Figure 1 shows the important aspects of the controller + +1. Accounting happens per cgroup +2. Each mm_struct knows about which cgroup it belongs to +3. Each page has a pointer to the page_cgroup, which in turn knows the + cgroup it belongs to + +The accounting is done as follows: mem_cgroup_charge() is invoked to setup +the necessary data structures and check if the cgroup that is being charged +is over its limit. If it is then reclaim is invoked on the cgroup. +More details can be found in the reclaim section of this document. +If everything goes well, a page meta-data-structure called page_cgroup is +allocated and associated with the page. This routine also adds the page to +the per cgroup LRU. + +2.2.1 Accounting details + +All mapped pages (RSS) and unmapped user pages (Page Cache) are accounted. +RSS pages are accounted at the time of page_add_*_rmap() unless they've already +been accounted for earlier. A file page will be accounted for as Page Cache; +it's mapped into the page tables of a process, duplicate accounting is carefully +avoided. Page Cache pages are accounted at the time of add_to_page_cache(). +The corresponding routines that remove a page from the page tables or removes +a page from Page Cache is used to decrement the accounting counters of the +cgroup. + +2.3 Shared Page Accounting + +Shared pages are accounted on the basis of the first touch approach. The +cgroup that first touches a page is accounted for the page. The principle +behind this approach is that a cgroup that aggressively uses a shared +page will eventually get charged for it (once it is uncharged from +the cgroup that brought it in -- this will happen on memory pressure). + +2.4 Reclaim + +Each cgroup maintains a per cgroup LRU that consists of an active +and inactive list. When a cgroup goes over its limit, we first try +to reclaim memory from the cgroup so as to make space for the new +pages that the cgroup has touched. If the reclaim is unsuccessful, +an OOM routine is invoked to select and kill the bulkiest task in the +cgroup. + +The reclaim algorithm has not been modified for cgroups, except that +pages that are selected for reclaiming come from the per cgroup LRU +list. + +2. Locking + +The memory controller uses the following hierarchy + +1. zone->lru_lock is used for selecting pages to be isolated +2. mem->per_zone->lru_lock protects the per cgroup LRU (per zone) +3. lock_page_cgroup() is used to protect page->page_cgroup + +3. User Interface + +0. Configuration + +a. Enable CONFIG_CGROUPS +b. Enable CONFIG_RESOURCE_COUNTERS +c. Enable CONFIG_CGROUP_MEM_CONT + +1. Prepare the cgroups +# mkdir -p /cgroups +# mount -t cgroup none /cgroups -o memory + +2. Make the new group and move bash into it +# mkdir /cgroups/0 +# echo $$ > /cgroups/0/tasks + +Since now we're in the 0 cgroup, +We can alter the memory limit: +# echo -n 4M > /cgroups/0/memory.limit_in_bytes + +NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo, +mega or gigabytes. + +# cat /cgroups/0/memory.limit_in_bytes +4194304 Bytes + +NOTE: The interface has now changed to display the usage in bytes +instead of pages + +We can check the usage: +# cat /cgroups/0/memory.usage_in_bytes +1216512 Bytes + +A successful write to this file does not guarantee a successful set of +this limit to the value written into the file. This can be due to a +number of factors, such as rounding up to page boundaries or the total +availability of memory on the system. The user is required to re-read +this file after a write to guarantee the value committed by the kernel. + +# echo -n 1 > memory.limit_in_bytes +# cat memory.limit_in_bytes +4096 Bytes + +The memory.failcnt field gives the number of times that the cgroup limit was +exceeded. + +The memory.stat file gives accounting information. Now, the number of +caches, RSS and Active pages/Inactive pages are shown. + +The memory.force_empty gives an interface to drop *all* charges by force. + +# echo -n 1 > memory.force_empty + +will drop all charges in cgroup. Currently, this is maintained for test. + +4. Testing + +Balbir posted lmbench, AIM9, LTP and vmmstress results [10] and [11]. +Apart from that v6 has been tested with several applications and regular +daily use. The controller has also been tested on the PPC64, x86_64 and +UML platforms. + +4.1 Troubleshooting + +Sometimes a user might find that the application under a cgroup is +terminated. There are several causes for this: + +1. The cgroup limit is too low (just too low to do anything useful) +2. The user is using anonymous memory and swap is turned off or too low + +A sync followed by echo 1 > /proc/sys/vm/drop_caches will help get rid of +some of the pages cached in the cgroup (page cache pages). + +4.2 Task migration + +When a task migrates from one cgroup to another, it's charge is not +carried forward. The pages allocated from the original cgroup still +remain charged to it, the charge is dropped when the page is freed or +reclaimed. + +4.3 Removing a cgroup + +A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a +cgroup might have some charge associated with it, even though all +tasks have migrated away from it. Such charges are automatically dropped at +rmdir() if there are no tasks. + +4.4 Choosing what to account -- Page Cache (unmapped) vs RSS (mapped)? + +The type of memory accounted by the cgroup can be limited to just +mapped pages by writing "1" to memory.control_type field + +echo -n 1 > memory.control_type + +5. TODO + +1. Add support for accounting huge pages (as a separate controller) +2. Make per-cgroup scanner reclaim not-shared pages first +3. Teach controller to account for shared-pages +4. Start reclamation when the limit is lowered +5. Start reclamation in the background when the limit is + not yet hit but the usage is getting closer + +Summary + +Overall, the memory controller has been a stable controller and has been +commented and discussed quite extensively in the community. + +References + +1. Singh, Balbir. RFC: Memory Controller, http://lwn.net/Articles/206697/ +2. Singh, Balbir. Memory Controller (RSS Control), + http://lwn.net/Articles/222762/ +3. Emelianov, Pavel. Resource controllers based on process cgroups + http://lkml.org/lkml/2007/3/6/198 +4. Emelianov, Pavel. RSS controller based on process cgroups (v2) + http://lkml.org/lkml/2007/4/9/74 +5. Emelianov, Pavel. RSS controller based on process cgroups (v3) + http://lkml.org/lkml/2007/5/30/244 +6. Menage, Paul. Control Groups v10, http://lwn.net/Articles/236032/ +7. Vaidyanathan, Srinivasan, Control Groups: Pagecache accounting and control + subsystem (v3), http://lwn.net/Articles/235534/ +8. Singh, Balbir. RSS controller V2 test results (lmbench), + http://lkml.org/lkml/2007/5/17/232 +9. Singh, Balbir. RSS controller V2 AIM9 results + http://lkml.org/lkml/2007/5/18/1 +10. Singh, Balbir. Memory controller v6 results, + http://lkml.org/lkml/2007/8/19/36 +11. Singh, Balbir. Memory controller v6, http://lkml.org/lkml/2007/8/17/69 +12. Corbet, Jonathan, Controlling memory use in cgroups, + http://lwn.net/Articles/243795/ diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt index 141bef1c859..43db6fe1281 100644 --- a/Documentation/cpusets.txt +++ b/Documentation/cpusets.txt @@ -523,21 +523,14 @@ from one cpuset to another, then the kernel will adjust the tasks memory placement, as above, the next time that the kernel attempts to allocate a page of memory for that task. -If a cpuset has its CPUs modified, then each task using that -cpuset does _not_ change its behavior automatically. In order to -minimize the impact on the critical scheduling code in the kernel, -tasks will continue to use their prior CPU placement until they -are rebound to their cpuset, by rewriting their pid to the 'tasks' -file of their cpuset. If a task had been bound to some subset of its -cpuset using the sched_setaffinity() call, and if any of that subset -is still allowed in its new cpuset settings, then the task will be -restricted to the intersection of the CPUs it was allowed on before, -and its new cpuset CPU placement. If, on the other hand, there is -no overlap between a tasks prior placement and its new cpuset CPU -placement, then the task will be allowed to run on any CPU allowed -in its new cpuset. If a task is moved from one cpuset to another, -its CPU placement is updated in the same way as if the tasks pid is -rewritten to the 'tasks' file of its current cpuset. +If a cpuset has its 'cpus' modified, then each task in that cpuset +will have its allowed CPU placement changed immediately. Similarly, +if a tasks pid is written to a cpusets 'tasks' file, in either its +current cpuset or another cpuset, then its allowed CPU placement is +changed immediately. If such a task had been bound to some subset +of its cpuset using the sched_setaffinity() call, the task will be +allowed to run on any CPU allowed in its new cpuset, negating the +affect of the prior sched_setaffinity() call. In summary, the memory placement of a task whose cpuset is changed is updated by the kernel, on the next allocation of a page for that task, diff --git a/Documentation/drivers/edac/edac.txt b/Documentation/edac.txt index a5c36842ece..a5c36842ece 100644 --- a/Documentation/drivers/edac/edac.txt +++ b/Documentation/edac.txt diff --git a/Documentation/email-clients.txt b/Documentation/email-clients.txt index 113165b4830..2ebb94d6ed8 100644 --- a/Documentation/email-clients.txt +++ b/Documentation/email-clients.txt @@ -170,7 +170,6 @@ Sylpheed (GUI) - Works well for inlining text (or using attachments). - Allows use of an external editor. -- Not good for IMAP. - Is slow on large folders. - Won't do TLS SMTP auth over a non-SSL connection. - Has a helpful ruler bar in the compose window. diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 68ce1300a36..ce9503c892b 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -6,14 +6,6 @@ be removed from this file. --------------------------- -What: MXSER -When: December 2007 -Why: Old mxser driver is obsoleted by the mxser_new. Give it some time yet - and remove it. -Who: Jiri Slaby <jirislaby@gmail.com> - ---------------------------- - What: dev->power.power_state When: July 2007 Why: Broken design for runtime control over driver power states, confusing @@ -107,17 +99,6 @@ Who: Eric Biederman <ebiederm@xmission.com> --------------------------- -What: a.out interpreter support for ELF executables -When: 2.6.25 -Files: fs/binfmt_elf.c -Why: Using a.out interpreters for ELF executables was a feature for - transition from a.out to ELF. But now it is unlikely to be still - needed anymore and removing it would simplify the hairy ELF - loader code. -Who: Andi Kleen <ak@suse.de> - ---------------------------- - What: remove EXPORT_SYMBOL(kernel_thread) When: August 2006 Files: arch/*/kernel/*_ksyms.c @@ -200,14 +181,6 @@ Who: Len Brown <len.brown@intel.com> --------------------------- -What: 'time' kernel boot parameter -When: January 2008 -Why: replaced by 'printk.time=<value>' so that printk timestamps can be - enabled or disabled as needed -Who: Randy Dunlap <randy.dunlap@oracle.com> - ---------------------------- - What: libata spindown skipping and warning When: Dec 2008 Why: Some halt(8) implementations synchronize caches for and spin diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index 1de155e2dc3..e68021c08fb 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX @@ -32,6 +32,8 @@ directory-locking - info about the locking scheme used for directory operations. dlmfs.txt - info on the userspace interface to the OCFS2 DLM. +dnotify.txt + - info about directory notification in Linux. ecryptfs.txt - docs on eCryptfs: stacked cryptographic filesystem for Linux. ext2.txt @@ -80,6 +82,8 @@ relay.txt - info on relay, for efficient streaming from kernel to user space. romfs.txt - description of the ROMFS filesystem. +sharedsubtree.txt + - a description of shared subtrees for namespaces. smbfs.txt - info on using filesystems with the SMB protocol (Win 3.11 and NT). spufs.txt diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 37c10cba717..42d4b30b104 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -90,7 +90,6 @@ of the locking scheme for directory operations. prototypes: struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); - void (*read_inode) (struct inode *); void (*dirty_inode) (struct inode *); int (*write_inode) (struct inode *, int); void (*put_inode) (struct inode *); @@ -114,7 +113,6 @@ locking rules: BKL s_lock s_umount alloc_inode: no no no destroy_inode: no -read_inode: no (see below) dirty_inode: no (must not sleep) write_inode: no put_inode: no @@ -133,7 +131,6 @@ show_options: no (vfsmount->sem) quota_read: no no no (see below) quota_write: no no no (see below) -->read_inode() is not a method - it's a callback used in iget(). ->remount_fs() will have the s_umount lock if it's already mounted. When called from get_sb_single, it does NOT have the s_umount lock. ->quota_read() and ->quota_write() functions are both guaranteed to diff --git a/Documentation/dnotify.txt b/Documentation/filesystems/dnotify.txt index 6984fca6002..9f5d338ddbb 100644 --- a/Documentation/dnotify.txt +++ b/Documentation/filesystems/dnotify.txt @@ -69,24 +69,24 @@ Example #include <signal.h> #include <stdio.h> #include <unistd.h> - + static volatile int event_fd; - + static void handler(int sig, siginfo_t *si, void *data) { event_fd = si->si_fd; } - + int main(void) { struct sigaction act; int fd; - + act.sa_sigaction = handler; sigemptyset(&act.sa_mask); act.sa_flags = SA_SIGINFO; sigaction(SIGRTMIN + 1, &act, NULL); - + fd = open(".", O_RDONLY); fcntl(fd, F_SETSIG, SIGRTMIN + 1); fcntl(fd, F_NOTIFY, DN_MODIFY|DN_CREATE|DN_MULTISHOT); diff --git a/Documentation/filesystems/isofs.txt b/Documentation/filesystems/isofs.txt index 758e50401c1..6973b980ca2 100644 --- a/Documentation/filesystems/isofs.txt +++ b/Documentation/filesystems/isofs.txt @@ -24,6 +24,7 @@ Mount options unique to the isofs filesystem. map=normal Map non-Rock Ridge filenames to lower case map=acorn As map=normal but also apply Acorn extensions if present mode=xxx Sets the permissions on files to xxx + dmode=xxx Sets the permissions on directories to xxx nojoliet Ignore Joliet extensions if they are present. norock Ignore Rock Ridge extensions if they are present. hide Completely strip hidden files from the file system. diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 0f33c77bc14..92b888d540a 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -34,8 +34,8 @@ FOO_I(inode) (see in-tree filesystems for examples). Make them ->alloc_inode and ->destroy_inode in your super_operations. -Keep in mind that now you need explicit initialization of private data - -typically in ->read_inode() and after getting an inode from new_inode(). +Keep in mind that now you need explicit initialization of private data +typically between calling iget_locked() and unlocking the inode. At some point that will become mandatory. @@ -173,10 +173,10 @@ should be a non-blocking function that initializes those parts of a newly created inode to allow the test function to succeed. 'data' is passed as an opaque value to both test and set functions. -When the inode has been created by iget5_locked(), it will be returned with -the I_NEW flag set and will still be locked. read_inode has not been -called so the file system still has to finalize the initialization. Once -the inode is initialized it must be unlocked by calling unlock_new_inode(). +When the inode has been created by iget5_locked(), it will be returned with the +I_NEW flag set and will still be locked. The filesystem then needs to finalize +the initialization. Once the inode is initialized it must be unlocked by +calling unlock_new_inode(). The filesystem is responsible for setting (and possibly testing) i_ino when appropriate. There is also a simpler iget_locked function that @@ -184,11 +184,19 @@ just takes the superblock and inode number as arguments and does the test and set for you. e.g. - inode = iget_locked(sb, ino); - if (inode->i_state & I_NEW) { - read_inode_from_disk(inode); - unlock_new_inode(inode); - } + inode = iget_locked(sb, ino); + if (inode->i_state & I_NEW) { + err = read_inode_from_disk(inode); + if (err < 0) { + iget_failed(inode); + return err; + } + unlock_new_inode(inode); + } + +Note that if the process of setting up a new inode fails, then iget_failed() +should be called on the inode to render it dead, and an appropriate error +should be passed back to the caller. --- [recommended] diff --git a/Documentation/sharedsubtree.txt b/Documentation/filesystems/sharedsubtree.txt index 736540045dc..736540045dc 100644 --- a/Documentation/sharedsubtree.txt +++ b/Documentation/filesystems/sharedsubtree.txt diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 9d019d35728..81e5be6e6e3 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -151,7 +151,7 @@ The get_sb() method has the following arguments: const char *dev_name: the device name we are mounting. void *data: arbitrary mount options, usually comes as an ASCII - string + string (see "Mount Options" section) struct vfsmount *mnt: a vfs-internal representation of a mount point @@ -182,7 +182,7 @@ A fill_super() method implementation has the following arguments: must initialize this properly. void *data: arbitrary mount options, usually comes as an ASCII - string + string (see "Mount Options" section) int silent: whether or not to be silent on error @@ -203,8 +203,6 @@ struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); - void (*read_inode) (struct inode *); - void (*dirty_inode) (struct inode *); int (*write_inode) (struct inode *, int); void (*put_inode) (struct inode *); @@ -242,15 +240,6 @@ or bottom half). ->alloc_inode was defined and simply undoes anything done by ->alloc_inode. - read_inode: this method is called to read a specific inode from the - mounted filesystem. The i_ino member in the struct inode is - initialized by the VFS to indicate which inode to read. Other - members are filled in by this method. - - You can set this to NULL and use iget5_locked() instead of iget() - to read inodes. This is necessary for filesystems for which the - inode number is not sufficient to identify an inode. - dirty_inode: this method is called by the VFS to mark an inode dirty. write_inode: this method is called when the VFS needs to write an @@ -302,15 +291,16 @@ or bottom half). umount_begin: called when the VFS is unmounting a filesystem. - show_options: called by the VFS to show mount options for /proc/<pid>/mounts. + show_options: called by the VFS to show mount options for + /proc/<pid>/mounts. (see "Mount Options" section) quota_read: called by the VFS to read from filesystem quota file. quota_write: called by the VFS to write to filesystem quota file. -The read_inode() method is responsible for filling in the "i_op" -field. This is a pointer to a "struct inode_operations" which -describes the methods that can be performed on individual inodes. +Whoever sets up the inode is responsible for filling in the "i_op" field. This +is a pointer to a "struct inode_operations" which describes the methods that +can be performed on individual inodes. The Inode Object @@ -980,6 +970,49 @@ manipulate dentries: For further information on dentry locking, please refer to the document Documentation/filesystems/dentry-locking.txt. +Mount Options +============= + +Parsing options +--------------- + +On mount and remount the filesystem is passed a string containing a +comma separated list of mount options. The options can have either of +these forms: + + option + option=value + +The <linux/parser.h> header defines an API that helps parse these +options. There are plenty of examples on how to use it in existing +filesystems. + +Showing options +--------------- + +If a filesystem accepts mount options, it must define show_options() +to show all the currently active options. The rules are: + + - options MUST be shown which are not default or their values differ + from the default + + - options MAY be shown which are enabled by default or have their + default value + +Options used only internally between a mount helper and the kernel +(such as file descriptors), or which only have an effect during the +mounting (such as ones controlling the creation of a journal) are exempt +from the above rules. + +The underlying reason for the above rules is to make sure, that a +mount can be accurately replicated (e.g. umounting and mounting again) +based on the information found in /proc/mounts. + +A simple method of saving options at mount/remount time and showing +them is provided with the save_mount_options() and +generic_show_options() helper functions. Please note, that using +these may have drawbacks. For more info see header comments for these +functions in fs/namespace.c. Resources ========= diff --git a/Documentation/hwmon/ads7828 b/Documentation/hwmon/ads7828 new file mode 100644 index 00000000000..75bc4beaf44 --- /dev/null +++ b/Documentation/hwmon/ads7828 @@ -0,0 +1,36 @@ +Kernel driver ads7828 +===================== + +Supported chips: + * Texas Instruments/Burr-Brown ADS7828 + Prefix: 'ads7828' + Addresses scanned: I2C 0x48, 0x49, 0x4a, 0x4b + Datasheet: Publicly available at the Texas Instruments website : + http://focus.ti.com/lit/ds/symlink/ads7828.pdf + +Authors: + Steve Hardy <steve@linuxrealtime.co.uk> + +Module Parameters +----------------- + +* se_input: bool (default Y) + Single ended operation - set to N for differential mode +* int_vref: bool (default Y) + Operate with the internal 2.5V reference - set to N for external reference +* vref_mv: int (default 2500) + If using an external reference, set this to the reference voltage in mV + +Description +----------- + +This driver implements support for the Texas Instruments ADS7828. + +This device is a 12-bit 8-channel A-D converter. + +It can operate in single ended mode (8 +ve inputs) or in differential mode, +where 4 differential pairs can be measured. + +The chip also has the facility to use an external voltage reference. This +may be required if your hardware supplies the ADS7828 from a 5V supply, see +the datasheet for more details. diff --git a/Documentation/hwmon/it87 b/Documentation/hwmon/it87 index 5b704a40256..f4ce1fdbeff 100644 --- a/Documentation/hwmon/it87 +++ b/Documentation/hwmon/it87 @@ -30,7 +30,7 @@ Supported chips: Datasheet: No longer be available Authors: - Christophe Gauthron <chrisg@0-in.com> + Christophe Gauthron Jean Delvare <khali@linux-fr.org> diff --git a/Documentation/hwmon/lm78 b/Documentation/hwmon/lm78 index dfc318a60fd..60932e26aba 100644 --- a/Documentation/hwmon/lm78 +++ b/Documentation/hwmon/lm78 @@ -4,12 +4,12 @@ Kernel driver lm78 Supported chips: * National Semiconductor LM78 / LM78-J Prefix: 'lm78' - Addresses scanned: I2C 0x20 - 0x2f, ISA 0x290 (8 I/O ports) + Addresses scanned: I2C 0x28 - 0x2f, ISA 0x290 (8 I/O ports) Datasheet: Publicly available at the National Semiconductor website http://www.national.com/ * National Semiconductor LM79 Prefix: 'lm79' - Addresses scanned: I2C 0x20 - 0x2f, ISA 0x290 (8 I/O ports) + Addresses scanned: I2C 0x28 - 0x2f, ISA 0x290 (8 I/O ports) Datasheet: Publicly available at the National Semiconductor website http://www.national.com/ diff --git a/Documentation/hwmon/lm87 b/Documentation/hwmon/lm87 index c952c57f0e1..ec27aa1b94c 100644 --- a/Documentation/hwmon/lm87 +++ b/Documentation/hwmon/lm87 @@ -4,8 +4,12 @@ Kernel driver lm87 Supported chips: * National Semiconductor LM87 Prefix: 'lm87' - Addresses scanned: I2C 0x2c - 0x2f + Addresses scanned: I2C 0x2c - 0x2e Datasheet: http://www.national.com/pf/LM/LM87.html + * Analog Devices ADM1024 + Prefix: 'adm1024' + Addresses scanned: I2C 0x2c - 0x2e + Datasheet: http://www.analog.com/en/prod/0,2877,ADM1024,00.html Authors: Frodo Looijaard <frodol@dds.nl>, @@ -19,11 +23,12 @@ Authors: Description ----------- -This driver implements support for the National Semiconductor LM87. +This driver implements support for the National Semiconductor LM87 +and the Analog Devices ADM1024. The LM87 implements up to three temperature sensors, up to two fan rotation speed sensors, up to seven voltage sensors, alarms, and some -miscellaneous stuff. +miscellaneous stuff. The ADM1024 is fully compatible. Temperatures are measured in degrees Celsius. Each input has a high and low alarm settings. A high limit produces an alarm when the value diff --git a/Documentation/hwmon/userspace-tools b/Documentation/hwmon/userspace-tools index 19900a8fe67..9865aeedc58 100644 --- a/Documentation/hwmon/userspace-tools +++ b/Documentation/hwmon/userspace-tools @@ -14,7 +14,7 @@ Lm-sensors Core set of utilities that will allow you to obtain health information, setup monitoring limits etc. You can get them on their homepage -http://www.lm-sensors.nu/ or as a package from your Linux distribution. +http://www.lm-sensors.org/ or as a package from your Linux distribution. If from website: Get lm-sensors from project web site. Please note, you need only userspace diff --git a/Documentation/hwmon/w83627ehf b/Documentation/hwmon/w83627ehf index ccc2bcb6106..d6e1ae30fa6 100644 --- a/Documentation/hwmon/w83627ehf +++ b/Documentation/hwmon/w83627ehf @@ -23,8 +23,9 @@ W83627DHG super I/O chips. We will refer to them collectively as Winbond chips. The chips implement three temperature sensors, five fan rotation speed sensors, ten analog voltage sensors (only nine for the 627DHG), one -VID (6 pins), alarms with beep warnings (control unimplemented), and -some automatic fan regulation strategies (plus manual fan control mode). +VID (6 pins for the 627EHF/EHG, 8 pins for the 627DHG), alarms with beep +warnings (control unimplemented), and some automatic fan regulation +strategies (plus manual fan control mode). Temperatures are measured in degrees Celsius and measurement resolution is 1 degC for temp1 and 0.5 degC for temp2 and temp3. An alarm is triggered when diff --git a/Documentation/hwmon/w83627hf b/Documentation/hwmon/w83627hf index 79223192124..880a59f53da 100644 --- a/Documentation/hwmon/w83627hf +++ b/Documentation/hwmon/w83627hf @@ -73,5 +73,4 @@ doesn't help, you may just ignore the bogus VID reading with no harm done. For further information on this driver see the w83781d driver documentation. -[1] http://www2.lm-sensors.nu/~lm78/cvs/browse.cgi/lm_sensors2/doc/vid - +[1] http://www.lm-sensors.org/browser/lm-sensors/trunk/doc/vid diff --git a/Documentation/hwmon/w83781d b/Documentation/hwmon/w83781d index b1e9f80098e..6f800a0283e 100644 --- a/Documentation/hwmon/w83781d +++ b/Documentation/hwmon/w83781d @@ -4,20 +4,16 @@ Kernel driver w83781d Supported chips: * Winbond W83781D Prefix: 'w83781d' - Addresses scanned: I2C 0x20 - 0x2f, ISA 0x290 (8 I/O ports) + Addresses scanned: I2C 0x28 - 0x2f, ISA 0x290 (8 I/O ports) Datasheet: http://www.winbond-usa.com/products/winbond_products/pdfs/PCIC/w83781d.pdf * Winbond W83782D Prefix: 'w83782d' - Addresses scanned: I2C 0x20 - 0x2f, ISA 0x290 (8 I/O ports) + Addresses scanned: I2C 0x28 - 0x2f, ISA 0x290 (8 I/O ports) Datasheet: http://www.winbond.com/PDF/sheet/w83782d.pdf * Winbond W83783S Prefix: 'w83783s' Addresses scanned: I2C 0x2d Datasheet: http://www.winbond-usa.com/products/winbond_products/pdfs/PCIC/w83783s.pdf - * Winbond W83627HF - Prefix: 'w83627hf' - Addresses scanned: I2C 0x20 - 0x2f, ISA 0x290 (8 I/O ports) - Datasheet: http://www.winbond.com/PDF/sheet/w83627hf.pdf * Asus AS99127F Prefix: 'as99127f' Addresses scanned: I2C 0x28 - 0x2f @@ -50,20 +46,18 @@ force_subclients=bus,caddr,saddr,saddr Description ----------- -This driver implements support for the Winbond W83781D, W83782D, W83783S, -W83627HF chips, and the Asus AS99127F chips. We will refer to them -collectively as W8378* chips. +This driver implements support for the Winbond W83781D, W83782D, W83783S +chips, and the Asus AS99127F chips. We will refer to them collectively as +W8378* chips. There is quite some difference between these chips, but they are similar enough that it was sensible to put them together in one driver. -The W83627HF chip is assumed to be identical to the ISA W83782D. The Asus chips are similar to an I2C-only W83782D. Chip #vin #fanin #pwm #temp wchipid vendid i2c ISA as99127f 7 3 0 3 0x31 0x12c3 yes no as99127f rev.2 (type_name = as99127f) 0x31 0x5ca3 yes no w83781d 7 3 0 3 0x10-1 0x5ca3 yes yes -w83627hf 9 3 2 3 0x21 0x5ca3 yes yes(LPC) w83782d 9 3 2-4 3 0x30 0x5ca3 yes yes w83783s 5-6 3 2 1-2 0x40 0x5ca3 yes no @@ -143,9 +137,9 @@ Individual alarm and beep bits: 0x000400: in6 0x000800: fan3 0x001000: chassis -0x002000: temp3 (W83782D and W83627HF only) -0x010000: in7 (W83782D and W83627HF only) -0x020000: in8 (W83782D and W83627HF only) +0x002000: temp3 (W83782D only) +0x010000: in7 (W83782D only) +0x020000: in8 (W83782D only) If an alarm triggers, it will remain triggered until the hardware register is read at least once. This means that the cause for the alarm may diff --git a/Documentation/hwmon/w83l786ng b/Documentation/hwmon/w83l786ng new file mode 100644 index 00000000000..d8f55d7fff1 --- /dev/null +++ b/Documentation/hwmon/w83l786ng @@ -0,0 +1,54 @@ +Kernel driver w83l786ng +===================== + +Supported chips: + * Winbond W83L786NG/W83L786NR + Prefix: 'w83l786ng' + Addresses scanned: I2C 0x2e - 0x2f + Datasheet: http://www.winbond-usa.com/products/winbond_products/pdfs/PCIC/W83L786NRNG09.pdf + +Author: Kevin Lo <kevlo@kevlo.org> + + +Module Parameters +----------------- + +* reset boolean + (default 0) + Use 'reset=1' to reset the chip (via index 0x40, bit 7). The default + behavior is no chip reset to preserve BIOS settings + + +Description +----------- + +This driver implements support for Winbond W83L786NG/W83L786NR chips. + +The driver implements two temperature sensors, two fan rotation speed +sensors, and three voltage sensors. + +Temperatures are measured in degrees Celsius and measurement resolution is 1 +degC for temp1 and temp2. + +Fan rotation speeds are reported in RPM (rotations per minute). Fan readings +readings can be divided by a programmable divider (1, 2, 4, 8, 16, 32, 64 +or 128 for fan 1/2) to give the readings more range or accuracy. + +Voltage sensors (also known as IN sensors) report their values in millivolts. +An alarm is triggered if the voltage has crossed a programmable minimum +or maximum limit. + +/sys files +---------- + +pwm[1-2] - this file stores PWM duty cycle or DC value (fan speed) in range: + 0 (stop) to 255 (full) +pwm[1-2]_enable - this file controls mode of fan/temperature control: + * 0 Manual Mode + * 1 Thermal Cruise + * 2 Smart Fan II + * 4 FAN_SET +pwm[1-2]_mode - Select PWM of DC mode + * 0 DC + * 1 PWM +tolerance[1-2] - Value in degrees of Celsius (degC) for +- T diff --git a/Documentation/i2c/busses/i2c-piix4 b/Documentation/i2c/busses/i2c-piix4 index cf6b6cb02aa..ef1efa79b1d 100644 --- a/Documentation/i2c/busses/i2c-piix4 +++ b/Documentation/i2c/busses/i2c-piix4 @@ -95,4 +95,4 @@ of all affected systems, so the only safe solution was to prevent access to the SMBus on all IBM systems (detected using DMI data.) For additional information, read: -http://www2.lm-sensors.nu/~lm78/cvs/lm_sensors2/README.thinkpad +http://www.lm-sensors.org/browser/lm-sensors/trunk/README.thinkpad diff --git a/Documentation/input/input-programming.txt b/Documentation/input/input-programming.txt index 47fc86830cd..81905e81585 100644 --- a/Documentation/input/input-programming.txt +++ b/Documentation/input/input-programming.txt @@ -22,7 +22,7 @@ static struct input_dev *button_dev; static void button_interrupt(int irq, void *dummy, struct pt_regs *fp) { - input_report_key(button_dev, BTN_1, inb(BUTTON_PORT) & 1); + input_report_key(button_dev, BTN_0, inb(BUTTON_PORT) & 1); input_sync(button_dev); } diff --git a/Documentation/iostats.txt b/Documentation/iostats.txt index b963c3b4afa..5925c3cd030 100644 --- a/Documentation/iostats.txt +++ b/Documentation/iostats.txt @@ -58,7 +58,7 @@ they should not wrap twice before you notice them. Each set of stats only applies to the indicated device; if you want system-wide stats you'll have to find all the devices and sum them all up. -Field 1 -- # of reads issued +Field 1 -- # of reads completed This is the total number of reads completed successfully. Field 2 -- # of reads merged, field 6 -- # of writes merged Reads and writes which are adjacent to each other may be merged for @@ -132,6 +132,19 @@ words, the number of reads for partitions is counted slightly before time of queuing for partitions, and at completion for whole disks. This is a subtle distinction that is probably uninteresting for most cases. +More significant is the error induced by counting the numbers of +reads/writes before merges for partitions and after for disks. Since a +typical workload usually contains a lot of successive and adjacent requests, +the number of reads/writes issued can be several times higher than the +number of reads/writes completed. + +In 2.6.25, the full statistic set is again available for partitions and +disk and partition statistics are consistent again. Since we still don't +keep record of the partition-relative address, an operation is attributed to +the partition which contains the first sector of the request after the +eventual merges. As requests can be merged across partition, this could lead +to some (probably insignificant) innacuracy. + Additional notes ---------------- diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 8fd5aa40585..a4fc7fc2143 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -147,8 +147,10 @@ and is between 256 and 4096 characters. It is defined in the file default: 0 acpi_sleep= [HW,ACPI] Sleep options - Format: { s3_bios, s3_mode } - See Documentation/power/video.txt + Format: { s3_bios, s3_mode, s3_beep } + See Documentation/power/video.txt for s3_bios and s3_mode. + s3_beep is for debugging; it makes the PC's speaker beep + as soon as the kernel's real-mode entry point is called. acpi_sci= [HW,ACPI] ACPI System Control Interrupt trigger mode Format: { level | edge | high | low } @@ -175,6 +177,9 @@ and is between 256 and 4096 characters. It is defined in the file acpi_no_auto_ssdt [HW,ACPI] Disable automatic loading of SSDT + acpi_no_initrd_override [KNL,ACPI] + Disable loading custom ACPI tables from the initramfs + acpi_os_name= [HW,ACPI] Tell ACPI BIOS the name of the OS Format: To spoof as Windows 98: ="Microsoft Windows" @@ -1968,9 +1973,6 @@ and is between 256 and 4096 characters. It is defined in the file <deci-seconds>: poll all this frequency 0: no polling (default) - time Show timing data prefixed to each printk message line - [deprecated, see 'printk.time'] - tipar.timeout= [HW,PPT] Set communications timeout in tenths of a second (default 15). diff --git a/Documentation/leds-class.txt b/Documentation/leds-class.txt index 8c35c042611..56757c751d6 100644 --- a/Documentation/leds-class.txt +++ b/Documentation/leds-class.txt @@ -39,12 +39,33 @@ LED Device Naming Is currently of the form: -"devicename:colour" +"devicename:colour:function" There have been calls for LED properties such as colour to be exported as individual led class attributes. As a solution which doesn't incur as much overhead, I suggest these become part of the device name. The naming scheme -above leaves scope for further attributes should they be needed. +above leaves scope for further attributes should they be needed. If sections +of the name don't apply, just leave that section blank. + + +Hardware accelerated blink of LEDs +================================== + +Some LEDs can be programmed to blink without any CPU interaction. To +support this feature, a LED driver can optionally implement the +blink_set() function (see <linux/leds.h>). If implemeted, triggers can +attempt to use it before falling back to software timers. The blink_set() +function should return 0 if the blink setting is supported, or -EINVAL +otherwise, which means that LED blinking will be handled by software. + +The blink_set() function should choose a user friendly blinking +value if it is called with *delay_on==0 && *delay_off==0 parameters. In +this case the driver should give back the chosen value through delay_on +and delay_off parameters to the leds subsystem. + +Any call to the brightness_set() callback function should cancel the +previously programmed hardware blinking function so setting the brightness +to 0 can also cancel the blinking of the LED. Known Issues @@ -55,10 +76,6 @@ would cause nightmare dependency issues. I see this as a minor issue compared to the benefits the simple trigger functionality brings. The rest of the LED subsystem can be modular. -Some leds can be programmed to flash in hardware. As this isn't a generic -LED device property, this should be exported as a device specific sysfs -attribute rather than part of the class if this functionality is required. - Future Development ================== diff --git a/Documentation/mn10300/ABI.txt b/Documentation/mn10300/ABI.txt new file mode 100644 index 00000000000..1fef1f06dfd --- /dev/null +++ b/Documentation/mn10300/ABI.txt @@ -0,0 +1,149 @@ + ========================= + MN10300 FUNCTION CALL ABI + ========================= + +======= +GENERAL +======= + +The MN10300/AM33 kernel runs in little-endian mode; big-endian mode is not +supported. + +The stack grows downwards, and should always be 32-bit aligned. There are +separate stack pointer registers for userspace and the kernel. + + +================ +ARGUMENT PASSING +================ + +The first two arguments (assuming up to 32-bits per argument) to a function are +passed in the D0 and D1 registers respectively; all other arguments are passed +on the stack. + +If 64-bit arguments are being passed, then they are never split between +registers and the stack. If the first argument is a 64-bit value, it will be +passed in D0:D1. If the first argument is not a 64-bit value, but the second +is, the second will be passed entirely on the stack and D1 will be unused. + +Arguments smaller than 32-bits are not coelesced within a register or a stack +word. For example, two byte-sized arguments will always be passed in separate +registers or word-sized stack slots. + + +================= +CALLING FUNCTIONS +================= + +The caller must allocate twelve bytes on the stack for the callee's use before +it inserts a CALL instruction. The CALL instruction will write into the TOS +word, but won't actually modify the stack pointer; similarly, the RET +instruction reads from the TOS word of the stack, but doesn't move the stack +pointer beyond it. + + + Stack: + | | + | | + |---------------| SP+20 + | 4th Arg | + |---------------| SP+16 + | 3rd Arg | + |---------------| SP+12 + | D1 Save Slot | + |---------------| SP+8 + | D0 Save Slot | + |---------------| SP+4 + | Return Addr | + |---------------| SP + | | + | | + + +The caller must leave space on the stack (hence an allocation of twelve bytes) +in which the callee may store the first two arguments. + + +============ +RETURN VALUE +============ + +The return value is passed in D0 for an integer (or D0:D1 for a 64-bit value), +or A0 for a pointer. + +If the return value is a value larger than 64-bits, or is a structure or an +array, then a hidden first argument will be passed to the callee by the caller: +this will point to a piece of memory large enough to hold the result of the +function. In this case, the callee will return the value in that piece of +memory, and no value will be returned in D0 or A0. + + +=================== +REGISTER CLOBBERING +=================== + +The values in certain registers may be clobbered by the callee, and other +values must be saved: + + Clobber: D0-D1, A0-A1, E0-E3 + Save: D2-D3, A2-A3, E4-E7, SP + +All other non-supervisor-only registers are clobberable (such as MDR, MCRL, +MCRH). + + +================= +SPECIAL REGISTERS +================= + +Certain ordinary registers may carry special usage for the compiler: + + A3: Frame pointer + E2: TLS pointer + + +========== +KERNEL ABI +========== + +The kernel may use a slightly different ABI internally. + + (*) E2 + + If CONFIG_MN10300_CURRENT_IN_E2 is defined, then the current task pointer + will be kept in the E2 register, and that register will be marked + unavailable for the compiler to use as a scratch register. + + Normally the kernel uses something like: + + MOV SP,An + AND 0xFFFFE000,An + MOV (An),Rm // Rm holds current + MOV (yyy,Rm) // Access current->yyy + + To find the address of current; but since this option permits current to + be carried globally in an register, it can use: + + MOV (yyy,E2) // Access current->yyy + + instead. + + +=============== +SYSTEM CALL ABI +=============== + +System calls are called with the following convention: + + REGISTER ENTRY EXIT + =============== ======================= ======================= + D0 Syscall number Return value + A0 1st syscall argument Saved + D1 2nd syscall argument Saved + A3 3rd syscall argument Saved + A2 4th syscall argument Saved + D3 5th syscall argument Saved + D2 6th syscall argument Saved + +All other registers are saved. The layout is a consequence of the way the MOVM +instruction stores registers onto the stack. diff --git a/Documentation/mn10300/compartmentalisation.txt b/Documentation/mn10300/compartmentalisation.txt new file mode 100644 index 00000000000..8958b51dac4 --- /dev/null +++ b/Documentation/mn10300/compartmentalisation.txt @@ -0,0 +1,60 @@ + ========================================= + PART-SPECIFIC SOURCE COMPARTMENTALISATION + ========================================= + +The sources for various parts are compartmentalised at two different levels: + + (1) Processor level + + The "processor level" is a CPU core plus the other on-silicon + peripherals. + + Processor-specific header files are divided among directories in a similar + way to the CPU level: + + (*) include/asm-mn10300/proc-mn103e010/ + + Support for the AM33v2 CPU core. + + The appropriate processor is selected by a CONFIG_MN10300_PROC_YYYY option + from the "Processor support" choice menu in the arch/mn10300/Kconfig file. + + + (2) Unit level + + The "unit level" is a processor plus all the external peripherals + controlled by that processor. + + Unit-specific header files are divided among directories in a similar way + to the CPU level; not only that, but specific sources may also be + segregated into separate directories under the arch directory: + + (*) include/asm-mn10300/unit-asb2303/ + (*) arch/mn10300/unit-asb2303/ + + Support for the ASB2303 board with an ASB2308 daughter board. + + (*) include/asm-mn10300/unit-asb2305/ + (*) arch/mn10300/unit-asb2305/ + + Support for the ASB2305 board. + + The appropriate processor is selected by a CONFIG_MN10300_UNIT_ZZZZ option + from the "Unit type" choice menu in the arch/mn10300/Kconfig file. + + +============ +COMPILE TIME +============ + +When the kernel is compiled, symbolic links will be made in the asm header file +directory for this arch: + + include/asm-mn10300/proc => include/asm-mn10300/proc-YYYY/ + include/asm-mn10300/unit => include/asm-mn10300/unit-ZZZZ/ + +So that the header files contained in those directories can be accessed without +lots of #ifdef-age. + +The appropriate arch/mn10300/unit-ZZZZ directory will also be entered by the +compilation process; all other unit-specific directories will be ignored. diff --git a/Documentation/power/swsusp.txt b/Documentation/power/swsusp.txt index aea7e920966..9d60ab717a7 100644 --- a/Documentation/power/swsusp.txt +++ b/Documentation/power/swsusp.txt @@ -386,6 +386,11 @@ before suspending; then remount them after resuming. There is a work-around for this problem. For more information, see Documentation/usb/persist.txt. +Q: Can I suspend-to-disk using a swap partition under LVM? + +A: No. You can suspend successfully, but you'll not be able to +resume. uswsusp should be able to work with LVM. See suspend.sf.net. + Q: I upgraded the kernel from 2.6.15 to 2.6.16. Both kernels were compiled with the similar configuration files. Anyway I found that suspend to disk (and resume) is much slower on 2.6.16 compared to diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt index b5e46efeba8..7b4e8a70882 100644 --- a/Documentation/powerpc/booting-without-of.txt +++ b/Documentation/powerpc/booting-without-of.txt @@ -57,6 +57,7 @@ Table of Contents n) 4xx/Axon EMAC ethernet nodes o) Xilinx IP cores p) Freescale Synchronous Serial Interface + q) USB EHCI controllers VII - Specifying interrupt information for devices 1) interrupts property @@ -2577,6 +2578,20 @@ platforms are moved over to use the flattened-device-tree model. Requred properties: - current-speed : Baud rate of uartlite + v) Xilinx hwicap + + Xilinx hwicap devices provide access to the configuration logic + of the FPGA through the Internal Configuration Access Port + (ICAP). The ICAP enables partial reconfiguration of the FPGA, + readback of the configuration information, and some control over + 'warm boots' of the FPGA fabric. + + Required properties: + - xlnx,family : The family of the FPGA, necessary since the + capabilities of the underlying ICAP hardware + differ between different families. May be + 'virtex2p', 'virtex4', or 'virtex5'. + p) Freescale Synchronous Serial Interface The SSI is a serial device that communicates with audio codecs. It can @@ -2775,6 +2790,33 @@ platforms are moved over to use the flattened-device-tree model. interrupt-parent = < &ipic >; }; + q) USB EHCI controllers + + Required properties: + - compatible : should be "usb-ehci". + - reg : should contain at least address and length of the standard EHCI + register set for the device. Optional platform-dependent registers + (debug-port or other) can be also specified here, but only after + definition of standard EHCI registers. + - interrupts : one EHCI interrupt should be described here. + If device registers are implemented in big endian mode, the device + node should have "big-endian-regs" property. + If controller implementation operates with big endian descriptors, + "big-endian-desc" property should be specified. + If both big endian registers and descriptors are used by the controller + implementation, "big-endian" property can be specified instead of having + both "big-endian-regs" and "big-endian-desc". + + Example (Sequoia 440EPx): + ehci@e0000300 { + compatible = "ibm,usb-ehci-440epx", "usb-ehci"; + interrupt-parent = <&UIC0>; + interrupts = <1a 4>; + reg = <0 e0000300 90 0 e0000390 70>; + big-endian; + }; + + More devices will be defined as this spec matures. VII - Specifying interrupt information for devices diff --git a/Documentation/scheduler/00-INDEX b/Documentation/scheduler/00-INDEX new file mode 100644 index 00000000000..b5f5ca069b2 --- /dev/null +++ b/Documentation/scheduler/00-INDEX @@ -0,0 +1,16 @@ +00-INDEX + - this file. +sched-arch.txt + - CPU Scheduler implementation hints for architecture specific code. +sched-coding.txt + - reference for various scheduler-related methods in the O(1) scheduler. +sched-design.txt + - goals, design and implementation of the Linux O(1) scheduler. +sched-design-CFS.txt + - goals, design and implementation of the Complete Fair Scheduler. +sched-domains.txt + - information on scheduling domains. +sched-nice-design.txt + - How and why the scheduler's nice levels are implemented. +sched-stats.txt + - information on schedstats (Linux Scheduler Statistics). diff --git a/Documentation/sched-arch.txt b/Documentation/scheduler/sched-arch.txt index 941615a9769..941615a9769 100644 --- a/Documentation/sched-arch.txt +++ b/Documentation/scheduler/sched-arch.txt diff --git a/Documentation/sched-coding.txt b/Documentation/scheduler/sched-coding.txt index cbd8db752ac..cbd8db752ac 100644 --- a/Documentation/sched-coding.txt +++ b/Documentation/scheduler/sched-coding.txt diff --git a/Documentation/sched-design-CFS.txt b/Documentation/scheduler/sched-design-CFS.txt index 88bcb876733..88bcb876733 100644 --- a/Documentation/sched-design-CFS.txt +++ b/Documentation/scheduler/sched-design-CFS.txt diff --git a/Documentation/sched-design.txt b/Documentation/scheduler/sched-design.txt index 1605bf0cba8..1605bf0cba8 100644 --- a/Documentation/sched-design.txt +++ b/Documentation/scheduler/sched-design.txt diff --git a/Documentation/sched-domains.txt b/Documentation/scheduler/sched-domains.txt index a9e990ab980..a9e990ab980 100644 --- a/Documentation/sched-domains.txt +++ b/Documentation/scheduler/sched-domains.txt diff --git a/Documentation/sched-nice-design.txt b/Documentation/scheduler/sched-nice-design.txt index e2bae5a577e..e2bae5a577e 100644 --- a/Documentation/sched-nice-design.txt +++ b/Documentation/scheduler/sched-nice-design.txt diff --git a/Documentation/sched-stats.txt b/Documentation/scheduler/sched-stats.txt index 442e14d35de..442e14d35de 100644 --- a/Documentation/sched-stats.txt +++ b/Documentation/scheduler/sched-stats.txt diff --git a/Documentation/scsi/ChangeLog.arcmsr b/Documentation/scsi/ChangeLog.arcmsr index cd8403a33ee..de2bcacfa87 100644 --- a/Documentation/scsi/ChangeLog.arcmsr +++ b/Documentation/scsi/ChangeLog.arcmsr @@ -68,4 +68,45 @@ ** 2. modify the arcmsr_pci_slot_reset function ** 3. modify the arcmsr_pci_ers_disconnect_forepart function ** 4. modify the arcmsr_pci_ers_need_reset_forepart function +** 1.20.00.15 09/27/2007 Erich Chen & Nick Cheng +** 1. add arcmsr_enable_eoi_mode() on adapter Type B +** 2. add readl(reg->iop2drv_doorbell_reg) in arcmsr_handle_hbb_isr() +** in case of the doorbell interrupt clearance is cached +** 1.20.00.15 10/01/2007 Erich Chen & Nick Cheng +** 1. modify acb->devstate[i][j] +** as ARECA_RAID_GOOD instead of +** ARECA_RAID_GONE in arcmsr_alloc_ccb_pool +** 1.20.00.15 11/06/2007 Erich Chen & Nick Cheng +** 1. add conditional declaration for +** arcmsr_pci_error_detected() and +** arcmsr_pci_slot_reset +** 1.20.00.15 11/23/2007 Erich Chen & Nick Cheng +** 1.check if the sg list member number +** exceeds arcmsr default limit in arcmsr_build_ccb() +** 2.change the returned value type of arcmsr_build_ccb() +** from "void" to "int" +** 3.add the conditional check if arcmsr_build_ccb() +** returns FAILED +** 1.20.00.15 12/04/2007 Erich Chen & Nick Cheng +** 1. modify arcmsr_drain_donequeue() to ignore unknown +** command and let kernel process command timeout. +** This could handle IO request violating max. segments +** while Linux XFS over DM-CRYPT. +** Thanks to Milan Broz's comments <mbroz@redhat.com> +** 1.20.00.15 12/24/2007 Erich Chen & Nick Cheng +** 1.fix the portability problems +** 2.fix type B where we should _not_ iounmap() acb->pmu; +** it's not ioremapped. +** 3.add return -ENOMEM if ioremap() fails +** 4.transfer IS_SG64_ADDR w/ cpu_to_le32() +** in arcmsr_build_ccb +** 5. modify acb->devstate[i][j] as ARECA_RAID_GONE instead of +** ARECA_RAID_GOOD in arcmsr_alloc_ccb_pool() +** 6.fix arcmsr_cdb->Context as (unsigned long)arcmsr_cdb +** 7.add the checking state of +** (outbound_intstatus & ARCMSR_MU_OUTBOUND_HANDLE_INT) == 0 +** in arcmsr_handle_hba_isr +** 8.replace pci_alloc_consistent()/pci_free_consistent() with kmalloc()/kfree() in arcmsr_iop_message_xfer() +** 9. fix the release of dma memory for type B in arcmsr_free_ccb_pool() +** 10.fix the arcmsr_polling_hbb_ccbdone() ************************************************************************** diff --git a/Documentation/scsi/scsi_mid_low_api.txt b/Documentation/scsi/scsi_mid_low_api.txt index 6f70f2b9327..a6d5354639b 100644 --- a/Documentation/scsi/scsi_mid_low_api.txt +++ b/Documentation/scsi/scsi_mid_low_api.txt @@ -1407,7 +1407,7 @@ Credits ======= The following people have contributed to this document: Mike Anderson <andmike at us dot ibm dot com> - James Bottomley <James dot Bottomley at steeleye dot com> + James Bottomley <James dot Bottomley at hansenpartnership dot com> Patrick Mansfield <patmans at us dot ibm dot com> Christoph Hellwig <hch at infradead dot org> Doug Ledford <dledford at redhat dot com> diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 24eac1bc735..8a4863c4edd 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -32,6 +32,7 @@ Currently, these files are in /proc/sys/vm: - min_unmapped_ratio - min_slab_ratio - panic_on_oom +- oom_dump_tasks - oom_kill_allocating_task - mmap_min_address - numa_zonelist_order @@ -232,6 +233,27 @@ according to your policy of failover. ============================================================= +oom_dump_tasks + +Enables a system-wide task dump (excluding kernel threads) to be +produced when the kernel performs an OOM-killing and includes such +information as pid, uid, tgid, vm size, rss, cpu, oom_adj score, and +name. This is helpful to determine why the OOM killer was invoked +and to identify the rogue task that caused it. + +If this is set to zero, this information is suppressed. On very +large systems with thousands of tasks it may not be feasible to dump +the memory state information for each one. Such systems should not +be forced to incur a performance penalty in OOM conditions when the +information may not be desired. + +If this is set to non-zero, this information is shown whenever the +OOM killer actually kills a memory-hogging task. + +The default value is 0. + +============================================================= + oom_kill_allocating_task This enables or disables killing the OOM-triggering task in diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt new file mode 100644 index 00000000000..ba9c2da5a8c --- /dev/null +++ b/Documentation/thermal/sysfs-api.txt @@ -0,0 +1,245 @@ +Generic Thermal Sysfs driver How To +========================= + +Written by Sujith Thomas <sujith.thomas@intel.com>, Zhang Rui <rui.zhang@intel.com> + +Updated: 2 January 2008 + +Copyright (c) 2008 Intel Corporation + + +0. Introduction + +The generic thermal sysfs provides a set of interfaces for thermal zone devices (sensors) +and thermal cooling devices (fan, processor...) to register with the thermal management +solution and to be a part of it. + +This how-to focuses on enabling new thermal zone and cooling devices to participate +in thermal management. +This solution is platform independent and any type of thermal zone devices and +cooling devices should be able to make use of the infrastructure. + +The main task of the thermal sysfs driver is to expose thermal zone attributes as well +as cooling device attributes to the user space. +An intelligent thermal management application can make decisions based on inputs +from thermal zone attributes (the current temperature and trip point temperature) +and throttle appropriate devices. + +[0-*] denotes any positive number starting from 0 +[1-*] denotes any positive number starting from 1 + +1. thermal sysfs driver interface functions + +1.1 thermal zone device interface +1.1.1 struct thermal_zone_device *thermal_zone_device_register(char *name, int trips, + void *devdata, struct thermal_zone_device_ops *ops) + + This interface function adds a new thermal zone device (sensor) to + /sys/class/thermal folder as thermal_zone[0-*]. + It tries to bind all the thermal cooling devices registered at the same time. + + name: the thermal zone name. + trips: the total number of trip points this thermal zone supports. + devdata: device private data + ops: thermal zone device call-backs. + .bind: bind the thermal zone device with a thermal cooling device. + .unbind: unbind the thermal zone device with a thermal cooling device. + .get_temp: get the current temperature of the thermal zone. + .get_mode: get the current mode (user/kernel) of the thermal zone. + "kernel" means thermal management is done in kernel. + "user" will prevent kernel thermal driver actions upon trip points + so that user applications can take charge of thermal management. + .set_mode: set the mode (user/kernel) of the thermal zone. + .get_trip_type: get the type of certain trip point. + .get_trip_temp: get the temperature above which the certain trip point + will be fired. + +1.1.2 void thermal_zone_device_unregister(struct thermal_zone_device *tz) + + This interface function removes the thermal zone device. + It deletes the corresponding entry form /sys/class/thermal folder and unbind all + the thermal cooling devices it uses. + +1.2 thermal cooling device interface +1.2.1 struct thermal_cooling_device *thermal_cooling_device_register(char *name, + void *devdata, struct thermal_cooling_device_ops *) + + This interface function adds a new thermal cooling device (fan/processor/...) to + /sys/class/thermal/ folder as cooling_device[0-*]. + It tries to bind itself to all the thermal zone devices register at the same time. + name: the cooling device name. + devdata: device private data. + ops: thermal cooling devices call-backs. + .get_max_state: get the Maximum throttle state of the cooling device. + .get_cur_state: get the Current throttle state of the cooling device. + .set_cur_state: set the Current throttle state of the cooling device. + +1.2.2 void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev) + + This interface function remove the thermal cooling device. + It deletes the corresponding entry form /sys/class/thermal folder and unbind + itself from all the thermal zone devices using it. + +1.3 interface for binding a thermal zone device with a thermal cooling device +1.3.1 int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, + int trip, struct thermal_cooling_device *cdev); + + This interface function bind a thermal cooling device to the certain trip point + of a thermal zone device. + This function is usually called in the thermal zone device .bind callback. + tz: the thermal zone device + cdev: thermal cooling device + trip: indicates which trip point the cooling devices is associated with + in this thermal zone. + +1.3.2 int thermal_zone_unbind_cooling_device(struct thermal_zone_device *tz, + int trip, struct thermal_cooling_device *cdev); + + This interface function unbind a thermal cooling device from the certain trip point + of a thermal zone device. + This function is usually called in the thermal zone device .unbind callback. + tz: the thermal zone device + cdev: thermal cooling device + trip: indicates which trip point the cooling devices is associated with + in this thermal zone. + +2. sysfs attributes structure + +RO read only value +RW read/write value + +All thermal sysfs attributes will be represented under /sys/class/thermal + +Thermal zone device sys I/F, created once it's registered: +|thermal_zone[0-*]: + |-----type: Type of the thermal zone + |-----temp: Current temperature + |-----mode: Working mode of the thermal zone + |-----trip_point_[0-*]_temp: Trip point temperature + |-----trip_point_[0-*]_type: Trip point type + +Thermal cooling device sys I/F, created once it's registered: +|cooling_device[0-*]: + |-----type : Type of the cooling device(processor/fan/...) + |-----max_state: Maximum cooling state of the cooling device + |-----cur_state: Current cooling state of the cooling device + + +These two dynamic attributes are created/removed in pairs. +They represent the relationship between a thermal zone and its associated cooling device. +They are created/removed for each +thermal_zone_bind_cooling_device/thermal_zone_unbind_cooling_device successful execution. + +|thermal_zone[0-*] + |-----cdev[0-*]: The [0-*]th cooling device in the current thermal zone + |-----cdev[0-*]_trip_point: Trip point that cdev[0-*] is associated with + + +*************************** +* Thermal zone attributes * +*************************** + +type Strings which represent the thermal zone type. + This is given by thermal zone driver as part of registration. + Eg: "ACPI thermal zone" indicates it's a ACPI thermal device + RO + Optional + +temp Current temperature as reported by thermal zone (sensor) + Unit: degree Celsius + RO + Required + +mode One of the predefined values in [kernel, user] + This file gives information about the algorithm + that is currently managing the thermal zone. + It can be either default kernel based algorithm + or user space application. + RW + Optional + kernel = Thermal management in kernel thermal zone driver. + user = Preventing kernel thermal zone driver actions upon + trip points so that user application can take full + charge of the thermal management. + +trip_point_[0-*]_temp The temperature above which trip point will be fired + Unit: degree Celsius + RO + Optional + +trip_point_[0-*]_type Strings which indicate the type of the trip point + E.g. it can be one of critical, hot, passive, + active[0-*] for ACPI thermal zone. + RO + Optional + +cdev[0-*] Sysfs link to the thermal cooling device node where the sys I/F + for cooling device throttling control represents. + RO + Optional + +cdev[0-*]_trip_point The trip point with which cdev[0-*] is associated in this thermal zone + -1 means the cooling device is not associated with any trip point. + RO + Optional + +****************************** +* Cooling device attributes * +****************************** + +type String which represents the type of device + eg: For generic ACPI: this should be "Fan", + "Processor" or "LCD" + eg. For memory controller device on intel_menlow platform: + this should be "Memory controller" + RO + Optional + +max_state The maximum permissible cooling state of this cooling device. + RO + Required + +cur_state The current cooling state of this cooling device. + the value can any integer numbers between 0 and max_state, + cur_state == 0 means no cooling + cur_state == max_state means the maximum cooling. + RW + Required + +3. A simple implementation + +ACPI thermal zone may support multiple trip points like critical/hot/passive/active. +If an ACPI thermal zone supports critical, passive, active[0] and active[1] at the same time, +it may register itself as a thermal_zone_device (thermal_zone1) with 4 trip points in all. +It has one processor and one fan, which are both registered as thermal_cooling_device. +If the processor is listed in _PSL method, and the fan is listed in _AL0 method, +the sys I/F structure will be built like this: + +/sys/class/thermal: + +|thermal_zone1: + |-----type: ACPI thermal zone + |-----temp: 37 + |-----mode: kernel + |-----trip_point_0_temp: 100 + |-----trip_point_0_type: critical + |-----trip_point_1_temp: 80 + |-----trip_point_1_type: passive + |-----trip_point_2_temp: 70 + |-----trip_point_2_type: active[0] + |-----trip_point_3_temp: 60 + |-----trip_point_3_type: active[1] + |-----cdev0: --->/sys/class/thermal/cooling_device0 + |-----cdev0_trip_point: 1 /* cdev0 can be used for passive */ + |-----cdev1: --->/sys/class/thermal/cooling_device3 + |-----cdev1_trip_point: 2 /* cdev1 can be used for active[0]*/ + +|cooling_device0: + |-----type: Processor + |-----max_state: 8 + |-----cur_state: 0 + +|cooling_device3: + |-----type: Fan + |-----max_state: 2 + |-----cur_state: 0 diff --git a/Documentation/thinkpad-acpi.txt b/Documentation/thinkpad-acpi.txt index 10c041ca13c..6c2477754a2 100644 --- a/Documentation/thinkpad-acpi.txt +++ b/Documentation/thinkpad-acpi.txt @@ -1,7 +1,7 @@ ThinkPad ACPI Extras Driver - Version 0.17 - October 04th, 2007 + Version 0.19 + January 06th, 2008 Borislav Deianov <borislav@users.sf.net> Henrique de Moraes Holschuh <hmh@hmh.eng.br> @@ -215,6 +215,11 @@ The following commands can be written to the /proc/acpi/ibm/hotkey file: ... any other 8-hex-digit mask ... echo reset > /proc/acpi/ibm/hotkey -- restore the original mask +The procfs interface does not support NVRAM polling control. So as to +maintain maximum bug-to-bug compatibility, it does not report any masks, +nor does it allow one to manipulate the hot key mask when the firmware +does not support masks at all, even if NVRAM polling is in use. + sysfs notes: hotkey_bios_enabled: @@ -231,17 +236,26 @@ sysfs notes: to this value. hotkey_enable: - Enables/disables the hot keys feature, and reports - current status of the hot keys feature. + Enables/disables the hot keys feature in the ACPI + firmware, and reports current status of the hot keys + feature. Has no effect on the NVRAM hot key polling + functionality. 0: disables the hot keys feature / feature disabled 1: enables the hot keys feature / feature enabled hotkey_mask: - bit mask to enable driver-handling and ACPI event - generation for each hot key (see above). Returns the - current status of the hot keys mask, and allows one to - modify it. + bit mask to enable driver-handling (and depending on + the firmware, ACPI event generation) for each hot key + (see above). Returns the current status of the hot keys + mask, and allows one to modify it. + + Note: when NVRAM polling is active, the firmware mask + will be different from the value returned by + hotkey_mask. The driver will retain enabled bits for + hotkeys that are under NVRAM polling even if the + firmware refuses them, and will not set these bits on + the firmware hot key mask. hotkey_all_mask: bit mask that should enable event reporting for all @@ -257,12 +271,48 @@ sysfs notes: handled by the firmware anyway. Echo it to hotkey_mask above, to use. + hotkey_source_mask: + bit mask that selects which hot keys will the driver + poll the NVRAM for. This is auto-detected by the driver + based on the capabilities reported by the ACPI firmware, + but it can be overridden at runtime. + + Hot keys whose bits are set in both hotkey_source_mask + and also on hotkey_mask are polled for in NVRAM. Only a + few hot keys are available through CMOS NVRAM polling. + + Warning: when in NVRAM mode, the volume up/down/mute + keys are synthesized according to changes in the mixer, + so you have to use volume up or volume down to unmute, + as per the ThinkPad volume mixer user interface. When + in ACPI event mode, volume up/down/mute are reported as + separate events, but this behaviour may be corrected in + future releases of this driver, in which case the + ThinkPad volume mixer user interface semanthics will be + enforced. + + hotkey_poll_freq: + frequency in Hz for hot key polling. It must be between + 0 and 25 Hz. Polling is only carried out when strictly + needed. + + Setting hotkey_poll_freq to zero disables polling, and + will cause hot key presses that require NVRAM polling + to never be reported. + + Setting hotkey_poll_freq too low will cause repeated + pressings of the same hot key to be misreported as a + single key press, or to not even be detected at all. + The recommended polling frequency is 10Hz. + hotkey_radio_sw: if the ThinkPad has a hardware radio switch, this attribute will read 0 if the switch is in the "radios disabled" postition, and 1 if the switch is in the "radios enabled" position. + This attribute has poll()/select() support. + hotkey_report_mode: Returns the state of the procfs ACPI event report mode filter for hot keys. If it is set to 1 (the default), @@ -277,6 +327,25 @@ sysfs notes: May return -EPERM (write access locked out by module parameter) or -EACCES (read-only). + wakeup_reason: + Set to 1 if the system is waking up because the user + requested a bay ejection. Set to 2 if the system is + waking up because the user requested the system to + undock. Set to zero for normal wake-ups or wake-ups + due to unknown reasons. + + This attribute has poll()/select() support. + + wakeup_hotunplug_complete: + Set to 1 if the system was waken up because of an + undock or bay ejection request, and that request + was sucessfully completed. At this point, it might + be useful to send the system back to sleep, at the + user's choice. Refer to HKEY events 0x4003 and + 0x3003, below. + + This attribute has poll()/select() support. + input layer notes: A Hot key is mapped to a single input layer EV_KEY event, possibly @@ -427,6 +496,23 @@ Non hot-key ACPI HKEY event map: The above events are not propagated by the driver, except for legacy compatibility purposes when hotkey_report_mode is set to 1. +0x2304 System is waking up from suspend to undock +0x2305 System is waking up from suspend to eject bay +0x2404 System is waking up from hibernation to undock +0x2405 System is waking up from hibernation to eject bay + +The above events are never propagated by the driver. + +0x3003 Bay ejection (see 0x2x05) complete, can sleep again +0x4003 Undocked (see 0x2x04), can sleep again +0x5009 Tablet swivel: switched to tablet mode +0x500A Tablet swivel: switched to normal mode +0x500B Tablet pen insterted into its storage bay +0x500C Tablet pen removed from its storage bay +0x5010 Brightness level changed (newer Lenovo BIOSes) + +The above events are propagated by the driver. + Compatibility notes: ibm-acpi and thinkpad-acpi 0.15 (mainline kernels before 2.6.23) never @@ -1263,3 +1349,17 @@ Sysfs interface changelog: and the hwmon class for libsensors4 (lm-sensors 3) compatibility. Moved all hwmon attributes to this new platform device. + +0x020100: Marker for thinkpad-acpi with hot key NVRAM polling + support. If you must, use it to know you should not + start an userspace NVRAM poller (allows to detect when + NVRAM is compiled out by the user because it is + unneeded/undesired in the first place). +0x020101: Marker for thinkpad-acpi with hot key NVRAM polling + and proper hotkey_mask semanthics (version 8 of the + NVRAM polling patch). Some development snapshots of + 0.18 had an earlier version that did strange things + to hotkey_mask. + +0x020200: Add poll()/select() support to the following attributes: + hotkey_radio_sw, wakeup_hotunplug_complete, wakeup_reason diff --git a/Documentation/vm/slabinfo.c b/Documentation/vm/slabinfo.c index 488c1f31b99..7123fee708c 100644 --- a/Documentation/vm/slabinfo.c +++ b/Documentation/vm/slabinfo.c @@ -32,6 +32,13 @@ struct slabinfo { int sanity_checks, slab_size, store_user, trace; int order, poison, reclaim_account, red_zone; unsigned long partial, objects, slabs; + unsigned long alloc_fastpath, alloc_slowpath; + unsigned long free_fastpath, free_slowpath; + unsigned long free_frozen, free_add_partial, free_remove_partial; + unsigned long alloc_from_partial, alloc_slab, free_slab, alloc_refill; + unsigned long cpuslab_flush, deactivate_full, deactivate_empty; + unsigned long deactivate_to_head, deactivate_to_tail; + unsigned long deactivate_remote_frees; int numa[MAX_NODES]; int numa_partial[MAX_NODES]; } slabinfo[MAX_SLABS]; @@ -64,8 +71,10 @@ int show_inverted = 0; int show_single_ref = 0; int show_totals = 0; int sort_size = 0; +int sort_active = 0; int set_debug = 0; int show_ops = 0; +int show_activity = 0; /* Debug options */ int sanity = 0; @@ -93,8 +102,10 @@ void usage(void) printf("slabinfo 5/7/2007. (c) 2007 sgi. clameter@sgi.com\n\n" "slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n" "-a|--aliases Show aliases\n" + "-A|--activity Most active slabs first\n" "-d<options>|--debug=<options> Set/Clear Debug options\n" - "-e|--empty Show empty slabs\n" + "-D|--display-active Switch line format to activity\n" + "-e|--empty Show empty slabs\n" "-f|--first-alias Show first alias\n" "-h|--help Show usage information\n" "-i|--inverted Inverted list\n" @@ -281,8 +292,11 @@ int line = 0; void first_line(void) { - printf("Name Objects Objsize Space " - "Slabs/Part/Cpu O/S O %%Fr %%Ef Flg\n"); + if (show_activity) + printf("Name Objects Alloc Free %%Fast\n"); + else + printf("Name Objects Objsize Space " + "Slabs/Part/Cpu O/S O %%Fr %%Ef Flg\n"); } /* @@ -309,6 +323,12 @@ unsigned long slab_size(struct slabinfo *s) return s->slabs * (page_size << s->order); } +unsigned long slab_activity(struct slabinfo *s) +{ + return s->alloc_fastpath + s->free_fastpath + + s->alloc_slowpath + s->free_slowpath; +} + void slab_numa(struct slabinfo *s, int mode) { int node; @@ -392,6 +412,71 @@ const char *onoff(int x) return "Off"; } +void slab_stats(struct slabinfo *s) +{ + unsigned long total_alloc; + unsigned long total_free; + unsigned long total; + + if (!s->alloc_slab) + return; + + total_alloc = s->alloc_fastpath + s->alloc_slowpath; + total_free = s->free_fastpath + s->free_slowpath; + + if (!total_alloc) + return; + + printf("\n"); + printf("Slab Perf Counter Alloc Free %%Al %%Fr\n"); + printf("--------------------------------------------------\n"); + printf("Fastpath %8lu %8lu %3lu %3lu\n", + s->alloc_fastpath, s->free_fastpath, + s->alloc_fastpath * 100 / total_alloc, + s->free_fastpath * 100 / total_free); + printf("Slowpath %8lu %8lu %3lu %3lu\n", + total_alloc - s->alloc_fastpath, s->free_slowpath, + (total_alloc - s->alloc_fastpath) * 100 / total_alloc, + s->free_slowpath * 100 / total_free); + printf("Page Alloc %8lu %8lu %3lu %3lu\n", + s->alloc_slab, s->free_slab, + s->alloc_slab * 100 / total_alloc, + s->free_slab * 100 / total_free); + printf("Add partial %8lu %8lu %3lu %3lu\n", + s->deactivate_to_head + s->deactivate_to_tail, + s->free_add_partial, + (s->deactivate_to_head + s->deactivate_to_tail) * 100 / total_alloc, + s->free_add_partial * 100 / total_free); + printf("Remove partial %8lu %8lu %3lu %3lu\n", + s->alloc_from_partial, s->free_remove_partial, + s->alloc_from_partial * 100 / total_alloc, + s->free_remove_partial * 100 / total_free); + + printf("RemoteObj/SlabFrozen %8lu %8lu %3lu %3lu\n", + s->deactivate_remote_frees, s->free_frozen, + s->deactivate_remote_frees * 100 / total_alloc, + s->free_frozen * 100 / total_free); + + printf("Total %8lu %8lu\n\n", total_alloc, total_free); + + if (s->cpuslab_flush) + printf("Flushes %8lu\n", s->cpuslab_flush); + + if (s->alloc_refill) + printf("Refill %8lu\n", s->alloc_refill); + + total = s->deactivate_full + s->deactivate_empty + + s->deactivate_to_head + s->deactivate_to_tail; + + if (total) + printf("Deactivate Full=%lu(%lu%%) Empty=%lu(%lu%%) " + "ToHead=%lu(%lu%%) ToTail=%lu(%lu%%)\n", + s->deactivate_full, (s->deactivate_full * 100) / total, + s->deactivate_empty, (s->deactivate_empty * 100) / total, + s->deactivate_to_head, (s->deactivate_to_head * 100) / total, + s->deactivate_to_tail, (s->deactivate_to_tail * 100) / total); +} + void report(struct slabinfo *s) { if (strcmp(s->name, "*") == 0) @@ -430,6 +515,7 @@ void report(struct slabinfo *s) ops(s); show_tracking(s); slab_numa(s, 1); + slab_stats(s); } void slabcache(struct slabinfo *s) @@ -479,13 +565,27 @@ void slabcache(struct slabinfo *s) *p++ = 'T'; *p = 0; - printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n", - s->name, s->objects, s->object_size, size_str, dist_str, - s->objs_per_slab, s->order, - s->slabs ? (s->partial * 100) / s->slabs : 100, - s->slabs ? (s->objects * s->object_size * 100) / - (s->slabs * (page_size << s->order)) : 100, - flags); + if (show_activity) { + unsigned long total_alloc; + unsigned long total_free; + + total_alloc = s->alloc_fastpath + s->alloc_slowpath; + total_free = s->free_fastpath + s->free_slowpath; + + printf("%-21s %8ld %8ld %8ld %3ld %3ld \n", + s->name, s->objects, + total_alloc, total_free, + total_alloc ? (s->alloc_fastpath * 100 / total_alloc) : 0, + total_free ? (s->free_fastpath * 100 / total_free) : 0); + } + else + printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n", + s->name, s->objects, s->object_size, size_str, dist_str, + s->objs_per_slab, s->order, + s->slabs ? (s->partial * 100) / s->slabs : 100, + s->slabs ? (s->objects * s->object_size * 100) / + (s->slabs * (page_size << s->order)) : 100, + flags); } /* @@ -892,6 +992,8 @@ void sort_slabs(void) if (sort_size) result = slab_size(s1) < slab_size(s2); + else if (sort_active) + result = slab_activity(s1) < slab_activity(s2); else result = strcasecmp(s1->name, s2->name); @@ -1074,6 +1176,23 @@ void read_slab_dir(void) free(t); slab->store_user = get_obj("store_user"); slab->trace = get_obj("trace"); + slab->alloc_fastpath = get_obj("alloc_fastpath"); + slab->alloc_slowpath = get_obj("alloc_slowpath"); + slab->free_fastpath = get_obj("free_fastpath"); + slab->free_slowpath = get_obj("free_slowpath"); + slab->free_frozen= get_obj("free_frozen"); + slab->free_add_partial = get_obj("free_add_partial"); + slab->free_remove_partial = get_obj("free_remove_partial"); + slab->alloc_from_partial = get_obj("alloc_from_partial"); + slab->alloc_slab = get_obj("alloc_slab"); + slab->alloc_refill = get_obj("alloc_refill"); + slab->free_slab = get_obj("free_slab"); + slab->cpuslab_flush = get_obj("cpuslab_flush"); + slab->deactivate_full = get_obj("deactivate_full"); + slab->deactivate_empty = get_obj("deactivate_empty"); + slab->deactivate_to_head = get_obj("deactivate_to_head"); + slab->deactivate_to_tail = get_obj("deactivate_to_tail"); + slab->deactivate_remote_frees = get_obj("deactivate_remote_frees"); chdir(".."); if (slab->name[0] == ':') alias_targets++; @@ -1124,7 +1243,9 @@ void output_slabs(void) struct option opts[] = { { "aliases", 0, NULL, 'a' }, + { "activity", 0, NULL, 'A' }, { "debug", 2, NULL, 'd' }, + { "display-activity", 0, NULL, 'D' }, { "empty", 0, NULL, 'e' }, { "first-alias", 0, NULL, 'f' }, { "help", 0, NULL, 'h' }, @@ -1149,7 +1270,7 @@ int main(int argc, char *argv[]) page_size = getpagesize(); - while ((c = getopt_long(argc, argv, "ad::efhil1noprstvzTS", + while ((c = getopt_long(argc, argv, "aAd::Defhil1noprstvzTS", opts, NULL)) != -1) switch (c) { case '1': @@ -1158,11 +1279,17 @@ int main(int argc, char *argv[]) case 'a': show_alias = 1; break; + case 'A': + sort_active = 1; + break; case 'd': set_debug = 1; if (!debug_opt_scan(optarg)) fatal("Invalid debug option '%s'\n", optarg); break; + case 'D': + show_activity = 1; + break; case 'e': show_empty = 1; break; |