diff options
733 files changed, 34006 insertions, 18105 deletions
diff --git a/Documentation/DocBook/tracepoint.tmpl b/Documentation/DocBook/tracepoint.tmpl index b0756d0fd57..8bca1d5cec0 100644 --- a/Documentation/DocBook/tracepoint.tmpl +++ b/Documentation/DocBook/tracepoint.tmpl @@ -86,4 +86,9 @@ !Iinclude/trace/events/irq.h </chapter> + <chapter id="signal"> + <title>SIGNAL</title> +!Iinclude/trace/events/signal.h + </chapter> + </book> diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index 187bbf10c92..8608fd85e92 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt @@ -1,185 +1,10 @@ CONFIG_RCU_TRACE debugfs Files and Formats -The rcupreempt and rcutree implementations of RCU provide debugfs trace -output that summarizes counters and state. This information is useful for -debugging RCU itself, and can sometimes also help to debug abuses of RCU. -Note that the rcuclassic implementation of RCU does not provide debugfs -trace output. - -The following sections describe the debugfs files and formats for -preemptable RCU (rcupreempt) and hierarchical RCU (rcutree). - - -Preemptable RCU debugfs Files and Formats - -This implementation of RCU provides three debugfs files under the -top-level directory RCU: rcu/rcuctrs (which displays the per-CPU -counters used by preemptable RCU) rcu/rcugp (which displays grace-period -counters), and rcu/rcustats (which internal counters for debugging RCU). - -The output of "cat rcu/rcuctrs" looks as follows: - -CPU last cur F M - 0 5 -5 0 0 - 1 -1 0 0 0 - 2 0 1 0 0 - 3 0 1 0 0 - 4 0 1 0 0 - 5 0 1 0 0 - 6 0 2 0 0 - 7 0 -1 0 0 - 8 0 1 0 0 -ggp = 26226, state = waitzero - -The per-CPU fields are as follows: - -o "CPU" gives the CPU number. Offline CPUs are not displayed. - -o "last" gives the value of the counter that is being decremented - for the current grace period phase. In the example above, - the counters sum to 4, indicating that there are still four - RCU read-side critical sections still running that started - before the last counter flip. - -o "cur" gives the value of the counter that is currently being - both incremented (by rcu_read_lock()) and decremented (by - rcu_read_unlock()). In the example above, the counters sum to - 1, indicating that there is only one RCU read-side critical section - still running that started after the last counter flip. - -o "F" indicates whether RCU is waiting for this CPU to acknowledge - a counter flip. In the above example, RCU is not waiting on any, - which is consistent with the state being "waitzero" rather than - "waitack". - -o "M" indicates whether RCU is waiting for this CPU to execute a - memory barrier. In the above example, RCU is not waiting on any, - which is consistent with the state being "waitzero" rather than - "waitmb". - -o "ggp" is the global grace-period counter. - -o "state" is the RCU state, which can be one of the following: - - o "idle": there is no grace period in progress. - - o "waitack": RCU just incremented the global grace-period - counter, which has the effect of reversing the roles of - the "last" and "cur" counters above, and is waiting for - all the CPUs to acknowledge the flip. Once the flip has - been acknowledged, CPUs will no longer be incrementing - what are now the "last" counters, so that their sum will - decrease monotonically down to zero. - - o "waitzero": RCU is waiting for the sum of the "last" counters - to decrease to zero. - - o "waitmb": RCU is waiting for each CPU to execute a memory - barrier, which ensures that instructions from a given CPU's - last RCU read-side critical section cannot be reordered - with instructions following the memory-barrier instruction. - -The output of "cat rcu/rcugp" looks as follows: - -oldggp=48870 newggp=48873 - -Note that reading from this file provokes a synchronize_rcu(). The -"oldggp" value is that of "ggp" from rcu/rcuctrs above, taken before -executing the synchronize_rcu(), and the "newggp" value is also the -"ggp" value, but taken after the synchronize_rcu() command returns. - - -The output of "cat rcu/rcugp" looks as follows: - -na=1337955 nl=40 wa=1337915 wl=44 da=1337871 dl=0 dr=1337871 di=1337871 -1=50989 e1=6138 i1=49722 ie1=82 g1=49640 a1=315203 ae1=265563 a2=49640 -z1=1401244 ze1=1351605 z2=49639 m1=5661253 me1=5611614 m2=49639 - -These are counters tracking internal preemptable-RCU events, however, -some of them may be useful for debugging algorithms using RCU. In -particular, the "nl", "wl", and "dl" values track the number of RCU -callbacks in various states. The fields are as follows: - -o "na" is the total number of RCU callbacks that have been enqueued - since boot. - -o "nl" is the number of RCU callbacks waiting for the previous - grace period to end so that they can start waiting on the next - grace period. - -o "wa" is the total number of RCU callbacks that have started waiting - for a grace period since boot. "na" should be roughly equal to - "nl" plus "wa". - -o "wl" is the number of RCU callbacks currently waiting for their - grace period to end. - -o "da" is the total number of RCU callbacks whose grace periods - have completed since boot. "wa" should be roughly equal to - "wl" plus "da". - -o "dr" is the total number of RCU callbacks that have been removed - from the list of callbacks ready to invoke. "dr" should be roughly - equal to "da". - -o "di" is the total number of RCU callbacks that have been invoked - since boot. "di" should be roughly equal to "da", though some - early versions of preemptable RCU had a bug so that only the - last CPU's count of invocations was displayed, rather than the - sum of all CPU's counts. - -o "1" is the number of calls to rcu_try_flip(). This should be - roughly equal to the sum of "e1", "i1", "a1", "z1", and "m1" - described below. In other words, the number of times that - the state machine is visited should be equal to the sum of the - number of times that each state is visited plus the number of - times that the state-machine lock acquisition failed. - -o "e1" is the number of times that rcu_try_flip() was unable to - acquire the fliplock. - -o "i1" is the number of calls to rcu_try_flip_idle(). - -o "ie1" is the number of times rcu_try_flip_idle() exited early - due to the calling CPU having no work for RCU. - -o "g1" is the number of times that rcu_try_flip_idle() decided - to start a new grace period. "i1" should be roughly equal to - "ie1" plus "g1". - -o "a1" is the number of calls to rcu_try_flip_waitack(). - -o "ae1" is the number of times that rcu_try_flip_waitack() found - that at least one CPU had not yet acknowledge the new grace period - (AKA "counter flip"). - -o "a2" is the number of time rcu_try_flip_waitack() found that - all CPUs had acknowledged. "a1" should be roughly equal to - "ae1" plus "a2". (This particular output was collected on - a 128-CPU machine, hence the smaller-than-usual fraction of - calls to rcu_try_flip_waitack() finding all CPUs having already - acknowledged.) - -o "z1" is the number of calls to rcu_try_flip_waitzero(). - -o "ze1" is the number of times that rcu_try_flip_waitzero() found - that not all of the old RCU read-side critical sections had - completed. - -o "z2" is the number of times that rcu_try_flip_waitzero() finds - the sum of the counters equal to zero, in other words, that - all of the old RCU read-side critical sections had completed. - The value of "z1" should be roughly equal to "ze1" plus - "z2". - -o "m1" is the number of calls to rcu_try_flip_waitmb(). - -o "me1" is the number of times that rcu_try_flip_waitmb() finds - that at least one CPU has not yet executed a memory barrier. - -o "m2" is the number of times that rcu_try_flip_waitmb() finds that - all CPUs have executed a memory barrier. +The rcutree implementation of RCU provides debugfs trace output that +summarizes counters and state. This information is useful for debugging +RCU itself, and can sometimes also help to debug abuses of RCU. +The following sections describe the debugfs files and formats. Hierarchical RCU debugfs Files and Formats @@ -210,9 +35,10 @@ rcu_bh: 6 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=859/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 7 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3761/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 -The first section lists the rcu_data structures for rcu, the second for -rcu_bh. Each section has one line per CPU, or eight for this 8-CPU system. -The fields are as follows: +The first section lists the rcu_data structures for rcu_sched, the second +for rcu_bh. Note that CONFIG_TREE_PREEMPT_RCU kernels will have an +additional section for rcu_preempt. Each section has one line per CPU, +or eight for this 8-CPU system. The fields are as follows: o The number at the beginning of each line is the CPU number. CPUs numbers followed by an exclamation mark are offline, @@ -223,9 +49,9 @@ o The number at the beginning of each line is the CPU number. o "c" is the count of grace periods that this CPU believes have completed. CPUs in dynticks idle mode may lag quite a ways - behind, for example, CPU 4 under "rcu" above, which has slept - through the past 25 RCU grace periods. It is not unusual to - see CPUs lagging by thousands of grace periods. + behind, for example, CPU 4 under "rcu_sched" above, which has + slept through the past 25 RCU grace periods. It is not unusual + to see CPUs lagging by thousands of grace periods. o "g" is the count of grace periods that this CPU believes have started. Again, CPUs in dynticks idle mode may lag behind. @@ -308,8 +134,10 @@ The output of "cat rcu/rcugp" looks as follows: rcu_sched: completed=33062 gpnum=33063 rcu_bh: completed=464 gpnum=464 -Again, this output is for both "rcu" and "rcu_bh". The fields are -taken from the rcu_state structure, and are as follows: +Again, this output is for both "rcu_sched" and "rcu_bh". Note that +kernels built with CONFIG_TREE_PREEMPT_RCU will have an additional +"rcu_preempt" line. The fields are taken from the rcu_state structure, +and are as follows: o "completed" is the number of grace periods that have completed. It is comparable to the "c" field from rcu/rcudata in that a @@ -324,23 +152,24 @@ o "gpnum" is the number of grace periods that have started. It is If these two fields are equal (as they are for "rcu_bh" above), then there is no grace period in progress, in other words, RCU is idle. On the other hand, if the two fields differ (as they - do for "rcu" above), then an RCU grace period is in progress. + do for "rcu_sched" above), then an RCU grace period is in progress. The output of "cat rcu/rcuhier" looks as follows, with very long lines: -c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6 -1/1 0:127 ^0 -3/3 0:35 ^0 0/0 36:71 ^1 0/0 72:107 ^2 0/0 108:127 ^3 -3/3f 0:5 ^0 2/3 6:11 ^1 0/0 12:17 ^2 0/0 18:23 ^3 0/0 24:29 ^4 0/0 30:35 ^5 0/0 36:41 ^0 0/0 42:47 ^1 0/0 48:53 ^2 0/0 54:59 ^3 0/0 60:65 ^4 0/0 66:71 ^5 0/0 72:77 ^0 0/0 78:83 ^1 0/0 84:89 ^2 0/0 90:95 ^3 0/0 96:101 ^4 0/0 102:107 ^5 0/0 108:113 ^0 0/0 114:119 ^1 0/0 120:125 ^2 0/0 126:127 ^3 +c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6 oqlen=0 +1/1 .>. 0:127 ^0 +3/3 .>. 0:35 ^0 0/0 .>. 36:71 ^1 0/0 .>. 72:107 ^2 0/0 .>. 108:127 ^3 +3/3f .>. 0:5 ^0 2/3 .>. 6:11 ^1 0/0 .>. 12:17 ^2 0/0 .>. 18:23 ^3 0/0 .>. 24:29 ^4 0/0 .>. 30:35 ^5 0/0 .>. 36:41 ^0 0/0 .>. 42:47 ^1 0/0 .>. 48:53 ^2 0/0 .>. 54:59 ^3 0/0 .>. 60:65 ^4 0/0 .>. 66:71 ^5 0/0 .>. 72:77 ^0 0/0 .>. 78:83 ^1 0/0 .>. 84:89 ^2 0/0 .>. 90:95 ^3 0/0 .>. 96:101 ^4 0/0 .>. 102:107 ^5 0/0 .>. 108:113 ^0 0/0 .>. 114:119 ^1 0/0 .>. 120:125 ^2 0/0 .>. 126:127 ^3 rcu_bh: -c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0 -0/1 0:127 ^0 -0/3 0:35 ^0 0/0 36:71 ^1 0/0 72:107 ^2 0/0 108:127 ^3 -0/3f 0:5 ^0 0/3 6:11 ^1 0/0 12:17 ^2 0/0 18:23 ^3 0/0 24:29 ^4 0/0 30:35 ^5 0/0 36:41 ^0 0/0 42:47 ^1 0/0 48:53 ^2 0/0 54:59 ^3 0/0 60:65 ^4 0/0 66:71 ^5 0/0 72:77 ^0 0/0 78:83 ^1 0/0 84:89 ^2 0/0 90:95 ^3 0/0 96:101 ^4 0/0 102:107 ^5 0/0 108:113 ^0 0/0 114:119 ^1 0/0 120:125 ^2 0/0 126:127 ^3 +c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0 oqlen=0 +0/1 .>. 0:127 ^0 +0/3 .>. 0:35 ^0 0/0 .>. 36:71 ^1 0/0 .>. 72:107 ^2 0/0 .>. 108:127 ^3 +0/3f .>. 0:5 ^0 0/3 .>. 6:11 ^1 0/0 .>. 12:17 ^2 0/0 .>. 18:23 ^3 0/0 .>. 24:29 ^4 0/0 .>. 30:35 ^5 0/0 .>. 36:41 ^0 0/0 .>. 42:47 ^1 0/0 .>. 48:53 ^2 0/0 .>. 54:59 ^3 0/0 .>. 60:65 ^4 0/0 .>. 66:71 ^5 0/0 .>. 72:77 ^0 0/0 .>. 78:83 ^1 0/0 .>. 84:89 ^2 0/0 .>. 90:95 ^3 0/0 .>. 96:101 ^4 0/0 .>. 102:107 ^5 0/0 .>. 108:113 ^0 0/0 .>. 114:119 ^1 0/0 .>. 120:125 ^2 0/0 .>. 126:127 ^3 -This is once again split into "rcu" and "rcu_bh" portions. The fields are -as follows: +This is once again split into "rcu_sched" and "rcu_bh" portions, +and CONFIG_TREE_PREEMPT_RCU kernels will again have an additional +"rcu_preempt" section. The fields are as follows: o "c" is exactly the same as "completed" under rcu/rcugp. @@ -372,6 +201,11 @@ o "fqlh" is the number of calls to force_quiescent_state() that exited immediately (without even being counted in nfqs above) due to contention on ->fqslock. +o "oqlen" is the number of callbacks on the "orphan" callback + list. RCU callbacks are placed on this list by CPUs going + offline, and are "adopted" either by the CPU helping the outgoing + CPU or by the next rcu_barrier*() call, whichever comes first. + o Each element of the form "1/1 0:127 ^0" represents one struct rcu_node. Each line represents one level of the hierarchy, from root to leaves. It is best to think of the rcu_data structures @@ -379,7 +213,7 @@ o Each element of the form "1/1 0:127 ^0" represents one struct might be either one, two, or three levels of rcu_node structures, depending on the relationship between CONFIG_RCU_FANOUT and CONFIG_NR_CPUS. - + o The numbers separated by the "/" are the qsmask followed by the qsmaskinit. The qsmask will have one bit set for each entity in the next lower level that @@ -389,10 +223,19 @@ o Each element of the form "1/1 0:127 ^0" represents one struct The value of qsmaskinit is assigned to that of qsmask at the beginning of each grace period. - For example, for "rcu", the qsmask of the first entry - of the lowest level is 0x14, meaning that we are still - waiting for CPUs 2 and 4 to check in for the current - grace period. + For example, for "rcu_sched", the qsmask of the first + entry of the lowest level is 0x14, meaning that we + are still waiting for CPUs 2 and 4 to check in for the + current grace period. + + o The characters separated by the ">" indicate the state + of the blocked-tasks lists. A "T" preceding the ">" + indicates that at least one task blocked in an RCU + read-side critical section blocks the current grace + period, while a "." preceding the ">" indicates otherwise. + The character following the ">" indicates similarly for + the next grace period. A "T" should appear in this + field only for rcu-preempt. o The numbers separated by the ":" are the range of CPUs served by this struct rcu_node. This can be helpful @@ -431,8 +274,9 @@ rcu_bh: 6 np=120834 qsp=9902 cbr=0 cng=0 gpc=6 gps=3 nf=2 nn=110921 7 np=144888 qsp=26336 cbr=0 cng=0 gpc=8 gps=2 nf=0 nn=118542 -As always, this is once again split into "rcu" and "rcu_bh" portions. -The fields are as follows: +As always, this is once again split into "rcu_sched" and "rcu_bh" +portions, with CONFIG_TREE_PREEMPT_RCU kernels having an additional +"rcu_preempt" section. The fields are as follows: o "np" is the number of times that __rcu_pending() has been invoked for the corresponding flavor of RCU. diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index e41a7fecf0d..d542ca243b8 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -830,7 +830,7 @@ sched: Critical sections Grace period Barrier SRCU: Critical sections Grace period Barrier srcu_read_lock synchronize_srcu N/A - srcu_read_unlock + srcu_read_unlock synchronize_srcu_expedited SRCU: Initialization/cleanup init_srcu_struct diff --git a/Documentation/dontdiff b/Documentation/dontdiff index e1efc400bed..e151b2a3626 100644 --- a/Documentation/dontdiff +++ b/Documentation/dontdiff @@ -65,6 +65,7 @@ aicdb.h* asm-offsets.h asm_offsets.h autoconf.h* +av_permissions.h bbootsect bin2c binkernel.spec @@ -95,12 +96,14 @@ docproc elf2ecoff elfconfig.h* fixdep +flask.h fore200e_mkfirm fore200e_pca_fw.c* gconf gen-devlist gen_crc32table gen_init_cpio +genheaders genksyms *_gray256.c ihex2fw diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index bc693fffabe..f613df8ec7b 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -6,6 +6,21 @@ be removed from this file. --------------------------- +What: USER_SCHED +When: 2.6.34 + +Why: USER_SCHED was implemented as a proof of concept for group scheduling. + The effect of USER_SCHED can already be achieved from userspace with + the help of libcgroup. The removal of USER_SCHED will also simplify + the scheduler code with the removal of one major ifdef. There are also + issues USER_SCHED has with USER_NS. A decision was taken not to fix + those and instead remove USER_SCHED. Also new group scheduling + features will not be implemented for USER_SCHED. + +Who: Dhaval Giani <dhaval@linux.vnet.ibm.com> + +--------------------------- + What: PRISM54 When: 2.6.34 diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 2c48f945546..4af0018533f 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -1072,7 +1072,8 @@ second). The meanings of the columns are as follows, from left to right: - irq: servicing interrupts - softirq: servicing softirqs - steal: involuntary wait -- guest: running a guest +- guest: running a normal guest +- guest_nice: running a niced guest The "intr" line gives counts of interrupts serviced since boot time, for each of the possible system interrupts. The first column is the total of all diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 9107b387e91..fce5b5e516c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -85,7 +85,6 @@ parameter is applicable: PPT Parallel port support is enabled. PS2 Appropriate PS/2 support is enabled. RAM RAM disk support is enabled. - ROOTPLUG The example Root Plug LSM is enabled. S390 S390 architecture is enabled. SCSI Appropriate SCSI support is enabled. A lot of drivers has their options described inside of @@ -345,6 +344,15 @@ and is between 256 and 4096 characters. It is defined in the file Change the amount of debugging information output when initialising the APIC and IO-APIC components. + show_lapic= [APIC,X86] Advanced Programmable Interrupt Controller + Limit apic dumping. The parameter defines the maximal + number of local apics being dumped. Also it is possible + to set it to "all" by meaning -- no limit here. + Format: { 1 (default) | 2 | ... | all }. + The parameter valid if only apic=debug or + apic=verbose is specified. + Example: apic=debug show_lapic=all + apm= [APM] Advanced Power Management See header of arch/x86/kernel/apm_32.c. @@ -779,6 +787,13 @@ and is between 256 and 4096 characters. It is defined in the file by the set_ftrace_notrace file in the debugfs tracing directory. + ftrace_graph_filter=[function-list] + [FTRACE] Limit the top level callers functions traced + by the function graph tracer at boot up. + function-list is a comma separated list of functions + that can be changed at run time by the + set_graph_function file in the debugfs tracing directory. + gamecon.map[2|3]= [HW,JOY] Multisystem joystick and NES/SNES/PSX pad support via parallel port (up to 5 devices per port) @@ -2032,8 +2047,15 @@ and is between 256 and 4096 characters. It is defined in the file print-fatal-signals= [KNL] debug: print fatal signals - print-fatal-signals=1: print segfault info to - the kernel console. + + If enabled, warn about various signal handling + related application anomalies: too many signals, + too many POSIX.1 timers, fatal signals causing a + coredump - etc. + + If you hit the warning due to signal overflow, + you might want to try "ulimit -i unlimited". + default: off. printk.time= Show timing data prefixed to each printk message line @@ -2164,15 +2186,6 @@ and is between 256 and 4096 characters. It is defined in the file Useful for devices that are detected asynchronously (e.g. USB and MMC devices). - root_plug.vendor_id= - [ROOTPLUG] Override the default vendor ID - - root_plug.product_id= - [ROOTPLUG] Override the default product ID - - root_plug.debug= - [ROOTPLUG] Enable debugging output - rw [KNL] Mount root device read-write on boot S [KNL] Run init in single mode @@ -2182,6 +2195,8 @@ and is between 256 and 4096 characters. It is defined in the file sbni= [NET] Granch SBNI12 leased line adapter + sched_debug [KNL] Enables verbose scheduler debug messages. + sc1200wdt= [HW,WDT] SC1200 WDT (watchdog) driver Format: <io>[,<timeout>[,<isapnp>]] diff --git a/Documentation/pcmcia/driver-changes.txt b/Documentation/pcmcia/driver-changes.txt index 059934363ca..446f43b309d 100644 --- a/Documentation/pcmcia/driver-changes.txt +++ b/Documentation/pcmcia/driver-changes.txt @@ -1,5 +1,17 @@ This file details changes in 2.6 which affect PCMCIA card driver authors: +* no cs_error / CS_CHECK / CONFIG_PCMCIA_DEBUG (as of 2.6.33) + Instead of the cs_error() callback or the CS_CHECK() macro, please use + Linux-style checking of return values, and -- if necessary -- debug + messages using "dev_dbg()" or "pr_debug()". + +* New CIS tuple access (as of 2.6.33) + Instead of pcmcia_get_{first,next}_tuple(), pcmcia_get_tuple_data() and + pcmcia_parse_tuple(), a driver shall use "pcmcia_get_tuple()" if it is + only interested in one (raw) tuple, or "pcmcia_loop_tuple()" if it is + interested in all tuples of one type. To decode the MAC from CISTPL_FUNCE, + a new helper "pcmcia_get_mac_from_cis()" was added. + * New configuration loop helper (as of 2.6.28) By calling pcmcia_loop_config(), a driver can iterate over all available configuration options. During a driver's probe() phase, one doesn't need diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt index 52bc3143372..9dbf4470c7e 100644 --- a/Documentation/slow-work.txt +++ b/Documentation/slow-work.txt @@ -279,9 +279,9 @@ The slow-work thread pool has a number of configurables: VIEWING EXECUTING AND QUEUED ITEMS ================================== -If CONFIG_SLOW_WORK_PROC is enabled, a proc file is made available: +If CONFIG_SLOW_WORK_DEBUG is enabled, a debugfs file is made available: - /proc/slow_work_rq + /sys/kernel/debug/slow_work/runqueue through which the list of work items being executed and the queues of items to be executed may be viewed. The owner of a work item is given the chance to diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt index 7003e10f10f..641a1ef2a7f 100644 --- a/Documentation/trace/ftrace-design.txt +++ b/Documentation/trace/ftrace-design.txt @@ -213,10 +213,19 @@ If you can't trace NMI functions, then skip this option. <details to be filled> -HAVE_FTRACE_SYSCALLS +HAVE_SYSCALL_TRACEPOINTS --------------------- -<details to be filled> +You need very few things to get the syscalls tracing in an arch. + +- Have a NR_syscalls variable in <asm/unistd.h> that provides the number + of syscalls supported by the arch. +- Implement arch_syscall_addr() that resolves a syscall address from a + syscall number. +- Support the TIF_SYSCALL_TRACEPOINT thread flags +- Put the trace_sys_enter() and trace_sys_exit() tracepoints calls from ptrace + in the ptrace syscalls tracing path. +- Tag this arch as HAVE_SYSCALL_TRACEPOINTS. HAVE_FTRACE_MCOUNT_RECORD diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt new file mode 100644 index 00000000000..47aabeebbdf --- /dev/null +++ b/Documentation/trace/kprobetrace.txt @@ -0,0 +1,149 @@ + Kprobe-based Event Tracing + ========================== + + Documentation is written by Masami Hiramatsu + + +Overview +-------- +These events are similar to tracepoint based events. Instead of Tracepoint, +this is based on kprobes (kprobe and kretprobe). So it can probe wherever +kprobes can probe (this means, all functions body except for __kprobes +functions). Unlike the Tracepoint based event, this can be added and removed +dynamically, on the fly. + +To enable this feature, build your kernel with CONFIG_KPROBE_TRACING=y. + +Similar to the events tracer, this doesn't need to be activated via +current_tracer. Instead of that, add probe points via +/sys/kernel/debug/tracing/kprobe_events, and enable it via +/sys/kernel/debug/tracing/events/kprobes/<EVENT>/enabled. + + +Synopsis of kprobe_events +------------------------- + p[:[GRP/]EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS] : Set a probe + r[:[GRP/]EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe + + GRP : Group name. If omitted, use "kprobes" for it. + EVENT : Event name. If omitted, the event name is generated + based on SYMBOL+offs or MEMADDR. + SYMBOL[+offs] : Symbol+offset where the probe is inserted. + MEMADDR : Address where the probe is inserted. + + FETCHARGS : Arguments. Each probe can have up to 128 args. + %REG : Fetch register REG + @ADDR : Fetch memory at ADDR (ADDR should be in kernel) + @SYM[+|-offs] : Fetch memory at SYM +|- offs (SYM should be a data symbol) + $stackN : Fetch Nth entry of stack (N >= 0) + $stack : Fetch stack address. + $argN : Fetch function argument. (N >= 0)(*) + $retval : Fetch return value.(**) + +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(***) + NAME=FETCHARG: Set NAME as the argument name of FETCHARG. + + (*) aN may not correct on asmlinkaged functions and at the middle of + function body. + (**) only for return probe. + (***) this is useful for fetching a field of data structures. + + +Per-Probe Event Filtering +------------------------- + Per-probe event filtering feature allows you to set different filter on each +probe and gives you what arguments will be shown in trace buffer. If an event +name is specified right after 'p:' or 'r:' in kprobe_events, it adds an event +under tracing/events/kprobes/<EVENT>, at the directory you can see 'id', +'enabled', 'format' and 'filter'. + +enabled: + You can enable/disable the probe by writing 1 or 0 on it. + +format: + This shows the format of this probe event. + +filter: + You can write filtering rules of this event. + +id: + This shows the id of this probe event. + + +Event Profiling +--------------- + You can check the total number of probe hits and probe miss-hits via +/sys/kernel/debug/tracing/kprobe_profile. + The first column is event name, the second is the number of probe hits, +the third is the number of probe miss-hits. + + +Usage examples +-------------- +To add a probe as a new event, write a new definition to kprobe_events +as below. + + echo p:myprobe do_sys_open dfd=$arg0 filename=$arg1 flags=$arg2 mode=$arg3 > /sys/kernel/debug/tracing/kprobe_events + + This sets a kprobe on the top of do_sys_open() function with recording +1st to 4th arguments as "myprobe" event. As this example shows, users can +choose more familiar names for each arguments. + + echo r:myretprobe do_sys_open $retval >> /sys/kernel/debug/tracing/kprobe_events + + This sets a kretprobe on the return point of do_sys_open() function with +recording return value as "myretprobe" event. + You can see the format of these events via +/sys/kernel/debug/tracing/events/kprobes/<EVENT>/format. + + cat /sys/kernel/debug/tracing/events/kprobes/myprobe/format +name: myprobe +ID: 75 +format: + field:unsigned short common_type; offset:0; size:2; + field:unsigned char common_flags; offset:2; size:1; + field:unsigned char common_preempt_count; offset:3; size:1; + field:int common_pid; offset:4; size:4; + field:int common_tgid; offset:8; size:4; + + field: unsigned long ip; offset:16;tsize:8; + field: int nargs; offset:24;tsize:4; + field: unsigned long dfd; offset:32;tsize:8; + field: unsigned long filename; offset:40;tsize:8; + field: unsigned long flags; offset:48;tsize:8; + field: unsigned long mode; offset:56;tsize:8; + +print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->ip, REC->dfd, REC->filename, REC->flags, REC->mode + + + You can see that the event has 4 arguments as in the expressions you specified. + + echo > /sys/kernel/debug/tracing/kprobe_events + + This clears all probe points. + + Right after definition, each event is disabled by default. For tracing these +events, you need to enable it. + + echo 1 > /sys/kernel/debug/tracing/events/kprobes/myprobe/enable + echo 1 > /sys/kernel/debug/tracing/events/kprobes/myretprobe/enable + + And you can see the traced information via /sys/kernel/debug/tracing/trace. + + cat /sys/kernel/debug/tracing/trace +# tracer: nop +# +# TASK-PID CPU# TIMESTAMP FUNCTION +# | | | | | + <...>-1447 [001] 1038282.286875: myprobe: (do_sys_open+0x0/0xd6) dfd=3 filename=7fffd1ec4440 flags=8000 mode=0 + <...>-1447 [001] 1038282.286878: myretprobe: (sys_openat+0xc/0xe <- do_sys_open) $retval=fffffffffffffffe + <...>-1447 [001] 1038282.286885: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=40413c flags=8000 mode=1b6 + <...>-1447 [001] 1038282.286915: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) $retval=3 + <...>-1447 [001] 1038282.286969: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=4041c6 flags=98800 mode=10 + <...>-1447 [001] 1038282.286976: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) $retval=3 + + + Each line shows when the kernel hits an event, and <- SYMBOL means kernel +returns from SYMBOL(e.g. "sys_open+0x1b/0x1d <- do_sys_open" means kernel +returns from do_sys_open to sys_open+0x1b). + + diff --git a/MAINTAINERS b/MAINTAINERS index ad59f178586..75f771c8579 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3045,11 +3045,8 @@ S: Maintained F: fs/autofs4/ KERNEL BUILD -M: Sam Ravnborg <sam@ravnborg.org> -T: git git://git.kernel.org/pub/scm/linux/kernel/git/sam/kbuild-next.git -T: git git://git.kernel.org/pub/scm/linux/kernel/git/sam/kbuild-fixes.git L: linux-kbuild@vger.kernel.org -S: Maintained +S: Orphan F: Documentation/kbuild/ F: Makefile F: scripts/Makefile.* @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 32 -EXTRAVERSION = -rc8 +EXTRAVERSION = NAME = Man-Eating Seals of Antiquity # *DOCUMENTATION* @@ -379,6 +379,7 @@ export RCS_TAR_IGNORE := --exclude SCCS --exclude BitKeeper --exclude .svn --exc PHONY += scripts_basic scripts_basic: $(Q)$(MAKE) $(build)=scripts/basic + $(Q)rm -f .tmp_quiet_recordmcount # To avoid any implicit rule to kick in, define an empty command. scripts/basic/%: scripts_basic ; diff --git a/arch/Kconfig b/arch/Kconfig index 7f418bbc261..eef3bbb9707 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -126,4 +126,11 @@ config HAVE_DMA_API_DEBUG config HAVE_DEFAULT_NO_SPIN_MUTEXES bool +config HAVE_HW_BREAKPOINT + bool + depends on HAVE_PERF_EVENTS + select ANON_INODES + select PERF_EVENTS + + source "kernel/gcov/Kconfig" diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h index 815680b585e..b3e888638bb 100644 --- a/arch/alpha/include/asm/thread_info.h +++ b/arch/alpha/include/asm/thread_info.h @@ -61,21 +61,24 @@ register struct thread_info *__current_thread_info __asm__("$8"); /* * Thread information flags: * - these are process state flags and used from assembly - * - pending work-to-be-done flags come first to fit in and immediate operand. + * - pending work-to-be-done flags come first and must be assigned to be + * within bits 0 to 7 to fit in and immediate operand. + * - ALPHA_UAC_SHIFT below must be kept consistent with the unaligned + * control flags. * * TIF_SYSCALL_TRACE is known to be 0 via blbs. */ #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ -#define TIF_SIGPENDING 1 /* signal pending */ -#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ -#define TIF_POLLING_NRFLAG 3 /* poll_idle is polling NEED_RESCHED */ -#define TIF_DIE_IF_KERNEL 4 /* dik recursion lock */ -#define TIF_UAC_NOPRINT 5 /* see sysinfo.h */ -#define TIF_UAC_NOFIX 6 -#define TIF_UAC_SIGBUS 7 -#define TIF_MEMDIE 8 -#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */ -#define TIF_NOTIFY_RESUME 10 /* callback before returning to user */ +#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ +#define TIF_SIGPENDING 2 /* signal pending */ +#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ +#define TIF_POLLING_NRFLAG 8 /* poll_idle is polling NEED_RESCHED */ +#define TIF_DIE_IF_KERNEL 9 /* dik recursion lock */ +#define TIF_UAC_NOPRINT 10 /* see sysinfo.h */ +#define TIF_UAC_NOFIX 11 +#define TIF_UAC_SIGBUS 12 +#define TIF_MEMDIE 13 +#define TIF_RESTORE_SIGMASK 14 /* restore signal mask in do_signal */ #define TIF_FREEZE 16 /* is freezing for suspend */ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) @@ -94,7 +97,7 @@ register struct thread_info *__current_thread_info __asm__("$8"); #define _TIF_ALLWORK_MASK (_TIF_WORK_MASK \ | _TIF_SYSCALL_TRACE) -#define ALPHA_UAC_SHIFT 6 +#define ALPHA_UAC_SHIFT 10 #define ALPHA_UAC_MASK (1 << TIF_UAC_NOPRINT | 1 << TIF_UAC_NOFIX | \ 1 << TIF_UAC_SIGBUS) diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c index 8e059e58b0a..53dd2f1a53a 100644 --- a/arch/alpha/kernel/core_marvel.c +++ b/arch/alpha/kernel/core_marvel.c @@ -1103,6 +1103,8 @@ marvel_agp_info(void) * Allocate the info structure. */ agp = kmalloc(sizeof(*agp), GFP_KERNEL); + if (!agp) + return NULL; /* * Fill it in. diff --git a/arch/alpha/kernel/core_titan.c b/arch/alpha/kernel/core_titan.c index 76686497b1e..219bf271c0b 100644 --- a/arch/alpha/kernel/core_titan.c +++ b/arch/alpha/kernel/core_titan.c @@ -757,6 +757,8 @@ titan_agp_info(void) * Allocate the info structure. */ agp = kmalloc(sizeof(*agp), GFP_KERNEL); + if (!agp) + return NULL; /* * Fill it in. diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index cc783466142..c0de072b830 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -92,7 +92,7 @@ show_interrupts(struct seq_file *p, void *v) for_each_online_cpu(j) seq_printf(p, "%10u ", kstat_irqs_cpu(irq, j)); #endif - seq_printf(p, " %14s", irq_desc[irq].chip->typename); + seq_printf(p, " %14s", irq_desc[irq].chip->name); seq_printf(p, " %c%s", (action->flags & IRQF_DISABLED)?'+':' ', action->name); diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c index 38c805dfc54..cfde865b78e 100644 --- a/arch/alpha/kernel/irq_alpha.c +++ b/arch/alpha/kernel/irq_alpha.c @@ -228,7 +228,7 @@ struct irqaction timer_irqaction = { }; static struct irq_chip rtc_irq_type = { - .typename = "RTC", + .name = "RTC", .startup = rtc_startup, .shutdown = rtc_enable_disable, .enable = rtc_enable_disable, diff --git a/arch/alpha/kernel/irq_i8259.c b/arch/alpha/kernel/irq_i8259.c index 50bfec9b588..83a9ac28089 100644 --- a/arch/alpha/kernel/irq_i8259.c +++ b/arch/alpha/kernel/irq_i8259.c @@ -84,7 +84,7 @@ i8259a_end_irq(unsigned int irq) } struct irq_chip i8259a_irq_type = { - .typename = "XT-PIC", + .name = "XT-PIC", .startup = i8259a_startup_irq, .shutdown = i8259a_disable_irq, .enable = i8259a_enable_irq, diff --git a/arch/alpha/kernel/irq_pyxis.c b/arch/alpha/kernel/irq_pyxis.c index 69199a76ec4..989ce46a0cf 100644 --- a/arch/alpha/kernel/irq_pyxis.c +++ b/arch/alpha/kernel/irq_pyxis.c @@ -71,7 +71,7 @@ pyxis_mask_and_ack_irq(unsigned int irq) } static struct irq_chip pyxis_irq_type = { - .typename = "PYXIS", + .name = "PYXIS", .startup = pyxis_startup_irq, .shutdown = pyxis_disable_irq, .enable = pyxis_enable_irq, diff --git a/arch/alpha/kernel/irq_srm.c b/arch/alpha/kernel/irq_srm.c index 85229369a1f..d63e93e1e8b 100644 --- a/arch/alpha/kernel/irq_srm.c +++ b/arch/alpha/kernel/irq_srm.c @@ -49,7 +49,7 @@ srm_end_irq(unsigned int irq) /* Handle interrupts from the SRM, assuming no additional weirdness. */ static struct irq_chip srm_irq_type = { - .typename = "SRM", + .name = "SRM", .startup = srm_startup_irq, .shutdown = srm_disable_irq, .enable = srm_enable_irq, diff --git a/arch/alpha/kernel/sys_alcor.c b/arch/alpha/kernel/sys_alcor.c index 382035ef739..20a30b8b965 100644 --- a/arch/alpha/kernel/sys_alcor.c +++ b/arch/alpha/kernel/sys_alcor.c @@ -90,7 +90,7 @@ alcor_end_irq(unsigned int irq) } static struct irq_chip alcor_irq_type = { - .typename = "ALCOR", + .name = "ALCOR", .startup = alcor_startup_irq, .shutdown = alcor_disable_irq, .enable = alcor_enable_irq, diff --git a/arch/alpha/kernel/sys_cabriolet.c b/arch/alpha/kernel/sys_cabriolet.c index ed349436732..affd0f3f25d 100644 --- a/arch/alpha/kernel/sys_cabriolet.c +++ b/arch/alpha/kernel/sys_cabriolet.c @@ -72,7 +72,7 @@ cabriolet_end_irq(unsigned int irq) } static struct irq_chip cabriolet_irq_type = { - .typename = "CABRIOLET", + .name = "CABRIOLET", .startup = cabriolet_startup_irq, .shutdown = cabriolet_disable_irq, .enable = cabriolet_enable_irq, diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c index 46e70ece517..d64e1e497e7 100644 --- a/arch/alpha/kernel/sys_dp264.c +++ b/arch/alpha/kernel/sys_dp264.c @@ -199,7 +199,7 @@ clipper_set_affinity(unsigned int irq, const struct cpumask *affinity) } static struct irq_chip dp264_irq_type = { - .typename = "DP264", + .name = "DP264", .startup = dp264_startup_irq, .shutdown = dp264_disable_irq, .enable = dp264_enable_irq, @@ -210,7 +210,7 @@ static struct irq_chip dp264_irq_type = { }; static struct irq_chip clipper_irq_type = { - .typename = "CLIPPER", + .name = "CLIPPER", .startup = clipper_startup_irq, .shutdown = clipper_disable_irq, .enable = clipper_enable_irq, diff --git a/arch/alpha/kernel/sys_eb64p.c b/arch/alpha/kernel/sys_eb64p.c index 660c23ef661..df2090ce5e7 100644 --- a/arch/alpha/kernel/sys_eb64p.c +++ b/arch/alpha/kernel/sys_eb64p.c @@ -70,7 +70,7 @@ eb64p_end_irq(unsigned int irq) } static struct irq_chip eb64p_irq_type = { - .typename = "EB64P", + .name = "EB64P", .startup = eb64p_startup_irq, .shutdown = eb64p_disable_irq, .enable = eb64p_enable_irq, diff --git a/arch/alpha/kernel/sys_eiger.c b/arch/alpha/kernel/sys_eiger.c index b99ea488d84..3ca1dbcf404 100644 --- a/arch/alpha/kernel/sys_eiger.c +++ b/arch/alpha/kernel/sys_eiger.c @@ -81,7 +81,7 @@ eiger_end_irq(unsigned int irq) } static struct irq_chip eiger_irq_type = { - .typename = "EIGER", + .name = "EIGER", .startup = eiger_startup_irq, .shutdown = eiger_disable_irq, .enable = eiger_enable_irq, diff --git a/arch/alpha/kernel/sys_jensen.c b/arch/alpha/kernel/sys_jensen.c index ef0b83a070a..7a7ae36fff9 100644 --- a/arch/alpha/kernel/sys_jensen.c +++ b/arch/alpha/kernel/sys_jensen.c @@ -119,7 +119,7 @@ jensen_local_end(unsigned int irq) } static struct irq_chip jensen_local_irq_type = { - .typename = "LOCAL", + .name = "LOCAL", .startup = jensen_local_startup, .shutdown = jensen_local_shutdown, .enable = jensen_local_enable, diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c index bbfc4f20ca7..0bb3b5c4f69 100644 --- a/arch/alpha/kernel/sys_marvel.c +++ b/arch/alpha/kernel/sys_marvel.c @@ -170,7 +170,7 @@ marvel_irq_noop_return(unsigned int irq) } static struct irq_chip marvel_legacy_irq_type = { - .typename = "LEGACY", + .name = "LEGACY", .startup = marvel_irq_noop_return, .shutdown = marvel_irq_noop, .enable = marvel_irq_noop, @@ -180,7 +180,7 @@ static struct irq_chip marvel_legacy_irq_type = { }; static struct irq_chip io7_lsi_irq_type = { - .typename = "LSI", + .name = "LSI", .startup = io7_startup_irq, .shutdown = io7_disable_irq, .enable = io7_enable_irq, @@ -190,7 +190,7 @@ static struct irq_chip io7_lsi_irq_type = { }; static struct irq_chip io7_msi_irq_type = { - .typename = "MSI", + .name = "MSI", .startup = io7_startup_irq, .shutdown = io7_disable_irq, .enable = io7_enable_irq, diff --git a/arch/alpha/kernel/sys_mikasa.c b/arch/alpha/kernel/sys_mikasa.c index 4e366641a08..ee886516981 100644 --- a/arch/alpha/kernel/sys_mikasa.c +++ b/arch/alpha/kernel/sys_mikasa.c @@ -69,7 +69,7 @@ mikasa_end_irq(unsigned int irq) } static struct irq_chip mikasa_irq_type = { - .typename = "MIKASA", + .name = "MIKASA", .startup = mikasa_startup_irq, .shutdown = mikasa_disable_irq, .enable = mikasa_enable_irq, diff --git a/arch/alpha/kernel/sys_noritake.c b/arch/alpha/kernel/sys_noritake.c index 35753a173ba..86503fe73a8 100644 --- a/arch/alpha/kernel/sys_noritake.c +++ b/arch/alpha/kernel/sys_noritake.c @@ -74,7 +74,7 @@ noritake_end_irq(unsigned int irq) } static struct irq_chip noritake_irq_type = { - .typename = "NORITAKE", + .name = "NORITAKE", .startup = noritake_startup_irq, .shutdown = noritake_disable_irq, .enable = noritake_enable_irq, diff --git a/arch/alpha/kernel/sys_rawhide.c b/arch/alpha/kernel/sys_rawhide.c index f3aec7e085c..26c322bf89e 100644 --- a/arch/alpha/kernel/sys_rawhide.c +++ b/arch/alpha/kernel/sys_rawhide.c @@ -136,7 +136,7 @@ rawhide_end_irq(unsigned int irq) } static struct irq_chip rawhide_irq_type = { - .typename = "RAWHIDE", + .name = "RAWHIDE", .startup = rawhide_startup_irq, .shutdown = rawhide_disable_irq, .enable = rawhide_enable_irq, diff --git a/arch/alpha/kernel/sys_ruffian.c b/arch/alpha/kernel/sys_ruffian.c index d9f9cfeb993..8de1046fe91 100644 --- a/arch/alpha/kernel/sys_ruffian.c +++ b/arch/alpha/kernel/sys_ruffian.c @@ -66,7 +66,7 @@ ruffian_init_irq(void) common_init_isa_dma(); } -#define RUFFIAN_LATCH ((PIT_TICK_RATE + HZ / 2) / HZ) +#define RUFFIAN_LATCH DIV_ROUND_CLOSEST(PIT_TICK_RATE, HZ) static void __init ruffian_init_rtc(void) diff --git a/arch/alpha/kernel/sys_rx164.c b/arch/alpha/kernel/sys_rx164.c index fc924637345..be161129eab 100644 --- a/arch/alpha/kernel/sys_rx164.c +++ b/arch/alpha/kernel/sys_rx164.c @@ -73,7 +73,7 @@ rx164_end_irq(unsigned int irq) } static struct irq_chip rx164_irq_type = { - .typename = "RX164", + .name = "RX164", .startup = rx164_startup_irq, .shutdown = rx164_disable_irq, .enable = rx164_enable_irq, diff --git a/arch/alpha/kernel/sys_sable.c b/arch/alpha/kernel/sys_sable.c index 426eb6906d0..b2abe27a23c 100644 --- a/arch/alpha/kernel/sys_sable.c +++ b/arch/alpha/kernel/sys_sable.c @@ -502,7 +502,7 @@ sable_lynx_mask_and_ack_irq(unsigned int irq) } static struct irq_chip sable_lynx_irq_type = { - .typename = "SABLE/LYNX", + .name = "SABLE/LYNX", .startup = sable_lynx_startup_irq, .shutdown = sable_lynx_disable_irq, .enable = sable_lynx_enable_irq, diff --git a/arch/alpha/kernel/sys_takara.c b/arch/alpha/kernel/sys_takara.c index 830318c2166..230464885b5 100644 --- a/arch/alpha/kernel/sys_takara.c +++ b/arch/alpha/kernel/sys_takara.c @@ -75,7 +75,7 @@ takara_end_irq(unsigned int irq) } static struct irq_chip takara_irq_type = { - .typename = "TAKARA", + .name = "TAKARA", .startup = takara_startup_irq, .shutdown = takara_disable_irq, .enable = takara_enable_irq, diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c index 88978fc60f8..288053342c8 100644 --- a/arch/alpha/kernel/sys_titan.c +++ b/arch/alpha/kernel/sys_titan.c @@ -195,7 +195,7 @@ init_titan_irqs(struct irq_chip * ops, int imin, int imax) } static struct irq_chip titan_irq_type = { - .typename = "TITAN", + .name = "TITAN", .startup = titan_startup_irq, .shutdown = titan_disable_irq, .enable = titan_enable_irq, diff --git a/arch/alpha/kernel/sys_wildfire.c b/arch/alpha/kernel/sys_wildfire.c index e91b4c3838a..62fd972e18e 100644 --- a/arch/alpha/kernel/sys_wildfire.c +++ b/arch/alpha/kernel/sys_wildfire.c @@ -158,7 +158,7 @@ wildfire_end_irq(unsigned int irq) } static struct irq_chip wildfire_irq_type = { - .typename = "WILDFIRE", + .name = "WILDFIRE", .startup = wildfire_startup_irq, .shutdown = wildfire_disable_irq, .enable = wildfire_enable_irq, diff --git a/arch/arm/configs/h3600_defconfig b/arch/arm/configs/h3600_defconfig index f6aed7747d4..efa78e144e5 100644 --- a/arch/arm/configs/h3600_defconfig +++ b/arch/arm/configs/h3600_defconfig @@ -1,86 +1,189 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc4 -# Thu Jun 9 01:59:03 2005 +# Linux kernel version: 2.6.32-rc5 +# Sat Oct 24 00:09:30 2009 # CONFIG_ARM=y -CONFIG_MMU=y -CONFIG_UID16=y +CONFIG_SYS_SUPPORTS_APM_EMULATION=y +CONFIG_GENERIC_GPIO=y +CONFIG_GENERIC_TIME=y +CONFIG_GENERIC_CLOCKEVENTS=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_STACKTRACE_SUPPORT=y +CONFIG_HAVE_LATENCYTOP_SUPPORT=y +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_TRACE_IRQFLAGS_SUPPORT=y +CONFIG_HARDIRQS_SW_RESEND=y +CONFIG_GENERIC_IRQ_PROBE=y CONFIG_RWSEM_GENERIC_SPINLOCK=y +CONFIG_ARCH_HAS_CPUFREQ=y +CONFIG_GENERIC_HWEIGHT=y CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_GENERIC_IOMAP=y +CONFIG_ARCH_MTD_XIP=y +CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y +CONFIG_VECTORS_BASE=0xffff0000 +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" +CONFIG_CONSTRUCTORS=y # -# Code maturity level options +# General setup # CONFIG_EXPERIMENTAL=y -CONFIG_CLEAN_COMPILE=y CONFIG_BROKEN_ON_SMP=y CONFIG_INIT_ENV_ARG_LIMIT=32 - -# -# General setup -# CONFIG_LOCALVERSION="" +CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y CONFIG_SYSVIPC=y +CONFIG_SYSVIPC_SYSCTL=y # CONFIG_POSIX_MQUEUE is not set # CONFIG_BSD_PROCESS_ACCT is not set -CONFIG_SYSCTL=y +# CONFIG_TASKSTATS is not set # CONFIG_AUDIT is not set -CONFIG_HOTPLUG=y -CONFIG_KOBJECT_UEVENT=y + +# +# RCU Subsystem +# +CONFIG_TREE_RCU=y +# CONFIG_TREE_PREEMPT_RCU is not set +# CONFIG_RCU_TRACE is not set +CONFIG_RCU_FANOUT=32 +# CONFIG_RCU_FANOUT_EXACT is not set +# CONFIG_TREE_RCU_TRACE is not set # CONFIG_IKCONFIG is not set +CONFIG_LOG_BUF_SHIFT=14 +# CONFIG_GROUP_SCHED is not set +# CONFIG_CGROUPS is not set +# CONFIG_SYSFS_DEPRECATED_V2 is not set +# CONFIG_RELAY is not set +CONFIG_NAMESPACES=y +# CONFIG_UTS_NS is not set +# CONFIG_IPC_NS is not set +# CONFIG_USER_NS is not set +# CONFIG_PID_NS is not set +# CONFIG_NET_NS is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +CONFIG_RD_GZIP=y +CONFIG_RD_BZIP2=y +CONFIG_RD_LZMA=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL=y +CONFIG_ANON_INODES=y # CONFIG_EMBEDDED is not set +CONFIG_UID16=y +CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y +CONFIG_ELF_CORE=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y -CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SIGNALFD=y +CONFIG_TIMERFD=y +CONFIG_EVENTFD=y CONFIG_SHMEM=y -CONFIG_CC_ALIGN_FUNCTIONS=0 -CONFIG_CC_ALIGN_LABELS=0 -CONFIG_CC_ALIGN_LOOPS=0 -CONFIG_CC_ALIGN_JUMPS=0 -# CONFIG_TINY_SHMEM is not set -CONFIG_BASE_SMALL=0 +CONFIG_AIO=y + +# +# Kernel Performance Events And Counters +# +CONFIG_VM_EVENT_COUNTERS=y +CONFIG_SLUB_DEBUG=y +CONFIG_COMPAT_BRK=y +# CONFIG_SLAB is not set +CONFIG_SLUB=y +# CONFIG_SLOB is not set +# CONFIG_PROFILING is not set +CONFIG_HAVE_OPROFILE=y +# CONFIG_KPROBES is not set +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KRETPROBES=y +CONFIG_HAVE_CLK=y # -# Loadable module support +# GCOV-based kernel profiling # +# CONFIG_SLOW_WORK is not set +CONFIG_HAVE_GENERIC_DMA_COHERENT=y +CONFIG_SLABINFO=y +CONFIG_RT_MUTEXES=y +CONFIG_BASE_SMALL=0 CONFIG_MODULES=y +# CONFIG_MODULE_FORCE_LOAD is not set # CONFIG_MODULE_UNLOAD is not set -CONFIG_OBSOLETE_MODPARM=y # CONFIG_MODVERSIONS is not set # CONFIG_MODULE_SRCVERSION_ALL is not set -# CONFIG_KMOD is not set +CONFIG_BLOCK=y +# CONFIG_LBDAF is not set +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_BLK_DEV_INTEGRITY is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +# CONFIG_IOSCHED_AS is not set +# CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_IOSCHED_CFQ is not set +# CONFIG_DEFAULT_AS is not set +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +CONFIG_DEFAULT_NOOP=y +CONFIG_DEFAULT_IOSCHED="noop" +CONFIG_FREEZER=y # # System Type # -# CONFIG_ARCH_CLPS7500 is not set +CONFIG_MMU=y +# CONFIG_ARCH_AAEC2000 is not set +# CONFIG_ARCH_INTEGRATOR is not set +# CONFIG_ARCH_REALVIEW is not set +# CONFIG_ARCH_VERSATILE is not set +# CONFIG_ARCH_AT91 is not set # CONFIG_ARCH_CLPS711X is not set -# CONFIG_ARCH_CO285 is not set +# CONFIG_ARCH_GEMINI is not set # CONFIG_ARCH_EBSA110 is not set +# CONFIG_ARCH_EP93XX is not set # CONFIG_ARCH_FOOTBRIDGE is not set -# CONFIG_ARCH_INTEGRATOR is not set -# CONFIG_ARCH_IOP3XX is not set -# CONFIG_ARCH_IXP4XX is not set +# CONFIG_ARCH_MXC is not set +# CONFIG_ARCH_STMP3XXX is not set +# CONFIG_ARCH_NETX is not set +# CONFIG_ARCH_H720X is not set +# CONFIG_ARCH_NOMADIK is not set +# CONFIG_ARCH_IOP13XX is not set +# CONFIG_ARCH_IOP32X is not set +# CONFIG_ARCH_IOP33X is not set +# CONFIG_ARCH_IXP23XX is not set # CONFIG_ARCH_IXP2000 is not set +# CONFIG_ARCH_IXP4XX is not set # CONFIG_ARCH_L7200 is not set +# CONFIG_ARCH_KIRKWOOD is not set +# CONFIG_ARCH_LOKI is not set +# CONFIG_ARCH_MV78XX0 is not set +# CONFIG_ARCH_ORION5X is not set +# CONFIG_ARCH_MMP is not set +# CONFIG_ARCH_KS8695 is not set +# CONFIG_ARCH_NS9XXX is not set +# CONFIG_ARCH_W90X900 is not set +# CONFIG_ARCH_PNX4008 is not set # CONFIG_ARCH_PXA is not set +# CONFIG_ARCH_MSM is not set # CONFIG_ARCH_RPC is not set CONFIG_ARCH_SA1100=y # CONFIG_ARCH_S3C2410 is not set +# CONFIG_ARCH_S3C64XX is not set +# CONFIG_ARCH_S5PC1XX is not set # CONFIG_ARCH_SHARK is not set # CONFIG_ARCH_LH7A40X is not set +# CONFIG_ARCH_U300 is not set +# CONFIG_ARCH_DAVINCI is not set # CONFIG_ARCH_OMAP is not set -# CONFIG_ARCH_VERSATILE is not set -# CONFIG_ARCH_IMX is not set -# CONFIG_ARCH_H720X is not set +# CONFIG_ARCH_BCMRING is not set # # SA11x0 Implementations @@ -106,27 +209,31 @@ CONFIG_CPU_32=y CONFIG_CPU_SA1100=y CONFIG_CPU_32v4=y CONFIG_CPU_ABRT_EV4=y +CONFIG_CPU_PABRT_LEGACY=y CONFIG_CPU_CACHE_V4WB=y CONFIG_CPU_CACHE_VIVT=y CONFIG_CPU_TLB_V4WB=y -CONFIG_CPU_MINICACHE=y +CONFIG_CPU_CP15=y +CONFIG_CPU_CP15_MMU=y # # Processor Features # +# CONFIG_CPU_ICACHE_DISABLE is not set +# CONFIG_CPU_DCACHE_DISABLE is not set +CONFIG_ARM_L1_CACHE_SHIFT=5 # # Bus support # CONFIG_ISA=y -CONFIG_ISA_DMA_API=y - -# -# PCCARD (PCMCIA/CardBus) support -# +# CONFIG_PCI_SYSCALL is not set +# CONFIG_ARCH_SUPPORTS_MSI is not set CONFIG_PCCARD=y # CONFIG_PCMCIA_DEBUG is not set CONFIG_PCMCIA=y +CONFIG_PCMCIA_LOAD_CIS=y +CONFIG_PCMCIA_IOCTL=y # # PC-card bridges @@ -138,11 +245,41 @@ CONFIG_PCMCIA_SA1100=y # # Kernel Features # -# CONFIG_SMP is not set +CONFIG_TICK_ONESHOT=y +# CONFIG_NO_HZ is not set +# CONFIG_HIGH_RES_TIMERS is not set +CONFIG_GENERIC_CLOCKEVENTS_BUILD=y +CONFIG_VMSPLIT_3G=y +# CONFIG_VMSPLIT_2G is not set +# CONFIG_VMSPLIT_1G is not set +CONFIG_PAGE_OFFSET=0xC0000000 +CONFIG_PREEMPT_NONE=y +# CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_PREEMPT is not set -CONFIG_DISCONTIGMEM=y +CONFIG_HZ=100 +# CONFIG_AEABI is not set +CONFIG_ARCH_SPARSEMEM_ENABLE=y +CONFIG_ARCH_SPARSEMEM_DEFAULT=y +# CONFIG_ARCH_SELECT_MEMORY_MODEL is not set +# CONFIG_HIGHMEM is not set +CONFIG_SELECT_MEMORY_MODEL=y +# CONFIG_FLATMEM_MANUAL is not set +# CONFIG_DISCONTIGMEM_MANUAL is not set +CONFIG_SPARSEMEM_MANUAL=y +CONFIG_SPARSEMEM=y +CONFIG_HAVE_MEMORY_PRESENT=y +CONFIG_SPARSEMEM_EXTREME=y +CONFIG_SPLIT_PTLOCK_CPUS=4096 +# CONFIG_PHYS_ADDR_T_64BIT is not set +CONFIG_ZONE_DMA_FLAG=0 +CONFIG_VIRT_TO_BUS=y +CONFIG_HAVE_MLOCK=y +CONFIG_HAVE_MLOCKED_PAGE_BIT=y +# CONFIG_KSM is not set +CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 # CONFIG_LEDS is not set CONFIG_ALIGNMENT_TRAP=y +# CONFIG_UACCESS_WITH_MEMCPY is not set # # Boot options @@ -151,22 +288,26 @@ CONFIG_ZBOOT_ROM_TEXT=0x0 CONFIG_ZBOOT_ROM_BSS=0x0 CONFIG_CMDLINE="" # CONFIG_XIP_KERNEL is not set +# CONFIG_KEXEC is not set # -# CPU Frequency scaling +# CPU Power Management # CONFIG_CPU_FREQ=y -CONFIG_CPU_FREQ_TABLE=y # CONFIG_CPU_FREQ_DEBUG is not set -CONFIG_CPU_FREQ_STAT=y -# CONFIG_CPU_FREQ_STAT_DETAILS is not set +# CONFIG_CPU_FREQ_STAT is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y +# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set # CONFIG_CPU_FREQ_GOV_PERFORMANCE is not set # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set CONFIG_CPU_FREQ_GOV_USERSPACE=y # CONFIG_CPU_FREQ_GOV_ONDEMAND is not set +# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set CONFIG_CPU_FREQ_SA1100=y +# CONFIG_CPU_IDLE is not set # # Floating point emulation @@ -183,6 +324,8 @@ CONFIG_FPE_NWFPE=y # Userspace binary formats # CONFIG_BINFMT_ELF=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_HAVE_AOUT=y # CONFIG_BINFMT_AOUT is not set # CONFIG_BINFMT_MISC is not set # CONFIG_ARTHUR is not set @@ -191,8 +334,120 @@ CONFIG_BINFMT_ELF=y # Power management options # CONFIG_PM=y -# CONFIG_PM_LEGACY is not set -# CONFIG_APM is not set +# CONFIG_PM_DEBUG is not set +CONFIG_PM_SLEEP=y +CONFIG_SUSPEND=y +CONFIG_SUSPEND_FREEZER=y +# CONFIG_APM_EMULATION is not set +# CONFIG_PM_RUNTIME is not set +CONFIG_ARCH_SUSPEND_POSSIBLE=y +CONFIG_NET=y + +# +# Networking options +# +# CONFIG_PACKET is not set +CONFIG_UNIX=y +CONFIG_XFRM=y +# CONFIG_XFRM_USER is not set +# CONFIG_XFRM_SUB_POLICY is not set +# CONFIG_XFRM_MIGRATE is not set +# CONFIG_XFRM_STATISTICS is not set +# CONFIG_NET_KEY is not set +CONFIG_INET=y +# CONFIG_IP_MULTICAST is not set +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_FIB_HASH=y +# CONFIG_IP_PNP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_ARPD is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +# CONFIG_INET_TUNNEL is not set +CONFIG_INET_XFRM_MODE_TRANSPORT=y +CONFIG_INET_XFRM_MODE_TUNNEL=y +CONFIG_INET_XFRM_MODE_BEET=y +CONFIG_INET_LRO=y +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y +# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_TCP_CONG_CUBIC=y +CONFIG_DEFAULT_TCP_CONG="cubic" +# CONFIG_TCP_MD5SIG is not set +# CONFIG_IPV6 is not set +# CONFIG_NETWORK_SECMARK is not set +# CONFIG_NETFILTER is not set +# CONFIG_IP_DCCP is not set +# CONFIG_IP_SCTP is not set +# CONFIG_RDS is not set +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_NET_DSA is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +# CONFIG_PHONET is not set +# CONFIG_IEEE802154 is not set +# CONFIG_NET_SCHED is not set +# CONFIG_DCB is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_HAMRADIO is not set +# CONFIG_CAN is not set +CONFIG_IRDA=m + +# +# IrDA protocols +# +CONFIG_IRLAN=m +CONFIG_IRNET=m +CONFIG_IRCOMM=m +# CONFIG_IRDA_ULTRA is not set + +# +# IrDA options +# +# CONFIG_IRDA_CACHE_LAST_LSAP is not set +# CONFIG_IRDA_FAST_RR is not set +# CONFIG_IRDA_DEBUG is not set + +# +# Infrared-port device drivers +# + +# +# SIR device drivers +# +# CONFIG_IRTTY_SIR is not set + +# +# Dongle support +# + +# +# FIR device drivers +# +CONFIG_SA1100_FIR=m +# CONFIG_BT is not set +# CONFIG_AF_RXRPC is not set +# CONFIG_WIRELESS is not set +# CONFIG_WIMAX is not set +# CONFIG_RFKILL is not set +# CONFIG_NET_9P is not set # # Device Drivers @@ -201,15 +456,17 @@ CONFIG_PM=y # # Generic Driver Options # +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y -# CONFIG_FW_LOADER is not set - -# -# Memory Technology Devices (MTD) -# +CONFIG_FW_LOADER=y +CONFIG_FIRMWARE_IN_KERNEL=y +CONFIG_EXTRA_FIRMWARE="" +# CONFIG_SYS_HYPERVISOR is not set +# CONFIG_CONNECTOR is not set CONFIG_MTD=y # CONFIG_MTD_DEBUG is not set +# CONFIG_MTD_TESTS is not set # CONFIG_MTD_CONCAT is not set CONFIG_MTD_PARTITIONS=y CONFIG_MTD_REDBOOT_PARTS=y @@ -218,15 +475,20 @@ CONFIG_MTD_REDBOOT_DIRECTORY_BLOCK=-1 # CONFIG_MTD_REDBOOT_PARTS_READONLY is not set # CONFIG_MTD_CMDLINE_PARTS is not set # CONFIG_MTD_AFS_PARTS is not set +# CONFIG_MTD_AR7_PARTS is not set # # User Modules And Translation Layers # CONFIG_MTD_CHAR=y +CONFIG_MTD_BLKDEVS=y CONFIG_MTD_BLOCK=y # CONFIG_FTL is not set # CONFIG_NFTL is not set # CONFIG_INFTL is not set +# CONFIG_RFD_FTL is not set +# CONFIG_SSFDC is not set +# CONFIG_MTD_OOPS is not set # # RAM/ROM/Flash chip drivers @@ -249,6 +511,7 @@ CONFIG_MTD_MAP_BANK_WIDTH_4=y CONFIG_MTD_CFI_I2=y # CONFIG_MTD_CFI_I4 is not set # CONFIG_MTD_CFI_I8 is not set +# CONFIG_MTD_OTP is not set CONFIG_MTD_CFI_INTELEXT=y # CONFIG_MTD_CFI_AMDSTD is not set # CONFIG_MTD_CFI_STAA is not set @@ -265,7 +528,7 @@ CONFIG_MTD_CFI_UTIL=y # CONFIG_MTD_PHYSMAP is not set # CONFIG_MTD_ARM_INTEGRATOR is not set CONFIG_MTD_SA1100=y -# CONFIG_MTD_EDB7312 is not set +# CONFIG_MTD_PLATRAM is not set # # Self-contained MTD device drivers @@ -273,7 +536,6 @@ CONFIG_MTD_SA1100=y # CONFIG_MTD_SLRAM is not set # CONFIG_MTD_PHRAM is not set # CONFIG_MTD_MTDRAM is not set -# CONFIG_MTD_BLKMTD is not set # CONFIG_MTD_BLOCK2MTD is not set # @@ -282,26 +544,21 @@ CONFIG_MTD_SA1100=y # CONFIG_MTD_DOC2000 is not set # CONFIG_MTD_DOC2001 is not set # CONFIG_MTD_DOC2001PLUS is not set - -# -# NAND Flash Device Drivers -# # CONFIG_MTD_NAND is not set +# CONFIG_MTD_ONENAND is not set # -# Parallel port support +# LPDDR flash memory drivers # -# CONFIG_PARPORT is not set +# CONFIG_MTD_LPDDR is not set # -# Plug and Play support +# UBI - Unsorted block images # +# CONFIG_MTD_UBI is not set +# CONFIG_PARPORT is not set # CONFIG_PNP is not set - -# -# Block devices -# -# CONFIG_BLK_DEV_XD is not set +CONFIG_BLK_DEV=y # CONFIG_BLK_DEV_COW_COMMON is not set CONFIG_BLK_DEV_LOOP=m # CONFIG_BLK_DEV_CRYPTOLOOP is not set @@ -309,212 +566,58 @@ CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=8192 -CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" +# CONFIG_BLK_DEV_XIP is not set # CONFIG_CDROM_PKTCDVD is not set - -# -# IO Schedulers -# -CONFIG_IOSCHED_NOOP=y -CONFIG_IOSCHED_AS=y -CONFIG_IOSCHED_DEADLINE=y -CONFIG_IOSCHED_CFQ=y # CONFIG_ATA_OVER_ETH is not set +# CONFIG_MG_DISK is not set +# CONFIG_MISC_DEVICES is not set +CONFIG_HAVE_IDE=y +CONFIG_IDE=y # -# ATA/ATAPI/MFM/RLL support -# -CONFIG_IDE=m -CONFIG_BLK_DEV_IDE=m - -# -# Please see Documentation/ide.txt for help/info on IDE drives +# Please see Documentation/ide/ide.txt for help/info on IDE drives # # CONFIG_BLK_DEV_IDE_SATA is not set -CONFIG_BLK_DEV_IDEDISK=m -# CONFIG_IDEDISK_MULTI_MODE is not set -# CONFIG_BLK_DEV_IDECS is not set -CONFIG_BLK_DEV_IDECD=m +CONFIG_IDE_GD=y +CONFIG_IDE_GD_ATA=y +# CONFIG_IDE_GD_ATAPI is not set +CONFIG_BLK_DEV_IDECS=y +# CONFIG_BLK_DEV_IDECD is not set # CONFIG_BLK_DEV_IDETAPE is not set -# CONFIG_BLK_DEV_IDEFLOPPY is not set # CONFIG_IDE_TASK_IOCTL is not set +CONFIG_IDE_PROC_FS=y # # IDE chipset support/bugfixes # -CONFIG_IDE_GENERIC=m -# CONFIG_IDE_ARM is not set -# CONFIG_IDE_CHIPSETS is not set +# CONFIG_BLK_DEV_PLATFORM is not set # CONFIG_BLK_DEV_IDEDMA is not set -# CONFIG_IDEDMA_AUTO is not set -# CONFIG_BLK_DEV_HD is not set # # SCSI device support # +# CONFIG_RAID_ATTRS is not set # CONFIG_SCSI is not set - -# -# Multi-device support (RAID and LVM) -# +# CONFIG_SCSI_DMA is not set +# CONFIG_SCSI_NETLINK is not set +# CONFIG_ATA is not set # CONFIG_MD is not set - -# -# Fusion MPT device support -# - -# -# IEEE 1394 (FireWire) support -# - -# -# I2O device support -# - -# -# Networking support -# -CONFIG_NET=y - -# -# Networking options -# -# CONFIG_PACKET is not set -CONFIG_UNIX=y -# CONFIG_NET_KEY is not set -CONFIG_INET=y -# CONFIG_IP_MULTICAST is not set -# CONFIG_IP_ADVANCED_ROUTER is not set -# CONFIG_IP_PNP is not set -# CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE is not set -# CONFIG_ARPD is not set -# CONFIG_SYN_COOKIES is not set -# CONFIG_INET_AH is not set -# CONFIG_INET_ESP is not set -# CONFIG_INET_IPCOMP is not set -# CONFIG_INET_TUNNEL is not set -# CONFIG_IP_TCPDIAG is not set -# CONFIG_IP_TCPDIAG_IPV6 is not set -# CONFIG_IPV6 is not set -# CONFIG_NETFILTER is not set - -# -# SCTP Configuration (EXPERIMENTAL) -# -# CONFIG_IP_SCTP is not set -# CONFIG_ATM is not set -# CONFIG_BRIDGE is not set -# CONFIG_VLAN_8021Q is not set -# CONFIG_DECNET is not set -# CONFIG_LLC2 is not set -# CONFIG_IPX is not set -# CONFIG_ATALK is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_NET_DIVERT is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set - -# -# QoS and/or fair queueing -# -# CONFIG_NET_SCHED is not set -# CONFIG_NET_CLS_ROUTE is not set - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set -# CONFIG_NETPOLL is not set -# CONFIG_NET_POLL_CONTROLLER is not set -# CONFIG_HAMRADIO is not set -CONFIG_IRDA=m - -# -# IrDA protocols -# -CONFIG_IRLAN=m -CONFIG_IRNET=m -CONFIG_IRCOMM=m -# CONFIG_IRDA_ULTRA is not set - -# -# IrDA options -# -# CONFIG_IRDA_CACHE_LAST_LSAP is not set -# CONFIG_IRDA_FAST_RR is not set -# CONFIG_IRDA_DEBUG is not set - -# -# Infrared-port device drivers -# - -# -# SIR device drivers -# -# CONFIG_IRTTY_SIR is not set - -# -# Dongle support -# - -# -# Old SIR device drivers -# -# CONFIG_IRPORT_SIR is not set - -# -# Old Serial dongle support -# - -# -# FIR device drivers -# -# CONFIG_NSC_FIR is not set -# CONFIG_WINBOND_FIR is not set -# CONFIG_SMC_IRCC_FIR is not set -# CONFIG_ALI_FIR is not set -CONFIG_SA1100_FIR=m -# CONFIG_VIA_FIR is not set -# CONFIG_BT is not set CONFIG_NETDEVICES=y # CONFIG_DUMMY is not set # CONFIG_BONDING is not set +# CONFIG_MACVLAN is not set # CONFIG_EQUALIZER is not set # CONFIG_TUN is not set - -# -# ARCnet devices -# +# CONFIG_VETH is not set # CONFIG_ARCNET is not set - -# -# Ethernet (10 or 100Mbit) -# # CONFIG_NET_ETHERNET is not set - -# -# Ethernet (1000 Mbit) -# - -# -# Ethernet (10000 Mbit) -# - -# -# Token Ring devices -# +# CONFIG_NETDEV_1000 is not set +# CONFIG_NETDEV_10000 is not set # CONFIG_TR is not set +# CONFIG_WLAN is not set # -# Wireless LAN (non-hamradio) -# -# CONFIG_NET_RADIO is not set - -# -# PCMCIA network device support +# Enable WiMAX (Networking options) to see the WiMAX drivers # CONFIG_NET_PCMCIA=y # CONFIG_PCMCIA_3C589 is not set @@ -525,10 +628,6 @@ CONFIG_PCMCIA_PCNET=y # CONFIG_PCMCIA_SMC91C92 is not set # CONFIG_PCMCIA_XIRC2PS is not set # CONFIG_PCMCIA_AXNET is not set - -# -# Wan interfaces -# # CONFIG_WAN is not set CONFIG_PPP=m # CONFIG_PPP_MULTILINK is not set @@ -537,20 +636,23 @@ CONFIG_PPP_ASYNC=m # CONFIG_PPP_SYNC_TTY is not set CONFIG_PPP_DEFLATE=m CONFIG_PPP_BSDCOMP=m +# CONFIG_PPP_MPPE is not set # CONFIG_PPPOE is not set +# CONFIG_PPPOL2TP is not set # CONFIG_SLIP is not set -# CONFIG_SHAPER is not set +CONFIG_SLHC=m # CONFIG_NETCONSOLE is not set - -# -# ISDN subsystem -# +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set # CONFIG_ISDN is not set +# CONFIG_PHONE is not set # # Input device support # CONFIG_INPUT=y +# CONFIG_INPUT_FF_MEMLESS is not set +# CONFIG_INPUT_POLLDEV is not set # # Userland interfaces @@ -560,7 +662,6 @@ CONFIG_INPUT_MOUSEDEV_PSAUX=y CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 # CONFIG_INPUT_JOYDEV is not set -# CONFIG_INPUT_TSDEV is not set # CONFIG_INPUT_EVDEV is not set # CONFIG_INPUT_EVBUG is not set @@ -568,47 +669,42 @@ CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 # Input Device Drivers # CONFIG_INPUT_KEYBOARD=y -CONFIG_KEYBOARD_ATKBD=y -# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_ATKBD is not set # CONFIG_KEYBOARD_LKKBD is not set -# CONFIG_KEYBOARD_XTKBD is not set +CONFIG_KEYBOARD_GPIO=y +# CONFIG_KEYBOARD_MATRIX is not set # CONFIG_KEYBOARD_NEWTON is not set -CONFIG_INPUT_MOUSE=y -CONFIG_MOUSE_PS2=y -# CONFIG_MOUSE_SERIAL is not set -# CONFIG_MOUSE_INPORT is not set -# CONFIG_MOUSE_LOGIBM is not set -# CONFIG_MOUSE_PC110PAD is not set -# CONFIG_MOUSE_VSXXXAA is not set +# CONFIG_KEYBOARD_OPENCORES is not set +# CONFIG_KEYBOARD_STOWAWAY is not set +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_XTKBD is not set +# CONFIG_INPUT_MOUSE is not set # CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TABLET is not set # CONFIG_INPUT_TOUCHSCREEN is not set # CONFIG_INPUT_MISC is not set # # Hardware I/O ports # -CONFIG_SERIO=y -CONFIG_SERIO_SERPORT=y -CONFIG_SERIO_LIBPS2=y -# CONFIG_SERIO_RAW is not set +# CONFIG_SERIO is not set # CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y # # Character devices # CONFIG_VT=y +CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_VT_CONSOLE=y CONFIG_HW_CONSOLE=y +# CONFIG_VT_HW_CONSOLE_BINDING is not set +CONFIG_DEVKMEM=y # CONFIG_SERIAL_NONSTANDARD is not set # # Serial drivers # -CONFIG_SERIAL_8250=m -# CONFIG_SERIAL_8250_CS is not set -CONFIG_SERIAL_8250_NR_UARTS=4 -# CONFIG_SERIAL_8250_EXTENDED is not set +# CONFIG_SERIAL_8250 is not set # # Non-8250 serial port support @@ -618,71 +714,125 @@ CONFIG_SERIAL_SA1100_CONSOLE=y CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y CONFIG_UNIX98_PTYS=y +# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 +# CONFIG_IPMI_HANDLER is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set # -# IPMI +# PCMCIA character devices # -# CONFIG_IPMI_HANDLER is not set +# CONFIG_SYNCLINK_CS is not set +# CONFIG_CARDMAN_4000 is not set +# CONFIG_CARDMAN_4040 is not set +# CONFIG_IPWIRELESS is not set +# CONFIG_RAW_DRIVER is not set +# CONFIG_TCG_TPM is not set +CONFIG_DEVPORT=y +# CONFIG_I2C is not set +# CONFIG_SPI is not set # -# Watchdog Cards +# PPS support # -# CONFIG_WATCHDOG is not set -# CONFIG_NVRAM is not set -# CONFIG_RTC is not set -# CONFIG_DTLK is not set -# CONFIG_R3964 is not set +# CONFIG_PPS is not set +CONFIG_ARCH_REQUIRE_GPIOLIB=y +CONFIG_GPIOLIB=y +# CONFIG_GPIO_SYSFS is not set # -# Ftape, the floppy tape device driver +# Memory mapped GPIO expanders: # -# CONFIG_DRM is not set # -# PCMCIA character devices +# I2C GPIO expanders: # -# CONFIG_SYNCLINK_CS is not set -# CONFIG_RAW_DRIVER is not set # -# TPM devices +# PCI GPIO expanders: # # -# I2C support +# SPI GPIO expanders: # -# CONFIG_I2C is not set # -# Misc devices +# AC97 GPIO expanders: # +# CONFIG_W1 is not set +# CONFIG_POWER_SUPPLY is not set +# CONFIG_HWMON is not set +# CONFIG_THERMAL is not set +# CONFIG_WATCHDOG is not set +CONFIG_SSB_POSSIBLE=y # -# Multimedia devices +# Sonics Silicon Backplane # -# CONFIG_VIDEO_DEV is not set +# CONFIG_SSB is not set # -# Digital Video Broadcasting Devices +# Multifunction device drivers # -# CONFIG_DVB is not set +# CONFIG_MFD_CORE is not set +# CONFIG_MFD_SM501 is not set +# CONFIG_MFD_ASIC3 is not set +CONFIG_HTC_EGPIO=y +# CONFIG_HTC_PASIC3 is not set +# CONFIG_MFD_TMIO is not set +# CONFIG_MFD_T7L66XB is not set +# CONFIG_MFD_TC6387XB is not set +# CONFIG_MFD_TC6393XB is not set + +# +# Multimedia Capabilities Port drivers +# +# CONFIG_MCP_SA11X0 is not set +# CONFIG_REGULATOR is not set +# CONFIG_MEDIA_SUPPORT is not set # # Graphics support # +# CONFIG_VGASTATE is not set +# CONFIG_VIDEO_OUTPUT_CONTROL is not set CONFIG_FB=y +# CONFIG_FIRMWARE_EDID is not set +# CONFIG_FB_DDC is not set +# CONFIG_FB_BOOT_VESA_SUPPORT is not set CONFIG_FB_CFB_FILLRECT=y CONFIG_FB_CFB_COPYAREA=y CONFIG_FB_CFB_IMAGEBLIT=y -CONFIG_FB_SOFT_CURSOR=y +# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set +# CONFIG_FB_SYS_FILLRECT is not set +# CONFIG_FB_SYS_COPYAREA is not set +# CONFIG_FB_SYS_IMAGEBLIT is not set +# CONFIG_FB_FOREIGN_ENDIAN is not set +# CONFIG_FB_SYS_FOPS is not set +# CONFIG_FB_SVGALIB is not set # CONFIG_FB_MACMODES is not set +# CONFIG_FB_BACKLIGHT is not set # CONFIG_FB_MODE_HELPERS is not set # CONFIG_FB_TILEBLITTING is not set + +# +# Frame buffer hardware drivers +# CONFIG_FB_SA1100=y # CONFIG_FB_S1D13XXX is not set # CONFIG_FB_VIRTUAL is not set +# CONFIG_FB_METRONOME is not set +# CONFIG_FB_MB862XX is not set +# CONFIG_FB_BROADSHEET is not set +# CONFIG_BACKLIGHT_LCD_SUPPORT is not set + +# +# Display device support +# +# CONFIG_DISPLAY_SUPPORT is not set # # Console display driver support @@ -691,65 +841,54 @@ CONFIG_FB_SA1100=y # CONFIG_MDA_CONSOLE is not set CONFIG_DUMMY_CONSOLE=y # CONFIG_FRAMEBUFFER_CONSOLE is not set - -# -# Logo configuration -# # CONFIG_LOGO is not set -# CONFIG_BACKLIGHT_LCD_SUPPORT is not set - -# -# Sound -# -CONFIG_SOUND=y - -# -# Advanced Linux Sound Architecture -# -# CONFIG_SND is not set - -# -# Open Sound System -# -# CONFIG_SOUND_PRIME is not set - -# -# USB support -# -CONFIG_USB_ARCH_HAS_HCD=y -# CONFIG_USB_ARCH_HAS_OHCI is not set -# CONFIG_USB is not set +# CONFIG_SOUND is not set +# CONFIG_HID_SUPPORT is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_MMC is not set +# CONFIG_MEMSTICK is not set +# CONFIG_NEW_LEDS is not set +# CONFIG_ACCESSIBILITY is not set +CONFIG_RTC_LIB=y +# CONFIG_RTC_CLASS is not set +# CONFIG_DMADEVICES is not set +# CONFIG_AUXDISPLAY is not set +# CONFIG_UIO is not set # -# USB Gadget Support +# TI VLYNQ # -# CONFIG_USB_GADGET is not set - -# -# MMC/SD Card support -# -# CONFIG_MMC is not set +# CONFIG_STAGING is not set # # File systems # CONFIG_EXT2_FS=y # CONFIG_EXT2_FS_XATTR is not set +# CONFIG_EXT2_FS_XIP is not set # CONFIG_EXT3_FS is not set -# CONFIG_JBD is not set +# CONFIG_EXT4_FS is not set # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set - -# -# XFS support -# +# CONFIG_FS_POSIX_ACL is not set # CONFIG_XFS_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_ROMFS_FS is not set -# CONFIG_QUOTA is not set +# CONFIG_OCFS2_FS is not set +# CONFIG_BTRFS_FS is not set +# CONFIG_NILFS2_FS is not set +CONFIG_FILE_LOCKING=y +CONFIG_FSNOTIFY=y CONFIG_DNOTIFY=y +# CONFIG_INOTIFY is not set +CONFIG_INOTIFY_USER=y +# CONFIG_QUOTA is not set # CONFIG_AUTOFS_FS is not set # CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set + +# +# Caches +# +# CONFIG_FSCACHE is not set # # CD-ROM/DVD Filesystems @@ -771,16 +910,13 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" # Pseudo filesystems # CONFIG_PROC_FS=y +CONFIG_PROC_SYSCTL=y +CONFIG_PROC_PAGE_MONITOR=y CONFIG_SYSFS=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVPTS_FS_XATTR is not set # CONFIG_TMPFS is not set # CONFIG_HUGETLB_PAGE is not set -CONFIG_RAMFS=y - -# -# Miscellaneous filesystems -# +# CONFIG_CONFIGFS_FS is not set +CONFIG_MISC_FILESYSTEMS=y # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set # CONFIG_HFS_FS is not set @@ -788,34 +924,37 @@ CONFIG_RAMFS=y # CONFIG_BEFS_FS is not set # CONFIG_BFS_FS is not set # CONFIG_EFS_FS is not set -# CONFIG_JFFS_FS is not set CONFIG_JFFS2_FS=y CONFIG_JFFS2_FS_DEBUG=0 -# CONFIG_JFFS2_FS_NAND is not set -# CONFIG_JFFS2_FS_NOR_ECC is not set +CONFIG_JFFS2_FS_WRITEBUFFER=y +# CONFIG_JFFS2_FS_WBUF_VERIFY is not set +# CONFIG_JFFS2_SUMMARY is not set +# CONFIG_JFFS2_FS_XATTR is not set # CONFIG_JFFS2_COMPRESSION_OPTIONS is not set CONFIG_JFFS2_ZLIB=y +# CONFIG_JFFS2_LZO is not set CONFIG_JFFS2_RTIME=y # CONFIG_JFFS2_RUBIN is not set CONFIG_CRAMFS=m +# CONFIG_SQUASHFS is not set # CONFIG_VXFS_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_OMFS_FS is not set # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set - -# -# Network File Systems -# +CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y # CONFIG_NFS_V3 is not set # CONFIG_NFS_V4 is not set -# CONFIG_NFS_DIRECTIO is not set CONFIG_NFSD=m # CONFIG_NFSD_V3 is not set -CONFIG_NFSD_TCP=y +# CONFIG_NFSD_V4 is not set CONFIG_LOCKD=y CONFIG_EXPORTFS=m +CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y # CONFIG_RPCSEC_GSS_KRB5 is not set # CONFIG_RPCSEC_GSS_SPKM3 is not set @@ -831,10 +970,6 @@ CONFIG_SMB_FS=m # # CONFIG_PARTITION_ADVANCED is not set CONFIG_MSDOS_PARTITION=y - -# -# Native Language Support -# CONFIG_NLS=y CONFIG_NLS_DEFAULT="iso8859-1" # CONFIG_NLS_CODEPAGE_437 is not set @@ -875,20 +1010,34 @@ CONFIG_NLS_DEFAULT="iso8859-1" # CONFIG_NLS_KOI8_R is not set # CONFIG_NLS_KOI8_U is not set # CONFIG_NLS_UTF8 is not set - -# -# Profiling support -# -# CONFIG_PROFILING is not set +# CONFIG_DLM is not set # # Kernel hacking # # CONFIG_PRINTK_TIME is not set +CONFIG_ENABLE_WARN_DEPRECATED=y +CONFIG_ENABLE_MUST_CHECK=y +CONFIG_FRAME_WARN=1024 +# CONFIG_MAGIC_SYSRQ is not set +# CONFIG_STRIP_ASM_SYMS is not set +# CONFIG_UNUSED_SYMBOLS is not set +# CONFIG_DEBUG_FS is not set +# CONFIG_HEADERS_CHECK is not set # CONFIG_DEBUG_KERNEL is not set -CONFIG_LOG_BUF_SHIFT=14 +# CONFIG_SLUB_DEBUG_ON is not set +# CONFIG_SLUB_STATS is not set CONFIG_DEBUG_BUGVERBOSE=y +CONFIG_DEBUG_MEMORY_INIT=y CONFIG_FRAME_POINTER=y +# CONFIG_RCU_CPU_STALL_DETECTOR is not set +# CONFIG_LATENCYTOP is not set +# CONFIG_SYSCTL_SYSCALL_CHECK is not set +CONFIG_HAVE_FUNCTION_TRACER=y +CONFIG_TRACING_SUPPORT=y +# CONFIG_FTRACE is not set +# CONFIG_SAMPLES is not set +CONFIG_HAVE_ARCH_KGDB=y # CONFIG_DEBUG_USER is not set # @@ -896,21 +1045,120 @@ CONFIG_FRAME_POINTER=y # # CONFIG_KEYS is not set # CONFIG_SECURITY is not set +# CONFIG_SECURITYFS is not set +# CONFIG_SECURITY_FILE_CAPABILITIES is not set +CONFIG_CRYPTO=y + +# +# Crypto core or helper +# +# CONFIG_CRYPTO_FIPS is not set +CONFIG_CRYPTO_ALGAPI=m +CONFIG_CRYPTO_ALGAPI2=m +CONFIG_CRYPTO_RNG=m +CONFIG_CRYPTO_RNG2=m +# CONFIG_CRYPTO_MANAGER is not set +# CONFIG_CRYPTO_MANAGER2 is not set +# CONFIG_CRYPTO_GF128MUL is not set +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_CRYPTD is not set +# CONFIG_CRYPTO_AUTHENC is not set +# CONFIG_CRYPTO_TEST is not set + +# +# Authenticated Encryption with Associated Data +# +# CONFIG_CRYPTO_CCM is not set +# CONFIG_CRYPTO_GCM is not set +# CONFIG_CRYPTO_SEQIV is not set + +# +# Block modes +# +# CONFIG_CRYPTO_CBC is not set +# CONFIG_CRYPTO_CTR is not set +# CONFIG_CRYPTO_CTS is not set +# CONFIG_CRYPTO_ECB is not set +# CONFIG_CRYPTO_LRW is not set +# CONFIG_CRYPTO_PCBC is not set +# CONFIG_CRYPTO_XTS is not set + +# +# Hash modes +# +# CONFIG_CRYPTO_HMAC is not set +# CONFIG_CRYPTO_XCBC is not set +# CONFIG_CRYPTO_VMAC is not set + +# +# Digest +# +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_GHASH is not set +# CONFIG_CRYPTO_MD4 is not set +# CONFIG_CRYPTO_MD5 is not set +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_RMD128 is not set +# CONFIG_CRYPTO_RMD160 is not set +# CONFIG_CRYPTO_RMD256 is not set +# CONFIG_CRYPTO_RMD320 is not set +# CONFIG_CRYPTO_SHA1 is not set +# CONFIG_CRYPTO_SHA256 is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_WP512 is not set + +# +# Ciphers +# +CONFIG_CRYPTO_AES=m +# CONFIG_CRYPTO_ANUBIS is not set +# CONFIG_CRYPTO_ARC4 is not set +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +# CONFIG_CRYPTO_DES is not set +# CONFIG_CRYPTO_FCRYPT is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_SALSA20 is not set +# CONFIG_CRYPTO_SEED is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_TWOFISH is not set # -# Cryptographic options +# Compression # -# CONFIG_CRYPTO is not set +# CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_ZLIB is not set +# CONFIG_CRYPTO_LZO is not set # -# Hardware crypto devices +# Random Number Generation # +CONFIG_CRYPTO_ANSI_CPRNG=m +CONFIG_CRYPTO_HW=y +# CONFIG_BINARY_PRINTF is not set # # Library routines # +CONFIG_BITREVERSE=y +CONFIG_GENERIC_FIND_LAST_BIT=y CONFIG_CRC_CCITT=m +# CONFIG_CRC16 is not set +# CONFIG_CRC_T10DIF is not set +# CONFIG_CRC_ITU_T is not set CONFIG_CRC32=y +# CONFIG_CRC7 is not set # CONFIG_LIBCRC32C is not set CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=y +CONFIG_DECOMPRESS_GZIP=y +CONFIG_DECOMPRESS_BZIP2=y +CONFIG_DECOMPRESS_LZMA=y +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y +CONFIG_NLATTR=y diff --git a/arch/arm/mach-sa1100/Kconfig b/arch/arm/mach-sa1100/Kconfig index 4e5c07f4e45..03a7f3857c5 100644 --- a/arch/arm/mach-sa1100/Kconfig +++ b/arch/arm/mach-sa1100/Kconfig @@ -53,23 +53,23 @@ config SA1100_COLLIE config SA1100_H3100 bool "Compaq iPAQ H3100" + select HTC_EGPIO help Say Y here if you intend to run this kernel on the Compaq iPAQ H3100 handheld computer. Information about this machine and the Linux port to this machine can be found at: <http://www.handhelds.org/Compaq/index.html#iPAQ_H3100> - <http://www.compaq.com/products/handhelds/pocketpc/> config SA1100_H3600 bool "Compaq iPAQ H3600/H3700" + select HTC_EGPIO help Say Y here if you intend to run this kernel on the Compaq iPAQ H3600 handheld computer. Information about this machine and the Linux port to this machine can be found at: <http://www.handhelds.org/Compaq/index.html#iPAQ_H3600> - <http://www.compaq.com/products/handhelds/pocketpc/> config SA1100_BADGE4 bool "HP Labs BadgePAD 4" diff --git a/arch/arm/mach-sa1100/Makefile b/arch/arm/mach-sa1100/Makefile index bb7b8198d0c..89349c1dd7a 100644 --- a/arch/arm/mach-sa1100/Makefile +++ b/arch/arm/mach-sa1100/Makefile @@ -25,8 +25,8 @@ led-$(CONFIG_SA1100_CERF) += leds-cerf.o obj-$(CONFIG_SA1100_COLLIE) += collie.o -obj-$(CONFIG_SA1100_H3100) += h3600.o -obj-$(CONFIG_SA1100_H3600) += h3600.o +obj-$(CONFIG_SA1100_H3100) += h3100.o h3xxx.o +obj-$(CONFIG_SA1100_H3600) += h3600.o h3xxx.o obj-$(CONFIG_SA1100_HACKKIT) += hackkit.o led-$(CONFIG_SA1100_HACKKIT) += leds-hackkit.o diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c index 55e64477a87..169e5b87dbf 100644 --- a/arch/arm/mach-sa1100/assabet.c +++ b/arch/arm/mach-sa1100/assabet.c @@ -249,10 +249,10 @@ static void __init assabet_init(void) #endif } - sa11x0_set_flash_data(&assabet_flash_data, assabet_flash_resources, - ARRAY_SIZE(assabet_flash_resources)); - sa11x0_set_irda_data(&assabet_irda_data); - sa11x0_set_mcp_data(&assabet_mcp_data); + sa11x0_register_mtd(&assabet_flash_data, assabet_flash_resources, + ARRAY_SIZE(assabet_flash_resources)); + sa11x0_register_irda(&assabet_irda_data); + sa11x0_register_mcp(&assabet_mcp_data); } /* diff --git a/arch/arm/mach-sa1100/badge4.c b/arch/arm/mach-sa1100/badge4.c index ab5883b39dd..051ec0f0023 100644 --- a/arch/arm/mach-sa1100/badge4.c +++ b/arch/arm/mach-sa1100/badge4.c @@ -212,7 +212,7 @@ static int __init badge4_init(void) /* maybe turn on 5v0 from the start */ badge4_set_5V(BADGE4_5V_INITIALLY, five_v_on); - sa11x0_set_flash_data(&badge4_flash_data, &badge4_flash_resource, 1); + sa11x0_register_mtd(&badge4_flash_data, &badge4_flash_resource, 1); return 0; } diff --git a/arch/arm/mach-sa1100/cerf.c b/arch/arm/mach-sa1100/cerf.c index fd3ad9cfc91..bc950ef418a 100644 --- a/arch/arm/mach-sa1100/cerf.c +++ b/arch/arm/mach-sa1100/cerf.c @@ -129,8 +129,8 @@ static struct mcp_plat_data cerf_mcp_data = { static void __init cerf_init(void) { platform_add_devices(cerf_devices, ARRAY_SIZE(cerf_devices)); - sa11x0_set_flash_data(&cerf_flash_data, &cerf_flash_resource, 1); - sa11x0_set_mcp_data(&cerf_mcp_data); + sa11x0_register_mtd(&cerf_flash_data, &cerf_flash_resource, 1); + sa11x0_register_mcp(&cerf_mcp_data); } MACHINE_START(CERF, "Intrinsyc CerfBoard/CerfCube") diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c index bbf2ebcc306..9982c5c28ed 100644 --- a/arch/arm/mach-sa1100/collie.c +++ b/arch/arm/mach-sa1100/collie.c @@ -26,6 +26,7 @@ #include <linux/mtd/partitions.h> #include <linux/timer.h> #include <linux/gpio.h> +#include <linux/pda_power.h> #include <mach/hardware.h> #include <asm/mach-types.h> @@ -56,6 +57,7 @@ static struct resource collie_scoop_resources[] = { static struct scoop_config collie_scoop_setup = { .io_dir = COLLIE_SCOOP_IO_DIR, .io_out = COLLIE_SCOOP_IO_OUT, + .gpio_base = COLLIE_SCOOP_GPIO_BASE, }; struct platform_device colliescoop_device = { @@ -85,6 +87,70 @@ static struct scoop_pcmcia_config collie_pcmcia_config = { static struct mcp_plat_data collie_mcp_data = { .mccr0 = MCCR0_ADM | MCCR0_ExtClk, .sclk_rate = 9216000, + .gpio_base = COLLIE_TC35143_GPIO_BASE, +}; + +/* + * Collie AC IN + */ +static int collie_power_init(struct device *dev) +{ + int ret = gpio_request(COLLIE_GPIO_AC_IN, "ac in"); + if (ret) + goto err_gpio_req; + + ret = gpio_direction_input(COLLIE_GPIO_AC_IN); + if (ret) + goto err_gpio_in; + + return 0; + +err_gpio_in: + gpio_free(COLLIE_GPIO_AC_IN); +err_gpio_req: + return ret; +} + +static void collie_power_exit(struct device *dev) +{ + gpio_free(COLLIE_GPIO_AC_IN); +} + +static int collie_power_ac_online(void) +{ + return gpio_get_value(COLLIE_GPIO_AC_IN) == 2; +} + +static char *collie_ac_supplied_to[] = { + "main-battery", + "backup-battery", +}; + +static struct pda_power_pdata collie_power_data = { + .init = collie_power_init, + .is_ac_online = collie_power_ac_online, + .exit = collie_power_exit, + .supplied_to = collie_ac_supplied_to, + .num_supplicants = ARRAY_SIZE(collie_ac_supplied_to), +}; + +static struct resource collie_power_resource[] = { + { + .name = "ac", + .start = gpio_to_irq(COLLIE_GPIO_AC_IN), + .end = gpio_to_irq(COLLIE_GPIO_AC_IN), + .flags = IORESOURCE_IRQ | + IORESOURCE_IRQ_HIGHEDGE | + IORESOURCE_IRQ_LOWEDGE, + }, +}; + +static struct platform_device collie_power_device = { + .name = "pda-power", + .id = -1, + .dev.platform_data = &collie_power_data, + .resource = collie_power_resource, + .num_resources = ARRAY_SIZE(collie_power_resource), }; #ifdef CONFIG_SHARP_LOCOMO @@ -178,6 +244,7 @@ struct platform_device collie_locomo_device = { static struct platform_device *devices[] __initdata = { &collie_locomo_device, &colliescoop_device, + &collie_power_device, }; static struct mtd_partition collie_partitions[] = { @@ -248,22 +315,24 @@ static void __init collie_init(void) GPDR = GPIO_LDD8 | GPIO_LDD9 | GPIO_LDD10 | GPIO_LDD11 | GPIO_LDD12 | GPIO_LDD13 | GPIO_LDD14 | GPIO_LDD15 | GPIO_SSP_TXD | GPIO_SSP_SCLK | GPIO_SSP_SFRM | GPIO_SDLC_SCLK | - COLLIE_GPIO_UCB1x00_RESET | COLLIE_GPIO_nMIC_ON | - COLLIE_GPIO_nREMOCON_ON | GPIO_32_768kHz; + _COLLIE_GPIO_UCB1x00_RESET | _COLLIE_GPIO_nMIC_ON | + _COLLIE_GPIO_nREMOCON_ON | GPIO_32_768kHz; PPDR = PPC_LDD0 | PPC_LDD1 | PPC_LDD2 | PPC_LDD3 | PPC_LDD4 | PPC_LDD5 | PPC_LDD6 | PPC_LDD7 | PPC_L_PCLK | PPC_L_LCLK | PPC_L_FCLK | PPC_L_BIAS | PPC_TXD1 | PPC_TXD2 | PPC_TXD3 | PPC_TXD4 | PPC_SCLK | PPC_SFRM; - PWER = COLLIE_GPIO_AC_IN | COLLIE_GPIO_CO | COLLIE_GPIO_ON_KEY | - COLLIE_GPIO_WAKEUP | COLLIE_GPIO_nREMOCON_INT | PWER_RTC; + PWER = _COLLIE_GPIO_AC_IN | _COLLIE_GPIO_CO | _COLLIE_GPIO_ON_KEY | + _COLLIE_GPIO_WAKEUP | _COLLIE_GPIO_nREMOCON_INT | PWER_RTC; - PGSR = COLLIE_GPIO_nREMOCON_ON; + PGSR = _COLLIE_GPIO_nREMOCON_ON; PSDR = PPC_RXD1 | PPC_RXD2 | PPC_RXD3 | PPC_RXD4; PCFR = PCFR_OPDE; + GPSR |= _COLLIE_GPIO_UCB1x00_RESET; + platform_scoop_config = &collie_pcmcia_config; @@ -272,9 +341,9 @@ static void __init collie_init(void) printk(KERN_WARNING "collie: Unable to register LoCoMo device\n"); } - sa11x0_set_flash_data(&collie_flash_data, collie_flash_resources, - ARRAY_SIZE(collie_flash_resources)); - sa11x0_set_mcp_data(&collie_mcp_data); + sa11x0_register_mtd(&collie_flash_data, collie_flash_resources, + ARRAY_SIZE(collie_flash_resources)); + sa11x0_register_mcp(&collie_mcp_data); sharpsl_save_param(); } diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c index 23cfdd59395..9faea1511c1 100644 --- a/arch/arm/mach-sa1100/generic.c +++ b/arch/arm/mach-sa1100/generic.c @@ -162,6 +162,17 @@ static void sa1100_power_off(void) PMCR = PMCR_SF; } +static void sa11x0_register_device(struct platform_device *dev, void *data) +{ + int err; + dev->dev.platform_data = data; + err = platform_device_register(dev); + if (err) + printk(KERN_ERR "Unable to register device %s: %d\n", + dev->name, err); +} + + static struct resource sa11x0udc_resources[] = { [0] = { .start = 0x80000000, @@ -234,9 +245,9 @@ static struct platform_device sa11x0mcp_device = { .resource = sa11x0mcp_resources, }; -void sa11x0_set_mcp_data(struct mcp_plat_data *data) +void sa11x0_register_mcp(struct mcp_plat_data *data) { - sa11x0mcp_device.dev.platform_data = data; + sa11x0_register_device(&sa11x0mcp_device, data); } static struct resource sa11x0ssp_resources[] = { @@ -293,13 +304,13 @@ static struct platform_device sa11x0mtd_device = { .id = -1, }; -void sa11x0_set_flash_data(struct flash_platform_data *flash, - struct resource *res, int nr) +void sa11x0_register_mtd(struct flash_platform_data *flash, + struct resource *res, int nr) { flash->name = "sa1100"; - sa11x0mtd_device.dev.platform_data = flash; sa11x0mtd_device.resource = res; sa11x0mtd_device.num_resources = nr; + sa11x0_register_device(&sa11x0mtd_device, flash); } static struct resource sa11x0ir_resources[] = { @@ -329,9 +340,9 @@ static struct platform_device sa11x0ir_device = { .resource = sa11x0ir_resources, }; -void sa11x0_set_irda_data(struct irda_platform_data *irda) +void sa11x0_register_irda(struct irda_platform_data *irda) { - sa11x0ir_device.dev.platform_data = irda; + sa11x0_register_device(&sa11x0ir_device, irda); } static struct platform_device sa11x0rtc_device = { @@ -343,21 +354,15 @@ static struct platform_device *sa11x0_devices[] __initdata = { &sa11x0udc_device, &sa11x0uart1_device, &sa11x0uart3_device, - &sa11x0mcp_device, &sa11x0ssp_device, &sa11x0pcmcia_device, &sa11x0fb_device, - &sa11x0mtd_device, &sa11x0rtc_device, }; static int __init sa1100_init(void) { pm_power_off = sa1100_power_off; - - if (sa11x0ir_device.dev.platform_data) - platform_device_register(&sa11x0ir_device); - return platform_add_devices(sa11x0_devices, ARRAY_SIZE(sa11x0_devices)); } diff --git a/arch/arm/mach-sa1100/generic.h b/arch/arm/mach-sa1100/generic.h index 793c2e6c991..ec03f187c52 100644 --- a/arch/arm/mach-sa1100/generic.h +++ b/arch/arm/mach-sa1100/generic.h @@ -32,14 +32,11 @@ extern unsigned int sa11x0_ppcr_to_freq(unsigned int idx); struct flash_platform_data; struct resource; -extern void sa11x0_set_flash_data(struct flash_platform_data *flash, - struct resource *res, int nr); - -struct sa11x0_ssp_plat_ops; -extern void sa11x0_set_ssp_data(struct sa11x0_ssp_plat_ops *ops); +void sa11x0_register_mtd(struct flash_platform_data *flash, + struct resource *res, int nr); struct irda_platform_data; -void sa11x0_set_irda_data(struct irda_platform_data *irda); +void sa11x0_register_irda(struct irda_platform_data *irda); struct mcp_plat_data; -void sa11x0_set_mcp_data(struct mcp_plat_data *data); +void sa11x0_register_mcp(struct mcp_plat_data *data); diff --git a/arch/arm/mach-sa1100/h3100.c b/arch/arm/mach-sa1100/h3100.c new file mode 100644 index 00000000000..0c7cea0dc01 --- /dev/null +++ b/arch/arm/mach-sa1100/h3100.c @@ -0,0 +1,95 @@ +/* + * Support for Compaq iPAQ H3100 handheld computer + * + * Copyright (c) 2000,1 Compaq Computer Corporation. (Author: Jamey Hicks) + * Copyright (c) 2009 Dmitry Artamonow <mad_soft@inbox.ru> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/gpio.h> + +#include <asm/mach-types.h> +#include <asm/mach/arch.h> +#include <asm/mach/irda.h> + +#include <mach/h3xxx.h> + +#include "generic.h" + +/* + * helper for sa1100fb + */ +static void h3100_lcd_power(int enable) +{ + if (!gpio_request(H3XXX_EGPIO_LCD_ON, "LCD ON")) { + gpio_set_value(H3100_GPIO_LCD_3V_ON, enable); + gpio_direction_output(H3XXX_EGPIO_LCD_ON, enable); + gpio_free(H3XXX_EGPIO_LCD_ON); + } else { + pr_err("%s: can't request H3XXX_EGPIO_LCD_ON\n", __func__); + } +} + + +static void __init h3100_map_io(void) +{ + h3xxx_map_io(); + + sa1100fb_lcd_power = h3100_lcd_power; + + /* Older bootldrs put GPIO2-9 in alternate mode on the + assumption that they are used for video */ + GAFR &= ~0x000001fb; +} + +/* + * This turns the IRDA power on or off on the Compaq H3100 + */ +static int h3100_irda_set_power(struct device *dev, unsigned int state) +{ + gpio_set_value(H3100_GPIO_IR_ON, state); + return 0; +} + +static void h3100_irda_set_speed(struct device *dev, unsigned int speed) +{ + gpio_set_value(H3100_GPIO_IR_FSEL, !(speed < 4000000)); +} + +static struct irda_platform_data h3100_irda_data = { + .set_power = h3100_irda_set_power, + .set_speed = h3100_irda_set_speed, +}; + +static struct gpio_default_state h3100_default_gpio[] = { + { H3100_GPIO_IR_ON, GPIO_MODE_OUT0, "IrDA power" }, + { H3100_GPIO_IR_FSEL, GPIO_MODE_OUT0, "IrDA fsel" }, + { H3XXX_GPIO_COM_DCD, GPIO_MODE_IN, "COM DCD" }, + { H3XXX_GPIO_COM_CTS, GPIO_MODE_IN, "COM CTS" }, + { H3XXX_GPIO_COM_RTS, GPIO_MODE_OUT0, "COM RTS" }, + { H3100_GPIO_LCD_3V_ON, GPIO_MODE_OUT0, "LCD 3v" }, +}; + +static void __init h3100_mach_init(void) +{ + h3xxx_init_gpio(h3100_default_gpio, ARRAY_SIZE(h3100_default_gpio)); + h3xxx_mach_init(); + sa11x0_register_irda(&h3100_irda_data); +} + +MACHINE_START(H3100, "Compaq iPAQ H3100") + .phys_io = 0x80000000, + .io_pg_offst = ((0xf8000000) >> 18) & 0xfffc, + .boot_params = 0xc0000100, + .map_io = h3100_map_io, + .init_irq = sa1100_init_irq, + .timer = &sa1100_timer, + .init_machine = h3100_mach_init, +MACHINE_END + diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c index 0eb2f159578..af3b71459f8 100644 --- a/arch/arm/mach-sa1100/h3600.c +++ b/arch/arm/mach-sa1100/h3600.c @@ -1,421 +1,127 @@ /* - * Hardware definitions for Compaq iPAQ H3xxx Handheld Computers + * Support for Compaq iPAQ H3600 handheld computer * - * Copyright 2000,1 Compaq Computer Corporation. + * Copyright (c) 2000,1 Compaq Computer Corporation. (Author: Jamey Hicks) + * Copyright (c) 2009 Dmitry Artamonow <mad_soft@inbox.ru> * - * Use consistent with the GNU GPL is permitted, - * provided that this copyright notice is - * preserved in its entirety in all copies and derived works. - * - * COMPAQ COMPUTER CORPORATION MAKES NO WARRANTIES, EXPRESSED OR IMPLIED, - * AS TO THE USEFULNESS OR CORRECTNESS OF THIS CODE OR ITS - * FITNESS FOR ANY PARTICULAR PURPOSE. - * - * Author: Jamey Hicks. - * - * History: - * - * 2001-10-?? Andrew Christian Added support for iPAQ H3800 - * and abstracted EGPIO interface. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. * */ -#include <linux/module.h> + #include <linux/init.h> #include <linux/kernel.h> -#include <linux/tty.h> -#include <linux/pm.h> -#include <linux/device.h> -#include <linux/mtd/mtd.h> -#include <linux/mtd/partitions.h> -#include <linux/serial_core.h> +#include <linux/gpio.h> -#include <asm/irq.h> -#include <mach/hardware.h> #include <asm/mach-types.h> -#include <asm/setup.h> - -#include <asm/mach/irq.h> #include <asm/mach/arch.h> -#include <asm/mach/flash.h> #include <asm/mach/irda.h> -#include <asm/mach/map.h> -#include <asm/mach/serial_sa1100.h> -#include <mach/h3600.h> -#include <mach/h3600_gpio.h> +#include <mach/h3xxx.h> #include "generic.h" -void (*assign_h3600_egpio)(enum ipaq_egpio_type x, int level); -EXPORT_SYMBOL(assign_h3600_egpio); - -static struct mtd_partition h3xxx_partitions[] = { - { - .name = "H3XXX boot firmware", - .size = 0x00040000, - .offset = 0, - .mask_flags = MTD_WRITEABLE, /* force read-only */ - }, { - .name = "H3XXX rootfs", - .size = MTDPART_SIZ_FULL, - .offset = 0x00040000, - } -}; - -static void h3xxx_set_vpp(int vpp) -{ - assign_h3600_egpio(IPAQ_EGPIO_VPP_ON, vpp); -} - -static struct flash_platform_data h3xxx_flash_data = { - .map_name = "cfi_probe", - .set_vpp = h3xxx_set_vpp, - .parts = h3xxx_partitions, - .nr_parts = ARRAY_SIZE(h3xxx_partitions), -}; - -static struct resource h3xxx_flash_resource = { - .start = SA1100_CS0_PHYS, - .end = SA1100_CS0_PHYS + SZ_32M - 1, - .flags = IORESOURCE_MEM, -}; - /* - * This turns the IRDA power on or off on the Compaq H3600 - */ -static int h3600_irda_set_power(struct device *dev, unsigned int state) -{ - assign_h3600_egpio( IPAQ_EGPIO_IR_ON, state ); - - return 0; -} - -static void h3600_irda_set_speed(struct device *dev, unsigned int speed) -{ - assign_h3600_egpio(IPAQ_EGPIO_IR_FSEL, !(speed < 4000000)); -} - -static struct irda_platform_data h3600_irda_data = { - .set_power = h3600_irda_set_power, - .set_speed = h3600_irda_set_speed, -}; - -static void h3xxx_mach_init(void) -{ - sa11x0_set_flash_data(&h3xxx_flash_data, &h3xxx_flash_resource, 1); - sa11x0_set_irda_data(&h3600_irda_data); -} - -/* - * low-level UART features + * helper for sa1100fb */ - -static void h3600_uart_set_mctrl(struct uart_port *port, u_int mctrl) +static void h3600_lcd_power(int enable) { - if (port->mapbase == _Ser3UTCR0) { - if (mctrl & TIOCM_RTS) - GPCR = GPIO_H3600_COM_RTS; - else - GPSR = GPIO_H3600_COM_RTS; + if (gpio_request(H3XXX_EGPIO_LCD_ON, "LCD power")) { + pr_err("%s: can't request H3XXX_EGPIO_LCD_ON\n", __func__); + goto err1; } -} - -static u_int h3600_uart_get_mctrl(struct uart_port *port) -{ - u_int ret = TIOCM_CD | TIOCM_CTS | TIOCM_DSR; - - if (port->mapbase == _Ser3UTCR0) { - int gplr = GPLR; - /* DCD and CTS bits are inverted in GPLR by RS232 transceiver */ - if (gplr & GPIO_H3600_COM_DCD) - ret &= ~TIOCM_CD; - if (gplr & GPIO_H3600_COM_CTS) - ret &= ~TIOCM_CTS; + if (gpio_request(H3600_EGPIO_LCD_PCI, "LCD control")) { + pr_err("%s: can't request H3XXX_EGPIO_LCD_PCI\n", __func__); + goto err2; + } + if (gpio_request(H3600_EGPIO_LCD_5V_ON, "LCD 5v")) { + pr_err("%s: can't request H3XXX_EGPIO_LCD_5V_ON\n", __func__); + goto err3; + } + if (gpio_request(H3600_EGPIO_LVDD_ON, "LCD 9v/-6.5v")) { + pr_err("%s: can't request H3600_EGPIO_LVDD_ON\n", __func__); + goto err4; } - return ret; -} + gpio_direction_output(H3XXX_EGPIO_LCD_ON, enable); + gpio_direction_output(H3600_EGPIO_LCD_PCI, enable); + gpio_direction_output(H3600_EGPIO_LCD_5V_ON, enable); + gpio_direction_output(H3600_EGPIO_LVDD_ON, enable); -static void h3600_uart_pm(struct uart_port *port, u_int state, u_int oldstate) -{ - if (port->mapbase == _Ser2UTCR0) { /* TODO: REMOVE THIS */ - assign_h3600_egpio(IPAQ_EGPIO_IR_ON, !state); - } else if (port->mapbase == _Ser3UTCR0) { - assign_h3600_egpio(IPAQ_EGPIO_RS232_ON, !state); - } + gpio_free(H3600_EGPIO_LVDD_ON); +err4: gpio_free(H3600_EGPIO_LCD_5V_ON); +err3: gpio_free(H3600_EGPIO_LCD_PCI); +err2: gpio_free(H3XXX_EGPIO_LCD_ON); +err1: return; } -/* - * Enable/Disable wake up events for this serial port. - * Obviously, we only support this on the normal COM port. - */ -static int h3600_uart_set_wake(struct uart_port *port, u_int enable) +static void __init h3600_map_io(void) { - int err = -EINVAL; + h3xxx_map_io(); - if (port->mapbase == _Ser3UTCR0) { - if (enable) - PWER |= PWER_GPIO23 | PWER_GPIO25; /* DCD and CTS */ - else - PWER &= ~(PWER_GPIO23 | PWER_GPIO25); /* DCD and CTS */ - err = 0; - } - return err; + sa1100fb_lcd_power = h3600_lcd_power; } -static struct sa1100_port_fns h3600_port_fns __initdata = { - .set_mctrl = h3600_uart_set_mctrl, - .get_mctrl = h3600_uart_get_mctrl, - .pm = h3600_uart_pm, - .set_wake = h3600_uart_set_wake, -}; - /* - * helper for sa1100fb + * This turns the IRDA power on or off on the Compaq H3600 */ -static void h3xxx_lcd_power(int enable) +static int h3600_irda_set_power(struct device *dev, unsigned int state) { - assign_h3600_egpio(IPAQ_EGPIO_LCD_POWER, enable); + gpio_set_value(H3600_EGPIO_IR_ON, state); + return 0; } -static struct map_desc h3600_io_desc[] __initdata = { - { /* static memory bank 2 CS#2 */ - .virtual = H3600_BANK_2_VIRT, - .pfn = __phys_to_pfn(SA1100_CS2_PHYS), - .length = 0x02800000, - .type = MT_DEVICE - }, { /* static memory bank 4 CS#4 */ - .virtual = H3600_BANK_4_VIRT, - .pfn = __phys_to_pfn(SA1100_CS4_PHYS), - .length = 0x00800000, - .type = MT_DEVICE - }, { /* EGPIO 0 CS#5 */ - .virtual = H3600_EGPIO_VIRT, - .pfn = __phys_to_pfn(H3600_EGPIO_PHYS), - .length = 0x01000000, - .type = MT_DEVICE - } -}; - -/* - * Common map_io initialization - */ - -static void __init h3xxx_map_io(void) +static void h3600_irda_set_speed(struct device *dev, unsigned int speed) { - sa1100_map_io(); - iotable_init(h3600_io_desc, ARRAY_SIZE(h3600_io_desc)); - - sa1100_register_uart_fns(&h3600_port_fns); - sa1100_register_uart(0, 3); /* Common serial port */ -// sa1100_register_uart(1, 1); /* Microcontroller on 3100/3600 */ - - /* Ensure those pins are outputs and driving low */ - PPDR |= PPC_TXD4 | PPC_SCLK | PPC_SFRM; - PPSR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM); - - /* Configure suspend conditions */ - PGSR = 0; - PWER = PWER_GPIO0 | PWER_RTC; - PCFR = PCFR_OPDE; - PSDR = 0; - - sa1100fb_lcd_power = h3xxx_lcd_power; + gpio_set_value(H3600_EGPIO_IR_FSEL, !(speed < 4000000)); } -/************************* H3100 *************************/ - -#ifdef CONFIG_SA1100_H3100 - -#define H3100_EGPIO (*(volatile unsigned int *)H3600_EGPIO_VIRT) -static unsigned int h3100_egpio = 0; - -static void h3100_control_egpio(enum ipaq_egpio_type x, int setp) +static int h3600_irda_startup(struct device *dev) { - unsigned int egpio = 0; - long gpio = 0; - unsigned long flags; - - switch (x) { - case IPAQ_EGPIO_LCD_POWER: - egpio |= EGPIO_H3600_LCD_ON; - gpio |= GPIO_H3100_LCD_3V_ON; - break; - case IPAQ_EGPIO_LCD_ENABLE: - break; - case IPAQ_EGPIO_CODEC_NRESET: - egpio |= EGPIO_H3600_CODEC_NRESET; - break; - case IPAQ_EGPIO_AUDIO_ON: - gpio |= GPIO_H3100_AUD_PWR_ON - | GPIO_H3100_AUD_ON; - break; - case IPAQ_EGPIO_QMUTE: - gpio |= GPIO_H3100_QMUTE; - break; - case IPAQ_EGPIO_OPT_NVRAM_ON: - egpio |= EGPIO_H3600_OPT_NVRAM_ON; - break; - case IPAQ_EGPIO_OPT_ON: - egpio |= EGPIO_H3600_OPT_ON; - break; - case IPAQ_EGPIO_CARD_RESET: - egpio |= EGPIO_H3600_CARD_RESET; - break; - case IPAQ_EGPIO_OPT_RESET: - egpio |= EGPIO_H3600_OPT_RESET; - break; - case IPAQ_EGPIO_IR_ON: - gpio |= GPIO_H3100_IR_ON; - break; - case IPAQ_EGPIO_IR_FSEL: - gpio |= GPIO_H3100_IR_FSEL; - break; - case IPAQ_EGPIO_RS232_ON: - egpio |= EGPIO_H3600_RS232_ON; - break; - case IPAQ_EGPIO_VPP_ON: - egpio |= EGPIO_H3600_VPP_ON; - break; - } + int err = gpio_request(H3600_EGPIO_IR_ON, "IrDA power"); + if (err) + goto err1; + err = gpio_direction_output(H3600_EGPIO_IR_ON, 0); + if (err) + goto err2; + err = gpio_request(H3600_EGPIO_IR_FSEL, "IrDA fsel"); + if (err) + goto err2; + err = gpio_direction_output(H3600_EGPIO_IR_FSEL, 0); + if (err) + goto err3; + return 0; - if (egpio || gpio) { - local_irq_save(flags); - if (setp) { - h3100_egpio |= egpio; - GPSR = gpio; - } else { - h3100_egpio &= ~egpio; - GPCR = gpio; - } - H3100_EGPIO = h3100_egpio; - local_irq_restore(flags); - } +err3: gpio_free(H3600_EGPIO_IR_FSEL); +err2: gpio_free(H3600_EGPIO_IR_ON); +err1: return err; } -#define H3100_DIRECT_EGPIO (GPIO_H3100_BT_ON \ - | GPIO_H3100_GPIO3 \ - | GPIO_H3100_QMUTE \ - | GPIO_H3100_LCD_3V_ON \ - | GPIO_H3100_AUD_ON \ - | GPIO_H3100_AUD_PWR_ON \ - | GPIO_H3100_IR_ON \ - | GPIO_H3100_IR_FSEL) - -static void __init h3100_map_io(void) +static void h3600_irda_shutdown(struct device *dev) { - h3xxx_map_io(); - - /* Initialize h3100-specific values here */ - GPCR = 0x0fffffff; /* All outputs are set low by default */ - GPDR = GPIO_H3600_COM_RTS | GPIO_H3600_L3_CLOCK | - GPIO_H3600_L3_MODE | GPIO_H3600_L3_DATA | - GPIO_H3600_CLK_SET1 | GPIO_H3600_CLK_SET0 | - H3100_DIRECT_EGPIO; - - /* Older bootldrs put GPIO2-9 in alternate mode on the - assumption that they are used for video */ - GAFR &= ~H3100_DIRECT_EGPIO; - - H3100_EGPIO = h3100_egpio; - assign_h3600_egpio = h3100_control_egpio; + gpio_free(H3600_EGPIO_IR_ON); + gpio_free(H3600_EGPIO_IR_FSEL); } -MACHINE_START(H3100, "Compaq iPAQ H3100") - .phys_io = 0x80000000, - .io_pg_offst = ((0xf8000000) >> 18) & 0xfffc, - .boot_params = 0xc0000100, - .map_io = h3100_map_io, - .init_irq = sa1100_init_irq, - .timer = &sa1100_timer, - .init_machine = h3xxx_mach_init, -MACHINE_END - -#endif /* CONFIG_SA1100_H3100 */ - -/************************* H3600 *************************/ - -#ifdef CONFIG_SA1100_H3600 - -#define H3600_EGPIO (*(volatile unsigned int *)H3600_EGPIO_VIRT) -static unsigned int h3600_egpio = EGPIO_H3600_RS232_ON; - -static void h3600_control_egpio(enum ipaq_egpio_type x, int setp) -{ - unsigned int egpio = 0; - unsigned long flags; - - switch (x) { - case IPAQ_EGPIO_LCD_POWER: - egpio |= EGPIO_H3600_LCD_ON | - EGPIO_H3600_LCD_PCI | - EGPIO_H3600_LCD_5V_ON | - EGPIO_H3600_LVDD_ON; - break; - case IPAQ_EGPIO_LCD_ENABLE: - break; - case IPAQ_EGPIO_CODEC_NRESET: - egpio |= EGPIO_H3600_CODEC_NRESET; - break; - case IPAQ_EGPIO_AUDIO_ON: - egpio |= EGPIO_H3600_AUD_AMP_ON | - EGPIO_H3600_AUD_PWR_ON; - break; - case IPAQ_EGPIO_QMUTE: - egpio |= EGPIO_H3600_QMUTE; - break; - case IPAQ_EGPIO_OPT_NVRAM_ON: - egpio |= EGPIO_H3600_OPT_NVRAM_ON; - break; - case IPAQ_EGPIO_OPT_ON: - egpio |= EGPIO_H3600_OPT_ON; - break; - case IPAQ_EGPIO_CARD_RESET: - egpio |= EGPIO_H3600_CARD_RESET; - break; - case IPAQ_EGPIO_OPT_RESET: - egpio |= EGPIO_H3600_OPT_RESET; - break; - case IPAQ_EGPIO_IR_ON: - egpio |= EGPIO_H3600_IR_ON; - break; - case IPAQ_EGPIO_IR_FSEL: - egpio |= EGPIO_H3600_IR_FSEL; - break; - case IPAQ_EGPIO_RS232_ON: - egpio |= EGPIO_H3600_RS232_ON; - break; - case IPAQ_EGPIO_VPP_ON: - egpio |= EGPIO_H3600_VPP_ON; - break; - } +static struct irda_platform_data h3600_irda_data = { + .set_power = h3600_irda_set_power, + .set_speed = h3600_irda_set_speed, + .startup = h3600_irda_startup, + .shutdown = h3600_irda_shutdown, +}; - if (egpio) { - local_irq_save(flags); - if (setp) - h3600_egpio |= egpio; - else - h3600_egpio &= ~egpio; - H3600_EGPIO = h3600_egpio; - local_irq_restore(flags); - } -} +static struct gpio_default_state h3600_default_gpio[] = { + { H3XXX_GPIO_COM_DCD, GPIO_MODE_IN, "COM DCD" }, + { H3XXX_GPIO_COM_CTS, GPIO_MODE_IN, "COM CTS" }, + { H3XXX_GPIO_COM_RTS, GPIO_MODE_OUT0, "COM RTS" }, +}; -static void __init h3600_map_io(void) +static void __init h3600_mach_init(void) { - h3xxx_map_io(); - - /* Initialize h3600-specific values here */ - - GPCR = 0x0fffffff; /* All outputs are set low by default */ - GPDR = GPIO_H3600_COM_RTS | GPIO_H3600_L3_CLOCK | - GPIO_H3600_L3_MODE | GPIO_H3600_L3_DATA | - GPIO_H3600_CLK_SET1 | GPIO_H3600_CLK_SET0 | - GPIO_LDD15 | GPIO_LDD14 | GPIO_LDD13 | GPIO_LDD12 | - GPIO_LDD11 | GPIO_LDD10 | GPIO_LDD9 | GPIO_LDD8; - - H3600_EGPIO = h3600_egpio; /* Maintains across sleep? */ - assign_h3600_egpio = h3600_control_egpio; + h3xxx_init_gpio(h3600_default_gpio, ARRAY_SIZE(h3600_default_gpio)); + h3xxx_mach_init(); + sa11x0_register_irda(&h3600_irda_data); } MACHINE_START(H3600, "Compaq iPAQ H3600") @@ -425,8 +131,6 @@ MACHINE_START(H3600, "Compaq iPAQ H3600") .map_io = h3600_map_io, .init_irq = sa1100_init_irq, .timer = &sa1100_timer, - .init_machine = h3xxx_mach_init, + .init_machine = h3600_mach_init, MACHINE_END -#endif /* CONFIG_SA1100_H3600 */ - diff --git a/arch/arm/mach-sa1100/h3xxx.c b/arch/arm/mach-sa1100/h3xxx.c new file mode 100644 index 00000000000..b0784c974c2 --- /dev/null +++ b/arch/arm/mach-sa1100/h3xxx.c @@ -0,0 +1,313 @@ +/* + * Support for Compaq iPAQ H3100 and H3600 handheld computers (common code) + * + * Copyright (c) 2000,1 Compaq Computer Corporation. (Author: Jamey Hicks) + * Copyright (c) 2009 Dmitry Artamonow <mad_soft@inbox.ru> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/kernel.h> +#include <linux/gpio.h> +#include <linux/gpio_keys.h> +#include <linux/input.h> +#include <linux/mfd/htc-egpio.h> +#include <linux/mtd/mtd.h> +#include <linux/mtd/partitions.h> +#include <linux/platform_device.h> +#include <linux/serial_core.h> + +#include <asm/mach/flash.h> +#include <asm/mach/map.h> +#include <asm/mach/serial_sa1100.h> + +#include <mach/h3xxx.h> + +#include "generic.h" + +void h3xxx_init_gpio(struct gpio_default_state *s, size_t n) +{ + while (n--) { + const char *name = s->name; + int err; + + if (!name) + name = "[init]"; + err = gpio_request(s->gpio, name); + if (err) { + printk(KERN_ERR "gpio%u: unable to request: %d\n", + s->gpio, err); + continue; + } + if (s->mode >= 0) { + err = gpio_direction_output(s->gpio, s->mode); + } else { + err = gpio_direction_input(s->gpio); + } + if (err) { + printk(KERN_ERR "gpio%u: unable to set direction: %d\n", + s->gpio, err); + continue; + } + if (!s->name) + gpio_free(s->gpio); + s++; + } +} + + +/* + * H3xxx flash support + */ +static struct mtd_partition h3xxx_partitions[] = { + { + .name = "H3XXX boot firmware", + .size = 0x00040000, + .offset = 0, + .mask_flags = MTD_WRITEABLE, /* force read-only */ + }, { + .name = "H3XXX rootfs", + .size = MTDPART_SIZ_FULL, + .offset = 0x00040000, + } +}; + +static void h3xxx_set_vpp(int vpp) +{ + gpio_set_value(H3XXX_EGPIO_VPP_ON, vpp); +} + +static int h3xxx_flash_init(void) +{ + int err = gpio_request(H3XXX_EGPIO_VPP_ON, "Flash Vpp"); + if (err) { + pr_err("%s: can't request H3XXX_EGPIO_VPP_ON\n", __func__); + return err; + } + + err = gpio_direction_output(H3XXX_EGPIO_VPP_ON, 0); + if (err) + gpio_free(H3XXX_EGPIO_VPP_ON); + + return err; +} + +static void h3xxx_flash_exit(void) +{ + gpio_free(H3XXX_EGPIO_VPP_ON); +} + +static struct flash_platform_data h3xxx_flash_data = { + .map_name = "cfi_probe", + .set_vpp = h3xxx_set_vpp, + .init = h3xxx_flash_init, + .exit = h3xxx_flash_exit, + .parts = h3xxx_partitions, + .nr_parts = ARRAY_SIZE(h3xxx_partitions), +}; + +static struct resource h3xxx_flash_resource = { + .start = SA1100_CS0_PHYS, + .end = SA1100_CS0_PHYS + SZ_32M - 1, + .flags = IORESOURCE_MEM, +}; + + +/* + * H3xxx uart support + */ +static void h3xxx_uart_set_mctrl(struct uart_port *port, u_int mctrl) +{ + if (port->mapbase == _Ser3UTCR0) { + gpio_set_value(H3XXX_GPIO_COM_RTS, !(mctrl & TIOCM_RTS)); + } +} + +static u_int h3xxx_uart_get_mctrl(struct uart_port *port) +{ + u_int ret = TIOCM_CD | TIOCM_CTS | TIOCM_DSR; + + if (port->mapbase == _Ser3UTCR0) { + /* + * DCD and CTS bits are inverted in GPLR by RS232 transceiver + */ + if (gpio_get_value(H3XXX_GPIO_COM_DCD)) + ret &= ~TIOCM_CD; + if (gpio_get_value(H3XXX_GPIO_COM_CTS)) + ret &= ~TIOCM_CTS; + } + + return ret; +} + +static void h3xxx_uart_pm(struct uart_port *port, u_int state, u_int oldstate) +{ + if (port->mapbase == _Ser3UTCR0) { + if (!gpio_request(H3XXX_EGPIO_RS232_ON, "RS232 transceiver")) { + gpio_direction_output(H3XXX_EGPIO_RS232_ON, !state); + gpio_free(H3XXX_EGPIO_RS232_ON); + } else { + pr_err("%s: can't request H3XXX_EGPIO_RS232_ON\n", + __func__); + } + } +} + +/* + * Enable/Disable wake up events for this serial port. + * Obviously, we only support this on the normal COM port. + */ +static int h3xxx_uart_set_wake(struct uart_port *port, u_int enable) +{ + int err = -EINVAL; + + if (port->mapbase == _Ser3UTCR0) { + if (enable) + PWER |= PWER_GPIO23 | PWER_GPIO25; /* DCD and CTS */ + else + PWER &= ~(PWER_GPIO23 | PWER_GPIO25); /* DCD and CTS */ + err = 0; + } + return err; +} + +static struct sa1100_port_fns h3xxx_port_fns __initdata = { + .set_mctrl = h3xxx_uart_set_mctrl, + .get_mctrl = h3xxx_uart_get_mctrl, + .pm = h3xxx_uart_pm, + .set_wake = h3xxx_uart_set_wake, +}; + +/* + * EGPIO + */ + +static struct resource egpio_resources[] = { + [0] = { + .start = H3600_EGPIO_PHYS, + .end = H3600_EGPIO_PHYS + 0x4 - 1, + .flags = IORESOURCE_MEM, + }, +}; + +static struct htc_egpio_chip egpio_chips[] = { + [0] = { + .reg_start = 0, + .gpio_base = H3XXX_EGPIO_BASE, + .num_gpios = 16, + .direction = HTC_EGPIO_OUTPUT, + .initial_values = 0x0080, /* H3XXX_EGPIO_RS232_ON */ + }, +}; + +static struct htc_egpio_platform_data egpio_info = { + .reg_width = 16, + .bus_width = 16, + .chip = egpio_chips, + .num_chips = ARRAY_SIZE(egpio_chips), +}; + +static struct platform_device h3xxx_egpio = { + .name = "htc-egpio", + .id = -1, + .resource = egpio_resources, + .num_resources = ARRAY_SIZE(egpio_resources), + .dev = { + .platform_data = &egpio_info, + }, +}; + +/* + * GPIO keys + */ + +static struct gpio_keys_button h3xxx_button_table[] = { + { + .code = KEY_POWER, + .gpio = H3XXX_GPIO_PWR_BUTTON, + .desc = "Power Button", + .active_low = 1, + .type = EV_KEY, + .wakeup = 1, + }, { + .code = KEY_ENTER, + .gpio = H3XXX_GPIO_ACTION_BUTTON, + .active_low = 1, + .desc = "Action button", + .type = EV_KEY, + .wakeup = 0, + }, +}; + +static struct gpio_keys_platform_data h3xxx_keys_data = { + .buttons = h3xxx_button_table, + .nbuttons = ARRAY_SIZE(h3xxx_button_table), +}; + +static struct platform_device h3xxx_keys = { + .name = "gpio-keys", + .id = -1, + .dev = { + .platform_data = &h3xxx_keys_data, + }, +}; + +static struct platform_device *h3xxx_devices[] = { + &h3xxx_egpio, + &h3xxx_keys, +}; + +void __init h3xxx_mach_init(void) +{ + sa1100_register_uart_fns(&h3xxx_port_fns); + sa11x0_register_mtd(&h3xxx_flash_data, &h3xxx_flash_resource, 1); + platform_add_devices(h3xxx_devices, ARRAY_SIZE(h3xxx_devices)); +} + +static struct map_desc h3600_io_desc[] __initdata = { + { /* static memory bank 2 CS#2 */ + .virtual = H3600_BANK_2_VIRT, + .pfn = __phys_to_pfn(SA1100_CS2_PHYS), + .length = 0x02800000, + .type = MT_DEVICE + }, { /* static memory bank 4 CS#4 */ + .virtual = H3600_BANK_4_VIRT, + .pfn = __phys_to_pfn(SA1100_CS4_PHYS), + .length = 0x00800000, + .type = MT_DEVICE + }, { /* EGPIO 0 CS#5 */ + .virtual = H3600_EGPIO_VIRT, + .pfn = __phys_to_pfn(H3600_EGPIO_PHYS), + .length = 0x01000000, + .type = MT_DEVICE + } +}; + +/* + * Common map_io initialization + */ + +void __init h3xxx_map_io(void) +{ + sa1100_map_io(); + iotable_init(h3600_io_desc, ARRAY_SIZE(h3600_io_desc)); + + sa1100_register_uart(0, 3); /* Common serial port */ +// sa1100_register_uart(1, 1); /* Microcontroller on 3100/3600 */ + + /* Ensure those pins are outputs and driving low */ + PPDR |= PPC_TXD4 | PPC_SCLK | PPC_SFRM; + PPSR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM); + + /* Configure suspend conditions */ + PGSR = 0; + PCFR = PCFR_OPDE; + PSDR = 0; + + GPCR = 0x0fffffff; /* All outputs are set low by default */ + GPDR = 0; /* Configure all GPIOs as input */ +} + diff --git a/arch/arm/mach-sa1100/hackkit.c b/arch/arm/mach-sa1100/hackkit.c index e7056c0b562..51568dfc8e9 100644 --- a/arch/arm/mach-sa1100/hackkit.c +++ b/arch/arm/mach-sa1100/hackkit.c @@ -187,7 +187,7 @@ static struct resource hackkit_flash_resource = { static void __init hackkit_init(void) { - sa11x0_set_flash_data(&hackkit_flash_data, &hackkit_flash_resource, 1); + sa11x0_register_mtd(&hackkit_flash_data, &hackkit_flash_resource, 1); } /********************************************************************** diff --git a/arch/arm/mach-sa1100/include/mach/collie.h b/arch/arm/mach-sa1100/include/mach/collie.h index 9efb569cdb6..71a0b3fdcc8 100644 --- a/arch/arm/mach-sa1100/include/mach/collie.h +++ b/arch/arm/mach-sa1100/include/mach/collie.h @@ -25,29 +25,39 @@ #define COLLIE_GPIO_VPEN (COLLIE_SCOOP_GPIO_BASE + 7) #define COLLIE_SCP_LB_VOL_CHG SCOOP_GPCR_PA19 -#define COLLIE_SCOOP_IO_DIR ( COLLIE_SCP_MUTE_L | COLLIE_SCP_MUTE_R | \ +#define COLLIE_SCOOP_IO_DIR (COLLIE_SCP_MUTE_L | COLLIE_SCP_MUTE_R | \ COLLIE_SCP_5VON | COLLIE_SCP_AMP_ON | \ - COLLIE_SCP_LB_VOL_CHG ) -#define COLLIE_SCOOP_IO_OUT ( COLLIE_SCP_MUTE_L | COLLIE_SCP_MUTE_R ) + COLLIE_SCP_LB_VOL_CHG) +#define COLLIE_SCOOP_IO_OUT (COLLIE_SCP_MUTE_L | COLLIE_SCP_MUTE_R) -/* GPIOs for which the generic definition doesn't say much */ +/* GPIOs for gpiolib */ -#define COLLIE_GPIO_ON_KEY GPIO_GPIO (0) -#define COLLIE_GPIO_AC_IN GPIO_GPIO (1) -#define COLLIE_GPIO_SDIO_INT GPIO_GPIO (11) -#define COLLIE_GPIO_CF_IRQ GPIO_GPIO (14) -#define COLLIE_GPIO_nREMOCON_INT GPIO_GPIO (15) -#define COLLIE_GPIO_UCB1x00_RESET GPIO_GPIO (16) -#define COLLIE_GPIO_nMIC_ON GPIO_GPIO (17) -#define COLLIE_GPIO_nREMOCON_ON GPIO_GPIO (18) -#define COLLIE_GPIO_CO GPIO_GPIO (20) -#define COLLIE_GPIO_MCP_CLK GPIO_GPIO (21) -#define COLLIE_GPIO_CF_CD GPIO_GPIO (22) -#define COLLIE_GPIO_UCB1x00_IRQ GPIO_GPIO (23) -#define COLLIE_GPIO_WAKEUP GPIO_GPIO (24) -#define COLLIE_GPIO_GA_INT GPIO_GPIO (25) -#define COLLIE_GPIO_MAIN_BAT_LOW GPIO_GPIO (26) +#define COLLIE_GPIO_ON_KEY (0) +#define COLLIE_GPIO_AC_IN (1) +#define COLLIE_GPIO_SDIO_INT (11) +#define COLLIE_GPIO_CF_IRQ (14) +#define COLLIE_GPIO_nREMOCON_INT (15) +#define COLLIE_GPIO_UCB1x00_RESET (16) +#define COLLIE_GPIO_nMIC_ON (17) +#define COLLIE_GPIO_nREMOCON_ON (18) +#define COLLIE_GPIO_CO (20) +#define COLLIE_GPIO_MCP_CLK (21) +#define COLLIE_GPIO_CF_CD (22) +#define COLLIE_GPIO_UCB1x00_IRQ (23) +#define COLLIE_GPIO_WAKEUP (24) +#define COLLIE_GPIO_GA_INT (25) +#define COLLIE_GPIO_MAIN_BAT_LOW (26) +/* GPIO definitions for direct register access */ + +#define _COLLIE_GPIO_ON_KEY GPIO_GPIO(0) +#define _COLLIE_GPIO_AC_IN GPIO_GPIO(1) +#define _COLLIE_GPIO_nREMOCON_INT GPIO_GPIO(15) +#define _COLLIE_GPIO_UCB1x00_RESET GPIO_GPIO(16) +#define _COLLIE_GPIO_nMIC_ON GPIO_GPIO(17) +#define _COLLIE_GPIO_nREMOCON_ON GPIO_GPIO(18) +#define _COLLIE_GPIO_CO GPIO_GPIO(20) +#define _COLLIE_GPIO_WAKEUP GPIO_GPIO(24) /* Interrupts */ #define COLLIE_IRQ_GPIO_ON_KEY IRQ_GPIO0 @@ -70,19 +80,20 @@ #define COLLIE_LCM_IRQ_GPIO_nSD_WP IRQ_LOCOMO_GPIO14 /* GPIO's on the TC35143AF (Toshiba Analog Frontend) */ -#define COLLIE_TC35143_GPIO_VERSION0 UCB_IO_0 /* GPIO0=Version */ -#define COLLIE_TC35143_GPIO_TBL_CHK UCB_IO_1 /* GPIO1=TBL_CHK */ -#define COLLIE_TC35143_GPIO_VPEN_ON UCB_IO_2 /* GPIO2=VPNE_ON */ -#define COLLIE_TC35143_GPIO_IR_ON UCB_IO_3 /* GPIO3=IR_ON */ -#define COLLIE_TC35143_GPIO_AMP_ON UCB_IO_4 /* GPIO4=AMP_ON */ -#define COLLIE_TC35143_GPIO_VERSION1 UCB_IO_5 /* GPIO5=Version */ -#define COLLIE_TC35143_GPIO_FS8KLPF UCB_IO_5 /* GPIO5=fs 8k LPF */ -#define COLLIE_TC35143_GPIO_BUZZER_BIAS UCB_IO_6 /* GPIO6=BUZZER BIAS */ -#define COLLIE_TC35143_GPIO_MBAT_ON UCB_IO_7 /* GPIO7=MBAT_ON */ -#define COLLIE_TC35143_GPIO_BBAT_ON UCB_IO_8 /* GPIO8=BBAT_ON */ -#define COLLIE_TC35143_GPIO_TMP_ON UCB_IO_9 /* GPIO9=TMP_ON */ -#define COLLIE_TC35143_GPIO_IN ( UCB_IO_0 | UCB_IO_2 | UCB_IO_5 ) -#define COLLIE_TC35143_GPIO_OUT ( UCB_IO_1 | UCB_IO_3 | UCB_IO_4 | UCB_IO_6 | \ - UCB_IO_7 | UCB_IO_8 | UCB_IO_9 ) +#define COLLIE_TC35143_GPIO_BASE (GPIO_MAX + 13) +#define COLLIE_TC35143_GPIO_VERSION0 UCB_IO_0 +#define COLLIE_TC35143_GPIO_TBL_CHK UCB_IO_1 +#define COLLIE_TC35143_GPIO_VPEN_ON UCB_IO_2 +#define COLLIE_TC35143_GPIO_IR_ON UCB_IO_3 +#define COLLIE_TC35143_GPIO_AMP_ON UCB_IO_4 +#define COLLIE_TC35143_GPIO_VERSION1 UCB_IO_5 +#define COLLIE_TC35143_GPIO_FS8KLPF UCB_IO_5 +#define COLLIE_TC35143_GPIO_BUZZER_BIAS UCB_IO_6 +#define COLLIE_GPIO_MBAT_ON (COLLIE_TC35143_GPIO_BASE + 7) +#define COLLIE_GPIO_BBAT_ON (COLLIE_TC35143_GPIO_BASE + 8) +#define COLLIE_GPIO_TMP_ON (COLLIE_TC35143_GPIO_BASE + 9) +#define COLLIE_TC35143_GPIO_IN (UCB_IO_0 | UCB_IO_2 | UCB_IO_5) +#define COLLIE_TC35143_GPIO_OUT (UCB_IO_1 | UCB_IO_3 | UCB_IO_4 \ + | UCB_IO_6) #endif diff --git a/arch/arm/mach-sa1100/include/mach/gpio.h b/arch/arm/mach-sa1100/include/mach/gpio.h index 582a0c92da5..7befc104e9a 100644 --- a/arch/arm/mach-sa1100/include/mach/gpio.h +++ b/arch/arm/mach-sa1100/include/mach/gpio.h @@ -49,20 +49,9 @@ static inline void gpio_set_value(unsigned gpio, int value) #define gpio_cansleep __gpio_cansleep -static inline unsigned gpio_to_irq(unsigned gpio) -{ - if (gpio < 11) - return IRQ_GPIO0 + gpio; - else - return IRQ_GPIO11 - 11 + gpio; -} - -static inline unsigned irq_to_gpio(unsigned irq) -{ - if (irq < IRQ_GPIO11_27) - return irq - IRQ_GPIO0; - else - return irq - IRQ_GPIO11 + 11; -} +#define gpio_to_irq(gpio) ((gpio < 11) ? (IRQ_GPIO0 + gpio) : \ + (IRQ_GPIO11 - 11 + gpio)) +#define irq_to_gpio(irq) ((irq < IRQ_GPIO11_27) ? (irq - IRQ_GPIO0) : \ + (irq - IRQ_GPIO11 + 11)) #endif diff --git a/arch/arm/mach-sa1100/include/mach/h3600.h b/arch/arm/mach-sa1100/include/mach/h3600.h deleted file mode 100644 index 2827faa4742..00000000000 --- a/arch/arm/mach-sa1100/include/mach/h3600.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * - * Definitions for H3600 Handheld Computer - * - * Copyright 2000 Compaq Computer Corporation. - * - * Use consistent with the GNU GPL is permitted, - * provided that this copyright notice is - * preserved in its entirety in all copies and derived works. - * - * COMPAQ COMPUTER CORPORATION MAKES NO WARRANTIES, EXPRESSED OR IMPLIED, - * AS TO THE USEFULNESS OR CORRECTNESS OF THIS CODE OR ITS - * FITNESS FOR ANY PARTICULAR PURPOSE. - * - * Author: Jamey Hicks. - * - * History: - * - * 2001-10-?? Andrew Christian Added support for iPAQ H3800 - * - */ - -#ifndef _INCLUDE_H3600_H_ -#define _INCLUDE_H3600_H_ - -typedef int __bitwise pm_request_t; - -#define PM_SUSPEND ((__force pm_request_t) 1) /* enter D1-D3 */ -#define PM_RESUME ((__force pm_request_t) 2) /* enter D0 */ - -/* generalized support for H3xxx series Compaq Pocket PC's */ -#define machine_is_h3xxx() (machine_is_h3100() || machine_is_h3600()) - -/* Physical memory regions corresponding to chip selects */ -#define H3600_EGPIO_PHYS (SA1100_CS5_PHYS + 0x01000000) -#define H3600_BANK_2_PHYS SA1100_CS2_PHYS -#define H3600_BANK_4_PHYS SA1100_CS4_PHYS - -/* Virtual memory regions corresponding to chip selects 2 & 4 (used on sleeves) */ -#define H3600_EGPIO_VIRT 0xf0000000 -#define H3600_BANK_2_VIRT 0xf1000000 -#define H3600_BANK_4_VIRT 0xf3800000 - -/* - Machine-independent GPIO definitions - --- these are common across all current iPAQ platforms -*/ - -#define GPIO_H3600_NPOWER_BUTTON GPIO_GPIO (0) /* Also known as the "off button" */ - -#define GPIO_H3600_PCMCIA_CD1 GPIO_GPIO (10) -#define GPIO_H3600_PCMCIA_IRQ1 GPIO_GPIO (11) - -/* UDA1341 L3 Interface */ -#define GPIO_H3600_L3_DATA GPIO_GPIO (14) -#define GPIO_H3600_L3_MODE GPIO_GPIO (15) -#define GPIO_H3600_L3_CLOCK GPIO_GPIO (16) - -#define GPIO_H3600_PCMCIA_CD0 GPIO_GPIO (17) -#define GPIO_H3600_SYS_CLK GPIO_GPIO (19) -#define GPIO_H3600_PCMCIA_IRQ0 GPIO_GPIO (21) - -#define GPIO_H3600_COM_DCD GPIO_GPIO (23) -#define GPIO_H3600_OPT_IRQ GPIO_GPIO (24) -#define GPIO_H3600_COM_CTS GPIO_GPIO (25) -#define GPIO_H3600_COM_RTS GPIO_GPIO (26) - -#define IRQ_GPIO_H3600_NPOWER_BUTTON IRQ_GPIO0 -#define IRQ_GPIO_H3600_PCMCIA_CD1 IRQ_GPIO10 -#define IRQ_GPIO_H3600_PCMCIA_IRQ1 IRQ_GPIO11 -#define IRQ_GPIO_H3600_PCMCIA_CD0 IRQ_GPIO17 -#define IRQ_GPIO_H3600_PCMCIA_IRQ0 IRQ_GPIO21 -#define IRQ_GPIO_H3600_COM_DCD IRQ_GPIO23 -#define IRQ_GPIO_H3600_OPT_IRQ IRQ_GPIO24 -#define IRQ_GPIO_H3600_COM_CTS IRQ_GPIO25 - - -#ifndef __ASSEMBLY__ - -enum ipaq_egpio_type { - IPAQ_EGPIO_LCD_POWER, /* Power to the LCD panel */ - IPAQ_EGPIO_CODEC_NRESET, /* Clear to reset the audio codec (remember to return high) */ - IPAQ_EGPIO_AUDIO_ON, /* Audio power */ - IPAQ_EGPIO_QMUTE, /* Audio muting */ - IPAQ_EGPIO_OPT_NVRAM_ON, /* Non-volatile RAM on extension sleeves (SPI interface) */ - IPAQ_EGPIO_OPT_ON, /* Power to extension sleeves */ - IPAQ_EGPIO_CARD_RESET, /* Reset PCMCIA cards on extension sleeve (???) */ - IPAQ_EGPIO_OPT_RESET, /* Reset option pack (???) */ - IPAQ_EGPIO_IR_ON, /* IR sensor/emitter power */ - IPAQ_EGPIO_IR_FSEL, /* IR speed selection 1->fast, 0->slow */ - IPAQ_EGPIO_RS232_ON, /* Maxim RS232 chip power */ - IPAQ_EGPIO_VPP_ON, /* Turn on power to flash programming */ - IPAQ_EGPIO_LCD_ENABLE, /* Enable/disable LCD controller */ -}; - -extern void (*assign_h3600_egpio)(enum ipaq_egpio_type x, int level); - -#endif /* ASSEMBLY */ - -#endif /* _INCLUDE_H3600_H_ */ diff --git a/arch/arm/mach-sa1100/include/mach/h3600_gpio.h b/arch/arm/mach-sa1100/include/mach/h3600_gpio.h deleted file mode 100644 index a36ca76d018..00000000000 --- a/arch/arm/mach-sa1100/include/mach/h3600_gpio.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * - * Definitions for H3600 Handheld Computer - * - * Copyright 2000 Compaq Computer Corporation. - * - * Use consistent with the GNU GPL is permitted, - * provided that this copyright notice is - * preserved in its entirety in all copies and derived works. - * - * COMPAQ COMPUTER CORPORATION MAKES NO WARRANTIES, EXPRESSED OR IMPLIED, - * AS TO THE USEFULNESS OR CORRECTNESS OF THIS CODE OR ITS - * FITNESS FOR ANY PARTICULAR PURPOSE. - * - * Author: Jamey Hicks. - * - * History: - * - * 2001-10-?? Andrew Christian Added support for iPAQ H3800 - * - */ - -#ifndef _INCLUDE_H3600_GPIO_H_ -#define _INCLUDE_H3600_GPIO_H_ - -/* - * GPIO lines that are common across ALL iPAQ models are in "h3600.h" - * This file contains machine-specific definitions - */ - -#define GPIO_H3600_SUSPEND GPIO_GPIO (0) -/* GPIO[2:9] used by LCD on H3600/3800, used as GPIO on H3100 */ -#define GPIO_H3100_BT_ON GPIO_GPIO (2) -#define GPIO_H3100_GPIO3 GPIO_GPIO (3) -#define GPIO_H3100_QMUTE GPIO_GPIO (4) -#define GPIO_H3100_LCD_3V_ON GPIO_GPIO (5) -#define GPIO_H3100_AUD_ON GPIO_GPIO (6) -#define GPIO_H3100_AUD_PWR_ON GPIO_GPIO (7) -#define GPIO_H3100_IR_ON GPIO_GPIO (8) -#define GPIO_H3100_IR_FSEL GPIO_GPIO (9) - -/* for H3600, audio sample rate clock generator */ -#define GPIO_H3600_CLK_SET0 GPIO_GPIO (12) -#define GPIO_H3600_CLK_SET1 GPIO_GPIO (13) - -#define GPIO_H3600_ACTION_BUTTON GPIO_GPIO (18) -#define GPIO_H3600_SOFT_RESET GPIO_GPIO (20) /* Also known as BATT_FAULT */ -#define GPIO_H3600_OPT_LOCK GPIO_GPIO (22) -#define GPIO_H3600_OPT_DET GPIO_GPIO (27) - -/****************************************************/ - -#define IRQ_GPIO_H3600_ACTION_BUTTON IRQ_GPIO18 -#define IRQ_GPIO_H3600_OPT_DET IRQ_GPIO27 - -/* H3100 / 3600 EGPIO pins */ -#define EGPIO_H3600_VPP_ON (1 << 0) -#define EGPIO_H3600_CARD_RESET (1 << 1) /* reset the attached pcmcia/compactflash card. active high. */ -#define EGPIO_H3600_OPT_RESET (1 << 2) /* reset the attached option pack. active high. */ -#define EGPIO_H3600_CODEC_NRESET (1 << 3) /* reset the onboard UDA1341. active low. */ -#define EGPIO_H3600_OPT_NVRAM_ON (1 << 4) /* apply power to optionpack nvram, active high. */ -#define EGPIO_H3600_OPT_ON (1 << 5) /* full power to option pack. active high. */ -#define EGPIO_H3600_LCD_ON (1 << 6) /* enable 3.3V to LCD. active high. */ -#define EGPIO_H3600_RS232_ON (1 << 7) /* UART3 transceiver force on. Active high. */ - -/* H3600 only EGPIO pins */ -#define EGPIO_H3600_LCD_PCI (1 << 8) /* LCD control IC enable. active high. */ -#define EGPIO_H3600_IR_ON (1 << 9) /* apply power to IR module. active high. */ -#define EGPIO_H3600_AUD_AMP_ON (1 << 10) /* apply power to audio power amp. active high. */ -#define EGPIO_H3600_AUD_PWR_ON (1 << 11) /* apply power to reset of audio circuit. active high. */ -#define EGPIO_H3600_QMUTE (1 << 12) /* mute control for onboard UDA1341. active high. */ -#define EGPIO_H3600_IR_FSEL (1 << 13) /* IR speed select: 1->fast, 0->slow */ -#define EGPIO_H3600_LCD_5V_ON (1 << 14) /* enable 5V to LCD. active high. */ -#define EGPIO_H3600_LVDD_ON (1 << 15) /* enable 9V and -6.5V to LCD. */ - - -#endif /* _INCLUDE_H3600_GPIO_H_ */ diff --git a/arch/arm/mach-sa1100/include/mach/h3xxx.h b/arch/arm/mach-sa1100/include/mach/h3xxx.h new file mode 100644 index 00000000000..7d9df16f04a --- /dev/null +++ b/arch/arm/mach-sa1100/include/mach/h3xxx.h @@ -0,0 +1,94 @@ +/* + * Definitions for Compaq iPAQ H3100 and H3600 handheld computers + * + * (c) 2000 Compaq Computer Corporation. (Author: Jamey Hicks) + * (c) 2009 Dmitry Artamonow <mad_soft@inbox.ru> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#ifndef _INCLUDE_H3XXX_H_ +#define _INCLUDE_H3XXX_H_ + +/* Physical memory regions corresponding to chip selects */ +#define H3600_EGPIO_PHYS (SA1100_CS5_PHYS + 0x01000000) +#define H3600_BANK_2_PHYS SA1100_CS2_PHYS +#define H3600_BANK_4_PHYS SA1100_CS4_PHYS + +/* Virtual memory regions corresponding to chip selects 2 & 4 (used on sleeves) */ +#define H3600_EGPIO_VIRT 0xf0000000 +#define H3600_BANK_2_VIRT 0xf1000000 +#define H3600_BANK_4_VIRT 0xf3800000 + +/* + * gpiolib numbers for all iPAQs + */ +#define H3XXX_GPIO_PWR_BUTTON 0 +#define H3XXX_GPIO_PCMCIA_CD1 10 +#define H3XXX_GPIO_PCMCIA_IRQ1 11 +#define H3XXX_GPIO_PCMCIA_CD0 17 +#define H3XXX_GPIO_ACTION_BUTTON 18 +#define H3XXX_GPIO_SYS_CLK 19 +#define H3XXX_GPIO_PCMCIA_IRQ0 21 +#define H3XXX_GPIO_COM_DCD 23 +#define H3XXX_GPIO_OPTION 24 +#define H3XXX_GPIO_COM_CTS 25 +#define H3XXX_GPIO_COM_RTS 26 + +/* machine-specific gpios */ + +#define H3100_GPIO_BT_ON 2 +#define H3100_GPIO_QMUTE 4 +#define H3100_GPIO_LCD_3V_ON 5 +#define H3100_GPIO_AUD_ON 6 +#define H3100_GPIO_AUD_PWR_ON 7 +#define H3100_GPIO_IR_ON 8 +#define H3100_GPIO_IR_FSEL 9 + +#define H3600_GPIO_CLK_SET0 12 /* audio sample rate clock generator */ +#define H3600_GPIO_CLK_SET1 13 +#define H3600_GPIO_SOFT_RESET 20 /* also known as BATT_FAULT */ +#define H3600_GPIO_OPT_LOCK 22 +#define H3600_GPIO_OPT_DET 27 + + +/* H3100 / 3600 EGPIO pins */ +#define H3XXX_EGPIO_BASE (GPIO_MAX + 1) + +#define H3XXX_EGPIO_VPP_ON (H3XXX_EGPIO_BASE + 0) +#define H3XXX_EGPIO_CARD_RESET (H3XXX_EGPIO_BASE + 1) /* reset the attached pcmcia/compactflash card. active high. */ +#define H3XXX_EGPIO_OPT_RESET (H3XXX_EGPIO_BASE + 2) /* reset the attached option pack. active high. */ +#define H3XXX_EGPIO_CODEC_NRESET (H3XXX_EGPIO_BASE + 3) /* reset the onboard UDA1341. active low. */ +#define H3XXX_EGPIO_OPT_NVRAM_ON (H3XXX_EGPIO_BASE + 4) /* apply power to optionpack nvram, active high. */ +#define H3XXX_EGPIO_OPT_ON (H3XXX_EGPIO_BASE + 5) /* full power to option pack. active high. */ +#define H3XXX_EGPIO_LCD_ON (H3XXX_EGPIO_BASE + 6) /* enable 3.3V to LCD. active high. */ +#define H3XXX_EGPIO_RS232_ON (H3XXX_EGPIO_BASE + 7) /* UART3 transceiver force on. Active high. */ + +/* H3600 only EGPIO pins */ +#define H3600_EGPIO_LCD_PCI (H3XXX_EGPIO_BASE + 8) /* LCD control IC enable. active high. */ +#define H3600_EGPIO_IR_ON (H3XXX_EGPIO_BASE + 9) /* apply power to IR module. active high. */ +#define H3600_EGPIO_AUD_AMP_ON (H3XXX_EGPIO_BASE + 10) /* apply power to audio power amp. active high. */ +#define H3600_EGPIO_AUD_PWR_ON (H3XXX_EGPIO_BASE + 11) /* apply power to reset of audio circuit. active high. */ +#define H3600_EGPIO_QMUTE (H3XXX_EGPIO_BASE + 12) /* mute control for onboard UDA1341. active high. */ +#define H3600_EGPIO_IR_FSEL (H3XXX_EGPIO_BASE + 13) /* IR speed select: 1->fast, 0->slow */ +#define H3600_EGPIO_LCD_5V_ON (H3XXX_EGPIO_BASE + 14) /* enable 5V to LCD. active high. */ +#define H3600_EGPIO_LVDD_ON (H3XXX_EGPIO_BASE + 15) /* enable 9V and -6.5V to LCD. */ + +struct gpio_default_state { + int gpio; + int mode; + const char *name; +}; + +#define GPIO_MODE_IN -1 +#define GPIO_MODE_OUT0 0 +#define GPIO_MODE_OUT1 1 + +void h3xxx_init_gpio(struct gpio_default_state *s, size_t n); +void __init h3xxx_map_io(void); +void __init h3xxx_mach_init(void); + +#endif /* _INCLUDE_H3XXX_H_ */ diff --git a/arch/arm/mach-sa1100/include/mach/mcp.h b/arch/arm/mach-sa1100/include/mach/mcp.h index fb8b09a57ad..ed1a331508a 100644 --- a/arch/arm/mach-sa1100/include/mach/mcp.h +++ b/arch/arm/mach-sa1100/include/mach/mcp.h @@ -16,6 +16,7 @@ struct mcp_plat_data { u32 mccr0; u32 mccr1; unsigned int sclk_rate; + int gpio_base; }; #endif diff --git a/arch/arm/mach-sa1100/jornada720.c b/arch/arm/mach-sa1100/jornada720.c index fd776bb666c..13ebd2d99bf 100644 --- a/arch/arm/mach-sa1100/jornada720.c +++ b/arch/arm/mach-sa1100/jornada720.c @@ -354,7 +354,7 @@ static struct resource jornada720_flash_resource = { static void __init jornada720_mach_init(void) { - sa11x0_set_flash_data(&jornada720_flash_data, &jornada720_flash_resource, 1); + sa11x0_register_mtd(&jornada720_flash_data, &jornada720_flash_resource, 1); } MACHINE_START(JORNADA720, "HP Jornada 720") diff --git a/arch/arm/mach-sa1100/lart.c b/arch/arm/mach-sa1100/lart.c index 1f940df0e5a..68069d6dc07 100644 --- a/arch/arm/mach-sa1100/lart.c +++ b/arch/arm/mach-sa1100/lart.c @@ -28,7 +28,7 @@ static struct mcp_plat_data lart_mcp_data = { static void __init lart_init(void) { - sa11x0_set_mcp_data(&lart_mcp_data); + sa11x0_register_mcp(&lart_mcp_data); } static struct map_desc lart_io_desc[] __initdata = { diff --git a/arch/arm/mach-sa1100/pleb.c b/arch/arm/mach-sa1100/pleb.c index e1458bc1868..1ccd6018d3a 100644 --- a/arch/arm/mach-sa1100/pleb.c +++ b/arch/arm/mach-sa1100/pleb.c @@ -109,7 +109,7 @@ static struct flash_platform_data pleb_flash_data = { static void __init pleb_init(void) { - sa11x0_set_flash_data(&pleb_flash_data, pleb_flash_resources, + sa11x0_register_mtd(&pleb_flash_data, pleb_flash_resources, ARRAY_SIZE(pleb_flash_resources)); diff --git a/arch/arm/mach-sa1100/shannon.c b/arch/arm/mach-sa1100/shannon.c index ddd917d1083..85e82bb73d7 100644 --- a/arch/arm/mach-sa1100/shannon.c +++ b/arch/arm/mach-sa1100/shannon.c @@ -59,8 +59,8 @@ static struct mcp_plat_data shannon_mcp_data = { static void __init shannon_init(void) { - sa11x0_set_flash_data(&shannon_flash_data, &shannon_flash_resource, 1); - sa11x0_set_mcp_data(&shannon_mcp_data); + sa11x0_register_mtd(&shannon_flash_data, &shannon_flash_resource, 1); + sa11x0_register_mcp(&shannon_mcp_data); } static void __init shannon_map_io(void) diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c index 3c74534f7fe..49cfd64663a 100644 --- a/arch/arm/mach-sa1100/simpad.c +++ b/arch/arm/mach-sa1100/simpad.c @@ -166,9 +166,9 @@ static void __init simpad_map_io(void) PCFR = 0; PSDR = 0; - sa11x0_set_flash_data(&simpad_flash_data, simpad_flash_resources, + sa11x0_register_mtd(&simpad_flash_data, simpad_flash_resources, ARRAY_SIZE(simpad_flash_resources)); - sa11x0_set_mcp_data(&simpad_mcp_data); + sa11x0_register_mcp(&simpad_mcp_data); } static void simpad_power_off(void) diff --git a/arch/avr32/include/asm/bug.h b/arch/avr32/include/asm/bug.h index 331d45bab18..2aa373cc61b 100644 --- a/arch/avr32/include/asm/bug.h +++ b/arch/avr32/include/asm/bug.h @@ -52,7 +52,7 @@ #define BUG() \ do { \ _BUG_OR_WARN(0); \ - for (;;); \ + unreachable(); \ } while (0) #define WARN_ON(condition) \ diff --git a/arch/ia64/include/asm/swiotlb.h b/arch/ia64/include/asm/swiotlb.h index dcbaea7ce12..f0acde68aae 100644 --- a/arch/ia64/include/asm/swiotlb.h +++ b/arch/ia64/include/asm/swiotlb.h @@ -4,8 +4,6 @@ #include <linux/dma-mapping.h> #include <linux/swiotlb.h> -extern int swiotlb_force; - #ifdef CONFIG_SWIOTLB extern int swiotlb; extern void pci_swiotlb_init(void); diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c index 285aae8431c..53292abf846 100644 --- a/arch/ia64/kernel/pci-swiotlb.c +++ b/arch/ia64/kernel/pci-swiotlb.c @@ -41,7 +41,7 @@ struct dma_map_ops swiotlb_dma_ops = { void __init swiotlb_dma_init(void) { dma_ops = &swiotlb_dma_ops; - swiotlb_init(); + swiotlb_init(1); } void __init pci_swiotlb_init(void) @@ -51,7 +51,7 @@ void __init pci_swiotlb_init(void) swiotlb = 1; printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n"); machvec_init("dig"); - swiotlb_init(); + swiotlb_init(1); dma_ops = &swiotlb_dma_ops; #else panic("Unable to find Intel IOMMU"); diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 1aad0d9f507..fd7620f025f 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -358,7 +358,14 @@ config SGI_IP22 select SWAP_IO_SPACE select SYS_HAS_CPU_R4X00 select SYS_HAS_CPU_R5000 - select SYS_HAS_EARLY_PRINTK + # + # Disable EARLY_PRINTK for now since it leads to overwritten prom + # memory during early boot on some machines. + # + # See http://www.linux-mips.org/cgi-bin/mesg.cgi?a=linux-mips&i=20091119164009.GA15038%40deprecation.cyrius.com + # for a more details discussion + # + # select SYS_HAS_EARLY_PRINTK select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN @@ -410,7 +417,14 @@ config SGI_IP28 select SGI_HAS_ZILOG select SWAP_IO_SPACE select SYS_HAS_CPU_R10000 - select SYS_HAS_EARLY_PRINTK + # + # Disable EARLY_PRINTK for now since it leads to overwritten prom + # memory during early boot on some machines. + # + # See http://www.linux-mips.org/cgi-bin/mesg.cgi?a=linux-mips&i=20091119164009.GA15038%40deprecation.cyrius.com + # for a more details discussion + # + # select SYS_HAS_EARLY_PRINTK select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN help @@ -1439,6 +1453,7 @@ choice config PAGE_SIZE_4KB bool "4kB" + depends on !CPU_LOONGSON2 help This option select the standard 4kB Linux page size. On some R3000-family processors this is the only available page size. Using @@ -1763,7 +1778,7 @@ config SYS_SUPPORTS_SMARTMIPS config ARCH_FLATMEM_ENABLE def_bool y - depends on !NUMA + depends on !NUMA && !CPU_LOONGSON2 config ARCH_DISCONTIGMEM_ENABLE bool diff --git a/arch/mips/include/asm/bug.h b/arch/mips/include/asm/bug.h index 6cf29c26e87..540c98a810d 100644 --- a/arch/mips/include/asm/bug.h +++ b/arch/mips/include/asm/bug.h @@ -11,9 +11,7 @@ static inline void __noreturn BUG(void) { __asm__ __volatile__("break %0" : : "i" (BRK_BUG)); - /* Fool GCC into thinking the function doesn't return. */ - while (1) - ; + unreachable(); } #define HAVE_ARCH_BUG diff --git a/arch/mips/include/asm/mman.h b/arch/mips/include/asm/mman.h index a2250f390a2..c892bfb3e2c 100644 --- a/arch/mips/include/asm/mman.h +++ b/arch/mips/include/asm/mman.h @@ -75,6 +75,7 @@ #define MADV_MERGEABLE 12 /* KSM may merge identical pages */ #define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */ +#define MADV_HWPOISON 100 /* poison a page for testing */ /* compatibility flags */ #define MAP_FILE 0 diff --git a/arch/mips/include/asm/system.h b/arch/mips/include/asm/system.h index fcf5f98d90c..83b5509e09e 100644 --- a/arch/mips/include/asm/system.h +++ b/arch/mips/include/asm/system.h @@ -12,6 +12,7 @@ #ifndef _ASM_SYSTEM_H #define _ASM_SYSTEM_H +#include <linux/kernel.h> #include <linux/types.h> #include <linux/irqflags.h> @@ -193,10 +194,6 @@ extern __u64 __xchg_u64_unsupported_on_32bit_kernels(volatile __u64 * m, __u64 v #define __xchg_u64 __xchg_u64_unsupported_on_32bit_kernels #endif -/* This function doesn't exist, so you'll get a linker error - if something tries to do an invalid xchg(). */ -extern void __xchg_called_with_bad_pointer(void); - static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size) { switch (size) { @@ -205,11 +202,17 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz case 8: return __xchg_u64(ptr, x); } - __xchg_called_with_bad_pointer(); + return x; } -#define xchg(ptr, x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))) +#define xchg(ptr, x) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) & ~0xc); \ + \ + ((__typeof__(*(ptr))) \ + __xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))); \ +}) extern void set_handler(unsigned long offset, void *addr, unsigned long len); extern void set_uncached_handler(unsigned long offset, void *addr, unsigned long len); diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 3fe1fcfa2e7..fe0d7980560 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -306,6 +306,7 @@ static inline int mips_atomic_set(struct pt_regs *regs, if (cpu_has_llsc && R10000_LLSC_WAR) { __asm__ __volatile__ ( + " .set mips3 \n" " li %[err], 0 \n" "1: ll %[old], (%[addr]) \n" " move %[tmp], %[new] \n" @@ -320,6 +321,7 @@ static inline int mips_atomic_set(struct pt_regs *regs, " "STR(PTR)" 1b, 4b \n" " "STR(PTR)" 2b, 4b \n" " .previous \n" + " .set mips0 \n" : [old] "=&r" (old), [err] "=&r" (err), [tmp] "=&r" (tmp) @@ -329,6 +331,7 @@ static inline int mips_atomic_set(struct pt_regs *regs, : "memory"); } else if (cpu_has_llsc) { __asm__ __volatile__ ( + " .set mips3 \n" " li %[err], 0 \n" "1: ll %[old], (%[addr]) \n" " move %[tmp], %[new] \n" @@ -347,6 +350,7 @@ static inline int mips_atomic_set(struct pt_regs *regs, " "STR(PTR)" 1b, 5b \n" " "STR(PTR)" 2b, 5b \n" " .previous \n" + " .set mips0 \n" : [old] "=&r" (old), [err] "=&r" (err), [tmp] "=&r" (tmp) diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c index 9f40e1ff9b4..041fc1afc3f 100644 --- a/arch/mips/rb532/devices.c +++ b/arch/mips/rb532/devices.c @@ -110,7 +110,6 @@ static struct korina_device korina_dev0_data = { static struct platform_device korina_dev0 = { .id = -1, .name = "korina", - .dev.driver_data = &korina_dev0_data, .resource = korina_dev0_res, .num_resources = ARRAY_SIZE(korina_dev0_res), }; @@ -332,6 +331,8 @@ static int __init plat_setup_devices(void) /* set the uart clock to the current cpu frequency */ rb532_uart_res[0].uartclk = idt_cpu_freq; + dev_set_drvdata(&korina_dev0.dev, &korina_dev0_data); + return platform_add_devices(rb532_devs, ARRAY_SIZE(rb532_devs)); } diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 3b100518539..bf3382f1904 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -46,7 +46,7 @@ config DEBUG_STACK_USAGE config HCALL_STATS bool "Hypervisor call instrumentation" - depends on PPC_PSERIES && DEBUG_FS + depends on PPC_PSERIES && DEBUG_FS && TRACEPOINTS help Adds code to keep track of the number of hypervisor calls made and the amount of time spent in hypervisor calls. Wall time spent in diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index f1889abb89b..c568329723b 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -1683,7 +1683,7 @@ CONFIG_HAVE_ARCH_KGDB=y CONFIG_DEBUG_STACKOVERFLOW=y # CONFIG_DEBUG_STACK_USAGE is not set # CONFIG_DEBUG_PAGEALLOC is not set -CONFIG_HCALL_STATS=y +# CONFIG_HCALL_STATS is not set # CONFIG_CODE_PATCHING_SELFTEST is not set # CONFIG_FTR_FIXUP_SELFTEST is not set # CONFIG_MSI_BITMAP_SELFTEST is not set diff --git a/arch/powerpc/include/asm/emulated_ops.h b/arch/powerpc/include/asm/emulated_ops.h index 9154e852673..f0fb4fc1f6e 100644 --- a/arch/powerpc/include/asm/emulated_ops.h +++ b/arch/powerpc/include/asm/emulated_ops.h @@ -19,6 +19,7 @@ #define _ASM_POWERPC_EMULATED_OPS_H #include <asm/atomic.h> +#include <linux/perf_event.h> #ifdef CONFIG_PPC_EMULATED_STATS @@ -57,7 +58,7 @@ extern u32 ppc_warn_emulated; extern void ppc_warn_emulated_print(const char *type); -#define PPC_WARN_EMULATED(type) \ +#define __PPC_WARN_EMULATED(type) \ do { \ atomic_inc(&ppc_emulated.type.val); \ if (ppc_warn_emulated) \ @@ -66,8 +67,22 @@ extern void ppc_warn_emulated_print(const char *type); #else /* !CONFIG_PPC_EMULATED_STATS */ -#define PPC_WARN_EMULATED(type) do { } while (0) +#define __PPC_WARN_EMULATED(type) do { } while (0) #endif /* !CONFIG_PPC_EMULATED_STATS */ +#define PPC_WARN_EMULATED(type, regs) \ + do { \ + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, \ + 1, 0, regs, 0); \ + __PPC_WARN_EMULATED(type); \ + } while (0) + +#define PPC_WARN_ALIGNMENT(type, regs) \ + do { \ + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, \ + 1, 0, regs, regs->dar); \ + __PPC_WARN_EMULATED(type); \ + } while (0) + #endif /* _ASM_POWERPC_EMULATED_OPS_H */ diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 6251a4b10be..c27caac47ad 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -274,6 +274,8 @@ struct hcall_stats { unsigned long num_calls; /* number of calls (on this CPU) */ unsigned long tb_total; /* total wall time (mftb) of calls. */ unsigned long purr_total; /* total cpu time (PURR) of calls. */ + unsigned long tb_start; + unsigned long purr_start; }; #define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 6315edc205d..bc8dd53f718 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -489,6 +489,8 @@ #define SPRN_MMCR1 798 #define SPRN_MMCRA 0x312 #define MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */ +#define MMCRA_SDAR_DCACHE_MISS 0x40000000UL +#define MMCRA_SDAR_ERAT_MISS 0x20000000UL #define MMCRA_SIHV 0x10000000UL /* state of MSR HV when SIAR set */ #define MMCRA_SIPR 0x08000000UL /* state of MSR PR when SIAR set */ #define MMCRA_SLOT 0x07000000UL /* SLOT bits (37-39) */ diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h new file mode 100644 index 00000000000..cbe2297d68b --- /dev/null +++ b/arch/powerpc/include/asm/trace.h @@ -0,0 +1,133 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM powerpc + +#if !defined(_TRACE_POWERPC_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_POWERPC_H + +#include <linux/tracepoint.h> + +struct pt_regs; + +TRACE_EVENT(irq_entry, + + TP_PROTO(struct pt_regs *regs), + + TP_ARGS(regs), + + TP_STRUCT__entry( + __field(struct pt_regs *, regs) + ), + + TP_fast_assign( + __entry->regs = regs; + ), + + TP_printk("pt_regs=%p", __entry->regs) +); + +TRACE_EVENT(irq_exit, + + TP_PROTO(struct pt_regs *regs), + + TP_ARGS(regs), + + TP_STRUCT__entry( + __field(struct pt_regs *, regs) + ), + + TP_fast_assign( + __entry->regs = regs; + ), + + TP_printk("pt_regs=%p", __entry->regs) +); + +TRACE_EVENT(timer_interrupt_entry, + + TP_PROTO(struct pt_regs *regs), + + TP_ARGS(regs), + + TP_STRUCT__entry( + __field(struct pt_regs *, regs) + ), + + TP_fast_assign( + __entry->regs = regs; + ), + + TP_printk("pt_regs=%p", __entry->regs) +); + +TRACE_EVENT(timer_interrupt_exit, + + TP_PROTO(struct pt_regs *regs), + + TP_ARGS(regs), + + TP_STRUCT__entry( + __field(struct pt_regs *, regs) + ), + + TP_fast_assign( + __entry->regs = regs; + ), + + TP_printk("pt_regs=%p", __entry->regs) +); + +#ifdef CONFIG_PPC_PSERIES +extern void hcall_tracepoint_regfunc(void); +extern void hcall_tracepoint_unregfunc(void); + +TRACE_EVENT_FN(hcall_entry, + + TP_PROTO(unsigned long opcode, unsigned long *args), + + TP_ARGS(opcode, args), + + TP_STRUCT__entry( + __field(unsigned long, opcode) + ), + + TP_fast_assign( + __entry->opcode = opcode; + ), + + TP_printk("opcode=%lu", __entry->opcode), + + hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc +); + +TRACE_EVENT_FN(hcall_exit, + + TP_PROTO(unsigned long opcode, unsigned long retval, + unsigned long *retbuf), + + TP_ARGS(opcode, retval, retbuf), + + TP_STRUCT__entry( + __field(unsigned long, opcode) + __field(unsigned long, retval) + ), + + TP_fast_assign( + __entry->opcode = opcode; + __entry->retval = retval; + ), + + TP_printk("opcode=%lu retval=%lu", __entry->opcode, __entry->retval), + + hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc +); +#endif + +#endif /* _TRACE_POWERPC_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH asm +#define TRACE_INCLUDE_FILE trace + +#include <trace/define_trace.h> diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index a5b632e52fa..3839839f83c 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -732,7 +732,7 @@ int fix_alignment(struct pt_regs *regs) #ifdef CONFIG_SPE if ((instr >> 26) == 0x4) { - PPC_WARN_EMULATED(spe); + PPC_WARN_ALIGNMENT(spe, regs); return emulate_spe(regs, reg, instr); } #endif @@ -786,7 +786,7 @@ int fix_alignment(struct pt_regs *regs) flags |= SPLT; nb = 8; } - PPC_WARN_EMULATED(vsx); + PPC_WARN_ALIGNMENT(vsx, regs); return emulate_vsx(addr, reg, areg, regs, flags, nb); } #endif @@ -794,7 +794,7 @@ int fix_alignment(struct pt_regs *regs) * the exception of DCBZ which is handled as a special case here */ if (instr == DCBZ) { - PPC_WARN_EMULATED(dcbz); + PPC_WARN_ALIGNMENT(dcbz, regs); return emulate_dcbz(regs, addr); } if (unlikely(nb == 0)) @@ -804,7 +804,7 @@ int fix_alignment(struct pt_regs *regs) * function */ if (flags & M) { - PPC_WARN_EMULATED(multiple); + PPC_WARN_ALIGNMENT(multiple, regs); return emulate_multiple(regs, addr, reg, nb, flags, instr, swiz); } @@ -825,11 +825,11 @@ int fix_alignment(struct pt_regs *regs) /* Special case for 16-byte FP loads and stores */ if (nb == 16) { - PPC_WARN_EMULATED(fp_pair); + PPC_WARN_ALIGNMENT(fp_pair, regs); return emulate_fp_pair(addr, reg, flags); } - PPC_WARN_EMULATED(unaligned); + PPC_WARN_ALIGNMENT(unaligned, regs); /* If we are loading, get the data from user space, else * get it from register values diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 9763267e38b..bdcb557d470 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -551,7 +551,7 @@ restore: BEGIN_FW_FTR_SECTION ld r5,SOFTE(r1) FW_FTR_SECTION_ELSE - b iseries_check_pending_irqs + b .Liseries_check_pending_irqs ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) 2: TRACE_AND_RESTORE_IRQ(r5); @@ -623,7 +623,7 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) #endif /* CONFIG_PPC_BOOK3E */ -iseries_check_pending_irqs: +.Liseries_check_pending_irqs: #ifdef CONFIG_PPC_ISERIES ld r5,SOFTE(r1) cmpdi 0,r5,0 diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 1808876edcc..c7eb4e0eb86 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -185,12 +185,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) * prolog code of the PerformanceMonitor one. A little * trickery is thus necessary */ +performance_monitor_pSeries_1: . = 0xf00 b performance_monitor_pSeries +altivec_unavailable_pSeries_1: . = 0xf20 b altivec_unavailable_pSeries +vsx_unavailable_pSeries_1: . = 0xf40 b vsx_unavailable_pSeries diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index e5d12117798..02a334662cc 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -70,6 +70,8 @@ #include <asm/firmware.h> #include <asm/lv1call.h> #endif +#define CREATE_TRACE_POINTS +#include <asm/trace.h> int __irq_offset_value; static int ppc_spurious_interrupts; @@ -325,6 +327,8 @@ void do_IRQ(struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); unsigned int irq; + trace_irq_entry(regs); + irq_enter(); check_stack_overflow(); @@ -348,6 +352,8 @@ void do_IRQ(struct pt_regs *regs) timer_interrupt(regs); } #endif + + trace_irq_exit(regs); } void __init init_IRQ(void) diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 87f1663584b..1eb85fbf53a 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -1165,7 +1165,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, */ if (record) { struct perf_sample_data data = { - .addr = 0, + .addr = ~0ULL, .period = event->hw.last_period, }; diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c index 0f4c1c73a6a..199de527d41 100644 --- a/arch/powerpc/kernel/power5+-pmu.c +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -73,10 +73,6 @@ #define MMCR1_PMCSEL_MSK 0x7f /* - * Bits in MMCRA - */ - -/* * Layout of constraint bits: * 6666555555555544444444443333333333222222222211111111110000000000 * 3210987654321098765432109876543210987654321098765432109876543210 diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c index c351b3a57fb..98b6a729a9d 100644 --- a/arch/powerpc/kernel/power5-pmu.c +++ b/arch/powerpc/kernel/power5-pmu.c @@ -73,10 +73,6 @@ #define MMCR1_PMCSEL_MSK 0x7f /* - * Bits in MMCRA - */ - -/* * Layout of constraint bits: * 6666555555555544444444443333333333222222222211111111110000000000 * 3210987654321098765432109876543210987654321098765432109876543210 @@ -390,7 +386,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], unsigned long mmcr[]) { unsigned long mmcr1 = 0; - unsigned long mmcra = 0; + unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; unsigned int pmc, unit, byte, psel; unsigned int ttm, grp; int i, isbus, bit, grsel; diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index ca399ba5034..84a607bda8f 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ b/arch/powerpc/kernel/power6-pmu.c @@ -178,7 +178,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], unsigned long mmcr[]) { unsigned long mmcr1 = 0; - unsigned long mmcra = 0; + unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; int i; unsigned int pmc, ev, b, u, s, psel; unsigned int ttmset = 0; diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c index 28a4daacdc0..852f7b7f6b4 100644 --- a/arch/powerpc/kernel/power7-pmu.c +++ b/arch/powerpc/kernel/power7-pmu.c @@ -51,10 +51,6 @@ #define MMCR1_PMCSEL_MSK 0xff /* - * Bits in MMCRA - */ - -/* * Layout of constraint bits: * 6666555555555544444444443333333333222222222211111111110000000000 * 3210987654321098765432109876543210987654321098765432109876543210 @@ -230,7 +226,7 @@ static int power7_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], unsigned long mmcr[]) { unsigned long mmcr1 = 0; - unsigned long mmcra = 0; + unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; unsigned int pmc, unit, combine, l2sel, psel; unsigned int pmc_inuse = 0; int i; diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index 479574413a9..8eff48e20db 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -84,10 +84,6 @@ static short mmcr1_adder_bits[8] = { }; /* - * Bits in MMCRA - */ - -/* * Layout of constraint bits: * 6666555555555544444444443333333333222222222211111111110000000000 * 3210987654321098765432109876543210987654321098765432109876543210 diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 4271f7a655a..845c72ab735 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -660,6 +660,7 @@ late_initcall(check_cache_coherency); #ifdef CONFIG_DEBUG_FS struct dentry *powerpc_debugfs_root; +EXPORT_SYMBOL(powerpc_debugfs_root); static int powerpc_debugfs_init(void) { diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 53bcf3d792d..b152de3e64d 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -345,7 +345,7 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_SWIOTLB if (ppc_swiotlb_enable) - swiotlb_init(); + swiotlb_init(1); #endif paging_init(); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 04f638d82fb..df2c9e932b3 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -550,7 +550,7 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_SWIOTLB if (ppc_swiotlb_enable) - swiotlb_init(); + swiotlb_init(1); #endif paging_init(); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index a136a11c490..36707dec94d 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -54,6 +54,7 @@ #include <linux/irq.h> #include <linux/delay.h> #include <linux/perf_event.h> +#include <asm/trace.h> #include <asm/io.h> #include <asm/processor.h> @@ -571,6 +572,8 @@ void timer_interrupt(struct pt_regs * regs) struct clock_event_device *evt = &decrementer->event; u64 now; + trace_timer_interrupt_entry(regs); + /* Ensure a positive value is written to the decrementer, or else * some CPUs will continuue to take decrementer exceptions */ set_dec(DECREMENTER_MAX); @@ -590,6 +593,7 @@ void timer_interrupt(struct pt_regs * regs) now = decrementer->next_tb - now; if (now <= DECREMENTER_MAX) set_dec((int)now); + trace_timer_interrupt_exit(regs); return; } old_regs = set_irq_regs(regs); @@ -620,6 +624,8 @@ void timer_interrupt(struct pt_regs * regs) irq_exit(); set_irq_regs(old_regs); + + trace_timer_interrupt_exit(regs); } void wakeup_decrementer(void) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 6f0ae1a9bfa..9d1f9354d6c 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -759,7 +759,7 @@ static int emulate_instruction(struct pt_regs *regs) /* Emulate the mfspr rD, PVR. */ if ((instword & PPC_INST_MFSPR_PVR_MASK) == PPC_INST_MFSPR_PVR) { - PPC_WARN_EMULATED(mfpvr); + PPC_WARN_EMULATED(mfpvr, regs); rd = (instword >> 21) & 0x1f; regs->gpr[rd] = mfspr(SPRN_PVR); return 0; @@ -767,7 +767,7 @@ static int emulate_instruction(struct pt_regs *regs) /* Emulating the dcba insn is just a no-op. */ if ((instword & PPC_INST_DCBA_MASK) == PPC_INST_DCBA) { - PPC_WARN_EMULATED(dcba); + PPC_WARN_EMULATED(dcba, regs); return 0; } @@ -776,7 +776,7 @@ static int emulate_instruction(struct pt_regs *regs) int shift = (instword >> 21) & 0x1c; unsigned long msk = 0xf0000000UL >> shift; - PPC_WARN_EMULATED(mcrxr); + PPC_WARN_EMULATED(mcrxr, regs); regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk); regs->xer &= ~0xf0000000UL; return 0; @@ -784,19 +784,19 @@ static int emulate_instruction(struct pt_regs *regs) /* Emulate load/store string insn. */ if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) { - PPC_WARN_EMULATED(string); + PPC_WARN_EMULATED(string, regs); return emulate_string_inst(regs, instword); } /* Emulate the popcntb (Population Count Bytes) instruction. */ if ((instword & PPC_INST_POPCNTB_MASK) == PPC_INST_POPCNTB) { - PPC_WARN_EMULATED(popcntb); + PPC_WARN_EMULATED(popcntb, regs); return emulate_popcntb_inst(regs, instword); } /* Emulate isel (Integer Select) instruction */ if ((instword & PPC_INST_ISEL_MASK) == PPC_INST_ISEL) { - PPC_WARN_EMULATED(isel); + PPC_WARN_EMULATED(isel, regs); return emulate_isel(regs, instword); } @@ -995,7 +995,7 @@ void SoftwareEmulation(struct pt_regs *regs) #ifdef CONFIG_MATH_EMULATION errcode = do_mathemu(regs); if (errcode >= 0) - PPC_WARN_EMULATED(math); + PPC_WARN_EMULATED(math, regs); switch (errcode) { case 0: @@ -1018,7 +1018,7 @@ void SoftwareEmulation(struct pt_regs *regs) #elif defined(CONFIG_8XX_MINIMAL_FPEMU) errcode = Soft_emulate_8xx(regs); if (errcode >= 0) - PPC_WARN_EMULATED(8xx); + PPC_WARN_EMULATED(8xx, regs); switch (errcode) { case 0: @@ -1129,7 +1129,7 @@ void altivec_assist_exception(struct pt_regs *regs) flush_altivec_to_thread(current); - PPC_WARN_EMULATED(altivec); + PPC_WARN_EMULATED(altivec, regs); err = emulate_altivec(regs); if (err == 0) { regs->nip += 4; /* skip emulated instruction */ diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S index 75f3267fdc3..e68beac0a17 100644 --- a/arch/powerpc/lib/copypage_64.S +++ b/arch/powerpc/lib/copypage_64.S @@ -26,11 +26,11 @@ BEGIN_FTR_SECTION srd r8,r5,r11 mtctr r8 -setup: +.Lsetup: dcbt r9,r4 dcbz r9,r3 add r9,r9,r12 - bdnz setup + bdnz .Lsetup END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ) addi r3,r3,-8 srdi r8,r5,7 /* page is copied in 128 byte strides */ diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index c1427b3634e..383a5d0e981 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -14,68 +14,94 @@ #define STK_PARM(i) (48 + ((i)-3)*8) -#ifdef CONFIG_HCALL_STATS +#ifdef CONFIG_TRACEPOINTS + + .section ".toc","aw" + + .globl hcall_tracepoint_refcount +hcall_tracepoint_refcount: + .llong 0 + + .section ".text" + /* * precall must preserve all registers. use unused STK_PARM() - * areas to save snapshots and opcode. + * areas to save snapshots and opcode. We branch around this + * in early init (eg when populating the MMU hashtable) by using an + * unconditional cpu feature. */ -#define HCALL_INST_PRECALL \ - std r3,STK_PARM(r3)(r1); /* save opcode */ \ - mftb r0; /* get timebase and */ \ - std r0,STK_PARM(r5)(r1); /* save for later */ \ +#define HCALL_INST_PRECALL(FIRST_REG) \ BEGIN_FTR_SECTION; \ - mfspr r0,SPRN_PURR; /* get PURR and */ \ - std r0,STK_PARM(r6)(r1); /* save for later */ \ -END_FTR_SECTION_IFSET(CPU_FTR_PURR); - + b 1f; \ +END_FTR_SECTION(0, 1); \ + ld r12,hcall_tracepoint_refcount@toc(r2); \ + cmpdi r12,0; \ + beq+ 1f; \ + mflr r0; \ + std r3,STK_PARM(r3)(r1); \ + std r4,STK_PARM(r4)(r1); \ + std r5,STK_PARM(r5)(r1); \ + std r6,STK_PARM(r6)(r1); \ + std r7,STK_PARM(r7)(r1); \ + std r8,STK_PARM(r8)(r1); \ + std r9,STK_PARM(r9)(r1); \ + std r10,STK_PARM(r10)(r1); \ + std r0,16(r1); \ + addi r4,r1,STK_PARM(FIRST_REG); \ + stdu r1,-STACK_FRAME_OVERHEAD(r1); \ + bl .__trace_hcall_entry; \ + addi r1,r1,STACK_FRAME_OVERHEAD; \ + ld r0,16(r1); \ + ld r3,STK_PARM(r3)(r1); \ + ld r4,STK_PARM(r4)(r1); \ + ld r5,STK_PARM(r5)(r1); \ + ld r6,STK_PARM(r6)(r1); \ + ld r7,STK_PARM(r7)(r1); \ + ld r8,STK_PARM(r8)(r1); \ + ld r9,STK_PARM(r9)(r1); \ + ld r10,STK_PARM(r10)(r1); \ + mtlr r0; \ +1: + /* * postcall is performed immediately before function return which * allows liberal use of volatile registers. We branch around this * in early init (eg when populating the MMU hashtable) by using an * unconditional cpu feature. */ -#define HCALL_INST_POSTCALL \ +#define __HCALL_INST_POSTCALL \ BEGIN_FTR_SECTION; \ b 1f; \ END_FTR_SECTION(0, 1); \ - ld r4,STK_PARM(r3)(r1); /* validate opcode */ \ - cmpldi cr7,r4,MAX_HCALL_OPCODE; \ - bgt- cr7,1f; \ - \ - /* get time and PURR snapshots after hcall */ \ - mftb r7; /* timebase after */ \ -BEGIN_FTR_SECTION; \ - mfspr r8,SPRN_PURR; /* PURR after */ \ - ld r6,STK_PARM(r6)(r1); /* PURR before */ \ - subf r6,r6,r8; /* delta */ \ -END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ - ld r5,STK_PARM(r5)(r1); /* timebase before */ \ - subf r5,r5,r7; /* time delta */ \ - \ - /* calculate address of stat structure r4 = opcode */ \ - srdi r4,r4,2; /* index into array */ \ - mulli r4,r4,HCALL_STAT_SIZE; \ - LOAD_REG_ADDR(r7, per_cpu__hcall_stats); \ - add r4,r4,r7; \ - ld r7,PACA_DATA_OFFSET(r13); /* per cpu offset */ \ - add r4,r4,r7; \ - \ - /* update stats */ \ - ld r7,HCALL_STAT_CALLS(r4); /* count */ \ - addi r7,r7,1; \ - std r7,HCALL_STAT_CALLS(r4); \ - ld r7,HCALL_STAT_TB(r4); /* timebase */ \ - add r7,r7,r5; \ - std r7,HCALL_STAT_TB(r4); \ -BEGIN_FTR_SECTION; \ - ld r7,HCALL_STAT_PURR(r4); /* PURR */ \ - add r7,r7,r6; \ - std r7,HCALL_STAT_PURR(r4); \ -END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ + ld r12,hcall_tracepoint_refcount@toc(r2); \ + cmpdi r12,0; \ + beq+ 1f; \ + mflr r0; \ + ld r6,STK_PARM(r3)(r1); \ + std r3,STK_PARM(r3)(r1); \ + mr r4,r3; \ + mr r3,r6; \ + std r0,16(r1); \ + stdu r1,-STACK_FRAME_OVERHEAD(r1); \ + bl .__trace_hcall_exit; \ + addi r1,r1,STACK_FRAME_OVERHEAD; \ + ld r0,16(r1); \ + ld r3,STK_PARM(r3)(r1); \ + mtlr r0; \ 1: + +#define HCALL_INST_POSTCALL_NORETS \ + li r5,0; \ + __HCALL_INST_POSTCALL + +#define HCALL_INST_POSTCALL(BUFREG) \ + mr r5,BUFREG; \ + __HCALL_INST_POSTCALL + #else -#define HCALL_INST_PRECALL -#define HCALL_INST_POSTCALL +#define HCALL_INST_PRECALL(FIRST_ARG) +#define HCALL_INST_POSTCALL_NORETS +#define HCALL_INST_POSTCALL(BUFREG) #endif .text @@ -86,11 +112,11 @@ _GLOBAL(plpar_hcall_norets) mfcr r0 stw r0,8(r1) - HCALL_INST_PRECALL + HCALL_INST_PRECALL(r4) HVSC /* invoke the hypervisor */ - HCALL_INST_POSTCALL + HCALL_INST_POSTCALL_NORETS lwz r0,8(r1) mtcrf 0xff,r0 @@ -102,7 +128,7 @@ _GLOBAL(plpar_hcall) mfcr r0 stw r0,8(r1) - HCALL_INST_PRECALL + HCALL_INST_PRECALL(r5) std r4,STK_PARM(r4)(r1) /* Save ret buffer */ @@ -121,7 +147,7 @@ _GLOBAL(plpar_hcall) std r6, 16(r12) std r7, 24(r12) - HCALL_INST_POSTCALL + HCALL_INST_POSTCALL(r12) lwz r0,8(r1) mtcrf 0xff,r0 @@ -168,7 +194,7 @@ _GLOBAL(plpar_hcall9) mfcr r0 stw r0,8(r1) - HCALL_INST_PRECALL + HCALL_INST_PRECALL(r5) std r4,STK_PARM(r4)(r1) /* Save ret buffer */ @@ -196,7 +222,7 @@ _GLOBAL(plpar_hcall9) std r11,56(r12) std r0, 64(r12) - HCALL_INST_POSTCALL + HCALL_INST_POSTCALL(r12) lwz r0,8(r1) mtcrf 0xff,r0 diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c index 3631a4f277e..2f58c71b725 100644 --- a/arch/powerpc/platforms/pseries/hvCall_inst.c +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -26,6 +26,7 @@ #include <asm/hvcall.h> #include <asm/firmware.h> #include <asm/cputable.h> +#include <asm/trace.h> DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats); @@ -100,6 +101,35 @@ static const struct file_operations hcall_inst_seq_fops = { #define HCALL_ROOT_DIR "hcall_inst" #define CPU_NAME_BUF_SIZE 32 + +static void probe_hcall_entry(unsigned long opcode, unsigned long *args) +{ + struct hcall_stats *h; + + if (opcode > MAX_HCALL_OPCODE) + return; + + h = &get_cpu_var(hcall_stats)[opcode / 4]; + h->tb_start = mftb(); + h->purr_start = mfspr(SPRN_PURR); +} + +static void probe_hcall_exit(unsigned long opcode, unsigned long retval, + unsigned long *retbuf) +{ + struct hcall_stats *h; + + if (opcode > MAX_HCALL_OPCODE) + return; + + h = &__get_cpu_var(hcall_stats)[opcode / 4]; + h->num_calls++; + h->tb_total = mftb() - h->tb_start; + h->purr_total = mfspr(SPRN_PURR) - h->purr_start; + + put_cpu_var(hcall_stats); +} + static int __init hcall_inst_init(void) { struct dentry *hcall_root; @@ -110,6 +140,14 @@ static int __init hcall_inst_init(void) if (!firmware_has_feature(FW_FEATURE_LPAR)) return 0; + if (register_trace_hcall_entry(probe_hcall_entry)) + return -EINVAL; + + if (register_trace_hcall_exit(probe_hcall_exit)) { + unregister_trace_hcall_entry(probe_hcall_entry); + return -EINVAL; + } + hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL); if (!hcall_root) return -ENOMEM; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 903eb9eec68..0707653612b 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -39,6 +39,7 @@ #include <asm/cputable.h> #include <asm/udbg.h> #include <asm/smp.h> +#include <asm/trace.h> #include "plpar_wrappers.h" #include "pseries.h" @@ -661,3 +662,35 @@ void arch_free_page(struct page *page, int order) EXPORT_SYMBOL(arch_free_page); #endif + +#ifdef CONFIG_TRACEPOINTS +/* + * We optimise our hcall path by placing hcall_tracepoint_refcount + * directly in the TOC so we can check if the hcall tracepoints are + * enabled via a single load. + */ + +/* NB: reg/unreg are called while guarded with the tracepoints_mutex */ +extern long hcall_tracepoint_refcount; + +void hcall_tracepoint_regfunc(void) +{ + hcall_tracepoint_refcount++; +} + +void hcall_tracepoint_unregfunc(void) +{ + hcall_tracepoint_refcount--; +} + +void __trace_hcall_entry(unsigned long opcode, unsigned long *args) +{ + trace_hcall_entry(opcode, args); +} + +void __trace_hcall_exit(long opcode, unsigned long retval, + unsigned long *retbuf) +{ + trace_hcall_exit(opcode, retval, retbuf); +} +#endif diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 43c0acad716..16c673096a2 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -95,6 +95,34 @@ config S390 select HAVE_ARCH_TRACEHOOK select INIT_ALL_POSSIBLE select HAVE_PERF_EVENTS + select ARCH_INLINE_SPIN_TRYLOCK + select ARCH_INLINE_SPIN_TRYLOCK_BH + select ARCH_INLINE_SPIN_LOCK + select ARCH_INLINE_SPIN_LOCK_BH + select ARCH_INLINE_SPIN_LOCK_IRQ + select ARCH_INLINE_SPIN_LOCK_IRQSAVE + select ARCH_INLINE_SPIN_UNLOCK + select ARCH_INLINE_SPIN_UNLOCK_BH + select ARCH_INLINE_SPIN_UNLOCK_IRQ + select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE + select ARCH_INLINE_READ_TRYLOCK + select ARCH_INLINE_READ_LOCK + select ARCH_INLINE_READ_LOCK_BH + select ARCH_INLINE_READ_LOCK_IRQ + select ARCH_INLINE_READ_LOCK_IRQSAVE + select ARCH_INLINE_READ_UNLOCK + select ARCH_INLINE_READ_UNLOCK_BH + select ARCH_INLINE_READ_UNLOCK_IRQ + select ARCH_INLINE_READ_UNLOCK_IRQRESTORE + select ARCH_INLINE_WRITE_TRYLOCK + select ARCH_INLINE_WRITE_LOCK + select ARCH_INLINE_WRITE_LOCK_BH + select ARCH_INLINE_WRITE_LOCK_IRQ + select ARCH_INLINE_WRITE_LOCK_IRQSAVE + select ARCH_INLINE_WRITE_UNLOCK + select ARCH_INLINE_WRITE_UNLOCK_BH + select ARCH_INLINE_WRITE_UNLOCK_IRQ + select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE config SCHED_OMIT_FRAME_POINTER bool diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h index 7efd0abe888..efb74fd5156 100644 --- a/arch/s390/include/asm/bug.h +++ b/arch/s390/include/asm/bug.h @@ -49,7 +49,7 @@ #define BUG() do { \ __EMIT_BUG(0); \ - for (;;); \ + unreachable(); \ } while (0) #define WARN_ON(x) ({ \ diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 41ce6861174..c9af0d19c7a 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -191,33 +191,4 @@ static inline int __raw_write_trylock(raw_rwlock_t *rw) #define _raw_read_relax(lock) cpu_relax() #define _raw_write_relax(lock) cpu_relax() -#define __always_inline__spin_lock -#define __always_inline__read_lock -#define __always_inline__write_lock -#define __always_inline__spin_lock_bh -#define __always_inline__read_lock_bh -#define __always_inline__write_lock_bh -#define __always_inline__spin_lock_irq -#define __always_inline__read_lock_irq -#define __always_inline__write_lock_irq -#define __always_inline__spin_lock_irqsave -#define __always_inline__read_lock_irqsave -#define __always_inline__write_lock_irqsave -#define __always_inline__spin_trylock -#define __always_inline__read_trylock -#define __always_inline__write_trylock -#define __always_inline__spin_trylock_bh -#define __always_inline__spin_unlock -#define __always_inline__read_unlock -#define __always_inline__write_unlock -#define __always_inline__spin_unlock_bh -#define __always_inline__read_unlock_bh -#define __always_inline__write_unlock_bh -#define __always_inline__spin_unlock_irq -#define __always_inline__read_unlock_irq -#define __always_inline__write_unlock_irq -#define __always_inline__spin_unlock_irqrestore -#define __always_inline__read_unlock_irqrestore -#define __always_inline__write_unlock_irqrestore - #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index f5fe34dd821..5a82bc68193 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -203,73 +203,10 @@ out: #ifdef CONFIG_FTRACE_SYSCALLS -extern unsigned long __start_syscalls_metadata[]; -extern unsigned long __stop_syscalls_metadata[]; extern unsigned int sys_call_table[]; -static struct syscall_metadata **syscalls_metadata; - -struct syscall_metadata *syscall_nr_to_meta(int nr) -{ - if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) - return NULL; - - return syscalls_metadata[nr]; -} - -int syscall_name_to_nr(char *name) -{ - int i; - - if (!syscalls_metadata) - return -1; - for (i = 0; i < NR_syscalls; i++) - if (syscalls_metadata[i]) - if (!strcmp(syscalls_metadata[i]->name, name)) - return i; - return -1; -} - -void set_syscall_enter_id(int num, int id) -{ - syscalls_metadata[num]->enter_id = id; -} - -void set_syscall_exit_id(int num, int id) +unsigned long __init arch_syscall_addr(int nr) { - syscalls_metadata[num]->exit_id = id; -} - -static struct syscall_metadata *find_syscall_meta(unsigned long syscall) -{ - struct syscall_metadata *start; - struct syscall_metadata *stop; - char str[KSYM_SYMBOL_LEN]; - - start = (struct syscall_metadata *)__start_syscalls_metadata; - stop = (struct syscall_metadata *)__stop_syscalls_metadata; - kallsyms_lookup(syscall, NULL, NULL, NULL, str); - - for ( ; start < stop; start++) { - if (start->name && !strcmp(start->name + 3, str + 3)) - return start; - } - return NULL; -} - -static int __init arch_init_ftrace_syscalls(void) -{ - struct syscall_metadata *meta; - int i; - syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * NR_syscalls, - GFP_KERNEL); - if (!syscalls_metadata) - return -ENOMEM; - for (i = 0; i < NR_syscalls; i++) { - meta = find_syscall_meta((unsigned long)sys_call_table[i]); - syscalls_metadata[i] = meta; - } - return 0; + return (unsigned long)sys_call_table[nr]; } -arch_initcall(arch_init_ftrace_syscalls); #endif diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 72ace9515a0..178084b4377 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -49,6 +49,7 @@ config X86 select HAVE_KERNEL_GZIP select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_LZMA + select HAVE_HW_BREAKPOINT select HAVE_ARCH_KMEMCHECK config OUTPUT_FORMAT diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 2649840d888..5e99762eb5c 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -406,7 +406,7 @@ config X86_CMPXCHG64 # generates cmov. config X86_CMOV def_bool y - depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM) + depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX) config X86_MINIMUM_CPU_FAMILY int diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index d105f29bb6b..731318e5ac1 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -186,6 +186,15 @@ config X86_DS_SELFTEST config HAVE_MMIOTRACE_SUPPORT def_bool y +config X86_DECODER_SELFTEST + bool "x86 instruction decoder selftest" + depends on DEBUG_KERNEL + ---help--- + Perform x86 instruction decoder selftests at build time. + This option is useful for checking the sanity of x86 instruction + decoder code. + If unsure, say "N". + # # IO delay types: # @@ -287,4 +296,18 @@ config OPTIMIZE_INLINING If unsure, say N. +config DEBUG_STRICT_USER_COPY_CHECKS + bool "Strict copy size checks" + depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING + ---help--- + Enabling this option turns a certain set of sanity checks for user + copy operations into compile time failures. + + The copy_from_user() etc checks are there to help test if there + are sufficient security checks on the length argument of + the copy operation, by having gcc prove that the argument is + within bounds. + + If unsure, or if you run an older (pre 4.4) gcc, say N. + endmenu diff --git a/arch/x86/Makefile b/arch/x86/Makefile index d2d24c9ee64..78b32be55e9 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -155,6 +155,9 @@ all: bzImage KBUILD_IMAGE := $(boot)/bzImage bzImage: vmlinux +ifeq ($(CONFIG_X86_DECODER_SELFTEST),y) + $(Q)$(MAKE) $(build)=arch/x86/tools posttest +endif $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE) $(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@ diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index 30e9a264f69..cbf0776dbec 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu @@ -41,7 +41,7 @@ cflags-$(CONFIG_X86_ELAN) += -march=i486 # Geode GX1 support cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx - +cflags-$(CONFIG_MGEODE_LX) += $(call cc-option,-march=geode,-march=pentium-mmx) # add at the end to overwrite eventual tuning options from earlier # cpu entries cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686)) diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 4a8e80cdcfa..9f828f87ca3 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -10,6 +10,7 @@ header-y += ptrace-abi.h header-y += sigcontext32.h header-y += ucontext.h header-y += processor-flags.h +header-y += hw_breakpoint.h unifdef-y += e820.h unifdef-y += ist.h diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h index bb70e397aa8..7a15588e45d 100644 --- a/arch/x86/include/asm/a.out-core.h +++ b/arch/x86/include/asm/a.out-core.h @@ -17,6 +17,7 @@ #include <linux/user.h> #include <linux/elfcore.h> +#include <asm/debugreg.h> /* * fill in the user structure for an a.out core dump @@ -32,14 +33,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) >> PAGE_SHIFT; dump->u_dsize -= dump->u_tsize; dump->u_ssize = 0; - dump->u_debugreg[0] = current->thread.debugreg0; - dump->u_debugreg[1] = current->thread.debugreg1; - dump->u_debugreg[2] = current->thread.debugreg2; - dump->u_debugreg[3] = current->thread.debugreg3; - dump->u_debugreg[4] = 0; - dump->u_debugreg[5] = 0; - dump->u_debugreg[6] = current->thread.debugreg6; - dump->u_debugreg[7] = current->thread.debugreg7; + aout_dump_debugregs(dump); if (dump->start_stack < TASK_SIZE) dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack)) diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index e2077d343c3..b97f786a48d 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -1,17 +1,13 @@ #ifdef __ASSEMBLY__ -#ifdef CONFIG_X86_32 -# define X86_ALIGN .long -#else -# define X86_ALIGN .quad -#endif +#include <asm/asm.h> #ifdef CONFIG_SMP .macro LOCK_PREFIX 1: lock .section .smp_locks,"a" - .align 4 - X86_ALIGN 1b + _ASM_ALIGN + _ASM_PTR 1b .previous .endm #else diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index c240efc74e0..69b74a7b877 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -84,6 +84,7 @@ static inline void alternatives_smp_switch(int smp) {} " .byte " __stringify(feature) "\n" /* feature bit */ \ " .byte 662b-661b\n" /* sourcelen */ \ " .byte 664f-663f\n" /* replacementlen */ \ + " .byte 0xff + (664f-663f) - (662b-661b)\n" /* rlen <= slen */ \ ".previous\n" \ ".section .altinstr_replacement, \"ax\"\n" \ "663:\n\t" newinstr "\n664:\n" /* replacement */ \ diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index 4b180897e6b..5af2982133b 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. * Author: Joerg Roedel <joerg.roedel@amd.com> * Leo Duran <leo.duran@amd.com> * @@ -23,19 +23,13 @@ #include <linux/irqreturn.h> #ifdef CONFIG_AMD_IOMMU -extern int amd_iommu_init(void); -extern int amd_iommu_init_dma_ops(void); -extern int amd_iommu_init_passthrough(void); + extern void amd_iommu_detect(void); -extern irqreturn_t amd_iommu_int_handler(int irq, void *data); -extern void amd_iommu_flush_all_domains(void); -extern void amd_iommu_flush_all_devices(void); -extern void amd_iommu_shutdown(void); -extern void amd_iommu_apply_erratum_63(u16 devid); + #else -static inline int amd_iommu_init(void) { return -ENODEV; } + static inline void amd_iommu_detect(void) { } -static inline void amd_iommu_shutdown(void) { } + #endif #endif /* _ASM_X86_AMD_IOMMU_H */ diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h new file mode 100644 index 00000000000..84786fb9a23 --- /dev/null +++ b/arch/x86/include/asm/amd_iommu_proto.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2009 Advanced Micro Devices, Inc. + * Author: Joerg Roedel <joerg.roedel@amd.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ASM_X86_AMD_IOMMU_PROTO_H +#define _ASM_X86_AMD_IOMMU_PROTO_H + +struct amd_iommu; + +extern int amd_iommu_init_dma_ops(void); +extern int amd_iommu_init_passthrough(void); +extern irqreturn_t amd_iommu_int_handler(int irq, void *data); +extern void amd_iommu_flush_all_domains(void); +extern void amd_iommu_flush_all_devices(void); +extern void amd_iommu_apply_erratum_63(u16 devid); +extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); + +#ifndef CONFIG_AMD_IOMMU_STATS + +static inline void amd_iommu_stats_init(void) { } + +#endif /* !CONFIG_AMD_IOMMU_STATS */ + +#endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 2a2cc7a78a8..ba19ad4c47d 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. * Author: Joerg Roedel <joerg.roedel@amd.com> * Leo Duran <leo.duran@amd.com> * @@ -25,6 +25,11 @@ #include <linux/spinlock.h> /* + * Maximum number of IOMMUs supported + */ +#define MAX_IOMMUS 32 + +/* * some size calculation constants */ #define DEV_TABLE_ENTRY_SIZE 32 @@ -206,6 +211,9 @@ extern bool amd_iommu_dump; printk(KERN_INFO "AMD-Vi: " format, ## arg); \ } while(0); +/* global flag if IOMMUs cache non-present entries */ +extern bool amd_iommu_np_cache; + /* * Make iterating over all IOMMUs easier */ @@ -226,6 +234,8 @@ extern bool amd_iommu_dump; * independent of their use. */ struct protection_domain { + struct list_head list; /* for list of all protection domains */ + struct list_head dev_list; /* List of all devices in this domain */ spinlock_t lock; /* mostly used to lock the page table*/ u16 id; /* the domain id written to the device table */ int mode; /* paging mode (0-6 levels) */ @@ -233,7 +243,20 @@ struct protection_domain { unsigned long flags; /* flags to find out type of domain */ bool updated; /* complete domain flush required */ unsigned dev_cnt; /* devices assigned to this domain */ + unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ void *priv; /* private data */ + +}; + +/* + * This struct contains device specific data for the IOMMU + */ +struct iommu_dev_data { + struct list_head list; /* For domain->dev_list */ + struct device *dev; /* Device this data belong to */ + struct device *alias; /* The Alias Device */ + struct protection_domain *domain; /* Domain the device is bound to */ + atomic_t bind; /* Domain attach reverent count */ }; /* @@ -291,6 +314,9 @@ struct dma_ops_domain { struct amd_iommu { struct list_head list; + /* Index within the IOMMU array */ + int index; + /* locks the accesses to the hardware */ spinlock_t lock; @@ -357,6 +383,21 @@ struct amd_iommu { extern struct list_head amd_iommu_list; /* + * Array with pointers to each IOMMU struct + * The indices are referenced in the protection domains + */ +extern struct amd_iommu *amd_iommus[MAX_IOMMUS]; + +/* Number of IOMMUs present in the system */ +extern int amd_iommus_present; + +/* + * Declarations for the global list of all protection domains + */ +extern spinlock_t amd_iommu_pd_lock; +extern struct list_head amd_iommu_pd_list; + +/* * Structure defining one entry in the device table */ struct dev_table_entry { @@ -416,15 +457,9 @@ extern unsigned amd_iommu_aperture_order; /* largest PCI device id we expect translation requests for */ extern u16 amd_iommu_last_bdf; -/* data structures for protection domain handling */ -extern struct protection_domain **amd_iommu_pd_table; - /* allocation bitmap for domain ids */ extern unsigned long *amd_iommu_pd_alloc_bitmap; -/* will be 1 if device isolation is enabled */ -extern bool amd_iommu_isolate; - /* * If true, the addresses will be flushed on unmap time, not when * they are reused @@ -462,11 +497,6 @@ struct __iommu_counter { #define ADD_STATS_COUNTER(name, x) #define SUB_STATS_COUNTER(name, x) -static inline void amd_iommu_stats_init(void) { } - #endif /* CONFIG_AMD_IOMMU_STATS */ -/* some function prototypes */ -extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); - #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 474d80d3e6c..b4ac2cdcb64 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -297,20 +297,20 @@ struct apic { int disable_esr; int dest_logical; - unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); + unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid); unsigned long (*check_apicid_present)(int apicid); void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); void (*init_apic_ldr)(void); - physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map); + void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); void (*setup_apic_routing)(void); int (*multi_timer_check)(int apic, int irq); int (*apicid_to_node)(int logical_apicid); int (*cpu_to_logical_apicid)(int cpu); int (*cpu_present_to_apicid)(int mps_cpu); - physid_mask_t (*apicid_to_cpu_present)(int phys_apicid); + void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap); void (*setup_portio_remap)(void); int (*check_phys_apicid_present)(int phys_apicid); void (*enable_apic_mode)(void); @@ -488,6 +488,8 @@ static inline unsigned int read_apic_id(void) extern void default_setup_apic_routing(void); +extern struct apic apic_noop; + #ifdef CONFIG_X86_32 extern struct apic apic_default; @@ -532,9 +534,9 @@ default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, return (unsigned int)(mask1 & mask2 & mask3); } -static inline unsigned long default_check_apicid_used(physid_mask_t bitmap, int apicid) +static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid) { - return physid_isset(apicid, bitmap); + return physid_isset(apicid, *map); } static inline unsigned long default_check_apicid_present(int bit) @@ -542,9 +544,9 @@ static inline unsigned long default_check_apicid_present(int bit) return physid_isset(bit, phys_cpu_present_map); } -static inline physid_mask_t default_ioapic_phys_id_map(physid_mask_t phys_map) +static inline void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) { - return phys_map; + *retmap = *phys_map; } /* Mapping from cpu number to logical apicid */ @@ -583,11 +585,6 @@ extern int default_cpu_present_to_apicid(int mps_cpu); extern int default_check_phys_apicid_present(int phys_apicid); #endif -static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid) -{ - return physid_mask_of_physid(phys_apicid); -} - #endif /* CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_32 diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 3b62da926de..7fe3b3060f0 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -11,6 +11,12 @@ #define IO_APIC_DEFAULT_PHYS_BASE 0xfec00000 #define APIC_DEFAULT_PHYS_BASE 0xfee00000 +/* + * This is the IO-APIC register space as specified + * by Intel docs: + */ +#define IO_APIC_SLOT_SIZE 1024 + #define APIC_ID 0x20 #define APIC_LVR 0x30 diff --git a/arch/x86/include/asm/apicnum.h b/arch/x86/include/asm/apicnum.h deleted file mode 100644 index 82f613c607c..00000000000 --- a/arch/x86/include/asm/apicnum.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _ASM_X86_APICNUM_H -#define _ASM_X86_APICNUM_H - -/* define MAX_IO_APICS */ -#ifdef CONFIG_X86_32 -# define MAX_IO_APICS 64 -#else -# define MAX_IO_APICS 128 -# define MAX_LOCAL_APIC 32768 -#endif - -#endif /* _ASM_X86_APICNUM_H */ diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index d9cf1cd156d..f654d1bb17f 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -22,14 +22,14 @@ do { \ ".popsection" \ : : "i" (__FILE__), "i" (__LINE__), \ "i" (sizeof(struct bug_entry))); \ - for (;;) ; \ + unreachable(); \ } while (0) #else #define BUG() \ do { \ asm volatile("ud2"); \ - for (;;) ; \ + unreachable(); \ } while (0) #endif diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h index b03bedb62aa..0918654305a 100644 --- a/arch/x86/include/asm/calgary.h +++ b/arch/x86/include/asm/calgary.h @@ -62,10 +62,8 @@ struct cal_chipset_ops { extern int use_calgary; #ifdef CONFIG_CALGARY_IOMMU -extern int calgary_iommu_init(void); extern void detect_calgary(void); #else -static inline int calgary_iommu_init(void) { return 1; } static inline void detect_calgary(void) { return; } #endif diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index ee1931be659..ffb9bb6b6c3 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -8,14 +8,50 @@ * you need to test for the feature in boot_cpu_data. */ -#define xchg(ptr, v) \ - ((__typeof__(*(ptr)))__xchg((unsigned long)(v), (ptr), sizeof(*(ptr)))) +extern void __xchg_wrong_size(void); + +/* + * Note: no "lock" prefix even on SMP: xchg always implies lock anyway + * Note 2: xchg has side effect, so that attribute volatile is necessary, + * but generally the primitive is invalid, *ptr is output argument. --ANK + */ struct __xchg_dummy { unsigned long a[100]; }; #define __xg(x) ((struct __xchg_dummy *)(x)) +#define __xchg(x, ptr, size) \ +({ \ + __typeof(*(ptr)) __x = (x); \ + switch (size) { \ + case 1: \ + asm volatile("xchgb %b0,%1" \ + : "=q" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile("xchgw %w0,%1" \ + : "=r" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile("xchgl %0,%1" \ + : "=r" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + default: \ + __xchg_wrong_size(); \ + } \ + __x; \ +}) + +#define xchg(ptr, v) \ + __xchg((v), (ptr), sizeof(*ptr)) + /* * The semantics of XCHGCMP8B are a bit strange, this is why * there is a loop and the loading of %%eax and %%edx has to @@ -71,57 +107,63 @@ static inline void __set_64bit_var(unsigned long long *ptr, (unsigned int)((value) >> 32)) \ : __set_64bit(ptr, ll_low((value)), ll_high((value)))) -/* - * Note: no "lock" prefix even on SMP: xchg always implies lock anyway - * Note 2: xchg has side effect, so that attribute volatile is necessary, - * but generally the primitive is invalid, *ptr is output argument. --ANK - */ -static inline unsigned long __xchg(unsigned long x, volatile void *ptr, - int size) -{ - switch (size) { - case 1: - asm volatile("xchgb %b0,%1" - : "=q" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - case 2: - asm volatile("xchgw %w0,%1" - : "=r" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - case 4: - asm volatile("xchgl %0,%1" - : "=r" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - } - return x; -} +extern void __cmpxchg_wrong_size(void); /* * Atomic compare and exchange. Compare OLD with MEM, if identical, * store NEW in MEM. Return the initial value in MEM. Success is * indicated by comparing RETURN with OLD. */ +#define __raw_cmpxchg(ptr, old, new, size, lock) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) __old = (old); \ + __typeof__(*(ptr)) __new = (new); \ + switch (size) { \ + case 1: \ + asm volatile(lock "cmpxchgb %b1,%2" \ + : "=a"(__ret) \ + : "q"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile(lock "cmpxchgw %w1,%2" \ + : "=a"(__ret) \ + : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile(lock "cmpxchgl %1,%2" \ + : "=a"(__ret) \ + : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + default: \ + __cmpxchg_wrong_size(); \ + } \ + __ret; \ +}) + +#define __cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX) + +#define __sync_cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), "lock; ") + +#define __cmpxchg_local(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), "") #ifdef CONFIG_X86_CMPXCHG #define __HAVE_ARCH_CMPXCHG 1 -#define cmpxchg(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) -#define sync_cmpxchg(ptr, o, n) \ - ((__typeof__(*(ptr)))__sync_cmpxchg((ptr), (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) -#define cmpxchg_local(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) + +#define cmpxchg(ptr, old, new) \ + __cmpxchg((ptr), (old), (new), sizeof(*ptr)) + +#define sync_cmpxchg(ptr, old, new) \ + __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr)) + +#define cmpxchg_local(ptr, old, new) \ + __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) #endif #ifdef CONFIG_X86_CMPXCHG64 @@ -133,94 +175,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, (unsigned long long)(n))) #endif -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile(LOCK_PREFIX "cmpxchgl %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} - -/* - * Always use locked operations when touching memory shared with a - * hypervisor, since the system may be SMP even if the guest kernel - * isn't. - */ -static inline unsigned long __sync_cmpxchg(volatile void *ptr, - unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile("lock; cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile("lock; cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile("lock; cmpxchgl %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} - -static inline unsigned long __cmpxchg_local(volatile void *ptr, - unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile("cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile("cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile("cmpxchgl %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} - static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long long old, unsigned long long new) diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 52de72e0de8..485ae415fae 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -3,9 +3,6 @@ #include <asm/alternative.h> /* Provides LOCK_PREFIX */ -#define xchg(ptr, v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v), \ - (ptr), sizeof(*(ptr)))) - #define __xg(x) ((volatile long *)(x)) static inline void set_64bit(volatile unsigned long *ptr, unsigned long val) @@ -15,167 +12,118 @@ static inline void set_64bit(volatile unsigned long *ptr, unsigned long val) #define _set_64bit set_64bit +extern void __xchg_wrong_size(void); +extern void __cmpxchg_wrong_size(void); + /* * Note: no "lock" prefix even on SMP: xchg always implies lock anyway * Note 2: xchg has side effect, so that attribute volatile is necessary, * but generally the primitive is invalid, *ptr is output argument. --ANK */ -static inline unsigned long __xchg(unsigned long x, volatile void *ptr, - int size) -{ - switch (size) { - case 1: - asm volatile("xchgb %b0,%1" - : "=q" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - case 2: - asm volatile("xchgw %w0,%1" - : "=r" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - case 4: - asm volatile("xchgl %k0,%1" - : "=r" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - case 8: - asm volatile("xchgq %0,%1" - : "=r" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - } - return x; -} +#define __xchg(x, ptr, size) \ +({ \ + __typeof(*(ptr)) __x = (x); \ + switch (size) { \ + case 1: \ + asm volatile("xchgb %b0,%1" \ + : "=q" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile("xchgw %w0,%1" \ + : "=r" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile("xchgl %k0,%1" \ + : "=r" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + case 8: \ + asm volatile("xchgq %0,%1" \ + : "=r" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + default: \ + __xchg_wrong_size(); \ + } \ + __x; \ +}) + +#define xchg(ptr, v) \ + __xchg((v), (ptr), sizeof(*ptr)) + +#define __HAVE_ARCH_CMPXCHG 1 /* * Atomic compare and exchange. Compare OLD with MEM, if identical, * store NEW in MEM. Return the initial value in MEM. Success is * indicated by comparing RETURN with OLD. */ +#define __raw_cmpxchg(ptr, old, new, size, lock) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) __old = (old); \ + __typeof__(*(ptr)) __new = (new); \ + switch (size) { \ + case 1: \ + asm volatile(lock "cmpxchgb %b1,%2" \ + : "=a"(__ret) \ + : "q"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile(lock "cmpxchgw %w1,%2" \ + : "=a"(__ret) \ + : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile(lock "cmpxchgl %k1,%2" \ + : "=a"(__ret) \ + : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + case 8: \ + asm volatile(lock "cmpxchgq %1,%2" \ + : "=a"(__ret) \ + : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + default: \ + __cmpxchg_wrong_size(); \ + } \ + __ret; \ +}) -#define __HAVE_ARCH_CMPXCHG 1 +#define __cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX) -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile(LOCK_PREFIX "cmpxchgl %k1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 8: - asm volatile(LOCK_PREFIX "cmpxchgq %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} +#define __sync_cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), "lock; ") -/* - * Always use locked operations when touching memory shared with a - * hypervisor, since the system may be SMP even if the guest kernel - * isn't. - */ -static inline unsigned long __sync_cmpxchg(volatile void *ptr, - unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile("lock; cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile("lock; cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile("lock; cmpxchgl %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} +#define __cmpxchg_local(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), "") -static inline unsigned long __cmpxchg_local(volatile void *ptr, - unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile("cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile("cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile("cmpxchgl %k1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 8: - asm volatile("cmpxchgq %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} +#define cmpxchg(ptr, old, new) \ + __cmpxchg((ptr), (old), (new), sizeof(*ptr)) + +#define sync_cmpxchg(ptr, old, new) \ + __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr)) + +#define cmpxchg_local(ptr, old, new) \ + __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) -#define cmpxchg(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o), \ - (unsigned long)(n), sizeof(*(ptr)))) #define cmpxchg64(ptr, o, n) \ ({ \ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ cmpxchg((ptr), (o), (n)); \ }) -#define cmpxchg_local(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) -#define sync_cmpxchg(ptr, o, n) \ - ((__typeof__(*(ptr)))__sync_cmpxchg((ptr), (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) + #define cmpxchg64_local(ptr, o, n) \ ({ \ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 3ea6f37be9e..8240f76b531 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -18,6 +18,7 @@ #define DR_TRAP1 (0x2) /* db1 */ #define DR_TRAP2 (0x4) /* db2 */ #define DR_TRAP3 (0x8) /* db3 */ +#define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3) #define DR_STEP (0x4000) /* single-step */ #define DR_SWITCH (0x8000) /* task switch */ @@ -49,6 +50,8 @@ #define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */ #define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */ +#define DR_LOCAL_ENABLE (0x1) /* Local enable for reg 0 */ +#define DR_GLOBAL_ENABLE (0x2) /* Global enable for reg 0 */ #define DR_ENABLE_SIZE 2 /* 2 enable bits per register */ #define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */ @@ -67,4 +70,34 @@ #define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ #define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ +/* + * HW breakpoint additions + */ +#ifdef __KERNEL__ + +DECLARE_PER_CPU(unsigned long, cpu_dr7); + +static inline void hw_breakpoint_disable(void) +{ + /* Zero the control register for HW Breakpoint */ + set_debugreg(0UL, 7); + + /* Zero-out the individual HW breakpoint address registers */ + set_debugreg(0UL, 0); + set_debugreg(0UL, 1); + set_debugreg(0UL, 2); + set_debugreg(0UL, 3); +} + +static inline int hw_breakpoint_active(void) +{ + return __get_cpu_var(cpu_dr7) & DR_GLOBAL_ENABLE_MASK; +} + +extern void aout_dump_debugregs(struct user *dump); + +extern void hw_breakpoint_restore(void); + +#endif /* __KERNEL__ */ + #endif /* _ASM_X86_DEBUGREG_H */ diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h index cee34e9ca45..029f230ab63 100644 --- a/arch/x86/include/asm/device.h +++ b/arch/x86/include/asm/device.h @@ -8,7 +8,7 @@ struct dev_archdata { #ifdef CONFIG_X86_64 struct dma_map_ops *dma_ops; #endif -#ifdef CONFIG_DMAR +#if defined(CONFIG_DMAR) || defined(CONFIG_AMD_IOMMU) void *iommu; /* hook for IOMMU specific extension */ #endif }; diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 6a25d5d4283..0f6c02f3b7d 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -20,7 +20,8 @@ # define ISA_DMA_BIT_MASK DMA_BIT_MASK(32) #endif -extern dma_addr_t bad_dma_address; +#define DMA_ERROR_CODE 0 + extern int iommu_merge; extern struct device x86_dma_fallback_dev; extern int panic_on_overflow; @@ -48,7 +49,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) if (ops->mapping_error) return ops->mapping_error(dev, dma_addr); - return (dma_addr == bad_dma_address); + return (dma_addr == DMA_ERROR_CODE); } #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h index 6cfdafa409d..4ac5b0f33fc 100644 --- a/arch/x86/include/asm/gart.h +++ b/arch/x86/include/asm/gart.h @@ -35,8 +35,7 @@ extern int gart_iommu_aperture_allowed; extern int gart_iommu_aperture_disabled; extern void early_gart_iommu_check(void); -extern void gart_iommu_init(void); -extern void gart_iommu_shutdown(void); +extern int gart_iommu_init(void); extern void __init gart_parse_options(char *); extern void gart_iommu_hole_init(void); @@ -48,12 +47,6 @@ extern void gart_iommu_hole_init(void); static inline void early_gart_iommu_check(void) { } -static inline void gart_iommu_init(void) -{ -} -static inline void gart_iommu_shutdown(void) -{ -} static inline void gart_parse_options(char *options) { } diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 82e3e8f0104..108eb6fd1ae 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -20,11 +20,11 @@ typedef struct { unsigned int irq_call_count; unsigned int irq_tlb_count; #endif -#ifdef CONFIG_X86_MCE +#ifdef CONFIG_X86_THERMAL_VECTOR unsigned int irq_thermal_count; -# ifdef CONFIG_X86_MCE_THRESHOLD +#endif +#ifdef CONFIG_X86_MCE_THRESHOLD unsigned int irq_threshold_count; -# endif #endif } ____cacheline_aligned irq_cpustat_t; diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h new file mode 100644 index 00000000000..0675a7c4c20 --- /dev/null +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -0,0 +1,73 @@ +#ifndef _I386_HW_BREAKPOINT_H +#define _I386_HW_BREAKPOINT_H + +#ifdef __KERNEL__ +#define __ARCH_HW_BREAKPOINT_H + +/* + * The name should probably be something dealt in + * a higher level. While dealing with the user + * (display/resolving) + */ +struct arch_hw_breakpoint { + char *name; /* Contains name of the symbol to set bkpt */ + unsigned long address; + u8 len; + u8 type; +}; + +#include <linux/kdebug.h> +#include <linux/percpu.h> +#include <linux/list.h> + +/* Available HW breakpoint length encodings */ +#define X86_BREAKPOINT_LEN_1 0x40 +#define X86_BREAKPOINT_LEN_2 0x44 +#define X86_BREAKPOINT_LEN_4 0x4c +#define X86_BREAKPOINT_LEN_EXECUTE 0x40 + +#ifdef CONFIG_X86_64 +#define X86_BREAKPOINT_LEN_8 0x48 +#endif + +/* Available HW breakpoint type encodings */ + +/* trigger on instruction execute */ +#define X86_BREAKPOINT_EXECUTE 0x80 +/* trigger on memory write */ +#define X86_BREAKPOINT_WRITE 0x81 +/* trigger on memory read or write */ +#define X86_BREAKPOINT_RW 0x83 + +/* Total number of available HW breakpoint registers */ +#define HBP_NUM 4 + +struct perf_event; +struct pmu; + +extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len); +extern int arch_validate_hwbkpt_settings(struct perf_event *bp, + struct task_struct *tsk); +extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, + unsigned long val, void *data); + + +int arch_install_hw_breakpoint(struct perf_event *bp); +void arch_uninstall_hw_breakpoint(struct perf_event *bp); +void hw_breakpoint_pmu_read(struct perf_event *bp); +void hw_breakpoint_pmu_unthrottle(struct perf_event *bp); + +extern void +arch_fill_perf_breakpoint(struct perf_event *bp); + +unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type); +int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type); + +extern int arch_bp_generic_fields(int x86_len, int x86_type, + int *gen_len, int *gen_type); + +extern struct pmu perf_ops_bp; + +#endif /* __KERNEL__ */ +#endif /* _I386_HW_BREAKPOINT_H */ + diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index ba180d93b08..6e124269fd4 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -79,14 +79,32 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr, int ioapic, int ioapic_pin, int trigger, int polarity) { - irq_attr->ioapic = ioapic; - irq_attr->ioapic_pin = ioapic_pin; - irq_attr->trigger = trigger; - irq_attr->polarity = polarity; + irq_attr->ioapic = ioapic; + irq_attr->ioapic_pin = ioapic_pin; + irq_attr->trigger = trigger; + irq_attr->polarity = polarity; } -extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, - struct io_apic_irq_attr *irq_attr); +/* + * This is performance-critical, we want to do it O(1) + * + * Most irqs are mapped 1:1 with pins. + */ +struct irq_cfg { + struct irq_pin_list *irq_2_pin; + cpumask_var_t domain; + cpumask_var_t old_domain; + u8 vector; + u8 move_in_progress : 1; +}; + +extern struct irq_cfg *irq_cfg(unsigned int); +extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *); +extern void send_cleanup_vector(struct irq_cfg *); + +struct irq_desc; +extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *); +extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr); extern void setup_ioapic_dest(void); extern void enable_IO_APIC(void); diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h new file mode 100644 index 00000000000..205b063e3e3 --- /dev/null +++ b/arch/x86/include/asm/inat.h @@ -0,0 +1,220 @@ +#ifndef _ASM_X86_INAT_H +#define _ASM_X86_INAT_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ +#include <asm/inat_types.h> + +/* + * Internal bits. Don't use bitmasks directly, because these bits are + * unstable. You should use checking functions. + */ + +#define INAT_OPCODE_TABLE_SIZE 256 +#define INAT_GROUP_TABLE_SIZE 8 + +/* Legacy last prefixes */ +#define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ +#define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */ +#define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */ +/* Other Legacy prefixes */ +#define INAT_PFX_LOCK 4 /* 0xF0 */ +#define INAT_PFX_CS 5 /* 0x2E */ +#define INAT_PFX_DS 6 /* 0x3E */ +#define INAT_PFX_ES 7 /* 0x26 */ +#define INAT_PFX_FS 8 /* 0x64 */ +#define INAT_PFX_GS 9 /* 0x65 */ +#define INAT_PFX_SS 10 /* 0x36 */ +#define INAT_PFX_ADDRSZ 11 /* 0x67 */ +/* x86-64 REX prefix */ +#define INAT_PFX_REX 12 /* 0x4X */ +/* AVX VEX prefixes */ +#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ +#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ + +#define INAT_LSTPFX_MAX 3 +#define INAT_LGCPFX_MAX 11 + +/* Immediate size */ +#define INAT_IMM_BYTE 1 +#define INAT_IMM_WORD 2 +#define INAT_IMM_DWORD 3 +#define INAT_IMM_QWORD 4 +#define INAT_IMM_PTR 5 +#define INAT_IMM_VWORD32 6 +#define INAT_IMM_VWORD 7 + +/* Legacy prefix */ +#define INAT_PFX_OFFS 0 +#define INAT_PFX_BITS 4 +#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1) +#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS) +/* Escape opcodes */ +#define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS) +#define INAT_ESC_BITS 2 +#define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1) +#define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS) +/* Group opcodes (1-16) */ +#define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS) +#define INAT_GRP_BITS 5 +#define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1) +#define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS) +/* Immediates */ +#define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS) +#define INAT_IMM_BITS 3 +#define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS) +/* Flags */ +#define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS) +#define INAT_MODRM (1 << (INAT_FLAG_OFFS)) +#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1)) +#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2)) +#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3)) +#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) +#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) +#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) +/* Attribute making macros for attribute tables */ +#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) +#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) +#define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM) +#define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS) + +/* Attribute search APIs */ +extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); +extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, + insn_byte_t last_pfx, + insn_attr_t esc_attr); +extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, + insn_byte_t last_pfx, + insn_attr_t esc_attr); +extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, + insn_byte_t vex_m, + insn_byte_t vex_pp); + +/* Attribute checking functions */ +static inline int inat_is_legacy_prefix(insn_attr_t attr) +{ + attr &= INAT_PFX_MASK; + return attr && attr <= INAT_LGCPFX_MAX; +} + +static inline int inat_is_address_size_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ; +} + +static inline int inat_is_operand_size_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ; +} + +static inline int inat_is_rex_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_REX; +} + +static inline int inat_last_prefix_id(insn_attr_t attr) +{ + if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX) + return 0; + else + return attr & INAT_PFX_MASK; +} + +static inline int inat_is_vex_prefix(insn_attr_t attr) +{ + attr &= INAT_PFX_MASK; + return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3; +} + +static inline int inat_is_vex3_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3; +} + +static inline int inat_is_escape(insn_attr_t attr) +{ + return attr & INAT_ESC_MASK; +} + +static inline int inat_escape_id(insn_attr_t attr) +{ + return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS; +} + +static inline int inat_is_group(insn_attr_t attr) +{ + return attr & INAT_GRP_MASK; +} + +static inline int inat_group_id(insn_attr_t attr) +{ + return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS; +} + +static inline int inat_group_common_attribute(insn_attr_t attr) +{ + return attr & ~INAT_GRP_MASK; +} + +static inline int inat_has_immediate(insn_attr_t attr) +{ + return attr & INAT_IMM_MASK; +} + +static inline int inat_immediate_size(insn_attr_t attr) +{ + return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS; +} + +static inline int inat_has_modrm(insn_attr_t attr) +{ + return attr & INAT_MODRM; +} + +static inline int inat_is_force64(insn_attr_t attr) +{ + return attr & INAT_FORCE64; +} + +static inline int inat_has_second_immediate(insn_attr_t attr) +{ + return attr & INAT_SCNDIMM; +} + +static inline int inat_has_moffset(insn_attr_t attr) +{ + return attr & INAT_MOFFSET; +} + +static inline int inat_has_variant(insn_attr_t attr) +{ + return attr & INAT_VARIANT; +} + +static inline int inat_accept_vex(insn_attr_t attr) +{ + return attr & INAT_VEXOK; +} + +static inline int inat_must_vex(insn_attr_t attr) +{ + return attr & INAT_VEXONLY; +} +#endif diff --git a/arch/x86/include/asm/inat_types.h b/arch/x86/include/asm/inat_types.h new file mode 100644 index 00000000000..cb3c20ce39c --- /dev/null +++ b/arch/x86/include/asm/inat_types.h @@ -0,0 +1,29 @@ +#ifndef _ASM_X86_INAT_TYPES_H +#define _ASM_X86_INAT_TYPES_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ + +/* Instruction attributes */ +typedef unsigned int insn_attr_t; +typedef unsigned char insn_byte_t; +typedef signed int insn_value_t; + +#endif diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h new file mode 100644 index 00000000000..96c2e0ad04c --- /dev/null +++ b/arch/x86/include/asm/insn.h @@ -0,0 +1,184 @@ +#ifndef _ASM_X86_INSN_H +#define _ASM_X86_INSN_H +/* + * x86 instruction analysis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2009 + */ + +/* insn_attr_t is defined in inat.h */ +#include <asm/inat.h> + +struct insn_field { + union { + insn_value_t value; + insn_byte_t bytes[4]; + }; + /* !0 if we've run insn_get_xxx() for this field */ + unsigned char got; + unsigned char nbytes; +}; + +struct insn { + struct insn_field prefixes; /* + * Prefixes + * prefixes.bytes[3]: last prefix + */ + struct insn_field rex_prefix; /* REX prefix */ + struct insn_field vex_prefix; /* VEX prefix */ + struct insn_field opcode; /* + * opcode.bytes[0]: opcode1 + * opcode.bytes[1]: opcode2 + * opcode.bytes[2]: opcode3 + */ + struct insn_field modrm; + struct insn_field sib; + struct insn_field displacement; + union { + struct insn_field immediate; + struct insn_field moffset1; /* for 64bit MOV */ + struct insn_field immediate1; /* for 64bit imm or off16/32 */ + }; + union { + struct insn_field moffset2; /* for 64bit MOV */ + struct insn_field immediate2; /* for 64bit imm or seg16 */ + }; + + insn_attr_t attr; + unsigned char opnd_bytes; + unsigned char addr_bytes; + unsigned char length; + unsigned char x86_64; + + const insn_byte_t *kaddr; /* kernel address of insn to analyze */ + const insn_byte_t *next_byte; +}; + +#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) +#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) +#define X86_MODRM_RM(modrm) ((modrm) & 0x07) + +#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6) +#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3) +#define X86_SIB_BASE(sib) ((sib) & 0x07) + +#define X86_REX_W(rex) ((rex) & 8) +#define X86_REX_R(rex) ((rex) & 4) +#define X86_REX_X(rex) ((rex) & 2) +#define X86_REX_B(rex) ((rex) & 1) + +/* VEX bit flags */ +#define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */ +#define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */ +#define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */ +#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ +#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ +/* VEX bit fields */ +#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ +#define X86_VEX2_M 1 /* VEX2.M always 1 */ +#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ +#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ +#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ + +/* The last prefix is needed for two-byte and three-byte opcodes */ +static inline insn_byte_t insn_last_prefix(struct insn *insn) +{ + return insn->prefixes.bytes[3]; +} + +extern void insn_init(struct insn *insn, const void *kaddr, int x86_64); +extern void insn_get_prefixes(struct insn *insn); +extern void insn_get_opcode(struct insn *insn); +extern void insn_get_modrm(struct insn *insn); +extern void insn_get_sib(struct insn *insn); +extern void insn_get_displacement(struct insn *insn); +extern void insn_get_immediate(struct insn *insn); +extern void insn_get_length(struct insn *insn); + +/* Attribute will be determined after getting ModRM (for opcode groups) */ +static inline void insn_get_attribute(struct insn *insn) +{ + insn_get_modrm(insn); +} + +/* Instruction uses RIP-relative addressing */ +extern int insn_rip_relative(struct insn *insn); + +/* Init insn for kernel text */ +static inline void kernel_insn_init(struct insn *insn, const void *kaddr) +{ +#ifdef CONFIG_X86_64 + insn_init(insn, kaddr, 1); +#else /* CONFIG_X86_32 */ + insn_init(insn, kaddr, 0); +#endif +} + +static inline int insn_is_avx(struct insn *insn) +{ + if (!insn->prefixes.got) + insn_get_prefixes(insn); + return (insn->vex_prefix.value != 0); +} + +static inline insn_byte_t insn_vex_m_bits(struct insn *insn) +{ + if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ + return X86_VEX2_M; + else + return X86_VEX3_M(insn->vex_prefix.bytes[1]); +} + +static inline insn_byte_t insn_vex_p_bits(struct insn *insn) +{ + if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ + return X86_VEX_P(insn->vex_prefix.bytes[1]); + else + return X86_VEX_P(insn->vex_prefix.bytes[2]); +} + +/* Offset of each field from kaddr */ +static inline int insn_offset_rex_prefix(struct insn *insn) +{ + return insn->prefixes.nbytes; +} +static inline int insn_offset_vex_prefix(struct insn *insn) +{ + return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes; +} +static inline int insn_offset_opcode(struct insn *insn) +{ + return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes; +} +static inline int insn_offset_modrm(struct insn *insn) +{ + return insn_offset_opcode(insn) + insn->opcode.nbytes; +} +static inline int insn_offset_sib(struct insn *insn) +{ + return insn_offset_modrm(insn) + insn->modrm.nbytes; +} +static inline int insn_offset_displacement(struct insn *insn) +{ + return insn_offset_sib(insn) + insn->sib.nbytes; +} +static inline int insn_offset_immediate(struct insn *insn) +{ + return insn_offset_displacement(insn) + insn->displacement.nbytes; +} + +#endif /* _ASM_X86_INSN_H */ diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index fd6d21bbee6..345c99cef15 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -1,8 +1,6 @@ #ifndef _ASM_X86_IOMMU_H #define _ASM_X86_IOMMU_H -extern void pci_iommu_shutdown(void); -extern void no_iommu_init(void); extern struct dma_map_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index ddda6cbed6f..ffd700ff5dc 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -34,6 +34,7 @@ static inline int irq_canonicalize(int irq) #ifdef CONFIG_HOTPLUG_CPU #include <linux/cpumask.h> extern void fixup_irqs(void); +extern void irq_force_complete_move(int); #endif extern void (*generic_interrupt_extension)(void); diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index f1363b72364..858baa061cf 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -108,6 +108,8 @@ struct mce_log { #define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) #define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) +extern struct atomic_notifier_head x86_mce_decoder_chain; + #ifdef __KERNEL__ #include <linux/percpu.h> @@ -118,9 +120,11 @@ extern int mce_disabled; extern int mce_p5_enabled; #ifdef CONFIG_X86_MCE -void mcheck_init(struct cpuinfo_x86 *c); +int mcheck_init(void); +void mcheck_cpu_init(struct cpuinfo_x86 *c); #else -static inline void mcheck_init(struct cpuinfo_x86 *c) {} +static inline int mcheck_init(void) { return 0; } +static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {} #endif #ifdef CONFIG_X86_ANCIENT_MCE @@ -214,5 +218,11 @@ void intel_init_thermal(struct cpuinfo_x86 *c); void mce_log_therm_throt_event(__u64 status); +#ifdef CONFIG_X86_THERMAL_VECTOR +extern void mcheck_intel_therm_init(void); +#else +static inline void mcheck_intel_therm_init(void) { } +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 79c94500c0b..61d90b1331c 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -163,14 +163,16 @@ typedef struct physid_mask physid_mask_t; #define physids_shift_left(d, s, n) \ bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS) -#define physids_coerce(map) ((map).mask[0]) +static inline unsigned long physids_coerce(physid_mask_t *map) +{ + return map->mask[0]; +} -#define physids_promote(physids) \ - ({ \ - physid_mask_t __physid_mask = PHYSID_MASK_NONE; \ - __physid_mask.mask[0] = physids; \ - __physid_mask; \ - }) +static inline void physids_promote(unsigned long physids, physid_mask_t *map) +{ + physids_clear(*map); + map->mask[0] = physids; +} /* Note: will create very large stack frames if physid_mask_t is big */ #define physid_mask_of_physid(physid) \ diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 7e2b6ba962f..5bef931f8b1 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -247,8 +247,8 @@ do { \ #ifdef CONFIG_SMP int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); -void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); -void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); +void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs); +void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs); int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]); @@ -264,12 +264,12 @@ static inline int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) wrmsr(msr_no, l, h); return 0; } -static inline void rdmsr_on_cpus(const cpumask_t *m, u32 msr_no, +static inline void rdmsr_on_cpus(const struct cpumask *m, u32 msr_no, struct msr *msrs) { rdmsr_on_cpu(0, msr_no, &(msrs[0].l), &(msrs[0].h)); } -static inline void wrmsr_on_cpus(const cpumask_t *m, u32 msr_no, +static inline void wrmsr_on_cpus(const struct cpumask *m, u32 msr_no, struct msr *msrs) { wrmsr_on_cpu(0, msr_no, msrs[0].l, msrs[0].h); diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index ad7ce3fd506..8d9f8548a87 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -28,9 +28,20 @@ */ #define ARCH_PERFMON_EVENT_MASK 0xffff +/* + * filter mask to validate fixed counter events. + * the following filters disqualify for fixed counters: + * - inv + * - edge + * - cnt-mask + * The other filters are supported by fixed counters. + * The any-thread option is supported starting with v3. + */ +#define ARCH_PERFMON_EVENT_FILTER_MASK 0xff840000 + #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c9786480f0f..6f8ec1c37e0 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -30,6 +30,7 @@ struct mm_struct; #include <linux/math64.h> #include <linux/init.h> +#define HBP_NUM 4 /* * Default implementation of macro that returns current * instruction pointer ("program counter"). @@ -422,6 +423,8 @@ extern unsigned int xstate_size; extern void free_thread_xstate(struct task_struct *); extern struct kmem_cache *task_xstate_cachep; +struct perf_event; + struct thread_struct { /* Cached TLS descriptors: */ struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; @@ -443,13 +446,10 @@ struct thread_struct { unsigned long fs; #endif unsigned long gs; - /* Hardware debugging registers: */ - unsigned long debugreg0; - unsigned long debugreg1; - unsigned long debugreg2; - unsigned long debugreg3; - unsigned long debugreg6; - unsigned long debugreg7; + /* Save middle states of ptrace breakpoints */ + struct perf_event *ptrace_bps[HBP_NUM]; + /* Debug status used for traps, single steps, etc... */ + unsigned long debugreg6; /* Fault info: */ unsigned long cr2; unsigned long trap_no; diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 0f0d908349a..3d11fd0f44c 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -7,6 +7,7 @@ #ifdef __KERNEL__ #include <asm/segment.h> +#include <asm/page_types.h> #endif #ifndef __ASSEMBLY__ @@ -216,6 +217,67 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs) return regs->sp; } +/* Query offset/name of register from its name/offset */ +extern int regs_query_register_offset(const char *name); +extern const char *regs_query_register_name(unsigned int offset); +#define MAX_REG_OFFSET (offsetof(struct pt_regs, ss)) + +/** + * regs_get_register() - get register value from its offset + * @regs: pt_regs from which register value is gotten. + * @offset: offset number of the register. + * + * regs_get_register returns the value of a register. The @offset is the + * offset of the register in struct pt_regs address which specified by @regs. + * If @offset is bigger than MAX_REG_OFFSET, this returns 0. + */ +static inline unsigned long regs_get_register(struct pt_regs *regs, + unsigned int offset) +{ + if (unlikely(offset > MAX_REG_OFFSET)) + return 0; + return *(unsigned long *)((unsigned long)regs + offset); +} + +/** + * regs_within_kernel_stack() - check the address in the stack + * @regs: pt_regs which contains kernel stack pointer. + * @addr: address which is checked. + * + * regs_within_kernel_stack() checks @addr is within the kernel stack page(s). + * If @addr is within the kernel stack, it returns true. If not, returns false. + */ +static inline int regs_within_kernel_stack(struct pt_regs *regs, + unsigned long addr) +{ + return ((addr & ~(THREAD_SIZE - 1)) == + (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))); +} + +/** + * regs_get_kernel_stack_nth() - get Nth entry of the stack + * @regs: pt_regs which contains kernel stack pointer. + * @n: stack entry number. + * + * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which + * is specified by @regs. If the @n th entry is NOT in the kernel stack, + * this returns 0. + */ +static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, + unsigned int n) +{ + unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); + addr += n; + if (regs_within_kernel_stack(regs, (unsigned long)addr)) + return *addr; + else + return 0; +} + +/* Get Nth argument at function call */ +extern unsigned long regs_get_argument_nth(struct pt_regs *regs, + unsigned int n); + /* * These are defined as per linux/ptrace.h, which see. */ diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h index ae907e61718..3d3e8353ee5 100644 --- a/arch/x86/include/asm/string_32.h +++ b/arch/x86/include/asm/string_32.h @@ -177,10 +177,15 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len) */ #ifndef CONFIG_KMEMCHECK + +#if (__GNUC__ >= 4) +#define memcpy(t, f, n) __builtin_memcpy(t, f, n) +#else #define memcpy(t, f, n) \ (__builtin_constant_p((n)) \ ? __constant_memcpy((t), (f), (n)) \ : __memcpy((t), (f), (n))) +#endif #else /* * kmemcheck becomes very happy if we use the REP instructions unconditionally, @@ -316,11 +321,15 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern, : __memset_generic((s), (c), (count))) #define __HAVE_ARCH_MEMSET +#if (__GNUC__ >= 4) +#define memset(s, c, count) __builtin_memset(s, c, count) +#else #define memset(s, c, count) \ (__builtin_constant_p(c) \ ? __constant_c_x_memset((s), (0x01010101UL * (unsigned char)(c)), \ (count)) \ : __memset((s), (c), (count))) +#endif /* * find the first occurrence of byte 'c', or 1 past the area if none diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h index b9e4e20174f..87ffcb12a1b 100644 --- a/arch/x86/include/asm/swiotlb.h +++ b/arch/x86/include/asm/swiotlb.h @@ -3,17 +3,14 @@ #include <linux/swiotlb.h> -/* SWIOTLB interface */ - -extern int swiotlb_force; - #ifdef CONFIG_SWIOTLB extern int swiotlb; -extern void pci_swiotlb_init(void); +extern int pci_swiotlb_init(void); #else #define swiotlb 0 -static inline void pci_swiotlb_init(void) +static inline int pci_swiotlb_init(void) { + return 0; } #endif diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index f08f9737489..022a84386de 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -128,8 +128,6 @@ do { \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ "call __switch_to\n\t" \ - ".globl thread_return\n" \ - "thread_return:\n\t" \ "movq "__percpu_arg([current_task])",%%rsi\n\t" \ __switch_canary \ "movq %P[thread_info](%%rsi),%%r8\n\t" \ @@ -157,19 +155,22 @@ extern void native_load_gs_index(unsigned); * Load a segment. Fall back on loading the zero * segment if something goes wrong.. */ -#define loadsegment(seg, value) \ - asm volatile("\n" \ - "1:\t" \ - "movl %k0,%%" #seg "\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3:\t" \ - "movl %k1, %%" #seg "\n\t" \ - "jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(1b,3b) \ - : :"r" (value), "r" (0) : "memory") - +#define loadsegment(seg, value) \ +do { \ + unsigned short __val = (value); \ + \ + asm volatile(" \n" \ + "1: movl %k0,%%" #seg " \n" \ + \ + ".section .fixup,\"ax\" \n" \ + "2: xorl %k0,%k0 \n" \ + " jmp 1b \n" \ + ".previous \n" \ + \ + _ASM_EXTABLE(1b, 2b) \ + \ + : "+r" (__val) : : "memory"); \ +} while (0) /* * Save a segment register away diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index d2c6c930b49..abd3e0ea762 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -570,7 +570,6 @@ extern struct movsl_mask { #ifdef CONFIG_X86_32 # include "uaccess_32.h" #else -# define ARCH_HAS_SEARCH_EXTABLE # include "uaccess_64.h" #endif diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 632fb44b4cb..0c9825e97f3 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -187,9 +187,34 @@ __copy_from_user_inatomic_nocache(void *to, const void __user *from, unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n); -unsigned long __must_check copy_from_user(void *to, +unsigned long __must_check _copy_from_user(void *to, const void __user *from, unsigned long n); + + +extern void copy_from_user_overflow(void) +#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS + __compiletime_error("copy_from_user() buffer size is not provably correct") +#else + __compiletime_warning("copy_from_user() buffer size is not provably correct") +#endif +; + +static inline unsigned long __must_check copy_from_user(void *to, + const void __user *from, + unsigned long n) +{ + int sz = __compiletime_object_size(to); + int ret = -EFAULT; + + if (likely(sz == -1 || sz >= n)) + ret = _copy_from_user(to, from, n); + else + copy_from_user_overflow(); + + return ret; +} + long __must_check strncpy_from_user(char *dst, const char __user *src, long count); long __must_check __strncpy_from_user(char *dst, diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index db24b215fc5..46324c6a4f6 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -19,12 +19,37 @@ __must_check unsigned long copy_user_generic(void *to, const void *from, unsigned len); __must_check unsigned long -copy_to_user(void __user *to, const void *from, unsigned len); +_copy_to_user(void __user *to, const void *from, unsigned len); __must_check unsigned long -copy_from_user(void *to, const void __user *from, unsigned len); +_copy_from_user(void *to, const void __user *from, unsigned len); __must_check unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len); +static inline unsigned long __must_check copy_from_user(void *to, + const void __user *from, + unsigned long n) +{ + int sz = __compiletime_object_size(to); + int ret = -EFAULT; + + might_fault(); + if (likely(sz == -1 || sz >= n)) + ret = _copy_from_user(to, from, n); +#ifdef CONFIG_DEBUG_VM + else + WARN(1, "Buffer overflow detected!\n"); +#endif + return ret; +} + +static __always_inline __must_check +int copy_to_user(void __user *dst, const void *src, unsigned size) +{ + might_fault(); + + return _copy_to_user(dst, src, size); +} + static __always_inline __must_check int __copy_from_user(void *dst, const void __user *src, unsigned size) { @@ -176,8 +201,11 @@ __must_check long strlen_user(const char __user *str); __must_check unsigned long clear_user(void __user *mem, unsigned long len); __must_check unsigned long __clear_user(void __user *mem, unsigned long len); -__must_check long __copy_from_user_inatomic(void *dst, const void __user *src, - unsigned size); +static __must_check __always_inline int +__copy_from_user_inatomic(void *dst, const void __user *src, unsigned size) +{ + return copy_user_generic(dst, (__force const void *)src, size); +} static __must_check __always_inline int __copy_to_user_inatomic(void __user *dst, const void *src, unsigned size) diff --git a/arch/x86/include/asm/uv/uv_irq.h b/arch/x86/include/asm/uv/uv_irq.h index 9613c8c0b64..d6b17c76062 100644 --- a/arch/x86/include/asm/uv/uv_irq.h +++ b/arch/x86/include/asm/uv/uv_irq.h @@ -25,12 +25,14 @@ struct uv_IO_APIC_route_entry { dest : 32; }; -extern struct irq_chip uv_irq_chip; - -extern int arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long); -extern void arch_disable_uv_irq(int, unsigned long); +enum { + UV_AFFINITY_ALL, + UV_AFFINITY_NODE, + UV_AFFINITY_CPU +}; -extern int uv_setup_irq(char *, int, int, unsigned long); -extern void uv_teardown_irq(unsigned int, int, unsigned long); +extern int uv_irq_2_mmr_info(int, unsigned long *, int *); +extern int uv_setup_irq(char *, int, int, unsigned long, int); +extern void uv_teardown_irq(unsigned int); #endif /* _ASM_X86_UV_UV_IRQ_H */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 2c756fd4ab0..d8e71459f02 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -91,6 +91,14 @@ struct x86_init_timers { }; /** + * struct x86_init_iommu - platform specific iommu setup + * @iommu_init: platform specific iommu setup + */ +struct x86_init_iommu { + int (*iommu_init)(void); +}; + +/** * struct x86_init_ops - functions for platform specific setup * */ @@ -101,6 +109,7 @@ struct x86_init_ops { struct x86_init_oem oem; struct x86_init_paging paging; struct x86_init_timers timers; + struct x86_init_iommu iommu; }; /** @@ -121,6 +130,7 @@ struct x86_platform_ops { unsigned long (*calibrate_tsc)(void); unsigned long (*get_wallclock)(void); int (*set_wallclock)(unsigned long nowtime); + void (*iommu_shutdown)(void); }; extern struct x86_init_ops x86_init; diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index d8e5d0cdd67..4f2e66e29ec 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -40,7 +40,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o obj-y += bootflag.o e820.o obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o -obj-y += alternative.o i8253.o pci-nommu.o +obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o obj-y += tsc.o io_delay.o rtc.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0285521e0a9..32fb09102a1 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. * Author: Joerg Roedel <joerg.roedel@amd.com> * Leo Duran <leo.duran@amd.com> * @@ -28,6 +28,7 @@ #include <asm/proto.h> #include <asm/iommu.h> #include <asm/gart.h> +#include <asm/amd_iommu_proto.h> #include <asm/amd_iommu_types.h> #include <asm/amd_iommu.h> @@ -56,20 +57,115 @@ struct iommu_cmd { u32 data[4]; }; -static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, - struct unity_map_entry *e); -static struct dma_ops_domain *find_protection_domain(u16 devid); -static u64 *alloc_pte(struct protection_domain *domain, - unsigned long address, int end_lvl, - u64 **pte_page, gfp_t gfp); -static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, - unsigned long start_page, - unsigned int pages); static void reset_iommu_command_buffer(struct amd_iommu *iommu); -static u64 *fetch_pte(struct protection_domain *domain, - unsigned long address, int map_size); static void update_domain(struct protection_domain *domain); +/**************************************************************************** + * + * Helper functions + * + ****************************************************************************/ + +static inline u16 get_device_id(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + return calc_devid(pdev->bus->number, pdev->devfn); +} + +static struct iommu_dev_data *get_dev_data(struct device *dev) +{ + return dev->archdata.iommu; +} + +/* + * In this function the list of preallocated protection domains is traversed to + * find the domain for a specific device + */ +static struct dma_ops_domain *find_protection_domain(u16 devid) +{ + struct dma_ops_domain *entry, *ret = NULL; + unsigned long flags; + u16 alias = amd_iommu_alias_table[devid]; + + if (list_empty(&iommu_pd_list)) + return NULL; + + spin_lock_irqsave(&iommu_pd_list_lock, flags); + + list_for_each_entry(entry, &iommu_pd_list, list) { + if (entry->target_dev == devid || + entry->target_dev == alias) { + ret = entry; + break; + } + } + + spin_unlock_irqrestore(&iommu_pd_list_lock, flags); + + return ret; +} + +/* + * This function checks if the driver got a valid device from the caller to + * avoid dereferencing invalid pointers. + */ +static bool check_device(struct device *dev) +{ + u16 devid; + + if (!dev || !dev->dma_mask) + return false; + + /* No device or no PCI device */ + if (!dev || dev->bus != &pci_bus_type) + return false; + + devid = get_device_id(dev); + + /* Out of our scope? */ + if (devid > amd_iommu_last_bdf) + return false; + + if (amd_iommu_rlookup_table[devid] == NULL) + return false; + + return true; +} + +static int iommu_init_device(struct device *dev) +{ + struct iommu_dev_data *dev_data; + struct pci_dev *pdev; + u16 devid, alias; + + if (dev->archdata.iommu) + return 0; + + dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL); + if (!dev_data) + return -ENOMEM; + + dev_data->dev = dev; + + devid = get_device_id(dev); + alias = amd_iommu_alias_table[devid]; + pdev = pci_get_bus_and_slot(PCI_BUS(alias), alias & 0xff); + if (pdev) + dev_data->alias = &pdev->dev; + + atomic_set(&dev_data->bind, 0); + + dev->archdata.iommu = dev_data; + + + return 0; +} + +static void iommu_uninit_device(struct device *dev) +{ + kfree(dev->archdata.iommu); +} #ifdef CONFIG_AMD_IOMMU_STATS /* @@ -90,7 +186,6 @@ DECLARE_STATS_COUNTER(alloced_io_mem); DECLARE_STATS_COUNTER(total_map_requests); static struct dentry *stats_dir; -static struct dentry *de_isolate; static struct dentry *de_fflush; static void amd_iommu_stats_add(struct __iommu_counter *cnt) @@ -108,9 +203,6 @@ static void amd_iommu_stats_init(void) if (stats_dir == NULL) return; - de_isolate = debugfs_create_bool("isolation", 0444, stats_dir, - (u32 *)&amd_iommu_isolate); - de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir, (u32 *)&amd_iommu_unmap_flush); @@ -130,12 +222,6 @@ static void amd_iommu_stats_init(void) #endif -/* returns !0 if the IOMMU is caching non-present entries in its TLB */ -static int iommu_has_npcache(struct amd_iommu *iommu) -{ - return iommu->cap & (1UL << IOMMU_CAP_NPCACHE); -} - /**************************************************************************** * * Interrupt handling functions @@ -199,6 +285,7 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt) break; case EVENT_TYPE_ILL_CMD: printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); + iommu->reset_in_progress = true; reset_iommu_command_buffer(iommu); dump_command(address); break; @@ -321,11 +408,8 @@ static void __iommu_wait_for_completion(struct amd_iommu *iommu) status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); - if (unlikely(i == EXIT_LOOP_COUNT)) { - spin_unlock(&iommu->lock); - reset_iommu_command_buffer(iommu); - spin_lock(&iommu->lock); - } + if (unlikely(i == EXIT_LOOP_COUNT)) + iommu->reset_in_progress = true; } /* @@ -372,26 +456,46 @@ static int iommu_completion_wait(struct amd_iommu *iommu) out: spin_unlock_irqrestore(&iommu->lock, flags); + if (iommu->reset_in_progress) + reset_iommu_command_buffer(iommu); + return 0; } +static void iommu_flush_complete(struct protection_domain *domain) +{ + int i; + + for (i = 0; i < amd_iommus_present; ++i) { + if (!domain->dev_iommu[i]) + continue; + + /* + * Devices of this domain are behind this IOMMU + * We need to wait for completion of all commands. + */ + iommu_completion_wait(amd_iommus[i]); + } +} + /* * Command send function for invalidating a device table entry */ -static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) +static int iommu_flush_device(struct device *dev) { + struct amd_iommu *iommu; struct iommu_cmd cmd; - int ret; + u16 devid; - BUG_ON(iommu == NULL); + devid = get_device_id(dev); + iommu = amd_iommu_rlookup_table[devid]; + /* Build command */ memset(&cmd, 0, sizeof(cmd)); CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); cmd.data[0] = devid; - ret = iommu_queue_command(iommu, &cmd); - - return ret; + return iommu_queue_command(iommu, &cmd); } static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, @@ -430,11 +534,11 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, * It invalidates a single PTE if the range to flush is within a single * page. Otherwise it flushes the whole TLB of the IOMMU. */ -static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, - u64 address, size_t size) +static void __iommu_flush_pages(struct protection_domain *domain, + u64 address, size_t size, int pde) { - int s = 0; - unsigned pages = iommu_num_pages(address, size, PAGE_SIZE); + int s = 0, i; + unsigned long pages = iommu_num_pages(address, size, PAGE_SIZE); address &= PAGE_MASK; @@ -447,142 +551,212 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, s = 1; } - iommu_queue_inv_iommu_pages(iommu, address, domid, 0, s); - return 0; + for (i = 0; i < amd_iommus_present; ++i) { + if (!domain->dev_iommu[i]) + continue; + + /* + * Devices of this domain are behind this IOMMU + * We need a TLB flush + */ + iommu_queue_inv_iommu_pages(amd_iommus[i], address, + domain->id, pde, s); + } + + return; } -/* Flush the whole IO/TLB for a given protection domain */ -static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid) +static void iommu_flush_pages(struct protection_domain *domain, + u64 address, size_t size) { - u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; - - INC_STATS_COUNTER(domain_flush_single); + __iommu_flush_pages(domain, address, size, 0); +} - iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); +/* Flush the whole IO/TLB for a given protection domain */ +static void iommu_flush_tlb(struct protection_domain *domain) +{ + __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0); } /* Flush the whole IO/TLB for a given protection domain - including PDE */ -static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid) +static void iommu_flush_tlb_pde(struct protection_domain *domain) { - u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; - - INC_STATS_COUNTER(domain_flush_single); - - iommu_queue_inv_iommu_pages(iommu, address, domid, 1, 1); + __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); } + /* - * This function flushes one domain on one IOMMU + * This function flushes the DTEs for all devices in domain */ -static void flush_domain_on_iommu(struct amd_iommu *iommu, u16 domid) +static void iommu_flush_domain_devices(struct protection_domain *domain) { - struct iommu_cmd cmd; + struct iommu_dev_data *dev_data; unsigned long flags; - __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, - domid, 1, 1); + spin_lock_irqsave(&domain->lock, flags); - spin_lock_irqsave(&iommu->lock, flags); - __iommu_queue_command(iommu, &cmd); - __iommu_completion_wait(iommu); - __iommu_wait_for_completion(iommu); - spin_unlock_irqrestore(&iommu->lock, flags); + list_for_each_entry(dev_data, &domain->dev_list, list) + iommu_flush_device(dev_data->dev); + + spin_unlock_irqrestore(&domain->lock, flags); } -static void flush_all_domains_on_iommu(struct amd_iommu *iommu) +static void iommu_flush_all_domain_devices(void) { - int i; + struct protection_domain *domain; + unsigned long flags; - for (i = 1; i < MAX_DOMAIN_ID; ++i) { - if (!test_bit(i, amd_iommu_pd_alloc_bitmap)) - continue; - flush_domain_on_iommu(iommu, i); + spin_lock_irqsave(&amd_iommu_pd_lock, flags); + + list_for_each_entry(domain, &amd_iommu_pd_list, list) { + iommu_flush_domain_devices(domain); + iommu_flush_complete(domain); } + spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); +} + +void amd_iommu_flush_all_devices(void) +{ + iommu_flush_all_domain_devices(); } /* - * This function is used to flush the IO/TLB for a given protection domain - * on every IOMMU in the system + * This function uses heavy locking and may disable irqs for some time. But + * this is no issue because it is only called during resume. */ -static void iommu_flush_domain(u16 domid) +void amd_iommu_flush_all_domains(void) { - struct amd_iommu *iommu; + struct protection_domain *domain; + unsigned long flags; - INC_STATS_COUNTER(domain_flush_all); + spin_lock_irqsave(&amd_iommu_pd_lock, flags); - for_each_iommu(iommu) - flush_domain_on_iommu(iommu, domid); + list_for_each_entry(domain, &amd_iommu_pd_list, list) { + spin_lock(&domain->lock); + iommu_flush_tlb_pde(domain); + iommu_flush_complete(domain); + spin_unlock(&domain->lock); + } + + spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); } -void amd_iommu_flush_all_domains(void) +static void reset_iommu_command_buffer(struct amd_iommu *iommu) { - struct amd_iommu *iommu; + pr_err("AMD-Vi: Resetting IOMMU command buffer\n"); - for_each_iommu(iommu) - flush_all_domains_on_iommu(iommu); + if (iommu->reset_in_progress) + panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n"); + + amd_iommu_reset_cmd_buffer(iommu); + amd_iommu_flush_all_devices(); + amd_iommu_flush_all_domains(); + + iommu->reset_in_progress = false; } -static void flush_all_devices_for_iommu(struct amd_iommu *iommu) +/**************************************************************************** + * + * The functions below are used the create the page table mappings for + * unity mapped regions. + * + ****************************************************************************/ + +/* + * This function is used to add another level to an IO page table. Adding + * another level increases the size of the address space by 9 bits to a size up + * to 64 bits. + */ +static bool increase_address_space(struct protection_domain *domain, + gfp_t gfp) { - int i; + u64 *pte; - for (i = 0; i <= amd_iommu_last_bdf; ++i) { - if (iommu != amd_iommu_rlookup_table[i]) - continue; + if (domain->mode == PAGE_MODE_6_LEVEL) + /* address space already 64 bit large */ + return false; - iommu_queue_inv_dev_entry(iommu, i); - iommu_completion_wait(iommu); - } + pte = (void *)get_zeroed_page(gfp); + if (!pte) + return false; + + *pte = PM_LEVEL_PDE(domain->mode, + virt_to_phys(domain->pt_root)); + domain->pt_root = pte; + domain->mode += 1; + domain->updated = true; + + return true; } -static void flush_devices_by_domain(struct protection_domain *domain) +static u64 *alloc_pte(struct protection_domain *domain, + unsigned long address, + int end_lvl, + u64 **pte_page, + gfp_t gfp) { - struct amd_iommu *iommu; - int i; + u64 *pte, *page; + int level; - for (i = 0; i <= amd_iommu_last_bdf; ++i) { - if ((domain == NULL && amd_iommu_pd_table[i] == NULL) || - (amd_iommu_pd_table[i] != domain)) - continue; + while (address > PM_LEVEL_SIZE(domain->mode)) + increase_address_space(domain, gfp); - iommu = amd_iommu_rlookup_table[i]; - if (!iommu) - continue; + level = domain->mode - 1; + pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; - iommu_queue_inv_dev_entry(iommu, i); - iommu_completion_wait(iommu); + while (level > end_lvl) { + if (!IOMMU_PTE_PRESENT(*pte)) { + page = (u64 *)get_zeroed_page(gfp); + if (!page) + return NULL; + *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); + } + + level -= 1; + + pte = IOMMU_PTE_PAGE(*pte); + + if (pte_page && level == end_lvl) + *pte_page = pte; + + pte = &pte[PM_LEVEL_INDEX(level, address)]; } + + return pte; } -static void reset_iommu_command_buffer(struct amd_iommu *iommu) +/* + * This function checks if there is a PTE for a given dma address. If + * there is one, it returns the pointer to it. + */ +static u64 *fetch_pte(struct protection_domain *domain, + unsigned long address, int map_size) { - pr_err("AMD-Vi: Resetting IOMMU command buffer\n"); + int level; + u64 *pte; - if (iommu->reset_in_progress) - panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n"); + level = domain->mode - 1; + pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; - iommu->reset_in_progress = true; + while (level > map_size) { + if (!IOMMU_PTE_PRESENT(*pte)) + return NULL; - amd_iommu_reset_cmd_buffer(iommu); - flush_all_devices_for_iommu(iommu); - flush_all_domains_on_iommu(iommu); + level -= 1; - iommu->reset_in_progress = false; -} + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[PM_LEVEL_INDEX(level, address)]; -void amd_iommu_flush_all_devices(void) -{ - flush_devices_by_domain(NULL); -} + if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) { + pte = NULL; + break; + } + } -/**************************************************************************** - * - * The functions below are used the create the page table mappings for - * unity mapped regions. - * - ****************************************************************************/ + return pte; +} /* * Generic mapping functions. It maps a physical address into a DMA @@ -654,28 +828,6 @@ static int iommu_for_unity_map(struct amd_iommu *iommu, } /* - * Init the unity mappings for a specific IOMMU in the system - * - * Basically iterates over all unity mapping entries and applies them to - * the default domain DMA of that IOMMU if necessary. - */ -static int iommu_init_unity_mappings(struct amd_iommu *iommu) -{ - struct unity_map_entry *entry; - int ret; - - list_for_each_entry(entry, &amd_iommu_unity_map, list) { - if (!iommu_for_unity_map(iommu, entry)) - continue; - ret = dma_ops_unity_map(iommu->default_dom, entry); - if (ret) - return ret; - } - - return 0; -} - -/* * This function actually applies the mapping to the page table of the * dma_ops domain. */ @@ -704,6 +856,28 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, } /* + * Init the unity mappings for a specific IOMMU in the system + * + * Basically iterates over all unity mapping entries and applies them to + * the default domain DMA of that IOMMU if necessary. + */ +static int iommu_init_unity_mappings(struct amd_iommu *iommu) +{ + struct unity_map_entry *entry; + int ret; + + list_for_each_entry(entry, &amd_iommu_unity_map, list) { + if (!iommu_for_unity_map(iommu, entry)) + continue; + ret = dma_ops_unity_map(iommu->default_dom, entry); + if (ret) + return ret; + } + + return 0; +} + +/* * Inits the unity mappings required for a specific device */ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, @@ -740,34 +914,23 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, */ /* - * This function checks if there is a PTE for a given dma address. If - * there is one, it returns the pointer to it. + * Used to reserve address ranges in the aperture (e.g. for exclusion + * ranges. */ -static u64 *fetch_pte(struct protection_domain *domain, - unsigned long address, int map_size) +static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, + unsigned long start_page, + unsigned int pages) { - int level; - u64 *pte; - - level = domain->mode - 1; - pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; - - while (level > map_size) { - if (!IOMMU_PTE_PRESENT(*pte)) - return NULL; - - level -= 1; + unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT; - pte = IOMMU_PTE_PAGE(*pte); - pte = &pte[PM_LEVEL_INDEX(level, address)]; + if (start_page + pages > last_page) + pages = last_page - start_page; - if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) { - pte = NULL; - break; - } + for (i = start_page; i < start_page + pages; ++i) { + int index = i / APERTURE_RANGE_PAGES; + int page = i % APERTURE_RANGE_PAGES; + __set_bit(page, dom->aperture[index]->bitmap); } - - return pte; } /* @@ -775,11 +938,11 @@ static u64 *fetch_pte(struct protection_domain *domain, * aperture in case of dma_ops domain allocation or address allocation * failure. */ -static int alloc_new_range(struct amd_iommu *iommu, - struct dma_ops_domain *dma_dom, +static int alloc_new_range(struct dma_ops_domain *dma_dom, bool populate, gfp_t gfp) { int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; + struct amd_iommu *iommu; int i; #ifdef CONFIG_IOMMU_STRESS @@ -819,14 +982,17 @@ static int alloc_new_range(struct amd_iommu *iommu, dma_dom->aperture_size += APERTURE_RANGE_SIZE; /* Intialize the exclusion range if necessary */ - if (iommu->exclusion_start && - iommu->exclusion_start >= dma_dom->aperture[index]->offset && - iommu->exclusion_start < dma_dom->aperture_size) { - unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; - int pages = iommu_num_pages(iommu->exclusion_start, - iommu->exclusion_length, - PAGE_SIZE); - dma_ops_reserve_addresses(dma_dom, startpage, pages); + for_each_iommu(iommu) { + if (iommu->exclusion_start && + iommu->exclusion_start >= dma_dom->aperture[index]->offset + && iommu->exclusion_start < dma_dom->aperture_size) { + unsigned long startpage; + int pages = iommu_num_pages(iommu->exclusion_start, + iommu->exclusion_length, + PAGE_SIZE); + startpage = iommu->exclusion_start >> PAGE_SHIFT; + dma_ops_reserve_addresses(dma_dom, startpage, pages); + } } /* @@ -928,7 +1094,7 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, } if (unlikely(address == -1)) - address = bad_dma_address; + address = DMA_ERROR_CODE; WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); @@ -973,6 +1139,31 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, * ****************************************************************************/ +/* + * This function adds a protection domain to the global protection domain list + */ +static void add_domain_to_list(struct protection_domain *domain) +{ + unsigned long flags; + + spin_lock_irqsave(&amd_iommu_pd_lock, flags); + list_add(&domain->list, &amd_iommu_pd_list); + spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); +} + +/* + * This function removes a protection domain to the global + * protection domain list + */ +static void del_domain_from_list(struct protection_domain *domain) +{ + unsigned long flags; + + spin_lock_irqsave(&amd_iommu_pd_lock, flags); + list_del(&domain->list); + spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); +} + static u16 domain_id_alloc(void) { unsigned long flags; @@ -1000,26 +1191,6 @@ static void domain_id_free(int id) write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); } -/* - * Used to reserve address ranges in the aperture (e.g. for exclusion - * ranges. - */ -static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, - unsigned long start_page, - unsigned int pages) -{ - unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT; - - if (start_page + pages > last_page) - pages = last_page - start_page; - - for (i = start_page; i < start_page + pages; ++i) { - int index = i / APERTURE_RANGE_PAGES; - int page = i % APERTURE_RANGE_PAGES; - __set_bit(page, dom->aperture[index]->bitmap); - } -} - static void free_pagetable(struct protection_domain *domain) { int i, j; @@ -1061,6 +1232,8 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) if (!dom) return; + del_domain_from_list(&dom->domain); + free_pagetable(&dom->domain); for (i = 0; i < APERTURE_MAX_RANGES; ++i) { @@ -1078,7 +1251,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) * It also intializes the page table and the address allocator data * structures required for the dma_ops interface */ -static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) +static struct dma_ops_domain *dma_ops_domain_alloc(void) { struct dma_ops_domain *dma_dom; @@ -1091,6 +1264,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) dma_dom->domain.id = domain_id_alloc(); if (dma_dom->domain.id == 0) goto free_dma_dom; + INIT_LIST_HEAD(&dma_dom->domain.dev_list); dma_dom->domain.mode = PAGE_MODE_2_LEVEL; dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); dma_dom->domain.flags = PD_DMA_OPS_MASK; @@ -1101,7 +1275,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) dma_dom->need_flush = false; dma_dom->target_dev = 0xffff; - if (alloc_new_range(iommu, dma_dom, true, GFP_KERNEL)) + add_domain_to_list(&dma_dom->domain); + + if (alloc_new_range(dma_dom, true, GFP_KERNEL)) goto free_dma_dom; /* @@ -1129,22 +1305,6 @@ static bool dma_ops_domain(struct protection_domain *domain) return domain->flags & PD_DMA_OPS_MASK; } -/* - * Find out the protection domain structure for a given PCI device. This - * will give us the pointer to the page table root for example. - */ -static struct protection_domain *domain_for_device(u16 devid) -{ - struct protection_domain *dom; - unsigned long flags; - - read_lock_irqsave(&amd_iommu_devtable_lock, flags); - dom = amd_iommu_pd_table[devid]; - read_unlock_irqrestore(&amd_iommu_devtable_lock, flags); - - return dom; -} - static void set_dte_entry(u16 devid, struct protection_domain *domain) { u64 pte_root = virt_to_phys(domain->pt_root); @@ -1156,42 +1316,123 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain) amd_iommu_dev_table[devid].data[2] = domain->id; amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); +} + +static void clear_dte_entry(u16 devid) +{ + /* remove entry from the device table seen by the hardware */ + amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; + amd_iommu_dev_table[devid].data[1] = 0; + amd_iommu_dev_table[devid].data[2] = 0; - amd_iommu_pd_table[devid] = domain; + amd_iommu_apply_erratum_63(devid); +} + +static void do_attach(struct device *dev, struct protection_domain *domain) +{ + struct iommu_dev_data *dev_data; + struct amd_iommu *iommu; + u16 devid; + + devid = get_device_id(dev); + iommu = amd_iommu_rlookup_table[devid]; + dev_data = get_dev_data(dev); + + /* Update data structures */ + dev_data->domain = domain; + list_add(&dev_data->list, &domain->dev_list); + set_dte_entry(devid, domain); + + /* Do reference counting */ + domain->dev_iommu[iommu->index] += 1; + domain->dev_cnt += 1; + + /* Flush the DTE entry */ + iommu_flush_device(dev); +} + +static void do_detach(struct device *dev) +{ + struct iommu_dev_data *dev_data; + struct amd_iommu *iommu; + u16 devid; + + devid = get_device_id(dev); + iommu = amd_iommu_rlookup_table[devid]; + dev_data = get_dev_data(dev); + + /* decrease reference counters */ + dev_data->domain->dev_iommu[iommu->index] -= 1; + dev_data->domain->dev_cnt -= 1; + + /* Update data structures */ + dev_data->domain = NULL; + list_del(&dev_data->list); + clear_dte_entry(devid); + + /* Flush the DTE entry */ + iommu_flush_device(dev); } /* * If a device is not yet associated with a domain, this function does * assigns it visible for the hardware */ -static void __attach_device(struct amd_iommu *iommu, - struct protection_domain *domain, - u16 devid) +static int __attach_device(struct device *dev, + struct protection_domain *domain) { + struct iommu_dev_data *dev_data, *alias_data; + + dev_data = get_dev_data(dev); + alias_data = get_dev_data(dev_data->alias); + + if (!alias_data) + return -EINVAL; + /* lock domain */ spin_lock(&domain->lock); - /* update DTE entry */ - set_dte_entry(devid, domain); + /* Some sanity checks */ + if (alias_data->domain != NULL && + alias_data->domain != domain) + return -EBUSY; - domain->dev_cnt += 1; + if (dev_data->domain != NULL && + dev_data->domain != domain) + return -EBUSY; + + /* Do real assignment */ + if (dev_data->alias != dev) { + alias_data = get_dev_data(dev_data->alias); + if (alias_data->domain == NULL) + do_attach(dev_data->alias, domain); + + atomic_inc(&alias_data->bind); + } + + if (dev_data->domain == NULL) + do_attach(dev, domain); + + atomic_inc(&dev_data->bind); /* ready */ spin_unlock(&domain->lock); + + return 0; } /* * If a device is not yet associated with a domain, this function does * assigns it visible for the hardware */ -static void attach_device(struct amd_iommu *iommu, - struct protection_domain *domain, - u16 devid) +static int attach_device(struct device *dev, + struct protection_domain *domain) { unsigned long flags; + int ret; write_lock_irqsave(&amd_iommu_devtable_lock, flags); - __attach_device(iommu, domain, devid); + ret = __attach_device(dev, domain); write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); /* @@ -1199,98 +1440,125 @@ static void attach_device(struct amd_iommu *iommu, * left the caches in the IOMMU dirty. So we have to flush * here to evict all dirty stuff. */ - iommu_queue_inv_dev_entry(iommu, devid); - iommu_flush_tlb_pde(iommu, domain->id); + iommu_flush_tlb_pde(domain); + + return ret; } /* * Removes a device from a protection domain (unlocked) */ -static void __detach_device(struct protection_domain *domain, u16 devid) +static void __detach_device(struct device *dev) { + struct iommu_dev_data *dev_data = get_dev_data(dev); + struct iommu_dev_data *alias_data; + unsigned long flags; - /* lock domain */ - spin_lock(&domain->lock); - - /* remove domain from the lookup table */ - amd_iommu_pd_table[devid] = NULL; + BUG_ON(!dev_data->domain); - /* remove entry from the device table seen by the hardware */ - amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; - amd_iommu_dev_table[devid].data[1] = 0; - amd_iommu_dev_table[devid].data[2] = 0; + spin_lock_irqsave(&dev_data->domain->lock, flags); - amd_iommu_apply_erratum_63(devid); + if (dev_data->alias != dev) { + alias_data = get_dev_data(dev_data->alias); + if (atomic_dec_and_test(&alias_data->bind)) + do_detach(dev_data->alias); + } - /* decrease reference counter */ - domain->dev_cnt -= 1; + if (atomic_dec_and_test(&dev_data->bind)) + do_detach(dev); - /* ready */ - spin_unlock(&domain->lock); + spin_unlock_irqrestore(&dev_data->domain->lock, flags); /* * If we run in passthrough mode the device must be assigned to the * passthrough domain if it is detached from any other domain */ - if (iommu_pass_through) { - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; - __attach_device(iommu, pt_domain, devid); - } + if (iommu_pass_through && dev_data->domain == NULL) + __attach_device(dev, pt_domain); } /* * Removes a device from a protection domain (with devtable_lock held) */ -static void detach_device(struct protection_domain *domain, u16 devid) +static void detach_device(struct device *dev) { unsigned long flags; /* lock device table */ write_lock_irqsave(&amd_iommu_devtable_lock, flags); - __detach_device(domain, devid); + __detach_device(dev); write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); } +/* + * Find out the protection domain structure for a given PCI device. This + * will give us the pointer to the page table root for example. + */ +static struct protection_domain *domain_for_device(struct device *dev) +{ + struct protection_domain *dom; + struct iommu_dev_data *dev_data, *alias_data; + unsigned long flags; + u16 devid, alias; + + devid = get_device_id(dev); + alias = amd_iommu_alias_table[devid]; + dev_data = get_dev_data(dev); + alias_data = get_dev_data(dev_data->alias); + if (!alias_data) + return NULL; + + read_lock_irqsave(&amd_iommu_devtable_lock, flags); + dom = dev_data->domain; + if (dom == NULL && + alias_data->domain != NULL) { + __attach_device(dev, alias_data->domain); + dom = alias_data->domain; + } + + read_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + + return dom; +} + static int device_change_notifier(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; - struct pci_dev *pdev = to_pci_dev(dev); - u16 devid = calc_devid(pdev->bus->number, pdev->devfn); + u16 devid; struct protection_domain *domain; struct dma_ops_domain *dma_domain; struct amd_iommu *iommu; unsigned long flags; - if (devid > amd_iommu_last_bdf) - goto out; - - devid = amd_iommu_alias_table[devid]; - - iommu = amd_iommu_rlookup_table[devid]; - if (iommu == NULL) - goto out; - - domain = domain_for_device(devid); + if (!check_device(dev)) + return 0; - if (domain && !dma_ops_domain(domain)) - WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound " - "to a non-dma-ops domain\n", dev_name(dev)); + devid = get_device_id(dev); + iommu = amd_iommu_rlookup_table[devid]; switch (action) { case BUS_NOTIFY_UNBOUND_DRIVER: + + domain = domain_for_device(dev); + if (!domain) goto out; if (iommu_pass_through) break; - detach_device(domain, devid); + detach_device(dev); break; case BUS_NOTIFY_ADD_DEVICE: + + iommu_init_device(dev); + + domain = domain_for_device(dev); + /* allocate a protection domain if a device is added */ dma_domain = find_protection_domain(devid); if (dma_domain) goto out; - dma_domain = dma_ops_domain_alloc(iommu); + dma_domain = dma_ops_domain_alloc(); if (!dma_domain) goto out; dma_domain->target_dev = devid; @@ -1300,11 +1568,15 @@ static int device_change_notifier(struct notifier_block *nb, spin_unlock_irqrestore(&iommu_pd_list_lock, flags); break; + case BUS_NOTIFY_DEL_DEVICE: + + iommu_uninit_device(dev); + default: goto out; } - iommu_queue_inv_dev_entry(iommu, devid); + iommu_flush_device(dev); iommu_completion_wait(iommu); out: @@ -1322,106 +1594,46 @@ static struct notifier_block device_nb = { *****************************************************************************/ /* - * This function checks if the driver got a valid device from the caller to - * avoid dereferencing invalid pointers. - */ -static bool check_device(struct device *dev) -{ - if (!dev || !dev->dma_mask) - return false; - - return true; -} - -/* - * In this function the list of preallocated protection domains is traversed to - * find the domain for a specific device - */ -static struct dma_ops_domain *find_protection_domain(u16 devid) -{ - struct dma_ops_domain *entry, *ret = NULL; - unsigned long flags; - - if (list_empty(&iommu_pd_list)) - return NULL; - - spin_lock_irqsave(&iommu_pd_list_lock, flags); - - list_for_each_entry(entry, &iommu_pd_list, list) { - if (entry->target_dev == devid) { - ret = entry; - break; - } - } - - spin_unlock_irqrestore(&iommu_pd_list_lock, flags); - - return ret; -} - -/* * In the dma_ops path we only have the struct device. This function * finds the corresponding IOMMU, the protection domain and the * requestor id for a given device. * If the device is not yet associated with a domain this is also done * in this function. */ -static int get_device_resources(struct device *dev, - struct amd_iommu **iommu, - struct protection_domain **domain, - u16 *bdf) +static struct protection_domain *get_domain(struct device *dev) { + struct protection_domain *domain; struct dma_ops_domain *dma_dom; - struct pci_dev *pcidev; - u16 _bdf; - - *iommu = NULL; - *domain = NULL; - *bdf = 0xffff; - - if (dev->bus != &pci_bus_type) - return 0; + u16 devid = get_device_id(dev); - pcidev = to_pci_dev(dev); - _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); + if (!check_device(dev)) + return ERR_PTR(-EINVAL); - /* device not translated by any IOMMU in the system? */ - if (_bdf > amd_iommu_last_bdf) - return 0; + domain = domain_for_device(dev); + if (domain != NULL && !dma_ops_domain(domain)) + return ERR_PTR(-EBUSY); - *bdf = amd_iommu_alias_table[_bdf]; + if (domain != NULL) + return domain; - *iommu = amd_iommu_rlookup_table[*bdf]; - if (*iommu == NULL) - return 0; - *domain = domain_for_device(*bdf); - if (*domain == NULL) { - dma_dom = find_protection_domain(*bdf); - if (!dma_dom) - dma_dom = (*iommu)->default_dom; - *domain = &dma_dom->domain; - attach_device(*iommu, *domain, *bdf); - DUMP_printk("Using protection domain %d for device %s\n", - (*domain)->id, dev_name(dev)); - } - - if (domain_for_device(_bdf) == NULL) - attach_device(*iommu, *domain, _bdf); + /* Device not bount yet - bind it */ + dma_dom = find_protection_domain(devid); + if (!dma_dom) + dma_dom = amd_iommu_rlookup_table[devid]->default_dom; + attach_device(dev, &dma_dom->domain); + DUMP_printk("Using protection domain %d for device %s\n", + dma_dom->domain.id, dev_name(dev)); - return 1; + return &dma_dom->domain; } static void update_device_table(struct protection_domain *domain) { - unsigned long flags; - int i; + struct iommu_dev_data *dev_data; - for (i = 0; i <= amd_iommu_last_bdf; ++i) { - if (amd_iommu_pd_table[i] != domain) - continue; - write_lock_irqsave(&amd_iommu_devtable_lock, flags); - set_dte_entry(i, domain); - write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + list_for_each_entry(dev_data, &domain->dev_list, list) { + u16 devid = get_device_id(dev_data->dev); + set_dte_entry(devid, domain); } } @@ -1431,76 +1643,13 @@ static void update_domain(struct protection_domain *domain) return; update_device_table(domain); - flush_devices_by_domain(domain); - iommu_flush_domain(domain->id); + iommu_flush_domain_devices(domain); + iommu_flush_tlb_pde(domain); domain->updated = false; } /* - * This function is used to add another level to an IO page table. Adding - * another level increases the size of the address space by 9 bits to a size up - * to 64 bits. - */ -static bool increase_address_space(struct protection_domain *domain, - gfp_t gfp) -{ - u64 *pte; - - if (domain->mode == PAGE_MODE_6_LEVEL) - /* address space already 64 bit large */ - return false; - - pte = (void *)get_zeroed_page(gfp); - if (!pte) - return false; - - *pte = PM_LEVEL_PDE(domain->mode, - virt_to_phys(domain->pt_root)); - domain->pt_root = pte; - domain->mode += 1; - domain->updated = true; - - return true; -} - -static u64 *alloc_pte(struct protection_domain *domain, - unsigned long address, - int end_lvl, - u64 **pte_page, - gfp_t gfp) -{ - u64 *pte, *page; - int level; - - while (address > PM_LEVEL_SIZE(domain->mode)) - increase_address_space(domain, gfp); - - level = domain->mode - 1; - pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; - - while (level > end_lvl) { - if (!IOMMU_PTE_PRESENT(*pte)) { - page = (u64 *)get_zeroed_page(gfp); - if (!page) - return NULL; - *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); - } - - level -= 1; - - pte = IOMMU_PTE_PAGE(*pte); - - if (pte_page && level == end_lvl) - *pte_page = pte; - - pte = &pte[PM_LEVEL_INDEX(level, address)]; - } - - return pte; -} - -/* * This function fetches the PTE for a given address in the aperture */ static u64* dma_ops_get_pte(struct dma_ops_domain *dom, @@ -1530,8 +1679,7 @@ static u64* dma_ops_get_pte(struct dma_ops_domain *dom, * This is the generic map function. It maps one 4kb page at paddr to * the given address in the DMA address space for the domain. */ -static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, - struct dma_ops_domain *dom, +static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom, unsigned long address, phys_addr_t paddr, int direction) @@ -1544,7 +1692,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, pte = dma_ops_get_pte(dom, address); if (!pte) - return bad_dma_address; + return DMA_ERROR_CODE; __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; @@ -1565,8 +1713,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, /* * The generic unmapping function for on page in the DMA address space. */ -static void dma_ops_domain_unmap(struct amd_iommu *iommu, - struct dma_ops_domain *dom, +static void dma_ops_domain_unmap(struct dma_ops_domain *dom, unsigned long address) { struct aperture_range *aperture; @@ -1597,7 +1744,6 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, * Must be called with the domain lock held. */ static dma_addr_t __map_single(struct device *dev, - struct amd_iommu *iommu, struct dma_ops_domain *dma_dom, phys_addr_t paddr, size_t size, @@ -1625,7 +1771,7 @@ static dma_addr_t __map_single(struct device *dev, retry: address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, dma_mask); - if (unlikely(address == bad_dma_address)) { + if (unlikely(address == DMA_ERROR_CODE)) { /* * setting next_address here will let the address * allocator only scan the new allocated range in the @@ -1633,7 +1779,7 @@ retry: */ dma_dom->next_address = dma_dom->aperture_size; - if (alloc_new_range(iommu, dma_dom, false, GFP_ATOMIC)) + if (alloc_new_range(dma_dom, false, GFP_ATOMIC)) goto out; /* @@ -1645,8 +1791,8 @@ retry: start = address; for (i = 0; i < pages; ++i) { - ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir); - if (ret == bad_dma_address) + ret = dma_ops_domain_map(dma_dom, start, paddr, dir); + if (ret == DMA_ERROR_CODE) goto out_unmap; paddr += PAGE_SIZE; @@ -1657,10 +1803,10 @@ retry: ADD_STATS_COUNTER(alloced_io_mem, size); if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { - iommu_flush_tlb(iommu, dma_dom->domain.id); + iommu_flush_tlb(&dma_dom->domain); dma_dom->need_flush = false; - } else if (unlikely(iommu_has_npcache(iommu))) - iommu_flush_pages(iommu, dma_dom->domain.id, address, size); + } else if (unlikely(amd_iommu_np_cache)) + iommu_flush_pages(&dma_dom->domain, address, size); out: return address; @@ -1669,20 +1815,19 @@ out_unmap: for (--i; i >= 0; --i) { start -= PAGE_SIZE; - dma_ops_domain_unmap(iommu, dma_dom, start); + dma_ops_domain_unmap(dma_dom, start); } dma_ops_free_addresses(dma_dom, address, pages); - return bad_dma_address; + return DMA_ERROR_CODE; } /* * Does the reverse of the __map_single function. Must be called with * the domain lock held too */ -static void __unmap_single(struct amd_iommu *iommu, - struct dma_ops_domain *dma_dom, +static void __unmap_single(struct dma_ops_domain *dma_dom, dma_addr_t dma_addr, size_t size, int dir) @@ -1690,7 +1835,7 @@ static void __unmap_single(struct amd_iommu *iommu, dma_addr_t i, start; unsigned int pages; - if ((dma_addr == bad_dma_address) || + if ((dma_addr == DMA_ERROR_CODE) || (dma_addr + size > dma_dom->aperture_size)) return; @@ -1699,7 +1844,7 @@ static void __unmap_single(struct amd_iommu *iommu, start = dma_addr; for (i = 0; i < pages; ++i) { - dma_ops_domain_unmap(iommu, dma_dom, start); + dma_ops_domain_unmap(dma_dom, start); start += PAGE_SIZE; } @@ -1708,7 +1853,7 @@ static void __unmap_single(struct amd_iommu *iommu, dma_ops_free_addresses(dma_dom, dma_addr, pages); if (amd_iommu_unmap_flush || dma_dom->need_flush) { - iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); + iommu_flush_pages(&dma_dom->domain, dma_addr, size); dma_dom->need_flush = false; } } @@ -1722,36 +1867,29 @@ static dma_addr_t map_page(struct device *dev, struct page *page, struct dma_attrs *attrs) { unsigned long flags; - struct amd_iommu *iommu; struct protection_domain *domain; - u16 devid; dma_addr_t addr; u64 dma_mask; phys_addr_t paddr = page_to_phys(page) + offset; INC_STATS_COUNTER(cnt_map_single); - if (!check_device(dev)) - return bad_dma_address; - - dma_mask = *dev->dma_mask; - - get_device_resources(dev, &iommu, &domain, &devid); - - if (iommu == NULL || domain == NULL) - /* device not handled by any AMD IOMMU */ + domain = get_domain(dev); + if (PTR_ERR(domain) == -EINVAL) return (dma_addr_t)paddr; + else if (IS_ERR(domain)) + return DMA_ERROR_CODE; - if (!dma_ops_domain(domain)) - return bad_dma_address; + dma_mask = *dev->dma_mask; spin_lock_irqsave(&domain->lock, flags); - addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, + + addr = __map_single(dev, domain->priv, paddr, size, dir, false, dma_mask); - if (addr == bad_dma_address) + if (addr == DMA_ERROR_CODE) goto out; - iommu_completion_wait(iommu); + iommu_flush_complete(domain); out: spin_unlock_irqrestore(&domain->lock, flags); @@ -1766,25 +1904,19 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { unsigned long flags; - struct amd_iommu *iommu; struct protection_domain *domain; - u16 devid; INC_STATS_COUNTER(cnt_unmap_single); - if (!check_device(dev) || - !get_device_resources(dev, &iommu, &domain, &devid)) - /* device not handled by any AMD IOMMU */ - return; - - if (!dma_ops_domain(domain)) + domain = get_domain(dev); + if (IS_ERR(domain)) return; spin_lock_irqsave(&domain->lock, flags); - __unmap_single(iommu, domain->priv, dma_addr, size, dir); + __unmap_single(domain->priv, dma_addr, size, dir); - iommu_completion_wait(iommu); + iommu_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); } @@ -1816,9 +1948,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, struct dma_attrs *attrs) { unsigned long flags; - struct amd_iommu *iommu; struct protection_domain *domain; - u16 devid; int i; struct scatterlist *s; phys_addr_t paddr; @@ -1827,25 +1957,20 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, INC_STATS_COUNTER(cnt_map_sg); - if (!check_device(dev)) + domain = get_domain(dev); + if (PTR_ERR(domain) == -EINVAL) + return map_sg_no_iommu(dev, sglist, nelems, dir); + else if (IS_ERR(domain)) return 0; dma_mask = *dev->dma_mask; - get_device_resources(dev, &iommu, &domain, &devid); - - if (!iommu || !domain) - return map_sg_no_iommu(dev, sglist, nelems, dir); - - if (!dma_ops_domain(domain)) - return 0; - spin_lock_irqsave(&domain->lock, flags); for_each_sg(sglist, s, nelems, i) { paddr = sg_phys(s); - s->dma_address = __map_single(dev, iommu, domain->priv, + s->dma_address = __map_single(dev, domain->priv, paddr, s->length, dir, false, dma_mask); @@ -1856,7 +1981,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, goto unmap; } - iommu_completion_wait(iommu); + iommu_flush_complete(domain); out: spin_unlock_irqrestore(&domain->lock, flags); @@ -1865,7 +1990,7 @@ out: unmap: for_each_sg(sglist, s, mapped_elems, i) { if (s->dma_address) - __unmap_single(iommu, domain->priv, s->dma_address, + __unmap_single(domain->priv, s->dma_address, s->dma_length, dir); s->dma_address = s->dma_length = 0; } @@ -1884,30 +2009,25 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, struct dma_attrs *attrs) { unsigned long flags; - struct amd_iommu *iommu; struct protection_domain *domain; struct scatterlist *s; - u16 devid; int i; INC_STATS_COUNTER(cnt_unmap_sg); - if (!check_device(dev) || - !get_device_resources(dev, &iommu, &domain, &devid)) - return; - - if (!dma_ops_domain(domain)) + domain = get_domain(dev); + if (IS_ERR(domain)) return; spin_lock_irqsave(&domain->lock, flags); for_each_sg(sglist, s, nelems, i) { - __unmap_single(iommu, domain->priv, s->dma_address, + __unmap_single(domain->priv, s->dma_address, s->dma_length, dir); s->dma_address = s->dma_length = 0; } - iommu_completion_wait(iommu); + iommu_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); } @@ -1920,49 +2040,44 @@ static void *alloc_coherent(struct device *dev, size_t size, { unsigned long flags; void *virt_addr; - struct amd_iommu *iommu; struct protection_domain *domain; - u16 devid; phys_addr_t paddr; u64 dma_mask = dev->coherent_dma_mask; INC_STATS_COUNTER(cnt_alloc_coherent); - if (!check_device(dev)) + domain = get_domain(dev); + if (PTR_ERR(domain) == -EINVAL) { + virt_addr = (void *)__get_free_pages(flag, get_order(size)); + *dma_addr = __pa(virt_addr); + return virt_addr; + } else if (IS_ERR(domain)) return NULL; - if (!get_device_resources(dev, &iommu, &domain, &devid)) - flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + dma_mask = dev->coherent_dma_mask; + flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + flag |= __GFP_ZERO; - flag |= __GFP_ZERO; virt_addr = (void *)__get_free_pages(flag, get_order(size)); if (!virt_addr) return NULL; paddr = virt_to_phys(virt_addr); - if (!iommu || !domain) { - *dma_addr = (dma_addr_t)paddr; - return virt_addr; - } - - if (!dma_ops_domain(domain)) - goto out_free; - if (!dma_mask) dma_mask = *dev->dma_mask; spin_lock_irqsave(&domain->lock, flags); - *dma_addr = __map_single(dev, iommu, domain->priv, paddr, + *dma_addr = __map_single(dev, domain->priv, paddr, size, DMA_BIDIRECTIONAL, true, dma_mask); - if (*dma_addr == bad_dma_address) { + if (*dma_addr == DMA_ERROR_CODE) { spin_unlock_irqrestore(&domain->lock, flags); goto out_free; } - iommu_completion_wait(iommu); + iommu_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); @@ -1982,28 +2097,19 @@ static void free_coherent(struct device *dev, size_t size, void *virt_addr, dma_addr_t dma_addr) { unsigned long flags; - struct amd_iommu *iommu; struct protection_domain *domain; - u16 devid; INC_STATS_COUNTER(cnt_free_coherent); - if (!check_device(dev)) - return; - - get_device_resources(dev, &iommu, &domain, &devid); - - if (!iommu || !domain) - goto free_mem; - - if (!dma_ops_domain(domain)) + domain = get_domain(dev); + if (IS_ERR(domain)) goto free_mem; spin_lock_irqsave(&domain->lock, flags); - __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); + __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); - iommu_completion_wait(iommu); + iommu_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); @@ -2017,22 +2123,7 @@ free_mem: */ static int amd_iommu_dma_supported(struct device *dev, u64 mask) { - u16 bdf; - struct pci_dev *pcidev; - - /* No device or no PCI device */ - if (!dev || dev->bus != &pci_bus_type) - return 0; - - pcidev = to_pci_dev(dev); - - bdf = calc_devid(pcidev->bus->number, pcidev->devfn); - - /* Out of our scope? */ - if (bdf > amd_iommu_last_bdf) - return 0; - - return 1; + return check_device(dev); } /* @@ -2046,25 +2137,30 @@ static void prealloc_protection_domains(void) { struct pci_dev *dev = NULL; struct dma_ops_domain *dma_dom; - struct amd_iommu *iommu; u16 devid; while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - devid = calc_devid(dev->bus->number, dev->devfn); - if (devid > amd_iommu_last_bdf) - continue; - devid = amd_iommu_alias_table[devid]; - if (domain_for_device(devid)) + + /* Do we handle this device? */ + if (!check_device(&dev->dev)) continue; - iommu = amd_iommu_rlookup_table[devid]; - if (!iommu) + + iommu_init_device(&dev->dev); + + /* Is there already any domain for it? */ + if (domain_for_device(&dev->dev)) continue; - dma_dom = dma_ops_domain_alloc(iommu); + + devid = get_device_id(&dev->dev); + + dma_dom = dma_ops_domain_alloc(); if (!dma_dom) continue; init_unity_mappings_for_device(dma_dom, devid); dma_dom->target_dev = devid; + attach_device(&dev->dev, &dma_dom->domain); + list_add_tail(&dma_dom->list, &iommu_pd_list); } } @@ -2093,7 +2189,7 @@ int __init amd_iommu_init_dma_ops(void) * protection domain will be assigned to the default one. */ for_each_iommu(iommu) { - iommu->default_dom = dma_ops_domain_alloc(iommu); + iommu->default_dom = dma_ops_domain_alloc(); if (iommu->default_dom == NULL) return -ENOMEM; iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; @@ -2103,15 +2199,12 @@ int __init amd_iommu_init_dma_ops(void) } /* - * If device isolation is enabled, pre-allocate the protection - * domains for each device. + * Pre-allocate the protection domains for each device. */ - if (amd_iommu_isolate) - prealloc_protection_domains(); + prealloc_protection_domains(); iommu_detected = 1; - force_iommu = 1; - bad_dma_address = 0; + swiotlb = 0; #ifdef CONFIG_GART_IOMMU gart_iommu_aperture_disabled = 1; gart_iommu_aperture = 0; @@ -2150,14 +2243,17 @@ free_domains: static void cleanup_domain(struct protection_domain *domain) { + struct iommu_dev_data *dev_data, *next; unsigned long flags; - u16 devid; write_lock_irqsave(&amd_iommu_devtable_lock, flags); - for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) - if (amd_iommu_pd_table[devid] == domain) - __detach_device(domain, devid); + list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) { + struct device *dev = dev_data->dev; + + do_detach(dev); + atomic_set(&dev_data->bind, 0); + } write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); } @@ -2167,6 +2263,8 @@ static void protection_domain_free(struct protection_domain *domain) if (!domain) return; + del_domain_from_list(domain); + if (domain->id) domain_id_free(domain->id); @@ -2185,6 +2283,9 @@ static struct protection_domain *protection_domain_alloc(void) domain->id = domain_id_alloc(); if (!domain->id) goto out_err; + INIT_LIST_HEAD(&domain->dev_list); + + add_domain_to_list(domain); return domain; @@ -2241,26 +2342,23 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom) static void amd_iommu_detach_device(struct iommu_domain *dom, struct device *dev) { - struct protection_domain *domain = dom->priv; + struct iommu_dev_data *dev_data = dev->archdata.iommu; struct amd_iommu *iommu; - struct pci_dev *pdev; u16 devid; - if (dev->bus != &pci_bus_type) + if (!check_device(dev)) return; - pdev = to_pci_dev(dev); - - devid = calc_devid(pdev->bus->number, pdev->devfn); + devid = get_device_id(dev); - if (devid > 0) - detach_device(domain, devid); + if (dev_data->domain != NULL) + detach_device(dev); iommu = amd_iommu_rlookup_table[devid]; if (!iommu) return; - iommu_queue_inv_dev_entry(iommu, devid); + iommu_flush_device(dev); iommu_completion_wait(iommu); } @@ -2268,35 +2366,30 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, struct device *dev) { struct protection_domain *domain = dom->priv; - struct protection_domain *old_domain; + struct iommu_dev_data *dev_data; struct amd_iommu *iommu; - struct pci_dev *pdev; + int ret; u16 devid; - if (dev->bus != &pci_bus_type) + if (!check_device(dev)) return -EINVAL; - pdev = to_pci_dev(dev); + dev_data = dev->archdata.iommu; - devid = calc_devid(pdev->bus->number, pdev->devfn); - - if (devid >= amd_iommu_last_bdf || - devid != amd_iommu_alias_table[devid]) - return -EINVAL; + devid = get_device_id(dev); iommu = amd_iommu_rlookup_table[devid]; if (!iommu) return -EINVAL; - old_domain = domain_for_device(devid); - if (old_domain) - detach_device(old_domain, devid); + if (dev_data->domain) + detach_device(dev); - attach_device(iommu, domain, devid); + ret = attach_device(dev, domain); iommu_completion_wait(iommu); - return 0; + return ret; } static int amd_iommu_map_range(struct iommu_domain *dom, @@ -2342,7 +2435,7 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom, iova += PAGE_SIZE; } - iommu_flush_domain(domain->id); + iommu_flush_tlb_pde(domain); } static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, @@ -2393,8 +2486,9 @@ static struct iommu_ops amd_iommu_ops = { int __init amd_iommu_init_passthrough(void) { + struct amd_iommu *iommu; struct pci_dev *dev = NULL; - u16 devid, devid2; + u16 devid; /* allocate passthroug domain */ pt_domain = protection_domain_alloc(); @@ -2404,20 +2498,17 @@ int __init amd_iommu_init_passthrough(void) pt_domain->mode |= PAGE_MODE_NONE; while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - struct amd_iommu *iommu; - devid = calc_devid(dev->bus->number, dev->devfn); - if (devid > amd_iommu_last_bdf) + if (!check_device(&dev->dev)) continue; - devid2 = amd_iommu_alias_table[devid]; + devid = get_device_id(&dev->dev); - iommu = amd_iommu_rlookup_table[devid2]; + iommu = amd_iommu_rlookup_table[devid]; if (!iommu) continue; - __attach_device(iommu, pt_domain, devid); - __attach_device(iommu, pt_domain, devid2); + attach_device(&dev->dev, pt_domain); } pr_info("AMD-Vi: Initialized for Passthrough Mode\n"); diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c20001e4f55..7ffc3996523 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. * Author: Joerg Roedel <joerg.roedel@amd.com> * Leo Duran <leo.duran@amd.com> * @@ -25,10 +25,12 @@ #include <linux/interrupt.h> #include <linux/msi.h> #include <asm/pci-direct.h> +#include <asm/amd_iommu_proto.h> #include <asm/amd_iommu_types.h> #include <asm/amd_iommu.h> #include <asm/iommu.h> #include <asm/gart.h> +#include <asm/x86_init.h> /* * definitions for the ACPI scanning code @@ -123,18 +125,24 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have to handle */ LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings we find in ACPI */ -#ifdef CONFIG_IOMMU_STRESS -bool amd_iommu_isolate = false; -#else -bool amd_iommu_isolate = true; /* if true, device isolation is - enabled */ -#endif - bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the system */ +/* Array to assign indices to IOMMUs*/ +struct amd_iommu *amd_iommus[MAX_IOMMUS]; +int amd_iommus_present; + +/* IOMMUs have a non-present cache? */ +bool amd_iommu_np_cache __read_mostly; + +/* + * List of protection domains - used during resume + */ +LIST_HEAD(amd_iommu_pd_list); +spinlock_t amd_iommu_pd_lock; + /* * Pointer to the device table which is shared by all AMD IOMMUs * it is indexed by the PCI device id or the HT unit id and contains @@ -157,12 +165,6 @@ u16 *amd_iommu_alias_table; struct amd_iommu **amd_iommu_rlookup_table; /* - * The pd table (protection domain table) is used to find the protection domain - * data structure a device belongs to. Indexed with the PCI device id too. - */ -struct protection_domain **amd_iommu_pd_table; - -/* * AMD IOMMU allows up to 2^16 differend protection domains. This is a bitmap * to know which ones are already in use. */ @@ -838,7 +840,18 @@ static void __init free_iommu_all(void) static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) { spin_lock_init(&iommu->lock); + + /* Add IOMMU to internal data structures */ list_add_tail(&iommu->list, &amd_iommu_list); + iommu->index = amd_iommus_present++; + + if (unlikely(iommu->index >= MAX_IOMMUS)) { + WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n"); + return -ENOSYS; + } + + /* Index is fine - add IOMMU to the array */ + amd_iommus[iommu->index] = iommu; /* * Copy data from ACPI table entry to the iommu struct @@ -868,6 +881,9 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) init_iommu_from_acpi(iommu, h); init_iommu_devices(iommu); + if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) + amd_iommu_np_cache = true; + return pci_enable_device(iommu->dev); } @@ -925,7 +941,7 @@ static int __init init_iommu_all(struct acpi_table_header *table) * ****************************************************************************/ -static int __init iommu_setup_msi(struct amd_iommu *iommu) +static int iommu_setup_msi(struct amd_iommu *iommu) { int r; @@ -1176,19 +1192,10 @@ static struct sys_device device_amd_iommu = { * functions. Finally it prints some information about AMD IOMMUs and * the driver state and enables the hardware. */ -int __init amd_iommu_init(void) +static int __init amd_iommu_init(void) { int i, ret = 0; - - if (no_iommu) { - printk(KERN_INFO "AMD-Vi disabled by kernel command line\n"); - return 0; - } - - if (!amd_iommu_detected) - return -ENODEV; - /* * First parse ACPI tables to find the largest Bus/Dev/Func * we need to handle. Upon this information the shared data @@ -1225,15 +1232,6 @@ int __init amd_iommu_init(void) if (amd_iommu_rlookup_table == NULL) goto free; - /* - * Protection Domain table - maps devices to protection domains - * This table has the same size as the rlookup_table - */ - amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(rlookup_table_size)); - if (amd_iommu_pd_table == NULL) - goto free; - amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( GFP_KERNEL | __GFP_ZERO, get_order(MAX_DOMAIN_ID/8)); @@ -1255,6 +1253,8 @@ int __init amd_iommu_init(void) */ amd_iommu_pd_alloc_bitmap[0] = 1; + spin_lock_init(&amd_iommu_pd_lock); + /* * now the data structures are allocated and basically initialized * start the real acpi table scan @@ -1286,17 +1286,12 @@ int __init amd_iommu_init(void) if (iommu_pass_through) goto out; - printk(KERN_INFO "AMD-Vi: device isolation "); - if (amd_iommu_isolate) - printk("enabled\n"); - else - printk("disabled\n"); - if (amd_iommu_unmap_flush) printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n"); else printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n"); + x86_platform.iommu_shutdown = disable_iommus; out: return ret; @@ -1304,9 +1299,6 @@ free: free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, get_order(MAX_DOMAIN_ID/8)); - free_pages((unsigned long)amd_iommu_pd_table, - get_order(rlookup_table_size)); - free_pages((unsigned long)amd_iommu_rlookup_table, get_order(rlookup_table_size)); @@ -1323,11 +1315,6 @@ free: goto out; } -void amd_iommu_shutdown(void) -{ - disable_iommus(); -} - /**************************************************************************** * * Early detect code. This code runs at IOMMU detection time in the DMA @@ -1342,16 +1329,13 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table) void __init amd_iommu_detect(void) { - if (swiotlb || no_iommu || (iommu_detected && !gart_iommu_aperture)) + if (no_iommu || (iommu_detected && !gart_iommu_aperture)) return; if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { iommu_detected = 1; amd_iommu_detected = 1; -#ifdef CONFIG_GART_IOMMU - gart_iommu_aperture_disabled = 1; - gart_iommu_aperture = 0; -#endif + x86_init.iommu.iommu_init = amd_iommu_init; } } @@ -1372,10 +1356,6 @@ static int __init parse_amd_iommu_dump(char *str) static int __init parse_amd_iommu_options(char *str) { for (; *str; ++str) { - if (strncmp(str, "isolate", 7) == 0) - amd_iommu_isolate = true; - if (strncmp(str, "share", 5) == 0) - amd_iommu_isolate = false; if (strncmp(str, "fullflush", 9) == 0) amd_iommu_unmap_flush = true; } diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 128111d8ffe..e0dfb6856aa 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -28,6 +28,7 @@ #include <asm/pci-direct.h> #include <asm/dma.h> #include <asm/k8.h> +#include <asm/x86_init.h> int gart_iommu_aperture; int gart_iommu_aperture_disabled __initdata; @@ -400,6 +401,7 @@ void __init gart_iommu_hole_init(void) iommu_detected = 1; gart_iommu_aperture = 1; + x86_init.iommu.iommu_init = gart_iommu_init; aper_order = (read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL) >> 1) & 7; aper_size = (32 * 1024 * 1024) << aper_order; @@ -456,7 +458,7 @@ out: if (aper_alloc) { /* Got the aperture from the AGP bridge */ - } else if (swiotlb && !valid_agp) { + } else if (!valid_agp) { /* Do nothing */ } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) || force_iommu || diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index da7b7b9f8bd..565c1bfc507 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -2,7 +2,7 @@ # Makefile for local APIC drivers and for the IO-APIC code # -obj-$(CONFIG_X86_LOCAL_APIC) += apic.o probe_$(BITS).o ipi.o nmi.o +obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_SMP) += ipi.o diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 894aa97f071..ad8c75b9e45 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -241,28 +241,13 @@ static int modern_apic(void) } /* - * bare function to substitute write operation - * and it's _that_ fast :) - */ -static void native_apic_write_dummy(u32 reg, u32 v) -{ - WARN_ON_ONCE((cpu_has_apic || !disable_apic)); -} - -static u32 native_apic_read_dummy(u32 reg) -{ - WARN_ON_ONCE((cpu_has_apic && !disable_apic)); - return 0; -} - -/* - * right after this call apic->write/read doesn't do anything - * note that there is no restore operation it works one way + * right after this call apic become NOOP driven + * so apic->write/read doesn't do anything */ void apic_disable(void) { - apic->read = native_apic_read_dummy; - apic->write = native_apic_write_dummy; + pr_info("APIC: switched to apic NOOP\n"); + apic = &apic_noop; } void native_apic_wait_icr_idle(void) @@ -459,7 +444,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, v = apic_read(APIC_LVTT); v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); apic_write(APIC_LVTT, v); - apic_write(APIC_TMICT, 0xffffffff); + apic_write(APIC_TMICT, 0); break; case CLOCK_EVT_MODE_RESUME: /* Nothing to do here */ @@ -1392,14 +1377,11 @@ void __init enable_IR_x2apic(void) unsigned long flags; struct IO_APIC_route_entry **ioapic_entries = NULL; int ret, x2apic_enabled = 0; - int dmar_table_init_ret = 0; + int dmar_table_init_ret; -#ifdef CONFIG_INTR_REMAP dmar_table_init_ret = dmar_table_init(); - if (dmar_table_init_ret) - pr_debug("dmar_table_init() failed with %d:\n", - dmar_table_init_ret); -#endif + if (dmar_table_init_ret && !x2apic_supported()) + return; ioapic_entries = alloc_ioapic_entries(); if (!ioapic_entries) { diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c new file mode 100644 index 00000000000..d9acc3bee0f --- /dev/null +++ b/arch/x86/kernel/apic/apic_noop.c @@ -0,0 +1,200 @@ +/* + * NOOP APIC driver. + * + * Does almost nothing and should be substituted by a real apic driver via + * probe routine. + * + * Though in case if apic is disabled (for some reason) we try + * to not uglify the caller's code and allow to call (some) apic routines + * like self-ipi, etc... + */ + +#include <linux/threads.h> +#include <linux/cpumask.h> +#include <linux/module.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/ctype.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <asm/fixmap.h> +#include <asm/mpspec.h> +#include <asm/apicdef.h> +#include <asm/apic.h> +#include <asm/setup.h> + +#include <linux/smp.h> +#include <asm/ipi.h> + +#include <linux/interrupt.h> +#include <asm/acpi.h> +#include <asm/e820.h> + +static void noop_init_apic_ldr(void) { } +static void noop_send_IPI_mask(const struct cpumask *cpumask, int vector) { } +static void noop_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { } +static void noop_send_IPI_allbutself(int vector) { } +static void noop_send_IPI_all(int vector) { } +static void noop_send_IPI_self(int vector) { } +static void noop_apic_wait_icr_idle(void) { } +static void noop_apic_icr_write(u32 low, u32 id) { } + +static int noop_wakeup_secondary_cpu(int apicid, unsigned long start_eip) +{ + return -1; +} + +static u32 noop_safe_apic_wait_icr_idle(void) +{ + return 0; +} + +static u64 noop_apic_icr_read(void) +{ + return 0; +} + +static int noop_cpu_to_logical_apicid(int cpu) +{ + return 0; +} + +static int noop_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return 0; +} + +static unsigned int noop_get_apic_id(unsigned long x) +{ + return 0; +} + +static int noop_probe(void) +{ + /* + * NOOP apic should not ever be + * enabled via probe routine + */ + return 0; +} + +static int noop_apic_id_registered(void) +{ + /* + * if we would be really "pedantic" + * we should pass read_apic_id() here + * but since NOOP suppose APIC ID = 0 + * lets save a few cycles + */ + return physid_isset(0, phys_cpu_present_map); +} + +static const struct cpumask *noop_target_cpus(void) +{ + /* only BSP here */ + return cpumask_of(0); +} + +static unsigned long noop_check_apicid_used(physid_mask_t *map, int apicid) +{ + return physid_isset(apicid, *map); +} + +static unsigned long noop_check_apicid_present(int bit) +{ + return physid_isset(bit, phys_cpu_present_map); +} + +static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask) +{ + if (cpu != 0) + pr_warning("APIC: Vector allocated for non-BSP cpu\n"); + cpumask_clear(retmask); + cpumask_set_cpu(cpu, retmask); +} + +int noop_apicid_to_node(int logical_apicid) +{ + /* we're always on node 0 */ + return 0; +} + +static u32 noop_apic_read(u32 reg) +{ + WARN_ON_ONCE((cpu_has_apic && !disable_apic)); + return 0; +} + +static void noop_apic_write(u32 reg, u32 v) +{ + WARN_ON_ONCE((cpu_has_apic || !disable_apic)); +} + +struct apic apic_noop = { + .name = "noop", + .probe = noop_probe, + .acpi_madt_oem_check = NULL, + + .apic_id_registered = noop_apic_id_registered, + + .irq_delivery_mode = dest_LowestPrio, + /* logical delivery broadcast to all CPUs: */ + .irq_dest_mode = 1, + + .target_cpus = noop_target_cpus, + .disable_esr = 0, + .dest_logical = APIC_DEST_LOGICAL, + .check_apicid_used = noop_check_apicid_used, + .check_apicid_present = noop_check_apicid_present, + + .vector_allocation_domain = noop_vector_allocation_domain, + .init_apic_ldr = noop_init_apic_ldr, + + .ioapic_phys_id_map = default_ioapic_phys_id_map, + .setup_apic_routing = NULL, + .multi_timer_check = NULL, + .apicid_to_node = noop_apicid_to_node, + + .cpu_to_logical_apicid = noop_cpu_to_logical_apicid, + .cpu_present_to_apicid = default_cpu_present_to_apicid, + .apicid_to_cpu_present = physid_set_mask_of_physid, + + .setup_portio_remap = NULL, + .check_phys_apicid_present = default_check_phys_apicid_present, + .enable_apic_mode = NULL, + + .phys_pkg_id = noop_phys_pkg_id, + + .mps_oem_check = NULL, + + .get_apic_id = noop_get_apic_id, + .set_apic_id = NULL, + .apic_id_mask = 0x0F << 24, + + .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, + + .send_IPI_mask = noop_send_IPI_mask, + .send_IPI_mask_allbutself = noop_send_IPI_mask_allbutself, + .send_IPI_allbutself = noop_send_IPI_allbutself, + .send_IPI_all = noop_send_IPI_all, + .send_IPI_self = noop_send_IPI_self, + + .wakeup_secondary_cpu = noop_wakeup_secondary_cpu, + + /* should be safe */ + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, + + .wait_for_init_deassert = NULL, + + .smp_callin_clear_local_apic = NULL, + .inquire_remote_apic = NULL, + + .read = noop_apic_read, + .write = noop_apic_write, + .icr_read = noop_apic_icr_read, + .icr_write = noop_apic_icr_write, + .wait_icr_idle = noop_apic_wait_icr_idle, + .safe_wait_icr_idle = noop_safe_apic_wait_icr_idle, +}; diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 77a06413b6b..38dcecfa581 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -35,7 +35,7 @@ static const struct cpumask *bigsmp_target_cpus(void) #endif } -static unsigned long bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid) +static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid) { return 0; } @@ -93,11 +93,6 @@ static int bigsmp_cpu_present_to_apicid(int mps_cpu) return BAD_APICID; } -static physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid) -{ - return physid_mask_of_physid(phys_apicid); -} - /* Mapping from cpu number to logical apicid */ static inline int bigsmp_cpu_to_logical_apicid(int cpu) { @@ -106,10 +101,10 @@ static inline int bigsmp_cpu_to_logical_apicid(int cpu) return cpu_physical_id(cpu); } -static physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map) +static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) { /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0xFFL); + physids_promote(0xFFL, retmap); } static int bigsmp_check_phys_apicid_present(int phys_apicid) @@ -230,7 +225,7 @@ struct apic apic_bigsmp = { .apicid_to_node = bigsmp_apicid_to_node, .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid, .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, - .apicid_to_cpu_present = bigsmp_apicid_to_cpu_present, + .apicid_to_cpu_present = physid_set_mask_of_physid, .setup_portio_remap = NULL, .check_phys_apicid_present = bigsmp_check_phys_apicid_present, .enable_apic_mode = NULL, diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 89174f847b4..e85f8fb7f8e 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -466,11 +466,11 @@ static const struct cpumask *es7000_target_cpus(void) return cpumask_of(smp_processor_id()); } -static unsigned long -es7000_check_apicid_used(physid_mask_t bitmap, int apicid) +static unsigned long es7000_check_apicid_used(physid_mask_t *map, int apicid) { return 0; } + static unsigned long es7000_check_apicid_present(int bit) { return physid_isset(bit, phys_cpu_present_map); @@ -539,14 +539,10 @@ static int es7000_cpu_present_to_apicid(int mps_cpu) static int cpu_id; -static physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid) +static void es7000_apicid_to_cpu_present(int phys_apicid, physid_mask_t *retmap) { - physid_mask_t mask; - - mask = physid_mask_of_physid(cpu_id); + physid_set_mask_of_physid(cpu_id, retmap); ++cpu_id; - - return mask; } /* Mapping from cpu number to logical apicid */ @@ -561,10 +557,10 @@ static int es7000_cpu_to_logical_apicid(int cpu) #endif } -static physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map) +static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) { /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0xff); + physids_promote(0xFFL, retmap); } static int es7000_check_phys_apicid_present(int cpu_physical_apicid) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index dc69f28489f..c0b4468683f 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -60,8 +60,6 @@ #include <asm/irq_remapping.h> #include <asm/hpet.h> #include <asm/hw_irq.h> -#include <asm/uv/uv_hub.h> -#include <asm/uv/uv_irq.h> #include <asm/apic.h> @@ -140,20 +138,6 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node) return pin; } -/* - * This is performance-critical, we want to do it O(1) - * - * Most irqs are mapped 1:1 with pins. - */ -struct irq_cfg { - struct irq_pin_list *irq_2_pin; - cpumask_var_t domain; - cpumask_var_t old_domain; - unsigned move_cleanup_count; - u8 vector; - u8 move_in_progress : 1; -}; - /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ #ifdef CONFIG_SPARSE_IRQ static struct irq_cfg irq_cfgx[] = { @@ -209,7 +193,7 @@ int __init arch_early_irq_init(void) } #ifdef CONFIG_SPARSE_IRQ -static struct irq_cfg *irq_cfg(unsigned int irq) +struct irq_cfg *irq_cfg(unsigned int irq) { struct irq_cfg *cfg = NULL; struct irq_desc *desc; @@ -361,7 +345,7 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) /* end for move_irq_desc */ #else -static struct irq_cfg *irq_cfg(unsigned int irq) +struct irq_cfg *irq_cfg(unsigned int irq) { return irq < nr_irqs ? irq_cfgx + irq : NULL; } @@ -555,23 +539,41 @@ static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, add_pin_to_irq_node(cfg, node, newapic, newpin); } +static void __io_apic_modify_irq(struct irq_pin_list *entry, + int mask_and, int mask_or, + void (*final)(struct irq_pin_list *entry)) +{ + unsigned int reg, pin; + + pin = entry->pin; + reg = io_apic_read(entry->apic, 0x10 + pin * 2); + reg &= mask_and; + reg |= mask_or; + io_apic_modify(entry->apic, 0x10 + pin * 2, reg); + if (final) + final(entry); +} + static void io_apic_modify_irq(struct irq_cfg *cfg, int mask_and, int mask_or, void (*final)(struct irq_pin_list *entry)) { - int pin; struct irq_pin_list *entry; - for_each_irq_pin(entry, cfg->irq_2_pin) { - unsigned int reg; - pin = entry->pin; - reg = io_apic_read(entry->apic, 0x10 + pin * 2); - reg &= mask_and; - reg |= mask_or; - io_apic_modify(entry->apic, 0x10 + pin * 2, reg); - if (final) - final(entry); - } + for_each_irq_pin(entry, cfg->irq_2_pin) + __io_apic_modify_irq(entry, mask_and, mask_or, final); +} + +static void __mask_and_edge_IO_APIC_irq(struct irq_pin_list *entry) +{ + __io_apic_modify_irq(entry, ~IO_APIC_REDIR_LEVEL_TRIGGER, + IO_APIC_REDIR_MASKED, NULL); +} + +static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry) +{ + __io_apic_modify_irq(entry, ~IO_APIC_REDIR_MASKED, + IO_APIC_REDIR_LEVEL_TRIGGER, NULL); } static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) @@ -595,18 +597,6 @@ static void __mask_IO_APIC_irq(struct irq_cfg *cfg) io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); } -static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) -{ - io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER, - IO_APIC_REDIR_MASKED, NULL); -} - -static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg) -{ - io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, - IO_APIC_REDIR_LEVEL_TRIGGER, NULL); -} - static void mask_IO_APIC_irq_desc(struct irq_desc *desc) { struct irq_cfg *cfg = desc->chip_data; @@ -1177,7 +1167,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) int cpu, err; cpumask_var_t tmp_mask; - if ((cfg->move_in_progress) || cfg->move_cleanup_count) + if (cfg->move_in_progress) return -EBUSY; if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) @@ -1237,8 +1227,7 @@ next: return err; } -static int -assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) +int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) { int err; unsigned long flags; @@ -1599,9 +1588,6 @@ __apicdebuginit(void) print_IO_APIC(void) struct irq_desc *desc; unsigned int irq; - if (apic_verbosity == APIC_QUIET) - return; - printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); for (i = 0; i < nr_ioapics; i++) printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", @@ -1708,9 +1694,6 @@ __apicdebuginit(void) print_APIC_field(int base) { int i; - if (apic_verbosity == APIC_QUIET) - return; - printk(KERN_DEBUG); for (i = 0; i < 8; i++) @@ -1724,9 +1707,6 @@ __apicdebuginit(void) print_local_APIC(void *dummy) unsigned int i, v, ver, maxlvt; u64 icr; - if (apic_verbosity == APIC_QUIET) - return; - printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", smp_processor_id(), hard_smp_processor_id()); v = apic_read(APIC_ID); @@ -1824,13 +1804,19 @@ __apicdebuginit(void) print_local_APIC(void *dummy) printk("\n"); } -__apicdebuginit(void) print_all_local_APICs(void) +__apicdebuginit(void) print_local_APICs(int maxcpu) { int cpu; + if (!maxcpu) + return; + preempt_disable(); - for_each_online_cpu(cpu) + for_each_online_cpu(cpu) { + if (cpu >= maxcpu) + break; smp_call_function_single(cpu, print_local_APIC, NULL, 1); + } preempt_enable(); } @@ -1839,7 +1825,7 @@ __apicdebuginit(void) print_PIC(void) unsigned int v; unsigned long flags; - if (apic_verbosity == APIC_QUIET || !nr_legacy_irqs) + if (!nr_legacy_irqs) return; printk(KERN_DEBUG "\nprinting PIC contents\n"); @@ -1866,21 +1852,41 @@ __apicdebuginit(void) print_PIC(void) printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); } -__apicdebuginit(int) print_all_ICs(void) +static int __initdata show_lapic = 1; +static __init int setup_show_lapic(char *arg) { + int num = -1; + + if (strcmp(arg, "all") == 0) { + show_lapic = CONFIG_NR_CPUS; + } else { + get_option(&arg, &num); + if (num >= 0) + show_lapic = num; + } + + return 1; +} +__setup("show_lapic=", setup_show_lapic); + +__apicdebuginit(int) print_ICs(void) +{ + if (apic_verbosity == APIC_QUIET) + return 0; + print_PIC(); /* don't print out if apic is not there */ if (!cpu_has_apic && !apic_from_smp_config()) return 0; - print_all_local_APICs(); + print_local_APICs(show_lapic); print_IO_APIC(); return 0; } -fs_initcall(print_all_ICs); +fs_initcall(print_ICs); /* Where if anywhere is the i8259 connect in external int mode */ @@ -2031,7 +2037,7 @@ void __init setup_ioapic_ids_from_mpc(void) * This is broken; anything with a real cpu count has to * circumvent this idiocy regardless. */ - phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map); + apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map); /* * Set the IOAPIC ID to the value stored in the MPC table. @@ -2058,7 +2064,7 @@ void __init setup_ioapic_ids_from_mpc(void) * system must have a unique ID or we get lots of nice * 'stuck on smp_invalidate_needed IPI wait' messages. */ - if (apic->check_apicid_used(phys_id_present_map, + if (apic->check_apicid_used(&phys_id_present_map, mp_ioapics[apic_id].apicid)) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", apic_id, mp_ioapics[apic_id].apicid); @@ -2073,7 +2079,7 @@ void __init setup_ioapic_ids_from_mpc(void) mp_ioapics[apic_id].apicid = i; } else { physid_mask_t tmp; - tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid); + apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid, &tmp); apic_printk(APIC_VERBOSE, "Setting %d in the " "phys_id_present_map\n", mp_ioapics[apic_id].apicid); @@ -2228,20 +2234,16 @@ static int ioapic_retrigger_irq(unsigned int irq) */ #ifdef CONFIG_SMP -static void send_cleanup_vector(struct irq_cfg *cfg) +void send_cleanup_vector(struct irq_cfg *cfg) { cpumask_var_t cleanup_mask; if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { unsigned int i; - cfg->move_cleanup_count = 0; - for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) - cfg->move_cleanup_count++; for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); } else { cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); - cfg->move_cleanup_count = cpumask_weight(cleanup_mask); apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); free_cpumask_var(cleanup_mask); } @@ -2272,15 +2274,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq } } -static int -assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); - /* * Either sets desc->affinity to a valid value, and returns * ->cpu_mask_to_apicid of that, or returns BAD_APICID and * leaves desc->affinity untouched. */ -static unsigned int +unsigned int set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) { struct irq_cfg *cfg; @@ -2433,8 +2432,6 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) cfg = irq_cfg(irq); spin_lock(&desc->lock); - if (!cfg->move_cleanup_count) - goto unlock; if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) goto unlock; @@ -2452,7 +2449,6 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) goto unlock; } __get_cpu_var(vector_irq)[vector] = -1; - cfg->move_cleanup_count--; unlock: spin_unlock(&desc->lock); } @@ -2460,21 +2456,33 @@ unlock: irq_exit(); } -static void irq_complete_move(struct irq_desc **descp) +static void __irq_complete_move(struct irq_desc **descp, unsigned vector) { struct irq_desc *desc = *descp; struct irq_cfg *cfg = desc->chip_data; - unsigned vector, me; + unsigned me; if (likely(!cfg->move_in_progress)) return; - vector = ~get_irq_regs()->orig_ax; me = smp_processor_id(); if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) send_cleanup_vector(cfg); } + +static void irq_complete_move(struct irq_desc **descp) +{ + __irq_complete_move(descp, ~get_irq_regs()->orig_ax); +} + +void irq_force_complete_move(int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + struct irq_cfg *cfg = desc->chip_data; + + __irq_complete_move(&desc, cfg->vector); +} #else static inline void irq_complete_move(struct irq_desc **descp) {} #endif @@ -2490,6 +2498,59 @@ static void ack_apic_edge(unsigned int irq) atomic_t irq_mis_count; +/* + * IO-APIC versions below 0x20 don't support EOI register. + * For the record, here is the information about various versions: + * 0Xh 82489DX + * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant + * 2Xh I/O(x)APIC which is PCI 2.2 Compliant + * 30h-FFh Reserved + * + * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic + * version as 0x2. This is an error with documentation and these ICH chips + * use io-apic's of version 0x20. + * + * For IO-APIC's with EOI register, we use that to do an explicit EOI. + * Otherwise, we simulate the EOI message manually by changing the trigger + * mode to edge and then back to level, with RTE being masked during this. +*/ +static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) +{ + struct irq_pin_list *entry; + + for_each_irq_pin(entry, cfg->irq_2_pin) { + if (mp_ioapics[entry->apic].apicver >= 0x20) { + /* + * Intr-remapping uses pin number as the virtual vector + * in the RTE. Actual vector is programmed in + * intr-remapping table entry. Hence for the io-apic + * EOI we use the pin number. + */ + if (irq_remapped(irq)) + io_apic_eoi(entry->apic, entry->pin); + else + io_apic_eoi(entry->apic, cfg->vector); + } else { + __mask_and_edge_IO_APIC_irq(entry); + __unmask_and_level_IO_APIC_irq(entry); + } + } +} + +static void eoi_ioapic_irq(struct irq_desc *desc) +{ + struct irq_cfg *cfg; + unsigned long flags; + unsigned int irq; + + irq = desc->irq; + cfg = desc->chip_data; + + spin_lock_irqsave(&ioapic_lock, flags); + __eoi_ioapic_irq(irq, cfg); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + static void ack_apic_level(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); @@ -2525,6 +2586,19 @@ static void ack_apic_level(unsigned int irq) * level-triggered interrupt. We mask the source for the time of the * operation to prevent an edge-triggered interrupt escaping meanwhile. * The idea is from Manfred Spraul. --macro + * + * Also in the case when cpu goes offline, fixup_irqs() will forward + * any unhandled interrupt on the offlined cpu to the new cpu + * destination that is handling the corresponding interrupt. This + * interrupt forwarding is done via IPI's. Hence, in this case also + * level-triggered io-apic interrupt will be seen as an edge + * interrupt in the IRR. And we can't rely on the cpu's EOI + * to be broadcasted to the IO-APIC's which will clear the remoteIRR + * corresponding to the level-triggered interrupt. Hence on IO-APIC's + * supporting EOI register, we do an explicit EOI to clear the + * remote IRR and on IO-APIC's which don't have an EOI register, + * we use the above logic (mask+edge followed by unmask+level) from + * Manfred Spraul to clear the remote IRR. */ cfg = desc->chip_data; i = cfg->vector; @@ -2536,6 +2610,19 @@ static void ack_apic_level(unsigned int irq) */ ack_APIC_irq(); + /* + * Tail end of clearing remote IRR bit (either by delivering the EOI + * message via io-apic EOI register write or simulating it using + * mask+edge followed by unnask+level logic) manually when the + * level triggered interrupt is seen as the edge triggered interrupt + * at the cpu. + */ + if (!(v & (1 << (i & 0x1f)))) { + atomic_inc(&irq_mis_count); + + eoi_ioapic_irq(desc); + } + /* Now we can move and renable the irq */ if (unlikely(do_unmask_irq)) { /* Only migrate the irq if the ack has been received. @@ -2569,41 +2656,9 @@ static void ack_apic_level(unsigned int irq) move_masked_irq(irq); unmask_IO_APIC_irq_desc(desc); } - - /* Tail end of version 0x11 I/O APIC bug workaround */ - if (!(v & (1 << (i & 0x1f)))) { - atomic_inc(&irq_mis_count); - spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(cfg); - __unmask_and_level_IO_APIC_irq(cfg); - spin_unlock(&ioapic_lock); - } } #ifdef CONFIG_INTR_REMAP -static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) -{ - struct irq_pin_list *entry; - - for_each_irq_pin(entry, cfg->irq_2_pin) - io_apic_eoi(entry->apic, entry->pin); -} - -static void -eoi_ioapic_irq(struct irq_desc *desc) -{ - struct irq_cfg *cfg; - unsigned long flags; - unsigned int irq; - - irq = desc->irq; - cfg = desc->chip_data; - - spin_lock_irqsave(&ioapic_lock, flags); - __eoi_ioapic_irq(irq, cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - static void ir_ack_apic_edge(unsigned int irq) { ack_APIC_irq(); @@ -3157,6 +3212,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) continue; desc_new = move_irq_desc(desc_new, node); + cfg_new = desc_new->chip_data; if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) irq = new; @@ -3708,75 +3764,6 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) } #endif /* CONFIG_HT_IRQ */ -#ifdef CONFIG_X86_UV -/* - * Re-target the irq to the specified CPU and enable the specified MMR located - * on the specified blade to allow the sending of MSIs to the specified CPU. - */ -int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, - unsigned long mmr_offset) -{ - const struct cpumask *eligible_cpu = cpumask_of(cpu); - struct irq_cfg *cfg; - int mmr_pnode; - unsigned long mmr_value; - struct uv_IO_APIC_route_entry *entry; - unsigned long flags; - int err; - - BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); - - cfg = irq_cfg(irq); - - err = assign_irq_vector(irq, cfg, eligible_cpu); - if (err != 0) - return err; - - spin_lock_irqsave(&vector_lock, flags); - set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, - irq_name); - spin_unlock_irqrestore(&vector_lock, flags); - - mmr_value = 0; - entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - entry->vector = cfg->vector; - entry->delivery_mode = apic->irq_delivery_mode; - entry->dest_mode = apic->irq_dest_mode; - entry->polarity = 0; - entry->trigger = 0; - entry->mask = 0; - entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); - - mmr_pnode = uv_blade_to_pnode(mmr_blade); - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); - - if (cfg->move_in_progress) - send_cleanup_vector(cfg); - - return irq; -} - -/* - * Disable the specified MMR located on the specified blade so that MSIs are - * longer allowed to be sent. - */ -void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset) -{ - unsigned long mmr_value; - struct uv_IO_APIC_route_entry *entry; - int mmr_pnode; - - BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); - - mmr_value = 0; - entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - entry->mask = 1; - - mmr_pnode = uv_blade_to_pnode(mmr_blade); - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); -} -#endif /* CONFIG_X86_64 */ - int __init io_apic_get_redir_entries (int ioapic) { union IO_APIC_reg_01 reg_01; @@ -3944,7 +3931,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) */ if (physids_empty(apic_id_map)) - apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map); + apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map); spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(ioapic, 0); @@ -3960,10 +3947,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) * Every APIC in a system must have a unique ID or we get lots of nice * 'stuck on smp_invalidate_needed IPI wait' messages. */ - if (apic->check_apicid_used(apic_id_map, apic_id)) { + if (apic->check_apicid_used(&apic_id_map, apic_id)) { for (i = 0; i < get_physical_broadcast(); i++) { - if (!apic->check_apicid_used(apic_id_map, i)) + if (!apic->check_apicid_used(&apic_id_map, i)) break; } @@ -3976,7 +3963,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) apic_id = i; } - tmp = apic->apicid_to_cpu_present(apic_id); + apic->apicid_to_cpu_present(apic_id, &tmp); physids_or(apic_id_map, apic_id_map, tmp); if (reg_00.bits.ID != apic_id) { @@ -4106,7 +4093,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics) for (i = 0; i < nr_ioapics; i++) { res[i].name = mem; res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; - sprintf(mem, "IOAPIC %u", i); + snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i); mem += IOAPIC_RESOURCE_NAME_SIZE; } @@ -4140,18 +4127,17 @@ void __init ioapic_init_mappings(void) #ifdef CONFIG_X86_32 fake_ioapic_page: #endif - ioapic_phys = (unsigned long) - alloc_bootmem_pages(PAGE_SIZE); + ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE); ioapic_phys = __pa(ioapic_phys); } set_fixmap_nocache(idx, ioapic_phys); - apic_printk(APIC_VERBOSE, - "mapped IOAPIC to %08lx (%08lx)\n", - __fix_to_virt(idx), ioapic_phys); + apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n", + __fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK), + ioapic_phys); idx++; ioapic_res->start = ioapic_phys; - ioapic_res->end = ioapic_phys + (4 * 1024) - 1; + ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1; ioapic_res++; } } diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index 7ff61d6a188..6389432a9db 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -39,7 +39,8 @@ int unknown_nmi_panic; int nmi_watchdog_enabled; -static cpumask_t backtrace_mask __read_mostly; +/* For reliability, we're prepared to waste bits here. */ +static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; /* nmi_active: * >0: the lapic NMI watchdog is active, but can be disabled @@ -414,7 +415,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) } /* We can be called before check_nmi_watchdog, hence NULL check. */ - if (cpumask_test_cpu(cpu, &backtrace_mask)) { + if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { static DEFINE_SPINLOCK(lock); /* Serialise the printks */ spin_lock(&lock); @@ -422,7 +423,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) show_regs(regs); dump_stack(); spin_unlock(&lock); - cpumask_clear_cpu(cpu, &backtrace_mask); + cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); rc = 1; } @@ -558,14 +559,14 @@ void arch_trigger_all_cpu_backtrace(void) { int i; - cpumask_copy(&backtrace_mask, cpu_online_mask); + cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); printk(KERN_INFO "sending NMI to all CPUs:\n"); apic->send_IPI_all(NMI_VECTOR); /* Wait for up to 10 seconds for all CPUs to do the backtrace */ for (i = 0; i < 10 * 1000; i++) { - if (cpumask_empty(&backtrace_mask)) + if (cpumask_empty(to_cpumask(backtrace_mask))) break; mdelay(1); } diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index efa00e2b850..07cdbdcd7a9 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -334,10 +334,9 @@ static inline const struct cpumask *numaq_target_cpus(void) return cpu_all_mask; } -static inline unsigned long -numaq_check_apicid_used(physid_mask_t bitmap, int apicid) +static unsigned long numaq_check_apicid_used(physid_mask_t *map, int apicid) { - return physid_isset(apicid, bitmap); + return physid_isset(apicid, *map); } static inline unsigned long numaq_check_apicid_present(int bit) @@ -371,10 +370,10 @@ static inline int numaq_multi_timer_check(int apic, int irq) return apic != 0 && irq == 0; } -static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map) +static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) { /* We don't have a good way to do this yet - hack */ - return physids_promote(0xFUL); + return physids_promote(0xFUL, retmap); } static inline int numaq_cpu_to_logical_apicid(int cpu) @@ -402,12 +401,12 @@ static inline int numaq_apicid_to_node(int logical_apicid) return logical_apicid >> 4; } -static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) +static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap) { int node = numaq_apicid_to_node(logical_apicid); int cpu = __ffs(logical_apicid & 0xf); - return physid_mask_of_physid(cpu + 4*node); + physid_set_mask_of_physid(cpu + 4*node, retmap); } /* Where the IO area was mapped on multiquad, always 0 otherwise */ diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 0c0182cc947..1a6559f6768 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -108,7 +108,7 @@ struct apic apic_default = { .apicid_to_node = default_apicid_to_node, .cpu_to_logical_apicid = default_cpu_to_logical_apicid, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .apicid_to_cpu_present = default_apicid_to_cpu_present, + .apicid_to_cpu_present = physid_set_mask_of_physid, .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 645ecc4ff0b..9b419263d90 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -183,7 +183,7 @@ static const struct cpumask *summit_target_cpus(void) return cpumask_of(0); } -static unsigned long summit_check_apicid_used(physid_mask_t bitmap, int apicid) +static unsigned long summit_check_apicid_used(physid_mask_t *map, int apicid) { return 0; } @@ -261,15 +261,15 @@ static int summit_cpu_present_to_apicid(int mps_cpu) return BAD_APICID; } -static physid_mask_t summit_ioapic_phys_id_map(physid_mask_t phys_id_map) +static void summit_ioapic_phys_id_map(physid_mask_t *phys_id_map, physid_mask_t *retmap) { /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0x0F); + physids_promote(0x0FL, retmap); } -static physid_mask_t summit_apicid_to_cpu_present(int apicid) +static void summit_apicid_to_cpu_present(int apicid, physid_mask_t *retmap) { - return physid_mask_of_physid(0); + physid_set_mask_of_physid(0, retmap); } static int summit_check_phys_apicid_present(int physical_apicid) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 326c25477d3..130c4b93487 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -409,6 +409,12 @@ static __init void map_mmioh_high(int max_pnode) map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc); } +static __init void map_low_mmrs(void) +{ + init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE); + init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE); +} + static __init void uv_rtc_init(void) { long status; @@ -550,6 +556,8 @@ void __init uv_system_init(void) unsigned long mmr_base, present, paddr; unsigned short pnode_mask; + map_low_mmrs(); + m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); m_val = m_n_config.s.m_skt; n_val = m_n_config.s.n_skt; diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 151ace69a5a..b5b6b23bce5 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -204,7 +204,6 @@ #include <linux/module.h> #include <linux/poll.h> -#include <linux/smp_lock.h> #include <linux/types.h> #include <linux/stddef.h> #include <linux/timer.h> @@ -403,6 +402,7 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); static struct apm_user *user_list; static DEFINE_SPINLOCK(user_list_lock); +static DEFINE_MUTEX(apm_mutex); /* * Set up a segment that references the real mode segment 0x40 @@ -1531,7 +1531,7 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg) return -EPERM; switch (cmd) { case APM_IOC_STANDBY: - lock_kernel(); + mutex_lock(&apm_mutex); if (as->standbys_read > 0) { as->standbys_read--; as->standbys_pending--; @@ -1540,10 +1540,10 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg) queue_event(APM_USER_STANDBY, as); if (standbys_pending <= 0) standby(); - unlock_kernel(); + mutex_unlock(&apm_mutex); break; case APM_IOC_SUSPEND: - lock_kernel(); + mutex_lock(&apm_mutex); if (as->suspends_read > 0) { as->suspends_read--; as->suspends_pending--; @@ -1552,13 +1552,14 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg) queue_event(APM_USER_SUSPEND, as); if (suspends_pending <= 0) { ret = suspend(1); + mutex_unlock(&apm_mutex); } else { as->suspend_wait = 1; + mutex_unlock(&apm_mutex); wait_event_interruptible(apm_suspend_waitqueue, as->suspend_wait == 0); ret = as->suspend_result; } - unlock_kernel(); return ret; default: return -ENOTTY; @@ -1608,12 +1609,10 @@ static int do_open(struct inode *inode, struct file *filp) { struct apm_user *as; - lock_kernel(); as = kmalloc(sizeof(*as), GFP_KERNEL); if (as == NULL) { printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n", sizeof(*as)); - unlock_kernel(); return -ENOMEM; } as->magic = APM_BIOS_MAGIC; @@ -1635,7 +1634,6 @@ static int do_open(struct inode *inode, struct file *filp) user_list = as; spin_unlock(&user_list_lock); filp->private_data = as; - unlock_kernel(); return 0; } diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 68537e957a9..1d2cb383410 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -5,6 +5,7 @@ # Don't trace early stages of a secondary CPU boot ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_common.o = -pg +CFLAGS_REMOVE_perf_event.o = -pg endif # Make sure load_percpu_segment has no stackprotector diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index c910a716a71..7128b3799ce 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -535,7 +535,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) } } - display_cacheinfo(c); + cpu_detect_cache_sizes(c); /* Multi core CPU? */ if (c->extended_cpuid_level >= 0x80000008) { diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index c95e831bb09..e58d978e075 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -294,7 +294,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_REP_GOOD); } - display_cacheinfo(c); + cpu_detect_cache_sizes(c); } enum { diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cc25c2b4a56..a4ec8b64754 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -61,7 +61,7 @@ void __init setup_cpu_local_masks(void) static void __cpuinit default_init(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_64 - display_cacheinfo(c); + cpu_detect_cache_sizes(c); #else /* Not much we can do here... */ /* Check if at least it has cpuid */ @@ -383,7 +383,7 @@ static void __cpuinit get_model_name(struct cpuinfo_x86 *c) } } -void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) +void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c) { unsigned int n, dummy, ebx, ecx, edx, l2size; @@ -391,8 +391,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) if (n >= 0x80000005) { cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); - printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); c->x86_cache_size = (ecx>>24) + (edx>>24); #ifdef CONFIG_X86_64 /* On K8 L1 TLB is inclusive, so don't count it */ @@ -422,9 +420,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) #endif c->x86_cache_size = l2size; - - printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", - l2size, ecx & 0xFF); } void __cpuinit detect_ht(struct cpuinfo_x86 *c) @@ -659,24 +654,31 @@ void __init early_cpu_init(void) const struct cpu_dev *const *cdev; int count = 0; +#ifdef PROCESSOR_SELECT printk(KERN_INFO "KERNEL supported cpus:\n"); +#endif + for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { const struct cpu_dev *cpudev = *cdev; - unsigned int j; if (count >= X86_VENDOR_NUM) break; cpu_devs[count] = cpudev; count++; - for (j = 0; j < 2; j++) { - if (!cpudev->c_ident[j]) - continue; - printk(KERN_INFO " %s %s\n", cpudev->c_vendor, - cpudev->c_ident[j]); +#ifdef PROCESSOR_SELECT + { + unsigned int j; + + for (j = 0; j < 2; j++) { + if (!cpudev->c_ident[j]) + continue; + printk(KERN_INFO " %s %s\n", cpudev->c_vendor, + cpudev->c_ident[j]); + } } +#endif } - early_identify_cpu(&boot_cpu_data); } @@ -837,10 +839,8 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; } -#ifdef CONFIG_X86_MCE /* Init Machine Check Exception if available. */ - mcheck_init(c); -#endif + mcheck_cpu_init(c); select_idle_routine(c); diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 6de9a908e40..3624e8a0f71 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -32,6 +32,6 @@ struct cpu_dev { extern const struct cpu_dev *const __x86_cpu_dev_start[], *const __x86_cpu_dev_end[]; -extern void display_cacheinfo(struct cpuinfo_x86 *c); +extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); #endif diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 19807b89f05..4fbd384fb64 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -373,7 +373,7 @@ static void __cpuinit init_nsc(struct cpuinfo_x86 *c) /* Handle the GX (Formally known as the GX2) */ if (c->x86 == 5 && c->x86_model == 5) - display_cacheinfo(c); + cpu_detect_cache_sizes(c); else init_cyrix(c); } diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 804c40e2bc3..0df4c2b7107 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -488,22 +488,6 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) #endif } - if (trace) - printk(KERN_INFO "CPU: Trace cache: %dK uops", trace); - else if (l1i) - printk(KERN_INFO "CPU: L1 I cache: %dK", l1i); - - if (l1d) - printk(KERN_CONT ", L1 D cache: %dK\n", l1d); - else - printk(KERN_CONT "\n"); - - if (l2) - printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); - - if (l3) - printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); - c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); return l2; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 721a77ca811..0bcaa387586 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -46,6 +46,9 @@ #include "mce-internal.h" +#define CREATE_TRACE_POINTS +#include <trace/events/mce.h> + int mce_disabled __read_mostly; #define MISC_MCELOG_MINOR 227 @@ -85,18 +88,26 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait); static DEFINE_PER_CPU(struct mce, mces_seen); static int cpu_missing; -static void default_decode_mce(struct mce *m) +/* + * CPU/chipset specific EDAC code can register a notifier call here to print + * MCE errors in a human-readable form. + */ +ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); +EXPORT_SYMBOL_GPL(x86_mce_decoder_chain); + +static int default_decode_mce(struct notifier_block *nb, unsigned long val, + void *data) { pr_emerg("No human readable MCE decoding support on this CPU type.\n"); pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); + + return NOTIFY_STOP; } -/* - * CPU/chipset specific EDAC code can register a callback here to print - * MCE errors in a human-readable form: - */ -void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce; -EXPORT_SYMBOL(x86_mce_decode_callback); +static struct notifier_block mce_dec_nb = { + .notifier_call = default_decode_mce, + .priority = -1, +}; /* MCA banks polled by the period polling timer for corrected events */ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { @@ -141,6 +152,9 @@ void mce_log(struct mce *mce) { unsigned next, entry; + /* Emit the trace record: */ + trace_mce_record(mce); + mce->finished = 0; wmb(); for (;;) { @@ -204,9 +218,9 @@ static void print_mce(struct mce *m) /* * Print out human-readable details about the MCE error, - * (if the CPU has an implementation for that): + * (if the CPU has an implementation for that) */ - x86_mce_decode_callback(m); + atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); } static void print_mce_head(void) @@ -1122,7 +1136,7 @@ static int check_interval = 5 * 60; /* 5 minutes */ static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */ static DEFINE_PER_CPU(struct timer_list, mce_timer); -static void mcheck_timer(unsigned long data) +static void mce_start_timer(unsigned long data) { struct timer_list *t = &per_cpu(mce_timer, data); int *n; @@ -1187,7 +1201,7 @@ int mce_notify_irq(void) } EXPORT_SYMBOL_GPL(mce_notify_irq); -static int mce_banks_init(void) +static int __cpuinit __mcheck_cpu_mce_banks_init(void) { int i; @@ -1206,7 +1220,7 @@ static int mce_banks_init(void) /* * Initialize Machine Checks for a CPU. */ -static int __cpuinit mce_cap_init(void) +static int __cpuinit __mcheck_cpu_cap_init(void) { unsigned b; u64 cap; @@ -1228,7 +1242,7 @@ static int __cpuinit mce_cap_init(void) WARN_ON(banks != 0 && b != banks); banks = b; if (!mce_banks) { - int err = mce_banks_init(); + int err = __mcheck_cpu_mce_banks_init(); if (err) return err; @@ -1244,7 +1258,7 @@ static int __cpuinit mce_cap_init(void) return 0; } -static void mce_init(void) +static void __mcheck_cpu_init_generic(void) { mce_banks_t all_banks; u64 cap; @@ -1273,7 +1287,7 @@ static void mce_init(void) } /* Add per CPU specific workarounds here */ -static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) +static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) { if (c->x86_vendor == X86_VENDOR_UNKNOWN) { pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); @@ -1341,7 +1355,7 @@ static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) return 0; } -static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) +static void __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) { if (c->x86 != 5) return; @@ -1355,7 +1369,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) } } -static void mce_cpu_features(struct cpuinfo_x86 *c) +static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) { switch (c->x86_vendor) { case X86_VENDOR_INTEL: @@ -1369,7 +1383,7 @@ static void mce_cpu_features(struct cpuinfo_x86 *c) } } -static void mce_init_timer(void) +static void __mcheck_cpu_init_timer(void) { struct timer_list *t = &__get_cpu_var(mce_timer); int *n = &__get_cpu_var(mce_next_interval); @@ -1380,7 +1394,7 @@ static void mce_init_timer(void) *n = check_interval * HZ; if (!*n) return; - setup_timer(t, mcheck_timer, smp_processor_id()); + setup_timer(t, mce_start_timer, smp_processor_id()); t->expires = round_jiffies(jiffies + *n); add_timer_on(t, smp_processor_id()); } @@ -1400,27 +1414,28 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) = * Called for each booted CPU to set up machine checks. * Must be called with preempt off: */ -void __cpuinit mcheck_init(struct cpuinfo_x86 *c) +void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c) { if (mce_disabled) return; - mce_ancient_init(c); + __mcheck_cpu_ancient_init(c); if (!mce_available(c)) return; - if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) { + if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) { mce_disabled = 1; return; } machine_check_vector = do_machine_check; - mce_init(); - mce_cpu_features(c); - mce_init_timer(); + __mcheck_cpu_init_generic(); + __mcheck_cpu_init_vendor(c); + __mcheck_cpu_init_timer(); INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); + } /* @@ -1640,6 +1655,15 @@ static int __init mcheck_enable(char *str) } __setup("mce", mcheck_enable); +int __init mcheck_init(void) +{ + atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb); + + mcheck_intel_therm_init(); + + return 0; +} + /* * Sysfs support */ @@ -1648,7 +1672,7 @@ __setup("mce", mcheck_enable); * Disable machine checks on suspend and shutdown. We can't really handle * them later. */ -static int mce_disable(void) +static int mce_disable_error_reporting(void) { int i; @@ -1663,12 +1687,12 @@ static int mce_disable(void) static int mce_suspend(struct sys_device *dev, pm_message_t state) { - return mce_disable(); + return mce_disable_error_reporting(); } static int mce_shutdown(struct sys_device *dev) { - return mce_disable(); + return mce_disable_error_reporting(); } /* @@ -1678,8 +1702,8 @@ static int mce_shutdown(struct sys_device *dev) */ static int mce_resume(struct sys_device *dev) { - mce_init(); - mce_cpu_features(¤t_cpu_data); + __mcheck_cpu_init_generic(); + __mcheck_cpu_init_vendor(¤t_cpu_data); return 0; } @@ -1689,8 +1713,8 @@ static void mce_cpu_restart(void *data) del_timer_sync(&__get_cpu_var(mce_timer)); if (!mce_available(¤t_cpu_data)) return; - mce_init(); - mce_init_timer(); + __mcheck_cpu_init_generic(); + __mcheck_cpu_init_timer(); } /* Reinit MCEs after user configuration changes */ @@ -1716,7 +1740,7 @@ static void mce_enable_ce(void *all) cmci_reenable(); cmci_recheck(); if (all) - mce_init_timer(); + __mcheck_cpu_init_timer(); } static struct sysdev_class mce_sysclass = { @@ -1929,13 +1953,14 @@ static __cpuinit void mce_remove_device(unsigned int cpu) } /* Make sure there are no machine checks on offlined CPUs. */ -static void mce_disable_cpu(void *h) +static void __cpuinit mce_disable_cpu(void *h) { unsigned long action = *(unsigned long *)h; int i; if (!mce_available(¤t_cpu_data)) return; + if (!(action & CPU_TASKS_FROZEN)) cmci_clear(); for (i = 0; i < banks; i++) { @@ -1946,7 +1971,7 @@ static void mce_disable_cpu(void *h) } } -static void mce_reenable_cpu(void *h) +static void __cpuinit mce_reenable_cpu(void *h) { unsigned long action = *(unsigned long *)h; int i; @@ -2025,7 +2050,7 @@ static __init void mce_init_banks(void) } } -static __init int mce_init_device(void) +static __init int mcheck_init_device(void) { int err; int i = 0; @@ -2053,7 +2078,7 @@ static __init int mce_init_device(void) return err; } -device_initcall(mce_init_device); +device_initcall(mcheck_init_device); /* * Old style boot options parsing. Only for compatibility. @@ -2101,7 +2126,7 @@ static int fake_panic_set(void *data, u64 val) DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get, fake_panic_set, "%llu\n"); -static int __init mce_debugfs_init(void) +static int __init mcheck_debugfs_init(void) { struct dentry *dmce, *ffake_panic; @@ -2115,5 +2140,5 @@ static int __init mce_debugfs_init(void) return 0; } -late_initcall(mce_debugfs_init); +late_initcall(mcheck_debugfs_init); #endif diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index b3a1dba7533..4fef985fc22 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -49,6 +49,8 @@ static DEFINE_PER_CPU(struct thermal_state, thermal_state); static atomic_t therm_throt_en = ATOMIC_INIT(0); +static u32 lvtthmr_init __read_mostly; + #ifdef CONFIG_SYSFS #define define_therm_throt_sysdev_one_ro(_name) \ static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) @@ -254,6 +256,18 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) ack_APIC_irq(); } +void __init mcheck_intel_therm_init(void) +{ + /* + * This function is only called on boot CPU. Save the init thermal + * LVT value on BSP and use that value to restore APs' thermal LVT + * entry BIOS programmed later + */ + if (cpu_has(&boot_cpu_data, X86_FEATURE_ACPI) && + cpu_has(&boot_cpu_data, X86_FEATURE_ACC)) + lvtthmr_init = apic_read(APIC_LVTTHMR); +} + void intel_init_thermal(struct cpuinfo_x86 *c) { unsigned int cpu = smp_processor_id(); @@ -270,7 +284,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c) * since it might be delivered via SMI already: */ rdmsr(MSR_IA32_MISC_ENABLE, l, h); - h = apic_read(APIC_LVTTHMR); + + /* + * The initial value of thermal LVT entries on all APs always reads + * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI + * sequence to them and LVT registers are reset to 0s except for + * the mask bits which are set to 1s when APs receive INIT IPI. + * Always restore the value that BIOS has programmed on AP based on + * BSP's info we saved since BIOS is always setting the same value + * for all threads/cores + */ + apic_write(APIC_LVTTHMR, lvtthmr_init); + + h = lvtthmr_init; + if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", cpu); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index b5801c31184..c1bbed1021d 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -77,6 +77,18 @@ struct cpu_hw_events { struct debug_store *ds; }; +struct event_constraint { + unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + int code; +}; + +#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) } +#define EVENT_CONSTRAINT_END { .code = 0, .idxmsk[0] = 0 } + +#define for_each_event_constraint(e, c) \ + for ((e) = (c); (e)->idxmsk[0]; (e)++) + + /* * struct x86_pmu - generic x86 pmu */ @@ -102,6 +114,8 @@ struct x86_pmu { u64 intel_ctrl; void (*enable_bts)(u64 config); void (*disable_bts)(void); + int (*get_event_idx)(struct cpu_hw_events *cpuc, + struct hw_perf_event *hwc); }; static struct x86_pmu x86_pmu __read_mostly; @@ -110,6 +124,8 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; +static const struct event_constraint *event_constraints; + /* * Not sure about some of these */ @@ -155,6 +171,16 @@ static u64 p6_pmu_raw_event(u64 hw_event) return hw_event & P6_EVNTSEL_MASK; } +static const struct event_constraint intel_p6_event_constraints[] = +{ + EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ + EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ + EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ + EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ + EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ + EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ + EVENT_CONSTRAINT_END +}; /* * Intel PerfMon v3. Used on Core2 and later. @@ -170,6 +196,35 @@ static const u64 intel_perfmon_event_map[] = [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, }; +static const struct event_constraint intel_core_event_constraints[] = +{ + EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ + EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ + EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ + EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ + EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ + EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ + EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ + EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ + EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ + EVENT_CONSTRAINT_END +}; + +static const struct event_constraint intel_nehalem_event_constraints[] = +{ + EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ + EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ + EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ + EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ + EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ + EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */ + EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ + EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */ + EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */ + EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */ + EVENT_CONSTRAINT_END +}; + static u64 intel_pmu_event_map(int hw_event) { return intel_perfmon_event_map[hw_event]; @@ -190,7 +245,7 @@ static u64 __read_mostly hw_cache_event_ids [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX]; -static const u64 nehalem_hw_cache_event_ids +static __initconst u64 nehalem_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -281,7 +336,7 @@ static const u64 nehalem_hw_cache_event_ids }, }; -static const u64 core2_hw_cache_event_ids +static __initconst u64 core2_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -372,7 +427,7 @@ static const u64 core2_hw_cache_event_ids }, }; -static const u64 atom_hw_cache_event_ids +static __initconst u64 atom_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -469,7 +524,7 @@ static u64 intel_pmu_raw_event(u64 hw_event) #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL #define CORE_EVNTSEL_INV_MASK 0x00800000ULL -#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL +#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL #define CORE_EVNTSEL_MASK \ (CORE_EVNTSEL_EVENT_MASK | \ @@ -481,7 +536,7 @@ static u64 intel_pmu_raw_event(u64 hw_event) return hw_event & CORE_EVNTSEL_MASK; } -static const u64 amd_hw_cache_event_ids +static __initconst u64 amd_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -932,6 +987,8 @@ static int __hw_perf_event_init(struct perf_event *event) */ hwc->config = ARCH_PERFMON_EVENTSEL_INT; + hwc->idx = -1; + /* * Count user and OS events unless requested not to. */ @@ -1334,8 +1391,7 @@ static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx) x86_pmu_enable_event(hwc, idx); } -static int -fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) +static int fixed_mode_idx(struct hw_perf_event *hwc) { unsigned int hw_event; @@ -1349,6 +1405,12 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) if (!x86_pmu.num_events_fixed) return -1; + /* + * fixed counters do not take all possible filters + */ + if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK) + return -1; + if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) return X86_PMC_IDX_FIXED_INSTRUCTIONS; if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) @@ -1360,22 +1422,57 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) } /* - * Find a PMC slot for the freshly enabled / scheduled in event: + * generic counter allocator: get next free counter */ -static int x86_pmu_enable(struct perf_event *event) +static int +gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) +{ + int idx; + + idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events); + return idx == x86_pmu.num_events ? -1 : idx; +} + +/* + * intel-specific counter allocator: check event constraints + */ +static int +intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) +{ + const struct event_constraint *event_constraint; + int i, code; + + if (!event_constraints) + goto skip; + + code = hwc->config & CORE_EVNTSEL_EVENT_MASK; + + for_each_event_constraint(event_constraint, event_constraints) { + if (code == event_constraint->code) { + for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) { + if (!test_and_set_bit(i, cpuc->used_mask)) + return i; + } + return -1; + } + } +skip: + return gen_get_event_idx(cpuc, hwc); +} + +static int +x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; int idx; - idx = fixed_mode_idx(event, hwc); + idx = fixed_mode_idx(hwc); if (idx == X86_PMC_IDX_FIXED_BTS) { /* BTS is already occupied. */ if (test_and_set_bit(idx, cpuc->used_mask)) return -EAGAIN; hwc->config_base = 0; - hwc->event_base = 0; + hwc->event_base = 0; hwc->idx = idx; } else if (idx >= 0) { /* @@ -1396,20 +1493,35 @@ static int x86_pmu_enable(struct perf_event *event) } else { idx = hwc->idx; /* Try to get the previous generic event again */ - if (test_and_set_bit(idx, cpuc->used_mask)) { + if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) { try_generic: - idx = find_first_zero_bit(cpuc->used_mask, - x86_pmu.num_events); - if (idx == x86_pmu.num_events) + idx = x86_pmu.get_event_idx(cpuc, hwc); + if (idx == -1) return -EAGAIN; set_bit(idx, cpuc->used_mask); hwc->idx = idx; } - hwc->config_base = x86_pmu.eventsel; - hwc->event_base = x86_pmu.perfctr; + hwc->config_base = x86_pmu.eventsel; + hwc->event_base = x86_pmu.perfctr; } + return idx; +} + +/* + * Find a PMC slot for the freshly enabled / scheduled in event: + */ +static int x86_pmu_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx; + + idx = x86_schedule_event(cpuc, hwc); + if (idx < 0) + return idx; + perf_events_lapic_init(); x86_pmu.disable(hwc, idx); @@ -1852,7 +1964,7 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = { .priority = 1 }; -static struct x86_pmu p6_pmu = { +static __initconst struct x86_pmu p6_pmu = { .name = "p6", .handle_irq = p6_pmu_handle_irq, .disable_all = p6_pmu_disable_all, @@ -1877,9 +1989,10 @@ static struct x86_pmu p6_pmu = { */ .event_bits = 32, .event_mask = (1ULL << 32) - 1, + .get_event_idx = intel_get_event_idx, }; -static struct x86_pmu intel_pmu = { +static __initconst struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, .disable_all = intel_pmu_disable_all, @@ -1900,9 +2013,10 @@ static struct x86_pmu intel_pmu = { .max_period = (1ULL << 31) - 1, .enable_bts = intel_pmu_enable_bts, .disable_bts = intel_pmu_disable_bts, + .get_event_idx = intel_get_event_idx, }; -static struct x86_pmu amd_pmu = { +static __initconst struct x86_pmu amd_pmu = { .name = "AMD", .handle_irq = amd_pmu_handle_irq, .disable_all = amd_pmu_disable_all, @@ -1920,9 +2034,10 @@ static struct x86_pmu amd_pmu = { .apic = 1, /* use highest bit to detect overflow */ .max_period = (1ULL << 47) - 1, + .get_event_idx = gen_get_event_idx, }; -static int p6_pmu_init(void) +static __init int p6_pmu_init(void) { switch (boot_cpu_data.x86_model) { case 1: @@ -1932,10 +2047,12 @@ static int p6_pmu_init(void) case 7: case 8: case 11: /* Pentium III */ + event_constraints = intel_p6_event_constraints; break; case 9: case 13: /* Pentium M */ + event_constraints = intel_p6_event_constraints; break; default: pr_cont("unsupported p6 CPU model %d ", @@ -1954,7 +2071,7 @@ static int p6_pmu_init(void) return 0; } -static int intel_pmu_init(void) +static __init int intel_pmu_init(void) { union cpuid10_edx edx; union cpuid10_eax eax; @@ -2007,12 +2124,14 @@ static int intel_pmu_init(void) sizeof(hw_cache_event_ids)); pr_cont("Core2 events, "); + event_constraints = intel_core_event_constraints; break; default: case 26: memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + event_constraints = intel_nehalem_event_constraints; pr_cont("Nehalem/Corei7 events, "); break; case 28: @@ -2025,7 +2144,7 @@ static int intel_pmu_init(void) return 0; } -static int amd_pmu_init(void) +static __init int amd_pmu_init(void) { /* Performance-monitoring supported from K7 and later: */ if (boot_cpu_data.x86 < 6) @@ -2105,11 +2224,47 @@ static const struct pmu pmu = { .unthrottle = x86_pmu_unthrottle, }; +static int +validate_event(struct cpu_hw_events *cpuc, struct perf_event *event) +{ + struct hw_perf_event fake_event = event->hw; + + if (event->pmu && event->pmu != &pmu) + return 0; + + return x86_schedule_event(cpuc, &fake_event) >= 0; +} + +static int validate_group(struct perf_event *event) +{ + struct perf_event *sibling, *leader = event->group_leader; + struct cpu_hw_events fake_pmu; + + memset(&fake_pmu, 0, sizeof(fake_pmu)); + + if (!validate_event(&fake_pmu, leader)) + return -ENOSPC; + + list_for_each_entry(sibling, &leader->sibling_list, group_entry) { + if (!validate_event(&fake_pmu, sibling)) + return -ENOSPC; + } + + if (!validate_event(&fake_pmu, event)) + return -ENOSPC; + + return 0; +} + const struct pmu *hw_perf_event_init(struct perf_event *event) { int err; err = __hw_perf_event_init(event); + if (!err) { + if (event->group_leader != event) + err = validate_group(event); + } if (err) { if (event->destroy) event->destroy(event); diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index fab786f60ed..898df9719af 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -712,7 +712,7 @@ static void probe_nmi_watchdog(void) switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && - boot_cpu_data.x86 != 16) + boot_cpu_data.x86 != 16 && boot_cpu_data.x86 != 17) return; wd_ops = &k7_wd_ops; break; diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index bb62b3e5caa..28000743bbb 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -26,7 +26,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) early_init_transmeta(c); - display_cacheinfo(c); + cpu_detect_cache_sizes(c); /* Print CMS and CPU revision */ max = cpuid_eax(0x80860000); diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 6a52d4b36a3..7ef24a79699 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -116,21 +116,16 @@ static int cpuid_open(struct inode *inode, struct file *file) { unsigned int cpu; struct cpuinfo_x86 *c; - int ret = 0; - - lock_kernel(); cpu = iminor(file->f_path.dentry->d_inode); - if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { - ret = -ENXIO; /* No such CPU */ - goto out; - } + if (cpu >= nr_cpu_ids || !cpu_online(cpu)) + return -ENXIO; /* No such CPU */ + c = &cpu_data(cpu); if (c->cpuid_level < 0) - ret = -EIO; /* CPUID not supported */ -out: - unlock_kernel(); - return ret; + return -EIO; /* CPUID not supported */ + + return 0; } /* diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 5e409dc298a..a4849c10a77 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -27,8 +27,7 @@ #include <asm/cpu.h> #include <asm/reboot.h> #include <asm/virtext.h> -#include <asm/iommu.h> - +#include <asm/x86_init.h> #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) @@ -106,7 +105,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs) #endif #ifdef CONFIG_X86_64 - pci_iommu_shutdown(); + x86_platform.iommu_shutdown(); #endif crash_save_cpu(regs, safe_smp_processor_id()); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 2d8a371d433..b8ce165dde5 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -268,11 +268,12 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) show_registers(regs); #ifdef CONFIG_X86_32 - sp = (unsigned long) (®s->sp); - savesegment(ss, ss); - if (user_mode(regs)) { + if (user_mode_vm(regs)) { sp = regs->sp; ss = regs->ss & 0xffff; + } else { + sp = kernel_stack_pointer(regs); + savesegment(ss, ss); } printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); print_symbol("%s", regs->ip); diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index f7dd2a7c3bf..e0ed4c7abb6 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -10,9 +10,9 @@ #include <linux/module.h> #include <linux/ptrace.h> #include <linux/kexec.h> +#include <linux/sysfs.h> #include <linux/bug.h> #include <linux/nmi.h> -#include <linux/sysfs.h> #include <asm/stacktrace.h> @@ -35,6 +35,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (!stack) { unsigned long dummy; + stack = &dummy; if (task && task != current) stack = (unsigned long *)task->thread.sp; @@ -57,8 +58,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, context = (struct thread_info *) ((unsigned long)stack & (~(THREAD_SIZE - 1))); - bp = print_context_stack(context, stack, bp, ops, - data, NULL, &graph); + bp = print_context_stack(context, stack, bp, ops, data, NULL, &graph); stack = (unsigned long *)context->previous_esp; if (!stack) @@ -72,7 +72,7 @@ EXPORT_SYMBOL(dump_trace); void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp, char *log_lvl) + unsigned long *sp, unsigned long bp, char *log_lvl) { unsigned long *stack; int i; @@ -156,4 +156,3 @@ int is_valid_bugaddr(unsigned long ip) return ud2 == 0x0b0f; } - diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index a071e6be177..8e740934bd1 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -10,26 +10,28 @@ #include <linux/module.h> #include <linux/ptrace.h> #include <linux/kexec.h> +#include <linux/sysfs.h> #include <linux/bug.h> #include <linux/nmi.h> -#include <linux/sysfs.h> #include <asm/stacktrace.h> #include "dumpstack.h" +#define N_EXCEPTION_STACKS_END \ + (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2) static char x86_stack_ids[][8] = { - [DEBUG_STACK - 1] = "#DB", - [NMI_STACK - 1] = "NMI", - [DOUBLEFAULT_STACK - 1] = "#DF", - [STACKFAULT_STACK - 1] = "#SS", - [MCE_STACK - 1] = "#MC", + [ DEBUG_STACK-1 ] = "#DB", + [ NMI_STACK-1 ] = "NMI", + [ DOUBLEFAULT_STACK-1 ] = "#DF", + [ STACKFAULT_STACK-1 ] = "#SS", + [ MCE_STACK-1 ] = "#MC", #if DEBUG_STKSZ > EXCEPTION_STKSZ - [N_EXCEPTION_STACKS ... - N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" + [ N_EXCEPTION_STACKS ... + N_EXCEPTION_STACKS_END ] = "#DB[?]" #endif - }; +}; int x86_is_stack_id(int id, char *name) { @@ -37,7 +39,7 @@ int x86_is_stack_id(int id, char *name) } static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, - unsigned *usedp, char **idp) + unsigned *usedp, char **idp) { unsigned k; @@ -202,21 +204,24 @@ EXPORT_SYMBOL(dump_trace); void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp, char *log_lvl) + unsigned long *sp, unsigned long bp, char *log_lvl) { + unsigned long *irq_stack_end; + unsigned long *irq_stack; unsigned long *stack; + int cpu; int i; - const int cpu = smp_processor_id(); - unsigned long *irq_stack_end = - (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); - unsigned long *irq_stack = - (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE); + + preempt_disable(); + cpu = smp_processor_id(); + + irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); + irq_stack = (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE); /* - * debugging aid: "show_stack(NULL, NULL);" prints the - * back trace for this cpu. + * Debugging aid: "show_stack(NULL, NULL);" prints the + * back trace for this cpu: */ - if (sp == NULL) { if (task) sp = (unsigned long *)task->thread.sp; @@ -240,6 +245,8 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, printk(" %016lx", *stack++); touch_nmi_watchdog(); } + preempt_enable(); + printk("\n"); show_trace_log_lvl(task, regs, sp, bp, log_lvl); } @@ -303,4 +310,3 @@ int is_valid_bugaddr(unsigned long ip) return ud2 == 0x0b0f; } - diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c097e7d607c..50b9c220e12 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -334,6 +334,10 @@ ENTRY(ret_from_fork) END(ret_from_fork) /* + * Interrupt exit functions should be protected against kprobes + */ + .pushsection .kprobes.text, "ax" +/* * Return to user mode is not as complex as all this looks, * but we want the default path for a system call return to * go as quickly as possible which is why some of this is @@ -383,6 +387,10 @@ need_resched: END(resume_kernel) #endif CFI_ENDPROC +/* + * End of kprobes section + */ + .popsection /* SYSENTER_RETURN points to after the "sysenter" instruction in the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ @@ -513,6 +521,10 @@ sysexit_audit: PTGS_TO_GS_EX ENDPROC(ia32_sysenter_target) +/* + * syscall stub including irq exit should be protected against kprobes + */ + .pushsection .kprobes.text, "ax" # system call handler stub ENTRY(system_call) RING0_INT_FRAME # can't unwind into user space anyway @@ -705,6 +717,10 @@ syscall_badsys: jmp resume_userspace END(syscall_badsys) CFI_ENDPROC +/* + * End of kprobes section + */ + .popsection /* * System calls that need a pt_regs pointer. @@ -814,6 +830,10 @@ common_interrupt: ENDPROC(common_interrupt) CFI_ENDPROC +/* + * Irq entries should be protected against kprobes + */ + .pushsection .kprobes.text, "ax" #define BUILD_INTERRUPT3(name, nr, fn) \ ENTRY(name) \ RING0_INT_FRAME; \ @@ -980,6 +1000,10 @@ ENTRY(spurious_interrupt_bug) jmp error_code CFI_ENDPROC END(spurious_interrupt_bug) +/* + * End of kprobes section + */ + .popsection ENTRY(kernel_thread_helper) pushl $0 # fake return address for unwinder @@ -1185,17 +1209,14 @@ END(ftrace_graph_caller) .globl return_to_handler return_to_handler: - pushl $0 pushl %eax - pushl %ecx pushl %edx movl %ebp, %eax call ftrace_return_to_handler - movl %eax, 0xc(%esp) + movl %eax, %ecx popl %edx - popl %ecx popl %eax - ret + jmp *%ecx #endif .section .rodata,"a" diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index b5c061f8f35..4deb8fc849d 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -155,11 +155,11 @@ GLOBAL(return_to_handler) call ftrace_return_to_handler - movq %rax, 16(%rsp) + movq %rax, %rdi movq 8(%rsp), %rdx movq (%rsp), %rax - addq $16, %rsp - retq + addq $24, %rsp + jmp *%rdi #endif @@ -803,6 +803,10 @@ END(interrupt) call \func .endm +/* + * Interrupt entry/exit should be protected against kprobes + */ + .pushsection .kprobes.text, "ax" /* * The interrupt stubs push (~vector+0x80) onto the stack and * then jump to common_interrupt. @@ -941,6 +945,10 @@ ENTRY(retint_kernel) CFI_ENDPROC END(common_interrupt) +/* + * End of kprobes section + */ + .popsection /* * APIC interrupts. @@ -1491,12 +1499,17 @@ error_kernelspace: leaq irq_return(%rip),%rcx cmpq %rcx,RIP+8(%rsp) je error_swapgs - movl %ecx,%ecx /* zero extend */ - cmpq %rcx,RIP+8(%rsp) - je error_swapgs + movl %ecx,%eax /* zero extend */ + cmpq %rax,RIP+8(%rsp) + je bstep_iret cmpq $gs_change,RIP+8(%rsp) je error_swapgs jmp error_sti + +bstep_iret: + /* Fix truncated RIP */ + movq %rcx,RIP+8(%rsp) + jmp error_swapgs END(error_entry) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 9dbb527e165..5a1b9758fd6 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -9,6 +9,8 @@ * the dangers of modifying code on the run. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/spinlock.h> #include <linux/hardirq.h> #include <linux/uaccess.h> @@ -336,15 +338,15 @@ int __init ftrace_dyn_arch_init(void *data) switch (faulted) { case 0: - pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n"); + pr_info("converting mcount calls to 0f 1f 44 00 00\n"); memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE); break; case 1: - pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n"); + pr_info("converting mcount calls to 66 66 66 66 90\n"); memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE); break; case 2: - pr_info("ftrace: converting mcount calls to jmp . + 5\n"); + pr_info("converting mcount calls to jmp . + 5\n"); memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE); break; } @@ -468,82 +470,10 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, #ifdef CONFIG_FTRACE_SYSCALLS -extern unsigned long __start_syscalls_metadata[]; -extern unsigned long __stop_syscalls_metadata[]; extern unsigned long *sys_call_table; -static struct syscall_metadata **syscalls_metadata; - -static struct syscall_metadata *find_syscall_meta(unsigned long *syscall) -{ - struct syscall_metadata *start; - struct syscall_metadata *stop; - char str[KSYM_SYMBOL_LEN]; - - - start = (struct syscall_metadata *)__start_syscalls_metadata; - stop = (struct syscall_metadata *)__stop_syscalls_metadata; - kallsyms_lookup((unsigned long) syscall, NULL, NULL, NULL, str); - - for ( ; start < stop; start++) { - if (start->name && !strcmp(start->name, str)) - return start; - } - return NULL; -} - -struct syscall_metadata *syscall_nr_to_meta(int nr) -{ - if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) - return NULL; - - return syscalls_metadata[nr]; -} - -int syscall_name_to_nr(char *name) +unsigned long __init arch_syscall_addr(int nr) { - int i; - - if (!syscalls_metadata) - return -1; - - for (i = 0; i < NR_syscalls; i++) { - if (syscalls_metadata[i]) { - if (!strcmp(syscalls_metadata[i]->name, name)) - return i; - } - } - return -1; -} - -void set_syscall_enter_id(int num, int id) -{ - syscalls_metadata[num]->enter_id = id; -} - -void set_syscall_exit_id(int num, int id) -{ - syscalls_metadata[num]->exit_id = id; -} - -static int __init arch_init_ftrace_syscalls(void) -{ - int i; - struct syscall_metadata *meta; - unsigned long **psys_syscall_table = &sys_call_table; - - syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * - NR_syscalls, GFP_KERNEL); - if (!syscalls_metadata) { - WARN_ON(1); - return -ENOMEM; - } - - for (i = 0; i < NR_syscalls; i++) { - meta = find_syscall_meta(psys_syscall_table[i]); - syscalls_metadata[i] = meta; - } - return 0; + return (unsigned long)(&sys_call_table)[nr]; } -arch_initcall(arch_init_ftrace_syscalls); #endif diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 780cd928fcd..22db86a3764 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -212,8 +212,8 @@ ENTRY(secondary_startup_64) */ lgdt early_gdt_descr(%rip) - /* set up data segments. actually 0 would do too */ - movl $__KERNEL_DS,%eax + /* set up data segments */ + xorl %eax,%eax movl %eax,%ds movl %eax,%ss movl %eax,%es diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c new file mode 100644 index 00000000000..d42f65ac492 --- /dev/null +++ b/arch/x86/kernel/hw_breakpoint.c @@ -0,0 +1,555 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) 2007 Alan Stern + * Copyright (C) 2009 IBM Corporation + * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com> + * + * Authors: Alan Stern <stern@rowland.harvard.edu> + * K.Prasad <prasad@linux.vnet.ibm.com> + * Frederic Weisbecker <fweisbec@gmail.com> + */ + +/* + * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, + * using the CPU's debug registers. + */ + +#include <linux/perf_event.h> +#include <linux/hw_breakpoint.h> +#include <linux/irqflags.h> +#include <linux/notifier.h> +#include <linux/kallsyms.h> +#include <linux/kprobes.h> +#include <linux/percpu.h> +#include <linux/kdebug.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/smp.h> + +#include <asm/hw_breakpoint.h> +#include <asm/processor.h> +#include <asm/debugreg.h> + +/* Per cpu debug control register value */ +DEFINE_PER_CPU(unsigned long, cpu_dr7); +EXPORT_PER_CPU_SYMBOL(cpu_dr7); + +/* Per cpu debug address registers values */ +static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]); + +/* + * Stores the breakpoints currently in use on each breakpoint address + * register for each cpus + */ +static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]); + + +static inline unsigned long +__encode_dr7(int drnum, unsigned int len, unsigned int type) +{ + unsigned long bp_info; + + bp_info = (len | type) & 0xf; + bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE); + bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)); + + return bp_info; +} + +/* + * Encode the length, type, Exact, and Enable bits for a particular breakpoint + * as stored in debug register 7. + */ +unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) +{ + return __encode_dr7(drnum, len, type) | DR_GLOBAL_SLOWDOWN; +} + +/* + * Decode the length and type bits for a particular breakpoint as + * stored in debug register 7. Return the "enabled" status. + */ +int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type) +{ + int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE); + + *len = (bp_info & 0xc) | 0x40; + *type = (bp_info & 0x3) | 0x80; + + return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3; +} + +/* + * Install a perf counter breakpoint. + * + * We seek a free debug address register and use it for this + * breakpoint. Eventually we enable it in the debug control register. + * + * Atomic: we hold the counter->ctx->lock and we only handle variables + * and registers local to this cpu. + */ +int arch_install_hw_breakpoint(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + unsigned long *dr7; + int i; + + for (i = 0; i < HBP_NUM; i++) { + struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); + + if (!*slot) { + *slot = bp; + break; + } + } + + if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) + return -EBUSY; + + set_debugreg(info->address, i); + __get_cpu_var(cpu_debugreg[i]) = info->address; + + dr7 = &__get_cpu_var(cpu_dr7); + *dr7 |= encode_dr7(i, info->len, info->type); + + set_debugreg(*dr7, 7); + + return 0; +} + +/* + * Uninstall the breakpoint contained in the given counter. + * + * First we search the debug address register it uses and then we disable + * it. + * + * Atomic: we hold the counter->ctx->lock and we only handle variables + * and registers local to this cpu. + */ +void arch_uninstall_hw_breakpoint(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + unsigned long *dr7; + int i; + + for (i = 0; i < HBP_NUM; i++) { + struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); + + if (*slot == bp) { + *slot = NULL; + break; + } + } + + if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) + return; + + dr7 = &__get_cpu_var(cpu_dr7); + *dr7 &= ~__encode_dr7(i, info->len, info->type); + + set_debugreg(*dr7, 7); +} + +static int get_hbp_len(u8 hbp_len) +{ + unsigned int len_in_bytes = 0; + + switch (hbp_len) { + case X86_BREAKPOINT_LEN_1: + len_in_bytes = 1; + break; + case X86_BREAKPOINT_LEN_2: + len_in_bytes = 2; + break; + case X86_BREAKPOINT_LEN_4: + len_in_bytes = 4; + break; +#ifdef CONFIG_X86_64 + case X86_BREAKPOINT_LEN_8: + len_in_bytes = 8; + break; +#endif + } + return len_in_bytes; +} + +/* + * Check for virtual address in user space. + */ +int arch_check_va_in_userspace(unsigned long va, u8 hbp_len) +{ + unsigned int len; + + len = get_hbp_len(hbp_len); + + return (va <= TASK_SIZE - len); +} + +/* + * Check for virtual address in kernel space. + */ +static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) +{ + unsigned int len; + + len = get_hbp_len(hbp_len); + + return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); +} + +/* + * Store a breakpoint's encoded address, length, and type. + */ +static int arch_store_info(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + /* + * For kernel-addresses, either the address or symbol name can be + * specified. + */ + if (info->name) + info->address = (unsigned long) + kallsyms_lookup_name(info->name); + if (info->address) + return 0; + + return -EINVAL; +} + +int arch_bp_generic_fields(int x86_len, int x86_type, + int *gen_len, int *gen_type) +{ + /* Len */ + switch (x86_len) { + case X86_BREAKPOINT_LEN_1: + *gen_len = HW_BREAKPOINT_LEN_1; + break; + case X86_BREAKPOINT_LEN_2: + *gen_len = HW_BREAKPOINT_LEN_2; + break; + case X86_BREAKPOINT_LEN_4: + *gen_len = HW_BREAKPOINT_LEN_4; + break; +#ifdef CONFIG_X86_64 + case X86_BREAKPOINT_LEN_8: + *gen_len = HW_BREAKPOINT_LEN_8; + break; +#endif + default: + return -EINVAL; + } + + /* Type */ + switch (x86_type) { + case X86_BREAKPOINT_EXECUTE: + *gen_type = HW_BREAKPOINT_X; + break; + case X86_BREAKPOINT_WRITE: + *gen_type = HW_BREAKPOINT_W; + break; + case X86_BREAKPOINT_RW: + *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; + break; + default: + return -EINVAL; + } + + return 0; +} + + +static int arch_build_bp_info(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + + info->address = bp->attr.bp_addr; + + /* Len */ + switch (bp->attr.bp_len) { + case HW_BREAKPOINT_LEN_1: + info->len = X86_BREAKPOINT_LEN_1; + break; + case HW_BREAKPOINT_LEN_2: + info->len = X86_BREAKPOINT_LEN_2; + break; + case HW_BREAKPOINT_LEN_4: + info->len = X86_BREAKPOINT_LEN_4; + break; +#ifdef CONFIG_X86_64 + case HW_BREAKPOINT_LEN_8: + info->len = X86_BREAKPOINT_LEN_8; + break; +#endif + default: + return -EINVAL; + } + + /* Type */ + switch (bp->attr.bp_type) { + case HW_BREAKPOINT_W: + info->type = X86_BREAKPOINT_WRITE; + break; + case HW_BREAKPOINT_W | HW_BREAKPOINT_R: + info->type = X86_BREAKPOINT_RW; + break; + case HW_BREAKPOINT_X: + info->type = X86_BREAKPOINT_EXECUTE; + break; + default: + return -EINVAL; + } + + return 0; +} +/* + * Validate the arch-specific HW Breakpoint register settings + */ +int arch_validate_hwbkpt_settings(struct perf_event *bp, + struct task_struct *tsk) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + unsigned int align; + int ret; + + + ret = arch_build_bp_info(bp); + if (ret) + return ret; + + ret = -EINVAL; + + if (info->type == X86_BREAKPOINT_EXECUTE) + /* + * Ptrace-refactoring code + * For now, we'll allow instruction breakpoint only for user-space + * addresses + */ + if ((!arch_check_va_in_userspace(info->address, info->len)) && + info->len != X86_BREAKPOINT_EXECUTE) + return ret; + + switch (info->len) { + case X86_BREAKPOINT_LEN_1: + align = 0; + break; + case X86_BREAKPOINT_LEN_2: + align = 1; + break; + case X86_BREAKPOINT_LEN_4: + align = 3; + break; +#ifdef CONFIG_X86_64 + case X86_BREAKPOINT_LEN_8: + align = 7; + break; +#endif + default: + return ret; + } + + if (bp->callback) + ret = arch_store_info(bp); + + if (ret < 0) + return ret; + /* + * Check that the low-order bits of the address are appropriate + * for the alignment implied by len. + */ + if (info->address & align) + return -EINVAL; + + /* Check that the virtual address is in the proper range */ + if (tsk) { + if (!arch_check_va_in_userspace(info->address, info->len)) + return -EFAULT; + } else { + if (!arch_check_va_in_kernelspace(info->address, info->len)) + return -EFAULT; + } + + return 0; +} + +/* + * Dump the debug register contents to the user. + * We can't dump our per cpu values because it + * may contain cpu wide breakpoint, something that + * doesn't belong to the current task. + * + * TODO: include non-ptrace user breakpoints (perf) + */ +void aout_dump_debugregs(struct user *dump) +{ + int i; + int dr7 = 0; + struct perf_event *bp; + struct arch_hw_breakpoint *info; + struct thread_struct *thread = ¤t->thread; + + for (i = 0; i < HBP_NUM; i++) { + bp = thread->ptrace_bps[i]; + + if (bp && !bp->attr.disabled) { + dump->u_debugreg[i] = bp->attr.bp_addr; + info = counter_arch_bp(bp); + dr7 |= encode_dr7(i, info->len, info->type); + } else { + dump->u_debugreg[i] = 0; + } + } + + dump->u_debugreg[4] = 0; + dump->u_debugreg[5] = 0; + dump->u_debugreg[6] = current->thread.debugreg6; + + dump->u_debugreg[7] = dr7; +} +EXPORT_SYMBOL_GPL(aout_dump_debugregs); + +/* + * Release the user breakpoints used by ptrace + */ +void flush_ptrace_hw_breakpoint(struct task_struct *tsk) +{ + int i; + struct thread_struct *t = &tsk->thread; + + for (i = 0; i < HBP_NUM; i++) { + unregister_hw_breakpoint(t->ptrace_bps[i]); + t->ptrace_bps[i] = NULL; + } +} + +void hw_breakpoint_restore(void) +{ + set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0); + set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1); + set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2); + set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3); + set_debugreg(current->thread.debugreg6, 6); + set_debugreg(__get_cpu_var(cpu_dr7), 7); +} +EXPORT_SYMBOL_GPL(hw_breakpoint_restore); + +/* + * Handle debug exception notifications. + * + * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below. + * + * NOTIFY_DONE returned if one of the following conditions is true. + * i) When the causative address is from user-space and the exception + * is a valid one, i.e. not triggered as a result of lazy debug register + * switching + * ii) When there are more bits than trap<n> set in DR6 register (such + * as BD, BS or BT) indicating that more than one debug condition is + * met and requires some more action in do_debug(). + * + * NOTIFY_STOP returned for all other cases + * + */ +static int __kprobes hw_breakpoint_handler(struct die_args *args) +{ + int i, cpu, rc = NOTIFY_STOP; + struct perf_event *bp; + unsigned long dr7, dr6; + unsigned long *dr6_p; + + /* The DR6 value is pointed by args->err */ + dr6_p = (unsigned long *)ERR_PTR(args->err); + dr6 = *dr6_p; + + /* Do an early return if no trap bits are set in DR6 */ + if ((dr6 & DR_TRAP_BITS) == 0) + return NOTIFY_DONE; + + get_debugreg(dr7, 7); + /* Disable breakpoints during exception handling */ + set_debugreg(0UL, 7); + /* + * Assert that local interrupts are disabled + * Reset the DRn bits in the virtualized register value. + * The ptrace trigger routine will add in whatever is needed. + */ + current->thread.debugreg6 &= ~DR_TRAP_BITS; + cpu = get_cpu(); + + /* Handle all the breakpoints that were triggered */ + for (i = 0; i < HBP_NUM; ++i) { + if (likely(!(dr6 & (DR_TRAP0 << i)))) + continue; + + /* + * The counter may be concurrently released but that can only + * occur from a call_rcu() path. We can then safely fetch + * the breakpoint, use its callback, touch its counter + * while we are in an rcu_read_lock() path. + */ + rcu_read_lock(); + + bp = per_cpu(bp_per_reg[i], cpu); + if (bp) + rc = NOTIFY_DONE; + /* + * Reset the 'i'th TRAP bit in dr6 to denote completion of + * exception handling + */ + (*dr6_p) &= ~(DR_TRAP0 << i); + /* + * bp can be NULL due to lazy debug register switching + * or due to concurrent perf counter removing. + */ + if (!bp) { + rcu_read_unlock(); + break; + } + + (bp->callback)(bp, args->regs); + + rcu_read_unlock(); + } + if (dr6 & (~DR_TRAP_BITS)) + rc = NOTIFY_DONE; + + set_debugreg(dr7, 7); + put_cpu(); + + return rc; +} + +/* + * Handle debug exception notifications. + */ +int __kprobes hw_breakpoint_exceptions_notify( + struct notifier_block *unused, unsigned long val, void *data) +{ + if (val != DIE_DEBUG) + return NOTIFY_DONE; + + return hw_breakpoint_handler(data); +} + +void hw_breakpoint_pmu_read(struct perf_event *bp) +{ + /* TODO */ +} + +void hw_breakpoint_pmu_unthrottle(struct perf_event *bp) +{ + /* TODO */ +} diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 04bbd527856..fee6cc2b207 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -92,17 +92,17 @@ static int show_other_interrupts(struct seq_file *p, int prec) seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); seq_printf(p, " TLB shootdowns\n"); #endif -#ifdef CONFIG_X86_MCE +#ifdef CONFIG_X86_THERMAL_VECTOR seq_printf(p, "%*s: ", prec, "TRM"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); seq_printf(p, " Thermal event interrupts\n"); -# ifdef CONFIG_X86_MCE_THRESHOLD +#endif +#ifdef CONFIG_X86_MCE_THRESHOLD seq_printf(p, "%*s: ", prec, "THR"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); seq_printf(p, " Threshold APIC interrupts\n"); -# endif #endif #ifdef CONFIG_X86_MCE seq_printf(p, "%*s: ", prec, "MCE"); @@ -194,11 +194,11 @@ u64 arch_irq_stat_cpu(unsigned int cpu) sum += irq_stats(cpu)->irq_call_count; sum += irq_stats(cpu)->irq_tlb_count; #endif -#ifdef CONFIG_X86_MCE +#ifdef CONFIG_X86_THERMAL_VECTOR sum += irq_stats(cpu)->irq_thermal_count; -# ifdef CONFIG_X86_MCE_THRESHOLD +#endif +#ifdef CONFIG_X86_MCE_THRESHOLD sum += irq_stats(cpu)->irq_threshold_count; -# endif #endif #ifdef CONFIG_X86_MCE sum += per_cpu(mce_exception_count, cpu); @@ -274,3 +274,93 @@ void smp_generic_interrupt(struct pt_regs *regs) } EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); + +#ifdef CONFIG_HOTPLUG_CPU +/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ +void fixup_irqs(void) +{ + unsigned int irq, vector; + static int warned; + struct irq_desc *desc; + + for_each_irq_desc(irq, desc) { + int break_affinity = 0; + int set_affinity = 1; + const struct cpumask *affinity; + + if (!desc) + continue; + if (irq == 2) + continue; + + /* interrupt's are disabled at this point */ + spin_lock(&desc->lock); + + affinity = desc->affinity; + if (!irq_has_action(irq) || + cpumask_equal(affinity, cpu_online_mask)) { + spin_unlock(&desc->lock); + continue; + } + + /* + * Complete the irq move. This cpu is going down and for + * non intr-remapping case, we can't wait till this interrupt + * arrives at this cpu before completing the irq move. + */ + irq_force_complete_move(irq); + + if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + break_affinity = 1; + affinity = cpu_all_mask; + } + + if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->mask) + desc->chip->mask(irq); + + if (desc->chip->set_affinity) + desc->chip->set_affinity(irq, affinity); + else if (!(warned++)) + set_affinity = 0; + + if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->unmask) + desc->chip->unmask(irq); + + spin_unlock(&desc->lock); + + if (break_affinity && set_affinity) + printk("Broke affinity for irq %i\n", irq); + else if (!set_affinity) + printk("Cannot set affinity for irq %i\n", irq); + } + + /* + * We can remove mdelay() and then send spuriuous interrupts to + * new cpu targets for all the irqs that were handled previously by + * this cpu. While it works, I have seen spurious interrupt messages + * (nothing wrong but still...). + * + * So for now, retain mdelay(1) and check the IRR and then send those + * interrupts to new targets as this cpu is already offlined... + */ + mdelay(1); + + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { + unsigned int irr; + + if (__get_cpu_var(vector_irq)[vector] < 0) + continue; + + irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); + if (irr & (1 << (vector % 32))) { + irq = __get_cpu_var(vector_irq)[vector]; + + desc = irq_to_desc(irq); + spin_lock(&desc->lock); + if (desc->chip->retrigger) + desc->chip->retrigger(irq); + spin_unlock(&desc->lock); + } + } +} +#endif diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 7d35d0fe232..10709f29d16 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -211,48 +211,3 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) return true; } - -#ifdef CONFIG_HOTPLUG_CPU - -/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ -void fixup_irqs(void) -{ - unsigned int irq; - struct irq_desc *desc; - - for_each_irq_desc(irq, desc) { - const struct cpumask *affinity; - - if (!desc) - continue; - if (irq == 2) - continue; - - affinity = desc->affinity; - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { - printk("Breaking affinity for irq %i\n", irq); - affinity = cpu_all_mask; - } - if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, affinity); - else if (desc->action) - printk_once("Cannot set affinity for irq %i\n", irq); - } - -#if 0 - barrier(); - /* Ingo Molnar says: "after the IO-APIC masks have been redirected - [note the nop - the interrupt-enable boundary on x86 is two - instructions from sti] - to flush out pending hardirqs and - IPIs. After this point nothing is supposed to reach this CPU." */ - __asm__ __volatile__("sti; nop; cli"); - barrier(); -#else - /* That doesn't seem sufficient. Give it 1ms. */ - local_irq_enable(); - mdelay(1); - local_irq_disable(); -#endif -} -#endif - diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 977d8b43a0d..acf8fbf8fbd 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -62,64 +62,6 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) return true; } -#ifdef CONFIG_HOTPLUG_CPU -/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ -void fixup_irqs(void) -{ - unsigned int irq; - static int warned; - struct irq_desc *desc; - - for_each_irq_desc(irq, desc) { - int break_affinity = 0; - int set_affinity = 1; - const struct cpumask *affinity; - - if (!desc) - continue; - if (irq == 2) - continue; - - /* interrupt's are disabled at this point */ - spin_lock(&desc->lock); - - affinity = desc->affinity; - if (!irq_has_action(irq) || - cpumask_equal(affinity, cpu_online_mask)) { - spin_unlock(&desc->lock); - continue; - } - - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { - break_affinity = 1; - affinity = cpu_all_mask; - } - - if (desc->chip->mask) - desc->chip->mask(irq); - - if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, affinity); - else if (!(warned++)) - set_affinity = 0; - - if (desc->chip->unmask) - desc->chip->unmask(irq); - - spin_unlock(&desc->lock); - - if (break_affinity && set_affinity) - printk("Broke affinity for irq %i\n", irq); - else if (!set_affinity) - printk("Cannot set affinity for irq %i\n", irq); - } - - /* That doesn't seem sufficient. Give it 1ms. */ - local_irq_enable(); - mdelay(1); - local_irq_disable(); -} -#endif extern void call_softirq(void); diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 8d82a77a3f3..20a5b368946 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -43,6 +43,7 @@ #include <linux/smp.h> #include <linux/nmi.h> +#include <asm/debugreg.h> #include <asm/apicdef.h> #include <asm/system.h> @@ -88,7 +89,6 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) gdb_regs[GDB_SS] = __KERNEL_DS; gdb_regs[GDB_FS] = 0xFFFF; gdb_regs[GDB_GS] = 0xFFFF; - gdb_regs[GDB_SP] = (int)®s->sp; #else gdb_regs[GDB_R8] = regs->r8; gdb_regs[GDB_R9] = regs->r9; @@ -101,8 +101,8 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) gdb_regs32[GDB_PS] = regs->flags; gdb_regs32[GDB_CS] = regs->cs; gdb_regs32[GDB_SS] = regs->ss; - gdb_regs[GDB_SP] = regs->sp; #endif + gdb_regs[GDB_SP] = kernel_stack_pointer(regs); } /** @@ -434,6 +434,11 @@ single_step_cont(struct pt_regs *regs, struct die_args *args) "resuming...\n"); kgdb_arch_handle_exception(args->trapnr, args->signr, args->err, "c", "", regs); + /* + * Reset the BS bit in dr6 (pointed by args->err) to + * denote completion of processing + */ + (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; return NOTIFY_STOP; } diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7b5169d2b00..1f3186ce213 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -48,31 +48,22 @@ #include <linux/preempt.h> #include <linux/module.h> #include <linux/kdebug.h> +#include <linux/kallsyms.h> #include <asm/cacheflush.h> #include <asm/desc.h> #include <asm/pgtable.h> #include <asm/uaccess.h> #include <asm/alternative.h> +#include <asm/insn.h> +#include <asm/debugreg.h> void jprobe_return_end(void); DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); -#ifdef CONFIG_X86_64 -#define stack_addr(regs) ((unsigned long *)regs->sp) -#else -/* - * "®s->sp" looks wrong, but it's correct for x86_32. x86_32 CPUs - * don't save the ss and esp registers if the CPU is already in kernel - * mode when it traps. So for kprobes, regs->sp and regs->ss are not - * the [nonexistent] saved stack pointer and ss register, but rather - * the top 8 bytes of the pre-int3 stack. So ®s->sp happens to - * point to the top of the pre-int3 stack. - */ -#define stack_addr(regs) ((unsigned long *)®s->sp) -#endif +#define stack_addr(regs) ((unsigned long *)kernel_stack_pointer(regs)) #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ @@ -106,50 +97,6 @@ static const u32 twobyte_is_boostable[256 / 32] = { /* ----------------------------------------------- */ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ }; -static const u32 onebyte_has_modrm[256 / 32] = { - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ - /* ----------------------------------------------- */ - W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */ - W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */ - W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */ - W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */ - W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */ - W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */ - W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */ - W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */ - W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ - W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */ - W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */ - W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */ - W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */ - W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ - W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */ - W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) /* f0 */ - /* ----------------------------------------------- */ - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ -}; -static const u32 twobyte_has_modrm[256 / 32] = { - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ - /* ----------------------------------------------- */ - W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */ - W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */ - W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */ - W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */ - W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */ - W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */ - W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */ - W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */ - W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */ - W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */ - W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */ - W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */ - W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */ - W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */ - W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */ - W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* ff */ - /* ----------------------------------------------- */ - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ -}; #undef W struct kretprobe_blackpoint kretprobe_blacklist[] = { @@ -244,6 +191,75 @@ retry: } } +/* Recover the probed instruction at addr for further analysis. */ +static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) +{ + struct kprobe *kp; + kp = get_kprobe((void *)addr); + if (!kp) + return -EINVAL; + + /* + * Basically, kp->ainsn.insn has an original instruction. + * However, RIP-relative instruction can not do single-stepping + * at different place, fix_riprel() tweaks the displacement of + * that instruction. In that case, we can't recover the instruction + * from the kp->ainsn.insn. + * + * On the other hand, kp->opcode has a copy of the first byte of + * the probed instruction, which is overwritten by int3. And + * the instruction at kp->addr is not modified by kprobes except + * for the first byte, we can recover the original instruction + * from it and kp->opcode. + */ + memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + buf[0] = kp->opcode; + return 0; +} + +/* Dummy buffers for kallsyms_lookup */ +static char __dummy_buf[KSYM_NAME_LEN]; + +/* Check if paddr is at an instruction boundary */ +static int __kprobes can_probe(unsigned long paddr) +{ + int ret; + unsigned long addr, offset = 0; + struct insn insn; + kprobe_opcode_t buf[MAX_INSN_SIZE]; + + if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf)) + return 0; + + /* Decode instructions */ + addr = paddr - offset; + while (addr < paddr) { + kernel_insn_init(&insn, (void *)addr); + insn_get_opcode(&insn); + + /* + * Check if the instruction has been modified by another + * kprobe, in which case we replace the breakpoint by the + * original instruction in our buffer. + */ + if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { + ret = recover_probed_instruction(buf, addr); + if (ret) + /* + * Another debugging subsystem might insert + * this breakpoint. In that case, we can't + * recover it. + */ + return 0; + kernel_insn_init(&insn, buf); + } + insn_get_length(&insn); + addr += insn.length; + } + + return (addr == paddr); +} + /* * Returns non-zero if opcode modifies the interrupt flag. */ @@ -277,68 +293,30 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) static void __kprobes fix_riprel(struct kprobe *p) { #ifdef CONFIG_X86_64 - u8 *insn = p->ainsn.insn; - s64 disp; - int need_modrm; - - /* Skip legacy instruction prefixes. */ - while (1) { - switch (*insn) { - case 0x66: - case 0x67: - case 0x2e: - case 0x3e: - case 0x26: - case 0x64: - case 0x65: - case 0x36: - case 0xf0: - case 0xf3: - case 0xf2: - ++insn; - continue; - } - break; - } + struct insn insn; + kernel_insn_init(&insn, p->ainsn.insn); - /* Skip REX instruction prefix. */ - if (is_REX_prefix(insn)) - ++insn; - - if (*insn == 0x0f) { - /* Two-byte opcode. */ - ++insn; - need_modrm = test_bit(*insn, - (unsigned long *)twobyte_has_modrm); - } else - /* One-byte opcode. */ - need_modrm = test_bit(*insn, - (unsigned long *)onebyte_has_modrm); - - if (need_modrm) { - u8 modrm = *++insn; - if ((modrm & 0xc7) == 0x05) { - /* %rip+disp32 addressing mode */ - /* Displacement follows ModRM byte. */ - ++insn; - /* - * The copied instruction uses the %rip-relative - * addressing mode. Adjust the displacement for the - * difference between the original location of this - * instruction and the location of the copy that will - * actually be run. The tricky bit here is making sure - * that the sign extension happens correctly in this - * calculation, since we need a signed 32-bit result to - * be sign-extended to 64 bits when it's added to the - * %rip value and yield the same 64-bit result that the - * sign-extension of the original signed 32-bit - * displacement would have given. - */ - disp = (u8 *) p->addr + *((s32 *) insn) - - (u8 *) p->ainsn.insn; - BUG_ON((s64) (s32) disp != disp); /* Sanity check. */ - *(s32 *)insn = (s32) disp; - } + if (insn_rip_relative(&insn)) { + s64 newdisp; + u8 *disp; + insn_get_displacement(&insn); + /* + * The copied instruction uses the %rip-relative addressing + * mode. Adjust the displacement for the difference between + * the original location of this instruction and the location + * of the copy that will actually be run. The tricky bit here + * is making sure that the sign extension happens correctly in + * this calculation, since we need a signed 32-bit result to + * be sign-extended to 64 bits when it's added to the %rip + * value and yield the same 64-bit result that the sign- + * extension of the original signed 32-bit displacement would + * have given. + */ + newdisp = (u8 *) p->addr + (s64) insn.displacement.value - + (u8 *) p->ainsn.insn; + BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ + disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn); + *(s32 *) disp = (s32) newdisp; } #endif } @@ -359,6 +337,8 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p) int __kprobes arch_prepare_kprobe(struct kprobe *p) { + if (!can_probe((unsigned long)p->addr)) + return -EILSEQ; /* insn: must be on special executable page on x86. */ p->ainsn.insn = get_insn_slot(); if (!p->ainsn.insn) @@ -472,17 +452,6 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, { switch (kcb->kprobe_status) { case KPROBE_HIT_SSDONE: -#ifdef CONFIG_X86_64 - /* TODO: Provide re-entrancy from post_kprobes_handler() and - * avoid exception stack corruption while single-stepping on - * the instruction of the new probe. - */ - arch_disarm_kprobe(p); - regs->ip = (unsigned long)p->addr; - reset_current_kprobe(); - preempt_enable_no_resched(); - break; -#endif case KPROBE_HIT_ACTIVE: save_previous_kprobe(kcb); set_current_kprobe(p, regs, kcb); @@ -491,18 +460,16 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, kcb->kprobe_status = KPROBE_REENTER; break; case KPROBE_HIT_SS: - if (p == kprobe_running()) { - regs->flags &= ~X86_EFLAGS_TF; - regs->flags |= kcb->kprobe_saved_flags; - return 0; - } else { - /* A probe has been hit in the codepath leading up - * to, or just after, single-stepping of a probed - * instruction. This entire codepath should strictly - * reside in .kprobes.text section. Raise a warning - * to highlight this peculiar case. - */ - } + /* A probe has been hit in the codepath leading up to, or just + * after, single-stepping of a probed instruction. This entire + * codepath should strictly reside in .kprobes.text section. + * Raise a BUG or we'll continue in an endless reentering loop + * and eventually a stack overflow. + */ + printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n", + p->addr); + dump_kprobe(p); + BUG(); default: /* impossible cases */ WARN_ON(1); @@ -967,8 +934,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, ret = NOTIFY_STOP; break; case DIE_DEBUG: - if (post_kprobe_handler(args->regs)) + if (post_kprobe_handler(args->regs)) { + /* + * Reset the BS bit in dr6 (pointed by args->err) to + * denote completion of processing + */ + (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; ret = NOTIFY_STOP; + } break; case DIE_GPF: /* diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index c1c429d0013..c843f8406da 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -25,6 +25,7 @@ #include <asm/desc.h> #include <asm/system.h> #include <asm/cacheflush.h> +#include <asm/debugreg.h> static void set_idt(void *newidt, __u16 limit) { @@ -202,6 +203,7 @@ void machine_kexec(struct kimage *image) /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); + hw_breakpoint_disable(); if (image->preserve_context) { #ifdef CONFIG_X86_IO_APIC diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 84c3bf209e9..4a8bb82248a 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -18,6 +18,7 @@ #include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> +#include <asm/debugreg.h> static int init_one_level2_page(struct kimage *image, pgd_t *pgd, unsigned long addr) @@ -282,6 +283,7 @@ void machine_kexec(struct kimage *image) /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); + hw_breakpoint_disable(); if (image->preserve_context) { #ifdef CONFIG_X86_IO_APIC diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 378e9a8f1bf..2bcad3926ed 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -73,7 +73,6 @@ #include <linux/platform_device.h> #include <linux/miscdevice.h> #include <linux/capability.h> -#include <linux/smp_lock.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/mutex.h> @@ -201,7 +200,6 @@ static int do_microcode_update(const void __user *buf, size_t size) static int microcode_open(struct inode *unused1, struct file *unused2) { - cycle_kernel_lock(); return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; } diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 6a3cefc7dda..553449951b8 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -174,21 +174,17 @@ static int msr_open(struct inode *inode, struct file *file) { unsigned int cpu = iminor(file->f_path.dentry->d_inode); struct cpuinfo_x86 *c = &cpu_data(cpu); - int ret = 0; - lock_kernel(); cpu = iminor(file->f_path.dentry->d_inode); - if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { - ret = -ENXIO; /* No such CPU */ - goto out; - } + if (cpu >= nr_cpu_ids || !cpu_online(cpu)) + return -ENXIO; /* No such CPU */ + c = &cpu_data(cpu); if (!cpu_has(c, X86_FEATURE_MSR)) - ret = -EIO; /* MSR not supported */ -out: - unlock_kernel(); - return ret; + return -EIO; /* MSR not supported */ + + return 0; } /* diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 971a3bec47a..c563e4c8ff3 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -46,6 +46,7 @@ #include <asm/dma.h> #include <asm/rio.h> #include <asm/bios_ebda.h> +#include <asm/x86_init.h> #ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT int use_calgary __read_mostly = 1; @@ -244,7 +245,7 @@ static unsigned long iommu_range_alloc(struct device *dev, if (panic_on_overflow) panic("Calgary: fix the allocator.\n"); else - return bad_dma_address; + return DMA_ERROR_CODE; } } @@ -260,12 +261,15 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, void *vaddr, unsigned int npages, int direction) { unsigned long entry; - dma_addr_t ret = bad_dma_address; + dma_addr_t ret; entry = iommu_range_alloc(dev, tbl, npages); - if (unlikely(entry == bad_dma_address)) - goto error; + if (unlikely(entry == DMA_ERROR_CODE)) { + printk(KERN_WARNING "Calgary: failed to allocate %u pages in " + "iommu %p\n", npages, tbl); + return DMA_ERROR_CODE; + } /* set the return dma address */ ret = (entry << PAGE_SHIFT) | ((unsigned long)vaddr & ~PAGE_MASK); @@ -273,13 +277,7 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, /* put the TCEs in the HW table */ tce_build(tbl, entry, npages, (unsigned long)vaddr & PAGE_MASK, direction); - return ret; - -error: - printk(KERN_WARNING "Calgary: failed to allocate %u pages in " - "iommu %p\n", npages, tbl); - return bad_dma_address; } static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, @@ -290,8 +288,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, unsigned long flags; /* were we called with bad_dma_address? */ - badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE); - if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) { + badend = DMA_ERROR_CODE + (EMERGENCY_PAGES * PAGE_SIZE); + if (unlikely((dma_addr >= DMA_ERROR_CODE) && (dma_addr < badend))) { WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA " "address 0x%Lx\n", dma_addr); return; @@ -318,13 +316,15 @@ static inline struct iommu_table *find_iommu_table(struct device *dev) pdev = to_pci_dev(dev); + /* search up the device tree for an iommu */ pbus = pdev->bus; - - /* is the device behind a bridge? Look for the root bus */ - while (pbus->parent) + do { + tbl = pci_iommu(pbus); + if (tbl && tbl->it_busno == pbus->number) + break; + tbl = NULL; pbus = pbus->parent; - - tbl = pci_iommu(pbus); + } while (pbus); BUG_ON(tbl && (tbl->it_busno != pbus->number)); @@ -373,7 +373,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, npages = iommu_num_pages(vaddr, s->length, PAGE_SIZE); entry = iommu_range_alloc(dev, tbl, npages); - if (entry == bad_dma_address) { + if (entry == DMA_ERROR_CODE) { /* makes sure unmap knows to stop */ s->dma_length = 0; goto error; @@ -391,7 +391,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, error: calgary_unmap_sg(dev, sg, nelems, dir, NULL); for_each_sg(sg, s, nelems, i) { - sg->dma_address = bad_dma_address; + sg->dma_address = DMA_ERROR_CODE; sg->dma_length = 0; } return 0; @@ -446,7 +446,7 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size, /* set up tces to cover the allocated range */ mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); - if (mapping == bad_dma_address) + if (mapping == DMA_ERROR_CODE) goto free; *dma_handle = mapping; return ret; @@ -727,7 +727,7 @@ static void __init calgary_reserve_regions(struct pci_dev *dev) struct iommu_table *tbl = pci_iommu(dev->bus); /* reserve EMERGENCY_PAGES from bad_dma_address and up */ - iommu_range_reserve(tbl, bad_dma_address, EMERGENCY_PAGES); + iommu_range_reserve(tbl, DMA_ERROR_CODE, EMERGENCY_PAGES); /* avoid the BIOS/VGA first 640KB-1MB region */ /* for CalIOC2 - avoid the entire first MB */ @@ -1344,6 +1344,23 @@ static void __init get_tce_space_from_tar(void) return; } +static int __init calgary_iommu_init(void) +{ + int ret; + + /* ok, we're trying to use Calgary - let's roll */ + printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n"); + + ret = calgary_init(); + if (ret) { + printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " + "falling back to no_iommu\n", ret); + return ret; + } + + return 0; +} + void __init detect_calgary(void) { int bus; @@ -1357,7 +1374,7 @@ void __init detect_calgary(void) * if the user specified iommu=off or iommu=soft or we found * another HW IOMMU already, bail out. */ - if (swiotlb || no_iommu || iommu_detected) + if (no_iommu || iommu_detected) return; if (!use_calgary) @@ -1442,9 +1459,7 @@ void __init detect_calgary(void) printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n", specified_table_size); - /* swiotlb for devices that aren't behind the Calgary. */ - if (max_pfn > MAX_DMA32_PFN) - swiotlb = 1; + x86_init.iommu.iommu_init = calgary_iommu_init; } return; @@ -1457,35 +1472,6 @@ cleanup: } } -int __init calgary_iommu_init(void) -{ - int ret; - - if (no_iommu || (swiotlb && !calgary_detected)) - return -ENODEV; - - if (!calgary_detected) - return -ENODEV; - - /* ok, we're trying to use Calgary - let's roll */ - printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n"); - - ret = calgary_init(); - if (ret) { - printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " - "falling back to no_iommu\n", ret); - return ret; - } - - force_iommu = 1; - bad_dma_address = 0x0; - /* dma_ops is set to swiotlb or nommu */ - if (!dma_ops) - dma_ops = &nommu_dma_ops; - - return 0; -} - static int __init calgary_parse_options(char *p) { unsigned int bridge; diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index a6e804d16c3..afcc58b69c7 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -11,10 +11,11 @@ #include <asm/gart.h> #include <asm/calgary.h> #include <asm/amd_iommu.h> +#include <asm/x86_init.h> static int forbid_dac __read_mostly; -struct dma_map_ops *dma_ops; +struct dma_map_ops *dma_ops = &nommu_dma_ops; EXPORT_SYMBOL(dma_ops); static int iommu_sac_force __read_mostly; @@ -42,9 +43,6 @@ int iommu_detected __read_mostly = 0; */ int iommu_pass_through __read_mostly; -dma_addr_t bad_dma_address __read_mostly = 0; -EXPORT_SYMBOL(bad_dma_address); - /* Dummy device used for NULL arguments (normally ISA). */ struct device x86_dma_fallback_dev = { .init_name = "fallback device", @@ -126,20 +124,17 @@ void __init pci_iommu_alloc(void) /* free the range so iommu could get some range less than 4G */ dma32_free_bootmem(); #endif + if (pci_swiotlb_init()) + return; - /* - * The order of these functions is important for - * fall-back/fail-over reasons - */ gart_iommu_hole_init(); detect_calgary(); detect_intel_iommu(); + /* needs to be called after gart_iommu_hole_init */ amd_iommu_detect(); - - pci_swiotlb_init(); } void *dma_generic_alloc_coherent(struct device *dev, size_t size, @@ -214,7 +209,7 @@ static __init int iommu_setup(char *p) if (!strncmp(p, "allowdac", 8)) forbid_dac = 0; if (!strncmp(p, "nodac", 5)) - forbid_dac = -1; + forbid_dac = 1; if (!strncmp(p, "usedac", 6)) { forbid_dac = -1; return 1; @@ -289,25 +284,17 @@ static int __init pci_iommu_init(void) #ifdef CONFIG_PCI dma_debug_add_bus(&pci_bus_type); #endif + x86_init.iommu.iommu_init(); - calgary_iommu_init(); - - intel_iommu_init(); + if (swiotlb) { + printk(KERN_INFO "PCI-DMA: " + "Using software bounce buffering for IO (SWIOTLB)\n"); + swiotlb_print_info(); + } else + swiotlb_free(); - amd_iommu_init(); - - gart_iommu_init(); - - no_iommu_init(); return 0; } - -void pci_iommu_shutdown(void) -{ - gart_iommu_shutdown(); - - amd_iommu_shutdown(); -} /* Must execute after PCI subsystem */ rootfs_initcall(pci_iommu_init); diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index a7f1b64f86e..e6a0d402f17 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -39,6 +39,7 @@ #include <asm/swiotlb.h> #include <asm/dma.h> #include <asm/k8.h> +#include <asm/x86_init.h> static unsigned long iommu_bus_base; /* GART remapping area (physical) */ static unsigned long iommu_size; /* size of remapping area bytes */ @@ -46,6 +47,8 @@ static unsigned long iommu_pages; /* .. and in pages */ static u32 *iommu_gatt_base; /* Remapping table */ +static dma_addr_t bad_dma_addr; + /* * If this is disabled the IOMMU will use an optimized flushing strategy * of only flushing when an mapping is reused. With it true the GART is @@ -92,7 +95,7 @@ static unsigned long alloc_iommu(struct device *dev, int size, base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), PAGE_SIZE) >> PAGE_SHIFT; - boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1, + boundary_size = ALIGN((u64)dma_get_seg_boundary(dev) + 1, PAGE_SIZE) >> PAGE_SHIFT; spin_lock_irqsave(&iommu_bitmap_lock, flags); @@ -216,7 +219,7 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, if (panic_on_overflow) panic("dma_map_area overflow %lu bytes\n", size); iommu_full(dev, size, dir); - return bad_dma_address; + return bad_dma_addr; } for (i = 0; i < npages; i++) { @@ -294,7 +297,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, int i; #ifdef CONFIG_IOMMU_DEBUG - printk(KERN_DEBUG "dma_map_sg overflow\n"); + pr_debug("dma_map_sg overflow\n"); #endif for_each_sg(sg, s, nents, i) { @@ -302,7 +305,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, if (nonforced_iommu(dev, addr, s->length)) { addr = dma_map_area(dev, addr, s->length, dir, 0); - if (addr == bad_dma_address) { + if (addr == bad_dma_addr) { if (i > 0) gart_unmap_sg(dev, sg, i, dir, NULL); nents = 0; @@ -389,12 +392,14 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, if (!dev) dev = &x86_dma_fallback_dev; - out = 0; - start = 0; - start_sg = sgmap = sg; - seg_size = 0; - max_seg_size = dma_get_max_seg_size(dev); - ps = NULL; /* shut up gcc */ + out = 0; + start = 0; + start_sg = sg; + sgmap = sg; + seg_size = 0; + max_seg_size = dma_get_max_seg_size(dev); + ps = NULL; /* shut up gcc */ + for_each_sg(sg, s, nents, i) { dma_addr_t addr = sg_phys(s); @@ -417,11 +422,12 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, sgmap, pages, need) < 0) goto error; out++; - seg_size = 0; - sgmap = sg_next(sgmap); - pages = 0; - start = i; - start_sg = s; + + seg_size = 0; + sgmap = sg_next(sgmap); + pages = 0; + start = i; + start_sg = s; } } @@ -455,7 +461,7 @@ error: iommu_full(dev, pages << PAGE_SHIFT, dir); for_each_sg(sg, s, nents, i) - s->dma_address = bad_dma_address; + s->dma_address = bad_dma_addr; return 0; } @@ -479,7 +485,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, DMA_BIDIRECTIONAL, align_mask); flush_gart(); - if (paddr != bad_dma_address) { + if (paddr != bad_dma_addr) { *dma_addr = paddr; return page_address(page); } @@ -499,6 +505,11 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr, free_pages((unsigned long)vaddr, get_order(size)); } +static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return (dma_addr == bad_dma_addr); +} + static int no_agp; static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) @@ -515,7 +526,7 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) iommu_size -= round_up(a, PMD_PAGE_SIZE) - a; if (iommu_size < 64*1024*1024) { - printk(KERN_WARNING + pr_warning( "PCI-DMA: Warning: Small IOMMU %luMB." " Consider increasing the AGP aperture in BIOS\n", iommu_size >> 20); @@ -570,28 +581,32 @@ void set_up_gart_resume(u32 aper_order, u32 aper_alloc) aperture_alloc = aper_alloc; } -static int gart_resume(struct sys_device *dev) +static void gart_fixup_northbridges(struct sys_device *dev) { - printk(KERN_INFO "PCI-DMA: Resuming GART IOMMU\n"); + int i; - if (fix_up_north_bridges) { - int i; + if (!fix_up_north_bridges) + return; - printk(KERN_INFO "PCI-DMA: Restoring GART aperture settings\n"); + pr_info("PCI-DMA: Restoring GART aperture settings\n"); - for (i = 0; i < num_k8_northbridges; i++) { - struct pci_dev *dev = k8_northbridges[i]; + for (i = 0; i < num_k8_northbridges; i++) { + struct pci_dev *dev = k8_northbridges[i]; - /* - * Don't enable translations just yet. That is the next - * step. Restore the pre-suspend aperture settings. - */ - pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, - aperture_order << 1); - pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, - aperture_alloc >> 25); - } + /* + * Don't enable translations just yet. That is the next + * step. Restore the pre-suspend aperture settings. + */ + pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, aperture_order << 1); + pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25); } +} + +static int gart_resume(struct sys_device *dev) +{ + pr_info("PCI-DMA: Resuming GART IOMMU\n"); + + gart_fixup_northbridges(dev); enable_gart_translations(); @@ -604,15 +619,14 @@ static int gart_suspend(struct sys_device *dev, pm_message_t state) } static struct sysdev_class gart_sysdev_class = { - .name = "gart", - .suspend = gart_suspend, - .resume = gart_resume, + .name = "gart", + .suspend = gart_suspend, + .resume = gart_resume, }; static struct sys_device device_gart = { - .id = 0, - .cls = &gart_sysdev_class, + .cls = &gart_sysdev_class, }; /* @@ -627,7 +641,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info) void *gatt; int i, error; - printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); + pr_info("PCI-DMA: Disabling AGP.\n"); + aper_size = aper_base = info->aper_size = 0; dev = NULL; for (i = 0; i < num_k8_northbridges; i++) { @@ -645,6 +660,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) } if (!aper_base) goto nommu; + info->aper_base = aper_base; info->aper_size = aper_size >> 20; @@ -667,14 +683,14 @@ static __init int init_k8_gatt(struct agp_kern_info *info) flush_gart(); - printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", + pr_info("PCI-DMA: aperture base @ %x size %u KB\n", aper_base, aper_size>>10); return 0; nommu: /* Should not happen anymore */ - printk(KERN_WARNING "PCI-DMA: More than 4GB of RAM and no IOMMU\n" + pr_warning("PCI-DMA: More than 4GB of RAM and no IOMMU\n" "falling back to iommu=soft.\n"); return -1; } @@ -686,14 +702,15 @@ static struct dma_map_ops gart_dma_ops = { .unmap_page = gart_unmap_page, .alloc_coherent = gart_alloc_coherent, .free_coherent = gart_free_coherent, + .mapping_error = gart_mapping_error, }; -void gart_iommu_shutdown(void) +static void gart_iommu_shutdown(void) { struct pci_dev *dev; int i; - if (no_agp && (dma_ops != &gart_dma_ops)) + if (no_agp) return; for (i = 0; i < num_k8_northbridges; i++) { @@ -708,7 +725,7 @@ void gart_iommu_shutdown(void) } } -void __init gart_iommu_init(void) +int __init gart_iommu_init(void) { struct agp_kern_info info; unsigned long iommu_start; @@ -718,7 +735,7 @@ void __init gart_iommu_init(void) long i; if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) - return; + return 0; #ifndef CONFIG_AGP_AMD64 no_agp = 1; @@ -730,35 +747,28 @@ void __init gart_iommu_init(void) (agp_copy_info(agp_bridge, &info) < 0); #endif - if (swiotlb) - return; - - /* Did we detect a different HW IOMMU? */ - if (iommu_detected && !gart_iommu_aperture) - return; - if (no_iommu || (!force_iommu && max_pfn <= MAX_DMA32_PFN) || !gart_iommu_aperture || (no_agp && init_k8_gatt(&info) < 0)) { if (max_pfn > MAX_DMA32_PFN) { - printk(KERN_WARNING "More than 4GB of memory " - "but GART IOMMU not available.\n"); - printk(KERN_WARNING "falling back to iommu=soft.\n"); + pr_warning("More than 4GB of memory but GART IOMMU not available.\n"); + pr_warning("falling back to iommu=soft.\n"); } - return; + return 0; } /* need to map that range */ - aper_size = info.aper_size << 20; - aper_base = info.aper_base; - end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); + aper_size = info.aper_size << 20; + aper_base = info.aper_base; + end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); + if (end_pfn > max_low_pfn_mapped) { start_pfn = (aper_base>>PAGE_SHIFT); init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); } - printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); + pr_info("PCI-DMA: using GART IOMMU.\n"); iommu_size = check_iommu_size(info.aper_base, aper_size); iommu_pages = iommu_size >> PAGE_SHIFT; @@ -773,8 +783,7 @@ void __init gart_iommu_init(void) ret = dma_debug_resize_entries(iommu_pages); if (ret) - printk(KERN_DEBUG - "PCI-DMA: Cannot trace all the entries\n"); + pr_debug("PCI-DMA: Cannot trace all the entries\n"); } #endif @@ -784,15 +793,14 @@ void __init gart_iommu_init(void) */ iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES); - agp_memory_reserved = iommu_size; - printk(KERN_INFO - "PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n", + pr_info("PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n", iommu_size >> 20); - iommu_start = aper_size - iommu_size; - iommu_bus_base = info.aper_base + iommu_start; - bad_dma_address = iommu_bus_base; - iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); + agp_memory_reserved = iommu_size; + iommu_start = aper_size - iommu_size; + iommu_bus_base = info.aper_base + iommu_start; + bad_dma_addr = iommu_bus_base; + iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); /* * Unmap the IOMMU part of the GART. The alias of the page is @@ -814,7 +822,7 @@ void __init gart_iommu_init(void) * the pages as Not-Present: */ wbinvd(); - + /* * Now all caches are flushed and we can safely enable * GART hardware. Doing it early leaves the possibility @@ -838,6 +846,10 @@ void __init gart_iommu_init(void) flush_gart(); dma_ops = &gart_dma_ops; + x86_platform.iommu_shutdown = gart_iommu_shutdown; + swiotlb = 0; + + return 0; } void __init gart_parse_options(char *p) @@ -856,7 +868,7 @@ void __init gart_parse_options(char *p) #endif if (isdigit(*p) && get_option(&p, &arg)) iommu_size = arg; - if (!strncmp(p, "fullflush", 8)) + if (!strncmp(p, "fullflush", 9)) iommu_fullflush = 1; if (!strncmp(p, "nofullflush", 11)) iommu_fullflush = 0; diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index a3933d4330c..22be12b60a8 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -33,7 +33,7 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page, dma_addr_t bus = page_to_phys(page) + offset; WARN_ON(size == 0); if (!check_addr("map_single", dev, bus, size)) - return bad_dma_address; + return DMA_ERROR_CODE; flush_write_buffers(); return bus; } @@ -103,12 +103,3 @@ struct dma_map_ops nommu_dma_ops = { .sync_sg_for_device = nommu_sync_sg_for_device, .is_phys = 1, }; - -void __init no_iommu_init(void) -{ - if (dma_ops) - return; - - force_iommu = 0; /* no HW IOMMU */ - dma_ops = &nommu_dma_ops; -} diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index aaa6b7839f1..e3c0a66b9e7 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -42,18 +42,28 @@ static struct dma_map_ops swiotlb_dma_ops = { .dma_supported = NULL, }; -void __init pci_swiotlb_init(void) +/* + * pci_swiotlb_init - initialize swiotlb if necessary + * + * This returns non-zero if we are forced to use swiotlb (by the boot + * option). + */ +int __init pci_swiotlb_init(void) { + int use_swiotlb = swiotlb | swiotlb_force; + /* don't initialize swiotlb if iommu=off (no_iommu=1) */ #ifdef CONFIG_X86_64 - if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN)) + if (!no_iommu && max_pfn > MAX_DMA32_PFN) swiotlb = 1; #endif if (swiotlb_force) swiotlb = 1; + if (swiotlb) { - printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n"); - swiotlb_init(); + swiotlb_init(0); dma_ops = &swiotlb_dma_ops; } + + return use_swiotlb; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 5284cd2b577..744508e7cfd 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -10,6 +10,7 @@ #include <linux/clockchips.h> #include <linux/random.h> #include <trace/events/power.h> +#include <linux/hw_breakpoint.h> #include <asm/system.h> #include <asm/apic.h> #include <asm/syscalls.h> @@ -17,6 +18,7 @@ #include <asm/uaccess.h> #include <asm/i387.h> #include <asm/ds.h> +#include <asm/debugreg.h> unsigned long idle_halt; EXPORT_SYMBOL(idle_halt); @@ -103,14 +105,7 @@ void flush_thread(void) } #endif - clear_tsk_thread_flag(tsk, TIF_DEBUG); - - tsk->thread.debugreg0 = 0; - tsk->thread.debugreg1 = 0; - tsk->thread.debugreg2 = 0; - tsk->thread.debugreg3 = 0; - tsk->thread.debugreg6 = 0; - tsk->thread.debugreg7 = 0; + flush_ptrace_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* * Forget coprocessor state.. @@ -192,16 +187,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, else if (next->debugctlmsr != prev->debugctlmsr) update_debugctlmsr(next->debugctlmsr); - if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { - set_debugreg(next->debugreg0, 0); - set_debugreg(next->debugreg1, 1); - set_debugreg(next->debugreg2, 2); - set_debugreg(next->debugreg3, 3); - /* no 4 and 5 */ - set_debugreg(next->debugreg6, 6); - set_debugreg(next->debugreg7, 7); - } - if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ test_tsk_thread_flag(next_p, TIF_NOTSC)) { /* prev and next are different */ diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 4cf79567cda..075580b3568 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -58,6 +58,7 @@ #include <asm/idle.h> #include <asm/syscalls.h> #include <asm/ds.h> +#include <asm/debugreg.h> asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -134,7 +135,7 @@ void __show_regs(struct pt_regs *regs, int all) ss = regs->ss & 0xffff; gs = get_user_gs(regs); } else { - sp = (unsigned long) (®s->sp); + sp = kernel_stack_pointer(regs); savesegment(ss, ss); savesegment(gs, gs); } @@ -187,7 +188,7 @@ void __show_regs(struct pt_regs *regs, int all) void show_regs(struct pt_regs *regs) { - __show_regs(regs, 1); + show_registers(regs); show_trace(NULL, regs, ®s->sp, regs->bp); } @@ -259,7 +260,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, task_user_gs(p) = get_user_gs(regs); + p->thread.io_bitmap_ptr = NULL; tsk = current; + err = -ENOMEM; + + memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); + if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, IO_BITMAP_BYTES, GFP_KERNEL); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index eb62cbcaa49..a98fe88fab6 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -52,6 +52,7 @@ #include <asm/idle.h> #include <asm/syscalls.h> #include <asm/ds.h> +#include <asm/debugreg.h> asmlinkage extern void ret_from_fork(void); @@ -226,8 +227,7 @@ void __show_regs(struct pt_regs *regs, int all) void show_regs(struct pt_regs *regs) { - printk(KERN_INFO "CPU %d:", smp_processor_id()); - __show_regs(regs, 1); + show_registers(regs); show_trace(NULL, regs, (void *)(regs + 1), regs->bp); } @@ -297,12 +297,16 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.fs = me->thread.fs; p->thread.gs = me->thread.gs; + p->thread.io_bitmap_ptr = NULL; savesegment(gs, p->thread.gsindex); savesegment(fs, p->thread.fsindex); savesegment(es, p->thread.es); savesegment(ds, p->thread.ds); + err = -ENOMEM; + memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -341,6 +345,7 @@ out: kfree(p->thread.io_bitmap_ptr); p->thread.io_bitmap_max = 0; } + return err; } @@ -495,6 +500,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ if (preload_fpu) __math_state_restore(); + return prev_p; } diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 7b058a2dc66..04d182a7cfd 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -22,6 +22,8 @@ #include <linux/seccomp.h> #include <linux/signal.h> #include <linux/workqueue.h> +#include <linux/perf_event.h> +#include <linux/hw_breakpoint.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -34,6 +36,7 @@ #include <asm/prctl.h> #include <asm/proto.h> #include <asm/ds.h> +#include <asm/hw_breakpoint.h> #include "tls.h" @@ -49,6 +52,118 @@ enum x86_regset { REGSET_IOPERM32, }; +struct pt_regs_offset { + const char *name; + int offset; +}; + +#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} +#define REG_OFFSET_END {.name = NULL, .offset = 0} + +static const struct pt_regs_offset regoffset_table[] = { +#ifdef CONFIG_X86_64 + REG_OFFSET_NAME(r15), + REG_OFFSET_NAME(r14), + REG_OFFSET_NAME(r13), + REG_OFFSET_NAME(r12), + REG_OFFSET_NAME(r11), + REG_OFFSET_NAME(r10), + REG_OFFSET_NAME(r9), + REG_OFFSET_NAME(r8), +#endif + REG_OFFSET_NAME(bx), + REG_OFFSET_NAME(cx), + REG_OFFSET_NAME(dx), + REG_OFFSET_NAME(si), + REG_OFFSET_NAME(di), + REG_OFFSET_NAME(bp), + REG_OFFSET_NAME(ax), +#ifdef CONFIG_X86_32 + REG_OFFSET_NAME(ds), + REG_OFFSET_NAME(es), + REG_OFFSET_NAME(fs), + REG_OFFSET_NAME(gs), +#endif + REG_OFFSET_NAME(orig_ax), + REG_OFFSET_NAME(ip), + REG_OFFSET_NAME(cs), + REG_OFFSET_NAME(flags), + REG_OFFSET_NAME(sp), + REG_OFFSET_NAME(ss), + REG_OFFSET_END, +}; + +/** + * regs_query_register_offset() - query register offset from its name + * @name: the name of a register + * + * regs_query_register_offset() returns the offset of a register in struct + * pt_regs from its name. If the name is invalid, this returns -EINVAL; + */ +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + return -EINVAL; +} + +/** + * regs_query_register_name() - query register name from its offset + * @offset: the offset of a register in struct pt_regs. + * + * regs_query_register_name() returns the name of a register from its + * offset in struct pt_regs. If the @offset is invalid, this returns NULL; + */ +const char *regs_query_register_name(unsigned int offset) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (roff->offset == offset) + return roff->name; + return NULL; +} + +static const int arg_offs_table[] = { +#ifdef CONFIG_X86_32 + [0] = offsetof(struct pt_regs, ax), + [1] = offsetof(struct pt_regs, dx), + [2] = offsetof(struct pt_regs, cx) +#else /* CONFIG_X86_64 */ + [0] = offsetof(struct pt_regs, di), + [1] = offsetof(struct pt_regs, si), + [2] = offsetof(struct pt_regs, dx), + [3] = offsetof(struct pt_regs, cx), + [4] = offsetof(struct pt_regs, r8), + [5] = offsetof(struct pt_regs, r9) +#endif +}; + +/** + * regs_get_argument_nth() - get Nth argument at function call + * @regs: pt_regs which contains registers at function entry. + * @n: argument number. + * + * regs_get_argument_nth() returns @n th argument of a function call. + * Since usually the kernel stack will be changed right after function entry, + * you must use this at function entry. If the @n th entry is NOT in the + * kernel stack or pt_regs, this returns 0. + */ +unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n) +{ + if (n < ARRAY_SIZE(arg_offs_table)) + return *(unsigned long *)((char *)regs + arg_offs_table[n]); + else { + /* + * The typical case: arg n is on the stack. + * (Note: stack[0] = return address, so skip it) + */ + n -= ARRAY_SIZE(arg_offs_table); + return regs_get_kernel_stack_nth(regs, 1 + n); + } +} + /* * does not yet catch signals sent when the child dies. * in exit.c or in signal.c. @@ -137,11 +252,6 @@ static int set_segment_reg(struct task_struct *task, return 0; } -static unsigned long debugreg_addr_limit(struct task_struct *task) -{ - return TASK_SIZE - 3; -} - #else /* CONFIG_X86_64 */ #define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) @@ -266,15 +376,6 @@ static int set_segment_reg(struct task_struct *task, return 0; } -static unsigned long debugreg_addr_limit(struct task_struct *task) -{ -#ifdef CONFIG_IA32_EMULATION - if (test_tsk_thread_flag(task, TIF_IA32)) - return IA32_PAGE_OFFSET - 3; -#endif - return TASK_SIZE_MAX - 7; -} - #endif /* CONFIG_X86_32 */ static unsigned long get_flags(struct task_struct *task) @@ -454,99 +555,239 @@ static int genregs_set(struct task_struct *target, return ret; } +static void ptrace_triggered(struct perf_event *bp, void *data) +{ + int i; + struct thread_struct *thread = &(current->thread); + + /* + * Store in the virtual DR6 register the fact that the breakpoint + * was hit so the thread's debugger will see it. + */ + for (i = 0; i < HBP_NUM; i++) { + if (thread->ptrace_bps[i] == bp) + break; + } + + thread->debugreg6 |= (DR_TRAP0 << i); +} + /* - * This function is trivial and will be inlined by the compiler. - * Having it separates the implementation details of debug - * registers from the interface details of ptrace. + * Walk through every ptrace breakpoints for this thread and + * build the dr7 value on top of their attributes. + * */ -static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) +static unsigned long ptrace_get_dr7(struct perf_event *bp[]) { - switch (n) { - case 0: return child->thread.debugreg0; - case 1: return child->thread.debugreg1; - case 2: return child->thread.debugreg2; - case 3: return child->thread.debugreg3; - case 6: return child->thread.debugreg6; - case 7: return child->thread.debugreg7; + int i; + int dr7 = 0; + struct arch_hw_breakpoint *info; + + for (i = 0; i < HBP_NUM; i++) { + if (bp[i] && !bp[i]->attr.disabled) { + info = counter_arch_bp(bp[i]); + dr7 |= encode_dr7(i, info->len, info->type); + } } - return 0; + + return dr7; } -static int ptrace_set_debugreg(struct task_struct *child, - int n, unsigned long data) +static struct perf_event * +ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, + struct task_struct *tsk, int disabled) { - int i; + int err; + int gen_len, gen_type; + DEFINE_BREAKPOINT_ATTR(attr); - if (unlikely(n == 4 || n == 5)) - return -EIO; + /* + * We shoud have at least an inactive breakpoint at this + * slot. It means the user is writing dr7 without having + * written the address register first + */ + if (!bp) + return ERR_PTR(-EINVAL); - if (n < 4 && unlikely(data >= debugreg_addr_limit(child))) - return -EIO; + err = arch_bp_generic_fields(len, type, &gen_len, &gen_type); + if (err) + return ERR_PTR(err); - switch (n) { - case 0: child->thread.debugreg0 = data; break; - case 1: child->thread.debugreg1 = data; break; - case 2: child->thread.debugreg2 = data; break; - case 3: child->thread.debugreg3 = data; break; + attr = bp->attr; + attr.bp_len = gen_len; + attr.bp_type = gen_type; + attr.disabled = disabled; - case 6: - if ((data & ~0xffffffffUL) != 0) - return -EIO; - child->thread.debugreg6 = data; - break; + return modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk); +} + +/* + * Handle ptrace writes to debug register 7. + */ +static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) +{ + struct thread_struct *thread = &(tsk->thread); + unsigned long old_dr7; + int i, orig_ret = 0, rc = 0; + int enabled, second_pass = 0; + unsigned len, type; + struct perf_event *bp; + + data &= ~DR_CONTROL_RESERVED; + old_dr7 = ptrace_get_dr7(thread->ptrace_bps); +restore: + /* + * Loop through all the hardware breakpoints, making the + * appropriate changes to each. + */ + for (i = 0; i < HBP_NUM; i++) { + enabled = decode_dr7(data, i, &len, &type); + bp = thread->ptrace_bps[i]; + + if (!enabled) { + if (bp) { + /* + * Don't unregister the breakpoints right-away, + * unless all register_user_hw_breakpoint() + * requests have succeeded. This prevents + * any window of opportunity for debug + * register grabbing by other users. + */ + if (!second_pass) + continue; + + thread->ptrace_bps[i] = NULL; + bp = ptrace_modify_breakpoint(bp, len, type, + tsk, 1); + if (IS_ERR(bp)) { + rc = PTR_ERR(bp); + thread->ptrace_bps[i] = NULL; + break; + } + thread->ptrace_bps[i] = bp; + } + continue; + } + + bp = ptrace_modify_breakpoint(bp, len, type, tsk, 0); + + /* Incorrect bp, or we have a bug in bp API */ + if (IS_ERR(bp)) { + rc = PTR_ERR(bp); + thread->ptrace_bps[i] = NULL; + break; + } + thread->ptrace_bps[i] = bp; + } + /* + * Make a second pass to free the remaining unused breakpoints + * or to restore the original breakpoints if an error occurred. + */ + if (!second_pass) { + second_pass = 1; + if (rc < 0) { + orig_ret = rc; + data = old_dr7; + } + goto restore; + } + return ((orig_ret < 0) ? orig_ret : rc); +} + +/* + * Handle PTRACE_PEEKUSR calls for the debug register area. + */ +static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) +{ + struct thread_struct *thread = &(tsk->thread); + unsigned long val = 0; - case 7: + if (n < HBP_NUM) { + struct perf_event *bp; + bp = thread->ptrace_bps[n]; + if (!bp) + return 0; + val = bp->hw.info.address; + } else if (n == 6) { + val = thread->debugreg6; + } else if (n == 7) { + val = ptrace_get_dr7(thread->ptrace_bps); + } + return val; +} + +static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, + unsigned long addr) +{ + struct perf_event *bp; + struct thread_struct *t = &tsk->thread; + DEFINE_BREAKPOINT_ATTR(attr); + + if (!t->ptrace_bps[nr]) { /* - * Sanity-check data. Take one half-byte at once with - * check = (val >> (16 + 4*i)) & 0xf. It contains the - * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits - * 2 and 3 are LENi. Given a list of invalid values, - * we do mask |= 1 << invalid_value, so that - * (mask >> check) & 1 is a correct test for invalid - * values. - * - * R/Wi contains the type of the breakpoint / - * watchpoint, LENi contains the length of the watched - * data in the watchpoint case. - * - * The invalid values are: - * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit] - * - R/Wi == 0x10 (break on I/O reads or writes), so - * mask |= 0x4444. - * - R/Wi == 0x00 && LENi != 0x00, so we have mask |= - * 0x1110. - * - * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54. - * - * See the Intel Manual "System Programming Guide", - * 15.2.4 - * - * Note that LENi == 0x10 is defined on x86_64 in long - * mode (i.e. even for 32-bit userspace software, but - * 64-bit kernel), so the x86_64 mask value is 0x5454. - * See the AMD manual no. 24593 (AMD64 System Programming) + * Put stub len and type to register (reserve) an inactive but + * correct bp */ -#ifdef CONFIG_X86_32 -#define DR7_MASK 0x5f54 -#else -#define DR7_MASK 0x5554 -#endif - data &= ~DR_CONTROL_RESERVED; - for (i = 0; i < 4; i++) - if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1) - return -EIO; - child->thread.debugreg7 = data; - if (data) - set_tsk_thread_flag(child, TIF_DEBUG); - else - clear_tsk_thread_flag(child, TIF_DEBUG); - break; + attr.bp_addr = addr; + attr.bp_len = HW_BREAKPOINT_LEN_1; + attr.bp_type = HW_BREAKPOINT_W; + attr.disabled = 1; + + bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk); + } else { + bp = t->ptrace_bps[nr]; + t->ptrace_bps[nr] = NULL; + + attr = bp->attr; + attr.bp_addr = addr; + bp = modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk); } + /* + * CHECKME: the previous code returned -EIO if the addr wasn't a + * valid task virtual addr. The new one will return -EINVAL in this + * case. + * -EINVAL may be what we want for in-kernel breakpoints users, but + * -EIO looks better for ptrace, since we refuse a register writing + * for the user. And anyway this is the previous behaviour. + */ + if (IS_ERR(bp)) + return PTR_ERR(bp); + + t->ptrace_bps[nr] = bp; return 0; } /* + * Handle PTRACE_POKEUSR calls for the debug register area. + */ +int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) +{ + struct thread_struct *thread = &(tsk->thread); + int rc = 0; + + /* There are no DR4 or DR5 registers */ + if (n == 4 || n == 5) + return -EIO; + + if (n == 6) { + thread->debugreg6 = val; + goto ret_path; + } + if (n < HBP_NUM) { + rc = ptrace_set_breakpoint_addr(tsk, n, val); + if (rc) + return rc; + } + /* All that's left is DR7 */ + if (n == 7) + rc = ptrace_write_dr7(tsk, val); + +ret_path: + return rc; +} + +/* * These access the current or another (stopped) task's io permission * bitmap for debugging or core dump. */ diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f93078746e0..2b97fc5b124 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -23,7 +23,7 @@ # include <linux/ctype.h> # include <linux/mc146818rtc.h> #else -# include <asm/iommu.h> +# include <asm/x86_init.h> #endif /* @@ -622,7 +622,7 @@ void native_machine_shutdown(void) #endif #ifdef CONFIG_X86_64 - pci_iommu_shutdown(); + x86_platform.iommu_shutdown(); #endif } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 2a34f9c5be2..82e88cdda9b 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -109,6 +109,7 @@ #ifdef CONFIG_X86_64 #include <asm/numa_64.h> #endif +#include <asm/mce.h> /* * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. @@ -247,7 +248,7 @@ EXPORT_SYMBOL(edd); * from boot_params into a safe place. * */ -static inline void copy_edd(void) +static inline void __init copy_edd(void) { memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer, sizeof(edd.mbr_signature)); @@ -256,7 +257,7 @@ static inline void copy_edd(void) edd.edd_info_nr = boot_params.eddbuf_entries; } #else -static inline void copy_edd(void) +static inline void __init copy_edd(void) { } #endif @@ -1031,6 +1032,8 @@ void __init setup_arch(char **cmdline_p) #endif #endif x86_init.oem.banner(); + + mcheck_init(); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 6a44a76055a..fbf3b07c856 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -799,15 +799,6 @@ static void do_signal(struct pt_regs *regs) signr = get_signal_to_deliver(&info, &ka, regs, NULL); if (signr > 0) { - /* - * Re-enable any watchpoints before delivering the - * signal to user space. The processor register will - * have been cleared if the watchpoint triggered - * inside the kernel. - */ - if (current->thread.debugreg7) - set_debugreg(current->thread.debugreg7, 7); - /* Whee! Actually deliver the signal. */ if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { /* diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 565ebc65920..324f2a44c22 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1250,16 +1250,7 @@ static void __ref remove_cpu_from_maps(int cpu) void cpu_disable_common(void) { int cpu = smp_processor_id(); - /* - * HACK: - * Allow any queued timer interrupts to get serviced - * This is only a temporary solution until we cleanup - * fixup_irqs as we do for IA64. - */ - local_irq_enable(); - mdelay(1); - local_irq_disable(); remove_siblinginfo(cpu); /* It's now safe to remove this processor from the online map */ diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7e37dcee0cc..33399176512 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -529,77 +529,56 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; - unsigned long condition; + unsigned long dr6; int si_code; - get_debugreg(condition, 6); + get_debugreg(dr6, 6); /* Catch kmemcheck conditions first of all! */ - if (condition & DR_STEP && kmemcheck_trap(regs)) + if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) return; + /* DR6 may or may not be cleared by the CPU */ + set_debugreg(0, 6); /* * The processor cleared BTF, so don't mark that we need it set. */ clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); tsk->thread.debugctlmsr = 0; - if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, - SIGTRAP) == NOTIFY_STOP) + /* Store the virtualized DR6 value */ + tsk->thread.debugreg6 = dr6; + + if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code, + SIGTRAP) == NOTIFY_STOP) return; /* It's safe to allow irq's after DR6 has been saved */ preempt_conditional_sti(regs); - /* Mask out spurious debug traps due to lazy DR7 setting */ - if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { - if (!tsk->thread.debugreg7) - goto clear_dr7; + if (regs->flags & X86_VM_MASK) { + handle_vm86_trap((struct kernel_vm86_regs *) regs, + error_code, 1); + return; } -#ifdef CONFIG_X86_32 - if (regs->flags & X86_VM_MASK) - goto debug_vm86; -#endif - - /* Save debug status register where ptrace can see it */ - tsk->thread.debugreg6 = condition; - /* - * Single-stepping through TF: make sure we ignore any events in - * kernel space (but re-enable TF when returning to user mode). + * Single-stepping through system calls: ignore any exceptions in + * kernel space, but re-enable TF when returning to user mode. + * + * We already checked v86 mode above, so we can check for kernel mode + * by just checking the CPL of CS. */ - if (condition & DR_STEP) { - if (!user_mode(regs)) - goto clear_TF_reenable; + if ((dr6 & DR_STEP) && !user_mode(regs)) { + tsk->thread.debugreg6 &= ~DR_STEP; + set_tsk_thread_flag(tsk, TIF_SINGLESTEP); + regs->flags &= ~X86_EFLAGS_TF; } - - si_code = get_si_code(condition); - /* Ok, finally something we can handle */ - send_sigtrap(tsk, regs, error_code, si_code); - - /* - * Disable additional traps. They'll be re-enabled when - * the signal is delivered. - */ -clear_dr7: - set_debugreg(0, 7); + si_code = get_si_code(tsk->thread.debugreg6); + if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS)) + send_sigtrap(tsk, regs, error_code, si_code); preempt_conditional_cli(regs); - return; -#ifdef CONFIG_X86_32 -debug_vm86: - /* reenable preemption: handle_vm86_trap() might sleep */ - dec_preempt_count(); - handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); - conditional_cli(regs); - return; -#endif - -clear_TF_reenable: - set_tsk_thread_flag(tsk, TIF_SINGLESTEP); - regs->flags &= ~X86_EFLAGS_TF; - preempt_conditional_cli(regs); return; } diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index f37930954d1..eed156851f5 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -114,13 +114,12 @@ void __cpuinit check_tsc_sync_source(int cpu) return; if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { - printk_once(KERN_INFO "Skipping synchronization checks as TSC is reliable.\n"); + if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING) + pr_info( + "Skipped synchronization checks as TSC is reliable.\n"); return; } - pr_info("checking TSC synchronization [CPU#%d -> CPU#%d]:", - smp_processor_id(), cpu); - /* * Reset it - in case this is a second bootup: */ @@ -142,12 +141,14 @@ void __cpuinit check_tsc_sync_source(int cpu) cpu_relax(); if (nr_warps) { - printk("\n"); + pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n", + smp_processor_id(), cpu); pr_warning("Measured %Ld cycles TSC warp between CPUs, " "turning off TSC clock.\n", max_warp); mark_tsc_unstable("check_tsc_sync_source failed"); } else { - printk(" passed.\n"); + pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n", + smp_processor_id(), cpu); } /* diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c index aeef529917e..61d805df4c9 100644 --- a/arch/x86/kernel/uv_irq.c +++ b/arch/x86/kernel/uv_irq.c @@ -9,10 +9,25 @@ */ #include <linux/module.h> +#include <linux/rbtree.h> #include <linux/irq.h> #include <asm/apic.h> #include <asm/uv/uv_irq.h> +#include <asm/uv/uv_hub.h> + +/* MMR offset and pnode of hub sourcing interrupts for a given irq */ +struct uv_irq_2_mmr_pnode{ + struct rb_node list; + unsigned long offset; + int pnode; + int irq; +}; + +static spinlock_t uv_irq_lock; +static struct rb_root uv_irq_root; + +static int uv_set_irq_affinity(unsigned int, const struct cpumask *); static void uv_noop(unsigned int irq) { @@ -39,25 +54,214 @@ struct irq_chip uv_irq_chip = { .unmask = uv_noop, .eoi = uv_ack_apic, .end = uv_noop, + .set_affinity = uv_set_irq_affinity, }; /* + * Add offset and pnode information of the hub sourcing interrupts to the + * rb tree for a specific irq. + */ +static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade) +{ + struct rb_node **link = &uv_irq_root.rb_node; + struct rb_node *parent = NULL; + struct uv_irq_2_mmr_pnode *n; + struct uv_irq_2_mmr_pnode *e; + unsigned long irqflags; + + n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL, + uv_blade_to_memory_nid(blade)); + if (!n) + return -ENOMEM; + + n->irq = irq; + n->offset = offset; + n->pnode = uv_blade_to_pnode(blade); + spin_lock_irqsave(&uv_irq_lock, irqflags); + /* Find the right place in the rbtree: */ + while (*link) { + parent = *link; + e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list); + + if (unlikely(irq == e->irq)) { + /* irq entry exists */ + e->pnode = uv_blade_to_pnode(blade); + e->offset = offset; + spin_unlock_irqrestore(&uv_irq_lock, irqflags); + kfree(n); + return 0; + } + + if (irq < e->irq) + link = &(*link)->rb_left; + else + link = &(*link)->rb_right; + } + + /* Insert the node into the rbtree. */ + rb_link_node(&n->list, parent, link); + rb_insert_color(&n->list, &uv_irq_root); + + spin_unlock_irqrestore(&uv_irq_lock, irqflags); + return 0; +} + +/* Retrieve offset and pnode information from the rb tree for a specific irq */ +int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode) +{ + struct uv_irq_2_mmr_pnode *e; + struct rb_node *n; + unsigned long irqflags; + + spin_lock_irqsave(&uv_irq_lock, irqflags); + n = uv_irq_root.rb_node; + while (n) { + e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); + + if (e->irq == irq) { + *offset = e->offset; + *pnode = e->pnode; + spin_unlock_irqrestore(&uv_irq_lock, irqflags); + return 0; + } + + if (irq < e->irq) + n = n->rb_left; + else + n = n->rb_right; + } + spin_unlock_irqrestore(&uv_irq_lock, irqflags); + return -1; +} + +/* + * Re-target the irq to the specified CPU and enable the specified MMR located + * on the specified blade to allow the sending of MSIs to the specified CPU. + */ +static int +arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, + unsigned long mmr_offset, int restrict) +{ + const struct cpumask *eligible_cpu = cpumask_of(cpu); + struct irq_desc *desc = irq_to_desc(irq); + struct irq_cfg *cfg; + int mmr_pnode; + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + int err; + + BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != + sizeof(unsigned long)); + + cfg = irq_cfg(irq); + + err = assign_irq_vector(irq, cfg, eligible_cpu); + if (err != 0) + return err; + + if (restrict == UV_AFFINITY_CPU) + desc->status |= IRQ_NO_BALANCING; + else + desc->status |= IRQ_MOVE_PCNTXT; + + set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, + irq_name); + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + entry->vector = cfg->vector; + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; + entry->polarity = 0; + entry->trigger = 0; + entry->mask = 0; + entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); + + mmr_pnode = uv_blade_to_pnode(mmr_blade); + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); + + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + + return irq; +} + +/* + * Disable the specified MMR located on the specified blade so that MSIs are + * longer allowed to be sent. + */ +static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset) +{ + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + + BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != + sizeof(unsigned long)); + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + entry->mask = 1; + + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); +} + +static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask) +{ + struct irq_desc *desc = irq_to_desc(irq); + struct irq_cfg *cfg = desc->chip_data; + unsigned int dest; + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + unsigned long mmr_offset; + unsigned mmr_pnode; + + dest = set_desc_affinity(desc, mask); + if (dest == BAD_APICID) + return -1; + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + + entry->vector = cfg->vector; + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; + entry->polarity = 0; + entry->trigger = 0; + entry->mask = 0; + entry->dest = dest; + + /* Get previously stored MMR and pnode of hub sourcing interrupts */ + if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode)) + return -1; + + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); + + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + + return 0; +} + +/* * Set up a mapping of an available irq and vector, and enable the specified * MMR that defines the MSI that is to be sent to the specified CPU when an * interrupt is raised. */ int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, - unsigned long mmr_offset) + unsigned long mmr_offset, int restrict) { - int irq; - int ret; + int irq, ret; + + irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade)); - irq = create_irq(); if (irq <= 0) return -EBUSY; - ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset); - if (ret != irq) + ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset, + restrict); + if (ret == irq) + uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade); + else destroy_irq(irq); return ret; @@ -71,9 +275,28 @@ EXPORT_SYMBOL_GPL(uv_setup_irq); * * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq(). */ -void uv_teardown_irq(unsigned int irq, int mmr_blade, unsigned long mmr_offset) +void uv_teardown_irq(unsigned int irq) { - arch_disable_uv_irq(mmr_blade, mmr_offset); + struct uv_irq_2_mmr_pnode *e; + struct rb_node *n; + unsigned long irqflags; + + spin_lock_irqsave(&uv_irq_lock, irqflags); + n = uv_irq_root.rb_node; + while (n) { + e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); + if (e->irq == irq) { + arch_disable_uv_irq(e->pnode, e->offset); + rb_erase(n, &uv_irq_root); + kfree(e); + break; + } + if (irq < e->irq) + n = n->rb_left; + else + n = n->rb_right; + } + spin_unlock_irqrestore(&uv_irq_lock, irqflags); destroy_irq(irq); } EXPORT_SYMBOL_GPL(uv_teardown_irq); diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index f068553a1b1..abda6f53e71 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -183,7 +183,7 @@ static void __init MP_processor_info(struct mpc_cpu *m) return; } - apic_cpus = apic->apicid_to_cpu_present(m->apicid); + apic->apicid_to_cpu_present(m->apicid, &apic_cpus); physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); /* * Validate version @@ -486,7 +486,7 @@ static void end_cobalt_irq(unsigned int irq) } static struct irq_chip cobalt_irq_type = { - .typename = "Cobalt-APIC", + .name = "Cobalt-APIC", .startup = startup_cobalt_irq, .shutdown = disable_cobalt_irq, .enable = enable_cobalt_irq, @@ -523,7 +523,7 @@ static void end_piix4_master_irq(unsigned int irq) } static struct irq_chip piix4_master_irq_type = { - .typename = "PIIX4-master", + .name = "PIIX4-master", .startup = startup_piix4_master_irq, .ack = ack_cobalt_irq, .end = end_piix4_master_irq, @@ -531,7 +531,7 @@ static struct irq_chip piix4_master_irq_type = { static struct irq_chip piix4_virtual_irq_type = { - .typename = "PIIX4-virtual", + .name = "PIIX4-virtual", .shutdown = disable_8259A_irq, .enable = enable_8259A_irq, .disable = disable_8259A_irq, diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 3909e3ba5ce..a1029769b6f 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -30,9 +30,8 @@ EXPORT_SYMBOL(__put_user_8); EXPORT_SYMBOL(copy_user_generic); EXPORT_SYMBOL(__copy_user_nocache); -EXPORT_SYMBOL(copy_from_user); -EXPORT_SYMBOL(copy_to_user); -EXPORT_SYMBOL(__copy_from_user_inatomic); +EXPORT_SYMBOL(_copy_from_user); +EXPORT_SYMBOL(_copy_to_user); EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(clear_page); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 4449a4a2c2e..d11c5ff7c65 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -14,10 +14,13 @@ #include <asm/time.h> #include <asm/irq.h> #include <asm/tsc.h> +#include <asm/iommu.h> void __cpuinit x86_init_noop(void) { } void __init x86_init_uint_noop(unsigned int unused) { } void __init x86_init_pgd_noop(pgd_t *unused) { } +int __init iommu_init_noop(void) { return 0; } +void iommu_shutdown_noop(void) { } /* * The platform setup functions are preset with the default functions @@ -62,6 +65,10 @@ struct x86_init_ops x86_init __initdata = { .tsc_pre_init = x86_init_noop, .timer_init = hpet_time_init, }, + + .iommu = { + .iommu_init = iommu_init_noop, + }, }; struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { @@ -72,4 +79,5 @@ struct x86_platform_ops x86_platform = { .calibrate_tsc = native_calibrate_tsc, .get_wallclock = mach_get_cmos_time, .set_wallclock = mach_set_rtc_mmss, + .iommu_shutdown = iommu_shutdown_noop, }; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ae07d261527..4fc80174191 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -42,6 +42,7 @@ #define CREATE_TRACE_POINTS #include "trace.h" +#include <asm/debugreg.h> #include <asm/uaccess.h> #include <asm/msr.h> #include <asm/desc.h> @@ -3643,14 +3644,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) trace_kvm_entry(vcpu->vcpu_id); kvm_x86_ops->run(vcpu, kvm_run); - if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) { - set_debugreg(current->thread.debugreg0, 0); - set_debugreg(current->thread.debugreg1, 1); - set_debugreg(current->thread.debugreg2, 2); - set_debugreg(current->thread.debugreg3, 3); - set_debugreg(current->thread.debugreg6, 6); - set_debugreg(current->thread.debugreg7, 7); - } + /* + * If the guest has used debug registers, at least dr7 + * will be disabled while returning to the host. + * If we don't have active breakpoints in the host, we don't + * care about the messed up debug address registers. But if + * we have some of them active, restore the old state. + */ + if (hw_breakpoint_active()) + hw_breakpoint_restore(); set_bit(KVM_REQ_KICK, &vcpu->requests); local_irq_enable(); diff --git a/arch/x86/lib/.gitignore b/arch/x86/lib/.gitignore new file mode 100644 index 00000000000..8df89f0a3fe --- /dev/null +++ b/arch/x86/lib/.gitignore @@ -0,0 +1 @@ +inat-tables.c diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 85f5db95c60..a2d6472895f 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -2,12 +2,25 @@ # Makefile for x86 specific library files. # +inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk +inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt +quiet_cmd_inat_tables = GEN $@ + cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ + +$(obj)/inat-tables.c: $(inat_tables_script) $(inat_tables_maps) + $(call cmd,inat_tables) + +$(obj)/inat.o: $(obj)/inat-tables.c + +clean-files := inat-tables.c + obj-$(CONFIG_SMP) := msr.o lib-y := delay.o lib-y += thunk_$(BITS).o lib-y += usercopy_$(BITS).o getuser.o putuser.o lib-y += memcpy_$(BITS).o +lib-y += insn.o inat.o obj-y += msr-reg.o msr-reg-export.o diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 6ba0f7bb85e..cf889d4e076 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -65,7 +65,7 @@ .endm /* Standard copy_to_user with segment limit checking */ -ENTRY(copy_to_user) +ENTRY(_copy_to_user) CFI_STARTPROC GET_THREAD_INFO(%rax) movq %rdi,%rcx @@ -75,10 +75,10 @@ ENTRY(copy_to_user) jae bad_to_user ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string CFI_ENDPROC -ENDPROC(copy_to_user) +ENDPROC(_copy_to_user) /* Standard copy_from_user with segment limit checking */ -ENTRY(copy_from_user) +ENTRY(_copy_from_user) CFI_STARTPROC GET_THREAD_INFO(%rax) movq %rsi,%rcx @@ -88,7 +88,7 @@ ENTRY(copy_from_user) jae bad_from_user ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string CFI_ENDPROC -ENDPROC(copy_from_user) +ENDPROC(_copy_from_user) ENTRY(copy_user_generic) CFI_STARTPROC @@ -96,12 +96,6 @@ ENTRY(copy_user_generic) CFI_ENDPROC ENDPROC(copy_user_generic) -ENTRY(__copy_from_user_inatomic) - CFI_STARTPROC - ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string - CFI_ENDPROC -ENDPROC(__copy_from_user_inatomic) - .section .fixup,"ax" /* must zero dest */ ENTRY(bad_from_user) diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c new file mode 100644 index 00000000000..46fc4ee09fc --- /dev/null +++ b/arch/x86/lib/inat.c @@ -0,0 +1,90 @@ +/* + * x86 instruction attribute tables + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ +#include <asm/insn.h> + +/* Attribute tables are generated from opcode map */ +#include "inat-tables.c" + +/* Attribute search APIs */ +insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) +{ + return inat_primary_table[opcode]; +} + +insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, insn_byte_t last_pfx, + insn_attr_t esc_attr) +{ + const insn_attr_t *table; + insn_attr_t lpfx_attr; + int n, m = 0; + + n = inat_escape_id(esc_attr); + if (last_pfx) { + lpfx_attr = inat_get_opcode_attribute(last_pfx); + m = inat_last_prefix_id(lpfx_attr); + } + table = inat_escape_tables[n][0]; + if (!table) + return 0; + if (inat_has_variant(table[opcode]) && m) { + table = inat_escape_tables[n][m]; + if (!table) + return 0; + } + return table[opcode]; +} + +insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx, + insn_attr_t grp_attr) +{ + const insn_attr_t *table; + insn_attr_t lpfx_attr; + int n, m = 0; + + n = inat_group_id(grp_attr); + if (last_pfx) { + lpfx_attr = inat_get_opcode_attribute(last_pfx); + m = inat_last_prefix_id(lpfx_attr); + } + table = inat_group_tables[n][0]; + if (!table) + return inat_group_common_attribute(grp_attr); + if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) { + table = inat_group_tables[n][m]; + if (!table) + return inat_group_common_attribute(grp_attr); + } + return table[X86_MODRM_REG(modrm)] | + inat_group_common_attribute(grp_attr); +} + +insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, + insn_byte_t vex_p) +{ + const insn_attr_t *table; + if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) + return 0; + table = inat_avx_tables[vex_m][vex_p]; + if (!table) + return 0; + return table[opcode]; +} + diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c new file mode 100644 index 00000000000..9f33b984d0e --- /dev/null +++ b/arch/x86/lib/insn.c @@ -0,0 +1,516 @@ +/* + * x86 instruction analysis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2002, 2004, 2009 + */ + +#include <linux/string.h> +#include <asm/inat.h> +#include <asm/insn.h> + +#define get_next(t, insn) \ + ({t r; r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) + +#define peek_next(t, insn) \ + ({t r; r = *(t*)insn->next_byte; r; }) + +#define peek_nbyte_next(t, insn, n) \ + ({t r; r = *(t*)((insn)->next_byte + n); r; }) + +/** + * insn_init() - initialize struct insn + * @insn: &struct insn to be initialized + * @kaddr: address (in kernel memory) of instruction (or copy thereof) + * @x86_64: !0 for 64-bit kernel or 64-bit app + */ +void insn_init(struct insn *insn, const void *kaddr, int x86_64) +{ + memset(insn, 0, sizeof(*insn)); + insn->kaddr = kaddr; + insn->next_byte = kaddr; + insn->x86_64 = x86_64 ? 1 : 0; + insn->opnd_bytes = 4; + if (x86_64) + insn->addr_bytes = 8; + else + insn->addr_bytes = 4; +} + +/** + * insn_get_prefixes - scan x86 instruction prefix bytes + * @insn: &struct insn containing instruction + * + * Populates the @insn->prefixes bitmap, and updates @insn->next_byte + * to point to the (first) opcode. No effect if @insn->prefixes.got + * is already set. + */ +void insn_get_prefixes(struct insn *insn) +{ + struct insn_field *prefixes = &insn->prefixes; + insn_attr_t attr; + insn_byte_t b, lb; + int i, nb; + + if (prefixes->got) + return; + + nb = 0; + lb = 0; + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + while (inat_is_legacy_prefix(attr)) { + /* Skip if same prefix */ + for (i = 0; i < nb; i++) + if (prefixes->bytes[i] == b) + goto found; + if (nb == 4) + /* Invalid instruction */ + break; + prefixes->bytes[nb++] = b; + if (inat_is_address_size_prefix(attr)) { + /* address size switches 2/4 or 4/8 */ + if (insn->x86_64) + insn->addr_bytes ^= 12; + else + insn->addr_bytes ^= 6; + } else if (inat_is_operand_size_prefix(attr)) { + /* oprand size switches 2/4 */ + insn->opnd_bytes ^= 6; + } +found: + prefixes->nbytes++; + insn->next_byte++; + lb = b; + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + } + /* Set the last prefix */ + if (lb && lb != insn->prefixes.bytes[3]) { + if (unlikely(insn->prefixes.bytes[3])) { + /* Swap the last prefix */ + b = insn->prefixes.bytes[3]; + for (i = 0; i < nb; i++) + if (prefixes->bytes[i] == lb) + prefixes->bytes[i] = b; + } + insn->prefixes.bytes[3] = lb; + } + + /* Decode REX prefix */ + if (insn->x86_64) { + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + if (inat_is_rex_prefix(attr)) { + insn->rex_prefix.value = b; + insn->rex_prefix.nbytes = 1; + insn->next_byte++; + if (X86_REX_W(b)) + /* REX.W overrides opnd_size */ + insn->opnd_bytes = 8; + } + } + insn->rex_prefix.got = 1; + + /* Decode VEX prefix */ + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + if (inat_is_vex_prefix(attr)) { + insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1); + if (!insn->x86_64) { + /* + * In 32-bits mode, if the [7:6] bits (mod bits of + * ModRM) on the second byte are not 11b, it is + * LDS or LES. + */ + if (X86_MODRM_MOD(b2) != 3) + goto vex_end; + } + insn->vex_prefix.bytes[0] = b; + insn->vex_prefix.bytes[1] = b2; + if (inat_is_vex3_prefix(attr)) { + b2 = peek_nbyte_next(insn_byte_t, insn, 2); + insn->vex_prefix.bytes[2] = b2; + insn->vex_prefix.nbytes = 3; + insn->next_byte += 3; + if (insn->x86_64 && X86_VEX_W(b2)) + /* VEX.W overrides opnd_size */ + insn->opnd_bytes = 8; + } else { + insn->vex_prefix.nbytes = 2; + insn->next_byte += 2; + } + } +vex_end: + insn->vex_prefix.got = 1; + + prefixes->got = 1; + return; +} + +/** + * insn_get_opcode - collect opcode(s) + * @insn: &struct insn containing instruction + * + * Populates @insn->opcode, updates @insn->next_byte to point past the + * opcode byte(s), and set @insn->attr (except for groups). + * If necessary, first collects any preceding (prefix) bytes. + * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got + * is already 1. + */ +void insn_get_opcode(struct insn *insn) +{ + struct insn_field *opcode = &insn->opcode; + insn_byte_t op, pfx; + if (opcode->got) + return; + if (!insn->prefixes.got) + insn_get_prefixes(insn); + + /* Get first opcode */ + op = get_next(insn_byte_t, insn); + opcode->bytes[0] = op; + opcode->nbytes = 1; + + /* Check if there is VEX prefix or not */ + if (insn_is_avx(insn)) { + insn_byte_t m, p; + m = insn_vex_m_bits(insn); + p = insn_vex_p_bits(insn); + insn->attr = inat_get_avx_attribute(op, m, p); + if (!inat_accept_vex(insn->attr)) + insn->attr = 0; /* This instruction is bad */ + goto end; /* VEX has only 1 byte for opcode */ + } + + insn->attr = inat_get_opcode_attribute(op); + while (inat_is_escape(insn->attr)) { + /* Get escaped opcode */ + op = get_next(insn_byte_t, insn); + opcode->bytes[opcode->nbytes++] = op; + pfx = insn_last_prefix(insn); + insn->attr = inat_get_escape_attribute(op, pfx, insn->attr); + } + if (inat_must_vex(insn->attr)) + insn->attr = 0; /* This instruction is bad */ +end: + opcode->got = 1; +} + +/** + * insn_get_modrm - collect ModRM byte, if any + * @insn: &struct insn containing instruction + * + * Populates @insn->modrm and updates @insn->next_byte to point past the + * ModRM byte, if any. If necessary, first collects the preceding bytes + * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. + */ +void insn_get_modrm(struct insn *insn) +{ + struct insn_field *modrm = &insn->modrm; + insn_byte_t pfx, mod; + if (modrm->got) + return; + if (!insn->opcode.got) + insn_get_opcode(insn); + + if (inat_has_modrm(insn->attr)) { + mod = get_next(insn_byte_t, insn); + modrm->value = mod; + modrm->nbytes = 1; + if (inat_is_group(insn->attr)) { + pfx = insn_last_prefix(insn); + insn->attr = inat_get_group_attribute(mod, pfx, + insn->attr); + } + } + + if (insn->x86_64 && inat_is_force64(insn->attr)) + insn->opnd_bytes = 8; + modrm->got = 1; +} + + +/** + * insn_rip_relative() - Does instruction use RIP-relative addressing mode? + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * ModRM byte. No effect if @insn->x86_64 is 0. + */ +int insn_rip_relative(struct insn *insn) +{ + struct insn_field *modrm = &insn->modrm; + + if (!insn->x86_64) + return 0; + if (!modrm->got) + insn_get_modrm(insn); + /* + * For rip-relative instructions, the mod field (top 2 bits) + * is zero and the r/m field (bottom 3 bits) is 0x5. + */ + return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); +} + +/** + * insn_get_sib() - Get the SIB byte of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * ModRM byte. + */ +void insn_get_sib(struct insn *insn) +{ + insn_byte_t modrm; + + if (insn->sib.got) + return; + if (!insn->modrm.got) + insn_get_modrm(insn); + if (insn->modrm.nbytes) { + modrm = (insn_byte_t)insn->modrm.value; + if (insn->addr_bytes != 2 && + X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { + insn->sib.value = get_next(insn_byte_t, insn); + insn->sib.nbytes = 1; + } + } + insn->sib.got = 1; +} + + +/** + * insn_get_displacement() - Get the displacement of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * SIB byte. + * Displacement value is sign-expanded. + */ +void insn_get_displacement(struct insn *insn) +{ + insn_byte_t mod, rm, base; + + if (insn->displacement.got) + return; + if (!insn->sib.got) + insn_get_sib(insn); + if (insn->modrm.nbytes) { + /* + * Interpreting the modrm byte: + * mod = 00 - no displacement fields (exceptions below) + * mod = 01 - 1-byte displacement field + * mod = 10 - displacement field is 4 bytes, or 2 bytes if + * address size = 2 (0x67 prefix in 32-bit mode) + * mod = 11 - no memory operand + * + * If address size = 2... + * mod = 00, r/m = 110 - displacement field is 2 bytes + * + * If address size != 2... + * mod != 11, r/m = 100 - SIB byte exists + * mod = 00, SIB base = 101 - displacement field is 4 bytes + * mod = 00, r/m = 101 - rip-relative addressing, displacement + * field is 4 bytes + */ + mod = X86_MODRM_MOD(insn->modrm.value); + rm = X86_MODRM_RM(insn->modrm.value); + base = X86_SIB_BASE(insn->sib.value); + if (mod == 3) + goto out; + if (mod == 1) { + insn->displacement.value = get_next(char, insn); + insn->displacement.nbytes = 1; + } else if (insn->addr_bytes == 2) { + if ((mod == 0 && rm == 6) || mod == 2) { + insn->displacement.value = + get_next(short, insn); + insn->displacement.nbytes = 2; + } + } else { + if ((mod == 0 && rm == 5) || mod == 2 || + (mod == 0 && base == 5)) { + insn->displacement.value = get_next(int, insn); + insn->displacement.nbytes = 4; + } + } + } +out: + insn->displacement.got = 1; +} + +/* Decode moffset16/32/64 */ +static void __get_moffset(struct insn *insn) +{ + switch (insn->addr_bytes) { + case 2: + insn->moffset1.value = get_next(short, insn); + insn->moffset1.nbytes = 2; + break; + case 4: + insn->moffset1.value = get_next(int, insn); + insn->moffset1.nbytes = 4; + break; + case 8: + insn->moffset1.value = get_next(int, insn); + insn->moffset1.nbytes = 4; + insn->moffset2.value = get_next(int, insn); + insn->moffset2.nbytes = 4; + break; + } + insn->moffset1.got = insn->moffset2.got = 1; +} + +/* Decode imm v32(Iz) */ +static void __get_immv32(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate.value = get_next(short, insn); + insn->immediate.nbytes = 2; + break; + case 4: + case 8: + insn->immediate.value = get_next(int, insn); + insn->immediate.nbytes = 4; + break; + } +} + +/* Decode imm v64(Iv/Ov) */ +static void __get_immv(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate1.value = get_next(short, insn); + insn->immediate1.nbytes = 2; + break; + case 4: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + break; + case 8: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + insn->immediate2.value = get_next(int, insn); + insn->immediate2.nbytes = 4; + break; + } + insn->immediate1.got = insn->immediate2.got = 1; +} + +/* Decode ptr16:16/32(Ap) */ +static void __get_immptr(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate1.value = get_next(short, insn); + insn->immediate1.nbytes = 2; + break; + case 4: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + break; + case 8: + /* ptr16:64 is not exist (no segment) */ + return; + } + insn->immediate2.value = get_next(unsigned short, insn); + insn->immediate2.nbytes = 2; + insn->immediate1.got = insn->immediate2.got = 1; +} + +/** + * insn_get_immediate() - Get the immediates of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * displacement bytes. + * Basically, most of immediates are sign-expanded. Unsigned-value can be + * get by bit masking with ((1 << (nbytes * 8)) - 1) + */ +void insn_get_immediate(struct insn *insn) +{ + if (insn->immediate.got) + return; + if (!insn->displacement.got) + insn_get_displacement(insn); + + if (inat_has_moffset(insn->attr)) { + __get_moffset(insn); + goto done; + } + + if (!inat_has_immediate(insn->attr)) + /* no immediates */ + goto done; + + switch (inat_immediate_size(insn->attr)) { + case INAT_IMM_BYTE: + insn->immediate.value = get_next(char, insn); + insn->immediate.nbytes = 1; + break; + case INAT_IMM_WORD: + insn->immediate.value = get_next(short, insn); + insn->immediate.nbytes = 2; + break; + case INAT_IMM_DWORD: + insn->immediate.value = get_next(int, insn); + insn->immediate.nbytes = 4; + break; + case INAT_IMM_QWORD: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + insn->immediate2.value = get_next(int, insn); + insn->immediate2.nbytes = 4; + break; + case INAT_IMM_PTR: + __get_immptr(insn); + break; + case INAT_IMM_VWORD32: + __get_immv32(insn); + break; + case INAT_IMM_VWORD: + __get_immv(insn); + break; + default: + break; + } + if (inat_has_second_immediate(insn->attr)) { + insn->immediate2.value = get_next(char, insn); + insn->immediate2.nbytes = 1; + } +done: + insn->immediate.got = 1; +} + +/** + * insn_get_length() - Get the length of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * immediates bytes. + */ +void insn_get_length(struct insn *insn) +{ + if (insn->length) + return; + if (!insn->immediate.got) + insn_get_immediate(insn); + insn->length = (unsigned char)((unsigned long)insn->next_byte + - (unsigned long)insn->kaddr); +} diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index 33a1e3ca22d..41628b104b9 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c @@ -71,14 +71,9 @@ int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) } EXPORT_SYMBOL(wrmsr_on_cpu); -/* rdmsr on a bunch of CPUs - * - * @mask: which CPUs - * @msr_no: which MSR - * @msrs: array of MSR values - * - */ -void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs) +static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no, + struct msr *msrs, + void (*msr_func) (void *info)) { struct msr_info rv; int this_cpu; @@ -92,11 +87,23 @@ void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs) this_cpu = get_cpu(); if (cpumask_test_cpu(this_cpu, mask)) - __rdmsr_on_cpu(&rv); + msr_func(&rv); - smp_call_function_many(mask, __rdmsr_on_cpu, &rv, 1); + smp_call_function_many(mask, msr_func, &rv, 1); put_cpu(); } + +/* rdmsr on a bunch of CPUs + * + * @mask: which CPUs + * @msr_no: which MSR + * @msrs: array of MSR values + * + */ +void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) +{ + __rwmsr_on_cpus(mask, msr_no, msrs, __rdmsr_on_cpu); +} EXPORT_SYMBOL(rdmsr_on_cpus); /* @@ -107,24 +114,9 @@ EXPORT_SYMBOL(rdmsr_on_cpus); * @msrs: array of MSR values * */ -void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs) +void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) { - struct msr_info rv; - int this_cpu; - - memset(&rv, 0, sizeof(rv)); - - rv.off = cpumask_first(mask); - rv.msrs = msrs; - rv.msr_no = msr_no; - - this_cpu = get_cpu(); - - if (cpumask_test_cpu(this_cpu, mask)) - __wrmsr_on_cpu(&rv); - - smp_call_function_many(mask, __wrmsr_on_cpu, &rv, 1); - put_cpu(); + __rwmsr_on_cpus(mask, msr_no, msrs, __wrmsr_on_cpu); } EXPORT_SYMBOL(wrmsr_on_cpus); diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 1f118d462ac..e218d5df85f 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -874,7 +874,7 @@ EXPORT_SYMBOL(copy_to_user); * data to the requested size using zero bytes. */ unsigned long -copy_from_user(void *to, const void __user *from, unsigned long n) +_copy_from_user(void *to, const void __user *from, unsigned long n) { if (access_ok(VERIFY_READ, from, n)) n = __copy_from_user(to, from, n); @@ -882,4 +882,10 @@ copy_from_user(void *to, const void __user *from, unsigned long n) memset(to, 0, n); return n; } -EXPORT_SYMBOL(copy_from_user); +EXPORT_SYMBOL(_copy_from_user); + +void copy_from_user_overflow(void) +{ + WARN(1, "Buffer overflow detected!\n"); +} +EXPORT_SYMBOL(copy_from_user_overflow); diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt new file mode 100644 index 00000000000..a793da5e560 --- /dev/null +++ b/arch/x86/lib/x86-opcode-map.txt @@ -0,0 +1,893 @@ +# x86 Opcode Maps +# +#<Opcode maps> +# Table: table-name +# Referrer: escaped-name +# AVXcode: avx-code +# opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] +# (or) +# opcode: escape # escaped-name +# EndTable +# +#<group maps> +# GrpTable: GrpXXX +# reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] +# EndTable +# +# AVX Superscripts +# (VEX): this opcode can accept VEX prefix. +# (oVEX): this opcode requires VEX prefix. +# (o128): this opcode only supports 128bit VEX. +# (o256): this opcode only supports 256bit VEX. +# + +Table: one byte opcode +Referrer: +AVXcode: +# 0x00 - 0x0f +00: ADD Eb,Gb +01: ADD Ev,Gv +02: ADD Gb,Eb +03: ADD Gv,Ev +04: ADD AL,Ib +05: ADD rAX,Iz +06: PUSH ES (i64) +07: POP ES (i64) +08: OR Eb,Gb +09: OR Ev,Gv +0a: OR Gb,Eb +0b: OR Gv,Ev +0c: OR AL,Ib +0d: OR rAX,Iz +0e: PUSH CS (i64) +0f: escape # 2-byte escape +# 0x10 - 0x1f +10: ADC Eb,Gb +11: ADC Ev,Gv +12: ADC Gb,Eb +13: ADC Gv,Ev +14: ADC AL,Ib +15: ADC rAX,Iz +16: PUSH SS (i64) +17: POP SS (i64) +18: SBB Eb,Gb +19: SBB Ev,Gv +1a: SBB Gb,Eb +1b: SBB Gv,Ev +1c: SBB AL,Ib +1d: SBB rAX,Iz +1e: PUSH DS (i64) +1f: POP DS (i64) +# 0x20 - 0x2f +20: AND Eb,Gb +21: AND Ev,Gv +22: AND Gb,Eb +23: AND Gv,Ev +24: AND AL,Ib +25: AND rAx,Iz +26: SEG=ES (Prefix) +27: DAA (i64) +28: SUB Eb,Gb +29: SUB Ev,Gv +2a: SUB Gb,Eb +2b: SUB Gv,Ev +2c: SUB AL,Ib +2d: SUB rAX,Iz +2e: SEG=CS (Prefix) +2f: DAS (i64) +# 0x30 - 0x3f +30: XOR Eb,Gb +31: XOR Ev,Gv +32: XOR Gb,Eb +33: XOR Gv,Ev +34: XOR AL,Ib +35: XOR rAX,Iz +36: SEG=SS (Prefix) +37: AAA (i64) +38: CMP Eb,Gb +39: CMP Ev,Gv +3a: CMP Gb,Eb +3b: CMP Gv,Ev +3c: CMP AL,Ib +3d: CMP rAX,Iz +3e: SEG=DS (Prefix) +3f: AAS (i64) +# 0x40 - 0x4f +40: INC eAX (i64) | REX (o64) +41: INC eCX (i64) | REX.B (o64) +42: INC eDX (i64) | REX.X (o64) +43: INC eBX (i64) | REX.XB (o64) +44: INC eSP (i64) | REX.R (o64) +45: INC eBP (i64) | REX.RB (o64) +46: INC eSI (i64) | REX.RX (o64) +47: INC eDI (i64) | REX.RXB (o64) +48: DEC eAX (i64) | REX.W (o64) +49: DEC eCX (i64) | REX.WB (o64) +4a: DEC eDX (i64) | REX.WX (o64) +4b: DEC eBX (i64) | REX.WXB (o64) +4c: DEC eSP (i64) | REX.WR (o64) +4d: DEC eBP (i64) | REX.WRB (o64) +4e: DEC eSI (i64) | REX.WRX (o64) +4f: DEC eDI (i64) | REX.WRXB (o64) +# 0x50 - 0x5f +50: PUSH rAX/r8 (d64) +51: PUSH rCX/r9 (d64) +52: PUSH rDX/r10 (d64) +53: PUSH rBX/r11 (d64) +54: PUSH rSP/r12 (d64) +55: PUSH rBP/r13 (d64) +56: PUSH rSI/r14 (d64) +57: PUSH rDI/r15 (d64) +58: POP rAX/r8 (d64) +59: POP rCX/r9 (d64) +5a: POP rDX/r10 (d64) +5b: POP rBX/r11 (d64) +5c: POP rSP/r12 (d64) +5d: POP rBP/r13 (d64) +5e: POP rSI/r14 (d64) +5f: POP rDI/r15 (d64) +# 0x60 - 0x6f +60: PUSHA/PUSHAD (i64) +61: POPA/POPAD (i64) +62: BOUND Gv,Ma (i64) +63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64) +64: SEG=FS (Prefix) +65: SEG=GS (Prefix) +66: Operand-Size (Prefix) +67: Address-Size (Prefix) +68: PUSH Iz (d64) +69: IMUL Gv,Ev,Iz +6a: PUSH Ib (d64) +6b: IMUL Gv,Ev,Ib +6c: INS/INSB Yb,DX +6d: INS/INSW/INSD Yz,DX +6e: OUTS/OUTSB DX,Xb +6f: OUTS/OUTSW/OUTSD DX,Xz +# 0x70 - 0x7f +70: JO Jb +71: JNO Jb +72: JB/JNAE/JC Jb +73: JNB/JAE/JNC Jb +74: JZ/JE Jb +75: JNZ/JNE Jb +76: JBE/JNA Jb +77: JNBE/JA Jb +78: JS Jb +79: JNS Jb +7a: JP/JPE Jb +7b: JNP/JPO Jb +7c: JL/JNGE Jb +7d: JNL/JGE Jb +7e: JLE/JNG Jb +7f: JNLE/JG Jb +# 0x80 - 0x8f +80: Grp1 Eb,Ib (1A) +81: Grp1 Ev,Iz (1A) +82: Grp1 Eb,Ib (1A),(i64) +83: Grp1 Ev,Ib (1A) +84: TEST Eb,Gb +85: TEST Ev,Gv +86: XCHG Eb,Gb +87: XCHG Ev,Gv +88: MOV Eb,Gb +89: MOV Ev,Gv +8a: MOV Gb,Eb +8b: MOV Gv,Ev +8c: MOV Ev,Sw +8d: LEA Gv,M +8e: MOV Sw,Ew +8f: Grp1A (1A) | POP Ev (d64) +# 0x90 - 0x9f +90: NOP | PAUSE (F3) | XCHG r8,rAX +91: XCHG rCX/r9,rAX +92: XCHG rDX/r10,rAX +93: XCHG rBX/r11,rAX +94: XCHG rSP/r12,rAX +95: XCHG rBP/r13,rAX +96: XCHG rSI/r14,rAX +97: XCHG rDI/r15,rAX +98: CBW/CWDE/CDQE +99: CWD/CDQ/CQO +9a: CALLF Ap (i64) +9b: FWAIT/WAIT +9c: PUSHF/D/Q Fv (d64) +9d: POPF/D/Q Fv (d64) +9e: SAHF +9f: LAHF +# 0xa0 - 0xaf +a0: MOV AL,Ob +a1: MOV rAX,Ov +a2: MOV Ob,AL +a3: MOV Ov,rAX +a4: MOVS/B Xb,Yb +a5: MOVS/W/D/Q Xv,Yv +a6: CMPS/B Xb,Yb +a7: CMPS/W/D Xv,Yv +a8: TEST AL,Ib +a9: TEST rAX,Iz +aa: STOS/B Yb,AL +ab: STOS/W/D/Q Yv,rAX +ac: LODS/B AL,Xb +ad: LODS/W/D/Q rAX,Xv +ae: SCAS/B AL,Yb +af: SCAS/W/D/Q rAX,Xv +# 0xb0 - 0xbf +b0: MOV AL/R8L,Ib +b1: MOV CL/R9L,Ib +b2: MOV DL/R10L,Ib +b3: MOV BL/R11L,Ib +b4: MOV AH/R12L,Ib +b5: MOV CH/R13L,Ib +b6: MOV DH/R14L,Ib +b7: MOV BH/R15L,Ib +b8: MOV rAX/r8,Iv +b9: MOV rCX/r9,Iv +ba: MOV rDX/r10,Iv +bb: MOV rBX/r11,Iv +bc: MOV rSP/r12,Iv +bd: MOV rBP/r13,Iv +be: MOV rSI/r14,Iv +bf: MOV rDI/r15,Iv +# 0xc0 - 0xcf +c0: Grp2 Eb,Ib (1A) +c1: Grp2 Ev,Ib (1A) +c2: RETN Iw (f64) +c3: RETN +c4: LES Gz,Mp (i64) | 3bytes-VEX (Prefix) +c5: LDS Gz,Mp (i64) | 2bytes-VEX (Prefix) +c6: Grp11 Eb,Ib (1A) +c7: Grp11 Ev,Iz (1A) +c8: ENTER Iw,Ib +c9: LEAVE (d64) +ca: RETF Iw +cb: RETF +cc: INT3 +cd: INT Ib +ce: INTO (i64) +cf: IRET/D/Q +# 0xd0 - 0xdf +d0: Grp2 Eb,1 (1A) +d1: Grp2 Ev,1 (1A) +d2: Grp2 Eb,CL (1A) +d3: Grp2 Ev,CL (1A) +d4: AAM Ib (i64) +d5: AAD Ib (i64) +d6: +d7: XLAT/XLATB +d8: ESC +d9: ESC +da: ESC +db: ESC +dc: ESC +dd: ESC +de: ESC +df: ESC +# 0xe0 - 0xef +e0: LOOPNE/LOOPNZ Jb (f64) +e1: LOOPE/LOOPZ Jb (f64) +e2: LOOP Jb (f64) +e3: JrCXZ Jb (f64) +e4: IN AL,Ib +e5: IN eAX,Ib +e6: OUT Ib,AL +e7: OUT Ib,eAX +e8: CALL Jz (f64) +e9: JMP-near Jz (f64) +ea: JMP-far Ap (i64) +eb: JMP-short Jb (f64) +ec: IN AL,DX +ed: IN eAX,DX +ee: OUT DX,AL +ef: OUT DX,eAX +# 0xf0 - 0xff +f0: LOCK (Prefix) +f1: +f2: REPNE (Prefix) +f3: REP/REPE (Prefix) +f4: HLT +f5: CMC +f6: Grp3_1 Eb (1A) +f7: Grp3_2 Ev (1A) +f8: CLC +f9: STC +fa: CLI +fb: STI +fc: CLD +fd: STD +fe: Grp4 (1A) +ff: Grp5 (1A) +EndTable + +Table: 2-byte opcode (0x0f) +Referrer: 2-byte escape +AVXcode: 1 +# 0x0f 0x00-0x0f +00: Grp6 (1A) +01: Grp7 (1A) +02: LAR Gv,Ew +03: LSL Gv,Ew +04: +05: SYSCALL (o64) +06: CLTS +07: SYSRET (o64) +08: INVD +09: WBINVD +0a: +0b: UD2 (1B) +0c: +0d: NOP Ev | GrpP +0e: FEMMS +# 3DNow! uses the last imm byte as opcode extension. +0f: 3DNow! Pq,Qq,Ib +# 0x0f 0x10-0x1f +10: movups Vps,Wps (VEX) | movss Vss,Wss (F3),(VEX),(o128) | movupd Vpd,Wpd (66),(VEX) | movsd Vsd,Wsd (F2),(VEX),(o128) +11: movups Wps,Vps (VEX) | movss Wss,Vss (F3),(VEX),(o128) | movupd Wpd,Vpd (66),(VEX) | movsd Wsd,Vsd (F2),(VEX),(o128) +12: movlps Vq,Mq (VEX),(o128) | movlpd Vq,Mq (66),(VEX),(o128) | movhlps Vq,Uq (VEX),(o128) | movddup Vq,Wq (F2),(VEX) | movsldup Vq,Wq (F3),(VEX) +13: mpvlps Mq,Vq (VEX),(o128) | movlpd Mq,Vq (66),(VEX),(o128) +14: unpcklps Vps,Wq (VEX) | unpcklpd Vpd,Wq (66),(VEX) +15: unpckhps Vps,Wq (VEX) | unpckhpd Vpd,Wq (66),(VEX) +16: movhps Vq,Mq (VEX),(o128) | movhpd Vq,Mq (66),(VEX),(o128) | movlsps Vq,Uq (VEX),(o128) | movshdup Vq,Wq (F3),(VEX) +17: movhps Mq,Vq (VEX),(o128) | movhpd Mq,Vq (66),(VEX),(o128) +18: Grp16 (1A) +19: +1a: +1b: +1c: +1d: +1e: +1f: NOP Ev +# 0x0f 0x20-0x2f +20: MOV Rd,Cd +21: MOV Rd,Dd +22: MOV Cd,Rd +23: MOV Dd,Rd +24: +25: +26: +27: +28: movaps Vps,Wps (VEX) | movapd Vpd,Wpd (66),(VEX) +29: movaps Wps,Vps (VEX) | movapd Wpd,Vpd (66),(VEX) +2a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3),(VEX),(o128) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2),(VEX),(o128) +2b: movntps Mps,Vps (VEX) | movntpd Mpd,Vpd (66),(VEX) +2c: cvttps2pi Ppi,Wps | cvttss2si Gd/q,Wss (F3),(VEX),(o128) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2),(VEX),(o128) +2d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3),(VEX),(o128) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2),(VEX),(o128) +2e: ucomiss Vss,Wss (VEX),(o128) | ucomisd Vsd,Wsd (66),(VEX),(o128) +2f: comiss Vss,Wss (VEX),(o128) | comisd Vsd,Wsd (66),(VEX),(o128) +# 0x0f 0x30-0x3f +30: WRMSR +31: RDTSC +32: RDMSR +33: RDPMC +34: SYSENTER +35: SYSEXIT +36: +37: GETSEC +38: escape # 3-byte escape 1 +39: +3a: escape # 3-byte escape 2 +3b: +3c: +3d: +3e: +3f: +# 0x0f 0x40-0x4f +40: CMOVO Gv,Ev +41: CMOVNO Gv,Ev +42: CMOVB/C/NAE Gv,Ev +43: CMOVAE/NB/NC Gv,Ev +44: CMOVE/Z Gv,Ev +45: CMOVNE/NZ Gv,Ev +46: CMOVBE/NA Gv,Ev +47: CMOVA/NBE Gv,Ev +48: CMOVS Gv,Ev +49: CMOVNS Gv,Ev +4a: CMOVP/PE Gv,Ev +4b: CMOVNP/PO Gv,Ev +4c: CMOVL/NGE Gv,Ev +4d: CMOVNL/GE Gv,Ev +4e: CMOVLE/NG Gv,Ev +4f: CMOVNLE/G Gv,Ev +# 0x0f 0x50-0x5f +50: movmskps Gd/q,Ups (VEX) | movmskpd Gd/q,Upd (66),(VEX) +51: sqrtps Vps,Wps (VEX) | sqrtss Vss,Wss (F3),(VEX),(o128) | sqrtpd Vpd,Wpd (66),(VEX) | sqrtsd Vsd,Wsd (F2),(VEX),(o128) +52: rsqrtps Vps,Wps (VEX) | rsqrtss Vss,Wss (F3),(VEX),(o128) +53: rcpps Vps,Wps (VEX) | rcpss Vss,Wss (F3),(VEX),(o128) +54: andps Vps,Wps (VEX) | andpd Vpd,Wpd (66),(VEX) +55: andnps Vps,Wps (VEX) | andnpd Vpd,Wpd (66),(VEX) +56: orps Vps,Wps (VEX) | orpd Vpd,Wpd (66),(VEX) +57: xorps Vps,Wps (VEX) | xorpd Vpd,Wpd (66),(VEX) +58: addps Vps,Wps (VEX) | addss Vss,Wss (F3),(VEX),(o128) | addpd Vpd,Wpd (66),(VEX) | addsd Vsd,Wsd (F2),(VEX),(o128) +59: mulps Vps,Wps (VEX) | mulss Vss,Wss (F3),(VEX),(o128) | mulpd Vpd,Wpd (66),(VEX) | mulsd Vsd,Wsd (F2),(VEX),(o128) +5a: cvtps2pd Vpd,Wps (VEX) | cvtss2sd Vsd,Wss (F3),(VEX),(o128) | cvtpd2ps Vps,Wpd (66),(VEX) | cvtsd2ss Vsd,Wsd (F2),(VEX),(o128) +5b: cvtdq2ps Vps,Wdq (VEX) | cvtps2dq Vdq,Wps (66),(VEX) | cvttps2dq Vdq,Wps (F3),(VEX) +5c: subps Vps,Wps (VEX) | subss Vss,Wss (F3),(VEX),(o128) | subpd Vpd,Wpd (66),(VEX) | subsd Vsd,Wsd (F2),(VEX),(o128) +5d: minps Vps,Wps (VEX) | minss Vss,Wss (F3),(VEX),(o128) | minpd Vpd,Wpd (66),(VEX) | minsd Vsd,Wsd (F2),(VEX),(o128) +5e: divps Vps,Wps (VEX) | divss Vss,Wss (F3),(VEX),(o128) | divpd Vpd,Wpd (66),(VEX) | divsd Vsd,Wsd (F2),(VEX),(o128) +5f: maxps Vps,Wps (VEX) | maxss Vss,Wss (F3),(VEX),(o128) | maxpd Vpd,Wpd (66),(VEX) | maxsd Vsd,Wsd (F2),(VEX),(o128) +# 0x0f 0x60-0x6f +60: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66),(VEX),(o128) +61: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66),(VEX),(o128) +62: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66),(VEX),(o128) +63: packsswb Pq,Qq | packsswb Vdq,Wdq (66),(VEX),(o128) +64: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66),(VEX),(o128) +65: pcmpgtw Pq,Qq | pcmpgtw Vdq,Wdq (66),(VEX),(o128) +66: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66),(VEX),(o128) +67: packuswb Pq,Qq | packuswb Vdq,Wdq (66),(VEX),(o128) +68: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66),(VEX),(o128) +69: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66),(VEX),(o128) +6a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66),(VEX),(o128) +6b: packssdw Pq,Qd | packssdw Vdq,Wdq (66),(VEX),(o128) +6c: punpcklqdq Vdq,Wdq (66),(VEX),(o128) +6d: punpckhqdq Vdq,Wdq (66),(VEX),(o128) +6e: movd/q/ Pd,Ed/q | movd/q Vdq,Ed/q (66),(VEX),(o128) +6f: movq Pq,Qq | movdqa Vdq,Wdq (66),(VEX) | movdqu Vdq,Wdq (F3),(VEX) +# 0x0f 0x70-0x7f +70: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66),(VEX),(o128) | pshufhw Vdq,Wdq,Ib (F3),(VEX),(o128) | pshuflw VdqWdq,Ib (F2),(VEX),(o128) +71: Grp12 (1A) +72: Grp13 (1A) +73: Grp14 (1A) +74: pcmpeqb Pq,Qq | pcmpeqb Vdq,Wdq (66),(VEX),(o128) +75: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66),(VEX),(o128) +76: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66),(VEX),(o128) +77: emms/vzeroupper/vzeroall (VEX) +78: VMREAD Ed/q,Gd/q +79: VMWRITE Gd/q,Ed/q +7a: +7b: +7c: haddps Vps,Wps (F2),(VEX) | haddpd Vpd,Wpd (66),(VEX) +7d: hsubps Vps,Wps (F2),(VEX) | hsubpd Vpd,Wpd (66),(VEX) +7e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66),(VEX),(o128) | movq Vq,Wq (F3),(VEX),(o128) +7f: movq Qq,Pq | movdqa Wdq,Vdq (66),(VEX) | movdqu Wdq,Vdq (F3),(VEX) +# 0x0f 0x80-0x8f +80: JO Jz (f64) +81: JNO Jz (f64) +82: JB/JNAE/JC Jz (f64) +83: JNB/JAE/JNC Jz (f64) +84: JZ/JE Jz (f64) +85: JNZ/JNE Jz (f64) +86: JBE/JNA Jz (f64) +87: JNBE/JA Jz (f64) +88: JS Jz (f64) +89: JNS Jz (f64) +8a: JP/JPE Jz (f64) +8b: JNP/JPO Jz (f64) +8c: JL/JNGE Jz (f64) +8d: JNL/JGE Jz (f64) +8e: JLE/JNG Jz (f64) +8f: JNLE/JG Jz (f64) +# 0x0f 0x90-0x9f +90: SETO Eb +91: SETNO Eb +92: SETB/C/NAE Eb +93: SETAE/NB/NC Eb +94: SETE/Z Eb +95: SETNE/NZ Eb +96: SETBE/NA Eb +97: SETA/NBE Eb +98: SETS Eb +99: SETNS Eb +9a: SETP/PE Eb +9b: SETNP/PO Eb +9c: SETL/NGE Eb +9d: SETNL/GE Eb +9e: SETLE/NG Eb +9f: SETNLE/G Eb +# 0x0f 0xa0-0xaf +a0: PUSH FS (d64) +a1: POP FS (d64) +a2: CPUID +a3: BT Ev,Gv +a4: SHLD Ev,Gv,Ib +a5: SHLD Ev,Gv,CL +a6: GrpPDLK +a7: GrpRNG +a8: PUSH GS (d64) +a9: POP GS (d64) +aa: RSM +ab: BTS Ev,Gv +ac: SHRD Ev,Gv,Ib +ad: SHRD Ev,Gv,CL +ae: Grp15 (1A),(1C) +af: IMUL Gv,Ev +# 0x0f 0xb0-0xbf +b0: CMPXCHG Eb,Gb +b1: CMPXCHG Ev,Gv +b2: LSS Gv,Mp +b3: BTR Ev,Gv +b4: LFS Gv,Mp +b5: LGS Gv,Mp +b6: MOVZX Gv,Eb +b7: MOVZX Gv,Ew +b8: JMPE | POPCNT Gv,Ev (F3) +b9: Grp10 (1A) +ba: Grp8 Ev,Ib (1A) +bb: BTC Ev,Gv +bc: BSF Gv,Ev +bd: BSR Gv,Ev +be: MOVSX Gv,Eb +bf: MOVSX Gv,Ew +# 0x0f 0xc0-0xcf +c0: XADD Eb,Gb +c1: XADD Ev,Gv +c2: cmpps Vps,Wps,Ib (VEX) | cmpss Vss,Wss,Ib (F3),(VEX),(o128) | cmppd Vpd,Wpd,Ib (66),(VEX) | cmpsd Vsd,Wsd,Ib (F2),(VEX) +c3: movnti Md/q,Gd/q +c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66),(VEX),(o128) +c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66),(VEX),(o128) +c6: shufps Vps,Wps,Ib (VEX) | shufpd Vpd,Wpd,Ib (66),(VEX) +c7: Grp9 (1A) +c8: BSWAP RAX/EAX/R8/R8D +c9: BSWAP RCX/ECX/R9/R9D +ca: BSWAP RDX/EDX/R10/R10D +cb: BSWAP RBX/EBX/R11/R11D +cc: BSWAP RSP/ESP/R12/R12D +cd: BSWAP RBP/EBP/R13/R13D +ce: BSWAP RSI/ESI/R14/R14D +cf: BSWAP RDI/EDI/R15/R15D +# 0x0f 0xd0-0xdf +d0: addsubps Vps,Wps (F2),(VEX) | addsubpd Vpd,Wpd (66),(VEX) +d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66),(VEX),(o128) +d2: psrld Pq,Qq | psrld Vdq,Wdq (66),(VEX),(o128) +d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66),(VEX),(o128) +d4: paddq Pq,Qq | paddq Vdq,Wdq (66),(VEX),(o128) +d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66),(VEX),(o128) +d6: movq Wq,Vq (66),(VEX),(o128) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) +d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66),(VEX),(o128) +d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66),(VEX),(o128) +d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66),(VEX),(o128) +da: pminub Pq,Qq | pminub Vdq,Wdq (66),(VEX),(o128) +db: pand Pq,Qq | pand Vdq,Wdq (66),(VEX),(o128) +dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66),(VEX),(o128) +dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66),(VEX),(o128) +de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66),(VEX),(o128) +df: pandn Pq,Qq | pandn Vdq,Wdq (66),(VEX),(o128) +# 0x0f 0xe0-0xef +e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66),(VEX),(o128) +e1: psraw Pq,Qq | psraw Vdq,Wdq (66),(VEX),(o128) +e2: psrad Pq,Qq | psrad Vdq,Wdq (66),(VEX),(o128) +e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66),(VEX),(o128) +e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66),(VEX),(o128) +e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66),(VEX),(o128) +e6: cvtpd2dq Vdq,Wpd (F2),(VEX) | cvttpd2dq Vdq,Wpd (66),(VEX) | cvtdq2pd Vpd,Wdq (F3),(VEX) +e7: movntq Mq,Pq | movntdq Mdq,Vdq (66),(VEX) +e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66),(VEX),(o128) +e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66),(VEX),(o128) +ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66),(VEX),(o128) +eb: por Pq,Qq | por Vdq,Wdq (66),(VEX),(o128) +ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66),(VEX),(o128) +ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66),(VEX),(o128) +ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66),(VEX),(o128) +ef: pxor Pq,Qq | pxor Vdq,Wdq (66),(VEX),(o128) +# 0x0f 0xf0-0xff +f0: lddqu Vdq,Mdq (F2),(VEX) +f1: psllw Pq,Qq | psllw Vdq,Wdq (66),(VEX),(o128) +f2: pslld Pq,Qq | pslld Vdq,Wdq (66),(VEX),(o128) +f3: psllq Pq,Qq | psllq Vdq,Wdq (66),(VEX),(o128) +f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66),(VEX),(o128) +f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66),(VEX),(o128) +f6: psadbw Pq,Qq | psadbw Vdq,Wdq (66),(VEX),(o128) +f7: maskmovq Pq,Nq | maskmovdqu Vdq,Udq (66),(VEX),(o128) +f8: psubb Pq,Qq | psubb Vdq,Wdq (66),(VEX),(o128) +f9: psubw Pq,Qq | psubw Vdq,Wdq (66),(VEX),(o128) +fa: psubd Pq,Qq | psubd Vdq,Wdq (66),(VEX),(o128) +fb: psubq Pq,Qq | psubq Vdq,Wdq (66),(VEX),(o128) +fc: paddb Pq,Qq | paddb Vdq,Wdq (66),(VEX),(o128) +fd: paddw Pq,Qq | paddw Vdq,Wdq (66),(VEX),(o128) +fe: paddd Pq,Qq | paddd Vdq,Wdq (66),(VEX),(o128) +ff: +EndTable + +Table: 3-byte opcode 1 (0x0f 0x38) +Referrer: 3-byte escape 1 +AVXcode: 2 +# 0x0f 0x38 0x00-0x0f +00: pshufb Pq,Qq | pshufb Vdq,Wdq (66),(VEX),(o128) +01: phaddw Pq,Qq | phaddw Vdq,Wdq (66),(VEX),(o128) +02: phaddd Pq,Qq | phaddd Vdq,Wdq (66),(VEX),(o128) +03: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66),(VEX),(o128) +04: pmaddubsw Pq,Qq | pmaddubsw Vdq,Wdq (66),(VEX),(o128) +05: phsubw Pq,Qq | phsubw Vdq,Wdq (66),(VEX),(o128) +06: phsubd Pq,Qq | phsubd Vdq,Wdq (66),(VEX),(o128) +07: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66),(VEX),(o128) +08: psignb Pq,Qq | psignb Vdq,Wdq (66),(VEX),(o128) +09: psignw Pq,Qq | psignw Vdq,Wdq (66),(VEX),(o128) +0a: psignd Pq,Qq | psignd Vdq,Wdq (66),(VEX),(o128) +0b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66),(VEX),(o128) +0c: Vpermilps /r (66),(oVEX) +0d: Vpermilpd /r (66),(oVEX) +0e: vtestps /r (66),(oVEX) +0f: vtestpd /r (66),(oVEX) +# 0x0f 0x38 0x10-0x1f +10: pblendvb Vdq,Wdq (66) +11: +12: +13: +14: blendvps Vdq,Wdq (66) +15: blendvpd Vdq,Wdq (66) +16: +17: ptest Vdq,Wdq (66),(VEX) +18: vbroadcastss /r (66),(oVEX) +19: vbroadcastsd /r (66),(oVEX),(o256) +1a: vbroadcastf128 /r (66),(oVEX),(o256) +1b: +1c: pabsb Pq,Qq | pabsb Vdq,Wdq (66),(VEX),(o128) +1d: pabsw Pq,Qq | pabsw Vdq,Wdq (66),(VEX),(o128) +1e: pabsd Pq,Qq | pabsd Vdq,Wdq (66),(VEX),(o128) +1f: +# 0x0f 0x38 0x20-0x2f +20: pmovsxbw Vdq,Udq/Mq (66),(VEX),(o128) +21: pmovsxbd Vdq,Udq/Md (66),(VEX),(o128) +22: pmovsxbq Vdq,Udq/Mw (66),(VEX),(o128) +23: pmovsxwd Vdq,Udq/Mq (66),(VEX),(o128) +24: pmovsxwq Vdq,Udq/Md (66),(VEX),(o128) +25: pmovsxdq Vdq,Udq/Mq (66),(VEX),(o128) +26: +27: +28: pmuldq Vdq,Wdq (66),(VEX),(o128) +29: pcmpeqq Vdq,Wdq (66),(VEX),(o128) +2a: movntdqa Vdq,Mdq (66),(VEX),(o128) +2b: packusdw Vdq,Wdq (66),(VEX),(o128) +2c: vmaskmovps(ld) /r (66),(oVEX) +2d: vmaskmovpd(ld) /r (66),(oVEX) +2e: vmaskmovps(st) /r (66),(oVEX) +2f: vmaskmovpd(st) /r (66),(oVEX) +# 0x0f 0x38 0x30-0x3f +30: pmovzxbw Vdq,Udq/Mq (66),(VEX),(o128) +31: pmovzxbd Vdq,Udq/Md (66),(VEX),(o128) +32: pmovzxbq Vdq,Udq/Mw (66),(VEX),(o128) +33: pmovzxwd Vdq,Udq/Mq (66),(VEX),(o128) +34: pmovzxwq Vdq,Udq/Md (66),(VEX),(o128) +35: pmovzxdq Vdq,Udq/Mq (66),(VEX),(o128) +36: +37: pcmpgtq Vdq,Wdq (66),(VEX),(o128) +38: pminsb Vdq,Wdq (66),(VEX),(o128) +39: pminsd Vdq,Wdq (66),(VEX),(o128) +3a: pminuw Vdq,Wdq (66),(VEX),(o128) +3b: pminud Vdq,Wdq (66),(VEX),(o128) +3c: pmaxsb Vdq,Wdq (66),(VEX),(o128) +3d: pmaxsd Vdq,Wdq (66),(VEX),(o128) +3e: pmaxuw Vdq,Wdq (66),(VEX),(o128) +3f: pmaxud Vdq,Wdq (66),(VEX),(o128) +# 0x0f 0x38 0x40-0x8f +40: pmulld Vdq,Wdq (66),(VEX),(o128) +41: phminposuw Vdq,Wdq (66),(VEX),(o128) +80: INVEPT Gd/q,Mdq (66) +81: INVPID Gd/q,Mdq (66) +# 0x0f 0x38 0x90-0xbf (FMA) +96: vfmaddsub132pd/ps /r (66),(VEX) +97: vfmsubadd132pd/ps /r (66),(VEX) +98: vfmadd132pd/ps /r (66),(VEX) +99: vfmadd132sd/ss /r (66),(VEX),(o128) +9a: vfmsub132pd/ps /r (66),(VEX) +9b: vfmsub132sd/ss /r (66),(VEX),(o128) +9c: vfnmadd132pd/ps /r (66),(VEX) +9d: vfnmadd132sd/ss /r (66),(VEX),(o128) +9e: vfnmsub132pd/ps /r (66),(VEX) +9f: vfnmsub132sd/ss /r (66),(VEX),(o128) +a6: vfmaddsub213pd/ps /r (66),(VEX) +a7: vfmsubadd213pd/ps /r (66),(VEX) +a8: vfmadd213pd/ps /r (66),(VEX) +a9: vfmadd213sd/ss /r (66),(VEX),(o128) +aa: vfmsub213pd/ps /r (66),(VEX) +ab: vfmsub213sd/ss /r (66),(VEX),(o128) +ac: vfnmadd213pd/ps /r (66),(VEX) +ad: vfnmadd213sd/ss /r (66),(VEX),(o128) +ae: vfnmsub213pd/ps /r (66),(VEX) +af: vfnmsub213sd/ss /r (66),(VEX),(o128) +b6: vfmaddsub231pd/ps /r (66),(VEX) +b7: vfmsubadd231pd/ps /r (66),(VEX) +b8: vfmadd231pd/ps /r (66),(VEX) +b9: vfmadd231sd/ss /r (66),(VEX),(o128) +ba: vfmsub231pd/ps /r (66),(VEX) +bb: vfmsub231sd/ss /r (66),(VEX),(o128) +bc: vfnmadd231pd/ps /r (66),(VEX) +bd: vfnmadd231sd/ss /r (66),(VEX),(o128) +be: vfnmsub231pd/ps /r (66),(VEX) +bf: vfnmsub231sd/ss /r (66),(VEX),(o128) +# 0x0f 0x38 0xc0-0xff +db: aesimc Vdq,Wdq (66),(VEX),(o128) +dc: aesenc Vdq,Wdq (66),(VEX),(o128) +dd: aesenclast Vdq,Wdq (66),(VEX),(o128) +de: aesdec Vdq,Wdq (66),(VEX),(o128) +df: aesdeclast Vdq,Wdq (66),(VEX),(o128) +f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2) +f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2) +EndTable + +Table: 3-byte opcode 2 (0x0f 0x3a) +Referrer: 3-byte escape 2 +AVXcode: 3 +# 0x0f 0x3a 0x00-0xff +04: vpermilps /r,Ib (66),(oVEX) +05: vpermilpd /r,Ib (66),(oVEX) +06: vperm2f128 /r,Ib (66),(oVEX),(o256) +08: roundps Vdq,Wdq,Ib (66),(VEX) +09: roundpd Vdq,Wdq,Ib (66),(VEX) +0a: roundss Vss,Wss,Ib (66),(VEX),(o128) +0b: roundsd Vsd,Wsd,Ib (66),(VEX),(o128) +0c: blendps Vdq,Wdq,Ib (66),(VEX) +0d: blendpd Vdq,Wdq,Ib (66),(VEX) +0e: pblendw Vdq,Wdq,Ib (66),(VEX),(o128) +0f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66),(VEX),(o128) +14: pextrb Rd/Mb,Vdq,Ib (66),(VEX),(o128) +15: pextrw Rd/Mw,Vdq,Ib (66),(VEX),(o128) +16: pextrd/pextrq Ed/q,Vdq,Ib (66),(VEX),(o128) +17: extractps Ed,Vdq,Ib (66),(VEX),(o128) +18: vinsertf128 /r,Ib (66),(oVEX),(o256) +19: vextractf128 /r,Ib (66),(oVEX),(o256) +20: pinsrb Vdq,Rd/q/Mb,Ib (66),(VEX),(o128) +21: insertps Vdq,Udq/Md,Ib (66),(VEX),(o128) +22: pinsrd/pinsrq Vdq,Ed/q,Ib (66),(VEX),(o128) +40: dpps Vdq,Wdq,Ib (66),(VEX) +41: dppd Vdq,Wdq,Ib (66),(VEX),(o128) +42: mpsadbw Vdq,Wdq,Ib (66),(VEX),(o128) +44: pclmulq Vdq,Wdq,Ib (66),(VEX),(o128) +4a: vblendvps /r,Ib (66),(oVEX) +4b: vblendvpd /r,Ib (66),(oVEX) +4c: vpblendvb /r,Ib (66),(oVEX),(o128) +60: pcmpestrm Vdq,Wdq,Ib (66),(VEX),(o128) +61: pcmpestri Vdq,Wdq,Ib (66),(VEX),(o128) +62: pcmpistrm Vdq,Wdq,Ib (66),(VEX),(o128) +63: pcmpistri Vdq,Wdq,Ib (66),(VEX),(o128) +df: aeskeygenassist Vdq,Wdq,Ib (66),(VEX),(o128) +EndTable + +GrpTable: Grp1 +0: ADD +1: OR +2: ADC +3: SBB +4: AND +5: SUB +6: XOR +7: CMP +EndTable + +GrpTable: Grp1A +0: POP +EndTable + +GrpTable: Grp2 +0: ROL +1: ROR +2: RCL +3: RCR +4: SHL/SAL +5: SHR +6: +7: SAR +EndTable + +GrpTable: Grp3_1 +0: TEST Eb,Ib +1: +2: NOT Eb +3: NEG Eb +4: MUL AL,Eb +5: IMUL AL,Eb +6: DIV AL,Eb +7: IDIV AL,Eb +EndTable + +GrpTable: Grp3_2 +0: TEST Ev,Iz +1: +2: NOT Ev +3: NEG Ev +4: MUL rAX,Ev +5: IMUL rAX,Ev +6: DIV rAX,Ev +7: IDIV rAX,Ev +EndTable + +GrpTable: Grp4 +0: INC Eb +1: DEC Eb +EndTable + +GrpTable: Grp5 +0: INC Ev +1: DEC Ev +2: CALLN Ev (f64) +3: CALLF Ep +4: JMPN Ev (f64) +5: JMPF Ep +6: PUSH Ev (d64) +7: +EndTable + +GrpTable: Grp6 +0: SLDT Rv/Mw +1: STR Rv/Mw +2: LLDT Ew +3: LTR Ew +4: VERR Ew +5: VERW Ew +EndTable + +GrpTable: Grp7 +0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) +1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001) +2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) +3: LIDT Ms +4: SMSW Mw/Rv +5: +6: LMSW Ew +7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) +EndTable + +GrpTable: Grp8 +4: BT +5: BTS +6: BTR +7: BTC +EndTable + +GrpTable: Grp9 +1: CMPXCHG8B/16B Mq/Mdq +6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) +7: VMPTRST Mq +EndTable + +GrpTable: Grp10 +EndTable + +GrpTable: Grp11 +0: MOV +EndTable + +GrpTable: Grp12 +2: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B),(VEX),(o128) +4: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B),(VEX),(o128) +6: psllw Nq,Ib (11B) | psllw Udq,Ib (66),(11B),(VEX),(o128) +EndTable + +GrpTable: Grp13 +2: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B),(VEX),(o128) +4: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B),(VEX),(o128) +6: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B),(VEX),(o128) +EndTable + +GrpTable: Grp14 +2: psrlq Nq,Ib (11B) | psrlq Udq,Ib (66),(11B),(VEX),(o128) +3: psrldq Udq,Ib (66),(11B),(VEX),(o128) +6: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B),(VEX),(o128) +7: pslldq Udq,Ib (66),(11B),(VEX),(o128) +EndTable + +GrpTable: Grp15 +0: fxsave +1: fxstor +2: ldmxcsr (VEX) +3: stmxcsr (VEX) +4: XSAVE +5: XRSTOR | lfence (11B) +6: mfence (11B) +7: clflush | sfence (11B) +EndTable + +GrpTable: Grp16 +0: prefetch NTA +1: prefetch T0 +2: prefetch T1 +3: prefetch T2 +EndTable + +# AMD's Prefetch Group +GrpTable: GrpP +0: PREFETCH +1: PREFETCHW +EndTable + +GrpTable: GrpPDLK +0: MONTMUL +1: XSHA1 +2: XSHA2 +EndTable + +GrpTable: GrpRNG +0: xstore-rng +1: xcrypt-ecb +2: xcrypt-cbc +4: xcrypt-cfb +5: xcrypt-ofb +EndTable diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 61b41ca3b5a..d0474ad2a6e 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -35,34 +35,3 @@ int fixup_exception(struct pt_regs *regs) return 0; } - -#ifdef CONFIG_X86_64 -/* - * Need to defined our own search_extable on X86_64 to work around - * a B stepping K8 bug. - */ -const struct exception_table_entry * -search_extable(const struct exception_table_entry *first, - const struct exception_table_entry *last, - unsigned long value) -{ - /* B stepping K8 bug */ - if ((value >> 32) == 0) - value |= 0xffffffffUL << 32; - - while (first <= last) { - const struct exception_table_entry *mid; - long diff; - - mid = (last - first) / 2 + first; - diff = mid->insn - value; - if (diff == 0) - return mid; - else if (diff < 0) - first = mid+1; - else - last = mid-1; - } - return NULL; -} -#endif diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index f4cee9028cf..f62777940df 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -38,7 +38,8 @@ enum x86_pf_error_code { * Returns 0 if mmiotrace is disabled, or if the fault is not * handled by mmiotrace: */ -static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) +static inline int __kprobes +kmmio_fault(struct pt_regs *regs, unsigned long addr) { if (unlikely(is_kmmio_active())) if (kmmio_handler(regs, addr) == 1) @@ -46,7 +47,7 @@ static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) return 0; } -static inline int notify_page_fault(struct pt_regs *regs) +static inline int __kprobes notify_page_fault(struct pt_regs *regs) { int ret = 0; @@ -240,7 +241,7 @@ void vmalloc_sync_all(void) * * Handle a fault on the vmalloc or module mapping area */ -static noinline int vmalloc_fault(unsigned long address) +static noinline __kprobes int vmalloc_fault(unsigned long address) { unsigned long pgd_paddr; pmd_t *pmd_k; @@ -357,7 +358,7 @@ void vmalloc_sync_all(void) * * This assumes no large pages in there. */ -static noinline int vmalloc_fault(unsigned long address) +static noinline __kprobes int vmalloc_fault(unsigned long address) { pgd_t *pgd, *pgd_ref; pud_t *pud, *pud_ref; @@ -658,7 +659,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, show_fault_oops(regs, error_code, address); stackend = end_of_stack(tsk); - if (*stackend != STACK_END_MAGIC) + if (tsk != &init_task && *stackend != STACK_END_MAGIC) printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); tsk->thread.cr2 = address; @@ -860,7 +861,7 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte) * There are no security implications to leaving a stale TLB when * increasing the permissions on a page. */ -static noinline int +static noinline __kprobes int spurious_fault(unsigned long error_code, unsigned long address) { pgd_t *pgd; diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 16ccbd77917..11a4ad4d625 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -540,8 +540,14 @@ kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args) struct die_args *arg = args; if (val == DIE_DEBUG && (arg->err & DR_STEP)) - if (post_kmmio_handler(arg->err, arg->regs) == 1) + if (post_kmmio_handler(arg->err, arg->regs) == 1) { + /* + * Reset the BS bit in dr6 (pointed by args->err) to + * denote completion of processing + */ + (*(unsigned long *)ERR_PTR(arg->err)) &= ~DR_STEP; return NOTIFY_STOP; + } return NOTIFY_DONE; } diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index dbb5381f7b3..9d7ce96e5a5 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -136,7 +136,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) apicid_to_node[apic_id] = node; node_set(node, cpu_nodes_parsed); acpi_numa = 1; - printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", + printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n", pxm, apic_id, node); } @@ -170,7 +170,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) apicid_to_node[apic_id] = node; node_set(node, cpu_nodes_parsed); acpi_numa = 1; - printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", + printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n", pxm, apic_id, node); } diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index 427fd1b56df..8565d944f7c 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c @@ -1,12 +1,13 @@ /* * Written by Pekka Paalanen, 2008-2009 <pq@iki.fi> */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/io.h> #include <linux/mmiotrace.h> -#define MODULE_NAME "testmmiotrace" - static unsigned long mmio_address; module_param(mmio_address, ulong, 0); MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB " @@ -30,7 +31,7 @@ static unsigned v32(unsigned i) static void do_write_test(void __iomem *p) { unsigned int i; - pr_info(MODULE_NAME ": write test.\n"); + pr_info("write test.\n"); mmiotrace_printk("Write test.\n"); for (i = 0; i < 256; i++) @@ -47,7 +48,7 @@ static void do_read_test(void __iomem *p) { unsigned int i; unsigned errs[3] = { 0 }; - pr_info(MODULE_NAME ": read test.\n"); + pr_info("read test.\n"); mmiotrace_printk("Read test.\n"); for (i = 0; i < 256; i++) @@ -68,7 +69,7 @@ static void do_read_test(void __iomem *p) static void do_read_far_test(void __iomem *p) { - pr_info(MODULE_NAME ": read far test.\n"); + pr_info("read far test.\n"); mmiotrace_printk("Read far test.\n"); ioread32(p + read_far); @@ -78,7 +79,7 @@ static void do_test(unsigned long size) { void __iomem *p = ioremap_nocache(mmio_address, size); if (!p) { - pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); + pr_err("could not ioremap, aborting.\n"); return; } mmiotrace_printk("ioremap returned %p.\n", p); @@ -94,24 +95,22 @@ static int __init init(void) unsigned long size = (read_far) ? (8 << 20) : (16 << 10); if (mmio_address == 0) { - pr_err(MODULE_NAME ": you have to use the module argument " - "mmio_address.\n"); - pr_err(MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS" - " YOU REALLY KNOW WHAT YOU ARE DOING!\n"); + pr_err("you have to use the module argument mmio_address.\n"); + pr_err("DO NOT LOAD THIS MODULE UNLESS YOU REALLY KNOW WHAT YOU ARE DOING!\n"); return -ENXIO; } - pr_warning(MODULE_NAME ": WARNING: mapping %lu kB @ 0x%08lx in PCI " - "address space, and writing 16 kB of rubbish in there.\n", - size >> 10, mmio_address); + pr_warning("WARNING: mapping %lu kB @ 0x%08lx in PCI address space, " + "and writing 16 kB of rubbish in there.\n", + size >> 10, mmio_address); do_test(size); - pr_info(MODULE_NAME ": All done.\n"); + pr_info("All done.\n"); return 0; } static void __exit cleanup(void) { - pr_debug(MODULE_NAME ": unloaded.\n"); + pr_debug("unloaded.\n"); } module_init(init); diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 8aa85f17667..0a979f3e5b8 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -18,6 +18,7 @@ #include <asm/mce.h> #include <asm/xcr.h> #include <asm/suspend.h> +#include <asm/debugreg.h> #ifdef CONFIG_X86_32 static struct saved_context saved_context; @@ -142,31 +143,6 @@ static void fix_processor_context(void) #endif load_TR_desc(); /* This does ltr */ load_LDT(¤t->active_mm->context); /* This does lldt */ - - /* - * Now maybe reload the debug registers - */ - if (current->thread.debugreg7) { -#ifdef CONFIG_X86_32 - set_debugreg(current->thread.debugreg0, 0); - set_debugreg(current->thread.debugreg1, 1); - set_debugreg(current->thread.debugreg2, 2); - set_debugreg(current->thread.debugreg3, 3); - /* no 4 and 5 */ - set_debugreg(current->thread.debugreg6, 6); - set_debugreg(current->thread.debugreg7, 7); -#else - /* CONFIG_X86_64 */ - loaddebug(¤t->thread, 0); - loaddebug(¤t->thread, 1); - loaddebug(¤t->thread, 2); - loaddebug(¤t->thread, 3); - /* no 4 and 5 */ - loaddebug(¤t->thread, 6); - loaddebug(¤t->thread, 7); -#endif - } - } /** diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile new file mode 100644 index 00000000000..f8208267733 --- /dev/null +++ b/arch/x86/tools/Makefile @@ -0,0 +1,31 @@ +PHONY += posttest + +ifeq ($(KBUILD_VERBOSE),1) + posttest_verbose = -v +else + posttest_verbose = +endif + +ifeq ($(CONFIG_64BIT),y) + posttest_64bit = -y +else + posttest_64bit = -n +endif + +distill_awk = $(srctree)/arch/x86/tools/distill.awk +chkobjdump = $(srctree)/arch/x86/tools/chkobjdump.awk + +quiet_cmd_posttest = TEST $@ + cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose) + +posttest: $(obj)/test_get_len vmlinux + $(call cmd,posttest) + +hostprogs-y := test_get_len + +# -I needed for generated C source and C source which in the kernel tree. +HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ + +# Dependencies are also needed. +$(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c + diff --git a/arch/x86/tools/chkobjdump.awk b/arch/x86/tools/chkobjdump.awk new file mode 100644 index 00000000000..0d13cd9fdcf --- /dev/null +++ b/arch/x86/tools/chkobjdump.awk @@ -0,0 +1,23 @@ +# GNU objdump version checker +# +# Usage: +# objdump -v | awk -f chkobjdump.awk +BEGIN { + # objdump version 2.19 or later is OK for the test. + od_ver = 2; + od_sver = 19; +} + +/^GNU/ { + split($4, ver, "."); + if (ver[1] > od_ver || + (ver[1] == od_ver && ver[2] >= od_sver)) { + exit 1; + } else { + printf("Warning: objdump version %s is older than %d.%d\n", + $4, od_ver, od_sver); + print("Warning: Skipping posttest."); + # Logic is inverted, because we just skip test without error. + exit 0; + } +} diff --git a/arch/x86/tools/distill.awk b/arch/x86/tools/distill.awk new file mode 100644 index 00000000000..c13c0ee48ab --- /dev/null +++ b/arch/x86/tools/distill.awk @@ -0,0 +1,47 @@ +#!/bin/awk -f +# Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len +# Distills the disassembly as follows: +# - Removes all lines except the disassembled instructions. +# - For instructions that exceed 1 line (7 bytes), crams all the hex bytes +# into a single line. +# - Remove bad(or prefix only) instructions + +BEGIN { + prev_addr = "" + prev_hex = "" + prev_mnemonic = "" + bad_expr = "(\\(bad\\)|^rex|^.byte|^rep(z|nz)$|^lock$|^es$|^cs$|^ss$|^ds$|^fs$|^gs$|^data(16|32)$|^addr(16|32|64))" + fwait_expr = "^9b " + fwait_str="9b\tfwait" +} + +/^ *[0-9a-f]+ <[^>]*>:/ { + # Symbol entry + printf("%s%s\n", $2, $1) +} + +/^ *[0-9a-f]+:/ { + if (split($0, field, "\t") < 3) { + # This is a continuation of the same insn. + prev_hex = prev_hex field[2] + } else { + # Skip bad instructions + if (match(prev_mnemonic, bad_expr)) + prev_addr = "" + # Split fwait from other f* instructions + if (match(prev_hex, fwait_expr) && prev_mnemonic != "fwait") { + printf "%s\t%s\n", prev_addr, fwait_str + sub(fwait_expr, "", prev_hex) + } + if (prev_addr != "") + printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic + prev_addr = field[1] + prev_hex = field[2] + prev_mnemonic = field[3] + } +} + +END { + if (prev_addr != "") + printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic +} diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk new file mode 100644 index 00000000000..e34e92a28eb --- /dev/null +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -0,0 +1,380 @@ +#!/bin/awk -f +# gen-insn-attr-x86.awk: Instruction attribute table generator +# Written by Masami Hiramatsu <mhiramat@redhat.com> +# +# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c + +# Awk implementation sanity check +function check_awk_implement() { + if (!match("abc", "[[:lower:]]+")) + return "Your awk doesn't support charactor-class." + if (sprintf("%x", 0) != "0") + return "Your awk has a printf-format problem." + return "" +} + +# Clear working vars +function clear_vars() { + delete table + delete lptable2 + delete lptable1 + delete lptable3 + eid = -1 # escape id + gid = -1 # group id + aid = -1 # AVX id + tname = "" +} + +BEGIN { + # Implementation error checking + awkchecked = check_awk_implement() + if (awkchecked != "") { + print "Error: " awkchecked > "/dev/stderr" + print "Please try to use gawk." > "/dev/stderr" + exit 1 + } + + # Setup generating tables + print "/* x86 opcode map generated from x86-opcode-map.txt */" + print "/* Do not change this code. */\n" + ggid = 1 + geid = 1 + gaid = 0 + delete etable + delete gtable + delete atable + + opnd_expr = "^[[:alpha:]/]" + ext_expr = "^\\(" + sep_expr = "^\\|$" + group_expr = "^Grp[[:alnum:]]+" + + imm_expr = "^[IJAO][[:lower:]]" + imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" + imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" + imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" + imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)" + imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)" + imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)" + imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" + imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" + imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)" + imm_flag["Ob"] = "INAT_MOFFSET" + imm_flag["Ov"] = "INAT_MOFFSET" + + modrm_expr = "^([CDEGMNPQRSUVW/][[:lower:]]+|NTA|T[012])" + force64_expr = "\\([df]64\\)" + rex_expr = "^REX(\\.[XRWB]+)*" + fpu_expr = "^ESC" # TODO + + lprefix1_expr = "\\(66\\)" + lprefix2_expr = "\\(F3\\)" + lprefix3_expr = "\\(F2\\)" + max_lprefix = 4 + + vexok_expr = "\\(VEX\\)" + vexonly_expr = "\\(oVEX\\)" + + prefix_expr = "\\(Prefix\\)" + prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" + prefix_num["REPNE"] = "INAT_PFX_REPNE" + prefix_num["REP/REPE"] = "INAT_PFX_REPE" + prefix_num["LOCK"] = "INAT_PFX_LOCK" + prefix_num["SEG=CS"] = "INAT_PFX_CS" + prefix_num["SEG=DS"] = "INAT_PFX_DS" + prefix_num["SEG=ES"] = "INAT_PFX_ES" + prefix_num["SEG=FS"] = "INAT_PFX_FS" + prefix_num["SEG=GS"] = "INAT_PFX_GS" + prefix_num["SEG=SS"] = "INAT_PFX_SS" + prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" + prefix_num["2bytes-VEX"] = "INAT_PFX_VEX2" + prefix_num["3bytes-VEX"] = "INAT_PFX_VEX3" + + clear_vars() +} + +function semantic_error(msg) { + print "Semantic error at " NR ": " msg > "/dev/stderr" + exit 1 +} + +function debug(msg) { + print "DEBUG: " msg +} + +function array_size(arr, i,c) { + c = 0 + for (i in arr) + c++ + return c +} + +/^Table:/ { + print "/* " $0 " */" + if (tname != "") + semantic_error("Hit Table: before EndTable:."); +} + +/^Referrer:/ { + if (NF != 1) { + # escape opcode table + ref = "" + for (i = 2; i <= NF; i++) + ref = ref $i + eid = escape[ref] + tname = sprintf("inat_escape_table_%d", eid) + } +} + +/^AVXcode:/ { + if (NF != 1) { + # AVX/escape opcode table + aid = $2 + if (gaid <= aid) + gaid = aid + 1 + if (tname == "") # AVX only opcode table + tname = sprintf("inat_avx_table_%d", $2) + } + if (aid == -1 && eid == -1) # primary opcode table + tname = "inat_primary_table" +} + +/^GrpTable:/ { + print "/* " $0 " */" + if (!($2 in group)) + semantic_error("No group: " $2 ) + gid = group[$2] + tname = "inat_group_table_" gid +} + +function print_table(tbl,name,fmt,n) +{ + print "const insn_attr_t " name " = {" + for (i = 0; i < n; i++) { + id = sprintf(fmt, i) + if (tbl[id]) + print " [" id "] = " tbl[id] "," + } + print "};" +} + +/^EndTable/ { + if (gid != -1) { + # print group tables + if (array_size(table) != 0) { + print_table(table, tname "[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,0] = tname + } + if (array_size(lptable1) != 0) { + print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,1] = tname "_1" + } + if (array_size(lptable2) != 0) { + print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,2] = tname "_2" + } + if (array_size(lptable3) != 0) { + print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,3] = tname "_3" + } + } else { + # print primary/escaped tables + if (array_size(table) != 0) { + print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,0] = tname + if (aid >= 0) + atable[aid,0] = tname + } + if (array_size(lptable1) != 0) { + print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,1] = tname "_1" + if (aid >= 0) + atable[aid,1] = tname "_1" + } + if (array_size(lptable2) != 0) { + print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,2] = tname "_2" + if (aid >= 0) + atable[aid,2] = tname "_2" + } + if (array_size(lptable3) != 0) { + print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,3] = tname "_3" + if (aid >= 0) + atable[aid,3] = tname "_3" + } + } + print "" + clear_vars() +} + +function add_flags(old,new) { + if (old && new) + return old " | " new + else if (old) + return old + else + return new +} + +# convert operands to flags. +function convert_operands(opnd, i,imm,mod) +{ + imm = null + mod = null + for (i in opnd) { + i = opnd[i] + if (match(i, imm_expr) == 1) { + if (!imm_flag[i]) + semantic_error("Unknown imm opnd: " i) + if (imm) { + if (i != "Ib") + semantic_error("Second IMM error") + imm = add_flags(imm, "INAT_SCNDIMM") + } else + imm = imm_flag[i] + } else if (match(i, modrm_expr)) + mod = "INAT_MODRM" + } + return add_flags(imm, mod) +} + +/^[0-9a-f]+\:/ { + if (NR == 1) + next + # get index + idx = "0x" substr($1, 1, index($1,":") - 1) + if (idx in table) + semantic_error("Redefine " idx " in " tname) + + # check if escaped opcode + if ("escape" == $2) { + if ($3 != "#") + semantic_error("No escaped name") + ref = "" + for (i = 4; i <= NF; i++) + ref = ref $i + if (ref in escape) + semantic_error("Redefine escape (" ref ")") + escape[ref] = geid + geid++ + table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")" + next + } + + variant = null + # converts + i = 2 + while (i <= NF) { + opcode = $(i++) + delete opnds + ext = null + flags = null + opnd = null + # parse one opcode + if (match($i, opnd_expr)) { + opnd = $i + split($(i++), opnds, ",") + flags = convert_operands(opnds) + } + if (match($i, ext_expr)) + ext = $(i++) + if (match($i, sep_expr)) + i++ + else if (i < NF) + semantic_error($i " is not a separator") + + # check if group opcode + if (match(opcode, group_expr)) { + if (!(opcode in group)) { + group[opcode] = ggid + ggid++ + } + flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")") + } + # check force(or default) 64bit + if (match(ext, force64_expr)) + flags = add_flags(flags, "INAT_FORCE64") + + # check REX prefix + if (match(opcode, rex_expr)) + flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)") + + # check coprocessor escape : TODO + if (match(opcode, fpu_expr)) + flags = add_flags(flags, "INAT_MODRM") + + # check VEX only code + if (match(ext, vexonly_expr)) + flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") + + # check VEX only code + if (match(ext, vexok_expr)) + flags = add_flags(flags, "INAT_VEXOK") + + # check prefixes + if (match(ext, prefix_expr)) { + if (!prefix_num[opcode]) + semantic_error("Unknown prefix: " opcode) + flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")") + } + if (length(flags) == 0) + continue + # check if last prefix + if (match(ext, lprefix1_expr)) { + lptable1[idx] = add_flags(lptable1[idx],flags) + variant = "INAT_VARIANT" + } else if (match(ext, lprefix2_expr)) { + lptable2[idx] = add_flags(lptable2[idx],flags) + variant = "INAT_VARIANT" + } else if (match(ext, lprefix3_expr)) { + lptable3[idx] = add_flags(lptable3[idx],flags) + variant = "INAT_VARIANT" + } else { + table[idx] = add_flags(table[idx],flags) + } + } + if (variant) + table[idx] = add_flags(table[idx],variant) +} + +END { + if (awkchecked != "") + exit 1 + # print escape opcode map's array + print "/* Escape opcode map array */" + print "const insn_attr_t const *inat_escape_tables[INAT_ESC_MAX + 1]" \ + "[INAT_LSTPFX_MAX + 1] = {" + for (i = 0; i < geid; i++) + for (j = 0; j < max_lprefix; j++) + if (etable[i,j]) + print " ["i"]["j"] = "etable[i,j]"," + print "};\n" + # print group opcode map's array + print "/* Group opcode map array */" + print "const insn_attr_t const *inat_group_tables[INAT_GRP_MAX + 1]"\ + "[INAT_LSTPFX_MAX + 1] = {" + for (i = 0; i < ggid; i++) + for (j = 0; j < max_lprefix; j++) + if (gtable[i,j]) + print " ["i"]["j"] = "gtable[i,j]"," + print "};\n" + # print AVX opcode map's array + print "/* AVX opcode map array */" + print "const insn_attr_t const *inat_avx_tables[X86_VEX_M_MAX + 1]"\ + "[INAT_LSTPFX_MAX + 1] = {" + for (i = 0; i < gaid; i++) + for (j = 0; j < max_lprefix; j++) + if (atable[i,j]) + print " ["i"]["j"] = "atable[i,j]"," + print "};" +} + diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c new file mode 100644 index 00000000000..d8214dc03fa --- /dev/null +++ b/arch/x86/tools/test_get_len.c @@ -0,0 +1,173 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2009 + */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <unistd.h> + +#define unlikely(cond) (cond) + +#include <asm/insn.h> +#include <inat.c> +#include <insn.c> + +/* + * Test of instruction analysis in general and insn_get_length() in + * particular. See if insn_get_length() and the disassembler agree + * on the length of each instruction in an elf disassembly. + * + * Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len + */ + +const char *prog; +static int verbose; +static int x86_64; + +static void usage(void) +{ + fprintf(stderr, "Usage: objdump -d a.out | awk -f distill.awk |" + " %s [-y|-n] [-v] \n", prog); + fprintf(stderr, "\t-y 64bit mode\n"); + fprintf(stderr, "\t-n 32bit mode\n"); + fprintf(stderr, "\t-v verbose mode\n"); + exit(1); +} + +static void malformed_line(const char *line, int line_nr) +{ + fprintf(stderr, "%s: malformed line %d:\n%s", prog, line_nr, line); + exit(3); +} + +static void dump_field(FILE *fp, const char *name, const char *indent, + struct insn_field *field) +{ + fprintf(fp, "%s.%s = {\n", indent, name); + fprintf(fp, "%s\t.value = %d, bytes[] = {%x, %x, %x, %x},\n", + indent, field->value, field->bytes[0], field->bytes[1], + field->bytes[2], field->bytes[3]); + fprintf(fp, "%s\t.got = %d, .nbytes = %d},\n", indent, + field->got, field->nbytes); +} + +static void dump_insn(FILE *fp, struct insn *insn) +{ + fprintf(fp, "Instruction = { \n"); + dump_field(fp, "prefixes", "\t", &insn->prefixes); + dump_field(fp, "rex_prefix", "\t", &insn->rex_prefix); + dump_field(fp, "vex_prefix", "\t", &insn->vex_prefix); + dump_field(fp, "opcode", "\t", &insn->opcode); + dump_field(fp, "modrm", "\t", &insn->modrm); + dump_field(fp, "sib", "\t", &insn->sib); + dump_field(fp, "displacement", "\t", &insn->displacement); + dump_field(fp, "immediate1", "\t", &insn->immediate1); + dump_field(fp, "immediate2", "\t", &insn->immediate2); + fprintf(fp, "\t.attr = %x, .opnd_bytes = %d, .addr_bytes = %d,\n", + insn->attr, insn->opnd_bytes, insn->addr_bytes); + fprintf(fp, "\t.length = %d, .x86_64 = %d, .kaddr = %p}\n", + insn->length, insn->x86_64, insn->kaddr); +} + +static void parse_args(int argc, char **argv) +{ + int c; + prog = argv[0]; + while ((c = getopt(argc, argv, "ynv")) != -1) { + switch (c) { + case 'y': + x86_64 = 1; + break; + case 'n': + x86_64 = 0; + break; + case 'v': + verbose = 1; + break; + default: + usage(); + } + } +} + +#define BUFSIZE 256 + +int main(int argc, char **argv) +{ + char line[BUFSIZE], sym[BUFSIZE] = "<unknown>"; + unsigned char insn_buf[16]; + struct insn insn; + int insns = 0, c; + int warnings = 0; + + parse_args(argc, argv); + + while (fgets(line, BUFSIZE, stdin)) { + char copy[BUFSIZE], *s, *tab1, *tab2; + int nb = 0; + unsigned int b; + + if (line[0] == '<') { + /* Symbol line */ + strcpy(sym, line); + continue; + } + + insns++; + memset(insn_buf, 0, 16); + strcpy(copy, line); + tab1 = strchr(copy, '\t'); + if (!tab1) + malformed_line(line, insns); + s = tab1 + 1; + s += strspn(s, " "); + tab2 = strchr(s, '\t'); + if (!tab2) + malformed_line(line, insns); + *tab2 = '\0'; /* Characters beyond tab2 aren't examined */ + while (s < tab2) { + if (sscanf(s, "%x", &b) == 1) { + insn_buf[nb++] = (unsigned char) b; + s += 3; + } else + break; + } + /* Decode an instruction */ + insn_init(&insn, insn_buf, x86_64); + insn_get_length(&insn); + if (insn.length != nb) { + warnings++; + fprintf(stderr, "Warning: %s found difference at %s\n", + prog, sym); + fprintf(stderr, "Warning: %s", line); + fprintf(stderr, "Warning: objdump says %d bytes, but " + "insn_get_length() says %d\n", nb, + insn.length); + if (verbose) + dump_insn(stderr, &insn); + } + } + if (warnings) + fprintf(stderr, "Warning: decoded and checked %d" + " instructions with %d warnings\n", insns, warnings); + else + fprintf(stderr, "Succeed: decoded and checked %d" + " instructions\n", insns); + return 0; +} diff --git a/drivers/ata/pata_pcmcia.c b/drivers/ata/pata_pcmcia.c index dc99e26f8e5..1b392c9e853 100644 --- a/drivers/ata/pata_pcmcia.c +++ b/drivers/ata/pata_pcmcia.c @@ -177,9 +177,6 @@ static struct ata_port_operations pcmcia_8bit_port_ops = { .drain_fifo = pcmcia_8bit_drain_fifo, }; -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - struct pcmcia_config_check { unsigned long ctl_base; @@ -252,7 +249,7 @@ static int pcmcia_init_one(struct pcmcia_device *pdev) struct ata_port *ap; struct ata_pcmcia_info *info; struct pcmcia_config_check *stk = NULL; - int last_ret = 0, last_fn = 0, is_kme = 0, ret = -ENOMEM, p; + int is_kme = 0, ret = -ENOMEM, p; unsigned long io_base, ctl_base; void __iomem *io_addr, *ctl_addr; int n_ports = 1; @@ -271,7 +268,6 @@ static int pcmcia_init_one(struct pcmcia_device *pdev) pdev->io.Attributes2 = IO_DATA_PATH_WIDTH_8; pdev->io.IOAddrLines = 3; pdev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - pdev->irq.IRQInfo1 = IRQ_LEVEL_ID; pdev->conf.Attributes = CONF_ENABLE_IRQ; pdev->conf.IntType = INT_MEMORY_AND_IO; @@ -296,8 +292,13 @@ static int pcmcia_init_one(struct pcmcia_device *pdev) } io_base = pdev->io.BasePort1; ctl_base = stk->ctl_base; - CS_CHECK(RequestIRQ, pcmcia_request_irq(pdev, &pdev->irq)); - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(pdev, &pdev->conf)); + ret = pcmcia_request_irq(pdev, &pdev->irq); + if (ret) + goto failed; + + ret = pcmcia_request_configuration(pdev, &pdev->conf); + if (ret) + goto failed; /* iomap */ ret = -ENOMEM; @@ -351,8 +352,6 @@ static int pcmcia_init_one(struct pcmcia_device *pdev) kfree(stk); return 0; -cs_failed: - cs_error(pdev, last_fn, last_ret); failed: kfree(stk); info->ndev = 0; diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 965ece2c7e4..13bb69d2abb 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -735,6 +735,21 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector part_stat_unlock(); } +/* + * Ensure we don't create aliases in VI caches + */ +static inline void +killalias(struct bio *bio) +{ + struct bio_vec *bv; + int i; + + if (bio_data_dir(bio) == READ) + __bio_for_each_segment(bv, bio, i, 0) { + flush_dcache_page(bv->bv_page); + } +} + void aoecmd_ata_rsp(struct sk_buff *skb) { @@ -853,8 +868,12 @@ aoecmd_ata_rsp(struct sk_buff *skb) if (buf && --buf->nframesout == 0 && buf->resid == 0) { diskstats(d->gd, buf->bio, jiffies - buf->stime, buf->sector); - n = (buf->flags & BUFFL_FAIL) ? -EIO : 0; - bio_endio(buf->bio, n); + if (buf->flags & BUFFL_FAIL) + bio_endio(buf->bio, -EIO); + else { + killalias(buf->bio); + bio_endio(buf->bio, 0); + } mempool_free(buf, d->bufpool); } diff --git a/drivers/bluetooth/bluecard_cs.c b/drivers/bluetooth/bluecard_cs.c index b0e569ba730..2acdc605cb4 100644 --- a/drivers/bluetooth/bluecard_cs.c +++ b/drivers/bluetooth/bluecard_cs.c @@ -867,11 +867,9 @@ static int bluecard_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; link->io.NumPorts1 = 8; - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = bluecard_interrupt; - link->irq.Instance = info; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; @@ -905,22 +903,16 @@ static int bluecard_config(struct pcmcia_device *link) break; } - if (i != 0) { - cs_error(link, RequestIO, i); + if (i != 0) goto failed; - } i = pcmcia_request_irq(link, &link->irq); - if (i != 0) { - cs_error(link, RequestIRQ, i); + if (i != 0) link->irq.AssignedIRQ = 0; - } i = pcmcia_request_configuration(link, &link->conf); - if (i != 0) { - cs_error(link, RequestConfiguration, i); + if (i != 0) goto failed; - } if (bluecard_open(info) != 0) goto failed; diff --git a/drivers/bluetooth/bt3c_cs.c b/drivers/bluetooth/bt3c_cs.c index d58e22b9f06..d814a2755cc 100644 --- a/drivers/bluetooth/bt3c_cs.c +++ b/drivers/bluetooth/bt3c_cs.c @@ -659,11 +659,9 @@ static int bt3c_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; link->io.NumPorts1 = 8; - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = bt3c_interrupt; - link->irq.Instance = info; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; @@ -740,21 +738,16 @@ static int bt3c_config(struct pcmcia_device *link) goto found_port; BT_ERR("No usable port range found"); - cs_error(link, RequestIO, -ENODEV); goto failed; found_port: i = pcmcia_request_irq(link, &link->irq); - if (i != 0) { - cs_error(link, RequestIRQ, i); + if (i != 0) link->irq.AssignedIRQ = 0; - } i = pcmcia_request_configuration(link, &link->conf); - if (i != 0) { - cs_error(link, RequestConfiguration, i); + if (i != 0) goto failed; - } if (bt3c_open(info) != 0) goto failed; diff --git a/drivers/bluetooth/btuart_cs.c b/drivers/bluetooth/btuart_cs.c index efd689a062e..d339464dc15 100644 --- a/drivers/bluetooth/btuart_cs.c +++ b/drivers/bluetooth/btuart_cs.c @@ -588,11 +588,9 @@ static int btuart_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; link->io.NumPorts1 = 8; - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = btuart_interrupt; - link->irq.Instance = info; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; @@ -669,21 +667,16 @@ static int btuart_config(struct pcmcia_device *link) goto found_port; BT_ERR("No usable port range found"); - cs_error(link, RequestIO, -ENODEV); goto failed; found_port: i = pcmcia_request_irq(link, &link->irq); - if (i != 0) { - cs_error(link, RequestIRQ, i); + if (i != 0) link->irq.AssignedIRQ = 0; - } i = pcmcia_request_configuration(link, &link->conf); - if (i != 0) { - cs_error(link, RequestConfiguration, i); + if (i != 0) goto failed; - } if (btuart_open(info) != 0) goto failed; diff --git a/drivers/bluetooth/dtl1_cs.c b/drivers/bluetooth/dtl1_cs.c index b881a9cd874..4f02a6f3c98 100644 --- a/drivers/bluetooth/dtl1_cs.c +++ b/drivers/bluetooth/dtl1_cs.c @@ -573,11 +573,9 @@ static int dtl1_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; link->io.NumPorts1 = 8; - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = dtl1_interrupt; - link->irq.Instance = info; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; @@ -622,16 +620,12 @@ static int dtl1_config(struct pcmcia_device *link) goto failed; i = pcmcia_request_irq(link, &link->irq); - if (i != 0) { - cs_error(link, RequestIRQ, i); + if (i != 0) link->irq.AssignedIRQ = 0; - } i = pcmcia_request_configuration(link, &link->conf); - if (i != 0) { - cs_error(link, RequestConfiguration, i); + if (i != 0) goto failed; - } if (dtl1_open(info) != 0) goto failed; diff --git a/drivers/char/agp/Kconfig b/drivers/char/agp/Kconfig index ccb1fa89de2..2fb3a480f6b 100644 --- a/drivers/char/agp/Kconfig +++ b/drivers/char/agp/Kconfig @@ -56,9 +56,8 @@ config AGP_AMD X on AMD Irongate, 761, and 762 chipsets. config AGP_AMD64 - tristate "AMD Opteron/Athlon64 on-CPU GART support" if !GART_IOMMU + tristate "AMD Opteron/Athlon64 on-CPU GART support" depends on AGP && X86 - default y if GART_IOMMU help This option gives you AGP support for the GLX component of X using the on-CPU northbridge of the AMD Athlon64/Opteron CPUs. diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c index c250a31efa5..2db4c0a29b0 100644 --- a/drivers/char/pcmcia/cm4000_cs.c +++ b/drivers/char/pcmcia/cm4000_cs.c @@ -23,8 +23,6 @@ * All rights reserved. Licensed under dual BSD/GPL license. */ -/* #define PCMCIA_DEBUG 6 */ - #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> @@ -47,18 +45,17 @@ /* #define ATR_CSUM */ -#ifdef PCMCIA_DEBUG -#define reader_to_dev(x) (&handle_to_dev(x->p_dev)) -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0600); -#define DEBUGP(n, rdr, x, args...) do { \ - if (pc_debug >= (n)) \ - dev_printk(KERN_DEBUG, reader_to_dev(rdr), "%s:" x, \ - __func__ , ## args); \ +#define reader_to_dev(x) (&x->p_dev->dev) + +/* n (debug level) is ignored */ +/* additional debug output may be enabled by re-compiling with + * CM4000_DEBUG set */ +/* #define CM4000_DEBUG */ +#define DEBUGP(n, rdr, x, args...) do { \ + dev_dbg(reader_to_dev(rdr), "%s:" x, \ + __func__ , ## args); \ } while (0) -#else -#define DEBUGP(n, rdr, x, args...) -#endif + static char *version = "cm4000_cs.c v2.4.0gm6 - All bugs added by Harald Welte"; #define T_1SEC (HZ) @@ -174,14 +171,13 @@ static unsigned char fi_di_table[10][14] = { /* 9 */ {0x09,0x19,0x29,0x39,0x49,0x59,0x69,0x11,0x11,0x99,0xA9,0xB9,0xC9,0xD9} }; -#ifndef PCMCIA_DEBUG +#ifndef CM4000_DEBUG #define xoutb outb #define xinb inb #else static inline void xoutb(unsigned char val, unsigned short port) { - if (pc_debug >= 7) - printk(KERN_DEBUG "outb(val=%.2x,port=%.4x)\n", val, port); + pr_debug("outb(val=%.2x,port=%.4x)\n", val, port); outb(val, port); } static inline unsigned char xinb(unsigned short port) @@ -189,8 +185,7 @@ static inline unsigned char xinb(unsigned short port) unsigned char val; val = inb(port); - if (pc_debug >= 7) - printk(KERN_DEBUG "%.2x=inb(%.4x)\n", val, port); + pr_debug("%.2x=inb(%.4x)\n", val, port); return val; } @@ -514,12 +509,10 @@ static int set_protocol(struct cm4000_dev *dev, struct ptsreq *ptsreq) for (i = 0; i < 4; i++) { xoutb(i, REG_BUF_ADDR(iobase)); xoutb(dev->pts[i], REG_BUF_DATA(iobase)); /* buf data */ -#ifdef PCMCIA_DEBUG - if (pc_debug >= 5) - printk("0x%.2x ", dev->pts[i]); +#ifdef CM4000_DEBUG + pr_debug("0x%.2x ", dev->pts[i]); } - if (pc_debug >= 5) - printk("\n"); + pr_debug("\n"); #else } #endif @@ -579,14 +572,13 @@ static int set_protocol(struct cm4000_dev *dev, struct ptsreq *ptsreq) pts_reply[i] = inb(REG_BUF_DATA(iobase)); } -#ifdef PCMCIA_DEBUG +#ifdef CM4000_DEBUG DEBUGP(2, dev, "PTSreply: "); for (i = 0; i < num_bytes_read; i++) { - if (pc_debug >= 5) - printk("0x%.2x ", pts_reply[i]); + pr_debug("0x%.2x ", pts_reply[i]); } - printk("\n"); -#endif /* PCMCIA_DEBUG */ + pr_debug("\n"); +#endif /* CM4000_DEBUG */ DEBUGP(5, dev, "Clear Tactive in Flags1\n"); xoutb(0x20, REG_FLAGS1(iobase)); @@ -655,7 +647,7 @@ static void terminate_monitor(struct cm4000_dev *dev) DEBUGP(5, dev, "Delete timer\n"); del_timer_sync(&dev->timer); -#ifdef PCMCIA_DEBUG +#ifdef CM4000_DEBUG dev->monitor_running = 0; #endif @@ -898,7 +890,7 @@ static void monitor_card(unsigned long p) DEBUGP(4, dev, "ATR checksum (0x%.2x, should " "be zero) failed\n", dev->atr_csum); } -#ifdef PCMCIA_DEBUG +#ifdef CM4000_DEBUG else if (test_bit(IS_BAD_LENGTH, &dev->flags)) { DEBUGP(4, dev, "ATR length error\n"); } else { @@ -1415,7 +1407,7 @@ static long cmm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) int size; int rc; void __user *argp = (void __user *)arg; -#ifdef PCMCIA_DEBUG +#ifdef CM4000_DEBUG char *ioctl_names[CM_IOC_MAXNR + 1] = { [_IOC_NR(CM_IOCGSTATUS)] "CM_IOCGSTATUS", [_IOC_NR(CM_IOCGATR)] "CM_IOCGATR", @@ -1423,9 +1415,9 @@ static long cmm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) [_IOC_NR(CM_IOCSPTS)] "CM_IOCSPTS", [_IOC_NR(CM_IOSDBGLVL)] "CM4000_DBGLVL", }; -#endif DEBUGP(3, dev, "cmm_ioctl(device=%d.%d) %s\n", imajor(inode), iminor(inode), ioctl_names[_IOC_NR(cmd)]); +#endif lock_kernel(); rc = -ENODEV; @@ -1523,7 +1515,7 @@ static long cmm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } case CM_IOCARDOFF: -#ifdef PCMCIA_DEBUG +#ifdef CM4000_DEBUG DEBUGP(4, dev, "... in CM_IOCARDOFF\n"); if (dev->flags0 & 0x01) { DEBUGP(4, dev, " Card inserted\n"); @@ -1625,18 +1617,9 @@ static long cmm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } break; -#ifdef PCMCIA_DEBUG - case CM_IOSDBGLVL: /* set debug log level */ - { - int old_pc_debug = 0; - - old_pc_debug = pc_debug; - if (copy_from_user(&pc_debug, argp, sizeof(int))) - rc = -EFAULT; - else if (old_pc_debug != pc_debug) - DEBUGP(0, dev, "Changed debug log level " - "to %i\n", pc_debug); - } +#ifdef CM4000_DEBUG + case CM_IOSDBGLVL: + rc = -ENOTTY; break; #endif default: diff --git a/drivers/char/pcmcia/cm4040_cs.c b/drivers/char/pcmcia/cm4040_cs.c index 4f0723b0797..a6a70e476be 100644 --- a/drivers/char/pcmcia/cm4040_cs.c +++ b/drivers/char/pcmcia/cm4040_cs.c @@ -17,8 +17,6 @@ * All rights reserved, Dual BSD/GPL Licensed. */ -/* #define PCMCIA_DEBUG 6 */ - #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> @@ -41,18 +39,16 @@ #include "cm4040_cs.h" -#ifdef PCMCIA_DEBUG -#define reader_to_dev(x) (&handle_to_dev(x->p_dev)) -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0600); -#define DEBUGP(n, rdr, x, args...) do { \ - if (pc_debug >= (n)) \ - dev_printk(KERN_DEBUG, reader_to_dev(rdr), "%s:" x, \ - __func__ , ##args); \ +#define reader_to_dev(x) (&x->p_dev->dev) + +/* n (debug level) is ignored */ +/* additional debug output may be enabled by re-compiling with + * CM4040_DEBUG set */ +/* #define CM4040_DEBUG */ +#define DEBUGP(n, rdr, x, args...) do { \ + dev_dbg(reader_to_dev(rdr), "%s:" x, \ + __func__ , ## args); \ } while (0) -#else -#define DEBUGP(n, rdr, x, args...) -#endif static char *version = "OMNIKEY CardMan 4040 v1.1.0gm5 - All bugs added by Harald Welte"; @@ -90,14 +86,13 @@ struct reader_dev { static struct pcmcia_device *dev_table[CM_MAX_DEV]; -#ifndef PCMCIA_DEBUG +#ifndef CM4040_DEBUG #define xoutb outb #define xinb inb #else static inline void xoutb(unsigned char val, unsigned short port) { - if (pc_debug >= 7) - printk(KERN_DEBUG "outb(val=%.2x,port=%.4x)\n", val, port); + pr_debug("outb(val=%.2x,port=%.4x)\n", val, port); outb(val, port); } @@ -106,8 +101,7 @@ static inline unsigned char xinb(unsigned short port) unsigned char val; val = inb(port); - if (pc_debug >= 7) - printk(KERN_DEBUG "%.2x=inb(%.4x)\n", val, port); + pr_debug("%.2x=inb(%.4x)\n", val, port); return val; } #endif @@ -260,23 +254,22 @@ static ssize_t cm4040_read(struct file *filp, char __user *buf, return -EIO; } dev->r_buf[i] = xinb(iobase + REG_OFFSET_BULK_IN); -#ifdef PCMCIA_DEBUG - if (pc_debug >= 6) - printk(KERN_DEBUG "%lu:%2x ", i, dev->r_buf[i]); +#ifdef CM4040_DEBUG + pr_debug("%lu:%2x ", i, dev->r_buf[i]); } - printk("\n"); + pr_debug("\n"); #else } #endif bytes_to_read = 5 + le32_to_cpu(*(__le32 *)&dev->r_buf[1]); - DEBUGP(6, dev, "BytesToRead=%lu\n", bytes_to_read); + DEBUGP(6, dev, "BytesToRead=%zu\n", bytes_to_read); min_bytes_to_read = min(count, bytes_to_read + 5); min_bytes_to_read = min_t(size_t, min_bytes_to_read, READ_WRITE_BUFFER_SIZE); - DEBUGP(6, dev, "Min=%lu\n", min_bytes_to_read); + DEBUGP(6, dev, "Min=%zu\n", min_bytes_to_read); for (i = 0; i < (min_bytes_to_read-5); i++) { rc = wait_for_bulk_in_ready(dev); @@ -288,11 +281,10 @@ static ssize_t cm4040_read(struct file *filp, char __user *buf, return -EIO; } dev->r_buf[i+5] = xinb(iobase + REG_OFFSET_BULK_IN); -#ifdef PCMCIA_DEBUG - if (pc_debug >= 6) - printk(KERN_DEBUG "%lu:%2x ", i, dev->r_buf[i]); +#ifdef CM4040_DEBUG + pr_debug("%lu:%2x ", i, dev->r_buf[i]); } - printk("\n"); + pr_debug("\n"); #else } #endif @@ -547,7 +539,7 @@ static int cm4040_config_check(struct pcmcia_device *p_dev, p_dev->io.IOAddrLines = cfg->io.flags & CISTPL_IO_LINES_MASK; rc = pcmcia_request_io(p_dev, &p_dev->io); - dev_printk(KERN_INFO, &handle_to_dev(p_dev), + dev_printk(KERN_INFO, &p_dev->dev, "pcmcia_request_io returned 0x%x\n", rc); return rc; } @@ -569,7 +561,7 @@ static int reader_config(struct pcmcia_device *link, int devno) fail_rc = pcmcia_request_configuration(link, &link->conf); if (fail_rc != 0) { - dev_printk(KERN_INFO, &handle_to_dev(link), + dev_printk(KERN_INFO, &link->dev, "pcmcia_request_configuration failed 0x%x\n", fail_rc); goto cs_release; diff --git a/drivers/char/pcmcia/ipwireless/hardware.c b/drivers/char/pcmcia/ipwireless/hardware.c index 4c1820cad71..99cffdab105 100644 --- a/drivers/char/pcmcia/ipwireless/hardware.c +++ b/drivers/char/pcmcia/ipwireless/hardware.c @@ -1213,12 +1213,12 @@ static irqreturn_t ipwireless_handle_v2_v3_interrupt(int irq, irqreturn_t ipwireless_interrupt(int irq, void *dev_id) { - struct ipw_hardware *hw = dev_id; + struct ipw_dev *ipw = dev_id; - if (hw->hw_version == HW_VERSION_1) - return ipwireless_handle_v1_interrupt(irq, hw); + if (ipw->hardware->hw_version == HW_VERSION_1) + return ipwireless_handle_v1_interrupt(irq, ipw->hardware); else - return ipwireless_handle_v2_v3_interrupt(irq, hw); + return ipwireless_handle_v2_v3_interrupt(irq, ipw->hardware); } static void flush_packets_to_hw(struct ipw_hardware *hw) diff --git a/drivers/char/pcmcia/ipwireless/main.c b/drivers/char/pcmcia/ipwireless/main.c index 5216fce0c62..dff24dae148 100644 --- a/drivers/char/pcmcia/ipwireless/main.c +++ b/drivers/char/pcmcia/ipwireless/main.c @@ -65,10 +65,7 @@ static void signalled_reboot_work(struct work_struct *work_reboot) struct ipw_dev *ipw = container_of(work_reboot, struct ipw_dev, work_reboot); struct pcmcia_device *link = ipw->link; - int ret = pcmcia_reset_card(link->socket); - - if (ret != 0) - cs_error(link, ResetCard, ret); + pcmcia_reset_card(link->socket); } static void signalled_reboot_callback(void *callback_data) @@ -79,208 +76,127 @@ static void signalled_reboot_callback(void *callback_data) schedule_work(&ipw->work_reboot); } -static int config_ipwireless(struct ipw_dev *ipw) +static int ipwireless_probe(struct pcmcia_device *p_dev, + cistpl_cftable_entry_t *cfg, + cistpl_cftable_entry_t *dflt, + unsigned int vcc, + void *priv_data) { - struct pcmcia_device *link = ipw->link; - int ret; - tuple_t tuple; - unsigned short buf[64]; - cisparse_t parse; - unsigned short cor_value; + struct ipw_dev *ipw = priv_data; + struct resource *io_resource; memreq_t memreq_attr_memory; memreq_t memreq_common_memory; + int ret; - ipw->is_v2_card = 0; - - tuple.Attributes = 0; - tuple.TupleData = (cisdata_t *) buf; - tuple.TupleDataMax = sizeof(buf); - tuple.TupleOffset = 0; - - tuple.DesiredTuple = RETURN_FIRST_TUPLE; - - ret = pcmcia_get_first_tuple(link, &tuple); - - while (ret == 0) { - ret = pcmcia_get_tuple_data(link, &tuple); - - if (ret != 0) { - cs_error(link, GetTupleData, ret); - goto exit0; - } - ret = pcmcia_get_next_tuple(link, &tuple); - } - - tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY; - - ret = pcmcia_get_first_tuple(link, &tuple); - - if (ret != 0) { - cs_error(link, GetFirstTuple, ret); - goto exit0; - } - - ret = pcmcia_get_tuple_data(link, &tuple); - - if (ret != 0) { - cs_error(link, GetTupleData, ret); - goto exit0; - } - - ret = pcmcia_parse_tuple(&tuple, &parse); - - if (ret != 0) { - cs_error(link, ParseTuple, ret); - goto exit0; - } - - link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; - link->io.BasePort1 = parse.cftable_entry.io.win[0].base; - link->io.NumPorts1 = parse.cftable_entry.io.win[0].len; - link->io.IOAddrLines = 16; - - link->irq.IRQInfo1 = parse.cftable_entry.irq.IRQInfo1; + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; + p_dev->io.BasePort1 = cfg->io.win[0].base; + p_dev->io.NumPorts1 = cfg->io.win[0].len; + p_dev->io.IOAddrLines = 16; /* 0x40 causes it to generate level mode interrupts. */ /* 0x04 enables IREQ pin. */ - cor_value = parse.cftable_entry.index | 0x44; - link->conf.ConfigIndex = cor_value; + p_dev->conf.ConfigIndex = cfg->index | 0x44; + ret = pcmcia_request_io(p_dev, &p_dev->io); + if (ret) + return ret; - /* IRQ and I/O settings */ - tuple.DesiredTuple = CISTPL_CONFIG; + io_resource = request_region(p_dev->io.BasePort1, p_dev->io.NumPorts1, + IPWIRELESS_PCCARD_NAME); - ret = pcmcia_get_first_tuple(link, &tuple); + if (cfg->mem.nwin == 0) + return 0; - if (ret != 0) { - cs_error(link, GetFirstTuple, ret); - goto exit0; - } + ipw->request_common_memory.Attributes = + WIN_DATA_WIDTH_16 | WIN_MEMORY_TYPE_CM | WIN_ENABLE; + ipw->request_common_memory.Base = cfg->mem.win[0].host_addr; + ipw->request_common_memory.Size = cfg->mem.win[0].len; + if (ipw->request_common_memory.Size < 0x1000) + ipw->request_common_memory.Size = 0x1000; + ipw->request_common_memory.AccessSpeed = 0; - ret = pcmcia_get_tuple_data(link, &tuple); - - if (ret != 0) { - cs_error(link, GetTupleData, ret); - goto exit0; - } + ret = pcmcia_request_window(p_dev, &ipw->request_common_memory, + &ipw->handle_common_memory); - ret = pcmcia_parse_tuple(&tuple, &parse); + if (ret != 0) + goto exit1; - if (ret != 0) { - cs_error(link, GetTupleData, ret); - goto exit0; - } - link->conf.Attributes = CONF_ENABLE_IRQ; - link->conf.ConfigBase = parse.config.base; - link->conf.Present = parse.config.rmask[0]; - link->conf.IntType = INT_MEMORY_AND_IO; + memreq_common_memory.CardOffset = cfg->mem.win[0].card_addr; + memreq_common_memory.Page = 0; - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - link->irq.Handler = ipwireless_interrupt; - link->irq.Instance = ipw->hardware; + ret = pcmcia_map_mem_page(p_dev, ipw->handle_common_memory, + &memreq_common_memory); - ret = pcmcia_request_io(link, &link->io); + if (ret != 0) + goto exit2; - if (ret != 0) { - cs_error(link, RequestIO, ret); - goto exit0; - } + ipw->is_v2_card = cfg->mem.win[0].len == 0x100; - request_region(link->io.BasePort1, link->io.NumPorts1, + ipw->common_memory = ioremap(ipw->request_common_memory.Base, + ipw->request_common_memory.Size); + request_mem_region(ipw->request_common_memory.Base, + ipw->request_common_memory.Size, IPWIRELESS_PCCARD_NAME); - /* memory settings */ - - tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY; - - ret = pcmcia_get_first_tuple(link, &tuple); - - if (ret != 0) { - cs_error(link, GetFirstTuple, ret); - goto exit1; - } - - ret = pcmcia_get_tuple_data(link, &tuple); + ipw->request_attr_memory.Attributes = + WIN_DATA_WIDTH_16 | WIN_MEMORY_TYPE_AM | WIN_ENABLE; + ipw->request_attr_memory.Base = 0; + ipw->request_attr_memory.Size = 0; /* this used to be 0x1000 */ + ipw->request_attr_memory.AccessSpeed = 0; - if (ret != 0) { - cs_error(link, GetTupleData, ret); - goto exit1; - } - - ret = pcmcia_parse_tuple(&tuple, &parse); - - if (ret != 0) { - cs_error(link, ParseTuple, ret); - goto exit1; - } + ret = pcmcia_request_window(p_dev, &ipw->request_attr_memory, + &ipw->handle_attr_memory); - if (parse.cftable_entry.mem.nwin > 0) { - ipw->request_common_memory.Attributes = - WIN_DATA_WIDTH_16 | WIN_MEMORY_TYPE_CM | WIN_ENABLE; - ipw->request_common_memory.Base = - parse.cftable_entry.mem.win[0].host_addr; - ipw->request_common_memory.Size = parse.cftable_entry.mem.win[0].len; - if (ipw->request_common_memory.Size < 0x1000) - ipw->request_common_memory.Size = 0x1000; - ipw->request_common_memory.AccessSpeed = 0; - - ret = pcmcia_request_window(&link, &ipw->request_common_memory, - &ipw->handle_common_memory); + if (ret != 0) + goto exit2; - if (ret != 0) { - cs_error(link, RequestWindow, ret); - goto exit1; - } + memreq_attr_memory.CardOffset = 0; + memreq_attr_memory.Page = 0; - memreq_common_memory.CardOffset = - parse.cftable_entry.mem.win[0].card_addr; - memreq_common_memory.Page = 0; + ret = pcmcia_map_mem_page(p_dev, ipw->handle_attr_memory, + &memreq_attr_memory); - ret = pcmcia_map_mem_page(ipw->handle_common_memory, - &memreq_common_memory); + if (ret != 0) + goto exit3; - if (ret != 0) { - cs_error(link, MapMemPage, ret); - goto exit1; - } + ipw->attr_memory = ioremap(ipw->request_attr_memory.Base, + ipw->request_attr_memory.Size); + request_mem_region(ipw->request_attr_memory.Base, + ipw->request_attr_memory.Size, IPWIRELESS_PCCARD_NAME); - ipw->is_v2_card = - parse.cftable_entry.mem.win[0].len == 0x100; + return 0; - ipw->common_memory = ioremap(ipw->request_common_memory.Base, +exit3: + pcmcia_release_window(p_dev, ipw->handle_attr_memory); +exit2: + if (ipw->common_memory) { + release_mem_region(ipw->request_common_memory.Base, ipw->request_common_memory.Size); - request_mem_region(ipw->request_common_memory.Base, - ipw->request_common_memory.Size, IPWIRELESS_PCCARD_NAME); - - ipw->request_attr_memory.Attributes = - WIN_DATA_WIDTH_16 | WIN_MEMORY_TYPE_AM | WIN_ENABLE; - ipw->request_attr_memory.Base = 0; - ipw->request_attr_memory.Size = 0; /* this used to be 0x1000 */ - ipw->request_attr_memory.AccessSpeed = 0; - - ret = pcmcia_request_window(&link, &ipw->request_attr_memory, - &ipw->handle_attr_memory); + iounmap(ipw->common_memory); + pcmcia_release_window(p_dev, ipw->handle_common_memory); + } else + pcmcia_release_window(p_dev, ipw->handle_common_memory); +exit1: + release_resource(io_resource); + pcmcia_disable_device(p_dev); + return -1; +} - if (ret != 0) { - cs_error(link, RequestWindow, ret); - goto exit2; - } +static int config_ipwireless(struct ipw_dev *ipw) +{ + struct pcmcia_device *link = ipw->link; + int ret = 0; - memreq_attr_memory.CardOffset = 0; - memreq_attr_memory.Page = 0; + ipw->is_v2_card = 0; - ret = pcmcia_map_mem_page(ipw->handle_attr_memory, - &memreq_attr_memory); + ret = pcmcia_loop_config(link, ipwireless_probe, ipw); + if (ret != 0) + return ret; - if (ret != 0) { - cs_error(link, MapMemPage, ret); - goto exit2; - } + link->conf.Attributes = CONF_ENABLE_IRQ; + link->conf.IntType = INT_MEMORY_AND_IO; - ipw->attr_memory = ioremap(ipw->request_attr_memory.Base, - ipw->request_attr_memory.Size); - request_mem_region(ipw->request_attr_memory.Base, ipw->request_attr_memory.Size, - IPWIRELESS_PCCARD_NAME); - } + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; + link->irq.Handler = ipwireless_interrupt; INIT_WORK(&ipw->work_reboot, signalled_reboot_work); @@ -291,10 +207,8 @@ static int config_ipwireless(struct ipw_dev *ipw) ret = pcmcia_request_irq(link, &link->irq); - if (ret != 0) { - cs_error(link, RequestIRQ, ret); - goto exit3; - } + if (ret != 0) + goto exit; printk(KERN_INFO IPWIRELESS_PCCARD_NAME ": Card type %s\n", ipw->is_v2_card ? "V2/V3" : "V1"); @@ -316,12 +230,12 @@ static int config_ipwireless(struct ipw_dev *ipw) ipw->network = ipwireless_network_create(ipw->hardware); if (!ipw->network) - goto exit3; + goto exit; ipw->tty = ipwireless_tty_create(ipw->hardware, ipw->network, ipw->nodes); if (!ipw->tty) - goto exit3; + goto exit; ipwireless_init_hardware_v2_v3(ipw->hardware); @@ -331,35 +245,27 @@ static int config_ipwireless(struct ipw_dev *ipw) */ ret = pcmcia_request_configuration(link, &link->conf); - if (ret != 0) { - cs_error(link, RequestConfiguration, ret); - goto exit4; - } + if (ret != 0) + goto exit; link->dev_node = &ipw->nodes[0]; return 0; -exit4: - pcmcia_disable_device(link); -exit3: +exit: if (ipw->attr_memory) { release_mem_region(ipw->request_attr_memory.Base, ipw->request_attr_memory.Size); iounmap(ipw->attr_memory); - pcmcia_release_window(ipw->handle_attr_memory); - pcmcia_disable_device(link); + pcmcia_release_window(link, ipw->handle_attr_memory); } -exit2: if (ipw->common_memory) { release_mem_region(ipw->request_common_memory.Base, ipw->request_common_memory.Size); iounmap(ipw->common_memory); - pcmcia_release_window(ipw->handle_common_memory); + pcmcia_release_window(link, ipw->handle_common_memory); } -exit1: pcmcia_disable_device(link); -exit0: return -1; } @@ -378,9 +284,9 @@ static void release_ipwireless(struct ipw_dev *ipw) iounmap(ipw->attr_memory); } if (ipw->common_memory) - pcmcia_release_window(ipw->handle_common_memory); + pcmcia_release_window(ipw->link, ipw->handle_common_memory); if (ipw->attr_memory) - pcmcia_release_window(ipw->handle_attr_memory); + pcmcia_release_window(ipw->link, ipw->handle_attr_memory); /* Break the link with Card Services */ pcmcia_disable_device(ipw->link); @@ -406,7 +312,6 @@ static int ipwireless_attach(struct pcmcia_device *link) ipw->link = link; link->priv = ipw; - link->irq.Instance = ipw; /* Link this device into our device list. */ link->dev_node = &ipw->nodes[0]; @@ -421,7 +326,6 @@ static int ipwireless_attach(struct pcmcia_device *link) ret = config_ipwireless(ipw); if (ret != 0) { - cs_error(link, RegisterClient, ret); ipwireless_detach(link); return ret; } diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c index caf6e4d1946..c31a0d913d3 100644 --- a/drivers/char/pcmcia/synclink_cs.c +++ b/drivers/char/pcmcia/synclink_cs.c @@ -554,7 +554,6 @@ static int mgslpc_probe(struct pcmcia_device *link) /* Interrupt setup */ link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->irq.Handler = NULL; link->conf.Attributes = 0; @@ -572,69 +571,51 @@ static int mgslpc_probe(struct pcmcia_device *link) /* Card has been inserted. */ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) +static int mgslpc_ioprobe(struct pcmcia_device *p_dev, + cistpl_cftable_entry_t *cfg, + cistpl_cftable_entry_t *dflt, + unsigned int vcc, + void *priv_data) +{ + if (cfg->io.nwin > 0) { + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; + if (!(cfg->io.flags & CISTPL_IO_8BIT)) + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16; + if (!(cfg->io.flags & CISTPL_IO_16BIT)) + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8; + p_dev->io.IOAddrLines = cfg->io.flags & CISTPL_IO_LINES_MASK; + p_dev->io.BasePort1 = cfg->io.win[0].base; + p_dev->io.NumPorts1 = cfg->io.win[0].len; + return pcmcia_request_io(p_dev, &p_dev->io); + } + return -ENODEV; +} static int mgslpc_config(struct pcmcia_device *link) { MGSLPC_INFO *info = link->priv; - tuple_t tuple; - cisparse_t parse; - int last_fn, last_ret; - u_char buf[64]; - cistpl_cftable_entry_t dflt = { 0 }; - cistpl_cftable_entry_t *cfg; + int ret; if (debug_level >= DEBUG_LEVEL_INFO) printk("mgslpc_config(0x%p)\n", link); - tuple.Attributes = 0; - tuple.TupleData = buf; - tuple.TupleDataMax = sizeof(buf); - tuple.TupleOffset = 0; - - /* get CIS configuration entry */ - - tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY; - CS_CHECK(GetFirstTuple, pcmcia_get_first_tuple(link, &tuple)); - - cfg = &(parse.cftable_entry); - CS_CHECK(GetTupleData, pcmcia_get_tuple_data(link, &tuple)); - CS_CHECK(ParseTuple, pcmcia_parse_tuple(&tuple, &parse)); - - if (cfg->flags & CISTPL_CFTABLE_DEFAULT) dflt = *cfg; - if (cfg->index == 0) - goto cs_failed; - - link->conf.ConfigIndex = cfg->index; - link->conf.Attributes |= CONF_ENABLE_IRQ; - - /* IO window settings */ - link->io.NumPorts1 = 0; - if ((cfg->io.nwin > 0) || (dflt.io.nwin > 0)) { - cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt.io; - link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; - if (!(io->flags & CISTPL_IO_8BIT)) - link->io.Attributes1 = IO_DATA_PATH_WIDTH_16; - if (!(io->flags & CISTPL_IO_16BIT)) - link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; - link->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK; - link->io.BasePort1 = io->win[0].base; - link->io.NumPorts1 = io->win[0].len; - CS_CHECK(RequestIO, pcmcia_request_io(link, &link->io)); - } + ret = pcmcia_loop_config(link, mgslpc_ioprobe, NULL); + if (ret != 0) + goto failed; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; link->conf.ConfigIndex = 8; link->conf.Present = PRESENT_OPTION; - link->irq.Attributes |= IRQ_HANDLE_PRESENT; link->irq.Handler = mgslpc_isr; - link->irq.Instance = info; - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; info->io_base = link->io.BasePort1; info->irq_level = link->irq.AssignedIRQ; @@ -654,8 +635,7 @@ static int mgslpc_config(struct pcmcia_device *link) printk("\n"); return 0; -cs_failed: - cs_error(link, last_fn, last_ret); +failed: mgslpc_release((u_long)link); return -ENODEV; } diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c index 47c2d276345..f06bb37defb 100644 --- a/drivers/char/tpm/tpm.c +++ b/drivers/char/tpm/tpm.c @@ -31,7 +31,7 @@ enum tpm_const { TPM_MINOR = 224, /* officially assigned */ - TPM_BUFSIZE = 2048, + TPM_BUFSIZE = 4096, TPM_NUM_DEVICES = 256, }; diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index 0b73e4ec1ad..2405f17b29d 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -257,6 +257,10 @@ out: return size; } +static int itpm; +module_param(itpm, bool, 0444); +MODULE_PARM_DESC(itpm, "Force iTPM workarounds (found on some Lenovo laptops)"); + /* * If interrupts are used (signaled by an irq set in the vendor structure) * tpm.c can skip polling for the data to be available as the interrupt is @@ -293,7 +297,7 @@ static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len) wait_for_stat(chip, TPM_STS_VALID, chip->vendor.timeout_c, &chip->vendor.int_queue); status = tpm_tis_status(chip); - if ((status & TPM_STS_DATA_EXPECT) == 0) { + if (!itpm && (status & TPM_STS_DATA_EXPECT) == 0) { rc = -EIO; goto out_err; } @@ -467,6 +471,10 @@ static int tpm_tis_init(struct device *dev, resource_size_t start, "1.2 TPM (device-id 0x%X, rev-id %d)\n", vendor >> 16, ioread8(chip->vendor.iobase + TPM_RID(0))); + if (itpm) + dev_info(dev, "Intel iTPM workaround enabled\n"); + + /* Figure out the capabilities */ intfcaps = ioread32(chip->vendor.iobase + @@ -629,6 +637,7 @@ static struct pnp_device_id tpm_pnp_tbl[] __devinitdata = { {"", 0}, /* User Specified */ {"", 0} /* Terminator */ }; +MODULE_DEVICE_TABLE(pnp, tpm_pnp_tbl); static __devexit void tpm_tis_pnp_remove(struct pnp_dev *dev) { diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c index 2e8552dc5ed..c63f3d33914 100644 --- a/drivers/char/tty_port.c +++ b/drivers/char/tty_port.c @@ -219,8 +219,11 @@ int tty_port_block_til_ready(struct tty_port *port, /* if non-blocking mode is set we can pass directly to open unless the port has just hung up or is in another error state */ - if ((filp->f_flags & O_NONBLOCK) || - (tty->flags & (1 << TTY_IO_ERROR))) { + if (tty->flags & (1 << TTY_IO_ERROR)) { + port->flags |= ASYNC_NORMAL_ACTIVE; + return 0; + } + if (filp->f_flags & O_NONBLOCK) { /* Indicate we are open */ if (tty->termios->c_cflag & CBAUD) tty_port_raise_dtr_rts(port); diff --git a/drivers/edac/edac_mce_amd.c b/drivers/edac/edac_mce_amd.c index 713ed7d3724..689cc6a6214 100644 --- a/drivers/edac/edac_mce_amd.c +++ b/drivers/edac/edac_mce_amd.c @@ -3,7 +3,6 @@ static bool report_gart_errors; static void (*nb_bus_decoder)(int node_id, struct err_regs *regs); -static void (*orig_mce_callback)(struct mce *m); void amd_report_gart_errors(bool v) { @@ -363,8 +362,10 @@ static inline void amd_decode_err_code(unsigned int ec) pr_warning("Huh? Unknown MCE error 0x%x\n", ec); } -static void amd_decode_mce(struct mce *m) +static int amd_decode_mce(struct notifier_block *nb, unsigned long val, + void *data) { + struct mce *m = (struct mce *)data; struct err_regs regs; int node, ecc; @@ -420,20 +421,22 @@ static void amd_decode_mce(struct mce *m) } amd_decode_err_code(m->status & 0xffff); + + return NOTIFY_STOP; } +static struct notifier_block amd_mce_dec_nb = { + .notifier_call = amd_decode_mce, +}; + static int __init mce_amd_init(void) { /* * We can decode MCEs for Opteron and later CPUs: */ if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && - (boot_cpu_data.x86 >= 0xf)) { - /* safe the default decode mce callback */ - orig_mce_callback = x86_mce_decode_callback; - - x86_mce_decode_callback = amd_decode_mce; - } + (boot_cpu_data.x86 >= 0xf)) + atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb); return 0; } @@ -442,7 +445,7 @@ early_initcall(mce_amd_init); #ifdef MODULE static void __exit mce_amd_exit(void) { - x86_mce_decode_callback = orig_mce_callback; + atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb); } MODULE_DESCRIPTION("AMD MCE decoder"); diff --git a/drivers/gpio/langwell_gpio.c b/drivers/gpio/langwell_gpio.c index 5711ce5353c..4baf3d7d0f8 100644 --- a/drivers/gpio/langwell_gpio.c +++ b/drivers/gpio/langwell_gpio.c @@ -144,13 +144,6 @@ static int lnw_irq_type(unsigned irq, unsigned type) static void lnw_irq_unmask(unsigned irq) { - struct lnw_gpio *lnw = get_irq_chip_data(irq); - u32 gpio = irq - lnw->irq_base; - u8 reg = gpio / 32; - void __iomem *gedr; - - gedr = (void __iomem *)(&lnw->reg_base->GEDR[reg]); - writel(BIT(gpio % 32), gedr); }; static void lnw_irq_mask(unsigned irq) @@ -183,13 +176,11 @@ static void lnw_irq_handler(unsigned irq, struct irq_desc *desc) gedr_v = readl(gedr); if (!gedr_v) continue; - for (gpio = reg*32; gpio < reg*32+32; gpio++) { - gedr_v = readl(gedr); + for (gpio = reg*32; gpio < reg*32+32; gpio++) if (gedr_v & BIT(gpio % 32)) { pr_debug("pin %d triggered\n", gpio); generic_handle_irq(lnw->irq_base + gpio); } - } /* clear the edge detect status bit */ writel(gedr_v, gedr); } diff --git a/drivers/ide/ide-cs.c b/drivers/ide/ide-cs.c index 063b933d864..dd6396384c2 100644 --- a/drivers/ide/ide-cs.c +++ b/drivers/ide/ide-cs.c @@ -60,15 +60,6 @@ MODULE_AUTHOR("David Hinds <dahinds@users.sourceforge.net>"); MODULE_DESCRIPTION("PCMCIA ATA/IDE card driver"); MODULE_LICENSE("Dual MPL/GPL"); -#define INT_MODULE_PARM(n, v) static int n = v; module_param(n, int, 0) - -#ifdef CONFIG_PCMCIA_DEBUG -INT_MODULE_PARM(pc_debug, 0); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -#else -#define DEBUG(n, args...) -#endif - /*====================================================================*/ typedef struct ide_info_t { @@ -98,7 +89,7 @@ static int ide_probe(struct pcmcia_device *link) { ide_info_t *info; - DEBUG(0, "ide_attach()\n"); + dev_dbg(&link->dev, "ide_attach()\n"); /* Create new ide device */ info = kzalloc(sizeof(*info), GFP_KERNEL); @@ -112,7 +103,6 @@ static int ide_probe(struct pcmcia_device *link) link->io.Attributes2 = IO_DATA_PATH_WIDTH_8; link->io.IOAddrLines = 3; link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; @@ -134,7 +124,7 @@ static void ide_detach(struct pcmcia_device *link) ide_hwif_t *hwif = info->host->ports[0]; unsigned long data_addr, ctl_addr; - DEBUG(0, "ide_detach(0x%p)\n", link); + dev_dbg(&link->dev, "ide_detach(0x%p)\n", link); data_addr = hwif->io_ports.data_addr; ctl_addr = hwif->io_ports.ctl_addr; @@ -217,9 +207,6 @@ out_release: ======================================================================*/ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - struct pcmcia_config_check { unsigned long ctl_base; int skip_vcc; @@ -282,11 +269,11 @@ static int ide_config(struct pcmcia_device *link) { ide_info_t *info = link->priv; struct pcmcia_config_check *stk = NULL; - int last_ret = 0, last_fn = 0, is_kme = 0; + int ret = 0, is_kme = 0; unsigned long io_base, ctl_base; struct ide_host *host; - DEBUG(0, "ide_config(0x%p)\n", link); + dev_dbg(&link->dev, "ide_config(0x%p)\n", link); is_kme = ((link->manf_id == MANFID_KME) && ((link->card_id == PRODID_KME_KXLC005_A) || @@ -306,8 +293,12 @@ static int ide_config(struct pcmcia_device *link) io_base = link->io.BasePort1; ctl_base = stk->ctl_base; - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; /* disable drive interrupts during IDE probe */ outb(0x02, ctl_base); @@ -342,8 +333,6 @@ err_mem: printk(KERN_NOTICE "ide-cs: ide_config failed memory allocation\n"); goto failed; -cs_failed: - cs_error(link, last_fn, last_ret); failed: kfree(stk); ide_release(link); @@ -363,7 +352,7 @@ static void ide_release(struct pcmcia_device *link) ide_info_t *info = link->priv; struct ide_host *host = info->host; - DEBUG(0, "ide_release(0x%p)\n", link); + dev_dbg(&link->dev, "ide_release(0x%p)\n", link); if (info->ndev) /* FIXME: if this fails we need to queue the cleanup somehow diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index a537925f765..2bcf1ace27c 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -447,6 +447,27 @@ static struct dmi_system_id __initdata i8042_dmi_reset_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "N10"), }, }, + { + .ident = "Dell Vostro 1320", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 1320"), + }, + }, + { + .ident = "Dell Vostro 1520", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 1520"), + }, + }, + { + .ident = "Dell Vostro 1720", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 1720"), + }, + }, { } }; diff --git a/drivers/isdn/hardware/avm/avm_cs.c b/drivers/isdn/hardware/avm/avm_cs.c index c72565520e4..5a6ae646a63 100644 --- a/drivers/isdn/hardware/avm/avm_cs.c +++ b/drivers/isdn/hardware/avm/avm_cs.c @@ -111,8 +111,6 @@ static int avmcs_probe(struct pcmcia_device *p_dev) p_dev->irq.Attributes = IRQ_TYPE_EXCLUSIVE; p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED; - p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID; - /* General socket configuration */ p_dev->conf.Attributes = CONF_ENABLE_IRQ; p_dev->conf.IntType = INT_MEMORY_AND_IO; @@ -198,7 +196,6 @@ static int avmcs_config(struct pcmcia_device *link) */ i = pcmcia_request_irq(link, &link->irq); if (i != 0) { - cs_error(link, RequestIRQ, i); /* undo */ pcmcia_disable_device(link); break; @@ -209,7 +206,6 @@ static int avmcs_config(struct pcmcia_device *link) */ i = pcmcia_request_configuration(link, &link->conf); if (i != 0) { - cs_error(link, RequestConfiguration, i); pcmcia_disable_device(link); break; } diff --git a/drivers/isdn/hisax/avma1_cs.c b/drivers/isdn/hisax/avma1_cs.c index 23560c897ec..f9bdff39cf4 100644 --- a/drivers/isdn/hisax/avma1_cs.c +++ b/drivers/isdn/hisax/avma1_cs.c @@ -30,22 +30,6 @@ MODULE_DESCRIPTION("ISDN4Linux: PCMCIA client driver for AVM A1/Fritz!PCMCIA car MODULE_AUTHOR("Carsten Paeth"); MODULE_LICENSE("GPL"); -/* - All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - you do not define PCMCIA_DEBUG at all, all the debug code will be - left out. If you compile with PCMCIA_DEBUG=0, the debug code will - be present but disabled -- but it can then be enabled for specific - modules at load time with a 'pc_debug=#' option to insmod. -*/ -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args); -static char *version = -"avma1_cs.c 1.00 1998/01/23 10:00:00 (Carsten Paeth)"; -#else -#define DEBUG(n, args...) -#endif /*====================================================================*/ @@ -119,7 +103,7 @@ static int avma1cs_probe(struct pcmcia_device *p_dev) { local_info_t *local; - DEBUG(0, "avma1cs_attach()\n"); + dev_dbg(&p_dev->dev, "avma1cs_attach()\n"); /* Allocate space for private device-specific data */ local = kzalloc(sizeof(local_info_t), GFP_KERNEL); @@ -139,8 +123,6 @@ static int avma1cs_probe(struct pcmcia_device *p_dev) p_dev->irq.Attributes = IRQ_TYPE_EXCLUSIVE; p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED; - p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID; - /* General socket configuration */ p_dev->conf.Attributes = CONF_ENABLE_IRQ; p_dev->conf.IntType = INT_MEMORY_AND_IO; @@ -161,7 +143,7 @@ static int avma1cs_probe(struct pcmcia_device *p_dev) static void avma1cs_detach(struct pcmcia_device *link) { - DEBUG(0, "avma1cs_detach(0x%p)\n", link); + dev_dbg(&link->dev, "avma1cs_detach(0x%p)\n", link); avma1cs_release(link); kfree(link->priv); } /* avma1cs_detach */ @@ -203,7 +185,7 @@ static int avma1cs_config(struct pcmcia_device *link) dev = link->priv; - DEBUG(0, "avma1cs_config(0x%p)\n", link); + dev_dbg(&link->dev, "avma1cs_config(0x%p)\n", link); devname[0] = 0; if (link->prod_id[1]) @@ -218,7 +200,6 @@ static int avma1cs_config(struct pcmcia_device *link) */ i = pcmcia_request_irq(link, &link->irq); if (i != 0) { - cs_error(link, RequestIRQ, i); /* undo */ pcmcia_disable_device(link); break; @@ -229,7 +210,6 @@ static int avma1cs_config(struct pcmcia_device *link) */ i = pcmcia_request_configuration(link, &link->conf); if (i != 0) { - cs_error(link, RequestConfiguration, i); pcmcia_disable_device(link); break; } @@ -281,7 +261,7 @@ static void avma1cs_release(struct pcmcia_device *link) { local_info_t *local = link->priv; - DEBUG(0, "avma1cs_release(0x%p)\n", link); + dev_dbg(&link->dev, "avma1cs_release(0x%p)\n", link); /* now unregister function with hisax */ HiSax_closecard(local->node.minor); diff --git a/drivers/isdn/hisax/elsa_cs.c b/drivers/isdn/hisax/elsa_cs.c index f4d0fe29bcf..a2f709f5397 100644 --- a/drivers/isdn/hisax/elsa_cs.c +++ b/drivers/isdn/hisax/elsa_cs.c @@ -57,23 +57,6 @@ MODULE_DESCRIPTION("ISDN4Linux: PCMCIA client driver for Elsa PCM cards"); MODULE_AUTHOR("Klaus Lichtenwalder"); MODULE_LICENSE("Dual MPL/GPL"); -/* - All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - you do not define PCMCIA_DEBUG at all, all the debug code will be - left out. If you compile with PCMCIA_DEBUG=0, the debug code will - be present but disabled -- but it can then be enabled for specific - modules at load time with a 'pc_debug=#' option to insmod. -*/ - -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args); -static char *version = -"elsa_cs.c $Revision: 1.2.2.4 $ $Date: 2004/01/25 15:07:06 $ (K.Lichtenwalder)"; -#else -#define DEBUG(n, args...) -#endif /*====================================================================*/ @@ -142,7 +125,7 @@ static int elsa_cs_probe(struct pcmcia_device *link) { local_info_t *local; - DEBUG(0, "elsa_cs_attach()\n"); + dev_dbg(&link->dev, "elsa_cs_attach()\n"); /* Allocate space for private device-specific data */ local = kzalloc(sizeof(local_info_t), GFP_KERNEL); @@ -155,7 +138,6 @@ static int elsa_cs_probe(struct pcmcia_device *link) /* Interrupt setup */ link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED; - link->irq.IRQInfo1 = IRQ_LEVEL_ID|IRQ_SHARE_ID; link->irq.Handler = NULL; /* @@ -188,7 +170,7 @@ static void elsa_cs_detach(struct pcmcia_device *link) { local_info_t *info = link->priv; - DEBUG(0, "elsa_cs_detach(0x%p)\n", link); + dev_dbg(&link->dev, "elsa_cs_detach(0x%p)\n", link); info->busy = 1; elsa_cs_release(link); @@ -231,30 +213,25 @@ static int elsa_cs_configcheck(struct pcmcia_device *p_dev, static int elsa_cs_config(struct pcmcia_device *link) { local_info_t *dev; - int i, last_fn; + int i; IsdnCard_t icard; - DEBUG(0, "elsa_config(0x%p)\n", link); + dev_dbg(&link->dev, "elsa_config(0x%p)\n", link); dev = link->priv; i = pcmcia_loop_config(link, elsa_cs_configcheck, NULL); - if (i != 0) { - last_fn = RequestIO; - goto cs_failed; - } + if (i != 0) + goto failed; i = pcmcia_request_irq(link, &link->irq); if (i != 0) { link->irq.AssignedIRQ = 0; - last_fn = RequestIRQ; - goto cs_failed; + goto failed; } i = pcmcia_request_configuration(link, &link->conf); - if (i != 0) { - last_fn = RequestConfiguration; - goto cs_failed; - } + if (i != 0) + goto failed; /* At this point, the dev_node_t structure(s) should be initialized and arranged in a linked list at link->dev. *//* */ @@ -290,8 +267,7 @@ static int elsa_cs_config(struct pcmcia_device *link) ((local_info_t*)link->priv)->cardnr = i; return 0; -cs_failed: - cs_error(link, last_fn, i); +failed: elsa_cs_release(link); return -ENODEV; } /* elsa_cs_config */ @@ -308,7 +284,7 @@ static void elsa_cs_release(struct pcmcia_device *link) { local_info_t *local = link->priv; - DEBUG(0, "elsa_cs_release(0x%p)\n", link); + dev_dbg(&link->dev, "elsa_cs_release(0x%p)\n", link); if (local) { if (local->cardnr >= 0) { diff --git a/drivers/isdn/hisax/sedlbauer_cs.c b/drivers/isdn/hisax/sedlbauer_cs.c index 9a3c9f5e4fe..af5d393cc2d 100644 --- a/drivers/isdn/hisax/sedlbauer_cs.c +++ b/drivers/isdn/hisax/sedlbauer_cs.c @@ -57,24 +57,6 @@ MODULE_DESCRIPTION("ISDN4Linux: PCMCIA client driver for Sedlbauer cards"); MODULE_AUTHOR("Marcus Niemann"); MODULE_LICENSE("Dual MPL/GPL"); -/* - All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - you do not define PCMCIA_DEBUG at all, all the debug code will be - left out. If you compile with PCMCIA_DEBUG=0, the debug code will - be present but disabled -- but it can then be enabled for specific - modules at load time with a 'pc_debug=#' option to insmod. -*/ - -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args); -static char *version = -"sedlbauer_cs.c 1.1a 2001/01/28 15:04:04 (M.Niemann)"; -#else -#define DEBUG(n, args...) -#endif - /*====================================================================*/ @@ -151,7 +133,7 @@ static int sedlbauer_probe(struct pcmcia_device *link) { local_info_t *local; - DEBUG(0, "sedlbauer_attach()\n"); + dev_dbg(&link->dev, "sedlbauer_attach()\n"); /* Allocate space for private device-specific data */ local = kzalloc(sizeof(local_info_t), GFP_KERNEL); @@ -163,7 +145,6 @@ static int sedlbauer_probe(struct pcmcia_device *link) /* Interrupt setup */ link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->irq.Handler = NULL; /* @@ -198,7 +179,7 @@ static int sedlbauer_probe(struct pcmcia_device *link) static void sedlbauer_detach(struct pcmcia_device *link) { - DEBUG(0, "sedlbauer_detach(0x%p)\n", link); + dev_dbg(&link->dev, "sedlbauer_detach(0x%p)\n", link); ((local_info_t *)link->priv)->stop = 1; sedlbauer_release(link); @@ -214,9 +195,6 @@ static void sedlbauer_detach(struct pcmcia_device *link) device available to the system. ======================================================================*/ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int sedlbauer_config_check(struct pcmcia_device *p_dev, cistpl_cftable_entry_t *cfg, cistpl_cftable_entry_t *dflt, @@ -293,11 +271,11 @@ static int sedlbauer_config_check(struct pcmcia_device *p_dev, req->Base = mem->win[0].host_addr; req->Size = mem->win[0].len; req->AccessSpeed = 0; - if (pcmcia_request_window(&p_dev, req, &p_dev->win) != 0) + if (pcmcia_request_window(p_dev, req, &p_dev->win) != 0) return -ENODEV; map.Page = 0; map.CardOffset = mem->win[0].card_addr; - if (pcmcia_map_mem_page(p_dev->win, &map) != 0) + if (pcmcia_map_mem_page(p_dev, p_dev->win, &map) != 0) return -ENODEV; } return 0; @@ -309,10 +287,10 @@ static int sedlbauer_config(struct pcmcia_device *link) { local_info_t *dev = link->priv; win_req_t *req; - int last_fn, last_ret; + int ret; IsdnCard_t icard; - DEBUG(0, "sedlbauer_config(0x%p)\n", link); + dev_dbg(&link->dev, "sedlbauer_config(0x%p)\n", link); req = kzalloc(sizeof(win_req_t), GFP_KERNEL); if (!req) @@ -330,8 +308,8 @@ static int sedlbauer_config(struct pcmcia_device *link) these things without consulting the CIS, and most client drivers will only use the CIS to fill in implementation-defined details. */ - last_ret = pcmcia_loop_config(link, sedlbauer_config_check, req); - if (last_ret) + ret = pcmcia_loop_config(link, sedlbauer_config_check, req); + if (ret) goto failed; /* @@ -339,15 +317,20 @@ static int sedlbauer_config(struct pcmcia_device *link) handler to the interrupt, unless the 'Handler' member of the irq structure is initialized. */ - if (link->conf.Attributes & CONF_ENABLE_IRQ) - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); + if (link->conf.Attributes & CONF_ENABLE_IRQ) { + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; + } /* This actually configures the PCMCIA socket -- setting up the I/O windows and the interrupt mapping, and putting the card and host interface into "Memory and IO" mode. */ - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; /* At this point, the dev_node_t structure(s) need to be @@ -380,19 +363,18 @@ static int sedlbauer_config(struct pcmcia_device *link) icard.protocol = protocol; icard.typ = ISDN_CTYPE_SEDLBAUER_PCMCIA; - last_ret = hisax_init_pcmcia(link, &(((local_info_t*)link->priv)->stop), &icard); - if (last_ret < 0) { - printk(KERN_ERR "sedlbauer_cs: failed to initialize SEDLBAUER PCMCIA %d at i/o %#x\n", - last_ret, link->io.BasePort1); + ret = hisax_init_pcmcia(link, + &(((local_info_t *)link->priv)->stop), &icard); + if (ret < 0) { + printk(KERN_ERR "sedlbauer_cs: failed to initialize SEDLBAUER PCMCIA %d at i/o %#x\n", + ret, link->io.BasePort1); sedlbauer_release(link); return -ENODEV; } else - ((local_info_t*)link->priv)->cardnr = last_ret; + ((local_info_t *)link->priv)->cardnr = ret; return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: sedlbauer_release(link); return -ENODEV; @@ -410,7 +392,7 @@ failed: static void sedlbauer_release(struct pcmcia_device *link) { local_info_t *local = link->priv; - DEBUG(0, "sedlbauer_release(0x%p)\n", link); + dev_dbg(&link->dev, "sedlbauer_release(0x%p)\n", link); if (local) { if (local->cardnr >= 0) { diff --git a/drivers/isdn/hisax/teles_cs.c b/drivers/isdn/hisax/teles_cs.c index 623d111544d..ea705394ce2 100644 --- a/drivers/isdn/hisax/teles_cs.c +++ b/drivers/isdn/hisax/teles_cs.c @@ -38,23 +38,6 @@ MODULE_DESCRIPTION("ISDN4Linux: PCMCIA client driver for Teles PCMCIA cards"); MODULE_AUTHOR("Christof Petig, christof.petig@wtal.de, Karsten Keil, kkeil@suse.de"); MODULE_LICENSE("GPL"); -/* - All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - you do not define PCMCIA_DEBUG at all, all the debug code will be - left out. If you compile with PCMCIA_DEBUG=0, the debug code will - be present but disabled -- but it can then be enabled for specific - modules at load time with a 'pc_debug=#' option to insmod. -*/ - -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args); -static char *version = -"teles_cs.c 2.10 2002/07/30 22:23:34 kkeil"; -#else -#define DEBUG(n, args...) -#endif /*====================================================================*/ @@ -133,7 +116,7 @@ static int teles_probe(struct pcmcia_device *link) { local_info_t *local; - DEBUG(0, "teles_attach()\n"); + dev_dbg(&link->dev, "teles_attach()\n"); /* Allocate space for private device-specific data */ local = kzalloc(sizeof(local_info_t), GFP_KERNEL); @@ -145,7 +128,6 @@ static int teles_probe(struct pcmcia_device *link) /* Interrupt setup */ link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED; - link->irq.IRQInfo1 = IRQ_LEVEL_ID|IRQ_SHARE_ID; link->irq.Handler = NULL; /* @@ -178,7 +160,7 @@ static void teles_detach(struct pcmcia_device *link) { local_info_t *info = link->priv; - DEBUG(0, "teles_detach(0x%p)\n", link); + dev_dbg(&link->dev, "teles_detach(0x%p)\n", link); info->busy = 1; teles_cs_release(link); @@ -221,30 +203,25 @@ static int teles_cs_configcheck(struct pcmcia_device *p_dev, static int teles_cs_config(struct pcmcia_device *link) { local_info_t *dev; - int i, last_fn; + int i; IsdnCard_t icard; - DEBUG(0, "teles_config(0x%p)\n", link); + dev_dbg(&link->dev, "teles_config(0x%p)\n", link); dev = link->priv; i = pcmcia_loop_config(link, teles_cs_configcheck, NULL); - if (i != 0) { - last_fn = RequestIO; + if (i != 0) goto cs_failed; - } i = pcmcia_request_irq(link, &link->irq); if (i != 0) { link->irq.AssignedIRQ = 0; - last_fn = RequestIRQ; goto cs_failed; } i = pcmcia_request_configuration(link, &link->conf); - if (i != 0) { - last_fn = RequestConfiguration; + if (i != 0) goto cs_failed; - } /* At this point, the dev_node_t structure(s) should be initialized and arranged in a linked list at link->dev. *//* */ @@ -283,7 +260,6 @@ static int teles_cs_config(struct pcmcia_device *link) return 0; cs_failed: - cs_error(link, last_fn, i); teles_cs_release(link); return -ENODEV; } /* teles_cs_config */ @@ -300,7 +276,7 @@ static void teles_cs_release(struct pcmcia_device *link) { local_info_t *local = link->priv; - DEBUG(0, "teles_cs_release(0x%p)\n", link); + dev_dbg(&link->dev, "teles_cs_release(0x%p)\n", link); if (local) { if (local->cardnr >= 0) { diff --git a/drivers/leds/leds-locomo.c b/drivers/leds/leds-locomo.c index 5d91362e306..1f7c10f6b7f 100644 --- a/drivers/leds/leds-locomo.c +++ b/drivers/leds/leds-locomo.c @@ -44,7 +44,7 @@ static void locomoled_brightness_set1(struct led_classdev *led_cdev, static struct led_classdev locomo_led0 = { .name = "locomo:amber:charge", - .default_trigger = "sharpsl-charge", + .default_trigger = "main-battery-charging", .brightness_set = locomoled_brightness_set0, }; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index a053423785c..e07ce2e033a 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1650,11 +1650,12 @@ static void raid1d(mddev_t *mddev) r1_bio->sector, r1_bio->sectors); unfreeze_array(conf); - } + } else + md_error(mddev, + conf->mirrors[r1_bio->read_disk].rdev); bio = r1_bio->bios[r1_bio->read_disk]; - if ((disk=read_balance(conf, r1_bio)) == -1 || - disk == r1_bio->read_disk) { + if ((disk=read_balance(conf, r1_bio)) == -1) { printk(KERN_ALERT "raid1: %s: unrecoverable I/O" " read error for block %llu\n", bdevname(bio->bi_bdev,b), diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.c b/drivers/media/dvb/dvb-core/dvb_frontend.c index ddf639ed2fd..98082416aa5 100644 --- a/drivers/media/dvb/dvb-core/dvb_frontend.c +++ b/drivers/media/dvb/dvb-core/dvb_frontend.c @@ -31,6 +31,7 @@ #include <linux/wait.h> #include <linux/slab.h> #include <linux/poll.h> +#include <linux/semaphore.h> #include <linux/module.h> #include <linux/list.h> #include <linux/freezer.h> diff --git a/drivers/mfd/mcp-core.c b/drivers/mfd/mcp-core.c index 57271cb3b31..84815f9ef63 100644 --- a/drivers/mfd/mcp-core.c +++ b/drivers/mfd/mcp-core.c @@ -17,11 +17,11 @@ #include <linux/device.h> #include <linux/slab.h> #include <linux/string.h> +#include <linux/mfd/mcp.h> #include <mach/dma.h> #include <asm/system.h> -#include "mcp.h" #define to_mcp(d) container_of(d, struct mcp, attached_device) #define to_mcp_driver(d) container_of(d, struct mcp_driver, drv) diff --git a/drivers/mfd/mcp-sa11x0.c b/drivers/mfd/mcp-sa11x0.c index 62b32dabf62..25842723272 100644 --- a/drivers/mfd/mcp-sa11x0.c +++ b/drivers/mfd/mcp-sa11x0.c @@ -19,6 +19,7 @@ #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/platform_device.h> +#include <linux/mfd/mcp.h> #include <mach/dma.h> #include <mach/hardware.h> @@ -28,7 +29,6 @@ #include <mach/assabet.h> -#include "mcp.h" struct mcp_sa11x0 { u32 mccr0; @@ -163,6 +163,7 @@ static int mcp_sa11x0_probe(struct platform_device *pdev) mcp->dma_audio_wr = DMA_Ser4MCP0Wr; mcp->dma_telco_rd = DMA_Ser4MCP1Rd; mcp->dma_telco_wr = DMA_Ser4MCP1Wr; + mcp->gpio_base = data->gpio_base; platform_set_drvdata(pdev, mcp); diff --git a/drivers/mfd/ucb1x00-assabet.c b/drivers/mfd/ucb1x00-assabet.c index 86fed4870f9..cea9da60850 100644 --- a/drivers/mfd/ucb1x00-assabet.c +++ b/drivers/mfd/ucb1x00-assabet.c @@ -14,10 +14,10 @@ #include <linux/fs.h> #include <linux/proc_fs.h> #include <linux/device.h> +#include <linux/mfd/ucb1x00.h> #include <mach/dma.h> -#include "ucb1x00.h" #define UCB1X00_ATTR(name,input)\ static ssize_t name##_show(struct device *dev, struct device_attribute *attr, \ diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c index 60c3988f3cf..252b74188ec 100644 --- a/drivers/mfd/ucb1x00-core.c +++ b/drivers/mfd/ucb1x00-core.c @@ -25,12 +25,12 @@ #include <linux/interrupt.h> #include <linux/device.h> #include <linux/mutex.h> +#include <linux/mfd/ucb1x00.h> +#include <linux/gpio.h> #include <mach/dma.h> #include <mach/hardware.h> -#include "ucb1x00.h" - static DEFINE_MUTEX(ucb1x00_mutex); static LIST_HEAD(ucb1x00_drivers); static LIST_HEAD(ucb1x00_devices); @@ -108,6 +108,60 @@ unsigned int ucb1x00_io_read(struct ucb1x00 *ucb) return ucb1x00_reg_read(ucb, UCB_IO_DATA); } +static void ucb1x00_gpio_set(struct gpio_chip *chip, unsigned offset, int value) +{ + struct ucb1x00 *ucb = container_of(chip, struct ucb1x00, gpio); + unsigned long flags; + + spin_lock_irqsave(&ucb->io_lock, flags); + if (value) + ucb->io_out |= 1 << offset; + else + ucb->io_out &= ~(1 << offset); + + ucb1x00_reg_write(ucb, UCB_IO_DATA, ucb->io_out); + spin_unlock_irqrestore(&ucb->io_lock, flags); +} + +static int ucb1x00_gpio_get(struct gpio_chip *chip, unsigned offset) +{ + struct ucb1x00 *ucb = container_of(chip, struct ucb1x00, gpio); + return ucb1x00_reg_read(ucb, UCB_IO_DATA) & (1 << offset); +} + +static int ucb1x00_gpio_direction_input(struct gpio_chip *chip, unsigned offset) +{ + struct ucb1x00 *ucb = container_of(chip, struct ucb1x00, gpio); + unsigned long flags; + + spin_lock_irqsave(&ucb->io_lock, flags); + ucb->io_dir &= ~(1 << offset); + ucb1x00_reg_write(ucb, UCB_IO_DIR, ucb->io_dir); + spin_unlock_irqrestore(&ucb->io_lock, flags); + + return 0; +} + +static int ucb1x00_gpio_direction_output(struct gpio_chip *chip, unsigned offset + , int value) +{ + struct ucb1x00 *ucb = container_of(chip, struct ucb1x00, gpio); + unsigned long flags; + + spin_lock_irqsave(&ucb->io_lock, flags); + ucb->io_dir |= (1 << offset); + ucb1x00_reg_write(ucb, UCB_IO_DIR, ucb->io_dir); + + if (value) + ucb->io_out |= 1 << offset; + else + ucb->io_out &= ~(1 << offset); + ucb1x00_reg_write(ucb, UCB_IO_DATA, ucb->io_out); + spin_unlock_irqrestore(&ucb->io_lock, flags); + + return 0; +} + /* * UCB1300 data sheet says we must: * 1. enable ADC => 5us (including reference startup time) @@ -476,6 +530,7 @@ static int ucb1x00_probe(struct mcp *mcp) struct ucb1x00_driver *drv; unsigned int id; int ret = -ENODEV; + int temp; mcp_enable(mcp); id = mcp_reg_read(mcp, UCB_ID); @@ -508,12 +563,27 @@ static int ucb1x00_probe(struct mcp *mcp) goto err_free; } + ucb->gpio.base = -1; + if (mcp->gpio_base != 0) { + ucb->gpio.label = dev_name(&ucb->dev); + ucb->gpio.base = mcp->gpio_base; + ucb->gpio.ngpio = 10; + ucb->gpio.set = ucb1x00_gpio_set; + ucb->gpio.get = ucb1x00_gpio_get; + ucb->gpio.direction_input = ucb1x00_gpio_direction_input; + ucb->gpio.direction_output = ucb1x00_gpio_direction_output; + ret = gpiochip_add(&ucb->gpio); + if (ret) + goto err_free; + } else + dev_info(&ucb->dev, "gpio_base not set so no gpiolib support"); + ret = request_irq(ucb->irq, ucb1x00_irq, IRQF_TRIGGER_RISING, "UCB1x00", ucb); if (ret) { printk(KERN_ERR "ucb1x00: unable to grab irq%d: %d\n", ucb->irq, ret); - goto err_free; + goto err_gpio; } mcp_set_drvdata(mcp, ucb); @@ -522,6 +592,7 @@ static int ucb1x00_probe(struct mcp *mcp) if (ret) goto err_irq; + INIT_LIST_HEAD(&ucb->devs); mutex_lock(&ucb1x00_mutex); list_add(&ucb->node, &ucb1x00_devices); @@ -529,10 +600,14 @@ static int ucb1x00_probe(struct mcp *mcp) ucb1x00_add_dev(ucb, drv); } mutex_unlock(&ucb1x00_mutex); + goto out; err_irq: free_irq(ucb->irq, ucb); + err_gpio: + if (ucb->gpio.base != -1) + temp = gpiochip_remove(&ucb->gpio); err_free: kfree(ucb); err_disable: @@ -545,6 +620,7 @@ static void ucb1x00_remove(struct mcp *mcp) { struct ucb1x00 *ucb = mcp_get_drvdata(mcp); struct list_head *l, *n; + int ret; mutex_lock(&ucb1x00_mutex); list_del(&ucb->node); @@ -554,6 +630,12 @@ static void ucb1x00_remove(struct mcp *mcp) } mutex_unlock(&ucb1x00_mutex); + if (ucb->gpio.base != -1) { + ret = gpiochip_remove(&ucb->gpio); + if (ret) + dev_err(&ucb->dev, "Can't remove gpio chip: %d\n", ret); + } + free_irq(ucb->irq, ucb); device_unregister(&ucb->dev); } @@ -604,6 +686,7 @@ static int ucb1x00_resume(struct mcp *mcp) struct ucb1x00 *ucb = mcp_get_drvdata(mcp); struct ucb1x00_dev *dev; + ucb1x00_reg_write(ucb, UCB_IO_DIR, ucb->io_dir); mutex_lock(&ucb1x00_mutex); list_for_each_entry(dev, &ucb->devs, dev_node) { if (dev->drv->resume) diff --git a/drivers/mfd/ucb1x00-ts.c b/drivers/mfd/ucb1x00-ts.c index 61b7d3eb9a2..000cb414a78 100644 --- a/drivers/mfd/ucb1x00-ts.c +++ b/drivers/mfd/ucb1x00-ts.c @@ -30,12 +30,12 @@ #include <linux/freezer.h> #include <linux/slab.h> #include <linux/kthread.h> +#include <linux/mfd/ucb1x00.h> #include <mach/dma.h> #include <mach/collie.h> #include <asm/mach-types.h> -#include "ucb1x00.h" struct ucb1x00_ts { diff --git a/drivers/mfd/wm831x-core.c b/drivers/mfd/wm831x-core.c index 49b7885c270..7f27576ca04 100644 --- a/drivers/mfd/wm831x-core.c +++ b/drivers/mfd/wm831x-core.c @@ -29,7 +29,7 @@ /* Current settings - values are 2*2^(reg_val/4) microamps. These are * exported since they are used by multiple drivers. */ -int wm831x_isinkv_values[WM831X_ISINK_MAX_ISEL] = { +int wm831x_isinkv_values[WM831X_ISINK_MAX_ISEL + 1] = { 2, 2, 3, diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c index c76677afda1..b5bbe59f9c5 100644 --- a/drivers/misc/sgi-xp/xpc_uv.c +++ b/drivers/misc/sgi-xp/xpc_uv.c @@ -106,7 +106,8 @@ xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name) int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); #if defined CONFIG_X86_64 - mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset); + mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset, + UV_AFFINITY_CPU); if (mq->irq < 0) { dev_err(xpc_part, "uv_setup_irq() returned error=%d\n", -mq->irq); @@ -136,7 +137,7 @@ static void xpc_release_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq) { #if defined CONFIG_X86_64 - uv_teardown_irq(mq->irq, mq->mmr_blade, mq->mmr_offset); + uv_teardown_irq(mq->irq); #elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV int mmr_pnode; diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c index c85f6166056..bb47ff465c0 100644 --- a/drivers/mmc/host/pxamci.c +++ b/drivers/mmc/host/pxamci.c @@ -762,6 +762,8 @@ static int pxamci_remove(struct platform_device *pdev) if (mmc) { struct pxamci_host *host = mmc_priv(mmc); + mmc_remove_host(mmc); + if (host->pdata) { gpio_cd = host->pdata->gpio_card_detect; gpio_ro = host->pdata->gpio_card_ro; @@ -781,8 +783,6 @@ static int pxamci_remove(struct platform_device *pdev) if (host->pdata && host->pdata->exit) host->pdata->exit(&pdev->dev, mmc); - mmc_remove_host(mmc); - pxamci_stop_clock(host); writel(TXFIFO_WR_REQ|RXFIFO_RD_REQ|CLK_IS_OFF|STOP_CMD| END_CMD_RES|PRG_DONE|DATA_TRAN_DONE, diff --git a/drivers/mtd/maps/pcmciamtd.c b/drivers/mtd/maps/pcmciamtd.c index d600c2deff7..689d6a79ffc 100644 --- a/drivers/mtd/maps/pcmciamtd.c +++ b/drivers/mtd/maps/pcmciamtd.c @@ -118,11 +118,9 @@ static caddr_t remap_window(struct map_info *map, unsigned long to) DEBUG(2, "Remapping window from 0x%8.8x to 0x%8.8x", dev->offset, mrq.CardOffset); mrq.Page = 0; - ret = pcmcia_map_mem_page(win, &mrq); - if (ret != 0) { - cs_error(dev->p_dev, MapMemPage, ret); + ret = pcmcia_map_mem_page(dev->p_dev, win, &mrq); + if (ret != 0) return NULL; - } dev->offset = mrq.CardOffset; } return dev->win_base + (to & (dev->win_size-1)); @@ -327,8 +325,6 @@ static void pcmciamtd_set_vpp(struct map_info *map, int on) DEBUG(2, "dev = %p on = %d vpp = %d\n", dev, on, dev->vpp); ret = pcmcia_modify_configuration(link, &mod); - if (ret != 0) - cs_error(link, ModifyConfiguration, ret); } @@ -348,107 +344,116 @@ static void pcmciamtd_release(struct pcmcia_device *link) iounmap(dev->win_base); dev->win_base = NULL; } - pcmcia_release_window(link->win); + pcmcia_release_window(link, link->win); } pcmcia_disable_device(link); } -static void card_settings(struct pcmciamtd_dev *dev, struct pcmcia_device *link, int *new_name) +#ifdef CONFIG_MTD_DEBUG +static int pcmciamtd_cistpl_format(struct pcmcia_device *p_dev, + tuple_t *tuple, + void *priv_data) { - int rc; - tuple_t tuple; cisparse_t parse; - u_char buf[64]; - - tuple.Attributes = 0; - tuple.TupleData = (cisdata_t *)buf; - tuple.TupleDataMax = sizeof(buf); - tuple.TupleOffset = 0; - tuple.DesiredTuple = RETURN_FIRST_TUPLE; - - rc = pcmcia_get_first_tuple(link, &tuple); - while (rc == 0) { - rc = pcmcia_get_tuple_data(link, &tuple); - if (rc != 0) { - cs_error(link, GetTupleData, rc); - break; - } - rc = pcmcia_parse_tuple(&tuple, &parse); - if (rc != 0) { - cs_error(link, ParseTuple, rc); - break; - } - switch(tuple.TupleCode) { - case CISTPL_FORMAT: { - cistpl_format_t *t = &parse.format; - (void)t; /* Shut up, gcc */ - DEBUG(2, "Format type: %u, Error Detection: %u, offset = %u, length =%u", - t->type, t->edc, t->offset, t->length); - break; + if (!pcmcia_parse_tuple(tuple, &parse)) { + cistpl_format_t *t = &parse.format; + (void)t; /* Shut up, gcc */ + DEBUG(2, "Format type: %u, Error Detection: %u, offset = %u, length =%u", + t->type, t->edc, t->offset, t->length); + } + return -ENOSPC; +} - } +static int pcmciamtd_cistpl_jedec(struct pcmcia_device *p_dev, + tuple_t *tuple, + void *priv_data) +{ + cisparse_t parse; + int i; - case CISTPL_DEVICE: { - cistpl_device_t *t = &parse.device; - int i; - DEBUG(2, "Common memory:"); - dev->pcmcia_map.size = t->dev[0].size; - for(i = 0; i < t->ndev; i++) { - DEBUG(2, "Region %d, type = %u", i, t->dev[i].type); - DEBUG(2, "Region %d, wp = %u", i, t->dev[i].wp); - DEBUG(2, "Region %d, speed = %u ns", i, t->dev[i].speed); - DEBUG(2, "Region %d, size = %u bytes", i, t->dev[i].size); - } - break; - } + if (!pcmcia_parse_tuple(tuple, &parse)) { + cistpl_jedec_t *t = &parse.jedec; + for (i = 0; i < t->nid; i++) + DEBUG(2, "JEDEC: 0x%02x 0x%02x", t->id[i].mfr, t->id[i].info); + } + return -ENOSPC; +} +#endif - case CISTPL_VERS_1: { - cistpl_vers_1_t *t = &parse.version_1; - int i; - if(t->ns) { - dev->mtd_name[0] = '\0'; - for(i = 0; i < t->ns; i++) { - if(i) - strcat(dev->mtd_name, " "); - strcat(dev->mtd_name, t->str+t->ofs[i]); - } - } - DEBUG(2, "Found name: %s", dev->mtd_name); - break; - } +static int pcmciamtd_cistpl_device(struct pcmcia_device *p_dev, + tuple_t *tuple, + void *priv_data) +{ + struct pcmciamtd_dev *dev = priv_data; + cisparse_t parse; + cistpl_device_t *t = &parse.device; + int i; - case CISTPL_JEDEC_C: { - cistpl_jedec_t *t = &parse.jedec; - int i; - for(i = 0; i < t->nid; i++) { - DEBUG(2, "JEDEC: 0x%02x 0x%02x", t->id[i].mfr, t->id[i].info); - } - break; - } + if (pcmcia_parse_tuple(tuple, &parse)) + return -EINVAL; + + DEBUG(2, "Common memory:"); + dev->pcmcia_map.size = t->dev[0].size; + /* from here on: DEBUG only */ + for (i = 0; i < t->ndev; i++) { + DEBUG(2, "Region %d, type = %u", i, t->dev[i].type); + DEBUG(2, "Region %d, wp = %u", i, t->dev[i].wp); + DEBUG(2, "Region %d, speed = %u ns", i, t->dev[i].speed); + DEBUG(2, "Region %d, size = %u bytes", i, t->dev[i].size); + } + return 0; +} - case CISTPL_DEVICE_GEO: { - cistpl_device_geo_t *t = &parse.device_geo; - int i; - dev->pcmcia_map.bankwidth = t->geo[0].buswidth; - for(i = 0; i < t->ngeo; i++) { - DEBUG(2, "region: %d bankwidth = %u", i, t->geo[i].buswidth); - DEBUG(2, "region: %d erase_block = %u", i, t->geo[i].erase_block); - DEBUG(2, "region: %d read_block = %u", i, t->geo[i].read_block); - DEBUG(2, "region: %d write_block = %u", i, t->geo[i].write_block); - DEBUG(2, "region: %d partition = %u", i, t->geo[i].partition); - DEBUG(2, "region: %d interleave = %u", i, t->geo[i].interleave); - } - break; - } +static int pcmciamtd_cistpl_geo(struct pcmcia_device *p_dev, + tuple_t *tuple, + void *priv_data) +{ + struct pcmciamtd_dev *dev = priv_data; + cisparse_t parse; + cistpl_device_geo_t *t = &parse.device_geo; + int i; - default: - DEBUG(2, "Unknown tuple code %d", tuple.TupleCode); - } + if (pcmcia_parse_tuple(tuple, &parse)) + return -EINVAL; + + dev->pcmcia_map.bankwidth = t->geo[0].buswidth; + /* from here on: DEBUG only */ + for (i = 0; i < t->ngeo; i++) { + DEBUG(2, "region: %d bankwidth = %u", i, t->geo[i].buswidth); + DEBUG(2, "region: %d erase_block = %u", i, t->geo[i].erase_block); + DEBUG(2, "region: %d read_block = %u", i, t->geo[i].read_block); + DEBUG(2, "region: %d write_block = %u", i, t->geo[i].write_block); + DEBUG(2, "region: %d partition = %u", i, t->geo[i].partition); + DEBUG(2, "region: %d interleave = %u", i, t->geo[i].interleave); + } + return 0; +} + + +static void card_settings(struct pcmciamtd_dev *dev, struct pcmcia_device *link, int *new_name) +{ + int i; - rc = pcmcia_get_next_tuple(link, &tuple); + if (p_dev->prod_id[0]) { + dev->mtd_name[0] = '\0'; + for (i = 0; i < 4; i++) { + if (i) + strcat(dev->mtd_name, " "); + if (p_dev->prod_id[i]) + strcat(dev->mtd_name, p_dev->prod_id[i]); + } + DEBUG(2, "Found name: %s", dev->mtd_name); } + +#ifdef CONFIG_MTD_DEBUG + pcmcia_loop_tuple(p_dev, CISTPL_FORMAT, pcmciamtd_cistpl_format, NULL); + pcmcia_loop_tuple(p_dev, CISTPL_JEDEC_C, pcmciamtd_cistpl_jedec, NULL); +#endif + pcmcia_loop_tuple(p_dev, CISTPL_DEVICE, pcmciamtd_cistpl_device, dev); + pcmcia_loop_tuple(p_dev, CISTPL_DEVICE_GEO, pcmciamtd_cistpl_geo, dev); + if(!dev->pcmcia_map.size) dev->pcmcia_map.size = MAX_PCMCIA_ADDR; @@ -481,16 +486,12 @@ static void card_settings(struct pcmciamtd_dev *dev, struct pcmcia_device *link, * MTD device available to the system. */ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int pcmciamtd_config(struct pcmcia_device *link) { struct pcmciamtd_dev *dev = link->priv; struct mtd_info *mtd = NULL; cs_status_t status; win_req_t req; - int last_ret = 0, last_fn = 0; int ret; int i; static char *probes[] = { "jedec_probe", "cfi_probe" }; @@ -529,7 +530,7 @@ static int pcmciamtd_config(struct pcmcia_device *link) int ret; DEBUG(2, "requesting window with size = %dKiB memspeed = %d", req.Size >> 10, req.AccessSpeed); - ret = pcmcia_request_window(&link, &req, &link->win); + ret = pcmcia_request_window(link, &req, &link->win); DEBUG(2, "ret = %d dev->win_size = %d", ret, dev->win_size); if(ret) { req.Size >>= 1; @@ -577,7 +578,6 @@ static int pcmciamtd_config(struct pcmcia_device *link) DEBUG(2, "Setting Configuration"); ret = pcmcia_request_configuration(link, &link->conf); if (ret != 0) { - cs_error(link, RequestConfiguration, ret); if (dev->win_base) { iounmap(dev->win_base); dev->win_base = NULL; @@ -652,8 +652,7 @@ static int pcmciamtd_config(struct pcmcia_device *link) link->dev_node = &dev->node; return 0; - cs_failed: - cs_error(link, last_fn, last_ret); + failed: err("CS Error, exiting"); pcmciamtd_release(link); return -ENODEV; diff --git a/drivers/net/pcmcia/3c574_cs.c b/drivers/net/pcmcia/3c574_cs.c index b58965a2b3a..17a27225cc9 100644 --- a/drivers/net/pcmcia/3c574_cs.c +++ b/drivers/net/pcmcia/3c574_cs.c @@ -118,14 +118,6 @@ INT_MODULE_PARM(full_duplex, 0); /* Autodetect link polarity reversal? */ INT_MODULE_PARM(auto_polarity, 1); -#ifdef PCMCIA_DEBUG -INT_MODULE_PARM(pc_debug, PCMCIA_DEBUG); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = -"3c574_cs.c 1.65ac1 2003/04/07 Donald Becker/David Hinds, becker@scyld.com.\n"; -#else -#define DEBUG(n, args...) -#endif /*====================================================================*/ @@ -278,7 +270,7 @@ static int tc574_probe(struct pcmcia_device *link) struct el3_private *lp; struct net_device *dev; - DEBUG(0, "3c574_attach()\n"); + dev_dbg(&link->dev, "3c574_attach()\n"); /* Create the PC card device object. */ dev = alloc_etherdev(sizeof(struct el3_private)); @@ -291,10 +283,8 @@ static int tc574_probe(struct pcmcia_device *link) spin_lock_init(&lp->window_lock); link->io.NumPorts1 = 32; link->io.Attributes1 = IO_DATA_PATH_WIDTH_16; - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = &el3_interrupt; - link->irq.Instance = dev; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; link->conf.ConfigIndex = 1; @@ -319,7 +309,7 @@ static void tc574_detach(struct pcmcia_device *link) { struct net_device *dev = link->priv; - DEBUG(0, "3c574_detach(0x%p)\n", link); + dev_dbg(&link->dev, "3c574_detach()\n"); if (link->dev_node) unregister_netdev(dev); @@ -335,26 +325,23 @@ static void tc574_detach(struct pcmcia_device *link) ethernet device available to the system. */ -#define CS_CHECK(fn, ret) \ - do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static const char *ram_split[] = {"5:3", "3:1", "1:1", "3:5"}; static int tc574_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; struct el3_private *lp = netdev_priv(dev); - tuple_t tuple; - __le16 buf[32]; - int last_fn, last_ret, i, j; + int ret, i, j; unsigned int ioaddr; __be16 *phys_addr; char *cardname; __u32 config; + u8 *buf; + size_t len; phys_addr = (__be16 *)dev->dev_addr; - DEBUG(0, "3c574_config(0x%p)\n", link); + dev_dbg(&link->dev, "3c574_config()\n"); link->io.IOAddrLines = 16; for (i = j = 0; j < 0x400; j += 0x20) { @@ -363,12 +350,16 @@ static int tc574_config(struct pcmcia_device *link) if (i == 0) break; } - if (i != 0) { - cs_error(link, RequestIO, i); + if (i != 0) + goto failed; + + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; + + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) goto failed; - } - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); dev->irq = link->irq.AssignedIRQ; dev->base_addr = link->io.BasePort1; @@ -378,16 +369,14 @@ static int tc574_config(struct pcmcia_device *link) /* The 3c574 normally uses an EEPROM for configuration info, including the hardware address. The future products may include a modem chip and put the address in the CIS. */ - tuple.Attributes = 0; - tuple.TupleData = (cisdata_t *)buf; - tuple.TupleDataMax = 64; - tuple.TupleOffset = 0; - tuple.DesiredTuple = 0x88; - if (pcmcia_get_first_tuple(link, &tuple) == 0) { - pcmcia_get_tuple_data(link, &tuple); + + len = pcmcia_get_tuple(link, 0x88, &buf); + if (buf && len >= 6) { for (i = 0; i < 3; i++) - phys_addr[i] = htons(le16_to_cpu(buf[i])); + phys_addr[i] = htons(le16_to_cpu(buf[i * 2])); + kfree(buf); } else { + kfree(buf); /* 0 < len < 6 */ EL3WINDOW(0); for (i = 0; i < 3; i++) phys_addr[i] = htons(read_eeprom(ioaddr, i + 10)); @@ -435,7 +424,8 @@ static int tc574_config(struct pcmcia_device *link) mii_status = mdio_read(ioaddr, phy & 0x1f, 1); if (mii_status != 0xffff) { lp->phys = phy & 0x1f; - DEBUG(0, " MII transceiver at index %d, status %x.\n", + dev_dbg(&link->dev, " MII transceiver at " + "index %d, status %x.\n", phy, mii_status); if ((mii_status & 0x0040) == 0) mii_preamble_required = 1; @@ -457,7 +447,7 @@ static int tc574_config(struct pcmcia_device *link) } link->dev_node = &lp->node; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); if (register_netdev(dev) != 0) { printk(KERN_NOTICE "3c574_cs: register_netdev() failed\n"); @@ -478,8 +468,6 @@ static int tc574_config(struct pcmcia_device *link) return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: tc574_release(link); return -ENODEV; @@ -738,7 +726,7 @@ static int el3_open(struct net_device *dev) lp->media.expires = jiffies + HZ; add_timer(&lp->media); - DEBUG(2, "%s: opened, status %4.4x.\n", + dev_dbg(&link->dev, "%s: opened, status %4.4x.\n", dev->name, inw(dev->base_addr + EL3_STATUS)); return 0; @@ -772,7 +760,7 @@ static void pop_tx_status(struct net_device *dev) if (tx_status & 0x30) tc574_wait_for_completion(dev, TxReset); if (tx_status & 0x38) { - DEBUG(1, "%s: transmit error: status 0x%02x\n", + pr_debug("%s: transmit error: status 0x%02x\n", dev->name, tx_status); outw(TxEnable, ioaddr + EL3_CMD); dev->stats.tx_aborted_errors++; @@ -788,7 +776,7 @@ static netdev_tx_t el3_start_xmit(struct sk_buff *skb, struct el3_private *lp = netdev_priv(dev); unsigned long flags; - DEBUG(3, "%s: el3_start_xmit(length = %ld) called, " + pr_debug("%s: el3_start_xmit(length = %ld) called, " "status %4.4x.\n", dev->name, (long)skb->len, inw(ioaddr + EL3_STATUS)); @@ -827,7 +815,7 @@ static irqreturn_t el3_interrupt(int irq, void *dev_id) return IRQ_NONE; ioaddr = dev->base_addr; - DEBUG(3, "%s: interrupt, status %4.4x.\n", + pr_debug("%s: interrupt, status %4.4x.\n", dev->name, inw(ioaddr + EL3_STATUS)); spin_lock(&lp->window_lock); @@ -836,7 +824,7 @@ static irqreturn_t el3_interrupt(int irq, void *dev_id) (IntLatch | RxComplete | RxEarly | StatsFull)) { if (!netif_device_present(dev) || ((status & 0xe000) != 0x2000)) { - DEBUG(1, "%s: Interrupt from dead card\n", dev->name); + pr_debug("%s: Interrupt from dead card\n", dev->name); break; } @@ -846,7 +834,7 @@ static irqreturn_t el3_interrupt(int irq, void *dev_id) work_budget = el3_rx(dev, work_budget); if (status & TxAvailable) { - DEBUG(3, " TX room bit was handled.\n"); + pr_debug(" TX room bit was handled.\n"); /* There's room in the FIFO for a full-sized packet. */ outw(AckIntr | TxAvailable, ioaddr + EL3_CMD); netif_wake_queue(dev); @@ -886,7 +874,7 @@ static irqreturn_t el3_interrupt(int irq, void *dev_id) } if (--work_budget < 0) { - DEBUG(0, "%s: Too much work in interrupt, " + pr_debug("%s: Too much work in interrupt, " "status %4.4x.\n", dev->name, status); /* Clear all interrupts */ outw(AckIntr | 0xFF, ioaddr + EL3_CMD); @@ -896,7 +884,7 @@ static irqreturn_t el3_interrupt(int irq, void *dev_id) outw(AckIntr | IntReq | IntLatch, ioaddr + EL3_CMD); } - DEBUG(3, "%s: exiting interrupt, status %4.4x.\n", + pr_debug("%s: exiting interrupt, status %4.4x.\n", dev->name, inw(ioaddr + EL3_STATUS)); spin_unlock(&lp->window_lock); @@ -1003,7 +991,7 @@ static void update_stats(struct net_device *dev) unsigned int ioaddr = dev->base_addr; u8 rx, tx, up; - DEBUG(2, "%s: updating the statistics.\n", dev->name); + pr_debug("%s: updating the statistics.\n", dev->name); if (inw(ioaddr+EL3_STATUS) == 0xffff) /* No card. */ return; @@ -1039,7 +1027,7 @@ static int el3_rx(struct net_device *dev, int worklimit) unsigned int ioaddr = dev->base_addr; short rx_status; - DEBUG(3, "%s: in rx_packet(), status %4.4x, rx_status %4.4x.\n", + pr_debug("%s: in rx_packet(), status %4.4x, rx_status %4.4x.\n", dev->name, inw(ioaddr+EL3_STATUS), inw(ioaddr+RxStatus)); while (!((rx_status = inw(ioaddr + RxStatus)) & 0x8000) && worklimit > 0) { @@ -1061,7 +1049,7 @@ static int el3_rx(struct net_device *dev, int worklimit) skb = dev_alloc_skb(pkt_len+5); - DEBUG(3, " Receiving packet size %d status %4.4x.\n", + pr_debug(" Receiving packet size %d status %4.4x.\n", pkt_len, rx_status); if (skb != NULL) { skb_reserve(skb, 2); @@ -1072,7 +1060,7 @@ static int el3_rx(struct net_device *dev, int worklimit) dev->stats.rx_packets++; dev->stats.rx_bytes += pkt_len; } else { - DEBUG(1, "%s: couldn't allocate a sk_buff of" + pr_debug("%s: couldn't allocate a sk_buff of" " size %d.\n", dev->name, pkt_len); dev->stats.rx_dropped++; } @@ -1101,7 +1089,7 @@ static int el3_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) struct mii_ioctl_data *data = if_mii(rq); int phy = lp->phys & 0x1f; - DEBUG(2, "%s: In ioct(%-.6s, %#4.4x) %4.4x %4.4x %4.4x %4.4x.\n", + pr_debug("%s: In ioct(%-.6s, %#4.4x) %4.4x %4.4x %4.4x %4.4x.\n", dev->name, rq->ifr_ifrn.ifrn_name, cmd, data->phy_id, data->reg_num, data->val_in, data->val_out); @@ -1178,7 +1166,7 @@ static int el3_close(struct net_device *dev) struct el3_private *lp = netdev_priv(dev); struct pcmcia_device *link = lp->p_dev; - DEBUG(2, "%s: shutting down ethercard.\n", dev->name); + dev_dbg(&link->dev, "%s: shutting down ethercard.\n", dev->name); if (pcmcia_dev_present(link)) { unsigned long flags; diff --git a/drivers/net/pcmcia/3c589_cs.c b/drivers/net/pcmcia/3c589_cs.c index 569fb06793c..6f8d7e2e592 100644 --- a/drivers/net/pcmcia/3c589_cs.c +++ b/drivers/net/pcmcia/3c589_cs.c @@ -130,14 +130,6 @@ MODULE_LICENSE("GPL"); /* Special hook for setting if_port when module is loaded */ INT_MODULE_PARM(if_port, 0); -#ifdef PCMCIA_DEBUG -INT_MODULE_PARM(pc_debug, PCMCIA_DEBUG); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = -DRV_NAME ".c " DRV_VERSION " 2001/10/13 00:08:50 (David Hinds)"; -#else -#define DEBUG(n, args...) -#endif /*====================================================================*/ @@ -189,7 +181,7 @@ static int tc589_probe(struct pcmcia_device *link) struct el3_private *lp; struct net_device *dev; - DEBUG(0, "3c589_attach()\n"); + dev_dbg(&link->dev, "3c589_attach()\n"); /* Create new ethernet device */ dev = alloc_etherdev(sizeof(struct el3_private)); @@ -202,10 +194,8 @@ static int tc589_probe(struct pcmcia_device *link) spin_lock_init(&lp->lock); link->io.NumPorts1 = 16; link->io.Attributes1 = IO_DATA_PATH_WIDTH_16; - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = &el3_interrupt; - link->irq.Instance = dev; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; link->conf.ConfigIndex = 1; @@ -231,7 +221,7 @@ static void tc589_detach(struct pcmcia_device *link) { struct net_device *dev = link->priv; - DEBUG(0, "3c589_detach(0x%p)\n", link); + dev_dbg(&link->dev, "3c589_detach\n"); if (link->dev_node) unregister_netdev(dev); @@ -249,29 +239,20 @@ static void tc589_detach(struct pcmcia_device *link) ======================================================================*/ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int tc589_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; struct el3_private *lp = netdev_priv(dev); - tuple_t tuple; - __le16 buf[32]; __be16 *phys_addr; - int last_fn, last_ret, i, j, multi = 0, fifo; + int ret, i, j, multi = 0, fifo; unsigned int ioaddr; char *ram_split[] = {"5:3", "3:1", "1:1", "3:5"}; + u8 *buf; + size_t len; - DEBUG(0, "3c589_config(0x%p)\n", link); + dev_dbg(&link->dev, "3c589_config\n"); phys_addr = (__be16 *)dev->dev_addr; - tuple.Attributes = 0; - tuple.TupleData = (cisdata_t *)buf; - tuple.TupleDataMax = sizeof(buf); - tuple.TupleOffset = 0; - tuple.Attributes = TUPLE_RETURN_COMMON; - /* Is this a 3c562? */ if (link->manf_id != MANFID_3COM) printk(KERN_INFO "3c589_cs: hmmm, is this really a " @@ -287,12 +268,16 @@ static int tc589_config(struct pcmcia_device *link) if (i == 0) break; } - if (i != 0) { - cs_error(link, RequestIO, i); + if (i != 0) goto failed; - } - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; + + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; dev->irq = link->irq.AssignedIRQ; dev->base_addr = link->io.BasePort1; @@ -301,12 +286,13 @@ static int tc589_config(struct pcmcia_device *link) /* The 3c589 has an extra EEPROM for configuration info, including the hardware address. The 3c562 puts the address in the CIS. */ - tuple.DesiredTuple = 0x88; - if (pcmcia_get_first_tuple(link, &tuple) == 0) { - pcmcia_get_tuple_data(link, &tuple); - for (i = 0; i < 3; i++) - phys_addr[i] = htons(le16_to_cpu(buf[i])); + len = pcmcia_get_tuple(link, 0x88, &buf); + if (buf && len >= 6) { + for (i = 0; i < 3; i++) + phys_addr[i] = htons(le16_to_cpu(buf[i*2])); + kfree(buf); } else { + kfree(buf); /* 0 < len < 6 */ for (i = 0; i < 3; i++) phys_addr[i] = htons(read_eeprom(ioaddr, i)); if (phys_addr[0] == htons(0x6060)) { @@ -328,7 +314,7 @@ static int tc589_config(struct pcmcia_device *link) printk(KERN_ERR "3c589_cs: invalid if_port requested\n"); link->dev_node = &lp->node; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); if (register_netdev(dev) != 0) { printk(KERN_ERR "3c589_cs: register_netdev() failed\n"); @@ -347,8 +333,6 @@ static int tc589_config(struct pcmcia_device *link) if_names[dev->if_port]); return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: tc589_release(link); return -ENODEV; @@ -511,24 +495,8 @@ static void netdev_get_drvinfo(struct net_device *dev, sprintf(info->bus_info, "PCMCIA 0x%lx", dev->base_addr); } -#ifdef PCMCIA_DEBUG -static u32 netdev_get_msglevel(struct net_device *dev) -{ - return pc_debug; -} - -static void netdev_set_msglevel(struct net_device *dev, u32 level) -{ - pc_debug = level; -} -#endif /* PCMCIA_DEBUG */ - static const struct ethtool_ops netdev_ethtool_ops = { .get_drvinfo = netdev_get_drvinfo, -#ifdef PCMCIA_DEBUG - .get_msglevel = netdev_get_msglevel, - .set_msglevel = netdev_set_msglevel, -#endif /* PCMCIA_DEBUG */ }; static int el3_config(struct net_device *dev, struct ifmap *map) @@ -563,7 +531,7 @@ static int el3_open(struct net_device *dev) lp->media.expires = jiffies + HZ; add_timer(&lp->media); - DEBUG(1, "%s: opened, status %4.4x.\n", + dev_dbg(&link->dev, "%s: opened, status %4.4x.\n", dev->name, inw(dev->base_addr + EL3_STATUS)); return 0; @@ -596,7 +564,7 @@ static void pop_tx_status(struct net_device *dev) if (tx_status & 0x30) tc589_wait_for_completion(dev, TxReset); if (tx_status & 0x38) { - DEBUG(1, "%s: transmit error: status 0x%02x\n", + pr_debug("%s: transmit error: status 0x%02x\n", dev->name, tx_status); outw(TxEnable, ioaddr + EL3_CMD); dev->stats.tx_aborted_errors++; @@ -612,7 +580,7 @@ static netdev_tx_t el3_start_xmit(struct sk_buff *skb, struct el3_private *priv = netdev_priv(dev); unsigned long flags; - DEBUG(3, "%s: el3_start_xmit(length = %ld) called, " + pr_debug("%s: el3_start_xmit(length = %ld) called, " "status %4.4x.\n", dev->name, (long)skb->len, inw(ioaddr + EL3_STATUS)); @@ -654,14 +622,14 @@ static irqreturn_t el3_interrupt(int irq, void *dev_id) ioaddr = dev->base_addr; - DEBUG(3, "%s: interrupt, status %4.4x.\n", + pr_debug("%s: interrupt, status %4.4x.\n", dev->name, inw(ioaddr + EL3_STATUS)); spin_lock(&lp->lock); while ((status = inw(ioaddr + EL3_STATUS)) & (IntLatch | RxComplete | StatsFull)) { if ((status & 0xe000) != 0x2000) { - DEBUG(1, "%s: interrupt from dead card\n", dev->name); + pr_debug("%s: interrupt from dead card\n", dev->name); handled = 0; break; } @@ -670,7 +638,7 @@ static irqreturn_t el3_interrupt(int irq, void *dev_id) el3_rx(dev); if (status & TxAvailable) { - DEBUG(3, " TX room bit was handled.\n"); + pr_debug(" TX room bit was handled.\n"); /* There's room in the FIFO for a full-sized packet. */ outw(AckIntr | TxAvailable, ioaddr + EL3_CMD); netif_wake_queue(dev); @@ -722,7 +690,7 @@ static irqreturn_t el3_interrupt(int irq, void *dev_id) lp->last_irq = jiffies; spin_unlock(&lp->lock); - DEBUG(3, "%s: exiting interrupt, status %4.4x.\n", + pr_debug("%s: exiting interrupt, status %4.4x.\n", dev->name, inw(ioaddr + EL3_STATUS)); return IRQ_RETVAL(handled); } @@ -833,7 +801,7 @@ static void update_stats(struct net_device *dev) { unsigned int ioaddr = dev->base_addr; - DEBUG(2, "%s: updating the statistics.\n", dev->name); + pr_debug("%s: updating the statistics.\n", dev->name); /* Turn off statistics updates while reading. */ outw(StatsDisable, ioaddr + EL3_CMD); /* Switch to the stats window, and read everything. */ @@ -861,7 +829,7 @@ static int el3_rx(struct net_device *dev) int worklimit = 32; short rx_status; - DEBUG(3, "%s: in rx_packet(), status %4.4x, rx_status %4.4x.\n", + pr_debug("%s: in rx_packet(), status %4.4x, rx_status %4.4x.\n", dev->name, inw(ioaddr+EL3_STATUS), inw(ioaddr+RX_STATUS)); while (!((rx_status = inw(ioaddr + RX_STATUS)) & 0x8000) && worklimit > 0) { @@ -883,7 +851,7 @@ static int el3_rx(struct net_device *dev) skb = dev_alloc_skb(pkt_len+5); - DEBUG(3, " Receiving packet size %d status %4.4x.\n", + pr_debug(" Receiving packet size %d status %4.4x.\n", pkt_len, rx_status); if (skb != NULL) { skb_reserve(skb, 2); @@ -894,7 +862,7 @@ static int el3_rx(struct net_device *dev) dev->stats.rx_packets++; dev->stats.rx_bytes += pkt_len; } else { - DEBUG(1, "%s: couldn't allocate a sk_buff of" + pr_debug("%s: couldn't allocate a sk_buff of" " size %d.\n", dev->name, pkt_len); dev->stats.rx_dropped++; } @@ -935,7 +903,7 @@ static int el3_close(struct net_device *dev) struct pcmcia_device *link = lp->p_dev; unsigned int ioaddr = dev->base_addr; - DEBUG(1, "%s: shutting down ethercard.\n", dev->name); + dev_dbg(&link->dev, "%s: shutting down ethercard.\n", dev->name); if (pcmcia_dev_present(link)) { /* Turn off statistics ASAP. We update dev->stats below. */ diff --git a/drivers/net/pcmcia/axnet_cs.c b/drivers/net/pcmcia/axnet_cs.c index 3131a59a8d3..800597b82d1 100644 --- a/drivers/net/pcmcia/axnet_cs.c +++ b/drivers/net/pcmcia/axnet_cs.c @@ -75,16 +75,6 @@ MODULE_AUTHOR("David Hinds <dahinds@users.sourceforge.net>"); MODULE_DESCRIPTION("Asix AX88190 PCMCIA ethernet driver"); MODULE_LICENSE("GPL"); -#ifdef PCMCIA_DEBUG -#define INT_MODULE_PARM(n, v) static int n = v; module_param(n, int, 0) - -INT_MODULE_PARM(pc_debug, PCMCIA_DEBUG); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = -"axnet_cs.c 1.28 2002/06/29 06:27:37 (David Hinds)"; -#else -#define DEBUG(n, args...) -#endif /*====================================================================*/ @@ -167,7 +157,7 @@ static int axnet_probe(struct pcmcia_device *link) struct net_device *dev; struct ei_device *ei_local; - DEBUG(0, "axnet_attach()\n"); + dev_dbg(&link->dev, "axnet_attach()\n"); dev = alloc_etherdev(sizeof(struct ei_device) + sizeof(axnet_dev_t)); if (!dev) @@ -180,7 +170,6 @@ static int axnet_probe(struct pcmcia_device *link) info->p_dev = link; link->priv = dev; link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; @@ -205,7 +194,7 @@ static void axnet_detach(struct pcmcia_device *link) { struct net_device *dev = link->priv; - DEBUG(0, "axnet_detach(0x%p)\n", link); + dev_dbg(&link->dev, "axnet_detach(0x%p)\n", link); if (link->dev_node) unregister_netdev(dev); @@ -272,9 +261,6 @@ static int get_prom(struct pcmcia_device *link) ======================================================================*/ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int try_io_port(struct pcmcia_device *link) { int j, ret; @@ -341,26 +327,29 @@ static int axnet_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; axnet_dev_t *info = PRIV(dev); - int i, j, j2, last_ret, last_fn; + int i, j, j2, ret; - DEBUG(0, "axnet_config(0x%p)\n", link); + dev_dbg(&link->dev, "axnet_config(0x%p)\n", link); /* don't trust the CIS on this; Linksys got it wrong */ link->conf.Present = 0x63; - last_ret = pcmcia_loop_config(link, axnet_configcheck, NULL); - if (last_ret != 0) { - cs_error(link, RequestIO, last_ret); + ret = pcmcia_loop_config(link, axnet_configcheck, NULL); + if (ret != 0) goto failed; - } - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; if (link->io.NumPorts2 == 8) { link->conf.Attributes |= CONF_ENABLE_SPKR; link->conf.Status = CCSR_AUDIO_ENA; } - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; + dev->irq = link->irq.AssignedIRQ; dev->base_addr = link->io.BasePort1; @@ -410,7 +399,7 @@ static int axnet_config(struct pcmcia_device *link) info->phy_id = (i < 32) ? i : -1; link->dev_node = &info->node; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); if (register_netdev(dev) != 0) { printk(KERN_NOTICE "axnet_cs: register_netdev() failed\n"); @@ -426,14 +415,12 @@ static int axnet_config(struct pcmcia_device *link) dev->base_addr, dev->irq, dev->dev_addr); if (info->phy_id != -1) { - DEBUG(0, " MII transceiver at index %d, status %x.\n", info->phy_id, j); + dev_dbg(&link->dev, " MII transceiver at index %d, status %x.\n", info->phy_id, j); } else { printk(KERN_NOTICE " No MII transceivers found!\n"); } return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: axnet_release(link); return -ENODEV; @@ -543,7 +530,7 @@ static int axnet_open(struct net_device *dev) struct pcmcia_device *link = info->p_dev; unsigned int nic_base = dev->base_addr; - DEBUG(2, "axnet_open('%s')\n", dev->name); + dev_dbg(&link->dev, "axnet_open('%s')\n", dev->name); if (!pcmcia_dev_present(link)) return -ENODEV; @@ -572,7 +559,7 @@ static int axnet_close(struct net_device *dev) axnet_dev_t *info = PRIV(dev); struct pcmcia_device *link = info->p_dev; - DEBUG(2, "axnet_close('%s')\n", dev->name); + dev_dbg(&link->dev, "axnet_close('%s')\n", dev->name); ax_close(dev); free_irq(dev->irq, dev); @@ -741,10 +728,8 @@ static void block_input(struct net_device *dev, int count, int xfer_count = count; char *buf = skb->data; -#ifdef PCMCIA_DEBUG if ((ei_debug > 4) && (count != 4)) - printk(KERN_DEBUG "%s: [bi=%d]\n", dev->name, count+4); -#endif + pr_debug("%s: [bi=%d]\n", dev->name, count+4); outb_p(ring_offset & 0xff, nic_base + EN0_RSARLO); outb_p(ring_offset >> 8, nic_base + EN0_RSARHI); outb_p(E8390_RREAD+E8390_START, nic_base + AXNET_CMD); @@ -762,10 +747,7 @@ static void block_output(struct net_device *dev, int count, { unsigned int nic_base = dev->base_addr; -#ifdef PCMCIA_DEBUG - if (ei_debug > 4) - printk(KERN_DEBUG "%s: [bo=%d]\n", dev->name, count); -#endif + pr_debug("%s: [bo=%d]\n", dev->name, count); /* Round the count up for word writes. Do we need to do this? What effect will an odd byte count have on the 8390? diff --git a/drivers/net/pcmcia/com20020_cs.c b/drivers/net/pcmcia/com20020_cs.c index 7b5c77b7bd2..21d9c9d815d 100644 --- a/drivers/net/pcmcia/com20020_cs.c +++ b/drivers/net/pcmcia/com20020_cs.c @@ -53,11 +53,7 @@ #define VERSION "arcnet: COM20020 PCMCIA support loaded.\n" -#ifdef PCMCIA_DEBUG - -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) +#ifdef DEBUG static void regdump(struct net_device *dev) { @@ -92,7 +88,6 @@ static void regdump(struct net_device *dev) #else -#define DEBUG(n, args...) do { } while (0) static inline void regdump(struct net_device *dev) { } #endif @@ -144,7 +139,7 @@ static int com20020_probe(struct pcmcia_device *p_dev) struct net_device *dev; struct arcnet_local *lp; - DEBUG(0, "com20020_attach()\n"); + dev_dbg(&p_dev->dev, "com20020_attach()\n"); /* Create new network device */ info = kzalloc(sizeof(struct com20020_dev_t), GFP_KERNEL); @@ -169,11 +164,10 @@ static int com20020_probe(struct pcmcia_device *p_dev) p_dev->io.NumPorts1 = 16; p_dev->io.IOAddrLines = 16; p_dev->irq.Attributes = IRQ_TYPE_EXCLUSIVE; - p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID; p_dev->conf.Attributes = CONF_ENABLE_IRQ; p_dev->conf.IntType = INT_MEMORY_AND_IO; - p_dev->irq.Instance = info->dev = dev; + info->dev = dev; p_dev->priv = info; return com20020_config(p_dev); @@ -198,12 +192,12 @@ static void com20020_detach(struct pcmcia_device *link) struct com20020_dev_t *info = link->priv; struct net_device *dev = info->dev; - DEBUG(1,"detach...\n"); + dev_dbg(&link->dev, "detach...\n"); - DEBUG(0, "com20020_detach(0x%p)\n", link); + dev_dbg(&link->dev, "com20020_detach\n"); if (link->dev_node) { - DEBUG(1,"unregister...\n"); + dev_dbg(&link->dev, "unregister...\n"); unregister_netdev(dev); @@ -218,16 +212,16 @@ static void com20020_detach(struct pcmcia_device *link) com20020_release(link); /* Unlink device structure, free bits */ - DEBUG(1,"unlinking...\n"); + dev_dbg(&link->dev, "unlinking...\n"); if (link->priv) { dev = info->dev; if (dev) { - DEBUG(1,"kfree...\n"); + dev_dbg(&link->dev, "kfree...\n"); free_netdev(dev); } - DEBUG(1,"kfree2...\n"); + dev_dbg(&link->dev, "kfree2...\n"); kfree(info); } @@ -241,25 +235,22 @@ static void com20020_detach(struct pcmcia_device *link) ======================================================================*/ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int com20020_config(struct pcmcia_device *link) { struct arcnet_local *lp; com20020_dev_t *info; struct net_device *dev; - int i, last_ret, last_fn; + int i, ret; int ioaddr; info = link->priv; dev = info->dev; - DEBUG(1,"config...\n"); + dev_dbg(&link->dev, "config...\n"); - DEBUG(0, "com20020_config(0x%p)\n", link); + dev_dbg(&link->dev, "com20020_config\n"); - DEBUG(1,"arcnet: baseport1 is %Xh\n", link->io.BasePort1); + dev_dbg(&link->dev, "baseport1 is %Xh\n", link->io.BasePort1); i = -ENODEV; if (!link->io.BasePort1) { @@ -276,26 +267,27 @@ static int com20020_config(struct pcmcia_device *link) if (i != 0) { - DEBUG(1,"arcnet: requestIO failed totally!\n"); + dev_dbg(&link->dev, "requestIO failed totally!\n"); goto failed; } ioaddr = dev->base_addr = link->io.BasePort1; - DEBUG(1,"arcnet: got ioaddr %Xh\n", ioaddr); + dev_dbg(&link->dev, "got ioaddr %Xh\n", ioaddr); - DEBUG(1,"arcnet: request IRQ %d (%Xh/%Xh)\n", - link->irq.AssignedIRQ, - link->irq.IRQInfo1, link->irq.IRQInfo2); + dev_dbg(&link->dev, "request IRQ %d\n", + link->irq.AssignedIRQ); i = pcmcia_request_irq(link, &link->irq); if (i != 0) { - DEBUG(1,"arcnet: requestIRQ failed totally!\n"); + dev_dbg(&link->dev, "requestIRQ failed totally!\n"); goto failed; } dev->irq = link->irq.AssignedIRQ; - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; if (com20020_check(dev)) { @@ -308,26 +300,25 @@ static int com20020_config(struct pcmcia_device *link) lp->card_flags = ARC_CAN_10MBIT; /* pretend all of them can 10Mbit */ link->dev_node = &info->node; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); i = com20020_found(dev, 0); /* calls register_netdev */ if (i != 0) { - DEBUG(1,KERN_NOTICE "com20020_cs: com20020_found() failed\n"); + dev_printk(KERN_NOTICE, &link->dev, + "com20020_cs: com20020_found() failed\n"); link->dev_node = NULL; goto failed; } strcpy(info->node.dev_name, dev->name); - DEBUG(1,KERN_INFO "%s: port %#3lx, irq %d\n", + dev_dbg(&link->dev,KERN_INFO "%s: port %#3lx, irq %d\n", dev->name, dev->base_addr, dev->irq); return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: - DEBUG(1,"com20020_config failed...\n"); + dev_dbg(&link->dev, "com20020_config failed...\n"); com20020_release(link); return -ENODEV; } /* com20020_config */ @@ -342,7 +333,7 @@ failed: static void com20020_release(struct pcmcia_device *link) { - DEBUG(0, "com20020_release(0x%p)\n", link); + dev_dbg(&link->dev, "com20020_release\n"); pcmcia_disable_device(link); } diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c index 7e01fbdb87e..6e3e1ced6db 100644 --- a/drivers/net/pcmcia/fmvj18x_cs.c +++ b/drivers/net/pcmcia/fmvj18x_cs.c @@ -72,13 +72,6 @@ MODULE_LICENSE("GPL"); /* 0:4KB*2 TX buffer else:8KB*2 TX buffer */ INT_MODULE_PARM(sram_config, 0); -#ifdef PCMCIA_DEBUG -INT_MODULE_PARM(pc_debug, PCMCIA_DEBUG); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = DRV_NAME ".c " DRV_VERSION " 2002/03/23"; -#else -#define DEBUG(n, args...) -#endif /*====================================================================*/ /* @@ -245,7 +238,7 @@ static int fmvj18x_probe(struct pcmcia_device *link) local_info_t *lp; struct net_device *dev; - DEBUG(0, "fmvj18x_attach()\n"); + dev_dbg(&link->dev, "fmvj18x_attach()\n"); /* Make up a FMVJ18x specific data structure */ dev = alloc_etherdev(sizeof(local_info_t)); @@ -262,10 +255,8 @@ static int fmvj18x_probe(struct pcmcia_device *link) link->io.IOAddrLines = 5; /* Interrupt setup */ - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = &fjn_interrupt; - link->irq.Instance = dev; /* General socket configuration */ link->conf.Attributes = CONF_ENABLE_IRQ; @@ -285,7 +276,7 @@ static void fmvj18x_detach(struct pcmcia_device *link) { struct net_device *dev = link->priv; - DEBUG(0, "fmvj18x_detach(0x%p)\n", link); + dev_dbg(&link->dev, "fmvj18x_detach\n"); if (link->dev_node) unregister_netdev(dev); @@ -297,9 +288,6 @@ static void fmvj18x_detach(struct pcmcia_device *link) /*====================================================================*/ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int mfc_try_io_port(struct pcmcia_device *link) { int i, ret; @@ -341,33 +329,38 @@ static int ungermann_try_io_port(struct pcmcia_device *link) return ret; /* RequestIO failed */ } +static int fmvj18x_ioprobe(struct pcmcia_device *p_dev, + cistpl_cftable_entry_t *cfg, + cistpl_cftable_entry_t *dflt, + unsigned int vcc, + void *priv_data) +{ + return 0; /* strange, but that's what the code did already before... */ +} + static int fmvj18x_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; local_info_t *lp = netdev_priv(dev); - tuple_t tuple; - cisparse_t parse; - u_short buf[32]; - int i, last_fn = 0, last_ret = 0, ret; + int i, ret; unsigned int ioaddr; cardtype_t cardtype; char *card_name = "unknown"; - u_char *node_id; + u8 *buf; + size_t len; + u_char buggybuf[32]; + + dev_dbg(&link->dev, "fmvj18x_config\n"); - DEBUG(0, "fmvj18x_config(0x%p)\n", link); + len = pcmcia_get_tuple(link, CISTPL_FUNCE, &buf); + kfree(buf); - tuple.TupleData = (u_char *)buf; - tuple.TupleDataMax = 64; - tuple.TupleOffset = 0; - tuple.DesiredTuple = CISTPL_FUNCE; - tuple.TupleOffset = 0; - if (pcmcia_get_first_tuple(link, &tuple) == 0) { + if (len) { /* Yes, I have CISTPL_FUNCE. Let's check CISTPL_MANFID */ - tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY; - CS_CHECK(GetFirstTuple, pcmcia_get_first_tuple(link, &tuple)); - CS_CHECK(GetTupleData, pcmcia_get_tuple_data(link, &tuple)); - CS_CHECK(ParseTuple, pcmcia_parse_tuple(&tuple, &parse)); - link->conf.ConfigIndex = parse.cftable_entry.index; + ret = pcmcia_loop_config(link, fmvj18x_ioprobe, NULL); + if (ret != 0) + goto failed; + switch (link->manf_id) { case MANFID_TDK: cardtype = TDK; @@ -433,17 +426,24 @@ static int fmvj18x_config(struct pcmcia_device *link) if (link->io.NumPorts2 != 0) { link->irq.Attributes = - IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED|IRQ_HANDLE_PRESENT; + IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED; ret = mfc_try_io_port(link); - if (ret != 0) goto cs_failed; + if (ret != 0) goto failed; } else if (cardtype == UNGERMANN) { ret = ungermann_try_io_port(link); - if (ret != 0) goto cs_failed; + if (ret != 0) goto failed; } else { - CS_CHECK(RequestIO, pcmcia_request_io(link, &link->io)); + ret = pcmcia_request_io(link, &link->io); + if (ret) + goto failed; } - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; + dev->irq = link->irq.AssignedIRQ; dev->base_addr = link->io.BasePort1; @@ -474,21 +474,21 @@ static int fmvj18x_config(struct pcmcia_device *link) case CONTEC: case NEC: case KME: - tuple.DesiredTuple = CISTPL_FUNCE; - tuple.TupleOffset = 0; - CS_CHECK(GetFirstTuple, pcmcia_get_first_tuple(link, &tuple)); - tuple.TupleOffset = 0; - CS_CHECK(GetTupleData, pcmcia_get_tuple_data(link, &tuple)); if (cardtype == MBH10304) { - /* MBH10304's CIS_FUNCE is corrupted */ - node_id = &(tuple.TupleData[5]); card_name = "FMV-J182"; - } else { - while (tuple.TupleData[0] != CISTPL_FUNCE_LAN_NODE_ID ) { - CS_CHECK(GetNextTuple, pcmcia_get_next_tuple(link, &tuple)); - CS_CHECK(GetTupleData, pcmcia_get_tuple_data(link, &tuple)); + + len = pcmcia_get_tuple(link, CISTPL_FUNCE, &buf); + if (len < 11) { + kfree(buf); + goto failed; } - node_id = &(tuple.TupleData[2]); + /* Read MACID from CIS */ + for (i = 5; i < 11; i++) + dev->dev_addr[i] = buf[i]; + kfree(buf); + } else { + if (pcmcia_get_mac_from_cis(link, dev)) + goto failed; if( cardtype == TDK ) { card_name = "TDK LAK-CD021"; } else if( cardtype == LA501 ) { @@ -501,9 +501,6 @@ static int fmvj18x_config(struct pcmcia_device *link) card_name = "C-NET(PC)C"; } } - /* Read MACID from CIS */ - for (i = 0; i < 6; i++) - dev->dev_addr[i] = node_id[i]; break; case UNGERMANN: /* Read MACID from register */ @@ -513,12 +510,12 @@ static int fmvj18x_config(struct pcmcia_device *link) break; case XXX10304: /* Read MACID from Buggy CIS */ - if (fmvj18x_get_hwinfo(link, tuple.TupleData) == -1) { + if (fmvj18x_get_hwinfo(link, buggybuf) == -1) { printk(KERN_NOTICE "fmvj18x_cs: unable to read hardware net address.\n"); goto failed; } for (i = 0 ; i < 6; i++) { - dev->dev_addr[i] = tuple.TupleData[i]; + dev->dev_addr[i] = buggybuf[i]; } card_name = "FMV-J182"; break; @@ -533,7 +530,7 @@ static int fmvj18x_config(struct pcmcia_device *link) lp->cardtype = cardtype; link->dev_node = &lp->node; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); if (register_netdev(dev) != 0) { printk(KERN_NOTICE "fmvj18x_cs: register_netdev() failed\n"); @@ -551,9 +548,6 @@ static int fmvj18x_config(struct pcmcia_device *link) return 0; -cs_failed: - /* All Card Services errors end up here */ - cs_error(link, last_fn, last_ret); failed: fmvj18x_release(link); return -ENODEV; @@ -571,16 +565,14 @@ static int fmvj18x_get_hwinfo(struct pcmcia_device *link, u_char *node_id) req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_AM|WIN_ENABLE; req.Base = 0; req.Size = 0; req.AccessSpeed = 0; - i = pcmcia_request_window(&link, &req, &link->win); - if (i != 0) { - cs_error(link, RequestWindow, i); + i = pcmcia_request_window(link, &req, &link->win); + if (i != 0) return -1; - } base = ioremap(req.Base, req.Size); mem.Page = 0; mem.CardOffset = 0; - pcmcia_map_mem_page(link->win, &mem); + pcmcia_map_mem_page(link, link->win, &mem); /* * MBH10304 CISTPL_FUNCE_LAN_NODE_ID format @@ -605,9 +597,7 @@ static int fmvj18x_get_hwinfo(struct pcmcia_device *link, u_char *node_id) } iounmap(base); - j = pcmcia_release_window(link->win); - if (j != 0) - cs_error(link, ReleaseWindow, j); + j = pcmcia_release_window(link, link->win); return (i != 0x200) ? 0 : -1; } /* fmvj18x_get_hwinfo */ @@ -626,11 +616,9 @@ static int fmvj18x_setup_mfc(struct pcmcia_device *link) req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_AM|WIN_ENABLE; req.Base = 0; req.Size = 0; req.AccessSpeed = 0; - i = pcmcia_request_window(&link, &req, &link->win); - if (i != 0) { - cs_error(link, RequestWindow, i); + i = pcmcia_request_window(link, &req, &link->win); + if (i != 0) return -1; - } lp->base = ioremap(req.Base, req.Size); if (lp->base == NULL) { @@ -640,11 +628,10 @@ static int fmvj18x_setup_mfc(struct pcmcia_device *link) mem.Page = 0; mem.CardOffset = 0; - i = pcmcia_map_mem_page(link->win, &mem); + i = pcmcia_map_mem_page(link, link->win, &mem); if (i != 0) { iounmap(lp->base); lp->base = NULL; - cs_error(link, MapMemPage, i); return -1; } @@ -671,15 +658,13 @@ static void fmvj18x_release(struct pcmcia_device *link) u_char __iomem *tmp; int j; - DEBUG(0, "fmvj18x_release(0x%p)\n", link); + dev_dbg(&link->dev, "fmvj18x_release\n"); if (lp->base != NULL) { tmp = lp->base; lp->base = NULL; /* set NULL before iounmap */ iounmap(tmp); - j = pcmcia_release_window(link->win); - if (j != 0) - cs_error(link, ReleaseWindow, j); + j = pcmcia_release_window(link, link->win); } pcmcia_disable_device(link); @@ -788,8 +773,8 @@ static irqreturn_t fjn_interrupt(int dummy, void *dev_id) outb(tx_stat, ioaddr + TX_STATUS); outb(rx_stat, ioaddr + RX_STATUS); - DEBUG(4, "%s: interrupt, rx_status %02x.\n", dev->name, rx_stat); - DEBUG(4, " tx_status %02x.\n", tx_stat); + pr_debug("%s: interrupt, rx_status %02x.\n", dev->name, rx_stat); + pr_debug(" tx_status %02x.\n", tx_stat); if (rx_stat || (inb(ioaddr + RX_MODE) & F_BUF_EMP) == 0) { /* there is packet(s) in rx buffer */ @@ -809,8 +794,8 @@ static irqreturn_t fjn_interrupt(int dummy, void *dev_id) } netif_wake_queue(dev); } - DEBUG(4, "%s: exiting interrupt,\n", dev->name); - DEBUG(4, " tx_status %02x, rx_status %02x.\n", tx_stat, rx_stat); + pr_debug("%s: exiting interrupt,\n", dev->name); + pr_debug(" tx_status %02x, rx_status %02x.\n", tx_stat, rx_stat); outb(D_TX_INTR, ioaddr + TX_INTR); outb(D_RX_INTR, ioaddr + RX_INTR); @@ -882,7 +867,7 @@ static netdev_tx_t fjn_start_xmit(struct sk_buff *skb, return NETDEV_TX_BUSY; } - DEBUG(4, "%s: Transmitting a packet of length %lu.\n", + pr_debug("%s: Transmitting a packet of length %lu.\n", dev->name, (unsigned long)skb->len); dev->stats.tx_bytes += skb->len; @@ -937,7 +922,7 @@ static void fjn_reset(struct net_device *dev) unsigned int ioaddr = dev->base_addr; int i; - DEBUG(4, "fjn_reset(%s) called.\n",dev->name); + pr_debug("fjn_reset(%s) called.\n",dev->name); /* Reset controller */ if( sram_config == 0 ) @@ -1015,13 +1000,13 @@ static void fjn_rx(struct net_device *dev) unsigned int ioaddr = dev->base_addr; int boguscount = 10; /* 5 -> 10: by agy 19940922 */ - DEBUG(4, "%s: in rx_packet(), rx_status %02x.\n", + pr_debug("%s: in rx_packet(), rx_status %02x.\n", dev->name, inb(ioaddr + RX_STATUS)); while ((inb(ioaddr + RX_MODE) & F_BUF_EMP) == 0) { u_short status = inw(ioaddr + DATAPORT); - DEBUG(4, "%s: Rxing packet mode %02x status %04x.\n", + pr_debug("%s: Rxing packet mode %02x status %04x.\n", dev->name, inb(ioaddr + RX_MODE), status); #ifndef final_version if (status == 0) { @@ -1061,16 +1046,14 @@ static void fjn_rx(struct net_device *dev) (pkt_len + 1) >> 1); skb->protocol = eth_type_trans(skb, dev); -#ifdef PCMCIA_DEBUG - if (pc_debug > 5) { + { int i; - printk(KERN_DEBUG "%s: Rxed packet of length %d: ", - dev->name, pkt_len); + pr_debug("%s: Rxed packet of length %d: ", + dev->name, pkt_len); for (i = 0; i < 14; i++) - printk(" %02x", skb->data[i]); - printk(".\n"); + pr_debug(" %02x", skb->data[i]); + pr_debug(".\n"); } -#endif netif_rx(skb); dev->stats.rx_packets++; @@ -1094,7 +1077,7 @@ static void fjn_rx(struct net_device *dev) } if (i > 0) - DEBUG(5, "%s: Exint Rx packet with mode %02x after " + pr_debug("%s: Exint Rx packet with mode %02x after " "%d ticks.\n", dev->name, inb(ioaddr + RX_MODE), i); } */ @@ -1112,24 +1095,8 @@ static void netdev_get_drvinfo(struct net_device *dev, sprintf(info->bus_info, "PCMCIA 0x%lx", dev->base_addr); } -#ifdef PCMCIA_DEBUG -static u32 netdev_get_msglevel(struct net_device *dev) -{ - return pc_debug; -} - -static void netdev_set_msglevel(struct net_device *dev, u32 level) -{ - pc_debug = level; -} -#endif /* PCMCIA_DEBUG */ - static const struct ethtool_ops netdev_ethtool_ops = { .get_drvinfo = netdev_get_drvinfo, -#ifdef PCMCIA_DEBUG - .get_msglevel = netdev_get_msglevel, - .set_msglevel = netdev_set_msglevel, -#endif /* PCMCIA_DEBUG */ }; static int fjn_config(struct net_device *dev, struct ifmap *map){ @@ -1141,7 +1108,7 @@ static int fjn_open(struct net_device *dev) struct local_info_t *lp = netdev_priv(dev); struct pcmcia_device *link = lp->p_dev; - DEBUG(4, "fjn_open('%s').\n", dev->name); + pr_debug("fjn_open('%s').\n", dev->name); if (!pcmcia_dev_present(link)) return -ENODEV; @@ -1167,7 +1134,7 @@ static int fjn_close(struct net_device *dev) struct pcmcia_device *link = lp->p_dev; unsigned int ioaddr = dev->base_addr; - DEBUG(4, "fjn_close('%s').\n", dev->name); + pr_debug("fjn_close('%s').\n", dev->name); lp->open_time = 0; netif_stop_queue(dev); diff --git a/drivers/net/pcmcia/ibmtr_cs.c b/drivers/net/pcmcia/ibmtr_cs.c index 06618af1a46..37f4a6fdc3e 100644 --- a/drivers/net/pcmcia/ibmtr_cs.c +++ b/drivers/net/pcmcia/ibmtr_cs.c @@ -69,17 +69,6 @@ #define PCMCIA #include "../tokenring/ibmtr.c" -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = -"ibmtr_cs.c 1.10 1996/01/06 05:19:00 (Steve Kipisz)\n" -" 2.2.7 1999/05/03 12:00:00 (Mike Phillips)\n" -" 2.4.2 2001/30/28 Midnight (Burt Silverman)\n"; -#else -#define DEBUG(n, args...) -#endif /*====================================================================*/ @@ -130,6 +119,12 @@ static const struct ethtool_ops netdev_ethtool_ops = { .get_drvinfo = netdev_get_drvinfo, }; +static irqreturn_t ibmtr_interrupt(int irq, void *dev_id) { + ibmtr_dev_t *info = dev_id; + struct net_device *dev = info->dev; + return tok_interrupt(irq, dev); +}; + /*====================================================================== ibmtr_attach() creates an "instance" of the driver, allocating @@ -143,7 +138,7 @@ static int __devinit ibmtr_attach(struct pcmcia_device *link) ibmtr_dev_t *info; struct net_device *dev; - DEBUG(0, "ibmtr_attach()\n"); + dev_dbg(&link->dev, "ibmtr_attach()\n"); /* Create new token-ring device */ info = kzalloc(sizeof(*info), GFP_KERNEL); @@ -161,14 +156,13 @@ static int __devinit ibmtr_attach(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; link->io.NumPorts1 = 4; link->io.IOAddrLines = 16; - link->irq.Attributes = IRQ_TYPE_EXCLUSIVE | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; - link->irq.Handler = &tok_interrupt; + link->irq.Attributes = IRQ_TYPE_EXCLUSIVE; + link->irq.Handler = ibmtr_interrupt; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; link->conf.Present = PRESENT_OPTION; - link->irq.Instance = info->dev = dev; + info->dev = dev; SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops); @@ -190,7 +184,7 @@ static void ibmtr_detach(struct pcmcia_device *link) struct net_device *dev = info->dev; struct tok_info *ti = netdev_priv(dev); - DEBUG(0, "ibmtr_detach(0x%p)\n", link); + dev_dbg(&link->dev, "ibmtr_detach\n"); /* * When the card removal interrupt hits tok_interrupt(), @@ -217,9 +211,6 @@ static void ibmtr_detach(struct pcmcia_device *link) ======================================================================*/ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int __devinit ibmtr_config(struct pcmcia_device *link) { ibmtr_dev_t *info = link->priv; @@ -227,9 +218,9 @@ static int __devinit ibmtr_config(struct pcmcia_device *link) struct tok_info *ti = netdev_priv(dev); win_req_t req; memreq_t mem; - int i, last_ret, last_fn; + int i, ret; - DEBUG(0, "ibmtr_config(0x%p)\n", link); + dev_dbg(&link->dev, "ibmtr_config\n"); link->conf.ConfigIndex = 0x61; @@ -241,11 +232,15 @@ static int __devinit ibmtr_config(struct pcmcia_device *link) if (i != 0) { /* Couldn't get 0xA20-0xA23. Try ALTERNATE at 0xA24-0xA27. */ link->io.BasePort1 = 0xA24; - CS_CHECK(RequestIO, pcmcia_request_io(link, &link->io)); + ret = pcmcia_request_io(link, &link->io); + if (ret) + goto failed; } dev->base_addr = link->io.BasePort1; - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; dev->irq = link->irq.AssignedIRQ; ti->irq = link->irq.AssignedIRQ; ti->global_int_enable=GLOBAL_INT_ENABLE+((dev->irq==9) ? 2 : dev->irq); @@ -256,11 +251,15 @@ static int __devinit ibmtr_config(struct pcmcia_device *link) req.Base = 0; req.Size = 0x2000; req.AccessSpeed = 250; - CS_CHECK(RequestWindow, pcmcia_request_window(&link, &req, &link->win)); + ret = pcmcia_request_window(link, &req, &link->win); + if (ret) + goto failed; mem.CardOffset = mmiobase; mem.Page = 0; - CS_CHECK(MapMemPage, pcmcia_map_mem_page(link->win, &mem)); + ret = pcmcia_map_mem_page(link, link->win, &mem); + if (ret) + goto failed; ti->mmio = ioremap(req.Base, req.Size); /* Allocate the SRAM memory window */ @@ -269,17 +268,23 @@ static int __devinit ibmtr_config(struct pcmcia_device *link) req.Base = 0; req.Size = sramsize * 1024; req.AccessSpeed = 250; - CS_CHECK(RequestWindow, pcmcia_request_window(&link, &req, &info->sram_win_handle)); + ret = pcmcia_request_window(link, &req, &info->sram_win_handle); + if (ret) + goto failed; mem.CardOffset = srambase; mem.Page = 0; - CS_CHECK(MapMemPage, pcmcia_map_mem_page(info->sram_win_handle, &mem)); + ret = pcmcia_map_mem_page(link, info->sram_win_handle, &mem); + if (ret) + goto failed; ti->sram_base = mem.CardOffset >> 12; ti->sram_virt = ioremap(req.Base, req.Size); ti->sram_phys = req.Base; - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; /* Set up the Token-Ring Controller Configuration Register and turn on the card. Check the "Local Area Network Credit Card @@ -287,7 +292,7 @@ static int __devinit ibmtr_config(struct pcmcia_device *link) ibmtr_hw_setup(dev, mmiobase); link->dev_node = &info->node; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); i = ibmtr_probe_card(dev); if (i != 0) { @@ -305,8 +310,6 @@ static int __devinit ibmtr_config(struct pcmcia_device *link) dev->dev_addr); return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: ibmtr_release(link); return -ENODEV; @@ -325,12 +328,12 @@ static void ibmtr_release(struct pcmcia_device *link) ibmtr_dev_t *info = link->priv; struct net_device *dev = info->dev; - DEBUG(0, "ibmtr_release(0x%p)\n", link); + dev_dbg(&link->dev, "ibmtr_release\n"); if (link->win) { struct tok_info *ti = netdev_priv(dev); iounmap(ti->mmio); - pcmcia_release_window(info->sram_win_handle); + pcmcia_release_window(link, info->sram_win_handle); } pcmcia_disable_device(link); } diff --git a/drivers/net/pcmcia/nmclan_cs.c b/drivers/net/pcmcia/nmclan_cs.c index 5ed6339c52b..dae5ef6b260 100644 --- a/drivers/net/pcmcia/nmclan_cs.c +++ b/drivers/net/pcmcia/nmclan_cs.c @@ -381,13 +381,6 @@ typedef struct _mace_private { Private Global Variables ---------------------------------------------------------------------------- */ -#ifdef PCMCIA_DEBUG -static char rcsid[] = -"nmclan_cs.c,v 0.16 1995/07/01 06:42:17 rpao Exp rpao"; -static char *version = -DRV_NAME " " DRV_VERSION " (Roger C. Pao)"; -#endif - static const char *if_names[]={ "Auto", "10baseT", "BNC", }; @@ -406,12 +399,6 @@ MODULE_LICENSE("GPL"); /* 0=auto, 1=10baseT, 2 = 10base2, default=auto */ INT_MODULE_PARM(if_port, 0); -#ifdef PCMCIA_DEBUG -INT_MODULE_PARM(pc_debug, PCMCIA_DEBUG); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -#else -#define DEBUG(n, args...) -#endif /* ---------------------------------------------------------------------------- Function Prototypes @@ -462,8 +449,7 @@ static int nmclan_probe(struct pcmcia_device *link) mace_private *lp; struct net_device *dev; - DEBUG(0, "nmclan_attach()\n"); - DEBUG(1, "%s\n", rcsid); + dev_dbg(&link->dev, "nmclan_attach()\n"); /* Create new ethernet device */ dev = alloc_etherdev(sizeof(mace_private)); @@ -477,10 +463,8 @@ static int nmclan_probe(struct pcmcia_device *link) link->io.NumPorts1 = 32; link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; link->io.IOAddrLines = 5; - link->irq.Attributes = IRQ_TYPE_EXCLUSIVE | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_EXCLUSIVE; link->irq.Handler = &mace_interrupt; - link->irq.Instance = dev; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; link->conf.ConfigIndex = 1; @@ -507,7 +491,7 @@ static void nmclan_detach(struct pcmcia_device *link) { struct net_device *dev = link->priv; - DEBUG(0, "nmclan_detach(0x%p)\n", link); + dev_dbg(&link->dev, "nmclan_detach\n"); if (link->dev_node) unregister_netdev(dev); @@ -654,37 +638,40 @@ nmclan_config ethernet device available to the system. ---------------------------------------------------------------------------- */ -#define CS_CHECK(fn, ret) \ - do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int nmclan_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; mace_private *lp = netdev_priv(dev); - tuple_t tuple; - u_char buf[64]; - int i, last_ret, last_fn; + u8 *buf; + size_t len; + int i, ret; unsigned int ioaddr; - DEBUG(0, "nmclan_config(0x%p)\n", link); + dev_dbg(&link->dev, "nmclan_config\n"); + + ret = pcmcia_request_io(link, &link->io); + if (ret) + goto failed; + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; - CS_CHECK(RequestIO, pcmcia_request_io(link, &link->io)); - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); dev->irq = link->irq.AssignedIRQ; dev->base_addr = link->io.BasePort1; ioaddr = dev->base_addr; /* Read the ethernet address from the CIS. */ - tuple.DesiredTuple = 0x80 /* CISTPL_CFTABLE_ENTRY_MISC */; - tuple.TupleData = buf; - tuple.TupleDataMax = 64; - tuple.TupleOffset = 0; - tuple.Attributes = 0; - CS_CHECK(GetFirstTuple, pcmcia_get_first_tuple(link, &tuple)); - CS_CHECK(GetTupleData, pcmcia_get_tuple_data(link, &tuple)); - memcpy(dev->dev_addr, tuple.TupleData, ETHER_ADDR_LEN); + len = pcmcia_get_tuple(link, 0x80, &buf); + if (!buf || len < ETHER_ADDR_LEN) { + kfree(buf); + goto failed; + } + memcpy(dev->dev_addr, buf, ETHER_ADDR_LEN); + kfree(buf); /* Verify configuration by reading the MACE ID. */ { @@ -693,7 +680,7 @@ static int nmclan_config(struct pcmcia_device *link) sig[0] = mace_read(lp, ioaddr, MACE_CHIPIDL); sig[1] = mace_read(lp, ioaddr, MACE_CHIPIDH); if ((sig[0] == 0x40) && ((sig[1] & 0x0F) == 0x09)) { - DEBUG(0, "nmclan_cs configured: mace id=%x %x\n", + dev_dbg(&link->dev, "nmclan_cs configured: mace id=%x %x\n", sig[0], sig[1]); } else { printk(KERN_NOTICE "nmclan_cs: mace id not found: %x %x should" @@ -712,7 +699,7 @@ static int nmclan_config(struct pcmcia_device *link) printk(KERN_NOTICE "nmclan_cs: invalid if_port requested\n"); link->dev_node = &lp->node; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); i = register_netdev(dev); if (i != 0) { @@ -729,8 +716,6 @@ static int nmclan_config(struct pcmcia_device *link) dev->dev_addr); return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: nmclan_release(link); return -ENODEV; @@ -744,7 +729,7 @@ nmclan_release ---------------------------------------------------------------------------- */ static void nmclan_release(struct pcmcia_device *link) { - DEBUG(0, "nmclan_release(0x%p)\n", link); + dev_dbg(&link->dev, "nmclan_release\n"); pcmcia_disable_device(link); } @@ -795,7 +780,7 @@ static void nmclan_reset(struct net_device *dev) /* Reset Xilinx */ reg.Action = CS_WRITE; reg.Offset = CISREG_COR; - DEBUG(1, "nmclan_reset: OrigCorValue=0x%lX, resetting...\n", + dev_dbg(&link->dev, "nmclan_reset: OrigCorValue=0x%lX, resetting...\n", OrigCorValue); reg.Value = COR_SOFT_RESET; pcmcia_access_configuration_register(link, ®); @@ -872,7 +857,7 @@ static int mace_close(struct net_device *dev) mace_private *lp = netdev_priv(dev); struct pcmcia_device *link = lp->p_dev; - DEBUG(2, "%s: shutting down ethercard.\n", dev->name); + dev_dbg(&link->dev, "%s: shutting down ethercard.\n", dev->name); /* Mask off all interrupts from the MACE chip. */ outb(0xFF, ioaddr + AM2150_MACE_BASE + MACE_IMR); @@ -891,24 +876,8 @@ static void netdev_get_drvinfo(struct net_device *dev, sprintf(info->bus_info, "PCMCIA 0x%lx", dev->base_addr); } -#ifdef PCMCIA_DEBUG -static u32 netdev_get_msglevel(struct net_device *dev) -{ - return pc_debug; -} - -static void netdev_set_msglevel(struct net_device *dev, u32 level) -{ - pc_debug = level; -} -#endif /* PCMCIA_DEBUG */ - static const struct ethtool_ops netdev_ethtool_ops = { .get_drvinfo = netdev_get_drvinfo, -#ifdef PCMCIA_DEBUG - .get_msglevel = netdev_get_msglevel, - .set_msglevel = netdev_set_msglevel, -#endif /* PCMCIA_DEBUG */ }; /* ---------------------------------------------------------------------------- @@ -946,7 +915,7 @@ static netdev_tx_t mace_start_xmit(struct sk_buff *skb, netif_stop_queue(dev); - DEBUG(3, "%s: mace_start_xmit(length = %ld) called.\n", + pr_debug("%s: mace_start_xmit(length = %ld) called.\n", dev->name, (long)skb->len); #if (!TX_INTERRUPTABLE) @@ -1008,7 +977,7 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id) int IntrCnt = MACE_MAX_IR_ITERATIONS; if (dev == NULL) { - DEBUG(2, "mace_interrupt(): irq 0x%X for unknown device.\n", + pr_debug("mace_interrupt(): irq 0x%X for unknown device.\n", irq); return IRQ_NONE; } @@ -1031,7 +1000,7 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id) } if (!netif_device_present(dev)) { - DEBUG(2, "%s: interrupt from dead card\n", dev->name); + pr_debug("%s: interrupt from dead card\n", dev->name); return IRQ_NONE; } @@ -1039,7 +1008,7 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id) /* WARNING: MACE_IR is a READ/CLEAR port! */ status = inb(ioaddr + AM2150_MACE_BASE + MACE_IR); - DEBUG(3, "mace_interrupt: irq 0x%X status 0x%X.\n", irq, status); + pr_debug("mace_interrupt: irq 0x%X status 0x%X.\n", irq, status); if (status & MACE_IR_RCVINT) { mace_rx(dev, MACE_MAX_RX_ITERATIONS); @@ -1158,7 +1127,7 @@ static int mace_rx(struct net_device *dev, unsigned char RxCnt) ) { rx_status = inw(ioaddr + AM2150_RCV); - DEBUG(3, "%s: in mace_rx(), framecnt 0x%X, rx_status" + pr_debug("%s: in mace_rx(), framecnt 0x%X, rx_status" " 0x%X.\n", dev->name, rx_framecnt, rx_status); if (rx_status & MACE_RCVFS_RCVSTS) { /* Error, update stats. */ @@ -1185,7 +1154,7 @@ static int mace_rx(struct net_device *dev, unsigned char RxCnt) lp->mace_stats.rfs_rcvcc += inb(ioaddr + AM2150_RCV); /* rcv collision count */ - DEBUG(3, " receiving packet size 0x%X rx_status" + pr_debug(" receiving packet size 0x%X rx_status" " 0x%X.\n", pkt_len, rx_status); skb = dev_alloc_skb(pkt_len+2); @@ -1204,7 +1173,7 @@ static int mace_rx(struct net_device *dev, unsigned char RxCnt) outb(0xFF, ioaddr + AM2150_RCV_NEXT); /* skip to next frame */ continue; } else { - DEBUG(1, "%s: couldn't allocate a sk_buff of size" + pr_debug("%s: couldn't allocate a sk_buff of size" " %d.\n", dev->name, pkt_len); lp->linux_stats.rx_dropped++; } @@ -1220,28 +1189,28 @@ pr_linux_stats ---------------------------------------------------------------------------- */ static void pr_linux_stats(struct net_device_stats *pstats) { - DEBUG(2, "pr_linux_stats\n"); - DEBUG(2, " rx_packets=%-7ld tx_packets=%ld\n", + pr_debug("pr_linux_stats\n"); + pr_debug(" rx_packets=%-7ld tx_packets=%ld\n", (long)pstats->rx_packets, (long)pstats->tx_packets); - DEBUG(2, " rx_errors=%-7ld tx_errors=%ld\n", + pr_debug(" rx_errors=%-7ld tx_errors=%ld\n", (long)pstats->rx_errors, (long)pstats->tx_errors); - DEBUG(2, " rx_dropped=%-7ld tx_dropped=%ld\n", + pr_debug(" rx_dropped=%-7ld tx_dropped=%ld\n", (long)pstats->rx_dropped, (long)pstats->tx_dropped); - DEBUG(2, " multicast=%-7ld collisions=%ld\n", + pr_debug(" multicast=%-7ld collisions=%ld\n", (long)pstats->multicast, (long)pstats->collisions); - DEBUG(2, " rx_length_errors=%-7ld rx_over_errors=%ld\n", + pr_debug(" rx_length_errors=%-7ld rx_over_errors=%ld\n", (long)pstats->rx_length_errors, (long)pstats->rx_over_errors); - DEBUG(2, " rx_crc_errors=%-7ld rx_frame_errors=%ld\n", + pr_debug(" rx_crc_errors=%-7ld rx_frame_errors=%ld\n", (long)pstats->rx_crc_errors, (long)pstats->rx_frame_errors); - DEBUG(2, " rx_fifo_errors=%-7ld rx_missed_errors=%ld\n", + pr_debug(" rx_fifo_errors=%-7ld rx_missed_errors=%ld\n", (long)pstats->rx_fifo_errors, (long)pstats->rx_missed_errors); - DEBUG(2, " tx_aborted_errors=%-7ld tx_carrier_errors=%ld\n", + pr_debug(" tx_aborted_errors=%-7ld tx_carrier_errors=%ld\n", (long)pstats->tx_aborted_errors, (long)pstats->tx_carrier_errors); - DEBUG(2, " tx_fifo_errors=%-7ld tx_heartbeat_errors=%ld\n", + pr_debug(" tx_fifo_errors=%-7ld tx_heartbeat_errors=%ld\n", (long)pstats->tx_fifo_errors, (long)pstats->tx_heartbeat_errors); - DEBUG(2, " tx_window_errors=%ld\n", + pr_debug(" tx_window_errors=%ld\n", (long)pstats->tx_window_errors); } /* pr_linux_stats */ @@ -1250,48 +1219,48 @@ pr_mace_stats ---------------------------------------------------------------------------- */ static void pr_mace_stats(mace_statistics *pstats) { - DEBUG(2, "pr_mace_stats\n"); + pr_debug("pr_mace_stats\n"); - DEBUG(2, " xmtsv=%-7d uflo=%d\n", + pr_debug(" xmtsv=%-7d uflo=%d\n", pstats->xmtsv, pstats->uflo); - DEBUG(2, " lcol=%-7d more=%d\n", + pr_debug(" lcol=%-7d more=%d\n", pstats->lcol, pstats->more); - DEBUG(2, " one=%-7d defer=%d\n", + pr_debug(" one=%-7d defer=%d\n", pstats->one, pstats->defer); - DEBUG(2, " lcar=%-7d rtry=%d\n", + pr_debug(" lcar=%-7d rtry=%d\n", pstats->lcar, pstats->rtry); /* MACE_XMTRC */ - DEBUG(2, " exdef=%-7d xmtrc=%d\n", + pr_debug(" exdef=%-7d xmtrc=%d\n", pstats->exdef, pstats->xmtrc); /* RFS1--Receive Status (RCVSTS) */ - DEBUG(2, " oflo=%-7d clsn=%d\n", + pr_debug(" oflo=%-7d clsn=%d\n", pstats->oflo, pstats->clsn); - DEBUG(2, " fram=%-7d fcs=%d\n", + pr_debug(" fram=%-7d fcs=%d\n", pstats->fram, pstats->fcs); /* RFS2--Runt Packet Count (RNTPC) */ /* RFS3--Receive Collision Count (RCVCC) */ - DEBUG(2, " rfs_rntpc=%-7d rfs_rcvcc=%d\n", + pr_debug(" rfs_rntpc=%-7d rfs_rcvcc=%d\n", pstats->rfs_rntpc, pstats->rfs_rcvcc); /* MACE_IR */ - DEBUG(2, " jab=%-7d babl=%d\n", + pr_debug(" jab=%-7d babl=%d\n", pstats->jab, pstats->babl); - DEBUG(2, " cerr=%-7d rcvcco=%d\n", + pr_debug(" cerr=%-7d rcvcco=%d\n", pstats->cerr, pstats->rcvcco); - DEBUG(2, " rntpco=%-7d mpco=%d\n", + pr_debug(" rntpco=%-7d mpco=%d\n", pstats->rntpco, pstats->mpco); /* MACE_MPC */ - DEBUG(2, " mpc=%d\n", pstats->mpc); + pr_debug(" mpc=%d\n", pstats->mpc); /* MACE_RNTPC */ - DEBUG(2, " rntpc=%d\n", pstats->rntpc); + pr_debug(" rntpc=%d\n", pstats->rntpc); /* MACE_RCVCC */ - DEBUG(2, " rcvcc=%d\n", pstats->rcvcc); + pr_debug(" rcvcc=%d\n", pstats->rcvcc); } /* pr_mace_stats */ @@ -1360,7 +1329,7 @@ static struct net_device_stats *mace_get_stats(struct net_device *dev) update_stats(dev->base_addr, dev); - DEBUG(1, "%s: updating the statistics.\n", dev->name); + pr_debug("%s: updating the statistics.\n", dev->name); pr_linux_stats(&lp->linux_stats); pr_mace_stats(&lp->mace_stats); @@ -1427,7 +1396,7 @@ static void BuildLAF(int *ladrf, int *adr) ladrf[byte] |= (1 << (hashcode & 7)); #ifdef PCMCIA_DEBUG - if (pc_debug > 2) + if (0) printk(KERN_DEBUG " adr =%pM\n", adr); printk(KERN_DEBUG " hashcode = %d(decimal), ladrf[0:63] =", hashcode); for (i = 0; i < 8; i++) @@ -1454,12 +1423,12 @@ static void restore_multicast_list(struct net_device *dev) unsigned int ioaddr = dev->base_addr; int i; - DEBUG(2, "%s: restoring Rx mode to %d addresses.\n", + pr_debug("%s: restoring Rx mode to %d addresses.\n", dev->name, num_addrs); if (num_addrs > 0) { - DEBUG(1, "Attempt to restore multicast list detected.\n"); + pr_debug("Attempt to restore multicast list detected.\n"); mace_write(lp, ioaddr, MACE_IAC, MACE_IAC_ADDRCHG | MACE_IAC_LOGADDR); /* Poll ADDRCHG bit */ @@ -1511,11 +1480,11 @@ static void set_multicast_list(struct net_device *dev) struct dev_mc_list *dmi = dev->mc_list; #ifdef PCMCIA_DEBUG - if (pc_debug > 1) { + { static int old; if (dev->mc_count != old) { old = dev->mc_count; - DEBUG(0, "%s: setting Rx mode to %d addresses.\n", + pr_debug("%s: setting Rx mode to %d addresses.\n", dev->name, old); } } @@ -1546,7 +1515,7 @@ static void restore_multicast_list(struct net_device *dev) unsigned int ioaddr = dev->base_addr; mace_private *lp = netdev_priv(dev); - DEBUG(2, "%s: restoring Rx mode to %d addresses.\n", dev->name, + pr_debug("%s: restoring Rx mode to %d addresses.\n", dev->name, lp->multicast_num_addrs); if (dev->flags & IFF_PROMISC) { @@ -1567,11 +1536,11 @@ static void set_multicast_list(struct net_device *dev) mace_private *lp = netdev_priv(dev); #ifdef PCMCIA_DEBUG - if (pc_debug > 1) { + { static int old; if (dev->mc_count != old) { old = dev->mc_count; - DEBUG(0, "%s: setting Rx mode to %d addresses.\n", + pr_debug("%s: setting Rx mode to %d addresses.\n", dev->name, old); } } diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c index 94c9ad2746b..cbe462ed221 100644 --- a/drivers/net/pcmcia/pcnet_cs.c +++ b/drivers/net/pcmcia/pcnet_cs.c @@ -71,15 +71,6 @@ static const char *if_names[] = { "auto", "10baseT", "10base2"}; -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = -"pcnet_cs.c 1.153 2003/11/09 18:53:09 (David Hinds)"; -#else -#define DEBUG(n, args...) -#endif /*====================================================================*/ @@ -265,7 +256,7 @@ static int pcnet_probe(struct pcmcia_device *link) pcnet_dev_t *info; struct net_device *dev; - DEBUG(0, "pcnet_attach()\n"); + dev_dbg(&link->dev, "pcnet_attach()\n"); /* Create new ethernet device */ dev = __alloc_ei_netdev(sizeof(pcnet_dev_t)); @@ -275,7 +266,6 @@ static int pcnet_probe(struct pcmcia_device *link) link->priv = dev; link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; @@ -297,7 +287,7 @@ static void pcnet_detach(struct pcmcia_device *link) { struct net_device *dev = link->priv; - DEBUG(0, "pcnet_detach(0x%p)\n", link); + dev_dbg(&link->dev, "pcnet_detach\n"); if (link->dev_node) unregister_netdev(dev); @@ -326,17 +316,15 @@ static hw_info_t *get_hwinfo(struct pcmcia_device *link) req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_AM|WIN_ENABLE; req.Base = 0; req.Size = 0; req.AccessSpeed = 0; - i = pcmcia_request_window(&link, &req, &link->win); - if (i != 0) { - cs_error(link, RequestWindow, i); + i = pcmcia_request_window(link, &req, &link->win); + if (i != 0) return NULL; - } virt = ioremap(req.Base, req.Size); mem.Page = 0; for (i = 0; i < NR_INFO; i++) { mem.CardOffset = hw_info[i].offset & ~(req.Size-1); - pcmcia_map_mem_page(link->win, &mem); + pcmcia_map_mem_page(link, link->win, &mem); base = &virt[hw_info[i].offset & (req.Size-1)]; if ((readb(base+0) == hw_info[i].a0) && (readb(base+2) == hw_info[i].a1) && @@ -348,9 +336,7 @@ static hw_info_t *get_hwinfo(struct pcmcia_device *link) } iounmap(virt); - j = pcmcia_release_window(link->win); - if (j != 0) - cs_error(link, ReleaseWindow, j); + j = pcmcia_release_window(link, link->win); return (i < NR_INFO) ? hw_info+i : NULL; } /* get_hwinfo */ @@ -495,9 +481,6 @@ static hw_info_t *get_hwired(struct pcmcia_device *link) ======================================================================*/ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int try_io_port(struct pcmcia_device *link) { int j, ret; @@ -567,19 +550,19 @@ static int pcnet_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; pcnet_dev_t *info = PRIV(dev); - int last_ret, last_fn, start_pg, stop_pg, cm_offset; + int ret, start_pg, stop_pg, cm_offset; int has_shmem = 0; hw_info_t *local_hw_info; - DEBUG(0, "pcnet_config(0x%p)\n", link); + dev_dbg(&link->dev, "pcnet_config\n"); - last_ret = pcmcia_loop_config(link, pcnet_confcheck, &has_shmem); - if (last_ret) { - cs_error(link, RequestIO, last_ret); + ret = pcmcia_loop_config(link, pcnet_confcheck, &has_shmem); + if (ret) goto failed; - } - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; if (link->io.NumPorts2 == 8) { link->conf.Attributes |= CONF_ENABLE_SPKR; @@ -589,7 +572,9 @@ static int pcnet_config(struct pcmcia_device *link) (link->card_id == PRODID_IBM_HOME_AND_AWAY)) link->conf.ConfigIndex |= 0x10; - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; dev->irq = link->irq.AssignedIRQ; dev->base_addr = link->io.BasePort1; if (info->flags & HAS_MISC_REG) { @@ -660,7 +645,7 @@ static int pcnet_config(struct pcmcia_device *link) mii_phy_probe(dev); link->dev_node = &info->node; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); if (register_netdev(dev) != 0) { printk(KERN_NOTICE "pcnet_cs: register_netdev() failed\n"); @@ -687,8 +672,6 @@ static int pcnet_config(struct pcmcia_device *link) printk(" hw_addr %pM\n", dev->dev_addr); return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: pcnet_release(link); return -ENODEV; @@ -706,7 +689,7 @@ static void pcnet_release(struct pcmcia_device *link) { pcnet_dev_t *info = PRIV(link->priv); - DEBUG(0, "pcnet_release(0x%p)\n", link); + dev_dbg(&link->dev, "pcnet_release\n"); if (info->flags & USE_SHMEM) iounmap(info->base); @@ -960,7 +943,7 @@ static void mii_phy_probe(struct net_device *dev) phyid = tmp << 16; phyid |= mdio_read(mii_addr, i, MII_PHYID_REG2); phyid &= MII_PHYID_REV_MASK; - DEBUG(0, "%s: MII at %d is 0x%08x\n", dev->name, i, phyid); + pr_debug("%s: MII at %d is 0x%08x\n", dev->name, i, phyid); if (phyid == AM79C9XX_HOME_PHY) { info->pna_phy = i; } else if (phyid != AM79C9XX_ETH_PHY) { @@ -976,7 +959,7 @@ static int pcnet_open(struct net_device *dev) struct pcmcia_device *link = info->p_dev; unsigned int nic_base = dev->base_addr; - DEBUG(2, "pcnet_open('%s')\n", dev->name); + dev_dbg(&link->dev, "pcnet_open('%s')\n", dev->name); if (!pcmcia_dev_present(link)) return -ENODEV; @@ -1008,7 +991,7 @@ static int pcnet_close(struct net_device *dev) pcnet_dev_t *info = PRIV(dev); struct pcmcia_device *link = info->p_dev; - DEBUG(2, "pcnet_close('%s')\n", dev->name); + dev_dbg(&link->dev, "pcnet_close('%s')\n", dev->name); ei_close(dev); free_irq(dev->irq, dev); @@ -1251,10 +1234,8 @@ static void dma_block_input(struct net_device *dev, int count, int xfer_count = count; char *buf = skb->data; -#ifdef PCMCIA_DEBUG if ((ei_debug > 4) && (count != 4)) - printk(KERN_DEBUG "%s: [bi=%d]\n", dev->name, count+4); -#endif + pr_debug("%s: [bi=%d]\n", dev->name, count+4); if (ei_status.dmaing) { printk(KERN_NOTICE "%s: DMAing conflict in dma_block_input." "[DMAstat:%1x][irqlock:%1x]\n", @@ -1495,7 +1476,7 @@ static int setup_shmem_window(struct pcmcia_device *link, int start_pg, pcnet_dev_t *info = PRIV(dev); win_req_t req; memreq_t mem; - int i, window_size, offset, last_ret, last_fn; + int i, window_size, offset, ret; window_size = (stop_pg - start_pg) << 8; if (window_size > 32 * 1024) @@ -1509,13 +1490,17 @@ static int setup_shmem_window(struct pcmcia_device *link, int start_pg, req.Attributes |= WIN_USE_WAIT; req.Base = 0; req.Size = window_size; req.AccessSpeed = mem_speed; - CS_CHECK(RequestWindow, pcmcia_request_window(&link, &req, &link->win)); + ret = pcmcia_request_window(link, &req, &link->win); + if (ret) + goto failed; mem.CardOffset = (start_pg << 8) + cm_offset; offset = mem.CardOffset % window_size; mem.CardOffset -= offset; mem.Page = 0; - CS_CHECK(MapMemPage, pcmcia_map_mem_page(link->win, &mem)); + ret = pcmcia_map_mem_page(link, link->win, &mem); + if (ret) + goto failed; /* Try scribbling on the buffer */ info->base = ioremap(req.Base, window_size); @@ -1527,8 +1512,8 @@ static int setup_shmem_window(struct pcmcia_device *link, int start_pg, pcnet_reset_8390(dev); if (i != (TX_PAGES<<8)) { iounmap(info->base); - pcmcia_release_window(link->win); - info->base = NULL; link->win = NULL; + pcmcia_release_window(link, link->win); + info->base = NULL; link->win = 0; goto failed; } @@ -1549,8 +1534,6 @@ static int setup_shmem_window(struct pcmcia_device *link, int start_pg, info->flags |= USE_SHMEM; return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: return 1; } @@ -1788,7 +1771,6 @@ static int __init init_pcnet_cs(void) static void __exit exit_pcnet_cs(void) { - DEBUG(0, "pcnet_cs: unloading\n"); pcmcia_unregister_driver(&pcnet_driver); } diff --git a/drivers/net/pcmcia/smc91c92_cs.c b/drivers/net/pcmcia/smc91c92_cs.c index 7bde2cd34c7..9e0da370912 100644 --- a/drivers/net/pcmcia/smc91c92_cs.c +++ b/drivers/net/pcmcia/smc91c92_cs.c @@ -79,14 +79,6 @@ MODULE_FIRMWARE(FIRMWARE_NAME); */ INT_MODULE_PARM(if_port, 0); -#ifdef PCMCIA_DEBUG -INT_MODULE_PARM(pc_debug, PCMCIA_DEBUG); -static const char *version = -"smc91c92_cs.c 1.123 2006/11/09 Donald Becker, becker@scyld.com.\n"; -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -#else -#define DEBUG(n, args...) -#endif #define DRV_NAME "smc91c92_cs" #define DRV_VERSION "1.123" @@ -126,12 +118,6 @@ struct smc_private { int rx_ovrn; }; -struct smc_cfg_mem { - tuple_t tuple; - cisparse_t parse; - u_char buf[255]; -}; - /* Special definitions for Megahertz multifunction cards */ #define MEGAHERTZ_ISR 0x0380 @@ -329,7 +315,7 @@ static int smc91c92_probe(struct pcmcia_device *link) struct smc_private *smc; struct net_device *dev; - DEBUG(0, "smc91c92_attach()\n"); + dev_dbg(&link->dev, "smc91c92_attach()\n"); /* Create new ethernet device */ dev = alloc_etherdev(sizeof(struct smc_private)); @@ -343,10 +329,8 @@ static int smc91c92_probe(struct pcmcia_device *link) link->io.NumPorts1 = 16; link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; link->io.IOAddrLines = 4; - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = &smc_interrupt; - link->irq.Instance = dev; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; @@ -377,7 +361,7 @@ static void smc91c92_detach(struct pcmcia_device *link) { struct net_device *dev = link->priv; - DEBUG(0, "smc91c92_detach(0x%p)\n", link); + dev_dbg(&link->dev, "smc91c92_detach\n"); if (link->dev_node) unregister_netdev(dev); @@ -408,34 +392,7 @@ static int cvt_ascii_address(struct net_device *dev, char *s) return 0; } -/*====================================================================*/ - -static int first_tuple(struct pcmcia_device *handle, tuple_t *tuple, - cisparse_t *parse) -{ - int i; - - i = pcmcia_get_first_tuple(handle, tuple); - if (i != 0) - return i; - i = pcmcia_get_tuple_data(handle, tuple); - if (i != 0) - return i; - return pcmcia_parse_tuple(tuple, parse); -} - -static int next_tuple(struct pcmcia_device *handle, tuple_t *tuple, - cisparse_t *parse) -{ - int i; - - if ((i = pcmcia_get_next_tuple(handle, tuple)) != 0 || - (i = pcmcia_get_tuple_data(handle, tuple)) != 0) - return i; - return pcmcia_parse_tuple(tuple, parse); -} - -/*====================================================================== +/*==================================================================== Configuration stuff for Megahertz cards @@ -490,19 +447,14 @@ static int mhz_mfc_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; struct smc_private *smc = netdev_priv(dev); - struct smc_cfg_mem *cfg_mem; win_req_t req; memreq_t mem; int i; - cfg_mem = kmalloc(sizeof(struct smc_cfg_mem), GFP_KERNEL); - if (!cfg_mem) - return -ENOMEM; - link->conf.Attributes |= CONF_ENABLE_SPKR; link->conf.Status = CCSR_AUDIO_ENA; link->irq.Attributes = - IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED|IRQ_HANDLE_PRESENT; + IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED; link->io.IOAddrLines = 16; link->io.Attributes2 = IO_DATA_PATH_WIDTH_8; link->io.NumPorts2 = 8; @@ -510,91 +462,80 @@ static int mhz_mfc_config(struct pcmcia_device *link) /* The Megahertz combo cards have modem-like CIS entries, so we have to explicitly try a bunch of port combinations. */ if (pcmcia_loop_config(link, mhz_mfc_config_check, NULL)) - goto free_cfg_mem; + return -ENODEV; + dev->base_addr = link->io.BasePort1; /* Allocate a memory window, for accessing the ISR */ req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_AM|WIN_ENABLE; req.Base = req.Size = 0; req.AccessSpeed = 0; - i = pcmcia_request_window(&link, &req, &link->win); + i = pcmcia_request_window(link, &req, &link->win); if (i != 0) - goto free_cfg_mem; + return -ENODEV; + smc->base = ioremap(req.Base, req.Size); mem.CardOffset = mem.Page = 0; if (smc->manfid == MANFID_MOTOROLA) mem.CardOffset = link->conf.ConfigBase; - i = pcmcia_map_mem_page(link->win, &mem); + i = pcmcia_map_mem_page(link, link->win, &mem); if ((i == 0) && (smc->manfid == MANFID_MEGAHERTZ) && (smc->cardid == PRODID_MEGAHERTZ_EM3288)) mhz_3288_power(link); -free_cfg_mem: - kfree(cfg_mem); - return -ENODEV; + return 0; } -static int mhz_setup(struct pcmcia_device *link) +static int pcmcia_get_versmac(struct pcmcia_device *p_dev, + tuple_t *tuple, + void *priv) { - struct net_device *dev = link->priv; - struct smc_cfg_mem *cfg_mem; - tuple_t *tuple; - cisparse_t *parse; - u_char *buf, *station_addr; - int rc; + struct net_device *dev = priv; + cisparse_t parse; - cfg_mem = kmalloc(sizeof(struct smc_cfg_mem), GFP_KERNEL); - if (!cfg_mem) - return -1; + if (pcmcia_parse_tuple(tuple, &parse)) + return -EINVAL; - tuple = &cfg_mem->tuple; - parse = &cfg_mem->parse; - buf = cfg_mem->buf; + if ((parse.version_1.ns > 3) && + (cvt_ascii_address(dev, + (parse.version_1.str + parse.version_1.ofs[3])))) + return 0; - tuple->Attributes = tuple->TupleOffset = 0; - tuple->TupleData = (cisdata_t *)buf; - tuple->TupleDataMax = 255; + return -EINVAL; +}; + +static int mhz_setup(struct pcmcia_device *link) +{ + struct net_device *dev = link->priv; + size_t len; + u8 *buf; + int rc; /* Read the station address from the CIS. It is stored as the last (fourth) string in the Version 1 Version/ID tuple. */ - tuple->DesiredTuple = CISTPL_VERS_1; - if (first_tuple(link, tuple, parse) != 0) { - rc = -1; - goto free_cfg_mem; - } + if ((link->prod_id[3]) && + (cvt_ascii_address(dev, link->prod_id[3]) == 0)) + return 0; + + /* Workarounds for broken cards start here. */ /* Ugh -- the EM1144 card has two VERS_1 tuples!?! */ - if (next_tuple(link, tuple, parse) != 0) - first_tuple(link, tuple, parse); - if (parse->version_1.ns > 3) { - station_addr = parse->version_1.str + parse->version_1.ofs[3]; - if (cvt_ascii_address(dev, station_addr) == 0) { - rc = 0; - goto free_cfg_mem; - } - } + if (!pcmcia_loop_tuple(link, CISTPL_VERS_1, pcmcia_get_versmac, dev)) + return 0; /* Another possibility: for the EM3288, in a special tuple */ - tuple->DesiredTuple = 0x81; - if (pcmcia_get_first_tuple(link, tuple) != 0) { - rc = -1; - goto free_cfg_mem; - } - if (pcmcia_get_tuple_data(link, tuple) != 0) { - rc = -1; - goto free_cfg_mem; - } - buf[12] = '\0'; - if (cvt_ascii_address(dev, buf) == 0) { - rc = 0; - goto free_cfg_mem; - } rc = -1; -free_cfg_mem: - kfree(cfg_mem); - return rc; -} + len = pcmcia_get_tuple(link, 0x81, &buf); + if (buf && len >= 13) { + buf[12] = '\0'; + if (cvt_ascii_address(dev, buf)) + rc = 0; + } + kfree(buf); + + return rc; +}; /*====================================================================== @@ -684,58 +625,21 @@ static int smc_config(struct pcmcia_device *link) return i; } + static int smc_setup(struct pcmcia_device *link) { struct net_device *dev = link->priv; - struct smc_cfg_mem *cfg_mem; - tuple_t *tuple; - cisparse_t *parse; - cistpl_lan_node_id_t *node_id; - u_char *buf, *station_addr; - int i, rc; - - cfg_mem = kmalloc(sizeof(struct smc_cfg_mem), GFP_KERNEL); - if (!cfg_mem) - return -ENOMEM; - - tuple = &cfg_mem->tuple; - parse = &cfg_mem->parse; - buf = cfg_mem->buf; - - tuple->Attributes = tuple->TupleOffset = 0; - tuple->TupleData = (cisdata_t *)buf; - tuple->TupleDataMax = 255; /* Check for a LAN function extension tuple */ - tuple->DesiredTuple = CISTPL_FUNCE; - i = first_tuple(link, tuple, parse); - while (i == 0) { - if (parse->funce.type == CISTPL_FUNCE_LAN_NODE_ID) - break; - i = next_tuple(link, tuple, parse); - } - if (i == 0) { - node_id = (cistpl_lan_node_id_t *)parse->funce.data; - if (node_id->nb == 6) { - for (i = 0; i < 6; i++) - dev->dev_addr[i] = node_id->id[i]; - rc = 0; - goto free_cfg_mem; - } - } + if (!pcmcia_get_mac_from_cis(link, dev)) + return 0; + /* Try the third string in the Version 1 Version/ID tuple. */ if (link->prod_id[2]) { - station_addr = link->prod_id[2]; - if (cvt_ascii_address(dev, station_addr) == 0) { - rc = 0; - goto free_cfg_mem; - } + if (cvt_ascii_address(dev, link->prod_id[2]) == 0) + return 0; } - - rc = -1; -free_cfg_mem: - kfree(cfg_mem); - return rc; + return -1; } /*====================================================================*/ @@ -749,7 +653,7 @@ static int osi_config(struct pcmcia_device *link) link->conf.Attributes |= CONF_ENABLE_SPKR; link->conf.Status = CCSR_AUDIO_ENA; link->irq.Attributes = - IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED|IRQ_HANDLE_PRESENT; + IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED; link->io.NumPorts1 = 64; link->io.Attributes2 = IO_DATA_PATH_WIDTH_8; link->io.NumPorts2 = 8; @@ -794,41 +698,31 @@ static int osi_load_firmware(struct pcmcia_device *link) return err; } -static int osi_setup(struct pcmcia_device *link, u_short manfid, u_short cardid) +static int pcmcia_osi_mac(struct pcmcia_device *p_dev, + tuple_t *tuple, + void *priv) { - struct net_device *dev = link->priv; - struct smc_cfg_mem *cfg_mem; - tuple_t *tuple; - u_char *buf; - int i, rc; + struct net_device *dev = priv; + int i; - cfg_mem = kmalloc(sizeof(struct smc_cfg_mem), GFP_KERNEL); - if (!cfg_mem) - return -1; + if (tuple->TupleDataLen < 8) + return -EINVAL; + if (tuple->TupleData[0] != 0x04) + return -EINVAL; + for (i = 0; i < 6; i++) + dev->dev_addr[i] = tuple->TupleData[i+2]; + return 0; +}; - tuple = &cfg_mem->tuple; - buf = cfg_mem->buf; - tuple->Attributes = TUPLE_RETURN_COMMON; - tuple->TupleData = (cisdata_t *)buf; - tuple->TupleDataMax = 255; - tuple->TupleOffset = 0; +static int osi_setup(struct pcmcia_device *link, u_short manfid, u_short cardid) +{ + struct net_device *dev = link->priv; + int rc; /* Read the station address from tuple 0x90, subtuple 0x04 */ - tuple->DesiredTuple = 0x90; - i = pcmcia_get_first_tuple(link, tuple); - while (i == 0) { - i = pcmcia_get_tuple_data(link, tuple); - if ((i != 0) || (buf[0] == 0x04)) - break; - i = pcmcia_get_next_tuple(link, tuple); - } - if (i != 0) { - rc = -1; - goto free_cfg_mem; - } - for (i = 0; i < 6; i++) - dev->dev_addr[i] = buf[i+2]; + if (pcmcia_loop_tuple(link, 0x90, pcmcia_osi_mac, dev)) + return -1; if (((manfid == MANFID_OSITECH) && (cardid == PRODID_OSITECH_SEVEN)) || @@ -836,20 +730,17 @@ static int osi_setup(struct pcmcia_device *link, u_short manfid, u_short cardid) (cardid == PRODID_PSION_NET100))) { rc = osi_load_firmware(link); if (rc) - goto free_cfg_mem; + return rc; } else if (manfid == MANFID_OSITECH) { /* Make sure both functions are powered up */ set_bits(0x300, link->io.BasePort1 + OSITECH_AUI_PWR); /* Now, turn on the interrupt for both card functions */ set_bits(0x300, link->io.BasePort1 + OSITECH_RESET_ISR); - DEBUG(2, "AUI/PWR: %4.4x RESET/ISR: %4.4x\n", + dev_dbg(&link->dev, "AUI/PWR: %4.4x RESET/ISR: %4.4x\n", inw(link->io.BasePort1 + OSITECH_AUI_PWR), inw(link->io.BasePort1 + OSITECH_RESET_ISR)); } - rc = 0; -free_cfg_mem: - kfree(cfg_mem); - return rc; + return 0; } static int smc91c92_suspend(struct pcmcia_device *link) @@ -959,12 +850,6 @@ static int check_sig(struct pcmcia_device *link) ======================================================================*/ -#define CS_EXIT_TEST(ret, svc, label) \ -if (ret != 0) { \ - cs_error(link, svc, ret); \ - goto label; \ -} - static int smc91c92_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; @@ -974,7 +859,7 @@ static int smc91c92_config(struct pcmcia_device *link) unsigned int ioaddr; u_long mir; - DEBUG(0, "smc91c92_config(0x%p)\n", link); + dev_dbg(&link->dev, "smc91c92_config\n"); smc->manfid = link->manf_id; smc->cardid = link->card_id; @@ -990,12 +875,15 @@ static int smc91c92_config(struct pcmcia_device *link) } else { i = smc_config(link); } - CS_EXIT_TEST(i, RequestIO, config_failed); + if (i) + goto config_failed; i = pcmcia_request_irq(link, &link->irq); - CS_EXIT_TEST(i, RequestIRQ, config_failed); + if (i) + goto config_failed; i = pcmcia_request_configuration(link, &link->conf); - CS_EXIT_TEST(i, RequestConfiguration, config_failed); + if (i) + goto config_failed; if (smc->manfid == MANFID_MOTOROLA) mot_config(link); @@ -1074,7 +962,7 @@ static int smc91c92_config(struct pcmcia_device *link) } link->dev_node = &smc->node; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); if (register_netdev(dev) != 0) { printk(KERN_ERR "smc91c92_cs: register_netdev() failed\n"); @@ -1100,7 +988,7 @@ static int smc91c92_config(struct pcmcia_device *link) if (smc->cfg & CFG_MII_SELECT) { if (smc->mii_if.phy_id != -1) { - DEBUG(0, " MII transceiver at index %d, status %x.\n", + dev_dbg(&link->dev, " MII transceiver at index %d, status %x.\n", smc->mii_if.phy_id, j); } else { printk(KERN_NOTICE " No MII transceivers found!\n"); @@ -1110,7 +998,7 @@ static int smc91c92_config(struct pcmcia_device *link) config_undo: unregister_netdev(dev); -config_failed: /* CS_EXIT_TEST() calls jump to here... */ +config_failed: smc91c92_release(link); return -ENODEV; } /* smc91c92_config */ @@ -1125,7 +1013,7 @@ config_failed: /* CS_EXIT_TEST() calls jump to here... */ static void smc91c92_release(struct pcmcia_device *link) { - DEBUG(0, "smc91c92_release(0x%p)\n", link); + dev_dbg(&link->dev, "smc91c92_release\n"); if (link->win) { struct net_device *dev = link->priv; struct smc_private *smc = netdev_priv(dev); @@ -1222,10 +1110,10 @@ static int smc_open(struct net_device *dev) struct smc_private *smc = netdev_priv(dev); struct pcmcia_device *link = smc->p_dev; -#ifdef PCMCIA_DEBUG - DEBUG(0, "%s: smc_open(%p), ID/Window %4.4x.\n", + dev_dbg(&link->dev, "%s: smc_open(%p), ID/Window %4.4x.\n", dev->name, dev, inw(dev->base_addr + BANK_SELECT)); - if (pc_debug > 1) smc_dump(dev); +#ifdef PCMCIA_DEBUG + smc_dump(dev); #endif /* Check that the PCMCIA card is still here. */ @@ -1260,7 +1148,7 @@ static int smc_close(struct net_device *dev) struct pcmcia_device *link = smc->p_dev; unsigned int ioaddr = dev->base_addr; - DEBUG(0, "%s: smc_close(), status %4.4x.\n", + dev_dbg(&link->dev, "%s: smc_close(), status %4.4x.\n", dev->name, inw(ioaddr + BANK_SELECT)); netif_stop_queue(dev); @@ -1327,7 +1215,7 @@ static void smc_hardware_send_packet(struct net_device * dev) u_char *buf = skb->data; u_int length = skb->len; /* The chip will pad to ethernet min. */ - DEBUG(2, "%s: Trying to xmit packet of length %d.\n", + pr_debug("%s: Trying to xmit packet of length %d.\n", dev->name, length); /* send the packet length: +6 for status word, length, and ctl */ @@ -1382,7 +1270,7 @@ static netdev_tx_t smc_start_xmit(struct sk_buff *skb, netif_stop_queue(dev); - DEBUG(2, "%s: smc_start_xmit(length = %d) called," + pr_debug("%s: smc_start_xmit(length = %d) called," " status %4.4x.\n", dev->name, skb->len, inw(ioaddr + 2)); if (smc->saved_skb) { @@ -1429,7 +1317,7 @@ static netdev_tx_t smc_start_xmit(struct sk_buff *skb, } /* Otherwise defer until the Tx-space-allocated interrupt. */ - DEBUG(2, "%s: memory allocation deferred.\n", dev->name); + pr_debug("%s: memory allocation deferred.\n", dev->name); outw((IM_ALLOC_INT << 8) | (ir & 0xff00), ioaddr + INTERRUPT); spin_unlock_irqrestore(&smc->lock, flags); @@ -1494,7 +1382,7 @@ static void smc_eph_irq(struct net_device *dev) SMC_SELECT_BANK(0); ephs = inw(ioaddr + EPH); - DEBUG(2, "%s: Ethernet protocol handler interrupt, status" + pr_debug("%s: Ethernet protocol handler interrupt, status" " %4.4x.\n", dev->name, ephs); /* Could be a counter roll-over warning: update stats. */ card_stats = inw(ioaddr + COUNTER); @@ -1534,7 +1422,7 @@ static irqreturn_t smc_interrupt(int irq, void *dev_id) ioaddr = dev->base_addr; - DEBUG(3, "%s: SMC91c92 interrupt %d at %#x.\n", dev->name, + pr_debug("%s: SMC91c92 interrupt %d at %#x.\n", dev->name, irq, ioaddr); spin_lock(&smc->lock); @@ -1543,7 +1431,7 @@ static irqreturn_t smc_interrupt(int irq, void *dev_id) if ((saved_bank & 0xff00) != 0x3300) { /* The device does not exist -- the card could be off-line, or maybe it has been ejected. */ - DEBUG(1, "%s: SMC91c92 interrupt %d for non-existent" + pr_debug("%s: SMC91c92 interrupt %d for non-existent" "/ejected device.\n", dev->name, irq); handled = 0; goto irq_done; @@ -1557,7 +1445,7 @@ static irqreturn_t smc_interrupt(int irq, void *dev_id) do { /* read the status flag, and mask it */ status = inw(ioaddr + INTERRUPT) & 0xff; - DEBUG(3, "%s: Status is %#2.2x (mask %#2.2x).\n", dev->name, + pr_debug("%s: Status is %#2.2x (mask %#2.2x).\n", dev->name, status, mask); if ((status & mask) == 0) { if (bogus_cnt == INTR_WORK) @@ -1602,7 +1490,7 @@ static irqreturn_t smc_interrupt(int irq, void *dev_id) smc_eph_irq(dev); } while (--bogus_cnt); - DEBUG(3, " Restoring saved registers mask %2.2x bank %4.4x" + pr_debug(" Restoring saved registers mask %2.2x bank %4.4x" " pointer %4.4x.\n", mask, saved_bank, saved_pointer); /* restore state register */ @@ -1610,7 +1498,7 @@ static irqreturn_t smc_interrupt(int irq, void *dev_id) outw(saved_pointer, ioaddr + POINTER); SMC_SELECT_BANK(saved_bank); - DEBUG(3, "%s: Exiting interrupt IRQ%d.\n", dev->name, irq); + pr_debug("%s: Exiting interrupt IRQ%d.\n", dev->name, irq); irq_done: @@ -1661,7 +1549,7 @@ static void smc_rx(struct net_device *dev) rx_status = inw(ioaddr + DATA_1); packet_length = inw(ioaddr + DATA_1) & 0x07ff; - DEBUG(2, "%s: Receive status %4.4x length %d.\n", + pr_debug("%s: Receive status %4.4x length %d.\n", dev->name, rx_status, packet_length); if (!(rx_status & RS_ERRORS)) { @@ -1672,7 +1560,7 @@ static void smc_rx(struct net_device *dev) skb = dev_alloc_skb(packet_length+2); if (skb == NULL) { - DEBUG(1, "%s: Low memory, packet dropped.\n", dev->name); + pr_debug("%s: Low memory, packet dropped.\n", dev->name); dev->stats.rx_dropped++; outw(MC_RELEASE, ioaddr + MMU_CMD); return; @@ -1832,7 +1720,7 @@ static void smc_reset(struct net_device *dev) struct smc_private *smc = netdev_priv(dev); int i; - DEBUG(0, "%s: smc91c92 reset called.\n", dev->name); + pr_debug("%s: smc91c92 reset called.\n", dev->name); /* The first interaction must be a write to bring the chip out of sleep mode. */ @@ -2149,18 +2037,6 @@ static u32 smc_get_link(struct net_device *dev) return ret; } -#ifdef PCMCIA_DEBUG -static u32 smc_get_msglevel(struct net_device *dev) -{ - return pc_debug; -} - -static void smc_set_msglevel(struct net_device *dev, u32 val) -{ - pc_debug = val; -} -#endif - static int smc_nway_reset(struct net_device *dev) { struct smc_private *smc = netdev_priv(dev); @@ -2184,10 +2060,6 @@ static const struct ethtool_ops ethtool_ops = { .get_settings = smc_get_settings, .set_settings = smc_set_settings, .get_link = smc_get_link, -#ifdef PCMCIA_DEBUG - .get_msglevel = smc_get_msglevel, - .set_msglevel = smc_set_msglevel, -#endif .nway_reset = smc_nway_reset, }; diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c index cf842310253..fe504b7f369 100644 --- a/drivers/net/pcmcia/xirc2ps_cs.c +++ b/drivers/net/pcmcia/xirc2ps_cs.c @@ -211,20 +211,6 @@ enum xirc_cmd { /* Commands */ static const char *if_names[] = { "Auto", "10BaseT", "10Base2", "AUI", "100BaseT" }; -/**************** - * All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - * you do not define PCMCIA_DEBUG at all, all the debug code will be - * left out. If you compile with PCMCIA_DEBUG=0, the debug code will - * be present but disabled -- but it can then be enabled for specific - * modules at load time with a 'pc_debug=#' option to insmod. - */ -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KDBG_XIRC args) -#else -#define DEBUG(n, args...) -#endif #define KDBG_XIRC KERN_DEBUG "xirc2ps_cs: " #define KERR_XIRC KERN_ERR "xirc2ps_cs: " @@ -359,7 +345,7 @@ static void xirc_tx_timeout(struct net_device *dev); static void xirc2ps_tx_timeout_task(struct work_struct *work); static void set_addresses(struct net_device *dev); static void set_multicast_list(struct net_device *dev); -static int set_card_type(struct pcmcia_device *link, const void *s); +static int set_card_type(struct pcmcia_device *link); static int do_config(struct net_device *dev, struct ifmap *map); static int do_open(struct net_device *dev); static int do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); @@ -371,28 +357,6 @@ static void do_powerdown(struct net_device *dev); static int do_stop(struct net_device *dev); /*=============== Helper functions =========================*/ -static int -first_tuple(struct pcmcia_device *handle, tuple_t *tuple, cisparse_t *parse) -{ - int err; - - if ((err = pcmcia_get_first_tuple(handle, tuple)) == 0 && - (err = pcmcia_get_tuple_data(handle, tuple)) == 0) - err = pcmcia_parse_tuple(tuple, parse); - return err; -} - -static int -next_tuple(struct pcmcia_device *handle, tuple_t *tuple, cisparse_t *parse) -{ - int err; - - if ((err = pcmcia_get_next_tuple(handle, tuple)) == 0 && - (err = pcmcia_get_tuple_data(handle, tuple)) == 0) - err = pcmcia_parse_tuple(tuple, parse); - return err; -} - #define SelectPage(pgnr) outb((pgnr), ioaddr + XIRCREG_PR) #define GetByte(reg) ((unsigned)inb(ioaddr + (reg))) #define GetWord(reg) ((unsigned)inw(ioaddr + (reg))) @@ -400,7 +364,7 @@ next_tuple(struct pcmcia_device *handle, tuple_t *tuple, cisparse_t *parse) #define PutWord(reg,value) outw((value), ioaddr+(reg)) /*====== Functions used for debugging =================================*/ -#if defined(PCMCIA_DEBUG) && 0 /* reading regs may change system status */ +#if 0 /* reading regs may change system status */ static void PrintRegisters(struct net_device *dev) { @@ -432,7 +396,7 @@ PrintRegisters(struct net_device *dev) } } } -#endif /* PCMCIA_DEBUG */ +#endif /* 0 */ /*============== MII Management functions ===============*/ @@ -576,7 +540,7 @@ xirc2ps_probe(struct pcmcia_device *link) struct net_device *dev; local_info_t *local; - DEBUG(0, "attach()\n"); + dev_dbg(&link->dev, "attach()\n"); /* Allocate the device structure */ dev = alloc_etherdev(sizeof(local_info_t)); @@ -592,7 +556,6 @@ xirc2ps_probe(struct pcmcia_device *link) link->conf.IntType = INT_MEMORY_AND_IO; link->conf.ConfigIndex = 1; link->irq.Handler = xirc2ps_interrupt; - link->irq.Instance = dev; /* Fill in card specific entries */ dev->netdev_ops = &netdev_ops; @@ -615,7 +578,7 @@ xirc2ps_detach(struct pcmcia_device *link) { struct net_device *dev = link->priv; - DEBUG(0, "detach(0x%p)\n", link); + dev_dbg(&link->dev, "detach\n"); if (link->dev_node) unregister_netdev(dev); @@ -644,17 +607,25 @@ xirc2ps_detach(struct pcmcia_device *link) * */ static int -set_card_type(struct pcmcia_device *link, const void *s) +set_card_type(struct pcmcia_device *link) { struct net_device *dev = link->priv; local_info_t *local = netdev_priv(dev); - #ifdef PCMCIA_DEBUG - unsigned cisrev = ((const unsigned char *)s)[2]; - #endif - unsigned mediaid= ((const unsigned char *)s)[3]; - unsigned prodid = ((const unsigned char *)s)[4]; + u8 *buf; + unsigned int cisrev, mediaid, prodid; + size_t len; + + len = pcmcia_get_tuple(link, CISTPL_MANFID, &buf); + if (len < 5) { + dev_err(&link->dev, "invalid CIS -- sorry\n"); + return 0; + } - DEBUG(0, "cisrev=%02x mediaid=%02x prodid=%02x\n", + cisrev = buf[2]; + mediaid = buf[3]; + prodid = buf[4]; + + dev_dbg(&link->dev, "cisrev=%02x mediaid=%02x prodid=%02x\n", cisrev, mediaid, prodid); local->mohawk = 0; @@ -761,6 +732,26 @@ xirc2ps_config_check(struct pcmcia_device *p_dev, } + +static int pcmcia_get_mac_ce(struct pcmcia_device *p_dev, + tuple_t *tuple, + void *priv) +{ + struct net_device *dev = priv; + int i; + + if (tuple->TupleDataLen != 13) + return -EINVAL; + if ((tuple->TupleData[0] != 2) || (tuple->TupleData[1] != 1) || + (tuple->TupleData[2] != 6)) + return -EINVAL; + /* another try (James Lehmer's CE2 version 4.1)*/ + for (i = 2; i < 6; i++) + dev->dev_addr[i] = tuple->TupleData[i+2]; + return 0; +}; + + /**************** * xirc2ps_config() is scheduled to run after a CARD_INSERTION event * is received, to configure the PCMCIA socket, and to make the @@ -772,33 +763,21 @@ xirc2ps_config(struct pcmcia_device * link) struct net_device *dev = link->priv; local_info_t *local = netdev_priv(dev); unsigned int ioaddr; - tuple_t tuple; - cisparse_t parse; - int err, i; - u_char buf[64]; - cistpl_lan_node_id_t *node_id = (cistpl_lan_node_id_t*)parse.funce.data; + int err; + u8 *buf; + size_t len; local->dingo_ccr = NULL; - DEBUG(0, "config(0x%p)\n", link); - - /* - * This reads the card's CONFIG tuple to find its configuration - * registers. - */ - tuple.Attributes = 0; - tuple.TupleData = buf; - tuple.TupleDataMax = 64; - tuple.TupleOffset = 0; + dev_dbg(&link->dev, "config\n"); /* Is this a valid card */ - tuple.DesiredTuple = CISTPL_MANFID; - if ((err=first_tuple(link, &tuple, &parse))) { + if (link->has_manf_id == 0) { printk(KNOT_XIRC "manfid not found in CIS\n"); goto failure; } - switch(parse.manfid.manf) { + switch (link->manf_id) { case MANFID_XIRCOM: local->manf_str = "Xircom"; break; @@ -817,65 +796,44 @@ xirc2ps_config(struct pcmcia_device * link) break; default: printk(KNOT_XIRC "Unknown Card Manufacturer ID: 0x%04x\n", - (unsigned)parse.manfid.manf); + (unsigned)link->manf_id); goto failure; } - DEBUG(0, "found %s card\n", local->manf_str); + dev_dbg(&link->dev, "found %s card\n", local->manf_str); - if (!set_card_type(link, buf)) { + if (!set_card_type(link)) { printk(KNOT_XIRC "this card is not supported\n"); goto failure; } /* get the ethernet address from the CIS */ - tuple.DesiredTuple = CISTPL_FUNCE; - for (err = first_tuple(link, &tuple, &parse); !err; - err = next_tuple(link, &tuple, &parse)) { - /* Once I saw two CISTPL_FUNCE_LAN_NODE_ID entries: - * the first one with a length of zero the second correct - - * so I skip all entries with length 0 */ - if (parse.funce.type == CISTPL_FUNCE_LAN_NODE_ID - && ((cistpl_lan_node_id_t *)parse.funce.data)->nb) - break; - } - if (err) { /* not found: try to get the node-id from tuple 0x89 */ - tuple.DesiredTuple = 0x89; /* data layout looks like tuple 0x22 */ - if ((err = pcmcia_get_first_tuple(link, &tuple)) == 0 && - (err = pcmcia_get_tuple_data(link, &tuple)) == 0) { - if (tuple.TupleDataLen == 8 && *buf == CISTPL_FUNCE_LAN_NODE_ID) - memcpy(&parse, buf, 8); - else - err = -1; - } - } - if (err) { /* another try (James Lehmer's CE2 version 4.1)*/ - tuple.DesiredTuple = CISTPL_FUNCE; - for (err = first_tuple(link, &tuple, &parse); !err; - err = next_tuple(link, &tuple, &parse)) { - if (parse.funce.type == 0x02 && parse.funce.data[0] == 1 - && parse.funce.data[1] == 6 && tuple.TupleDataLen == 13) { - buf[1] = 4; - memcpy(&parse, buf+1, 8); - break; + err = pcmcia_get_mac_from_cis(link, dev); + + /* not found: try to get the node-id from tuple 0x89 */ + if (err) { + len = pcmcia_get_tuple(link, 0x89, &buf); + /* data layout looks like tuple 0x22 */ + if (buf && len == 8) { + if (*buf == CISTPL_FUNCE_LAN_NODE_ID) { + int i; + for (i = 2; i < 6; i++) + dev->dev_addr[i] = buf[i+2]; + } else + err = -1; } - } + kfree(buf); } + + if (err) + err = pcmcia_loop_tuple(link, CISTPL_FUNCE, pcmcia_get_mac_ce, dev); + if (err) { printk(KNOT_XIRC "node-id not found in CIS\n"); goto failure; } - node_id = (cistpl_lan_node_id_t *)parse.funce.data; - if (node_id->nb != 6) { - printk(KNOT_XIRC "malformed node-id in CIS\n"); - goto failure; - } - for (i=0; i < 6; i++) - dev->dev_addr[i] = node_id->id[i]; link->io.IOAddrLines =10; link->io.Attributes1 = IO_DATA_PATH_WIDTH_16; - link->irq.Attributes = IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; if (local->modem) { int pass; @@ -916,10 +874,8 @@ xirc2ps_config(struct pcmcia_device * link) goto port_found; } link->io.BasePort1 = 0; /* let CS decide */ - if ((err=pcmcia_request_io(link, &link->io))) { - cs_error(link, RequestIO, err); + if ((err=pcmcia_request_io(link, &link->io))) goto config_error; - } } port_found: if (err) @@ -929,19 +885,15 @@ xirc2ps_config(struct pcmcia_device * link) * Now allocate an interrupt line. Note that this does not * actually assign a handler to the interrupt. */ - if ((err=pcmcia_request_irq(link, &link->irq))) { - cs_error(link, RequestIRQ, err); + if ((err=pcmcia_request_irq(link, &link->irq))) goto config_error; - } /**************** * This actually configures the PCMCIA socket -- setting up * the I/O windows and the interrupt mapping. */ - if ((err=pcmcia_request_configuration(link, &link->conf))) { - cs_error(link, RequestConfiguration, err); + if ((err=pcmcia_request_configuration(link, &link->conf))) goto config_error; - } if (local->dingo) { conf_reg_t reg; @@ -956,17 +908,13 @@ xirc2ps_config(struct pcmcia_device * link) reg.Action = CS_WRITE; reg.Offset = CISREG_IOBASE_0; reg.Value = link->io.BasePort2 & 0xff; - if ((err = pcmcia_access_configuration_register(link, ®))) { - cs_error(link, AccessConfigurationRegister, err); + if ((err = pcmcia_access_configuration_register(link, ®))) goto config_error; - } reg.Action = CS_WRITE; reg.Offset = CISREG_IOBASE_1; reg.Value = (link->io.BasePort2 >> 8) & 0xff; - if ((err = pcmcia_access_configuration_register(link, ®))) { - cs_error(link, AccessConfigurationRegister, err); + if ((err = pcmcia_access_configuration_register(link, ®))) goto config_error; - } /* There is no config entry for the Ethernet part which * is at 0x0800. So we allocate a window into the attribute @@ -975,17 +923,14 @@ xirc2ps_config(struct pcmcia_device * link) req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_AM|WIN_ENABLE; req.Base = req.Size = 0; req.AccessSpeed = 0; - if ((err = pcmcia_request_window(&link, &req, &link->win))) { - cs_error(link, RequestWindow, err); + if ((err = pcmcia_request_window(link, &req, &link->win))) goto config_error; - } + local->dingo_ccr = ioremap(req.Base,0x1000) + 0x0800; mem.CardOffset = 0x0; mem.Page = 0; - if ((err = pcmcia_map_mem_page(link->win, &mem))) { - cs_error(link, MapMemPage, err); + if ((err = pcmcia_map_mem_page(link, link->win, &mem))) goto config_error; - } /* Setup the CCRs; there are no infos in the CIS about the Ethernet * part. @@ -1044,7 +989,7 @@ xirc2ps_config(struct pcmcia_device * link) do_reset(dev, 1); /* a kludge to make the cem56 work */ link->dev_node = &local->node; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); if ((err=register_netdev(dev))) { printk(KNOT_XIRC "register_netdev() failed\n"); @@ -1077,7 +1022,7 @@ xirc2ps_config(struct pcmcia_device * link) static void xirc2ps_release(struct pcmcia_device *link) { - DEBUG(0, "release(0x%p)\n", link); + dev_dbg(&link->dev, "release\n"); if (link->win) { struct net_device *dev = link->priv; @@ -1144,7 +1089,7 @@ xirc2ps_interrupt(int irq, void *dev_id) PutByte(XIRCREG_CR, 0); } - DEBUG(6, "%s: interrupt %d at %#x.\n", dev->name, irq, ioaddr); + pr_debug("%s: interrupt %d at %#x.\n", dev->name, irq, ioaddr); saved_page = GetByte(XIRCREG_PR); /* Read the ISR to see whats the cause for the interrupt. @@ -1154,7 +1099,7 @@ xirc2ps_interrupt(int irq, void *dev_id) bytes_rcvd = 0; loop_entry: if (int_status == 0xff) { /* card may be ejected */ - DEBUG(3, "%s: interrupt %d for dead card\n", dev->name, irq); + pr_debug("%s: interrupt %d for dead card\n", dev->name, irq); goto leave; } eth_status = GetByte(XIRCREG_ESR); @@ -1167,7 +1112,7 @@ xirc2ps_interrupt(int irq, void *dev_id) PutByte(XIRCREG40_TXST0, 0); PutByte(XIRCREG40_TXST1, 0); - DEBUG(3, "%s: ISR=%#2.2x ESR=%#2.2x RSR=%#2.2x TSR=%#4.4x\n", + pr_debug("%s: ISR=%#2.2x ESR=%#2.2x RSR=%#2.2x TSR=%#4.4x\n", dev->name, int_status, eth_status, rx_status, tx_status); /***** receive section ******/ @@ -1178,14 +1123,14 @@ xirc2ps_interrupt(int irq, void *dev_id) /* too many bytes received during this int, drop the rest of the * packets */ dev->stats.rx_dropped++; - DEBUG(2, "%s: RX drop, too much done\n", dev->name); + pr_debug("%s: RX drop, too much done\n", dev->name); } else if (rsr & PktRxOk) { struct sk_buff *skb; pktlen = GetWord(XIRCREG0_RBC); bytes_rcvd += pktlen; - DEBUG(5, "rsr=%#02x packet_length=%u\n", rsr, pktlen); + pr_debug("rsr=%#02x packet_length=%u\n", rsr, pktlen); skb = dev_alloc_skb(pktlen+3); /* 1 extra so we can use insw */ if (!skb) { @@ -1253,19 +1198,19 @@ xirc2ps_interrupt(int irq, void *dev_id) dev->stats.multicast++; } } else { /* bad packet */ - DEBUG(5, "rsr=%#02x\n", rsr); + pr_debug("rsr=%#02x\n", rsr); } if (rsr & PktTooLong) { dev->stats.rx_frame_errors++; - DEBUG(3, "%s: Packet too long\n", dev->name); + pr_debug("%s: Packet too long\n", dev->name); } if (rsr & CRCErr) { dev->stats.rx_crc_errors++; - DEBUG(3, "%s: CRC error\n", dev->name); + pr_debug("%s: CRC error\n", dev->name); } if (rsr & AlignErr) { dev->stats.rx_fifo_errors++; /* okay ? */ - DEBUG(3, "%s: Alignment error\n", dev->name); + pr_debug("%s: Alignment error\n", dev->name); } /* clear the received/dropped/error packet */ @@ -1277,7 +1222,7 @@ xirc2ps_interrupt(int irq, void *dev_id) if (rx_status & 0x10) { /* Receive overrun */ dev->stats.rx_over_errors++; PutByte(XIRCREG_CR, ClearRxOvrun); - DEBUG(3, "receive overrun cleared\n"); + pr_debug("receive overrun cleared\n"); } /***** transmit section ******/ @@ -1290,13 +1235,13 @@ xirc2ps_interrupt(int irq, void *dev_id) if (nn < n) /* rollover */ dev->stats.tx_packets += 256 - n; else if (n == nn) { /* happens sometimes - don't know why */ - DEBUG(0, "PTR not changed?\n"); + pr_debug("PTR not changed?\n"); } else dev->stats.tx_packets += lp->last_ptr_value - n; netif_wake_queue(dev); } if (tx_status & 0x0002) { /* Execessive collissions */ - DEBUG(0, "tx restarted due to execssive collissions\n"); + pr_debug("tx restarted due to execssive collissions\n"); PutByte(XIRCREG_CR, RestartTx); /* restart transmitter process */ } if (tx_status & 0x0040) @@ -1315,14 +1260,14 @@ xirc2ps_interrupt(int irq, void *dev_id) maxrx_bytes = 2000; else if (maxrx_bytes > 22000) maxrx_bytes = 22000; - DEBUG(1, "set maxrx=%u (rcvd=%u ticks=%lu)\n", + pr_debug("set maxrx=%u (rcvd=%u ticks=%lu)\n", maxrx_bytes, bytes_rcvd, duration); } else if (!duration && maxrx_bytes < 22000) { /* now much faster */ maxrx_bytes += 2000; if (maxrx_bytes > 22000) maxrx_bytes = 22000; - DEBUG(1, "set maxrx=%u\n", maxrx_bytes); + pr_debug("set maxrx=%u\n", maxrx_bytes); } } @@ -1372,7 +1317,7 @@ do_start_xmit(struct sk_buff *skb, struct net_device *dev) unsigned freespace; unsigned pktlen = skb->len; - DEBUG(1, "do_start_xmit(skb=%p, dev=%p) len=%u\n", + pr_debug("do_start_xmit(skb=%p, dev=%p) len=%u\n", skb, dev, pktlen); @@ -1398,7 +1343,7 @@ do_start_xmit(struct sk_buff *skb, struct net_device *dev) freespace &= 0x7fff; /* TRS doesn't work - (indeed it is eliminated with sil-rev 1) */ okay = pktlen +2 < freespace; - DEBUG(2 + (okay ? 2 : 0), "%s: avail. tx space=%u%s\n", + pr_debug("%s: avail. tx space=%u%s\n", dev->name, freespace, okay ? " (okay)":" (not enough)"); if (!okay) { /* not enough space */ return NETDEV_TX_BUSY; /* upper layer may decide to requeue this packet */ @@ -1500,7 +1445,7 @@ do_config(struct net_device *dev, struct ifmap *map) { local_info_t *local = netdev_priv(dev); - DEBUG(0, "do_config(%p)\n", dev); + pr_debug("do_config(%p)\n", dev); if (map->port != 255 && map->port != dev->if_port) { if (map->port > 4) return -EINVAL; @@ -1527,7 +1472,7 @@ do_open(struct net_device *dev) local_info_t *lp = netdev_priv(dev); struct pcmcia_device *link = lp->p_dev; - DEBUG(0, "do_open(%p)\n", dev); + dev_dbg(&link->dev, "do_open(%p)\n", dev); /* Check that the PCMCIA card is still here. */ /* Physical device present signature. */ @@ -1561,7 +1506,7 @@ do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) unsigned int ioaddr = dev->base_addr; struct mii_ioctl_data *data = if_mii(rq); - DEBUG(1, "%s: ioctl(%-.6s, %#04x) %04x %04x %04x %04x\n", + pr_debug("%s: ioctl(%-.6s, %#04x) %04x %04x %04x %04x\n", dev->name, rq->ifr_ifrn.ifrn_name, cmd, data->phy_id, data->reg_num, data->val_in, data->val_out); @@ -1610,7 +1555,7 @@ do_reset(struct net_device *dev, int full) unsigned int ioaddr = dev->base_addr; unsigned value; - DEBUG(0, "%s: do_reset(%p,%d)\n", dev? dev->name:"eth?", dev, full); + pr_debug("%s: do_reset(%p,%d)\n", dev? dev->name:"eth?", dev, full); hardreset(dev); PutByte(XIRCREG_CR, SoftReset); /* set */ @@ -1648,8 +1593,8 @@ do_reset(struct net_device *dev, int full) } msleep(40); /* wait 40 msec to let it complete */ - #ifdef PCMCIA_DEBUG - if (pc_debug) { + #if 0 + { SelectPage(0); value = GetByte(XIRCREG_ESR); /* read the ESR */ printk(KERN_DEBUG "%s: ESR is: %#02x\n", dev->name, value); @@ -1666,7 +1611,7 @@ do_reset(struct net_device *dev, int full) value |= DisableLinkPulse; PutByte(XIRCREG1_ECR, value); #endif - DEBUG(0, "%s: ECR is: %#02x\n", dev->name, value); + pr_debug("%s: ECR is: %#02x\n", dev->name, value); SelectPage(0x42); PutByte(XIRCREG42_SWC0, 0x20); /* disable source insertion */ @@ -1844,7 +1789,7 @@ do_powerdown(struct net_device *dev) unsigned int ioaddr = dev->base_addr; - DEBUG(0, "do_powerdown(%p)\n", dev); + pr_debug("do_powerdown(%p)\n", dev); SelectPage(4); PutByte(XIRCREG4_GPR1, 0); /* clear bit 0: power down */ @@ -1858,7 +1803,7 @@ do_stop(struct net_device *dev) local_info_t *lp = netdev_priv(dev); struct pcmcia_device *link = lp->p_dev; - DEBUG(0, "do_stop(%p)\n", dev); + dev_dbg(&link->dev, "do_stop(%p)\n", dev); if (!link) return -ENODEV; diff --git a/drivers/net/wireless/airo_cs.c b/drivers/net/wireless/airo_cs.c index d0593ed9170..f6036fb4231 100644 --- a/drivers/net/wireless/airo_cs.c +++ b/drivers/net/wireless/airo_cs.c @@ -43,21 +43,6 @@ #include "airo.h" -/* - All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - you do not define PCMCIA_DEBUG at all, all the debug code will be - left out. If you compile with PCMCIA_DEBUG=0, the debug code will - be present but disabled -- but it can then be enabled for specific - modules at load time with a 'pc_debug=#' option to insmod. -*/ -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -static char *version = "$Revision: 1.2 $"; -#define DEBUG(n, args...) if (pc_debug > (n)) printk(KERN_DEBUG args); -#else -#define DEBUG(n, args...) -#endif /*====================================================================*/ @@ -145,11 +130,10 @@ static int airo_probe(struct pcmcia_device *p_dev) { local_info_t *local; - DEBUG(0, "airo_attach()\n"); + dev_dbg(&p_dev->dev, "airo_attach()\n"); /* Interrupt setup */ p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID; p_dev->irq.Handler = NULL; /* @@ -184,7 +168,7 @@ static int airo_probe(struct pcmcia_device *p_dev) static void airo_detach(struct pcmcia_device *link) { - DEBUG(0, "airo_detach(0x%p)\n", link); + dev_dbg(&link->dev, "airo_detach\n"); airo_release(link); @@ -204,9 +188,6 @@ static void airo_detach(struct pcmcia_device *link) ======================================================================*/ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int airo_cs_config_check(struct pcmcia_device *p_dev, cistpl_cftable_entry_t *cfg, cistpl_cftable_entry_t *dflt, @@ -275,11 +256,11 @@ static int airo_cs_config_check(struct pcmcia_device *p_dev, req->Base = mem->win[0].host_addr; req->Size = mem->win[0].len; req->AccessSpeed = 0; - if (pcmcia_request_window(&p_dev, req, &p_dev->win) != 0) + if (pcmcia_request_window(p_dev, req, &p_dev->win) != 0) return -ENODEV; map.Page = 0; map.CardOffset = mem->win[0].card_addr; - if (pcmcia_map_mem_page(p_dev->win, &map) != 0) + if (pcmcia_map_mem_page(p_dev, p_dev->win, &map) != 0) return -ENODEV; } /* If we got this far, we're cool! */ @@ -291,11 +272,11 @@ static int airo_config(struct pcmcia_device *link) { local_info_t *dev; win_req_t *req; - int last_fn, last_ret; + int ret; dev = link->priv; - DEBUG(0, "airo_config(0x%p)\n", link); + dev_dbg(&link->dev, "airo_config\n"); req = kzalloc(sizeof(win_req_t), GFP_KERNEL); if (!req) @@ -315,8 +296,8 @@ static int airo_config(struct pcmcia_device *link) * and most client drivers will only use the CIS to fill in * implementation-defined details. */ - last_ret = pcmcia_loop_config(link, airo_cs_config_check, req); - if (last_ret) + ret = pcmcia_loop_config(link, airo_cs_config_check, req); + if (ret) goto failed; /* @@ -324,21 +305,25 @@ static int airo_config(struct pcmcia_device *link) handler to the interrupt, unless the 'Handler' member of the irq structure is initialized. */ - if (link->conf.Attributes & CONF_ENABLE_IRQ) - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); + if (link->conf.Attributes & CONF_ENABLE_IRQ) { + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; + } /* This actually configures the PCMCIA socket -- setting up the I/O windows and the interrupt mapping, and putting the card and host interface into "Memory and IO" mode. */ - CS_CHECK(RequestConfiguration, - pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; ((local_info_t *)link->priv)->eth_dev = init_airo_card(link->irq.AssignedIRQ, - link->io.BasePort1, 1, &handle_to_dev(link)); + link->io.BasePort1, 1, &link->dev); if (!((local_info_t *)link->priv)->eth_dev) - goto cs_failed; + goto failed; /* At this point, the dev_node_t structure(s) need to be @@ -368,8 +353,6 @@ static int airo_config(struct pcmcia_device *link) kfree(req); return 0; - cs_failed: - cs_error(link, last_fn, last_ret); failed: airo_release(link); kfree(req); @@ -386,7 +369,7 @@ static int airo_config(struct pcmcia_device *link) static void airo_release(struct pcmcia_device *link) { - DEBUG(0, "airo_release(0x%p)\n", link); + dev_dbg(&link->dev, "airo_release\n"); pcmcia_disable_device(link); } diff --git a/drivers/net/wireless/atmel_cs.c b/drivers/net/wireless/atmel_cs.c index ddaa859c349..32407911842 100644 --- a/drivers/net/wireless/atmel_cs.c +++ b/drivers/net/wireless/atmel_cs.c @@ -55,22 +55,6 @@ #include "atmel.h" -/* - All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - you do not define PCMCIA_DEBUG at all, all the debug code will be - left out. If you compile with PCMCIA_DEBUG=0, the debug code will - be present but disabled -- but it can then be enabled for specific - modules at load time with a 'pc_debug=#' option to insmod. -*/ - -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -static char *version = "$Revision: 1.2 $"; -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args); -#else -#define DEBUG(n, args...) -#endif /*====================================================================*/ @@ -155,11 +139,10 @@ static int atmel_probe(struct pcmcia_device *p_dev) { local_info_t *local; - DEBUG(0, "atmel_attach()\n"); + dev_dbg(&p_dev->dev, "atmel_attach()\n"); /* Interrupt setup */ p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID; p_dev->irq.Handler = NULL; /* @@ -194,7 +177,7 @@ static int atmel_probe(struct pcmcia_device *p_dev) static void atmel_detach(struct pcmcia_device *link) { - DEBUG(0, "atmel_detach(0x%p)\n", link); + dev_dbg(&link->dev, "atmel_detach\n"); atmel_release(link); @@ -209,9 +192,6 @@ static void atmel_detach(struct pcmcia_device *link) ======================================================================*/ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - /* Call-back function to interrogate PCMCIA-specific information about the current existance of the card */ static int card_present(void *arg) @@ -275,13 +255,13 @@ static int atmel_config_check(struct pcmcia_device *p_dev, static int atmel_config(struct pcmcia_device *link) { local_info_t *dev; - int last_fn, last_ret; + int ret; struct pcmcia_device_id *did; dev = link->priv; - did = dev_get_drvdata(&handle_to_dev(link)); + did = dev_get_drvdata(&link->dev); - DEBUG(0, "atmel_config(0x%p)\n", link); + dev_dbg(&link->dev, "atmel_config\n"); /* In this loop, we scan the CIS for configuration table entries, @@ -303,31 +283,36 @@ static int atmel_config(struct pcmcia_device *link) handler to the interrupt, unless the 'Handler' member of the irq structure is initialized. */ - if (link->conf.Attributes & CONF_ENABLE_IRQ) - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); + if (link->conf.Attributes & CONF_ENABLE_IRQ) { + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; + } /* This actually configures the PCMCIA socket -- setting up the I/O windows and the interrupt mapping, and putting the card and host interface into "Memory and IO" mode. */ - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; if (link->irq.AssignedIRQ == 0) { printk(KERN_ALERT "atmel: cannot assign IRQ: check that CONFIG_ISA is set in kernel config."); - goto cs_failed; + goto failed; } ((local_info_t*)link->priv)->eth_dev = init_atmel_card(link->irq.AssignedIRQ, link->io.BasePort1, did ? did->driver_info : ATMEL_FW_TYPE_NONE, - &handle_to_dev(link), + &link->dev, card_present, link); if (!((local_info_t*)link->priv)->eth_dev) - goto cs_failed; + goto failed; /* @@ -340,8 +325,6 @@ static int atmel_config(struct pcmcia_device *link) return 0; - cs_failed: - cs_error(link, last_fn, last_ret); failed: atmel_release(link); return -ENODEV; @@ -359,7 +342,7 @@ static void atmel_release(struct pcmcia_device *link) { struct net_device *dev = ((local_info_t*)link->priv)->eth_dev; - DEBUG(0, "atmel_release(0x%p)\n", link); + dev_dbg(&link->dev, "atmel_release\n"); if (dev) stop_atmel_card(dev); diff --git a/drivers/net/wireless/b43/pcmcia.c b/drivers/net/wireless/b43/pcmcia.c index 6c3a74964ab..984174bc7b0 100644 --- a/drivers/net/wireless/b43/pcmcia.c +++ b/drivers/net/wireless/b43/pcmcia.c @@ -65,35 +65,15 @@ static int __devinit b43_pcmcia_probe(struct pcmcia_device *dev) struct ssb_bus *ssb; win_req_t win; memreq_t mem; - tuple_t tuple; - cisparse_t parse; int err = -ENOMEM; int res = 0; - unsigned char buf[64]; ssb = kzalloc(sizeof(*ssb), GFP_KERNEL); if (!ssb) goto out_error; err = -ENODEV; - tuple.DesiredTuple = CISTPL_CONFIG; - tuple.Attributes = 0; - tuple.TupleData = buf; - tuple.TupleDataMax = sizeof(buf); - tuple.TupleOffset = 0; - res = pcmcia_get_first_tuple(dev, &tuple); - if (res != 0) - goto err_kfree_ssb; - res = pcmcia_get_tuple_data(dev, &tuple); - if (res != 0) - goto err_kfree_ssb; - res = pcmcia_parse_tuple(&tuple, &parse); - if (res != 0) - goto err_kfree_ssb; - - dev->conf.ConfigBase = parse.config.base; - dev->conf.Present = parse.config.rmask[0]; dev->conf.Attributes = CONF_ENABLE_IRQ; dev->conf.IntType = INT_MEMORY_AND_IO; @@ -107,20 +87,18 @@ static int __devinit b43_pcmcia_probe(struct pcmcia_device *dev) win.Base = 0; win.Size = SSB_CORE_SIZE; win.AccessSpeed = 250; - res = pcmcia_request_window(&dev, &win, &dev->win); + res = pcmcia_request_window(dev, &win, &dev->win); if (res != 0) goto err_kfree_ssb; mem.CardOffset = 0; mem.Page = 0; - res = pcmcia_map_mem_page(dev->win, &mem); + res = pcmcia_map_mem_page(dev, dev->win, &mem); if (res != 0) goto err_disable; dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - dev->irq.IRQInfo1 = IRQ_LEVEL_ID; dev->irq.Handler = NULL; /* The handler is registered later. */ - dev->irq.Instance = NULL; res = pcmcia_request_irq(dev, &dev->irq); if (res != 0) goto err_disable; diff --git a/drivers/net/wireless/hostap/hostap_cs.c b/drivers/net/wireless/hostap/hostap_cs.c index ad8eab4a639..c9640a3e02c 100644 --- a/drivers/net/wireless/hostap/hostap_cs.c +++ b/drivers/net/wireless/hostap/hostap_cs.c @@ -274,9 +274,6 @@ static int sandisk_enable_wireless(struct net_device *dev) conf_reg_t reg; struct hostap_interface *iface = netdev_priv(dev); local_info_t *local = iface->local; - tuple_t tuple; - cisparse_t *parse = NULL; - u_char buf[64]; struct hostap_cs_priv *hw_priv = local->hw_priv; if (hw_priv->link->io.NumPorts1 < 0x42) { @@ -285,28 +282,13 @@ static int sandisk_enable_wireless(struct net_device *dev) goto done; } - parse = kmalloc(sizeof(cisparse_t), GFP_KERNEL); - if (parse == NULL) { - ret = -ENOMEM; - goto done; - } - - tuple.Attributes = TUPLE_RETURN_COMMON; - tuple.TupleData = buf; - tuple.TupleDataMax = sizeof(buf); - tuple.TupleOffset = 0; - if (hw_priv->link->manf_id != 0xd601 || hw_priv->link->card_id != 0x0101) { /* No SanDisk manfid found */ ret = -ENODEV; goto done; } - tuple.DesiredTuple = CISTPL_LONGLINK_MFC; - if (pcmcia_get_first_tuple(hw_priv->link, &tuple) || - pcmcia_get_tuple_data(hw_priv->link, &tuple) || - pcmcia_parse_tuple(&tuple, parse) || - parse->longlink_mfc.nfn < 2) { + if (hw_priv->link->socket->functions < 2) { /* No multi-function links found */ ret = -ENODEV; goto done; @@ -354,7 +336,6 @@ static int sandisk_enable_wireless(struct net_device *dev) udelay(10); done: - kfree(parse); return ret; } @@ -529,10 +510,6 @@ static void prism2_detach(struct pcmcia_device *link) } -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - - /* run after a CARD_INSERTION event is received to configure the PCMCIA * socket and make the device available to the system */ @@ -624,7 +601,6 @@ static int prism2_config(struct pcmcia_device *link) struct hostap_interface *iface; local_info_t *local; int ret = 1; - int last_fn, last_ret; struct hostap_cs_priv *hw_priv; PDEBUG(DEBUG_FLOW, "prism2_config()\n"); @@ -636,19 +612,18 @@ static int prism2_config(struct pcmcia_device *link) } /* Look for an appropriate configuration table entry in the CIS */ - last_ret = pcmcia_loop_config(link, prism2_config_check, NULL); - if (last_ret) { + ret = pcmcia_loop_config(link, prism2_config_check, NULL); + if (ret) { if (!ignore_cis_vcc) printk(KERN_ERR "GetNextTuple(): No matching " "CIS configuration. Maybe you need the " "ignore_cis_vcc=1 parameter.\n"); - cs_error(link, RequestIO, last_ret); goto failed; } /* Need to allocate net_device before requesting IRQ handler */ dev = prism2_init_local_data(&prism2_pccard_funcs, 0, - &handle_to_dev(link)); + &link->dev); if (dev == NULL) goto failed; link->priv = dev; @@ -666,13 +641,11 @@ static int prism2_config(struct pcmcia_device *link) * irq structure is initialized. */ if (link->conf.Attributes & CONF_ENABLE_IRQ) { - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | - IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = prism2_interrupt; - link->irq.Instance = dev; - CS_CHECK(RequestIRQ, - pcmcia_request_irq(link, &link->irq)); + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; } /* @@ -680,8 +653,9 @@ static int prism2_config(struct pcmcia_device *link) * the I/O windows and the interrupt mapping, and putting the * card and host interface into "Memory and IO" mode. */ - CS_CHECK(RequestConfiguration, - pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; dev->irq = link->irq.AssignedIRQ; dev->base_addr = link->io.BasePort1; @@ -714,9 +688,6 @@ static int prism2_config(struct pcmcia_device *link) } return ret; - cs_failed: - cs_error(link, last_fn, last_ret); - failed: kfree(hw_priv); prism2_release((u_long)link); diff --git a/drivers/net/wireless/libertas/if_cs.c b/drivers/net/wireless/libertas/if_cs.c index 62381768f2d..b1d84592b95 100644 --- a/drivers/net/wireless/libertas/if_cs.c +++ b/drivers/net/wireless/libertas/if_cs.c @@ -590,7 +590,7 @@ static int if_cs_prog_helper(struct if_cs_card *card) /* TODO: make firmware file configurable */ ret = request_firmware(&fw, "libertas_cs_helper.fw", - &handle_to_dev(card->p_dev)); + &card->p_dev->dev); if (ret) { lbs_pr_err("can't load helper firmware\n"); ret = -ENODEV; @@ -663,7 +663,7 @@ static int if_cs_prog_real(struct if_cs_card *card) /* TODO: make firmware file configurable */ ret = request_firmware(&fw, "libertas_cs.fw", - &handle_to_dev(card->p_dev)); + &card->p_dev->dev); if (ret) { lbs_pr_err("can't load firmware\n"); ret = -ENODEV; @@ -793,18 +793,37 @@ static void if_cs_release(struct pcmcia_device *p_dev) * configure the card at this point -- we wait until we receive a card * insertion event. */ + +static int if_cs_ioprobe(struct pcmcia_device *p_dev, + cistpl_cftable_entry_t *cfg, + cistpl_cftable_entry_t *dflt, + unsigned int vcc, + void *priv_data) +{ + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; + p_dev->io.BasePort1 = cfg->io.win[0].base; + p_dev->io.NumPorts1 = cfg->io.win[0].len; + + /* Do we need to allocate an interrupt? */ + if (cfg->irq.IRQInfo1) + p_dev->conf.Attributes |= CONF_ENABLE_IRQ; + + /* IO window settings */ + if (cfg->io.nwin != 1) { + lbs_pr_err("wrong CIS (check number of IO windows)\n"); + return -ENODEV; + } + + /* This reserves IO space but doesn't actually enable it */ + return pcmcia_request_io(p_dev, &p_dev->io); +} + static int if_cs_probe(struct pcmcia_device *p_dev) { int ret = -ENOMEM; unsigned int prod_id; struct lbs_private *priv; struct if_cs_card *card; - /* CIS parsing */ - tuple_t tuple; - cisparse_t parse; - cistpl_cftable_entry_t *cfg = &parse.cftable_entry; - cistpl_io_t *io = &cfg->io; - u_char buf[64]; lbs_deb_enter(LBS_DEB_CS); @@ -818,48 +837,15 @@ static int if_cs_probe(struct pcmcia_device *p_dev) p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; p_dev->irq.Handler = NULL; - p_dev->irq.IRQInfo1 = IRQ_INFO2_VALID | IRQ_LEVEL_ID; p_dev->conf.Attributes = 0; p_dev->conf.IntType = INT_MEMORY_AND_IO; - tuple.Attributes = 0; - tuple.TupleData = buf; - tuple.TupleDataMax = sizeof(buf); - tuple.TupleOffset = 0; - - tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY; - if ((ret = pcmcia_get_first_tuple(p_dev, &tuple)) != 0 || - (ret = pcmcia_get_tuple_data(p_dev, &tuple)) != 0 || - (ret = pcmcia_parse_tuple(&tuple, &parse)) != 0) - { - lbs_pr_err("error in pcmcia_get_first_tuple etc\n"); - goto out1; - } - - p_dev->conf.ConfigIndex = cfg->index; - - /* Do we need to allocate an interrupt? */ - if (cfg->irq.IRQInfo1) { - p_dev->conf.Attributes |= CONF_ENABLE_IRQ; - } - - /* IO window settings */ - if (cfg->io.nwin != 1) { - lbs_pr_err("wrong CIS (check number of IO windows)\n"); - ret = -ENODEV; + if (pcmcia_loop_config(p_dev, if_cs_ioprobe, NULL)) { + lbs_pr_err("error in pcmcia_loop_config\n"); goto out1; } - p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; - p_dev->io.BasePort1 = io->win[0].base; - p_dev->io.NumPorts1 = io->win[0].len; - /* This reserves IO space but doesn't actually enable it */ - ret = pcmcia_request_io(p_dev, &p_dev->io); - if (ret) { - lbs_pr_err("error in pcmcia_request_io\n"); - goto out1; - } /* * Allocate an interrupt line. Note that this does not assign diff --git a/drivers/net/wireless/netwave_cs.c b/drivers/net/wireless/netwave_cs.c index 9498b46c99a..e61e6b9440a 100644 --- a/drivers/net/wireless/netwave_cs.c +++ b/drivers/net/wireless/netwave_cs.c @@ -145,23 +145,6 @@ static const unsigned int txConfEUD = 0x10; /* Enable Uni-Data packets */ static const unsigned int txConfKey = 0x02; /* Scramble data packets */ static const unsigned int txConfLoop = 0x01; /* Loopback mode */ -/* - All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - you do not define PCMCIA_DEBUG at all, all the debug code will be - left out. If you compile with PCMCIA_DEBUG=0, the debug code will - be present but disabled -- but it can then be enabled for specific - modules at load time with a 'pc_debug=#' option to insmod. -*/ - -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = -"netwave_cs.c 0.3.0 Thu Jul 17 14:36:02 1997 (John Markus Bjørndalen)\n"; -#else -#define DEBUG(n, args...) -#endif /*====================================================================*/ @@ -383,7 +366,7 @@ static int netwave_probe(struct pcmcia_device *link) struct net_device *dev; netwave_private *priv; - DEBUG(0, "netwave_attach()\n"); + dev_dbg(&link->dev, "netwave_attach()\n"); /* Initialize the struct pcmcia_device structure */ dev = alloc_etherdev(sizeof(netwave_private)); @@ -401,8 +384,7 @@ static int netwave_probe(struct pcmcia_device *link) link->io.IOAddrLines = 5; /* Interrupt setup */ - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = &netwave_interrupt; /* General socket configuration */ @@ -421,8 +403,6 @@ static int netwave_probe(struct pcmcia_device *link) dev->watchdog_timeo = TX_TIMEOUT; - link->irq.Instance = dev; - return netwave_pcmcia_config( link); } /* netwave_attach */ @@ -438,7 +418,7 @@ static void netwave_detach(struct pcmcia_device *link) { struct net_device *dev = link->priv; - DEBUG(0, "netwave_detach(0x%p)\n", link); + dev_dbg(&link->dev, "netwave_detach\n"); netwave_release(link); @@ -725,18 +705,15 @@ static const struct iw_handler_def netwave_handler_def = * */ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int netwave_pcmcia_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; netwave_private *priv = netdev_priv(dev); - int i, j, last_ret, last_fn; + int i, j, ret; win_req_t req; memreq_t mem; u_char __iomem *ramBase = NULL; - DEBUG(0, "netwave_pcmcia_config(0x%p)\n", link); + dev_dbg(&link->dev, "netwave_pcmcia_config\n"); /* * Try allocating IO ports. This tries a few fixed addresses. @@ -749,22 +726,24 @@ static int netwave_pcmcia_config(struct pcmcia_device *link) { if (i == 0) break; } - if (i != 0) { - cs_error(link, RequestIO, i); + if (i != 0) goto failed; - } /* * Now allocate an interrupt line. Note that this does not * actually assign a handler to the interrupt. */ - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; /* * This actually configures the PCMCIA socket -- setting up * the I/O windows and the interrupt mapping. */ - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; /* * Allocate a 32K memory window. Note that the struct pcmcia_device @@ -772,14 +751,18 @@ static int netwave_pcmcia_config(struct pcmcia_device *link) { * device needs several windows, you'll need to keep track of * the handles in your private data structure, dev->priv. */ - DEBUG(1, "Setting mem speed of %d\n", mem_speed); + dev_dbg(&link->dev, "Setting mem speed of %d\n", mem_speed); req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_CM|WIN_ENABLE; req.Base = 0; req.Size = 0x8000; req.AccessSpeed = mem_speed; - CS_CHECK(RequestWindow, pcmcia_request_window(&link, &req, &link->win)); + ret = pcmcia_request_window(link, &req, &link->win); + if (ret) + goto failed; mem.CardOffset = 0x20000; mem.Page = 0; - CS_CHECK(MapMemPage, pcmcia_map_mem_page(link->win, &mem)); + ret = pcmcia_map_mem_page(link, link->win, &mem); + if (ret) + goto failed; /* Store base address of the common window frame */ ramBase = ioremap(req.Base, 0x8000); @@ -787,7 +770,7 @@ static int netwave_pcmcia_config(struct pcmcia_device *link) { dev->irq = link->irq.AssignedIRQ; dev->base_addr = link->io.BasePort1; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); if (register_netdev(dev) != 0) { printk(KERN_DEBUG "netwave_cs: register_netdev() failed\n"); @@ -818,8 +801,6 @@ static int netwave_pcmcia_config(struct pcmcia_device *link) { get_uint16(ramBase + NETWAVE_EREG_ARW+2)); return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: netwave_release(link); return -ENODEV; @@ -837,7 +818,7 @@ static void netwave_release(struct pcmcia_device *link) struct net_device *dev = link->priv; netwave_private *priv = netdev_priv(dev); - DEBUG(0, "netwave_release(0x%p)\n", link); + dev_dbg(&link->dev, "netwave_release\n"); pcmcia_disable_device(link); if (link->win) @@ -892,7 +873,7 @@ static void netwave_reset(struct net_device *dev) { u_char __iomem *ramBase = priv->ramBase; unsigned int iobase = dev->base_addr; - DEBUG(0, "netwave_reset: Done with hardware reset\n"); + pr_debug("netwave_reset: Done with hardware reset\n"); priv->timeoutCounter = 0; @@ -988,7 +969,7 @@ static int netwave_hw_xmit(unsigned char* data, int len, dev->stats.tx_bytes += len; - DEBUG(3, "Transmitting with SPCQ %x SPU %x LIF %x ISPLQ %x\n", + pr_debug("Transmitting with SPCQ %x SPU %x LIF %x ISPLQ %x\n", readb(ramBase + NETWAVE_EREG_SPCQ), readb(ramBase + NETWAVE_EREG_SPU), readb(ramBase + NETWAVE_EREG_LIF), @@ -1000,7 +981,7 @@ static int netwave_hw_xmit(unsigned char* data, int len, MaxData = get_uint16(ramBase + NETWAVE_EREG_TDP+2); DataOffset = get_uint16(ramBase + NETWAVE_EREG_TDP+4); - DEBUG(3, "TxFreeList %x, MaxData %x, DataOffset %x\n", + pr_debug("TxFreeList %x, MaxData %x, DataOffset %x\n", TxFreeList, MaxData, DataOffset); /* Copy packet to the adapter fragment buffers */ @@ -1088,7 +1069,7 @@ static irqreturn_t netwave_interrupt(int irq, void* dev_id) status = inb(iobase + NETWAVE_REG_ASR); if (!pcmcia_dev_present(link)) { - DEBUG(1, "netwave_interrupt: Interrupt with status 0x%x " + pr_debug("netwave_interrupt: Interrupt with status 0x%x " "from removed or suspended card!\n", status); break; } @@ -1132,7 +1113,7 @@ static irqreturn_t netwave_interrupt(int irq, void* dev_id) int txStatus; txStatus = readb(ramBase + NETWAVE_EREG_TSER); - DEBUG(3, "Transmit done. TSER = %x id %x\n", + pr_debug("Transmit done. TSER = %x id %x\n", txStatus, readb(ramBase + NETWAVE_EREG_TSER + 1)); if (txStatus & 0x20) { @@ -1156,7 +1137,7 @@ static irqreturn_t netwave_interrupt(int irq, void* dev_id) * TxGU and TxNOAP is set. (Those are the only ones * to set TxErr). */ - DEBUG(3, "netwave_interrupt: TxDN with error status %x\n", + pr_debug("netwave_interrupt: TxDN with error status %x\n", txStatus); /* Clear out TxGU, TxNOAP, TxErr and TxTrys */ @@ -1164,7 +1145,7 @@ static irqreturn_t netwave_interrupt(int irq, void* dev_id) writeb(0xdf & txStatus, ramBase+NETWAVE_EREG_TSER+4); ++dev->stats.tx_errors; } - DEBUG(3, "New status is TSER %x ASR %x\n", + pr_debug("New status is TSER %x ASR %x\n", readb(ramBase + NETWAVE_EREG_TSER), inb(iobase + NETWAVE_REG_ASR)); @@ -1172,7 +1153,7 @@ static irqreturn_t netwave_interrupt(int irq, void* dev_id) } /* TxBA, this would trigger on all error packets received */ /* if (status & 0x01) { - DEBUG(4, "Transmit buffers available, %x\n", status); + pr_debug("Transmit buffers available, %x\n", status); } */ } @@ -1190,7 +1171,7 @@ static irqreturn_t netwave_interrupt(int irq, void* dev_id) */ static void netwave_watchdog(struct net_device *dev) { - DEBUG(1, "%s: netwave_watchdog: watchdog timer expired\n", dev->name); + pr_debug("%s: netwave_watchdog: watchdog timer expired\n", dev->name); netwave_reset(dev); dev->trans_start = jiffies; netif_wake_queue(dev); @@ -1211,7 +1192,7 @@ static int netwave_rx(struct net_device *dev) int i; u_char *ptr; - DEBUG(3, "xinw_rx: Receiving ... \n"); + pr_debug("xinw_rx: Receiving ... \n"); /* Receive max 10 packets for now. */ for (i = 0; i < 10; i++) { @@ -1237,7 +1218,7 @@ static int netwave_rx(struct net_device *dev) skb = dev_alloc_skb(rcvLen+5); if (skb == NULL) { - DEBUG(1, "netwave_rx: Could not allocate an sk_buff of " + pr_debug("netwave_rx: Could not allocate an sk_buff of " "length %d\n", rcvLen); ++dev->stats.rx_dropped; /* Tell the adapter to skip the packet */ @@ -1279,7 +1260,7 @@ static int netwave_rx(struct net_device *dev) wait_WOC(iobase); writeb(NETWAVE_CMD_SRP, ramBase + NETWAVE_EREG_CB + 0); writeb(NETWAVE_CMD_EOC, ramBase + NETWAVE_EREG_CB + 1); - DEBUG(3, "Packet reception ok\n"); + pr_debug("Packet reception ok\n"); } return 0; } @@ -1288,7 +1269,7 @@ static int netwave_open(struct net_device *dev) { netwave_private *priv = netdev_priv(dev); struct pcmcia_device *link = priv->p_dev; - DEBUG(1, "netwave_open: starting.\n"); + dev_dbg(&link->dev, "netwave_open: starting.\n"); if (!pcmcia_dev_present(link)) return -ENODEV; @@ -1305,7 +1286,7 @@ static int netwave_close(struct net_device *dev) { netwave_private *priv = netdev_priv(dev); struct pcmcia_device *link = priv->p_dev; - DEBUG(1, "netwave_close: finishing.\n"); + dev_dbg(&link->dev, "netwave_close: finishing.\n"); link->open--; netif_stop_queue(dev); @@ -1358,11 +1339,11 @@ static void set_multicast_list(struct net_device *dev) u_char rcvMode = 0; #ifdef PCMCIA_DEBUG - if (pc_debug > 2) { - static int old; + { + xstatic int old; if (old != dev->mc_count) { old = dev->mc_count; - DEBUG(0, "%s: setting Rx mode to %d addresses.\n", + pr_debug("%s: setting Rx mode to %d addresses.\n", dev->name, dev->mc_count); } } diff --git a/drivers/net/wireless/orinoco/orinoco_cs.c b/drivers/net/wireless/orinoco/orinoco_cs.c index 38c1c9d2abb..f27bb8367c9 100644 --- a/drivers/net/wireless/orinoco/orinoco_cs.c +++ b/drivers/net/wireless/orinoco/orinoco_cs.c @@ -109,7 +109,7 @@ orinoco_cs_probe(struct pcmcia_device *link) struct orinoco_private *priv; struct orinoco_pccard *card; - priv = alloc_orinocodev(sizeof(*card), &handle_to_dev(link), + priv = alloc_orinocodev(sizeof(*card), &link->dev, orinoco_cs_hard_reset, NULL); if (!priv) return -ENOMEM; @@ -120,10 +120,8 @@ orinoco_cs_probe(struct pcmcia_device *link) link->priv = priv; /* Interrupt setup */ - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = orinoco_interrupt; - link->irq.Instance = priv; /* General socket configuration defaults can go here. In this * client, we assume very little, and rely on the CIS for @@ -160,12 +158,6 @@ static void orinoco_cs_detach(struct pcmcia_device *link) * device available to the system. */ -#define CS_CHECK(fn, ret) do { \ - last_fn = (fn); \ - if ((last_ret = (ret)) != 0) \ - goto cs_failed; \ -} while (0) - static int orinoco_cs_config_check(struct pcmcia_device *p_dev, cistpl_cftable_entry_t *cfg, cistpl_cftable_entry_t *dflt, @@ -240,7 +232,7 @@ orinoco_cs_config(struct pcmcia_device *link) struct orinoco_private *priv = link->priv; struct orinoco_pccard *card = priv->card; hermes_t *hw = &priv->hw; - int last_fn, last_ret; + int ret; void __iomem *mem; /* @@ -257,13 +249,12 @@ orinoco_cs_config(struct pcmcia_device *link) * and most client drivers will only use the CIS to fill in * implementation-defined details. */ - last_ret = pcmcia_loop_config(link, orinoco_cs_config_check, NULL); - if (last_ret) { + ret = pcmcia_loop_config(link, orinoco_cs_config_check, NULL); + if (ret) { if (!ignore_cis_vcc) printk(KERN_ERR PFX "GetNextTuple(): No matching " "CIS configuration. Maybe you need the " "ignore_cis_vcc=1 parameter.\n"); - cs_error(link, RequestIO, last_ret); goto failed; } @@ -272,14 +263,16 @@ orinoco_cs_config(struct pcmcia_device *link) * a handler to the interrupt, unless the 'Handler' member of * the irq structure is initialized. */ - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; /* We initialize the hermes structure before completing PCMCIA * configuration just in case the interrupt handler gets * called. */ mem = ioport_map(link->io.BasePort1, link->io.NumPorts1); if (!mem) - goto cs_failed; + goto failed; hermes_struct_init(hw, mem, HERMES_16BIT_REGSPACING); @@ -288,8 +281,9 @@ orinoco_cs_config(struct pcmcia_device *link) * the I/O windows and the interrupt mapping, and putting the * card and host interface into "Memory and IO" mode. */ - CS_CHECK(RequestConfiguration, - pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; /* Ok, we have the configuration, prepare to register the netdev */ card->node.major = card->node.minor = 0; @@ -315,9 +309,6 @@ orinoco_cs_config(struct pcmcia_device *link) * net_device has been registered */ return 0; - cs_failed: - cs_error(link, last_fn, last_ret); - failed: orinoco_cs_release(link); return -ENODEV; diff --git a/drivers/net/wireless/orinoco/spectrum_cs.c b/drivers/net/wireless/orinoco/spectrum_cs.c index c361310b885..59bda240fdc 100644 --- a/drivers/net/wireless/orinoco/spectrum_cs.c +++ b/drivers/net/wireless/orinoco/spectrum_cs.c @@ -73,9 +73,6 @@ static void spectrum_cs_release(struct pcmcia_device *link); #define HCR_MEM16 0x10 /* memory width bit, should be preserved */ -#define CS_CHECK(fn, ret) \ - do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - /* * Reset the card using configuration registers COR and CCSR. * If IDLE is 1, stop the firmware, so that it can be safely rewritten. @@ -83,7 +80,7 @@ static void spectrum_cs_release(struct pcmcia_device *link); static int spectrum_reset(struct pcmcia_device *link, int idle) { - int last_ret, last_fn; + int ret; conf_reg_t reg; u_int save_cor; @@ -95,23 +92,26 @@ spectrum_reset(struct pcmcia_device *link, int idle) reg.Function = 0; reg.Action = CS_READ; reg.Offset = CISREG_COR; - CS_CHECK(AccessConfigurationRegister, - pcmcia_access_configuration_register(link, ®)); + ret = pcmcia_access_configuration_register(link, ®); + if (ret) + goto failed; save_cor = reg.Value; /* Soft-Reset card */ reg.Action = CS_WRITE; reg.Offset = CISREG_COR; reg.Value = (save_cor | COR_SOFT_RESET); - CS_CHECK(AccessConfigurationRegister, - pcmcia_access_configuration_register(link, ®)); + ret = pcmcia_access_configuration_register(link, ®); + if (ret) + goto failed; udelay(1000); /* Read CCSR */ reg.Action = CS_READ; reg.Offset = CISREG_CCSR; - CS_CHECK(AccessConfigurationRegister, - pcmcia_access_configuration_register(link, ®)); + ret = pcmcia_access_configuration_register(link, ®); + if (ret) + goto failed; /* * Start or stop the firmware. Memory width bit should be @@ -120,21 +120,22 @@ spectrum_reset(struct pcmcia_device *link, int idle) reg.Action = CS_WRITE; reg.Offset = CISREG_CCSR; reg.Value = (idle ? HCR_IDLE : HCR_RUN) | (reg.Value & HCR_MEM16); - CS_CHECK(AccessConfigurationRegister, - pcmcia_access_configuration_register(link, ®)); + ret = pcmcia_access_configuration_register(link, ®); + if (ret) + goto failed; udelay(1000); /* Restore original COR configuration index */ reg.Action = CS_WRITE; reg.Offset = CISREG_COR; reg.Value = (save_cor & ~COR_SOFT_RESET); - CS_CHECK(AccessConfigurationRegister, - pcmcia_access_configuration_register(link, ®)); + ret = pcmcia_access_configuration_register(link, ®); + if (ret) + goto failed; udelay(1000); return 0; -cs_failed: - cs_error(link, last_fn, last_ret); +failed: return -ENODEV; } @@ -181,7 +182,7 @@ spectrum_cs_probe(struct pcmcia_device *link) struct orinoco_private *priv; struct orinoco_pccard *card; - priv = alloc_orinocodev(sizeof(*card), &handle_to_dev(link), + priv = alloc_orinocodev(sizeof(*card), &link->dev, spectrum_cs_hard_reset, spectrum_cs_stop_firmware); if (!priv) @@ -193,10 +194,8 @@ spectrum_cs_probe(struct pcmcia_device *link) link->priv = priv; /* Interrupt setup */ - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = orinoco_interrupt; - link->irq.Instance = priv; /* General socket configuration defaults can go here. In this * client, we assume very little, and rely on the CIS for @@ -307,7 +306,7 @@ spectrum_cs_config(struct pcmcia_device *link) struct orinoco_private *priv = link->priv; struct orinoco_pccard *card = priv->card; hermes_t *hw = &priv->hw; - int last_fn, last_ret; + int ret; void __iomem *mem; /* @@ -324,13 +323,12 @@ spectrum_cs_config(struct pcmcia_device *link) * and most client drivers will only use the CIS to fill in * implementation-defined details. */ - last_ret = pcmcia_loop_config(link, spectrum_cs_config_check, NULL); - if (last_ret) { + ret = pcmcia_loop_config(link, spectrum_cs_config_check, NULL); + if (ret) { if (!ignore_cis_vcc) printk(KERN_ERR PFX "GetNextTuple(): No matching " "CIS configuration. Maybe you need the " "ignore_cis_vcc=1 parameter.\n"); - cs_error(link, RequestIO, last_ret); goto failed; } @@ -339,14 +337,16 @@ spectrum_cs_config(struct pcmcia_device *link) * a handler to the interrupt, unless the 'Handler' member of * the irq structure is initialized. */ - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; /* We initialize the hermes structure before completing PCMCIA * configuration just in case the interrupt handler gets * called. */ mem = ioport_map(link->io.BasePort1, link->io.NumPorts1); if (!mem) - goto cs_failed; + goto failed; hermes_struct_init(hw, mem, HERMES_16BIT_REGSPACING); @@ -355,8 +355,9 @@ spectrum_cs_config(struct pcmcia_device *link) * the I/O windows and the interrupt mapping, and putting the * card and host interface into "Memory and IO" mode. */ - CS_CHECK(RequestConfiguration, - pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; /* Ok, we have the configuration, prepare to register the netdev */ card->node.major = card->node.minor = 0; @@ -386,9 +387,6 @@ spectrum_cs_config(struct pcmcia_device *link) * net_device has been registered */ return 0; - cs_failed: - cs_error(link, last_fn, last_ret); - failed: spectrum_cs_release(link); return -ENODEV; diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index 1c88c2ea59a..5b8e3e4cdd9 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -71,25 +71,7 @@ typedef u_char mac_addr[ETH_ALEN]; /* Hardware address */ #include "rayctl.h" #include "ray_cs.h" -/* All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - you do not define PCMCIA_DEBUG at all, all the debug code will be - left out. If you compile with PCMCIA_DEBUG=0, the debug code will - be present but disabled -- but it can then be enabled for specific - modules at load time with a 'pc_debug=#' option to insmod. -*/ -#ifdef RAYLINK_DEBUG -#define PCMCIA_DEBUG RAYLINK_DEBUG -#endif -#ifdef PCMCIA_DEBUG -static int ray_debug; -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -/* #define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args); */ -#define DEBUG(n, args...) if (pc_debug > (n)) printk(args); -#else -#define DEBUG(n, args...) -#endif /** Prototypes based on PCMCIA skeleton driver *******************************/ static int ray_config(struct pcmcia_device *link); static void ray_release(struct pcmcia_device *link); @@ -325,7 +307,7 @@ static int ray_probe(struct pcmcia_device *p_dev) ray_dev_t *local; struct net_device *dev; - DEBUG(1, "ray_attach()\n"); + dev_dbg(&p_dev->dev, "ray_attach()\n"); /* Allocate space for private device-specific data */ dev = alloc_etherdev(sizeof(ray_dev_t)); @@ -341,8 +323,7 @@ static int ray_probe(struct pcmcia_device *p_dev) p_dev->io.IOAddrLines = 5; /* Interrupt setup. For PCMCIA, driver takes what's given */ - p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID; + p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; p_dev->irq.Handler = &ray_interrupt; /* General socket configuration */ @@ -351,13 +332,12 @@ static int ray_probe(struct pcmcia_device *p_dev) p_dev->conf.ConfigIndex = 1; p_dev->priv = dev; - p_dev->irq.Instance = dev; local->finder = p_dev; local->card_status = CARD_INSERTED; local->authentication_state = UNAUTHENTICATED; local->num_multi = 0; - DEBUG(2, "ray_attach p_dev = %p, dev = %p, local = %p, intr = %p\n", + dev_dbg(&p_dev->dev, "ray_attach p_dev = %p, dev = %p, local = %p, intr = %p\n", p_dev, dev, local, &ray_interrupt); /* Raylink entries in the device structure */ @@ -370,7 +350,7 @@ static int ray_probe(struct pcmcia_device *p_dev) #endif /* WIRELESS_SPY */ - DEBUG(2, "ray_cs ray_attach calling ether_setup.)\n"); + dev_dbg(&p_dev->dev, "ray_cs ray_attach calling ether_setup.)\n"); netif_stop_queue(dev); init_timer(&local->timer); @@ -393,7 +373,7 @@ static void ray_detach(struct pcmcia_device *link) struct net_device *dev; ray_dev_t *local; - DEBUG(1, "ray_detach(0x%p)\n", link); + dev_dbg(&link->dev, "ray_detach\n"); this_device = NULL; dev = link->priv; @@ -408,7 +388,7 @@ static void ray_detach(struct pcmcia_device *link) unregister_netdev(dev); free_netdev(dev); } - DEBUG(2, "ray_cs ray_detach ending\n"); + dev_dbg(&link->dev, "ray_cs ray_detach ending\n"); } /* ray_detach */ /*============================================================================= @@ -416,19 +396,17 @@ static void ray_detach(struct pcmcia_device *link) is received, to configure the PCMCIA socket, and to make the ethernet device available to the system. =============================================================================*/ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) #define MAX_TUPLE_SIZE 128 static int ray_config(struct pcmcia_device *link) { - int last_fn = 0, last_ret = 0; + int ret = 0; int i; win_req_t req; memreq_t mem; struct net_device *dev = (struct net_device *)link->priv; ray_dev_t *local = netdev_priv(dev); - DEBUG(1, "ray_config(0x%p)\n", link); + dev_dbg(&link->dev, "ray_config\n"); /* Determine card type and firmware version */ printk(KERN_INFO "ray_cs Detected: %s%s%s%s\n", @@ -440,14 +418,17 @@ static int ray_config(struct pcmcia_device *link) /* Now allocate an interrupt line. Note that this does not actually assign a handler to the interrupt. */ - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; dev->irq = link->irq.AssignedIRQ; /* This actually configures the PCMCIA socket -- setting up the I/O windows and the interrupt mapping. */ - CS_CHECK(RequestConfiguration, - pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; /*** Set up 32k window for shared memory (transmit and control) ************/ req.Attributes = @@ -455,10 +436,14 @@ static int ray_config(struct pcmcia_device *link) req.Base = 0; req.Size = 0x8000; req.AccessSpeed = ray_mem_speed; - CS_CHECK(RequestWindow, pcmcia_request_window(&link, &req, &link->win)); + ret = pcmcia_request_window(link, &req, &link->win); + if (ret) + goto failed; mem.CardOffset = 0x0000; mem.Page = 0; - CS_CHECK(MapMemPage, pcmcia_map_mem_page(link->win, &mem)); + ret = pcmcia_map_mem_page(link, link->win, &mem); + if (ret) + goto failed; local->sram = ioremap(req.Base, req.Size); /*** Set up 16k window for shared memory (receive buffer) ***************/ @@ -467,11 +452,14 @@ static int ray_config(struct pcmcia_device *link) req.Base = 0; req.Size = 0x4000; req.AccessSpeed = ray_mem_speed; - CS_CHECK(RequestWindow, - pcmcia_request_window(&link, &req, &local->rmem_handle)); + ret = pcmcia_request_window(link, &req, &local->rmem_handle); + if (ret) + goto failed; mem.CardOffset = 0x8000; mem.Page = 0; - CS_CHECK(MapMemPage, pcmcia_map_mem_page(local->rmem_handle, &mem)); + ret = pcmcia_map_mem_page(link, local->rmem_handle, &mem); + if (ret) + goto failed; local->rmem = ioremap(req.Base, req.Size); /*** Set up window for attribute memory ***********************************/ @@ -480,22 +468,25 @@ static int ray_config(struct pcmcia_device *link) req.Base = 0; req.Size = 0x1000; req.AccessSpeed = ray_mem_speed; - CS_CHECK(RequestWindow, - pcmcia_request_window(&link, &req, &local->amem_handle)); + ret = pcmcia_request_window(link, &req, &local->amem_handle); + if (ret) + goto failed; mem.CardOffset = 0x0000; mem.Page = 0; - CS_CHECK(MapMemPage, pcmcia_map_mem_page(local->amem_handle, &mem)); + ret = pcmcia_map_mem_page(link, local->amem_handle, &mem); + if (ret) + goto failed; local->amem = ioremap(req.Base, req.Size); - DEBUG(3, "ray_config sram=%p\n", local->sram); - DEBUG(3, "ray_config rmem=%p\n", local->rmem); - DEBUG(3, "ray_config amem=%p\n", local->amem); + dev_dbg(&link->dev, "ray_config sram=%p\n", local->sram); + dev_dbg(&link->dev, "ray_config rmem=%p\n", local->rmem); + dev_dbg(&link->dev, "ray_config amem=%p\n", local->amem); if (ray_init(dev) < 0) { ray_release(link); return -ENODEV; } - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); i = register_netdev(dev); if (i != 0) { printk("ray_config register_netdev() failed\n"); @@ -511,9 +502,7 @@ static int ray_config(struct pcmcia_device *link) return 0; -cs_failed: - cs_error(link, last_fn, last_ret); - +failed: ray_release(link); return -ENODEV; } /* ray_config */ @@ -543,9 +532,9 @@ static int ray_init(struct net_device *dev) struct ccs __iomem *pccs; ray_dev_t *local = netdev_priv(dev); struct pcmcia_device *link = local->finder; - DEBUG(1, "ray_init(0x%p)\n", dev); + dev_dbg(&link->dev, "ray_init(0x%p)\n", dev); if (!(pcmcia_dev_present(link))) { - DEBUG(0, "ray_init - device not present\n"); + dev_dbg(&link->dev, "ray_init - device not present\n"); return -1; } @@ -567,13 +556,13 @@ static int ray_init(struct net_device *dev) local->fw_ver = local->startup_res.firmware_version[0]; local->fw_bld = local->startup_res.firmware_version[1]; local->fw_var = local->startup_res.firmware_version[2]; - DEBUG(1, "ray_init firmware version %d.%d \n", local->fw_ver, + dev_dbg(&link->dev, "ray_init firmware version %d.%d \n", local->fw_ver, local->fw_bld); local->tib_length = 0x20; if ((local->fw_ver == 5) && (local->fw_bld >= 30)) local->tib_length = local->startup_res.tib_length; - DEBUG(2, "ray_init tib_length = 0x%02x\n", local->tib_length); + dev_dbg(&link->dev, "ray_init tib_length = 0x%02x\n", local->tib_length); /* Initialize CCS's to buffer free state */ pccs = ccs_base(local); for (i = 0; i < NUMBER_OF_CCS; i++) { @@ -592,7 +581,7 @@ static int ray_init(struct net_device *dev) clear_interrupt(local); /* Clear any interrupt from the card */ local->card_status = CARD_AWAITING_PARAM; - DEBUG(2, "ray_init ending\n"); + dev_dbg(&link->dev, "ray_init ending\n"); return 0; } /* ray_init */ @@ -605,9 +594,9 @@ static int dl_startup_params(struct net_device *dev) struct ccs __iomem *pccs; struct pcmcia_device *link = local->finder; - DEBUG(1, "dl_startup_params entered\n"); + dev_dbg(&link->dev, "dl_startup_params entered\n"); if (!(pcmcia_dev_present(link))) { - DEBUG(2, "ray_cs dl_startup_params - device not present\n"); + dev_dbg(&link->dev, "ray_cs dl_startup_params - device not present\n"); return -1; } @@ -625,7 +614,7 @@ static int dl_startup_params(struct net_device *dev) local->dl_param_ccs = ccsindex; pccs = ccs_base(local) + ccsindex; writeb(CCS_DOWNLOAD_STARTUP_PARAMS, &pccs->cmd); - DEBUG(2, "dl_startup_params start ccsindex = %d\n", + dev_dbg(&link->dev, "dl_startup_params start ccsindex = %d\n", local->dl_param_ccs); /* Interrupt the firmware to process the command */ if (interrupt_ecf(local, ccsindex)) { @@ -641,7 +630,7 @@ static int dl_startup_params(struct net_device *dev) local->timer.data = (long)local; local->timer.function = &verify_dl_startup; add_timer(&local->timer); - DEBUG(2, + dev_dbg(&link->dev, "ray_cs dl_startup_params started timer for verify_dl_startup\n"); return 0; } /* dl_startup_params */ @@ -717,11 +706,11 @@ static void verify_dl_startup(u_long data) struct pcmcia_device *link = local->finder; if (!(pcmcia_dev_present(link))) { - DEBUG(2, "ray_cs verify_dl_startup - device not present\n"); + dev_dbg(&link->dev, "ray_cs verify_dl_startup - device not present\n"); return; } -#ifdef PCMCIA_DEBUG - if (pc_debug > 2) { +#if 0 + { int i; printk(KERN_DEBUG "verify_dl_startup parameters sent via ccs %d:\n", @@ -760,7 +749,7 @@ static void start_net(u_long data) int ccsindex; struct pcmcia_device *link = local->finder; if (!(pcmcia_dev_present(link))) { - DEBUG(2, "ray_cs start_net - device not present\n"); + dev_dbg(&link->dev, "ray_cs start_net - device not present\n"); return; } /* Fill in the CCS fields for the ECF */ @@ -771,7 +760,7 @@ static void start_net(u_long data) writeb(0, &pccs->var.start_network.update_param); /* Interrupt the firmware to process the command */ if (interrupt_ecf(local, ccsindex)) { - DEBUG(1, "ray start net failed - card not ready for intr\n"); + dev_dbg(&link->dev, "ray start net failed - card not ready for intr\n"); writeb(CCS_BUFFER_FREE, &(pccs++)->buffer_status); return; } @@ -790,7 +779,7 @@ static void join_net(u_long data) struct pcmcia_device *link = local->finder; if (!(pcmcia_dev_present(link))) { - DEBUG(2, "ray_cs join_net - device not present\n"); + dev_dbg(&link->dev, "ray_cs join_net - device not present\n"); return; } /* Fill in the CCS fields for the ECF */ @@ -802,7 +791,7 @@ static void join_net(u_long data) writeb(0, &pccs->var.join_network.net_initiated); /* Interrupt the firmware to process the command */ if (interrupt_ecf(local, ccsindex)) { - DEBUG(1, "ray join net failed - card not ready for intr\n"); + dev_dbg(&link->dev, "ray join net failed - card not ready for intr\n"); writeb(CCS_BUFFER_FREE, &(pccs++)->buffer_status); return; } @@ -821,7 +810,7 @@ static void ray_release(struct pcmcia_device *link) ray_dev_t *local = netdev_priv(dev); int i; - DEBUG(1, "ray_release(0x%p)\n", link); + dev_dbg(&link->dev, "ray_release\n"); del_timer(&local->timer); @@ -829,15 +818,15 @@ static void ray_release(struct pcmcia_device *link) iounmap(local->rmem); iounmap(local->amem); /* Do bother checking to see if these succeed or not */ - i = pcmcia_release_window(local->amem_handle); + i = pcmcia_release_window(link, local->amem_handle); if (i != 0) - DEBUG(0, "ReleaseWindow(local->amem) ret = %x\n", i); - i = pcmcia_release_window(local->rmem_handle); + dev_dbg(&link->dev, "ReleaseWindow(local->amem) ret = %x\n", i); + i = pcmcia_release_window(link, local->rmem_handle); if (i != 0) - DEBUG(0, "ReleaseWindow(local->rmem) ret = %x\n", i); + dev_dbg(&link->dev, "ReleaseWindow(local->rmem) ret = %x\n", i); pcmcia_disable_device(link); - DEBUG(2, "ray_release ending\n"); + dev_dbg(&link->dev, "ray_release ending\n"); } static int ray_suspend(struct pcmcia_device *link) @@ -871,9 +860,9 @@ static int ray_dev_init(struct net_device *dev) ray_dev_t *local = netdev_priv(dev); struct pcmcia_device *link = local->finder; - DEBUG(1, "ray_dev_init(dev=%p)\n", dev); + dev_dbg(&link->dev, "ray_dev_init(dev=%p)\n", dev); if (!(pcmcia_dev_present(link))) { - DEBUG(2, "ray_dev_init - device not present\n"); + dev_dbg(&link->dev, "ray_dev_init - device not present\n"); return -1; } #ifdef RAY_IMMEDIATE_INIT @@ -887,7 +876,7 @@ static int ray_dev_init(struct net_device *dev) /* Postpone the card init so that we can still configure the card, * for example using the Wireless Extensions. The init will happen * in ray_open() - Jean II */ - DEBUG(1, + dev_dbg(&link->dev, "ray_dev_init: postponing card init to ray_open() ; Status = %d\n", local->card_status); #endif /* RAY_IMMEDIATE_INIT */ @@ -896,7 +885,7 @@ static int ray_dev_init(struct net_device *dev) memcpy(dev->dev_addr, &local->sparm.b4.a_mac_addr, ADDRLEN); memset(dev->broadcast, 0xff, ETH_ALEN); - DEBUG(2, "ray_dev_init ending\n"); + dev_dbg(&link->dev, "ray_dev_init ending\n"); return 0; } @@ -906,9 +895,9 @@ static int ray_dev_config(struct net_device *dev, struct ifmap *map) ray_dev_t *local = netdev_priv(dev); struct pcmcia_device *link = local->finder; /* Dummy routine to satisfy device structure */ - DEBUG(1, "ray_dev_config(dev=%p,ifmap=%p)\n", dev, map); + dev_dbg(&link->dev, "ray_dev_config(dev=%p,ifmap=%p)\n", dev, map); if (!(pcmcia_dev_present(link))) { - DEBUG(2, "ray_dev_config - device not present\n"); + dev_dbg(&link->dev, "ray_dev_config - device not present\n"); return -1; } @@ -924,14 +913,14 @@ static netdev_tx_t ray_dev_start_xmit(struct sk_buff *skb, short length = skb->len; if (!pcmcia_dev_present(link)) { - DEBUG(2, "ray_dev_start_xmit - device not present\n"); + dev_dbg(&link->dev, "ray_dev_start_xmit - device not present\n"); dev_kfree_skb(skb); return NETDEV_TX_OK; } - DEBUG(3, "ray_dev_start_xmit(skb=%p, dev=%p)\n", skb, dev); + dev_dbg(&link->dev, "ray_dev_start_xmit(skb=%p, dev=%p)\n", skb, dev); if (local->authentication_state == NEED_TO_AUTH) { - DEBUG(0, "ray_cs Sending authentication request.\n"); + dev_dbg(&link->dev, "ray_cs Sending authentication request.\n"); if (!build_auth_frame(local, local->auth_id, OPEN_AUTH_REQUEST)) { local->authentication_state = AUTHENTICATED; netif_stop_queue(dev); @@ -971,7 +960,7 @@ static int ray_hw_xmit(unsigned char *data, int len, struct net_device *dev, struct tx_msg __iomem *ptx; /* Address of xmit buffer in PC space */ short int addr; /* Address of xmit buffer in card space */ - DEBUG(3, "ray_hw_xmit(data=%p, len=%d, dev=%p)\n", data, len, dev); + pr_debug("ray_hw_xmit(data=%p, len=%d, dev=%p)\n", data, len, dev); if (len + TX_HEADER_LENGTH > TX_BUF_SIZE) { printk(KERN_INFO "ray_hw_xmit packet too large: %d bytes\n", len); @@ -979,9 +968,9 @@ static int ray_hw_xmit(unsigned char *data, int len, struct net_device *dev, } switch (ccsindex = get_free_tx_ccs(local)) { case ECCSBUSY: - DEBUG(2, "ray_hw_xmit tx_ccs table busy\n"); + pr_debug("ray_hw_xmit tx_ccs table busy\n"); case ECCSFULL: - DEBUG(2, "ray_hw_xmit No free tx ccs\n"); + pr_debug("ray_hw_xmit No free tx ccs\n"); case ECARDGONE: netif_stop_queue(dev); return XMIT_NO_CCS; @@ -1018,12 +1007,12 @@ static int ray_hw_xmit(unsigned char *data, int len, struct net_device *dev, writeb(PSM_CAM, &pccs->var.tx_request.pow_sav_mode); writeb(local->net_default_tx_rate, &pccs->var.tx_request.tx_rate); writeb(0, &pccs->var.tx_request.antenna); - DEBUG(3, "ray_hw_xmit default_tx_rate = 0x%x\n", + pr_debug("ray_hw_xmit default_tx_rate = 0x%x\n", local->net_default_tx_rate); /* Interrupt the firmware to process the command */ if (interrupt_ecf(local, ccsindex)) { - DEBUG(2, "ray_hw_xmit failed - ECF not ready for intr\n"); + pr_debug("ray_hw_xmit failed - ECF not ready for intr\n"); /* TBD very inefficient to copy packet to buffer, and then not send it, but the alternative is to queue the messages and that won't be done for a while. Maybe set tbusy until a CCS is free? @@ -1040,7 +1029,7 @@ static int translate_frame(ray_dev_t *local, struct tx_msg __iomem *ptx, { __be16 proto = ((struct ethhdr *)data)->h_proto; if (ntohs(proto) >= 1536) { /* DIX II ethernet frame */ - DEBUG(3, "ray_cs translate_frame DIX II\n"); + pr_debug("ray_cs translate_frame DIX II\n"); /* Copy LLC header to card buffer */ memcpy_toio(&ptx->var, eth2_llc, sizeof(eth2_llc)); memcpy_toio(((void __iomem *)&ptx->var) + sizeof(eth2_llc), @@ -1056,9 +1045,9 @@ static int translate_frame(ray_dev_t *local, struct tx_msg __iomem *ptx, len - ETH_HLEN); return (int)sizeof(struct snaphdr_t) - ETH_HLEN; } else { /* already 802 type, and proto is length */ - DEBUG(3, "ray_cs translate_frame 802\n"); + pr_debug("ray_cs translate_frame 802\n"); if (proto == htons(0xffff)) { /* evil netware IPX 802.3 without LLC */ - DEBUG(3, "ray_cs translate_frame evil IPX\n"); + pr_debug("ray_cs translate_frame evil IPX\n"); memcpy_toio(&ptx->var, data + ETH_HLEN, len - ETH_HLEN); return 0 - ETH_HLEN; } @@ -1603,7 +1592,7 @@ static int ray_open(struct net_device *dev) struct pcmcia_device *link; link = local->finder; - DEBUG(1, "ray_open('%s')\n", dev->name); + dev_dbg(&link->dev, "ray_open('%s')\n", dev->name); if (link->open == 0) local->num_multi = 0; @@ -1613,7 +1602,7 @@ static int ray_open(struct net_device *dev) if (local->card_status == CARD_AWAITING_PARAM) { int i; - DEBUG(1, "ray_open: doing init now !\n"); + dev_dbg(&link->dev, "ray_open: doing init now !\n"); /* Download startup parameters */ if ((i = dl_startup_params(dev)) < 0) { @@ -1629,7 +1618,7 @@ static int ray_open(struct net_device *dev) else netif_start_queue(dev); - DEBUG(2, "ray_open ending\n"); + dev_dbg(&link->dev, "ray_open ending\n"); return 0; } /* end ray_open */ @@ -1640,7 +1629,7 @@ static int ray_dev_close(struct net_device *dev) struct pcmcia_device *link; link = local->finder; - DEBUG(1, "ray_dev_close('%s')\n", dev->name); + dev_dbg(&link->dev, "ray_dev_close('%s')\n", dev->name); link->open--; netif_stop_queue(dev); @@ -1656,7 +1645,7 @@ static int ray_dev_close(struct net_device *dev) /*===========================================================================*/ static void ray_reset(struct net_device *dev) { - DEBUG(1, "ray_reset entered\n"); + pr_debug("ray_reset entered\n"); return; } @@ -1669,17 +1658,17 @@ static int interrupt_ecf(ray_dev_t *local, int ccs) struct pcmcia_device *link = local->finder; if (!(pcmcia_dev_present(link))) { - DEBUG(2, "ray_cs interrupt_ecf - device not present\n"); + dev_dbg(&link->dev, "ray_cs interrupt_ecf - device not present\n"); return -1; } - DEBUG(2, "interrupt_ecf(local=%p, ccs = 0x%x\n", local, ccs); + dev_dbg(&link->dev, "interrupt_ecf(local=%p, ccs = 0x%x\n", local, ccs); while (i && (readb(local->amem + CIS_OFFSET + ECF_INTR_OFFSET) & ECF_INTR_SET)) i--; if (i == 0) { - DEBUG(2, "ray_cs interrupt_ecf card not ready for interrupt\n"); + dev_dbg(&link->dev, "ray_cs interrupt_ecf card not ready for interrupt\n"); return -1; } /* Fill the mailbox, then kick the card */ @@ -1698,12 +1687,12 @@ static int get_free_tx_ccs(ray_dev_t *local) struct pcmcia_device *link = local->finder; if (!(pcmcia_dev_present(link))) { - DEBUG(2, "ray_cs get_free_tx_ccs - device not present\n"); + dev_dbg(&link->dev, "ray_cs get_free_tx_ccs - device not present\n"); return ECARDGONE; } if (test_and_set_bit(0, &local->tx_ccs_lock)) { - DEBUG(1, "ray_cs tx_ccs_lock busy\n"); + dev_dbg(&link->dev, "ray_cs tx_ccs_lock busy\n"); return ECCSBUSY; } @@ -1716,7 +1705,7 @@ static int get_free_tx_ccs(ray_dev_t *local) } } local->tx_ccs_lock = 0; - DEBUG(2, "ray_cs ERROR no free tx CCS for raylink card\n"); + dev_dbg(&link->dev, "ray_cs ERROR no free tx CCS for raylink card\n"); return ECCSFULL; } /* get_free_tx_ccs */ @@ -1730,11 +1719,11 @@ static int get_free_ccs(ray_dev_t *local) struct pcmcia_device *link = local->finder; if (!(pcmcia_dev_present(link))) { - DEBUG(2, "ray_cs get_free_ccs - device not present\n"); + dev_dbg(&link->dev, "ray_cs get_free_ccs - device not present\n"); return ECARDGONE; } if (test_and_set_bit(0, &local->ccs_lock)) { - DEBUG(1, "ray_cs ccs_lock busy\n"); + dev_dbg(&link->dev, "ray_cs ccs_lock busy\n"); return ECCSBUSY; } @@ -1747,7 +1736,7 @@ static int get_free_ccs(ray_dev_t *local) } } local->ccs_lock = 0; - DEBUG(1, "ray_cs ERROR no free CCS for raylink card\n"); + dev_dbg(&link->dev, "ray_cs ERROR no free CCS for raylink card\n"); return ECCSFULL; } /* get_free_ccs */ @@ -1823,7 +1812,7 @@ static struct net_device_stats *ray_get_stats(struct net_device *dev) struct pcmcia_device *link = local->finder; struct status __iomem *p = local->sram + STATUS_BASE; if (!(pcmcia_dev_present(link))) { - DEBUG(2, "ray_cs net_device_stats - device not present\n"); + dev_dbg(&link->dev, "ray_cs net_device_stats - device not present\n"); return &local->stats; } if (readb(&p->mrx_overflow_for_host)) { @@ -1856,12 +1845,12 @@ static void ray_update_parm(struct net_device *dev, UCHAR objid, UCHAR *value, struct ccs __iomem *pccs; if (!(pcmcia_dev_present(link))) { - DEBUG(2, "ray_update_parm - device not present\n"); + dev_dbg(&link->dev, "ray_update_parm - device not present\n"); return; } if ((ccsindex = get_free_ccs(local)) < 0) { - DEBUG(0, "ray_update_parm - No free ccs\n"); + dev_dbg(&link->dev, "ray_update_parm - No free ccs\n"); return; } pccs = ccs_base(local) + ccsindex; @@ -1874,7 +1863,7 @@ static void ray_update_parm(struct net_device *dev, UCHAR objid, UCHAR *value, } /* Interrupt the firmware to process the command */ if (interrupt_ecf(local, ccsindex)) { - DEBUG(0, "ray_cs associate failed - ECF not ready for intr\n"); + dev_dbg(&link->dev, "ray_cs associate failed - ECF not ready for intr\n"); writeb(CCS_BUFFER_FREE, &(pccs++)->buffer_status); } } @@ -1891,12 +1880,12 @@ static void ray_update_multi_list(struct net_device *dev, int all) void __iomem *p = local->sram + HOST_TO_ECF_BASE; if (!(pcmcia_dev_present(link))) { - DEBUG(2, "ray_update_multi_list - device not present\n"); + dev_dbg(&link->dev, "ray_update_multi_list - device not present\n"); return; } else - DEBUG(2, "ray_update_multi_list(%p)\n", dev); + dev_dbg(&link->dev, "ray_update_multi_list(%p)\n", dev); if ((ccsindex = get_free_ccs(local)) < 0) { - DEBUG(1, "ray_update_multi - No free ccs\n"); + dev_dbg(&link->dev, "ray_update_multi - No free ccs\n"); return; } pccs = ccs_base(local) + ccsindex; @@ -1910,7 +1899,7 @@ static void ray_update_multi_list(struct net_device *dev, int all) for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) { memcpy_toio(p, dmi->dmi_addr, ETH_ALEN); - DEBUG(1, + dev_dbg(&link->dev, "ray_update_multi add addr %02x%02x%02x%02x%02x%02x\n", dmi->dmi_addr[0], dmi->dmi_addr[1], dmi->dmi_addr[2], dmi->dmi_addr[3], @@ -1921,12 +1910,12 @@ static void ray_update_multi_list(struct net_device *dev, int all) if (i > 256 / ADDRLEN) i = 256 / ADDRLEN; writeb((UCHAR) i, &pccs->var); - DEBUG(1, "ray_cs update_multi %d addresses in list\n", i); + dev_dbg(&link->dev, "ray_cs update_multi %d addresses in list\n", i); /* Interrupt the firmware to process the command */ local->num_multi = i; } if (interrupt_ecf(local, ccsindex)) { - DEBUG(1, + dev_dbg(&link->dev, "ray_cs update_multi failed - ECF not ready for intr\n"); writeb(CCS_BUFFER_FREE, &(pccs++)->buffer_status); } @@ -1938,11 +1927,11 @@ static void set_multicast_list(struct net_device *dev) ray_dev_t *local = netdev_priv(dev); UCHAR promisc; - DEBUG(2, "ray_cs set_multicast_list(%p)\n", dev); + pr_debug("ray_cs set_multicast_list(%p)\n", dev); if (dev->flags & IFF_PROMISC) { if (local->sparm.b5.a_promiscuous_mode == 0) { - DEBUG(1, "ray_cs set_multicast_list promisc on\n"); + pr_debug("ray_cs set_multicast_list promisc on\n"); local->sparm.b5.a_promiscuous_mode = 1; promisc = 1; ray_update_parm(dev, OBJID_promiscuous_mode, @@ -1950,7 +1939,7 @@ static void set_multicast_list(struct net_device *dev) } } else { if (local->sparm.b5.a_promiscuous_mode == 1) { - DEBUG(1, "ray_cs set_multicast_list promisc off\n"); + pr_debug("ray_cs set_multicast_list promisc off\n"); local->sparm.b5.a_promiscuous_mode = 0; promisc = 0; ray_update_parm(dev, OBJID_promiscuous_mode, @@ -1984,19 +1973,19 @@ static irqreturn_t ray_interrupt(int irq, void *dev_id) if (dev == NULL) /* Note that we want interrupts with dev->start == 0 */ return IRQ_NONE; - DEBUG(4, "ray_cs: interrupt for *dev=%p\n", dev); + pr_debug("ray_cs: interrupt for *dev=%p\n", dev); local = netdev_priv(dev); link = (struct pcmcia_device *)local->finder; if (!pcmcia_dev_present(link)) { - DEBUG(2, - "ray_cs interrupt from device not present or suspended.\n"); + pr_debug( + "ray_cs interrupt from device not present or suspended.\n"); return IRQ_NONE; } rcsindex = readb(&((struct scb __iomem *)(local->sram))->rcs_index); if (rcsindex >= (NUMBER_OF_CCS + NUMBER_OF_RCS)) { - DEBUG(1, "ray_cs interrupt bad rcsindex = 0x%x\n", rcsindex); + dev_dbg(&link->dev, "ray_cs interrupt bad rcsindex = 0x%x\n", rcsindex); clear_interrupt(local); return IRQ_HANDLED; } @@ -2008,33 +1997,33 @@ static irqreturn_t ray_interrupt(int irq, void *dev_id) case CCS_DOWNLOAD_STARTUP_PARAMS: /* Happens in firmware someday */ del_timer(&local->timer); if (status == CCS_COMMAND_COMPLETE) { - DEBUG(1, + dev_dbg(&link->dev, "ray_cs interrupt download_startup_parameters OK\n"); } else { - DEBUG(1, + dev_dbg(&link->dev, "ray_cs interrupt download_startup_parameters fail\n"); } break; case CCS_UPDATE_PARAMS: - DEBUG(1, "ray_cs interrupt update params done\n"); + dev_dbg(&link->dev, "ray_cs interrupt update params done\n"); if (status != CCS_COMMAND_COMPLETE) { tmp = readb(&pccs->var.update_param. failure_cause); - DEBUG(0, + dev_dbg(&link->dev, "ray_cs interrupt update params failed - reason %d\n", tmp); } break; case CCS_REPORT_PARAMS: - DEBUG(1, "ray_cs interrupt report params done\n"); + dev_dbg(&link->dev, "ray_cs interrupt report params done\n"); break; case CCS_UPDATE_MULTICAST_LIST: /* Note that this CCS isn't returned */ - DEBUG(1, + dev_dbg(&link->dev, "ray_cs interrupt CCS Update Multicast List done\n"); break; case CCS_UPDATE_POWER_SAVINGS_MODE: - DEBUG(1, + dev_dbg(&link->dev, "ray_cs interrupt update power save mode done\n"); break; case CCS_START_NETWORK: @@ -2043,11 +2032,11 @@ static irqreturn_t ray_interrupt(int irq, void *dev_id) if (readb (&pccs->var.start_network.net_initiated) == 1) { - DEBUG(0, + dev_dbg(&link->dev, "ray_cs interrupt network \"%s\" started\n", local->sparm.b4.a_current_ess_id); } else { - DEBUG(0, + dev_dbg(&link->dev, "ray_cs interrupt network \"%s\" joined\n", local->sparm.b4.a_current_ess_id); } @@ -2075,12 +2064,12 @@ static irqreturn_t ray_interrupt(int irq, void *dev_id) local->timer.expires = jiffies + HZ * 5; local->timer.data = (long)local; if (status == CCS_START_NETWORK) { - DEBUG(0, + dev_dbg(&link->dev, "ray_cs interrupt network \"%s\" start failed\n", local->sparm.b4.a_current_ess_id); local->timer.function = &start_net; } else { - DEBUG(0, + dev_dbg(&link->dev, "ray_cs interrupt network \"%s\" join failed\n", local->sparm.b4.a_current_ess_id); local->timer.function = &join_net; @@ -2091,19 +2080,19 @@ static irqreturn_t ray_interrupt(int irq, void *dev_id) case CCS_START_ASSOCIATION: if (status == CCS_COMMAND_COMPLETE) { local->card_status = CARD_ASSOC_COMPLETE; - DEBUG(0, "ray_cs association successful\n"); + dev_dbg(&link->dev, "ray_cs association successful\n"); } else { - DEBUG(0, "ray_cs association failed,\n"); + dev_dbg(&link->dev, "ray_cs association failed,\n"); local->card_status = CARD_ASSOC_FAILED; join_net((u_long) local); } break; case CCS_TX_REQUEST: if (status == CCS_COMMAND_COMPLETE) { - DEBUG(3, + dev_dbg(&link->dev, "ray_cs interrupt tx request complete\n"); } else { - DEBUG(1, + dev_dbg(&link->dev, "ray_cs interrupt tx request failed\n"); } if (!sniffer) @@ -2111,21 +2100,21 @@ static irqreturn_t ray_interrupt(int irq, void *dev_id) netif_wake_queue(dev); break; case CCS_TEST_MEMORY: - DEBUG(1, "ray_cs interrupt mem test done\n"); + dev_dbg(&link->dev, "ray_cs interrupt mem test done\n"); break; case CCS_SHUTDOWN: - DEBUG(1, + dev_dbg(&link->dev, "ray_cs interrupt Unexpected CCS returned - Shutdown\n"); break; case CCS_DUMP_MEMORY: - DEBUG(1, "ray_cs interrupt dump memory done\n"); + dev_dbg(&link->dev, "ray_cs interrupt dump memory done\n"); break; case CCS_START_TIMER: - DEBUG(2, + dev_dbg(&link->dev, "ray_cs interrupt DING - raylink timer expired\n"); break; default: - DEBUG(1, + dev_dbg(&link->dev, "ray_cs interrupt Unexpected CCS 0x%x returned 0x%x\n", rcsindex, cmd); } @@ -2139,7 +2128,7 @@ static irqreturn_t ray_interrupt(int irq, void *dev_id) ray_rx(dev, local, prcs); break; case REJOIN_NET_COMPLETE: - DEBUG(1, "ray_cs interrupt rejoin net complete\n"); + dev_dbg(&link->dev, "ray_cs interrupt rejoin net complete\n"); local->card_status = CARD_ACQ_COMPLETE; /* do we need to clear tx buffers CCS's? */ if (local->sparm.b4.a_network_type == ADHOC) { @@ -2149,7 +2138,7 @@ static irqreturn_t ray_interrupt(int irq, void *dev_id) memcpy_fromio(&local->bss_id, prcs->var.rejoin_net_complete. bssid, ADDRLEN); - DEBUG(1, + dev_dbg(&link->dev, "ray_cs new BSSID = %02x%02x%02x%02x%02x%02x\n", local->bss_id[0], local->bss_id[1], local->bss_id[2], local->bss_id[3], @@ -2159,15 +2148,15 @@ static irqreturn_t ray_interrupt(int irq, void *dev_id) } break; case ROAMING_INITIATED: - DEBUG(1, "ray_cs interrupt roaming initiated\n"); + dev_dbg(&link->dev, "ray_cs interrupt roaming initiated\n"); netif_stop_queue(dev); local->card_status = CARD_DOING_ACQ; break; case JAPAN_CALL_SIGN_RXD: - DEBUG(1, "ray_cs interrupt japan call sign rx\n"); + dev_dbg(&link->dev, "ray_cs interrupt japan call sign rx\n"); break; default: - DEBUG(1, + dev_dbg(&link->dev, "ray_cs Unexpected interrupt for RCS 0x%x cmd = 0x%x\n", rcsindex, (unsigned int)readb(&prcs->interrupt_id)); @@ -2186,7 +2175,7 @@ static void ray_rx(struct net_device *dev, ray_dev_t *local, int rx_len; unsigned int pkt_addr; void __iomem *pmsg; - DEBUG(4, "ray_rx process rx packet\n"); + pr_debug("ray_rx process rx packet\n"); /* Calculate address of packet within Rx buffer */ pkt_addr = ((readb(&prcs->var.rx_packet.rx_data_ptr[0]) << 8) @@ -2199,28 +2188,28 @@ static void ray_rx(struct net_device *dev, ray_dev_t *local, pmsg = local->rmem + pkt_addr; switch (readb(pmsg)) { case DATA_TYPE: - DEBUG(4, "ray_rx data type\n"); + pr_debug("ray_rx data type\n"); rx_data(dev, prcs, pkt_addr, rx_len); break; case AUTHENTIC_TYPE: - DEBUG(4, "ray_rx authentic type\n"); + pr_debug("ray_rx authentic type\n"); if (sniffer) rx_data(dev, prcs, pkt_addr, rx_len); else rx_authenticate(local, prcs, pkt_addr, rx_len); break; case DEAUTHENTIC_TYPE: - DEBUG(4, "ray_rx deauth type\n"); + pr_debug("ray_rx deauth type\n"); if (sniffer) rx_data(dev, prcs, pkt_addr, rx_len); else rx_deauthenticate(local, prcs, pkt_addr, rx_len); break; case NULL_MSG_TYPE: - DEBUG(3, "ray_cs rx NULL msg\n"); + pr_debug("ray_cs rx NULL msg\n"); break; case BEACON_TYPE: - DEBUG(4, "ray_rx beacon type\n"); + pr_debug("ray_rx beacon type\n"); if (sniffer) rx_data(dev, prcs, pkt_addr, rx_len); @@ -2233,7 +2222,7 @@ static void ray_rx(struct net_device *dev, ray_dev_t *local, ray_get_stats(dev); break; default: - DEBUG(0, "ray_cs unknown pkt type %2x\n", + pr_debug("ray_cs unknown pkt type %2x\n", (unsigned int)readb(pmsg)); break; } @@ -2262,7 +2251,7 @@ static void rx_data(struct net_device *dev, struct rcs __iomem *prcs, rx_len > (dev->mtu + RX_MAC_HEADER_LENGTH + ETH_HLEN + FCS_LEN)) { - DEBUG(0, + pr_debug( "ray_cs invalid packet length %d received \n", rx_len); return; @@ -2273,17 +2262,17 @@ static void rx_data(struct net_device *dev, struct rcs __iomem *prcs, rx_len > (dev->mtu + RX_MAC_HEADER_LENGTH + ETH_HLEN + FCS_LEN)) { - DEBUG(0, + pr_debug( "ray_cs invalid packet length %d received \n", rx_len); return; } } } - DEBUG(4, "ray_cs rx_data packet\n"); + pr_debug("ray_cs rx_data packet\n"); /* If fragmented packet, verify sizes of fragments add up */ if (readb(&prcs->var.rx_packet.next_frag_rcs_index) != 0xFF) { - DEBUG(1, "ray_cs rx'ed fragment\n"); + pr_debug("ray_cs rx'ed fragment\n"); tmp = (readb(&prcs->var.rx_packet.totalpacketlength[0]) << 8) + readb(&prcs->var.rx_packet.totalpacketlength[1]); total_len = tmp; @@ -2301,7 +2290,7 @@ static void rx_data(struct net_device *dev, struct rcs __iomem *prcs, } while (1); if (tmp < 0) { - DEBUG(0, + pr_debug( "ray_cs rx_data fragment lengths don't add up\n"); local->stats.rx_dropped++; release_frag_chain(local, prcs); @@ -2313,7 +2302,7 @@ static void rx_data(struct net_device *dev, struct rcs __iomem *prcs, skb = dev_alloc_skb(total_len + 5); if (skb == NULL) { - DEBUG(0, "ray_cs rx_data could not allocate skb\n"); + pr_debug("ray_cs rx_data could not allocate skb\n"); local->stats.rx_dropped++; if (readb(&prcs->var.rx_packet.next_frag_rcs_index) != 0xFF) release_frag_chain(local, prcs); @@ -2321,7 +2310,7 @@ static void rx_data(struct net_device *dev, struct rcs __iomem *prcs, } skb_reserve(skb, 2); /* Align IP on 16 byte (TBD check this) */ - DEBUG(4, "ray_cs rx_data total_len = %x, rx_len = %x\n", total_len, + pr_debug("ray_cs rx_data total_len = %x, rx_len = %x\n", total_len, rx_len); /************************/ @@ -2354,7 +2343,7 @@ static void rx_data(struct net_device *dev, struct rcs __iomem *prcs, tmp = 17; if (readb(&prcs->var.rx_packet.next_frag_rcs_index) != 0xFF) { prcslink = prcs; - DEBUG(1, "ray_cs rx_data in fragment loop\n"); + pr_debug("ray_cs rx_data in fragment loop\n"); do { prcslink = rcs_base(local) + @@ -2426,8 +2415,8 @@ static void untranslate(ray_dev_t *local, struct sk_buff *skb, int len) memcpy(destaddr, ieee80211_get_DA(pmac), ADDRLEN); memcpy(srcaddr, ieee80211_get_SA(pmac), ADDRLEN); -#ifdef PCMCIA_DEBUG - if (pc_debug > 3) { +#if 0 + if { print_hex_dump(KERN_DEBUG, "skb->data before untranslate: ", DUMP_PREFIX_NONE, 16, 1, skb->data, 64, true); @@ -2441,7 +2430,7 @@ static void untranslate(ray_dev_t *local, struct sk_buff *skb, int len) if (psnap->dsap != 0xaa || psnap->ssap != 0xaa || psnap->ctrl != 3) { /* not a snap type so leave it alone */ - DEBUG(3, "ray_cs untranslate NOT SNAP %02x %02x %02x\n", + pr_debug("ray_cs untranslate NOT SNAP %02x %02x %02x\n", psnap->dsap, psnap->ssap, psnap->ctrl); delta = RX_MAC_HEADER_LENGTH - ETH_HLEN; @@ -2450,7 +2439,7 @@ static void untranslate(ray_dev_t *local, struct sk_buff *skb, int len) } else { /* Its a SNAP */ if (memcmp(psnap->org, org_bridge, 3) == 0) { /* EtherII and nuke the LLC */ - DEBUG(3, "ray_cs untranslate Bridge encap\n"); + pr_debug("ray_cs untranslate Bridge encap\n"); delta = RX_MAC_HEADER_LENGTH + sizeof(struct snaphdr_t) - ETH_HLEN; peth = (struct ethhdr *)(skb->data + delta); @@ -2459,14 +2448,14 @@ static void untranslate(ray_dev_t *local, struct sk_buff *skb, int len) switch (ntohs(type)) { case ETH_P_IPX: case ETH_P_AARP: - DEBUG(3, "ray_cs untranslate RFC IPX/AARP\n"); + pr_debug("ray_cs untranslate RFC IPX/AARP\n"); delta = RX_MAC_HEADER_LENGTH - ETH_HLEN; peth = (struct ethhdr *)(skb->data + delta); peth->h_proto = htons(len - RX_MAC_HEADER_LENGTH); break; default: - DEBUG(3, "ray_cs untranslate RFC default\n"); + pr_debug("ray_cs untranslate RFC default\n"); delta = RX_MAC_HEADER_LENGTH + sizeof(struct snaphdr_t) - ETH_HLEN; peth = (struct ethhdr *)(skb->data + delta); @@ -2482,12 +2471,12 @@ static void untranslate(ray_dev_t *local, struct sk_buff *skb, int len) } /* TBD reserve skb_reserve(skb, delta); */ skb_pull(skb, delta); - DEBUG(3, "untranslate after skb_pull(%d), skb->data = %p\n", delta, + pr_debug("untranslate after skb_pull(%d), skb->data = %p\n", delta, skb->data); memcpy(peth->h_dest, destaddr, ADDRLEN); memcpy(peth->h_source, srcaddr, ADDRLEN); -#ifdef PCMCIA_DEBUG - if (pc_debug > 3) { +#if 0 + { int i; printk(KERN_DEBUG "skb->data after untranslate:"); for (i = 0; i < 64; i++) @@ -2529,7 +2518,7 @@ static void release_frag_chain(ray_dev_t *local, struct rcs __iomem *prcs) while (tmp--) { writeb(CCS_BUFFER_FREE, &prcslink->buffer_status); if (rcsindex >= (NUMBER_OF_CCS + NUMBER_OF_RCS)) { - DEBUG(1, "ray_cs interrupt bad rcsindex = 0x%x\n", + pr_debug("ray_cs interrupt bad rcsindex = 0x%x\n", rcsindex); break; } @@ -2543,9 +2532,9 @@ static void release_frag_chain(ray_dev_t *local, struct rcs __iomem *prcs) static void authenticate(ray_dev_t *local) { struct pcmcia_device *link = local->finder; - DEBUG(0, "ray_cs Starting authentication.\n"); + dev_dbg(&link->dev, "ray_cs Starting authentication.\n"); if (!(pcmcia_dev_present(link))) { - DEBUG(2, "ray_cs authenticate - device not present\n"); + dev_dbg(&link->dev, "ray_cs authenticate - device not present\n"); return; } @@ -2573,11 +2562,11 @@ static void rx_authenticate(ray_dev_t *local, struct rcs __iomem *prcs, copy_from_rx_buff(local, buff, pkt_addr, rx_len & 0xff); /* if we are trying to get authenticated */ if (local->sparm.b4.a_network_type == ADHOC) { - DEBUG(1, "ray_cs rx_auth var= %02x %02x %02x %02x %02x %02x\n", + pr_debug("ray_cs rx_auth var= %02x %02x %02x %02x %02x %02x\n", msg->var[0], msg->var[1], msg->var[2], msg->var[3], msg->var[4], msg->var[5]); if (msg->var[2] == 1) { - DEBUG(0, "ray_cs Sending authentication response.\n"); + pr_debug("ray_cs Sending authentication response.\n"); if (!build_auth_frame (local, msg->mac.addr_2, OPEN_AUTH_RESPONSE)) { local->authentication_state = NEED_TO_AUTH; @@ -2591,13 +2580,13 @@ static void rx_authenticate(ray_dev_t *local, struct rcs __iomem *prcs, /* Verify authentication sequence #2 and success */ if (msg->var[2] == 2) { if ((msg->var[3] | msg->var[4]) == 0) { - DEBUG(1, "Authentication successful\n"); + pr_debug("Authentication successful\n"); local->card_status = CARD_AUTH_COMPLETE; associate(local); local->authentication_state = AUTHENTICATED; } else { - DEBUG(0, "Authentication refused\n"); + pr_debug("Authentication refused\n"); local->card_status = CARD_AUTH_REFUSED; join_net((u_long) local); local->authentication_state = @@ -2617,22 +2606,22 @@ static void associate(ray_dev_t *local) struct net_device *dev = link->priv; int ccsindex; if (!(pcmcia_dev_present(link))) { - DEBUG(2, "ray_cs associate - device not present\n"); + dev_dbg(&link->dev, "ray_cs associate - device not present\n"); return; } /* If no tx buffers available, return */ if ((ccsindex = get_free_ccs(local)) < 0) { /* TBD should never be here but... what if we are? */ - DEBUG(1, "ray_cs associate - No free ccs\n"); + dev_dbg(&link->dev, "ray_cs associate - No free ccs\n"); return; } - DEBUG(1, "ray_cs Starting association with access point\n"); + dev_dbg(&link->dev, "ray_cs Starting association with access point\n"); pccs = ccs_base(local) + ccsindex; /* fill in the CCS */ writeb(CCS_START_ASSOCIATION, &pccs->cmd); /* Interrupt the firmware to process the command */ if (interrupt_ecf(local, ccsindex)) { - DEBUG(1, "ray_cs associate failed - ECF not ready for intr\n"); + dev_dbg(&link->dev, "ray_cs associate failed - ECF not ready for intr\n"); writeb(CCS_BUFFER_FREE, &(pccs++)->buffer_status); del_timer(&local->timer); @@ -2655,7 +2644,7 @@ static void rx_deauthenticate(ray_dev_t *local, struct rcs __iomem *prcs, /* UCHAR buff[256]; struct rx_msg *msg = (struct rx_msg *)buff; */ - DEBUG(0, "Deauthentication frame received\n"); + pr_debug("Deauthentication frame received\n"); local->authentication_state = UNAUTHENTICATED; /* Need to reauthenticate or rejoin depending on reason code */ /* copy_from_rx_buff(local, buff, pkt_addr, rx_len & 0xff); @@ -2823,7 +2812,7 @@ static int build_auth_frame(ray_dev_t *local, UCHAR *dest, int auth_type) /* If no tx buffers available, return */ if ((ccsindex = get_free_tx_ccs(local)) < 0) { - DEBUG(1, "ray_cs send authenticate - No free tx ccs\n"); + pr_debug("ray_cs send authenticate - No free tx ccs\n"); return -1; } @@ -2855,7 +2844,7 @@ static int build_auth_frame(ray_dev_t *local, UCHAR *dest, int auth_type) /* Interrupt the firmware to process the command */ if (interrupt_ecf(local, ccsindex)) { - DEBUG(1, + pr_debug( "ray_cs send authentication request failed - ECF not ready for intr\n"); writeb(CCS_BUFFER_FREE, &(pccs++)->buffer_status); return -1; @@ -2942,9 +2931,9 @@ static int __init init_ray_cs(void) { int rc; - DEBUG(1, "%s\n", rcsid); + pr_debug("%s\n", rcsid); rc = pcmcia_register_driver(&ray_driver); - DEBUG(1, "raylink init_module register_pcmcia_driver returns 0x%x\n", + pr_debug("raylink init_module register_pcmcia_driver returns 0x%x\n", rc); #ifdef CONFIG_PROC_FS @@ -2964,7 +2953,7 @@ static int __init init_ray_cs(void) static void __exit exit_ray_cs(void) { - DEBUG(0, "ray_cs: cleanup_module\n"); + pr_debug("ray_cs: cleanup_module\n"); #ifdef CONFIG_PROC_FS remove_proc_entry("driver/ray_cs/ray_cs", NULL); diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c index 431a20ec6db..33918fd5b23 100644 --- a/drivers/net/wireless/wavelan_cs.c +++ b/drivers/net/wireless/wavelan_cs.c @@ -3656,10 +3656,7 @@ wv_pcmcia_reset(struct net_device * dev) i = pcmcia_access_configuration_register(link, ®); if (i != 0) - { - cs_error(link, AccessConfigurationRegister, i); return FALSE; - } #ifdef DEBUG_CONFIG_INFO printk(KERN_DEBUG "%s: wavelan_pcmcia_reset(): Config reg is 0x%x\n", @@ -3670,19 +3667,13 @@ wv_pcmcia_reset(struct net_device * dev) reg.Value = reg.Value | COR_SW_RESET; i = pcmcia_access_configuration_register(link, ®); if (i != 0) - { - cs_error(link, AccessConfigurationRegister, i); return FALSE; - } reg.Action = CS_WRITE; reg.Value = COR_LEVEL_IRQ | COR_CONFIG; i = pcmcia_access_configuration_register(link, ®); if (i != 0) - { - cs_error(link, AccessConfigurationRegister, i); return FALSE; - } #ifdef DEBUG_CONFIG_TRACE printk(KERN_DEBUG "%s: <-wv_pcmcia_reset()\n", dev->name); @@ -3857,10 +3848,7 @@ wv_pcmcia_config(struct pcmcia_device * link) { i = pcmcia_request_io(link, &link->io); if (i != 0) - { - cs_error(link, RequestIO, i); break; - } /* * Now allocate an interrupt line. Note that this does not @@ -3868,10 +3856,7 @@ wv_pcmcia_config(struct pcmcia_device * link) */ i = pcmcia_request_irq(link, &link->irq); if (i != 0) - { - cs_error(link, RequestIRQ, i); break; - } /* * This actually configures the PCMCIA socket -- setting up @@ -3880,10 +3865,7 @@ wv_pcmcia_config(struct pcmcia_device * link) link->conf.ConfigIndex = 1; i = pcmcia_request_configuration(link, &link->conf); if (i != 0) - { - cs_error(link, RequestConfiguration, i); break; - } /* * Allocate a small memory window. Note that the struct pcmcia_device @@ -3894,24 +3876,18 @@ wv_pcmcia_config(struct pcmcia_device * link) req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_AM|WIN_ENABLE; req.Base = req.Size = 0; req.AccessSpeed = mem_speed; - i = pcmcia_request_window(&link, &req, &link->win); + i = pcmcia_request_window(link, &req, &link->win); if (i != 0) - { - cs_error(link, RequestWindow, i); break; - } lp->mem = ioremap(req.Base, req.Size); dev->mem_start = (u_long)lp->mem; dev->mem_end = dev->mem_start + req.Size; mem.CardOffset = 0; mem.Page = 0; - i = pcmcia_map_mem_page(link->win, &mem); + i = pcmcia_map_mem_page(link, link->win, &mem); if (i != 0) - { - cs_error(link, MapMemPage, i); break; - } /* Feed device with this info... */ dev->irq = link->irq.AssignedIRQ; @@ -3923,7 +3899,7 @@ wv_pcmcia_config(struct pcmcia_device * link) lp->mem, dev->irq, (u_int) dev->base_addr); #endif - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); i = register_netdev(dev); if(i != 0) { @@ -4462,8 +4438,7 @@ wavelan_probe(struct pcmcia_device *p_dev) p_dev->io.IOAddrLines = 3; /* Interrupt setup */ - p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID; + p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; p_dev->irq.Handler = wavelan_interrupt; /* General socket configuration */ @@ -4475,7 +4450,7 @@ wavelan_probe(struct pcmcia_device *p_dev) if (!dev) return -ENOMEM; - p_dev->priv = p_dev->irq.Instance = dev; + p_dev->priv = dev; lp = netdev_priv(dev); diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c index 4f1e0cfe609..5f0401a52cf 100644 --- a/drivers/net/wireless/wl3501_cs.c +++ b/drivers/net/wireless/wl3501_cs.c @@ -67,23 +67,7 @@ /* For rough constant delay */ #define WL3501_NOPLOOP(n) { int x = 0; while (x++ < n) slow_down_io(); } -/* - * All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If you do not - * define PCMCIA_DEBUG at all, all the debug code will be left out. If you - * compile with PCMCIA_DEBUG=0, the debug code will be present but disabled -- - * but it can then be enabled for specific modules at load time with a - * 'pc_debug=#' option to insmod. - */ -#define PCMCIA_DEBUG 0 -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -#define dprintk(n, format, args...) \ - { if (pc_debug > (n)) \ - printk(KERN_INFO "%s: " format "\n", __func__ , ##args); } -#else -#define dprintk(n, format, args...) -#endif + #define wl3501_outb(a, b) { outb(a, b); slow_down_io(); } #define wl3501_outb_p(a, b) { outb_p(a, b); slow_down_io(); } @@ -684,10 +668,10 @@ static void wl3501_mgmt_scan_confirm(struct wl3501_card *this, u16 addr) int matchflag = 0; struct wl3501_scan_confirm sig; - dprintk(3, "entry"); + pr_debug("entry"); wl3501_get_from_wla(this, addr, &sig, sizeof(sig)); if (sig.status == WL3501_STATUS_SUCCESS) { - dprintk(3, "success"); + pr_debug("success"); if ((this->net_type == IW_MODE_INFRA && (sig.cap_info & WL3501_MGMT_CAPABILITY_ESS)) || (this->net_type == IW_MODE_ADHOC && @@ -722,7 +706,7 @@ static void wl3501_mgmt_scan_confirm(struct wl3501_card *this, u16 addr) } } } else if (sig.status == WL3501_STATUS_TIMEOUT) { - dprintk(3, "timeout"); + pr_debug("timeout"); this->join_sta_bss = 0; for (i = this->join_sta_bss; i < this->bss_cnt; i++) if (!wl3501_mgmt_join(this, i)) @@ -879,7 +863,7 @@ static int wl3501_mgmt_auth(struct wl3501_card *this) .timeout = 1000, }; - dprintk(3, "entry"); + pr_debug("entry"); memcpy(sig.mac_addr, this->bssid, ETH_ALEN); return wl3501_esbq_exec(this, &sig, sizeof(sig)); } @@ -893,7 +877,7 @@ static int wl3501_mgmt_association(struct wl3501_card *this) .cap_info = this->cap_info, }; - dprintk(3, "entry"); + pr_debug("entry"); memcpy(sig.mac_addr, this->bssid, ETH_ALEN); return wl3501_esbq_exec(this, &sig, sizeof(sig)); } @@ -903,7 +887,7 @@ static void wl3501_mgmt_join_confirm(struct net_device *dev, u16 addr) struct wl3501_card *this = netdev_priv(dev); struct wl3501_join_confirm sig; - dprintk(3, "entry"); + pr_debug("entry"); wl3501_get_from_wla(this, addr, &sig, sizeof(sig)); if (sig.status == WL3501_STATUS_SUCCESS) { if (this->net_type == IW_MODE_INFRA) { @@ -962,7 +946,7 @@ static inline void wl3501_md_confirm_interrupt(struct net_device *dev, { struct wl3501_md_confirm sig; - dprintk(3, "entry"); + pr_debug("entry"); wl3501_get_from_wla(this, addr, &sig, sizeof(sig)); wl3501_free_tx_buffer(this, sig.data); if (netif_queue_stopped(dev)) @@ -1017,7 +1001,7 @@ static inline void wl3501_md_ind_interrupt(struct net_device *dev, static inline void wl3501_get_confirm_interrupt(struct wl3501_card *this, u16 addr, void *sig, int size) { - dprintk(3, "entry"); + pr_debug("entry"); wl3501_get_from_wla(this, addr, &this->sig_get_confirm, sizeof(this->sig_get_confirm)); wake_up(&this->wait); @@ -1029,7 +1013,7 @@ static inline void wl3501_start_confirm_interrupt(struct net_device *dev, { struct wl3501_start_confirm sig; - dprintk(3, "entry"); + pr_debug("entry"); wl3501_get_from_wla(this, addr, &sig, sizeof(sig)); if (sig.status == WL3501_STATUS_SUCCESS) netif_wake_queue(dev); @@ -1041,7 +1025,7 @@ static inline void wl3501_assoc_confirm_interrupt(struct net_device *dev, struct wl3501_card *this = netdev_priv(dev); struct wl3501_assoc_confirm sig; - dprintk(3, "entry"); + pr_debug("entry"); wl3501_get_from_wla(this, addr, &sig, sizeof(sig)); if (sig.status == WL3501_STATUS_SUCCESS) @@ -1053,7 +1037,7 @@ static inline void wl3501_auth_confirm_interrupt(struct wl3501_card *this, { struct wl3501_auth_confirm sig; - dprintk(3, "entry"); + pr_debug("entry"); wl3501_get_from_wla(this, addr, &sig, sizeof(sig)); if (sig.status == WL3501_STATUS_SUCCESS) @@ -1069,7 +1053,7 @@ static inline void wl3501_rx_interrupt(struct net_device *dev) u8 sig_id; struct wl3501_card *this = netdev_priv(dev); - dprintk(3, "entry"); + pr_debug("entry"); loop: morepkts = 0; if (!wl3501_esbq_confirm(this)) @@ -1302,7 +1286,7 @@ static int wl3501_reset(struct net_device *dev) wl3501_ack_interrupt(this); wl3501_unblock_interrupt(this); wl3501_mgmt_scan(this, 100); - dprintk(1, "%s: device reset", dev->name); + pr_debug("%s: device reset", dev->name); rc = 0; out: return rc; @@ -1376,7 +1360,7 @@ static int wl3501_open(struct net_device *dev) link->open++; /* Initial WL3501 firmware */ - dprintk(1, "%s: Initialize WL3501 firmware...", dev->name); + pr_debug("%s: Initialize WL3501 firmware...", dev->name); if (wl3501_init_firmware(this)) goto fail; /* Initial device variables */ @@ -1388,7 +1372,7 @@ static int wl3501_open(struct net_device *dev) wl3501_unblock_interrupt(this); wl3501_mgmt_scan(this, 100); rc = 0; - dprintk(1, "%s: WL3501 opened", dev->name); + pr_debug("%s: WL3501 opened", dev->name); printk(KERN_INFO "%s: Card Name: %s\n" "%s: Firmware Date: %s\n", dev->name, this->card_name, @@ -1914,8 +1898,7 @@ static int wl3501_probe(struct pcmcia_device *p_dev) p_dev->io.IOAddrLines = 5; /* Interrupt setup */ - p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID; + p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; p_dev->irq.Handler = wl3501_interrupt; /* General socket configuration */ @@ -1938,16 +1921,13 @@ static int wl3501_probe(struct pcmcia_device *p_dev) dev->wireless_handlers = &wl3501_handler_def; SET_ETHTOOL_OPS(dev, &ops); netif_stop_queue(dev); - p_dev->priv = p_dev->irq.Instance = dev; + p_dev->priv = dev; return wl3501_config(p_dev); out_link: return -ENOMEM; } -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - /** * wl3501_config - configure the PCMCIA socket and make eth device available * @link - FILL_IN @@ -1959,7 +1939,7 @@ do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) static int wl3501_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; - int i = 0, j, last_fn, last_ret; + int i = 0, j, ret; struct wl3501_card *this; /* Try allocating IO ports. This tries a few fixed addresses. If you @@ -1975,24 +1955,26 @@ static int wl3501_config(struct pcmcia_device *link) if (i == 0) break; } - if (i != 0) { - cs_error(link, RequestIO, i); + if (i != 0) goto failed; - } /* Now allocate an interrupt line. Note that this does not actually * assign a handler to the interrupt. */ - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; /* This actually configures the PCMCIA socket -- setting up the I/O * windows and the interrupt mapping. */ - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; dev->irq = link->irq.AssignedIRQ; dev->base_addr = link->io.BasePort1; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); if (register_netdev(dev)) { printk(KERN_NOTICE "wl3501_cs: register_netdev() failed\n"); goto failed; @@ -2041,8 +2023,6 @@ static int wl3501_config(struct pcmcia_device *link) netif_start_queue(dev); return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: wl3501_release(link); return -ENODEV; diff --git a/drivers/parport/parport_cs.c b/drivers/parport/parport_cs.c index 8fdfa4f537a..7dd370fa343 100644 --- a/drivers/parport/parport_cs.c +++ b/drivers/parport/parport_cs.c @@ -67,14 +67,6 @@ MODULE_LICENSE("Dual MPL/GPL"); INT_MODULE_PARM(epp_mode, 1); -#ifdef PCMCIA_DEBUG -INT_MODULE_PARM(pc_debug, PCMCIA_DEBUG); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = -"parport_cs.c 1.29 2002/10/11 06:57:41 (David Hinds)"; -#else -#define DEBUG(n, args...) -#endif /*====================================================================*/ @@ -103,7 +95,7 @@ static int parport_probe(struct pcmcia_device *link) { parport_info_t *info; - DEBUG(0, "parport_attach()\n"); + dev_dbg(&link->dev, "parport_attach()\n"); /* Create new parport device */ info = kzalloc(sizeof(*info), GFP_KERNEL); @@ -114,7 +106,6 @@ static int parport_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; link->io.Attributes2 = IO_DATA_PATH_WIDTH_8; link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; @@ -132,7 +123,7 @@ static int parport_probe(struct pcmcia_device *link) static void parport_detach(struct pcmcia_device *link) { - DEBUG(0, "parport_detach(0x%p)\n", link); + dev_dbg(&link->dev, "parport_detach\n"); parport_cs_release(link); @@ -147,9 +138,6 @@ static void parport_detach(struct pcmcia_device *link) ======================================================================*/ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int parport_config_check(struct pcmcia_device *p_dev, cistpl_cftable_entry_t *cfg, cistpl_cftable_entry_t *dflt, @@ -178,18 +166,20 @@ static int parport_config(struct pcmcia_device *link) { parport_info_t *info = link->priv; struct parport *p; - int last_ret, last_fn; + int ret; - DEBUG(0, "parport_config(0x%p)\n", link); + dev_dbg(&link->dev, "parport_config\n"); - last_ret = pcmcia_loop_config(link, parport_config_check, NULL); - if (last_ret) { - cs_error(link, RequestIO, last_ret); + ret = pcmcia_loop_config(link, parport_config_check, NULL); + if (ret) goto failed; - } - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; p = parport_pc_probe_port(link->io.BasePort1, link->io.BasePort2, link->irq.AssignedIRQ, PARPORT_DMA_NONE, @@ -213,8 +203,6 @@ static int parport_config(struct pcmcia_device *link) return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: parport_cs_release(link); return -ENODEV; @@ -232,7 +220,7 @@ static void parport_cs_release(struct pcmcia_device *link) { parport_info_t *info = link->priv; - DEBUG(0, "parport_release(0x%p)\n", link); + dev_dbg(&link->dev, "parport_release\n"); if (info->ndev) { struct parport *p = info->port; diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index b952ebc7a78..416f6ac65b7 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -645,10 +645,13 @@ void __init detect_intel_iommu(void) "x2apic and Intr-remapping.\n"); #endif #ifdef CONFIG_DMAR - if (ret && !no_iommu && !iommu_detected && !swiotlb && - !dmar_disabled) + if (ret && !no_iommu && !iommu_detected && !dmar_disabled) iommu_detected = 1; #endif +#ifdef CONFIG_X86 + if (ret) + x86_init.iommu.iommu_init = intel_iommu_init; +#endif } early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size); dmar_tbl = NULL; diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 1840a0578a4..9261327b49f 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -3266,7 +3266,7 @@ int __init intel_iommu_init(void) * Check the need for DMA-remapping initialization now. * Above initialization will also be used by Interrupt-remapping. */ - if (no_iommu || swiotlb || dmar_disabled) + if (no_iommu || dmar_disabled) return -ENODEV; iommu_init_mempool(); @@ -3287,7 +3287,9 @@ int __init intel_iommu_init(void) "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n"); init_timer(&unmap_timer); - force_iommu = 1; +#ifdef CONFIG_SWIOTLB + swiotlb = 0; +#endif dma_ops = &intel_dma_ops; init_iommu_sysfs(); diff --git a/drivers/pcmcia/Kconfig b/drivers/pcmcia/Kconfig index 17f38a781d4..cd5082d3ca1 100644 --- a/drivers/pcmcia/Kconfig +++ b/drivers/pcmcia/Kconfig @@ -17,24 +17,6 @@ menuconfig PCCARD if PCCARD -config PCMCIA_DEBUG - bool "Enable PCCARD debugging" - help - Say Y here to enable PCMCIA subsystem debugging. You - will need to choose the debugging level either via the - kernel command line, or module options depending whether - you build the PCMCIA as modules. - - The kernel command line options are: - pcmcia_core.pc_debug=N - pcmcia.pc_debug=N - sa11xx_core.pc_debug=N - - The module option is called pc_debug=N - - In all the above examples, N is the debugging verbosity - level. - config PCMCIA tristate "16-bit PCMCIA support" select CRC32 @@ -196,9 +178,13 @@ config PCMCIA_BCM63XX tristate "bcm63xx pcmcia support" depends on BCM63XX && PCMCIA +config PCMCIA_SOC_COMMON + tristate + config PCMCIA_SA1100 tristate "SA1100 support" depends on ARM && ARCH_SA1100 && PCMCIA + select PCMCIA_SOC_COMMON help Say Y here to include support for SA11x0-based PCMCIA or CF sockets, found on HP iPAQs, Yopy, and other StrongARM(R)/ @@ -209,6 +195,7 @@ config PCMCIA_SA1100 config PCMCIA_SA1111 tristate "SA1111 support" depends on ARM && ARCH_SA1100 && SA1111 && PCMCIA + select PCMCIA_SOC_COMMON help Say Y here to include support for SA1111-based PCMCIA or CF sockets, found on the Jornada 720, Graphicsmaster and other @@ -222,9 +209,28 @@ config PCMCIA_PXA2XX depends on (ARCH_LUBBOCK || MACH_MAINSTONE || PXA_SHARPSL \ || MACH_ARMCORE || ARCH_PXA_PALM || TRIZEPS_PCMCIA \ || ARCH_VIPER || ARCH_PXA_ESERIES || MACH_STARGATE2) + select PCMCIA_SOC_COMMON help Say Y here to include support for the PXA2xx PCMCIA controller +config PCMCIA_DEBUG + bool "Enable debugging" + depends on (PCMCIA_SA1111 || PCMCIA_SA1100 || PCMCIA_PXA2XX) + help + Say Y here to enable debugging for the SoC PCMCIA layer. + You will need to choose the debugging level either via the + kernel command line, or module options depending whether + you build the drivers as modules. + + The kernel command line options are: + sa11xx_core.pc_debug=N + pxa2xx_core.pc_debug=N + + The module option is called pc_debug=N + + In all the above examples, N is the debugging verbosity + level. + config PCMCIA_PROBE bool default y if ISA && !ARCH_SA1100 && !ARCH_CLPS711X && !PARISC diff --git a/drivers/pcmcia/Makefile b/drivers/pcmcia/Makefile index a03a38acd77..38293831399 100644 --- a/drivers/pcmcia/Makefile +++ b/drivers/pcmcia/Makefile @@ -22,8 +22,9 @@ obj-$(CONFIG_I82365) += i82365.o obj-$(CONFIG_I82092) += i82092.o obj-$(CONFIG_TCIC) += tcic.o obj-$(CONFIG_PCMCIA_M8XX) += m8xx_pcmcia.o -obj-$(CONFIG_PCMCIA_SA1100) += sa11xx_core.o sa1100_cs.o -obj-$(CONFIG_PCMCIA_SA1111) += sa11xx_core.o sa1111_cs.o +obj-$(CONFIG_PCMCIA_SOC_COMMON) += soc_common.o +obj-$(CONFIG_PCMCIA_SA1100) += sa11xx_base.o sa1100_cs.o +obj-$(CONFIG_PCMCIA_SA1111) += sa11xx_base.o sa1111_cs.o obj-$(CONFIG_M32R_PCC) += m32r_pcc.o obj-$(CONFIG_M32R_CFC) += m32r_cfc.o obj-$(CONFIG_PCMCIA_AU1X00) += au1x00_ss.o @@ -35,9 +36,6 @@ obj-$(CONFIG_BFIN_CFPCMCIA) += bfin_cf_pcmcia.o obj-$(CONFIG_AT91_CF) += at91_cf.o obj-$(CONFIG_ELECTRA_CF) += electra_cf.o -sa11xx_core-y += soc_common.o sa11xx_base.o -pxa2xx_core-y += soc_common.o pxa2xx_base.o - au1x00_ss-y += au1000_generic.o au1x00_ss-$(CONFIG_MIPS_PB1000) += au1000_pb1x00.o au1x00_ss-$(CONFIG_MIPS_PB1100) += au1000_pb1x00.o @@ -77,4 +75,4 @@ pxa2xx-obj-$(CONFIG_MACH_PALMLD) += pxa2xx_palmld.o pxa2xx-obj-$(CONFIG_MACH_E740) += pxa2xx_e740.o pxa2xx-obj-$(CONFIG_MACH_STARGATE2) += pxa2xx_stargate2.o -obj-$(CONFIG_PCMCIA_PXA2XX) += pxa2xx_core.o $(pxa2xx-obj-y) +obj-$(CONFIG_PCMCIA_PXA2XX) += pxa2xx_base.o $(pxa2xx-obj-y) diff --git a/drivers/pcmcia/cardbus.c b/drivers/pcmcia/cardbus.c index db77e1f3309..4cd70d05681 100644 --- a/drivers/pcmcia/cardbus.c +++ b/drivers/pcmcia/cardbus.c @@ -91,7 +91,7 @@ static u_int xlate_rom_addr(void __iomem *b, u_int addr) static void cb_release_cis_mem(struct pcmcia_socket * s) { if (s->cb_cis_virt) { - cs_dbg(s, 1, "cb_release_cis_mem()\n"); + dev_dbg(&s->dev, "cb_release_cis_mem()\n"); iounmap(s->cb_cis_virt); s->cb_cis_virt = NULL; s->cb_cis_res = NULL; @@ -132,7 +132,7 @@ int read_cb_mem(struct pcmcia_socket * s, int space, u_int addr, u_int len, void struct pci_dev *dev; struct resource *res; - cs_dbg(s, 3, "read_cb_mem(%d, %#x, %u)\n", space, addr, len); + dev_dbg(&s->dev, "read_cb_mem(%d, %#x, %u)\n", space, addr, len); dev = pci_get_slot(s->cb_dev->subordinate, 0); if (!dev) diff --git a/drivers/pcmcia/cirrus.h b/drivers/pcmcia/cirrus.h index ecd4fc7f666..446a4576e73 100644 --- a/drivers/pcmcia/cirrus.h +++ b/drivers/pcmcia/cirrus.h @@ -30,16 +30,6 @@ #ifndef _LINUX_CIRRUS_H #define _LINUX_CIRRUS_H -#ifndef PCI_VENDOR_ID_CIRRUS -#define PCI_VENDOR_ID_CIRRUS 0x1013 -#endif -#ifndef PCI_DEVICE_ID_CIRRUS_6729 -#define PCI_DEVICE_ID_CIRRUS_6729 0x1100 -#endif -#ifndef PCI_DEVICE_ID_CIRRUS_6832 -#define PCI_DEVICE_ID_CIRRUS_6832 0x1110 -#endif - #define PD67_MISC_CTL_1 0x16 /* Misc control 1 */ #define PD67_FIFO_CTL 0x17 /* FIFO control */ #define PD67_MISC_CTL_2 0x1E /* Misc control 2 */ diff --git a/drivers/pcmcia/cistpl.c b/drivers/pcmcia/cistpl.c index 6c4a4fc8363..8c1b73cf021 100644 --- a/drivers/pcmcia/cistpl.c +++ b/drivers/pcmcia/cistpl.c @@ -138,7 +138,7 @@ int pcmcia_read_cis_mem(struct pcmcia_socket *s, int attr, u_int addr, void __iomem *sys, *end; unsigned char *buf = ptr; - cs_dbg(s, 3, "pcmcia_read_cis_mem(%d, %#x, %u)\n", attr, addr, len); + dev_dbg(&s->dev, "pcmcia_read_cis_mem(%d, %#x, %u)\n", attr, addr, len); if (attr & IS_INDIRECT) { /* Indirect accesses use a bunch of special registers at fixed @@ -190,7 +190,7 @@ int pcmcia_read_cis_mem(struct pcmcia_socket *s, int attr, u_int addr, addr = 0; } } - cs_dbg(s, 3, " %#2.2x %#2.2x %#2.2x %#2.2x ...\n", + dev_dbg(&s->dev, " %#2.2x %#2.2x %#2.2x %#2.2x ...\n", *(u_char *)(ptr+0), *(u_char *)(ptr+1), *(u_char *)(ptr+2), *(u_char *)(ptr+3)); return 0; @@ -204,7 +204,7 @@ void pcmcia_write_cis_mem(struct pcmcia_socket *s, int attr, u_int addr, void __iomem *sys, *end; unsigned char *buf = ptr; - cs_dbg(s, 3, "pcmcia_write_cis_mem(%d, %#x, %u)\n", attr, addr, len); + dev_dbg(&s->dev, "pcmcia_write_cis_mem(%d, %#x, %u)\n", attr, addr, len); if (attr & IS_INDIRECT) { /* Indirect accesses use a bunch of special registers at fixed @@ -584,7 +584,7 @@ int pccard_get_next_tuple(struct pcmcia_socket *s, unsigned int function, tuple_ ofs += link[1] + 2; } if (i == MAX_TUPLES) { - cs_dbg(s, 1, "cs: overrun in pcmcia_get_next_tuple\n"); + dev_dbg(&s->dev, "cs: overrun in pcmcia_get_next_tuple\n"); return -ENOSPC; } @@ -1440,7 +1440,7 @@ int pcmcia_parse_tuple(tuple_t *tuple, cisparse_t *parse) break; } if (ret) - __cs_dbg(0, "parse_tuple failed %d\n", ret); + pr_debug("parse_tuple failed %d\n", ret); return ret; } EXPORT_SYMBOL(pcmcia_parse_tuple); @@ -1482,6 +1482,67 @@ done: } EXPORT_SYMBOL(pccard_read_tuple); + +/** + * pccard_loop_tuple() - loop over tuples in the CIS + * @s: the struct pcmcia_socket where the card is inserted + * @function: the device function we loop for + * @code: which CIS code shall we look for? + * @parse: buffer where the tuple shall be parsed (or NULL, if no parse) + * @priv_data: private data to be passed to the loop_tuple function. + * @loop_tuple: function to call for each CIS entry of type @function. IT + * gets passed the raw tuple, the paresed tuple (if @parse is + * set) and @priv_data. + * + * pccard_loop_tuple() loops over all CIS entries of type @function, and + * calls the @loop_tuple function for each entry. If the call to @loop_tuple + * returns 0, the loop exits. Returns 0 on success or errorcode otherwise. + */ +int pccard_loop_tuple(struct pcmcia_socket *s, unsigned int function, + cisdata_t code, cisparse_t *parse, void *priv_data, + int (*loop_tuple) (tuple_t *tuple, + cisparse_t *parse, + void *priv_data)) +{ + tuple_t tuple; + cisdata_t *buf; + int ret; + + buf = kzalloc(256, GFP_KERNEL); + if (buf == NULL) { + dev_printk(KERN_WARNING, &s->dev, "no memory to read tuple\n"); + return -ENOMEM; + } + + tuple.TupleData = buf; + tuple.TupleDataMax = 255; + tuple.TupleOffset = 0; + tuple.DesiredTuple = code; + tuple.Attributes = 0; + + ret = pccard_get_first_tuple(s, function, &tuple); + while (!ret) { + if (pccard_get_tuple_data(s, &tuple)) + goto next_entry; + + if (parse) + if (pcmcia_parse_tuple(&tuple, parse)) + goto next_entry; + + ret = loop_tuple(&tuple, parse, priv_data); + if (!ret) + break; + +next_entry: + ret = pccard_get_next_tuple(s, function, &tuple); + } + + kfree(buf); + return ret; +} +EXPORT_SYMBOL(pccard_loop_tuple); + + /*====================================================================== This tries to determine if a card has a sensible CIS. It returns diff --git a/drivers/pcmcia/cs.c b/drivers/pcmcia/cs.c index 698d75cda08..790af87a922 100644 --- a/drivers/pcmcia/cs.c +++ b/drivers/pcmcia/cs.c @@ -61,17 +61,6 @@ INT_MODULE_PARM(unreset_limit, 30); /* unreset_check's */ /* Access speed for attribute memory windows */ INT_MODULE_PARM(cis_speed, 300); /* ns */ -#ifdef CONFIG_PCMCIA_DEBUG -static int pc_debug; - -module_param(pc_debug, int, 0644); - -int cs_debug_level(int level) -{ - return pc_debug > level; -} -#endif - socket_state_t dead_socket = { .csc_mask = SS_DETECT, @@ -190,7 +179,7 @@ int pcmcia_register_socket(struct pcmcia_socket *socket) if (!socket || !socket->ops || !socket->dev.parent || !socket->resource_ops) return -EINVAL; - cs_dbg(socket, 0, "pcmcia_register_socket(0x%p)\n", socket->ops); + dev_dbg(&socket->dev, "pcmcia_register_socket(0x%p)\n", socket->ops); spin_lock_init(&socket->lock); @@ -262,6 +251,13 @@ int pcmcia_register_socket(struct pcmcia_socket *socket) pcmcia_parse_events(socket, SS_DETECT); + /* + * Let's try to get the PCMCIA module for 16-bit PCMCIA support. + * If it fails, it doesn't matter -- we still have 32-bit CardBus + * support to offer, so this is not a failure mode. + */ + request_module_nowait("pcmcia"); + return 0; err: @@ -282,7 +278,7 @@ void pcmcia_unregister_socket(struct pcmcia_socket *socket) if (!socket) return; - cs_dbg(socket, 0, "pcmcia_unregister_socket(0x%p)\n", socket->ops); + dev_dbg(&socket->dev, "pcmcia_unregister_socket(0x%p)\n", socket->ops); if (socket->thread) kthread_stop(socket->thread); @@ -335,7 +331,7 @@ static int send_event(struct pcmcia_socket *s, event_t event, int priority) if (s->state & SOCKET_CARDBUS) return 0; - cs_dbg(s, 1, "send_event(event %d, pri %d, callback 0x%p)\n", + dev_dbg(&s->dev, "send_event(event %d, pri %d, callback 0x%p)\n", event, priority, s->callback); if (!s->callback) @@ -352,7 +348,7 @@ static int send_event(struct pcmcia_socket *s, event_t event, int priority) static void socket_remove_drivers(struct pcmcia_socket *skt) { - cs_dbg(skt, 4, "remove_drivers\n"); + dev_dbg(&skt->dev, "remove_drivers\n"); send_event(skt, CS_EVENT_CARD_REMOVAL, CS_EVENT_PRI_HIGH); } @@ -361,7 +357,7 @@ static int socket_reset(struct pcmcia_socket *skt) { int status, i; - cs_dbg(skt, 4, "reset\n"); + dev_dbg(&skt->dev, "reset\n"); skt->socket.flags |= SS_OUTPUT_ENA | SS_RESET; skt->ops->set_socket(skt, &skt->socket); @@ -383,7 +379,7 @@ static int socket_reset(struct pcmcia_socket *skt) msleep(unreset_check * 10); } - cs_err(skt, "time out after reset.\n"); + dev_printk(KERN_ERR, &skt->dev, "time out after reset.\n"); return -ETIMEDOUT; } @@ -397,7 +393,7 @@ static void socket_shutdown(struct pcmcia_socket *s) { int status; - cs_dbg(s, 4, "shutdown\n"); + dev_dbg(&s->dev, "shutdown\n"); socket_remove_drivers(s); s->state &= SOCKET_INUSE | SOCKET_PRESENT; @@ -432,7 +428,7 @@ static int socket_setup(struct pcmcia_socket *skt, int initial_delay) { int status, i; - cs_dbg(skt, 4, "setup\n"); + dev_dbg(&skt->dev, "setup\n"); skt->ops->get_status(skt, &status); if (!(status & SS_DETECT)) @@ -452,13 +448,15 @@ static int socket_setup(struct pcmcia_socket *skt, int initial_delay) } if (status & SS_PENDING) { - cs_err(skt, "voltage interrogation timed out.\n"); + dev_printk(KERN_ERR, &skt->dev, + "voltage interrogation timed out.\n"); return -ETIMEDOUT; } if (status & SS_CARDBUS) { if (!(skt->features & SS_CAP_CARDBUS)) { - cs_err(skt, "cardbus cards are not supported.\n"); + dev_printk(KERN_ERR, &skt->dev, + "cardbus cards are not supported.\n"); return -EINVAL; } skt->state |= SOCKET_CARDBUS; @@ -472,7 +470,7 @@ static int socket_setup(struct pcmcia_socket *skt, int initial_delay) else if (!(status & SS_XVCARD)) skt->socket.Vcc = skt->socket.Vpp = 50; else { - cs_err(skt, "unsupported voltage key.\n"); + dev_printk(KERN_ERR, &skt->dev, "unsupported voltage key.\n"); return -EIO; } @@ -489,7 +487,7 @@ static int socket_setup(struct pcmcia_socket *skt, int initial_delay) skt->ops->get_status(skt, &status); if (!(status & SS_POWERON)) { - cs_err(skt, "unable to apply power.\n"); + dev_printk(KERN_ERR, &skt->dev, "unable to apply power.\n"); return -EIO; } @@ -509,7 +507,7 @@ static int socket_insert(struct pcmcia_socket *skt) { int ret; - cs_dbg(skt, 4, "insert\n"); + dev_dbg(&skt->dev, "insert\n"); if (!cs_socket_get(skt)) return -ENODEV; @@ -529,7 +527,7 @@ static int socket_insert(struct pcmcia_socket *skt) skt->state |= SOCKET_CARDBUS_CONFIG; } #endif - cs_dbg(skt, 4, "insert done\n"); + dev_dbg(&skt->dev, "insert done\n"); send_event(skt, CS_EVENT_CARD_INSERTION, CS_EVENT_PRI_LOW); } else { @@ -576,7 +574,7 @@ static int socket_late_resume(struct pcmcia_socket *skt) * FIXME: need a better check here for cardbus cards. */ if (verify_cis_cache(skt) != 0) { - cs_dbg(skt, 4, "cis mismatch - different card\n"); + dev_dbg(&skt->dev, "cis mismatch - different card\n"); socket_remove_drivers(skt); destroy_cis_cache(skt); /* @@ -587,7 +585,7 @@ static int socket_late_resume(struct pcmcia_socket *skt) msleep(200); send_event(skt, CS_EVENT_CARD_INSERTION, CS_EVENT_PRI_LOW); } else { - cs_dbg(skt, 4, "cis matches cache\n"); + dev_dbg(&skt->dev, "cis matches cache\n"); send_event(skt, CS_EVENT_PM_RESUME, CS_EVENT_PRI_LOW); } } else { @@ -723,7 +721,7 @@ static int pccardd(void *__skt) void pcmcia_parse_events(struct pcmcia_socket *s, u_int events) { unsigned long flags; - cs_dbg(s, 4, "parse_events: events %08x\n", events); + dev_dbg(&s->dev, "parse_events: events %08x\n", events); if (s->thread) { spin_lock_irqsave(&s->thread_lock, flags); s->thread_events |= events; @@ -773,19 +771,22 @@ int pcmcia_reset_card(struct pcmcia_socket *skt) { int ret; - cs_dbg(skt, 1, "resetting socket\n"); + dev_dbg(&skt->dev, "resetting socket\n"); mutex_lock(&skt->skt_mutex); do { if (!(skt->state & SOCKET_PRESENT)) { + dev_dbg(&skt->dev, "can't reset, not present\n"); ret = -ENODEV; break; } if (skt->state & SOCKET_SUSPEND) { + dev_dbg(&skt->dev, "can't reset, suspended\n"); ret = -EBUSY; break; } if (skt->state & SOCKET_CARDBUS) { + dev_dbg(&skt->dev, "can't reset, is cardbus\n"); ret = -EPERM; break; } @@ -818,7 +819,7 @@ int pcmcia_suspend_card(struct pcmcia_socket *skt) { int ret; - cs_dbg(skt, 1, "suspending socket\n"); + dev_dbg(&skt->dev, "suspending socket\n"); mutex_lock(&skt->skt_mutex); do { @@ -848,7 +849,7 @@ int pcmcia_resume_card(struct pcmcia_socket *skt) { int ret; - cs_dbg(skt, 1, "waking up socket\n"); + dev_dbg(&skt->dev, "waking up socket\n"); mutex_lock(&skt->skt_mutex); do { @@ -876,7 +877,7 @@ int pcmcia_eject_card(struct pcmcia_socket *skt) { int ret; - cs_dbg(skt, 1, "user eject request\n"); + dev_dbg(&skt->dev, "user eject request\n"); mutex_lock(&skt->skt_mutex); do { @@ -905,7 +906,7 @@ int pcmcia_insert_card(struct pcmcia_socket *skt) { int ret; - cs_dbg(skt, 1, "user insert request\n"); + dev_dbg(&skt->dev, "user insert request\n"); mutex_lock(&skt->skt_mutex); do { diff --git a/drivers/pcmcia/cs_internal.h b/drivers/pcmcia/cs_internal.h index 1f4098f1354..3bc02d53a3a 100644 --- a/drivers/pcmcia/cs_internal.h +++ b/drivers/pcmcia/cs_internal.h @@ -107,28 +107,6 @@ static inline void cs_socket_put(struct pcmcia_socket *skt) } } -#ifdef CONFIG_PCMCIA_DEBUG -extern int cs_debug_level(int); - -#define cs_dbg(skt, lvl, fmt, arg...) do { \ - if (cs_debug_level(lvl)) \ - dev_printk(KERN_DEBUG, &skt->dev, \ - "cs: " fmt, ## arg); \ -} while (0) -#define __cs_dbg(lvl, fmt, arg...) do { \ - if (cs_debug_level(lvl)) \ - printk(KERN_DEBUG \ - "cs: " fmt, ## arg); \ -} while (0) - -#else -#define cs_dbg(skt, lvl, fmt, arg...) do { } while (0) -#define __cs_dbg(lvl, fmt, arg...) do { } while (0) -#endif - -#define cs_err(skt, fmt, arg...) \ - dev_printk(KERN_ERR, &skt->dev, "cs: " fmt, ## arg) - /* * Stuff internal to module "pcmcia_core": @@ -170,10 +148,6 @@ extern struct rw_semaphore pcmcia_socket_list_rwsem; extern struct list_head pcmcia_socket_list; extern struct class pcmcia_socket_class; -int pcmcia_get_window(struct pcmcia_socket *s, - window_handle_t *handle, - int idx, - win_req_t *req); int pccard_register_pcmcia(struct pcmcia_socket *s, struct pcmcia_callback *c); struct pcmcia_socket *pcmcia_get_socket_by_nr(unsigned int nr); @@ -199,6 +173,22 @@ int pcmcia_replace_cis(struct pcmcia_socket *s, const u8 *data, const size_t len); int pccard_validate_cis(struct pcmcia_socket *s, unsigned int *count); +/* loop over CIS entries */ +int pccard_loop_tuple(struct pcmcia_socket *s, unsigned int function, + cisdata_t code, cisparse_t *parse, void *priv_data, + int (*loop_tuple) (tuple_t *tuple, + cisparse_t *parse, + void *priv_data)); + +int pccard_get_first_tuple(struct pcmcia_socket *s, unsigned int function, + tuple_t *tuple); + +int pccard_get_next_tuple(struct pcmcia_socket *s, unsigned int function, + tuple_t *tuple); + +int pccard_get_tuple_data(struct pcmcia_socket *s, tuple_t *tuple); + + /* rsrc_mgr.c */ int pcmcia_validate_mem(struct pcmcia_socket *s); struct resource *pcmcia_find_io_region(unsigned long base, diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c index f5b7079f13d..05893d41dd4 100644 --- a/drivers/pcmcia/ds.c +++ b/drivers/pcmcia/ds.c @@ -41,129 +41,11 @@ MODULE_AUTHOR("David Hinds <dahinds@users.sourceforge.net>"); MODULE_DESCRIPTION("PCMCIA Driver Services"); MODULE_LICENSE("GPL"); -#ifdef CONFIG_PCMCIA_DEBUG -int ds_pc_debug; - -module_param_named(pc_debug, ds_pc_debug, int, 0644); - -#define ds_dbg(lvl, fmt, arg...) do { \ - if (ds_pc_debug > (lvl)) \ - printk(KERN_DEBUG "ds: " fmt , ## arg); \ -} while (0) -#define ds_dev_dbg(lvl, dev, fmt, arg...) do { \ - if (ds_pc_debug > (lvl)) \ - dev_printk(KERN_DEBUG, dev, "ds: " fmt , ## arg); \ -} while (0) -#else -#define ds_dbg(lvl, fmt, arg...) do { } while (0) -#define ds_dev_dbg(lvl, dev, fmt, arg...) do { } while (0) -#endif spinlock_t pcmcia_dev_list_lock; /*====================================================================*/ -/* code which was in cs.c before */ - -/* String tables for error messages */ - -typedef struct lookup_t { - const int key; - const char *msg; -} lookup_t; - -static const lookup_t error_table[] = { - { 0, "Operation succeeded" }, - { -EIO, "Input/Output error" }, - { -ENODEV, "No card present" }, - { -EINVAL, "Bad parameter" }, - { -EACCES, "Configuration locked" }, - { -EBUSY, "Resource in use" }, - { -ENOSPC, "No more items" }, - { -ENOMEM, "Out of resource" }, -}; - - -static const lookup_t service_table[] = { - { AccessConfigurationRegister, "AccessConfigurationRegister" }, - { AddSocketServices, "AddSocketServices" }, - { AdjustResourceInfo, "AdjustResourceInfo" }, - { CheckEraseQueue, "CheckEraseQueue" }, - { CloseMemory, "CloseMemory" }, - { DeregisterClient, "DeregisterClient" }, - { DeregisterEraseQueue, "DeregisterEraseQueue" }, - { GetCardServicesInfo, "GetCardServicesInfo" }, - { GetClientInfo, "GetClientInfo" }, - { GetConfigurationInfo, "GetConfigurationInfo" }, - { GetEventMask, "GetEventMask" }, - { GetFirstClient, "GetFirstClient" }, - { GetFirstRegion, "GetFirstRegion" }, - { GetFirstTuple, "GetFirstTuple" }, - { GetNextClient, "GetNextClient" }, - { GetNextRegion, "GetNextRegion" }, - { GetNextTuple, "GetNextTuple" }, - { GetStatus, "GetStatus" }, - { GetTupleData, "GetTupleData" }, - { MapMemPage, "MapMemPage" }, - { ModifyConfiguration, "ModifyConfiguration" }, - { ModifyWindow, "ModifyWindow" }, - { OpenMemory, "OpenMemory" }, - { ParseTuple, "ParseTuple" }, - { ReadMemory, "ReadMemory" }, - { RegisterClient, "RegisterClient" }, - { RegisterEraseQueue, "RegisterEraseQueue" }, - { RegisterMTD, "RegisterMTD" }, - { ReleaseConfiguration, "ReleaseConfiguration" }, - { ReleaseIO, "ReleaseIO" }, - { ReleaseIRQ, "ReleaseIRQ" }, - { ReleaseWindow, "ReleaseWindow" }, - { RequestConfiguration, "RequestConfiguration" }, - { RequestIO, "RequestIO" }, - { RequestIRQ, "RequestIRQ" }, - { RequestSocketMask, "RequestSocketMask" }, - { RequestWindow, "RequestWindow" }, - { ResetCard, "ResetCard" }, - { SetEventMask, "SetEventMask" }, - { ValidateCIS, "ValidateCIS" }, - { WriteMemory, "WriteMemory" }, - { BindDevice, "BindDevice" }, - { BindMTD, "BindMTD" }, - { ReportError, "ReportError" }, - { SuspendCard, "SuspendCard" }, - { ResumeCard, "ResumeCard" }, - { EjectCard, "EjectCard" }, - { InsertCard, "InsertCard" }, - { ReplaceCIS, "ReplaceCIS" } -}; - -const char *pcmcia_error_func(int func) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(service_table); i++) - if (service_table[i].key == func) - return service_table[i].msg; - - return "Unknown service number"; -} -EXPORT_SYMBOL(pcmcia_error_func); - -const char *pcmcia_error_ret(int ret) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(error_table); i++) - if (error_table[i].key == ret) - return error_table[i].msg; - - return "unknown"; -} -EXPORT_SYMBOL(pcmcia_error_ret); - -/*======================================================================*/ - - - static void pcmcia_check_driver(struct pcmcia_driver *p_drv) { struct pcmcia_device_id *did = p_drv->id_table; @@ -303,7 +185,7 @@ int pcmcia_register_driver(struct pcmcia_driver *driver) spin_lock_init(&driver->dynids.lock); INIT_LIST_HEAD(&driver->dynids.list); - ds_dbg(3, "registering driver %s\n", driver->drv.name); + pr_debug("registering driver %s\n", driver->drv.name); error = driver_register(&driver->drv); if (error < 0) @@ -323,7 +205,7 @@ EXPORT_SYMBOL(pcmcia_register_driver); */ void pcmcia_unregister_driver(struct pcmcia_driver *driver) { - ds_dbg(3, "unregistering driver %s\n", driver->drv.name); + pr_debug("unregistering driver %s\n", driver->drv.name); driver_unregister(&driver->drv); pcmcia_free_dynids(driver); } @@ -350,14 +232,14 @@ void pcmcia_put_dev(struct pcmcia_device *p_dev) static void pcmcia_release_function(struct kref *ref) { struct config_t *c = container_of(ref, struct config_t, ref); - ds_dbg(1, "releasing config_t\n"); + pr_debug("releasing config_t\n"); kfree(c); } static void pcmcia_release_dev(struct device *dev) { struct pcmcia_device *p_dev = to_pcmcia_dev(dev); - ds_dev_dbg(1, dev, "releasing device\n"); + dev_dbg(dev, "releasing device\n"); pcmcia_put_socket(p_dev->socket); kfree(p_dev->devname); kref_put(&p_dev->function_config->ref, pcmcia_release_function); @@ -367,7 +249,7 @@ static void pcmcia_release_dev(struct device *dev) static void pcmcia_add_device_later(struct pcmcia_socket *s, int mfc) { if (!s->pcmcia_state.device_add_pending) { - ds_dev_dbg(1, &s->dev, "scheduling to add %s secondary" + dev_dbg(&s->dev, "scheduling to add %s secondary" " device to %d\n", mfc ? "mfc" : "pfc", s->sock); s->pcmcia_state.device_add_pending = 1; s->pcmcia_state.mfc_pfc = mfc; @@ -405,7 +287,7 @@ static int pcmcia_device_probe(struct device * dev) */ did = dev_get_drvdata(&p_dev->dev); - ds_dev_dbg(1, dev, "trying to bind to %s\n", p_drv->drv.name); + dev_dbg(dev, "trying to bind to %s\n", p_drv->drv.name); if ((!p_drv->probe) || (!p_dev->function_config) || (!try_module_get(p_drv->owner))) { @@ -428,7 +310,7 @@ static int pcmcia_device_probe(struct device * dev) ret = p_drv->probe(p_dev); if (ret) { - ds_dev_dbg(1, dev, "binding to %s failed with %d\n", + dev_dbg(dev, "binding to %s failed with %d\n", p_drv->drv.name, ret); goto put_module; } @@ -456,7 +338,7 @@ static void pcmcia_card_remove(struct pcmcia_socket *s, struct pcmcia_device *le struct pcmcia_device *tmp; unsigned long flags; - ds_dev_dbg(2, leftover ? &leftover->dev : &s->dev, + dev_dbg(leftover ? &leftover->dev : &s->dev, "pcmcia_card_remove(%d) %s\n", s->sock, leftover ? leftover->devname : ""); @@ -475,7 +357,7 @@ static void pcmcia_card_remove(struct pcmcia_socket *s, struct pcmcia_device *le p_dev->_removed=1; spin_unlock_irqrestore(&pcmcia_dev_list_lock, flags); - ds_dev_dbg(2, &p_dev->dev, "unregistering device\n"); + dev_dbg(&p_dev->dev, "unregistering device\n"); device_unregister(&p_dev->dev); } @@ -492,7 +374,7 @@ static int pcmcia_device_remove(struct device * dev) p_dev = to_pcmcia_dev(dev); p_drv = to_pcmcia_drv(dev->driver); - ds_dev_dbg(1, dev, "removing device\n"); + dev_dbg(dev, "removing device\n"); /* If we're removing the primary module driving a * pseudo multi-function card, we need to unbind @@ -572,7 +454,7 @@ static int pcmcia_device_query(struct pcmcia_device *p_dev) } if (!pccard_read_tuple(p_dev->socket, p_dev->func, CISTPL_DEVICE_GEO, devgeo)) { - ds_dev_dbg(0, &p_dev->dev, + dev_dbg(&p_dev->dev, "mem device geometry probably means " "FUNCID_MEMORY\n"); p_dev->func_id = CISTPL_FUNCID_MEMORY; @@ -628,7 +510,7 @@ struct pcmcia_device * pcmcia_device_add(struct pcmcia_socket *s, unsigned int f mutex_lock(&device_add_lock); - ds_dbg(3, "adding device to %d, function %d\n", s->sock, function); + pr_debug("adding device to %d, function %d\n", s->sock, function); /* max of 4 devices per card */ if (s->device_count == 4) @@ -654,7 +536,7 @@ struct pcmcia_device * pcmcia_device_add(struct pcmcia_socket *s, unsigned int f p_dev->devname = kasprintf(GFP_KERNEL, "pcmcia%s", dev_name(&p_dev->dev)); if (!p_dev->devname) goto err_free; - ds_dev_dbg(3, &p_dev->dev, "devname is %s\n", p_dev->devname); + dev_dbg(&p_dev->dev, "devname is %s\n", p_dev->devname); spin_lock_irqsave(&pcmcia_dev_list_lock, flags); @@ -677,7 +559,7 @@ struct pcmcia_device * pcmcia_device_add(struct pcmcia_socket *s, unsigned int f spin_unlock_irqrestore(&pcmcia_dev_list_lock, flags); if (!p_dev->function_config) { - ds_dev_dbg(3, &p_dev->dev, "creating config_t\n"); + dev_dbg(&p_dev->dev, "creating config_t\n"); p_dev->function_config = kzalloc(sizeof(struct config_t), GFP_KERNEL); if (!p_dev->function_config) @@ -722,20 +604,20 @@ static int pcmcia_card_add(struct pcmcia_socket *s) int ret = 0; if (!(s->resource_setup_done)) { - ds_dev_dbg(3, &s->dev, + dev_dbg(&s->dev, "no resources available, delaying card_add\n"); return -EAGAIN; /* try again, but later... */ } if (pcmcia_validate_mem(s)) { - ds_dev_dbg(3, &s->dev, "validating mem resources failed, " + dev_dbg(&s->dev, "validating mem resources failed, " "delaying card_add\n"); return -EAGAIN; /* try again, but later... */ } ret = pccard_validate_cis(s, &no_chains); if (ret || !no_chains) { - ds_dev_dbg(0, &s->dev, "invalid CIS or invalid resources\n"); + dev_dbg(&s->dev, "invalid CIS or invalid resources\n"); return -ENODEV; } @@ -756,7 +638,7 @@ static void pcmcia_delayed_add_device(struct work_struct *work) { struct pcmcia_socket *s = container_of(work, struct pcmcia_socket, device_add); - ds_dev_dbg(1, &s->dev, "adding additional device to %d\n", s->sock); + dev_dbg(&s->dev, "adding additional device to %d\n", s->sock); pcmcia_device_add(s, s->pcmcia_state.mfc_pfc); s->pcmcia_state.device_add_pending = 0; s->pcmcia_state.mfc_pfc = 0; @@ -766,7 +648,7 @@ static int pcmcia_requery(struct device *dev, void * _data) { struct pcmcia_device *p_dev = to_pcmcia_dev(dev); if (!p_dev->dev.driver) { - ds_dev_dbg(1, dev, "update device information\n"); + dev_dbg(dev, "update device information\n"); pcmcia_device_query(p_dev); } @@ -780,7 +662,7 @@ static void pcmcia_bus_rescan(struct pcmcia_socket *skt, int new_cis) unsigned long flags; /* must be called with skt_mutex held */ - ds_dev_dbg(0, &skt->dev, "re-scanning socket %d\n", skt->sock); + dev_dbg(&skt->dev, "re-scanning socket %d\n", skt->sock); spin_lock_irqsave(&pcmcia_dev_list_lock, flags); if (list_empty(&skt->devices_list)) @@ -835,7 +717,7 @@ static int pcmcia_load_firmware(struct pcmcia_device *dev, char * filename) if (!filename) return -EINVAL; - ds_dev_dbg(1, &dev->dev, "trying to load CIS file %s\n", filename); + dev_dbg(&dev->dev, "trying to load CIS file %s\n", filename); if (request_firmware(&fw, filename, &dev->dev) == 0) { if (fw->size >= CISTPL_MAX_CIS_SIZE) { @@ -953,14 +835,14 @@ static inline int pcmcia_devmatch(struct pcmcia_device *dev, * after it has re-checked that there is no possible module * with a prod_id/manf_id/card_id match. */ - ds_dev_dbg(0, &dev->dev, + dev_dbg(&dev->dev, "skipping FUNC_ID match until userspace interaction\n"); if (!dev->allow_func_id_match) return 0; } if (did->match_flags & PCMCIA_DEV_ID_MATCH_FAKE_CIS) { - ds_dev_dbg(0, &dev->dev, "device needs a fake CIS\n"); + dev_dbg(&dev->dev, "device needs a fake CIS\n"); if (!dev->socket->fake_cis) pcmcia_load_firmware(dev, did->cisfile); @@ -992,9 +874,9 @@ static int pcmcia_bus_match(struct device * dev, struct device_driver * drv) { /* match dynamic devices first */ spin_lock(&p_drv->dynids.lock); list_for_each_entry(dynid, &p_drv->dynids.list, node) { - ds_dev_dbg(3, dev, "trying to match to %s\n", drv->name); + dev_dbg(dev, "trying to match to %s\n", drv->name); if (pcmcia_devmatch(p_dev, &dynid->id)) { - ds_dev_dbg(0, dev, "matched to %s\n", drv->name); + dev_dbg(dev, "matched to %s\n", drv->name); spin_unlock(&p_drv->dynids.lock); return 1; } @@ -1004,15 +886,15 @@ static int pcmcia_bus_match(struct device * dev, struct device_driver * drv) { #ifdef CONFIG_PCMCIA_IOCTL /* matching by cardmgr */ if (p_dev->cardmgr == p_drv) { - ds_dev_dbg(0, dev, "cardmgr matched to %s\n", drv->name); + dev_dbg(dev, "cardmgr matched to %s\n", drv->name); return 1; } #endif while (did && did->match_flags) { - ds_dev_dbg(3, dev, "trying to match to %s\n", drv->name); + dev_dbg(dev, "trying to match to %s\n", drv->name); if (pcmcia_devmatch(p_dev, did)) { - ds_dev_dbg(0, dev, "matched to %s\n", drv->name); + dev_dbg(dev, "matched to %s\n", drv->name); return 1; } did++; @@ -1218,7 +1100,7 @@ static int pcmcia_dev_suspend(struct device * dev, pm_message_t state) if (p_dev->suspended) return 0; - ds_dev_dbg(2, dev, "suspending\n"); + dev_dbg(dev, "suspending\n"); if (dev->driver) p_drv = to_pcmcia_drv(dev->driver); @@ -1238,7 +1120,7 @@ static int pcmcia_dev_suspend(struct device * dev, pm_message_t state) } if (p_dev->device_no == p_dev->func) { - ds_dev_dbg(2, dev, "releasing configuration\n"); + dev_dbg(dev, "releasing configuration\n"); pcmcia_release_configuration(p_dev); } @@ -1258,7 +1140,7 @@ static int pcmcia_dev_resume(struct device * dev) if (!p_dev->suspended) return 0; - ds_dev_dbg(2, dev, "resuming\n"); + dev_dbg(dev, "resuming\n"); if (dev->driver) p_drv = to_pcmcia_drv(dev->driver); @@ -1267,7 +1149,7 @@ static int pcmcia_dev_resume(struct device * dev) goto out; if (p_dev->device_no == p_dev->func) { - ds_dev_dbg(2, dev, "requesting configuration\n"); + dev_dbg(dev, "requesting configuration\n"); ret = pcmcia_request_configuration(p_dev, &p_dev->conf); if (ret) goto out; @@ -1309,14 +1191,14 @@ static int pcmcia_bus_resume_callback(struct device *dev, void * _data) static int pcmcia_bus_resume(struct pcmcia_socket *skt) { - ds_dev_dbg(2, &skt->dev, "resuming socket %d\n", skt->sock); + dev_dbg(&skt->dev, "resuming socket %d\n", skt->sock); bus_for_each_dev(&pcmcia_bus_type, NULL, skt, pcmcia_bus_resume_callback); return 0; } static int pcmcia_bus_suspend(struct pcmcia_socket *skt) { - ds_dev_dbg(2, &skt->dev, "suspending socket %d\n", skt->sock); + dev_dbg(&skt->dev, "suspending socket %d\n", skt->sock); if (bus_for_each_dev(&pcmcia_bus_type, NULL, skt, pcmcia_bus_suspend_callback)) { pcmcia_bus_resume(skt); @@ -1348,7 +1230,7 @@ static int ds_event(struct pcmcia_socket *skt, event_t event, int priority) return -ENODEV; } - ds_dev_dbg(1, &skt->dev, "ds_event(0x%06x, %d, 0x%p)\n", + dev_dbg(&skt->dev, "ds_event(0x%06x, %d, 0x%p)\n", event, priority, skt); switch (event) { diff --git a/drivers/pcmcia/i82365.c b/drivers/pcmcia/i82365.c index a4aacb830b8..c13fd936051 100644 --- a/drivers/pcmcia/i82365.c +++ b/drivers/pcmcia/i82365.c @@ -63,21 +63,6 @@ #include "vg468.h" #include "ricoh.h" -#ifdef CONFIG_PCMCIA_DEBUG -static const char version[] = -"i82365.c 1.265 1999/11/10 18:36:21 (David Hinds)"; - -static int pc_debug; - -module_param(pc_debug, int, 0644); - -#define debug(lvl, fmt, arg...) do { \ - if (pc_debug > (lvl)) \ - printk(KERN_DEBUG "i82365: " fmt , ## arg); \ -} while (0) -#else -#define debug(lvl, fmt, arg...) do { } while (0) -#endif static irqreturn_t i365_count_irq(int, void *); static inline int _check_irq(int irq, int flags) @@ -501,13 +486,13 @@ static irqreturn_t i365_count_irq(int irq, void *dev) { i365_get(irq_sock, I365_CSC); irq_hits++; - debug(2, "-> hit on irq %d\n", irq); + pr_debug("i82365: -> hit on irq %d\n", irq); return IRQ_HANDLED; } static u_int __init test_irq(u_short sock, int irq) { - debug(2, " testing ISA irq %d\n", irq); + pr_debug("i82365: testing ISA irq %d\n", irq); if (request_irq(irq, i365_count_irq, IRQF_PROBE_SHARED, "scan", i365_count_irq) != 0) return 1; @@ -515,7 +500,7 @@ static u_int __init test_irq(u_short sock, int irq) msleep(10); if (irq_hits) { free_irq(irq, i365_count_irq); - debug(2, " spurious hit!\n"); + pr_debug("i82365: spurious hit!\n"); return 1; } @@ -528,7 +513,7 @@ static u_int __init test_irq(u_short sock, int irq) /* mask all interrupts */ i365_set(sock, I365_CSCINT, 0); - debug(2, " hits = %d\n", irq_hits); + pr_debug("i82365: hits = %d\n", irq_hits); return (irq_hits != 1); } @@ -854,7 +839,7 @@ static irqreturn_t pcic_interrupt(int irq, void *dev) u_long flags = 0; int handled = 0; - debug(4, "pcic_interrupt(%d)\n", irq); + pr_debug("pcic_interrupt(%d)\n", irq); for (j = 0; j < 20; j++) { active = 0; @@ -878,7 +863,7 @@ static irqreturn_t pcic_interrupt(int irq, void *dev) events |= (csc & I365_CSC_READY) ? SS_READY : 0; } ISA_UNLOCK(i, flags); - debug(2, "socket %d event 0x%02x\n", i, events); + pr_debug("socket %d event 0x%02x\n", i, events); if (events) pcmcia_parse_events(&socket[i].socket, events); @@ -890,7 +875,7 @@ static irqreturn_t pcic_interrupt(int irq, void *dev) if (j == 20) printk(KERN_NOTICE "i82365: infinite loop in interrupt handler\n"); - debug(4, "interrupt done\n"); + pr_debug("pcic_interrupt done\n"); return IRQ_RETVAL(handled); } /* pcic_interrupt */ @@ -932,7 +917,7 @@ static int i365_get_status(u_short sock, u_int *value) } } - debug(1, "GetStatus(%d) = %#4.4x\n", sock, *value); + pr_debug("GetStatus(%d) = %#4.4x\n", sock, *value); return 0; } /* i365_get_status */ @@ -943,7 +928,7 @@ static int i365_set_socket(u_short sock, socket_state_t *state) struct i82365_socket *t = &socket[sock]; u_char reg; - debug(1, "SetSocket(%d, flags %#3.3x, Vcc %d, Vpp %d, " + pr_debug("SetSocket(%d, flags %#3.3x, Vcc %d, Vpp %d, " "io_irq %d, csc_mask %#2.2x)\n", sock, state->flags, state->Vcc, state->Vpp, state->io_irq, state->csc_mask); @@ -1052,7 +1037,7 @@ static int i365_set_io_map(u_short sock, struct pccard_io_map *io) { u_char map, ioctl; - debug(1, "SetIOMap(%d, %d, %#2.2x, %d ns, " + pr_debug("SetIOMap(%d, %d, %#2.2x, %d ns, " "%#llx-%#llx)\n", sock, io->map, io->flags, io->speed, (unsigned long long)io->start, (unsigned long long)io->stop); map = io->map; @@ -1082,7 +1067,7 @@ static int i365_set_mem_map(u_short sock, struct pccard_mem_map *mem) u_short base, i; u_char map; - debug(1, "SetMemMap(%d, %d, %#2.2x, %d ns, %#llx-%#llx, " + pr_debug("SetMemMap(%d, %d, %#2.2x, %d ns, %#llx-%#llx, " "%#x)\n", sock, mem->map, mem->flags, mem->speed, (unsigned long long)mem->res->start, (unsigned long long)mem->res->end, mem->card_start); diff --git a/drivers/pcmcia/m32r_cfc.c b/drivers/pcmcia/m32r_cfc.c index 7dfbee1dcd7..26a621c9e2f 100644 --- a/drivers/pcmcia/m32r_cfc.c +++ b/drivers/pcmcia/m32r_cfc.c @@ -38,17 +38,6 @@ #include "m32r_cfc.h" -#ifdef CONFIG_PCMCIA_DEBUG -static int m32r_cfc_debug; -module_param(m32r_cfc_debug, int, 0644); -#define debug(lvl, fmt, arg...) do { \ - if (m32r_cfc_debug > (lvl)) \ - printk(KERN_DEBUG "m32r_cfc: " fmt , ## arg); \ -} while (0) -#else -#define debug(n, args...) do { } while (0) -#endif - /* Poll status interval -- 0 means default to interrupt */ static int poll_interval = 0; @@ -123,7 +112,7 @@ void pcc_ioread_byte(int sock, unsigned long port, void *buf, size_t size, unsigned char *bp = (unsigned char *)buf; unsigned long flags; - debug(3, "m32r_cfc: pcc_ioread_byte: sock=%d, port=%#lx, buf=%p, " + pr_debug("m32r_cfc: pcc_ioread_byte: sock=%d, port=%#lx, buf=%p, " "size=%u, nmemb=%d, flag=%d\n", sock, port, buf, size, nmemb, flag); @@ -132,7 +121,7 @@ void pcc_ioread_byte(int sock, unsigned long port, void *buf, size_t size, printk("m32r_cfc:ioread_byte null port :%#lx\n",port); return; } - debug(3, "m32r_cfc: pcc_ioread_byte: addr=%#lx\n", addr); + pr_debug("m32r_cfc: pcc_ioread_byte: addr=%#lx\n", addr); spin_lock_irqsave(&pcc_lock, flags); /* read Byte */ @@ -148,7 +137,7 @@ void pcc_ioread_word(int sock, unsigned long port, void *buf, size_t size, unsigned short *bp = (unsigned short *)buf; unsigned long flags; - debug(3, "m32r_cfc: pcc_ioread_word: sock=%d, port=%#lx, " + pr_debug("m32r_cfc: pcc_ioread_word: sock=%d, port=%#lx, " "buf=%p, size=%u, nmemb=%d, flag=%d\n", sock, port, buf, size, nmemb, flag); @@ -163,7 +152,7 @@ void pcc_ioread_word(int sock, unsigned long port, void *buf, size_t size, printk("m32r_cfc:ioread_word null port :%#lx\n",port); return; } - debug(3, "m32r_cfc: pcc_ioread_word: addr=%#lx\n", addr); + pr_debug("m32r_cfc: pcc_ioread_word: addr=%#lx\n", addr); spin_lock_irqsave(&pcc_lock, flags); /* read Word */ @@ -179,7 +168,7 @@ void pcc_iowrite_byte(int sock, unsigned long port, void *buf, size_t size, unsigned char *bp = (unsigned char *)buf; unsigned long flags; - debug(3, "m32r_cfc: pcc_iowrite_byte: sock=%d, port=%#lx, " + pr_debug("m32r_cfc: pcc_iowrite_byte: sock=%d, port=%#lx, " "buf=%p, size=%u, nmemb=%d, flag=%d\n", sock, port, buf, size, nmemb, flag); @@ -189,7 +178,7 @@ void pcc_iowrite_byte(int sock, unsigned long port, void *buf, size_t size, printk("m32r_cfc:iowrite_byte null port:%#lx\n",port); return; } - debug(3, "m32r_cfc: pcc_iowrite_byte: addr=%#lx\n", addr); + pr_debug("m32r_cfc: pcc_iowrite_byte: addr=%#lx\n", addr); spin_lock_irqsave(&pcc_lock, flags); while (nmemb--) @@ -204,7 +193,7 @@ void pcc_iowrite_word(int sock, unsigned long port, void *buf, size_t size, unsigned short *bp = (unsigned short *)buf; unsigned long flags; - debug(3, "m32r_cfc: pcc_iowrite_word: sock=%d, port=%#lx, " + pr_debug("m32r_cfc: pcc_iowrite_word: sock=%d, port=%#lx, " "buf=%p, size=%u, nmemb=%d, flag=%d\n", sock, port, buf, size, nmemb, flag); @@ -226,7 +215,7 @@ void pcc_iowrite_word(int sock, unsigned long port, void *buf, size_t size, return; } #endif - debug(3, "m32r_cfc: pcc_iowrite_word: addr=%#lx\n", addr); + pr_debug("m32r_cfc: pcc_iowrite_word: addr=%#lx\n", addr); spin_lock_irqsave(&pcc_lock, flags); while (nmemb--) @@ -262,7 +251,7 @@ static struct timer_list poll_timer; static unsigned int pcc_get(u_short sock, unsigned int reg) { unsigned int val = inw(reg); - debug(3, "m32r_cfc: pcc_get: reg(0x%08x)=0x%04x\n", reg, val); + pr_debug("m32r_cfc: pcc_get: reg(0x%08x)=0x%04x\n", reg, val); return val; } @@ -270,7 +259,7 @@ static unsigned int pcc_get(u_short sock, unsigned int reg) static void pcc_set(u_short sock, unsigned int reg, unsigned int data) { outw(data, reg); - debug(3, "m32r_cfc: pcc_set: reg(0x%08x)=0x%04x\n", reg, data); + pr_debug("m32r_cfc: pcc_set: reg(0x%08x)=0x%04x\n", reg, data); } /*====================================================================== @@ -286,14 +275,14 @@ static int __init is_alive(u_short sock) { unsigned int stat; - debug(3, "m32r_cfc: is_alive:\n"); + pr_debug("m32r_cfc: is_alive:\n"); printk("CF: "); stat = pcc_get(sock, (unsigned int)PLD_CFSTS); if (!stat) printk("No "); printk("Card is detected at socket %d : stat = 0x%08x\n", sock, stat); - debug(3, "m32r_cfc: is_alive: sock stat is 0x%04x\n", stat); + pr_debug("m32r_cfc: is_alive: sock stat is 0x%04x\n", stat); return 0; } @@ -303,7 +292,7 @@ static void add_pcc_socket(ulong base, int irq, ulong mapaddr, { pcc_socket_t *t = &socket[pcc_sockets]; - debug(3, "m32r_cfc: add_pcc_socket: base=%#lx, irq=%d, " + pr_debug("m32r_cfc: add_pcc_socket: base=%#lx, irq=%d, " "mapaddr=%#lx, ioaddr=%08x\n", base, irq, mapaddr, ioaddr); @@ -358,7 +347,7 @@ static void add_pcc_socket(ulong base, int irq, ulong mapaddr, /* eject interrupt */ request_irq(irq+1, pcc_interrupt, 0, "m32r_cfc", pcc_interrupt); #endif - debug(3, "m32r_cfc: enable CFMSK, RDYSEL\n"); + pr_debug("m32r_cfc: enable CFMSK, RDYSEL\n"); pcc_set(pcc_sockets, (unsigned int)PLD_CFIMASK, 0x01); #endif /* CONFIG_PLAT_USRV */ #if defined(CONFIG_PLAT_M32700UT) || defined(CONFIG_PLAT_USRV) || defined(CONFIG_PLAT_OPSPUT) @@ -378,26 +367,26 @@ static irqreturn_t pcc_interrupt(int irq, void *dev) u_int events = 0; int handled = 0; - debug(3, "m32r_cfc: pcc_interrupt: irq=%d, dev=%p\n", irq, dev); + pr_debug("m32r_cfc: pcc_interrupt: irq=%d, dev=%p\n", irq, dev); for (i = 0; i < pcc_sockets; i++) { if (socket[i].cs_irq1 != irq && socket[i].cs_irq2 != irq) continue; handled = 1; - debug(3, "m32r_cfc: pcc_interrupt: socket %d irq 0x%02x ", + pr_debug("m32r_cfc: pcc_interrupt: socket %d irq 0x%02x ", i, irq); events |= SS_DETECT; /* insert or eject */ if (events) pcmcia_parse_events(&socket[i].socket, events); } - debug(3, "m32r_cfc: pcc_interrupt: done\n"); + pr_debug("m32r_cfc: pcc_interrupt: done\n"); return IRQ_RETVAL(handled); } /* pcc_interrupt */ static void pcc_interrupt_wrapper(u_long data) { - debug(3, "m32r_cfc: pcc_interrupt_wrapper:\n"); + pr_debug("m32r_cfc: pcc_interrupt_wrapper:\n"); pcc_interrupt(0, NULL); init_timer(&poll_timer); poll_timer.expires = jiffies + poll_interval; @@ -410,17 +399,17 @@ static int _pcc_get_status(u_short sock, u_int *value) { u_int status; - debug(3, "m32r_cfc: _pcc_get_status:\n"); + pr_debug("m32r_cfc: _pcc_get_status:\n"); status = pcc_get(sock, (unsigned int)PLD_CFSTS); *value = (status) ? SS_DETECT : 0; - debug(3, "m32r_cfc: _pcc_get_status: status=0x%08x\n", status); + pr_debug("m32r_cfc: _pcc_get_status: status=0x%08x\n", status); #if defined(CONFIG_PLAT_M32700UT) || defined(CONFIG_PLAT_USRV) || defined(CONFIG_PLAT_OPSPUT) if ( status ) { /* enable CF power */ status = inw((unsigned int)PLD_CPCR); if (!(status & PLD_CPCR_CF)) { - debug(3, "m32r_cfc: _pcc_get_status: " + pr_debug("m32r_cfc: _pcc_get_status: " "power on (CPCR=0x%08x)\n", status); status |= PLD_CPCR_CF; outw(status, (unsigned int)PLD_CPCR); @@ -439,7 +428,7 @@ static int _pcc_get_status(u_short sock, u_int *value) status &= ~PLD_CPCR_CF; outw(status, (unsigned int)PLD_CPCR); udelay(100); - debug(3, "m32r_cfc: _pcc_get_status: " + pr_debug("m32r_cfc: _pcc_get_status: " "power off (CPCR=0x%08x)\n", status); } #elif defined(CONFIG_PLAT_MAPPI2) || defined(CONFIG_PLAT_MAPPI3) @@ -465,13 +454,13 @@ static int _pcc_get_status(u_short sock, u_int *value) /* disable CF power */ pcc_set(sock, (unsigned int)PLD_CPCR, 0); udelay(100); - debug(3, "m32r_cfc: _pcc_get_status: " + pr_debug("m32r_cfc: _pcc_get_status: " "power off (CPCR=0x%08x)\n", status); } #else #error no platform configuration #endif - debug(3, "m32r_cfc: _pcc_get_status: GetStatus(%d) = %#4.4x\n", + pr_debug("m32r_cfc: _pcc_get_status: GetStatus(%d) = %#4.4x\n", sock, *value); return 0; } /* _get_status */ @@ -480,7 +469,7 @@ static int _pcc_get_status(u_short sock, u_int *value) static int _pcc_set_socket(u_short sock, socket_state_t *state) { - debug(3, "m32r_cfc: SetSocket(%d, flags %#3.3x, Vcc %d, Vpp %d, " + pr_debug("m32r_cfc: SetSocket(%d, flags %#3.3x, Vcc %d, Vpp %d, " "io_irq %d, csc_mask %#2.2x)\n", sock, state->flags, state->Vcc, state->Vpp, state->io_irq, state->csc_mask); @@ -492,41 +481,39 @@ static int _pcc_set_socket(u_short sock, socket_state_t *state) } #endif if (state->flags & SS_RESET) { - debug(3, ":RESET\n"); + pr_debug(":RESET\n"); pcc_set(sock,(unsigned int)PLD_CFRSTCR,0x101); }else{ pcc_set(sock,(unsigned int)PLD_CFRSTCR,0x100); } if (state->flags & SS_OUTPUT_ENA){ - debug(3, ":OUTPUT_ENA\n"); + pr_debug(":OUTPUT_ENA\n"); /* bit clear */ pcc_set(sock,(unsigned int)PLD_CFBUFCR,0); } else { pcc_set(sock,(unsigned int)PLD_CFBUFCR,1); } -#ifdef CONFIG_PCMCIA_DEBUG if(state->flags & SS_IOCARD){ - debug(3, ":IOCARD"); + pr_debug(":IOCARD"); } if (state->flags & SS_PWR_AUTO) { - debug(3, ":PWR_AUTO"); + pr_debug(":PWR_AUTO"); } if (state->csc_mask & SS_DETECT) - debug(3, ":csc-SS_DETECT"); + pr_debug(":csc-SS_DETECT"); if (state->flags & SS_IOCARD) { if (state->csc_mask & SS_STSCHG) - debug(3, ":STSCHG"); + pr_debug(":STSCHG"); } else { if (state->csc_mask & SS_BATDEAD) - debug(3, ":BATDEAD"); + pr_debug(":BATDEAD"); if (state->csc_mask & SS_BATWARN) - debug(3, ":BATWARN"); + pr_debug(":BATWARN"); if (state->csc_mask & SS_READY) - debug(3, ":READY"); + pr_debug(":READY"); } - debug(3, "\n"); -#endif + pr_debug("\n"); return 0; } /* _set_socket */ @@ -536,7 +523,7 @@ static int _pcc_set_io_map(u_short sock, struct pccard_io_map *io) { u_char map; - debug(3, "m32r_cfc: SetIOMap(%d, %d, %#2.2x, %d ns, " + pr_debug("m32r_cfc: SetIOMap(%d, %d, %#2.2x, %d ns, " "%#llx-%#llx)\n", sock, io->map, io->flags, io->speed, (unsigned long long)io->start, (unsigned long long)io->stop); @@ -554,7 +541,7 @@ static int _pcc_set_mem_map(u_short sock, struct pccard_mem_map *mem) u_long addr; pcc_socket_t *t = &socket[sock]; - debug(3, "m32r_cfc: SetMemMap(%d, %d, %#2.2x, %d ns, " + pr_debug("m32r_cfc: SetMemMap(%d, %d, %#2.2x, %d ns, " "%#llx, %#x)\n", sock, map, mem->flags, mem->speed, (unsigned long long)mem->static_start, mem->card_start); @@ -640,11 +627,11 @@ static int pcc_get_status(struct pcmcia_socket *s, u_int *value) unsigned int sock = container_of(s, struct pcc_socket, socket)->number; if (socket[sock].flags & IS_ALIVE) { - debug(3, "m32r_cfc: pcc_get_status: sock(%d) -EINVAL\n", sock); + dev_dbg(&s->dev, "pcc_get_status: sock(%d) -EINVAL\n", sock); *value = 0; return -EINVAL; } - debug(3, "m32r_cfc: pcc_get_status: sock(%d)\n", sock); + dev_dbg(&s->dev, "pcc_get_status: sock(%d)\n", sock); LOCKED(_pcc_get_status(sock, value)); } @@ -653,10 +640,10 @@ static int pcc_set_socket(struct pcmcia_socket *s, socket_state_t *state) unsigned int sock = container_of(s, struct pcc_socket, socket)->number; if (socket[sock].flags & IS_ALIVE) { - debug(3, "m32r_cfc: pcc_set_socket: sock(%d) -EINVAL\n", sock); + dev_dbg(&s->dev, "pcc_set_socket: sock(%d) -EINVAL\n", sock); return -EINVAL; } - debug(3, "m32r_cfc: pcc_set_socket: sock(%d)\n", sock); + dev_dbg(&s->dev, "pcc_set_socket: sock(%d)\n", sock); LOCKED(_pcc_set_socket(sock, state)); } @@ -665,10 +652,10 @@ static int pcc_set_io_map(struct pcmcia_socket *s, struct pccard_io_map *io) unsigned int sock = container_of(s, struct pcc_socket, socket)->number; if (socket[sock].flags & IS_ALIVE) { - debug(3, "m32r_cfc: pcc_set_io_map: sock(%d) -EINVAL\n", sock); + dev_dbg(&s->dev, "pcc_set_io_map: sock(%d) -EINVAL\n", sock); return -EINVAL; } - debug(3, "m32r_cfc: pcc_set_io_map: sock(%d)\n", sock); + dev_dbg(&s->dev, "pcc_set_io_map: sock(%d)\n", sock); LOCKED(_pcc_set_io_map(sock, io)); } @@ -677,16 +664,16 @@ static int pcc_set_mem_map(struct pcmcia_socket *s, struct pccard_mem_map *mem) unsigned int sock = container_of(s, struct pcc_socket, socket)->number; if (socket[sock].flags & IS_ALIVE) { - debug(3, "m32r_cfc: pcc_set_mem_map: sock(%d) -EINVAL\n", sock); + dev_dbg(&s->dev, "pcc_set_mem_map: sock(%d) -EINVAL\n", sock); return -EINVAL; } - debug(3, "m32r_cfc: pcc_set_mem_map: sock(%d)\n", sock); + dev_dbg(&s->dev, "pcc_set_mem_map: sock(%d)\n", sock); LOCKED(_pcc_set_mem_map(sock, mem)); } static int pcc_init(struct pcmcia_socket *s) { - debug(3, "m32r_cfc: pcc_init()\n"); + dev_dbg(&s->dev, "pcc_init()\n"); return 0; } diff --git a/drivers/pcmcia/m32r_pcc.c b/drivers/pcmcia/m32r_pcc.c index c6524f99ccc..72844c5a6d0 100644 --- a/drivers/pcmcia/m32r_pcc.c +++ b/drivers/pcmcia/m32r_pcc.c @@ -45,16 +45,6 @@ #define PCC_DEBUG_DBEX -#ifdef CONFIG_PCMCIA_DEBUG -static int m32r_pcc_debug; -module_param(m32r_pcc_debug, int, 0644); -#define debug(lvl, fmt, arg...) do { \ - if (m32r_pcc_debug > (lvl)) \ - printk(KERN_DEBUG "m32r_pcc: " fmt , ## arg); \ -} while (0) -#else -#define debug(n, args...) do { } while (0) -#endif /* Poll status interval -- 0 means default to interrupt */ static int poll_interval = 0; @@ -358,7 +348,7 @@ static irqreturn_t pcc_interrupt(int irq, void *dev) u_int events, active; int handled = 0; - debug(4, "m32r: pcc_interrupt(%d)\n", irq); + pr_debug("m32r_pcc: pcc_interrupt(%d)\n", irq); for (j = 0; j < 20; j++) { active = 0; @@ -369,13 +359,14 @@ static irqreturn_t pcc_interrupt(int irq, void *dev) handled = 1; irc = pcc_get(i, PCIRC); irc >>=16; - debug(2, "m32r-pcc:interrupt: socket %d pcirc 0x%02x ", i, irc); + pr_debug("m32r_pcc: interrupt: socket %d pcirc 0x%02x ", + i, irc); if (!irc) continue; events = (irc) ? SS_DETECT : 0; events |= (pcc_get(i,PCCR) & PCCR_PCEN) ? SS_READY : 0; - debug(2, " event 0x%02x\n", events); + pr_debug("m32r_pcc: event 0x%02x\n", events); if (events) pcmcia_parse_events(&socket[i].socket, events); @@ -388,7 +379,7 @@ static irqreturn_t pcc_interrupt(int irq, void *dev) if (j == 20) printk(KERN_NOTICE "m32r-pcc: infinite loop in interrupt handler\n"); - debug(4, "m32r-pcc: interrupt done\n"); + pr_debug("m32r_pcc: interrupt done\n"); return IRQ_RETVAL(handled); } /* pcc_interrupt */ @@ -422,7 +413,7 @@ static int _pcc_get_status(u_short sock, u_int *value) status = pcc_get(sock,PCCSIGCR); *value |= (status & PCCSIGCR_VEN) ? SS_POWERON : 0; - debug(3, "m32r-pcc: GetStatus(%d) = %#4.4x\n", sock, *value); + pr_debug("m32r_pcc: GetStatus(%d) = %#4.4x\n", sock, *value); return 0; } /* _get_status */ @@ -432,7 +423,7 @@ static int _pcc_set_socket(u_short sock, socket_state_t *state) { u_long reg = 0; - debug(3, "m32r-pcc: SetSocket(%d, flags %#3.3x, Vcc %d, Vpp %d, " + pr_debug("m32r_pcc: SetSocket(%d, flags %#3.3x, Vcc %d, Vpp %d, " "io_irq %d, csc_mask %#2.2x)", sock, state->flags, state->Vcc, state->Vpp, state->io_irq, state->csc_mask); @@ -448,11 +439,11 @@ static int _pcc_set_socket(u_short sock, socket_state_t *state) } if (state->flags & SS_RESET) { - debug(3, ":RESET\n"); + pr_debug("m32r_pcc: :RESET\n"); reg |= PCCSIGCR_CRST; } if (state->flags & SS_OUTPUT_ENA){ - debug(3, ":OUTPUT_ENA\n"); + pr_debug("m32r_pcc: :OUTPUT_ENA\n"); /* bit clear */ } else { reg |= PCCSIGCR_SEN; @@ -460,28 +451,26 @@ static int _pcc_set_socket(u_short sock, socket_state_t *state) pcc_set(sock,PCCSIGCR,reg); -#ifdef CONFIG_PCMCIA_DEBUG if(state->flags & SS_IOCARD){ - debug(3, ":IOCARD"); + pr_debug("m32r_pcc: :IOCARD"); } if (state->flags & SS_PWR_AUTO) { - debug(3, ":PWR_AUTO"); + pr_debug("m32r_pcc: :PWR_AUTO"); } if (state->csc_mask & SS_DETECT) - debug(3, ":csc-SS_DETECT"); + pr_debug("m32r_pcc: :csc-SS_DETECT"); if (state->flags & SS_IOCARD) { if (state->csc_mask & SS_STSCHG) - debug(3, ":STSCHG"); + pr_debug("m32r_pcc: :STSCHG"); } else { if (state->csc_mask & SS_BATDEAD) - debug(3, ":BATDEAD"); + pr_debug("m32r_pcc: :BATDEAD"); if (state->csc_mask & SS_BATWARN) - debug(3, ":BATWARN"); + pr_debug("m32r_pcc: :BATWARN"); if (state->csc_mask & SS_READY) - debug(3, ":READY"); + pr_debug("m32r_pcc: :READY"); } - debug(3, "\n"); -#endif + pr_debug("m32r_pcc: \n"); return 0; } /* _set_socket */ @@ -491,7 +480,7 @@ static int _pcc_set_io_map(u_short sock, struct pccard_io_map *io) { u_char map; - debug(3, "m32r-pcc: SetIOMap(%d, %d, %#2.2x, %d ns, " + pr_debug("m32r_pcc: SetIOMap(%d, %d, %#2.2x, %d ns, " "%#llx-%#llx)\n", sock, io->map, io->flags, io->speed, (unsigned long long)io->start, (unsigned long long)io->stop); @@ -515,7 +504,7 @@ static int _pcc_set_mem_map(u_short sock, struct pccard_mem_map *mem) #endif #endif - debug(3, "m32r-pcc: SetMemMap(%d, %d, %#2.2x, %d ns, " + pr_debug("m32r_pcc: SetMemMap(%d, %d, %#2.2x, %d ns, " "%#llx, %#x)\n", sock, map, mem->flags, mem->speed, (unsigned long long)mem->static_start, mem->card_start); @@ -662,7 +651,7 @@ static int pcc_set_mem_map(struct pcmcia_socket *s, struct pccard_mem_map *mem) static int pcc_init(struct pcmcia_socket *s) { - debug(4, "m32r-pcc: init call\n"); + pr_debug("m32r_pcc: init call\n"); return 0; } diff --git a/drivers/pcmcia/m8xx_pcmcia.c b/drivers/pcmcia/m8xx_pcmcia.c index 403559ba49d..7f79c4e169a 100644 --- a/drivers/pcmcia/m8xx_pcmcia.c +++ b/drivers/pcmcia/m8xx_pcmcia.c @@ -64,14 +64,6 @@ #include <pcmcia/cs.h> #include <pcmcia/ss.h> -#ifdef CONFIG_PCMCIA_DEBUG -static int pc_debug; -module_param(pc_debug, int, 0); -#define dprintk(args...) printk(KERN_DEBUG "m8xx_pcmcia: " args); -#else -#define dprintk(args...) -#endif - #define pcmcia_info(args...) printk(KERN_INFO "m8xx_pcmcia: "args) #define pcmcia_error(args...) printk(KERN_ERR "m8xx_pcmcia: "args) @@ -565,7 +557,7 @@ static irqreturn_t m8xx_interrupt(int irq, void *dev) unsigned int i, events, pscr, pipr, per; pcmconf8xx_t *pcmcia = socket[0].pcmcia; - dprintk("Interrupt!\n"); + pr_debug("m8xx_pcmcia: Interrupt!\n"); /* get interrupt sources */ pscr = in_be32(&pcmcia->pcmc_pscr); @@ -614,7 +606,7 @@ static irqreturn_t m8xx_interrupt(int irq, void *dev) /* call the handler */ - dprintk("slot %u: events = 0x%02x, pscr = 0x%08x, " + pr_debug("m8xx_pcmcia: slot %u: events = 0x%02x, pscr = 0x%08x, " "pipr = 0x%08x\n", i, events, pscr, pipr); if (events) { @@ -641,7 +633,7 @@ static irqreturn_t m8xx_interrupt(int irq, void *dev) /* clear the interrupt sources */ out_be32(&pcmcia->pcmc_pscr, pscr); - dprintk("Interrupt done.\n"); + pr_debug("m8xx_pcmcia: Interrupt done.\n"); return IRQ_HANDLED; } @@ -815,7 +807,7 @@ static int m8xx_get_status(struct pcmcia_socket *sock, unsigned int *value) }; } - dprintk("GetStatus(%d) = %#2.2x\n", lsock, *value); + pr_debug("m8xx_pcmcia: GetStatus(%d) = %#2.2x\n", lsock, *value); return 0; } @@ -828,7 +820,7 @@ static int m8xx_set_socket(struct pcmcia_socket *sock, socket_state_t * state) unsigned long flags; pcmconf8xx_t *pcmcia = socket[0].pcmcia; - dprintk("SetSocket(%d, flags %#3.3x, Vcc %d, Vpp %d, " + pr_debug("m8xx_pcmcia: SetSocket(%d, flags %#3.3x, Vcc %d, Vpp %d, " "io_irq %d, csc_mask %#2.2x)\n", lsock, state->flags, state->Vcc, state->Vpp, state->io_irq, state->csc_mask); @@ -974,7 +966,7 @@ static int m8xx_set_io_map(struct pcmcia_socket *sock, struct pccard_io_map *io) #define M8XX_SIZE (io->stop - io->start + 1) #define M8XX_BASE (PCMCIA_IO_WIN_BASE + io->start) - dprintk("SetIOMap(%d, %d, %#2.2x, %d ns, " + pr_debug("m8xx_pcmcia: SetIOMap(%d, %d, %#2.2x, %d ns, " "%#4.4llx-%#4.4llx)\n", lsock, io->map, io->flags, io->speed, (unsigned long long)io->start, (unsigned long long)io->stop); @@ -988,7 +980,7 @@ static int m8xx_set_io_map(struct pcmcia_socket *sock, struct pccard_io_map *io) if (io->flags & MAP_ACTIVE) { - dprintk("io->flags & MAP_ACTIVE\n"); + pr_debug("m8xx_pcmcia: io->flags & MAP_ACTIVE\n"); winnr = (PCMCIA_MEM_WIN_NO * PCMCIA_SOCKETS_NO) + (lsock * PCMCIA_IO_WIN_NO) + io->map; @@ -1018,8 +1010,8 @@ static int m8xx_set_io_map(struct pcmcia_socket *sock, struct pccard_io_map *io) out_be32(&w->or, reg); - dprintk("Socket %u: Mapped io window %u at %#8.8x, " - "OR = %#8.8x.\n", lsock, io->map, w->br, w->or); + pr_debug("m8xx_pcmcia: Socket %u: Mapped io window %u at " + "%#8.8x, OR = %#8.8x.\n", lsock, io->map, w->br, w->or); } else { /* shutdown IO window */ winnr = (PCMCIA_MEM_WIN_NO * PCMCIA_SOCKETS_NO) @@ -1033,14 +1025,14 @@ static int m8xx_set_io_map(struct pcmcia_socket *sock, struct pccard_io_map *io) out_be32(&w->or, 0); /* turn off window */ out_be32(&w->br, 0); /* turn off base address */ - dprintk("Socket %u: Unmapped io window %u at %#8.8x, " - "OR = %#8.8x.\n", lsock, io->map, w->br, w->or); + pr_debug("m8xx_pcmcia: Socket %u: Unmapped io window %u at " + "%#8.8x, OR = %#8.8x.\n", lsock, io->map, w->br, w->or); } /* copy the struct and modify the copy */ s->io_win[io->map] = *io; s->io_win[io->map].flags &= (MAP_WRPROT | MAP_16BIT | MAP_ACTIVE); - dprintk("SetIOMap exit\n"); + pr_debug("m8xx_pcmcia: SetIOMap exit\n"); return 0; } @@ -1055,7 +1047,7 @@ static int m8xx_set_mem_map(struct pcmcia_socket *sock, unsigned int reg, winnr; pcmconf8xx_t *pcmcia = s->pcmcia; - dprintk("SetMemMap(%d, %d, %#2.2x, %d ns, " + pr_debug("m8xx_pcmcia: SetMemMap(%d, %d, %#2.2x, %d ns, " "%#5.5llx, %#5.5x)\n", lsock, mem->map, mem->flags, mem->speed, (unsigned long long)mem->static_start, mem->card_start); @@ -1098,7 +1090,7 @@ static int m8xx_set_mem_map(struct pcmcia_socket *sock, out_be32(&w->or, reg); - dprintk("Socket %u: Mapped memory window %u at %#8.8x, " + pr_debug("m8xx_pcmcia: Socket %u: Mapped memory window %u at %#8.8x, " "OR = %#8.8x.\n", lsock, mem->map, w->br, w->or); if (mem->flags & MAP_ACTIVE) { @@ -1108,7 +1100,7 @@ static int m8xx_set_mem_map(struct pcmcia_socket *sock, + mem->card_start; } - dprintk("SetMemMap(%d, %d, %#2.2x, %d ns, " + pr_debug("m8xx_pcmcia: SetMemMap(%d, %d, %#2.2x, %d ns, " "%#5.5llx, %#5.5x)\n", lsock, mem->map, mem->flags, mem->speed, (unsigned long long)mem->static_start, mem->card_start); @@ -1129,7 +1121,7 @@ static int m8xx_sock_init(struct pcmcia_socket *sock) pccard_io_map io = { 0, 0, 0, 0, 1 }; pccard_mem_map mem = { 0, 0, 0, 0, 0, 0 }; - dprintk("sock_init(%d)\n", s); + pr_debug("m8xx_pcmcia: sock_init(%d)\n", s); m8xx_set_socket(sock, &dead_socket); for (i = 0; i < PCMCIA_IO_WIN_NO; i++) { diff --git a/drivers/pcmcia/o2micro.h b/drivers/pcmcia/o2micro.h index 72188c462c9..624442fc0d3 100644 --- a/drivers/pcmcia/o2micro.h +++ b/drivers/pcmcia/o2micro.h @@ -30,28 +30,6 @@ #ifndef _LINUX_O2MICRO_H #define _LINUX_O2MICRO_H -#ifndef PCI_VENDOR_ID_O2 -#define PCI_VENDOR_ID_O2 0x1217 -#endif -#ifndef PCI_DEVICE_ID_O2_6729 -#define PCI_DEVICE_ID_O2_6729 0x6729 -#endif -#ifndef PCI_DEVICE_ID_O2_6730 -#define PCI_DEVICE_ID_O2_6730 0x673a -#endif -#ifndef PCI_DEVICE_ID_O2_6832 -#define PCI_DEVICE_ID_O2_6832 0x6832 -#endif -#ifndef PCI_DEVICE_ID_O2_6836 -#define PCI_DEVICE_ID_O2_6836 0x6836 -#endif -#ifndef PCI_DEVICE_ID_O2_6812 -#define PCI_DEVICE_ID_O2_6812 0x6872 -#endif -#ifndef PCI_DEVICE_ID_O2_6933 -#define PCI_DEVICE_ID_O2_6933 0x6933 -#endif - /* Additional PCI configuration registers */ #define O2_MUX_CONTROL 0x90 /* 32 bit */ diff --git a/drivers/pcmcia/pcmcia_ioctl.c b/drivers/pcmcia/pcmcia_ioctl.c index 30cf71d2ee2..c4d7908fa37 100644 --- a/drivers/pcmcia/pcmcia_ioctl.c +++ b/drivers/pcmcia/pcmcia_ioctl.c @@ -58,17 +58,6 @@ typedef struct user_info_t { } user_info_t; -#ifdef CONFIG_PCMCIA_DEBUG -extern int ds_pc_debug; - -#define ds_dbg(lvl, fmt, arg...) do { \ - if (ds_pc_debug >= lvl) \ - printk(KERN_DEBUG "ds: " fmt , ## arg); \ -} while (0) -#else -#define ds_dbg(lvl, fmt, arg...) do { } while (0) -#endif - static struct pcmcia_device *get_pcmcia_device(struct pcmcia_socket *s, unsigned int function) { @@ -229,6 +218,61 @@ static int pcmcia_adjust_resource_info(adjust_t *adj) return (ret); } + +/** pcmcia_get_window + */ +static int pcmcia_get_window(struct pcmcia_socket *s, window_handle_t *wh_out, + window_handle_t wh, win_req_t *req) +{ + pccard_mem_map *win; + window_handle_t w; + + wh--; + if (!s || !(s->state & SOCKET_PRESENT)) + return -ENODEV; + if (wh >= MAX_WIN) + return -EINVAL; + for (w = wh; w < MAX_WIN; w++) + if (s->state & SOCKET_WIN_REQ(w)) + break; + if (w == MAX_WIN) + return -EINVAL; + win = &s->win[w]; + req->Base = win->res->start; + req->Size = win->res->end - win->res->start + 1; + req->AccessSpeed = win->speed; + req->Attributes = 0; + if (win->flags & MAP_ATTRIB) + req->Attributes |= WIN_MEMORY_TYPE_AM; + if (win->flags & MAP_ACTIVE) + req->Attributes |= WIN_ENABLE; + if (win->flags & MAP_16BIT) + req->Attributes |= WIN_DATA_WIDTH_16; + if (win->flags & MAP_USE_WAIT) + req->Attributes |= WIN_USE_WAIT; + + *wh_out = w + 1; + return 0; +} /* pcmcia_get_window */ + + +/** pcmcia_get_mem_page + * + * Change the card address of an already open memory window. + */ +static int pcmcia_get_mem_page(struct pcmcia_socket *skt, window_handle_t wh, + memreq_t *req) +{ + wh--; + if (wh >= MAX_WIN) + return -EINVAL; + + req->Page = 0; + req->CardOffset = skt->win[wh].card_start; + return 0; +} /* pcmcia_get_mem_page */ + + /** pccard_get_status * * Get the current socket state bits. We don't support the latched @@ -431,7 +475,7 @@ static int bind_request(struct pcmcia_socket *s, bind_info_t *bind_info) if (!s) return -EINVAL; - ds_dbg(2, "bind_request(%d, '%s')\n", s->sock, + pr_debug("bind_request(%d, '%s')\n", s->sock, (char *)bind_info->dev_info); p_drv = get_pcmcia_driver(&bind_info->dev_info); @@ -623,7 +667,7 @@ static int ds_open(struct inode *inode, struct file *file) static int warning_printed = 0; int ret = 0; - ds_dbg(0, "ds_open(socket %d)\n", i); + pr_debug("ds_open(socket %d)\n", i); lock_kernel(); s = pcmcia_get_socket_by_nr(i); @@ -685,7 +729,7 @@ static int ds_release(struct inode *inode, struct file *file) struct pcmcia_socket *s; user_info_t *user, **link; - ds_dbg(0, "ds_release(socket %d)\n", iminor(inode)); + pr_debug("ds_release(socket %d)\n", iminor(inode)); user = file->private_data; if (CHECK_USER(user)) @@ -719,7 +763,7 @@ static ssize_t ds_read(struct file *file, char __user *buf, user_info_t *user; int ret; - ds_dbg(2, "ds_read(socket %d)\n", iminor(file->f_path.dentry->d_inode)); + pr_debug("ds_read(socket %d)\n", iminor(file->f_path.dentry->d_inode)); if (count < 4) return -EINVAL; @@ -744,7 +788,7 @@ static ssize_t ds_read(struct file *file, char __user *buf, static ssize_t ds_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - ds_dbg(2, "ds_write(socket %d)\n", iminor(file->f_path.dentry->d_inode)); + pr_debug("ds_write(socket %d)\n", iminor(file->f_path.dentry->d_inode)); if (count != 4) return -EINVAL; @@ -762,7 +806,7 @@ static u_int ds_poll(struct file *file, poll_table *wait) struct pcmcia_socket *s; user_info_t *user; - ds_dbg(2, "ds_poll(socket %d)\n", iminor(file->f_path.dentry->d_inode)); + pr_debug("ds_poll(socket %d)\n", iminor(file->f_path.dentry->d_inode)); user = file->private_data; if (CHECK_USER(user)) @@ -790,7 +834,7 @@ static int ds_ioctl(struct inode * inode, struct file * file, ds_ioctl_arg_t *buf; user_info_t *user; - ds_dbg(2, "ds_ioctl(socket %d, %#x, %#lx)\n", iminor(inode), cmd, arg); + pr_debug("ds_ioctl(socket %d, %#x, %#lx)\n", iminor(inode), cmd, arg); user = file->private_data; if (CHECK_USER(user)) @@ -809,13 +853,13 @@ static int ds_ioctl(struct inode * inode, struct file * file, if (cmd & IOC_IN) { if (!access_ok(VERIFY_READ, uarg, size)) { - ds_dbg(3, "ds_ioctl(): verify_read = %d\n", -EFAULT); + pr_debug("ds_ioctl(): verify_read = %d\n", -EFAULT); return -EFAULT; } } if (cmd & IOC_OUT) { if (!access_ok(VERIFY_WRITE, uarg, size)) { - ds_dbg(3, "ds_ioctl(): verify_write = %d\n", -EFAULT); + pr_debug("ds_ioctl(): verify_write = %d\n", -EFAULT); return -EFAULT; } } @@ -927,15 +971,15 @@ static int ds_ioctl(struct inode * inode, struct file * file, goto free_out; break; case DS_GET_FIRST_WINDOW: - ret = pcmcia_get_window(s, &buf->win_info.handle, 0, + ret = pcmcia_get_window(s, &buf->win_info.handle, 1, &buf->win_info.window); break; case DS_GET_NEXT_WINDOW: ret = pcmcia_get_window(s, &buf->win_info.handle, - buf->win_info.handle->index + 1, &buf->win_info.window); + buf->win_info.handle + 1, &buf->win_info.window); break; case DS_GET_MEM_PAGE: - ret = pcmcia_get_mem_page(buf->win_info.handle, + ret = pcmcia_get_mem_page(s, buf->win_info.handle, &buf->win_info.map); break; case DS_REPLACE_CIS: @@ -962,7 +1006,7 @@ static int ds_ioctl(struct inode * inode, struct file * file, } if ((err == 0) && (ret != 0)) { - ds_dbg(2, "ds_ioctl: ret = %d\n", ret); + pr_debug("ds_ioctl: ret = %d\n", ret); switch (ret) { case -ENODEV: case -EINVAL: diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c index d919e96c0af..a8bf8c1b45e 100644 --- a/drivers/pcmcia/pcmcia_resource.c +++ b/drivers/pcmcia/pcmcia_resource.c @@ -20,6 +20,7 @@ #include <linux/delay.h> #include <linux/pci.h> #include <linux/device.h> +#include <linux/netdevice.h> #include <pcmcia/cs_types.h> #include <pcmcia/ss.h> @@ -43,21 +44,6 @@ static u8 pcmcia_used_irq[NR_IRQS]; #endif -#ifdef CONFIG_PCMCIA_DEBUG -extern int ds_pc_debug; - -#define ds_dbg(skt, lvl, fmt, arg...) do { \ - if (ds_pc_debug >= lvl) \ - dev_printk(KERN_DEBUG, &skt->dev, \ - "pcmcia_resource: " fmt, \ - ## arg); \ -} while (0) -#else -#define ds_dbg(skt, lvl, fmt, arg...) do { } while (0) -#endif - - - /** alloc_io_space * * Special stuff for managing IO windows, because they are scarce @@ -72,14 +58,14 @@ static int alloc_io_space(struct pcmcia_socket *s, u_int attr, align = (*base) ? (lines ? 1<<lines : 0) : 1; if (align && (align < num)) { if (*base) { - ds_dbg(s, 0, "odd IO request: num %#x align %#x\n", + dev_dbg(&s->dev, "odd IO request: num %#x align %#x\n", num, align); align = 0; } else while (align && (align < num)) align <<= 1; } if (*base & ~(align-1)) { - ds_dbg(s, 0, "odd IO request: base %#x align %#x\n", + dev_dbg(&s->dev, "odd IO request: base %#x align %#x\n", *base, align); align = 0; } @@ -173,8 +159,10 @@ int pcmcia_access_configuration_register(struct pcmcia_device *p_dev, s = p_dev->socket; c = p_dev->function_config; - if (!(c->state & CONFIG_LOCKED)) + if (!(c->state & CONFIG_LOCKED)) { + dev_dbg(&s->dev, "Configuration isnt't locked\n"); return -EACCES; + } addr = (c->ConfigBase + reg->Offset) >> 1; @@ -188,6 +176,7 @@ int pcmcia_access_configuration_register(struct pcmcia_device *p_dev, pcmcia_write_cis_mem(s, 1, addr, 1, &val); break; default: + dev_dbg(&s->dev, "Invalid conf register request\n"); return -EINVAL; break; } @@ -196,68 +185,21 @@ int pcmcia_access_configuration_register(struct pcmcia_device *p_dev, EXPORT_SYMBOL(pcmcia_access_configuration_register); -/** pcmcia_get_window - */ -int pcmcia_get_window(struct pcmcia_socket *s, window_handle_t *handle, - int idx, win_req_t *req) -{ - window_t *win; - int w; - - if (!s || !(s->state & SOCKET_PRESENT)) - return -ENODEV; - for (w = idx; w < MAX_WIN; w++) - if (s->state & SOCKET_WIN_REQ(w)) - break; - if (w == MAX_WIN) - return -EINVAL; - win = &s->win[w]; - req->Base = win->ctl.res->start; - req->Size = win->ctl.res->end - win->ctl.res->start + 1; - req->AccessSpeed = win->ctl.speed; - req->Attributes = 0; - if (win->ctl.flags & MAP_ATTRIB) - req->Attributes |= WIN_MEMORY_TYPE_AM; - if (win->ctl.flags & MAP_ACTIVE) - req->Attributes |= WIN_ENABLE; - if (win->ctl.flags & MAP_16BIT) - req->Attributes |= WIN_DATA_WIDTH_16; - if (win->ctl.flags & MAP_USE_WAIT) - req->Attributes |= WIN_USE_WAIT; - *handle = win; - return 0; -} /* pcmcia_get_window */ -EXPORT_SYMBOL(pcmcia_get_window); - - -/** pcmcia_get_mem_page - * - * Change the card address of an already open memory window. - */ -int pcmcia_get_mem_page(window_handle_t win, memreq_t *req) +int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t wh, + memreq_t *req) { - if ((win == NULL) || (win->magic != WINDOW_MAGIC)) - return -EINVAL; - req->Page = 0; - req->CardOffset = win->ctl.card_start; - return 0; -} /* pcmcia_get_mem_page */ -EXPORT_SYMBOL(pcmcia_get_mem_page); - + struct pcmcia_socket *s = p_dev->socket; -int pcmcia_map_mem_page(window_handle_t win, memreq_t *req) -{ - struct pcmcia_socket *s; - if ((win == NULL) || (win->magic != WINDOW_MAGIC)) + wh--; + if (wh >= MAX_WIN) return -EINVAL; - s = win->sock; if (req->Page != 0) { - ds_dbg(s, 0, "failure: requested page is zero\n"); + dev_dbg(&s->dev, "failure: requested page is zero\n"); return -EINVAL; } - win->ctl.card_start = req->CardOffset; - if (s->ops->set_mem_map(s, &win->ctl) != 0) { - ds_dbg(s, 0, "failed to set_mem_map\n"); + s->win[wh].card_start = req->CardOffset; + if (s->ops->set_mem_map(s, &s->win[wh]) != 0) { + dev_dbg(&s->dev, "failed to set_mem_map\n"); return -EIO; } return 0; @@ -278,10 +220,14 @@ int pcmcia_modify_configuration(struct pcmcia_device *p_dev, s = p_dev->socket; c = p_dev->function_config; - if (!(s->state & SOCKET_PRESENT)) + if (!(s->state & SOCKET_PRESENT)) { + dev_dbg(&s->dev, "No card present\n"); return -ENODEV; - if (!(c->state & CONFIG_LOCKED)) + } + if (!(c->state & CONFIG_LOCKED)) { + dev_dbg(&s->dev, "Configuration isnt't locked\n"); return -EACCES; + } if (mod->Attributes & CONF_IRQ_CHANGE_VALID) { if (mod->Attributes & CONF_ENABLE_IRQ) { @@ -295,7 +241,7 @@ int pcmcia_modify_configuration(struct pcmcia_device *p_dev, } if (mod->Attributes & CONF_VCC_CHANGE_VALID) { - ds_dbg(s, 0, "changing Vcc is not allowed at this time\n"); + dev_dbg(&s->dev, "changing Vcc is not allowed at this time\n"); return -EINVAL; } @@ -303,7 +249,7 @@ int pcmcia_modify_configuration(struct pcmcia_device *p_dev, if ((mod->Attributes & CONF_VPP1_CHANGE_VALID) && (mod->Attributes & CONF_VPP2_CHANGE_VALID)) { if (mod->Vpp1 != mod->Vpp2) { - ds_dbg(s, 0, "Vpp1 and Vpp2 must be the same\n"); + dev_dbg(&s->dev, "Vpp1 and Vpp2 must be the same\n"); return -EINVAL; } s->socket.Vpp = mod->Vpp1; @@ -314,7 +260,7 @@ int pcmcia_modify_configuration(struct pcmcia_device *p_dev, } } else if ((mod->Attributes & CONF_VPP1_CHANGE_VALID) || (mod->Attributes & CONF_VPP2_CHANGE_VALID)) { - ds_dbg(s, 0, "changing Vcc is not allowed at this time\n"); + dev_dbg(&s->dev, "changing Vcc is not allowed at this time\n"); return -EINVAL; } @@ -425,11 +371,11 @@ static int pcmcia_release_irq(struct pcmcia_device *p_dev, irq_req_t *req) if (c->state & CONFIG_LOCKED) return -EACCES; if (c->irq.Attributes != req->Attributes) { - ds_dbg(s, 0, "IRQ attributes must match assigned ones\n"); + dev_dbg(&s->dev, "IRQ attributes must match assigned ones\n"); return -EINVAL; } if (s->irq.AssignedIRQ != req->AssignedIRQ) { - ds_dbg(s, 0, "IRQ must match assigned one\n"); + dev_dbg(&s->dev, "IRQ must match assigned one\n"); return -EINVAL; } if (--s->irq.Config == 0) { @@ -437,8 +383,8 @@ static int pcmcia_release_irq(struct pcmcia_device *p_dev, irq_req_t *req) s->irq.AssignedIRQ = 0; } - if (req->Attributes & IRQ_HANDLE_PRESENT) { - free_irq(req->AssignedIRQ, req->Instance); + if (req->Handler) { + free_irq(req->AssignedIRQ, p_dev->priv); } #ifdef CONFIG_PCMCIA_PROBE @@ -449,30 +395,34 @@ static int pcmcia_release_irq(struct pcmcia_device *p_dev, irq_req_t *req) } /* pcmcia_release_irq */ -int pcmcia_release_window(window_handle_t win) +int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t wh) { - struct pcmcia_socket *s; + struct pcmcia_socket *s = p_dev->socket; + pccard_mem_map *win; - if ((win == NULL) || (win->magic != WINDOW_MAGIC)) + wh--; + if (wh >= MAX_WIN) return -EINVAL; - s = win->sock; - if (!(win->handle->_win & CLIENT_WIN_REQ(win->index))) + + win = &s->win[wh]; + + if (!(p_dev->_win & CLIENT_WIN_REQ(wh))) { + dev_dbg(&s->dev, "not releasing unknown window\n"); return -EINVAL; + } /* Shut down memory window */ - win->ctl.flags &= ~MAP_ACTIVE; - s->ops->set_mem_map(s, &win->ctl); - s->state &= ~SOCKET_WIN_REQ(win->index); + win->flags &= ~MAP_ACTIVE; + s->ops->set_mem_map(s, win); + s->state &= ~SOCKET_WIN_REQ(wh); /* Release system memory */ - if (win->ctl.res) { - release_resource(win->ctl.res); - kfree(win->ctl.res); - win->ctl.res = NULL; + if (win->res) { + release_resource(win->res); + kfree(win->res); + win->res = NULL; } - win->handle->_win &= ~CLIENT_WIN_REQ(win->index); - - win->magic = 0; + p_dev->_win &= ~CLIENT_WIN_REQ(wh); return 0; } /* pcmcia_release_window */ @@ -492,12 +442,14 @@ int pcmcia_request_configuration(struct pcmcia_device *p_dev, return -ENODEV; if (req->IntType & INT_CARDBUS) { - ds_dbg(p_dev->socket, 0, "IntType may not be INT_CARDBUS\n"); + dev_dbg(&s->dev, "IntType may not be INT_CARDBUS\n"); return -EINVAL; } c = p_dev->function_config; - if (c->state & CONFIG_LOCKED) + if (c->state & CONFIG_LOCKED) { + dev_dbg(&s->dev, "Configuration is locked\n"); return -EACCES; + } /* Do power control. We don't allow changes in Vcc. */ s->socket.Vpp = req->Vpp; @@ -609,40 +561,44 @@ int pcmcia_request_io(struct pcmcia_device *p_dev, io_req_t *req) struct pcmcia_socket *s = p_dev->socket; config_t *c; - if (!(s->state & SOCKET_PRESENT)) + if (!(s->state & SOCKET_PRESENT)) { + dev_dbg(&s->dev, "No card present\n"); return -ENODEV; + } if (!req) return -EINVAL; c = p_dev->function_config; - if (c->state & CONFIG_LOCKED) + if (c->state & CONFIG_LOCKED) { + dev_dbg(&s->dev, "Configuration is locked\n"); return -EACCES; + } if (c->state & CONFIG_IO_REQ) { - ds_dbg(s, 0, "IO already configured\n"); + dev_dbg(&s->dev, "IO already configured\n"); return -EBUSY; } if (req->Attributes1 & (IO_SHARED | IO_FORCE_ALIAS_ACCESS)) { - ds_dbg(s, 0, "bad attribute setting for IO region 1\n"); + dev_dbg(&s->dev, "bad attribute setting for IO region 1\n"); return -EINVAL; } if ((req->NumPorts2 > 0) && (req->Attributes2 & (IO_SHARED | IO_FORCE_ALIAS_ACCESS))) { - ds_dbg(s, 0, "bad attribute setting for IO region 2\n"); + dev_dbg(&s->dev, "bad attribute setting for IO region 2\n"); return -EINVAL; } - ds_dbg(s, 1, "trying to allocate resource 1\n"); + dev_dbg(&s->dev, "trying to allocate resource 1\n"); if (alloc_io_space(s, req->Attributes1, &req->BasePort1, req->NumPorts1, req->IOAddrLines)) { - ds_dbg(s, 0, "allocation of resource 1 failed\n"); + dev_dbg(&s->dev, "allocation of resource 1 failed\n"); return -EBUSY; } if (req->NumPorts2) { - ds_dbg(s, 1, "trying to allocate resource 2\n"); + dev_dbg(&s->dev, "trying to allocate resource 2\n"); if (alloc_io_space(s, req->Attributes2, &req->BasePort2, req->NumPorts2, req->IOAddrLines)) { - ds_dbg(s, 0, "allocation of resource 2 failed\n"); + dev_dbg(&s->dev, "allocation of resource 2 failed\n"); release_io_space(s, req->BasePort1, req->NumPorts1); return -EBUSY; } @@ -680,13 +636,17 @@ int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req) int ret = -EINVAL, irq = 0; int type; - if (!(s->state & SOCKET_PRESENT)) + if (!(s->state & SOCKET_PRESENT)) { + dev_dbg(&s->dev, "No card present\n"); return -ENODEV; + } c = p_dev->function_config; - if (c->state & CONFIG_LOCKED) + if (c->state & CONFIG_LOCKED) { + dev_dbg(&s->dev, "Configuration is locked\n"); return -EACCES; + } if (c->state & CONFIG_IRQ_REQ) { - ds_dbg(s, 0, "IRQ already configured\n"); + dev_dbg(&s->dev, "IRQ already configured\n"); return -EBUSY; } @@ -704,7 +664,7 @@ int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req) /* if the underlying IRQ infrastructure allows for it, only allocate * the IRQ, but do not enable it */ - if (!(req->Attributes & IRQ_HANDLE_PRESENT)) + if (!(req->Handler)) type |= IRQ_NOAUTOEN; #endif /* IRQ_NOAUTOEN */ @@ -714,7 +674,7 @@ int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req) } else { int try; u32 mask = s->irq_mask; - void *data = &p_dev->dev.driver; /* something unique to this device */ + void *data = p_dev; /* something unique to this device */ for (try = 0; try < 64; try++) { irq = try % 32; @@ -731,12 +691,12 @@ int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req) * registering a dummy handle works, i.e. if the IRQ isn't * marked as used by the kernel resource management core */ ret = request_irq(irq, - (req->Attributes & IRQ_HANDLE_PRESENT) ? req->Handler : test_action, + (req->Handler) ? req->Handler : test_action, type, p_dev->devname, - (req->Attributes & IRQ_HANDLE_PRESENT) ? req->Instance : data); + (req->Handler) ? p_dev->priv : data); if (!ret) { - if (!(req->Attributes & IRQ_HANDLE_PRESENT)) + if (!req->Handler) free_irq(irq, data); break; } @@ -745,17 +705,22 @@ int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req) #endif /* only assign PCI irq if no IRQ already assigned */ if (ret && !s->irq.AssignedIRQ) { - if (!s->pci_irq) + if (!s->pci_irq) { + dev_printk(KERN_INFO, &s->dev, "no IRQ found\n"); return ret; + } type = IRQF_SHARED; irq = s->pci_irq; } - if (ret && (req->Attributes & IRQ_HANDLE_PRESENT)) { + if (ret && req->Handler) { ret = request_irq(irq, req->Handler, type, - p_dev->devname, req->Instance); - if (ret) + p_dev->devname, p_dev->priv); + if (ret) { + dev_printk(KERN_INFO, &s->dev, + "request_irq() failed\n"); return ret; + } } /* Make sure the fact the request type was overridden is passed back */ @@ -787,17 +752,19 @@ EXPORT_SYMBOL(pcmcia_request_irq); * Request_window() establishes a mapping between card memory space * and system memory space. */ -int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_handle_t *wh) +int pcmcia_request_window(struct pcmcia_device *p_dev, win_req_t *req, window_handle_t *wh) { - struct pcmcia_socket *s = (*p_dev)->socket; - window_t *win; + struct pcmcia_socket *s = p_dev->socket; + pccard_mem_map *win; u_long align; int w; - if (!(s->state & SOCKET_PRESENT)) + if (!(s->state & SOCKET_PRESENT)) { + dev_dbg(&s->dev, "No card present\n"); return -ENODEV; + } if (req->Attributes & (WIN_PAGED | WIN_SHARED)) { - ds_dbg(s, 0, "bad attribute setting for iomem region\n"); + dev_dbg(&s->dev, "bad attribute setting for iomem region\n"); return -EINVAL; } @@ -808,12 +775,12 @@ int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_h (req->Attributes & WIN_STRICT_ALIGN)) ? req->Size : s->map_size); if (req->Size & (s->map_size-1)) { - ds_dbg(s, 0, "invalid map size\n"); + dev_dbg(&s->dev, "invalid map size\n"); return -EINVAL; } if ((req->Base && (s->features & SS_CAP_STATIC_MAP)) || (req->Base & (align-1))) { - ds_dbg(s, 0, "invalid base address\n"); + dev_dbg(&s->dev, "invalid base address\n"); return -EINVAL; } if (req->Base) @@ -823,52 +790,48 @@ int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_h for (w = 0; w < MAX_WIN; w++) if (!(s->state & SOCKET_WIN_REQ(w))) break; if (w == MAX_WIN) { - ds_dbg(s, 0, "all windows are used already\n"); + dev_dbg(&s->dev, "all windows are used already\n"); return -EINVAL; } win = &s->win[w]; - win->magic = WINDOW_MAGIC; - win->index = w; - win->handle = *p_dev; - win->sock = s; if (!(s->features & SS_CAP_STATIC_MAP)) { - win->ctl.res = pcmcia_find_mem_region(req->Base, req->Size, align, + win->res = pcmcia_find_mem_region(req->Base, req->Size, align, (req->Attributes & WIN_MAP_BELOW_1MB), s); - if (!win->ctl.res) { - ds_dbg(s, 0, "allocating mem region failed\n"); + if (!win->res) { + dev_dbg(&s->dev, "allocating mem region failed\n"); return -EINVAL; } } - (*p_dev)->_win |= CLIENT_WIN_REQ(w); + p_dev->_win |= CLIENT_WIN_REQ(w); /* Configure the socket controller */ - win->ctl.map = w+1; - win->ctl.flags = 0; - win->ctl.speed = req->AccessSpeed; + win->map = w+1; + win->flags = 0; + win->speed = req->AccessSpeed; if (req->Attributes & WIN_MEMORY_TYPE) - win->ctl.flags |= MAP_ATTRIB; + win->flags |= MAP_ATTRIB; if (req->Attributes & WIN_ENABLE) - win->ctl.flags |= MAP_ACTIVE; + win->flags |= MAP_ACTIVE; if (req->Attributes & WIN_DATA_WIDTH_16) - win->ctl.flags |= MAP_16BIT; + win->flags |= MAP_16BIT; if (req->Attributes & WIN_USE_WAIT) - win->ctl.flags |= MAP_USE_WAIT; - win->ctl.card_start = 0; - if (s->ops->set_mem_map(s, &win->ctl) != 0) { - ds_dbg(s, 0, "failed to set memory mapping\n"); + win->flags |= MAP_USE_WAIT; + win->card_start = 0; + if (s->ops->set_mem_map(s, win) != 0) { + dev_dbg(&s->dev, "failed to set memory mapping\n"); return -EIO; } s->state |= SOCKET_WIN_REQ(w); /* Return window handle */ if (s->features & SS_CAP_STATIC_MAP) { - req->Base = win->ctl.static_start; + req->Base = win->static_start; } else { - req->Base = win->ctl.res->start; + req->Base = win->res->start; } - *wh = win; + *wh = w + 1; return 0; } /* pcmcia_request_window */ @@ -879,19 +842,46 @@ void pcmcia_disable_device(struct pcmcia_device *p_dev) { pcmcia_release_io(p_dev, &p_dev->io); pcmcia_release_irq(p_dev, &p_dev->irq); if (p_dev->win) - pcmcia_release_window(p_dev->win); + pcmcia_release_window(p_dev, p_dev->win); } EXPORT_SYMBOL(pcmcia_disable_device); struct pcmcia_cfg_mem { - tuple_t tuple; + struct pcmcia_device *p_dev; + void *priv_data; + int (*conf_check) (struct pcmcia_device *p_dev, + cistpl_cftable_entry_t *cfg, + cistpl_cftable_entry_t *dflt, + unsigned int vcc, + void *priv_data); cisparse_t parse; - u8 buf[256]; cistpl_cftable_entry_t dflt; }; /** + * pcmcia_do_loop_config() - internal helper for pcmcia_loop_config() + * + * pcmcia_do_loop_config() is the internal callback for the call from + * pcmcia_loop_config() to pccard_loop_tuple(). Data is transferred + * by a struct pcmcia_cfg_mem. + */ +static int pcmcia_do_loop_config(tuple_t *tuple, cisparse_t *parse, void *priv) +{ + cistpl_cftable_entry_t *cfg = &parse->cftable_entry; + struct pcmcia_cfg_mem *cfg_mem = priv; + + /* default values */ + cfg_mem->p_dev->conf.ConfigIndex = cfg->index; + if (cfg->flags & CISTPL_CFTABLE_DEFAULT) + cfg_mem->dflt = *cfg; + + return cfg_mem->conf_check(cfg_mem->p_dev, cfg, &cfg_mem->dflt, + cfg_mem->p_dev->socket->socket.Vcc, + cfg_mem->priv_data); +} + +/** * pcmcia_loop_config() - loop over configuration options * @p_dev: the struct pcmcia_device which we need to loop for. * @conf_check: function to call for each configuration option. @@ -913,48 +903,174 @@ int pcmcia_loop_config(struct pcmcia_device *p_dev, void *priv_data) { struct pcmcia_cfg_mem *cfg_mem; - - tuple_t *tuple; int ret; - unsigned int vcc; cfg_mem = kzalloc(sizeof(struct pcmcia_cfg_mem), GFP_KERNEL); if (cfg_mem == NULL) return -ENOMEM; - /* get the current Vcc setting */ - vcc = p_dev->socket->socket.Vcc; + cfg_mem->p_dev = p_dev; + cfg_mem->conf_check = conf_check; + cfg_mem->priv_data = priv_data; - tuple = &cfg_mem->tuple; - tuple->TupleData = cfg_mem->buf; - tuple->TupleDataMax = 255; - tuple->TupleOffset = 0; - tuple->DesiredTuple = CISTPL_CFTABLE_ENTRY; - tuple->Attributes = 0; + ret = pccard_loop_tuple(p_dev->socket, p_dev->func, + CISTPL_CFTABLE_ENTRY, &cfg_mem->parse, + cfg_mem, pcmcia_do_loop_config); - ret = pcmcia_get_first_tuple(p_dev, tuple); - while (!ret) { - cistpl_cftable_entry_t *cfg = &cfg_mem->parse.cftable_entry; + kfree(cfg_mem); + return ret; +} +EXPORT_SYMBOL(pcmcia_loop_config); + + +struct pcmcia_loop_mem { + struct pcmcia_device *p_dev; + void *priv_data; + int (*loop_tuple) (struct pcmcia_device *p_dev, + tuple_t *tuple, + void *priv_data); +}; + +/** + * pcmcia_do_loop_tuple() - internal helper for pcmcia_loop_config() + * + * pcmcia_do_loop_tuple() is the internal callback for the call from + * pcmcia_loop_tuple() to pccard_loop_tuple(). Data is transferred + * by a struct pcmcia_cfg_mem. + */ +static int pcmcia_do_loop_tuple(tuple_t *tuple, cisparse_t *parse, void *priv) +{ + struct pcmcia_loop_mem *loop = priv; + + return loop->loop_tuple(loop->p_dev, tuple, loop->priv_data); +}; + +/** + * pcmcia_loop_tuple() - loop over tuples in the CIS + * @p_dev: the struct pcmcia_device which we need to loop for. + * @code: which CIS code shall we look for? + * @priv_data: private data to be passed to the loop_tuple function. + * @loop_tuple: function to call for each CIS entry of type @function. IT + * gets passed the raw tuple and @priv_data. + * + * pcmcia_loop_tuple() loops over all CIS entries of type @function, and + * calls the @loop_tuple function for each entry. If the call to @loop_tuple + * returns 0, the loop exits. Returns 0 on success or errorcode otherwise. + */ +int pcmcia_loop_tuple(struct pcmcia_device *p_dev, cisdata_t code, + int (*loop_tuple) (struct pcmcia_device *p_dev, + tuple_t *tuple, + void *priv_data), + void *priv_data) +{ + struct pcmcia_loop_mem loop = { + .p_dev = p_dev, + .loop_tuple = loop_tuple, + .priv_data = priv_data}; - if (pcmcia_get_tuple_data(p_dev, tuple)) - goto next_entry; + return pccard_loop_tuple(p_dev->socket, p_dev->func, code, NULL, + &loop, pcmcia_do_loop_tuple); +}; +EXPORT_SYMBOL(pcmcia_loop_tuple); - if (pcmcia_parse_tuple(tuple, &cfg_mem->parse)) - goto next_entry; - /* default values */ - p_dev->conf.ConfigIndex = cfg->index; - if (cfg->flags & CISTPL_CFTABLE_DEFAULT) - cfg_mem->dflt = *cfg; +struct pcmcia_loop_get { + size_t len; + cisdata_t **buf; +}; - ret = conf_check(p_dev, cfg, &cfg_mem->dflt, vcc, priv_data); - if (!ret) - break; +/** + * pcmcia_do_get_tuple() - internal helper for pcmcia_get_tuple() + * + * pcmcia_do_get_tuple() is the internal callback for the call from + * pcmcia_get_tuple() to pcmcia_loop_tuple(). As we're only interested in + * the first tuple, return 0 unconditionally. Create a memory buffer large + * enough to hold the content of the tuple, and fill it with the tuple data. + * The caller is responsible to free the buffer. + */ +static int pcmcia_do_get_tuple(struct pcmcia_device *p_dev, tuple_t *tuple, + void *priv) +{ + struct pcmcia_loop_get *get = priv; + + *get->buf = kzalloc(tuple->TupleDataLen, GFP_KERNEL); + if (*get->buf) { + get->len = tuple->TupleDataLen; + memcpy(*get->buf, tuple->TupleData, tuple->TupleDataLen); + } else + dev_dbg(&p_dev->dev, "do_get_tuple: out of memory\n"); + return 0; +}; -next_entry: - ret = pcmcia_get_next_tuple(p_dev, tuple); +/** + * pcmcia_get_tuple() - get first tuple from CIS + * @p_dev: the struct pcmcia_device which we need to loop for. + * @code: which CIS code shall we look for? + * @buf: pointer to store the buffer to. + * + * pcmcia_get_tuple() gets the content of the first CIS entry of type @code. + * It returns the buffer length (or zero). The caller is responsible to free + * the buffer passed in @buf. + */ +size_t pcmcia_get_tuple(struct pcmcia_device *p_dev, cisdata_t code, + unsigned char **buf) +{ + struct pcmcia_loop_get get = { + .len = 0, + .buf = buf, + }; + + *get.buf = NULL; + pcmcia_loop_tuple(p_dev, code, pcmcia_do_get_tuple, &get); + + return get.len; +}; +EXPORT_SYMBOL(pcmcia_get_tuple); + + +/** + * pcmcia_do_get_mac() - internal helper for pcmcia_get_mac_from_cis() + * + * pcmcia_do_get_mac() is the internal callback for the call from + * pcmcia_get_mac_from_cis() to pcmcia_loop_tuple(). We check whether the + * tuple contains a proper LAN_NODE_ID of length 6, and copy the data + * to struct net_device->dev_addr[i]. + */ +static int pcmcia_do_get_mac(struct pcmcia_device *p_dev, tuple_t *tuple, + void *priv) +{ + struct net_device *dev = priv; + int i; + + if (tuple->TupleData[0] != CISTPL_FUNCE_LAN_NODE_ID) + return -EINVAL; + if (tuple->TupleDataLen < ETH_ALEN + 2) { + dev_warn(&p_dev->dev, "Invalid CIS tuple length for " + "LAN_NODE_ID\n"); + return -EINVAL; } - return ret; -} -EXPORT_SYMBOL(pcmcia_loop_config); + if (tuple->TupleData[1] != ETH_ALEN) { + dev_warn(&p_dev->dev, "Invalid header for LAN_NODE_ID\n"); + return -EINVAL; + } + for (i = 0; i < 6; i++) + dev->dev_addr[i] = tuple->TupleData[i+2]; + return 0; +}; + +/** + * pcmcia_get_mac_from_cis() - read out MAC address from CISTPL_FUNCE + * @p_dev: the struct pcmcia_device for which we want the address. + * @dev: a properly prepared struct net_device to store the info to. + * + * pcmcia_get_mac_from_cis() reads out the hardware MAC address from + * CISTPL_FUNCE and stores it into struct net_device *dev->dev_addr which + * must be set up properly by the driver (see examples!). + */ +int pcmcia_get_mac_from_cis(struct pcmcia_device *p_dev, struct net_device *dev) +{ + return pcmcia_loop_tuple(p_dev, CISTPL_FUNCE, pcmcia_do_get_mac, dev); +}; +EXPORT_SYMBOL(pcmcia_get_mac_from_cis); + diff --git a/drivers/pcmcia/pd6729.c b/drivers/pcmcia/pd6729.c index 70a33468bcd..e1741cd875a 100644 --- a/drivers/pcmcia/pd6729.c +++ b/drivers/pcmcia/pd6729.c @@ -213,7 +213,8 @@ static irqreturn_t pd6729_interrupt(int irq, void *dev) if (csc & I365_CSC_DETECT) { events |= SS_DETECT; - dprintk("Card detected in socket %i!\n", i); + dev_vdbg(&socket[i].socket.dev, + "Card detected in socket %i!\n", i); } if (indirect_read(&socket[i], I365_INTCTL) @@ -331,11 +332,11 @@ static int pd6729_set_socket(struct pcmcia_socket *sock, socket_state_t *state) reg = I365_PWR_NORESET; /* default: disable resetdrv on resume */ if (state->flags & SS_PWR_AUTO) { - dprintk("Auto power\n"); + dev_dbg(&sock->dev, "Auto power\n"); reg |= I365_PWR_AUTO; /* automatic power mngmnt */ } if (state->flags & SS_OUTPUT_ENA) { - dprintk("Power Enabled\n"); + dev_dbg(&sock->dev, "Power Enabled\n"); reg |= I365_PWR_OUT; /* enable power */ } @@ -343,40 +344,44 @@ static int pd6729_set_socket(struct pcmcia_socket *sock, socket_state_t *state) case 0: break; case 33: - dprintk("setting voltage to Vcc to 3.3V on socket %i\n", + dev_dbg(&sock->dev, + "setting voltage to Vcc to 3.3V on socket %i\n", socket->number); reg |= I365_VCC_5V; indirect_setbit(socket, PD67_MISC_CTL_1, PD67_MC1_VCC_3V); break; case 50: - dprintk("setting voltage to Vcc to 5V on socket %i\n", + dev_dbg(&sock->dev, + "setting voltage to Vcc to 5V on socket %i\n", socket->number); reg |= I365_VCC_5V; indirect_resetbit(socket, PD67_MISC_CTL_1, PD67_MC1_VCC_3V); break; default: - dprintk("pd6729: pd6729_set_socket called with " - "invalid VCC power value: %i\n", - state->Vcc); + dev_dbg(&sock->dev, + "pd6729_set_socket called with invalid VCC power " + "value: %i\n", state->Vcc); return -EINVAL; } switch (state->Vpp) { case 0: - dprintk("not setting Vpp on socket %i\n", socket->number); + dev_dbg(&sock->dev, "not setting Vpp on socket %i\n", + socket->number); break; case 33: case 50: - dprintk("setting Vpp to Vcc for socket %i\n", socket->number); + dev_dbg(&sock->dev, "setting Vpp to Vcc for socket %i\n", + socket->number); reg |= I365_VPP1_5V; break; case 120: - dprintk("setting Vpp to 12.0\n"); + dev_dbg(&sock->dev, "setting Vpp to 12.0\n"); reg |= I365_VPP1_12V; break; default: - dprintk("pd6729: pd6729_set_socket called with invalid VPP power value: %i\n", - state->Vpp); + dev_dbg(&sock->dev, "pd6729: pd6729_set_socket called with " + "invalid VPP power value: %i\n", state->Vpp); return -EINVAL; } @@ -438,7 +443,7 @@ static int pd6729_set_io_map(struct pcmcia_socket *sock, /* Check error conditions */ if (map > 1) { - dprintk("pd6729_set_io_map with invalid map"); + dev_dbg(&sock->dev, "pd6729_set_io_map with invalid map\n"); return -EINVAL; } @@ -446,7 +451,7 @@ static int pd6729_set_io_map(struct pcmcia_socket *sock, if (indirect_read(socket, I365_ADDRWIN) & I365_ENA_IO(map)) indirect_resetbit(socket, I365_ADDRWIN, I365_ENA_IO(map)); - /* dprintk("set_io_map: Setting range to %x - %x\n", + /* dev_dbg(&sock->dev, "set_io_map: Setting range to %x - %x\n", io->start, io->stop);*/ /* write the new values */ @@ -478,12 +483,12 @@ static int pd6729_set_mem_map(struct pcmcia_socket *sock, map = mem->map; if (map > 4) { - printk("pd6729_set_mem_map: invalid map"); + dev_warn(&sock->dev, "invalid map requested\n"); return -EINVAL; } if ((mem->res->start > mem->res->end) || (mem->speed > 1000)) { - printk("pd6729_set_mem_map: invalid address / speed"); + dev_warn(&sock->dev, "invalid invalid address / speed\n"); return -EINVAL; } @@ -529,12 +534,12 @@ static int pd6729_set_mem_map(struct pcmcia_socket *sock, if (mem->flags & MAP_WRPROT) i |= I365_MEM_WRPROT; if (mem->flags & MAP_ATTRIB) { - /* dprintk("requesting attribute memory for socket %i\n", - socket->number);*/ + /* dev_dbg(&sock->dev, "requesting attribute memory for " + "socket %i\n", socket->number);*/ i |= I365_MEM_REG; } else { - /* dprintk("requesting normal memory for socket %i\n", - socket->number);*/ + /* dev_dbg(&sock->dev, "requesting normal memory for " + "socket %i\n", socket->number);*/ } indirect_write16(socket, base + I365_W_OFF, i); @@ -577,7 +582,7 @@ static struct pccard_operations pd6729_operations = { static irqreturn_t pd6729_test(int irq, void *dev) { - dprintk("-> hit on irq %d\n", irq); + pr_devel("-> hit on irq %d\n", irq); return IRQ_HANDLED; } @@ -642,13 +647,13 @@ static int __devinit pd6729_pci_probe(struct pci_dev *dev, goto err_out_free_mem; if (!pci_resource_start(dev, 0)) { - printk(KERN_INFO "pd6729: refusing to load the driver " - "as the io_base is 0.\n"); + dev_warn(&dev->dev, "refusing to load the driver as the " + "io_base is NULL.\n"); goto err_out_free_mem; } - printk(KERN_INFO "pd6729: Cirrus PD6729 PCI to PCMCIA Bridge " - "at 0x%llx on irq %d\n", + dev_info(&dev->dev, "Cirrus PD6729 PCI to PCMCIA Bridge at 0x%llx " + "on irq %d\n", (unsigned long long)pci_resource_start(dev, 0), dev->irq); /* * Since we have no memory BARs some firmware may not @@ -656,14 +661,14 @@ static int __devinit pd6729_pci_probe(struct pci_dev *dev, */ pci_read_config_byte(dev, PCI_COMMAND, &configbyte); if (!(configbyte & PCI_COMMAND_MEMORY)) { - printk(KERN_DEBUG "pd6729: Enabling PCI_COMMAND_MEMORY.\n"); + dev_dbg(&dev->dev, "pd6729: Enabling PCI_COMMAND_MEMORY.\n"); configbyte |= PCI_COMMAND_MEMORY; pci_write_config_byte(dev, PCI_COMMAND, configbyte); } ret = pci_request_regions(dev, "pd6729"); if (ret) { - printk(KERN_INFO "pd6729: pci request region failed.\n"); + dev_warn(&dev->dev, "pci request region failed.\n"); goto err_out_disable; } @@ -672,7 +677,7 @@ static int __devinit pd6729_pci_probe(struct pci_dev *dev, mask = pd6729_isa_scan(); if (irq_mode == 0 && mask == 0) { - printk(KERN_INFO "pd6729: no ISA interrupt is available.\n"); + dev_warn(&dev->dev, "no ISA interrupt is available.\n"); goto err_out_free_res; } @@ -697,8 +702,8 @@ static int __devinit pd6729_pci_probe(struct pci_dev *dev, /* Register the interrupt handler */ if ((ret = request_irq(dev->irq, pd6729_interrupt, IRQF_SHARED, "pd6729", socket))) { - printk(KERN_ERR "pd6729: Failed to register irq %d, " - "aborting\n", dev->irq); + dev_err(&dev->dev, "Failed to register irq %d\n", + dev->irq); goto err_out_free_res; } } else { @@ -713,8 +718,7 @@ static int __devinit pd6729_pci_probe(struct pci_dev *dev, for (i = 0; i < MAX_SOCKETS; i++) { ret = pcmcia_register_socket(&socket[i].socket); if (ret) { - printk(KERN_INFO "pd6729: pcmcia_register_socket " - "failed.\n"); + dev_warn(&dev->dev, "pcmcia_register_socket failed.\n"); for (j = 0; j < i ; j++) pcmcia_unregister_socket(&socket[j].socket); goto err_out_free_res2; diff --git a/drivers/pcmcia/pd6729.h b/drivers/pcmcia/pd6729.h index f392e458cdf..41418d394c5 100644 --- a/drivers/pcmcia/pd6729.h +++ b/drivers/pcmcia/pd6729.h @@ -1,13 +1,6 @@ #ifndef _INCLUDE_GUARD_PD6729_H_ #define _INCLUDE_GUARD_PD6729_H_ -/* Debuging defines */ -#ifdef NOTRACE -#define dprintk(fmt, args...) printk(fmt , ## args) -#else -#define dprintk(fmt, args...) do {} while (0) -#endif - /* Flags for I365_GENCTL */ #define I365_DF_VS1 0x40 /* DF-step Voltage Sense */ #define I365_DF_VS2 0x80 diff --git a/drivers/pcmcia/pxa2xx_base.c b/drivers/pcmcia/pxa2xx_base.c index 0e35acb1366..84dde7768ad 100644 --- a/drivers/pcmcia/pxa2xx_base.c +++ b/drivers/pcmcia/pxa2xx_base.c @@ -228,9 +228,43 @@ static const char *skt_names[] = { #define SKT_DEV_INFO_SIZE(n) \ (sizeof(struct skt_dev_info) + (n)*sizeof(struct soc_pcmcia_socket)) +int pxa2xx_drv_pcmcia_add_one(struct soc_pcmcia_socket *skt) +{ + skt->res_skt.start = _PCMCIA(skt->nr); + skt->res_skt.end = _PCMCIA(skt->nr) + PCMCIASp - 1; + skt->res_skt.name = skt_names[skt->nr]; + skt->res_skt.flags = IORESOURCE_MEM; + + skt->res_io.start = _PCMCIAIO(skt->nr); + skt->res_io.end = _PCMCIAIO(skt->nr) + PCMCIAIOSp - 1; + skt->res_io.name = "io"; + skt->res_io.flags = IORESOURCE_MEM | IORESOURCE_BUSY; + + skt->res_mem.start = _PCMCIAMem(skt->nr); + skt->res_mem.end = _PCMCIAMem(skt->nr) + PCMCIAMemSp - 1; + skt->res_mem.name = "memory"; + skt->res_mem.flags = IORESOURCE_MEM; + + skt->res_attr.start = _PCMCIAAttr(skt->nr); + skt->res_attr.end = _PCMCIAAttr(skt->nr) + PCMCIAAttrSp - 1; + skt->res_attr.name = "attribute"; + skt->res_attr.flags = IORESOURCE_MEM; + + return soc_pcmcia_add_one(skt); +} + +void pxa2xx_drv_pcmcia_ops(struct pcmcia_low_level *ops) +{ + /* Provide our PXA2xx specific timing routines. */ + ops->set_timing = pxa2xx_pcmcia_set_timing; +#ifdef CONFIG_CPU_FREQ + ops->frequency_change = pxa2xx_pcmcia_frequency_change; +#endif +} + int __pxa2xx_drv_pcmcia_probe(struct device *dev) { - int i, ret; + int i, ret = 0; struct pcmcia_low_level *ops; struct skt_dev_info *sinfo; struct soc_pcmcia_socket *skt; @@ -240,6 +274,8 @@ int __pxa2xx_drv_pcmcia_probe(struct device *dev) ops = (struct pcmcia_low_level *)dev->platform_data; + pxa2xx_drv_pcmcia_ops(ops); + sinfo = kzalloc(SKT_DEV_INFO_SIZE(ops->nr), GFP_KERNEL); if (!sinfo) return -ENOMEM; @@ -250,40 +286,25 @@ int __pxa2xx_drv_pcmcia_probe(struct device *dev) for (i = 0; i < ops->nr; i++) { skt = &sinfo->skt[i]; - skt->nr = ops->first + i; - skt->irq = NO_IRQ; - - skt->res_skt.start = _PCMCIA(skt->nr); - skt->res_skt.end = _PCMCIA(skt->nr) + PCMCIASp - 1; - skt->res_skt.name = skt_names[skt->nr]; - skt->res_skt.flags = IORESOURCE_MEM; - - skt->res_io.start = _PCMCIAIO(skt->nr); - skt->res_io.end = _PCMCIAIO(skt->nr) + PCMCIAIOSp - 1; - skt->res_io.name = "io"; - skt->res_io.flags = IORESOURCE_MEM | IORESOURCE_BUSY; + skt->nr = ops->first + i; + skt->ops = ops; + skt->socket.owner = ops->owner; + skt->socket.dev.parent = dev; + skt->socket.pci_irq = NO_IRQ; - skt->res_mem.start = _PCMCIAMem(skt->nr); - skt->res_mem.end = _PCMCIAMem(skt->nr) + PCMCIAMemSp - 1; - skt->res_mem.name = "memory"; - skt->res_mem.flags = IORESOURCE_MEM; - - skt->res_attr.start = _PCMCIAAttr(skt->nr); - skt->res_attr.end = _PCMCIAAttr(skt->nr) + PCMCIAAttrSp - 1; - skt->res_attr.name = "attribute"; - skt->res_attr.flags = IORESOURCE_MEM; + ret = pxa2xx_drv_pcmcia_add_one(skt); + if (ret) + break; } - /* Provide our PXA2xx specific timing routines. */ - ops->set_timing = pxa2xx_pcmcia_set_timing; -#ifdef CONFIG_CPU_FREQ - ops->frequency_change = pxa2xx_pcmcia_frequency_change; -#endif - - ret = soc_common_drv_pcmcia_probe(dev, ops, sinfo); - - if (!ret) + if (ret) { + while (--i >= 0) + soc_pcmcia_remove_one(&sinfo->skt[i]); + kfree(sinfo); + } else { pxa2xx_configure_sockets(dev); + dev_set_drvdata(dev, sinfo); + } return ret; } @@ -297,7 +318,16 @@ static int pxa2xx_drv_pcmcia_probe(struct platform_device *dev) static int pxa2xx_drv_pcmcia_remove(struct platform_device *dev) { - return soc_common_drv_pcmcia_remove(&dev->dev); + struct skt_dev_info *sinfo = platform_get_drvdata(dev); + int i; + + platform_set_drvdata(dev, NULL); + + for (i = 0; i < sinfo->nskt; i++) + soc_pcmcia_remove_one(&sinfo->skt[i]); + + kfree(sinfo); + return 0; } static int pxa2xx_drv_pcmcia_suspend(struct device *dev) diff --git a/drivers/pcmcia/pxa2xx_base.h b/drivers/pcmcia/pxa2xx_base.h index 235d681652c..cb5efaec886 100644 --- a/drivers/pcmcia/pxa2xx_base.h +++ b/drivers/pcmcia/pxa2xx_base.h @@ -1,3 +1,6 @@ /* temporary measure */ extern int __pxa2xx_drv_pcmcia_probe(struct device *); +int pxa2xx_drv_pcmcia_add_one(struct soc_pcmcia_socket *skt); +void pxa2xx_drv_pcmcia_ops(struct pcmcia_low_level *ops); + diff --git a/drivers/pcmcia/pxa2xx_cm_x255.c b/drivers/pcmcia/pxa2xx_cm_x255.c index 5143a760153..05913d0bbdb 100644 --- a/drivers/pcmcia/pxa2xx_cm_x255.c +++ b/drivers/pcmcia/pxa2xx_cm_x255.c @@ -44,7 +44,7 @@ static int cmx255_pcmcia_hw_init(struct soc_pcmcia_socket *skt) return ret; gpio_direction_output(GPIO_PCMCIA_RESET, 0); - skt->irq = skt->nr == 0 ? PCMCIA_S0_RDYINT : PCMCIA_S1_RDYINT; + skt->socket.pci_irq = skt->nr == 0 ? PCMCIA_S0_RDYINT : PCMCIA_S1_RDYINT; ret = soc_pcmcia_request_irqs(skt, irqs, ARRAY_SIZE(irqs)); if (!ret) gpio_free(GPIO_PCMCIA_RESET); diff --git a/drivers/pcmcia/pxa2xx_cm_x270.c b/drivers/pcmcia/pxa2xx_cm_x270.c index a7b943d01e3..5662646b84d 100644 --- a/drivers/pcmcia/pxa2xx_cm_x270.c +++ b/drivers/pcmcia/pxa2xx_cm_x270.c @@ -38,7 +38,7 @@ static int cmx270_pcmcia_hw_init(struct soc_pcmcia_socket *skt) return ret; gpio_direction_output(GPIO_PCMCIA_RESET, 0); - skt->irq = PCMCIA_S0_RDYINT; + skt->socket.pci_irq = PCMCIA_S0_RDYINT; ret = soc_pcmcia_request_irqs(skt, irqs, ARRAY_SIZE(irqs)); if (!ret) gpio_free(GPIO_PCMCIA_RESET); diff --git a/drivers/pcmcia/pxa2xx_e740.c b/drivers/pcmcia/pxa2xx_e740.c index d09c0dc4a31..8bfbd4dca13 100644 --- a/drivers/pcmcia/pxa2xx_e740.c +++ b/drivers/pcmcia/pxa2xx_e740.c @@ -38,7 +38,7 @@ static struct pcmcia_irqs cd_irqs[] = { static int e740_pcmcia_hw_init(struct soc_pcmcia_socket *skt) { - skt->irq = skt->nr == 0 ? IRQ_GPIO(GPIO_E740_PCMCIA_RDY0) : + skt->socket.pci_irq = skt->nr == 0 ? IRQ_GPIO(GPIO_E740_PCMCIA_RDY0) : IRQ_GPIO(GPIO_E740_PCMCIA_RDY1); return soc_pcmcia_request_irqs(skt, &cd_irqs[skt->nr], 1); diff --git a/drivers/pcmcia/pxa2xx_lubbock.c b/drivers/pcmcia/pxa2xx_lubbock.c index 6cbb1b1f7cf..b9f8c8fb42b 100644 --- a/drivers/pcmcia/pxa2xx_lubbock.c +++ b/drivers/pcmcia/pxa2xx_lubbock.c @@ -32,6 +32,7 @@ static int lubbock_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_state_t *state) { + struct sa1111_pcmcia_socket *s = to_skt(skt); unsigned int pa_dwr_mask, pa_dwr_set, misc_mask, misc_set; int ret = 0; @@ -149,7 +150,7 @@ lubbock_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, if (ret == 0) { lubbock_set_misc_wr(misc_mask, misc_set); - sa1111_set_io(SA1111_DEV(skt->dev), pa_dwr_mask, pa_dwr_set); + sa1111_set_io(s->dev, pa_dwr_mask, pa_dwr_set); } #if 1 @@ -175,7 +176,7 @@ lubbock_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, * Switch to 5V, Configure socket with 5V voltage */ lubbock_set_misc_wr(misc_mask, 0); - sa1111_set_io(SA1111_DEV(skt->dev), pa_dwr_mask, 0); + sa1111_set_io(s->dev, pa_dwr_mask, 0); /* * It takes about 100ms to turn off Vcc. @@ -200,12 +201,8 @@ lubbock_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, static struct pcmcia_low_level lubbock_pcmcia_ops = { .owner = THIS_MODULE, - .hw_init = sa1111_pcmcia_hw_init, - .hw_shutdown = sa1111_pcmcia_hw_shutdown, - .socket_state = sa1111_pcmcia_socket_state, .configure_socket = lubbock_pcmcia_configure_socket, .socket_init = sa1111_pcmcia_socket_init, - .socket_suspend = sa1111_pcmcia_socket_suspend, .first = 0, .nr = 2, }; @@ -228,8 +225,9 @@ int pcmcia_lubbock_init(struct sa1111_dev *sadev) /* Set CF Socket 1 power to standby mode. */ lubbock_set_misc_wr((1 << 15) | (1 << 14), 0); - sadev->dev.platform_data = &lubbock_pcmcia_ops; - ret = __pxa2xx_drv_pcmcia_probe(&sadev->dev); + pxa2xx_drv_pcmcia_ops(&lubbock_pcmcia_ops); + ret = sa1111_pcmcia_add(sadev, &lubbock_pcmcia_ops, + pxa2xx_drv_pcmcia_add_one); } return ret; diff --git a/drivers/pcmcia/pxa2xx_mainstone.c b/drivers/pcmcia/pxa2xx_mainstone.c index 1138551ba8f..92016fe932b 100644 --- a/drivers/pcmcia/pxa2xx_mainstone.c +++ b/drivers/pcmcia/pxa2xx_mainstone.c @@ -44,7 +44,7 @@ static int mst_pcmcia_hw_init(struct soc_pcmcia_socket *skt) * before we enable them as outputs. */ - skt->irq = (skt->nr == 0) ? MAINSTONE_S0_IRQ : MAINSTONE_S1_IRQ; + skt->socket.pci_irq = (skt->nr == 0) ? MAINSTONE_S0_IRQ : MAINSTONE_S1_IRQ; return soc_pcmcia_request_irqs(skt, irqs, ARRAY_SIZE(irqs)); } diff --git a/drivers/pcmcia/pxa2xx_palmld.c b/drivers/pcmcia/pxa2xx_palmld.c index 5ba9b3664a0..6fb6f7f0672 100644 --- a/drivers/pcmcia/pxa2xx_palmld.c +++ b/drivers/pcmcia/pxa2xx_palmld.c @@ -45,7 +45,7 @@ static int palmld_pcmcia_hw_init(struct soc_pcmcia_socket *skt) if (ret) goto err4; - skt->irq = IRQ_GPIO(GPIO_NR_PALMLD_PCMCIA_READY); + skt->socket.pci_irq = IRQ_GPIO(GPIO_NR_PALMLD_PCMCIA_READY); return 0; err4: diff --git a/drivers/pcmcia/pxa2xx_palmtx.c b/drivers/pcmcia/pxa2xx_palmtx.c index e07b5c51ec5..b07b247a399 100644 --- a/drivers/pcmcia/pxa2xx_palmtx.c +++ b/drivers/pcmcia/pxa2xx_palmtx.c @@ -53,7 +53,7 @@ static int palmtx_pcmcia_hw_init(struct soc_pcmcia_socket *skt) if (ret) goto err5; - skt->irq = gpio_to_irq(GPIO_NR_PALMTX_PCMCIA_READY); + skt->socket.pci_irq = gpio_to_irq(GPIO_NR_PALMTX_PCMCIA_READY); return 0; err5: diff --git a/drivers/pcmcia/pxa2xx_sharpsl.c b/drivers/pcmcia/pxa2xx_sharpsl.c index bc43f78f6f0..0ea3b29440e 100644 --- a/drivers/pcmcia/pxa2xx_sharpsl.c +++ b/drivers/pcmcia/pxa2xx_sharpsl.c @@ -66,7 +66,7 @@ static int sharpsl_pcmcia_hw_init(struct soc_pcmcia_socket *skt) } } - skt->irq = SCOOP_DEV[skt->nr].irq; + skt->socket.pci_irq = SCOOP_DEV[skt->nr].irq; return 0; } diff --git a/drivers/pcmcia/pxa2xx_trizeps4.c b/drivers/pcmcia/pxa2xx_trizeps4.c index e0e5cb339b4..b7e596620db 100644 --- a/drivers/pcmcia/pxa2xx_trizeps4.c +++ b/drivers/pcmcia/pxa2xx_trizeps4.c @@ -53,7 +53,7 @@ static int trizeps_pcmcia_hw_init(struct soc_pcmcia_socket *skt) gpio_free(GPIO_PRDY); return -EINVAL; } - skt->irq = IRQ_GPIO(GPIO_PRDY); + skt->socket.pci_irq = IRQ_GPIO(GPIO_PRDY); break; #ifndef CONFIG_MACH_TRIZEPS_CONXS @@ -63,7 +63,7 @@ static int trizeps_pcmcia_hw_init(struct soc_pcmcia_socket *skt) break; } /* release the reset of this card */ - pr_debug("%s: sock %d irq %d\n", __func__, skt->nr, skt->irq); + pr_debug("%s: sock %d irq %d\n", __func__, skt->nr, skt->socket.pci_irq); /* supplementory irqs for the socket */ for (i = 0; i < ARRAY_SIZE(irqs); i++) { diff --git a/drivers/pcmcia/pxa2xx_viper.c b/drivers/pcmcia/pxa2xx_viper.c index 17871360fe9..27be2e154df 100644 --- a/drivers/pcmcia/pxa2xx_viper.c +++ b/drivers/pcmcia/pxa2xx_viper.c @@ -40,7 +40,7 @@ static int viper_pcmcia_hw_init(struct soc_pcmcia_socket *skt) { unsigned long flags; - skt->irq = gpio_to_irq(VIPER_CF_RDY_GPIO); + skt->socket.pci_irq = gpio_to_irq(VIPER_CF_RDY_GPIO); if (gpio_request(VIPER_CF_CD_GPIO, "CF detect")) goto err_request_cd; diff --git a/drivers/pcmcia/rsrc_mgr.c b/drivers/pcmcia/rsrc_mgr.c index e592e0e0d7e..de0e770ce6a 100644 --- a/drivers/pcmcia/rsrc_mgr.c +++ b/drivers/pcmcia/rsrc_mgr.c @@ -18,6 +18,7 @@ #include <pcmcia/cs_types.h> #include <pcmcia/ss.h> #include <pcmcia/cs.h> +#include <pcmcia/cistpl.h> #include "cs_internal.h" diff --git a/drivers/pcmcia/sa1100_assabet.c b/drivers/pcmcia/sa1100_assabet.c index ac8aa09ba0d..fd013a1ef47 100644 --- a/drivers/pcmcia/sa1100_assabet.c +++ b/drivers/pcmcia/sa1100_assabet.c @@ -27,7 +27,7 @@ static struct pcmcia_irqs irqs[] = { static int assabet_pcmcia_hw_init(struct soc_pcmcia_socket *skt) { - skt->irq = ASSABET_IRQ_GPIO_CF_IRQ; + skt->socket.pci_irq = ASSABET_IRQ_GPIO_CF_IRQ; return soc_pcmcia_request_irqs(skt, irqs, ARRAY_SIZE(irqs)); } diff --git a/drivers/pcmcia/sa1100_badge4.c b/drivers/pcmcia/sa1100_badge4.c index 1ca9737ea79..1ce53f493be 100644 --- a/drivers/pcmcia/sa1100_badge4.c +++ b/drivers/pcmcia/sa1100_badge4.c @@ -127,13 +127,10 @@ badge4_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_state static struct pcmcia_low_level badge4_pcmcia_ops = { .owner = THIS_MODULE, - .hw_init = sa1111_pcmcia_hw_init, - .hw_shutdown = sa1111_pcmcia_hw_shutdown, - .socket_state = sa1111_pcmcia_socket_state, .configure_socket = badge4_pcmcia_configure_socket, - .socket_init = sa1111_pcmcia_socket_init, - .socket_suspend = sa1111_pcmcia_socket_suspend, + .first = 0, + .nr = 2, }; int pcmcia_badge4_init(struct device *dev) @@ -146,7 +143,9 @@ int pcmcia_badge4_init(struct device *dev) __func__, badge4_pcmvcc, badge4_pcmvpp, badge4_cfvcc); - ret = sa11xx_drv_pcmcia_probe(dev, &badge4_pcmcia_ops, 0, 2); + sa11xx_drv_pcmcia_ops(&badge4_pcmcia_ops); + ret = sa1111_pcmcia_add(dev, &badge4_pcmcia_ops, + sa11xx_drv_pcmcia_add_one); } return ret; diff --git a/drivers/pcmcia/sa1100_cerf.c b/drivers/pcmcia/sa1100_cerf.c index 63e6bc431a0..9bf088b1727 100644 --- a/drivers/pcmcia/sa1100_cerf.c +++ b/drivers/pcmcia/sa1100_cerf.c @@ -27,7 +27,7 @@ static struct pcmcia_irqs irqs[] = { static int cerf_pcmcia_hw_init(struct soc_pcmcia_socket *skt) { - skt->irq = CERF_IRQ_GPIO_CF_IRQ; + skt->socket.pci_irq = CERF_IRQ_GPIO_CF_IRQ; return soc_pcmcia_request_irqs(skt, irqs, ARRAY_SIZE(irqs)); } diff --git a/drivers/pcmcia/sa1100_generic.c b/drivers/pcmcia/sa1100_generic.c index 2d0e9975153..8db86b90c20 100644 --- a/drivers/pcmcia/sa1100_generic.c +++ b/drivers/pcmcia/sa1100_generic.c @@ -51,7 +51,7 @@ static int (*sa11x0_pcmcia_hw_init[])(struct device *dev) = { #ifdef CONFIG_SA1100_CERF pcmcia_cerf_init, #endif -#ifdef CONFIG_SA1100_H3600 +#if defined(CONFIG_SA1100_H3100) || defined(CONFIG_SA1100_H3600) pcmcia_h3600_init, #endif #ifdef CONFIG_SA1100_SHANNON @@ -83,7 +83,16 @@ static int sa11x0_drv_pcmcia_probe(struct platform_device *dev) static int sa11x0_drv_pcmcia_remove(struct platform_device *dev) { - return soc_common_drv_pcmcia_remove(&dev->dev); + struct skt_dev_info *sinfo = platform_get_drvdata(dev); + int i; + + platform_set_drvdata(dev, NULL); + + for (i = 0; i < sinfo->nskt; i++) + soc_pcmcia_remove_one(&sinfo->skt[i]); + + kfree(sinfo); + return 0; } static int sa11x0_drv_pcmcia_suspend(struct platform_device *dev, diff --git a/drivers/pcmcia/sa1100_h3600.c b/drivers/pcmcia/sa1100_h3600.c index 0cc3748f375..56329ad575a 100644 --- a/drivers/pcmcia/sa1100_h3600.c +++ b/drivers/pcmcia/sa1100_h3600.c @@ -10,47 +10,139 @@ #include <linux/interrupt.h> #include <linux/init.h> #include <linux/delay.h> +#include <linux/gpio.h> #include <mach/hardware.h> #include <asm/irq.h> #include <asm/mach-types.h> -#include <mach/h3600.h> +#include <mach/h3xxx.h> #include "sa1100_generic.h" static struct pcmcia_irqs irqs[] = { - { 0, IRQ_GPIO_H3600_PCMCIA_CD0, "PCMCIA CD0" }, - { 1, IRQ_GPIO_H3600_PCMCIA_CD1, "PCMCIA CD1" } + { .sock = 0, .str = "PCMCIA CD0" }, /* .irq will be filled later */ + { .sock = 1, .str = "PCMCIA CD1" } }; static int h3600_pcmcia_hw_init(struct soc_pcmcia_socket *skt) { - skt->irq = skt->nr ? IRQ_GPIO_H3600_PCMCIA_IRQ1 - : IRQ_GPIO_H3600_PCMCIA_IRQ0; + int err; + switch (skt->nr) { + case 0: + err = gpio_request(H3XXX_GPIO_PCMCIA_IRQ0, "PCMCIA IRQ0"); + if (err) + goto err00; + err = gpio_direction_input(H3XXX_GPIO_PCMCIA_IRQ0); + if (err) + goto err01; + skt->socket.pci_irq = gpio_to_irq(H3XXX_GPIO_PCMCIA_IRQ0); + + err = gpio_request(H3XXX_GPIO_PCMCIA_CD0, "PCMCIA CD0"); + if (err) + goto err01; + err = gpio_direction_input(H3XXX_GPIO_PCMCIA_CD0); + if (err) + goto err02; + irqs[0].irq = gpio_to_irq(H3XXX_GPIO_PCMCIA_CD0); + + err = gpio_request(H3XXX_EGPIO_OPT_NVRAM_ON, "OPT NVRAM ON"); + if (err) + goto err02; + err = gpio_direction_output(H3XXX_EGPIO_OPT_NVRAM_ON, 0); + if (err) + goto err03; + err = gpio_request(H3XXX_EGPIO_OPT_ON, "OPT ON"); + if (err) + goto err03; + err = gpio_direction_output(H3XXX_EGPIO_OPT_ON, 0); + if (err) + goto err04; + err = gpio_request(H3XXX_EGPIO_OPT_RESET, "OPT RESET"); + if (err) + goto err04; + err = gpio_direction_output(H3XXX_EGPIO_OPT_RESET, 0); + if (err) + goto err05; + err = gpio_request(H3XXX_EGPIO_CARD_RESET, "PCMCIA CARD RESET"); + if (err) + goto err05; + err = gpio_direction_output(H3XXX_EGPIO_CARD_RESET, 0); + if (err) + goto err06; + err = soc_pcmcia_request_irqs(skt, irqs, ARRAY_SIZE(irqs)); + if (err) + goto err06; + break; + case 1: + err = gpio_request(H3XXX_GPIO_PCMCIA_IRQ1, "PCMCIA IRQ1"); + if (err) + goto err10; + err = gpio_direction_input(H3XXX_GPIO_PCMCIA_IRQ1); + if (err) + goto err11; + skt->socket.pci_irq = gpio_to_irq(H3XXX_GPIO_PCMCIA_IRQ1); + + err = gpio_request(H3XXX_GPIO_PCMCIA_CD1, "PCMCIA CD1"); + if (err) + goto err11; + err = gpio_direction_input(H3XXX_GPIO_PCMCIA_CD1); + if (err) + goto err12; + irqs[1].irq = gpio_to_irq(H3XXX_GPIO_PCMCIA_CD1); + + err = soc_pcmcia_request_irqs(skt, irqs, ARRAY_SIZE(irqs)); + if (err) + goto err12; + break; + } + return 0; - return soc_pcmcia_request_irqs(skt, irqs, ARRAY_SIZE(irqs)); +err06: gpio_free(H3XXX_EGPIO_CARD_RESET); +err05: gpio_free(H3XXX_EGPIO_OPT_RESET); +err04: gpio_free(H3XXX_EGPIO_OPT_ON); +err03: gpio_free(H3XXX_EGPIO_OPT_NVRAM_ON); +err02: gpio_free(H3XXX_GPIO_PCMCIA_CD0); +err01: gpio_free(H3XXX_GPIO_PCMCIA_IRQ0); +err00: return err; + +err12: gpio_free(H3XXX_GPIO_PCMCIA_CD0); +err11: gpio_free(H3XXX_GPIO_PCMCIA_IRQ0); +err10: return err; } static void h3600_pcmcia_hw_shutdown(struct soc_pcmcia_socket *skt) { soc_pcmcia_free_irqs(skt, irqs, ARRAY_SIZE(irqs)); - /* Disable CF bus: */ - assign_h3600_egpio(IPAQ_EGPIO_OPT_NVRAM_ON, 0); - assign_h3600_egpio(IPAQ_EGPIO_OPT_ON, 0); - assign_h3600_egpio(IPAQ_EGPIO_OPT_RESET, 1); + switch (skt->nr) { + case 0: + /* Disable CF bus: */ + gpio_set_value(H3XXX_EGPIO_OPT_NVRAM_ON, 0); + gpio_set_value(H3XXX_EGPIO_OPT_ON, 0); + gpio_set_value(H3XXX_EGPIO_OPT_RESET, 1); + + gpio_free(H3XXX_EGPIO_CARD_RESET); + gpio_free(H3XXX_EGPIO_OPT_RESET); + gpio_free(H3XXX_EGPIO_OPT_ON); + gpio_free(H3XXX_EGPIO_OPT_NVRAM_ON); + gpio_free(H3XXX_GPIO_PCMCIA_CD0); + gpio_free(H3XXX_GPIO_PCMCIA_IRQ0); + break; + case 1: + gpio_free(H3XXX_GPIO_PCMCIA_CD1); + gpio_free(H3XXX_GPIO_PCMCIA_IRQ1); + break; + } } static void h3600_pcmcia_socket_state(struct soc_pcmcia_socket *skt, struct pcmcia_state *state) { - unsigned long levels = GPLR; - switch (skt->nr) { case 0: - state->detect = levels & GPIO_H3600_PCMCIA_CD0 ? 0 : 1; - state->ready = levels & GPIO_H3600_PCMCIA_IRQ0 ? 1 : 0; + state->detect = !gpio_get_value(H3XXX_GPIO_PCMCIA_CD0); + state->ready = !!gpio_get_value(H3XXX_GPIO_PCMCIA_IRQ0); state->bvd1 = 0; state->bvd2 = 0; state->wrprot = 0; /* Not available on H3600. */ @@ -59,8 +151,8 @@ h3600_pcmcia_socket_state(struct soc_pcmcia_socket *skt, struct pcmcia_state *st break; case 1: - state->detect = levels & GPIO_H3600_PCMCIA_CD1 ? 0 : 1; - state->ready = levels & GPIO_H3600_PCMCIA_IRQ1 ? 1 : 0; + state->detect = !gpio_get_value(H3XXX_GPIO_PCMCIA_CD1); + state->ready = !!gpio_get_value(H3XXX_GPIO_PCMCIA_IRQ1); state->bvd1 = 0; state->bvd2 = 0; state->wrprot = 0; /* Not available on H3600. */ @@ -79,7 +171,7 @@ h3600_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_state_ return -1; } - assign_h3600_egpio(IPAQ_EGPIO_CARD_RESET, !!(state->flags & SS_RESET)); + gpio_set_value(H3XXX_EGPIO_CARD_RESET, !!(state->flags & SS_RESET)); /* Silently ignore Vpp, output enable, speaker enable. */ @@ -89,9 +181,9 @@ h3600_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_state_ static void h3600_pcmcia_socket_init(struct soc_pcmcia_socket *skt) { /* Enable CF bus: */ - assign_h3600_egpio(IPAQ_EGPIO_OPT_NVRAM_ON, 1); - assign_h3600_egpio(IPAQ_EGPIO_OPT_ON, 1); - assign_h3600_egpio(IPAQ_EGPIO_OPT_RESET, 0); + gpio_set_value(H3XXX_EGPIO_OPT_NVRAM_ON, 1); + gpio_set_value(H3XXX_EGPIO_OPT_ON, 1); + gpio_set_value(H3XXX_EGPIO_OPT_RESET, 0); msleep(10); @@ -109,10 +201,10 @@ static void h3600_pcmcia_socket_suspend(struct soc_pcmcia_socket *skt) * socket 0 then socket 1. */ if (skt->nr == 1) { - assign_h3600_egpio(IPAQ_EGPIO_OPT_ON, 0); - assign_h3600_egpio(IPAQ_EGPIO_OPT_NVRAM_ON, 0); + gpio_set_value(H3XXX_EGPIO_OPT_ON, 0); + gpio_set_value(H3XXX_EGPIO_OPT_NVRAM_ON, 0); /* hmm, does this suck power? */ - assign_h3600_egpio(IPAQ_EGPIO_OPT_RESET, 1); + gpio_set_value(H3XXX_EGPIO_OPT_RESET, 1); } } @@ -131,7 +223,7 @@ int __init pcmcia_h3600_init(struct device *dev) { int ret = -ENODEV; - if (machine_is_h3600()) + if (machine_is_h3600() || machine_is_h3100()) ret = sa11xx_drv_pcmcia_probe(dev, &h3600_pcmcia_ops, 0, 2); return ret; diff --git a/drivers/pcmcia/sa1100_jornada720.c b/drivers/pcmcia/sa1100_jornada720.c index 7eedb42f800..6bcabee6bde 100644 --- a/drivers/pcmcia/sa1100_jornada720.c +++ b/drivers/pcmcia/sa1100_jornada720.c @@ -22,25 +22,10 @@ #define SOCKET1_POWER (GPIO_GPIO1 | GPIO_GPIO3) #define SOCKET1_3V GPIO_GPIO3 -static int jornada720_pcmcia_hw_init(struct soc_pcmcia_socket *skt) -{ - unsigned int pin = GPIO_A0 | GPIO_A1 | GPIO_A2 | GPIO_A3; - - /* - * What is all this crap for? - */ - GRER |= 0x00000002; - /* Set GPIO_A<3:1> to be outputs for PCMCIA/CF power controller: */ - sa1111_set_io_dir(SA1111_DEV(skt->dev), pin, 0, 0); - sa1111_set_io(SA1111_DEV(skt->dev), pin, 0); - sa1111_set_sleep_io(SA1111_DEV(skt->dev), pin, 0); - - return sa1111_pcmcia_hw_init(skt); -} - static int jornada720_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_state_t *state) { + struct sa1111_pcmcia_socket *s = to_skt(skt); unsigned int pa_dwr_mask, pa_dwr_set; int ret; @@ -97,7 +82,7 @@ jornada720_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_s unsigned long flags; local_irq_save(flags); - sa1111_set_io(SA1111_DEV(skt->dev), pa_dwr_mask, pa_dwr_set); + sa1111_set_io(s->dev, pa_dwr_mask, pa_dwr_set); local_irq_restore(flags); } @@ -106,21 +91,30 @@ jornada720_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_s static struct pcmcia_low_level jornada720_pcmcia_ops = { .owner = THIS_MODULE, - .hw_init = jornada720_pcmcia_hw_init, - .hw_shutdown = sa1111_pcmcia_hw_shutdown, - .socket_state = sa1111_pcmcia_socket_state, .configure_socket = jornada720_pcmcia_configure_socket, - .socket_init = sa1111_pcmcia_socket_init, - .socket_suspend = sa1111_pcmcia_socket_suspend, + .first = 0, + .nr = 2, }; int __devinit pcmcia_jornada720_init(struct device *dev) { int ret = -ENODEV; - if (machine_is_jornada720()) - ret = sa11xx_drv_pcmcia_probe(dev, &jornada720_pcmcia_ops, 0, 2); + if (machine_is_jornada720()) { + unsigned int pin = GPIO_A0 | GPIO_A1 | GPIO_A2 | GPIO_A3; + + GRER |= 0x00000002; + + /* Set GPIO_A<3:1> to be outputs for PCMCIA/CF power controller: */ + sa1111_set_io_dir(dev, pin, 0, 0); + sa1111_set_io(dev, pin, 0); + sa1111_set_sleep_io(dev, pin, 0); + + sa11xx_drv_pcmcia_ops(&jornada720_pcmcia_ops); + ret = sa1111_pcmcia_add(dev, &jornada720_pcmcia_ops, + sa11xx_drv_pcmcia_add_one); + } return ret; } diff --git a/drivers/pcmcia/sa1100_neponset.c b/drivers/pcmcia/sa1100_neponset.c index 0c76d337815..c95639b5f2a 100644 --- a/drivers/pcmcia/sa1100_neponset.c +++ b/drivers/pcmcia/sa1100_neponset.c @@ -43,6 +43,7 @@ static int neponset_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_state_t *state) { + struct sa1111_pcmcia_socket *s = to_skt(skt); unsigned int ncr_mask, ncr_set, pa_dwr_mask, pa_dwr_set; int ret; @@ -99,7 +100,7 @@ neponset_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_sta NCR_0 = (NCR_0 & ~ncr_mask) | ncr_set; local_irq_restore(flags); - sa1111_set_io(SA1111_DEV(skt->dev), pa_dwr_mask, pa_dwr_set); + sa1111_set_io(s->dev, pa_dwr_mask, pa_dwr_set); } return 0; @@ -115,12 +116,10 @@ static void neponset_pcmcia_socket_init(struct soc_pcmcia_socket *skt) static struct pcmcia_low_level neponset_pcmcia_ops = { .owner = THIS_MODULE, - .hw_init = sa1111_pcmcia_hw_init, - .hw_shutdown = sa1111_pcmcia_hw_shutdown, - .socket_state = sa1111_pcmcia_socket_state, .configure_socket = neponset_pcmcia_configure_socket, .socket_init = neponset_pcmcia_socket_init, - .socket_suspend = sa1111_pcmcia_socket_suspend, + .first = 0, + .nr = 2, }; int pcmcia_neponset_init(struct sa1111_dev *sadev) @@ -135,7 +134,9 @@ int pcmcia_neponset_init(struct sa1111_dev *sadev) sa1111_set_io_dir(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0, 0); sa1111_set_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0); sa1111_set_sleep_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0); - ret = sa11xx_drv_pcmcia_probe(&sadev->dev, &neponset_pcmcia_ops, 0, 2); + sa11xx_drv_pcmcia_ops(&neponset_pcmcia_ops); + ret = sa1111_pcmcia_add(sadev, &neponset_pcmcia_ops, + sa11xx_drv_pcmcia_add_one); } return ret; diff --git a/drivers/pcmcia/sa1100_shannon.c b/drivers/pcmcia/sa1100_shannon.c index 46d8c1977c2..c4d51867a05 100644 --- a/drivers/pcmcia/sa1100_shannon.c +++ b/drivers/pcmcia/sa1100_shannon.c @@ -28,7 +28,7 @@ static int shannon_pcmcia_hw_init(struct soc_pcmcia_socket *skt) GAFR &= ~(SHANNON_GPIO_EJECT_0 | SHANNON_GPIO_EJECT_1 | SHANNON_GPIO_RDY_0 | SHANNON_GPIO_RDY_1); - skt->irq = skt->nr ? SHANNON_IRQ_GPIO_RDY_1 : SHANNON_IRQ_GPIO_RDY_0; + skt->socket.pci_irq = skt->nr ? SHANNON_IRQ_GPIO_RDY_1 : SHANNON_IRQ_GPIO_RDY_0; return soc_pcmcia_request_irqs(skt, irqs, ARRAY_SIZE(irqs)); } diff --git a/drivers/pcmcia/sa1100_simpad.c b/drivers/pcmcia/sa1100_simpad.c index 33a08ae09fd..05bd504e6f1 100644 --- a/drivers/pcmcia/sa1100_simpad.c +++ b/drivers/pcmcia/sa1100_simpad.c @@ -28,7 +28,7 @@ static int simpad_pcmcia_hw_init(struct soc_pcmcia_socket *skt) clear_cs3_bit(VCC_3V_EN|VCC_5V_EN|EN0|EN1); - skt->irq = IRQ_GPIO_CF_IRQ; + skt->socket.pci_irq = IRQ_GPIO_CF_IRQ; return soc_pcmcia_request_irqs(skt, irqs, ARRAY_SIZE(irqs)); } diff --git a/drivers/pcmcia/sa1111_generic.c b/drivers/pcmcia/sa1111_generic.c index 4be4e172ffa..de6bc333d29 100644 --- a/drivers/pcmcia/sa1111_generic.c +++ b/drivers/pcmcia/sa1111_generic.c @@ -28,23 +28,20 @@ static struct pcmcia_irqs irqs[] = { { 1, IRQ_S1_BVD1_STSCHG, "SA1111 CF BVD1" }, }; -int sa1111_pcmcia_hw_init(struct soc_pcmcia_socket *skt) +static int sa1111_pcmcia_hw_init(struct soc_pcmcia_socket *skt) { - if (skt->irq == NO_IRQ) - skt->irq = skt->nr ? IRQ_S1_READY_NINT : IRQ_S0_READY_NINT; - return soc_pcmcia_request_irqs(skt, irqs, ARRAY_SIZE(irqs)); } -void sa1111_pcmcia_hw_shutdown(struct soc_pcmcia_socket *skt) +static void sa1111_pcmcia_hw_shutdown(struct soc_pcmcia_socket *skt) { soc_pcmcia_free_irqs(skt, irqs, ARRAY_SIZE(irqs)); } void sa1111_pcmcia_socket_state(struct soc_pcmcia_socket *skt, struct pcmcia_state *state) { - struct sa1111_dev *sadev = SA1111_DEV(skt->dev); - unsigned long status = sa1111_readl(sadev->mapbase + SA1111_PCSR); + struct sa1111_pcmcia_socket *s = to_skt(skt); + unsigned long status = sa1111_readl(s->dev->mapbase + SA1111_PCSR); switch (skt->nr) { case 0: @@ -71,7 +68,7 @@ void sa1111_pcmcia_socket_state(struct soc_pcmcia_socket *skt, struct pcmcia_sta int sa1111_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_state_t *state) { - struct sa1111_dev *sadev = SA1111_DEV(skt->dev); + struct sa1111_pcmcia_socket *s = to_skt(skt); unsigned int pccr_skt_mask, pccr_set_mask, val; unsigned long flags; @@ -100,10 +97,10 @@ int sa1111_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_s pccr_set_mask |= PCCR_S0_FLT|PCCR_S1_FLT; local_irq_save(flags); - val = sa1111_readl(sadev->mapbase + SA1111_PCCR); + val = sa1111_readl(s->dev->mapbase + SA1111_PCCR); val &= ~pccr_skt_mask; val |= pccr_set_mask & pccr_skt_mask; - sa1111_writel(val, sadev->mapbase + SA1111_PCCR); + sa1111_writel(val, s->dev->mapbase + SA1111_PCCR); local_irq_restore(flags); return 0; @@ -114,15 +111,51 @@ void sa1111_pcmcia_socket_init(struct soc_pcmcia_socket *skt) soc_pcmcia_enable_irqs(skt, irqs, ARRAY_SIZE(irqs)); } -void sa1111_pcmcia_socket_suspend(struct soc_pcmcia_socket *skt) +static void sa1111_pcmcia_socket_suspend(struct soc_pcmcia_socket *skt) { soc_pcmcia_disable_irqs(skt, irqs, ARRAY_SIZE(irqs)); } +int sa1111_pcmcia_add(struct sa1111_dev *dev, struct pcmcia_low_level *ops, + int (*add)(struct soc_pcmcia_socket *)) +{ + struct sa1111_pcmcia_socket *s; + int i, ret = 0; + + ops->hw_init = sa1111_pcmcia_hw_init; + ops->hw_shutdown = sa1111_pcmcia_hw_shutdown; + ops->socket_state = sa1111_pcmcia_socket_state; + ops->socket_suspend = sa1111_pcmcia_socket_suspend; + + for (i = 0; i < ops->nr; i++) { + s = kzalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; + + s->soc.nr = ops->first + i; + s->soc.ops = ops; + s->soc.socket.owner = ops->owner; + s->soc.socket.dev.parent = &dev->dev; + s->soc.socket.pci_irq = s->soc.nr ? IRQ_S1_READY_NINT : IRQ_S0_READY_NINT; + s->dev = dev; + + ret = add(&s->soc); + if (ret == 0) { + s->next = dev_get_drvdata(&dev->dev); + dev_set_drvdata(&dev->dev, s); + } else + kfree(s); + } + + return ret; +} + static int pcmcia_probe(struct sa1111_dev *dev) { void __iomem *base; + dev_set_drvdata(&dev->dev, NULL); + if (!request_mem_region(dev->res.start, 512, SA1111_DRIVER_NAME(dev))) return -EBUSY; @@ -152,7 +185,15 @@ static int pcmcia_probe(struct sa1111_dev *dev) static int __devexit pcmcia_remove(struct sa1111_dev *dev) { - soc_common_drv_pcmcia_remove(&dev->dev); + struct sa1111_pcmcia_socket *next, *s = dev_get_drvdata(&dev->dev); + + dev_set_drvdata(&dev->dev, NULL); + + for (; next = s->next, s; s = next) { + soc_pcmcia_remove_one(&s->soc); + kfree(s); + } + release_mem_region(dev->res.start, 512); return 0; } diff --git a/drivers/pcmcia/sa1111_generic.h b/drivers/pcmcia/sa1111_generic.h index 10ced4a210d..02dc8577cda 100644 --- a/drivers/pcmcia/sa1111_generic.h +++ b/drivers/pcmcia/sa1111_generic.h @@ -1,12 +1,23 @@ #include "soc_common.h" #include "sa11xx_base.h" -extern int sa1111_pcmcia_hw_init(struct soc_pcmcia_socket *); -extern void sa1111_pcmcia_hw_shutdown(struct soc_pcmcia_socket *); +struct sa1111_pcmcia_socket { + struct soc_pcmcia_socket soc; + struct sa1111_dev *dev; + struct sa1111_pcmcia_socket *next; +}; + +static inline struct sa1111_pcmcia_socket *to_skt(struct soc_pcmcia_socket *s) +{ + return container_of(s, struct sa1111_pcmcia_socket, soc); +} + +int sa1111_pcmcia_add(struct sa1111_dev *dev, struct pcmcia_low_level *ops, + int (*add)(struct soc_pcmcia_socket *)); + extern void sa1111_pcmcia_socket_state(struct soc_pcmcia_socket *, struct pcmcia_state *); extern int sa1111_pcmcia_configure_socket(struct soc_pcmcia_socket *, const socket_state_t *); extern void sa1111_pcmcia_socket_init(struct soc_pcmcia_socket *); -extern void sa1111_pcmcia_socket_suspend(struct soc_pcmcia_socket *); extern int pcmcia_badge4_init(struct device *); extern int pcmcia_jornada720_init(struct device *); diff --git a/drivers/pcmcia/sa11xx_base.c b/drivers/pcmcia/sa11xx_base.c index e15d59f2d8a..fc9a6527019 100644 --- a/drivers/pcmcia/sa11xx_base.c +++ b/drivers/pcmcia/sa11xx_base.c @@ -171,12 +171,58 @@ static const char *skt_names[] = { #define SKT_DEV_INFO_SIZE(n) \ (sizeof(struct skt_dev_info) + (n)*sizeof(struct soc_pcmcia_socket)) +int sa11xx_drv_pcmcia_add_one(struct soc_pcmcia_socket *skt) +{ + skt->res_skt.start = _PCMCIA(skt->nr); + skt->res_skt.end = _PCMCIA(skt->nr) + PCMCIASp - 1; + skt->res_skt.name = skt_names[skt->nr]; + skt->res_skt.flags = IORESOURCE_MEM; + + skt->res_io.start = _PCMCIAIO(skt->nr); + skt->res_io.end = _PCMCIAIO(skt->nr) + PCMCIAIOSp - 1; + skt->res_io.name = "io"; + skt->res_io.flags = IORESOURCE_MEM | IORESOURCE_BUSY; + + skt->res_mem.start = _PCMCIAMem(skt->nr); + skt->res_mem.end = _PCMCIAMem(skt->nr) + PCMCIAMemSp - 1; + skt->res_mem.name = "memory"; + skt->res_mem.flags = IORESOURCE_MEM; + + skt->res_attr.start = _PCMCIAAttr(skt->nr); + skt->res_attr.end = _PCMCIAAttr(skt->nr) + PCMCIAAttrSp - 1; + skt->res_attr.name = "attribute"; + skt->res_attr.flags = IORESOURCE_MEM; + + return soc_pcmcia_add_one(skt); +} +EXPORT_SYMBOL(sa11xx_drv_pcmcia_add_one); + +void sa11xx_drv_pcmcia_ops(struct pcmcia_low_level *ops) +{ + /* + * set default MECR calculation if the board specific + * code did not specify one... + */ + if (!ops->get_timing) + ops->get_timing = sa1100_pcmcia_default_mecr_timing; + + /* Provide our SA11x0 specific timing routines. */ + ops->set_timing = sa1100_pcmcia_set_timing; + ops->show_timing = sa1100_pcmcia_show_timing; +#ifdef CONFIG_CPU_FREQ + ops->frequency_change = sa1100_pcmcia_frequency_change; +#endif +} +EXPORT_SYMBOL(sa11xx_drv_pcmcia_ops); + int sa11xx_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, int first, int nr) { struct skt_dev_info *sinfo; struct soc_pcmcia_socket *skt; - int i; + int i, ret = 0; + + sa11xx_drv_pcmcia_ops(ops); sinfo = kzalloc(SKT_DEV_INFO_SIZE(nr), GFP_KERNEL); if (!sinfo) @@ -188,45 +234,26 @@ int sa11xx_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, for (i = 0; i < nr; i++) { skt = &sinfo->skt[i]; - skt->nr = first + i; - skt->irq = NO_IRQ; - - skt->res_skt.start = _PCMCIA(skt->nr); - skt->res_skt.end = _PCMCIA(skt->nr) + PCMCIASp - 1; - skt->res_skt.name = skt_names[skt->nr]; - skt->res_skt.flags = IORESOURCE_MEM; - - skt->res_io.start = _PCMCIAIO(skt->nr); - skt->res_io.end = _PCMCIAIO(skt->nr) + PCMCIAIOSp - 1; - skt->res_io.name = "io"; - skt->res_io.flags = IORESOURCE_MEM | IORESOURCE_BUSY; + skt->nr = first + i; + skt->ops = ops; + skt->socket.owner = ops->owner; + skt->socket.dev.parent = dev; + skt->socket.pci_irq = NO_IRQ; - skt->res_mem.start = _PCMCIAMem(skt->nr); - skt->res_mem.end = _PCMCIAMem(skt->nr) + PCMCIAMemSp - 1; - skt->res_mem.name = "memory"; - skt->res_mem.flags = IORESOURCE_MEM; - - skt->res_attr.start = _PCMCIAAttr(skt->nr); - skt->res_attr.end = _PCMCIAAttr(skt->nr) + PCMCIAAttrSp - 1; - skt->res_attr.name = "attribute"; - skt->res_attr.flags = IORESOURCE_MEM; + ret = sa11xx_drv_pcmcia_add_one(skt); + if (ret) + break; } - /* - * set default MECR calculation if the board specific - * code did not specify one... - */ - if (!ops->get_timing) - ops->get_timing = sa1100_pcmcia_default_mecr_timing; - - /* Provide our SA11x0 specific timing routines. */ - ops->set_timing = sa1100_pcmcia_set_timing; - ops->show_timing = sa1100_pcmcia_show_timing; -#ifdef CONFIG_CPU_FREQ - ops->frequency_change = sa1100_pcmcia_frequency_change; -#endif + if (ret) { + while (--i >= 0) + soc_pcmcia_remove_one(&sinfo->skt[i]); + kfree(sinfo); + } else { + dev_set_drvdata(dev, sinfo); + } - return soc_common_drv_pcmcia_probe(dev, ops, sinfo); + return ret; } EXPORT_SYMBOL(sa11xx_drv_pcmcia_probe); diff --git a/drivers/pcmcia/sa11xx_base.h b/drivers/pcmcia/sa11xx_base.h index 7bc20828052..3d76d720f46 100644 --- a/drivers/pcmcia/sa11xx_base.h +++ b/drivers/pcmcia/sa11xx_base.h @@ -118,6 +118,8 @@ static inline unsigned int sa1100_pcmcia_cmd_time(unsigned int cpu_clock_khz, } +int sa11xx_drv_pcmcia_add_one(struct soc_pcmcia_socket *skt); +void sa11xx_drv_pcmcia_ops(struct pcmcia_low_level *ops); extern int sa11xx_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, int first, int nr); #endif /* !defined(_PCMCIA_SA1100_H) */ diff --git a/drivers/pcmcia/soc_common.c b/drivers/pcmcia/soc_common.c index ef7e9e58782..6f1a86b43c6 100644 --- a/drivers/pcmcia/soc_common.c +++ b/drivers/pcmcia/soc_common.c @@ -144,10 +144,10 @@ soc_common_pcmcia_config_skt(struct soc_pcmcia_socket *skt, socket_state_t *stat */ if (skt->irq_state != 1 && state->io_irq) { skt->irq_state = 1; - set_irq_type(skt->irq, IRQ_TYPE_EDGE_FALLING); + set_irq_type(skt->socket.pci_irq, IRQ_TYPE_EDGE_FALLING); } else if (skt->irq_state == 1 && state->io_irq == 0) { skt->irq_state = 0; - set_irq_type(skt->irq, IRQ_TYPE_NONE); + set_irq_type(skt->socket.pci_irq, IRQ_TYPE_NONE); } skt->cs_state = *state; @@ -492,7 +492,8 @@ static ssize_t show_status(struct device *dev, struct device_attribute *attr, ch p+=sprintf(p, "Vcc : %d\n", skt->cs_state.Vcc); p+=sprintf(p, "Vpp : %d\n", skt->cs_state.Vpp); - p+=sprintf(p, "IRQ : %d (%d)\n", skt->cs_state.io_irq, skt->irq); + p+=sprintf(p, "IRQ : %d (%d)\n", skt->cs_state.io_irq, + skt->socket.pci_irq); if (skt->ops->show_timing) p+=skt->ops->show_timing(skt, p); @@ -574,7 +575,7 @@ void soc_pcmcia_enable_irqs(struct soc_pcmcia_socket *skt, EXPORT_SYMBOL(soc_pcmcia_enable_irqs); -LIST_HEAD(soc_pcmcia_sockets); +static LIST_HEAD(soc_pcmcia_sockets); static DEFINE_MUTEX(soc_pcmcia_sockets_lock); #ifdef CONFIG_CPU_FREQ @@ -609,177 +610,137 @@ static int soc_pcmcia_cpufreq_register(void) "notifier for PCMCIA (%d)\n", ret); return ret; } +fs_initcall(soc_pcmcia_cpufreq_register); static void soc_pcmcia_cpufreq_unregister(void) { cpufreq_unregister_notifier(&soc_pcmcia_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); } +module_exit(soc_pcmcia_cpufreq_unregister); -#else -static int soc_pcmcia_cpufreq_register(void) { return 0; } -static void soc_pcmcia_cpufreq_unregister(void) {} #endif -int soc_common_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, - struct skt_dev_info *sinfo) +void soc_pcmcia_remove_one(struct soc_pcmcia_socket *skt) { - struct soc_pcmcia_socket *skt; - int ret, i; - mutex_lock(&soc_pcmcia_sockets_lock); + del_timer_sync(&skt->poll_timer); - /* - * Initialise the per-socket structure. - */ - for (i = 0; i < sinfo->nskt; i++) { - skt = &sinfo->skt[i]; + pcmcia_unregister_socket(&skt->socket); - skt->socket.ops = &soc_common_pcmcia_operations; - skt->socket.owner = ops->owner; - skt->socket.dev.parent = dev; + flush_scheduled_work(); - init_timer(&skt->poll_timer); - skt->poll_timer.function = soc_common_pcmcia_poll_event; - skt->poll_timer.data = (unsigned long)skt; - skt->poll_timer.expires = jiffies + SOC_PCMCIA_POLL_PERIOD; + skt->ops->hw_shutdown(skt); - skt->dev = dev; - skt->ops = ops; + soc_common_pcmcia_config_skt(skt, &dead_socket); - ret = request_resource(&iomem_resource, &skt->res_skt); - if (ret) - goto out_err_1; + list_del(&skt->node); + mutex_unlock(&soc_pcmcia_sockets_lock); - ret = request_resource(&skt->res_skt, &skt->res_io); - if (ret) - goto out_err_2; + iounmap(skt->virt_io); + skt->virt_io = NULL; + release_resource(&skt->res_attr); + release_resource(&skt->res_mem); + release_resource(&skt->res_io); + release_resource(&skt->res_skt); +} +EXPORT_SYMBOL(soc_pcmcia_remove_one); - ret = request_resource(&skt->res_skt, &skt->res_mem); - if (ret) - goto out_err_3; +int soc_pcmcia_add_one(struct soc_pcmcia_socket *skt) +{ + int ret; - ret = request_resource(&skt->res_skt, &skt->res_attr); - if (ret) - goto out_err_4; + init_timer(&skt->poll_timer); + skt->poll_timer.function = soc_common_pcmcia_poll_event; + skt->poll_timer.data = (unsigned long)skt; + skt->poll_timer.expires = jiffies + SOC_PCMCIA_POLL_PERIOD; - skt->virt_io = ioremap(skt->res_io.start, 0x10000); - if (skt->virt_io == NULL) { - ret = -ENOMEM; - goto out_err_5; - } + ret = request_resource(&iomem_resource, &skt->res_skt); + if (ret) + goto out_err_1; - if (list_empty(&soc_pcmcia_sockets)) - soc_pcmcia_cpufreq_register(); + ret = request_resource(&skt->res_skt, &skt->res_io); + if (ret) + goto out_err_2; - list_add(&skt->node, &soc_pcmcia_sockets); + ret = request_resource(&skt->res_skt, &skt->res_mem); + if (ret) + goto out_err_3; - /* - * We initialize default socket timing here, because - * we are not guaranteed to see a SetIOMap operation at - * runtime. - */ - ops->set_timing(skt); + ret = request_resource(&skt->res_skt, &skt->res_attr); + if (ret) + goto out_err_4; - ret = ops->hw_init(skt); - if (ret) - goto out_err_6; + skt->virt_io = ioremap(skt->res_io.start, 0x10000); + if (skt->virt_io == NULL) { + ret = -ENOMEM; + goto out_err_5; + } - skt->socket.features = SS_CAP_STATIC_MAP|SS_CAP_PCCARD; - skt->socket.resource_ops = &pccard_static_ops; - skt->socket.irq_mask = 0; - skt->socket.map_size = PAGE_SIZE; - skt->socket.pci_irq = skt->irq; - skt->socket.io_offset = (unsigned long)skt->virt_io; + mutex_lock(&soc_pcmcia_sockets_lock); - skt->status = soc_common_pcmcia_skt_state(skt); + list_add(&skt->node, &soc_pcmcia_sockets); - ret = pcmcia_register_socket(&skt->socket); - if (ret) - goto out_err_7; + /* + * We initialize default socket timing here, because + * we are not guaranteed to see a SetIOMap operation at + * runtime. + */ + skt->ops->set_timing(skt); - WARN_ON(skt->socket.sock != i); + ret = skt->ops->hw_init(skt); + if (ret) + goto out_err_6; - add_timer(&skt->poll_timer); + skt->socket.ops = &soc_common_pcmcia_operations; + skt->socket.features = SS_CAP_STATIC_MAP|SS_CAP_PCCARD; + skt->socket.resource_ops = &pccard_static_ops; + skt->socket.irq_mask = 0; + skt->socket.map_size = PAGE_SIZE; + skt->socket.io_offset = (unsigned long)skt->virt_io; - ret = device_create_file(&skt->socket.dev, &dev_attr_status); - if (ret) - goto out_err_8; - } + skt->status = soc_common_pcmcia_skt_state(skt); - dev_set_drvdata(dev, sinfo); - ret = 0; - goto out; + ret = pcmcia_register_socket(&skt->socket); + if (ret) + goto out_err_7; - do { - skt = &sinfo->skt[i]; + add_timer(&skt->poll_timer); + + mutex_unlock(&soc_pcmcia_sockets_lock); + + ret = device_create_file(&skt->socket.dev, &dev_attr_status); + if (ret) + goto out_err_8; + + return ret; - device_remove_file(&skt->socket.dev, &dev_attr_status); out_err_8: - del_timer_sync(&skt->poll_timer); - pcmcia_unregister_socket(&skt->socket); + mutex_lock(&soc_pcmcia_sockets_lock); + del_timer_sync(&skt->poll_timer); + pcmcia_unregister_socket(&skt->socket); out_err_7: - flush_scheduled_work(); + flush_scheduled_work(); - ops->hw_shutdown(skt); + skt->ops->hw_shutdown(skt); out_err_6: - list_del(&skt->node); - iounmap(skt->virt_io); + list_del(&skt->node); + mutex_unlock(&soc_pcmcia_sockets_lock); + iounmap(skt->virt_io); out_err_5: - release_resource(&skt->res_attr); + release_resource(&skt->res_attr); out_err_4: - release_resource(&skt->res_mem); + release_resource(&skt->res_mem); out_err_3: - release_resource(&skt->res_io); + release_resource(&skt->res_io); out_err_2: - release_resource(&skt->res_skt); + release_resource(&skt->res_skt); out_err_1: - i--; - } while (i > 0); - kfree(sinfo); - - out: - mutex_unlock(&soc_pcmcia_sockets_lock); return ret; } +EXPORT_SYMBOL(soc_pcmcia_add_one); -int soc_common_drv_pcmcia_remove(struct device *dev) -{ - struct skt_dev_info *sinfo = dev_get_drvdata(dev); - int i; - - dev_set_drvdata(dev, NULL); - - mutex_lock(&soc_pcmcia_sockets_lock); - for (i = 0; i < sinfo->nskt; i++) { - struct soc_pcmcia_socket *skt = &sinfo->skt[i]; - - del_timer_sync(&skt->poll_timer); - - pcmcia_unregister_socket(&skt->socket); - - flush_scheduled_work(); - - skt->ops->hw_shutdown(skt); - - soc_common_pcmcia_config_skt(skt, &dead_socket); - - list_del(&skt->node); - iounmap(skt->virt_io); - skt->virt_io = NULL; - release_resource(&skt->res_attr); - release_resource(&skt->res_mem); - release_resource(&skt->res_io); - release_resource(&skt->res_skt); - } - if (list_empty(&soc_pcmcia_sockets)) - soc_pcmcia_cpufreq_unregister(); - - mutex_unlock(&soc_pcmcia_sockets_lock); - - kfree(sinfo); - - return 0; -} -EXPORT_SYMBOL(soc_common_drv_pcmcia_remove); +MODULE_AUTHOR("John Dorsey <john+@cs.cmu.edu>"); +MODULE_DESCRIPTION("Linux PCMCIA Card Services: Common SoC support"); +MODULE_LICENSE("Dual MPL/GPL"); diff --git a/drivers/pcmcia/soc_common.h b/drivers/pcmcia/soc_common.h index 290e143839e..e40824ce6b0 100644 --- a/drivers/pcmcia/soc_common.h +++ b/drivers/pcmcia/soc_common.h @@ -30,14 +30,12 @@ struct soc_pcmcia_socket { /* * Info from low level handler */ - struct device *dev; unsigned int nr; - unsigned int irq; /* * Core PCMCIA state */ - struct pcmcia_low_level *ops; + const struct pcmcia_low_level *ops; unsigned int status; socket_state_t cs_state; @@ -135,10 +133,8 @@ extern void soc_pcmcia_enable_irqs(struct soc_pcmcia_socket *skt, struct pcmcia_ extern void soc_common_pcmcia_get_timing(struct soc_pcmcia_socket *, struct soc_pcmcia_timing *); -extern struct list_head soc_pcmcia_sockets; - -extern int soc_common_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, struct skt_dev_info *sinfo); -extern int soc_common_drv_pcmcia_remove(struct device *dev); +void soc_pcmcia_remove_one(struct soc_pcmcia_socket *skt); +int soc_pcmcia_add_one(struct soc_pcmcia_socket *skt); #ifdef CONFIG_PCMCIA_DEBUG diff --git a/drivers/pcmcia/tcic.c b/drivers/pcmcia/tcic.c index 6918849d511..12c49ee135e 100644 --- a/drivers/pcmcia/tcic.c +++ b/drivers/pcmcia/tcic.c @@ -55,21 +55,6 @@ #include <pcmcia/ss.h> #include "tcic.h" -#ifdef CONFIG_PCMCIA_DEBUG -static int pc_debug; - -module_param(pc_debug, int, 0644); -static const char version[] = -"tcic.c 1.111 2000/02/15 04:13:12 (David Hinds)"; - -#define debug(lvl, fmt, arg...) do { \ - if (pc_debug > (lvl)) \ - printk(KERN_DEBUG "tcic: " fmt , ## arg); \ -} while (0) -#else -#define debug(lvl, fmt, arg...) do { } while (0) -#endif - MODULE_AUTHOR("David Hinds <dahinds@users.sourceforge.net>"); MODULE_DESCRIPTION("Databook TCIC-2 PCMCIA socket driver"); MODULE_LICENSE("Dual MPL/GPL"); @@ -574,7 +559,7 @@ static irqreturn_t tcic_interrupt(int irq, void *dev) } else active = 1; - debug(2, "tcic_interrupt()\n"); + pr_debug("tcic_interrupt()\n"); for (i = 0; i < sockets; i++) { psock = socket_table[i].psock; @@ -611,13 +596,13 @@ static irqreturn_t tcic_interrupt(int irq, void *dev) } active = 0; - debug(2, "interrupt done\n"); + pr_debug("interrupt done\n"); return IRQ_HANDLED; } /* tcic_interrupt */ static void tcic_timer(u_long data) { - debug(2, "tcic_timer()\n"); + pr_debug("tcic_timer()\n"); tcic_timer_pending = 0; tcic_interrupt(0, NULL); } /* tcic_timer */ @@ -644,7 +629,7 @@ static int tcic_get_status(struct pcmcia_socket *sock, u_int *value) reg = tcic_getb(TCIC_PWR); if (reg & (TCIC_PWR_VCC(psock)|TCIC_PWR_VPP(psock))) *value |= SS_POWERON; - debug(1, "GetStatus(%d) = %#2.2x\n", psock, *value); + dev_dbg(&sock->dev, "GetStatus(%d) = %#2.2x\n", psock, *value); return 0; } /* tcic_get_status */ @@ -656,7 +641,7 @@ static int tcic_set_socket(struct pcmcia_socket *sock, socket_state_t *state) u_char reg; u_short scf1, scf2; - debug(1, "SetSocket(%d, flags %#3.3x, Vcc %d, Vpp %d, " + dev_dbg(&sock->dev, "SetSocket(%d, flags %#3.3x, Vcc %d, Vpp %d, " "io_irq %d, csc_mask %#2.2x)\n", psock, state->flags, state->Vcc, state->Vpp, state->io_irq, state->csc_mask); tcic_setw(TCIC_ADDR+2, (psock << TCIC_SS_SHFT) | TCIC_ADR2_INDREG); @@ -731,7 +716,7 @@ static int tcic_set_io_map(struct pcmcia_socket *sock, struct pccard_io_map *io) u_int addr; u_short base, len, ioctl; - debug(1, "SetIOMap(%d, %d, %#2.2x, %d ns, " + dev_dbg(&sock->dev, "SetIOMap(%d, %d, %#2.2x, %d ns, " "%#llx-%#llx)\n", psock, io->map, io->flags, io->speed, (unsigned long long)io->start, (unsigned long long)io->stop); if ((io->map > 1) || (io->start > 0xffff) || (io->stop > 0xffff) || @@ -768,7 +753,7 @@ static int tcic_set_mem_map(struct pcmcia_socket *sock, struct pccard_mem_map *m u_short addr, ctl; u_long base, len, mmap; - debug(1, "SetMemMap(%d, %d, %#2.2x, %d ns, " + dev_dbg(&sock->dev, "SetMemMap(%d, %d, %#2.2x, %d ns, " "%#llx-%#llx, %#x)\n", psock, mem->map, mem->flags, mem->speed, (unsigned long long)mem->res->start, (unsigned long long)mem->res->end, mem->card_start); diff --git a/drivers/pcmcia/topic.h b/drivers/pcmcia/topic.h index edccfa5bb40..615a45a8fe8 100644 --- a/drivers/pcmcia/topic.h +++ b/drivers/pcmcia/topic.h @@ -114,22 +114,17 @@ static void topic97_zoom_video(struct pcmcia_socket *sock, int onoff) reg_zv |= TOPIC97_ZV_CONTROL_ENABLE; config_writeb(socket, TOPIC97_ZOOM_VIDEO_CONTROL, reg_zv); - reg = config_readb(socket, TOPIC97_MISC2); - reg |= TOPIC97_MISC2_ZV_ENABLE; - config_writeb(socket, TOPIC97_MISC2, reg); - - /* not sure this is needed, doc is unclear */ -#if 0 reg = config_readb(socket, TOPIC97_AUDIO_VIDEO_SWITCH); reg |= TOPIC97_AVS_AUDIO_CONTROL | TOPIC97_AVS_VIDEO_CONTROL; config_writeb(socket, TOPIC97_AUDIO_VIDEO_SWITCH, reg); -#endif - } - else { + } else { reg_zv &= ~TOPIC97_ZV_CONTROL_ENABLE; config_writeb(socket, TOPIC97_ZOOM_VIDEO_CONTROL, reg_zv); - } + reg = config_readb(socket, TOPIC97_AUDIO_VIDEO_SWITCH); + reg &= ~(TOPIC97_AVS_AUDIO_CONTROL | TOPIC97_AVS_VIDEO_CONTROL); + config_writeb(socket, TOPIC97_AUDIO_VIDEO_SWITCH, reg); + } } static int topic97_override(struct yenta_socket *socket) diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig index cea6cef27e8..11867492551 100644 --- a/drivers/power/Kconfig +++ b/drivers/power/Kconfig @@ -77,6 +77,13 @@ config BATTERY_TOSA Say Y to enable support for the battery on the Sharp Zaurus SL-6000 (tosa) models. +config BATTERY_COLLIE + tristate "Sharp SL-5500 (collie) battery" + depends on SA1100_COLLIE && MCP_UCB1200 + help + Say Y to enable support for the battery on the Sharp Zaurus + SL-5500 (collie) models. + config BATTERY_WM97XX bool "WM97xx generic battery driver" depends on TOUCHSCREEN_WM97XX=y diff --git a/drivers/power/Makefile b/drivers/power/Makefile index b96f29d91c2..356cdfd3c8b 100644 --- a/drivers/power/Makefile +++ b/drivers/power/Makefile @@ -24,6 +24,7 @@ obj-$(CONFIG_BATTERY_DS2782) += ds2782_battery.o obj-$(CONFIG_BATTERY_PMU) += pmu_battery.o obj-$(CONFIG_BATTERY_OLPC) += olpc_battery.o obj-$(CONFIG_BATTERY_TOSA) += tosa_battery.o +obj-$(CONFIG_BATTERY_COLLIE) += collie_battery.o obj-$(CONFIG_BATTERY_WM97XX) += wm97xx_battery.o obj-$(CONFIG_BATTERY_BQ27x00) += bq27x00_battery.o obj-$(CONFIG_BATTERY_DA9030) += da9030_battery.o diff --git a/drivers/power/collie_battery.c b/drivers/power/collie_battery.c new file mode 100644 index 00000000000..039f41ae217 --- /dev/null +++ b/drivers/power/collie_battery.c @@ -0,0 +1,418 @@ +/* + * Battery and Power Management code for the Sharp SL-5x00 + * + * Copyright (C) 2009 Thomas Kunze + * + * based on tosa_battery.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/power_supply.h> +#include <linux/delay.h> +#include <linux/spinlock.h> +#include <linux/interrupt.h> +#include <linux/gpio.h> +#include <linux/mfd/ucb1x00.h> + +#include <asm/mach/sharpsl_param.h> +#include <asm/mach-types.h> +#include <mach/collie.h> + +static DEFINE_MUTEX(bat_lock); /* protects gpio pins */ +static struct work_struct bat_work; +static struct ucb1x00 *ucb; + +struct collie_bat { + int status; + struct power_supply psy; + int full_chrg; + + struct mutex work_lock; /* protects data */ + + bool (*is_present)(struct collie_bat *bat); + int gpio_full; + int gpio_charge_on; + + int technology; + + int gpio_bat; + int adc_bat; + int adc_bat_divider; + int bat_max; + int bat_min; + + int gpio_temp; + int adc_temp; + int adc_temp_divider; +}; + +static struct collie_bat collie_bat_main; + +static unsigned long collie_read_bat(struct collie_bat *bat) +{ + unsigned long value = 0; + + if (bat->gpio_bat < 0 || bat->adc_bat < 0) + return 0; + mutex_lock(&bat_lock); + gpio_set_value(bat->gpio_bat, 1); + msleep(5); + ucb1x00_adc_enable(ucb); + value = ucb1x00_adc_read(ucb, bat->adc_bat, UCB_SYNC); + ucb1x00_adc_disable(ucb); + gpio_set_value(bat->gpio_bat, 0); + mutex_unlock(&bat_lock); + value = value * 1000000 / bat->adc_bat_divider; + + return value; +} + +static unsigned long collie_read_temp(struct collie_bat *bat) +{ + unsigned long value = 0; + if (bat->gpio_temp < 0 || bat->adc_temp < 0) + return 0; + + mutex_lock(&bat_lock); + gpio_set_value(bat->gpio_temp, 1); + msleep(5); + ucb1x00_adc_enable(ucb); + value = ucb1x00_adc_read(ucb, bat->adc_temp, UCB_SYNC); + ucb1x00_adc_disable(ucb); + gpio_set_value(bat->gpio_temp, 0); + mutex_unlock(&bat_lock); + + value = value * 10000 / bat->adc_temp_divider; + + return value; +} + +static int collie_bat_get_property(struct power_supply *psy, + enum power_supply_property psp, + union power_supply_propval *val) +{ + int ret = 0; + struct collie_bat *bat = container_of(psy, struct collie_bat, psy); + + if (bat->is_present && !bat->is_present(bat) + && psp != POWER_SUPPLY_PROP_PRESENT) { + return -ENODEV; + } + + switch (psp) { + case POWER_SUPPLY_PROP_STATUS: + val->intval = bat->status; + break; + case POWER_SUPPLY_PROP_TECHNOLOGY: + val->intval = bat->technology; + break; + case POWER_SUPPLY_PROP_VOLTAGE_NOW: + val->intval = collie_read_bat(bat); + break; + case POWER_SUPPLY_PROP_VOLTAGE_MAX: + if (bat->full_chrg == -1) + val->intval = bat->bat_max; + else + val->intval = bat->full_chrg; + break; + case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN: + val->intval = bat->bat_max; + break; + case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN: + val->intval = bat->bat_min; + break; + case POWER_SUPPLY_PROP_TEMP: + val->intval = collie_read_temp(bat); + break; + case POWER_SUPPLY_PROP_PRESENT: + val->intval = bat->is_present ? bat->is_present(bat) : 1; + break; + default: + ret = -EINVAL; + break; + } + return ret; +} + +static void collie_bat_external_power_changed(struct power_supply *psy) +{ + schedule_work(&bat_work); +} + +static irqreturn_t collie_bat_gpio_isr(int irq, void *data) +{ + pr_info("collie_bat_gpio irq: %d\n", gpio_get_value(irq_to_gpio(irq))); + schedule_work(&bat_work); + return IRQ_HANDLED; +} + +static void collie_bat_update(struct collie_bat *bat) +{ + int old; + struct power_supply *psy = &bat->psy; + + mutex_lock(&bat->work_lock); + + old = bat->status; + + if (bat->is_present && !bat->is_present(bat)) { + printk(KERN_NOTICE "%s not present\n", psy->name); + bat->status = POWER_SUPPLY_STATUS_UNKNOWN; + bat->full_chrg = -1; + } else if (power_supply_am_i_supplied(psy)) { + if (bat->status == POWER_SUPPLY_STATUS_DISCHARGING) { + gpio_set_value(bat->gpio_charge_on, 1); + mdelay(15); + } + + if (gpio_get_value(bat->gpio_full)) { + if (old == POWER_SUPPLY_STATUS_CHARGING || + bat->full_chrg == -1) + bat->full_chrg = collie_read_bat(bat); + + gpio_set_value(bat->gpio_charge_on, 0); + bat->status = POWER_SUPPLY_STATUS_FULL; + } else { + gpio_set_value(bat->gpio_charge_on, 1); + bat->status = POWER_SUPPLY_STATUS_CHARGING; + } + } else { + gpio_set_value(bat->gpio_charge_on, 0); + bat->status = POWER_SUPPLY_STATUS_DISCHARGING; + } + + if (old != bat->status) + power_supply_changed(psy); + + mutex_unlock(&bat->work_lock); +} + +static void collie_bat_work(struct work_struct *work) +{ + collie_bat_update(&collie_bat_main); +} + + +static enum power_supply_property collie_bat_main_props[] = { + POWER_SUPPLY_PROP_STATUS, + POWER_SUPPLY_PROP_TECHNOLOGY, + POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN, + POWER_SUPPLY_PROP_VOLTAGE_NOW, + POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN, + POWER_SUPPLY_PROP_VOLTAGE_MAX, + POWER_SUPPLY_PROP_PRESENT, + POWER_SUPPLY_PROP_TEMP, +}; + +static enum power_supply_property collie_bat_bu_props[] = { + POWER_SUPPLY_PROP_STATUS, + POWER_SUPPLY_PROP_TECHNOLOGY, + POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN, + POWER_SUPPLY_PROP_VOLTAGE_NOW, + POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN, + POWER_SUPPLY_PROP_VOLTAGE_MAX, + POWER_SUPPLY_PROP_PRESENT, +}; + +static struct collie_bat collie_bat_main = { + .status = POWER_SUPPLY_STATUS_DISCHARGING, + .full_chrg = -1, + .psy = { + .name = "main-battery", + .type = POWER_SUPPLY_TYPE_BATTERY, + .properties = collie_bat_main_props, + .num_properties = ARRAY_SIZE(collie_bat_main_props), + .get_property = collie_bat_get_property, + .external_power_changed = collie_bat_external_power_changed, + .use_for_apm = 1, + }, + + .gpio_full = COLLIE_GPIO_CO, + .gpio_charge_on = COLLIE_GPIO_CHARGE_ON, + + .technology = POWER_SUPPLY_TECHNOLOGY_LIPO, + + .gpio_bat = COLLIE_GPIO_MBAT_ON, + .adc_bat = UCB_ADC_INP_AD1, + .adc_bat_divider = 155, + .bat_max = 4310000, + .bat_min = 1551 * 1000000 / 414, + + .gpio_temp = COLLIE_GPIO_TMP_ON, + .adc_temp = UCB_ADC_INP_AD0, + .adc_temp_divider = 10000, +}; + +static struct collie_bat collie_bat_bu = { + .status = POWER_SUPPLY_STATUS_UNKNOWN, + .full_chrg = -1, + + .psy = { + .name = "backup-battery", + .type = POWER_SUPPLY_TYPE_BATTERY, + .properties = collie_bat_bu_props, + .num_properties = ARRAY_SIZE(collie_bat_bu_props), + .get_property = collie_bat_get_property, + .external_power_changed = collie_bat_external_power_changed, + }, + + .gpio_full = -1, + .gpio_charge_on = -1, + + .technology = POWER_SUPPLY_TECHNOLOGY_LiMn, + + .gpio_bat = COLLIE_GPIO_BBAT_ON, + .adc_bat = UCB_ADC_INP_AD1, + .adc_bat_divider = 155, + .bat_max = 3000000, + .bat_min = 1900000, + + .gpio_temp = -1, + .adc_temp = -1, + .adc_temp_divider = -1, +}; + +static struct { + int gpio; + char *name; + bool output; + int value; +} gpios[] = { + { COLLIE_GPIO_CO, "main battery full", 0, 0 }, + { COLLIE_GPIO_MAIN_BAT_LOW, "main battery low", 0, 0 }, + { COLLIE_GPIO_CHARGE_ON, "main charge on", 1, 0 }, + { COLLIE_GPIO_MBAT_ON, "main battery", 1, 0 }, + { COLLIE_GPIO_TMP_ON, "main battery temp", 1, 0 }, + { COLLIE_GPIO_BBAT_ON, "backup battery", 1, 0 }, +}; + +#ifdef CONFIG_PM +static int collie_bat_suspend(struct ucb1x00_dev *dev, pm_message_t state) +{ + /* flush all pending status updates */ + flush_scheduled_work(); + return 0; +} + +static int collie_bat_resume(struct ucb1x00_dev *dev) +{ + /* things may have changed while we were away */ + schedule_work(&bat_work); + return 0; +} +#else +#define collie_bat_suspend NULL +#define collie_bat_resume NULL +#endif + +static int __devinit collie_bat_probe(struct ucb1x00_dev *dev) +{ + int ret; + int i; + + if (!machine_is_collie()) + return -ENODEV; + + ucb = dev->ucb; + + for (i = 0; i < ARRAY_SIZE(gpios); i++) { + ret = gpio_request(gpios[i].gpio, gpios[i].name); + if (ret) { + i--; + goto err_gpio; + } + + if (gpios[i].output) + ret = gpio_direction_output(gpios[i].gpio, + gpios[i].value); + else + ret = gpio_direction_input(gpios[i].gpio); + + if (ret) + goto err_gpio; + } + + mutex_init(&collie_bat_main.work_lock); + + INIT_WORK(&bat_work, collie_bat_work); + + ret = power_supply_register(&dev->ucb->dev, &collie_bat_main.psy); + if (ret) + goto err_psy_reg_main; + ret = power_supply_register(&dev->ucb->dev, &collie_bat_bu.psy); + if (ret) + goto err_psy_reg_bu; + + ret = request_irq(gpio_to_irq(COLLIE_GPIO_CO), + collie_bat_gpio_isr, + IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, + "main full", &collie_bat_main); + if (!ret) { + schedule_work(&bat_work); + return 0; + } + power_supply_unregister(&collie_bat_bu.psy); +err_psy_reg_bu: + power_supply_unregister(&collie_bat_main.psy); +err_psy_reg_main: + + /* see comment in collie_bat_remove */ + flush_scheduled_work(); + + i--; +err_gpio: + for (; i >= 0; i--) + gpio_free(gpios[i].gpio); + + return ret; +} + +static void __devexit collie_bat_remove(struct ucb1x00_dev *dev) +{ + int i; + + free_irq(gpio_to_irq(COLLIE_GPIO_CO), &collie_bat_main); + + power_supply_unregister(&collie_bat_bu.psy); + power_supply_unregister(&collie_bat_main.psy); + + /* + * now flush all pending work. + * we won't get any more schedules, since all + * sources (isr and external_power_changed) + * are unregistered now. + */ + flush_scheduled_work(); + + for (i = ARRAY_SIZE(gpios) - 1; i >= 0; i--) + gpio_free(gpios[i].gpio); +} + +static struct ucb1x00_driver collie_bat_driver = { + .add = collie_bat_probe, + .remove = __devexit_p(collie_bat_remove), + .suspend = collie_bat_suspend, + .resume = collie_bat_resume, +}; + +static int __init collie_bat_init(void) +{ + return ucb1x00_register_driver(&collie_bat_driver); +} + +static void __exit collie_bat_exit(void) +{ + ucb1x00_unregister_driver(&collie_bat_driver); +} + +module_init(collie_bat_init); +module_exit(collie_bat_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Thomas Kunze"); +MODULE_DESCRIPTION("Collie battery driver"); diff --git a/drivers/regulator/wm831x-isink.c b/drivers/regulator/wm831x-isink.c index 1d8d9879d3a..48857008758 100644 --- a/drivers/regulator/wm831x-isink.c +++ b/drivers/regulator/wm831x-isink.c @@ -167,6 +167,8 @@ static __devinit int wm831x_isink_probe(struct platform_device *pdev) return -ENOMEM; } + isink->wm831x = wm831x; + res = platform_get_resource(pdev, IORESOURCE_IO, 0); if (res == NULL) { dev_err(&pdev->dev, "No I/O resource\n"); diff --git a/drivers/rtc/rtc-pcf50633.c b/drivers/rtc/rtc-pcf50633.c index 33a10c47260..4c5d5d0c4cf 100644 --- a/drivers/rtc/rtc-pcf50633.c +++ b/drivers/rtc/rtc-pcf50633.c @@ -292,8 +292,9 @@ static int __devinit pcf50633_rtc_probe(struct platform_device *pdev) &pcf50633_rtc_ops, THIS_MODULE); if (IS_ERR(rtc->rtc_dev)) { + int ret = PTR_ERR(rtc->rtc_dev); kfree(rtc); - return PTR_ERR(rtc->rtc_dev); + return ret; } pcf50633_register_irq(rtc->pcf, PCF50633_IRQ_ALARM, diff --git a/drivers/rtc/rtc-x1205.c b/drivers/rtc/rtc-x1205.c index 310c10795e9..6583c1a8b07 100644 --- a/drivers/rtc/rtc-x1205.c +++ b/drivers/rtc/rtc-x1205.c @@ -195,7 +195,7 @@ static int x1205_set_datetime(struct i2c_client *client, struct rtc_time *tm, /* year, since the rtc epoch*/ buf[CCR_YEAR] = bin2bcd(tm->tm_year % 100); buf[CCR_WDAY] = tm->tm_wday & 0x07; - buf[CCR_Y2K] = bin2bcd(tm->tm_year / 100); + buf[CCR_Y2K] = bin2bcd((tm->tm_year + 1900) / 100); } /* If writing alarm registers, set compare bits on registers 0-4 */ @@ -280,9 +280,9 @@ static int x1205_fix_osc(struct i2c_client *client) int err; struct rtc_time tm; - tm.tm_hour = tm.tm_min = tm.tm_sec = 0; + memset(&tm, 0, sizeof(tm)); - err = x1205_set_datetime(client, &tm, 0, X1205_CCR_BASE, 0); + err = x1205_set_datetime(client, &tm, 1, X1205_CCR_BASE, 0); if (err < 0) dev_err(&client->dev, "unable to restart the oscillator\n"); diff --git a/drivers/scsi/pcmcia/aha152x_stub.c b/drivers/scsi/pcmcia/aha152x_stub.c index 67cde013806..528733b4a39 100644 --- a/drivers/scsi/pcmcia/aha152x_stub.c +++ b/drivers/scsi/pcmcia/aha152x_stub.c @@ -54,15 +54,6 @@ #include <pcmcia/cistpl.h> #include <pcmcia/ds.h> -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0644); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = -"aha152x_cs.c 1.54 2000/06/12 21:27:25 (David Hinds)"; -#else -#define DEBUG(n, args...) -#endif /*====================================================================*/ @@ -103,7 +94,7 @@ static int aha152x_probe(struct pcmcia_device *link) { scsi_info_t *info; - DEBUG(0, "aha152x_attach()\n"); + dev_dbg(&link->dev, "aha152x_attach()\n"); /* Create new SCSI device */ info = kzalloc(sizeof(*info), GFP_KERNEL); @@ -115,7 +106,6 @@ static int aha152x_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; link->io.IOAddrLines = 10; link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; link->conf.Present = PRESENT_OPTION; @@ -127,7 +117,7 @@ static int aha152x_probe(struct pcmcia_device *link) static void aha152x_detach(struct pcmcia_device *link) { - DEBUG(0, "aha152x_detach(0x%p)\n", link); + dev_dbg(&link->dev, "aha152x_detach\n"); aha152x_release_cs(link); @@ -137,9 +127,6 @@ static void aha152x_detach(struct pcmcia_device *link) /*====================================================================*/ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int aha152x_config_check(struct pcmcia_device *p_dev, cistpl_cftable_entry_t *cfg, cistpl_cftable_entry_t *dflt, @@ -164,19 +151,22 @@ static int aha152x_config_cs(struct pcmcia_device *link) { scsi_info_t *info = link->priv; struct aha152x_setup s; - int last_ret, last_fn; + int ret; struct Scsi_Host *host; - DEBUG(0, "aha152x_config(0x%p)\n", link); + dev_dbg(&link->dev, "aha152x_config\n"); - last_ret = pcmcia_loop_config(link, aha152x_config_check, NULL); - if (last_ret) { - cs_error(link, RequestIO, last_ret); - goto failed; - } + ret = pcmcia_loop_config(link, aha152x_config_check, NULL); + if (ret) + goto failed; - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; + + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; /* Set configuration options for the aha152x driver */ memset(&s, 0, sizeof(s)); @@ -194,7 +184,7 @@ static int aha152x_config_cs(struct pcmcia_device *link) host = aha152x_probe_one(&s); if (host == NULL) { printk(KERN_INFO "aha152x_cs: no SCSI devices found\n"); - goto cs_failed; + goto failed; } sprintf(info->node.dev_name, "scsi%d", host->host_no); @@ -203,8 +193,6 @@ static int aha152x_config_cs(struct pcmcia_device *link) return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: aha152x_release_cs(link); return -ENODEV; diff --git a/drivers/scsi/pcmcia/fdomain_stub.c b/drivers/scsi/pcmcia/fdomain_stub.c index 06254f46a0d..91404068407 100644 --- a/drivers/scsi/pcmcia/fdomain_stub.c +++ b/drivers/scsi/pcmcia/fdomain_stub.c @@ -59,16 +59,6 @@ MODULE_AUTHOR("David Hinds <dahinds@users.sourceforge.net>"); MODULE_DESCRIPTION("Future Domain PCMCIA SCSI driver"); MODULE_LICENSE("Dual MPL/GPL"); -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = -"fdomain_cs.c 1.47 2001/10/13 00:08:52 (David Hinds)"; -#else -#define DEBUG(n, args...) -#endif - /*====================================================================*/ typedef struct scsi_info_t { @@ -86,7 +76,7 @@ static int fdomain_probe(struct pcmcia_device *link) { scsi_info_t *info; - DEBUG(0, "fdomain_attach()\n"); + dev_dbg(&link->dev, "fdomain_attach()\n"); /* Create new SCSI device */ info = kzalloc(sizeof(*info), GFP_KERNEL); @@ -99,7 +89,6 @@ static int fdomain_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; link->io.IOAddrLines = 10; link->irq.Attributes = IRQ_TYPE_EXCLUSIVE; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; link->conf.Present = PRESENT_OPTION; @@ -111,7 +100,7 @@ static int fdomain_probe(struct pcmcia_device *link) static void fdomain_detach(struct pcmcia_device *link) { - DEBUG(0, "fdomain_detach(0x%p)\n", link); + dev_dbg(&link->dev, "fdomain_detach\n"); fdomain_release(link); @@ -120,9 +109,6 @@ static void fdomain_detach(struct pcmcia_device *link) /*====================================================================*/ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int fdomain_config_check(struct pcmcia_device *p_dev, cistpl_cftable_entry_t *cfg, cistpl_cftable_entry_t *dflt, @@ -137,20 +123,22 @@ static int fdomain_config_check(struct pcmcia_device *p_dev, static int fdomain_config(struct pcmcia_device *link) { scsi_info_t *info = link->priv; - int last_ret, last_fn; + int ret; char str[22]; struct Scsi_Host *host; - DEBUG(0, "fdomain_config(0x%p)\n", link); + dev_dbg(&link->dev, "fdomain_config\n"); - last_ret = pcmcia_loop_config(link, fdomain_config_check, NULL); - if (last_ret) { - cs_error(link, RequestIO, last_ret); + ret = pcmcia_loop_config(link, fdomain_config_check, NULL); + if (ret) goto failed; - } - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; /* A bad hack... */ release_region(link->io.BasePort1, link->io.NumPorts1); @@ -162,11 +150,11 @@ static int fdomain_config(struct pcmcia_device *link) host = __fdomain_16x0_detect(&fdomain_driver_template); if (!host) { printk(KERN_INFO "fdomain_cs: no SCSI devices found\n"); - goto cs_failed; + goto failed; } if (scsi_add_host(host, NULL)) - goto cs_failed; + goto failed; scsi_scan_host(host); sprintf(info->node.dev_name, "scsi%d", host->host_no); @@ -175,8 +163,6 @@ static int fdomain_config(struct pcmcia_device *link) return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: fdomain_release(link); return -ENODEV; @@ -188,7 +174,7 @@ static void fdomain_release(struct pcmcia_device *link) { scsi_info_t *info = link->priv; - DEBUG(0, "fdomain_release(0x%p)\n", link); + dev_dbg(&link->dev, "fdomain_release\n"); scsi_remove_host(info->host); pcmcia_disable_device(link); diff --git a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c index e32c344d7ad..c2341af587a 100644 --- a/drivers/scsi/pcmcia/nsp_cs.c +++ b/drivers/scsi/pcmcia/nsp_cs.c @@ -1564,12 +1564,10 @@ static int nsp_cs_probe(struct pcmcia_device *link) link->io.IOAddrLines = 10; /* not used */ /* Interrupt setup */ - link->irq.Attributes = IRQ_TYPE_EXCLUSIVE | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_EXCLUSIVE; /* Interrupt handler */ link->irq.Handler = &nspintr; - link->irq.Instance = info; link->irq.Attributes |= IRQF_SHARED; /* General socket configuration */ @@ -1684,10 +1682,10 @@ static int nsp_cs_config_check(struct pcmcia_device *p_dev, if (cfg_mem->req.Size < 0x1000) cfg_mem->req.Size = 0x1000; cfg_mem->req.AccessSpeed = 0; - if (pcmcia_request_window(&p_dev, &cfg_mem->req, &p_dev->win) != 0) + if (pcmcia_request_window(p_dev, &cfg_mem->req, &p_dev->win) != 0) goto next_entry; map.Page = 0; map.CardOffset = mem->win[0].card_addr; - if (pcmcia_map_mem_page(p_dev->win, &map) != 0) + if (pcmcia_map_mem_page(p_dev, p_dev->win, &map) != 0) goto next_entry; cfg_mem->data->MmioAddress = (unsigned long) ioremap_nocache(cfg_mem->req.Base, cfg_mem->req.Size); diff --git a/drivers/scsi/pcmcia/qlogic_stub.c b/drivers/scsi/pcmcia/qlogic_stub.c index 20c3e5e6d88..f85f094870b 100644 --- a/drivers/scsi/pcmcia/qlogic_stub.c +++ b/drivers/scsi/pcmcia/qlogic_stub.c @@ -62,15 +62,6 @@ static char qlogic_name[] = "qlogic_cs"; -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0644); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = "qlogic_cs.c 1.79-ac 2002/10/26 (David Hinds)"; -#else -#define DEBUG(n, args...) -#endif - static struct scsi_host_template qlogicfas_driver_template = { .module = THIS_MODULE, .name = qlogic_name, @@ -159,7 +150,7 @@ static int qlogic_probe(struct pcmcia_device *link) { scsi_info_t *info; - DEBUG(0, "qlogic_attach()\n"); + dev_dbg(&link->dev, "qlogic_attach()\n"); /* Create new SCSI device */ info = kzalloc(sizeof(*info), GFP_KERNEL); @@ -171,7 +162,6 @@ static int qlogic_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; link->io.IOAddrLines = 10; link->irq.Attributes = IRQ_TYPE_EXCLUSIVE; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; link->conf.Present = PRESENT_OPTION; @@ -183,7 +173,7 @@ static int qlogic_probe(struct pcmcia_device *link) static void qlogic_detach(struct pcmcia_device *link) { - DEBUG(0, "qlogic_detach(0x%p)\n", link); + dev_dbg(&link->dev, "qlogic_detach\n"); qlogic_release(link); kfree(link->priv); @@ -192,9 +182,6 @@ static void qlogic_detach(struct pcmcia_device *link) /*====================================================================*/ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int qlogic_config_check(struct pcmcia_device *p_dev, cistpl_cftable_entry_t *cfg, cistpl_cftable_entry_t *dflt, @@ -213,19 +200,22 @@ static int qlogic_config_check(struct pcmcia_device *p_dev, static int qlogic_config(struct pcmcia_device * link) { scsi_info_t *info = link->priv; - int last_ret, last_fn; + int ret; struct Scsi_Host *host; - DEBUG(0, "qlogic_config(0x%p)\n", link); + dev_dbg(&link->dev, "qlogic_config\n"); - last_ret = pcmcia_loop_config(link, qlogic_config_check, NULL); - if (last_ret) { - cs_error(link, RequestIO, last_ret); + ret = pcmcia_loop_config(link, qlogic_config_check, NULL); + if (ret) + goto failed; + + ret = pcmcia_request_irq(link, &link->irq); + if (ret) goto failed; - } - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; if ((info->manf_id == MANFID_MACNICA) || (info->manf_id == MANFID_PIONEER) || (info->manf_id == 0x0098)) { /* set ATAcmd */ @@ -244,7 +234,7 @@ static int qlogic_config(struct pcmcia_device * link) if (!host) { printk(KERN_INFO "%s: no SCSI devices found\n", qlogic_name); - goto cs_failed; + goto failed; } sprintf(info->node.dev_name, "scsi%d", host->host_no); @@ -253,12 +243,9 @@ static int qlogic_config(struct pcmcia_device * link) return 0; -cs_failed: - cs_error(link, last_fn, last_ret); - pcmcia_disable_device(link); failed: + pcmcia_disable_device(link); return -ENODEV; - } /* qlogic_config */ /*====================================================================*/ @@ -267,7 +254,7 @@ static void qlogic_release(struct pcmcia_device *link) { scsi_info_t *info = link->priv; - DEBUG(0, "qlogic_release(0x%p)\n", link); + dev_dbg(&link->dev, "qlogic_release\n"); scsi_remove_host(info->host); diff --git a/drivers/scsi/pcmcia/sym53c500_cs.c b/drivers/scsi/pcmcia/sym53c500_cs.c index b330c11a175..e7564d8f0cb 100644 --- a/drivers/scsi/pcmcia/sym53c500_cs.c +++ b/drivers/scsi/pcmcia/sym53c500_cs.c @@ -77,17 +77,6 @@ #include <pcmcia/ds.h> #include <pcmcia/ciscode.h> -/* ================================================================== */ - -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = -"sym53c500_cs.c 0.9c 2004/10/27 (Bob Tracy)"; -#else -#define DEBUG(n, args...) -#endif /* ================================================================== */ @@ -525,7 +514,7 @@ SYM53C500_release(struct pcmcia_device *link) struct scsi_info_t *info = link->priv; struct Scsi_Host *shost = info->host; - DEBUG(0, "SYM53C500_release(0x%p)\n", link); + dev_dbg(&link->dev, "SYM53C500_release\n"); /* * Do this before releasing/freeing resources. @@ -697,9 +686,6 @@ static struct scsi_host_template sym53c500_driver_template = { .shost_attrs = SYM53C500_shost_attrs }; -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int SYM53C500_config_check(struct pcmcia_device *p_dev, cistpl_cftable_entry_t *cfg, cistpl_cftable_entry_t *dflt, @@ -719,24 +705,27 @@ static int SYM53C500_config(struct pcmcia_device *link) { struct scsi_info_t *info = link->priv; - int last_ret, last_fn; + int ret; int irq_level, port_base; struct Scsi_Host *host; struct scsi_host_template *tpnt = &sym53c500_driver_template; struct sym53c500_data *data; - DEBUG(0, "SYM53C500_config(0x%p)\n", link); + dev_dbg(&link->dev, "SYM53C500_config\n"); info->manf_id = link->manf_id; - last_ret = pcmcia_loop_config(link, SYM53C500_config_check, NULL); - if (last_ret) { - cs_error(link, RequestIO, last_ret); + ret = pcmcia_loop_config(link, SYM53C500_config_check, NULL); + if (ret) + goto failed; + + ret = pcmcia_request_irq(link, &link->irq); + if (ret) goto failed; - } - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; /* * That's the trouble with copying liberally from another driver. @@ -824,8 +813,6 @@ err_release: printk(KERN_INFO "sym53c500_cs: no SCSI devices found\n"); return -ENODEV; -cs_failed: - cs_error(link, last_fn, last_ret); failed: SYM53C500_release(link); return -ENODEV; @@ -855,7 +842,7 @@ static int sym53c500_resume(struct pcmcia_device *link) static void SYM53C500_detach(struct pcmcia_device *link) { - DEBUG(0, "SYM53C500_detach(0x%p)\n", link); + dev_dbg(&link->dev, "SYM53C500_detach\n"); SYM53C500_release(link); @@ -868,7 +855,7 @@ SYM53C500_probe(struct pcmcia_device *link) { struct scsi_info_t *info; - DEBUG(0, "SYM53C500_attach()\n"); + dev_dbg(&link->dev, "SYM53C500_attach()\n"); /* Create new SCSI device */ info = kzalloc(sizeof(*info), GFP_KERNEL); @@ -880,7 +867,6 @@ SYM53C500_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; link->io.IOAddrLines = 10; link->irq.Attributes = IRQ_TYPE_EXCLUSIVE; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; diff --git a/drivers/serial/bcm63xx_uart.c b/drivers/serial/bcm63xx_uart.c index beddaa6e906..37ad0c44993 100644 --- a/drivers/serial/bcm63xx_uart.c +++ b/drivers/serial/bcm63xx_uart.c @@ -242,7 +242,7 @@ static void bcm_uart_do_rx(struct uart_port *port) * higher than fifo size anyway since we're much faster than * serial port */ max_count = 32; - tty = port->info->port.tty; + tty = port->state->port.tty; do { unsigned int iestat, c, cstat; char flag; @@ -318,7 +318,7 @@ static void bcm_uart_do_tx(struct uart_port *port) return; } - xmit = &port->info->xmit; + xmit = &port->state->xmit; if (uart_circ_empty(xmit)) goto txq_empty; diff --git a/drivers/serial/of_serial.c b/drivers/serial/of_serial.c index 02406ba6da1..cdf172eda2e 100644 --- a/drivers/serial/of_serial.c +++ b/drivers/serial/of_serial.c @@ -161,6 +161,7 @@ static int of_platform_serial_remove(struct of_device *ofdev) static struct of_device_id __devinitdata of_platform_serial_table[] = { { .type = "serial", .compatible = "ns8250", .data = (void *)PORT_8250, }, { .type = "serial", .compatible = "ns16450", .data = (void *)PORT_16450, }, + { .type = "serial", .compatible = "ns16550a", .data = (void *)PORT_16550A, }, { .type = "serial", .compatible = "ns16550", .data = (void *)PORT_16550, }, { .type = "serial", .compatible = "ns16750", .data = (void *)PORT_16750, }, { .type = "serial", .compatible = "ns16850", .data = (void *)PORT_16850, }, diff --git a/drivers/serial/serial_cs.c b/drivers/serial/serial_cs.c index 7c7914f5fa0..fc413f0f8dd 100644 --- a/drivers/serial/serial_cs.c +++ b/drivers/serial/serial_cs.c @@ -54,14 +54,6 @@ #include "8250.h" -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0644); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = "serial_cs.c 1.134 2002/05/04 05:48:53 (David Hinds)"; -#else -#define DEBUG(n, args...) -#endif /*====================================================================*/ @@ -121,24 +113,20 @@ static void quirk_setup_brainboxes_0104(struct pcmcia_device *link, struct uart_ static int quirk_post_ibm(struct pcmcia_device *link) { conf_reg_t reg = { 0, CS_READ, 0x800, 0 }; - int last_ret, last_fn; + int ret; + + ret = pcmcia_access_configuration_register(link, ®); + if (ret) + goto failed; - last_ret = pcmcia_access_configuration_register(link, ®); - if (last_ret) { - last_fn = AccessConfigurationRegister; - goto cs_failed; - } reg.Action = CS_WRITE; reg.Value = reg.Value | 1; - last_ret = pcmcia_access_configuration_register(link, ®); - if (last_ret) { - last_fn = AccessConfigurationRegister; - goto cs_failed; - } + ret = pcmcia_access_configuration_register(link, ®); + if (ret) + goto failed; return 0; - cs_failed: - cs_error(link, last_fn, last_ret); + failed: return -ENODEV; } @@ -283,7 +271,7 @@ static void serial_remove(struct pcmcia_device *link) struct serial_info *info = link->priv; int i; - DEBUG(0, "serial_release(0x%p)\n", link); + dev_dbg(&link->dev, "serial_release\n"); /* * Recheck to see if the device is still configured. @@ -334,7 +322,7 @@ static int serial_probe(struct pcmcia_device *link) { struct serial_info *info; - DEBUG(0, "serial_attach()\n"); + dev_dbg(&link->dev, "serial_attach()\n"); /* Create new serial device */ info = kzalloc(sizeof (*info), GFP_KERNEL); @@ -346,7 +334,6 @@ static int serial_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; link->io.NumPorts1 = 8; link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->conf.Attributes = CONF_ENABLE_IRQ; if (do_sound) { link->conf.Attributes |= CONF_ENABLE_SPKR; @@ -370,7 +357,7 @@ static void serial_detach(struct pcmcia_device *link) { struct serial_info *info = link->priv; - DEBUG(0, "serial_detach(0x%p)\n", link); + dev_dbg(&link->dev, "serial_detach\n"); /* * Ensure any outstanding scheduled tasks are completed. @@ -399,7 +386,7 @@ static int setup_serial(struct pcmcia_device *handle, struct serial_info * info, port.irq = irq; port.flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ; port.uartclk = 1843200; - port.dev = &handle_to_dev(handle); + port.dev = &handle->dev; if (buggy_uart) port.flags |= UPF_BUGGY_UART; @@ -426,21 +413,6 @@ static int setup_serial(struct pcmcia_device *handle, struct serial_info * info, /*====================================================================*/ -static int -first_tuple(struct pcmcia_device *handle, tuple_t * tuple, cisparse_t * parse) -{ - int i; - i = pcmcia_get_first_tuple(handle, tuple); - if (i != 0) - return i; - i = pcmcia_get_tuple_data(handle, tuple); - if (i != 0) - return i; - return pcmcia_parse_tuple(tuple, parse); -} - -/*====================================================================*/ - static int simple_config_check(struct pcmcia_device *p_dev, cistpl_cftable_entry_t *cf, cistpl_cftable_entry_t *dflt, @@ -522,15 +494,13 @@ static int simple_config(struct pcmcia_device *link) printk(KERN_NOTICE "serial_cs: no usable port range found, giving up\n"); - cs_error(link, RequestIO, i); return -1; found_port: i = pcmcia_request_irq(link, &link->irq); - if (i != 0) { - cs_error(link, RequestIRQ, i); + if (i != 0) link->irq.AssignedIRQ = 0; - } + if (info->multi && (info->manfid == MANFID_3COM)) link->conf.ConfigIndex &= ~(0x08); @@ -541,10 +511,8 @@ found_port: info->quirk->config(link); i = pcmcia_request_configuration(link, &link->conf); - if (i != 0) { - cs_error(link, RequestConfiguration, i); + if (i != 0) return -1; - } return setup_serial(link, info, link->io.BasePort1, link->irq.AssignedIRQ); } @@ -613,7 +581,6 @@ static int multi_config(struct pcmcia_device *link) /* FIXME: comment does not fit, error handling does not fit */ printk(KERN_NOTICE "serial_cs: no usable port range found, giving up\n"); - cs_error(link, RequestIRQ, i); link->irq.AssignedIRQ = 0; } @@ -624,10 +591,8 @@ static int multi_config(struct pcmcia_device *link) info->quirk->config(link); i = pcmcia_request_configuration(link, &link->conf); - if (i != 0) { - cs_error(link, RequestConfiguration, i); + if (i != 0) return -ENODEV; - } /* The Oxford Semiconductor OXCF950 cards are in fact single-port: * 8 registers are for the UART, the others are extra registers. @@ -665,6 +630,25 @@ static int multi_config(struct pcmcia_device *link) return 0; } +static int serial_check_for_multi(struct pcmcia_device *p_dev, + cistpl_cftable_entry_t *cf, + cistpl_cftable_entry_t *dflt, + unsigned int vcc, + void *priv_data) +{ + struct serial_info *info = p_dev->priv; + + if ((cf->io.nwin == 1) && (cf->io.win[0].len % 8 == 0)) + info->multi = cf->io.win[0].len >> 3; + + if ((cf->io.nwin == 2) && (cf->io.win[0].len == 8) && + (cf->io.win[1].len == 8)) + info->multi = 2; + + return 0; /* break */ +} + + /*====================================================================== serial_config() is scheduled to run after a CARD_INSERTION event @@ -676,46 +660,14 @@ static int multi_config(struct pcmcia_device *link) static int serial_config(struct pcmcia_device * link) { struct serial_info *info = link->priv; - struct serial_cfg_mem *cfg_mem; - tuple_t *tuple; - u_char *buf; - cisparse_t *parse; - cistpl_cftable_entry_t *cf; - int i, last_ret, last_fn; - - DEBUG(0, "serial_config(0x%p)\n", link); - - cfg_mem = kmalloc(sizeof(struct serial_cfg_mem), GFP_KERNEL); - if (!cfg_mem) - goto failed; + int i; - tuple = &cfg_mem->tuple; - parse = &cfg_mem->parse; - cf = &parse->cftable_entry; - buf = cfg_mem->buf; - - tuple->TupleData = (cisdata_t *) buf; - tuple->TupleOffset = 0; - tuple->TupleDataMax = 255; - tuple->Attributes = 0; - - /* Get configuration register information */ - tuple->DesiredTuple = CISTPL_CONFIG; - last_ret = first_tuple(link, tuple, parse); - if (last_ret != 0) { - last_fn = ParseTuple; - goto cs_failed; - } - link->conf.ConfigBase = parse->config.base; - link->conf.Present = parse->config.rmask[0]; + dev_dbg(&link->dev, "serial_config\n"); /* Is this a compliant multifunction card? */ - tuple->DesiredTuple = CISTPL_LONGLINK_MFC; - tuple->Attributes = TUPLE_RETURN_COMMON | TUPLE_RETURN_LINK; - info->multi = (first_tuple(link, tuple, parse) == 0); + info->multi = (link->socket->functions > 1); /* Is this a multiport card? */ - tuple->DesiredTuple = CISTPL_MANFID; info->manfid = link->manf_id; info->prodid = link->card_id; @@ -730,20 +682,11 @@ static int serial_config(struct pcmcia_device * link) /* Another check for dual-serial cards: look for either serial or multifunction cards that ask for appropriate IO port ranges */ - tuple->DesiredTuple = CISTPL_FUNCID; if ((info->multi == 0) && (link->has_func_id) && ((link->func_id == CISTPL_FUNCID_MULTI) || - (link->func_id == CISTPL_FUNCID_SERIAL))) { - tuple->DesiredTuple = CISTPL_CFTABLE_ENTRY; - if (first_tuple(link, tuple, parse) == 0) { - if ((cf->io.nwin == 1) && (cf->io.win[0].len % 8 == 0)) - info->multi = cf->io.win[0].len >> 3; - if ((cf->io.nwin == 2) && (cf->io.win[0].len == 8) && - (cf->io.win[1].len == 8)) - info->multi = 2; - } - } + (link->func_id == CISTPL_FUNCID_SERIAL))) + pcmcia_loop_config(link, serial_check_for_multi, info); /* * Apply any multi-port quirk. @@ -768,14 +711,10 @@ static int serial_config(struct pcmcia_device * link) goto failed; link->dev_node = &info->node[0]; - kfree(cfg_mem); return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: serial_remove(link); - kfree(cfg_mem); return -ENODEV; } diff --git a/drivers/spi/spi_txx9.c b/drivers/spi/spi_txx9.c index 96057de133a..19f75627c3d 100644 --- a/drivers/spi/spi_txx9.c +++ b/drivers/spi/spi_txx9.c @@ -29,6 +29,8 @@ #define SPI_FIFO_SIZE 4 +#define SPI_MAX_DIVIDER 0xff /* Max. value for SPCR1.SER */ +#define SPI_MIN_DIVIDER 1 /* Min. value for SPCR1.SER */ #define TXx9_SPMCR 0x00 #define TXx9_SPCR0 0x04 @@ -193,11 +195,8 @@ static void txx9spi_work_one(struct txx9spi *c, struct spi_message *m) if (prev_speed_hz != speed_hz || prev_bits_per_word != bits_per_word) { - u32 n = (c->baseclk + speed_hz - 1) / speed_hz; - if (n < 1) - n = 1; - else if (n > 0xff) - n = 0xff; + int n = DIV_ROUND_UP(c->baseclk, speed_hz) - 1; + n = clamp(n, SPI_MIN_DIVIDER, SPI_MAX_DIVIDER); /* enter config mode */ txx9spi_wr(c, mcr | TXx9_SPMCR_CONFIG | TXx9_SPMCR_BCLR, TXx9_SPMCR); @@ -370,8 +369,8 @@ static int __init txx9spi_probe(struct platform_device *dev) goto exit; } c->baseclk = clk_get_rate(c->clk); - c->min_speed_hz = (c->baseclk + 0xff - 1) / 0xff; - c->max_speed_hz = c->baseclk; + c->min_speed_hz = DIV_ROUND_UP(c->baseclk, SPI_MAX_DIVIDER + 1); + c->max_speed_hz = c->baseclk / (SPI_MIN_DIVIDER + 1); res = platform_get_resource(dev, IORESOURCE_MEM, 0); if (!res) diff --git a/drivers/ssb/pcmcia.c b/drivers/ssb/pcmcia.c index 100e7a5c5ea..e72f4046a5e 100644 --- a/drivers/ssb/pcmcia.c +++ b/drivers/ssb/pcmcia.c @@ -617,136 +617,140 @@ static int ssb_pcmcia_sprom_check_crc(const u16 *sprom, size_t size) } \ } while (0) -int ssb_pcmcia_get_invariants(struct ssb_bus *bus, - struct ssb_init_invariants *iv) +static int ssb_pcmcia_get_mac(struct pcmcia_device *p_dev, + tuple_t *tuple, + void *priv) { - tuple_t tuple; - int res; - unsigned char buf[32]; + struct ssb_sprom *sprom = priv; + + if (tuple->TupleData[0] != CISTPL_FUNCE_LAN_NODE_ID) + return -EINVAL; + if (tuple->TupleDataLen != ETH_ALEN + 2) + return -EINVAL; + if (tuple->TupleData[1] != ETH_ALEN) + return -EINVAL; + memcpy(sprom->il0mac, &tuple->TupleData[2], ETH_ALEN); + return 0; +}; + +static int ssb_pcmcia_do_get_invariants(struct pcmcia_device *p_dev, + tuple_t *tuple, + void *priv) +{ + struct ssb_init_invariants *iv = priv; struct ssb_sprom *sprom = &iv->sprom; struct ssb_boardinfo *bi = &iv->boardinfo; const char *error_description; + GOTO_ERROR_ON(tuple->TupleDataLen < 1, "VEN tpl < 1"); + switch (tuple->TupleData[0]) { + case SSB_PCMCIA_CIS_ID: + GOTO_ERROR_ON((tuple->TupleDataLen != 5) && + (tuple->TupleDataLen != 7), + "id tpl size"); + bi->vendor = tuple->TupleData[1] | + ((u16)tuple->TupleData[2] << 8); + break; + case SSB_PCMCIA_CIS_BOARDREV: + GOTO_ERROR_ON(tuple->TupleDataLen != 2, + "boardrev tpl size"); + sprom->board_rev = tuple->TupleData[1]; + break; + case SSB_PCMCIA_CIS_PA: + GOTO_ERROR_ON((tuple->TupleDataLen != 9) && + (tuple->TupleDataLen != 10), + "pa tpl size"); + sprom->pa0b0 = tuple->TupleData[1] | + ((u16)tuple->TupleData[2] << 8); + sprom->pa0b1 = tuple->TupleData[3] | + ((u16)tuple->TupleData[4] << 8); + sprom->pa0b2 = tuple->TupleData[5] | + ((u16)tuple->TupleData[6] << 8); + sprom->itssi_a = tuple->TupleData[7]; + sprom->itssi_bg = tuple->TupleData[7]; + sprom->maxpwr_a = tuple->TupleData[8]; + sprom->maxpwr_bg = tuple->TupleData[8]; + break; + case SSB_PCMCIA_CIS_OEMNAME: + /* We ignore this. */ + break; + case SSB_PCMCIA_CIS_CCODE: + GOTO_ERROR_ON(tuple->TupleDataLen != 2, + "ccode tpl size"); + sprom->country_code = tuple->TupleData[1]; + break; + case SSB_PCMCIA_CIS_ANTENNA: + GOTO_ERROR_ON(tuple->TupleDataLen != 2, + "ant tpl size"); + sprom->ant_available_a = tuple->TupleData[1]; + sprom->ant_available_bg = tuple->TupleData[1]; + break; + case SSB_PCMCIA_CIS_ANTGAIN: + GOTO_ERROR_ON(tuple->TupleDataLen != 2, + "antg tpl size"); + sprom->antenna_gain.ghz24.a0 = tuple->TupleData[1]; + sprom->antenna_gain.ghz24.a1 = tuple->TupleData[1]; + sprom->antenna_gain.ghz24.a2 = tuple->TupleData[1]; + sprom->antenna_gain.ghz24.a3 = tuple->TupleData[1]; + sprom->antenna_gain.ghz5.a0 = tuple->TupleData[1]; + sprom->antenna_gain.ghz5.a1 = tuple->TupleData[1]; + sprom->antenna_gain.ghz5.a2 = tuple->TupleData[1]; + sprom->antenna_gain.ghz5.a3 = tuple->TupleData[1]; + break; + case SSB_PCMCIA_CIS_BFLAGS: + GOTO_ERROR_ON((tuple->TupleDataLen != 3) && + (tuple->TupleDataLen != 5), + "bfl tpl size"); + sprom->boardflags_lo = tuple->TupleData[1] | + ((u16)tuple->TupleData[2] << 8); + break; + case SSB_PCMCIA_CIS_LEDS: + GOTO_ERROR_ON(tuple->TupleDataLen != 5, + "leds tpl size"); + sprom->gpio0 = tuple->TupleData[1]; + sprom->gpio1 = tuple->TupleData[2]; + sprom->gpio2 = tuple->TupleData[3]; + sprom->gpio3 = tuple->TupleData[4]; + break; + } + return -ENOSPC; /* continue with next entry */ + +error: + ssb_printk(KERN_ERR PFX + "PCMCIA: Failed to fetch device invariants: %s\n", + error_description); + return -ENODEV; +} + + +int ssb_pcmcia_get_invariants(struct ssb_bus *bus, + struct ssb_init_invariants *iv) +{ + struct ssb_sprom *sprom = &iv->sprom; + int res; + memset(sprom, 0xFF, sizeof(*sprom)); sprom->revision = 1; sprom->boardflags_lo = 0; sprom->boardflags_hi = 0; /* First fetch the MAC address. */ - memset(&tuple, 0, sizeof(tuple)); - tuple.DesiredTuple = CISTPL_FUNCE; - tuple.TupleData = buf; - tuple.TupleDataMax = sizeof(buf); - res = pcmcia_get_first_tuple(bus->host_pcmcia, &tuple); - GOTO_ERROR_ON(res != 0, "MAC first tpl"); - res = pcmcia_get_tuple_data(bus->host_pcmcia, &tuple); - GOTO_ERROR_ON(res != 0, "MAC first tpl data"); - while (1) { - GOTO_ERROR_ON(tuple.TupleDataLen < 1, "MAC tpl < 1"); - if (tuple.TupleData[0] == CISTPL_FUNCE_LAN_NODE_ID) - break; - res = pcmcia_get_next_tuple(bus->host_pcmcia, &tuple); - GOTO_ERROR_ON(res != 0, "MAC next tpl"); - res = pcmcia_get_tuple_data(bus->host_pcmcia, &tuple); - GOTO_ERROR_ON(res != 0, "MAC next tpl data"); + res = pcmcia_loop_tuple(bus->host_pcmcia, CISTPL_FUNCE, + ssb_pcmcia_get_mac, sprom); + if (res != 0) { + ssb_printk(KERN_ERR PFX + "PCMCIA: Failed to fetch MAC address\n"); + return -ENODEV; } - GOTO_ERROR_ON(tuple.TupleDataLen != ETH_ALEN + 2, "MAC tpl size"); - memcpy(sprom->il0mac, &tuple.TupleData[2], ETH_ALEN); /* Fetch the vendor specific tuples. */ - memset(&tuple, 0, sizeof(tuple)); - tuple.DesiredTuple = SSB_PCMCIA_CIS; - tuple.TupleData = buf; - tuple.TupleDataMax = sizeof(buf); - res = pcmcia_get_first_tuple(bus->host_pcmcia, &tuple); - GOTO_ERROR_ON(res != 0, "VEN first tpl"); - res = pcmcia_get_tuple_data(bus->host_pcmcia, &tuple); - GOTO_ERROR_ON(res != 0, "VEN first tpl data"); - while (1) { - GOTO_ERROR_ON(tuple.TupleDataLen < 1, "VEN tpl < 1"); - switch (tuple.TupleData[0]) { - case SSB_PCMCIA_CIS_ID: - GOTO_ERROR_ON((tuple.TupleDataLen != 5) && - (tuple.TupleDataLen != 7), - "id tpl size"); - bi->vendor = tuple.TupleData[1] | - ((u16)tuple.TupleData[2] << 8); - break; - case SSB_PCMCIA_CIS_BOARDREV: - GOTO_ERROR_ON(tuple.TupleDataLen != 2, - "boardrev tpl size"); - sprom->board_rev = tuple.TupleData[1]; - break; - case SSB_PCMCIA_CIS_PA: - GOTO_ERROR_ON((tuple.TupleDataLen != 9) && - (tuple.TupleDataLen != 10), - "pa tpl size"); - sprom->pa0b0 = tuple.TupleData[1] | - ((u16)tuple.TupleData[2] << 8); - sprom->pa0b1 = tuple.TupleData[3] | - ((u16)tuple.TupleData[4] << 8); - sprom->pa0b2 = tuple.TupleData[5] | - ((u16)tuple.TupleData[6] << 8); - sprom->itssi_a = tuple.TupleData[7]; - sprom->itssi_bg = tuple.TupleData[7]; - sprom->maxpwr_a = tuple.TupleData[8]; - sprom->maxpwr_bg = tuple.TupleData[8]; - break; - case SSB_PCMCIA_CIS_OEMNAME: - /* We ignore this. */ - break; - case SSB_PCMCIA_CIS_CCODE: - GOTO_ERROR_ON(tuple.TupleDataLen != 2, - "ccode tpl size"); - sprom->country_code = tuple.TupleData[1]; - break; - case SSB_PCMCIA_CIS_ANTENNA: - GOTO_ERROR_ON(tuple.TupleDataLen != 2, - "ant tpl size"); - sprom->ant_available_a = tuple.TupleData[1]; - sprom->ant_available_bg = tuple.TupleData[1]; - break; - case SSB_PCMCIA_CIS_ANTGAIN: - GOTO_ERROR_ON(tuple.TupleDataLen != 2, - "antg tpl size"); - sprom->antenna_gain.ghz24.a0 = tuple.TupleData[1]; - sprom->antenna_gain.ghz24.a1 = tuple.TupleData[1]; - sprom->antenna_gain.ghz24.a2 = tuple.TupleData[1]; - sprom->antenna_gain.ghz24.a3 = tuple.TupleData[1]; - sprom->antenna_gain.ghz5.a0 = tuple.TupleData[1]; - sprom->antenna_gain.ghz5.a1 = tuple.TupleData[1]; - sprom->antenna_gain.ghz5.a2 = tuple.TupleData[1]; - sprom->antenna_gain.ghz5.a3 = tuple.TupleData[1]; - break; - case SSB_PCMCIA_CIS_BFLAGS: - GOTO_ERROR_ON((tuple.TupleDataLen != 3) && - (tuple.TupleDataLen != 5), - "bfl tpl size"); - sprom->boardflags_lo = tuple.TupleData[1] | - ((u16)tuple.TupleData[2] << 8); - break; - case SSB_PCMCIA_CIS_LEDS: - GOTO_ERROR_ON(tuple.TupleDataLen != 5, - "leds tpl size"); - sprom->gpio0 = tuple.TupleData[1]; - sprom->gpio1 = tuple.TupleData[2]; - sprom->gpio2 = tuple.TupleData[3]; - sprom->gpio3 = tuple.TupleData[4]; - break; - } - res = pcmcia_get_next_tuple(bus->host_pcmcia, &tuple); - if (res == -ENOSPC) - break; - GOTO_ERROR_ON(res != 0, "VEN next tpl"); - res = pcmcia_get_tuple_data(bus->host_pcmcia, &tuple); - GOTO_ERROR_ON(res != 0, "VEN next tpl data"); - } + res = pcmcia_loop_tuple(bus->host_pcmcia, SSB_PCMCIA_CIS, + ssb_pcmcia_do_get_invariants, sprom); + if ((res == 0) || (res == -ENOSPC)) + return 0; - return 0; -error: ssb_printk(KERN_ERR PFX - "PCMCIA: Failed to fetch device invariants: %s\n", - error_description); + "PCMCIA: Failed to fetch device invariants\n"); return -ENODEV; } diff --git a/drivers/staging/comedi/drivers/cb_das16_cs.c b/drivers/staging/comedi/drivers/cb_das16_cs.c index 80c0df8656f..39923cb388b 100644 --- a/drivers/staging/comedi/drivers/cb_das16_cs.c +++ b/drivers/staging/comedi/drivers/cb_das16_cs.c @@ -141,37 +141,14 @@ static int das16cs_timer_insn_config(struct comedi_device *dev, struct comedi_insn *insn, unsigned int *data); -static int get_prodid(struct comedi_device *dev, struct pcmcia_device *link) -{ - tuple_t tuple; - u_short buf[128]; - int prodid = 0; - - tuple.TupleData = (cisdata_t *) buf; - tuple.TupleOffset = 0; - tuple.TupleDataMax = 255; - tuple.DesiredTuple = CISTPL_MANFID; - tuple.Attributes = TUPLE_RETURN_COMMON; - if ((pcmcia_get_first_tuple(link, &tuple) == 0) && - (pcmcia_get_tuple_data(link, &tuple) == 0)) { - prodid = le16_to_cpu(buf[1]); - } - - return prodid; -} - static const struct das16cs_board *das16cs_probe(struct comedi_device *dev, struct pcmcia_device *link) { - int id; int i; - id = get_prodid(dev, link); - for (i = 0; i < n_boards; i++) { - if (das16cs_boards[i].device_id == id) { + if (das16cs_boards[i].device_id == link->card_id) return das16cs_boards + i; - } } printk("unknown board!\n"); @@ -660,27 +637,8 @@ static int das16cs_timer_insn_config(struct comedi_device *dev, ======================================================================*/ -/* - All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - you do not define PCMCIA_DEBUG at all, all the debug code will be - left out. If you compile with PCMCIA_DEBUG=0, the debug code will - be present but disabled -- but it can then be enabled for specific - modules at load time with a 'pc_debug=#' option to insmod. -*/ #if defined(CONFIG_PCMCIA) || defined(CONFIG_PCMCIA_MODULE) -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0644); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = - "cb_das16_cs.c pcmcia code (David Schleef), modified from dummy_cs.c 1.31 2001/08/24 12:13:13 (David Hinds)"; -#else -#define DEBUG(n, args...) -#endif - -/*====================================================================*/ - static void das16cs_pcmcia_config(struct pcmcia_device *link); static void das16cs_pcmcia_release(struct pcmcia_device *link); static int das16cs_pcmcia_suspend(struct pcmcia_device *p_dev); @@ -733,7 +691,7 @@ static int das16cs_pcmcia_attach(struct pcmcia_device *link) { struct local_info_t *local; - DEBUG(0, "das16cs_pcmcia_attach()\n"); + dev_dbg(&link->dev, "das16cs_pcmcia_attach()\n"); /* Allocate space for private device-specific data */ local = kzalloc(sizeof(struct local_info_t), GFP_KERNEL); @@ -745,7 +703,6 @@ static int das16cs_pcmcia_attach(struct pcmcia_device *link) /* Initialize the pcmcia_device structure */ /* Interrupt setup */ link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->irq.Handler = NULL; link->conf.Attributes = 0; @@ -760,7 +717,7 @@ static int das16cs_pcmcia_attach(struct pcmcia_device *link) static void das16cs_pcmcia_detach(struct pcmcia_device *link) { - DEBUG(0, "das16cs_pcmcia_detach(0x%p)\n", link); + dev_dbg(&link->dev, "das16cs_pcmcia_detach\n"); if (link->dev_node) { ((struct local_info_t *)link->priv)->stop = 1; @@ -771,118 +728,55 @@ static void das16cs_pcmcia_detach(struct pcmcia_device *link) kfree(link->priv); } /* das16cs_pcmcia_detach */ -static void das16cs_pcmcia_config(struct pcmcia_device *link) -{ - struct local_info_t *dev = link->priv; - tuple_t tuple; - cisparse_t parse; - int last_fn, last_ret; - u_char buf[64]; - cistpl_cftable_entry_t dflt = { 0 }; - DEBUG(0, "das16cs_pcmcia_config(0x%p)\n", link); - - /* - This reads the card's CONFIG tuple to find its configuration - registers. - */ - tuple.DesiredTuple = CISTPL_CONFIG; - tuple.Attributes = 0; - tuple.TupleData = buf; - tuple.TupleDataMax = sizeof(buf); - tuple.TupleOffset = 0; - - last_fn = GetFirstTuple; - last_ret = pcmcia_get_first_tuple(link, &tuple); - if (last_ret != 0) - goto cs_failed; - - last_fn = GetTupleData; - last_ret = pcmcia_get_tuple_data(link, &tuple); - if (last_ret != 0) - goto cs_failed; - - last_fn = ParseTuple; - last_ret = pcmcia_parse_tuple(&tuple, &parse); - if (last_ret != 0) - goto cs_failed; - - link->conf.ConfigBase = parse.config.base; - link->conf.Present = parse.config.rmask[0]; +static int das16cs_pcmcia_config_loop(struct pcmcia_device *p_dev, + cistpl_cftable_entry_t *cfg, + cistpl_cftable_entry_t *dflt, + unsigned int vcc, + void *priv_data) +{ + if (cfg->index == 0) + return -EINVAL; - /* - In this loop, we scan the CIS for configuration table entries, - each of which describes a valid card configuration, including - voltage, IO window, memory window, and interrupt settings. - - We make no assumptions about the card to be configured: we use - just the information available in the CIS. In an ideal world, - this would work for any PCMCIA card, but it requires a complete - and accurate CIS. In practice, a driver usually "knows" most of - these things without consulting the CIS, and most client drivers - will only use the CIS to fill in implementation-defined details. - */ - tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY; - last_fn = GetFirstTuple; - - last_ret = pcmcia_get_first_tuple(link, &tuple); - if (last_ret) - goto cs_failed; - - while (1) { - cistpl_cftable_entry_t *cfg = &(parse.cftable_entry); - if (pcmcia_get_tuple_data(link, &tuple)) - goto next_entry; - if (pcmcia_parse_tuple(&tuple, &parse)) - goto next_entry; - - if (cfg->flags & CISTPL_CFTABLE_DEFAULT) - dflt = *cfg; - if (cfg->index == 0) - goto next_entry; - link->conf.ConfigIndex = cfg->index; - - /* Does this card need audio output? */ -/* if (cfg->flags & CISTPL_CFTABLE_AUDIO) { - link->conf.Attributes |= CONF_ENABLE_SPKR; - link->conf.Status = CCSR_AUDIO_ENA; - } -*/ - /* Do we need to allocate an interrupt? */ - if (cfg->irq.IRQInfo1 || dflt.irq.IRQInfo1) - link->conf.Attributes |= CONF_ENABLE_IRQ; - - /* IO window settings */ - link->io.NumPorts1 = link->io.NumPorts2 = 0; - if ((cfg->io.nwin > 0) || (dflt.io.nwin > 0)) { - cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt.io; - link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; - if (!(io->flags & CISTPL_IO_8BIT)) - link->io.Attributes1 = IO_DATA_PATH_WIDTH_16; - if (!(io->flags & CISTPL_IO_16BIT)) - link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; - link->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK; - link->io.BasePort1 = io->win[0].base; - link->io.NumPorts1 = io->win[0].len; - if (io->nwin > 1) { - link->io.Attributes2 = link->io.Attributes1; - link->io.BasePort2 = io->win[1].base; - link->io.NumPorts2 = io->win[1].len; - } - /* This reserves IO space but doesn't actually enable it */ - if (pcmcia_request_io(link, &link->io)) - goto next_entry; + /* Do we need to allocate an interrupt? */ + if (cfg->irq.IRQInfo1 || dflt->irq.IRQInfo1) + p_dev->conf.Attributes |= CONF_ENABLE_IRQ; + + /* IO window settings */ + p_dev->io.NumPorts1 = p_dev->io.NumPorts2 = 0; + if ((cfg->io.nwin > 0) || (dflt->io.nwin > 0)) { + cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt->io; + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; + if (!(io->flags & CISTPL_IO_8BIT)) + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16; + if (!(io->flags & CISTPL_IO_16BIT)) + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8; + p_dev->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK; + p_dev->io.BasePort1 = io->win[0].base; + p_dev->io.NumPorts1 = io->win[0].len; + if (io->nwin > 1) { + p_dev->io.Attributes2 = p_dev->io.Attributes1; + p_dev->io.BasePort2 = io->win[1].base; + p_dev->io.NumPorts2 = io->win[1].len; } + /* This reserves IO space but doesn't actually enable it */ + return pcmcia_request_io(p_dev, &p_dev->io); + } - /* If we got this far, we're cool! */ - break; + return 0; +} + +static void das16cs_pcmcia_config(struct pcmcia_device *link) +{ + struct local_info_t *dev = link->priv; + int ret; -next_entry: - last_fn = GetNextTuple; + dev_dbg(&link->dev, "das16cs_pcmcia_config\n"); - last_ret = pcmcia_get_next_tuple(link, &tuple); - if (last_ret) - goto cs_failed; + ret = pcmcia_loop_config(link, das16cs_pcmcia_config_loop, NULL); + if (ret) { + dev_warn(&link->dev, "no configuration found\n"); + goto failed; } /* @@ -891,21 +785,18 @@ next_entry: irq structure is initialized. */ if (link->conf.Attributes & CONF_ENABLE_IRQ) { - last_fn = RequestIRQ; - - last_ret = pcmcia_request_irq(link, &link->irq); - if (last_ret) - goto cs_failed; + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; } /* This actually configures the PCMCIA socket -- setting up the I/O windows and the interrupt mapping, and putting the card and host interface into "Memory and IO" mode. */ - last_fn = RequestConfiguration; - last_ret = pcmcia_request_configuration(link, &link->conf); - if (last_ret) - goto cs_failed; + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; /* At this point, the dev_node_t structure(s) need to be @@ -930,14 +821,13 @@ next_entry: return; -cs_failed: - cs_error(link, last_fn, last_ret); +failed: das16cs_pcmcia_release(link); } /* das16cs_pcmcia_config */ static void das16cs_pcmcia_release(struct pcmcia_device *link) { - DEBUG(0, "das16cs_pcmcia_release(0x%p)\n", link); + dev_dbg(&link->dev, "das16cs_pcmcia_release\n"); pcmcia_disable_device(link); } /* das16cs_pcmcia_release */ @@ -983,14 +873,13 @@ struct pcmcia_driver das16cs_driver = { static int __init init_das16cs_pcmcia_cs(void) { - DEBUG(0, "%s\n", version); pcmcia_register_driver(&das16cs_driver); return 0; } static void __exit exit_das16cs_pcmcia_cs(void) { - DEBUG(0, "das16cs_pcmcia_cs: unloading\n"); + pr_debug("das16cs_pcmcia_cs: unloading\n"); pcmcia_unregister_driver(&das16cs_driver); } diff --git a/drivers/staging/comedi/drivers/das08_cs.c b/drivers/staging/comedi/drivers/das08_cs.c index 9cab21eaaa1..9b945e5fdd3 100644 --- a/drivers/staging/comedi/drivers/das08_cs.c +++ b/drivers/staging/comedi/drivers/das08_cs.c @@ -110,25 +110,6 @@ static int das08_cs_attach(struct comedi_device *dev, ======================================================================*/ -/* - All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - you do not define PCMCIA_DEBUG at all, all the debug code will be - left out. If you compile with PCMCIA_DEBUG=0, the debug code will - be present but disabled -- but it can then be enabled for specific - modules at load time with a 'pc_debug=#' option to insmod. -*/ - -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0644); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static const char *version = - "das08.c pcmcia code (Frank Hess), modified from dummy_cs.c 1.31 2001/08/24 12:13:13 (David Hinds)"; -#else -#define DEBUG(n, args...) -#endif - -/*====================================================================*/ static void das08_pcmcia_config(struct pcmcia_device *link); static void das08_pcmcia_release(struct pcmcia_device *link); static int das08_pcmcia_suspend(struct pcmcia_device *p_dev); @@ -181,7 +162,7 @@ static int das08_pcmcia_attach(struct pcmcia_device *link) { struct local_info_t *local; - DEBUG(0, "das08_pcmcia_attach()\n"); + dev_dbg(&link->dev, "das08_pcmcia_attach()\n"); /* Allocate space for private device-specific data */ local = kzalloc(sizeof(struct local_info_t), GFP_KERNEL); @@ -192,7 +173,6 @@ static int das08_pcmcia_attach(struct pcmcia_device *link) /* Interrupt setup */ link->irq.Attributes = IRQ_TYPE_EXCLUSIVE; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->irq.Handler = NULL; /* @@ -224,7 +204,7 @@ static int das08_pcmcia_attach(struct pcmcia_device *link) static void das08_pcmcia_detach(struct pcmcia_device *link) { - DEBUG(0, "das08_pcmcia_detach(0x%p)\n", link); + dev_dbg(&link->dev, "das08_pcmcia_detach\n"); if (link->dev_node) { ((struct local_info_t *)link->priv)->stop = 1; @@ -237,6 +217,44 @@ static void das08_pcmcia_detach(struct pcmcia_device *link) } /* das08_pcmcia_detach */ + +static int das08_pcmcia_config_loop(struct pcmcia_device *p_dev, + cistpl_cftable_entry_t *cfg, + cistpl_cftable_entry_t *dflt, + unsigned int vcc, + void *priv_data) +{ + if (cfg->index == 0) + return -ENODEV; + + /* Do we need to allocate an interrupt? */ + if (cfg->irq.IRQInfo1 || dflt->irq.IRQInfo1) + p_dev->conf.Attributes |= CONF_ENABLE_IRQ; + + /* IO window settings */ + p_dev->io.NumPorts1 = p_dev->io.NumPorts2 = 0; + if ((cfg->io.nwin > 0) || (dflt->io.nwin > 0)) { + cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt->io; + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; + if (!(io->flags & CISTPL_IO_8BIT)) + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16; + if (!(io->flags & CISTPL_IO_16BIT)) + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8; + p_dev->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK; + p_dev->io.BasePort1 = io->win[0].base; + p_dev->io.NumPorts1 = io->win[0].len; + if (io->nwin > 1) { + p_dev->io.Attributes2 = p_dev->io.Attributes1; + p_dev->io.BasePort2 = io->win[1].base; + p_dev->io.NumPorts2 = io->win[1].len; + } + /* This reserves IO space but doesn't actually enable it */ + return pcmcia_request_io(p_dev, &p_dev->io); + } + return 0; +} + + /*====================================================================== das08_pcmcia_config() is scheduled to run after a CARD_INSERTION event @@ -248,128 +266,20 @@ static void das08_pcmcia_detach(struct pcmcia_device *link) static void das08_pcmcia_config(struct pcmcia_device *link) { struct local_info_t *dev = link->priv; - tuple_t tuple; - cisparse_t parse; - int last_fn, last_ret; - u_char buf[64]; - cistpl_cftable_entry_t dflt = { 0 }; - - DEBUG(0, "das08_pcmcia_config(0x%p)\n", link); - - /* - This reads the card's CONFIG tuple to find its configuration - registers. - */ - tuple.DesiredTuple = CISTPL_CONFIG; - tuple.Attributes = 0; - tuple.TupleData = buf; - tuple.TupleDataMax = sizeof(buf); - tuple.TupleOffset = 0; - last_fn = GetFirstTuple; - - last_ret = pcmcia_get_first_tuple(link, &tuple); - if (last_ret) - goto cs_failed; - - last_fn = GetTupleData; - - last_ret = pcmcia_get_tuple_data(link, &tuple); - if (last_ret) - goto cs_failed; - - last_fn = ParseTuple; - - last_ret = pcmcia_parse_tuple(&tuple, &parse); - if (last_ret) - goto cs_failed; - - link->conf.ConfigBase = parse.config.base; - link->conf.Present = parse.config.rmask[0]; - - /* - In this loop, we scan the CIS for configuration table entries, - each of which describes a valid card configuration, including - voltage, IO window, memory window, and interrupt settings. - - We make no assumptions about the card to be configured: we use - just the information available in the CIS. In an ideal world, - this would work for any PCMCIA card, but it requires a complete - and accurate CIS. In practice, a driver usually "knows" most of - these things without consulting the CIS, and most client drivers - will only use the CIS to fill in implementation-defined details. - */ - tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY; - last_fn = GetFirstTuple; - - last_ret = pcmcia_get_first_tuple(link, &tuple); - if (last_ret) - goto cs_failed; - - while (1) { - cistpl_cftable_entry_t *cfg = &(parse.cftable_entry); - - last_ret = pcmcia_get_tuple_data(link, &tuple); - if (last_ret) - goto next_entry; - - last_ret = pcmcia_parse_tuple(&tuple, &parse); - if (last_ret) - goto next_entry; - - if (cfg->flags & CISTPL_CFTABLE_DEFAULT) - dflt = *cfg; - if (cfg->index == 0) - goto next_entry; - link->conf.ConfigIndex = cfg->index; - - /* Does this card need audio output? */ -/* if (cfg->flags & CISTPL_CFTABLE_AUDIO) { - link->conf.Attributes |= CONF_ENABLE_SPKR; - link->conf.Status = CCSR_AUDIO_ENA; - } -*/ - /* Do we need to allocate an interrupt? */ - if (cfg->irq.IRQInfo1 || dflt.irq.IRQInfo1) - link->conf.Attributes |= CONF_ENABLE_IRQ; - - /* IO window settings */ - link->io.NumPorts1 = link->io.NumPorts2 = 0; - if ((cfg->io.nwin > 0) || (dflt.io.nwin > 0)) { - cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt.io; - link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; - if (!(io->flags & CISTPL_IO_8BIT)) - link->io.Attributes1 = IO_DATA_PATH_WIDTH_16; - if (!(io->flags & CISTPL_IO_16BIT)) - link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; - link->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK; - link->io.BasePort1 = io->win[0].base; - link->io.NumPorts1 = io->win[0].len; - if (io->nwin > 1) { - link->io.Attributes2 = link->io.Attributes1; - link->io.BasePort2 = io->win[1].base; - link->io.NumPorts2 = io->win[1].len; - } - /* This reserves IO space but doesn't actually enable it */ - if (pcmcia_request_io(link, &link->io) != 0) - goto next_entry; - } - - /* If we got this far, we're cool! */ - break; + int ret; -next_entry: - last_fn = GetNextTuple; + dev_dbg(&link->dev, "das08_pcmcia_config\n"); - last_ret = pcmcia_get_next_tuple(link, &tuple); - if (last_ret) - goto cs_failed; + ret = pcmcia_loop_config(link, das08_pcmcia_config_loop, NULL); + if (ret) { + dev_warn(&link->dev, "no configuration found\n"); + goto failed; } if (link->conf.Attributes & CONF_ENABLE_IRQ) { - last_fn = RequestIRQ; - last_ret = pcmcia_request_irq(link, &link->irq); - if (last_ret) - goto cs_failed; + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; } /* @@ -377,10 +287,9 @@ next_entry: the I/O windows and the interrupt mapping, and putting the card and host interface into "Memory and IO" mode. */ - last_fn = RequestConfiguration; - last_ret = pcmcia_request_configuration(link, &link->conf); - if (last_ret) - goto cs_failed; + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; /* At this point, the dev_node_t structure(s) need to be @@ -405,8 +314,7 @@ next_entry: return; -cs_failed: - cs_error(link, last_fn, last_ret); +failed: das08_pcmcia_release(link); } /* das08_pcmcia_config */ @@ -421,7 +329,7 @@ cs_failed: static void das08_pcmcia_release(struct pcmcia_device *link) { - DEBUG(0, "das08_pcmcia_release(0x%p)\n", link); + dev_dbg(&link->dev, "das08_pcmcia_release\n"); pcmcia_disable_device(link); } /* das08_pcmcia_release */ @@ -477,14 +385,13 @@ struct pcmcia_driver das08_cs_driver = { static int __init init_das08_pcmcia_cs(void) { - DEBUG(0, "%s\n", version); pcmcia_register_driver(&das08_cs_driver); return 0; } static void __exit exit_das08_pcmcia_cs(void) { - DEBUG(0, "das08_pcmcia_cs: unloading\n"); + pr_debug("das08_pcmcia_cs: unloading\n"); pcmcia_unregister_driver(&das08_cs_driver); } diff --git a/drivers/staging/comedi/drivers/ni_daq_700.c b/drivers/staging/comedi/drivers/ni_daq_700.c index ec31a397066..ef5e1183d47 100644 --- a/drivers/staging/comedi/drivers/ni_daq_700.c +++ b/drivers/staging/comedi/drivers/ni_daq_700.c @@ -436,25 +436,7 @@ static int dio700_detach(struct comedi_device *dev) return 0; }; -/* PCMCIA crap */ - -/* - All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - you do not define PCMCIA_DEBUG at all, all the debug code will be - left out. If you compile with PCMCIA_DEBUG=0, the debug code will - be present but disabled -- but it can then be enabled for specific - modules at load time with a 'pc_debug=#' option to insmod. -*/ -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0644); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = "ni_daq_700.c, based on dummy_cs.c"; -#else -#define DEBUG(n, args...) -#endif - -/*====================================================================*/ +/* PCMCIA crap -- watch your words, please! */ static void dio700_config(struct pcmcia_device *link); static void dio700_release(struct pcmcia_device *link); @@ -510,7 +492,7 @@ static int dio700_cs_attach(struct pcmcia_device *link) printk(KERN_INFO "ni_daq_700: cs-attach\n"); - DEBUG(0, "dio700_cs_attach()\n"); + dev_dbg(&link->dev, "dio700_cs_attach()\n"); /* Allocate space for private device-specific data */ local = kzalloc(sizeof(struct local_info_t), GFP_KERNEL); @@ -521,7 +503,6 @@ static int dio700_cs_attach(struct pcmcia_device *link) /* Interrupt setup */ link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->irq.Handler = NULL; /* @@ -555,7 +536,7 @@ static void dio700_cs_detach(struct pcmcia_device *link) printk(KERN_INFO "ni_daq_700: cs-detach!\n"); - DEBUG(0, "dio700_cs_detach(0x%p)\n", link); + dev_dbg(&link->dev, "dio700_cs_detach\n"); if (link->dev_node) { ((struct local_info_t *)link->priv)->stop = 1; @@ -576,141 +557,85 @@ static void dio700_cs_detach(struct pcmcia_device *link) ======================================================================*/ -static void dio700_config(struct pcmcia_device *link) +static int dio700_pcmcia_config_loop(struct pcmcia_device *p_dev, + cistpl_cftable_entry_t *cfg, + cistpl_cftable_entry_t *dflt, + unsigned int vcc, + void *priv_data) { - struct local_info_t *dev = link->priv; - tuple_t tuple; - cisparse_t parse; - int last_ret; - u_char buf[64]; - win_req_t req; + win_req_t *req = priv_data; memreq_t map; - cistpl_cftable_entry_t dflt = { 0 }; - printk(KERN_INFO "ni_daq_700: cs-config\n"); - - DEBUG(0, "dio700_config(0x%p)\n", link); + if (cfg->index == 0) + return -ENODEV; - /* - This reads the card's CONFIG tuple to find its configuration - registers. - */ - tuple.DesiredTuple = CISTPL_CONFIG; - tuple.Attributes = 0; - tuple.TupleData = buf; - tuple.TupleDataMax = sizeof(buf); - tuple.TupleOffset = 0; - - last_ret = pcmcia_get_first_tuple(link, &tuple); - if (last_ret) { - cs_error(link, GetFirstTuple, last_ret); - goto cs_failed; + /* Does this card need audio output? */ + if (cfg->flags & CISTPL_CFTABLE_AUDIO) { + p_dev->conf.Attributes |= CONF_ENABLE_SPKR; + p_dev->conf.Status = CCSR_AUDIO_ENA; } - last_ret = pcmcia_get_tuple_data(link, &tuple); - if (last_ret) { - cs_error(link, GetTupleData, last_ret); - goto cs_failed; + /* Do we need to allocate an interrupt? */ + if (cfg->irq.IRQInfo1 || dflt->irq.IRQInfo1) + p_dev->conf.Attributes |= CONF_ENABLE_IRQ; + + /* IO window settings */ + p_dev->io.NumPorts1 = p_dev->io.NumPorts2 = 0; + if ((cfg->io.nwin > 0) || (dflt->io.nwin > 0)) { + cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt->io; + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; + if (!(io->flags & CISTPL_IO_8BIT)) + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16; + if (!(io->flags & CISTPL_IO_16BIT)) + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8; + p_dev->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK; + p_dev->io.BasePort1 = io->win[0].base; + p_dev->io.NumPorts1 = io->win[0].len; + if (io->nwin > 1) { + p_dev->io.Attributes2 = p_dev->io.Attributes1; + p_dev->io.BasePort2 = io->win[1].base; + p_dev->io.NumPorts2 = io->win[1].len; + } + /* This reserves IO space but doesn't actually enable it */ + if (pcmcia_request_io(p_dev, &p_dev->io) != 0) + return -ENODEV; } - last_ret = pcmcia_parse_tuple(&tuple, &parse); - if (last_ret) { - cs_error(link, ParseTuple, last_ret); - goto cs_failed; + if ((cfg->mem.nwin > 0) || (dflt->mem.nwin > 0)) { + cistpl_mem_t *mem = + (cfg->mem.nwin) ? &cfg->mem : &dflt->mem; + req->Attributes = WIN_DATA_WIDTH_16 | WIN_MEMORY_TYPE_CM; + req->Attributes |= WIN_ENABLE; + req->Base = mem->win[0].host_addr; + req->Size = mem->win[0].len; + if (req->Size < 0x1000) + req->Size = 0x1000; + req->AccessSpeed = 0; + if (pcmcia_request_window(p_dev, req, &p_dev->win)) + return -ENODEV; + map.Page = 0; + map.CardOffset = mem->win[0].card_addr; + if (pcmcia_map_mem_page(p_dev, p_dev->win, &map)) + return -ENODEV; } - link->conf.ConfigBase = parse.config.base; - link->conf.Present = parse.config.rmask[0]; - - /* - In this loop, we scan the CIS for configuration table entries, - each of which describes a valid card configuration, including - voltage, IO window, memory window, and interrupt settings. - - We make no assumptions about the card to be configured: we use - just the information available in the CIS. In an ideal world, - this would work for any PCMCIA card, but it requires a complete - and accurate CIS. In practice, a driver usually "knows" most of - these things without consulting the CIS, and most client drivers - will only use the CIS to fill in implementation-defined details. - */ - tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY; - last_ret = pcmcia_get_first_tuple(link, &tuple); - if (last_ret != 0) { - cs_error(link, GetFirstTuple, last_ret); - goto cs_failed; - } - while (1) { - cistpl_cftable_entry_t *cfg = &(parse.cftable_entry); - if (pcmcia_get_tuple_data(link, &tuple) != 0) - goto next_entry; - if (pcmcia_parse_tuple(&tuple, &parse) != 0) - goto next_entry; - - if (cfg->flags & CISTPL_CFTABLE_DEFAULT) - dflt = *cfg; - if (cfg->index == 0) - goto next_entry; - link->conf.ConfigIndex = cfg->index; - - /* Does this card need audio output? */ - if (cfg->flags & CISTPL_CFTABLE_AUDIO) { - link->conf.Attributes |= CONF_ENABLE_SPKR; - link->conf.Status = CCSR_AUDIO_ENA; - } + /* If we got this far, we're cool! */ + return 0; +} - /* Do we need to allocate an interrupt? */ - if (cfg->irq.IRQInfo1 || dflt.irq.IRQInfo1) - link->conf.Attributes |= CONF_ENABLE_IRQ; - - /* IO window settings */ - link->io.NumPorts1 = link->io.NumPorts2 = 0; - if ((cfg->io.nwin > 0) || (dflt.io.nwin > 0)) { - cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt.io; - link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; - if (!(io->flags & CISTPL_IO_8BIT)) - link->io.Attributes1 = IO_DATA_PATH_WIDTH_16; - if (!(io->flags & CISTPL_IO_16BIT)) - link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; - link->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK; - link->io.BasePort1 = io->win[0].base; - link->io.NumPorts1 = io->win[0].len; - if (io->nwin > 1) { - link->io.Attributes2 = link->io.Attributes1; - link->io.BasePort2 = io->win[1].base; - link->io.NumPorts2 = io->win[1].len; - } - /* This reserves IO space but doesn't actually enable it */ - if (pcmcia_request_io(link, &link->io) != 0) - goto next_entry; - } +static void dio700_config(struct pcmcia_device *link) +{ + struct local_info_t *dev = link->priv; + win_req_t req; + int ret; - if ((cfg->mem.nwin > 0) || (dflt.mem.nwin > 0)) { - cistpl_mem_t *mem = - (cfg->mem.nwin) ? &cfg->mem : &dflt.mem; - req.Attributes = WIN_DATA_WIDTH_16 | WIN_MEMORY_TYPE_CM; - req.Attributes |= WIN_ENABLE; - req.Base = mem->win[0].host_addr; - req.Size = mem->win[0].len; - if (req.Size < 0x1000) - req.Size = 0x1000; - req.AccessSpeed = 0; - if (pcmcia_request_window(&link, &req, &link->win)) - goto next_entry; - map.Page = 0; - map.CardOffset = mem->win[0].card_addr; - if (pcmcia_map_mem_page(link->win, &map)) - goto next_entry; - } - /* If we got this far, we're cool! */ - break; + printk(KERN_INFO "ni_daq_700: cs-config\n"); -next_entry: + dev_dbg(&link->dev, "dio700_config\n"); - last_ret = pcmcia_get_next_tuple(link, &tuple); - if (last_ret) { - cs_error(link, GetNextTuple, last_ret); - goto cs_failed; - } + ret = pcmcia_loop_config(link, dio700_pcmcia_config_loop, &req); + if (ret) { + dev_warn(&link->dev, "no configuration found\n"); + goto failed; } /* @@ -719,11 +644,9 @@ next_entry: irq structure is initialized. */ if (link->conf.Attributes & CONF_ENABLE_IRQ) { - last_ret = pcmcia_request_irq(link, &link->irq); - if (last_ret) { - cs_error(link, RequestIRQ, last_ret); - goto cs_failed; - } + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; } /* @@ -731,11 +654,9 @@ next_entry: the I/O windows and the interrupt mapping, and putting the card and host interface into "Memory and IO" mode. */ - last_ret = pcmcia_request_configuration(link, &link->conf); - if (last_ret != 0) { - cs_error(link, RequestConfiguration, last_ret); - goto cs_failed; - } + ret = pcmcia_request_configuration(link, &link->conf); + if (ret != 0) + goto failed; /* At this point, the dev_node_t structure(s) need to be @@ -763,7 +684,7 @@ next_entry: return; -cs_failed: +failed: printk(KERN_INFO "ni_daq_700 cs failed"); dio700_release(link); @@ -771,7 +692,7 @@ cs_failed: static void dio700_release(struct pcmcia_device *link) { - DEBUG(0, "dio700_release(0x%p)\n", link); + dev_dbg(&link->dev, "dio700_release\n"); pcmcia_disable_device(link); } /* dio700_release */ @@ -830,15 +751,13 @@ struct pcmcia_driver dio700_cs_driver = { static int __init init_dio700_cs(void) { - printk("ni_daq_700: cs-init \n"); - DEBUG(0, "%s\n", version); pcmcia_register_driver(&dio700_cs_driver); return 0; } static void __exit exit_dio700_cs(void) { - DEBUG(0, "ni_daq_700: unloading\n"); + pr_debug("ni_daq_700: unloading\n"); pcmcia_unregister_driver(&dio700_cs_driver); } diff --git a/drivers/staging/comedi/drivers/ni_daq_dio24.c b/drivers/staging/comedi/drivers/ni_daq_dio24.c index 0700a8bddd1..9017be3a92f 100644 --- a/drivers/staging/comedi/drivers/ni_daq_dio24.c +++ b/drivers/staging/comedi/drivers/ni_daq_dio24.c @@ -187,25 +187,7 @@ static int dio24_detach(struct comedi_device *dev) return 0; }; -/* PCMCIA crap */ - -/* - All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - you do not define PCMCIA_DEBUG at all, all the debug code will be - left out. If you compile with PCMCIA_DEBUG=0, the debug code will - be present but disabled -- but it can then be enabled for specific - modules at load time with a 'pc_debug=#' option to insmod. -*/ -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0644); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = "ni_daq_dio24.c, based on dummy_cs.c"; -#else -#define DEBUG(n, args...) -#endif - -/*====================================================================*/ +/* PCMCIA crap -- watch your words! */ static void dio24_config(struct pcmcia_device *link); static void dio24_release(struct pcmcia_device *link); @@ -261,7 +243,7 @@ static int dio24_cs_attach(struct pcmcia_device *link) printk(KERN_INFO "ni_daq_dio24: HOLA SOY YO - CS-attach!\n"); - DEBUG(0, "dio24_cs_attach()\n"); + dev_dbg(&link->dev, "dio24_cs_attach()\n"); /* Allocate space for private device-specific data */ local = kzalloc(sizeof(struct local_info_t), GFP_KERNEL); @@ -272,7 +254,6 @@ static int dio24_cs_attach(struct pcmcia_device *link) /* Interrupt setup */ link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->irq.Handler = NULL; /* @@ -306,7 +287,7 @@ static void dio24_cs_detach(struct pcmcia_device *link) printk(KERN_INFO "ni_daq_dio24: HOLA SOY YO - cs-detach!\n"); - DEBUG(0, "dio24_cs_detach(0x%p)\n", link); + dev_dbg(&link->dev, "dio24_cs_detach\n"); if (link->dev_node) { ((struct local_info_t *)link->priv)->stop = 1; @@ -327,142 +308,85 @@ static void dio24_cs_detach(struct pcmcia_device *link) ======================================================================*/ -static void dio24_config(struct pcmcia_device *link) +static int dio24_pcmcia_config_loop(struct pcmcia_device *p_dev, + cistpl_cftable_entry_t *cfg, + cistpl_cftable_entry_t *dflt, + unsigned int vcc, + void *priv_data) { - struct local_info_t *dev = link->priv; - tuple_t tuple; - cisparse_t parse; - int last_ret; - u_char buf[64]; - win_req_t req; + win_req_t *req = priv_data; memreq_t map; - cistpl_cftable_entry_t dflt = { 0 }; - printk(KERN_INFO "ni_daq_dio24: HOLA SOY YO! - config\n"); - - DEBUG(0, "dio24_config(0x%p)\n", link); - - /* - This reads the card's CONFIG tuple to find its configuration - registers. - */ - tuple.DesiredTuple = CISTPL_CONFIG; - tuple.Attributes = 0; - tuple.TupleData = buf; - tuple.TupleDataMax = sizeof(buf); - tuple.TupleOffset = 0; - - last_ret = pcmcia_get_first_tuple(link, &tuple); - if (last_ret) { - cs_error(link, GetFirstTuple, last_ret); - goto cs_failed; - } + if (cfg->index == 0) + return -ENODEV; - last_ret = pcmcia_get_tuple_data(link, &tuple); - if (last_ret) { - cs_error(link, GetTupleData, last_ret); - goto cs_failed; + /* Does this card need audio output? */ + if (cfg->flags & CISTPL_CFTABLE_AUDIO) { + p_dev->conf.Attributes |= CONF_ENABLE_SPKR; + p_dev->conf.Status = CCSR_AUDIO_ENA; } - last_ret = pcmcia_parse_tuple(&tuple, &parse); - if (last_ret) { - cs_error(link, ParseTuple, last_ret); - goto cs_failed; + /* Do we need to allocate an interrupt? */ + if (cfg->irq.IRQInfo1 || dflt->irq.IRQInfo1) + p_dev->conf.Attributes |= CONF_ENABLE_IRQ; + + /* IO window settings */ + p_dev->io.NumPorts1 = p_dev->io.NumPorts2 = 0; + if ((cfg->io.nwin > 0) || (dflt->io.nwin > 0)) { + cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt->io; + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; + if (!(io->flags & CISTPL_IO_8BIT)) + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16; + if (!(io->flags & CISTPL_IO_16BIT)) + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8; + p_dev->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK; + p_dev->io.BasePort1 = io->win[0].base; + p_dev->io.NumPorts1 = io->win[0].len; + if (io->nwin > 1) { + p_dev->io.Attributes2 = p_dev->io.Attributes1; + p_dev->io.BasePort2 = io->win[1].base; + p_dev->io.NumPorts2 = io->win[1].len; + } + /* This reserves IO space but doesn't actually enable it */ + if (pcmcia_request_io(p_dev, &p_dev->io) != 0) + return -ENODEV; } - link->conf.ConfigBase = parse.config.base; - link->conf.Present = parse.config.rmask[0]; - - /* - In this loop, we scan the CIS for configuration table entries, - each of which describes a valid card configuration, including - voltage, IO window, memory window, and interrupt settings. - - We make no assumptions about the card to be configured: we use - just the information available in the CIS. In an ideal world, - this would work for any PCMCIA card, but it requires a complete - and accurate CIS. In practice, a driver usually "knows" most of - these things without consulting the CIS, and most client drivers - will only use the CIS to fill in implementation-defined details. - */ - tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY; - last_ret = pcmcia_get_first_tuple(link, &tuple); - if (last_ret) { - cs_error(link, GetFirstTuple, last_ret); - goto cs_failed; + if ((cfg->mem.nwin > 0) || (dflt->mem.nwin > 0)) { + cistpl_mem_t *mem = + (cfg->mem.nwin) ? &cfg->mem : &dflt->mem; + req->Attributes = WIN_DATA_WIDTH_16 | WIN_MEMORY_TYPE_CM; + req->Attributes |= WIN_ENABLE; + req->Base = mem->win[0].host_addr; + req->Size = mem->win[0].len; + if (req->Size < 0x1000) + req->Size = 0x1000; + req->AccessSpeed = 0; + if (pcmcia_request_window(p_dev, req, &p_dev->win)) + return -ENODEV; + map.Page = 0; + map.CardOffset = mem->win[0].card_addr; + if (pcmcia_map_mem_page(p_dev, p_dev->win, &map)) + return -ENODEV; } - while (1) { - cistpl_cftable_entry_t *cfg = &(parse.cftable_entry); - if (pcmcia_get_tuple_data(link, &tuple) != 0) - goto next_entry; - if (pcmcia_parse_tuple(&tuple, &parse) != 0) - goto next_entry; - - if (cfg->flags & CISTPL_CFTABLE_DEFAULT) - dflt = *cfg; - if (cfg->index == 0) - goto next_entry; - link->conf.ConfigIndex = cfg->index; - - /* Does this card need audio output? */ - if (cfg->flags & CISTPL_CFTABLE_AUDIO) { - link->conf.Attributes |= CONF_ENABLE_SPKR; - link->conf.Status = CCSR_AUDIO_ENA; - } + /* If we got this far, we're cool! */ + return 0; +} - /* Do we need to allocate an interrupt? */ - if (cfg->irq.IRQInfo1 || dflt.irq.IRQInfo1) - link->conf.Attributes |= CONF_ENABLE_IRQ; - - /* IO window settings */ - link->io.NumPorts1 = link->io.NumPorts2 = 0; - if ((cfg->io.nwin > 0) || (dflt.io.nwin > 0)) { - cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt.io; - link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; - if (!(io->flags & CISTPL_IO_8BIT)) - link->io.Attributes1 = IO_DATA_PATH_WIDTH_16; - if (!(io->flags & CISTPL_IO_16BIT)) - link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; - link->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK; - link->io.BasePort1 = io->win[0].base; - link->io.NumPorts1 = io->win[0].len; - if (io->nwin > 1) { - link->io.Attributes2 = link->io.Attributes1; - link->io.BasePort2 = io->win[1].base; - link->io.NumPorts2 = io->win[1].len; - } - /* This reserves IO space but doesn't actually enable it */ - if (pcmcia_request_io(link, &link->io) != 0) - goto next_entry; - } +static void dio24_config(struct pcmcia_device *link) +{ + struct local_info_t *dev = link->priv; + int ret; + win_req_t req; - if ((cfg->mem.nwin > 0) || (dflt.mem.nwin > 0)) { - cistpl_mem_t *mem = - (cfg->mem.nwin) ? &cfg->mem : &dflt.mem; - req.Attributes = WIN_DATA_WIDTH_16 | WIN_MEMORY_TYPE_CM; - req.Attributes |= WIN_ENABLE; - req.Base = mem->win[0].host_addr; - req.Size = mem->win[0].len; - if (req.Size < 0x1000) - req.Size = 0x1000; - req.AccessSpeed = 0; - if (pcmcia_request_window(&link, &req, &link->win)) - goto next_entry; - map.Page = 0; - map.CardOffset = mem->win[0].card_addr; - if (pcmcia_map_mem_page(link->win, &map)) - goto next_entry; - } - /* If we got this far, we're cool! */ - break; + printk(KERN_INFO "ni_daq_dio24: HOLA SOY YO! - config\n"); -next_entry: + dev_dbg(&link->dev, "dio24_config\n"); - last_ret = pcmcia_get_next_tuple(link, &tuple); - if (last_ret) { - cs_error(link, GetNextTuple, last_ret); - goto cs_failed; - } + ret = pcmcia_loop_config(link, dio24_pcmcia_config_loop, &req); + if (ret) { + dev_warn(&link->dev, "no configuration found\n"); + goto failed; } /* @@ -471,11 +395,9 @@ next_entry: irq structure is initialized. */ if (link->conf.Attributes & CONF_ENABLE_IRQ) { - last_ret = pcmcia_request_irq(link, &link->irq); - if (last_ret) { - cs_error(link, RequestIRQ, last_ret); - goto cs_failed; - } + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; } /* @@ -483,11 +405,9 @@ next_entry: the I/O windows and the interrupt mapping, and putting the card and host interface into "Memory and IO" mode. */ - last_ret = pcmcia_request_configuration(link, &link->conf); - if (last_ret) { - cs_error(link, RequestConfiguration, last_ret); - goto cs_failed; - } + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; /* At this point, the dev_node_t structure(s) need to be @@ -515,7 +435,7 @@ next_entry: return; -cs_failed: +failed: printk(KERN_INFO "Fallo"); dio24_release(link); @@ -523,7 +443,7 @@ cs_failed: static void dio24_release(struct pcmcia_device *link) { - DEBUG(0, "dio24_release(0x%p)\n", link); + dev_dbg(&link->dev, "dio24_release\n"); pcmcia_disable_device(link); } /* dio24_release */ @@ -582,14 +502,12 @@ struct pcmcia_driver dio24_cs_driver = { static int __init init_dio24_cs(void) { printk("ni_daq_dio24: HOLA SOY YO!\n"); - DEBUG(0, "%s\n", version); pcmcia_register_driver(&dio24_cs_driver); return 0; } static void __exit exit_dio24_cs(void) { - DEBUG(0, "ni_dio24: unloading\n"); pcmcia_unregister_driver(&dio24_cs_driver); } diff --git a/drivers/staging/comedi/drivers/ni_labpc_cs.c b/drivers/staging/comedi/drivers/ni_labpc_cs.c index a3053b8da1c..7d514b3ee75 100644 --- a/drivers/staging/comedi/drivers/ni_labpc_cs.c +++ b/drivers/staging/comedi/drivers/ni_labpc_cs.c @@ -153,23 +153,6 @@ static int labpc_attach(struct comedi_device *dev, struct comedi_devconfig *it) return labpc_common_attach(dev, iobase, irq, 0); } -/* - All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - you do not define PCMCIA_DEBUG at all, all the debug code will be - left out. If you compile with PCMCIA_DEBUG=0, the debug code will - be present but disabled -- but it can then be enabled for specific - modules at load time with a 'pc_debug=#' option to insmod. -*/ -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0644); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static const char *version = - "ni_labpc.c, based on dummy_cs.c 1.31 2001/08/24 12:13:13"; -#else -#define DEBUG(n, args...) -#endif - /*====================================================================*/ /* @@ -236,7 +219,7 @@ static int labpc_cs_attach(struct pcmcia_device *link) { struct local_info_t *local; - DEBUG(0, "labpc_cs_attach()\n"); + dev_dbg(&link->dev, "labpc_cs_attach()\n"); /* Allocate space for private device-specific data */ local = kzalloc(sizeof(struct local_info_t), GFP_KERNEL); @@ -247,7 +230,6 @@ static int labpc_cs_attach(struct pcmcia_device *link) /* Interrupt setup */ link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_FORCED_PULSE; - link->irq.IRQInfo1 = IRQ_INFO2_VALID | IRQ_PULSE_ID; link->irq.Handler = NULL; /* @@ -278,7 +260,7 @@ static int labpc_cs_attach(struct pcmcia_device *link) static void labpc_cs_detach(struct pcmcia_device *link) { - DEBUG(0, "labpc_cs_detach(0x%p)\n", link); + dev_dbg(&link->dev, "labpc_cs_detach\n"); /* If the device is currently configured and active, we won't @@ -305,135 +287,84 @@ static void labpc_cs_detach(struct pcmcia_device *link) ======================================================================*/ -static void labpc_config(struct pcmcia_device *link) +static int labpc_pcmcia_config_loop(struct pcmcia_device *p_dev, + cistpl_cftable_entry_t *cfg, + cistpl_cftable_entry_t *dflt, + unsigned int vcc, + void *priv_data) { - struct local_info_t *dev = link->priv; - tuple_t tuple; - cisparse_t parse; - int last_ret; - u_char buf[64]; - win_req_t req; + win_req_t *req = priv_data; memreq_t map; - cistpl_cftable_entry_t dflt = { 0 }; - DEBUG(0, "labpc_config(0x%p)\n", link); + if (cfg->index == 0) + return -ENODEV; - /* - This reads the card's CONFIG tuple to find its configuration - registers. - */ - tuple.DesiredTuple = CISTPL_CONFIG; - tuple.Attributes = 0; - tuple.TupleData = buf; - tuple.TupleDataMax = sizeof(buf); - tuple.TupleOffset = 0; - - last_ret = pcmcia_get_first_tuple(link, &tuple); - if (last_ret) { - cs_error(link, GetFirstTuple, last_ret); - goto cs_failed; + /* Does this card need audio output? */ + if (cfg->flags & CISTPL_CFTABLE_AUDIO) { + p_dev->conf.Attributes |= CONF_ENABLE_SPKR; + p_dev->conf.Status = CCSR_AUDIO_ENA; } - last_ret = pcmcia_get_tuple_data(link, &tuple); - if (last_ret) { - cs_error(link, GetTupleData, last_ret); - goto cs_failed; + /* Do we need to allocate an interrupt? */ + if (cfg->irq.IRQInfo1 || dflt->irq.IRQInfo1) + p_dev->conf.Attributes |= CONF_ENABLE_IRQ; + + /* IO window settings */ + p_dev->io.NumPorts1 = p_dev->io.NumPorts2 = 0; + if ((cfg->io.nwin > 0) || (dflt->io.nwin > 0)) { + cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt->io; + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; + if (!(io->flags & CISTPL_IO_8BIT)) + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16; + if (!(io->flags & CISTPL_IO_16BIT)) + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8; + p_dev->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK; + p_dev->io.BasePort1 = io->win[0].base; + p_dev->io.NumPorts1 = io->win[0].len; + if (io->nwin > 1) { + p_dev->io.Attributes2 = p_dev->io.Attributes1; + p_dev->io.BasePort2 = io->win[1].base; + p_dev->io.NumPorts2 = io->win[1].len; + } + /* This reserves IO space but doesn't actually enable it */ + if (pcmcia_request_io(p_dev, &p_dev->io) != 0) + return -ENODEV; } - last_ret = pcmcia_parse_tuple(&tuple, &parse); - if (last_ret) { - cs_error(link, ParseTuple, last_ret); - goto cs_failed; + if ((cfg->mem.nwin > 0) || (dflt->mem.nwin > 0)) { + cistpl_mem_t *mem = + (cfg->mem.nwin) ? &cfg->mem : &dflt->mem; + req->Attributes = WIN_DATA_WIDTH_16 | WIN_MEMORY_TYPE_CM; + req->Attributes |= WIN_ENABLE; + req->Base = mem->win[0].host_addr; + req->Size = mem->win[0].len; + if (req->Size < 0x1000) + req->Size = 0x1000; + req->AccessSpeed = 0; + if (pcmcia_request_window(p_dev, req, &p_dev->win)) + return -ENODEV; + map.Page = 0; + map.CardOffset = mem->win[0].card_addr; + if (pcmcia_map_mem_page(p_dev, p_dev->win, &map)) + return -ENODEV; } - link->conf.ConfigBase = parse.config.base; - link->conf.Present = parse.config.rmask[0]; + /* If we got this far, we're cool! */ + return 0; +} - /* - In this loop, we scan the CIS for configuration table entries, - each of which describes a valid card configuration, including - voltage, IO window, memory window, and interrupt settings. - - We make no assumptions about the card to be configured: we use - just the information available in the CIS. In an ideal world, - this would work for any PCMCIA card, but it requires a complete - and accurate CIS. In practice, a driver usually "knows" most of - these things without consulting the CIS, and most client drivers - will only use the CIS to fill in implementation-defined details. - */ - tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY; - last_ret = pcmcia_get_first_tuple(link, &tuple); - if (last_ret) { - cs_error(link, GetFirstTuple, last_ret); - goto cs_failed; - } - while (1) { - cistpl_cftable_entry_t *cfg = &(parse.cftable_entry); - if (pcmcia_get_tuple_data(link, &tuple)) - goto next_entry; - if (pcmcia_parse_tuple(&tuple, &parse)) - goto next_entry; - - if (cfg->flags & CISTPL_CFTABLE_DEFAULT) - dflt = *cfg; - if (cfg->index == 0) - goto next_entry; - link->conf.ConfigIndex = cfg->index; - - /* Does this card need audio output? */ - if (cfg->flags & CISTPL_CFTABLE_AUDIO) { - link->conf.Attributes |= CONF_ENABLE_SPKR; - link->conf.Status = CCSR_AUDIO_ENA; - } - /* Do we need to allocate an interrupt? */ - if (cfg->irq.IRQInfo1 || dflt.irq.IRQInfo1) - link->conf.Attributes |= CONF_ENABLE_IRQ; - - /* IO window settings */ - link->io.NumPorts1 = link->io.NumPorts2 = 0; - if ((cfg->io.nwin > 0) || (dflt.io.nwin > 0)) { - cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt.io; - link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; - link->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK; - link->io.BasePort1 = io->win[0].base; - link->io.NumPorts1 = io->win[0].len; - if (io->nwin > 1) { - link->io.Attributes2 = link->io.Attributes1; - link->io.BasePort2 = io->win[1].base; - link->io.NumPorts2 = io->win[1].len; - } - /* This reserves IO space but doesn't actually enable it */ - if (pcmcia_request_io(link, &link->io)) - goto next_entry; - } +static void labpc_config(struct pcmcia_device *link) +{ + struct local_info_t *dev = link->priv; + int ret; + win_req_t req; - if ((cfg->mem.nwin > 0) || (dflt.mem.nwin > 0)) { - cistpl_mem_t *mem = - (cfg->mem.nwin) ? &cfg->mem : &dflt.mem; - req.Attributes = WIN_DATA_WIDTH_16 | WIN_MEMORY_TYPE_CM; - req.Attributes |= WIN_ENABLE; - req.Base = mem->win[0].host_addr; - req.Size = mem->win[0].len; - if (req.Size < 0x1000) - req.Size = 0x1000; - req.AccessSpeed = 0; - link->win = (window_handle_t) link; - if (pcmcia_request_window(&link, &req, &link->win)) - goto next_entry; - map.Page = 0; - map.CardOffset = mem->win[0].card_addr; - if (pcmcia_map_mem_page(link->win, &map)) - goto next_entry; - } - /* If we got this far, we're cool! */ - break; + dev_dbg(&link->dev, "labpc_config\n"); -next_entry: - last_ret = pcmcia_get_next_tuple(link, &tuple); - if (last_ret) { - cs_error(link, GetNextTuple, last_ret); - goto cs_failed; - } + ret = pcmcia_loop_config(link, labpc_pcmcia_config_loop, &req); + if (ret) { + dev_warn(&link->dev, "no configuration found\n"); + goto failed; } /* @@ -442,11 +373,9 @@ next_entry: irq structure is initialized. */ if (link->conf.Attributes & CONF_ENABLE_IRQ) { - last_ret = pcmcia_request_irq(link, &link->irq); - if (last_ret) { - cs_error(link, RequestIRQ, last_ret); - goto cs_failed; - } + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; } /* @@ -454,11 +383,9 @@ next_entry: the I/O windows and the interrupt mapping, and putting the card and host interface into "Memory and IO" mode. */ - last_ret = pcmcia_request_configuration(link, &link->conf); - if (last_ret) { - cs_error(link, RequestConfiguration, last_ret); - goto cs_failed; - } + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; /* At this point, the dev_node_t structure(s) need to be @@ -486,14 +413,14 @@ next_entry: return; -cs_failed: +failed: labpc_release(link); } /* labpc_config */ static void labpc_release(struct pcmcia_device *link) { - DEBUG(0, "labpc_release(0x%p)\n", link); + dev_dbg(&link->dev, "labpc_release\n"); pcmcia_disable_device(link); } /* labpc_release */ @@ -551,14 +478,12 @@ struct pcmcia_driver labpc_cs_driver = { static int __init init_labpc_cs(void) { - DEBUG(0, "%s\n", version); pcmcia_register_driver(&labpc_cs_driver); return 0; } static void __exit exit_labpc_cs(void) { - DEBUG(0, "ni_labpc: unloading\n"); pcmcia_unregister_driver(&labpc_cs_driver); } diff --git a/drivers/staging/comedi/drivers/ni_mio_cs.c b/drivers/staging/comedi/drivers/ni_mio_cs.c index 9aef87fc81d..d692f4bb47e 100644 --- a/drivers/staging/comedi/drivers/ni_mio_cs.c +++ b/drivers/staging/comedi/drivers/ni_mio_cs.c @@ -274,7 +274,6 @@ static int cs_attach(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_16; link->io.NumPorts1 = 16; link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; @@ -312,96 +311,47 @@ static int mio_cs_resume(struct pcmcia_device *link) return 0; } -static void mio_cs_config(struct pcmcia_device *link) -{ - tuple_t tuple; - u_short buf[128]; - cisparse_t parse; - int manfid = 0, prodid = 0; - int ret; - - DPRINTK("mio_cs_config(link=%p)\n", link); - tuple.TupleData = (cisdata_t *) buf; - tuple.TupleOffset = 0; - tuple.TupleDataMax = 255; - tuple.Attributes = 0; +static int mio_pcmcia_config_loop(struct pcmcia_device *p_dev, + cistpl_cftable_entry_t *cfg, + cistpl_cftable_entry_t *dflt, + unsigned int vcc, + void *priv_data) +{ + int base, ret; - tuple.DesiredTuple = CISTPL_CONFIG; - ret = pcmcia_get_first_tuple(link, &tuple); - ret = pcmcia_get_tuple_data(link, &tuple); - ret = pcmcia_parse_tuple(&tuple, &parse); - link->conf.ConfigBase = parse.config.base; - link->conf.Present = parse.config.rmask[0]; + p_dev->io.NumPorts1 = cfg->io.win[0].len; + p_dev->io.IOAddrLines = cfg->io.flags & CISTPL_IO_LINES_MASK; + p_dev->io.NumPorts2 = 0; -#if 0 - tuple.DesiredTuple = CISTPL_LONGLINK_MFC; - tuple.Attributes = TUPLE_RETURN_COMMON | TUPLE_RETURN_LINK; - info->multi(first_tuple(link, &tuple, &parse) == 0); -#endif - - tuple.DesiredTuple = CISTPL_MANFID; - tuple.Attributes = TUPLE_RETURN_COMMON; - if ((pcmcia_get_first_tuple(link, &tuple) == 0) && - (pcmcia_get_tuple_data(link, &tuple) == 0)) { - manfid = le16_to_cpu(buf[0]); - prodid = le16_to_cpu(buf[1]); + for (base = 0x000; base < 0x400; base += 0x20) { + p_dev->io.BasePort1 = base; + ret = pcmcia_request_io(p_dev, &p_dev->io); + if (!ret) + return 0; } - /* printk("manfid = 0x%04x, 0x%04x\n",manfid,prodid); */ + return -ENODEV; +} - tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY; - tuple.Attributes = 0; - ret = pcmcia_get_first_tuple(link, &tuple); - ret = pcmcia_get_tuple_data(link, &tuple); - ret = pcmcia_parse_tuple(&tuple, &parse); -#if 0 - printk(" index: 0x%x\n", parse.cftable_entry.index); - printk(" flags: 0x%x\n", parse.cftable_entry.flags); - printk(" io flags: 0x%x\n", parse.cftable_entry.io.flags); - printk(" io nwin: 0x%x\n", parse.cftable_entry.io.nwin); - printk(" io base: 0x%x\n", parse.cftable_entry.io.win[0].base); - printk(" io len: 0x%x\n", parse.cftable_entry.io.win[0].len); - printk(" irq1: 0x%x\n", parse.cftable_entry.irq.IRQInfo1); - printk(" irq2: 0x%x\n", parse.cftable_entry.irq.IRQInfo2); - printk(" mem flags: 0x%x\n", parse.cftable_entry.mem.flags); - printk(" mem nwin: 0x%x\n", parse.cftable_entry.mem.nwin); - printk(" subtuples: 0x%x\n", parse.cftable_entry.subtuples); -#endif +static void mio_cs_config(struct pcmcia_device *link) +{ + int ret; -#if 0 - link->io.NumPorts1 = 0x20; - link->io.IOAddrLines = 5; - link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; -#endif - link->io.NumPorts1 = parse.cftable_entry.io.win[0].len; - link->io.IOAddrLines = - parse.cftable_entry.io.flags & CISTPL_IO_LINES_MASK; - link->io.NumPorts2 = 0; + DPRINTK("mio_cs_config(link=%p)\n", link); - { - int base; - for (base = 0x000; base < 0x400; base += 0x20) { - link->io.BasePort1 = base; - ret = pcmcia_request_io(link, &link->io); - /* printk("RequestIO 0x%02x\n",ret); */ - if (!ret) - break; - } + ret = pcmcia_loop_config(link, mio_pcmcia_config_loop, NULL); + if (ret) { + dev_warn(&link->dev, "no configuration found\n"); + return; } - link->irq.IRQInfo1 = parse.cftable_entry.irq.IRQInfo1; - link->irq.IRQInfo2 = parse.cftable_entry.irq.IRQInfo2; ret = pcmcia_request_irq(link, &link->irq); if (ret) { printk("pcmcia_request_irq() returned error: %i\n", ret); } - /* printk("RequestIRQ 0x%02x\n",ret); */ - - link->conf.ConfigIndex = 1; ret = pcmcia_request_configuration(link, &link->conf); - /* printk("RequestConfiguration %d\n",ret); */ link->dev_node = &dev_node; } @@ -475,40 +425,17 @@ static int mio_cs_attach(struct comedi_device *dev, struct comedi_devconfig *it) return 0; } -static int get_prodid(struct comedi_device *dev, struct pcmcia_device *link) -{ - tuple_t tuple; - u_short buf[128]; - int prodid = 0; - - tuple.TupleData = (cisdata_t *) buf; - tuple.TupleOffset = 0; - tuple.TupleDataMax = 255; - tuple.DesiredTuple = CISTPL_MANFID; - tuple.Attributes = TUPLE_RETURN_COMMON; - if ((pcmcia_get_first_tuple(link, &tuple) == 0) && - (pcmcia_get_tuple_data(link, &tuple) == 0)) { - prodid = le16_to_cpu(buf[1]); - } - - return prodid; -} - static int ni_getboardtype(struct comedi_device *dev, struct pcmcia_device *link) { - int id; int i; - id = get_prodid(dev, link); - for (i = 0; i < n_ni_boards; i++) { - if (ni_boards[i].device_id == id) { + if (ni_boards[i].device_id == link->card_id) return i; - } } - printk("unknown board 0x%04x -- pretend it is a ", id); + printk("unknown board 0x%04x -- pretend it is a ", link->card_id); return 0; } diff --git a/drivers/staging/comedi/drivers/quatech_daqp_cs.c b/drivers/staging/comedi/drivers/quatech_daqp_cs.c index 344b82353e0..5256fd93316 100644 --- a/drivers/staging/comedi/drivers/quatech_daqp_cs.c +++ b/drivers/staging/comedi/drivers/quatech_daqp_cs.c @@ -55,23 +55,6 @@ Devices: [Quatech] DAQP-208 (daqp), DAQP-308 #include <pcmcia/cisreg.h> #include <pcmcia/ds.h> -/* - All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - you do not define PCMCIA_DEBUG at all, all the debug code will be - left out. If you compile with PCMCIA_DEBUG=0, the debug code will - be present but disabled -- but it can then be enabled for specific - modules at load time with a 'pc_debug=#' option to insmod. -*/ - -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0644); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = "quatech_daqp_cs.c 1.10 2003/04/21 (Brent Baccala)"; -#else -#define DEBUG(n, args...) -#endif - /* Maximum number of separate DAQP devices we'll allow */ #define MAX_DEV 4 @@ -863,8 +846,6 @@ static int daqp_attach(struct comedi_device *dev, struct comedi_devconfig *it) { int ret; struct local_info_t *local = dev_table[it->options[0]]; - tuple_t tuple; - int i; struct comedi_subdevice *s; if (it->options[0] < 0 || it->options[0] >= MAX_DEV || !local) { @@ -883,29 +864,10 @@ static int daqp_attach(struct comedi_device *dev, struct comedi_devconfig *it) strcpy(local->board_name, "DAQP"); dev->board_name = local->board_name; - - tuple.DesiredTuple = CISTPL_VERS_1; - if (pcmcia_get_first_tuple(local->link, &tuple) == 0) { - u_char buf[128]; - - buf[0] = buf[sizeof(buf) - 1] = 0; - tuple.TupleData = buf; - tuple.TupleDataMax = sizeof(buf); - tuple.TupleOffset = 2; - if (pcmcia_get_tuple_data(local->link, &tuple) == 0) { - - for (i = 0; i < tuple.TupleDataLen - 4; i++) - if (buf[i] == 0) - break; - for (i++; i < tuple.TupleDataLen - 4; i++) - if (buf[i] == 0) - break; - i++; - if ((i < tuple.TupleDataLen - 4) - && (strncmp(buf + i, "DAQP", 4) == 0)) { - strncpy(local->board_name, buf + i, - sizeof(local->board_name)); - } + if (local->link->prod_id[2]) { + if (strncmp(local->link->prod_id[2], "DAQP", 4) == 0) { + strncpy(local->board_name, local->link->prod_id[2], + sizeof(local->board_name)); } } @@ -1058,7 +1020,7 @@ static int daqp_cs_attach(struct pcmcia_device *link) struct local_info_t *local; int i; - DEBUG(0, "daqp_cs_attach()\n"); + dev_dbg(&link->dev, "daqp_cs_attach()\n"); for (i = 0; i < MAX_DEV; i++) if (dev_table[i] == NULL) @@ -1079,10 +1041,8 @@ static int daqp_cs_attach(struct pcmcia_device *link) link->priv = local; /* Interrupt setup */ - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = daqp_interrupt; - link->irq.Instance = local; /* General socket configuration defaults can go here. In this @@ -1112,7 +1072,7 @@ static void daqp_cs_detach(struct pcmcia_device *link) { struct local_info_t *dev = link->priv; - DEBUG(0, "daqp_cs_detach(0x%p)\n", link); + dev_dbg(&link->dev, "daqp_cs_detach\n"); if (link->dev_node) { dev->stop = 1; @@ -1134,115 +1094,54 @@ static void daqp_cs_detach(struct pcmcia_device *link) ======================================================================*/ -static void daqp_cs_config(struct pcmcia_device *link) -{ - struct local_info_t *dev = link->priv; - tuple_t tuple; - cisparse_t parse; - int last_ret; - u_char buf[64]; - - DEBUG(0, "daqp_cs_config(0x%p)\n", link); - - /* - This reads the card's CONFIG tuple to find its configuration - registers. - */ - tuple.DesiredTuple = CISTPL_CONFIG; - tuple.Attributes = 0; - tuple.TupleData = buf; - tuple.TupleDataMax = sizeof(buf); - tuple.TupleOffset = 0; - - last_ret = pcmcia_get_first_tuple(link, &tuple); - if (last_ret) { - cs_error(link, GetFirstTuple, last_ret); - goto cs_failed; - } - last_ret = pcmcia_get_tuple_data(link, &tuple); - if (last_ret) { - cs_error(link, GetTupleData, last_ret); - goto cs_failed; - } - - last_ret = pcmcia_parse_tuple(&tuple, &parse); - if (last_ret) { - cs_error(link, ParseTuple, last_ret); - goto cs_failed; - } - link->conf.ConfigBase = parse.config.base; - link->conf.Present = parse.config.rmask[0]; +static int daqp_pcmcia_config_loop(struct pcmcia_device *p_dev, + cistpl_cftable_entry_t *cfg, + cistpl_cftable_entry_t *dflt, + unsigned int vcc, + void *priv_data) +{ + if (cfg->index == 0) + return -ENODEV; - /* - In this loop, we scan the CIS for configuration table entries, - each of which describes a valid card configuration, including - voltage, IO window, memory window, and interrupt settings. - - We make no assumptions about the card to be configured: we use - just the information available in the CIS. In an ideal world, - this would work for any PCMCIA card, but it requires a complete - and accurate CIS. In practice, a driver usually "knows" most of - these things without consulting the CIS, and most client drivers - will only use the CIS to fill in implementation-defined details. - */ - tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY; - last_ret = pcmcia_get_first_tuple(link, &tuple); - if (last_ret) { - cs_error(link, GetFirstTuple, last_ret); - goto cs_failed; + /* Do we need to allocate an interrupt? */ + if (cfg->irq.IRQInfo1 || dflt->irq.IRQInfo1) + p_dev->conf.Attributes |= CONF_ENABLE_IRQ; + + /* IO window settings */ + p_dev->io.NumPorts1 = p_dev->io.NumPorts2 = 0; + if ((cfg->io.nwin > 0) || (dflt->io.nwin > 0)) { + cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt->io; + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; + if (!(io->flags & CISTPL_IO_8BIT)) + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16; + if (!(io->flags & CISTPL_IO_16BIT)) + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8; + p_dev->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK; + p_dev->io.BasePort1 = io->win[0].base; + p_dev->io.NumPorts1 = io->win[0].len; + if (io->nwin > 1) { + p_dev->io.Attributes2 = p_dev->io.Attributes1; + p_dev->io.BasePort2 = io->win[1].base; + p_dev->io.NumPorts2 = io->win[1].len; + } } - while (1) { - cistpl_cftable_entry_t dflt = { 0 }; - cistpl_cftable_entry_t *cfg = &(parse.cftable_entry); - if (pcmcia_get_tuple_data(link, &tuple)) - goto next_entry; - if (pcmcia_parse_tuple(&tuple, &parse)) - goto next_entry; - - if (cfg->flags & CISTPL_CFTABLE_DEFAULT) - dflt = *cfg; - if (cfg->index == 0) - goto next_entry; - link->conf.ConfigIndex = cfg->index; - - /* Do we need to allocate an interrupt? */ - if (cfg->irq.IRQInfo1 || dflt.irq.IRQInfo1) - link->conf.Attributes |= CONF_ENABLE_IRQ; - - /* IO window settings */ - link->io.NumPorts1 = link->io.NumPorts2 = 0; - if ((cfg->io.nwin > 0) || (dflt.io.nwin > 0)) { - cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt.io; - link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; - if (!(io->flags & CISTPL_IO_8BIT)) - link->io.Attributes1 = IO_DATA_PATH_WIDTH_16; - if (!(io->flags & CISTPL_IO_16BIT)) - link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; - link->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK; - link->io.BasePort1 = io->win[0].base; - link->io.NumPorts1 = io->win[0].len; - if (io->nwin > 1) { - link->io.Attributes2 = link->io.Attributes1; - link->io.BasePort2 = io->win[1].base; - link->io.NumPorts2 = io->win[1].len; - } - } + /* This reserves IO space but doesn't actually enable it */ + return pcmcia_request_io(p_dev, &p_dev->io); +} - /* This reserves IO space but doesn't actually enable it */ - if (pcmcia_request_io(link, &link->io)) - goto next_entry; +static void daqp_cs_config(struct pcmcia_device *link) +{ + struct local_info_t *dev = link->priv; + int ret; - /* If we got this far, we're cool! */ - break; + dev_dbg(&link->dev, "daqp_cs_config\n"); -next_entry: - last_ret = pcmcia_get_next_tuple(link, &tuple); - if (last_ret) { - cs_error(link, GetNextTuple, last_ret); - goto cs_failed; - } + ret = pcmcia_loop_config(link, daqp_pcmcia_config_loop, NULL); + if (ret) { + dev_warn(&link->dev, "no configuration found\n"); + goto failed; } /* @@ -1251,11 +1150,9 @@ next_entry: irq structure is initialized. */ if (link->conf.Attributes & CONF_ENABLE_IRQ) { - last_ret = pcmcia_request_irq(link, &link->irq); - if (last_ret) { - cs_error(link, RequestIRQ, last_ret); - goto cs_failed; - } + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; } /* @@ -1263,11 +1160,9 @@ next_entry: the I/O windows and the interrupt mapping, and putting the card and host interface into "Memory and IO" mode. */ - last_ret = pcmcia_request_configuration(link, &link->conf); - if (last_ret) { - cs_error(link, RequestConfiguration, last_ret); - goto cs_failed; - } + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; /* At this point, the dev_node_t structure(s) need to be @@ -1296,14 +1191,14 @@ next_entry: return; -cs_failed: +failed: daqp_cs_release(link); } /* daqp_cs_config */ static void daqp_cs_release(struct pcmcia_device *link) { - DEBUG(0, "daqp_cs_release(0x%p)\n", link); + dev_dbg(&link->dev, "daqp_cs_release\n"); pcmcia_disable_device(link); } /* daqp_cs_release */ @@ -1363,7 +1258,6 @@ struct pcmcia_driver daqp_cs_driver = { int __init init_module(void) { - DEBUG(0, "%s\n", version); pcmcia_register_driver(&daqp_cs_driver); comedi_driver_register(&driver_daqp); return 0; @@ -1371,7 +1265,6 @@ int __init init_module(void) void __exit cleanup_module(void) { - DEBUG(0, "daqp_cs: unloading\n"); comedi_driver_unregister(&driver_daqp); pcmcia_unregister_driver(&daqp_cs_driver); } diff --git a/drivers/staging/hv/BlkVsc.c b/drivers/staging/hv/BlkVsc.c index 51aa861292f..a48ee3a1264 100644 --- a/drivers/staging/hv/BlkVsc.c +++ b/drivers/staging/hv/BlkVsc.c @@ -16,6 +16,7 @@ * Place - Suite 330, Boston, MA 02111-1307 USA. * * Authors: + * Haiyang Zhang <haiyangz@microsoft.com> * Hank Janssen <hjanssen@microsoft.com> * */ diff --git a/drivers/staging/hv/Channel.c b/drivers/staging/hv/Channel.c index d649ee169d9..746370e8211 100644 --- a/drivers/staging/hv/Channel.c +++ b/drivers/staging/hv/Channel.c @@ -611,7 +611,7 @@ void VmbusChannelClose(struct vmbus_channel *Channel) /* Stop callback and cancel the timer asap */ Channel->OnChannelCallback = NULL; - del_timer(&Channel->poll_timer); + del_timer_sync(&Channel->poll_timer); /* Send a closing message */ info = kmalloc(sizeof(*info) + @@ -978,14 +978,10 @@ void VmbusChannelOnChannelEvent(struct vmbus_channel *Channel) { DumpVmbusChannel(Channel); ASSERT(Channel->OnChannelCallback); -#ifdef ENABLE_POLLING - del_timer(&Channel->poll_timer); - Channel->OnChannelCallback(Channel->ChannelCallbackContext); - channel->poll_timer.expires(jiffies + usecs_to_jiffies(100); - add_timer(&channel->poll_timer); -#else + Channel->OnChannelCallback(Channel->ChannelCallbackContext); -#endif + + mod_timer(&Channel->poll_timer, jiffies + usecs_to_jiffies(100)); } /** @@ -997,10 +993,6 @@ void VmbusChannelOnTimer(unsigned long data) if (channel->OnChannelCallback) { channel->OnChannelCallback(channel->ChannelCallbackContext); -#ifdef ENABLE_POLLING - channel->poll_timer.expires(jiffies + usecs_to_jiffies(100); - add_timer(&channel->poll_timer); -#endif } } diff --git a/drivers/staging/hv/ChannelMgmt.c b/drivers/staging/hv/ChannelMgmt.c index 3db62caedcf..ef38467ed4e 100644 --- a/drivers/staging/hv/ChannelMgmt.c +++ b/drivers/staging/hv/ChannelMgmt.c @@ -119,7 +119,7 @@ static inline void ReleaseVmbusChannel(void *context) */ void FreeVmbusChannel(struct vmbus_channel *Channel) { - del_timer(&Channel->poll_timer); + del_timer_sync(&Channel->poll_timer); /* * We have to release the channel's workqueue/thread in the vmbus's diff --git a/drivers/staging/hv/NetVsc.c b/drivers/staging/hv/NetVsc.c index d384c0ddf06..1c717f9a554 100644 --- a/drivers/staging/hv/NetVsc.c +++ b/drivers/staging/hv/NetVsc.c @@ -15,6 +15,7 @@ * Place - Suite 330, Boston, MA 02111-1307 USA. * * Authors: + * Haiyang Zhang <haiyangz@microsoft.com> * Hank Janssen <hjanssen@microsoft.com> */ #include <linux/kernel.h> diff --git a/drivers/staging/hv/NetVsc.h b/drivers/staging/hv/NetVsc.h index 3e7112f7c75..6e0e0349412 100644 --- a/drivers/staging/hv/NetVsc.h +++ b/drivers/staging/hv/NetVsc.h @@ -16,6 +16,7 @@ * Place - Suite 330, Boston, MA 02111-1307 USA. * * Authors: + * Haiyang Zhang <haiyangz@microsoft.com> * Hank Janssen <hjanssen@microsoft.com> * */ diff --git a/drivers/staging/hv/StorVsc.c b/drivers/staging/hv/StorVsc.c index 14015c92794..2f7c425896f 100644 --- a/drivers/staging/hv/StorVsc.c +++ b/drivers/staging/hv/StorVsc.c @@ -196,7 +196,7 @@ static int StorVscChannelInit(struct hv_device *Device) * Now, initiate the vsc/vsp initialization protocol on the open * channel */ - memset(request, sizeof(struct storvsc_request_extension), 0); + memset(request, 0, sizeof(struct storvsc_request_extension)); request->WaitEvent = osd_WaitEventCreate(); vstorPacket->Operation = VStorOperationBeginInitialization; @@ -233,7 +233,7 @@ static int StorVscChannelInit(struct hv_device *Device) DPRINT_INFO(STORVSC, "QUERY_PROTOCOL_VERSION_OPERATION..."); /* reuse the packet for version range supported */ - memset(vstorPacket, sizeof(struct vstor_packet), 0); + memset(vstorPacket, 0, sizeof(struct vstor_packet)); vstorPacket->Operation = VStorOperationQueryProtocolVersion; vstorPacket->Flags = REQUEST_COMPLETION_FLAG; @@ -266,7 +266,7 @@ static int StorVscChannelInit(struct hv_device *Device) /* Query channel properties */ DPRINT_INFO(STORVSC, "QUERY_PROPERTIES_OPERATION..."); - memset(vstorPacket, sizeof(struct vstor_packet), 0); + memset(vstorPacket, 0, sizeof(struct vstor_packet)); vstorPacket->Operation = VStorOperationQueryProperties; vstorPacket->Flags = REQUEST_COMPLETION_FLAG; vstorPacket->StorageChannelProperties.PortNumber = @@ -305,7 +305,7 @@ static int StorVscChannelInit(struct hv_device *Device) DPRINT_INFO(STORVSC, "END_INITIALIZATION_OPERATION..."); - memset(vstorPacket, sizeof(struct vstor_packet), 0); + memset(vstorPacket, 0, sizeof(struct vstor_packet)); vstorPacket->Operation = VStorOperationEndInitialization; vstorPacket->Flags = REQUEST_COMPLETION_FLAG; @@ -508,7 +508,7 @@ static int StorVscConnectToVsp(struct hv_device *Device) int ret; storDriver = (struct storvsc_driver_object *)Device->Driver; - memset(&props, sizeof(struct vmstorage_channel_properties), 0); + memset(&props, 0, sizeof(struct vmstorage_channel_properties)); /* Open the channel */ ret = Device->Driver->VmbusChannelInterface.Open(Device, diff --git a/drivers/staging/hv/blkvsc_drv.c b/drivers/staging/hv/blkvsc_drv.c index 99c49261a8b..62b282844a5 100644 --- a/drivers/staging/hv/blkvsc_drv.c +++ b/drivers/staging/hv/blkvsc_drv.c @@ -15,6 +15,7 @@ * Place - Suite 330, Boston, MA 02111-1307 USA. * * Authors: + * Haiyang Zhang <haiyangz@microsoft.com> * Hank Janssen <hjanssen@microsoft.com> */ #include <linux/init.h> diff --git a/drivers/staging/hv/netvsc_drv.c b/drivers/staging/hv/netvsc_drv.c index 3192d50f725..0d7459e2d03 100644 --- a/drivers/staging/hv/netvsc_drv.c +++ b/drivers/staging/hv/netvsc_drv.c @@ -15,6 +15,7 @@ * Place - Suite 330, Boston, MA 02111-1307 USA. * * Authors: + * Haiyang Zhang <haiyangz@microsoft.com> * Hank Janssen <hjanssen@microsoft.com> */ #include <linux/init.h> diff --git a/drivers/staging/rtl8187se/TODO b/drivers/staging/rtl8187se/TODO index c09a9160739..a762e79873e 100644 --- a/drivers/staging/rtl8187se/TODO +++ b/drivers/staging/rtl8187se/TODO @@ -11,5 +11,4 @@ TODO: - sparse fixes - integrate with drivers/net/wireless/rtl818x -Please send any patches to Greg Kroah-Hartman <greg@kroah.com> and -Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>. +Please send any patches to Greg Kroah-Hartman <greg@kroah.com>. diff --git a/drivers/staging/rtl8192su/TODO b/drivers/staging/rtl8192su/TODO index b13be9edb27..f11eec70003 100644 --- a/drivers/staging/rtl8192su/TODO +++ b/drivers/staging/rtl8192su/TODO @@ -14,5 +14,4 @@ TODO: - sparse fixes - integrate with drivers/net/wireless/rtl818x -Please send any patches to Greg Kroah-Hartman <greg@kroah.com> and -Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>. +Please send any patches to Greg Kroah-Hartman <greg@kroah.com>. diff --git a/drivers/staging/vt6655/TODO b/drivers/staging/vt6655/TODO index 8462cd17eb6..cb04aaafc46 100644 --- a/drivers/staging/vt6655/TODO +++ b/drivers/staging/vt6655/TODO @@ -16,6 +16,5 @@ TODO: - sparse fixes - integrate with drivers/net/wireless -Please send any patches to Greg Kroah-Hartman <greg@kroah.com>, -Forest Bond <forest@alittletooquiet.net> and Bartlomiej Zolnierkiewicz -<bzolnier@gmail.com>. +Please send any patches to Greg Kroah-Hartman <greg@kroah.com> +and Forest Bond <forest@alittletooquiet.net>. diff --git a/drivers/staging/vt6656/TODO b/drivers/staging/vt6656/TODO index 17cf50c6735..a318995ba07 100644 --- a/drivers/staging/vt6656/TODO +++ b/drivers/staging/vt6656/TODO @@ -15,6 +15,5 @@ TODO: - sparse fixes - integrate with drivers/net/wireless -Please send any patches to Greg Kroah-Hartman <greg@kroah.com>, -Forest Bond <forest@alittletooquiet.net> and Bartlomiej Zolnierkiewicz -<bzolnier@gmail.com>. +Please send any patches to Greg Kroah-Hartman <greg@kroah.com> +and Forest Bond <forest@alittletooquiet.net>. diff --git a/drivers/telephony/ixj_pcmcia.c b/drivers/telephony/ixj_pcmcia.c index 347c3ed1d9f..d442fd35620 100644 --- a/drivers/telephony/ixj_pcmcia.c +++ b/drivers/telephony/ixj_pcmcia.c @@ -19,13 +19,6 @@ * PCMCIA service support for Quicknet cards */ -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0644); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -#else -#define DEBUG(n, args...) -#endif typedef struct ixj_info_t { int ndev; @@ -39,7 +32,7 @@ static void ixj_cs_release(struct pcmcia_device * link); static int ixj_probe(struct pcmcia_device *p_dev) { - DEBUG(0, "ixj_attach()\n"); + dev_dbg(&p_dev->dev, "ixj_attach()\n"); /* Create new ixj device */ p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8; p_dev->io.Attributes2 = IO_DATA_PATH_WIDTH_8; @@ -55,33 +48,30 @@ static int ixj_probe(struct pcmcia_device *p_dev) static void ixj_detach(struct pcmcia_device *link) { - DEBUG(0, "ixj_detach(0x%p)\n", link); + dev_dbg(&link->dev, "ixj_detach\n"); ixj_cs_release(link); kfree(link->priv); } -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static void ixj_get_serial(struct pcmcia_device * link, IXJ * j) { char *str; int i, place; - DEBUG(0, "ixj_get_serial(0x%p)\n", link); + dev_dbg(&link->dev, "ixj_get_serial\n"); str = link->prod_id[0]; if (!str) - goto cs_failed; + goto failed; printk("%s", str); str = link->prod_id[1]; if (!str) - goto cs_failed; + goto failed; printk(" %s", str); str = link->prod_id[2]; if (!str) - goto cs_failed; + goto failed; place = 1; for (i = strlen(str) - 1; i >= 0; i--) { switch (str[i]) { @@ -118,9 +108,9 @@ static void ixj_get_serial(struct pcmcia_device * link, IXJ * j) } str = link->prod_id[3]; if (!str) - goto cs_failed; + goto failed; printk(" version %s\n", str); - cs_failed: +failed: return; } @@ -151,13 +141,13 @@ static int ixj_config(struct pcmcia_device * link) cistpl_cftable_entry_t dflt = { 0 }; info = link->priv; - DEBUG(0, "ixj_config(0x%p)\n", link); + dev_dbg(&link->dev, "ixj_config\n"); if (pcmcia_loop_config(link, ixj_config_check, &dflt)) - goto cs_failed; + goto failed; if (pcmcia_request_configuration(link, &link->conf)) - goto cs_failed; + goto failed; /* * Register the card with the core. @@ -170,7 +160,7 @@ static int ixj_config(struct pcmcia_device * link) ixj_get_serial(link, j); return 0; - cs_failed: +failed: ixj_cs_release(link); return -ENODEV; } @@ -178,7 +168,7 @@ static int ixj_config(struct pcmcia_device * link) static void ixj_cs_release(struct pcmcia_device *link) { ixj_info_t *info = link->priv; - DEBUG(0, "ixj_cs_release(0x%p)\n", link); + dev_dbg(&link->dev, "ixj_cs_release\n"); info->ndev = 0; pcmcia_disable_device(link); } diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 5ce839137ad..0f857e64505 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -444,7 +444,7 @@ resubmit: static inline int hub_clear_tt_buffer (struct usb_device *hdev, u16 devinfo, u16 tt) { - return usb_control_msg(hdev, usb_rcvctrlpipe(hdev, 0), + return usb_control_msg(hdev, usb_sndctrlpipe(hdev, 0), HUB_CLEAR_TT_BUFFER, USB_RT_PORT, devinfo, tt, NULL, 0, 1000); } diff --git a/drivers/usb/gadget/amd5536udc.c b/drivers/usb/gadget/amd5536udc.c index d5b65962dd3..731150d4b1d 100644 --- a/drivers/usb/gadget/amd5536udc.c +++ b/drivers/usb/gadget/amd5536udc.c @@ -1213,7 +1213,12 @@ udc_queue(struct usb_ep *usbep, struct usb_request *usbreq, gfp_t gfp) tmp &= AMD_UNMASK_BIT(ep->num); writel(tmp, &dev->regs->ep_irqmsk); } - } + } else if (ep->in) { + /* enable ep irq */ + tmp = readl(&dev->regs->ep_irqmsk); + tmp &= AMD_UNMASK_BIT(ep->num); + writel(tmp, &dev->regs->ep_irqmsk); + } } else if (ep->dma) { @@ -2005,18 +2010,17 @@ __acquires(dev->lock) { int tmp; - /* empty queues and init hardware */ - udc_basic_init(dev); - for (tmp = 0; tmp < UDC_EP_NUM; tmp++) { - empty_req_queue(&dev->ep[tmp]); - } - if (dev->gadget.speed != USB_SPEED_UNKNOWN) { spin_unlock(&dev->lock); driver->disconnect(&dev->gadget); spin_lock(&dev->lock); } - /* init */ + + /* empty queues and init hardware */ + udc_basic_init(dev); + for (tmp = 0; tmp < UDC_EP_NUM; tmp++) + empty_req_queue(&dev->ep[tmp]); + udc_setup_endpoints(dev); } @@ -2472,6 +2476,13 @@ static irqreturn_t udc_data_in_isr(struct udc *dev, int ep_ix) } } + } else if (!use_dma && ep->in) { + /* disable interrupt */ + tmp = readl( + &dev->regs->ep_irqmsk); + tmp |= AMD_BIT(ep->num); + writel(tmp, + &dev->regs->ep_irqmsk); } } /* clear status bits */ @@ -3279,6 +3290,17 @@ static int udc_pci_probe( goto finished; } + spin_lock_init(&dev->lock); + /* udc csr registers base */ + dev->csr = dev->virt_addr + UDC_CSR_ADDR; + /* dev registers base */ + dev->regs = dev->virt_addr + UDC_DEVCFG_ADDR; + /* ep registers base */ + dev->ep_regs = dev->virt_addr + UDC_EPREGS_ADDR; + /* fifo's base */ + dev->rxfifo = (u32 __iomem *)(dev->virt_addr + UDC_RXFIFO_ADDR); + dev->txfifo = (u32 __iomem *)(dev->virt_addr + UDC_TXFIFO_ADDR); + if (request_irq(pdev->irq, udc_irq, IRQF_SHARED, name, dev) != 0) { dev_dbg(&dev->pdev->dev, "request_irq(%d) fail\n", pdev->irq); kfree(dev); @@ -3331,7 +3353,6 @@ static int udc_probe(struct udc *dev) udc_pollstall_timer.data = 0; /* device struct setup */ - spin_lock_init(&dev->lock); dev->gadget.ops = &udc_ops; dev_set_name(&dev->gadget.dev, "gadget"); @@ -3340,16 +3361,6 @@ static int udc_probe(struct udc *dev) dev->gadget.name = name; dev->gadget.is_dualspeed = 1; - /* udc csr registers base */ - dev->csr = dev->virt_addr + UDC_CSR_ADDR; - /* dev registers base */ - dev->regs = dev->virt_addr + UDC_DEVCFG_ADDR; - /* ep registers base */ - dev->ep_regs = dev->virt_addr + UDC_EPREGS_ADDR; - /* fifo's base */ - dev->rxfifo = (u32 __iomem *)(dev->virt_addr + UDC_RXFIFO_ADDR); - dev->txfifo = (u32 __iomem *)(dev->virt_addr + UDC_TXFIFO_ADDR); - /* init registers, interrupts, ... */ startup_registers(dev); diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index 9835e071394..f5f5601701c 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -28,6 +28,7 @@ #include <linux/errno.h> #include <linux/init.h> #include <linux/timer.h> +#include <linux/ktime.h> #include <linux/list.h> #include <linux/interrupt.h> #include <linux/usb.h> @@ -676,6 +677,7 @@ static int ehci_run (struct usb_hcd *hcd) ehci_readl(ehci, &ehci->regs->command); /* unblock posted writes */ msleep(5); up_write(&ehci_cf_port_reset_rwsem); + ehci->last_periodic_enable = ktime_get_real(); temp = HC_VERSION(ehci_readl(ehci, &ehci->caps->hc_capbase)); ehci_info (ehci, diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c index 378861b9d79..ead5f4f2aa5 100644 --- a/drivers/usb/host/ehci-pci.c +++ b/drivers/usb/host/ehci-pci.c @@ -111,6 +111,10 @@ static int ehci_pci_setup(struct usb_hcd *hcd) switch (pdev->vendor) { case PCI_VENDOR_ID_INTEL: ehci->need_io_watchdog = 0; + if (pdev->device == 0x27cc) { + ehci->broken_periodic = 1; + ehci_info(ehci, "using broken periodic workaround\n"); + } break; case PCI_VENDOR_ID_TDI: if (pdev->device == PCI_DEVICE_ID_TDI_EHCI) { diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c index 00ad9ce392e..139a2cc3f64 100644 --- a/drivers/usb/host/ehci-q.c +++ b/drivers/usb/host/ehci-q.c @@ -487,8 +487,20 @@ halt: * we must clear the TT buffer (11.17.5). */ if (unlikely(last_status != -EINPROGRESS && - last_status != -EREMOTEIO)) - ehci_clear_tt_buffer(ehci, qh, urb, token); + last_status != -EREMOTEIO)) { + /* The TT's in some hubs malfunction when they + * receive this request following a STALL (they + * stop sending isochronous packets). Since a + * STALL can't leave the TT buffer in a busy + * state (if you believe Figures 11-48 - 11-51 + * in the USB 2.0 spec), we won't clear the TT + * buffer in this case. Strictly speaking this + * is a violation of the spec. + */ + if (last_status != -EPIPE) + ehci_clear_tt_buffer(ehci, qh, urb, + token); + } } /* if we're removing something not at the queue head, diff --git a/drivers/usb/host/ehci-sched.c b/drivers/usb/host/ehci-sched.c index b25cdea93a1..a5535b5e3fe 100644 --- a/drivers/usb/host/ehci-sched.c +++ b/drivers/usb/host/ehci-sched.c @@ -475,6 +475,8 @@ static int enable_periodic (struct ehci_hcd *ehci) /* make sure ehci_work scans these */ ehci->next_uframe = ehci_readl(ehci, &ehci->regs->frame_index) % (ehci->periodic_size << 3); + if (unlikely(ehci->broken_periodic)) + ehci->last_periodic_enable = ktime_get_real(); return 0; } @@ -486,6 +488,16 @@ static int disable_periodic (struct ehci_hcd *ehci) if (--ehci->periodic_sched) return 0; + if (unlikely(ehci->broken_periodic)) { + /* delay experimentally determined */ + ktime_t safe = ktime_add_us(ehci->last_periodic_enable, 1000); + ktime_t now = ktime_get_real(); + s64 delay = ktime_us_delta(safe, now); + + if (unlikely(delay > 0)) + udelay(delay); + } + /* did setting PSE not take effect yet? * takes effect only at frame boundaries... */ diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h index 064e76821ff..2d85e21ff28 100644 --- a/drivers/usb/host/ehci.h +++ b/drivers/usb/host/ehci.h @@ -118,6 +118,7 @@ struct ehci_hcd { /* one per controller */ unsigned stamp; unsigned random_frame; unsigned long next_statechange; + ktime_t last_periodic_enable; u32 command; /* SILICON QUIRKS */ @@ -127,6 +128,7 @@ struct ehci_hcd { /* one per controller */ unsigned big_endian_desc:1; unsigned has_amcc_usb23:1; unsigned need_io_watchdog:1; + unsigned broken_periodic:1; /* required for usb32 quirk */ #define OHCI_CTRL_HCFS (3 << 6) diff --git a/drivers/usb/host/sl811_cs.c b/drivers/usb/host/sl811_cs.c index 516848dd9b4..39d253e841f 100644 --- a/drivers/usb/host/sl811_cs.c +++ b/drivers/usb/host/sl811_cs.c @@ -37,28 +37,8 @@ MODULE_LICENSE("GPL"); /* MACROS */ /*====================================================================*/ -#if defined(DEBUG) || defined(PCMCIA_DEBUG) - -static int pc_debug = 0; -module_param(pc_debug, int, 0644); - -#define DBG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG "sl811_cs: " args) - -#else -#define DBG(n, args...) do{}while(0) -#endif /* no debugging */ - #define INFO(args...) printk(KERN_INFO "sl811_cs: " args) -#define INT_MODULE_PARM(n, v) static int n = v; module_param(n, int, 0444) - -#define CS_CHECK(fn, ret) \ - do { \ - last_fn = (fn); \ - if ((last_ret = (ret)) != 0) \ - goto cs_failed; \ - } while (0) - /*====================================================================*/ /* VARIABLES */ /*====================================================================*/ @@ -76,7 +56,7 @@ static void sl811_cs_release(struct pcmcia_device * link); static void release_platform_dev(struct device * dev) { - DBG(0, "sl811_cs platform_dev release\n"); + dev_dbg(dev, "sl811_cs platform_dev release\n"); dev->parent = NULL; } @@ -140,7 +120,7 @@ static int sl811_hc_init(struct device *parent, resource_size_t base_addr, static void sl811_cs_detach(struct pcmcia_device *link) { - DBG(0, "sl811_cs_detach(0x%p)\n", link); + dev_dbg(&link->dev, "sl811_cs_detach\n"); sl811_cs_release(link); @@ -150,7 +130,7 @@ static void sl811_cs_detach(struct pcmcia_device *link) static void sl811_cs_release(struct pcmcia_device * link) { - DBG(0, "sl811_cs_release(0x%p)\n", link); + dev_dbg(&link->dev, "sl811_cs_release\n"); pcmcia_disable_device(link); platform_device_unregister(&platform_dev); @@ -205,11 +185,11 @@ static int sl811_cs_config_check(struct pcmcia_device *p_dev, static int sl811_cs_config(struct pcmcia_device *link) { - struct device *parent = &handle_to_dev(link); + struct device *parent = &link->dev; local_info_t *dev = link->priv; - int last_fn, last_ret; + int ret; - DBG(0, "sl811_cs_config(0x%p)\n", link); + dev_dbg(&link->dev, "sl811_cs_config\n"); if (pcmcia_loop_config(link, sl811_cs_config_check, NULL)) goto failed; @@ -217,14 +197,16 @@ static int sl811_cs_config(struct pcmcia_device *link) /* require an IRQ and two registers */ if (!link->io.NumPorts1 || link->io.NumPorts1 < 2) goto failed; - if (link->conf.Attributes & CONF_ENABLE_IRQ) - CS_CHECK(RequestIRQ, - pcmcia_request_irq(link, &link->irq)); - else + if (link->conf.Attributes & CONF_ENABLE_IRQ) { + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; + } else goto failed; - CS_CHECK(RequestConfiguration, - pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; sprintf(dev->node.dev_name, driver_name); dev->node.major = dev->node.minor = 0; @@ -241,8 +223,6 @@ static int sl811_cs_config(struct pcmcia_device *link) if (sl811_hc_init(parent, link->io.BasePort1, link->irq.AssignedIRQ) < 0) { -cs_failed: - cs_error(link, last_fn, last_ret); failed: printk(KERN_WARNING "sl811_cs_config failed\n"); sl811_cs_release(link); @@ -263,7 +243,6 @@ static int sl811_cs_probe(struct pcmcia_device *link) /* Initialize */ link->irq.Attributes = IRQ_TYPE_EXCLUSIVE; - link->irq.IRQInfo1 = IRQ_INFO2_VALID|IRQ_LEVEL_ID; link->irq.Handler = NULL; link->conf.Attributes = 0; diff --git a/drivers/usb/musb/cppi_dma.c b/drivers/usb/musb/cppi_dma.c index c3577bbbae6..ef2332a9941 100644 --- a/drivers/usb/musb/cppi_dma.c +++ b/drivers/usb/musb/cppi_dma.c @@ -1442,11 +1442,6 @@ static int cppi_channel_abort(struct dma_channel *channel) musb_writew(regs, MUSB_TXCSR, value); musb_writew(regs, MUSB_TXCSR, value); - /* re-enable interrupt */ - if (enabled) - musb_writel(tibase, DAVINCI_TXCPPI_INTENAB_REG, - (1 << cppi_ch->index)); - /* While we scrub the TX state RAM, ensure that we clean * up any interrupt that's currently asserted: * 1. Write to completion Ptr value 0x1(bit 0 set) @@ -1459,6 +1454,11 @@ static int cppi_channel_abort(struct dma_channel *channel) cppi_reset_tx(tx_ram, 1); musb_writel(&tx_ram->tx_complete, 0, 0); + /* re-enable interrupt */ + if (enabled) + musb_writel(tibase, DAVINCI_TXCPPI_INTENAB_REG, + (1 << cppi_ch->index)); + cppi_dump_tx(5, cppi_ch, " (done teardown)"); /* REVISIT tx side _should_ clean up the same way diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 3a61ddb62bd..547e0e39072 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -1450,7 +1450,7 @@ static int __init musb_core_init(u16 musb_type, struct musb *musb) #endif if (hw_ep->max_packet_sz_tx) { - printk(KERN_DEBUG + DBG(1, "%s: hw_ep %d%s, %smax %d\n", musb_driver_name, i, hw_ep->is_shared_fifo ? "shared" : "tx", @@ -1459,7 +1459,7 @@ static int __init musb_core_init(u16 musb_type, struct musb *musb) hw_ep->max_packet_sz_tx); } if (hw_ep->max_packet_sz_rx && !hw_ep->is_shared_fifo) { - printk(KERN_DEBUG + DBG(1, "%s: hw_ep %d%s, %smax %d\n", musb_driver_name, i, "rx", diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index 8b3c4e2ed7b..74073f9a43f 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -4,6 +4,7 @@ * Copyright 2005 Mentor Graphics Corporation * Copyright (C) 2005-2006 by Texas Instruments * Copyright (C) 2006-2007 Nokia Corporation + * Copyright (C) 2009 MontaVista Software, Inc. <source@mvista.com> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -436,14 +437,6 @@ void musb_g_tx(struct musb *musb, u8 epnum) csr |= MUSB_TXCSR_P_WZC_BITS; csr &= ~MUSB_TXCSR_P_SENTSTALL; musb_writew(epio, MUSB_TXCSR, csr); - if (dma_channel_status(dma) == MUSB_DMA_STATUS_BUSY) { - dma->status = MUSB_DMA_STATUS_CORE_ABORT; - musb->dma_controller->channel_abort(dma); - } - - if (request) - musb_g_giveback(musb_ep, request, -EPIPE); - break; } @@ -582,15 +575,25 @@ void musb_g_tx(struct musb *musb, u8 epnum) */ static void rxstate(struct musb *musb, struct musb_request *req) { - u16 csr = 0; const u8 epnum = req->epnum; struct usb_request *request = &req->request; struct musb_ep *musb_ep = &musb->endpoints[epnum].ep_out; void __iomem *epio = musb->endpoints[epnum].regs; unsigned fifo_count = 0; u16 len = musb_ep->packet_sz; + u16 csr = musb_readw(epio, MUSB_RXCSR); - csr = musb_readw(epio, MUSB_RXCSR); + /* We shouldn't get here while DMA is active, but we do... */ + if (dma_channel_status(musb_ep->dma) == MUSB_DMA_STATUS_BUSY) { + DBG(4, "DMA pending...\n"); + return; + } + + if (csr & MUSB_RXCSR_P_SENDSTALL) { + DBG(5, "%s stalling, RXCSR %04x\n", + musb_ep->end_point.name, csr); + return; + } if (is_cppi_enabled() && musb_ep->dma) { struct dma_controller *c = musb->dma_controller; @@ -761,19 +764,10 @@ void musb_g_rx(struct musb *musb, u8 epnum) csr, dma ? " (dma)" : "", request); if (csr & MUSB_RXCSR_P_SENTSTALL) { - if (dma_channel_status(dma) == MUSB_DMA_STATUS_BUSY) { - dma->status = MUSB_DMA_STATUS_CORE_ABORT; - (void) musb->dma_controller->channel_abort(dma); - request->actual += musb_ep->dma->actual_len; - } - csr |= MUSB_RXCSR_P_WZC_BITS; csr &= ~MUSB_RXCSR_P_SENTSTALL; musb_writew(epio, MUSB_RXCSR, csr); - - if (request) - musb_g_giveback(musb_ep, request, -EPIPE); - goto done; + return; } if (csr & MUSB_RXCSR_P_OVERRUN) { @@ -795,7 +789,7 @@ void musb_g_rx(struct musb *musb, u8 epnum) DBG((csr & MUSB_RXCSR_DMAENAB) ? 4 : 1, "%s busy, csr %04x\n", musb_ep->end_point.name, csr); - goto done; + return; } if (dma && (csr & MUSB_RXCSR_DMAENAB)) { @@ -826,22 +820,15 @@ void musb_g_rx(struct musb *musb, u8 epnum) if ((request->actual < request->length) && (musb_ep->dma->actual_len == musb_ep->packet_sz)) - goto done; + return; #endif musb_g_giveback(musb_ep, request, 0); request = next_request(musb_ep); if (!request) - goto done; - - /* don't start more i/o till the stall clears */ - musb_ep_select(mbase, epnum); - csr = musb_readw(epio, MUSB_RXCSR); - if (csr & MUSB_RXCSR_P_SENDSTALL) - goto done; + return; } - /* analyze request if the ep is hot */ if (request) rxstate(musb, to_musb_request(request)); @@ -849,8 +836,6 @@ void musb_g_rx(struct musb *musb, u8 epnum) DBG(3, "packet waiting for %s%s request\n", musb_ep->desc ? "" : "inactive ", musb_ep->end_point.name); - -done: return; } @@ -1244,7 +1229,7 @@ int musb_gadget_set_halt(struct usb_ep *ep, int value) void __iomem *mbase; unsigned long flags; u16 csr; - struct musb_request *request = NULL; + struct musb_request *request; int status = 0; if (!ep) @@ -1260,24 +1245,29 @@ int musb_gadget_set_halt(struct usb_ep *ep, int value) musb_ep_select(mbase, epnum); - /* cannot portably stall with non-empty FIFO */ request = to_musb_request(next_request(musb_ep)); - if (value && musb_ep->is_in) { - csr = musb_readw(epio, MUSB_TXCSR); - if (csr & MUSB_TXCSR_FIFONOTEMPTY) { - DBG(3, "%s fifo busy, cannot halt\n", ep->name); - spin_unlock_irqrestore(&musb->lock, flags); - return -EAGAIN; + if (value) { + if (request) { + DBG(3, "request in progress, cannot halt %s\n", + ep->name); + status = -EAGAIN; + goto done; + } + /* Cannot portably stall with non-empty FIFO */ + if (musb_ep->is_in) { + csr = musb_readw(epio, MUSB_TXCSR); + if (csr & MUSB_TXCSR_FIFONOTEMPTY) { + DBG(3, "FIFO busy, cannot halt %s\n", ep->name); + status = -EAGAIN; + goto done; + } } - } /* set/clear the stall and toggle bits */ DBG(2, "%s: %s stall\n", ep->name, value ? "set" : "clear"); if (musb_ep->is_in) { csr = musb_readw(epio, MUSB_TXCSR); - if (csr & MUSB_TXCSR_FIFONOTEMPTY) - csr |= MUSB_TXCSR_FLUSHFIFO; csr |= MUSB_TXCSR_P_WZC_BITS | MUSB_TXCSR_CLRDATATOG; if (value) @@ -1300,14 +1290,13 @@ int musb_gadget_set_halt(struct usb_ep *ep, int value) musb_writew(epio, MUSB_RXCSR, csr); } -done: - /* maybe start the first request in the queue */ if (!musb_ep->busy && !value && request) { DBG(3, "restarting the request\n"); musb_ep_restart(musb, request); } +done: spin_unlock_irqrestore(&musb->lock, flags); return status; } diff --git a/drivers/usb/musb/musb_gadget_ep0.c b/drivers/usb/musb/musb_gadget_ep0.c index 7a6778675ad..522efb31b56 100644 --- a/drivers/usb/musb/musb_gadget_ep0.c +++ b/drivers/usb/musb/musb_gadget_ep0.c @@ -511,7 +511,8 @@ static void ep0_txstate(struct musb *musb) /* update the flags */ if (fifo_count < MUSB_MAX_END0_PACKET - || request->actual == request->length) { + || (request->actual == request->length + && !request->zero)) { musb->ep0_state = MUSB_EP0_STAGE_STATUSOUT; csr |= MUSB_CSR0_P_DATAEND; } else diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index cf94511485f..e3ab40a966e 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -1301,8 +1301,11 @@ void musb_host_tx(struct musb *musb, u8 epnum) return; } else if (usb_pipeisoc(pipe) && dma) { if (musb_tx_dma_program(musb->dma_controller, hw_ep, qh, urb, - offset, length)) + offset, length)) { + if (is_cppi_enabled() || tusb_dma_omap()) + musb_h_tx_dma_start(hw_ep); return; + } } else if (tx_csr & MUSB_TXCSR_DMAENAB) { DBG(1, "not complete, but DMA enabled?\n"); return; diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 9c60d6d4908..ebcc6d0e2e9 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1937,7 +1937,7 @@ static void ftdi_write_bulk_callback(struct urb *urb) return; } /* account for transferred data */ - countback = urb->actual_length; + countback = urb->transfer_buffer_length; data_offset = priv->write_offset; if (data_offset > 0) { /* Subtract the control bytes */ @@ -1950,7 +1950,6 @@ static void ftdi_write_bulk_callback(struct urb *urb) if (status) { dbg("nonzero write bulk status received: %d", status); - return; } usb_serial_port_softint(port); diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 319aaf9725b..0577e4b6111 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -336,6 +336,10 @@ static int option_resume(struct usb_serial *serial); #define AIRPLUS_VENDOR_ID 0x1011 #define AIRPLUS_PRODUCT_MCD650 0x3198 +/* 4G Systems products */ +#define FOUR_G_SYSTEMS_VENDOR_ID 0x1c9e +#define FOUR_G_SYSTEMS_PRODUCT_W14 0x9603 + static struct usb_device_id option_ids[] = { { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) }, @@ -599,6 +603,7 @@ static struct usb_device_id option_ids[] = { { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X060S) }, { USB_DEVICE(AIRPLUS_VENDOR_ID, AIRPLUS_PRODUCT_MCD650) }, { USB_DEVICE(TLAYTECH_VENDOR_ID, TLAYTECH_PRODUCT_TEU800) }, + { USB_DEVICE(FOUR_G_SYSTEMS_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W14) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); diff --git a/drivers/video/da8xx-fb.c b/drivers/video/da8xx-fb.c index 035d56835b7..ea1fd3f4751 100644 --- a/drivers/video/da8xx-fb.c +++ b/drivers/video/da8xx-fb.c @@ -554,11 +554,11 @@ static int fb_check_var(struct fb_var_screeninfo *var, var->transp.length = 0; break; case 16: /* RGB 565 */ - var->red.offset = 0; + var->red.offset = 11; var->red.length = 5; var->green.offset = 5; var->green.length = 6; - var->blue.offset = 11; + var->blue.offset = 0; var->blue.length = 5; var->transp.offset = 0; var->transp.length = 0; @@ -591,7 +591,7 @@ static int __devexit fb_remove(struct platform_device *dev) unregister_framebuffer(info); fb_dealloc_cmap(&info->cmap); dma_free_coherent(NULL, par->databuf_sz + PAGE_SIZE, - info->screen_base, + info->screen_base - PAGE_SIZE, info->fix.smem_start); free_irq(par->irq, par); clk_disable(par->lcdc_clk); @@ -749,6 +749,7 @@ static int __init fb_probe(struct platform_device *device) (PAGE_SIZE - par->palette_sz); /* the rest of the frame buffer is pixel data */ + da8xx_fb_info->screen_base = par->v_palette_base + par->palette_sz; da8xx_fb_fix.smem_start = par->p_palette_base + par->palette_sz; da8xx_fb_fix.smem_len = par->databuf_sz - par->palette_sz; da8xx_fb_fix.line_length = (lcdc_info->width * lcd_cfg->bpp) / 8; @@ -787,6 +788,8 @@ static int __init fb_probe(struct platform_device *device) da8xx_fb_info->var = da8xx_fb_var; da8xx_fb_info->fbops = &da8xx_fb_ops; da8xx_fb_info->pseudo_palette = par->pseudo_palette; + da8xx_fb_info->fix.visual = (da8xx_fb_info->var.bits_per_pixel <= 8) ? + FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_TRUECOLOR; ret = fb_alloc_cmap(&da8xx_fb_info->cmap, PALETTE_SIZE, 0); if (ret) @@ -825,7 +828,7 @@ err_free_irq: err_release_fb_mem: dma_free_coherent(NULL, par->databuf_sz + PAGE_SIZE, - da8xx_fb_info->screen_base, + da8xx_fb_info->screen_base - PAGE_SIZE, da8xx_fb_info->fix.smem_start); err_release_fb: diff --git a/drivers/video/gbefb.c b/drivers/video/gbefb.c index 1a83709f961..f67db426837 100644 --- a/drivers/video/gbefb.c +++ b/drivers/video/gbefb.c @@ -1147,7 +1147,7 @@ static int __init gbefb_probe(struct platform_device *p_dev) gbefb_setup(options); #endif - if (!request_region(GBE_BASE, sizeof(struct sgi_gbe), "GBE")) { + if (!request_mem_region(GBE_BASE, sizeof(struct sgi_gbe), "GBE")) { printk(KERN_ERR "gbefb: couldn't reserve mmio region\n"); ret = -EBUSY; goto out_release_framebuffer; diff --git a/drivers/watchdog/rc32434_wdt.c b/drivers/watchdog/rc32434_wdt.c index f6cccc9df02..bf12d06b587 100644 --- a/drivers/watchdog/rc32434_wdt.c +++ b/drivers/watchdog/rc32434_wdt.c @@ -62,7 +62,7 @@ extern unsigned int idt_cpu_freq; static int timeout = WATCHDOG_TIMEOUT; module_param(timeout, int, 0); MODULE_PARM_DESC(timeout, "Watchdog timeout value, in seconds (default=" - WATCHDOG_TIMEOUT ")"); + __MODULE_STRING(WATCHDOG_TIMEOUT) ")"); static int nowayout = WATCHDOG_NOWAYOUT; module_param(nowayout, int, 0); @@ -276,7 +276,7 @@ static int __devinit rc32434_wdt_probe(struct platform_device *pdev) return -ENODEV; } - wdt_reg = ioremap_nocache(r->start, r->end - r->start); + wdt_reg = ioremap_nocache(r->start, resource_size(r)); if (!wdt_reg) { printk(KERN_ERR PFX "failed to remap I/O resources\n"); return -ENXIO; diff --git a/fs/9p/cache.c b/fs/9p/cache.c index bcc5357a906..e777961939f 100644 --- a/fs/9p/cache.c +++ b/fs/9p/cache.c @@ -343,7 +343,7 @@ int __v9fs_fscache_release_page(struct page *page, gfp_t gfp) BUG_ON(!vcookie->fscache); - return fscache_maybe_release_page(vnode->cache, page, gfp); + return fscache_maybe_release_page(vcookie->fscache, page, gfp); } void __v9fs_fscache_invalidate_page(struct page *page) diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 1d833256386..a6c8c6fe8df 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -11,6 +11,7 @@ #include <linux/mount.h> #include <linux/file.h> +#include <linux/ima.h> #include "internal.h" /* @@ -922,6 +923,7 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) if (IS_ERR(file)) { ret = PTR_ERR(file); } else { + ima_counts_get(file); ret = -EIO; if (file->f_op->write) { pos = (loff_t) page->index << PAGE_SHIFT; diff --git a/fs/exec.c b/fs/exec.c index ba112bd4a33..c0c636e34f6 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -46,7 +46,6 @@ #include <linux/proc_fs.h> #include <linux/mount.h> #include <linux/security.h> -#include <linux/ima.h> #include <linux/syscalls.h> #include <linux/tsacct_kern.h> #include <linux/cn_proc.h> @@ -1209,9 +1208,6 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) retval = security_bprm_check(bprm); if (retval) return retval; - retval = ima_bprm_check(bprm); - if (retval) - return retval; /* kernel module loader fixup */ /* so we don't try to load run modprobe in kernel space. */ diff --git a/fs/file_table.c b/fs/file_table.c index 8eb44042e00..4bef4c01ec6 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -13,7 +13,6 @@ #include <linux/module.h> #include <linux/fs.h> #include <linux/security.h> -#include <linux/ima.h> #include <linux/eventpoll.h> #include <linux/rcupdate.h> #include <linux/mount.h> @@ -280,7 +279,6 @@ void __fput(struct file *file) if (file->f_op && file->f_op->release) file->f_op->release(inode, file); security_file_free(file); - ima_file_free(file); if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL)) cdev_put(inode->i_cdev); fops_put(file->f_op); diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index 5971359d209..4dcddf83326 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig @@ -8,6 +8,8 @@ config GFS2_FS select FS_POSIX_ACL select CRC32 select SLOW_WORK + select QUOTA + select QUOTACTL help A cluster filesystem. diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 3fc4e3ac7d8..3eb1ea84617 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -12,6 +12,7 @@ #include <linux/spinlock.h> #include <linux/completion.h> #include <linux/buffer_head.h> +#include <linux/xattr.h> #include <linux/posix_acl.h> #include <linux/posix_acl_xattr.h> #include <linux/gfs2_ondisk.h> @@ -26,108 +27,44 @@ #include "trans.h" #include "util.h" -#define ACL_ACCESS 1 -#define ACL_DEFAULT 0 - -int gfs2_acl_validate_set(struct gfs2_inode *ip, int access, - struct gfs2_ea_request *er, int *remove, mode_t *mode) +static const char *gfs2_acl_name(int type) { - struct posix_acl *acl; - int error; - - error = gfs2_acl_validate_remove(ip, access); - if (error) - return error; - - if (!er->er_data) - return -EINVAL; - - acl = posix_acl_from_xattr(er->er_data, er->er_data_len); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (!acl) { - *remove = 1; - return 0; - } - - error = posix_acl_valid(acl); - if (error) - goto out; - - if (access) { - error = posix_acl_equiv_mode(acl, mode); - if (!error) - *remove = 1; - else if (error > 0) - error = 0; + switch (type) { + case ACL_TYPE_ACCESS: + return GFS2_POSIX_ACL_ACCESS; + case ACL_TYPE_DEFAULT: + return GFS2_POSIX_ACL_DEFAULT; } - -out: - posix_acl_release(acl); - return error; -} - -int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access) -{ - if (!GFS2_SB(&ip->i_inode)->sd_args.ar_posix_acl) - return -EOPNOTSUPP; - if (!is_owner_or_cap(&ip->i_inode)) - return -EPERM; - if (S_ISLNK(ip->i_inode.i_mode)) - return -EOPNOTSUPP; - if (!access && !S_ISDIR(ip->i_inode.i_mode)) - return -EACCES; - - return 0; + return NULL; } -static int acl_get(struct gfs2_inode *ip, const char *name, - struct posix_acl **acl, struct gfs2_ea_location *el, - char **datap, unsigned int *lenp) +static struct posix_acl *gfs2_acl_get(struct gfs2_inode *ip, int type) { + struct posix_acl *acl; + const char *name; char *data; - unsigned int len; - int error; - - el->el_bh = NULL; + int len; if (!ip->i_eattr) - return 0; - - error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, name, el); - if (error) - return error; - if (!el->el_ea) - return 0; - if (!GFS2_EA_DATA_LEN(el->el_ea)) - goto out; + return NULL; - len = GFS2_EA_DATA_LEN(el->el_ea); - data = kmalloc(len, GFP_NOFS); - error = -ENOMEM; - if (!data) - goto out; + acl = get_cached_acl(&ip->i_inode, type); + if (acl != ACL_NOT_CACHED) + return acl; - error = gfs2_ea_get_copy(ip, el, data, len); - if (error < 0) - goto out_kfree; - error = 0; + name = gfs2_acl_name(type); + if (name == NULL) + return ERR_PTR(-EINVAL); - if (acl) { - *acl = posix_acl_from_xattr(data, len); - if (IS_ERR(*acl)) - error = PTR_ERR(*acl); - } + len = gfs2_xattr_acl_get(ip, name, &data); + if (len < 0) + return ERR_PTR(len); + if (len == 0) + return NULL; -out_kfree: - if (error || !datap) { - kfree(data); - } else { - *datap = data; - *lenp = len; - } -out: - return error; + acl = posix_acl_from_xattr(data, len); + kfree(data); + return acl; } /** @@ -140,14 +77,12 @@ out: int gfs2_check_acl(struct inode *inode, int mask) { - struct gfs2_ea_location el; - struct posix_acl *acl = NULL; + struct posix_acl *acl; int error; - error = acl_get(GFS2_I(inode), GFS2_POSIX_ACL_ACCESS, &acl, &el, NULL, NULL); - brelse(el.el_bh); - if (error) - return error; + acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS); + if (IS_ERR(acl)) + return PTR_ERR(acl); if (acl) { error = posix_acl_permission(inode, acl, mask); @@ -158,57 +93,75 @@ int gfs2_check_acl(struct inode *inode, int mask) return -EAGAIN; } -static int munge_mode(struct gfs2_inode *ip, mode_t mode) +static int gfs2_set_mode(struct inode *inode, mode_t mode) { - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct buffer_head *dibh; - int error; + int error = 0; - error = gfs2_trans_begin(sdp, RES_DINODE, 0); - if (error) - return error; + if (mode != inode->i_mode) { + struct iattr iattr; - error = gfs2_meta_inode_buffer(ip, &dibh); - if (!error) { - gfs2_assert_withdraw(sdp, - (ip->i_inode.i_mode & S_IFMT) == (mode & S_IFMT)); - ip->i_inode.i_mode = mode; - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(ip, dibh->b_data); - brelse(dibh); + iattr.ia_valid = ATTR_MODE; + iattr.ia_mode = mode; + + error = gfs2_setattr_simple(GFS2_I(inode), &iattr); } - gfs2_trans_end(sdp); + return error; +} + +static int gfs2_acl_set(struct inode *inode, int type, struct posix_acl *acl) +{ + int error; + int len; + char *data; + const char *name = gfs2_acl_name(type); - return 0; + BUG_ON(name == NULL); + len = posix_acl_to_xattr(acl, NULL, 0); + if (len == 0) + return 0; + data = kmalloc(len, GFP_NOFS); + if (data == NULL) + return -ENOMEM; + error = posix_acl_to_xattr(acl, data, len); + if (error < 0) + goto out; + error = gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, data, len, 0); + if (!error) + set_cached_acl(inode, type, acl); +out: + kfree(data); + return error; } -int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) +int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode) { - struct gfs2_ea_location el; struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - struct posix_acl *acl = NULL, *clone; - mode_t mode = ip->i_inode.i_mode; - char *data = NULL; - unsigned int len; - int error; + struct posix_acl *acl, *clone; + mode_t mode = inode->i_mode; + int error = 0; if (!sdp->sd_args.ar_posix_acl) return 0; - if (S_ISLNK(ip->i_inode.i_mode)) + if (S_ISLNK(inode->i_mode)) return 0; - error = acl_get(dip, GFS2_POSIX_ACL_DEFAULT, &acl, &el, &data, &len); - brelse(el.el_bh); - if (error) - return error; + acl = gfs2_acl_get(dip, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) + return PTR_ERR(acl); if (!acl) { mode &= ~current_umask(); - if (mode != ip->i_inode.i_mode) - error = munge_mode(ip, mode); + if (mode != inode->i_mode) + error = gfs2_set_mode(inode, mode); return error; } + if (S_ISDIR(inode->i_mode)) { + error = gfs2_acl_set(inode, ACL_TYPE_DEFAULT, acl); + if (error) + goto out; + } + clone = posix_acl_clone(acl, GFP_NOFS); error = -ENOMEM; if (!clone) @@ -216,43 +169,32 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) posix_acl_release(acl); acl = clone; - if (S_ISDIR(ip->i_inode.i_mode)) { - error = gfs2_xattr_set(&ip->i_inode, GFS2_EATYPE_SYS, - GFS2_POSIX_ACL_DEFAULT, data, len, 0); - if (error) - goto out; - } - error = posix_acl_create_masq(acl, &mode); if (error < 0) goto out; if (error == 0) goto munge; - posix_acl_to_xattr(acl, data, len); - error = gfs2_xattr_set(&ip->i_inode, GFS2_EATYPE_SYS, - GFS2_POSIX_ACL_ACCESS, data, len, 0); + error = gfs2_acl_set(inode, ACL_TYPE_ACCESS, acl); if (error) goto out; munge: - error = munge_mode(ip, mode); + error = gfs2_set_mode(inode, mode); out: posix_acl_release(acl); - kfree(data); return error; } int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr) { - struct posix_acl *acl = NULL, *clone; - struct gfs2_ea_location el; + struct posix_acl *acl, *clone; char *data; unsigned int len; int error; - error = acl_get(ip, GFS2_POSIX_ACL_ACCESS, &acl, &el, &data, &len); - if (error) - goto out_brelse; + acl = gfs2_acl_get(ip, ACL_TYPE_ACCESS); + if (IS_ERR(acl)) + return PTR_ERR(acl); if (!acl) return gfs2_setattr_simple(ip, attr); @@ -265,15 +207,134 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr) error = posix_acl_chmod_masq(acl, attr->ia_mode); if (!error) { + len = posix_acl_to_xattr(acl, NULL, 0); + data = kmalloc(len, GFP_NOFS); + error = -ENOMEM; + if (data == NULL) + goto out; posix_acl_to_xattr(acl, data, len); - error = gfs2_ea_acl_chmod(ip, &el, attr, data); + error = gfs2_xattr_acl_chmod(ip, attr, data); + kfree(data); + set_cached_acl(&ip->i_inode, ACL_TYPE_ACCESS, acl); } out: posix_acl_release(acl); - kfree(data); -out_brelse: - brelse(el.el_bh); return error; } +static int gfs2_acl_type(const char *name) +{ + if (strcmp(name, GFS2_POSIX_ACL_ACCESS) == 0) + return ACL_TYPE_ACCESS; + if (strcmp(name, GFS2_POSIX_ACL_DEFAULT) == 0) + return ACL_TYPE_DEFAULT; + return -EINVAL; +} + +static int gfs2_xattr_system_get(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + struct posix_acl *acl; + int type; + int error; + + type = gfs2_acl_type(name); + if (type < 0) + return type; + + acl = gfs2_acl_get(GFS2_I(inode), type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl == NULL) + return -ENODATA; + + error = posix_acl_to_xattr(acl, buffer, size); + posix_acl_release(acl); + + return error; +} + +static int gfs2_xattr_system_set(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + struct gfs2_sbd *sdp = GFS2_SB(inode); + struct posix_acl *acl = NULL; + int error = 0, type; + + if (!sdp->sd_args.ar_posix_acl) + return -EOPNOTSUPP; + + type = gfs2_acl_type(name); + if (type < 0) + return type; + if (flags & XATTR_CREATE) + return -EINVAL; + if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) + return value ? -EACCES : 0; + if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER)) + return -EPERM; + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + if (!value) + goto set_acl; + + acl = posix_acl_from_xattr(value, size); + if (!acl) { + /* + * acl_set_file(3) may request that we set default ACLs with + * zero length -- defend (gracefully) against that here. + */ + goto out; + } + if (IS_ERR(acl)) { + error = PTR_ERR(acl); + goto out; + } + + error = posix_acl_valid(acl); + if (error) + goto out_release; + + error = -EINVAL; + if (acl->a_count > GFS2_ACL_MAX_ENTRIES) + goto out_release; + + if (type == ACL_TYPE_ACCESS) { + mode_t mode = inode->i_mode; + error = posix_acl_equiv_mode(acl, &mode); + + if (error <= 0) { + posix_acl_release(acl); + acl = NULL; + + if (error < 0) + return error; + } + + error = gfs2_set_mode(inode, mode); + if (error) + goto out_release; + } + +set_acl: + error = gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, value, size, 0); + if (!error) { + if (acl) + set_cached_acl(inode, type, acl); + else + forget_cached_acl(inode, type); + } +out_release: + posix_acl_release(acl); +out: + return error; +} + +struct xattr_handler gfs2_xattr_system_handler = { + .prefix = XATTR_SYSTEM_PREFIX, + .get = gfs2_xattr_system_get, + .set = gfs2_xattr_system_set, +}; + diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h index 6751930bfb6..9306a2e6620 100644 --- a/fs/gfs2/acl.h +++ b/fs/gfs2/acl.h @@ -13,26 +13,12 @@ #include "incore.h" #define GFS2_POSIX_ACL_ACCESS "posix_acl_access" -#define GFS2_POSIX_ACL_ACCESS_LEN 16 #define GFS2_POSIX_ACL_DEFAULT "posix_acl_default" -#define GFS2_POSIX_ACL_DEFAULT_LEN 17 +#define GFS2_ACL_MAX_ENTRIES 25 -#define GFS2_ACL_IS_ACCESS(name, len) \ - ((len) == GFS2_POSIX_ACL_ACCESS_LEN && \ - !memcmp(GFS2_POSIX_ACL_ACCESS, (name), (len))) - -#define GFS2_ACL_IS_DEFAULT(name, len) \ - ((len) == GFS2_POSIX_ACL_DEFAULT_LEN && \ - !memcmp(GFS2_POSIX_ACL_DEFAULT, (name), (len))) - -struct gfs2_ea_request; - -int gfs2_acl_validate_set(struct gfs2_inode *ip, int access, - struct gfs2_ea_request *er, - int *remove, mode_t *mode); -int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access); -int gfs2_check_acl(struct inode *inode, int mask); -int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip); -int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr); +extern int gfs2_check_acl(struct inode *inode, int mask); +extern int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode); +extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr); +extern struct xattr_handler gfs2_xattr_system_handler; #endif /* __ACL_DOT_H__ */ diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 694b5d48f03..7b8da941526 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -269,7 +269,6 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping, pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; unsigned offset = i_size & (PAGE_CACHE_SIZE-1); unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize); - struct backing_dev_info *bdi = mapping->backing_dev_info; int i; int ret; @@ -313,11 +312,6 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping, if (ret || (--(wbc->nr_to_write) <= 0)) ret = 1; - if (wbc->nonblocking && bdi_write_congested(bdi)) { - wbc->encountered_congestion = 1; - ret = 1; - } - } gfs2_trans_end(sdp); return ret; @@ -338,7 +332,6 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping, static int gfs2_write_cache_jdata(struct address_space *mapping, struct writeback_control *wbc) { - struct backing_dev_info *bdi = mapping->backing_dev_info; int ret = 0; int done = 0; struct pagevec pvec; @@ -348,11 +341,6 @@ static int gfs2_write_cache_jdata(struct address_space *mapping, int scanned = 0; int range_whole = 0; - if (wbc->nonblocking && bdi_write_congested(bdi)) { - wbc->encountered_congestion = 1; - return 0; - } - pagevec_init(&pvec, 0); if (wbc->range_cyclic) { index = mapping->writeback_index; /* Start from prev offset */ @@ -819,8 +807,10 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, mark_inode_dirty(inode); } - if (inode == sdp->sd_rindex) + if (inode == sdp->sd_rindex) { adjust_fs_space(inode); + ip->i_gh.gh_flags |= GL_NOCACHE; + } brelse(dibh); gfs2_trans_end(sdp); @@ -889,8 +879,10 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, mark_inode_dirty(inode); } - if (inode == sdp->sd_rindex) + if (inode == sdp->sd_rindex) { adjust_fs_space(inode); + ip->i_gh.gh_flags |= GL_NOCACHE; + } brelse(dibh); gfs2_trans_end(sdp); diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 297d7e5ceba..25fddc100f1 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -525,38 +525,6 @@ consist_inode: return ERR_PTR(-EIO); } - -/** - * dirent_first - Return the first dirent - * @dip: the directory - * @bh: The buffer - * @dent: Pointer to list of dirents - * - * return first dirent whether bh points to leaf or stuffed dinode - * - * Returns: IS_LEAF, IS_DINODE, or -errno - */ - -static int dirent_first(struct gfs2_inode *dip, struct buffer_head *bh, - struct gfs2_dirent **dent) -{ - struct gfs2_meta_header *h = (struct gfs2_meta_header *)bh->b_data; - - if (be32_to_cpu(h->mh_type) == GFS2_METATYPE_LF) { - if (gfs2_meta_check(GFS2_SB(&dip->i_inode), bh)) - return -EIO; - *dent = (struct gfs2_dirent *)(bh->b_data + - sizeof(struct gfs2_leaf)); - return IS_LEAF; - } else { - if (gfs2_metatype_check(GFS2_SB(&dip->i_inode), bh, GFS2_METATYPE_DI)) - return -EIO; - *dent = (struct gfs2_dirent *)(bh->b_data + - sizeof(struct gfs2_dinode)); - return IS_DINODE; - } -} - static int dirent_check_reclen(struct gfs2_inode *dip, const struct gfs2_dirent *d, const void *end_p) { @@ -1006,7 +974,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) divider = (start + half_len) << (32 - dip->i_depth); /* Copy the entries */ - dirent_first(dip, obh, &dent); + dent = (struct gfs2_dirent *)(obh->b_data + sizeof(struct gfs2_leaf)); do { next = dent; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 8b674b1f3a5..f455a03a09e 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -241,15 +241,14 @@ int gfs2_glock_put(struct gfs2_glock *gl) int rv = 0; write_lock(gl_lock_addr(gl->gl_hash)); - if (atomic_dec_and_test(&gl->gl_ref)) { + if (atomic_dec_and_lock(&gl->gl_ref, &lru_lock)) { hlist_del(&gl->gl_list); - write_unlock(gl_lock_addr(gl->gl_hash)); - spin_lock(&lru_lock); if (!list_empty(&gl->gl_lru)) { list_del_init(&gl->gl_lru); atomic_dec(&lru_count); } spin_unlock(&lru_lock); + write_unlock(gl_lock_addr(gl->gl_hash)); GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); glock_free(gl); rv = 1; @@ -513,7 +512,6 @@ retry: GLOCK_BUG_ON(gl, 1); } spin_unlock(&gl->gl_spin); - gfs2_glock_put(gl); return; } @@ -524,8 +522,6 @@ retry: if (glops->go_xmote_bh) { spin_unlock(&gl->gl_spin); rv = glops->go_xmote_bh(gl, gh); - if (rv == -EAGAIN) - return; spin_lock(&gl->gl_spin); if (rv) { do_error(gl, rv); @@ -540,7 +536,6 @@ out: clear_bit(GLF_LOCK, &gl->gl_flags); out_locked: spin_unlock(&gl->gl_spin); - gfs2_glock_put(gl); } static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock, @@ -600,7 +595,6 @@ __acquires(&gl->gl_spin) if (!(ret & LM_OUT_ASYNC)) { finish_xmote(gl, ret); - gfs2_glock_hold(gl); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gfs2_glock_put(gl); } else { @@ -672,12 +666,17 @@ out: return; out_sched: + clear_bit(GLF_LOCK, &gl->gl_flags); + smp_mb__after_clear_bit(); gfs2_glock_hold(gl); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gfs2_glock_put_nolock(gl); + return; + out_unlock: clear_bit(GLF_LOCK, &gl->gl_flags); - goto out; + smp_mb__after_clear_bit(); + return; } static void delete_work_func(struct work_struct *work) @@ -707,9 +706,12 @@ static void glock_work_func(struct work_struct *work) { unsigned long delay = 0; struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); + int drop_ref = 0; - if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) + if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) { finish_xmote(gl, gl->gl_reply); + drop_ref = 1; + } down_read(&gfs2_umount_flush_sem); spin_lock(&gl->gl_spin); if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && @@ -727,6 +729,8 @@ static void glock_work_func(struct work_struct *work) if (!delay || queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) gfs2_glock_put(gl); + if (drop_ref) + gfs2_glock_put(gl); } /** @@ -1361,10 +1365,6 @@ static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask) list_del_init(&gl->gl_lru); atomic_dec(&lru_count); - /* Check if glock is about to be freed */ - if (atomic_read(&gl->gl_ref) == 0) - continue; - /* Test for being demotable */ if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { gfs2_glock_hold(gl); @@ -1375,10 +1375,11 @@ static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask) handle_callback(gl, LM_ST_UNLOCKED, 0); nr--; } + clear_bit(GLF_LOCK, &gl->gl_flags); + smp_mb__after_clear_bit(); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gfs2_glock_put_nolock(gl); spin_unlock(&gl->gl_spin); - clear_bit(GLF_LOCK, &gl->gl_flags); spin_lock(&lru_lock); continue; } diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index c609894ec0d..13f0bd22813 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -180,15 +180,6 @@ static inline int gfs2_glock_is_held_shrd(struct gfs2_glock *gl) return gl->gl_state == LM_ST_SHARED; } -static inline int gfs2_glock_is_blocking(struct gfs2_glock *gl) -{ - int ret; - spin_lock(&gl->gl_spin); - ret = test_bit(GLF_DEMOTE, &gl->gl_flags); - spin_unlock(&gl->gl_spin); - return ret; -} - int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, const struct gfs2_glock_operations *glops, int create, struct gfs2_glock **glp); diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 6985eef06c3..78554acc060 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -13,6 +13,7 @@ #include <linux/buffer_head.h> #include <linux/gfs2_ondisk.h> #include <linux/bio.h> +#include <linux/posix_acl.h> #include "gfs2.h" #include "incore.h" @@ -184,8 +185,10 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) if (flags & DIO_METADATA) { struct address_space *mapping = gl->gl_aspace->i_mapping; truncate_inode_pages(mapping, 0); - if (ip) + if (ip) { set_bit(GIF_INVALID, &ip->i_flags); + forget_all_cached_acls(&ip->i_inode); + } } if (ip == GFS2_I(gl->gl_sbd->sd_rindex)) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 6edb423f90b..4792200978c 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -429,7 +429,11 @@ struct gfs2_args { unsigned int ar_meta:1; /* mount metafs */ unsigned int ar_discard:1; /* discard requests */ unsigned int ar_errors:2; /* errors=withdraw | panic */ + unsigned int ar_nobarrier:1; /* do not send barriers */ int ar_commit; /* Commit interval */ + int ar_statfs_quantum; /* The fast statfs interval */ + int ar_quota_quantum; /* The quota interval */ + int ar_statfs_percent; /* The % change to force sync */ }; struct gfs2_tune { @@ -558,6 +562,7 @@ struct gfs2_sbd { spinlock_t sd_statfs_spin; struct gfs2_statfs_change_host sd_statfs_master; struct gfs2_statfs_change_host sd_statfs_local; + int sd_statfs_force_sync; /* Resource group stuff */ diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index fb15d3b1f40..26ba2a4c4a2 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -871,7 +871,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, if (error) goto fail_gunlock2; - error = gfs2_acl_create(dip, GFS2_I(inode)); + error = gfs2_acl_create(dip, inode); if (error) goto fail_gunlock2; @@ -947,9 +947,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC); str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI); - str->di_header.__pad0 = 0; str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI); - str->di_header.__pad1 = 0; str->di_num.no_addr = cpu_to_be64(ip->i_no_addr); str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); str->di_mode = cpu_to_be32(ip->i_inode.i_mode); diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 13c6237c5f6..4511b08fc45 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -596,7 +596,9 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) memset(lh, 0, sizeof(struct gfs2_log_header)); lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC); lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH); + lh->lh_header.__pad0 = cpu_to_be64(0); lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH); + lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); lh->lh_sequence = cpu_to_be64(sdp->sd_log_sequence++); lh->lh_flags = cpu_to_be32(flags); lh->lh_tail = cpu_to_be32(tail); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 9969ff062c5..de97632ba32 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -132,6 +132,7 @@ static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type) static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) { struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); + struct gfs2_meta_header *mh; struct gfs2_trans *tr; lock_buffer(bd->bd_bh); @@ -148,6 +149,9 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); gfs2_meta_check(sdp, bd->bd_bh); gfs2_pin(sdp, bd->bd_bh); + mh = (struct gfs2_meta_header *)bd->bd_bh->b_data; + mh->__pad0 = cpu_to_be64(0); + mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); sdp->sd_log_num_buf++; list_add(&le->le_list, &sdp->sd_log_le_buf); tr->tr_num_buf_new++; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 52fb6c04898..edfee24f363 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -18,6 +18,7 @@ #include <linux/mount.h> #include <linux/gfs2_ondisk.h> #include <linux/slow-work.h> +#include <linux/quotaops.h> #include "gfs2.h" #include "incore.h" @@ -62,13 +63,10 @@ static void gfs2_tune_init(struct gfs2_tune *gt) gt->gt_quota_warn_period = 10; gt->gt_quota_scale_num = 1; gt->gt_quota_scale_den = 1; - gt->gt_quota_quantum = 60; gt->gt_new_files_jdata = 0; gt->gt_max_readahead = 1 << 18; gt->gt_stall_secs = 600; gt->gt_complain_secs = 10; - gt->gt_statfs_quantum = 30; - gt->gt_statfs_slow = 0; } static struct gfs2_sbd *init_sbd(struct super_block *sb) @@ -1114,7 +1112,7 @@ void gfs2_online_uevent(struct gfs2_sbd *sdp) * Returns: errno */ -static int fill_super(struct super_block *sb, void *data, int silent) +static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent) { struct gfs2_sbd *sdp; struct gfs2_holder mount_gh; @@ -1125,17 +1123,7 @@ static int fill_super(struct super_block *sb, void *data, int silent) printk(KERN_WARNING "GFS2: can't alloc struct gfs2_sbd\n"); return -ENOMEM; } - - sdp->sd_args.ar_quota = GFS2_QUOTA_DEFAULT; - sdp->sd_args.ar_data = GFS2_DATA_DEFAULT; - sdp->sd_args.ar_commit = 60; - sdp->sd_args.ar_errors = GFS2_ERRORS_DEFAULT; - - error = gfs2_mount_args(sdp, &sdp->sd_args, data); - if (error) { - printk(KERN_WARNING "GFS2: can't parse mount arguments\n"); - goto fail; - } + sdp->sd_args = *args; if (sdp->sd_args.ar_spectator) { sb->s_flags |= MS_RDONLY; @@ -1143,11 +1131,15 @@ static int fill_super(struct super_block *sb, void *data, int silent) } if (sdp->sd_args.ar_posix_acl) sb->s_flags |= MS_POSIXACL; + if (sdp->sd_args.ar_nobarrier) + set_bit(SDF_NOBARRIERS, &sdp->sd_flags); sb->s_magic = GFS2_MAGIC; sb->s_op = &gfs2_super_ops; sb->s_export_op = &gfs2_export_ops; sb->s_xattr = gfs2_xattr_handlers; + sb->s_qcop = &gfs2_quotactl_ops; + sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; sb->s_time_gran = 1; sb->s_maxbytes = MAX_LFS_FILESIZE; @@ -1160,6 +1152,15 @@ static int fill_super(struct super_block *sb, void *data, int silent) sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; sdp->sd_tune.gt_log_flush_secs = sdp->sd_args.ar_commit; + sdp->sd_tune.gt_quota_quantum = sdp->sd_args.ar_quota_quantum; + if (sdp->sd_args.ar_statfs_quantum) { + sdp->sd_tune.gt_statfs_slow = 0; + sdp->sd_tune.gt_statfs_quantum = sdp->sd_args.ar_statfs_quantum; + } + else { + sdp->sd_tune.gt_statfs_slow = 1; + sdp->sd_tune.gt_statfs_quantum = 30; + } error = init_names(sdp, silent); if (error) @@ -1243,18 +1244,127 @@ fail: return error; } -static int gfs2_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, struct vfsmount *mnt) +static int set_gfs2_super(struct super_block *s, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt); + s->s_bdev = data; + s->s_dev = s->s_bdev->bd_dev; + + /* + * We set the bdi here to the queue backing, file systems can + * overwrite this in ->fill_super() + */ + s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info; + return 0; } -static int test_meta_super(struct super_block *s, void *ptr) +static int test_gfs2_super(struct super_block *s, void *ptr) { struct block_device *bdev = ptr; return (bdev == s->s_bdev); } +/** + * gfs2_get_sb - Get the GFS2 superblock + * @fs_type: The GFS2 filesystem type + * @flags: Mount flags + * @dev_name: The name of the device + * @data: The mount arguments + * @mnt: The vfsmnt for this mount + * + * Q. Why not use get_sb_bdev() ? + * A. We need to select one of two root directories to mount, independent + * of whether this is the initial, or subsequent, mount of this sb + * + * Returns: 0 or -ve on error + */ + +static int gfs2_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data, struct vfsmount *mnt) +{ + struct block_device *bdev; + struct super_block *s; + fmode_t mode = FMODE_READ; + int error; + struct gfs2_args args; + struct gfs2_sbd *sdp; + + if (!(flags & MS_RDONLY)) + mode |= FMODE_WRITE; + + bdev = open_bdev_exclusive(dev_name, mode, fs_type); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); + + /* + * once the super is inserted into the list by sget, s_umount + * will protect the lockfs code from trying to start a snapshot + * while we are mounting + */ + mutex_lock(&bdev->bd_fsfreeze_mutex); + if (bdev->bd_fsfreeze_count > 0) { + mutex_unlock(&bdev->bd_fsfreeze_mutex); + error = -EBUSY; + goto error_bdev; + } + s = sget(fs_type, test_gfs2_super, set_gfs2_super, bdev); + mutex_unlock(&bdev->bd_fsfreeze_mutex); + error = PTR_ERR(s); + if (IS_ERR(s)) + goto error_bdev; + + memset(&args, 0, sizeof(args)); + args.ar_quota = GFS2_QUOTA_DEFAULT; + args.ar_data = GFS2_DATA_DEFAULT; + args.ar_commit = 60; + args.ar_statfs_quantum = 30; + args.ar_quota_quantum = 60; + args.ar_errors = GFS2_ERRORS_DEFAULT; + + error = gfs2_mount_args(&args, data); + if (error) { + printk(KERN_WARNING "GFS2: can't parse mount arguments\n"); + if (s->s_root) + goto error_super; + deactivate_locked_super(s); + return error; + } + + if (s->s_root) { + error = -EBUSY; + if ((flags ^ s->s_flags) & MS_RDONLY) + goto error_super; + close_bdev_exclusive(bdev, mode); + } else { + char b[BDEVNAME_SIZE]; + + s->s_flags = flags; + s->s_mode = mode; + strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); + sb_set_blocksize(s, block_size(bdev)); + error = fill_super(s, &args, flags & MS_SILENT ? 1 : 0); + if (error) { + deactivate_locked_super(s); + return error; + } + s->s_flags |= MS_ACTIVE; + bdev->bd_super = s; + } + + sdp = s->s_fs_info; + mnt->mnt_sb = s; + if (args.ar_meta) + mnt->mnt_root = dget(sdp->sd_master_dir); + else + mnt->mnt_root = dget(sdp->sd_root_dir); + return 0; + +error_super: + deactivate_locked_super(s); +error_bdev: + close_bdev_exclusive(bdev, mode); + return error; +} + static int set_meta_super(struct super_block *s, void *ptr) { return -EINVAL; @@ -1274,13 +1384,17 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, dev_name, error); return error; } - s = sget(&gfs2_fs_type, test_meta_super, set_meta_super, + s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super, path.dentry->d_inode->i_sb->s_bdev); path_put(&path); if (IS_ERR(s)) { printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); return PTR_ERR(s); } + if ((flags ^ s->s_flags) & MS_RDONLY) { + deactivate_locked_super(s); + return -EBUSY; + } sdp = s->s_fs_info; mnt->mnt_sb = s; mnt->mnt_root = dget(sdp->sd_master_dir); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 2e9b9326bfc..e3bf6eab875 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -15,7 +15,7 @@ * fuzziness in the current usage value of IDs that are being used on different * nodes in the cluster simultaneously. So, it is possible for a user on * multiple nodes to overrun their quota, but that overrun is controlable. - * Since quota tags are part of transactions, there is no need to a quota check + * Since quota tags are part of transactions, there is no need for a quota check * program to be run on node crashes or anything like that. * * There are couple of knobs that let the administrator manage the quota @@ -47,6 +47,8 @@ #include <linux/gfs2_ondisk.h> #include <linux/kthread.h> #include <linux/freezer.h> +#include <linux/quota.h> +#include <linux/dqblk_xfs.h> #include "gfs2.h" #include "incore.h" @@ -65,13 +67,6 @@ #define QUOTA_USER 1 #define QUOTA_GROUP 0 -struct gfs2_quota_host { - u64 qu_limit; - u64 qu_warn; - s64 qu_value; - u32 qu_ll_next; -}; - struct gfs2_quota_change_host { u64 qc_change; u32 qc_flags; /* GFS2_QCF_... */ @@ -164,7 +159,7 @@ fail: return error; } -static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, int create, +static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, struct gfs2_quota_data **qdp) { struct gfs2_quota_data *qd = NULL, *new_qd = NULL; @@ -202,7 +197,7 @@ static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, int create, spin_unlock(&qd_lru_lock); - if (qd || !create) { + if (qd) { if (new_qd) { gfs2_glock_put(new_qd->qd_gl); kmem_cache_free(gfs2_quotad_cachep, new_qd); @@ -461,12 +456,12 @@ static void qd_unlock(struct gfs2_quota_data *qd) qd_put(qd); } -static int qdsb_get(struct gfs2_sbd *sdp, int user, u32 id, int create, +static int qdsb_get(struct gfs2_sbd *sdp, int user, u32 id, struct gfs2_quota_data **qdp) { int error; - error = qd_get(sdp, user, id, create, qdp); + error = qd_get(sdp, user, id, qdp); if (error) return error; @@ -508,20 +503,20 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) return 0; - error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, CREATE, qd); + error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, qd); if (error) goto out; al->al_qd_num++; qd++; - error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, CREATE, qd); + error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, qd); if (error) goto out; al->al_qd_num++; qd++; if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) { - error = qdsb_get(sdp, QUOTA_USER, uid, CREATE, qd); + error = qdsb_get(sdp, QUOTA_USER, uid, qd); if (error) goto out; al->al_qd_num++; @@ -529,7 +524,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) } if (gid != NO_QUOTA_CHANGE && gid != ip->i_inode.i_gid) { - error = qdsb_get(sdp, QUOTA_GROUP, gid, CREATE, qd); + error = qdsb_get(sdp, QUOTA_GROUP, gid, qd); if (error) goto out; al->al_qd_num++; @@ -617,48 +612,36 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change) mutex_unlock(&sdp->sd_quota_mutex); } -static void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf) -{ - const struct gfs2_quota *str = buf; - - qu->qu_limit = be64_to_cpu(str->qu_limit); - qu->qu_warn = be64_to_cpu(str->qu_warn); - qu->qu_value = be64_to_cpu(str->qu_value); - qu->qu_ll_next = be32_to_cpu(str->qu_ll_next); -} - -static void gfs2_quota_out(const struct gfs2_quota_host *qu, void *buf) -{ - struct gfs2_quota *str = buf; - - str->qu_limit = cpu_to_be64(qu->qu_limit); - str->qu_warn = cpu_to_be64(qu->qu_warn); - str->qu_value = cpu_to_be64(qu->qu_value); - str->qu_ll_next = cpu_to_be32(qu->qu_ll_next); - memset(&str->qu_reserved, 0, sizeof(str->qu_reserved)); -} - /** - * gfs2_adjust_quota + * gfs2_adjust_quota - adjust record of current block usage + * @ip: The quota inode + * @loc: Offset of the entry in the quota file + * @change: The amount of usage change to record + * @qd: The quota data + * @fdq: The updated limits to record * * This function was mostly borrowed from gfs2_block_truncate_page which was * in turn mostly borrowed from ext3 + * + * Returns: 0 or -ve on error */ + static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, - s64 change, struct gfs2_quota_data *qd) + s64 change, struct gfs2_quota_data *qd, + struct fs_disk_quota *fdq) { struct inode *inode = &ip->i_inode; struct address_space *mapping = inode->i_mapping; unsigned long index = loc >> PAGE_CACHE_SHIFT; unsigned offset = loc & (PAGE_CACHE_SIZE - 1); unsigned blocksize, iblock, pos; - struct buffer_head *bh; + struct buffer_head *bh, *dibh; struct page *page; void *kaddr; - char *ptr; - struct gfs2_quota_host qp; + struct gfs2_quota *qp; s64 value; int err = -EIO; + u64 size; if (gfs2_is_stuffed(ip)) gfs2_unstuff_dinode(ip, NULL); @@ -700,18 +683,38 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, gfs2_trans_add_bh(ip->i_gl, bh, 0); kaddr = kmap_atomic(page, KM_USER0); - ptr = kaddr + offset; - gfs2_quota_in(&qp, ptr); - qp.qu_value += change; - value = qp.qu_value; - gfs2_quota_out(&qp, ptr); + qp = kaddr + offset; + value = (s64)be64_to_cpu(qp->qu_value) + change; + qp->qu_value = cpu_to_be64(value); + qd->qd_qb.qb_value = qp->qu_value; + if (fdq) { + if (fdq->d_fieldmask & FS_DQ_BSOFT) { + qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit); + qd->qd_qb.qb_warn = qp->qu_warn; + } + if (fdq->d_fieldmask & FS_DQ_BHARD) { + qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit); + qd->qd_qb.qb_limit = qp->qu_limit; + } + } flush_dcache_page(page); kunmap_atomic(kaddr, KM_USER0); - err = 0; - qd->qd_qb.qb_magic = cpu_to_be32(GFS2_MAGIC); - qd->qd_qb.qb_value = cpu_to_be64(value); - ((struct gfs2_quota_lvb*)(qd->qd_gl->gl_lvb))->qb_magic = cpu_to_be32(GFS2_MAGIC); - ((struct gfs2_quota_lvb*)(qd->qd_gl->gl_lvb))->qb_value = cpu_to_be64(value); + + err = gfs2_meta_inode_buffer(ip, &dibh); + if (err) + goto unlock; + + size = loc + sizeof(struct gfs2_quota); + if (size > inode->i_size) { + ip->i_disksize = size; + i_size_write(inode, size); + } + inode->i_mtime = inode->i_atime = CURRENT_TIME; + gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_dinode_out(ip, dibh->b_data); + brelse(dibh); + mark_inode_dirty(inode); + unlock: unlock_page(page); page_cache_release(page); @@ -739,9 +742,9 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) return -ENOMEM; sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL); + mutex_lock_nested(&ip->i_inode.i_mutex, I_MUTEX_QUOTA); for (qx = 0; qx < num_qd; qx++) { - error = gfs2_glock_nq_init(qda[qx]->qd_gl, - LM_ST_EXCLUSIVE, + error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE, GL_NOCACHE, &ghs[qx]); if (error) goto out; @@ -795,9 +798,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) for (x = 0; x < num_qd; x++) { qd = qda[x]; offset = qd2offset(qd); - error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync, - (struct gfs2_quota_data *) - qd); + error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync, qd, NULL); if (error) goto out_end_trans; @@ -817,21 +818,44 @@ out_gunlock: out: while (qx--) gfs2_glock_dq_uninit(&ghs[qx]); + mutex_unlock(&ip->i_inode.i_mutex); kfree(ghs); gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl); return error; } +static int update_qd(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd) +{ + struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); + struct gfs2_quota q; + struct gfs2_quota_lvb *qlvb; + loff_t pos; + int error; + + memset(&q, 0, sizeof(struct gfs2_quota)); + pos = qd2offset(qd); + error = gfs2_internal_read(ip, NULL, (char *)&q, &pos, sizeof(q)); + if (error < 0) + return error; + + qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; + qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC); + qlvb->__pad = 0; + qlvb->qb_limit = q.qu_limit; + qlvb->qb_warn = q.qu_warn; + qlvb->qb_value = q.qu_value; + qd->qd_qb = *qlvb; + + return 0; +} + static int do_glock(struct gfs2_quota_data *qd, int force_refresh, struct gfs2_holder *q_gh) { struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); struct gfs2_holder i_gh; - struct gfs2_quota_host q; - char buf[sizeof(struct gfs2_quota)]; int error; - struct gfs2_quota_lvb *qlvb; restart: error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh); @@ -841,11 +865,9 @@ restart: qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) { - loff_t pos; gfs2_glock_dq_uninit(q_gh); - error = gfs2_glock_nq_init(qd->qd_gl, - LM_ST_EXCLUSIVE, GL_NOCACHE, - q_gh); + error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE, + GL_NOCACHE, q_gh); if (error) return error; @@ -853,29 +875,14 @@ restart: if (error) goto fail; - memset(buf, 0, sizeof(struct gfs2_quota)); - pos = qd2offset(qd); - error = gfs2_internal_read(ip, NULL, buf, &pos, - sizeof(struct gfs2_quota)); - if (error < 0) + error = update_qd(sdp, qd); + if (error) goto fail_gunlock; gfs2_glock_dq_uninit(&i_gh); - - gfs2_quota_in(&q, buf); - qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; - qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC); - qlvb->__pad = 0; - qlvb->qb_limit = cpu_to_be64(q.qu_limit); - qlvb->qb_warn = cpu_to_be64(q.qu_warn); - qlvb->qb_value = cpu_to_be64(q.qu_value); - qd->qd_qb = *qlvb; - - if (gfs2_glock_is_blocking(qd->qd_gl)) { - gfs2_glock_dq_uninit(q_gh); - force_refresh = 0; - goto restart; - } + gfs2_glock_dq_uninit(q_gh); + force_refresh = 0; + goto restart; } return 0; @@ -995,7 +1002,7 @@ static int print_message(struct gfs2_quota_data *qd, char *type) { struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; - printk(KERN_INFO "GFS2: fsid=%s: quota %s for %s %u\r\n", + printk(KERN_INFO "GFS2: fsid=%s: quota %s for %s %u\n", sdp->sd_fsname, type, (test_bit(QDF_USER, &qd->qd_flags)) ? "user" : "group", qd->qd_id); @@ -1032,6 +1039,10 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) { print_message(qd, "exceeded"); + quota_send_warning(test_bit(QDF_USER, &qd->qd_flags) ? + USRQUOTA : GRPQUOTA, qd->qd_id, + sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN); + error = -EDQUOT; break; } else if (be64_to_cpu(qd->qd_qb.qb_warn) && @@ -1039,6 +1050,9 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) time_after_eq(jiffies, qd->qd_last_warn + gfs2_tune_get(sdp, gt_quota_warn_period) * HZ)) { + quota_send_warning(test_bit(QDF_USER, &qd->qd_flags) ? + USRQUOTA : GRPQUOTA, qd->qd_id, + sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN); error = print_message(qd, "warning"); qd->qd_last_warn = jiffies; } @@ -1069,8 +1083,9 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change, } } -int gfs2_quota_sync(struct gfs2_sbd *sdp) +int gfs2_quota_sync(struct super_block *sb, int type) { + struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_quota_data **qda; unsigned int max_qd = gfs2_tune_get(sdp, gt_quota_simul_sync); unsigned int num_qd; @@ -1118,7 +1133,7 @@ int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id) struct gfs2_holder q_gh; int error; - error = qd_get(sdp, user, id, CREATE, &qd); + error = qd_get(sdp, user, id, &qd); if (error) return error; @@ -1127,7 +1142,6 @@ int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id) gfs2_glock_dq_uninit(&q_gh); qd_put(qd); - return error; } @@ -1298,12 +1312,12 @@ static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error) } static void quotad_check_timeo(struct gfs2_sbd *sdp, const char *msg, - int (*fxn)(struct gfs2_sbd *sdp), + int (*fxn)(struct super_block *sb, int type), unsigned long t, unsigned long *timeo, unsigned int *new_timeo) { if (t >= *timeo) { - int error = fxn(sdp); + int error = fxn(sdp->sd_vfs, 0); quotad_error(sdp, msg, error); *timeo = gfs2_tune_get_i(&sdp->sd_tune, new_timeo) * HZ; } else { @@ -1330,6 +1344,14 @@ static void quotad_check_trunc_list(struct gfs2_sbd *sdp) } } +void gfs2_wake_up_statfs(struct gfs2_sbd *sdp) { + if (!sdp->sd_statfs_force_sync) { + sdp->sd_statfs_force_sync = 1; + wake_up(&sdp->sd_quota_wait); + } +} + + /** * gfs2_quotad - Write cached quota changes into the quota file * @sdp: Pointer to GFS2 superblock @@ -1349,8 +1371,15 @@ int gfs2_quotad(void *data) while (!kthread_should_stop()) { /* Update the master statfs file */ - quotad_check_timeo(sdp, "statfs", gfs2_statfs_sync, t, - &statfs_timeo, &tune->gt_statfs_quantum); + if (sdp->sd_statfs_force_sync) { + int error = gfs2_statfs_sync(sdp->sd_vfs, 0); + quotad_error(sdp, "statfs", error); + statfs_timeo = gfs2_tune_get(sdp, gt_statfs_quantum) * HZ; + } + else + quotad_check_timeo(sdp, "statfs", gfs2_statfs_sync, t, + &statfs_timeo, + &tune->gt_statfs_quantum); /* Update quota file */ quotad_check_timeo(sdp, "sync", gfs2_quota_sync, t, @@ -1367,7 +1396,7 @@ int gfs2_quotad(void *data) spin_lock(&sdp->sd_trunc_lock); empty = list_empty(&sdp->sd_trunc_list); spin_unlock(&sdp->sd_trunc_lock); - if (empty) + if (empty && !sdp->sd_statfs_force_sync) t -= schedule_timeout(t); else t = 0; @@ -1377,3 +1406,181 @@ int gfs2_quotad(void *data) return 0; } +static int gfs2_quota_get_xstate(struct super_block *sb, + struct fs_quota_stat *fqs) +{ + struct gfs2_sbd *sdp = sb->s_fs_info; + + memset(fqs, 0, sizeof(struct fs_quota_stat)); + fqs->qs_version = FS_QSTAT_VERSION; + if (sdp->sd_args.ar_quota == GFS2_QUOTA_ON) + fqs->qs_flags = (XFS_QUOTA_UDQ_ENFD | XFS_QUOTA_GDQ_ENFD); + else if (sdp->sd_args.ar_quota == GFS2_QUOTA_ACCOUNT) + fqs->qs_flags = (XFS_QUOTA_UDQ_ACCT | XFS_QUOTA_GDQ_ACCT); + if (sdp->sd_quota_inode) { + fqs->qs_uquota.qfs_ino = GFS2_I(sdp->sd_quota_inode)->i_no_addr; + fqs->qs_uquota.qfs_nblks = sdp->sd_quota_inode->i_blocks; + } + fqs->qs_uquota.qfs_nextents = 1; /* unsupported */ + fqs->qs_gquota = fqs->qs_uquota; /* its the same inode in both cases */ + fqs->qs_incoredqs = atomic_read(&qd_lru_count); + return 0; +} + +static int gfs2_xquota_get(struct super_block *sb, int type, qid_t id, + struct fs_disk_quota *fdq) +{ + struct gfs2_sbd *sdp = sb->s_fs_info; + struct gfs2_quota_lvb *qlvb; + struct gfs2_quota_data *qd; + struct gfs2_holder q_gh; + int error; + + memset(fdq, 0, sizeof(struct fs_disk_quota)); + + if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) + return -ESRCH; /* Crazy XFS error code */ + + if (type == USRQUOTA) + type = QUOTA_USER; + else if (type == GRPQUOTA) + type = QUOTA_GROUP; + else + return -EINVAL; + + error = qd_get(sdp, type, id, &qd); + if (error) + return error; + error = do_glock(qd, FORCE, &q_gh); + if (error) + goto out; + + qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; + fdq->d_version = FS_DQUOT_VERSION; + fdq->d_flags = (type == QUOTA_USER) ? XFS_USER_QUOTA : XFS_GROUP_QUOTA; + fdq->d_id = id; + fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit); + fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn); + fdq->d_bcount = be64_to_cpu(qlvb->qb_value); + + gfs2_glock_dq_uninit(&q_gh); +out: + qd_put(qd); + return error; +} + +/* GFS2 only supports a subset of the XFS fields */ +#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD) + +static int gfs2_xquota_set(struct super_block *sb, int type, qid_t id, + struct fs_disk_quota *fdq) +{ + struct gfs2_sbd *sdp = sb->s_fs_info; + struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); + struct gfs2_quota_data *qd; + struct gfs2_holder q_gh, i_gh; + unsigned int data_blocks, ind_blocks; + unsigned int blocks = 0; + int alloc_required; + struct gfs2_alloc *al; + loff_t offset; + int error; + + if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) + return -ESRCH; /* Crazy XFS error code */ + + switch(type) { + case USRQUOTA: + type = QUOTA_USER; + if (fdq->d_flags != XFS_USER_QUOTA) + return -EINVAL; + break; + case GRPQUOTA: + type = QUOTA_GROUP; + if (fdq->d_flags != XFS_GROUP_QUOTA) + return -EINVAL; + break; + default: + return -EINVAL; + } + + if (fdq->d_fieldmask & ~GFS2_FIELDMASK) + return -EINVAL; + if (fdq->d_id != id) + return -EINVAL; + + error = qd_get(sdp, type, id, &qd); + if (error) + return error; + + mutex_lock(&ip->i_inode.i_mutex); + error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE, 0, &q_gh); + if (error) + goto out_put; + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); + if (error) + goto out_q; + + /* Check for existing entry, if none then alloc new blocks */ + error = update_qd(sdp, qd); + if (error) + goto out_i; + + /* If nothing has changed, this is a no-op */ + if ((fdq->d_fieldmask & FS_DQ_BSOFT) && + (fdq->d_blk_softlimit == be64_to_cpu(qd->qd_qb.qb_warn))) + fdq->d_fieldmask ^= FS_DQ_BSOFT; + if ((fdq->d_fieldmask & FS_DQ_BHARD) && + (fdq->d_blk_hardlimit == be64_to_cpu(qd->qd_qb.qb_limit))) + fdq->d_fieldmask ^= FS_DQ_BHARD; + if (fdq->d_fieldmask == 0) + goto out_i; + + offset = qd2offset(qd); + error = gfs2_write_alloc_required(ip, offset, sizeof(struct gfs2_quota), + &alloc_required); + if (error) + goto out_i; + if (alloc_required) { + al = gfs2_alloc_get(ip); + if (al == NULL) + goto out_i; + gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), + &data_blocks, &ind_blocks); + blocks = al->al_requested = 1 + data_blocks + ind_blocks; + error = gfs2_inplace_reserve(ip); + if (error) + goto out_alloc; + } + + error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 1, 0); + if (error) + goto out_release; + + /* Apply changes */ + error = gfs2_adjust_quota(ip, offset, 0, qd, fdq); + + gfs2_trans_end(sdp); +out_release: + if (alloc_required) { + gfs2_inplace_release(ip); +out_alloc: + gfs2_alloc_put(ip); + } +out_i: + gfs2_glock_dq_uninit(&i_gh); +out_q: + gfs2_glock_dq_uninit(&q_gh); +out_put: + mutex_unlock(&ip->i_inode.i_mutex); + qd_put(qd); + return error; +} + +const struct quotactl_ops gfs2_quotactl_ops = { + .quota_sync = gfs2_quota_sync, + .get_xstate = gfs2_quota_get_xstate, + .get_xquota = gfs2_xquota_get, + .set_xquota = gfs2_xquota_set, +}; + diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 0fa5fa63d0e..e271fa07ad0 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -25,13 +25,15 @@ extern int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid); extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change, u32 uid, u32 gid); -extern int gfs2_quota_sync(struct gfs2_sbd *sdp); +extern int gfs2_quota_sync(struct super_block *sb, int type); extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id); extern int gfs2_quota_init(struct gfs2_sbd *sdp); extern void gfs2_quota_cleanup(struct gfs2_sbd *sdp); extern int gfs2_quotad(void *data); +extern void gfs2_wake_up_statfs(struct gfs2_sbd *sdp); + static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); @@ -50,5 +52,6 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) } extern int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask); +extern const struct quotactl_ops gfs2_quotactl_ops; #endif /* __QUOTA_DOT_H__ */ diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 09fa3196557..4b9bece3d43 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -410,7 +410,9 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea memset(lh, 0, sizeof(struct gfs2_log_header)); lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC); lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH); + lh->lh_header.__pad0 = cpu_to_be64(0); lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH); + lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1); lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT); lh->lh_blkno = cpu_to_be32(lblock); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 8f1cfb02a6c..0608f490c29 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1710,11 +1710,16 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) { struct gfs2_rgrpd *rgd; struct gfs2_holder ri_gh, rgd_gh; + struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex); + int ri_locked = 0; int error; - error = gfs2_rindex_hold(sdp, &ri_gh); - if (error) - goto fail; + if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { + error = gfs2_rindex_hold(sdp, &ri_gh); + if (error) + goto fail; + ri_locked = 1; + } error = -EINVAL; rgd = gfs2_blk2rgrpd(sdp, no_addr); @@ -1730,7 +1735,8 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) gfs2_glock_dq_uninit(&rgd_gh); fail_rindex: - gfs2_glock_dq_uninit(&ri_gh); + if (ri_locked) + gfs2_glock_dq_uninit(&ri_gh); fail: return error; } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 0ec3ec672de..c282ad41f3d 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -70,6 +70,11 @@ enum { Opt_commit, Opt_err_withdraw, Opt_err_panic, + Opt_statfs_quantum, + Opt_statfs_percent, + Opt_quota_quantum, + Opt_barrier, + Opt_nobarrier, Opt_error, }; @@ -101,18 +106,23 @@ static const match_table_t tokens = { {Opt_commit, "commit=%d"}, {Opt_err_withdraw, "errors=withdraw"}, {Opt_err_panic, "errors=panic"}, + {Opt_statfs_quantum, "statfs_quantum=%d"}, + {Opt_statfs_percent, "statfs_percent=%d"}, + {Opt_quota_quantum, "quota_quantum=%d"}, + {Opt_barrier, "barrier"}, + {Opt_nobarrier, "nobarrier"}, {Opt_error, NULL} }; /** * gfs2_mount_args - Parse mount options - * @sdp: - * @data: + * @args: The structure into which the parsed options will be written + * @options: The options to parse * * Return: errno */ -int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options) +int gfs2_mount_args(struct gfs2_args *args, char *options) { char *o; int token; @@ -157,7 +167,7 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options) break; case Opt_debug: if (args->ar_errors == GFS2_ERRORS_PANIC) { - fs_info(sdp, "-o debug and -o errors=panic " + printk(KERN_WARNING "GFS2: -o debug and -o errors=panic " "are mutually exclusive.\n"); return -EINVAL; } @@ -210,7 +220,29 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options) case Opt_commit: rv = match_int(&tmp[0], &args->ar_commit); if (rv || args->ar_commit <= 0) { - fs_info(sdp, "commit mount option requires a positive numeric argument\n"); + printk(KERN_WARNING "GFS2: commit mount option requires a positive numeric argument\n"); + return rv ? rv : -EINVAL; + } + break; + case Opt_statfs_quantum: + rv = match_int(&tmp[0], &args->ar_statfs_quantum); + if (rv || args->ar_statfs_quantum < 0) { + printk(KERN_WARNING "GFS2: statfs_quantum mount option requires a non-negative numeric argument\n"); + return rv ? rv : -EINVAL; + } + break; + case Opt_quota_quantum: + rv = match_int(&tmp[0], &args->ar_quota_quantum); + if (rv || args->ar_quota_quantum <= 0) { + printk(KERN_WARNING "GFS2: quota_quantum mount option requires a positive numeric argument\n"); + return rv ? rv : -EINVAL; + } + break; + case Opt_statfs_percent: + rv = match_int(&tmp[0], &args->ar_statfs_percent); + if (rv || args->ar_statfs_percent < 0 || + args->ar_statfs_percent > 100) { + printk(KERN_WARNING "statfs_percent mount option requires a numeric argument between 0 and 100\n"); return rv ? rv : -EINVAL; } break; @@ -219,15 +251,21 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options) break; case Opt_err_panic: if (args->ar_debug) { - fs_info(sdp, "-o debug and -o errors=panic " + printk(KERN_WARNING "GFS2: -o debug and -o errors=panic " "are mutually exclusive.\n"); return -EINVAL; } args->ar_errors = GFS2_ERRORS_PANIC; break; + case Opt_barrier: + args->ar_nobarrier = 0; + break; + case Opt_nobarrier: + args->ar_nobarrier = 1; + break; case Opt_error: default: - fs_info(sdp, "invalid mount option: %s\n", o); + printk(KERN_WARNING "GFS2: invalid mount option: %s\n", o); return -EINVAL; } } @@ -442,7 +480,10 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, { struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode); struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; + struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; struct buffer_head *l_bh; + s64 x, y; + int need_sync = 0; int error; error = gfs2_meta_inode_buffer(l_ip, &l_bh); @@ -456,9 +497,17 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, l_sc->sc_free += free; l_sc->sc_dinodes += dinodes; gfs2_statfs_change_out(l_sc, l_bh->b_data + sizeof(struct gfs2_dinode)); + if (sdp->sd_args.ar_statfs_percent) { + x = 100 * l_sc->sc_free; + y = m_sc->sc_free * sdp->sd_args.ar_statfs_percent; + if (x >= y || x <= -y) + need_sync = 1; + } spin_unlock(&sdp->sd_statfs_spin); brelse(l_bh); + if (need_sync) + gfs2_wake_up_statfs(sdp); } void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh, @@ -484,8 +533,9 @@ void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh, gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode)); } -int gfs2_statfs_sync(struct gfs2_sbd *sdp) +int gfs2_statfs_sync(struct super_block *sb, int type) { + struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode); struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; @@ -521,6 +571,7 @@ int gfs2_statfs_sync(struct gfs2_sbd *sdp) goto out_bh2; update_statfs(sdp, m_bh, l_bh); + sdp->sd_statfs_force_sync = 0; gfs2_trans_end(sdp); @@ -712,8 +763,8 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) int error; flush_workqueue(gfs2_delete_workqueue); - gfs2_quota_sync(sdp); - gfs2_statfs_sync(sdp); + gfs2_quota_sync(sdp->sd_vfs, 0); + gfs2_statfs_sync(sdp->sd_vfs, 0); error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, &t_gh); @@ -1061,8 +1112,13 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) spin_lock(>->gt_spin); args.ar_commit = gt->gt_log_flush_secs; + args.ar_quota_quantum = gt->gt_quota_quantum; + if (gt->gt_statfs_slow) + args.ar_statfs_quantum = 0; + else + args.ar_statfs_quantum = gt->gt_statfs_quantum; spin_unlock(>->gt_spin); - error = gfs2_mount_args(sdp, &args, data); + error = gfs2_mount_args(&args, data); if (error) return error; @@ -1097,8 +1153,21 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) sb->s_flags |= MS_POSIXACL; else sb->s_flags &= ~MS_POSIXACL; + if (sdp->sd_args.ar_nobarrier) + set_bit(SDF_NOBARRIERS, &sdp->sd_flags); + else + clear_bit(SDF_NOBARRIERS, &sdp->sd_flags); spin_lock(>->gt_spin); gt->gt_log_flush_secs = args.ar_commit; + gt->gt_quota_quantum = args.ar_quota_quantum; + if (args.ar_statfs_quantum) { + gt->gt_statfs_slow = 0; + gt->gt_statfs_quantum = args.ar_statfs_quantum; + } + else { + gt->gt_statfs_slow = 1; + gt->gt_statfs_quantum = 30; + } spin_unlock(>->gt_spin); gfs2_online_uevent(sdp); @@ -1179,7 +1248,7 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) { struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; struct gfs2_args *args = &sdp->sd_args; - int lfsecs; + int val; if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir)) seq_printf(s, ",meta"); @@ -1240,9 +1309,17 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) } if (args->ar_discard) seq_printf(s, ",discard"); - lfsecs = sdp->sd_tune.gt_log_flush_secs; - if (lfsecs != 60) - seq_printf(s, ",commit=%d", lfsecs); + val = sdp->sd_tune.gt_log_flush_secs; + if (val != 60) + seq_printf(s, ",commit=%d", val); + val = sdp->sd_tune.gt_statfs_quantum; + if (val != 30) + seq_printf(s, ",statfs_quantum=%d", val); + val = sdp->sd_tune.gt_quota_quantum; + if (val != 60) + seq_printf(s, ",quota_quantum=%d", val); + if (args->ar_statfs_percent) + seq_printf(s, ",statfs_percent=%d", args->ar_statfs_percent); if (args->ar_errors != GFS2_ERRORS_DEFAULT) { const char *state; @@ -1259,6 +1336,9 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) } seq_printf(s, ",errors=%s", state); } + if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) + seq_printf(s, ",nobarrier"); + return 0; } diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index 235db368288..3df60f2d84e 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h @@ -27,7 +27,7 @@ static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) extern void gfs2_jindex_free(struct gfs2_sbd *sdp); -extern int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *data); +extern int gfs2_mount_args(struct gfs2_args *args, char *data); extern struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid); extern int gfs2_jdesc_check(struct gfs2_jdesc *jd); @@ -44,7 +44,7 @@ extern void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf); extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh, struct buffer_head *l_bh); -extern int gfs2_statfs_sync(struct gfs2_sbd *sdp); +extern int gfs2_statfs_sync(struct super_block *sb, int type); extern int gfs2_freeze_fs(struct gfs2_sbd *sdp); extern void gfs2_unfreeze_fs(struct gfs2_sbd *sdp); diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 446329728d5..c5dad1eb7b9 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -158,7 +158,7 @@ static ssize_t statfs_sync_store(struct gfs2_sbd *sdp, const char *buf, if (simple_strtol(buf, NULL, 0) != 1) return -EINVAL; - gfs2_statfs_sync(sdp); + gfs2_statfs_sync(sdp->sd_vfs, 0); return len; } @@ -171,13 +171,14 @@ static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf, if (simple_strtol(buf, NULL, 0) != 1) return -EINVAL; - gfs2_quota_sync(sdp); + gfs2_quota_sync(sdp->sd_vfs, 0); return len; } static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf, size_t len) { + int error; u32 id; if (!capable(CAP_SYS_ADMIN)) @@ -185,13 +186,14 @@ static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf, id = simple_strtoul(buf, NULL, 0); - gfs2_quota_refresh(sdp, 1, id); - return len; + error = gfs2_quota_refresh(sdp, 1, id); + return error ? error : len; } static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf, size_t len) { + int error; u32 id; if (!capable(CAP_SYS_ADMIN)) @@ -199,8 +201,8 @@ static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf, id = simple_strtoul(buf, NULL, 0); - gfs2_quota_refresh(sdp, 0, id); - return len; + error = gfs2_quota_refresh(sdp, 0, id); + return error ? error : len; } static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len) diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 8a0f8ef6ee2..912f5cbc474 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -186,8 +186,8 @@ static int ea_find_i(struct gfs2_inode *ip, struct buffer_head *bh, return 0; } -int gfs2_ea_find(struct gfs2_inode *ip, int type, const char *name, - struct gfs2_ea_location *el) +static int gfs2_ea_find(struct gfs2_inode *ip, int type, const char *name, + struct gfs2_ea_location *el) { struct ea_find ef; int error; @@ -516,8 +516,8 @@ out: return error; } -int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el, - char *data, size_t size) +static int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el, + char *data, size_t size) { int ret; size_t len = GFS2_EA_DATA_LEN(el->el_ea); @@ -534,6 +534,36 @@ int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el, return len; } +int gfs2_xattr_acl_get(struct gfs2_inode *ip, const char *name, char **ppdata) +{ + struct gfs2_ea_location el; + int error; + int len; + char *data; + + error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, name, &el); + if (error) + return error; + if (!el.el_ea) + goto out; + if (!GFS2_EA_DATA_LEN(el.el_ea)) + goto out; + + len = GFS2_EA_DATA_LEN(el.el_ea); + data = kmalloc(len, GFP_NOFS); + error = -ENOMEM; + if (data == NULL) + goto out; + + error = gfs2_ea_get_copy(ip, &el, data, len); + if (error == 0) + error = len; + *ppdata = data; +out: + brelse(el.el_bh); + return error; +} + /** * gfs2_xattr_get - Get a GFS2 extended attribute * @inode: The inode @@ -1259,22 +1289,26 @@ fail: return error; } -int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el, - struct iattr *attr, char *data) +int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) { + struct gfs2_ea_location el; struct buffer_head *dibh; int error; - if (GFS2_EA_IS_STUFFED(el->el_ea)) { + error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, GFS2_POSIX_ACL_ACCESS, &el); + if (error) + return error; + + if (GFS2_EA_IS_STUFFED(el.el_ea)) { error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE + RES_EATTR, 0); if (error) return error; - gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1); - memcpy(GFS2_EA2DATA(el->el_ea), data, - GFS2_EA_DATA_LEN(el->el_ea)); + gfs2_trans_add_bh(ip->i_gl, el.el_bh, 1); + memcpy(GFS2_EA2DATA(el.el_ea), data, + GFS2_EA_DATA_LEN(el.el_ea)); } else - error = ea_acl_chmod_unstuffed(ip, el->el_ea, data); + error = ea_acl_chmod_unstuffed(ip, el.el_ea, data); if (error) return error; @@ -1507,18 +1541,6 @@ static int gfs2_xattr_user_set(struct inode *inode, const char *name, return gfs2_xattr_set(inode, GFS2_EATYPE_USR, name, value, size, flags); } -static int gfs2_xattr_system_get(struct inode *inode, const char *name, - void *buffer, size_t size) -{ - return gfs2_xattr_get(inode, GFS2_EATYPE_SYS, name, buffer, size); -} - -static int gfs2_xattr_system_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - return gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, value, size, flags); -} - static int gfs2_xattr_security_get(struct inode *inode, const char *name, void *buffer, size_t size) { @@ -1543,12 +1565,6 @@ static struct xattr_handler gfs2_xattr_security_handler = { .set = gfs2_xattr_security_set, }; -static struct xattr_handler gfs2_xattr_system_handler = { - .prefix = XATTR_SYSTEM_PREFIX, - .get = gfs2_xattr_system_get, - .set = gfs2_xattr_system_set, -}; - struct xattr_handler *gfs2_xattr_handlers[] = { &gfs2_xattr_user_handler, &gfs2_xattr_security_handler, diff --git a/fs/gfs2/xattr.h b/fs/gfs2/xattr.h index cbdfd774373..8d6ae5813c4 100644 --- a/fs/gfs2/xattr.h +++ b/fs/gfs2/xattr.h @@ -62,11 +62,7 @@ extern int gfs2_ea_dealloc(struct gfs2_inode *ip); /* Exported to acl.c */ -extern int gfs2_ea_find(struct gfs2_inode *ip, int type, const char *name, - struct gfs2_ea_location *el); -extern int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el, - char *data, size_t size); -extern int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el, - struct iattr *attr, char *data); +extern int gfs2_xattr_acl_get(struct gfs2_inode *ip, const char *name, char **data); +extern int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data); #endif /* __EATTR_DOT_H__ */ diff --git a/fs/inode.c b/fs/inode.c index 4d8e3be5597..06c1f02de61 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -18,7 +18,6 @@ #include <linux/hash.h> #include <linux/swap.h> #include <linux/security.h> -#include <linux/ima.h> #include <linux/pagemap.h> #include <linux/cdev.h> #include <linux/bootmem.h> @@ -157,11 +156,6 @@ int inode_init_always(struct super_block *sb, struct inode *inode) if (security_inode_alloc(inode)) goto out; - - /* allocate and initialize an i_integrity */ - if (ima_inode_alloc(inode)) - goto out_free_security; - spin_lock_init(&inode->i_lock); lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key); @@ -201,9 +195,6 @@ int inode_init_always(struct super_block *sb, struct inode *inode) #endif return 0; - -out_free_security: - security_inode_free(inode); out: return -ENOMEM; } @@ -235,7 +226,6 @@ static struct inode *alloc_inode(struct super_block *sb) void __destroy_inode(struct inode *inode) { BUG_ON(inode_has_buffers(inode)); - ima_inode_free(inode); security_inode_free(inode); fsnotify_inode_delete(inode); #ifdef CONFIG_FS_POSIX_ACL diff --git a/fs/namespace.c b/fs/namespace.c index bdc3cb4fd22..7d70d63ceb2 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1921,6 +1921,16 @@ long do_mount(char *dev_name, char *dir_name, char *type_page, if (data_page) ((char *)data_page)[PAGE_SIZE - 1] = 0; + /* ... and get the mountpoint */ + retval = kern_path(dir_name, LOOKUP_FOLLOW, &path); + if (retval) + return retval; + + retval = security_sb_mount(dev_name, &path, + type_page, flags, data_page); + if (retval) + goto dput_out; + /* Default to relatime unless overriden */ if (!(flags & MS_NOATIME)) mnt_flags |= MNT_RELATIME; @@ -1945,16 +1955,6 @@ long do_mount(char *dev_name, char *dir_name, char *type_page, MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | MS_STRICTATIME); - /* ... and get the mountpoint */ - retval = kern_path(dir_name, LOOKUP_FOLLOW, &path); - if (retval) - return retval; - - retval = security_sb_mount(dev_name, &path, - type_page, flags, data_page); - if (retval) - goto dput_out; - if (flags & MS_REMOUNT) retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags, data_page); diff --git a/fs/open.c b/fs/open.c index 4f01e06227c..b4b31d277f3 100644 --- a/fs/open.c +++ b/fs/open.c @@ -587,6 +587,9 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename) error = -EPERM; if (!capable(CAP_SYS_CHROOT)) goto dput_and_out; + error = security_path_chroot(&path); + if (error) + goto dput_and_out; set_fs_root(current->fs, &path); error = 0; @@ -617,11 +620,15 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode) if (err) goto out_putf; mutex_lock(&inode->i_mutex); + err = security_path_chmod(dentry, file->f_vfsmnt, mode); + if (err) + goto out_unlock; if (mode == (mode_t) -1) mode = inode->i_mode; newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; err = notify_change(dentry, &newattrs); +out_unlock: mutex_unlock(&inode->i_mutex); mnt_drop_write(file->f_path.mnt); out_putf: @@ -646,11 +653,15 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode) if (error) goto dput_and_out; mutex_lock(&inode->i_mutex); + error = security_path_chmod(path.dentry, path.mnt, mode); + if (error) + goto out_unlock; if (mode == (mode_t) -1) mode = inode->i_mode; newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; error = notify_change(path.dentry, &newattrs); +out_unlock: mutex_unlock(&inode->i_mutex); mnt_drop_write(path.mnt); dput_and_out: @@ -664,9 +675,9 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, mode_t, mode) return sys_fchmodat(AT_FDCWD, filename, mode); } -static int chown_common(struct dentry * dentry, uid_t user, gid_t group) +static int chown_common(struct path *path, uid_t user, gid_t group) { - struct inode *inode = dentry->d_inode; + struct inode *inode = path->dentry->d_inode; int error; struct iattr newattrs; @@ -683,7 +694,9 @@ static int chown_common(struct dentry * dentry, uid_t user, gid_t group) newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; mutex_lock(&inode->i_mutex); - error = notify_change(dentry, &newattrs); + error = security_path_chown(path, user, group); + if (!error) + error = notify_change(path->dentry, &newattrs); mutex_unlock(&inode->i_mutex); return error; @@ -700,7 +713,7 @@ SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group) error = mnt_want_write(path.mnt); if (error) goto out_release; - error = chown_common(path.dentry, user, group); + error = chown_common(&path, user, group); mnt_drop_write(path.mnt); out_release: path_put(&path); @@ -725,7 +738,7 @@ SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user, error = mnt_want_write(path.mnt); if (error) goto out_release; - error = chown_common(path.dentry, user, group); + error = chown_common(&path, user, group); mnt_drop_write(path.mnt); out_release: path_put(&path); @@ -744,7 +757,7 @@ SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group error = mnt_want_write(path.mnt); if (error) goto out_release; - error = chown_common(path.dentry, user, group); + error = chown_common(&path, user, group); mnt_drop_write(path.mnt); out_release: path_put(&path); @@ -767,7 +780,7 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) goto out_fput; dentry = file->f_path.dentry; audit_inode(NULL, dentry); - error = chown_common(dentry, user, group); + error = chown_common(&file->f_path, user, group); mnt_drop_write(file->f_path.mnt); out_fput: fput(file); diff --git a/fs/proc/array.c b/fs/proc/array.c index 822c2d50651..4badde179b1 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -410,6 +410,16 @@ static void task_show_stack_usage(struct seq_file *m, struct task_struct *task) } #endif /* CONFIG_MMU */ +static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) +{ + seq_printf(m, "Cpus_allowed:\t"); + seq_cpumask(m, &task->cpus_allowed); + seq_printf(m, "\n"); + seq_printf(m, "Cpus_allowed_list:\t"); + seq_cpumask_list(m, &task->cpus_allowed); + seq_printf(m, "\n"); +} + int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { @@ -424,6 +434,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, } task_sig(m, task); task_cap(m, task); + task_cpus_allowed(m, task); cpuset_task_status_allowed(m, task); #if defined(CONFIG_S390) task_show_regs(m, task); @@ -495,20 +506,17 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, /* add up live thread stats at the group level */ if (whole) { - struct task_cputime cputime; struct task_struct *t = task; do { min_flt += t->min_flt; maj_flt += t->maj_flt; - gtime = cputime_add(gtime, task_gtime(t)); + gtime = cputime_add(gtime, t->gtime); t = next_thread(t); } while (t != task); min_flt += sig->min_flt; maj_flt += sig->maj_flt; - thread_group_cputime(task, &cputime); - utime = cputime.utime; - stime = cputime.stime; + thread_group_times(task, &utime, &stime); gtime = cputime_add(gtime, sig->gtime); } @@ -524,9 +532,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, if (!whole) { min_flt = task->min_flt; maj_flt = task->maj_flt; - utime = task_utime(task); - stime = task_stime(task); - gtime = task_gtime(task); + task_times(task, &utime, &stime); + gtime = task->gtime; } /* scale priority and nice values from timeslices to -20..20 */ diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 7cc726c6d70..b9b7aad2003 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -27,7 +27,7 @@ static int show_stat(struct seq_file *p, void *v) int i, j; unsigned long jif; cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; - cputime64_t guest; + cputime64_t guest, guest_nice; u64 sum = 0; u64 sum_softirq = 0; unsigned int per_softirq_sums[NR_SOFTIRQS] = {0}; @@ -36,7 +36,7 @@ static int show_stat(struct seq_file *p, void *v) user = nice = system = idle = iowait = irq = softirq = steal = cputime64_zero; - guest = cputime64_zero; + guest = guest_nice = cputime64_zero; getboottime(&boottime); jif = boottime.tv_sec; @@ -51,6 +51,8 @@ static int show_stat(struct seq_file *p, void *v) softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); + guest_nice = cputime64_add(guest_nice, + kstat_cpu(i).cpustat.guest_nice); for_each_irq_nr(j) { sum += kstat_irqs_cpu(j, i); } @@ -65,7 +67,8 @@ static int show_stat(struct seq_file *p, void *v) } sum += arch_irq_stat(); - seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", + seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu " + "%llu\n", (unsigned long long)cputime64_to_clock_t(user), (unsigned long long)cputime64_to_clock_t(nice), (unsigned long long)cputime64_to_clock_t(system), @@ -74,7 +77,8 @@ static int show_stat(struct seq_file *p, void *v) (unsigned long long)cputime64_to_clock_t(irq), (unsigned long long)cputime64_to_clock_t(softirq), (unsigned long long)cputime64_to_clock_t(steal), - (unsigned long long)cputime64_to_clock_t(guest)); + (unsigned long long)cputime64_to_clock_t(guest), + (unsigned long long)cputime64_to_clock_t(guest_nice)); for_each_online_cpu(i) { /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ @@ -88,8 +92,10 @@ static int show_stat(struct seq_file *p, void *v) softirq = kstat_cpu(i).cpustat.softirq; steal = kstat_cpu(i).cpustat.steal; guest = kstat_cpu(i).cpustat.guest; + guest_nice = kstat_cpu(i).cpustat.guest_nice; seq_printf(p, - "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", + "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu " + "%llu\n", i, (unsigned long long)cputime64_to_clock_t(user), (unsigned long long)cputime64_to_clock_t(nice), @@ -99,7 +105,8 @@ static int show_stat(struct seq_file *p, void *v) (unsigned long long)cputime64_to_clock_t(irq), (unsigned long long)cputime64_to_clock_t(softirq), (unsigned long long)cputime64_to_clock_t(steal), - (unsigned long long)cputime64_to_clock_t(guest)); + (unsigned long long)cputime64_to_clock_t(guest), + (unsigned long long)cputime64_to_clock_t(guest_nice)); } seq_printf(p, "intr %llu", (unsigned long long)sum); diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig index 8047e01ef46..353e78a9ebe 100644 --- a/fs/quota/Kconfig +++ b/fs/quota/Kconfig @@ -17,7 +17,7 @@ config QUOTA config QUOTA_NETLINK_INTERFACE bool "Report quota messages through netlink interface" - depends on QUOTA && NET + depends on QUOTACTL && NET help If you say Y here, quota warnings (about exceeding softlimit, reaching hardlimit, etc.) will be reported through netlink interface. If unsure, diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 39b49c42a7e..9b6ad908dcb 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -77,10 +77,6 @@ #include <linux/capability.h> #include <linux/quotaops.h> #include <linux/writeback.h> /* for inode_lock, oddly enough.. */ -#ifdef CONFIG_QUOTA_NETLINK_INTERFACE -#include <net/netlink.h> -#include <net/genetlink.h> -#endif #include <asm/uaccess.h> @@ -1071,73 +1067,6 @@ static void print_warning(struct dquot *dquot, const int warntype) } #endif -#ifdef CONFIG_QUOTA_NETLINK_INTERFACE - -/* Netlink family structure for quota */ -static struct genl_family quota_genl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = "VFS_DQUOT", - .version = 1, - .maxattr = QUOTA_NL_A_MAX, -}; - -/* Send warning to userspace about user which exceeded quota */ -static void send_warning(const struct dquot *dquot, const char warntype) -{ - static atomic_t seq; - struct sk_buff *skb; - void *msg_head; - int ret; - int msg_size = 4 * nla_total_size(sizeof(u32)) + - 2 * nla_total_size(sizeof(u64)); - - /* We have to allocate using GFP_NOFS as we are called from a - * filesystem performing write and thus further recursion into - * the fs to free some data could cause deadlocks. */ - skb = genlmsg_new(msg_size, GFP_NOFS); - if (!skb) { - printk(KERN_ERR - "VFS: Not enough memory to send quota warning.\n"); - return; - } - msg_head = genlmsg_put(skb, 0, atomic_add_return(1, &seq), - "a_genl_family, 0, QUOTA_NL_C_WARNING); - if (!msg_head) { - printk(KERN_ERR - "VFS: Cannot store netlink header in quota warning.\n"); - goto err_out; - } - ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, dquot->dq_type); - if (ret) - goto attr_err_out; - ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, dquot->dq_id); - if (ret) - goto attr_err_out; - ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype); - if (ret) - goto attr_err_out; - ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR, - MAJOR(dquot->dq_sb->s_dev)); - if (ret) - goto attr_err_out; - ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR, - MINOR(dquot->dq_sb->s_dev)); - if (ret) - goto attr_err_out; - ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid()); - if (ret) - goto attr_err_out; - genlmsg_end(skb, msg_head); - - genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS); - return; -attr_err_out: - printk(KERN_ERR "VFS: Not enough space to compose quota message!\n"); -err_out: - kfree_skb(skb); -} -#endif /* * Write warnings to the console and send warning messages over netlink. * @@ -1145,18 +1074,20 @@ err_out: */ static void flush_warnings(struct dquot *const *dquots, char *warntype) { + struct dquot *dq; int i; - for (i = 0; i < MAXQUOTAS; i++) - if (dquots[i] && warntype[i] != QUOTA_NL_NOWARN && - !warning_issued(dquots[i], warntype[i])) { + for (i = 0; i < MAXQUOTAS; i++) { + dq = dquots[i]; + if (dq && warntype[i] != QUOTA_NL_NOWARN && + !warning_issued(dq, warntype[i])) { #ifdef CONFIG_PRINT_QUOTA_WARNING - print_warning(dquots[i], warntype[i]); -#endif -#ifdef CONFIG_QUOTA_NETLINK_INTERFACE - send_warning(dquots[i], warntype[i]); + print_warning(dq, warntype[i]); #endif + quota_send_warning(dq->dq_type, dq->dq_id, + dq->dq_sb->s_dev, warntype[i]); } + } } static int ignore_hardlimit(struct dquot *dquot) @@ -2607,12 +2538,6 @@ static int __init dquot_init(void) register_shrinker(&dqcache_shrinker); -#ifdef CONFIG_QUOTA_NETLINK_INTERFACE - if (genl_register_family("a_genl_family) != 0) - printk(KERN_ERR - "VFS: Failed to create quota netlink interface.\n"); -#endif - return 0; } module_init(dquot_init); diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 95c5b42384b..ee91e275695 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -18,6 +18,8 @@ #include <linux/capability.h> #include <linux/quotaops.h> #include <linux/types.h> +#include <net/netlink.h> +#include <net/genetlink.h> /* Check validity of generic quotactl commands */ static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, @@ -525,3 +527,94 @@ asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special, return ret; } #endif + + +#ifdef CONFIG_QUOTA_NETLINK_INTERFACE + +/* Netlink family structure for quota */ +static struct genl_family quota_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = "VFS_DQUOT", + .version = 1, + .maxattr = QUOTA_NL_A_MAX, +}; + +/** + * quota_send_warning - Send warning to userspace about exceeded quota + * @type: The quota type: USRQQUOTA, GRPQUOTA,... + * @id: The user or group id of the quota that was exceeded + * @dev: The device on which the fs is mounted (sb->s_dev) + * @warntype: The type of the warning: QUOTA_NL_... + * + * This can be used by filesystems (including those which don't use + * dquot) to send a message to userspace relating to quota limits. + * + */ + +void quota_send_warning(short type, unsigned int id, dev_t dev, + const char warntype) +{ + static atomic_t seq; + struct sk_buff *skb; + void *msg_head; + int ret; + int msg_size = 4 * nla_total_size(sizeof(u32)) + + 2 * nla_total_size(sizeof(u64)); + + /* We have to allocate using GFP_NOFS as we are called from a + * filesystem performing write and thus further recursion into + * the fs to free some data could cause deadlocks. */ + skb = genlmsg_new(msg_size, GFP_NOFS); + if (!skb) { + printk(KERN_ERR + "VFS: Not enough memory to send quota warning.\n"); + return; + } + msg_head = genlmsg_put(skb, 0, atomic_add_return(1, &seq), + "a_genl_family, 0, QUOTA_NL_C_WARNING); + if (!msg_head) { + printk(KERN_ERR + "VFS: Cannot store netlink header in quota warning.\n"); + goto err_out; + } + ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, type); + if (ret) + goto attr_err_out; + ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, id); + if (ret) + goto attr_err_out; + ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype); + if (ret) + goto attr_err_out; + ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR, MAJOR(dev)); + if (ret) + goto attr_err_out; + ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR, MINOR(dev)); + if (ret) + goto attr_err_out; + ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid()); + if (ret) + goto attr_err_out; + genlmsg_end(skb, msg_head); + + genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS); + return; +attr_err_out: + printk(KERN_ERR "VFS: Not enough space to compose quota message!\n"); +err_out: + kfree_skb(skb); +} +EXPORT_SYMBOL(quota_send_warning); + +static int __init quota_init(void) +{ + if (genl_register_family("a_genl_family) != 0) + printk(KERN_ERR + "VFS: Failed to create quota netlink interface.\n"); + return 0; +}; + +module_init(quota_init); +#endif + diff --git a/fs/xattr_acl.c b/fs/xattr_acl.c index c6ad7c7e3ee..05ac0fe9c4d 100644 --- a/fs/xattr_acl.c +++ b/fs/xattr_acl.c @@ -36,7 +36,7 @@ posix_acl_from_xattr(const void *value, size_t size) if (count == 0) return NULL; - acl = posix_acl_alloc(count, GFP_KERNEL); + acl = posix_acl_alloc(count, GFP_NOFS); if (!acl) return ERR_PTR(-ENOMEM); acl_e = acl->a_entries; diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 1feed71551c..5a5385749e1 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -330,6 +330,7 @@ unifdef-y += scc.h unifdef-y += sched.h unifdef-y += screen_info.h unifdef-y += sdla.h +unifdef-y += securebits.h unifdef-y += selinux_netlink.h unifdef-y += sem.h unifdef-y += serial_core.h diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index dd97fb8408a..b10ec49ee2d 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -53,6 +53,7 @@ extern void free_bootmem_node(pg_data_t *pgdat, unsigned long addr, unsigned long size); extern void free_bootmem(unsigned long addr, unsigned long size); +extern void free_bootmem_late(unsigned long addr, unsigned long size); /* * Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE, diff --git a/include/linux/capability.h b/include/linux/capability.h index c8f2a5f70ed..39e5ff512fb 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -92,9 +92,7 @@ struct vfs_cap_data { #define _KERNEL_CAPABILITY_VERSION _LINUX_CAPABILITY_VERSION_3 #define _KERNEL_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_3 -#ifdef CONFIG_SECURITY_FILE_CAPABILITIES extern int file_caps_enabled; -#endif typedef struct kernel_cap_struct { __u32 cap[_KERNEL_CAPABILITY_U32S]; diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index a3ed7cb8ca3..73dcf804bc9 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -79,6 +79,7 @@ #define noinline __attribute__((noinline)) #define __attribute_const__ __attribute__((__const__)) #define __maybe_unused __attribute__((unused)) +#define __always_unused __attribute__((unused)) #define __gcc_header(x) #x #define _gcc_header(x) __gcc_header(linux/compiler-gcc##x.h) diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 450fa597c94..94dea3ffbfa 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -36,4 +36,26 @@ the kernel context */ #define __cold __attribute__((__cold__)) + +#if __GNUC_MINOR__ >= 5 +/* + * Mark a position in code as unreachable. This can be used to + * suppress control flow warnings after asm blocks that transfer + * control elsewhere. + * + * Early snapshots of gcc 4.5 don't support this and we can't detect + * this in the preprocessor, but we can live with this because they're + * unreleased. Really, we need to have autoconf for the kernel. + */ +#define unreachable() __builtin_unreachable() +#endif + +#endif + +#if __GNUC_MINOR__ > 0 +#define __compiletime_object_size(obj) __builtin_object_size(obj, 0) +#endif +#if __GNUC_MINOR__ >= 4 +#define __compiletime_warning(message) __attribute__((warning(message))) +#define __compiletime_error(message) __attribute__((error(message))) #endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 04fb5135b4e..5be3dab4a69 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -144,6 +144,11 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # define barrier() __memory_barrier() #endif +/* Unreachable code */ +#ifndef unreachable +# define unreachable() do { } while (1) +#endif + #ifndef RELOC_HIDE # define RELOC_HIDE(ptr, off) \ ({ unsigned long __ptr; \ @@ -213,6 +218,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # define __maybe_unused /* unimplemented */ #endif +#ifndef __always_unused +# define __always_unused /* unimplemented */ +#endif + #ifndef noinline #define noinline #endif @@ -266,6 +275,17 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) #endif +/* Compile time object size, -1 for unknown */ +#ifndef __compiletime_object_size +# define __compiletime_object_size(obj) -1 +#endif +#ifndef __compiletime_warning +# define __compiletime_warning(message) +#endif +#ifndef __compiletime_error +# define __compiletime_error(message) +#endif + /* * Prevent the compiler from merging or refetching accesses. The compiler * is also forbidden from reordering successive instances of ACCESS_ONCE(), diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 4a2b162c256..5de4c9e5856 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -208,16 +208,9 @@ struct dmar_atsr_unit { u8 include_all:1; /* include all ports */ }; -/* Intel DMAR initialization functions */ extern int intel_iommu_init(void); -#else -static inline int intel_iommu_init(void) -{ -#ifdef CONFIG_INTR_REMAP - return dmar_dev_scope_init(); -#else - return -ENODEV; -#endif -} -#endif /* !CONFIG_DMAR */ +#else /* !CONFIG_DMAR: */ +static inline int intel_iommu_init(void) { return -ENODEV; } +#endif /* CONFIG_DMAR */ + #endif /* __DMAR_H__ */ diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 4ec5e67e18c..47bbdf9c38d 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -117,12 +117,12 @@ struct ftrace_event_call { struct dentry *dir; struct trace_event *event; int enabled; - int (*regfunc)(void *); - void (*unregfunc)(void *); + int (*regfunc)(struct ftrace_event_call *); + void (*unregfunc)(struct ftrace_event_call *); int id; - int (*raw_init)(void); - int (*show_format)(struct ftrace_event_call *call, - struct trace_seq *s); + int (*raw_init)(struct ftrace_event_call *); + int (*show_format)(struct ftrace_event_call *, + struct trace_seq *); int (*define_fields)(struct ftrace_event_call *); struct list_head fields; int filter_active; @@ -131,20 +131,20 @@ struct ftrace_event_call { void *data; atomic_t profile_count; - int (*profile_enable)(void); - void (*profile_disable)(void); + int (*profile_enable)(struct ftrace_event_call *); + void (*profile_disable)(struct ftrace_event_call *); }; #define FTRACE_MAX_PROFILE_SIZE 2048 -extern char *trace_profile_buf; -extern char *trace_profile_buf_nmi; +extern char *perf_trace_buf; +extern char *perf_trace_buf_nmi; #define MAX_FILTER_PRED 32 #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ extern void destroy_preds(struct ftrace_event_call *call); -extern int filter_match_preds(struct ftrace_event_call *call, void *rec); +extern int filter_match_preds(struct event_filter *filter, void *rec); extern int filter_current_check_discard(struct ring_buffer *buffer, struct ftrace_event_call *call, void *rec, @@ -157,11 +157,12 @@ enum { FILTER_PTR_STRING, }; -extern int trace_define_field(struct ftrace_event_call *call, - const char *type, const char *name, - int offset, int size, int is_signed, - int filter_type); extern int trace_define_common_fields(struct ftrace_event_call *call); +extern int trace_define_field(struct ftrace_event_call *call, const char *type, + const char *name, int offset, int size, + int is_signed, int filter_type); +extern int trace_add_event_call(struct ftrace_event_call *call); +extern void trace_remove_event_call(struct ftrace_event_call *call); #define is_signed_type(type) (((type)(-1)) < 0) @@ -186,4 +187,13 @@ do { \ __trace_printk(ip, fmt, ##args); \ } while (0) +#ifdef CONFIG_EVENT_PROFILE +struct perf_event; +extern int ftrace_profile_enable(int event_id); +extern void ftrace_profile_disable(int event_id); +extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, + char *filter_str); +extern void ftrace_profile_free_filter(struct perf_event *event); +#endif + #endif /* _LINUX_FTRACE_EVENT_H */ diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index b80c88dedbb..81f90a59cda 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -81,7 +81,11 @@ struct gfs2_meta_header { __be32 mh_type; __be64 __pad0; /* Was generation number in gfs1 */ __be32 mh_format; - __be32 __pad1; /* Was incarnation number in gfs1 */ + /* This union is to keep userspace happy */ + union { + __be32 mh_jid; /* Was incarnation number in gfs1 */ + __be32 __pad1; + }; }; /* diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 6d527ee82b2..d5b387669da 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -139,10 +139,34 @@ static inline void account_system_vtime(struct task_struct *tsk) #endif #if defined(CONFIG_NO_HZ) +#if defined(CONFIG_TINY_RCU) +extern void rcu_enter_nohz(void); +extern void rcu_exit_nohz(void); + +static inline void rcu_irq_enter(void) +{ + rcu_exit_nohz(); +} + +static inline void rcu_irq_exit(void) +{ + rcu_enter_nohz(); +} + +static inline void rcu_nmi_enter(void) +{ +} + +static inline void rcu_nmi_exit(void) +{ +} + +#else extern void rcu_irq_enter(void); extern void rcu_irq_exit(void); extern void rcu_nmi_enter(void); extern void rcu_nmi_exit(void); +#endif #else # define rcu_irq_enter() do { } while (0) # define rcu_irq_exit() do { } while (0) diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h new file mode 100644 index 00000000000..a03daed08c5 --- /dev/null +++ b/include/linux/hw_breakpoint.h @@ -0,0 +1,131 @@ +#ifndef _LINUX_HW_BREAKPOINT_H +#define _LINUX_HW_BREAKPOINT_H + +enum { + HW_BREAKPOINT_LEN_1 = 1, + HW_BREAKPOINT_LEN_2 = 2, + HW_BREAKPOINT_LEN_4 = 4, + HW_BREAKPOINT_LEN_8 = 8, +}; + +enum { + HW_BREAKPOINT_R = 1, + HW_BREAKPOINT_W = 2, + HW_BREAKPOINT_X = 4, +}; + +#ifdef __KERNEL__ + +#include <linux/perf_event.h> + +#ifdef CONFIG_HAVE_HW_BREAKPOINT + +/* As it's for in-kernel or ptrace use, we want it to be pinned */ +#define DEFINE_BREAKPOINT_ATTR(name) \ +struct perf_event_attr name = { \ + .type = PERF_TYPE_BREAKPOINT, \ + .size = sizeof(name), \ + .pinned = 1, \ +}; + +static inline void hw_breakpoint_init(struct perf_event_attr *attr) +{ + attr->type = PERF_TYPE_BREAKPOINT; + attr->size = sizeof(*attr); + attr->pinned = 1; +} + +static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) +{ + return bp->attr.bp_addr; +} + +static inline int hw_breakpoint_type(struct perf_event *bp) +{ + return bp->attr.bp_type; +} + +static inline int hw_breakpoint_len(struct perf_event *bp) +{ + return bp->attr.bp_len; +} + +extern struct perf_event * +register_user_hw_breakpoint(struct perf_event_attr *attr, + perf_callback_t triggered, + struct task_struct *tsk); + +/* FIXME: only change from the attr, and don't unregister */ +extern struct perf_event * +modify_user_hw_breakpoint(struct perf_event *bp, + struct perf_event_attr *attr, + perf_callback_t triggered, + struct task_struct *tsk); + +/* + * Kernel breakpoints are not associated with any particular thread. + */ +extern struct perf_event * +register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr, + perf_callback_t triggered, + int cpu); + +extern struct perf_event ** +register_wide_hw_breakpoint(struct perf_event_attr *attr, + perf_callback_t triggered); + +extern int register_perf_hw_breakpoint(struct perf_event *bp); +extern int __register_perf_hw_breakpoint(struct perf_event *bp); +extern void unregister_hw_breakpoint(struct perf_event *bp); +extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events); + +extern int reserve_bp_slot(struct perf_event *bp); +extern void release_bp_slot(struct perf_event *bp); + +extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk); + +static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) +{ + return &bp->hw.info; +} + +#else /* !CONFIG_HAVE_HW_BREAKPOINT */ + +static inline struct perf_event * +register_user_hw_breakpoint(struct perf_event_attr *attr, + perf_callback_t triggered, + struct task_struct *tsk) { return NULL; } +static inline struct perf_event * +modify_user_hw_breakpoint(struct perf_event *bp, + struct perf_event_attr *attr, + perf_callback_t triggered, + struct task_struct *tsk) { return NULL; } +static inline struct perf_event * +register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr, + perf_callback_t triggered, + int cpu) { return NULL; } +static inline struct perf_event ** +register_wide_hw_breakpoint(struct perf_event_attr *attr, + perf_callback_t triggered) { return NULL; } +static inline int +register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } +static inline int +__register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } +static inline void unregister_hw_breakpoint(struct perf_event *bp) { } +static inline void +unregister_wide_hw_breakpoint(struct perf_event **cpu_events) { } +static inline int +reserve_bp_slot(struct perf_event *bp) {return -ENOSYS; } +static inline void release_bp_slot(struct perf_event *bp) { } + +static inline void flush_ptrace_hw_breakpoint(struct task_struct *tsk) { } + +static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) +{ + return NULL; +} + +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ +#endif /* __KERNEL__ */ + +#endif /* _LINUX_HW_BREAKPOINT_H */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 21a6f5d9af2..8d10aa7fd4c 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -83,16 +83,12 @@ extern struct group_info init_groups; #define INIT_IDS #endif -#ifdef CONFIG_SECURITY_FILE_CAPABILITIES /* * Because of the reduced scope of CAP_SETPCAP when filesystem * capabilities are in effect, it is safe to allow CAP_SETPCAP to * be available in the default configuration. */ # define CAP_INIT_BSET CAP_FULL_SET -#else -# define CAP_INIT_BSET CAP_INIT_EFF_SET -#endif #ifdef CONFIG_TREE_PREEMPT_RCU #define INIT_TASK_RCU_PREEMPT(tsk) \ diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 7ca72b74eec..75f3f00ac1e 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -603,12 +603,6 @@ static inline void init_irq_proc(void) } #endif -#if defined(CONFIG_GENERIC_HARDIRQS) && defined(CONFIG_DEBUG_SHIRQ) -extern void debug_poll_all_shared_irqs(void); -#else -static inline void debug_poll_all_shared_irqs(void) { } -#endif - struct seq_file; int show_interrupts(struct seq_file *p, void *v); diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index b02a3f1d46a..006bf45eae3 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -124,6 +124,6 @@ typecheck(unsigned long, flags); \ raw_irqs_disabled_flags(flags); \ }) -#endif /* CONFIG_X86 */ +#endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */ #endif diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 1a9cf78bfce..6811f4bfc6e 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -307,6 +307,7 @@ extern clock_t jiffies_to_clock_t(long x); extern unsigned long clock_t_to_jiffies(unsigned long x); extern u64 jiffies_64_to_clock_t(u64 x); extern u64 nsec_to_clock_t(u64 x); +extern unsigned long nsecs_to_jiffies(u64 n); #define TIMESTAMP_SIZE 30 diff --git a/include/linux/kernel.h b/include/linux/kernel.h index f4e3184fa05..3fa4c590cf1 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -15,7 +15,6 @@ #include <linux/bitops.h> #include <linux/log2.h> #include <linux/typecheck.h> -#include <linux/ratelimit.h> #include <linux/dynamic_debug.h> #include <asm/byteorder.h> #include <asm/bug.h> @@ -241,8 +240,8 @@ asmlinkage int vprintk(const char *fmt, va_list args) asmlinkage int printk(const char * fmt, ...) __attribute__ ((format (printf, 1, 2))) __cold; -extern struct ratelimit_state printk_ratelimit_state; -extern int printk_ratelimit(void); +extern int __printk_ratelimit(const char *func); +#define printk_ratelimit() __printk_ratelimit(__func__) extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec); diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 348fa8874b5..c059044bc6d 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -25,6 +25,7 @@ struct cpu_usage_stat { cputime64_t iowait; cputime64_t steal; cputime64_t guest; + cputime64_t guest_nice; }; struct kernel_stat { diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 3a46b7b7abb..1b672f74a32 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -296,6 +296,8 @@ void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head); int disable_kprobe(struct kprobe *kp); int enable_kprobe(struct kprobe *kp); +void dump_kprobe(struct kprobe *kp); + #else /* !CONFIG_KPROBES: */ static inline int kprobes_built_in(void) diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index 190c3785487..f78f83d7663 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -26,14 +26,15 @@ /* Auxiliary data to use in generating the audit record. */ struct common_audit_data { - char type; -#define LSM_AUDIT_DATA_FS 1 -#define LSM_AUDIT_DATA_NET 2 -#define LSM_AUDIT_DATA_CAP 3 -#define LSM_AUDIT_DATA_IPC 4 -#define LSM_AUDIT_DATA_TASK 5 -#define LSM_AUDIT_DATA_KEY 6 -#define LSM_AUDIT_NO_AUDIT 7 + char type; +#define LSM_AUDIT_DATA_FS 1 +#define LSM_AUDIT_DATA_NET 2 +#define LSM_AUDIT_DATA_CAP 3 +#define LSM_AUDIT_DATA_IPC 4 +#define LSM_AUDIT_DATA_TASK 5 +#define LSM_AUDIT_DATA_KEY 6 +#define LSM_AUDIT_NO_AUDIT 7 +#define LSM_AUDIT_DATA_KMOD 8 struct task_struct *tsk; union { struct { @@ -66,6 +67,7 @@ struct common_audit_data { char *key_desc; } key_struct; #endif + char *kmod_name; } u; /* this union contains LSM specific data */ union { diff --git a/drivers/mfd/mcp.h b/include/linux/mfd/mcp.h index c093a93b880..ee496708e38 100644 --- a/drivers/mfd/mcp.h +++ b/include/linux/mfd/mcp.h @@ -10,6 +10,8 @@ #ifndef MCP_H #define MCP_H +#include <mach/dma.h> + struct mcp_ops; struct mcp { @@ -24,6 +26,7 @@ struct mcp { dma_device_t dma_telco_rd; dma_device_t dma_telco_wr; struct device attached_device; + int gpio_base; }; struct mcp_ops { diff --git a/drivers/mfd/ucb1x00.h b/include/linux/mfd/ucb1x00.h index a8ad8a0ed5d..aa9c3789bed 100644 --- a/drivers/mfd/ucb1x00.h +++ b/include/linux/mfd/ucb1x00.h @@ -1,5 +1,5 @@ /* - * linux/drivers/mfd/ucb1x00.h + * linux/include/mfd/ucb1x00.h * * Copyright (C) 2001 Russell King, All Rights Reserved. * @@ -10,6 +10,9 @@ #ifndef UCB1200_H #define UCB1200_H +#include <linux/mfd/mcp.h> +#include <linux/gpio.h> + #define UCB_IO_DATA 0x00 #define UCB_IO_DIR 0x01 @@ -100,7 +103,6 @@ #define UCB_MODE_DYN_VFLAG_ENA (1 << 12) #define UCB_MODE_AUD_OFF_CAN (1 << 13) -#include "mcp.h" struct ucb1x00_irq { void *devid; @@ -123,6 +125,7 @@ struct ucb1x00 { struct device dev; struct list_head node; struct list_head devs; + struct gpio_chip gpio; }; struct ucb1x00_driver; diff --git a/include/linux/mfd/wm831x/regulator.h b/include/linux/mfd/wm831x/regulator.h index f95466343fb..955d30fc6a2 100644 --- a/include/linux/mfd/wm831x/regulator.h +++ b/include/linux/mfd/wm831x/regulator.h @@ -1212,7 +1212,7 @@ #define WM831X_LDO1_OK_SHIFT 0 /* LDO1_OK */ #define WM831X_LDO1_OK_WIDTH 1 /* LDO1_OK */ -#define WM831X_ISINK_MAX_ISEL 56 -extern int wm831x_isinkv_values[WM831X_ISINK_MAX_ISEL]; +#define WM831X_ISINK_MAX_ISEL 55 +extern int wm831x_isinkv_values[WM831X_ISINK_MAX_ISEL + 1]; #endif diff --git a/include/linux/net.h b/include/linux/net.h index 529a0931711..d7e26e30c8c 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -358,6 +358,7 @@ static const struct proto_ops name##_ops = { \ #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> +#include <linux/ratelimit.h> extern struct ratelimit_state net_ratelimit_state; #endif diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 84cf1f3b783..daecca3c830 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1633,6 +1633,8 @@ #define PCI_DEVICE_ID_O2_6730 0x673a #define PCI_DEVICE_ID_O2_6832 0x6832 #define PCI_DEVICE_ID_O2_6836 0x6836 +#define PCI_DEVICE_ID_O2_6812 0x6872 +#define PCI_DEVICE_ID_O2_6933 0x6933 #define PCI_VENDOR_ID_3DFX 0x121a #define PCI_DEVICE_ID_3DFX_VOODOO 0x0001 diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 7b7fbf433cf..e3fb2560670 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -106,6 +106,8 @@ enum perf_sw_ids { PERF_COUNT_SW_CPU_MIGRATIONS = 4, PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, + PERF_COUNT_SW_ALIGNMENT_FAULTS = 7, + PERF_COUNT_SW_EMULATION_FAULTS = 8, PERF_COUNT_SW_MAX, /* non-ABI */ }; @@ -225,6 +227,7 @@ struct perf_counter_attr { #define PERF_COUNTER_IOC_RESET _IO ('$', 3) #define PERF_COUNTER_IOC_PERIOD _IOW('$', 4, u64) #define PERF_COUNTER_IOC_SET_OUTPUT _IO ('$', 5) +#define PERF_COUNTER_IOC_SET_FILTER _IOW('$', 6, char *) enum perf_counter_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 9e7012689a8..43adbd7f001 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -18,6 +18,10 @@ #include <linux/ioctl.h> #include <asm/byteorder.h> +#ifdef CONFIG_HAVE_HW_BREAKPOINT +#include <asm/hw_breakpoint.h> +#endif + /* * User-space ABI bits: */ @@ -31,6 +35,7 @@ enum perf_type_id { PERF_TYPE_TRACEPOINT = 2, PERF_TYPE_HW_CACHE = 3, PERF_TYPE_RAW = 4, + PERF_TYPE_BREAKPOINT = 5, PERF_TYPE_MAX, /* non-ABI */ }; @@ -102,6 +107,8 @@ enum perf_sw_ids { PERF_COUNT_SW_CPU_MIGRATIONS = 4, PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, + PERF_COUNT_SW_ALIGNMENT_FAULTS = 7, + PERF_COUNT_SW_EMULATION_FAULTS = 8, PERF_COUNT_SW_MAX, /* non-ABI */ }; @@ -207,6 +214,15 @@ struct perf_event_attr { __u32 wakeup_events; /* wakeup every n events */ __u32 wakeup_watermark; /* bytes before wakeup */ }; + + union { + struct { /* Hardware breakpoint info */ + __u64 bp_addr; + __u32 bp_type; + __u32 bp_len; + }; + }; + __u32 __reserved_2; __u64 __reserved_3; @@ -219,8 +235,9 @@ struct perf_event_attr { #define PERF_EVENT_IOC_DISABLE _IO ('$', 1) #define PERF_EVENT_IOC_REFRESH _IO ('$', 2) #define PERF_EVENT_IOC_RESET _IO ('$', 3) -#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, u64) +#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64) #define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) +#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) enum perf_event_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, @@ -475,6 +492,11 @@ struct hw_perf_event { s64 remaining; struct hrtimer hrtimer; }; +#ifdef CONFIG_HAVE_HW_BREAKPOINT + union { /* breakpoint */ + struct arch_hw_breakpoint info; + }; +#endif }; atomic64_t prev_count; u64 sample_period; @@ -543,6 +565,10 @@ struct perf_pending_entry { void (*func)(struct perf_pending_entry *); }; +typedef void (*perf_callback_t)(struct perf_event *, void *); + +struct perf_sample_data; + /** * struct perf_event - performance event kernel representation: */ @@ -585,7 +611,7 @@ struct perf_event { u64 tstamp_running; u64 tstamp_stopped; - struct perf_event_attr attr; + struct perf_event_attr attr; struct hw_perf_event hw; struct perf_event_context *ctx; @@ -633,7 +659,20 @@ struct perf_event { struct pid_namespace *ns; u64 id; + + void (*overflow_handler)(struct perf_event *event, + int nmi, struct perf_sample_data *data, + struct pt_regs *regs); + +#ifdef CONFIG_EVENT_PROFILE + struct event_filter *filter; #endif + + perf_callback_t callback; + + perf_callback_t event_callback; + +#endif /* CONFIG_PERF_EVENTS */ }; /** @@ -706,7 +745,6 @@ struct perf_output_handle { int nmi; int sample; int locked; - unsigned long flags; }; #ifdef CONFIG_PERF_EVENTS @@ -738,6 +776,14 @@ extern int hw_perf_group_sched_in(struct perf_event *group_leader, struct perf_cpu_context *cpuctx, struct perf_event_context *ctx, int cpu); extern void perf_event_update_userpage(struct perf_event *event); +extern int perf_event_release_kernel(struct perf_event *event); +extern struct perf_event * +perf_event_create_kernel_counter(struct perf_event_attr *attr, + int cpu, + pid_t pid, + perf_callback_t callback); +extern u64 perf_event_read_value(struct perf_event *event, + u64 *enabled, u64 *running); struct perf_sample_data { u64 type; @@ -814,6 +860,7 @@ extern int sysctl_perf_event_sample_rate; extern void perf_event_init(void); extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size); +extern void perf_bp_event(struct perf_event *event, void *data); #ifndef perf_misc_flags #define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \ @@ -827,6 +874,8 @@ extern int perf_output_begin(struct perf_output_handle *handle, extern void perf_output_end(struct perf_output_handle *handle); extern void perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len); +extern int perf_swevent_get_recursion_context(void); +extern void perf_swevent_put_recursion_context(int rctx); #else static inline void perf_event_task_sched_in(struct task_struct *task, int cpu) { } @@ -848,11 +897,15 @@ static inline int perf_event_task_enable(void) { return -EINVAL; } static inline void perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { } +static inline void +perf_bp_event(struct perf_event *event, void *data) { } static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_comm(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } +static inline int perf_swevent_get_recursion_context(void) { return -1; } +static inline void perf_swevent_put_recursion_context(int rctx) { } #endif diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 065a3652a3e..67608161df6 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -147,6 +147,20 @@ static inline void forget_cached_acl(struct inode *inode, int type) if (old != ACL_NOT_CACHED) posix_acl_release(old); } + +static inline void forget_all_cached_acls(struct inode *inode) +{ + struct posix_acl *old_access, *old_default; + spin_lock(&inode->i_lock); + old_access = inode->i_acl; + old_default = inode->i_default_acl; + inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED; + spin_unlock(&inode->i_lock); + if (old_access != ACL_NOT_CACHED) + posix_acl_release(old_access); + if (old_default != ACL_NOT_CACHED) + posix_acl_release(old_default); +} #endif static inline void cache_no_acl(struct inode *inode) diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 72b1a10a59b..2e681d9555b 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -105,6 +105,11 @@ struct preempt_notifier; * @sched_out: we've just been preempted * notifier: struct preempt_notifier for the task being preempted * next: the task that's kicking us out + * + * Please note that sched_in and out are called under different + * contexts. sched_out is called with rq lock held and irq disabled + * while sched_in is called without rq lock and irq enabled. This + * difference is intentional and depended upon by its users. */ struct preempt_ops { void (*sched_in)(struct preempt_notifier *notifier, int cpu); diff --git a/include/linux/quota.h b/include/linux/quota.h index 78c48895b12..ce9a9b2e5cd 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -376,6 +376,17 @@ static inline unsigned int dquot_generic_flag(unsigned int flags, int type) return flags >> _DQUOT_STATE_FLAGS; } +#ifdef CONFIG_QUOTA_NETLINK_INTERFACE +extern void quota_send_warning(short type, unsigned int id, dev_t dev, + const char warntype); +#else +static inline void quota_send_warning(short type, unsigned int id, dev_t dev, + const char warntype) +{ + return; +} +#endif /* CONFIG_QUOTA_NETLINK_INTERFACE */ + struct quota_info { unsigned int flags; /* Flags for diskquotas on this device */ struct mutex dqio_mutex; /* lock device while I/O in progress */ diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h index 00044b85645..668cf1bef03 100644 --- a/include/linux/ratelimit.h +++ b/include/linux/ratelimit.h @@ -1,20 +1,31 @@ #ifndef _LINUX_RATELIMIT_H #define _LINUX_RATELIMIT_H + #include <linux/param.h> +#include <linux/spinlock_types.h> -#define DEFAULT_RATELIMIT_INTERVAL (5 * HZ) -#define DEFAULT_RATELIMIT_BURST 10 +#define DEFAULT_RATELIMIT_INTERVAL (5 * HZ) +#define DEFAULT_RATELIMIT_BURST 10 struct ratelimit_state { - int interval; - int burst; - int printed; - int missed; - unsigned long begin; + spinlock_t lock; /* protect the state */ + + int interval; + int burst; + int printed; + int missed; + unsigned long begin; }; -#define DEFINE_RATELIMIT_STATE(name, interval, burst) \ - struct ratelimit_state name = {interval, burst,} +#define DEFINE_RATELIMIT_STATE(name, interval_init, burst_init) \ + \ + struct ratelimit_state name = { \ + .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ + .interval = interval_init, \ + .burst = burst_init, \ + } + +extern int ___ratelimit(struct ratelimit_state *rs, const char *func); +#define __ratelimit(state) ___ratelimit(state, __func__) -extern int __ratelimit(struct ratelimit_state *rs); -#endif +#endif /* _LINUX_RATELIMIT_H */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 3ebd0b7bcb0..24440f4bf47 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -52,11 +52,6 @@ struct rcu_head { }; /* Exported common interfaces */ -#ifdef CONFIG_TREE_PREEMPT_RCU -extern void synchronize_rcu(void); -#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ -#define synchronize_rcu synchronize_sched -#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ extern void synchronize_rcu_bh(void); extern void synchronize_sched(void); extern void rcu_barrier(void); @@ -67,12 +62,11 @@ extern int sched_expedited_torture_stats(char *page); /* Internal to kernel */ extern void rcu_init(void); -extern void rcu_scheduler_starting(void); -extern int rcu_needs_cpu(int cpu); -extern int rcu_scheduler_active; #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include <linux/rcutree.h> +#elif defined(CONFIG_TINY_RCU) +#include <linux/rcutiny.h> #else #error "Unknown RCU implementation specified to kernel configuration" #endif diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h new file mode 100644 index 00000000000..c4ba9a78721 --- /dev/null +++ b/include/linux/rcutiny.h @@ -0,0 +1,104 @@ +/* + * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2008 + * + * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> + * + * For detailed explanation of Read-Copy Update mechanism see - + * Documentation/RCU + */ +#ifndef __LINUX_TINY_H +#define __LINUX_TINY_H + +#include <linux/cache.h> + +void rcu_sched_qs(int cpu); +void rcu_bh_qs(int cpu); + +#define __rcu_read_lock() preempt_disable() +#define __rcu_read_unlock() preempt_enable() +#define __rcu_read_lock_bh() local_bh_disable() +#define __rcu_read_unlock_bh() local_bh_enable() +#define call_rcu_sched call_rcu + +#define rcu_init_sched() do { } while (0) +extern void rcu_check_callbacks(int cpu, int user); + +static inline int rcu_needs_cpu(int cpu) +{ + return 0; +} + +/* + * Return the number of grace periods. + */ +static inline long rcu_batches_completed(void) +{ + return 0; +} + +/* + * Return the number of bottom-half grace periods. + */ +static inline long rcu_batches_completed_bh(void) +{ + return 0; +} + +extern int rcu_expedited_torture_stats(char *page); + +#define synchronize_rcu synchronize_sched + +static inline void synchronize_rcu_expedited(void) +{ + synchronize_sched(); +} + +static inline void synchronize_rcu_bh_expedited(void) +{ + synchronize_sched(); +} + +struct notifier_block; + +#ifdef CONFIG_NO_HZ + +extern void rcu_enter_nohz(void); +extern void rcu_exit_nohz(void); + +#else /* #ifdef CONFIG_NO_HZ */ + +static inline void rcu_enter_nohz(void) +{ +} + +static inline void rcu_exit_nohz(void) +{ +} + +#endif /* #else #ifdef CONFIG_NO_HZ */ + +static inline void rcu_scheduler_starting(void) +{ +} + +static inline void exit_rcu(void) +{ +} + +#endif /* __LINUX_RCUTINY_H */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 9642c6bcb39..c93eee5911b 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -34,15 +34,15 @@ struct notifier_block; extern void rcu_sched_qs(int cpu); extern void rcu_bh_qs(int cpu); -extern int rcu_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu); extern int rcu_needs_cpu(int cpu); +extern void rcu_scheduler_starting(void); extern int rcu_expedited_torture_stats(char *page); #ifdef CONFIG_TREE_PREEMPT_RCU extern void __rcu_read_lock(void); extern void __rcu_read_unlock(void); +extern void synchronize_rcu(void); extern void exit_rcu(void); #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ @@ -57,7 +57,7 @@ static inline void __rcu_read_unlock(void) preempt_enable(); } -#define __synchronize_sched() synchronize_rcu() +#define synchronize_rcu synchronize_sched static inline void exit_rcu(void) { @@ -83,7 +83,6 @@ static inline void synchronize_rcu_bh_expedited(void) synchronize_sched_expedited(); } -extern void __rcu_init(void); extern void rcu_check_callbacks(int cpu, int user); extern long rcu_batches_completed(void); diff --git a/include/linux/sched.h b/include/linux/sched.h index 75e6e60bf58..89115ec7d43 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -145,7 +145,6 @@ extern unsigned long this_cpu_load(void); extern void calc_global_load(void); -extern u64 cpu_nr_migrations(int cpu); extern unsigned long get_parent_ip(unsigned long addr); @@ -171,8 +170,6 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) } #endif -extern unsigned long long time_sync_thresh; - /* * Task state bitmask. NOTE! These bits are also * encoded in fs/proc/array.c: get_task_state(). @@ -349,7 +346,6 @@ extern signed long schedule_timeout(signed long timeout); extern signed long schedule_timeout_interruptible(signed long timeout); extern signed long schedule_timeout_killable(signed long timeout); extern signed long schedule_timeout_uninterruptible(signed long timeout); -asmlinkage void __schedule(void); asmlinkage void schedule(void); extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner); @@ -628,6 +624,9 @@ struct signal_struct { cputime_t utime, stime, cutime, cstime; cputime_t gtime; cputime_t cgtime; +#ifndef CONFIG_VIRT_CPU_ACCOUNTING + cputime_t prev_utime, prev_stime; +#endif unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; unsigned long inblock, oublock, cinblock, coublock; @@ -1013,9 +1012,13 @@ static inline struct cpumask *sched_domain_span(struct sched_domain *sd) return to_cpumask(sd->span); } -extern void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, +extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], struct sched_domain_attr *dattr_new); +/* Allocate an array of sched domains, for partition_sched_domains(). */ +cpumask_var_t *alloc_sched_domains(unsigned int ndoms); +void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); + /* Test a flag in parent sched domain */ static inline int test_sd_parent(struct sched_domain *sd, int flag) { @@ -1033,7 +1036,7 @@ unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu); struct sched_domain_attr; static inline void -partition_sched_domains(int ndoms_new, struct cpumask *doms_new, +partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], struct sched_domain_attr *dattr_new) { } @@ -1331,7 +1334,9 @@ struct task_struct { cputime_t utime, stime, utimescaled, stimescaled; cputime_t gtime; +#ifndef CONFIG_VIRT_CPU_ACCOUNTING cputime_t prev_utime, prev_stime; +#endif unsigned long nvcsw, nivcsw; /* context switch counts */ struct timespec start_time; /* monotonic time */ struct timespec real_start_time; /* boot based time */ @@ -1421,17 +1426,17 @@ struct task_struct { #endif #ifdef CONFIG_TRACE_IRQFLAGS unsigned int irq_events; - int hardirqs_enabled; unsigned long hardirq_enable_ip; - unsigned int hardirq_enable_event; unsigned long hardirq_disable_ip; + unsigned int hardirq_enable_event; unsigned int hardirq_disable_event; - int softirqs_enabled; + int hardirqs_enabled; + int hardirq_context; unsigned long softirq_disable_ip; - unsigned int softirq_disable_event; unsigned long softirq_enable_ip; + unsigned int softirq_disable_event; unsigned int softirq_enable_event; - int hardirq_context; + int softirqs_enabled; int softirq_context; #endif #ifdef CONFIG_LOCKDEP @@ -1720,9 +1725,8 @@ static inline void put_task_struct(struct task_struct *t) __put_task_struct(t); } -extern cputime_t task_utime(struct task_struct *p); -extern cputime_t task_stime(struct task_struct *p); -extern cputime_t task_gtime(struct task_struct *p); +extern void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st); +extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st); /* * Per process flags @@ -2086,11 +2090,18 @@ static inline int is_si_special(const struct siginfo *info) return info <= SEND_SIG_FORCED; } -/* True if we are on the alternate signal stack. */ - +/* + * True if we are on the alternate signal stack. + */ static inline int on_sig_stack(unsigned long sp) { - return (sp - current->sas_ss_sp < current->sas_ss_size); +#ifdef CONFIG_STACK_GROWSUP + return sp >= current->sas_ss_sp && + sp - current->sas_ss_sp < current->sas_ss_size; +#else + return sp > current->sas_ss_sp && + sp - current->sas_ss_sp <= current->sas_ss_size; +#endif } static inline int sas_ss_flags(unsigned long sp) diff --git a/include/linux/securebits.h b/include/linux/securebits.h index d2c5ed845bc..33406174cbe 100644 --- a/include/linux/securebits.h +++ b/include/linux/securebits.h @@ -1,6 +1,15 @@ #ifndef _LINUX_SECUREBITS_H #define _LINUX_SECUREBITS_H 1 +/* Each securesetting is implemented using two bits. One bit specifies + whether the setting is on or off. The other bit specify whether the + setting is locked or not. A setting which is locked cannot be + changed from user-level. */ +#define issecure_mask(X) (1 << (X)) +#ifdef __KERNEL__ +#define issecure(X) (issecure_mask(X) & current_cred_xxx(securebits)) +#endif + #define SECUREBITS_DEFAULT 0x00000000 /* When set UID 0 has no special privileges. When unset, we support @@ -12,6 +21,9 @@ #define SECURE_NOROOT 0 #define SECURE_NOROOT_LOCKED 1 /* make bit-0 immutable */ +#define SECBIT_NOROOT (issecure_mask(SECURE_NOROOT)) +#define SECBIT_NOROOT_LOCKED (issecure_mask(SECURE_NOROOT_LOCKED)) + /* When set, setuid to/from uid 0 does not trigger capability-"fixup". When unset, to provide compatiblility with old programs relying on set*uid to gain/lose privilege, transitions to/from uid 0 cause @@ -19,6 +31,10 @@ #define SECURE_NO_SETUID_FIXUP 2 #define SECURE_NO_SETUID_FIXUP_LOCKED 3 /* make bit-2 immutable */ +#define SECBIT_NO_SETUID_FIXUP (issecure_mask(SECURE_NO_SETUID_FIXUP)) +#define SECBIT_NO_SETUID_FIXUP_LOCKED \ + (issecure_mask(SECURE_NO_SETUID_FIXUP_LOCKED)) + /* When set, a process can retain its capabilities even after transitioning to a non-root user (the set-uid fixup suppressed by bit 2). Bit-4 is cleared when a process calls exec(); setting both @@ -27,12 +43,8 @@ #define SECURE_KEEP_CAPS 4 #define SECURE_KEEP_CAPS_LOCKED 5 /* make bit-4 immutable */ -/* Each securesetting is implemented using two bits. One bit specifies - whether the setting is on or off. The other bit specify whether the - setting is locked or not. A setting which is locked cannot be - changed from user-level. */ -#define issecure_mask(X) (1 << (X)) -#define issecure(X) (issecure_mask(X) & current_cred_xxx(securebits)) +#define SECBIT_KEEP_CAPS (issecure_mask(SECURE_KEEP_CAPS)) +#define SECBIT_KEEP_CAPS_LOCKED (issecure_mask(SECURE_KEEP_CAPS_LOCKED)) #define SECURE_ALL_BITS (issecure_mask(SECURE_NOROOT) | \ issecure_mask(SECURE_NO_SETUID_FIXUP) | \ diff --git a/include/linux/security.h b/include/linux/security.h index 239e40d0450..466cbadbd1e 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -447,6 +447,22 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @new_dir contains the path structure for parent of the new link. * @new_dentry contains the dentry structure of the new link. * Return 0 if permission is granted. + * @path_chmod: + * Check for permission to change DAC's permission of a file or directory. + * @dentry contains the dentry structure. + * @mnt contains the vfsmnt structure. + * @mode contains DAC's mode. + * Return 0 if permission is granted. + * @path_chown: + * Check for permission to change owner/group of a file or directory. + * @path contains the path structure. + * @uid contains new owner's ID. + * @gid contains new group's ID. + * Return 0 if permission is granted. + * @path_chroot: + * Check for permission to change root directory. + * @path contains the path structure. + * Return 0 if permission is granted. * @inode_readlink: * Check the permission to read the symbolic link. * @dentry contains the dentry structure for the file link. @@ -690,6 +706,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @kernel_module_request: * Ability to trigger the kernel to automatically upcall to userspace for * userspace to load a kernel module with the given name. + * @kmod_name name of the module requested by the kernel * Return 0 if successful. * @task_setuid: * Check permission before setting one or more of the user identity @@ -1488,6 +1505,10 @@ struct security_operations { struct dentry *new_dentry); int (*path_rename) (struct path *old_dir, struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry); + int (*path_chmod) (struct dentry *dentry, struct vfsmount *mnt, + mode_t mode); + int (*path_chown) (struct path *path, uid_t uid, gid_t gid); + int (*path_chroot) (struct path *path); #endif int (*inode_alloc_security) (struct inode *inode); @@ -1557,7 +1578,7 @@ struct security_operations { void (*cred_transfer)(struct cred *new, const struct cred *old); int (*kernel_act_as)(struct cred *new, u32 secid); int (*kernel_create_files_as)(struct cred *new, struct inode *inode); - int (*kernel_module_request)(void); + int (*kernel_module_request)(char *kmod_name); int (*task_setuid) (uid_t id0, uid_t id1, uid_t id2, int flags); int (*task_fix_setuid) (struct cred *new, const struct cred *old, int flags); @@ -1822,7 +1843,7 @@ void security_commit_creds(struct cred *new, const struct cred *old); void security_transfer_creds(struct cred *new, const struct cred *old); int security_kernel_act_as(struct cred *new, u32 secid); int security_kernel_create_files_as(struct cred *new, struct inode *inode); -int security_kernel_module_request(void); +int security_kernel_module_request(char *kmod_name); int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags); int security_task_fix_setuid(struct cred *new, const struct cred *old, int flags); @@ -2387,7 +2408,7 @@ static inline int security_kernel_create_files_as(struct cred *cred, return 0; } -static inline int security_kernel_module_request(void) +static inline int security_kernel_module_request(char *kmod_name) { return 0; } @@ -2952,6 +2973,10 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry); int security_path_rename(struct path *old_dir, struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry); +int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt, + mode_t mode); +int security_path_chown(struct path *path, uid_t uid, gid_t gid); +int security_path_chroot(struct path *path); #else /* CONFIG_SECURITY_PATH */ static inline int security_path_unlink(struct path *dir, struct dentry *dentry) { @@ -3001,6 +3026,23 @@ static inline int security_path_rename(struct path *old_dir, { return 0; } + +static inline int security_path_chmod(struct dentry *dentry, + struct vfsmount *mnt, + mode_t mode) +{ + return 0; +} + +static inline int security_path_chown(struct path *path, uid_t uid, gid_t gid) +{ + return 0; +} + +static inline int security_path_chroot(struct path *path) +{ + return 0; +} #endif /* CONFIG_SECURITY_PATH */ #ifdef CONFIG_KEYS diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h index 5035a269173..13337bf6c3f 100644 --- a/include/linux/slow-work.h +++ b/include/linux/slow-work.h @@ -20,7 +20,7 @@ #include <linux/timer.h> struct slow_work; -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG struct seq_file; #endif @@ -42,8 +42,8 @@ struct slow_work_ops { /* execute a work item */ void (*execute)(struct slow_work *work); -#ifdef CONFIG_SLOW_WORK_PROC - /* describe a work item for /proc */ +#ifdef CONFIG_SLOW_WORK_DEBUG + /* describe a work item for debugfs */ void (*desc)(struct slow_work *work, struct seq_file *m); #endif }; @@ -64,7 +64,7 @@ struct slow_work { #define SLOW_WORK_DELAYED 5 /* item is struct delayed_slow_work with active timer */ const struct slow_work_ops *ops; /* operations table for this item */ struct list_head link; /* link in queue */ -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG struct timespec mark; /* jiffies at which queued or exec begun */ #endif }; diff --git a/include/linux/smp.h b/include/linux/smp.h index 39c64bae776..7a0570e6a59 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -76,6 +76,9 @@ void smp_call_function_many(const struct cpumask *mask, void __smp_call_function_single(int cpuid, struct call_single_data *data, int wait); +int smp_call_function_any(const struct cpumask *mask, + void (*func)(void *info), void *info, int wait); + /* * Generic and arch helpers */ @@ -137,9 +140,15 @@ static inline void smp_send_reschedule(int cpu) { } #define smp_prepare_boot_cpu() do {} while (0) #define smp_call_function_many(mask, func, info, wait) \ (up_smp_call_function(func, info)) -static inline void init_call_single_data(void) +static inline void init_call_single_data(void) { } + +static inline int +smp_call_function_any(const struct cpumask *mask, void (*func)(void *info), + void *info, int wait) { + return smp_call_function_single(0, func, info, wait); } + #endif /* !SMP */ /* diff --git a/include/linux/smp_lock.h b/include/linux/smp_lock.h index 813be59bf34..2ea1dd1ba21 100644 --- a/include/linux/smp_lock.h +++ b/include/linux/smp_lock.h @@ -24,8 +24,21 @@ static inline int reacquire_kernel_lock(struct task_struct *task) return 0; } -extern void __lockfunc lock_kernel(void) __acquires(kernel_lock); -extern void __lockfunc unlock_kernel(void) __releases(kernel_lock); +extern void __lockfunc +_lock_kernel(const char *func, const char *file, int line) +__acquires(kernel_lock); + +extern void __lockfunc +_unlock_kernel(const char *func, const char *file, int line) +__releases(kernel_lock); + +#define lock_kernel() do { \ + _lock_kernel(__func__, __FILE__, __LINE__); \ +} while (0) + +#define unlock_kernel() do { \ + _unlock_kernel(__func__, __FILE__, __LINE__); \ +} while (0) /* * Various legacy drivers don't really need the BKL in a specific @@ -41,8 +54,8 @@ static inline void cycle_kernel_lock(void) #else -#define lock_kernel() do { } while(0) -#define unlock_kernel() do { } while(0) +#define lock_kernel() +#define unlock_kernel() #define release_kernel_lock(task) do { } while(0) #define cycle_kernel_lock() do { } while(0) #define reacquire_kernel_lock(task) 0 diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index f0ca7a7a175..71dccfeb0d8 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -79,8 +79,6 @@ */ #include <linux/spinlock_types.h> -extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock); - /* * Pull the __raw*() functions/declarations (UP-nondebug doesnt need them): */ @@ -102,7 +100,7 @@ do { \ #else # define spin_lock_init(lock) \ - do { *(lock) = SPIN_LOCK_UNLOCKED; } while (0) + do { *(lock) = __SPIN_LOCK_UNLOCKED(lock); } while (0) #endif #ifdef CONFIG_DEBUG_SPINLOCK @@ -116,7 +114,7 @@ do { \ } while (0) #else # define rwlock_init(lock) \ - do { *(lock) = RW_LOCK_UNLOCKED; } while (0) + do { *(lock) = __RW_LOCK_UNLOCKED(lock); } while (0) #endif #define spin_is_locked(lock) __raw_spin_is_locked(&(lock)->raw_lock) diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 7a7e18fc241..8264a7f459b 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -60,137 +60,118 @@ void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) __releases(lock); -/* - * We inline the unlock functions in the nondebug case: - */ -#if !defined(CONFIG_DEBUG_SPINLOCK) && !defined(CONFIG_PREEMPT) -#define __always_inline__spin_unlock -#define __always_inline__read_unlock -#define __always_inline__write_unlock -#define __always_inline__spin_unlock_irq -#define __always_inline__read_unlock_irq -#define __always_inline__write_unlock_irq -#endif - -#ifndef CONFIG_DEBUG_SPINLOCK -#ifndef CONFIG_GENERIC_LOCKBREAK - -#ifdef __always_inline__spin_lock +#ifdef CONFIG_INLINE_SPIN_LOCK #define _spin_lock(lock) __spin_lock(lock) #endif -#ifdef __always_inline__read_lock +#ifdef CONFIG_INLINE_READ_LOCK #define _read_lock(lock) __read_lock(lock) #endif -#ifdef __always_inline__write_lock +#ifdef CONFIG_INLINE_WRITE_LOCK #define _write_lock(lock) __write_lock(lock) #endif -#ifdef __always_inline__spin_lock_bh +#ifdef CONFIG_INLINE_SPIN_LOCK_BH #define _spin_lock_bh(lock) __spin_lock_bh(lock) #endif -#ifdef __always_inline__read_lock_bh +#ifdef CONFIG_INLINE_READ_LOCK_BH #define _read_lock_bh(lock) __read_lock_bh(lock) #endif -#ifdef __always_inline__write_lock_bh +#ifdef CONFIG_INLINE_WRITE_LOCK_BH #define _write_lock_bh(lock) __write_lock_bh(lock) #endif -#ifdef __always_inline__spin_lock_irq +#ifdef CONFIG_INLINE_SPIN_LOCK_IRQ #define _spin_lock_irq(lock) __spin_lock_irq(lock) #endif -#ifdef __always_inline__read_lock_irq +#ifdef CONFIG_INLINE_READ_LOCK_IRQ #define _read_lock_irq(lock) __read_lock_irq(lock) #endif -#ifdef __always_inline__write_lock_irq +#ifdef CONFIG_INLINE_WRITE_LOCK_IRQ #define _write_lock_irq(lock) __write_lock_irq(lock) #endif -#ifdef __always_inline__spin_lock_irqsave +#ifdef CONFIG_INLINE_SPIN_LOCK_IRQSAVE #define _spin_lock_irqsave(lock) __spin_lock_irqsave(lock) #endif -#ifdef __always_inline__read_lock_irqsave +#ifdef CONFIG_INLINE_READ_LOCK_IRQSAVE #define _read_lock_irqsave(lock) __read_lock_irqsave(lock) #endif -#ifdef __always_inline__write_lock_irqsave +#ifdef CONFIG_INLINE_WRITE_LOCK_IRQSAVE #define _write_lock_irqsave(lock) __write_lock_irqsave(lock) #endif -#endif /* !CONFIG_GENERIC_LOCKBREAK */ - -#ifdef __always_inline__spin_trylock +#ifdef CONFIG_INLINE_SPIN_TRYLOCK #define _spin_trylock(lock) __spin_trylock(lock) #endif -#ifdef __always_inline__read_trylock +#ifdef CONFIG_INLINE_READ_TRYLOCK #define _read_trylock(lock) __read_trylock(lock) #endif -#ifdef __always_inline__write_trylock +#ifdef CONFIG_INLINE_WRITE_TRYLOCK #define _write_trylock(lock) __write_trylock(lock) #endif -#ifdef __always_inline__spin_trylock_bh +#ifdef CONFIG_INLINE_SPIN_TRYLOCK_BH #define _spin_trylock_bh(lock) __spin_trylock_bh(lock) #endif -#ifdef __always_inline__spin_unlock +#ifdef CONFIG_INLINE_SPIN_UNLOCK #define _spin_unlock(lock) __spin_unlock(lock) #endif -#ifdef __always_inline__read_unlock +#ifdef CONFIG_INLINE_READ_UNLOCK #define _read_unlock(lock) __read_unlock(lock) #endif -#ifdef __always_inline__write_unlock +#ifdef CONFIG_INLINE_WRITE_UNLOCK #define _write_unlock(lock) __write_unlock(lock) #endif -#ifdef __always_inline__spin_unlock_bh +#ifdef CONFIG_INLINE_SPIN_UNLOCK_BH #define _spin_unlock_bh(lock) __spin_unlock_bh(lock) #endif -#ifdef __always_inline__read_unlock_bh +#ifdef CONFIG_INLINE_READ_UNLOCK_BH #define _read_unlock_bh(lock) __read_unlock_bh(lock) #endif -#ifdef __always_inline__write_unlock_bh +#ifdef CONFIG_INLINE_WRITE_UNLOCK_BH #define _write_unlock_bh(lock) __write_unlock_bh(lock) #endif -#ifdef __always_inline__spin_unlock_irq +#ifdef CONFIG_INLINE_SPIN_UNLOCK_IRQ #define _spin_unlock_irq(lock) __spin_unlock_irq(lock) #endif -#ifdef __always_inline__read_unlock_irq +#ifdef CONFIG_INLINE_READ_UNLOCK_IRQ #define _read_unlock_irq(lock) __read_unlock_irq(lock) #endif -#ifdef __always_inline__write_unlock_irq +#ifdef CONFIG_INLINE_WRITE_UNLOCK_IRQ #define _write_unlock_irq(lock) __write_unlock_irq(lock) #endif -#ifdef __always_inline__spin_unlock_irqrestore +#ifdef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE #define _spin_unlock_irqrestore(lock, flags) __spin_unlock_irqrestore(lock, flags) #endif -#ifdef __always_inline__read_unlock_irqrestore +#ifdef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE #define _read_unlock_irqrestore(lock, flags) __read_unlock_irqrestore(lock, flags) #endif -#ifdef __always_inline__write_unlock_irqrestore +#ifdef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE #define _write_unlock_irqrestore(lock, flags) __write_unlock_irqrestore(lock, flags) #endif -#endif /* CONFIG_DEBUG_SPINLOCK */ - static inline int __spin_trylock(spinlock_t *lock) { preempt_disable(); diff --git a/include/linux/srcu.h b/include/linux/srcu.h index aca0eee5393..4765d97dcaf 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -48,6 +48,7 @@ void cleanup_srcu_struct(struct srcu_struct *sp); int srcu_read_lock(struct srcu_struct *sp) __acquires(sp); void srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp); void synchronize_srcu(struct srcu_struct *sp); +void synchronize_srcu_expedited(struct srcu_struct *sp); long srcu_batches_completed(struct srcu_struct *sp); #endif diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 73b1f1cec42..febedcf67c7 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -7,6 +7,8 @@ struct device; struct dma_attrs; struct scatterlist; +extern int swiotlb_force; + /* * Maximum allowable number of contiguous slabs to map, * must be a power of 2. What is the appropriate value ? @@ -20,8 +22,7 @@ struct scatterlist; */ #define IO_TLB_SHIFT 11 -extern void -swiotlb_init(void); +extern void swiotlb_init(int verbose); extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size, @@ -88,4 +89,11 @@ swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr); extern int swiotlb_dma_supported(struct device *hwdev, u64 mask); +#ifdef CONFIG_SWIOTLB +extern void __init swiotlb_free(void); +#else +static inline void swiotlb_free(void) { } +#endif + +extern void swiotlb_print_info(void); #endif /* __LINUX_SWIOTLB_H */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a990ace1a83..e79e2f3ccc5 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -99,37 +99,16 @@ struct perf_event_attr; #define __SC_TEST6(t6, a6, ...) __SC_TEST(t6); __SC_TEST5(__VA_ARGS__) #ifdef CONFIG_EVENT_PROFILE -#define TRACE_SYS_ENTER_PROFILE(sname) \ -static int prof_sysenter_enable_##sname(void) \ -{ \ - return reg_prof_syscall_enter("sys"#sname); \ -} \ - \ -static void prof_sysenter_disable_##sname(void) \ -{ \ - unreg_prof_syscall_enter("sys"#sname); \ -} - -#define TRACE_SYS_EXIT_PROFILE(sname) \ -static int prof_sysexit_enable_##sname(void) \ -{ \ - return reg_prof_syscall_exit("sys"#sname); \ -} \ - \ -static void prof_sysexit_disable_##sname(void) \ -{ \ - unreg_prof_syscall_exit("sys"#sname); \ -} #define TRACE_SYS_ENTER_PROFILE_INIT(sname) \ .profile_count = ATOMIC_INIT(-1), \ - .profile_enable = prof_sysenter_enable_##sname, \ - .profile_disable = prof_sysenter_disable_##sname, + .profile_enable = prof_sysenter_enable, \ + .profile_disable = prof_sysenter_disable, #define TRACE_SYS_EXIT_PROFILE_INIT(sname) \ .profile_count = ATOMIC_INIT(-1), \ - .profile_enable = prof_sysexit_enable_##sname, \ - .profile_disable = prof_sysexit_disable_##sname, + .profile_enable = prof_sysexit_enable, \ + .profile_disable = prof_sysexit_disable, #else #define TRACE_SYS_ENTER_PROFILE(sname) #define TRACE_SYS_ENTER_PROFILE_INIT(sname) @@ -153,74 +132,46 @@ static void prof_sysexit_disable_##sname(void) \ #define __SC_STR_TDECL6(t, a, ...) #t, __SC_STR_TDECL5(__VA_ARGS__) #define SYSCALL_TRACE_ENTER_EVENT(sname) \ + static const struct syscall_metadata __syscall_meta_##sname; \ static struct ftrace_event_call event_enter_##sname; \ - struct trace_event enter_syscall_print_##sname = { \ + static struct trace_event enter_syscall_print_##sname = { \ .trace = print_syscall_enter, \ }; \ - static int init_enter_##sname(void) \ - { \ - int num, id; \ - num = syscall_name_to_nr("sys"#sname); \ - if (num < 0) \ - return -ENOSYS; \ - id = register_ftrace_event(&enter_syscall_print_##sname);\ - if (!id) \ - return -ENODEV; \ - event_enter_##sname.id = id; \ - set_syscall_enter_id(num, id); \ - INIT_LIST_HEAD(&event_enter_##sname.fields); \ - return 0; \ - } \ - TRACE_SYS_ENTER_PROFILE(sname); \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) \ event_enter_##sname = { \ .name = "sys_enter"#sname, \ .system = "syscalls", \ - .event = &event_syscall_enter, \ - .raw_init = init_enter_##sname, \ + .event = &enter_syscall_print_##sname, \ + .raw_init = init_syscall_trace, \ .show_format = syscall_enter_format, \ .define_fields = syscall_enter_define_fields, \ .regfunc = reg_event_syscall_enter, \ .unregfunc = unreg_event_syscall_enter, \ - .data = "sys"#sname, \ + .data = (void *)&__syscall_meta_##sname,\ TRACE_SYS_ENTER_PROFILE_INIT(sname) \ } #define SYSCALL_TRACE_EXIT_EVENT(sname) \ + static const struct syscall_metadata __syscall_meta_##sname; \ static struct ftrace_event_call event_exit_##sname; \ - struct trace_event exit_syscall_print_##sname = { \ + static struct trace_event exit_syscall_print_##sname = { \ .trace = print_syscall_exit, \ }; \ - static int init_exit_##sname(void) \ - { \ - int num, id; \ - num = syscall_name_to_nr("sys"#sname); \ - if (num < 0) \ - return -ENOSYS; \ - id = register_ftrace_event(&exit_syscall_print_##sname);\ - if (!id) \ - return -ENODEV; \ - event_exit_##sname.id = id; \ - set_syscall_exit_id(num, id); \ - INIT_LIST_HEAD(&event_exit_##sname.fields); \ - return 0; \ - } \ - TRACE_SYS_EXIT_PROFILE(sname); \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) \ event_exit_##sname = { \ .name = "sys_exit"#sname, \ .system = "syscalls", \ - .event = &event_syscall_exit, \ - .raw_init = init_exit_##sname, \ + .event = &exit_syscall_print_##sname, \ + .raw_init = init_syscall_trace, \ .show_format = syscall_exit_format, \ .define_fields = syscall_exit_define_fields, \ .regfunc = reg_event_syscall_exit, \ .unregfunc = unreg_event_syscall_exit, \ - .data = "sys"#sname, \ + .data = (void *)&__syscall_meta_##sname,\ TRACE_SYS_EXIT_PROFILE_INIT(sname) \ } diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 3338b3f5c21..ac5d1c1285d 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -27,9 +27,16 @@ */ #define TPM_ANY_NUM 0xFFFF -#if defined(CONFIG_TCG_TPM) +#if defined(CONFIG_TCG_TPM) || defined(CONFIG_TCG_TPM_MODULE) extern int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf); extern int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash); +#else +static inline int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf) { + return -ENODEV; +} +static inline int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash) { + return -ENODEV; +} #endif #endif diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 2aac8a83e89..f59604ed0ec 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -280,6 +280,12 @@ static inline void tracepoint_synchronize_unregister(void) * TRACE_EVENT_FN to perform any (un)registration work. */ +#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) +#define DEFINE_EVENT(template, name, proto, args) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) + #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #define TRACE_EVENT_FN(name, proto, args, struct, \ diff --git a/include/pcmcia/cs.h b/include/pcmcia/cs.h index 904468a191e..afc2bfb9e91 100644 --- a/include/pcmcia/cs.h +++ b/include/pcmcia/cs.h @@ -15,6 +15,10 @@ #ifndef _LINUX_CS_H #define _LINUX_CS_H +#ifdef __KERNEL__ +#include <linux/interrupt.h> +#endif + /* For AccessConfigurationRegister */ typedef struct conf_reg_t { u_char Function; @@ -111,11 +115,9 @@ typedef struct io_req_t { /* For RequestIRQ and ReleaseIRQ */ typedef struct irq_req_t { - u_int Attributes; - u_int AssignedIRQ; - u_int IRQInfo1, IRQInfo2; /* IRQInfo2 is ignored */ - void *Handler; - void *Instance; + u_int Attributes; + u_int AssignedIRQ; + irq_handler_t Handler; } irq_req_t; /* Attributes for RequestIRQ and ReleaseIRQ */ @@ -125,7 +127,7 @@ typedef struct irq_req_t { #define IRQ_TYPE_DYNAMIC_SHARING 0x02 #define IRQ_FORCED_PULSE 0x04 #define IRQ_FIRST_SHARED 0x08 -#define IRQ_HANDLE_PRESENT 0x10 +//#define IRQ_HANDLE_PRESENT 0x10 #define IRQ_PULSE_ALLOCATED 0x100 /* Bits in IRQInfo1 field */ diff --git a/include/pcmcia/cs_types.h b/include/pcmcia/cs_types.h index 315965a3793..f5e3b8386c8 100644 --- a/include/pcmcia/cs_types.h +++ b/include/pcmcia/cs_types.h @@ -26,8 +26,7 @@ typedef u_int event_t; typedef u_char cisdata_t; typedef u_short page_t; -struct window_t; -typedef struct window_t *window_handle_t; +typedef unsigned long window_handle_t; struct region_t; typedef struct region_t *memory_handle_t; diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index a2be80b9a09..d403c12f797 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -34,6 +34,7 @@ struct pcmcia_socket; struct pcmcia_device; struct config_t; +struct net_device; /* dynamic device IDs for PCMCIA device drivers. See * Documentation/pcmcia/driver.txt for details. @@ -137,65 +138,39 @@ struct pcmcia_device { #define to_pcmcia_dev(n) container_of(n, struct pcmcia_device, dev) #define to_pcmcia_drv(n) container_of(n, struct pcmcia_driver, drv) -/* deprecated -- don't use! */ -#define handle_to_dev(handle) (handle->dev) - -/* (deprecated) error reporting by PCMCIA devices. Use dev_printk() - * or dev_dbg() directly in the driver, without referring to pcmcia_error_func() - * and/or pcmcia_error_ret() for those functions will go away soon. - */ -enum service { - AccessConfigurationRegister, AddSocketServices, - AdjustResourceInfo, CheckEraseQueue, CloseMemory, CopyMemory, - DeregisterClient, DeregisterEraseQueue, GetCardServicesInfo, - GetClientInfo, GetConfigurationInfo, GetEventMask, - GetFirstClient, GetFirstPartion, GetFirstRegion, GetFirstTuple, - GetNextClient, GetNextPartition, GetNextRegion, GetNextTuple, - GetStatus, GetTupleData, MapLogSocket, MapLogWindow, MapMemPage, - MapPhySocket, MapPhyWindow, ModifyConfiguration, ModifyWindow, - OpenMemory, ParseTuple, ReadMemory, RegisterClient, - RegisterEraseQueue, RegisterMTD, RegisterTimer, - ReleaseConfiguration, ReleaseExclusive, ReleaseIO, ReleaseIRQ, - ReleaseSocketMask, ReleaseWindow, ReplaceSocketServices, - RequestConfiguration, RequestExclusive, RequestIO, RequestIRQ, - RequestSocketMask, RequestWindow, ResetCard, ReturnSSEntry, - SetEventMask, SetRegion, ValidateCIS, VendorSpecific, - WriteMemory, BindDevice, BindMTD, ReportError, - SuspendCard, ResumeCard, EjectCard, InsertCard, ReplaceCIS, - GetFirstWindow, GetNextWindow, GetMemPage -}; -const char *pcmcia_error_func(int func); -const char *pcmcia_error_ret(int ret); - -#define cs_error(p_dev, func, ret) \ - { \ - dev_printk(KERN_NOTICE, &p_dev->dev, \ - "%s : %s\n", \ - pcmcia_error_func(func), \ - pcmcia_error_ret(ret)); \ - } - -/* CIS access. - * Use the pcmcia_* versions in PCMCIA drivers +/* + * CIS access. + * + * Please use the following functions to access CIS tuples: + * - pcmcia_get_tuple() + * - pcmcia_loop_tuple() + * - pcmcia_get_mac_from_cis() + * + * To parse a tuple_t, pcmcia_parse_tuple() exists. Its interface + * might change in future. */ -int pcmcia_parse_tuple(tuple_t *tuple, cisparse_t *parse); -int pccard_get_first_tuple(struct pcmcia_socket *s, unsigned int function, - tuple_t *tuple); -#define pcmcia_get_first_tuple(p_dev, tuple) \ - pccard_get_first_tuple(p_dev->socket, p_dev->func, tuple) +/* get the very first CIS entry of type @code. Note that buf is pointer + * to u8 *buf; and that you need to kfree(buf) afterwards. */ +size_t pcmcia_get_tuple(struct pcmcia_device *p_dev, cisdata_t code, + u8 **buf); -int pccard_get_next_tuple(struct pcmcia_socket *s, unsigned int function, - tuple_t *tuple); -#define pcmcia_get_next_tuple(p_dev, tuple) \ - pccard_get_next_tuple(p_dev->socket, p_dev->func, tuple) +/* loop over CIS entries */ +int pcmcia_loop_tuple(struct pcmcia_device *p_dev, cisdata_t code, + int (*loop_tuple) (struct pcmcia_device *p_dev, + tuple_t *tuple, + void *priv_data), + void *priv_data); -int pccard_get_tuple_data(struct pcmcia_socket *s, tuple_t *tuple); -#define pcmcia_get_tuple_data(p_dev, tuple) \ - pccard_get_tuple_data(p_dev->socket, tuple) +/* get the MAC address from CISTPL_FUNCE */ +int pcmcia_get_mac_from_cis(struct pcmcia_device *p_dev, + struct net_device *dev); +/* parse a tuple_t */ +int pcmcia_parse_tuple(tuple_t *tuple, cisparse_t *parse); + /* loop CIS entries for valid configuration */ int pcmcia_loop_config(struct pcmcia_device *p_dev, int (*conf_check) (struct pcmcia_device *p_dev, @@ -221,12 +196,11 @@ int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req); int pcmcia_request_configuration(struct pcmcia_device *p_dev, config_req_t *req); -int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, +int pcmcia_request_window(struct pcmcia_device *p_dev, win_req_t *req, window_handle_t *wh); -int pcmcia_release_window(window_handle_t win); - -int pcmcia_get_mem_page(window_handle_t win, memreq_t *req); -int pcmcia_map_mem_page(window_handle_t win, memreq_t *req); +int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t win); +int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t win, + memreq_t *req); int pcmcia_modify_configuration(struct pcmcia_device *p_dev, modconf_t *mod); void pcmcia_disable_device(struct pcmcia_device *p_dev); diff --git a/include/pcmcia/ss.h b/include/pcmcia/ss.h index e0f6feb8588..7c23be706f1 100644 --- a/include/pcmcia/ss.h +++ b/include/pcmcia/ss.h @@ -107,15 +107,6 @@ typedef struct io_window_t { struct resource *res; } io_window_t; -#define WINDOW_MAGIC 0xB35C -typedef struct window_t { - u_short magic; - u_short index; - struct pcmcia_device *handle; - struct pcmcia_socket *sock; - pccard_mem_map ctl; -} window_t; - /* Maximum number of IO windows per socket */ #define MAX_IO_WIN 2 @@ -155,7 +146,7 @@ struct pcmcia_socket { u_int Config; } irq; io_window_t io[MAX_IO_WIN]; - window_t win[MAX_WIN]; + pccard_mem_map win[MAX_WIN]; struct list_head cis_cache; size_t fake_cis_len; u8 *fake_cis; @@ -172,7 +163,7 @@ struct pcmcia_socket { u_int irq_mask; u_int map_size; u_int io_offset; - u_char pci_irq; + u_int pci_irq; struct pci_dev * cb_dev; diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 2a4b3bf7403..5acfb1eb4df 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -31,6 +31,14 @@ assign, print, reg, unreg) \ DEFINE_TRACE_FN(name, reg, unreg) +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) \ + DEFINE_TRACE(name) + +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_TRACE(name) + #undef DECLARE_TRACE #define DECLARE_TRACE(name, proto, args) \ DEFINE_TRACE(name) @@ -63,6 +71,9 @@ #undef TRACE_EVENT #undef TRACE_EVENT_FN +#undef DECLARE_EVENT_CLASS +#undef DEFINE_EVENT +#undef DEFINE_EVENT_PRINT #undef TRACE_HEADER_MULTI_READ /* Only undef what we defined in this file */ diff --git a/include/trace/events/bkl.h b/include/trace/events/bkl.h new file mode 100644 index 00000000000..1af72dc2427 --- /dev/null +++ b/include/trace/events/bkl.h @@ -0,0 +1,61 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM bkl + +#if !defined(_TRACE_BKL_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_BKL_H + +#include <linux/tracepoint.h> + +TRACE_EVENT(lock_kernel, + + TP_PROTO(const char *func, const char *file, int line), + + TP_ARGS(func, file, line), + + TP_STRUCT__entry( + __field( int, depth ) + __field_ext( const char *, func, FILTER_PTR_STRING ) + __field_ext( const char *, file, FILTER_PTR_STRING ) + __field( int, line ) + ), + + TP_fast_assign( + /* We want to record the lock_depth after lock is acquired */ + __entry->depth = current->lock_depth + 1; + __entry->func = func; + __entry->file = file; + __entry->line = line; + ), + + TP_printk("depth=%d file:line=%s:%d func=%s()", __entry->depth, + __entry->file, __entry->line, __entry->func) +); + +TRACE_EVENT(unlock_kernel, + + TP_PROTO(const char *func, const char *file, int line), + + TP_ARGS(func, file, line), + + TP_STRUCT__entry( + __field(int, depth ) + __field(const char *, func ) + __field(const char *, file ) + __field(int, line ) + ), + + TP_fast_assign( + __entry->depth = current->lock_depth; + __entry->func = func; + __entry->file = file; + __entry->line = line; + ), + + TP_printk("depth=%d file:line=%s:%d func=%s()", __entry->depth, + __entry->file, __entry->line, __entry->func) +); + +#endif /* _TRACE_BKL_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 00405b5f624..5fb72733331 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -8,7 +8,7 @@ #include <linux/blkdev.h> #include <linux/tracepoint.h> -TRACE_EVENT(block_rq_abort, +DECLARE_EVENT_CLASS(block_rq_with_error, TP_PROTO(struct request_queue *q, struct request *rq), @@ -40,41 +40,28 @@ TRACE_EVENT(block_rq_abort, __entry->nr_sector, __entry->errors) ); -TRACE_EVENT(block_rq_insert, +DEFINE_EVENT(block_rq_with_error, block_rq_abort, TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq), + TP_ARGS(q, rq) +); - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __field( unsigned int, bytes ) - __array( char, rwbs, 6 ) - __array( char, comm, TASK_COMM_LEN ) - __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) - ), +DEFINE_EVENT(block_rq_with_error, block_rq_requeue, - TP_fast_assign( - __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; - __entry->sector = blk_pc_request(rq) ? 0 : blk_rq_pos(rq); - __entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq); - __entry->bytes = blk_pc_request(rq) ? blk_rq_bytes(rq) : 0; + TP_PROTO(struct request_queue *q, struct request *rq), - blk_fill_rwbs_rq(__entry->rwbs, rq); - blk_dump_cmd(__get_str(cmd), rq); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), + TP_ARGS(q, rq) +); - TP_printk("%d,%d %s %u (%s) %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->rwbs, __entry->bytes, __get_str(cmd), - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) +DEFINE_EVENT(block_rq_with_error, block_rq_complete, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq) ); -TRACE_EVENT(block_rq_issue, +DECLARE_EVENT_CLASS(block_rq, TP_PROTO(struct request_queue *q, struct request *rq), @@ -86,7 +73,7 @@ TRACE_EVENT(block_rq_issue, __field( unsigned int, nr_sector ) __field( unsigned int, bytes ) __array( char, rwbs, 6 ) - __array( char, comm, TASK_COMM_LEN ) + __array( char, comm, TASK_COMM_LEN ) __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) ), @@ -108,68 +95,18 @@ TRACE_EVENT(block_rq_issue, __entry->nr_sector, __entry->comm) ); -TRACE_EVENT(block_rq_requeue, +DEFINE_EVENT(block_rq, block_rq_insert, TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __field( int, errors ) - __array( char, rwbs, 6 ) - __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) - ), - - TP_fast_assign( - __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; - __entry->sector = blk_pc_request(rq) ? 0 : blk_rq_pos(rq); - __entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq); - __entry->errors = rq->errors; - - blk_fill_rwbs_rq(__entry->rwbs, rq); - blk_dump_cmd(__get_str(cmd), rq); - ), - - TP_printk("%d,%d %s (%s) %llu + %u [%d]", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->rwbs, __get_str(cmd), - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->errors) + TP_ARGS(q, rq) ); -TRACE_EVENT(block_rq_complete, +DEFINE_EVENT(block_rq, block_rq_issue, TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __field( int, errors ) - __array( char, rwbs, 6 ) - __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) - ), - - TP_fast_assign( - __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; - __entry->sector = blk_pc_request(rq) ? 0 : blk_rq_pos(rq); - __entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq); - __entry->errors = rq->errors; - - blk_fill_rwbs_rq(__entry->rwbs, rq); - blk_dump_cmd(__get_str(cmd), rq); - ), - - TP_printk("%d,%d %s (%s) %llu + %u [%d]", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->rwbs, __get_str(cmd), - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->errors) + TP_ARGS(q, rq) ); TRACE_EVENT(block_bio_bounce, @@ -228,7 +165,7 @@ TRACE_EVENT(block_bio_complete, __entry->nr_sector, __entry->error) ); -TRACE_EVENT(block_bio_backmerge, +DECLARE_EVENT_CLASS(block_bio, TP_PROTO(struct request_queue *q, struct bio *bio), @@ -256,63 +193,28 @@ TRACE_EVENT(block_bio_backmerge, __entry->nr_sector, __entry->comm) ); -TRACE_EVENT(block_bio_frontmerge, +DEFINE_EVENT(block_bio, block_bio_backmerge, TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned, nr_sector ) - __array( char, rwbs, 6 ) - __array( char, comm, TASK_COMM_LEN ) - ), - - TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; - __entry->sector = bio->bi_sector; - __entry->nr_sector = bio->bi_size >> 9; - blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), - - TP_printk("%d,%d %s %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) + TP_ARGS(q, bio) ); -TRACE_EVENT(block_bio_queue, +DEFINE_EVENT(block_bio, block_bio_frontmerge, TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio), + TP_ARGS(q, bio) +); - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __array( char, rwbs, 6 ) - __array( char, comm, TASK_COMM_LEN ) - ), +DEFINE_EVENT(block_bio, block_bio_queue, - TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; - __entry->sector = bio->bi_sector; - __entry->nr_sector = bio->bi_size >> 9; - blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), + TP_PROTO(struct request_queue *q, struct bio *bio), - TP_printk("%d,%d %s %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) + TP_ARGS(q, bio) ); -TRACE_EVENT(block_getrq, +DECLARE_EVENT_CLASS(block_get_rq, TP_PROTO(struct request_queue *q, struct bio *bio, int rw), @@ -341,33 +243,18 @@ TRACE_EVENT(block_getrq, __entry->nr_sector, __entry->comm) ); -TRACE_EVENT(block_sleeprq, +DEFINE_EVENT(block_get_rq, block_getrq, TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - TP_ARGS(q, bio, rw), + TP_ARGS(q, bio, rw) +); - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __array( char, rwbs, 6 ) - __array( char, comm, TASK_COMM_LEN ) - ), +DEFINE_EVENT(block_get_rq, block_sleeprq, - TP_fast_assign( - __entry->dev = bio ? bio->bi_bdev->bd_dev : 0; - __entry->sector = bio ? bio->bi_sector : 0; - __entry->nr_sector = bio ? bio->bi_size >> 9 : 0; - blk_fill_rwbs(__entry->rwbs, - bio ? bio->bi_rw : 0, __entry->nr_sector); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), + TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - TP_printk("%d,%d %s %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) + TP_ARGS(q, bio, rw) ); TRACE_EVENT(block_plug, @@ -387,7 +274,7 @@ TRACE_EVENT(block_plug, TP_printk("[%s]", __entry->comm) ); -TRACE_EVENT(block_unplug_timer, +DECLARE_EVENT_CLASS(block_unplug, TP_PROTO(struct request_queue *q), @@ -406,23 +293,18 @@ TRACE_EVENT(block_unplug_timer, TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) ); -TRACE_EVENT(block_unplug_io, +DEFINE_EVENT(block_unplug, block_unplug_timer, TP_PROTO(struct request_queue *q), - TP_ARGS(q), + TP_ARGS(q) +); - TP_STRUCT__entry( - __field( int, nr_rq ) - __array( char, comm, TASK_COMM_LEN ) - ), +DEFINE_EVENT(block_unplug, block_unplug_io, - TP_fast_assign( - __entry->nr_rq = q->rq.count[READ] + q->rq.count[WRITE]; - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), + TP_PROTO(struct request_queue *q), - TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) + TP_ARGS(q) ); TRACE_EVENT(block_split, diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index d09550bf3f9..318f76535bd 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -90,7 +90,7 @@ TRACE_EVENT(ext4_allocate_inode, (unsigned long) __entry->dir, __entry->mode) ); -TRACE_EVENT(ext4_write_begin, +DECLARE_EVENT_CLASS(ext4__write_begin, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int flags), @@ -118,7 +118,23 @@ TRACE_EVENT(ext4_write_begin, __entry->pos, __entry->len, __entry->flags) ); -TRACE_EVENT(ext4_ordered_write_end, +DEFINE_EVENT(ext4__write_begin, ext4_write_begin, + + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int flags), + + TP_ARGS(inode, pos, len, flags) +); + +DEFINE_EVENT(ext4__write_begin, ext4_da_write_begin, + + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int flags), + + TP_ARGS(inode, pos, len, flags) +); + +DECLARE_EVENT_CLASS(ext4__write_end, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int copied), @@ -145,57 +161,36 @@ TRACE_EVENT(ext4_ordered_write_end, __entry->pos, __entry->len, __entry->copied) ); -TRACE_EVENT(ext4_writeback_write_end, +DEFINE_EVENT(ext4__write_end, ext4_ordered_write_end, + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int copied), - TP_ARGS(inode, pos, len, copied), + TP_ARGS(inode, pos, len, copied) +); - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( loff_t, pos ) - __field( unsigned int, len ) - __field( unsigned int, copied ) - ), +DEFINE_EVENT(ext4__write_end, ext4_writeback_write_end, - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->pos = pos; - __entry->len = len; - __entry->copied = copied; - ), + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int copied), - TP_printk("dev %s ino %lu pos %llu len %u copied %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->pos, __entry->len, __entry->copied) + TP_ARGS(inode, pos, len, copied) ); -TRACE_EVENT(ext4_journalled_write_end, +DEFINE_EVENT(ext4__write_end, ext4_journalled_write_end, + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int copied), - TP_ARGS(inode, pos, len, copied), - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( loff_t, pos ) - __field( unsigned int, len ) - __field( unsigned int, copied ) - ), + TP_ARGS(inode, pos, len, copied) +); - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->pos = pos; - __entry->len = len; - __entry->copied = copied; - ), +DEFINE_EVENT(ext4__write_end, ext4_da_write_end, - TP_printk("dev %s ino %lu pos %llu len %u copied %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->pos, __entry->len, __entry->copied) + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int copied), + + TP_ARGS(inode, pos, len, copied) ); TRACE_EVENT(ext4_writepage, @@ -337,60 +332,6 @@ TRACE_EVENT(ext4_da_writepages_result, (unsigned long) __entry->writeback_index) ); -TRACE_EVENT(ext4_da_write_begin, - TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, - unsigned int flags), - - TP_ARGS(inode, pos, len, flags), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( loff_t, pos ) - __field( unsigned int, len ) - __field( unsigned int, flags ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->pos = pos; - __entry->len = len; - __entry->flags = flags; - ), - - TP_printk("dev %s ino %lu pos %llu len %u flags %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->pos, __entry->len, __entry->flags) -); - -TRACE_EVENT(ext4_da_write_end, - TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, - unsigned int copied), - - TP_ARGS(inode, pos, len, copied), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( loff_t, pos ) - __field( unsigned int, len ) - __field( unsigned int, copied ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->pos = pos; - __entry->len = len; - __entry->copied = copied; - ), - - TP_printk("dev %s ino %lu pos %llu len %u copied %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->pos, __entry->len, __entry->copied) -); - TRACE_EVENT(ext4_discard_blocks, TP_PROTO(struct super_block *sb, unsigned long long blk, unsigned long long count), diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index b89f9db4a40..0e4cfb694fe 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -48,7 +48,7 @@ TRACE_EVENT(irq_handler_entry, __assign_str(name, action->name); ), - TP_printk("irq=%d handler=%s", __entry->irq, __get_str(name)) + TP_printk("irq=%d name=%s", __entry->irq, __get_str(name)) ); /** @@ -78,22 +78,11 @@ TRACE_EVENT(irq_handler_exit, __entry->ret = ret; ), - TP_printk("irq=%d return=%s", + TP_printk("irq=%d ret=%s", __entry->irq, __entry->ret ? "handled" : "unhandled") ); -/** - * softirq_entry - called immediately before the softirq handler - * @h: pointer to struct softirq_action - * @vec: pointer to first struct softirq_action in softirq_vec array - * - * The @h parameter, contains a pointer to the struct softirq_action - * which has a pointer to the action handler that is called. By subtracting - * the @vec pointer from the @h pointer, we can determine the softirq - * number. Also, when used in combination with the softirq_exit tracepoint - * we can determine the softirq latency. - */ -TRACE_EVENT(softirq_entry, +DECLARE_EVENT_CLASS(softirq, TP_PROTO(struct softirq_action *h, struct softirq_action *vec), @@ -107,11 +96,29 @@ TRACE_EVENT(softirq_entry, __entry->vec = (int)(h - vec); ), - TP_printk("softirq=%d action=%s", __entry->vec, + TP_printk("vec=%d [action=%s]", __entry->vec, show_softirq_name(__entry->vec)) ); /** + * softirq_entry - called immediately before the softirq handler + * @h: pointer to struct softirq_action + * @vec: pointer to first struct softirq_action in softirq_vec array + * + * The @h parameter, contains a pointer to the struct softirq_action + * which has a pointer to the action handler that is called. By subtracting + * the @vec pointer from the @h pointer, we can determine the softirq + * number. Also, when used in combination with the softirq_exit tracepoint + * we can determine the softirq latency. + */ +DEFINE_EVENT(softirq, softirq_entry, + + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + + TP_ARGS(h, vec) +); + +/** * softirq_exit - called immediately after the softirq handler returns * @h: pointer to struct softirq_action * @vec: pointer to first struct softirq_action in softirq_vec array @@ -122,22 +129,11 @@ TRACE_EVENT(softirq_entry, * combination with the softirq_entry tracepoint we can determine the softirq * latency. */ -TRACE_EVENT(softirq_exit, +DEFINE_EVENT(softirq, softirq_exit, TP_PROTO(struct softirq_action *h, struct softirq_action *vec), - TP_ARGS(h, vec), - - TP_STRUCT__entry( - __field( int, vec ) - ), - - TP_fast_assign( - __entry->vec = (int)(h - vec); - ), - - TP_printk("softirq=%d action=%s", __entry->vec, - show_softirq_name(__entry->vec)) + TP_ARGS(h, vec) ); #endif /* _TRACE_IRQ_H */ diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h index 3c60b75adb9..96b370a050d 100644 --- a/include/trace/events/jbd2.h +++ b/include/trace/events/jbd2.h @@ -30,7 +30,7 @@ TRACE_EVENT(jbd2_checkpoint, jbd2_dev_to_name(__entry->dev), __entry->result) ); -TRACE_EVENT(jbd2_start_commit, +DECLARE_EVENT_CLASS(jbd2_commit, TP_PROTO(journal_t *journal, transaction_t *commit_transaction), @@ -53,73 +53,32 @@ TRACE_EVENT(jbd2_start_commit, __entry->sync_commit) ); -TRACE_EVENT(jbd2_commit_locking, +DEFINE_EVENT(jbd2_commit, jbd2_start_commit, TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - TP_ARGS(journal, commit_transaction), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( char, sync_commit ) - __field( int, transaction ) - ), - - TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; - __entry->sync_commit = commit_transaction->t_synchronous_commit; - __entry->transaction = commit_transaction->t_tid; - ), - - TP_printk("dev %s transaction %d sync %d", - jbd2_dev_to_name(__entry->dev), __entry->transaction, - __entry->sync_commit) + TP_ARGS(journal, commit_transaction) ); -TRACE_EVENT(jbd2_commit_flushing, +DEFINE_EVENT(jbd2_commit, jbd2_commit_locking, TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - TP_ARGS(journal, commit_transaction), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( char, sync_commit ) - __field( int, transaction ) - ), - - TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; - __entry->sync_commit = commit_transaction->t_synchronous_commit; - __entry->transaction = commit_transaction->t_tid; - ), - - TP_printk("dev %s transaction %d sync %d", - jbd2_dev_to_name(__entry->dev), __entry->transaction, - __entry->sync_commit) + TP_ARGS(journal, commit_transaction) ); -TRACE_EVENT(jbd2_commit_logging, +DEFINE_EVENT(jbd2_commit, jbd2_commit_flushing, TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - TP_ARGS(journal, commit_transaction), + TP_ARGS(journal, commit_transaction) +); - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( char, sync_commit ) - __field( int, transaction ) - ), +DEFINE_EVENT(jbd2_commit, jbd2_commit_logging, - TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; - __entry->sync_commit = commit_transaction->t_synchronous_commit; - __entry->transaction = commit_transaction->t_tid; - ), + TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - TP_printk("dev %s transaction %d sync %d", - jbd2_dev_to_name(__entry->dev), __entry->transaction, - __entry->sync_commit) + TP_ARGS(journal, commit_transaction) ); TRACE_EVENT(jbd2_end_commit, diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index eaf46bdd18a..3adca0ca9db 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -44,7 +44,7 @@ {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"} \ ) : "GFP_NOWAIT" -TRACE_EVENT(kmalloc, +DECLARE_EVENT_CLASS(kmem_alloc, TP_PROTO(unsigned long call_site, const void *ptr, @@ -78,41 +78,23 @@ TRACE_EVENT(kmalloc, show_gfp_flags(__entry->gfp_flags)) ); -TRACE_EVENT(kmem_cache_alloc, +DEFINE_EVENT(kmem_alloc, kmalloc, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), + TP_PROTO(unsigned long call_site, const void *ptr, + size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags) +); - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - ), +DEFINE_EVENT(kmem_alloc, kmem_cache_alloc, - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - ), + TP_PROTO(unsigned long call_site, const void *ptr, + size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags), - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - show_gfp_flags(__entry->gfp_flags)) + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags) ); -TRACE_EVENT(kmalloc_node, +DECLARE_EVENT_CLASS(kmem_alloc_node, TP_PROTO(unsigned long call_site, const void *ptr, @@ -150,45 +132,25 @@ TRACE_EVENT(kmalloc_node, __entry->node) ); -TRACE_EVENT(kmem_cache_alloc_node, +DEFINE_EVENT(kmem_alloc_node, kmalloc_node, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), + TP_PROTO(unsigned long call_site, const void *ptr, + size_t bytes_req, size_t bytes_alloc, + gfp_t gfp_flags, int node), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node) +); - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - __field( int, node ) - ), +DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node, - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - __entry->node = node; - ), + TP_PROTO(unsigned long call_site, const void *ptr, + size_t bytes_req, size_t bytes_alloc, + gfp_t gfp_flags, int node), - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - show_gfp_flags(__entry->gfp_flags), - __entry->node) + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node) ); -TRACE_EVENT(kfree, +DECLARE_EVENT_CLASS(kmem_free, TP_PROTO(unsigned long call_site, const void *ptr), @@ -207,23 +169,18 @@ TRACE_EVENT(kfree, TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) ); -TRACE_EVENT(kmem_cache_free, +DEFINE_EVENT(kmem_free, kfree, TP_PROTO(unsigned long call_site, const void *ptr), - TP_ARGS(call_site, ptr), + TP_ARGS(call_site, ptr) +); - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - ), +DEFINE_EVENT(kmem_free, kmem_cache_free, - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - ), + TP_PROTO(unsigned long call_site, const void *ptr), - TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) + TP_ARGS(call_site, ptr) ); TRACE_EVENT(mm_page_free_direct, @@ -299,7 +256,7 @@ TRACE_EVENT(mm_page_alloc, show_gfp_flags(__entry->gfp_flags)) ); -TRACE_EVENT(mm_page_alloc_zone_locked, +DECLARE_EVENT_CLASS(mm_page, TP_PROTO(struct page *page, unsigned int order, int migratetype), @@ -325,29 +282,22 @@ TRACE_EVENT(mm_page_alloc_zone_locked, __entry->order == 0) ); -TRACE_EVENT(mm_page_pcpu_drain, +DEFINE_EVENT(mm_page, mm_page_alloc_zone_locked, - TP_PROTO(struct page *page, int order, int migratetype), + TP_PROTO(struct page *page, unsigned int order, int migratetype), - TP_ARGS(page, order, migratetype), + TP_ARGS(page, order, migratetype) +); - TP_STRUCT__entry( - __field( struct page *, page ) - __field( int, order ) - __field( int, migratetype ) - ), +DEFINE_EVENT_PRINT(mm_page, mm_page_pcpu_drain, - TP_fast_assign( - __entry->page = page; - __entry->order = order; - __entry->migratetype = migratetype; - ), + TP_PROTO(struct page *page, unsigned int order, int migratetype), + + TP_ARGS(page, order, migratetype), TP_printk("page=%p pfn=%lu order=%d migratetype=%d", - __entry->page, - page_to_pfn(__entry->page), - __entry->order, - __entry->migratetype) + __entry->page, page_to_pfn(__entry->page), + __entry->order, __entry->migratetype) ); TRACE_EVENT(mm_page_alloc_extfrag, diff --git a/include/trace/events/lockdep.h b/include/trace/events/lock.h index bcf1d209a00..a870ba125aa 100644 --- a/include/trace/events/lockdep.h +++ b/include/trace/events/lock.h @@ -1,8 +1,8 @@ #undef TRACE_SYSTEM -#define TRACE_SYSTEM lockdep +#define TRACE_SYSTEM lock -#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_LOCKDEP_H +#if !defined(_TRACE_LOCK_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_LOCK_H #include <linux/lockdep.h> #include <linux/tracepoint.h> @@ -90,7 +90,7 @@ TRACE_EVENT(lock_acquired, #endif #endif -#endif /* _TRACE_LOCKDEP_H */ +#endif /* _TRACE_LOCK_H */ /* This part must be outside protection */ #include <trace/define_trace.h> diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h new file mode 100644 index 00000000000..7eee77895cb --- /dev/null +++ b/include/trace/events/mce.h @@ -0,0 +1,69 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mce + +#if !defined(_TRACE_MCE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MCE_H + +#include <linux/ktime.h> +#include <linux/tracepoint.h> +#include <asm/mce.h> + +TRACE_EVENT(mce_record, + + TP_PROTO(struct mce *m), + + TP_ARGS(m), + + TP_STRUCT__entry( + __field( u64, mcgcap ) + __field( u64, mcgstatus ) + __field( u8, bank ) + __field( u64, status ) + __field( u64, addr ) + __field( u64, misc ) + __field( u64, ip ) + __field( u8, cs ) + __field( u64, tsc ) + __field( u64, walltime ) + __field( u32, cpu ) + __field( u32, cpuid ) + __field( u32, apicid ) + __field( u32, socketid ) + __field( u8, cpuvendor ) + ), + + TP_fast_assign( + __entry->mcgcap = m->mcgcap; + __entry->mcgstatus = m->mcgstatus; + __entry->bank = m->bank; + __entry->status = m->status; + __entry->addr = m->addr; + __entry->misc = m->misc; + __entry->ip = m->ip; + __entry->cs = m->cs; + __entry->tsc = m->tsc; + __entry->walltime = m->time; + __entry->cpu = m->extcpu; + __entry->cpuid = m->cpuid; + __entry->apicid = m->apicid; + __entry->socketid = m->socketid; + __entry->cpuvendor = m->cpuvendor; + ), + + TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, ADDR/MISC: %016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x", + __entry->cpu, + __entry->mcgcap, __entry->mcgstatus, + __entry->bank, __entry->status, + __entry->addr, __entry->misc, + __entry->cs, __entry->ip, + __entry->tsc, + __entry->cpuvendor, __entry->cpuid, + __entry->walltime, + __entry->socketid, + __entry->apicid) +); + +#endif /* _TRACE_MCE_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/module.h b/include/trace/events/module.h index 84160fb1847..4b0f48ba16a 100644 --- a/include/trace/events/module.h +++ b/include/trace/events/module.h @@ -51,7 +51,7 @@ TRACE_EVENT(module_free, TP_printk("%s", __get_str(name)) ); -TRACE_EVENT(module_get, +DECLARE_EVENT_CLASS(module_refcnt, TP_PROTO(struct module *mod, unsigned long ip, int refcnt), @@ -73,26 +73,18 @@ TRACE_EVENT(module_get, __get_str(name), (void *)__entry->ip, __entry->refcnt) ); -TRACE_EVENT(module_put, +DEFINE_EVENT(module_refcnt, module_get, TP_PROTO(struct module *mod, unsigned long ip, int refcnt), - TP_ARGS(mod, ip, refcnt), + TP_ARGS(mod, ip, refcnt) +); - TP_STRUCT__entry( - __field( unsigned long, ip ) - __field( int, refcnt ) - __string( name, mod->name ) - ), +DEFINE_EVENT(module_refcnt, module_put, - TP_fast_assign( - __entry->ip = ip; - __entry->refcnt = refcnt; - __assign_str(name, mod->name); - ), + TP_PROTO(struct module *mod, unsigned long ip, int refcnt), - TP_printk("%s call_site=%pf refcnt=%d", - __get_str(name), (void *)__entry->ip, __entry->refcnt) + TP_ARGS(mod, ip, refcnt) ); TRACE_EVENT(module_request, diff --git a/include/trace/events/power.h b/include/trace/events/power.h index ea6d579261a..c4efe9b8280 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -16,9 +16,7 @@ enum { }; #endif - - -TRACE_EVENT(power_start, +DECLARE_EVENT_CLASS(power, TP_PROTO(unsigned int type, unsigned int state), @@ -37,42 +35,36 @@ TRACE_EVENT(power_start, TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long)__entry->state) ); -TRACE_EVENT(power_end, - - TP_PROTO(int dummy), +DEFINE_EVENT(power, power_start, - TP_ARGS(dummy), + TP_PROTO(unsigned int type, unsigned int state), - TP_STRUCT__entry( - __field( u64, dummy ) - ), + TP_ARGS(type, state) +); - TP_fast_assign( - __entry->dummy = 0xffff; - ), +DEFINE_EVENT(power, power_frequency, - TP_printk("dummy=%lu", (unsigned long)__entry->dummy) + TP_PROTO(unsigned int type, unsigned int state), + TP_ARGS(type, state) ); +TRACE_EVENT(power_end, -TRACE_EVENT(power_frequency, - - TP_PROTO(unsigned int type, unsigned int state), + TP_PROTO(int dummy), - TP_ARGS(type, state), + TP_ARGS(dummy), TP_STRUCT__entry( - __field( u64, type ) - __field( u64, state ) + __field( u64, dummy ) ), TP_fast_assign( - __entry->type = type; - __entry->state = state; + __entry->dummy = 0xffff; ), - TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long) __entry->state) + TP_printk("dummy=%lu", (unsigned long)__entry->dummy) + ); #endif /* _TRACE_POWER_H */ diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 4069c43f418..cfceb0b73e2 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -26,7 +26,7 @@ TRACE_EVENT(sched_kthread_stop, __entry->pid = t->pid; ), - TP_printk("task %s:%d", __entry->comm, __entry->pid) + TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid) ); /* @@ -46,7 +46,7 @@ TRACE_EVENT(sched_kthread_stop_ret, __entry->ret = ret; ), - TP_printk("ret %d", __entry->ret) + TP_printk("ret=%d", __entry->ret) ); /* @@ -73,7 +73,7 @@ TRACE_EVENT(sched_wait_task, __entry->prio = p->prio; ), - TP_printk("task %s:%d [%d]", + TP_printk("comm=%s pid=%d prio=%d", __entry->comm, __entry->pid, __entry->prio) ); @@ -83,7 +83,7 @@ TRACE_EVENT(sched_wait_task, * (NOTE: the 'rq' argument is not used by generic trace events, * but used by the latency tracer plugin. ) */ -TRACE_EVENT(sched_wakeup, +DECLARE_EVENT_CLASS(sched_wakeup_template, TP_PROTO(struct rq *rq, struct task_struct *p, int success), @@ -94,7 +94,7 @@ TRACE_EVENT(sched_wakeup, __field( pid_t, pid ) __field( int, prio ) __field( int, success ) - __field( int, cpu ) + __field( int, target_cpu ) ), TP_fast_assign( @@ -102,46 +102,27 @@ TRACE_EVENT(sched_wakeup, __entry->pid = p->pid; __entry->prio = p->prio; __entry->success = success; - __entry->cpu = task_cpu(p); + __entry->target_cpu = task_cpu(p); ), - TP_printk("task %s:%d [%d] success=%d [%03d]", + TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d", __entry->comm, __entry->pid, __entry->prio, - __entry->success, __entry->cpu) + __entry->success, __entry->target_cpu) ); +DEFINE_EVENT(sched_wakeup_template, sched_wakeup, + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + TP_ARGS(rq, p, success)); + /* * Tracepoint for waking up a new task: * * (NOTE: the 'rq' argument is not used by generic trace events, * but used by the latency tracer plugin. ) */ -TRACE_EVENT(sched_wakeup_new, - - TP_PROTO(struct rq *rq, struct task_struct *p, int success), - - TP_ARGS(rq, p, success), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, success ) - __field( int, cpu ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->success = success; - __entry->cpu = task_cpu(p); - ), - - TP_printk("task %s:%d [%d] success=%d [%03d]", - __entry->comm, __entry->pid, __entry->prio, - __entry->success, __entry->cpu) -); +DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new, + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + TP_ARGS(rq, p, success)); /* * Tracepoint for task switches, performed by the scheduler: @@ -176,7 +157,7 @@ TRACE_EVENT(sched_switch, __entry->next_prio = next->prio; ), - TP_printk("task %s:%d [%d] (%s) ==> %s:%d [%d]", + TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s ==> next_comm=%s next_pid=%d next_prio=%d", __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, __entry->prev_state ? __print_flags(__entry->prev_state, "|", @@ -211,15 +192,12 @@ TRACE_EVENT(sched_migrate_task, __entry->dest_cpu = dest_cpu; ), - TP_printk("task %s:%d [%d] from: %d to: %d", + TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d", __entry->comm, __entry->pid, __entry->prio, __entry->orig_cpu, __entry->dest_cpu) ); -/* - * Tracepoint for freeing a task: - */ -TRACE_EVENT(sched_process_free, +DECLARE_EVENT_CLASS(sched_process_template, TP_PROTO(struct task_struct *p), @@ -237,34 +215,24 @@ TRACE_EVENT(sched_process_free, __entry->prio = p->prio; ), - TP_printk("task %s:%d [%d]", + TP_printk("comm=%s pid=%d prio=%d", __entry->comm, __entry->pid, __entry->prio) ); /* - * Tracepoint for a task exiting: + * Tracepoint for freeing a task: */ -TRACE_EVENT(sched_process_exit, +DEFINE_EVENT(sched_process_template, sched_process_free, + TP_PROTO(struct task_struct *p), + TP_ARGS(p)); + - TP_PROTO(struct task_struct *p), - - TP_ARGS(p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); +/* + * Tracepoint for a task exiting: + */ +DEFINE_EVENT(sched_process_template, sched_process_exit, + TP_PROTO(struct task_struct *p), + TP_ARGS(p)); /* * Tracepoint for a waiting task: @@ -287,7 +255,7 @@ TRACE_EVENT(sched_process_wait, __entry->prio = current->prio; ), - TP_printk("task %s:%d [%d]", + TP_printk("comm=%s pid=%d prio=%d", __entry->comm, __entry->pid, __entry->prio) ); @@ -314,46 +282,16 @@ TRACE_EVENT(sched_process_fork, __entry->child_pid = child->pid; ), - TP_printk("parent %s:%d child %s:%d", + TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d", __entry->parent_comm, __entry->parent_pid, __entry->child_comm, __entry->child_pid) ); /* - * Tracepoint for sending a signal: - */ -TRACE_EVENT(sched_signal_send, - - TP_PROTO(int sig, struct task_struct *p), - - TP_ARGS(sig, p), - - TP_STRUCT__entry( - __field( int, sig ) - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->sig = sig; - ), - - TP_printk("sig: %d task %s:%d", - __entry->sig, __entry->comm, __entry->pid) -); - -/* * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE * adding sched_stat support to SCHED_FIFO/RR would be welcome. */ - -/* - * Tracepoint for accounting wait time (time the task is runnable - * but not actually running due to scheduler contention). - */ -TRACE_EVENT(sched_stat_wait, +DECLARE_EVENT_CLASS(sched_stat_template, TP_PROTO(struct task_struct *tsk, u64 delay), @@ -374,11 +312,36 @@ TRACE_EVENT(sched_stat_wait, __perf_count(delay); ), - TP_printk("task: %s:%d wait: %Lu [ns]", + TP_printk("comm=%s pid=%d delay=%Lu [ns]", __entry->comm, __entry->pid, (unsigned long long)__entry->delay) ); + +/* + * Tracepoint for accounting wait time (time the task is runnable + * but not actually running due to scheduler contention). + */ +DEFINE_EVENT(sched_stat_template, sched_stat_wait, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay)); + +/* + * Tracepoint for accounting sleep time (time the task is not runnable, + * including iowait, see below). + */ +DEFINE_EVENT(sched_stat_template, sched_stat_sleep, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay)); + +/* + * Tracepoint for accounting iowait time (time the task is not runnable + * due to waiting on IO to complete). + */ +DEFINE_EVENT(sched_stat_template, sched_stat_iowait, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay)); + /* * Tracepoint for accounting runtime (time the task is executing * on a CPU). @@ -406,72 +369,12 @@ TRACE_EVENT(sched_stat_runtime, __perf_count(runtime); ), - TP_printk("task: %s:%d runtime: %Lu [ns], vruntime: %Lu [ns]", + TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]", __entry->comm, __entry->pid, (unsigned long long)__entry->runtime, (unsigned long long)__entry->vruntime) ); -/* - * Tracepoint for accounting sleep time (time the task is not runnable, - * including iowait, see below). - */ -TRACE_EVENT(sched_stat_sleep, - - TP_PROTO(struct task_struct *tsk, u64 delay), - - TP_ARGS(tsk, delay), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( u64, delay ) - ), - - TP_fast_assign( - memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); - __entry->pid = tsk->pid; - __entry->delay = delay; - ) - TP_perf_assign( - __perf_count(delay); - ), - - TP_printk("task: %s:%d sleep: %Lu [ns]", - __entry->comm, __entry->pid, - (unsigned long long)__entry->delay) -); - -/* - * Tracepoint for accounting iowait time (time the task is not runnable - * due to waiting on IO to complete). - */ -TRACE_EVENT(sched_stat_iowait, - - TP_PROTO(struct task_struct *tsk, u64 delay), - - TP_ARGS(tsk, delay), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( u64, delay ) - ), - - TP_fast_assign( - memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); - __entry->pid = tsk->pid; - __entry->delay = delay; - ) - TP_perf_assign( - __perf_count(delay); - ), - - TP_printk("task: %s:%d iowait: %Lu [ns]", - __entry->comm, __entry->pid, - (unsigned long long)__entry->delay) -); - #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h new file mode 100644 index 00000000000..a510b75ac30 --- /dev/null +++ b/include/trace/events/signal.h @@ -0,0 +1,173 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM signal + +#if !defined(_TRACE_SIGNAL_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SIGNAL_H + +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/tracepoint.h> + +#define TP_STORE_SIGINFO(__entry, info) \ + do { \ + if (info == SEND_SIG_NOINFO) { \ + __entry->errno = 0; \ + __entry->code = SI_USER; \ + } else if (info == SEND_SIG_PRIV) { \ + __entry->errno = 0; \ + __entry->code = SI_KERNEL; \ + } else { \ + __entry->errno = info->si_errno; \ + __entry->code = info->si_code; \ + } \ + } while (0) + +/** + * signal_generate - called when a signal is generated + * @sig: signal number + * @info: pointer to struct siginfo + * @task: pointer to struct task_struct + * + * Current process sends a 'sig' signal to 'task' process with + * 'info' siginfo. If 'info' is SEND_SIG_NOINFO or SEND_SIG_PRIV, + * 'info' is not a pointer and you can't access its field. Instead, + * SEND_SIG_NOINFO means that si_code is SI_USER, and SEND_SIG_PRIV + * means that si_code is SI_KERNEL. + */ +TRACE_EVENT(signal_generate, + + TP_PROTO(int sig, struct siginfo *info, struct task_struct *task), + + TP_ARGS(sig, info, task), + + TP_STRUCT__entry( + __field( int, sig ) + __field( int, errno ) + __field( int, code ) + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + ), + + TP_fast_assign( + __entry->sig = sig; + TP_STORE_SIGINFO(__entry, info); + memcpy(__entry->comm, task->comm, TASK_COMM_LEN); + __entry->pid = task->pid; + ), + + TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d", + __entry->sig, __entry->errno, __entry->code, + __entry->comm, __entry->pid) +); + +/** + * signal_deliver - called when a signal is delivered + * @sig: signal number + * @info: pointer to struct siginfo + * @ka: pointer to struct k_sigaction + * + * A 'sig' signal is delivered to current process with 'info' siginfo, + * and it will be handled by 'ka'. ka->sa.sa_handler can be SIG_IGN or + * SIG_DFL. + * Note that some signals reported by signal_generate tracepoint can be + * lost, ignored or modified (by debugger) before hitting this tracepoint. + * This means, this can show which signals are actually delivered, but + * matching generated signals and delivered signals may not be correct. + */ +TRACE_EVENT(signal_deliver, + + TP_PROTO(int sig, struct siginfo *info, struct k_sigaction *ka), + + TP_ARGS(sig, info, ka), + + TP_STRUCT__entry( + __field( int, sig ) + __field( int, errno ) + __field( int, code ) + __field( unsigned long, sa_handler ) + __field( unsigned long, sa_flags ) + ), + + TP_fast_assign( + __entry->sig = sig; + TP_STORE_SIGINFO(__entry, info); + __entry->sa_handler = (unsigned long)ka->sa.sa_handler; + __entry->sa_flags = ka->sa.sa_flags; + ), + + TP_printk("sig=%d errno=%d code=%d sa_handler=%lx sa_flags=%lx", + __entry->sig, __entry->errno, __entry->code, + __entry->sa_handler, __entry->sa_flags) +); + +/** + * signal_overflow_fail - called when signal queue is overflow + * @sig: signal number + * @group: signal to process group or not (bool) + * @info: pointer to struct siginfo + * + * Kernel fails to generate 'sig' signal with 'info' siginfo, because + * siginfo queue is overflow, and the signal is dropped. + * 'group' is not 0 if the signal will be sent to a process group. + * 'sig' is always one of RT signals. + */ +TRACE_EVENT(signal_overflow_fail, + + TP_PROTO(int sig, int group, struct siginfo *info), + + TP_ARGS(sig, group, info), + + TP_STRUCT__entry( + __field( int, sig ) + __field( int, group ) + __field( int, errno ) + __field( int, code ) + ), + + TP_fast_assign( + __entry->sig = sig; + __entry->group = group; + TP_STORE_SIGINFO(__entry, info); + ), + + TP_printk("sig=%d group=%d errno=%d code=%d", + __entry->sig, __entry->group, __entry->errno, __entry->code) +); + +/** + * signal_lose_info - called when siginfo is lost + * @sig: signal number + * @group: signal to process group or not (bool) + * @info: pointer to struct siginfo + * + * Kernel generates 'sig' signal but loses 'info' siginfo, because siginfo + * queue is overflow. + * 'group' is not 0 if the signal will be sent to a process group. + * 'sig' is always one of non-RT signals. + */ +TRACE_EVENT(signal_lose_info, + + TP_PROTO(int sig, int group, struct siginfo *info), + + TP_ARGS(sig, group, info), + + TP_STRUCT__entry( + __field( int, sig ) + __field( int, group ) + __field( int, errno ) + __field( int, code ) + ), + + TP_fast_assign( + __entry->sig = sig; + __entry->group = group; + TP_STORE_SIGINFO(__entry, info); + ), + + TP_printk("sig=%d group=%d errno=%d code=%d", + __entry->sig, __entry->group, __entry->errno, __entry->code) +); +#endif /* _TRACE_SIGNAL_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h index 397dff2dbd5..fb726ac7cae 100644 --- a/include/trace/events/syscalls.h +++ b/include/trace/events/syscalls.h @@ -1,5 +1,6 @@ #undef TRACE_SYSTEM -#define TRACE_SYSTEM syscalls +#define TRACE_SYSTEM raw_syscalls +#define TRACE_INCLUDE_FILE syscalls #if !defined(_TRACE_EVENTS_SYSCALLS_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_EVENTS_SYSCALLS_H diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index 1844c48d640..e5ce87a0498 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -26,7 +26,7 @@ TRACE_EVENT(timer_init, __entry->timer = timer; ), - TP_printk("timer %p", __entry->timer) + TP_printk("timer=%p", __entry->timer) ); /** @@ -54,7 +54,7 @@ TRACE_EVENT(timer_start, __entry->now = jiffies; ), - TP_printk("timer %p: func %pf, expires %lu, timeout %ld", + TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld]", __entry->timer, __entry->function, __entry->expires, (long)__entry->expires - __entry->now) ); @@ -81,7 +81,7 @@ TRACE_EVENT(timer_expire_entry, __entry->now = jiffies; ), - TP_printk("timer %p: now %lu", __entry->timer, __entry->now) + TP_printk("timer=%p now=%lu", __entry->timer, __entry->now) ); /** @@ -108,7 +108,7 @@ TRACE_EVENT(timer_expire_exit, __entry->timer = timer; ), - TP_printk("timer %p", __entry->timer) + TP_printk("timer=%p", __entry->timer) ); /** @@ -129,7 +129,7 @@ TRACE_EVENT(timer_cancel, __entry->timer = timer; ), - TP_printk("timer %p", __entry->timer) + TP_printk("timer=%p", __entry->timer) ); /** @@ -140,24 +140,24 @@ TRACE_EVENT(timer_cancel, */ TRACE_EVENT(hrtimer_init, - TP_PROTO(struct hrtimer *timer, clockid_t clockid, + TP_PROTO(struct hrtimer *hrtimer, clockid_t clockid, enum hrtimer_mode mode), - TP_ARGS(timer, clockid, mode), + TP_ARGS(hrtimer, clockid, mode), TP_STRUCT__entry( - __field( void *, timer ) + __field( void *, hrtimer ) __field( clockid_t, clockid ) __field( enum hrtimer_mode, mode ) ), TP_fast_assign( - __entry->timer = timer; + __entry->hrtimer = hrtimer; __entry->clockid = clockid; __entry->mode = mode; ), - TP_printk("hrtimer %p, clockid %s, mode %s", __entry->timer, + TP_printk("hrtimer=%p clockid=%s mode=%s", __entry->hrtimer, __entry->clockid == CLOCK_REALTIME ? "CLOCK_REALTIME" : "CLOCK_MONOTONIC", __entry->mode == HRTIMER_MODE_ABS ? @@ -170,26 +170,26 @@ TRACE_EVENT(hrtimer_init, */ TRACE_EVENT(hrtimer_start, - TP_PROTO(struct hrtimer *timer), + TP_PROTO(struct hrtimer *hrtimer), - TP_ARGS(timer), + TP_ARGS(hrtimer), TP_STRUCT__entry( - __field( void *, timer ) + __field( void *, hrtimer ) __field( void *, function ) __field( s64, expires ) __field( s64, softexpires ) ), TP_fast_assign( - __entry->timer = timer; - __entry->function = timer->function; - __entry->expires = hrtimer_get_expires(timer).tv64; - __entry->softexpires = hrtimer_get_softexpires(timer).tv64; + __entry->hrtimer = hrtimer; + __entry->function = hrtimer->function; + __entry->expires = hrtimer_get_expires(hrtimer).tv64; + __entry->softexpires = hrtimer_get_softexpires(hrtimer).tv64; ), - TP_printk("hrtimer %p, func %pf, expires %llu, softexpires %llu", - __entry->timer, __entry->function, + TP_printk("hrtimer=%p function=%pf expires=%llu softexpires=%llu", + __entry->hrtimer, __entry->function, (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->expires }), (unsigned long long)ktime_to_ns((ktime_t) { @@ -206,23 +206,22 @@ TRACE_EVENT(hrtimer_start, */ TRACE_EVENT(hrtimer_expire_entry, - TP_PROTO(struct hrtimer *timer, ktime_t *now), + TP_PROTO(struct hrtimer *hrtimer, ktime_t *now), - TP_ARGS(timer, now), + TP_ARGS(hrtimer, now), TP_STRUCT__entry( - __field( void *, timer ) + __field( void *, hrtimer ) __field( s64, now ) ), TP_fast_assign( - __entry->timer = timer; - __entry->now = now->tv64; + __entry->hrtimer = hrtimer; + __entry->now = now->tv64; ), - TP_printk("hrtimer %p, now %llu", __entry->timer, - (unsigned long long)ktime_to_ns((ktime_t) { - .tv64 = __entry->now })) + TP_printk("hrtimer=%p now=%llu", __entry->hrtimer, + (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->now })) ); /** @@ -234,40 +233,40 @@ TRACE_EVENT(hrtimer_expire_entry, */ TRACE_EVENT(hrtimer_expire_exit, - TP_PROTO(struct hrtimer *timer), + TP_PROTO(struct hrtimer *hrtimer), - TP_ARGS(timer), + TP_ARGS(hrtimer), TP_STRUCT__entry( - __field( void *, timer ) + __field( void *, hrtimer ) ), TP_fast_assign( - __entry->timer = timer; + __entry->hrtimer = hrtimer; ), - TP_printk("hrtimer %p", __entry->timer) + TP_printk("hrtimer=%p", __entry->hrtimer) ); /** * hrtimer_cancel - called when the hrtimer is canceled - * @timer: pointer to struct hrtimer + * @hrtimer: pointer to struct hrtimer */ TRACE_EVENT(hrtimer_cancel, - TP_PROTO(struct hrtimer *timer), + TP_PROTO(struct hrtimer *hrtimer), - TP_ARGS(timer), + TP_ARGS(hrtimer), TP_STRUCT__entry( - __field( void *, timer ) + __field( void *, hrtimer ) ), TP_fast_assign( - __entry->timer = timer; + __entry->hrtimer = hrtimer; ), - TP_printk("hrtimer %p", __entry->timer) + TP_printk("hrtimer=%p", __entry->hrtimer) ); /** @@ -302,7 +301,7 @@ TRACE_EVENT(itimer_state, __entry->interval_usec = value->it_interval.tv_usec; ), - TP_printk("which %d, expires %lu, it_value %lu.%lu, it_interval %lu.%lu", + TP_printk("which=%d expires=%lu it_value=%lu.%lu it_interval=%lu.%lu", __entry->which, __entry->expires, __entry->value_sec, __entry->value_usec, __entry->interval_sec, __entry->interval_usec) @@ -332,7 +331,7 @@ TRACE_EVENT(itimer_expire, __entry->pid = pid_nr(pid); ), - TP_printk("which %d, pid %d, now %lu", __entry->which, + TP_printk("which=%d pid=%d now=%lu", __entry->which, (int) __entry->pid, __entry->now) ); diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h index e4612dbd7ba..d6c974474e7 100644 --- a/include/trace/events/workqueue.h +++ b/include/trace/events/workqueue.h @@ -8,7 +8,7 @@ #include <linux/sched.h> #include <linux/tracepoint.h> -TRACE_EVENT(workqueue_insertion, +DECLARE_EVENT_CLASS(workqueue, TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), @@ -30,26 +30,18 @@ TRACE_EVENT(workqueue_insertion, __entry->thread_pid, __entry->func) ); -TRACE_EVENT(workqueue_execution, +DEFINE_EVENT(workqueue, workqueue_insertion, TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), - TP_ARGS(wq_thread, work), + TP_ARGS(wq_thread, work) +); - TP_STRUCT__entry( - __array(char, thread_comm, TASK_COMM_LEN) - __field(pid_t, thread_pid) - __field(work_func_t, func) - ), +DEFINE_EVENT(workqueue, workqueue_execution, - TP_fast_assign( - memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN); - __entry->thread_pid = wq_thread->pid; - __entry->func = work->func; - ), + TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), - TP_printk("thread=%s:%d func=%pf", __entry->thread_comm, - __entry->thread_pid, __entry->func) + TP_ARGS(wq_thread, work) ); /* Trace the creation of one workqueue thread on a cpu */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index cc0d9667e18..d1b3de9c1a7 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -18,6 +18,26 @@ #include <linux/ftrace_event.h> +/* + * DECLARE_EVENT_CLASS can be used to add a generic function + * handlers for events. That is, if all events have the same + * parameters and just have distinct trace points. + * Each tracepoint can be defined with DEFINE_EVENT and that + * will map the DECLARE_EVENT_CLASS to the tracepoint. + * + * TRACE_EVENT is a one to one mapping between tracepoint and template. + */ +#undef TRACE_EVENT +#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ + DECLARE_EVENT_CLASS(name, \ + PARAMS(proto), \ + PARAMS(args), \ + PARAMS(tstruct), \ + PARAMS(assign), \ + PARAMS(print)); \ + DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args)); + + #undef __field #define __field(type, item) type item; @@ -36,15 +56,21 @@ #undef TP_STRUCT__entry #define TP_STRUCT__entry(args...) args -#undef TRACE_EVENT -#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ - struct ftrace_raw_##name { \ - struct trace_entry ent; \ - tstruct \ - char __data[0]; \ - }; \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) \ + struct ftrace_raw_##name { \ + struct trace_entry ent; \ + tstruct \ + char __data[0]; \ + }; +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) \ static struct ftrace_event_call event_##name +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #undef __cpparg #define __cpparg(arg...) arg @@ -89,12 +115,19 @@ #undef __string #define __string(item, src) __dynamic_array(char, item, -1) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ struct ftrace_data_offsets_##call { \ tstruct; \ }; +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) + +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -120,9 +153,10 @@ #undef __field #define __field(type, item) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%u;\tsize:%u;\n", \ + "offset:%u;\tsize:%u;\tsigned:%u;\n", \ (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ + (unsigned int)sizeof(field.item), \ + (unsigned int)is_signed_type(type)); \ if (!ret) \ return 0; @@ -132,19 +166,21 @@ #undef __array #define __array(type, item, len) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%u;\tsize:%u;\n", \ + "offset:%u;\tsize:%u;\tsigned:%u;\n", \ (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ + (unsigned int)sizeof(field.item), \ + (unsigned int)is_signed_type(type)); \ if (!ret) \ return 0; #undef __dynamic_array #define __dynamic_array(type, item, len) \ ret = trace_seq_printf(s, "\tfield:__data_loc " #type "[] " #item ";\t"\ - "offset:%u;\tsize:%u;\n", \ + "offset:%u;\tsize:%u;\tsigned:%u;\n", \ (unsigned int)offsetof(typeof(field), \ __data_loc_##item), \ - (unsigned int)sizeof(field.__data_loc_##item)); \ + (unsigned int)sizeof(field.__data_loc_##item), \ + (unsigned int)is_signed_type(type)); \ if (!ret) \ return 0; @@ -159,7 +195,7 @@ #undef __get_str #undef TP_printk -#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args) +#define TP_printk(fmt, args...) "\"%s\", %s\n", fmt, __stringify(args) #undef TP_fast_assign #define TP_fast_assign(args...) args @@ -167,17 +203,50 @@ #undef TP_perf_assign #define TP_perf_assign(args...) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ static int \ -ftrace_format_##call(struct ftrace_event_call *unused, \ - struct trace_seq *s) \ +ftrace_format_setup_##call(struct ftrace_event_call *unused, \ + struct trace_seq *s) \ { \ struct ftrace_raw_##call field __attribute__((unused)); \ int ret = 0; \ \ tstruct; \ \ + return ret; \ +} \ + \ +static int \ +ftrace_format_##call(struct ftrace_event_call *unused, \ + struct trace_seq *s) \ +{ \ + int ret = 0; \ + \ + ret = ftrace_format_setup_##call(unused, s); \ + if (!ret) \ + return ret; \ + \ + ret = trace_seq_printf(s, "\nprint fmt: " print); \ + \ + return ret; \ +} + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) + +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ +static int \ +ftrace_format_##name(struct ftrace_event_call *unused, \ + struct trace_seq *s) \ +{ \ + int ret = 0; \ + \ + ret = ftrace_format_setup_##template(unused, s); \ + if (!ret) \ + return ret; \ + \ trace_seq_printf(s, "\nprint fmt: " print); \ \ return ret; \ @@ -252,10 +321,11 @@ ftrace_format_##call(struct ftrace_event_call *unused, \ ftrace_print_symbols_seq(p, value, symbols); \ }) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static enum print_line_t \ -ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ +ftrace_raw_output_id_##call(int event_id, const char *name, \ + struct trace_iterator *iter, int flags) \ { \ struct trace_seq *s = &iter->seq; \ struct ftrace_raw_##call *field; \ @@ -265,6 +335,47 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ \ entry = iter->ent; \ \ + if (entry->type != event_id) { \ + WARN_ON_ONCE(1); \ + return TRACE_TYPE_UNHANDLED; \ + } \ + \ + field = (typeof(field))entry; \ + \ + p = &get_cpu_var(ftrace_event_seq); \ + trace_seq_init(p); \ + ret = trace_seq_printf(s, "%s: ", name); \ + if (ret) \ + ret = trace_seq_printf(s, print); \ + put_cpu(); \ + if (!ret) \ + return TRACE_TYPE_PARTIAL_LINE; \ + \ + return TRACE_TYPE_HANDLED; \ +} + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) \ +static enum print_line_t \ +ftrace_raw_output_##name(struct trace_iterator *iter, int flags) \ +{ \ + return ftrace_raw_output_id_##template(event_##name.id, \ + #name, iter, flags); \ +} + +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, call, proto, args, print) \ +static enum print_line_t \ +ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ +{ \ + struct trace_seq *s = &iter->seq; \ + struct ftrace_raw_##template *field; \ + struct trace_entry *entry; \ + struct trace_seq *p; \ + int ret; \ + \ + entry = iter->ent; \ + \ if (entry->type != event_##call.id) { \ WARN_ON_ONCE(1); \ return TRACE_TYPE_UNHANDLED; \ @@ -274,14 +385,16 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ \ p = &get_cpu_var(ftrace_event_seq); \ trace_seq_init(p); \ - ret = trace_seq_printf(s, #call ": " print); \ + ret = trace_seq_printf(s, "%s: ", #call); \ + if (ret) \ + ret = trace_seq_printf(s, print); \ put_cpu(); \ if (!ret) \ return TRACE_TYPE_PARTIAL_LINE; \ \ return TRACE_TYPE_HANDLED; \ } - + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #undef __field_ext @@ -315,8 +428,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #undef __string #define __string(item, src) __dynamic_array(char, item, -1) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ static int \ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ { \ @@ -332,6 +445,13 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ return ret; \ } +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) + +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -358,10 +478,10 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ __data_size += (len) * sizeof(type); #undef __string -#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1) \ +#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static inline int ftrace_get_offsets_##call( \ struct ftrace_data_offsets_##call *__data_offsets, proto) \ { \ @@ -373,6 +493,13 @@ static inline int ftrace_get_offsets_##call( \ return __data_size; \ } +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) + +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #ifdef CONFIG_EVENT_PROFILE @@ -394,21 +521,28 @@ static inline int ftrace_get_offsets_##call( \ * */ -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) \ \ -static void ftrace_profile_##call(proto); \ +static void ftrace_profile_##name(proto); \ \ -static int ftrace_profile_enable_##call(void) \ +static int ftrace_profile_enable_##name(struct ftrace_event_call *unused)\ { \ - return register_trace_##call(ftrace_profile_##call); \ + return register_trace_##name(ftrace_profile_##name); \ } \ \ -static void ftrace_profile_disable_##call(void) \ +static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ { \ - unregister_trace_##call(ftrace_profile_##call); \ + unregister_trace_##name(ftrace_profile_##name); \ } +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #endif @@ -423,7 +557,7 @@ static void ftrace_profile_disable_##call(void) \ * event_trace_printk(_RET_IP_, "<call>: " <fmt>); * } * - * static int ftrace_reg_event_<call>(void) + * static int ftrace_reg_event_<call>(struct ftrace_event_call *unused) * { * int ret; * @@ -434,7 +568,7 @@ static void ftrace_profile_disable_##call(void) \ * return ret; * } * - * static void ftrace_unreg_event_<call>(void) + * static void ftrace_unreg_event_<call>(struct ftrace_event_call *unused) * { * unregister_trace_<call>(ftrace_event_<call>); * } @@ -469,7 +603,7 @@ static void ftrace_profile_disable_##call(void) \ * trace_current_buffer_unlock_commit(buffer, event, irq_flags, pc); * } * - * static int ftrace_raw_reg_event_<call>(void) + * static int ftrace_raw_reg_event_<call>(struct ftrace_event_call *unused) * { * int ret; * @@ -480,7 +614,7 @@ static void ftrace_profile_disable_##call(void) \ * return ret; * } * - * static void ftrace_unreg_event_<call>(void) + * static void ftrace_unreg_event_<call>(struct ftrace_event_call *unused) * { * unregister_trace_<call>(ftrace_raw_event_<call>); * } @@ -489,7 +623,7 @@ static void ftrace_profile_disable_##call(void) \ * .trace = ftrace_raw_output_<call>, <-- stage 2 * }; * - * static int ftrace_raw_init_event_<call>(void) + * static int ftrace_raw_init_event_<call>(struct ftrace_event_call *unused) * { * int id; * @@ -547,15 +681,13 @@ static void ftrace_profile_disable_##call(void) \ #define __assign_str(dst, src) \ strcpy(__get_str(dst), src); -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ \ -static struct ftrace_event_call event_##call; \ - \ -static void ftrace_raw_event_##call(proto) \ +static void ftrace_raw_event_id_##call(struct ftrace_event_call *event_call, \ + proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ - struct ftrace_event_call *event_call = &event_##call; \ struct ring_buffer_event *event; \ struct ftrace_raw_##call *entry; \ struct ring_buffer *buffer; \ @@ -569,7 +701,7 @@ static void ftrace_raw_event_##call(proto) \ __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ \ event = trace_current_buffer_lock_reserve(&buffer, \ - event_##call.id, \ + event_call->id, \ sizeof(*entry) + __data_size, \ irq_flags, pc); \ if (!event) \ @@ -584,9 +716,17 @@ static void ftrace_raw_event_##call(proto) \ if (!filter_current_check_discard(buffer, event_call, entry, event)) \ trace_nowake_buffer_unlock_commit(buffer, \ event, irq_flags, pc); \ +} + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, call, proto, args) \ + \ +static void ftrace_raw_event_##call(proto) \ +{ \ + ftrace_raw_event_id_##template(&event_##call, args); \ } \ \ -static int ftrace_raw_reg_event_##call(void *ptr) \ +static int ftrace_raw_reg_event_##call(struct ftrace_event_call *unused)\ { \ int ret; \ \ @@ -597,7 +737,7 @@ static int ftrace_raw_reg_event_##call(void *ptr) \ return ret; \ } \ \ -static void ftrace_raw_unreg_event_##call(void *ptr) \ +static void ftrace_raw_unreg_event_##call(struct ftrace_event_call *unused)\ { \ unregister_trace_##call(ftrace_raw_event_##call); \ } \ @@ -606,7 +746,7 @@ static struct trace_event ftrace_event_type_##call = { \ .trace = ftrace_raw_output_##call, \ }; \ \ -static int ftrace_raw_init_event_##call(void) \ +static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\ { \ int id; \ \ @@ -616,7 +756,36 @@ static int ftrace_raw_init_event_##call(void) \ event_##call.id = id; \ INIT_LIST_HEAD(&event_##call.fields); \ return 0; \ -} \ +} + +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, call, proto, args) \ + \ +static struct ftrace_event_call __used \ +__attribute__((__aligned__(4))) \ +__attribute__((section("_ftrace_events"))) event_##call = { \ + .name = #call, \ + .system = __stringify(TRACE_SYSTEM), \ + .event = &ftrace_event_type_##call, \ + .raw_init = ftrace_raw_init_event_##call, \ + .regfunc = ftrace_raw_reg_event_##call, \ + .unregfunc = ftrace_raw_unreg_event_##call, \ + .show_format = ftrace_format_##template, \ + .define_fields = ftrace_define_fields_##template, \ + _TRACE_PROFILE_INIT(call) \ +} + +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, call, proto, args, print) \ \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ @@ -628,7 +797,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ .show_format = ftrace_format_##call, \ - .define_fields = ftrace_define_fields_##call, \ + .define_fields = ftrace_define_fields_##template, \ _TRACE_PROFILE_INIT(call) \ } @@ -646,6 +815,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * struct ftrace_event_call *event_call = &event_<call>; * extern void perf_tp_event(int, u64, u64, void *, int); * struct ftrace_raw_##call *entry; + * struct perf_trace_buf *trace_buf; * u64 __addr = 0, __count = 1; * unsigned long irq_flags; * struct trace_entry *ent; @@ -670,14 +840,25 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * __cpu = smp_processor_id(); * * if (in_nmi()) - * raw_data = rcu_dereference(trace_profile_buf_nmi); + * trace_buf = rcu_dereference(perf_trace_buf_nmi); * else - * raw_data = rcu_dereference(trace_profile_buf); + * trace_buf = rcu_dereference(perf_trace_buf); * - * if (!raw_data) + * if (!trace_buf) * goto end; * - * raw_data = per_cpu_ptr(raw_data, __cpu); + * trace_buf = per_cpu_ptr(trace_buf, __cpu); + * + * // Avoid recursion from perf that could mess up the buffer + * if (trace_buf->recursion++) + * goto end_recursion; + * + * raw_data = trace_buf->buf; + * + * // Make recursion update visible before entering perf_tp_event + * // so that we protect from perf recursions. + * + * barrier(); * * //zero dead bytes from alignment to avoid stack leak to userspace: * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; @@ -704,21 +885,26 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #undef __perf_count #define __perf_count(c) __count = (c) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ -static void ftrace_profile_##call(proto) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ +static void \ +ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \ + proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ - struct ftrace_event_call *event_call = &event_##call; \ - extern void perf_tp_event(int, u64, u64, void *, int); \ + extern int perf_swevent_get_recursion_context(void); \ + extern void perf_swevent_put_recursion_context(int rctx); \ + extern void perf_tp_event(int, u64, u64, void *, int); \ struct ftrace_raw_##call *entry; \ u64 __addr = 0, __count = 1; \ unsigned long irq_flags; \ struct trace_entry *ent; \ int __entry_size; \ int __data_size; \ + char *trace_buf; \ char *raw_data; \ int __cpu; \ + int rctx; \ int pc; \ \ pc = preempt_count(); \ @@ -733,17 +919,22 @@ static void ftrace_profile_##call(proto) \ return; \ \ local_irq_save(irq_flags); \ + \ + rctx = perf_swevent_get_recursion_context(); \ + if (rctx < 0) \ + goto end_recursion; \ + \ __cpu = smp_processor_id(); \ \ if (in_nmi()) \ - raw_data = rcu_dereference(trace_profile_buf_nmi); \ + trace_buf = rcu_dereference(perf_trace_buf_nmi); \ else \ - raw_data = rcu_dereference(trace_profile_buf); \ + trace_buf = rcu_dereference(perf_trace_buf); \ \ - if (!raw_data) \ + if (!trace_buf) \ goto end; \ \ - raw_data = per_cpu_ptr(raw_data, __cpu); \ + raw_data = per_cpu_ptr(trace_buf, __cpu); \ \ *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ entry = (struct ftrace_raw_##call *)raw_data; \ @@ -759,10 +950,25 @@ static void ftrace_profile_##call(proto) \ __entry_size); \ \ end: \ + perf_swevent_put_recursion_context(rctx); \ +end_recursion: \ local_irq_restore(irq_flags); \ \ } +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, call, proto, args) \ +static void ftrace_profile_##call(proto) \ +{ \ + struct ftrace_event_call *event_call = &event_##call; \ + \ + ftrace_profile_templ_##template(event_call, args); \ +} + +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #endif /* CONFIG_EVENT_PROFILE */ diff --git a/include/trace/power.h b/include/trace/power.h deleted file mode 100644 index ef204666e98..00000000000 --- a/include/trace/power.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef _TRACE_POWER_H -#define _TRACE_POWER_H - -#include <linux/ktime.h> -#include <linux/tracepoint.h> - -enum { - POWER_NONE = 0, - POWER_CSTATE = 1, - POWER_PSTATE = 2, -}; - -struct power_trace { - ktime_t stamp; - ktime_t end; - int type; - int state; -}; - -DECLARE_TRACE(power_start, - TP_PROTO(struct power_trace *it, unsigned int type, unsigned int state), - TP_ARGS(it, type, state)); - -DECLARE_TRACE(power_mark, - TP_PROTO(struct power_trace *it, unsigned int type, unsigned int state), - TP_ARGS(it, type, state)); - -DECLARE_TRACE(power_end, - TP_PROTO(struct power_trace *it), - TP_ARGS(it)); - -#endif /* _TRACE_POWER_H */ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 5dc283ba5ae..961fda3556b 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -12,51 +12,48 @@ * A syscall entry in the ftrace syscalls array. * * @name: name of the syscall + * @syscall_nr: number of the syscall * @nb_args: number of parameters it takes * @types: list of types as strings * @args: list of args as strings (args[i] matches types[i]) - * @enter_id: associated ftrace enter event id - * @exit_id: associated ftrace exit event id * @enter_event: associated syscall_enter trace event * @exit_event: associated syscall_exit trace event */ struct syscall_metadata { const char *name; + int syscall_nr; int nb_args; const char **types; const char **args; - int enter_id; - int exit_id; struct ftrace_event_call *enter_event; struct ftrace_event_call *exit_event; }; #ifdef CONFIG_FTRACE_SYSCALLS -extern struct syscall_metadata *syscall_nr_to_meta(int nr); -extern int syscall_name_to_nr(char *name); -void set_syscall_enter_id(int num, int id); -void set_syscall_exit_id(int num, int id); -extern struct trace_event event_syscall_enter; -extern struct trace_event event_syscall_exit; -extern int reg_event_syscall_enter(void *ptr); -extern void unreg_event_syscall_enter(void *ptr); -extern int reg_event_syscall_exit(void *ptr); -extern void unreg_event_syscall_exit(void *ptr); +extern unsigned long arch_syscall_addr(int nr); +extern int init_syscall_trace(struct ftrace_event_call *call); + extern int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s); extern int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s); extern int syscall_enter_define_fields(struct ftrace_event_call *call); extern int syscall_exit_define_fields(struct ftrace_event_call *call); +extern int reg_event_syscall_enter(struct ftrace_event_call *call); +extern void unreg_event_syscall_enter(struct ftrace_event_call *call); +extern int reg_event_syscall_exit(struct ftrace_event_call *call); +extern void unreg_event_syscall_exit(struct ftrace_event_call *call); +extern int +ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s); enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); #endif #ifdef CONFIG_EVENT_PROFILE -int reg_prof_syscall_enter(char *name); -void unreg_prof_syscall_enter(char *name); -int reg_prof_syscall_exit(char *name); -void unreg_prof_syscall_exit(char *name); +int prof_sysenter_enable(struct ftrace_event_call *call); +void prof_sysenter_disable(struct ftrace_event_call *call); +int prof_sysexit_enable(struct ftrace_event_call *call); +void prof_sysexit_disable(struct ftrace_event_call *call); #endif diff --git a/init/Kconfig b/init/Kconfig index ab5c64801fe..9ee77829475 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -334,6 +334,15 @@ config TREE_PREEMPT_RCU is also required. It also scales down nicely to smaller systems. +config TINY_RCU + bool "UP-only small-memory-footprint RCU" + depends on !SMP + help + This option selects the RCU implementation that is + designed for UP systems from which real-time response + is not required. This option greatly reduces the + memory footprint of RCU. + endchoice config RCU_TRACE @@ -606,7 +615,7 @@ config SYSFS_DEPRECATED bool config SYSFS_DEPRECATED_V2 - bool "remove sysfs features which may confuse old userspace tools" + bool "enable deprecated sysfs features which may confuse old userspace tools" depends on SYSFS default n select SYSFS_DEPRECATED @@ -1098,12 +1107,12 @@ config SLOW_WORK See Documentation/slow-work.txt. -config SLOW_WORK_PROC - bool "Slow work debugging through /proc" +config SLOW_WORK_DEBUG + bool "Slow work debugging through debugfs" default n - depends on SLOW_WORK && PROC_FS + depends on SLOW_WORK && DEBUG_FS help - Display the contents of the slow work run queue through /proc, + Display the contents of the slow work run queue through debugfs, including items currently executing. See Documentation/slow-work.txt. @@ -1220,3 +1229,4 @@ source "block/Kconfig" config PREEMPT_NOTIFIERS bool +source "kernel/Kconfig.locks" diff --git a/init/main.c b/init/main.c index 5988debfc50..4051d75dd2d 100644 --- a/init/main.c +++ b/init/main.c @@ -251,7 +251,7 @@ early_param("loglevel", loglevel); /* * Unknown boot options get handed to init, unless they look like - * failed parameters + * unused parameters (modprobe will find them in /proc/cmdline). */ static int __init unknown_bootoption(char *param, char *val) { @@ -272,14 +272,9 @@ static int __init unknown_bootoption(char *param, char *val) if (obsolete_checksetup(param)) return 0; - /* - * Preemptive maintenance for "why didn't my misspelled command - * line work?" - */ - if (strchr(param, '.') && (!val || strchr(param, '.') < val)) { - printk(KERN_ERR "Unknown boot option `%s': ignoring\n", param); + /* Unused module parameter. */ + if (strchr(param, '.') && (!val || strchr(param, '.') < val)) return 0; - } if (panic_later) return 0; diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks new file mode 100644 index 00000000000..88c92fb4461 --- /dev/null +++ b/kernel/Kconfig.locks @@ -0,0 +1,202 @@ +# +# The ARCH_INLINE foo is necessary because select ignores "depends on" +# +config ARCH_INLINE_SPIN_TRYLOCK + bool + +config ARCH_INLINE_SPIN_TRYLOCK_BH + bool + +config ARCH_INLINE_SPIN_LOCK + bool + +config ARCH_INLINE_SPIN_LOCK_BH + bool + +config ARCH_INLINE_SPIN_LOCK_IRQ + bool + +config ARCH_INLINE_SPIN_LOCK_IRQSAVE + bool + +config ARCH_INLINE_SPIN_UNLOCK + bool + +config ARCH_INLINE_SPIN_UNLOCK_BH + bool + +config ARCH_INLINE_SPIN_UNLOCK_IRQ + bool + +config ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE + bool + + +config ARCH_INLINE_READ_TRYLOCK + bool + +config ARCH_INLINE_READ_LOCK + bool + +config ARCH_INLINE_READ_LOCK_BH + bool + +config ARCH_INLINE_READ_LOCK_IRQ + bool + +config ARCH_INLINE_READ_LOCK_IRQSAVE + bool + +config ARCH_INLINE_READ_UNLOCK + bool + +config ARCH_INLINE_READ_UNLOCK_BH + bool + +config ARCH_INLINE_READ_UNLOCK_IRQ + bool + +config ARCH_INLINE_READ_UNLOCK_IRQRESTORE + bool + + +config ARCH_INLINE_WRITE_TRYLOCK + bool + +config ARCH_INLINE_WRITE_LOCK + bool + +config ARCH_INLINE_WRITE_LOCK_BH + bool + +config ARCH_INLINE_WRITE_LOCK_IRQ + bool + +config ARCH_INLINE_WRITE_LOCK_IRQSAVE + bool + +config ARCH_INLINE_WRITE_UNLOCK + bool + +config ARCH_INLINE_WRITE_UNLOCK_BH + bool + +config ARCH_INLINE_WRITE_UNLOCK_IRQ + bool + +config ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE + bool + +# +# lock_* functions are inlined when: +# - DEBUG_SPINLOCK=n and GENERIC_LOCKBREAK=n and ARCH_INLINE_*LOCK=y +# +# trylock_* functions are inlined when: +# - DEBUG_SPINLOCK=n and ARCH_INLINE_*LOCK=y +# +# unlock and unlock_irq functions are inlined when: +# - DEBUG_SPINLOCK=n and ARCH_INLINE_*LOCK=y +# or +# - DEBUG_SPINLOCK=n and PREEMPT=n +# +# unlock_bh and unlock_irqrestore functions are inlined when: +# - DEBUG_SPINLOCK=n and ARCH_INLINE_*LOCK=y +# + +config INLINE_SPIN_TRYLOCK + def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_TRYLOCK + +config INLINE_SPIN_TRYLOCK_BH + def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_TRYLOCK_BH + +config INLINE_SPIN_LOCK + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && ARCH_INLINE_SPIN_LOCK + +config INLINE_SPIN_LOCK_BH + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ + ARCH_INLINE_SPIN_LOCK_BH + +config INLINE_SPIN_LOCK_IRQ + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ + ARCH_INLINE_SPIN_LOCK_IRQ + +config INLINE_SPIN_LOCK_IRQSAVE + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ + ARCH_INLINE_SPIN_LOCK_IRQSAVE + +config INLINE_SPIN_UNLOCK + def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_SPIN_UNLOCK) + +config INLINE_SPIN_UNLOCK_BH + def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_UNLOCK_BH + +config INLINE_SPIN_UNLOCK_IRQ + def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_SPIN_UNLOCK_BH) + +config INLINE_SPIN_UNLOCK_IRQRESTORE + def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE + + +config INLINE_READ_TRYLOCK + def_bool !DEBUG_SPINLOCK && ARCH_INLINE_READ_TRYLOCK + +config INLINE_READ_LOCK + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && ARCH_INLINE_READ_LOCK + +config INLINE_READ_LOCK_BH + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ + ARCH_INLINE_READ_LOCK_BH + +config INLINE_READ_LOCK_IRQ + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ + ARCH_INLINE_READ_LOCK_IRQ + +config INLINE_READ_LOCK_IRQSAVE + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ + ARCH_INLINE_READ_LOCK_IRQSAVE + +config INLINE_READ_UNLOCK + def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_READ_UNLOCK) + +config INLINE_READ_UNLOCK_BH + def_bool !DEBUG_SPINLOCK && ARCH_INLINE_READ_UNLOCK_BH + +config INLINE_READ_UNLOCK_IRQ + def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_READ_UNLOCK_BH) + +config INLINE_READ_UNLOCK_IRQRESTORE + def_bool !DEBUG_SPINLOCK && ARCH_INLINE_READ_UNLOCK_IRQRESTORE + + +config INLINE_WRITE_TRYLOCK + def_bool !DEBUG_SPINLOCK && ARCH_INLINE_WRITE_TRYLOCK + +config INLINE_WRITE_LOCK + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && ARCH_INLINE_WRITE_LOCK + +config INLINE_WRITE_LOCK_BH + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ + ARCH_INLINE_WRITE_LOCK_BH + +config INLINE_WRITE_LOCK_IRQ + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ + ARCH_INLINE_WRITE_LOCK_IRQ + +config INLINE_WRITE_LOCK_IRQSAVE + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ + ARCH_INLINE_WRITE_LOCK_IRQSAVE + +config INLINE_WRITE_UNLOCK + def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_WRITE_UNLOCK) + +config INLINE_WRITE_UNLOCK_BH + def_bool !DEBUG_SPINLOCK && ARCH_INLINE_WRITE_UNLOCK_BH + +config INLINE_WRITE_UNLOCK_IRQ + def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_WRITE_UNLOCK_BH) + +config INLINE_WRITE_UNLOCK_IRQRESTORE + def_bool !DEBUG_SPINLOCK && ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE + +config MUTEX_SPIN_ON_OWNER + def_bool SMP && !DEBUG_MUTEXES && !HAVE_DEFAULT_NO_SPIN_MUTEXES diff --git a/kernel/Makefile b/kernel/Makefile index 776ffed1556..982c50e2ce5 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -21,6 +21,7 @@ CFLAGS_REMOVE_mutex-debug.o = -pg CFLAGS_REMOVE_rtmutex-debug.o = -pg CFLAGS_REMOVE_cgroup-debug.o = -pg CFLAGS_REMOVE_sched_clock.o = -pg +CFLAGS_REMOVE_perf_event.o = -pg endif obj-$(CONFIG_FREEZER) += freezer.o @@ -82,6 +83,7 @@ obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_TREE_RCU) += rcutree.o obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o +obj-$(CONFIG_TINY_RCU) += rcutiny.o obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o @@ -94,8 +96,9 @@ obj-$(CONFIG_X86_DS) += trace/ obj-$(CONFIG_RING_BUFFER) += trace/ obj-$(CONFIG_SMP) += sched_cpupri.o obj-$(CONFIG_SLOW_WORK) += slow-work.o -obj-$(CONFIG_SLOW_WORK_PROC) += slow-work-proc.o +obj-$(CONFIG_SLOW_WORK_DEBUG) += slow-work-debugfs.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o +obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is diff --git a/kernel/capability.c b/kernel/capability.c index 4e17041963f..7f876e60521 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -29,7 +29,6 @@ EXPORT_SYMBOL(__cap_empty_set); EXPORT_SYMBOL(__cap_full_set); EXPORT_SYMBOL(__cap_init_eff_set); -#ifdef CONFIG_SECURITY_FILE_CAPABILITIES int file_caps_enabled = 1; static int __init file_caps_disable(char *str) @@ -38,7 +37,6 @@ static int __init file_caps_disable(char *str) return 1; } __setup("no_file_caps", file_caps_disable); -#endif /* * More recent versions of libcap are available from: @@ -169,8 +167,8 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr) kernel_cap_t pE, pI, pP; ret = cap_validate_magic(header, &tocopy); - if (ret != 0) - return ret; + if ((dataptr == NULL) || (ret != 0)) + return ((dataptr == NULL) && (ret == -EINVAL)) ? 0 : ret; if (get_user(pid, &header->pid)) return -EFAULT; @@ -238,7 +236,7 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr) SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data) { struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S]; - unsigned i, tocopy; + unsigned i, tocopy, copybytes; kernel_cap_t inheritable, permitted, effective; struct cred *new; int ret; @@ -255,8 +253,11 @@ SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data) if (pid != 0 && pid != task_pid_vnr(current)) return -EPERM; - if (copy_from_user(&kdata, data, - tocopy * sizeof(struct __user_cap_data_struct))) + copybytes = tocopy * sizeof(struct __user_cap_data_struct); + if (copybytes > sizeof(kdata)) + return -EFAULT; + + if (copy_from_user(&kdata, data, copybytes)) return -EFAULT; for (i = 0; i < tocopy; i++) { diff --git a/kernel/cpuset.c b/kernel/cpuset.c index b5cb469d254..3cf2183b472 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -537,8 +537,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c) * element of the partition (one sched domain) to be passed to * partition_sched_domains(). */ -/* FIXME: see the FIXME in partition_sched_domains() */ -static int generate_sched_domains(struct cpumask **domains, +static int generate_sched_domains(cpumask_var_t **domains, struct sched_domain_attr **attributes) { LIST_HEAD(q); /* queue of cpusets to be scanned */ @@ -546,7 +545,7 @@ static int generate_sched_domains(struct cpumask **domains, struct cpuset **csa; /* array of all cpuset ptrs */ int csn; /* how many cpuset ptrs in csa so far */ int i, j, k; /* indices for partition finding loops */ - struct cpumask *doms; /* resulting partition; i.e. sched domains */ + cpumask_var_t *doms; /* resulting partition; i.e. sched domains */ struct sched_domain_attr *dattr; /* attributes for custom domains */ int ndoms = 0; /* number of sched domains in result */ int nslot; /* next empty doms[] struct cpumask slot */ @@ -557,7 +556,8 @@ static int generate_sched_domains(struct cpumask **domains, /* Special case for the 99% of systems with one, full, sched domain */ if (is_sched_load_balance(&top_cpuset)) { - doms = kmalloc(cpumask_size(), GFP_KERNEL); + ndoms = 1; + doms = alloc_sched_domains(ndoms); if (!doms) goto done; @@ -566,9 +566,8 @@ static int generate_sched_domains(struct cpumask **domains, *dattr = SD_ATTR_INIT; update_domain_attr_tree(dattr, &top_cpuset); } - cpumask_copy(doms, top_cpuset.cpus_allowed); + cpumask_copy(doms[0], top_cpuset.cpus_allowed); - ndoms = 1; goto done; } @@ -636,7 +635,7 @@ restart: * Now we know how many domains to create. * Convert <csn, csa> to <ndoms, doms> and populate cpu masks. */ - doms = kmalloc(ndoms * cpumask_size(), GFP_KERNEL); + doms = alloc_sched_domains(ndoms); if (!doms) goto done; @@ -656,7 +655,7 @@ restart: continue; } - dp = doms + nslot; + dp = doms[nslot]; if (nslot == ndoms) { static int warnings = 10; @@ -718,7 +717,7 @@ done: static void do_rebuild_sched_domains(struct work_struct *unused) { struct sched_domain_attr *attr; - struct cpumask *doms; + cpumask_var_t *doms; int ndoms; get_online_cpus(); @@ -2052,7 +2051,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb, unsigned long phase, void *unused_cpu) { struct sched_domain_attr *attr; - struct cpumask *doms; + cpumask_var_t *doms; int ndoms; switch (phase) { @@ -2537,15 +2536,9 @@ const struct file_operations proc_cpuset_operations = { }; #endif /* CONFIG_PROC_PID_CPUSET */ -/* Display task cpus_allowed, mems_allowed in /proc/<pid>/status file. */ +/* Display task mems_allowed in /proc/<pid>/status file. */ void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task) { - seq_printf(m, "Cpus_allowed:\t"); - seq_cpumask(m, &task->cpus_allowed); - seq_printf(m, "\n"); - seq_printf(m, "Cpus_allowed_list:\t"); - seq_cpumask_list(m, &task->cpus_allowed); - seq_printf(m, "\n"); seq_printf(m, "Mems_allowed:\t"); seq_nodemask(m, &task->mems_allowed); seq_printf(m, "\n"); diff --git a/kernel/exit.c b/kernel/exit.c index f7864ac2ecc..80ae941cfd2 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -49,6 +49,7 @@ #include <linux/init_task.h> #include <linux/perf_event.h> #include <trace/events/sched.h> +#include <linux/hw_breakpoint.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -110,9 +111,9 @@ static void __exit_signal(struct task_struct *tsk) * We won't ever get here for the group leader, since it * will have been the last reference on the signal_struct. */ - sig->utime = cputime_add(sig->utime, task_utime(tsk)); - sig->stime = cputime_add(sig->stime, task_stime(tsk)); - sig->gtime = cputime_add(sig->gtime, task_gtime(tsk)); + sig->utime = cputime_add(sig->utime, tsk->utime); + sig->stime = cputime_add(sig->stime, tsk->stime); + sig->gtime = cputime_add(sig->gtime, tsk->gtime); sig->min_flt += tsk->min_flt; sig->maj_flt += tsk->maj_flt; sig->nvcsw += tsk->nvcsw; @@ -978,6 +979,10 @@ NORET_TYPE void do_exit(long code) proc_exit_connector(tsk); /* + * FIXME: do that only when needed, using sched_exit tracepoint + */ + flush_ptrace_hw_breakpoint(tsk); + /* * Flush inherited counters to the parent - before the parent * gets woken up by child-exit notifications. */ @@ -1205,6 +1210,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) struct signal_struct *psig; struct signal_struct *sig; unsigned long maxrss; + cputime_t tgutime, tgstime; /* * The resource counters for the group leader are in its @@ -1220,20 +1226,23 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) * need to protect the access to parent->signal fields, * as other threads in the parent group can be right * here reaping other children at the same time. + * + * We use thread_group_times() to get times for the thread + * group, which consolidates times for all threads in the + * group including the group leader. */ + thread_group_times(p, &tgutime, &tgstime); spin_lock_irq(&p->real_parent->sighand->siglock); psig = p->real_parent->signal; sig = p->signal; psig->cutime = cputime_add(psig->cutime, - cputime_add(p->utime, - cputime_add(sig->utime, - sig->cutime))); + cputime_add(tgutime, + sig->cutime)); psig->cstime = cputime_add(psig->cstime, - cputime_add(p->stime, - cputime_add(sig->stime, - sig->cstime))); + cputime_add(tgstime, + sig->cstime)); psig->cgtime = cputime_add(psig->cgtime, cputime_add(p->gtime, diff --git a/kernel/fork.c b/kernel/fork.c index 166b8c49257..3d6f121bbe8 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -884,6 +884,9 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero; sig->gtime = cputime_zero; sig->cgtime = cputime_zero; +#ifndef CONFIG_VIRT_CPU_ACCOUNTING + sig->prev_utime = sig->prev_stime = cputime_zero; +#endif sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; @@ -1066,8 +1069,10 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->gtime = cputime_zero; p->utimescaled = cputime_zero; p->stimescaled = cputime_zero; +#ifndef CONFIG_VIRT_CPU_ACCOUNTING p->prev_utime = cputime_zero; p->prev_stime = cputime_zero; +#endif p->default_timer_slack_ns = current->timer_slack_ns; diff --git a/kernel/hung_task.c b/kernel/hung_task.c index d4e84174740..0c642d51aac 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -144,7 +144,7 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) rcu_read_lock(); do_each_thread(g, t) { - if (!--max_count) + if (!max_count--) goto unlock; if (!--batch_count) { batch_count = HUNG_TASK_BATCHING; diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c new file mode 100644 index 00000000000..cf5ee162841 --- /dev/null +++ b/kernel/hw_breakpoint.c @@ -0,0 +1,423 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) 2007 Alan Stern + * Copyright (C) IBM Corporation, 2009 + * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com> + * + * Thanks to Ingo Molnar for his many suggestions. + * + * Authors: Alan Stern <stern@rowland.harvard.edu> + * K.Prasad <prasad@linux.vnet.ibm.com> + * Frederic Weisbecker <fweisbec@gmail.com> + */ + +/* + * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, + * using the CPU's debug registers. + * This file contains the arch-independent routines. + */ + +#include <linux/irqflags.h> +#include <linux/kallsyms.h> +#include <linux/notifier.h> +#include <linux/kprobes.h> +#include <linux/kdebug.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/percpu.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/smp.h> + +#include <linux/hw_breakpoint.h> + +/* + * Constraints data + */ + +/* Number of pinned cpu breakpoints in a cpu */ +static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned); + +/* Number of pinned task breakpoints in a cpu */ +static DEFINE_PER_CPU(unsigned int, task_bp_pinned[HBP_NUM]); + +/* Number of non-pinned cpu/task breakpoints in a cpu */ +static DEFINE_PER_CPU(unsigned int, nr_bp_flexible); + +/* Gather the number of total pinned and un-pinned bp in a cpuset */ +struct bp_busy_slots { + unsigned int pinned; + unsigned int flexible; +}; + +/* Serialize accesses to the above constraints */ +static DEFINE_MUTEX(nr_bp_mutex); + +/* + * Report the maximum number of pinned breakpoints a task + * have in this cpu + */ +static unsigned int max_task_bp_pinned(int cpu) +{ + int i; + unsigned int *tsk_pinned = per_cpu(task_bp_pinned, cpu); + + for (i = HBP_NUM -1; i >= 0; i--) { + if (tsk_pinned[i] > 0) + return i + 1; + } + + return 0; +} + +/* + * Report the number of pinned/un-pinned breakpoints we have in + * a given cpu (cpu > -1) or in all of them (cpu = -1). + */ +static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu) +{ + if (cpu >= 0) { + slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu); + slots->pinned += max_task_bp_pinned(cpu); + slots->flexible = per_cpu(nr_bp_flexible, cpu); + + return; + } + + for_each_online_cpu(cpu) { + unsigned int nr; + + nr = per_cpu(nr_cpu_bp_pinned, cpu); + nr += max_task_bp_pinned(cpu); + + if (nr > slots->pinned) + slots->pinned = nr; + + nr = per_cpu(nr_bp_flexible, cpu); + + if (nr > slots->flexible) + slots->flexible = nr; + } +} + +/* + * Add a pinned breakpoint for the given task in our constraint table + */ +static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable) +{ + int count = 0; + struct perf_event *bp; + struct perf_event_context *ctx = tsk->perf_event_ctxp; + unsigned int *tsk_pinned; + struct list_head *list; + unsigned long flags; + + if (WARN_ONCE(!ctx, "No perf context for this task")) + return; + + list = &ctx->event_list; + + spin_lock_irqsave(&ctx->lock, flags); + + /* + * The current breakpoint counter is not included in the list + * at the open() callback time + */ + list_for_each_entry(bp, list, event_entry) { + if (bp->attr.type == PERF_TYPE_BREAKPOINT) + count++; + } + + spin_unlock_irqrestore(&ctx->lock, flags); + + if (WARN_ONCE(count < 0, "No breakpoint counter found in the counter list")) + return; + + tsk_pinned = per_cpu(task_bp_pinned, cpu); + if (enable) { + tsk_pinned[count]++; + if (count > 0) + tsk_pinned[count-1]--; + } else { + tsk_pinned[count]--; + if (count > 0) + tsk_pinned[count-1]++; + } +} + +/* + * Add/remove the given breakpoint in our constraint table + */ +static void toggle_bp_slot(struct perf_event *bp, bool enable) +{ + int cpu = bp->cpu; + struct task_struct *tsk = bp->ctx->task; + + /* Pinned counter task profiling */ + if (tsk) { + if (cpu >= 0) { + toggle_bp_task_slot(tsk, cpu, enable); + return; + } + + for_each_online_cpu(cpu) + toggle_bp_task_slot(tsk, cpu, enable); + return; + } + + /* Pinned counter cpu profiling */ + if (enable) + per_cpu(nr_cpu_bp_pinned, bp->cpu)++; + else + per_cpu(nr_cpu_bp_pinned, bp->cpu)--; +} + +/* + * Contraints to check before allowing this new breakpoint counter: + * + * == Non-pinned counter == (Considered as pinned for now) + * + * - If attached to a single cpu, check: + * + * (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu) + * + max(per_cpu(task_bp_pinned, cpu)))) < HBP_NUM + * + * -> If there are already non-pinned counters in this cpu, it means + * there is already a free slot for them. + * Otherwise, we check that the maximum number of per task + * breakpoints (for this cpu) plus the number of per cpu breakpoint + * (for this cpu) doesn't cover every registers. + * + * - If attached to every cpus, check: + * + * (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *)) + * + max(per_cpu(task_bp_pinned, *)))) < HBP_NUM + * + * -> This is roughly the same, except we check the number of per cpu + * bp for every cpu and we keep the max one. Same for the per tasks + * breakpoints. + * + * + * == Pinned counter == + * + * - If attached to a single cpu, check: + * + * ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu) + * + max(per_cpu(task_bp_pinned, cpu))) < HBP_NUM + * + * -> Same checks as before. But now the nr_bp_flexible, if any, must keep + * one register at least (or they will never be fed). + * + * - If attached to every cpus, check: + * + * ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *)) + * + max(per_cpu(task_bp_pinned, *))) < HBP_NUM + */ +int reserve_bp_slot(struct perf_event *bp) +{ + struct bp_busy_slots slots = {0}; + int ret = 0; + + mutex_lock(&nr_bp_mutex); + + fetch_bp_busy_slots(&slots, bp->cpu); + + /* Flexible counters need to keep at least one slot */ + if (slots.pinned + (!!slots.flexible) == HBP_NUM) { + ret = -ENOSPC; + goto end; + } + + toggle_bp_slot(bp, true); + +end: + mutex_unlock(&nr_bp_mutex); + + return ret; +} + +void release_bp_slot(struct perf_event *bp) +{ + mutex_lock(&nr_bp_mutex); + + toggle_bp_slot(bp, false); + + mutex_unlock(&nr_bp_mutex); +} + + +int __register_perf_hw_breakpoint(struct perf_event *bp) +{ + int ret; + + ret = reserve_bp_slot(bp); + if (ret) + return ret; + + /* + * Ptrace breakpoints can be temporary perf events only + * meant to reserve a slot. In this case, it is created disabled and + * we don't want to check the params right now (as we put a null addr) + * But perf tools create events as disabled and we want to check + * the params for them. + * This is a quick hack that will be removed soon, once we remove + * the tmp breakpoints from ptrace + */ + if (!bp->attr.disabled || bp->callback == perf_bp_event) + ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task); + + return ret; +} + +int register_perf_hw_breakpoint(struct perf_event *bp) +{ + bp->callback = perf_bp_event; + + return __register_perf_hw_breakpoint(bp); +} + +/** + * register_user_hw_breakpoint - register a hardware breakpoint for user space + * @attr: breakpoint attributes + * @triggered: callback to trigger when we hit the breakpoint + * @tsk: pointer to 'task_struct' of the process to which the address belongs + */ +struct perf_event * +register_user_hw_breakpoint(struct perf_event_attr *attr, + perf_callback_t triggered, + struct task_struct *tsk) +{ + return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered); +} +EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); + +/** + * modify_user_hw_breakpoint - modify a user-space hardware breakpoint + * @bp: the breakpoint structure to modify + * @attr: new breakpoint attributes + * @triggered: callback to trigger when we hit the breakpoint + * @tsk: pointer to 'task_struct' of the process to which the address belongs + */ +struct perf_event * +modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr, + perf_callback_t triggered, + struct task_struct *tsk) +{ + /* + * FIXME: do it without unregistering + * - We don't want to lose our slot + * - If the new bp is incorrect, don't lose the older one + */ + unregister_hw_breakpoint(bp); + + return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered); +} +EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); + +/** + * unregister_hw_breakpoint - unregister a user-space hardware breakpoint + * @bp: the breakpoint structure to unregister + */ +void unregister_hw_breakpoint(struct perf_event *bp) +{ + if (!bp) + return; + perf_event_release_kernel(bp); +} +EXPORT_SYMBOL_GPL(unregister_hw_breakpoint); + +/** + * register_wide_hw_breakpoint - register a wide breakpoint in the kernel + * @attr: breakpoint attributes + * @triggered: callback to trigger when we hit the breakpoint + * + * @return a set of per_cpu pointers to perf events + */ +struct perf_event ** +register_wide_hw_breakpoint(struct perf_event_attr *attr, + perf_callback_t triggered) +{ + struct perf_event **cpu_events, **pevent, *bp; + long err; + int cpu; + + cpu_events = alloc_percpu(typeof(*cpu_events)); + if (!cpu_events) + return ERR_PTR(-ENOMEM); + + for_each_possible_cpu(cpu) { + pevent = per_cpu_ptr(cpu_events, cpu); + bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered); + + *pevent = bp; + + if (IS_ERR(bp)) { + err = PTR_ERR(bp); + goto fail; + } + } + + return cpu_events; + +fail: + for_each_possible_cpu(cpu) { + pevent = per_cpu_ptr(cpu_events, cpu); + if (IS_ERR(*pevent)) + break; + unregister_hw_breakpoint(*pevent); + } + free_percpu(cpu_events); + /* return the error if any */ + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint); + +/** + * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel + * @cpu_events: the per cpu set of events to unregister + */ +void unregister_wide_hw_breakpoint(struct perf_event **cpu_events) +{ + int cpu; + struct perf_event **pevent; + + for_each_possible_cpu(cpu) { + pevent = per_cpu_ptr(cpu_events, cpu); + unregister_hw_breakpoint(*pevent); + } + free_percpu(cpu_events); +} +EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint); + +static struct notifier_block hw_breakpoint_exceptions_nb = { + .notifier_call = hw_breakpoint_exceptions_notify, + /* we need to be notified first */ + .priority = 0x7fffffff +}; + +static int __init init_hw_breakpoint(void) +{ + return register_die_notifier(&hw_breakpoint_exceptions_nb); +} +core_initcall(init_hw_breakpoint); + + +struct pmu perf_ops_bp = { + .enable = arch_install_hw_breakpoint, + .disable = arch_uninstall_hw_breakpoint, + .read = hw_breakpoint_pmu_read, + .unthrottle = hw_breakpoint_pmu_unthrottle +}; diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index c1660194d11..ba566c261ad 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -166,11 +166,11 @@ int set_irq_data(unsigned int irq, void *data) EXPORT_SYMBOL(set_irq_data); /** - * set_irq_data - set irq type data for an irq + * set_irq_msi - set MSI descriptor data for an irq * @irq: Interrupt number * @entry: Pointer to MSI descriptor data * - * Set the hardware irq controller data for an irq + * Set the MSI descriptor entry for an irq */ int set_irq_msi(unsigned int irq, struct msi_desc *entry) { @@ -590,7 +590,7 @@ out_unlock: } /** - * handle_percpu_IRQ - Per CPU local irq handler + * handle_percpu_irq - Per CPU local irq handler * @irq: the interrupt number * @desc: the interrupt description structure for this irq * diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 692363dd591..0832145fea9 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -136,7 +136,7 @@ out: static int default_affinity_open(struct inode *inode, struct file *file) { - return single_open(file, default_affinity_show, NULL); + return single_open(file, default_affinity_show, PDE(inode)->data); } static const struct file_operations default_affinity_proc_fops = { @@ -148,18 +148,28 @@ static const struct file_operations default_affinity_proc_fops = { }; #endif -static int irq_spurious_read(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int irq_spurious_proc_show(struct seq_file *m, void *v) { - struct irq_desc *desc = irq_to_desc((long) data); - return sprintf(page, "count %u\n" - "unhandled %u\n" - "last_unhandled %u ms\n", - desc->irq_count, - desc->irqs_unhandled, - jiffies_to_msecs(desc->last_unhandled)); + struct irq_desc *desc = irq_to_desc((long) m->private); + + seq_printf(m, "count %u\n" "unhandled %u\n" "last_unhandled %u ms\n", + desc->irq_count, desc->irqs_unhandled, + jiffies_to_msecs(desc->last_unhandled)); + return 0; +} + +static int irq_spurious_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, irq_spurious_proc_show, NULL); } +static const struct file_operations irq_spurious_proc_fops = { + .open = irq_spurious_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + #define MAX_NAMELEN 128 static int name_unique(unsigned int irq, struct irqaction *new_action) @@ -204,7 +214,6 @@ void register_handler_proc(unsigned int irq, struct irqaction *action) void register_irq_proc(unsigned int irq, struct irq_desc *desc) { char name [MAX_NAMELEN]; - struct proc_dir_entry *entry; if (!root_irq_dir || (desc->chip == &no_irq_chip) || desc->dir) return; @@ -214,6 +223,8 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) /* create /proc/irq/1234 */ desc->dir = proc_mkdir(name, root_irq_dir); + if (!desc->dir) + return; #ifdef CONFIG_SMP /* create /proc/irq/<irq>/smp_affinity */ @@ -221,11 +232,8 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) &irq_affinity_proc_fops, (void *)(long)irq); #endif - entry = create_proc_entry("spurious", 0444, desc->dir); - if (entry) { - entry->data = (void *)(long)irq; - entry->read_proc = irq_spurious_read; - } + proc_create_data("spurious", 0444, desc->dir, + &irq_spurious_proc_fops, (void *)(long)irq); } #undef MAX_NAMELEN diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index bd7273e6282..22b0a6eedf2 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -104,7 +104,7 @@ static int misrouted_irq(int irq) return ok; } -static void poll_all_shared_irqs(void) +static void poll_spurious_irqs(unsigned long dummy) { struct irq_desc *desc; int i; @@ -125,23 +125,11 @@ static void poll_all_shared_irqs(void) try_one_irq(i, desc); local_irq_enable(); } -} - -static void poll_spurious_irqs(unsigned long dummy) -{ - poll_all_shared_irqs(); mod_timer(&poll_spurious_irq_timer, jiffies + POLL_SPURIOUS_IRQ_INTERVAL); } -#ifdef CONFIG_DEBUG_SHIRQ -void debug_poll_all_shared_irqs(void) -{ - poll_all_shared_irqs(); -} -#endif - /* * If 99,900 of the previous 100,000 interrupts have not been handled * then assume that the IRQ is stuck in some manner. Drop a diagnostic diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 8b6b8b697c6..8e5288a8a35 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -181,6 +181,7 @@ unsigned long kallsyms_lookup_name(const char *name) } return module_kallsyms_lookup_name(name); } +EXPORT_SYMBOL_GPL(kallsyms_lookup_name); int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, unsigned long), diff --git a/kernel/kgdb.c b/kernel/kgdb.c index 9147a3190c9..7d701463402 100644 --- a/kernel/kgdb.c +++ b/kernel/kgdb.c @@ -870,7 +870,7 @@ static void gdb_cmd_getregs(struct kgdb_state *ks) /* * All threads that don't have debuggerinfo should be - * in __schedule() sleeping, since all other CPUs + * in schedule() sleeping, since all other CPUs * are in kgdb_wait, and thus have debuggerinfo. */ if (local_debuggerinfo) { diff --git a/kernel/kmod.c b/kernel/kmod.c index 9fcb53a11f8..25b10319036 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -80,16 +80,16 @@ int __request_module(bool wait, const char *fmt, ...) #define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */ static int kmod_loop_msg; - ret = security_kernel_module_request(); - if (ret) - return ret; - va_start(args, fmt); ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args); va_end(args); if (ret >= MODULE_NAME_LEN) return -ENAMETOOLONG; + ret = security_kernel_module_request(module_name); + if (ret) + return ret; + /* If modprobe needs a service that is in a module, we get a recursive * loop. Limit the number of running kmod threads to max_threads/2 or * MAX_KMOD_CONCURRENT, whichever is the smaller. A cleaner method diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 5240d75f4c6..e5342a344c4 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -90,6 +90,9 @@ static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) */ static struct kprobe_blackpoint kprobe_blacklist[] = { {"preempt_schedule",}, + {"native_get_debugreg",}, + {"irq_entries_start",}, + {"common_interrupt",}, {NULL} /* Terminator */ }; @@ -673,6 +676,40 @@ static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p) return (kprobe_opcode_t *)(((char *)addr) + p->offset); } +/* Check passed kprobe is valid and return kprobe in kprobe_table. */ +static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p) +{ + struct kprobe *old_p, *list_p; + + old_p = get_kprobe(p->addr); + if (unlikely(!old_p)) + return NULL; + + if (p != old_p) { + list_for_each_entry_rcu(list_p, &old_p->list, list) + if (list_p == p) + /* kprobe p is a valid probe */ + goto valid; + return NULL; + } +valid: + return old_p; +} + +/* Return error if the kprobe is being re-registered */ +static inline int check_kprobe_rereg(struct kprobe *p) +{ + int ret = 0; + struct kprobe *old_p; + + mutex_lock(&kprobe_mutex); + old_p = __get_valid_kprobe(p); + if (old_p) + ret = -EINVAL; + mutex_unlock(&kprobe_mutex); + return ret; +} + int __kprobes register_kprobe(struct kprobe *p) { int ret = 0; @@ -685,6 +722,10 @@ int __kprobes register_kprobe(struct kprobe *p) return -EINVAL; p->addr = addr; + ret = check_kprobe_rereg(p); + if (ret) + return ret; + preempt_disable(); if (!kernel_text_address((unsigned long) p->addr) || in_kprobes_functions((unsigned long) p->addr)) { @@ -754,26 +795,6 @@ out: } EXPORT_SYMBOL_GPL(register_kprobe); -/* Check passed kprobe is valid and return kprobe in kprobe_table. */ -static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p) -{ - struct kprobe *old_p, *list_p; - - old_p = get_kprobe(p->addr); - if (unlikely(!old_p)) - return NULL; - - if (p != old_p) { - list_for_each_entry_rcu(list_p, &old_p->list, list) - if (list_p == p) - /* kprobe p is a valid probe */ - goto valid; - return NULL; - } -valid: - return old_p; -} - /* * Unregister a kprobe without a scheduler synchronization. */ @@ -1014,9 +1035,9 @@ int __kprobes register_kretprobe(struct kretprobe *rp) /* Pre-allocate memory for max kretprobe instances */ if (rp->maxactive <= 0) { #ifdef CONFIG_PREEMPT - rp->maxactive = max(10, 2 * NR_CPUS); + rp->maxactive = max(10, 2 * num_possible_cpus()); #else - rp->maxactive = NR_CPUS; + rp->maxactive = num_possible_cpus(); #endif } spin_lock_init(&rp->lock); @@ -1141,6 +1162,13 @@ static void __kprobes kill_kprobe(struct kprobe *p) arch_remove_kprobe(p); } +void __kprobes dump_kprobe(struct kprobe *kp) +{ + printk(KERN_WARNING "Dumping kprobe:\n"); + printk(KERN_WARNING "Name: %s\nAddress: %p\nOffset: %x\n", + kp->symbol_name, kp->addr, kp->offset); +} + /* Module notifier call back, checking kprobes on the module */ static int __kprobes kprobes_module_callback(struct notifier_block *nb, unsigned long val, void *data) diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 9af56723c09..f5dcd36d315 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -49,7 +49,7 @@ #include "lockdep_internals.h" #define CREATE_TRACE_POINTS -#include <trace/events/lockdep.h> +#include <trace/events/lock.h> #ifdef CONFIG_PROVE_LOCKING int prove_locking = 1; diff --git a/kernel/module.c b/kernel/module.c index 8b7d8805819..5842a71cf05 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1187,7 +1187,8 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect, /* Count loaded sections and allocate structures */ for (i = 0; i < nsect; i++) - if (sechdrs[i].sh_flags & SHF_ALLOC) + if (sechdrs[i].sh_flags & SHF_ALLOC + && sechdrs[i].sh_size) nloaded++; size[0] = ALIGN(sizeof(*sect_attrs) + nloaded * sizeof(sect_attrs->attrs[0]), @@ -1207,6 +1208,8 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect, for (i = 0; i < nsect; i++) { if (! (sechdrs[i].sh_flags & SHF_ALLOC)) continue; + if (!sechdrs[i].sh_size) + continue; sattr->address = sechdrs[i].sh_addr; sattr->name = kstrdup(secstrings + sechdrs[i].sh_name, GFP_KERNEL); diff --git a/kernel/mutex.c b/kernel/mutex.c index 947b3ad551f..632f04c57d8 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -148,8 +148,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, preempt_disable(); mutex_acquire(&lock->dep_map, subclass, 0, ip); -#if defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES) && \ - !defined(CONFIG_HAVE_DEFAULT_NO_SPIN_MUTEXES) + +#ifdef CONFIG_MUTEX_SPIN_ON_OWNER /* * Optimistic spinning. * diff --git a/kernel/notifier.c b/kernel/notifier.c index 61d5aa5eced..acd24e7643e 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -558,7 +558,7 @@ EXPORT_SYMBOL(unregister_reboot_notifier); static ATOMIC_NOTIFIER_HEAD(die_chain); -int notrace notify_die(enum die_val val, const char *str, +int notrace __kprobes notify_die(enum die_val val, const char *str, struct pt_regs *regs, long err, int trap, int sig) { struct die_args args = { diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 7f29643c898..6b7ddba1dd6 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -28,6 +28,8 @@ #include <linux/anon_inodes.h> #include <linux/kernel_stat.h> #include <linux/perf_event.h> +#include <linux/ftrace_event.h> +#include <linux/hw_breakpoint.h> #include <asm/irq_regs.h> @@ -244,6 +246,49 @@ static void perf_unpin_context(struct perf_event_context *ctx) put_ctx(ctx); } +static inline u64 perf_clock(void) +{ + return cpu_clock(smp_processor_id()); +} + +/* + * Update the record of the current time in a context. + */ +static void update_context_time(struct perf_event_context *ctx) +{ + u64 now = perf_clock(); + + ctx->time += now - ctx->timestamp; + ctx->timestamp = now; +} + +/* + * Update the total_time_enabled and total_time_running fields for a event. + */ +static void update_event_times(struct perf_event *event) +{ + struct perf_event_context *ctx = event->ctx; + u64 run_end; + + if (event->state < PERF_EVENT_STATE_INACTIVE || + event->group_leader->state < PERF_EVENT_STATE_INACTIVE) + return; + + if (ctx->is_active) + run_end = ctx->time; + else + run_end = event->tstamp_stopped; + + event->total_time_enabled = run_end - event->tstamp_enabled; + + if (event->state == PERF_EVENT_STATE_INACTIVE) + run_end = event->tstamp_stopped; + else + run_end = ctx->time; + + event->total_time_running = run_end - event->tstamp_running; +} + /* * Add a event from the lists for its context. * Must be called with ctx->mutex and ctx->lock held. @@ -292,6 +337,18 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) if (event->group_leader != event) event->group_leader->nr_siblings--; + update_event_times(event); + + /* + * If event was in error state, then keep it + * that way, otherwise bogus counts will be + * returned on read(). The only way to get out + * of error state is by explicit re-enabling + * of the event + */ + if (event->state > PERF_EVENT_STATE_OFF) + event->state = PERF_EVENT_STATE_OFF; + /* * If this was a group event with sibling events then * upgrade the siblings to singleton events by adding them @@ -445,50 +502,11 @@ retry: * can remove the event safely, if the call above did not * succeed. */ - if (!list_empty(&event->group_entry)) { + if (!list_empty(&event->group_entry)) list_del_event(event, ctx); - } spin_unlock_irq(&ctx->lock); } -static inline u64 perf_clock(void) -{ - return cpu_clock(smp_processor_id()); -} - -/* - * Update the record of the current time in a context. - */ -static void update_context_time(struct perf_event_context *ctx) -{ - u64 now = perf_clock(); - - ctx->time += now - ctx->timestamp; - ctx->timestamp = now; -} - -/* - * Update the total_time_enabled and total_time_running fields for a event. - */ -static void update_event_times(struct perf_event *event) -{ - struct perf_event_context *ctx = event->ctx; - u64 run_end; - - if (event->state < PERF_EVENT_STATE_INACTIVE || - event->group_leader->state < PERF_EVENT_STATE_INACTIVE) - return; - - event->total_time_enabled = ctx->time - event->tstamp_enabled; - - if (event->state == PERF_EVENT_STATE_INACTIVE) - run_end = event->tstamp_stopped; - else - run_end = ctx->time; - - event->total_time_running = run_end - event->tstamp_running; -} - /* * Update total_time_enabled and total_time_running for all events in a group. */ @@ -1031,10 +1049,10 @@ void __perf_event_sched_out(struct perf_event_context *ctx, update_context_time(ctx); perf_disable(); - if (ctx->nr_active) + if (ctx->nr_active) { list_for_each_entry(event, &ctx->group_list, group_entry) group_sched_out(event, cpuctx, ctx); - + } perf_enable(); out: spin_unlock(&ctx->lock); @@ -1059,8 +1077,6 @@ static int context_equiv(struct perf_event_context *ctx1, && !ctx1->pin_count && !ctx2->pin_count; } -static void __perf_event_read(void *event); - static void __perf_event_sync_stat(struct perf_event *event, struct perf_event *next_event) { @@ -1078,8 +1094,8 @@ static void __perf_event_sync_stat(struct perf_event *event, */ switch (event->state) { case PERF_EVENT_STATE_ACTIVE: - __perf_event_read(event); - break; + event->pmu->read(event); + /* fall-through */ case PERF_EVENT_STATE_INACTIVE: update_event_times(event); @@ -1118,6 +1134,8 @@ static void perf_event_sync_stat(struct perf_event_context *ctx, if (!ctx->nr_stat) return; + update_context_time(ctx); + event = list_first_entry(&ctx->event_list, struct perf_event, event_entry); @@ -1161,8 +1179,6 @@ void perf_event_task_sched_out(struct task_struct *task, if (likely(!ctx || !cpuctx->task_ctx)) return; - update_context_time(ctx); - rcu_read_lock(); parent = rcu_dereference(ctx->parent_ctx); next_ctx = next->perf_event_ctxp; @@ -1515,7 +1531,6 @@ static void __perf_event_read(void *info) struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event *event = info; struct perf_event_context *ctx = event->ctx; - unsigned long flags; /* * If this is a task context, we need to check whether it is @@ -1527,12 +1542,12 @@ static void __perf_event_read(void *info) if (ctx->task && cpuctx->task_ctx != ctx) return; - local_irq_save(flags); - if (ctx->is_active) - update_context_time(ctx); - event->pmu->read(event); + spin_lock(&ctx->lock); + update_context_time(ctx); update_event_times(event); - local_irq_restore(flags); + spin_unlock(&ctx->lock); + + event->pmu->read(event); } static u64 perf_event_read(struct perf_event *event) @@ -1545,7 +1560,13 @@ static u64 perf_event_read(struct perf_event *event) smp_call_function_single(event->oncpu, __perf_event_read, event, 1); } else if (event->state == PERF_EVENT_STATE_INACTIVE) { + struct perf_event_context *ctx = event->ctx; + unsigned long flags; + + spin_lock_irqsave(&ctx->lock, flags); + update_context_time(ctx); update_event_times(event); + spin_unlock_irqrestore(&ctx->lock, flags); } return atomic64_read(&event->count); @@ -1658,6 +1679,8 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu) return ERR_PTR(err); } +static void perf_event_free_filter(struct perf_event *event); + static void free_event_rcu(struct rcu_head *head) { struct perf_event *event; @@ -1665,6 +1688,7 @@ static void free_event_rcu(struct rcu_head *head) event = container_of(head, struct perf_event, rcu_head); if (event->ns) put_pid_ns(event->ns); + perf_event_free_filter(event); kfree(event); } @@ -1696,16 +1720,10 @@ static void free_event(struct perf_event *event) call_rcu(&event->rcu_head, free_event_rcu); } -/* - * Called when the last reference to the file is gone. - */ -static int perf_release(struct inode *inode, struct file *file) +int perf_event_release_kernel(struct perf_event *event) { - struct perf_event *event = file->private_data; struct perf_event_context *ctx = event->ctx; - file->private_data = NULL; - WARN_ON_ONCE(ctx->parent_ctx); mutex_lock(&ctx->mutex); perf_event_remove_from_context(event); @@ -1720,6 +1738,19 @@ static int perf_release(struct inode *inode, struct file *file) return 0; } +EXPORT_SYMBOL_GPL(perf_event_release_kernel); + +/* + * Called when the last reference to the file is gone. + */ +static int perf_release(struct inode *inode, struct file *file) +{ + struct perf_event *event = file->private_data; + + file->private_data = NULL; + + return perf_event_release_kernel(event); +} static int perf_event_read_size(struct perf_event *event) { @@ -1746,91 +1777,94 @@ static int perf_event_read_size(struct perf_event *event) return size; } -static u64 perf_event_read_value(struct perf_event *event) +u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) { struct perf_event *child; u64 total = 0; + *enabled = 0; + *running = 0; + + mutex_lock(&event->child_mutex); total += perf_event_read(event); - list_for_each_entry(child, &event->child_list, child_list) + *enabled += event->total_time_enabled + + atomic64_read(&event->child_total_time_enabled); + *running += event->total_time_running + + atomic64_read(&event->child_total_time_running); + + list_for_each_entry(child, &event->child_list, child_list) { total += perf_event_read(child); + *enabled += child->total_time_enabled; + *running += child->total_time_running; + } + mutex_unlock(&event->child_mutex); return total; } - -static int perf_event_read_entry(struct perf_event *event, - u64 read_format, char __user *buf) -{ - int n = 0, count = 0; - u64 values[2]; - - values[n++] = perf_event_read_value(event); - if (read_format & PERF_FORMAT_ID) - values[n++] = primary_event_id(event); - - count = n * sizeof(u64); - - if (copy_to_user(buf, values, count)) - return -EFAULT; - - return count; -} +EXPORT_SYMBOL_GPL(perf_event_read_value); static int perf_event_read_group(struct perf_event *event, u64 read_format, char __user *buf) { struct perf_event *leader = event->group_leader, *sub; - int n = 0, size = 0, err = -EFAULT; - u64 values[3]; + int n = 0, size = 0, ret = -EFAULT; + struct perf_event_context *ctx = leader->ctx; + u64 values[5]; + u64 count, enabled, running; + + mutex_lock(&ctx->mutex); + count = perf_event_read_value(leader, &enabled, &running); values[n++] = 1 + leader->nr_siblings; - if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { - values[n++] = leader->total_time_enabled + - atomic64_read(&leader->child_total_time_enabled); - } - if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { - values[n++] = leader->total_time_running + - atomic64_read(&leader->child_total_time_running); - } + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + values[n++] = enabled; + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + values[n++] = running; + values[n++] = count; + if (read_format & PERF_FORMAT_ID) + values[n++] = primary_event_id(leader); size = n * sizeof(u64); if (copy_to_user(buf, values, size)) - return -EFAULT; - - err = perf_event_read_entry(leader, read_format, buf + size); - if (err < 0) - return err; + goto unlock; - size += err; + ret = size; list_for_each_entry(sub, &leader->sibling_list, group_entry) { - err = perf_event_read_entry(sub, read_format, - buf + size); - if (err < 0) - return err; + n = 0; + + values[n++] = perf_event_read_value(sub, &enabled, &running); + if (read_format & PERF_FORMAT_ID) + values[n++] = primary_event_id(sub); + + size = n * sizeof(u64); - size += err; + if (copy_to_user(buf + ret, values, size)) { + ret = -EFAULT; + goto unlock; + } + + ret += size; } +unlock: + mutex_unlock(&ctx->mutex); - return size; + return ret; } static int perf_event_read_one(struct perf_event *event, u64 read_format, char __user *buf) { + u64 enabled, running; u64 values[4]; int n = 0; - values[n++] = perf_event_read_value(event); - if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { - values[n++] = event->total_time_enabled + - atomic64_read(&event->child_total_time_enabled); - } - if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { - values[n++] = event->total_time_running + - atomic64_read(&event->child_total_time_running); - } + values[n++] = perf_event_read_value(event, &enabled, &running); + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + values[n++] = enabled; + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + values[n++] = running; if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(event); @@ -1861,12 +1895,10 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count) return -ENOSPC; WARN_ON_ONCE(event->ctx->parent_ctx); - mutex_lock(&event->child_mutex); if (read_format & PERF_FORMAT_GROUP) ret = perf_event_read_group(event, read_format, buf); else ret = perf_event_read_one(event, read_format, buf); - mutex_unlock(&event->child_mutex); return ret; } @@ -1974,7 +2006,8 @@ unlock: return ret; } -int perf_event_set_output(struct perf_event *event, int output_fd); +static int perf_event_set_output(struct perf_event *event, int output_fd); +static int perf_event_set_filter(struct perf_event *event, void __user *arg); static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -2002,6 +2035,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case PERF_EVENT_IOC_SET_OUTPUT: return perf_event_set_output(event, arg); + case PERF_EVENT_IOC_SET_FILTER: + return perf_event_set_filter(event, (void __user *)arg); + default: return -ENOTTY; } @@ -2174,6 +2210,7 @@ static void perf_mmap_data_free(struct perf_mmap_data *data) perf_mmap_free_page((unsigned long)data->user_page); for (i = 0; i < data->nr_pages; i++) perf_mmap_free_page((unsigned long)data->data_pages[i]); + kfree(data); } #else @@ -2214,6 +2251,7 @@ static void perf_mmap_data_free_work(struct work_struct *work) perf_mmap_unmark_page(base + (i * PAGE_SIZE)); vfree(base); + kfree(data); } static void perf_mmap_data_free(struct perf_mmap_data *data) @@ -2307,7 +2345,7 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data) } if (!data->watermark) - data->watermark = max_t(long, PAGE_SIZE, max_size / 2); + data->watermark = max_size / 2; rcu_assign_pointer(event->data, data); @@ -2319,7 +2357,6 @@ static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head) data = container_of(rcu_head, struct perf_mmap_data, rcu_head); perf_mmap_data_free(data); - kfree(data); } static void perf_mmap_data_release(struct perf_event *event) @@ -2666,20 +2703,21 @@ static void perf_output_wakeup(struct perf_output_handle *handle) static void perf_output_lock(struct perf_output_handle *handle) { struct perf_mmap_data *data = handle->data; - int cpu; + int cur, cpu = get_cpu(); handle->locked = 0; - local_irq_save(handle->flags); - cpu = smp_processor_id(); - - if (in_nmi() && atomic_read(&data->lock) == cpu) - return; + for (;;) { + cur = atomic_cmpxchg(&data->lock, -1, cpu); + if (cur == -1) { + handle->locked = 1; + break; + } + if (cur == cpu) + break; - while (atomic_cmpxchg(&data->lock, -1, cpu) != -1) cpu_relax(); - - handle->locked = 1; + } } static void perf_output_unlock(struct perf_output_handle *handle) @@ -2725,7 +2763,7 @@ again: if (atomic_xchg(&data->wakeup, 0)) perf_output_wakeup(handle); out: - local_irq_restore(handle->flags); + put_cpu(); } void perf_output_copy(struct perf_output_handle *handle, @@ -3236,15 +3274,10 @@ static void perf_event_task_ctx(struct perf_event_context *ctx, { struct perf_event *event; - if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) - return; - - rcu_read_lock(); list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { if (perf_event_task_match(event)) perf_event_task_output(event, task_event); } - rcu_read_unlock(); } static void perf_event_task_event(struct perf_task_event *task_event) @@ -3252,11 +3285,11 @@ static void perf_event_task_event(struct perf_task_event *task_event) struct perf_cpu_context *cpuctx; struct perf_event_context *ctx = task_event->task_ctx; + rcu_read_lock(); cpuctx = &get_cpu_var(perf_cpu_context); perf_event_task_ctx(&cpuctx->ctx, task_event); put_cpu_var(perf_cpu_context); - rcu_read_lock(); if (!ctx) ctx = rcu_dereference(task_event->task->perf_event_ctxp); if (ctx) @@ -3348,15 +3381,10 @@ static void perf_event_comm_ctx(struct perf_event_context *ctx, { struct perf_event *event; - if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) - return; - - rcu_read_lock(); list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { if (perf_event_comm_match(event)) perf_event_comm_output(event, comm_event); } - rcu_read_unlock(); } static void perf_event_comm_event(struct perf_comm_event *comm_event) @@ -3367,7 +3395,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) char comm[TASK_COMM_LEN]; memset(comm, 0, sizeof(comm)); - strncpy(comm, comm_event->task->comm, sizeof(comm)); + strlcpy(comm, comm_event->task->comm, sizeof(comm)); size = ALIGN(strlen(comm)+1, sizeof(u64)); comm_event->comm = comm; @@ -3375,11 +3403,11 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; + rcu_read_lock(); cpuctx = &get_cpu_var(perf_cpu_context); perf_event_comm_ctx(&cpuctx->ctx, comm_event); put_cpu_var(perf_cpu_context); - rcu_read_lock(); /* * doesn't really matter which of the child contexts the * events ends up in. @@ -3472,15 +3500,10 @@ static void perf_event_mmap_ctx(struct perf_event_context *ctx, { struct perf_event *event; - if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) - return; - - rcu_read_lock(); list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { if (perf_event_mmap_match(event, mmap_event)) perf_event_mmap_output(event, mmap_event); } - rcu_read_unlock(); } static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) @@ -3536,11 +3559,11 @@ got_name: mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; + rcu_read_lock(); cpuctx = &get_cpu_var(perf_cpu_context); perf_event_mmap_ctx(&cpuctx->ctx, mmap_event); put_cpu_var(perf_cpu_context); - rcu_read_lock(); /* * doesn't really matter which of the child contexts the * events ends up in. @@ -3679,7 +3702,11 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, perf_event_disable(event); } - perf_event_output(event, nmi, data, regs); + if (event->overflow_handler) + event->overflow_handler(event, nmi, data, regs); + else + perf_event_output(event, nmi, data, regs); + return ret; } @@ -3724,16 +3751,16 @@ again: return nr; } -static void perf_swevent_overflow(struct perf_event *event, +static void perf_swevent_overflow(struct perf_event *event, u64 overflow, int nmi, struct perf_sample_data *data, struct pt_regs *regs) { struct hw_perf_event *hwc = &event->hw; int throttle = 0; - u64 overflow; data->period = event->hw.last_period; - overflow = perf_swevent_set_period(event); + if (!overflow) + overflow = perf_swevent_set_period(event); if (hwc->interrupts == MAX_INTERRUPTS) return; @@ -3766,14 +3793,19 @@ static void perf_swevent_add(struct perf_event *event, u64 nr, atomic64_add(nr, &event->count); + if (!regs) + return; + if (!hwc->sample_period) return; - if (!regs) + if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) + return perf_swevent_overflow(event, 1, nmi, data, regs); + + if (atomic64_add_negative(nr, &hwc->period_left)) return; - if (!atomic64_add_negative(nr, &hwc->period_left)) - perf_swevent_overflow(event, nmi, data, regs); + perf_swevent_overflow(event, 0, nmi, data, regs); } static int perf_swevent_is_counting(struct perf_event *event) @@ -3806,25 +3838,44 @@ static int perf_swevent_is_counting(struct perf_event *event) return 1; } +static int perf_tp_event_match(struct perf_event *event, + struct perf_sample_data *data); + +static int perf_exclude_event(struct perf_event *event, + struct pt_regs *regs) +{ + if (regs) { + if (event->attr.exclude_user && user_mode(regs)) + return 1; + + if (event->attr.exclude_kernel && !user_mode(regs)) + return 1; + } + + return 0; +} + static int perf_swevent_match(struct perf_event *event, enum perf_type_id type, - u32 event_id, struct pt_regs *regs) + u32 event_id, + struct perf_sample_data *data, + struct pt_regs *regs) { if (!perf_swevent_is_counting(event)) return 0; if (event->attr.type != type) return 0; + if (event->attr.config != event_id) return 0; - if (regs) { - if (event->attr.exclude_user && user_mode(regs)) - return 0; + if (perf_exclude_event(event, regs)) + return 0; - if (event->attr.exclude_kernel && !user_mode(regs)) - return 0; - } + if (event->attr.type == PERF_TYPE_TRACEPOINT && + !perf_tp_event_match(event, data)) + return 0; return 1; } @@ -3837,49 +3888,59 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx, { struct perf_event *event; - if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) - return; - - rcu_read_lock(); list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { - if (perf_swevent_match(event, type, event_id, regs)) + if (perf_swevent_match(event, type, event_id, data, regs)) perf_swevent_add(event, nr, nmi, data, regs); } - rcu_read_unlock(); } -static int *perf_swevent_recursion_context(struct perf_cpu_context *cpuctx) +int perf_swevent_get_recursion_context(void) { + struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); + int rctx; + if (in_nmi()) - return &cpuctx->recursion[3]; + rctx = 3; + else if (in_irq()) + rctx = 2; + else if (in_softirq()) + rctx = 1; + else + rctx = 0; + + if (cpuctx->recursion[rctx]) { + put_cpu_var(perf_cpu_context); + return -1; + } - if (in_irq()) - return &cpuctx->recursion[2]; + cpuctx->recursion[rctx]++; + barrier(); - if (in_softirq()) - return &cpuctx->recursion[1]; + return rctx; +} +EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); - return &cpuctx->recursion[0]; +void perf_swevent_put_recursion_context(int rctx) +{ + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + barrier(); + cpuctx->recursion[rctx]--; + put_cpu_var(perf_cpu_context); } +EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context); static void do_perf_sw_event(enum perf_type_id type, u32 event_id, u64 nr, int nmi, struct perf_sample_data *data, struct pt_regs *regs) { - struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); - int *recursion = perf_swevent_recursion_context(cpuctx); + struct perf_cpu_context *cpuctx; struct perf_event_context *ctx; - if (*recursion) - goto out; - - (*recursion)++; - barrier(); - + cpuctx = &__get_cpu_var(perf_cpu_context); + rcu_read_lock(); perf_swevent_ctx_event(&cpuctx->ctx, type, event_id, nr, nmi, data, regs); - rcu_read_lock(); /* * doesn't really matter which of the child contexts the * events ends up in. @@ -3888,23 +3949,24 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, if (ctx) perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs); rcu_read_unlock(); - - barrier(); - (*recursion)--; - -out: - put_cpu_var(perf_cpu_context); } void __perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { - struct perf_sample_data data = { - .addr = addr, - }; + struct perf_sample_data data; + int rctx; - do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, - &data, regs); + rctx = perf_swevent_get_recursion_context(); + if (rctx < 0) + return; + + data.addr = addr; + data.raw = NULL; + + do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs); + + perf_swevent_put_recursion_context(rctx); } static void perf_swevent_read(struct perf_event *event) @@ -3949,6 +4011,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) event->pmu->read(event); data.addr = 0; + data.period = event->hw.last_period; regs = get_irq_regs(); /* * In case we exclude kernel IPs or are somehow not in interrupt @@ -4108,6 +4171,7 @@ static const struct pmu perf_ops_task_clock = { }; #ifdef CONFIG_EVENT_PROFILE + void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size) { @@ -4126,13 +4190,21 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record, if (!regs) regs = task_pt_regs(current); + /* Trace events already protected against recursion */ do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, &data, regs); } EXPORT_SYMBOL_GPL(perf_tp_event); -extern int ftrace_profile_enable(int); -extern void ftrace_profile_disable(int); +static int perf_tp_event_match(struct perf_event *event, + struct perf_sample_data *data) +{ + void *record = data->raw->data; + + if (likely(!event->filter) || filter_match_preds(event->filter, record)) + return 1; + return 0; +} static void tp_perf_event_destroy(struct perf_event *event) { @@ -4157,11 +4229,99 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event) return &perf_ops_generic; } + +static int perf_event_set_filter(struct perf_event *event, void __user *arg) +{ + char *filter_str; + int ret; + + if (event->attr.type != PERF_TYPE_TRACEPOINT) + return -EINVAL; + + filter_str = strndup_user(arg, PAGE_SIZE); + if (IS_ERR(filter_str)) + return PTR_ERR(filter_str); + + ret = ftrace_profile_set_filter(event, event->attr.config, filter_str); + + kfree(filter_str); + return ret; +} + +static void perf_event_free_filter(struct perf_event *event) +{ + ftrace_profile_free_filter(event); +} + #else + +static int perf_tp_event_match(struct perf_event *event, + struct perf_sample_data *data) +{ + return 1; +} + static const struct pmu *tp_perf_event_init(struct perf_event *event) { return NULL; } + +static int perf_event_set_filter(struct perf_event *event, void __user *arg) +{ + return -ENOENT; +} + +static void perf_event_free_filter(struct perf_event *event) +{ +} + +#endif /* CONFIG_EVENT_PROFILE */ + +#ifdef CONFIG_HAVE_HW_BREAKPOINT +static void bp_perf_event_destroy(struct perf_event *event) +{ + release_bp_slot(event); +} + +static const struct pmu *bp_perf_event_init(struct perf_event *bp) +{ + int err; + /* + * The breakpoint is already filled if we haven't created the counter + * through perf syscall + * FIXME: manage to get trigerred to NULL if it comes from syscalls + */ + if (!bp->callback) + err = register_perf_hw_breakpoint(bp); + else + err = __register_perf_hw_breakpoint(bp); + if (err) + return ERR_PTR(err); + + bp->destroy = bp_perf_event_destroy; + + return &perf_ops_bp; +} + +void perf_bp_event(struct perf_event *bp, void *data) +{ + struct perf_sample_data sample; + struct pt_regs *regs = data; + + sample.addr = bp->attr.bp_addr; + + if (!perf_exclude_event(bp, regs)) + perf_swevent_add(bp, 1, 1, &sample, regs); +} +#else +static const struct pmu *bp_perf_event_init(struct perf_event *bp) +{ + return NULL; +} + +void perf_bp_event(struct perf_event *bp, void *regs) +{ +} #endif atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; @@ -4208,6 +4368,8 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event) case PERF_COUNT_SW_PAGE_FAULTS_MAJ: case PERF_COUNT_SW_CONTEXT_SWITCHES: case PERF_COUNT_SW_CPU_MIGRATIONS: + case PERF_COUNT_SW_ALIGNMENT_FAULTS: + case PERF_COUNT_SW_EMULATION_FAULTS: if (!event->parent) { atomic_inc(&perf_swevent_enabled[event_id]); event->destroy = sw_perf_event_destroy; @@ -4228,6 +4390,7 @@ perf_event_alloc(struct perf_event_attr *attr, struct perf_event_context *ctx, struct perf_event *group_leader, struct perf_event *parent_event, + perf_callback_t callback, gfp_t gfpflags) { const struct pmu *pmu; @@ -4270,6 +4433,11 @@ perf_event_alloc(struct perf_event_attr *attr, event->state = PERF_EVENT_STATE_INACTIVE; + if (!callback && parent_event) + callback = parent_event->callback; + + event->callback = callback; + if (attr->disabled) event->state = PERF_EVENT_STATE_OFF; @@ -4304,6 +4472,11 @@ perf_event_alloc(struct perf_event_attr *attr, pmu = tp_perf_event_init(event); break; + case PERF_TYPE_BREAKPOINT: + pmu = bp_perf_event_init(event); + break; + + default: break; } @@ -4416,7 +4589,7 @@ err_size: goto out; } -int perf_event_set_output(struct perf_event *event, int output_fd) +static int perf_event_set_output(struct perf_event *event, int output_fd) { struct perf_event *output_event = NULL; struct file *output_file = NULL; @@ -4546,7 +4719,7 @@ SYSCALL_DEFINE5(perf_event_open, } event = perf_event_alloc(&attr, cpu, ctx, group_leader, - NULL, GFP_KERNEL); + NULL, NULL, GFP_KERNEL); err = PTR_ERR(event); if (IS_ERR(event)) goto err_put_context; @@ -4594,6 +4767,60 @@ err_put_context: return err; } +/** + * perf_event_create_kernel_counter + * + * @attr: attributes of the counter to create + * @cpu: cpu in which the counter is bound + * @pid: task to profile + */ +struct perf_event * +perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, + pid_t pid, perf_callback_t callback) +{ + struct perf_event *event; + struct perf_event_context *ctx; + int err; + + /* + * Get the target context (task or percpu): + */ + + ctx = find_get_context(pid, cpu); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto err_exit; + } + + event = perf_event_alloc(attr, cpu, ctx, NULL, + NULL, callback, GFP_KERNEL); + if (IS_ERR(event)) { + err = PTR_ERR(event); + goto err_put_context; + } + + event->filp = NULL; + WARN_ON_ONCE(ctx->parent_ctx); + mutex_lock(&ctx->mutex); + perf_install_in_context(ctx, event, cpu); + ++ctx->generation; + mutex_unlock(&ctx->mutex); + + event->owner = current; + get_task_struct(current); + mutex_lock(¤t->perf_event_mutex); + list_add_tail(&event->owner_entry, ¤t->perf_event_list); + mutex_unlock(¤t->perf_event_mutex); + + return event; + + err_put_context: + put_ctx(ctx); + err_exit: + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter); + /* * inherit a event from parent task to child task: */ @@ -4619,7 +4846,7 @@ inherit_event(struct perf_event *parent_event, child_event = perf_event_alloc(&parent_event->attr, parent_event->cpu, child_ctx, group_leader, parent_event, - GFP_KERNEL); + NULL, GFP_KERNEL); if (IS_ERR(child_event)) return child_event; get_ctx(child_ctx); @@ -4637,6 +4864,8 @@ inherit_event(struct perf_event *parent_event, if (parent_event->attr.freq) child_event->hw.sample_period = parent_event->hw.sample_period; + child_event->overflow_handler = parent_event->overflow_handler; + /* * Link it up in the child's context: */ @@ -4726,7 +4955,6 @@ __perf_event_exit_task(struct perf_event *child_event, { struct perf_event *parent_event; - update_event_times(child_event); perf_event_remove_from_context(child_event); parent_event = child_event->parent; @@ -4778,6 +5006,7 @@ void perf_event_exit_task(struct task_struct *child) * the events from it. */ unclone_ctx(child_ctx); + update_context_time(child_ctx); spin_unlock_irqrestore(&child_ctx->lock, flags); /* diff --git a/kernel/printk.c b/kernel/printk.c index f38b07f78a4..b5ac4d99c66 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -33,6 +33,7 @@ #include <linux/bootmem.h> #include <linux/syscalls.h> #include <linux/kexec.h> +#include <linux/ratelimit.h> #include <asm/uaccess.h> @@ -1376,11 +1377,11 @@ late_initcall(disable_boot_consoles); */ DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10); -int printk_ratelimit(void) +int __printk_ratelimit(const char *func) { - return __ratelimit(&printk_ratelimit_state); + return ___ratelimit(&printk_ratelimit_state, func); } -EXPORT_SYMBOL(printk_ratelimit); +EXPORT_SYMBOL(__printk_ratelimit); /** * printk_timed_ratelimit - caller-controlled printk ratelimiting diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 400183346ad..9b7fd472387 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -44,7 +44,6 @@ #include <linux/cpu.h> #include <linux/mutex.h> #include <linux/module.h> -#include <linux/kernel_stat.h> #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key rcu_lock_key; @@ -53,8 +52,6 @@ struct lockdep_map rcu_lock_map = EXPORT_SYMBOL_GPL(rcu_lock_map); #endif -int rcu_scheduler_active __read_mostly; - /* * Awaken the corresponding synchronize_rcu() instance now that a * grace period has elapsed. @@ -66,122 +63,3 @@ void wakeme_after_rcu(struct rcu_head *head) rcu = container_of(head, struct rcu_synchronize, head); complete(&rcu->completion); } - -#ifdef CONFIG_TREE_PREEMPT_RCU - -/** - * synchronize_rcu - wait until a grace period has elapsed. - * - * Control will return to the caller some time after a full grace - * period has elapsed, in other words after all currently executing RCU - * read-side critical sections have completed. RCU read-side critical - * sections are delimited by rcu_read_lock() and rcu_read_unlock(), - * and may be nested. - */ -void synchronize_rcu(void) -{ - struct rcu_synchronize rcu; - - if (!rcu_scheduler_active) - return; - - init_completion(&rcu.completion); - /* Will wake me after RCU finished. */ - call_rcu(&rcu.head, wakeme_after_rcu); - /* Wait for it. */ - wait_for_completion(&rcu.completion); -} -EXPORT_SYMBOL_GPL(synchronize_rcu); - -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ - -/** - * synchronize_sched - wait until an rcu-sched grace period has elapsed. - * - * Control will return to the caller some time after a full rcu-sched - * grace period has elapsed, in other words after all currently executing - * rcu-sched read-side critical sections have completed. These read-side - * critical sections are delimited by rcu_read_lock_sched() and - * rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(), - * local_irq_disable(), and so on may be used in place of - * rcu_read_lock_sched(). - * - * This means that all preempt_disable code sequences, including NMI and - * hardware-interrupt handlers, in progress on entry will have completed - * before this primitive returns. However, this does not guarantee that - * softirq handlers will have completed, since in some kernels, these - * handlers can run in process context, and can block. - * - * This primitive provides the guarantees made by the (now removed) - * synchronize_kernel() API. In contrast, synchronize_rcu() only - * guarantees that rcu_read_lock() sections will have completed. - * In "classic RCU", these two guarantees happen to be one and - * the same, but can differ in realtime RCU implementations. - */ -void synchronize_sched(void) -{ - struct rcu_synchronize rcu; - - if (rcu_blocking_is_gp()) - return; - - init_completion(&rcu.completion); - /* Will wake me after RCU finished. */ - call_rcu_sched(&rcu.head, wakeme_after_rcu); - /* Wait for it. */ - wait_for_completion(&rcu.completion); -} -EXPORT_SYMBOL_GPL(synchronize_sched); - -/** - * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed. - * - * Control will return to the caller some time after a full rcu_bh grace - * period has elapsed, in other words after all currently executing rcu_bh - * read-side critical sections have completed. RCU read-side critical - * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(), - * and may be nested. - */ -void synchronize_rcu_bh(void) -{ - struct rcu_synchronize rcu; - - if (rcu_blocking_is_gp()) - return; - - init_completion(&rcu.completion); - /* Will wake me after RCU finished. */ - call_rcu_bh(&rcu.head, wakeme_after_rcu); - /* Wait for it. */ - wait_for_completion(&rcu.completion); -} -EXPORT_SYMBOL_GPL(synchronize_rcu_bh); - -static int __cpuinit rcu_barrier_cpu_hotplug(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - return rcu_cpu_notify(self, action, hcpu); -} - -void __init rcu_init(void) -{ - int i; - - __rcu_init(); - cpu_notifier(rcu_barrier_cpu_hotplug, 0); - - /* - * We don't need protection against CPU-hotplug here because - * this is called early in boot, before either interrupts - * or the scheduler are operational. - */ - for_each_online_cpu(i) - rcu_barrier_cpu_hotplug(NULL, CPU_UP_PREPARE, (void *)(long)i); -} - -void rcu_scheduler_starting(void) -{ - WARN_ON(num_online_cpus() != 1); - WARN_ON(nr_context_switches() > 0); - rcu_scheduler_active = 1; -} diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c new file mode 100644 index 00000000000..9f6d9ff2572 --- /dev/null +++ b/kernel/rcutiny.c @@ -0,0 +1,282 @@ +/* + * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2008 + * + * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> + * + * For detailed explanation of Read-Copy Update mechanism see - + * Documentation/RCU + */ +#include <linux/moduleparam.h> +#include <linux/completion.h> +#include <linux/interrupt.h> +#include <linux/notifier.h> +#include <linux/rcupdate.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/sched.h> +#include <linux/types.h> +#include <linux/init.h> +#include <linux/time.h> +#include <linux/cpu.h> + +/* Global control variables for rcupdate callback mechanism. */ +struct rcu_ctrlblk { + struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ + struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ + struct rcu_head **curtail; /* ->next pointer of last CB. */ +}; + +/* Definition for rcupdate control block. */ +static struct rcu_ctrlblk rcu_ctrlblk = { + .donetail = &rcu_ctrlblk.rcucblist, + .curtail = &rcu_ctrlblk.rcucblist, +}; + +static struct rcu_ctrlblk rcu_bh_ctrlblk = { + .donetail = &rcu_bh_ctrlblk.rcucblist, + .curtail = &rcu_bh_ctrlblk.rcucblist, +}; + +#ifdef CONFIG_NO_HZ + +static long rcu_dynticks_nesting = 1; + +/* + * Enter dynticks-idle mode, which is an extended quiescent state + * if we have fully entered that mode (i.e., if the new value of + * dynticks_nesting is zero). + */ +void rcu_enter_nohz(void) +{ + if (--rcu_dynticks_nesting == 0) + rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */ +} + +/* + * Exit dynticks-idle mode, so that we are no longer in an extended + * quiescent state. + */ +void rcu_exit_nohz(void) +{ + rcu_dynticks_nesting++; +} + +#endif /* #ifdef CONFIG_NO_HZ */ + +/* + * Helper function for rcu_qsctr_inc() and rcu_bh_qsctr_inc(). + * Also disable irqs to avoid confusion due to interrupt handlers + * invoking call_rcu(). + */ +static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) +{ + unsigned long flags; + + local_irq_save(flags); + if (rcp->rcucblist != NULL && + rcp->donetail != rcp->curtail) { + rcp->donetail = rcp->curtail; + local_irq_restore(flags); + return 1; + } + local_irq_restore(flags); + + return 0; +} + +/* + * Record an rcu quiescent state. And an rcu_bh quiescent state while we + * are at it, given that any rcu quiescent state is also an rcu_bh + * quiescent state. Use "+" instead of "||" to defeat short circuiting. + */ +void rcu_sched_qs(int cpu) +{ + if (rcu_qsctr_help(&rcu_ctrlblk) + rcu_qsctr_help(&rcu_bh_ctrlblk)) + raise_softirq(RCU_SOFTIRQ); +} + +/* + * Record an rcu_bh quiescent state. + */ +void rcu_bh_qs(int cpu) +{ + if (rcu_qsctr_help(&rcu_bh_ctrlblk)) + raise_softirq(RCU_SOFTIRQ); +} + +/* + * Check to see if the scheduling-clock interrupt came from an extended + * quiescent state, and, if so, tell RCU about it. + */ +void rcu_check_callbacks(int cpu, int user) +{ + if (user || + (idle_cpu(cpu) && + !in_softirq() && + hardirq_count() <= (1 << HARDIRQ_SHIFT))) + rcu_sched_qs(cpu); + else if (!in_softirq()) + rcu_bh_qs(cpu); +} + +/* + * Helper function for rcu_process_callbacks() that operates on the + * specified rcu_ctrlkblk structure. + */ +static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) +{ + struct rcu_head *next, *list; + unsigned long flags; + + /* If no RCU callbacks ready to invoke, just return. */ + if (&rcp->rcucblist == rcp->donetail) + return; + + /* Move the ready-to-invoke callbacks to a local list. */ + local_irq_save(flags); + list = rcp->rcucblist; + rcp->rcucblist = *rcp->donetail; + *rcp->donetail = NULL; + if (rcp->curtail == rcp->donetail) + rcp->curtail = &rcp->rcucblist; + rcp->donetail = &rcp->rcucblist; + local_irq_restore(flags); + + /* Invoke the callbacks on the local list. */ + while (list) { + next = list->next; + prefetch(next); + list->func(list); + list = next; + } +} + +/* + * Invoke any callbacks whose grace period has completed. + */ +static void rcu_process_callbacks(struct softirq_action *unused) +{ + __rcu_process_callbacks(&rcu_ctrlblk); + __rcu_process_callbacks(&rcu_bh_ctrlblk); +} + +/* + * Wait for a grace period to elapse. But it is illegal to invoke + * synchronize_sched() from within an RCU read-side critical section. + * Therefore, any legal call to synchronize_sched() is a quiescent + * state, and so on a UP system, synchronize_sched() need do nothing. + * Ditto for synchronize_rcu_bh(). (But Lai Jiangshan points out the + * benefits of doing might_sleep() to reduce latency.) + * + * Cool, huh? (Due to Josh Triplett.) + * + * But we want to make this a static inline later. + */ +void synchronize_sched(void) +{ + cond_resched(); +} +EXPORT_SYMBOL_GPL(synchronize_sched); + +void synchronize_rcu_bh(void) +{ + synchronize_sched(); +} +EXPORT_SYMBOL_GPL(synchronize_rcu_bh); + +/* + * Helper function for call_rcu() and call_rcu_bh(). + */ +static void __call_rcu(struct rcu_head *head, + void (*func)(struct rcu_head *rcu), + struct rcu_ctrlblk *rcp) +{ + unsigned long flags; + + head->func = func; + head->next = NULL; + + local_irq_save(flags); + *rcp->curtail = head; + rcp->curtail = &head->next; + local_irq_restore(flags); +} + +/* + * Post an RCU callback to be invoked after the end of an RCU grace + * period. But since we have but one CPU, that would be after any + * quiescent state. + */ +void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +{ + __call_rcu(head, func, &rcu_ctrlblk); +} +EXPORT_SYMBOL_GPL(call_rcu); + +/* + * Post an RCU bottom-half callback to be invoked after any subsequent + * quiescent state. + */ +void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +{ + __call_rcu(head, func, &rcu_bh_ctrlblk); +} +EXPORT_SYMBOL_GPL(call_rcu_bh); + +void rcu_barrier(void) +{ + struct rcu_synchronize rcu; + + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + call_rcu(&rcu.head, wakeme_after_rcu); + /* Wait for it. */ + wait_for_completion(&rcu.completion); +} +EXPORT_SYMBOL_GPL(rcu_barrier); + +void rcu_barrier_bh(void) +{ + struct rcu_synchronize rcu; + + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + call_rcu_bh(&rcu.head, wakeme_after_rcu); + /* Wait for it. */ + wait_for_completion(&rcu.completion); +} +EXPORT_SYMBOL_GPL(rcu_barrier_bh); + +void rcu_barrier_sched(void) +{ + struct rcu_synchronize rcu; + + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + call_rcu_sched(&rcu.head, wakeme_after_rcu); + /* Wait for it. */ + wait_for_completion(&rcu.completion); +} +EXPORT_SYMBOL_GPL(rcu_barrier_sched); + +void __init rcu_init(void) +{ + open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); +} diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 697c0a0229d..a621a67ef4e 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -327,6 +327,11 @@ rcu_torture_cb(struct rcu_head *p) cur_ops->deferred_free(rp); } +static int rcu_no_completed(void) +{ + return 0; +} + static void rcu_torture_deferred_free(struct rcu_torture *p) { call_rcu(&p->rtort_rcu, rcu_torture_cb); @@ -388,6 +393,21 @@ static struct rcu_torture_ops rcu_sync_ops = { .name = "rcu_sync" }; +static struct rcu_torture_ops rcu_expedited_ops = { + .init = rcu_sync_torture_init, + .cleanup = NULL, + .readlock = rcu_torture_read_lock, + .read_delay = rcu_read_delay, /* just reuse rcu's version. */ + .readunlock = rcu_torture_read_unlock, + .completed = rcu_no_completed, + .deferred_free = rcu_sync_torture_deferred_free, + .sync = synchronize_rcu_expedited, + .cb_barrier = NULL, + .stats = NULL, + .irq_capable = 1, + .name = "rcu_expedited" +}; + /* * Definitions for rcu_bh torture testing. */ @@ -547,6 +567,25 @@ static struct rcu_torture_ops srcu_ops = { .name = "srcu" }; +static void srcu_torture_synchronize_expedited(void) +{ + synchronize_srcu_expedited(&srcu_ctl); +} + +static struct rcu_torture_ops srcu_expedited_ops = { + .init = srcu_torture_init, + .cleanup = srcu_torture_cleanup, + .readlock = srcu_torture_read_lock, + .read_delay = srcu_read_delay, + .readunlock = srcu_torture_read_unlock, + .completed = srcu_torture_completed, + .deferred_free = rcu_sync_torture_deferred_free, + .sync = srcu_torture_synchronize_expedited, + .cb_barrier = NULL, + .stats = srcu_torture_stats, + .name = "srcu_expedited" +}; + /* * Definitions for sched torture testing. */ @@ -562,11 +601,6 @@ static void sched_torture_read_unlock(int idx) preempt_enable(); } -static int sched_torture_completed(void) -{ - return 0; -} - static void rcu_sched_torture_deferred_free(struct rcu_torture *p) { call_rcu_sched(&p->rtort_rcu, rcu_torture_cb); @@ -583,7 +617,7 @@ static struct rcu_torture_ops sched_ops = { .readlock = sched_torture_read_lock, .read_delay = rcu_read_delay, /* just reuse rcu's version. */ .readunlock = sched_torture_read_unlock, - .completed = sched_torture_completed, + .completed = rcu_no_completed, .deferred_free = rcu_sched_torture_deferred_free, .sync = sched_torture_synchronize, .cb_barrier = rcu_barrier_sched, @@ -592,13 +626,13 @@ static struct rcu_torture_ops sched_ops = { .name = "sched" }; -static struct rcu_torture_ops sched_ops_sync = { +static struct rcu_torture_ops sched_sync_ops = { .init = rcu_sync_torture_init, .cleanup = NULL, .readlock = sched_torture_read_lock, .read_delay = rcu_read_delay, /* just reuse rcu's version. */ .readunlock = sched_torture_read_unlock, - .completed = sched_torture_completed, + .completed = rcu_no_completed, .deferred_free = rcu_sync_torture_deferred_free, .sync = sched_torture_synchronize, .cb_barrier = NULL, @@ -612,7 +646,7 @@ static struct rcu_torture_ops sched_expedited_ops = { .readlock = sched_torture_read_lock, .read_delay = rcu_read_delay, /* just reuse rcu's version. */ .readunlock = sched_torture_read_unlock, - .completed = sched_torture_completed, + .completed = rcu_no_completed, .deferred_free = rcu_sync_torture_deferred_free, .sync = synchronize_sched_expedited, .cb_barrier = NULL, @@ -1097,9 +1131,10 @@ rcu_torture_init(void) int cpu; int firsterr = 0; static struct rcu_torture_ops *torture_ops[] = - { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops, - &sched_expedited_ops, - &srcu_ops, &sched_ops, &sched_ops_sync, }; + { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, + &rcu_bh_ops, &rcu_bh_sync_ops, + &srcu_ops, &srcu_expedited_ops, + &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; mutex_lock(&fullstop_mutex); @@ -1110,8 +1145,12 @@ rcu_torture_init(void) break; } if (i == ARRAY_SIZE(torture_ops)) { - printk(KERN_ALERT "rcutorture: invalid torture type: \"%s\"\n", + printk(KERN_ALERT "rcu-torture: invalid torture type: \"%s\"\n", torture_type); + printk(KERN_ALERT "rcu-torture types:"); + for (i = 0; i < ARRAY_SIZE(torture_ops); i++) + printk(KERN_ALERT " %s", torture_ops[i]->name); + printk(KERN_ALERT "\n"); mutex_unlock(&fullstop_mutex); return -EINVAL; } diff --git a/kernel/rcutree.c b/kernel/rcutree.c index f3077c0ab18..53ae9598f79 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -46,18 +46,22 @@ #include <linux/cpu.h> #include <linux/mutex.h> #include <linux/time.h> +#include <linux/kernel_stat.h> #include "rcutree.h" /* Data structures. */ +static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; + #define RCU_STATE_INITIALIZER(name) { \ .level = { &name.node[0] }, \ .levelcnt = { \ NUM_RCU_LVL_0, /* root of hierarchy. */ \ NUM_RCU_LVL_1, \ NUM_RCU_LVL_2, \ - NUM_RCU_LVL_3, /* == MAX_RCU_LVLS */ \ + NUM_RCU_LVL_3, \ + NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \ }, \ .signaled = RCU_GP_IDLE, \ .gpnum = -300, \ @@ -77,6 +81,8 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); +static int rcu_scheduler_active __read_mostly; + /* * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s @@ -98,7 +104,7 @@ void rcu_sched_qs(int cpu) struct rcu_data *rdp; rdp = &per_cpu(rcu_sched_data, cpu); - rdp->passed_quiesc_completed = rdp->completed; + rdp->passed_quiesc_completed = rdp->gpnum - 1; barrier(); rdp->passed_quiesc = 1; rcu_preempt_note_context_switch(cpu); @@ -109,7 +115,7 @@ void rcu_bh_qs(int cpu) struct rcu_data *rdp; rdp = &per_cpu(rcu_bh_data, cpu); - rdp->passed_quiesc_completed = rdp->completed; + rdp->passed_quiesc_completed = rdp->gpnum - 1; barrier(); rdp->passed_quiesc = 1; } @@ -335,28 +341,9 @@ void rcu_irq_exit(void) set_need_resched(); } -/* - * Record the specified "completed" value, which is later used to validate - * dynticks counter manipulations. Specify "rsp->completed - 1" to - * unconditionally invalidate any future dynticks manipulations (which is - * useful at the beginning of a grace period). - */ -static void dyntick_record_completed(struct rcu_state *rsp, long comp) -{ - rsp->dynticks_completed = comp; -} - #ifdef CONFIG_SMP /* - * Recall the previously recorded value of the completion for dynticks. - */ -static long dyntick_recall_completed(struct rcu_state *rsp) -{ - return rsp->dynticks_completed; -} - -/* * Snapshot the specified CPU's dynticks counter so that we can later * credit them with an implicit quiescent state. Return 1 if this CPU * is in dynticks idle mode, which is an extended quiescent state. @@ -419,24 +406,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) #else /* #ifdef CONFIG_NO_HZ */ -static void dyntick_record_completed(struct rcu_state *rsp, long comp) -{ -} - #ifdef CONFIG_SMP -/* - * If there are no dynticks, then the only way that a CPU can passively - * be in a quiescent state is to be offline. Unlike dynticks idle, which - * is a point in time during the prior (already finished) grace period, - * an offline CPU is always in a quiescent state, and thus can be - * unconditionally applied. So just return the current value of completed. - */ -static long dyntick_recall_completed(struct rcu_state *rsp) -{ - return rsp->completed; -} - static int dyntick_save_progress_counter(struct rcu_data *rdp) { return 0; @@ -553,13 +524,33 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) /* * Update CPU-local rcu_data state to record the newly noticed grace period. * This is used both when we started the grace period and when we notice - * that someone else started the grace period. + * that someone else started the grace period. The caller must hold the + * ->lock of the leaf rcu_node structure corresponding to the current CPU, + * and must have irqs disabled. */ +static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) +{ + if (rdp->gpnum != rnp->gpnum) { + rdp->qs_pending = 1; + rdp->passed_quiesc = 0; + rdp->gpnum = rnp->gpnum; + } +} + static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp) { - rdp->qs_pending = 1; - rdp->passed_quiesc = 0; - rdp->gpnum = rsp->gpnum; + unsigned long flags; + struct rcu_node *rnp; + + local_irq_save(flags); + rnp = rdp->mynode; + if (rdp->gpnum == ACCESS_ONCE(rnp->gpnum) || /* outside lock. */ + !spin_trylock(&rnp->lock)) { /* irqs already off, retry later. */ + local_irq_restore(flags); + return; + } + __note_new_gpnum(rsp, rnp, rdp); + spin_unlock_irqrestore(&rnp->lock, flags); } /* @@ -583,6 +574,79 @@ check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp) } /* + * Advance this CPU's callbacks, but only if the current grace period + * has ended. This may be called only from the CPU to whom the rdp + * belongs. In addition, the corresponding leaf rcu_node structure's + * ->lock must be held by the caller, with irqs disabled. + */ +static void +__rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) +{ + /* Did another grace period end? */ + if (rdp->completed != rnp->completed) { + + /* Advance callbacks. No harm if list empty. */ + rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL]; + rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL]; + rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; + + /* Remember that we saw this grace-period completion. */ + rdp->completed = rnp->completed; + } +} + +/* + * Advance this CPU's callbacks, but only if the current grace period + * has ended. This may be called only from the CPU to whom the rdp + * belongs. + */ +static void +rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp) +{ + unsigned long flags; + struct rcu_node *rnp; + + local_irq_save(flags); + rnp = rdp->mynode; + if (rdp->completed == ACCESS_ONCE(rnp->completed) || /* outside lock. */ + !spin_trylock(&rnp->lock)) { /* irqs already off, retry later. */ + local_irq_restore(flags); + return; + } + __rcu_process_gp_end(rsp, rnp, rdp); + spin_unlock_irqrestore(&rnp->lock, flags); +} + +/* + * Do per-CPU grace-period initialization for running CPU. The caller + * must hold the lock of the leaf rcu_node structure corresponding to + * this CPU. + */ +static void +rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) +{ + /* Prior grace period ended, so advance callbacks for current CPU. */ + __rcu_process_gp_end(rsp, rnp, rdp); + + /* + * Because this CPU just now started the new grace period, we know + * that all of its callbacks will be covered by this upcoming grace + * period, even the ones that were registered arbitrarily recently. + * Therefore, advance all outstanding callbacks to RCU_WAIT_TAIL. + * + * Other CPUs cannot be sure exactly when the grace period started. + * Therefore, their recently registered callbacks must pass through + * an additional RCU_NEXT_READY stage, so that they will be handled + * by the next RCU grace period. + */ + rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; + rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; + + /* Set state so that this CPU will detect the next quiescent state. */ + __note_new_gpnum(rsp, rnp, rdp); +} + +/* * Start a new RCU grace period if warranted, re-initializing the hierarchy * in preparation for detecting the next grace period. The caller must hold * the root node's ->lock, which is released before return. Hard irqs must @@ -596,7 +660,23 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) struct rcu_node *rnp = rcu_get_root(rsp); if (!cpu_needs_another_gp(rsp, rdp)) { - spin_unlock_irqrestore(&rnp->lock, flags); + if (rnp->completed == rsp->completed) { + spin_unlock_irqrestore(&rnp->lock, flags); + return; + } + spin_unlock(&rnp->lock); /* irqs remain disabled. */ + + /* + * Propagate new ->completed value to rcu_node structures + * so that other CPUs don't have to wait until the start + * of the next grace period to process their callbacks. + */ + rcu_for_each_node_breadth_first(rsp, rnp) { + spin_lock(&rnp->lock); /* irqs already disabled. */ + rnp->completed = rsp->completed; + spin_unlock(&rnp->lock); /* irqs remain disabled. */ + } + local_irq_restore(flags); return; } @@ -606,29 +686,15 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; record_gp_stall_check_time(rsp); - dyntick_record_completed(rsp, rsp->completed - 1); - note_new_gpnum(rsp, rdp); - - /* - * Because this CPU just now started the new grace period, we know - * that all of its callbacks will be covered by this upcoming grace - * period, even the ones that were registered arbitrarily recently. - * Therefore, advance all outstanding callbacks to RCU_WAIT_TAIL. - * - * Other CPUs cannot be sure exactly when the grace period started. - * Therefore, their recently registered callbacks must pass through - * an additional RCU_NEXT_READY stage, so that they will be handled - * by the next RCU grace period. - */ - rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; - rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; /* Special-case the common single-level case. */ if (NUM_RCU_NODES == 1) { rcu_preempt_check_blocked_tasks(rnp); rnp->qsmask = rnp->qsmaskinit; rnp->gpnum = rsp->gpnum; + rnp->completed = rsp->completed; rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ + rcu_start_gp_per_cpu(rsp, rnp, rdp); spin_unlock_irqrestore(&rnp->lock, flags); return; } @@ -661,6 +727,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) rcu_preempt_check_blocked_tasks(rnp); rnp->qsmask = rnp->qsmaskinit; rnp->gpnum = rsp->gpnum; + rnp->completed = rsp->completed; + if (rnp == rdp->mynode) + rcu_start_gp_per_cpu(rsp, rnp, rdp); spin_unlock(&rnp->lock); /* irqs remain disabled. */ } @@ -672,58 +741,32 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) } /* - * Advance this CPU's callbacks, but only if the current grace period - * has ended. This may be called only from the CPU to whom the rdp - * belongs. + * Report a full set of quiescent states to the specified rcu_state + * data structure. This involves cleaning up after the prior grace + * period and letting rcu_start_gp() start up the next grace period + * if one is needed. Note that the caller must hold rnp->lock, as + * required by rcu_start_gp(), which will release it. */ -static void -rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp) -{ - long completed_snap; - unsigned long flags; - - local_irq_save(flags); - completed_snap = ACCESS_ONCE(rsp->completed); /* outside of lock. */ - - /* Did another grace period end? */ - if (rdp->completed != completed_snap) { - - /* Advance callbacks. No harm if list empty. */ - rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL]; - rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL]; - rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; - - /* Remember that we saw this grace-period completion. */ - rdp->completed = completed_snap; - } - local_irq_restore(flags); -} - -/* - * Clean up after the prior grace period and let rcu_start_gp() start up - * the next grace period if one is needed. Note that the caller must - * hold rnp->lock, as required by rcu_start_gp(), which will release it. - */ -static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags) +static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) __releases(rcu_get_root(rsp)->lock) { WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); rsp->completed = rsp->gpnum; rsp->signaled = RCU_GP_IDLE; - rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]); rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ } /* - * Similar to cpu_quiet(), for which it is a helper function. Allows - * a group of CPUs to be quieted at one go, though all the CPUs in the - * group must be represented by the same leaf rcu_node structure. - * That structure's lock must be held upon entry, and it is released - * before return. + * Similar to rcu_report_qs_rdp(), for which it is a helper function. + * Allows quiescent states for a group of CPUs to be reported at one go + * to the specified rcu_node structure, though all the CPUs in the group + * must be represented by the same rcu_node structure (which need not be + * a leaf rcu_node structure, though it often will be). That structure's + * lock must be held upon entry, and it is released before return. */ static void -cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp, - unsigned long flags) +rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, + struct rcu_node *rnp, unsigned long flags) __releases(rnp->lock) { struct rcu_node *rnp_c; @@ -759,21 +802,23 @@ cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp, /* * Get here if we are the last CPU to pass through a quiescent - * state for this grace period. Invoke cpu_quiet_msk_finish() + * state for this grace period. Invoke rcu_report_qs_rsp() * to clean up and start the next grace period if one is needed. */ - cpu_quiet_msk_finish(rsp, flags); /* releases rnp->lock. */ + rcu_report_qs_rsp(rsp, flags); /* releases rnp->lock. */ } /* - * Record a quiescent state for the specified CPU, which must either be - * the current CPU. The lastcomp argument is used to make sure we are - * still in the grace period of interest. We don't want to end the current - * grace period based on quiescent states detected in an earlier grace - * period! + * Record a quiescent state for the specified CPU to that CPU's rcu_data + * structure. This must be either called from the specified CPU, or + * called when the specified CPU is known to be offline (and when it is + * also known that no other CPU is concurrently trying to help the offline + * CPU). The lastcomp argument is used to make sure we are still in the + * grace period of interest. We don't want to end the current grace period + * based on quiescent states detected in an earlier grace period! */ static void -cpu_quiet(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp) +rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp) { unsigned long flags; unsigned long mask; @@ -781,15 +826,15 @@ cpu_quiet(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp) rnp = rdp->mynode; spin_lock_irqsave(&rnp->lock, flags); - if (lastcomp != ACCESS_ONCE(rsp->completed)) { + if (lastcomp != rnp->completed) { /* * Someone beat us to it for this grace period, so leave. * The race with GP start is resolved by the fact that we * hold the leaf rcu_node lock, so that the per-CPU bits * cannot yet be initialized -- so we would simply find our - * CPU's bit already cleared in cpu_quiet_msk() if this race - * occurred. + * CPU's bit already cleared in rcu_report_qs_rnp() if this + * race occurred. */ rdp->passed_quiesc = 0; /* try again later! */ spin_unlock_irqrestore(&rnp->lock, flags); @@ -807,7 +852,7 @@ cpu_quiet(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp) */ rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; - cpu_quiet_msk(mask, rsp, rnp, flags); /* releases rnp->lock */ + rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */ } } @@ -838,8 +883,11 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) if (!rdp->passed_quiesc) return; - /* Tell RCU we are done (but cpu_quiet() will be the judge of that). */ - cpu_quiet(rdp->cpu, rsp, rdp, rdp->passed_quiesc_completed); + /* + * Tell RCU we are done (but rcu_report_qs_rdp() will be the + * judge of that). + */ + rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesc_completed); } #ifdef CONFIG_HOTPLUG_CPU @@ -899,8 +947,8 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) { unsigned long flags; - long lastcomp; unsigned long mask; + int need_report = 0; struct rcu_data *rdp = rsp->rda[cpu]; struct rcu_node *rnp; @@ -914,30 +962,32 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) spin_lock(&rnp->lock); /* irqs already disabled. */ rnp->qsmaskinit &= ~mask; if (rnp->qsmaskinit != 0) { - spin_unlock(&rnp->lock); /* irqs remain disabled. */ + if (rnp != rdp->mynode) + spin_unlock(&rnp->lock); /* irqs remain disabled. */ break; } - - /* - * If there was a task blocking the current grace period, - * and if all CPUs have checked in, we need to propagate - * the quiescent state up the rcu_node hierarchy. But that - * is inconvenient at the moment due to deadlock issues if - * this should end the current grace period. So set the - * offlined CPU's bit in ->qsmask in order to force the - * next force_quiescent_state() invocation to clean up this - * mess in a deadlock-free manner. - */ - if (rcu_preempt_offline_tasks(rsp, rnp, rdp) && !rnp->qsmask) - rnp->qsmask |= mask; - + if (rnp == rdp->mynode) + need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); + else + spin_unlock(&rnp->lock); /* irqs remain disabled. */ mask = rnp->grpmask; - spin_unlock(&rnp->lock); /* irqs remain disabled. */ rnp = rnp->parent; } while (rnp != NULL); - lastcomp = rsp->completed; - spin_unlock_irqrestore(&rsp->onofflock, flags); + /* + * We still hold the leaf rcu_node structure lock here, and + * irqs are still disabled. The reason for this subterfuge is + * because invoking rcu_report_unblock_qs_rnp() with ->onofflock + * held leads to deadlock. + */ + spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ + rnp = rdp->mynode; + if (need_report & RCU_OFL_TASKS_NORM_GP) + rcu_report_unblock_qs_rnp(rnp, flags); + else + spin_unlock_irqrestore(&rnp->lock, flags); + if (need_report & RCU_OFL_TASKS_EXP_GP) + rcu_report_exp_rnp(rsp, rnp); rcu_adopt_orphan_cbs(rsp); } @@ -1109,7 +1159,7 @@ static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp, rcu_for_each_leaf_node(rsp, rnp) { mask = 0; spin_lock_irqsave(&rnp->lock, flags); - if (rsp->completed != lastcomp) { + if (rnp->completed != lastcomp) { spin_unlock_irqrestore(&rnp->lock, flags); return 1; } @@ -1123,10 +1173,10 @@ static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp, if ((rnp->qsmask & bit) != 0 && f(rsp->rda[cpu])) mask |= bit; } - if (mask != 0 && rsp->completed == lastcomp) { + if (mask != 0 && rnp->completed == lastcomp) { - /* cpu_quiet_msk() releases rnp->lock. */ - cpu_quiet_msk(mask, rsp, rnp, flags); + /* rcu_report_qs_rnp() releases rnp->lock. */ + rcu_report_qs_rnp(mask, rsp, rnp, flags); continue; } spin_unlock_irqrestore(&rnp->lock, flags); @@ -1144,6 +1194,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) long lastcomp; struct rcu_node *rnp = rcu_get_root(rsp); u8 signaled; + u8 forcenow; if (!rcu_gp_in_progress(rsp)) return; /* No grace period in progress, nothing to force. */ @@ -1156,10 +1207,10 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) goto unlock_ret; /* no emergency and done recently. */ rsp->n_force_qs++; spin_lock(&rnp->lock); - lastcomp = rsp->completed; + lastcomp = rsp->gpnum - 1; signaled = rsp->signaled; rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; - if (lastcomp == rsp->gpnum) { + if(!rcu_gp_in_progress(rsp)) { rsp->n_force_qs_ngp++; spin_unlock(&rnp->lock); goto unlock_ret; /* no GP in progress, time updated. */ @@ -1180,21 +1231,29 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) if (rcu_process_dyntick(rsp, lastcomp, dyntick_save_progress_counter)) goto unlock_ret; + /* fall into next case. */ + + case RCU_SAVE_COMPLETED: /* Update state, record completion counter. */ + forcenow = 0; spin_lock(&rnp->lock); - if (lastcomp == rsp->completed && - rsp->signaled == RCU_SAVE_DYNTICK) { + if (lastcomp + 1 == rsp->gpnum && + lastcomp == rsp->completed && + rsp->signaled == signaled) { rsp->signaled = RCU_FORCE_QS; - dyntick_record_completed(rsp, lastcomp); + rsp->completed_fqs = lastcomp; + forcenow = signaled == RCU_SAVE_COMPLETED; } spin_unlock(&rnp->lock); - break; + if (!forcenow) + break; + /* fall into next case. */ case RCU_FORCE_QS: /* Check dyntick-idle state, send IPI to laggarts. */ - if (rcu_process_dyntick(rsp, dyntick_recall_completed(rsp), + if (rcu_process_dyntick(rsp, rsp->completed_fqs, rcu_implicit_dynticks_qs)) goto unlock_ret; @@ -1351,6 +1410,68 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) } EXPORT_SYMBOL_GPL(call_rcu_bh); +/** + * synchronize_sched - wait until an rcu-sched grace period has elapsed. + * + * Control will return to the caller some time after a full rcu-sched + * grace period has elapsed, in other words after all currently executing + * rcu-sched read-side critical sections have completed. These read-side + * critical sections are delimited by rcu_read_lock_sched() and + * rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(), + * local_irq_disable(), and so on may be used in place of + * rcu_read_lock_sched(). + * + * This means that all preempt_disable code sequences, including NMI and + * hardware-interrupt handlers, in progress on entry will have completed + * before this primitive returns. However, this does not guarantee that + * softirq handlers will have completed, since in some kernels, these + * handlers can run in process context, and can block. + * + * This primitive provides the guarantees made by the (now removed) + * synchronize_kernel() API. In contrast, synchronize_rcu() only + * guarantees that rcu_read_lock() sections will have completed. + * In "classic RCU", these two guarantees happen to be one and + * the same, but can differ in realtime RCU implementations. + */ +void synchronize_sched(void) +{ + struct rcu_synchronize rcu; + + if (rcu_blocking_is_gp()) + return; + + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + call_rcu_sched(&rcu.head, wakeme_after_rcu); + /* Wait for it. */ + wait_for_completion(&rcu.completion); +} +EXPORT_SYMBOL_GPL(synchronize_sched); + +/** + * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed. + * + * Control will return to the caller some time after a full rcu_bh grace + * period has elapsed, in other words after all currently executing rcu_bh + * read-side critical sections have completed. RCU read-side critical + * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(), + * and may be nested. + */ +void synchronize_rcu_bh(void) +{ + struct rcu_synchronize rcu; + + if (rcu_blocking_is_gp()) + return; + + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + call_rcu_bh(&rcu.head, wakeme_after_rcu); + /* Wait for it. */ + wait_for_completion(&rcu.completion); +} +EXPORT_SYMBOL_GPL(synchronize_rcu_bh); + /* * Check to see if there is any immediate RCU-related work to be done * by the current CPU, for the specified type of RCU, returning 1 if so. @@ -1360,6 +1481,8 @@ EXPORT_SYMBOL_GPL(call_rcu_bh); */ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) { + struct rcu_node *rnp = rdp->mynode; + rdp->n_rcu_pending++; /* Check for CPU stalls, if enabled. */ @@ -1384,13 +1507,13 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) } /* Has another RCU grace period completed? */ - if (ACCESS_ONCE(rsp->completed) != rdp->completed) { /* outside lock */ + if (ACCESS_ONCE(rnp->completed) != rdp->completed) { /* outside lock */ rdp->n_rp_gp_completed++; return 1; } /* Has a new RCU grace period started? */ - if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) { /* outside lock */ + if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum) { /* outside lock */ rdp->n_rp_gp_started++; return 1; } @@ -1433,6 +1556,21 @@ int rcu_needs_cpu(int cpu) rcu_preempt_needs_cpu(cpu); } +/* + * This function is invoked towards the end of the scheduler's initialization + * process. Before this is called, the idle task might contain + * RCU read-side critical sections (during which time, this idle + * task is booting the system). After this function is called, the + * idle tasks are prohibited from containing RCU read-side critical + * sections. + */ +void rcu_scheduler_starting(void) +{ + WARN_ON(num_online_cpus() != 1); + WARN_ON(nr_context_switches() > 0); + rcu_scheduler_active = 1; +} + static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; static atomic_t rcu_barrier_cpu_count; static DEFINE_MUTEX(rcu_barrier_mutex); @@ -1544,21 +1682,16 @@ static void __cpuinit rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) { unsigned long flags; - long lastcomp; unsigned long mask; struct rcu_data *rdp = rsp->rda[cpu]; struct rcu_node *rnp = rcu_get_root(rsp); /* Set up local state, ensuring consistent view of global state. */ spin_lock_irqsave(&rnp->lock, flags); - lastcomp = rsp->completed; - rdp->completed = lastcomp; - rdp->gpnum = lastcomp; rdp->passed_quiesc = 0; /* We could be racing with new GP, */ rdp->qs_pending = 1; /* so set up to respond to current GP. */ rdp->beenonline = 1; /* We have now been online. */ rdp->preemptable = preemptable; - rdp->passed_quiesc_completed = lastcomp - 1; rdp->qlen_last_fqs_check = 0; rdp->n_force_qs_snap = rsp->n_force_qs; rdp->blimit = blimit; @@ -1580,6 +1713,11 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) spin_lock(&rnp->lock); /* irqs already disabled. */ rnp->qsmaskinit |= mask; mask = rnp->grpmask; + if (rnp == rdp->mynode) { + rdp->gpnum = rnp->completed; /* if GP in progress... */ + rdp->completed = rnp->completed; + rdp->passed_quiesc_completed = rnp->completed - 1; + } spin_unlock(&rnp->lock); /* irqs already disabled. */ rnp = rnp->parent; } while (rnp != NULL && !(rnp->qsmaskinit & mask)); @@ -1597,8 +1735,8 @@ static void __cpuinit rcu_online_cpu(int cpu) /* * Handle CPU online/offline notification events. */ -int __cpuinit rcu_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static int __cpuinit rcu_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) { long cpu = (long)hcpu; @@ -1685,8 +1823,8 @@ static void __init rcu_init_one(struct rcu_state *rsp) cpustride *= rsp->levelspread[i]; rnp = rsp->level[i]; for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { - if (rnp != rcu_get_root(rsp)) - spin_lock_init(&rnp->lock); + spin_lock_init(&rnp->lock); + lockdep_set_class(&rnp->lock, &rcu_node_class[i]); rnp->gpnum = 0; rnp->qsmask = 0; rnp->qsmaskinit = 0; @@ -1707,9 +1845,10 @@ static void __init rcu_init_one(struct rcu_state *rsp) rnp->level = i; INIT_LIST_HEAD(&rnp->blocked_tasks[0]); INIT_LIST_HEAD(&rnp->blocked_tasks[1]); + INIT_LIST_HEAD(&rnp->blocked_tasks[2]); + INIT_LIST_HEAD(&rnp->blocked_tasks[3]); } } - spin_lock_init(&rcu_get_root(rsp)->lock); } /* @@ -1735,16 +1874,30 @@ do { \ } \ } while (0) -void __init __rcu_init(void) +void __init rcu_init(void) { + int i; + rcu_bootup_announce(); #ifdef CONFIG_RCU_CPU_STALL_DETECTOR printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n"); #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ +#if NUM_RCU_LVL_4 != 0 + printk(KERN_INFO "Experimental four-level hierarchy is enabled.\n"); +#endif /* #if NUM_RCU_LVL_4 != 0 */ RCU_INIT_FLAVOR(&rcu_sched_state, rcu_sched_data); RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data); __rcu_init_preempt(); open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); + + /* + * We don't need protection against CPU-hotplug here because + * this is called early in boot, before either interrupts + * or the scheduler are operational. + */ + cpu_notifier(rcu_cpu_notify, 0); + for_each_online_cpu(i) + rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)i); } #include "rcutree_plugin.h" diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 1899023b096..d2a0046f63b 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -34,10 +34,11 @@ * In practice, this has not been tested, so there is probably some * bug somewhere. */ -#define MAX_RCU_LVLS 3 +#define MAX_RCU_LVLS 4 #define RCU_FANOUT (CONFIG_RCU_FANOUT) #define RCU_FANOUT_SQ (RCU_FANOUT * RCU_FANOUT) #define RCU_FANOUT_CUBE (RCU_FANOUT_SQ * RCU_FANOUT) +#define RCU_FANOUT_FOURTH (RCU_FANOUT_CUBE * RCU_FANOUT) #if NR_CPUS <= RCU_FANOUT # define NUM_RCU_LVLS 1 @@ -45,23 +46,33 @@ # define NUM_RCU_LVL_1 (NR_CPUS) # define NUM_RCU_LVL_2 0 # define NUM_RCU_LVL_3 0 +# define NUM_RCU_LVL_4 0 #elif NR_CPUS <= RCU_FANOUT_SQ # define NUM_RCU_LVLS 2 # define NUM_RCU_LVL_0 1 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT) # define NUM_RCU_LVL_2 (NR_CPUS) # define NUM_RCU_LVL_3 0 +# define NUM_RCU_LVL_4 0 #elif NR_CPUS <= RCU_FANOUT_CUBE # define NUM_RCU_LVLS 3 # define NUM_RCU_LVL_0 1 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ) # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT) # define NUM_RCU_LVL_3 NR_CPUS +# define NUM_RCU_LVL_4 0 +#elif NR_CPUS <= RCU_FANOUT_FOURTH +# define NUM_RCU_LVLS 4 +# define NUM_RCU_LVL_0 1 +# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_CUBE) +# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ) +# define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT) +# define NUM_RCU_LVL_4 NR_CPUS #else # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" #endif /* #if (NR_CPUS) <= RCU_FANOUT */ -#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3) +#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4) #define NUM_RCU_NODES (RCU_SUM - NR_CPUS) /* @@ -84,14 +95,21 @@ struct rcu_node { long gpnum; /* Current grace period for this node. */ /* This will either be equal to or one */ /* behind the root rcu_node's gpnum. */ + long completed; /* Last grace period completed for this node. */ + /* This will either be equal to or one */ + /* behind the root rcu_node's gpnum. */ unsigned long qsmask; /* CPUs or groups that need to switch in */ /* order for current grace period to proceed.*/ /* In leaf rcu_node, each bit corresponds to */ /* an rcu_data structure, otherwise, each */ /* bit corresponds to a child rcu_node */ /* structure. */ + unsigned long expmask; /* Groups that have ->blocked_tasks[] */ + /* elements that need to drain to allow the */ + /* current expedited grace period to */ + /* complete (only for TREE_PREEMPT_RCU). */ unsigned long qsmaskinit; - /* Per-GP initialization for qsmask. */ + /* Per-GP initial value for qsmask & expmask. */ unsigned long grpmask; /* Mask to apply to parent qsmask. */ /* Only one bit will be set in this mask. */ int grplo; /* lowest-numbered CPU or group here. */ @@ -99,7 +117,7 @@ struct rcu_node { u8 grpnum; /* CPU/group number for next level up. */ u8 level; /* root is at level 0. */ struct rcu_node *parent; - struct list_head blocked_tasks[2]; + struct list_head blocked_tasks[4]; /* Tasks blocked in RCU read-side critsect. */ /* Grace period number (->gpnum) x blocked */ /* by tasks on the (x & 0x1) element of the */ @@ -114,6 +132,21 @@ struct rcu_node { for ((rnp) = &(rsp)->node[0]; \ (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) +/* + * Do a breadth-first scan of the non-leaf rcu_node structures for the + * specified rcu_state structure. Note that if there is a singleton + * rcu_node tree with but one rcu_node structure, this loop is a no-op. + */ +#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \ + for ((rnp) = &(rsp)->node[0]; \ + (rnp) < (rsp)->level[NUM_RCU_LVLS - 1]; (rnp)++) + +/* + * Scan the leaves of the rcu_node hierarchy for the specified rcu_state + * structure. Note that if there is a singleton rcu_node tree with but + * one rcu_node structure, this loop -will- visit the rcu_node structure. + * It is still a leaf node, even if it is also the root node. + */ #define rcu_for_each_leaf_node(rsp, rnp) \ for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \ (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) @@ -204,11 +237,12 @@ struct rcu_data { #define RCU_GP_IDLE 0 /* No grace period in progress. */ #define RCU_GP_INIT 1 /* Grace period being initialized. */ #define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */ -#define RCU_FORCE_QS 3 /* Need to force quiescent state. */ +#define RCU_SAVE_COMPLETED 3 /* Need to save rsp->completed. */ +#define RCU_FORCE_QS 4 /* Need to force quiescent state. */ #ifdef CONFIG_NO_HZ #define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK #else /* #ifdef CONFIG_NO_HZ */ -#define RCU_SIGNAL_INIT RCU_FORCE_QS +#define RCU_SIGNAL_INIT RCU_SAVE_COMPLETED #endif /* #else #ifdef CONFIG_NO_HZ */ #define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ @@ -246,7 +280,7 @@ struct rcu_state { long gpnum; /* Current gp number. */ long completed; /* # of last completed gp. */ - /* End of fields guarded by root rcu_node's lock. */ + /* End of fields guarded by root rcu_node's lock. */ spinlock_t onofflock; /* exclude on/offline and */ /* starting new GP. Also */ @@ -260,6 +294,8 @@ struct rcu_state { long orphan_qlen; /* Number of orphaned cbs. */ spinlock_t fqslock; /* Only one task forcing */ /* quiescent states. */ + long completed_fqs; /* Value of completed @ snap. */ + /* Protected by fqslock. */ unsigned long jiffies_force_qs; /* Time at which to invoke */ /* force_quiescent_state(). */ unsigned long n_force_qs; /* Number of calls to */ @@ -274,11 +310,15 @@ struct rcu_state { unsigned long jiffies_stall; /* Time at which to check */ /* for CPU stalls. */ #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ -#ifdef CONFIG_NO_HZ - long dynticks_completed; /* Value of completed @ snap. */ -#endif /* #ifdef CONFIG_NO_HZ */ }; +/* Return values for rcu_preempt_offline_tasks(). */ + +#define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */ + /* GP were moved to root. */ +#define RCU_OFL_TASKS_EXP_GP 0x2 /* Tasks blocking expedited */ + /* GP were moved to root. */ + #ifdef RCU_TREE_NONCORE /* @@ -298,10 +338,14 @@ DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data); #else /* #ifdef RCU_TREE_NONCORE */ /* Forward declarations for rcutree_plugin.h */ -static inline void rcu_bootup_announce(void); +static void rcu_bootup_announce(void); long rcu_batches_completed(void); static void rcu_preempt_note_context_switch(int cpu); static int rcu_preempted_readers(struct rcu_node *rnp); +#ifdef CONFIG_HOTPLUG_CPU +static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, + unsigned long flags); +#endif /* #ifdef CONFIG_HOTPLUG_CPU */ #ifdef CONFIG_RCU_CPU_STALL_DETECTOR static void rcu_print_task_stall(struct rcu_node *rnp); #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ @@ -315,6 +359,9 @@ static void rcu_preempt_offline_cpu(int cpu); static void rcu_preempt_check_callbacks(int cpu); static void rcu_preempt_process_callbacks(void); void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) +static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp); +#endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */ static int rcu_preempt_pending(int cpu); static int rcu_preempt_needs_cpu(int cpu); static void __cpuinit rcu_preempt_init_percpu_data(int cpu); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index ef2a58c2b9d..37fbccdf41d 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -24,16 +24,19 @@ * Paul E. McKenney <paulmck@linux.vnet.ibm.com> */ +#include <linux/delay.h> #ifdef CONFIG_TREE_PREEMPT_RCU struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); +static int rcu_preempted_readers_exp(struct rcu_node *rnp); + /* * Tell them what RCU they are running. */ -static inline void rcu_bootup_announce(void) +static void __init rcu_bootup_announce(void) { printk(KERN_INFO "Experimental preemptable hierarchical RCU implementation.\n"); @@ -67,7 +70,7 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed); static void rcu_preempt_qs(int cpu) { struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); - rdp->passed_quiesc_completed = rdp->completed; + rdp->passed_quiesc_completed = rdp->gpnum - 1; barrier(); rdp->passed_quiesc = 1; } @@ -157,14 +160,58 @@ EXPORT_SYMBOL_GPL(__rcu_read_lock); */ static int rcu_preempted_readers(struct rcu_node *rnp) { - return !list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]); + int phase = rnp->gpnum & 0x1; + + return !list_empty(&rnp->blocked_tasks[phase]) || + !list_empty(&rnp->blocked_tasks[phase + 2]); +} + +/* + * Record a quiescent state for all tasks that were previously queued + * on the specified rcu_node structure and that were blocking the current + * RCU grace period. The caller must hold the specified rnp->lock with + * irqs disabled, and this lock is released upon return, but irqs remain + * disabled. + */ +static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) + __releases(rnp->lock) +{ + unsigned long mask; + struct rcu_node *rnp_p; + + if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) { + spin_unlock_irqrestore(&rnp->lock, flags); + return; /* Still need more quiescent states! */ + } + + rnp_p = rnp->parent; + if (rnp_p == NULL) { + /* + * Either there is only one rcu_node in the tree, + * or tasks were kicked up to root rcu_node due to + * CPUs going offline. + */ + rcu_report_qs_rsp(&rcu_preempt_state, flags); + return; + } + + /* Report up the rest of the hierarchy. */ + mask = rnp->grpmask; + spin_unlock(&rnp->lock); /* irqs remain disabled. */ + spin_lock(&rnp_p->lock); /* irqs already disabled. */ + rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags); } +/* + * Handle special cases during rcu_read_unlock(), such as needing to + * notify RCU core processing or task having blocked during the RCU + * read-side critical section. + */ static void rcu_read_unlock_special(struct task_struct *t) { int empty; + int empty_exp; unsigned long flags; - unsigned long mask; struct rcu_node *rnp; int special; @@ -207,36 +254,30 @@ static void rcu_read_unlock_special(struct task_struct *t) spin_unlock(&rnp->lock); /* irqs remain disabled. */ } empty = !rcu_preempted_readers(rnp); + empty_exp = !rcu_preempted_readers_exp(rnp); + smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ list_del_init(&t->rcu_node_entry); t->rcu_blocked_node = NULL; /* * If this was the last task on the current list, and if * we aren't waiting on any CPUs, report the quiescent state. - * Note that both cpu_quiet_msk_finish() and cpu_quiet_msk() - * drop rnp->lock and restore irq. + * Note that rcu_report_unblock_qs_rnp() releases rnp->lock. */ - if (!empty && rnp->qsmask == 0 && - !rcu_preempted_readers(rnp)) { - struct rcu_node *rnp_p; - - if (rnp->parent == NULL) { - /* Only one rcu_node in the tree. */ - cpu_quiet_msk_finish(&rcu_preempt_state, flags); - return; - } - /* Report up the rest of the hierarchy. */ - mask = rnp->grpmask; + if (empty) spin_unlock_irqrestore(&rnp->lock, flags); - rnp_p = rnp->parent; - spin_lock_irqsave(&rnp_p->lock, flags); - WARN_ON_ONCE(rnp->qsmask); - cpu_quiet_msk(mask, &rcu_preempt_state, rnp_p, flags); - return; - } - spin_unlock(&rnp->lock); + else + rcu_report_unblock_qs_rnp(rnp, flags); + + /* + * If this was the last task on the expedited lists, + * then we need to report up the rcu_node hierarchy. + */ + if (!empty_exp && !rcu_preempted_readers_exp(rnp)) + rcu_report_exp_rnp(&rcu_preempt_state, rnp); + } else { + local_irq_restore(flags); } - local_irq_restore(flags); } /* @@ -303,6 +344,8 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) * rcu_node. The reason for not just moving them to the immediate * parent is to remove the need for rcu_read_unlock_special() to * make more than two attempts to acquire the target rcu_node's lock. + * Returns true if there were tasks blocking the current RCU grace + * period. * * Returns 1 if there was previously a task blocking the current grace * period on the specified rcu_node structure. @@ -316,7 +359,7 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, int i; struct list_head *lp; struct list_head *lp_root; - int retval = rcu_preempted_readers(rnp); + int retval = 0; struct rcu_node *rnp_root = rcu_get_root(rsp); struct task_struct *tp; @@ -326,7 +369,9 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, } WARN_ON_ONCE(rnp != rdp->mynode && (!list_empty(&rnp->blocked_tasks[0]) || - !list_empty(&rnp->blocked_tasks[1]))); + !list_empty(&rnp->blocked_tasks[1]) || + !list_empty(&rnp->blocked_tasks[2]) || + !list_empty(&rnp->blocked_tasks[3]))); /* * Move tasks up to root rcu_node. Rely on the fact that the @@ -334,7 +379,11 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, * rcu_nodes in terms of gp_num value. This fact allows us to * move the blocked_tasks[] array directly, element by element. */ - for (i = 0; i < 2; i++) { + if (rcu_preempted_readers(rnp)) + retval |= RCU_OFL_TASKS_NORM_GP; + if (rcu_preempted_readers_exp(rnp)) + retval |= RCU_OFL_TASKS_EXP_GP; + for (i = 0; i < 4; i++) { lp = &rnp->blocked_tasks[i]; lp_root = &rnp_root->blocked_tasks[i]; while (!list_empty(lp)) { @@ -346,7 +395,6 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, spin_unlock(&rnp_root->lock); /* irqs remain disabled */ } } - return retval; } @@ -398,14 +446,183 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) } EXPORT_SYMBOL_GPL(call_rcu); +/** + * synchronize_rcu - wait until a grace period has elapsed. + * + * Control will return to the caller some time after a full grace + * period has elapsed, in other words after all currently executing RCU + * read-side critical sections have completed. RCU read-side critical + * sections are delimited by rcu_read_lock() and rcu_read_unlock(), + * and may be nested. + */ +void synchronize_rcu(void) +{ + struct rcu_synchronize rcu; + + if (!rcu_scheduler_active) + return; + + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + call_rcu(&rcu.head, wakeme_after_rcu); + /* Wait for it. */ + wait_for_completion(&rcu.completion); +} +EXPORT_SYMBOL_GPL(synchronize_rcu); + +static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq); +static long sync_rcu_preempt_exp_count; +static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex); + /* - * Wait for an rcu-preempt grace period. We are supposed to expedite the - * grace period, but this is the crude slow compatability hack, so just - * invoke synchronize_rcu(). + * Return non-zero if there are any tasks in RCU read-side critical + * sections blocking the current preemptible-RCU expedited grace period. + * If there is no preemptible-RCU expedited grace period currently in + * progress, returns zero unconditionally. + */ +static int rcu_preempted_readers_exp(struct rcu_node *rnp) +{ + return !list_empty(&rnp->blocked_tasks[2]) || + !list_empty(&rnp->blocked_tasks[3]); +} + +/* + * return non-zero if there is no RCU expedited grace period in progress + * for the specified rcu_node structure, in other words, if all CPUs and + * tasks covered by the specified rcu_node structure have done their bit + * for the current expedited grace period. Works only for preemptible + * RCU -- other RCU implementation use other means. + * + * Caller must hold sync_rcu_preempt_exp_mutex. + */ +static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) +{ + return !rcu_preempted_readers_exp(rnp) && + ACCESS_ONCE(rnp->expmask) == 0; +} + +/* + * Report the exit from RCU read-side critical section for the last task + * that queued itself during or before the current expedited preemptible-RCU + * grace period. This event is reported either to the rcu_node structure on + * which the task was queued or to one of that rcu_node structure's ancestors, + * recursively up the tree. (Calm down, calm down, we do the recursion + * iteratively!) + * + * Caller must hold sync_rcu_preempt_exp_mutex. + */ +static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) +{ + unsigned long flags; + unsigned long mask; + + spin_lock_irqsave(&rnp->lock, flags); + for (;;) { + if (!sync_rcu_preempt_exp_done(rnp)) + break; + if (rnp->parent == NULL) { + wake_up(&sync_rcu_preempt_exp_wq); + break; + } + mask = rnp->grpmask; + spin_unlock(&rnp->lock); /* irqs remain disabled */ + rnp = rnp->parent; + spin_lock(&rnp->lock); /* irqs already disabled */ + rnp->expmask &= ~mask; + } + spin_unlock_irqrestore(&rnp->lock, flags); +} + +/* + * Snapshot the tasks blocking the newly started preemptible-RCU expedited + * grace period for the specified rcu_node structure. If there are no such + * tasks, report it up the rcu_node hierarchy. + * + * Caller must hold sync_rcu_preempt_exp_mutex and rsp->onofflock. + */ +static void +sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) +{ + int must_wait; + + spin_lock(&rnp->lock); /* irqs already disabled */ + list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]); + list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]); + must_wait = rcu_preempted_readers_exp(rnp); + spin_unlock(&rnp->lock); /* irqs remain disabled */ + if (!must_wait) + rcu_report_exp_rnp(rsp, rnp); +} + +/* + * Wait for an rcu-preempt grace period, but expedite it. The basic idea + * is to invoke synchronize_sched_expedited() to push all the tasks to + * the ->blocked_tasks[] lists, move all entries from the first set of + * ->blocked_tasks[] lists to the second set, and finally wait for this + * second set to drain. */ void synchronize_rcu_expedited(void) { - synchronize_rcu(); + unsigned long flags; + struct rcu_node *rnp; + struct rcu_state *rsp = &rcu_preempt_state; + long snap; + int trycount = 0; + + smp_mb(); /* Caller's modifications seen first by other CPUs. */ + snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1; + smp_mb(); /* Above access cannot bleed into critical section. */ + + /* + * Acquire lock, falling back to synchronize_rcu() if too many + * lock-acquisition failures. Of course, if someone does the + * expedited grace period for us, just leave. + */ + while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) { + if (trycount++ < 10) + udelay(trycount * num_online_cpus()); + else { + synchronize_rcu(); + return; + } + if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) + goto mb_ret; /* Others did our work for us. */ + } + if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) + goto unlock_mb_ret; /* Others did our work for us. */ + + /* force all RCU readers onto blocked_tasks[]. */ + synchronize_sched_expedited(); + + spin_lock_irqsave(&rsp->onofflock, flags); + + /* Initialize ->expmask for all non-leaf rcu_node structures. */ + rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) { + spin_lock(&rnp->lock); /* irqs already disabled. */ + rnp->expmask = rnp->qsmaskinit; + spin_unlock(&rnp->lock); /* irqs remain disabled. */ + } + + /* Snapshot current state of ->blocked_tasks[] lists. */ + rcu_for_each_leaf_node(rsp, rnp) + sync_rcu_preempt_exp_init(rsp, rnp); + if (NUM_RCU_NODES > 1) + sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp)); + + spin_unlock_irqrestore(&rsp->onofflock, flags); + + /* Wait for snapshotted ->blocked_tasks[] lists to drain. */ + rnp = rcu_get_root(rsp); + wait_event(sync_rcu_preempt_exp_wq, + sync_rcu_preempt_exp_done(rnp)); + + /* Clean up and exit. */ + smp_mb(); /* ensure expedited GP seen before counter increment. */ + ACCESS_ONCE(sync_rcu_preempt_exp_count)++; +unlock_mb_ret: + mutex_unlock(&sync_rcu_preempt_exp_mutex); +mb_ret: + smp_mb(); /* ensure subsequent action seen after grace period. */ } EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); @@ -481,7 +698,7 @@ void exit_rcu(void) /* * Tell them what RCU they are running. */ -static inline void rcu_bootup_announce(void) +static void __init rcu_bootup_announce(void) { printk(KERN_INFO "Hierarchical RCU implementation.\n"); } @@ -512,6 +729,16 @@ static int rcu_preempted_readers(struct rcu_node *rnp) return 0; } +#ifdef CONFIG_HOTPLUG_CPU + +/* Because preemptible RCU does not exist, no quieting of tasks. */ +static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) +{ + spin_unlock_irqrestore(&rnp->lock, flags); +} + +#endif /* #ifdef CONFIG_HOTPLUG_CPU */ + #ifdef CONFIG_RCU_CPU_STALL_DETECTOR /* @@ -594,6 +821,20 @@ void synchronize_rcu_expedited(void) } EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); +#ifdef CONFIG_HOTPLUG_CPU + +/* + * Because preemptable RCU does not exist, there is never any need to + * report on tasks preempted in RCU read-side critical sections during + * expedited RCU grace periods. + */ +static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) +{ + return; +} + +#endif /* #ifdef CONFIG_HOTPLUG_CPU */ + /* * Because preemptable RCU does not exist, it never has any work to do. */ diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 4b31c779e62..9d2c88423b3 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -155,12 +155,15 @@ static const struct file_operations rcudata_csv_fops = { static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) { + long gpnum; int level = 0; + int phase; struct rcu_node *rnp; + gpnum = rsp->gpnum; seq_printf(m, "c=%ld g=%ld s=%d jfq=%ld j=%x " "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld\n", - rsp->completed, rsp->gpnum, rsp->signaled, + rsp->completed, gpnum, rsp->signaled, (long)(rsp->jiffies_force_qs - jiffies), (int)(jiffies & 0xffff), rsp->n_force_qs, rsp->n_force_qs_ngp, @@ -171,8 +174,13 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) seq_puts(m, "\n"); level = rnp->level; } - seq_printf(m, "%lx/%lx %d:%d ^%d ", + phase = gpnum & 0x1; + seq_printf(m, "%lx/%lx %c%c>%c%c %d:%d ^%d ", rnp->qsmask, rnp->qsmaskinit, + "T."[list_empty(&rnp->blocked_tasks[phase])], + "E."[list_empty(&rnp->blocked_tasks[phase + 2])], + "T."[list_empty(&rnp->blocked_tasks[!phase])], + "E."[list_empty(&rnp->blocked_tasks[!phase + 2])], rnp->grplo, rnp->grphi, rnp->grpnum); } seq_puts(m, "\n"); diff --git a/kernel/sched.c b/kernel/sched.c index 3c11ae0a948..aa31244caa9 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -535,14 +535,12 @@ struct rq { #define CPU_LOAD_IDX_MAX 5 unsigned long cpu_load[CPU_LOAD_IDX_MAX]; #ifdef CONFIG_NO_HZ - unsigned long last_tick_seen; unsigned char in_nohz_recently; #endif /* capture load from *all* tasks on this cpu: */ struct load_weight load; unsigned long nr_load_updates; u64 nr_switches; - u64 nr_migrations_in; struct cfs_rq cfs; struct rt_rq rt; @@ -591,6 +589,8 @@ struct rq { u64 rt_avg; u64 age_stamp; + u64 idle_stamp; + u64 avg_idle; #endif /* calc_load related fields */ @@ -772,7 +772,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf, if (!sched_feat_names[i]) return -EINVAL; - filp->f_pos += cnt; + *ppos += cnt; return cnt; } @@ -2017,6 +2017,7 @@ void kthread_bind(struct task_struct *p, unsigned int cpu) } spin_lock_irqsave(&rq->lock, flags); + update_rq_clock(rq); set_task_cpu(p, cpu); p->cpus_allowed = cpumask_of_cpu(cpu); p->rt.nr_cpus_allowed = 1; @@ -2078,7 +2079,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) #endif if (old_cpu != new_cpu) { p->se.nr_migrations++; - new_rq->nr_migrations_in++; #ifdef CONFIG_SCHEDSTATS if (task_hot(p, old_rq->clock, NULL)) schedstat_inc(p, se.nr_forced2_migrations); @@ -2115,6 +2115,7 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req) * it is sufficient to simply update the task's cpu field. */ if (!p->se.on_rq && !task_running(rq, p)) { + update_rq_clock(rq); set_task_cpu(p, dest_cpu); return 0; } @@ -2376,14 +2377,15 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, task_rq_unlock(rq, &flags); cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags); - if (cpu != orig_cpu) + if (cpu != orig_cpu) { + local_irq_save(flags); + rq = cpu_rq(cpu); + update_rq_clock(rq); set_task_cpu(p, cpu); - + local_irq_restore(flags); + } rq = task_rq_lock(p, &flags); - if (rq != orig_rq) - update_rq_clock(rq); - WARN_ON(p->state != TASK_WAKING); cpu = task_cpu(p); @@ -2440,6 +2442,17 @@ out_running: #ifdef CONFIG_SMP if (p->sched_class->task_wake_up) p->sched_class->task_wake_up(rq, p); + + if (unlikely(rq->idle_stamp)) { + u64 delta = rq->clock - rq->idle_stamp; + u64 max = 2*sysctl_sched_migration_cost; + + if (delta > max) + rq->avg_idle = max; + else + update_avg(&rq->avg_idle, delta); + rq->idle_stamp = 0; + } #endif out: task_rq_unlock(rq, &flags); @@ -2545,6 +2558,7 @@ static void __sched_fork(struct task_struct *p) void sched_fork(struct task_struct *p, int clone_flags) { int cpu = get_cpu(); + unsigned long flags; __sched_fork(p); @@ -2581,7 +2595,10 @@ void sched_fork(struct task_struct *p, int clone_flags) #ifdef CONFIG_SMP cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0); #endif + local_irq_save(flags); + update_rq_clock(cpu_rq(cpu)); set_task_cpu(p, cpu); + local_irq_restore(flags); #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) if (likely(sched_info_on())) @@ -2848,14 +2865,14 @@ context_switch(struct rq *rq, struct task_struct *prev, */ arch_start_context_switch(prev); - if (unlikely(!mm)) { + if (likely(!mm)) { next->active_mm = oldmm; atomic_inc(&oldmm->mm_count); enter_lazy_tlb(oldmm, next); } else switch_mm(oldmm, mm, next); - if (unlikely(!prev->mm)) { + if (likely(!prev->mm)) { prev->active_mm = NULL; rq->prev_mm = oldmm; } @@ -3018,15 +3035,6 @@ static void calc_load_account_active(struct rq *this_rq) } /* - * Externally visible per-cpu scheduler statistics: - * cpu_nr_migrations(cpu) - number of migrations into that cpu - */ -u64 cpu_nr_migrations(int cpu) -{ - return cpu_rq(cpu)->nr_migrations_in; -} - -/* * Update rq->cpu_load[] statistics. This function is usually called every * scheduler tick (TICK_NSEC). */ @@ -4126,7 +4134,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, unsigned long flags; struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); - cpumask_setall(cpus); + cpumask_copy(cpus, cpu_online_mask); /* * When power savings policy is enabled for the parent domain, idle @@ -4289,7 +4297,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) int all_pinned = 0; struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); - cpumask_setall(cpus); + cpumask_copy(cpus, cpu_online_mask); /* * When power savings policy is enabled for the parent domain, idle @@ -4429,6 +4437,11 @@ static void idle_balance(int this_cpu, struct rq *this_rq) int pulled_task = 0; unsigned long next_balance = jiffies + HZ; + this_rq->idle_stamp = this_rq->clock; + + if (this_rq->avg_idle < sysctl_sched_migration_cost) + return; + for_each_domain(this_cpu, sd) { unsigned long interval; @@ -4443,8 +4456,10 @@ static void idle_balance(int this_cpu, struct rq *this_rq) interval = msecs_to_jiffies(sd->balance_interval); if (time_after(next_balance, sd->last_balance + interval)) next_balance = sd->last_balance + interval; - if (pulled_task) + if (pulled_task) { + this_rq->idle_stamp = 0; break; + } } if (pulled_task || time_after(jiffies, this_rq->next_balance)) { /* @@ -5046,8 +5061,13 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime, p->gtime = cputime_add(p->gtime, cputime); /* Add guest time to cpustat. */ - cpustat->user = cputime64_add(cpustat->user, tmp); - cpustat->guest = cputime64_add(cpustat->guest, tmp); + if (TASK_NICE(p) > 0) { + cpustat->nice = cputime64_add(cpustat->nice, tmp); + cpustat->guest_nice = cputime64_add(cpustat->guest_nice, tmp); + } else { + cpustat->user = cputime64_add(cpustat->user, tmp); + cpustat->guest = cputime64_add(cpustat->guest, tmp); + } } /* @@ -5162,60 +5182,86 @@ void account_idle_ticks(unsigned long ticks) * Use precise platform statistics if available: */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING -cputime_t task_utime(struct task_struct *p) +void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) { - return p->utime; + *ut = p->utime; + *st = p->stime; } -cputime_t task_stime(struct task_struct *p) +void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) { - return p->stime; + struct task_cputime cputime; + + thread_group_cputime(p, &cputime); + + *ut = cputime.utime; + *st = cputime.stime; } #else -cputime_t task_utime(struct task_struct *p) + +#ifndef nsecs_to_cputime +# define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) +#endif + +void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) { - clock_t utime = cputime_to_clock_t(p->utime), - total = utime + cputime_to_clock_t(p->stime); - u64 temp; + cputime_t rtime, utime = p->utime, total = cputime_add(utime, p->stime); /* * Use CFS's precise accounting: */ - temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime); + rtime = nsecs_to_cputime(p->se.sum_exec_runtime); if (total) { - temp *= utime; + u64 temp; + + temp = (u64)(rtime * utime); do_div(temp, total); - } - utime = (clock_t)temp; + utime = (cputime_t)temp; + } else + utime = rtime; + + /* + * Compare with previous values, to keep monotonicity: + */ + p->prev_utime = max(p->prev_utime, utime); + p->prev_stime = max(p->prev_stime, cputime_sub(rtime, p->prev_utime)); - p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime)); - return p->prev_utime; + *ut = p->prev_utime; + *st = p->prev_stime; } -cputime_t task_stime(struct task_struct *p) +/* + * Must be called with siglock held. + */ +void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) { - clock_t stime; + struct signal_struct *sig = p->signal; + struct task_cputime cputime; + cputime_t rtime, utime, total; - /* - * Use CFS's precise accounting. (we subtract utime from - * the total, to make sure the total observed by userspace - * grows monotonically - apps rely on that): - */ - stime = nsec_to_clock_t(p->se.sum_exec_runtime) - - cputime_to_clock_t(task_utime(p)); + thread_group_cputime(p, &cputime); - if (stime >= 0) - p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime)); + total = cputime_add(cputime.utime, cputime.stime); + rtime = nsecs_to_cputime(cputime.sum_exec_runtime); - return p->prev_stime; -} -#endif + if (total) { + u64 temp; -inline cputime_t task_gtime(struct task_struct *p) -{ - return p->gtime; + temp = (u64)(rtime * cputime.utime); + do_div(temp, total); + utime = (cputime_t)temp; + } else + utime = rtime; + + sig->prev_utime = max(sig->prev_utime, utime); + sig->prev_stime = max(sig->prev_stime, + cputime_sub(rtime, sig->prev_utime)); + + *ut = sig->prev_utime; + *st = sig->prev_stime; } +#endif /* * This function gets called by the timer code, with HZ frequency. @@ -5481,7 +5527,7 @@ need_resched_nonpreemptible: } EXPORT_SYMBOL(schedule); -#ifdef CONFIG_SMP +#ifdef CONFIG_MUTEX_SPIN_ON_OWNER /* * Look out! "owner" is an entirely speculative pointer * access and not reliable. @@ -6175,22 +6221,14 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) BUG_ON(p->se.on_rq); p->policy = policy; - switch (p->policy) { - case SCHED_NORMAL: - case SCHED_BATCH: - case SCHED_IDLE: - p->sched_class = &fair_sched_class; - break; - case SCHED_FIFO: - case SCHED_RR: - p->sched_class = &rt_sched_class; - break; - } - p->rt_priority = prio; p->normal_prio = normal_prio(p); /* we are holding p->pi_lock already */ p->prio = rt_mutex_getprio(p); + if (rt_prio(p->prio)) + p->sched_class = &rt_sched_class; + else + p->sched_class = &fair_sched_class; set_load_weight(p); } @@ -6935,7 +6973,7 @@ void show_state_filter(unsigned long state_filter) /* * Only show locks if all tasks are dumped: */ - if (state_filter == -1) + if (!state_filter) debug_show_all_locks(); } @@ -7740,6 +7778,16 @@ early_initcall(migration_init); #ifdef CONFIG_SCHED_DEBUG +static __read_mostly int sched_domain_debug_enabled; + +static int __init sched_domain_debug_setup(char *str) +{ + sched_domain_debug_enabled = 1; + + return 0; +} +early_param("sched_debug", sched_domain_debug_setup); + static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, struct cpumask *groupmask) { @@ -7826,6 +7874,9 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) cpumask_var_t groupmask; int level = 0; + if (!sched_domain_debug_enabled) + return; + if (!sd) { printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu); return; @@ -7905,6 +7956,8 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) static void free_rootdomain(struct root_domain *rd) { + synchronize_sched(); + cpupri_cleanup(&rd->cpupri); free_cpumask_var(rd->rto_mask); @@ -8045,6 +8098,7 @@ static cpumask_var_t cpu_isolated_map; /* Setup the mask of cpus configured for isolated domains */ static int __init isolated_cpu_setup(char *str) { + alloc_bootmem_cpumask_var(&cpu_isolated_map); cpulist_parse(str, cpu_isolated_map); return 1; } @@ -8881,7 +8935,7 @@ static int build_sched_domains(const struct cpumask *cpu_map) return __build_sched_domains(cpu_map, NULL); } -static struct cpumask *doms_cur; /* current sched domains */ +static cpumask_var_t *doms_cur; /* current sched domains */ static int ndoms_cur; /* number of sched domains in 'doms_cur' */ static struct sched_domain_attr *dattr_cur; /* attribues of custom domains in 'doms_cur' */ @@ -8903,6 +8957,31 @@ int __attribute__((weak)) arch_update_cpu_topology(void) return 0; } +cpumask_var_t *alloc_sched_domains(unsigned int ndoms) +{ + int i; + cpumask_var_t *doms; + + doms = kmalloc(sizeof(*doms) * ndoms, GFP_KERNEL); + if (!doms) + return NULL; + for (i = 0; i < ndoms; i++) { + if (!alloc_cpumask_var(&doms[i], GFP_KERNEL)) { + free_sched_domains(doms, i); + return NULL; + } + } + return doms; +} + +void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms) +{ + unsigned int i; + for (i = 0; i < ndoms; i++) + free_cpumask_var(doms[i]); + kfree(doms); +} + /* * Set up scheduler domains and groups. Callers must hold the hotplug lock. * For now this just excludes isolated cpus, but could be used to @@ -8914,12 +8993,12 @@ static int arch_init_sched_domains(const struct cpumask *cpu_map) arch_update_cpu_topology(); ndoms_cur = 1; - doms_cur = kmalloc(cpumask_size(), GFP_KERNEL); + doms_cur = alloc_sched_domains(ndoms_cur); if (!doms_cur) - doms_cur = fallback_doms; - cpumask_andnot(doms_cur, cpu_map, cpu_isolated_map); + doms_cur = &fallback_doms; + cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map); dattr_cur = NULL; - err = build_sched_domains(doms_cur); + err = build_sched_domains(doms_cur[0]); register_sched_domain_sysctl(); return err; @@ -8969,19 +9048,19 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, * doms_new[] to the current sched domain partitioning, doms_cur[]. * It destroys each deleted domain and builds each new domain. * - * 'doms_new' is an array of cpumask's of length 'ndoms_new'. + * 'doms_new' is an array of cpumask_var_t's of length 'ndoms_new'. * The masks don't intersect (don't overlap.) We should setup one * sched domain for each mask. CPUs not in any of the cpumasks will * not be load balanced. If the same cpumask appears both in the * current 'doms_cur' domains and in the new 'doms_new', we can leave * it as it is. * - * The passed in 'doms_new' should be kmalloc'd. This routine takes - * ownership of it and will kfree it when done with it. If the caller - * failed the kmalloc call, then it can pass in doms_new == NULL && - * ndoms_new == 1, and partition_sched_domains() will fallback to - * the single partition 'fallback_doms', it also forces the domains - * to be rebuilt. + * The passed in 'doms_new' should be allocated using + * alloc_sched_domains. This routine takes ownership of it and will + * free_sched_domains it when done with it. If the caller failed the + * alloc call, then it can pass in doms_new == NULL && ndoms_new == 1, + * and partition_sched_domains() will fallback to the single partition + * 'fallback_doms', it also forces the domains to be rebuilt. * * If doms_new == NULL it will be replaced with cpu_online_mask. * ndoms_new == 0 is a special case for destroying existing domains, @@ -8989,8 +9068,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, * * Call with hotplug lock held */ -/* FIXME: Change to struct cpumask *doms_new[] */ -void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, +void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], struct sched_domain_attr *dattr_new) { int i, j, n; @@ -9009,40 +9087,40 @@ void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, /* Destroy deleted domains */ for (i = 0; i < ndoms_cur; i++) { for (j = 0; j < n && !new_topology; j++) { - if (cpumask_equal(&doms_cur[i], &doms_new[j]) + if (cpumask_equal(doms_cur[i], doms_new[j]) && dattrs_equal(dattr_cur, i, dattr_new, j)) goto match1; } /* no match - a current sched domain not in new doms_new[] */ - detach_destroy_domains(doms_cur + i); + detach_destroy_domains(doms_cur[i]); match1: ; } if (doms_new == NULL) { ndoms_cur = 0; - doms_new = fallback_doms; - cpumask_andnot(&doms_new[0], cpu_online_mask, cpu_isolated_map); + doms_new = &fallback_doms; + cpumask_andnot(doms_new[0], cpu_online_mask, cpu_isolated_map); WARN_ON_ONCE(dattr_new); } /* Build new domains */ for (i = 0; i < ndoms_new; i++) { for (j = 0; j < ndoms_cur && !new_topology; j++) { - if (cpumask_equal(&doms_new[i], &doms_cur[j]) + if (cpumask_equal(doms_new[i], doms_cur[j]) && dattrs_equal(dattr_new, i, dattr_cur, j)) goto match2; } /* no match - add a new doms_new */ - __build_sched_domains(doms_new + i, + __build_sched_domains(doms_new[i], dattr_new ? dattr_new + i : NULL); match2: ; } /* Remember the new sched domains */ - if (doms_cur != fallback_doms) - kfree(doms_cur); + if (doms_cur != &fallback_doms) + free_sched_domains(doms_cur, ndoms_cur); kfree(dattr_cur); /* kfree(NULL) is safe */ doms_cur = doms_new; dattr_cur = dattr_new; @@ -9364,10 +9442,6 @@ void __init sched_init(void) #ifdef CONFIG_CPUMASK_OFFSTACK alloc_size += num_possible_cpus() * cpumask_size(); #endif - /* - * As sched_init() is called before page_alloc is setup, - * we use alloc_bootmem(). - */ if (alloc_size) { ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT); @@ -9522,6 +9596,8 @@ void __init sched_init(void) rq->cpu = i; rq->online = 0; rq->migration_thread = NULL; + rq->idle_stamp = 0; + rq->avg_idle = 2*sysctl_sched_migration_cost; INIT_LIST_HEAD(&rq->migration_queue); rq_attach_root(rq, &def_root_domain); #endif @@ -9571,7 +9647,9 @@ void __init sched_init(void) zalloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT); alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT); #endif - zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); + /* May be allocated at isolcpus cmdline parse time */ + if (cpu_isolated_map == NULL) + zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); #endif /* SMP */ perf_event_init(); @@ -10901,6 +10979,7 @@ void synchronize_sched_expedited(void) spin_unlock_irqrestore(&rq->lock, flags); } rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE; + synchronize_sched_expedited_count++; mutex_unlock(&rcu_sched_expedited_mutex); put_online_cpus(); if (need_full_sync) diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index efb84409bc4..6988cf08f70 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -285,12 +285,16 @@ static void print_cpu(struct seq_file *m, int cpu) #ifdef CONFIG_SCHEDSTATS #define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n); +#define P64(n) SEQ_printf(m, " .%-30s: %Ld\n", #n, rq->n); P(yld_count); P(sched_switch); P(sched_count); P(sched_goidle); +#ifdef CONFIG_SMP + P64(avg_idle); +#endif P(ttwu_count); P(ttwu_local); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 37087a7fac2..f61837ad336 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1345,6 +1345,37 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) } /* + * Try and locate an idle CPU in the sched_domain. + */ +static int +select_idle_sibling(struct task_struct *p, struct sched_domain *sd, int target) +{ + int cpu = smp_processor_id(); + int prev_cpu = task_cpu(p); + int i; + + /* + * If this domain spans both cpu and prev_cpu (see the SD_WAKE_AFFINE + * test in select_task_rq_fair) and the prev_cpu is idle then that's + * always a better target than the current cpu. + */ + if (target == cpu && !cpu_rq(prev_cpu)->cfs.nr_running) + return prev_cpu; + + /* + * Otherwise, iterate the domain and find an elegible idle cpu. + */ + for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) { + if (!cpu_rq(i)->cfs.nr_running) { + target = i; + break; + } + } + + return target; +} + +/* * sched_balance_self: balance the current task (running on cpu) in domains * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and * SD_BALANCE_EXEC. @@ -1398,11 +1429,35 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag want_sd = 0; } - if (want_affine && (tmp->flags & SD_WAKE_AFFINE) && - cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) { + /* + * While iterating the domains looking for a spanning + * WAKE_AFFINE domain, adjust the affine target to any idle cpu + * in cache sharing domains along the way. + */ + if (want_affine) { + int target = -1; - affine_sd = tmp; - want_affine = 0; + /* + * If both cpu and prev_cpu are part of this domain, + * cpu is a valid SD_WAKE_AFFINE target. + */ + if (cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) + target = cpu; + + /* + * If there's an idle sibling in this domain, make that + * the wake_affine target instead of the current cpu. + */ + if (tmp->flags & SD_PREFER_SIBLING) + target = select_idle_sibling(p, tmp, target); + + if (target >= 0) { + if (tmp->flags & SD_WAKE_AFFINE) { + affine_sd = tmp; + want_affine = 0; + } + cpu = target; + } } if (!want_sd && !want_affine) @@ -1679,7 +1734,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq) struct cfs_rq *cfs_rq = &rq->cfs; struct sched_entity *se; - if (unlikely(!cfs_rq->nr_running)) + if (!cfs_rq->nr_running) return NULL; do { diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index a4d790cddb1..5c5fef37841 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1153,29 +1153,12 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask); -static inline int pick_optimal_cpu(int this_cpu, - const struct cpumask *mask) -{ - int first; - - /* "this_cpu" is cheaper to preempt than a remote processor */ - if ((this_cpu != -1) && cpumask_test_cpu(this_cpu, mask)) - return this_cpu; - - first = cpumask_first(mask); - if (first < nr_cpu_ids) - return first; - - return -1; -} - static int find_lowest_rq(struct task_struct *task) { struct sched_domain *sd; struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask); int this_cpu = smp_processor_id(); int cpu = task_cpu(task); - cpumask_var_t domain_mask; if (task->rt.nr_cpus_allowed == 1) return -1; /* No other targets possible */ @@ -1198,28 +1181,26 @@ static int find_lowest_rq(struct task_struct *task) * Otherwise, we consult the sched_domains span maps to figure * out which cpu is logically closest to our hot cache data. */ - if (this_cpu == cpu) - this_cpu = -1; /* Skip this_cpu opt if the same */ - - if (alloc_cpumask_var(&domain_mask, GFP_ATOMIC)) { - for_each_domain(cpu, sd) { - if (sd->flags & SD_WAKE_AFFINE) { - int best_cpu; + if (!cpumask_test_cpu(this_cpu, lowest_mask)) + this_cpu = -1; /* Skip this_cpu opt if not among lowest */ - cpumask_and(domain_mask, - sched_domain_span(sd), - lowest_mask); + for_each_domain(cpu, sd) { + if (sd->flags & SD_WAKE_AFFINE) { + int best_cpu; - best_cpu = pick_optimal_cpu(this_cpu, - domain_mask); - - if (best_cpu != -1) { - free_cpumask_var(domain_mask); - return best_cpu; - } - } + /* + * "this_cpu" is cheaper to preempt than a + * remote processor. + */ + if (this_cpu != -1 && + cpumask_test_cpu(this_cpu, sched_domain_span(sd))) + return this_cpu; + + best_cpu = cpumask_first_and(lowest_mask, + sched_domain_span(sd)); + if (best_cpu < nr_cpu_ids) + return best_cpu; } - free_cpumask_var(domain_mask); } /* @@ -1227,7 +1208,13 @@ static int find_lowest_rq(struct task_struct *task) * just give the caller *something* to work with from the compatible * locations. */ - return pick_optimal_cpu(this_cpu, lowest_mask); + if (this_cpu != -1) + return this_cpu; + + cpu = cpumask_any(lowest_mask); + if (cpu < nr_cpu_ids) + return cpu; + return -1; } /* Will lock the rq it finds */ diff --git a/kernel/signal.c b/kernel/signal.c index 6705320784f..6b982f2cf52 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -22,12 +22,14 @@ #include <linux/ptrace.h> #include <linux/signal.h> #include <linux/signalfd.h> +#include <linux/ratelimit.h> #include <linux/tracehook.h> #include <linux/capability.h> #include <linux/freezer.h> #include <linux/pid_namespace.h> #include <linux/nsproxy.h> -#include <trace/events/sched.h> +#define CREATE_TRACE_POINTS +#include <trace/events/signal.h> #include <asm/param.h> #include <asm/uaccess.h> @@ -41,6 +43,8 @@ static struct kmem_cache *sigqueue_cachep; +int print_fatal_signals __read_mostly; + static void __user *sig_handler(struct task_struct *t, int sig) { return t->sighand->action[sig - 1].sa.sa_handler; @@ -159,7 +163,7 @@ int next_signal(struct sigpending *pending, sigset_t *mask) { unsigned long i, *s, *m, x; int sig = 0; - + s = pending->signal.sig; m = mask->sig; switch (_NSIG_WORDS) { @@ -184,17 +188,31 @@ int next_signal(struct sigpending *pending, sigset_t *mask) sig = ffz(~x) + 1; break; } - + return sig; } +static inline void print_dropped_signal(int sig) +{ + static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10); + + if (!print_fatal_signals) + return; + + if (!__ratelimit(&ratelimit_state)) + return; + + printk(KERN_INFO "%s/%d: reached RLIMIT_SIGPENDING, dropped signal %d\n", + current->comm, current->pid, sig); +} + /* * allocate a new signal queue record * - this may be called without locks if and only if t == current, otherwise an * appopriate lock must be held to stop the target task from exiting */ -static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, - int override_rlimit) +static struct sigqueue * +__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit) { struct sigqueue *q = NULL; struct user_struct *user; @@ -207,10 +225,15 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, */ user = get_uid(__task_cred(t)->user); atomic_inc(&user->sigpending); + if (override_rlimit || atomic_read(&user->sigpending) <= - t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur) + t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur) { q = kmem_cache_alloc(sigqueue_cachep, flags); + } else { + print_dropped_signal(sig); + } + if (unlikely(q == NULL)) { atomic_dec(&user->sigpending); free_uid(user); @@ -834,7 +857,7 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, struct sigqueue *q; int override_rlimit; - trace_sched_signal_send(sig, t); + trace_signal_generate(sig, info, t); assert_spin_locked(&t->sighand->siglock); @@ -869,7 +892,7 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, else override_rlimit = 0; - q = __sigqueue_alloc(t, GFP_ATOMIC | __GFP_NOTRACK_FALSE_POSITIVE, + q = __sigqueue_alloc(sig, t, GFP_ATOMIC | __GFP_NOTRACK_FALSE_POSITIVE, override_rlimit); if (q) { list_add_tail(&q->list, &pending->list); @@ -896,12 +919,21 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, break; } } else if (!is_si_special(info)) { - if (sig >= SIGRTMIN && info->si_code != SI_USER) - /* - * Queue overflow, abort. We may abort if the signal was rt - * and sent by user using something other than kill(). - */ + if (sig >= SIGRTMIN && info->si_code != SI_USER) { + /* + * Queue overflow, abort. We may abort if the + * signal was rt and sent by user using something + * other than kill(). + */ + trace_signal_overflow_fail(sig, group, info); return -EAGAIN; + } else { + /* + * This is a silent loss of information. We still + * send the signal, but the *info bits are lost. + */ + trace_signal_lose_info(sig, group, info); + } } out_set: @@ -925,8 +957,6 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t, return __send_signal(sig, info, t, group, from_ancestor_ns); } -int print_fatal_signals; - static void print_fatal_signal(struct pt_regs *regs, int signr) { printk("%s/%d: potentially unexpected fatal signal %d.\n", @@ -1293,19 +1323,19 @@ EXPORT_SYMBOL(kill_pid); * These functions support sending signals using preallocated sigqueue * structures. This is needed "because realtime applications cannot * afford to lose notifications of asynchronous events, like timer - * expirations or I/O completions". In the case of Posix Timers + * expirations or I/O completions". In the case of Posix Timers * we allocate the sigqueue structure from the timer_create. If this * allocation fails we are able to report the failure to the application * with an EAGAIN error. */ - struct sigqueue *sigqueue_alloc(void) { - struct sigqueue *q; + struct sigqueue *q = __sigqueue_alloc(-1, current, GFP_KERNEL, 0); - if ((q = __sigqueue_alloc(current, GFP_KERNEL, 0))) + if (q) q->flags |= SIGQUEUE_PREALLOC; - return(q); + + return q; } void sigqueue_free(struct sigqueue *q) @@ -1839,6 +1869,9 @@ relock: ka = &sighand->action[signr-1]; } + /* Trace actually delivered signals. */ + trace_signal_deliver(signr, info, ka); + if (ka->sa.sa_handler == SIG_IGN) /* Do nothing. */ continue; if (ka->sa.sa_handler != SIG_DFL) { diff --git a/kernel/slow-work-proc.c b/kernel/slow-work-debugfs.c index 3988032571f..e45c4364529 100644 --- a/kernel/slow-work-proc.c +++ b/kernel/slow-work-debugfs.c @@ -57,7 +57,7 @@ static void slow_work_print_mark(struct seq_file *m, struct slow_work *work) } /* - * Describe a slow work item for /proc + * Describe a slow work item for debugfs */ static int slow_work_runqueue_show(struct seq_file *m, void *v) { @@ -211,7 +211,7 @@ static const struct seq_operations slow_work_runqueue_ops = { }; /* - * open "/proc/slow_work_rq" to list queue contents + * open "/sys/kernel/debug/slow_work/runqueue" to list queue contents */ static int slow_work_runqueue_open(struct inode *inode, struct file *file) { diff --git a/kernel/slow-work.c b/kernel/slow-work.c index da94f3c101a..00889bd3c59 100644 --- a/kernel/slow-work.c +++ b/kernel/slow-work.c @@ -16,7 +16,7 @@ #include <linux/kthread.h> #include <linux/freezer.h> #include <linux/wait.h> -#include <linux/proc_fs.h> +#include <linux/debugfs.h> #include "slow-work.h" static void slow_work_cull_timeout(unsigned long); @@ -109,12 +109,36 @@ static struct module *slow_work_unreg_module; static struct slow_work *slow_work_unreg_work_item; static DECLARE_WAIT_QUEUE_HEAD(slow_work_unreg_wq); static DEFINE_MUTEX(slow_work_unreg_sync_lock); + +static void slow_work_set_thread_processing(int id, struct slow_work *work) +{ + if (work) + slow_work_thread_processing[id] = work->owner; +} +static void slow_work_done_thread_processing(int id, struct slow_work *work) +{ + struct module *module = slow_work_thread_processing[id]; + + slow_work_thread_processing[id] = NULL; + smp_mb(); + if (slow_work_unreg_work_item == work || + slow_work_unreg_module == module) + wake_up_all(&slow_work_unreg_wq); +} +static void slow_work_clear_thread_processing(int id) +{ + slow_work_thread_processing[id] = NULL; +} +#else +static void slow_work_set_thread_processing(int id, struct slow_work *work) {} +static void slow_work_done_thread_processing(int id, struct slow_work *work) {} +static void slow_work_clear_thread_processing(int id) {} #endif /* * Data for tracking currently executing items for indication through /proc */ -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG struct slow_work *slow_work_execs[SLOW_WORK_THREAD_LIMIT]; pid_t slow_work_pids[SLOW_WORK_THREAD_LIMIT]; DEFINE_RWLOCK(slow_work_execs_lock); @@ -197,9 +221,6 @@ static unsigned slow_work_calc_vsmax(void) */ static noinline bool slow_work_execute(int id) { -#ifdef CONFIG_MODULES - struct module *module; -#endif struct slow_work *work = NULL; unsigned vsmax; bool very_slow; @@ -236,10 +257,7 @@ static noinline bool slow_work_execute(int id) very_slow = false; /* avoid the compiler warning */ } -#ifdef CONFIG_MODULES - if (work) - slow_work_thread_processing[id] = work->owner; -#endif + slow_work_set_thread_processing(id, work); if (work) { slow_work_mark_time(work); slow_work_begin_exec(id, work); @@ -287,15 +305,7 @@ static noinline bool slow_work_execute(int id) /* sort out the race between module unloading and put_ref() */ slow_work_put_ref(work); - -#ifdef CONFIG_MODULES - module = slow_work_thread_processing[id]; - slow_work_thread_processing[id] = NULL; - smp_mb(); - if (slow_work_unreg_work_item == work || - slow_work_unreg_module == module) - wake_up_all(&slow_work_unreg_wq); -#endif + slow_work_done_thread_processing(id, work); return true; @@ -310,7 +320,7 @@ auto_requeue: else list_add_tail(&work->link, &slow_work_queue); spin_unlock_irq(&slow_work_queue_lock); - slow_work_thread_processing[id] = NULL; + slow_work_clear_thread_processing(id); return true; } @@ -813,7 +823,7 @@ static void slow_work_new_thread_execute(struct slow_work *work) static const struct slow_work_ops slow_work_new_thread_ops = { .owner = THIS_MODULE, .execute = slow_work_new_thread_execute, -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG .desc = slow_work_new_thread_desc, #endif }; @@ -943,6 +953,7 @@ EXPORT_SYMBOL(slow_work_register_user); */ static void slow_work_wait_for_items(struct module *module) { +#ifdef CONFIG_MODULES DECLARE_WAITQUEUE(myself, current); struct slow_work *work; int loop; @@ -989,6 +1000,7 @@ static void slow_work_wait_for_items(struct module *module) remove_wait_queue(&slow_work_unreg_wq, &myself); mutex_unlock(&slow_work_unreg_sync_lock); +#endif /* CONFIG_MODULES */ } /** @@ -1043,9 +1055,15 @@ static int __init init_slow_work(void) if (slow_work_max_max_threads < nr_cpus * 2) slow_work_max_max_threads = nr_cpus * 2; #endif -#ifdef CONFIG_SLOW_WORK_PROC - proc_create("slow_work_rq", S_IFREG | 0400, NULL, - &slow_work_runqueue_fops); +#ifdef CONFIG_SLOW_WORK_DEBUG + { + struct dentry *dbdir; + + dbdir = debugfs_create_dir("slow_work", NULL); + if (dbdir && !IS_ERR(dbdir)) + debugfs_create_file("runqueue", S_IFREG | 0400, dbdir, + NULL, &slow_work_runqueue_fops); + } #endif return 0; } diff --git a/kernel/slow-work.h b/kernel/slow-work.h index 3c2f007f3ad..321f3c59d73 100644 --- a/kernel/slow-work.h +++ b/kernel/slow-work.h @@ -19,7 +19,7 @@ /* * slow-work.c */ -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG extern struct slow_work *slow_work_execs[]; extern pid_t slow_work_pids[]; extern rwlock_t slow_work_execs_lock; @@ -30,9 +30,9 @@ extern struct list_head vslow_work_queue; extern spinlock_t slow_work_queue_lock; /* - * slow-work-proc.c + * slow-work-debugfs.c */ -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG extern const struct file_operations slow_work_runqueue_fops; extern void slow_work_new_thread_desc(struct slow_work *, struct seq_file *); diff --git a/kernel/smp.c b/kernel/smp.c index c9d1c7835c2..a8c76069cf5 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -265,9 +265,7 @@ static DEFINE_PER_CPU(struct call_single_data, csd_data); * @info: An arbitrary pointer to pass to the function. * @wait: If true, wait until function has completed on other CPUs. * - * Returns 0 on success, else a negative status code. Note that @wait - * will be implicitly turned on in case of allocation failures, since - * we fall back to on-stack allocation. + * Returns 0 on success, else a negative status code. */ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, int wait) @@ -321,6 +319,51 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, } EXPORT_SYMBOL(smp_call_function_single); +/* + * smp_call_function_any - Run a function on any of the given cpus + * @mask: The mask of cpus it can run on. + * @func: The function to run. This must be fast and non-blocking. + * @info: An arbitrary pointer to pass to the function. + * @wait: If true, wait until function has completed. + * + * Returns 0 on success, else a negative status code (if no cpus were online). + * Note that @wait will be implicitly turned on in case of allocation failures, + * since we fall back to on-stack allocation. + * + * Selection preference: + * 1) current cpu if in @mask + * 2) any cpu of current node if in @mask + * 3) any other online cpu in @mask + */ +int smp_call_function_any(const struct cpumask *mask, + void (*func)(void *info), void *info, int wait) +{ + unsigned int cpu; + const struct cpumask *nodemask; + int ret; + + /* Try for same CPU (cheapest) */ + cpu = get_cpu(); + if (cpumask_test_cpu(cpu, mask)) + goto call; + + /* Try for same node. */ + nodemask = cpumask_of_node(cpu); + for (cpu = cpumask_first_and(nodemask, mask); cpu < nr_cpu_ids; + cpu = cpumask_next_and(cpu, nodemask, mask)) { + if (cpu_online(cpu)) + goto call; + } + + /* Any online will do: smp_call_function_single handles nr_cpu_ids. */ + cpu = cpumask_any_and(mask, cpu_online_mask); +call: + ret = smp_call_function_single(cpu, func, info, wait); + put_cpu(); + return ret; +} +EXPORT_SYMBOL_GPL(smp_call_function_any); + /** * __smp_call_function_single(): Run a function on another CPU * @cpu: The CPU to run on. @@ -355,9 +398,7 @@ void __smp_call_function_single(int cpu, struct call_single_data *data, * @wait: If true, wait (atomically) until function has completed * on other CPUs. * - * If @wait is true, then returns once @func has returned. Note that @wait - * will be implicitly turned on in case of allocation failures, since - * we fall back to on-stack allocation. + * If @wait is true, then returns once @func has returned. * * You must not call this function with disabled interrupts or from a * hardware interrupt handler or from a bottom half handler. Preemption @@ -443,8 +484,7 @@ EXPORT_SYMBOL(smp_call_function_many); * Returns 0. * * If @wait is true, then returns once @func has returned; otherwise - * it returns just before the target cpu calls @func. In case of allocation - * failure, @wait will be implicitly turned on. + * it returns just before the target cpu calls @func. * * You must not call this function with disabled interrupts or from a * hardware interrupt handler or from a bottom half handler. diff --git a/kernel/softirq.c b/kernel/softirq.c index f8749e5216e..21939d9e830 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -302,9 +302,9 @@ void irq_exit(void) if (!in_interrupt() && local_softirq_pending()) invoke_softirq(); + rcu_irq_exit(); #ifdef CONFIG_NO_HZ /* Make sure that timer wheel updates are propagated */ - rcu_irq_exit(); if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched()) tick_nohz_stop_sched_tick(0); #endif diff --git a/kernel/spinlock.c b/kernel/spinlock.c index 5ddab730cb2..41e042219ff 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c @@ -21,145 +21,28 @@ #include <linux/debug_locks.h> #include <linux/module.h> -#ifndef _spin_trylock -int __lockfunc _spin_trylock(spinlock_t *lock) -{ - return __spin_trylock(lock); -} -EXPORT_SYMBOL(_spin_trylock); -#endif - -#ifndef _read_trylock -int __lockfunc _read_trylock(rwlock_t *lock) -{ - return __read_trylock(lock); -} -EXPORT_SYMBOL(_read_trylock); -#endif - -#ifndef _write_trylock -int __lockfunc _write_trylock(rwlock_t *lock) -{ - return __write_trylock(lock); -} -EXPORT_SYMBOL(_write_trylock); -#endif - /* * If lockdep is enabled then we use the non-preemption spin-ops * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are * not re-enabled during lock-acquire (which the preempt-spin-ops do): */ #if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) - -#ifndef _read_lock -void __lockfunc _read_lock(rwlock_t *lock) -{ - __read_lock(lock); -} -EXPORT_SYMBOL(_read_lock); -#endif - -#ifndef _spin_lock_irqsave -unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) -{ - return __spin_lock_irqsave(lock); -} -EXPORT_SYMBOL(_spin_lock_irqsave); -#endif - -#ifndef _spin_lock_irq -void __lockfunc _spin_lock_irq(spinlock_t *lock) -{ - __spin_lock_irq(lock); -} -EXPORT_SYMBOL(_spin_lock_irq); -#endif - -#ifndef _spin_lock_bh -void __lockfunc _spin_lock_bh(spinlock_t *lock) -{ - __spin_lock_bh(lock); -} -EXPORT_SYMBOL(_spin_lock_bh); -#endif - -#ifndef _read_lock_irqsave -unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock) -{ - return __read_lock_irqsave(lock); -} -EXPORT_SYMBOL(_read_lock_irqsave); -#endif - -#ifndef _read_lock_irq -void __lockfunc _read_lock_irq(rwlock_t *lock) -{ - __read_lock_irq(lock); -} -EXPORT_SYMBOL(_read_lock_irq); -#endif - -#ifndef _read_lock_bh -void __lockfunc _read_lock_bh(rwlock_t *lock) -{ - __read_lock_bh(lock); -} -EXPORT_SYMBOL(_read_lock_bh); -#endif - -#ifndef _write_lock_irqsave -unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock) -{ - return __write_lock_irqsave(lock); -} -EXPORT_SYMBOL(_write_lock_irqsave); -#endif - -#ifndef _write_lock_irq -void __lockfunc _write_lock_irq(rwlock_t *lock) -{ - __write_lock_irq(lock); -} -EXPORT_SYMBOL(_write_lock_irq); -#endif - -#ifndef _write_lock_bh -void __lockfunc _write_lock_bh(rwlock_t *lock) -{ - __write_lock_bh(lock); -} -EXPORT_SYMBOL(_write_lock_bh); -#endif - -#ifndef _spin_lock -void __lockfunc _spin_lock(spinlock_t *lock) -{ - __spin_lock(lock); -} -EXPORT_SYMBOL(_spin_lock); -#endif - -#ifndef _write_lock -void __lockfunc _write_lock(rwlock_t *lock) -{ - __write_lock(lock); -} -EXPORT_SYMBOL(_write_lock); -#endif - -#else /* CONFIG_PREEMPT: */ - /* + * The __lock_function inlines are taken from + * include/linux/spinlock_api_smp.h + */ +#else +/* + * We build the __lock_function inlines here. They are too large for + * inlining all over the place, but here is only one user per function + * which embedds them into the calling _lock_function below. + * * This could be a long-held lock. We both prepare to spin for a long * time (making _this_ CPU preemptable if possible), and we also signal * towards that other CPU that it should break the lock ASAP. - * - * (We do this in a function because inlining it would be excessive.) */ - #define BUILD_LOCK_OPS(op, locktype) \ -void __lockfunc _##op##_lock(locktype##_t *lock) \ +void __lockfunc __##op##_lock(locktype##_t *lock) \ { \ for (;;) { \ preempt_disable(); \ @@ -175,9 +58,7 @@ void __lockfunc _##op##_lock(locktype##_t *lock) \ (lock)->break_lock = 0; \ } \ \ -EXPORT_SYMBOL(_##op##_lock); \ - \ -unsigned long __lockfunc _##op##_lock_irqsave(locktype##_t *lock) \ +unsigned long __lockfunc __##op##_lock_irqsave(locktype##_t *lock) \ { \ unsigned long flags; \ \ @@ -198,16 +79,12 @@ unsigned long __lockfunc _##op##_lock_irqsave(locktype##_t *lock) \ return flags; \ } \ \ -EXPORT_SYMBOL(_##op##_lock_irqsave); \ - \ -void __lockfunc _##op##_lock_irq(locktype##_t *lock) \ +void __lockfunc __##op##_lock_irq(locktype##_t *lock) \ { \ _##op##_lock_irqsave(lock); \ } \ \ -EXPORT_SYMBOL(_##op##_lock_irq); \ - \ -void __lockfunc _##op##_lock_bh(locktype##_t *lock) \ +void __lockfunc __##op##_lock_bh(locktype##_t *lock) \ { \ unsigned long flags; \ \ @@ -220,23 +97,21 @@ void __lockfunc _##op##_lock_bh(locktype##_t *lock) \ local_bh_disable(); \ local_irq_restore(flags); \ } \ - \ -EXPORT_SYMBOL(_##op##_lock_bh) /* * Build preemption-friendly versions of the following * lock-spinning functions: * - * _[spin|read|write]_lock() - * _[spin|read|write]_lock_irq() - * _[spin|read|write]_lock_irqsave() - * _[spin|read|write]_lock_bh() + * __[spin|read|write]_lock() + * __[spin|read|write]_lock_irq() + * __[spin|read|write]_lock_irqsave() + * __[spin|read|write]_lock_bh() */ BUILD_LOCK_OPS(spin, spinlock); BUILD_LOCK_OPS(read, rwlock); BUILD_LOCK_OPS(write, rwlock); -#endif /* CONFIG_PREEMPT */ +#endif #ifdef CONFIG_DEBUG_LOCK_ALLOC @@ -248,7 +123,8 @@ void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass) } EXPORT_SYMBOL(_spin_lock_nested); -unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass) +unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, + int subclass) { unsigned long flags; @@ -272,7 +148,127 @@ EXPORT_SYMBOL(_spin_lock_nest_lock); #endif -#ifndef _spin_unlock +#ifndef CONFIG_INLINE_SPIN_TRYLOCK +int __lockfunc _spin_trylock(spinlock_t *lock) +{ + return __spin_trylock(lock); +} +EXPORT_SYMBOL(_spin_trylock); +#endif + +#ifndef CONFIG_INLINE_READ_TRYLOCK +int __lockfunc _read_trylock(rwlock_t *lock) +{ + return __read_trylock(lock); +} +EXPORT_SYMBOL(_read_trylock); +#endif + +#ifndef CONFIG_INLINE_WRITE_TRYLOCK +int __lockfunc _write_trylock(rwlock_t *lock) +{ + return __write_trylock(lock); +} +EXPORT_SYMBOL(_write_trylock); +#endif + +#ifndef CONFIG_INLINE_READ_LOCK +void __lockfunc _read_lock(rwlock_t *lock) +{ + __read_lock(lock); +} +EXPORT_SYMBOL(_read_lock); +#endif + +#ifndef CONFIG_INLINE_SPIN_LOCK_IRQSAVE +unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) +{ + return __spin_lock_irqsave(lock); +} +EXPORT_SYMBOL(_spin_lock_irqsave); +#endif + +#ifndef CONFIG_INLINE_SPIN_LOCK_IRQ +void __lockfunc _spin_lock_irq(spinlock_t *lock) +{ + __spin_lock_irq(lock); +} +EXPORT_SYMBOL(_spin_lock_irq); +#endif + +#ifndef CONFIG_INLINE_SPIN_LOCK_BH +void __lockfunc _spin_lock_bh(spinlock_t *lock) +{ + __spin_lock_bh(lock); +} +EXPORT_SYMBOL(_spin_lock_bh); +#endif + +#ifndef CONFIG_INLINE_READ_LOCK_IRQSAVE +unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock) +{ + return __read_lock_irqsave(lock); +} +EXPORT_SYMBOL(_read_lock_irqsave); +#endif + +#ifndef CONFIG_INLINE_READ_LOCK_IRQ +void __lockfunc _read_lock_irq(rwlock_t *lock) +{ + __read_lock_irq(lock); +} +EXPORT_SYMBOL(_read_lock_irq); +#endif + +#ifndef CONFIG_INLINE_READ_LOCK_BH +void __lockfunc _read_lock_bh(rwlock_t *lock) +{ + __read_lock_bh(lock); +} +EXPORT_SYMBOL(_read_lock_bh); +#endif + +#ifndef CONFIG_INLINE_WRITE_LOCK_IRQSAVE +unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock) +{ + return __write_lock_irqsave(lock); +} +EXPORT_SYMBOL(_write_lock_irqsave); +#endif + +#ifndef CONFIG_INLINE_WRITE_LOCK_IRQ +void __lockfunc _write_lock_irq(rwlock_t *lock) +{ + __write_lock_irq(lock); +} +EXPORT_SYMBOL(_write_lock_irq); +#endif + +#ifndef CONFIG_INLINE_WRITE_LOCK_BH +void __lockfunc _write_lock_bh(rwlock_t *lock) +{ + __write_lock_bh(lock); +} +EXPORT_SYMBOL(_write_lock_bh); +#endif + +#ifndef CONFIG_INLINE_SPIN_LOCK +void __lockfunc _spin_lock(spinlock_t *lock) +{ + __spin_lock(lock); +} +EXPORT_SYMBOL(_spin_lock); +#endif + +#ifndef CONFIG_INLINE_WRITE_LOCK +void __lockfunc _write_lock(rwlock_t *lock) +{ + __write_lock(lock); +} +EXPORT_SYMBOL(_write_lock); +#endif + +#ifndef CONFIG_INLINE_SPIN_UNLOCK void __lockfunc _spin_unlock(spinlock_t *lock) { __spin_unlock(lock); @@ -280,7 +276,7 @@ void __lockfunc _spin_unlock(spinlock_t *lock) EXPORT_SYMBOL(_spin_unlock); #endif -#ifndef _write_unlock +#ifndef CONFIG_INLINE_WRITE_UNLOCK void __lockfunc _write_unlock(rwlock_t *lock) { __write_unlock(lock); @@ -288,7 +284,7 @@ void __lockfunc _write_unlock(rwlock_t *lock) EXPORT_SYMBOL(_write_unlock); #endif -#ifndef _read_unlock +#ifndef CONFIG_INLINE_READ_UNLOCK void __lockfunc _read_unlock(rwlock_t *lock) { __read_unlock(lock); @@ -296,7 +292,7 @@ void __lockfunc _read_unlock(rwlock_t *lock) EXPORT_SYMBOL(_read_unlock); #endif -#ifndef _spin_unlock_irqrestore +#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) { __spin_unlock_irqrestore(lock, flags); @@ -304,7 +300,7 @@ void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) EXPORT_SYMBOL(_spin_unlock_irqrestore); #endif -#ifndef _spin_unlock_irq +#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQ void __lockfunc _spin_unlock_irq(spinlock_t *lock) { __spin_unlock_irq(lock); @@ -312,7 +308,7 @@ void __lockfunc _spin_unlock_irq(spinlock_t *lock) EXPORT_SYMBOL(_spin_unlock_irq); #endif -#ifndef _spin_unlock_bh +#ifndef CONFIG_INLINE_SPIN_UNLOCK_BH void __lockfunc _spin_unlock_bh(spinlock_t *lock) { __spin_unlock_bh(lock); @@ -320,7 +316,7 @@ void __lockfunc _spin_unlock_bh(spinlock_t *lock) EXPORT_SYMBOL(_spin_unlock_bh); #endif -#ifndef _read_unlock_irqrestore +#ifndef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) { __read_unlock_irqrestore(lock, flags); @@ -328,7 +324,7 @@ void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) EXPORT_SYMBOL(_read_unlock_irqrestore); #endif -#ifndef _read_unlock_irq +#ifndef CONFIG_INLINE_READ_UNLOCK_IRQ void __lockfunc _read_unlock_irq(rwlock_t *lock) { __read_unlock_irq(lock); @@ -336,7 +332,7 @@ void __lockfunc _read_unlock_irq(rwlock_t *lock) EXPORT_SYMBOL(_read_unlock_irq); #endif -#ifndef _read_unlock_bh +#ifndef CONFIG_INLINE_READ_UNLOCK_BH void __lockfunc _read_unlock_bh(rwlock_t *lock) { __read_unlock_bh(lock); @@ -344,7 +340,7 @@ void __lockfunc _read_unlock_bh(rwlock_t *lock) EXPORT_SYMBOL(_read_unlock_bh); #endif -#ifndef _write_unlock_irqrestore +#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) { __write_unlock_irqrestore(lock, flags); @@ -352,7 +348,7 @@ void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) EXPORT_SYMBOL(_write_unlock_irqrestore); #endif -#ifndef _write_unlock_irq +#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQ void __lockfunc _write_unlock_irq(rwlock_t *lock) { __write_unlock_irq(lock); @@ -360,7 +356,7 @@ void __lockfunc _write_unlock_irq(rwlock_t *lock) EXPORT_SYMBOL(_write_unlock_irq); #endif -#ifndef _write_unlock_bh +#ifndef CONFIG_INLINE_WRITE_UNLOCK_BH void __lockfunc _write_unlock_bh(rwlock_t *lock) { __write_unlock_bh(lock); @@ -368,7 +364,7 @@ void __lockfunc _write_unlock_bh(rwlock_t *lock) EXPORT_SYMBOL(_write_unlock_bh); #endif -#ifndef _spin_trylock_bh +#ifndef CONFIG_INLINE_SPIN_TRYLOCK_BH int __lockfunc _spin_trylock_bh(spinlock_t *lock) { return __spin_trylock_bh(lock); diff --git a/kernel/srcu.c b/kernel/srcu.c index b0aeeaf22ce..818d7d9aa03 100644 --- a/kernel/srcu.c +++ b/kernel/srcu.c @@ -49,6 +49,7 @@ int init_srcu_struct(struct srcu_struct *sp) sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array); return (sp->per_cpu_ref ? 0 : -ENOMEM); } +EXPORT_SYMBOL_GPL(init_srcu_struct); /* * srcu_readers_active_idx -- returns approximate number of readers @@ -97,6 +98,7 @@ void cleanup_srcu_struct(struct srcu_struct *sp) free_percpu(sp->per_cpu_ref); sp->per_cpu_ref = NULL; } +EXPORT_SYMBOL_GPL(cleanup_srcu_struct); /** * srcu_read_lock - register a new reader for an SRCU-protected structure. @@ -118,6 +120,7 @@ int srcu_read_lock(struct srcu_struct *sp) preempt_enable(); return idx; } +EXPORT_SYMBOL_GPL(srcu_read_lock); /** * srcu_read_unlock - unregister a old reader from an SRCU-protected structure. @@ -136,22 +139,12 @@ void srcu_read_unlock(struct srcu_struct *sp, int idx) per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]--; preempt_enable(); } +EXPORT_SYMBOL_GPL(srcu_read_unlock); -/** - * synchronize_srcu - wait for prior SRCU read-side critical-section completion - * @sp: srcu_struct with which to synchronize. - * - * Flip the completed counter, and wait for the old count to drain to zero. - * As with classic RCU, the updater must use some separate means of - * synchronizing concurrent updates. Can block; must be called from - * process context. - * - * Note that it is illegal to call synchornize_srcu() from the corresponding - * SRCU read-side critical section; doing so will result in deadlock. - * However, it is perfectly legal to call synchronize_srcu() on one - * srcu_struct from some other srcu_struct's read-side critical section. +/* + * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). */ -void synchronize_srcu(struct srcu_struct *sp) +void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void)) { int idx; @@ -173,7 +166,7 @@ void synchronize_srcu(struct srcu_struct *sp) return; } - synchronize_sched(); /* Force memory barrier on all CPUs. */ + sync_func(); /* Force memory barrier on all CPUs. */ /* * The preceding synchronize_sched() ensures that any CPU that @@ -190,7 +183,7 @@ void synchronize_srcu(struct srcu_struct *sp) idx = sp->completed & 0x1; sp->completed++; - synchronize_sched(); /* Force memory barrier on all CPUs. */ + sync_func(); /* Force memory barrier on all CPUs. */ /* * At this point, because of the preceding synchronize_sched(), @@ -203,7 +196,7 @@ void synchronize_srcu(struct srcu_struct *sp) while (srcu_readers_active_idx(sp, idx)) schedule_timeout_interruptible(1); - synchronize_sched(); /* Force memory barrier on all CPUs. */ + sync_func(); /* Force memory barrier on all CPUs. */ /* * The preceding synchronize_sched() forces all srcu_read_unlock() @@ -237,6 +230,47 @@ void synchronize_srcu(struct srcu_struct *sp) } /** + * synchronize_srcu - wait for prior SRCU read-side critical-section completion + * @sp: srcu_struct with which to synchronize. + * + * Flip the completed counter, and wait for the old count to drain to zero. + * As with classic RCU, the updater must use some separate means of + * synchronizing concurrent updates. Can block; must be called from + * process context. + * + * Note that it is illegal to call synchronize_srcu() from the corresponding + * SRCU read-side critical section; doing so will result in deadlock. + * However, it is perfectly legal to call synchronize_srcu() on one + * srcu_struct from some other srcu_struct's read-side critical section. + */ +void synchronize_srcu(struct srcu_struct *sp) +{ + __synchronize_srcu(sp, synchronize_sched); +} +EXPORT_SYMBOL_GPL(synchronize_srcu); + +/** + * synchronize_srcu_expedited - like synchronize_srcu, but less patient + * @sp: srcu_struct with which to synchronize. + * + * Flip the completed counter, and wait for the old count to drain to zero. + * As with classic RCU, the updater must use some separate means of + * synchronizing concurrent updates. Can block; must be called from + * process context. + * + * Note that it is illegal to call synchronize_srcu_expedited() + * from the corresponding SRCU read-side critical section; doing so + * will result in deadlock. However, it is perfectly legal to call + * synchronize_srcu_expedited() on one srcu_struct from some other + * srcu_struct's read-side critical section. + */ +void synchronize_srcu_expedited(struct srcu_struct *sp) +{ + __synchronize_srcu(sp, synchronize_sched_expedited); +} +EXPORT_SYMBOL_GPL(synchronize_srcu_expedited); + +/** * srcu_batches_completed - return batches completed. * @sp: srcu_struct on which to report batch completion. * @@ -248,10 +282,4 @@ long srcu_batches_completed(struct srcu_struct *sp) { return sp->completed; } - -EXPORT_SYMBOL_GPL(init_srcu_struct); -EXPORT_SYMBOL_GPL(cleanup_srcu_struct); -EXPORT_SYMBOL_GPL(srcu_read_lock); -EXPORT_SYMBOL_GPL(srcu_read_unlock); -EXPORT_SYMBOL_GPL(synchronize_srcu); EXPORT_SYMBOL_GPL(srcu_batches_completed); diff --git a/kernel/sys.c b/kernel/sys.c index ce17760d9c5..9968c5fb55b 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -911,16 +911,15 @@ change_okay: void do_sys_times(struct tms *tms) { - struct task_cputime cputime; - cputime_t cutime, cstime; + cputime_t tgutime, tgstime, cutime, cstime; - thread_group_cputime(current, &cputime); spin_lock_irq(¤t->sighand->siglock); + thread_group_times(current, &tgutime, &tgstime); cutime = current->signal->cutime; cstime = current->signal->cstime; spin_unlock_irq(¤t->sighand->siglock); - tms->tms_utime = cputime_to_clock_t(cputime.utime); - tms->tms_stime = cputime_to_clock_t(cputime.stime); + tms->tms_utime = cputime_to_clock_t(tgutime); + tms->tms_stime = cputime_to_clock_t(tgstime); tms->tms_cutime = cputime_to_clock_t(cutime); tms->tms_cstime = cputime_to_clock_t(cstime); } @@ -1338,16 +1337,14 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) { struct task_struct *t; unsigned long flags; - cputime_t utime, stime; - struct task_cputime cputime; + cputime_t tgutime, tgstime, utime, stime; unsigned long maxrss = 0; memset((char *) r, 0, sizeof *r); utime = stime = cputime_zero; if (who == RUSAGE_THREAD) { - utime = task_utime(current); - stime = task_stime(current); + task_times(current, &utime, &stime); accumulate_thread_rusage(p, r); maxrss = p->signal->maxrss; goto out; @@ -1373,9 +1370,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) break; case RUSAGE_SELF: - thread_group_cputime(p, &cputime); - utime = cputime_add(utime, cputime.utime); - stime = cputime_add(stime, cputime.stime); + thread_group_times(p, &tgutime, &tgstime); + utime = cputime_add(utime, tgutime); + stime = cputime_add(stime, tgstime); r->ru_nvcsw += p->signal->nvcsw; r->ru_nivcsw += p->signal->nivcsw; r->ru_minflt += p->signal->min_flt; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 0d949c51741..4dbf93a52ee 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -36,6 +36,7 @@ #include <linux/sysrq.h> #include <linux/highuid.h> #include <linux/writeback.h> +#include <linux/ratelimit.h> #include <linux/hugetlb.h> #include <linux/initrd.h> #include <linux/key.h> @@ -158,6 +159,8 @@ extern int no_unaligned_warning; extern int unaligned_dump_stack; #endif +extern struct ratelimit_state printk_ratelimit_state; + #ifdef CONFIG_RT_MUTEXES extern int max_lock_depth; #endif diff --git a/kernel/time.c b/kernel/time.c index 2e2e469a7fe..804798005d1 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -662,6 +662,36 @@ u64 nsec_to_clock_t(u64 x) #endif } +/** + * nsecs_to_jiffies - Convert nsecs in u64 to jiffies + * + * @n: nsecs in u64 + * + * Unlike {m,u}secs_to_jiffies, type of input is not unsigned int but u64. + * And this doesn't return MAX_JIFFY_OFFSET since this function is designed + * for scheduler, not for use in device drivers to calculate timeout value. + * + * note: + * NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512) + * ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years + */ +unsigned long nsecs_to_jiffies(u64 n) +{ +#if (NSEC_PER_SEC % HZ) == 0 + /* Common case, HZ = 100, 128, 200, 250, 256, 500, 512, 1000 etc. */ + return div_u64(n, NSEC_PER_SEC / HZ); +#elif (HZ % 512) == 0 + /* overflow after 292 years if HZ = 1024 */ + return div_u64(n * HZ / 512, NSEC_PER_SEC / 512); +#else + /* + * Generic case - optimized for cases where HZ is a multiple of 3. + * overflow after 64.99 years, exact for HZ = 60, 72, 90, 120 etc. + */ + return div_u64(n * 9, (9ull * NSEC_PER_SEC + HZ / 2) / HZ); +#endif +} + #if (BITS_PER_LONG < 64) u64 get_jiffies_64(void) { diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index b416512ad17..d006554888d 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -339,6 +339,27 @@ config POWER_TRACER power management decisions, specifically the C-state and P-state behavior. +config KSYM_TRACER + bool "Trace read and write access on kernel memory locations" + depends on HAVE_HW_BREAKPOINT + select TRACING + help + This tracer helps find read and write operations on any given kernel + symbol i.e. /proc/kallsyms. + +config PROFILE_KSYM_TRACER + bool "Profile all kernel memory accesses on 'watched' variables" + depends on KSYM_TRACER + help + This tracer profiles kernel accesses on variables watched through the + ksym tracer ftrace plugin. Depending upon the hardware, all read + and write operations on kernel variables can be monitored for + accesses. + + The results will be displayed in: + /debugfs/tracing/profile_ksym + + Say N if unsure. config STACK_TRACER bool "Trace max stack" @@ -428,6 +449,23 @@ config BLK_DEV_IO_TRACE If unsure, say N. +config KPROBE_EVENT + depends on KPROBES + depends on X86 + bool "Enable kprobes-based dynamic events" + select TRACING + default y + help + This allows the user to add tracing events (similar to tracepoints) on the fly + via the ftrace interface. See Documentation/trace/kprobetrace.txt + for more details. + + Those events can be inserted wherever kprobes can probe, and record + various register and memory values. + + This option is also required by perf-probe subcommand of perf tools. If + you want to use perf tools, this option is strongly recommended. + config DYNAMIC_FTRACE bool "enable/disable ftrace tracepoints dynamically" depends on FUNCTION_TRACER diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 26f03ac07c2..cd9ecd89ec7 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -53,6 +53,8 @@ obj-$(CONFIG_EVENT_TRACING) += trace_export.o obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o +obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o +obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o obj-$(CONFIG_EVENT_TRACING) += power-traces.o libftrace-y := ftrace.o diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6dc4e5ef7a0..e51a1bcb7be 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -60,6 +60,13 @@ static int last_ftrace_enabled; /* Quick disabling of function tracer. */ int function_trace_stop; +/* List for set_ftrace_pid's pids. */ +LIST_HEAD(ftrace_pids); +struct ftrace_pid { + struct list_head list; + struct pid *pid; +}; + /* * ftrace_disabled is set when an anomaly is discovered. * ftrace_disabled is much stronger than ftrace_enabled. @@ -78,6 +85,10 @@ ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static int ftrace_set_func(unsigned long *array, int *idx, char *buffer); +#endif + static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) { struct ftrace_ops *op = ftrace_list; @@ -155,7 +166,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops) else func = ftrace_list_func; - if (ftrace_pid_trace) { + if (!list_empty(&ftrace_pids)) { set_ftrace_pid_function(func); func = ftrace_pid_func; } @@ -203,7 +214,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) if (ftrace_list->next == &ftrace_list_end) { ftrace_func_t func = ftrace_list->func; - if (ftrace_pid_trace) { + if (!list_empty(&ftrace_pids)) { set_ftrace_pid_function(func); func = ftrace_pid_func; } @@ -231,7 +242,7 @@ static void ftrace_update_pid_func(void) func = __ftrace_trace_function; #endif - if (ftrace_pid_trace) { + if (!list_empty(&ftrace_pids)) { set_ftrace_pid_function(func); func = ftrace_pid_func; } else { @@ -821,8 +832,6 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer) } #endif /* CONFIG_FUNCTION_PROFILER */ -/* set when tracing only a pid */ -struct pid *ftrace_pid_trace; static struct pid * const ftrace_swapper_pid = &init_struct_pid; #ifdef CONFIG_DYNAMIC_FTRACE @@ -1261,12 +1270,34 @@ static int ftrace_update_code(struct module *mod) ftrace_new_addrs = p->newlist; p->flags = 0L; - /* convert record (i.e, patch mcount-call with NOP) */ - if (ftrace_code_disable(mod, p)) { - p->flags |= FTRACE_FL_CONVERTED; - ftrace_update_cnt++; - } else + /* + * Do the initial record convertion from mcount jump + * to the NOP instructions. + */ + if (!ftrace_code_disable(mod, p)) { ftrace_free_rec(p); + continue; + } + + p->flags |= FTRACE_FL_CONVERTED; + ftrace_update_cnt++; + + /* + * If the tracing is enabled, go ahead and enable the record. + * + * The reason not to enable the record immediatelly is the + * inherent check of ftrace_make_nop/ftrace_make_call for + * correct previous instructions. Making first the NOP + * conversion puts the module to the correct state, thus + * passing the ftrace_make_call check. + */ + if (ftrace_start_up) { + int failed = __ftrace_replace_code(p, 1); + if (failed) { + ftrace_bug(failed, p->ip); + ftrace_free_rec(p); + } + } } stop = ftrace_now(raw_smp_processor_id()); @@ -1656,60 +1687,6 @@ ftrace_regex_lseek(struct file *file, loff_t offset, int origin) return ret; } -enum { - MATCH_FULL, - MATCH_FRONT_ONLY, - MATCH_MIDDLE_ONLY, - MATCH_END_ONLY, -}; - -/* - * (static function - no need for kernel doc) - * - * Pass in a buffer containing a glob and this function will - * set search to point to the search part of the buffer and - * return the type of search it is (see enum above). - * This does modify buff. - * - * Returns enum type. - * search returns the pointer to use for comparison. - * not returns 1 if buff started with a '!' - * 0 otherwise. - */ -static int -ftrace_setup_glob(char *buff, int len, char **search, int *not) -{ - int type = MATCH_FULL; - int i; - - if (buff[0] == '!') { - *not = 1; - buff++; - len--; - } else - *not = 0; - - *search = buff; - - for (i = 0; i < len; i++) { - if (buff[i] == '*') { - if (!i) { - *search = buff + 1; - type = MATCH_END_ONLY; - } else { - if (type == MATCH_END_ONLY) - type = MATCH_MIDDLE_ONLY; - else - type = MATCH_FRONT_ONLY; - buff[i] = 0; - break; - } - } - } - - return type; -} - static int ftrace_match(char *str, char *regex, int len, int type) { int matched = 0; @@ -1758,7 +1735,7 @@ static void ftrace_match_records(char *buff, int len, int enable) int not; flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; - type = ftrace_setup_glob(buff, len, &search, ¬); + type = filter_parse_regex(buff, len, &search, ¬); search_len = strlen(search); @@ -1826,7 +1803,7 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable) } if (strlen(buff)) { - type = ftrace_setup_glob(buff, strlen(buff), &search, ¬); + type = filter_parse_regex(buff, strlen(buff), &search, ¬); search_len = strlen(search); } @@ -1991,7 +1968,7 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, int count = 0; char *search; - type = ftrace_setup_glob(glob, strlen(glob), &search, ¬); + type = filter_parse_regex(glob, strlen(glob), &search, ¬); len = strlen(search); /* we do not support '!' for function probes */ @@ -2068,7 +2045,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, else if (glob) { int not; - type = ftrace_setup_glob(glob, strlen(glob), &search, ¬); + type = filter_parse_regex(glob, strlen(glob), &search, ¬); len = strlen(search); /* we do not support '!' for function probes */ @@ -2312,6 +2289,32 @@ static int __init set_ftrace_filter(char *str) } __setup("ftrace_filter=", set_ftrace_filter); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata; +static int __init set_graph_function(char *str) +{ + strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE); + return 1; +} +__setup("ftrace_graph_filter=", set_graph_function); + +static void __init set_ftrace_early_graph(char *buf) +{ + int ret; + char *func; + + while (buf) { + func = strsep(&buf, ","); + /* we allow only one expression at a time */ + ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count, + func); + if (ret) + printk(KERN_DEBUG "ftrace: function %s not " + "traceable\n", func); + } +} +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + static void __init set_ftrace_early_filter(char *buf, int enable) { char *func; @@ -2328,6 +2331,10 @@ static void __init set_ftrace_early_filters(void) set_ftrace_early_filter(ftrace_filter_buf, 1); if (ftrace_notrace_buf[0]) set_ftrace_early_filter(ftrace_notrace_buf, 0); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + if (ftrace_graph_buf[0]) + set_ftrace_early_graph(ftrace_graph_buf); +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ } static int @@ -2513,7 +2520,7 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer) return -ENODEV; /* decode regex */ - type = ftrace_setup_glob(buffer, strlen(buffer), &search, ¬); + type = filter_parse_regex(buffer, strlen(buffer), &search, ¬); if (not) return -EINVAL; @@ -2624,7 +2631,7 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) return 0; } -static int ftrace_convert_nops(struct module *mod, +static int ftrace_process_locs(struct module *mod, unsigned long *start, unsigned long *end) { @@ -2684,7 +2691,7 @@ static void ftrace_init_module(struct module *mod, { if (ftrace_disabled || start == end) return; - ftrace_convert_nops(mod, start, end); + ftrace_process_locs(mod, start, end); } static int ftrace_module_notify(struct notifier_block *self, @@ -2745,7 +2752,7 @@ void __init ftrace_init(void) last_ftrace_enabled = ftrace_enabled = 1; - ret = ftrace_convert_nops(NULL, + ret = ftrace_process_locs(NULL, __start_mcount_loc, __stop_mcount_loc); @@ -2778,23 +2785,6 @@ static inline void ftrace_startup_enable(int command) { } # define ftrace_shutdown_sysctl() do { } while (0) #endif /* CONFIG_DYNAMIC_FTRACE */ -static ssize_t -ftrace_pid_read(struct file *file, char __user *ubuf, - size_t cnt, loff_t *ppos) -{ - char buf[64]; - int r; - - if (ftrace_pid_trace == ftrace_swapper_pid) - r = sprintf(buf, "swapper tasks\n"); - else if (ftrace_pid_trace) - r = sprintf(buf, "%u\n", pid_vnr(ftrace_pid_trace)); - else - r = sprintf(buf, "no pid\n"); - - return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -} - static void clear_ftrace_swapper(void) { struct task_struct *p; @@ -2845,14 +2835,12 @@ static void set_ftrace_pid(struct pid *pid) rcu_read_unlock(); } -static void clear_ftrace_pid_task(struct pid **pid) +static void clear_ftrace_pid_task(struct pid *pid) { - if (*pid == ftrace_swapper_pid) + if (pid == ftrace_swapper_pid) clear_ftrace_swapper(); else - clear_ftrace_pid(*pid); - - *pid = NULL; + clear_ftrace_pid(pid); } static void set_ftrace_pid_task(struct pid *pid) @@ -2863,74 +2851,184 @@ static void set_ftrace_pid_task(struct pid *pid) set_ftrace_pid(pid); } -static ssize_t -ftrace_pid_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) +static int ftrace_pid_add(int p) { struct pid *pid; - char buf[64]; - long val; - int ret; + struct ftrace_pid *fpid; + int ret = -EINVAL; - if (cnt >= sizeof(buf)) - return -EINVAL; + mutex_lock(&ftrace_lock); - if (copy_from_user(&buf, ubuf, cnt)) - return -EFAULT; + if (!p) + pid = ftrace_swapper_pid; + else + pid = find_get_pid(p); - buf[cnt] = 0; + if (!pid) + goto out; - ret = strict_strtol(buf, 10, &val); - if (ret < 0) - return ret; + ret = 0; - mutex_lock(&ftrace_lock); - if (val < 0) { - /* disable pid tracing */ - if (!ftrace_pid_trace) - goto out; + list_for_each_entry(fpid, &ftrace_pids, list) + if (fpid->pid == pid) + goto out_put; - clear_ftrace_pid_task(&ftrace_pid_trace); + ret = -ENOMEM; - } else { - /* swapper task is special */ - if (!val) { - pid = ftrace_swapper_pid; - if (pid == ftrace_pid_trace) - goto out; - } else { - pid = find_get_pid(val); + fpid = kmalloc(sizeof(*fpid), GFP_KERNEL); + if (!fpid) + goto out_put; - if (pid == ftrace_pid_trace) { - put_pid(pid); - goto out; - } - } + list_add(&fpid->list, &ftrace_pids); + fpid->pid = pid; - if (ftrace_pid_trace) - clear_ftrace_pid_task(&ftrace_pid_trace); + set_ftrace_pid_task(pid); - if (!pid) - goto out; + ftrace_update_pid_func(); + ftrace_startup_enable(0); + + mutex_unlock(&ftrace_lock); + return 0; + +out_put: + if (pid != ftrace_swapper_pid) + put_pid(pid); - ftrace_pid_trace = pid; +out: + mutex_unlock(&ftrace_lock); + return ret; +} + +static void ftrace_pid_reset(void) +{ + struct ftrace_pid *fpid, *safe; - set_ftrace_pid_task(ftrace_pid_trace); + mutex_lock(&ftrace_lock); + list_for_each_entry_safe(fpid, safe, &ftrace_pids, list) { + struct pid *pid = fpid->pid; + + clear_ftrace_pid_task(pid); + + list_del(&fpid->list); + kfree(fpid); } - /* update the function call */ ftrace_update_pid_func(); ftrace_startup_enable(0); - out: mutex_unlock(&ftrace_lock); +} - return cnt; +static void *fpid_start(struct seq_file *m, loff_t *pos) +{ + mutex_lock(&ftrace_lock); + + if (list_empty(&ftrace_pids) && (!*pos)) + return (void *) 1; + + return seq_list_start(&ftrace_pids, *pos); +} + +static void *fpid_next(struct seq_file *m, void *v, loff_t *pos) +{ + if (v == (void *)1) + return NULL; + + return seq_list_next(v, &ftrace_pids, pos); +} + +static void fpid_stop(struct seq_file *m, void *p) +{ + mutex_unlock(&ftrace_lock); +} + +static int fpid_show(struct seq_file *m, void *v) +{ + const struct ftrace_pid *fpid = list_entry(v, struct ftrace_pid, list); + + if (v == (void *)1) { + seq_printf(m, "no pid\n"); + return 0; + } + + if (fpid->pid == ftrace_swapper_pid) + seq_printf(m, "swapper tasks\n"); + else + seq_printf(m, "%u\n", pid_vnr(fpid->pid)); + + return 0; +} + +static const struct seq_operations ftrace_pid_sops = { + .start = fpid_start, + .next = fpid_next, + .stop = fpid_stop, + .show = fpid_show, +}; + +static int +ftrace_pid_open(struct inode *inode, struct file *file) +{ + int ret = 0; + + if ((file->f_mode & FMODE_WRITE) && + (file->f_flags & O_TRUNC)) + ftrace_pid_reset(); + + if (file->f_mode & FMODE_READ) + ret = seq_open(file, &ftrace_pid_sops); + + return ret; +} + +static ssize_t +ftrace_pid_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[64], *tmp; + long val; + int ret; + + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + /* + * Allow "echo > set_ftrace_pid" or "echo -n '' > set_ftrace_pid" + * to clean the filter quietly. + */ + tmp = strstrip(buf); + if (strlen(tmp) == 0) + return 1; + + ret = strict_strtol(tmp, 10, &val); + if (ret < 0) + return ret; + + ret = ftrace_pid_add(val); + + return ret ? ret : cnt; +} + +static int +ftrace_pid_release(struct inode *inode, struct file *file) +{ + if (file->f_mode & FMODE_READ) + seq_release(inode, file); + + return 0; } static const struct file_operations ftrace_pid_fops = { - .read = ftrace_pid_read, - .write = ftrace_pid_write, + .open = ftrace_pid_open, + .write = ftrace_pid_write, + .read = seq_read, + .llseek = seq_lseek, + .release = ftrace_pid_release, }; static __init int ftrace_init_debugfs(void) @@ -3293,4 +3391,3 @@ void ftrace_graph_stop(void) ftrace_stop(); } #endif - diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 5dd017fea6f..a1ca4956ab5 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -397,18 +397,21 @@ int ring_buffer_print_page_header(struct trace_seq *s) int ret; ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t" - "offset:0;\tsize:%u;\n", - (unsigned int)sizeof(field.time_stamp)); + "offset:0;\tsize:%u;\tsigned:%u;\n", + (unsigned int)sizeof(field.time_stamp), + (unsigned int)is_signed_type(u64)); ret = trace_seq_printf(s, "\tfield: local_t commit;\t" - "offset:%u;\tsize:%u;\n", + "offset:%u;\tsize:%u;\tsigned:%u;\n", (unsigned int)offsetof(typeof(field), commit), - (unsigned int)sizeof(field.commit)); + (unsigned int)sizeof(field.commit), + (unsigned int)is_signed_type(long)); ret = trace_seq_printf(s, "\tfield: char data;\t" - "offset:%u;\tsize:%u;\n", + "offset:%u;\tsize:%u;\tsigned:%u;\n", (unsigned int)offsetof(typeof(field), data), - (unsigned int)BUF_PAGE_SIZE); + (unsigned int)BUF_PAGE_SIZE, + (unsigned int)is_signed_type(char)); return ret; } @@ -1787,9 +1790,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, static struct ring_buffer_event * rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, unsigned long length, unsigned long tail, - struct buffer_page *commit_page, struct buffer_page *tail_page, u64 *ts) { + struct buffer_page *commit_page = cpu_buffer->commit_page; struct ring_buffer *buffer = cpu_buffer->buffer; struct buffer_page *next_page; int ret; @@ -1892,13 +1895,10 @@ static struct ring_buffer_event * __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, unsigned type, unsigned long length, u64 *ts) { - struct buffer_page *tail_page, *commit_page; + struct buffer_page *tail_page; struct ring_buffer_event *event; unsigned long tail, write; - commit_page = cpu_buffer->commit_page; - /* we just need to protect against interrupts */ - barrier(); tail_page = cpu_buffer->tail_page; write = local_add_return(length, &tail_page->write); @@ -1909,7 +1909,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, /* See if we shot pass the end of this buffer page */ if (write > BUF_PAGE_SIZE) return rb_move_tail(cpu_buffer, length, tail, - commit_page, tail_page, ts); + tail_page, ts); /* We reserved something on the buffer */ diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index 573d3cc762c..b2477caf09c 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -35,6 +35,28 @@ static int disable_reader; module_param(disable_reader, uint, 0644); MODULE_PARM_DESC(disable_reader, "only run producer"); +static int write_iteration = 50; +module_param(write_iteration, uint, 0644); +MODULE_PARM_DESC(write_iteration, "# of writes between timestamp readings"); + +static int producer_nice = 19; +static int consumer_nice = 19; + +static int producer_fifo = -1; +static int consumer_fifo = -1; + +module_param(producer_nice, uint, 0644); +MODULE_PARM_DESC(producer_nice, "nice prio for producer"); + +module_param(consumer_nice, uint, 0644); +MODULE_PARM_DESC(consumer_nice, "nice prio for consumer"); + +module_param(producer_fifo, uint, 0644); +MODULE_PARM_DESC(producer_fifo, "fifo prio for producer"); + +module_param(consumer_fifo, uint, 0644); +MODULE_PARM_DESC(consumer_fifo, "fifo prio for consumer"); + static int read_events; static int kill_test; @@ -208,15 +230,18 @@ static void ring_buffer_producer(void) do { struct ring_buffer_event *event; int *entry; - - event = ring_buffer_lock_reserve(buffer, 10); - if (!event) { - missed++; - } else { - hit++; - entry = ring_buffer_event_data(event); - *entry = smp_processor_id(); - ring_buffer_unlock_commit(buffer, event); + int i; + + for (i = 0; i < write_iteration; i++) { + event = ring_buffer_lock_reserve(buffer, 10); + if (!event) { + missed++; + } else { + hit++; + entry = ring_buffer_event_data(event); + *entry = smp_processor_id(); + ring_buffer_unlock_commit(buffer, event); + } } do_gettimeofday(&end_tv); @@ -263,6 +288,27 @@ static void ring_buffer_producer(void) if (kill_test) trace_printk("ERROR!\n"); + + if (!disable_reader) { + if (consumer_fifo < 0) + trace_printk("Running Consumer at nice: %d\n", + consumer_nice); + else + trace_printk("Running Consumer at SCHED_FIFO %d\n", + consumer_fifo); + } + if (producer_fifo < 0) + trace_printk("Running Producer at nice: %d\n", + producer_nice); + else + trace_printk("Running Producer at SCHED_FIFO %d\n", + producer_fifo); + + /* Let the user know that the test is running at low priority */ + if (producer_fifo < 0 && consumer_fifo < 0 && + producer_nice == 19 && consumer_nice == 19) + trace_printk("WARNING!!! This test is running at lowest priority.\n"); + trace_printk("Time: %lld (usecs)\n", time); trace_printk("Overruns: %lld\n", overruns); if (disable_reader) @@ -392,6 +438,27 @@ static int __init ring_buffer_benchmark_init(void) if (IS_ERR(producer)) goto out_kill; + /* + * Run them as low-prio background tasks by default: + */ + if (!disable_reader) { + if (consumer_fifo >= 0) { + struct sched_param param = { + .sched_priority = consumer_fifo + }; + sched_setscheduler(consumer, SCHED_FIFO, ¶m); + } else + set_user_nice(consumer, consumer_nice); + } + + if (producer_fifo >= 0) { + struct sched_param param = { + .sched_priority = consumer_fifo + }; + sched_setscheduler(producer, SCHED_FIFO, ¶m); + } else + set_user_nice(producer, producer_nice); + return 0; out_kill: diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b20d3ec75de..874f2893cff 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -129,7 +129,7 @@ static int tracing_set_tracer(const char *buf); static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; static char *default_bootup_tracer; -static int __init set_ftrace(char *str) +static int __init set_cmdline_ftrace(char *str) { strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); default_bootup_tracer = bootup_tracer_buf; @@ -137,7 +137,7 @@ static int __init set_ftrace(char *str) ring_buffer_expanded = 1; return 1; } -__setup("ftrace=", set_ftrace); +__setup("ftrace=", set_cmdline_ftrace); static int __init set_ftrace_dump_on_oops(char *str) { @@ -1361,10 +1361,11 @@ int trace_array_vprintk(struct trace_array *tr, pause_graph_tracing(); raw_local_irq_save(irq_flags); __raw_spin_lock(&trace_buf_lock); - len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); - - len = min(len, TRACE_BUF_SIZE-1); - trace_buf[len] = 0; + if (args == NULL) { + strncpy(trace_buf, fmt, TRACE_BUF_SIZE); + len = strlen(trace_buf); + } else + len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); size = sizeof(*entry) + len + 1; buffer = tr->buffer; @@ -1373,10 +1374,10 @@ int trace_array_vprintk(struct trace_array *tr, if (!event) goto out_unlock; entry = ring_buffer_event_data(event); - entry->ip = ip; + entry->ip = ip; memcpy(&entry->buf, trace_buf, len); - entry->buf[len] = 0; + entry->buf[len] = '\0'; if (!filter_check_discard(call, entry, buffer, event)) ring_buffer_unlock_commit(buffer, event); @@ -3319,22 +3320,11 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, return cnt; } -static int mark_printk(const char *fmt, ...) -{ - int ret; - va_list args; - va_start(args, fmt); - ret = trace_vprintk(0, fmt, args); - va_end(args); - return ret; -} - static ssize_t tracing_mark_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *fpos) { char *buf; - char *end; if (tracing_disabled) return -EINVAL; @@ -3342,7 +3332,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, if (cnt > TRACE_BUF_SIZE) cnt = TRACE_BUF_SIZE; - buf = kmalloc(cnt + 1, GFP_KERNEL); + buf = kmalloc(cnt + 2, GFP_KERNEL); if (buf == NULL) return -ENOMEM; @@ -3350,14 +3340,13 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, kfree(buf); return -EFAULT; } + if (buf[cnt-1] != '\n') { + buf[cnt] = '\n'; + buf[cnt+1] = '\0'; + } else + buf[cnt] = '\0'; - /* Cut from the first nil or newline. */ - buf[cnt] = '\0'; - end = strchr(buf, '\n'); - if (end) - *end = '\0'; - - cnt = mark_printk("%s\n", buf); + cnt = trace_vprintk(0, buf, NULL); kfree(buf); *fpos += cnt; @@ -3730,7 +3719,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf, s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) - return ENOMEM; + return -ENOMEM; trace_seq_init(s); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 405cb850b75..1d7f4830a80 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -11,6 +11,7 @@ #include <linux/ftrace.h> #include <trace/boot.h> #include <linux/kmemtrace.h> +#include <linux/hw_breakpoint.h> #include <linux/trace_seq.h> #include <linux/ftrace_event.h> @@ -37,6 +38,7 @@ enum trace_type { TRACE_KMEM_ALLOC, TRACE_KMEM_FREE, TRACE_BLK, + TRACE_KSYM, __TRACE_LAST_TYPE, }; @@ -98,9 +100,32 @@ struct syscall_trace_enter { struct syscall_trace_exit { struct trace_entry ent; int nr; - unsigned long ret; + long ret; }; +struct kprobe_trace_entry { + struct trace_entry ent; + unsigned long ip; + int nargs; + unsigned long args[]; +}; + +#define SIZEOF_KPROBE_TRACE_ENTRY(n) \ + (offsetof(struct kprobe_trace_entry, args) + \ + (sizeof(unsigned long) * (n))) + +struct kretprobe_trace_entry { + struct trace_entry ent; + unsigned long func; + unsigned long ret_ip; + int nargs; + unsigned long args[]; +}; + +#define SIZEOF_KRETPROBE_TRACE_ENTRY(n) \ + (offsetof(struct kretprobe_trace_entry, args) + \ + (sizeof(unsigned long) * (n))) + /* * trace_flag_type is an enumeration that holds different * states when a trace occurs. These are: @@ -209,6 +234,7 @@ extern void __ftrace_bad_type(void); TRACE_KMEM_ALLOC); \ IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ TRACE_KMEM_FREE); \ + IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\ __ftrace_bad_type(); \ } while (0) @@ -364,6 +390,8 @@ int register_tracer(struct tracer *type); void unregister_tracer(struct tracer *type); int is_tracing_stopped(void); +extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr); + extern unsigned long nsecs_to_usecs(unsigned long nsecs); #ifdef CONFIG_TRACER_MAX_TRACE @@ -438,6 +466,8 @@ extern int trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_hw_branches(struct tracer *trace, struct trace_array *tr); +extern int trace_selftest_startup_ksym(struct tracer *trace, + struct trace_array *tr); #endif /* CONFIG_FTRACE_STARTUP_TEST */ extern void *head_page(struct trace_array_cpu *data); @@ -483,10 +513,6 @@ static inline int ftrace_graph_addr(unsigned long addr) return 0; } #else -static inline int ftrace_trace_addr(unsigned long addr) -{ - return 1; -} static inline int ftrace_graph_addr(unsigned long addr) { return 1; @@ -500,12 +526,12 @@ print_graph_function(struct trace_iterator *iter) } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -extern struct pid *ftrace_pid_trace; +extern struct list_head ftrace_pids; #ifdef CONFIG_FUNCTION_TRACER static inline int ftrace_trace_task(struct task_struct *task) { - if (!ftrace_pid_trace) + if (list_empty(&ftrace_pids)) return 1; return test_tsk_trace_trace(task); @@ -687,7 +713,6 @@ struct event_filter { int n_preds; struct filter_pred **preds; char *filter_string; - bool no_reset; }; struct event_subsystem { @@ -699,22 +724,40 @@ struct event_subsystem { }; struct filter_pred; +struct regex; typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event, int val1, int val2); +typedef int (*regex_match_func)(char *str, struct regex *r, int len); + +enum regex_type { + MATCH_FULL = 0, + MATCH_FRONT_ONLY, + MATCH_MIDDLE_ONLY, + MATCH_END_ONLY, +}; + +struct regex { + char pattern[MAX_FILTER_STR_VAL]; + int len; + int field_len; + regex_match_func match; +}; + struct filter_pred { - filter_pred_fn_t fn; - u64 val; - char str_val[MAX_FILTER_STR_VAL]; - int str_len; - char *field_name; - int offset; - int not; - int op; - int pop_n; + filter_pred_fn_t fn; + u64 val; + struct regex regex; + char *field_name; + int offset; + int not; + int op; + int pop_n; }; +extern enum regex_type +filter_parse_regex(char *buff, int len, char **search, int *not); extern void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s); extern int apply_event_filter(struct ftrace_event_call *call, @@ -730,7 +773,8 @@ filter_check_discard(struct ftrace_event_call *call, void *rec, struct ring_buffer *buffer, struct ring_buffer_event *event) { - if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) { + if (unlikely(call->filter_active) && + !filter_match_preds(call->filter, rec)) { ring_buffer_discard_commit(buffer, event); return 1; } diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 20c5f92e28a..878c03f386b 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c @@ -20,6 +20,8 @@ #include <linux/ktime.h> #include <linux/trace_clock.h> +#include "trace.h" + /* * trace_clock_local(): the simplest and least coherent tracing clock. * @@ -28,17 +30,17 @@ */ u64 notrace trace_clock_local(void) { - unsigned long flags; u64 clock; + int resched; /* * sched_clock() is an architecture implemented, fast, scalable, * lockless clock. It is not guaranteed to be coherent across * CPUs, nor across CPU idle events. */ - raw_local_irq_save(flags); + resched = ftrace_preempt_disable(); clock = sched_clock(); - raw_local_irq_restore(flags); + ftrace_preempt_enable(resched); return clock; } diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index ead3d724599..c16a08f399d 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -364,3 +364,19 @@ FTRACE_ENTRY(kmem_free, kmemtrace_free_entry, F_printk("type:%u call_site:%lx ptr:%p", __entry->type_id, __entry->call_site, __entry->ptr) ); + +FTRACE_ENTRY(ksym_trace, ksym_trace_entry, + + TRACE_KSYM, + + F_STRUCT( + __field( unsigned long, ip ) + __field( unsigned char, type ) + __array( char , cmd, TASK_COMM_LEN ) + __field( unsigned long, addr ) + ), + + F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s", + (void *)__entry->ip, (unsigned int)__entry->type, + (void *)__entry->addr, __entry->cmd) +); diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index 8d5c171cc99..d9c60f80aa0 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -8,17 +8,14 @@ #include <linux/module.h> #include "trace.h" -/* - * We can't use a size but a type in alloc_percpu() - * So let's create a dummy type that matches the desired size - */ -typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t; -char *trace_profile_buf; -EXPORT_SYMBOL_GPL(trace_profile_buf); +char *perf_trace_buf; +EXPORT_SYMBOL_GPL(perf_trace_buf); + +char *perf_trace_buf_nmi; +EXPORT_SYMBOL_GPL(perf_trace_buf_nmi); -char *trace_profile_buf_nmi; -EXPORT_SYMBOL_GPL(trace_profile_buf_nmi); +typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ; /* Count the events in use (per event id, not per instance) */ static int total_profile_count; @@ -32,20 +29,20 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event) return 0; if (!total_profile_count) { - buf = (char *)alloc_percpu(profile_buf_t); + buf = (char *)alloc_percpu(perf_trace_t); if (!buf) goto fail_buf; - rcu_assign_pointer(trace_profile_buf, buf); + rcu_assign_pointer(perf_trace_buf, buf); - buf = (char *)alloc_percpu(profile_buf_t); + buf = (char *)alloc_percpu(perf_trace_t); if (!buf) goto fail_buf_nmi; - rcu_assign_pointer(trace_profile_buf_nmi, buf); + rcu_assign_pointer(perf_trace_buf_nmi, buf); } - ret = event->profile_enable(); + ret = event->profile_enable(event); if (!ret) { total_profile_count++; return 0; @@ -53,10 +50,10 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event) fail_buf_nmi: if (!total_profile_count) { - free_percpu(trace_profile_buf_nmi); - free_percpu(trace_profile_buf); - trace_profile_buf_nmi = NULL; - trace_profile_buf = NULL; + free_percpu(perf_trace_buf_nmi); + free_percpu(perf_trace_buf); + perf_trace_buf_nmi = NULL; + perf_trace_buf = NULL; } fail_buf: atomic_dec(&event->profile_count); @@ -89,14 +86,14 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event) if (!atomic_add_negative(-1, &event->profile_count)) return; - event->profile_disable(); + event->profile_disable(event); if (!--total_profile_count) { - buf = trace_profile_buf; - rcu_assign_pointer(trace_profile_buf, NULL); + buf = perf_trace_buf; + rcu_assign_pointer(perf_trace_buf, NULL); - nmi_buf = trace_profile_buf_nmi; - rcu_assign_pointer(trace_profile_buf_nmi, NULL); + nmi_buf = perf_trace_buf_nmi; + rcu_assign_pointer(perf_trace_buf_nmi, NULL); /* * Ensure every events in profiling have finished before diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index d128f65778e..1d18315dc83 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -93,9 +93,7 @@ int trace_define_common_fields(struct ftrace_event_call *call) } EXPORT_SYMBOL_GPL(trace_define_common_fields); -#ifdef CONFIG_MODULES - -static void trace_destroy_fields(struct ftrace_event_call *call) +void trace_destroy_fields(struct ftrace_event_call *call) { struct ftrace_event_field *field, *next; @@ -107,8 +105,6 @@ static void trace_destroy_fields(struct ftrace_event_call *call) } } -#endif /* CONFIG_MODULES */ - static void ftrace_event_enable_disable(struct ftrace_event_call *call, int enable) { @@ -117,14 +113,14 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call, if (call->enabled) { call->enabled = 0; tracing_stop_cmdline_record(); - call->unregfunc(call->data); + call->unregfunc(call); } break; case 1: if (!call->enabled) { call->enabled = 1; tracing_start_cmdline_record(); - call->regfunc(call->data); + call->regfunc(call); } break; } @@ -507,7 +503,7 @@ extern char *__bad_type_size(void); #define FIELD(type, name) \ sizeof(type) != sizeof(field.name) ? __bad_type_size() : \ #type, "common_" #name, offsetof(typeof(field), name), \ - sizeof(field.name) + sizeof(field.name), is_signed_type(type) static int trace_write_header(struct trace_seq *s) { @@ -515,17 +511,17 @@ static int trace_write_header(struct trace_seq *s) /* struct trace_entry */ return trace_seq_printf(s, - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\n", - FIELD(unsigned short, type), - FIELD(unsigned char, flags), - FIELD(unsigned char, preempt_count), - FIELD(int, pid), - FIELD(int, lock_depth)); + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" + "\n", + FIELD(unsigned short, type), + FIELD(unsigned char, flags), + FIELD(unsigned char, preempt_count), + FIELD(int, pid), + FIELD(int, lock_depth)); } static ssize_t @@ -878,9 +874,9 @@ event_subsystem_dir(const char *name, struct dentry *d_events) "'%s/filter' entry\n", name); } - entry = trace_create_file("enable", 0644, system->entry, - (void *)system->name, - &ftrace_system_enable_fops); + trace_create_file("enable", 0644, system->entry, + (void *)system->name, + &ftrace_system_enable_fops); return system->entry; } @@ -892,7 +888,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, const struct file_operations *filter, const struct file_operations *format) { - struct dentry *entry; int ret; /* @@ -910,12 +905,12 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, } if (call->regfunc) - entry = trace_create_file("enable", 0644, call->dir, call, - enable); + trace_create_file("enable", 0644, call->dir, call, + enable); if (call->id && call->profile_enable) - entry = trace_create_file("id", 0444, call->dir, call, - id); + trace_create_file("id", 0444, call->dir, call, + id); if (call->define_fields) { ret = call->define_fields(call); @@ -924,41 +919,60 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, " events/%s\n", call->name); return ret; } - entry = trace_create_file("filter", 0644, call->dir, call, - filter); + trace_create_file("filter", 0644, call->dir, call, + filter); } /* A trace may not want to export its format */ if (!call->show_format) return 0; - entry = trace_create_file("format", 0444, call->dir, call, - format); + trace_create_file("format", 0444, call->dir, call, + format); return 0; } -#define for_each_event(event, start, end) \ - for (event = start; \ - (unsigned long)event < (unsigned long)end; \ - event++) +static int __trace_add_event_call(struct ftrace_event_call *call) +{ + struct dentry *d_events; + int ret; -#ifdef CONFIG_MODULES + if (!call->name) + return -EINVAL; -static LIST_HEAD(ftrace_module_file_list); + if (call->raw_init) { + ret = call->raw_init(call); + if (ret < 0) { + if (ret != -ENOSYS) + pr_warning("Could not initialize trace " + "events/%s\n", call->name); + return ret; + } + } -/* - * Modules must own their file_operations to keep up with - * reference counting. - */ -struct ftrace_module_file_ops { - struct list_head list; - struct module *mod; - struct file_operations id; - struct file_operations enable; - struct file_operations format; - struct file_operations filter; -}; + d_events = event_trace_events_dir(); + if (!d_events) + return -ENOENT; + + ret = event_create_dir(call, d_events, &ftrace_event_id_fops, + &ftrace_enable_fops, &ftrace_event_filter_fops, + &ftrace_event_format_fops); + if (!ret) + list_add(&call->list, &ftrace_events); + + return ret; +} + +/* Add an additional event_call dynamically */ +int trace_add_event_call(struct ftrace_event_call *call) +{ + int ret; + mutex_lock(&event_mutex); + ret = __trace_add_event_call(call); + mutex_unlock(&event_mutex); + return ret; +} static void remove_subsystem_dir(const char *name) { @@ -986,6 +1000,53 @@ static void remove_subsystem_dir(const char *name) } } +/* + * Must be called under locking both of event_mutex and trace_event_mutex. + */ +static void __trace_remove_event_call(struct ftrace_event_call *call) +{ + ftrace_event_enable_disable(call, 0); + if (call->event) + __unregister_ftrace_event(call->event); + debugfs_remove_recursive(call->dir); + list_del(&call->list); + trace_destroy_fields(call); + destroy_preds(call); + remove_subsystem_dir(call->system); +} + +/* Remove an event_call */ +void trace_remove_event_call(struct ftrace_event_call *call) +{ + mutex_lock(&event_mutex); + down_write(&trace_event_mutex); + __trace_remove_event_call(call); + up_write(&trace_event_mutex); + mutex_unlock(&event_mutex); +} + +#define for_each_event(event, start, end) \ + for (event = start; \ + (unsigned long)event < (unsigned long)end; \ + event++) + +#ifdef CONFIG_MODULES + +static LIST_HEAD(ftrace_module_file_list); + +/* + * Modules must own their file_operations to keep up with + * reference counting. + */ +struct ftrace_module_file_ops { + struct list_head list; + struct module *mod; + struct file_operations id; + struct file_operations enable; + struct file_operations format; + struct file_operations filter; +}; + static struct ftrace_module_file_ops * trace_create_file_ops(struct module *mod) { @@ -1043,7 +1104,7 @@ static void trace_module_add_events(struct module *mod) if (!call->name) continue; if (call->raw_init) { - ret = call->raw_init(); + ret = call->raw_init(call); if (ret < 0) { if (ret != -ENOSYS) pr_warning("Could not initialize trace " @@ -1061,10 +1122,11 @@ static void trace_module_add_events(struct module *mod) return; } call->mod = mod; - list_add(&call->list, &ftrace_events); - event_create_dir(call, d_events, - &file_ops->id, &file_ops->enable, - &file_ops->filter, &file_ops->format); + ret = event_create_dir(call, d_events, + &file_ops->id, &file_ops->enable, + &file_ops->filter, &file_ops->format); + if (!ret) + list_add(&call->list, &ftrace_events); } } @@ -1078,14 +1140,7 @@ static void trace_module_remove_events(struct module *mod) list_for_each_entry_safe(call, p, &ftrace_events, list) { if (call->mod == mod) { found = true; - ftrace_event_enable_disable(call, 0); - if (call->event) - __unregister_ftrace_event(call->event); - debugfs_remove_recursive(call->dir); - list_del(&call->list); - trace_destroy_fields(call); - destroy_preds(call); - remove_subsystem_dir(call->system); + __trace_remove_event_call(call); } } @@ -1203,7 +1258,7 @@ static __init int event_trace_init(void) if (!call->name) continue; if (call->raw_init) { - ret = call->raw_init(); + ret = call->raw_init(call); if (ret < 0) { if (ret != -ENOSYS) pr_warning("Could not initialize trace " @@ -1211,10 +1266,12 @@ static __init int event_trace_init(void) continue; } } - list_add(&call->list, &ftrace_events); - event_create_dir(call, d_events, &ftrace_event_id_fops, - &ftrace_enable_fops, &ftrace_event_filter_fops, - &ftrace_event_format_fops); + ret = event_create_dir(call, d_events, &ftrace_event_id_fops, + &ftrace_enable_fops, + &ftrace_event_filter_fops, + &ftrace_event_format_fops); + if (!ret) + list_add(&call->list, &ftrace_events); } while (true) { diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 98a6cc5c64e..50504cb228d 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -18,11 +18,10 @@ * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com> */ -#include <linux/debugfs.h> -#include <linux/uaccess.h> #include <linux/module.h> #include <linux/ctype.h> #include <linux/mutex.h> +#include <linux/perf_event.h> #include "trace.h" #include "trace_output.h" @@ -31,6 +30,7 @@ enum filter_op_ids { OP_OR, OP_AND, + OP_GLOB, OP_NE, OP_EQ, OP_LT, @@ -48,16 +48,17 @@ struct filter_op { }; static struct filter_op filter_ops[] = { - { OP_OR, "||", 1 }, - { OP_AND, "&&", 2 }, - { OP_NE, "!=", 4 }, - { OP_EQ, "==", 4 }, - { OP_LT, "<", 5 }, - { OP_LE, "<=", 5 }, - { OP_GT, ">", 5 }, - { OP_GE, ">=", 5 }, - { OP_NONE, "OP_NONE", 0 }, - { OP_OPEN_PAREN, "(", 0 }, + { OP_OR, "||", 1 }, + { OP_AND, "&&", 2 }, + { OP_GLOB, "~", 4 }, + { OP_NE, "!=", 4 }, + { OP_EQ, "==", 4 }, + { OP_LT, "<", 5 }, + { OP_LE, "<=", 5 }, + { OP_GT, ">", 5 }, + { OP_GE, ">=", 5 }, + { OP_NONE, "OP_NONE", 0 }, + { OP_OPEN_PAREN, "(", 0 }, }; enum { @@ -197,9 +198,9 @@ static int filter_pred_string(struct filter_pred *pred, void *event, char *addr = (char *)(event + pred->offset); int cmp, match; - cmp = strncmp(addr, pred->str_val, pred->str_len); + cmp = pred->regex.match(addr, &pred->regex, pred->regex.field_len); - match = (!cmp) ^ pred->not; + match = cmp ^ pred->not; return match; } @@ -211,9 +212,9 @@ static int filter_pred_pchar(struct filter_pred *pred, void *event, char **addr = (char **)(event + pred->offset); int cmp, match; - cmp = strncmp(*addr, pred->str_val, pred->str_len); + cmp = pred->regex.match(*addr, &pred->regex, pred->regex.field_len); - match = (!cmp) ^ pred->not; + match = cmp ^ pred->not; return match; } @@ -237,9 +238,9 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event, char *addr = (char *)(event + str_loc); int cmp, match; - cmp = strncmp(addr, pred->str_val, str_len); + cmp = pred->regex.match(addr, &pred->regex, str_len); - match = (!cmp) ^ pred->not; + match = cmp ^ pred->not; return match; } @@ -250,10 +251,121 @@ static int filter_pred_none(struct filter_pred *pred, void *event, return 0; } +/* Basic regex callbacks */ +static int regex_match_full(char *str, struct regex *r, int len) +{ + if (strncmp(str, r->pattern, len) == 0) + return 1; + return 0; +} + +static int regex_match_front(char *str, struct regex *r, int len) +{ + if (strncmp(str, r->pattern, len) == 0) + return 1; + return 0; +} + +static int regex_match_middle(char *str, struct regex *r, int len) +{ + if (strstr(str, r->pattern)) + return 1; + return 0; +} + +static int regex_match_end(char *str, struct regex *r, int len) +{ + char *ptr = strstr(str, r->pattern); + + if (ptr && (ptr[r->len] == 0)) + return 1; + return 0; +} + +/** + * filter_parse_regex - parse a basic regex + * @buff: the raw regex + * @len: length of the regex + * @search: will point to the beginning of the string to compare + * @not: tell whether the match will have to be inverted + * + * This passes in a buffer containing a regex and this function will + * set search to point to the search part of the buffer and + * return the type of search it is (see enum above). + * This does modify buff. + * + * Returns enum type. + * search returns the pointer to use for comparison. + * not returns 1 if buff started with a '!' + * 0 otherwise. + */ +enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not) +{ + int type = MATCH_FULL; + int i; + + if (buff[0] == '!') { + *not = 1; + buff++; + len--; + } else + *not = 0; + + *search = buff; + + for (i = 0; i < len; i++) { + if (buff[i] == '*') { + if (!i) { + *search = buff + 1; + type = MATCH_END_ONLY; + } else { + if (type == MATCH_END_ONLY) + type = MATCH_MIDDLE_ONLY; + else + type = MATCH_FRONT_ONLY; + buff[i] = 0; + break; + } + } + } + + return type; +} + +static void filter_build_regex(struct filter_pred *pred) +{ + struct regex *r = &pred->regex; + char *search; + enum regex_type type = MATCH_FULL; + int not = 0; + + if (pred->op == OP_GLOB) { + type = filter_parse_regex(r->pattern, r->len, &search, ¬); + r->len = strlen(search); + memmove(r->pattern, search, r->len+1); + } + + switch (type) { + case MATCH_FULL: + r->match = regex_match_full; + break; + case MATCH_FRONT_ONLY: + r->match = regex_match_front; + break; + case MATCH_MIDDLE_ONLY: + r->match = regex_match_middle; + break; + case MATCH_END_ONLY: + r->match = regex_match_end; + break; + } + + pred->not ^= not; +} + /* return 1 if event matches, 0 otherwise (discard) */ -int filter_match_preds(struct ftrace_event_call *call, void *rec) +int filter_match_preds(struct event_filter *filter, void *rec) { - struct event_filter *filter = call->filter; int match, top = 0, val1 = 0, val2 = 0; int stack[MAX_FILTER_PRED]; struct filter_pred *pred; @@ -396,7 +508,7 @@ static void filter_clear_pred(struct filter_pred *pred) { kfree(pred->field_name); pred->field_name = NULL; - pred->str_len = 0; + pred->regex.len = 0; } static int filter_set_pred(struct filter_pred *dest, @@ -426,9 +538,8 @@ static void filter_disable_preds(struct ftrace_event_call *call) filter->preds[i]->fn = filter_pred_none; } -void destroy_preds(struct ftrace_event_call *call) +static void __free_preds(struct event_filter *filter) { - struct event_filter *filter = call->filter; int i; if (!filter) @@ -441,21 +552,24 @@ void destroy_preds(struct ftrace_event_call *call) kfree(filter->preds); kfree(filter->filter_string); kfree(filter); +} + +void destroy_preds(struct ftrace_event_call *call) +{ + __free_preds(call->filter); call->filter = NULL; + call->filter_active = 0; } -static int init_preds(struct ftrace_event_call *call) +static struct event_filter *__alloc_preds(void) { struct event_filter *filter; struct filter_pred *pred; int i; - if (call->filter) - return 0; - - filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL); - if (!call->filter) - return -ENOMEM; + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!filter) + return ERR_PTR(-ENOMEM); filter->n_preds = 0; @@ -471,12 +585,24 @@ static int init_preds(struct ftrace_event_call *call) filter->preds[i] = pred; } - return 0; + return filter; oom: - destroy_preds(call); + __free_preds(filter); + return ERR_PTR(-ENOMEM); +} - return -ENOMEM; +static int init_preds(struct ftrace_event_call *call) +{ + if (call->filter) + return 0; + + call->filter_active = 0; + call->filter = __alloc_preds(); + if (IS_ERR(call->filter)) + return PTR_ERR(call->filter); + + return 0; } static int init_subsystem_preds(struct event_subsystem *system) @@ -499,14 +625,7 @@ static int init_subsystem_preds(struct event_subsystem *system) return 0; } -enum { - FILTER_DISABLE_ALL, - FILTER_INIT_NO_RESET, - FILTER_SKIP_NO_RESET, -}; - -static void filter_free_subsystem_preds(struct event_subsystem *system, - int flag) +static void filter_free_subsystem_preds(struct event_subsystem *system) { struct ftrace_event_call *call; @@ -517,14 +636,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system, if (strcmp(call->system, system->name) != 0) continue; - if (flag == FILTER_INIT_NO_RESET) { - call->filter->no_reset = false; - continue; - } - - if (flag == FILTER_SKIP_NO_RESET && call->filter->no_reset) - continue; - filter_disable_preds(call); remove_filter_string(call->filter); } @@ -532,10 +643,10 @@ static void filter_free_subsystem_preds(struct event_subsystem *system, static int filter_add_pred_fn(struct filter_parse_state *ps, struct ftrace_event_call *call, + struct event_filter *filter, struct filter_pred *pred, filter_pred_fn_t fn) { - struct event_filter *filter = call->filter; int idx, err; if (filter->n_preds == MAX_FILTER_PRED) { @@ -550,7 +661,6 @@ static int filter_add_pred_fn(struct filter_parse_state *ps, return err; filter->n_preds++; - call->filter_active = 1; return 0; } @@ -575,7 +685,10 @@ static bool is_string_field(struct ftrace_event_field *field) static int is_legal_op(struct ftrace_event_field *field, int op) { - if (is_string_field(field) && (op != OP_EQ && op != OP_NE)) + if (is_string_field(field) && + (op != OP_EQ && op != OP_NE && op != OP_GLOB)) + return 0; + if (!is_string_field(field) && op == OP_GLOB) return 0; return 1; @@ -626,6 +739,7 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size, static int filter_add_pred(struct filter_parse_state *ps, struct ftrace_event_call *call, + struct event_filter *filter, struct filter_pred *pred, bool dry_run) { @@ -660,21 +774,22 @@ static int filter_add_pred(struct filter_parse_state *ps, } if (is_string_field(field)) { - pred->str_len = field->size; + filter_build_regex(pred); - if (field->filter_type == FILTER_STATIC_STRING) + if (field->filter_type == FILTER_STATIC_STRING) { fn = filter_pred_string; - else if (field->filter_type == FILTER_DYN_STRING) + pred->regex.field_len = field->size; + } else if (field->filter_type == FILTER_DYN_STRING) fn = filter_pred_strloc; else { fn = filter_pred_pchar; - pred->str_len = strlen(pred->str_val); + pred->regex.field_len = strlen(pred->regex.pattern); } } else { if (field->is_signed) - ret = strict_strtoll(pred->str_val, 0, &val); + ret = strict_strtoll(pred->regex.pattern, 0, &val); else - ret = strict_strtoull(pred->str_val, 0, &val); + ret = strict_strtoull(pred->regex.pattern, 0, &val); if (ret) { parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0); return -EINVAL; @@ -694,45 +809,7 @@ static int filter_add_pred(struct filter_parse_state *ps, add_pred_fn: if (!dry_run) - return filter_add_pred_fn(ps, call, pred, fn); - return 0; -} - -static int filter_add_subsystem_pred(struct filter_parse_state *ps, - struct event_subsystem *system, - struct filter_pred *pred, - char *filter_string, - bool dry_run) -{ - struct ftrace_event_call *call; - int err = 0; - bool fail = true; - - list_for_each_entry(call, &ftrace_events, list) { - - if (!call->define_fields) - continue; - - if (strcmp(call->system, system->name)) - continue; - - if (call->filter->no_reset) - continue; - - err = filter_add_pred(ps, call, pred, dry_run); - if (err) - call->filter->no_reset = true; - else - fail = false; - - if (!dry_run) - replace_filter_string(call->filter, filter_string); - } - - if (fail) { - parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); - return err; - } + return filter_add_pred_fn(ps, call, filter, pred, fn); return 0; } @@ -1045,8 +1122,8 @@ static struct filter_pred *create_pred(int op, char *operand1, char *operand2) return NULL; } - strcpy(pred->str_val, operand2); - pred->str_len = strlen(operand2); + strcpy(pred->regex.pattern, operand2); + pred->regex.len = strlen(pred->regex.pattern); pred->op = op; @@ -1090,8 +1167,8 @@ static int check_preds(struct filter_parse_state *ps) return 0; } -static int replace_preds(struct event_subsystem *system, - struct ftrace_event_call *call, +static int replace_preds(struct ftrace_event_call *call, + struct event_filter *filter, struct filter_parse_state *ps, char *filter_string, bool dry_run) @@ -1138,11 +1215,7 @@ static int replace_preds(struct event_subsystem *system, add_pred: if (!pred) return -ENOMEM; - if (call) - err = filter_add_pred(ps, call, pred, false); - else - err = filter_add_subsystem_pred(ps, system, pred, - filter_string, dry_run); + err = filter_add_pred(ps, call, filter, pred, dry_run); filter_free_pred(pred); if (err) return err; @@ -1153,10 +1226,50 @@ add_pred: return 0; } -int apply_event_filter(struct ftrace_event_call *call, char *filter_string) +static int replace_system_preds(struct event_subsystem *system, + struct filter_parse_state *ps, + char *filter_string) { + struct ftrace_event_call *call; + bool fail = true; int err; + list_for_each_entry(call, &ftrace_events, list) { + struct event_filter *filter = call->filter; + + if (!call->define_fields) + continue; + + if (strcmp(call->system, system->name) != 0) + continue; + + /* try to see if the filter can be applied */ + err = replace_preds(call, filter, ps, filter_string, true); + if (err) + continue; + + /* really apply the filter */ + filter_disable_preds(call); + err = replace_preds(call, filter, ps, filter_string, false); + if (err) + filter_disable_preds(call); + else { + call->filter_active = 1; + replace_filter_string(filter, filter_string); + } + fail = false; + } + + if (fail) { + parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); + return -EINVAL; + } + return 0; +} + +int apply_event_filter(struct ftrace_event_call *call, char *filter_string) +{ + int err; struct filter_parse_state *ps; mutex_lock(&event_mutex); @@ -1168,8 +1281,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) if (!strcmp(strstrip(filter_string), "0")) { filter_disable_preds(call); remove_filter_string(call->filter); - mutex_unlock(&event_mutex); - return 0; + goto out_unlock; } err = -ENOMEM; @@ -1187,10 +1299,11 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) goto out; } - err = replace_preds(NULL, call, ps, filter_string, false); + err = replace_preds(call, call->filter, ps, filter_string, false); if (err) append_filter_err(ps, call->filter); - + else + call->filter_active = 1; out: filter_opstack_clear(ps); postfix_clear(ps); @@ -1205,7 +1318,6 @@ int apply_subsystem_event_filter(struct event_subsystem *system, char *filter_string) { int err; - struct filter_parse_state *ps; mutex_lock(&event_mutex); @@ -1215,10 +1327,9 @@ int apply_subsystem_event_filter(struct event_subsystem *system, goto out_unlock; if (!strcmp(strstrip(filter_string), "0")) { - filter_free_subsystem_preds(system, FILTER_DISABLE_ALL); + filter_free_subsystem_preds(system); remove_filter_string(system->filter); - mutex_unlock(&event_mutex); - return 0; + goto out_unlock; } err = -ENOMEM; @@ -1235,31 +1346,87 @@ int apply_subsystem_event_filter(struct event_subsystem *system, goto out; } - filter_free_subsystem_preds(system, FILTER_INIT_NO_RESET); - - /* try to see the filter can be applied to which events */ - err = replace_preds(system, NULL, ps, filter_string, true); - if (err) { + err = replace_system_preds(system, ps, filter_string); + if (err) append_filter_err(ps, system->filter); - goto out; + +out: + filter_opstack_clear(ps); + postfix_clear(ps); + kfree(ps); +out_unlock: + mutex_unlock(&event_mutex); + + return err; +} + +#ifdef CONFIG_EVENT_PROFILE + +void ftrace_profile_free_filter(struct perf_event *event) +{ + struct event_filter *filter = event->filter; + + event->filter = NULL; + __free_preds(filter); +} + +int ftrace_profile_set_filter(struct perf_event *event, int event_id, + char *filter_str) +{ + int err; + struct event_filter *filter; + struct filter_parse_state *ps; + struct ftrace_event_call *call = NULL; + + mutex_lock(&event_mutex); + + list_for_each_entry(call, &ftrace_events, list) { + if (call->id == event_id) + break; } - filter_free_subsystem_preds(system, FILTER_SKIP_NO_RESET); + err = -EINVAL; + if (!call) + goto out_unlock; - /* really apply the filter to the events */ - err = replace_preds(system, NULL, ps, filter_string, false); - if (err) { - append_filter_err(ps, system->filter); - filter_free_subsystem_preds(system, 2); + err = -EEXIST; + if (event->filter) + goto out_unlock; + + filter = __alloc_preds(); + if (IS_ERR(filter)) { + err = PTR_ERR(filter); + goto out_unlock; } -out: + err = -ENOMEM; + ps = kzalloc(sizeof(*ps), GFP_KERNEL); + if (!ps) + goto free_preds; + + parse_init(ps, filter_ops, filter_str); + err = filter_parse(ps); + if (err) + goto free_ps; + + err = replace_preds(call, filter, ps, filter_str, false); + if (!err) + event->filter = filter; + +free_ps: filter_opstack_clear(ps); postfix_clear(ps); kfree(ps); + +free_preds: + if (err) + __free_preds(filter); + out_unlock: mutex_unlock(&event_mutex); return err; } +#endif /* CONFIG_EVENT_PROFILE */ + diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 9753fcc61bc..dff8c84ddf1 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -48,11 +48,11 @@ struct ____ftrace_##name { \ tstruct \ }; \ -static void __used ____ftrace_check_##name(void) \ +static void __always_unused ____ftrace_check_##name(void) \ { \ struct ____ftrace_##name *__entry = NULL; \ \ - /* force cmpile-time check on F_printk() */ \ + /* force compile-time check on F_printk() */ \ printk(print); \ } @@ -66,44 +66,47 @@ static void __used ____ftrace_check_##name(void) \ #undef __field #define __field(type, item) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%zu;\tsize:%zu;\n", \ + "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ offsetof(typeof(field), item), \ - sizeof(field.item)); \ + sizeof(field.item), is_signed_type(type)); \ if (!ret) \ return 0; #undef __field_desc #define __field_desc(type, container, item) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%zu;\tsize:%zu;\n", \ + "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ offsetof(typeof(field), container.item), \ - sizeof(field.container.item)); \ + sizeof(field.container.item), \ + is_signed_type(type)); \ if (!ret) \ return 0; #undef __array #define __array(type, item, len) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%zu;\tsize:%zu;\n", \ - offsetof(typeof(field), item), \ - sizeof(field.item)); \ + "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ + offsetof(typeof(field), item), \ + sizeof(field.item), is_signed_type(type)); \ if (!ret) \ return 0; #undef __array_desc #define __array_desc(type, container, item, len) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%zu;\tsize:%zu;\n", \ + "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ offsetof(typeof(field), container.item), \ - sizeof(field.container.item)); \ + sizeof(field.container.item), \ + is_signed_type(type)); \ if (!ret) \ return 0; #undef __dynamic_array #define __dynamic_array(type, item) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%zu;\tsize:0;\n", \ - offsetof(typeof(field), item)); \ + "offset:%zu;\tsize:0;\tsigned:%u;\n", \ + offsetof(typeof(field), item), \ + is_signed_type(type)); \ if (!ret) \ return 0; @@ -131,7 +134,6 @@ ftrace_format_##name(struct ftrace_event_call *unused, \ #include "trace_entries.h" - #undef __field #define __field(type, item) \ ret = trace_define_field(event_call, #type, #item, \ @@ -193,6 +195,11 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ #include "trace_entries.h" +static int ftrace_raw_init_event(struct ftrace_event_call *call) +{ + INIT_LIST_HEAD(&call->fields); + return 0; +} #undef __field #define __field(type, item) @@ -211,7 +218,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ #undef FTRACE_ENTRY #define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \ -static int ftrace_raw_init_event_##call(void); \ \ struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ @@ -219,14 +225,9 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .name = #call, \ .id = type, \ .system = __stringify(TRACE_SYSTEM), \ - .raw_init = ftrace_raw_init_event_##call, \ + .raw_init = ftrace_raw_init_event, \ .show_format = ftrace_format_##call, \ .define_fields = ftrace_define_fields_##call, \ }; \ -static int ftrace_raw_init_event_##call(void) \ -{ \ - INIT_LIST_HEAD(&event_##call.fields); \ - return 0; \ -} \ #include "trace_entries.h" diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c new file mode 100644 index 00000000000..aff5f80b59b --- /dev/null +++ b/kernel/trace/trace_kprobe.c @@ -0,0 +1,1523 @@ +/* + * Kprobes-based tracing events + * + * Created by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/module.h> +#include <linux/uaccess.h> +#include <linux/kprobes.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/debugfs.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/ptrace.h> +#include <linux/perf_event.h> + +#include "trace.h" +#include "trace_output.h" + +#define MAX_TRACE_ARGS 128 +#define MAX_ARGSTR_LEN 63 +#define MAX_EVENT_NAME_LEN 64 +#define KPROBE_EVENT_SYSTEM "kprobes" + +/* Reserved field names */ +#define FIELD_STRING_IP "__probe_ip" +#define FIELD_STRING_NARGS "__probe_nargs" +#define FIELD_STRING_RETIP "__probe_ret_ip" +#define FIELD_STRING_FUNC "__probe_func" + +const char *reserved_field_names[] = { + "common_type", + "common_flags", + "common_preempt_count", + "common_pid", + "common_tgid", + "common_lock_depth", + FIELD_STRING_IP, + FIELD_STRING_NARGS, + FIELD_STRING_RETIP, + FIELD_STRING_FUNC, +}; + +struct fetch_func { + unsigned long (*func)(struct pt_regs *, void *); + void *data; +}; + +static __kprobes unsigned long call_fetch(struct fetch_func *f, + struct pt_regs *regs) +{ + return f->func(regs, f->data); +} + +/* fetch handlers */ +static __kprobes unsigned long fetch_register(struct pt_regs *regs, + void *offset) +{ + return regs_get_register(regs, (unsigned int)((unsigned long)offset)); +} + +static __kprobes unsigned long fetch_stack(struct pt_regs *regs, + void *num) +{ + return regs_get_kernel_stack_nth(regs, + (unsigned int)((unsigned long)num)); +} + +static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr) +{ + unsigned long retval; + + if (probe_kernel_address(addr, retval)) + return 0; + return retval; +} + +static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num) +{ + return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num)); +} + +static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs, + void *dummy) +{ + return regs_return_value(regs); +} + +static __kprobes unsigned long fetch_stack_address(struct pt_regs *regs, + void *dummy) +{ + return kernel_stack_pointer(regs); +} + +/* Memory fetching by symbol */ +struct symbol_cache { + char *symbol; + long offset; + unsigned long addr; +}; + +static unsigned long update_symbol_cache(struct symbol_cache *sc) +{ + sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol); + if (sc->addr) + sc->addr += sc->offset; + return sc->addr; +} + +static void free_symbol_cache(struct symbol_cache *sc) +{ + kfree(sc->symbol); + kfree(sc); +} + +static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset) +{ + struct symbol_cache *sc; + + if (!sym || strlen(sym) == 0) + return NULL; + sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL); + if (!sc) + return NULL; + + sc->symbol = kstrdup(sym, GFP_KERNEL); + if (!sc->symbol) { + kfree(sc); + return NULL; + } + sc->offset = offset; + + update_symbol_cache(sc); + return sc; +} + +static __kprobes unsigned long fetch_symbol(struct pt_regs *regs, void *data) +{ + struct symbol_cache *sc = data; + + if (sc->addr) + return fetch_memory(regs, (void *)sc->addr); + else + return 0; +} + +/* Special indirect memory access interface */ +struct indirect_fetch_data { + struct fetch_func orig; + long offset; +}; + +static __kprobes unsigned long fetch_indirect(struct pt_regs *regs, void *data) +{ + struct indirect_fetch_data *ind = data; + unsigned long addr; + + addr = call_fetch(&ind->orig, regs); + if (addr) { + addr += ind->offset; + return fetch_memory(regs, (void *)addr); + } else + return 0; +} + +static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data) +{ + if (data->orig.func == fetch_indirect) + free_indirect_fetch_data(data->orig.data); + else if (data->orig.func == fetch_symbol) + free_symbol_cache(data->orig.data); + kfree(data); +} + +/** + * Kprobe event core functions + */ + +struct probe_arg { + struct fetch_func fetch; + const char *name; +}; + +/* Flags for trace_probe */ +#define TP_FLAG_TRACE 1 +#define TP_FLAG_PROFILE 2 + +struct trace_probe { + struct list_head list; + struct kretprobe rp; /* Use rp.kp for kprobe use */ + unsigned long nhit; + unsigned int flags; /* For TP_FLAG_* */ + const char *symbol; /* symbol name */ + struct ftrace_event_call call; + struct trace_event event; + unsigned int nr_args; + struct probe_arg args[]; +}; + +#define SIZEOF_TRACE_PROBE(n) \ + (offsetof(struct trace_probe, args) + \ + (sizeof(struct probe_arg) * (n))) + +static __kprobes int probe_is_return(struct trace_probe *tp) +{ + return tp->rp.handler != NULL; +} + +static __kprobes const char *probe_symbol(struct trace_probe *tp) +{ + return tp->symbol ? tp->symbol : "unknown"; +} + +static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff) +{ + int ret = -EINVAL; + + if (ff->func == fetch_argument) + ret = snprintf(buf, n, "$arg%lu", (unsigned long)ff->data); + else if (ff->func == fetch_register) { + const char *name; + name = regs_query_register_name((unsigned int)((long)ff->data)); + ret = snprintf(buf, n, "%%%s", name); + } else if (ff->func == fetch_stack) + ret = snprintf(buf, n, "$stack%lu", (unsigned long)ff->data); + else if (ff->func == fetch_memory) + ret = snprintf(buf, n, "@0x%p", ff->data); + else if (ff->func == fetch_symbol) { + struct symbol_cache *sc = ff->data; + if (sc->offset) + ret = snprintf(buf, n, "@%s%+ld", sc->symbol, + sc->offset); + else + ret = snprintf(buf, n, "@%s", sc->symbol); + } else if (ff->func == fetch_retvalue) + ret = snprintf(buf, n, "$retval"); + else if (ff->func == fetch_stack_address) + ret = snprintf(buf, n, "$stack"); + else if (ff->func == fetch_indirect) { + struct indirect_fetch_data *id = ff->data; + size_t l = 0; + ret = snprintf(buf, n, "%+ld(", id->offset); + if (ret >= n) + goto end; + l += ret; + ret = probe_arg_string(buf + l, n - l, &id->orig); + if (ret < 0) + goto end; + l += ret; + ret = snprintf(buf + l, n - l, ")"); + ret += l; + } +end: + if (ret >= n) + return -ENOSPC; + return ret; +} + +static int register_probe_event(struct trace_probe *tp); +static void unregister_probe_event(struct trace_probe *tp); + +static DEFINE_MUTEX(probe_lock); +static LIST_HEAD(probe_list); + +static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs); +static int kretprobe_dispatcher(struct kretprobe_instance *ri, + struct pt_regs *regs); + +/* + * Allocate new trace_probe and initialize it (including kprobes). + */ +static struct trace_probe *alloc_trace_probe(const char *group, + const char *event, + void *addr, + const char *symbol, + unsigned long offs, + int nargs, int is_return) +{ + struct trace_probe *tp; + + tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL); + if (!tp) + return ERR_PTR(-ENOMEM); + + if (symbol) { + tp->symbol = kstrdup(symbol, GFP_KERNEL); + if (!tp->symbol) + goto error; + tp->rp.kp.symbol_name = tp->symbol; + tp->rp.kp.offset = offs; + } else + tp->rp.kp.addr = addr; + + if (is_return) + tp->rp.handler = kretprobe_dispatcher; + else + tp->rp.kp.pre_handler = kprobe_dispatcher; + + if (!event) + goto error; + tp->call.name = kstrdup(event, GFP_KERNEL); + if (!tp->call.name) + goto error; + + if (!group) + goto error; + tp->call.system = kstrdup(group, GFP_KERNEL); + if (!tp->call.system) + goto error; + + INIT_LIST_HEAD(&tp->list); + return tp; +error: + kfree(tp->call.name); + kfree(tp->symbol); + kfree(tp); + return ERR_PTR(-ENOMEM); +} + +static void free_probe_arg(struct probe_arg *arg) +{ + if (arg->fetch.func == fetch_symbol) + free_symbol_cache(arg->fetch.data); + else if (arg->fetch.func == fetch_indirect) + free_indirect_fetch_data(arg->fetch.data); + kfree(arg->name); +} + +static void free_trace_probe(struct trace_probe *tp) +{ + int i; + + for (i = 0; i < tp->nr_args; i++) + free_probe_arg(&tp->args[i]); + + kfree(tp->call.system); + kfree(tp->call.name); + kfree(tp->symbol); + kfree(tp); +} + +static struct trace_probe *find_probe_event(const char *event, + const char *group) +{ + struct trace_probe *tp; + + list_for_each_entry(tp, &probe_list, list) + if (strcmp(tp->call.name, event) == 0 && + strcmp(tp->call.system, group) == 0) + return tp; + return NULL; +} + +/* Unregister a trace_probe and probe_event: call with locking probe_lock */ +static void unregister_trace_probe(struct trace_probe *tp) +{ + if (probe_is_return(tp)) + unregister_kretprobe(&tp->rp); + else + unregister_kprobe(&tp->rp.kp); + list_del(&tp->list); + unregister_probe_event(tp); +} + +/* Register a trace_probe and probe_event */ +static int register_trace_probe(struct trace_probe *tp) +{ + struct trace_probe *old_tp; + int ret; + + mutex_lock(&probe_lock); + + /* register as an event */ + old_tp = find_probe_event(tp->call.name, tp->call.system); + if (old_tp) { + /* delete old event */ + unregister_trace_probe(old_tp); + free_trace_probe(old_tp); + } + ret = register_probe_event(tp); + if (ret) { + pr_warning("Faild to register probe event(%d)\n", ret); + goto end; + } + + tp->rp.kp.flags |= KPROBE_FLAG_DISABLED; + if (probe_is_return(tp)) + ret = register_kretprobe(&tp->rp); + else + ret = register_kprobe(&tp->rp.kp); + + if (ret) { + pr_warning("Could not insert probe(%d)\n", ret); + if (ret == -EILSEQ) { + pr_warning("Probing address(0x%p) is not an " + "instruction boundary.\n", + tp->rp.kp.addr); + ret = -EINVAL; + } + unregister_probe_event(tp); + } else + list_add_tail(&tp->list, &probe_list); +end: + mutex_unlock(&probe_lock); + return ret; +} + +/* Split symbol and offset. */ +static int split_symbol_offset(char *symbol, unsigned long *offset) +{ + char *tmp; + int ret; + + if (!offset) + return -EINVAL; + + tmp = strchr(symbol, '+'); + if (tmp) { + /* skip sign because strict_strtol doesn't accept '+' */ + ret = strict_strtoul(tmp + 1, 0, offset); + if (ret) + return ret; + *tmp = '\0'; + } else + *offset = 0; + return 0; +} + +#define PARAM_MAX_ARGS 16 +#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long)) + +static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return) +{ + int ret = 0; + unsigned long param; + + if (strcmp(arg, "retval") == 0) { + if (is_return) { + ff->func = fetch_retvalue; + ff->data = NULL; + } else + ret = -EINVAL; + } else if (strncmp(arg, "stack", 5) == 0) { + if (arg[5] == '\0') { + ff->func = fetch_stack_address; + ff->data = NULL; + } else if (isdigit(arg[5])) { + ret = strict_strtoul(arg + 5, 10, ¶m); + if (ret || param > PARAM_MAX_STACK) + ret = -EINVAL; + else { + ff->func = fetch_stack; + ff->data = (void *)param; + } + } else + ret = -EINVAL; + } else if (strncmp(arg, "arg", 3) == 0 && isdigit(arg[3])) { + ret = strict_strtoul(arg + 3, 10, ¶m); + if (ret || param > PARAM_MAX_ARGS) + ret = -EINVAL; + else { + ff->func = fetch_argument; + ff->data = (void *)param; + } + } else + ret = -EINVAL; + return ret; +} + +/* Recursive argument parser */ +static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return) +{ + int ret = 0; + unsigned long param; + long offset; + char *tmp; + + switch (arg[0]) { + case '$': + ret = parse_probe_vars(arg + 1, ff, is_return); + break; + case '%': /* named register */ + ret = regs_query_register_offset(arg + 1); + if (ret >= 0) { + ff->func = fetch_register; + ff->data = (void *)(unsigned long)ret; + ret = 0; + } + break; + case '@': /* memory or symbol */ + if (isdigit(arg[1])) { + ret = strict_strtoul(arg + 1, 0, ¶m); + if (ret) + break; + ff->func = fetch_memory; + ff->data = (void *)param; + } else { + ret = split_symbol_offset(arg + 1, &offset); + if (ret) + break; + ff->data = alloc_symbol_cache(arg + 1, offset); + if (ff->data) + ff->func = fetch_symbol; + else + ret = -EINVAL; + } + break; + case '+': /* indirect memory */ + case '-': + tmp = strchr(arg, '('); + if (!tmp) { + ret = -EINVAL; + break; + } + *tmp = '\0'; + ret = strict_strtol(arg + 1, 0, &offset); + if (ret) + break; + if (arg[0] == '-') + offset = -offset; + arg = tmp + 1; + tmp = strrchr(arg, ')'); + if (tmp) { + struct indirect_fetch_data *id; + *tmp = '\0'; + id = kzalloc(sizeof(struct indirect_fetch_data), + GFP_KERNEL); + if (!id) + return -ENOMEM; + id->offset = offset; + ret = __parse_probe_arg(arg, &id->orig, is_return); + if (ret) + kfree(id); + else { + ff->func = fetch_indirect; + ff->data = (void *)id; + } + } else + ret = -EINVAL; + break; + default: + /* TODO: support custom handler */ + ret = -EINVAL; + } + return ret; +} + +/* String length checking wrapper */ +static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return) +{ + if (strlen(arg) > MAX_ARGSTR_LEN) { + pr_info("Argument is too long.: %s\n", arg); + return -ENOSPC; + } + return __parse_probe_arg(arg, ff, is_return); +} + +/* Return 1 if name is reserved or already used by another argument */ +static int conflict_field_name(const char *name, + struct probe_arg *args, int narg) +{ + int i; + for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++) + if (strcmp(reserved_field_names[i], name) == 0) + return 1; + for (i = 0; i < narg; i++) + if (strcmp(args[i].name, name) == 0) + return 1; + return 0; +} + +static int create_trace_probe(int argc, char **argv) +{ + /* + * Argument syntax: + * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS] + * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS] + * Fetch args: + * $argN : fetch Nth of function argument. (N:0-) + * $retval : fetch return value + * $stack : fetch stack address + * $stackN : fetch Nth of stack (N:0-) + * @ADDR : fetch memory at ADDR (ADDR should be in kernel) + * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol) + * %REG : fetch register REG + * Indirect memory fetch: + * +|-offs(ARG) : fetch memory at ARG +|- offs address. + * Alias name of args: + * NAME=FETCHARG : set NAME as alias of FETCHARG. + */ + struct trace_probe *tp; + int i, ret = 0; + int is_return = 0; + char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL; + unsigned long offset = 0; + void *addr = NULL; + char buf[MAX_EVENT_NAME_LEN]; + + if (argc < 2) { + pr_info("Probe point is not specified.\n"); + return -EINVAL; + } + + if (argv[0][0] == 'p') + is_return = 0; + else if (argv[0][0] == 'r') + is_return = 1; + else { + pr_info("Probe definition must be started with 'p' or 'r'.\n"); + return -EINVAL; + } + + if (argv[0][1] == ':') { + event = &argv[0][2]; + if (strchr(event, '/')) { + group = event; + event = strchr(group, '/') + 1; + event[-1] = '\0'; + if (strlen(group) == 0) { + pr_info("Group name is not specifiled\n"); + return -EINVAL; + } + } + if (strlen(event) == 0) { + pr_info("Event name is not specifiled\n"); + return -EINVAL; + } + } + + if (isdigit(argv[1][0])) { + if (is_return) { + pr_info("Return probe point must be a symbol.\n"); + return -EINVAL; + } + /* an address specified */ + ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr); + if (ret) { + pr_info("Failed to parse address.\n"); + return ret; + } + } else { + /* a symbol specified */ + symbol = argv[1]; + /* TODO: support .init module functions */ + ret = split_symbol_offset(symbol, &offset); + if (ret) { + pr_info("Failed to parse symbol.\n"); + return ret; + } + if (offset && is_return) { + pr_info("Return probe must be used without offset.\n"); + return -EINVAL; + } + } + argc -= 2; argv += 2; + + /* setup a probe */ + if (!group) + group = KPROBE_EVENT_SYSTEM; + if (!event) { + /* Make a new event name */ + if (symbol) + snprintf(buf, MAX_EVENT_NAME_LEN, "%c@%s%+ld", + is_return ? 'r' : 'p', symbol, offset); + else + snprintf(buf, MAX_EVENT_NAME_LEN, "%c@0x%p", + is_return ? 'r' : 'p', addr); + event = buf; + } + tp = alloc_trace_probe(group, event, addr, symbol, offset, argc, + is_return); + if (IS_ERR(tp)) { + pr_info("Failed to allocate trace_probe.(%d)\n", + (int)PTR_ERR(tp)); + return PTR_ERR(tp); + } + + /* parse arguments */ + ret = 0; + for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { + /* Parse argument name */ + arg = strchr(argv[i], '='); + if (arg) + *arg++ = '\0'; + else + arg = argv[i]; + + if (conflict_field_name(argv[i], tp->args, i)) { + pr_info("Argument%d name '%s' conflicts with " + "another field.\n", i, argv[i]); + ret = -EINVAL; + goto error; + } + + tp->args[i].name = kstrdup(argv[i], GFP_KERNEL); + if (!tp->args[i].name) { + pr_info("Failed to allocate argument%d name '%s'.\n", + i, argv[i]); + ret = -ENOMEM; + goto error; + } + + /* Parse fetch argument */ + ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return); + if (ret) { + pr_info("Parse error at argument%d. (%d)\n", i, ret); + kfree(tp->args[i].name); + goto error; + } + + tp->nr_args++; + } + + ret = register_trace_probe(tp); + if (ret) + goto error; + return 0; + +error: + free_trace_probe(tp); + return ret; +} + +static void cleanup_all_probes(void) +{ + struct trace_probe *tp; + + mutex_lock(&probe_lock); + /* TODO: Use batch unregistration */ + while (!list_empty(&probe_list)) { + tp = list_entry(probe_list.next, struct trace_probe, list); + unregister_trace_probe(tp); + free_trace_probe(tp); + } + mutex_unlock(&probe_lock); +} + + +/* Probes listing interfaces */ +static void *probes_seq_start(struct seq_file *m, loff_t *pos) +{ + mutex_lock(&probe_lock); + return seq_list_start(&probe_list, *pos); +} + +static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + return seq_list_next(v, &probe_list, pos); +} + +static void probes_seq_stop(struct seq_file *m, void *v) +{ + mutex_unlock(&probe_lock); +} + +static int probes_seq_show(struct seq_file *m, void *v) +{ + struct trace_probe *tp = v; + int i, ret; + char buf[MAX_ARGSTR_LEN + 1]; + + seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p'); + seq_printf(m, ":%s/%s", tp->call.system, tp->call.name); + + if (!tp->symbol) + seq_printf(m, " 0x%p", tp->rp.kp.addr); + else if (tp->rp.kp.offset) + seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset); + else + seq_printf(m, " %s", probe_symbol(tp)); + + for (i = 0; i < tp->nr_args; i++) { + ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i].fetch); + if (ret < 0) { + pr_warning("Argument%d decoding error(%d).\n", i, ret); + return ret; + } + seq_printf(m, " %s=%s", tp->args[i].name, buf); + } + seq_printf(m, "\n"); + return 0; +} + +static const struct seq_operations probes_seq_op = { + .start = probes_seq_start, + .next = probes_seq_next, + .stop = probes_seq_stop, + .show = probes_seq_show +}; + +static int probes_open(struct inode *inode, struct file *file) +{ + if ((file->f_mode & FMODE_WRITE) && + (file->f_flags & O_TRUNC)) + cleanup_all_probes(); + + return seq_open(file, &probes_seq_op); +} + +static int command_trace_probe(const char *buf) +{ + char **argv; + int argc = 0, ret = 0; + + argv = argv_split(GFP_KERNEL, buf, &argc); + if (!argv) + return -ENOMEM; + + if (argc) + ret = create_trace_probe(argc, argv); + + argv_free(argv); + return ret; +} + +#define WRITE_BUFSIZE 128 + +static ssize_t probes_write(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos) +{ + char *kbuf, *tmp; + int ret; + size_t done; + size_t size; + + kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL); + if (!kbuf) + return -ENOMEM; + + ret = done = 0; + while (done < count) { + size = count - done; + if (size >= WRITE_BUFSIZE) + size = WRITE_BUFSIZE - 1; + if (copy_from_user(kbuf, buffer + done, size)) { + ret = -EFAULT; + goto out; + } + kbuf[size] = '\0'; + tmp = strchr(kbuf, '\n'); + if (tmp) { + *tmp = '\0'; + size = tmp - kbuf + 1; + } else if (done + size < count) { + pr_warning("Line length is too long: " + "Should be less than %d.", WRITE_BUFSIZE); + ret = -EINVAL; + goto out; + } + done += size; + /* Remove comments */ + tmp = strchr(kbuf, '#'); + if (tmp) + *tmp = '\0'; + + ret = command_trace_probe(kbuf); + if (ret) + goto out; + } + ret = done; +out: + kfree(kbuf); + return ret; +} + +static const struct file_operations kprobe_events_ops = { + .owner = THIS_MODULE, + .open = probes_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, + .write = probes_write, +}; + +/* Probes profiling interfaces */ +static int probes_profile_seq_show(struct seq_file *m, void *v) +{ + struct trace_probe *tp = v; + + seq_printf(m, " %-44s %15lu %15lu\n", tp->call.name, tp->nhit, + tp->rp.kp.nmissed); + + return 0; +} + +static const struct seq_operations profile_seq_op = { + .start = probes_seq_start, + .next = probes_seq_next, + .stop = probes_seq_stop, + .show = probes_profile_seq_show +}; + +static int profile_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &profile_seq_op); +} + +static const struct file_operations kprobe_profile_ops = { + .owner = THIS_MODULE, + .open = profile_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +/* Kprobe handler */ +static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) +{ + struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); + struct kprobe_trace_entry *entry; + struct ring_buffer_event *event; + struct ring_buffer *buffer; + int size, i, pc; + unsigned long irq_flags; + struct ftrace_event_call *call = &tp->call; + + tp->nhit++; + + local_save_flags(irq_flags); + pc = preempt_count(); + + size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); + + event = trace_current_buffer_lock_reserve(&buffer, call->id, size, + irq_flags, pc); + if (!event) + return 0; + + entry = ring_buffer_event_data(event); + entry->nargs = tp->nr_args; + entry->ip = (unsigned long)kp->addr; + for (i = 0; i < tp->nr_args; i++) + entry->args[i] = call_fetch(&tp->args[i].fetch, regs); + + if (!filter_current_check_discard(buffer, call, entry, event)) + trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); + return 0; +} + +/* Kretprobe handler */ +static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri, + struct pt_regs *regs) +{ + struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); + struct kretprobe_trace_entry *entry; + struct ring_buffer_event *event; + struct ring_buffer *buffer; + int size, i, pc; + unsigned long irq_flags; + struct ftrace_event_call *call = &tp->call; + + local_save_flags(irq_flags); + pc = preempt_count(); + + size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); + + event = trace_current_buffer_lock_reserve(&buffer, call->id, size, + irq_flags, pc); + if (!event) + return 0; + + entry = ring_buffer_event_data(event); + entry->nargs = tp->nr_args; + entry->func = (unsigned long)tp->rp.kp.addr; + entry->ret_ip = (unsigned long)ri->ret_addr; + for (i = 0; i < tp->nr_args; i++) + entry->args[i] = call_fetch(&tp->args[i].fetch, regs); + + if (!filter_current_check_discard(buffer, call, entry, event)) + trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); + + return 0; +} + +/* Event entry printers */ +enum print_line_t +print_kprobe_event(struct trace_iterator *iter, int flags) +{ + struct kprobe_trace_entry *field; + struct trace_seq *s = &iter->seq; + struct trace_event *event; + struct trace_probe *tp; + int i; + + field = (struct kprobe_trace_entry *)iter->ent; + event = ftrace_find_event(field->ent.type); + tp = container_of(event, struct trace_probe, event); + + if (!trace_seq_printf(s, "%s: (", tp->call.name)) + goto partial; + + if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET)) + goto partial; + + if (!trace_seq_puts(s, ")")) + goto partial; + + for (i = 0; i < field->nargs; i++) + if (!trace_seq_printf(s, " %s=%lx", + tp->args[i].name, field->args[i])) + goto partial; + + if (!trace_seq_puts(s, "\n")) + goto partial; + + return TRACE_TYPE_HANDLED; +partial: + return TRACE_TYPE_PARTIAL_LINE; +} + +enum print_line_t +print_kretprobe_event(struct trace_iterator *iter, int flags) +{ + struct kretprobe_trace_entry *field; + struct trace_seq *s = &iter->seq; + struct trace_event *event; + struct trace_probe *tp; + int i; + + field = (struct kretprobe_trace_entry *)iter->ent; + event = ftrace_find_event(field->ent.type); + tp = container_of(event, struct trace_probe, event); + + if (!trace_seq_printf(s, "%s: (", tp->call.name)) + goto partial; + + if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET)) + goto partial; + + if (!trace_seq_puts(s, " <- ")) + goto partial; + + if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET)) + goto partial; + + if (!trace_seq_puts(s, ")")) + goto partial; + + for (i = 0; i < field->nargs; i++) + if (!trace_seq_printf(s, " %s=%lx", + tp->args[i].name, field->args[i])) + goto partial; + + if (!trace_seq_puts(s, "\n")) + goto partial; + + return TRACE_TYPE_HANDLED; +partial: + return TRACE_TYPE_PARTIAL_LINE; +} + +static int probe_event_enable(struct ftrace_event_call *call) +{ + struct trace_probe *tp = (struct trace_probe *)call->data; + + tp->flags |= TP_FLAG_TRACE; + if (probe_is_return(tp)) + return enable_kretprobe(&tp->rp); + else + return enable_kprobe(&tp->rp.kp); +} + +static void probe_event_disable(struct ftrace_event_call *call) +{ + struct trace_probe *tp = (struct trace_probe *)call->data; + + tp->flags &= ~TP_FLAG_TRACE; + if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) { + if (probe_is_return(tp)) + disable_kretprobe(&tp->rp); + else + disable_kprobe(&tp->rp.kp); + } +} + +static int probe_event_raw_init(struct ftrace_event_call *event_call) +{ + INIT_LIST_HEAD(&event_call->fields); + + return 0; +} + +#undef DEFINE_FIELD +#define DEFINE_FIELD(type, item, name, is_signed) \ + do { \ + ret = trace_define_field(event_call, #type, name, \ + offsetof(typeof(field), item), \ + sizeof(field.item), is_signed, \ + FILTER_OTHER); \ + if (ret) \ + return ret; \ + } while (0) + +static int kprobe_event_define_fields(struct ftrace_event_call *event_call) +{ + int ret, i; + struct kprobe_trace_entry field; + struct trace_probe *tp = (struct trace_probe *)event_call->data; + + ret = trace_define_common_fields(event_call); + if (!ret) + return ret; + + DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); + DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1); + /* Set argument names as fields */ + for (i = 0; i < tp->nr_args; i++) + DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0); + return 0; +} + +static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) +{ + int ret, i; + struct kretprobe_trace_entry field; + struct trace_probe *tp = (struct trace_probe *)event_call->data; + + ret = trace_define_common_fields(event_call); + if (!ret) + return ret; + + DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0); + DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0); + DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1); + /* Set argument names as fields */ + for (i = 0; i < tp->nr_args; i++) + DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0); + return 0; +} + +static int __probe_event_show_format(struct trace_seq *s, + struct trace_probe *tp, const char *fmt, + const char *arg) +{ + int i; + + /* Show format */ + if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt)) + return 0; + + for (i = 0; i < tp->nr_args; i++) + if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name)) + return 0; + + if (!trace_seq_printf(s, "\", %s", arg)) + return 0; + + for (i = 0; i < tp->nr_args; i++) + if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name)) + return 0; + + return trace_seq_puts(s, "\n"); +} + +#undef SHOW_FIELD +#define SHOW_FIELD(type, item, name) \ + do { \ + ret = trace_seq_printf(s, "\tfield: " #type " %s;\t" \ + "offset:%u;\tsize:%u;\n", name, \ + (unsigned int)offsetof(typeof(field), item),\ + (unsigned int)sizeof(type)); \ + if (!ret) \ + return 0; \ + } while (0) + +static int kprobe_event_show_format(struct ftrace_event_call *call, + struct trace_seq *s) +{ + struct kprobe_trace_entry field __attribute__((unused)); + int ret, i; + struct trace_probe *tp = (struct trace_probe *)call->data; + + SHOW_FIELD(unsigned long, ip, FIELD_STRING_IP); + SHOW_FIELD(int, nargs, FIELD_STRING_NARGS); + + /* Show fields */ + for (i = 0; i < tp->nr_args; i++) + SHOW_FIELD(unsigned long, args[i], tp->args[i].name); + trace_seq_puts(s, "\n"); + + return __probe_event_show_format(s, tp, "(%lx)", + "REC->" FIELD_STRING_IP); +} + +static int kretprobe_event_show_format(struct ftrace_event_call *call, + struct trace_seq *s) +{ + struct kretprobe_trace_entry field __attribute__((unused)); + int ret, i; + struct trace_probe *tp = (struct trace_probe *)call->data; + + SHOW_FIELD(unsigned long, func, FIELD_STRING_FUNC); + SHOW_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP); + SHOW_FIELD(int, nargs, FIELD_STRING_NARGS); + + /* Show fields */ + for (i = 0; i < tp->nr_args; i++) + SHOW_FIELD(unsigned long, args[i], tp->args[i].name); + trace_seq_puts(s, "\n"); + + return __probe_event_show_format(s, tp, "(%lx <- %lx)", + "REC->" FIELD_STRING_FUNC + ", REC->" FIELD_STRING_RETIP); +} + +#ifdef CONFIG_EVENT_PROFILE + +/* Kprobe profile handler */ +static __kprobes int kprobe_profile_func(struct kprobe *kp, + struct pt_regs *regs) +{ + struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); + struct ftrace_event_call *call = &tp->call; + struct kprobe_trace_entry *entry; + struct trace_entry *ent; + int size, __size, i, pc, __cpu; + unsigned long irq_flags; + char *trace_buf; + char *raw_data; + int rctx; + + pc = preempt_count(); + __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); + size = ALIGN(__size + sizeof(u32), sizeof(u64)); + size -= sizeof(u32); + if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, + "profile buffer not large enough")) + return 0; + + /* + * Protect the non nmi buffer + * This also protects the rcu read side + */ + local_irq_save(irq_flags); + + rctx = perf_swevent_get_recursion_context(); + if (rctx < 0) + goto end_recursion; + + __cpu = smp_processor_id(); + + if (in_nmi()) + trace_buf = rcu_dereference(perf_trace_buf_nmi); + else + trace_buf = rcu_dereference(perf_trace_buf); + + if (!trace_buf) + goto end; + + raw_data = per_cpu_ptr(trace_buf, __cpu); + + /* Zero dead bytes from alignment to avoid buffer leak to userspace */ + *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; + entry = (struct kprobe_trace_entry *)raw_data; + ent = &entry->ent; + + tracing_generic_entry_update(ent, irq_flags, pc); + ent->type = call->id; + entry->nargs = tp->nr_args; + entry->ip = (unsigned long)kp->addr; + for (i = 0; i < tp->nr_args; i++) + entry->args[i] = call_fetch(&tp->args[i].fetch, regs); + perf_tp_event(call->id, entry->ip, 1, entry, size); + +end: + perf_swevent_put_recursion_context(rctx); +end_recursion: + local_irq_restore(irq_flags); + + return 0; +} + +/* Kretprobe profile handler */ +static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, + struct pt_regs *regs) +{ + struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); + struct ftrace_event_call *call = &tp->call; + struct kretprobe_trace_entry *entry; + struct trace_entry *ent; + int size, __size, i, pc, __cpu; + unsigned long irq_flags; + char *trace_buf; + char *raw_data; + int rctx; + + pc = preempt_count(); + __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); + size = ALIGN(__size + sizeof(u32), sizeof(u64)); + size -= sizeof(u32); + if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, + "profile buffer not large enough")) + return 0; + + /* + * Protect the non nmi buffer + * This also protects the rcu read side + */ + local_irq_save(irq_flags); + + rctx = perf_swevent_get_recursion_context(); + if (rctx < 0) + goto end_recursion; + + __cpu = smp_processor_id(); + + if (in_nmi()) + trace_buf = rcu_dereference(perf_trace_buf_nmi); + else + trace_buf = rcu_dereference(perf_trace_buf); + + if (!trace_buf) + goto end; + + raw_data = per_cpu_ptr(trace_buf, __cpu); + + /* Zero dead bytes from alignment to avoid buffer leak to userspace */ + *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; + entry = (struct kretprobe_trace_entry *)raw_data; + ent = &entry->ent; + + tracing_generic_entry_update(ent, irq_flags, pc); + ent->type = call->id; + entry->nargs = tp->nr_args; + entry->func = (unsigned long)tp->rp.kp.addr; + entry->ret_ip = (unsigned long)ri->ret_addr; + for (i = 0; i < tp->nr_args; i++) + entry->args[i] = call_fetch(&tp->args[i].fetch, regs); + perf_tp_event(call->id, entry->ret_ip, 1, entry, size); + +end: + perf_swevent_put_recursion_context(rctx); +end_recursion: + local_irq_restore(irq_flags); + + return 0; +} + +static int probe_profile_enable(struct ftrace_event_call *call) +{ + struct trace_probe *tp = (struct trace_probe *)call->data; + + tp->flags |= TP_FLAG_PROFILE; + + if (probe_is_return(tp)) + return enable_kretprobe(&tp->rp); + else + return enable_kprobe(&tp->rp.kp); +} + +static void probe_profile_disable(struct ftrace_event_call *call) +{ + struct trace_probe *tp = (struct trace_probe *)call->data; + + tp->flags &= ~TP_FLAG_PROFILE; + + if (!(tp->flags & TP_FLAG_TRACE)) { + if (probe_is_return(tp)) + disable_kretprobe(&tp->rp); + else + disable_kprobe(&tp->rp.kp); + } +} +#endif /* CONFIG_EVENT_PROFILE */ + + +static __kprobes +int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) +{ + struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); + + if (tp->flags & TP_FLAG_TRACE) + kprobe_trace_func(kp, regs); +#ifdef CONFIG_EVENT_PROFILE + if (tp->flags & TP_FLAG_PROFILE) + kprobe_profile_func(kp, regs); +#endif /* CONFIG_EVENT_PROFILE */ + return 0; /* We don't tweek kernel, so just return 0 */ +} + +static __kprobes +int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) +{ + struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); + + if (tp->flags & TP_FLAG_TRACE) + kretprobe_trace_func(ri, regs); +#ifdef CONFIG_EVENT_PROFILE + if (tp->flags & TP_FLAG_PROFILE) + kretprobe_profile_func(ri, regs); +#endif /* CONFIG_EVENT_PROFILE */ + return 0; /* We don't tweek kernel, so just return 0 */ +} + +static int register_probe_event(struct trace_probe *tp) +{ + struct ftrace_event_call *call = &tp->call; + int ret; + + /* Initialize ftrace_event_call */ + if (probe_is_return(tp)) { + tp->event.trace = print_kretprobe_event; + call->raw_init = probe_event_raw_init; + call->show_format = kretprobe_event_show_format; + call->define_fields = kretprobe_event_define_fields; + } else { + tp->event.trace = print_kprobe_event; + call->raw_init = probe_event_raw_init; + call->show_format = kprobe_event_show_format; + call->define_fields = kprobe_event_define_fields; + } + call->event = &tp->event; + call->id = register_ftrace_event(&tp->event); + if (!call->id) + return -ENODEV; + call->enabled = 0; + call->regfunc = probe_event_enable; + call->unregfunc = probe_event_disable; + +#ifdef CONFIG_EVENT_PROFILE + atomic_set(&call->profile_count, -1); + call->profile_enable = probe_profile_enable; + call->profile_disable = probe_profile_disable; +#endif + call->data = tp; + ret = trace_add_event_call(call); + if (ret) { + pr_info("Failed to register kprobe event: %s\n", call->name); + unregister_ftrace_event(&tp->event); + } + return ret; +} + +static void unregister_probe_event(struct trace_probe *tp) +{ + /* tp->event is unregistered in trace_remove_event_call() */ + trace_remove_event_call(&tp->call); +} + +/* Make a debugfs interface for controling probe points */ +static __init int init_kprobe_trace(void) +{ + struct dentry *d_tracer; + struct dentry *entry; + + d_tracer = tracing_init_dentry(); + if (!d_tracer) + return 0; + + entry = debugfs_create_file("kprobe_events", 0644, d_tracer, + NULL, &kprobe_events_ops); + + /* Event list interface */ + if (!entry) + pr_warning("Could not create debugfs " + "'kprobe_events' entry\n"); + + /* Profile interface */ + entry = debugfs_create_file("kprobe_profile", 0444, d_tracer, + NULL, &kprobe_profile_ops); + + if (!entry) + pr_warning("Could not create debugfs " + "'kprobe_profile' entry\n"); + return 0; +} +fs_initcall(init_kprobe_trace); + + +#ifdef CONFIG_FTRACE_STARTUP_TEST + +static int kprobe_trace_selftest_target(int a1, int a2, int a3, + int a4, int a5, int a6) +{ + return a1 + a2 + a3 + a4 + a5 + a6; +} + +static __init int kprobe_trace_self_tests_init(void) +{ + int ret; + int (*target)(int, int, int, int, int, int); + + target = kprobe_trace_selftest_target; + + pr_info("Testing kprobe tracing: "); + + ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target " + "$arg1 $arg2 $arg3 $arg4 $stack $stack0"); + if (WARN_ON_ONCE(ret)) + pr_warning("error enabling function entry\n"); + + ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " + "$retval"); + if (WARN_ON_ONCE(ret)) + pr_warning("error enabling function return\n"); + + ret = target(1, 2, 3, 4, 5, 6); + + cleanup_all_probes(); + + pr_cont("OK\n"); + return 0; +} + +late_initcall(kprobe_trace_self_tests_init); + +#endif diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c new file mode 100644 index 00000000000..ddfa0fd43bc --- /dev/null +++ b/kernel/trace/trace_ksym.c @@ -0,0 +1,550 @@ +/* + * trace_ksym.c - Kernel Symbol Tracer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2009 + */ + +#include <linux/kallsyms.h> +#include <linux/uaccess.h> +#include <linux/debugfs.h> +#include <linux/ftrace.h> +#include <linux/module.h> +#include <linux/fs.h> + +#include "trace_output.h" +#include "trace_stat.h" +#include "trace.h" + +#include <linux/hw_breakpoint.h> +#include <asm/hw_breakpoint.h> + +/* + * For now, let us restrict the no. of symbols traced simultaneously to number + * of available hardware breakpoint registers. + */ +#define KSYM_TRACER_MAX HBP_NUM + +#define KSYM_TRACER_OP_LEN 3 /* rw- */ + +struct trace_ksym { + struct perf_event **ksym_hbp; + struct perf_event_attr attr; +#ifdef CONFIG_PROFILE_KSYM_TRACER + unsigned long counter; +#endif + struct hlist_node ksym_hlist; +}; + +static struct trace_array *ksym_trace_array; + +static unsigned int ksym_filter_entry_count; +static unsigned int ksym_tracing_enabled; + +static HLIST_HEAD(ksym_filter_head); + +static DEFINE_MUTEX(ksym_tracer_mutex); + +#ifdef CONFIG_PROFILE_KSYM_TRACER + +#define MAX_UL_INT 0xffffffff + +void ksym_collect_stats(unsigned long hbp_hit_addr) +{ + struct hlist_node *node; + struct trace_ksym *entry; + + rcu_read_lock(); + hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) { + if ((entry->attr.bp_addr == hbp_hit_addr) && + (entry->counter <= MAX_UL_INT)) { + entry->counter++; + break; + } + } + rcu_read_unlock(); +} +#endif /* CONFIG_PROFILE_KSYM_TRACER */ + +void ksym_hbp_handler(struct perf_event *hbp, void *data) +{ + struct ring_buffer_event *event; + struct ksym_trace_entry *entry; + struct pt_regs *regs = data; + struct ring_buffer *buffer; + int pc; + + if (!ksym_tracing_enabled) + return; + + buffer = ksym_trace_array->buffer; + + pc = preempt_count(); + + event = trace_buffer_lock_reserve(buffer, TRACE_KSYM, + sizeof(*entry), 0, pc); + if (!event) + return; + + entry = ring_buffer_event_data(event); + entry->ip = instruction_pointer(regs); + entry->type = hw_breakpoint_type(hbp); + entry->addr = hw_breakpoint_addr(hbp); + strlcpy(entry->cmd, current->comm, TASK_COMM_LEN); + +#ifdef CONFIG_PROFILE_KSYM_TRACER + ksym_collect_stats(hw_breakpoint_addr(hbp)); +#endif /* CONFIG_PROFILE_KSYM_TRACER */ + + trace_buffer_unlock_commit(buffer, event, 0, pc); +} + +/* Valid access types are represented as + * + * rw- : Set Read/Write Access Breakpoint + * -w- : Set Write Access Breakpoint + * --- : Clear Breakpoints + * --x : Set Execution Break points (Not available yet) + * + */ +static int ksym_trace_get_access_type(char *str) +{ + int access = 0; + + if (str[0] == 'r') + access |= HW_BREAKPOINT_R; + + if (str[1] == 'w') + access |= HW_BREAKPOINT_W; + + if (str[2] == 'x') + access |= HW_BREAKPOINT_X; + + switch (access) { + case HW_BREAKPOINT_R: + case HW_BREAKPOINT_W: + case HW_BREAKPOINT_W | HW_BREAKPOINT_R: + return access; + default: + return -EINVAL; + } +} + +/* + * There can be several possible malformed requests and we attempt to capture + * all of them. We enumerate some of the rules + * 1. We will not allow kernel symbols with ':' since it is used as a delimiter. + * i.e. multiple ':' symbols disallowed. Possible uses are of the form + * <module>:<ksym_name>:<op>. + * 2. No delimiter symbol ':' in the input string + * 3. Spurious operator symbols or symbols not in their respective positions + * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file + * 5. Kernel symbol not a part of /proc/kallsyms + * 6. Duplicate requests + */ +static int parse_ksym_trace_str(char *input_string, char **ksymname, + unsigned long *addr) +{ + int ret; + + *ksymname = strsep(&input_string, ":"); + *addr = kallsyms_lookup_name(*ksymname); + + /* Check for malformed request: (2), (1) and (5) */ + if ((!input_string) || + (strlen(input_string) != KSYM_TRACER_OP_LEN) || + (*addr == 0)) + return -EINVAL;; + + ret = ksym_trace_get_access_type(input_string); + + return ret; +} + +int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) +{ + struct trace_ksym *entry; + int ret = -ENOMEM; + + if (ksym_filter_entry_count >= KSYM_TRACER_MAX) { + printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No" + " new requests for tracing can be accepted now.\n", + KSYM_TRACER_MAX); + return -ENOSPC; + } + + entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + hw_breakpoint_init(&entry->attr); + + entry->attr.bp_type = op; + entry->attr.bp_addr = addr; + entry->attr.bp_len = HW_BREAKPOINT_LEN_4; + + ret = -EAGAIN; + entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr, + ksym_hbp_handler); + + if (IS_ERR(entry->ksym_hbp)) { + ret = PTR_ERR(entry->ksym_hbp); + printk(KERN_INFO "ksym_tracer request failed. Try again" + " later!!\n"); + goto err; + } + + hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head); + ksym_filter_entry_count++; + + return 0; + +err: + kfree(entry); + + return ret; +} + +static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *ppos) +{ + struct trace_ksym *entry; + struct hlist_node *node; + struct trace_seq *s; + ssize_t cnt = 0; + int ret; + + s = kmalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; + trace_seq_init(s); + + mutex_lock(&ksym_tracer_mutex); + + hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { + ret = trace_seq_printf(s, "%pS:", (void *)entry->attr.bp_addr); + if (entry->attr.bp_type == HW_BREAKPOINT_R) + ret = trace_seq_puts(s, "r--\n"); + else if (entry->attr.bp_type == HW_BREAKPOINT_W) + ret = trace_seq_puts(s, "-w-\n"); + else if (entry->attr.bp_type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R)) + ret = trace_seq_puts(s, "rw-\n"); + WARN_ON_ONCE(!ret); + } + + cnt = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); + + mutex_unlock(&ksym_tracer_mutex); + + kfree(s); + + return cnt; +} + +static void __ksym_trace_reset(void) +{ + struct trace_ksym *entry; + struct hlist_node *node, *node1; + + mutex_lock(&ksym_tracer_mutex); + hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head, + ksym_hlist) { + unregister_wide_hw_breakpoint(entry->ksym_hbp); + ksym_filter_entry_count--; + hlist_del_rcu(&(entry->ksym_hlist)); + synchronize_rcu(); + kfree(entry); + } + mutex_unlock(&ksym_tracer_mutex); +} + +static ssize_t ksym_trace_filter_write(struct file *file, + const char __user *buffer, + size_t count, loff_t *ppos) +{ + struct trace_ksym *entry; + struct hlist_node *node; + char *input_string, *ksymname = NULL; + unsigned long ksym_addr = 0; + int ret, op, changed = 0; + + input_string = kzalloc(count + 1, GFP_KERNEL); + if (!input_string) + return -ENOMEM; + + if (copy_from_user(input_string, buffer, count)) { + kfree(input_string); + return -EFAULT; + } + input_string[count] = '\0'; + + strstrip(input_string); + + /* + * Clear all breakpoints if: + * 1: echo > ksym_trace_filter + * 2: echo 0 > ksym_trace_filter + * 3: echo "*:---" > ksym_trace_filter + */ + if (!input_string[0] || !strcmp(input_string, "0") || + !strcmp(input_string, "*:---")) { + __ksym_trace_reset(); + kfree(input_string); + return count; + } + + ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr); + if (ret < 0) { + kfree(input_string); + return ret; + } + + mutex_lock(&ksym_tracer_mutex); + + ret = -EINVAL; + hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { + if (entry->attr.bp_addr == ksym_addr) { + /* Check for malformed request: (6) */ + if (entry->attr.bp_type != op) + changed = 1; + else + goto out; + break; + } + } + if (changed) { + unregister_wide_hw_breakpoint(entry->ksym_hbp); + entry->attr.bp_type = op; + ret = 0; + if (op > 0) { + entry->ksym_hbp = + register_wide_hw_breakpoint(&entry->attr, + ksym_hbp_handler); + if (IS_ERR(entry->ksym_hbp)) + ret = PTR_ERR(entry->ksym_hbp); + else + goto out; + } + /* Error or "symbol:---" case: drop it */ + ksym_filter_entry_count--; + hlist_del_rcu(&(entry->ksym_hlist)); + synchronize_rcu(); + kfree(entry); + goto out; + } else { + /* Check for malformed request: (4) */ + if (op == 0) + goto out; + ret = process_new_ksym_entry(ksymname, op, ksym_addr); + } +out: + mutex_unlock(&ksym_tracer_mutex); + + kfree(input_string); + + if (!ret) + ret = count; + return ret; +} + +static const struct file_operations ksym_tracing_fops = { + .open = tracing_open_generic, + .read = ksym_trace_filter_read, + .write = ksym_trace_filter_write, +}; + +static void ksym_trace_reset(struct trace_array *tr) +{ + ksym_tracing_enabled = 0; + __ksym_trace_reset(); +} + +static int ksym_trace_init(struct trace_array *tr) +{ + int cpu, ret = 0; + + for_each_online_cpu(cpu) + tracing_reset(tr, cpu); + ksym_tracing_enabled = 1; + ksym_trace_array = tr; + + return ret; +} + +static void ksym_trace_print_header(struct seq_file *m) +{ + seq_puts(m, + "# TASK-PID CPU# Symbol " + "Type Function\n"); + seq_puts(m, + "# | | | " + " | |\n"); +} + +static enum print_line_t ksym_trace_output(struct trace_iterator *iter) +{ + struct trace_entry *entry = iter->ent; + struct trace_seq *s = &iter->seq; + struct ksym_trace_entry *field; + char str[KSYM_SYMBOL_LEN]; + int ret; + + if (entry->type != TRACE_KSYM) + return TRACE_TYPE_UNHANDLED; + + trace_assign_type(field, entry); + + ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd, + entry->pid, iter->cpu, (char *)field->addr); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + switch (field->type) { + case HW_BREAKPOINT_R: + ret = trace_seq_printf(s, " R "); + break; + case HW_BREAKPOINT_W: + ret = trace_seq_printf(s, " W "); + break; + case HW_BREAKPOINT_R | HW_BREAKPOINT_W: + ret = trace_seq_printf(s, " RW "); + break; + default: + return TRACE_TYPE_PARTIAL_LINE; + } + + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + sprint_symbol(str, field->ip); + ret = trace_seq_printf(s, "%s\n", str); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + +struct tracer ksym_tracer __read_mostly = +{ + .name = "ksym_tracer", + .init = ksym_trace_init, + .reset = ksym_trace_reset, +#ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_ksym, +#endif + .print_header = ksym_trace_print_header, + .print_line = ksym_trace_output +}; + +__init static int init_ksym_trace(void) +{ + struct dentry *d_tracer; + struct dentry *entry; + + d_tracer = tracing_init_dentry(); + ksym_filter_entry_count = 0; + + entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer, + NULL, &ksym_tracing_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'ksym_trace_filter' file\n"); + + return register_tracer(&ksym_tracer); +} +device_initcall(init_ksym_trace); + + +#ifdef CONFIG_PROFILE_KSYM_TRACER +static int ksym_tracer_stat_headers(struct seq_file *m) +{ + seq_puts(m, " Access Type "); + seq_puts(m, " Symbol Counter\n"); + seq_puts(m, " ----------- "); + seq_puts(m, " ------ -------\n"); + return 0; +} + +static int ksym_tracer_stat_show(struct seq_file *m, void *v) +{ + struct hlist_node *stat = v; + struct trace_ksym *entry; + int access_type = 0; + char fn_name[KSYM_NAME_LEN]; + + entry = hlist_entry(stat, struct trace_ksym, ksym_hlist); + + access_type = entry->attr.bp_type; + + switch (access_type) { + case HW_BREAKPOINT_R: + seq_puts(m, " R "); + break; + case HW_BREAKPOINT_W: + seq_puts(m, " W "); + break; + case HW_BREAKPOINT_R | HW_BREAKPOINT_W: + seq_puts(m, " RW "); + break; + default: + seq_puts(m, " NA "); + } + + if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0) + seq_printf(m, " %-36s", fn_name); + else + seq_printf(m, " %-36s", "<NA>"); + seq_printf(m, " %15lu\n", entry->counter); + + return 0; +} + +static void *ksym_tracer_stat_start(struct tracer_stat *trace) +{ + return ksym_filter_head.first; +} + +static void * +ksym_tracer_stat_next(void *v, int idx) +{ + struct hlist_node *stat = v; + + return stat->next; +} + +static struct tracer_stat ksym_tracer_stats = { + .name = "ksym_tracer", + .stat_start = ksym_tracer_stat_start, + .stat_next = ksym_tracer_stat_next, + .stat_headers = ksym_tracer_stat_headers, + .stat_show = ksym_tracer_stat_show +}; + +__init static int ksym_tracer_stat_init(void) +{ + int ret; + + ret = register_stat_tracer(&ksym_tracer_stats); + if (ret) { + printk(KERN_WARNING "Warning: could not register " + "ksym tracer stats\n"); + return 1; + } + + return 0; +} +fs_initcall(ksym_tracer_stat_init); +#endif /* CONFIG_PROFILE_KSYM_TRACER */ diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index d2cdbabb4ea..dc98309e839 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -17,6 +17,7 @@ static inline int trace_valid_entry(struct trace_entry *entry) case TRACE_GRAPH_ENT: case TRACE_GRAPH_RET: case TRACE_HW_BRANCHES: + case TRACE_KSYM: return 1; } return 0; @@ -808,3 +809,57 @@ trace_selftest_startup_hw_branches(struct tracer *trace, return ret; } #endif /* CONFIG_HW_BRANCH_TRACER */ + +#ifdef CONFIG_KSYM_TRACER +static int ksym_selftest_dummy; + +int +trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr) +{ + unsigned long count; + int ret; + + /* start the tracing */ + ret = tracer_init(trace, tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + return ret; + } + + ksym_selftest_dummy = 0; + /* Register the read-write tracing request */ + + ret = process_new_ksym_entry("ksym_selftest_dummy", + HW_BREAKPOINT_R | HW_BREAKPOINT_W, + (unsigned long)(&ksym_selftest_dummy)); + + if (ret < 0) { + printk(KERN_CONT "ksym_trace read-write startup test failed\n"); + goto ret_path; + } + /* Perform a read and a write operation over the dummy variable to + * trigger the tracer + */ + if (ksym_selftest_dummy == 0) + ksym_selftest_dummy++; + + /* stop the tracing. */ + tracing_stop(); + /* check the trace buffer */ + ret = trace_test_buffer(tr, &count); + trace->reset(tr); + tracing_start(); + + /* read & write operations - one each is performed on the dummy variable + * triggering two entries in the trace buffer + */ + if (!ret && count != 2) { + printk(KERN_CONT "Ksym tracer startup test failed"); + ret = -1; + } + +ret_path: + return ret; +} +#endif /* CONFIG_KSYM_TRACER */ + diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 527e17eae57..57501d90096 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -14,6 +14,43 @@ static int sys_refcount_exit; static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); +extern unsigned long __start_syscalls_metadata[]; +extern unsigned long __stop_syscalls_metadata[]; + +static struct syscall_metadata **syscalls_metadata; + +static struct syscall_metadata *find_syscall_meta(unsigned long syscall) +{ + struct syscall_metadata *start; + struct syscall_metadata *stop; + char str[KSYM_SYMBOL_LEN]; + + + start = (struct syscall_metadata *)__start_syscalls_metadata; + stop = (struct syscall_metadata *)__stop_syscalls_metadata; + kallsyms_lookup(syscall, NULL, NULL, NULL, str); + + for ( ; start < stop; start++) { + /* + * Only compare after the "sys" prefix. Archs that use + * syscall wrappers may have syscalls symbols aliases prefixed + * with "SyS" instead of "sys", leading to an unwanted + * mismatch. + */ + if (start->name && !strcmp(start->name + 3, str + 3)) + return start; + } + return NULL; +} + +static struct syscall_metadata *syscall_nr_to_meta(int nr) +{ + if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) + return NULL; + + return syscalls_metadata[nr]; +} + enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags) { @@ -30,7 +67,7 @@ print_syscall_enter(struct trace_iterator *iter, int flags) if (!entry) goto end; - if (entry->enter_id != ent->type) { + if (entry->enter_event->id != ent->type) { WARN_ON_ONCE(1); goto end; } @@ -85,7 +122,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags) return TRACE_TYPE_HANDLED; } - if (entry->exit_id != ent->type) { + if (entry->exit_event->id != ent->type) { WARN_ON_ONCE(1); return TRACE_TYPE_UNHANDLED; } @@ -103,24 +140,19 @@ extern char *__bad_type_size(void); #define SYSCALL_FIELD(type, name) \ sizeof(type) != sizeof(trace.name) ? \ __bad_type_size() : \ - #type, #name, offsetof(typeof(trace), name), sizeof(trace.name) + #type, #name, offsetof(typeof(trace), name), \ + sizeof(trace.name), is_signed_type(type) int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) { int i; - int nr; int ret; - struct syscall_metadata *entry; + struct syscall_metadata *entry = call->data; struct syscall_trace_enter trace; int offset = offsetof(struct syscall_trace_enter, args); - nr = syscall_name_to_nr(call->data); - entry = syscall_nr_to_meta(nr); - - if (!entry) - return 0; - - ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n", + ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" + "\tsigned:%u;\n", SYSCALL_FIELD(int, nr)); if (!ret) return 0; @@ -130,8 +162,10 @@ int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) entry->args[i]); if (!ret) return 0; - ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;\n", offset, - sizeof(unsigned long)); + ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;" + "\tsigned:%u;\n", offset, + sizeof(unsigned long), + is_signed_type(unsigned long)); if (!ret) return 0; offset += sizeof(unsigned long); @@ -163,8 +197,10 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s) struct syscall_trace_exit trace; ret = trace_seq_printf(s, - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n", + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" + "\tsigned:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" + "\tsigned:%u;\n", SYSCALL_FIELD(int, nr), SYSCALL_FIELD(long, ret)); if (!ret) @@ -176,22 +212,19 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s) int syscall_enter_define_fields(struct ftrace_event_call *call) { struct syscall_trace_enter trace; - struct syscall_metadata *meta; + struct syscall_metadata *meta = call->data; int ret; - int nr; int i; int offset = offsetof(typeof(trace), args); - nr = syscall_name_to_nr(call->data); - meta = syscall_nr_to_meta(nr); - - if (!meta) - return 0; - ret = trace_define_common_fields(call); if (ret) return ret; + ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); + if (ret) + return ret; + for (i = 0; i < meta->nb_args; i++) { ret = trace_define_field(call, meta->types[i], meta->args[i], offset, @@ -212,7 +245,11 @@ int syscall_exit_define_fields(struct ftrace_event_call *call) if (ret) return ret; - ret = trace_define_field(call, SYSCALL_FIELD(long, ret), 0, + ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); + if (ret) + return ret; + + ret = trace_define_field(call, SYSCALL_FIELD(long, ret), FILTER_OTHER); return ret; @@ -239,8 +276,8 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id) size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; - event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id, - size, 0, 0); + event = trace_current_buffer_lock_reserve(&buffer, + sys_data->enter_event->id, size, 0, 0); if (!event) return; @@ -271,8 +308,8 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret) if (!sys_data) return; - event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id, - sizeof(*entry), 0, 0); + event = trace_current_buffer_lock_reserve(&buffer, + sys_data->exit_event->id, sizeof(*entry), 0, 0); if (!event) return; @@ -285,14 +322,12 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret) trace_current_buffer_unlock_commit(buffer, event, 0, 0); } -int reg_event_syscall_enter(void *ptr) +int reg_event_syscall_enter(struct ftrace_event_call *call) { int ret = 0; int num; - char *name; - name = (char *)ptr; - num = syscall_name_to_nr(name); + num = ((struct syscall_metadata *)call->data)->syscall_nr; if (num < 0 || num >= NR_syscalls) return -ENOSYS; mutex_lock(&syscall_trace_lock); @@ -309,13 +344,11 @@ int reg_event_syscall_enter(void *ptr) return ret; } -void unreg_event_syscall_enter(void *ptr) +void unreg_event_syscall_enter(struct ftrace_event_call *call) { int num; - char *name; - name = (char *)ptr; - num = syscall_name_to_nr(name); + num = ((struct syscall_metadata *)call->data)->syscall_nr; if (num < 0 || num >= NR_syscalls) return; mutex_lock(&syscall_trace_lock); @@ -326,14 +359,12 @@ void unreg_event_syscall_enter(void *ptr) mutex_unlock(&syscall_trace_lock); } -int reg_event_syscall_exit(void *ptr) +int reg_event_syscall_exit(struct ftrace_event_call *call) { int ret = 0; int num; - char *name; - name = (char *)ptr; - num = syscall_name_to_nr(name); + num = ((struct syscall_metadata *)call->data)->syscall_nr; if (num < 0 || num >= NR_syscalls) return -ENOSYS; mutex_lock(&syscall_trace_lock); @@ -350,13 +381,11 @@ int reg_event_syscall_exit(void *ptr) return ret; } -void unreg_event_syscall_exit(void *ptr) +void unreg_event_syscall_exit(struct ftrace_event_call *call) { int num; - char *name; - name = (char *)ptr; - num = syscall_name_to_nr(name); + num = ((struct syscall_metadata *)call->data)->syscall_nr; if (num < 0 || num >= NR_syscalls) return; mutex_lock(&syscall_trace_lock); @@ -367,13 +396,44 @@ void unreg_event_syscall_exit(void *ptr) mutex_unlock(&syscall_trace_lock); } -struct trace_event event_syscall_enter = { - .trace = print_syscall_enter, -}; +int init_syscall_trace(struct ftrace_event_call *call) +{ + int id; + + id = register_ftrace_event(call->event); + if (!id) + return -ENODEV; + call->id = id; + INIT_LIST_HEAD(&call->fields); + return 0; +} + +int __init init_ftrace_syscalls(void) +{ + struct syscall_metadata *meta; + unsigned long addr; + int i; + + syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * + NR_syscalls, GFP_KERNEL); + if (!syscalls_metadata) { + WARN_ON(1); + return -ENOMEM; + } + + for (i = 0; i < NR_syscalls; i++) { + addr = arch_syscall_addr(i); + meta = find_syscall_meta(addr); + if (!meta) + continue; + + meta->syscall_nr = i; + syscalls_metadata[i] = meta; + } -struct trace_event event_syscall_exit = { - .trace = print_syscall_exit, -}; + return 0; +} +core_initcall(init_ftrace_syscalls); #ifdef CONFIG_EVENT_PROFILE @@ -387,8 +447,10 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) struct syscall_metadata *sys_data; struct syscall_trace_enter *rec; unsigned long flags; + char *trace_buf; char *raw_data; int syscall_nr; + int rctx; int size; int cpu; @@ -412,41 +474,42 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) /* Protect the per cpu buffer, begin the rcu read side */ local_irq_save(flags); + rctx = perf_swevent_get_recursion_context(); + if (rctx < 0) + goto end_recursion; + cpu = smp_processor_id(); - if (in_nmi()) - raw_data = rcu_dereference(trace_profile_buf_nmi); - else - raw_data = rcu_dereference(trace_profile_buf); + trace_buf = rcu_dereference(perf_trace_buf); - if (!raw_data) + if (!trace_buf) goto end; - raw_data = per_cpu_ptr(raw_data, cpu); + raw_data = per_cpu_ptr(trace_buf, cpu); /* zero the dead bytes from align to not leak stack to user */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; rec = (struct syscall_trace_enter *) raw_data; tracing_generic_entry_update(&rec->ent, 0, 0); - rec->ent.type = sys_data->enter_id; + rec->ent.type = sys_data->enter_event->id; rec->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, (unsigned long *)&rec->args); - perf_tp_event(sys_data->enter_id, 0, 1, rec, size); + perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size); end: + perf_swevent_put_recursion_context(rctx); +end_recursion: local_irq_restore(flags); } -int reg_prof_syscall_enter(char *name) +int prof_sysenter_enable(struct ftrace_event_call *call) { int ret = 0; int num; - num = syscall_name_to_nr(name); - if (num < 0 || num >= NR_syscalls) - return -ENOSYS; + num = ((struct syscall_metadata *)call->data)->syscall_nr; mutex_lock(&syscall_trace_lock); if (!sys_prof_refcount_enter) @@ -462,13 +525,11 @@ int reg_prof_syscall_enter(char *name) return ret; } -void unreg_prof_syscall_enter(char *name) +void prof_sysenter_disable(struct ftrace_event_call *call) { int num; - num = syscall_name_to_nr(name); - if (num < 0 || num >= NR_syscalls) - return; + num = ((struct syscall_metadata *)call->data)->syscall_nr; mutex_lock(&syscall_trace_lock); sys_prof_refcount_enter--; @@ -484,7 +545,9 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) struct syscall_trace_exit *rec; unsigned long flags; int syscall_nr; + char *trace_buf; char *raw_data; + int rctx; int size; int cpu; @@ -510,17 +573,19 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) /* Protect the per cpu buffer, begin the rcu read side */ local_irq_save(flags); + + rctx = perf_swevent_get_recursion_context(); + if (rctx < 0) + goto end_recursion; + cpu = smp_processor_id(); - if (in_nmi()) - raw_data = rcu_dereference(trace_profile_buf_nmi); - else - raw_data = rcu_dereference(trace_profile_buf); + trace_buf = rcu_dereference(perf_trace_buf); - if (!raw_data) + if (!trace_buf) goto end; - raw_data = per_cpu_ptr(raw_data, cpu); + raw_data = per_cpu_ptr(trace_buf, cpu); /* zero the dead bytes from align to not leak stack to user */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; @@ -528,24 +593,24 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) rec = (struct syscall_trace_exit *)raw_data; tracing_generic_entry_update(&rec->ent, 0, 0); - rec->ent.type = sys_data->exit_id; + rec->ent.type = sys_data->exit_event->id; rec->nr = syscall_nr; rec->ret = syscall_get_return_value(current, regs); - perf_tp_event(sys_data->exit_id, 0, 1, rec, size); + perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size); end: + perf_swevent_put_recursion_context(rctx); +end_recursion: local_irq_restore(flags); } -int reg_prof_syscall_exit(char *name) +int prof_sysexit_enable(struct ftrace_event_call *call) { int ret = 0; int num; - num = syscall_name_to_nr(name); - if (num < 0 || num >= NR_syscalls) - return -ENOSYS; + num = ((struct syscall_metadata *)call->data)->syscall_nr; mutex_lock(&syscall_trace_lock); if (!sys_prof_refcount_exit) @@ -561,13 +626,11 @@ int reg_prof_syscall_exit(char *name) return ret; } -void unreg_prof_syscall_exit(char *name) +void prof_sysexit_disable(struct ftrace_event_call *call) { int num; - num = syscall_name_to_nr(name); - if (num < 0 || num >= NR_syscalls) - return; + num = ((struct syscall_metadata *)call->data)->syscall_nr; mutex_lock(&syscall_trace_lock); sys_prof_refcount_exit--; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 234ceb10861..a79c4d0407a 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -750,7 +750,7 @@ config RCU_TORTURE_TEST_RUNNABLE config RCU_CPU_STALL_DETECTOR bool "Check for stalled CPUs delaying RCU grace periods" depends on TREE_RCU || TREE_PREEMPT_RCU - default n + default y help This option causes RCU to printk information on which CPUs are delaying the current grace period, but only when diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c index 39f1029e352..4ebfa5a164d 100644 --- a/lib/kernel_lock.c +++ b/lib/kernel_lock.c @@ -5,10 +5,13 @@ * relegated to obsolescence, but used by various less * important (or lazy) subsystems. */ -#include <linux/smp_lock.h> #include <linux/module.h> #include <linux/kallsyms.h> #include <linux/semaphore.h> +#include <linux/smp_lock.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/bkl.h> /* * The 'big kernel lock' @@ -113,21 +116,26 @@ static inline void __unlock_kernel(void) * This cannot happen asynchronously, so we only need to * worry about other CPU's. */ -void __lockfunc lock_kernel(void) +void __lockfunc _lock_kernel(const char *func, const char *file, int line) { - int depth = current->lock_depth+1; + int depth = current->lock_depth + 1; + + trace_lock_kernel(func, file, line); + if (likely(!depth)) __lock_kernel(); current->lock_depth = depth; } -void __lockfunc unlock_kernel(void) +void __lockfunc _unlock_kernel(const char *func, const char *file, int line) { BUG_ON(current->lock_depth < 0); if (likely(--current->lock_depth < 0)) __unlock_kernel(); + + trace_unlock_kernel(func, file, line); } -EXPORT_SYMBOL(lock_kernel); -EXPORT_SYMBOL(unlock_kernel); +EXPORT_SYMBOL(_lock_kernel); +EXPORT_SYMBOL(_unlock_kernel); diff --git a/lib/ratelimit.c b/lib/ratelimit.c index 26187edcc7e..09f5ce1810d 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -7,15 +7,12 @@ * parameter. Now every user can use their own standalone ratelimit_state. * * This file is released under the GPLv2. - * */ -#include <linux/kernel.h> +#include <linux/ratelimit.h> #include <linux/jiffies.h> #include <linux/module.h> -static DEFINE_SPINLOCK(ratelimit_lock); - /* * __ratelimit - rate limiting * @rs: ratelimit_state data @@ -23,35 +20,43 @@ static DEFINE_SPINLOCK(ratelimit_lock); * This enforces a rate limit: not more than @rs->ratelimit_burst callbacks * in every @rs->ratelimit_jiffies */ -int __ratelimit(struct ratelimit_state *rs) +int ___ratelimit(struct ratelimit_state *rs, const char *func) { unsigned long flags; + int ret; if (!rs->interval) return 1; - spin_lock_irqsave(&ratelimit_lock, flags); + /* + * If we contend on this state's lock then almost + * by definition we are too busy to print a message, + * in addition to the one that will be printed by + * the entity that is holding the lock already: + */ + if (!spin_trylock_irqsave(&rs->lock, flags)) + return 1; + if (!rs->begin) rs->begin = jiffies; if (time_is_before_jiffies(rs->begin + rs->interval)) { if (rs->missed) printk(KERN_WARNING "%s: %d callbacks suppressed\n", - __func__, rs->missed); - rs->begin = 0; + func, rs->missed); + rs->begin = 0; rs->printed = 0; - rs->missed = 0; + rs->missed = 0; } - if (rs->burst && rs->burst > rs->printed) - goto print; - - rs->missed++; - spin_unlock_irqrestore(&ratelimit_lock, flags); - return 0; + if (rs->burst && rs->burst > rs->printed) { + rs->printed++; + ret = 1; + } else { + rs->missed++; + ret = 0; + } + spin_unlock_irqrestore(&rs->lock, flags); -print: - rs->printed++; - spin_unlock_irqrestore(&ratelimit_lock, flags); - return 1; + return ret; } -EXPORT_SYMBOL(__ratelimit); +EXPORT_SYMBOL(___ratelimit); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index ac25cd28e80..795472d8ae2 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -97,6 +97,8 @@ static phys_addr_t *io_tlb_orig_addr; */ static DEFINE_SPINLOCK(io_tlb_lock); +static int late_alloc; + static int __init setup_io_tlb_npages(char *str) { @@ -109,6 +111,7 @@ setup_io_tlb_npages(char *str) ++str; if (!strcmp(str, "force")) swiotlb_force = 1; + return 1; } __setup("swiotlb=", setup_io_tlb_npages); @@ -121,8 +124,9 @@ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, return phys_to_dma(hwdev, virt_to_phys(address)); } -static void swiotlb_print_info(unsigned long bytes) +void swiotlb_print_info(void) { + unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; phys_addr_t pstart, pend; pstart = virt_to_phys(io_tlb_start); @@ -140,7 +144,7 @@ static void swiotlb_print_info(unsigned long bytes) * structures for the software IO TLB used to implement the DMA API. */ void __init -swiotlb_init_with_default_size(size_t default_size) +swiotlb_init_with_default_size(size_t default_size, int verbose) { unsigned long i, bytes; @@ -176,14 +180,14 @@ swiotlb_init_with_default_size(size_t default_size) io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); if (!io_tlb_overflow_buffer) panic("Cannot allocate SWIOTLB overflow buffer!\n"); - - swiotlb_print_info(bytes); + if (verbose) + swiotlb_print_info(); } void __init -swiotlb_init(void) +swiotlb_init(int verbose) { - swiotlb_init_with_default_size(64 * (1<<20)); /* default to 64MB */ + swiotlb_init_with_default_size(64 * (1<<20), verbose); /* default to 64MB */ } /* @@ -260,7 +264,9 @@ swiotlb_late_init_with_default_size(size_t default_size) if (!io_tlb_overflow_buffer) goto cleanup4; - swiotlb_print_info(bytes); + swiotlb_print_info(); + + late_alloc = 1; return 0; @@ -281,6 +287,32 @@ cleanup1: return -ENOMEM; } +void __init swiotlb_free(void) +{ + if (!io_tlb_overflow_buffer) + return; + + if (late_alloc) { + free_pages((unsigned long)io_tlb_overflow_buffer, + get_order(io_tlb_overflow)); + free_pages((unsigned long)io_tlb_orig_addr, + get_order(io_tlb_nslabs * sizeof(phys_addr_t))); + free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * + sizeof(int))); + free_pages((unsigned long)io_tlb_start, + get_order(io_tlb_nslabs << IO_TLB_SHIFT)); + } else { + free_bootmem_late(__pa(io_tlb_overflow_buffer), + io_tlb_overflow); + free_bootmem_late(__pa(io_tlb_orig_addr), + io_tlb_nslabs * sizeof(phys_addr_t)); + free_bootmem_late(__pa(io_tlb_list), + io_tlb_nslabs * sizeof(int)); + free_bootmem_late(__pa(io_tlb_start), + io_tlb_nslabs << IO_TLB_SHIFT); + } +} + static int is_swiotlb_buffer(phys_addr_t paddr) { return paddr >= virt_to_phys(io_tlb_start) && diff --git a/mm/bootmem.c b/mm/bootmem.c index 555d5d2731c..d1dc23cc7f1 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -143,6 +143,30 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages) return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages); } +/* + * free_bootmem_late - free bootmem pages directly to page allocator + * @addr: starting address of the range + * @size: size of the range in bytes + * + * This is only useful when the bootmem allocator has already been torn + * down, but we are still initializing the system. Pages are given directly + * to the page allocator, no bootmem metadata is updated because it is gone. + */ +void __init free_bootmem_late(unsigned long addr, unsigned long size) +{ + unsigned long cursor, end; + + kmemleak_free_part(__va(addr), size); + + cursor = PFN_UP(addr); + end = PFN_DOWN(addr + size); + + for (; cursor < end; cursor++) { + __free_pages_bootmem(pfn_to_page(cursor), 0); + totalram_pages++; + } +} + static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) { int aligned; diff --git a/mm/mmap.c b/mm/mmap.c index 73f5e4b6401..292ddc3cef9 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -20,7 +20,6 @@ #include <linux/fs.h> #include <linux/personality.h> #include <linux/security.h> -#include <linux/ima.h> #include <linux/hugetlb.h> #include <linux/profile.h> #include <linux/module.h> @@ -1061,9 +1060,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, error = security_file_mmap(file, reqprot, prot, flags, addr, 0); if (error) return error; - error = ima_file_mmap(file, prot); - if (error) - return error; return mmap_region(file, addr, len, flags, vm_flags, pgoff); } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 7db1de0497c..887c03c4e3c 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -10,7 +10,9 @@ #include <linux/module.h> #include <linux/socket.h> #include <linux/netdevice.h> +#include <linux/ratelimit.h> #include <linux/init.h> + #include <net/ip.h> #include <net/sock.h> diff --git a/net/core/utils.c b/net/core/utils.c index 83221aee708..838250241d2 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -24,6 +24,8 @@ #include <linux/types.h> #include <linux/percpu.h> #include <linux/init.h> +#include <linux/ratelimit.h> + #include <net/sock.h> #include <asm/byteorder.h> diff --git a/samples/Kconfig b/samples/Kconfig index b92bde3c6a8..e4be84ac3d3 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -40,5 +40,11 @@ config SAMPLE_KRETPROBES default m depends on SAMPLE_KPROBES && KRETPROBES +config SAMPLE_HW_BREAKPOINT + tristate "Build kernel hardware breakpoint examples -- loadable module only" + depends on HAVE_HW_BREAKPOINT && m + help + This builds kernel hardware breakpoint example modules. + endif # SAMPLES diff --git a/samples/Makefile b/samples/Makefile index 43343a03b1f..0f15e6d77fd 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -1,3 +1,4 @@ # Makefile for Linux samples code -obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/ +obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/ \ + hw_breakpoint/ diff --git a/samples/hw_breakpoint/Makefile b/samples/hw_breakpoint/Makefile new file mode 100644 index 00000000000..0f5c31c2fc4 --- /dev/null +++ b/samples/hw_breakpoint/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_SAMPLE_HW_BREAKPOINT) += data_breakpoint.o diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c new file mode 100644 index 00000000000..29525500df0 --- /dev/null +++ b/samples/hw_breakpoint/data_breakpoint.c @@ -0,0 +1,87 @@ +/* + * data_breakpoint.c - Sample HW Breakpoint file to watch kernel data address + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * usage: insmod data_breakpoint.ko ksym=<ksym_name> + * + * This file is a kernel module that places a breakpoint over ksym_name kernel + * variable using Hardware Breakpoint register. The corresponding handler which + * prints a backtrace is invoked everytime a write operation is performed on + * that variable. + * + * Copyright (C) IBM Corporation, 2009 + * + * Author: K.Prasad <prasad@linux.vnet.ibm.com> + */ +#include <linux/module.h> /* Needed by all modules */ +#include <linux/kernel.h> /* Needed for KERN_INFO */ +#include <linux/init.h> /* Needed for the macros */ +#include <linux/kallsyms.h> + +#include <linux/perf_event.h> +#include <linux/hw_breakpoint.h> + +struct perf_event **sample_hbp; + +static char ksym_name[KSYM_NAME_LEN] = "pid_max"; +module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO); +MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any" + " write operations on the kernel symbol"); + +static void sample_hbp_handler(struct perf_event *temp, void *data) +{ + printk(KERN_INFO "%s value is changed\n", ksym_name); + dump_stack(); + printk(KERN_INFO "Dump stack from sample_hbp_handler\n"); +} + +static int __init hw_break_module_init(void) +{ + int ret; + DEFINE_BREAKPOINT_ATTR(attr); + + attr.bp_addr = kallsyms_lookup_name(ksym_name); + attr.bp_len = HW_BREAKPOINT_LEN_4; + attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; + + sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler); + if (IS_ERR(sample_hbp)) { + ret = PTR_ERR(sample_hbp); + goto fail; + } + + printk(KERN_INFO "HW Breakpoint for %s write installed\n", ksym_name); + + return 0; + +fail: + printk(KERN_INFO "Breakpoint registration failed\n"); + + return ret; +} + +static void __exit hw_break_module_exit(void) +{ + unregister_wide_hw_breakpoint(sample_hbp); + printk(KERN_INFO "HW Breakpoint for %s write uninstalled\n", ksym_name); +} + +module_init(hw_break_module_init); +module_exit(hw_break_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("K.Prasad"); +MODULE_DESCRIPTION("ksym breakpoint"); diff --git a/scripts/kernel-doc b/scripts/kernel-doc index ea9f8a58678..241310e59cd 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -1852,10 +1852,17 @@ sub tracepoint_munge($) { my $tracepointname = 0; my $tracepointargs = 0; - if($prototype =~ m/TRACE_EVENT\((.*?),/) { + if ($prototype =~ m/TRACE_EVENT\((.*?),/) { $tracepointname = $1; } - if($prototype =~ m/TP_PROTO\((.*?)\)/) { + if ($prototype =~ m/DEFINE_SINGLE_EVENT\((.*?),/) { + $tracepointname = $1; + } + if ($prototype =~ m/DEFINE_EVENT\((.*?),(.*?),/) { + $tracepointname = $2; + } + $tracepointname =~ s/^\s+//; #strip leading whitespace + if ($prototype =~ m/TP_PROTO\((.*?)\)/) { $tracepointargs = $1; } if (($tracepointname eq 0) || ($tracepointargs eq 0)) { @@ -1920,7 +1927,9 @@ sub process_state3_function($$) { if ($prototype =~ /SYSCALL_DEFINE/) { syscall_munge(); } - if ($prototype =~ /TRACE_EVENT/) { + if ($prototype =~ /TRACE_EVENT/ || $prototype =~ /DEFINE_EVENT/ || + $prototype =~ /DEFINE_SINGLE_EVENT/) + { tracepoint_munge($file); } dump_function($prototype, $file); diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 090d300d739..f0d14452632 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -6,77 +6,93 @@ # all the offsets to the calls to mcount. # # -# What we want to end up with is a section in vmlinux called -# __mcount_loc that contains a list of pointers to all the -# call sites in the kernel that call mcount. Later on boot up, the kernel -# will read this list, save the locations and turn them into nops. -# When tracing or profiling is later enabled, these locations will then -# be converted back to pointers to some function. +# What we want to end up with this is that each object file will have a +# section called __mcount_loc that will hold the list of pointers to mcount +# callers. After final linking, the vmlinux will have within .init.data the +# list of all callers to mcount between __start_mcount_loc and __stop_mcount_loc. +# Later on boot up, the kernel will read this list, save the locations and turn +# them into nops. When tracing or profiling is later enabled, these locations +# will then be converted back to pointers to some function. # # This is no easy feat. This script is called just after the original # object is compiled and before it is linked. # -# The references to the call sites are offsets from the section of text -# that the call site is in. Hence, all functions in a section that -# has a call site to mcount, will have the offset from the beginning of -# the section and not the beginning of the function. +# When parse this object file using 'objdump', the references to the call +# sites are offsets from the section that the call site is in. Hence, all +# functions in a section that has a call site to mcount, will have the +# offset from the beginning of the section and not the beginning of the +# function. +# +# But where this section will reside finally in vmlinx is undetermined at +# this point. So we can't use this kind of offsets to record the final +# address of this call site. +# +# The trick is to change the call offset referring the start of a section to +# referring a function symbol in this section. During the link step, 'ld' will +# compute the final address according to the information we record. # -# The trick is to find a way to record the beginning of the section. -# The way we do this is to look at the first function in the section -# which will also be the location of that section after final link. # e.g. # # .section ".sched.text", "ax" -# .globl my_func -# my_func: # [...] -# call mcount (offset: 0x5) +# func1: +# [...] +# call mcount (offset: 0x10) # [...] # ret -# other_func: +# .globl fun2 +# func2: (offset: 0x20) # [...] -# call mcount (offset: 0x1b) +# [...] +# ret +# func3: +# [...] +# call mcount (offset: 0x30) # [...] # # Both relocation offsets for the mcounts in the above example will be -# offset from .sched.text. If we make another file called tmp.s with: +# offset from .sched.text. If we choose global symbol func2 as a reference and +# make another file called tmp.s with the new offsets: # # .section __mcount_loc -# .quad my_func + 0x5 -# .quad my_func + 0x1b +# .quad func2 - 0x10 +# .quad func2 + 0x10 # -# We can then compile this tmp.s into tmp.o, and link it to the original +# We can then compile this tmp.s into tmp.o, and link it back to the original # object. # -# But this gets hard if my_func is not globl (a static function). -# In such a case we have: +# In our algorithm, we will choose the first global function we meet in this +# section as the reference. But this gets hard if there is no global functions +# in this section. In such a case we have to select a local one. E.g. func1: # # .section ".sched.text", "ax" -# my_func: +# func1: # [...] -# call mcount (offset: 0x5) +# call mcount (offset: 0x10) # [...] # ret -# other_func: +# func2: # [...] -# call mcount (offset: 0x1b) +# call mcount (offset: 0x20) # [...] +# .section "other.section" # # If we make the tmp.s the same as above, when we link together with -# the original object, we will end up with two symbols for my_func: +# the original object, we will end up with two symbols for func1: # one local, one global. After final compile, we will end up with -# an undefined reference to my_func. +# an undefined reference to func1 or a wrong reference to another global +# func1 in other files. # # Since local objects can reference local variables, we need to find # a way to make tmp.o reference the local objects of the original object -# file after it is linked together. To do this, we convert the my_func +# file after it is linked together. To do this, we convert func1 # into a global symbol before linking tmp.o. Then after we link tmp.o -# we will only have a single symbol for my_func that is global. -# We can convert my_func back into a local symbol and we are done. +# we will only have a single symbol for func1 that is global. +# We can convert func1 back into a local symbol and we are done. # # Here are the steps we take: # -# 1) Record all the local symbols by using 'nm' +# 1) Record all the local and weak symbols by using 'nm' # 2) Use objdump to find all the call site offsets and sections for # mcount. # 3) Compile the list into its own object. @@ -86,10 +102,8 @@ # 6) Link together this new object with the list object. # 7) Convert the local functions back to local symbols and rename # the result as the original object. -# End. # 8) Link the object with the list object. # 9) Move the result back to the original object. -# End. # use strict; @@ -99,7 +113,7 @@ $P =~ s@.*/@@g; my $V = '0.1'; -if ($#ARGV < 7) { +if ($#ARGV != 10) { print "usage: $P arch bits objdump objcopy cc ld nm rm mv is_module inputfile\n"; print "version: $V\n"; exit(1); @@ -109,7 +123,7 @@ my ($arch, $bits, $objdump, $objcopy, $cc, $ld, $nm, $rm, $mv, $is_module, $inputfile) = @ARGV; # This file refers to mcount and shouldn't be ftraced, so lets' ignore it -if ($inputfile eq "kernel/trace/ftrace.o") { +if ($inputfile =~ m,kernel/trace/ftrace\.o$,) { exit(0); } @@ -119,6 +133,7 @@ my %text_sections = ( ".sched.text" => 1, ".spinlock.text" => 1, ".irqentry.text" => 1, + ".text.unlikely" => 1, ); $objdump = "objdump" if ((length $objdump) == 0); @@ -137,13 +152,47 @@ my %weak; # List of weak functions my %convert; # List of local functions used that needs conversion my $type; -my $nm_regex; # Find the local functions (return function) +my $local_regex; # Match a local function (return function) +my $weak_regex; # Match a weak function (return function) my $section_regex; # Find the start of a section my $function_regex; # Find the name of a function # (return offset and func name) my $mcount_regex; # Find the call site to mcount (return offset) my $alignment; # The .align value to use for $mcount_section my $section_type; # Section header plus possible alignment command +my $can_use_local = 0; # If we can use local function references + +# Shut up recordmcount if user has older objcopy +my $quiet_recordmcount = ".tmp_quiet_recordmcount"; +my $print_warning = 1; +$print_warning = 0 if ( -f $quiet_recordmcount); + +## +# check_objcopy - whether objcopy supports --globalize-symbols +# +# --globalize-symbols came out in 2.17, we must test the version +# of objcopy, and if it is less than 2.17, then we can not +# record local functions. +sub check_objcopy +{ + open (IN, "$objcopy --version |") or die "error running $objcopy"; + while (<IN>) { + if (/objcopy.*\s(\d+)\.(\d+)/) { + $can_use_local = 1 if ($1 > 2 || ($1 == 2 && $2 >= 17)); + last; + } + } + close (IN); + + if (!$can_use_local && $print_warning) { + print STDERR "WARNING: could not find objcopy version or version " . + "is less than 2.17.\n" . + "\tLocal function references are disabled.\n"; + open (QUIET, ">$quiet_recordmcount"); + printf QUIET "Disables the warning from recordmcount.pl\n"; + close QUIET; + } +} if ($arch eq "x86") { if ($bits == 64) { @@ -157,7 +206,8 @@ if ($arch eq "x86") { # We base the defaults off of i386, the other archs may # feel free to change them in the below if statements. # -$nm_regex = "^[0-9a-fA-F]+\\s+t\\s+(\\S+)"; +$local_regex = "^[0-9a-fA-F]+\\s+t\\s+(\\S+)"; +$weak_regex = "^[0-9a-fA-F]+\\s+([wW])\\s+(\\S+)"; $section_regex = "Disassembly of section\\s+(\\S+):"; $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:"; $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount\$"; @@ -206,7 +256,7 @@ if ($arch eq "x86_64") { $cc .= " -m32"; } elsif ($arch eq "powerpc") { - $nm_regex = "^[0-9a-fA-F]+\\s+t\\s+(\\.?\\S+)"; + $local_regex = "^[0-9a-fA-F]+\\s+t\\s+(\\.?\\S+)"; $function_regex = "^([0-9a-fA-F]+)\\s+<(\\.?.*?)>:"; $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s\\.?_mcount\$"; @@ -278,44 +328,17 @@ if ($filename =~ m,^(.*)(\.\S),) { my $mcount_s = $dirname . "/.tmp_mc_" . $prefix . ".s"; my $mcount_o = $dirname . "/.tmp_mc_" . $prefix . ".o"; -# -# --globalize-symbols came out in 2.17, we must test the version -# of objcopy, and if it is less than 2.17, then we can not -# record local functions. -my $use_locals = 01; -my $local_warn_once = 0; -my $found_version = 0; - -open (IN, "$objcopy --version |") || die "error running $objcopy"; -while (<IN>) { - if (/objcopy.*\s(\d+)\.(\d+)/) { - my $major = $1; - my $minor = $2; - - $found_version = 1; - if ($major < 2 || - ($major == 2 && $minor < 17)) { - $use_locals = 0; - } - last; - } -} -close (IN); - -if (!$found_version) { - print STDERR "WARNING: could not find objcopy version.\n" . - "\tDisabling local function references.\n"; -} +check_objcopy(); # # Step 1: find all the local (static functions) and weak symbols. -# 't' is local, 'w/W' is weak (we never use a weak function) +# 't' is local, 'w/W' is weak # open (IN, "$nm $inputfile|") || die "error running $nm"; while (<IN>) { - if (/$nm_regex/) { + if (/$local_regex/) { $locals{$1} = 1; - } elsif (/^[0-9a-fA-F]+\s+([wW])\s+(\S+)/) { + } elsif (/$weak_regex/) { $weak{$2} = $1; } } @@ -333,26 +356,20 @@ my $offset = 0; # offset of ref_func to section beginning # sub update_funcs { - return if ($#offsets < 0); - - defined($ref_func) || die "No function to reference"; + return unless ($ref_func and @offsets); - # A section only had a weak function, to represent it. - # Unfortunately, a weak function may be overwritten by another - # function of the same name, making all these offsets incorrect. - # To be safe, we simply print a warning and bail. + # Sanity check on weak function. A weak function may be overwritten by + # another function of the same name, making all these offsets incorrect. if (defined $weak{$ref_func}) { - print STDERR - "$inputfile: WARNING: referencing weak function" . + die "$inputfile: ERROR: referencing weak function" . " $ref_func for mcount\n"; - return; } # is this function static? If so, note this fact. if (defined $locals{$ref_func}) { # only use locals if objcopy supports globalize-symbols - if (!$use_locals) { + if (!$can_use_local) { return; } $convert{$ref_func} = 1; @@ -378,9 +395,27 @@ open(IN, "$objdump -hdr $inputfile|") || die "error running $objdump"; my $text; + +# read headers first my $read_headers = 1; while (<IN>) { + + if ($read_headers && /$mcount_section/) { + # + # Somehow the make process can execute this script on an + # object twice. If it does, we would duplicate the mcount + # section and it will cause the function tracer self test + # to fail. Check if the mcount section exists, and if it does, + # warn and exit. + # + print STDERR "ERROR: $mcount_section already in $inputfile\n" . + "\tThis may be an indication that your build is corrupted.\n" . + "\tDelete $inputfile and try again. If the same object file\n" . + "\tstill causes an issue, then disable CONFIG_DYNAMIC_FTRACE.\n"; + exit(-1); + } + # is it a section? if (/$section_regex/) { $read_headers = 0; @@ -392,7 +427,7 @@ while (<IN>) { $read_function = 0; } # print out any recorded offsets - update_funcs() if (defined($ref_func)); + update_funcs(); # reset all markers and arrays $text_found = 0; @@ -421,21 +456,7 @@ while (<IN>) { $offset = hex $1; } } - } elsif ($read_headers && /$mcount_section/) { - # - # Somehow the make process can execute this script on an - # object twice. If it does, we would duplicate the mcount - # section and it will cause the function tracer self test - # to fail. Check if the mcount section exists, and if it does, - # warn and exit. - # - print STDERR "ERROR: $mcount_section already in $inputfile\n" . - "\tThis may be an indication that your build is corrupted.\n" . - "\tDelete $inputfile and try again. If the same object file\n" . - "\tstill causes an issue, then disable CONFIG_DYNAMIC_FTRACE.\n"; - exit(-1); } - # is this a call site to mcount? If so, record it to print later if ($text_found && /$mcount_regex/) { $offsets[$#offsets + 1] = hex $1; @@ -443,7 +464,7 @@ while (<IN>) { } # dump out anymore offsets that may have been found -update_funcs() if (defined($ref_func)); +update_funcs(); # If we did not find any mcount callers, we are done (do nothing). if (!$opened) { diff --git a/scripts/selinux/Makefile b/scripts/selinux/Makefile index ca4b1ec0182..e8049da1831 100644 --- a/scripts/selinux/Makefile +++ b/scripts/selinux/Makefile @@ -1,2 +1,2 @@ -subdir-y := mdp -subdir- += mdp +subdir-y := mdp genheaders +subdir- += mdp genheaders diff --git a/scripts/selinux/genheaders/.gitignore b/scripts/selinux/genheaders/.gitignore new file mode 100644 index 00000000000..4c0b646ff8d --- /dev/null +++ b/scripts/selinux/genheaders/.gitignore @@ -0,0 +1 @@ +genheaders diff --git a/scripts/selinux/genheaders/Makefile b/scripts/selinux/genheaders/Makefile new file mode 100644 index 00000000000..417b165008e --- /dev/null +++ b/scripts/selinux/genheaders/Makefile @@ -0,0 +1,5 @@ +hostprogs-y := genheaders +HOST_EXTRACFLAGS += -Isecurity/selinux/include + +always := $(hostprogs-y) +clean-files := $(hostprogs-y) diff --git a/scripts/selinux/genheaders/genheaders.c b/scripts/selinux/genheaders/genheaders.c new file mode 100644 index 00000000000..24626968055 --- /dev/null +++ b/scripts/selinux/genheaders/genheaders.c @@ -0,0 +1,118 @@ +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <ctype.h> + +struct security_class_mapping { + const char *name; + const char *perms[sizeof(unsigned) * 8 + 1]; +}; + +#include "classmap.h" +#include "initial_sid_to_string.h" + +#define max(x, y) (((int)(x) > (int)(y)) ? x : y) + +const char *progname; + +static void usage(void) +{ + printf("usage: %s flask.h av_permissions.h\n", progname); + exit(1); +} + +static char *stoupperx(const char *s) +{ + char *s2 = strdup(s); + char *p; + + if (!s2) { + fprintf(stderr, "%s: out of memory\n", progname); + exit(3); + } + + for (p = s2; *p; p++) + *p = toupper(*p); + return s2; +} + +int main(int argc, char *argv[]) +{ + int i, j, k; + int isids_len; + FILE *fout; + + progname = argv[0]; + + if (argc < 3) + usage(); + + fout = fopen(argv[1], "w"); + if (!fout) { + fprintf(stderr, "Could not open %s for writing: %s\n", + argv[1], strerror(errno)); + exit(2); + } + + for (i = 0; secclass_map[i].name; i++) { + struct security_class_mapping *map = &secclass_map[i]; + map->name = stoupperx(map->name); + for (j = 0; map->perms[j]; j++) + map->perms[j] = stoupperx(map->perms[j]); + } + + isids_len = sizeof(initial_sid_to_string) / sizeof (char *); + for (i = 1; i < isids_len; i++) + initial_sid_to_string[i] = stoupperx(initial_sid_to_string[i]); + + fprintf(fout, "/* This file is automatically generated. Do not edit. */\n"); + fprintf(fout, "#ifndef _SELINUX_FLASK_H_\n#define _SELINUX_FLASK_H_\n\n"); + + for (i = 0; secclass_map[i].name; i++) { + struct security_class_mapping *map = &secclass_map[i]; + fprintf(fout, "#define SECCLASS_%s", map->name); + for (j = 0; j < max(1, 40 - strlen(map->name)); j++) + fprintf(fout, " "); + fprintf(fout, "%2d\n", i+1); + } + + fprintf(fout, "\n"); + + for (i = 1; i < isids_len; i++) { + char *s = initial_sid_to_string[i]; + fprintf(fout, "#define SECINITSID_%s", s); + for (j = 0; j < max(1, 40 - strlen(s)); j++) + fprintf(fout, " "); + fprintf(fout, "%2d\n", i); + } + fprintf(fout, "\n#define SECINITSID_NUM %d\n", i-1); + fprintf(fout, "\n#endif\n"); + fclose(fout); + + fout = fopen(argv[2], "w"); + if (!fout) { + fprintf(stderr, "Could not open %s for writing: %s\n", + argv[2], strerror(errno)); + exit(4); + } + + fprintf(fout, "/* This file is automatically generated. Do not edit. */\n"); + fprintf(fout, "#ifndef _SELINUX_AV_PERMISSIONS_H_\n#define _SELINUX_AV_PERMISSIONS_H_\n\n"); + + for (i = 0; secclass_map[i].name; i++) { + struct security_class_mapping *map = &secclass_map[i]; + for (j = 0; map->perms[j]; j++) { + fprintf(fout, "#define %s__%s", map->name, + map->perms[j]); + for (k = 0; k < max(1, 40 - strlen(map->name) - strlen(map->perms[j])); k++) + fprintf(fout, " "); + fprintf(fout, "0x%08xUL\n", (1<<j)); + } + } + + fprintf(fout, "\n#endif\n"); + fclose(fout); + exit(0); +} diff --git a/scripts/selinux/mdp/mdp.c b/scripts/selinux/mdp/mdp.c index b4ced856258..62b34ce1f50 100644 --- a/scripts/selinux/mdp/mdp.c +++ b/scripts/selinux/mdp/mdp.c @@ -29,86 +29,27 @@ #include <unistd.h> #include <string.h> -#include "flask.h" - static void usage(char *name) { printf("usage: %s [-m] policy_file context_file\n", name); exit(1); } -static void find_common_name(char *cname, char *dest, int len) -{ - char *start, *end; - - start = strchr(cname, '_')+1; - end = strchr(start, '_'); - if (!start || !end || start-cname > len || end-start > len) { - printf("Error with commons defines\n"); - exit(1); - } - strncpy(dest, start, end-start); - dest[end-start] = '\0'; -} - -#define S_(x) x, -static char *classlist[] = { -#include "class_to_string.h" - NULL +/* Class/perm mapping support */ +struct security_class_mapping { + const char *name; + const char *perms[sizeof(unsigned) * 8 + 1]; }; -#undef S_ +#include "classmap.h" #include "initial_sid_to_string.h" -#define TB_(x) char *x[] = { -#define TE_(x) NULL }; -#define S_(x) x, -#include "common_perm_to_string.h" -#undef TB_ -#undef TE_ -#undef S_ - -struct common { - char *cname; - char **perms; -}; -struct common common[] = { -#define TB_(x) { #x, x }, -#define S_(x) -#define TE_(x) -#include "common_perm_to_string.h" -#undef TB_ -#undef TE_ -#undef S_ -}; - -#define S_(x, y, z) {x, #y}, -struct av_inherit { - int class; - char *common; -}; -struct av_inherit av_inherit[] = { -#include "av_inherit.h" -}; -#undef S_ - -#include "av_permissions.h" -#define S_(x, y, z) {x, y, z}, -struct av_perms { - int class; - int perm_i; - char *perm_s; -}; -struct av_perms av_perms[] = { -#include "av_perm_to_string.h" -}; -#undef S_ - int main(int argc, char *argv[]) { int i, j, mls = 0; + int initial_sid_to_string_len; char **arg, *polout, *ctxout; - int classlist_len, initial_sid_to_string_len; + FILE *fout; if (argc < 3) @@ -127,64 +68,25 @@ int main(int argc, char *argv[]) usage(argv[0]); } - classlist_len = sizeof(classlist) / sizeof(char *); /* print out the classes */ - for (i=1; i < classlist_len; i++) { - if(classlist[i]) - fprintf(fout, "class %s\n", classlist[i]); - else - fprintf(fout, "class user%d\n", i); - } + for (i = 0; secclass_map[i].name; i++) + fprintf(fout, "class %s\n", secclass_map[i].name); fprintf(fout, "\n"); initial_sid_to_string_len = sizeof(initial_sid_to_string) / sizeof (char *); /* print out the sids */ - for (i=1; i < initial_sid_to_string_len; i++) + for (i = 1; i < initial_sid_to_string_len; i++) fprintf(fout, "sid %s\n", initial_sid_to_string[i]); fprintf(fout, "\n"); - /* print out the commons */ - for (i=0; i< sizeof(common)/sizeof(struct common); i++) { - char cname[101]; - find_common_name(common[i].cname, cname, 100); - cname[100] = '\0'; - fprintf(fout, "common %s\n{\n", cname); - for (j=0; common[i].perms[j]; j++) - fprintf(fout, "\t%s\n", common[i].perms[j]); - fprintf(fout, "}\n\n"); - } - fprintf(fout, "\n"); - /* print out the class permissions */ - for (i=1; i < classlist_len; i++) { - if (classlist[i]) { - int firstperm = -1, numperms = 0; - - fprintf(fout, "class %s\n", classlist[i]); - /* does it inherit from a common? */ - for (j=0; j < sizeof(av_inherit)/sizeof(struct av_inherit); j++) - if (av_inherit[j].class == i) - fprintf(fout, "inherits %s\n", av_inherit[j].common); - - for (j=0; j < sizeof(av_perms)/sizeof(struct av_perms); j++) { - if (av_perms[j].class == i) { - if (firstperm == -1) - firstperm = j; - numperms++; - } - } - if (!numperms) { - fprintf(fout, "\n"); - continue; - } - - fprintf(fout, "{\n"); - /* print out the av_perms */ - for (j=0; j < numperms; j++) { - fprintf(fout, "\t%s\n", av_perms[firstperm+j].perm_s); - } - fprintf(fout, "}\n\n"); - } + for (i = 0; secclass_map[i].name; i++) { + struct security_class_mapping *map = &secclass_map[i]; + fprintf(fout, "class %s\n", map->name); + fprintf(fout, "{\n"); + for (j = 0; map->perms[j]; j++) + fprintf(fout, "\t%s\n", map->perms[j]); + fprintf(fout, "}\n\n"); } fprintf(fout, "\n"); @@ -197,31 +99,34 @@ int main(int argc, char *argv[]) /* types, roles, and allows */ fprintf(fout, "type base_t;\n"); fprintf(fout, "role base_r types { base_t };\n"); - for (i=1; i < classlist_len; i++) { - if (classlist[i]) - fprintf(fout, "allow base_t base_t:%s *;\n", classlist[i]); - else - fprintf(fout, "allow base_t base_t:user%d *;\n", i); - } + for (i = 0; secclass_map[i].name; i++) + fprintf(fout, "allow base_t base_t:%s *;\n", + secclass_map[i].name); fprintf(fout, "user user_u roles { base_r };\n"); fprintf(fout, "\n"); /* default sids */ - for (i=1; i < initial_sid_to_string_len; i++) + for (i = 1; i < initial_sid_to_string_len; i++) fprintf(fout, "sid %s user_u:base_r:base_t\n", initial_sid_to_string[i]); fprintf(fout, "\n"); - fprintf(fout, "fs_use_xattr ext2 user_u:base_r:base_t;\n"); fprintf(fout, "fs_use_xattr ext3 user_u:base_r:base_t;\n"); + fprintf(fout, "fs_use_xattr ext4 user_u:base_r:base_t;\n"); fprintf(fout, "fs_use_xattr jfs user_u:base_r:base_t;\n"); fprintf(fout, "fs_use_xattr xfs user_u:base_r:base_t;\n"); fprintf(fout, "fs_use_xattr reiserfs user_u:base_r:base_t;\n"); + fprintf(fout, "fs_use_xattr jffs2 user_u:base_r:base_t;\n"); + fprintf(fout, "fs_use_xattr gfs2 user_u:base_r:base_t;\n"); + fprintf(fout, "fs_use_xattr lustre user_u:base_r:base_t;\n"); + fprintf(fout, "fs_use_task eventpollfs user_u:base_r:base_t;\n"); fprintf(fout, "fs_use_task pipefs user_u:base_r:base_t;\n"); fprintf(fout, "fs_use_task sockfs user_u:base_r:base_t;\n"); + fprintf(fout, "fs_use_trans mqueue user_u:base_r:base_t;\n"); fprintf(fout, "fs_use_trans devpts user_u:base_r:base_t;\n"); + fprintf(fout, "fs_use_trans hugetlbfs user_u:base_r:base_t;\n"); fprintf(fout, "fs_use_trans tmpfs user_u:base_r:base_t;\n"); fprintf(fout, "fs_use_trans shm user_u:base_r:base_t;\n"); diff --git a/security/Kconfig b/security/Kconfig index fb363cd81cf..226b9556b25 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -91,28 +91,6 @@ config SECURITY_PATH implement pathname based access controls. If you are unsure how to answer this question, answer N. -config SECURITY_FILE_CAPABILITIES - bool "File POSIX Capabilities" - default n - help - This enables filesystem capabilities, allowing you to give - binaries a subset of root's powers without using setuid 0. - - If in doubt, answer N. - -config SECURITY_ROOTPLUG - bool "Root Plug Support" - depends on USB=y && SECURITY - help - This is a sample LSM module that should only be used as such. - It prevents any programs running with egid == 0 if a specific - USB device is not present in the system. - - See <http://www.linuxjournal.com/article.php?sid=6279> for - more information about this module. - - If you are unsure how to answer this question, answer N. - config INTEL_TXT bool "Enable Intel(R) Trusted Execution Technology (Intel(R) TXT)" depends on HAVE_INTEL_TXT @@ -165,5 +143,37 @@ source security/tomoyo/Kconfig source security/integrity/ima/Kconfig +choice + prompt "Default security module" + default DEFAULT_SECURITY_SELINUX if SECURITY_SELINUX + default DEFAULT_SECURITY_SMACK if SECURITY_SMACK + default DEFAULT_SECURITY_TOMOYO if SECURITY_TOMOYO + default DEFAULT_SECURITY_DAC + + help + Select the security module that will be used by default if the + kernel parameter security= is not specified. + + config DEFAULT_SECURITY_SELINUX + bool "SELinux" if SECURITY_SELINUX=y + + config DEFAULT_SECURITY_SMACK + bool "Simplified Mandatory Access Control" if SECURITY_SMACK=y + + config DEFAULT_SECURITY_TOMOYO + bool "TOMOYO" if SECURITY_TOMOYO=y + + config DEFAULT_SECURITY_DAC + bool "Unix Discretionary Access Controls" + +endchoice + +config DEFAULT_SECURITY + string + default "selinux" if DEFAULT_SECURITY_SELINUX + default "smack" if DEFAULT_SECURITY_SMACK + default "tomoyo" if DEFAULT_SECURITY_TOMOYO + default "" if DEFAULT_SECURITY_DAC + endmenu diff --git a/security/Makefile b/security/Makefile index 95ecc06392d..bb44e350c61 100644 --- a/security/Makefile +++ b/security/Makefile @@ -18,7 +18,6 @@ obj-$(CONFIG_SECURITY_SELINUX) += selinux/built-in.o obj-$(CONFIG_SECURITY_SMACK) += smack/built-in.o obj-$(CONFIG_AUDIT) += lsm_audit.o obj-$(CONFIG_SECURITY_TOMOYO) += tomoyo/built-in.o -obj-$(CONFIG_SECURITY_ROOTPLUG) += root_plug.o obj-$(CONFIG_CGROUP_DEVICE) += device_cgroup.o # Object integrity file lists diff --git a/security/capability.c b/security/capability.c index fce07a7bc82..5c700e1a4fd 100644 --- a/security/capability.c +++ b/security/capability.c @@ -308,6 +308,22 @@ static int cap_path_truncate(struct path *path, loff_t length, { return 0; } + +static int cap_path_chmod(struct dentry *dentry, struct vfsmount *mnt, + mode_t mode) +{ + return 0; +} + +static int cap_path_chown(struct path *path, uid_t uid, gid_t gid) +{ + return 0; +} + +static int cap_path_chroot(struct path *root) +{ + return 0; +} #endif static int cap_file_permission(struct file *file, int mask) @@ -405,7 +421,7 @@ static int cap_kernel_create_files_as(struct cred *new, struct inode *inode) return 0; } -static int cap_kernel_module_request(void) +static int cap_kernel_module_request(char *kmod_name) { return 0; } @@ -977,6 +993,9 @@ void security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, path_link); set_to_cap_if_null(ops, path_rename); set_to_cap_if_null(ops, path_truncate); + set_to_cap_if_null(ops, path_chmod); + set_to_cap_if_null(ops, path_chown); + set_to_cap_if_null(ops, path_chroot); #endif set_to_cap_if_null(ops, file_permission); set_to_cap_if_null(ops, file_alloc_security); diff --git a/security/commoncap.c b/security/commoncap.c index fe30751a6cd..f800fdb3de9 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -1,4 +1,4 @@ -/* Common capabilities, needed by capability.o and root_plug.o +/* Common capabilities, needed by capability.o. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -173,7 +173,6 @@ int cap_capget(struct task_struct *target, kernel_cap_t *effective, */ static inline int cap_inh_is_capped(void) { -#ifdef CONFIG_SECURITY_FILE_CAPABILITIES /* they are so limited unless the current task has the CAP_SETPCAP * capability @@ -181,7 +180,6 @@ static inline int cap_inh_is_capped(void) if (cap_capable(current, current_cred(), CAP_SETPCAP, SECURITY_CAP_AUDIT) == 0) return 0; -#endif return 1; } @@ -239,8 +237,6 @@ static inline void bprm_clear_caps(struct linux_binprm *bprm) bprm->cap_effective = false; } -#ifdef CONFIG_SECURITY_FILE_CAPABILITIES - /** * cap_inode_need_killpriv - Determine if inode change affects privileges * @dentry: The inode/dentry in being changed with change marked ATTR_KILL_PRIV @@ -421,49 +417,6 @@ out: return rc; } -#else -int cap_inode_need_killpriv(struct dentry *dentry) -{ - return 0; -} - -int cap_inode_killpriv(struct dentry *dentry) -{ - return 0; -} - -int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps) -{ - memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data)); - return -ENODATA; -} - -static inline int get_file_caps(struct linux_binprm *bprm, bool *effective) -{ - bprm_clear_caps(bprm); - return 0; -} -#endif - -/* - * Determine whether a exec'ing process's new permitted capabilities should be - * limited to just what it already has. - * - * This prevents processes that are being ptraced from gaining access to - * CAP_SETPCAP, unless the process they're tracing already has it, and the - * binary they're executing has filecaps that elevate it. - * - * Returns 1 if they should be limited, 0 if they are not. - */ -static inline int cap_limit_ptraced_target(void) -{ -#ifndef CONFIG_SECURITY_FILE_CAPABILITIES - if (capable(CAP_SETPCAP)) - return 0; -#endif - return 1; -} - /** * cap_bprm_set_creds - Set up the proposed credentials for execve(). * @bprm: The execution parameters, including the proposed creds @@ -523,9 +476,8 @@ skip: new->euid = new->uid; new->egid = new->gid; } - if (cap_limit_ptraced_target()) - new->cap_permitted = cap_intersect(new->cap_permitted, - old->cap_permitted); + new->cap_permitted = cap_intersect(new->cap_permitted, + old->cap_permitted); } new->suid = new->fsuid = new->euid; @@ -739,7 +691,6 @@ int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags) return 0; } -#ifdef CONFIG_SECURITY_FILE_CAPABILITIES /* * Rationale: code calling task_setscheduler, task_setioprio, and * task_setnice, assumes that @@ -820,22 +771,6 @@ static long cap_prctl_drop(struct cred *new, unsigned long cap) return 0; } -#else -int cap_task_setscheduler (struct task_struct *p, int policy, - struct sched_param *lp) -{ - return 0; -} -int cap_task_setioprio (struct task_struct *p, int ioprio) -{ - return 0; -} -int cap_task_setnice (struct task_struct *p, int nice) -{ - return 0; -} -#endif - /** * cap_task_prctl - Implement process control functions for this security module * @option: The process control function requested @@ -866,7 +801,6 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, error = !!cap_raised(new->cap_bset, arg2); goto no_change; -#ifdef CONFIG_SECURITY_FILE_CAPABILITIES case PR_CAPBSET_DROP: error = cap_prctl_drop(new, arg2); if (error < 0) @@ -917,8 +851,6 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, error = new->securebits; goto no_change; -#endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ - case PR_GET_KEEPCAPS: if (issecure(SECURE_KEEP_CAPS)) error = 1; diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig index 53d9764e8f0..3d7846de806 100644 --- a/security/integrity/ima/Kconfig +++ b/security/integrity/ima/Kconfig @@ -3,6 +3,7 @@ config IMA bool "Integrity Measurement Architecture(IMA)" depends on ACPI + depends on SECURITY select SECURITYFS select CRYPTO select CRYPTO_HMAC diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 3bb90b6f1dd..51bd0fd9c9f 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -354,6 +354,10 @@ static void dump_common_audit_data(struct audit_buffer *ab, } break; #endif + case LSM_AUDIT_DATA_KMOD: + audit_log_format(ab, " kmod="); + audit_log_untrustedstring(ab, a->u.kmod_name); + break; } /* switch (a->type) */ } diff --git a/security/min_addr.c b/security/min_addr.c index c844eed7915..fc43c9d3708 100644 --- a/security/min_addr.c +++ b/security/min_addr.c @@ -33,6 +33,9 @@ int mmap_min_addr_handler(struct ctl_table *table, int write, { int ret; + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); update_mmap_min_addr(); diff --git a/security/root_plug.c b/security/root_plug.c deleted file mode 100644 index 2f7ffa67c4d..00000000000 --- a/security/root_plug.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Root Plug sample LSM module - * - * Originally written for a Linux Journal. - * - * Copyright (C) 2002 Greg Kroah-Hartman <greg@kroah.com> - * - * Prevents any programs running with egid == 0 if a specific USB device - * is not present in the system. Yes, it can be gotten around, but is a - * nice starting point for people to play with, and learn the LSM - * interface. - * - * If you want to turn this into something with a semblance of security, - * you need to hook the task_* functions also. - * - * See http://www.linuxjournal.com/article.php?sid=6279 for more information - * about this code. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation, version 2 of the - * License. - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/security.h> -#include <linux/usb.h> -#include <linux/moduleparam.h> - -/* default is a generic type of usb to serial converter */ -static int vendor_id = 0x0557; -static int product_id = 0x2008; - -module_param(vendor_id, uint, 0400); -module_param(product_id, uint, 0400); - -/* should we print out debug messages */ -static int debug = 0; - -module_param(debug, bool, 0600); - -#define MY_NAME "root_plug" - -#define root_dbg(fmt, arg...) \ - do { \ - if (debug) \ - printk(KERN_DEBUG "%s: %s: " fmt , \ - MY_NAME , __func__ , \ - ## arg); \ - } while (0) - -static int rootplug_bprm_check_security (struct linux_binprm *bprm) -{ - struct usb_device *dev; - - root_dbg("file %s, e_uid = %d, e_gid = %d\n", - bprm->filename, bprm->cred->euid, bprm->cred->egid); - - if (bprm->cred->egid == 0) { - dev = usb_find_device(vendor_id, product_id); - if (!dev) { - root_dbg("e_gid = 0, and device not found, " - "task not allowed to run...\n"); - return -EPERM; - } - usb_put_dev(dev); - } - - return 0; -} - -static struct security_operations rootplug_security_ops = { - .bprm_check_security = rootplug_bprm_check_security, -}; - -static int __init rootplug_init (void) -{ - /* register ourselves with the security framework */ - if (register_security (&rootplug_security_ops)) { - printk (KERN_INFO - "Failure registering Root Plug module with the kernel\n"); - return -EINVAL; - } - printk (KERN_INFO "Root Plug module initialized, " - "vendor_id = %4.4x, product id = %4.4x\n", vendor_id, product_id); - return 0; -} - -security_initcall (rootplug_init); diff --git a/security/security.c b/security/security.c index c4c673240c1..24e060be9fa 100644 --- a/security/security.c +++ b/security/security.c @@ -16,9 +16,11 @@ #include <linux/init.h> #include <linux/kernel.h> #include <linux/security.h> +#include <linux/ima.h> /* Boot-time LSM user choice */ -static __initdata char chosen_lsm[SECURITY_NAME_MAX + 1]; +static __initdata char chosen_lsm[SECURITY_NAME_MAX + 1] = + CONFIG_DEFAULT_SECURITY; /* things that live in capability.c */ extern struct security_operations default_security_ops; @@ -79,8 +81,10 @@ __setup("security=", choose_lsm); * * Return true if: * -The passed LSM is the one chosen by user at boot time, - * -or user didn't specify a specific LSM and we're the first to ask - * for registration permission, + * -or the passed LSM is configured as the default and the user did not + * choose an alternate LSM at boot time, + * -or there is no default LSM set and the user didn't specify a + * specific LSM and we're the first to ask for registration permission, * -or the passed LSM is currently loaded. * Otherwise, return false. */ @@ -235,7 +239,12 @@ int security_bprm_set_creds(struct linux_binprm *bprm) int security_bprm_check(struct linux_binprm *bprm) { - return security_ops->bprm_check_security(bprm); + int ret; + + ret = security_ops->bprm_check_security(bprm); + if (ret) + return ret; + return ima_bprm_check(bprm); } void security_bprm_committing_creds(struct linux_binprm *bprm) @@ -352,12 +361,21 @@ EXPORT_SYMBOL(security_sb_parse_opts_str); int security_inode_alloc(struct inode *inode) { + int ret; + inode->i_security = NULL; - return security_ops->inode_alloc_security(inode); + ret = security_ops->inode_alloc_security(inode); + if (ret) + return ret; + ret = ima_inode_alloc(inode); + if (ret) + security_inode_free(inode); + return ret; } void security_inode_free(struct inode *inode) { + ima_inode_free(inode); security_ops->inode_free_security(inode); } @@ -434,6 +452,26 @@ int security_path_truncate(struct path *path, loff_t length, return 0; return security_ops->path_truncate(path, length, time_attrs); } + +int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt, + mode_t mode) +{ + if (unlikely(IS_PRIVATE(dentry->d_inode))) + return 0; + return security_ops->path_chmod(dentry, mnt, mode); +} + +int security_path_chown(struct path *path, uid_t uid, gid_t gid) +{ + if (unlikely(IS_PRIVATE(path->dentry->d_inode))) + return 0; + return security_ops->path_chown(path, uid, gid); +} + +int security_path_chroot(struct path *path) +{ + return security_ops->path_chroot(path); +} #endif int security_inode_create(struct inode *dir, struct dentry *dentry, int mode) @@ -628,6 +666,8 @@ int security_file_alloc(struct file *file) void security_file_free(struct file *file) { security_ops->file_free_security(file); + if (file->f_dentry) + ima_file_free(file); } int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) @@ -639,7 +679,12 @@ int security_file_mmap(struct file *file, unsigned long reqprot, unsigned long prot, unsigned long flags, unsigned long addr, unsigned long addr_only) { - return security_ops->file_mmap(file, reqprot, prot, flags, addr, addr_only); + int ret; + + ret = security_ops->file_mmap(file, reqprot, prot, flags, addr, addr_only); + if (ret) + return ret; + return ima_file_mmap(file, prot); } int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, @@ -719,9 +764,9 @@ int security_kernel_create_files_as(struct cred *new, struct inode *inode) return security_ops->kernel_create_files_as(new, inode); } -int security_kernel_module_request(void) +int security_kernel_module_request(char *kmod_name) { - return security_ops->kernel_module_request(); + return security_ops->kernel_module_request(kmod_name); } int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) diff --git a/security/selinux/.gitignore b/security/selinux/.gitignore new file mode 100644 index 00000000000..2e5040a3d48 --- /dev/null +++ b/security/selinux/.gitignore @@ -0,0 +1,2 @@ +av_permissions.h +flask.h diff --git a/security/selinux/Makefile b/security/selinux/Makefile index d47fc5e545e..f013982df41 100644 --- a/security/selinux/Makefile +++ b/security/selinux/Makefile @@ -18,5 +18,13 @@ selinux-$(CONFIG_SECURITY_NETWORK_XFRM) += xfrm.o selinux-$(CONFIG_NETLABEL) += netlabel.o -EXTRA_CFLAGS += -Isecurity/selinux/include +EXTRA_CFLAGS += -Isecurity/selinux -Isecurity/selinux/include +$(obj)/avc.o: $(obj)/flask.h + +quiet_cmd_flask = GEN $(obj)/flask.h $(obj)/av_permissions.h + cmd_flask = scripts/selinux/genheaders/genheaders $(obj)/flask.h $(obj)/av_permissions.h + +targets += flask.h +$(obj)/flask.h: $(src)/include/classmap.h FORCE + $(call if_changed,flask) diff --git a/security/selinux/avc.c b/security/selinux/avc.c index b4b5da1c0a4..f2dde268165 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -31,43 +31,7 @@ #include <net/ipv6.h> #include "avc.h" #include "avc_ss.h" - -static const struct av_perm_to_string av_perm_to_string[] = { -#define S_(c, v, s) { c, v, s }, -#include "av_perm_to_string.h" -#undef S_ -}; - -static const char *class_to_string[] = { -#define S_(s) s, -#include "class_to_string.h" -#undef S_ -}; - -#define TB_(s) static const char *s[] = { -#define TE_(s) }; -#define S_(s) s, -#include "common_perm_to_string.h" -#undef TB_ -#undef TE_ -#undef S_ - -static const struct av_inherit av_inherit[] = { -#define S_(c, i, b) { .tclass = c,\ - .common_pts = common_##i##_perm_to_string,\ - .common_base = b }, -#include "av_inherit.h" -#undef S_ -}; - -const struct selinux_class_perm selinux_class_perm = { - .av_perm_to_string = av_perm_to_string, - .av_pts_len = ARRAY_SIZE(av_perm_to_string), - .class_to_string = class_to_string, - .cts_len = ARRAY_SIZE(class_to_string), - .av_inherit = av_inherit, - .av_inherit_len = ARRAY_SIZE(av_inherit) -}; +#include "classmap.h" #define AVC_CACHE_SLOTS 512 #define AVC_DEF_CACHE_THRESHOLD 512 @@ -139,52 +103,28 @@ static inline int avc_hash(u32 ssid, u32 tsid, u16 tclass) */ static void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av) { - const char **common_pts = NULL; - u32 common_base = 0; - int i, i2, perm; + const char **perms; + int i, perm; if (av == 0) { audit_log_format(ab, " null"); return; } - for (i = 0; i < ARRAY_SIZE(av_inherit); i++) { - if (av_inherit[i].tclass == tclass) { - common_pts = av_inherit[i].common_pts; - common_base = av_inherit[i].common_base; - break; - } - } + perms = secclass_map[tclass-1].perms; audit_log_format(ab, " {"); i = 0; perm = 1; - while (perm < common_base) { - if (perm & av) { - audit_log_format(ab, " %s", common_pts[i]); + while (i < (sizeof(av) * 8)) { + if ((perm & av) && perms[i]) { + audit_log_format(ab, " %s", perms[i]); av &= ~perm; } i++; perm <<= 1; } - while (i < sizeof(av) * 8) { - if (perm & av) { - for (i2 = 0; i2 < ARRAY_SIZE(av_perm_to_string); i2++) { - if ((av_perm_to_string[i2].tclass == tclass) && - (av_perm_to_string[i2].value == perm)) - break; - } - if (i2 < ARRAY_SIZE(av_perm_to_string)) { - audit_log_format(ab, " %s", - av_perm_to_string[i2].name); - av &= ~perm; - } - } - i++; - perm <<= 1; - } - if (av) audit_log_format(ab, " 0x%x", av); @@ -219,8 +159,8 @@ static void avc_dump_query(struct audit_buffer *ab, u32 ssid, u32 tsid, u16 tcla kfree(scontext); } - BUG_ON(tclass >= ARRAY_SIZE(class_to_string) || !class_to_string[tclass]); - audit_log_format(ab, " tclass=%s", class_to_string[tclass]); + BUG_ON(tclass >= ARRAY_SIZE(secclass_map)); + audit_log_format(ab, " tclass=%s", secclass_map[tclass-1].name); } /** diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index bb230d5d708..c96d63ec475 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -91,7 +91,6 @@ #define NUM_SEL_MNT_OPTS 5 -extern unsigned int policydb_loaded_version; extern int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm); extern struct security_operations *security_ops; @@ -3338,9 +3337,18 @@ static int selinux_kernel_create_files_as(struct cred *new, struct inode *inode) return 0; } -static int selinux_kernel_module_request(void) +static int selinux_kernel_module_request(char *kmod_name) { - return task_has_system(current, SYSTEM__MODULE_REQUEST); + u32 sid; + struct common_audit_data ad; + + sid = task_sid(current); + + COMMON_AUDIT_DATA_INIT(&ad, KMOD); + ad.u.kmod_name = kmod_name; + + return avc_has_perm(sid, SECINITSID_KERNEL, SECCLASS_SYSTEM, + SYSTEM__MODULE_REQUEST, &ad); } static int selinux_task_setpgid(struct task_struct *p, pid_t pgid) @@ -4714,10 +4722,7 @@ static int selinux_netlink_send(struct sock *sk, struct sk_buff *skb) if (err) return err; - if (policydb_loaded_version >= POLICYDB_VERSION_NLCLASS) - err = selinux_nlmsg_perm(sk, skb); - - return err; + return selinux_nlmsg_perm(sk, skb); } static int selinux_netlink_recv(struct sk_buff *skb, int capability) @@ -5830,12 +5835,12 @@ int selinux_disable(void) selinux_disabled = 1; selinux_enabled = 0; - /* Try to destroy the avc node cache */ - avc_disable(); - /* Reset security_ops to the secondary module, dummy or capability. */ security_ops = secondary_ops; + /* Try to destroy the avc node cache */ + avc_disable(); + /* Unregister netfilter hooks. */ selinux_nf_ip_exit(); diff --git a/security/selinux/include/av_inherit.h b/security/selinux/include/av_inherit.h deleted file mode 100644 index abedcd704da..00000000000 --- a/security/selinux/include/av_inherit.h +++ /dev/null @@ -1,34 +0,0 @@ -/* This file is automatically generated. Do not edit. */ - S_(SECCLASS_DIR, file, 0x00020000UL) - S_(SECCLASS_FILE, file, 0x00020000UL) - S_(SECCLASS_LNK_FILE, file, 0x00020000UL) - S_(SECCLASS_CHR_FILE, file, 0x00020000UL) - S_(SECCLASS_BLK_FILE, file, 0x00020000UL) - S_(SECCLASS_SOCK_FILE, file, 0x00020000UL) - S_(SECCLASS_FIFO_FILE, file, 0x00020000UL) - S_(SECCLASS_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_TCP_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_UDP_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_RAWIP_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_NETLINK_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_PACKET_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_KEY_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_UNIX_STREAM_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_UNIX_DGRAM_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_TUN_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_IPC, ipc, 0x00000200UL) - S_(SECCLASS_SEM, ipc, 0x00000200UL) - S_(SECCLASS_MSGQ, ipc, 0x00000200UL) - S_(SECCLASS_SHM, ipc, 0x00000200UL) - S_(SECCLASS_NETLINK_ROUTE_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_NETLINK_FIREWALL_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_NETLINK_TCPDIAG_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_NETLINK_NFLOG_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_NETLINK_XFRM_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_NETLINK_SELINUX_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_NETLINK_AUDIT_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_NETLINK_IP6FW_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_NETLINK_DNRT_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_NETLINK_KOBJECT_UEVENT_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_APPLETALK_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_DCCP_SOCKET, socket, 0x00400000UL) diff --git a/security/selinux/include/av_perm_to_string.h b/security/selinux/include/av_perm_to_string.h deleted file mode 100644 index 2b683ad83d2..00000000000 --- a/security/selinux/include/av_perm_to_string.h +++ /dev/null @@ -1,183 +0,0 @@ -/* This file is automatically generated. Do not edit. */ - S_(SECCLASS_FILESYSTEM, FILESYSTEM__MOUNT, "mount") - S_(SECCLASS_FILESYSTEM, FILESYSTEM__REMOUNT, "remount") - S_(SECCLASS_FILESYSTEM, FILESYSTEM__UNMOUNT, "unmount") - S_(SECCLASS_FILESYSTEM, FILESYSTEM__GETATTR, "getattr") - S_(SECCLASS_FILESYSTEM, FILESYSTEM__RELABELFROM, "relabelfrom") - S_(SECCLASS_FILESYSTEM, FILESYSTEM__RELABELTO, "relabelto") - S_(SECCLASS_FILESYSTEM, FILESYSTEM__TRANSITION, "transition") - S_(SECCLASS_FILESYSTEM, FILESYSTEM__ASSOCIATE, "associate") - S_(SECCLASS_FILESYSTEM, FILESYSTEM__QUOTAMOD, "quotamod") - S_(SECCLASS_FILESYSTEM, FILESYSTEM__QUOTAGET, "quotaget") - S_(SECCLASS_DIR, DIR__ADD_NAME, "add_name") - S_(SECCLASS_DIR, DIR__REMOVE_NAME, "remove_name") - S_(SECCLASS_DIR, DIR__REPARENT, "reparent") - S_(SECCLASS_DIR, DIR__SEARCH, "search") - S_(SECCLASS_DIR, DIR__RMDIR, "rmdir") - S_(SECCLASS_DIR, DIR__OPEN, "open") - S_(SECCLASS_FILE, FILE__EXECUTE_NO_TRANS, "execute_no_trans") - S_(SECCLASS_FILE, FILE__ENTRYPOINT, "entrypoint") - S_(SECCLASS_FILE, FILE__EXECMOD, "execmod") - S_(SECCLASS_FILE, FILE__OPEN, "open") - S_(SECCLASS_CHR_FILE, CHR_FILE__EXECUTE_NO_TRANS, "execute_no_trans") - S_(SECCLASS_CHR_FILE, CHR_FILE__ENTRYPOINT, "entrypoint") - S_(SECCLASS_CHR_FILE, CHR_FILE__EXECMOD, "execmod") - S_(SECCLASS_CHR_FILE, CHR_FILE__OPEN, "open") - S_(SECCLASS_BLK_FILE, BLK_FILE__OPEN, "open") - S_(SECCLASS_SOCK_FILE, SOCK_FILE__OPEN, "open") - S_(SECCLASS_FIFO_FILE, FIFO_FILE__OPEN, "open") - S_(SECCLASS_FD, FD__USE, "use") - S_(SECCLASS_TCP_SOCKET, TCP_SOCKET__CONNECTTO, "connectto") - S_(SECCLASS_TCP_SOCKET, TCP_SOCKET__NEWCONN, "newconn") - S_(SECCLASS_TCP_SOCKET, TCP_SOCKET__ACCEPTFROM, "acceptfrom") - S_(SECCLASS_TCP_SOCKET, TCP_SOCKET__NODE_BIND, "node_bind") - S_(SECCLASS_TCP_SOCKET, TCP_SOCKET__NAME_CONNECT, "name_connect") - S_(SECCLASS_UDP_SOCKET, UDP_SOCKET__NODE_BIND, "node_bind") - S_(SECCLASS_RAWIP_SOCKET, RAWIP_SOCKET__NODE_BIND, "node_bind") - S_(SECCLASS_NODE, NODE__TCP_RECV, "tcp_recv") - S_(SECCLASS_NODE, NODE__TCP_SEND, "tcp_send") - S_(SECCLASS_NODE, NODE__UDP_RECV, "udp_recv") - S_(SECCLASS_NODE, NODE__UDP_SEND, "udp_send") - S_(SECCLASS_NODE, NODE__RAWIP_RECV, "rawip_recv") - S_(SECCLASS_NODE, NODE__RAWIP_SEND, "rawip_send") - S_(SECCLASS_NODE, NODE__ENFORCE_DEST, "enforce_dest") - S_(SECCLASS_NODE, NODE__DCCP_RECV, "dccp_recv") - S_(SECCLASS_NODE, NODE__DCCP_SEND, "dccp_send") - S_(SECCLASS_NODE, NODE__RECVFROM, "recvfrom") - S_(SECCLASS_NODE, NODE__SENDTO, "sendto") - S_(SECCLASS_NETIF, NETIF__TCP_RECV, "tcp_recv") - S_(SECCLASS_NETIF, NETIF__TCP_SEND, "tcp_send") - S_(SECCLASS_NETIF, NETIF__UDP_RECV, "udp_recv") - S_(SECCLASS_NETIF, NETIF__UDP_SEND, "udp_send") - S_(SECCLASS_NETIF, NETIF__RAWIP_RECV, "rawip_recv") - S_(SECCLASS_NETIF, NETIF__RAWIP_SEND, "rawip_send") - S_(SECCLASS_NETIF, NETIF__DCCP_RECV, "dccp_recv") - S_(SECCLASS_NETIF, NETIF__DCCP_SEND, "dccp_send") - S_(SECCLASS_NETIF, NETIF__INGRESS, "ingress") - S_(SECCLASS_NETIF, NETIF__EGRESS, "egress") - S_(SECCLASS_UNIX_STREAM_SOCKET, UNIX_STREAM_SOCKET__CONNECTTO, "connectto") - S_(SECCLASS_UNIX_STREAM_SOCKET, UNIX_STREAM_SOCKET__NEWCONN, "newconn") - S_(SECCLASS_UNIX_STREAM_SOCKET, UNIX_STREAM_SOCKET__ACCEPTFROM, "acceptfrom") - S_(SECCLASS_PROCESS, PROCESS__FORK, "fork") - S_(SECCLASS_PROCESS, PROCESS__TRANSITION, "transition") - S_(SECCLASS_PROCESS, PROCESS__SIGCHLD, "sigchld") - S_(SECCLASS_PROCESS, PROCESS__SIGKILL, "sigkill") - S_(SECCLASS_PROCESS, PROCESS__SIGSTOP, "sigstop") - S_(SECCLASS_PROCESS, PROCESS__SIGNULL, "signull") - S_(SECCLASS_PROCESS, PROCESS__SIGNAL, "signal") - S_(SECCLASS_PROCESS, PROCESS__PTRACE, "ptrace") - S_(SECCLASS_PROCESS, PROCESS__GETSCHED, "getsched") - S_(SECCLASS_PROCESS, PROCESS__SETSCHED, "setsched") - S_(SECCLASS_PROCESS, PROCESS__GETSESSION, "getsession") - S_(SECCLASS_PROCESS, PROCESS__GETPGID, "getpgid") - S_(SECCLASS_PROCESS, PROCESS__SETPGID, "setpgid") - S_(SECCLASS_PROCESS, PROCESS__GETCAP, "getcap") - S_(SECCLASS_PROCESS, PROCESS__SETCAP, "setcap") - S_(SECCLASS_PROCESS, PROCESS__SHARE, "share") - S_(SECCLASS_PROCESS, PROCESS__GETATTR, "getattr") - S_(SECCLASS_PROCESS, PROCESS__SETEXEC, "setexec") - S_(SECCLASS_PROCESS, PROCESS__SETFSCREATE, "setfscreate") - S_(SECCLASS_PROCESS, PROCESS__NOATSECURE, "noatsecure") - S_(SECCLASS_PROCESS, PROCESS__SIGINH, "siginh") - S_(SECCLASS_PROCESS, PROCESS__SETRLIMIT, "setrlimit") - S_(SECCLASS_PROCESS, PROCESS__RLIMITINH, "rlimitinh") - S_(SECCLASS_PROCESS, PROCESS__DYNTRANSITION, "dyntransition") - S_(SECCLASS_PROCESS, PROCESS__SETCURRENT, "setcurrent") - S_(SECCLASS_PROCESS, PROCESS__EXECMEM, "execmem") - S_(SECCLASS_PROCESS, PROCESS__EXECSTACK, "execstack") - S_(SECCLASS_PROCESS, PROCESS__EXECHEAP, "execheap") - S_(SECCLASS_PROCESS, PROCESS__SETKEYCREATE, "setkeycreate") - S_(SECCLASS_PROCESS, PROCESS__SETSOCKCREATE, "setsockcreate") - S_(SECCLASS_MSGQ, MSGQ__ENQUEUE, "enqueue") - S_(SECCLASS_MSG, MSG__SEND, "send") - S_(SECCLASS_MSG, MSG__RECEIVE, "receive") - S_(SECCLASS_SHM, SHM__LOCK, "lock") - S_(SECCLASS_SECURITY, SECURITY__COMPUTE_AV, "compute_av") - S_(SECCLASS_SECURITY, SECURITY__COMPUTE_CREATE, "compute_create") - S_(SECCLASS_SECURITY, SECURITY__COMPUTE_MEMBER, "compute_member") - S_(SECCLASS_SECURITY, SECURITY__CHECK_CONTEXT, "check_context") - S_(SECCLASS_SECURITY, SECURITY__LOAD_POLICY, "load_policy") - S_(SECCLASS_SECURITY, SECURITY__COMPUTE_RELABEL, "compute_relabel") - S_(SECCLASS_SECURITY, SECURITY__COMPUTE_USER, "compute_user") - S_(SECCLASS_SECURITY, SECURITY__SETENFORCE, "setenforce") - S_(SECCLASS_SECURITY, SECURITY__SETBOOL, "setbool") - S_(SECCLASS_SECURITY, SECURITY__SETSECPARAM, "setsecparam") - S_(SECCLASS_SECURITY, SECURITY__SETCHECKREQPROT, "setcheckreqprot") - S_(SECCLASS_SYSTEM, SYSTEM__IPC_INFO, "ipc_info") - S_(SECCLASS_SYSTEM, SYSTEM__SYSLOG_READ, "syslog_read") - S_(SECCLASS_SYSTEM, SYSTEM__SYSLOG_MOD, "syslog_mod") - S_(SECCLASS_SYSTEM, SYSTEM__SYSLOG_CONSOLE, "syslog_console") - S_(SECCLASS_SYSTEM, SYSTEM__MODULE_REQUEST, "module_request") - S_(SECCLASS_CAPABILITY, CAPABILITY__CHOWN, "chown") - S_(SECCLASS_CAPABILITY, CAPABILITY__DAC_OVERRIDE, "dac_override") - S_(SECCLASS_CAPABILITY, CAPABILITY__DAC_READ_SEARCH, "dac_read_search") - S_(SECCLASS_CAPABILITY, CAPABILITY__FOWNER, "fowner") - S_(SECCLASS_CAPABILITY, CAPABILITY__FSETID, "fsetid") - S_(SECCLASS_CAPABILITY, CAPABILITY__KILL, "kill") - S_(SECCLASS_CAPABILITY, CAPABILITY__SETGID, "setgid") - S_(SECCLASS_CAPABILITY, CAPABILITY__SETUID, "setuid") - S_(SECCLASS_CAPABILITY, CAPABILITY__SETPCAP, "setpcap") - S_(SECCLASS_CAPABILITY, CAPABILITY__LINUX_IMMUTABLE, "linux_immutable") - S_(SECCLASS_CAPABILITY, CAPABILITY__NET_BIND_SERVICE, "net_bind_service") - S_(SECCLASS_CAPABILITY, CAPABILITY__NET_BROADCAST, "net_broadcast") - S_(SECCLASS_CAPABILITY, CAPABILITY__NET_ADMIN, "net_admin") - S_(SECCLASS_CAPABILITY, CAPABILITY__NET_RAW, "net_raw") - S_(SECCLASS_CAPABILITY, CAPABILITY__IPC_LOCK, "ipc_lock") - S_(SECCLASS_CAPABILITY, CAPABILITY__IPC_OWNER, "ipc_owner") - S_(SECCLASS_CAPABILITY, CAPABILITY__SYS_MODULE, "sys_module") - S_(SECCLASS_CAPABILITY, CAPABILITY__SYS_RAWIO, "sys_rawio") - S_(SECCLASS_CAPABILITY, CAPABILITY__SYS_CHROOT, "sys_chroot") - S_(SECCLASS_CAPABILITY, CAPABILITY__SYS_PTRACE, "sys_ptrace") - S_(SECCLASS_CAPABILITY, CAPABILITY__SYS_PACCT, "sys_pacct") - S_(SECCLASS_CAPABILITY, CAPABILITY__SYS_ADMIN, "sys_admin") - S_(SECCLASS_CAPABILITY, CAPABILITY__SYS_BOOT, "sys_boot") - S_(SECCLASS_CAPABILITY, CAPABILITY__SYS_NICE, "sys_nice") - S_(SECCLASS_CAPABILITY, CAPABILITY__SYS_RESOURCE, "sys_resource") - S_(SECCLASS_CAPABILITY, CAPABILITY__SYS_TIME, "sys_time") - S_(SECCLASS_CAPABILITY, CAPABILITY__SYS_TTY_CONFIG, "sys_tty_config") - S_(SECCLASS_CAPABILITY, CAPABILITY__MKNOD, "mknod") - S_(SECCLASS_CAPABILITY, CAPABILITY__LEASE, "lease") - S_(SECCLASS_CAPABILITY, CAPABILITY__AUDIT_WRITE, "audit_write") - S_(SECCLASS_CAPABILITY, CAPABILITY__AUDIT_CONTROL, "audit_control") - S_(SECCLASS_CAPABILITY, CAPABILITY__SETFCAP, "setfcap") - S_(SECCLASS_CAPABILITY2, CAPABILITY2__MAC_OVERRIDE, "mac_override") - S_(SECCLASS_CAPABILITY2, CAPABILITY2__MAC_ADMIN, "mac_admin") - S_(SECCLASS_NETLINK_ROUTE_SOCKET, NETLINK_ROUTE_SOCKET__NLMSG_READ, "nlmsg_read") - S_(SECCLASS_NETLINK_ROUTE_SOCKET, NETLINK_ROUTE_SOCKET__NLMSG_WRITE, "nlmsg_write") - S_(SECCLASS_NETLINK_FIREWALL_SOCKET, NETLINK_FIREWALL_SOCKET__NLMSG_READ, "nlmsg_read") - S_(SECCLASS_NETLINK_FIREWALL_SOCKET, NETLINK_FIREWALL_SOCKET__NLMSG_WRITE, "nlmsg_write") - S_(SECCLASS_NETLINK_TCPDIAG_SOCKET, NETLINK_TCPDIAG_SOCKET__NLMSG_READ, "nlmsg_read") - S_(SECCLASS_NETLINK_TCPDIAG_SOCKET, NETLINK_TCPDIAG_SOCKET__NLMSG_WRITE, "nlmsg_write") - S_(SECCLASS_NETLINK_XFRM_SOCKET, NETLINK_XFRM_SOCKET__NLMSG_READ, "nlmsg_read") - S_(SECCLASS_NETLINK_XFRM_SOCKET, NETLINK_XFRM_SOCKET__NLMSG_WRITE, "nlmsg_write") - S_(SECCLASS_NETLINK_AUDIT_SOCKET, NETLINK_AUDIT_SOCKET__NLMSG_READ, "nlmsg_read") - S_(SECCLASS_NETLINK_AUDIT_SOCKET, NETLINK_AUDIT_SOCKET__NLMSG_WRITE, "nlmsg_write") - S_(SECCLASS_NETLINK_AUDIT_SOCKET, NETLINK_AUDIT_SOCKET__NLMSG_RELAY, "nlmsg_relay") - S_(SECCLASS_NETLINK_AUDIT_SOCKET, NETLINK_AUDIT_SOCKET__NLMSG_READPRIV, "nlmsg_readpriv") - S_(SECCLASS_NETLINK_AUDIT_SOCKET, NETLINK_AUDIT_SOCKET__NLMSG_TTY_AUDIT, "nlmsg_tty_audit") - S_(SECCLASS_NETLINK_IP6FW_SOCKET, NETLINK_IP6FW_SOCKET__NLMSG_READ, "nlmsg_read") - S_(SECCLASS_NETLINK_IP6FW_SOCKET, NETLINK_IP6FW_SOCKET__NLMSG_WRITE, "nlmsg_write") - S_(SECCLASS_ASSOCIATION, ASSOCIATION__SENDTO, "sendto") - S_(SECCLASS_ASSOCIATION, ASSOCIATION__RECVFROM, "recvfrom") - S_(SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT, "setcontext") - S_(SECCLASS_ASSOCIATION, ASSOCIATION__POLMATCH, "polmatch") - S_(SECCLASS_PACKET, PACKET__SEND, "send") - S_(SECCLASS_PACKET, PACKET__RECV, "recv") - S_(SECCLASS_PACKET, PACKET__RELABELTO, "relabelto") - S_(SECCLASS_PACKET, PACKET__FLOW_IN, "flow_in") - S_(SECCLASS_PACKET, PACKET__FLOW_OUT, "flow_out") - S_(SECCLASS_PACKET, PACKET__FORWARD_IN, "forward_in") - S_(SECCLASS_PACKET, PACKET__FORWARD_OUT, "forward_out") - S_(SECCLASS_KEY, KEY__VIEW, "view") - S_(SECCLASS_KEY, KEY__READ, "read") - S_(SECCLASS_KEY, KEY__WRITE, "write") - S_(SECCLASS_KEY, KEY__SEARCH, "search") - S_(SECCLASS_KEY, KEY__LINK, "link") - S_(SECCLASS_KEY, KEY__SETATTR, "setattr") - S_(SECCLASS_KEY, KEY__CREATE, "create") - S_(SECCLASS_DCCP_SOCKET, DCCP_SOCKET__NODE_BIND, "node_bind") - S_(SECCLASS_DCCP_SOCKET, DCCP_SOCKET__NAME_CONNECT, "name_connect") - S_(SECCLASS_MEMPROTECT, MEMPROTECT__MMAP_ZERO, "mmap_zero") - S_(SECCLASS_PEER, PEER__RECV, "recv") - S_(SECCLASS_KERNEL_SERVICE, KERNEL_SERVICE__USE_AS_OVERRIDE, "use_as_override") - S_(SECCLASS_KERNEL_SERVICE, KERNEL_SERVICE__CREATE_FILES_AS, "create_files_as") diff --git a/security/selinux/include/av_permissions.h b/security/selinux/include/av_permissions.h deleted file mode 100644 index 0546d616cca..00000000000 --- a/security/selinux/include/av_permissions.h +++ /dev/null @@ -1,870 +0,0 @@ -/* This file is automatically generated. Do not edit. */ -#define COMMON_FILE__IOCTL 0x00000001UL -#define COMMON_FILE__READ 0x00000002UL -#define COMMON_FILE__WRITE 0x00000004UL -#define COMMON_FILE__CREATE 0x00000008UL -#define COMMON_FILE__GETATTR 0x00000010UL -#define COMMON_FILE__SETATTR 0x00000020UL -#define COMMON_FILE__LOCK 0x00000040UL -#define COMMON_FILE__RELABELFROM 0x00000080UL -#define COMMON_FILE__RELABELTO 0x00000100UL -#define COMMON_FILE__APPEND 0x00000200UL -#define COMMON_FILE__UNLINK 0x00000400UL -#define COMMON_FILE__LINK 0x00000800UL -#define COMMON_FILE__RENAME 0x00001000UL -#define COMMON_FILE__EXECUTE 0x00002000UL -#define COMMON_FILE__SWAPON 0x00004000UL -#define COMMON_FILE__QUOTAON 0x00008000UL -#define COMMON_FILE__MOUNTON 0x00010000UL -#define COMMON_SOCKET__IOCTL 0x00000001UL -#define COMMON_SOCKET__READ 0x00000002UL -#define COMMON_SOCKET__WRITE 0x00000004UL -#define COMMON_SOCKET__CREATE 0x00000008UL -#define COMMON_SOCKET__GETATTR 0x00000010UL -#define COMMON_SOCKET__SETATTR 0x00000020UL -#define COMMON_SOCKET__LOCK 0x00000040UL -#define COMMON_SOCKET__RELABELFROM 0x00000080UL -#define COMMON_SOCKET__RELABELTO 0x00000100UL -#define COMMON_SOCKET__APPEND 0x00000200UL -#define COMMON_SOCKET__BIND 0x00000400UL -#define COMMON_SOCKET__CONNECT 0x00000800UL -#define COMMON_SOCKET__LISTEN 0x00001000UL -#define COMMON_SOCKET__ACCEPT 0x00002000UL -#define COMMON_SOCKET__GETOPT 0x00004000UL -#define COMMON_SOCKET__SETOPT 0x00008000UL -#define COMMON_SOCKET__SHUTDOWN 0x00010000UL -#define COMMON_SOCKET__RECVFROM 0x00020000UL -#define COMMON_SOCKET__SENDTO 0x00040000UL -#define COMMON_SOCKET__RECV_MSG 0x00080000UL -#define COMMON_SOCKET__SEND_MSG 0x00100000UL -#define COMMON_SOCKET__NAME_BIND 0x00200000UL -#define COMMON_IPC__CREATE 0x00000001UL -#define COMMON_IPC__DESTROY 0x00000002UL -#define COMMON_IPC__GETATTR 0x00000004UL -#define COMMON_IPC__SETATTR 0x00000008UL -#define COMMON_IPC__READ 0x00000010UL -#define COMMON_IPC__WRITE 0x00000020UL -#define COMMON_IPC__ASSOCIATE 0x00000040UL -#define COMMON_IPC__UNIX_READ 0x00000080UL -#define COMMON_IPC__UNIX_WRITE 0x00000100UL -#define FILESYSTEM__MOUNT 0x00000001UL -#define FILESYSTEM__REMOUNT 0x00000002UL -#define FILESYSTEM__UNMOUNT 0x00000004UL -#define FILESYSTEM__GETATTR 0x00000008UL -#define FILESYSTEM__RELABELFROM 0x00000010UL -#define FILESYSTEM__RELABELTO 0x00000020UL -#define FILESYSTEM__TRANSITION 0x00000040UL -#define FILESYSTEM__ASSOCIATE 0x00000080UL -#define FILESYSTEM__QUOTAMOD 0x00000100UL -#define FILESYSTEM__QUOTAGET 0x00000200UL -#define DIR__IOCTL 0x00000001UL -#define DIR__READ 0x00000002UL -#define DIR__WRITE 0x00000004UL -#define DIR__CREATE 0x00000008UL -#define DIR__GETATTR 0x00000010UL -#define DIR__SETATTR 0x00000020UL -#define DIR__LOCK 0x00000040UL -#define DIR__RELABELFROM 0x00000080UL -#define DIR__RELABELTO 0x00000100UL -#define DIR__APPEND 0x00000200UL -#define DIR__UNLINK 0x00000400UL -#define DIR__LINK 0x00000800UL -#define DIR__RENAME 0x00001000UL -#define DIR__EXECUTE 0x00002000UL -#define DIR__SWAPON 0x00004000UL -#define DIR__QUOTAON 0x00008000UL -#define DIR__MOUNTON 0x00010000UL -#define DIR__ADD_NAME 0x00020000UL -#define DIR__REMOVE_NAME 0x00040000UL -#define DIR__REPARENT 0x00080000UL -#define DIR__SEARCH 0x00100000UL -#define DIR__RMDIR 0x00200000UL -#define DIR__OPEN 0x00400000UL -#define FILE__IOCTL 0x00000001UL -#define FILE__READ 0x00000002UL -#define FILE__WRITE 0x00000004UL -#define FILE__CREATE 0x00000008UL -#define FILE__GETATTR 0x00000010UL -#define FILE__SETATTR 0x00000020UL -#define FILE__LOCK 0x00000040UL -#define FILE__RELABELFROM 0x00000080UL -#define FILE__RELABELTO 0x00000100UL -#define FILE__APPEND 0x00000200UL -#define FILE__UNLINK 0x00000400UL -#define FILE__LINK 0x00000800UL -#define FILE__RENAME 0x00001000UL -#define FILE__EXECUTE 0x00002000UL -#define FILE__SWAPON 0x00004000UL -#define FILE__QUOTAON 0x00008000UL -#define FILE__MOUNTON 0x00010000UL -#define FILE__EXECUTE_NO_TRANS 0x00020000UL -#define FILE__ENTRYPOINT 0x00040000UL -#define FILE__EXECMOD 0x00080000UL -#define FILE__OPEN 0x00100000UL -#define LNK_FILE__IOCTL 0x00000001UL -#define LNK_FILE__READ 0x00000002UL -#define LNK_FILE__WRITE 0x00000004UL -#define LNK_FILE__CREATE 0x00000008UL -#define LNK_FILE__GETATTR 0x00000010UL -#define LNK_FILE__SETATTR 0x00000020UL -#define LNK_FILE__LOCK 0x00000040UL -#define LNK_FILE__RELABELFROM 0x00000080UL -#define LNK_FILE__RELABELTO 0x00000100UL -#define LNK_FILE__APPEND 0x00000200UL -#define LNK_FILE__UNLINK 0x00000400UL -#define LNK_FILE__LINK 0x00000800UL -#define LNK_FILE__RENAME 0x00001000UL -#define LNK_FILE__EXECUTE 0x00002000UL -#define LNK_FILE__SWAPON 0x00004000UL -#define LNK_FILE__QUOTAON 0x00008000UL -#define LNK_FILE__MOUNTON 0x00010000UL -#define CHR_FILE__IOCTL 0x00000001UL -#define CHR_FILE__READ 0x00000002UL -#define CHR_FILE__WRITE 0x00000004UL -#define CHR_FILE__CREATE 0x00000008UL -#define CHR_FILE__GETATTR 0x00000010UL -#define CHR_FILE__SETATTR 0x00000020UL -#define CHR_FILE__LOCK 0x00000040UL -#define CHR_FILE__RELABELFROM 0x00000080UL -#define CHR_FILE__RELABELTO 0x00000100UL -#define CHR_FILE__APPEND 0x00000200UL -#define CHR_FILE__UNLINK 0x00000400UL -#define CHR_FILE__LINK 0x00000800UL -#define CHR_FILE__RENAME 0x00001000UL -#define CHR_FILE__EXECUTE 0x00002000UL -#define CHR_FILE__SWAPON 0x00004000UL -#define CHR_FILE__QUOTAON 0x00008000UL -#define CHR_FILE__MOUNTON 0x00010000UL -#define CHR_FILE__EXECUTE_NO_TRANS 0x00020000UL -#define CHR_FILE__ENTRYPOINT 0x00040000UL -#define CHR_FILE__EXECMOD 0x00080000UL -#define CHR_FILE__OPEN 0x00100000UL -#define BLK_FILE__IOCTL 0x00000001UL -#define BLK_FILE__READ 0x00000002UL -#define BLK_FILE__WRITE 0x00000004UL -#define BLK_FILE__CREATE 0x00000008UL -#define BLK_FILE__GETATTR 0x00000010UL -#define BLK_FILE__SETATTR 0x00000020UL -#define BLK_FILE__LOCK 0x00000040UL -#define BLK_FILE__RELABELFROM 0x00000080UL -#define BLK_FILE__RELABELTO 0x00000100UL -#define BLK_FILE__APPEND 0x00000200UL -#define BLK_FILE__UNLINK 0x00000400UL -#define BLK_FILE__LINK 0x00000800UL -#define BLK_FILE__RENAME 0x00001000UL -#define BLK_FILE__EXECUTE 0x00002000UL -#define BLK_FILE__SWAPON 0x00004000UL -#define BLK_FILE__QUOTAON 0x00008000UL -#define BLK_FILE__MOUNTON 0x00010000UL -#define BLK_FILE__OPEN 0x00020000UL -#define SOCK_FILE__IOCTL 0x00000001UL -#define SOCK_FILE__READ 0x00000002UL -#define SOCK_FILE__WRITE 0x00000004UL -#define SOCK_FILE__CREATE 0x00000008UL -#define SOCK_FILE__GETATTR 0x00000010UL -#define SOCK_FILE__SETATTR 0x00000020UL -#define SOCK_FILE__LOCK 0x00000040UL -#define SOCK_FILE__RELABELFROM 0x00000080UL -#define SOCK_FILE__RELABELTO 0x00000100UL -#define SOCK_FILE__APPEND 0x00000200UL -#define SOCK_FILE__UNLINK 0x00000400UL -#define SOCK_FILE__LINK 0x00000800UL -#define SOCK_FILE__RENAME 0x00001000UL -#define SOCK_FILE__EXECUTE 0x00002000UL -#define SOCK_FILE__SWAPON 0x00004000UL -#define SOCK_FILE__QUOTAON 0x00008000UL -#define SOCK_FILE__MOUNTON 0x00010000UL -#define SOCK_FILE__OPEN 0x00020000UL -#define FIFO_FILE__IOCTL 0x00000001UL -#define FIFO_FILE__READ 0x00000002UL -#define FIFO_FILE__WRITE 0x00000004UL -#define FIFO_FILE__CREATE 0x00000008UL -#define FIFO_FILE__GETATTR 0x00000010UL -#define FIFO_FILE__SETATTR 0x00000020UL -#define FIFO_FILE__LOCK 0x00000040UL -#define FIFO_FILE__RELABELFROM 0x00000080UL -#define FIFO_FILE__RELABELTO 0x00000100UL -#define FIFO_FILE__APPEND 0x00000200UL -#define FIFO_FILE__UNLINK 0x00000400UL -#define FIFO_FILE__LINK 0x00000800UL -#define FIFO_FILE__RENAME 0x00001000UL -#define FIFO_FILE__EXECUTE 0x00002000UL -#define FIFO_FILE__SWAPON 0x00004000UL -#define FIFO_FILE__QUOTAON 0x00008000UL -#define FIFO_FILE__MOUNTON 0x00010000UL -#define FIFO_FILE__OPEN 0x00020000UL -#define FD__USE 0x00000001UL -#define SOCKET__IOCTL 0x00000001UL -#define SOCKET__READ 0x00000002UL -#define SOCKET__WRITE 0x00000004UL -#define SOCKET__CREATE 0x00000008UL -#define SOCKET__GETATTR 0x00000010UL -#define SOCKET__SETATTR 0x00000020UL -#define SOCKET__LOCK 0x00000040UL -#define SOCKET__RELABELFROM 0x00000080UL -#define SOCKET__RELABELTO 0x00000100UL -#define SOCKET__APPEND 0x00000200UL -#define SOCKET__BIND 0x00000400UL -#define SOCKET__CONNECT 0x00000800UL -#define SOCKET__LISTEN 0x00001000UL -#define SOCKET__ACCEPT 0x00002000UL -#define SOCKET__GETOPT 0x00004000UL -#define SOCKET__SETOPT 0x00008000UL -#define SOCKET__SHUTDOWN 0x00010000UL -#define SOCKET__RECVFROM 0x00020000UL -#define SOCKET__SENDTO 0x00040000UL -#define SOCKET__RECV_MSG 0x00080000UL -#define SOCKET__SEND_MSG 0x00100000UL -#define SOCKET__NAME_BIND 0x00200000UL -#define TCP_SOCKET__IOCTL 0x00000001UL -#define TCP_SOCKET__READ 0x00000002UL -#define TCP_SOCKET__WRITE 0x00000004UL -#define TCP_SOCKET__CREATE 0x00000008UL -#define TCP_SOCKET__GETATTR 0x00000010UL -#define TCP_SOCKET__SETATTR 0x00000020UL -#define TCP_SOCKET__LOCK 0x00000040UL -#define TCP_SOCKET__RELABELFROM 0x00000080UL -#define TCP_SOCKET__RELABELTO 0x00000100UL -#define TCP_SOCKET__APPEND 0x00000200UL -#define TCP_SOCKET__BIND 0x00000400UL -#define TCP_SOCKET__CONNECT 0x00000800UL -#define TCP_SOCKET__LISTEN 0x00001000UL -#define TCP_SOCKET__ACCEPT 0x00002000UL -#define TCP_SOCKET__GETOPT 0x00004000UL -#define TCP_SOCKET__SETOPT 0x00008000UL -#define TCP_SOCKET__SHUTDOWN 0x00010000UL -#define TCP_SOCKET__RECVFROM 0x00020000UL -#define TCP_SOCKET__SENDTO 0x00040000UL -#define TCP_SOCKET__RECV_MSG 0x00080000UL -#define TCP_SOCKET__SEND_MSG 0x00100000UL -#define TCP_SOCKET__NAME_BIND 0x00200000UL -#define TCP_SOCKET__CONNECTTO 0x00400000UL -#define TCP_SOCKET__NEWCONN 0x00800000UL -#define TCP_SOCKET__ACCEPTFROM 0x01000000UL -#define TCP_SOCKET__NODE_BIND 0x02000000UL -#define TCP_SOCKET__NAME_CONNECT 0x04000000UL -#define UDP_SOCKET__IOCTL 0x00000001UL -#define UDP_SOCKET__READ 0x00000002UL -#define UDP_SOCKET__WRITE 0x00000004UL -#define UDP_SOCKET__CREATE 0x00000008UL -#define UDP_SOCKET__GETATTR 0x00000010UL -#define UDP_SOCKET__SETATTR 0x00000020UL -#define UDP_SOCKET__LOCK 0x00000040UL -#define UDP_SOCKET__RELABELFROM 0x00000080UL -#define UDP_SOCKET__RELABELTO 0x00000100UL -#define UDP_SOCKET__APPEND 0x00000200UL -#define UDP_SOCKET__BIND 0x00000400UL -#define UDP_SOCKET__CONNECT 0x00000800UL -#define UDP_SOCKET__LISTEN 0x00001000UL -#define UDP_SOCKET__ACCEPT 0x00002000UL -#define UDP_SOCKET__GETOPT 0x00004000UL -#define UDP_SOCKET__SETOPT 0x00008000UL -#define UDP_SOCKET__SHUTDOWN 0x00010000UL -#define UDP_SOCKET__RECVFROM 0x00020000UL -#define UDP_SOCKET__SENDTO 0x00040000UL -#define UDP_SOCKET__RECV_MSG 0x00080000UL -#define UDP_SOCKET__SEND_MSG 0x00100000UL -#define UDP_SOCKET__NAME_BIND 0x00200000UL -#define UDP_SOCKET__NODE_BIND 0x00400000UL -#define RAWIP_SOCKET__IOCTL 0x00000001UL -#define RAWIP_SOCKET__READ 0x00000002UL -#define RAWIP_SOCKET__WRITE 0x00000004UL -#define RAWIP_SOCKET__CREATE 0x00000008UL -#define RAWIP_SOCKET__GETATTR 0x00000010UL -#define RAWIP_SOCKET__SETATTR 0x00000020UL -#define RAWIP_SOCKET__LOCK 0x00000040UL -#define RAWIP_SOCKET__RELABELFROM 0x00000080UL -#define RAWIP_SOCKET__RELABELTO 0x00000100UL -#define RAWIP_SOCKET__APPEND 0x00000200UL -#define RAWIP_SOCKET__BIND 0x00000400UL -#define RAWIP_SOCKET__CONNECT 0x00000800UL -#define RAWIP_SOCKET__LISTEN 0x00001000UL -#define RAWIP_SOCKET__ACCEPT 0x00002000UL -#define RAWIP_SOCKET__GETOPT 0x00004000UL -#define RAWIP_SOCKET__SETOPT 0x00008000UL -#define RAWIP_SOCKET__SHUTDOWN 0x00010000UL -#define RAWIP_SOCKET__RECVFROM 0x00020000UL -#define RAWIP_SOCKET__SENDTO 0x00040000UL -#define RAWIP_SOCKET__RECV_MSG 0x00080000UL -#define RAWIP_SOCKET__SEND_MSG 0x00100000UL -#define RAWIP_SOCKET__NAME_BIND 0x00200000UL -#define RAWIP_SOCKET__NODE_BIND 0x00400000UL -#define NODE__TCP_RECV 0x00000001UL -#define NODE__TCP_SEND 0x00000002UL -#define NODE__UDP_RECV 0x00000004UL -#define NODE__UDP_SEND 0x00000008UL -#define NODE__RAWIP_RECV 0x00000010UL -#define NODE__RAWIP_SEND 0x00000020UL -#define NODE__ENFORCE_DEST 0x00000040UL -#define NODE__DCCP_RECV 0x00000080UL -#define NODE__DCCP_SEND 0x00000100UL -#define NODE__RECVFROM 0x00000200UL -#define NODE__SENDTO 0x00000400UL -#define NETIF__TCP_RECV 0x00000001UL -#define NETIF__TCP_SEND 0x00000002UL -#define NETIF__UDP_RECV 0x00000004UL -#define NETIF__UDP_SEND 0x00000008UL -#define NETIF__RAWIP_RECV 0x00000010UL -#define NETIF__RAWIP_SEND 0x00000020UL -#define NETIF__DCCP_RECV 0x00000040UL -#define NETIF__DCCP_SEND 0x00000080UL -#define NETIF__INGRESS 0x00000100UL -#define NETIF__EGRESS 0x00000200UL -#define NETLINK_SOCKET__IOCTL 0x00000001UL -#define NETLINK_SOCKET__READ 0x00000002UL -#define NETLINK_SOCKET__WRITE 0x00000004UL -#define NETLINK_SOCKET__CREATE 0x00000008UL -#define NETLINK_SOCKET__GETATTR 0x00000010UL -#define NETLINK_SOCKET__SETATTR 0x00000020UL -#define NETLINK_SOCKET__LOCK 0x00000040UL -#define NETLINK_SOCKET__RELABELFROM 0x00000080UL -#define NETLINK_SOCKET__RELABELTO 0x00000100UL -#define NETLINK_SOCKET__APPEND 0x00000200UL -#define NETLINK_SOCKET__BIND 0x00000400UL -#define NETLINK_SOCKET__CONNECT 0x00000800UL -#define NETLINK_SOCKET__LISTEN 0x00001000UL -#define NETLINK_SOCKET__ACCEPT 0x00002000UL -#define NETLINK_SOCKET__GETOPT 0x00004000UL -#define NETLINK_SOCKET__SETOPT 0x00008000UL -#define NETLINK_SOCKET__SHUTDOWN 0x00010000UL -#define NETLINK_SOCKET__RECVFROM 0x00020000UL -#define NETLINK_SOCKET__SENDTO 0x00040000UL -#define NETLINK_SOCKET__RECV_MSG 0x00080000UL -#define NETLINK_SOCKET__SEND_MSG 0x00100000UL -#define NETLINK_SOCKET__NAME_BIND 0x00200000UL -#define PACKET_SOCKET__IOCTL 0x00000001UL -#define PACKET_SOCKET__READ 0x00000002UL -#define PACKET_SOCKET__WRITE 0x00000004UL -#define PACKET_SOCKET__CREATE 0x00000008UL -#define PACKET_SOCKET__GETATTR 0x00000010UL -#define PACKET_SOCKET__SETATTR 0x00000020UL -#define PACKET_SOCKET__LOCK 0x00000040UL -#define PACKET_SOCKET__RELABELFROM 0x00000080UL -#define PACKET_SOCKET__RELABELTO 0x00000100UL -#define PACKET_SOCKET__APPEND 0x00000200UL -#define PACKET_SOCKET__BIND 0x00000400UL -#define PACKET_SOCKET__CONNECT 0x00000800UL -#define PACKET_SOCKET__LISTEN 0x00001000UL -#define PACKET_SOCKET__ACCEPT 0x00002000UL -#define PACKET_SOCKET__GETOPT 0x00004000UL -#define PACKET_SOCKET__SETOPT 0x00008000UL -#define PACKET_SOCKET__SHUTDOWN 0x00010000UL -#define PACKET_SOCKET__RECVFROM 0x00020000UL -#define PACKET_SOCKET__SENDTO 0x00040000UL -#define PACKET_SOCKET__RECV_MSG 0x00080000UL -#define PACKET_SOCKET__SEND_MSG 0x00100000UL -#define PACKET_SOCKET__NAME_BIND 0x00200000UL -#define KEY_SOCKET__IOCTL 0x00000001UL -#define KEY_SOCKET__READ 0x00000002UL -#define KEY_SOCKET__WRITE 0x00000004UL -#define KEY_SOCKET__CREATE 0x00000008UL -#define KEY_SOCKET__GETATTR 0x00000010UL -#define KEY_SOCKET__SETATTR 0x00000020UL -#define KEY_SOCKET__LOCK 0x00000040UL -#define KEY_SOCKET__RELABELFROM 0x00000080UL -#define KEY_SOCKET__RELABELTO 0x00000100UL -#define KEY_SOCKET__APPEND 0x00000200UL -#define KEY_SOCKET__BIND 0x00000400UL -#define KEY_SOCKET__CONNECT 0x00000800UL -#define KEY_SOCKET__LISTEN 0x00001000UL -#define KEY_SOCKET__ACCEPT 0x00002000UL -#define KEY_SOCKET__GETOPT 0x00004000UL -#define KEY_SOCKET__SETOPT 0x00008000UL -#define KEY_SOCKET__SHUTDOWN 0x00010000UL -#define KEY_SOCKET__RECVFROM 0x00020000UL -#define KEY_SOCKET__SENDTO 0x00040000UL -#define KEY_SOCKET__RECV_MSG 0x00080000UL -#define KEY_SOCKET__SEND_MSG 0x00100000UL -#define KEY_SOCKET__NAME_BIND 0x00200000UL -#define UNIX_STREAM_SOCKET__IOCTL 0x00000001UL -#define UNIX_STREAM_SOCKET__READ 0x00000002UL -#define UNIX_STREAM_SOCKET__WRITE 0x00000004UL -#define UNIX_STREAM_SOCKET__CREATE 0x00000008UL -#define UNIX_STREAM_SOCKET__GETATTR 0x00000010UL -#define UNIX_STREAM_SOCKET__SETATTR 0x00000020UL -#define UNIX_STREAM_SOCKET__LOCK 0x00000040UL -#define UNIX_STREAM_SOCKET__RELABELFROM 0x00000080UL -#define UNIX_STREAM_SOCKET__RELABELTO 0x00000100UL -#define UNIX_STREAM_SOCKET__APPEND 0x00000200UL -#define UNIX_STREAM_SOCKET__BIND 0x00000400UL -#define UNIX_STREAM_SOCKET__CONNECT 0x00000800UL -#define UNIX_STREAM_SOCKET__LISTEN 0x00001000UL -#define UNIX_STREAM_SOCKET__ACCEPT 0x00002000UL -#define UNIX_STREAM_SOCKET__GETOPT 0x00004000UL -#define UNIX_STREAM_SOCKET__SETOPT 0x00008000UL -#define UNIX_STREAM_SOCKET__SHUTDOWN 0x00010000UL -#define UNIX_STREAM_SOCKET__RECVFROM 0x00020000UL -#define UNIX_STREAM_SOCKET__SENDTO 0x00040000UL -#define UNIX_STREAM_SOCKET__RECV_MSG 0x00080000UL -#define UNIX_STREAM_SOCKET__SEND_MSG 0x00100000UL -#define UNIX_STREAM_SOCKET__NAME_BIND 0x00200000UL -#define UNIX_STREAM_SOCKET__CONNECTTO 0x00400000UL -#define UNIX_STREAM_SOCKET__NEWCONN 0x00800000UL -#define UNIX_STREAM_SOCKET__ACCEPTFROM 0x01000000UL -#define UNIX_DGRAM_SOCKET__IOCTL 0x00000001UL -#define UNIX_DGRAM_SOCKET__READ 0x00000002UL -#define UNIX_DGRAM_SOCKET__WRITE 0x00000004UL -#define UNIX_DGRAM_SOCKET__CREATE 0x00000008UL -#define UNIX_DGRAM_SOCKET__GETATTR 0x00000010UL -#define UNIX_DGRAM_SOCKET__SETATTR 0x00000020UL -#define UNIX_DGRAM_SOCKET__LOCK 0x00000040UL -#define UNIX_DGRAM_SOCKET__RELABELFROM 0x00000080UL -#define UNIX_DGRAM_SOCKET__RELABELTO 0x00000100UL -#define UNIX_DGRAM_SOCKET__APPEND 0x00000200UL -#define UNIX_DGRAM_SOCKET__BIND 0x00000400UL -#define UNIX_DGRAM_SOCKET__CONNECT 0x00000800UL -#define UNIX_DGRAM_SOCKET__LISTEN 0x00001000UL -#define UNIX_DGRAM_SOCKET__ACCEPT 0x00002000UL -#define UNIX_DGRAM_SOCKET__GETOPT 0x00004000UL -#define UNIX_DGRAM_SOCKET__SETOPT 0x00008000UL -#define UNIX_DGRAM_SOCKET__SHUTDOWN 0x00010000UL -#define UNIX_DGRAM_SOCKET__RECVFROM 0x00020000UL -#define UNIX_DGRAM_SOCKET__SENDTO 0x00040000UL -#define UNIX_DGRAM_SOCKET__RECV_MSG 0x00080000UL -#define UNIX_DGRAM_SOCKET__SEND_MSG 0x00100000UL -#define UNIX_DGRAM_SOCKET__NAME_BIND 0x00200000UL -#define TUN_SOCKET__IOCTL 0x00000001UL -#define TUN_SOCKET__READ 0x00000002UL -#define TUN_SOCKET__WRITE 0x00000004UL -#define TUN_SOCKET__CREATE 0x00000008UL -#define TUN_SOCKET__GETATTR 0x00000010UL -#define TUN_SOCKET__SETATTR 0x00000020UL -#define TUN_SOCKET__LOCK 0x00000040UL -#define TUN_SOCKET__RELABELFROM 0x00000080UL -#define TUN_SOCKET__RELABELTO 0x00000100UL -#define TUN_SOCKET__APPEND 0x00000200UL -#define TUN_SOCKET__BIND 0x00000400UL -#define TUN_SOCKET__CONNECT 0x00000800UL -#define TUN_SOCKET__LISTEN 0x00001000UL -#define TUN_SOCKET__ACCEPT 0x00002000UL -#define TUN_SOCKET__GETOPT 0x00004000UL -#define TUN_SOCKET__SETOPT 0x00008000UL -#define TUN_SOCKET__SHUTDOWN 0x00010000UL -#define TUN_SOCKET__RECVFROM 0x00020000UL -#define TUN_SOCKET__SENDTO 0x00040000UL -#define TUN_SOCKET__RECV_MSG 0x00080000UL -#define TUN_SOCKET__SEND_MSG 0x00100000UL -#define TUN_SOCKET__NAME_BIND 0x00200000UL -#define PROCESS__FORK 0x00000001UL -#define PROCESS__TRANSITION 0x00000002UL -#define PROCESS__SIGCHLD 0x00000004UL -#define PROCESS__SIGKILL 0x00000008UL -#define PROCESS__SIGSTOP 0x00000010UL -#define PROCESS__SIGNULL 0x00000020UL -#define PROCESS__SIGNAL 0x00000040UL -#define PROCESS__PTRACE 0x00000080UL -#define PROCESS__GETSCHED 0x00000100UL -#define PROCESS__SETSCHED 0x00000200UL -#define PROCESS__GETSESSION 0x00000400UL -#define PROCESS__GETPGID 0x00000800UL -#define PROCESS__SETPGID 0x00001000UL -#define PROCESS__GETCAP 0x00002000UL -#define PROCESS__SETCAP 0x00004000UL -#define PROCESS__SHARE 0x00008000UL -#define PROCESS__GETATTR 0x00010000UL -#define PROCESS__SETEXEC 0x00020000UL -#define PROCESS__SETFSCREATE 0x00040000UL -#define PROCESS__NOATSECURE 0x00080000UL -#define PROCESS__SIGINH 0x00100000UL -#define PROCESS__SETRLIMIT 0x00200000UL -#define PROCESS__RLIMITINH 0x00400000UL -#define PROCESS__DYNTRANSITION 0x00800000UL -#define PROCESS__SETCURRENT 0x01000000UL -#define PROCESS__EXECMEM 0x02000000UL -#define PROCESS__EXECSTACK 0x04000000UL -#define PROCESS__EXECHEAP 0x08000000UL -#define PROCESS__SETKEYCREATE 0x10000000UL -#define PROCESS__SETSOCKCREATE 0x20000000UL -#define IPC__CREATE 0x00000001UL -#define IPC__DESTROY 0x00000002UL -#define IPC__GETATTR 0x00000004UL -#define IPC__SETATTR 0x00000008UL -#define IPC__READ 0x00000010UL -#define IPC__WRITE 0x00000020UL -#define IPC__ASSOCIATE 0x00000040UL -#define IPC__UNIX_READ 0x00000080UL -#define IPC__UNIX_WRITE 0x00000100UL -#define SEM__CREATE 0x00000001UL -#define SEM__DESTROY 0x00000002UL -#define SEM__GETATTR 0x00000004UL -#define SEM__SETATTR 0x00000008UL -#define SEM__READ 0x00000010UL -#define SEM__WRITE 0x00000020UL -#define SEM__ASSOCIATE 0x00000040UL -#define SEM__UNIX_READ 0x00000080UL -#define SEM__UNIX_WRITE 0x00000100UL -#define MSGQ__CREATE 0x00000001UL -#define MSGQ__DESTROY 0x00000002UL -#define MSGQ__GETATTR 0x00000004UL -#define MSGQ__SETATTR 0x00000008UL -#define MSGQ__READ 0x00000010UL -#define MSGQ__WRITE 0x00000020UL -#define MSGQ__ASSOCIATE 0x00000040UL -#define MSGQ__UNIX_READ 0x00000080UL -#define MSGQ__UNIX_WRITE 0x00000100UL -#define MSGQ__ENQUEUE 0x00000200UL -#define MSG__SEND 0x00000001UL -#define MSG__RECEIVE 0x00000002UL -#define SHM__CREATE 0x00000001UL -#define SHM__DESTROY 0x00000002UL -#define SHM__GETATTR 0x00000004UL -#define SHM__SETATTR 0x00000008UL -#define SHM__READ 0x00000010UL -#define SHM__WRITE 0x00000020UL -#define SHM__ASSOCIATE 0x00000040UL -#define SHM__UNIX_READ 0x00000080UL -#define SHM__UNIX_WRITE 0x00000100UL -#define SHM__LOCK 0x00000200UL -#define SECURITY__COMPUTE_AV 0x00000001UL -#define SECURITY__COMPUTE_CREATE 0x00000002UL -#define SECURITY__COMPUTE_MEMBER 0x00000004UL -#define SECURITY__CHECK_CONTEXT 0x00000008UL -#define SECURITY__LOAD_POLICY 0x00000010UL -#define SECURITY__COMPUTE_RELABEL 0x00000020UL -#define SECURITY__COMPUTE_USER 0x00000040UL -#define SECURITY__SETENFORCE 0x00000080UL -#define SECURITY__SETBOOL 0x00000100UL -#define SECURITY__SETSECPARAM 0x00000200UL -#define SECURITY__SETCHECKREQPROT 0x00000400UL -#define SYSTEM__IPC_INFO 0x00000001UL -#define SYSTEM__SYSLOG_READ 0x00000002UL -#define SYSTEM__SYSLOG_MOD 0x00000004UL -#define SYSTEM__SYSLOG_CONSOLE 0x00000008UL -#define SYSTEM__MODULE_REQUEST 0x00000010UL -#define CAPABILITY__CHOWN 0x00000001UL -#define CAPABILITY__DAC_OVERRIDE 0x00000002UL -#define CAPABILITY__DAC_READ_SEARCH 0x00000004UL -#define CAPABILITY__FOWNER 0x00000008UL -#define CAPABILITY__FSETID 0x00000010UL -#define CAPABILITY__KILL 0x00000020UL -#define CAPABILITY__SETGID 0x00000040UL -#define CAPABILITY__SETUID 0x00000080UL -#define CAPABILITY__SETPCAP 0x00000100UL -#define CAPABILITY__LINUX_IMMUTABLE 0x00000200UL -#define CAPABILITY__NET_BIND_SERVICE 0x00000400UL -#define CAPABILITY__NET_BROADCAST 0x00000800UL -#define CAPABILITY__NET_ADMIN 0x00001000UL -#define CAPABILITY__NET_RAW 0x00002000UL -#define CAPABILITY__IPC_LOCK 0x00004000UL -#define CAPABILITY__IPC_OWNER 0x00008000UL -#define CAPABILITY__SYS_MODULE 0x00010000UL -#define CAPABILITY__SYS_RAWIO 0x00020000UL -#define CAPABILITY__SYS_CHROOT 0x00040000UL -#define CAPABILITY__SYS_PTRACE 0x00080000UL -#define CAPABILITY__SYS_PACCT 0x00100000UL -#define CAPABILITY__SYS_ADMIN 0x00200000UL -#define CAPABILITY__SYS_BOOT 0x00400000UL -#define CAPABILITY__SYS_NICE 0x00800000UL -#define CAPABILITY__SYS_RESOURCE 0x01000000UL -#define CAPABILITY__SYS_TIME 0x02000000UL -#define CAPABILITY__SYS_TTY_CONFIG 0x04000000UL -#define CAPABILITY__MKNOD 0x08000000UL -#define CAPABILITY__LEASE 0x10000000UL -#define CAPABILITY__AUDIT_WRITE 0x20000000UL -#define CAPABILITY__AUDIT_CONTROL 0x40000000UL -#define CAPABILITY__SETFCAP 0x80000000UL -#define CAPABILITY2__MAC_OVERRIDE 0x00000001UL -#define CAPABILITY2__MAC_ADMIN 0x00000002UL -#define NETLINK_ROUTE_SOCKET__IOCTL 0x00000001UL -#define NETLINK_ROUTE_SOCKET__READ 0x00000002UL -#define NETLINK_ROUTE_SOCKET__WRITE 0x00000004UL -#define NETLINK_ROUTE_SOCKET__CREATE 0x00000008UL -#define NETLINK_ROUTE_SOCKET__GETATTR 0x00000010UL -#define NETLINK_ROUTE_SOCKET__SETATTR 0x00000020UL -#define NETLINK_ROUTE_SOCKET__LOCK 0x00000040UL -#define NETLINK_ROUTE_SOCKET__RELABELFROM 0x00000080UL -#define NETLINK_ROUTE_SOCKET__RELABELTO 0x00000100UL -#define NETLINK_ROUTE_SOCKET__APPEND 0x00000200UL -#define NETLINK_ROUTE_SOCKET__BIND 0x00000400UL -#define NETLINK_ROUTE_SOCKET__CONNECT 0x00000800UL -#define NETLINK_ROUTE_SOCKET__LISTEN 0x00001000UL -#define NETLINK_ROUTE_SOCKET__ACCEPT 0x00002000UL -#define NETLINK_ROUTE_SOCKET__GETOPT 0x00004000UL -#define NETLINK_ROUTE_SOCKET__SETOPT 0x00008000UL -#define NETLINK_ROUTE_SOCKET__SHUTDOWN 0x00010000UL -#define NETLINK_ROUTE_SOCKET__RECVFROM 0x00020000UL -#define NETLINK_ROUTE_SOCKET__SENDTO 0x00040000UL -#define NETLINK_ROUTE_SOCKET__RECV_MSG 0x00080000UL -#define NETLINK_ROUTE_SOCKET__SEND_MSG 0x00100000UL -#define NETLINK_ROUTE_SOCKET__NAME_BIND 0x00200000UL -#define NETLINK_ROUTE_SOCKET__NLMSG_READ 0x00400000UL -#define NETLINK_ROUTE_SOCKET__NLMSG_WRITE 0x00800000UL -#define NETLINK_FIREWALL_SOCKET__IOCTL 0x00000001UL -#define NETLINK_FIREWALL_SOCKET__READ 0x00000002UL -#define NETLINK_FIREWALL_SOCKET__WRITE 0x00000004UL -#define NETLINK_FIREWALL_SOCKET__CREATE 0x00000008UL -#define NETLINK_FIREWALL_SOCKET__GETATTR 0x00000010UL -#define NETLINK_FIREWALL_SOCKET__SETATTR 0x00000020UL -#define NETLINK_FIREWALL_SOCKET__LOCK 0x00000040UL -#define NETLINK_FIREWALL_SOCKET__RELABELFROM 0x00000080UL -#define NETLINK_FIREWALL_SOCKET__RELABELTO 0x00000100UL -#define NETLINK_FIREWALL_SOCKET__APPEND 0x00000200UL -#define NETLINK_FIREWALL_SOCKET__BIND 0x00000400UL -#define NETLINK_FIREWALL_SOCKET__CONNECT 0x00000800UL -#define NETLINK_FIREWALL_SOCKET__LISTEN 0x00001000UL -#define NETLINK_FIREWALL_SOCKET__ACCEPT 0x00002000UL -#define NETLINK_FIREWALL_SOCKET__GETOPT 0x00004000UL -#define NETLINK_FIREWALL_SOCKET__SETOPT 0x00008000UL -#define NETLINK_FIREWALL_SOCKET__SHUTDOWN 0x00010000UL -#define NETLINK_FIREWALL_SOCKET__RECVFROM 0x00020000UL -#define NETLINK_FIREWALL_SOCKET__SENDTO 0x00040000UL -#define NETLINK_FIREWALL_SOCKET__RECV_MSG 0x00080000UL -#define NETLINK_FIREWALL_SOCKET__SEND_MSG 0x00100000UL -#define NETLINK_FIREWALL_SOCKET__NAME_BIND 0x00200000UL -#define NETLINK_FIREWALL_SOCKET__NLMSG_READ 0x00400000UL -#define NETLINK_FIREWALL_SOCKET__NLMSG_WRITE 0x00800000UL -#define NETLINK_TCPDIAG_SOCKET__IOCTL 0x00000001UL -#define NETLINK_TCPDIAG_SOCKET__READ 0x00000002UL -#define NETLINK_TCPDIAG_SOCKET__WRITE 0x00000004UL -#define NETLINK_TCPDIAG_SOCKET__CREATE 0x00000008UL -#define NETLINK_TCPDIAG_SOCKET__GETATTR 0x00000010UL -#define NETLINK_TCPDIAG_SOCKET__SETATTR 0x00000020UL -#define NETLINK_TCPDIAG_SOCKET__LOCK 0x00000040UL -#define NETLINK_TCPDIAG_SOCKET__RELABELFROM 0x00000080UL -#define NETLINK_TCPDIAG_SOCKET__RELABELTO 0x00000100UL -#define NETLINK_TCPDIAG_SOCKET__APPEND 0x00000200UL -#define NETLINK_TCPDIAG_SOCKET__BIND 0x00000400UL -#define NETLINK_TCPDIAG_SOCKET__CONNECT 0x00000800UL -#define NETLINK_TCPDIAG_SOCKET__LISTEN 0x00001000UL -#define NETLINK_TCPDIAG_SOCKET__ACCEPT 0x00002000UL -#define NETLINK_TCPDIAG_SOCKET__GETOPT 0x00004000UL -#define NETLINK_TCPDIAG_SOCKET__SETOPT 0x00008000UL -#define NETLINK_TCPDIAG_SOCKET__SHUTDOWN 0x00010000UL -#define NETLINK_TCPDIAG_SOCKET__RECVFROM 0x00020000UL -#define NETLINK_TCPDIAG_SOCKET__SENDTO 0x00040000UL -#define NETLINK_TCPDIAG_SOCKET__RECV_MSG 0x00080000UL -#define NETLINK_TCPDIAG_SOCKET__SEND_MSG 0x00100000UL -#define NETLINK_TCPDIAG_SOCKET__NAME_BIND 0x00200000UL -#define NETLINK_TCPDIAG_SOCKET__NLMSG_READ 0x00400000UL -#define NETLINK_TCPDIAG_SOCKET__NLMSG_WRITE 0x00800000UL -#define NETLINK_NFLOG_SOCKET__IOCTL 0x00000001UL -#define NETLINK_NFLOG_SOCKET__READ 0x00000002UL -#define NETLINK_NFLOG_SOCKET__WRITE 0x00000004UL -#define NETLINK_NFLOG_SOCKET__CREATE 0x00000008UL -#define NETLINK_NFLOG_SOCKET__GETATTR 0x00000010UL -#define NETLINK_NFLOG_SOCKET__SETATTR 0x00000020UL -#define NETLINK_NFLOG_SOCKET__LOCK 0x00000040UL -#define NETLINK_NFLOG_SOCKET__RELABELFROM 0x00000080UL -#define NETLINK_NFLOG_SOCKET__RELABELTO 0x00000100UL -#define NETLINK_NFLOG_SOCKET__APPEND 0x00000200UL -#define NETLINK_NFLOG_SOCKET__BIND 0x00000400UL -#define NETLINK_NFLOG_SOCKET__CONNECT 0x00000800UL -#define NETLINK_NFLOG_SOCKET__LISTEN 0x00001000UL -#define NETLINK_NFLOG_SOCKET__ACCEPT 0x00002000UL -#define NETLINK_NFLOG_SOCKET__GETOPT 0x00004000UL -#define NETLINK_NFLOG_SOCKET__SETOPT 0x00008000UL -#define NETLINK_NFLOG_SOCKET__SHUTDOWN 0x00010000UL -#define NETLINK_NFLOG_SOCKET__RECVFROM 0x00020000UL -#define NETLINK_NFLOG_SOCKET__SENDTO 0x00040000UL -#define NETLINK_NFLOG_SOCKET__RECV_MSG 0x00080000UL -#define NETLINK_NFLOG_SOCKET__SEND_MSG 0x00100000UL -#define NETLINK_NFLOG_SOCKET__NAME_BIND 0x00200000UL -#define NETLINK_XFRM_SOCKET__IOCTL 0x00000001UL -#define NETLINK_XFRM_SOCKET__READ 0x00000002UL -#define NETLINK_XFRM_SOCKET__WRITE 0x00000004UL -#define NETLINK_XFRM_SOCKET__CREATE 0x00000008UL -#define NETLINK_XFRM_SOCKET__GETATTR 0x00000010UL -#define NETLINK_XFRM_SOCKET__SETATTR 0x00000020UL -#define NETLINK_XFRM_SOCKET__LOCK 0x00000040UL -#define NETLINK_XFRM_SOCKET__RELABELFROM 0x00000080UL -#define NETLINK_XFRM_SOCKET__RELABELTO 0x00000100UL -#define NETLINK_XFRM_SOCKET__APPEND 0x00000200UL -#define NETLINK_XFRM_SOCKET__BIND 0x00000400UL -#define NETLINK_XFRM_SOCKET__CONNECT 0x00000800UL -#define NETLINK_XFRM_SOCKET__LISTEN 0x00001000UL -#define NETLINK_XFRM_SOCKET__ACCEPT 0x00002000UL -#define NETLINK_XFRM_SOCKET__GETOPT 0x00004000UL -#define NETLINK_XFRM_SOCKET__SETOPT 0x00008000UL -#define NETLINK_XFRM_SOCKET__SHUTDOWN 0x00010000UL -#define NETLINK_XFRM_SOCKET__RECVFROM 0x00020000UL -#define NETLINK_XFRM_SOCKET__SENDTO 0x00040000UL -#define NETLINK_XFRM_SOCKET__RECV_MSG 0x00080000UL -#define NETLINK_XFRM_SOCKET__SEND_MSG 0x00100000UL -#define NETLINK_XFRM_SOCKET__NAME_BIND 0x00200000UL -#define NETLINK_XFRM_SOCKET__NLMSG_READ 0x00400000UL -#define NETLINK_XFRM_SOCKET__NLMSG_WRITE 0x00800000UL -#define NETLINK_SELINUX_SOCKET__IOCTL 0x00000001UL -#define NETLINK_SELINUX_SOCKET__READ 0x00000002UL -#define NETLINK_SELINUX_SOCKET__WRITE 0x00000004UL -#define NETLINK_SELINUX_SOCKET__CREATE 0x00000008UL -#define NETLINK_SELINUX_SOCKET__GETATTR 0x00000010UL -#define NETLINK_SELINUX_SOCKET__SETATTR 0x00000020UL -#define NETLINK_SELINUX_SOCKET__LOCK 0x00000040UL -#define NETLINK_SELINUX_SOCKET__RELABELFROM 0x00000080UL -#define NETLINK_SELINUX_SOCKET__RELABELTO 0x00000100UL -#define NETLINK_SELINUX_SOCKET__APPEND 0x00000200UL -#define NETLINK_SELINUX_SOCKET__BIND 0x00000400UL -#define NETLINK_SELINUX_SOCKET__CONNECT 0x00000800UL -#define NETLINK_SELINUX_SOCKET__LISTEN 0x00001000UL -#define NETLINK_SELINUX_SOCKET__ACCEPT 0x00002000UL -#define NETLINK_SELINUX_SOCKET__GETOPT 0x00004000UL -#define NETLINK_SELINUX_SOCKET__SETOPT 0x00008000UL -#define NETLINK_SELINUX_SOCKET__SHUTDOWN 0x00010000UL -#define NETLINK_SELINUX_SOCKET__RECVFROM 0x00020000UL -#define NETLINK_SELINUX_SOCKET__SENDTO 0x00040000UL -#define NETLINK_SELINUX_SOCKET__RECV_MSG 0x00080000UL -#define NETLINK_SELINUX_SOCKET__SEND_MSG 0x00100000UL -#define NETLINK_SELINUX_SOCKET__NAME_BIND 0x00200000UL -#define NETLINK_AUDIT_SOCKET__IOCTL 0x00000001UL -#define NETLINK_AUDIT_SOCKET__READ 0x00000002UL -#define NETLINK_AUDIT_SOCKET__WRITE 0x00000004UL -#define NETLINK_AUDIT_SOCKET__CREATE 0x00000008UL -#define NETLINK_AUDIT_SOCKET__GETATTR 0x00000010UL -#define NETLINK_AUDIT_SOCKET__SETATTR 0x00000020UL -#define NETLINK_AUDIT_SOCKET__LOCK 0x00000040UL -#define NETLINK_AUDIT_SOCKET__RELABELFROM 0x00000080UL -#define NETLINK_AUDIT_SOCKET__RELABELTO 0x00000100UL -#define NETLINK_AUDIT_SOCKET__APPEND 0x00000200UL -#define NETLINK_AUDIT_SOCKET__BIND 0x00000400UL -#define NETLINK_AUDIT_SOCKET__CONNECT 0x00000800UL -#define NETLINK_AUDIT_SOCKET__LISTEN 0x00001000UL -#define NETLINK_AUDIT_SOCKET__ACCEPT 0x00002000UL -#define NETLINK_AUDIT_SOCKET__GETOPT 0x00004000UL -#define NETLINK_AUDIT_SOCKET__SETOPT 0x00008000UL -#define NETLINK_AUDIT_SOCKET__SHUTDOWN 0x00010000UL -#define NETLINK_AUDIT_SOCKET__RECVFROM 0x00020000UL -#define NETLINK_AUDIT_SOCKET__SENDTO 0x00040000UL -#define NETLINK_AUDIT_SOCKET__RECV_MSG 0x00080000UL -#define NETLINK_AUDIT_SOCKET__SEND_MSG 0x00100000UL -#define NETLINK_AUDIT_SOCKET__NAME_BIND 0x00200000UL -#define NETLINK_AUDIT_SOCKET__NLMSG_READ 0x00400000UL -#define NETLINK_AUDIT_SOCKET__NLMSG_WRITE 0x00800000UL -#define NETLINK_AUDIT_SOCKET__NLMSG_RELAY 0x01000000UL -#define NETLINK_AUDIT_SOCKET__NLMSG_READPRIV 0x02000000UL -#define NETLINK_AUDIT_SOCKET__NLMSG_TTY_AUDIT 0x04000000UL -#define NETLINK_IP6FW_SOCKET__IOCTL 0x00000001UL -#define NETLINK_IP6FW_SOCKET__READ 0x00000002UL -#define NETLINK_IP6FW_SOCKET__WRITE 0x00000004UL -#define NETLINK_IP6FW_SOCKET__CREATE 0x00000008UL -#define NETLINK_IP6FW_SOCKET__GETATTR 0x00000010UL -#define NETLINK_IP6FW_SOCKET__SETATTR 0x00000020UL -#define NETLINK_IP6FW_SOCKET__LOCK 0x00000040UL -#define NETLINK_IP6FW_SOCKET__RELABELFROM 0x00000080UL -#define NETLINK_IP6FW_SOCKET__RELABELTO 0x00000100UL -#define NETLINK_IP6FW_SOCKET__APPEND 0x00000200UL -#define NETLINK_IP6FW_SOCKET__BIND 0x00000400UL -#define NETLINK_IP6FW_SOCKET__CONNECT 0x00000800UL -#define NETLINK_IP6FW_SOCKET__LISTEN 0x00001000UL -#define NETLINK_IP6FW_SOCKET__ACCEPT 0x00002000UL -#define NETLINK_IP6FW_SOCKET__GETOPT 0x00004000UL -#define NETLINK_IP6FW_SOCKET__SETOPT 0x00008000UL -#define NETLINK_IP6FW_SOCKET__SHUTDOWN 0x00010000UL -#define NETLINK_IP6FW_SOCKET__RECVFROM 0x00020000UL -#define NETLINK_IP6FW_SOCKET__SENDTO 0x00040000UL -#define NETLINK_IP6FW_SOCKET__RECV_MSG 0x00080000UL -#define NETLINK_IP6FW_SOCKET__SEND_MSG 0x00100000UL -#define NETLINK_IP6FW_SOCKET__NAME_BIND 0x00200000UL -#define NETLINK_IP6FW_SOCKET__NLMSG_READ 0x00400000UL -#define NETLINK_IP6FW_SOCKET__NLMSG_WRITE 0x00800000UL -#define NETLINK_DNRT_SOCKET__IOCTL 0x00000001UL -#define NETLINK_DNRT_SOCKET__READ 0x00000002UL -#define NETLINK_DNRT_SOCKET__WRITE 0x00000004UL -#define NETLINK_DNRT_SOCKET__CREATE 0x00000008UL -#define NETLINK_DNRT_SOCKET__GETATTR 0x00000010UL -#define NETLINK_DNRT_SOCKET__SETATTR 0x00000020UL -#define NETLINK_DNRT_SOCKET__LOCK 0x00000040UL -#define NETLINK_DNRT_SOCKET__RELABELFROM 0x00000080UL -#define NETLINK_DNRT_SOCKET__RELABELTO 0x00000100UL -#define NETLINK_DNRT_SOCKET__APPEND 0x00000200UL -#define NETLINK_DNRT_SOCKET__BIND 0x00000400UL -#define NETLINK_DNRT_SOCKET__CONNECT 0x00000800UL -#define NETLINK_DNRT_SOCKET__LISTEN 0x00001000UL -#define NETLINK_DNRT_SOCKET__ACCEPT 0x00002000UL -#define NETLINK_DNRT_SOCKET__GETOPT 0x00004000UL -#define NETLINK_DNRT_SOCKET__SETOPT 0x00008000UL -#define NETLINK_DNRT_SOCKET__SHUTDOWN 0x00010000UL -#define NETLINK_DNRT_SOCKET__RECVFROM 0x00020000UL -#define NETLINK_DNRT_SOCKET__SENDTO 0x00040000UL -#define NETLINK_DNRT_SOCKET__RECV_MSG 0x00080000UL -#define NETLINK_DNRT_SOCKET__SEND_MSG 0x00100000UL -#define NETLINK_DNRT_SOCKET__NAME_BIND 0x00200000UL -#define ASSOCIATION__SENDTO 0x00000001UL -#define ASSOCIATION__RECVFROM 0x00000002UL -#define ASSOCIATION__SETCONTEXT 0x00000004UL -#define ASSOCIATION__POLMATCH 0x00000008UL -#define NETLINK_KOBJECT_UEVENT_SOCKET__IOCTL 0x00000001UL -#define NETLINK_KOBJECT_UEVENT_SOCKET__READ 0x00000002UL -#define NETLINK_KOBJECT_UEVENT_SOCKET__WRITE 0x00000004UL -#define NETLINK_KOBJECT_UEVENT_SOCKET__CREATE 0x00000008UL -#define NETLINK_KOBJECT_UEVENT_SOCKET__GETATTR 0x00000010UL -#define NETLINK_KOBJECT_UEVENT_SOCKET__SETATTR 0x00000020UL -#define NETLINK_KOBJECT_UEVENT_SOCKET__LOCK 0x00000040UL -#define NETLINK_KOBJECT_UEVENT_SOCKET__RELABELFROM 0x00000080UL -#define NETLINK_KOBJECT_UEVENT_SOCKET__RELABELTO 0x00000100UL -#define NETLINK_KOBJECT_UEVENT_SOCKET__APPEND 0x00000200UL -#define NETLINK_KOBJECT_UEVENT_SOCKET__BIND 0x00000400UL -#define NETLINK_KOBJECT_UEVENT_SOCKET__CONNECT 0x00000800UL -#define NETLINK_KOBJECT_UEVENT_SOCKET__LISTEN 0x00001000UL -#define NETLINK_KOBJECT_UEVENT_SOCKET__ACCEPT 0x00002000UL -#define NETLINK_KOBJECT_UEVENT_SOCKET__GETOPT 0x00004000UL -#define NETLINK_KOBJECT_UEVENT_SOCKET__SETOPT 0x00008000UL -#define NETLINK_KOBJECT_UEVENT_SOCKET__SHUTDOWN 0x00010000UL -#define NETLINK_KOBJECT_UEVENT_SOCKET__RECVFROM 0x00020000UL -#define NETLINK_KOBJECT_UEVENT_SOCKET__SENDTO 0x00040000UL -#define NETLINK_KOBJECT_UEVENT_SOCKET__RECV_MSG 0x00080000UL -#define NETLINK_KOBJECT_UEVENT_SOCKET__SEND_MSG 0x00100000UL -#define NETLINK_KOBJECT_UEVENT_SOCKET__NAME_BIND 0x00200000UL -#define APPLETALK_SOCKET__IOCTL 0x00000001UL -#define APPLETALK_SOCKET__READ 0x00000002UL -#define APPLETALK_SOCKET__WRITE 0x00000004UL -#define APPLETALK_SOCKET__CREATE 0x00000008UL -#define APPLETALK_SOCKET__GETATTR 0x00000010UL -#define APPLETALK_SOCKET__SETATTR 0x00000020UL -#define APPLETALK_SOCKET__LOCK 0x00000040UL -#define APPLETALK_SOCKET__RELABELFROM 0x00000080UL -#define APPLETALK_SOCKET__RELABELTO 0x00000100UL -#define APPLETALK_SOCKET__APPEND 0x00000200UL -#define APPLETALK_SOCKET__BIND 0x00000400UL -#define APPLETALK_SOCKET__CONNECT 0x00000800UL -#define APPLETALK_SOCKET__LISTEN 0x00001000UL -#define APPLETALK_SOCKET__ACCEPT 0x00002000UL -#define APPLETALK_SOCKET__GETOPT 0x00004000UL -#define APPLETALK_SOCKET__SETOPT 0x00008000UL -#define APPLETALK_SOCKET__SHUTDOWN 0x00010000UL -#define APPLETALK_SOCKET__RECVFROM 0x00020000UL -#define APPLETALK_SOCKET__SENDTO 0x00040000UL -#define APPLETALK_SOCKET__RECV_MSG 0x00080000UL -#define APPLETALK_SOCKET__SEND_MSG 0x00100000UL -#define APPLETALK_SOCKET__NAME_BIND 0x00200000UL -#define PACKET__SEND 0x00000001UL -#define PACKET__RECV 0x00000002UL -#define PACKET__RELABELTO 0x00000004UL -#define PACKET__FLOW_IN 0x00000008UL -#define PACKET__FLOW_OUT 0x00000010UL -#define PACKET__FORWARD_IN 0x00000020UL -#define PACKET__FORWARD_OUT 0x00000040UL -#define KEY__VIEW 0x00000001UL -#define KEY__READ 0x00000002UL -#define KEY__WRITE 0x00000004UL -#define KEY__SEARCH 0x00000008UL -#define KEY__LINK 0x00000010UL -#define KEY__SETATTR 0x00000020UL -#define KEY__CREATE 0x00000040UL -#define DCCP_SOCKET__IOCTL 0x00000001UL -#define DCCP_SOCKET__READ 0x00000002UL -#define DCCP_SOCKET__WRITE 0x00000004UL -#define DCCP_SOCKET__CREATE 0x00000008UL -#define DCCP_SOCKET__GETATTR 0x00000010UL -#define DCCP_SOCKET__SETATTR 0x00000020UL -#define DCCP_SOCKET__LOCK 0x00000040UL -#define DCCP_SOCKET__RELABELFROM 0x00000080UL -#define DCCP_SOCKET__RELABELTO 0x00000100UL -#define DCCP_SOCKET__APPEND 0x00000200UL -#define DCCP_SOCKET__BIND 0x00000400UL -#define DCCP_SOCKET__CONNECT 0x00000800UL -#define DCCP_SOCKET__LISTEN 0x00001000UL -#define DCCP_SOCKET__ACCEPT 0x00002000UL -#define DCCP_SOCKET__GETOPT 0x00004000UL -#define DCCP_SOCKET__SETOPT 0x00008000UL -#define DCCP_SOCKET__SHUTDOWN 0x00010000UL -#define DCCP_SOCKET__RECVFROM 0x00020000UL -#define DCCP_SOCKET__SENDTO 0x00040000UL -#define DCCP_SOCKET__RECV_MSG 0x00080000UL -#define DCCP_SOCKET__SEND_MSG 0x00100000UL -#define DCCP_SOCKET__NAME_BIND 0x00200000UL -#define DCCP_SOCKET__NODE_BIND 0x00400000UL -#define DCCP_SOCKET__NAME_CONNECT 0x00800000UL -#define MEMPROTECT__MMAP_ZERO 0x00000001UL -#define PEER__RECV 0x00000001UL -#define KERNEL_SERVICE__USE_AS_OVERRIDE 0x00000001UL -#define KERNEL_SERVICE__CREATE_FILES_AS 0x00000002UL diff --git a/security/selinux/include/avc_ss.h b/security/selinux/include/avc_ss.h index bb1ec801bdf..4677aa519b0 100644 --- a/security/selinux/include/avc_ss.h +++ b/security/selinux/include/avc_ss.h @@ -10,26 +10,13 @@ int avc_ss_reset(u32 seqno); -struct av_perm_to_string { - u16 tclass; - u32 value; +/* Class/perm mapping support */ +struct security_class_mapping { const char *name; + const char *perms[sizeof(u32) * 8 + 1]; }; -struct av_inherit { - const char **common_pts; - u32 common_base; - u16 tclass; -}; - -struct selinux_class_perm { - const struct av_perm_to_string *av_perm_to_string; - u32 av_pts_len; - u32 cts_len; - const char **class_to_string; - const struct av_inherit *av_inherit; - u32 av_inherit_len; -}; +extern struct security_class_mapping secclass_map[]; #endif /* _SELINUX_AVC_SS_H_ */ diff --git a/security/selinux/include/class_to_string.h b/security/selinux/include/class_to_string.h deleted file mode 100644 index 7ab9299bfb6..00000000000 --- a/security/selinux/include/class_to_string.h +++ /dev/null @@ -1,80 +0,0 @@ -/* This file is automatically generated. Do not edit. */ -/* - * Security object class definitions - */ - S_(NULL) - S_("security") - S_("process") - S_("system") - S_("capability") - S_("filesystem") - S_("file") - S_("dir") - S_("fd") - S_("lnk_file") - S_("chr_file") - S_("blk_file") - S_("sock_file") - S_("fifo_file") - S_("socket") - S_("tcp_socket") - S_("udp_socket") - S_("rawip_socket") - S_("node") - S_("netif") - S_("netlink_socket") - S_("packet_socket") - S_("key_socket") - S_("unix_stream_socket") - S_("unix_dgram_socket") - S_("sem") - S_("msg") - S_("msgq") - S_("shm") - S_("ipc") - S_(NULL) - S_(NULL) - S_(NULL) - S_(NULL) - S_(NULL) - S_(NULL) - S_(NULL) - S_(NULL) - S_(NULL) - S_(NULL) - S_(NULL) - S_(NULL) - S_(NULL) - S_("netlink_route_socket") - S_("netlink_firewall_socket") - S_("netlink_tcpdiag_socket") - S_("netlink_nflog_socket") - S_("netlink_xfrm_socket") - S_("netlink_selinux_socket") - S_("netlink_audit_socket") - S_("netlink_ip6fw_socket") - S_("netlink_dnrt_socket") - S_(NULL) - S_(NULL) - S_("association") - S_("netlink_kobject_uevent_socket") - S_("appletalk_socket") - S_("packet") - S_("key") - S_(NULL) - S_("dccp_socket") - S_("memprotect") - S_(NULL) - S_(NULL) - S_(NULL) - S_(NULL) - S_(NULL) - S_(NULL) - S_("peer") - S_("capability2") - S_(NULL) - S_(NULL) - S_(NULL) - S_(NULL) - S_("kernel_service") - S_("tun_socket") diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h new file mode 100644 index 00000000000..8b32e959bb2 --- /dev/null +++ b/security/selinux/include/classmap.h @@ -0,0 +1,150 @@ +#define COMMON_FILE_SOCK_PERMS "ioctl", "read", "write", "create", \ + "getattr", "setattr", "lock", "relabelfrom", "relabelto", "append" + +#define COMMON_FILE_PERMS COMMON_FILE_SOCK_PERMS, "unlink", "link", \ + "rename", "execute", "swapon", "quotaon", "mounton" + +#define COMMON_SOCK_PERMS COMMON_FILE_SOCK_PERMS, "bind", "connect", \ + "listen", "accept", "getopt", "setopt", "shutdown", "recvfrom", \ + "sendto", "recv_msg", "send_msg", "name_bind" + +#define COMMON_IPC_PERMS "create", "destroy", "getattr", "setattr", "read", \ + "write", "associate", "unix_read", "unix_write" + +struct security_class_mapping secclass_map[] = { + { "security", + { "compute_av", "compute_create", "compute_member", + "check_context", "load_policy", "compute_relabel", + "compute_user", "setenforce", "setbool", "setsecparam", + "setcheckreqprot", NULL } }, + { "process", + { "fork", "transition", "sigchld", "sigkill", + "sigstop", "signull", "signal", "ptrace", "getsched", "setsched", + "getsession", "getpgid", "setpgid", "getcap", "setcap", "share", + "getattr", "setexec", "setfscreate", "noatsecure", "siginh", + "setrlimit", "rlimitinh", "dyntransition", "setcurrent", + "execmem", "execstack", "execheap", "setkeycreate", + "setsockcreate", NULL } }, + { "system", + { "ipc_info", "syslog_read", "syslog_mod", + "syslog_console", "module_request", NULL } }, + { "capability", + { "chown", "dac_override", "dac_read_search", + "fowner", "fsetid", "kill", "setgid", "setuid", "setpcap", + "linux_immutable", "net_bind_service", "net_broadcast", + "net_admin", "net_raw", "ipc_lock", "ipc_owner", "sys_module", + "sys_rawio", "sys_chroot", "sys_ptrace", "sys_pacct", "sys_admin", + "sys_boot", "sys_nice", "sys_resource", "sys_time", + "sys_tty_config", "mknod", "lease", "audit_write", + "audit_control", "setfcap", NULL } }, + { "filesystem", + { "mount", "remount", "unmount", "getattr", + "relabelfrom", "relabelto", "transition", "associate", "quotamod", + "quotaget", NULL } }, + { "file", + { COMMON_FILE_PERMS, + "execute_no_trans", "entrypoint", "execmod", "open", NULL } }, + { "dir", + { COMMON_FILE_PERMS, "add_name", "remove_name", + "reparent", "search", "rmdir", "open", NULL } }, + { "fd", { "use", NULL } }, + { "lnk_file", + { COMMON_FILE_PERMS, NULL } }, + { "chr_file", + { COMMON_FILE_PERMS, + "execute_no_trans", "entrypoint", "execmod", "open", NULL } }, + { "blk_file", + { COMMON_FILE_PERMS, "open", NULL } }, + { "sock_file", + { COMMON_FILE_PERMS, "open", NULL } }, + { "fifo_file", + { COMMON_FILE_PERMS, "open", NULL } }, + { "socket", + { COMMON_SOCK_PERMS, NULL } }, + { "tcp_socket", + { COMMON_SOCK_PERMS, + "connectto", "newconn", "acceptfrom", "node_bind", "name_connect", + NULL } }, + { "udp_socket", + { COMMON_SOCK_PERMS, + "node_bind", NULL } }, + { "rawip_socket", + { COMMON_SOCK_PERMS, + "node_bind", NULL } }, + { "node", + { "tcp_recv", "tcp_send", "udp_recv", "udp_send", + "rawip_recv", "rawip_send", "enforce_dest", + "dccp_recv", "dccp_send", "recvfrom", "sendto", NULL } }, + { "netif", + { "tcp_recv", "tcp_send", "udp_recv", "udp_send", + "rawip_recv", "rawip_send", "dccp_recv", "dccp_send", + "ingress", "egress", NULL } }, + { "netlink_socket", + { COMMON_SOCK_PERMS, NULL } }, + { "packet_socket", + { COMMON_SOCK_PERMS, NULL } }, + { "key_socket", + { COMMON_SOCK_PERMS, NULL } }, + { "unix_stream_socket", + { COMMON_SOCK_PERMS, "connectto", "newconn", "acceptfrom", NULL + } }, + { "unix_dgram_socket", + { COMMON_SOCK_PERMS, NULL + } }, + { "sem", + { COMMON_IPC_PERMS, NULL } }, + { "msg", { "send", "receive", NULL } }, + { "msgq", + { COMMON_IPC_PERMS, "enqueue", NULL } }, + { "shm", + { COMMON_IPC_PERMS, "lock", NULL } }, + { "ipc", + { COMMON_IPC_PERMS, NULL } }, + { "netlink_route_socket", + { COMMON_SOCK_PERMS, + "nlmsg_read", "nlmsg_write", NULL } }, + { "netlink_firewall_socket", + { COMMON_SOCK_PERMS, + "nlmsg_read", "nlmsg_write", NULL } }, + { "netlink_tcpdiag_socket", + { COMMON_SOCK_PERMS, + "nlmsg_read", "nlmsg_write", NULL } }, + { "netlink_nflog_socket", + { COMMON_SOCK_PERMS, NULL } }, + { "netlink_xfrm_socket", + { COMMON_SOCK_PERMS, + "nlmsg_read", "nlmsg_write", NULL } }, + { "netlink_selinux_socket", + { COMMON_SOCK_PERMS, NULL } }, + { "netlink_audit_socket", + { COMMON_SOCK_PERMS, + "nlmsg_read", "nlmsg_write", "nlmsg_relay", "nlmsg_readpriv", + "nlmsg_tty_audit", NULL } }, + { "netlink_ip6fw_socket", + { COMMON_SOCK_PERMS, + "nlmsg_read", "nlmsg_write", NULL } }, + { "netlink_dnrt_socket", + { COMMON_SOCK_PERMS, NULL } }, + { "association", + { "sendto", "recvfrom", "setcontext", "polmatch", NULL } }, + { "netlink_kobject_uevent_socket", + { COMMON_SOCK_PERMS, NULL } }, + { "appletalk_socket", + { COMMON_SOCK_PERMS, NULL } }, + { "packet", + { "send", "recv", "relabelto", "flow_in", "flow_out", + "forward_in", "forward_out", NULL } }, + { "key", + { "view", "read", "write", "search", "link", "setattr", "create", + NULL } }, + { "dccp_socket", + { COMMON_SOCK_PERMS, + "node_bind", "name_connect", NULL } }, + { "memprotect", { "mmap_zero", NULL } }, + { "peer", { "recv", NULL } }, + { "capability2", { "mac_override", "mac_admin", NULL } }, + { "kernel_service", { "use_as_override", "create_files_as", NULL } }, + { "tun_socket", + { COMMON_SOCK_PERMS, NULL } }, + { NULL } + }; diff --git a/security/selinux/include/common_perm_to_string.h b/security/selinux/include/common_perm_to_string.h deleted file mode 100644 index ce5b6e2fe9d..00000000000 --- a/security/selinux/include/common_perm_to_string.h +++ /dev/null @@ -1,58 +0,0 @@ -/* This file is automatically generated. Do not edit. */ -TB_(common_file_perm_to_string) - S_("ioctl") - S_("read") - S_("write") - S_("create") - S_("getattr") - S_("setattr") - S_("lock") - S_("relabelfrom") - S_("relabelto") - S_("append") - S_("unlink") - S_("link") - S_("rename") - S_("execute") - S_("swapon") - S_("quotaon") - S_("mounton") -TE_(common_file_perm_to_string) - -TB_(common_socket_perm_to_string) - S_("ioctl") - S_("read") - S_("write") - S_("create") - S_("getattr") - S_("setattr") - S_("lock") - S_("relabelfrom") - S_("relabelto") - S_("append") - S_("bind") - S_("connect") - S_("listen") - S_("accept") - S_("getopt") - S_("setopt") - S_("shutdown") - S_("recvfrom") - S_("sendto") - S_("recv_msg") - S_("send_msg") - S_("name_bind") -TE_(common_socket_perm_to_string) - -TB_(common_ipc_perm_to_string) - S_("create") - S_("destroy") - S_("getattr") - S_("setattr") - S_("read") - S_("write") - S_("associate") - S_("unix_read") - S_("unix_write") -TE_(common_ipc_perm_to_string) - diff --git a/security/selinux/include/flask.h b/security/selinux/include/flask.h deleted file mode 100644 index f248500a1e3..00000000000 --- a/security/selinux/include/flask.h +++ /dev/null @@ -1,91 +0,0 @@ -/* This file is automatically generated. Do not edit. */ -#ifndef _SELINUX_FLASK_H_ -#define _SELINUX_FLASK_H_ - -/* - * Security object class definitions - */ -#define SECCLASS_SECURITY 1 -#define SECCLASS_PROCESS 2 -#define SECCLASS_SYSTEM 3 -#define SECCLASS_CAPABILITY 4 -#define SECCLASS_FILESYSTEM 5 -#define SECCLASS_FILE 6 -#define SECCLASS_DIR 7 -#define SECCLASS_FD 8 -#define SECCLASS_LNK_FILE 9 -#define SECCLASS_CHR_FILE 10 -#define SECCLASS_BLK_FILE 11 -#define SECCLASS_SOCK_FILE 12 -#define SECCLASS_FIFO_FILE 13 -#define SECCLASS_SOCKET 14 -#define SECCLASS_TCP_SOCKET 15 -#define SECCLASS_UDP_SOCKET 16 -#define SECCLASS_RAWIP_SOCKET 17 -#define SECCLASS_NODE 18 -#define SECCLASS_NETIF 19 -#define SECCLASS_NETLINK_SOCKET 20 -#define SECCLASS_PACKET_SOCKET 21 -#define SECCLASS_KEY_SOCKET 22 -#define SECCLASS_UNIX_STREAM_SOCKET 23 -#define SECCLASS_UNIX_DGRAM_SOCKET 24 -#define SECCLASS_SEM 25 -#define SECCLASS_MSG 26 -#define SECCLASS_MSGQ 27 -#define SECCLASS_SHM 28 -#define SECCLASS_IPC 29 -#define SECCLASS_NETLINK_ROUTE_SOCKET 43 -#define SECCLASS_NETLINK_FIREWALL_SOCKET 44 -#define SECCLASS_NETLINK_TCPDIAG_SOCKET 45 -#define SECCLASS_NETLINK_NFLOG_SOCKET 46 -#define SECCLASS_NETLINK_XFRM_SOCKET 47 -#define SECCLASS_NETLINK_SELINUX_SOCKET 48 -#define SECCLASS_NETLINK_AUDIT_SOCKET 49 -#define SECCLASS_NETLINK_IP6FW_SOCKET 50 -#define SECCLASS_NETLINK_DNRT_SOCKET 51 -#define SECCLASS_ASSOCIATION 54 -#define SECCLASS_NETLINK_KOBJECT_UEVENT_SOCKET 55 -#define SECCLASS_APPLETALK_SOCKET 56 -#define SECCLASS_PACKET 57 -#define SECCLASS_KEY 58 -#define SECCLASS_DCCP_SOCKET 60 -#define SECCLASS_MEMPROTECT 61 -#define SECCLASS_PEER 68 -#define SECCLASS_CAPABILITY2 69 -#define SECCLASS_KERNEL_SERVICE 74 -#define SECCLASS_TUN_SOCKET 75 - -/* - * Security identifier indices for initial entities - */ -#define SECINITSID_KERNEL 1 -#define SECINITSID_SECURITY 2 -#define SECINITSID_UNLABELED 3 -#define SECINITSID_FS 4 -#define SECINITSID_FILE 5 -#define SECINITSID_FILE_LABELS 6 -#define SECINITSID_INIT 7 -#define SECINITSID_ANY_SOCKET 8 -#define SECINITSID_PORT 9 -#define SECINITSID_NETIF 10 -#define SECINITSID_NETMSG 11 -#define SECINITSID_NODE 12 -#define SECINITSID_IGMP_PACKET 13 -#define SECINITSID_ICMP_SOCKET 14 -#define SECINITSID_TCP_SOCKET 15 -#define SECINITSID_SYSCTL_MODPROBE 16 -#define SECINITSID_SYSCTL 17 -#define SECINITSID_SYSCTL_FS 18 -#define SECINITSID_SYSCTL_KERNEL 19 -#define SECINITSID_SYSCTL_NET 20 -#define SECINITSID_SYSCTL_NET_UNIX 21 -#define SECINITSID_SYSCTL_VM 22 -#define SECINITSID_SYSCTL_DEV 23 -#define SECINITSID_KMOD 24 -#define SECINITSID_POLICY 25 -#define SECINITSID_SCMP_PACKET 26 -#define SECINITSID_DEVNULL 27 - -#define SECINITSID_NUM 27 - -#endif diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index ca835795a8b..2553266ad79 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -97,11 +97,18 @@ struct av_decision { #define AVD_FLAGS_PERMISSIVE 0x0001 int security_compute_av(u32 ssid, u32 tsid, - u16 tclass, u32 requested, - struct av_decision *avd); + u16 tclass, u32 requested, + struct av_decision *avd); + +int security_compute_av_user(u32 ssid, u32 tsid, + u16 tclass, u32 requested, + struct av_decision *avd); int security_transition_sid(u32 ssid, u32 tsid, - u16 tclass, u32 *out_sid); + u16 tclass, u32 *out_sid); + +int security_transition_sid_user(u32 ssid, u32 tsid, + u16 tclass, u32 *out_sid); int security_member_sid(u32 ssid, u32 tsid, u16 tclass, u32 *out_sid); diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index b4fc506e7a8..fab36fdf276 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -522,7 +522,7 @@ static ssize_t sel_write_access(struct file *file, char *buf, size_t size) if (length < 0) goto out2; - length = security_compute_av(ssid, tsid, tclass, req, &avd); + length = security_compute_av_user(ssid, tsid, tclass, req, &avd); if (length < 0) goto out2; @@ -571,7 +571,7 @@ static ssize_t sel_write_create(struct file *file, char *buf, size_t size) if (length < 0) goto out2; - length = security_transition_sid(ssid, tsid, tclass, &newsid); + length = security_transition_sid_user(ssid, tsid, tclass, &newsid); if (length < 0) goto out2; diff --git a/security/selinux/ss/Makefile b/security/selinux/ss/Makefile index bad78779b9b..15d4e62917d 100644 --- a/security/selinux/ss/Makefile +++ b/security/selinux/ss/Makefile @@ -2,7 +2,7 @@ # Makefile for building the SELinux security server as part of the kernel tree. # -EXTRA_CFLAGS += -Isecurity/selinux/include +EXTRA_CFLAGS += -Isecurity/selinux -Isecurity/selinux/include obj-y := ss.o ss-y := ebitmap.o hashtab.o symtab.o sidtab.o avtab.o policydb.o services.o conditional.o mls.o diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c index b5407f16c2a..3f2b2706b5b 100644 --- a/security/selinux/ss/mls.c +++ b/security/selinux/ss/mls.c @@ -532,7 +532,7 @@ int mls_compute_sid(struct context *scontext, } /* Fallthrough */ case AVTAB_CHANGE: - if (tclass == SECCLASS_PROCESS) + if (tclass == policydb.process_class) /* Use the process MLS attributes. */ return mls_context_cpy(newcontext, scontext); else diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index 72e4a54973a..f03667213ea 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -713,7 +713,6 @@ void policydb_destroy(struct policydb *p) ebitmap_destroy(&p->type_attr_map[i]); } kfree(p->type_attr_map); - kfree(p->undefined_perms); ebitmap_destroy(&p->policycaps); ebitmap_destroy(&p->permissive_map); @@ -1640,6 +1639,40 @@ static int policydb_bounds_sanity_check(struct policydb *p) extern int ss_initialized; +u16 string_to_security_class(struct policydb *p, const char *name) +{ + struct class_datum *cladatum; + + cladatum = hashtab_search(p->p_classes.table, name); + if (!cladatum) + return 0; + + return cladatum->value; +} + +u32 string_to_av_perm(struct policydb *p, u16 tclass, const char *name) +{ + struct class_datum *cladatum; + struct perm_datum *perdatum = NULL; + struct common_datum *comdatum; + + if (!tclass || tclass > p->p_classes.nprim) + return 0; + + cladatum = p->class_val_to_struct[tclass-1]; + comdatum = cladatum->comdatum; + if (comdatum) + perdatum = hashtab_search(comdatum->permissions.table, + name); + if (!perdatum) + perdatum = hashtab_search(cladatum->permissions.table, + name); + if (!perdatum) + return 0; + + return 1U << (perdatum->value-1); +} + /* * Read the configuration data from a policy database binary * representation file into a policy database structure. @@ -1861,6 +1894,16 @@ int policydb_read(struct policydb *p, void *fp) if (rc) goto bad; + p->process_class = string_to_security_class(p, "process"); + if (!p->process_class) + goto bad; + p->process_trans_perms = string_to_av_perm(p, p->process_class, + "transition"); + p->process_trans_perms |= string_to_av_perm(p, p->process_class, + "dyntransition"); + if (!p->process_trans_perms) + goto bad; + for (i = 0; i < info->ocon_num; i++) { rc = next_entry(buf, fp, sizeof(u32)); if (rc < 0) @@ -2101,7 +2144,7 @@ int policydb_read(struct policydb *p, void *fp) goto bad; rt->target_class = le32_to_cpu(buf[0]); } else - rt->target_class = SECCLASS_PROCESS; + rt->target_class = p->process_class; if (!policydb_type_isvalid(p, rt->source_type) || !policydb_type_isvalid(p, rt->target_type) || !policydb_class_isvalid(p, rt->target_class)) { diff --git a/security/selinux/ss/policydb.h b/security/selinux/ss/policydb.h index 55152d498b5..cdcc5700946 100644 --- a/security/selinux/ss/policydb.h +++ b/security/selinux/ss/policydb.h @@ -254,7 +254,9 @@ struct policydb { unsigned int reject_unknown : 1; unsigned int allow_unknown : 1; - u32 *undefined_perms; + + u16 process_class; + u32 process_trans_perms; }; extern void policydb_destroy(struct policydb *p); @@ -295,5 +297,8 @@ static inline int next_entry(void *buf, struct policy_file *fp, size_t bytes) return 0; } +extern u16 string_to_security_class(struct policydb *p, const char *name); +extern u32 string_to_av_perm(struct policydb *p, u16 tclass, const char *name); + #endif /* _SS_POLICYDB_H_ */ diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index ff17820d35e..d6bb20cbad6 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -65,16 +65,10 @@ #include "audit.h" extern void selnl_notify_policyload(u32 seqno); -unsigned int policydb_loaded_version; int selinux_policycap_netpeer; int selinux_policycap_openperm; -/* - * This is declared in avc.c - */ -extern const struct selinux_class_perm selinux_class_perm; - static DEFINE_RWLOCK(policy_rwlock); static struct sidtab sidtab; @@ -98,6 +92,165 @@ static int context_struct_compute_av(struct context *scontext, u16 tclass, u32 requested, struct av_decision *avd); + +struct selinux_mapping { + u16 value; /* policy value */ + unsigned num_perms; + u32 perms[sizeof(u32) * 8]; +}; + +static struct selinux_mapping *current_mapping; +static u16 current_mapping_size; + +static int selinux_set_mapping(struct policydb *pol, + struct security_class_mapping *map, + struct selinux_mapping **out_map_p, + u16 *out_map_size) +{ + struct selinux_mapping *out_map = NULL; + size_t size = sizeof(struct selinux_mapping); + u16 i, j; + unsigned k; + bool print_unknown_handle = false; + + /* Find number of classes in the input mapping */ + if (!map) + return -EINVAL; + i = 0; + while (map[i].name) + i++; + + /* Allocate space for the class records, plus one for class zero */ + out_map = kcalloc(++i, size, GFP_ATOMIC); + if (!out_map) + return -ENOMEM; + + /* Store the raw class and permission values */ + j = 0; + while (map[j].name) { + struct security_class_mapping *p_in = map + (j++); + struct selinux_mapping *p_out = out_map + j; + + /* An empty class string skips ahead */ + if (!strcmp(p_in->name, "")) { + p_out->num_perms = 0; + continue; + } + + p_out->value = string_to_security_class(pol, p_in->name); + if (!p_out->value) { + printk(KERN_INFO + "SELinux: Class %s not defined in policy.\n", + p_in->name); + if (pol->reject_unknown) + goto err; + p_out->num_perms = 0; + print_unknown_handle = true; + continue; + } + + k = 0; + while (p_in->perms && p_in->perms[k]) { + /* An empty permission string skips ahead */ + if (!*p_in->perms[k]) { + k++; + continue; + } + p_out->perms[k] = string_to_av_perm(pol, p_out->value, + p_in->perms[k]); + if (!p_out->perms[k]) { + printk(KERN_INFO + "SELinux: Permission %s in class %s not defined in policy.\n", + p_in->perms[k], p_in->name); + if (pol->reject_unknown) + goto err; + print_unknown_handle = true; + } + + k++; + } + p_out->num_perms = k; + } + + if (print_unknown_handle) + printk(KERN_INFO "SELinux: the above unknown classes and permissions will be %s\n", + pol->allow_unknown ? "allowed" : "denied"); + + *out_map_p = out_map; + *out_map_size = i; + return 0; +err: + kfree(out_map); + return -EINVAL; +} + +/* + * Get real, policy values from mapped values + */ + +static u16 unmap_class(u16 tclass) +{ + if (tclass < current_mapping_size) + return current_mapping[tclass].value; + + return tclass; +} + +static u32 unmap_perm(u16 tclass, u32 tperm) +{ + if (tclass < current_mapping_size) { + unsigned i; + u32 kperm = 0; + + for (i = 0; i < current_mapping[tclass].num_perms; i++) + if (tperm & (1<<i)) { + kperm |= current_mapping[tclass].perms[i]; + tperm &= ~(1<<i); + } + return kperm; + } + + return tperm; +} + +static void map_decision(u16 tclass, struct av_decision *avd, + int allow_unknown) +{ + if (tclass < current_mapping_size) { + unsigned i, n = current_mapping[tclass].num_perms; + u32 result; + + for (i = 0, result = 0; i < n; i++) { + if (avd->allowed & current_mapping[tclass].perms[i]) + result |= 1<<i; + if (allow_unknown && !current_mapping[tclass].perms[i]) + result |= 1<<i; + } + avd->allowed = result; + + for (i = 0, result = 0; i < n; i++) + if (avd->auditallow & current_mapping[tclass].perms[i]) + result |= 1<<i; + avd->auditallow = result; + + for (i = 0, result = 0; i < n; i++) { + if (avd->auditdeny & current_mapping[tclass].perms[i]) + result |= 1<<i; + if (!allow_unknown && !current_mapping[tclass].perms[i]) + result |= 1<<i; + } + /* + * In case the kernel has a bug and requests a permission + * between num_perms and the maximum permission number, we + * should audit that denial + */ + for (; i < (sizeof(u32)*8); i++) + result |= 1<<i; + avd->auditdeny = result; + } +} + + /* * Return the boolean value of a constraint expression * when it is applied to the specified source and target @@ -467,21 +620,9 @@ static int context_struct_compute_av(struct context *scontext, struct class_datum *tclass_datum; struct ebitmap *sattr, *tattr; struct ebitmap_node *snode, *tnode; - const struct selinux_class_perm *kdefs = &selinux_class_perm; unsigned int i, j; /* - * Remap extended Netlink classes for old policy versions. - * Do this here rather than socket_type_to_security_class() - * in case a newer policy version is loaded, allowing sockets - * to remain in the correct class. - */ - if (policydb_loaded_version < POLICYDB_VERSION_NLCLASS) - if (tclass >= SECCLASS_NETLINK_ROUTE_SOCKET && - tclass <= SECCLASS_NETLINK_DNRT_SOCKET) - tclass = SECCLASS_NETLINK_SOCKET; - - /* * Initialize the access vectors to the default values. */ avd->allowed = 0; @@ -490,33 +631,11 @@ static int context_struct_compute_av(struct context *scontext, avd->seqno = latest_granting; avd->flags = 0; - /* - * Check for all the invalid cases. - * - tclass 0 - * - tclass > policy and > kernel - * - tclass > policy but is a userspace class - * - tclass > policy but we do not allow unknowns - */ - if (unlikely(!tclass)) - goto inval_class; - if (unlikely(tclass > policydb.p_classes.nprim)) - if (tclass > kdefs->cts_len || - !kdefs->class_to_string[tclass] || - !policydb.allow_unknown) - goto inval_class; - - /* - * Kernel class and we allow unknown so pad the allow decision - * the pad will be all 1 for unknown classes. - */ - if (tclass <= kdefs->cts_len && policydb.allow_unknown) - avd->allowed = policydb.undefined_perms[tclass - 1]; - - /* - * Not in policy. Since decision is completed (all 1 or all 0) return. - */ - if (unlikely(tclass > policydb.p_classes.nprim)) - return 0; + if (unlikely(!tclass || tclass > policydb.p_classes.nprim)) { + if (printk_ratelimit()) + printk(KERN_WARNING "SELinux: Invalid class %hu\n", tclass); + return -EINVAL; + } tclass_datum = policydb.class_val_to_struct[tclass - 1]; @@ -568,8 +687,8 @@ static int context_struct_compute_av(struct context *scontext, * role is changing, then check the (current_role, new_role) * pair. */ - if (tclass == SECCLASS_PROCESS && - (avd->allowed & (PROCESS__TRANSITION | PROCESS__DYNTRANSITION)) && + if (tclass == policydb.process_class && + (avd->allowed & policydb.process_trans_perms) && scontext->role != tcontext->role) { for (ra = policydb.role_allow; ra; ra = ra->next) { if (scontext->role == ra->role && @@ -577,8 +696,7 @@ static int context_struct_compute_av(struct context *scontext, break; } if (!ra) - avd->allowed &= ~(PROCESS__TRANSITION | - PROCESS__DYNTRANSITION); + avd->allowed &= ~policydb.process_trans_perms; } /* @@ -590,21 +708,6 @@ static int context_struct_compute_av(struct context *scontext, tclass, requested, avd); return 0; - -inval_class: - if (!tclass || tclass > kdefs->cts_len || - !kdefs->class_to_string[tclass]) { - if (printk_ratelimit()) - printk(KERN_ERR "SELinux: %s: unrecognized class %d\n", - __func__, tclass); - return -EINVAL; - } - - /* - * Known to the kernel, but not to the policy. - * Handle as a denial (allowed is 0). - */ - return 0; } static int security_validtrans_handle_fail(struct context *ocontext, @@ -636,13 +739,14 @@ out: } int security_validate_transition(u32 oldsid, u32 newsid, u32 tasksid, - u16 tclass) + u16 orig_tclass) { struct context *ocontext; struct context *ncontext; struct context *tcontext; struct class_datum *tclass_datum; struct constraint_node *constraint; + u16 tclass; int rc = 0; if (!ss_initialized) @@ -650,16 +754,7 @@ int security_validate_transition(u32 oldsid, u32 newsid, u32 tasksid, read_lock(&policy_rwlock); - /* - * Remap extended Netlink classes for old policy versions. - * Do this here rather than socket_type_to_security_class() - * in case a newer policy version is loaded, allowing sockets - * to remain in the correct class. - */ - if (policydb_loaded_version < POLICYDB_VERSION_NLCLASS) - if (tclass >= SECCLASS_NETLINK_ROUTE_SOCKET && - tclass <= SECCLASS_NETLINK_DNRT_SOCKET) - tclass = SECCLASS_NETLINK_SOCKET; + tclass = unmap_class(orig_tclass); if (!tclass || tclass > policydb.p_classes.nprim) { printk(KERN_ERR "SELinux: %s: unrecognized class %d\n", @@ -792,6 +887,38 @@ out: } +static int security_compute_av_core(u32 ssid, + u32 tsid, + u16 tclass, + u32 requested, + struct av_decision *avd) +{ + struct context *scontext = NULL, *tcontext = NULL; + int rc = 0; + + scontext = sidtab_search(&sidtab, ssid); + if (!scontext) { + printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + __func__, ssid); + return -EINVAL; + } + tcontext = sidtab_search(&sidtab, tsid); + if (!tcontext) { + printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + __func__, tsid); + return -EINVAL; + } + + rc = context_struct_compute_av(scontext, tcontext, tclass, + requested, avd); + + /* permissive domain? */ + if (ebitmap_get_bit(&policydb.permissive_map, scontext->type)) + avd->flags |= AVD_FLAGS_PERMISSIVE; + + return rc; +} + /** * security_compute_av - Compute access vector decisions. * @ssid: source security identifier @@ -807,12 +934,49 @@ out: */ int security_compute_av(u32 ssid, u32 tsid, - u16 tclass, - u32 requested, + u16 orig_tclass, + u32 orig_requested, struct av_decision *avd) { - struct context *scontext = NULL, *tcontext = NULL; - int rc = 0; + u16 tclass; + u32 requested; + int rc; + + read_lock(&policy_rwlock); + + if (!ss_initialized) + goto allow; + + requested = unmap_perm(orig_tclass, orig_requested); + tclass = unmap_class(orig_tclass); + if (unlikely(orig_tclass && !tclass)) { + if (policydb.allow_unknown) + goto allow; + rc = -EINVAL; + goto out; + } + rc = security_compute_av_core(ssid, tsid, tclass, requested, avd); + map_decision(orig_tclass, avd, policydb.allow_unknown); +out: + read_unlock(&policy_rwlock); + return rc; +allow: + avd->allowed = 0xffffffff; + avd->auditallow = 0; + avd->auditdeny = 0xffffffff; + avd->seqno = latest_granting; + avd->flags = 0; + rc = 0; + goto out; +} + +int security_compute_av_user(u32 ssid, + u32 tsid, + u16 tclass, + u32 requested, + struct av_decision *avd) +{ + int rc; if (!ss_initialized) { avd->allowed = 0xffffffff; @@ -823,29 +987,7 @@ int security_compute_av(u32 ssid, } read_lock(&policy_rwlock); - - scontext = sidtab_search(&sidtab, ssid); - if (!scontext) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", - __func__, ssid); - rc = -EINVAL; - goto out; - } - tcontext = sidtab_search(&sidtab, tsid); - if (!tcontext) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", - __func__, tsid); - rc = -EINVAL; - goto out; - } - - rc = context_struct_compute_av(scontext, tcontext, tclass, - requested, avd); - - /* permissive domain? */ - if (ebitmap_get_bit(&policydb.permissive_map, scontext->type)) - avd->flags |= AVD_FLAGS_PERMISSIVE; -out: + rc = security_compute_av_core(ssid, tsid, tclass, requested, avd); read_unlock(&policy_rwlock); return rc; } @@ -1204,20 +1346,22 @@ out: static int security_compute_sid(u32 ssid, u32 tsid, - u16 tclass, + u16 orig_tclass, u32 specified, - u32 *out_sid) + u32 *out_sid, + bool kern) { struct context *scontext = NULL, *tcontext = NULL, newcontext; struct role_trans *roletr = NULL; struct avtab_key avkey; struct avtab_datum *avdatum; struct avtab_node *node; + u16 tclass; int rc = 0; if (!ss_initialized) { - switch (tclass) { - case SECCLASS_PROCESS: + switch (orig_tclass) { + case SECCLASS_PROCESS: /* kernel value */ *out_sid = ssid; break; default: @@ -1231,6 +1375,11 @@ static int security_compute_sid(u32 ssid, read_lock(&policy_rwlock); + if (kern) + tclass = unmap_class(orig_tclass); + else + tclass = orig_tclass; + scontext = sidtab_search(&sidtab, ssid); if (!scontext) { printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", @@ -1260,13 +1409,11 @@ static int security_compute_sid(u32 ssid, } /* Set the role and type to default values. */ - switch (tclass) { - case SECCLASS_PROCESS: + if (tclass == policydb.process_class) { /* Use the current role and type of process. */ newcontext.role = scontext->role; newcontext.type = scontext->type; - break; - default: + } else { /* Use the well-defined object role. */ newcontext.role = OBJECT_R_VAL; /* Use the type of the related object. */ @@ -1297,8 +1444,7 @@ static int security_compute_sid(u32 ssid, } /* Check for class-specific changes. */ - switch (tclass) { - case SECCLASS_PROCESS: + if (tclass == policydb.process_class) { if (specified & AVTAB_TRANSITION) { /* Look for a role transition rule. */ for (roletr = policydb.role_tr; roletr; @@ -1311,9 +1457,6 @@ static int security_compute_sid(u32 ssid, } } } - break; - default: - break; } /* Set the MLS attributes. @@ -1358,7 +1501,17 @@ int security_transition_sid(u32 ssid, u16 tclass, u32 *out_sid) { - return security_compute_sid(ssid, tsid, tclass, AVTAB_TRANSITION, out_sid); + return security_compute_sid(ssid, tsid, tclass, AVTAB_TRANSITION, + out_sid, true); +} + +int security_transition_sid_user(u32 ssid, + u32 tsid, + u16 tclass, + u32 *out_sid) +{ + return security_compute_sid(ssid, tsid, tclass, AVTAB_TRANSITION, + out_sid, false); } /** @@ -1379,7 +1532,8 @@ int security_member_sid(u32 ssid, u16 tclass, u32 *out_sid) { - return security_compute_sid(ssid, tsid, tclass, AVTAB_MEMBER, out_sid); + return security_compute_sid(ssid, tsid, tclass, AVTAB_MEMBER, out_sid, + false); } /** @@ -1400,144 +1554,8 @@ int security_change_sid(u32 ssid, u16 tclass, u32 *out_sid) { - return security_compute_sid(ssid, tsid, tclass, AVTAB_CHANGE, out_sid); -} - -/* - * Verify that each kernel class that is defined in the - * policy is correct - */ -static int validate_classes(struct policydb *p) -{ - int i, j; - struct class_datum *cladatum; - struct perm_datum *perdatum; - u32 nprim, tmp, common_pts_len, perm_val, pol_val; - u16 class_val; - const struct selinux_class_perm *kdefs = &selinux_class_perm; - const char *def_class, *def_perm, *pol_class; - struct symtab *perms; - bool print_unknown_handle = 0; - - if (p->allow_unknown) { - u32 num_classes = kdefs->cts_len; - p->undefined_perms = kcalloc(num_classes, sizeof(u32), GFP_KERNEL); - if (!p->undefined_perms) - return -ENOMEM; - } - - for (i = 1; i < kdefs->cts_len; i++) { - def_class = kdefs->class_to_string[i]; - if (!def_class) - continue; - if (i > p->p_classes.nprim) { - printk(KERN_INFO - "SELinux: class %s not defined in policy\n", - def_class); - if (p->reject_unknown) - return -EINVAL; - if (p->allow_unknown) - p->undefined_perms[i-1] = ~0U; - print_unknown_handle = 1; - continue; - } - pol_class = p->p_class_val_to_name[i-1]; - if (strcmp(pol_class, def_class)) { - printk(KERN_ERR - "SELinux: class %d is incorrect, found %s but should be %s\n", - i, pol_class, def_class); - return -EINVAL; - } - } - for (i = 0; i < kdefs->av_pts_len; i++) { - class_val = kdefs->av_perm_to_string[i].tclass; - perm_val = kdefs->av_perm_to_string[i].value; - def_perm = kdefs->av_perm_to_string[i].name; - if (class_val > p->p_classes.nprim) - continue; - pol_class = p->p_class_val_to_name[class_val-1]; - cladatum = hashtab_search(p->p_classes.table, pol_class); - BUG_ON(!cladatum); - perms = &cladatum->permissions; - nprim = 1 << (perms->nprim - 1); - if (perm_val > nprim) { - printk(KERN_INFO - "SELinux: permission %s in class %s not defined in policy\n", - def_perm, pol_class); - if (p->reject_unknown) - return -EINVAL; - if (p->allow_unknown) - p->undefined_perms[class_val-1] |= perm_val; - print_unknown_handle = 1; - continue; - } - perdatum = hashtab_search(perms->table, def_perm); - if (perdatum == NULL) { - printk(KERN_ERR - "SELinux: permission %s in class %s not found in policy, bad policy\n", - def_perm, pol_class); - return -EINVAL; - } - pol_val = 1 << (perdatum->value - 1); - if (pol_val != perm_val) { - printk(KERN_ERR - "SELinux: permission %s in class %s has incorrect value\n", - def_perm, pol_class); - return -EINVAL; - } - } - for (i = 0; i < kdefs->av_inherit_len; i++) { - class_val = kdefs->av_inherit[i].tclass; - if (class_val > p->p_classes.nprim) - continue; - pol_class = p->p_class_val_to_name[class_val-1]; - cladatum = hashtab_search(p->p_classes.table, pol_class); - BUG_ON(!cladatum); - if (!cladatum->comdatum) { - printk(KERN_ERR - "SELinux: class %s should have an inherits clause but does not\n", - pol_class); - return -EINVAL; - } - tmp = kdefs->av_inherit[i].common_base; - common_pts_len = 0; - while (!(tmp & 0x01)) { - common_pts_len++; - tmp >>= 1; - } - perms = &cladatum->comdatum->permissions; - for (j = 0; j < common_pts_len; j++) { - def_perm = kdefs->av_inherit[i].common_pts[j]; - if (j >= perms->nprim) { - printk(KERN_INFO - "SELinux: permission %s in class %s not defined in policy\n", - def_perm, pol_class); - if (p->reject_unknown) - return -EINVAL; - if (p->allow_unknown) - p->undefined_perms[class_val-1] |= (1 << j); - print_unknown_handle = 1; - continue; - } - perdatum = hashtab_search(perms->table, def_perm); - if (perdatum == NULL) { - printk(KERN_ERR - "SELinux: permission %s in class %s not found in policy, bad policy\n", - def_perm, pol_class); - return -EINVAL; - } - if (perdatum->value != j + 1) { - printk(KERN_ERR - "SELinux: permission %s in class %s has incorrect value\n", - def_perm, pol_class); - return -EINVAL; - } - } - } - if (print_unknown_handle) - printk(KERN_INFO "SELinux: the above unknown classes and permissions will be %s\n", - (security_get_allow_unknown() ? "allowed" : "denied")); - return 0; + return security_compute_sid(ssid, tsid, tclass, AVTAB_CHANGE, out_sid, + false); } /* Clone the SID into the new SID table. */ @@ -1710,8 +1728,10 @@ int security_load_policy(void *data, size_t len) { struct policydb oldpolicydb, newpolicydb; struct sidtab oldsidtab, newsidtab; + struct selinux_mapping *oldmap, *map = NULL; struct convert_context_args args; u32 seqno; + u16 map_size; int rc = 0; struct policy_file file = { data, len }, *fp = &file; @@ -1721,22 +1741,19 @@ int security_load_policy(void *data, size_t len) avtab_cache_destroy(); return -EINVAL; } - if (policydb_load_isids(&policydb, &sidtab)) { + if (selinux_set_mapping(&policydb, secclass_map, + ¤t_mapping, + ¤t_mapping_size)) { policydb_destroy(&policydb); avtab_cache_destroy(); return -EINVAL; } - /* Verify that the kernel defined classes are correct. */ - if (validate_classes(&policydb)) { - printk(KERN_ERR - "SELinux: the definition of a class is incorrect\n"); - sidtab_destroy(&sidtab); + if (policydb_load_isids(&policydb, &sidtab)) { policydb_destroy(&policydb); avtab_cache_destroy(); return -EINVAL; } security_load_policycaps(); - policydb_loaded_version = policydb.policyvers; ss_initialized = 1; seqno = ++latest_granting; selinux_complete_init(); @@ -1759,13 +1776,9 @@ int security_load_policy(void *data, size_t len) return -ENOMEM; } - /* Verify that the kernel defined classes are correct. */ - if (validate_classes(&newpolicydb)) { - printk(KERN_ERR - "SELinux: the definition of a class is incorrect\n"); - rc = -EINVAL; + if (selinux_set_mapping(&newpolicydb, secclass_map, + &map, &map_size)) goto err; - } rc = security_preserve_bools(&newpolicydb); if (rc) { @@ -1799,13 +1812,16 @@ int security_load_policy(void *data, size_t len) memcpy(&policydb, &newpolicydb, sizeof policydb); sidtab_set(&sidtab, &newsidtab); security_load_policycaps(); + oldmap = current_mapping; + current_mapping = map; + current_mapping_size = map_size; seqno = ++latest_granting; - policydb_loaded_version = policydb.policyvers; write_unlock_irq(&policy_rwlock); /* Free the old policydb and SID table. */ policydb_destroy(&oldpolicydb); sidtab_destroy(&oldsidtab); + kfree(oldmap); avc_ss_reset(seqno); selnl_notify_policyload(seqno); @@ -1815,6 +1831,7 @@ int security_load_policy(void *data, size_t len) return 0; err: + kfree(map); sidtab_destroy(&newsidtab); policydb_destroy(&newpolicydb); return rc; @@ -2091,7 +2108,7 @@ out_unlock: } for (i = 0, j = 0; i < mynel; i++) { rc = avc_has_perm_noaudit(fromsid, mysids[i], - SECCLASS_PROCESS, + SECCLASS_PROCESS, /* kernel value */ PROCESS__TRANSITION, AVC_STRICT, NULL); if (!rc) @@ -2119,10 +2136,11 @@ out: */ int security_genfs_sid(const char *fstype, char *path, - u16 sclass, + u16 orig_sclass, u32 *sid) { int len; + u16 sclass; struct genfs *genfs; struct ocontext *c; int rc = 0, cmp = 0; @@ -2132,6 +2150,8 @@ int security_genfs_sid(const char *fstype, read_lock(&policy_rwlock); + sclass = unmap_class(orig_sclass); + for (genfs = policydb.genfs; genfs; genfs = genfs->next) { cmp = strcmp(fstype, genfs->fstype); if (cmp <= 0) diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c index 3c8bd8ee0b9..e0d0354008b 100644 --- a/security/tomoyo/common.c +++ b/security/tomoyo/common.c @@ -187,6 +187,8 @@ bool tomoyo_is_correct_path(const char *filename, const s8 start_type, const s8 pattern_type, const s8 end_type, const char *function) { + const char *const start = filename; + bool in_repetition = false; bool contains_pattern = false; unsigned char c; unsigned char d; @@ -212,9 +214,13 @@ bool tomoyo_is_correct_path(const char *filename, const s8 start_type, if (c == '/') goto out; } - while ((c = *filename++) != '\0') { + while (1) { + c = *filename++; + if (!c) + break; if (c == '\\') { - switch ((c = *filename++)) { + c = *filename++; + switch (c) { case '\\': /* "\\" */ continue; case '$': /* "\$" */ @@ -231,6 +237,22 @@ bool tomoyo_is_correct_path(const char *filename, const s8 start_type, break; /* Must not contain pattern */ contains_pattern = true; continue; + case '{': /* "/\{" */ + if (filename - 3 < start || + *(filename - 3) != '/') + break; + if (pattern_type == -1) + break; /* Must not contain pattern */ + contains_pattern = true; + in_repetition = true; + continue; + case '}': /* "\}/" */ + if (*filename != '/') + break; + if (!in_repetition) + break; + in_repetition = false; + continue; case '0': /* "\ooo" */ case '1': case '2': @@ -246,6 +268,8 @@ bool tomoyo_is_correct_path(const char *filename, const s8 start_type, continue; /* pattern is not \000 */ } goto out; + } else if (in_repetition && c == '/') { + goto out; } else if (tomoyo_is_invalid(c)) { goto out; } @@ -254,6 +278,8 @@ bool tomoyo_is_correct_path(const char *filename, const s8 start_type, if (!contains_pattern) goto out; } + if (in_repetition) + goto out; return true; out: printk(KERN_DEBUG "%s: Invalid pathname '%s'\n", function, @@ -360,33 +386,6 @@ struct tomoyo_domain_info *tomoyo_find_domain(const char *domainname) } /** - * tomoyo_path_depth - Evaluate the number of '/' in a string. - * - * @pathname: The string to evaluate. - * - * Returns path depth of the string. - * - * I score 2 for each of the '/' in the @pathname - * and score 1 if the @pathname ends with '/'. - */ -static int tomoyo_path_depth(const char *pathname) -{ - int i = 0; - - if (pathname) { - const char *ep = pathname + strlen(pathname); - if (pathname < ep--) { - if (*ep != '/') - i++; - while (pathname <= ep) - if (*ep-- == '/') - i += 2; - } - } - return i; -} - -/** * tomoyo_const_part_length - Evaluate the initial length without a pattern in a token. * * @filename: The string to evaluate. @@ -444,11 +443,10 @@ void tomoyo_fill_path_info(struct tomoyo_path_info *ptr) ptr->is_dir = len && (name[len - 1] == '/'); ptr->is_patterned = (ptr->const_len < len); ptr->hash = full_name_hash(name, len); - ptr->depth = tomoyo_path_depth(name); } /** - * tomoyo_file_matches_to_pattern2 - Pattern matching without '/' character + * tomoyo_file_matches_pattern2 - Pattern matching without '/' character * and "\-" pattern. * * @filename: The start of string to check. @@ -458,10 +456,10 @@ void tomoyo_fill_path_info(struct tomoyo_path_info *ptr) * * Returns true if @filename matches @pattern, false otherwise. */ -static bool tomoyo_file_matches_to_pattern2(const char *filename, - const char *filename_end, - const char *pattern, - const char *pattern_end) +static bool tomoyo_file_matches_pattern2(const char *filename, + const char *filename_end, + const char *pattern, + const char *pattern_end) { while (filename < filename_end && pattern < pattern_end) { char c; @@ -519,7 +517,7 @@ static bool tomoyo_file_matches_to_pattern2(const char *filename, case '*': case '@': for (i = 0; i <= filename_end - filename; i++) { - if (tomoyo_file_matches_to_pattern2( + if (tomoyo_file_matches_pattern2( filename + i, filename_end, pattern + 1, pattern_end)) return true; @@ -550,7 +548,7 @@ static bool tomoyo_file_matches_to_pattern2(const char *filename, j++; } for (i = 1; i <= j; i++) { - if (tomoyo_file_matches_to_pattern2( + if (tomoyo_file_matches_pattern2( filename + i, filename_end, pattern + 1, pattern_end)) return true; @@ -567,7 +565,7 @@ static bool tomoyo_file_matches_to_pattern2(const char *filename, } /** - * tomoyo_file_matches_to_pattern - Pattern matching without without '/' character. + * tomoyo_file_matches_pattern - Pattern matching without without '/' character. * * @filename: The start of string to check. * @filename_end: The end of string to check. @@ -576,7 +574,7 @@ static bool tomoyo_file_matches_to_pattern2(const char *filename, * * Returns true if @filename matches @pattern, false otherwise. */ -static bool tomoyo_file_matches_to_pattern(const char *filename, +static bool tomoyo_file_matches_pattern(const char *filename, const char *filename_end, const char *pattern, const char *pattern_end) @@ -589,10 +587,10 @@ static bool tomoyo_file_matches_to_pattern(const char *filename, /* Split at "\-" pattern. */ if (*pattern++ != '\\' || *pattern++ != '-') continue; - result = tomoyo_file_matches_to_pattern2(filename, - filename_end, - pattern_start, - pattern - 2); + result = tomoyo_file_matches_pattern2(filename, + filename_end, + pattern_start, + pattern - 2); if (first) result = !result; if (result) @@ -600,13 +598,79 @@ static bool tomoyo_file_matches_to_pattern(const char *filename, first = false; pattern_start = pattern; } - result = tomoyo_file_matches_to_pattern2(filename, filename_end, - pattern_start, pattern_end); + result = tomoyo_file_matches_pattern2(filename, filename_end, + pattern_start, pattern_end); return first ? result : !result; } /** + * tomoyo_path_matches_pattern2 - Do pathname pattern matching. + * + * @f: The start of string to check. + * @p: The start of pattern to compare. + * + * Returns true if @f matches @p, false otherwise. + */ +static bool tomoyo_path_matches_pattern2(const char *f, const char *p) +{ + const char *f_delimiter; + const char *p_delimiter; + + while (*f && *p) { + f_delimiter = strchr(f, '/'); + if (!f_delimiter) + f_delimiter = f + strlen(f); + p_delimiter = strchr(p, '/'); + if (!p_delimiter) + p_delimiter = p + strlen(p); + if (*p == '\\' && *(p + 1) == '{') + goto recursive; + if (!tomoyo_file_matches_pattern(f, f_delimiter, p, + p_delimiter)) + return false; + f = f_delimiter; + if (*f) + f++; + p = p_delimiter; + if (*p) + p++; + } + /* Ignore trailing "\*" and "\@" in @pattern. */ + while (*p == '\\' && + (*(p + 1) == '*' || *(p + 1) == '@')) + p += 2; + return !*f && !*p; + recursive: + /* + * The "\{" pattern is permitted only after '/' character. + * This guarantees that below "*(p - 1)" is safe. + * Also, the "\}" pattern is permitted only before '/' character + * so that "\{" + "\}" pair will not break the "\-" operator. + */ + if (*(p - 1) != '/' || p_delimiter <= p + 3 || *p_delimiter != '/' || + *(p_delimiter - 1) != '}' || *(p_delimiter - 2) != '\\') + return false; /* Bad pattern. */ + do { + /* Compare current component with pattern. */ + if (!tomoyo_file_matches_pattern(f, f_delimiter, p + 2, + p_delimiter - 2)) + break; + /* Proceed to next component. */ + f = f_delimiter; + if (!*f) + break; + f++; + /* Continue comparison. */ + if (tomoyo_path_matches_pattern2(f, p_delimiter + 1)) + return true; + f_delimiter = strchr(f, '/'); + } while (f_delimiter); + return false; /* Not matched. */ +} + +/** * tomoyo_path_matches_pattern - Check whether the given filename matches the given pattern. + * * @filename: The filename to check. * @pattern: The pattern to compare. * @@ -615,24 +679,24 @@ static bool tomoyo_file_matches_to_pattern(const char *filename, * The following patterns are available. * \\ \ itself. * \ooo Octal representation of a byte. - * \* More than or equals to 0 character other than '/'. - * \@ More than or equals to 0 character other than '/' or '.'. + * \* Zero or more repetitions of characters other than '/'. + * \@ Zero or more repetitions of characters other than '/' or '.'. * \? 1 byte character other than '/'. - * \$ More than or equals to 1 decimal digit. + * \$ One or more repetitions of decimal digits. * \+ 1 decimal digit. - * \X More than or equals to 1 hexadecimal digit. + * \X One or more repetitions of hexadecimal digits. * \x 1 hexadecimal digit. - * \A More than or equals to 1 alphabet character. + * \A One or more repetitions of alphabet characters. * \a 1 alphabet character. + * * \- Subtraction operator. + * + * /\{dir\}/ '/' + 'One or more repetitions of dir/' (e.g. /dir/ /dir/dir/ + * /dir/dir/dir/ ). */ bool tomoyo_path_matches_pattern(const struct tomoyo_path_info *filename, const struct tomoyo_path_info *pattern) { - /* - if (!filename || !pattern) - return false; - */ const char *f = filename->name; const char *p = pattern->name; const int len = pattern->const_len; @@ -640,37 +704,15 @@ bool tomoyo_path_matches_pattern(const struct tomoyo_path_info *filename, /* If @pattern doesn't contain pattern, I can use strcmp(). */ if (!pattern->is_patterned) return !tomoyo_pathcmp(filename, pattern); - /* Dont compare if the number of '/' differs. */ - if (filename->depth != pattern->depth) + /* Don't compare directory and non-directory. */ + if (filename->is_dir != pattern->is_dir) return false; /* Compare the initial length without patterns. */ if (strncmp(f, p, len)) return false; f += len; p += len; - /* Main loop. Compare each directory component. */ - while (*f && *p) { - const char *f_delimiter = strchr(f, '/'); - const char *p_delimiter = strchr(p, '/'); - if (!f_delimiter) - f_delimiter = f + strlen(f); - if (!p_delimiter) - p_delimiter = p + strlen(p); - if (!tomoyo_file_matches_to_pattern(f, f_delimiter, - p, p_delimiter)) - return false; - f = f_delimiter; - if (*f) - f++; - p = p_delimiter; - if (*p) - p++; - } - /* Ignore trailing "\*" and "\@" in @pattern. */ - while (*p == '\\' && - (*(p + 1) == '*' || *(p + 1) == '@')) - p += 2; - return !*f && !*p; + return tomoyo_path_matches_pattern2(f, p); } /** diff --git a/security/tomoyo/common.h b/security/tomoyo/common.h index 31df541911f..92169d29b2d 100644 --- a/security/tomoyo/common.h +++ b/security/tomoyo/common.h @@ -56,9 +56,6 @@ struct tomoyo_page_buffer { * (5) "is_patterned" is a bool which is true if "name" contains wildcard * characters, false otherwise. This allows TOMOYO to use "hash" and * strcmp() for string comparison if "is_patterned" is false. - * (6) "depth" is calculated using the number of "/" characters in "name". - * This allows TOMOYO to avoid comparing two pathnames which never match - * (e.g. whether "/var/www/html/index.html" matches "/tmp/sh-thd-\$"). */ struct tomoyo_path_info { const char *name; @@ -66,7 +63,6 @@ struct tomoyo_path_info { u16 const_len; /* = tomoyo_const_part_length(name) */ bool is_dir; /* = tomoyo_strendswith(name, "/") */ bool is_patterned; /* = tomoyo_path_contains_pattern(name) */ - u16 depth; /* = tomoyo_path_depth(name) */ }; /* diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c index 5f2e3326337..917f564cdab 100644 --- a/security/tomoyo/realpath.c +++ b/security/tomoyo/realpath.c @@ -13,6 +13,8 @@ #include <linux/mount.h> #include <linux/mnt_namespace.h> #include <linux/fs_struct.h> +#include <linux/hash.h> + #include "common.h" #include "realpath.h" @@ -263,7 +265,8 @@ static unsigned int tomoyo_quota_for_savename; * table. Frequency of appending strings is very low. So we don't need * large (e.g. 64k) hash size. 256 will be sufficient. */ -#define TOMOYO_MAX_HASH 256 +#define TOMOYO_HASH_BITS 8 +#define TOMOYO_MAX_HASH (1u<<TOMOYO_HASH_BITS) /* * tomoyo_name_entry is a structure which is used for linking @@ -315,6 +318,7 @@ const struct tomoyo_path_info *tomoyo_save_name(const char *name) struct tomoyo_free_memory_block_list *fmb; int len; char *cp; + struct list_head *head; if (!name) return NULL; @@ -325,9 +329,10 @@ const struct tomoyo_path_info *tomoyo_save_name(const char *name) return NULL; } hash = full_name_hash((const unsigned char *) name, len - 1); + head = &tomoyo_name_list[hash_long(hash, TOMOYO_HASH_BITS)]; + mutex_lock(&lock); - list_for_each_entry(ptr, &tomoyo_name_list[hash % TOMOYO_MAX_HASH], - list) { + list_for_each_entry(ptr, head, list) { if (hash == ptr->entry.hash && !strcmp(name, ptr->entry.name)) goto out; } @@ -365,7 +370,7 @@ const struct tomoyo_path_info *tomoyo_save_name(const char *name) tomoyo_fill_path_info(&ptr->entry); fmb->ptr += len; fmb->len -= len; - list_add_tail(&ptr->list, &tomoyo_name_list[hash % TOMOYO_MAX_HASH]); + list_add_tail(&ptr->list, head); if (fmb->len == 0) { list_del(&fmb->list); kfree(fmb); diff --git a/sound/pcmcia/pdaudiocf/pdaudiocf.c b/sound/pcmcia/pdaudiocf/pdaudiocf.c index 64b859925c0..7717e01fc07 100644 --- a/sound/pcmcia/pdaudiocf/pdaudiocf.c +++ b/sound/pcmcia/pdaudiocf/pdaudiocf.c @@ -131,7 +131,7 @@ static int snd_pdacf_probe(struct pcmcia_device *link) return err; } - snd_card_set_dev(card, &handle_to_dev(link)); + snd_card_set_dev(card, &link->dev); pdacf->index = i; card_list[i] = card; @@ -142,12 +142,10 @@ static int snd_pdacf_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; link->io.NumPorts1 = 16; - link->irq.Attributes = IRQ_TYPE_EXCLUSIVE | IRQ_HANDLE_PRESENT | IRQ_FORCED_PULSE; + link->irq.Attributes = IRQ_TYPE_EXCLUSIVE | IRQ_FORCED_PULSE; // link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED; - link->irq.IRQInfo1 = 0 /* | IRQ_LEVEL_ID */; link->irq.Handler = pdacf_interrupt; - link->irq.Instance = pdacf; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; link->conf.ConfigIndex = 1; diff --git a/sound/pcmcia/vx/vxpocket.c b/sound/pcmcia/vx/vxpocket.c index 1492744ad67..7be3b335704 100644 --- a/sound/pcmcia/vx/vxpocket.c +++ b/sound/pcmcia/vx/vxpocket.c @@ -161,11 +161,9 @@ static int snd_vxpocket_new(struct snd_card *card, int ibl, link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; link->io.NumPorts1 = 16; - link->irq.Attributes = IRQ_TYPE_EXCLUSIVE | IRQ_HANDLE_PRESENT; + link->irq.Attributes = IRQ_TYPE_EXCLUSIVE; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->irq.Handler = &snd_vx_irq_handler; - link->irq.Instance = chip; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; @@ -244,7 +242,7 @@ static int vxpocket_config(struct pcmcia_device *link) if (ret) goto failed; - chip->dev = &handle_to_dev(link); + chip->dev = &link->dev; snd_card_set_dev(chip->card, chip->dev); if (snd_vxpocket_assign_resources(chip, link->io.BasePort1, link->irq.AssignedIRQ) < 0) diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index 0854f110bf7..fe08660ce0b 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -12,6 +12,7 @@ perf*.1 perf*.xml perf*.html common-cmds.h +perf.data tags TAGS cscope* diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt new file mode 100644 index 00000000000..ae525ac5a2c --- /dev/null +++ b/tools/perf/Documentation/perf-bench.txt @@ -0,0 +1,120 @@ +perf-bench(1) +============ + +NAME +---- +perf-bench - General framework for benchmark suites + +SYNOPSIS +-------- +[verse] +'perf bench' [<common options>] <subsystem> <suite> [<options>] + +DESCRIPTION +----------- +This 'perf bench' command is general framework for benchmark suites. + +COMMON OPTIONS +-------------- +-f:: +--format=:: +Specify format style. +Current available format styles are, + +'default':: +Default style. This is mainly for human reading. +--------------------- +% perf bench sched pipe # with no style specify +(executing 1000000 pipe operations between two tasks) + Total time:5.855 sec + 5.855061 usecs/op + 170792 ops/sec +--------------------- + +'simple':: +This simple style is friendly for automated +processing by scripts. +--------------------- +% perf bench --format=simple sched pipe # specified simple +5.988 +--------------------- + +SUBSYSTEM +--------- + +'sched':: + Scheduler and IPC mechanisms. + +SUITES FOR 'sched' +~~~~~~~~~~~~~~~~~~ +*messaging*:: +Suite for evaluating performance of scheduler and IPC mechanisms. +Based on hackbench by Rusty Russell. + +Options of *pipe* +^^^^^^^^^^^^^^^^^ +-p:: +--pipe:: +Use pipe() instead of socketpair() + +-t:: +--thread:: +Be multi thread instead of multi process + +-g:: +--group=:: +Specify number of groups + +-l:: +--loop=:: +Specify number of loops + +Example of *messaging* +^^^^^^^^^^^^^^^^^^^^^^ + +--------------------- +% perf bench sched messaging # run with default +options (20 sender and receiver processes per group) +(10 groups == 400 processes run) + + Total time:0.308 sec + +% perf bench sched messaging -t -g 20 # be multi-thread,with 20 groups +(20 sender and receiver threads per group) +(20 groups == 800 threads run) + + Total time:0.582 sec +--------------------- + +*pipe*:: +Suite for pipe() system call. +Based on pipe-test-1m.c by Ingo Molnar. + +Options of *pipe* +^^^^^^^^^^^^^^^^^ +-l:: +--loop=:: +Specify number of loops. + +Example of *pipe* +^^^^^^^^^^^^^^^^^ + +--------------------- +% perf bench sched pipe +(executing 1000000 pipe operations between two tasks) + + Total time:8.091 sec + 8.091833 usecs/op + 123581 ops/sec + +% perf bench sched pipe -l 1000 # loop 1000 +(executing 1000 pipe operations between two tasks) + + Total time:0.016 sec + 16.948000 usecs/op + 59004 ops/sec +--------------------- + +SEE ALSO +-------- +linkperf:perf[1] diff --git a/tools/perf/Documentation/perf-buildid-list.txt b/tools/perf/Documentation/perf-buildid-list.txt new file mode 100644 index 00000000000..01b642c0bf8 --- /dev/null +++ b/tools/perf/Documentation/perf-buildid-list.txt @@ -0,0 +1,34 @@ +perf-buildid-list(1) +==================== + +NAME +---- +perf-buildid-list - List the buildids in a perf.data file + +SYNOPSIS +-------- +[verse] +'perf buildid-list <options>' + +DESCRIPTION +----------- +This command displays the buildids found in a perf.data file, so that other +tools can be used to fetch packages with matching symbol tables for use by +perf report. + +OPTIONS +------- +-i:: +--input=:: + Input file name. (default: perf.data) +-f:: +--force:: + Don't do ownership validation. +-v:: +--verbose:: + Be more verbose. + +SEE ALSO +-------- +linkperf:perf-record[1], linkperf:perf-top[1], +linkperf:perf-report[1] diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt new file mode 100644 index 00000000000..44b0ce35c28 --- /dev/null +++ b/tools/perf/Documentation/perf-kmem.txt @@ -0,0 +1,44 @@ +perf-kmem(1) +============== + +NAME +---- +perf-kmem - Tool to trace/measure kernel memory(slab) properties + +SYNOPSIS +-------- +[verse] +'perf kmem' {record} [<options>] + +DESCRIPTION +----------- +There's two variants of perf kmem: + + 'perf kmem record <command>' to record the kmem events + of an arbitrary workload. + + 'perf kmem' to report kernel memory statistics. + +OPTIONS +------- +-i <file>:: +--input=<file>:: + Select the input file (default: perf.data) + +--stat=<caller|alloc>:: + Select per callsite or per allocation statistics + +-s <key[,key2...]>:: +--sort=<key[,key2...]>:: + Sort the output (default: frag,hit,bytes) + +-l <num>:: +--line=<num>:: + Print n lines only + +--raw-ip:: + Print raw ip instead of symbol + +SEE ALSO +-------- +linkperf:perf-record[1] diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt new file mode 100644 index 00000000000..9270594e6df --- /dev/null +++ b/tools/perf/Documentation/perf-probe.txt @@ -0,0 +1,49 @@ +perf-probe(1) +============= + +NAME +---- +perf-probe - Define new dynamic tracepoints + +SYNOPSIS +-------- +[verse] +'perf probe' [options] --add 'PROBE' [--add 'PROBE' ...] +or +'perf probe' [options] 'PROBE' ['PROBE' ...] + + +DESCRIPTION +----------- +This command defines dynamic tracepoint events, by symbol and registers +without debuginfo, or by C expressions (C line numbers, C function names, +and C local variables) with debuginfo. + + +OPTIONS +------- +-k:: +--vmlinux=PATH:: + Specify vmlinux path which has debuginfo (Dwarf binary). + +-v:: +--verbose:: + Be more verbose (show parsed arguments, etc). + +-a:: +--add:: + Define a probe point (see PROBE SYNTAX for detail) + +PROBE SYNTAX +------------ +Probe points are defined by following syntax. + + "FUNC[+OFFS|:RLN|%return][@SRC]|SRC:ALN [ARG ...]" + +'FUNC' specifies a probed function name, and it may have one of the following options; '+OFFS' is the offset from function entry address in bytes, 'RLN' is the relative-line number from function entry line, and '%return' means that it probes function return. In addition, 'SRC' specifies a source file which has that function. +It is also possible to specify a probe point by the source line number by using 'SRC:ALN' syntax, where 'SRC' is the source file path and 'ALN' is the line number. +'ARG' specifies the arguments of this probe point. You can use the name of local variable, or kprobe-tracer argument format (e.g. $retval, %ax, etc). + +SEE ALSO +-------- +linkperf:perf-trace[1], linkperf:perf-record[1] diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 0ff23de9e45..fc46c0b40f6 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -26,11 +26,19 @@ OPTIONS -e:: --event=:: - Select the PMU event. Selection can be a symbolic event name - (use 'perf list' to list all events) or a raw PMU - event (eventsel+umask) in the form of rNNN where NNN is a - hexadecimal event descriptor. + Select the PMU event. Selection can be: + - a symbolic event name (use 'perf list' to list all events) + + - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a + hexadecimal event descriptor. + + - a hardware breakpoint event in the form of '\mem:addr[:access]' + where addr is the address in memory you want to break in. + Access is the memory access type (read, write, execute) it can + be passed as follows: '\mem:addr[:[r][w][x]]'. + If you want to profile read-write accesses in 0x1000, just set + 'mem:0x1000:rw'. -a:: System-wide collection. diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 59f0b846cd7..9dccb180b7a 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -24,11 +24,11 @@ OPTIONS --dsos=:: Only consider symbols in these dsos. CSV that understands file://filename entries. --n ---show-nr-samples +-n:: +--show-nr-samples:: Show the number of samples for each symbol --T ---threads +-T:: +--threads:: Show per-thread event counters -C:: --comms=:: diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt index a7910099d6f..4b1788355ec 100644 --- a/tools/perf/Documentation/perf-timechart.txt +++ b/tools/perf/Documentation/perf-timechart.txt @@ -31,9 +31,12 @@ OPTIONS -w:: --width=:: Select the width of the SVG file (default: 1000) --p:: +-P:: --power-only:: Only output the CPU power section of the diagram +-p:: +--process:: + Select the processes to display, by name or PID SEE ALSO diff --git a/tools/perf/Documentation/perf-trace-perl.txt b/tools/perf/Documentation/perf-trace-perl.txt new file mode 100644 index 00000000000..c5f55f43909 --- /dev/null +++ b/tools/perf/Documentation/perf-trace-perl.txt @@ -0,0 +1,219 @@ +perf-trace-perl(1) +================== + +NAME +---- +perf-trace-perl - Process trace data with a Perl script + +SYNOPSIS +-------- +[verse] +'perf trace' [-s [lang]:script[.ext] ] + +DESCRIPTION +----------- + +This perf trace option is used to process perf trace data using perf's +built-in Perl interpreter. It reads and processes the input file and +displays the results of the trace analysis implemented in the given +Perl script, if any. + +STARTER SCRIPTS +--------------- + +You can avoid reading the rest of this document by running 'perf trace +-g perl' in the same directory as an existing perf.data trace file. +That will generate a starter script containing a handler for each of +the event types in the trace file; it simply prints every available +field for each event in the trace file. + +You can also look at the existing scripts in +~/libexec/perf-core/scripts/perl for typical examples showing how to +do basic things like aggregate event data, print results, etc. Also, +the check-perf-trace.pl script, while not interesting for its results, +attempts to exercise all of the main scripting features. + +EVENT HANDLERS +-------------- + +When perf trace is invoked using a trace script, a user-defined +'handler function' is called for each event in the trace. If there's +no handler function defined for a given event type, the event is +ignored (or passed to a 'trace_handled' function, see below) and the +next event is processed. + +Most of the event's field values are passed as arguments to the +handler function; some of the less common ones aren't - those are +available as calls back into the perf executable (see below). + +As an example, the following perf record command can be used to record +all sched_wakeup events in the system: + + # perf record -c 1 -f -a -M -R -e sched:sched_wakeup + +Traces meant to be processed using a script should be recorded with +the above options: -c 1 says to sample every event, -a to enable +system-wide collection, -M to multiplex the output, and -R to collect +raw samples. + +The format file for the sched_wakep event defines the following fields +(see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format): + +---- + format: + field:unsigned short common_type; + field:unsigned char common_flags; + field:unsigned char common_preempt_count; + field:int common_pid; + field:int common_lock_depth; + + field:char comm[TASK_COMM_LEN]; + field:pid_t pid; + field:int prio; + field:int success; + field:int target_cpu; +---- + +The handler function for this event would be defined as: + +---- +sub sched::sched_wakeup +{ + my ($event_name, $context, $common_cpu, $common_secs, + $common_nsecs, $common_pid, $common_comm, + $comm, $pid, $prio, $success, $target_cpu) = @_; +} +---- + +The handler function takes the form subsystem::event_name. + +The $common_* arguments in the handler's argument list are the set of +arguments passed to all event handlers; some of the fields correspond +to the common_* fields in the format file, but some are synthesized, +and some of the common_* fields aren't common enough to to be passed +to every event as arguments but are available as library functions. + +Here's a brief description of each of the invariant event args: + + $event_name the name of the event as text + $context an opaque 'cookie' used in calls back into perf + $common_cpu the cpu the event occurred on + $common_secs the secs portion of the event timestamp + $common_nsecs the nsecs portion of the event timestamp + $common_pid the pid of the current task + $common_comm the name of the current process + +All of the remaining fields in the event's format file have +counterparts as handler function arguments of the same name, as can be +seen in the example above. + +The above provides the basics needed to directly access every field of +every event in a trace, which covers 90% of what you need to know to +write a useful trace script. The sections below cover the rest. + +SCRIPT LAYOUT +------------- + +Every perf trace Perl script should start by setting up a Perl module +search path and 'use'ing a few support modules (see module +descriptions below): + +---- + use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib"; + use lib "./Perf-Trace-Util/lib"; + use Perf::Trace::Core; + use Perf::Trace::Context; + use Perf::Trace::Util; +---- + +The rest of the script can contain handler functions and support +functions in any order. + +Aside from the event handler functions discussed above, every script +can implement a set of optional functions: + +*trace_begin*, if defined, is called before any event is processed and +gives scripts a chance to do setup tasks: + +---- + sub trace_begin + { + } +---- + +*trace_end*, if defined, is called after all events have been + processed and gives scripts a chance to do end-of-script tasks, such + as display results: + +---- +sub trace_end +{ +} +---- + +*trace_unhandled*, if defined, is called after for any event that + doesn't have a handler explicitly defined for it. The standard set + of common arguments are passed into it: + +---- +sub trace_unhandled +{ + my ($event_name, $context, $common_cpu, $common_secs, + $common_nsecs, $common_pid, $common_comm) = @_; +} +---- + +The remaining sections provide descriptions of each of the available +built-in perf trace Perl modules and their associated functions. + +AVAILABLE MODULES AND FUNCTIONS +------------------------------- + +The following sections describe the functions and variables available +via the various Perf::Trace::* Perl modules. To use the functions and +variables from the given module, add the corresponding 'use +Perf::Trace::XXX' line to your perf trace script. + +Perf::Trace::Core Module +~~~~~~~~~~~~~~~~~~~~~~~~ + +These functions provide some essential functions to user scripts. + +The *flag_str* and *symbol_str* functions provide human-readable +strings for flag and symbolic fields. These correspond to the strings +and values parsed from the 'print fmt' fields of the event format +files: + + flag_str($event_name, $field_name, $field_value) - returns the string represention corresponding to $field_value for the flag field $field_name of event $event_name + symbol_str($event_name, $field_name, $field_value) - returns the string represention corresponding to $field_value for the symbolic field $field_name of event $event_name + +Perf::Trace::Context Module +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some of the 'common' fields in the event format file aren't all that +common, but need to be made accessible to user scripts nonetheless. + +Perf::Trace::Context defines a set of functions that can be used to +access this data in the context of the current event. Each of these +functions expects a $context variable, which is the same as the +$context variable passed into every event handler as the second +argument. + + common_pc($context) - returns common_preempt count for the current event + common_flags($context) - returns common_flags for the current event + common_lock_depth($context) - returns common_lock_depth for the current event + +Perf::Trace::Util Module +~~~~~~~~~~~~~~~~~~~~~~~~ + +Various utility functions for use with perf trace: + + nsecs($secs, $nsecs) - returns total nsecs given secs/nsecs pair + nsecs_secs($nsecs) - returns whole secs portion given nsecs + nsecs_nsecs($nsecs) - returns nsecs remainder given nsecs + nsecs_str($nsecs) - returns printable string in the form secs.nsecs + avg($total, $n) - returns average given a sum and a total number of values + +SEE ALSO +-------- +linkperf:perf-trace[1] diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 41ed75398ca..07065efa60e 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -20,6 +20,15 @@ OPTIONS --dump-raw-trace=:: Display verbose dump of the trace data. +-s:: +--script=:: + Process trace data with the given script ([lang]:script[.ext]). + +-g:: +--gen-script=:: + Generate perf-trace.[ext] starter script for given language, + using current perf.data. + SEE ALSO -------- -linkperf:perf-record[1] +linkperf:perf-record[1], linkperf:perf-trace-perl[1] diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 7e190d522cd..23ec66098bd 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -2,6 +2,7 @@ all:: # Define V=1 to have a more verbose compile. +# Define V=2 to have an even more verbose compile. # # Define SNPRINTF_RETURNS_BOGUS if your are on a system which snprintf() # or vsnprintf() return -1 instead of number of characters which would @@ -145,6 +146,10 @@ all:: # Define NO_EXTERNAL_GREP if you don't want "perf grep" to ever call # your external grep (e.g., if your system lacks grep, if its grep is # broken, or spawning external process is slower than built-in grep perf has). +# +# Define LDFLAGS=-static to build a static binary. +# +# Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds. PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE @$(SHELL_PATH) util/PERF-VERSION-GEN @@ -157,20 +162,6 @@ uname_R := $(shell sh -c 'uname -r 2>/dev/null || echo not') uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not') uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not') -# -# Add -m32 for cross-builds: -# -ifdef NO_64BIT - MBITS := -m32 -else - # - # If we're on a 64-bit kernel, use -m64: - # - ifneq ($(patsubst %64,%,$(uname_M)),$(uname_M)) - MBITS := -m64 - endif -endif - # CFLAGS and LDFLAGS are for the users to override from the command line. # @@ -200,8 +191,15 @@ EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wold-style-definition EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wstrict-prototypes EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wdeclaration-after-statement -CFLAGS = $(MBITS) -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -fstack-protector-all -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) -LDFLAGS = -lpthread -lrt -lelf -lm +ifeq ("$(origin DEBUG)", "command line") + PERF_DEBUG = $(DEBUG) +endif +ifndef PERF_DEBUG + CFLAGS_OPTIMIZE = -O6 +endif + +CFLAGS = -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) +EXTLIBS = -lpthread -lrt -lelf -lm ALL_CFLAGS = $(CFLAGS) ALL_LDFLAGS = $(LDFLAGS) STRIP ?= strip @@ -252,6 +250,9 @@ PTHREAD_LIBS = -lpthread # explicitly what architecture to check for. Fix this up for yours.. SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__ +ifeq ($(shell sh -c "echo 'int foo(void) {char X[2]; return 3;}' | $(CC) -x c -c -Werror -fstack-protector-all - -o /dev/null "$(QUIET_STDERR)" && echo y"), y) + CFLAGS := $(CFLAGS) -fstack-protector-all +endif ### --- END CONFIGURATION SECTION --- @@ -327,8 +328,28 @@ LIB_FILE=libperf.a LIB_H += ../../include/linux/perf_event.h LIB_H += ../../include/linux/rbtree.h LIB_H += ../../include/linux/list.h +LIB_H += ../../include/linux/stringify.h +LIB_H += util/include/linux/bitmap.h +LIB_H += util/include/linux/bitops.h +LIB_H += util/include/linux/compiler.h +LIB_H += util/include/linux/ctype.h +LIB_H += util/include/linux/kernel.h LIB_H += util/include/linux/list.h +LIB_H += util/include/linux/module.h +LIB_H += util/include/linux/poison.h +LIB_H += util/include/linux/prefetch.h +LIB_H += util/include/linux/rbtree.h +LIB_H += util/include/linux/string.h +LIB_H += util/include/linux/types.h +LIB_H += util/include/asm/asm-offsets.h +LIB_H += util/include/asm/bitops.h +LIB_H += util/include/asm/byteorder.h +LIB_H += util/include/asm/swab.h +LIB_H += util/include/asm/system.h +LIB_H += util/include/asm/uaccess.h LIB_H += perf.h +LIB_H += util/debugfs.h +LIB_H += util/event.h LIB_H += util/types.h LIB_H += util/levenshtein.h LIB_H += util/parse-options.h @@ -342,15 +363,22 @@ LIB_H += util/strlist.h LIB_H += util/run-command.h LIB_H += util/sigchain.h LIB_H += util/symbol.h -LIB_H += util/module.h LIB_H += util/color.h LIB_H += util/values.h +LIB_H += util/sort.h +LIB_H += util/hist.h +LIB_H += util/thread.h +LIB_H += util/data_map.h +LIB_H += util/probe-finder.h +LIB_H += util/probe-event.h LIB_OBJS += util/abspath.o LIB_OBJS += util/alias.o LIB_OBJS += util/config.o LIB_OBJS += util/ctype.o +LIB_OBJS += util/debugfs.o LIB_OBJS += util/environment.o +LIB_OBJS += util/event.o LIB_OBJS += util/exec_cmd.o LIB_OBJS += util/help.o LIB_OBJS += util/levenshtein.o @@ -358,6 +386,9 @@ LIB_OBJS += util/parse-options.o LIB_OBJS += util/parse-events.o LIB_OBJS += util/path.o LIB_OBJS += util/rbtree.o +LIB_OBJS += util/bitmap.o +LIB_OBJS += util/hweight.o +LIB_OBJS += util/find_next_bit.o LIB_OBJS += util/run-command.o LIB_OBJS += util/quote.o LIB_OBJS += util/strbuf.o @@ -367,7 +398,6 @@ LIB_OBJS += util/usage.o LIB_OBJS += util/wrapper.o LIB_OBJS += util/sigchain.o LIB_OBJS += util/symbol.o -LIB_OBJS += util/module.o LIB_OBJS += util/color.o LIB_OBJS += util/pager.o LIB_OBJS += util/header.o @@ -379,11 +409,25 @@ LIB_OBJS += util/thread.o LIB_OBJS += util/trace-event-parse.o LIB_OBJS += util/trace-event-read.o LIB_OBJS += util/trace-event-info.o +LIB_OBJS += util/trace-event-perl.o LIB_OBJS += util/svghelper.o +LIB_OBJS += util/sort.o +LIB_OBJS += util/hist.o +LIB_OBJS += util/data_map.o +LIB_OBJS += util/probe-event.o BUILTIN_OBJS += builtin-annotate.o + +BUILTIN_OBJS += builtin-bench.o + +# Benchmark modules +BUILTIN_OBJS += bench/sched-messaging.o +BUILTIN_OBJS += bench/sched-pipe.o +BUILTIN_OBJS += bench/mem-memcpy.o + BUILTIN_OBJS += builtin-help.o BUILTIN_OBJS += builtin-sched.o +BUILTIN_OBJS += builtin-buildid-list.o BUILTIN_OBJS += builtin-list.o BUILTIN_OBJS += builtin-record.o BUILTIN_OBJS += builtin-report.o @@ -391,9 +435,16 @@ BUILTIN_OBJS += builtin-stat.o BUILTIN_OBJS += builtin-timechart.o BUILTIN_OBJS += builtin-top.o BUILTIN_OBJS += builtin-trace.o +BUILTIN_OBJS += builtin-probe.o +BUILTIN_OBJS += builtin-kmem.o PERFLIBS = $(LIB_FILE) +ifeq ($(V), 2) + QUIET_STDERR = ">/dev/null" +else + QUIET_STDERR = ">/dev/null 2>&1" +endif # # Platform specific tweaks # @@ -421,36 +472,58 @@ ifeq ($(uname_S),Darwin) PTHREAD_LIBS = endif -ifeq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y) - ifneq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y) +ifeq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) +ifneq ($(shell sh -c "(echo '\#include <gnu/libc-version.h>'; echo 'int main(void) { const char * version = gnu_get_libc_version(); return (long)version; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) + msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static); +endif + + ifneq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) BASIC_CFLAGS += -DLIBELF_NO_MMAP endif else msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel and glibc-dev[el]); endif +ifneq ($(shell sh -c "(echo '\#include <libdwarf/dwarf.h>'; echo '\#include <libdwarf/libdwarf.h>'; echo 'int main(void) { Dwarf_Debug dbg; Dwarf_Error err; Dwarf_Ranges *rng; dwarf_init(0, DW_DLC_READ, 0, 0, &dbg, &err); dwarf_get_ranges(dbg, 0, &rng, 0, 0, &err); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -ldwarf -lelf -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) + msg := $(warning No libdwarf.h found or old libdwarf.h found, disables dwarf support. Please install libdwarf-dev/libdwarf-devel >= 20081231); + BASIC_CFLAGS += -DNO_LIBDWARF +else + EXTLIBS += -lelf -ldwarf + LIB_OBJS += util/probe-finder.o +endif + +PERL_EMBED_LDOPTS = `perl -MExtUtils::Embed -e ldopts 2>/dev/null` +PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null` + +ifneq ($(shell sh -c "(echo '\#include <EXTERN.h>'; echo '\#include <perl.h>'; echo 'int main(void) { perl_alloc(); return 0; }') | $(CC) -x c - $(PERL_EMBED_CCOPTS) -o /dev/null $(PERL_EMBED_LDOPTS) > /dev/null 2>&1 && echo y"), y) + BASIC_CFLAGS += -DNO_LIBPERL +else + ALL_LDFLAGS += $(PERL_EMBED_LDOPTS) + LIB_OBJS += scripts/perl/Perf-Trace-Util/Context.o +endif + ifdef NO_DEMANGLE BASIC_CFLAGS += -DNO_DEMANGLE else - has_bfd := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd > /dev/null 2>&1 && echo y") + has_bfd := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd "$(QUIET_STDERR)" && echo y") ifeq ($(has_bfd),y) EXTLIBS += -lbfd else - has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y") + has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty "$(QUIET_STDERR)" && echo y") ifeq ($(has_bfd_iberty),y) EXTLIBS += -lbfd -liberty else - has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y") + has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty -lz "$(QUIET_STDERR)" && echo y") ifeq ($(has_bfd_iberty_z),y) EXTLIBS += -lbfd -liberty -lz else - has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -liberty > /dev/null 2>&1 && echo y") + has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -liberty "$(QUIET_STDERR)" && echo y") ifeq ($(has_cplus_demangle),y) EXTLIBS += -liberty BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE else - msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling) + msg := $(warning No bfd.h/libbfd found, install binutils-dev[el]/zlib-static to gain symbol demangling) BASIC_CFLAGS += -DNO_DEMANGLE endif endif @@ -787,6 +860,25 @@ util/config.o: util/config.c PERF-CFLAGS util/rbtree.o: ../../lib/rbtree.c PERF-CFLAGS $(QUIET_CC)$(CC) -o util/rbtree.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< +# some perf warning policies can't fit to lib/bitmap.c, eg: it warns about variable shadowing +# from <string.h> that comes from kernel headers wrapping. +KBITMAP_FLAGS=`echo $(ALL_CFLAGS) | sed s/-Wshadow// | sed s/-Wswitch-default// | sed s/-Wextra//` + +util/bitmap.o: ../../lib/bitmap.c PERF-CFLAGS + $(QUIET_CC)$(CC) -o util/bitmap.o -c $(KBITMAP_FLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< + +util/hweight.o: ../../lib/hweight.c PERF-CFLAGS + $(QUIET_CC)$(CC) -o util/hweight.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< + +util/find_next_bit.o: ../../lib/find_next_bit.c PERF-CFLAGS + $(QUIET_CC)$(CC) -o util/find_next_bit.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< + +util/trace-event-perl.o: util/trace-event-perl.c PERF-CFLAGS + $(QUIET_CC)$(CC) -o util/trace-event-perl.o -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $< + +scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c PERF-CFLAGS + $(QUIET_CC)$(CC) -o scripts/perl/Perf-Trace-Util/Context.o -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $< + perf-%$X: %.o $(PERFLIBS) $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) @@ -894,6 +986,13 @@ export perfexec_instdir install: all $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)' $(INSTALL) perf$X '$(DESTDIR_SQ)$(bindir_SQ)' + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace' + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin' + $(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace' + $(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl' + $(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin' + $(INSTALL) scripts/perl/Perf-Trace-Util/Makefile.PL -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util' + $(INSTALL) scripts/perl/Perf-Trace-Util/README -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util' ifdef BUILT_INS $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' $(INSTALL) $(BUILT_INS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' @@ -979,7 +1078,7 @@ distclean: clean # $(RM) configure clean: - $(RM) *.o */*.o $(LIB_FILE) + $(RM) *.o */*.o */*/*.o */*/*/*.o $(LIB_FILE) $(RM) $(ALL_PROGRAMS) $(BUILT_INS) perf$X $(RM) $(TEST_PROGRAMS) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo common-cmds.h TAGS tags cscope* diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h new file mode 100644 index 00000000000..f7781c6267c --- /dev/null +++ b/tools/perf/bench/bench.h @@ -0,0 +1,17 @@ +#ifndef BENCH_H +#define BENCH_H + +extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); +extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); +extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used); + +#define BENCH_FORMAT_DEFAULT_STR "default" +#define BENCH_FORMAT_DEFAULT 0 +#define BENCH_FORMAT_SIMPLE_STR "simple" +#define BENCH_FORMAT_SIMPLE 1 + +#define BENCH_FORMAT_UNKNOWN -1 + +extern int bench_format; + +#endif diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c new file mode 100644 index 00000000000..89773178e89 --- /dev/null +++ b/tools/perf/bench/mem-memcpy.c @@ -0,0 +1,193 @@ +/* + * mem-memcpy.c + * + * memcpy: Simple memory copy in various ways + * + * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> + */ +#include <ctype.h> + +#include "../perf.h" +#include "../util/util.h" +#include "../util/parse-options.h" +#include "../util/string.h" +#include "../util/header.h" +#include "bench.h" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> +#include <errno.h> + +#define K 1024 + +static const char *length_str = "1MB"; +static const char *routine = "default"; +static int use_clock = 0; +static int clock_fd; + +static const struct option options[] = { + OPT_STRING('l', "length", &length_str, "1MB", + "Specify length of memory to copy. " + "available unit: B, MB, GB (upper and lower)"), + OPT_STRING('r', "routine", &routine, "default", + "Specify routine to copy"), + OPT_BOOLEAN('c', "clock", &use_clock, + "Use CPU clock for measuring"), + OPT_END() +}; + +struct routine { + const char *name; + const char *desc; + void * (*fn)(void *dst, const void *src, size_t len); +}; + +struct routine routines[] = { + { "default", + "Default memcpy() provided by glibc", + memcpy }, + { NULL, + NULL, + NULL } +}; + +static const char * const bench_mem_memcpy_usage[] = { + "perf bench mem memcpy <options>", + NULL +}; + +static struct perf_event_attr clock_attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES +}; + +static void init_clock(void) +{ + clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0); + + if (clock_fd < 0 && errno == ENOSYS) + die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); + else + BUG_ON(clock_fd < 0); +} + +static u64 get_clock(void) +{ + int ret; + u64 clk; + + ret = read(clock_fd, &clk, sizeof(u64)); + BUG_ON(ret != sizeof(u64)); + + return clk; +} + +static double timeval2double(struct timeval *ts) +{ + return (double)ts->tv_sec + + (double)ts->tv_usec / (double)1000000; +} + +int bench_mem_memcpy(int argc, const char **argv, + const char *prefix __used) +{ + int i; + void *dst, *src; + size_t length; + double bps = 0.0; + struct timeval tv_start, tv_end, tv_diff; + u64 clock_start, clock_end, clock_diff; + + clock_start = clock_end = clock_diff = 0ULL; + argc = parse_options(argc, argv, options, + bench_mem_memcpy_usage, 0); + + tv_diff.tv_sec = 0; + tv_diff.tv_usec = 0; + length = (size_t)perf_atoll((char *)length_str); + + if ((s64)length <= 0) { + fprintf(stderr, "Invalid length:%s\n", length_str); + return 1; + } + + for (i = 0; routines[i].name; i++) { + if (!strcmp(routines[i].name, routine)) + break; + } + if (!routines[i].name) { + printf("Unknown routine:%s\n", routine); + printf("Available routines...\n"); + for (i = 0; routines[i].name; i++) { + printf("\t%s ... %s\n", + routines[i].name, routines[i].desc); + } + return 1; + } + + dst = zalloc(length); + if (!dst) + die("memory allocation failed - maybe length is too large?\n"); + + src = zalloc(length); + if (!src) + die("memory allocation failed - maybe length is too large?\n"); + + if (bench_format == BENCH_FORMAT_DEFAULT) { + printf("# Copying %s Bytes from %p to %p ...\n\n", + length_str, src, dst); + } + + if (use_clock) { + init_clock(); + clock_start = get_clock(); + } else { + BUG_ON(gettimeofday(&tv_start, NULL)); + } + + routines[i].fn(dst, src, length); + + if (use_clock) { + clock_end = get_clock(); + clock_diff = clock_end - clock_start; + } else { + BUG_ON(gettimeofday(&tv_end, NULL)); + timersub(&tv_end, &tv_start, &tv_diff); + bps = (double)((double)length / timeval2double(&tv_diff)); + } + + switch (bench_format) { + case BENCH_FORMAT_DEFAULT: + if (use_clock) { + printf(" %14lf Clock/Byte\n", + (double)clock_diff / (double)length); + } else { + if (bps < K) + printf(" %14lf B/Sec\n", bps); + else if (bps < K * K) + printf(" %14lfd KB/Sec\n", bps / 1024); + else if (bps < K * K * K) + printf(" %14lf MB/Sec\n", bps / 1024 / 1024); + else { + printf(" %14lf GB/Sec\n", + bps / 1024 / 1024 / 1024); + } + } + break; + case BENCH_FORMAT_SIMPLE: + if (use_clock) { + printf("%14lf\n", + (double)clock_diff / (double)length); + } else + printf("%lf\n", bps); + break; + default: + /* reaching this means there's some disaster: */ + die("unknown format: %d\n", bench_format); + break; + } + + return 0; +} diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c new file mode 100644 index 00000000000..605a2a959aa --- /dev/null +++ b/tools/perf/bench/sched-messaging.c @@ -0,0 +1,336 @@ +/* + * + * builtin-bench-messaging.c + * + * messaging: Benchmark for scheduler and IPC mechanisms + * + * Based on hackbench by Rusty Russell <rusty@rustcorp.com.au> + * Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> + * + */ + +#include "../perf.h" +#include "../util/util.h" +#include "../util/parse-options.h" +#include "../builtin.h" +#include "bench.h" + +/* Test groups of 20 processes spraying to 20 receivers */ +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/wait.h> +#include <sys/time.h> +#include <sys/poll.h> +#include <limits.h> + +#define DATASIZE 100 + +static int use_pipes = 0; +static unsigned int loops = 100; +static unsigned int thread_mode = 0; +static unsigned int num_groups = 10; + +struct sender_context { + unsigned int num_fds; + int ready_out; + int wakefd; + int out_fds[0]; +}; + +struct receiver_context { + unsigned int num_packets; + int in_fds[2]; + int ready_out; + int wakefd; +}; + +static void barf(const char *msg) +{ + fprintf(stderr, "%s (error: %s)\n", msg, strerror(errno)); + exit(1); +} + +static void fdpair(int fds[2]) +{ + if (use_pipes) { + if (pipe(fds) == 0) + return; + } else { + if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) == 0) + return; + } + + barf(use_pipes ? "pipe()" : "socketpair()"); +} + +/* Block until we're ready to go */ +static void ready(int ready_out, int wakefd) +{ + char dummy; + struct pollfd pollfd = { .fd = wakefd, .events = POLLIN }; + + /* Tell them we're ready. */ + if (write(ready_out, &dummy, 1) != 1) + barf("CLIENT: ready write"); + + /* Wait for "GO" signal */ + if (poll(&pollfd, 1, -1) != 1) + barf("poll"); +} + +/* Sender sprays loops messages down each file descriptor */ +static void *sender(struct sender_context *ctx) +{ + char data[DATASIZE]; + unsigned int i, j; + + ready(ctx->ready_out, ctx->wakefd); + + /* Now pump to every receiver. */ + for (i = 0; i < loops; i++) { + for (j = 0; j < ctx->num_fds; j++) { + int ret, done = 0; + +again: + ret = write(ctx->out_fds[j], data + done, + sizeof(data)-done); + if (ret < 0) + barf("SENDER: write"); + done += ret; + if (done < DATASIZE) + goto again; + } + } + + return NULL; +} + + +/* One receiver per fd */ +static void *receiver(struct receiver_context* ctx) +{ + unsigned int i; + + if (!thread_mode) + close(ctx->in_fds[1]); + + /* Wait for start... */ + ready(ctx->ready_out, ctx->wakefd); + + /* Receive them all */ + for (i = 0; i < ctx->num_packets; i++) { + char data[DATASIZE]; + int ret, done = 0; + +again: + ret = read(ctx->in_fds[0], data + done, DATASIZE - done); + if (ret < 0) + barf("SERVER: read"); + done += ret; + if (done < DATASIZE) + goto again; + } + + return NULL; +} + +static pthread_t create_worker(void *ctx, void *(*func)(void *)) +{ + pthread_attr_t attr; + pthread_t childid; + int err; + + if (!thread_mode) { + /* process mode */ + /* Fork the receiver. */ + switch (fork()) { + case -1: + barf("fork()"); + break; + case 0: + (*func) (ctx); + exit(0); + break; + default: + break; + } + + return (pthread_t)0; + } + + if (pthread_attr_init(&attr) != 0) + barf("pthread_attr_init:"); + +#ifndef __ia64__ + if (pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN) != 0) + barf("pthread_attr_setstacksize"); +#endif + + err = pthread_create(&childid, &attr, func, ctx); + if (err != 0) { + fprintf(stderr, "pthread_create failed: %s (%d)\n", + strerror(err), err); + exit(-1); + } + return childid; +} + +static void reap_worker(pthread_t id) +{ + int proc_status; + void *thread_status; + + if (!thread_mode) { + /* process mode */ + wait(&proc_status); + if (!WIFEXITED(proc_status)) + exit(1); + } else { + pthread_join(id, &thread_status); + } +} + +/* One group of senders and receivers */ +static unsigned int group(pthread_t *pth, + unsigned int num_fds, + int ready_out, + int wakefd) +{ + unsigned int i; + struct sender_context *snd_ctx = malloc(sizeof(struct sender_context) + + num_fds * sizeof(int)); + + if (!snd_ctx) + barf("malloc()"); + + for (i = 0; i < num_fds; i++) { + int fds[2]; + struct receiver_context *ctx = malloc(sizeof(*ctx)); + + if (!ctx) + barf("malloc()"); + + + /* Create the pipe between client and server */ + fdpair(fds); + + ctx->num_packets = num_fds * loops; + ctx->in_fds[0] = fds[0]; + ctx->in_fds[1] = fds[1]; + ctx->ready_out = ready_out; + ctx->wakefd = wakefd; + + pth[i] = create_worker(ctx, (void *)receiver); + + snd_ctx->out_fds[i] = fds[1]; + if (!thread_mode) + close(fds[0]); + } + + /* Now we have all the fds, fork the senders */ + for (i = 0; i < num_fds; i++) { + snd_ctx->ready_out = ready_out; + snd_ctx->wakefd = wakefd; + snd_ctx->num_fds = num_fds; + + pth[num_fds+i] = create_worker(snd_ctx, (void *)sender); + } + + /* Close the fds we have left */ + if (!thread_mode) + for (i = 0; i < num_fds; i++) + close(snd_ctx->out_fds[i]); + + /* Return number of children to reap */ + return num_fds * 2; +} + +static const struct option options[] = { + OPT_BOOLEAN('p', "pipe", &use_pipes, + "Use pipe() instead of socketpair()"), + OPT_BOOLEAN('t', "thread", &thread_mode, + "Be multi thread instead of multi process"), + OPT_INTEGER('g', "group", &num_groups, + "Specify number of groups"), + OPT_INTEGER('l', "loop", &loops, + "Specify number of loops"), + OPT_END() +}; + +static const char * const bench_sched_message_usage[] = { + "perf bench sched messaging <options>", + NULL +}; + +int bench_sched_messaging(int argc, const char **argv, + const char *prefix __used) +{ + unsigned int i, total_children; + struct timeval start, stop, diff; + unsigned int num_fds = 20; + int readyfds[2], wakefds[2]; + char dummy; + pthread_t *pth_tab; + + argc = parse_options(argc, argv, options, + bench_sched_message_usage, 0); + + pth_tab = malloc(num_fds * 2 * num_groups * sizeof(pthread_t)); + if (!pth_tab) + barf("main:malloc()"); + + fdpair(readyfds); + fdpair(wakefds); + + total_children = 0; + for (i = 0; i < num_groups; i++) + total_children += group(pth_tab+total_children, num_fds, + readyfds[1], wakefds[0]); + + /* Wait for everyone to be ready */ + for (i = 0; i < total_children; i++) + if (read(readyfds[0], &dummy, 1) != 1) + barf("Reading for readyfds"); + + gettimeofday(&start, NULL); + + /* Kick them off */ + if (write(wakefds[1], &dummy, 1) != 1) + barf("Writing to start them"); + + /* Reap them all */ + for (i = 0; i < total_children; i++) + reap_worker(pth_tab[i]); + + gettimeofday(&stop, NULL); + + timersub(&stop, &start, &diff); + + switch (bench_format) { + case BENCH_FORMAT_DEFAULT: + printf("# %d sender and receiver %s per group\n", + num_fds, thread_mode ? "threads" : "processes"); + printf("# %d groups == %d %s run\n\n", + num_groups, num_groups * 2 * num_fds, + thread_mode ? "threads" : "processes"); + printf(" %14s: %lu.%03lu [sec]\n", "Total time", + diff.tv_sec, diff.tv_usec/1000); + break; + case BENCH_FORMAT_SIMPLE: + printf("%lu.%03lu\n", diff.tv_sec, diff.tv_usec/1000); + break; + default: + /* reaching here is something disaster */ + fprintf(stderr, "Unknown format:%d\n", bench_format); + exit(1); + break; + } + + return 0; +} diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c new file mode 100644 index 00000000000..238185f9797 --- /dev/null +++ b/tools/perf/bench/sched-pipe.c @@ -0,0 +1,124 @@ +/* + * + * builtin-bench-pipe.c + * + * pipe: Benchmark for pipe() + * + * Based on pipe-test-1m.c by Ingo Molnar <mingo@redhat.com> + * http://people.redhat.com/mingo/cfs-scheduler/tools/pipe-test-1m.c + * Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> + * + */ + +#include "../perf.h" +#include "../util/util.h" +#include "../util/parse-options.h" +#include "../builtin.h" +#include "bench.h" + +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <sys/wait.h> +#include <linux/unistd.h> +#include <string.h> +#include <errno.h> +#include <assert.h> +#include <sys/time.h> +#include <sys/types.h> + +#define LOOPS_DEFAULT 1000000 +static int loops = LOOPS_DEFAULT; + +static const struct option options[] = { + OPT_INTEGER('l', "loop", &loops, + "Specify number of loops"), + OPT_END() +}; + +static const char * const bench_sched_pipe_usage[] = { + "perf bench sched pipe <options>", + NULL +}; + +int bench_sched_pipe(int argc, const char **argv, + const char *prefix __used) +{ + int pipe_1[2], pipe_2[2]; + int m = 0, i; + struct timeval start, stop, diff; + unsigned long long result_usec = 0; + + /* + * why does "ret" exist? + * discarding returned value of read(), write() + * causes error in building environment for perf + */ + int ret, wait_stat; + pid_t pid, retpid; + + argc = parse_options(argc, argv, options, + bench_sched_pipe_usage, 0); + + assert(!pipe(pipe_1)); + assert(!pipe(pipe_2)); + + pid = fork(); + assert(pid >= 0); + + gettimeofday(&start, NULL); + + if (!pid) { + for (i = 0; i < loops; i++) { + ret = read(pipe_1[0], &m, sizeof(int)); + ret = write(pipe_2[1], &m, sizeof(int)); + } + } else { + for (i = 0; i < loops; i++) { + ret = write(pipe_1[1], &m, sizeof(int)); + ret = read(pipe_2[0], &m, sizeof(int)); + } + } + + gettimeofday(&stop, NULL); + timersub(&stop, &start, &diff); + + if (pid) { + retpid = waitpid(pid, &wait_stat, 0); + assert((retpid == pid) && WIFEXITED(wait_stat)); + return 0; + } + + switch (bench_format) { + case BENCH_FORMAT_DEFAULT: + printf("# Extecuted %d pipe operations between two tasks\n\n", + loops); + + result_usec = diff.tv_sec * 1000000; + result_usec += diff.tv_usec; + + printf(" %14s: %lu.%03lu [sec]\n\n", "Total time", + diff.tv_sec, diff.tv_usec/1000); + + printf(" %14lf usecs/op\n", + (double)result_usec / (double)loops); + printf(" %14d ops/sec\n", + (int)((double)loops / + ((double)result_usec / (double)1000000))); + break; + + case BENCH_FORMAT_SIMPLE: + printf("%lu.%03lu\n", + diff.tv_sec, diff.tv_usec / 1000); + break; + + default: + /* reaching here is something disaster */ + fprintf(stderr, "Unknown format:%d\n", bench_format); + exit(1); + break; + } + + return 0; +} diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 1ec74161581..0bf2e8f9af5 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -19,29 +19,26 @@ #include "perf.h" #include "util/debug.h" +#include "util/event.h" #include "util/parse-options.h" #include "util/parse-events.h" #include "util/thread.h" +#include "util/sort.h" +#include "util/hist.h" +#include "util/data_map.h" static char const *input_name = "perf.data"; -static char default_sort_order[] = "comm,symbol"; -static char *sort_order = default_sort_order; - static int force; -static int input; -static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; static int full_paths; static int print_line; -static unsigned long page_size; -static unsigned long mmap_window = 32; - -static struct rb_root threads; -static struct thread *last_match; - +struct sym_hist { + u64 sum; + u64 ip[0]; +}; struct sym_ext { struct rb_node node; @@ -49,247 +46,38 @@ struct sym_ext { char *path; }; -/* - * histogram, sorted on item, collects counts - */ - -static struct rb_root hist; - -struct hist_entry { - struct rb_node rb_node; - - struct thread *thread; - struct map *map; - struct dso *dso; - struct symbol *sym; - u64 ip; - char level; - - uint32_t count; -}; - -/* - * configurable sorting bits - */ - -struct sort_entry { - struct list_head list; - - const char *header; - - int64_t (*cmp)(struct hist_entry *, struct hist_entry *); - int64_t (*collapse)(struct hist_entry *, struct hist_entry *); - size_t (*print)(FILE *fp, struct hist_entry *); -}; - -/* --sort pid */ - -static int64_t -sort__thread_cmp(struct hist_entry *left, struct hist_entry *right) -{ - return right->thread->pid - left->thread->pid; -} - -static size_t -sort__thread_print(FILE *fp, struct hist_entry *self) -{ - return fprintf(fp, "%16s:%5d", self->thread->comm ?: "", self->thread->pid); -} - -static struct sort_entry sort_thread = { - .header = " Command: Pid", - .cmp = sort__thread_cmp, - .print = sort__thread_print, -}; - -/* --sort comm */ - -static int64_t -sort__comm_cmp(struct hist_entry *left, struct hist_entry *right) -{ - return right->thread->pid - left->thread->pid; -} - -static int64_t -sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) -{ - char *comm_l = left->thread->comm; - char *comm_r = right->thread->comm; - - if (!comm_l || !comm_r) { - if (!comm_l && !comm_r) - return 0; - else if (!comm_l) - return -1; - else - return 1; - } - - return strcmp(comm_l, comm_r); -} - -static size_t -sort__comm_print(FILE *fp, struct hist_entry *self) -{ - return fprintf(fp, "%16s", self->thread->comm); -} - -static struct sort_entry sort_comm = { - .header = " Command", - .cmp = sort__comm_cmp, - .collapse = sort__comm_collapse, - .print = sort__comm_print, -}; - -/* --sort dso */ - -static int64_t -sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) -{ - struct dso *dso_l = left->dso; - struct dso *dso_r = right->dso; - - if (!dso_l || !dso_r) { - if (!dso_l && !dso_r) - return 0; - else if (!dso_l) - return -1; - else - return 1; - } - - return strcmp(dso_l->name, dso_r->name); -} - -static size_t -sort__dso_print(FILE *fp, struct hist_entry *self) -{ - if (self->dso) - return fprintf(fp, "%-25s", self->dso->name); - - return fprintf(fp, "%016llx ", (u64)self->ip); -} - -static struct sort_entry sort_dso = { - .header = "Shared Object ", - .cmp = sort__dso_cmp, - .print = sort__dso_print, -}; - -/* --sort symbol */ - -static int64_t -sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) -{ - u64 ip_l, ip_r; - - if (left->sym == right->sym) - return 0; - - ip_l = left->sym ? left->sym->start : left->ip; - ip_r = right->sym ? right->sym->start : right->ip; - - return (int64_t)(ip_r - ip_l); -} - -static size_t -sort__sym_print(FILE *fp, struct hist_entry *self) -{ - size_t ret = 0; - - if (verbose) - ret += fprintf(fp, "%#018llx ", (u64)self->ip); - - if (self->sym) { - ret += fprintf(fp, "[%c] %s", - self->dso == kernel_dso ? 'k' : '.', self->sym->name); - } else { - ret += fprintf(fp, "%#016llx", (u64)self->ip); - } - - return ret; -} - -static struct sort_entry sort_sym = { - .header = "Symbol", - .cmp = sort__sym_cmp, - .print = sort__sym_print, -}; - -static int sort__need_collapse = 0; - -struct sort_dimension { - const char *name; - struct sort_entry *entry; - int taken; +struct sym_priv { + struct sym_hist *hist; + struct sym_ext *ext; }; -static struct sort_dimension sort_dimensions[] = { - { .name = "pid", .entry = &sort_thread, }, - { .name = "comm", .entry = &sort_comm, }, - { .name = "dso", .entry = &sort_dso, }, - { .name = "symbol", .entry = &sort_sym, }, +static struct symbol_conf symbol_conf = { + .priv_size = sizeof(struct sym_priv), + .try_vmlinux_path = true, }; -static LIST_HEAD(hist_entry__sort_list); +static const char *sym_hist_filter; -static int sort_dimension__add(char *tok) +static int symbol_filter(struct map *map __used, struct symbol *sym) { - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) { - struct sort_dimension *sd = &sort_dimensions[i]; - - if (sd->taken) - continue; - - if (strncasecmp(tok, sd->name, strlen(tok))) - continue; - - if (sd->entry->collapse) - sort__need_collapse = 1; - - list_add_tail(&sd->entry->list, &hist_entry__sort_list); - sd->taken = 1; + if (sym_hist_filter == NULL || + strcmp(sym->name, sym_hist_filter) == 0) { + struct sym_priv *priv = symbol__priv(sym); + const int size = (sizeof(*priv->hist) + + (sym->end - sym->start) * sizeof(u64)); + priv->hist = malloc(size); + if (priv->hist) + memset(priv->hist, 0, size); return 0; } - - return -ESRCH; -} - -static int64_t -hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) -{ - struct sort_entry *se; - int64_t cmp = 0; - - list_for_each_entry(se, &hist_entry__sort_list, list) { - cmp = se->cmp(left, right); - if (cmp) - break; - } - - return cmp; -} - -static int64_t -hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) -{ - struct sort_entry *se; - int64_t cmp = 0; - - list_for_each_entry(se, &hist_entry__sort_list, list) { - int64_t (*f)(struct hist_entry *, struct hist_entry *); - - f = se->collapse ?: se->cmp; - - cmp = f(left, right); - if (cmp) - break; - } - - return cmp; + /* + * FIXME: We should really filter it out, as we don't want to go thru symbols + * we're not interested, and if a DSO ends up with no symbols, delete it too, + * but right now the kernel loading routines in symbol.c bail out if no symbols + * are found, fix it later. + */ + return 0; } /* @@ -299,380 +87,81 @@ static void hist_hit(struct hist_entry *he, u64 ip) { unsigned int sym_size, offset; struct symbol *sym = he->sym; + struct sym_priv *priv; + struct sym_hist *h; he->count++; - if (!sym || !sym->hist) + if (!sym || !he->map) + return; + + priv = symbol__priv(sym); + if (!priv->hist) return; sym_size = sym->end - sym->start; offset = ip - sym->start; + if (verbose) + fprintf(stderr, "%s: ip=%Lx\n", __func__, + he->map->unmap_ip(he->map, ip)); + if (offset >= sym_size) return; - sym->hist_sum++; - sym->hist[offset]++; + h = priv->hist; + h->sum++; + h->ip[offset]++; if (verbose >= 3) printf("%p %s: count++ [ip: %p, %08Lx] => %Ld\n", (void *)(unsigned long)he->sym->start, he->sym->name, (void *)(unsigned long)ip, ip - he->sym->start, - sym->hist[offset]); + h->ip[offset]); } -static int -hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, - struct symbol *sym, u64 ip, char level) +static int hist_entry__add(struct addr_location *al, u64 count) { - struct rb_node **p = &hist.rb_node; - struct rb_node *parent = NULL; - struct hist_entry *he; - struct hist_entry entry = { - .thread = thread, - .map = map, - .dso = dso, - .sym = sym, - .ip = ip, - .level = level, - .count = 1, - }; - int cmp; - - while (*p != NULL) { - parent = *p; - he = rb_entry(parent, struct hist_entry, rb_node); - - cmp = hist_entry__cmp(&entry, he); - - if (!cmp) { - hist_hit(he, ip); - - return 0; - } - - if (cmp < 0) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - he = malloc(sizeof(*he)); - if (!he) + bool hit; + struct hist_entry *he = __hist_entry__add(al, NULL, count, &hit); + if (he == NULL) return -ENOMEM; - *he = entry; - rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, &hist); - + hist_hit(he, al->addr); return 0; } -static void hist_entry__free(struct hist_entry *he) -{ - free(he); -} - -/* - * collapse the histogram - */ - -static struct rb_root collapse_hists; - -static void collapse__insert_entry(struct hist_entry *he) -{ - struct rb_node **p = &collapse_hists.rb_node; - struct rb_node *parent = NULL; - struct hist_entry *iter; - int64_t cmp; - - while (*p != NULL) { - parent = *p; - iter = rb_entry(parent, struct hist_entry, rb_node); - - cmp = hist_entry__collapse(iter, he); - - if (!cmp) { - iter->count += he->count; - hist_entry__free(he); - return; - } - - if (cmp < 0) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, &collapse_hists); -} - -static void collapse__resort(void) -{ - struct rb_node *next; - struct hist_entry *n; - - if (!sort__need_collapse) - return; - - next = rb_first(&hist); - while (next) { - n = rb_entry(next, struct hist_entry, rb_node); - next = rb_next(&n->rb_node); - - rb_erase(&n->rb_node, &hist); - collapse__insert_entry(n); - } -} - -/* - * reverse the map, sort on count. - */ - -static struct rb_root output_hists; - -static void output__insert_entry(struct hist_entry *he) +static int process_sample_event(event_t *event) { - struct rb_node **p = &output_hists.rb_node; - struct rb_node *parent = NULL; - struct hist_entry *iter; + struct addr_location al; - while (*p != NULL) { - parent = *p; - iter = rb_entry(parent, struct hist_entry, rb_node); + dump_printf("(IP, %d): %d: %p\n", event->header.misc, + event->ip.pid, (void *)(long)event->ip.ip); - if (he->count > iter->count) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, &output_hists); -} - -static void output__resort(void) -{ - struct rb_node *next; - struct hist_entry *n; - struct rb_root *tree = &hist; - - if (sort__need_collapse) - tree = &collapse_hists; - - next = rb_first(tree); - - while (next) { - n = rb_entry(next, struct hist_entry, rb_node); - next = rb_next(&n->rb_node); - - rb_erase(&n->rb_node, tree); - output__insert_entry(n); - } -} - -static unsigned long total = 0, - total_mmap = 0, - total_comm = 0, - total_fork = 0, - total_unknown = 0; - -static int -process_sample_event(event_t *event, unsigned long offset, unsigned long head) -{ - char level; - int show = 0; - struct dso *dso = NULL; - struct thread *thread; - u64 ip = event->ip.ip; - struct map *map = NULL; - - thread = threads__findnew(event->ip.pid, &threads, &last_match); - - dump_printf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->header.misc, - event->ip.pid, - (void *)(long)ip); - - dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); - - if (thread == NULL) { + if (event__preprocess_sample(event, &al, symbol_filter) < 0) { fprintf(stderr, "problem processing %d event, skipping it.\n", event->header.type); return -1; } - if (event->header.misc & PERF_RECORD_MISC_KERNEL) { - show = SHOW_KERNEL; - level = 'k'; - - dso = kernel_dso; - - dump_printf(" ...... dso: %s\n", dso->name); - - } else if (event->header.misc & PERF_RECORD_MISC_USER) { - - show = SHOW_USER; - level = '.'; - - map = thread__find_map(thread, ip); - if (map != NULL) { - ip = map->map_ip(map, ip); - dso = map->dso; - } else { - /* - * If this is outside of all known maps, - * and is a negative address, try to look it - * up in the kernel dso, as it might be a - * vsyscall (which executes in user-mode): - */ - if ((long long)ip < 0) - dso = kernel_dso; - } - dump_printf(" ...... dso: %s\n", dso ? dso->name : "<not found>"); - - } else { - show = SHOW_HV; - level = 'H'; - dump_printf(" ...... dso: [hypervisor]\n"); - } - - if (show & show_mask) { - struct symbol *sym = NULL; - - if (dso) - sym = dso->find_symbol(dso, ip); - - if (hist_entry__add(thread, map, dso, sym, ip, level)) { - fprintf(stderr, - "problem incrementing symbol count, skipping event\n"); - return -1; - } - } - total++; - - return 0; -} - -static int -process_mmap_event(event_t *event, unsigned long offset, unsigned long head) -{ - struct thread *thread; - struct map *map = map__new(&event->mmap, NULL, 0); - - thread = threads__findnew(event->mmap.pid, &threads, &last_match); - - dump_printf("%p [%p]: PERF_RECORD_MMAP %d: [%p(%p) @ %p]: %s\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->mmap.pid, - (void *)(long)event->mmap.start, - (void *)(long)event->mmap.len, - (void *)(long)event->mmap.pgoff, - event->mmap.filename); - - if (thread == NULL || map == NULL) { - dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n"); - return 0; - } - - thread__insert_map(thread, map); - total_mmap++; - - return 0; -} - -static int -process_comm_event(event_t *event, unsigned long offset, unsigned long head) -{ - struct thread *thread; - - thread = threads__findnew(event->comm.pid, &threads, &last_match); - dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->comm.comm, event->comm.pid); - - if (thread == NULL || - thread__set_comm(thread, event->comm.comm)) { - dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); - return -1; - } - total_comm++; - - return 0; -} - -static int -process_fork_event(event_t *event, unsigned long offset, unsigned long head) -{ - struct thread *thread; - struct thread *parent; - - thread = threads__findnew(event->fork.pid, &threads, &last_match); - parent = threads__findnew(event->fork.ppid, &threads, &last_match); - dump_printf("%p [%p]: PERF_RECORD_FORK: %d:%d\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->fork.pid, event->fork.ppid); - - /* - * A thread clone will have the same PID for both - * parent and child. - */ - if (thread == parent) - return 0; - - if (!thread || !parent || thread__fork(thread, parent)) { - dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n"); - return -1; - } - total_fork++; - - return 0; -} - -static int -process_event(event_t *event, unsigned long offset, unsigned long head) -{ - switch (event->header.type) { - case PERF_RECORD_SAMPLE: - return process_sample_event(event, offset, head); - - case PERF_RECORD_MMAP: - return process_mmap_event(event, offset, head); - - case PERF_RECORD_COMM: - return process_comm_event(event, offset, head); - - case PERF_RECORD_FORK: - return process_fork_event(event, offset, head); - /* - * We dont process them right now but they are fine: - */ - - case PERF_RECORD_THROTTLE: - case PERF_RECORD_UNTHROTTLE: - return 0; - - default: + if (hist_entry__add(&al, 1)) { + fprintf(stderr, "problem incrementing symbol count, " + "skipping event\n"); return -1; } return 0; } -static int -parse_line(FILE *file, struct symbol *sym, u64 start, u64 len) +static int parse_line(FILE *file, struct hist_entry *he, u64 len) { + struct symbol *sym = he->sym; char *line = NULL, *tmp, *tmp2; static const char *prev_line; static const char *prev_color; unsigned int offset; size_t line_len; + u64 start; s64 line_ip; int ret; char *c; @@ -709,22 +198,26 @@ parse_line(FILE *file, struct symbol *sym, u64 start, u64 len) line_ip = -1; } + start = he->map->unmap_ip(he->map, sym->start); + if (line_ip != -1) { const char *path = NULL; unsigned int hits = 0; double percent = 0.0; const char *color; - struct sym_ext *sym_ext = sym->priv; + struct sym_priv *priv = symbol__priv(sym); + struct sym_ext *sym_ext = priv->ext; + struct sym_hist *h = priv->hist; offset = line_ip - start; if (offset < len) - hits = sym->hist[offset]; + hits = h->ip[offset]; if (offset < len && sym_ext) { path = sym_ext[offset].path; percent = sym_ext[offset].percent; - } else if (sym->hist_sum) - percent = 100.0 * hits / sym->hist_sum; + } else if (h->sum) + percent = 100.0 * hits / h->sum; color = get_percent_color(percent); @@ -777,9 +270,10 @@ static void insert_source_line(struct sym_ext *sym_ext) rb_insert_color(&sym_ext->node, &root_sym_ext); } -static void free_source_line(struct symbol *sym, int len) +static void free_source_line(struct hist_entry *he, int len) { - struct sym_ext *sym_ext = sym->priv; + struct sym_priv *priv = symbol__priv(he->sym); + struct sym_ext *sym_ext = priv->ext; int i; if (!sym_ext) @@ -789,26 +283,30 @@ static void free_source_line(struct symbol *sym, int len) free(sym_ext[i].path); free(sym_ext); - sym->priv = NULL; + priv->ext = NULL; root_sym_ext = RB_ROOT; } /* Get the filename:line for the colored entries */ static void -get_source_line(struct symbol *sym, u64 start, int len, const char *filename) +get_source_line(struct hist_entry *he, int len, const char *filename) { + struct symbol *sym = he->sym; + u64 start; int i; char cmd[PATH_MAX * 2]; struct sym_ext *sym_ext; + struct sym_priv *priv = symbol__priv(sym); + struct sym_hist *h = priv->hist; - if (!sym->hist_sum) + if (!h->sum) return; - sym->priv = calloc(len, sizeof(struct sym_ext)); - if (!sym->priv) + sym_ext = priv->ext = calloc(len, sizeof(struct sym_ext)); + if (!priv->ext) return; - sym_ext = sym->priv; + start = he->map->unmap_ip(he->map, sym->start); for (i = 0; i < len; i++) { char *path = NULL; @@ -816,7 +314,7 @@ get_source_line(struct symbol *sym, u64 start, int len, const char *filename) u64 offset; FILE *fp; - sym_ext[i].percent = 100.0 * sym->hist[i] / sym->hist_sum; + sym_ext[i].percent = 100.0 * h->ip[i] / h->sum; if (sym_ext[i].percent <= 0.5) continue; @@ -870,33 +368,34 @@ static void print_summary(const char *filename) } } -static void annotate_sym(struct dso *dso, struct symbol *sym) +static void annotate_sym(struct hist_entry *he) { - const char *filename = dso->name, *d_filename; - u64 start, end, len; + struct map *map = he->map; + struct dso *dso = map->dso; + struct symbol *sym = he->sym; + const char *filename = dso->long_name, *d_filename; + u64 len; char command[PATH_MAX*2]; FILE *file; if (!filename) return; - if (sym->module) - filename = sym->module->path; - else if (dso == kernel_dso) - filename = vmlinux_name; - - start = sym->obj_start; - if (!start) - start = sym->start; + + if (verbose) + fprintf(stderr, "%s: filename=%s, sym=%s, start=%Lx, end=%Lx\n", + __func__, filename, sym->name, + map->unmap_ip(map, sym->start), + map->unmap_ip(map, sym->end)); + if (full_paths) d_filename = filename; else d_filename = basename(filename); - end = start + sym->end - sym->start + 1; len = sym->end - sym->start; if (print_line) { - get_source_line(sym, start, len, filename); + get_source_line(he, len, filename); print_summary(filename); } @@ -905,10 +404,12 @@ static void annotate_sym(struct dso *dso, struct symbol *sym) printf("------------------------------------------------\n"); if (verbose >= 2) - printf("annotating [%p] %30s : [%p] %30s\n", dso, dso->name, sym, sym->name); + printf("annotating [%p] %30s : [%p] %30s\n", + dso, dso->long_name, sym, sym->name); sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s|grep -v %s", - (u64)start, (u64)end, filename, filename); + map->unmap_ip(map, sym->start), map->unmap_ip(map, sym->end), + filename, filename); if (verbose >= 3) printf("doing: %s\n", command); @@ -918,159 +419,78 @@ static void annotate_sym(struct dso *dso, struct symbol *sym) return; while (!feof(file)) { - if (parse_line(file, sym, start, len) < 0) + if (parse_line(file, he, len) < 0) break; } pclose(file); if (print_line) - free_source_line(sym, len); + free_source_line(he, len); } static void find_annotations(void) { struct rb_node *nd; - struct dso *dso; - int count = 0; - - list_for_each_entry(dso, &dsos, node) { - - for (nd = rb_first(&dso->syms); nd; nd = rb_next(nd)) { - struct symbol *sym = rb_entry(nd, struct symbol, rb_node); - - if (sym->hist) { - annotate_sym(dso, sym); - count++; - } - } - } - - if (!count) - printf(" Error: symbol '%s' not present amongst the samples.\n", sym_hist_filter); -} - -static int __cmd_annotate(void) -{ - int ret, rc = EXIT_FAILURE; - unsigned long offset = 0; - unsigned long head = 0; - struct stat input_stat; - event_t *event; - uint32_t size; - char *buf; - - register_idle_thread(&threads, &last_match); - - input = open(input_name, O_RDONLY); - if (input < 0) { - perror("failed to open file"); - exit(-1); - } - - ret = fstat(input, &input_stat); - if (ret < 0) { - perror("failed to stat file"); - exit(-1); - } - - if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) { - fprintf(stderr, "file: %s not owned by current user or root\n", input_name); - exit(-1); - } - - if (!input_stat.st_size) { - fprintf(stderr, "zero-sized file, nothing to do!\n"); - exit(0); - } - - if (load_kernel() < 0) { - perror("failed to load kernel symbols"); - return EXIT_FAILURE; - } - -remap: - buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, - MAP_SHARED, input, offset); - if (buf == MAP_FAILED) { - perror("failed to mmap file"); - exit(-1); - } - -more: - event = (event_t *)(buf + head); - size = event->header.size; - if (!size) - size = 8; + for (nd = rb_first(&output_hists); nd; nd = rb_next(nd)) { + struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); + struct sym_priv *priv; - if (head + event->header.size >= page_size * mmap_window) { - unsigned long shift = page_size * (head / page_size); - int munmap_ret; - - munmap_ret = munmap(buf, page_size * mmap_window); - assert(munmap_ret == 0); - - offset += shift; - head -= shift; - goto remap; - } - - size = event->header.size; - - dump_printf("%p [%p]: event: %d\n", - (void *)(offset + head), - (void *)(long)event->header.size, - event->header.type); - - if (!size || process_event(event, offset, head) < 0) { - - dump_printf("%p [%p]: skipping unknown header type: %d\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->header.type); + if (he->sym == NULL) + continue; - total_unknown++; + priv = symbol__priv(he->sym); + if (priv->hist == NULL) + continue; + annotate_sym(he); /* - * assume we lost track of the stream, check alignment, and - * increment a single u64 in the hope to catch on again 'soon'. + * Since we have a hist_entry per IP for the same symbol, free + * he->sym->hist to signal we already processed this symbol. */ - - if (unlikely(head & 7)) - head &= ~7ULL; - - size = 8; + free(priv->hist); + priv->hist = NULL; } +} - head += size; +static struct perf_file_handler file_handler = { + .process_sample_event = process_sample_event, + .process_mmap_event = event__process_mmap, + .process_comm_event = event__process_comm, + .process_fork_event = event__process_task, +}; - if (offset + head < (unsigned long)input_stat.st_size) - goto more; +static int __cmd_annotate(void) +{ + struct perf_header *header; + struct thread *idle; + int ret; - rc = EXIT_SUCCESS; - close(input); + idle = register_idle_thread(); + register_perf_file_handler(&file_handler); - dump_printf(" IP events: %10ld\n", total); - dump_printf(" mmap events: %10ld\n", total_mmap); - dump_printf(" comm events: %10ld\n", total_comm); - dump_printf(" fork events: %10ld\n", total_fork); - dump_printf(" unknown events: %10ld\n", total_unknown); + ret = mmap_dispatch_perf_file(&header, input_name, 0, 0, + &event__cwdlen, &event__cwd); + if (ret) + return ret; - if (dump_trace) + if (dump_trace) { + event__print_totals(); return 0; + } - if (verbose >= 3) - threads__fprintf(stdout, &threads); + if (verbose > 3) + threads__fprintf(stdout); - if (verbose >= 2) + if (verbose > 2) dsos__fprintf(stdout); collapse__resort(); - output__resort(); + output__resort(event__total[0]); find_annotations(); - return rc; + return ret; } static const char * const annotate_usage[] = { @@ -1088,8 +508,9 @@ static const struct option options[] = { "be more verbose (show symbol address, etc)"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), - OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"), - OPT_BOOLEAN('m', "modules", &modules, + OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, + "file", "vmlinux pathname"), + OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, "load module symbols - WARNING: use only with -k and LIVE kernel"), OPT_BOOLEAN('l', "print-line", &print_line, "print matching source lines (may be slow)"), @@ -1115,9 +536,8 @@ static void setup_sorting(void) int cmd_annotate(int argc, const char **argv, const char *prefix __used) { - symbol__init(); - - page_size = getpagesize(); + if (symbol__init(&symbol_conf) < 0) + return -1; argc = parse_options(argc, argv, options, annotate_usage, 0); @@ -1134,10 +554,13 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used) sym_hist_filter = argv[0]; } - if (!sym_hist_filter) - usage_with_options(annotate_usage, options); - setup_pager(); + if (field_sep && *field_sep == '.') { + fputs("'.' is the only non valid --field-separator argument\n", + stderr); + exit(129); + } + return __cmd_annotate(); } diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c new file mode 100644 index 00000000000..e043eb83092 --- /dev/null +++ b/tools/perf/builtin-bench.c @@ -0,0 +1,196 @@ +/* + * + * builtin-bench.c + * + * General benchmarking subsystem provided by perf + * + * Copyright (C) 2009, Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> + * + */ + +/* + * + * Available subsystem list: + * sched ... scheduler and IPC mechanism + * mem ... memory access performance + * + */ + +#include "perf.h" +#include "util/util.h" +#include "util/parse-options.h" +#include "builtin.h" +#include "bench/bench.h" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +struct bench_suite { + const char *name; + const char *summary; + int (*fn)(int, const char **, const char *); +}; + +static struct bench_suite sched_suites[] = { + { "messaging", + "Benchmark for scheduler and IPC mechanisms", + bench_sched_messaging }, + { "pipe", + "Flood of communication over pipe() between two processes", + bench_sched_pipe }, + { NULL, + NULL, + NULL } +}; + +static struct bench_suite mem_suites[] = { + { "memcpy", + "Simple memory copy in various ways", + bench_mem_memcpy }, + { NULL, + NULL, + NULL } +}; + +struct bench_subsys { + const char *name; + const char *summary; + struct bench_suite *suites; +}; + +static struct bench_subsys subsystems[] = { + { "sched", + "scheduler and IPC mechanism", + sched_suites }, + { "mem", + "memory access performance", + mem_suites }, + { NULL, + NULL, + NULL } +}; + +static void dump_suites(int subsys_index) +{ + int i; + + printf("List of available suites for %s...\n\n", + subsystems[subsys_index].name); + + for (i = 0; subsystems[subsys_index].suites[i].name; i++) + printf("\t%s: %s\n", + subsystems[subsys_index].suites[i].name, + subsystems[subsys_index].suites[i].summary); + + printf("\n"); + return; +} + +static char *bench_format_str; +int bench_format = BENCH_FORMAT_DEFAULT; + +static const struct option bench_options[] = { + OPT_STRING('f', "format", &bench_format_str, "default", + "Specify format style"), + OPT_END() +}; + +static const char * const bench_usage[] = { + "perf bench [<common options>] <subsystem> <suite> [<options>]", + NULL +}; + +static void print_usage(void) +{ + int i; + + printf("Usage: \n"); + for (i = 0; bench_usage[i]; i++) + printf("\t%s\n", bench_usage[i]); + printf("\n"); + + printf("List of available subsystems...\n\n"); + + for (i = 0; subsystems[i].name; i++) + printf("\t%s: %s\n", + subsystems[i].name, subsystems[i].summary); + printf("\n"); +} + +static int bench_str2int(char *str) +{ + if (!str) + return BENCH_FORMAT_DEFAULT; + + if (!strcmp(str, BENCH_FORMAT_DEFAULT_STR)) + return BENCH_FORMAT_DEFAULT; + else if (!strcmp(str, BENCH_FORMAT_SIMPLE_STR)) + return BENCH_FORMAT_SIMPLE; + + return BENCH_FORMAT_UNKNOWN; +} + +int cmd_bench(int argc, const char **argv, const char *prefix __used) +{ + int i, j, status = 0; + + if (argc < 2) { + /* No subsystem specified. */ + print_usage(); + goto end; + } + + argc = parse_options(argc, argv, bench_options, bench_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + + bench_format = bench_str2int(bench_format_str); + if (bench_format == BENCH_FORMAT_UNKNOWN) { + printf("Unknown format descriptor:%s\n", bench_format_str); + goto end; + } + + if (argc < 1) { + print_usage(); + goto end; + } + + for (i = 0; subsystems[i].name; i++) { + if (strcmp(subsystems[i].name, argv[0])) + continue; + + if (argc < 2) { + /* No suite specified. */ + dump_suites(i); + goto end; + } + + for (j = 0; subsystems[i].suites[j].name; j++) { + if (strcmp(subsystems[i].suites[j].name, argv[1])) + continue; + + if (bench_format == BENCH_FORMAT_DEFAULT) + printf("# Running %s/%s benchmark...\n", + subsystems[i].name, + subsystems[i].suites[j].name); + status = subsystems[i].suites[j].fn(argc - 1, + argv + 1, prefix); + goto end; + } + + if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) { + dump_suites(i); + goto end; + } + + printf("Unknown suite:%s for %s\n", argv[1], argv[0]); + status = 1; + goto end; + } + + printf("Unknown subsystem:%s\n", argv[0]); + status = 1; + +end: + return status; +} diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c new file mode 100644 index 00000000000..7dee9d19ab7 --- /dev/null +++ b/tools/perf/builtin-buildid-list.c @@ -0,0 +1,116 @@ +/* + * builtin-buildid-list.c + * + * Builtin buildid-list command: list buildids in perf.data + * + * Copyright (C) 2009, Red Hat Inc. + * Copyright (C) 2009, Arnaldo Carvalho de Melo <acme@redhat.com> + */ +#include "builtin.h" +#include "perf.h" +#include "util/cache.h" +#include "util/data_map.h" +#include "util/debug.h" +#include "util/header.h" +#include "util/parse-options.h" +#include "util/symbol.h" + +static char const *input_name = "perf.data"; +static int force; + +static const char *const buildid_list_usage[] = { + "perf report [<options>]", + NULL +}; + +static const struct option options[] = { + OPT_STRING('i', "input", &input_name, "file", + "input file name"), + OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), + OPT_BOOLEAN('v', "verbose", &verbose, + "be more verbose"), + OPT_END() +}; + +static int perf_file_section__process_buildids(struct perf_file_section *self, + int feat, int fd) +{ + if (feat != HEADER_BUILD_ID) + return 0; + + if (lseek(fd, self->offset, SEEK_SET) < 0) { + pr_warning("Failed to lseek to %Ld offset for buildids!\n", + self->offset); + return -1; + } + + if (perf_header__read_build_ids(fd, self->offset, self->size)) { + pr_warning("Failed to read buildids!\n"); + return -1; + } + + return 0; +} + +static int __cmd_buildid_list(void) +{ + int err = -1; + struct perf_header *header; + struct perf_file_header f_header; + struct stat input_stat; + int input = open(input_name, O_RDONLY); + + if (input < 0) { + pr_err("failed to open file: %s", input_name); + if (!strcmp(input_name, "perf.data")) + pr_err(" (try 'perf record' first)"); + pr_err("\n"); + goto out; + } + + err = fstat(input, &input_stat); + if (err < 0) { + perror("failed to stat file"); + goto out_close; + } + + if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) { + pr_err("file %s not owned by current user or root\n", + input_name); + goto out_close; + } + + if (!input_stat.st_size) { + pr_info("zero-sized file, nothing to do!\n"); + goto out_close; + } + + err = -1; + header = perf_header__new(); + if (header == NULL) + goto out_close; + + if (perf_file_header__read(&f_header, header, input) < 0) { + pr_warning("incompatible file format"); + goto out_close; + } + + err = perf_header__process_sections(header, input, + perf_file_section__process_buildids); + + if (err < 0) + goto out_close; + + dsos__fprintf_buildid(stdout); +out_close: + close(input); +out: + return err; +} + +int cmd_buildid_list(int argc, const char **argv, const char *prefix __used) +{ + argc = parse_options(argc, argv, options, buildid_list_usage, 0); + setup_pager(); + return __cmd_buildid_list(); +} diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 4fb8734a796..9f810b17c25 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -61,8 +61,7 @@ static const char *get_man_viewer_info(const char *name) { struct man_viewer_info_list *viewer; - for (viewer = man_viewer_info_list; viewer; viewer = viewer->next) - { + for (viewer = man_viewer_info_list; viewer; viewer = viewer->next) { if (!strcasecmp(name, viewer->name)) return viewer->info; } @@ -115,7 +114,7 @@ static int check_emacsclient_version(void) return 0; } -static void exec_woman_emacs(const char* path, const char *page) +static void exec_woman_emacs(const char *path, const char *page) { if (!check_emacsclient_version()) { /* This works only with emacsclient version >= 22. */ @@ -129,7 +128,7 @@ static void exec_woman_emacs(const char* path, const char *page) } } -static void exec_man_konqueror(const char* path, const char *page) +static void exec_man_konqueror(const char *path, const char *page) { const char *display = getenv("DISPLAY"); if (display && *display) { @@ -157,7 +156,7 @@ static void exec_man_konqueror(const char* path, const char *page) } } -static void exec_man_man(const char* path, const char *page) +static void exec_man_man(const char *path, const char *page) { if (!path) path = "man"; @@ -180,7 +179,7 @@ static void add_man_viewer(const char *name) while (*p) p = &((*p)->next); - *p = calloc(1, (sizeof(**p) + len + 1)); + *p = zalloc(sizeof(**p) + len + 1); strncpy((*p)->name, name, len); } @@ -195,7 +194,7 @@ static void do_add_man_viewer_info(const char *name, size_t len, const char *value) { - struct man_viewer_info_list *new = calloc(1, sizeof(*new) + len + 1); + struct man_viewer_info_list *new = zalloc(sizeof(*new) + len + 1); strncpy(new->name, name, len); new->info = strdup(value); @@ -364,9 +363,8 @@ static void show_man_page(const char *perf_cmd) setup_man_path(); for (viewer = man_viewer_list; viewer; viewer = viewer->next) - { exec_viewer(viewer->name, page); /* will return when unable */ - } + if (fallback) exec_viewer(fallback, page); exec_viewer("man", page); diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c new file mode 100644 index 00000000000..047fef74bd5 --- /dev/null +++ b/tools/perf/builtin-kmem.c @@ -0,0 +1,807 @@ +#include "builtin.h" +#include "perf.h" + +#include "util/util.h" +#include "util/cache.h" +#include "util/symbol.h" +#include "util/thread.h" +#include "util/header.h" + +#include "util/parse-options.h" +#include "util/trace-event.h" + +#include "util/debug.h" +#include "util/data_map.h" + +#include <linux/rbtree.h> + +struct alloc_stat; +typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *); + +static char const *input_name = "perf.data"; + +static struct perf_header *header; +static u64 sample_type; + +static int alloc_flag; +static int caller_flag; + +static int alloc_lines = -1; +static int caller_lines = -1; + +static bool raw_ip; + +static char default_sort_order[] = "frag,hit,bytes"; + +static int *cpunode_map; +static int max_cpu_num; + +struct alloc_stat { + u64 call_site; + u64 ptr; + u64 bytes_req; + u64 bytes_alloc; + u32 hit; + u32 pingpong; + + short alloc_cpu; + + struct rb_node node; +}; + +static struct rb_root root_alloc_stat; +static struct rb_root root_alloc_sorted; +static struct rb_root root_caller_stat; +static struct rb_root root_caller_sorted; + +static unsigned long total_requested, total_allocated; +static unsigned long nr_allocs, nr_cross_allocs; + +struct raw_event_sample { + u32 size; + char data[0]; +}; + +#define PATH_SYS_NODE "/sys/devices/system/node" + +static void init_cpunode_map(void) +{ + FILE *fp; + int i; + + fp = fopen("/sys/devices/system/cpu/kernel_max", "r"); + if (!fp) { + max_cpu_num = 4096; + return; + } + + if (fscanf(fp, "%d", &max_cpu_num) < 1) + die("Failed to read 'kernel_max' from sysfs"); + max_cpu_num++; + + cpunode_map = calloc(max_cpu_num, sizeof(int)); + if (!cpunode_map) + die("calloc"); + for (i = 0; i < max_cpu_num; i++) + cpunode_map[i] = -1; + fclose(fp); +} + +static void setup_cpunode_map(void) +{ + struct dirent *dent1, *dent2; + DIR *dir1, *dir2; + unsigned int cpu, mem; + char buf[PATH_MAX]; + + init_cpunode_map(); + + dir1 = opendir(PATH_SYS_NODE); + if (!dir1) + return; + + while (true) { + dent1 = readdir(dir1); + if (!dent1) + break; + + if (sscanf(dent1->d_name, "node%u", &mem) < 1) + continue; + + snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name); + dir2 = opendir(buf); + if (!dir2) + continue; + while (true) { + dent2 = readdir(dir2); + if (!dent2) + break; + if (sscanf(dent2->d_name, "cpu%u", &cpu) < 1) + continue; + cpunode_map[cpu] = mem; + } + } +} + +static void insert_alloc_stat(unsigned long call_site, unsigned long ptr, + int bytes_req, int bytes_alloc, int cpu) +{ + struct rb_node **node = &root_alloc_stat.rb_node; + struct rb_node *parent = NULL; + struct alloc_stat *data = NULL; + + while (*node) { + parent = *node; + data = rb_entry(*node, struct alloc_stat, node); + + if (ptr > data->ptr) + node = &(*node)->rb_right; + else if (ptr < data->ptr) + node = &(*node)->rb_left; + else + break; + } + + if (data && data->ptr == ptr) { + data->hit++; + data->bytes_req += bytes_req; + data->bytes_alloc += bytes_req; + } else { + data = malloc(sizeof(*data)); + if (!data) + die("malloc"); + data->ptr = ptr; + data->pingpong = 0; + data->hit = 1; + data->bytes_req = bytes_req; + data->bytes_alloc = bytes_alloc; + + rb_link_node(&data->node, parent, node); + rb_insert_color(&data->node, &root_alloc_stat); + } + data->call_site = call_site; + data->alloc_cpu = cpu; +} + +static void insert_caller_stat(unsigned long call_site, + int bytes_req, int bytes_alloc) +{ + struct rb_node **node = &root_caller_stat.rb_node; + struct rb_node *parent = NULL; + struct alloc_stat *data = NULL; + + while (*node) { + parent = *node; + data = rb_entry(*node, struct alloc_stat, node); + + if (call_site > data->call_site) + node = &(*node)->rb_right; + else if (call_site < data->call_site) + node = &(*node)->rb_left; + else + break; + } + + if (data && data->call_site == call_site) { + data->hit++; + data->bytes_req += bytes_req; + data->bytes_alloc += bytes_req; + } else { + data = malloc(sizeof(*data)); + if (!data) + die("malloc"); + data->call_site = call_site; + data->pingpong = 0; + data->hit = 1; + data->bytes_req = bytes_req; + data->bytes_alloc = bytes_alloc; + + rb_link_node(&data->node, parent, node); + rb_insert_color(&data->node, &root_caller_stat); + } +} + +static void process_alloc_event(struct raw_event_sample *raw, + struct event *event, + int cpu, + u64 timestamp __used, + struct thread *thread __used, + int node) +{ + unsigned long call_site; + unsigned long ptr; + int bytes_req; + int bytes_alloc; + int node1, node2; + + ptr = raw_field_value(event, "ptr", raw->data); + call_site = raw_field_value(event, "call_site", raw->data); + bytes_req = raw_field_value(event, "bytes_req", raw->data); + bytes_alloc = raw_field_value(event, "bytes_alloc", raw->data); + + insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu); + insert_caller_stat(call_site, bytes_req, bytes_alloc); + + total_requested += bytes_req; + total_allocated += bytes_alloc; + + if (node) { + node1 = cpunode_map[cpu]; + node2 = raw_field_value(event, "node", raw->data); + if (node1 != node2) + nr_cross_allocs++; + } + nr_allocs++; +} + +static int ptr_cmp(struct alloc_stat *, struct alloc_stat *); +static int callsite_cmp(struct alloc_stat *, struct alloc_stat *); + +static struct alloc_stat *search_alloc_stat(unsigned long ptr, + unsigned long call_site, + struct rb_root *root, + sort_fn_t sort_fn) +{ + struct rb_node *node = root->rb_node; + struct alloc_stat key = { .ptr = ptr, .call_site = call_site }; + + while (node) { + struct alloc_stat *data; + int cmp; + + data = rb_entry(node, struct alloc_stat, node); + + cmp = sort_fn(&key, data); + if (cmp < 0) + node = node->rb_left; + else if (cmp > 0) + node = node->rb_right; + else + return data; + } + return NULL; +} + +static void process_free_event(struct raw_event_sample *raw, + struct event *event, + int cpu, + u64 timestamp __used, + struct thread *thread __used) +{ + unsigned long ptr; + struct alloc_stat *s_alloc, *s_caller; + + ptr = raw_field_value(event, "ptr", raw->data); + + s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp); + if (!s_alloc) + return; + + if (cpu != s_alloc->alloc_cpu) { + s_alloc->pingpong++; + + s_caller = search_alloc_stat(0, s_alloc->call_site, + &root_caller_stat, callsite_cmp); + assert(s_caller); + s_caller->pingpong++; + } + s_alloc->alloc_cpu = -1; +} + +static void +process_raw_event(event_t *raw_event __used, void *more_data, + int cpu, u64 timestamp, struct thread *thread) +{ + struct raw_event_sample *raw = more_data; + struct event *event; + int type; + + type = trace_parse_common_type(raw->data); + event = trace_find_event(type); + + if (!strcmp(event->name, "kmalloc") || + !strcmp(event->name, "kmem_cache_alloc")) { + process_alloc_event(raw, event, cpu, timestamp, thread, 0); + return; + } + + if (!strcmp(event->name, "kmalloc_node") || + !strcmp(event->name, "kmem_cache_alloc_node")) { + process_alloc_event(raw, event, cpu, timestamp, thread, 1); + return; + } + + if (!strcmp(event->name, "kfree") || + !strcmp(event->name, "kmem_cache_free")) { + process_free_event(raw, event, cpu, timestamp, thread); + return; + } +} + +static int process_sample_event(event_t *event) +{ + u64 ip = event->ip.ip; + u64 timestamp = -1; + u32 cpu = -1; + u64 period = 1; + void *more_data = event->ip.__more_data; + struct thread *thread = threads__findnew(event->ip.pid); + + if (sample_type & PERF_SAMPLE_TIME) { + timestamp = *(u64 *)more_data; + more_data += sizeof(u64); + } + + if (sample_type & PERF_SAMPLE_CPU) { + cpu = *(u32 *)more_data; + more_data += sizeof(u32); + more_data += sizeof(u32); /* reserved */ + } + + if (sample_type & PERF_SAMPLE_PERIOD) { + period = *(u64 *)more_data; + more_data += sizeof(u64); + } + + dump_printf("(IP, %d): %d/%d: %p period: %Ld\n", + event->header.misc, + event->ip.pid, event->ip.tid, + (void *)(long)ip, + (long long)period); + + if (thread == NULL) { + pr_debug("problem processing %d event, skipping it.\n", + event->header.type); + return -1; + } + + dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); + + process_raw_event(event, more_data, cpu, timestamp, thread); + + return 0; +} + +static int sample_type_check(u64 type) +{ + sample_type = type; + + if (!(sample_type & PERF_SAMPLE_RAW)) { + fprintf(stderr, + "No trace sample to read. Did you call perf record " + "without -R?"); + return -1; + } + + return 0; +} + +static struct perf_file_handler file_handler = { + .process_sample_event = process_sample_event, + .process_comm_event = event__process_comm, + .sample_type_check = sample_type_check, +}; + +static int read_events(void) +{ + register_idle_thread(); + register_perf_file_handler(&file_handler); + + return mmap_dispatch_perf_file(&header, input_name, 0, 0, + &event__cwdlen, &event__cwd); +} + +static double fragmentation(unsigned long n_req, unsigned long n_alloc) +{ + if (n_alloc == 0) + return 0.0; + else + return 100.0 - (100.0 * n_req / n_alloc); +} + +static void __print_result(struct rb_root *root, int n_lines, int is_caller) +{ + struct rb_node *next; + + printf("%.102s\n", graph_dotted_line); + printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr"); + printf(" Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag\n"); + printf("%.102s\n", graph_dotted_line); + + next = rb_first(root); + + while (next && n_lines--) { + struct alloc_stat *data = rb_entry(next, struct alloc_stat, + node); + struct symbol *sym = NULL; + char buf[BUFSIZ]; + u64 addr; + + if (is_caller) { + addr = data->call_site; + if (!raw_ip) + sym = thread__find_function(kthread, addr, NULL); + } else + addr = data->ptr; + + if (sym != NULL) + snprintf(buf, sizeof(buf), "%s+%Lx", sym->name, + addr - sym->start); + else + snprintf(buf, sizeof(buf), "%#Lx", addr); + printf(" %-34s |", buf); + + printf(" %9llu/%-5lu | %9llu/%-5lu | %6lu | %8lu | %6.3f%%\n", + (unsigned long long)data->bytes_alloc, + (unsigned long)data->bytes_alloc / data->hit, + (unsigned long long)data->bytes_req, + (unsigned long)data->bytes_req / data->hit, + (unsigned long)data->hit, + (unsigned long)data->pingpong, + fragmentation(data->bytes_req, data->bytes_alloc)); + + next = rb_next(next); + } + + if (n_lines == -1) + printf(" ... | ... | ... | ... | ... | ... \n"); + + printf("%.102s\n", graph_dotted_line); +} + +static void print_summary(void) +{ + printf("\nSUMMARY\n=======\n"); + printf("Total bytes requested: %lu\n", total_requested); + printf("Total bytes allocated: %lu\n", total_allocated); + printf("Total bytes wasted on internal fragmentation: %lu\n", + total_allocated - total_requested); + printf("Internal fragmentation: %f%%\n", + fragmentation(total_requested, total_allocated)); + printf("Cross CPU allocations: %lu/%lu\n", nr_cross_allocs, nr_allocs); +} + +static void print_result(void) +{ + if (caller_flag) + __print_result(&root_caller_sorted, caller_lines, 1); + if (alloc_flag) + __print_result(&root_alloc_sorted, alloc_lines, 0); + print_summary(); +} + +struct sort_dimension { + const char name[20]; + sort_fn_t cmp; + struct list_head list; +}; + +static LIST_HEAD(caller_sort); +static LIST_HEAD(alloc_sort); + +static void sort_insert(struct rb_root *root, struct alloc_stat *data, + struct list_head *sort_list) +{ + struct rb_node **new = &(root->rb_node); + struct rb_node *parent = NULL; + struct sort_dimension *sort; + + while (*new) { + struct alloc_stat *this; + int cmp = 0; + + this = rb_entry(*new, struct alloc_stat, node); + parent = *new; + + list_for_each_entry(sort, sort_list, list) { + cmp = sort->cmp(data, this); + if (cmp) + break; + } + + if (cmp > 0) + new = &((*new)->rb_left); + else + new = &((*new)->rb_right); + } + + rb_link_node(&data->node, parent, new); + rb_insert_color(&data->node, root); +} + +static void __sort_result(struct rb_root *root, struct rb_root *root_sorted, + struct list_head *sort_list) +{ + struct rb_node *node; + struct alloc_stat *data; + + for (;;) { + node = rb_first(root); + if (!node) + break; + + rb_erase(node, root); + data = rb_entry(node, struct alloc_stat, node); + sort_insert(root_sorted, data, sort_list); + } +} + +static void sort_result(void) +{ + __sort_result(&root_alloc_stat, &root_alloc_sorted, &alloc_sort); + __sort_result(&root_caller_stat, &root_caller_sorted, &caller_sort); +} + +static int __cmd_kmem(void) +{ + setup_pager(); + read_events(); + sort_result(); + print_result(); + + return 0; +} + +static const char * const kmem_usage[] = { + "perf kmem [<options>] {record}", + NULL +}; + +static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r) +{ + if (l->ptr < r->ptr) + return -1; + else if (l->ptr > r->ptr) + return 1; + return 0; +} + +static struct sort_dimension ptr_sort_dimension = { + .name = "ptr", + .cmp = ptr_cmp, +}; + +static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r) +{ + if (l->call_site < r->call_site) + return -1; + else if (l->call_site > r->call_site) + return 1; + return 0; +} + +static struct sort_dimension callsite_sort_dimension = { + .name = "callsite", + .cmp = callsite_cmp, +}; + +static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r) +{ + if (l->hit < r->hit) + return -1; + else if (l->hit > r->hit) + return 1; + return 0; +} + +static struct sort_dimension hit_sort_dimension = { + .name = "hit", + .cmp = hit_cmp, +}; + +static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r) +{ + if (l->bytes_alloc < r->bytes_alloc) + return -1; + else if (l->bytes_alloc > r->bytes_alloc) + return 1; + return 0; +} + +static struct sort_dimension bytes_sort_dimension = { + .name = "bytes", + .cmp = bytes_cmp, +}; + +static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r) +{ + double x, y; + + x = fragmentation(l->bytes_req, l->bytes_alloc); + y = fragmentation(r->bytes_req, r->bytes_alloc); + + if (x < y) + return -1; + else if (x > y) + return 1; + return 0; +} + +static struct sort_dimension frag_sort_dimension = { + .name = "frag", + .cmp = frag_cmp, +}; + +static int pingpong_cmp(struct alloc_stat *l, struct alloc_stat *r) +{ + if (l->pingpong < r->pingpong) + return -1; + else if (l->pingpong > r->pingpong) + return 1; + return 0; +} + +static struct sort_dimension pingpong_sort_dimension = { + .name = "pingpong", + .cmp = pingpong_cmp, +}; + +static struct sort_dimension *avail_sorts[] = { + &ptr_sort_dimension, + &callsite_sort_dimension, + &hit_sort_dimension, + &bytes_sort_dimension, + &frag_sort_dimension, + &pingpong_sort_dimension, +}; + +#define NUM_AVAIL_SORTS \ + (int)(sizeof(avail_sorts) / sizeof(struct sort_dimension *)) + +static int sort_dimension__add(const char *tok, struct list_head *list) +{ + struct sort_dimension *sort; + int i; + + for (i = 0; i < NUM_AVAIL_SORTS; i++) { + if (!strcmp(avail_sorts[i]->name, tok)) { + sort = malloc(sizeof(*sort)); + if (!sort) + die("malloc"); + memcpy(sort, avail_sorts[i], sizeof(*sort)); + list_add_tail(&sort->list, list); + return 0; + } + } + + return -1; +} + +static int setup_sorting(struct list_head *sort_list, const char *arg) +{ + char *tok; + char *str = strdup(arg); + + if (!str) + die("strdup"); + + while (true) { + tok = strsep(&str, ","); + if (!tok) + break; + if (sort_dimension__add(tok, sort_list) < 0) { + error("Unknown --sort key: '%s'", tok); + return -1; + } + } + + free(str); + return 0; +} + +static int parse_sort_opt(const struct option *opt __used, + const char *arg, int unset __used) +{ + if (!arg) + return -1; + + if (caller_flag > alloc_flag) + return setup_sorting(&caller_sort, arg); + else + return setup_sorting(&alloc_sort, arg); + + return 0; +} + +static int parse_stat_opt(const struct option *opt __used, + const char *arg, int unset __used) +{ + if (!arg) + return -1; + + if (strcmp(arg, "alloc") == 0) + alloc_flag = (caller_flag + 1); + else if (strcmp(arg, "caller") == 0) + caller_flag = (alloc_flag + 1); + else + return -1; + return 0; +} + +static int parse_line_opt(const struct option *opt __used, + const char *arg, int unset __used) +{ + int lines; + + if (!arg) + return -1; + + lines = strtoul(arg, NULL, 10); + + if (caller_flag > alloc_flag) + caller_lines = lines; + else + alloc_lines = lines; + + return 0; +} + +static const struct option kmem_options[] = { + OPT_STRING('i', "input", &input_name, "file", + "input file name"), + OPT_CALLBACK(0, "stat", NULL, "<alloc>|<caller>", + "stat selector, Pass 'alloc' or 'caller'.", + parse_stat_opt), + OPT_CALLBACK('s', "sort", NULL, "key[,key2...]", + "sort by keys: ptr, call_site, bytes, hit, pingpong, frag", + parse_sort_opt), + OPT_CALLBACK('l', "line", NULL, "num", + "show n lins", + parse_line_opt), + OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"), + OPT_END() +}; + +static const char *record_args[] = { + "record", + "-a", + "-R", + "-M", + "-f", + "-c", "1", + "-e", "kmem:kmalloc", + "-e", "kmem:kmalloc_node", + "-e", "kmem:kfree", + "-e", "kmem:kmem_cache_alloc", + "-e", "kmem:kmem_cache_alloc_node", + "-e", "kmem:kmem_cache_free", +}; + +static int __cmd_record(int argc, const char **argv) +{ + unsigned int rec_argc, i, j; + const char **rec_argv; + + rec_argc = ARRAY_SIZE(record_args) + argc - 1; + rec_argv = calloc(rec_argc + 1, sizeof(char *)); + + for (i = 0; i < ARRAY_SIZE(record_args); i++) + rec_argv[i] = strdup(record_args[i]); + + for (j = 1; j < (unsigned int)argc; j++, i++) + rec_argv[i] = argv[j]; + + return cmd_record(i, rec_argv, NULL); +} + +int cmd_kmem(int argc, const char **argv, const char *prefix __used) +{ + symbol__init(0); + + argc = parse_options(argc, argv, kmem_options, kmem_usage, 0); + + if (argc && !strncmp(argv[0], "rec", 3)) + return __cmd_record(argc, argv); + else if (argc) + usage_with_options(kmem_usage, kmem_options); + + if (list_empty(&caller_sort)) + setup_sorting(&caller_sort, default_sort_order); + if (list_empty(&alloc_sort)) + setup_sorting(&alloc_sort, default_sort_order); + + setup_cpunode_map(); + + return __cmd_kmem(); +} + diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c new file mode 100644 index 00000000000..a58e11b7ea8 --- /dev/null +++ b/tools/perf/builtin-probe.c @@ -0,0 +1,242 @@ +/* + * builtin-probe.c + * + * Builtin probe command: Set up probe events by C expression + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ +#define _GNU_SOURCE +#include <sys/utsname.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> + +#undef _GNU_SOURCE +#include "perf.h" +#include "builtin.h" +#include "util/util.h" +#include "util/event.h" +#include "util/debug.h" +#include "util/parse-options.h" +#include "util/parse-events.h" /* For debugfs_path */ +#include "util/probe-finder.h" +#include "util/probe-event.h" + +/* Default vmlinux search paths */ +#define NR_SEARCH_PATH 3 +const char *default_search_path[NR_SEARCH_PATH] = { +"/lib/modules/%s/build/vmlinux", /* Custom build kernel */ +"/usr/lib/debug/lib/modules/%s/vmlinux", /* Red Hat debuginfo */ +"/boot/vmlinux-debug-%s", /* Ubuntu */ +}; + +#define MAX_PATH_LEN 256 +#define MAX_PROBES 128 + +/* Session management structure */ +static struct { + char *vmlinux; + char *release; + int need_dwarf; + int nr_probe; + struct probe_point probes[MAX_PROBES]; +} session; + +static bool listing; + +/* Parse an event definition. Note that any error must die. */ +static void parse_probe_event(const char *str) +{ + struct probe_point *pp = &session.probes[session.nr_probe]; + + pr_debug("probe-definition(%d): %s\n", session.nr_probe, str); + if (++session.nr_probe == MAX_PROBES) + die("Too many probes (> %d) are specified.", MAX_PROBES); + + /* Parse perf-probe event into probe_point */ + session.need_dwarf = parse_perf_probe_event(str, pp); + + pr_debug("%d arguments\n", pp->nr_args); +} + +static int opt_add_probe_event(const struct option *opt __used, + const char *str, int unset __used) +{ + if (str) + parse_probe_event(str); + return 0; +} + +#ifndef NO_LIBDWARF +static int open_default_vmlinux(void) +{ + struct utsname uts; + char fname[MAX_PATH_LEN]; + int fd, ret, i; + + ret = uname(&uts); + if (ret) { + pr_debug("uname() failed.\n"); + return -errno; + } + session.release = uts.release; + for (i = 0; i < NR_SEARCH_PATH; i++) { + ret = snprintf(fname, MAX_PATH_LEN, + default_search_path[i], session.release); + if (ret >= MAX_PATH_LEN || ret < 0) { + pr_debug("Filename(%d,%s) is too long.\n", i, + uts.release); + errno = E2BIG; + return -E2BIG; + } + pr_debug("try to open %s\n", fname); + fd = open(fname, O_RDONLY); + if (fd >= 0) + break; + } + return fd; +} +#endif + +static const char * const probe_usage[] = { + "perf probe [<options>] 'PROBEDEF' ['PROBEDEF' ...]", + "perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]", + "perf probe --list", + NULL +}; + +static const struct option options[] = { + OPT_BOOLEAN('v', "verbose", &verbose, + "be more verbose (show parsed arguments, etc)"), +#ifndef NO_LIBDWARF + OPT_STRING('k', "vmlinux", &session.vmlinux, "file", + "vmlinux/module pathname"), +#endif + OPT_BOOLEAN('l', "list", &listing, "list up current probes"), + OPT_CALLBACK('a', "add", NULL, +#ifdef NO_LIBDWARF + "FUNC[+OFFS|%return] [ARG ...]", +#else + "FUNC[+OFFS|%return|:RLN][@SRC]|SRC:ALN [ARG ...]", +#endif + "probe point definition, where\n" + "\t\tGRP:\tGroup name (optional)\n" + "\t\tNAME:\tEvent name\n" + "\t\tFUNC:\tFunction name\n" + "\t\tOFFS:\tOffset from function entry (in byte)\n" + "\t\t%return:\tPut the probe at function return\n" +#ifdef NO_LIBDWARF + "\t\tARG:\tProbe argument (only \n" +#else + "\t\tSRC:\tSource code path\n" + "\t\tRLN:\tRelative line number from function entry.\n" + "\t\tALN:\tAbsolute line number in file.\n" + "\t\tARG:\tProbe argument (local variable name or\n" +#endif + "\t\t\tkprobe-tracer argument format.)\n", + opt_add_probe_event), + OPT_END() +}; + +int cmd_probe(int argc, const char **argv, const char *prefix __used) +{ + int i, j, ret; +#ifndef NO_LIBDWARF + int fd; +#endif + struct probe_point *pp; + + argc = parse_options(argc, argv, options, probe_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + for (i = 0; i < argc; i++) + parse_probe_event(argv[i]); + + if ((session.nr_probe == 0 && !listing) || + (session.nr_probe != 0 && listing)) + usage_with_options(probe_usage, options); + + if (listing) { + show_perf_probe_events(); + return 0; + } + + if (session.need_dwarf) +#ifdef NO_LIBDWARF + die("Debuginfo-analysis is not supported"); +#else /* !NO_LIBDWARF */ + pr_debug("Some probes require debuginfo.\n"); + + if (session.vmlinux) + fd = open(session.vmlinux, O_RDONLY); + else + fd = open_default_vmlinux(); + if (fd < 0) { + if (session.need_dwarf) + die("Could not open vmlinux/module file."); + + pr_warning("Could not open vmlinux/module file." + " Try to use symbols.\n"); + goto end_dwarf; + } + + /* Searching probe points */ + for (j = 0; j < session.nr_probe; j++) { + pp = &session.probes[j]; + if (pp->found) + continue; + + lseek(fd, SEEK_SET, 0); + ret = find_probepoint(fd, pp); + if (ret < 0) { + if (session.need_dwarf) + die("Could not analyze debuginfo."); + + pr_warning("An error occurred in debuginfo analysis. Try to use symbols.\n"); + break; + } + if (ret == 0) /* No error but failed to find probe point. */ + die("No probe point found."); + } + close(fd); + +end_dwarf: +#endif /* !NO_LIBDWARF */ + + /* Synthesize probes without dwarf */ + for (j = 0; j < session.nr_probe; j++) { + pp = &session.probes[j]; + if (pp->found) /* This probe is already found. */ + continue; + + ret = synthesize_trace_kprobe_event(pp); + if (ret == -E2BIG) + die("probe point definition becomes too long."); + else if (ret < 0) + die("Failed to synthesize a probe point."); + } + + /* Settng up probe points */ + add_trace_kprobe_events(session.probes, session.nr_probe); + return 0; +} + diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index a4be453fc8a..0e519c667e3 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -17,55 +17,52 @@ #include "util/header.h" #include "util/event.h" #include "util/debug.h" -#include "util/trace-event.h" +#include "util/symbol.h" #include <unistd.h> #include <sched.h> -#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1) -#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask)) - static int fd[MAX_NR_CPUS][MAX_COUNTERS]; -static long default_interval = 100000; +static long default_interval = 0; -static int nr_cpus = 0; +static int nr_cpus = 0; static unsigned int page_size; -static unsigned int mmap_pages = 128; -static int freq = 0; +static unsigned int mmap_pages = 128; +static int freq = 1000; static int output; static const char *output_name = "perf.data"; -static int group = 0; -static unsigned int realtime_prio = 0; -static int raw_samples = 0; -static int system_wide = 0; -static int profile_cpu = -1; -static pid_t target_pid = -1; -static pid_t child_pid = -1; -static int inherit = 1; -static int force = 0; -static int append_file = 0; -static int call_graph = 0; -static int inherit_stat = 0; -static int no_samples = 0; -static int sample_address = 0; -static int multiplex = 0; -static int multiplex_fd = -1; - -static long samples; +static int group = 0; +static unsigned int realtime_prio = 0; +static int raw_samples = 0; +static int system_wide = 0; +static int profile_cpu = -1; +static pid_t target_pid = -1; +static pid_t child_pid = -1; +static int inherit = 1; +static int force = 0; +static int append_file = 0; +static int call_graph = 0; +static int inherit_stat = 0; +static int no_samples = 0; +static int sample_address = 0; +static int multiplex = 0; +static int multiplex_fd = -1; + +static long samples = 0; static struct timeval last_read; static struct timeval this_read; -static u64 bytes_written; +static u64 bytes_written = 0; static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; -static int nr_poll; -static int nr_cpu; +static int nr_poll = 0; +static int nr_cpu = 0; -static int file_new = 1; +static int file_new = 1; -struct perf_header *header; +struct perf_header *header = NULL; struct mmap_data { int counter; @@ -113,6 +110,24 @@ static void write_output(void *buf, size_t size) } } +static void write_event(event_t *buf, size_t size) +{ + /* + * Add it to the list of DSOs, so that when we finish this + * record session we can pick the available build-ids. + */ + if (buf->header.type == PERF_RECORD_MMAP) + dsos__findnew(buf->mmap.filename); + + write_output(buf, size); +} + +static int process_synthesized_event(event_t *event) +{ + write_event(event, event->header.size); + return 0; +} + static void mmap_read(struct mmap_data *md) { unsigned int head = mmap_read_head(md); @@ -161,14 +176,14 @@ static void mmap_read(struct mmap_data *md) size = md->mask + 1 - (old & md->mask); old += size; - write_output(buf, size); + write_event(buf, size); } buf = &data[old & md->mask]; size = head - old; old += size; - write_output(buf, size); + write_event(buf, size); md->prev = old; mmap_write_tail(md, old); @@ -195,168 +210,6 @@ static void sig_atexit(void) kill(getpid(), signr); } -static pid_t pid_synthesize_comm_event(pid_t pid, int full) -{ - struct comm_event comm_ev; - char filename[PATH_MAX]; - char bf[BUFSIZ]; - FILE *fp; - size_t size = 0; - DIR *tasks; - struct dirent dirent, *next; - pid_t tgid = 0; - - snprintf(filename, sizeof(filename), "/proc/%d/status", pid); - - fp = fopen(filename, "r"); - if (fp == NULL) { - /* - * We raced with a task exiting - just return: - */ - if (verbose) - fprintf(stderr, "couldn't open %s\n", filename); - return 0; - } - - memset(&comm_ev, 0, sizeof(comm_ev)); - while (!comm_ev.comm[0] || !comm_ev.pid) { - if (fgets(bf, sizeof(bf), fp) == NULL) - goto out_failure; - - if (memcmp(bf, "Name:", 5) == 0) { - char *name = bf + 5; - while (*name && isspace(*name)) - ++name; - size = strlen(name) - 1; - memcpy(comm_ev.comm, name, size++); - } else if (memcmp(bf, "Tgid:", 5) == 0) { - char *tgids = bf + 5; - while (*tgids && isspace(*tgids)) - ++tgids; - tgid = comm_ev.pid = atoi(tgids); - } - } - - comm_ev.header.type = PERF_RECORD_COMM; - size = ALIGN(size, sizeof(u64)); - comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size); - - if (!full) { - comm_ev.tid = pid; - - write_output(&comm_ev, comm_ev.header.size); - goto out_fclose; - } - - snprintf(filename, sizeof(filename), "/proc/%d/task", pid); - - tasks = opendir(filename); - while (!readdir_r(tasks, &dirent, &next) && next) { - char *end; - pid = strtol(dirent.d_name, &end, 10); - if (*end) - continue; - - comm_ev.tid = pid; - - write_output(&comm_ev, comm_ev.header.size); - } - closedir(tasks); - -out_fclose: - fclose(fp); - return tgid; - -out_failure: - fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n", - filename); - exit(EXIT_FAILURE); -} - -static void pid_synthesize_mmap_samples(pid_t pid, pid_t tgid) -{ - char filename[PATH_MAX]; - FILE *fp; - - snprintf(filename, sizeof(filename), "/proc/%d/maps", pid); - - fp = fopen(filename, "r"); - if (fp == NULL) { - /* - * We raced with a task exiting - just return: - */ - if (verbose) - fprintf(stderr, "couldn't open %s\n", filename); - return; - } - while (1) { - char bf[BUFSIZ], *pbf = bf; - struct mmap_event mmap_ev = { - .header = { .type = PERF_RECORD_MMAP }, - }; - int n; - size_t size; - if (fgets(bf, sizeof(bf), fp) == NULL) - break; - - /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ - n = hex2u64(pbf, &mmap_ev.start); - if (n < 0) - continue; - pbf += n + 1; - n = hex2u64(pbf, &mmap_ev.len); - if (n < 0) - continue; - pbf += n + 3; - if (*pbf == 'x') { /* vm_exec */ - char *execname = strchr(bf, '/'); - - /* Catch VDSO */ - if (execname == NULL) - execname = strstr(bf, "[vdso]"); - - if (execname == NULL) - continue; - - size = strlen(execname); - execname[size - 1] = '\0'; /* Remove \n */ - memcpy(mmap_ev.filename, execname, size); - size = ALIGN(size, sizeof(u64)); - mmap_ev.len -= mmap_ev.start; - mmap_ev.header.size = (sizeof(mmap_ev) - - (sizeof(mmap_ev.filename) - size)); - mmap_ev.pid = tgid; - mmap_ev.tid = pid; - - write_output(&mmap_ev, mmap_ev.header.size); - } - } - - fclose(fp); -} - -static void synthesize_all(void) -{ - DIR *proc; - struct dirent dirent, *next; - - proc = opendir("/proc"); - - while (!readdir_r(proc, &dirent, &next) && next) { - char *end; - pid_t pid, tgid; - - pid = strtol(dirent.d_name, &end, 10); - if (*end) /* only interested in proper numerical dirents */ - continue; - - tgid = pid_synthesize_comm_event(pid, 1); - pid_synthesize_mmap_samples(pid, tgid); - } - - closedir(proc); -} - static int group_fd; static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int nr) @@ -367,7 +220,11 @@ static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int n h_attr = header->attr[nr]; } else { h_attr = perf_header_attr__new(a); - perf_header__add_attr(header, h_attr); + if (h_attr != NULL) + if (perf_header__add_attr(header, h_attr) < 0) { + perf_header_attr__delete(h_attr); + h_attr = NULL; + } } return h_attr; @@ -375,9 +232,11 @@ static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int n static void create_counter(int counter, int cpu, pid_t pid) { + char *filter = filters[counter]; struct perf_event_attr *attr = attrs + counter; struct perf_header_attr *h_attr; int track = !counter; /* only the first counter needs these */ + int ret; struct { u64 count; u64 time_enabled; @@ -448,11 +307,19 @@ try_again: printf("\n"); error("perfcounter syscall returned with %d (%s)\n", fd[nr_cpu][counter], strerror(err)); + +#if defined(__i386__) || defined(__x86_64__) + if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP) + die("No hardware sampling interrupt available. No APIC? If so then you can boot the kernel with the \"lapic\" boot parameter to force-enable it.\n"); +#endif + die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); exit(-1); } h_attr = get_header_attr(attr, counter); + if (h_attr == NULL) + die("nomem\n"); if (!file_new) { if (memcmp(&h_attr->attr, attr, sizeof(*attr))) { @@ -466,7 +333,10 @@ try_again: exit(-1); } - perf_header_attr__add_id(h_attr, read_data.id); + if (perf_header_attr__add_id(h_attr, read_data.id) < 0) { + pr_warning("Not enough memory to add id\n"); + exit(-1); + } assert(fd[nr_cpu][counter] >= 0); fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK); @@ -480,7 +350,6 @@ try_again: multiplex_fd = fd[nr_cpu][counter]; if (multiplex && fd[nr_cpu][counter] != multiplex_fd) { - int ret; ret = ioctl(fd[nr_cpu][counter], PERF_EVENT_IOC_SET_OUTPUT, multiplex_fd); assert(ret != -1); @@ -500,6 +369,16 @@ try_again: } } + if (filter != NULL) { + ret = ioctl(fd[nr_cpu][counter], + PERF_EVENT_IOC_SET_FILTER, filter); + if (ret) { + error("failed to set filter with %d (%s)\n", errno, + strerror(errno)); + exit(-1); + } + } + ioctl(fd[nr_cpu][counter], PERF_EVENT_IOC_ENABLE); } @@ -518,7 +397,7 @@ static void atexit_header(void) { header->data_size += bytes_written; - perf_header__write(header, output); + perf_header__write(header, output, true); } static int __cmd_record(int argc, const char **argv) @@ -527,7 +406,7 @@ static int __cmd_record(int argc, const char **argv) struct stat st; pid_t pid = 0; int flags; - int ret; + int err; unsigned long waking = 0; page_size = sysconf(_SC_PAGE_SIZE); @@ -561,22 +440,29 @@ static int __cmd_record(int argc, const char **argv) exit(-1); } - if (!file_new) - header = perf_header__read(output); - else - header = perf_header__new(); + header = perf_header__new(); + if (header == NULL) { + pr_err("Not enough memory for reading perf file header\n"); + return -1; + } + if (!file_new) { + err = perf_header__read(header, output); + if (err < 0) + return err; + } if (raw_samples) { - read_tracing_data(attrs, nr_counters); + perf_header__set_feat(header, HEADER_TRACE_INFO); } else { for (i = 0; i < nr_counters; i++) { if (attrs[i].sample_type & PERF_SAMPLE_RAW) { - read_tracing_data(attrs, nr_counters); + perf_header__set_feat(header, HEADER_TRACE_INFO); break; } } } + atexit(atexit_header); if (!system_wide) { @@ -594,25 +480,36 @@ static int __cmd_record(int argc, const char **argv) } } - if (file_new) - perf_header__write(header, output); + if (file_new) { + err = perf_header__write(header, output, false); + if (err < 0) + return err; + } - if (!system_wide) { - pid_t tgid = pid_synthesize_comm_event(pid, 0); - pid_synthesize_mmap_samples(pid, tgid); - } else - synthesize_all(); + if (!system_wide) + event__synthesize_thread(pid, process_synthesized_event); + else + event__synthesize_threads(process_synthesized_event); if (target_pid == -1 && argc) { pid = fork(); if (pid < 0) - perror("failed to fork"); + die("failed to fork"); if (!pid) { if (execvp(argv[0], (char **)argv)) { perror(argv[0]); exit(-1); } + } else { + /* + * Wait a bit for the execv'ed child to appear + * and be updated in /proc + * FIXME: Do you know a less heuristical solution? + */ + usleep(1000); + event__synthesize_thread(pid, + process_synthesized_event); } child_pid = pid; @@ -623,7 +520,7 @@ static int __cmd_record(int argc, const char **argv) param.sched_priority = realtime_prio; if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { - printf("Could not set realtime priority.\n"); + pr_err("Could not set realtime priority.\n"); exit(-1); } } @@ -641,7 +538,7 @@ static int __cmd_record(int argc, const char **argv) if (hits == samples) { if (done) break; - ret = poll(event_array, nr_poll, -1); + err = poll(event_array, nr_poll, -1); waking++; } @@ -677,6 +574,8 @@ static const struct option options[] = { OPT_CALLBACK('e', "event", NULL, "event", "event selector. use 'perf list' to list available events", parse_events), + OPT_CALLBACK(0, "filter", NULL, "filter", + "event filter", parse_filter), OPT_INTEGER('p', "pid", &target_pid, "record events on existing pid"), OPT_INTEGER('r', "realtime", &realtime_prio, @@ -720,6 +619,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) { int counter; + symbol__init(0); + argc = parse_options(argc, argv, options, record_usage, PARSE_OPT_STOP_AT_NON_OPTION); if (!argc && target_pid == -1 && !system_wide) @@ -731,6 +632,18 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) attrs[0].config = PERF_COUNT_HW_CPU_CYCLES; } + /* + * User specified count overrides default frequency. + */ + if (default_interval) + freq = 0; + else if (freq) { + default_interval = freq; + } else { + fprintf(stderr, "frequency and count are zero, aborting\n"); + exit(EXIT_FAILURE); + } + for (counter = 0; counter < nr_counters; counter++) { if (attrs[counter].sample_period) continue; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 19669c20088..383c4ab4f9a 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -26,20 +26,18 @@ #include "util/parse-options.h" #include "util/parse-events.h" +#include "util/data_map.h" #include "util/thread.h" +#include "util/sort.h" +#include "util/hist.h" static char const *input_name = "perf.data"; -static char default_sort_order[] = "comm,dso,symbol"; -static char *sort_order = default_sort_order; static char *dso_list_str, *comm_list_str, *sym_list_str, *col_width_list_str; static struct strlist *dso_list, *comm_list, *sym_list; -static char *field_sep; static int force; -static int input; -static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; static int full_paths; static int show_nr_samples; @@ -50,374 +48,38 @@ static struct perf_read_values show_threads_values; static char default_pretty_printing_style[] = "normal"; static char *pretty_printing_style = default_pretty_printing_style; -static unsigned long page_size; -static unsigned long mmap_window = 32; - -static char default_parent_pattern[] = "^sys_|^do_page_fault"; -static char *parent_pattern = default_parent_pattern; -static regex_t parent_regex; - static int exclude_other = 1; static char callchain_default_opt[] = "fractal,0.5"; -static int callchain; - -static char __cwd[PATH_MAX]; -static char *cwd = __cwd; -static int cwdlen; - -static struct rb_root threads; -static struct thread *last_match; - static struct perf_header *header; -static -struct callchain_param callchain_param = { - .mode = CHAIN_GRAPH_REL, - .min_percent = 0.5 -}; - static u64 sample_type; -static int repsep_fprintf(FILE *fp, const char *fmt, ...) -{ - int n; - va_list ap; - - va_start(ap, fmt); - if (!field_sep) - n = vfprintf(fp, fmt, ap); - else { - char *bf = NULL; - n = vasprintf(&bf, fmt, ap); - if (n > 0) { - char *sep = bf; - - while (1) { - sep = strchr(sep, *field_sep); - if (sep == NULL) - break; - *sep = '.'; - } - } - fputs(bf, fp); - free(bf); - } - va_end(ap); - return n; -} - -static unsigned int dsos__col_width, - comms__col_width, - threads__col_width; +struct symbol_conf symbol_conf; -/* - * histogram, sorted on item, collects counts - */ - -static struct rb_root hist; - -struct hist_entry { - struct rb_node rb_node; - - struct thread *thread; - struct map *map; - struct dso *dso; - struct symbol *sym; - struct symbol *parent; - u64 ip; - char level; - struct callchain_node callchain; - struct rb_root sorted_chain; - - u64 count; -}; - -/* - * configurable sorting bits - */ - -struct sort_entry { - struct list_head list; - - const char *header; - - int64_t (*cmp)(struct hist_entry *, struct hist_entry *); - int64_t (*collapse)(struct hist_entry *, struct hist_entry *); - size_t (*print)(FILE *fp, struct hist_entry *, unsigned int width); - unsigned int *width; - bool elide; -}; - -static int64_t cmp_null(void *l, void *r) -{ - if (!l && !r) - return 0; - else if (!l) - return -1; - else - return 1; -} - -/* --sort pid */ - -static int64_t -sort__thread_cmp(struct hist_entry *left, struct hist_entry *right) -{ - return right->thread->pid - left->thread->pid; -} static size_t -sort__thread_print(FILE *fp, struct hist_entry *self, unsigned int width) +callchain__fprintf_left_margin(FILE *fp, int left_margin) { - return repsep_fprintf(fp, "%*s:%5d", width - 6, - self->thread->comm ?: "", self->thread->pid); -} - -static struct sort_entry sort_thread = { - .header = "Command: Pid", - .cmp = sort__thread_cmp, - .print = sort__thread_print, - .width = &threads__col_width, -}; - -/* --sort comm */ - -static int64_t -sort__comm_cmp(struct hist_entry *left, struct hist_entry *right) -{ - return right->thread->pid - left->thread->pid; -} - -static int64_t -sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) -{ - char *comm_l = left->thread->comm; - char *comm_r = right->thread->comm; - - if (!comm_l || !comm_r) - return cmp_null(comm_l, comm_r); - - return strcmp(comm_l, comm_r); -} - -static size_t -sort__comm_print(FILE *fp, struct hist_entry *self, unsigned int width) -{ - return repsep_fprintf(fp, "%*s", width, self->thread->comm); -} - -static struct sort_entry sort_comm = { - .header = "Command", - .cmp = sort__comm_cmp, - .collapse = sort__comm_collapse, - .print = sort__comm_print, - .width = &comms__col_width, -}; - -/* --sort dso */ - -static int64_t -sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) -{ - struct dso *dso_l = left->dso; - struct dso *dso_r = right->dso; - - if (!dso_l || !dso_r) - return cmp_null(dso_l, dso_r); - - return strcmp(dso_l->name, dso_r->name); -} - -static size_t -sort__dso_print(FILE *fp, struct hist_entry *self, unsigned int width) -{ - if (self->dso) - return repsep_fprintf(fp, "%-*s", width, self->dso->name); - - return repsep_fprintf(fp, "%*llx", width, (u64)self->ip); -} - -static struct sort_entry sort_dso = { - .header = "Shared Object", - .cmp = sort__dso_cmp, - .print = sort__dso_print, - .width = &dsos__col_width, -}; - -/* --sort symbol */ - -static int64_t -sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) -{ - u64 ip_l, ip_r; - - if (left->sym == right->sym) - return 0; - - ip_l = left->sym ? left->sym->start : left->ip; - ip_r = right->sym ? right->sym->start : right->ip; - - return (int64_t)(ip_r - ip_l); -} - -static size_t -sort__sym_print(FILE *fp, struct hist_entry *self, unsigned int width __used) -{ - size_t ret = 0; + int i; + int ret; - if (verbose) - ret += repsep_fprintf(fp, "%#018llx %c ", (u64)self->ip, - dso__symtab_origin(self->dso)); + ret = fprintf(fp, " "); - ret += repsep_fprintf(fp, "[%c] ", self->level); - if (self->sym) { - ret += repsep_fprintf(fp, "%s", self->sym->name); - - if (self->sym->module) - ret += repsep_fprintf(fp, "\t[%s]", - self->sym->module->name); - } else { - ret += repsep_fprintf(fp, "%#016llx", (u64)self->ip); - } + for (i = 0; i < left_margin; i++) + ret += fprintf(fp, " "); return ret; } -static struct sort_entry sort_sym = { - .header = "Symbol", - .cmp = sort__sym_cmp, - .print = sort__sym_print, -}; - -/* --sort parent */ - -static int64_t -sort__parent_cmp(struct hist_entry *left, struct hist_entry *right) -{ - struct symbol *sym_l = left->parent; - struct symbol *sym_r = right->parent; - - if (!sym_l || !sym_r) - return cmp_null(sym_l, sym_r); - - return strcmp(sym_l->name, sym_r->name); -} - -static size_t -sort__parent_print(FILE *fp, struct hist_entry *self, unsigned int width) -{ - return repsep_fprintf(fp, "%-*s", width, - self->parent ? self->parent->name : "[other]"); -} - -static unsigned int parent_symbol__col_width; - -static struct sort_entry sort_parent = { - .header = "Parent symbol", - .cmp = sort__parent_cmp, - .print = sort__parent_print, - .width = &parent_symbol__col_width, -}; - -static int sort__need_collapse = 0; -static int sort__has_parent = 0; - -struct sort_dimension { - const char *name; - struct sort_entry *entry; - int taken; -}; - -static struct sort_dimension sort_dimensions[] = { - { .name = "pid", .entry = &sort_thread, }, - { .name = "comm", .entry = &sort_comm, }, - { .name = "dso", .entry = &sort_dso, }, - { .name = "symbol", .entry = &sort_sym, }, - { .name = "parent", .entry = &sort_parent, }, -}; - -static LIST_HEAD(hist_entry__sort_list); - -static int sort_dimension__add(const char *tok) -{ - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) { - struct sort_dimension *sd = &sort_dimensions[i]; - - if (sd->taken) - continue; - - if (strncasecmp(tok, sd->name, strlen(tok))) - continue; - - if (sd->entry->collapse) - sort__need_collapse = 1; - - if (sd->entry == &sort_parent) { - int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED); - if (ret) { - char err[BUFSIZ]; - - regerror(ret, &parent_regex, err, sizeof(err)); - fprintf(stderr, "Invalid regex: %s\n%s", - parent_pattern, err); - exit(-1); - } - sort__has_parent = 1; - } - - list_add_tail(&sd->entry->list, &hist_entry__sort_list); - sd->taken = 1; - - return 0; - } - - return -ESRCH; -} - -static int64_t -hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) -{ - struct sort_entry *se; - int64_t cmp = 0; - - list_for_each_entry(se, &hist_entry__sort_list, list) { - cmp = se->cmp(left, right); - if (cmp) - break; - } - - return cmp; -} - -static int64_t -hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) -{ - struct sort_entry *se; - int64_t cmp = 0; - - list_for_each_entry(se, &hist_entry__sort_list, list) { - int64_t (*f)(struct hist_entry *, struct hist_entry *); - - f = se->collapse ?: se->cmp; - - cmp = f(left, right); - if (cmp) - break; - } - - return cmp; -} - -static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask) +static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask, + int left_margin) { int i; size_t ret = 0; - ret += fprintf(fp, "%s", " "); + ret += callchain__fprintf_left_margin(fp, left_margin); for (i = 0; i < depth; i++) if (depth_mask & (1 << i)) @@ -432,12 +94,12 @@ static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask) static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, int depth, int depth_mask, int count, u64 total_samples, - int hits) + int hits, int left_margin) { int i; size_t ret = 0; - ret += fprintf(fp, "%s", " "); + ret += callchain__fprintf_left_margin(fp, left_margin); for (i = 0; i < depth; i++) { if (depth_mask & (1 << i)) ret += fprintf(fp, "|"); @@ -475,8 +137,9 @@ static void init_rem_hits(void) } static size_t -callchain__fprintf_graph(FILE *fp, struct callchain_node *self, - u64 total_samples, int depth, int depth_mask) +__callchain__fprintf_graph(FILE *fp, struct callchain_node *self, + u64 total_samples, int depth, int depth_mask, + int left_margin) { struct rb_node *node, *next; struct callchain_node *child; @@ -517,7 +180,8 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self, * But we keep the older depth mask for the line seperator * to keep the level link until we reach the last child */ - ret += ipchain__fprintf_graph_line(fp, depth, depth_mask); + ret += ipchain__fprintf_graph_line(fp, depth, depth_mask, + left_margin); i = 0; list_for_each_entry(chain, &child->val, list) { if (chain->ip >= PERF_CONTEXT_MAX) @@ -525,11 +189,13 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self, ret += ipchain__fprintf_graph(fp, chain, depth, new_depth_mask, i++, new_total, - cumul); + cumul, + left_margin); } - ret += callchain__fprintf_graph(fp, child, new_total, - depth + 1, - new_depth_mask | (1 << depth)); + ret += __callchain__fprintf_graph(fp, child, new_total, + depth + 1, + new_depth_mask | (1 << depth), + left_margin); node = next; } @@ -543,9 +209,48 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self, ret += ipchain__fprintf_graph(fp, &rem_hits, depth, new_depth_mask, 0, new_total, - remaining); + remaining, left_margin); + } + + return ret; +} + + +static size_t +callchain__fprintf_graph(FILE *fp, struct callchain_node *self, + u64 total_samples, int left_margin) +{ + struct callchain_list *chain; + bool printed = false; + int i = 0; + int ret = 0; + + list_for_each_entry(chain, &self->val, list) { + if (chain->ip >= PERF_CONTEXT_MAX) + continue; + + if (!i++ && sort__first_dimension == SORT_SYM) + continue; + + if (!printed) { + ret += callchain__fprintf_left_margin(fp, left_margin); + ret += fprintf(fp, "|\n"); + ret += callchain__fprintf_left_margin(fp, left_margin); + ret += fprintf(fp, "---"); + + left_margin += 3; + printed = true; + } else + ret += callchain__fprintf_left_margin(fp, left_margin); + + if (chain->sym) + ret += fprintf(fp, " %s\n", chain->sym->name); + else + ret += fprintf(fp, " %p\n", (void *)(long)chain->ip); } + ret += __callchain__fprintf_graph(fp, self, total_samples, 1, 1, left_margin); + return ret; } @@ -577,7 +282,7 @@ callchain__fprintf_flat(FILE *fp, struct callchain_node *self, static size_t hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, - u64 total_samples) + u64 total_samples, int left_margin) { struct rb_node *rb_node; struct callchain_node *chain; @@ -597,8 +302,8 @@ hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, break; case CHAIN_GRAPH_ABS: /* Falldown */ case CHAIN_GRAPH_REL: - ret += callchain__fprintf_graph(fp, chain, - total_samples, 1, 1); + ret += callchain__fprintf_graph(fp, chain, total_samples, + left_margin); case CHAIN_NONE: default: break; @@ -610,7 +315,6 @@ hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, return ret; } - static size_t hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples) { @@ -644,8 +348,19 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples) ret += fprintf(fp, "\n"); - if (callchain) - hist_entry_callchain__fprintf(fp, self, total_samples); + if (callchain) { + int left_margin = 0; + + if (sort__first_dimension == SORT_COMM) { + se = list_first_entry(&hist_entry__sort_list, typeof(*se), + list); + left_margin = se->width ? *se->width : 0; + left_margin -= thread__comm_len(self->thread); + } + + hist_entry_callchain__fprintf(fp, self, total_samples, + left_margin); + } return ret; } @@ -693,63 +408,6 @@ static int thread__set_comm_adjust(struct thread *self, const char *comm) return 0; } - -static struct symbol * -resolve_symbol(struct thread *thread, struct map **mapp, - struct dso **dsop, u64 *ipp) -{ - struct dso *dso = dsop ? *dsop : NULL; - struct map *map = mapp ? *mapp : NULL; - u64 ip = *ipp; - - if (!thread) - return NULL; - - if (dso) - goto got_dso; - - if (map) - goto got_map; - - map = thread__find_map(thread, ip); - if (map != NULL) { - /* - * We have to do this here as we may have a dso - * with no symbol hit that has a name longer than - * the ones with symbols sampled. - */ - if (!sort_dso.elide && !map->dso->slen_calculated) - dso__calc_col_width(map->dso); - - if (mapp) - *mapp = map; -got_map: - ip = map->map_ip(map, ip); - - dso = map->dso; - } else { - /* - * If this is outside of all known maps, - * and is a negative address, try to look it - * up in the kernel dso, as it might be a - * vsyscall (which executes in user-mode): - */ - if ((long long)ip < 0) - dso = kernel_dso; - } - dump_printf(" ...... dso: %s\n", dso ? dso->name : "<not found>"); - dump_printf(" ...... map: %Lx -> %Lx\n", *ipp, ip); - *ipp = ip; - - if (dsop) - *dsop = dso; - - if (!dso) - return NULL; -got_dso: - return dso->find_symbol(dso, ip); -} - static int call__match(struct symbol *sym) { if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0)) @@ -758,11 +416,11 @@ static int call__match(struct symbol *sym) return 0; } -static struct symbol ** -resolve_callchain(struct thread *thread, struct map *map __used, - struct ip_callchain *chain, struct hist_entry *entry) +static struct symbol **resolve_callchain(struct thread *thread, + struct ip_callchain *chain, + struct symbol **parent) { - u64 context = PERF_CONTEXT_MAX; + u8 cpumode = PERF_RECORD_MISC_USER; struct symbol **syms = NULL; unsigned int i; @@ -776,34 +434,31 @@ resolve_callchain(struct thread *thread, struct map *map __used, for (i = 0; i < chain->nr; i++) { u64 ip = chain->ips[i]; - struct dso *dso = NULL; - struct symbol *sym; + struct addr_location al; if (ip >= PERF_CONTEXT_MAX) { - context = ip; + switch (ip) { + case PERF_CONTEXT_HV: + cpumode = PERF_RECORD_MISC_HYPERVISOR; break; + case PERF_CONTEXT_KERNEL: + cpumode = PERF_RECORD_MISC_KERNEL; break; + case PERF_CONTEXT_USER: + cpumode = PERF_RECORD_MISC_USER; break; + default: + break; + } continue; } - switch (context) { - case PERF_CONTEXT_HV: - dso = hypervisor_dso; - break; - case PERF_CONTEXT_KERNEL: - dso = kernel_dso; - break; - default: - break; - } - - sym = resolve_symbol(thread, NULL, &dso, &ip); - - if (sym) { - if (sort__has_parent && call__match(sym) && - !entry->parent) - entry->parent = sym; + thread__find_addr_location(thread, cpumode, MAP__FUNCTION, + ip, &al, NULL); + if (al.sym != NULL) { + if (sort__has_parent && !*parent && + call__match(al.sym)) + *parent = al.sym; if (!callchain) break; - syms[i] = sym; + syms[i] = al.sym; } } @@ -814,178 +469,33 @@ resolve_callchain(struct thread *thread, struct map *map __used, * collect histogram counts */ -static int -hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, - struct symbol *sym, u64 ip, struct ip_callchain *chain, - char level, u64 count) +static int hist_entry__add(struct addr_location *al, + struct ip_callchain *chain, u64 count) { - struct rb_node **p = &hist.rb_node; - struct rb_node *parent = NULL; + struct symbol **syms = NULL, *parent = NULL; + bool hit; struct hist_entry *he; - struct symbol **syms = NULL; - struct hist_entry entry = { - .thread = thread, - .map = map, - .dso = dso, - .sym = sym, - .ip = ip, - .level = level, - .count = count, - .parent = NULL, - .sorted_chain = RB_ROOT - }; - int cmp; if ((sort__has_parent || callchain) && chain) - syms = resolve_callchain(thread, map, chain, &entry); + syms = resolve_callchain(al->thread, chain, &parent); - while (*p != NULL) { - parent = *p; - he = rb_entry(parent, struct hist_entry, rb_node); + he = __hist_entry__add(al, parent, count, &hit); + if (he == NULL) + return -ENOMEM; - cmp = hist_entry__cmp(&entry, he); + if (hit) + he->count += count; - if (!cmp) { - he->count += count; - if (callchain) { - append_chain(&he->callchain, chain, syms); - free(syms); - } - return 0; - } - - if (cmp < 0) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - he = malloc(sizeof(*he)); - if (!he) - return -ENOMEM; - *he = entry; if (callchain) { - callchain_init(&he->callchain); + if (!hit) + callchain_init(&he->callchain); append_chain(&he->callchain, chain, syms); free(syms); } - rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, &hist); return 0; } -static void hist_entry__free(struct hist_entry *he) -{ - free(he); -} - -/* - * collapse the histogram - */ - -static struct rb_root collapse_hists; - -static void collapse__insert_entry(struct hist_entry *he) -{ - struct rb_node **p = &collapse_hists.rb_node; - struct rb_node *parent = NULL; - struct hist_entry *iter; - int64_t cmp; - - while (*p != NULL) { - parent = *p; - iter = rb_entry(parent, struct hist_entry, rb_node); - - cmp = hist_entry__collapse(iter, he); - - if (!cmp) { - iter->count += he->count; - hist_entry__free(he); - return; - } - - if (cmp < 0) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, &collapse_hists); -} - -static void collapse__resort(void) -{ - struct rb_node *next; - struct hist_entry *n; - - if (!sort__need_collapse) - return; - - next = rb_first(&hist); - while (next) { - n = rb_entry(next, struct hist_entry, rb_node); - next = rb_next(&n->rb_node); - - rb_erase(&n->rb_node, &hist); - collapse__insert_entry(n); - } -} - -/* - * reverse the map, sort on count. - */ - -static struct rb_root output_hists; - -static void output__insert_entry(struct hist_entry *he, u64 min_callchain_hits) -{ - struct rb_node **p = &output_hists.rb_node; - struct rb_node *parent = NULL; - struct hist_entry *iter; - - if (callchain) - callchain_param.sort(&he->sorted_chain, &he->callchain, - min_callchain_hits, &callchain_param); - - while (*p != NULL) { - parent = *p; - iter = rb_entry(parent, struct hist_entry, rb_node); - - if (he->count > iter->count) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, &output_hists); -} - -static void output__resort(u64 total_samples) -{ - struct rb_node *next; - struct hist_entry *n; - struct rb_root *tree = &hist; - u64 min_callchain_hits; - - min_callchain_hits = total_samples * (callchain_param.min_percent / 100); - - if (sort__need_collapse) - tree = &collapse_hists; - - next = rb_first(tree); - - while (next) { - n = rb_entry(next, struct hist_entry, rb_node); - next = rb_next(&n->rb_node); - - rb_erase(&n->rb_node, tree); - output__insert_entry(n, min_callchain_hits); - } -} - static size_t output__fprintf(FILE *fp, u64 total_samples) { struct hist_entry *pos; @@ -1080,13 +590,6 @@ print_entries: return ret; } -static unsigned long total = 0, - total_mmap = 0, - total_comm = 0, - total_fork = 0, - total_unknown = 0, - total_lost = 0; - static int validate_chain(struct ip_callchain *chain, event_t *event) { unsigned int chain_size; @@ -1100,30 +603,22 @@ static int validate_chain(struct ip_callchain *chain, event_t *event) return 0; } -static int -process_sample_event(event_t *event, unsigned long offset, unsigned long head) +static int process_sample_event(event_t *event) { - char level; - int show = 0; - struct dso *dso = NULL; - struct thread *thread; u64 ip = event->ip.ip; u64 period = 1; - struct map *map = NULL; void *more_data = event->ip.__more_data; struct ip_callchain *chain = NULL; int cpumode; - - thread = threads__findnew(event->ip.pid, &threads, &last_match); + struct addr_location al; + struct thread *thread = threads__findnew(event->ip.pid); if (sample_type & PERF_SAMPLE_PERIOD) { period = *(u64 *)more_data; more_data += sizeof(u64); } - dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", - (void *)(offset + head), - (void *)(long)(event->header.size), + dump_printf("(IP, %d): %d/%d: %p period: %Ld\n", event->header.misc, event->ip.pid, event->ip.tid, (void *)(long)ip, @@ -1137,7 +632,8 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) dump_printf("... chain: nr:%Lu\n", chain->nr); if (validate_chain(chain, event) < 0) { - eprintf("call-chain problem with event, skipping it.\n"); + pr_debug("call-chain problem with event, " + "skipping it.\n"); return 0; } @@ -1147,163 +643,64 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) } } - dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); - if (thread == NULL) { - eprintf("problem processing %d event, skipping it.\n", + pr_debug("problem processing %d event, skipping it.\n", event->header.type); return -1; } + dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); + if (comm_list && !strlist__has_entry(comm_list, thread->comm)) return 0; cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - if (cpumode == PERF_RECORD_MISC_KERNEL) { - show = SHOW_KERNEL; - level = 'k'; - - dso = kernel_dso; - - dump_printf(" ...... dso: %s\n", dso->name); - - } else if (cpumode == PERF_RECORD_MISC_USER) { - - show = SHOW_USER; - level = '.'; - - } else { - show = SHOW_HV; - level = 'H'; - - dso = hypervisor_dso; - - dump_printf(" ...... dso: [hypervisor]\n"); - } - - if (show & show_mask) { - struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip); - - if (dso_list && (!dso || !dso->name || - !strlist__has_entry(dso_list, dso->name))) - return 0; - - if (sym_list && (!sym || !strlist__has_entry(sym_list, sym->name))) - return 0; - - if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) { - eprintf("problem incrementing symbol count, skipping event\n"); - return -1; - } - } - total += period; - - return 0; -} + thread__find_addr_location(thread, cpumode, + MAP__FUNCTION, ip, &al, NULL); + /* + * We have to do this here as we may have a dso with no symbol hit that + * has a name longer than the ones with symbols sampled. + */ + if (al.map && !sort_dso.elide && !al.map->dso->slen_calculated) + dso__calc_col_width(al.map->dso); + + if (dso_list && + (!al.map || !al.map->dso || + !(strlist__has_entry(dso_list, al.map->dso->short_name) || + (al.map->dso->short_name != al.map->dso->long_name && + strlist__has_entry(dso_list, al.map->dso->long_name))))) + return 0; -static int -process_mmap_event(event_t *event, unsigned long offset, unsigned long head) -{ - struct thread *thread; - struct map *map = map__new(&event->mmap, cwd, cwdlen); - - thread = threads__findnew(event->mmap.pid, &threads, &last_match); - - dump_printf("%p [%p]: PERF_RECORD_MMAP %d/%d: [%p(%p) @ %p]: %s\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->mmap.pid, - event->mmap.tid, - (void *)(long)event->mmap.start, - (void *)(long)event->mmap.len, - (void *)(long)event->mmap.pgoff, - event->mmap.filename); - - if (thread == NULL || map == NULL) { - dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n"); + if (sym_list && al.sym && !strlist__has_entry(sym_list, al.sym->name)) return 0; + + if (hist_entry__add(&al, chain, period)) { + pr_debug("problem incrementing symbol count, skipping event\n"); + return -1; } - thread__insert_map(thread, map); - total_mmap++; + event__stats.total += period; return 0; } -static int -process_comm_event(event_t *event, unsigned long offset, unsigned long head) +static int process_comm_event(event_t *event) { - struct thread *thread; - - thread = threads__findnew(event->comm.pid, &threads, &last_match); + struct thread *thread = threads__findnew(event->comm.pid); - dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->comm.comm, event->comm.pid); + dump_printf(": %s:%d\n", event->comm.comm, event->comm.pid); if (thread == NULL || thread__set_comm_adjust(thread, event->comm.comm)) { dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); return -1; } - total_comm++; - - return 0; -} - -static int -process_task_event(event_t *event, unsigned long offset, unsigned long head) -{ - struct thread *thread; - struct thread *parent; - - thread = threads__findnew(event->fork.pid, &threads, &last_match); - parent = threads__findnew(event->fork.ppid, &threads, &last_match); - - dump_printf("%p [%p]: PERF_RECORD_%s: (%d:%d):(%d:%d)\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->header.type == PERF_RECORD_FORK ? "FORK" : "EXIT", - event->fork.pid, event->fork.tid, - event->fork.ppid, event->fork.ptid); - - /* - * A thread clone will have the same PID for both - * parent and child. - */ - if (thread == parent) - return 0; - - if (event->header.type == PERF_RECORD_EXIT) - return 0; - - if (!thread || !parent || thread__fork(thread, parent)) { - dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n"); - return -1; - } - total_fork++; return 0; } -static int -process_lost_event(event_t *event, unsigned long offset, unsigned long head) -{ - dump_printf("%p [%p]: PERF_RECORD_LOST: id:%Ld: lost:%Ld\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->lost.id, - event->lost.lost); - - total_lost += event->lost.lost; - - return 0; -} - -static int -process_read_event(event_t *event, unsigned long offset, unsigned long head) +static int process_read_event(event_t *event) { struct perf_event_attr *attr; @@ -1319,238 +716,91 @@ process_read_event(event_t *event, unsigned long offset, unsigned long head) event->read.value); } - dump_printf("%p [%p]: PERF_RECORD_READ: %d %d %s %Lu\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->read.pid, - event->read.tid, - attr ? __event_name(attr->type, attr->config) - : "FAIL", - event->read.value); - - return 0; -} - -static int -process_event(event_t *event, unsigned long offset, unsigned long head) -{ - trace_event(event); - - switch (event->header.type) { - case PERF_RECORD_SAMPLE: - return process_sample_event(event, offset, head); - - case PERF_RECORD_MMAP: - return process_mmap_event(event, offset, head); - - case PERF_RECORD_COMM: - return process_comm_event(event, offset, head); - - case PERF_RECORD_FORK: - case PERF_RECORD_EXIT: - return process_task_event(event, offset, head); - - case PERF_RECORD_LOST: - return process_lost_event(event, offset, head); - - case PERF_RECORD_READ: - return process_read_event(event, offset, head); - - /* - * We dont process them right now but they are fine: - */ - - case PERF_RECORD_THROTTLE: - case PERF_RECORD_UNTHROTTLE: - return 0; - - default: - return -1; - } + dump_printf(": %d %d %s %Lu\n", event->read.pid, event->read.tid, + attr ? __event_name(attr->type, attr->config) : "FAIL", + event->read.value); return 0; } -static int __cmd_report(void) +static int sample_type_check(u64 type) { - int ret, rc = EXIT_FAILURE; - unsigned long offset = 0; - unsigned long head, shift; - struct stat input_stat; - struct thread *idle; - event_t *event; - uint32_t size; - char *buf; - - idle = register_idle_thread(&threads, &last_match); - thread__comm_adjust(idle); - - if (show_threads) - perf_read_values_init(&show_threads_values); - - input = open(input_name, O_RDONLY); - if (input < 0) { - fprintf(stderr, " failed to open file: %s", input_name); - if (!strcmp(input_name, "perf.data")) - fprintf(stderr, " (try 'perf record' first)"); - fprintf(stderr, "\n"); - exit(-1); - } - - ret = fstat(input, &input_stat); - if (ret < 0) { - perror("failed to stat file"); - exit(-1); - } - - if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) { - fprintf(stderr, "file: %s not owned by current user or root\n", input_name); - exit(-1); - } - - if (!input_stat.st_size) { - fprintf(stderr, "zero-sized file, nothing to do!\n"); - exit(0); - } - - header = perf_header__read(input); - head = header->data_offset; - - sample_type = perf_header__sample_type(header); + sample_type = type; if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) { if (sort__has_parent) { fprintf(stderr, "selected --sort parent, but no" " callchain data. Did you call" " perf record without -g?\n"); - exit(-1); + return -1; } if (callchain) { fprintf(stderr, "selected -g but no callchain data." " Did you call perf record without" " -g?\n"); - exit(-1); + return -1; } } else if (callchain_param.mode != CHAIN_NONE && !callchain) { callchain = 1; if (register_callchain_param(&callchain_param) < 0) { fprintf(stderr, "Can't register callchain" " params\n"); - exit(-1); + return -1; } } - if (load_kernel() < 0) { - perror("failed to load kernel symbols"); - return EXIT_FAILURE; - } - - if (!full_paths) { - if (getcwd(__cwd, sizeof(__cwd)) == NULL) { - perror("failed to get the current directory"); - return EXIT_FAILURE; - } - cwdlen = strlen(cwd); - } else { - cwd = NULL; - cwdlen = 0; - } - - shift = page_size * (head / page_size); - offset += shift; - head -= shift; - -remap: - buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, - MAP_SHARED, input, offset); - if (buf == MAP_FAILED) { - perror("failed to mmap file"); - exit(-1); - } - -more: - event = (event_t *)(buf + head); - - size = event->header.size; - if (!size) - size = 8; - - if (head + event->header.size >= page_size * mmap_window) { - int munmap_ret; - - shift = page_size * (head / page_size); - - munmap_ret = munmap(buf, page_size * mmap_window); - assert(munmap_ret == 0); - - offset += shift; - head -= shift; - goto remap; - } - - size = event->header.size; - - dump_printf("\n%p [%p]: event: %d\n", - (void *)(offset + head), - (void *)(long)event->header.size, - event->header.type); - - if (!size || process_event(event, offset, head) < 0) { - - dump_printf("%p [%p]: skipping unknown header type: %d\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->header.type); - - total_unknown++; - - /* - * assume we lost track of the stream, check alignment, and - * increment a single u64 in the hope to catch on again 'soon'. - */ + return 0; +} - if (unlikely(head & 7)) - head &= ~7ULL; +static struct perf_file_handler file_handler = { + .process_sample_event = process_sample_event, + .process_mmap_event = event__process_mmap, + .process_comm_event = process_comm_event, + .process_exit_event = event__process_task, + .process_fork_event = event__process_task, + .process_lost_event = event__process_lost, + .process_read_event = process_read_event, + .sample_type_check = sample_type_check, +}; - size = 8; - } - head += size; +static int __cmd_report(void) +{ + struct thread *idle; + int ret; - if (offset + head >= header->data_offset + header->data_size) - goto done; + idle = register_idle_thread(); + thread__comm_adjust(idle); - if (offset + head < (unsigned long)input_stat.st_size) - goto more; + if (show_threads) + perf_read_values_init(&show_threads_values); -done: - rc = EXIT_SUCCESS; - close(input); + register_perf_file_handler(&file_handler); - dump_printf(" IP events: %10ld\n", total); - dump_printf(" mmap events: %10ld\n", total_mmap); - dump_printf(" comm events: %10ld\n", total_comm); - dump_printf(" fork events: %10ld\n", total_fork); - dump_printf(" lost events: %10ld\n", total_lost); - dump_printf(" unknown events: %10ld\n", total_unknown); + ret = mmap_dispatch_perf_file(&header, input_name, force, + full_paths, &event__cwdlen, &event__cwd); + if (ret) + return ret; - if (dump_trace) + if (dump_trace) { + event__print_totals(); return 0; + } - if (verbose >= 3) - threads__fprintf(stdout, &threads); + if (verbose > 3) + threads__fprintf(stdout); - if (verbose >= 2) + if (verbose > 2) dsos__fprintf(stdout); collapse__resort(); - output__resort(total); - output__fprintf(stdout, total); + output__resort(event__stats.total); + output__fprintf(stdout, event__stats.total); if (show_threads) perf_read_values_destroy(&show_threads_values); - return rc; + return ret; } static int @@ -1606,7 +856,8 @@ setup: return 0; } -static const char * const report_usage[] = { +//static const char * const report_usage[] = { +const char * const report_usage[] = { "perf report [<options>] <command>", NULL }; @@ -1618,9 +869,10 @@ static const struct option options[] = { "be more verbose (show symbol address, etc)"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), - OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"), + OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, + "file", "vmlinux pathname"), OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), - OPT_BOOLEAN('m', "modules", &modules, + OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, "load module symbols - WARNING: use only with -k and LIVE kernel"), OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples, "Show a column with the number of samples"), @@ -1690,9 +942,8 @@ static void setup_list(struct strlist **list, const char *list_str, int cmd_report(int argc, const char **argv, const char *prefix __used) { - symbol__init(); - - page_size = getpagesize(); + if (symbol__init(&symbol_conf) < 0) + return -1; argc = parse_options(argc, argv, options, report_usage, 0); diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index ce2d5be4f30..26b782f26ee 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -11,6 +11,7 @@ #include "util/trace-event.h" #include "util/debug.h" +#include "util/data_map.h" #include <sys/types.h> #include <sys/prctl.h> @@ -20,14 +21,6 @@ #include <math.h> static char const *input_name = "perf.data"; -static int input; -static unsigned long page_size; -static unsigned long mmap_window = 32; - -static unsigned long total_comm = 0; - -static struct rb_root threads; -static struct thread *last_match; static struct perf_header *header; static u64 sample_type; @@ -35,11 +28,11 @@ static u64 sample_type; static char default_sort_order[] = "avg, max, switch, runtime"; static char *sort_order = default_sort_order; +static int profile_cpu = -1; + #define PR_SET_NAME 15 /* Set process name */ #define MAX_CPUS 4096 -#define BUG_ON(x) assert(!(x)) - static u64 run_measurement_overhead; static u64 sleep_measurement_overhead; @@ -74,6 +67,7 @@ enum sched_event_type { SCHED_EVENT_RUN, SCHED_EVENT_SLEEP, SCHED_EVENT_WAKEUP, + SCHED_EVENT_MIGRATION, }; struct sched_atom { @@ -226,7 +220,7 @@ static void calibrate_sleep_measurement_overhead(void) static struct sched_atom * get_new_event(struct task_desc *task, u64 timestamp) { - struct sched_atom *event = calloc(1, sizeof(*event)); + struct sched_atom *event = zalloc(sizeof(*event)); unsigned long idx = task->nr_events; size_t size; @@ -294,7 +288,7 @@ add_sched_event_wakeup(struct task_desc *task, u64 timestamp, return; } - wakee_event->wait_sem = calloc(1, sizeof(*wakee_event->wait_sem)); + wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem)); sem_init(wakee_event->wait_sem, 0, 0); wakee_event->specific_wait = 1; event->wait_sem = wakee_event->wait_sem; @@ -324,7 +318,7 @@ static struct task_desc *register_pid(unsigned long pid, const char *comm) if (task) return task; - task = calloc(1, sizeof(*task)); + task = zalloc(sizeof(*task)); task->pid = pid; task->nr = nr_tasks; strcpy(task->comm, comm); @@ -398,6 +392,8 @@ process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom) ret = sem_post(atom->wait_sem); BUG_ON(ret); break; + case SCHED_EVENT_MIGRATION: + break; default: BUG_ON(1); } @@ -632,29 +628,6 @@ static void test_calibrations(void) printf("the sleep test took %Ld nsecs\n", T1-T0); } -static int -process_comm_event(event_t *event, unsigned long offset, unsigned long head) -{ - struct thread *thread; - - thread = threads__findnew(event->comm.pid, &threads, &last_match); - - dump_printf("%p [%p]: perf_event_comm: %s:%d\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->comm.comm, event->comm.pid); - - if (thread == NULL || - thread__set_comm(thread, event->comm.comm)) { - dump_printf("problem processing perf_event_comm, skipping event.\n"); - return -1; - } - total_comm++; - - return 0; -} - - struct raw_event_sample { u32 size; char data[0]; @@ -745,6 +718,22 @@ struct trace_fork_event { u32 child_pid; }; +struct trace_migrate_task_event { + u32 size; + + u16 common_type; + u8 common_flags; + u8 common_preempt_count; + u32 common_pid; + u32 common_tgid; + + char comm[16]; + u32 pid; + + u32 prio; + u32 cpu; +}; + struct trace_sched_handler { void (*switch_event)(struct trace_switch_event *, struct event *, @@ -769,6 +758,12 @@ struct trace_sched_handler { int cpu, u64 timestamp, struct thread *thread); + + void (*migrate_task_event)(struct trace_migrate_task_event *, + struct event *, + int cpu, + u64 timestamp, + struct thread *thread); }; @@ -941,9 +936,7 @@ __thread_latency_insert(struct rb_root *root, struct work_atoms *data, static void thread_atoms_insert(struct thread *thread) { - struct work_atoms *atoms; - - atoms = calloc(sizeof(*atoms), 1); + struct work_atoms *atoms = zalloc(sizeof(*atoms)); if (!atoms) die("No memory"); @@ -975,9 +968,7 @@ add_sched_out_event(struct work_atoms *atoms, char run_state, u64 timestamp) { - struct work_atom *atom; - - atom = calloc(sizeof(*atom), 1); + struct work_atom *atom = zalloc(sizeof(*atom)); if (!atom) die("Non memory"); @@ -1058,8 +1049,8 @@ latency_switch_event(struct trace_switch_event *switch_event, die("hm, delta: %Ld < 0 ?\n", delta); - sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match); - sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match); + sched_out = threads__findnew(switch_event->prev_pid); + sched_in = threads__findnew(switch_event->next_pid); out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid); if (!out_events) { @@ -1092,13 +1083,10 @@ latency_runtime_event(struct trace_runtime_event *runtime_event, u64 timestamp, struct thread *this_thread __used) { - struct work_atoms *atoms; - struct thread *thread; + struct thread *thread = threads__findnew(runtime_event->pid); + struct work_atoms *atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); BUG_ON(cpu >= MAX_CPUS || cpu < 0); - - thread = threads__findnew(runtime_event->pid, &threads, &last_match); - atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); if (!atoms) { thread_atoms_insert(thread); atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); @@ -1125,7 +1113,7 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event, if (!wakeup_event->success) return; - wakee = threads__findnew(wakeup_event->pid, &threads, &last_match); + wakee = threads__findnew(wakeup_event->pid); atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid); if (!atoms) { thread_atoms_insert(wakee); @@ -1139,7 +1127,12 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event, atom = list_entry(atoms->work_list.prev, struct work_atom, list); - if (atom->state != THREAD_SLEEPING) + /* + * You WILL be missing events if you've recorded only + * one CPU, or are only looking at only one, so don't + * make useless noise. + */ + if (profile_cpu == -1 && atom->state != THREAD_SLEEPING) nr_state_machine_bugs++; nr_timestamps++; @@ -1152,11 +1145,51 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event, atom->wake_up_time = timestamp; } +static void +latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event, + struct event *__event __used, + int cpu __used, + u64 timestamp, + struct thread *thread __used) +{ + struct work_atoms *atoms; + struct work_atom *atom; + struct thread *migrant; + + /* + * Only need to worry about migration when profiling one CPU. + */ + if (profile_cpu == -1) + return; + + migrant = threads__findnew(migrate_task_event->pid); + atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid); + if (!atoms) { + thread_atoms_insert(migrant); + register_pid(migrant->pid, migrant->comm); + atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid); + if (!atoms) + die("migration-event: Internal tree error"); + add_sched_out_event(atoms, 'R', timestamp); + } + + BUG_ON(list_empty(&atoms->work_list)); + + atom = list_entry(atoms->work_list.prev, struct work_atom, list); + atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp; + + nr_timestamps++; + + if (atom->sched_out_time > timestamp) + nr_unordered_timestamps++; +} + static struct trace_sched_handler lat_ops = { .wakeup_event = latency_wakeup_event, .switch_event = latency_switch_event, .runtime_event = latency_runtime_event, .fork_event = latency_fork_event, + .migrate_task_event = latency_migrate_task_event, }; static void output_lat_thread(struct work_atoms *work_list) @@ -1385,8 +1418,8 @@ map_switch_event(struct trace_switch_event *switch_event, die("hm, delta: %Ld < 0 ?\n", delta); - sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match); - sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match); + sched_out = threads__findnew(switch_event->prev_pid); + sched_in = threads__findnew(switch_event->next_pid); curr_thread[this_cpu] = sched_in; @@ -1517,6 +1550,26 @@ process_sched_exit_event(struct event *event, } static void +process_sched_migrate_task_event(struct raw_event_sample *raw, + struct event *event, + int cpu __used, + u64 timestamp __used, + struct thread *thread __used) +{ + struct trace_migrate_task_event migrate_task_event; + + FILL_COMMON_FIELDS(migrate_task_event, event, raw->data); + + FILL_ARRAY(migrate_task_event, comm, event, raw->data); + FILL_FIELD(migrate_task_event, pid, event, raw->data); + FILL_FIELD(migrate_task_event, prio, event, raw->data); + FILL_FIELD(migrate_task_event, cpu, event, raw->data); + + if (trace_handler->migrate_task_event) + trace_handler->migrate_task_event(&migrate_task_event, event, cpu, timestamp, thread); +} + +static void process_raw_event(event_t *raw_event __used, void *more_data, int cpu, u64 timestamp, struct thread *thread) { @@ -1539,23 +1592,23 @@ process_raw_event(event_t *raw_event __used, void *more_data, process_sched_fork_event(raw, event, cpu, timestamp, thread); if (!strcmp(event->name, "sched_process_exit")) process_sched_exit_event(event, cpu, timestamp, thread); + if (!strcmp(event->name, "sched_migrate_task")) + process_sched_migrate_task_event(raw, event, cpu, timestamp, thread); } -static int -process_sample_event(event_t *event, unsigned long offset, unsigned long head) +static int process_sample_event(event_t *event) { - char level; - int show = 0; - struct dso *dso = NULL; struct thread *thread; u64 ip = event->ip.ip; u64 timestamp = -1; u32 cpu = -1; u64 period = 1; void *more_data = event->ip.__more_data; - int cpumode; - thread = threads__findnew(event->ip.pid, &threads, &last_match); + if (!(sample_type & PERF_SAMPLE_RAW)) + return 0; + + thread = threads__findnew(event->ip.pid); if (sample_type & PERF_SAMPLE_TIME) { timestamp = *(u64 *)more_data; @@ -1573,177 +1626,64 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) more_data += sizeof(u64); } - dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", - (void *)(offset + head), - (void *)(long)(event->header.size), + dump_printf("(IP, %d): %d/%d: %p period: %Ld\n", event->header.misc, event->ip.pid, event->ip.tid, (void *)(long)ip, (long long)period); - dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); - if (thread == NULL) { - eprintf("problem processing %d event, skipping it.\n", - event->header.type); + pr_debug("problem processing %d event, skipping it.\n", + event->header.type); return -1; } - cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - - if (cpumode == PERF_RECORD_MISC_KERNEL) { - show = SHOW_KERNEL; - level = 'k'; - - dso = kernel_dso; - - dump_printf(" ...... dso: %s\n", dso->name); - - } else if (cpumode == PERF_RECORD_MISC_USER) { - - show = SHOW_USER; - level = '.'; - - } else { - show = SHOW_HV; - level = 'H'; - - dso = hypervisor_dso; + dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); - dump_printf(" ...... dso: [hypervisor]\n"); - } + if (profile_cpu != -1 && profile_cpu != (int) cpu) + return 0; - if (sample_type & PERF_SAMPLE_RAW) - process_raw_event(event, more_data, cpu, timestamp, thread); + process_raw_event(event, more_data, cpu, timestamp, thread); return 0; } -static int -process_event(event_t *event, unsigned long offset, unsigned long head) +static int process_lost_event(event_t *event __used) { - trace_event(event); - - nr_events++; - switch (event->header.type) { - case PERF_RECORD_MMAP: - return 0; - case PERF_RECORD_LOST: - nr_lost_chunks++; - nr_lost_events += event->lost.lost; - return 0; - - case PERF_RECORD_COMM: - return process_comm_event(event, offset, head); + nr_lost_chunks++; + nr_lost_events += event->lost.lost; - case PERF_RECORD_EXIT ... PERF_RECORD_READ: - return 0; + return 0; +} - case PERF_RECORD_SAMPLE: - return process_sample_event(event, offset, head); +static int sample_type_check(u64 type) +{ + sample_type = type; - case PERF_RECORD_MAX: - default: + if (!(sample_type & PERF_SAMPLE_RAW)) { + fprintf(stderr, + "No trace sample to read. Did you call perf record " + "without -R?"); return -1; } return 0; } +static struct perf_file_handler file_handler = { + .process_sample_event = process_sample_event, + .process_comm_event = event__process_comm, + .process_lost_event = process_lost_event, + .sample_type_check = sample_type_check, +}; + static int read_events(void) { - int ret, rc = EXIT_FAILURE; - unsigned long offset = 0; - unsigned long head = 0; - struct stat perf_stat; - event_t *event; - uint32_t size; - char *buf; - - trace_report(); - register_idle_thread(&threads, &last_match); - - input = open(input_name, O_RDONLY); - if (input < 0) { - perror("failed to open file"); - exit(-1); - } - - ret = fstat(input, &perf_stat); - if (ret < 0) { - perror("failed to stat file"); - exit(-1); - } - - if (!perf_stat.st_size) { - fprintf(stderr, "zero-sized file, nothing to do!\n"); - exit(0); - } - header = perf_header__read(input); - head = header->data_offset; - sample_type = perf_header__sample_type(header); - - if (!(sample_type & PERF_SAMPLE_RAW)) - die("No trace sample to read. Did you call perf record " - "without -R?"); - - if (load_kernel() < 0) { - perror("failed to load kernel symbols"); - return EXIT_FAILURE; - } - -remap: - buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, - MAP_SHARED, input, offset); - if (buf == MAP_FAILED) { - perror("failed to mmap file"); - exit(-1); - } - -more: - event = (event_t *)(buf + head); - - size = event->header.size; - if (!size) - size = 8; - - if (head + event->header.size >= page_size * mmap_window) { - unsigned long shift = page_size * (head / page_size); - int res; - - res = munmap(buf, page_size * mmap_window); - assert(res == 0); - - offset += shift; - head -= shift; - goto remap; - } - - size = event->header.size; - - - if (!size || process_event(event, offset, head) < 0) { - - /* - * assume we lost track of the stream, check alignment, and - * increment a single u64 in the hope to catch on again 'soon'. - */ - - if (unlikely(head & 7)) - head &= ~7ULL; - - size = 8; - } - - head += size; - - if (offset + head < (unsigned long)perf_stat.st_size) - goto more; - - rc = EXIT_SUCCESS; - close(input); + register_idle_thread(); + register_perf_file_handler(&file_handler); - return rc; + return mmap_dispatch_perf_file(&header, input_name, 0, 0, + &event__cwdlen, &event__cwd); } static void print_bad_events(void) @@ -1883,6 +1823,8 @@ static const struct option latency_options[] = { "sort by key(s): runtime, switch, avg, max"), OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), + OPT_INTEGER('C', "CPU", &profile_cpu, + "CPU to profile on"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), OPT_END() @@ -1960,8 +1902,7 @@ static int __cmd_record(int argc, const char **argv) int cmd_sched(int argc, const char **argv, const char *prefix __used) { - symbol__init(); - page_size = getpagesize(); + symbol__init(0); argc = parse_options(argc, argv, sched_options, sched_usage, PARSE_OPT_STOP_AT_NON_OPTION); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 3db31e7bf17..c70d7200355 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -50,15 +50,17 @@ static struct perf_event_attr default_attrs[] = { - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES}, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, - - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES}, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, + + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, }; @@ -125,6 +127,7 @@ struct stats event_res_stats[MAX_COUNTERS][3]; struct stats runtime_nsecs_stats; struct stats walltime_nsecs_stats; struct stats runtime_cycles_stats; +struct stats runtime_branches_stats; #define MATCH_EVENT(t, c, counter) \ (attrs[counter].type == PERF_TYPE_##t && \ @@ -235,6 +238,8 @@ static void read_counter(int counter) update_stats(&runtime_nsecs_stats, count[0]); if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) update_stats(&runtime_cycles_stats, count[0]); + if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter)) + update_stats(&runtime_branches_stats, count[0]); } static int run_perf_stat(int argc __used, const char **argv) @@ -352,7 +357,16 @@ static void abs_printout(int counter, double avg) ratio = avg / total; fprintf(stderr, " # %10.3f IPC ", ratio); - } else { + } else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter) && + runtime_branches_stats.n != 0) { + total = avg_stats(&runtime_branches_stats); + + if (total) + ratio = avg * 100 / total; + + fprintf(stderr, " # %10.3f %% ", ratio); + + } else if (runtime_nsecs_stats.n != 0) { total = avg_stats(&runtime_nsecs_stats); if (total) diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index e8a510d935e..cb58b6605fc 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -29,14 +29,14 @@ #include "util/header.h" #include "util/parse-options.h" #include "util/parse-events.h" +#include "util/event.h" +#include "util/data_map.h" #include "util/svghelper.h" static char const *input_name = "perf.data"; static char const *output_name = "output.svg"; -static unsigned long page_size; -static unsigned long mmap_window = 32; static u64 sample_type; static unsigned int numcpus; @@ -49,8 +49,6 @@ static u64 first_time, last_time; static int power_only; -static struct perf_header *header; - struct per_pid; struct per_pidcomm; @@ -153,6 +151,17 @@ static struct wake_event *wake_events; struct sample_wrapper *all_samples; + +struct process_filter; +struct process_filter { + char *name; + int pid; + struct process_filter *next; +}; + +static struct process_filter *process_filter; + + static struct per_pid *find_create_pid(int pid) { struct per_pid *cursor = all_data; @@ -763,11 +772,11 @@ static void draw_wakeups(void) c = p->all; while (c) { if (c->Y && c->start_time <= we->time && c->end_time >= we->time) { - if (p->pid == we->waker) { + if (p->pid == we->waker && !from) { from = c->Y; task_from = strdup(c->comm); } - if (p->pid == we->wakee) { + if (p->pid == we->wakee && !to) { to = c->Y; task_to = strdup(c->comm); } @@ -882,12 +891,89 @@ static void draw_process_bars(void) } } +static void add_process_filter(const char *string) +{ + struct process_filter *filt; + int pid; + + pid = strtoull(string, NULL, 10); + filt = malloc(sizeof(struct process_filter)); + if (!filt) + return; + + filt->name = strdup(string); + filt->pid = pid; + filt->next = process_filter; + + process_filter = filt; +} + +static int passes_filter(struct per_pid *p, struct per_pidcomm *c) +{ + struct process_filter *filt; + if (!process_filter) + return 1; + + filt = process_filter; + while (filt) { + if (filt->pid && p->pid == filt->pid) + return 1; + if (strcmp(filt->name, c->comm) == 0) + return 1; + filt = filt->next; + } + return 0; +} + +static int determine_display_tasks_filtered(void) +{ + struct per_pid *p; + struct per_pidcomm *c; + int count = 0; + + p = all_data; + while (p) { + p->display = 0; + if (p->start_time == 1) + p->start_time = first_time; + + /* no exit marker, task kept running to the end */ + if (p->end_time == 0) + p->end_time = last_time; + + c = p->all; + + while (c) { + c->display = 0; + + if (c->start_time == 1) + c->start_time = first_time; + + if (passes_filter(p, c)) { + c->display = 1; + p->display = 1; + count++; + } + + if (c->end_time == 0) + c->end_time = last_time; + + c = c->next; + } + p = p->next; + } + return count; +} + static int determine_display_tasks(u64 threshold) { struct per_pid *p; struct per_pidcomm *c; int count = 0; + if (process_filter) + return determine_display_tasks_filtered(); + p = all_data; while (p) { p->display = 0; @@ -957,36 +1043,6 @@ static void write_svg_file(const char *filename) svg_close(); } -static int -process_event(event_t *event) -{ - - switch (event->header.type) { - - case PERF_RECORD_COMM: - return process_comm_event(event); - case PERF_RECORD_FORK: - return process_fork_event(event); - case PERF_RECORD_EXIT: - return process_exit_event(event); - case PERF_RECORD_SAMPLE: - return queue_sample_event(event); - - /* - * We dont process them right now but they are fine: - */ - case PERF_RECORD_MMAP: - case PERF_RECORD_THROTTLE: - case PERF_RECORD_UNTHROTTLE: - return 0; - - default: - return -1; - } - - return 0; -} - static void process_samples(void) { struct sample_wrapper *cursor; @@ -1002,107 +1058,38 @@ static void process_samples(void) } } - -static int __cmd_timechart(void) +static int sample_type_check(u64 type) { - int ret, rc = EXIT_FAILURE; - unsigned long offset = 0; - unsigned long head, shift; - struct stat statbuf; - event_t *event; - uint32_t size; - char *buf; - int input; - - input = open(input_name, O_RDONLY); - if (input < 0) { - fprintf(stderr, " failed to open file: %s", input_name); - if (!strcmp(input_name, "perf.data")) - fprintf(stderr, " (try 'perf record' first)"); - fprintf(stderr, "\n"); - exit(-1); - } - - ret = fstat(input, &statbuf); - if (ret < 0) { - perror("failed to stat file"); - exit(-1); - } - - if (!statbuf.st_size) { - fprintf(stderr, "zero-sized file, nothing to do!\n"); - exit(0); - } - - header = perf_header__read(input); - head = header->data_offset; - - sample_type = perf_header__sample_type(header); + sample_type = type; - shift = page_size * (head / page_size); - offset += shift; - head -= shift; - -remap: - buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, - MAP_SHARED, input, offset); - if (buf == MAP_FAILED) { - perror("failed to mmap file"); - exit(-1); - } - -more: - event = (event_t *)(buf + head); - - size = event->header.size; - if (!size) - size = 8; - - if (head + event->header.size >= page_size * mmap_window) { - int ret2; - - shift = page_size * (head / page_size); - - ret2 = munmap(buf, page_size * mmap_window); - assert(ret2 == 0); - - offset += shift; - head -= shift; - goto remap; - } - - size = event->header.size; - - if (!size || process_event(event) < 0) { - - printf("%p [%p]: skipping unknown header type: %d\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->header.type); - - /* - * assume we lost track of the stream, check alignment, and - * increment a single u64 in the hope to catch on again 'soon'. - */ - - if (unlikely(head & 7)) - head &= ~7ULL; - - size = 8; + if (!(sample_type & PERF_SAMPLE_RAW)) { + fprintf(stderr, "No trace samples found in the file.\n" + "Have you used 'perf timechart record' to record it?\n"); + return -1; } - head += size; + return 0; +} - if (offset + head >= header->data_offset + header->data_size) - goto done; +static struct perf_file_handler file_handler = { + .process_comm_event = process_comm_event, + .process_fork_event = process_fork_event, + .process_exit_event = process_exit_event, + .process_sample_event = queue_sample_event, + .sample_type_check = sample_type_check, +}; - if (offset + head < (unsigned long)statbuf.st_size) - goto more; +static int __cmd_timechart(void) +{ + struct perf_header *header; + int ret; -done: - rc = EXIT_SUCCESS; - close(input); + register_perf_file_handler(&file_handler); + ret = mmap_dispatch_perf_file(&header, input_name, 0, 0, + &event__cwdlen, &event__cwd); + if (ret) + return EXIT_FAILURE; process_samples(); @@ -1112,9 +1099,10 @@ done: write_svg_file(output_name); - printf("Written %2.1f seconds of trace to %s.\n", (last_time - first_time) / 1000000000.0, output_name); + pr_info("Written %2.1f seconds of trace to %s.\n", + (last_time - first_time) / 1000000000.0, output_name); - return rc; + return EXIT_SUCCESS; } static const char * const timechart_usage[] = { @@ -1153,6 +1141,14 @@ static int __cmd_record(int argc, const char **argv) return cmd_record(i, rec_argv, NULL); } +static int +parse_process(const struct option *opt __used, const char *arg, int __used unset) +{ + if (arg) + add_process_filter(arg); + return 0; +} + static const struct option options[] = { OPT_STRING('i', "input", &input_name, "file", "input file name"), @@ -1160,17 +1156,18 @@ static const struct option options[] = { "output file name"), OPT_INTEGER('w', "width", &svg_page_width, "page width"), - OPT_BOOLEAN('p', "power-only", &power_only, + OPT_BOOLEAN('P', "power-only", &power_only, "output power data only"), + OPT_CALLBACK('p', "process", NULL, "process", + "process selector. Pass a pid or process name.", + parse_process), OPT_END() }; int cmd_timechart(int argc, const char **argv, const char *prefix __used) { - symbol__init(); - - page_size = getpagesize(); + symbol__init(0); argc = parse_options(argc, argv, options, timechart_usage, PARSE_OPT_STOP_AT_NON_OPTION); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index e23bc74e734..e0a374d0e43 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -22,6 +22,7 @@ #include "util/symbol.h" #include "util/color.h" +#include "util/thread.h" #include "util/util.h" #include <linux/rbtree.h> #include "util/parse-options.h" @@ -54,26 +55,31 @@ static int fd[MAX_NR_CPUS][MAX_COUNTERS]; -static int system_wide = 0; +static int system_wide = 0; -static int default_interval = 100000; +static int default_interval = 0; -static int count_filter = 5; -static int print_entries = 15; +static int count_filter = 5; +static int print_entries; -static int target_pid = -1; -static int inherit = 0; -static int profile_cpu = -1; -static int nr_cpus = 0; -static unsigned int realtime_prio = 0; -static int group = 0; +static int target_pid = -1; +static int inherit = 0; +static int profile_cpu = -1; +static int nr_cpus = 0; +static unsigned int realtime_prio = 0; +static int group = 0; static unsigned int page_size; -static unsigned int mmap_pages = 16; -static int freq = 0; +static unsigned int mmap_pages = 16; +static int freq = 1000; /* 1 KHz */ -static int delay_secs = 2; -static int zero; -static int dump_symtab; +static int delay_secs = 2; +static int zero = 0; +static int dump_symtab = 0; + +static bool hide_kernel_symbols = false; +static bool hide_user_symbols = false; +static struct winsize winsize; +struct symbol_conf symbol_conf; /* * Source @@ -86,83 +92,126 @@ struct source_line { struct source_line *next; }; -static char *sym_filter = NULL; -struct sym_entry *sym_filter_entry = NULL; -static int sym_pcnt_filter = 5; -static int sym_counter = 0; -static int display_weighted = -1; +static char *sym_filter = NULL; +struct sym_entry *sym_filter_entry = NULL; +static int sym_pcnt_filter = 5; +static int sym_counter = 0; +static int display_weighted = -1; /* * Symbols */ -static u64 min_ip; -static u64 max_ip = -1ll; +struct sym_entry_source { + struct source_line *source; + struct source_line *lines; + struct source_line **lines_tail; + pthread_mutex_t lock; +}; struct sym_entry { struct rb_node rb_node; struct list_head node; - unsigned long count[MAX_COUNTERS]; unsigned long snap_count; double weight; int skip; - struct source_line *source; - struct source_line *lines; - struct source_line **lines_tail; - pthread_mutex_t source_lock; + u16 name_len; + u8 origin; + struct map *map; + struct sym_entry_source *src; + unsigned long count[0]; }; /* * Source functions */ +static inline struct symbol *sym_entry__symbol(struct sym_entry *self) +{ + return ((void *)self) + symbol_conf.priv_size; +} + +static void get_term_dimensions(struct winsize *ws) +{ + char *s = getenv("LINES"); + + if (s != NULL) { + ws->ws_row = atoi(s); + s = getenv("COLUMNS"); + if (s != NULL) { + ws->ws_col = atoi(s); + if (ws->ws_row && ws->ws_col) + return; + } + } +#ifdef TIOCGWINSZ + if (ioctl(1, TIOCGWINSZ, ws) == 0 && + ws->ws_row && ws->ws_col) + return; +#endif + ws->ws_row = 25; + ws->ws_col = 80; +} + +static void update_print_entries(struct winsize *ws) +{ + print_entries = ws->ws_row; + + if (print_entries > 9) + print_entries -= 9; +} + +static void sig_winch_handler(int sig __used) +{ + get_term_dimensions(&winsize); + update_print_entries(&winsize); +} + static void parse_source(struct sym_entry *syme) { struct symbol *sym; - struct module *module; - struct section *section = NULL; + struct sym_entry_source *source; + struct map *map; FILE *file; char command[PATH_MAX*2]; - const char *path = vmlinux_name; - u64 start, end, len; + const char *path; + u64 len; if (!syme) return; - if (syme->lines) { - pthread_mutex_lock(&syme->source_lock); - goto out_assign; + if (syme->src == NULL) { + syme->src = zalloc(sizeof(*source)); + if (syme->src == NULL) + return; + pthread_mutex_init(&syme->src->lock, NULL); } - sym = (struct symbol *)(syme + 1); - module = sym->module; - - if (module) - path = module->path; - if (!path) - return; - - start = sym->obj_start; - if (!start) - start = sym->start; + source = syme->src; - if (module) { - section = module->sections->find_section(module->sections, ".text"); - if (section) - start -= section->vma; + if (source->lines) { + pthread_mutex_lock(&source->lock); + goto out_assign; } - end = start + sym->end - sym->start + 1; + sym = sym_entry__symbol(syme); + map = syme->map; + path = map->dso->long_name; + len = sym->end - sym->start; - sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", start, end, path); + sprintf(command, + "objdump --start-address=0x%016Lx " + "--stop-address=0x%016Lx -dS %s", + map->unmap_ip(map, sym->start), + map->unmap_ip(map, sym->end), path); file = popen(command, "r"); if (!file) return; - pthread_mutex_lock(&syme->source_lock); - syme->lines_tail = &syme->lines; + pthread_mutex_lock(&source->lock); + source->lines_tail = &source->lines; while (!feof(file)) { struct source_line *src; size_t dummy = 0; @@ -182,24 +231,22 @@ static void parse_source(struct sym_entry *syme) *c = 0; src->next = NULL; - *syme->lines_tail = src; - syme->lines_tail = &src->next; + *source->lines_tail = src; + source->lines_tail = &src->next; if (strlen(src->line)>8 && src->line[8] == ':') { src->eip = strtoull(src->line, NULL, 16); - if (section) - src->eip += section->vma; + src->eip = map->unmap_ip(map, src->eip); } if (strlen(src->line)>8 && src->line[16] == ':') { src->eip = strtoull(src->line, NULL, 16); - if (section) - src->eip += section->vma; + src->eip = map->unmap_ip(map, src->eip); } } pclose(file); out_assign: sym_filter_entry = syme; - pthread_mutex_unlock(&syme->source_lock); + pthread_mutex_unlock(&source->lock); } static void __zero_source_counters(struct sym_entry *syme) @@ -207,7 +254,7 @@ static void __zero_source_counters(struct sym_entry *syme) int i; struct source_line *line; - line = syme->lines; + line = syme->src->lines; while (line) { for (i = 0; i < nr_counters; i++) line->count[i] = 0; @@ -222,13 +269,13 @@ static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip) if (syme != sym_filter_entry) return; - if (pthread_mutex_trylock(&syme->source_lock)) + if (pthread_mutex_trylock(&syme->src->lock)) return; - if (!syme->source) + if (syme->src == NULL || syme->src->source == NULL) goto out_unlock; - for (line = syme->lines; line; line = line->next) { + for (line = syme->src->lines; line; line = line->next) { if (line->eip == ip) { line->count[counter]++; break; @@ -237,32 +284,25 @@ static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip) break; } out_unlock: - pthread_mutex_unlock(&syme->source_lock); + pthread_mutex_unlock(&syme->src->lock); } static void lookup_sym_source(struct sym_entry *syme) { - struct symbol *symbol = (struct symbol *)(syme + 1); + struct symbol *symbol = sym_entry__symbol(syme); struct source_line *line; char pattern[PATH_MAX]; - char *idx; sprintf(pattern, "<%s>:", symbol->name); - if (symbol->module) { - idx = strstr(pattern, "\t"); - if (idx) - *idx = 0; - } - - pthread_mutex_lock(&syme->source_lock); - for (line = syme->lines; line; line = line->next) { + pthread_mutex_lock(&syme->src->lock); + for (line = syme->src->lines; line; line = line->next) { if (strstr(line->line, pattern)) { - syme->source = line; + syme->src->source = line; break; } } - pthread_mutex_unlock(&syme->source_lock); + pthread_mutex_unlock(&syme->src->lock); } static void show_lines(struct source_line *queue, int count, int total) @@ -292,24 +332,24 @@ static void show_details(struct sym_entry *syme) if (!syme) return; - if (!syme->source) + if (!syme->src->source) lookup_sym_source(syme); - if (!syme->source) + if (!syme->src->source) return; - symbol = (struct symbol *)(syme + 1); + symbol = sym_entry__symbol(syme); printf("Showing %s for %s\n", event_name(sym_counter), symbol->name); printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter); - pthread_mutex_lock(&syme->source_lock); - line = syme->source; + pthread_mutex_lock(&syme->src->lock); + line = syme->src->source; while (line) { total += line->count[sym_counter]; line = line->next; } - line = syme->source; + line = syme->src->source; while (line) { float pcnt = 0.0; @@ -334,13 +374,13 @@ static void show_details(struct sym_entry *syme) line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8; line = line->next; } - pthread_mutex_unlock(&syme->source_lock); + pthread_mutex_unlock(&syme->src->lock); if (more) printf("%d lines not displayed, maybe increase display entries [e]\n", more); } /* - * Symbols will be added here in record_ip and will get out + * Symbols will be added here in event__process_sample and will get out * after decayed. */ static LIST_HEAD(active_symbols); @@ -411,6 +451,8 @@ static void print_sym_table(void) struct sym_entry *syme, *n; struct rb_root tmp = RB_ROOT; struct rb_node *nd; + int sym_width = 0, dso_width = 0, max_dso_width; + const int win_width = winsize.ws_col - 1; samples = userspace_samples = 0; @@ -422,6 +464,14 @@ static void print_sym_table(void) list_for_each_entry_safe_from(syme, n, &active_symbols, node) { syme->snap_count = syme->count[snap]; if (syme->snap_count != 0) { + + if ((hide_user_symbols && + syme->origin == PERF_RECORD_MISC_USER) || + (hide_kernel_symbols && + syme->origin == PERF_RECORD_MISC_KERNEL)) { + list_remove_active_sym(syme); + continue; + } syme->weight = sym_weight(syme); rb_insert_active_sym(&tmp, syme); sum_ksamples += syme->snap_count; @@ -434,8 +484,7 @@ static void print_sym_table(void) puts(CONSOLE_CLEAR); - printf( -"------------------------------------------------------------------------------\n"); + printf("%-*.*s\n", win_width, win_width, graph_dotted_line); printf( " PerfTop:%8.0f irqs/sec kernel:%4.1f%% [", samples_per_sec, 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec))); @@ -473,33 +522,57 @@ static void print_sym_table(void) printf(", %d CPUs)\n", nr_cpus); } - printf("------------------------------------------------------------------------------\n\n"); + printf("%-*.*s\n", win_width, win_width, graph_dotted_line); if (sym_filter_entry) { show_details(sym_filter_entry); return; } + /* + * Find the longest symbol name that will be displayed + */ + for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) { + syme = rb_entry(nd, struct sym_entry, rb_node); + if (++printed > print_entries || + (int)syme->snap_count < count_filter) + continue; + + if (syme->map->dso->long_name_len > dso_width) + dso_width = syme->map->dso->long_name_len; + + if (syme->name_len > sym_width) + sym_width = syme->name_len; + } + + printed = 0; + + max_dso_width = winsize.ws_col - sym_width - 29; + if (dso_width > max_dso_width) + dso_width = max_dso_width; + putchar('\n'); if (nr_counters == 1) - printf(" samples pcnt"); + printf(" samples pcnt"); else - printf(" weight samples pcnt"); + printf(" weight samples pcnt"); if (verbose) printf(" RIP "); - printf(" kernel function\n"); - printf(" %s _______ _____", + printf(" %-*.*s DSO\n", sym_width, sym_width, "function"); + printf(" %s _______ _____", nr_counters == 1 ? " " : "______"); if (verbose) - printf(" ________________"); - printf(" _______________\n\n"); + printf(" ________________"); + printf(" %-*.*s", sym_width, sym_width, graph_line); + printf(" %-*.*s", dso_width, dso_width, graph_line); + puts("\n"); for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) { struct symbol *sym; double pcnt; syme = rb_entry(nd, struct sym_entry, rb_node); - sym = (struct symbol *)(syme + 1); + sym = sym_entry__symbol(syme); if (++printed > print_entries || (int)syme->snap_count < count_filter) continue; @@ -508,17 +581,18 @@ static void print_sym_table(void) sum_ksamples)); if (nr_counters == 1 || !display_weighted) - printf("%20.2f - ", syme->weight); + printf("%20.2f ", syme->weight); else - printf("%9.1f %10ld - ", syme->weight, syme->snap_count); + printf("%9.1f %10ld ", syme->weight, syme->snap_count); percent_color_fprintf(stdout, "%4.1f%%", pcnt); if (verbose) - printf(" - %016llx", sym->start); - printf(" : %s", sym->name); - if (sym->module) - printf("\t[%s]", sym->module->name); - printf("\n"); + printf(" %016llx", sym->start); + printf(" %-*.*s", sym_width, sym_width, sym->name); + printf(" %-*.*s\n", dso_width, dso_width, + dso_width >= syme->map->dso->long_name_len ? + syme->map->dso->long_name : + syme->map->dso->short_name); } } @@ -565,10 +639,10 @@ static void prompt_symbol(struct sym_entry **target, const char *msg) /* zero counters of active symbol */ if (syme) { - pthread_mutex_lock(&syme->source_lock); + pthread_mutex_lock(&syme->src->lock); __zero_source_counters(syme); *target = NULL; - pthread_mutex_unlock(&syme->source_lock); + pthread_mutex_unlock(&syme->src->lock); } fprintf(stdout, "\n%s: ", msg); @@ -584,7 +658,7 @@ static void prompt_symbol(struct sym_entry **target, const char *msg) pthread_mutex_unlock(&active_symbols_lock); list_for_each_entry_safe_from(syme, n, &active_symbols, node) { - struct symbol *sym = (struct symbol *)(syme + 1); + struct symbol *sym = sym_entry__symbol(syme); if (!strcmp(buf, sym->name)) { found = syme; @@ -608,7 +682,7 @@ static void print_mapped_keys(void) char *name = NULL; if (sym_filter_entry) { - struct symbol *sym = (struct symbol *)(sym_filter_entry+1); + struct symbol *sym = sym_entry__symbol(sym_filter_entry); name = sym->name; } @@ -621,7 +695,7 @@ static void print_mapped_keys(void) fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter); - if (vmlinux_name) { + if (symbol_conf.vmlinux_name) { fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter); fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); fprintf(stdout, "\t[S] stop annotation.\n"); @@ -630,6 +704,12 @@ static void print_mapped_keys(void) if (nr_counters > 1) fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0); + fprintf(stdout, + "\t[K] hide kernel_symbols symbols. \t(%s)\n", + hide_kernel_symbols ? "yes" : "no"); + fprintf(stdout, + "\t[U] hide user symbols. \t(%s)\n", + hide_user_symbols ? "yes" : "no"); fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", zero ? 1 : 0); fprintf(stdout, "\t[qQ] quit.\n"); } @@ -643,6 +723,8 @@ static int key_mapped(int c) case 'z': case 'q': case 'Q': + case 'K': + case 'U': return 1; case 'E': case 'w': @@ -650,7 +732,7 @@ static int key_mapped(int c) case 'F': case 's': case 'S': - return vmlinux_name ? 1 : 0; + return symbol_conf.vmlinux_name ? 1 : 0; default: break; } @@ -691,6 +773,11 @@ static void handle_keypress(int c) break; case 'e': prompt_integer(&print_entries, "Enter display entries (lines)"); + if (print_entries == 0) { + sig_winch_handler(SIGWINCH); + signal(SIGWINCH, sig_winch_handler); + } else + signal(SIGWINCH, SIG_DFL); break; case 'E': if (nr_counters > 1) { @@ -715,9 +802,14 @@ static void handle_keypress(int c) case 'F': prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)"); break; + case 'K': + hide_kernel_symbols = !hide_kernel_symbols; + break; case 'q': case 'Q': printf("exiting.\n"); + if (dump_symtab) + dsos__fprintf(stderr); exit(0); case 's': prompt_symbol(&sym_filter_entry, "Enter details symbol"); @@ -728,12 +820,15 @@ static void handle_keypress(int c) else { struct sym_entry *syme = sym_filter_entry; - pthread_mutex_lock(&syme->source_lock); + pthread_mutex_lock(&syme->src->lock); sym_filter_entry = NULL; __zero_source_counters(syme); - pthread_mutex_unlock(&syme->source_lock); + pthread_mutex_unlock(&syme->src->lock); } break; + case 'U': + hide_user_symbols = !hide_user_symbols; + break; case 'w': display_weighted = ~display_weighted; break; @@ -790,7 +885,7 @@ static const char *skip_symbols[] = { NULL }; -static int symbol_filter(struct dso *self, struct symbol *sym) +static int symbol_filter(struct map *map, struct symbol *sym) { struct sym_entry *syme; const char *name = sym->name; @@ -812,8 +907,9 @@ static int symbol_filter(struct dso *self, struct symbol *sym) strstr(name, "_text_end")) return 1; - syme = dso__sym_priv(self, sym); - pthread_mutex_init(&syme->source_lock, NULL); + syme = symbol__priv(sym); + syme->map = map; + syme->src = NULL; if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) sym_filter_entry = syme; @@ -824,75 +920,65 @@ static int symbol_filter(struct dso *self, struct symbol *sym) } } - return 0; -} - -static int parse_symbols(void) -{ - struct rb_node *node; - struct symbol *sym; - int use_modules = vmlinux_name ? 1 : 0; - - kernel_dso = dso__new("[kernel]", sizeof(struct sym_entry)); - if (kernel_dso == NULL) - return -1; - - if (dso__load_kernel(kernel_dso, vmlinux_name, symbol_filter, verbose, use_modules) <= 0) - goto out_delete_dso; - - node = rb_first(&kernel_dso->syms); - sym = rb_entry(node, struct symbol, rb_node); - min_ip = sym->start; - - node = rb_last(&kernel_dso->syms); - sym = rb_entry(node, struct symbol, rb_node); - max_ip = sym->end; - - if (dump_symtab) - dso__fprintf(kernel_dso, stderr); + if (!syme->skip) + syme->name_len = strlen(sym->name); return 0; - -out_delete_dso: - dso__delete(kernel_dso); - kernel_dso = NULL; - return -1; } -/* - * Binary search in the histogram table and record the hit: - */ -static void record_ip(u64 ip, int counter) +static void event__process_sample(const event_t *self, int counter) { - struct symbol *sym = dso__find_symbol(kernel_dso, ip); - - if (sym != NULL) { - struct sym_entry *syme = dso__sym_priv(kernel_dso, sym); - - if (!syme->skip) { - syme->count[counter]++; - record_precise_ip(syme, counter, ip); - pthread_mutex_lock(&active_symbols_lock); - if (list_empty(&syme->node) || !syme->node.next) - __list_insert_active_sym(syme); - pthread_mutex_unlock(&active_symbols_lock); + u64 ip = self->ip.ip; + struct sym_entry *syme; + struct addr_location al; + u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + switch (origin) { + case PERF_RECORD_MISC_USER: + if (hide_user_symbols) return; - } + break; + case PERF_RECORD_MISC_KERNEL: + if (hide_kernel_symbols) + return; + break; + default: + return; } - samples--; + if (event__preprocess_sample(self, &al, symbol_filter) < 0 || + al.sym == NULL) + return; + + syme = symbol__priv(al.sym); + if (!syme->skip) { + syme->count[counter]++; + syme->origin = origin; + record_precise_ip(syme, counter, ip); + pthread_mutex_lock(&active_symbols_lock); + if (list_empty(&syme->node) || !syme->node.next) + __list_insert_active_sym(syme); + pthread_mutex_unlock(&active_symbols_lock); + if (origin == PERF_RECORD_MISC_USER) + ++userspace_samples; + ++samples; + } } -static void process_event(u64 ip, int counter, int user) +static int event__process(event_t *event) { - samples++; - - if (user) { - userspace_samples++; - return; + switch (event->header.type) { + case PERF_RECORD_COMM: + event__process_comm(event); + break; + case PERF_RECORD_MMAP: + event__process_mmap(event); + break; + default: + break; } - record_ip(ip, counter); + return 0; } struct mmap_data { @@ -913,8 +999,6 @@ static unsigned int mmap_read_head(struct mmap_data *md) return head; } -struct timeval last_read, this_read; - static void mmap_read_counter(struct mmap_data *md) { unsigned int head = mmap_read_head(md); @@ -922,8 +1006,6 @@ static void mmap_read_counter(struct mmap_data *md) unsigned char *data = md->base + page_size; int diff; - gettimeofday(&this_read, NULL); - /* * If we're further behind than half the buffer, there's a chance * the writer will bite our tail and mess up the samples under us. @@ -934,14 +1016,7 @@ static void mmap_read_counter(struct mmap_data *md) */ diff = head - old; if (diff > md->mask / 2 || diff < 0) { - struct timeval iv; - unsigned long msecs; - - timersub(&this_read, &last_read, &iv); - msecs = iv.tv_sec*1000 + iv.tv_usec/1000; - - fprintf(stderr, "WARNING: failed to keep up with mmap data." - " Last read %lu msecs ago.\n", msecs); + fprintf(stderr, "WARNING: failed to keep up with mmap data.\n"); /* * head points to a known good entry, start there. @@ -949,8 +1024,6 @@ static void mmap_read_counter(struct mmap_data *md) old = head; } - last_read = this_read; - for (; old != head;) { event_t *event = (event_t *)&data[old & md->mask]; @@ -978,13 +1051,11 @@ static void mmap_read_counter(struct mmap_data *md) event = &event_copy; } + if (event->header.type == PERF_RECORD_SAMPLE) + event__process_sample(event, md->counter); + else + event__process(event); old += size; - - if (event->header.type == PERF_RECORD_SAMPLE) { - int user = - (event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK) == PERF_RECORD_MISC_USER; - process_event(event->ip.ip, md->counter, user); - } } md->prev = old; @@ -1018,8 +1089,15 @@ static void start_counter(int i, int counter) attr = attrs + counter; attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; - attr->freq = freq; + + if (freq) { + attr->sample_type |= PERF_SAMPLE_PERIOD; + attr->freq = 1; + attr->sample_freq = freq; + } + attr->inherit = (cpu < 0) && inherit; + attr->mmap = 1; try_again: fd[i][counter] = sys_perf_event_open(attr, target_pid, cpu, group_fd, 0); @@ -1078,6 +1156,11 @@ static int __cmd_top(void) int i, counter; int ret; + if (target_pid != -1) + event__synthesize_thread(target_pid, event__process); + else + event__synthesize_threads(event__process); + for (i = 0; i < nr_cpus; i++) { group_fd = -1; for (counter = 0; counter < nr_counters; counter++) @@ -1133,7 +1216,10 @@ static const struct option options[] = { "system-wide collection from all CPUs"), OPT_INTEGER('C', "CPU", &profile_cpu, "CPU to profile on"), - OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"), + OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, + "file", "vmlinux pathname"), + OPT_BOOLEAN('K', "hide_kernel_symbols", &hide_kernel_symbols, + "hide kernel symbols"), OPT_INTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"), OPT_INTEGER('r', "realtime", &realtime_prio, @@ -1156,6 +1242,8 @@ static const struct option options[] = { "profile at this frequency"), OPT_INTEGER('E', "entries", &print_entries, "display this many functions"), + OPT_BOOLEAN('U', "hide_user_symbols", &hide_user_symbols, + "hide user symbols"), OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), OPT_END() @@ -1165,19 +1253,12 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) { int counter; - symbol__init(); - page_size = sysconf(_SC_PAGE_SIZE); argc = parse_options(argc, argv, options, top_usage, 0); if (argc) usage_with_options(top_usage, options); - if (freq) { - default_interval = freq; - freq = 1; - } - /* CPU and PID are mutually exclusive */ if (target_pid != -1 && profile_cpu != -1) { printf("WARNING: PID switch overriding CPU\n"); @@ -1188,13 +1269,31 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) if (!nr_counters) nr_counters = 1; + symbol_conf.priv_size = (sizeof(struct sym_entry) + + (nr_counters + 1) * sizeof(unsigned long)); + if (symbol_conf.vmlinux_name == NULL) + symbol_conf.try_vmlinux_path = true; + if (symbol__init(&symbol_conf) < 0) + return -1; + if (delay_secs < 1) delay_secs = 1; - parse_symbols(); parse_source(sym_filter_entry); /* + * User specified count overrides default frequency. + */ + if (default_interval) + freq = 0; + else if (freq) { + default_interval = freq; + } else { + fprintf(stderr, "frequency and count are zero, aborting\n"); + exit(EXIT_FAILURE); + } + + /* * Fill in the ones not specifically initialized via -c: */ for (counter = 0; counter < nr_counters; counter++) { @@ -1211,5 +1310,11 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) if (target_pid != -1 || profile_cpu != -1) nr_cpus = 1; + get_term_dimensions(&winsize); + if (print_entries == 0) { + update_print_entries(&winsize); + signal(SIGWINCH, sig_winch_handler); + } + return __cmd_top(); } diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 0c5e4f72f2b..abb914aa7be 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -5,66 +5,73 @@ #include "util/symbol.h" #include "util/thread.h" #include "util/header.h" +#include "util/exec_cmd.h" +#include "util/trace-event.h" -#include "util/parse-options.h" +static char const *script_name; +static char const *generate_script_lang; -#include "perf.h" -#include "util/debug.h" +static int default_start_script(const char *script __attribute((unused))) +{ + return 0; +} -#include "util/trace-event.h" +static int default_stop_script(void) +{ + return 0; +} -static char const *input_name = "perf.data"; -static int input; -static unsigned long page_size; -static unsigned long mmap_window = 32; +static int default_generate_script(const char *outfile __attribute ((unused))) +{ + return 0; +} -static unsigned long total = 0; -static unsigned long total_comm = 0; +static struct scripting_ops default_scripting_ops = { + .start_script = default_start_script, + .stop_script = default_stop_script, + .process_event = print_event, + .generate_script = default_generate_script, +}; + +static struct scripting_ops *scripting_ops; -static struct rb_root threads; -static struct thread *last_match; +static void setup_scripting(void) +{ + /* make sure PERF_EXEC_PATH is set for scripts */ + perf_set_argv_exec_path(perf_exec_path()); -static struct perf_header *header; -static u64 sample_type; + setup_perl_scripting(); + scripting_ops = &default_scripting_ops; +} -static int -process_comm_event(event_t *event, unsigned long offset, unsigned long head) +static int cleanup_scripting(void) { - struct thread *thread; + return scripting_ops->stop_script(); +} - thread = threads__findnew(event->comm.pid, &threads, &last_match); +#include "util/parse-options.h" - dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->comm.comm, event->comm.pid); +#include "perf.h" +#include "util/debug.h" - if (thread == NULL || - thread__set_comm(thread, event->comm.comm)) { - dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); - return -1; - } - total_comm++; +#include "util/trace-event.h" +#include "util/data_map.h" +#include "util/exec_cmd.h" - return 0; -} +static char const *input_name = "perf.data"; -static int -process_sample_event(event_t *event, unsigned long offset, unsigned long head) +static struct perf_header *header; +static u64 sample_type; + +static int process_sample_event(event_t *event) { - char level; - int show = 0; - struct dso *dso = NULL; - struct thread *thread; u64 ip = event->ip.ip; u64 timestamp = -1; u32 cpu = -1; u64 period = 1; void *more_data = event->ip.__more_data; - int cpumode; - - thread = threads__findnew(event->ip.pid, &threads, &last_match); + struct thread *thread = threads__findnew(event->ip.pid); if (sample_type & PERF_SAMPLE_TIME) { timestamp = *(u64 *)more_data; @@ -82,45 +89,19 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) more_data += sizeof(u64); } - dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", - (void *)(offset + head), - (void *)(long)(event->header.size), + dump_printf("(IP, %d): %d/%d: %p period: %Ld\n", event->header.misc, event->ip.pid, event->ip.tid, (void *)(long)ip, (long long)period); - dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); - if (thread == NULL) { - eprintf("problem processing %d event, skipping it.\n", - event->header.type); + pr_debug("problem processing %d event, skipping it.\n", + event->header.type); return -1; } - cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - - if (cpumode == PERF_RECORD_MISC_KERNEL) { - show = SHOW_KERNEL; - level = 'k'; - - dso = kernel_dso; - - dump_printf(" ...... dso: %s\n", dso->name); - - } else if (cpumode == PERF_RECORD_MISC_USER) { - - show = SHOW_USER; - level = '.'; - - } else { - show = SHOW_HV; - level = 'H'; - - dso = hypervisor_dso; - - dump_printf(" ...... dso: [hypervisor]\n"); - } + dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); if (sample_type & PERF_SAMPLE_RAW) { struct { @@ -133,128 +114,189 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) * field, although it should be the same than this perf * event pid */ - print_event(cpu, raw->data, raw->size, timestamp, thread->comm); + scripting_ops->process_event(cpu, raw->data, raw->size, + timestamp, thread->comm); } - total += period; + event__stats.total += period; return 0; } -static int -process_event(event_t *event, unsigned long offset, unsigned long head) +static int sample_type_check(u64 type) { - trace_event(event); - - switch (event->header.type) { - case PERF_RECORD_MMAP ... PERF_RECORD_LOST: - return 0; - - case PERF_RECORD_COMM: - return process_comm_event(event, offset, head); - - case PERF_RECORD_EXIT ... PERF_RECORD_READ: - return 0; - - case PERF_RECORD_SAMPLE: - return process_sample_event(event, offset, head); + sample_type = type; - case PERF_RECORD_MAX: - default: + if (!(sample_type & PERF_SAMPLE_RAW)) { + fprintf(stderr, + "No trace sample to read. Did you call perf record " + "without -R?"); return -1; } return 0; } +static struct perf_file_handler file_handler = { + .process_sample_event = process_sample_event, + .process_comm_event = event__process_comm, + .sample_type_check = sample_type_check, +}; + static int __cmd_trace(void) { - int ret, rc = EXIT_FAILURE; - unsigned long offset = 0; - unsigned long head = 0; - struct stat perf_stat; - event_t *event; - uint32_t size; - char *buf; - - trace_report(); - register_idle_thread(&threads, &last_match); - - input = open(input_name, O_RDONLY); - if (input < 0) { - perror("failed to open file"); - exit(-1); - } + register_idle_thread(); + register_perf_file_handler(&file_handler); - ret = fstat(input, &perf_stat); - if (ret < 0) { - perror("failed to stat file"); - exit(-1); - } + return mmap_dispatch_perf_file(&header, input_name, + 0, 0, &event__cwdlen, &event__cwd); +} - if (!perf_stat.st_size) { - fprintf(stderr, "zero-sized file, nothing to do!\n"); - exit(0); - } - header = perf_header__read(input); - head = header->data_offset; - sample_type = perf_header__sample_type(header); +struct script_spec { + struct list_head node; + struct scripting_ops *ops; + char spec[0]; +}; - if (!(sample_type & PERF_SAMPLE_RAW)) - die("No trace sample to read. Did you call perf record " - "without -R?"); +LIST_HEAD(script_specs); - if (load_kernel() < 0) { - perror("failed to load kernel symbols"); - return EXIT_FAILURE; - } +static struct script_spec *script_spec__new(const char *spec, + struct scripting_ops *ops) +{ + struct script_spec *s = malloc(sizeof(*s) + strlen(spec) + 1); -remap: - buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, - MAP_SHARED, input, offset); - if (buf == MAP_FAILED) { - perror("failed to mmap file"); - exit(-1); + if (s != NULL) { + strcpy(s->spec, spec); + s->ops = ops; } -more: - event = (event_t *)(buf + head); + return s; +} - if (head + event->header.size >= page_size * mmap_window) { - unsigned long shift = page_size * (head / page_size); - int res; +static void script_spec__delete(struct script_spec *s) +{ + free(s->spec); + free(s); +} - res = munmap(buf, page_size * mmap_window); - assert(res == 0); +static void script_spec__add(struct script_spec *s) +{ + list_add_tail(&s->node, &script_specs); +} - offset += shift; - head -= shift; - goto remap; - } +static struct script_spec *script_spec__find(const char *spec) +{ + struct script_spec *s; - size = event->header.size; + list_for_each_entry(s, &script_specs, node) + if (strcasecmp(s->spec, spec) == 0) + return s; + return NULL; +} - if (!size || process_event(event, offset, head) < 0) { +static struct script_spec *script_spec__findnew(const char *spec, + struct scripting_ops *ops) +{ + struct script_spec *s = script_spec__find(spec); - /* - * assume we lost track of the stream, check alignment, and - * increment a single u64 in the hope to catch on again 'soon'. - */ + if (s) + return s; - if (unlikely(head & 7)) - head &= ~7ULL; + s = script_spec__new(spec, ops); + if (!s) + goto out_delete_spec; - size = 8; - } + script_spec__add(s); + + return s; - head += size; +out_delete_spec: + script_spec__delete(s); + + return NULL; +} - if (offset + head < (unsigned long)perf_stat.st_size) - goto more; +int script_spec_register(const char *spec, struct scripting_ops *ops) +{ + struct script_spec *s; + + s = script_spec__find(spec); + if (s) + return -1; - rc = EXIT_SUCCESS; - close(input); + s = script_spec__findnew(spec, ops); + if (!s) + return -1; + + return 0; +} + +static struct scripting_ops *script_spec__lookup(const char *spec) +{ + struct script_spec *s = script_spec__find(spec); + if (!s) + return NULL; - return rc; + return s->ops; +} + +static void list_available_languages(void) +{ + struct script_spec *s; + + fprintf(stderr, "\n"); + fprintf(stderr, "Scripting language extensions (used in " + "perf trace -s [spec:]script.[spec]):\n\n"); + + list_for_each_entry(s, &script_specs, node) + fprintf(stderr, " %-42s [%s]\n", s->spec, s->ops->name); + + fprintf(stderr, "\n"); +} + +static int parse_scriptname(const struct option *opt __used, + const char *str, int unset __used) +{ + char spec[PATH_MAX]; + const char *script, *ext; + int len; + + if (strcmp(str, "list") == 0) { + list_available_languages(); + return 0; + } + + script = strchr(str, ':'); + if (script) { + len = script - str; + if (len >= PATH_MAX) { + fprintf(stderr, "invalid language specifier"); + return -1; + } + strncpy(spec, str, len); + spec[len] = '\0'; + scripting_ops = script_spec__lookup(spec); + if (!scripting_ops) { + fprintf(stderr, "invalid language specifier"); + return -1; + } + script++; + } else { + script = str; + ext = strchr(script, '.'); + if (!ext) { + fprintf(stderr, "invalid script extension"); + return -1; + } + scripting_ops = script_spec__lookup(++ext); + if (!scripting_ops) { + fprintf(stderr, "invalid script extension"); + return -1; + } + } + + script_name = strdup(script); + + return 0; } static const char * const annotate_usage[] = { @@ -267,13 +309,24 @@ static const struct option options[] = { "dump raw trace in ASCII"), OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), + OPT_BOOLEAN('l', "latency", &latency_format, + "show latency attributes (irqs/preemption disabled, etc)"), + OPT_CALLBACK('s', "script", NULL, "name", + "script file name (lang:script name, script name, or *)", + parse_scriptname), + OPT_STRING('g', "gen-script", &generate_script_lang, "lang", + "generate perf-trace.xx script in specified language"), + OPT_END() }; int cmd_trace(int argc, const char **argv, const char *prefix __used) { - symbol__init(); - page_size = getpagesize(); + int err; + + symbol__init(0); + + setup_scripting(); argc = parse_options(argc, argv, options, annotate_usage, 0); if (argc) { @@ -287,5 +340,50 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used) setup_pager(); - return __cmd_trace(); + if (generate_script_lang) { + struct stat perf_stat; + + int input = open(input_name, O_RDONLY); + if (input < 0) { + perror("failed to open file"); + exit(-1); + } + + err = fstat(input, &perf_stat); + if (err < 0) { + perror("failed to stat file"); + exit(-1); + } + + if (!perf_stat.st_size) { + fprintf(stderr, "zero-sized file, nothing to do!\n"); + exit(0); + } + + scripting_ops = script_spec__lookup(generate_script_lang); + if (!scripting_ops) { + fprintf(stderr, "invalid language specifier"); + return -1; + } + + header = perf_header__new(); + if (header == NULL) + return -1; + + perf_header__read(header, input); + err = scripting_ops->generate_script("perf-trace"); + goto out; + } + + if (script_name) { + err = scripting_ops->start_script(script_name); + if (err) + goto out; + } + + err = __cmd_trace(); + + cleanup_scripting(); +out: + return err; } diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index e11d8d231c3..a3d8bf65f26 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -15,6 +15,8 @@ extern int read_line_with_nul(char *buf, int size, FILE *file); extern int check_pager_config(const char *cmd); extern int cmd_annotate(int argc, const char **argv, const char *prefix); +extern int cmd_bench(int argc, const char **argv, const char *prefix); +extern int cmd_buildid_list(int argc, const char **argv, const char *prefix); extern int cmd_help(int argc, const char **argv, const char *prefix); extern int cmd_sched(int argc, const char **argv, const char *prefix); extern int cmd_list(int argc, const char **argv, const char *prefix); @@ -25,5 +27,7 @@ extern int cmd_timechart(int argc, const char **argv, const char *prefix); extern int cmd_top(int argc, const char **argv, const char *prefix); extern int cmd_trace(int argc, const char **argv, const char *prefix); extern int cmd_version(int argc, const char **argv, const char *prefix); +extern int cmd_probe(int argc, const char **argv, const char *prefix); +extern int cmd_kmem(int argc, const char **argv, const char *prefix); #endif diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index 00326e230d8..02b09ea17a3 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -3,6 +3,8 @@ # command name category [deprecated] [common] # perf-annotate mainporcelain common +perf-bench mainporcelain common +perf-buildid-list mainporcelain common perf-list mainporcelain common perf-sched mainporcelain common perf-record mainporcelain common @@ -11,3 +13,5 @@ perf-stat mainporcelain common perf-timechart mainporcelain common perf-top mainporcelain common perf-trace mainporcelain common +perf-probe mainporcelain common +perf-kmem mainporcelain common diff --git a/tools/perf/design.txt b/tools/perf/design.txt index fdd42a824c9..f000c30877a 100644 --- a/tools/perf/design.txt +++ b/tools/perf/design.txt @@ -137,6 +137,8 @@ enum sw_event_ids { PERF_COUNT_SW_CPU_MIGRATIONS = 4, PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, + PERF_COUNT_SW_ALIGNMENT_FAULTS = 7, + PERF_COUNT_SW_EMULATION_FAULTS = 8, }; Counters of the type PERF_TYPE_TRACEPOINT are available when the ftrace event diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 19fc7feb9d5..cf64049bc9b 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -14,6 +14,7 @@ #include "util/run-command.h" #include "util/parse-events.h" #include "util/string.h" +#include "util/debugfs.h" const char perf_usage_string[] = "perf [--version] [--help] COMMAND [ARGS]"; @@ -89,8 +90,8 @@ static int handle_options(const char*** argv, int* argc, int* envchanged) /* * Check remaining flags. */ - if (!prefixcmp(cmd, "--exec-path")) { - cmd += 11; + if (!prefixcmp(cmd, CMD_EXEC_PATH)) { + cmd += strlen(CMD_EXEC_PATH); if (*cmd == '=') perf_set_argv_exec_path(cmd + 1); else { @@ -117,8 +118,8 @@ static int handle_options(const char*** argv, int* argc, int* envchanged) (*argv)++; (*argc)--; handled++; - } else if (!prefixcmp(cmd, "--perf-dir=")) { - setenv(PERF_DIR_ENVIRONMENT, cmd + 10, 1); + } else if (!prefixcmp(cmd, CMD_PERF_DIR)) { + setenv(PERF_DIR_ENVIRONMENT, cmd + strlen(CMD_PERF_DIR), 1); if (envchanged) *envchanged = 1; } else if (!strcmp(cmd, "--work-tree")) { @@ -131,8 +132,8 @@ static int handle_options(const char*** argv, int* argc, int* envchanged) *envchanged = 1; (*argv)++; (*argc)--; - } else if (!prefixcmp(cmd, "--work-tree=")) { - setenv(PERF_WORK_TREE_ENVIRONMENT, cmd + 12, 1); + } else if (!prefixcmp(cmd, CMD_WORK_TREE)) { + setenv(PERF_WORK_TREE_ENVIRONMENT, cmd + strlen(CMD_WORK_TREE), 1); if (envchanged) *envchanged = 1; } else if (!strcmp(cmd, "--debugfs-dir")) { @@ -146,8 +147,8 @@ static int handle_options(const char*** argv, int* argc, int* envchanged) *envchanged = 1; (*argv)++; (*argc)--; - } else if (!prefixcmp(cmd, "--debugfs-dir=")) { - strncpy(debugfs_mntpt, cmd + 14, MAXPATHLEN); + } else if (!prefixcmp(cmd, CMD_DEBUGFS_DIR)) { + strncpy(debugfs_mntpt, cmd + strlen(CMD_DEBUGFS_DIR), MAXPATHLEN); debugfs_mntpt[MAXPATHLEN - 1] = '\0'; if (envchanged) *envchanged = 1; @@ -284,17 +285,21 @@ static void handle_internal_command(int argc, const char **argv) { const char *cmd = argv[0]; static struct cmd_struct commands[] = { - { "help", cmd_help, 0 }, - { "list", cmd_list, 0 }, - { "record", cmd_record, 0 }, - { "report", cmd_report, 0 }, - { "stat", cmd_stat, 0 }, - { "timechart", cmd_timechart, 0 }, - { "top", cmd_top, 0 }, - { "annotate", cmd_annotate, 0 }, - { "version", cmd_version, 0 }, - { "trace", cmd_trace, 0 }, - { "sched", cmd_sched, 0 }, + { "buildid-list", cmd_buildid_list, 0 }, + { "help", cmd_help, 0 }, + { "list", cmd_list, 0 }, + { "record", cmd_record, 0 }, + { "report", cmd_report, 0 }, + { "bench", cmd_bench, 0 }, + { "stat", cmd_stat, 0 }, + { "timechart", cmd_timechart, 0 }, + { "top", cmd_top, 0 }, + { "annotate", cmd_annotate, 0 }, + { "version", cmd_version, 0 }, + { "trace", cmd_trace, 0 }, + { "sched", cmd_sched, 0 }, + { "probe", cmd_probe, 0 }, + { "kmem", cmd_kmem, 0 }, }; unsigned int i; static const char ext[] = STRIP_EXTENSION; @@ -382,45 +387,12 @@ static int run_argv(int *argcp, const char ***argv) /* mini /proc/mounts parser: searching for "^blah /mount/point debugfs" */ static void get_debugfs_mntpt(void) { - FILE *file; - char fs_type[100]; - char debugfs[MAXPATHLEN]; + const char *path = debugfs_find_mountpoint(); - /* - * try the standard location - */ - if (valid_debugfs_mount("/sys/kernel/debug/") == 0) { - strcpy(debugfs_mntpt, "/sys/kernel/debug/"); - return; - } - - /* - * try the sane location - */ - if (valid_debugfs_mount("/debug/") == 0) { - strcpy(debugfs_mntpt, "/debug/"); - return; - } - - /* - * give up and parse /proc/mounts - */ - file = fopen("/proc/mounts", "r"); - if (file == NULL) - return; - - while (fscanf(file, "%*s %" - STR(MAXPATHLEN) - "s %99s %*s %*d %*d\n", - debugfs, fs_type) == 2) { - if (strcmp(fs_type, "debugfs") == 0) - break; - } - fclose(file); - if (strcmp(fs_type, "debugfs") == 0) { - strncpy(debugfs_mntpt, debugfs, MAXPATHLEN); - debugfs_mntpt[MAXPATHLEN - 1] = '\0'; - } + if (path) + strncpy(debugfs_mntpt, path, sizeof(debugfs_mntpt)); + else + debugfs_mntpt[0] = '\0'; } int main(int argc, const char **argv) diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 8cc4623afd6..454d5d55f32 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -47,6 +47,18 @@ #define cpu_relax() asm volatile("":::"memory") #endif +#ifdef __alpha__ +#include "../../arch/alpha/include/asm/unistd.h" +#define rmb() asm volatile("mb" ::: "memory") +#define cpu_relax() asm volatile("" ::: "memory") +#endif + +#ifdef __ia64__ +#include "../../arch/ia64/include/asm/unistd.h" +#define rmb() asm volatile ("mf" ::: "memory") +#define cpu_relax() asm volatile ("hint @pause" ::: "memory") +#endif + #include <time.h> #include <unistd.h> #include <sys/types.h> diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.c b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c new file mode 100644 index 00000000000..af78d9a52a7 --- /dev/null +++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c @@ -0,0 +1,134 @@ +/* + * This file was generated automatically by ExtUtils::ParseXS version 2.18_02 from the + * contents of Context.xs. Do not edit this file, edit Context.xs instead. + * + * ANY CHANGES MADE HERE WILL BE LOST! + * + */ + +#line 1 "Context.xs" +/* + * Context.xs. XS interfaces for perf trace. + * + * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include "EXTERN.h" +#include "perl.h" +#include "XSUB.h" +#include "../../../util/trace-event-perl.h" + +#ifndef PERL_UNUSED_VAR +# define PERL_UNUSED_VAR(var) if (0) var = var +#endif + +#line 41 "Context.c" + +XS(XS_Perf__Trace__Context_common_pc); /* prototype to pass -Wmissing-prototypes */ +XS(XS_Perf__Trace__Context_common_pc) +{ +#ifdef dVAR + dVAR; dXSARGS; +#else + dXSARGS; +#endif + if (items != 1) + Perl_croak(aTHX_ "Usage: %s(%s)", "Perf::Trace::Context::common_pc", "context"); + PERL_UNUSED_VAR(cv); /* -W */ + { + struct scripting_context * context = INT2PTR(struct scripting_context *,SvIV(ST(0))); + int RETVAL; + dXSTARG; + + RETVAL = common_pc(context); + XSprePUSH; PUSHi((IV)RETVAL); + } + XSRETURN(1); +} + + +XS(XS_Perf__Trace__Context_common_flags); /* prototype to pass -Wmissing-prototypes */ +XS(XS_Perf__Trace__Context_common_flags) +{ +#ifdef dVAR + dVAR; dXSARGS; +#else + dXSARGS; +#endif + if (items != 1) + Perl_croak(aTHX_ "Usage: %s(%s)", "Perf::Trace::Context::common_flags", "context"); + PERL_UNUSED_VAR(cv); /* -W */ + { + struct scripting_context * context = INT2PTR(struct scripting_context *,SvIV(ST(0))); + int RETVAL; + dXSTARG; + + RETVAL = common_flags(context); + XSprePUSH; PUSHi((IV)RETVAL); + } + XSRETURN(1); +} + + +XS(XS_Perf__Trace__Context_common_lock_depth); /* prototype to pass -Wmissing-prototypes */ +XS(XS_Perf__Trace__Context_common_lock_depth) +{ +#ifdef dVAR + dVAR; dXSARGS; +#else + dXSARGS; +#endif + if (items != 1) + Perl_croak(aTHX_ "Usage: %s(%s)", "Perf::Trace::Context::common_lock_depth", "context"); + PERL_UNUSED_VAR(cv); /* -W */ + { + struct scripting_context * context = INT2PTR(struct scripting_context *,SvIV(ST(0))); + int RETVAL; + dXSTARG; + + RETVAL = common_lock_depth(context); + XSprePUSH; PUSHi((IV)RETVAL); + } + XSRETURN(1); +} + +#ifdef __cplusplus +extern "C" +#endif +XS(boot_Perf__Trace__Context); /* prototype to pass -Wmissing-prototypes */ +XS(boot_Perf__Trace__Context) +{ +#ifdef dVAR + dVAR; dXSARGS; +#else + dXSARGS; +#endif + const char* file = __FILE__; + + PERL_UNUSED_VAR(cv); /* -W */ + PERL_UNUSED_VAR(items); /* -W */ + XS_VERSION_BOOTCHECK ; + + newXSproto("Perf::Trace::Context::common_pc", XS_Perf__Trace__Context_common_pc, file, "$"); + newXSproto("Perf::Trace::Context::common_flags", XS_Perf__Trace__Context_common_flags, file, "$"); + newXSproto("Perf::Trace::Context::common_lock_depth", XS_Perf__Trace__Context_common_lock_depth, file, "$"); + if (PL_unitcheckav) + call_list(PL_scopestack_ix, PL_unitcheckav); + XSRETURN_YES; +} + diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs new file mode 100644 index 00000000000..fb78006c165 --- /dev/null +++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs @@ -0,0 +1,41 @@ +/* + * Context.xs. XS interfaces for perf trace. + * + * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include "EXTERN.h" +#include "perl.h" +#include "XSUB.h" +#include "../../../util/trace-event-perl.h" + +MODULE = Perf::Trace::Context PACKAGE = Perf::Trace::Context +PROTOTYPES: ENABLE + +int +common_pc(context) + struct scripting_context * context + +int +common_flags(context) + struct scripting_context * context + +int +common_lock_depth(context) + struct scripting_context * context + diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Makefile.PL b/tools/perf/scripts/perl/Perf-Trace-Util/Makefile.PL new file mode 100644 index 00000000000..decdeb0f678 --- /dev/null +++ b/tools/perf/scripts/perl/Perf-Trace-Util/Makefile.PL @@ -0,0 +1,17 @@ +use 5.010000; +use ExtUtils::MakeMaker; +# See lib/ExtUtils/MakeMaker.pm for details of how to influence +# the contents of the Makefile that is written. +WriteMakefile( + NAME => 'Perf::Trace::Context', + VERSION_FROM => 'lib/Perf/Trace/Context.pm', # finds $VERSION + PREREQ_PM => {}, # e.g., Module::Name => 1.1 + ($] >= 5.005 ? ## Add these new keywords supported since 5.005 + (ABSTRACT_FROM => 'lib/Perf/Trace/Context.pm', # retrieve abstract from module + AUTHOR => 'Tom Zanussi <tzanussi@gmail.com>') : ()), + LIBS => [''], # e.g., '-lm' + DEFINE => '-I ../..', # e.g., '-DHAVE_SOMETHING' + INC => '-I.', # e.g., '-I. -I/usr/include/other' + # Un-comment this if you add C files to link with later: + OBJECT => 'Context.o', # link all the C files too +); diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/README b/tools/perf/scripts/perl/Perf-Trace-Util/README new file mode 100644 index 00000000000..9a970763079 --- /dev/null +++ b/tools/perf/scripts/perl/Perf-Trace-Util/README @@ -0,0 +1,59 @@ +Perf-Trace-Util version 0.01 +============================ + +This module contains utility functions for use with perf trace. + +Core.pm and Util.pm are pure Perl modules; Core.pm contains routines +that the core perf support for Perl calls on and should always be +'used', while Util.pm contains useful but optional utility functions +that scripts may want to use. Context.pm contains the Perl->C +interface that allows scripts to access data in the embedding perf +executable; scripts wishing to do that should 'use Context.pm'. + +The Perl->C perf interface is completely driven by Context.xs. If you +want to add new Perl functions that end up accessing C data in the +perf executable, you add desciptions of the new functions here. +scripting_context is a pointer to the perf data in the perf executable +that you want to access - it's passed as the second parameter, +$context, to all handler functions. + +After you do that: + + perl Makefile.PL # to create a Makefile for the next step + make # to create Context.c + + edit Context.c to add const to the char* file = __FILE__ line in + XS(boot_Perf__Trace__Context) to silence a warning/error. + + You can delete the Makefile, object files and anything else that was + generated e.g. blib and shared library, etc, except for of course + Context.c + + You should then be able to run the normal perf make as usual. + +INSTALLATION + +Building perf with perf trace Perl scripting should install this +module in the right place. + +You should make sure libperl and ExtUtils/Embed.pm are installed first +e.g. apt-get install libperl-dev or yum install perl-ExtUtils-Embed. + +DEPENDENCIES + +This module requires these other modules and libraries: + + None + +COPYRIGHT AND LICENCE + +Copyright (C) 2009 by Tom Zanussi <tzanussi@gmail.com> + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.10.0 or, +at your option, any later version of Perl 5 you may have available. + +Alternatively, this software may be distributed under the terms of the +GNU General Public License ("GPL") version 2 as published by the Free +Software Foundation. + diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm new file mode 100644 index 00000000000..6c7f3659cb1 --- /dev/null +++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm @@ -0,0 +1,55 @@ +package Perf::Trace::Context; + +use 5.010000; +use strict; +use warnings; + +require Exporter; + +our @ISA = qw(Exporter); + +our %EXPORT_TAGS = ( 'all' => [ qw( +) ] ); + +our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); + +our @EXPORT = qw( + common_pc common_flags common_lock_depth +); + +our $VERSION = '0.01'; + +require XSLoader; +XSLoader::load('Perf::Trace::Context', $VERSION); + +1; +__END__ +=head1 NAME + +Perf::Trace::Context - Perl extension for accessing functions in perf. + +=head1 SYNOPSIS + + use Perf::Trace::Context; + +=head1 SEE ALSO + +Perf (trace) documentation + +=head1 AUTHOR + +Tom Zanussi, E<lt>tzanussi@gmail.com<gt> + +=head1 COPYRIGHT AND LICENSE + +Copyright (C) 2009 by Tom Zanussi + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.10.0 or, +at your option, any later version of Perl 5 you may have available. + +Alternatively, this software may be distributed under the terms of the +GNU General Public License ("GPL") version 2 as published by the Free +Software Foundation. + +=cut diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm new file mode 100644 index 00000000000..9df376a9f62 --- /dev/null +++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm @@ -0,0 +1,192 @@ +package Perf::Trace::Core; + +use 5.010000; +use strict; +use warnings; + +require Exporter; + +our @ISA = qw(Exporter); + +our %EXPORT_TAGS = ( 'all' => [ qw( +) ] ); + +our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); + +our @EXPORT = qw( +define_flag_field define_flag_value flag_str dump_flag_fields +define_symbolic_field define_symbolic_value symbol_str dump_symbolic_fields +trace_flag_str +); + +our $VERSION = '0.01'; + +my %trace_flags = (0x00 => "NONE", + 0x01 => "IRQS_OFF", + 0x02 => "IRQS_NOSUPPORT", + 0x04 => "NEED_RESCHED", + 0x08 => "HARDIRQ", + 0x10 => "SOFTIRQ"); + +sub trace_flag_str +{ + my ($value) = @_; + + my $string; + + my $print_delim = 0; + + foreach my $idx (sort {$a <=> $b} keys %trace_flags) { + if (!$value && !$idx) { + $string .= "NONE"; + last; + } + + if ($idx && ($value & $idx) == $idx) { + if ($print_delim) { + $string .= " | "; + } + $string .= "$trace_flags{$idx}"; + $print_delim = 1; + $value &= ~$idx; + } + } + + return $string; +} + +my %flag_fields; +my %symbolic_fields; + +sub flag_str +{ + my ($event_name, $field_name, $value) = @_; + + my $string; + + if ($flag_fields{$event_name}{$field_name}) { + my $print_delim = 0; + foreach my $idx (sort {$a <=> $b} keys %{$flag_fields{$event_name}{$field_name}{"values"}}) { + if (!$value && !$idx) { + $string .= "$flag_fields{$event_name}{$field_name}{'values'}{$idx}"; + last; + } + if ($idx && ($value & $idx) == $idx) { + if ($print_delim && $flag_fields{$event_name}{$field_name}{'delim'}) { + $string .= " $flag_fields{$event_name}{$field_name}{'delim'} "; + } + $string .= "$flag_fields{$event_name}{$field_name}{'values'}{$idx}"; + $print_delim = 1; + $value &= ~$idx; + } + } + } + + return $string; +} + +sub define_flag_field +{ + my ($event_name, $field_name, $delim) = @_; + + $flag_fields{$event_name}{$field_name}{"delim"} = $delim; +} + +sub define_flag_value +{ + my ($event_name, $field_name, $value, $field_str) = @_; + + $flag_fields{$event_name}{$field_name}{"values"}{$value} = $field_str; +} + +sub dump_flag_fields +{ + for my $event (keys %flag_fields) { + print "event $event:\n"; + for my $field (keys %{$flag_fields{$event}}) { + print " field: $field:\n"; + print " delim: $flag_fields{$event}{$field}{'delim'}\n"; + foreach my $idx (sort {$a <=> $b} keys %{$flag_fields{$event}{$field}{"values"}}) { + print " value $idx: $flag_fields{$event}{$field}{'values'}{$idx}\n"; + } + } + } +} + +sub symbol_str +{ + my ($event_name, $field_name, $value) = @_; + + if ($symbolic_fields{$event_name}{$field_name}) { + foreach my $idx (sort {$a <=> $b} keys %{$symbolic_fields{$event_name}{$field_name}{"values"}}) { + if (!$value && !$idx) { + return "$symbolic_fields{$event_name}{$field_name}{'values'}{$idx}"; + last; + } + if ($value == $idx) { + return "$symbolic_fields{$event_name}{$field_name}{'values'}{$idx}"; + } + } + } + + return undef; +} + +sub define_symbolic_field +{ + my ($event_name, $field_name) = @_; + + # nothing to do, really +} + +sub define_symbolic_value +{ + my ($event_name, $field_name, $value, $field_str) = @_; + + $symbolic_fields{$event_name}{$field_name}{"values"}{$value} = $field_str; +} + +sub dump_symbolic_fields +{ + for my $event (keys %symbolic_fields) { + print "event $event:\n"; + for my $field (keys %{$symbolic_fields{$event}}) { + print " field: $field:\n"; + foreach my $idx (sort {$a <=> $b} keys %{$symbolic_fields{$event}{$field}{"values"}}) { + print " value $idx: $symbolic_fields{$event}{$field}{'values'}{$idx}\n"; + } + } + } +} + +1; +__END__ +=head1 NAME + +Perf::Trace::Core - Perl extension for perf trace + +=head1 SYNOPSIS + + use Perf::Trace::Core + +=head1 SEE ALSO + +Perf (trace) documentation + +=head1 AUTHOR + +Tom Zanussi, E<lt>tzanussi@gmail.com<gt> + +=head1 COPYRIGHT AND LICENSE + +Copyright (C) 2009 by Tom Zanussi + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.10.0 or, +at your option, any later version of Perl 5 you may have available. + +Alternatively, this software may be distributed under the terms of the +GNU General Public License ("GPL") version 2 as published by the Free +Software Foundation. + +=cut diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm new file mode 100644 index 00000000000..052f132ced2 --- /dev/null +++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm @@ -0,0 +1,88 @@ +package Perf::Trace::Util; + +use 5.010000; +use strict; +use warnings; + +require Exporter; + +our @ISA = qw(Exporter); + +our %EXPORT_TAGS = ( 'all' => [ qw( +) ] ); + +our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); + +our @EXPORT = qw( +avg nsecs nsecs_secs nsecs_nsecs nsecs_usecs print_nsecs +); + +our $VERSION = '0.01'; + +sub avg +{ + my ($total, $n) = @_; + + return $total / $n; +} + +my $NSECS_PER_SEC = 1000000000; + +sub nsecs +{ + my ($secs, $nsecs) = @_; + + return $secs * $NSECS_PER_SEC + $nsecs; +} + +sub nsecs_secs { + my ($nsecs) = @_; + + return $nsecs / $NSECS_PER_SEC; +} + +sub nsecs_nsecs { + my ($nsecs) = @_; + + return $nsecs - nsecs_secs($nsecs); +} + +sub nsecs_str { + my ($nsecs) = @_; + + my $str = sprintf("%5u.%09u", nsecs_secs($nsecs), nsecs_nsecs($nsecs)); + + return $str; +} + +1; +__END__ +=head1 NAME + +Perf::Trace::Util - Perl extension for perf trace + +=head1 SYNOPSIS + + use Perf::Trace::Util; + +=head1 SEE ALSO + +Perf (trace) documentation + +=head1 AUTHOR + +Tom Zanussi, E<lt>tzanussi@gmail.com<gt> + +=head1 COPYRIGHT AND LICENSE + +Copyright (C) 2009 by Tom Zanussi + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.10.0 or, +at your option, any later version of Perl 5 you may have available. + +Alternatively, this software may be distributed under the terms of the +GNU General Public License ("GPL") version 2 as published by the Free +Software Foundation. + +=cut diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/typemap b/tools/perf/scripts/perl/Perf-Trace-Util/typemap new file mode 100644 index 00000000000..840836804aa --- /dev/null +++ b/tools/perf/scripts/perl/Perf-Trace-Util/typemap @@ -0,0 +1 @@ +struct scripting_context * T_PTR diff --git a/tools/perf/scripts/perl/bin/check-perf-trace-record b/tools/perf/scripts/perl/bin/check-perf-trace-record new file mode 100644 index 00000000000..c7ec5de2f53 --- /dev/null +++ b/tools/perf/scripts/perl/bin/check-perf-trace-record @@ -0,0 +1,7 @@ +#!/bin/bash +perf record -c 1 -f -a -M -R -e kmem:kmalloc -e irq:softirq_entry + + + + + diff --git a/tools/perf/scripts/perl/bin/check-perf-trace-report b/tools/perf/scripts/perl/bin/check-perf-trace-report new file mode 100644 index 00000000000..89948b01502 --- /dev/null +++ b/tools/perf/scripts/perl/bin/check-perf-trace-report @@ -0,0 +1,5 @@ +#!/bin/bash +perf trace -s ~/libexec/perf-core/scripts/perl/check-perf-trace.pl + + + diff --git a/tools/perf/scripts/perl/bin/rw-by-file-record b/tools/perf/scripts/perl/bin/rw-by-file-record new file mode 100644 index 00000000000..b25056ebf96 --- /dev/null +++ b/tools/perf/scripts/perl/bin/rw-by-file-record @@ -0,0 +1,2 @@ +#!/bin/bash +perf record -c 1 -f -a -M -R -e syscalls:sys_enter_read -e syscalls:sys_enter_write diff --git a/tools/perf/scripts/perl/bin/rw-by-file-report b/tools/perf/scripts/perl/bin/rw-by-file-report new file mode 100644 index 00000000000..f5dcf9cb5bd --- /dev/null +++ b/tools/perf/scripts/perl/bin/rw-by-file-report @@ -0,0 +1,5 @@ +#!/bin/bash +perf trace -s ~/libexec/perf-core/scripts/perl/rw-by-file.pl + + + diff --git a/tools/perf/scripts/perl/bin/rw-by-pid-record b/tools/perf/scripts/perl/bin/rw-by-pid-record new file mode 100644 index 00000000000..8903979c5b6 --- /dev/null +++ b/tools/perf/scripts/perl/bin/rw-by-pid-record @@ -0,0 +1,2 @@ +#!/bin/bash +perf record -c 1 -f -a -M -R -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write diff --git a/tools/perf/scripts/perl/bin/rw-by-pid-report b/tools/perf/scripts/perl/bin/rw-by-pid-report new file mode 100644 index 00000000000..cea16f78a3a --- /dev/null +++ b/tools/perf/scripts/perl/bin/rw-by-pid-report @@ -0,0 +1,5 @@ +#!/bin/bash +perf trace -s ~/libexec/perf-core/scripts/perl/rw-by-pid.pl + + + diff --git a/tools/perf/scripts/perl/bin/wakeup-latency-record b/tools/perf/scripts/perl/bin/wakeup-latency-record new file mode 100644 index 00000000000..6abedda911a --- /dev/null +++ b/tools/perf/scripts/perl/bin/wakeup-latency-record @@ -0,0 +1,6 @@ +#!/bin/bash +perf record -c 1 -f -a -M -R -e sched:sched_switch -e sched:sched_wakeup + + + + diff --git a/tools/perf/scripts/perl/bin/wakeup-latency-report b/tools/perf/scripts/perl/bin/wakeup-latency-report new file mode 100644 index 00000000000..85769dc456e --- /dev/null +++ b/tools/perf/scripts/perl/bin/wakeup-latency-report @@ -0,0 +1,5 @@ +#!/bin/bash +perf trace -s ~/libexec/perf-core/scripts/perl/wakeup-latency.pl + + + diff --git a/tools/perf/scripts/perl/bin/workqueue-stats-record b/tools/perf/scripts/perl/bin/workqueue-stats-record new file mode 100644 index 00000000000..fce6637b19b --- /dev/null +++ b/tools/perf/scripts/perl/bin/workqueue-stats-record @@ -0,0 +1,2 @@ +#!/bin/bash +perf record -c 1 -f -a -M -R -e workqueue:workqueue_creation -e workqueue:workqueue_destruction -e workqueue:workqueue_execution -e workqueue:workqueue_insertion diff --git a/tools/perf/scripts/perl/bin/workqueue-stats-report b/tools/perf/scripts/perl/bin/workqueue-stats-report new file mode 100644 index 00000000000..aa68435be92 --- /dev/null +++ b/tools/perf/scripts/perl/bin/workqueue-stats-report @@ -0,0 +1,6 @@ +#!/bin/bash +perf trace -s ~/libexec/perf-core/scripts/perl/workqueue-stats.pl + + + + diff --git a/tools/perf/scripts/perl/check-perf-trace.pl b/tools/perf/scripts/perl/check-perf-trace.pl new file mode 100644 index 00000000000..4e7dc0a407a --- /dev/null +++ b/tools/perf/scripts/perl/check-perf-trace.pl @@ -0,0 +1,106 @@ +# perf trace event handlers, generated by perf trace -g perl +# (c) 2009, Tom Zanussi <tzanussi@gmail.com> +# Licensed under the terms of the GNU GPL License version 2 + +# This script tests basic functionality such as flag and symbol +# strings, common_xxx() calls back into perf, begin, end, unhandled +# events, etc. Basically, if this script runs successfully and +# displays expected results, perl scripting support should be ok. + +use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib"; +use lib "./Perf-Trace-Util/lib"; +use Perf::Trace::Core; +use Perf::Trace::Context; +use Perf::Trace::Util; + +sub trace_begin +{ + print "trace_begin\n"; +} + +sub trace_end +{ + print "trace_end\n"; + + print_unhandled(); +} + +sub irq::softirq_entry +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm, + $vec) = @_; + + print_header($event_name, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm); + + print_uncommon($context); + + printf("vec=%s\n", + symbol_str("irq::softirq_entry", "vec", $vec)); +} + +sub kmem::kmalloc +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm, + $call_site, $ptr, $bytes_req, $bytes_alloc, + $gfp_flags) = @_; + + print_header($event_name, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm); + + print_uncommon($context); + + printf("call_site=%p, ptr=%p, bytes_req=%u, bytes_alloc=%u, ". + "gfp_flags=%s\n", + $call_site, $ptr, $bytes_req, $bytes_alloc, + + flag_str("kmem::kmalloc", "gfp_flags", $gfp_flags)); +} + +# print trace fields not included in handler args +sub print_uncommon +{ + my ($context) = @_; + + printf("common_preempt_count=%d, common_flags=%s, common_lock_depth=%d, ", + common_pc($context), trace_flag_str(common_flags($context)), + common_lock_depth($context)); + +} + +my %unhandled; + +sub print_unhandled +{ + if ((scalar keys %unhandled) == 0) { + return; + } + + print "\nunhandled events:\n\n"; + + printf("%-40s %10s\n", "event", "count"); + printf("%-40s %10s\n", "----------------------------------------", + "-----------"); + + foreach my $event_name (keys %unhandled) { + printf("%-40s %10d\n", $event_name, $unhandled{$event_name}); + } +} + +sub trace_unhandled +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm) = @_; + + $unhandled{$event_name}++; +} + +sub print_header +{ + my ($event_name, $cpu, $secs, $nsecs, $pid, $comm) = @_; + + printf("%-20s %5u %05u.%09u %8u %-20s ", + $event_name, $cpu, $secs, $nsecs, $pid, $comm); +} diff --git a/tools/perf/scripts/perl/rw-by-file.pl b/tools/perf/scripts/perl/rw-by-file.pl new file mode 100644 index 00000000000..61f91561d84 --- /dev/null +++ b/tools/perf/scripts/perl/rw-by-file.pl @@ -0,0 +1,105 @@ +#!/usr/bin/perl -w +# (c) 2009, Tom Zanussi <tzanussi@gmail.com> +# Licensed under the terms of the GNU GPL License version 2 + +# Display r/w activity for files read/written to for a given program + +# The common_* event handler fields are the most useful fields common to +# all events. They don't necessarily correspond to the 'common_*' fields +# in the status files. Those fields not available as handler params can +# be retrieved via script functions of the form get_common_*(). + +use 5.010000; +use strict; +use warnings; + +use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib"; +use lib "./Perf-Trace-Util/lib"; +use Perf::Trace::Core; +use Perf::Trace::Util; + +# change this to the comm of the program you're interested in +my $for_comm = "perf"; + +my %reads; +my %writes; + +sub syscalls::sys_enter_read +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm, $nr, $fd, $buf, $count) = @_; + + if ($common_comm eq $for_comm) { + $reads{$fd}{bytes_requested} += $count; + $reads{$fd}{total_reads}++; + } +} + +sub syscalls::sys_enter_write +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm, $nr, $fd, $buf, $count) = @_; + + if ($common_comm eq $for_comm) { + $writes{$fd}{bytes_written} += $count; + $writes{$fd}{total_writes}++; + } +} + +sub trace_end +{ + printf("file read counts for $for_comm:\n\n"); + + printf("%6s %10s %10s\n", "fd", "# reads", "bytes_requested"); + printf("%6s %10s %10s\n", "------", "----------", "-----------"); + + foreach my $fd (sort {$reads{$b}{bytes_requested} <=> + $reads{$a}{bytes_requested}} keys %reads) { + my $total_reads = $reads{$fd}{total_reads}; + my $bytes_requested = $reads{$fd}{bytes_requested}; + printf("%6u %10u %10u\n", $fd, $total_reads, $bytes_requested); + } + + printf("\nfile write counts for $for_comm:\n\n"); + + printf("%6s %10s %10s\n", "fd", "# writes", "bytes_written"); + printf("%6s %10s %10s\n", "------", "----------", "-----------"); + + foreach my $fd (sort {$writes{$b}{bytes_written} <=> + $writes{$a}{bytes_written}} keys %writes) { + my $total_writes = $writes{$fd}{total_writes}; + my $bytes_written = $writes{$fd}{bytes_written}; + printf("%6u %10u %10u\n", $fd, $total_writes, $bytes_written); + } + + print_unhandled(); +} + +my %unhandled; + +sub print_unhandled +{ + if ((scalar keys %unhandled) == 0) { + return; + } + + print "\nunhandled events:\n\n"; + + printf("%-40s %10s\n", "event", "count"); + printf("%-40s %10s\n", "----------------------------------------", + "-----------"); + + foreach my $event_name (keys %unhandled) { + printf("%-40s %10d\n", $event_name, $unhandled{$event_name}); + } +} + +sub trace_unhandled +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm) = @_; + + $unhandled{$event_name}++; +} + + diff --git a/tools/perf/scripts/perl/rw-by-pid.pl b/tools/perf/scripts/perl/rw-by-pid.pl new file mode 100644 index 00000000000..da601fae1a0 --- /dev/null +++ b/tools/perf/scripts/perl/rw-by-pid.pl @@ -0,0 +1,170 @@ +#!/usr/bin/perl -w +# (c) 2009, Tom Zanussi <tzanussi@gmail.com> +# Licensed under the terms of the GNU GPL License version 2 + +# Display r/w activity for all processes + +# The common_* event handler fields are the most useful fields common to +# all events. They don't necessarily correspond to the 'common_*' fields +# in the status files. Those fields not available as handler params can +# be retrieved via script functions of the form get_common_*(). + +use 5.010000; +use strict; +use warnings; + +use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib"; +use lib "./Perf-Trace-Util/lib"; +use Perf::Trace::Core; +use Perf::Trace::Util; + +my %reads; +my %writes; + +sub syscalls::sys_exit_read +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm, + $nr, $ret) = @_; + + if ($ret > 0) { + $reads{$common_pid}{bytes_read} += $ret; + } else { + if (!defined ($reads{$common_pid}{bytes_read})) { + $reads{$common_pid}{bytes_read} = 0; + } + $reads{$common_pid}{errors}{$ret}++; + } +} + +sub syscalls::sys_enter_read +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm, + $nr, $fd, $buf, $count) = @_; + + $reads{$common_pid}{bytes_requested} += $count; + $reads{$common_pid}{total_reads}++; + $reads{$common_pid}{comm} = $common_comm; +} + +sub syscalls::sys_exit_write +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm, + $nr, $ret) = @_; + + if ($ret <= 0) { + $writes{$common_pid}{errors}{$ret}++; + } +} + +sub syscalls::sys_enter_write +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm, + $nr, $fd, $buf, $count) = @_; + + $writes{$common_pid}{bytes_written} += $count; + $writes{$common_pid}{total_writes}++; + $writes{$common_pid}{comm} = $common_comm; +} + +sub trace_end +{ + printf("read counts by pid:\n\n"); + + printf("%6s %20s %10s %10s %10s\n", "pid", "comm", + "# reads", "bytes_requested", "bytes_read"); + printf("%6s %-20s %10s %10s %10s\n", "------", "--------------------", + "-----------", "----------", "----------"); + + foreach my $pid (sort {$reads{$b}{bytes_read} <=> + $reads{$a}{bytes_read}} keys %reads) { + my $comm = $reads{$pid}{comm}; + my $total_reads = $reads{$pid}{total_reads}; + my $bytes_requested = $reads{$pid}{bytes_requested}; + my $bytes_read = $reads{$pid}{bytes_read}; + + printf("%6s %-20s %10s %10s %10s\n", $pid, $comm, + $total_reads, $bytes_requested, $bytes_read); + } + + printf("\nfailed reads by pid:\n\n"); + + printf("%6s %20s %6s %10s\n", "pid", "comm", "error #", "# errors"); + printf("%6s %20s %6s %10s\n", "------", "--------------------", + "------", "----------"); + + foreach my $pid (keys %reads) { + my $comm = $reads{$pid}{comm}; + foreach my $err (sort {$reads{$b}{comm} cmp $reads{$a}{comm}} + keys %{$reads{$pid}{errors}}) { + my $errors = $reads{$pid}{errors}{$err}; + + printf("%6d %-20s %6d %10s\n", $pid, $comm, $err, $errors); + } + } + + printf("\nwrite counts by pid:\n\n"); + + printf("%6s %20s %10s %10s\n", "pid", "comm", + "# writes", "bytes_written"); + printf("%6s %-20s %10s %10s\n", "------", "--------------------", + "-----------", "----------"); + + foreach my $pid (sort {$writes{$b}{bytes_written} <=> + $writes{$a}{bytes_written}} keys %writes) { + my $comm = $writes{$pid}{comm}; + my $total_writes = $writes{$pid}{total_writes}; + my $bytes_written = $writes{$pid}{bytes_written}; + + printf("%6s %-20s %10s %10s\n", $pid, $comm, + $total_writes, $bytes_written); + } + + printf("\nfailed writes by pid:\n\n"); + + printf("%6s %20s %6s %10s\n", "pid", "comm", "error #", "# errors"); + printf("%6s %20s %6s %10s\n", "------", "--------------------", + "------", "----------"); + + foreach my $pid (keys %writes) { + my $comm = $writes{$pid}{comm}; + foreach my $err (sort {$writes{$b}{comm} cmp $writes{$a}{comm}} + keys %{$writes{$pid}{errors}}) { + my $errors = $writes{$pid}{errors}{$err}; + + printf("%6d %-20s %6d %10s\n", $pid, $comm, $err, $errors); + } + } + + print_unhandled(); +} + +my %unhandled; + +sub print_unhandled +{ + if ((scalar keys %unhandled) == 0) { + return; + } + + print "\nunhandled events:\n\n"; + + printf("%-40s %10s\n", "event", "count"); + printf("%-40s %10s\n", "----------------------------------------", + "-----------"); + + foreach my $event_name (keys %unhandled) { + printf("%-40s %10d\n", $event_name, $unhandled{$event_name}); + } +} + +sub trace_unhandled +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm) = @_; + + $unhandled{$event_name}++; +} diff --git a/tools/perf/scripts/perl/wakeup-latency.pl b/tools/perf/scripts/perl/wakeup-latency.pl new file mode 100644 index 00000000000..ed58ef284e2 --- /dev/null +++ b/tools/perf/scripts/perl/wakeup-latency.pl @@ -0,0 +1,103 @@ +#!/usr/bin/perl -w +# (c) 2009, Tom Zanussi <tzanussi@gmail.com> +# Licensed under the terms of the GNU GPL License version 2 + +# Display avg/min/max wakeup latency + +# The common_* event handler fields are the most useful fields common to +# all events. They don't necessarily correspond to the 'common_*' fields +# in the status files. Those fields not available as handler params can +# be retrieved via script functions of the form get_common_*(). + +use 5.010000; +use strict; +use warnings; + +use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib"; +use lib "./Perf-Trace-Util/lib"; +use Perf::Trace::Core; +use Perf::Trace::Util; + +my %last_wakeup; + +my $max_wakeup_latency; +my $min_wakeup_latency; +my $total_wakeup_latency; +my $total_wakeups; + +sub sched::sched_switch +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm, + $prev_comm, $prev_pid, $prev_prio, $prev_state, $next_comm, $next_pid, + $next_prio) = @_; + + my $wakeup_ts = $last_wakeup{$common_cpu}{ts}; + if ($wakeup_ts) { + my $switch_ts = nsecs($common_secs, $common_nsecs); + my $wakeup_latency = $switch_ts - $wakeup_ts; + if ($wakeup_latency > $max_wakeup_latency) { + $max_wakeup_latency = $wakeup_latency; + } + if ($wakeup_latency < $min_wakeup_latency) { + $min_wakeup_latency = $wakeup_latency; + } + $total_wakeup_latency += $wakeup_latency; + $total_wakeups++; + } + $last_wakeup{$common_cpu}{ts} = 0; +} + +sub sched::sched_wakeup +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm, + $comm, $pid, $prio, $success, $target_cpu) = @_; + + $last_wakeup{$target_cpu}{ts} = nsecs($common_secs, $common_nsecs); +} + +sub trace_begin +{ + $min_wakeup_latency = 1000000000; + $max_wakeup_latency = 0; +} + +sub trace_end +{ + printf("wakeup_latency stats:\n\n"); + print "total_wakeups: $total_wakeups\n"; + printf("avg_wakeup_latency (ns): %u\n", + avg($total_wakeup_latency, $total_wakeups)); + printf("min_wakeup_latency (ns): %u\n", $min_wakeup_latency); + printf("max_wakeup_latency (ns): %u\n", $max_wakeup_latency); + + print_unhandled(); +} + +my %unhandled; + +sub print_unhandled +{ + if ((scalar keys %unhandled) == 0) { + return; + } + + print "\nunhandled events:\n\n"; + + printf("%-40s %10s\n", "event", "count"); + printf("%-40s %10s\n", "----------------------------------------", + "-----------"); + + foreach my $event_name (keys %unhandled) { + printf("%-40s %10d\n", $event_name, $unhandled{$event_name}); + } +} + +sub trace_unhandled +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm) = @_; + + $unhandled{$event_name}++; +} diff --git a/tools/perf/scripts/perl/workqueue-stats.pl b/tools/perf/scripts/perl/workqueue-stats.pl new file mode 100644 index 00000000000..511302c8a49 --- /dev/null +++ b/tools/perf/scripts/perl/workqueue-stats.pl @@ -0,0 +1,129 @@ +#!/usr/bin/perl -w +# (c) 2009, Tom Zanussi <tzanussi@gmail.com> +# Licensed under the terms of the GNU GPL License version 2 + +# Displays workqueue stats +# +# Usage: +# +# perf record -c 1 -f -a -R -e workqueue:workqueue_creation -e +# workqueue:workqueue_destruction -e workqueue:workqueue_execution +# -e workqueue:workqueue_insertion +# +# perf trace -p -s tools/perf/scripts/perl/workqueue-stats.pl + +use 5.010000; +use strict; +use warnings; + +use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib"; +use lib "./Perf-Trace-Util/lib"; +use Perf::Trace::Core; +use Perf::Trace::Util; + +my @cpus; + +sub workqueue::workqueue_destruction +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm, + $thread_comm, $thread_pid) = @_; + + $cpus[$common_cpu]{$thread_pid}{destroyed}++; + $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm; +} + +sub workqueue::workqueue_creation +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm, + $thread_comm, $thread_pid, $cpu) = @_; + + $cpus[$common_cpu]{$thread_pid}{created}++; + $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm; +} + +sub workqueue::workqueue_execution +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm, + $thread_comm, $thread_pid, $func) = @_; + + $cpus[$common_cpu]{$thread_pid}{executed}++; + $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm; +} + +sub workqueue::workqueue_insertion +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm, + $thread_comm, $thread_pid, $func) = @_; + + $cpus[$common_cpu]{$thread_pid}{inserted}++; + $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm; +} + +sub trace_end +{ + print "workqueue work stats:\n\n"; + my $cpu = 0; + printf("%3s %6s %6s\t%-20s\n", "cpu", "ins", "exec", "name"); + printf("%3s %6s %6s\t%-20s\n", "---", "---", "----", "----"); + foreach my $pidhash (@cpus) { + while ((my $pid, my $wqhash) = each %$pidhash) { + my $ins = $$wqhash{'inserted'}; + my $exe = $$wqhash{'executed'}; + my $comm = $$wqhash{'comm'}; + if ($ins || $exe) { + printf("%3u %6u %6u\t%-20s\n", $cpu, $ins, $exe, $comm); + } + } + $cpu++; + } + + $cpu = 0; + print "\nworkqueue lifecycle stats:\n\n"; + printf("%3s %6s %6s\t%-20s\n", "cpu", "created", "destroyed", "name"); + printf("%3s %6s %6s\t%-20s\n", "---", "-------", "---------", "----"); + foreach my $pidhash (@cpus) { + while ((my $pid, my $wqhash) = each %$pidhash) { + my $created = $$wqhash{'created'}; + my $destroyed = $$wqhash{'destroyed'}; + my $comm = $$wqhash{'comm'}; + if ($created || $destroyed) { + printf("%3u %6u %6u\t%-20s\n", $cpu, $created, $destroyed, + $comm); + } + } + $cpu++; + } + + print_unhandled(); +} + +my %unhandled; + +sub print_unhandled +{ + if ((scalar keys %unhandled) == 0) { + return; + } + + print "\nunhandled events:\n\n"; + + printf("%-40s %10s\n", "event", "count"); + printf("%-40s %10s\n", "----------------------------------------", + "-----------"); + + foreach my $event_name (keys %unhandled) { + printf("%-40s %10d\n", $event_name, $unhandled{$event_name}); + } +} + +sub trace_unhandled +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm) = @_; + + $unhandled{$event_name}++; +} diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 6f8ea9d210b..918eb376abe 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -1,10 +1,15 @@ -#ifndef CACHE_H -#define CACHE_H +#ifndef __PERF_CACHE_H +#define __PERF_CACHE_H #include "util.h" #include "strbuf.h" #include "../perf.h" +#define CMD_EXEC_PATH "--exec-path" +#define CMD_PERF_DIR "--perf-dir=" +#define CMD_WORK_TREE "--work-tree=" +#define CMD_DEBUGFS_DIR "--debugfs-dir=" + #define PERF_DIR_ENVIRONMENT "PERF_DIR" #define PERF_WORK_TREE_ENVIRONMENT "PERF_WORK_TREE" #define DEFAULT_PERF_DIR_ENVIRONMENT ".perf" @@ -117,4 +122,4 @@ extern char *perf_pathdup(const char *fmt, ...) extern size_t strlcpy(char *dest, const char *src, size_t size); -#endif /* CACHE_H */ +#endif /* __PERF_CACHE_H */ diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 3b8380f1b47..b3b71258272 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -206,7 +206,7 @@ fill_node(struct callchain_node *node, struct ip_callchain *chain, } node->val_nr = chain->nr - start; if (!node->val_nr) - printf("Warning: empty node in callchain tree\n"); + pr_warning("Warning: empty node in callchain tree\n"); } static void diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 43cf3ea9e08..ad4626de4c2 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -58,4 +58,4 @@ static inline u64 cumul_hits(struct callchain_node *node) int register_callchain_param(struct callchain_param *param); void append_chain(struct callchain_node *root, struct ip_callchain *chain, struct symbol **syms); -#endif +#endif /* __PERF_CALLCHAIN_H */ diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h index 58d597564b9..24e8809210b 100644 --- a/tools/perf/util/color.h +++ b/tools/perf/util/color.h @@ -1,5 +1,5 @@ -#ifndef COLOR_H -#define COLOR_H +#ifndef __PERF_COLOR_H +#define __PERF_COLOR_H /* "\033[1;38;5;2xx;48;5;2xxm\0" is 23 bytes */ #define COLOR_MAXLEN 24 @@ -39,4 +39,4 @@ int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *bu int percent_color_fprintf(FILE *fp, const char *fmt, double percent); const char *get_percent_color(double percent); -#endif /* COLOR_H */ +#endif /* __PERF_COLOR_H */ diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c index 0b791bd346b..35073621e5d 100644 --- a/tools/perf/util/ctype.c +++ b/tools/perf/util/ctype.c @@ -29,3 +29,11 @@ unsigned char sane_ctype[256] = { A, A, A, A, A, A, A, A, A, A, A, R, R, P, P, 0, /* 112..127 */ /* Nothing in the 128.. range */ }; + +const char *graph_line = + "_____________________________________________________________________" + "_____________________________________________________________________"; +const char *graph_dotted_line = + "---------------------------------------------------------------------" + "---------------------------------------------------------------------" + "---------------------------------------------------------------------"; diff --git a/tools/perf/util/data_map.c b/tools/perf/util/data_map.c new file mode 100644 index 00000000000..ca0bedf637c --- /dev/null +++ b/tools/perf/util/data_map.c @@ -0,0 +1,291 @@ +#include "data_map.h" +#include "symbol.h" +#include "util.h" +#include "debug.h" + + +static struct perf_file_handler *curr_handler; +static unsigned long mmap_window = 32; +static char __cwd[PATH_MAX]; + +static int process_event_stub(event_t *event __used) +{ + dump_printf(": unhandled!\n"); + return 0; +} + +void register_perf_file_handler(struct perf_file_handler *handler) +{ + if (!handler->process_sample_event) + handler->process_sample_event = process_event_stub; + if (!handler->process_mmap_event) + handler->process_mmap_event = process_event_stub; + if (!handler->process_comm_event) + handler->process_comm_event = process_event_stub; + if (!handler->process_fork_event) + handler->process_fork_event = process_event_stub; + if (!handler->process_exit_event) + handler->process_exit_event = process_event_stub; + if (!handler->process_lost_event) + handler->process_lost_event = process_event_stub; + if (!handler->process_read_event) + handler->process_read_event = process_event_stub; + if (!handler->process_throttle_event) + handler->process_throttle_event = process_event_stub; + if (!handler->process_unthrottle_event) + handler->process_unthrottle_event = process_event_stub; + + curr_handler = handler; +} + +static const char *event__name[] = { + [0] = "TOTAL", + [PERF_RECORD_MMAP] = "MMAP", + [PERF_RECORD_LOST] = "LOST", + [PERF_RECORD_COMM] = "COMM", + [PERF_RECORD_EXIT] = "EXIT", + [PERF_RECORD_THROTTLE] = "THROTTLE", + [PERF_RECORD_UNTHROTTLE] = "UNTHROTTLE", + [PERF_RECORD_FORK] = "FORK", + [PERF_RECORD_READ] = "READ", + [PERF_RECORD_SAMPLE] = "SAMPLE", +}; + +unsigned long event__total[PERF_RECORD_MAX]; + +void event__print_totals(void) +{ + int i; + for (i = 0; i < PERF_RECORD_MAX; ++i) + pr_info("%10s events: %10ld\n", + event__name[i], event__total[i]); +} + +static int +process_event(event_t *event, unsigned long offset, unsigned long head) +{ + trace_event(event); + + if (event->header.type < PERF_RECORD_MAX) { + dump_printf("%p [%p]: PERF_RECORD_%s", + (void *)(offset + head), + (void *)(long)(event->header.size), + event__name[event->header.type]); + ++event__total[0]; + ++event__total[event->header.type]; + } + + switch (event->header.type) { + case PERF_RECORD_SAMPLE: + return curr_handler->process_sample_event(event); + case PERF_RECORD_MMAP: + return curr_handler->process_mmap_event(event); + case PERF_RECORD_COMM: + return curr_handler->process_comm_event(event); + case PERF_RECORD_FORK: + return curr_handler->process_fork_event(event); + case PERF_RECORD_EXIT: + return curr_handler->process_exit_event(event); + case PERF_RECORD_LOST: + return curr_handler->process_lost_event(event); + case PERF_RECORD_READ: + return curr_handler->process_read_event(event); + case PERF_RECORD_THROTTLE: + return curr_handler->process_throttle_event(event); + case PERF_RECORD_UNTHROTTLE: + return curr_handler->process_unthrottle_event(event); + default: + curr_handler->total_unknown++; + return -1; + } +} + +int perf_header__read_build_ids(int input, off_t offset, off_t size) +{ + struct build_id_event bev; + char filename[PATH_MAX]; + off_t limit = offset + size; + int err = -1; + + while (offset < limit) { + struct dso *dso; + ssize_t len; + + if (read(input, &bev, sizeof(bev)) != sizeof(bev)) + goto out; + + len = bev.header.size - sizeof(bev); + if (read(input, filename, len) != len) + goto out; + + dso = dsos__findnew(filename); + if (dso != NULL) + dso__set_build_id(dso, &bev.build_id); + + offset += bev.header.size; + } + err = 0; +out: + return err; +} + +int mmap_dispatch_perf_file(struct perf_header **pheader, + const char *input_name, + int force, + int full_paths, + int *cwdlen, + char **cwd) +{ + int err; + struct perf_header *header; + unsigned long head, shift; + unsigned long offset = 0; + struct stat input_stat; + size_t page_size; + u64 sample_type; + event_t *event; + uint32_t size; + int input; + char *buf; + + if (curr_handler == NULL) { + pr_debug("Forgot to register perf file handler\n"); + return -EINVAL; + } + + page_size = getpagesize(); + + input = open(input_name, O_RDONLY); + if (input < 0) { + pr_err("Failed to open file: %s", input_name); + if (!strcmp(input_name, "perf.data")) + pr_err(" (try 'perf record' first)"); + pr_err("\n"); + return -errno; + } + + if (fstat(input, &input_stat) < 0) { + pr_err("failed to stat file"); + err = -errno; + goto out_close; + } + + err = -EACCES; + if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) { + pr_err("file: %s not owned by current user or root\n", + input_name); + goto out_close; + } + + if (input_stat.st_size == 0) { + pr_info("zero-sized file, nothing to do!\n"); + goto done; + } + + err = -ENOMEM; + header = perf_header__new(); + if (header == NULL) + goto out_close; + + err = perf_header__read(header, input); + if (err < 0) + goto out_delete; + *pheader = header; + head = header->data_offset; + + sample_type = perf_header__sample_type(header); + + err = -EINVAL; + if (curr_handler->sample_type_check && + curr_handler->sample_type_check(sample_type) < 0) + goto out_delete; + + if (!full_paths) { + if (getcwd(__cwd, sizeof(__cwd)) == NULL) { + pr_err("failed to get the current directory\n"); + err = -errno; + goto out_delete; + } + *cwd = __cwd; + *cwdlen = strlen(*cwd); + } else { + *cwd = NULL; + *cwdlen = 0; + } + + shift = page_size * (head / page_size); + offset += shift; + head -= shift; + +remap: + buf = mmap(NULL, page_size * mmap_window, PROT_READ, + MAP_SHARED, input, offset); + if (buf == MAP_FAILED) { + pr_err("failed to mmap file\n"); + err = -errno; + goto out_delete; + } + +more: + event = (event_t *)(buf + head); + + size = event->header.size; + if (!size) + size = 8; + + if (head + event->header.size >= page_size * mmap_window) { + int munmap_ret; + + shift = page_size * (head / page_size); + + munmap_ret = munmap(buf, page_size * mmap_window); + assert(munmap_ret == 0); + + offset += shift; + head -= shift; + goto remap; + } + + size = event->header.size; + + dump_printf("\n%p [%p]: event: %d\n", + (void *)(offset + head), + (void *)(long)event->header.size, + event->header.type); + + if (!size || process_event(event, offset, head) < 0) { + + dump_printf("%p [%p]: skipping unknown header type: %d\n", + (void *)(offset + head), + (void *)(long)(event->header.size), + event->header.type); + + /* + * assume we lost track of the stream, check alignment, and + * increment a single u64 in the hope to catch on again 'soon'. + */ + + if (unlikely(head & 7)) + head &= ~7ULL; + + size = 8; + } + + head += size; + + if (offset + head >= header->data_offset + header->data_size) + goto done; + + if (offset + head < (unsigned long)input_stat.st_size) + goto more; + +done: + err = 0; +out_close: + close(input); + + return err; +out_delete: + perf_header__delete(header); + goto out_close; +} diff --git a/tools/perf/util/data_map.h b/tools/perf/util/data_map.h new file mode 100644 index 00000000000..3180ff7e363 --- /dev/null +++ b/tools/perf/util/data_map.h @@ -0,0 +1,32 @@ +#ifndef __PERF_DATAMAP_H +#define __PERF_DATAMAP_H + +#include "event.h" +#include "header.h" + +typedef int (*event_type_handler_t)(event_t *); + +struct perf_file_handler { + event_type_handler_t process_sample_event; + event_type_handler_t process_mmap_event; + event_type_handler_t process_comm_event; + event_type_handler_t process_fork_event; + event_type_handler_t process_exit_event; + event_type_handler_t process_lost_event; + event_type_handler_t process_read_event; + event_type_handler_t process_throttle_event; + event_type_handler_t process_unthrottle_event; + int (*sample_type_check)(u64 sample_type); + unsigned long total_unknown; +}; + +void register_perf_file_handler(struct perf_file_handler *handler); +int mmap_dispatch_perf_file(struct perf_header **pheader, + const char *input_name, + int force, + int full_paths, + int *cwdlen, + char **cwd); +int perf_header__read_build_ids(int input, off_t offset, off_t file_size); + +#endif diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index e8ca98fe0bd..28d520d5a1f 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -13,12 +13,12 @@ int verbose = 0; int dump_trace = 0; -int eprintf(const char *fmt, ...) +int eprintf(int level, const char *fmt, ...) { va_list args; int ret = 0; - if (verbose) { + if (verbose >= level) { va_start(args, fmt); ret = vfprintf(stderr, fmt, args); va_end(args); diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index 437eea58ce4..c6c24c522de 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -1,8 +1,15 @@ /* For debugging general purposes */ +#ifndef __PERF_DEBUG_H +#define __PERF_DEBUG_H + +#include "event.h" extern int verbose; extern int dump_trace; -int eprintf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); +int eprintf(int level, + const char *fmt, ...) __attribute__((format(printf, 2, 3))); int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); void trace_event(event_t *event); + +#endif /* __PERF_DEBUG_H */ diff --git a/tools/perf/util/debugfs.c b/tools/perf/util/debugfs.c new file mode 100644 index 00000000000..06b73ee02c4 --- /dev/null +++ b/tools/perf/util/debugfs.c @@ -0,0 +1,241 @@ +#include "util.h" +#include "debugfs.h" +#include "cache.h" + +static int debugfs_premounted; +static char debugfs_mountpoint[MAX_PATH+1]; + +static const char *debugfs_known_mountpoints[] = { + "/sys/kernel/debug/", + "/debug/", + 0, +}; + +/* use this to force a umount */ +void debugfs_force_cleanup(void) +{ + debugfs_find_mountpoint(); + debugfs_premounted = 0; + debugfs_umount(); +} + +/* construct a full path to a debugfs element */ +int debugfs_make_path(const char *element, char *buffer, int size) +{ + int len; + + if (strlen(debugfs_mountpoint) == 0) { + buffer[0] = '\0'; + return -1; + } + + len = strlen(debugfs_mountpoint) + strlen(element) + 1; + if (len >= size) + return len+1; + + snprintf(buffer, size-1, "%s/%s", debugfs_mountpoint, element); + return 0; +} + +static int debugfs_found; + +/* find the path to the mounted debugfs */ +const char *debugfs_find_mountpoint(void) +{ + const char **ptr; + char type[100]; + FILE *fp; + + if (debugfs_found) + return (const char *) debugfs_mountpoint; + + ptr = debugfs_known_mountpoints; + while (*ptr) { + if (debugfs_valid_mountpoint(*ptr) == 0) { + debugfs_found = 1; + strcpy(debugfs_mountpoint, *ptr); + return debugfs_mountpoint; + } + ptr++; + } + + /* give up and parse /proc/mounts */ + fp = fopen("/proc/mounts", "r"); + if (fp == NULL) + die("Can't open /proc/mounts for read"); + + while (fscanf(fp, "%*s %" + STR(MAX_PATH) + "s %99s %*s %*d %*d\n", + debugfs_mountpoint, type) == 2) { + if (strcmp(type, "debugfs") == 0) + break; + } + fclose(fp); + + if (strcmp(type, "debugfs") != 0) + return NULL; + + debugfs_found = 1; + + return debugfs_mountpoint; +} + +/* verify that a mountpoint is actually a debugfs instance */ + +int debugfs_valid_mountpoint(const char *debugfs) +{ + struct statfs st_fs; + + if (statfs(debugfs, &st_fs) < 0) + return -ENOENT; + else if (st_fs.f_type != (long) DEBUGFS_MAGIC) + return -ENOENT; + + return 0; +} + + +int debugfs_valid_entry(const char *path) +{ + struct stat st; + + if (stat(path, &st)) + return -errno; + + return 0; +} + +/* mount the debugfs somewhere */ + +int debugfs_mount(const char *mountpoint) +{ + char mountcmd[128]; + + /* see if it's already mounted */ + if (debugfs_find_mountpoint()) { + debugfs_premounted = 1; + return 0; + } + + /* if not mounted and no argument */ + if (mountpoint == NULL) { + /* see if environment variable set */ + mountpoint = getenv(PERF_DEBUGFS_ENVIRONMENT); + /* if no environment variable, use default */ + if (mountpoint == NULL) + mountpoint = "/sys/kernel/debug"; + } + + /* save the mountpoint */ + strncpy(debugfs_mountpoint, mountpoint, sizeof(debugfs_mountpoint)); + + /* mount it */ + snprintf(mountcmd, sizeof(mountcmd), + "/bin/mount -t debugfs debugfs %s", mountpoint); + return system(mountcmd); +} + +/* umount the debugfs */ + +int debugfs_umount(void) +{ + char umountcmd[128]; + int ret; + + /* if it was already mounted, leave it */ + if (debugfs_premounted) + return 0; + + /* make sure it's a valid mount point */ + ret = debugfs_valid_mountpoint(debugfs_mountpoint); + if (ret) + return ret; + + snprintf(umountcmd, sizeof(umountcmd), + "/bin/umount %s", debugfs_mountpoint); + return system(umountcmd); +} + +int debugfs_write(const char *entry, const char *value) +{ + char path[MAX_PATH+1]; + int ret, count; + int fd; + + /* construct the path */ + snprintf(path, sizeof(path), "%s/%s", debugfs_mountpoint, entry); + + /* verify that it exists */ + ret = debugfs_valid_entry(path); + if (ret) + return ret; + + /* get how many chars we're going to write */ + count = strlen(value); + + /* open the debugfs entry */ + fd = open(path, O_RDWR); + if (fd < 0) + return -errno; + + while (count > 0) { + /* write it */ + ret = write(fd, value, count); + if (ret <= 0) { + if (ret == EAGAIN) + continue; + close(fd); + return -errno; + } + count -= ret; + } + + /* close it */ + close(fd); + + /* return success */ + return 0; +} + +/* + * read a debugfs entry + * returns the number of chars read or a negative errno + */ +int debugfs_read(const char *entry, char *buffer, size_t size) +{ + char path[MAX_PATH+1]; + int ret; + int fd; + + /* construct the path */ + snprintf(path, sizeof(path), "%s/%s", debugfs_mountpoint, entry); + + /* verify that it exists */ + ret = debugfs_valid_entry(path); + if (ret) + return ret; + + /* open the debugfs entry */ + fd = open(path, O_RDONLY); + if (fd < 0) + return -errno; + + do { + /* read it */ + ret = read(fd, buffer, size); + if (ret == 0) { + close(fd); + return EOF; + } + } while (ret < 0 && errno == EAGAIN); + + /* close it */ + close(fd); + + /* make *sure* there's a null character at the end */ + buffer[ret] = '\0'; + + /* return the number of chars read */ + return ret; +} diff --git a/tools/perf/util/debugfs.h b/tools/perf/util/debugfs.h new file mode 100644 index 00000000000..3cd14f9ae78 --- /dev/null +++ b/tools/perf/util/debugfs.h @@ -0,0 +1,25 @@ +#ifndef __DEBUGFS_H__ +#define __DEBUGFS_H__ + +#include <sys/mount.h> + +#ifndef MAX_PATH +# define MAX_PATH 256 +#endif + +#ifndef STR +# define _STR(x) #x +# define STR(x) _STR(x) +#endif + +extern const char *debugfs_find_mountpoint(void); +extern int debugfs_valid_mountpoint(const char *debugfs); +extern int debugfs_valid_entry(const char *path); +extern int debugfs_mount(const char *mountpoint); +extern int debugfs_umount(void); +extern int debugfs_write(const char *entry, const char *value); +extern int debugfs_read(const char *entry, char *buffer, size_t size); +extern void debugfs_force_cleanup(void); +extern int debugfs_make_path(const char *element, char *buffer, int size); + +#endif /* __DEBUGFS_H__ */ diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c new file mode 100644 index 00000000000..414b89d1bde --- /dev/null +++ b/tools/perf/util/event.c @@ -0,0 +1,312 @@ +#include <linux/types.h> +#include "event.h" +#include "debug.h" +#include "string.h" +#include "thread.h" + +static pid_t event__synthesize_comm(pid_t pid, int full, + int (*process)(event_t *event)) +{ + event_t ev; + char filename[PATH_MAX]; + char bf[BUFSIZ]; + FILE *fp; + size_t size = 0; + DIR *tasks; + struct dirent dirent, *next; + pid_t tgid = 0; + + snprintf(filename, sizeof(filename), "/proc/%d/status", pid); + + fp = fopen(filename, "r"); + if (fp == NULL) { +out_race: + /* + * We raced with a task exiting - just return: + */ + pr_debug("couldn't open %s\n", filename); + return 0; + } + + memset(&ev.comm, 0, sizeof(ev.comm)); + while (!ev.comm.comm[0] || !ev.comm.pid) { + if (fgets(bf, sizeof(bf), fp) == NULL) + goto out_failure; + + if (memcmp(bf, "Name:", 5) == 0) { + char *name = bf + 5; + while (*name && isspace(*name)) + ++name; + size = strlen(name) - 1; + memcpy(ev.comm.comm, name, size++); + } else if (memcmp(bf, "Tgid:", 5) == 0) { + char *tgids = bf + 5; + while (*tgids && isspace(*tgids)) + ++tgids; + tgid = ev.comm.pid = atoi(tgids); + } + } + + ev.comm.header.type = PERF_RECORD_COMM; + size = ALIGN(size, sizeof(u64)); + ev.comm.header.size = sizeof(ev.comm) - (sizeof(ev.comm.comm) - size); + + if (!full) { + ev.comm.tid = pid; + + process(&ev); + goto out_fclose; + } + + snprintf(filename, sizeof(filename), "/proc/%d/task", pid); + + tasks = opendir(filename); + if (tasks == NULL) + goto out_race; + + while (!readdir_r(tasks, &dirent, &next) && next) { + char *end; + pid = strtol(dirent.d_name, &end, 10); + if (*end) + continue; + + ev.comm.tid = pid; + + process(&ev); + } + closedir(tasks); + +out_fclose: + fclose(fp); + return tgid; + +out_failure: + pr_warning("couldn't get COMM and pgid, malformed %s\n", filename); + return -1; +} + +static int event__synthesize_mmap_events(pid_t pid, pid_t tgid, + int (*process)(event_t *event)) +{ + char filename[PATH_MAX]; + FILE *fp; + + snprintf(filename, sizeof(filename), "/proc/%d/maps", pid); + + fp = fopen(filename, "r"); + if (fp == NULL) { + /* + * We raced with a task exiting - just return: + */ + pr_debug("couldn't open %s\n", filename); + return -1; + } + + while (1) { + char bf[BUFSIZ], *pbf = bf; + event_t ev = { + .header = { .type = PERF_RECORD_MMAP }, + }; + int n; + size_t size; + if (fgets(bf, sizeof(bf), fp) == NULL) + break; + + /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ + n = hex2u64(pbf, &ev.mmap.start); + if (n < 0) + continue; + pbf += n + 1; + n = hex2u64(pbf, &ev.mmap.len); + if (n < 0) + continue; + pbf += n + 3; + if (*pbf == 'x') { /* vm_exec */ + char *execname = strchr(bf, '/'); + + /* Catch VDSO */ + if (execname == NULL) + execname = strstr(bf, "[vdso]"); + + if (execname == NULL) + continue; + + size = strlen(execname); + execname[size - 1] = '\0'; /* Remove \n */ + memcpy(ev.mmap.filename, execname, size); + size = ALIGN(size, sizeof(u64)); + ev.mmap.len -= ev.mmap.start; + ev.mmap.header.size = (sizeof(ev.mmap) - + (sizeof(ev.mmap.filename) - size)); + ev.mmap.pid = tgid; + ev.mmap.tid = pid; + + process(&ev); + } + } + + fclose(fp); + return 0; +} + +int event__synthesize_thread(pid_t pid, int (*process)(event_t *event)) +{ + pid_t tgid = event__synthesize_comm(pid, 1, process); + if (tgid == -1) + return -1; + return event__synthesize_mmap_events(pid, tgid, process); +} + +void event__synthesize_threads(int (*process)(event_t *event)) +{ + DIR *proc; + struct dirent dirent, *next; + + proc = opendir("/proc"); + + while (!readdir_r(proc, &dirent, &next) && next) { + char *end; + pid_t pid = strtol(dirent.d_name, &end, 10); + + if (*end) /* only interested in proper numerical dirents */ + continue; + + event__synthesize_thread(pid, process); + } + + closedir(proc); +} + +char *event__cwd; +int event__cwdlen; + +struct events_stats event__stats; + +int event__process_comm(event_t *self) +{ + struct thread *thread = threads__findnew(self->comm.pid); + + dump_printf(": %s:%d\n", self->comm.comm, self->comm.pid); + + if (thread == NULL || thread__set_comm(thread, self->comm.comm)) { + dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); + return -1; + } + + return 0; +} + +int event__process_lost(event_t *self) +{ + dump_printf(": id:%Ld: lost:%Ld\n", self->lost.id, self->lost.lost); + event__stats.lost += self->lost.lost; + return 0; +} + +int event__process_mmap(event_t *self) +{ + struct thread *thread = threads__findnew(self->mmap.pid); + struct map *map = map__new(&self->mmap, MAP__FUNCTION, + event__cwd, event__cwdlen); + + dump_printf(" %d/%d: [%p(%p) @ %p]: %s\n", + self->mmap.pid, self->mmap.tid, + (void *)(long)self->mmap.start, + (void *)(long)self->mmap.len, + (void *)(long)self->mmap.pgoff, + self->mmap.filename); + + if (thread == NULL || map == NULL) + dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n"); + else + thread__insert_map(thread, map); + + return 0; +} + +int event__process_task(event_t *self) +{ + struct thread *thread = threads__findnew(self->fork.pid); + struct thread *parent = threads__findnew(self->fork.ppid); + + dump_printf("(%d:%d):(%d:%d)\n", self->fork.pid, self->fork.tid, + self->fork.ppid, self->fork.ptid); + /* + * A thread clone will have the same PID for both parent and child. + */ + if (thread == parent) + return 0; + + if (self->header.type == PERF_RECORD_EXIT) + return 0; + + if (thread == NULL || parent == NULL || + thread__fork(thread, parent) < 0) { + dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n"); + return -1; + } + + return 0; +} + +void thread__find_addr_location(struct thread *self, u8 cpumode, + enum map_type type, u64 addr, + struct addr_location *al, + symbol_filter_t filter) +{ + struct thread *thread = al->thread = self; + + al->addr = addr; + + if (cpumode & PERF_RECORD_MISC_KERNEL) { + al->level = 'k'; + thread = kthread; + } else if (cpumode & PERF_RECORD_MISC_USER) + al->level = '.'; + else { + al->level = 'H'; + al->map = NULL; + al->sym = NULL; + return; + } +try_again: + al->map = thread__find_map(thread, type, al->addr); + if (al->map == NULL) { + /* + * If this is outside of all known maps, and is a negative + * address, try to look it up in the kernel dso, as it might be + * a vsyscall or vdso (which executes in user-mode). + * + * XXX This is nasty, we should have a symbol list in the + * "[vdso]" dso, but for now lets use the old trick of looking + * in the whole kernel symbol list. + */ + if ((long long)al->addr < 0 && thread != kthread) { + thread = kthread; + goto try_again; + } + al->sym = NULL; + } else { + al->addr = al->map->map_ip(al->map, al->addr); + al->sym = map__find_symbol(al->map, al->addr, filter); + } +} + +int event__preprocess_sample(const event_t *self, struct addr_location *al, + symbol_filter_t filter) +{ + u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + struct thread *thread = threads__findnew(self->ip.pid); + + if (thread == NULL) + return -1; + + dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); + + thread__find_addr_location(thread, cpumode, MAP__FUNCTION, + self->ip.ip, al, filter); + dump_printf(" ...... dso: %s\n", + al->map ? al->map->dso->long_name : + al->level == 'H' ? "[hypervisor]" : "<not found>"); + return 0; +} diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 2c9c26d6ded..a4cc8105cf6 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -1,14 +1,10 @@ #ifndef __PERF_RECORD_H #define __PERF_RECORD_H + #include "../perf.h" #include "util.h" #include <linux/list.h> - -enum { - SHOW_KERNEL = 1, - SHOW_USER = 2, - SHOW_HV = 4, -}; +#include <linux/rbtree.h> /* * PERF_SAMPLE_IP | PERF_SAMPLE_TID | * @@ -65,6 +61,13 @@ struct sample_event{ u64 array[]; }; +#define BUILD_ID_SIZE 20 + +struct build_id_event { + struct perf_event_header header; + u8 build_id[ALIGN(BUILD_ID_SIZE, sizeof(u64))]; + char filename[]; +}; typedef union event_union { struct perf_event_header header; @@ -77,12 +80,30 @@ typedef union event_union { struct sample_event sample; } event_t; +struct events_stats { + unsigned long total; + unsigned long lost; +}; + +void event__print_totals(void); + +enum map_type { + MAP__FUNCTION = 0, + + MAP__NR_TYPES, +}; + struct map { - struct list_head node; + union { + struct rb_node rb_node; + struct list_head node; + }; u64 start; u64 end; + enum map_type type; u64 pgoff; u64 (*map_ip)(struct map *, u64); + u64 (*unmap_ip)(struct map *, u64); struct dso *dso; }; @@ -91,14 +112,48 @@ static inline u64 map__map_ip(struct map *map, u64 ip) return ip - map->start + map->pgoff; } -static inline u64 vdso__map_ip(struct map *map __used, u64 ip) +static inline u64 map__unmap_ip(struct map *map, u64 ip) +{ + return ip + map->start - map->pgoff; +} + +static inline u64 identity__map_ip(struct map *map __used, u64 ip) { return ip; } -struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen); +struct symbol; + +typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym); + +void map__init(struct map *self, enum map_type type, + u64 start, u64 end, u64 pgoff, struct dso *dso); +struct map *map__new(struct mmap_event *event, enum map_type, + char *cwd, int cwdlen); +void map__delete(struct map *self); struct map *map__clone(struct map *self); int map__overlap(struct map *l, struct map *r); size_t map__fprintf(struct map *self, FILE *fp); +struct symbol *map__find_symbol(struct map *self, u64 addr, + symbol_filter_t filter); +void map__fixup_start(struct map *self); +void map__fixup_end(struct map *self); + +int event__synthesize_thread(pid_t pid, int (*process)(event_t *event)); +void event__synthesize_threads(int (*process)(event_t *event)); + +extern char *event__cwd; +extern int event__cwdlen; +extern struct events_stats event__stats; +extern unsigned long event__total[PERF_RECORD_MAX]; + +int event__process_comm(event_t *self); +int event__process_lost(event_t *self); +int event__process_mmap(event_t *self); +int event__process_task(event_t *self); + +struct addr_location; +int event__preprocess_sample(const event_t *self, struct addr_location *al, + symbol_filter_t filter); -#endif +#endif /* __PERF_RECORD_H */ diff --git a/tools/perf/util/exec_cmd.h b/tools/perf/util/exec_cmd.h index effe25eb154..31647ac92ed 100644 --- a/tools/perf/util/exec_cmd.h +++ b/tools/perf/util/exec_cmd.h @@ -1,5 +1,5 @@ -#ifndef PERF_EXEC_CMD_H -#define PERF_EXEC_CMD_H +#ifndef __PERF_EXEC_CMD_H +#define __PERF_EXEC_CMD_H extern void perf_set_argv_exec_path(const char *exec_path); extern const char *perf_extract_argv0_path(const char *path); @@ -10,4 +10,4 @@ extern int execv_perf_cmd(const char **argv); /* NULL terminated */ extern int execl_perf_cmd(const char *cmd, ...); extern const char *system_path(const char *path); -#endif /* PERF_EXEC_CMD_H */ +#endif /* __PERF_EXEC_CMD_H */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index e306857b2c2..4805e6dfd23 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2,9 +2,15 @@ #include <unistd.h> #include <stdio.h> #include <stdlib.h> +#include <linux/list.h> #include "util.h" #include "header.h" +#include "../perf.h" +#include "trace-event.h" +#include "symbol.h" +#include "data_map.h" +#include "debug.h" /* * Create new perf.data header attribute: @@ -13,32 +19,43 @@ struct perf_header_attr *perf_header_attr__new(struct perf_event_attr *attr) { struct perf_header_attr *self = malloc(sizeof(*self)); - if (!self) - die("nomem"); - - self->attr = *attr; - self->ids = 0; - self->size = 1; - self->id = malloc(sizeof(u64)); - - if (!self->id) - die("nomem"); + if (self != NULL) { + self->attr = *attr; + self->ids = 0; + self->size = 1; + self->id = malloc(sizeof(u64)); + if (self->id == NULL) { + free(self); + self = NULL; + } + } return self; } -void perf_header_attr__add_id(struct perf_header_attr *self, u64 id) +void perf_header_attr__delete(struct perf_header_attr *self) +{ + free(self->id); + free(self); +} + +int perf_header_attr__add_id(struct perf_header_attr *self, u64 id) { int pos = self->ids; self->ids++; if (self->ids > self->size) { - self->size *= 2; - self->id = realloc(self->id, self->size * sizeof(u64)); - if (!self->id) - die("nomem"); + int nsize = self->size * 2; + u64 *nid = realloc(self->id, nsize * sizeof(u64)); + + if (nid == NULL) + return -1; + + self->size = nsize; + self->id = nid; } self->id[pos] = id; + return 0; } /* @@ -46,42 +63,52 @@ void perf_header_attr__add_id(struct perf_header_attr *self, u64 id) */ struct perf_header *perf_header__new(void) { - struct perf_header *self = malloc(sizeof(*self)); + struct perf_header *self = zalloc(sizeof(*self)); - if (!self) - die("nomem"); + if (self != NULL) { + self->size = 1; + self->attr = malloc(sizeof(void *)); - self->frozen = 0; + if (self->attr == NULL) { + free(self); + self = NULL; + } + } - self->attrs = 0; - self->size = 1; - self->attr = malloc(sizeof(void *)); + return self; +} - if (!self->attr) - die("nomem"); +void perf_header__delete(struct perf_header *self) +{ + int i; - self->data_offset = 0; - self->data_size = 0; + for (i = 0; i < self->attrs; ++i) + perf_header_attr__delete(self->attr[i]); - return self; + free(self->attr); + free(self); } -void perf_header__add_attr(struct perf_header *self, - struct perf_header_attr *attr) +int perf_header__add_attr(struct perf_header *self, + struct perf_header_attr *attr) { - int pos = self->attrs; - if (self->frozen) - die("frozen"); + return -1; - self->attrs++; - if (self->attrs > self->size) { - self->size *= 2; - self->attr = realloc(self->attr, self->size * sizeof(void *)); - if (!self->attr) - die("nomem"); + if (self->attrs == self->size) { + int nsize = self->size * 2; + struct perf_header_attr **nattr; + + nattr = realloc(self->attr, nsize * sizeof(void *)); + if (nattr == NULL) + return -1; + + self->size = nsize; + self->attr = nattr; } - self->attr[pos] = attr; + + self->attr[self->attrs++] = attr; + return 0; } #define MAX_EVENT_NAME 64 @@ -97,7 +124,7 @@ static struct perf_trace_event_type *events; void perf_header__push_event(u64 id, const char *name) { if (strlen(name) > MAX_EVENT_NAME) - printf("Event %s will be truncated\n", name); + pr_warning("Event %s will be truncated\n", name); if (!events) { events = malloc(sizeof(struct perf_trace_event_type)); @@ -128,44 +155,137 @@ static const char *__perf_magic = "PERFFILE"; #define PERF_MAGIC (*(u64 *)__perf_magic) -struct perf_file_section { - u64 offset; - u64 size; -}; - struct perf_file_attr { struct perf_event_attr attr; struct perf_file_section ids; }; -struct perf_file_header { - u64 magic; - u64 size; - u64 attr_size; - struct perf_file_section attrs; - struct perf_file_section data; - struct perf_file_section event_types; -}; +void perf_header__set_feat(struct perf_header *self, int feat) +{ + set_bit(feat, self->adds_features); +} -static void do_write(int fd, void *buf, size_t size) +bool perf_header__has_feat(const struct perf_header *self, int feat) +{ + return test_bit(feat, self->adds_features); +} + +static int do_write(int fd, const void *buf, size_t size) { while (size) { int ret = write(fd, buf, size); if (ret < 0) - die("failed to write"); + return -errno; size -= ret; buf += ret; } + + return 0; +} + +static int __dsos__write_buildid_table(struct list_head *head, int fd) +{ + struct dso *pos; + + list_for_each_entry(pos, head, node) { + int err; + struct build_id_event b; + size_t len; + + if (!pos->has_build_id) + continue; + len = pos->long_name_len + 1; + len = ALIGN(len, 64); + memset(&b, 0, sizeof(b)); + memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id)); + b.header.size = sizeof(b) + len; + err = do_write(fd, &b, sizeof(b)); + if (err < 0) + return err; + err = do_write(fd, pos->long_name, len); + if (err < 0) + return err; + } + + return 0; } -void perf_header__write(struct perf_header *self, int fd) +static int dsos__write_buildid_table(int fd) +{ + int err = __dsos__write_buildid_table(&dsos__kernel, fd); + if (err == 0) + err = __dsos__write_buildid_table(&dsos__user, fd); + return err; +} + +static int perf_header__adds_write(struct perf_header *self, int fd) +{ + int nr_sections; + struct perf_file_section *feat_sec; + int sec_size; + u64 sec_start; + int idx = 0, err; + + if (dsos__read_build_ids()) + perf_header__set_feat(self, HEADER_BUILD_ID); + + nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS); + if (!nr_sections) + return 0; + + feat_sec = calloc(sizeof(*feat_sec), nr_sections); + if (feat_sec == NULL) + return -ENOMEM; + + sec_size = sizeof(*feat_sec) * nr_sections; + + sec_start = self->data_offset + self->data_size; + lseek(fd, sec_start + sec_size, SEEK_SET); + + if (perf_header__has_feat(self, HEADER_TRACE_INFO)) { + struct perf_file_section *trace_sec; + + trace_sec = &feat_sec[idx++]; + + /* Write trace info */ + trace_sec->offset = lseek(fd, 0, SEEK_CUR); + read_tracing_data(fd, attrs, nr_counters); + trace_sec->size = lseek(fd, 0, SEEK_CUR) - trace_sec->offset; + } + + + if (perf_header__has_feat(self, HEADER_BUILD_ID)) { + struct perf_file_section *buildid_sec; + + buildid_sec = &feat_sec[idx++]; + + /* Write build-ids */ + buildid_sec->offset = lseek(fd, 0, SEEK_CUR); + err = dsos__write_buildid_table(fd); + if (err < 0) { + pr_debug("failed to write buildid table\n"); + goto out_free; + } + buildid_sec->size = lseek(fd, 0, SEEK_CUR) - buildid_sec->offset; + } + + lseek(fd, sec_start, SEEK_SET); + err = do_write(fd, feat_sec, sec_size); + if (err < 0) + pr_debug("failed to write feature section\n"); +out_free: + free(feat_sec); + return err; +} + +int perf_header__write(struct perf_header *self, int fd, bool at_exit) { struct perf_file_header f_header; struct perf_file_attr f_attr; struct perf_header_attr *attr; - int i; + int i, err; lseek(fd, sizeof(f_header), SEEK_SET); @@ -174,7 +294,11 @@ void perf_header__write(struct perf_header *self, int fd) attr = self->attr[i]; attr->id_offset = lseek(fd, 0, SEEK_CUR); - do_write(fd, attr->id, attr->ids * sizeof(u64)); + err = do_write(fd, attr->id, attr->ids * sizeof(u64)); + if (err < 0) { + pr_debug("failed to write perf header\n"); + return err; + } } @@ -190,17 +314,31 @@ void perf_header__write(struct perf_header *self, int fd) .size = attr->ids * sizeof(u64), } }; - do_write(fd, &f_attr, sizeof(f_attr)); + err = do_write(fd, &f_attr, sizeof(f_attr)); + if (err < 0) { + pr_debug("failed to write perf header attribute\n"); + return err; + } } self->event_offset = lseek(fd, 0, SEEK_CUR); self->event_size = event_count * sizeof(struct perf_trace_event_type); - if (events) - do_write(fd, events, self->event_size); - + if (events) { + err = do_write(fd, events, self->event_size); + if (err < 0) { + pr_debug("failed to write perf header events\n"); + return err; + } + } self->data_offset = lseek(fd, 0, SEEK_CUR); + if (at_exit) { + err = perf_header__adds_write(self, fd); + if (err < 0) + return err; + } + f_header = (struct perf_file_header){ .magic = PERF_MAGIC, .size = sizeof(f_header), @@ -219,11 +357,18 @@ void perf_header__write(struct perf_header *self, int fd) }, }; + memcpy(&f_header.adds_features, &self->adds_features, sizeof(self->adds_features)); + lseek(fd, 0, SEEK_SET); - do_write(fd, &f_header, sizeof(f_header)); + err = do_write(fd, &f_header, sizeof(f_header)); + if (err < 0) { + pr_debug("failed to write perf header\n"); + return err; + } lseek(fd, self->data_offset + self->data_size, SEEK_SET); self->frozen = 1; + return 0; } static void do_read(int fd, void *buf, size_t size) @@ -241,22 +386,109 @@ static void do_read(int fd, void *buf, size_t size) } } -struct perf_header *perf_header__read(int fd) +int perf_header__process_sections(struct perf_header *self, int fd, + int (*process)(struct perf_file_section *self, + int feat, int fd)) +{ + struct perf_file_section *feat_sec; + int nr_sections; + int sec_size; + int idx = 0; + int err = 0, feat = 1; + + nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS); + if (!nr_sections) + return 0; + + feat_sec = calloc(sizeof(*feat_sec), nr_sections); + if (!feat_sec) + return -1; + + sec_size = sizeof(*feat_sec) * nr_sections; + + lseek(fd, self->data_offset + self->data_size, SEEK_SET); + + do_read(fd, feat_sec, sec_size); + + while (idx < nr_sections && feat < HEADER_LAST_FEATURE) { + if (perf_header__has_feat(self, feat)) { + struct perf_file_section *sec = &feat_sec[idx++]; + + err = process(sec, feat, fd); + if (err < 0) + break; + } + ++feat; + } + + free(feat_sec); + return err; +}; + +int perf_file_header__read(struct perf_file_header *self, + struct perf_header *ph, int fd) +{ + lseek(fd, 0, SEEK_SET); + do_read(fd, self, sizeof(*self)); + + if (self->magic != PERF_MAGIC || + self->attr_size != sizeof(struct perf_file_attr)) + return -1; + + if (self->size != sizeof(*self)) { + /* Support the previous format */ + if (self->size == offsetof(typeof(*self), adds_features)) + bitmap_zero(self->adds_features, HEADER_FEAT_BITS); + else + return -1; + } + + memcpy(&ph->adds_features, &self->adds_features, + sizeof(self->adds_features)); + + ph->event_offset = self->event_types.offset; + ph->event_size = self->event_types.size; + ph->data_offset = self->data.offset; + ph->data_size = self->data.size; + return 0; +} + +static int perf_file_section__process(struct perf_file_section *self, + int feat, int fd) +{ + if (lseek(fd, self->offset, SEEK_SET) < 0) { + pr_debug("Failed to lseek to %Ld offset for feature %d, " + "continuing...\n", self->offset, feat); + return 0; + } + + switch (feat) { + case HEADER_TRACE_INFO: + trace_report(fd); + break; + + case HEADER_BUILD_ID: + if (perf_header__read_build_ids(fd, self->offset, self->size)) + pr_debug("Failed to read buildids, continuing...\n"); + break; + default: + pr_debug("unknown feature %d, continuing...\n", feat); + } + + return 0; +} + +int perf_header__read(struct perf_header *self, int fd) { - struct perf_header *self = perf_header__new(); struct perf_file_header f_header; struct perf_file_attr f_attr; u64 f_id; - int nr_attrs, nr_ids, i, j; - lseek(fd, 0, SEEK_SET); - do_read(fd, &f_header, sizeof(f_header)); - - if (f_header.magic != PERF_MAGIC || - f_header.size != sizeof(f_header) || - f_header.attr_size != sizeof(f_attr)) - die("incompatible file format"); + if (perf_file_header__read(&f_header, self, fd) < 0) { + pr_debug("incompatible file format\n"); + return -EINVAL; + } nr_attrs = f_header.attrs.size / sizeof(f_attr); lseek(fd, f_header.attrs.offset, SEEK_SET); @@ -269,6 +501,8 @@ struct perf_header *perf_header__read(int fd) tmp = lseek(fd, 0, SEEK_CUR); attr = perf_header_attr__new(&f_attr.attr); + if (attr == NULL) + return -ENOMEM; nr_ids = f_attr.ids.size / sizeof(u64); lseek(fd, f_attr.ids.offset, SEEK_SET); @@ -276,31 +510,34 @@ struct perf_header *perf_header__read(int fd) for (j = 0; j < nr_ids; j++) { do_read(fd, &f_id, sizeof(f_id)); - perf_header_attr__add_id(attr, f_id); + if (perf_header_attr__add_id(attr, f_id) < 0) { + perf_header_attr__delete(attr); + return -ENOMEM; + } } - perf_header__add_attr(self, attr); + if (perf_header__add_attr(self, attr) < 0) { + perf_header_attr__delete(attr); + return -ENOMEM; + } + lseek(fd, tmp, SEEK_SET); } if (f_header.event_types.size) { lseek(fd, f_header.event_types.offset, SEEK_SET); events = malloc(f_header.event_types.size); - if (!events) - die("nomem"); + if (events == NULL) + return -ENOMEM; do_read(fd, events, f_header.event_types.size); event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type); } - self->event_offset = f_header.event_types.offset; - self->event_size = f_header.event_types.size; - self->data_offset = f_header.data.offset; - self->data_size = f_header.data.size; + perf_header__process_sections(self, fd, perf_file_section__process); lseek(fd, self->data_offset, SEEK_SET); self->frozen = 1; - - return self; + return 0; } u64 perf_header__sample_type(struct perf_header *header) diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index a0761bc7863..d1dbe2b79c4 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -1,10 +1,13 @@ -#ifndef _PERF_HEADER_H -#define _PERF_HEADER_H +#ifndef __PERF_HEADER_H +#define __PERF_HEADER_H #include "../../../include/linux/perf_event.h" #include <sys/types.h> +#include <stdbool.h> #include "types.h" +#include <linux/bitmap.h> + struct perf_header_attr { struct perf_event_attr attr; int ids, size; @@ -12,36 +15,71 @@ struct perf_header_attr { off_t id_offset; }; +enum { + HEADER_TRACE_INFO = 1, + HEADER_BUILD_ID, + HEADER_LAST_FEATURE, +}; + +#define HEADER_FEAT_BITS 256 + +struct perf_file_section { + u64 offset; + u64 size; +}; + +struct perf_file_header { + u64 magic; + u64 size; + u64 attr_size; + struct perf_file_section attrs; + struct perf_file_section data; + struct perf_file_section event_types; + DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS); +}; + +struct perf_header; + +int perf_file_header__read(struct perf_file_header *self, + struct perf_header *ph, int fd); + struct perf_header { - int frozen; - int attrs, size; + int frozen; + int attrs, size; struct perf_header_attr **attr; - s64 attr_offset; - u64 data_offset; - u64 data_size; - u64 event_offset; - u64 event_size; + s64 attr_offset; + u64 data_offset; + u64 data_size; + u64 event_offset; + u64 event_size; + DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS); }; -struct perf_header *perf_header__read(int fd); -void perf_header__write(struct perf_header *self, int fd); +struct perf_header *perf_header__new(void); +void perf_header__delete(struct perf_header *self); -void perf_header__add_attr(struct perf_header *self, - struct perf_header_attr *attr); +int perf_header__read(struct perf_header *self, int fd); +int perf_header__write(struct perf_header *self, int fd, bool at_exit); + +int perf_header__add_attr(struct perf_header *self, + struct perf_header_attr *attr); void perf_header__push_event(u64 id, const char *name); char *perf_header__find_event(u64 id); +struct perf_header_attr *perf_header_attr__new(struct perf_event_attr *attr); +void perf_header_attr__delete(struct perf_header_attr *self); -struct perf_header_attr * -perf_header_attr__new(struct perf_event_attr *attr); -void perf_header_attr__add_id(struct perf_header_attr *self, u64 id); +int perf_header_attr__add_id(struct perf_header_attr *self, u64 id); u64 perf_header__sample_type(struct perf_header *header); struct perf_event_attr * perf_header__find_attr(u64 id, struct perf_header *header); +void perf_header__set_feat(struct perf_header *self, int feat); +bool perf_header__has_feat(const struct perf_header *self, int feat); +int perf_header__process_sections(struct perf_header *self, int fd, + int (*process)(struct perf_file_section *self, + int feat, int fd)); -struct perf_header *perf_header__new(void); - -#endif /* _PERF_HEADER_H */ +#endif /* __PERF_HEADER_H */ diff --git a/tools/perf/util/help.h b/tools/perf/util/help.h index 7128783637b..7f5c6dedd71 100644 --- a/tools/perf/util/help.h +++ b/tools/perf/util/help.h @@ -1,5 +1,5 @@ -#ifndef HELP_H -#define HELP_H +#ifndef __PERF_HELP_H +#define __PERF_HELP_H struct cmdnames { size_t alloc; @@ -26,4 +26,4 @@ int is_in_cmdlist(struct cmdnames *c, const char *s); void list_commands(const char *title, struct cmdnames *main_cmds, struct cmdnames *other_cmds); -#endif /* HELP_H */ +#endif /* __PERF_HELP_H */ diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c new file mode 100644 index 00000000000..0ebf6ee16ca --- /dev/null +++ b/tools/perf/util/hist.c @@ -0,0 +1,202 @@ +#include "hist.h" + +struct rb_root hist; +struct rb_root collapse_hists; +struct rb_root output_hists; +int callchain; + +struct callchain_param callchain_param = { + .mode = CHAIN_GRAPH_REL, + .min_percent = 0.5 +}; + +/* + * histogram, sorted on item, collects counts + */ + +struct hist_entry *__hist_entry__add(struct addr_location *al, + struct symbol *sym_parent, + u64 count, bool *hit) +{ + struct rb_node **p = &hist.rb_node; + struct rb_node *parent = NULL; + struct hist_entry *he; + struct hist_entry entry = { + .thread = al->thread, + .map = al->map, + .sym = al->sym, + .ip = al->addr, + .level = al->level, + .count = count, + .parent = sym_parent, + }; + int cmp; + + while (*p != NULL) { + parent = *p; + he = rb_entry(parent, struct hist_entry, rb_node); + + cmp = hist_entry__cmp(&entry, he); + + if (!cmp) { + *hit = true; + return he; + } + + if (cmp < 0) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + he = malloc(sizeof(*he)); + if (!he) + return NULL; + *he = entry; + rb_link_node(&he->rb_node, parent, p); + rb_insert_color(&he->rb_node, &hist); + *hit = false; + return he; +} + +int64_t +hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) +{ + struct sort_entry *se; + int64_t cmp = 0; + + list_for_each_entry(se, &hist_entry__sort_list, list) { + cmp = se->cmp(left, right); + if (cmp) + break; + } + + return cmp; +} + +int64_t +hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) +{ + struct sort_entry *se; + int64_t cmp = 0; + + list_for_each_entry(se, &hist_entry__sort_list, list) { + int64_t (*f)(struct hist_entry *, struct hist_entry *); + + f = se->collapse ?: se->cmp; + + cmp = f(left, right); + if (cmp) + break; + } + + return cmp; +} + +void hist_entry__free(struct hist_entry *he) +{ + free(he); +} + +/* + * collapse the histogram + */ + +void collapse__insert_entry(struct hist_entry *he) +{ + struct rb_node **p = &collapse_hists.rb_node; + struct rb_node *parent = NULL; + struct hist_entry *iter; + int64_t cmp; + + while (*p != NULL) { + parent = *p; + iter = rb_entry(parent, struct hist_entry, rb_node); + + cmp = hist_entry__collapse(iter, he); + + if (!cmp) { + iter->count += he->count; + hist_entry__free(he); + return; + } + + if (cmp < 0) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + rb_link_node(&he->rb_node, parent, p); + rb_insert_color(&he->rb_node, &collapse_hists); +} + +void collapse__resort(void) +{ + struct rb_node *next; + struct hist_entry *n; + + if (!sort__need_collapse) + return; + + next = rb_first(&hist); + while (next) { + n = rb_entry(next, struct hist_entry, rb_node); + next = rb_next(&n->rb_node); + + rb_erase(&n->rb_node, &hist); + collapse__insert_entry(n); + } +} + +/* + * reverse the map, sort on count. + */ + +void output__insert_entry(struct hist_entry *he, u64 min_callchain_hits) +{ + struct rb_node **p = &output_hists.rb_node; + struct rb_node *parent = NULL; + struct hist_entry *iter; + + if (callchain) + callchain_param.sort(&he->sorted_chain, &he->callchain, + min_callchain_hits, &callchain_param); + + while (*p != NULL) { + parent = *p; + iter = rb_entry(parent, struct hist_entry, rb_node); + + if (he->count > iter->count) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + rb_link_node(&he->rb_node, parent, p); + rb_insert_color(&he->rb_node, &output_hists); +} + +void output__resort(u64 total_samples) +{ + struct rb_node *next; + struct hist_entry *n; + struct rb_root *tree = &hist; + u64 min_callchain_hits; + + min_callchain_hits = + total_samples * (callchain_param.min_percent / 100); + + if (sort__need_collapse) + tree = &collapse_hists; + + next = rb_first(tree); + + while (next) { + n = rb_entry(next, struct hist_entry, rb_node); + next = rb_next(&n->rb_node); + + rb_erase(&n->rb_node, tree); + output__insert_entry(n, min_callchain_hits); + } +} diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h new file mode 100644 index 00000000000..3020db0c929 --- /dev/null +++ b/tools/perf/util/hist.h @@ -0,0 +1,50 @@ +#ifndef __PERF_HIST_H +#define __PERF_HIST_H +#include "../builtin.h" + +#include "util.h" + +#include "color.h" +#include <linux/list.h> +#include "cache.h" +#include <linux/rbtree.h> +#include "symbol.h" +#include "string.h" +#include "callchain.h" +#include "strlist.h" +#include "values.h" + +#include "../perf.h" +#include "debug.h" +#include "header.h" + +#include "parse-options.h" +#include "parse-events.h" + +#include "thread.h" +#include "sort.h" + +extern struct rb_root hist; +extern struct rb_root collapse_hists; +extern struct rb_root output_hists; +extern int callchain; +extern struct callchain_param callchain_param; +extern unsigned long total; +extern unsigned long total_mmap; +extern unsigned long total_comm; +extern unsigned long total_fork; +extern unsigned long total_unknown; +extern unsigned long total_lost; + +struct hist_entry *__hist_entry__add(struct addr_location *al, + struct symbol *parent, + u64 count, bool *hit); +extern int64_t hist_entry__cmp(struct hist_entry *, struct hist_entry *); +extern int64_t hist_entry__collapse(struct hist_entry *, struct hist_entry *); +extern void hist_entry__free(struct hist_entry *); +extern void collapse__insert_entry(struct hist_entry *); +extern void collapse__resort(void); +extern void output__insert_entry(struct hist_entry *, u64); +extern void output__resort(u64); + +#endif /* __PERF_HIST_H */ diff --git a/tools/perf/util/include/asm/asm-offsets.h b/tools/perf/util/include/asm/asm-offsets.h new file mode 100644 index 00000000000..ed538942523 --- /dev/null +++ b/tools/perf/util/include/asm/asm-offsets.h @@ -0,0 +1 @@ +/* stub */ diff --git a/tools/perf/util/include/asm/bitops.h b/tools/perf/util/include/asm/bitops.h new file mode 100644 index 00000000000..58e9817ffae --- /dev/null +++ b/tools/perf/util/include/asm/bitops.h @@ -0,0 +1,18 @@ +#ifndef _PERF_ASM_BITOPS_H_ +#define _PERF_ASM_BITOPS_H_ + +#include <sys/types.h> +#include "../../types.h" +#include <linux/compiler.h> + +/* CHECKME: Not sure both always match */ +#define BITS_PER_LONG __WORDSIZE + +#include "../../../../include/asm-generic/bitops/__fls.h" +#include "../../../../include/asm-generic/bitops/fls.h" +#include "../../../../include/asm-generic/bitops/fls64.h" +#include "../../../../include/asm-generic/bitops/__ffs.h" +#include "../../../../include/asm-generic/bitops/ffz.h" +#include "../../../../include/asm-generic/bitops/hweight.h" + +#endif diff --git a/tools/perf/util/include/asm/bug.h b/tools/perf/util/include/asm/bug.h new file mode 100644 index 00000000000..7fcc6810adc --- /dev/null +++ b/tools/perf/util/include/asm/bug.h @@ -0,0 +1,22 @@ +#ifndef _PERF_ASM_GENERIC_BUG_H +#define _PERF_ASM_GENERIC_BUG_H + +#define __WARN_printf(arg...) do { fprintf(stderr, arg); } while (0) + +#define WARN(condition, format...) ({ \ + int __ret_warn_on = !!(condition); \ + if (unlikely(__ret_warn_on)) \ + __WARN_printf(format); \ + unlikely(__ret_warn_on); \ +}) + +#define WARN_ONCE(condition, format...) ({ \ + static int __warned; \ + int __ret_warn_once = !!(condition); \ + \ + if (unlikely(__ret_warn_once)) \ + if (WARN(!__warned, format)) \ + __warned = 1; \ + unlikely(__ret_warn_once); \ +}) +#endif diff --git a/tools/perf/util/include/asm/byteorder.h b/tools/perf/util/include/asm/byteorder.h new file mode 100644 index 00000000000..b722abe3a62 --- /dev/null +++ b/tools/perf/util/include/asm/byteorder.h @@ -0,0 +1,2 @@ +#include <asm/types.h> +#include "../../../../include/linux/swab.h" diff --git a/tools/perf/util/include/asm/swab.h b/tools/perf/util/include/asm/swab.h new file mode 100644 index 00000000000..ed538942523 --- /dev/null +++ b/tools/perf/util/include/asm/swab.h @@ -0,0 +1 @@ +/* stub */ diff --git a/tools/perf/util/include/asm/uaccess.h b/tools/perf/util/include/asm/uaccess.h new file mode 100644 index 00000000000..d0f72b8fcc3 --- /dev/null +++ b/tools/perf/util/include/asm/uaccess.h @@ -0,0 +1,14 @@ +#ifndef _PERF_ASM_UACCESS_H_ +#define _PERF_ASM_UACCESS_H_ + +#define __get_user(src, dest) \ +({ \ + (src) = *dest; \ + 0; \ +}) + +#define get_user __get_user + +#define access_ok(type, addr, size) 1 + +#endif diff --git a/tools/perf/util/include/linux/bitmap.h b/tools/perf/util/include/linux/bitmap.h new file mode 100644 index 00000000000..94507639a8c --- /dev/null +++ b/tools/perf/util/include/linux/bitmap.h @@ -0,0 +1,3 @@ +#include "../../../../include/linux/bitmap.h" +#include "../../../../include/asm-generic/bitops/find.h" +#include <linux/errno.h> diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h new file mode 100644 index 00000000000..8d63116e943 --- /dev/null +++ b/tools/perf/util/include/linux/bitops.h @@ -0,0 +1,29 @@ +#ifndef _PERF_LINUX_BITOPS_H_ +#define _PERF_LINUX_BITOPS_H_ + +#define __KERNEL__ + +#define CONFIG_GENERIC_FIND_NEXT_BIT +#define CONFIG_GENERIC_FIND_FIRST_BIT +#include "../../../../include/linux/bitops.h" + +#undef __KERNEL__ + +static inline void set_bit(int nr, unsigned long *addr) +{ + addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG); +} + +static __always_inline int test_bit(unsigned int nr, const unsigned long *addr) +{ + return ((1UL << (nr % BITS_PER_LONG)) & + (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0; +} + +unsigned long generic_find_next_zero_le_bit(const unsigned long *addr, unsigned + long size, unsigned long offset); + +unsigned long generic_find_next_le_bit(const unsigned long *addr, unsigned + long size, unsigned long offset); + +#endif diff --git a/tools/perf/util/include/linux/compiler.h b/tools/perf/util/include/linux/compiler.h new file mode 100644 index 00000000000..dfb0713ed47 --- /dev/null +++ b/tools/perf/util/include/linux/compiler.h @@ -0,0 +1,10 @@ +#ifndef _PERF_LINUX_COMPILER_H_ +#define _PERF_LINUX_COMPILER_H_ + +#ifndef __always_inline +#define __always_inline inline +#endif +#define __user +#define __attribute_const__ + +#endif diff --git a/tools/perf/util/include/linux/ctype.h b/tools/perf/util/include/linux/ctype.h new file mode 100644 index 00000000000..a53d4ee1e0b --- /dev/null +++ b/tools/perf/util/include/linux/ctype.h @@ -0,0 +1 @@ +#include "../util.h" diff --git a/tools/perf/util/include/linux/kernel.h b/tools/perf/util/include/linux/kernel.h index a6b87390cb5..21c0274c02f 100644 --- a/tools/perf/util/include/linux/kernel.h +++ b/tools/perf/util/include/linux/kernel.h @@ -1,6 +1,16 @@ #ifndef PERF_LINUX_KERNEL_H_ #define PERF_LINUX_KERNEL_H_ +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> + +#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) + +#define ALIGN(x,a) __ALIGN_MASK(x,(typeof(x))(a)-1) +#define __ALIGN_MASK(x,mask) (((x)+(mask))&~(mask)) + #ifndef offsetof #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) #endif @@ -26,4 +36,70 @@ _max1 > _max2 ? _max1 : _max2; }) #endif +#ifndef min +#define min(x, y) ({ \ + typeof(x) _min1 = (x); \ + typeof(y) _min2 = (y); \ + (void) (&_min1 == &_min2); \ + _min1 < _min2 ? _min1 : _min2; }) +#endif + +#ifndef BUG_ON +#define BUG_ON(cond) assert(!(cond)) +#endif + +/* + * Both need more care to handle endianness + * (Don't use bitmap_copy_le() for now) + */ +#define cpu_to_le64(x) (x) +#define cpu_to_le32(x) (x) + +static inline int +vscnprintf(char *buf, size_t size, const char *fmt, va_list args) +{ + int i; + ssize_t ssize = size; + + i = vsnprintf(buf, size, fmt, args); + + return (i >= ssize) ? (ssize - 1) : i; +} + +static inline int scnprintf(char * buf, size_t size, const char * fmt, ...) +{ + va_list args; + ssize_t ssize = size; + int i; + + va_start(args, fmt); + i = vsnprintf(buf, size, fmt, args); + va_end(args); + + return (i >= ssize) ? (ssize - 1) : i; +} + +static inline unsigned long +simple_strtoul(const char *nptr, char **endptr, int base) +{ + return strtoul(nptr, endptr, base); +} + +#ifndef pr_fmt +#define pr_fmt(fmt) fmt +#endif + +#define pr_err(fmt, ...) \ + do { fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__); } while (0) +#define pr_warning(fmt, ...) \ + do { fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__); } while (0) +#define pr_info(fmt, ...) \ + do { fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__); } while (0) +#define pr_debug(fmt, ...) \ + eprintf(1, pr_fmt(fmt), ##__VA_ARGS__) +#define pr_debugN(n, fmt, ...) \ + eprintf(n, pr_fmt(fmt), ##__VA_ARGS__) +#define pr_debug2(fmt, ...) pr_debugN(2, pr_fmt(fmt), ##__VA_ARGS__) +#define pr_debug3(fmt, ...) pr_debugN(3, pr_fmt(fmt), ##__VA_ARGS__) + #endif diff --git a/tools/perf/util/include/linux/string.h b/tools/perf/util/include/linux/string.h new file mode 100644 index 00000000000..3b2f5900276 --- /dev/null +++ b/tools/perf/util/include/linux/string.h @@ -0,0 +1 @@ +#include <string.h> diff --git a/tools/perf/util/include/linux/types.h b/tools/perf/util/include/linux/types.h new file mode 100644 index 00000000000..196862a81a2 --- /dev/null +++ b/tools/perf/util/include/linux/types.h @@ -0,0 +1,9 @@ +#ifndef _PERF_LINUX_TYPES_H_ +#define _PERF_LINUX_TYPES_H_ + +#include <asm/types.h> + +#define DECLARE_BITMAP(name,bits) \ + unsigned long name[BITS_TO_LONGS(bits)] + +#endif diff --git a/tools/perf/util/levenshtein.h b/tools/perf/util/levenshtein.h index 0173abeef52..b0fcb6d8a88 100644 --- a/tools/perf/util/levenshtein.h +++ b/tools/perf/util/levenshtein.h @@ -1,8 +1,8 @@ -#ifndef LEVENSHTEIN_H -#define LEVENSHTEIN_H +#ifndef __PERF_LEVENSHTEIN_H +#define __PERF_LEVENSHTEIN_H int levenshtein(const char *string1, const char *string2, int swap_penalty, int substition_penalty, int insertion_penalty, int deletion_penalty); -#endif +#endif /* __PERF_LEVENSHTEIN_H */ diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 804e0238273..69f94fe9db2 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -3,6 +3,7 @@ #include <stdlib.h> #include <string.h> #include <stdio.h> +#include "debug.h" static inline int is_anon_memory(const char *filename) { @@ -19,13 +20,28 @@ static int strcommon(const char *pathname, char *cwd, int cwdlen) return n; } - struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen) +void map__init(struct map *self, enum map_type type, + u64 start, u64 end, u64 pgoff, struct dso *dso) +{ + self->type = type; + self->start = start; + self->end = end; + self->pgoff = pgoff; + self->dso = dso; + self->map_ip = map__map_ip; + self->unmap_ip = map__unmap_ip; + RB_CLEAR_NODE(&self->rb_node); +} + +struct map *map__new(struct mmap_event *event, enum map_type type, + char *cwd, int cwdlen) { struct map *self = malloc(sizeof(*self)); if (self != NULL) { const char *filename = event->filename; char newfilename[PATH_MAX]; + struct dso *dso; int anon; if (cwd) { @@ -45,18 +61,15 @@ static int strcommon(const char *pathname, char *cwd, int cwdlen) filename = newfilename; } - self->start = event->start; - self->end = event->start + event->len; - self->pgoff = event->pgoff; - - self->dso = dsos__findnew(filename); - if (self->dso == NULL) + dso = dsos__findnew(filename); + if (dso == NULL) goto out_delete; + map__init(self, type, event->start, event->start + event->len, + event->pgoff, dso); + if (self->dso == vdso || anon) - self->map_ip = vdso__map_ip; - else - self->map_ip = map__map_ip; + self->map_ip = self->unmap_ip = identity__map_ip; } return self; out_delete: @@ -64,6 +77,72 @@ out_delete: return NULL; } +void map__delete(struct map *self) +{ + free(self); +} + +void map__fixup_start(struct map *self) +{ + struct rb_root *symbols = &self->dso->symbols[self->type]; + struct rb_node *nd = rb_first(symbols); + if (nd != NULL) { + struct symbol *sym = rb_entry(nd, struct symbol, rb_node); + self->start = sym->start; + } +} + +void map__fixup_end(struct map *self) +{ + struct rb_root *symbols = &self->dso->symbols[self->type]; + struct rb_node *nd = rb_last(symbols); + if (nd != NULL) { + struct symbol *sym = rb_entry(nd, struct symbol, rb_node); + self->end = sym->end; + } +} + +#define DSO__DELETED "(deleted)" + +struct symbol *map__find_symbol(struct map *self, u64 addr, + symbol_filter_t filter) +{ + if (!dso__loaded(self->dso, self->type)) { + int nr = dso__load(self->dso, self, filter); + + if (nr < 0) { + if (self->dso->has_build_id) { + char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + + build_id__sprintf(self->dso->build_id, + sizeof(self->dso->build_id), + sbuild_id); + pr_warning("%s with build id %s not found", + self->dso->long_name, sbuild_id); + } else + pr_warning("Failed to open %s", + self->dso->long_name); + pr_warning(", continuing without symbols\n"); + return NULL; + } else if (nr == 0) { + const char *name = self->dso->long_name; + const size_t len = strlen(name); + const size_t real_len = len - sizeof(DSO__DELETED); + + if (len > sizeof(DSO__DELETED) && + strcmp(name + real_len + 1, DSO__DELETED) == 0) { + pr_warning("%.*s was updated, restart the long running apps that use it!\n", + (int)real_len, name); + } else { + pr_warning("no symbols found in %s, maybe install a debug package?\n", name); + } + return NULL; + } + } + + return self->dso->find_symbol(self->dso, self->type, addr); +} + struct map *map__clone(struct map *self) { struct map *map = malloc(sizeof(*self)); diff --git a/tools/perf/util/module.c b/tools/perf/util/module.c deleted file mode 100644 index 0d8c85defcd..00000000000 --- a/tools/perf/util/module.c +++ /dev/null @@ -1,545 +0,0 @@ -#include "util.h" -#include "../perf.h" -#include "string.h" -#include "module.h" - -#include <libelf.h> -#include <libgen.h> -#include <gelf.h> -#include <elf.h> -#include <dirent.h> -#include <sys/utsname.h> - -static unsigned int crc32(const char *p, unsigned int len) -{ - int i; - unsigned int crc = 0; - - while (len--) { - crc ^= *p++; - for (i = 0; i < 8; i++) - crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0); - } - return crc; -} - -/* module section methods */ - -struct sec_dso *sec_dso__new_dso(const char *name) -{ - struct sec_dso *self = malloc(sizeof(*self) + strlen(name) + 1); - - if (self != NULL) { - strcpy(self->name, name); - self->secs = RB_ROOT; - self->find_section = sec_dso__find_section; - } - - return self; -} - -static void sec_dso__delete_section(struct section *self) -{ - free(((void *)self)); -} - -void sec_dso__delete_sections(struct sec_dso *self) -{ - struct section *pos; - struct rb_node *next = rb_first(&self->secs); - - while (next) { - pos = rb_entry(next, struct section, rb_node); - next = rb_next(&pos->rb_node); - rb_erase(&pos->rb_node, &self->secs); - sec_dso__delete_section(pos); - } -} - -void sec_dso__delete_self(struct sec_dso *self) -{ - sec_dso__delete_sections(self); - free(self); -} - -static void sec_dso__insert_section(struct sec_dso *self, struct section *sec) -{ - struct rb_node **p = &self->secs.rb_node; - struct rb_node *parent = NULL; - const u64 hash = sec->hash; - struct section *s; - - while (*p != NULL) { - parent = *p; - s = rb_entry(parent, struct section, rb_node); - if (hash < s->hash) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - rb_link_node(&sec->rb_node, parent, p); - rb_insert_color(&sec->rb_node, &self->secs); -} - -struct section *sec_dso__find_section(struct sec_dso *self, const char *name) -{ - struct rb_node *n; - u64 hash; - int len; - - if (self == NULL) - return NULL; - - len = strlen(name); - hash = crc32(name, len); - - n = self->secs.rb_node; - - while (n) { - struct section *s = rb_entry(n, struct section, rb_node); - - if (hash < s->hash) - n = n->rb_left; - else if (hash > s->hash) - n = n->rb_right; - else { - if (!strcmp(name, s->name)) - return s; - else - n = rb_next(&s->rb_node); - } - } - - return NULL; -} - -static size_t sec_dso__fprintf_section(struct section *self, FILE *fp) -{ - return fprintf(fp, "name:%s vma:%llx path:%s\n", - self->name, self->vma, self->path); -} - -size_t sec_dso__fprintf(struct sec_dso *self, FILE *fp) -{ - size_t ret = fprintf(fp, "dso: %s\n", self->name); - - struct rb_node *nd; - for (nd = rb_first(&self->secs); nd; nd = rb_next(nd)) { - struct section *pos = rb_entry(nd, struct section, rb_node); - ret += sec_dso__fprintf_section(pos, fp); - } - - return ret; -} - -static struct section *section__new(const char *name, const char *path) -{ - struct section *self = calloc(1, sizeof(*self)); - - if (!self) - goto out_failure; - - self->name = calloc(1, strlen(name) + 1); - if (!self->name) - goto out_failure; - - self->path = calloc(1, strlen(path) + 1); - if (!self->path) - goto out_failure; - - strcpy(self->name, name); - strcpy(self->path, path); - self->hash = crc32(self->name, strlen(name)); - - return self; - -out_failure: - if (self) { - if (self->name) - free(self->name); - if (self->path) - free(self->path); - free(self); - } - - return NULL; -} - -/* module methods */ - -struct mod_dso *mod_dso__new_dso(const char *name) -{ - struct mod_dso *self = malloc(sizeof(*self) + strlen(name) + 1); - - if (self != NULL) { - strcpy(self->name, name); - self->mods = RB_ROOT; - self->find_module = mod_dso__find_module; - } - - return self; -} - -static void mod_dso__delete_module(struct module *self) -{ - free(((void *)self)); -} - -void mod_dso__delete_modules(struct mod_dso *self) -{ - struct module *pos; - struct rb_node *next = rb_first(&self->mods); - - while (next) { - pos = rb_entry(next, struct module, rb_node); - next = rb_next(&pos->rb_node); - rb_erase(&pos->rb_node, &self->mods); - mod_dso__delete_module(pos); - } -} - -void mod_dso__delete_self(struct mod_dso *self) -{ - mod_dso__delete_modules(self); - free(self); -} - -static void mod_dso__insert_module(struct mod_dso *self, struct module *mod) -{ - struct rb_node **p = &self->mods.rb_node; - struct rb_node *parent = NULL; - const u64 hash = mod->hash; - struct module *m; - - while (*p != NULL) { - parent = *p; - m = rb_entry(parent, struct module, rb_node); - if (hash < m->hash) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - rb_link_node(&mod->rb_node, parent, p); - rb_insert_color(&mod->rb_node, &self->mods); -} - -struct module *mod_dso__find_module(struct mod_dso *self, const char *name) -{ - struct rb_node *n; - u64 hash; - int len; - - if (self == NULL) - return NULL; - - len = strlen(name); - hash = crc32(name, len); - - n = self->mods.rb_node; - - while (n) { - struct module *m = rb_entry(n, struct module, rb_node); - - if (hash < m->hash) - n = n->rb_left; - else if (hash > m->hash) - n = n->rb_right; - else { - if (!strcmp(name, m->name)) - return m; - else - n = rb_next(&m->rb_node); - } - } - - return NULL; -} - -static size_t mod_dso__fprintf_module(struct module *self, FILE *fp) -{ - return fprintf(fp, "name:%s path:%s\n", self->name, self->path); -} - -size_t mod_dso__fprintf(struct mod_dso *self, FILE *fp) -{ - struct rb_node *nd; - size_t ret; - - ret = fprintf(fp, "dso: %s\n", self->name); - - for (nd = rb_first(&self->mods); nd; nd = rb_next(nd)) { - struct module *pos = rb_entry(nd, struct module, rb_node); - - ret += mod_dso__fprintf_module(pos, fp); - } - - return ret; -} - -static struct module *module__new(const char *name, const char *path) -{ - struct module *self = calloc(1, sizeof(*self)); - - if (!self) - goto out_failure; - - self->name = calloc(1, strlen(name) + 1); - if (!self->name) - goto out_failure; - - self->path = calloc(1, strlen(path) + 1); - if (!self->path) - goto out_failure; - - strcpy(self->name, name); - strcpy(self->path, path); - self->hash = crc32(self->name, strlen(name)); - - return self; - -out_failure: - if (self) { - if (self->name) - free(self->name); - if (self->path) - free(self->path); - free(self); - } - - return NULL; -} - -static int mod_dso__load_sections(struct module *mod) -{ - int count = 0, path_len; - struct dirent *entry; - char *line = NULL; - char *dir_path; - DIR *dir; - size_t n; - - path_len = strlen("/sys/module/"); - path_len += strlen(mod->name); - path_len += strlen("/sections/"); - - dir_path = calloc(1, path_len + 1); - if (dir_path == NULL) - goto out_failure; - - strcat(dir_path, "/sys/module/"); - strcat(dir_path, mod->name); - strcat(dir_path, "/sections/"); - - dir = opendir(dir_path); - if (dir == NULL) - goto out_free; - - while ((entry = readdir(dir))) { - struct section *section; - char *path, *vma; - int line_len; - FILE *file; - - if (!strcmp(".", entry->d_name) || !strcmp("..", entry->d_name)) - continue; - - path = calloc(1, path_len + strlen(entry->d_name) + 1); - if (path == NULL) - break; - strcat(path, dir_path); - strcat(path, entry->d_name); - - file = fopen(path, "r"); - if (file == NULL) { - free(path); - break; - } - - line_len = getline(&line, &n, file); - if (line_len < 0) { - free(path); - fclose(file); - break; - } - - if (!line) { - free(path); - fclose(file); - break; - } - - line[--line_len] = '\0'; /* \n */ - - vma = strstr(line, "0x"); - if (!vma) { - free(path); - fclose(file); - break; - } - vma += 2; - - section = section__new(entry->d_name, path); - if (!section) { - fprintf(stderr, "load_sections: allocation error\n"); - free(path); - fclose(file); - break; - } - - hex2u64(vma, §ion->vma); - sec_dso__insert_section(mod->sections, section); - - free(path); - fclose(file); - count++; - } - - closedir(dir); - free(line); - free(dir_path); - - return count; - -out_free: - free(dir_path); - -out_failure: - return count; -} - -static int mod_dso__load_module_paths(struct mod_dso *self) -{ - struct utsname uts; - int count = 0, len, err = -1; - char *line = NULL; - FILE *file; - char *dpath, *dir; - size_t n; - - if (uname(&uts) < 0) - return err; - - len = strlen("/lib/modules/"); - len += strlen(uts.release); - len += strlen("/modules.dep"); - - dpath = calloc(1, len + 1); - if (dpath == NULL) - return err; - - strcat(dpath, "/lib/modules/"); - strcat(dpath, uts.release); - strcat(dpath, "/modules.dep"); - - file = fopen(dpath, "r"); - if (file == NULL) - goto out_failure; - - dir = dirname(dpath); - if (!dir) - goto out_failure; - strcat(dir, "/"); - - while (!feof(file)) { - struct module *module; - char *name, *path, *tmp; - FILE *modfile; - int line_len; - - line_len = getline(&line, &n, file); - if (line_len < 0) - break; - - if (!line) - break; - - line[--line_len] = '\0'; /* \n */ - - path = strchr(line, ':'); - if (!path) - break; - *path = '\0'; - - path = strdup(line); - if (!path) - break; - - if (!strstr(path, dir)) { - if (strncmp(path, "kernel/", 7)) - break; - - free(path); - path = calloc(1, strlen(dir) + strlen(line) + 1); - if (!path) - break; - strcat(path, dir); - strcat(path, line); - } - - modfile = fopen(path, "r"); - if (modfile == NULL) - break; - fclose(modfile); - - name = strdup(path); - if (!name) - break; - - name = strtok(name, "/"); - tmp = name; - - while (tmp) { - tmp = strtok(NULL, "/"); - if (tmp) - name = tmp; - } - - name = strsep(&name, "."); - if (!name) - break; - - /* Quirk: replace '-' with '_' in all modules */ - for (len = strlen(name); len; len--) { - if (*(name+len) == '-') - *(name+len) = '_'; - } - - module = module__new(name, path); - if (!module) - break; - mod_dso__insert_module(self, module); - - module->sections = sec_dso__new_dso("sections"); - if (!module->sections) - break; - - module->active = mod_dso__load_sections(module); - - if (module->active > 0) - count++; - } - - if (feof(file)) - err = count; - else - fprintf(stderr, "load_module_paths: modules.dep parsing failure!\n"); - -out_failure: - if (dpath) - free(dpath); - if (file) - fclose(file); - if (line) - free(line); - - return err; -} - -int mod_dso__load_modules(struct mod_dso *dso) -{ - int err; - - err = mod_dso__load_module_paths(dso); - - return err; -} diff --git a/tools/perf/util/module.h b/tools/perf/util/module.h deleted file mode 100644 index 8a592ef641c..00000000000 --- a/tools/perf/util/module.h +++ /dev/null @@ -1,53 +0,0 @@ -#ifndef _PERF_MODULE_ -#define _PERF_MODULE_ 1 - -#include <linux/types.h> -#include "../types.h" -#include <linux/list.h> -#include <linux/rbtree.h> - -struct section { - struct rb_node rb_node; - u64 hash; - u64 vma; - char *name; - char *path; -}; - -struct sec_dso { - struct list_head node; - struct rb_root secs; - struct section *(*find_section)(struct sec_dso *, const char *name); - char name[0]; -}; - -struct module { - struct rb_node rb_node; - u64 hash; - char *name; - char *path; - struct sec_dso *sections; - int active; -}; - -struct mod_dso { - struct list_head node; - struct rb_root mods; - struct module *(*find_module)(struct mod_dso *, const char *name); - char name[0]; -}; - -struct sec_dso *sec_dso__new_dso(const char *name); -void sec_dso__delete_sections(struct sec_dso *self); -void sec_dso__delete_self(struct sec_dso *self); -size_t sec_dso__fprintf(struct sec_dso *self, FILE *fp); -struct section *sec_dso__find_section(struct sec_dso *self, const char *name); - -struct mod_dso *mod_dso__new_dso(const char *name); -void mod_dso__delete_modules(struct mod_dso *self); -void mod_dso__delete_self(struct mod_dso *self); -size_t mod_dso__fprintf(struct mod_dso *self, FILE *fp); -struct module *mod_dso__find_module(struct mod_dso *self, const char *name); -int mod_dso__load_modules(struct mod_dso *dso); - -#endif /* _PERF_MODULE_ */ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 8cfb48cbbea..9e5dbd66d34 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1,4 +1,4 @@ - +#include "../../../include/linux/hw_breakpoint.h" #include "util.h" #include "../perf.h" #include "parse-options.h" @@ -7,10 +7,12 @@ #include "string.h" #include "cache.h" #include "header.h" +#include "debugfs.h" -int nr_counters; +int nr_counters; struct perf_event_attr attrs[MAX_COUNTERS]; +char *filters[MAX_COUNTERS]; struct event_symbol { u8 type; @@ -46,6 +48,8 @@ static struct event_symbol event_symbols[] = { { CSW(PAGE_FAULTS_MAJ), "major-faults", "" }, { CSW(CONTEXT_SWITCHES), "context-switches", "cs" }, { CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" }, + { CSW(ALIGNMENT_FAULTS), "alignment-faults", "" }, + { CSW(EMULATION_FAULTS), "emulation-faults", "" }, }; #define __PERF_EVENT_FIELD(config, name) \ @@ -74,6 +78,8 @@ static const char *sw_event_names[] = { "CPU-migrations", "minor-faults", "major-faults", + "alignment-faults", + "emulation-faults", }; #define MAX_ALIASES 8 @@ -148,16 +154,6 @@ static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir) #define MAX_EVENT_LENGTH 512 -int valid_debugfs_mount(const char *debugfs) -{ - struct statfs st_fs; - - if (statfs(debugfs, &st_fs) < 0) - return -ENOENT; - else if (st_fs.f_type != (long) DEBUGFS_MAGIC) - return -ENOENT; - return 0; -} struct tracepoint_path *tracepoint_id_to_path(u64 config) { @@ -170,7 +166,7 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) char evt_path[MAXPATHLEN]; char dir_path[MAXPATHLEN]; - if (valid_debugfs_mount(debugfs_path)) + if (debugfs_valid_mountpoint(debugfs_path)) return NULL; sys_dir = opendir(debugfs_path); @@ -201,7 +197,7 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) if (id == config) { closedir(evt_dir); closedir(sys_dir); - path = calloc(1, sizeof(path)); + path = zalloc(sizeof(path)); path->system = malloc(MAX_EVENT_LENGTH); if (!path->system) { free(path); @@ -509,7 +505,7 @@ static enum event_result parse_tracepoint_event(const char **strp, char sys_name[MAX_EVENT_LENGTH]; unsigned int sys_length, evt_length; - if (valid_debugfs_mount(debugfs_path)) + if (debugfs_valid_mountpoint(debugfs_path)) return 0; evt_name = strchr(*strp, ':'); @@ -544,6 +540,81 @@ static enum event_result parse_tracepoint_event(const char **strp, attr, strp); } +static enum event_result +parse_breakpoint_type(const char *type, const char **strp, + struct perf_event_attr *attr) +{ + int i; + + for (i = 0; i < 3; i++) { + if (!type[i]) + break; + + switch (type[i]) { + case 'r': + attr->bp_type |= HW_BREAKPOINT_R; + break; + case 'w': + attr->bp_type |= HW_BREAKPOINT_W; + break; + case 'x': + attr->bp_type |= HW_BREAKPOINT_X; + break; + default: + return EVT_FAILED; + } + } + if (!attr->bp_type) /* Default */ + attr->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W; + + *strp = type + i; + + return EVT_HANDLED; +} + +static enum event_result +parse_breakpoint_event(const char **strp, struct perf_event_attr *attr) +{ + const char *target; + const char *type; + char *endaddr; + u64 addr; + enum event_result err; + + target = strchr(*strp, ':'); + if (!target) + return EVT_FAILED; + + if (strncmp(*strp, "mem", target - *strp) != 0) + return EVT_FAILED; + + target++; + + addr = strtoull(target, &endaddr, 0); + if (target == endaddr) + return EVT_FAILED; + + attr->bp_addr = addr; + *strp = endaddr; + + type = strchr(target, ':'); + + /* If no type is defined, just rw as default */ + if (!type) { + attr->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W; + } else { + err = parse_breakpoint_type(++type, strp, attr); + if (err == EVT_FAILED) + return EVT_FAILED; + } + + /* We should find a nice way to override the access type */ + attr->bp_len = HW_BREAKPOINT_LEN_4; + attr->type = PERF_TYPE_BREAKPOINT; + + return EVT_HANDLED; +} + static int check_events(const char *str, unsigned int i) { int n; @@ -677,6 +748,12 @@ parse_event_symbols(const char **str, struct perf_event_attr *attr) if (ret != EVT_FAILED) goto modifier; + ret = parse_breakpoint_event(str, attr); + if (ret != EVT_FAILED) + goto modifier; + + fprintf(stderr, "invalid or unsupported event: '%s'\n", *str); + fprintf(stderr, "Run 'perf list' for a list of valid events\n"); return EVT_FAILED; modifier: @@ -708,7 +785,6 @@ static void store_event_type(const char *orgname) perf_header__push_event(id, orgname); } - int parse_events(const struct option *opt __used, const char *str, int unset __used) { struct perf_event_attr attr; @@ -745,6 +821,28 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u return 0; } +int parse_filter(const struct option *opt __used, const char *str, + int unset __used) +{ + int i = nr_counters - 1; + int len = strlen(str); + + if (i < 0 || attrs[i].type != PERF_TYPE_TRACEPOINT) { + fprintf(stderr, + "-F option should follow a -e tracepoint option\n"); + return -1; + } + + filters[i] = malloc(len + 1); + if (!filters[i]) { + fprintf(stderr, "not enough memory to hold filter string\n"); + return -1; + } + strcpy(filters[i], str); + + return 0; +} + static const char * const event_type_descriptors[] = { "", "Hardware event", @@ -764,7 +862,7 @@ static void print_tracepoint_events(void) char evt_path[MAXPATHLEN]; char dir_path[MAXPATHLEN]; - if (valid_debugfs_mount(debugfs_path)) + if (debugfs_valid_mountpoint(debugfs_path)) return; sys_dir = opendir(debugfs_path); @@ -782,7 +880,7 @@ static void print_tracepoint_events(void) for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) { snprintf(evt_path, MAXPATHLEN, "%s:%s", sys_dirent.d_name, evt_dirent.d_name); - fprintf(stderr, " %-42s [%s]\n", evt_path, + printf(" %-42s [%s]\n", evt_path, event_type_descriptors[PERF_TYPE_TRACEPOINT+1]); } closedir(evt_dir); @@ -799,8 +897,8 @@ void print_events(void) unsigned int i, type, op, prev_type = -1; char name[40]; - fprintf(stderr, "\n"); - fprintf(stderr, "List of pre-defined events (to be used in -e):\n"); + printf("\n"); + printf("List of pre-defined events (to be used in -e):\n"); for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) { type = syms->type + 1; @@ -808,19 +906,19 @@ void print_events(void) type = 0; if (type != prev_type) - fprintf(stderr, "\n"); + printf("\n"); if (strlen(syms->alias)) sprintf(name, "%s OR %s", syms->symbol, syms->alias); else strcpy(name, syms->symbol); - fprintf(stderr, " %-42s [%s]\n", name, + printf(" %-42s [%s]\n", name, event_type_descriptors[type]); prev_type = type; } - fprintf(stderr, "\n"); + printf("\n"); for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { /* skip invalid cache type */ @@ -828,17 +926,20 @@ void print_events(void) continue; for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { - fprintf(stderr, " %-42s [%s]\n", + printf(" %-42s [%s]\n", event_cache_name(type, op, i), event_type_descriptors[4]); } } } - fprintf(stderr, "\n"); - fprintf(stderr, " %-42s [raw hardware event descriptor]\n", + printf("\n"); + printf(" %-42s [raw hardware event descriptor]\n", "rNNN"); - fprintf(stderr, "\n"); + printf("\n"); + + printf(" %-42s [hardware breakpoint]\n", "mem:<addr>[:access]"); + printf("\n"); print_tracepoint_events(); diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 30c60811284..b8c1f64bc93 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -1,5 +1,5 @@ -#ifndef _PARSE_EVENTS_H -#define _PARSE_EVENTS_H +#ifndef __PERF_PARSE_EVENTS_H +#define __PERF_PARSE_EVENTS_H /* * Parse symbolic events/counts passed in as options: */ @@ -17,11 +17,13 @@ extern struct tracepoint_path *tracepoint_id_to_path(u64 config); extern int nr_counters; extern struct perf_event_attr attrs[MAX_COUNTERS]; +extern char *filters[MAX_COUNTERS]; extern const char *event_name(int ctr); extern const char *__event_name(int type, u64 config); extern int parse_events(const struct option *opt, const char *str, int unset); +extern int parse_filter(const struct option *opt, const char *str, int unset); #define EVENTS_HELP_MAX (128*1024) @@ -31,4 +33,4 @@ extern char debugfs_path[]; extern int valid_debugfs_mount(const char *debugfs); -#endif /* _PARSE_EVENTS_H */ +#endif /* __PERF_PARSE_EVENTS_H */ diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h index 2ee248ff27e..948805af43c 100644 --- a/tools/perf/util/parse-options.h +++ b/tools/perf/util/parse-options.h @@ -1,5 +1,5 @@ -#ifndef PARSE_OPTIONS_H -#define PARSE_OPTIONS_H +#ifndef __PERF_PARSE_OPTIONS_H +#define __PERF_PARSE_OPTIONS_H enum parse_opt_type { /* special types */ @@ -174,4 +174,4 @@ extern int parse_opt_verbosity_cb(const struct option *, const char *, int); extern const char *parse_options_fix_filename(const char *prefix, const char *file); -#endif +#endif /* __PERF_PARSE_OPTIONS_H */ diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c new file mode 100644 index 00000000000..cd7fbda5e2a --- /dev/null +++ b/tools/perf/util/probe-event.c @@ -0,0 +1,484 @@ +/* + * probe-event.c : perf-probe definition to kprobe_events format converter + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ + +#define _GNU_SOURCE +#include <sys/utsname.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <limits.h> + +#undef _GNU_SOURCE +#include "event.h" +#include "string.h" +#include "strlist.h" +#include "debug.h" +#include "parse-events.h" /* For debugfs_path */ +#include "probe-event.h" + +#define MAX_CMDLEN 256 +#define MAX_PROBE_ARGS 128 +#define PERFPROBE_GROUP "probe" + +#define semantic_error(msg ...) die("Semantic error :" msg) + +/* If there is no space to write, returns -E2BIG. */ +static int e_snprintf(char *str, size_t size, const char *format, ...) +{ + int ret; + va_list ap; + va_start(ap, format); + ret = vsnprintf(str, size, format, ap); + va_end(ap); + if (ret >= (int)size) + ret = -E2BIG; + return ret; +} + +/* Parse probepoint definition. */ +static void parse_perf_probe_probepoint(char *arg, struct probe_point *pp) +{ + char *ptr, *tmp; + char c, nc = 0; + /* + * <Syntax> + * perf probe SRC:LN + * perf probe FUNC[+OFFS|%return][@SRC] + */ + + ptr = strpbrk(arg, ":+@%"); + if (ptr) { + nc = *ptr; + *ptr++ = '\0'; + } + + /* Check arg is function or file and copy it */ + if (strchr(arg, '.')) /* File */ + pp->file = strdup(arg); + else /* Function */ + pp->function = strdup(arg); + DIE_IF(pp->file == NULL && pp->function == NULL); + + /* Parse other options */ + while (ptr) { + arg = ptr; + c = nc; + ptr = strpbrk(arg, ":+@%"); + if (ptr) { + nc = *ptr; + *ptr++ = '\0'; + } + switch (c) { + case ':': /* Line number */ + pp->line = strtoul(arg, &tmp, 0); + if (*tmp != '\0') + semantic_error("There is non-digit charactor" + " in line number."); + break; + case '+': /* Byte offset from a symbol */ + pp->offset = strtoul(arg, &tmp, 0); + if (*tmp != '\0') + semantic_error("There is non-digit charactor" + " in offset."); + break; + case '@': /* File name */ + if (pp->file) + semantic_error("SRC@SRC is not allowed."); + pp->file = strdup(arg); + DIE_IF(pp->file == NULL); + if (ptr) + semantic_error("@SRC must be the last " + "option."); + break; + case '%': /* Probe places */ + if (strcmp(arg, "return") == 0) { + pp->retprobe = 1; + } else /* Others not supported yet */ + semantic_error("%%%s is not supported.", arg); + break; + default: + DIE_IF("Program has a bug."); + break; + } + } + + /* Exclusion check */ + if (pp->line && pp->offset) + semantic_error("Offset can't be used with line number."); + + if (!pp->line && pp->file && !pp->function) + semantic_error("File always requires line number."); + + if (pp->offset && !pp->function) + semantic_error("Offset requires an entry function."); + + if (pp->retprobe && !pp->function) + semantic_error("Return probe requires an entry function."); + + if ((pp->offset || pp->line) && pp->retprobe) + semantic_error("Offset/Line can't be used with return probe."); + + pr_debug("symbol:%s file:%s line:%d offset:%d, return:%d\n", + pp->function, pp->file, pp->line, pp->offset, pp->retprobe); +} + +/* Parse perf-probe event definition */ +int parse_perf_probe_event(const char *str, struct probe_point *pp) +{ + char **argv; + int argc, i, need_dwarf = 0; + + argv = argv_split(str, &argc); + if (!argv) + die("argv_split failed."); + if (argc > MAX_PROBE_ARGS + 1) + semantic_error("Too many arguments"); + + /* Parse probe point */ + parse_perf_probe_probepoint(argv[0], pp); + if (pp->file || pp->line) + need_dwarf = 1; + + /* Copy arguments and ensure return probe has no C argument */ + pp->nr_args = argc - 1; + pp->args = zalloc(sizeof(char *) * pp->nr_args); + for (i = 0; i < pp->nr_args; i++) { + pp->args[i] = strdup(argv[i + 1]); + if (!pp->args[i]) + die("Failed to copy argument."); + if (is_c_varname(pp->args[i])) { + if (pp->retprobe) + semantic_error("You can't specify local" + " variable for kretprobe"); + need_dwarf = 1; + } + } + + argv_free(argv); + return need_dwarf; +} + +/* Parse kprobe_events event into struct probe_point */ +void parse_trace_kprobe_event(const char *str, char **group, char **event, + struct probe_point *pp) +{ + char pr; + char *p; + int ret, i, argc; + char **argv; + + pr_debug("Parsing kprobe_events: %s\n", str); + argv = argv_split(str, &argc); + if (!argv) + die("argv_split failed."); + if (argc < 2) + semantic_error("Too less arguments."); + + /* Scan event and group name. */ + ret = sscanf(argv[0], "%c:%a[^/ \t]/%a[^ \t]", + &pr, (float *)(void *)group, (float *)(void *)event); + if (ret != 3) + semantic_error("Failed to parse event name: %s", argv[0]); + pr_debug("Group:%s Event:%s probe:%c\n", *group, *event, pr); + + if (!pp) + goto end; + + pp->retprobe = (pr == 'r'); + + /* Scan function name and offset */ + ret = sscanf(argv[1], "%a[^+]+%d", (float *)(void *)&pp->function, &pp->offset); + if (ret == 1) + pp->offset = 0; + + /* kprobe_events doesn't have this information */ + pp->line = 0; + pp->file = NULL; + + pp->nr_args = argc - 2; + pp->args = zalloc(sizeof(char *) * pp->nr_args); + for (i = 0; i < pp->nr_args; i++) { + p = strchr(argv[i + 2], '='); + if (p) /* We don't need which register is assigned. */ + *p = '\0'; + pp->args[i] = strdup(argv[i + 2]); + if (!pp->args[i]) + die("Failed to copy argument."); + } + +end: + argv_free(argv); +} + +int synthesize_perf_probe_event(struct probe_point *pp) +{ + char *buf; + char offs[64] = "", line[64] = ""; + int i, len, ret; + + pp->probes[0] = buf = zalloc(MAX_CMDLEN); + if (!buf) + die("Failed to allocate memory by zalloc."); + if (pp->offset) { + ret = e_snprintf(offs, 64, "+%d", pp->offset); + if (ret <= 0) + goto error; + } + if (pp->line) { + ret = e_snprintf(line, 64, ":%d", pp->line); + if (ret <= 0) + goto error; + } + + if (pp->function) + ret = e_snprintf(buf, MAX_CMDLEN, "%s%s%s%s", pp->function, + offs, pp->retprobe ? "%return" : "", line); + else + ret = e_snprintf(buf, MAX_CMDLEN, "%s%s%s%s", pp->file, line); + if (ret <= 0) + goto error; + len = ret; + + for (i = 0; i < pp->nr_args; i++) { + ret = e_snprintf(&buf[len], MAX_CMDLEN - len, " %s", + pp->args[i]); + if (ret <= 0) + goto error; + len += ret; + } + pp->found = 1; + + return pp->found; +error: + free(pp->probes[0]); + + return ret; +} + +int synthesize_trace_kprobe_event(struct probe_point *pp) +{ + char *buf; + int i, len, ret; + + pp->probes[0] = buf = zalloc(MAX_CMDLEN); + if (!buf) + die("Failed to allocate memory by zalloc."); + ret = e_snprintf(buf, MAX_CMDLEN, "%s+%d", pp->function, pp->offset); + if (ret <= 0) + goto error; + len = ret; + + for (i = 0; i < pp->nr_args; i++) { + ret = e_snprintf(&buf[len], MAX_CMDLEN - len, " %s", + pp->args[i]); + if (ret <= 0) + goto error; + len += ret; + } + pp->found = 1; + + return pp->found; +error: + free(pp->probes[0]); + + return ret; +} + +static int open_kprobe_events(int flags, int mode) +{ + char buf[PATH_MAX]; + int ret; + + ret = e_snprintf(buf, PATH_MAX, "%s/../kprobe_events", debugfs_path); + if (ret < 0) + die("Failed to make kprobe_events path."); + + ret = open(buf, flags, mode); + if (ret < 0) { + if (errno == ENOENT) + die("kprobe_events file does not exist -" + " please rebuild with CONFIG_KPROBE_TRACER."); + else + die("Could not open kprobe_events file: %s", + strerror(errno)); + } + return ret; +} + +/* Get raw string list of current kprobe_events */ +static struct strlist *get_trace_kprobe_event_rawlist(int fd) +{ + int ret, idx; + FILE *fp; + char buf[MAX_CMDLEN]; + char *p; + struct strlist *sl; + + sl = strlist__new(true, NULL); + + fp = fdopen(dup(fd), "r"); + while (!feof(fp)) { + p = fgets(buf, MAX_CMDLEN, fp); + if (!p) + break; + + idx = strlen(p) - 1; + if (p[idx] == '\n') + p[idx] = '\0'; + ret = strlist__add(sl, buf); + if (ret < 0) + die("strlist__add failed: %s", strerror(-ret)); + } + fclose(fp); + + return sl; +} + +/* Free and zero clear probe_point */ +static void clear_probe_point(struct probe_point *pp) +{ + int i; + + if (pp->function) + free(pp->function); + if (pp->file) + free(pp->file); + for (i = 0; i < pp->nr_args; i++) + free(pp->args[i]); + if (pp->args) + free(pp->args); + for (i = 0; i < pp->found; i++) + free(pp->probes[i]); + memset(pp, 0, sizeof(pp)); +} + +/* List up current perf-probe events */ +void show_perf_probe_events(void) +{ + unsigned int i; + int fd; + char *group, *event; + struct probe_point pp; + struct strlist *rawlist; + struct str_node *ent; + + fd = open_kprobe_events(O_RDONLY, 0); + rawlist = get_trace_kprobe_event_rawlist(fd); + close(fd); + + for (i = 0; i < strlist__nr_entries(rawlist); i++) { + ent = strlist__entry(rawlist, i); + parse_trace_kprobe_event(ent->s, &group, &event, &pp); + synthesize_perf_probe_event(&pp); + printf("[%s:%s]\t%s\n", group, event, pp.probes[0]); + free(group); + free(event); + clear_probe_point(&pp); + } + + strlist__delete(rawlist); +} + +/* Get current perf-probe event names */ +static struct strlist *get_perf_event_names(int fd) +{ + unsigned int i; + char *group, *event; + struct strlist *sl, *rawlist; + struct str_node *ent; + + rawlist = get_trace_kprobe_event_rawlist(fd); + + sl = strlist__new(false, NULL); + for (i = 0; i < strlist__nr_entries(rawlist); i++) { + ent = strlist__entry(rawlist, i); + parse_trace_kprobe_event(ent->s, &group, &event, NULL); + strlist__add(sl, event); + free(group); + } + + strlist__delete(rawlist); + + return sl; +} + +static int write_trace_kprobe_event(int fd, const char *buf) +{ + int ret; + + ret = write(fd, buf, strlen(buf)); + if (ret <= 0) + die("Failed to create event."); + else + printf("Added new event: %s\n", buf); + + return ret; +} + +static void get_new_event_name(char *buf, size_t len, const char *base, + struct strlist *namelist) +{ + int i, ret; + for (i = 0; i < MAX_EVENT_INDEX; i++) { + ret = e_snprintf(buf, len, "%s_%d", base, i); + if (ret < 0) + die("snprintf() failed: %s", strerror(-ret)); + if (!strlist__has_entry(namelist, buf)) + break; + } + if (i == MAX_EVENT_INDEX) + die("Too many events are on the same function."); +} + +void add_trace_kprobe_events(struct probe_point *probes, int nr_probes) +{ + int i, j, fd; + struct probe_point *pp; + char buf[MAX_CMDLEN]; + char event[64]; + struct strlist *namelist; + + fd = open_kprobe_events(O_RDWR, O_APPEND); + /* Get current event names */ + namelist = get_perf_event_names(fd); + + for (j = 0; j < nr_probes; j++) { + pp = probes + j; + for (i = 0; i < pp->found; i++) { + /* Get an unused new event name */ + get_new_event_name(event, 64, pp->function, namelist); + snprintf(buf, MAX_CMDLEN, "%c:%s/%s %s\n", + pp->retprobe ? 'r' : 'p', + PERFPROBE_GROUP, event, + pp->probes[i]); + write_trace_kprobe_event(fd, buf); + /* Add added event name to namelist */ + strlist__add(namelist, event); + } + } + close(fd); +} diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h new file mode 100644 index 00000000000..0c6fe56fe38 --- /dev/null +++ b/tools/perf/util/probe-event.h @@ -0,0 +1,18 @@ +#ifndef _PROBE_EVENT_H +#define _PROBE_EVENT_H + +#include "probe-finder.h" +#include "strlist.h" + +extern int parse_perf_probe_event(const char *str, struct probe_point *pp); +extern int synthesize_perf_probe_event(struct probe_point *pp); +extern void parse_trace_kprobe_event(const char *str, char **group, + char **event, struct probe_point *pp); +extern int synthesize_trace_kprobe_event(struct probe_point *pp); +extern void add_trace_kprobe_events(struct probe_point *probes, int nr_probes); +extern void show_perf_probe_events(void); + +/* Maximum index number of event-name postfix */ +#define MAX_EVENT_INDEX 1024 + +#endif /*_PROBE_EVENT_H */ diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c new file mode 100644 index 00000000000..293cdfc1b8c --- /dev/null +++ b/tools/perf/util/probe-finder.c @@ -0,0 +1,732 @@ +/* + * probe-finder.c : C expression to kprobe event converter + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ + +#include <sys/utsname.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <stdio.h> +#include <unistd.h> +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <ctype.h> + +#include "event.h" +#include "debug.h" +#include "util.h" +#include "probe-finder.h" + + +/* Dwarf_Die Linkage to parent Die */ +struct die_link { + struct die_link *parent; /* Parent die */ + Dwarf_Die die; /* Current die */ +}; + +static Dwarf_Debug __dw_debug; +static Dwarf_Error __dw_error; + +/* + * Generic dwarf analysis helpers + */ + +#define X86_32_MAX_REGS 8 +const char *x86_32_regs_table[X86_32_MAX_REGS] = { + "%ax", + "%cx", + "%dx", + "%bx", + "$stack", /* Stack address instead of %sp */ + "%bp", + "%si", + "%di", +}; + +#define X86_64_MAX_REGS 16 +const char *x86_64_regs_table[X86_64_MAX_REGS] = { + "%ax", + "%dx", + "%cx", + "%bx", + "%si", + "%di", + "%bp", + "%sp", + "%r8", + "%r9", + "%r10", + "%r11", + "%r12", + "%r13", + "%r14", + "%r15", +}; + +/* TODO: switching by dwarf address size */ +#ifdef __x86_64__ +#define ARCH_MAX_REGS X86_64_MAX_REGS +#define arch_regs_table x86_64_regs_table +#else +#define ARCH_MAX_REGS X86_32_MAX_REGS +#define arch_regs_table x86_32_regs_table +#endif + +/* Return architecture dependent register string (for kprobe-tracer) */ +static const char *get_arch_regstr(unsigned int n) +{ + return (n <= ARCH_MAX_REGS) ? arch_regs_table[n] : NULL; +} + +/* + * Compare the tail of two strings. + * Return 0 if whole of either string is same as another's tail part. + */ +static int strtailcmp(const char *s1, const char *s2) +{ + int i1 = strlen(s1); + int i2 = strlen(s2); + while (--i1 > 0 && --i2 > 0) { + if (s1[i1] != s2[i2]) + return s1[i1] - s2[i2]; + } + return 0; +} + +/* Find the fileno of the target file. */ +static Dwarf_Unsigned cu_find_fileno(Dwarf_Die cu_die, const char *fname) +{ + Dwarf_Signed cnt, i; + Dwarf_Unsigned found = 0; + char **srcs; + int ret; + + if (!fname) + return 0; + + ret = dwarf_srcfiles(cu_die, &srcs, &cnt, &__dw_error); + if (ret == DW_DLV_OK) { + for (i = 0; i < cnt && !found; i++) { + if (strtailcmp(srcs[i], fname) == 0) + found = i + 1; + dwarf_dealloc(__dw_debug, srcs[i], DW_DLA_STRING); + } + for (; i < cnt; i++) + dwarf_dealloc(__dw_debug, srcs[i], DW_DLA_STRING); + dwarf_dealloc(__dw_debug, srcs, DW_DLA_LIST); + } + if (found) + pr_debug("found fno: %d\n", (int)found); + return found; +} + +/* Compare diename and tname */ +static int die_compare_name(Dwarf_Die dw_die, const char *tname) +{ + char *name; + int ret; + ret = dwarf_diename(dw_die, &name, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + if (ret == DW_DLV_OK) { + ret = strcmp(tname, name); + dwarf_dealloc(__dw_debug, name, DW_DLA_STRING); + } else + ret = -1; + return ret; +} + +/* Check the address is in the subprogram(function). */ +static int die_within_subprogram(Dwarf_Die sp_die, Dwarf_Addr addr, + Dwarf_Signed *offs) +{ + Dwarf_Addr lopc, hipc; + int ret; + + /* TODO: check ranges */ + ret = dwarf_lowpc(sp_die, &lopc, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + if (ret == DW_DLV_NO_ENTRY) + return 0; + ret = dwarf_highpc(sp_die, &hipc, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + if (lopc <= addr && addr < hipc) { + *offs = addr - lopc; + return 1; + } else + return 0; +} + +/* Check the die is inlined function */ +static Dwarf_Bool die_inlined_subprogram(Dwarf_Die dw_die) +{ + /* TODO: check strictly */ + Dwarf_Bool inl; + int ret; + + ret = dwarf_hasattr(dw_die, DW_AT_inline, &inl, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + return inl; +} + +/* Get the offset of abstruct_origin */ +static Dwarf_Off die_get_abstract_origin(Dwarf_Die dw_die) +{ + Dwarf_Attribute attr; + Dwarf_Off cu_offs; + int ret; + + ret = dwarf_attr(dw_die, DW_AT_abstract_origin, &attr, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + ret = dwarf_formref(attr, &cu_offs, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR); + return cu_offs; +} + +/* Get entry pc(or low pc, 1st entry of ranges) of the die */ +static Dwarf_Addr die_get_entrypc(Dwarf_Die dw_die) +{ + Dwarf_Attribute attr; + Dwarf_Addr addr; + Dwarf_Off offs; + Dwarf_Ranges *ranges; + Dwarf_Signed cnt; + int ret; + + /* Try to get entry pc */ + ret = dwarf_attr(dw_die, DW_AT_entry_pc, &attr, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + if (ret == DW_DLV_OK) { + ret = dwarf_formaddr(attr, &addr, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR); + return addr; + } + + /* Try to get low pc */ + ret = dwarf_lowpc(dw_die, &addr, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + if (ret == DW_DLV_OK) + return addr; + + /* Try to get ranges */ + ret = dwarf_attr(dw_die, DW_AT_ranges, &attr, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + ret = dwarf_formref(attr, &offs, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + ret = dwarf_get_ranges(__dw_debug, offs, &ranges, &cnt, NULL, + &__dw_error); + DIE_IF(ret != DW_DLV_OK); + addr = ranges[0].dwr_addr1; + dwarf_ranges_dealloc(__dw_debug, ranges, cnt); + return addr; +} + +/* + * Search a Die from Die tree. + * Note: cur_link->die should be deallocated in this function. + */ +static int __search_die_tree(struct die_link *cur_link, + int (*die_cb)(struct die_link *, void *), + void *data) +{ + Dwarf_Die new_die; + struct die_link new_link; + int ret; + + if (!die_cb) + return 0; + + /* Check current die */ + while (!(ret = die_cb(cur_link, data))) { + /* Check child die */ + ret = dwarf_child(cur_link->die, &new_die, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + if (ret == DW_DLV_OK) { + new_link.parent = cur_link; + new_link.die = new_die; + ret = __search_die_tree(&new_link, die_cb, data); + if (ret) + break; + } + + /* Move to next sibling */ + ret = dwarf_siblingof(__dw_debug, cur_link->die, &new_die, + &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + dwarf_dealloc(__dw_debug, cur_link->die, DW_DLA_DIE); + cur_link->die = new_die; + if (ret == DW_DLV_NO_ENTRY) + return 0; + } + dwarf_dealloc(__dw_debug, cur_link->die, DW_DLA_DIE); + return ret; +} + +/* Search a die in its children's die tree */ +static int search_die_from_children(Dwarf_Die parent_die, + int (*die_cb)(struct die_link *, void *), + void *data) +{ + struct die_link new_link; + int ret; + + new_link.parent = NULL; + ret = dwarf_child(parent_die, &new_link.die, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + if (ret == DW_DLV_OK) + return __search_die_tree(&new_link, die_cb, data); + else + return 0; +} + +/* Find a locdesc corresponding to the address */ +static int attr_get_locdesc(Dwarf_Attribute attr, Dwarf_Locdesc *desc, + Dwarf_Addr addr) +{ + Dwarf_Signed lcnt; + Dwarf_Locdesc **llbuf; + int ret, i; + + ret = dwarf_loclist_n(attr, &llbuf, &lcnt, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + ret = DW_DLV_NO_ENTRY; + for (i = 0; i < lcnt; ++i) { + if (llbuf[i]->ld_lopc <= addr && + llbuf[i]->ld_hipc > addr) { + memcpy(desc, llbuf[i], sizeof(Dwarf_Locdesc)); + desc->ld_s = + malloc(sizeof(Dwarf_Loc) * llbuf[i]->ld_cents); + DIE_IF(desc->ld_s == NULL); + memcpy(desc->ld_s, llbuf[i]->ld_s, + sizeof(Dwarf_Loc) * llbuf[i]->ld_cents); + ret = DW_DLV_OK; + break; + } + dwarf_dealloc(__dw_debug, llbuf[i]->ld_s, DW_DLA_LOC_BLOCK); + dwarf_dealloc(__dw_debug, llbuf[i], DW_DLA_LOCDESC); + } + /* Releasing loop */ + for (; i < lcnt; ++i) { + dwarf_dealloc(__dw_debug, llbuf[i]->ld_s, DW_DLA_LOC_BLOCK); + dwarf_dealloc(__dw_debug, llbuf[i], DW_DLA_LOCDESC); + } + dwarf_dealloc(__dw_debug, llbuf, DW_DLA_LIST); + return ret; +} + +/* Get decl_file attribute value (file number) */ +static Dwarf_Unsigned die_get_decl_file(Dwarf_Die sp_die) +{ + Dwarf_Attribute attr; + Dwarf_Unsigned fno; + int ret; + + ret = dwarf_attr(sp_die, DW_AT_decl_file, &attr, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + dwarf_formudata(attr, &fno, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR); + return fno; +} + +/* Get decl_line attribute value (line number) */ +static Dwarf_Unsigned die_get_decl_line(Dwarf_Die sp_die) +{ + Dwarf_Attribute attr; + Dwarf_Unsigned lno; + int ret; + + ret = dwarf_attr(sp_die, DW_AT_decl_line, &attr, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + dwarf_formudata(attr, &lno, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR); + return lno; +} + +/* + * Probe finder related functions + */ + +/* Show a location */ +static void show_location(Dwarf_Loc *loc, struct probe_finder *pf) +{ + Dwarf_Small op; + Dwarf_Unsigned regn; + Dwarf_Signed offs; + int deref = 0, ret; + const char *regs; + + op = loc->lr_atom; + + /* If this is based on frame buffer, set the offset */ + if (op == DW_OP_fbreg) { + deref = 1; + offs = (Dwarf_Signed)loc->lr_number; + op = pf->fbloc.ld_s[0].lr_atom; + loc = &pf->fbloc.ld_s[0]; + } else + offs = 0; + + if (op >= DW_OP_breg0 && op <= DW_OP_breg31) { + regn = op - DW_OP_breg0; + offs += (Dwarf_Signed)loc->lr_number; + deref = 1; + } else if (op >= DW_OP_reg0 && op <= DW_OP_reg31) { + regn = op - DW_OP_reg0; + } else if (op == DW_OP_bregx) { + regn = loc->lr_number; + offs += (Dwarf_Signed)loc->lr_number2; + deref = 1; + } else if (op == DW_OP_regx) { + regn = loc->lr_number; + } else + die("Dwarf_OP %d is not supported.\n", op); + + regs = get_arch_regstr(regn); + if (!regs) + die("%lld exceeds max register number.\n", regn); + + if (deref) + ret = snprintf(pf->buf, pf->len, + " %s=%+lld(%s)", pf->var, offs, regs); + else + ret = snprintf(pf->buf, pf->len, " %s=%s", pf->var, regs); + DIE_IF(ret < 0); + DIE_IF(ret >= pf->len); +} + +/* Show a variables in kprobe event format */ +static void show_variable(Dwarf_Die vr_die, struct probe_finder *pf) +{ + Dwarf_Attribute attr; + Dwarf_Locdesc ld; + int ret; + + ret = dwarf_attr(vr_die, DW_AT_location, &attr, &__dw_error); + if (ret != DW_DLV_OK) + goto error; + ret = attr_get_locdesc(attr, &ld, (pf->addr - pf->cu_base)); + if (ret != DW_DLV_OK) + goto error; + /* TODO? */ + DIE_IF(ld.ld_cents != 1); + show_location(&ld.ld_s[0], pf); + free(ld.ld_s); + dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR); + return ; +error: + die("Failed to find the location of %s at this address.\n" + " Perhaps, it has been optimized out.\n", pf->var); +} + +static int variable_callback(struct die_link *dlink, void *data) +{ + struct probe_finder *pf = (struct probe_finder *)data; + Dwarf_Half tag; + int ret; + + ret = dwarf_tag(dlink->die, &tag, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + if ((tag == DW_TAG_formal_parameter || + tag == DW_TAG_variable) && + (die_compare_name(dlink->die, pf->var) == 0)) { + show_variable(dlink->die, pf); + return 1; + } + /* TODO: Support struct members and arrays */ + return 0; +} + +/* Find a variable in a subprogram die */ +static void find_variable(Dwarf_Die sp_die, struct probe_finder *pf) +{ + int ret; + + if (!is_c_varname(pf->var)) { + /* Output raw parameters */ + ret = snprintf(pf->buf, pf->len, " %s", pf->var); + DIE_IF(ret < 0); + DIE_IF(ret >= pf->len); + return ; + } + + pr_debug("Searching '%s' variable in context.\n", pf->var); + /* Search child die for local variables and parameters. */ + ret = search_die_from_children(sp_die, variable_callback, pf); + if (!ret) + die("Failed to find '%s' in this function.\n", pf->var); +} + +/* Get a frame base on the address */ +static void get_current_frame_base(Dwarf_Die sp_die, struct probe_finder *pf) +{ + Dwarf_Attribute attr; + int ret; + + ret = dwarf_attr(sp_die, DW_AT_frame_base, &attr, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + ret = attr_get_locdesc(attr, &pf->fbloc, (pf->addr - pf->cu_base)); + DIE_IF(ret != DW_DLV_OK); + dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR); +} + +static void free_current_frame_base(struct probe_finder *pf) +{ + free(pf->fbloc.ld_s); + memset(&pf->fbloc, 0, sizeof(Dwarf_Locdesc)); +} + +/* Show a probe point to output buffer */ +static void show_probepoint(Dwarf_Die sp_die, Dwarf_Signed offs, + struct probe_finder *pf) +{ + struct probe_point *pp = pf->pp; + char *name; + char tmp[MAX_PROBE_BUFFER]; + int ret, i, len; + + /* Output name of probe point */ + ret = dwarf_diename(sp_die, &name, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + if (ret == DW_DLV_OK) { + ret = snprintf(tmp, MAX_PROBE_BUFFER, "%s+%u", name, + (unsigned int)offs); + /* Copy the function name if possible */ + if (!pp->function) { + pp->function = strdup(name); + pp->offset = offs; + } + dwarf_dealloc(__dw_debug, name, DW_DLA_STRING); + } else { + /* This function has no name. */ + ret = snprintf(tmp, MAX_PROBE_BUFFER, "0x%llx", pf->addr); + if (!pp->function) { + /* TODO: Use _stext */ + pp->function = strdup(""); + pp->offset = (int)pf->addr; + } + } + DIE_IF(ret < 0); + DIE_IF(ret >= MAX_PROBE_BUFFER); + len = ret; + pr_debug("Probe point found: %s\n", tmp); + + /* Find each argument */ + get_current_frame_base(sp_die, pf); + for (i = 0; i < pp->nr_args; i++) { + pf->var = pp->args[i]; + pf->buf = &tmp[len]; + pf->len = MAX_PROBE_BUFFER - len; + find_variable(sp_die, pf); + len += strlen(pf->buf); + } + free_current_frame_base(pf); + + pp->probes[pp->found] = strdup(tmp); + pp->found++; +} + +static int probeaddr_callback(struct die_link *dlink, void *data) +{ + struct probe_finder *pf = (struct probe_finder *)data; + Dwarf_Half tag; + Dwarf_Signed offs; + int ret; + + ret = dwarf_tag(dlink->die, &tag, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + /* Check the address is in this subprogram */ + if (tag == DW_TAG_subprogram && + die_within_subprogram(dlink->die, pf->addr, &offs)) { + show_probepoint(dlink->die, offs, pf); + return 1; + } + return 0; +} + +/* Find probe point from its line number */ +static void find_by_line(struct probe_finder *pf) +{ + Dwarf_Signed cnt, i, clm; + Dwarf_Line *lines; + Dwarf_Unsigned lineno = 0; + Dwarf_Addr addr; + Dwarf_Unsigned fno; + int ret; + + ret = dwarf_srclines(pf->cu_die, &lines, &cnt, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + + for (i = 0; i < cnt; i++) { + ret = dwarf_line_srcfileno(lines[i], &fno, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + if (fno != pf->fno) + continue; + + ret = dwarf_lineno(lines[i], &lineno, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + if (lineno != pf->lno) + continue; + + ret = dwarf_lineoff(lines[i], &clm, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + + ret = dwarf_lineaddr(lines[i], &addr, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + pr_debug("Probe line found: line[%d]:%u,%d addr:0x%llx\n", + (int)i, (unsigned)lineno, (int)clm, addr); + pf->addr = addr; + /* Search a real subprogram including this line, */ + ret = search_die_from_children(pf->cu_die, + probeaddr_callback, pf); + if (ret == 0) + die("Probe point is not found in subprograms.\n"); + /* Continuing, because target line might be inlined. */ + } + dwarf_srclines_dealloc(__dw_debug, lines, cnt); +} + +/* Search function from function name */ +static int probefunc_callback(struct die_link *dlink, void *data) +{ + struct probe_finder *pf = (struct probe_finder *)data; + struct probe_point *pp = pf->pp; + struct die_link *lk; + Dwarf_Signed offs; + Dwarf_Half tag; + int ret; + + ret = dwarf_tag(dlink->die, &tag, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + if (tag == DW_TAG_subprogram) { + if (die_compare_name(dlink->die, pp->function) == 0) { + if (pp->line) { /* Function relative line */ + pf->fno = die_get_decl_file(dlink->die); + pf->lno = die_get_decl_line(dlink->die) + + pp->line; + find_by_line(pf); + return 1; + } + if (die_inlined_subprogram(dlink->die)) { + /* Inlined function, save it. */ + ret = dwarf_die_CU_offset(dlink->die, + &pf->inl_offs, + &__dw_error); + DIE_IF(ret != DW_DLV_OK); + pr_debug("inline definition offset %lld\n", + pf->inl_offs); + return 0; /* Continue to search */ + } + /* Get probe address */ + pf->addr = die_get_entrypc(dlink->die); + pf->addr += pp->offset; + /* TODO: Check the address in this function */ + show_probepoint(dlink->die, pp->offset, pf); + return 1; /* Exit; no same symbol in this CU. */ + } + } else if (tag == DW_TAG_inlined_subroutine && pf->inl_offs) { + if (die_get_abstract_origin(dlink->die) == pf->inl_offs) { + /* Get probe address */ + pf->addr = die_get_entrypc(dlink->die); + pf->addr += pp->offset; + pr_debug("found inline addr: 0x%llx\n", pf->addr); + /* Inlined function. Get a real subprogram */ + for (lk = dlink->parent; lk != NULL; lk = lk->parent) { + tag = 0; + dwarf_tag(lk->die, &tag, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + if (tag == DW_TAG_subprogram && + !die_inlined_subprogram(lk->die)) + goto found; + } + die("Failed to find real subprogram.\n"); +found: + /* Get offset from subprogram */ + ret = die_within_subprogram(lk->die, pf->addr, &offs); + DIE_IF(!ret); + show_probepoint(lk->die, offs, pf); + /* Continue to search */ + } + } + return 0; +} + +static void find_by_func(struct probe_finder *pf) +{ + search_die_from_children(pf->cu_die, probefunc_callback, pf); +} + +/* Find a probe point */ +int find_probepoint(int fd, struct probe_point *pp) +{ + Dwarf_Half addr_size = 0; + Dwarf_Unsigned next_cuh = 0; + int cu_number = 0, ret; + struct probe_finder pf = {.pp = pp}; + + ret = dwarf_init(fd, DW_DLC_READ, 0, 0, &__dw_debug, &__dw_error); + if (ret != DW_DLV_OK) { + pr_warning("No dwarf info found in the vmlinux - please rebuild with CONFIG_DEBUG_INFO.\n"); + return -ENOENT; + } + + pp->found = 0; + while (++cu_number) { + /* Search CU (Compilation Unit) */ + ret = dwarf_next_cu_header(__dw_debug, NULL, NULL, NULL, + &addr_size, &next_cuh, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + if (ret == DW_DLV_NO_ENTRY) + break; + + /* Get the DIE(Debugging Information Entry) of this CU */ + ret = dwarf_siblingof(__dw_debug, 0, &pf.cu_die, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + + /* Check if target file is included. */ + if (pp->file) + pf.fno = cu_find_fileno(pf.cu_die, pp->file); + + if (!pp->file || pf.fno) { + /* Save CU base address (for frame_base) */ + ret = dwarf_lowpc(pf.cu_die, &pf.cu_base, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + if (ret == DW_DLV_NO_ENTRY) + pf.cu_base = 0; + if (pp->function) + find_by_func(&pf); + else { + pf.lno = pp->line; + find_by_line(&pf); + } + } + dwarf_dealloc(__dw_debug, pf.cu_die, DW_DLA_DIE); + } + ret = dwarf_finish(__dw_debug, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + + return pp->found; +} + diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h new file mode 100644 index 00000000000..bdebca6697d --- /dev/null +++ b/tools/perf/util/probe-finder.h @@ -0,0 +1,57 @@ +#ifndef _PROBE_FINDER_H +#define _PROBE_FINDER_H + +#define MAX_PATH_LEN 256 +#define MAX_PROBE_BUFFER 1024 +#define MAX_PROBES 128 + +static inline int is_c_varname(const char *name) +{ + /* TODO */ + return isalpha(name[0]) || name[0] == '_'; +} + +struct probe_point { + /* Inputs */ + char *file; /* File name */ + int line; /* Line number */ + + char *function; /* Function name */ + int offset; /* Offset bytes */ + + int nr_args; /* Number of arguments */ + char **args; /* Arguments */ + + int retprobe; /* Return probe */ + + /* Output */ + int found; /* Number of found probe points */ + char *probes[MAX_PROBES]; /* Output buffers (will be allocated)*/ +}; + +#ifndef NO_LIBDWARF +extern int find_probepoint(int fd, struct probe_point *pp); + +#include <libdwarf/dwarf.h> +#include <libdwarf/libdwarf.h> + +struct probe_finder { + struct probe_point *pp; /* Target probe point */ + + /* For function searching */ + Dwarf_Addr addr; /* Address */ + Dwarf_Unsigned fno; /* File number */ + Dwarf_Unsigned lno; /* Line number */ + Dwarf_Off inl_offs; /* Inline offset */ + Dwarf_Die cu_die; /* Current CU */ + + /* For variable searching */ + Dwarf_Addr cu_base; /* Current CU base address */ + Dwarf_Locdesc fbloc; /* Location of Current Frame Base */ + const char *var; /* Current variable name */ + char *buf; /* Current output buffer */ + int len; /* Length of output buffer */ +}; +#endif /* NO_LIBDWARF */ + +#endif /*_PROBE_FINDER_H */ diff --git a/tools/perf/util/quote.h b/tools/perf/util/quote.h index a5454a1d1c1..b6a01973391 100644 --- a/tools/perf/util/quote.h +++ b/tools/perf/util/quote.h @@ -1,5 +1,5 @@ -#ifndef QUOTE_H -#define QUOTE_H +#ifndef __PERF_QUOTE_H +#define __PERF_QUOTE_H #include <stddef.h> #include <stdio.h> @@ -65,4 +65,4 @@ extern void perl_quote_print(FILE *stream, const char *src); extern void python_quote_print(FILE *stream, const char *src); extern void tcl_quote_print(FILE *stream, const char *src); -#endif +#endif /* __PERF_QUOTE_H */ diff --git a/tools/perf/util/run-command.h b/tools/perf/util/run-command.h index cc1837deba8..d79028727ce 100644 --- a/tools/perf/util/run-command.h +++ b/tools/perf/util/run-command.h @@ -1,5 +1,5 @@ -#ifndef RUN_COMMAND_H -#define RUN_COMMAND_H +#ifndef __PERF_RUN_COMMAND_H +#define __PERF_RUN_COMMAND_H enum { ERR_RUN_COMMAND_FORK = 10000, @@ -85,4 +85,4 @@ struct async { int start_async(struct async *async); int finish_async(struct async *async); -#endif +#endif /* __PERF_RUN_COMMAND_H */ diff --git a/tools/perf/util/sigchain.h b/tools/perf/util/sigchain.h index 618083bce0c..1a53c11265f 100644 --- a/tools/perf/util/sigchain.h +++ b/tools/perf/util/sigchain.h @@ -1,5 +1,5 @@ -#ifndef SIGCHAIN_H -#define SIGCHAIN_H +#ifndef __PERF_SIGCHAIN_H +#define __PERF_SIGCHAIN_H typedef void (*sigchain_fun)(int); @@ -8,4 +8,4 @@ int sigchain_pop(int sig); void sigchain_push_common(sigchain_fun f); -#endif /* SIGCHAIN_H */ +#endif /* __PERF_SIGCHAIN_H */ diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c new file mode 100644 index 00000000000..b490354d1b2 --- /dev/null +++ b/tools/perf/util/sort.c @@ -0,0 +1,290 @@ +#include "sort.h" + +regex_t parent_regex; +char default_parent_pattern[] = "^sys_|^do_page_fault"; +char *parent_pattern = default_parent_pattern; +char default_sort_order[] = "comm,dso,symbol"; +char *sort_order = default_sort_order; +int sort__need_collapse = 0; +int sort__has_parent = 0; + +enum sort_type sort__first_dimension; + +unsigned int dsos__col_width; +unsigned int comms__col_width; +unsigned int threads__col_width; +static unsigned int parent_symbol__col_width; +char * field_sep; + +LIST_HEAD(hist_entry__sort_list); + +struct sort_entry sort_thread = { + .header = "Command: Pid", + .cmp = sort__thread_cmp, + .print = sort__thread_print, + .width = &threads__col_width, +}; + +struct sort_entry sort_comm = { + .header = "Command", + .cmp = sort__comm_cmp, + .collapse = sort__comm_collapse, + .print = sort__comm_print, + .width = &comms__col_width, +}; + +struct sort_entry sort_dso = { + .header = "Shared Object", + .cmp = sort__dso_cmp, + .print = sort__dso_print, + .width = &dsos__col_width, +}; + +struct sort_entry sort_sym = { + .header = "Symbol", + .cmp = sort__sym_cmp, + .print = sort__sym_print, +}; + +struct sort_entry sort_parent = { + .header = "Parent symbol", + .cmp = sort__parent_cmp, + .print = sort__parent_print, + .width = &parent_symbol__col_width, +}; + +struct sort_dimension { + const char *name; + struct sort_entry *entry; + int taken; +}; + +static struct sort_dimension sort_dimensions[] = { + { .name = "pid", .entry = &sort_thread, }, + { .name = "comm", .entry = &sort_comm, }, + { .name = "dso", .entry = &sort_dso, }, + { .name = "symbol", .entry = &sort_sym, }, + { .name = "parent", .entry = &sort_parent, }, +}; + +int64_t cmp_null(void *l, void *r) +{ + if (!l && !r) + return 0; + else if (!l) + return -1; + else + return 1; +} + +/* --sort pid */ + +int64_t +sort__thread_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return right->thread->pid - left->thread->pid; +} + +int repsep_fprintf(FILE *fp, const char *fmt, ...) +{ + int n; + va_list ap; + + va_start(ap, fmt); + if (!field_sep) + n = vfprintf(fp, fmt, ap); + else { + char *bf = NULL; + n = vasprintf(&bf, fmt, ap); + if (n > 0) { + char *sep = bf; + + while (1) { + sep = strchr(sep, *field_sep); + if (sep == NULL) + break; + *sep = '.'; + } + } + fputs(bf, fp); + free(bf); + } + va_end(ap); + return n; +} + +size_t +sort__thread_print(FILE *fp, struct hist_entry *self, unsigned int width) +{ + return repsep_fprintf(fp, "%*s:%5d", width - 6, + self->thread->comm ?: "", self->thread->pid); +} + +size_t +sort__comm_print(FILE *fp, struct hist_entry *self, unsigned int width) +{ + return repsep_fprintf(fp, "%*s", width, self->thread->comm); +} + +/* --sort dso */ + +int64_t +sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) +{ + struct dso *dso_l = left->map ? left->map->dso : NULL; + struct dso *dso_r = right->map ? right->map->dso : NULL; + const char *dso_name_l, *dso_name_r; + + if (!dso_l || !dso_r) + return cmp_null(dso_l, dso_r); + + if (verbose) { + dso_name_l = dso_l->long_name; + dso_name_r = dso_r->long_name; + } else { + dso_name_l = dso_l->short_name; + dso_name_r = dso_r->short_name; + } + + return strcmp(dso_name_l, dso_name_r); +} + +size_t +sort__dso_print(FILE *fp, struct hist_entry *self, unsigned int width) +{ + if (self->map && self->map->dso) { + const char *dso_name = !verbose ? self->map->dso->short_name : + self->map->dso->long_name; + return repsep_fprintf(fp, "%-*s", width, dso_name); + } + + return repsep_fprintf(fp, "%*llx", width, (u64)self->ip); +} + +/* --sort symbol */ + +int64_t +sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) +{ + u64 ip_l, ip_r; + + if (left->sym == right->sym) + return 0; + + ip_l = left->sym ? left->sym->start : left->ip; + ip_r = right->sym ? right->sym->start : right->ip; + + return (int64_t)(ip_r - ip_l); +} + + +size_t +sort__sym_print(FILE *fp, struct hist_entry *self, unsigned int width __used) +{ + size_t ret = 0; + + if (verbose) { + char o = self->map ? dso__symtab_origin(self->map->dso) : '!'; + ret += repsep_fprintf(fp, "%#018llx %c ", (u64)self->ip, o); + } + + ret += repsep_fprintf(fp, "[%c] ", self->level); + if (self->sym) + ret += repsep_fprintf(fp, "%s", self->sym->name); + else + ret += repsep_fprintf(fp, "%#016llx", (u64)self->ip); + + return ret; +} + +/* --sort comm */ + +int64_t +sort__comm_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return right->thread->pid - left->thread->pid; +} + +int64_t +sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) +{ + char *comm_l = left->thread->comm; + char *comm_r = right->thread->comm; + + if (!comm_l || !comm_r) + return cmp_null(comm_l, comm_r); + + return strcmp(comm_l, comm_r); +} + +/* --sort parent */ + +int64_t +sort__parent_cmp(struct hist_entry *left, struct hist_entry *right) +{ + struct symbol *sym_l = left->parent; + struct symbol *sym_r = right->parent; + + if (!sym_l || !sym_r) + return cmp_null(sym_l, sym_r); + + return strcmp(sym_l->name, sym_r->name); +} + +size_t +sort__parent_print(FILE *fp, struct hist_entry *self, unsigned int width) +{ + return repsep_fprintf(fp, "%-*s", width, + self->parent ? self->parent->name : "[other]"); +} + +int sort_dimension__add(const char *tok) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) { + struct sort_dimension *sd = &sort_dimensions[i]; + + if (sd->taken) + continue; + + if (strncasecmp(tok, sd->name, strlen(tok))) + continue; + + if (sd->entry->collapse) + sort__need_collapse = 1; + + if (sd->entry == &sort_parent) { + int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED); + if (ret) { + char err[BUFSIZ]; + + regerror(ret, &parent_regex, err, sizeof(err)); + fprintf(stderr, "Invalid regex: %s\n%s", + parent_pattern, err); + exit(-1); + } + sort__has_parent = 1; + } + + if (list_empty(&hist_entry__sort_list)) { + if (!strcmp(sd->name, "pid")) + sort__first_dimension = SORT_PID; + else if (!strcmp(sd->name, "comm")) + sort__first_dimension = SORT_COMM; + else if (!strcmp(sd->name, "dso")) + sort__first_dimension = SORT_DSO; + else if (!strcmp(sd->name, "symbol")) + sort__first_dimension = SORT_SYM; + else if (!strcmp(sd->name, "parent")) + sort__first_dimension = SORT_PARENT; + } + + list_add_tail(&sd->entry->list, &hist_entry__sort_list); + sd->taken = 1; + + return 0; + } + + return -ESRCH; +} diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h new file mode 100644 index 00000000000..333e664ff45 --- /dev/null +++ b/tools/perf/util/sort.h @@ -0,0 +1,99 @@ +#ifndef __PERF_SORT_H +#define __PERF_SORT_H +#include "../builtin.h" + +#include "util.h" + +#include "color.h" +#include <linux/list.h> +#include "cache.h" +#include <linux/rbtree.h> +#include "symbol.h" +#include "string.h" +#include "callchain.h" +#include "strlist.h" +#include "values.h" + +#include "../perf.h" +#include "debug.h" +#include "header.h" + +#include "parse-options.h" +#include "parse-events.h" + +#include "thread.h" +#include "sort.h" + +extern regex_t parent_regex; +extern char *sort_order; +extern char default_parent_pattern[]; +extern char *parent_pattern; +extern char default_sort_order[]; +extern int sort__need_collapse; +extern int sort__has_parent; +extern char *field_sep; +extern struct sort_entry sort_comm; +extern struct sort_entry sort_dso; +extern struct sort_entry sort_sym; +extern struct sort_entry sort_parent; +extern unsigned int dsos__col_width; +extern unsigned int comms__col_width; +extern unsigned int threads__col_width; +extern enum sort_type sort__first_dimension; + +struct hist_entry { + struct rb_node rb_node; + u64 count; + struct thread *thread; + struct map *map; + struct symbol *sym; + u64 ip; + char level; + struct symbol *parent; + struct callchain_node callchain; + struct rb_root sorted_chain; +}; + +enum sort_type { + SORT_PID, + SORT_COMM, + SORT_DSO, + SORT_SYM, + SORT_PARENT +}; + +/* + * configurable sorting bits + */ + +struct sort_entry { + struct list_head list; + + const char *header; + + int64_t (*cmp)(struct hist_entry *, struct hist_entry *); + int64_t (*collapse)(struct hist_entry *, struct hist_entry *); + size_t (*print)(FILE *fp, struct hist_entry *, unsigned int width); + unsigned int *width; + bool elide; +}; + +extern struct sort_entry sort_thread; +extern struct list_head hist_entry__sort_list; + +extern int repsep_fprintf(FILE *fp, const char *fmt, ...); +extern size_t sort__thread_print(FILE *, struct hist_entry *, unsigned int); +extern size_t sort__comm_print(FILE *, struct hist_entry *, unsigned int); +extern size_t sort__dso_print(FILE *, struct hist_entry *, unsigned int); +extern size_t sort__sym_print(FILE *, struct hist_entry *, unsigned int __used); +extern int64_t cmp_null(void *, void *); +extern int64_t sort__thread_cmp(struct hist_entry *, struct hist_entry *); +extern int64_t sort__comm_cmp(struct hist_entry *, struct hist_entry *); +extern int64_t sort__comm_collapse(struct hist_entry *, struct hist_entry *); +extern int64_t sort__dso_cmp(struct hist_entry *, struct hist_entry *); +extern int64_t sort__sym_cmp(struct hist_entry *, struct hist_entry *); +extern int64_t sort__parent_cmp(struct hist_entry *, struct hist_entry *); +extern size_t sort__parent_print(FILE *, struct hist_entry *, unsigned int); +extern int sort_dimension__add(const char *); + +#endif /* __PERF_SORT_H */ diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h index d2aa86c014c..a3d121d6c83 100644 --- a/tools/perf/util/strbuf.h +++ b/tools/perf/util/strbuf.h @@ -1,5 +1,5 @@ -#ifndef STRBUF_H -#define STRBUF_H +#ifndef __PERF_STRBUF_H +#define __PERF_STRBUF_H /* * Strbuf's can be use in many ways: as a byte array, or to store arbitrary @@ -134,4 +134,4 @@ extern int launch_editor(const char *path, struct strbuf *buffer, const char *co extern int strbuf_branchname(struct strbuf *sb, const char *name); extern int strbuf_check_branch_ref(struct strbuf *sb, const char *name); -#endif /* STRBUF_H */ +#endif /* __PERF_STRBUF_H */ diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index c93eca9a7be..f24a8cc933d 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -1,4 +1,5 @@ #include "string.h" +#include "util.h" static int hex(char ch) { @@ -32,3 +33,196 @@ int hex2u64(const char *ptr, u64 *long_val) return p - ptr; } + +char *strxfrchar(char *s, char from, char to) +{ + char *p = s; + + while ((p = strchr(p, from)) != NULL) + *p++ = to; + + return s; +} + +#define K 1024LL +/* + * perf_atoll() + * Parse (\d+)(b|B|kb|KB|mb|MB|gb|GB|tb|TB) (e.g. "256MB") + * and return its numeric value + */ +s64 perf_atoll(const char *str) +{ + unsigned int i; + s64 length = -1, unit = 1; + + if (!isdigit(str[0])) + goto out_err; + + for (i = 1; i < strlen(str); i++) { + switch (str[i]) { + case 'B': + case 'b': + break; + case 'K': + if (str[i + 1] != 'B') + goto out_err; + else + goto kilo; + case 'k': + if (str[i + 1] != 'b') + goto out_err; +kilo: + unit = K; + break; + case 'M': + if (str[i + 1] != 'B') + goto out_err; + else + goto mega; + case 'm': + if (str[i + 1] != 'b') + goto out_err; +mega: + unit = K * K; + break; + case 'G': + if (str[i + 1] != 'B') + goto out_err; + else + goto giga; + case 'g': + if (str[i + 1] != 'b') + goto out_err; +giga: + unit = K * K * K; + break; + case 'T': + if (str[i + 1] != 'B') + goto out_err; + else + goto tera; + case 't': + if (str[i + 1] != 'b') + goto out_err; +tera: + unit = K * K * K * K; + break; + case '\0': /* only specified figures */ + unit = 1; + break; + default: + if (!isdigit(str[i])) + goto out_err; + break; + } + } + + length = atoll(str) * unit; + goto out; + +out_err: + length = -1; +out: + return length; +} + +/* + * Helper function for splitting a string into an argv-like array. + * originaly copied from lib/argv_split.c + */ +static const char *skip_sep(const char *cp) +{ + while (*cp && isspace(*cp)) + cp++; + + return cp; +} + +static const char *skip_arg(const char *cp) +{ + while (*cp && !isspace(*cp)) + cp++; + + return cp; +} + +static int count_argc(const char *str) +{ + int count = 0; + + while (*str) { + str = skip_sep(str); + if (*str) { + count++; + str = skip_arg(str); + } + } + + return count; +} + +/** + * argv_free - free an argv + * @argv - the argument vector to be freed + * + * Frees an argv and the strings it points to. + */ +void argv_free(char **argv) +{ + char **p; + for (p = argv; *p; p++) + free(*p); + + free(argv); +} + +/** + * argv_split - split a string at whitespace, returning an argv + * @str: the string to be split + * @argcp: returned argument count + * + * Returns an array of pointers to strings which are split out from + * @str. This is performed by strictly splitting on white-space; no + * quote processing is performed. Multiple whitespace characters are + * considered to be a single argument separator. The returned array + * is always NULL-terminated. Returns NULL on memory allocation + * failure. + */ +char **argv_split(const char *str, int *argcp) +{ + int argc = count_argc(str); + char **argv = zalloc(sizeof(*argv) * (argc+1)); + char **argvp; + + if (argv == NULL) + goto out; + + if (argcp) + *argcp = argc; + + argvp = argv; + + while (*str) { + str = skip_sep(str); + + if (*str) { + const char *p = str; + char *t; + + str = skip_arg(str); + + t = strndup(p, str-p); + if (t == NULL) + goto fail; + *argvp++ = t; + } + } + *argvp = NULL; + +out: + return argv; + +fail: + argv_free(argv); + return NULL; +} diff --git a/tools/perf/util/string.h b/tools/perf/util/string.h index bf39dfadfd2..bfecec265a1 100644 --- a/tools/perf/util/string.h +++ b/tools/perf/util/string.h @@ -1,11 +1,15 @@ -#ifndef _PERF_STRING_H_ -#define _PERF_STRING_H_ +#ifndef __PERF_STRING_H_ +#define __PERF_STRING_H_ #include "types.h" int hex2u64(const char *ptr, u64 *val); +char *strxfrchar(char *s, char from, char to); +s64 perf_atoll(const char *str); +char **argv_split(const char *str, int *argcp); +void argv_free(char **argv); #define _STR(x) #x #define STR(x) _STR(x) -#endif +#endif /* __PERF_STRING_H */ diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h index 921818e44a5..cb4659306d7 100644 --- a/tools/perf/util/strlist.h +++ b/tools/perf/util/strlist.h @@ -1,5 +1,5 @@ -#ifndef STRLIST_H_ -#define STRLIST_H_ +#ifndef __PERF_STRLIST_H +#define __PERF_STRLIST_H #include <linux/rbtree.h> #include <stdbool.h> @@ -36,4 +36,4 @@ static inline unsigned int strlist__nr_entries(const struct strlist *self) } int strlist__parse_list(struct strlist *self, const char *s); -#endif /* STRLIST_H_ */ +#endif /* __PERF_STRLIST_H */ diff --git a/tools/perf/util/svghelper.h b/tools/perf/util/svghelper.h index cd93195aedb..e0781989cc3 100644 --- a/tools/perf/util/svghelper.h +++ b/tools/perf/util/svghelper.h @@ -1,5 +1,5 @@ -#ifndef _INCLUDE_GUARD_SVG_HELPER_ -#define _INCLUDE_GUARD_SVG_HELPER_ +#ifndef __PERF_SVGHELPER_H +#define __PERF_SVGHELPER_H #include "types.h" @@ -25,4 +25,4 @@ extern void svg_close(void); extern int svg_page_width; -#endif +#endif /* __PERF_SVGHELPER_H */ diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 226f44a2357..fffcb937cdc 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -2,14 +2,20 @@ #include "../perf.h" #include "string.h" #include "symbol.h" +#include "thread.h" #include "debug.h" +#include <asm/bug.h> #include <libelf.h> #include <gelf.h> #include <elf.h> +#include <limits.h> +#include <sys/utsname.h> -const char *sym_hist_filter; +#ifndef NT_GNU_BUILD_ID +#define NT_GNU_BUILD_ID 3 +#endif enum dso_origin { DSO__ORIG_KERNEL = 0, @@ -18,94 +24,189 @@ enum dso_origin { DSO__ORIG_UBUNTU, DSO__ORIG_BUILDID, DSO__ORIG_DSO, + DSO__ORIG_KMODULE, DSO__ORIG_NOT_FOUND, }; -static struct symbol *symbol__new(u64 start, u64 len, - const char *name, unsigned int priv_size, - u64 obj_start, int v) +static void dsos__add(struct list_head *head, struct dso *dso); +static struct map *thread__find_map_by_name(struct thread *self, char *name); +static struct map *map__new2(u64 start, struct dso *dso, enum map_type type); +struct symbol *dso__find_symbol(struct dso *self, enum map_type type, u64 addr); +static int dso__load_kernel_sym(struct dso *self, struct map *map, + struct thread *thread, symbol_filter_t filter); +unsigned int symbol__priv_size; +static int vmlinux_path__nr_entries; +static char **vmlinux_path; + +static struct symbol_conf symbol_conf__defaults = { + .use_modules = true, + .try_vmlinux_path = true, +}; + +static struct thread kthread_mem; +struct thread *kthread = &kthread_mem; + +bool dso__loaded(const struct dso *self, enum map_type type) { - size_t namelen = strlen(name) + 1; - struct symbol *self = calloc(1, priv_size + sizeof(*self) + namelen); + return self->loaded & (1 << type); +} - if (!self) - return NULL; +static void dso__set_loaded(struct dso *self, enum map_type type) +{ + self->loaded |= (1 << type); +} - if (v >= 2) - printf("new symbol: %016Lx [%08lx]: %s, hist: %p, obj_start: %p\n", - (u64)start, (unsigned long)len, name, self->hist, (void *)(unsigned long)obj_start); +static void symbols__fixup_end(struct rb_root *self) +{ + struct rb_node *nd, *prevnd = rb_first(self); + struct symbol *curr, *prev; + + if (prevnd == NULL) + return; - self->obj_start= obj_start; - self->hist = NULL; - self->hist_sum = 0; + curr = rb_entry(prevnd, struct symbol, rb_node); - if (sym_hist_filter && !strcmp(name, sym_hist_filter)) - self->hist = calloc(sizeof(u64), len); + for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) { + prev = curr; + curr = rb_entry(nd, struct symbol, rb_node); - if (priv_size) { - memset(self, 0, priv_size); - self = ((void *)self) + priv_size; + if (prev->end == prev->start) + prev->end = curr->start - 1; } + + /* Last entry */ + if (curr->end == curr->start) + curr->end = roundup(curr->start, 4096); +} + +static void __thread__fixup_maps_end(struct thread *self, enum map_type type) +{ + struct map *prev, *curr; + struct rb_node *nd, *prevnd = rb_first(&self->maps[type]); + + if (prevnd == NULL) + return; + + curr = rb_entry(prevnd, struct map, rb_node); + + for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) { + prev = curr; + curr = rb_entry(nd, struct map, rb_node); + prev->end = curr->start - 1; + } + + /* + * We still haven't the actual symbols, so guess the + * last map final address. + */ + curr->end = ~0UL; +} + +static void thread__fixup_maps_end(struct thread *self) +{ + int i; + for (i = 0; i < MAP__NR_TYPES; ++i) + __thread__fixup_maps_end(self, i); +} + +static struct symbol *symbol__new(u64 start, u64 len, const char *name) +{ + size_t namelen = strlen(name) + 1; + struct symbol *self = zalloc(symbol__priv_size + + sizeof(*self) + namelen); + if (self == NULL) + return NULL; + + if (symbol__priv_size) + self = ((void *)self) + symbol__priv_size; + self->start = start; self->end = len ? start + len - 1 : start; + + pr_debug3("%s: %s %#Lx-%#Lx\n", __func__, name, start, self->end); + memcpy(self->name, name, namelen); return self; } -static void symbol__delete(struct symbol *self, unsigned int priv_size) +static void symbol__delete(struct symbol *self) { - free(((void *)self) - priv_size); + free(((void *)self) - symbol__priv_size); } static size_t symbol__fprintf(struct symbol *self, FILE *fp) { - if (!self->module) - return fprintf(fp, " %llx-%llx %s\n", + return fprintf(fp, " %llx-%llx %s\n", self->start, self->end, self->name); - else - return fprintf(fp, " %llx-%llx %s \t[%s]\n", - self->start, self->end, self->name, self->module->name); } -struct dso *dso__new(const char *name, unsigned int sym_priv_size) +static void dso__set_long_name(struct dso *self, char *name) +{ + if (name == NULL) + return; + self->long_name = name; + self->long_name_len = strlen(name); +} + +static void dso__set_basename(struct dso *self) +{ + self->short_name = basename(self->long_name); +} + +struct dso *dso__new(const char *name) { struct dso *self = malloc(sizeof(*self) + strlen(name) + 1); if (self != NULL) { + int i; strcpy(self->name, name); - self->syms = RB_ROOT; - self->sym_priv_size = sym_priv_size; + dso__set_long_name(self, self->name); + self->short_name = self->name; + for (i = 0; i < MAP__NR_TYPES; ++i) + self->symbols[i] = RB_ROOT; self->find_symbol = dso__find_symbol; self->slen_calculated = 0; self->origin = DSO__ORIG_NOT_FOUND; + self->loaded = 0; + self->has_build_id = 0; } return self; } -static void dso__delete_symbols(struct dso *self) +static void symbols__delete(struct rb_root *self) { struct symbol *pos; - struct rb_node *next = rb_first(&self->syms); + struct rb_node *next = rb_first(self); while (next) { pos = rb_entry(next, struct symbol, rb_node); next = rb_next(&pos->rb_node); - rb_erase(&pos->rb_node, &self->syms); - symbol__delete(pos, self->sym_priv_size); + rb_erase(&pos->rb_node, self); + symbol__delete(pos); } } void dso__delete(struct dso *self) { - dso__delete_symbols(self); + int i; + for (i = 0; i < MAP__NR_TYPES; ++i) + symbols__delete(&self->symbols[i]); + if (self->long_name != self->name) + free(self->long_name); free(self); } -static void dso__insert_symbol(struct dso *self, struct symbol *sym) +void dso__set_build_id(struct dso *self, void *build_id) { - struct rb_node **p = &self->syms.rb_node; + memcpy(self->build_id, build_id, sizeof(self->build_id)); + self->has_build_id = 1; +} + +static void symbols__insert(struct rb_root *self, struct symbol *sym) +{ + struct rb_node **p = &self->rb_node; struct rb_node *parent = NULL; const u64 ip = sym->start; struct symbol *s; @@ -119,17 +220,17 @@ static void dso__insert_symbol(struct dso *self, struct symbol *sym) p = &(*p)->rb_right; } rb_link_node(&sym->rb_node, parent, p); - rb_insert_color(&sym->rb_node, &self->syms); + rb_insert_color(&sym->rb_node, self); } -struct symbol *dso__find_symbol(struct dso *self, u64 ip) +static struct symbol *symbols__find(struct rb_root *self, u64 ip) { struct rb_node *n; if (self == NULL) return NULL; - n = self->syms.rb_node; + n = self->rb_node; while (n) { struct symbol *s = rb_entry(n, struct symbol, rb_node); @@ -145,12 +246,42 @@ struct symbol *dso__find_symbol(struct dso *self, u64 ip) return NULL; } -size_t dso__fprintf(struct dso *self, FILE *fp) +struct symbol *dso__find_symbol(struct dso *self, enum map_type type, u64 addr) { - size_t ret = fprintf(fp, "dso: %s\n", self->name); + return symbols__find(&self->symbols[type], addr); +} + +int build_id__sprintf(u8 *self, int len, char *bf) +{ + char *bid = bf; + u8 *raw = self; + int i; + + for (i = 0; i < len; ++i) { + sprintf(bid, "%02x", *raw); + ++raw; + bid += 2; + } + + return raw - self; +} + +size_t dso__fprintf_buildid(struct dso *self, FILE *fp) +{ + char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + + build_id__sprintf(self->build_id, sizeof(self->build_id), sbuild_id); + return fprintf(fp, "%s", sbuild_id); +} +size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp) +{ struct rb_node *nd; - for (nd = rb_first(&self->syms); nd; nd = rb_next(nd)) { + size_t ret = fprintf(fp, "dso: %s (", self->short_name); + + ret += dso__fprintf_buildid(self, fp); + ret += fprintf(fp, ")\n"); + for (nd = rb_first(&self->symbols[type]); nd; nd = rb_next(nd)) { struct symbol *pos = rb_entry(nd, struct symbol, rb_node); ret += symbol__fprintf(pos, fp); } @@ -158,13 +289,17 @@ size_t dso__fprintf(struct dso *self, FILE *fp) return ret; } -static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int v) +/* + * Loads the function entries in /proc/kallsyms into kernel_map->dso, + * so that we can in the next step set the symbol ->end address and then + * call kernel_maps__split_kallsyms. + */ +static int dso__load_all_kallsyms(struct dso *self, struct map *map) { - struct rb_node *nd, *prevnd; char *line = NULL; size_t n; + struct rb_root *root = &self->symbols[map->type]; FILE *file = fopen("/proc/kallsyms", "r"); - int count = 0; if (file == NULL) goto out_failure; @@ -174,6 +309,7 @@ static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int v) struct symbol *sym; int line_len, len; char symbol_type; + char *symbol_name; line_len = getline(&line, &n, file); if (line_len < 0) @@ -196,44 +332,26 @@ static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int v) */ if (symbol_type != 'T' && symbol_type != 'W') continue; + + symbol_name = line + len + 2; /* - * Well fix up the end later, when we have all sorted. + * Will fix up the end later, when we have all symbols sorted. */ - sym = symbol__new(start, 0xdead, line + len + 2, - self->sym_priv_size, 0, v); + sym = symbol__new(start, 0, symbol_name); if (sym == NULL) goto out_delete_line; - - if (filter && filter(self, sym)) - symbol__delete(sym, self->sym_priv_size); - else { - dso__insert_symbol(self, sym); - count++; - } - } - - /* - * Now that we have all sorted out, just set the ->end of all - * symbols - */ - prevnd = rb_first(&self->syms); - - if (prevnd == NULL) - goto out_delete_line; - - for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) { - struct symbol *prev = rb_entry(prevnd, struct symbol, rb_node), - *curr = rb_entry(nd, struct symbol, rb_node); - - prev->end = curr->start - 1; - prevnd = nd; + /* + * We will pass the symbols to the filter later, in + * map__split_kallsyms, when we have split the maps per module + */ + symbols__insert(root, sym); } free(line); fclose(file); - return count; + return 0; out_delete_line: free(line); @@ -241,14 +359,114 @@ out_failure: return -1; } -static int dso__load_perf_map(struct dso *self, symbol_filter_t filter, int v) +/* + * Split the symbols into maps, making sure there are no overlaps, i.e. the + * kernel range is broken in several maps, named [kernel].N, as we don't have + * the original ELF section names vmlinux have. + */ +static int dso__split_kallsyms(struct dso *self, struct map *map, struct thread *thread, + symbol_filter_t filter) +{ + struct map *curr_map = map; + struct symbol *pos; + int count = 0; + struct rb_root *root = &self->symbols[map->type]; + struct rb_node *next = rb_first(root); + int kernel_range = 0; + + while (next) { + char *module; + + pos = rb_entry(next, struct symbol, rb_node); + next = rb_next(&pos->rb_node); + + module = strchr(pos->name, '\t'); + if (module) { + if (!thread->use_modules) + goto discard_symbol; + + *module++ = '\0'; + + if (strcmp(self->name, module)) { + curr_map = thread__find_map_by_name(thread, module); + if (curr_map == NULL) { + pr_debug("/proc/{kallsyms,modules} " + "inconsistency!\n"); + return -1; + } + } + /* + * So that we look just like we get from .ko files, + * i.e. not prelinked, relative to map->start. + */ + pos->start = curr_map->map_ip(curr_map, pos->start); + pos->end = curr_map->map_ip(curr_map, pos->end); + } else if (curr_map != map) { + char dso_name[PATH_MAX]; + struct dso *dso; + + snprintf(dso_name, sizeof(dso_name), "[kernel].%d", + kernel_range++); + + dso = dso__new(dso_name); + if (dso == NULL) + return -1; + + curr_map = map__new2(pos->start, dso, map->type); + if (map == NULL) { + dso__delete(dso); + return -1; + } + + curr_map->map_ip = curr_map->unmap_ip = identity__map_ip; + __thread__insert_map(thread, curr_map); + ++kernel_range; + } + + if (filter && filter(curr_map, pos)) { +discard_symbol: rb_erase(&pos->rb_node, root); + symbol__delete(pos); + } else { + if (curr_map != map) { + rb_erase(&pos->rb_node, root); + symbols__insert(&curr_map->dso->symbols[curr_map->type], pos); + } + count++; + } + } + + return count; +} + + +static int dso__load_kallsyms(struct dso *self, struct map *map, + struct thread *thread, symbol_filter_t filter) +{ + if (dso__load_all_kallsyms(self, map) < 0) + return -1; + + symbols__fixup_end(&self->symbols[map->type]); + self->origin = DSO__ORIG_KERNEL; + + return dso__split_kallsyms(self, map, thread, filter); +} + +size_t kernel_maps__fprintf(FILE *fp) +{ + size_t printed = fprintf(fp, "Kernel maps:\n"); + printed += thread__fprintf_maps(kthread, fp); + return printed + fprintf(fp, "END kernel maps\n"); +} + +static int dso__load_perf_map(struct dso *self, struct map *map, + symbol_filter_t filter) { char *line = NULL; size_t n; FILE *file; int nr_syms = 0; - file = fopen(self->name, "r"); + file = fopen(self->long_name, "r"); if (file == NULL) goto out_failure; @@ -278,16 +496,15 @@ static int dso__load_perf_map(struct dso *self, symbol_filter_t filter, int v) if (len + 2 >= line_len) continue; - sym = symbol__new(start, size, line + len, - self->sym_priv_size, start, v); + sym = symbol__new(start, size, line + len); if (sym == NULL) goto out_delete_line; - if (filter && filter(self, sym)) - symbol__delete(sym, self->sym_priv_size); + if (filter && filter(map, sym)) + symbol__delete(sym); else { - dso__insert_symbol(self, sym); + symbols__insert(&self->symbols[map->type], sym); nr_syms++; } } @@ -393,7 +610,8 @@ static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, * And always look at the original dso, not at debuginfo packages, that * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS). */ -static int dso__synthesize_plt_symbols(struct dso *self, int v) +static int dso__synthesize_plt_symbols(struct dso *self, struct map *map, + symbol_filter_t filter) { uint32_t nr_rel_entries, idx; GElf_Sym sym; @@ -409,7 +627,7 @@ static int dso__synthesize_plt_symbols(struct dso *self, int v) Elf *elf; int nr = 0, symidx, fd, err = 0; - fd = open(self->name, O_RDONLY); + fd = open(self->long_name, O_RDONLY); if (fd < 0) goto out; @@ -477,12 +695,16 @@ static int dso__synthesize_plt_symbols(struct dso *self, int v) "%s@plt", elf_sym__name(&sym, symstrs)); f = symbol__new(plt_offset, shdr_plt.sh_entsize, - sympltname, self->sym_priv_size, 0, v); + sympltname); if (!f) goto out_elf_end; - dso__insert_symbol(self, f); - ++nr; + if (filter && filter(map, f)) + symbol__delete(f); + else { + symbols__insert(&self->symbols[map->type], f); + ++nr; + } } } else if (shdr_rel_plt.sh_type == SHT_REL) { GElf_Rel pos_mem, *pos; @@ -495,12 +717,16 @@ static int dso__synthesize_plt_symbols(struct dso *self, int v) "%s@plt", elf_sym__name(&sym, symstrs)); f = symbol__new(plt_offset, shdr_plt.sh_entsize, - sympltname, self->sym_priv_size, 0, v); + sympltname); if (!f) goto out_elf_end; - dso__insert_symbol(self, f); - ++nr; + if (filter && filter(map, f)) + symbol__delete(f); + else { + symbols__insert(&self->symbols[map->type], f); + ++nr; + } } } @@ -513,14 +739,18 @@ out_close: if (err == 0) return nr; out: - fprintf(stderr, "%s: problems reading %s PLT info.\n", - __func__, self->name); + pr_warning("%s: problems reading %s PLT info.\n", + __func__, self->long_name); return 0; } -static int dso__load_sym(struct dso *self, int fd, const char *name, - symbol_filter_t filter, int v, struct module *mod) +static int dso__load_sym(struct dso *self, struct map *map, + struct thread *thread, const char *name, int fd, + symbol_filter_t filter, int kernel, int kmodule) { + struct map *curr_map = map; + struct dso *curr_dso = self; + size_t dso_name_len = strlen(self->short_name); Elf_Data *symstrs, *secstrs; uint32_t nr_syms; int err = -1; @@ -531,19 +761,16 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, GElf_Sym sym; Elf_Scn *sec, *sec_strndx; Elf *elf; - int nr = 0, kernel = !strcmp("[kernel]", self->name); + int nr = 0; elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); if (elf == NULL) { - if (v) - fprintf(stderr, "%s: cannot read %s ELF file.\n", - __func__, name); + pr_err("%s: cannot read %s ELF file.\n", __func__, name); goto out_close; } if (gelf_getehdr(elf, &ehdr) == NULL) { - if (v) - fprintf(stderr, "%s: cannot get elf header.\n", __func__); + pr_err("%s: cannot get elf header.\n", __func__); goto out_elf_end; } @@ -587,9 +814,7 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) { struct symbol *f; const char *elf_name; - char *demangled; - u64 obj_start; - struct section *section = NULL; + char *demangled = NULL; int is_label = elf_sym__is_label(&sym); const char *section_name; @@ -605,52 +830,85 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, if (is_label && !elf_sec__is_text(&shdr, secstrs)) continue; + elf_name = elf_sym__name(&sym, symstrs); section_name = elf_sec__name(&shdr, secstrs); - obj_start = sym.st_value; - if (self->adjust_symbols) { - if (v >= 2) - printf("adjusting symbol: st_value: %Lx sh_addr: %Lx sh_offset: %Lx\n", - (u64)sym.st_value, (u64)shdr.sh_addr, (u64)shdr.sh_offset); + if (kernel || kmodule) { + char dso_name[PATH_MAX]; - sym.st_value -= shdr.sh_addr - shdr.sh_offset; - } + if (strcmp(section_name, + curr_dso->short_name + dso_name_len) == 0) + goto new_symbol; - if (mod) { - section = mod->sections->find_section(mod->sections, section_name); - if (section) - sym.st_value += section->vma; - else { - fprintf(stderr, "dso__load_sym() module %s lookup of %s failed\n", - mod->name, section_name); - goto out_elf_end; + if (strcmp(section_name, ".text") == 0) { + curr_map = map; + curr_dso = self; + goto new_symbol; } + + snprintf(dso_name, sizeof(dso_name), + "%s%s", self->short_name, section_name); + + curr_map = thread__find_map_by_name(thread, dso_name); + if (curr_map == NULL) { + u64 start = sym.st_value; + + if (kmodule) + start += map->start + shdr.sh_offset; + + curr_dso = dso__new(dso_name); + if (curr_dso == NULL) + goto out_elf_end; + curr_map = map__new2(start, curr_dso, + MAP__FUNCTION); + if (curr_map == NULL) { + dso__delete(curr_dso); + goto out_elf_end; + } + curr_map->map_ip = identity__map_ip; + curr_map->unmap_ip = identity__map_ip; + curr_dso->origin = DSO__ORIG_KERNEL; + __thread__insert_map(kthread, curr_map); + dsos__add(&dsos__kernel, curr_dso); + } else + curr_dso = curr_map->dso; + + goto new_symbol; + } + + if (curr_dso->adjust_symbols) { + pr_debug2("adjusting symbol: st_value: %Lx sh_addr: " + "%Lx sh_offset: %Lx\n", (u64)sym.st_value, + (u64)shdr.sh_addr, (u64)shdr.sh_offset); + sym.st_value -= shdr.sh_addr - shdr.sh_offset; } /* * We need to figure out if the object was created from C++ sources * DWARF DW_compile_unit has this, but we don't always have access * to it... */ - elf_name = elf_sym__name(&sym, symstrs); demangled = bfd_demangle(NULL, elf_name, DMGL_PARAMS | DMGL_ANSI); if (demangled != NULL) elf_name = demangled; - - f = symbol__new(sym.st_value, sym.st_size, elf_name, - self->sym_priv_size, obj_start, v); +new_symbol: + f = symbol__new(sym.st_value, sym.st_size, elf_name); free(demangled); if (!f) goto out_elf_end; - if (filter && filter(self, f)) - symbol__delete(f, self->sym_priv_size); + if (filter && filter(curr_map, f)) + symbol__delete(f); else { - f->module = mod; - dso__insert_symbol(self, f); + symbols__insert(&curr_dso->symbols[curr_map->type], f); nr++; } } + /* + * For misannotated, zeroed, ASM function sizes. + */ + if (nr > 0) + symbols__fixup_end(&self->symbols[map->type]); err = nr; out_elf_end: elf_end(elf); @@ -658,63 +916,153 @@ out_close: return err; } -#define BUILD_ID_SIZE 128 +static bool dso__build_id_equal(const struct dso *self, u8 *build_id) +{ + return memcmp(self->build_id, build_id, sizeof(self->build_id)) == 0; +} -static char *dso__read_build_id(struct dso *self, int v) +static bool __dsos__read_build_ids(struct list_head *head) { - int i; + bool have_build_id = false; + struct dso *pos; + + list_for_each_entry(pos, head, node) + if (filename__read_build_id(pos->long_name, pos->build_id, + sizeof(pos->build_id)) > 0) { + have_build_id = true; + pos->has_build_id = true; + } + + return have_build_id; +} + +bool dsos__read_build_ids(void) +{ + return __dsos__read_build_ids(&dsos__kernel) || + __dsos__read_build_ids(&dsos__user); +} + +/* + * Align offset to 4 bytes as needed for note name and descriptor data. + */ +#define NOTE_ALIGN(n) (((n) + 3) & -4U) + +int filename__read_build_id(const char *filename, void *bf, size_t size) +{ + int fd, err = -1; GElf_Ehdr ehdr; GElf_Shdr shdr; - Elf_Data *build_id_data; + Elf_Data *data; Elf_Scn *sec; - char *build_id = NULL, *bid; - unsigned char *raw; + Elf_Kind ek; + void *ptr; Elf *elf; - int fd = open(self->name, O_RDONLY); + if (size < BUILD_ID_SIZE) + goto out; + + fd = open(filename, O_RDONLY); if (fd < 0) goto out; elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); if (elf == NULL) { - if (v) - fprintf(stderr, "%s: cannot read %s ELF file.\n", - __func__, self->name); + pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename); goto out_close; } + ek = elf_kind(elf); + if (ek != ELF_K_ELF) + goto out_elf_end; + if (gelf_getehdr(elf, &ehdr) == NULL) { - if (v) - fprintf(stderr, "%s: cannot get elf header.\n", __func__); + pr_err("%s: cannot get elf header.\n", __func__); goto out_elf_end; } - sec = elf_section_by_name(elf, &ehdr, &shdr, ".note.gnu.build-id", NULL); - if (sec == NULL) - goto out_elf_end; + sec = elf_section_by_name(elf, &ehdr, &shdr, + ".note.gnu.build-id", NULL); + if (sec == NULL) { + sec = elf_section_by_name(elf, &ehdr, &shdr, + ".notes", NULL); + if (sec == NULL) + goto out_elf_end; + } - build_id_data = elf_getdata(sec, NULL); - if (build_id_data == NULL) - goto out_elf_end; - build_id = malloc(BUILD_ID_SIZE); - if (build_id == NULL) + data = elf_getdata(sec, NULL); + if (data == NULL) goto out_elf_end; - raw = build_id_data->d_buf + 16; - bid = build_id; - for (i = 0; i < 20; ++i) { - sprintf(bid, "%02x", *raw); - ++raw; - bid += 2; + ptr = data->d_buf; + while (ptr < (data->d_buf + data->d_size)) { + GElf_Nhdr *nhdr = ptr; + int namesz = NOTE_ALIGN(nhdr->n_namesz), + descsz = NOTE_ALIGN(nhdr->n_descsz); + const char *name; + + ptr += sizeof(*nhdr); + name = ptr; + ptr += namesz; + if (nhdr->n_type == NT_GNU_BUILD_ID && + nhdr->n_namesz == sizeof("GNU")) { + if (memcmp(name, "GNU", sizeof("GNU")) == 0) { + memcpy(bf, ptr, BUILD_ID_SIZE); + err = BUILD_ID_SIZE; + break; + } + } + ptr += descsz; } - if (v >= 2) - printf("%s(%s): %s\n", __func__, self->name, build_id); out_elf_end: elf_end(elf); out_close: close(fd); out: - return build_id; + return err; +} + +int sysfs__read_build_id(const char *filename, void *build_id, size_t size) +{ + int fd, err = -1; + + if (size < BUILD_ID_SIZE) + goto out; + + fd = open(filename, O_RDONLY); + if (fd < 0) + goto out; + + while (1) { + char bf[BUFSIZ]; + GElf_Nhdr nhdr; + int namesz, descsz; + + if (read(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr)) + break; + + namesz = NOTE_ALIGN(nhdr.n_namesz); + descsz = NOTE_ALIGN(nhdr.n_descsz); + if (nhdr.n_type == NT_GNU_BUILD_ID && + nhdr.n_namesz == sizeof("GNU")) { + if (read(fd, bf, namesz) != namesz) + break; + if (memcmp(bf, "GNU", sizeof("GNU")) == 0) { + if (read(fd, build_id, + BUILD_ID_SIZE) == BUILD_ID_SIZE) { + err = 0; + break; + } + } else if (read(fd, bf, descsz) != descsz) + break; + } else { + int n = namesz + descsz; + if (read(fd, bf, n) != n) + break; + } + } + close(fd); +out: + return err; } char dso__symtab_origin(const struct dso *self) @@ -726,6 +1074,7 @@ char dso__symtab_origin(const struct dso *self) [DSO__ORIG_UBUNTU] = 'u', [DSO__ORIG_BUILDID] = 'b', [DSO__ORIG_DSO] = 'd', + [DSO__ORIG_KMODULE] = 'K', }; if (self == NULL || self->origin == DSO__ORIG_NOT_FOUND) @@ -733,20 +1082,27 @@ char dso__symtab_origin(const struct dso *self) return origin[self->origin]; } -int dso__load(struct dso *self, symbol_filter_t filter, int v) +int dso__load(struct dso *self, struct map *map, symbol_filter_t filter) { int size = PATH_MAX; - char *name = malloc(size), *build_id = NULL; + char *name; + u8 build_id[BUILD_ID_SIZE]; int ret = -1; int fd; + dso__set_loaded(self, map->type); + + if (self->kernel) + return dso__load_kernel_sym(self, map, kthread, filter); + + name = malloc(size); if (!name) return -1; self->adjust_symbols = 0; if (strncmp(self->name, "/tmp/perf-", 10) == 0) { - ret = dso__load_perf_map(self, filter, v); + ret = dso__load_perf_map(self, map, filter); self->origin = ret > 0 ? DSO__ORIG_JAVA_JIT : DSO__ORIG_NOT_FOUND; return ret; @@ -759,34 +1115,50 @@ more: self->origin++; switch (self->origin) { case DSO__ORIG_FEDORA: - snprintf(name, size, "/usr/lib/debug%s.debug", self->name); + snprintf(name, size, "/usr/lib/debug%s.debug", + self->long_name); break; case DSO__ORIG_UBUNTU: - snprintf(name, size, "/usr/lib/debug%s", self->name); + snprintf(name, size, "/usr/lib/debug%s", + self->long_name); break; case DSO__ORIG_BUILDID: - build_id = dso__read_build_id(self, v); - if (build_id != NULL) { + if (filename__read_build_id(self->long_name, build_id, + sizeof(build_id))) { + char build_id_hex[BUILD_ID_SIZE * 2 + 1]; + + build_id__sprintf(build_id, sizeof(build_id), + build_id_hex); snprintf(name, size, "/usr/lib/debug/.build-id/%.2s/%s.debug", - build_id, build_id + 2); - free(build_id); + build_id_hex, build_id_hex + 2); + if (self->has_build_id) + goto compare_build_id; break; } self->origin++; /* Fall thru */ case DSO__ORIG_DSO: - snprintf(name, size, "%s", self->name); + snprintf(name, size, "%s", self->long_name); break; default: goto out; } + if (self->has_build_id) { + if (filename__read_build_id(name, build_id, + sizeof(build_id)) < 0) + goto more; +compare_build_id: + if (!dso__build_id_equal(self, build_id)) + goto more; + } + fd = open(name, O_RDONLY); } while (fd < 0); - ret = dso__load_sym(self, fd, name, filter, v, NULL); + ret = dso__load_sym(self, map, NULL, name, fd, filter, 0, 0); close(fd); /* @@ -796,7 +1168,7 @@ more: goto more; if (ret > 0) { - int nr_plt = dso__synthesize_plt_symbols(self, v); + int nr_plt = dso__synthesize_plt_symbols(self, map, filter); if (nr_plt > 0) ret += nr_plt; } @@ -807,151 +1179,279 @@ out: return ret; } -static int dso__load_module(struct dso *self, struct mod_dso *mods, const char *name, - symbol_filter_t filter, int v) +static struct map *thread__find_map_by_name(struct thread *self, char *name) { - struct module *mod = mod_dso__find_module(mods, name); - int err = 0, fd; + struct rb_node *nd; - if (mod == NULL || !mod->active) - return err; + for (nd = rb_first(&self->maps[MAP__FUNCTION]); nd; nd = rb_next(nd)) { + struct map *map = rb_entry(nd, struct map, rb_node); - fd = open(mod->path, O_RDONLY); + if (map->dso && strcmp(map->dso->name, name) == 0) + return map; + } - if (fd < 0) - return err; + return NULL; +} - err = dso__load_sym(self, fd, name, filter, v, mod); - close(fd); +static int dsos__set_modules_path_dir(char *dirname) +{ + struct dirent *dent; + DIR *dir = opendir(dirname); - return err; + if (!dir) { + pr_debug("%s: cannot open %s dir\n", __func__, dirname); + return -1; + } + + while ((dent = readdir(dir)) != NULL) { + char path[PATH_MAX]; + + if (dent->d_type == DT_DIR) { + if (!strcmp(dent->d_name, ".") || + !strcmp(dent->d_name, "..")) + continue; + + snprintf(path, sizeof(path), "%s/%s", + dirname, dent->d_name); + if (dsos__set_modules_path_dir(path) < 0) + goto failure; + } else { + char *dot = strrchr(dent->d_name, '.'), + dso_name[PATH_MAX]; + struct map *map; + char *long_name; + + if (dot == NULL || strcmp(dot, ".ko")) + continue; + snprintf(dso_name, sizeof(dso_name), "[%.*s]", + (int)(dot - dent->d_name), dent->d_name); + + strxfrchar(dso_name, '-', '_'); + map = thread__find_map_by_name(kthread, dso_name); + if (map == NULL) + continue; + + snprintf(path, sizeof(path), "%s/%s", + dirname, dent->d_name); + + long_name = strdup(path); + if (long_name == NULL) + goto failure; + dso__set_long_name(map->dso, long_name); + } + } + + return 0; +failure: + closedir(dir); + return -1; } -int dso__load_modules(struct dso *self, symbol_filter_t filter, int v) +static int dsos__set_modules_path(void) { - struct mod_dso *mods = mod_dso__new_dso("modules"); - struct module *pos; - struct rb_node *next; - int err, count = 0; + struct utsname uts; + char modules_path[PATH_MAX]; - err = mod_dso__load_modules(mods); - - if (err <= 0) - return err; + if (uname(&uts) < 0) + return -1; - /* - * Iterate over modules, and load active symbols. - */ - next = rb_first(&mods->mods); - while (next) { - pos = rb_entry(next, struct module, rb_node); - err = dso__load_module(self, mods, pos->name, filter, v); + snprintf(modules_path, sizeof(modules_path), "/lib/modules/%s/kernel", + uts.release); - if (err < 0) - break; + return dsos__set_modules_path_dir(modules_path); +} - next = rb_next(&pos->rb_node); - count += err; - } +/* + * Constructor variant for modules (where we know from /proc/modules where + * they are loaded) and for vmlinux, where only after we load all the + * symbols we'll know where it starts and ends. + */ +static struct map *map__new2(u64 start, struct dso *dso, enum map_type type) +{ + struct map *self = malloc(sizeof(*self)); - if (err < 0) { - mod_dso__delete_modules(mods); - mod_dso__delete_self(mods); - return err; + if (self != NULL) { + /* + * ->end will be filled after we load all the symbols + */ + map__init(self, type, start, 0, 0, dso); } - return count; + return self; } -static inline void dso__fill_symbol_holes(struct dso *self) +static int thread__create_module_maps(struct thread *self) { - struct symbol *prev = NULL; - struct rb_node *nd; + char *line = NULL; + size_t n; + FILE *file = fopen("/proc/modules", "r"); + struct map *map; - for (nd = rb_last(&self->syms); nd; nd = rb_prev(nd)) { - struct symbol *pos = rb_entry(nd, struct symbol, rb_node); + if (file == NULL) + return -1; - if (prev) { - u64 hole = 0; - int alias = pos->start == prev->start; + while (!feof(file)) { + char name[PATH_MAX]; + u64 start; + struct dso *dso; + char *sep; + int line_len; - if (!alias) - hole = prev->start - pos->end - 1; + line_len = getline(&line, &n, file); + if (line_len < 0) + break; - if (hole || alias) { - if (alias) - pos->end = prev->end; - else if (hole) - pos->end = prev->start - 1; - } + if (!line) + goto out_failure; + + line[--line_len] = '\0'; /* \n */ + + sep = strrchr(line, 'x'); + if (sep == NULL) + continue; + + hex2u64(sep + 1, &start); + + sep = strchr(line, ' '); + if (sep == NULL) + continue; + + *sep = '\0'; + + snprintf(name, sizeof(name), "[%s]", line); + dso = dso__new(name); + + if (dso == NULL) + goto out_delete_line; + + map = map__new2(start, dso, MAP__FUNCTION); + if (map == NULL) { + dso__delete(dso); + goto out_delete_line; } - prev = pos; + + snprintf(name, sizeof(name), + "/sys/module/%s/notes/.note.gnu.build-id", line); + if (sysfs__read_build_id(name, dso->build_id, + sizeof(dso->build_id)) == 0) + dso->has_build_id = true; + + dso->origin = DSO__ORIG_KMODULE; + __thread__insert_map(self, map); + dsos__add(&dsos__kernel, dso); } + + free(line); + fclose(file); + + return dsos__set_modules_path(); + +out_delete_line: + free(line); +out_failure: + return -1; } -static int dso__load_vmlinux(struct dso *self, const char *vmlinux, - symbol_filter_t filter, int v) +static int dso__load_vmlinux(struct dso *self, struct map *map, struct thread *thread, + const char *vmlinux, symbol_filter_t filter) { - int err, fd = open(vmlinux, O_RDONLY); + int err = -1, fd; - if (fd < 0) - return -1; + if (self->has_build_id) { + u8 build_id[BUILD_ID_SIZE]; - err = dso__load_sym(self, fd, vmlinux, filter, v, NULL); + if (filename__read_build_id(vmlinux, build_id, + sizeof(build_id)) < 0) { + pr_debug("No build_id in %s, ignoring it\n", vmlinux); + return -1; + } + if (!dso__build_id_equal(self, build_id)) { + char expected_build_id[BUILD_ID_SIZE * 2 + 1], + vmlinux_build_id[BUILD_ID_SIZE * 2 + 1]; + + build_id__sprintf(self->build_id, + sizeof(self->build_id), + expected_build_id); + build_id__sprintf(build_id, sizeof(build_id), + vmlinux_build_id); + pr_debug("build_id in %s is %s while expected is %s, " + "ignoring it\n", vmlinux, vmlinux_build_id, + expected_build_id); + return -1; + } + } - if (err > 0) - dso__fill_symbol_holes(self); + fd = open(vmlinux, O_RDONLY); + if (fd < 0) + return -1; + dso__set_loaded(self, map->type); + err = dso__load_sym(self, map, thread, self->long_name, fd, filter, 1, 0); close(fd); return err; } -int dso__load_kernel(struct dso *self, const char *vmlinux, - symbol_filter_t filter, int v, int use_modules) +static int dso__load_kernel_sym(struct dso *self, struct map *map, + struct thread *thread, symbol_filter_t filter) { - int err = -1; - - if (vmlinux) { - err = dso__load_vmlinux(self, vmlinux, filter, v); - if (err > 0 && use_modules) { - int syms = dso__load_modules(self, filter, v); - - if (syms < 0) { - fprintf(stderr, "dso__load_modules failed!\n"); - return syms; + int err; + bool is_kallsyms; + + if (vmlinux_path != NULL) { + int i; + pr_debug("Looking at the vmlinux_path (%d entries long)\n", + vmlinux_path__nr_entries); + for (i = 0; i < vmlinux_path__nr_entries; ++i) { + err = dso__load_vmlinux(self, map, thread, + vmlinux_path[i], filter); + if (err > 0) { + pr_debug("Using %s for symbols\n", + vmlinux_path[i]); + dso__set_long_name(self, + strdup(vmlinux_path[i])); + goto out_fixup; } - err += syms; } } - if (err <= 0) - err = dso__load_kallsyms(self, filter, v); + is_kallsyms = self->long_name[0] == '['; + if (is_kallsyms) + goto do_kallsyms; - if (err > 0) - self->origin = DSO__ORIG_KERNEL; + err = dso__load_vmlinux(self, map, thread, self->long_name, filter); + if (err <= 0) { + pr_info("The file %s cannot be used, " + "trying to use /proc/kallsyms...", self->long_name); +do_kallsyms: + err = dso__load_kallsyms(self, map, thread, filter); + if (err > 0 && !is_kallsyms) + dso__set_long_name(self, strdup("[kernel.kallsyms]")); + } + + if (err > 0) { +out_fixup: + map__fixup_start(map); + map__fixup_end(map); + } return err; } -LIST_HEAD(dsos); -struct dso *kernel_dso; -struct dso *vdso; -struct dso *hypervisor_dso; - -const char *vmlinux_name = "vmlinux"; -int modules; +LIST_HEAD(dsos__user); +LIST_HEAD(dsos__kernel); +struct dso *vdso; -static void dsos__add(struct dso *dso) +static void dsos__add(struct list_head *head, struct dso *dso) { - list_add_tail(&dso->node, &dsos); + list_add_tail(&dso->node, head); } -static struct dso *dsos__find(const char *name) +static struct dso *dsos__find(struct list_head *head, const char *name) { struct dso *pos; - list_for_each_entry(pos, &dsos, node) + list_for_each_entry(pos, head, node) if (strcmp(pos->name, name) == 0) return pos; return NULL; @@ -959,79 +1459,170 @@ static struct dso *dsos__find(const char *name) struct dso *dsos__findnew(const char *name) { - struct dso *dso = dsos__find(name); - int nr; - - if (dso) - return dso; - - dso = dso__new(name, 0); - if (!dso) - goto out_delete_dso; + struct dso *dso = dsos__find(&dsos__user, name); - nr = dso__load(dso, NULL, verbose); - if (nr < 0) { - eprintf("Failed to open: %s\n", name); - goto out_delete_dso; + if (!dso) { + dso = dso__new(name); + if (dso != NULL) { + dsos__add(&dsos__user, dso); + dso__set_basename(dso); + } } - if (!nr) - eprintf("No symbols found in: %s, maybe install a debug package?\n", name); - - dsos__add(dso); return dso; +} -out_delete_dso: - dso__delete(dso); - return NULL; +static void __dsos__fprintf(struct list_head *head, FILE *fp) +{ + struct dso *pos; + + list_for_each_entry(pos, head, node) { + int i; + for (i = 0; i < MAP__NR_TYPES; ++i) + dso__fprintf(pos, i, fp); + } } void dsos__fprintf(FILE *fp) { + __dsos__fprintf(&dsos__kernel, fp); + __dsos__fprintf(&dsos__user, fp); +} + +static size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp) +{ struct dso *pos; + size_t ret = 0; - list_for_each_entry(pos, &dsos, node) - dso__fprintf(pos, fp); + list_for_each_entry(pos, head, node) { + ret += dso__fprintf_buildid(pos, fp); + ret += fprintf(fp, " %s\n", pos->long_name); + } + return ret; } -static struct symbol *vdso__find_symbol(struct dso *dso, u64 ip) +size_t dsos__fprintf_buildid(FILE *fp) { - return dso__find_symbol(dso, ip); + return (__dsos__fprintf_buildid(&dsos__kernel, fp) + + __dsos__fprintf_buildid(&dsos__user, fp)); } -int load_kernel(void) +static int thread__create_kernel_map(struct thread *self, const char *vmlinux) { - int err; + struct map *kmap; + struct dso *kernel = dso__new(vmlinux ?: "[kernel.kallsyms]"); - kernel_dso = dso__new("[kernel]", 0); - if (!kernel_dso) + if (kernel == NULL) return -1; - err = dso__load_kernel(kernel_dso, vmlinux_name, NULL, verbose, modules); - if (err <= 0) { - dso__delete(kernel_dso); - kernel_dso = NULL; - } else - dsos__add(kernel_dso); + kmap = map__new2(0, kernel, MAP__FUNCTION); + if (kmap == NULL) + goto out_delete_kernel_dso; - vdso = dso__new("[vdso]", 0); - if (!vdso) - return -1; + kmap->map_ip = kmap->unmap_ip = identity__map_ip; + kernel->short_name = "[kernel]"; + kernel->kernel = 1; - vdso->find_symbol = vdso__find_symbol; + vdso = dso__new("[vdso]"); + if (vdso == NULL) + goto out_delete_kernel_map; + dso__set_loaded(vdso, MAP__FUNCTION); - dsos__add(vdso); + if (sysfs__read_build_id("/sys/kernel/notes", kernel->build_id, + sizeof(kernel->build_id)) == 0) + kernel->has_build_id = true; - hypervisor_dso = dso__new("[hypervisor]", 0); - if (!hypervisor_dso) - return -1; - dsos__add(hypervisor_dso); + __thread__insert_map(self, kmap); + dsos__add(&dsos__kernel, kernel); + dsos__add(&dsos__user, vdso); - return err; + return 0; + +out_delete_kernel_map: + map__delete(kmap); +out_delete_kernel_dso: + dso__delete(kernel); + return -1; +} + +static void vmlinux_path__exit(void) +{ + while (--vmlinux_path__nr_entries >= 0) { + free(vmlinux_path[vmlinux_path__nr_entries]); + vmlinux_path[vmlinux_path__nr_entries] = NULL; + } + + free(vmlinux_path); + vmlinux_path = NULL; } +static int vmlinux_path__init(void) +{ + struct utsname uts; + char bf[PATH_MAX]; + + if (uname(&uts) < 0) + return -1; + + vmlinux_path = malloc(sizeof(char *) * 5); + if (vmlinux_path == NULL) + return -1; + + vmlinux_path[vmlinux_path__nr_entries] = strdup("vmlinux"); + if (vmlinux_path[vmlinux_path__nr_entries] == NULL) + goto out_fail; + ++vmlinux_path__nr_entries; + vmlinux_path[vmlinux_path__nr_entries] = strdup("/boot/vmlinux"); + if (vmlinux_path[vmlinux_path__nr_entries] == NULL) + goto out_fail; + ++vmlinux_path__nr_entries; + snprintf(bf, sizeof(bf), "/boot/vmlinux-%s", uts.release); + vmlinux_path[vmlinux_path__nr_entries] = strdup(bf); + if (vmlinux_path[vmlinux_path__nr_entries] == NULL) + goto out_fail; + ++vmlinux_path__nr_entries; + snprintf(bf, sizeof(bf), "/lib/modules/%s/build/vmlinux", uts.release); + vmlinux_path[vmlinux_path__nr_entries] = strdup(bf); + if (vmlinux_path[vmlinux_path__nr_entries] == NULL) + goto out_fail; + ++vmlinux_path__nr_entries; + snprintf(bf, sizeof(bf), "/usr/lib/debug/lib/modules/%s/vmlinux", + uts.release); + vmlinux_path[vmlinux_path__nr_entries] = strdup(bf); + if (vmlinux_path[vmlinux_path__nr_entries] == NULL) + goto out_fail; + ++vmlinux_path__nr_entries; + + return 0; + +out_fail: + vmlinux_path__exit(); + return -1; +} -void symbol__init(void) +int symbol__init(struct symbol_conf *conf) { + const struct symbol_conf *pconf = conf ?: &symbol_conf__defaults; + elf_version(EV_CURRENT); + symbol__priv_size = pconf->priv_size; + thread__init(kthread, 0); + + if (pconf->try_vmlinux_path && vmlinux_path__init() < 0) + return -1; + + if (thread__create_kernel_map(kthread, pconf->vmlinux_name) < 0) { + vmlinux_path__exit(); + return -1; + } + + kthread->use_modules = pconf->use_modules; + if (pconf->use_modules && thread__create_module_maps(kthread) < 0) + pr_debug("Failed to load list of modules in use, " + "continuing...\n"); + /* + * Now that we have all the maps created, just set the ->end of them: + */ + thread__fixup_maps_end(kthread); + return 0; } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 829da9edba6..17003efa0b3 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -1,11 +1,11 @@ -#ifndef _PERF_SYMBOL_ -#define _PERF_SYMBOL_ 1 +#ifndef __PERF_SYMBOL +#define __PERF_SYMBOL 1 #include <linux/types.h> +#include <stdbool.h> #include "types.h" #include <linux/list.h> #include <linux/rbtree.h> -#include "module.h" #include "event.h" #ifdef HAVE_CPLUS_DEMANGLE @@ -46,57 +46,75 @@ struct symbol { struct rb_node rb_node; u64 start; u64 end; - u64 obj_start; - u64 hist_sum; - u64 *hist; - struct module *module; - void *priv; char name[0]; }; +struct symbol_conf { + unsigned short priv_size; + bool try_vmlinux_path, + use_modules; + const char *vmlinux_name; +}; + +extern unsigned int symbol__priv_size; + +static inline void *symbol__priv(struct symbol *self) +{ + return ((void *)self) - symbol__priv_size; +} + +struct addr_location { + struct thread *thread; + struct map *map; + struct symbol *sym; + u64 addr; + char level; +}; + struct dso { struct list_head node; - struct rb_root syms; - struct symbol *(*find_symbol)(struct dso *, u64 ip); - unsigned int sym_priv_size; - unsigned char adjust_symbols; - unsigned char slen_calculated; + struct rb_root symbols[MAP__NR_TYPES]; + struct symbol *(*find_symbol)(struct dso *self, + enum map_type type, u64 addr); + u8 adjust_symbols:1; + u8 slen_calculated:1; + u8 has_build_id:1; + u8 kernel:1; unsigned char origin; + u8 loaded; + u8 build_id[BUILD_ID_SIZE]; + u16 long_name_len; + const char *short_name; + char *long_name; char name[0]; }; -extern const char *sym_hist_filter; - -typedef int (*symbol_filter_t)(struct dso *self, struct symbol *sym); - -struct dso *dso__new(const char *name, unsigned int sym_priv_size); +struct dso *dso__new(const char *name); void dso__delete(struct dso *self); -static inline void *dso__sym_priv(struct dso *self, struct symbol *sym) -{ - return ((void *)sym) - self->sym_priv_size; -} - -struct symbol *dso__find_symbol(struct dso *self, u64 ip); +bool dso__loaded(const struct dso *self, enum map_type type); -int dso__load_kernel(struct dso *self, const char *vmlinux, - symbol_filter_t filter, int verbose, int modules); -int dso__load_modules(struct dso *self, symbol_filter_t filter, int verbose); -int dso__load(struct dso *self, symbol_filter_t filter, int verbose); struct dso *dsos__findnew(const char *name); +int dso__load(struct dso *self, struct map *map, symbol_filter_t filter); void dsos__fprintf(FILE *fp); +size_t dsos__fprintf_buildid(FILE *fp); -size_t dso__fprintf(struct dso *self, FILE *fp); +size_t dso__fprintf_buildid(struct dso *self, FILE *fp); +size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp); char dso__symtab_origin(const struct dso *self); +void dso__set_build_id(struct dso *self, void *build_id); + +int filename__read_build_id(const char *filename, void *bf, size_t size); +int sysfs__read_build_id(const char *filename, void *bf, size_t size); +bool dsos__read_build_ids(void); +int build_id__sprintf(u8 *self, int len, char *bf); -int load_kernel(void); +size_t kernel_maps__fprintf(FILE *fp); -void symbol__init(void); +int symbol__init(struct symbol_conf *conf); -extern struct list_head dsos; -extern struct dso *kernel_dso; +struct thread; +struct thread *kthread; +extern struct list_head dsos__user, dsos__kernel; extern struct dso *vdso; -extern struct dso *hypervisor_dso; -extern const char *vmlinux_name; -extern int modules; -#endif /* _PERF_SYMBOL_ */ +#endif /* __PERF_SYMBOL */ diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 45efb5db0d1..603f5610861 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -6,16 +6,29 @@ #include "util.h" #include "debug.h" +static struct rb_root threads; +static struct thread *last_match; + +void thread__init(struct thread *self, pid_t pid) +{ + int i; + self->pid = pid; + self->comm = NULL; + for (i = 0; i < MAP__NR_TYPES; ++i) { + self->maps[i] = RB_ROOT; + INIT_LIST_HEAD(&self->removed_maps[i]); + } +} + static struct thread *thread__new(pid_t pid) { - struct thread *self = calloc(1, sizeof(*self)); + struct thread *self = zalloc(sizeof(*self)); if (self != NULL) { - self->pid = pid; + thread__init(self, pid); self->comm = malloc(32); if (self->comm) snprintf(self->comm, 32, ":%d", self->pid); - INIT_LIST_HEAD(&self->maps); } return self; @@ -29,21 +42,84 @@ int thread__set_comm(struct thread *self, const char *comm) return self->comm ? 0 : -ENOMEM; } -static size_t thread__fprintf(struct thread *self, FILE *fp) +int thread__comm_len(struct thread *self) +{ + if (!self->comm_len) { + if (!self->comm) + return 0; + self->comm_len = strlen(self->comm); + } + + return self->comm_len; +} + +static const char *map_type__name[MAP__NR_TYPES] = { + [MAP__FUNCTION] = "Functions", +}; + +static size_t __thread__fprintf_maps(struct thread *self, + enum map_type type, FILE *fp) +{ + size_t printed = fprintf(fp, "%s:\n", map_type__name[type]); + struct rb_node *nd; + + for (nd = rb_first(&self->maps[type]); nd; nd = rb_next(nd)) { + struct map *pos = rb_entry(nd, struct map, rb_node); + printed += fprintf(fp, "Map:"); + printed += map__fprintf(pos, fp); + if (verbose > 1) { + printed += dso__fprintf(pos->dso, type, fp); + printed += fprintf(fp, "--\n"); + } + } + + return printed; +} + +size_t thread__fprintf_maps(struct thread *self, FILE *fp) +{ + size_t printed = 0, i; + for (i = 0; i < MAP__NR_TYPES; ++i) + printed += __thread__fprintf_maps(self, i, fp); + return printed; +} + +static size_t __thread__fprintf_removed_maps(struct thread *self, + enum map_type type, FILE *fp) { struct map *pos; - size_t ret = fprintf(fp, "Thread %d %s\n", self->pid, self->comm); + size_t printed = 0; + + list_for_each_entry(pos, &self->removed_maps[type], node) { + printed += fprintf(fp, "Map:"); + printed += map__fprintf(pos, fp); + if (verbose > 1) { + printed += dso__fprintf(pos->dso, type, fp); + printed += fprintf(fp, "--\n"); + } + } + return printed; +} - list_for_each_entry(pos, &self->maps, node) - ret += map__fprintf(pos, fp); +static size_t thread__fprintf_removed_maps(struct thread *self, FILE *fp) +{ + size_t printed = 0, i; + for (i = 0; i < MAP__NR_TYPES; ++i) + printed += __thread__fprintf_removed_maps(self, i, fp); + return printed; +} - return ret; +static size_t thread__fprintf(struct thread *self, FILE *fp) +{ + size_t printed = fprintf(fp, "Thread %d %s\n", self->pid, self->comm); + printed += thread__fprintf_removed_maps(self, fp); + printed += fprintf(fp, "Removed maps:\n"); + return printed + thread__fprintf_removed_maps(self, fp); } -struct thread * -threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match) +struct thread *threads__findnew(pid_t pid) { - struct rb_node **p = &threads->rb_node; + struct rb_node **p = &threads.rb_node; struct rb_node *parent = NULL; struct thread *th; @@ -52,15 +128,15 @@ threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match) * so most of the time we dont have to look up * the full rbtree: */ - if (*last_match && (*last_match)->pid == pid) - return *last_match; + if (last_match && last_match->pid == pid) + return last_match; while (*p != NULL) { parent = *p; th = rb_entry(parent, struct thread, rb_node); if (th->pid == pid) { - *last_match = th; + last_match = th; return th; } @@ -73,17 +149,16 @@ threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match) th = thread__new(pid); if (th != NULL) { rb_link_node(&th->rb_node, parent, p); - rb_insert_color(&th->rb_node, threads); - *last_match = th; + rb_insert_color(&th->rb_node, &threads); + last_match = th; } return th; } -struct thread * -register_idle_thread(struct rb_root *threads, struct thread **last_match) +struct thread *register_idle_thread(void) { - struct thread *thread = threads__findnew(0, threads, last_match); + struct thread *thread = threads__findnew(0); if (!thread || thread__set_comm(thread, "swapper")) { fprintf(stderr, "problem inserting idle task.\n"); @@ -93,79 +168,116 @@ register_idle_thread(struct rb_root *threads, struct thread **last_match) return thread; } -void thread__insert_map(struct thread *self, struct map *map) +static void thread__remove_overlappings(struct thread *self, struct map *map) { - struct map *pos, *tmp; + struct rb_root *root = &self->maps[map->type]; + struct rb_node *next = rb_first(root); - list_for_each_entry_safe(pos, tmp, &self->maps, node) { - if (map__overlap(pos, map)) { - if (verbose >= 2) { - printf("overlapping maps:\n"); - map__fprintf(map, stdout); - map__fprintf(pos, stdout); - } + while (next) { + struct map *pos = rb_entry(next, struct map, rb_node); + next = rb_next(&pos->rb_node); - if (map->start <= pos->start && map->end > pos->start) - pos->start = map->end; + if (!map__overlap(pos, map)) + continue; - if (map->end >= pos->end && map->start < pos->end) - pos->end = map->start; + if (verbose >= 2) { + fputs("overlapping maps:\n", stderr); + map__fprintf(map, stderr); + map__fprintf(pos, stderr); + } - if (verbose >= 2) { - printf("after collision:\n"); - map__fprintf(pos, stdout); - } + rb_erase(&pos->rb_node, root); + /* + * We may have references to this map, for instance in some + * hist_entry instances, so just move them to a separate + * list. + */ + list_add_tail(&pos->node, &self->removed_maps[map->type]); + } +} - if (pos->start >= pos->end) { - list_del_init(&pos->node); - free(pos); - } - } +void maps__insert(struct rb_root *maps, struct map *map) +{ + struct rb_node **p = &maps->rb_node; + struct rb_node *parent = NULL; + const u64 ip = map->start; + struct map *m; + + while (*p != NULL) { + parent = *p; + m = rb_entry(parent, struct map, rb_node); + if (ip < m->start) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; } - list_add_tail(&map->node, &self->maps); + rb_link_node(&map->rb_node, parent, p); + rb_insert_color(&map->rb_node, maps); } -int thread__fork(struct thread *self, struct thread *parent) +struct map *maps__find(struct rb_root *maps, u64 ip) { - struct map *map; + struct rb_node **p = &maps->rb_node; + struct rb_node *parent = NULL; + struct map *m; - if (self->comm) - free(self->comm); - self->comm = strdup(parent->comm); - if (!self->comm) - return -ENOMEM; + while (*p != NULL) { + parent = *p; + m = rb_entry(parent, struct map, rb_node); + if (ip < m->start) + p = &(*p)->rb_left; + else if (ip > m->end) + p = &(*p)->rb_right; + else + return m; + } + + return NULL; +} + +void thread__insert_map(struct thread *self, struct map *map) +{ + thread__remove_overlappings(self, map); + maps__insert(&self->maps[map->type], map); +} - list_for_each_entry(map, &parent->maps, node) { +static int thread__clone_maps(struct thread *self, struct thread *parent, + enum map_type type) +{ + struct rb_node *nd; + for (nd = rb_first(&parent->maps[type]); nd; nd = rb_next(nd)) { + struct map *map = rb_entry(nd, struct map, rb_node); struct map *new = map__clone(map); - if (!new) + if (new == NULL) return -ENOMEM; thread__insert_map(self, new); } - return 0; } -struct map *thread__find_map(struct thread *self, u64 ip) +int thread__fork(struct thread *self, struct thread *parent) { - struct map *pos; + int i; - if (self == NULL) - return NULL; - - list_for_each_entry(pos, &self->maps, node) - if (ip >= pos->start && ip <= pos->end) - return pos; + if (self->comm) + free(self->comm); + self->comm = strdup(parent->comm); + if (!self->comm) + return -ENOMEM; - return NULL; + for (i = 0; i < MAP__NR_TYPES; ++i) + if (thread__clone_maps(self, parent, i) < 0) + return -ENOMEM; + return 0; } -size_t threads__fprintf(FILE *fp, struct rb_root *threads) +size_t threads__fprintf(FILE *fp) { size_t ret = 0; struct rb_node *nd; - for (nd = rb_first(threads); nd; nd = rb_next(nd)) { + for (nd = rb_first(&threads); nd; nd = rb_next(nd)) { struct thread *pos = rb_entry(nd, struct thread, rb_node); ret += thread__fprintf(pos, fp); @@ -173,3 +285,15 @@ size_t threads__fprintf(FILE *fp, struct rb_root *threads) return ret; } + +struct symbol *thread__find_symbol(struct thread *self, + enum map_type type, u64 addr, + symbol_filter_t filter) +{ + struct map *map = thread__find_map(self, type, addr); + + if (map != NULL) + return map__find_symbol(map, map->map_ip(map, addr), filter); + + return NULL; +} diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 32aea3c1c2a..686d6e914d9 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -1,22 +1,56 @@ +#ifndef __PERF_THREAD_H +#define __PERF_THREAD_H + #include <linux/rbtree.h> -#include <linux/list.h> #include <unistd.h> #include "symbol.h" struct thread { struct rb_node rb_node; - struct list_head maps; + struct rb_root maps[MAP__NR_TYPES]; + struct list_head removed_maps[MAP__NR_TYPES]; pid_t pid; + bool use_modules; char shortname[3]; char *comm; + int comm_len; }; +void thread__init(struct thread *self, pid_t pid); int thread__set_comm(struct thread *self, const char *comm); -struct thread * -threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match); -struct thread * -register_idle_thread(struct rb_root *threads, struct thread **last_match); +int thread__comm_len(struct thread *self); +struct thread *threads__findnew(pid_t pid); +struct thread *register_idle_thread(void); void thread__insert_map(struct thread *self, struct map *map); int thread__fork(struct thread *self, struct thread *parent); -struct map *thread__find_map(struct thread *self, u64 ip); -size_t threads__fprintf(FILE *fp, struct rb_root *threads); +size_t thread__fprintf_maps(struct thread *self, FILE *fp); +size_t threads__fprintf(FILE *fp); + +void maps__insert(struct rb_root *maps, struct map *map); +struct map *maps__find(struct rb_root *maps, u64 addr); + +static inline struct map *thread__find_map(struct thread *self, + enum map_type type, u64 addr) +{ + return self ? maps__find(&self->maps[type], addr) : NULL; +} + +static inline void __thread__insert_map(struct thread *self, struct map *map) +{ + maps__insert(&self->maps[map->type], map); +} + +void thread__find_addr_location(struct thread *self, u8 cpumode, + enum map_type type, u64 addr, + struct addr_location *al, + symbol_filter_t filter); +struct symbol *thread__find_symbol(struct thread *self, + enum map_type type, u64 addr, + symbol_filter_t filter); + +static inline struct symbol * +thread__find_function(struct thread *self, u64 addr, symbol_filter_t filter) +{ + return thread__find_symbol(self, MAP__FUNCTION, addr, filter); +} +#endif /* __PERF_THREAD_H */ diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index af4b0573b37..cace3559553 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -33,11 +33,11 @@ #include <ctype.h> #include <errno.h> #include <stdbool.h> +#include <linux/kernel.h> #include "../perf.h" #include "trace-event.h" - #define VERSION "0.5" #define _STR(x) #x @@ -483,27 +483,33 @@ static struct tracepoint_path * get_tracepoints_path(struct perf_event_attr *pattrs, int nb_events) { struct tracepoint_path path, *ppath = &path; - int i; + int i, nr_tracepoints = 0; for (i = 0; i < nb_events; i++) { if (pattrs[i].type != PERF_TYPE_TRACEPOINT) continue; + ++nr_tracepoints; ppath->next = tracepoint_id_to_path(pattrs[i].config); if (!ppath->next) die("%s\n", "No memory to alloc tracepoints list"); ppath = ppath->next; } - return path.next; + return nr_tracepoints > 0 ? path.next : NULL; } -void read_tracing_data(struct perf_event_attr *pattrs, int nb_events) + +int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events) { char buf[BUFSIZ]; - struct tracepoint_path *tps; + struct tracepoint_path *tps = get_tracepoints_path(pattrs, nb_events); + + /* + * What? No tracepoints? No sense writing anything here, bail out. + */ + if (tps == NULL) + return -1; - output_fd = open(output_file, O_WRONLY | O_CREAT | O_TRUNC | O_LARGEFILE, 0644); - if (output_fd < 0) - die("creating file '%s'", output_file); + output_fd = fd; buf[0] = 23; buf[1] = 8; @@ -530,11 +536,11 @@ void read_tracing_data(struct perf_event_attr *pattrs, int nb_events) page_size = getpagesize(); write_or_die(&page_size, 4); - tps = get_tracepoints_path(pattrs, nb_events); - read_header_files(); read_ftrace_files(tps); read_event_files(tps); read_proc_kallsyms(); read_ftrace_printk(); + + return 0; } diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 55c9659a56e..0302405aa2c 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -40,12 +40,19 @@ int header_page_size_size; int header_page_data_offset; int header_page_data_size; +int latency_format; + static char *input_buf; static unsigned long long input_buf_ptr; static unsigned long long input_buf_siz; static int cpus; static int long_size; +static int is_flag_field; +static int is_symbolic_field; + +static struct format_field * +find_any_field(struct event *event, const char *name); static void init_input_buf(char *buf, unsigned long long size) { @@ -284,18 +291,19 @@ void parse_ftrace_printk(char *file, unsigned int size __unused) char *line; char *next = NULL; char *addr_str; - int ret; int i; line = strtok_r(file, "\n", &next); while (line) { + addr_str = strsep(&line, ":"); + if (!line) { + warning("error parsing print strings"); + break; + } item = malloc_or_die(sizeof(*item)); - ret = sscanf(line, "%as : %as", - (float *)(void *)&addr_str, /* workaround gcc warning */ - (float *)(void *)&item->printk); item->addr = strtoull(addr_str, NULL, 16); - free(addr_str); - + /* fmt still has a space, skip it */ + item->printk = strdup(line+1); item->next = list; list = item; line = strtok_r(NULL, "\n", &next); @@ -522,7 +530,10 @@ static enum event_type __read_token(char **tok) last_ch = ch; ch = __read_char(); buf[i++] = ch; - } while (ch != quote_ch && last_ch != '\\'); + /* the '\' '\' will cancel itself */ + if (ch == '\\' && last_ch == '\\') + last_ch = 0; + } while (ch != quote_ch || last_ch == '\\'); /* remove the last quote */ i--; goto out; @@ -610,7 +621,7 @@ static enum event_type read_token_item(char **tok) static int test_type(enum event_type type, enum event_type expect) { if (type != expect) { - die("Error: expected type %d but read %d", + warning("Error: expected type %d but read %d", expect, type); return -1; } @@ -621,13 +632,13 @@ static int test_type_token(enum event_type type, char *token, enum event_type expect, const char *expect_tok) { if (type != expect) { - die("Error: expected type %d but read %d", + warning("Error: expected type %d but read %d", expect, type); return -1; } if (strcmp(token, expect_tok) != 0) { - die("Error: expected '%s' but read '%s'", + warning("Error: expected '%s' but read '%s'", expect_tok, token); return -1; } @@ -665,7 +676,7 @@ static int __read_expected(enum event_type expect, const char *str, int newline_ free_token(token); - return 0; + return ret; } static int read_expected(enum event_type expect, const char *str) @@ -682,10 +693,10 @@ static char *event_read_name(void) { char *token; - if (read_expected(EVENT_ITEM, (char *)"name") < 0) + if (read_expected(EVENT_ITEM, "name") < 0) return NULL; - if (read_expected(EVENT_OP, (char *)":") < 0) + if (read_expected(EVENT_OP, ":") < 0) return NULL; if (read_expect_type(EVENT_ITEM, &token) < 0) @@ -703,10 +714,10 @@ static int event_read_id(void) char *token; int id; - if (read_expected_item(EVENT_ITEM, (char *)"ID") < 0) + if (read_expected_item(EVENT_ITEM, "ID") < 0) return -1; - if (read_expected(EVENT_OP, (char *)":") < 0) + if (read_expected(EVENT_OP, ":") < 0) return -1; if (read_expect_type(EVENT_ITEM, &token) < 0) @@ -721,6 +732,24 @@ static int event_read_id(void) return -1; } +static int field_is_string(struct format_field *field) +{ + if ((field->flags & FIELD_IS_ARRAY) && + (!strstr(field->type, "char") || !strstr(field->type, "u8") || + !strstr(field->type, "s8"))) + return 1; + + return 0; +} + +static int field_is_dynamic(struct format_field *field) +{ + if (!strcmp(field->type, "__data_loc")) + return 1; + + return 0; +} + static int event_read_fields(struct event *event, struct format_field **fields) { struct format_field *field = NULL; @@ -738,7 +767,7 @@ static int event_read_fields(struct event *event, struct format_field **fields) count++; - if (test_type_token(type, token, EVENT_ITEM, (char *)"field")) + if (test_type_token(type, token, EVENT_ITEM, "field")) goto fail; free_token(token); @@ -753,7 +782,7 @@ static int event_read_fields(struct event *event, struct format_field **fields) type = read_token(&token); } - if (test_type_token(type, token, EVENT_OP, (char *)":") < 0) + if (test_type_token(type, token, EVENT_OP, ":") < 0) return -1; if (read_expect_type(EVENT_ITEM, &token) < 0) @@ -865,14 +894,20 @@ static int event_read_fields(struct event *event, struct format_field **fields) free(brackets); } - if (test_type_token(type, token, EVENT_OP, (char *)";")) + if (field_is_string(field)) { + field->flags |= FIELD_IS_STRING; + if (field_is_dynamic(field)) + field->flags |= FIELD_IS_DYNAMIC; + } + + if (test_type_token(type, token, EVENT_OP, ";")) goto fail; free_token(token); - if (read_expected(EVENT_ITEM, (char *)"offset") < 0) + if (read_expected(EVENT_ITEM, "offset") < 0) goto fail_expect; - if (read_expected(EVENT_OP, (char *)":") < 0) + if (read_expected(EVENT_OP, ":") < 0) goto fail_expect; if (read_expect_type(EVENT_ITEM, &token)) @@ -880,13 +915,13 @@ static int event_read_fields(struct event *event, struct format_field **fields) field->offset = strtoul(token, NULL, 0); free_token(token); - if (read_expected(EVENT_OP, (char *)";") < 0) + if (read_expected(EVENT_OP, ";") < 0) goto fail_expect; - if (read_expected(EVENT_ITEM, (char *)"size") < 0) + if (read_expected(EVENT_ITEM, "size") < 0) goto fail_expect; - if (read_expected(EVENT_OP, (char *)":") < 0) + if (read_expected(EVENT_OP, ":") < 0) goto fail_expect; if (read_expect_type(EVENT_ITEM, &token)) @@ -894,11 +929,34 @@ static int event_read_fields(struct event *event, struct format_field **fields) field->size = strtoul(token, NULL, 0); free_token(token); - if (read_expected(EVENT_OP, (char *)";") < 0) + if (read_expected(EVENT_OP, ";") < 0) goto fail_expect; - if (read_expect_type(EVENT_NEWLINE, &token) < 0) - goto fail; + type = read_token(&token); + if (type != EVENT_NEWLINE) { + /* newer versions of the kernel have a "signed" type */ + if (test_type_token(type, token, EVENT_ITEM, "signed")) + goto fail; + + free_token(token); + + if (read_expected(EVENT_OP, ":") < 0) + goto fail_expect; + + if (read_expect_type(EVENT_ITEM, &token)) + goto fail; + + if (strtoul(token, NULL, 0)) + field->flags |= FIELD_IS_SIGNED; + + free_token(token); + if (read_expected(EVENT_OP, ";") < 0) + goto fail_expect; + + if (read_expect_type(EVENT_NEWLINE, &token)) + goto fail; + } + free_token(token); *fields = field; @@ -921,10 +979,10 @@ static int event_read_format(struct event *event) char *token; int ret; - if (read_expected_item(EVENT_ITEM, (char *)"format") < 0) + if (read_expected_item(EVENT_ITEM, "format") < 0) return -1; - if (read_expected(EVENT_OP, (char *)":") < 0) + if (read_expected(EVENT_OP, ":") < 0) return -1; if (read_expect_type(EVENT_NEWLINE, &token)) @@ -984,7 +1042,7 @@ process_cond(struct event *event, struct print_arg *top, char **tok) *tok = NULL; type = process_arg(event, left, &token); - if (test_type_token(type, token, EVENT_OP, (char *)":")) + if (test_type_token(type, token, EVENT_OP, ":")) goto out_free; arg->op.op = token; @@ -1004,6 +1062,35 @@ out_free: return EVENT_ERROR; } +static enum event_type +process_array(struct event *event, struct print_arg *top, char **tok) +{ + struct print_arg *arg; + enum event_type type; + char *token = NULL; + + arg = malloc_or_die(sizeof(*arg)); + memset(arg, 0, sizeof(*arg)); + + *tok = NULL; + type = process_arg(event, arg, &token); + if (test_type_token(type, token, EVENT_OP, "]")) + goto out_free; + + top->op.right = arg; + + free_token(token); + type = read_token_item(&token); + *tok = token; + + return type; + +out_free: + free_token(*tok); + free_arg(arg); + return EVENT_ERROR; +} + static int get_op_prio(char *op) { if (!op[1]) { @@ -1128,6 +1215,8 @@ process_op(struct event *event, struct print_arg *arg, char **tok) strcmp(token, "*") == 0 || strcmp(token, "^") == 0 || strcmp(token, "/") == 0 || + strcmp(token, "<") == 0 || + strcmp(token, ">") == 0 || strcmp(token, "==") == 0 || strcmp(token, "!=") == 0) { @@ -1144,17 +1233,46 @@ process_op(struct event *event, struct print_arg *arg, char **tok) right = malloc_or_die(sizeof(*right)); - type = process_arg(event, right, tok); + type = read_token_item(&token); + *tok = token; + + /* could just be a type pointer */ + if ((strcmp(arg->op.op, "*") == 0) && + type == EVENT_DELIM && (strcmp(token, ")") == 0)) { + if (left->type != PRINT_ATOM) + die("bad pointer type"); + left->atom.atom = realloc(left->atom.atom, + sizeof(left->atom.atom) + 3); + strcat(left->atom.atom, " *"); + *arg = *left; + free(arg); + + return type; + } + + type = process_arg_token(event, right, tok, type); arg->op.right = right; + } else if (strcmp(token, "[") == 0) { + + left = malloc_or_die(sizeof(*left)); + *left = *arg; + + arg->type = PRINT_OP; + arg->op.op = token; + arg->op.left = left; + + arg->op.prio = 0; + type = process_array(event, arg, tok); + } else { - die("unknown op '%s'", token); + warning("unknown op '%s'", token); + event->flags |= EVENT_FL_FAILED; /* the arg is now the left side */ return EVENT_NONE; } - if (type == EVENT_OP) { int prio; @@ -1178,7 +1296,7 @@ process_entry(struct event *event __unused, struct print_arg *arg, char *field; char *token; - if (read_expected(EVENT_OP, (char *)"->") < 0) + if (read_expected(EVENT_OP, "->") < 0) return EVENT_ERROR; if (read_expect_type(EVENT_ITEM, &token) < 0) @@ -1188,6 +1306,16 @@ process_entry(struct event *event __unused, struct print_arg *arg, arg->type = PRINT_FIELD; arg->field.name = field; + if (is_flag_field) { + arg->field.field = find_any_field(event, arg->field.name); + arg->field.field->flags |= FIELD_IS_FLAG; + is_flag_field = 0; + } else if (is_symbolic_field) { + arg->field.field = find_any_field(event, arg->field.name); + arg->field.field->flags |= FIELD_IS_SYMBOLIC; + is_symbolic_field = 0; + } + type = read_token(&token); *tok = token; @@ -1338,14 +1466,14 @@ process_fields(struct event *event, struct print_flag_sym **list, char **tok) do { free_token(token); type = read_token_item(&token); - if (test_type_token(type, token, EVENT_OP, (char *)"{")) + if (test_type_token(type, token, EVENT_OP, "{")) break; arg = malloc_or_die(sizeof(*arg)); free_token(token); type = process_arg(event, arg, &token); - if (test_type_token(type, token, EVENT_DELIM, (char *)",")) + if (test_type_token(type, token, EVENT_DELIM, ",")) goto out_free; field = malloc_or_die(sizeof(*field)); @@ -1356,7 +1484,7 @@ process_fields(struct event *event, struct print_flag_sym **list, char **tok) free_token(token); type = process_arg(event, arg, &token); - if (test_type_token(type, token, EVENT_OP, (char *)"}")) + if (test_type_token(type, token, EVENT_OP, "}")) goto out_free; value = arg_eval(arg); @@ -1391,13 +1519,13 @@ process_flags(struct event *event, struct print_arg *arg, char **tok) memset(arg, 0, sizeof(*arg)); arg->type = PRINT_FLAGS; - if (read_expected_item(EVENT_DELIM, (char *)"(") < 0) + if (read_expected_item(EVENT_DELIM, "(") < 0) return EVENT_ERROR; field = malloc_or_die(sizeof(*field)); type = process_arg(event, field, &token); - if (test_type_token(type, token, EVENT_DELIM, (char *)",")) + if (test_type_token(type, token, EVENT_DELIM, ",")) goto out_free; arg->flags.field = field; @@ -1408,11 +1536,11 @@ process_flags(struct event *event, struct print_arg *arg, char **tok) type = read_token_item(&token); } - if (test_type_token(type, token, EVENT_DELIM, (char *)",")) + if (test_type_token(type, token, EVENT_DELIM, ",")) goto out_free; type = process_fields(event, &arg->flags.flags, &token); - if (test_type_token(type, token, EVENT_DELIM, (char *)")")) + if (test_type_token(type, token, EVENT_DELIM, ")")) goto out_free; free_token(token); @@ -1434,19 +1562,19 @@ process_symbols(struct event *event, struct print_arg *arg, char **tok) memset(arg, 0, sizeof(*arg)); arg->type = PRINT_SYMBOL; - if (read_expected_item(EVENT_DELIM, (char *)"(") < 0) + if (read_expected_item(EVENT_DELIM, "(") < 0) return EVENT_ERROR; field = malloc_or_die(sizeof(*field)); type = process_arg(event, field, &token); - if (test_type_token(type, token, EVENT_DELIM, (char *)",")) + if (test_type_token(type, token, EVENT_DELIM, ",")) goto out_free; arg->symbol.field = field; type = process_fields(event, &arg->symbol.symbols, &token); - if (test_type_token(type, token, EVENT_DELIM, (char *)")")) + if (test_type_token(type, token, EVENT_DELIM, ")")) goto out_free; free_token(token); @@ -1463,7 +1591,6 @@ process_paren(struct event *event, struct print_arg *arg, char **tok) { struct print_arg *item_arg; enum event_type type; - int ptr_cast = 0; char *token; type = process_arg(event, arg, &token); @@ -1471,28 +1598,13 @@ process_paren(struct event *event, struct print_arg *arg, char **tok) if (type == EVENT_ERROR) return EVENT_ERROR; - if (type == EVENT_OP) { - /* handle the ptr casts */ - if (!strcmp(token, "*")) { - /* - * FIXME: should we zapp whitespaces before ')' ? - * (may require a peek_token_item()) - */ - if (__peek_char() == ')') { - ptr_cast = 1; - free_token(token); - type = read_token_item(&token); - } - } - if (!ptr_cast) { - type = process_op(event, arg, &token); + if (type == EVENT_OP) + type = process_op(event, arg, &token); - if (type == EVENT_ERROR) - return EVENT_ERROR; - } - } + if (type == EVENT_ERROR) + return EVENT_ERROR; - if (test_type_token(type, token, EVENT_DELIM, (char *)")")) { + if (test_type_token(type, token, EVENT_DELIM, ")")) { free_token(token); return EVENT_ERROR; } @@ -1516,13 +1628,6 @@ process_paren(struct event *event, struct print_arg *arg, char **tok) item_arg = malloc_or_die(sizeof(*item_arg)); arg->type = PRINT_TYPE; - if (ptr_cast) { - char *old = arg->atom.atom; - - arg->atom.atom = malloc_or_die(strlen(old + 3)); - sprintf(arg->atom.atom, "%s *", old); - free(old); - } arg->typecast.type = arg->atom.atom; arg->typecast.item = item_arg; type = process_arg_token(event, item_arg, &token, type); @@ -1540,7 +1645,7 @@ process_str(struct event *event __unused, struct print_arg *arg, char **tok) enum event_type type; char *token; - if (read_expected(EVENT_DELIM, (char *)"(") < 0) + if (read_expected(EVENT_DELIM, "(") < 0) return EVENT_ERROR; if (read_expect_type(EVENT_ITEM, &token) < 0) @@ -1550,7 +1655,7 @@ process_str(struct event *event __unused, struct print_arg *arg, char **tok) arg->string.string = token; arg->string.offset = -1; - if (read_expected(EVENT_DELIM, (char *)")") < 0) + if (read_expected(EVENT_DELIM, ")") < 0) return EVENT_ERROR; type = read_token(&token); @@ -1578,9 +1683,11 @@ process_arg_token(struct event *event, struct print_arg *arg, type = process_entry(event, arg, &token); } else if (strcmp(token, "__print_flags") == 0) { free_token(token); + is_flag_field = 1; type = process_flags(event, arg, &token); } else if (strcmp(token, "__print_symbolic") == 0) { free_token(token); + is_symbolic_field = 1; type = process_symbols(event, arg, &token); } else if (strcmp(token, "__get_str") == 0) { free_token(token); @@ -1637,12 +1744,18 @@ process_arg_token(struct event *event, struct print_arg *arg, static int event_read_print_args(struct event *event, struct print_arg **list) { - enum event_type type; + enum event_type type = EVENT_ERROR; struct print_arg *arg; char *token; int args = 0; do { + if (type == EVENT_NEWLINE) { + free_token(token); + type = read_token_item(&token); + continue; + } + arg = malloc_or_die(sizeof(*arg)); memset(arg, 0, sizeof(*arg)); @@ -1683,18 +1796,19 @@ static int event_read_print(struct event *event) char *token; int ret; - if (read_expected_item(EVENT_ITEM, (char *)"print") < 0) + if (read_expected_item(EVENT_ITEM, "print") < 0) return -1; - if (read_expected(EVENT_ITEM, (char *)"fmt") < 0) + if (read_expected(EVENT_ITEM, "fmt") < 0) return -1; - if (read_expected(EVENT_OP, (char *)":") < 0) + if (read_expected(EVENT_OP, ":") < 0) return -1; if (read_expect_type(EVENT_DQUOTE, &token) < 0) goto fail; + concat: event->print_fmt.format = token; event->print_fmt.args = NULL; @@ -1704,7 +1818,22 @@ static int event_read_print(struct event *event) if (type == EVENT_NONE) return 0; - if (test_type_token(type, token, EVENT_DELIM, (char *)",")) + /* Handle concatination of print lines */ + if (type == EVENT_DQUOTE) { + char *cat; + + cat = malloc_or_die(strlen(event->print_fmt.format) + + strlen(token) + 1); + strcpy(cat, event->print_fmt.format); + strcat(cat, token); + free_token(token); + free_token(event->print_fmt.format); + event->print_fmt.format = NULL; + token = cat; + goto concat; + } + + if (test_type_token(type, token, EVENT_DELIM, ",")) goto fail; free_token(token); @@ -1713,7 +1842,7 @@ static int event_read_print(struct event *event) if (ret < 0) return -1; - return 0; + return ret; fail: free_token(token); @@ -1759,7 +1888,7 @@ find_any_field(struct event *event, const char *name) return find_field(event, name); } -static unsigned long long read_size(void *ptr, int size) +unsigned long long read_size(void *ptr, int size) { switch (size) { case 1: @@ -1822,37 +1951,67 @@ static int get_common_info(const char *type, int *offset, int *size) return 0; } -int trace_parse_common_type(void *data) +static int __parse_common(void *data, int *size, int *offset, + const char *name) { - static int type_offset; - static int type_size; int ret; - if (!type_size) { - ret = get_common_info("common_type", - &type_offset, - &type_size); + if (!*size) { + ret = get_common_info(name, offset, size); if (ret < 0) return ret; } - return read_size(data + type_offset, type_size); + return read_size(data + *offset, *size); +} + +int trace_parse_common_type(void *data) +{ + static int type_offset; + static int type_size; + + return __parse_common(data, &type_size, &type_offset, + "common_type"); } -static int parse_common_pid(void *data) +int trace_parse_common_pid(void *data) { static int pid_offset; static int pid_size; + + return __parse_common(data, &pid_size, &pid_offset, + "common_pid"); +} + +int parse_common_pc(void *data) +{ + static int pc_offset; + static int pc_size; + + return __parse_common(data, &pc_size, &pc_offset, + "common_preempt_count"); +} + +int parse_common_flags(void *data) +{ + static int flags_offset; + static int flags_size; + + return __parse_common(data, &flags_size, &flags_offset, + "common_flags"); +} + +int parse_common_lock_depth(void *data) +{ + static int ld_offset; + static int ld_size; int ret; - if (!pid_size) { - ret = get_common_info("common_pid", - &pid_offset, - &pid_size); - if (ret < 0) - return ret; - } + ret = __parse_common(data, &ld_size, &ld_offset, + "common_lock_depth"); + if (ret < 0) + return -1; - return read_size(data + pid_offset, pid_size); + return ret; } struct event *trace_find_event(int id) @@ -1866,11 +2025,20 @@ struct event *trace_find_event(int id) return event; } +struct event *trace_find_next_event(struct event *event) +{ + if (!event) + return event_list; + + return event->next; +} + static unsigned long long eval_num_arg(void *data, int size, struct event *event, struct print_arg *arg) { unsigned long long val = 0; unsigned long long left, right; + struct print_arg *larg; switch (arg->type) { case PRINT_NULL: @@ -1897,6 +2065,26 @@ static unsigned long long eval_num_arg(void *data, int size, return 0; break; case PRINT_OP: + if (strcmp(arg->op.op, "[") == 0) { + /* + * Arrays are special, since we don't want + * to read the arg as is. + */ + if (arg->op.left->type != PRINT_FIELD) + goto default_op; /* oops, all bets off */ + larg = arg->op.left; + if (!larg->field.field) { + larg->field.field = + find_any_field(event, larg->field.name); + if (!larg->field.field) + die("field %s not found", larg->field.name); + } + right = eval_num_arg(data, size, event, arg->op.right); + val = read_size(data + larg->field.field->offset + + right * long_size, long_size); + break; + } + default_op: left = eval_num_arg(data, size, event, arg->op.left); right = eval_num_arg(data, size, event, arg->op.right); switch (arg->op.op[0]) { @@ -1947,6 +2135,12 @@ static unsigned long long eval_num_arg(void *data, int size, die("unknown op '%s'", arg->op.op); val = left == right; break; + case '-': + val = left - right; + break; + case '+': + val = left + right; + break; default: die("unknown op '%s'", arg->op.op); } @@ -1978,7 +2172,7 @@ static const struct flag flags[] = { { "HRTIMER_RESTART", 1 }, }; -static unsigned long long eval_flag(const char *flag) +unsigned long long eval_flag(const char *flag) { int i; @@ -2145,8 +2339,9 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc case 'u': case 'x': case 'i': - bptr = (void *)(((unsigned long)bptr + (long_size - 1)) & - ~(long_size - 1)); + /* the pointers are always 4 bytes aligned */ + bptr = (void *)(((unsigned long)bptr + 3) & + ~3); switch (ls) { case 0: case 1: @@ -2270,7 +2465,27 @@ static void pretty_print(void *data, int size, struct event *event) for (; *ptr; ptr++) { ls = 0; - if (*ptr == '%') { + if (*ptr == '\\') { + ptr++; + switch (*ptr) { + case 'n': + printf("\n"); + break; + case 't': + printf("\t"); + break; + case 'r': + printf("\r"); + break; + case '\\': + printf("\\"); + break; + default: + printf("%c", *ptr); + break; + } + + } else if (*ptr == '%') { saveptr = ptr; show_func = 0; cont_process: @@ -2377,6 +2592,41 @@ static inline int log10_cpu(int nb) return 1; } +static void print_lat_fmt(void *data, int size __unused) +{ + unsigned int lat_flags; + unsigned int pc; + int lock_depth; + int hardirq; + int softirq; + + lat_flags = parse_common_flags(data); + pc = parse_common_pc(data); + lock_depth = parse_common_lock_depth(data); + + hardirq = lat_flags & TRACE_FLAG_HARDIRQ; + softirq = lat_flags & TRACE_FLAG_SOFTIRQ; + + printf("%c%c%c", + (lat_flags & TRACE_FLAG_IRQS_OFF) ? 'd' : + (lat_flags & TRACE_FLAG_IRQS_NOSUPPORT) ? + 'X' : '.', + (lat_flags & TRACE_FLAG_NEED_RESCHED) ? + 'N' : '.', + (hardirq && softirq) ? 'H' : + hardirq ? 'h' : softirq ? 's' : '.'); + + if (pc) + printf("%x", pc); + else + printf("."); + + if (lock_depth < 0) + printf("."); + else + printf("%d", lock_depth); +} + /* taken from Linux, written by Frederic Weisbecker */ static void print_graph_cpu(int cpu) { @@ -2452,7 +2702,7 @@ get_return_for_leaf(int cpu, int cur_pid, unsigned long long cur_func, if (!(event->flags & EVENT_FL_ISFUNCRET)) return NULL; - pid = parse_common_pid(next->data); + pid = trace_parse_common_pid(next->data); field = find_field(event, "func"); if (!field) die("function return does not have field func"); @@ -2620,6 +2870,11 @@ pretty_print_func_ent(void *data, int size, struct event *event, printf(" | "); + if (latency_format) { + print_lat_fmt(data, size); + printf(" | "); + } + field = find_field(event, "func"); if (!field) die("function entry does not have func field"); @@ -2663,6 +2918,11 @@ pretty_print_func_ret(void *data, int size __unused, struct event *event, printf(" | "); + if (latency_format) { + print_lat_fmt(data, size); + printf(" | "); + } + field = find_field(event, "rettime"); if (!field) die("can't find rettime in return graph"); @@ -2724,19 +2984,30 @@ void print_event(int cpu, void *data, int size, unsigned long long nsecs, event = trace_find_event(type); if (!event) { - printf("ug! no event found for type %d\n", type); + warning("ug! no event found for type %d", type); return; } - pid = parse_common_pid(data); + pid = trace_parse_common_pid(data); if (event->flags & (EVENT_FL_ISFUNCENT | EVENT_FL_ISFUNCRET)) return pretty_print_func_graph(data, size, event, cpu, pid, comm, secs, usecs); - printf("%16s-%-5d [%03d] %5lu.%09Lu: %s: ", - comm, pid, cpu, - secs, nsecs, event->name); + if (latency_format) { + printf("%8.8s-%-5d %3d", + comm, pid, cpu); + print_lat_fmt(data, size); + } else + printf("%16s-%-5d [%03d]", comm, pid, cpu); + + printf(" %5lu.%06lu: %s: ", secs, usecs, event->name); + + if (event->flags & EVENT_FL_FAILED) { + printf("EVENT '%s' FAILED TO PARSE\n", + event->name); + return; + } pretty_print(data, size, event); printf("\n"); @@ -2807,46 +3078,71 @@ static void print_args(struct print_arg *args) } } -static void parse_header_field(char *type, +static void parse_header_field(const char *field, int *offset, int *size) { char *token; + int type; - if (read_expected(EVENT_ITEM, (char *)"field") < 0) + if (read_expected(EVENT_ITEM, "field") < 0) return; - if (read_expected(EVENT_OP, (char *)":") < 0) + if (read_expected(EVENT_OP, ":") < 0) return; + /* type */ if (read_expect_type(EVENT_ITEM, &token) < 0) - return; + goto fail; free_token(token); - if (read_expected(EVENT_ITEM, type) < 0) + if (read_expected(EVENT_ITEM, field) < 0) return; - if (read_expected(EVENT_OP, (char *)";") < 0) + if (read_expected(EVENT_OP, ";") < 0) return; - if (read_expected(EVENT_ITEM, (char *)"offset") < 0) + if (read_expected(EVENT_ITEM, "offset") < 0) return; - if (read_expected(EVENT_OP, (char *)":") < 0) + if (read_expected(EVENT_OP, ":") < 0) return; if (read_expect_type(EVENT_ITEM, &token) < 0) - return; + goto fail; *offset = atoi(token); free_token(token); - if (read_expected(EVENT_OP, (char *)";") < 0) + if (read_expected(EVENT_OP, ";") < 0) return; - if (read_expected(EVENT_ITEM, (char *)"size") < 0) + if (read_expected(EVENT_ITEM, "size") < 0) return; - if (read_expected(EVENT_OP, (char *)":") < 0) + if (read_expected(EVENT_OP, ":") < 0) return; if (read_expect_type(EVENT_ITEM, &token) < 0) - return; + goto fail; *size = atoi(token); free_token(token); - if (read_expected(EVENT_OP, (char *)";") < 0) - return; - if (read_expect_type(EVENT_NEWLINE, &token) < 0) + if (read_expected(EVENT_OP, ";") < 0) return; + type = read_token(&token); + if (type != EVENT_NEWLINE) { + /* newer versions of the kernel have a "signed" type */ + if (type != EVENT_ITEM) + goto fail; + + if (strcmp(token, "signed") != 0) + goto fail; + + free_token(token); + + if (read_expected(EVENT_OP, ":") < 0) + return; + + if (read_expect_type(EVENT_ITEM, &token)) + goto fail; + + free_token(token); + if (read_expected(EVENT_OP, ";") < 0) + return; + + if (read_expect_type(EVENT_NEWLINE, &token)) + goto fail; + } + fail: free_token(token); } @@ -2854,11 +3150,11 @@ int parse_header_page(char *buf, unsigned long size) { init_input_buf(buf, size); - parse_header_field((char *)"timestamp", &header_page_ts_offset, + parse_header_field("timestamp", &header_page_ts_offset, &header_page_ts_size); - parse_header_field((char *)"commit", &header_page_size_offset, + parse_header_field("commit", &header_page_size_offset, &header_page_size_size); - parse_header_field((char *)"data", &header_page_data_offset, + parse_header_field("data", &header_page_data_offset, &header_page_data_size); return 0; @@ -2909,6 +3205,9 @@ int parse_ftrace_file(char *buf, unsigned long size) if (ret < 0) die("failed to read ftrace event print fmt"); + /* New ftrace handles args */ + if (ret > 0) + return 0; /* * The arguments for ftrace files are parsed by the fields. * Set up the fields as their arguments. @@ -2926,7 +3225,7 @@ int parse_ftrace_file(char *buf, unsigned long size) return 0; } -int parse_event_file(char *buf, unsigned long size, char *system__unused __unused) +int parse_event_file(char *buf, unsigned long size, char *sys) { struct event *event; int ret; @@ -2946,12 +3245,18 @@ int parse_event_file(char *buf, unsigned long size, char *system__unused __unuse die("failed to read event id"); ret = event_read_format(event); - if (ret < 0) - die("failed to read event format"); + if (ret < 0) { + warning("failed to read event format for %s", event->name); + goto event_failed; + } ret = event_read_print(event); - if (ret < 0) - die("failed to read event print fmt"); + if (ret < 0) { + warning("failed to read event print fmt for %s", event->name); + goto event_failed; + } + + event->system = strdup(sys); #define PRINT_ARGS 0 if (PRINT_ARGS && event->print_fmt.args) @@ -2959,6 +3264,12 @@ int parse_event_file(char *buf, unsigned long size, char *system__unused __unuse add_event(event); return 0; + + event_failed: + event->flags |= EVENT_FL_FAILED; + /* still add it even if it failed */ + add_event(event); + return -1; } void parse_set_info(int nr_cpus, int long_sz) diff --git a/tools/perf/util/trace-event-perl.c b/tools/perf/util/trace-event-perl.c new file mode 100644 index 00000000000..51e833fd58c --- /dev/null +++ b/tools/perf/util/trace-event-perl.c @@ -0,0 +1,598 @@ +/* + * trace-event-perl. Feed perf trace events to an embedded Perl interpreter. + * + * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <errno.h> + +#include "../perf.h" +#include "util.h" +#include "trace-event.h" +#include "trace-event-perl.h" + +void xs_init(pTHX); + +void boot_Perf__Trace__Context(pTHX_ CV *cv); +void boot_DynaLoader(pTHX_ CV *cv); + +void xs_init(pTHX) +{ + const char *file = __FILE__; + dXSUB_SYS; + + newXS("Perf::Trace::Context::bootstrap", boot_Perf__Trace__Context, + file); + newXS("DynaLoader::boot_DynaLoader", boot_DynaLoader, file); +} + +INTERP my_perl; + +#define FTRACE_MAX_EVENT \ + ((1 << (sizeof(unsigned short) * 8)) - 1) + +struct event *events[FTRACE_MAX_EVENT]; + +static struct scripting_context *scripting_context; + +static char *cur_field_name; +static int zero_flag_atom; + +static void define_symbolic_value(const char *ev_name, + const char *field_name, + const char *field_value, + const char *field_str) +{ + unsigned long long value; + dSP; + + value = eval_flag(field_value); + + ENTER; + SAVETMPS; + PUSHMARK(SP); + + XPUSHs(sv_2mortal(newSVpv(ev_name, 0))); + XPUSHs(sv_2mortal(newSVpv(field_name, 0))); + XPUSHs(sv_2mortal(newSVuv(value))); + XPUSHs(sv_2mortal(newSVpv(field_str, 0))); + + PUTBACK; + if (get_cv("main::define_symbolic_value", 0)) + call_pv("main::define_symbolic_value", G_SCALAR); + SPAGAIN; + PUTBACK; + FREETMPS; + LEAVE; +} + +static void define_symbolic_values(struct print_flag_sym *field, + const char *ev_name, + const char *field_name) +{ + define_symbolic_value(ev_name, field_name, field->value, field->str); + if (field->next) + define_symbolic_values(field->next, ev_name, field_name); +} + +static void define_symbolic_field(const char *ev_name, + const char *field_name) +{ + dSP; + + ENTER; + SAVETMPS; + PUSHMARK(SP); + + XPUSHs(sv_2mortal(newSVpv(ev_name, 0))); + XPUSHs(sv_2mortal(newSVpv(field_name, 0))); + + PUTBACK; + if (get_cv("main::define_symbolic_field", 0)) + call_pv("main::define_symbolic_field", G_SCALAR); + SPAGAIN; + PUTBACK; + FREETMPS; + LEAVE; +} + +static void define_flag_value(const char *ev_name, + const char *field_name, + const char *field_value, + const char *field_str) +{ + unsigned long long value; + dSP; + + value = eval_flag(field_value); + + ENTER; + SAVETMPS; + PUSHMARK(SP); + + XPUSHs(sv_2mortal(newSVpv(ev_name, 0))); + XPUSHs(sv_2mortal(newSVpv(field_name, 0))); + XPUSHs(sv_2mortal(newSVuv(value))); + XPUSHs(sv_2mortal(newSVpv(field_str, 0))); + + PUTBACK; + if (get_cv("main::define_flag_value", 0)) + call_pv("main::define_flag_value", G_SCALAR); + SPAGAIN; + PUTBACK; + FREETMPS; + LEAVE; +} + +static void define_flag_values(struct print_flag_sym *field, + const char *ev_name, + const char *field_name) +{ + define_flag_value(ev_name, field_name, field->value, field->str); + if (field->next) + define_flag_values(field->next, ev_name, field_name); +} + +static void define_flag_field(const char *ev_name, + const char *field_name, + const char *delim) +{ + dSP; + + ENTER; + SAVETMPS; + PUSHMARK(SP); + + XPUSHs(sv_2mortal(newSVpv(ev_name, 0))); + XPUSHs(sv_2mortal(newSVpv(field_name, 0))); + XPUSHs(sv_2mortal(newSVpv(delim, 0))); + + PUTBACK; + if (get_cv("main::define_flag_field", 0)) + call_pv("main::define_flag_field", G_SCALAR); + SPAGAIN; + PUTBACK; + FREETMPS; + LEAVE; +} + +static void define_event_symbols(struct event *event, + const char *ev_name, + struct print_arg *args) +{ + switch (args->type) { + case PRINT_NULL: + break; + case PRINT_ATOM: + define_flag_value(ev_name, cur_field_name, "0", + args->atom.atom); + zero_flag_atom = 0; + break; + case PRINT_FIELD: + if (cur_field_name) + free(cur_field_name); + cur_field_name = strdup(args->field.name); + break; + case PRINT_FLAGS: + define_event_symbols(event, ev_name, args->flags.field); + define_flag_field(ev_name, cur_field_name, args->flags.delim); + define_flag_values(args->flags.flags, ev_name, cur_field_name); + break; + case PRINT_SYMBOL: + define_event_symbols(event, ev_name, args->symbol.field); + define_symbolic_field(ev_name, cur_field_name); + define_symbolic_values(args->symbol.symbols, ev_name, + cur_field_name); + break; + case PRINT_STRING: + break; + case PRINT_TYPE: + define_event_symbols(event, ev_name, args->typecast.item); + break; + case PRINT_OP: + if (strcmp(args->op.op, ":") == 0) + zero_flag_atom = 1; + define_event_symbols(event, ev_name, args->op.left); + define_event_symbols(event, ev_name, args->op.right); + break; + default: + /* we should warn... */ + return; + } + + if (args->next) + define_event_symbols(event, ev_name, args->next); +} + +static inline struct event *find_cache_event(int type) +{ + static char ev_name[256]; + struct event *event; + + if (events[type]) + return events[type]; + + events[type] = event = trace_find_event(type); + if (!event) + return NULL; + + sprintf(ev_name, "%s::%s", event->system, event->name); + + define_event_symbols(event, ev_name, event->print_fmt.args); + + return event; +} + +int common_pc(struct scripting_context *context) +{ + int pc; + + pc = parse_common_pc(context->event_data); + + return pc; +} + +int common_flags(struct scripting_context *context) +{ + int flags; + + flags = parse_common_flags(context->event_data); + + return flags; +} + +int common_lock_depth(struct scripting_context *context) +{ + int lock_depth; + + lock_depth = parse_common_lock_depth(context->event_data); + + return lock_depth; +} + +static void perl_process_event(int cpu, void *data, + int size __attribute((unused)), + unsigned long long nsecs, char *comm) +{ + struct format_field *field; + static char handler[256]; + unsigned long long val; + unsigned long s, ns; + struct event *event; + int type; + int pid; + + dSP; + + type = trace_parse_common_type(data); + + event = find_cache_event(type); + if (!event) + die("ug! no event found for type %d", type); + + pid = trace_parse_common_pid(data); + + sprintf(handler, "%s::%s", event->system, event->name); + + s = nsecs / NSECS_PER_SEC; + ns = nsecs - s * NSECS_PER_SEC; + + scripting_context->event_data = data; + + ENTER; + SAVETMPS; + PUSHMARK(SP); + + XPUSHs(sv_2mortal(newSVpv(handler, 0))); + XPUSHs(sv_2mortal(newSViv(PTR2IV(scripting_context)))); + XPUSHs(sv_2mortal(newSVuv(cpu))); + XPUSHs(sv_2mortal(newSVuv(s))); + XPUSHs(sv_2mortal(newSVuv(ns))); + XPUSHs(sv_2mortal(newSViv(pid))); + XPUSHs(sv_2mortal(newSVpv(comm, 0))); + + /* common fields other than pid can be accessed via xsub fns */ + + for (field = event->format.fields; field; field = field->next) { + if (field->flags & FIELD_IS_STRING) { + int offset; + if (field->flags & FIELD_IS_DYNAMIC) { + offset = *(int *)(data + field->offset); + offset &= 0xffff; + } else + offset = field->offset; + XPUSHs(sv_2mortal(newSVpv((char *)data + offset, 0))); + } else { /* FIELD_IS_NUMERIC */ + val = read_size(data + field->offset, field->size); + if (field->flags & FIELD_IS_SIGNED) { + XPUSHs(sv_2mortal(newSViv(val))); + } else { + XPUSHs(sv_2mortal(newSVuv(val))); + } + } + } + + PUTBACK; + + if (get_cv(handler, 0)) + call_pv(handler, G_SCALAR); + else if (get_cv("main::trace_unhandled", 0)) { + XPUSHs(sv_2mortal(newSVpv(handler, 0))); + XPUSHs(sv_2mortal(newSViv(PTR2IV(scripting_context)))); + XPUSHs(sv_2mortal(newSVuv(cpu))); + XPUSHs(sv_2mortal(newSVuv(nsecs))); + XPUSHs(sv_2mortal(newSViv(pid))); + XPUSHs(sv_2mortal(newSVpv(comm, 0))); + call_pv("main::trace_unhandled", G_SCALAR); + } + SPAGAIN; + PUTBACK; + FREETMPS; + LEAVE; +} + +static void run_start_sub(void) +{ + dSP; /* access to Perl stack */ + PUSHMARK(SP); + + if (get_cv("main::trace_begin", 0)) + call_pv("main::trace_begin", G_DISCARD | G_NOARGS); +} + +/* + * Start trace script + */ +static int perl_start_script(const char *script) +{ + const char *command_line[2] = { "", NULL }; + + command_line[1] = script; + + my_perl = perl_alloc(); + perl_construct(my_perl); + + if (perl_parse(my_perl, xs_init, 2, (char **)command_line, + (char **)NULL)) + return -1; + + perl_run(my_perl); + if (SvTRUE(ERRSV)) + return -1; + + run_start_sub(); + + fprintf(stderr, "perf trace started with Perl script %s\n\n", script); + + return 0; +} + +/* + * Stop trace script + */ +static int perl_stop_script(void) +{ + dSP; /* access to Perl stack */ + PUSHMARK(SP); + + if (get_cv("main::trace_end", 0)) + call_pv("main::trace_end", G_DISCARD | G_NOARGS); + + perl_destruct(my_perl); + perl_free(my_perl); + + fprintf(stderr, "\nperf trace Perl script stopped\n"); + + return 0; +} + +static int perl_generate_script(const char *outfile) +{ + struct event *event = NULL; + struct format_field *f; + char fname[PATH_MAX]; + int not_first, count; + FILE *ofp; + + sprintf(fname, "%s.pl", outfile); + ofp = fopen(fname, "w"); + if (ofp == NULL) { + fprintf(stderr, "couldn't open %s\n", fname); + return -1; + } + + fprintf(ofp, "# perf trace event handlers, " + "generated by perf trace -g perl\n"); + + fprintf(ofp, "# Licensed under the terms of the GNU GPL" + " License version 2\n\n"); + + fprintf(ofp, "# The common_* event handler fields are the most useful " + "fields common to\n"); + + fprintf(ofp, "# all events. They don't necessarily correspond to " + "the 'common_*' fields\n"); + + fprintf(ofp, "# in the format files. Those fields not available as " + "handler params can\n"); + + fprintf(ofp, "# be retrieved using Perl functions of the form " + "common_*($context).\n"); + + fprintf(ofp, "# See Context.pm for the list of available " + "functions.\n\n"); + + fprintf(ofp, "use lib \"$ENV{'PERF_EXEC_PATH'}/scripts/perl/" + "Perf-Trace-Util/lib\";\n"); + + fprintf(ofp, "use lib \"./Perf-Trace-Util/lib\";\n"); + fprintf(ofp, "use Perf::Trace::Core;\n"); + fprintf(ofp, "use Perf::Trace::Context;\n"); + fprintf(ofp, "use Perf::Trace::Util;\n\n"); + + fprintf(ofp, "sub trace_begin\n{\n\t# optional\n}\n\n"); + fprintf(ofp, "sub trace_end\n{\n\t# optional\n}\n\n"); + + while ((event = trace_find_next_event(event))) { + fprintf(ofp, "sub %s::%s\n{\n", event->system, event->name); + fprintf(ofp, "\tmy ("); + + fprintf(ofp, "$event_name, "); + fprintf(ofp, "$context, "); + fprintf(ofp, "$common_cpu, "); + fprintf(ofp, "$common_secs, "); + fprintf(ofp, "$common_nsecs,\n"); + fprintf(ofp, "\t $common_pid, "); + fprintf(ofp, "$common_comm,\n\t "); + + not_first = 0; + count = 0; + + for (f = event->format.fields; f; f = f->next) { + if (not_first++) + fprintf(ofp, ", "); + if (++count % 5 == 0) + fprintf(ofp, "\n\t "); + + fprintf(ofp, "$%s", f->name); + } + fprintf(ofp, ") = @_;\n\n"); + + fprintf(ofp, "\tprint_header($event_name, $common_cpu, " + "$common_secs, $common_nsecs,\n\t " + "$common_pid, $common_comm);\n\n"); + + fprintf(ofp, "\tprintf(\""); + + not_first = 0; + count = 0; + + for (f = event->format.fields; f; f = f->next) { + if (not_first++) + fprintf(ofp, ", "); + if (count && count % 4 == 0) { + fprintf(ofp, "\".\n\t \""); + } + count++; + + fprintf(ofp, "%s=", f->name); + if (f->flags & FIELD_IS_STRING || + f->flags & FIELD_IS_FLAG || + f->flags & FIELD_IS_SYMBOLIC) + fprintf(ofp, "%%s"); + else if (f->flags & FIELD_IS_SIGNED) + fprintf(ofp, "%%d"); + else + fprintf(ofp, "%%u"); + } + + fprintf(ofp, "\\n\",\n\t "); + + not_first = 0; + count = 0; + + for (f = event->format.fields; f; f = f->next) { + if (not_first++) + fprintf(ofp, ", "); + + if (++count % 5 == 0) + fprintf(ofp, "\n\t "); + + if (f->flags & FIELD_IS_FLAG) { + if ((count - 1) % 5 != 0) { + fprintf(ofp, "\n\t "); + count = 4; + } + fprintf(ofp, "flag_str(\""); + fprintf(ofp, "%s::%s\", ", event->system, + event->name); + fprintf(ofp, "\"%s\", $%s)", f->name, + f->name); + } else if (f->flags & FIELD_IS_SYMBOLIC) { + if ((count - 1) % 5 != 0) { + fprintf(ofp, "\n\t "); + count = 4; + } + fprintf(ofp, "symbol_str(\""); + fprintf(ofp, "%s::%s\", ", event->system, + event->name); + fprintf(ofp, "\"%s\", $%s)", f->name, + f->name); + } else + fprintf(ofp, "$%s", f->name); + } + + fprintf(ofp, ");\n"); + fprintf(ofp, "}\n\n"); + } + + fprintf(ofp, "sub trace_unhandled\n{\n\tmy ($event_name, $context, " + "$common_cpu, $common_secs, $common_nsecs,\n\t " + "$common_pid, $common_comm) = @_;\n\n"); + + fprintf(ofp, "\tprint_header($event_name, $common_cpu, " + "$common_secs, $common_nsecs,\n\t $common_pid, " + "$common_comm);\n}\n\n"); + + fprintf(ofp, "sub print_header\n{\n" + "\tmy ($event_name, $cpu, $secs, $nsecs, $pid, $comm) = @_;\n\n" + "\tprintf(\"%%-20s %%5u %%05u.%%09u %%8u %%-20s \",\n\t " + "$event_name, $cpu, $secs, $nsecs, $pid, $comm);\n}"); + + fclose(ofp); + + fprintf(stderr, "generated Perl script: %s\n", fname); + + return 0; +} + +struct scripting_ops perl_scripting_ops = { + .name = "Perl", + .start_script = perl_start_script, + .stop_script = perl_stop_script, + .process_event = perl_process_event, + .generate_script = perl_generate_script, +}; + +#ifdef NO_LIBPERL +void setup_perl_scripting(void) +{ + fprintf(stderr, "Perl scripting not supported." + " Install libperl and rebuild perf to enable it. e.g. " + "apt-get install libperl-dev (ubuntu), yum install " + "perl-ExtUtils-Embed (Fedora), etc.\n"); +} +#else +void setup_perl_scripting(void) +{ + int err; + err = script_spec_register("Perl", &perl_scripting_ops); + if (err) + die("error registering Perl script extension"); + + err = script_spec_register("pl", &perl_scripting_ops); + if (err) + die("error registering pl script extension"); + + scripting_context = malloc(sizeof(struct scripting_context)); +} +#endif diff --git a/tools/perf/util/trace-event-perl.h b/tools/perf/util/trace-event-perl.h new file mode 100644 index 00000000000..8fe0d866fe1 --- /dev/null +++ b/tools/perf/util/trace-event-perl.h @@ -0,0 +1,51 @@ +#ifndef __PERF_TRACE_EVENT_PERL_H +#define __PERF_TRACE_EVENT_PERL_H +#ifdef NO_LIBPERL +typedef int INTERP; +#define dSP +#define ENTER +#define SAVETMPS +#define PUTBACK +#define SPAGAIN +#define FREETMPS +#define LEAVE +#define SP +#define ERRSV +#define G_SCALAR (0) +#define G_DISCARD (0) +#define G_NOARGS (0) +#define PUSHMARK(a) +#define SvTRUE(a) (0) +#define XPUSHs(s) +#define sv_2mortal(a) +#define newSVpv(a,b) +#define newSVuv(a) +#define newSViv(a) +#define get_cv(a,b) (0) +#define call_pv(a,b) (0) +#define perl_alloc() (0) +#define perl_construct(a) (0) +#define perl_parse(a,b,c,d,e) (0) +#define perl_run(a) (0) +#define perl_destruct(a) (0) +#define perl_free(a) (0) +#define pTHX void +#define CV void +#define dXSUB_SYS +#define pTHX_ +static inline void newXS(const char *a, void *b, const char *c) {} +#else +#include <EXTERN.h> +#include <perl.h> +typedef PerlInterpreter * INTERP; +#endif + +struct scripting_context { + void *event_data; +}; + +int common_pc(struct scripting_context *context); +int common_flags(struct scripting_context *context); +int common_lock_depth(struct scripting_context *context); + +#endif /* __PERF_TRACE_EVENT_PERL_H */ diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 1b5c847d2c2..342dfdd43f8 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -458,9 +458,8 @@ struct record *trace_read_data(int cpu) return data; } -void trace_report(void) +void trace_report(int fd) { - const char *input_file = "trace.info"; char buf[BUFSIZ]; char test[] = { 23, 8, 68 }; char *version; @@ -468,17 +467,15 @@ void trace_report(void) int show_funcs = 0; int show_printk = 0; - input_fd = open(input_file, O_RDONLY); - if (input_fd < 0) - die("opening '%s'\n", input_file); + input_fd = fd; read_or_die(buf, 3); if (memcmp(buf, test, 3) != 0) - die("not an trace data file"); + die("no trace data in the file"); read_or_die(buf, 7); if (memcmp(buf, "tracing", 7) != 0) - die("not a trace file (missing tracing)"); + die("not a trace file (missing 'tracing' tag)"); version = read_string(); if (show_version) diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 693f815c942..81698d5e650 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -1,5 +1,5 @@ -#ifndef _TRACE_EVENTS_H -#define _TRACE_EVENTS_H +#ifndef __PERF_TRACE_EVENTS_H +#define __PERF_TRACE_EVENTS_H #include "parse-events.h" @@ -26,6 +26,11 @@ enum { enum format_flags { FIELD_IS_ARRAY = 1, FIELD_IS_POINTER = 2, + FIELD_IS_SIGNED = 4, + FIELD_IS_STRING = 8, + FIELD_IS_DYNAMIC = 16, + FIELD_IS_FLAG = 32, + FIELD_IS_SYMBOLIC = 64, }; struct format_field { @@ -132,15 +137,18 @@ struct event { int flags; struct format format; struct print_fmt print_fmt; + char *system; }; enum { - EVENT_FL_ISFTRACE = 1, - EVENT_FL_ISPRINT = 2, - EVENT_FL_ISBPRINT = 4, - EVENT_FL_ISFUNC = 8, - EVENT_FL_ISFUNCENT = 16, - EVENT_FL_ISFUNCRET = 32, + EVENT_FL_ISFTRACE = 0x01, + EVENT_FL_ISPRINT = 0x02, + EVENT_FL_ISBPRINT = 0x04, + EVENT_FL_ISFUNC = 0x08, + EVENT_FL_ISFUNCENT = 0x10, + EVENT_FL_ISFUNCRET = 0x20, + + EVENT_FL_FAILED = 0x80000000 }; struct record { @@ -154,7 +162,7 @@ struct record *trace_read_data(int cpu); void parse_set_info(int nr_cpus, int long_sz); -void trace_report(void); +void trace_report(int fd); void *malloc_or_die(unsigned int size); @@ -166,7 +174,7 @@ void print_funcs(void); void print_printk(void); int parse_ftrace_file(char *buf, unsigned long size); -int parse_event_file(char *buf, unsigned long size, char *system); +int parse_event_file(char *buf, unsigned long size, char *sys); void print_event(int cpu, void *data, int size, unsigned long long nsecs, char *comm); @@ -233,13 +241,45 @@ extern int header_page_size_size; extern int header_page_data_offset; extern int header_page_data_size; +extern int latency_format; + int parse_header_page(char *buf, unsigned long size); int trace_parse_common_type(void *data); +int trace_parse_common_pid(void *data); +int parse_common_pc(void *data); +int parse_common_flags(void *data); +int parse_common_lock_depth(void *data); struct event *trace_find_event(int id); +struct event *trace_find_next_event(struct event *event); +unsigned long long read_size(void *ptr, int size); unsigned long long raw_field_value(struct event *event, const char *name, void *data); void *raw_field_ptr(struct event *event, const char *name, void *data); +unsigned long long eval_flag(const char *flag); + +int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events); + +/* taken from kernel/trace/trace.h */ +enum trace_flag_type { + TRACE_FLAG_IRQS_OFF = 0x01, + TRACE_FLAG_IRQS_NOSUPPORT = 0x02, + TRACE_FLAG_NEED_RESCHED = 0x04, + TRACE_FLAG_HARDIRQ = 0x08, + TRACE_FLAG_SOFTIRQ = 0x10, +}; + +struct scripting_ops { + const char *name; + int (*start_script) (const char *); + int (*stop_script) (void); + void (*process_event) (int cpu, void *data, int size, + unsigned long long nsecs, char *comm); + int (*generate_script) (const char *outfile); +}; + +int script_spec_register(const char *spec, struct scripting_ops *ops); -void read_tracing_data(struct perf_event_attr *pattrs, int nb_events); +extern struct scripting_ops perl_scripting_ops; +void setup_perl_scripting(void); -#endif /* _TRACE_EVENTS_H */ +#endif /* __PERF_TRACE_EVENTS_H */ diff --git a/tools/perf/util/types.h b/tools/perf/util/types.h index 5e75f900594..7d6b8331f89 100644 --- a/tools/perf/util/types.h +++ b/tools/perf/util/types.h @@ -1,5 +1,5 @@ -#ifndef _PERF_TYPES_H -#define _PERF_TYPES_H +#ifndef __PERF_TYPES_H +#define __PERF_TYPES_H /* * We define u64 as unsigned long long for every architecture @@ -14,4 +14,4 @@ typedef signed short s16; typedef unsigned char u8; typedef signed char s8; -#endif /* _PERF_TYPES_H */ +#endif /* __PERF_TYPES_H */ diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 9de2329dd44..c673d882588 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -84,6 +84,9 @@ #include <iconv.h> #endif +extern const char *graph_line; +extern const char *graph_dotted_line; + /* On most systems <limits.h> would have given us this, but * not on some systems (e.g. GNU/Hurd). */ @@ -134,6 +137,15 @@ extern void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, extern int error(const char *err, ...) __attribute__((format (printf, 1, 2))); extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2))); +#include "../../../include/linux/stringify.h" + +#define DIE_IF(cnd) \ + do { if (cnd) \ + die(" at (" __FILE__ ":" __stringify(__LINE__) "): " \ + __stringify(cnd) "\n"); \ + } while (0) + + extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN); extern int prefixcmp(const char *str, const char *prefix); @@ -278,17 +290,15 @@ static inline char *gitstrchrnul(const char *s, int c) * Wrappers: */ extern char *xstrdup(const char *str); -extern void *xmalloc(size_t size); +extern void *xmalloc(size_t size) __attribute__((weak)); extern void *xmemdupz(const void *data, size_t len); extern char *xstrndup(const char *str, size_t len); -extern void *xrealloc(void *ptr, size_t size); -extern void *xcalloc(size_t nmemb, size_t size); -extern void *xmmap(void *start, size_t length, int prot, int flags, int fd, off_t offset); -extern ssize_t xread(int fd, void *buf, size_t len); -extern ssize_t xwrite(int fd, const void *buf, size_t len); -extern int xdup(int fd); -extern FILE *xfdopen(int fd, const char *mode); -extern int xmkstemp(char *template); +extern void *xrealloc(void *ptr, size_t size) __attribute__((weak)); + +static inline void *zalloc(size_t size) +{ + return calloc(1, size); +} static inline size_t xsize_t(off_t len) { @@ -306,6 +316,7 @@ static inline int has_extension(const char *filename, const char *ext) #undef isascii #undef isspace #undef isdigit +#undef isxdigit #undef isalpha #undef isprint #undef isalnum @@ -323,6 +334,8 @@ extern unsigned char sane_ctype[256]; #define isascii(x) (((x) & ~0x7f) == 0) #define isspace(x) sane_istest(x,GIT_SPACE) #define isdigit(x) sane_istest(x,GIT_DIGIT) +#define isxdigit(x) \ + (sane_istest(toupper(x), GIT_ALPHA | GIT_DIGIT) && toupper(x) < 'G') #define isalpha(x) sane_istest(x,GIT_ALPHA) #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) #define isprint(x) sane_istest(x,GIT_PRINT) diff --git a/tools/perf/util/values.h b/tools/perf/util/values.h index cadf8cf2a59..2fa967e1a88 100644 --- a/tools/perf/util/values.h +++ b/tools/perf/util/values.h @@ -1,5 +1,5 @@ -#ifndef _PERF_VALUES_H -#define _PERF_VALUES_H +#ifndef __PERF_VALUES_H +#define __PERF_VALUES_H #include "types.h" @@ -24,4 +24,4 @@ void perf_read_values_add_value(struct perf_read_values *values, void perf_read_values_display(FILE *fp, struct perf_read_values *values, int raw); -#endif /* _PERF_VALUES_H */ +#endif /* __PERF_VALUES_H */ diff --git a/tools/perf/util/wrapper.c b/tools/perf/util/wrapper.c index 4574ac28396..bf44ca85d23 100644 --- a/tools/perf/util/wrapper.c +++ b/tools/perf/util/wrapper.c @@ -79,43 +79,12 @@ void *xrealloc(void *ptr, size_t size) return ret; } -void *xcalloc(size_t nmemb, size_t size) -{ - void *ret = calloc(nmemb, size); - if (!ret && (!nmemb || !size)) - ret = calloc(1, 1); - if (!ret) { - release_pack_memory(nmemb * size, -1); - ret = calloc(nmemb, size); - if (!ret && (!nmemb || !size)) - ret = calloc(1, 1); - if (!ret) - die("Out of memory, calloc failed"); - } - return ret; -} - -void *xmmap(void *start, size_t length, - int prot, int flags, int fd, off_t offset) -{ - void *ret = mmap(start, length, prot, flags, fd, offset); - if (ret == MAP_FAILED) { - if (!length) - return NULL; - release_pack_memory(length, fd); - ret = mmap(start, length, prot, flags, fd, offset); - if (ret == MAP_FAILED) - die("Out of memory? mmap failed: %s", strerror(errno)); - } - return ret; -} - /* * xread() is the same a read(), but it automatically restarts read() * operations with a recoverable error (EAGAIN and EINTR). xread() * DOES NOT GUARANTEE that "len" bytes is read even if the data is available. */ -ssize_t xread(int fd, void *buf, size_t len) +static ssize_t xread(int fd, void *buf, size_t len) { ssize_t nr; while (1) { @@ -131,7 +100,7 @@ ssize_t xread(int fd, void *buf, size_t len) * operations with a recoverable error (EAGAIN and EINTR). xwrite() DOES NOT * GUARANTEE that "len" bytes is written even if the operation is successful. */ -ssize_t xwrite(int fd, const void *buf, size_t len) +static ssize_t xwrite(int fd, const void *buf, size_t len) { ssize_t nr; while (1) { @@ -179,29 +148,3 @@ ssize_t write_in_full(int fd, const void *buf, size_t count) return total; } - -int xdup(int fd) -{ - int ret = dup(fd); - if (ret < 0) - die("dup failed: %s", strerror(errno)); - return ret; -} - -FILE *xfdopen(int fd, const char *mode) -{ - FILE *stream = fdopen(fd, mode); - if (stream == NULL) - die("Out of memory? fdopen failed: %s", strerror(errno)); - return stream; -} - -int xmkstemp(char *template) -{ - int fd; - - fd = mkstemp(template); - if (fd < 0) - die("Unable to create temporary file: %s", strerror(errno)); - return fd; -} |