diff options
116 files changed, 1828 insertions, 1269 deletions
diff --git a/Documentation/HOWTO b/Documentation/HOWTO index c2371c5a98f..48a3955f05f 100644 --- a/Documentation/HOWTO +++ b/Documentation/HOWTO @@ -77,7 +77,8 @@ documentation files are also added which explain how to use the feature. When a kernel change causes the interface that the kernel exposes to userspace to change, it is recommended that you send the information or a patch to the manual pages explaining the change to the manual pages -maintainer at mtk.manpages@gmail.com. +maintainer at mtk.manpages@gmail.com, and CC the list +linux-api@vger.kernel.org. Here is a list of files that are in the kernel source tree that are required reading: diff --git a/Documentation/SubmitChecklist b/Documentation/SubmitChecklist index da10e071424..21f0795af20 100644 --- a/Documentation/SubmitChecklist +++ b/Documentation/SubmitChecklist @@ -67,6 +67,8 @@ kernel patches. 19: All new userspace interfaces are documented in Documentation/ABI/. See Documentation/ABI/README for more information. + Patches that change userspace interfaces should be CCed to + linux-api@vger.kernel.org. 20: Check that it all passes `make headers_check'. diff --git a/Documentation/kernel-doc-nano-HOWTO.txt b/Documentation/kernel-doc-nano-HOWTO.txt index 0bd32748a46..c6841eee959 100644 --- a/Documentation/kernel-doc-nano-HOWTO.txt +++ b/Documentation/kernel-doc-nano-HOWTO.txt @@ -168,10 +168,10 @@ if ($#ARGV < 0) { mkdir $ARGV[0],0777; $state = 0; while (<STDIN>) { - if (/^\.TH \"[^\"]*\" 4 \"([^\"]*)\"/) { + if (/^\.TH \"[^\"]*\" 9 \"([^\"]*)\"/) { if ($state == 1) { close OUT } $state = 1; - $fn = "$ARGV[0]/$1.4"; + $fn = "$ARGV[0]/$1.9"; print STDERR "Creating $fn\n"; open OUT, ">$fn" or die "can't open $fn: $!\n"; print OUT $_; diff --git a/Documentation/scheduler/sched-design-CFS.txt b/Documentation/scheduler/sched-design-CFS.txt index 88bcb876733..9d8eb553884 100644 --- a/Documentation/scheduler/sched-design-CFS.txt +++ b/Documentation/scheduler/sched-design-CFS.txt @@ -1,151 +1,242 @@ + ============= + CFS Scheduler + ============= -This is the CFS scheduler. - -80% of CFS's design can be summed up in a single sentence: CFS basically -models an "ideal, precise multi-tasking CPU" on real hardware. - -"Ideal multi-tasking CPU" is a (non-existent :-)) CPU that has 100% -physical power and which can run each task at precise equal speed, in -parallel, each at 1/nr_running speed. For example: if there are 2 tasks -running then it runs each at 50% physical power - totally in parallel. - -On real hardware, we can run only a single task at once, so while that -one task runs, the other tasks that are waiting for the CPU are at a -disadvantage - the current task gets an unfair amount of CPU time. In -CFS this fairness imbalance is expressed and tracked via the per-task -p->wait_runtime (nanosec-unit) value. "wait_runtime" is the amount of -time the task should now run on the CPU for it to become completely fair -and balanced. - -( small detail: on 'ideal' hardware, the p->wait_runtime value would - always be zero - no task would ever get 'out of balance' from the - 'ideal' share of CPU time. ) - -CFS's task picking logic is based on this p->wait_runtime value and it -is thus very simple: it always tries to run the task with the largest -p->wait_runtime value. In other words, CFS tries to run the task with -the 'gravest need' for more CPU time. So CFS always tries to split up -CPU time between runnable tasks as close to 'ideal multitasking -hardware' as possible. - -Most of the rest of CFS's design just falls out of this really simple -concept, with a few add-on embellishments like nice levels, -multiprocessing and various algorithm variants to recognize sleepers. - -In practice it works like this: the system runs a task a bit, and when -the task schedules (or a scheduler tick happens) the task's CPU usage is -'accounted for': the (small) time it just spent using the physical CPU -is deducted from p->wait_runtime. [minus the 'fair share' it would have -gotten anyway]. Once p->wait_runtime gets low enough so that another -task becomes the 'leftmost task' of the time-ordered rbtree it maintains -(plus a small amount of 'granularity' distance relative to the leftmost -task so that we do not over-schedule tasks and trash the cache) then the -new leftmost task is picked and the current task is preempted. - -The rq->fair_clock value tracks the 'CPU time a runnable task would have -fairly gotten, had it been runnable during that time'. So by using -rq->fair_clock values we can accurately timestamp and measure the -'expected CPU time' a task should have gotten. All runnable tasks are -sorted in the rbtree by the "rq->fair_clock - p->wait_runtime" key, and -CFS picks the 'leftmost' task and sticks to it. As the system progresses -forwards, newly woken tasks are put into the tree more and more to the -right - slowly but surely giving a chance for every task to become the -'leftmost task' and thus get on the CPU within a deterministic amount of -time. - -Some implementation details: - - - the introduction of Scheduling Classes: an extensible hierarchy of - scheduler modules. These modules encapsulate scheduling policy - details and are handled by the scheduler core without the core - code assuming about them too much. - - - sched_fair.c implements the 'CFS desktop scheduler': it is a - replacement for the vanilla scheduler's SCHED_OTHER interactivity - code. - - I'd like to give credit to Con Kolivas for the general approach here: - he has proven via RSDL/SD that 'fair scheduling' is possible and that - it results in better desktop scheduling. Kudos Con! - - The CFS patch uses a completely different approach and implementation - from RSDL/SD. My goal was to make CFS's interactivity quality exceed - that of RSDL/SD, which is a high standard to meet :-) Testing - feedback is welcome to decide this one way or another. [ and, in any - case, all of SD's logic could be added via a kernel/sched_sd.c module - as well, if Con is interested in such an approach. ] - - CFS's design is quite radical: it does not use runqueues, it uses a - time-ordered rbtree to build a 'timeline' of future task execution, - and thus has no 'array switch' artifacts (by which both the vanilla - scheduler and RSDL/SD are affected). - - CFS uses nanosecond granularity accounting and does not rely on any - jiffies or other HZ detail. Thus the CFS scheduler has no notion of - 'timeslices' and has no heuristics whatsoever. There is only one - central tunable (you have to switch on CONFIG_SCHED_DEBUG): - - /proc/sys/kernel/sched_granularity_ns - - which can be used to tune the scheduler from 'desktop' (low - latencies) to 'server' (good batching) workloads. It defaults to a - setting suitable for desktop workloads. SCHED_BATCH is handled by the - CFS scheduler module too. - - Due to its design, the CFS scheduler is not prone to any of the - 'attacks' that exist today against the heuristics of the stock - scheduler: fiftyp.c, thud.c, chew.c, ring-test.c, massive_intr.c all - work fine and do not impact interactivity and produce the expected - behavior. - - the CFS scheduler has a much stronger handling of nice levels and - SCHED_BATCH: both types of workloads should be isolated much more - agressively than under the vanilla scheduler. - - ( another detail: due to nanosec accounting and timeline sorting, - sched_yield() support is very simple under CFS, and in fact under - CFS sched_yield() behaves much better than under any other - scheduler i have tested so far. ) - - - sched_rt.c implements SCHED_FIFO and SCHED_RR semantics, in a simpler - way than the vanilla scheduler does. It uses 100 runqueues (for all - 100 RT priority levels, instead of 140 in the vanilla scheduler) - and it needs no expired array. - - - reworked/sanitized SMP load-balancing: the runqueue-walking - assumptions are gone from the load-balancing code now, and - iterators of the scheduling modules are used. The balancing code got - quite a bit simpler as a result. - - -Group scheduler extension to CFS -================================ - -Normally the scheduler operates on individual tasks and strives to provide -fair CPU time to each task. Sometimes, it may be desirable to group tasks -and provide fair CPU time to each such task group. For example, it may -be desirable to first provide fair CPU time to each user on the system -and then to each task belonging to a user. - -CONFIG_FAIR_GROUP_SCHED strives to achieve exactly that. It lets -SCHED_NORMAL/BATCH tasks be be grouped and divides CPU time fairly among such -groups. At present, there are two (mutually exclusive) mechanisms to group -tasks for CPU bandwidth control purpose: - - - Based on user id (CONFIG_FAIR_USER_SCHED) - In this option, tasks are grouped according to their user id. - - Based on "cgroup" pseudo filesystem (CONFIG_FAIR_CGROUP_SCHED) - This options lets the administrator create arbitrary groups - of tasks, using the "cgroup" pseudo filesystem. See - Documentation/cgroups.txt for more information about this - filesystem. -Only one of these options to group tasks can be chosen and not both. +1. OVERVIEW + +CFS stands for "Completely Fair Scheduler," and is the new "desktop" process +scheduler implemented by Ingo Molnar and merged in Linux 2.6.23. It is the +replacement for the previous vanilla scheduler's SCHED_OTHER interactivity +code. + +80% of CFS's design can be summed up in a single sentence: CFS basically models +an "ideal, precise multi-tasking CPU" on real hardware. + +"Ideal multi-tasking CPU" is a (non-existent :-)) CPU that has 100% physical +power and which can run each task at precise equal speed, in parallel, each at +1/nr_running speed. For example: if there are 2 tasks running, then it runs +each at 50% physical power --- i.e., actually in parallel. + +On real hardware, we can run only a single task at once, so we have to +introduce the concept of "virtual runtime." The virtual runtime of a task +specifies when its next timeslice would start execution on the ideal +multi-tasking CPU described above. In practice, the virtual runtime of a task +is its actual runtime normalized to the total number of running tasks. + + + +2. FEW IMPLEMENTATION DETAILS + +In CFS the virtual runtime is expressed and tracked via the per-task +p->se.vruntime (nanosec-unit) value. This way, it's possible to accurately +timestamp and measure the "expected CPU time" a task should have gotten. + +[ small detail: on "ideal" hardware, at any time all tasks would have the same + p->se.vruntime value --- i.e., tasks would execute simultaneously and no task + would ever get "out of balance" from the "ideal" share of CPU time. ] + +CFS's task picking logic is based on this p->se.vruntime value and it is thus +very simple: it always tries to run the task with the smallest p->se.vruntime +value (i.e., the task which executed least so far). CFS always tries to split +up CPU time between runnable tasks as close to "ideal multitasking hardware" as +possible. + +Most of the rest of CFS's design just falls out of this really simple concept, +with a few add-on embellishments like nice levels, multiprocessing and various +algorithm variants to recognize sleepers. + + + +3. THE RBTREE + +CFS's design is quite radical: it does not use the old data structures for the +runqueues, but it uses a time-ordered rbtree to build a "timeline" of future +task execution, and thus has no "array switch" artifacts (by which both the +previous vanilla scheduler and RSDL/SD are affected). + +CFS also maintains the rq->cfs.min_vruntime value, which is a monotonic +increasing value tracking the smallest vruntime among all tasks in the +runqueue. The total amount of work done by the system is tracked using +min_vruntime; that value is used to place newly activated entities on the left +side of the tree as much as possible. + +The total number of running tasks in the runqueue is accounted through the +rq->cfs.load value, which is the sum of the weights of the tasks queued on the +runqueue. + +CFS maintains a time-ordered rbtree, where all runnable tasks are sorted by the +p->se.vruntime key (there is a subtraction using rq->cfs.min_vruntime to +account for possible wraparounds). CFS picks the "leftmost" task from this +tree and sticks to it. +As the system progresses forwards, the executed tasks are put into the tree +more and more to the right --- slowly but surely giving a chance for every task +to become the "leftmost task" and thus get on the CPU within a deterministic +amount of time. + +Summing up, CFS works like this: it runs a task a bit, and when the task +schedules (or a scheduler tick happens) the task's CPU usage is "accounted +for": the (small) time it just spent using the physical CPU is added to +p->se.vruntime. Once p->se.vruntime gets high enough so that another task +becomes the "leftmost task" of the time-ordered rbtree it maintains (plus a +small amount of "granularity" distance relative to the leftmost task so that we +do not over-schedule tasks and trash the cache), then the new leftmost task is +picked and the current task is preempted. + + + +4. SOME FEATURES OF CFS + +CFS uses nanosecond granularity accounting and does not rely on any jiffies or +other HZ detail. Thus the CFS scheduler has no notion of "timeslices" in the +way the previous scheduler had, and has no heuristics whatsoever. There is +only one central tunable (you have to switch on CONFIG_SCHED_DEBUG): + + /proc/sys/kernel/sched_granularity_ns + +which can be used to tune the scheduler from "desktop" (i.e., low latencies) to +"server" (i.e., good batching) workloads. It defaults to a setting suitable +for desktop workloads. SCHED_BATCH is handled by the CFS scheduler module too. + +Due to its design, the CFS scheduler is not prone to any of the "attacks" that +exist today against the heuristics of the stock scheduler: fiftyp.c, thud.c, +chew.c, ring-test.c, massive_intr.c all work fine and do not impact +interactivity and produce the expected behavior. + +The CFS scheduler has a much stronger handling of nice levels and SCHED_BATCH +than the previous vanilla scheduler: both types of workloads are isolated much +more aggressively. + +SMP load-balancing has been reworked/sanitized: the runqueue-walking +assumptions are gone from the load-balancing code now, and iterators of the +scheduling modules are used. The balancing code got quite a bit simpler as a +result. + + + +5. Scheduling policies + +CFS implements three scheduling policies: + + - SCHED_NORMAL (traditionally called SCHED_OTHER): The scheduling + policy that is used for regular tasks. + + - SCHED_BATCH: Does not preempt nearly as often as regular tasks + would, thereby allowing tasks to run longer and make better use of + caches but at the cost of interactivity. This is well suited for + batch jobs. + + - SCHED_IDLE: This is even weaker than nice 19, but its not a true + idle timer scheduler in order to avoid to get into priority + inversion problems which would deadlock the machine. + +SCHED_FIFO/_RR are implemented in sched_rt.c and are as specified by +POSIX. + +The command chrt from util-linux-ng 2.13.1.1 can set all of these except +SCHED_IDLE. -Group scheduler tunables: -When CONFIG_FAIR_USER_SCHED is defined, a directory is created in sysfs for -each new user and a "cpu_share" file is added in that directory. + +6. SCHEDULING CLASSES + +The new CFS scheduler has been designed in such a way to introduce "Scheduling +Classes," an extensible hierarchy of scheduler modules. These modules +encapsulate scheduling policy details and are handled by the scheduler core +without the core code assuming too much about them. + +sched_fair.c implements the CFS scheduler described above. + +sched_rt.c implements SCHED_FIFO and SCHED_RR semantics, in a simpler way than +the previous vanilla scheduler did. It uses 100 runqueues (for all 100 RT +priority levels, instead of 140 in the previous scheduler) and it needs no +expired array. + +Scheduling classes are implemented through the sched_class structure, which +contains hooks to functions that must be called whenever an interesting event +occurs. + +This is the (partial) list of the hooks: + + - enqueue_task(...) + + Called when a task enters a runnable state. + It puts the scheduling entity (task) into the red-black tree and + increments the nr_running variable. + + - dequeue_tree(...) + + When a task is no longer runnable, this function is called to keep the + corresponding scheduling entity out of the red-black tree. It decrements + the nr_running variable. + + - yield_task(...) + + This function is basically just a dequeue followed by an enqueue, unless the + compat_yield sysctl is turned on; in that case, it places the scheduling + entity at the right-most end of the red-black tree. + + - check_preempt_curr(...) + + This function checks if a task that entered the runnable state should + preempt the currently running task. + + - pick_next_task(...) + + This function chooses the most appropriate task eligible to run next. + + - set_curr_task(...) + + This function is called when a task changes its scheduling class or changes + its task group. + + - task_tick(...) + + This function is mostly called from time tick functions; it might lead to + process switch. This drives the running preemption. + + - task_new(...) + + The core scheduler gives the scheduling module an opportunity to manage new + task startup. The CFS scheduling module uses it for group scheduling, while + the scheduling module for a real-time task does not use it. + + + +7. GROUP SCHEDULER EXTENSIONS TO CFS + +Normally, the scheduler operates on individual tasks and strives to provide +fair CPU time to each task. Sometimes, it may be desirable to group tasks and +provide fair CPU time to each such task group. For example, it may be +desirable to first provide fair CPU time to each user on the system and then to +each task belonging to a user. + +CONFIG_GROUP_SCHED strives to achieve exactly that. It lets tasks to be +grouped and divides CPU time fairly among such groups. + +CONFIG_RT_GROUP_SCHED permits to group real-time (i.e., SCHED_FIFO and +SCHED_RR) tasks. + +CONFIG_FAIR_GROUP_SCHED permits to group CFS (i.e., SCHED_NORMAL and +SCHED_BATCH) tasks. + +At present, there are two (mutually exclusive) mechanisms to group tasks for +CPU bandwidth control purposes: + + - Based on user id (CONFIG_USER_SCHED) + + With this option, tasks are grouped according to their user id. + + - Based on "cgroup" pseudo filesystem (CONFIG_CGROUP_SCHED) + + This options needs CONFIG_CGROUPS to be defined, and lets the administrator + create arbitrary groups of tasks, using the "cgroup" pseudo filesystem. See + Documentation/cgroups.txt for more information about this filesystem. + +Only one of these options to group tasks can be chosen and not both. + +When CONFIG_USER_SCHED is defined, a directory is created in sysfs for each new +user and a "cpu_share" file is added in that directory. # cd /sys/kernel/uids # cat 512/cpu_share # Display user 512's CPU share @@ -155,16 +246,14 @@ each new user and a "cpu_share" file is added in that directory. 2048 # -CPU bandwidth between two users are divided in the ratio of their CPU shares. -For ex: if you would like user "root" to get twice the bandwidth of user -"guest", then set the cpu_share for both the users such that "root"'s -cpu_share is twice "guest"'s cpu_share - +CPU bandwidth between two users is divided in the ratio of their CPU shares. +For example: if you would like user "root" to get twice the bandwidth of user +"guest," then set the cpu_share for both the users such that "root"'s cpu_share +is twice "guest"'s cpu_share. -When CONFIG_FAIR_CGROUP_SCHED is defined, a "cpu.shares" file is created -for each group created using the pseudo filesystem. See example steps -below to create task groups and modify their CPU share using the "cgroups" -pseudo filesystem +When CONFIG_CGROUP_SCHED is defined, a "cpu.shares" file is created for each +group created using the pseudo filesystem. See example steps below to create +task groups and modify their CPU share using the "cgroups" pseudo filesystem. # mkdir /dev/cpuctl # mount -t cgroup -ocpu none /dev/cpuctl diff --git a/Documentation/video4linux/CARDLIST.em28xx b/Documentation/video4linux/CARDLIST.em28xx index 89c7f32abf9..53449cb99b1 100644 --- a/Documentation/video4linux/CARDLIST.em28xx +++ b/Documentation/video4linux/CARDLIST.em28xx @@ -46,7 +46,7 @@ 45 -> Pinnacle PCTV DVB-T (em2870) 46 -> Compro, VideoMate U3 (em2870) [185b:2870] 47 -> KWorld DVB-T 305U (em2880) [eb1a:e305] - 48 -> KWorld DVB-T 310U (em2880) + 48 -> KWorld DVB-T 310U (em2880) [eb1a:e310] 49 -> MSI DigiVox A/D (em2880) [eb1a:e310] 50 -> MSI DigiVox A/D II (em2880) [eb1a:e320] 51 -> Terratec Hybrid XS Secam (em2880) [0ccd:004c] diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt index 0f03900c48f..9a3e4d797fa 100644 --- a/Documentation/video4linux/gspca.txt +++ b/Documentation/video4linux/gspca.txt @@ -190,6 +190,7 @@ pac7311 093a:260f SnakeCam pac7311 093a:2621 PAC731x pac7311 093a:2624 PAC7302 pac7311 093a:2626 Labtec 2200 +pac7311 093a:262a Webcam 300k zc3xx 0ac8:0302 Z-star Vimicro zc0302 vc032x 0ac8:0321 Vimicro generic vc0321 vc032x 0ac8:0323 Vimicro Vc0323 diff --git a/MAINTAINERS b/MAINTAINERS index 3596d178226..8dae4555f10 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1198,9 +1198,7 @@ M: hpa@zytor.com S: Maintained CPUSETS -P: Paul Jackson P: Paul Menage -M: pj@sgi.com M: menage@google.com L: linux-kernel@vger.kernel.org W: http://www.bullopensource.org/cpuset/ @@ -2706,6 +2704,7 @@ MAN-PAGES: MANUAL PAGES FOR LINUX -- Sections 2, 3, 4, 5, and 7 P: Michael Kerrisk M: mtk.manpages@gmail.com W: http://www.kernel.org/doc/man-pages +L: linux-man@vger.kernel.org S: Supported MARVELL LIBERTAS WIRELESS DRIVER @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 27 -EXTRAVERSION = -rc8 +EXTRAVERSION = -rc9 NAME = Rotary Wombat # *DOCUMENTATION* diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 83df541650f..06b6fdab639 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -149,6 +149,9 @@ smp_callin(void) atomic_inc(&init_mm.mm_count); current->active_mm = &init_mm; + /* inform the notifiers about the new cpu */ + notify_cpu_starting(cpuid); + /* Must have completely accurate bogos. */ local_irq_enable(); diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index e9842f6767f..e42a749a56d 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -277,6 +277,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void) /* * Enable local interrupts. */ + notify_cpu_starting(cpu); local_irq_enable(); local_fiq_enable(); diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c index 952a24b2f5a..52e16c6436f 100644 --- a/arch/cris/arch-v32/kernel/smp.c +++ b/arch/cris/arch-v32/kernel/smp.c @@ -178,6 +178,7 @@ void __init smp_callin(void) unmask_irq(IPI_INTR_VECT); unmask_irq(TIMER0_INTR_VECT); preempt_disable(); + notify_cpu_starting(cpu); local_irq_enable(); cpu_set(cpu, cpu_online_map); diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index d8f05e504fb..1dcbb85fc4e 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -401,6 +401,7 @@ smp_callin (void) spin_lock(&vector_lock); /* Setup the per cpu irq handling data structures */ __setup_vector_irq(cpuid); + notify_cpu_starting(cpuid); cpu_set(cpuid, cpu_online_map); per_cpu(cpu_state, cpuid) = CPU_ONLINE; spin_unlock(&vector_lock); diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c index 2c03ac1d005..fc2994811f1 100644 --- a/arch/m32r/kernel/smpboot.c +++ b/arch/m32r/kernel/smpboot.c @@ -498,6 +498,8 @@ static void __init smp_online(void) { int cpu_id = smp_processor_id(); + notify_cpu_starting(cpu_id); + local_irq_enable(); /* Get our bogomips. */ diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 49896a2a1d7..1e06d233fa8 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -211,6 +211,7 @@ config MIPS_MALTA select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN select SYS_SUPPORTS_LITTLE_ENDIAN + select SYS_SUPPORTS_MIPS_CMP if BROKEN # because SYNC_R4K is broken select SYS_SUPPORTS_MULTITHREADING select SYS_SUPPORTS_SMARTMIPS help @@ -1403,7 +1404,6 @@ config MIPS_MT_SMTC depends on CPU_MIPS32_R2 #depends on CPU_MIPS64_R2 # once there is hardware ... depends on SYS_SUPPORTS_MULTITHREADING - select GENERIC_CLOCKEVENTS_BROADCAST select CPU_MIPSR2_IRQ_VI select CPU_MIPSR2_IRQ_EI select MIPS_MT @@ -1451,32 +1451,17 @@ config MIPS_VPE_LOADER Includes a loader for loading an elf relocatable object onto another VPE and running it. -config MIPS_MT_SMTC_INSTANT_REPLAY - bool "Low-latency Dispatch of Deferred SMTC IPIs" - depends on MIPS_MT_SMTC && !PREEMPT - default y - help - SMTC pseudo-interrupts between TCs are deferred and queued - if the target TC is interrupt-inhibited (IXMT). In the first - SMTC prototypes, these queued IPIs were serviced on return - to user mode, or on entry into the kernel idle loop. The - INSTANT_REPLAY option dispatches them as part of local_irq_restore() - processing, which adds runtime overhead (hence the option to turn - it off), but ensures that IPIs are handled promptly even under - heavy I/O interrupt load. - config MIPS_MT_SMTC_IM_BACKSTOP bool "Use per-TC register bits as backstop for inhibited IM bits" depends on MIPS_MT_SMTC - default y + default n help To support multiple TC microthreads acting as "CPUs" within a VPE, VPE-wide interrupt mask bits must be specially manipulated during interrupt handling. To support legacy drivers and interrupt controller management code, SMTC has a "backstop" to track and if necessary restore the interrupt mask. This has some performance - impact on interrupt service overhead. Disable it only if you know - what you are doing. + impact on interrupt service overhead. config MIPS_MT_SMTC_IRQAFF bool "Support IRQ affinity API" @@ -1486,10 +1471,8 @@ config MIPS_MT_SMTC_IRQAFF Enables SMP IRQ affinity API (/proc/irq/*/smp_affinity, etc.) for SMTC Linux kernel. Requires platform support, of which an example can be found in the MIPS kernel i8259 and Malta - platform code. It is recommended that MIPS_MT_SMTC_INSTANT_REPLAY - be enabled if MIPS_MT_SMTC_IRQAFF is used. Adds overhead to - interrupt dispatch, and should be used only if you know what - you are doing. + platform code. Adds some overhead to interrupt dispatch, and + should be used only if you know what you are doing. config MIPS_VPE_LOADER_TOM bool "Load VPE program into memory hidden from linux" @@ -1517,6 +1500,18 @@ config MIPS_APSP_KSPD "exit" syscall notifying other kernel modules the SP program is exiting. You probably want to say yes here. +config MIPS_CMP + bool "MIPS CMP framework support" + depends on SYS_SUPPORTS_MIPS_CMP + select SYNC_R4K if BROKEN + select SYS_SUPPORTS_SMP + select SYS_SUPPORTS_SCHED_SMT if SMP + select WEAK_ORDERING + default n + help + This is a placeholder option for the GCMP work. It will need to + be handled differently... + config SB1_PASS_1_WORKAROUNDS bool depends on CPU_SB1_PASS_1 @@ -1693,6 +1688,9 @@ config SMP config SMP_UP bool +config SYS_SUPPORTS_MIPS_CMP + bool + config SYS_SUPPORTS_SMP bool @@ -1740,17 +1738,6 @@ config NR_CPUS performance should round up your number of processors to the next power of two. -config MIPS_CMP - bool "MIPS CMP framework support" - depends on SMP - select SYNC_R4K - select SYS_SUPPORTS_SCHED_SMT - select WEAK_ORDERING - default n - help - This is a placeholder option for the GCMP work. It will need to - be handled differently... - source "kernel/time/Kconfig" # diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile index 706f9397479..25775cb5400 100644 --- a/arch/mips/kernel/Makefile +++ b/arch/mips/kernel/Makefile @@ -10,6 +10,7 @@ obj-y += cpu-probe.o branch.o entry.o genex.o irq.o process.o \ obj-$(CONFIG_CEVT_BCM1480) += cevt-bcm1480.o obj-$(CONFIG_CEVT_R4K) += cevt-r4k.o +obj-$(CONFIG_MIPS_MT_SMTC) += cevt-smtc.o obj-$(CONFIG_CEVT_DS1287) += cevt-ds1287.o obj-$(CONFIG_CEVT_GT641XX) += cevt-gt641xx.o obj-$(CONFIG_CEVT_SB1250) += cevt-sb1250.o diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c index 24a2d907aa0..4a4c59f2737 100644 --- a/arch/mips/kernel/cevt-r4k.c +++ b/arch/mips/kernel/cevt-r4k.c @@ -12,6 +12,14 @@ #include <asm/smtc_ipi.h> #include <asm/time.h> +#include <asm/cevt-r4k.h> + +/* + * The SMTC Kernel for the 34K, 1004K, et. al. replaces several + * of these routines with SMTC-specific variants. + */ + +#ifndef CONFIG_MIPS_MT_SMTC static int mips_next_event(unsigned long delta, struct clock_event_device *evt) @@ -19,60 +27,27 @@ static int mips_next_event(unsigned long delta, unsigned int cnt; int res; -#ifdef CONFIG_MIPS_MT_SMTC - { - unsigned long flags, vpflags; - local_irq_save(flags); - vpflags = dvpe(); -#endif cnt = read_c0_count(); cnt += delta; write_c0_compare(cnt); res = ((int)(read_c0_count() - cnt) > 0) ? -ETIME : 0; -#ifdef CONFIG_MIPS_MT_SMTC - evpe(vpflags); - local_irq_restore(flags); - } -#endif return res; } -static void mips_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt) +#endif /* CONFIG_MIPS_MT_SMTC */ + +void mips_set_clock_mode(enum clock_event_mode mode, + struct clock_event_device *evt) { /* Nothing to do ... */ } -static DEFINE_PER_CPU(struct clock_event_device, mips_clockevent_device); -static int cp0_timer_irq_installed; +DEFINE_PER_CPU(struct clock_event_device, mips_clockevent_device); +int cp0_timer_irq_installed; -/* - * Timer ack for an R4k-compatible timer of a known frequency. - */ -static void c0_timer_ack(void) -{ - write_c0_compare(read_c0_compare()); -} +#ifndef CONFIG_MIPS_MT_SMTC -/* - * Possibly handle a performance counter interrupt. - * Return true if the timer interrupt should not be checked - */ -static inline int handle_perf_irq(int r2) -{ - /* - * The performance counter overflow interrupt may be shared with the - * timer interrupt (cp0_perfcount_irq < 0). If it is and a - * performance counter has overflowed (perf_irq() == IRQ_HANDLED) - * and we can't reliably determine if a counter interrupt has also - * happened (!r2) then don't check for a timer interrupt. - */ - return (cp0_perfcount_irq < 0) && - perf_irq() == IRQ_HANDLED && - !r2; -} - -static irqreturn_t c0_compare_interrupt(int irq, void *dev_id) +irqreturn_t c0_compare_interrupt(int irq, void *dev_id) { const int r2 = cpu_has_mips_r2; struct clock_event_device *cd; @@ -93,12 +68,8 @@ static irqreturn_t c0_compare_interrupt(int irq, void *dev_id) * interrupt. Being the paranoiacs we are we check anyway. */ if (!r2 || (read_c0_cause() & (1 << 30))) { - c0_timer_ack(); -#ifdef CONFIG_MIPS_MT_SMTC - if (cpu_data[cpu].vpe_id) - goto out; - cpu = 0; -#endif + /* Clear Count/Compare Interrupt */ + write_c0_compare(read_c0_compare()); cd = &per_cpu(mips_clockevent_device, cpu); cd->event_handler(cd); } @@ -107,65 +78,16 @@ out: return IRQ_HANDLED; } -static struct irqaction c0_compare_irqaction = { +#endif /* Not CONFIG_MIPS_MT_SMTC */ + +struct irqaction c0_compare_irqaction = { .handler = c0_compare_interrupt, -#ifdef CONFIG_MIPS_MT_SMTC - .flags = IRQF_DISABLED, -#else .flags = IRQF_DISABLED | IRQF_PERCPU, -#endif .name = "timer", }; -#ifdef CONFIG_MIPS_MT_SMTC -DEFINE_PER_CPU(struct clock_event_device, smtc_dummy_clockevent_device); - -static void smtc_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt) -{ -} - -static void mips_broadcast(cpumask_t mask) -{ - unsigned int cpu; - - for_each_cpu_mask(cpu, mask) - smtc_send_ipi(cpu, SMTC_CLOCK_TICK, 0); -} - -static void setup_smtc_dummy_clockevent_device(void) -{ - //uint64_t mips_freq = mips_hpt_^frequency; - unsigned int cpu = smp_processor_id(); - struct clock_event_device *cd; - cd = &per_cpu(smtc_dummy_clockevent_device, cpu); - - cd->name = "SMTC"; - cd->features = CLOCK_EVT_FEAT_DUMMY; - - /* Calculate the min / max delta */ - cd->mult = 0; //div_sc((unsigned long) mips_freq, NSEC_PER_SEC, 32); - cd->shift = 0; //32; - cd->max_delta_ns = 0; //clockevent_delta2ns(0x7fffffff, cd); - cd->min_delta_ns = 0; //clockevent_delta2ns(0x30, cd); - - cd->rating = 200; - cd->irq = 17; //-1; -// if (cpu) -// cd->cpumask = CPU_MASK_ALL; // cpumask_of_cpu(cpu); -// else - cd->cpumask = cpumask_of_cpu(cpu); - - cd->set_mode = smtc_set_mode; - - cd->broadcast = mips_broadcast; - - clockevents_register_device(cd); -} -#endif - -static void mips_event_handler(struct clock_event_device *dev) +void mips_event_handler(struct clock_event_device *dev) { } @@ -177,7 +99,23 @@ static int c0_compare_int_pending(void) return (read_c0_cause() >> cp0_compare_irq) & 0x100; } -static int c0_compare_int_usable(void) +/* + * Compare interrupt can be routed and latched outside the core, + * so a single execution hazard barrier may not be enough to give + * it time to clear as seen in the Cause register. 4 time the + * pipeline depth seems reasonably conservative, and empirically + * works better in configurations with high CPU/bus clock ratios. + */ + +#define compare_change_hazard() \ + do { \ + irq_disable_hazard(); \ + irq_disable_hazard(); \ + irq_disable_hazard(); \ + irq_disable_hazard(); \ + } while (0) + +int c0_compare_int_usable(void) { unsigned int delta; unsigned int cnt; @@ -187,7 +125,7 @@ static int c0_compare_int_usable(void) */ if (c0_compare_int_pending()) { write_c0_compare(read_c0_count()); - irq_disable_hazard(); + compare_change_hazard(); if (c0_compare_int_pending()) return 0; } @@ -196,7 +134,7 @@ static int c0_compare_int_usable(void) cnt = read_c0_count(); cnt += delta; write_c0_compare(cnt); - irq_disable_hazard(); + compare_change_hazard(); if ((int)(read_c0_count() - cnt) < 0) break; /* increase delta if the timer was already expired */ @@ -205,11 +143,12 @@ static int c0_compare_int_usable(void) while ((int)(read_c0_count() - cnt) <= 0) ; /* Wait for expiry */ + compare_change_hazard(); if (!c0_compare_int_pending()) return 0; write_c0_compare(read_c0_count()); - irq_disable_hazard(); + compare_change_hazard(); if (c0_compare_int_pending()) return 0; @@ -219,6 +158,8 @@ static int c0_compare_int_usable(void) return 1; } +#ifndef CONFIG_MIPS_MT_SMTC + int __cpuinit mips_clockevent_init(void) { uint64_t mips_freq = mips_hpt_frequency; @@ -229,17 +170,6 @@ int __cpuinit mips_clockevent_init(void) if (!cpu_has_counter || !mips_hpt_frequency) return -ENXIO; -#ifdef CONFIG_MIPS_MT_SMTC - setup_smtc_dummy_clockevent_device(); - - /* - * On SMTC we only register VPE0's compare interrupt as clockevent - * device. - */ - if (cpu) - return 0; -#endif - if (!c0_compare_int_usable()) return -ENXIO; @@ -265,13 +195,9 @@ int __cpuinit mips_clockevent_init(void) cd->rating = 300; cd->irq = irq; -#ifdef CONFIG_MIPS_MT_SMTC - cd->cpumask = CPU_MASK_ALL; -#else cd->cpumask = cpumask_of_cpu(cpu); -#endif cd->set_next_event = mips_next_event; - cd->set_mode = mips_set_mode; + cd->set_mode = mips_set_clock_mode; cd->event_handler = mips_event_handler; clockevents_register_device(cd); @@ -281,12 +207,9 @@ int __cpuinit mips_clockevent_init(void) cp0_timer_irq_installed = 1; -#ifdef CONFIG_MIPS_MT_SMTC -#define CPUCTR_IMASKBIT (0x100 << cp0_compare_irq) - setup_irq_smtc(irq, &c0_compare_irqaction, CPUCTR_IMASKBIT); -#else setup_irq(irq, &c0_compare_irqaction); -#endif return 0; } + +#endif /* Not CONFIG_MIPS_MT_SMTC */ diff --git a/arch/mips/kernel/cevt-smtc.c b/arch/mips/kernel/cevt-smtc.c new file mode 100644 index 00000000000..5162fe4b595 --- /dev/null +++ b/arch/mips/kernel/cevt-smtc.c @@ -0,0 +1,321 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2007 MIPS Technologies, Inc. + * Copyright (C) 2007 Ralf Baechle <ralf@linux-mips.org> + * Copyright (C) 2008 Kevin D. Kissell, Paralogos sarl + */ +#include <linux/clockchips.h> +#include <linux/interrupt.h> +#include <linux/percpu.h> + +#include <asm/smtc_ipi.h> +#include <asm/time.h> +#include <asm/cevt-r4k.h> + +/* + * Variant clock event timer support for SMTC on MIPS 34K, 1004K + * or other MIPS MT cores. + * + * Notes on SMTC Support: + * + * SMTC has multiple microthread TCs pretending to be Linux CPUs. + * But there's only one Count/Compare pair per VPE, and Compare + * interrupts are taken opportunisitically by available TCs + * bound to the VPE with the Count register. The new timer + * framework provides for global broadcasts, but we really + * want VPE-level multicasts for best behavior. So instead + * of invoking the high-level clock-event broadcast code, + * this version of SMTC support uses the historical SMTC + * multicast mechanisms "under the hood", appearing to the + * generic clock layer as if the interrupts are per-CPU. + * + * The approach taken here is to maintain a set of NR_CPUS + * virtual timers, and track which "CPU" needs to be alerted + * at each event. + * + * It's unlikely that we'll see a MIPS MT core with more than + * 2 VPEs, but we *know* that we won't need to handle more + * VPEs than we have "CPUs". So NCPUs arrays of NCPUs elements + * is always going to be overkill, but always going to be enough. + */ + +unsigned long smtc_nexttime[NR_CPUS][NR_CPUS]; +static int smtc_nextinvpe[NR_CPUS]; + +/* + * Timestamps stored are absolute values to be programmed + * into Count register. Valid timestamps will never be zero. + * If a Zero Count value is actually calculated, it is converted + * to be a 1, which will introduce 1 or two CPU cycles of error + * roughly once every four billion events, which at 1000 HZ means + * about once every 50 days. If that's actually a problem, one + * could alternate squashing 0 to 1 and to -1. + */ + +#define MAKEVALID(x) (((x) == 0L) ? 1L : (x)) +#define ISVALID(x) ((x) != 0L) + +/* + * Time comparison is subtle, as it's really truncated + * modular arithmetic. + */ + +#define IS_SOONER(a, b, reference) \ + (((a) - (unsigned long)(reference)) < ((b) - (unsigned long)(reference))) + +/* + * CATCHUP_INCREMENT, used when the function falls behind the counter. + * Could be an increasing function instead of a constant; + */ + +#define CATCHUP_INCREMENT 64 + +static int mips_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + unsigned long flags; + unsigned int mtflags; + unsigned long timestamp, reference, previous; + unsigned long nextcomp = 0L; + int vpe = current_cpu_data.vpe_id; + int cpu = smp_processor_id(); + local_irq_save(flags); + mtflags = dmt(); + + /* + * Maintain the per-TC virtual timer + * and program the per-VPE shared Count register + * as appropriate here... + */ + reference = (unsigned long)read_c0_count(); + timestamp = MAKEVALID(reference + delta); + /* + * To really model the clock, we have to catch the case + * where the current next-in-VPE timestamp is the old + * timestamp for the calling CPE, but the new value is + * in fact later. In that case, we have to do a full + * scan and discover the new next-in-VPE CPU id and + * timestamp. + */ + previous = smtc_nexttime[vpe][cpu]; + if (cpu == smtc_nextinvpe[vpe] && ISVALID(previous) + && IS_SOONER(previous, timestamp, reference)) { + int i; + int soonest = cpu; + + /* + * Update timestamp array here, so that new + * value gets considered along with those of + * other virtual CPUs on the VPE. + */ + smtc_nexttime[vpe][cpu] = timestamp; + for_each_online_cpu(i) { + if (ISVALID(smtc_nexttime[vpe][i]) + && IS_SOONER(smtc_nexttime[vpe][i], + smtc_nexttime[vpe][soonest], reference)) { + soonest = i; + } + } + smtc_nextinvpe[vpe] = soonest; + nextcomp = smtc_nexttime[vpe][soonest]; + /* + * Otherwise, we don't have to process the whole array rank, + * we just have to see if the event horizon has gotten closer. + */ + } else { + if (!ISVALID(smtc_nexttime[vpe][smtc_nextinvpe[vpe]]) || + IS_SOONER(timestamp, + smtc_nexttime[vpe][smtc_nextinvpe[vpe]], reference)) { + smtc_nextinvpe[vpe] = cpu; + nextcomp = timestamp; + } + /* + * Since next-in-VPE may me the same as the executing + * virtual CPU, we update the array *after* checking + * its value. + */ + smtc_nexttime[vpe][cpu] = timestamp; + } + + /* + * It may be that, in fact, we don't need to update Compare, + * but if we do, we want to make sure we didn't fall into + * a crack just behind Count. + */ + if (ISVALID(nextcomp)) { + write_c0_compare(nextcomp); + ehb(); + /* + * We never return an error, we just make sure + * that we trigger the handlers as quickly as + * we can if we fell behind. + */ + while ((nextcomp - (unsigned long)read_c0_count()) + > (unsigned long)LONG_MAX) { + nextcomp += CATCHUP_INCREMENT; + write_c0_compare(nextcomp); + ehb(); + } + } + emt(mtflags); + local_irq_restore(flags); + return 0; +} + + +void smtc_distribute_timer(int vpe) +{ + unsigned long flags; + unsigned int mtflags; + int cpu; + struct clock_event_device *cd; + unsigned long nextstamp = 0L; + unsigned long reference; + + +repeat: + for_each_online_cpu(cpu) { + /* + * Find virtual CPUs within the current VPE who have + * unserviced timer requests whose time is now past. + */ + local_irq_save(flags); + mtflags = dmt(); + if (cpu_data[cpu].vpe_id == vpe && + ISVALID(smtc_nexttime[vpe][cpu])) { + reference = (unsigned long)read_c0_count(); + if ((smtc_nexttime[vpe][cpu] - reference) + > (unsigned long)LONG_MAX) { + smtc_nexttime[vpe][cpu] = 0L; + emt(mtflags); + local_irq_restore(flags); + /* + * We don't send IPIs to ourself. + */ + if (cpu != smp_processor_id()) { + smtc_send_ipi(cpu, SMTC_CLOCK_TICK, 0); + } else { + cd = &per_cpu(mips_clockevent_device, cpu); + cd->event_handler(cd); + } + } else { + /* Local to VPE but Valid Time not yet reached. */ + if (!ISVALID(nextstamp) || + IS_SOONER(smtc_nexttime[vpe][cpu], nextstamp, + reference)) { + smtc_nextinvpe[vpe] = cpu; + nextstamp = smtc_nexttime[vpe][cpu]; + } + emt(mtflags); + local_irq_restore(flags); + } + } else { + emt(mtflags); + local_irq_restore(flags); + + } + } + /* Reprogram for interrupt at next soonest timestamp for VPE */ + if (ISVALID(nextstamp)) { + write_c0_compare(nextstamp); + ehb(); + if ((nextstamp - (unsigned long)read_c0_count()) + > (unsigned long)LONG_MAX) + goto repeat; + } +} + + +irqreturn_t c0_compare_interrupt(int irq, void *dev_id) +{ + int cpu = smp_processor_id(); + + /* If we're running SMTC, we've got MIPS MT and therefore MIPS32R2 */ + handle_perf_irq(1); + + if (read_c0_cause() & (1 << 30)) { + /* Clear Count/Compare Interrupt */ + write_c0_compare(read_c0_compare()); + smtc_distribute_timer(cpu_data[cpu].vpe_id); + } + return IRQ_HANDLED; +} + + +int __cpuinit mips_clockevent_init(void) +{ + uint64_t mips_freq = mips_hpt_frequency; + unsigned int cpu = smp_processor_id(); + struct clock_event_device *cd; + unsigned int irq; + int i; + int j; + + if (!cpu_has_counter || !mips_hpt_frequency) + return -ENXIO; + if (cpu == 0) { + for (i = 0; i < num_possible_cpus(); i++) { + smtc_nextinvpe[i] = 0; + for (j = 0; j < num_possible_cpus(); j++) + smtc_nexttime[i][j] = 0L; + } + /* + * SMTC also can't have the usablility test + * run by secondary TCs once Compare is in use. + */ + if (!c0_compare_int_usable()) + return -ENXIO; + } + + /* + * With vectored interrupts things are getting platform specific. + * get_c0_compare_int is a hook to allow a platform to return the + * interrupt number of it's liking. + */ + irq = MIPS_CPU_IRQ_BASE + cp0_compare_irq; + if (get_c0_compare_int) + irq = get_c0_compare_int(); + + cd = &per_cpu(mips_clockevent_device, cpu); + + cd->name = "MIPS"; + cd->features = CLOCK_EVT_FEAT_ONESHOT; + + /* Calculate the min / max delta */ + cd->mult = div_sc((unsigned long) mips_freq, NSEC_PER_SEC, 32); + cd->shift = 32; + cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd); + cd->min_delta_ns = clockevent_delta2ns(0x300, cd); + + cd->rating = 300; + cd->irq = irq; + cd->cpumask = cpumask_of_cpu(cpu); + cd->set_next_event = mips_next_event; + cd->set_mode = mips_set_clock_mode; + cd->event_handler = mips_event_handler; + + clockevents_register_device(cd); + + /* + * On SMTC we only want to do the data structure + * initialization and IRQ setup once. + */ + if (cpu) + return 0; + /* + * And we need the hwmask associated with the c0_compare + * vector to be initialized. + */ + irq_hwmask[irq] = (0x100 << cp0_compare_irq); + if (cp0_timer_irq_installed) + return 0; + + cp0_timer_irq_installed = 1; + + setup_irq(irq, &c0_compare_irqaction); + + return 0; +} diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 11c92dc5379..e621fda8ab3 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -54,14 +54,18 @@ extern void r4k_wait(void); * interrupt is requested" restriction in the MIPS32/MIPS64 architecture makes * using this version a gamble. */ -static void r4k_wait_irqoff(void) +void r4k_wait_irqoff(void) { local_irq_disable(); if (!need_resched()) - __asm__(" .set mips3 \n" + __asm__(" .set push \n" + " .set mips3 \n" " wait \n" - " .set mips0 \n"); + " .set pop \n"); local_irq_enable(); + __asm__(" .globl __pastwait \n" + "__pastwait: \n"); + return; } /* diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S index e29598ae939..ffa331029e0 100644 --- a/arch/mips/kernel/entry.S +++ b/arch/mips/kernel/entry.S @@ -79,11 +79,6 @@ FEXPORT(syscall_exit) FEXPORT(restore_all) # restore full frame #ifdef CONFIG_MIPS_MT_SMTC -/* Detect and execute deferred IPI "interrupts" */ - LONG_L s0, TI_REGS($28) - LONG_S sp, TI_REGS($28) - jal deferred_smtc_ipi - LONG_S s0, TI_REGS($28) #ifdef CONFIG_MIPS_MT_SMTC_IM_BACKSTOP /* Re-arm any temporarily masked interrupts not explicitly "acked" */ mfc0 v0, CP0_TCSTATUS @@ -112,6 +107,11 @@ FEXPORT(restore_all) # restore full frame xor t0, t0, t3 mtc0 t0, CP0_TCCONTEXT #endif /* CONFIG_MIPS_MT_SMTC_IM_BACKSTOP */ +/* Detect and execute deferred IPI "interrupts" */ + LONG_L s0, TI_REGS($28) + LONG_S sp, TI_REGS($28) + jal deferred_smtc_ipi + LONG_S s0, TI_REGS($28) #endif /* CONFIG_MIPS_MT_SMTC */ .set noat RESTORE_TEMP diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index f886dd7f708..01dcbe38fa0 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -282,8 +282,8 @@ NESTED(except_vec_vi_handler, 0, sp) and t0, a0, t1 #ifdef CONFIG_MIPS_MT_SMTC_IM_BACKSTOP mfc0 t2, CP0_TCCONTEXT - or t0, t0, t2 - mtc0 t0, CP0_TCCONTEXT + or t2, t0, t2 + mtc0 t2, CP0_TCCONTEXT #endif /* CONFIG_MIPS_MT_SMTC_IM_BACKSTOP */ xor t1, t1, t0 mtc0 t1, CP0_STATUS diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S index 361364501d3..492a0a8d70f 100644 --- a/arch/mips/kernel/head.S +++ b/arch/mips/kernel/head.S @@ -22,6 +22,7 @@ #include <asm/irqflags.h> #include <asm/regdef.h> #include <asm/page.h> +#include <asm/pgtable-bits.h> #include <asm/mipsregs.h> #include <asm/stackframe.h> diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c index df4d3f2f740..dc9eb72ed9d 100644 --- a/arch/mips/kernel/mips-mt-fpaff.c +++ b/arch/mips/kernel/mips-mt-fpaff.c @@ -159,7 +159,7 @@ __setup("fpaff=", fpaff_thresh); /* * FPU Use Factor empirically derived from experiments on 34K */ -#define FPUSEFACTOR 333 +#define FPUSEFACTOR 2000 static __init int mt_fp_affinity_init(void) { diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index ce7684335a4..22fc19bbe87 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -55,7 +55,7 @@ void __noreturn cpu_idle(void) while (1) { tick_nohz_stop_sched_tick(1); while (!need_resched()) { -#ifdef CONFIG_SMTC_IDLE_HOOK_DEBUG +#ifdef CONFIG_MIPS_MT_SMTC extern void smtc_idle_loop_hook(void); smtc_idle_loop_hook(); @@ -145,19 +145,18 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, */ p->thread.cp0_status = read_c0_status() & ~(ST0_CU2|ST0_CU1); childregs->cp0_status &= ~(ST0_CU2|ST0_CU1); + +#ifdef CONFIG_MIPS_MT_SMTC + /* + * SMTC restores TCStatus after Status, and the CU bits + * are aliased there. + */ + childregs->cp0_tcstatus &= ~(ST0_CU2|ST0_CU1); +#endif clear_tsk_thread_flag(p, TIF_USEDFPU); #ifdef CONFIG_MIPS_MT_FPAFF clear_tsk_thread_flag(p, TIF_FPUBOUND); - - /* - * FPU affinity support is cleaner if we track the - * user-visible CPU affinity from the very beginning. - * The generic cpus_allowed mask will already have - * been copied from the parent before copy_thread - * is invoked. - */ - p->thread.user_cpus_allowed = p->cpus_allowed; #endif /* CONFIG_MIPS_MT_FPAFF */ if (clone_flags & CLONE_SETTLS) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 35234b92b9a..96ffc9c6d19 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -238,7 +238,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) case FPC_EIR: { /* implementation / version register */ unsigned int flags; #ifdef CONFIG_MIPS_MT_SMTC - unsigned int irqflags; + unsigned long irqflags; unsigned int mtflags; #endif /* CONFIG_MIPS_MT_SMTC */ diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 4410f172b8a..7b59cfb7e60 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -121,6 +121,8 @@ asmlinkage __cpuinit void start_secondary(void) cpu = smp_processor_id(); cpu_data[cpu].udelay_val = loops_per_jiffy; + notify_cpu_starting(cpu); + mp_ops->smp_finish(); set_cpu_sibling_map(cpu); diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c index a516286532a..897fb2b4751 100644 --- a/arch/mips/kernel/smtc.c +++ b/arch/mips/kernel/smtc.c @@ -1,4 +1,21 @@ -/* Copyright (C) 2004 Mips Technologies, Inc */ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) 2004 Mips Technologies, Inc + * Copyright (C) 2008 Kevin D. Kissell + */ #include <linux/clockchips.h> #include <linux/kernel.h> @@ -21,7 +38,6 @@ #include <asm/time.h> #include <asm/addrspace.h> #include <asm/smtc.h> -#include <asm/smtc_ipi.h> #include <asm/smtc_proc.h> /* @@ -58,11 +74,6 @@ unsigned long irq_hwmask[NR_IRQS]; asiduse smtc_live_asid[MAX_SMTC_TLBS][MAX_SMTC_ASIDS]; -/* - * Clock interrupt "latch" buffers, per "CPU" - */ - -static atomic_t ipi_timer_latch[NR_CPUS]; /* * Number of InterProcessor Interrupt (IPI) message buffers to allocate @@ -70,7 +81,7 @@ static atomic_t ipi_timer_latch[NR_CPUS]; #define IPIBUF_PER_CPU 4 -static struct smtc_ipi_q IPIQ[NR_CPUS]; +struct smtc_ipi_q IPIQ[NR_CPUS]; static struct smtc_ipi_q freeIPIq; @@ -282,7 +293,7 @@ static void smtc_configure_tlb(void) * phys_cpu_present_map and the logical/physical mappings. */ -int __init mipsmt_build_cpu_map(int start_cpu_slot) +int __init smtc_build_cpu_map(int start_cpu_slot) { int i, ntcs; @@ -325,7 +336,12 @@ static void smtc_tc_setup(int vpe, int tc, int cpu) write_tc_c0_tcstatus((read_tc_c0_tcstatus() & ~(TCSTATUS_TKSU | TCSTATUS_DA | TCSTATUS_IXMT)) | TCSTATUS_A); - write_tc_c0_tccontext(0); + /* + * TCContext gets an offset from the base of the IPIQ array + * to be used in low-level code to detect the presence of + * an active IPI queue + */ + write_tc_c0_tccontext((sizeof(struct smtc_ipi_q) * cpu) << 16); /* Bind tc to vpe */ write_tc_c0_tcbind(vpe); /* In general, all TCs should have the same cpu_data indications */ @@ -336,10 +352,18 @@ static void smtc_tc_setup(int vpe, int tc, int cpu) cpu_data[cpu].options &= ~MIPS_CPU_FPU; cpu_data[cpu].vpe_id = vpe; cpu_data[cpu].tc_id = tc; + /* Multi-core SMTC hasn't been tested, but be prepared */ + cpu_data[cpu].core = (read_vpe_c0_ebase() >> 1) & 0xff; } +/* + * Tweak to get Count registes in as close a sync as possible. + * Value seems good for 34K-class cores. + */ + +#define CP0_SKEW 8 -void mipsmt_prepare_cpus(void) +void smtc_prepare_cpus(int cpus) { int i, vpe, tc, ntc, nvpe, tcpervpe[NR_CPUS], slop, cpu; unsigned long flags; @@ -363,13 +387,13 @@ void mipsmt_prepare_cpus(void) IPIQ[i].head = IPIQ[i].tail = NULL; spin_lock_init(&IPIQ[i].lock); IPIQ[i].depth = 0; - atomic_set(&ipi_timer_latch[i], 0); } /* cpu_data index starts at zero */ cpu = 0; cpu_data[cpu].vpe_id = 0; cpu_data[cpu].tc_id = 0; + cpu_data[cpu].core = (read_c0_ebase() >> 1) & 0xff; cpu++; /* Report on boot-time options */ @@ -484,7 +508,8 @@ void mipsmt_prepare_cpus(void) write_vpe_c0_compare(0); /* Propagate Config7 */ write_vpe_c0_config7(read_c0_config7()); - write_vpe_c0_count(read_c0_count()); + write_vpe_c0_count(read_c0_count() + CP0_SKEW); + ehb(); } /* enable multi-threading within VPE */ write_vpe_c0_vpecontrol(read_vpe_c0_vpecontrol() | VPECONTROL_TE); @@ -556,7 +581,7 @@ void mipsmt_prepare_cpus(void) void __cpuinit smtc_boot_secondary(int cpu, struct task_struct *idle) { extern u32 kernelsp[NR_CPUS]; - long flags; + unsigned long flags; int mtflags; LOCK_MT_PRA(); @@ -585,24 +610,22 @@ void __cpuinit smtc_boot_secondary(int cpu, struct task_struct *idle) void smtc_init_secondary(void) { - /* - * Start timer on secondary VPEs if necessary. - * plat_timer_setup has already have been invoked by init/main - * on "boot" TC. Like per_cpu_trap_init() hack, this assumes that - * SMTC init code assigns TCs consdecutively and in ascending order - * to across available VPEs. - */ - if (((read_c0_tcbind() & TCBIND_CURTC) != 0) && - ((read_c0_tcbind() & TCBIND_CURVPE) - != cpu_data[smp_processor_id() - 1].vpe_id)){ - write_c0_compare(read_c0_count() + mips_hpt_frequency/HZ); - } - local_irq_enable(); } void smtc_smp_finish(void) { + int cpu = smp_processor_id(); + + /* + * Lowest-numbered CPU per VPE starts a clock tick. + * Like per_cpu_trap_init() hack, this assumes that + * SMTC init code assigns TCs consdecutively and + * in ascending order across available VPEs. + */ + if (cpu > 0 && (cpu_data[cpu].vpe_id != cpu_data[cpu - 1].vpe_id)) + write_c0_compare(read_c0_count() + mips_hpt_frequency/HZ); + printk("TC %d going on-line as CPU %d\n", cpu_data[smp_processor_id()].tc_id, smp_processor_id()); } @@ -753,8 +776,10 @@ void smtc_send_ipi(int cpu, int type, unsigned int action) { int tcstatus; struct smtc_ipi *pipi; - long flags; + unsigned long flags; int mtflags; + unsigned long tcrestart; + extern void r4k_wait_irqoff(void), __pastwait(void); if (cpu == smp_processor_id()) { printk("Cannot Send IPI to self!\n"); @@ -771,8 +796,6 @@ void smtc_send_ipi(int cpu, int type, unsigned int action) pipi->arg = (void *)action; pipi->dest = cpu; if (cpu_data[cpu].vpe_id != cpu_data[smp_processor_id()].vpe_id) { - if (type == SMTC_CLOCK_TICK) - atomic_inc(&ipi_timer_latch[cpu]); /* If not on same VPE, enqueue and send cross-VPE interrupt */ smtc_ipi_nq(&IPIQ[cpu], pipi); LOCK_CORE_PRA(); @@ -800,22 +823,29 @@ void smtc_send_ipi(int cpu, int type, unsigned int action) if ((tcstatus & TCSTATUS_IXMT) != 0) { /* - * Spin-waiting here can deadlock, - * so we queue the message for the target TC. + * If we're in the the irq-off version of the wait + * loop, we need to force exit from the wait and + * do a direct post of the IPI. + */ + if (cpu_wait == r4k_wait_irqoff) { + tcrestart = read_tc_c0_tcrestart(); + if (tcrestart >= (unsigned long)r4k_wait_irqoff + && tcrestart < (unsigned long)__pastwait) { + write_tc_c0_tcrestart(__pastwait); + tcstatus &= ~TCSTATUS_IXMT; + write_tc_c0_tcstatus(tcstatus); + goto postdirect; + } + } + /* + * Otherwise we queue the message for the target TC + * to pick up when he does a local_irq_restore() */ write_tc_c0_tchalt(0); UNLOCK_CORE_PRA(); - /* Try to reduce redundant timer interrupt messages */ - if (type == SMTC_CLOCK_TICK) { - if (atomic_postincrement(&ipi_timer_latch[cpu])!=0){ - smtc_ipi_nq(&freeIPIq, pipi); - return; - } - } smtc_ipi_nq(&IPIQ[cpu], pipi); } else { - if (type == SMTC_CLOCK_TICK) - atomic_inc(&ipi_timer_latch[cpu]); +postdirect: post_direct_ipi(cpu, pipi); write_tc_c0_tchalt(0); UNLOCK_CORE_PRA(); @@ -883,7 +913,7 @@ static void ipi_call_interrupt(void) smp_call_function_interrupt(); } -DECLARE_PER_CPU(struct clock_event_device, smtc_dummy_clockevent_device); +DECLARE_PER_CPU(struct clock_event_device, mips_clockevent_device); void ipi_decode(struct smtc_ipi *pipi) { @@ -891,20 +921,13 @@ void ipi_decode(struct smtc_ipi *pipi) struct clock_event_device *cd; void *arg_copy = pipi->arg; int type_copy = pipi->type; - int ticks; - smtc_ipi_nq(&freeIPIq, pipi); switch (type_copy) { case SMTC_CLOCK_TICK: irq_enter(); kstat_this_cpu.irqs[MIPS_CPU_IRQ_BASE + 1]++; - cd = &per_cpu(smtc_dummy_clockevent_device, cpu); - ticks = atomic_read(&ipi_timer_latch[cpu]); - atomic_sub(ticks, &ipi_timer_latch[cpu]); - while (ticks) { - cd->event_handler(cd); - ticks--; - } + cd = &per_cpu(mips_clockevent_device, cpu); + cd->event_handler(cd); irq_exit(); break; @@ -937,24 +960,48 @@ void ipi_decode(struct smtc_ipi *pipi) } } +/* + * Similar to smtc_ipi_replay(), but invoked from context restore, + * so it reuses the current exception frame rather than set up a + * new one with self_ipi. + */ + void deferred_smtc_ipi(void) { - struct smtc_ipi *pipi; - unsigned long flags; -/* DEBUG */ - int q = smp_processor_id(); + int cpu = smp_processor_id(); /* * Test is not atomic, but much faster than a dequeue, * and the vast majority of invocations will have a null queue. + * If irq_disabled when this was called, then any IPIs queued + * after we test last will be taken on the next irq_enable/restore. + * If interrupts were enabled, then any IPIs added after the + * last test will be taken directly. */ - if (IPIQ[q].head != NULL) { - while((pipi = smtc_ipi_dq(&IPIQ[q])) != NULL) { - /* ipi_decode() should be called with interrupts off */ - local_irq_save(flags); + + while (IPIQ[cpu].head != NULL) { + struct smtc_ipi_q *q = &IPIQ[cpu]; + struct smtc_ipi *pipi; + unsigned long flags; + + /* + * It may be possible we'll come in with interrupts + * already enabled. + */ + local_irq_save(flags); + + spin_lock(&q->lock); + pipi = __smtc_ipi_dq(q); + spin_unlock(&q->lock); + if (pipi != NULL) ipi_decode(pipi); - local_irq_restore(flags); - } + /* + * The use of the __raw_local restore isn't + * as obviously necessary here as in smtc_ipi_replay(), + * but it's more efficient, given that we're already + * running down the IPI queue. + */ + __raw_local_irq_restore(flags); } } @@ -975,7 +1022,7 @@ static irqreturn_t ipi_interrupt(int irq, void *dev_idm) struct smtc_ipi *pipi; unsigned long tcstatus; int sent; - long flags; + unsigned long flags; unsigned int mtflags; unsigned int vpflags; @@ -1066,55 +1113,53 @@ static void setup_cross_vpe_interrupts(unsigned int nvpe) /* * SMTC-specific hacks invoked from elsewhere in the kernel. - * - * smtc_ipi_replay is called from raw_local_irq_restore which is only ever - * called with interrupts disabled. We do rely on interrupts being disabled - * here because using spin_lock_irqsave()/spin_unlock_irqrestore() would - * result in a recursive call to raw_local_irq_restore(). */ -static void __smtc_ipi_replay(void) + /* + * smtc_ipi_replay is called from raw_local_irq_restore + */ + +void smtc_ipi_replay(void) { unsigned int cpu = smp_processor_id(); /* * To the extent that we've ever turned interrupts off, * we may have accumulated deferred IPIs. This is subtle. - * If we use the smtc_ipi_qdepth() macro, we'll get an - * exact number - but we'll also disable interrupts - * and create a window of failure where a new IPI gets - * queued after we test the depth but before we re-enable - * interrupts. So long as IXMT never gets set, however, * we should be OK: If we pick up something and dispatch * it here, that's great. If we see nothing, but concurrent * with this operation, another TC sends us an IPI, IXMT * is clear, and we'll handle it as a real pseudo-interrupt - * and not a pseudo-pseudo interrupt. + * and not a pseudo-pseudo interrupt. The important thing + * is to do the last check for queued message *after* the + * re-enabling of interrupts. */ - if (IPIQ[cpu].depth > 0) { - while (1) { - struct smtc_ipi_q *q = &IPIQ[cpu]; - struct smtc_ipi *pipi; - extern void self_ipi(struct smtc_ipi *); - - spin_lock(&q->lock); - pipi = __smtc_ipi_dq(q); - spin_unlock(&q->lock); - if (!pipi) - break; + while (IPIQ[cpu].head != NULL) { + struct smtc_ipi_q *q = &IPIQ[cpu]; + struct smtc_ipi *pipi; + unsigned long flags; + + /* + * It's just possible we'll come in with interrupts + * already enabled. + */ + local_irq_save(flags); + + spin_lock(&q->lock); + pipi = __smtc_ipi_dq(q); + spin_unlock(&q->lock); + /* + ** But use a raw restore here to avoid recursion. + */ + __raw_local_irq_restore(flags); + if (pipi) { self_ipi(pipi); smtc_cpu_stats[cpu].selfipis++; } } } -void smtc_ipi_replay(void) -{ - raw_local_irq_disable(); - __smtc_ipi_replay(); -} - EXPORT_SYMBOL(smtc_ipi_replay); void smtc_idle_loop_hook(void) @@ -1193,40 +1238,13 @@ void smtc_idle_loop_hook(void) } } - /* - * Now that we limit outstanding timer IPIs, check for hung TC - */ - for (tc = 0; tc < NR_CPUS; tc++) { - /* Don't check ourself - we'll dequeue IPIs just below */ - if ((tc != smp_processor_id()) && - atomic_read(&ipi_timer_latch[tc]) > timerq_limit) { - if (clock_hang_reported[tc] == 0) { - pdb_msg += sprintf(pdb_msg, - "TC %d looks hung with timer latch at %d\n", - tc, atomic_read(&ipi_timer_latch[tc])); - clock_hang_reported[tc]++; - } - } - } emt(mtflags); local_irq_restore(flags); if (pdb_msg != &id_ho_db_msg[0]) printk("CPU%d: %s", smp_processor_id(), id_ho_db_msg); #endif /* CONFIG_SMTC_IDLE_HOOK_DEBUG */ - /* - * Replay any accumulated deferred IPIs. If "Instant Replay" - * is in use, there should never be any. - */ -#ifndef CONFIG_MIPS_MT_SMTC_INSTANT_REPLAY - { - unsigned long flags; - - local_irq_save(flags); - __smtc_ipi_replay(); - local_irq_restore(flags); - } -#endif /* CONFIG_MIPS_MT_SMTC_INSTANT_REPLAY */ + smtc_ipi_replay(); } void smtc_soft_dump(void) @@ -1242,10 +1260,6 @@ void smtc_soft_dump(void) printk("%d: %ld\n", i, smtc_cpu_stats[i].selfipis); } smtc_ipi_qdump(); - printk("Timer IPI Backlogs:\n"); - for (i=0; i < NR_CPUS; i++) { - printk("%d: %d\n", i, atomic_read(&ipi_timer_latch[i])); - } printk("%d Recoveries of \"stolen\" FPU\n", atomic_read(&smtc_fpu_recoveries)); } diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 5fd0cd020af..b602ac6eb47 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -825,8 +825,10 @@ static void mt_ase_fp_affinity(void) if (cpus_intersects(current->cpus_allowed, mt_fpu_cpumask)) { cpumask_t tmask; - cpus_and(tmask, current->thread.user_cpus_allowed, - mt_fpu_cpumask); + current->thread.user_cpus_allowed + = current->cpus_allowed; + cpus_and(tmask, current->cpus_allowed, + mt_fpu_cpumask); set_cpus_allowed(current, tmask); set_thread_flag(TIF_FPUBOUND); } diff --git a/arch/mips/mti-malta/Makefile b/arch/mips/mti-malta/Makefile index 3b7dd722c32..cef2db8d222 100644 --- a/arch/mips/mti-malta/Makefile +++ b/arch/mips/mti-malta/Makefile @@ -15,6 +15,6 @@ obj-$(CONFIG_EARLY_PRINTK) += malta-console.o obj-$(CONFIG_PCI) += malta-pci.o # FIXME FIXME FIXME -obj-$(CONFIG_MIPS_MT_SMTC) += malta_smtc.o +obj-$(CONFIG_MIPS_MT_SMTC) += malta-smtc.o EXTRA_CFLAGS += -Werror diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c index 5ea705e4945..f84a46a8ae6 100644 --- a/arch/mips/mti-malta/malta-smtc.c +++ b/arch/mips/mti-malta/malta-smtc.c @@ -84,12 +84,17 @@ static void msmtc_cpus_done(void) static void __init msmtc_smp_setup(void) { - mipsmt_build_cpu_map(0); + /* + * we won't get the definitive value until + * we've run smtc_prepare_cpus later, but + * we would appear to need an upper bound now. + */ + smp_num_siblings = smtc_build_cpu_map(0); } static void __init msmtc_prepare_cpus(unsigned int max_cpus) { - mipsmt_prepare_cpus(); + smtc_prepare_cpus(max_cpus); } struct plat_smp_ops msmtc_smp_ops = { diff --git a/arch/mips/sibyte/swarm/Makefile b/arch/mips/sibyte/swarm/Makefile index f18ba9201bb..7b45f199d92 100644 --- a/arch/mips/sibyte/swarm/Makefile +++ b/arch/mips/sibyte/swarm/Makefile @@ -1,3 +1,4 @@ -obj-y := setup.o rtc_xicor1241.o rtc_m41t81.o +obj-y := platform.o setup.o rtc_xicor1241.o \ + rtc_m41t81.o obj-$(CONFIG_I2C_BOARDINFO) += swarm-i2c.o diff --git a/arch/mips/sibyte/swarm/platform.c b/arch/mips/sibyte/swarm/platform.c new file mode 100644 index 00000000000..dd0e5b9b64e --- /dev/null +++ b/arch/mips/sibyte/swarm/platform.c @@ -0,0 +1,81 @@ +#include <linux/err.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/platform_device.h> +#include <linux/ata_platform.h> + +#include <asm/sibyte/board.h> +#include <asm/sibyte/sb1250_genbus.h> +#include <asm/sibyte/sb1250_regs.h> + +#define DRV_NAME "pata-swarm" + +#define SWARM_IDE_SHIFT 5 +#define SWARM_IDE_BASE 0x1f0 +#define SWARM_IDE_CTRL 0x3f6 + +static struct resource swarm_pata_resource[] = { + { + .name = "Swarm GenBus IDE", + .flags = IORESOURCE_MEM, + }, { + .name = "Swarm GenBus IDE", + .flags = IORESOURCE_MEM, + }, { + .name = "Swarm GenBus IDE", + .flags = IORESOURCE_IRQ, + .start = K_INT_GB_IDE, + .end = K_INT_GB_IDE, + }, +}; + +static struct pata_platform_info pata_platform_data = { + .ioport_shift = SWARM_IDE_SHIFT, +}; + +static struct platform_device swarm_pata_device = { + .name = "pata_platform", + .id = -1, + .resource = swarm_pata_resource, + .num_resources = ARRAY_SIZE(swarm_pata_resource), + .dev = { + .platform_data = &pata_platform_data, + .coherent_dma_mask = ~0, /* grumble */ + }, +}; + +static int __init swarm_pata_init(void) +{ + u8 __iomem *base; + phys_t offset, size; + struct resource *r; + + if (!SIBYTE_HAVE_IDE) + return -ENODEV; + + base = ioremap(A_IO_EXT_BASE, 0x800); + offset = __raw_readq(base + R_IO_EXT_REG(R_IO_EXT_START_ADDR, IDE_CS)); + size = __raw_readq(base + R_IO_EXT_REG(R_IO_EXT_MULT_SIZE, IDE_CS)); + iounmap(base); + + offset = G_IO_START_ADDR(offset) << S_IO_ADDRBASE; + size = (G_IO_MULT_SIZE(size) + 1) << S_IO_REGSIZE; + if (offset < A_PHYS_GENBUS || offset >= A_PHYS_GENBUS_END) { + pr_info(DRV_NAME ": PATA interface at GenBus disabled\n"); + + return -EBUSY; + } + + pr_info(DRV_NAME ": PATA interface at GenBus slot %i\n", IDE_CS); + + r = swarm_pata_resource; + r[0].start = offset + (SWARM_IDE_BASE << SWARM_IDE_SHIFT); + r[0].end = offset + ((SWARM_IDE_BASE + 8) << SWARM_IDE_SHIFT) - 1; + r[1].start = offset + (SWARM_IDE_CTRL << SWARM_IDE_SHIFT); + r[1].end = offset + ((SWARM_IDE_CTRL + 1) << SWARM_IDE_SHIFT) - 1; + + return platform_device_register(&swarm_pata_device); +} + +device_initcall(swarm_pata_init); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 5337ca7bb64..c27b10a1bd7 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -453,6 +453,7 @@ int __devinit start_secondary(void *unused) secondary_cpu_time_init(); ipi_call_lock(); + notify_cpu_starting(cpu); cpu_set(cpu, cpu_online_map); /* Update sibling maps */ base = cpu_first_thread_in_core(cpu); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 00b9b4dec5e..9e8b1f9b8f4 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -585,6 +585,8 @@ int __cpuinit start_secondary(void *cpuvoid) /* Enable pfault pseudo page faults on this cpu. */ pfault_init(); + /* call cpu notifiers */ + notify_cpu_starting(smp_processor_id()); /* Mark this cpu as online */ spin_lock(&call_lock); cpu_set(smp_processor_id(), cpu_online_map); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index ca114fe46ff..06acb1a18bb 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -169,6 +169,8 @@ void init_cpu_timer(void) static void clock_comparator_interrupt(__u16 code) { + if (S390_lowcore.clock_comparator == -1ULL) + set_clock_comparator(S390_lowcore.clock_comparator); } static void etr_timing_alert(struct etr_irq_parm *); diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index fc6ab6094df..0953cee05ef 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -1,14 +1,9 @@ /* - * arch/s390/lib/delay.c * Precise Delay Loops for S390 * - * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * - * Derived from "arch/i386/lib/delay.c" - * Copyright (C) 1993 Linus Torvalds - * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz> + * Copyright IBM Corp. 1999,2008 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Heiko Carstens <heiko.carstens@de.ibm.com>, */ #include <linux/sched.h> @@ -29,30 +24,31 @@ void __delay(unsigned long loops) asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1)); } -/* - * Waits for 'usecs' microseconds using the TOD clock comparator. - */ -void __udelay(unsigned long usecs) +static void __udelay_disabled(unsigned long usecs) { - u64 end, time, old_cc = 0; - unsigned long flags, cr0, mask, dummy; - int irq_context; + unsigned long mask, cr0, cr0_saved; + u64 clock_saved; - irq_context = in_interrupt(); - if (!irq_context) - local_bh_disable(); - local_irq_save(flags); - if (raw_irqs_disabled_flags(flags)) { - old_cc = local_tick_disable(); - S390_lowcore.clock_comparator = -1ULL; - __ctl_store(cr0, 0, 0); - dummy = (cr0 & 0xffff00e0) | 0x00000800; - __ctl_load(dummy , 0, 0); - mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT; - } else - mask = psw_kernel_bits | PSW_MASK_WAIT | - PSW_MASK_EXT | PSW_MASK_IO; + clock_saved = local_tick_disable(); + set_clock_comparator(get_clock() + ((u64) usecs << 12)); + __ctl_store(cr0_saved, 0, 0); + cr0 = (cr0_saved & 0xffff00e0) | 0x00000800; + __ctl_load(cr0 , 0, 0); + mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT; + trace_hardirqs_on(); + __load_psw_mask(mask); + local_irq_disable(); + __ctl_load(cr0_saved, 0, 0); + local_tick_enable(clock_saved); + set_clock_comparator(S390_lowcore.clock_comparator); +} +static void __udelay_enabled(unsigned long usecs) +{ + unsigned long mask; + u64 end, time; + + mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT | PSW_MASK_IO; end = get_clock() + ((u64) usecs << 12); do { time = end < S390_lowcore.clock_comparator ? @@ -62,13 +58,37 @@ void __udelay(unsigned long usecs) __load_psw_mask(mask); local_irq_disable(); } while (get_clock() < end); + set_clock_comparator(S390_lowcore.clock_comparator); +} - if (raw_irqs_disabled_flags(flags)) { - __ctl_load(cr0, 0, 0); - local_tick_enable(old_cc); +/* + * Waits for 'usecs' microseconds using the TOD clock comparator. + */ +void __udelay(unsigned long usecs) +{ + unsigned long flags; + + preempt_disable(); + local_irq_save(flags); + if (in_irq()) { + __udelay_disabled(usecs); + goto out; + } + if (in_softirq()) { + if (raw_irqs_disabled_flags(flags)) + __udelay_disabled(usecs); + else + __udelay_enabled(usecs); + goto out; } - if (!irq_context) + if (raw_irqs_disabled_flags(flags)) { + local_bh_disable(); + __udelay_disabled(usecs); _local_bh_enable(); - set_clock_comparator(S390_lowcore.clock_comparator); + goto out; + } + __udelay_enabled(usecs); +out: local_irq_restore(flags); + preempt_enable(); } diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index 60c50841143..001778f9ada 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -82,6 +82,8 @@ asmlinkage void __cpuinit start_secondary(void) preempt_disable(); + notify_cpu_starting(smp_processor_id()); + local_irq_enable(); calibrate_delay(); diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c index 69596402a50..446767e8f56 100644 --- a/arch/sparc/kernel/sun4d_smp.c +++ b/arch/sparc/kernel/sun4d_smp.c @@ -88,6 +88,7 @@ void __init smp4d_callin(void) local_flush_cache_all(); local_flush_tlb_all(); + notify_cpu_starting(cpuid); /* * Unblock the master CPU _only_ when the scheduler state * of all secondary CPUs will be up-to-date, so after diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c index a14a76ac7f3..9964890dc1d 100644 --- a/arch/sparc/kernel/sun4m_smp.c +++ b/arch/sparc/kernel/sun4m_smp.c @@ -71,6 +71,8 @@ void __cpuinit smp4m_callin(void) local_flush_cache_all(); local_flush_tlb_all(); + notify_cpu_starting(cpuid); + /* Get our local ticker going. */ smp_setup_percpu_timer(); diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c index be2d50c3aa9..04577214284 100644 --- a/arch/um/kernel/smp.c +++ b/arch/um/kernel/smp.c @@ -85,6 +85,7 @@ static int idle_proc(void *cpup) while (!cpu_isset(cpu, smp_commenced_mask)) cpu_relax(); + notify_cpu_starting(cpu); cpu_set(cpu, cpu_online_map); default_idle(); return 0; diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c index a1310c52fc0..857e492c571 100644 --- a/arch/x86/boot/compressed/relocs.c +++ b/arch/x86/boot/compressed/relocs.c @@ -492,7 +492,7 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym)) continue; } sh_symtab = sec_symtab->symtab; - sym_strtab = sec->link->strtab; + sym_strtab = sec_symtab->link->strtab; for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) { Elf32_Rel *rel; Elf32_Sym *sym; diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index bfd10fd211c..c102af85df9 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1605,6 +1605,14 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { */ { .callback = dmi_ignore_irq0_timer_override, + .ident = "HP nx6115 laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6115"), + }, + }, + { + .callback = dmi_ignore_irq0_timer_override, .ident = "HP NX6125 laptop", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), @@ -1619,6 +1627,14 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6325"), }, }, + { + .callback = dmi_ignore_irq0_timer_override, + .ident = "HP 6715b laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq 6715b"), + }, + }, {} }; diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 8282a213968..10435a120d2 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -455,12 +455,7 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) return NOTIFY_DONE; case DIE_NMI_IPI: - if (atomic_read(&kgdb_active) != -1) { - /* KGDB CPU roundup */ - kgdb_nmicallback(raw_smp_processor_id(), regs); - was_in_debug_nmi[raw_smp_processor_id()] = 1; - touch_nmi_watchdog(); - } + /* Just ignore, we will handle the roundup on DIE_NMI. */ return NOTIFY_DONE; case DIE_NMIUNKNOWN: diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 49285f8fd4d..be33a5442d8 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -626,7 +626,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info) struct pci_dev *dev; void *gatt; int i, error; - unsigned long start_pfn, end_pfn; printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); aper_size = aper_base = info->aper_size = 0; @@ -672,12 +671,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info) printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", aper_base, aper_size>>10); - /* need to map that range */ - end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); - if (end_pfn > max_low_pfn_mapped) { - start_pfn = (aper_base>>PAGE_SHIFT); - init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); - } return 0; nommu: @@ -727,7 +720,8 @@ void __init gart_iommu_init(void) { struct agp_kern_info info; unsigned long iommu_start; - unsigned long aper_size; + unsigned long aper_base, aper_size; + unsigned long start_pfn, end_pfn; unsigned long scratch; long i; @@ -765,8 +759,16 @@ void __init gart_iommu_init(void) return; } + /* need to map that range */ + aper_size = info.aper_size << 20; + aper_base = info.aper_base; + end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); + if (end_pfn > max_low_pfn_mapped) { + start_pfn = (aper_base>>PAGE_SHIFT); + init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); + } + printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); - aper_size = info.aper_size * 1024 * 1024; iommu_size = check_iommu_size(info.aper_base, aper_size); iommu_pages = iommu_size >> PAGE_SHIFT; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7985c5b3f91..0b8261c3cac 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -257,6 +257,7 @@ static void __cpuinit smp_callin(void) end_local_APIC_setup(); map_cpu_to_logical_apicid(); + notify_cpu_starting(cpuid); /* * Get our bogomips. * diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index ee0fba09215..199a5f4a873 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -448,6 +448,8 @@ static void __init start_secondary(void *unused) VDEBUG(("VOYAGER SMP: CPU%d, stack at about %p\n", cpuid, &cpuid)); + notify_cpu_starting(cpuid); + /* enable interrupts */ local_irq_enable(); diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 084109507c9..8dd3336efd7 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -165,8 +165,11 @@ static int acpi_bind_one(struct device *dev, acpi_handle handle) "firmware_node"); ret = sysfs_create_link(&acpi_dev->dev.kobj, &dev->kobj, "physical_node"); - if (acpi_dev->wakeup.flags.valid) + if (acpi_dev->wakeup.flags.valid) { device_set_wakeup_capable(dev, true); + device_set_wakeup_enable(dev, + acpi_dev->wakeup.state.enabled); + } } return 0; diff --git a/drivers/acpi/sleep/proc.c b/drivers/acpi/sleep/proc.c index 4ebbba2b6b1..bf5b04de02d 100644 --- a/drivers/acpi/sleep/proc.c +++ b/drivers/acpi/sleep/proc.c @@ -377,6 +377,14 @@ acpi_system_wakeup_device_seq_show(struct seq_file *seq, void *offset) return 0; } +static void physical_device_enable_wakeup(struct acpi_device *adev) +{ + struct device *dev = acpi_get_physical_device(adev->handle); + + if (dev && device_can_wakeup(dev)) + device_set_wakeup_enable(dev, adev->wakeup.state.enabled); +} + static ssize_t acpi_system_write_wakeup_device(struct file *file, const char __user * buffer, @@ -411,6 +419,7 @@ acpi_system_write_wakeup_device(struct file *file, } } if (found_dev) { + physical_device_enable_wakeup(found_dev); list_for_each_safe(node, next, &acpi_wakeup_device_list) { struct acpi_device *dev = container_of(node, struct @@ -428,6 +437,7 @@ acpi_system_write_wakeup_device(struct file *file, dev->pnp.bus_id, found_dev->pnp.bus_id); dev->wakeup.state.enabled = found_dev->wakeup.state.enabled; + physical_device_enable_wakeup(dev); } } } diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 94df9177124..0778d99aea7 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -364,7 +364,7 @@ static void dw_dma_tasklet(unsigned long data) int i; status_block = dma_readl(dw, RAW.BLOCK); - status_xfer = dma_readl(dw, RAW.BLOCK); + status_xfer = dma_readl(dw, RAW.XFER); status_err = dma_readl(dw, RAW.ERROR); dev_vdbg(dw->dma.dev, "tasklet: status_block=%x status_err=%x\n", diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index 8e93a797c93..052879a6f85 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -780,10 +780,6 @@ config BLK_DEV_IDEDMA_PMAC to transfer data to and from memory. Saying Y is safe and improves performance. -config BLK_DEV_IDE_SWARM - tristate "IDE for Sibyte evaluation boards" - depends on SIBYTE_SB1xxx_SOC - config BLK_DEV_IDE_AU1XXX bool "IDE for AMD Alchemy Au1200" depends on SOC_AU1200 diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 49a8c589e34..f16bb466723 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -1661,7 +1661,9 @@ static int ide_cdrom_probe_capabilities(ide_drive_t *drive) cdi->mask &= ~CDC_PLAY_AUDIO; mechtype = buf[8 + 6] >> 5; - if (mechtype == mechtype_caddy || mechtype == mechtype_popup) + if (mechtype == mechtype_caddy || + mechtype == mechtype_popup || + (drive->atapi_flags & IDE_AFLAG_NO_AUTOCLOSE)) cdi->mask |= CDC_CLOSE_TRAY; if (cdi->sanyo_slot > 0) { @@ -1859,6 +1861,8 @@ static const struct cd_list_entry ide_cd_quirks_list[] = { { "MATSHITADVD-ROM SR-8176", NULL, IDE_AFLAG_PLAY_AUDIO_OK }, { "MATSHITADVD-ROM SR-8174", NULL, IDE_AFLAG_PLAY_AUDIO_OK }, { "Optiarc DVD RW AD-5200A", NULL, IDE_AFLAG_PLAY_AUDIO_OK }, + { "Optiarc DVD RW AD-7200A", NULL, IDE_AFLAG_PLAY_AUDIO_OK }, + { "Optiarc DVD RW AD-7543A", NULL, IDE_AFLAG_NO_AUTOCLOSE }, { NULL, NULL, 0 } }; diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index adc68275585..3fa07c0aeaa 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c @@ -211,7 +211,7 @@ int ide_build_dmatable (ide_drive_t *drive, struct request *rq) xcount = bcount & 0xffff; if (is_trm290) xcount = ((xcount >> 2) - 1) << 16; - if (xcount == 0x0000) { + else if (xcount == 0x0000) { /* * Most chipsets correctly interpret a length of 0x0000 as 64KB, * but at least one (e.g. CS5530) misinterprets it as zero (!). diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 994e41099b4..a51a30e9eab 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -1492,7 +1492,7 @@ static struct device_attribute *ide_port_attrs[] = { static int ide_sysfs_register_port(ide_hwif_t *hwif) { - int i, rc; + int i, uninitialized_var(rc); for (i = 0; ide_port_attrs[i]; i++) { rc = device_create_file(hwif->portdev, ide_port_attrs[i]); diff --git a/drivers/ide/mips/Makefile b/drivers/ide/mips/Makefile index 677c7b2bac9..5873fa0b876 100644 --- a/drivers/ide/mips/Makefile +++ b/drivers/ide/mips/Makefile @@ -1,4 +1,3 @@ -obj-$(CONFIG_BLK_DEV_IDE_SWARM) += swarm.o obj-$(CONFIG_BLK_DEV_IDE_AU1XXX) += au1xxx-ide.o EXTRA_CFLAGS := -Idrivers/ide diff --git a/drivers/ide/mips/swarm.c b/drivers/ide/mips/swarm.c deleted file mode 100644 index 39c9ee99585..00000000000 --- a/drivers/ide/mips/swarm.c +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Copyright (C) 2001, 2002, 2003 Broadcom Corporation - * Copyright (C) 2004 MontaVista Software Inc. - * Author: Manish Lachwani, mlachwani@mvista.com - * Copyright (C) 2004 MIPS Technologies, Inc. All rights reserved. - * Author: Maciej W. Rozycki <macro@mips.com> - * Copyright (c) 2006, 2008 Maciej W. Rozycki - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ - -/* - * Derived loosely from ide-pmac.c, so: - * Copyright (C) 1998 Paul Mackerras. - * Copyright (C) 1995-1998 Mark Lord - */ - -/* - * Boards with SiByte processors so far have supported IDE devices via - * the Generic Bus, PCI bus, and built-in PCMCIA interface. In all - * cases, byte-swapping must be avoided for these devices (whereas - * other PCI devices, for example, will require swapping). Any - * SiByte-targetted kernel including IDE support will include this - * file. Probing of a Generic Bus for an IDE device is controlled by - * the definition of "SIBYTE_HAVE_IDE", which is provided by - * <asm/sibyte/board.h> for Broadcom boards. - */ - -#include <linux/ide.h> -#include <linux/ioport.h> -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/platform_device.h> - -#include <asm/io.h> - -#include <asm/sibyte/board.h> -#include <asm/sibyte/sb1250_genbus.h> -#include <asm/sibyte/sb1250_regs.h> - -#define DRV_NAME "ide-swarm" - -static char swarm_ide_string[] = DRV_NAME; - -static struct resource swarm_ide_resource = { - .name = "SWARM GenBus IDE", - .flags = IORESOURCE_MEM, -}; - -static struct platform_device *swarm_ide_dev; - -static const struct ide_port_info swarm_port_info = { - .name = DRV_NAME, - .host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA, -}; - -/* - * swarm_ide_probe - if the board header indicates the existence of - * Generic Bus IDE, allocate a HWIF for it. - */ -static int __devinit swarm_ide_probe(struct device *dev) -{ - u8 __iomem *base; - struct ide_host *host; - phys_t offset, size; - int i, rc; - hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; - - if (!SIBYTE_HAVE_IDE) - return -ENODEV; - - base = ioremap(A_IO_EXT_BASE, 0x800); - offset = __raw_readq(base + R_IO_EXT_REG(R_IO_EXT_START_ADDR, IDE_CS)); - size = __raw_readq(base + R_IO_EXT_REG(R_IO_EXT_MULT_SIZE, IDE_CS)); - iounmap(base); - - offset = G_IO_START_ADDR(offset) << S_IO_ADDRBASE; - size = (G_IO_MULT_SIZE(size) + 1) << S_IO_REGSIZE; - if (offset < A_PHYS_GENBUS || offset >= A_PHYS_GENBUS_END) { - printk(KERN_INFO DRV_NAME - ": IDE interface at GenBus disabled\n"); - return -EBUSY; - } - - printk(KERN_INFO DRV_NAME ": IDE interface at GenBus slot %i\n", - IDE_CS); - - swarm_ide_resource.start = offset; - swarm_ide_resource.end = offset + size - 1; - if (request_resource(&iomem_resource, &swarm_ide_resource)) { - printk(KERN_ERR DRV_NAME - ": can't request I/O memory resource\n"); - return -EBUSY; - } - - base = ioremap(offset, size); - - memset(&hw, 0, sizeof(hw)); - for (i = 0; i <= 7; i++) - hw.io_ports_array[i] = - (unsigned long)(base + ((0x1f0 + i) << 5)); - hw.io_ports.ctl_addr = - (unsigned long)(base + (0x3f6 << 5)); - hw.irq = K_INT_GB_IDE; - hw.chipset = ide_generic; - - rc = ide_host_add(&swarm_port_info, hws, &host); - if (rc) - goto err; - - dev_set_drvdata(dev, host); - - return 0; -err: - release_resource(&swarm_ide_resource); - iounmap(base); - return rc; -} - -static struct device_driver swarm_ide_driver = { - .name = swarm_ide_string, - .bus = &platform_bus_type, - .probe = swarm_ide_probe, -}; - -static void swarm_ide_platform_release(struct device *device) -{ - struct platform_device *pldev; - - /* free device */ - pldev = to_platform_device(device); - kfree(pldev); -} - -static int __devinit swarm_ide_init_module(void) -{ - struct platform_device *pldev; - int err; - - printk(KERN_INFO "SWARM IDE driver\n"); - - if (driver_register(&swarm_ide_driver)) { - printk(KERN_ERR "Driver registration failed\n"); - err = -ENODEV; - goto out; - } - - if (!(pldev = kzalloc(sizeof (*pldev), GFP_KERNEL))) { - err = -ENOMEM; - goto out_unregister_driver; - } - - pldev->name = swarm_ide_string; - pldev->id = 0; - pldev->dev.release = swarm_ide_platform_release; - - if (platform_device_register(pldev)) { - err = -ENODEV; - goto out_free_pldev; - } - - if (!pldev->dev.driver) { - /* - * The driver was not bound to this device, there was - * no hardware at this address. Unregister it, as the - * release fuction will take care of freeing the - * allocated structure - */ - platform_device_unregister (pldev); - } - - swarm_ide_dev = pldev; - - return 0; - -out_free_pldev: - kfree(pldev); - -out_unregister_driver: - driver_unregister(&swarm_ide_driver); -out: - return err; -} - -module_init(swarm_ide_init_module); diff --git a/drivers/leds/leds-fsg.c b/drivers/leds/leds-fsg.c index be0e12144b8..34935155c1c 100644 --- a/drivers/leds/leds-fsg.c +++ b/drivers/leds/leds-fsg.c @@ -161,6 +161,16 @@ static int fsg_led_probe(struct platform_device *pdev) { int ret; + /* Map the LED chip select address space */ + latch_address = (unsigned short *) ioremap(IXP4XX_EXP_BUS_BASE(2), 512); + if (!latch_address) { + ret = -ENOMEM; + goto failremap; + } + + latch_value = 0xffff; + *latch_address = latch_value; + ret = led_classdev_register(&pdev->dev, &fsg_wlan_led); if (ret < 0) goto failwlan; @@ -185,20 +195,8 @@ static int fsg_led_probe(struct platform_device *pdev) if (ret < 0) goto failring; - /* Map the LED chip select address space */ - latch_address = (unsigned short *) ioremap(IXP4XX_EXP_BUS_BASE(2), 512); - if (!latch_address) { - ret = -ENOMEM; - goto failremap; - } - - latch_value = 0xffff; - *latch_address = latch_value; - return ret; - failremap: - led_classdev_unregister(&fsg_ring_led); failring: led_classdev_unregister(&fsg_sync_led); failsync: @@ -210,14 +208,14 @@ static int fsg_led_probe(struct platform_device *pdev) failwan: led_classdev_unregister(&fsg_wlan_led); failwlan: + iounmap(latch_address); + failremap: return ret; } static int fsg_led_remove(struct platform_device *pdev) { - iounmap(latch_address); - led_classdev_unregister(&fsg_wlan_led); led_classdev_unregister(&fsg_wan_led); led_classdev_unregister(&fsg_sata_led); @@ -225,6 +223,8 @@ static int fsg_led_remove(struct platform_device *pdev) led_classdev_unregister(&fsg_sync_led); led_classdev_unregister(&fsg_ring_led); + iounmap(latch_address); + return 0; } diff --git a/drivers/leds/leds-pca955x.c b/drivers/leds/leds-pca955x.c index 146c0697286..f508729123b 100644 --- a/drivers/leds/leds-pca955x.c +++ b/drivers/leds/leds-pca955x.c @@ -248,11 +248,10 @@ static int __devinit pca955x_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct pca955x_led *pca955x; - int i; - int err = -ENODEV; struct pca955x_chipdef *chip; struct i2c_adapter *adapter; struct led_platform_data *pdata; + int i, err; chip = &pca955x_chipdefs[id->driver_data]; adapter = to_i2c_adapter(client->dev.parent); @@ -282,43 +281,41 @@ static int __devinit pca955x_probe(struct i2c_client *client, } } + pca955x = kzalloc(sizeof(*pca955x) * chip->bits, GFP_KERNEL); + if (!pca955x) + return -ENOMEM; + + i2c_set_clientdata(client, pca955x); + for (i = 0; i < chip->bits; i++) { - pca955x = kzalloc(sizeof(struct pca955x_led), GFP_KERNEL); - if (!pca955x) { - err = -ENOMEM; - goto exit; - } + pca955x[i].chipdef = chip; + pca955x[i].client = client; + pca955x[i].led_num = i; - pca955x->chipdef = chip; - pca955x->client = client; - pca955x->led_num = i; /* Platform data can specify LED names and default triggers */ if (pdata) { if (pdata->leds[i].name) - snprintf(pca955x->name, 32, "pca955x:%s", - pdata->leds[i].name); + snprintf(pca955x[i].name, + sizeof(pca955x[i].name), "pca955x:%s", + pdata->leds[i].name); if (pdata->leds[i].default_trigger) - pca955x->led_cdev.default_trigger = + pca955x[i].led_cdev.default_trigger = pdata->leds[i].default_trigger; } else { - snprintf(pca955x->name, 32, "pca955x:%d", i); + snprintf(pca955x[i].name, sizeof(pca955x[i].name), + "pca955x:%d", i); } - spin_lock_init(&pca955x->lock); - pca955x->led_cdev.name = pca955x->name; - pca955x->led_cdev.brightness_set = - pca955x_led_set; + spin_lock_init(&pca955x[i].lock); - /* - * Client data is a pointer to the _first_ pca955x_led - * struct - */ - if (i == 0) - i2c_set_clientdata(client, pca955x); + pca955x[i].led_cdev.name = pca955x[i].name; + pca955x[i].led_cdev.brightness_set = pca955x_led_set; - INIT_WORK(&(pca955x->work), pca955x_led_work); + INIT_WORK(&pca955x[i].work, pca955x_led_work); - led_classdev_register(&client->dev, &(pca955x->led_cdev)); + err = led_classdev_register(&client->dev, &pca955x[i].led_cdev); + if (err < 0) + goto exit; } /* Turn off LEDs */ @@ -336,23 +333,32 @@ static int __devinit pca955x_probe(struct i2c_client *client, pca955x_write_psc(client, 1, 0); return 0; + exit: + while (i--) { + led_classdev_unregister(&pca955x[i].led_cdev); + cancel_work_sync(&pca955x[i].work); + } + + kfree(pca955x); + i2c_set_clientdata(client, NULL); + return err; } static int __devexit pca955x_remove(struct i2c_client *client) { struct pca955x_led *pca955x = i2c_get_clientdata(client); - int leds = pca955x->chipdef->bits; int i; - for (i = 0; i < leds; i++) { - led_classdev_unregister(&(pca955x->led_cdev)); - cancel_work_sync(&(pca955x->work)); - kfree(pca955x); - pca955x = pca955x + 1; + for (i = 0; i < pca955x->chipdef->bits; i++) { + led_classdev_unregister(&pca955x[i].led_cdev); + cancel_work_sync(&pca955x[i].work); } + kfree(pca955x); + i2c_set_clientdata(client, NULL); + return 0; } diff --git a/drivers/media/common/tuners/tuner-xc2028.h b/drivers/media/common/tuners/tuner-xc2028.h index 216025cf5d4..2c5b6282b56 100644 --- a/drivers/media/common/tuners/tuner-xc2028.h +++ b/drivers/media/common/tuners/tuner-xc2028.h @@ -10,6 +10,7 @@ #include "dvb_frontend.h" #define XC2028_DEFAULT_FIRMWARE "xc3028-v27.fw" +#define XC3028L_DEFAULT_FIRMWARE "xc3028L-v36.fw" /* Dmoduler IF (kHz) */ #define XC3028_FE_DEFAULT 0 /* Don't load SCODE */ diff --git a/drivers/media/dvb/b2c2/flexcop-fe-tuner.c b/drivers/media/dvb/b2c2/flexcop-fe-tuner.c index 4eed783f4bc..a127a4175c4 100644 --- a/drivers/media/dvb/b2c2/flexcop-fe-tuner.c +++ b/drivers/media/dvb/b2c2/flexcop-fe-tuner.c @@ -491,6 +491,7 @@ static struct s5h1420_config skystar2_rev2_7_s5h1420_config = { .demod_address = 0x53, .invert = 1, .repeated_start_workaround = 1, + .serial_mpeg = 1, }; static struct itd1000_config skystar2_rev2_7_itd1000_config = { diff --git a/drivers/media/dvb/dvb-core/dmxdev.c b/drivers/media/dvb/dvb-core/dmxdev.c index 069d847ba88..0c733c66a44 100644 --- a/drivers/media/dvb/dvb-core/dmxdev.c +++ b/drivers/media/dvb/dvb-core/dmxdev.c @@ -364,15 +364,16 @@ static int dvb_dmxdev_section_callback(const u8 *buffer1, size_t buffer1_len, enum dmx_success success) { struct dmxdev_filter *dmxdevfilter = filter->priv; + unsigned long flags; int ret; if (dmxdevfilter->buffer.error) { wake_up(&dmxdevfilter->buffer.queue); return 0; } - spin_lock(&dmxdevfilter->dev->lock); + spin_lock_irqsave(&dmxdevfilter->dev->lock, flags); if (dmxdevfilter->state != DMXDEV_STATE_GO) { - spin_unlock(&dmxdevfilter->dev->lock); + spin_unlock_irqrestore(&dmxdevfilter->dev->lock, flags); return 0; } del_timer(&dmxdevfilter->timer); @@ -391,7 +392,7 @@ static int dvb_dmxdev_section_callback(const u8 *buffer1, size_t buffer1_len, } if (dmxdevfilter->params.sec.flags & DMX_ONESHOT) dmxdevfilter->state = DMXDEV_STATE_DONE; - spin_unlock(&dmxdevfilter->dev->lock); + spin_unlock_irqrestore(&dmxdevfilter->dev->lock, flags); wake_up(&dmxdevfilter->buffer.queue); return 0; } @@ -403,11 +404,12 @@ static int dvb_dmxdev_ts_callback(const u8 *buffer1, size_t buffer1_len, { struct dmxdev_filter *dmxdevfilter = feed->priv; struct dvb_ringbuffer *buffer; + unsigned long flags; int ret; - spin_lock(&dmxdevfilter->dev->lock); + spin_lock_irqsave(&dmxdevfilter->dev->lock, flags); if (dmxdevfilter->params.pes.output == DMX_OUT_DECODER) { - spin_unlock(&dmxdevfilter->dev->lock); + spin_unlock_irqrestore(&dmxdevfilter->dev->lock, flags); return 0; } @@ -417,7 +419,7 @@ static int dvb_dmxdev_ts_callback(const u8 *buffer1, size_t buffer1_len, else buffer = &dmxdevfilter->dev->dvr_buffer; if (buffer->error) { - spin_unlock(&dmxdevfilter->dev->lock); + spin_unlock_irqrestore(&dmxdevfilter->dev->lock, flags); wake_up(&buffer->queue); return 0; } @@ -428,7 +430,7 @@ static int dvb_dmxdev_ts_callback(const u8 *buffer1, size_t buffer1_len, dvb_ringbuffer_flush(buffer); buffer->error = ret; } - spin_unlock(&dmxdevfilter->dev->lock); + spin_unlock_irqrestore(&dmxdevfilter->dev->lock, flags); wake_up(&buffer->queue); return 0; } diff --git a/drivers/media/dvb/dvb-core/dvb_demux.c b/drivers/media/dvb/dvb-core/dvb_demux.c index e2eca0b1fe7..a2c1fd5d2f6 100644 --- a/drivers/media/dvb/dvb-core/dvb_demux.c +++ b/drivers/media/dvb/dvb-core/dvb_demux.c @@ -399,7 +399,9 @@ static void dvb_dmx_swfilter_packet(struct dvb_demux *demux, const u8 *buf) void dvb_dmx_swfilter_packets(struct dvb_demux *demux, const u8 *buf, size_t count) { - spin_lock(&demux->lock); + unsigned long flags; + + spin_lock_irqsave(&demux->lock, flags); while (count--) { if (buf[0] == 0x47) @@ -407,16 +409,17 @@ void dvb_dmx_swfilter_packets(struct dvb_demux *demux, const u8 *buf, buf += 188; } - spin_unlock(&demux->lock); + spin_unlock_irqrestore(&demux->lock, flags); } EXPORT_SYMBOL(dvb_dmx_swfilter_packets); void dvb_dmx_swfilter(struct dvb_demux *demux, const u8 *buf, size_t count) { + unsigned long flags; int p = 0, i, j; - spin_lock(&demux->lock); + spin_lock_irqsave(&demux->lock, flags); if (demux->tsbufp) { i = demux->tsbufp; @@ -449,17 +452,18 @@ void dvb_dmx_swfilter(struct dvb_demux *demux, const u8 *buf, size_t count) } bailout: - spin_unlock(&demux->lock); + spin_unlock_irqrestore(&demux->lock, flags); } EXPORT_SYMBOL(dvb_dmx_swfilter); void dvb_dmx_swfilter_204(struct dvb_demux *demux, const u8 *buf, size_t count) { + unsigned long flags; int p = 0, i, j; u8 tmppack[188]; - spin_lock(&demux->lock); + spin_lock_irqsave(&demux->lock, flags); if (demux->tsbufp) { i = demux->tsbufp; @@ -500,7 +504,7 @@ void dvb_dmx_swfilter_204(struct dvb_demux *demux, const u8 *buf, size_t count) } bailout: - spin_unlock(&demux->lock); + spin_unlock_irqrestore(&demux->lock, flags); } EXPORT_SYMBOL(dvb_dmx_swfilter_204); diff --git a/drivers/media/dvb/frontends/s5h1420.c b/drivers/media/dvb/frontends/s5h1420.c index 747d3fa2e5e..2e9fd2893ed 100644 --- a/drivers/media/dvb/frontends/s5h1420.c +++ b/drivers/media/dvb/frontends/s5h1420.c @@ -59,7 +59,7 @@ struct s5h1420_state { * it does not support repeated-start, workaround: write addr-1 * and then read */ - u8 shadow[255]; + u8 shadow[256]; }; static u32 s5h1420_getsymbolrate(struct s5h1420_state* state); @@ -94,8 +94,11 @@ static u8 s5h1420_readreg(struct s5h1420_state *state, u8 reg) if (ret != 3) return ret; } else { - ret = i2c_transfer(state->i2c, &msg[1], 2); - if (ret != 2) + ret = i2c_transfer(state->i2c, &msg[1], 1); + if (ret != 1) + return ret; + ret = i2c_transfer(state->i2c, &msg[2], 1); + if (ret != 1) return ret; } @@ -823,7 +826,7 @@ static int s5h1420_init (struct dvb_frontend* fe) struct s5h1420_state* state = fe->demodulator_priv; /* disable power down and do reset */ - state->CON_1_val = 0x10; + state->CON_1_val = state->config->serial_mpeg << 4; s5h1420_writereg(state, 0x02, state->CON_1_val); msleep(10); s5h1420_reset(state); diff --git a/drivers/media/dvb/frontends/s5h1420.h b/drivers/media/dvb/frontends/s5h1420.h index 4c913f142bc..ff308136d86 100644 --- a/drivers/media/dvb/frontends/s5h1420.h +++ b/drivers/media/dvb/frontends/s5h1420.h @@ -32,10 +32,12 @@ struct s5h1420_config u8 demod_address; /* does the inversion require inversion? */ - u8 invert : 1; + u8 invert:1; - u8 repeated_start_workaround : 1; - u8 cdclk_polarity : 1; /* 1 == falling edge, 0 == raising edge */ + u8 repeated_start_workaround:1; + u8 cdclk_polarity:1; /* 1 == falling edge, 0 == raising edge */ + + u8 serial_mpeg:1; }; #if defined(CONFIG_DVB_S5H1420) || (defined(CONFIG_DVB_S5H1420_MODULE) && defined(MODULE)) diff --git a/drivers/media/dvb/siano/sms-cards.c b/drivers/media/dvb/siano/sms-cards.c index cc5efb643f3..9da260fe3fd 100644 --- a/drivers/media/dvb/siano/sms-cards.c +++ b/drivers/media/dvb/siano/sms-cards.c @@ -40,6 +40,8 @@ struct usb_device_id smsusb_id_table[] = { .driver_info = SMS1XXX_BOARD_HAUPPAUGE_OKEMO_B }, { USB_DEVICE(0x2040, 0x5500), .driver_info = SMS1XXX_BOARD_HAUPPAUGE_WINDHAM }, + { USB_DEVICE(0x2040, 0x5510), + .driver_info = SMS1XXX_BOARD_HAUPPAUGE_WINDHAM }, { USB_DEVICE(0x2040, 0x5580), .driver_info = SMS1XXX_BOARD_HAUPPAUGE_WINDHAM }, { USB_DEVICE(0x2040, 0x5590), @@ -87,7 +89,7 @@ static struct sms_board sms_boards[] = { .fw[DEVICE_MODE_DVBT_BDA] = "sms1xxx-nova-b-dvbt-01.fw", }, [SMS1XXX_BOARD_HAUPPAUGE_WINDHAM] = { - .name = "Hauppauge WinTV-Nova-T-MiniStick", + .name = "Hauppauge WinTV MiniStick", .type = SMS_NOVA_B0, .fw[DEVICE_MODE_DVBT_BDA] = "sms1xxx-hcw-55xxx-dvbt-01.fw", }, diff --git a/drivers/media/video/bt8xx/bttv-driver.c b/drivers/media/video/bt8xx/bttv-driver.c index 6ae4cc860ef..933eaef41ea 100644 --- a/drivers/media/video/bt8xx/bttv-driver.c +++ b/drivers/media/video/bt8xx/bttv-driver.c @@ -3431,7 +3431,7 @@ static int radio_open(struct inode *inode, struct file *file) dprintk("bttv: open minor=%d\n",minor); for (i = 0; i < bttv_num; i++) { - if (bttvs[i].radio_dev->minor == minor) { + if (bttvs[i].radio_dev && bttvs[i].radio_dev->minor == minor) { btv = &bttvs[i]; break; } diff --git a/drivers/media/video/cafe_ccic.c b/drivers/media/video/cafe_ccic.c index c149b7d712e..5405c30dbb0 100644 --- a/drivers/media/video/cafe_ccic.c +++ b/drivers/media/video/cafe_ccic.c @@ -19,6 +19,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/fs.h> +#include <linux/mm.h> #include <linux/pci.h> #include <linux/i2c.h> #include <linux/interrupt.h> diff --git a/drivers/media/video/cpia2/cpia2_usb.c b/drivers/media/video/cpia2/cpia2_usb.c index a4574740350..a8a199047cb 100644 --- a/drivers/media/video/cpia2/cpia2_usb.c +++ b/drivers/media/video/cpia2/cpia2_usb.c @@ -632,7 +632,7 @@ int cpia2_usb_transfer_cmd(struct camera_data *cam, static int submit_urbs(struct camera_data *cam) { struct urb *urb; - int fx, err, i; + int fx, err, i, j; for(i=0; i<NUM_SBUF; ++i) { if (cam->sbuf[i].data) @@ -657,6 +657,9 @@ static int submit_urbs(struct camera_data *cam) } urb = usb_alloc_urb(FRAMES_PER_DESC, GFP_KERNEL); if (!urb) { + ERR("%s: usb_alloc_urb error!\n", __func__); + for (j = 0; j < i; j++) + usb_free_urb(cam->sbuf[j].urb); return -ENOMEM; } diff --git a/drivers/media/video/cx18/cx18-cards.c b/drivers/media/video/cx18/cx18-cards.c index 8fe5f38c4d7..3cb9734ec07 100644 --- a/drivers/media/video/cx18/cx18-cards.c +++ b/drivers/media/video/cx18/cx18-cards.c @@ -163,7 +163,7 @@ static const struct cx18_card cx18_card_h900 = { }, .audio_inputs = { { CX18_CARD_INPUT_AUD_TUNER, - CX18_AV_AUDIO8, 0 }, + CX18_AV_AUDIO5, 0 }, { CX18_CARD_INPUT_LINE_IN1, CX18_AV_AUDIO_SERIAL1, 0 }, }, diff --git a/drivers/media/video/em28xx/em28xx-audio.c b/drivers/media/video/em28xx/em28xx-audio.c index 3c006103c1e..ac3292d7646 100644 --- a/drivers/media/video/em28xx/em28xx-audio.c +++ b/drivers/media/video/em28xx/em28xx-audio.c @@ -117,10 +117,10 @@ static void em28xx_audio_isocirq(struct urb *urb) if (oldptr + length >= runtime->buffer_size) { unsigned int cnt = - runtime->buffer_size - oldptr - 1; + runtime->buffer_size - oldptr; memcpy(runtime->dma_area + oldptr * stride, cp, cnt * stride); - memcpy(runtime->dma_area, cp + cnt, + memcpy(runtime->dma_area, cp + cnt * stride, length * stride - cnt * stride); } else { memcpy(runtime->dma_area + oldptr * stride, cp, @@ -161,8 +161,14 @@ static int em28xx_init_audio_isoc(struct em28xx *dev) memset(dev->adev->transfer_buffer[i], 0x80, sb_size); urb = usb_alloc_urb(EM28XX_NUM_AUDIO_PACKETS, GFP_ATOMIC); - if (!urb) + if (!urb) { + em28xx_errdev("usb_alloc_urb failed!\n"); + for (j = 0; j < i; j++) { + usb_free_urb(dev->adev->urb[j]); + kfree(dev->adev->transfer_buffer[j]); + } return -ENOMEM; + } urb->dev = dev->udev; urb->context = dev; diff --git a/drivers/media/video/em28xx/em28xx-cards.c b/drivers/media/video/em28xx/em28xx-cards.c index 452da70e719..de943cf6c16 100644 --- a/drivers/media/video/em28xx/em28xx-cards.c +++ b/drivers/media/video/em28xx/em28xx-cards.c @@ -93,28 +93,6 @@ struct em28xx_board em28xx_boards[] = { .amux = 0, } }, }, - [EM2800_BOARD_KWORLD_USB2800] = { - .name = "Kworld USB2800", - .valid = EM28XX_BOARD_NOT_VALIDATED, - .is_em2800 = 1, - .vchannels = 3, - .tuner_type = TUNER_PHILIPS_FCV1236D, - .tda9887_conf = TDA9887_PRESENT, - .decoder = EM28XX_SAA7113, - .input = { { - .type = EM28XX_VMUX_TELEVISION, - .vmux = SAA7115_COMPOSITE2, - .amux = 0, - }, { - .type = EM28XX_VMUX_COMPOSITE1, - .vmux = SAA7115_COMPOSITE0, - .amux = 1, - }, { - .type = EM28XX_VMUX_SVIDEO, - .vmux = SAA7115_SVIDEO3, - .amux = 1, - } }, - }, [EM2820_BOARD_KWORLD_PVRTV2800RF] = { .name = "Kworld PVR TV 2800 RF", .is_em2800 = 0, @@ -599,7 +577,7 @@ struct em28xx_board em28xx_boards[] = { }, { .type = EM28XX_VMUX_COMPOSITE1, .vmux = TVP5150_COMPOSITE1, - .amux = 1, + .amux = 3, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, @@ -952,22 +930,23 @@ struct em28xx_board em28xx_boards[] = { }, [EM2880_BOARD_KWORLD_DVB_310U] = { .name = "KWorld DVB-T 310U", - .valid = EM28XX_BOARD_NOT_VALIDATED, .vchannels = 3, .tuner_type = TUNER_XC2028, + .has_dvb = 1, + .mts_firmware = 1, .decoder = EM28XX_TVP5150, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, - .amux = 0, + .amux = EM28XX_AMUX_VIDEO, }, { .type = EM28XX_VMUX_COMPOSITE1, .vmux = TVP5150_COMPOSITE1, - .amux = 1, - }, { + .amux = EM28XX_AMUX_AC97_LINE_IN, + }, { /* S-video has not been tested yet */ .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, - .amux = 1, + .amux = EM28XX_AMUX_AC97_LINE_IN, } }, }, [EM2881_BOARD_DNT_DA2_HYBRID] = { @@ -1282,6 +1261,7 @@ static struct em28xx_reg_seq em2882_terratec_hybrid_xs_digital[] = { static struct em28xx_hash_table em28xx_eeprom_hash [] = { /* P/N: SA 60002070465 Tuner: TVF7533-MF */ {0x6ce05a8f, EM2820_BOARD_PROLINK_PLAYTV_USB2, TUNER_YMEC_TVF_5533MF}, + {0x966a0441, EM2880_BOARD_KWORLD_DVB_310U, TUNER_XC2028}, }; /* I2C devicelist hash table for devices with generic USB IDs */ @@ -1552,9 +1532,12 @@ static void em28xx_setup_xc3028(struct em28xx *dev, struct xc2028_ctrl *ctl) /* djh - Not sure which demod we need here */ ctl->demod = XC3028_FE_DEFAULT; break; + case EM2880_BOARD_AMD_ATI_TV_WONDER_HD_600: + ctl->demod = XC3028_FE_DEFAULT; + ctl->fname = XC3028L_DEFAULT_FIRMWARE; + break; case EM2883_BOARD_HAUPPAUGE_WINTV_HVR_950: case EM2880_BOARD_PINNACLE_PCTV_HD_PRO: - case EM2880_BOARD_AMD_ATI_TV_WONDER_HD_600: /* FIXME: Better to specify the needed IF */ ctl->demod = XC3028_FE_DEFAULT; break; @@ -1764,6 +1747,20 @@ void em28xx_card_setup(struct em28xx *dev) break; case EM2820_BOARD_UNKNOWN: case EM2800_BOARD_UNKNOWN: + /* + * The K-WORLD DVB-T 310U is detected as an MSI Digivox AD. + * + * This occurs because they share identical USB vendor and + * product IDs. + * + * What we do here is look up the EEPROM hash of the K-WORLD + * and if it is found then we decide that we do not have + * a DIGIVOX and reset the device to the K-WORLD instead. + * + * This solution is only valid if they do not share eeprom + * hash identities which has not been determined as yet. + */ + case EM2880_BOARD_MSI_DIGIVOX_AD: if (!em28xx_hint_board(dev)) em28xx_set_model(dev); break; diff --git a/drivers/media/video/em28xx/em28xx-dvb.c b/drivers/media/video/em28xx/em28xx-dvb.c index 4b992bc0083..d2b1a1a5268 100644 --- a/drivers/media/video/em28xx/em28xx-dvb.c +++ b/drivers/media/video/em28xx/em28xx-dvb.c @@ -452,6 +452,15 @@ static int dvb_init(struct em28xx *dev) goto out_free; } break; + case EM2880_BOARD_KWORLD_DVB_310U: + dvb->frontend = dvb_attach(zl10353_attach, + &em28xx_zl10353_with_xc3028, + &dev->i2c_adap); + if (attach_xc3028(0x61, dev) < 0) { + result = -EINVAL; + goto out_free; + } + break; default: printk(KERN_ERR "%s/2: The frontend of your DVB/ATSC card" " isn't supported yet\n", diff --git a/drivers/media/video/gspca/gspca.c b/drivers/media/video/gspca/gspca.c index 7be69284da0..ac95c55887d 100644 --- a/drivers/media/video/gspca/gspca.c +++ b/drivers/media/video/gspca/gspca.c @@ -459,6 +459,7 @@ static int create_urbs(struct gspca_dev *gspca_dev, urb = usb_alloc_urb(npkt, GFP_KERNEL); if (!urb) { err("usb_alloc_urb failed"); + destroy_urbs(gspca_dev); return -ENOMEM; } urb->transfer_buffer = usb_buffer_alloc(gspca_dev->dev, @@ -468,8 +469,8 @@ static int create_urbs(struct gspca_dev *gspca_dev, if (urb->transfer_buffer == NULL) { usb_free_urb(urb); - destroy_urbs(gspca_dev); err("usb_buffer_urb failed"); + destroy_urbs(gspca_dev); return -ENOMEM; } gspca_dev->urb[n] = urb; diff --git a/drivers/media/video/gspca/pac7311.c b/drivers/media/video/gspca/pac7311.c index d4be5184328..ba865b7f1ed 100644 --- a/drivers/media/video/gspca/pac7311.c +++ b/drivers/media/video/gspca/pac7311.c @@ -1063,6 +1063,7 @@ static __devinitdata struct usb_device_id device_table[] = { {USB_DEVICE(0x093a, 0x2621), .driver_info = SENSOR_PAC7302}, {USB_DEVICE(0x093a, 0x2624), .driver_info = SENSOR_PAC7302}, {USB_DEVICE(0x093a, 0x2626), .driver_info = SENSOR_PAC7302}, + {USB_DEVICE(0x093a, 0x262a), .driver_info = SENSOR_PAC7302}, {} }; MODULE_DEVICE_TABLE(usb, device_table); diff --git a/drivers/media/video/gspca/sonixb.c b/drivers/media/video/gspca/sonixb.c index 5dd78c6766e..12b81ae526b 100644 --- a/drivers/media/video/gspca/sonixb.c +++ b/drivers/media/video/gspca/sonixb.c @@ -232,7 +232,7 @@ static struct ctrl sd_ctrls[] = { static struct v4l2_pix_format vga_mode[] = { {160, 120, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE, .bytesperline = 160, - .sizeimage = 160 * 120 * 5 / 4, + .sizeimage = 160 * 120, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 2 | MODE_RAW}, {160, 120, V4L2_PIX_FMT_SN9C10X, V4L2_FIELD_NONE, @@ -264,7 +264,7 @@ static struct v4l2_pix_format sif_mode[] = { .priv = 1 | MODE_REDUCED_SIF}, {176, 144, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE, .bytesperline = 176, - .sizeimage = 176 * 144 * 5 / 4, + .sizeimage = 176 * 144, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 1 | MODE_RAW}, {176, 144, V4L2_PIX_FMT_SN9C10X, V4L2_FIELD_NONE, diff --git a/drivers/media/video/gspca/sonixj.c b/drivers/media/video/gspca/sonixj.c index d75b1d20b31..572b0f363b6 100644 --- a/drivers/media/video/gspca/sonixj.c +++ b/drivers/media/video/gspca/sonixj.c @@ -707,6 +707,7 @@ static void i2c_w8(struct gspca_dev *gspca_dev, 0x08, 0, /* value, index */ gspca_dev->usb_buf, 8, 500); + msleep(2); } /* read 5 bytes in gspca_dev->usb_buf */ @@ -976,13 +977,13 @@ static int sd_init(struct gspca_dev *gspca_dev) case BRIDGE_SN9C105: if (regF1 != 0x11) return -ENODEV; - reg_w(gspca_dev, 0x02, regGpio, 2); + reg_w(gspca_dev, 0x01, regGpio, 2); break; case BRIDGE_SN9C120: if (regF1 != 0x12) return -ENODEV; regGpio[1] = 0x70; - reg_w(gspca_dev, 0x02, regGpio, 2); + reg_w(gspca_dev, 0x01, regGpio, 2); break; default: /* case BRIDGE_SN9C110: */ @@ -1183,7 +1184,7 @@ static void sd_start(struct gspca_dev *gspca_dev) static const __u8 CA[] = { 0x28, 0xd8, 0x14, 0xec }; static const __u8 CE[] = { 0x32, 0xdd, 0x2d, 0xdd }; /* MI0360 */ static const __u8 CE_ov76xx[] = - { 0x32, 0xdd, 0x32, 0xdd }; /* OV7630/48 */ + { 0x32, 0xdd, 0x32, 0xdd }; sn9c1xx = sn_tb[(int) sd->sensor]; configure_gpio(gspca_dev, sn9c1xx); @@ -1223,8 +1224,15 @@ static void sd_start(struct gspca_dev *gspca_dev) reg_w(gspca_dev, 0x20, gamma_def, sizeof gamma_def); for (i = 0; i < 8; i++) reg_w(gspca_dev, 0x84, reg84, sizeof reg84); + switch (sd->sensor) { + case SENSOR_OV7660: + reg_w1(gspca_dev, 0x9a, 0x05); + break; + default: reg_w1(gspca_dev, 0x9a, 0x08); reg_w1(gspca_dev, 0x99, 0x59); + break; + } mode = gspca_dev->cam.cam_mode[(int) gspca_dev->curr_mode].priv; if (mode) @@ -1275,8 +1283,8 @@ static void sd_start(struct gspca_dev *gspca_dev) /* reg1 = 0x44; */ /* reg1 = 0x46; (done) */ } else { - reg17 = 0x22; /* 640 MCKSIZE */ - reg1 = 0x06; + reg17 = 0xa2; /* 640 */ + reg1 = 0x44; } break; } @@ -1285,6 +1293,7 @@ static void sd_start(struct gspca_dev *gspca_dev) switch (sd->sensor) { case SENSOR_OV7630: case SENSOR_OV7648: + case SENSOR_OV7660: reg_w(gspca_dev, 0xce, CE_ov76xx, 4); break; default: diff --git a/drivers/media/video/gspca/spca561.c b/drivers/media/video/gspca/spca561.c index cfbc9ebc5c5..95fcfcb9e31 100644 --- a/drivers/media/video/gspca/spca561.c +++ b/drivers/media/video/gspca/spca561.c @@ -225,7 +225,7 @@ static int i2c_read(struct gspca_dev *gspca_dev, __u16 reg, __u8 mode) reg_w_val(gspca_dev->dev, 0x8802, (mode | 0x01)); do { reg_r(gspca_dev, 0x8803, 1); - if (!gspca_dev->usb_buf) + if (!gspca_dev->usb_buf[0]) break; } while (--retry); if (retry == 0) diff --git a/drivers/media/video/gspca/zc3xx.c b/drivers/media/video/gspca/zc3xx.c index 8d7c27e6ac7..d61ef727e0c 100644 --- a/drivers/media/video/gspca/zc3xx.c +++ b/drivers/media/video/gspca/zc3xx.c @@ -6576,8 +6576,8 @@ static int setlightfreq(struct gspca_dev *gspca_dev) cs2102_60HZ, cs2102_60HZScale}, /* SENSOR_CS2102K 1 */ {cs2102_NoFliker, cs2102_NoFlikerScale, - cs2102_50HZ, cs2102_50HZScale, - cs2102_60HZ, cs2102_60HZScale}, + NULL, NULL, /* currently disabled */ + NULL, NULL}, /* SENSOR_GC0305 2 */ {gc0305_NoFliker, gc0305_NoFliker, gc0305_50HZ, gc0305_50HZ, diff --git a/drivers/media/video/ov511.c b/drivers/media/video/ov511.c index 3d3c48db45d..c6852402c5e 100644 --- a/drivers/media/video/ov511.c +++ b/drivers/media/video/ov511.c @@ -3591,7 +3591,7 @@ static int ov51x_init_isoc(struct usb_ov511 *ov) { struct urb *urb; - int fx, err, n, size; + int fx, err, n, i, size; PDEBUG(3, "*** Initializing capture ***"); @@ -3662,6 +3662,8 @@ ov51x_init_isoc(struct usb_ov511 *ov) urb = usb_alloc_urb(FRAMES_PER_DESC, GFP_KERNEL); if (!urb) { err("init isoc: usb_alloc_urb ret. NULL"); + for (i = 0; i < n; i++) + usb_free_urb(ov->sbuf[i].urb); return -ENOMEM; } ov->sbuf[n].urb = urb; @@ -5651,7 +5653,7 @@ static ssize_t show_exposure(struct device *cd, if (!ov->dev) return -ENODEV; sensor_get_exposure(ov, &exp); - return sprintf(buf, "%d\n", exp >> 8); + return sprintf(buf, "%d\n", exp); } static DEVICE_ATTR(exposure, S_IRUGO, show_exposure, NULL); diff --git a/drivers/media/video/pvrusb2/pvrusb2-devattr.c b/drivers/media/video/pvrusb2/pvrusb2-devattr.c index 88e17516843..cbe2a341785 100644 --- a/drivers/media/video/pvrusb2/pvrusb2-devattr.c +++ b/drivers/media/video/pvrusb2/pvrusb2-devattr.c @@ -489,6 +489,8 @@ static const struct pvr2_device_desc pvr2_device_751xx = { struct usb_device_id pvr2_device_table[] = { { USB_DEVICE(0x2040, 0x2900), .driver_info = (kernel_ulong_t)&pvr2_device_29xxx}, + { USB_DEVICE(0x2040, 0x2950), /* Logically identical to 2900 */ + .driver_info = (kernel_ulong_t)&pvr2_device_29xxx}, { USB_DEVICE(0x2040, 0x2400), .driver_info = (kernel_ulong_t)&pvr2_device_24xxx}, { USB_DEVICE(0x1164, 0x0622), diff --git a/drivers/media/video/s2255drv.c b/drivers/media/video/s2255drv.c index b1d09d8e2b8..92b83feae36 100644 --- a/drivers/media/video/s2255drv.c +++ b/drivers/media/video/s2255drv.c @@ -669,7 +669,7 @@ static void s2255_fillbuff(struct s2255_dev *dev, struct s2255_buffer *buf, (unsigned long)vbuf, pos); /* tell v4l buffer was filled */ - buf->vb.field_count++; + buf->vb.field_count = dev->frame_count[chn] * 2; do_gettimeofday(&ts); buf->vb.ts = ts; buf->vb.state = VIDEOBUF_DONE; @@ -1268,6 +1268,7 @@ static int vidioc_streamon(struct file *file, void *priv, enum v4l2_buf_type i) dev->last_frame[chn] = -1; dev->bad_payload[chn] = 0; dev->cur_frame[chn] = 0; + dev->frame_count[chn] = 0; for (j = 0; j < SYS_FRAMES; j++) { dev->buffer[chn].frame[j].ulState = 0; dev->buffer[chn].frame[j].cur_size = 0; diff --git a/drivers/media/video/uvc/uvc_ctrl.c b/drivers/media/video/uvc/uvc_ctrl.c index 6ef3e5297de..feab12aa2c7 100644 --- a/drivers/media/video/uvc/uvc_ctrl.c +++ b/drivers/media/video/uvc/uvc_ctrl.c @@ -592,7 +592,7 @@ int uvc_query_v4l2_ctrl(struct uvc_video_device *video, if (ctrl == NULL) return -EINVAL; - data = kmalloc(8, GFP_KERNEL); + data = kmalloc(ctrl->info->size, GFP_KERNEL); if (data == NULL) return -ENOMEM; diff --git a/drivers/media/video/w9968cf.c b/drivers/media/video/w9968cf.c index 168baabe465..11edf79f57b 100644 --- a/drivers/media/video/w9968cf.c +++ b/drivers/media/video/w9968cf.c @@ -911,7 +911,6 @@ static int w9968cf_start_transfer(struct w9968cf_device* cam) for (i = 0; i < W9968CF_URBS; i++) { urb = usb_alloc_urb(W9968CF_ISO_PACKETS, GFP_KERNEL); - cam->urb[i] = urb; if (!urb) { for (j = 0; j < i; j++) usb_free_urb(cam->urb[j]); @@ -919,6 +918,7 @@ static int w9968cf_start_transfer(struct w9968cf_device* cam) return -ENOMEM; } + cam->urb[i] = urb; urb->dev = udev; urb->context = (void*)cam; urb->pipe = usb_rcvisocpipe(udev, 1); diff --git a/drivers/media/video/wm8739.c b/drivers/media/video/wm8739.c index 95c79ad8048..54ac3fe26ec 100644 --- a/drivers/media/video/wm8739.c +++ b/drivers/media/video/wm8739.c @@ -274,10 +274,8 @@ static int wm8739_probe(struct i2c_client *client, client->addr << 1, client->adapter->name); state = kmalloc(sizeof(struct wm8739_state), GFP_KERNEL); - if (state == NULL) { - kfree(client); + if (state == NULL) return -ENOMEM; - } state->vol_l = 0x17; /* 0dB */ state->vol_r = 0x17; /* 0dB */ state->muted = 0; diff --git a/drivers/media/video/zoran_card.c b/drivers/media/video/zoran_card.c index d842a7cb99d..3282be73029 100644 --- a/drivers/media/video/zoran_card.c +++ b/drivers/media/video/zoran_card.c @@ -988,7 +988,7 @@ zoran_open_init_params (struct zoran *zr) zr->v4l_grab_seq = 0; zr->v4l_settings.width = 192; zr->v4l_settings.height = 144; - zr->v4l_settings.format = &zoran_formats[4]; /* YUY2 - YUV-4:2:2 packed */ + zr->v4l_settings.format = &zoran_formats[7]; /* YUY2 - YUV-4:2:2 packed */ zr->v4l_settings.bytesperline = zr->v4l_settings.width * ((zr->v4l_settings.format->depth + 7) / 8); diff --git a/drivers/media/video/zoran_driver.c b/drivers/media/video/zoran_driver.c index ec6f59674b1..2dab9eea4de 100644 --- a/drivers/media/video/zoran_driver.c +++ b/drivers/media/video/zoran_driver.c @@ -134,7 +134,7 @@ const struct zoran_format zoran_formats[] = { }, { .name = "16-bit RGB BE", ZFMT(-1, - V4L2_PIX_FMT_RGB565, V4L2_COLORSPACE_SRGB), + V4L2_PIX_FMT_RGB565X, V4L2_COLORSPACE_SRGB), .depth = 16, .flags = ZORAN_FORMAT_CAPTURE | ZORAN_FORMAT_OVERLAY, @@ -2737,7 +2737,8 @@ zoran_do_ioctl (struct inode *inode, fh->v4l_settings.format->fourcc; fmt->fmt.pix.colorspace = fh->v4l_settings.format->colorspace; - fmt->fmt.pix.bytesperline = 0; + fmt->fmt.pix.bytesperline = + fh->v4l_settings.bytesperline; if (BUZ_MAX_HEIGHT < (fh->v4l_settings.height * 2)) fmt->fmt.pix.field = @@ -2833,13 +2834,6 @@ zoran_do_ioctl (struct inode *inode, fmt->fmt.pix.pixelformat, (char *) &printformat); - if (fmt->fmt.pix.bytesperline > 0) { - dprintk(5, - KERN_ERR "%s: bpl not supported\n", - ZR_DEVNAME(zr)); - return -EINVAL; - } - /* we can be requested to do JPEG/raw playback/capture */ if (! (fmt->type == V4L2_BUF_TYPE_VIDEO_CAPTURE || @@ -2923,6 +2917,7 @@ zoran_do_ioctl (struct inode *inode, fh->jpg_buffers.buffer_size = zoran_v4l2_calc_bufsize(&fh-> jpg_settings); + fmt->fmt.pix.bytesperline = 0; fmt->fmt.pix.sizeimage = fh->jpg_buffers.buffer_size; @@ -2979,6 +2974,8 @@ zoran_do_ioctl (struct inode *inode, /* tell the user the * results/missing stuff */ + fmt->fmt.pix.bytesperline = + fh->v4l_settings.bytesperline; fmt->fmt.pix.sizeimage = fh->v4l_settings.height * fh->v4l_settings.bytesperline; diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c index 917035e16da..00008967ef7 100644 --- a/drivers/mmc/host/atmel-mci.c +++ b/drivers/mmc/host/atmel-mci.c @@ -426,8 +426,6 @@ static u32 atmci_submit_data(struct mmc_host *mmc, struct mmc_data *data) host->sg = NULL; host->data = data; - mci_writel(host, BLKR, MCI_BCNT(data->blocks) - | MCI_BLKLEN(data->blksz)); dev_vdbg(&mmc->class_dev, "BLKR=0x%08x\n", MCI_BCNT(data->blocks) | MCI_BLKLEN(data->blksz)); @@ -483,6 +481,10 @@ static void atmci_request(struct mmc_host *mmc, struct mmc_request *mrq) if (data->blocks > 1 && data->blksz & 3) goto fail; atmci_set_timeout(host, data); + + /* Must set block count/size before sending command */ + mci_writel(host, BLKR, MCI_BCNT(data->blocks) + | MCI_BLKLEN(data->blksz)); } iflags = MCI_CMDRDY; diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c index 0b6095ba3ce..bcd2bc477af 100644 --- a/drivers/net/e1000e/ich8lan.c +++ b/drivers/net/e1000e/ich8lan.c @@ -396,7 +396,7 @@ static s32 e1000_acquire_swflag_ich8lan(struct e1000_hw *hw) u32 extcnf_ctrl; u32 timeout = PHY_CFG_TIMEOUT; - WARN_ON(preempt_count()); + might_sleep(); if (!mutex_trylock(&nvm_mutex)) { WARN(1, KERN_ERR "e1000e mutex contention. Owned by pid %d\n", diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c index f118252f3a9..52e2743b04e 100644 --- a/drivers/rtc/rtc-dev.c +++ b/drivers/rtc/rtc-dev.c @@ -422,6 +422,12 @@ done: return err; } +static int rtc_dev_fasync(int fd, struct file *file, int on) +{ + struct rtc_device *rtc = file->private_data; + return fasync_helper(fd, file, on, &rtc->async_queue); +} + static int rtc_dev_release(struct inode *inode, struct file *file) { struct rtc_device *rtc = file->private_data; @@ -434,16 +440,13 @@ static int rtc_dev_release(struct inode *inode, struct file *file) if (rtc->ops->release) rtc->ops->release(rtc->dev.parent); + if (file->f_flags & FASYNC) + rtc_dev_fasync(-1, file, 0); + clear_bit_unlock(RTC_DEV_BUSY, &rtc->flags); return 0; } -static int rtc_dev_fasync(int fd, struct file *file, int on) -{ - struct rtc_device *rtc = file->private_data; - return fasync_helper(fd, file, on, &rtc->async_queue); -} - static const struct file_operations rtc_dev_fops = { .owner = THIS_MODULE, .llseek = no_llseek, diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index 1679e2f91c9..a0b6b46e746 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -447,51 +447,36 @@ void qdio_print_subchannel_info(struct qdio_irq *irq_ptr, { char s[80]; - sprintf(s, "%s sc:%x ", cdev->dev.bus_id, irq_ptr->schid.sch_no); - + sprintf(s, "qdio: %s ", dev_name(&cdev->dev)); switch (irq_ptr->qib.qfmt) { case QDIO_QETH_QFMT: - sprintf(s + strlen(s), "OSADE "); + sprintf(s + strlen(s), "OSA "); break; case QDIO_ZFCP_QFMT: sprintf(s + strlen(s), "ZFCP "); break; case QDIO_IQDIO_QFMT: - sprintf(s + strlen(s), "HiperSockets "); + sprintf(s + strlen(s), "HS "); break; } - sprintf(s + strlen(s), "using: "); - - if (!is_thinint_irq(irq_ptr)) - sprintf(s + strlen(s), "no"); - sprintf(s + strlen(s), "AdapterInterrupts "); - if (!(irq_ptr->sch_token != 0)) - sprintf(s + strlen(s), "no"); - sprintf(s + strlen(s), "QEBSM "); - if (!(irq_ptr->qib.ac & QIB_AC_OUTBOUND_PCI_SUPPORTED)) - sprintf(s + strlen(s), "no"); - sprintf(s + strlen(s), "OutboundPCI "); - if (!css_general_characteristics.aif_tdd) - sprintf(s + strlen(s), "no"); - sprintf(s + strlen(s), "TDD\n"); - printk(KERN_INFO "qdio: %s", s); - - memset(s, 0, sizeof(s)); - sprintf(s, "%s SIGA required: ", cdev->dev.bus_id); - if (irq_ptr->siga_flag.input) - sprintf(s + strlen(s), "Read "); - if (irq_ptr->siga_flag.output) - sprintf(s + strlen(s), "Write "); - if (irq_ptr->siga_flag.sync) - sprintf(s + strlen(s), "Sync "); - if (!irq_ptr->siga_flag.no_sync_ti) - sprintf(s + strlen(s), "SyncAI "); - if (!irq_ptr->siga_flag.no_sync_out_ti) - sprintf(s + strlen(s), "SyncOutAI "); - if (!irq_ptr->siga_flag.no_sync_out_pci) - sprintf(s + strlen(s), "SyncOutPCI"); + sprintf(s + strlen(s), "on SC %x using ", irq_ptr->schid.sch_no); + sprintf(s + strlen(s), "AI:%d ", is_thinint_irq(irq_ptr)); + sprintf(s + strlen(s), "QEBSM:%d ", (irq_ptr->sch_token) ? 1 : 0); + sprintf(s + strlen(s), "PCI:%d ", + (irq_ptr->qib.ac & QIB_AC_OUTBOUND_PCI_SUPPORTED) ? 1 : 0); + sprintf(s + strlen(s), "TDD:%d ", css_general_characteristics.aif_tdd); + sprintf(s + strlen(s), "SIGA:"); + sprintf(s + strlen(s), "%s", (irq_ptr->siga_flag.input) ? "R" : " "); + sprintf(s + strlen(s), "%s", (irq_ptr->siga_flag.output) ? "W" : " "); + sprintf(s + strlen(s), "%s", (irq_ptr->siga_flag.sync) ? "S" : " "); + sprintf(s + strlen(s), "%s", + (!irq_ptr->siga_flag.no_sync_ti) ? "A" : " "); + sprintf(s + strlen(s), "%s", + (!irq_ptr->siga_flag.no_sync_out_ti) ? "O" : " "); + sprintf(s + strlen(s), "%s", + (!irq_ptr->siga_flag.no_sync_out_pci) ? "P" : " "); sprintf(s + strlen(s), "\n"); - printk(KERN_INFO "qdio: %s", s); + printk(KERN_INFO "%s", s); } int __init qdio_setup_init(void) diff --git a/drivers/spi/orion_spi.c b/drivers/spi/orion_spi.c index c4eaacd6e55..b872bfaf4bd 100644 --- a/drivers/spi/orion_spi.c +++ b/drivers/spi/orion_spi.c @@ -427,7 +427,7 @@ static int orion_spi_transfer(struct spi_device *spi, struct spi_message *m) goto msg_rejected; } - if (t->speed_hz < orion_spi->min_speed) { + if (t->speed_hz && t->speed_hz < orion_spi->min_speed) { dev_err(&spi->dev, "message rejected : " "device min speed (%d Hz) exceeds " diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c index c6299e8a041..9cbff84b787 100644 --- a/drivers/video/console/fbcon.c +++ b/drivers/video/console/fbcon.c @@ -2400,11 +2400,15 @@ static int fbcon_blank(struct vc_data *vc, int blank, int mode_switch) if (!fbcon_is_inactive(vc, info)) { if (ops->blank_state != blank) { + int ret = 1; + ops->blank_state = blank; fbcon_cursor(vc, blank ? CM_ERASE : CM_DRAW); ops->cursor_flash = (!blank); - if (fb_blank(info, blank)) + if (info->fbops->fb_blank) + ret = info->fbops->fb_blank(blank, info); + if (ret) fbcon_generic_blank(vc, info, blank); } diff --git a/include/asm-mips/cevt-r4k.h b/include/asm-mips/cevt-r4k.h new file mode 100644 index 00000000000..fa4328f9124 --- /dev/null +++ b/include/asm-mips/cevt-r4k.h @@ -0,0 +1,46 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2008 Kevin D. Kissell + */ + +/* + * Definitions used for common event timer implementation + * for MIPS 4K-type processors and their MIPS MT variants. + * Avoids unsightly extern declarations in C files. + */ +#ifndef __ASM_CEVT_R4K_H +#define __ASM_CEVT_R4K_H + +DECLARE_PER_CPU(struct clock_event_device, mips_clockevent_device); + +void mips_event_handler(struct clock_event_device *dev); +int c0_compare_int_usable(void); +void mips_set_clock_mode(enum clock_event_mode, struct clock_event_device *); +irqreturn_t c0_compare_interrupt(int, void *); + +extern struct irqaction c0_compare_irqaction; +extern int cp0_timer_irq_installed; + +/* + * Possibly handle a performance counter interrupt. + * Return true if the timer interrupt should not be checked + */ + +static inline int handle_perf_irq(int r2) +{ + /* + * The performance counter overflow interrupt may be shared with the + * timer interrupt (cp0_perfcount_irq < 0). If it is and a + * performance counter has overflowed (perf_irq() == IRQ_HANDLED) + * and we can't reliably determine if a counter interrupt has also + * happened (!r2) then don't check for a timer interrupt. + */ + return (cp0_perfcount_irq < 0) && + perf_irq() == IRQ_HANDLED && + !r2; +} + +#endif /* __ASM_CEVT_R4K_H */ diff --git a/include/asm-mips/irqflags.h b/include/asm-mips/irqflags.h index 881e8866501..701ec0ba8fa 100644 --- a/include/asm-mips/irqflags.h +++ b/include/asm-mips/irqflags.h @@ -38,8 +38,17 @@ __asm__( " .set pop \n" " .endm"); +extern void smtc_ipi_replay(void); + static inline void raw_local_irq_enable(void) { +#ifdef CONFIG_MIPS_MT_SMTC + /* + * SMTC kernel needs to do a software replay of queued + * IPIs, at the cost of call overhead on each local_irq_enable() + */ + smtc_ipi_replay(); +#endif __asm__ __volatile__( "raw_local_irq_enable" : /* no outputs */ @@ -47,6 +56,7 @@ static inline void raw_local_irq_enable(void) : "memory"); } + /* * For cli() we have to insert nops to make sure that the new value * has actually arrived in the status register before the end of this @@ -185,15 +195,14 @@ __asm__( " .set pop \n" " .endm \n"); -extern void smtc_ipi_replay(void); static inline void raw_local_irq_restore(unsigned long flags) { unsigned long __tmp1; -#ifdef CONFIG_MIPS_MT_SMTC_INSTANT_REPLAY +#ifdef CONFIG_MIPS_MT_SMTC /* - * CONFIG_MIPS_MT_SMTC_INSTANT_REPLAY does prompt replay of deferred + * SMTC kernel needs to do a software replay of queued * IPIs, at the cost of branch and call overhead on each * local_irq_restore() */ @@ -208,6 +217,17 @@ static inline void raw_local_irq_restore(unsigned long flags) : "memory"); } +static inline void __raw_local_irq_restore(unsigned long flags) +{ + unsigned long __tmp1; + + __asm__ __volatile__( + "raw_local_irq_restore\t%0" + : "=r" (__tmp1) + : "0" (flags) + : "memory"); +} + static inline int raw_irqs_disabled_flags(unsigned long flags) { #ifdef CONFIG_MIPS_MT_SMTC diff --git a/include/asm-mips/mipsregs.h b/include/asm-mips/mipsregs.h index a46f8e258e6..979866000da 100644 --- a/include/asm-mips/mipsregs.h +++ b/include/asm-mips/mipsregs.h @@ -1462,7 +1462,7 @@ set_c0_##name(unsigned int set) \ { \ unsigned int res; \ unsigned int omt; \ - unsigned int flags; \ + unsigned long flags; \ \ local_irq_save(flags); \ omt = __dmt(); \ @@ -1480,7 +1480,7 @@ clear_c0_##name(unsigned int clear) \ { \ unsigned int res; \ unsigned int omt; \ - unsigned int flags; \ + unsigned long flags; \ \ local_irq_save(flags); \ omt = __dmt(); \ @@ -1498,7 +1498,7 @@ change_c0_##name(unsigned int change, unsigned int new) \ { \ unsigned int res; \ unsigned int omt; \ - unsigned int flags; \ + unsigned long flags; \ \ local_irq_save(flags); \ \ diff --git a/include/asm-mips/smtc.h b/include/asm-mips/smtc.h index 3639b28f80d..ea60bf08dcb 100644 --- a/include/asm-mips/smtc.h +++ b/include/asm-mips/smtc.h @@ -6,6 +6,7 @@ */ #include <asm/mips_mt.h> +#include <asm/smtc_ipi.h> /* * System-wide SMTC status information @@ -38,14 +39,15 @@ struct mm_struct; struct task_struct; void smtc_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu); - +void self_ipi(struct smtc_ipi *); void smtc_flush_tlb_asid(unsigned long asid); -extern int mipsmt_build_cpu_map(int startslot); -extern void mipsmt_prepare_cpus(void); +extern int smtc_build_cpu_map(int startslot); +extern void smtc_prepare_cpus(int cpus); extern void smtc_smp_finish(void); extern void smtc_boot_secondary(int cpu, struct task_struct *t); extern void smtc_cpus_done(void); + /* * Sharing the TLB between multiple VPEs means that the * "random" index selection function is not allowed to diff --git a/include/asm-mips/sn/mapped_kernel.h b/include/asm-mips/sn/mapped_kernel.h index c3dd5d0d525..721496a0bb9 100644 --- a/include/asm-mips/sn/mapped_kernel.h +++ b/include/asm-mips/sn/mapped_kernel.h @@ -5,6 +5,8 @@ #ifndef __ASM_SN_MAPPED_KERNEL_H #define __ASM_SN_MAPPED_KERNEL_H +#include <linux/mmzone.h> + /* * Note on how mapped kernels work: the text and data section is * compiled at cksseg segment (LOADADDR = 0xc001c000), and the @@ -29,10 +31,8 @@ #define MAPPED_ADDR_RO_TO_PHYS(x) (x - REP_BASE) #define MAPPED_ADDR_RW_TO_PHYS(x) (x - REP_BASE - 16777216) -#define MAPPED_KERN_RO_PHYSBASE(n) \ - (PLAT_NODE_DATA(n)->kern_vars.kv_ro_baseaddr) -#define MAPPED_KERN_RW_PHYSBASE(n) \ - (PLAT_NODE_DATA(n)->kern_vars.kv_rw_baseaddr) +#define MAPPED_KERN_RO_PHYSBASE(n) (hub_data(n)->kern_vars.kv_ro_baseaddr) +#define MAPPED_KERN_RW_PHYSBASE(n) (hub_data(n)->kern_vars.kv_rw_baseaddr) #define MAPPED_KERN_RO_TO_PHYS(x) \ ((unsigned long)MAPPED_ADDR_RO_TO_PHYS(x) | \ diff --git a/include/asm-mips/stackframe.h b/include/asm-mips/stackframe.h index 051e1af0bb9..4c37c4e5f72 100644 --- a/include/asm-mips/stackframe.h +++ b/include/asm-mips/stackframe.h @@ -297,14 +297,31 @@ #ifdef CONFIG_MIPS_MT_SMTC .set mips32r2 /* - * This may not really be necessary if ints are already - * inhibited here. + * We need to make sure the read-modify-write + * of Status below isn't perturbed by an interrupt + * or cross-TC access, so we need to do at least a DMT, + * protected by an interrupt-inhibit. But setting IXMT + * also creates a few-cycle window where an IPI could + * be queued and not be detected before potentially + * returning to a WAIT or user-mode loop. It must be + * replayed. + * + * We're in the middle of a context switch, and + * we can't dispatch it directly without trashing + * some registers, so we'll try to detect this unlikely + * case and program a software interrupt in the VPE, + * as would be done for a cross-VPE IPI. To accomodate + * the handling of that case, we're doing a DVPE instead + * of just a DMT here to protect against other threads. + * This is a lot of cruft to cover a tiny window. + * If you can find a better design, implement it! + * */ mfc0 v0, CP0_TCSTATUS ori v0, TCSTATUS_IXMT mtc0 v0, CP0_TCSTATUS _ehb - DMT 5 # dmt a1 + DVPE 5 # dvpe a1 jal mips_ihb #endif /* CONFIG_MIPS_MT_SMTC */ mfc0 a0, CP0_STATUS @@ -325,17 +342,50 @@ */ LONG_L v1, PT_TCSTATUS(sp) _ehb - mfc0 v0, CP0_TCSTATUS + mfc0 a0, CP0_TCSTATUS andi v1, TCSTATUS_IXMT - /* We know that TCStatua.IXMT should be set from above */ - xori v0, v0, TCSTATUS_IXMT - or v0, v0, v1 - mtc0 v0, CP0_TCSTATUS - _ehb - andi a1, a1, VPECONTROL_TE + bnez v1, 0f + +/* + * We'd like to detect any IPIs queued in the tiny window + * above and request an software interrupt to service them + * when we ERET. + * + * Computing the offset into the IPIQ array of the executing + * TC's IPI queue in-line would be tedious. We use part of + * the TCContext register to hold 16 bits of offset that we + * can add in-line to find the queue head. + */ + mfc0 v0, CP0_TCCONTEXT + la a2, IPIQ + srl v0, v0, 16 + addu a2, a2, v0 + LONG_L v0, 0(a2) + beqz v0, 0f +/* + * If we have a queue, provoke dispatch within the VPE by setting C_SW1 + */ + mfc0 v0, CP0_CAUSE + ori v0, v0, C_SW1 + mtc0 v0, CP0_CAUSE +0: + /* + * This test should really never branch but + * let's be prudent here. Having atomized + * the shared register modifications, we can + * now EVPE, and must do so before interrupts + * are potentially re-enabled. + */ + andi a1, a1, MVPCONTROL_EVP beqz a1, 1f - emt + evpe 1: + /* We know that TCStatua.IXMT should be set from above */ + xori a0, a0, TCSTATUS_IXMT + or a0, a0, v1 + mtc0 a0, CP0_TCSTATUS + _ehb + .set mips0 #endif /* CONFIG_MIPS_MT_SMTC */ LONG_L v1, PT_EPC(sp) diff --git a/include/linux/completion.h b/include/linux/completion.h index 02ef8835999..4a6b604ef7e 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -10,6 +10,18 @@ #include <linux/wait.h> +/** + * struct completion - structure used to maintain state for a "completion" + * + * This is the opaque structure used to maintain the state for a "completion". + * Completions currently use a FIFO to queue threads that have to wait for + * the "completion" event. + * + * See also: complete(), wait_for_completion() (and friends _timeout, + * _interruptible, _interruptible_timeout, and _killable), init_completion(), + * and macros DECLARE_COMPLETION(), DECLARE_COMPLETION_ONSTACK(), and + * INIT_COMPLETION(). + */ struct completion { unsigned int done; wait_queue_head_t wait; @@ -21,6 +33,14 @@ struct completion { #define COMPLETION_INITIALIZER_ONSTACK(work) \ ({ init_completion(&work); work; }) +/** + * DECLARE_COMPLETION: - declare and initialize a completion structure + * @work: identifier for the completion structure + * + * This macro declares and initializes a completion structure. Generally used + * for static declarations. You should use the _ONSTACK variant for automatic + * variables. + */ #define DECLARE_COMPLETION(work) \ struct completion work = COMPLETION_INITIALIZER(work) @@ -29,6 +49,13 @@ struct completion { * completions - so we use the _ONSTACK() variant for those that * are on the kernel stack: */ +/** + * DECLARE_COMPLETION_ONSTACK: - declare and initialize a completion structure + * @work: identifier for the completion structure + * + * This macro declares and initializes a completion structure on the kernel + * stack. + */ #ifdef CONFIG_LOCKDEP # define DECLARE_COMPLETION_ONSTACK(work) \ struct completion work = COMPLETION_INITIALIZER_ONSTACK(work) @@ -36,6 +63,13 @@ struct completion { # define DECLARE_COMPLETION_ONSTACK(work) DECLARE_COMPLETION(work) #endif +/** + * init_completion: - Initialize a dynamically allocated completion + * @x: completion structure that is to be initialized + * + * This inline function will initialize a dynamically created completion + * structure. + */ static inline void init_completion(struct completion *x) { x->done = 0; @@ -55,6 +89,13 @@ extern bool completion_done(struct completion *x); extern void complete(struct completion *); extern void complete_all(struct completion *); +/** + * INIT_COMPLETION: - reinitialize a completion structure + * @x: completion structure to be reinitialized + * + * This macro should be used to reinitialize a completion structure so it can + * be reused. This is especially important after complete_all() is used. + */ #define INIT_COMPLETION(x) ((x).done = 0) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index d7faf880849..c2747ac2ae4 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -69,6 +69,7 @@ static inline void unregister_cpu_notifier(struct notifier_block *nb) #endif int cpu_up(unsigned int cpu); +void notify_cpu_starting(unsigned int cpu); extern void cpu_hotplug_init(void); extern void cpu_maps_update_begin(void); extern void cpu_maps_update_done(void); diff --git a/include/linux/ide.h b/include/linux/ide.h index 1524829f73f..6514db8fd2e 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -366,7 +366,9 @@ enum { /* Currently on a filemark */ IDE_AFLAG_FILEMARK = (1 << 25), /* 0 = no tape is loaded, so we don't rewind after ejecting */ - IDE_AFLAG_MEDIUM_PRESENT = (1 << 26) + IDE_AFLAG_MEDIUM_PRESENT = (1 << 26), + + IDE_AFLAG_NO_AUTOCLOSE = (1 << 27), }; struct ide_drive_s { diff --git a/include/linux/notifier.h b/include/linux/notifier.h index da2698b0fdd..b86fa2ffca0 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -213,9 +213,16 @@ static inline int notifier_to_errno(int ret) #define CPU_DOWN_FAILED 0x0006 /* CPU (unsigned)v NOT going down */ #define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */ #define CPU_DYING 0x0008 /* CPU (unsigned)v not running any task, - * not handling interrupts, soon dead */ + * not handling interrupts, soon dead. + * Called on the dying cpu, interrupts + * are already disabled. Must not + * sleep, must not fail */ #define CPU_POST_DEAD 0x0009 /* CPU (unsigned)v dead, cpu_hotplug * lock is dropped */ +#define CPU_STARTING 0x000A /* CPU (unsigned)v soon running. + * Called on the new cpu, just before + * enabling interrupts. Must not sleep, + * must not fail */ /* Used for CPU hotplug events occuring while tasks are frozen due to a suspend * operation in progress @@ -229,6 +236,7 @@ static inline int notifier_to_errno(int ret) #define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN) #define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN) #define CPU_DYING_FROZEN (CPU_DYING | CPU_TASKS_FROZEN) +#define CPU_STARTING_FROZEN (CPU_STARTING | CPU_TASKS_FROZEN) /* Hibernation and suspend events */ #define PM_HIBERNATION_PREPARE 0x0001 /* Going to hibernate */ diff --git a/include/linux/proportions.h b/include/linux/proportions.h index 5afc1b23346..cf793bbbd05 100644 --- a/include/linux/proportions.h +++ b/include/linux/proportions.h @@ -104,8 +104,8 @@ struct prop_local_single { * snapshot of the last seen global state * and a lock protecting this state */ - int shift; unsigned long period; + int shift; spinlock_t lock; /* protect the snapshot state */ }; diff --git a/include/linux/sched.h b/include/linux/sched.h index 3d9120c5ad1..d8e699b5585 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -451,8 +451,8 @@ struct signal_struct { * - everyone except group_exit_task is stopped during signal delivery * of fatal signals, group_exit_task processes the signal. */ - struct task_struct *group_exit_task; int notify_count; + struct task_struct *group_exit_task; /* thread group stop support, overloads group_exit_code too */ int group_stop_count; @@ -897,7 +897,7 @@ struct sched_class { void (*yield_task) (struct rq *rq); int (*select_task_rq)(struct task_struct *p, int sync); - void (*check_preempt_curr) (struct rq *rq, struct task_struct *p); + void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int sync); struct task_struct * (*pick_next_task) (struct rq *rq); void (*put_prev_task) (struct rq *rq, struct task_struct *p); @@ -1010,8 +1010,8 @@ struct sched_entity { struct sched_rt_entity { struct list_head run_list; - unsigned int time_slice; unsigned long timeout; + unsigned int time_slice; int nr_cpus_allowed; struct sched_rt_entity *back; diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 5da9794b2d7..b106fd8e0d5 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -1,6 +1,8 @@ #ifndef __LINUX_STACKTRACE_H #define __LINUX_STACKTRACE_H +struct task_struct; + #ifdef CONFIG_STACKTRACE struct stack_trace { unsigned int nr_entries, max_entries; diff --git a/init/main.c b/init/main.c index f6f7042331d..3820323c4c8 100644 --- a/init/main.c +++ b/init/main.c @@ -708,7 +708,7 @@ int do_one_initcall(initcall_t fn) int result; if (initcall_debug) { - print_fn_descriptor_symbol("calling %s\n", fn); + printk("calling %pF\n", fn); t0 = ktime_get(); } @@ -718,8 +718,8 @@ int do_one_initcall(initcall_t fn) t1 = ktime_get(); delta = ktime_sub(t1, t0); - print_fn_descriptor_symbol("initcall %s", fn); - printk(" returned %d after %Ld msecs\n", result, + printk("initcall %pF returned %d after %Ld msecs\n", + fn, result, (unsigned long long) delta.tv64 >> 20); } @@ -737,8 +737,7 @@ int do_one_initcall(initcall_t fn) local_irq_enable(); } if (msgbuf[0]) { - print_fn_descriptor_symbol(KERN_WARNING "initcall %s", fn); - printk(" returned with %s\n", msgbuf); + printk("initcall %pF returned with %s\n", fn, msgbuf); } return result; diff --git a/kernel/cpu.c b/kernel/cpu.c index f17e9854c24..86d49045dae 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -199,13 +199,14 @@ static int __ref take_cpu_down(void *_param) struct take_cpu_down_param *param = _param; int err; - raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod, - param->hcpu); /* Ensure this CPU doesn't handle any more interrupts. */ err = __cpu_disable(); if (err < 0) return err; + raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod, + param->hcpu); + /* Force idle task to run as soon as we yield: it should immediately notice cpu is offline and die quickly. */ sched_idle_next(); @@ -453,6 +454,25 @@ out: } #endif /* CONFIG_PM_SLEEP_SMP */ +/** + * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers + * @cpu: cpu that just started + * + * This function calls the cpu_chain notifiers with CPU_STARTING. + * It must be called by the arch code on the new cpu, before the new cpu + * enables interrupts and before the "boot" cpu returns from __cpu_up(). + */ +void notify_cpu_starting(unsigned int cpu) +{ + unsigned long val = CPU_STARTING; + +#ifdef CONFIG_PM_SLEEP_SMP + if (cpu_isset(cpu, frozen_cpus)) + val = CPU_STARTING_FROZEN; +#endif /* CONFIG_PM_SLEEP_SMP */ + raw_notifier_call_chain(&cpu_chain, val, (void *)(long)cpu); +} + #endif /* CONFIG_SMP */ /* diff --git a/kernel/kgdb.c b/kernel/kgdb.c index 25d955dbb98..e4dcfb2272a 100644 --- a/kernel/kgdb.c +++ b/kernel/kgdb.c @@ -590,6 +590,7 @@ static void kgdb_wait(struct pt_regs *regs) /* Signal the primary CPU that we are done: */ atomic_set(&cpu_in_kgdb[cpu], 0); + touch_softlockup_watchdog(); clocksource_touch_watchdog(); local_irq_restore(flags); } @@ -1432,6 +1433,7 @@ acquirelock: atomic_read(&kgdb_cpu_doing_single_step) != cpu) { atomic_set(&kgdb_active, -1); + touch_softlockup_watchdog(); clocksource_touch_watchdog(); local_irq_restore(flags); @@ -1524,6 +1526,7 @@ acquirelock: kgdb_restore: /* Free kgdb_active */ atomic_set(&kgdb_active, -1); + touch_softlockup_watchdog(); clocksource_touch_watchdog(); local_irq_restore(flags); diff --git a/kernel/sched.c b/kernel/sched.c index ad1962dc0aa..9715f4ce6cf 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -204,11 +204,16 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime) rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED; } +static inline int rt_bandwidth_enabled(void) +{ + return sysctl_sched_rt_runtime >= 0; +} + static void start_rt_bandwidth(struct rt_bandwidth *rt_b) { ktime_t now; - if (rt_b->rt_runtime == RUNTIME_INF) + if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF) return; if (hrtimer_active(&rt_b->rt_period_timer)) @@ -298,9 +303,9 @@ static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp; static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp; #endif /* CONFIG_RT_GROUP_SCHED */ -#else /* !CONFIG_FAIR_GROUP_SCHED */ +#else /* !CONFIG_USER_SCHED */ #define root_task_group init_task_group -#endif /* CONFIG_FAIR_GROUP_SCHED */ +#endif /* CONFIG_USER_SCHED */ /* task_group_lock serializes add/remove of task groups and also changes to * a task group's cpu shares. @@ -604,9 +609,9 @@ struct rq { static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -static inline void check_preempt_curr(struct rq *rq, struct task_struct *p) +static inline void check_preempt_curr(struct rq *rq, struct task_struct *p, int sync) { - rq->curr->sched_class->check_preempt_curr(rq, p); + rq->curr->sched_class->check_preempt_curr(rq, p, sync); } static inline int cpu_of(struct rq *rq) @@ -1102,7 +1107,7 @@ static void hrtick_start(struct rq *rq, u64 delay) hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL); } -static void init_hrtick(void) +static inline void init_hrtick(void) { } #endif /* CONFIG_SMP */ @@ -1121,7 +1126,7 @@ static void init_rq_hrtick(struct rq *rq) rq->hrtick_timer.function = hrtick; rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU; } -#else +#else /* CONFIG_SCHED_HRTICK */ static inline void hrtick_clear(struct rq *rq) { } @@ -1133,7 +1138,7 @@ static inline void init_rq_hrtick(struct rq *rq) static inline void init_hrtick(void) { } -#endif +#endif /* CONFIG_SCHED_HRTICK */ /* * resched_task - mark a task 'to be rescheduled now'. @@ -1380,38 +1385,24 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load) update_load_sub(&rq->load, load); } -#ifdef CONFIG_SMP -static unsigned long source_load(int cpu, int type); -static unsigned long target_load(int cpu, int type); -static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); - -static unsigned long cpu_avg_load_per_task(int cpu) -{ - struct rq *rq = cpu_rq(cpu); - - if (rq->nr_running) - rq->avg_load_per_task = rq->load.weight / rq->nr_running; - - return rq->avg_load_per_task; -} - -#ifdef CONFIG_FAIR_GROUP_SCHED - -typedef void (*tg_visitor)(struct task_group *, int, struct sched_domain *); +#if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)) || defined(CONFIG_RT_GROUP_SCHED) +typedef int (*tg_visitor)(struct task_group *, void *); /* * Iterate the full tree, calling @down when first entering a node and @up when * leaving it for the final time. */ -static void -walk_tg_tree(tg_visitor down, tg_visitor up, int cpu, struct sched_domain *sd) +static int walk_tg_tree(tg_visitor down, tg_visitor up, void *data) { struct task_group *parent, *child; + int ret; rcu_read_lock(); parent = &root_task_group; down: - (*down)(parent, cpu, sd); + ret = (*down)(parent, data); + if (ret) + goto out_unlock; list_for_each_entry_rcu(child, &parent->children, siblings) { parent = child; goto down; @@ -1419,15 +1410,43 @@ down: up: continue; } - (*up)(parent, cpu, sd); + ret = (*up)(parent, data); + if (ret) + goto out_unlock; child = parent; parent = parent->parent; if (parent) goto up; +out_unlock: rcu_read_unlock(); + + return ret; } +static int tg_nop(struct task_group *tg, void *data) +{ + return 0; +} +#endif + +#ifdef CONFIG_SMP +static unsigned long source_load(int cpu, int type); +static unsigned long target_load(int cpu, int type); +static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); + +static unsigned long cpu_avg_load_per_task(int cpu) +{ + struct rq *rq = cpu_rq(cpu); + + if (rq->nr_running) + rq->avg_load_per_task = rq->load.weight / rq->nr_running; + + return rq->avg_load_per_task; +} + +#ifdef CONFIG_FAIR_GROUP_SCHED + static void __set_se_shares(struct sched_entity *se, unsigned long shares); /* @@ -1486,11 +1505,11 @@ __update_group_shares_cpu(struct task_group *tg, int cpu, * This needs to be done in a bottom-up fashion because the rq weight of a * parent group depends on the shares of its child groups. */ -static void -tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd) +static int tg_shares_up(struct task_group *tg, void *data) { unsigned long rq_weight = 0; unsigned long shares = 0; + struct sched_domain *sd = data; int i; for_each_cpu_mask(i, sd->span) { @@ -1515,6 +1534,8 @@ tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd) __update_group_shares_cpu(tg, i, shares, rq_weight); spin_unlock_irqrestore(&rq->lock, flags); } + + return 0; } /* @@ -1522,10 +1543,10 @@ tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd) * This needs to be done in a top-down fashion because the load of a child * group is a fraction of its parents load. */ -static void -tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd) +static int tg_load_down(struct task_group *tg, void *data) { unsigned long load; + long cpu = (long)data; if (!tg->parent) { load = cpu_rq(cpu)->load.weight; @@ -1536,11 +1557,8 @@ tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd) } tg->cfs_rq[cpu]->h_load = load; -} -static void -tg_nop(struct task_group *tg, int cpu, struct sched_domain *sd) -{ + return 0; } static void update_shares(struct sched_domain *sd) @@ -1550,7 +1568,7 @@ static void update_shares(struct sched_domain *sd) if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) { sd->last_update = now; - walk_tg_tree(tg_nop, tg_shares_up, 0, sd); + walk_tg_tree(tg_nop, tg_shares_up, sd); } } @@ -1561,9 +1579,9 @@ static void update_shares_locked(struct rq *rq, struct sched_domain *sd) spin_lock(&rq->lock); } -static void update_h_load(int cpu) +static void update_h_load(long cpu) { - walk_tg_tree(tg_load_down, tg_nop, cpu, NULL); + walk_tg_tree(tg_load_down, tg_nop, (void *)cpu); } #else @@ -1921,11 +1939,8 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) running = task_running(rq, p); on_rq = p->se.on_rq; ncsw = 0; - if (!match_state || p->state == match_state) { - ncsw = p->nivcsw + p->nvcsw; - if (unlikely(!ncsw)) - ncsw = 1; - } + if (!match_state || p->state == match_state) + ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ task_rq_unlock(rq, &flags); /* @@ -2285,7 +2300,7 @@ out_running: trace_mark(kernel_sched_wakeup, "pid %d state %ld ## rq %p task %p rq->curr %p", p->pid, p->state, rq, p, rq->curr); - check_preempt_curr(rq, p); + check_preempt_curr(rq, p, sync); p->state = TASK_RUNNING; #ifdef CONFIG_SMP @@ -2420,7 +2435,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) trace_mark(kernel_sched_wakeup_new, "pid %d state %ld ## rq %p task %p rq->curr %p", p->pid, p->state, rq, p, rq->curr); - check_preempt_curr(rq, p); + check_preempt_curr(rq, p, 0); #ifdef CONFIG_SMP if (p->sched_class->task_wake_up) p->sched_class->task_wake_up(rq, p); @@ -2880,7 +2895,7 @@ static void pull_task(struct rq *src_rq, struct task_struct *p, * Note that idle threads have a prio of MAX_PRIO, for this test * to be always true for them. */ - check_preempt_curr(this_rq, p); + check_preempt_curr(this_rq, p, 0); } /* @@ -4627,6 +4642,15 @@ __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive) } EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */ +/** + * complete: - signals a single thread waiting on this completion + * @x: holds the state of this particular completion + * + * This will wake up a single thread waiting on this completion. Threads will be + * awakened in the same order in which they were queued. + * + * See also complete_all(), wait_for_completion() and related routines. + */ void complete(struct completion *x) { unsigned long flags; @@ -4638,6 +4662,12 @@ void complete(struct completion *x) } EXPORT_SYMBOL(complete); +/** + * complete_all: - signals all threads waiting on this completion + * @x: holds the state of this particular completion + * + * This will wake up all threads waiting on this particular completion event. + */ void complete_all(struct completion *x) { unsigned long flags; @@ -4658,10 +4688,7 @@ do_wait_for_common(struct completion *x, long timeout, int state) wait.flags |= WQ_FLAG_EXCLUSIVE; __add_wait_queue_tail(&x->wait, &wait); do { - if ((state == TASK_INTERRUPTIBLE && - signal_pending(current)) || - (state == TASK_KILLABLE && - fatal_signal_pending(current))) { + if (signal_pending_state(state, current)) { timeout = -ERESTARTSYS; break; } @@ -4689,12 +4716,31 @@ wait_for_common(struct completion *x, long timeout, int state) return timeout; } +/** + * wait_for_completion: - waits for completion of a task + * @x: holds the state of this particular completion + * + * This waits to be signaled for completion of a specific task. It is NOT + * interruptible and there is no timeout. + * + * See also similar routines (i.e. wait_for_completion_timeout()) with timeout + * and interrupt capability. Also see complete(). + */ void __sched wait_for_completion(struct completion *x) { wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE); } EXPORT_SYMBOL(wait_for_completion); +/** + * wait_for_completion_timeout: - waits for completion of a task (w/timeout) + * @x: holds the state of this particular completion + * @timeout: timeout value in jiffies + * + * This waits for either a completion of a specific task to be signaled or for a + * specified timeout to expire. The timeout is in jiffies. It is not + * interruptible. + */ unsigned long __sched wait_for_completion_timeout(struct completion *x, unsigned long timeout) { @@ -4702,6 +4748,13 @@ wait_for_completion_timeout(struct completion *x, unsigned long timeout) } EXPORT_SYMBOL(wait_for_completion_timeout); +/** + * wait_for_completion_interruptible: - waits for completion of a task (w/intr) + * @x: holds the state of this particular completion + * + * This waits for completion of a specific task to be signaled. It is + * interruptible. + */ int __sched wait_for_completion_interruptible(struct completion *x) { long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE); @@ -4711,6 +4764,14 @@ int __sched wait_for_completion_interruptible(struct completion *x) } EXPORT_SYMBOL(wait_for_completion_interruptible); +/** + * wait_for_completion_interruptible_timeout: - waits for completion (w/(to,intr)) + * @x: holds the state of this particular completion + * @timeout: timeout value in jiffies + * + * This waits for either a completion of a specific task to be signaled or for a + * specified timeout to expire. It is interruptible. The timeout is in jiffies. + */ unsigned long __sched wait_for_completion_interruptible_timeout(struct completion *x, unsigned long timeout) @@ -4719,6 +4780,13 @@ wait_for_completion_interruptible_timeout(struct completion *x, } EXPORT_SYMBOL(wait_for_completion_interruptible_timeout); +/** + * wait_for_completion_killable: - waits for completion of a task (killable) + * @x: holds the state of this particular completion + * + * This waits to be signaled for completion of a specific task. It can be + * interrupted by a kill signal. + */ int __sched wait_for_completion_killable(struct completion *x) { long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE); @@ -5121,7 +5189,8 @@ recheck: * Do not allow realtime tasks into groups that have no runtime * assigned. */ - if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0) + if (rt_bandwidth_enabled() && rt_policy(policy) && + task_group(p)->rt_bandwidth.rt_runtime == 0) return -EPERM; #endif @@ -5957,7 +6026,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) set_task_cpu(p, dest_cpu); if (on_rq) { activate_task(rq_dest, p, 0); - check_preempt_curr(rq_dest, p); + check_preempt_curr(rq_dest, p, 0); } done: ret = 1; @@ -8242,20 +8311,25 @@ void __might_sleep(char *file, int line) #ifdef in_atomic static unsigned long prev_jiffy; /* ratelimiting */ - if ((in_atomic() || irqs_disabled()) && - system_state == SYSTEM_RUNNING && !oops_in_progress) { - if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) - return; - prev_jiffy = jiffies; - printk(KERN_ERR "BUG: sleeping function called from invalid" - " context at %s:%d\n", file, line); - printk("in_atomic():%d, irqs_disabled():%d\n", - in_atomic(), irqs_disabled()); - debug_show_held_locks(current); - if (irqs_disabled()) - print_irqtrace_events(current); - dump_stack(); - } + if ((!in_atomic() && !irqs_disabled()) || + system_state != SYSTEM_RUNNING || oops_in_progress) + return; + if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) + return; + prev_jiffy = jiffies; + + printk(KERN_ERR + "BUG: sleeping function called from invalid context at %s:%d\n", + file, line); + printk(KERN_ERR + "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", + in_atomic(), irqs_disabled(), + current->pid, current->comm); + + debug_show_held_locks(current); + if (irqs_disabled()) + print_irqtrace_events(current); + dump_stack(); #endif } EXPORT_SYMBOL(__might_sleep); @@ -8753,73 +8827,95 @@ static DEFINE_MUTEX(rt_constraints_mutex); static unsigned long to_ratio(u64 period, u64 runtime) { if (runtime == RUNTIME_INF) - return 1ULL << 16; + return 1ULL << 20; - return div64_u64(runtime << 16, period); + return div64_u64(runtime << 20, period); } -#ifdef CONFIG_CGROUP_SCHED -static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) +/* Must be called with tasklist_lock held */ +static inline int tg_has_rt_tasks(struct task_group *tg) { - struct task_group *tgi, *parent = tg->parent; - unsigned long total = 0; + struct task_struct *g, *p; - if (!parent) { - if (global_rt_period() < period) - return 0; + do_each_thread(g, p) { + if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg) + return 1; + } while_each_thread(g, p); - return to_ratio(period, runtime) < - to_ratio(global_rt_period(), global_rt_runtime()); - } + return 0; +} - if (ktime_to_ns(parent->rt_bandwidth.rt_period) < period) - return 0; +struct rt_schedulable_data { + struct task_group *tg; + u64 rt_period; + u64 rt_runtime; +}; - rcu_read_lock(); - list_for_each_entry_rcu(tgi, &parent->children, siblings) { - if (tgi == tg) - continue; +static int tg_schedulable(struct task_group *tg, void *data) +{ + struct rt_schedulable_data *d = data; + struct task_group *child; + unsigned long total, sum = 0; + u64 period, runtime; - total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period), - tgi->rt_bandwidth.rt_runtime); + period = ktime_to_ns(tg->rt_bandwidth.rt_period); + runtime = tg->rt_bandwidth.rt_runtime; + + if (tg == d->tg) { + period = d->rt_period; + runtime = d->rt_runtime; } - rcu_read_unlock(); - return total + to_ratio(period, runtime) <= - to_ratio(ktime_to_ns(parent->rt_bandwidth.rt_period), - parent->rt_bandwidth.rt_runtime); -} -#elif defined CONFIG_USER_SCHED -static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) -{ - struct task_group *tgi; - unsigned long total = 0; - unsigned long global_ratio = - to_ratio(global_rt_period(), global_rt_runtime()); + /* + * Cannot have more runtime than the period. + */ + if (runtime > period && runtime != RUNTIME_INF) + return -EINVAL; - rcu_read_lock(); - list_for_each_entry_rcu(tgi, &task_groups, list) { - if (tgi == tg) - continue; + /* + * Ensure we don't starve existing RT tasks. + */ + if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg)) + return -EBUSY; + + total = to_ratio(period, runtime); + + /* + * Nobody can have more than the global setting allows. + */ + if (total > to_ratio(global_rt_period(), global_rt_runtime())) + return -EINVAL; + + /* + * The sum of our children's runtime should not exceed our own. + */ + list_for_each_entry_rcu(child, &tg->children, siblings) { + period = ktime_to_ns(child->rt_bandwidth.rt_period); + runtime = child->rt_bandwidth.rt_runtime; + + if (child == d->tg) { + period = d->rt_period; + runtime = d->rt_runtime; + } - total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period), - tgi->rt_bandwidth.rt_runtime); + sum += to_ratio(period, runtime); } - rcu_read_unlock(); - return total + to_ratio(period, runtime) < global_ratio; + if (sum > total) + return -EINVAL; + + return 0; } -#endif -/* Must be called with tasklist_lock held */ -static inline int tg_has_rt_tasks(struct task_group *tg) +static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) { - struct task_struct *g, *p; - do_each_thread(g, p) { - if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg) - return 1; - } while_each_thread(g, p); - return 0; + struct rt_schedulable_data data = { + .tg = tg, + .rt_period = period, + .rt_runtime = runtime, + }; + + return walk_tg_tree(tg_schedulable, tg_nop, &data); } static int tg_set_bandwidth(struct task_group *tg, @@ -8829,14 +8925,9 @@ static int tg_set_bandwidth(struct task_group *tg, mutex_lock(&rt_constraints_mutex); read_lock(&tasklist_lock); - if (rt_runtime == 0 && tg_has_rt_tasks(tg)) { - err = -EBUSY; + err = __rt_schedulable(tg, rt_period, rt_runtime); + if (err) goto unlock; - } - if (!__rt_schedulable(tg, rt_period, rt_runtime)) { - err = -EINVAL; - goto unlock; - } spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock); tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period); @@ -8905,19 +8996,25 @@ long sched_group_rt_period(struct task_group *tg) static int sched_rt_global_constraints(void) { - struct task_group *tg = &root_task_group; - u64 rt_runtime, rt_period; + u64 runtime, period; int ret = 0; if (sysctl_sched_rt_period <= 0) return -EINVAL; - rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period); - rt_runtime = tg->rt_bandwidth.rt_runtime; + runtime = global_rt_runtime(); + period = global_rt_period(); + + /* + * Sanity check on the sysctl variables. + */ + if (runtime > period && runtime != RUNTIME_INF) + return -EINVAL; mutex_lock(&rt_constraints_mutex); - if (!__rt_schedulable(tg, rt_period, rt_runtime)) - ret = -EINVAL; + read_lock(&tasklist_lock); + ret = __rt_schedulable(NULL, 0, 0); + read_unlock(&tasklist_lock); mutex_unlock(&rt_constraints_mutex); return ret; @@ -8991,7 +9088,6 @@ cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp) if (!cgrp->parent) { /* This is early initialization for the top cgroup */ - init_task_group.css.cgroup = cgrp; return &init_task_group.css; } @@ -9000,9 +9096,6 @@ cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp) if (IS_ERR(tg)) return ERR_PTR(-ENOMEM); - /* Bind the cgroup to task_group object we just created */ - tg->css.cgroup = cgrp; - return &tg->css; } diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index fb8994c6d4b..fcbe850a5a9 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -409,64 +409,6 @@ static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se) } /* - * The goal of calc_delta_asym() is to be asymmetrically around NICE_0_LOAD, in - * that it favours >=0 over <0. - * - * -20 | - * | - * 0 --------+------- - * .' - * 19 .' - * - */ -static unsigned long -calc_delta_asym(unsigned long delta, struct sched_entity *se) -{ - struct load_weight lw = { - .weight = NICE_0_LOAD, - .inv_weight = 1UL << (WMULT_SHIFT-NICE_0_SHIFT) - }; - - for_each_sched_entity(se) { - struct load_weight *se_lw = &se->load; - unsigned long rw = cfs_rq_of(se)->load.weight; - -#ifdef CONFIG_FAIR_SCHED_GROUP - struct cfs_rq *cfs_rq = se->my_q; - struct task_group *tg = NULL - - if (cfs_rq) - tg = cfs_rq->tg; - - if (tg && tg->shares < NICE_0_LOAD) { - /* - * scale shares to what it would have been had - * tg->weight been NICE_0_LOAD: - * - * weight = 1024 * shares / tg->weight - */ - lw.weight *= se->load.weight; - lw.weight /= tg->shares; - - lw.inv_weight = 0; - - se_lw = &lw; - rw += lw.weight - se->load.weight; - } else -#endif - - if (se->load.weight < NICE_0_LOAD) { - se_lw = &lw; - rw += NICE_0_LOAD - se->load.weight; - } - - delta = calc_delta_mine(delta, rw, se_lw); - } - - return delta; -} - -/* * Update the current task's runtime statistics. Skip current tasks that * are not in our scheduling class. */ @@ -586,11 +528,12 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) update_load_add(&cfs_rq->load, se->load.weight); if (!parent_entity(se)) inc_cpu_load(rq_of(cfs_rq), se->load.weight); - if (entity_is_task(se)) + if (entity_is_task(se)) { add_cfs_task_weight(cfs_rq, se->load.weight); + list_add(&se->group_node, &cfs_rq->tasks); + } cfs_rq->nr_running++; se->on_rq = 1; - list_add(&se->group_node, &cfs_rq->tasks); } static void @@ -599,11 +542,12 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) update_load_sub(&cfs_rq->load, se->load.weight); if (!parent_entity(se)) dec_cpu_load(rq_of(cfs_rq), se->load.weight); - if (entity_is_task(se)) + if (entity_is_task(se)) { add_cfs_task_weight(cfs_rq, -se->load.weight); + list_del_init(&se->group_node); + } cfs_rq->nr_running--; se->on_rq = 0; - list_del_init(&se->group_node); } static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) @@ -1085,7 +1029,6 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg) { struct sched_entity *se = tg->se[cpu]; - long more_w; if (!tg->parent) return wl; @@ -1097,18 +1040,17 @@ static long effective_load(struct task_group *tg, int cpu, if (!wl && sched_feat(ASYM_EFF_LOAD)) return wl; - /* - * Instead of using this increment, also add the difference - * between when the shares were last updated and now. - */ - more_w = se->my_q->load.weight - se->my_q->rq_weight; - wl += more_w; - wg += more_w; - for_each_sched_entity(se) { -#define D(n) (likely(n) ? (n) : 1) - long S, rw, s, a, b; + long more_w; + + /* + * Instead of using this increment, also add the difference + * between when the shares were last updated and now. + */ + more_w = se->my_q->load.weight - se->my_q->rq_weight; + wl += more_w; + wg += more_w; S = se->my_q->tg->shares; s = se->my_q->shares; @@ -1117,7 +1059,11 @@ static long effective_load(struct task_group *tg, int cpu, a = S*(rw + wl); b = S*rw + s*wg; - wl = s*(a-b)/D(b); + wl = s*(a-b); + + if (likely(b)) + wl /= b; + /* * Assume the group is already running and will * thus already be accounted for in the weight. @@ -1126,7 +1072,6 @@ static long effective_load(struct task_group *tg, int cpu, * alter the group weight. */ wg = 0; -#undef D } return wl; @@ -1143,7 +1088,7 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu, #endif static int -wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq, +wake_affine(struct sched_domain *this_sd, struct rq *this_rq, struct task_struct *p, int prev_cpu, int this_cpu, int sync, int idx, unsigned long load, unsigned long this_load, unsigned int imbalance) @@ -1191,8 +1136,8 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq, schedstat_inc(p, se.nr_wakeups_affine_attempts); tl_per_task = cpu_avg_load_per_task(this_cpu); - if ((tl <= load && tl + target_load(prev_cpu, idx) <= tl_per_task) || - balanced) { + if (balanced || (tl <= load && tl + target_load(prev_cpu, idx) <= + tl_per_task)) { /* * This domain has SD_WAKE_AFFINE and * p is cache cold in this domain, and @@ -1211,16 +1156,17 @@ static int select_task_rq_fair(struct task_struct *p, int sync) struct sched_domain *sd, *this_sd = NULL; int prev_cpu, this_cpu, new_cpu; unsigned long load, this_load; - struct rq *rq, *this_rq; + struct rq *this_rq; unsigned int imbalance; int idx; prev_cpu = task_cpu(p); - rq = task_rq(p); this_cpu = smp_processor_id(); this_rq = cpu_rq(this_cpu); new_cpu = prev_cpu; + if (prev_cpu == this_cpu) + goto out; /* * 'this_sd' is the first domain that both * this_cpu and prev_cpu are present in: @@ -1248,13 +1194,10 @@ static int select_task_rq_fair(struct task_struct *p, int sync) load = source_load(prev_cpu, idx); this_load = target_load(this_cpu, idx); - if (wake_affine(rq, this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx, + if (wake_affine(this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx, load, this_load, imbalance)) return this_cpu; - if (prev_cpu == this_cpu) - goto out; - /* * Start passive balancing when half the imbalance_pct * limit is reached. @@ -1281,62 +1224,20 @@ static unsigned long wakeup_gran(struct sched_entity *se) * + nice tasks. */ if (sched_feat(ASYM_GRAN)) - gran = calc_delta_asym(sysctl_sched_wakeup_granularity, se); - else - gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se); + gran = calc_delta_mine(gran, NICE_0_LOAD, &se->load); return gran; } /* - * Should 'se' preempt 'curr'. - * - * |s1 - * |s2 - * |s3 - * g - * |<--->|c - * - * w(c, s1) = -1 - * w(c, s2) = 0 - * w(c, s3) = 1 - * - */ -static int -wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se) -{ - s64 gran, vdiff = curr->vruntime - se->vruntime; - - if (vdiff < 0) - return -1; - - gran = wakeup_gran(curr); - if (vdiff > gran) - return 1; - - return 0; -} - -/* return depth at which a sched entity is present in the hierarchy */ -static inline int depth_se(struct sched_entity *se) -{ - int depth = 0; - - for_each_sched_entity(se) - depth++; - - return depth; -} - -/* * Preempt the current task with a newly woken task if needed: */ -static void check_preempt_wakeup(struct rq *rq, struct task_struct *p) +static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) { struct task_struct *curr = rq->curr; struct cfs_rq *cfs_rq = task_cfs_rq(curr); struct sched_entity *se = &curr->se, *pse = &p->se; - int se_depth, pse_depth; + s64 delta_exec; if (unlikely(rt_prio(p->prio))) { update_rq_clock(rq); @@ -1351,6 +1252,13 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p) cfs_rq_of(pse)->next = pse; /* + * We can come here with TIF_NEED_RESCHED already set from new task + * wake up path. + */ + if (test_tsk_need_resched(curr)) + return; + + /* * Batch tasks do not preempt (their preemption is driven by * the tick): */ @@ -1360,33 +1268,15 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p) if (!sched_feat(WAKEUP_PREEMPT)) return; - /* - * preemption test can be made between sibling entities who are in the - * same cfs_rq i.e who have a common parent. Walk up the hierarchy of - * both tasks until we find their ancestors who are siblings of common - * parent. - */ - - /* First walk up until both entities are at same depth */ - se_depth = depth_se(se); - pse_depth = depth_se(pse); - - while (se_depth > pse_depth) { - se_depth--; - se = parent_entity(se); - } - - while (pse_depth > se_depth) { - pse_depth--; - pse = parent_entity(pse); - } - - while (!is_same_group(se, pse)) { - se = parent_entity(se); - pse = parent_entity(pse); + if (sched_feat(WAKEUP_OVERLAP) && sync && + se->avg_overlap < sysctl_sched_migration_cost && + pse->avg_overlap < sysctl_sched_migration_cost) { + resched_task(curr); + return; } - if (wakeup_preempt_entity(se, pse) == 1) + delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime; + if (delta_exec > wakeup_gran(pse)) resched_task(curr); } @@ -1445,19 +1335,9 @@ __load_balance_iterator(struct cfs_rq *cfs_rq, struct list_head *next) if (next == &cfs_rq->tasks) return NULL; - /* Skip over entities that are not tasks */ - do { - se = list_entry(next, struct sched_entity, group_node); - next = next->next; - } while (next != &cfs_rq->tasks && !entity_is_task(se)); - - if (next == &cfs_rq->tasks) - return NULL; - - cfs_rq->balance_iterator = next; - - if (entity_is_task(se)) - p = task_of(se); + se = list_entry(next, struct sched_entity, group_node); + p = task_of(se); + cfs_rq->balance_iterator = next->next; return p; } @@ -1507,7 +1387,7 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, rcu_read_lock(); update_h_load(busiest_cpu); - list_for_each_entry(tg, &task_groups, list) { + list_for_each_entry_rcu(tg, &task_groups, list) { struct cfs_rq *busiest_cfs_rq = tg->cfs_rq[busiest_cpu]; unsigned long busiest_h_load = busiest_cfs_rq->h_load; unsigned long busiest_weight = busiest_cfs_rq->load.weight; @@ -1620,10 +1500,10 @@ static void task_new_fair(struct rq *rq, struct task_struct *p) * 'current' within the tree based on its new key value. */ swap(curr->vruntime, se->vruntime); + resched_task(rq->curr); } enqueue_task_fair(rq, p, 0); - resched_task(rq->curr); } /* @@ -1642,7 +1522,7 @@ static void prio_changed_fair(struct rq *rq, struct task_struct *p, if (p->prio > oldprio) resched_task(rq->curr); } else - check_preempt_curr(rq, p); + check_preempt_curr(rq, p, 0); } /* @@ -1659,7 +1539,7 @@ static void switched_to_fair(struct rq *rq, struct task_struct *p, if (running) resched_task(rq->curr); else - check_preempt_curr(rq, p); + check_preempt_curr(rq, p, 0); } /* Account for a task changing its policy or group. diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 9353ca78154..7c9e8f4a049 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h @@ -11,3 +11,4 @@ SCHED_FEAT(ASYM_GRAN, 1) SCHED_FEAT(LB_BIAS, 1) SCHED_FEAT(LB_WAKEUP_UPDATE, 1) SCHED_FEAT(ASYM_EFF_LOAD, 1) +SCHED_FEAT(WAKEUP_OVERLAP, 0) diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index 3a4f92dbbe6..dec4ccabe2f 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c @@ -14,7 +14,7 @@ static int select_task_rq_idle(struct task_struct *p, int sync) /* * Idle tasks are unconditionally rescheduled: */ -static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p) +static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int sync) { resched_task(rq->idle); } @@ -76,7 +76,7 @@ static void switched_to_idle(struct rq *rq, struct task_struct *p, if (running) resched_task(rq->curr); else - check_preempt_curr(rq, p); + check_preempt_curr(rq, p, 0); } static void prio_changed_idle(struct rq *rq, struct task_struct *p, @@ -93,7 +93,7 @@ static void prio_changed_idle(struct rq *rq, struct task_struct *p, if (p->prio > oldprio) resched_task(rq->curr); } else - check_preempt_curr(rq, p); + check_preempt_curr(rq, p, 0); } /* diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 1113157b205..cdf5740ab03 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -102,12 +102,12 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se); static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) { + struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr; struct sched_rt_entity *rt_se = rt_rq->rt_se; - if (rt_se && !on_rt_rq(rt_se) && rt_rq->rt_nr_running) { - struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr; - - enqueue_rt_entity(rt_se); + if (rt_rq->rt_nr_running) { + if (rt_se && !on_rt_rq(rt_se)) + enqueue_rt_entity(rt_se); if (rt_rq->highest_prio < curr->prio) resched_task(curr); } @@ -231,6 +231,9 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) #endif /* CONFIG_RT_GROUP_SCHED */ #ifdef CONFIG_SMP +/* + * We ran out of runtime, see if we can borrow some from our neighbours. + */ static int do_balance_runtime(struct rt_rq *rt_rq) { struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); @@ -250,9 +253,18 @@ static int do_balance_runtime(struct rt_rq *rt_rq) continue; spin_lock(&iter->rt_runtime_lock); + /* + * Either all rqs have inf runtime and there's nothing to steal + * or __disable_runtime() below sets a specific rq to inf to + * indicate its been disabled and disalow stealing. + */ if (iter->rt_runtime == RUNTIME_INF) goto next; + /* + * From runqueues with spare time, take 1/n part of their + * spare time, but no more than our period. + */ diff = iter->rt_runtime - iter->rt_time; if (diff > 0) { diff = div_u64((u64)diff, weight); @@ -274,6 +286,9 @@ next: return more; } +/* + * Ensure this RQ takes back all the runtime it lend to its neighbours. + */ static void __disable_runtime(struct rq *rq) { struct root_domain *rd = rq->rd; @@ -289,17 +304,33 @@ static void __disable_runtime(struct rq *rq) spin_lock(&rt_b->rt_runtime_lock); spin_lock(&rt_rq->rt_runtime_lock); + /* + * Either we're all inf and nobody needs to borrow, or we're + * already disabled and thus have nothing to do, or we have + * exactly the right amount of runtime to take out. + */ if (rt_rq->rt_runtime == RUNTIME_INF || rt_rq->rt_runtime == rt_b->rt_runtime) goto balanced; spin_unlock(&rt_rq->rt_runtime_lock); + /* + * Calculate the difference between what we started out with + * and what we current have, that's the amount of runtime + * we lend and now have to reclaim. + */ want = rt_b->rt_runtime - rt_rq->rt_runtime; + /* + * Greedy reclaim, take back as much as we can. + */ for_each_cpu_mask(i, rd->span) { struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); s64 diff; + /* + * Can't reclaim from ourselves or disabled runqueues. + */ if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF) continue; @@ -319,8 +350,16 @@ static void __disable_runtime(struct rq *rq) } spin_lock(&rt_rq->rt_runtime_lock); + /* + * We cannot be left wanting - that would mean some runtime + * leaked out of the system. + */ BUG_ON(want); balanced: + /* + * Disable all the borrow logic by pretending we have inf + * runtime - in which case borrowing doesn't make sense. + */ rt_rq->rt_runtime = RUNTIME_INF; spin_unlock(&rt_rq->rt_runtime_lock); spin_unlock(&rt_b->rt_runtime_lock); @@ -343,6 +382,9 @@ static void __enable_runtime(struct rq *rq) if (unlikely(!scheduler_running)) return; + /* + * Reset each runqueue's bandwidth settings + */ for_each_leaf_rt_rq(rt_rq, rq) { struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); @@ -389,7 +431,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) int i, idle = 1; cpumask_t span; - if (rt_b->rt_runtime == RUNTIME_INF) + if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) return 1; span = sched_rt_period_mask(); @@ -487,6 +529,9 @@ static void update_curr_rt(struct rq *rq) curr->se.exec_start = rq->clock; cpuacct_charge(curr, delta_exec); + if (!rt_bandwidth_enabled()) + return; + for_each_sched_rt_entity(rt_se) { rt_rq = rt_rq_of_se(rt_se); @@ -784,7 +829,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) /* * Preempt the current task with a newly woken task if needed: */ -static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p) +static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int sync) { if (p->prio < rq->curr->prio) { resched_task(rq->curr); diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index bd703454239..cb01cd8f919 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -235,7 +235,8 @@ static void tick_do_broadcast_on_off(void *why) case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: if (!cpu_isset(cpu, tick_broadcast_mask)) { cpu_set(cpu, tick_broadcast_mask); - if (bc->mode == TICKDEV_MODE_PERIODIC) + if (tick_broadcast_device.mode == + TICKDEV_MODE_PERIODIC) clockevents_shutdown(dev); } if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE) @@ -245,7 +246,8 @@ static void tick_do_broadcast_on_off(void *why) if (!tick_broadcast_force && cpu_isset(cpu, tick_broadcast_mask)) { cpu_clear(cpu, tick_broadcast_mask); - if (bc->mode == TICKDEV_MODE_PERIODIC) + if (tick_broadcast_device.mode == + TICKDEV_MODE_PERIODIC) tick_setup_periodic(dev, 0); } break; diff --git a/kernel/user.c b/kernel/user.c index 865ecf57a09..39d6159fae4 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -169,7 +169,7 @@ static ssize_t cpu_rt_runtime_show(struct kobject *kobj, { struct user_struct *up = container_of(kobj, struct user_struct, kobj); - return sprintf(buf, "%lu\n", sched_group_rt_runtime(up->tg)); + return sprintf(buf, "%ld\n", sched_group_rt_runtime(up->tg)); } static ssize_t cpu_rt_runtime_store(struct kobject *kobj, @@ -180,7 +180,7 @@ static ssize_t cpu_rt_runtime_store(struct kobject *kobj, unsigned long rt_runtime; int rc; - sscanf(buf, "%lu", &rt_runtime); + sscanf(buf, "%ld", &rt_runtime); rc = sched_group_set_rt_runtime(up->tg, rt_runtime); diff --git a/mm/slob.c b/mm/slob.c index 4c82dd41f32..62b679dc660 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -515,7 +515,7 @@ size_t ksize(const void *block) sp = (struct slob_page *)virt_to_page(block); if (slob_page(sp)) - return ((slob_t *)block - 1)->units + SLOB_UNIT; + return (((slob_t *)block - 1)->units - 1) * SLOB_UNIT; else return sp->page.private; } diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 418cd7dbbc9..8e0de6a5e18 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1986,11 +1986,13 @@ static void read_markers(const char *fname) mod = find_module(modname); if (!mod) { - if (is_vmlinux(modname)) - have_vmlinux = 1; mod = new_module(NOFAIL(strdup(modname))); mod->skip = 1; } + if (is_vmlinux(modname)) { + have_vmlinux = 1; + mod->skip = 0; + } if (!mod->skip) add_marker(mod, marker, fmt); diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index d11a8154500..8551952ef32 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -2737,6 +2737,7 @@ int security_netlbl_secattr_to_sid(struct netlbl_lsm_secattr *secattr, if (ctx == NULL) goto netlbl_secattr_to_sid_return; + context_init(&ctx_new); ctx_new.user = ctx->user; ctx_new.role = ctx->role; ctx_new.type = ctx->type; @@ -2745,13 +2746,9 @@ int security_netlbl_secattr_to_sid(struct netlbl_lsm_secattr *secattr, if (ebitmap_netlbl_import(&ctx_new.range.level[0].cat, secattr->attr.mls.cat) != 0) goto netlbl_secattr_to_sid_return; - ctx_new.range.level[1].cat.highbit = - ctx_new.range.level[0].cat.highbit; - ctx_new.range.level[1].cat.node = - ctx_new.range.level[0].cat.node; - } else { - ebitmap_init(&ctx_new.range.level[0].cat); - ebitmap_init(&ctx_new.range.level[1].cat); + memcpy(&ctx_new.range.level[1].cat, + &ctx_new.range.level[0].cat, + sizeof(ctx_new.range.level[0].cat)); } if (mls_context_isvalid(&policydb, &ctx_new) != 1) goto netlbl_secattr_to_sid_return_cleanup; |