Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cpuset.c   | 25 |
-rw-r--r-- | kernel/extable.c  |  2 |
-rw-r--r-- | kernel/module.c   | 12 |
-rw-r--r-- | kernel/ptrace.c   | 57 |
-rw-r--r-- | kernel/rcupdate.c | 19 |
-rw-r--r-- | kernel/sched.c    | 62 |
-rw-r--r-- | kernel/timer.c    | 16 |
7 files changed, 115 insertions, 78 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 72248d1b9e3..ab81fdd4572 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2231,19 +2231,25 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
  * So only GFP_KERNEL allocations, if all nodes in the cpuset are
  * short of memory, might require taking the callback_mutex mutex.
  *
- * The first loop over the zonelist in mm/page_alloc.c:__alloc_pages()
- * calls here with __GFP_HARDWALL always set in gfp_mask, enforcing
- * hardwall cpusets - no allocation on a node outside the cpuset is
- * allowed (unless in interrupt, of course).
- *
- * The second loop doesn't even call here for GFP_ATOMIC requests
- * (if the __alloc_pages() local variable 'wait' is set).  That check
- * and the checks below have the combined affect in the second loop of
- * the __alloc_pages() routine that:
+ * The first call here from mm/page_alloc:get_page_from_freelist()
+ * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets, so
+ * no allocation on a node outside the cpuset is allowed (unless in
+ * interrupt, of course).
+ *
+ * The second pass through get_page_from_freelist() doesn't even call
+ * here for GFP_ATOMIC calls.  For those calls, the __alloc_pages()
+ * variable 'wait' is not set, and the bit ALLOC_CPUSET is not set
+ * in alloc_flags.  That logic and the checks below have the combined
+ * affect that:
  *      in_interrupt - any node ok (current task context irrelevant)
  *      GFP_ATOMIC   - any node ok
  *      GFP_KERNEL   - any node in enclosing mem_exclusive cpuset ok
  *      GFP_USER     - only nodes in current tasks mems allowed ok.
+ *
+ * Rule:
+ *    Don't call cpuset_zone_allowed() if you can't sleep, unless you
+ *    pass in the __GFP_HARDWALL flag set in gfp_flag, which disables
+ *    the code that might scan up ancestor cpusets and sleep.
  **/
 int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
 {
@@ -2255,6 +2261,7 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
         if (in_interrupt())
                 return 1;
         node = z->zone_pgdat->node_id;
+        might_sleep_if(!(gfp_mask & __GFP_HARDWALL));
         if (node_isset(node, current->mems_allowed))
                 return 1;
         if (gfp_mask & __GFP_HARDWALL)  /* If hardwall request, stop here */
diff --git a/kernel/extable.c b/kernel/extable.c
index 7501b531cee..7fe26285531 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -40,7 +40,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
         return e;
 }
 
-static int core_kernel_text(unsigned long addr)
+int core_kernel_text(unsigned long addr)
 {
         if (addr >= (unsigned long)_stext &&
             addr <= (unsigned long)_etext)
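Note on the cpuset.c change above: the new might_sleep_if() line encodes the rule spelled out in the comment. Below is a rough standalone sketch of that rule; it is not kernel code, and the names (zone_allowed_model, MY_GFP_HARDWALL, in_atomic_context, node_in_mems_allowed) are invented for illustration. The point is that a caller which cannot sleep must pass the hardwall flag so the check stops before the path that may scan ancestor cpusets and block.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define MY_GFP_HARDWALL 0x1u          /* stands in for __GFP_HARDWALL      */

static bool in_atomic_context;        /* pretend in_interrupt()/GFP_ATOMIC */
static bool node_in_mems_allowed;     /* pretend node_isset(node, mems)    */

/* Modelled on might_sleep_if(): complain if a request that may sleep
 * (no hardwall flag) comes from a context that cannot sleep. */
static void might_sleep_if_model(bool may_sleep)
{
        if (may_sleep)
                assert(!in_atomic_context);
}

static int zone_allowed_model(unsigned int gfp_mask)
{
        might_sleep_if_model(!(gfp_mask & MY_GFP_HARDWALL));

        if (node_in_mems_allowed)
                return 1;
        if (gfp_mask & MY_GFP_HARDWALL)   /* hardwall request: stop here */
                return 0;
        /* Only sleepable callers get here; the kernel would now walk up
         * to the nearest mem_exclusive ancestor under a mutex. */
        return 1;
}

int main(void)
{
        node_in_mems_allowed = false;

        in_atomic_context = true;         /* e.g. an interrupt-time request */
        printf("hardwall: %d\n", zone_allowed_model(MY_GFP_HARDWALL));

        in_atomic_context = false;        /* e.g. a GFP_KERNEL request */
        printf("kernel:   %d\n", zone_allowed_model(0));
        return 0;
}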
diff --git a/kernel/module.c b/kernel/module.c
index d24deb0dbbc..bbe04862e1b 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -705,14 +705,14 @@ EXPORT_SYMBOL(__symbol_put);
 
 void symbol_put_addr(void *addr)
 {
-        unsigned long flags;
+        struct module *modaddr;
 
-        spin_lock_irqsave(&modlist_lock, flags);
-        if (!kernel_text_address((unsigned long)addr))
-                BUG();
+        if (core_kernel_text((unsigned long)addr))
+                return;
 
-        module_put(module_text_address((unsigned long)addr));
-        spin_unlock_irqrestore(&modlist_lock, flags);
+        if (!(modaddr = module_text_address((unsigned long)addr)))
+                BUG();
+        module_put(modaddr);
 }
 
 EXPORT_SYMBOL_GPL(symbol_put_addr);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 4e0f0ec003f..921c22ad16e 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -148,12 +148,34 @@ int ptrace_may_attach(struct task_struct *task)
 int ptrace_attach(struct task_struct *task)
 {
         int retval;
-        task_lock(task);
+
         retval = -EPERM;
         if (task->pid <= 1)
-                goto bad;
+                goto out;
         if (task->tgid == current->tgid)
-                goto bad;
+                goto out;
+
+repeat:
+        /*
+         * Nasty, nasty.
+         *
+         * We want to hold both the task-lock and the
+         * tasklist_lock for writing at the same time.
+         * But that's against the rules (tasklist_lock
+         * is taken for reading by interrupts on other
+         * cpu's that may have task_lock).
+         */
+        task_lock(task);
+        local_irq_disable();
+        if (!write_trylock(&tasklist_lock)) {
+                local_irq_enable();
+                task_unlock(task);
+                do {
+                        cpu_relax();
+                } while (!write_can_lock(&tasklist_lock));
+                goto repeat;
+        }
+
         /* the same process cannot be attached many times */
         if (task->ptrace & PT_PTRACED)
                 goto bad;
@@ -166,17 +188,15 @@ int ptrace_attach(struct task_struct *task)
                       ? PT_ATTACHED : 0);
         if (capable(CAP_SYS_PTRACE))
                 task->ptrace |= PT_PTRACE_CAP;
-        task_unlock(task);
 
-        write_lock_irq(&tasklist_lock);
         __ptrace_link(task, current);
-        write_unlock_irq(&tasklist_lock);
 
         force_sig_specific(SIGSTOP, task);
-        return 0;
 
 bad:
+        write_unlock_irq(&tasklist_lock);
         task_unlock(task);
+out:
         return retval;
 }
 
@@ -417,21 +437,22 @@ int ptrace_request(struct task_struct *child, long request,
  */
 int ptrace_traceme(void)
 {
-        int ret;
+        int ret = -EPERM;
 
         /*
          * Are we already being traced?
          */
-        if (current->ptrace & PT_PTRACED)
-                return -EPERM;
-        ret = security_ptrace(current->parent, current);
-        if (ret)
-                return -EPERM;
-        /*
-         * Set the ptrace bit in the process ptrace flags.
-         */
-        current->ptrace |= PT_PTRACED;
-        return 0;
+        task_lock(current);
+        if (!(current->ptrace & PT_PTRACED)) {
+                ret = security_ptrace(current->parent, current);
+                /*
+                 * Set the ptrace bit in the process ptrace flags.
+                 */
+                if (!ret)
+                        current->ptrace |= PT_PTRACED;
+        }
+        task_unlock(current);
+        return ret;
 }
 
 /**
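Note on the ptrace_attach() retry loop above: task_lock() has to be taken before the tasklist_lock write lock here, but tasklist_lock is also taken for reading from interrupt context on other CPUs that may already hold a task lock, so the write lock can only be tried with interrupts disabled and backed out on failure. A minimal userspace sketch of the same try-then-back-off idiom, using POSIX threads (link with -pthread); the lock names are placeholders, not the kernel's types:

#include <pthread.h>
#include <sched.h>

static pthread_mutex_t task_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_rwlock_t tasklist_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Take both locks without deadlocking against readers that may already
 * hold task_lock: try the second lock, and on failure drop everything,
 * yield (the kernel spins with cpu_relax() instead), then start over. */
static void lock_both(void)
{
        for (;;) {
                pthread_mutex_lock(&task_lock);
                if (pthread_rwlock_trywrlock(&tasklist_lock) == 0)
                        return;                           /* got both */
                pthread_mutex_unlock(&task_lock);         /* back out fully */
                sched_yield();
        }
}

static void unlock_both(void)
{
        pthread_rwlock_unlock(&tasklist_lock);
        pthread_mutex_unlock(&task_lock);
}

int main(void)
{
        lock_both();
        unlock_both();
        return 0;
}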
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 6d32ff26f94..2058f88c7bb 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -479,12 +479,31 @@ static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
         return 0;
 }
 
+/*
+ * Check to see if there is any immediate RCU-related work to be done
+ * by the current CPU, returning 1 if so.  This function is part of the
+ * RCU implementation; it is -not- an exported member of the RCU API.
+ */
 int rcu_pending(int cpu)
 {
         return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) ||
                 __rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu));
 }
 
+/*
+ * Check to see if any future RCU-related work will need to be done
+ * by the current CPU, even if none need be done immediately, returning
+ * 1 if so.  This function is part of the RCU implementation; it is -not-
+ * an exported member of the RCU API.
+ */
+int rcu_needs_cpu(int cpu)
+{
+        struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
+        struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu);
+
+        return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu));
+}
+
 void rcu_check_callbacks(int cpu, int user)
 {
         if (user ||
diff --git a/kernel/sched.c b/kernel/sched.c
index 4c64f85698a..c13f1bd2df7 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -665,55 +665,13 @@ static int effective_prio(task_t *p)
 }
 
 /*
- * We place interactive tasks back into the active array, if possible.
- *
- * To guarantee that this does not starve expired tasks we ignore the
- * interactivity of a task if the first expired task had to wait more
- * than a 'reasonable' amount of time. This deadline timeout is
- * load-dependent, as the frequency of array switched decreases with
- * increasing number of running tasks. We also ignore the interactivity
- * if a better static_prio task has expired, and switch periodically
- * regardless, to ensure that highly interactive tasks do not starve
- * the less fortunate for unreasonably long periods.
- */
-static inline int expired_starving(runqueue_t *rq)
-{
-        int limit;
-
-        /*
-         * Arrays were recently switched, all is well
-         */
-        if (!rq->expired_timestamp)
-                return 0;
-
-        limit = STARVATION_LIMIT * rq->nr_running;
-
-        /*
-         * It's time to switch arrays
-         */
-        if (jiffies - rq->expired_timestamp >= limit)
-                return 1;
-
-        /*
-         * There's a better selection in the expired array
-         */
-        if (rq->curr->static_prio > rq->best_expired_prio)
-                return 1;
-
-        /*
-         * All is well
-         */
-        return 0;
-}
-
-/*
  * __activate_task - move a task to the runqueue.
  */
 static void __activate_task(task_t *p, runqueue_t *rq)
 {
         prio_array_t *target = rq->active;
 
-        if (unlikely(batch_task(p) || (expired_starving(rq) && !rt_task(p))))
+        if (batch_task(p))
                 target = rq->expired;
         enqueue_task(p, target);
         rq->nr_running++;
@@ -2532,6 +2490,22 @@ unsigned long long current_sched_time(const task_t *tsk)
 }
 
 /*
+ * We place interactive tasks back into the active array, if possible.
+ *
+ * To guarantee that this does not starve expired tasks we ignore the
+ * interactivity of a task if the first expired task had to wait more
+ * than a 'reasonable' amount of time. This deadline timeout is
+ * load-dependent, as the frequency of array switched decreases with
+ * increasing number of running tasks. We also ignore the interactivity
+ * if a better static_prio task has expired:
+ */
+#define EXPIRED_STARVING(rq) \
+        ((STARVATION_LIMIT && ((rq)->expired_timestamp && \
+                (jiffies - (rq)->expired_timestamp >= \
+                        STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \
+                        ((rq)->curr->static_prio > (rq)->best_expired_prio))
+
+/*
  * Account user cpu time to a process.
  * @p: the process that the cpu time gets accounted to
  * @hardirq_offset: the offset to subtract from hardirq_count()
@@ -2666,7 +2640,7 @@ void scheduler_tick(void)
 
                 if (!rq->expired_timestamp)
                         rq->expired_timestamp = jiffies;
-                if (!TASK_INTERACTIVE(p) || expired_starving(rq)) {
+                if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
                         enqueue_task(p, rq->expired);
                         if (p->static_prio < rq->best_expired_prio)
                                 rq->best_expired_prio = p->static_prio;
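Note on the sched.c change above: it replaces the expired_starving() helper with the open-coded EXPIRED_STARVING() macro (and __activate_task() goes back to checking only batch_task()). The macro's nesting is easy to misparse, so here is a small standalone model of the same expression; STARVATION_LIMIT, the struct and the numbers below are made up for illustration and are not taken from a real runqueue:

#include <stdio.h>

#define STARVATION_LIMIT 500    /* illustrative value, in jiffies */

struct rq_model {
        unsigned long expired_timestamp;    /* 0 => arrays just switched */
        unsigned long nr_running;
        int curr_static_prio;
        int best_expired_prio;
};

/* Same shape as the macro: starving if the first expired task has waited
 * longer than STARVATION_LIMIT * nr_running jiffies, or if a better
 * (numerically lower) static priority is already sitting in the expired
 * array. */
static int expired_starving_model(const struct rq_model *rq, unsigned long jiffies)
{
        return ((STARVATION_LIMIT && (rq->expired_timestamp &&
                 (jiffies - rq->expired_timestamp >=
                  STARVATION_LIMIT * rq->nr_running + 1))) ||
                (rq->curr_static_prio > rq->best_expired_prio));
}

int main(void)
{
        struct rq_model rq = {
                .expired_timestamp = 1000,
                .nr_running = 4,
                .curr_static_prio = 120,
                .best_expired_prio = 140,
        };

        /* 4 runnable tasks => deadline of 500 * 4 + 1 = 2001 jiffies. */
        printf("%d\n", expired_starving_model(&rq, 2000));   /* 0: not yet  */
        printf("%d\n", expired_starving_model(&rq, 3200));   /* 1: starving */

        rq.best_expired_prio = 100;   /* better task waiting in expired */
        printf("%d\n", expired_starving_model(&rq, 1500));   /* 1 */
        return 0;
}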
diff --git a/kernel/timer.c b/kernel/timer.c
index 67eaf0f5409..9e49deed468 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -541,6 +541,22 @@ found:
         }
         spin_unlock(&base->lock);
 
+        /*
+         * It can happen that other CPUs service timer IRQs and increment
+         * jiffies, but we have not yet got a local timer tick to process
+         * the timer wheels.  In that case, the expiry time can be before
+         * jiffies, but since the high-resolution timer here is relative to
+         * jiffies, the default expression when high-resolution timers are
+         * not active,
+         *
+         *   time_before(MAX_JIFFY_OFFSET + jiffies, expires)
+         *
+         * would falsely evaluate to true.  If that is the case, just
+         * return jiffies so that we can immediately fire the local timer
+         */
+        if (time_before(expires, jiffies))
+                return jiffies;
+
         if (time_before(hr_expires, expires))
                 return hr_expires;
 
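Note on the timer.c change above: the new early return guards against an expiry value that other CPUs have already pushed into the past by advancing jiffies. The comparison relies on the kernel's usual time_before() idiom, shown here in a standalone form; the values are made up, only the macro shape matches the kernel's definition:

#include <stdio.h>

/* Same idiom as the kernel's time_before(): compare via a signed
 * difference so the result stays correct across counter wraparound. */
#define time_before(a, b)   ((long)((a) - (b)) < 0)

int main(void)
{
        unsigned long jiffies = 100050;  /* advanced by another CPU       */
        unsigned long expires = 100020;  /* timer already due in the past */

        /* Without the extra check the caller would wait on a stale expiry;
         * with it, an already-due timer fires on the next local tick. */
        if (time_before(expires, jiffies))
                expires = jiffies;

        printf("next expiry: %lu\n", expires);   /* prints 100050 */
        return 0;
}

With plain unsigned arithmetic the subtraction wraps, so the signed cast keeps the comparison correct as long as the two values are less than half the counter range apart.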