summaryrefslogtreecommitdiffstats
path: root/mm/memory-failure.c
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2012-04-14 13:18:27 +0200
committerIngo Molnar <mingo@kernel.org>2012-04-14 13:19:04 +0200
commit6ac1ef482d7ae0c690f1640bf6eb818ff9a2d91e (patch)
tree021cc9f6b477146fcebe6f3be4752abfa2ba18a9 /mm/memory-failure.c
parent682968e0c425c60f0dde37977e5beb2b12ddc4cc (diff)
parenta385ec4f11bdcf81af094c03e2444ee9b7fad2e5 (diff)
Merge branch 'perf/core' into perf/uprobes
Merge in latest upstream (and the latest perf development tree), to prepare for tooling changes, and also to pick up v3.4 MM changes that the uprobes code needs to take care of. Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r--mm/memory-failure.c98
1 files changed, 51 insertions, 47 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 56080ea3614..97cc2733551 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -187,33 +187,40 @@ int hwpoison_filter(struct page *p)
EXPORT_SYMBOL_GPL(hwpoison_filter);
/*
- * Send all the processes who have the page mapped an ``action optional''
- * signal.
+ * Send all the processes who have the page mapped a signal.
+ * ``action optional'' if they are not immediately affected by the error
+ * ``action required'' if error happened in current execution context
*/
-static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno,
- unsigned long pfn, struct page *page)
+static int kill_proc(struct task_struct *t, unsigned long addr, int trapno,
+ unsigned long pfn, struct page *page, int flags)
{
struct siginfo si;
int ret;
printk(KERN_ERR
- "MCE %#lx: Killing %s:%d early due to hardware memory corruption\n",
+ "MCE %#lx: Killing %s:%d due to hardware memory corruption\n",
pfn, t->comm, t->pid);
si.si_signo = SIGBUS;
si.si_errno = 0;
- si.si_code = BUS_MCEERR_AO;
si.si_addr = (void *)addr;
#ifdef __ARCH_SI_TRAPNO
si.si_trapno = trapno;
#endif
si.si_addr_lsb = compound_trans_order(compound_head(page)) + PAGE_SHIFT;
- /*
- * Don't use force here, it's convenient if the signal
- * can be temporarily blocked.
- * This could cause a loop when the user sets SIGBUS
- * to SIG_IGN, but hopefully no one will do that?
- */
- ret = send_sig_info(SIGBUS, &si, t); /* synchronous? */
+
+ if ((flags & MF_ACTION_REQUIRED) && t == current) {
+ si.si_code = BUS_MCEERR_AR;
+ ret = force_sig_info(SIGBUS, &si, t);
+ } else {
+ /*
+ * Don't use force here, it's convenient if the signal
+ * can be temporarily blocked.
+ * This could cause a loop when the user sets SIGBUS
+ * to SIG_IGN, but hopefully no one will do that?
+ */
+ si.si_code = BUS_MCEERR_AO;
+ ret = send_sig_info(SIGBUS, &si, t); /* synchronous? */
+ }
if (ret < 0)
printk(KERN_INFO "MCE: Error sending signal to %s:%d: %d\n",
t->comm, t->pid, ret);
@@ -338,8 +345,9 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
* Also when FAIL is set do a force kill because something went
* wrong earlier.
*/
-static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno,
- int fail, struct page *page, unsigned long pfn)
+static void kill_procs(struct list_head *to_kill, int doit, int trapno,
+ int fail, struct page *page, unsigned long pfn,
+ int flags)
{
struct to_kill *tk, *next;
@@ -363,8 +371,8 @@ static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno,
* check for that, but we need to tell the
* process anyways.
*/
- else if (kill_proc_ao(tk->tsk, tk->addr, trapno,
- pfn, page) < 0)
+ else if (kill_proc(tk->tsk, tk->addr, trapno,
+ pfn, page, flags) < 0)
printk(KERN_ERR
"MCE %#lx: Cannot send advisory machine check signal to %s:%d\n",
pfn, tk->tsk->comm, tk->tsk->pid);
@@ -844,7 +852,7 @@ static int page_action(struct page_state *ps, struct page *p,
* the pages and send SIGBUS to the processes if the data was dirty.
*/
static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
- int trapno)
+ int trapno, int flags)
{
enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;
struct address_space *mapping;
@@ -962,8 +970,8 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
* use a more force-full uncatchable kill to prevent
* any accesses to the poisoned memory.
*/
- kill_procs_ao(&tokill, !!PageDirty(ppage), trapno,
- ret != SWAP_SUCCESS, p, pfn);
+ kill_procs(&tokill, !!PageDirty(ppage), trapno,
+ ret != SWAP_SUCCESS, p, pfn, flags);
return ret;
}
@@ -984,7 +992,25 @@ static void clear_page_hwpoison_huge_page(struct page *hpage)
ClearPageHWPoison(hpage + i);
}
-int __memory_failure(unsigned long pfn, int trapno, int flags)
+/**
+ * memory_failure - Handle memory failure of a page.
+ * @pfn: Page Number of the corrupted page
+ * @trapno: Trap number reported in the signal to user space.
+ * @flags: fine tune action taken
+ *
+ * This function is called by the low level machine check code
+ * of an architecture when it detects hardware memory corruption
+ * of a page. It tries its best to recover, which includes
+ * dropping pages, killing processes etc.
+ *
+ * The function is primarily of use for corruptions that
+ * happen outside the current execution context (e.g. when
+ * detected by a background scrubber)
+ *
+ * Must run in process context (e.g. a work queue) with interrupts
+ * enabled and no spinlocks hold.
+ */
+int memory_failure(unsigned long pfn, int trapno, int flags)
{
struct page_state *ps;
struct page *p;
@@ -1063,7 +1089,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
* The check (unnecessarily) ignores LRU pages being isolated and
* walked by the page reclaim code, however that's not a big loss.
*/
- if (!PageHuge(p) && !PageTransCompound(p)) {
+ if (!PageHuge(p) && !PageTransTail(p)) {
if (!PageLRU(p))
shake_page(p, 0);
if (!PageLRU(p)) {
@@ -1130,7 +1156,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
* Now take care of user space mappings.
* Abort on fail: __delete_from_page_cache() assumes unmapped page.
*/
- if (hwpoison_user_mappings(p, pfn, trapno) != SWAP_SUCCESS) {
+ if (hwpoison_user_mappings(p, pfn, trapno, flags) != SWAP_SUCCESS) {
printk(KERN_ERR "MCE %#lx: cannot unmap page, give up\n", pfn);
res = -EBUSY;
goto out;
@@ -1156,29 +1182,7 @@ out:
unlock_page(hpage);
return res;
}
-EXPORT_SYMBOL_GPL(__memory_failure);
-
-/**
- * memory_failure - Handle memory failure of a page.
- * @pfn: Page Number of the corrupted page
- * @trapno: Trap number reported in the signal to user space.
- *
- * This function is called by the low level machine check code
- * of an architecture when it detects hardware memory corruption
- * of a page. It tries its best to recover, which includes
- * dropping pages, killing processes etc.
- *
- * The function is primarily of use for corruptions that
- * happen outside the current execution context (e.g. when
- * detected by a background scrubber)
- *
- * Must run in process context (e.g. a work queue) with interrupts
- * enabled and no spinlocks hold.
- */
-void memory_failure(unsigned long pfn, int trapno)
-{
- __memory_failure(pfn, trapno, 0);
-}
+EXPORT_SYMBOL_GPL(memory_failure);
#define MEMORY_FAILURE_FIFO_ORDER 4
#define MEMORY_FAILURE_FIFO_SIZE (1 << MEMORY_FAILURE_FIFO_ORDER)
@@ -1251,7 +1255,7 @@ static void memory_failure_work_func(struct work_struct *work)
spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
if (!gotten)
break;
- __memory_failure(entry.pfn, entry.trapno, entry.flags);
+ memory_failure(entry.pfn, entry.trapno, entry.flags);
}
}