From 5033cba087f6ac773002123aafbea1aad4267682 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 25 Jun 2005 14:57:56 -0700 Subject: [PATCH] kexec: x86 kexec core This is the i386 implementation of kexec. Signed-off-by: Eric Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/crash.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 arch/i386/kernel/crash.c (limited to 'arch/i386/kernel/crash.c') diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c new file mode 100644 index 00000000000..fa27a6c2abb --- /dev/null +++ b/arch/i386/kernel/crash.c @@ -0,0 +1,42 @@ +/* + * Architecture specific (i386) functions for kexec based crash dumps. + * + * Created by: Hariprasad Nellitheertha (hari@in.ibm.com) + * + * Copyright (C) IBM Corporation, 2004. All rights reserved. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#define MAX_NOTE_BYTES 1024 +typedef u32 note_buf_t[MAX_NOTE_BYTES/4]; + +note_buf_t crash_notes[NR_CPUS]; + +void machine_crash_shutdown(void) +{ + /* This function is only called after the system + * has paniced or is otherwise in a critical state. + * The minimum amount of code to allow a kexec'd kernel + * to run successfully needs to happen here. + * + * In practice this means shooting down the other cpus in + * an SMP system. + */ +} -- cgit v1.2.3-70-g09d2 From c4ac4263a019c791e906f284bb03891d3c25a845 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 25 Jun 2005 14:57:58 -0700 Subject: [PATCH] crashdump: x86: add NMI handler to capture other CPUs One of the dangers when switching from one kernel to another is what happens to all of the other cpus that were running in the crashed kernel. In an attempt to avoid that problem this patch adds a nmi handler and attempts to shoot down the other cpus by sending them non maskable interrupts. The code then waits for 1 second or until all known cpus have stopped running and then jumps from the running kernel that has crashed to the kernel in reserved memory. The kernel spin loop is used for the delay as that should behave continue to be safe even in after a crash. Signed-off-by: Eric Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/crash.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'arch/i386/kernel/crash.c') diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index fa27a6c2abb..882779c0787 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -23,12 +23,65 @@ #include #include #include +#include #define MAX_NOTE_BYTES 1024 typedef u32 note_buf_t[MAX_NOTE_BYTES/4]; note_buf_t crash_notes[NR_CPUS]; +#ifdef CONFIG_SMP +static atomic_t waiting_for_crash_ipi; + +static int crash_nmi_callback(struct pt_regs *regs, int cpu) +{ + local_irq_disable(); + atomic_dec(&waiting_for_crash_ipi); + /* Assume hlt works */ + __asm__("hlt"); + for(;;); + return 1; +} + +/* + * By using the NMI code instead of a vector we just sneak thru the + * word generator coming out with just what we want. AND it does + * not matter if clustered_apic_mode is set or not. + */ +static void smp_send_nmi_allbutself(void) +{ + send_IPI_allbutself(APIC_DM_NMI); +} + +static void nmi_shootdown_cpus(void) +{ + unsigned long msecs; + atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); + + /* Would it be better to replace the trap vector here? */ + set_nmi_callback(crash_nmi_callback); + /* Ensure the new callback function is set before sending + * out the NMI + */ + wmb(); + + smp_send_nmi_allbutself(); + + msecs = 1000; /* Wait at most a second for the other cpus to stop */ + while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { + mdelay(1); + msecs--; + } + + /* Leave the nmi callback set */ +} +#else +static void nmi_shootdown_cpus(void) +{ + /* There are no cpus to shootdown */ +} +#endif + void machine_crash_shutdown(void) { /* This function is only called after the system @@ -39,4 +92,7 @@ void machine_crash_shutdown(void) * In practice this means shooting down the other cpus in * an SMP system. */ + /* The kernel is broken so disable interrupts */ + local_irq_disable(); + nmi_shootdown_cpus(); } -- cgit v1.2.3-70-g09d2 From 2c818b45a202b8f632b4031edcba8599efda42b7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 25 Jun 2005 14:57:59 -0700 Subject: [PATCH] kexec: x86: snapshot registers during crash shutdown After the kernel panics if we wish to generate an entire machine core file it is very nice to know the register state at the time the machine crashed. After long discussion it was realized that if you are going to be saving the information anyway it is reasonable to store the information in a format that it will be used and recognized in so the register state is stored in the standard ELF note format. Signed-off-by: Eric Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/crash.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) (limited to 'arch/i386/kernel/crash.c') diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index 882779c0787..1bb5dd98d1d 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -30,12 +30,91 @@ typedef u32 note_buf_t[MAX_NOTE_BYTES/4]; note_buf_t crash_notes[NR_CPUS]; +static u32 *append_elf_note(u32 *buf, + char *name, unsigned type, void *data, size_t data_len) +{ + struct elf_note note; + note.n_namesz = strlen(name) + 1; + note.n_descsz = data_len; + note.n_type = type; + memcpy(buf, ¬e, sizeof(note)); + buf += (sizeof(note) +3)/4; + memcpy(buf, name, note.n_namesz); + buf += (note.n_namesz + 3)/4; + memcpy(buf, data, note.n_descsz); + buf += (note.n_descsz + 3)/4; + return buf; +} + +static void final_note(u32 *buf) +{ + struct elf_note note; + note.n_namesz = 0; + note.n_descsz = 0; + note.n_type = 0; + memcpy(buf, ¬e, sizeof(note)); +} + + +static void crash_save_this_cpu(struct pt_regs *regs, int cpu) +{ + struct elf_prstatus prstatus; + u32 *buf; + if ((cpu < 0) || (cpu >= NR_CPUS)) { + return; + } + /* Using ELF notes here is opportunistic. + * I need a well defined structure format + * for the data I pass, and I need tags + * on the data to indicate what information I have + * squirrelled away. ELF notes happen to provide + * all of that that no need to invent something new. + */ + buf = &crash_notes[cpu][0]; + memset(&prstatus, 0, sizeof(prstatus)); + prstatus.pr_pid = current->pid; + elf_core_copy_regs(&prstatus.pr_reg, regs); + buf = append_elf_note(buf, "CORE", NT_PRSTATUS, + &prstatus, sizeof(prstatus)); + + final_note(buf); +} + +static void crash_get_current_regs(struct pt_regs *regs) +{ + __asm__ __volatile__("movl %%ebx,%0" : "=m"(regs->ebx)); + __asm__ __volatile__("movl %%ecx,%0" : "=m"(regs->ecx)); + __asm__ __volatile__("movl %%edx,%0" : "=m"(regs->edx)); + __asm__ __volatile__("movl %%esi,%0" : "=m"(regs->esi)); + __asm__ __volatile__("movl %%edi,%0" : "=m"(regs->edi)); + __asm__ __volatile__("movl %%ebp,%0" : "=m"(regs->ebp)); + __asm__ __volatile__("movl %%eax,%0" : "=m"(regs->eax)); + __asm__ __volatile__("movl %%esp,%0" : "=m"(regs->esp)); + __asm__ __volatile__("movw %%ss, %%ax;" :"=a"(regs->xss)); + __asm__ __volatile__("movw %%cs, %%ax;" :"=a"(regs->xcs)); + __asm__ __volatile__("movw %%ds, %%ax;" :"=a"(regs->xds)); + __asm__ __volatile__("movw %%es, %%ax;" :"=a"(regs->xes)); + __asm__ __volatile__("pushfl; popl %0" :"=m"(regs->eflags)); + + regs->eip = (unsigned long)current_text_addr(); +} + +static void crash_save_self(void) +{ + struct pt_regs regs; + int cpu; + cpu = smp_processor_id(); + crash_get_current_regs(®s); + crash_save_this_cpu(®s, cpu); +} + #ifdef CONFIG_SMP static atomic_t waiting_for_crash_ipi; static int crash_nmi_callback(struct pt_regs *regs, int cpu) { local_irq_disable(); + crash_save_this_cpu(regs, cpu); atomic_dec(&waiting_for_crash_ipi); /* Assume hlt works */ __asm__("hlt"); @@ -95,4 +174,5 @@ void machine_crash_shutdown(void) /* The kernel is broken so disable interrupts */ local_irq_disable(); nmi_shootdown_cpus(); + crash_save_self(); } -- cgit v1.2.3-70-g09d2 From 63d30298efc387c72557d11e2a7b467554c05a64 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 25 Jun 2005 14:58:00 -0700 Subject: [PATCH] kexec: x86 shutdown APICs during crash_shutdown In the case of a crash/panic an architecture specific function machine_crash_shutdown is called. This patch adds to the x86 machine_crash function the standard kernel code for shutting down apics. Every line of code added to that function increases the risk that we will call code after a kernel panic that is not safe. This patch should not make it to the stable kernel without a being reviewed a lot more. It is unclear how much a hardned kernel can take when it comes to misconfigured apics. So since a normal kernel has problems this patch does a clean shutdown. It is my expectation this patch will be dropped from future generations of the kexec work. But for the moment it is a crutch to keep from breaking everything. Signed-off-by: Eric Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/crash.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/i386/kernel/crash.c') diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index 1bb5dd98d1d..59b92d21746 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #define MAX_NOTE_BYTES 1024 @@ -115,6 +116,7 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu) { local_irq_disable(); crash_save_this_cpu(regs, cpu); + disable_local_APIC(); atomic_dec(&waiting_for_crash_ipi); /* Assume hlt works */ __asm__("hlt"); @@ -153,6 +155,7 @@ static void nmi_shootdown_cpus(void) } /* Leave the nmi callback set */ + disable_local_APIC(); } #else static void nmi_shootdown_cpus(void) @@ -174,5 +177,9 @@ void machine_crash_shutdown(void) /* The kernel is broken so disable interrupts */ local_irq_disable(); nmi_shootdown_cpus(); + lapic_shutdown(); +#if defined(CONFIG_X86_IO_APIC) + disable_IO_APIC(); +#endif crash_save_self(); } -- cgit v1.2.3-70-g09d2 From 625f1c8219d95300ed32e4c67eb62a50ded095ba Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Sat, 25 Jun 2005 14:58:12 -0700 Subject: [PATCH] Kdump: Export crash notes section address through sysfs o Following patch exports kexec global variable "crash_notes" to user space through sysfs as kernel attribute in /sys/kernel. Signed-off-by: Maneesh Soni Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/crash.c | 2 -- arch/x86_64/kernel/crash.c | 3 --- include/asm-i386/kexec.h | 5 +++++ include/asm-x86_64/kexec.h | 5 +++++ kernel/ksysfs.c | 13 +++++++++++++ 5 files changed, 23 insertions(+), 5 deletions(-) (limited to 'arch/i386/kernel/crash.c') diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index 59b92d21746..3645ad7ac20 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -26,8 +26,6 @@ #include #include -#define MAX_NOTE_BYTES 1024 -typedef u32 note_buf_t[MAX_NOTE_BYTES/4]; note_buf_t crash_notes[NR_CPUS]; diff --git a/arch/x86_64/kernel/crash.c b/arch/x86_64/kernel/crash.c index 7caf8a49d0c..6183bcb8525 100644 --- a/arch/x86_64/kernel/crash.c +++ b/arch/x86_64/kernel/crash.c @@ -20,9 +20,6 @@ #include #include -#define MAX_NOTE_BYTES 1024 -typedef u32 note_buf_t[MAX_NOTE_BYTES/4]; - note_buf_t crash_notes[NR_CPUS]; void machine_crash_shutdown(void) diff --git a/include/asm-i386/kexec.h b/include/asm-i386/kexec.h index a1599b55d62..6ed2a03e37b 100644 --- a/include/asm-i386/kexec.h +++ b/include/asm-i386/kexec.h @@ -25,4 +25,9 @@ /* The native architecture */ #define KEXEC_ARCH KEXEC_ARCH_386 +#define MAX_NOTE_BYTES 1024 +typedef u32 note_buf_t[MAX_NOTE_BYTES/4]; + +extern note_buf_t crash_notes[]; + #endif /* _I386_KEXEC_H */ diff --git a/include/asm-x86_64/kexec.h b/include/asm-x86_64/kexec.h index dc33646dc7d..42d2ff15c59 100644 --- a/include/asm-x86_64/kexec.h +++ b/include/asm-x86_64/kexec.h @@ -25,4 +25,9 @@ /* The native architecture */ #define KEXEC_ARCH KEXEC_ARCH_X86_64 +#define MAX_NOTE_BYTES 1024 +typedef u32 note_buf_t[MAX_NOTE_BYTES/4]; + +extern note_buf_t crash_notes[]; + #endif /* _X86_64_KEXEC_H */ diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 1f064a63f8c..015fb69ad94 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -30,12 +30,25 @@ static ssize_t hotplug_seqnum_show(struct subsystem *subsys, char *page) KERNEL_ATTR_RO(hotplug_seqnum); #endif +#ifdef CONFIG_KEXEC +#include + +static ssize_t crash_notes_show(struct subsystem *subsys, char *page) +{ + return sprintf(page, "%p\n", (void *)crash_notes); +} +KERNEL_ATTR_RO(crash_notes); +#endif + decl_subsys(kernel, NULL, NULL); EXPORT_SYMBOL_GPL(kernel_subsys); static struct attribute * kernel_attrs[] = { #ifdef CONFIG_HOTPLUG &hotplug_seqnum_attr.attr, +#endif +#ifdef CONFIG_KEXEC + &crash_notes_attr.attr, #endif NULL }; -- cgit v1.2.3-70-g09d2 From 4d55476c3f889e0f30d88e22da4682b5f10394ff Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Sat, 25 Jun 2005 14:58:13 -0700 Subject: [PATCH] kdump: NMI handler segment selector, stack pointer fix CPU does not save ss and esp on stack if execution was already in kernel mode at the time of NMI occurrence. This leads to saving of erractic values for ss and esp. This patch fixes the issue. Signed-off-by: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/crash.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/i386/kernel/crash.c') diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index 3645ad7ac20..31e077bb0ca 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -112,7 +112,20 @@ static atomic_t waiting_for_crash_ipi; static int crash_nmi_callback(struct pt_regs *regs, int cpu) { + struct pt_regs fixed_regs; local_irq_disable(); + + /* CPU does not save ss and esp on stack if execution is already + * running in kernel mode at the time of NMI occurrence. This code + * fixes it. + */ + if (!user_mode(regs)) { + memcpy(&fixed_regs, regs, sizeof(*regs)); + fixed_regs.esp = (unsigned long)&(regs->esp); + __asm__ __volatile__("xorl %eax, %eax;"); + __asm__ __volatile__ ("movw %%ss, %%ax;" :"=a"(fixed_regs.xss)); + regs = &fixed_regs; + } crash_save_this_cpu(regs, cpu); disable_local_APIC(); atomic_dec(&waiting_for_crash_ipi); -- cgit v1.2.3-70-g09d2 From a3ea8ac8468f5c7fc65684331dfba3260d5b2d93 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Sat, 25 Jun 2005 14:58:14 -0700 Subject: [PATCH] Kexec: Kexec on panic fix with nmi watchdog enabled o Problem: Kexec on panic hangs if first kernel is booted with nmi_watchdog command line parameter. This problem occurs because kexec crash shutdown code replaces the NMI callback handler. This handler saves the cpu register states and halts the cpu. If system is booted with nmi_watchdog parameter, then crashing cpu also runs this nmi handler and halts itself. o This patch fixes the problem by keeping a track of crashing cpu and not executing the new nmi handler on crashing cpu. o There is a dependence on smp_processor_id() function which might return insane value for cpu, if cpu field of thread_info is corrupted. Signed-off-by: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/crash.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/i386/kernel/crash.c') diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index 31e077bb0ca..a021681d21f 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -28,6 +28,8 @@ note_buf_t crash_notes[NR_CPUS]; +/* This keeps a track of which one is crashing cpu. */ +static int crashing_cpu; static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, size_t data_len) @@ -113,6 +115,13 @@ static atomic_t waiting_for_crash_ipi; static int crash_nmi_callback(struct pt_regs *regs, int cpu) { struct pt_regs fixed_regs; + + /* Don't do anything if this handler is invoked on crashing cpu. + * Otherwise, system will completely hang. Crashing cpu can get + * an NMI if system was initially booted with nmi_watchdog parameter. + */ + if (cpu == crashing_cpu) + return 1; local_irq_disable(); /* CPU does not save ss and esp on stack if execution is already @@ -187,6 +196,9 @@ void machine_crash_shutdown(void) */ /* The kernel is broken so disable interrupts */ local_irq_disable(); + + /* Make a note of crashing cpu. Will be used in NMI callback.*/ + crashing_cpu = smp_processor_id(); nmi_shootdown_cpus(); lapic_shutdown(); #if defined(CONFIG_X86_IO_APIC) -- cgit v1.2.3-70-g09d2 From 6e274d144302068a00794ec22e73520c0615cb6f Mon Sep 17 00:00:00 2001 From: Alexander Nyberg Date: Sat, 25 Jun 2005 14:58:26 -0700 Subject: [PATCH] kdump: Use real pt_regs from exception Makes kexec_crashdump() take a pt_regs * as an argument. This allows to get exact register state at the point of the crash. If we come from direct panic assertion NULL will be passed and the current registers saved before crashdump. This hooks into two places: die(): check the conditions under which we will panic when calling do_exit and go there directly with the pt_regs that caused the fatal fault. die_nmi(): If we receive an NMI lockup while in the kernel use the pt_regs and go directly to crash_kexec(). We're probably nested up badly at this point so this might be the only chance to escape with proper information. Signed-off-by: Alexander Nyberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/crash.c | 36 ++++++++++++++++++++++++------------ arch/i386/kernel/traps.c | 17 +++++++++++++++++ arch/ppc/kernel/machine_kexec.c | 2 +- arch/ppc64/kernel/machine_kexec.c | 2 +- arch/s390/kernel/crash.c | 2 +- arch/x86_64/kernel/crash.c | 2 +- drivers/char/sysrq.c | 2 +- include/linux/kexec.h | 8 ++++++-- include/linux/reboot.h | 3 ++- kernel/kexec.c | 13 +++++++++++-- kernel/panic.c | 2 +- 11 files changed, 66 insertions(+), 23 deletions(-) (limited to 'arch/i386/kernel/crash.c') diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index a021681d21f..8bdb4b6af0f 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -100,12 +100,31 @@ static void crash_get_current_regs(struct pt_regs *regs) regs->eip = (unsigned long)current_text_addr(); } -static void crash_save_self(void) +/* CPU does not save ss and esp on stack if execution is already + * running in kernel mode at the time of NMI occurrence. This code + * fixes it. + */ +static void crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs) +{ + memcpy(newregs, oldregs, sizeof(*newregs)); + newregs->esp = (unsigned long)&(oldregs->esp); + __asm__ __volatile__("xorl %eax, %eax;"); + __asm__ __volatile__ ("movw %%ss, %%ax;" :"=a"(newregs->xss)); +} + +/* We may have saved_regs from where the error came from + * or it is NULL if via a direct panic(). + */ +static void crash_save_self(struct pt_regs *saved_regs) { struct pt_regs regs; int cpu; cpu = smp_processor_id(); - crash_get_current_regs(®s); + + if (saved_regs) + crash_setup_regs(®s, saved_regs); + else + crash_get_current_regs(®s); crash_save_this_cpu(®s, cpu); } @@ -124,15 +143,8 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu) return 1; local_irq_disable(); - /* CPU does not save ss and esp on stack if execution is already - * running in kernel mode at the time of NMI occurrence. This code - * fixes it. - */ if (!user_mode(regs)) { - memcpy(&fixed_regs, regs, sizeof(*regs)); - fixed_regs.esp = (unsigned long)&(regs->esp); - __asm__ __volatile__("xorl %eax, %eax;"); - __asm__ __volatile__ ("movw %%ss, %%ax;" :"=a"(fixed_regs.xss)); + crash_setup_regs(&fixed_regs, regs); regs = &fixed_regs; } crash_save_this_cpu(regs, cpu); @@ -184,7 +196,7 @@ static void nmi_shootdown_cpus(void) } #endif -void machine_crash_shutdown(void) +void machine_crash_shutdown(struct pt_regs *regs) { /* This function is only called after the system * has paniced or is otherwise in a critical state. @@ -204,5 +216,5 @@ void machine_crash_shutdown(void) #if defined(CONFIG_X86_IO_APIC) disable_IO_APIC(); #endif - crash_save_self(); + crash_save_self(regs); } diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 207ea8ba716..e458463ebc0 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -27,6 +27,7 @@ #include #include #include +#include #ifdef CONFIG_EISA #include @@ -294,6 +295,9 @@ bug: printk("Kernel BUG\n"); } +/* This is gone through when something in the kernel + * has done something bad and is about to be terminated. +*/ void die(const char * str, struct pt_regs * regs, long err) { static struct { @@ -341,6 +345,10 @@ void die(const char * str, struct pt_regs * regs, long err) bust_spinlocks(0); die.lock_owner = -1; spin_unlock_irq(&die.lock); + + if (kexec_should_crash(current)) + crash_kexec(regs); + if (in_interrupt()) panic("Fatal exception in interrupt"); @@ -570,6 +578,15 @@ void die_nmi (struct pt_regs *regs, const char *msg) console_silent(); spin_unlock(&nmi_print_lock); bust_spinlocks(0); + + /* If we are in kernel we are probably nested up pretty bad + * and might aswell get out now while we still can. + */ + if (!user_mode(regs)) { + current->thread.trap_no = 2; + crash_kexec(regs); + } + do_exit(SIGSEGV); } diff --git a/arch/ppc/kernel/machine_kexec.c b/arch/ppc/kernel/machine_kexec.c index 435ad9ea0a8..b82535357d6 100644 --- a/arch/ppc/kernel/machine_kexec.c +++ b/arch/ppc/kernel/machine_kexec.c @@ -34,7 +34,7 @@ void machine_shutdown(void) } } -void machine_crash_shutdown(void) +void machine_crash_shutdown(struct pt_regs *regs) { if (ppc_md.machine_crash_shutdown) { ppc_md.machine_crash_shutdown(); diff --git a/arch/ppc64/kernel/machine_kexec.c b/arch/ppc64/kernel/machine_kexec.c index 217965d60a4..06b25b59c8a 100644 --- a/arch/ppc64/kernel/machine_kexec.c +++ b/arch/ppc64/kernel/machine_kexec.c @@ -34,7 +34,7 @@ note_buf_t crash_notes[NR_CPUS]; * and if what it will achieve. Letting it be now to compile the code * in generic kexec environment */ -void machine_crash_shutdown(void) +void machine_crash_shutdown(struct pt_regs *regs) { /* do nothing right now */ /* smp_relase_cpus() if we want smp on panic kernel */ diff --git a/arch/s390/kernel/crash.c b/arch/s390/kernel/crash.c index db38283c1f2..7bd169c58b0 100644 --- a/arch/s390/kernel/crash.c +++ b/arch/s390/kernel/crash.c @@ -12,6 +12,6 @@ note_buf_t crash_notes[NR_CPUS]; -void machine_crash_shutdown(void) +void machine_crash_shutdown(struct pt_regs *regs) { } diff --git a/arch/x86_64/kernel/crash.c b/arch/x86_64/kernel/crash.c index 6183bcb8525..d7fa4248501 100644 --- a/arch/x86_64/kernel/crash.c +++ b/arch/x86_64/kernel/crash.c @@ -22,7 +22,7 @@ note_buf_t crash_notes[NR_CPUS]; -void machine_crash_shutdown(void) +void machine_crash_shutdown(struct pt_regs *regs) { /* This function is only called after the system * has paniced or is otherwise in a critical state. diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index 53b2c8fab00..af79805b557 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -100,7 +100,7 @@ static struct sysrq_key_op sysrq_unraw_op = { static void sysrq_handle_crashdump(int key, struct pt_regs *pt_regs, struct tty_struct *tty) { - crash_kexec(); + crash_kexec(pt_regs); } static struct sysrq_key_op sysrq_crashdump_op = { .handler = sysrq_handle_crashdump, diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 0653a27c3d7..7383173a3a9 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -99,7 +99,8 @@ extern asmlinkage long compat_sys_kexec_load(unsigned long entry, unsigned long flags); #endif extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order); -extern void crash_kexec(void); +extern void crash_kexec(struct pt_regs *); +int kexec_should_crash(struct task_struct *); extern struct kimage *kexec_image; #define KEXEC_ON_CRASH 0x00000001 @@ -123,6 +124,9 @@ extern struct kimage *kexec_image; extern struct resource crashk_res; #else /* !CONFIG_KEXEC */ -static inline void crash_kexec(void) { } +struct pt_regs; +struct task_struct; +static inline void crash_kexec(struct pt_regs *regs) { } +static inline int kexec_should_crash(struct task_struct *p) { return 0; } #endif /* CONFIG_KEXEC */ #endif /* LINUX_KEXEC_H */ diff --git a/include/linux/reboot.h b/include/linux/reboot.h index c5a05e16edb..2d4dd23168d 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -52,7 +52,8 @@ extern void machine_halt(void); extern void machine_power_off(void); extern void machine_shutdown(void); -extern void machine_crash_shutdown(void); +struct pt_regs; +extern void machine_crash_shutdown(struct pt_regs *); #endif diff --git a/kernel/kexec.c b/kernel/kexec.c index a0411b3bd54..277f22afe74 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -18,6 +18,8 @@ #include #include #include +#include + #include #include #include @@ -32,6 +34,13 @@ struct resource crashk_res = { .flags = IORESOURCE_BUSY | IORESOURCE_MEM }; +int kexec_should_crash(struct task_struct *p) +{ + if (in_interrupt() || !p->pid || p->pid == 1 || panic_on_oops) + return 1; + return 0; +} + /* * When kexec transitions to the new kernel there is a one-to-one * mapping between physical and virtual addresses. On processors @@ -1010,7 +1019,7 @@ asmlinkage long compat_sys_kexec_load(unsigned long entry, } #endif -void crash_kexec(void) +void crash_kexec(struct pt_regs *regs) { struct kimage *image; int locked; @@ -1028,7 +1037,7 @@ void crash_kexec(void) if (!locked) { image = xchg(&kexec_crash_image, NULL); if (image) { - machine_crash_shutdown(); + machine_crash_shutdown(regs); machine_kexec(image); } xchg(&kexec_lock, 0); diff --git a/kernel/panic.c b/kernel/panic.c index 66f43d33cd8..74ba5f3e46c 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -83,7 +83,7 @@ NORET_TYPE void panic(const char * fmt, ...) * everything else. * Do we want to call this before we try to display a message? */ - crash_kexec(); + crash_kexec(NULL); #ifdef CONFIG_SMP /* -- cgit v1.2.3-70-g09d2 From 72414d3f1d22fc3e311b162fca95c430048d38ce Mon Sep 17 00:00:00 2001 From: Maneesh Soni Date: Sat, 25 Jun 2005 14:58:28 -0700 Subject: [PATCH] kexec code cleanup o Following patch provides purely cosmetic changes and corrects CodingStyle guide lines related certain issues like below in kexec related files o braces for one line "if" statements, "for" loops, o more than 80 column wide lines, o No space after "while", "for" and "switch" key words o Changes: o take-2: Removed the extra tab before "case" key words. o take-3: Put operator at the end of line and space before "*/" Signed-off-by: Maneesh Soni Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/crash.c | 23 +-- arch/i386/kernel/machine_kexec.c | 16 +- arch/ppc/kernel/machine_kexec.c | 30 ++-- arch/ppc64/kernel/machine_kexec.c | 9 +- arch/s390/kernel/machine_kexec.c | 4 +- arch/x86_64/kernel/machine_kexec.c | 49 +++--- drivers/char/mem.c | 2 +- include/linux/kexec.h | 13 +- include/linux/syscalls.h | 6 +- kernel/kexec.c | 302 ++++++++++++++++++++----------------- 10 files changed, 243 insertions(+), 211 deletions(-) (limited to 'arch/i386/kernel/crash.c') diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index 8bdb4b6af0f..e5fab12f792 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -31,10 +31,11 @@ note_buf_t crash_notes[NR_CPUS]; /* This keeps a track of which one is crashing cpu. */ static int crashing_cpu; -static u32 *append_elf_note(u32 *buf, - char *name, unsigned type, void *data, size_t data_len) +static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, + size_t data_len) { struct elf_note note; + note.n_namesz = strlen(name) + 1; note.n_descsz = data_len; note.n_type = type; @@ -44,26 +45,28 @@ static u32 *append_elf_note(u32 *buf, buf += (note.n_namesz + 3)/4; memcpy(buf, data, note.n_descsz); buf += (note.n_descsz + 3)/4; + return buf; } static void final_note(u32 *buf) { struct elf_note note; + note.n_namesz = 0; note.n_descsz = 0; note.n_type = 0; memcpy(buf, ¬e, sizeof(note)); } - static void crash_save_this_cpu(struct pt_regs *regs, int cpu) { struct elf_prstatus prstatus; u32 *buf; - if ((cpu < 0) || (cpu >= NR_CPUS)) { + + if ((cpu < 0) || (cpu >= NR_CPUS)) return; - } + /* Using ELF notes here is opportunistic. * I need a well defined structure format * for the data I pass, and I need tags @@ -75,9 +78,8 @@ static void crash_save_this_cpu(struct pt_regs *regs, int cpu) memset(&prstatus, 0, sizeof(prstatus)); prstatus.pr_pid = current->pid; elf_core_copy_regs(&prstatus.pr_reg, regs); - buf = append_elf_note(buf, "CORE", NT_PRSTATUS, - &prstatus, sizeof(prstatus)); - + buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus, + sizeof(prstatus)); final_note(buf); } @@ -119,8 +121,8 @@ static void crash_save_self(struct pt_regs *saved_regs) { struct pt_regs regs; int cpu; - cpu = smp_processor_id(); + cpu = smp_processor_id(); if (saved_regs) crash_setup_regs(®s, saved_regs); else @@ -153,6 +155,7 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu) /* Assume hlt works */ __asm__("hlt"); for(;;); + return 1; } @@ -169,8 +172,8 @@ static void smp_send_nmi_allbutself(void) static void nmi_shootdown_cpus(void) { unsigned long msecs; - atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); + atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); /* Would it be better to replace the trap vector here? */ set_nmi_callback(crash_nmi_callback); /* Ensure the new callback function is set before sending diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c index 671880415d1..52ed18d8b51 100644 --- a/arch/i386/kernel/machine_kexec.c +++ b/arch/i386/kernel/machine_kexec.c @@ -80,7 +80,8 @@ static void identity_map_page(unsigned long address) /* Identity map the page table entry */ pgtable_level1[level1_index] = address | L0_ATTR; pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR; - set_64bit(&pgtable_level3[level3_index], __pa(pgtable_level2) | L2_ATTR); + set_64bit(&pgtable_level3[level3_index], + __pa(pgtable_level2) | L2_ATTR); /* Flush the tlb so the new mapping takes effect. * Global tlb entries are not flushed but that is not an issue. @@ -139,8 +140,10 @@ static void load_segments(void) } typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)( - unsigned long indirection_page, unsigned long reboot_code_buffer, - unsigned long start_address, unsigned int has_pae) ATTRIB_NORET; + unsigned long indirection_page, + unsigned long reboot_code_buffer, + unsigned long start_address, + unsigned int has_pae) ATTRIB_NORET; const extern unsigned char relocate_new_kernel[]; extern void relocate_new_kernel_end(void); @@ -180,20 +183,23 @@ NORET_TYPE void machine_kexec(struct kimage *image) { unsigned long page_list; unsigned long reboot_code_buffer; + relocate_new_kernel_t rnk; /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); /* Compute some offsets */ - reboot_code_buffer = page_to_pfn(image->control_code_page) << PAGE_SHIFT; + reboot_code_buffer = page_to_pfn(image->control_code_page) + << PAGE_SHIFT; page_list = image->head; /* Set up an identity mapping for the reboot_code_buffer */ identity_map_page(reboot_code_buffer); /* copy it out */ - memcpy((void *)reboot_code_buffer, relocate_new_kernel, relocate_new_kernel_size); + memcpy((void *)reboot_code_buffer, relocate_new_kernel, + relocate_new_kernel_size); /* The segment registers are funny things, they are * automatically loaded from a table, in memory wherever you diff --git a/arch/ppc/kernel/machine_kexec.c b/arch/ppc/kernel/machine_kexec.c index b82535357d6..84d65a87191 100644 --- a/arch/ppc/kernel/machine_kexec.c +++ b/arch/ppc/kernel/machine_kexec.c @@ -21,24 +21,23 @@ #include typedef NORET_TYPE void (*relocate_new_kernel_t)( - unsigned long indirection_page, unsigned long reboot_code_buffer, - unsigned long start_address) ATTRIB_NORET; + unsigned long indirection_page, + unsigned long reboot_code_buffer, + unsigned long start_address) ATTRIB_NORET; const extern unsigned char relocate_new_kernel[]; const extern unsigned int relocate_new_kernel_size; void machine_shutdown(void) { - if (ppc_md.machine_shutdown) { + if (ppc_md.machine_shutdown) ppc_md.machine_shutdown(); - } } void machine_crash_shutdown(struct pt_regs *regs) { - if (ppc_md.machine_crash_shutdown) { + if (ppc_md.machine_crash_shutdown) ppc_md.machine_crash_shutdown(); - } } /* @@ -48,9 +47,8 @@ void machine_crash_shutdown(struct pt_regs *regs) */ int machine_kexec_prepare(struct kimage *image) { - if (ppc_md.machine_kexec_prepare) { + if (ppc_md.machine_kexec_prepare) return ppc_md.machine_kexec_prepare(image); - } /* * Fail if platform doesn't provide its own machine_kexec_prepare * implementation. @@ -60,9 +58,8 @@ int machine_kexec_prepare(struct kimage *image) void machine_kexec_cleanup(struct kimage *image) { - if (ppc_md.machine_kexec_cleanup) { + if (ppc_md.machine_kexec_cleanup) ppc_md.machine_kexec_cleanup(image); - } } /* @@ -71,9 +68,9 @@ void machine_kexec_cleanup(struct kimage *image) */ NORET_TYPE void machine_kexec(struct kimage *image) { - if (ppc_md.machine_kexec) { + if (ppc_md.machine_kexec) ppc_md.machine_kexec(image); - } else { + else { /* * Fall back to normal restart if platform doesn't provide * its own kexec function, and user insist to kexec... @@ -83,7 +80,6 @@ NORET_TYPE void machine_kexec(struct kimage *image) for(;;); } - /* * This is a generic machine_kexec function suitable at least for * non-OpenFirmware embedded platforms. @@ -104,15 +100,15 @@ void machine_kexec_simple(struct kimage *image) /* we need both effective and real address here */ reboot_code_buffer = - (unsigned long)page_address(image->control_code_page); + (unsigned long)page_address(image->control_code_page); reboot_code_buffer_phys = virt_to_phys((void *)reboot_code_buffer); /* copy our kernel relocation code to the control code page */ - memcpy((void *)reboot_code_buffer, - relocate_new_kernel, relocate_new_kernel_size); + memcpy((void *)reboot_code_buffer, relocate_new_kernel, + relocate_new_kernel_size); flush_icache_range(reboot_code_buffer, - reboot_code_buffer + KEXEC_CONTROL_CODE_SIZE); + reboot_code_buffer + KEXEC_CONTROL_CODE_SIZE); printk(KERN_INFO "Bye!\n"); /* now call it */ diff --git a/arch/ppc64/kernel/machine_kexec.c b/arch/ppc64/kernel/machine_kexec.c index 06b25b59c8a..fdb2fc649d7 100644 --- a/arch/ppc64/kernel/machine_kexec.c +++ b/arch/ppc64/kernel/machine_kexec.c @@ -58,7 +58,7 @@ int machine_kexec_prepare(struct kimage *image) * handle the virtual mode, we must make sure no destination * overlaps kernel static data or bss. */ - for(i = 0; i < image->nr_segments; i++) + for (i = 0; i < image->nr_segments; i++) if (image->segment[i].mem < __pa(_end)) return -ETXTBSY; @@ -76,7 +76,7 @@ int machine_kexec_prepare(struct kimage *image) low = __pa(htab_address); high = low + (htab_hash_mask + 1) * HASH_GROUP_SIZE; - for(i = 0; i < image->nr_segments; i++) { + for (i = 0; i < image->nr_segments; i++) { begin = image->segment[i].mem; end = begin + image->segment[i].memsz; @@ -98,7 +98,7 @@ int machine_kexec_prepare(struct kimage *image) low = *basep; high = low + (*sizep); - for(i = 0; i < image->nr_segments; i++) { + for (i = 0; i < image->nr_segments; i++) { begin = image->segment[i].mem; end = begin + image->segment[i].memsz; @@ -274,7 +274,8 @@ union thread_union kexec_stack /* Our assembly helper, in kexec_stub.S */ extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start, - void *image, void *control, void (*clear_all)(void)) ATTRIB_NORET; + void *image, void *control, + void (*clear_all)(void)) ATTRIB_NORET; /* too late to fail here */ void machine_kexec(struct kimage *image) diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 7a94db76df4..2721c3a32b8 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -67,7 +67,7 @@ machine_kexec(struct kimage *image) ctl_clear_bit(0,28); on_each_cpu(kexec_halt_all_cpus, image, 0, 0); - for(;;); + for (;;); } static void @@ -85,7 +85,7 @@ kexec_halt_all_cpus(void *kernel_image) for_each_online_cpu(cpu) { if (cpu == smp_processor_id()) continue; - while(!smp_cpu_not_running(cpu)) + while (!smp_cpu_not_running(cpu)) cpu_relax(); } diff --git a/arch/x86_64/kernel/machine_kexec.c b/arch/x86_64/kernel/machine_kexec.c index 200b5993f8d..60d1eff4156 100644 --- a/arch/x86_64/kernel/machine_kexec.c +++ b/arch/x86_64/kernel/machine_kexec.c @@ -32,29 +32,31 @@ #define L2_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) #define L3_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) -static void init_level2_page( - u64 *level2p, unsigned long addr) +static void init_level2_page(u64 *level2p, unsigned long addr) { unsigned long end_addr; + addr &= PAGE_MASK; end_addr = addr + LEVEL2_SIZE; - while(addr < end_addr) { + while (addr < end_addr) { *(level2p++) = addr | L1_ATTR; addr += LEVEL1_SIZE; } } -static int init_level3_page(struct kimage *image, - u64 *level3p, unsigned long addr, unsigned long last_addr) +static int init_level3_page(struct kimage *image, u64 *level3p, + unsigned long addr, unsigned long last_addr) { unsigned long end_addr; int result; + result = 0; addr &= PAGE_MASK; end_addr = addr + LEVEL3_SIZE; - while((addr < last_addr) && (addr < end_addr)) { + while ((addr < last_addr) && (addr < end_addr)) { struct page *page; u64 *level2p; + page = kimage_alloc_control_pages(image, 0); if (!page) { result = -ENOMEM; @@ -66,7 +68,7 @@ static int init_level3_page(struct kimage *image, addr += LEVEL2_SIZE; } /* clear the unused entries */ - while(addr < end_addr) { + while (addr < end_addr) { *(level3p++) = 0; addr += LEVEL2_SIZE; } @@ -75,17 +77,19 @@ out: } -static int init_level4_page(struct kimage *image, - u64 *level4p, unsigned long addr, unsigned long last_addr) +static int init_level4_page(struct kimage *image, u64 *level4p, + unsigned long addr, unsigned long last_addr) { unsigned long end_addr; int result; + result = 0; addr &= PAGE_MASK; end_addr = addr + LEVEL4_SIZE; - while((addr < last_addr) && (addr < end_addr)) { + while ((addr < last_addr) && (addr < end_addr)) { struct page *page; u64 *level3p; + page = kimage_alloc_control_pages(image, 0); if (!page) { result = -ENOMEM; @@ -100,11 +104,11 @@ static int init_level4_page(struct kimage *image, addr += LEVEL3_SIZE; } /* clear the unused entries */ - while(addr < end_addr) { + while (addr < end_addr) { *(level4p++) = 0; addr += LEVEL3_SIZE; } - out: +out: return result; } @@ -113,7 +117,7 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable) { u64 *level4p; level4p = (u64 *)__va(start_pgtable); - return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT); + return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT); } static void set_idt(void *newidt, u16 limit) @@ -159,9 +163,10 @@ static void load_segments(void) #undef __STR } -typedef NORET_TYPE void (*relocate_new_kernel_t)( - unsigned long indirection_page, unsigned long control_code_buffer, - unsigned long start_address, unsigned long pgtable) ATTRIB_NORET; +typedef NORET_TYPE void (*relocate_new_kernel_t)(unsigned long indirection_page, + unsigned long control_code_buffer, + unsigned long start_address, + unsigned long pgtable) ATTRIB_NORET; const extern unsigned char relocate_new_kernel[]; const extern unsigned long relocate_new_kernel_size; @@ -172,17 +177,17 @@ int machine_kexec_prepare(struct kimage *image) int result; /* Calculate the offsets */ - start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; + start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; control_code_buffer = start_pgtable + 4096UL; /* Setup the identity mapped 64bit page table */ result = init_pgtable(image, start_pgtable); - if (result) { + if (result) return result; - } /* Place the code in the reboot code buffer */ - memcpy(__va(control_code_buffer), relocate_new_kernel, relocate_new_kernel_size); + memcpy(__va(control_code_buffer), relocate_new_kernel, + relocate_new_kernel_size); return 0; } @@ -207,8 +212,8 @@ NORET_TYPE void machine_kexec(struct kimage *image) local_irq_disable(); /* Calculate the offsets */ - page_list = image->head; - start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; + page_list = image->head; + start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; control_code_buffer = start_pgtable + 4096UL; /* Set the low half of the page table to my identity mapped diff --git a/drivers/char/mem.c b/drivers/char/mem.c index b64108dd765..42187381506 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -287,7 +287,7 @@ static ssize_t read_oldmem(struct file *file, char __user *buf, size_t read = 0, csize; int rc = 0; - while(count) { + while (count) { pfn = *ppos / PAGE_SIZE; if (pfn > saved_max_pfn) return read; diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 7383173a3a9..c8468472aec 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -91,14 +91,17 @@ extern NORET_TYPE void machine_kexec(struct kimage *image) ATTRIB_NORET; extern int machine_kexec_prepare(struct kimage *image); extern void machine_kexec_cleanup(struct kimage *image); extern asmlinkage long sys_kexec_load(unsigned long entry, - unsigned long nr_segments, struct kexec_segment __user *segments, - unsigned long flags); + unsigned long nr_segments, + struct kexec_segment __user *segments, + unsigned long flags); #ifdef CONFIG_COMPAT extern asmlinkage long compat_sys_kexec_load(unsigned long entry, - unsigned long nr_segments, struct compat_kexec_segment __user *segments, - unsigned long flags); + unsigned long nr_segments, + struct compat_kexec_segment __user *segments, + unsigned long flags); #endif -extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order); +extern struct page *kimage_alloc_control_pages(struct kimage *image, + unsigned int order); extern void crash_kexec(struct pt_regs *); int kexec_should_crash(struct task_struct *); extern struct kimage *kexec_image; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 7ba8f8f747a..52830b6d94e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -159,9 +159,9 @@ asmlinkage long sys_shutdown(int, int); asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user *arg); asmlinkage long sys_restart_syscall(void); -asmlinkage long sys_kexec_load(unsigned long entry, - unsigned long nr_segments, struct kexec_segment __user *segments, - unsigned long flags); +asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, + struct kexec_segment __user *segments, + unsigned long flags); asmlinkage long sys_exit(int error_code); asmlinkage void sys_exit_group(int error_code); diff --git a/kernel/kexec.c b/kernel/kexec.c index 277f22afe74..7843548cf2d 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -87,12 +87,15 @@ int kexec_should_crash(struct task_struct *p) */ #define KIMAGE_NO_DEST (-1UL) -static int kimage_is_destination_range( - struct kimage *image, unsigned long start, unsigned long end); -static struct page *kimage_alloc_page(struct kimage *image, unsigned int gfp_mask, unsigned long dest); +static int kimage_is_destination_range(struct kimage *image, + unsigned long start, unsigned long end); +static struct page *kimage_alloc_page(struct kimage *image, + unsigned int gfp_mask, + unsigned long dest); static int do_kimage_alloc(struct kimage **rimage, unsigned long entry, - unsigned long nr_segments, struct kexec_segment __user *segments) + unsigned long nr_segments, + struct kexec_segment __user *segments) { size_t segment_bytes; struct kimage *image; @@ -102,9 +105,9 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry, /* Allocate a controlling structure */ result = -ENOMEM; image = kmalloc(sizeof(*image), GFP_KERNEL); - if (!image) { + if (!image) goto out; - } + memset(image, 0, sizeof(*image)); image->head = 0; image->entry = &image->head; @@ -145,6 +148,7 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry, result = -EADDRNOTAVAIL; for (i = 0; i < nr_segments; i++) { unsigned long mstart, mend; + mstart = image->segment[i].mem; mend = mstart + image->segment[i].memsz; if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK)) @@ -159,12 +163,13 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry, * easy explanation as one segment stops on another. */ result = -EINVAL; - for(i = 0; i < nr_segments; i++) { + for (i = 0; i < nr_segments; i++) { unsigned long mstart, mend; unsigned long j; + mstart = image->segment[i].mem; mend = mstart + image->segment[i].memsz; - for(j = 0; j < i; j++) { + for (j = 0; j < i; j++) { unsigned long pstart, pend; pstart = image->segment[j].mem; pend = pstart + image->segment[j].memsz; @@ -180,25 +185,25 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry, * later on. */ result = -EINVAL; - for(i = 0; i < nr_segments; i++) { + for (i = 0; i < nr_segments; i++) { if (image->segment[i].bufsz > image->segment[i].memsz) goto out; } - result = 0; - out: - if (result == 0) { +out: + if (result == 0) *rimage = image; - } else { + else kfree(image); - } + return result; } static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry, - unsigned long nr_segments, struct kexec_segment __user *segments) + unsigned long nr_segments, + struct kexec_segment __user *segments) { int result; struct kimage *image; @@ -206,9 +211,9 @@ static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry, /* Allocate and initialize a controlling structure */ image = NULL; result = do_kimage_alloc(&image, entry, nr_segments, segments); - if (result) { + if (result) goto out; - } + *rimage = image; /* @@ -218,7 +223,7 @@ static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry, */ result = -ENOMEM; image->control_code_page = kimage_alloc_control_pages(image, - get_order(KEXEC_CONTROL_CODE_SIZE)); + get_order(KEXEC_CONTROL_CODE_SIZE)); if (!image->control_code_page) { printk(KERN_ERR "Could not allocate control_code_buffer\n"); goto out; @@ -226,16 +231,17 @@ static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry, result = 0; out: - if (result == 0) { + if (result == 0) *rimage = image; - } else { + else kfree(image); - } + return result; } static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry, - unsigned long nr_segments, struct kexec_segment *segments) + unsigned long nr_segments, + struct kexec_segment *segments) { int result; struct kimage *image; @@ -250,9 +256,8 @@ static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry, /* Allocate and initialize a controlling structure */ result = do_kimage_alloc(&image, entry, nr_segments, segments); - if (result) { + if (result) goto out; - } /* Enable the special crash kernel control page * allocation policy. @@ -272,6 +277,7 @@ static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry, result = -EADDRNOTAVAIL; for (i = 0; i < nr_segments; i++) { unsigned long mstart, mend; + mstart = image->segment[i].mem; mend = mstart + image->segment[i].memsz - 1; /* Ensure we are within the crash kernel limits */ @@ -279,7 +285,6 @@ static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry, goto out; } - /* * Find a location for the control code buffer, and add * the vector of segments so that it's pages will also be @@ -287,80 +292,84 @@ static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry, */ result = -ENOMEM; image->control_code_page = kimage_alloc_control_pages(image, - get_order(KEXEC_CONTROL_CODE_SIZE)); + get_order(KEXEC_CONTROL_CODE_SIZE)); if (!image->control_code_page) { printk(KERN_ERR "Could not allocate control_code_buffer\n"); goto out; } result = 0; - out: - if (result == 0) { +out: + if (result == 0) *rimage = image; - } else { + else kfree(image); - } + return result; } -static int kimage_is_destination_range( - struct kimage *image, unsigned long start, unsigned long end) +static int kimage_is_destination_range(struct kimage *image, + unsigned long start, + unsigned long end) { unsigned long i; for (i = 0; i < image->nr_segments; i++) { unsigned long mstart, mend; + mstart = image->segment[i].mem; - mend = mstart + image->segment[i].memsz; - if ((end > mstart) && (start < mend)) { + mend = mstart + image->segment[i].memsz; + if ((end > mstart) && (start < mend)) return 1; - } } + return 0; } -static struct page *kimage_alloc_pages(unsigned int gfp_mask, unsigned int order) +static struct page *kimage_alloc_pages(unsigned int gfp_mask, + unsigned int order) { struct page *pages; + pages = alloc_pages(gfp_mask, order); if (pages) { unsigned int count, i; pages->mapping = NULL; pages->private = order; count = 1 << order; - for(i = 0; i < count; i++) { + for (i = 0; i < count; i++) SetPageReserved(pages + i); - } } + return pages; } static void kimage_free_pages(struct page *page) { unsigned int order, count, i; + order = page->private; count = 1 << order; - for(i = 0; i < count; i++) { + for (i = 0; i < count; i++) ClearPageReserved(page + i); - } __free_pages(page, order); } static void kimage_free_page_list(struct list_head *list) { struct list_head *pos, *next; + list_for_each_safe(pos, next, list) { struct page *page; page = list_entry(pos, struct page, lru); list_del(&page->lru); - kimage_free_pages(page); } } -static struct page *kimage_alloc_normal_control_pages( - struct kimage *image, unsigned int order) +static struct page *kimage_alloc_normal_control_pages(struct kimage *image, + unsigned int order) { /* Control pages are special, they are the intermediaries * that are needed while we copy the rest of the pages @@ -387,6 +396,7 @@ static struct page *kimage_alloc_normal_control_pages( */ do { unsigned long pfn, epfn, addr, eaddr; + pages = kimage_alloc_pages(GFP_KERNEL, order); if (!pages) break; @@ -395,12 +405,12 @@ static struct page *kimage_alloc_normal_control_pages( addr = pfn << PAGE_SHIFT; eaddr = epfn << PAGE_SHIFT; if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) || - kimage_is_destination_range(image, addr, eaddr)) - { + kimage_is_destination_range(image, addr, eaddr)) { list_add(&pages->lru, &extra_pages); pages = NULL; } - } while(!pages); + } while (!pages); + if (pages) { /* Remember the allocated page... */ list_add(&pages->lru, &image->control_pages); @@ -420,12 +430,12 @@ static struct page *kimage_alloc_normal_control_pages( * For now it is simpler to just free the pages. */ kimage_free_page_list(&extra_pages); - return pages; + return pages; } -static struct page *kimage_alloc_crash_control_pages( - struct kimage *image, unsigned int order) +static struct page *kimage_alloc_crash_control_pages(struct kimage *image, + unsigned int order) { /* Control pages are special, they are the intermediaries * that are needed while we copy the rest of the pages @@ -450,21 +460,22 @@ static struct page *kimage_alloc_crash_control_pages( */ unsigned long hole_start, hole_end, size; struct page *pages; + pages = NULL; size = (1 << order) << PAGE_SHIFT; hole_start = (image->control_page + (size - 1)) & ~(size - 1); hole_end = hole_start + size - 1; - while(hole_end <= crashk_res.end) { + while (hole_end <= crashk_res.end) { unsigned long i; - if (hole_end > KEXEC_CONTROL_MEMORY_LIMIT) { + + if (hole_end > KEXEC_CONTROL_MEMORY_LIMIT) break; - } - if (hole_end > crashk_res.end) { + if (hole_end > crashk_res.end) break; - } /* See if I overlap any of the segments */ - for(i = 0; i < image->nr_segments; i++) { + for (i = 0; i < image->nr_segments; i++) { unsigned long mstart, mend; + mstart = image->segment[i].mem; mend = mstart + image->segment[i].memsz - 1; if ((hole_end >= mstart) && (hole_start <= mend)) { @@ -480,18 +491,19 @@ static struct page *kimage_alloc_crash_control_pages( break; } } - if (pages) { + if (pages) image->control_page = hole_end; - } + return pages; } -struct page *kimage_alloc_control_pages( - struct kimage *image, unsigned int order) +struct page *kimage_alloc_control_pages(struct kimage *image, + unsigned int order) { struct page *pages = NULL; - switch(image->type) { + + switch (image->type) { case KEXEC_TYPE_DEFAULT: pages = kimage_alloc_normal_control_pages(image, order); break; @@ -499,43 +511,46 @@ struct page *kimage_alloc_control_pages( pages = kimage_alloc_crash_control_pages(image, order); break; } + return pages; } static int kimage_add_entry(struct kimage *image, kimage_entry_t entry) { - if (*image->entry != 0) { + if (*image->entry != 0) image->entry++; - } + if (image->entry == image->last_entry) { kimage_entry_t *ind_page; struct page *page; + page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST); - if (!page) { + if (!page) return -ENOMEM; - } + ind_page = page_address(page); *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION; image->entry = ind_page; - image->last_entry = - ind_page + ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1); + image->last_entry = ind_page + + ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1); } *image->entry = entry; image->entry++; *image->entry = 0; + return 0; } -static int kimage_set_destination( - struct kimage *image, unsigned long destination) +static int kimage_set_destination(struct kimage *image, + unsigned long destination) { int result; destination &= PAGE_MASK; result = kimage_add_entry(image, destination | IND_DESTINATION); - if (result == 0) { + if (result == 0) image->destination = destination; - } + return result; } @@ -546,9 +561,9 @@ static int kimage_add_page(struct kimage *image, unsigned long page) page &= PAGE_MASK; result = kimage_add_entry(image, page | IND_SOURCE); - if (result == 0) { + if (result == 0) image->destination += PAGE_SIZE; - } + return result; } @@ -564,10 +579,11 @@ static void kimage_free_extra_pages(struct kimage *image) } static int kimage_terminate(struct kimage *image) { - if (*image->entry != 0) { + if (*image->entry != 0) image->entry++; - } + *image->entry = IND_DONE; + return 0; } @@ -591,26 +607,24 @@ static void kimage_free(struct kimage *image) if (!image) return; + kimage_free_extra_pages(image); for_each_kimage_entry(image, ptr, entry) { if (entry & IND_INDIRECTION) { /* Free the previous indirection page */ - if (ind & IND_INDIRECTION) { + if (ind & IND_INDIRECTION) kimage_free_entry(ind); - } /* Save this indirection page until we are * done with it. */ ind = entry; } - else if (entry & IND_SOURCE) { + else if (entry & IND_SOURCE) kimage_free_entry(entry); - } } /* Free the final indirection page */ - if (ind & IND_INDIRECTION) { + if (ind & IND_INDIRECTION) kimage_free_entry(ind); - } /* Handle any machine specific cleanup */ machine_kexec_cleanup(image); @@ -620,26 +634,28 @@ static void kimage_free(struct kimage *image) kfree(image); } -static kimage_entry_t *kimage_dst_used(struct kimage *image, unsigned long page) +static kimage_entry_t *kimage_dst_used(struct kimage *image, + unsigned long page) { kimage_entry_t *ptr, entry; unsigned long destination = 0; for_each_kimage_entry(image, ptr, entry) { - if (entry & IND_DESTINATION) { + if (entry & IND_DESTINATION) destination = entry & PAGE_MASK; - } else if (entry & IND_SOURCE) { - if (page == destination) { + if (page == destination) return ptr; - } destination += PAGE_SIZE; } } + return 0; } -static struct page *kimage_alloc_page(struct kimage *image, unsigned int gfp_mask, unsigned long destination) +static struct page *kimage_alloc_page(struct kimage *image, + unsigned int gfp_mask, + unsigned long destination) { /* * Here we implement safeguards to ensure that a source page @@ -679,11 +695,11 @@ static struct page *kimage_alloc_page(struct kimage *image, unsigned int gfp_mas /* Allocate a page, if we run out of memory give up */ page = kimage_alloc_pages(gfp_mask, 0); - if (!page) { + if (!page) return 0; - } /* If the page cannot be used file it away */ - if (page_to_pfn(page) > (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) { + if (page_to_pfn(page) > + (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) { list_add(&page->lru, &image->unuseable_pages); continue; } @@ -694,7 +710,8 @@ static struct page *kimage_alloc_page(struct kimage *image, unsigned int gfp_mas break; /* If the page is not a destination page use it */ - if (!kimage_is_destination_range(image, addr, addr + PAGE_SIZE)) + if (!kimage_is_destination_range(image, addr, + addr + PAGE_SIZE)) break; /* @@ -727,11 +744,12 @@ static struct page *kimage_alloc_page(struct kimage *image, unsigned int gfp_mas list_add(&page->lru, &image->dest_pages); } } + return page; } static int kimage_load_normal_segment(struct kimage *image, - struct kexec_segment *segment) + struct kexec_segment *segment) { unsigned long maddr; unsigned long ubytes, mbytes; @@ -745,34 +763,36 @@ static int kimage_load_normal_segment(struct kimage *image, maddr = segment->mem; result = kimage_set_destination(image, maddr); - if (result < 0) { + if (result < 0) goto out; - } - while(mbytes) { + + while (mbytes) { struct page *page; char *ptr; size_t uchunk, mchunk; + page = kimage_alloc_page(image, GFP_HIGHUSER, maddr); if (page == 0) { result = -ENOMEM; goto out; } - result = kimage_add_page(image, page_to_pfn(page) << PAGE_SHIFT); - if (result < 0) { + result = kimage_add_page(image, page_to_pfn(page) + << PAGE_SHIFT); + if (result < 0) goto out; - } + ptr = kmap(page); /* Start with a clear page */ memset(ptr, 0, PAGE_SIZE); ptr += maddr & ~PAGE_MASK; mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK); - if (mchunk > mbytes) { + if (mchunk > mbytes) mchunk = mbytes; - } + uchunk = mchunk; - if (uchunk > ubytes) { + if (uchunk > ubytes) uchunk = ubytes; - } + result = copy_from_user(ptr, buf, uchunk); kunmap(page); if (result) { @@ -784,12 +804,12 @@ static int kimage_load_normal_segment(struct kimage *image, buf += mchunk; mbytes -= mchunk; } - out: +out: return result; } static int kimage_load_crash_segment(struct kimage *image, - struct kexec_segment *segment) + struct kexec_segment *segment) { /* For crash dumps kernels we simply copy the data from * user space to it's destination. @@ -805,10 +825,11 @@ static int kimage_load_crash_segment(struct kimage *image, ubytes = segment->bufsz; mbytes = segment->memsz; maddr = segment->mem; - while(mbytes) { + while (mbytes) { struct page *page; char *ptr; size_t uchunk, mchunk; + page = pfn_to_page(maddr >> PAGE_SHIFT); if (page == 0) { result = -ENOMEM; @@ -817,9 +838,9 @@ static int kimage_load_crash_segment(struct kimage *image, ptr = kmap(page); ptr += maddr & ~PAGE_MASK; mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK); - if (mchunk > mbytes) { + if (mchunk > mbytes) mchunk = mbytes; - } + uchunk = mchunk; if (uchunk > ubytes) { uchunk = ubytes; @@ -837,15 +858,16 @@ static int kimage_load_crash_segment(struct kimage *image, buf += mchunk; mbytes -= mchunk; } - out: +out: return result; } static int kimage_load_segment(struct kimage *image, - struct kexec_segment *segment) + struct kexec_segment *segment) { int result = -ENOMEM; - switch(image->type) { + + switch (image->type) { case KEXEC_TYPE_DEFAULT: result = kimage_load_normal_segment(image, segment); break; @@ -853,6 +875,7 @@ static int kimage_load_segment(struct kimage *image, result = kimage_load_crash_segment(image, segment); break; } + return result; } @@ -885,9 +908,9 @@ static struct kimage *kexec_crash_image = NULL; */ static int kexec_lock = 0; -asmlinkage long sys_kexec_load(unsigned long entry, - unsigned long nr_segments, struct kexec_segment __user *segments, - unsigned long flags) +asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, + struct kexec_segment __user *segments, + unsigned long flags) { struct kimage **dest_image, *image; int locked; @@ -907,9 +930,7 @@ asmlinkage long sys_kexec_load(unsigned long entry, /* Verify we are on the appropriate architecture */ if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) && ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT)) - { return -EINVAL; - } /* Put an artificial cap on the number * of segments passed to kexec_load. @@ -929,58 +950,59 @@ asmlinkage long sys_kexec_load(unsigned long entry, * KISS: always take the mutex. */ locked = xchg(&kexec_lock, 1); - if (locked) { + if (locked) return -EBUSY; - } + dest_image = &kexec_image; - if (flags & KEXEC_ON_CRASH) { + if (flags & KEXEC_ON_CRASH) dest_image = &kexec_crash_image; - } if (nr_segments > 0) { unsigned long i; + /* Loading another kernel to reboot into */ - if ((flags & KEXEC_ON_CRASH) == 0) { - result = kimage_normal_alloc(&image, entry, nr_segments, segments); - } + if ((flags & KEXEC_ON_CRASH) == 0) + result = kimage_normal_alloc(&image, entry, + nr_segments, segments); /* Loading another kernel to switch to if this one crashes */ else if (flags & KEXEC_ON_CRASH) { /* Free any current crash dump kernel before * we corrupt it. */ kimage_free(xchg(&kexec_crash_image, NULL)); - result = kimage_crash_alloc(&image, entry, nr_segments, segments); + result = kimage_crash_alloc(&image, entry, + nr_segments, segments); } - if (result) { + if (result) goto out; - } + result = machine_kexec_prepare(image); - if (result) { + if (result) goto out; - } - for(i = 0; i < nr_segments; i++) { + + for (i = 0; i < nr_segments; i++) { result = kimage_load_segment(image, &image->segment[i]); - if (result) { + if (result) goto out; - } } result = kimage_terminate(image); - if (result) { + if (result) goto out; - } } /* Install the new kernel, and Uninstall the old */ image = xchg(dest_image, image); - out: +out: xchg(&kexec_lock, 0); /* Release the mutex */ kimage_free(image); + return result; } #ifdef CONFIG_COMPAT asmlinkage long compat_sys_kexec_load(unsigned long entry, - unsigned long nr_segments, struct compat_kexec_segment __user *segments, - unsigned long flags) + unsigned long nr_segments, + struct compat_kexec_segment __user *segments, + unsigned long flags) { struct compat_kexec_segment in; struct kexec_segment out, __user *ksegments; @@ -989,20 +1011,17 @@ asmlinkage long compat_sys_kexec_load(unsigned long entry, /* Don't allow clients that don't understand the native * architecture to do anything. */ - if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT) { + if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT) return -EINVAL; - } - if (nr_segments > KEXEC_SEGMENT_MAX) { + if (nr_segments > KEXEC_SEGMENT_MAX) return -EINVAL; - } ksegments = compat_alloc_user_space(nr_segments * sizeof(out)); for (i=0; i < nr_segments; i++) { result = copy_from_user(&in, &segments[i], sizeof(in)); - if (result) { + if (result) return -EFAULT; - } out.buf = compat_ptr(in.buf); out.bufsz = in.bufsz; @@ -1010,9 +1029,8 @@ asmlinkage long compat_sys_kexec_load(unsigned long entry, out.memsz = in.memsz; result = copy_to_user(&ksegments[i], &out, sizeof(out)); - if (result) { + if (result) return -EFAULT; - } } return sys_kexec_load(entry, nr_segments, ksegments, flags); -- cgit v1.2.3-70-g09d2