From 05f47fda9fc5b17bfab189e9d54228025befc996 Mon Sep 17 00:00:00 2001 From: Daisuke HATAYAMA Date: Fri, 5 Mar 2010 13:44:05 -0800 Subject: coredump: unify dump_seek() implementations for each binfmt_*.c The current ELF dumper can produce broken corefiles if the number of program headers exceeds 65535. In particular, programs in 64-bit environments often demand more than 65535 mmaps. If you google max_map_count, then you can find many users facing this problem. Solaris has already dealt with this issue, and other OSes have also adopted the same method as in Solaris. Currently, Sun's document and the AMD64 ABI include the description for the extension, where they call the extension Extended Numbering. See References for further information. I believe that the Linux kernel should adopt the same approach as they did, so I've written this patch. I am also preparing patches for GDB and binutils. How to fix ========== In the new dumping process, there are two cases according to whether or not the number of program headers is equal to or more than 65535. - if less than 65535, the produced corefile format is exactly the same as the ordinary one. - if equal to or more than 65535, then the e_phnum field is set to the newly introduced constant PN_XNUM(0xffff) and the actual number of program headers is stored in the sh_info field of the section header at index 0. Compatibility Concern ===================== * As already mentioned in Summary, Sun and AMD64 have already adopted this. See References. * There are four combinations according to whether kernel and userland tools are respectively modified or not. The next table briefly summarizes each combination. 
--------------------------------------------- Original Kernel | Modified Kernel --------------------------------------------- < 65535 | >= 65535 | < 65535 | >= 65535 ------------------------------------------------------------- Original Tools | OK | broken | OK | broken (#) ------------------------------------------------------------- Modified Tools | OK | broken | OK | OK ------------------------------------------------------------- Note that there is no case that `OK' changes to `broken'. (#) Although this case remains broken, O-M behaves better than O-O. That is, while in O-O case e_phnum field would be extremely small due to integer overflow, in O-M case it is guaranteed to be at least 65535 by being set to PN_XNUM(0xFFFF), much closer to the actual correct value than the O-O case. Test Program ============ Here is a test program mkmmaps.c that is useful to produce the corefile with many mmaps. To use this, please take the following steps: $ ulimit -c unlimited $ sysctl vm.max_map_count=70000 # default 65530 is too small $ sysctl fs.file-max=70000 $ mkmmaps 65535 Then, the program will abort and a corefile will be generated. If failed, there are two cases according to the error message displayed. * ``out of memory'' means vm.max_map_count is still smaller * ``too many open files'' means fs.file-max is still smaller So, please change it to a larger value, and then retry it. 
mkmmaps.c == #include <stdio.h> #include <stdlib.h> #include <sys/mman.h> #include <fcntl.h> #include <unistd.h> int main(int argc, char **argv) { int maps_num; if (argc < 2) { fprintf(stderr, "mkmmaps [number of maps to be created]\n"); exit(1); } if (sscanf(argv[1], "%d", &maps_num) == EOF) { perror("sscanf"); exit(2); } if (maps_num < 0) { fprintf(stderr, "%d is invalid\n", maps_num); exit(3); } for (; maps_num > 0; --maps_num) { if (MAP_FAILED == mmap((void *)NULL, (size_t) 1, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, (int) -1, (off_t) NULL)) { perror("mmap"); exit(4); } } abort(); { char buffer[128]; sprintf(buffer, "wc -l /proc/%u/maps", getpid()); system(buffer); } return 0; } Tested on i386, ia64 and um/sys-i386. Built on sh4 (which covers fs/binfmt_elf_fdpic.c) References ========== - Sun microsystems: Linker and Libraries. Part No: 817-1984-17, September 2008. URL: http://docs.sun.com/app/docs/doc/817-1984 - System V ABI AMD64 Architecture Processor Supplement Draft Version 0.99., May 11, 2009. URL: http://www.x86-64.org/ This patch: There are three different definitions for dump_seek() functions in binfmt_aout.c, binfmt_elf.c and binfmt_elf_fdpic.c, respectively. Only the one in binfmt_elf.c has received the past fixes listed below. My next patch will move dump_seek() into a header file in order to share the same implementations for dump_write() and dump_seek(). As the first step, this patch unifies these three definitions for dump_seek() by applying the past commits that have been applied only for binfmt_elf.c. Specifically, the modification made here is part of the following commits: * d025c9db7f31fc0554ce7fb2dfc78d35a77f3487 * 7f14daa19ea36b200d237ad3ac5826ae25360461 This patch does not change the shape of corefiles. 
Signed-off-by: Daisuke HATAYAMA Cc: "Luck, Tony" Cc: Jeff Dike Cc: David Howells Cc: Greg Ungerer Cc: Roland McGrath Cc: Oleg Nesterov Cc: Ingo Molnar Cc: Alexander Viro Cc: Andi Kleen Cc: Alan Cox Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf_fdpic.c | 59 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 21 deletions(-) (limited to 'fs/binfmt_elf_fdpic.c') diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 18d77297ccc..32d9b44c3cb 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1226,11 +1226,22 @@ static int dump_write(struct file *file, const void *addr, int nr) static int dump_seek(struct file *file, loff_t off) { - if (file->f_op->llseek) { - if (file->f_op->llseek(file, off, SEEK_SET) != off) + if (file->f_op->llseek && file->f_op->llseek != no_llseek) { + if (file->f_op->llseek(file, off, SEEK_CUR) < 0) return 0; } else { - file->f_pos = off; + char *buf = (char *)get_zeroed_page(GFP_KERNEL); + if (!buf) + return 0; + while (off > 0) { + unsigned long n = off; + if (n > PAGE_SIZE) + n = PAGE_SIZE; + if (!dump_write(file, buf, n)) + return 0; + off -= n; + } + free_page((unsigned long)buf); } return 1; } @@ -1313,30 +1324,35 @@ static int notesize(struct memelfnote *en) /* #define DEBUG */ -#define DUMP_WRITE(addr, nr) \ - do { if (!dump_write(file, (addr), (nr))) return 0; } while(0) -#define DUMP_SEEK(off) \ - do { if (!dump_seek(file, (off))) return 0; } while(0) +#define DUMP_WRITE(addr, nr, foffset) \ + do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0) -static int writenote(struct memelfnote *men, struct file *file) +static int alignfile(struct file *file, loff_t *foffset) { - struct elf_note en; + static const char buf[4] = { 0, }; + DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset); + return 1; +} +static int writenote(struct memelfnote *men, struct file *file, + loff_t *foffset) +{ + struct elf_note en; 
en.n_namesz = strlen(men->name) + 1; en.n_descsz = men->datasz; en.n_type = men->type; - DUMP_WRITE(&en, sizeof(en)); - DUMP_WRITE(men->name, en.n_namesz); - /* XXX - cast from long long to long to avoid need for libgcc.a */ - DUMP_SEEK(roundup((unsigned long)file->f_pos, 4)); /* XXX */ - DUMP_WRITE(men->data, men->datasz); - DUMP_SEEK(roundup((unsigned long)file->f_pos, 4)); /* XXX */ + DUMP_WRITE(&en, sizeof(en), foffset); + DUMP_WRITE(men->name, en.n_namesz, foffset); + if (!alignfile(file, foffset)) + return 0; + DUMP_WRITE(men->data, men->datasz, foffset); + if (!alignfile(file, foffset)) + return 0; return 1; } #undef DUMP_WRITE -#undef DUMP_SEEK #define DUMP_WRITE(addr, nr) \ if ((size += (nr)) > cprm->limit || \ @@ -1552,7 +1568,7 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size, err = -EIO; kunmap(page); page_cache_release(page); - } else if (!dump_seek(file, file->f_pos + PAGE_SIZE)) + } else if (!dump_seek(file, PAGE_SIZE)) err = -EFBIG; if (err) goto out; @@ -1605,7 +1621,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) int i; struct vm_area_struct *vma; struct elfhdr *elf = NULL; - loff_t offset = 0, dataoff; + loff_t offset = 0, dataoff, foffset; int numnote; struct memelfnote *notes = NULL; struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */ @@ -1730,6 +1746,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) DUMP_WRITE(elf, sizeof(*elf)); offset += sizeof(*elf); /* Elf header */ offset += (segs+1) * sizeof(struct elf_phdr); /* Program headers */ + foffset = offset; /* Write notes phdr entry */ { @@ -1786,7 +1803,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) /* write out the notes section */ for (i = 0; i < numnote; i++) - if (!writenote(notes + i, cprm->file)) + if (!writenote(notes + i, cprm->file, &foffset)) goto end_coredump; /* write out the thread status notes section */ @@ -1795,11 +1812,11 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) list_entry(t, 
struct elf_thread_status, list); for (i = 0; i < tmp->num_notes; i++) - if (!writenote(&tmp->notes[i], cprm->file)) + if (!writenote(&tmp->notes[i], cprm->file, &foffset)) goto end_coredump; } - if (!dump_seek(cprm->file, dataoff)) + if (!dump_seek(cprm->file, dataoff - foffset)) goto end_coredump; if (elf_fdpic_dump_segments(cprm->file, &size, &cprm->limit, -- cgit v1.2.3-70-g09d2 From 088e7af73a962fcc8883b7a6392544d8342553d6 Mon Sep 17 00:00:00 2001 From: Daisuke HATAYAMA Date: Fri, 5 Mar 2010 13:44:06 -0800 Subject: coredump: move dump_write() and dump_seek() into a header file My next patch will replace ELF_CORE_EXTRA_* macros by functions, putting them into other newly created *.c files. Then, each files will contain dump_write(), where each pair of binfmt_*.c and elfcore.c should be the same. So, this patch moves them into a header file with dump_seek(). Also, the patch deletes confusing DUMP_WRITE macros in each files. Signed-off-by: Daisuke HATAYAMA Cc: "Luck, Tony" Cc: Jeff Dike Cc: David Howells Cc: Greg Ungerer Cc: Roland McGrath Cc: Oleg Nesterov Cc: Ingo Molnar Cc: Alexander Viro Cc: Andi Kleen Cc: Alan Cox Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_aout.c | 49 ++++++++----------------------------------- fs/binfmt_elf.c | 52 +++++++++++++--------------------------------- fs/binfmt_elf_fdpic.c | 54 ++++++++++++++---------------------------------- include/linux/coredump.h | 41 ++++++++++++++++++++++++++++++++++++ 4 files changed, 79 insertions(+), 117 deletions(-) create mode 100644 include/linux/coredump.h (limited to 'fs/binfmt_elf_fdpic.c') diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index d2f8872dd76..15d80bb35d6 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -59,42 +60,6 @@ static int set_brk(unsigned long start, unsigned long end) return 0; } -/* - * These are the only things you should do on a core-file: use only these - * 
macros to write out all the necessary info. - */ - -static int dump_write(struct file *file, const void *addr, int nr) -{ - return file->f_op->write(file, addr, nr, &file->f_pos) == nr; -} - -static int dump_seek(struct file *file, loff_t off) -{ - if (file->f_op->llseek && file->f_op->llseek != no_llseek) { - if (file->f_op->llseek(file, off, SEEK_CUR) < 0) - return 0; - } else { - char *buf = (char *)get_zeroed_page(GFP_KERNEL); - if (!buf) - return 0; - while (off > 0) { - unsigned long n = off; - if (n > PAGE_SIZE) - n = PAGE_SIZE; - if (!dump_write(file, buf, n)) - return 0; - off -= n; - } - free_page((unsigned long)buf); - } - return 1; -} - -#define DUMP_WRITE(addr, nr) \ - if (!dump_write(file, (void *)(addr), (nr))) \ - goto end_coredump; - /* * Routine writes a core dump image in the current directory. * Currently only a stub-function. @@ -146,7 +111,8 @@ static int aout_core_dump(struct coredump_params *cprm) set_fs(KERNEL_DS); /* struct user */ - DUMP_WRITE(&dump,sizeof(dump)); + if (!dump_write(file, &dump, sizeof(dump))) + goto end_coredump; /* Now dump all of the user data. Include malloced stuff as well */ if (!dump_seek(cprm->file, PAGE_SIZE - sizeof(dump))) goto end_coredump; @@ -156,17 +122,20 @@ static int aout_core_dump(struct coredump_params *cprm) if (dump.u_dsize != 0) { dump_start = START_DATA(dump); dump_size = dump.u_dsize << PAGE_SHIFT; - DUMP_WRITE(dump_start,dump_size); + if (!dump_write(file, dump_start, dump_size)) + goto end_coredump; } /* Now prepare to dump the stack area */ if (dump.u_ssize != 0) { dump_start = START_STACK(dump); dump_size = dump.u_ssize << PAGE_SHIFT; - DUMP_WRITE(dump_start,dump_size); + if (!dump_write(file, dump_start, dump_size)) + goto end_coredump; } /* Finally dump the task struct. 
Not be used by gdb, but could be useful */ set_fs(KERNEL_DS); - DUMP_WRITE(current,sizeof(*current)); + if (!dump_write(file, current, sizeof(*current))) + goto end_coredump; end_coredump: set_fs(fs); return has_dumped; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index fd5b2ea5d29..0bcfbb05c32 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -1085,36 +1086,6 @@ out: * Modelled on fs/exec.c:aout_core_dump() * Jeremy Fitzhardinge */ -/* - * These are the only things you should do on a core-file: use only these - * functions to write out all the necessary info. - */ -static int dump_write(struct file *file, const void *addr, int nr) -{ - return file->f_op->write(file, addr, nr, &file->f_pos) == nr; -} - -static int dump_seek(struct file *file, loff_t off) -{ - if (file->f_op->llseek && file->f_op->llseek != no_llseek) { - if (file->f_op->llseek(file, off, SEEK_CUR) < 0) - return 0; - } else { - char *buf = (char *)get_zeroed_page(GFP_KERNEL); - if (!buf) - return 0; - while (off > 0) { - unsigned long n = off; - if (n > PAGE_SIZE) - n = PAGE_SIZE; - if (!dump_write(file, buf, n)) - return 0; - off -= n; - } - free_page((unsigned long)buf); - } - return 1; -} /* * Decide what to dump of a segment, part, all or none. 
@@ -1249,11 +1220,6 @@ static int writenote(struct memelfnote *men, struct file *file, } #undef DUMP_WRITE -#define DUMP_WRITE(addr, nr) \ - if ((size += (nr)) > cprm->limit || \ - !dump_write(cprm->file, (addr), (nr))) \ - goto end_coredump; - static void fill_elf_header(struct elfhdr *elf, int segs, u16 machine, u32 flags, u8 osabi) { @@ -1934,7 +1900,10 @@ static int elf_core_dump(struct coredump_params *cprm) fs = get_fs(); set_fs(KERNEL_DS); - DUMP_WRITE(elf, sizeof(*elf)); + size += sizeof(*elf); + if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf))) + goto end_coredump; + offset += sizeof(*elf); /* Elf header */ offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */ foffset = offset; @@ -1948,7 +1917,11 @@ static int elf_core_dump(struct coredump_params *cprm) fill_elf_note_phdr(&phdr, sz, offset); offset += sz; - DUMP_WRITE(&phdr, sizeof(phdr)); + + size += sizeof(phdr); + if (size > cprm->limit + || !dump_write(cprm->file, &phdr, sizeof(phdr))) + goto end_coredump; } dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); @@ -1979,7 +1952,10 @@ static int elf_core_dump(struct coredump_params *cprm) phdr.p_flags |= PF_X; phdr.p_align = ELF_EXEC_PAGESIZE; - DUMP_WRITE(&phdr, sizeof(phdr)); + size += sizeof(phdr); + if (size > cprm->limit + || !dump_write(cprm->file, &phdr, sizeof(phdr))) + goto end_coredump; } #ifdef ELF_CORE_WRITE_EXTRA_PHDRS diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 32d9b44c3cb..63edf40b569 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -1215,37 +1216,6 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, */ #ifdef CONFIG_ELF_CORE -/* - * These are the only things you should do on a core-file: use only these - * functions to write out all the necessary info. 
- */ -static int dump_write(struct file *file, const void *addr, int nr) -{ - return file->f_op->write(file, addr, nr, &file->f_pos) == nr; -} - -static int dump_seek(struct file *file, loff_t off) -{ - if (file->f_op->llseek && file->f_op->llseek != no_llseek) { - if (file->f_op->llseek(file, off, SEEK_CUR) < 0) - return 0; - } else { - char *buf = (char *)get_zeroed_page(GFP_KERNEL); - if (!buf) - return 0; - while (off > 0) { - unsigned long n = off; - if (n > PAGE_SIZE) - n = PAGE_SIZE; - if (!dump_write(file, buf, n)) - return 0; - off -= n; - } - free_page((unsigned long)buf); - } - return 1; -} - /* * Decide whether a segment is worth dumping; default is yes to be * sure (missing info is worse than too much; etc). @@ -1354,11 +1324,6 @@ static int writenote(struct memelfnote *men, struct file *file, } #undef DUMP_WRITE -#define DUMP_WRITE(addr, nr) \ - if ((size += (nr)) > cprm->limit || \ - !dump_write(cprm->file, (addr), (nr))) \ - goto end_coredump; - static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs) { memcpy(elf->e_ident, ELFMAG, SELFMAG); @@ -1743,7 +1708,11 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) fs = get_fs(); set_fs(KERNEL_DS); - DUMP_WRITE(elf, sizeof(*elf)); + size += sizeof(*elf); + if (size > cprm->limit + || !dump_write(cprm->file, elf, sizeof(*elf))) + goto end_coredump; + offset += sizeof(*elf); /* Elf header */ offset += (segs+1) * sizeof(struct elf_phdr); /* Program headers */ foffset = offset; @@ -1760,7 +1729,11 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) fill_elf_note_phdr(&phdr, sz, offset); offset += sz; - DUMP_WRITE(&phdr, sizeof(phdr)); + + size += sizeof(phdr); + if (size > cprm->limit + || !dump_write(cprm->file, &phdr, sizeof(phdr))) + goto end_coredump; } /* Page-align dumped data */ @@ -1794,7 +1767,10 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) phdr.p_flags |= PF_X; phdr.p_align = ELF_EXEC_PAGESIZE; - DUMP_WRITE(&phdr, sizeof(phdr)); + size += 
sizeof(phdr); + if (size > cprm->limit + || !dump_write(cprm->file, &phdr, sizeof(phdr))) + goto end_coredump; } #ifdef ELF_CORE_WRITE_EXTRA_PHDRS diff --git a/include/linux/coredump.h b/include/linux/coredump.h new file mode 100644 index 00000000000..b3c91d7cede --- /dev/null +++ b/include/linux/coredump.h @@ -0,0 +1,41 @@ +#ifndef _LINUX_COREDUMP_H +#define _LINUX_COREDUMP_H + +#include +#include +#include + +/* + * These are the only things you should do on a core-file: use only these + * functions to write out all the necessary info. + */ +static inline int dump_write(struct file *file, const void *addr, int nr) +{ + return file->f_op->write(file, addr, nr, &file->f_pos) == nr; +} + +static inline int dump_seek(struct file *file, loff_t off) +{ + if (file->f_op->llseek && file->f_op->llseek != no_llseek) { + if (file->f_op->llseek(file, off, SEEK_CUR) < 0) + return 0; + } else { + char *buf = (char *)get_zeroed_page(GFP_KERNEL); + + if (!buf) + return 0; + while (off > 0) { + unsigned long n = off; + + if (n > PAGE_SIZE) + n = PAGE_SIZE; + if (!dump_write(file, buf, n)) + return 0; + off -= n; + } + free_page((unsigned long)buf); + } + return 1; +} + +#endif /* _LINUX_COREDUMP_H */ -- cgit v1.2.3-70-g09d2 From 1fcccbac89f5bbc5e41aa72086960059fce372da Mon Sep 17 00:00:00 2001 From: Daisuke HATAYAMA Date: Fri, 5 Mar 2010 13:44:07 -0800 Subject: elf coredump: replace ELF_CORE_EXTRA_* macros by functions elf_core_dump() and elf_fdpic_core_dump() use #ifdef and the corresponding macros to hide _multiline_ logic inside functions. This patch removes the #ifdef and replaces ELF_CORE_EXTRA_* by corresponding functions. For architectures not implementing ELF_CORE_EXTRA_*, we use weak functions in order to limit the scope of the modification. This cleanup is for my next patches, but I think this cleanup itself is worth doing regardless of my final purpose. 
Signed-off-by: Daisuke HATAYAMA Cc: "Luck, Tony" Cc: Jeff Dike Cc: David Howells Cc: Greg Ungerer Cc: Roland McGrath Cc: Oleg Nesterov Cc: Ingo Molnar Cc: Alexander Viro Cc: Andi Kleen Cc: Alan Cox Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/include/asm/elf.h | 48 -------------------------------- arch/ia64/kernel/Makefile | 2 ++ arch/ia64/kernel/elfcore.c | 64 +++++++++++++++++++++++++++++++++++++++++++ arch/um/sys-i386/Makefile | 2 ++ arch/um/sys-i386/asm/elf.h | 43 ----------------------------- arch/um/sys-i386/elfcore.c | 67 +++++++++++++++++++++++++++++++++++++++++++++ fs/binfmt_elf.c | 14 ++++------ fs/binfmt_elf_fdpic.c | 14 ++++------ fs/compat_binfmt_elf.c | 2 ++ include/linux/elf.h | 2 ++ include/linux/elfcore.h | 16 +++++++++++ kernel/Makefile | 3 ++ kernel/elfcore.c | 23 ++++++++++++++++ 13 files changed, 191 insertions(+), 109 deletions(-) create mode 100644 arch/ia64/kernel/elfcore.c create mode 100644 arch/um/sys-i386/elfcore.c create mode 100644 kernel/elfcore.c (limited to 'fs/binfmt_elf_fdpic.c') diff --git a/arch/ia64/include/asm/elf.h b/arch/ia64/include/asm/elf.h index 4c41656ede8..b5298eb09ad 100644 --- a/arch/ia64/include/asm/elf.h +++ b/arch/ia64/include/asm/elf.h @@ -219,54 +219,6 @@ do { \ NEW_AUX_ENT(AT_SYSINFO_EHDR, (unsigned long) GATE_EHDR); \ } while (0) - -/* - * These macros parameterize elf_core_dump in fs/binfmt_elf.c to write out - * extra segments containing the gate DSO contents. Dumping its - * contents makes post-mortem fully interpretable later without matching up - * the same kernel and hardware config to see what PC values meant. - * Dumping its extra ELF program headers includes all the other information - * a debugger needs to easily find how the gate DSO was being used. 
- */ -#define ELF_CORE_EXTRA_PHDRS (GATE_EHDR->e_phnum) -#define ELF_CORE_WRITE_EXTRA_PHDRS \ -do { \ - const struct elf_phdr *const gate_phdrs = \ - (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); \ - int i; \ - Elf64_Off ofs = 0; \ - for (i = 0; i < GATE_EHDR->e_phnum; ++i) { \ - struct elf_phdr phdr = gate_phdrs[i]; \ - if (phdr.p_type == PT_LOAD) { \ - phdr.p_memsz = PAGE_ALIGN(phdr.p_memsz); \ - phdr.p_filesz = phdr.p_memsz; \ - if (ofs == 0) { \ - ofs = phdr.p_offset = offset; \ - offset += phdr.p_filesz; \ - } \ - else \ - phdr.p_offset = ofs; \ - } \ - else \ - phdr.p_offset += ofs; \ - phdr.p_paddr = 0; /* match other core phdrs */ \ - DUMP_WRITE(&phdr, sizeof(phdr)); \ - } \ -} while (0) -#define ELF_CORE_WRITE_EXTRA_DATA \ -do { \ - const struct elf_phdr *const gate_phdrs = \ - (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); \ - int i; \ - for (i = 0; i < GATE_EHDR->e_phnum; ++i) { \ - if (gate_phdrs[i].p_type == PT_LOAD) { \ - DUMP_WRITE((void *) gate_phdrs[i].p_vaddr, \ - PAGE_ALIGN(gate_phdrs[i].p_memsz)); \ - break; \ - } \ - } \ -} while (0) - /* * format for entries in the Global Offset Table */ diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 4138282aefa..db10b1e378b 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -45,6 +45,8 @@ endif obj-$(CONFIG_DMAR) += pci-dma.o obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o +obj-$(CONFIG_BINFMT_ELF) += elfcore.o + # fp_emulate() expects f2-f5,f16-f31 to contain the user-level state. 
CFLAGS_traps.o += -mfixed-range=f2-f5,f16-f31 diff --git a/arch/ia64/kernel/elfcore.c b/arch/ia64/kernel/elfcore.c new file mode 100644 index 00000000000..57a2298a858 --- /dev/null +++ b/arch/ia64/kernel/elfcore.c @@ -0,0 +1,64 @@ +#include +#include +#include +#include + +#include + + +Elf64_Half elf_core_extra_phdrs(void) +{ + return GATE_EHDR->e_phnum; +} + +int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size, + unsigned long limit) +{ + const struct elf_phdr *const gate_phdrs = + (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); + int i; + Elf64_Off ofs = 0; + + for (i = 0; i < GATE_EHDR->e_phnum; ++i) { + struct elf_phdr phdr = gate_phdrs[i]; + + if (phdr.p_type == PT_LOAD) { + phdr.p_memsz = PAGE_ALIGN(phdr.p_memsz); + phdr.p_filesz = phdr.p_memsz; + if (ofs == 0) { + ofs = phdr.p_offset = offset; + offset += phdr.p_filesz; + } else { + phdr.p_offset = ofs; + } + } else { + phdr.p_offset += ofs; + } + phdr.p_paddr = 0; /* match other core phdrs */ + *size += sizeof(phdr); + if (*size > limit || !dump_write(file, &phdr, sizeof(phdr))) + return 0; + } + return 1; +} + +int elf_core_write_extra_data(struct file *file, size_t *size, + unsigned long limit) +{ + const struct elf_phdr *const gate_phdrs = + (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); + int i; + + for (i = 0; i < GATE_EHDR->e_phnum; ++i) { + if (gate_phdrs[i].p_type == PT_LOAD) { + void *addr = (void *)gate_phdrs[i].p_vaddr; + size_t memsz = PAGE_ALIGN(gate_phdrs[i].p_memsz); + + *size += memsz; + if (*size > limit || !dump_write(file, addr, memsz)) + return 0; + break; + } + } + return 1; +} diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile index 1b549bca464..804b28dd032 100644 --- a/arch/um/sys-i386/Makefile +++ b/arch/um/sys-i386/Makefile @@ -6,6 +6,8 @@ obj-y = bug.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \ ptrace_user.o setjmp.o signal.o stub.o stub_segv.o syscalls.o sysrq.o \ sys_call_table.o tls.o 
+obj-$(CONFIG_BINFMT_ELF) += elfcore.o + subarch-obj-y = lib/semaphore_32.o lib/string_32.o subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem_32.o subarch-obj-$(CONFIG_MODULES) += kernel/module.o diff --git a/arch/um/sys-i386/asm/elf.h b/arch/um/sys-i386/asm/elf.h index 770885472ed..e64cd41d7ba 100644 --- a/arch/um/sys-i386/asm/elf.h +++ b/arch/um/sys-i386/asm/elf.h @@ -116,47 +116,4 @@ do { \ } \ } while (0) -/* - * These macros parameterize elf_core_dump in fs/binfmt_elf.c to write out - * extra segments containing the vsyscall DSO contents. Dumping its - * contents makes post-mortem fully interpretable later without matching up - * the same kernel and hardware config to see what PC values meant. - * Dumping its extra ELF program headers includes all the other information - * a debugger needs to easily find how the vsyscall DSO was being used. - */ -#define ELF_CORE_EXTRA_PHDRS \ - (vsyscall_ehdr ? (((struct elfhdr *)vsyscall_ehdr)->e_phnum) : 0 ) - -#define ELF_CORE_WRITE_EXTRA_PHDRS \ -if ( vsyscall_ehdr ) { \ - const struct elfhdr *const ehdrp = (struct elfhdr *)vsyscall_ehdr; \ - const struct elf_phdr *const phdrp = \ - (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff); \ - int i; \ - Elf32_Off ofs = 0; \ - for (i = 0; i < ehdrp->e_phnum; ++i) { \ - struct elf_phdr phdr = phdrp[i]; \ - if (phdr.p_type == PT_LOAD) { \ - ofs = phdr.p_offset = offset; \ - offset += phdr.p_filesz; \ - } \ - else \ - phdr.p_offset += ofs; \ - phdr.p_paddr = 0; /* match other core phdrs */ \ - DUMP_WRITE(&phdr, sizeof(phdr)); \ - } \ -} -#define ELF_CORE_WRITE_EXTRA_DATA \ -if ( vsyscall_ehdr ) { \ - const struct elfhdr *const ehdrp = (struct elfhdr *)vsyscall_ehdr; \ - const struct elf_phdr *const phdrp = \ - (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff); \ - int i; \ - for (i = 0; i < ehdrp->e_phnum; ++i) { \ - if (phdrp[i].p_type == PT_LOAD) \ - DUMP_WRITE((void *) phdrp[i].p_vaddr, \ - phdrp[i].p_filesz); \ - } \ -} - #endif diff --git 
a/arch/um/sys-i386/elfcore.c b/arch/um/sys-i386/elfcore.c new file mode 100644 index 00000000000..30cac52a04b --- /dev/null +++ b/arch/um/sys-i386/elfcore.c @@ -0,0 +1,67 @@ +#include +#include +#include +#include + +#include + + +Elf32_Half elf_core_extra_phdrs(void) +{ + return vsyscall_ehdr ? (((struct elfhdr *)vsyscall_ehdr)->e_phnum) : 0; +} + +int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size, + unsigned long limit) +{ + if ( vsyscall_ehdr ) { + const struct elfhdr *const ehdrp = + (struct elfhdr *) vsyscall_ehdr; + const struct elf_phdr *const phdrp = + (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff); + int i; + Elf32_Off ofs = 0; + + for (i = 0; i < ehdrp->e_phnum; ++i) { + struct elf_phdr phdr = phdrp[i]; + + if (phdr.p_type == PT_LOAD) { + ofs = phdr.p_offset = offset; + offset += phdr.p_filesz; + } else { + phdr.p_offset += ofs; + } + phdr.p_paddr = 0; /* match other core phdrs */ + *size += sizeof(phdr); + if (*size > limit + || !dump_write(file, &phdr, sizeof(phdr))) + return 0; + } + } + return 1; +} + +int elf_core_write_extra_data(struct file *file, size_t *size, + unsigned long limit) +{ + if ( vsyscall_ehdr ) { + const struct elfhdr *const ehdrp = + (struct elfhdr *) vsyscall_ehdr; + const struct elf_phdr *const phdrp = + (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff); + int i; + + for (i = 0; i < ehdrp->e_phnum; ++i) { + if (phdrp[i].p_type == PT_LOAD) { + void *addr = (void *) phdrp[i].p_vaddr; + size_t filesz = phdrp[i].p_filesz; + + *size += filesz; + if (*size > limit + || !dump_write(file, addr, filesz)) + return 0; + } + } + } + return 1; +} diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 0bcfbb05c32..c1a499599b7 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1878,9 +1878,7 @@ static int elf_core_dump(struct coredump_params *cprm) * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here. 
*/ segs = current->mm->map_count; -#ifdef ELF_CORE_EXTRA_PHDRS - segs += ELF_CORE_EXTRA_PHDRS; -#endif + segs += elf_core_extra_phdrs(); gate_vma = get_gate_vma(current); if (gate_vma != NULL) @@ -1958,9 +1956,8 @@ static int elf_core_dump(struct coredump_params *cprm) goto end_coredump; } -#ifdef ELF_CORE_WRITE_EXTRA_PHDRS - ELF_CORE_WRITE_EXTRA_PHDRS; -#endif + if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit)) + goto end_coredump; /* write out the notes section */ if (!write_note_info(&info, cprm->file, &foffset)) @@ -1999,9 +1996,8 @@ static int elf_core_dump(struct coredump_params *cprm) } } -#ifdef ELF_CORE_WRITE_EXTRA_DATA - ELF_CORE_WRITE_EXTRA_DATA; -#endif + if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit)) + goto end_coredump; end_coredump: set_fs(fs); diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 63edf40b569..952699a86ec 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1664,9 +1664,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) elf_core_copy_regs(&prstatus->pr_reg, cprm->regs); segs = current->mm->map_count; -#ifdef ELF_CORE_EXTRA_PHDRS - segs += ELF_CORE_EXTRA_PHDRS; -#endif + segs += elf_core_extra_phdrs(); /* Set up header */ fill_elf_fdpic_header(elf, segs + 1); /* including notes section */ @@ -1773,9 +1771,8 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) goto end_coredump; } -#ifdef ELF_CORE_WRITE_EXTRA_PHDRS - ELF_CORE_WRITE_EXTRA_PHDRS; -#endif + if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit)) + goto end_coredump; /* write out the notes section */ for (i = 0; i < numnote; i++) @@ -1799,9 +1796,8 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) mm_flags) < 0) goto end_coredump; -#ifdef ELF_CORE_WRITE_EXTRA_DATA - ELF_CORE_WRITE_EXTRA_DATA; -#endif + if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit)) + goto end_coredump; if (cprm->file->f_pos != offset) { /* Sanity check */ diff --git 
a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c index 0adced2f296..112e45a17e9 100644 --- a/fs/compat_binfmt_elf.c +++ b/fs/compat_binfmt_elf.c @@ -28,10 +28,12 @@ #undef elfhdr #undef elf_phdr +#undef elf_shdr #undef elf_note #undef elf_addr_t #define elfhdr elf32_hdr #define elf_phdr elf32_phdr +#define elf_shdr elf32_shdr #define elf_note elf32_note #define elf_addr_t Elf32_Addr diff --git a/include/linux/elf.h b/include/linux/elf.h index ad990c5f63f..ccde3fd45f3 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -396,6 +396,7 @@ extern Elf32_Dyn _DYNAMIC []; #define elf_phdr elf32_phdr #define elf_note elf32_note #define elf_addr_t Elf32_Off +#define Elf_Half Elf32_Half #else @@ -404,6 +405,7 @@ extern Elf64_Dyn _DYNAMIC []; #define elf_phdr elf64_phdr #define elf_note elf64_note #define elf_addr_t Elf64_Off +#define Elf_Half Elf64_Half #endif diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 00d6a68d042..cfda74f521b 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -8,6 +8,8 @@ #include #endif #include +#include +#include struct elf_siginfo { @@ -150,5 +152,19 @@ static inline int elf_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregse #endif /* __KERNEL__ */ +/* + * These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out + * extra segments containing the gate DSO contents. Dumping its + * contents makes post-mortem fully interpretable later without matching up + * the same kernel and hardware config to see what PC values meant. + * Dumping its extra ELF program headers includes all the other information + * a debugger needs to easily find how the gate DSO was being used. 
+ */ +extern Elf_Half elf_core_extra_phdrs(void); +extern int +elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size, + unsigned long limit); +extern int +elf_core_write_extra_data(struct file *file, size_t *size, unsigned long limit); #endif /* _LINUX_ELFCORE_H */ diff --git a/kernel/Makefile b/kernel/Makefile index 7b974699f8c..a987aa1676b 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -91,6 +91,9 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o obj-$(CONFIG_TRACEPOINTS) += tracepoint.o obj-$(CONFIG_LATENCYTOP) += latencytop.o +obj-$(CONFIG_BINFMT_ELF) += elfcore.o +obj-$(CONFIG_COMPAT_BINFMT_ELF) += elfcore.o +obj-$(CONFIG_BINFMT_ELF_FDPIC) += elfcore.o obj-$(CONFIG_FUNCTION_TRACER) += trace/ obj-$(CONFIG_TRACING) += trace/ obj-$(CONFIG_X86_DS) += trace/ diff --git a/kernel/elfcore.c b/kernel/elfcore.c new file mode 100644 index 00000000000..5445741f4b4 --- /dev/null +++ b/kernel/elfcore.c @@ -0,0 +1,23 @@ +#include +#include +#include + +#include + + +Elf_Half __weak elf_core_extra_phdrs(void) +{ + return 0; +} + +int __weak elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size, + unsigned long limit) +{ + return 1; +} + +int __weak elf_core_write_extra_data(struct file *file, size_t *size, + unsigned long limit) +{ + return 1; +} -- cgit v1.2.3-70-g09d2 From 93eb211e6c9ff6054fcf9c5b9e344d8d9ad29175 Mon Sep 17 00:00:00 2001 From: Daisuke HATAYAMA Date: Fri, 5 Mar 2010 13:44:09 -0800 Subject: elf coredump: make offset calculation process and writing process explicit By the next patch, elf_core_dump() and elf_fdpic_core_dump() will support extended numbering and so will produce the corefiles with section header table in a special case. The problem is the process of writing a file header offset of the section header table into e_shoff field of the ELF header. ELF header is positioned at the beginning of the corefile, while section header at the end. 
So, we need to choose one of the following approaches: 1. Seek backward and rewrite the ELF header after everything else has been written 2. Make the offset calculation process and the writing process totally sequential Option 1 is not always possible: one cannot assume that the file system supports seeking. Consider the no_llseek case. Therefore, this patch adopts option 2. Signed-off-by: Daisuke HATAYAMA Cc: "Luck, Tony" Cc: Jeff Dike Cc: David Howells Cc: Greg Ungerer Cc: Roland McGrath Cc: Oleg Nesterov Cc: Ingo Molnar Cc: Alexander Viro Cc: Andi Kleen Cc: Alan Cox Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 27 ++++++++++++++++----------- fs/binfmt_elf_fdpic.c | 29 ++++++++++++++++------------- 2 files changed, 32 insertions(+), 24 deletions(-) (limited to 'fs/binfmt_elf_fdpic.c') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index c1a499599b7..6fc49b6ed93 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1856,6 +1856,7 @@ static int elf_core_dump(struct coredump_params *cprm) loff_t offset = 0, dataoff, foffset; unsigned long mm_flags; struct elf_note_info info; + struct elf_phdr *phdr4note = NULL; /* * We no longer stop all VM operations. 
@@ -1898,28 +1899,22 @@ static int elf_core_dump(struct coredump_params *cprm) fs = get_fs(); set_fs(KERNEL_DS); - size += sizeof(*elf); - if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf))) - goto end_coredump; - offset += sizeof(*elf); /* Elf header */ offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */ foffset = offset; /* Write notes phdr entry */ { - struct elf_phdr phdr; size_t sz = get_note_info_size(&info); sz += elf_coredump_extra_notes_size(); - fill_elf_note_phdr(&phdr, sz, offset); - offset += sz; - - size += sizeof(phdr); - if (size > cprm->limit - || !dump_write(cprm->file, &phdr, sizeof(phdr))) + phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL); + if (!phdr4note) goto end_coredump; + + fill_elf_note_phdr(phdr4note, sz, offset); + offset += sz; } dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); @@ -1931,6 +1926,15 @@ static int elf_core_dump(struct coredump_params *cprm) */ mm_flags = current->mm->flags; + size += sizeof(*elf); + if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf))) + goto end_coredump; + + size += sizeof(*phdr4note); + if (size > cprm->limit + || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note))) + goto end_coredump; + /* Write program headers for segments dump */ for (vma = first_vma(current, gate_vma); vma != NULL; vma = next_vma(vma, gate_vma)) { @@ -2004,6 +2008,7 @@ end_coredump: cleanup: free_note_info(&info); + kfree(phdr4note); kfree(elf); out: return has_dumped; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 952699a86ec..112da491d75 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1600,6 +1600,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) int thread_status_size = 0; elf_addr_t *auxv; unsigned long mm_flags; + struct elf_phdr *phdr4note = NULL; /* * We no longer stop all VM operations. 
@@ -1706,18 +1707,12 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) fs = get_fs(); set_fs(KERNEL_DS); - size += sizeof(*elf); - if (size > cprm->limit - || !dump_write(cprm->file, elf, sizeof(*elf))) - goto end_coredump; - offset += sizeof(*elf); /* Elf header */ offset += (segs+1) * sizeof(struct elf_phdr); /* Program headers */ foffset = offset; /* Write notes phdr entry */ { - struct elf_phdr phdr; int sz = 0; for (i = 0; i < numnote; i++) @@ -1725,13 +1720,12 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) sz += thread_status_size; - fill_elf_note_phdr(&phdr, sz, offset); - offset += sz; - - size += sizeof(phdr); - if (size > cprm->limit - || !dump_write(cprm->file, &phdr, sizeof(phdr))) + phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL); + if (!phdr4note) goto end_coredump; + + fill_elf_note_phdr(phdr4note, sz, offset); + offset += sz; } /* Page-align dumped data */ @@ -1744,6 +1738,15 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) */ mm_flags = current->mm->flags; + size += sizeof(*elf); + if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf))) + goto end_coredump; + + size += sizeof(*phdr4note); + if (size > cprm->limit + || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note))) + goto end_coredump; + /* write program headers for segments dump */ for (vma = current->mm->mmap; vma; vma = vma->vm_next) { struct elf_phdr phdr; @@ -1815,7 +1818,7 @@ cleanup: list_del(tmp); kfree(list_entry(tmp, struct elf_thread_status, list)); } - + kfree(phdr4note); kfree(elf); kfree(prstatus); kfree(psinfo); -- cgit v1.2.3-70-g09d2 From 8d9032bbe4671dc481261ccd4e161cd96e54b118 Mon Sep 17 00:00:00 2001 From: Daisuke HATAYAMA Date: Fri, 5 Mar 2010 13:44:10 -0800 Subject: elf coredump: add extended numbering support The current ELF dumper implementation can produce broken corefiles if program headers exceed 65535. This number is determined by the number of vmas which the process have. 
In particular, some extreme programs may use more than 65535 vmas. (If you google max_map_count, you can find some users facing this problem.) This kind of program can never generate a correct coredump. This patch implements ``extended numbering'', which uses the sh_info field of the first section header instead of the e_phnum field in order to represent up to 4294967295 vmas. This is supported by the AMD64 ABI (http://www.x86-64.org/documentation.html) and Solaris (http://docs.sun.com/app/docs/doc/817-1984/). Of course, we are preparing patches for gdb and binutils. Signed-off-by: Daisuke HATAYAMA Cc: "Luck, Tony" Cc: Jeff Dike Cc: David Howells Cc: Greg Ungerer Cc: Roland McGrath Cc: Oleg Nesterov Cc: Ingo Molnar Cc: Alexander Viro Cc: Andi Kleen Cc: Alan Cox Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/elfcore.c | 16 +++++++++++ arch/um/sys-i386/elfcore.c | 16 +++++++++++ fs/binfmt_elf.c | 66 +++++++++++++++++++++++++++++++++++++++++++--- fs/binfmt_elf_fdpic.c | 63 +++++++++++++++++++++++++++++++++++++++++-- include/linux/elf.h | 26 +++++++++++++++++- include/linux/elfcore.h | 1 + kernel/elfcore.c | 5 ++++ 7 files changed, 187 insertions(+), 6 deletions(-) (limited to 'fs/binfmt_elf_fdpic.c') diff --git a/arch/ia64/kernel/elfcore.c b/arch/ia64/kernel/elfcore.c index 57a2298a858..bac1639bc32 100644 --- a/arch/ia64/kernel/elfcore.c +++ b/arch/ia64/kernel/elfcore.c @@ -62,3 +62,19 @@ int elf_core_write_extra_data(struct file *file, size_t *size, } return 1; } + +size_t elf_core_extra_data_size(void) +{ + const struct elf_phdr *const gate_phdrs = + (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); + int i; + size_t size = 0; + + for (i = 0; i < GATE_EHDR->e_phnum; ++i) { + if (gate_phdrs[i].p_type == PT_LOAD) { + size += PAGE_ALIGN(gate_phdrs[i].p_memsz); + break; + } + } + return size; +} diff --git a/arch/um/sys-i386/elfcore.c b/arch/um/sys-i386/elfcore.c index 30cac52a04b..6bb49b687c9 100644 --- a/arch/um/sys-i386/elfcore.c 
+++ b/arch/um/sys-i386/elfcore.c @@ -65,3 +65,19 @@ int elf_core_write_extra_data(struct file *file, size_t *size, } return 1; } + +size_t elf_core_extra_data_size(void) +{ + if ( vsyscall_ehdr ) { + const struct elfhdr *const ehdrp = + (struct elfhdr *)vsyscall_ehdr; + const struct elf_phdr *const phdrp = + (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff); + int i; + + for (i = 0; i < ehdrp->e_phnum; ++i) + if (phdrp[i].p_type == PT_LOAD) + return (size_t) phdrp[i].p_filesz; + } + return 0; +} diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 6fc49b6ed93..78de530cfb0 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1838,6 +1838,34 @@ static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma, return gate_vma; } +static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum, + elf_addr_t e_shoff, int segs) +{ + elf->e_shoff = e_shoff; + elf->e_shentsize = sizeof(*shdr4extnum); + elf->e_shnum = 1; + elf->e_shstrndx = SHN_UNDEF; + + memset(shdr4extnum, 0, sizeof(*shdr4extnum)); + + shdr4extnum->sh_type = SHT_NULL; + shdr4extnum->sh_size = elf->e_shnum; + shdr4extnum->sh_link = elf->e_shstrndx; + shdr4extnum->sh_info = segs; +} + +static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma, + unsigned long mm_flags) +{ + struct vm_area_struct *vma; + size_t size = 0; + + for (vma = first_vma(current, gate_vma); vma != NULL; + vma = next_vma(vma, gate_vma)) + size += vma_dump_size(vma, mm_flags); + return size; +} + /* * Actual dumper * @@ -1857,6 +1885,9 @@ static int elf_core_dump(struct coredump_params *cprm) unsigned long mm_flags; struct elf_note_info info; struct elf_phdr *phdr4note = NULL; + struct elf_shdr *shdr4extnum = NULL; + Elf_Half e_phnum; + elf_addr_t e_shoff; /* * We no longer stop all VM operations. 
@@ -1885,12 +1916,19 @@ static int elf_core_dump(struct coredump_params *cprm) if (gate_vma != NULL) segs++; + /* for notes section */ + segs++; + + /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid + * this, kernel supports extended numbering. Have a look at + * include/linux/elf.h for further information. */ + e_phnum = segs > PN_XNUM ? PN_XNUM : segs; + /* * Collect all the non-memory information about the process for the * notes. This also sets up the file header. */ - if (!fill_note_info(elf, segs + 1, /* including notes section */ - &info, cprm->signr, cprm->regs)) + if (!fill_note_info(elf, e_phnum, &info, cprm->signr, cprm->regs)) goto cleanup; has_dumped = 1; @@ -1900,7 +1938,7 @@ static int elf_core_dump(struct coredump_params *cprm) set_fs(KERNEL_DS); offset += sizeof(*elf); /* Elf header */ - offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */ + offset += segs * sizeof(struct elf_phdr); /* Program headers */ foffset = offset; /* Write notes phdr entry */ @@ -1926,6 +1964,19 @@ static int elf_core_dump(struct coredump_params *cprm) */ mm_flags = current->mm->flags; + offset += elf_core_vma_data_size(gate_vma, mm_flags); + offset += elf_core_extra_data_size(); + e_shoff = offset; + + if (e_phnum == PN_XNUM) { + shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL); + if (!shdr4extnum) + goto end_coredump; + fill_extnum_info(elf, shdr4extnum, e_shoff, segs); + } + + offset = dataoff; + size += sizeof(*elf); if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf))) goto end_coredump; @@ -2003,11 +2054,20 @@ static int elf_core_dump(struct coredump_params *cprm) if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit)) goto end_coredump; + if (e_phnum == PN_XNUM) { + size += sizeof(*shdr4extnum); + if (size > cprm->limit + || !dump_write(cprm->file, shdr4extnum, + sizeof(*shdr4extnum))) + goto end_coredump; + } + end_coredump: set_fs(fs); cleanup: free_note_info(&info); + kfree(shdr4extnum); 
kfree(phdr4note); kfree(elf); out: diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 112da491d75..e49d9c06a4b 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1505,6 +1505,22 @@ static int elf_dump_thread_status(long signr, struct elf_thread_status *t) return sz; } +static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum, + elf_addr_t e_shoff, int segs) +{ + elf->e_shoff = e_shoff; + elf->e_shentsize = sizeof(*shdr4extnum); + elf->e_shnum = 1; + elf->e_shstrndx = SHN_UNDEF; + + memset(shdr4extnum, 0, sizeof(*shdr4extnum)); + + shdr4extnum->sh_type = SHT_NULL; + shdr4extnum->sh_size = elf->e_shnum; + shdr4extnum->sh_link = elf->e_shstrndx; + shdr4extnum->sh_info = segs; +} + /* * dump the segments for an MMU process */ @@ -1569,6 +1585,17 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size, } #endif +static size_t elf_core_vma_data_size(unsigned long mm_flags) +{ + struct vm_area_struct *vma; + size_t size = 0; + + for (vma = current->mm->mmap; vma; vma = vma->vm_next) + if (maydump(vma, mm_flags)) + size += vma->vm_end - vma->vm_start; + return size; +} + /* * Actual dumper * @@ -1601,6 +1628,9 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) elf_addr_t *auxv; unsigned long mm_flags; struct elf_phdr *phdr4note = NULL; + struct elf_shdr *shdr4extnum = NULL; + Elf_Half e_phnum; + elf_addr_t e_shoff; /* * We no longer stop all VM operations. @@ -1667,8 +1697,16 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) segs = current->mm->map_count; segs += elf_core_extra_phdrs(); + /* for notes section */ + segs++; + + /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid + * this, kernel supports extended numbering. Have a look at + * include/linux/elf.h for further information. */ + e_phnum = segs > PN_XNUM ? 
PN_XNUM : segs; + /* Set up header */ - fill_elf_fdpic_header(elf, segs + 1); /* including notes section */ + fill_elf_fdpic_header(elf, e_phnum); has_dumped = 1; current->flags |= PF_DUMPCORE; @@ -1708,7 +1746,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) set_fs(KERNEL_DS); offset += sizeof(*elf); /* Elf header */ - offset += (segs+1) * sizeof(struct elf_phdr); /* Program headers */ + offset += segs * sizeof(struct elf_phdr); /* Program headers */ foffset = offset; /* Write notes phdr entry */ @@ -1738,6 +1776,19 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) */ mm_flags = current->mm->flags; + offset += elf_core_vma_data_size(mm_flags); + offset += elf_core_extra_data_size(); + e_shoff = offset; + + if (e_phnum == PN_XNUM) { + shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL); + if (!shdr4extnum) + goto end_coredump; + fill_extnum_info(elf, shdr4extnum, e_shoff, segs); + } + + offset = dataoff; + size += sizeof(*elf); if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf))) goto end_coredump; @@ -1802,6 +1853,14 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit)) goto end_coredump; + if (e_phnum == PN_XNUM) { + size += sizeof(*shdr4extnum); + if (size > cprm->limit + || !dump_write(cprm->file, shdr4extnum, + sizeof(*shdr4extnum))) + goto end_coredump; + } + if (cprm->file->f_pos != offset) { /* Sanity check */ printk(KERN_WARNING diff --git a/include/linux/elf.h b/include/linux/elf.h index ccde3fd45f3..59785841805 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -50,6 +50,28 @@ typedef __s64 Elf64_Sxword; #define PT_GNU_STACK (PT_LOOS + 0x474e551) +/* + * Extended Numbering + * + * If the real number of program header table entries is larger than + * or equal to PN_XNUM(0xffff), it is set to sh_info field of the + * section header at index 0, and PN_XNUM is set to e_phnum + * field. 
Otherwise, the section header at index 0 is zero + * initialized, if it exists. + * + * Specifications are available in: + * + * - Sun microsystems: Linker and Libraries. + * Part No: 817-1984-17, September 2008. + * URL: http://docs.sun.com/app/docs/doc/817-1984 + * + * - System V ABI AMD64 Architecture Processor Supplement + * Draft Version 0.99., + * May 11, 2009. + * URL: http://www.x86-64.org/ + */ +#define PN_XNUM 0xffff + /* These constants define the different elf file types */ #define ET_NONE 0 #define ET_REL 1 @@ -286,7 +308,7 @@ typedef struct elf64_phdr { #define SHN_COMMON 0xfff2 #define SHN_HIRESERVE 0xffff -typedef struct { +typedef struct elf32_shdr { Elf32_Word sh_name; Elf32_Word sh_type; Elf32_Word sh_flags; @@ -394,6 +416,7 @@ typedef struct elf64_note { extern Elf32_Dyn _DYNAMIC []; #define elfhdr elf32_hdr #define elf_phdr elf32_phdr +#define elf_shdr elf32_shdr #define elf_note elf32_note #define elf_addr_t Elf32_Off #define Elf_Half Elf32_Half @@ -403,6 +426,7 @@ extern Elf32_Dyn _DYNAMIC []; extern Elf64_Dyn _DYNAMIC []; #define elfhdr elf64_hdr #define elf_phdr elf64_phdr +#define elf_shdr elf64_shdr #define elf_note elf64_note #define elf_addr_t Elf64_Off #define Elf_Half Elf64_Half diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index cfda74f521b..e687bc3ba4d 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -166,5 +166,6 @@ elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size, unsigned long limit); extern int elf_core_write_extra_data(struct file *file, size_t *size, unsigned long limit); +extern size_t elf_core_extra_data_size(void); #endif /* _LINUX_ELFCORE_H */ diff --git a/kernel/elfcore.c b/kernel/elfcore.c index 5445741f4b4..ff915efef66 100644 --- a/kernel/elfcore.c +++ b/kernel/elfcore.c @@ -21,3 +21,8 @@ int __weak elf_core_write_extra_data(struct file *file, size_t *size, { return 1; } + +size_t __weak elf_core_extra_data_size(void) +{ + return 0; +} -- cgit 
v1.2.3-70-g09d2 From 30736a4d43f4af7f1a7836d6a266be17082195c4 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 5 Mar 2010 13:44:12 -0800 Subject: coredump: pass mm->flags as a coredump parameter for consistency Pass mm->flags as a coredump parameter for consistency. --- 1787 if (mm->core_state || !get_dumpable(mm)) { <- (1) 1788 up_write(&mm->mmap_sem); 1789 put_cred(cred); 1790 goto fail; 1791 } 1792 [...] 1798 if (get_dumpable(mm) == 2) { /* Setuid core dump mode */ <-(2) 1799 flag = O_EXCL; /* Stop rewrite attacks */ 1800 cred->fsuid = 0; /* Dump root private */ 1801 } --- Since dumpable bits are not protected by lock, there is a chance to change these bits between (1) and (2). To solve this issue, this patch copies mm->flags to coredump_params.mm_flags at the beginning of do_coredump() and uses it instead of get_dumpable() while dumping core. This copy is also passed to binfmt->core_dump, since elf*_core_dump() uses dump_filter bits in mm->flags. [akpm@linux-foundation.org: fix merge] Signed-off-by: Masami Hiramatsu Acked-by: Roland McGrath Cc: Hidehiro Kawai Cc: Oleg Nesterov Cc: Ingo Molnar Reviewed-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 14 +++----------- fs/binfmt_elf_fdpic.c | 14 +++----------- fs/exec.c | 20 ++++++++++++++++---- include/linux/binfmts.h | 1 + 4 files changed, 23 insertions(+), 26 deletions(-) (limited to 'fs/binfmt_elf_fdpic.c') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 78de530cfb0..535e763ab1a 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1882,7 +1882,6 @@ static int elf_core_dump(struct coredump_params *cprm) struct vm_area_struct *vma, *gate_vma; struct elfhdr *elf = NULL; loff_t offset = 0, dataoff, foffset; - unsigned long mm_flags; struct elf_note_info info; struct elf_phdr *phdr4note = NULL; struct elf_shdr *shdr4extnum = NULL; @@ -1957,14 +1956,7 @@ static int elf_core_dump(struct coredump_params *cprm) dataoff = offset = roundup(offset, 
ELF_EXEC_PAGESIZE); - /* - * We must use the same mm->flags while dumping core to avoid - * inconsistency between the program headers and bodies, otherwise an - * unusable core file can be generated. - */ - mm_flags = current->mm->flags; - - offset += elf_core_vma_data_size(gate_vma, mm_flags); + offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags); offset += elf_core_extra_data_size(); e_shoff = offset; @@ -1995,7 +1987,7 @@ static int elf_core_dump(struct coredump_params *cprm) phdr.p_offset = offset; phdr.p_vaddr = vma->vm_start; phdr.p_paddr = 0; - phdr.p_filesz = vma_dump_size(vma, mm_flags); + phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags); phdr.p_memsz = vma->vm_end - vma->vm_start; offset += phdr.p_filesz; phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0; @@ -2030,7 +2022,7 @@ static int elf_core_dump(struct coredump_params *cprm) unsigned long addr; unsigned long end; - end = vma->vm_start + vma_dump_size(vma, mm_flags); + end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags); for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) { struct page *page; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index e49d9c06a4b..6d6a16c5e9b 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1626,7 +1626,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) #endif int thread_status_size = 0; elf_addr_t *auxv; - unsigned long mm_flags; struct elf_phdr *phdr4note = NULL; struct elf_shdr *shdr4extnum = NULL; Elf_Half e_phnum; @@ -1769,14 +1768,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) /* Page-align dumped data */ dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); - /* - * We must use the same mm->flags while dumping core to avoid - * inconsistency between the program headers and bodies, otherwise an - * unusable core file can be generated. 
- */ - mm_flags = current->mm->flags; - - offset += elf_core_vma_data_size(mm_flags); + offset += elf_core_vma_data_size(cprm->mm_flags); offset += elf_core_extra_data_size(); e_shoff = offset; @@ -1809,7 +1801,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) phdr.p_offset = offset; phdr.p_vaddr = vma->vm_start; phdr.p_paddr = 0; - phdr.p_filesz = maydump(vma, mm_flags) ? sz : 0; + phdr.p_filesz = maydump(vma, cprm->mm_flags) ? sz : 0; phdr.p_memsz = sz; offset += phdr.p_filesz; phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0; @@ -1847,7 +1839,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) goto end_coredump; if (elf_fdpic_dump_segments(cprm->file, &size, &cprm->limit, - mm_flags) < 0) + cprm->mm_flags) < 0) goto end_coredump; if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit)) diff --git a/fs/exec.c b/fs/exec.c index da2b31dc4e1..89d4080c143 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1748,14 +1748,19 @@ void set_dumpable(struct mm_struct *mm, int value) } } -int get_dumpable(struct mm_struct *mm) +static int __get_dumpable(unsigned long mm_flags) { int ret; - ret = mm->flags & 0x3; + ret = mm_flags & MMF_DUMPABLE_MASK; return (ret >= 2) ? 2 : ret; } +int get_dumpable(struct mm_struct *mm) +{ + return __get_dumpable(mm->flags); +} + static void wait_for_dump_helpers(struct file *file) { struct pipe_inode_info *pipe; @@ -1799,6 +1804,12 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) .signr = signr, .regs = regs, .limit = rlimit(RLIMIT_CORE), + /* + * We must use the same mm->flags while dumping core to avoid + * inconsistency of bit flags, since this flag is not protected + * by any locks. + */ + .mm_flags = mm->flags, }; audit_core_dumps(signr); @@ -1817,7 +1828,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) /* * If another thread got here first, or we are not dumpable, bail out. 
*/ - if (mm->core_state || !get_dumpable(mm)) { + if (mm->core_state || !__get_dumpable(cprm.mm_flags)) { up_write(&mm->mmap_sem); put_cred(cred); goto fail; @@ -1828,7 +1839,8 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) * process nor do we know its entire history. We only know it * was tainted so we dump it as root in mode 2. */ - if (get_dumpable(mm) == 2) { /* Setuid core dump mode */ + if (__get_dumpable(cprm.mm_flags) == 2) { + /* Setuid core dump mode */ flag = O_EXCL; /* Stop rewrite attacks */ cred->fsuid = 0; /* Dump root private */ } diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 89c6249fc56..c809e286d21 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -74,6 +74,7 @@ struct coredump_params { struct pt_regs *regs; struct file *file; unsigned long limit; + unsigned long mm_flags; }; /* -- cgit v1.2.3-70-g09d2