diff options
Diffstat (limited to 'arch/x86_64/ia32')
-rw-r--r-- | arch/x86_64/ia32/Makefile | 32 | ||||
-rw-r--r-- | arch/x86_64/ia32/fpu32.c | 184 | ||||
-rw-r--r-- | arch/x86_64/ia32/ia32_aout.c | 529 | ||||
-rw-r--r-- | arch/x86_64/ia32/ia32_binfmt.c | 434 | ||||
-rw-r--r-- | arch/x86_64/ia32/ia32_ioctl.c | 201 | ||||
-rw-r--r-- | arch/x86_64/ia32/ia32_signal.c | 621 | ||||
-rw-r--r-- | arch/x86_64/ia32/ia32entry.S | 602 | ||||
-rw-r--r-- | arch/x86_64/ia32/ipc32.c | 57 | ||||
-rw-r--r-- | arch/x86_64/ia32/ptrace32.c | 379 | ||||
-rw-r--r-- | arch/x86_64/ia32/sys_ia32.c | 1050 | ||||
-rw-r--r-- | arch/x86_64/ia32/syscall32.c | 111 | ||||
-rw-r--r-- | arch/x86_64/ia32/tls32.c | 163 | ||||
-rw-r--r-- | arch/x86_64/ia32/vsyscall-sigreturn.S | 120 | ||||
-rw-r--r-- | arch/x86_64/ia32/vsyscall-syscall.S | 68 | ||||
-rw-r--r-- | arch/x86_64/ia32/vsyscall-sysenter.S | 94 | ||||
-rw-r--r-- | arch/x86_64/ia32/vsyscall.lds | 77 |
16 files changed, 4722 insertions, 0 deletions
diff --git a/arch/x86_64/ia32/Makefile b/arch/x86_64/ia32/Makefile new file mode 100644 index 00000000000..a12b19da4b5 --- /dev/null +++ b/arch/x86_64/ia32/Makefile @@ -0,0 +1,32 @@ +# +# Makefile for the ia32 kernel emulation subsystem. +# + +obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_ioctl.o \ + ia32_signal.o tls32.o \ + ia32_binfmt.o fpu32.o ptrace32.o syscall32.o + +sysv-$(CONFIG_SYSVIPC) := ipc32.o +obj-$(CONFIG_IA32_EMULATION) += $(sysv-y) + +obj-$(CONFIG_IA32_AOUT) += ia32_aout.o + +$(obj)/syscall32.o: $(src)/syscall32.c \ + $(foreach F,sysenter syscall,$(obj)/vsyscall-$F.so) + +# Teach kbuild about targets +targets := $(foreach F,sysenter syscall,vsyscall-$F.o vsyscall-$F.so) + +# The DSO images are built using a special linker script +quiet_cmd_syscall = SYSCALL $@ + cmd_syscall = $(CC) -m32 -nostdlib -shared -s \ + -Wl,-soname=linux-gate.so.1 -o $@ \ + -Wl,-T,$(filter-out FORCE,$^) + +$(obj)/vsyscall-sysenter.so $(obj)/vsyscall-syscall.so: \ +$(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE + $(call if_changed,syscall) + +AFLAGS_vsyscall-sysenter.o = -m32 +AFLAGS_vsyscall-syscall.o = -m32 +CFLAGS_ia32_ioctl.o += -Ifs/ diff --git a/arch/x86_64/ia32/fpu32.c b/arch/x86_64/ia32/fpu32.c new file mode 100644 index 00000000000..1c23095f181 --- /dev/null +++ b/arch/x86_64/ia32/fpu32.c @@ -0,0 +1,184 @@ +/* + * Copyright 2002 Andi Kleen, SuSE Labs. + * FXSAVE<->i387 conversion support. Based on code by Gareth Hughes. + * This is used for ptrace, signals and coredumps in 32bit emulation. + * $Id: fpu32.c,v 1.1 2002/03/21 14:16:32 ak Exp $ + */ + +#include <linux/sched.h> +#include <asm/sigcontext32.h> +#include <asm/processor.h> +#include <asm/uaccess.h> +#include <asm/i387.h> + +static inline unsigned short twd_i387_to_fxsr(unsigned short twd) +{ + unsigned int tmp; /* to avoid 16 bit prefixes in the code */ + + /* Transform each pair of bits into 01 (valid) or 00 (empty) */ + tmp = ~twd; + tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ + /* and move the valid bits to the lower byte. */ + tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ + tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ + tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ + return tmp; +} + +static inline unsigned long twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) +{ + struct _fpxreg *st = NULL; + unsigned long tos = (fxsave->swd >> 11) & 7; + unsigned long twd = (unsigned long) fxsave->twd; + unsigned long tag; + unsigned long ret = 0xffff0000; + int i; + +#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16); + + for (i = 0 ; i < 8 ; i++) { + if (twd & 0x1) { + st = FPREG_ADDR( fxsave, (i - tos) & 7 ); + + switch (st->exponent & 0x7fff) { + case 0x7fff: + tag = 2; /* Special */ + break; + case 0x0000: + if ( !st->significand[0] && + !st->significand[1] && + !st->significand[2] && + !st->significand[3] ) { + tag = 1; /* Zero */ + } else { + tag = 2; /* Special */ + } + break; + default: + if (st->significand[3] & 0x8000) { + tag = 0; /* Valid */ + } else { + tag = 2; /* Special */ + } + break; + } + } else { + tag = 3; /* Empty */ + } + ret |= (tag << (2 * i)); + twd = twd >> 1; + } + return ret; +} + + +static inline int convert_fxsr_from_user(struct i387_fxsave_struct *fxsave, + struct _fpstate_ia32 __user *buf) +{ + struct _fpxreg *to; + struct _fpreg __user *from; + int i; + u32 v; + int err = 0; + +#define G(num,val) err |= __get_user(val, num + (u32 __user *)buf) + G(0, fxsave->cwd); + G(1, fxsave->swd); + G(2, fxsave->twd); + fxsave->twd = twd_i387_to_fxsr(fxsave->twd); + G(3, fxsave->rip); + G(4, v); + fxsave->fop = v>>16; /* cs ignored */ + G(5, fxsave->rdp); + /* 6: ds ignored */ +#undef G + if (err) + return -1; + + to = (struct _fpxreg *)&fxsave->st_space[0]; + from = &buf->_st[0]; + for (i = 0 ; i < 8 ; i++, to++, from++) { + if (__copy_from_user(to, from, sizeof(*from))) + return -1; + } + return 0; +} + + +static inline int convert_fxsr_to_user(struct _fpstate_ia32 __user *buf, + struct i387_fxsave_struct *fxsave, + struct pt_regs *regs, + struct task_struct *tsk) +{ + struct _fpreg __user *to; + struct _fpxreg *from; + int i; + u16 cs,ds; + int err = 0; + + if (tsk == current) { + /* should be actually ds/cs at fpu exception time, + but that information is not available in 64bit mode. */ + asm("movw %%ds,%0 " : "=r" (ds)); + asm("movw %%cs,%0 " : "=r" (cs)); + } else { /* ptrace. task has stopped. */ + ds = tsk->thread.ds; + cs = regs->cs; + } + +#define P(num,val) err |= __put_user(val, num + (u32 __user *)buf) + P(0, (u32)fxsave->cwd | 0xffff0000); + P(1, (u32)fxsave->swd | 0xffff0000); + P(2, twd_fxsr_to_i387(fxsave)); + P(3, (u32)fxsave->rip); + P(4, cs | ((u32)fxsave->fop) << 16); + P(5, fxsave->rdp); + P(6, 0xffff0000 | ds); +#undef P + + if (err) + return -1; + + to = &buf->_st[0]; + from = (struct _fpxreg *) &fxsave->st_space[0]; + for ( i = 0 ; i < 8 ; i++, to++, from++ ) { + if (__copy_to_user(to, from, sizeof(*to))) + return -1; + } + return 0; +} + +int restore_i387_ia32(struct task_struct *tsk, struct _fpstate_ia32 __user *buf, int fsave) +{ + clear_fpu(tsk); + if (!fsave) { + if (__copy_from_user(&tsk->thread.i387.fxsave, + &buf->_fxsr_env[0], + sizeof(struct i387_fxsave_struct))) + return -1; + tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; + set_stopped_child_used_math(tsk); + } + return convert_fxsr_from_user(&tsk->thread.i387.fxsave, buf); +} + +int save_i387_ia32(struct task_struct *tsk, + struct _fpstate_ia32 __user *buf, + struct pt_regs *regs, + int fsave) +{ + int err = 0; + + init_fpu(tsk); + if (convert_fxsr_to_user(buf, &tsk->thread.i387.fxsave, regs, tsk)) + return -1; + if (fsave) + return 0; + err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status); + if (fsave) + return err ? -1 : 1; + err |= __put_user(X86_FXSR_MAGIC, &buf->magic); + err |= __copy_to_user(&buf->_fxsr_env[0], &tsk->thread.i387.fxsave, + sizeof(struct i387_fxsave_struct)); + return err ? -1 : 1; +} diff --git a/arch/x86_64/ia32/ia32_aout.c b/arch/x86_64/ia32/ia32_aout.c new file mode 100644 index 00000000000..1965efc974d --- /dev/null +++ b/arch/x86_64/ia32/ia32_aout.c @@ -0,0 +1,529 @@ +/* + * a.out loader for x86-64 + * + * Copyright (C) 1991, 1992, 1996 Linus Torvalds + * Hacked together by Andi Kleen + */ + +#include <linux/module.h> + +#include <linux/time.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/a.out.h> +#include <linux/errno.h> +#include <linux/signal.h> +#include <linux/string.h> +#include <linux/fs.h> +#include <linux/file.h> +#include <linux/stat.h> +#include <linux/fcntl.h> +#include <linux/ptrace.h> +#include <linux/user.h> +#include <linux/slab.h> +#include <linux/binfmts.h> +#include <linux/personality.h> +#include <linux/init.h> + +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/pgalloc.h> +#include <asm/cacheflush.h> +#include <asm/user32.h> +#include <asm/ia32.h> + +#undef WARN_OLD +#undef CORE_DUMP /* probably broken */ + +extern int ia32_setup_arg_pages(struct linux_binprm *bprm, + unsigned long stack_top, int exec_stack); + +static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs); +static int load_aout_library(struct file*); + +#if CORE_DUMP +static int aout_core_dump(long signr, struct pt_regs * regs, struct file *file); + +/* + * fill in the user structure for a core dump.. + */ +static void dump_thread32(struct pt_regs * regs, struct user32 * dump) +{ + u32 fs,gs; + +/* changed the size calculations - should hopefully work better. lbt */ + dump->magic = CMAGIC; + dump->start_code = 0; + dump->start_stack = regs->rsp & ~(PAGE_SIZE - 1); + dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT; + dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT; + dump->u_dsize -= dump->u_tsize; + dump->u_ssize = 0; + dump->u_debugreg[0] = current->thread.debugreg0; + dump->u_debugreg[1] = current->thread.debugreg1; + dump->u_debugreg[2] = current->thread.debugreg2; + dump->u_debugreg[3] = current->thread.debugreg3; + dump->u_debugreg[4] = 0; + dump->u_debugreg[5] = 0; + dump->u_debugreg[6] = current->thread.debugreg6; + dump->u_debugreg[7] = current->thread.debugreg7; + + if (dump->start_stack < 0xc0000000) + dump->u_ssize = ((unsigned long) (0xc0000000 - dump->start_stack)) >> PAGE_SHIFT; + + dump->regs.ebx = regs->rbx; + dump->regs.ecx = regs->rcx; + dump->regs.edx = regs->rdx; + dump->regs.esi = regs->rsi; + dump->regs.edi = regs->rdi; + dump->regs.ebp = regs->rbp; + dump->regs.eax = regs->rax; + dump->regs.ds = current->thread.ds; + dump->regs.es = current->thread.es; + asm("movl %%fs,%0" : "=r" (fs)); dump->regs.fs = fs; + asm("movl %%gs,%0" : "=r" (gs)); dump->regs.gs = gs; + dump->regs.orig_eax = regs->orig_rax; + dump->regs.eip = regs->rip; + dump->regs.cs = regs->cs; + dump->regs.eflags = regs->eflags; + dump->regs.esp = regs->rsp; + dump->regs.ss = regs->ss; + +#if 1 /* FIXME */ + dump->u_fpvalid = 0; +#else + dump->u_fpvalid = dump_fpu (regs, &dump->i387); +#endif +} + +#endif + +static struct linux_binfmt aout_format = { + .module = THIS_MODULE, + .load_binary = load_aout_binary, + .load_shlib = load_aout_library, +#if CORE_DUMP + .core_dump = aout_core_dump, +#endif + .min_coredump = PAGE_SIZE +}; + +static void set_brk(unsigned long start, unsigned long end) +{ + start = PAGE_ALIGN(start); + end = PAGE_ALIGN(end); + if (end <= start) + return; + down_write(¤t->mm->mmap_sem); + do_brk(start, end - start); + up_write(¤t->mm->mmap_sem); +} + +#if CORE_DUMP +/* + * These are the only things you should do on a core-file: use only these + * macros to write out all the necessary info. + */ + +static int dump_write(struct file *file, const void *addr, int nr) +{ + return file->f_op->write(file, addr, nr, &file->f_pos) == nr; +} + +#define DUMP_WRITE(addr, nr) \ + if (!dump_write(file, (void *)(addr), (nr))) \ + goto end_coredump; + +#define DUMP_SEEK(offset) \ +if (file->f_op->llseek) { \ + if (file->f_op->llseek(file,(offset),0) != (offset)) \ + goto end_coredump; \ +} else file->f_pos = (offset) + +/* + * Routine writes a core dump image in the current directory. + * Currently only a stub-function. + * + * Note that setuid/setgid files won't make a core-dump if the uid/gid + * changed due to the set[u|g]id. It's enforced by the "current->mm->dumpable" + * field, which also makes sure the core-dumps won't be recursive if the + * dumping of the process results in another error.. + */ + +static int aout_core_dump(long signr, struct pt_regs * regs, struct file *file) +{ + mm_segment_t fs; + int has_dumped = 0; + unsigned long dump_start, dump_size; + struct user32 dump; +# define START_DATA(u) (u.u_tsize << PAGE_SHIFT) +# define START_STACK(u) (u.start_stack) + + fs = get_fs(); + set_fs(KERNEL_DS); + has_dumped = 1; + current->flags |= PF_DUMPCORE; + strncpy(dump.u_comm, current->comm, sizeof(current->comm)); + dump.u_ar0 = (u32)(((unsigned long)(&dump.regs)) - ((unsigned long)(&dump))); + dump.signal = signr; + dump_thread32(regs, &dump); + +/* If the size of the dump file exceeds the rlimit, then see what would happen + if we wrote the stack, but not the data area. */ + if ((dump.u_dsize+dump.u_ssize+1) * PAGE_SIZE > + current->signal->rlim[RLIMIT_CORE].rlim_cur) + dump.u_dsize = 0; + +/* Make sure we have enough room to write the stack and data areas. */ + if ((dump.u_ssize+1) * PAGE_SIZE > + current->signal->rlim[RLIMIT_CORE].rlim_cur) + dump.u_ssize = 0; + +/* make sure we actually have a data and stack area to dump */ + set_fs(USER_DS); + if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_DATA(dump), dump.u_dsize << PAGE_SHIFT)) + dump.u_dsize = 0; + if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_STACK(dump), dump.u_ssize << PAGE_SHIFT)) + dump.u_ssize = 0; + + set_fs(KERNEL_DS); +/* struct user */ + DUMP_WRITE(&dump,sizeof(dump)); +/* Now dump all of the user data. Include malloced stuff as well */ + DUMP_SEEK(PAGE_SIZE); +/* now we start writing out the user space info */ + set_fs(USER_DS); +/* Dump the data area */ + if (dump.u_dsize != 0) { + dump_start = START_DATA(dump); + dump_size = dump.u_dsize << PAGE_SHIFT; + DUMP_WRITE(dump_start,dump_size); + } +/* Now prepare to dump the stack area */ + if (dump.u_ssize != 0) { + dump_start = START_STACK(dump); + dump_size = dump.u_ssize << PAGE_SHIFT; + DUMP_WRITE(dump_start,dump_size); + } +/* Finally dump the task struct. Not be used by gdb, but could be useful */ + set_fs(KERNEL_DS); + DUMP_WRITE(current,sizeof(*current)); +end_coredump: + set_fs(fs); + return has_dumped; +} +#endif + +/* + * create_aout_tables() parses the env- and arg-strings in new user + * memory and creates the pointer tables from them, and puts their + * addresses on the "stack", returning the new stack pointer value. + */ +static u32 __user *create_aout_tables(char __user *p, struct linux_binprm *bprm) +{ + u32 __user *argv; + u32 __user *envp; + u32 __user *sp; + int argc = bprm->argc; + int envc = bprm->envc; + + sp = (u32 __user *) ((-(unsigned long)sizeof(u32)) & (unsigned long) p); + sp -= envc+1; + envp = sp; + sp -= argc+1; + argv = sp; + put_user((unsigned long) envp,--sp); + put_user((unsigned long) argv,--sp); + put_user(argc,--sp); + current->mm->arg_start = (unsigned long) p; + while (argc-->0) { + char c; + put_user((u32)(unsigned long)p,argv++); + do { + get_user(c,p++); + } while (c); + } + put_user(NULL,argv); + current->mm->arg_end = current->mm->env_start = (unsigned long) p; + while (envc-->0) { + char c; + put_user((u32)(unsigned long)p,envp++); + do { + get_user(c,p++); + } while (c); + } + put_user(NULL,envp); + current->mm->env_end = (unsigned long) p; + return sp; +} + +/* + * These are the functions used to load a.out style executables and shared + * libraries. There is no binary dependent code anywhere else. + */ + +static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) +{ + struct exec ex; + unsigned long error; + unsigned long fd_offset; + unsigned long rlim; + int retval; + + ex = *((struct exec *) bprm->buf); /* exec-header */ + if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC && + N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) || + N_TRSIZE(ex) || N_DRSIZE(ex) || + i_size_read(bprm->file->f_dentry->d_inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { + return -ENOEXEC; + } + + fd_offset = N_TXTOFF(ex); + + /* Check initial limits. This avoids letting people circumvent + * size limits imposed on them by creating programs with large + * arrays in the data or bss. + */ + rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur; + if (rlim >= RLIM_INFINITY) + rlim = ~0; + if (ex.a_data + ex.a_bss > rlim) + return -ENOMEM; + + /* Flush all traces of the currently running executable */ + retval = flush_old_exec(bprm); + if (retval) + return retval; + + regs->cs = __USER32_CS; + regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 = + regs->r13 = regs->r14 = regs->r15 = 0; + + /* OK, This is the point of no return */ + set_personality(PER_LINUX); + set_thread_flag(TIF_IA32); + clear_thread_flag(TIF_ABI_PENDING); + + current->mm->end_code = ex.a_text + + (current->mm->start_code = N_TXTADDR(ex)); + current->mm->end_data = ex.a_data + + (current->mm->start_data = N_DATADDR(ex)); + current->mm->brk = ex.a_bss + + (current->mm->start_brk = N_BSSADDR(ex)); + current->mm->free_area_cache = TASK_UNMAPPED_BASE; + + set_mm_counter(current->mm, rss, 0); + current->mm->mmap = NULL; + compute_creds(bprm); + current->flags &= ~PF_FORKNOEXEC; + + if (N_MAGIC(ex) == OMAGIC) { + unsigned long text_addr, map_size; + loff_t pos; + + text_addr = N_TXTADDR(ex); + + pos = 32; + map_size = ex.a_text+ex.a_data; + + down_write(¤t->mm->mmap_sem); + error = do_brk(text_addr & PAGE_MASK, map_size); + up_write(¤t->mm->mmap_sem); + + if (error != (text_addr & PAGE_MASK)) { + send_sig(SIGKILL, current, 0); + return error; + } + + error = bprm->file->f_op->read(bprm->file, (char *)text_addr, + ex.a_text+ex.a_data, &pos); + if ((signed long)error < 0) { + send_sig(SIGKILL, current, 0); + return error; + } + + flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data); + } else { +#ifdef WARN_OLD + static unsigned long error_time, error_time2; + if ((ex.a_text & 0xfff || ex.a_data & 0xfff) && + (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ) + { + printk(KERN_NOTICE "executable not page aligned\n"); + error_time2 = jiffies; + } + + if ((fd_offset & ~PAGE_MASK) != 0 && + (jiffies-error_time) > 5*HZ) + { + printk(KERN_WARNING + "fd_offset is not page aligned. Please convert program: %s\n", + bprm->file->f_dentry->d_name.name); + error_time = jiffies; + } +#endif + + if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) { + loff_t pos = fd_offset; + down_write(¤t->mm->mmap_sem); + do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); + up_write(¤t->mm->mmap_sem); + bprm->file->f_op->read(bprm->file,(char *)N_TXTADDR(ex), + ex.a_text+ex.a_data, &pos); + flush_icache_range((unsigned long) N_TXTADDR(ex), + (unsigned long) N_TXTADDR(ex) + + ex.a_text+ex.a_data); + goto beyond_if; + } + + down_write(¤t->mm->mmap_sem); + error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text, + PROT_READ | PROT_EXEC, + MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE | MAP_32BIT, + fd_offset); + up_write(¤t->mm->mmap_sem); + + if (error != N_TXTADDR(ex)) { + send_sig(SIGKILL, current, 0); + return error; + } + + down_write(¤t->mm->mmap_sem); + error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE | MAP_32BIT, + fd_offset + ex.a_text); + up_write(¤t->mm->mmap_sem); + if (error != N_DATADDR(ex)) { + send_sig(SIGKILL, current, 0); + return error; + } + } +beyond_if: + set_binfmt(&aout_format); + + set_brk(current->mm->start_brk, current->mm->brk); + + retval = ia32_setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT); + if (retval < 0) { + /* Someone check-me: is this error path enough? */ + send_sig(SIGKILL, current, 0); + return retval; + } + + current->mm->start_stack = + (unsigned long)create_aout_tables((char __user *)bprm->p, bprm); + /* start thread */ + asm volatile("movl %0,%%fs" :: "r" (0)); \ + asm volatile("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS)); + load_gs_index(0); + (regs)->rip = ex.a_entry; + (regs)->rsp = current->mm->start_stack; + (regs)->eflags = 0x200; + (regs)->cs = __USER32_CS; + (regs)->ss = __USER32_DS; + set_fs(USER_DS); + if (unlikely(current->ptrace & PT_PTRACED)) { + if (current->ptrace & PT_TRACE_EXEC) + ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP); + else + send_sig(SIGTRAP, current, 0); + } + return 0; +} + +static int load_aout_library(struct file *file) +{ + struct inode * inode; + unsigned long bss, start_addr, len; + unsigned long error; + int retval; + struct exec ex; + + inode = file->f_dentry->d_inode; + + retval = -ENOEXEC; + error = kernel_read(file, 0, (char *) &ex, sizeof(ex)); + if (error != sizeof(ex)) + goto out; + + /* We come in here for the regular a.out style of shared libraries */ + if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) || + N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) || + i_size_read(inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { + goto out; + } + + if (N_FLAGS(ex)) + goto out; + + /* For QMAGIC, the starting address is 0x20 into the page. We mask + this off to get the starting address for the page */ + + start_addr = ex.a_entry & 0xfffff000; + + if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) { + loff_t pos = N_TXTOFF(ex); + +#ifdef WARN_OLD + static unsigned long error_time; + if ((jiffies-error_time) > 5*HZ) + { + printk(KERN_WARNING + "N_TXTOFF is not page aligned. Please convert library: %s\n", + file->f_dentry->d_name.name); + error_time = jiffies; + } +#endif + down_write(¤t->mm->mmap_sem); + do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); + up_write(¤t->mm->mmap_sem); + + file->f_op->read(file, (char *)start_addr, + ex.a_text + ex.a_data, &pos); + flush_icache_range((unsigned long) start_addr, + (unsigned long) start_addr + ex.a_text + ex.a_data); + + retval = 0; + goto out; + } + /* Now use mmap to map the library into memory. */ + down_write(¤t->mm->mmap_sem); + error = do_mmap(file, start_addr, ex.a_text + ex.a_data, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_32BIT, + N_TXTOFF(ex)); + up_write(¤t->mm->mmap_sem); + retval = error; + if (error != start_addr) + goto out; + + len = PAGE_ALIGN(ex.a_text + ex.a_data); + bss = ex.a_text + ex.a_data + ex.a_bss; + if (bss > len) { + down_write(¤t->mm->mmap_sem); + error = do_brk(start_addr + len, bss - len); + up_write(¤t->mm->mmap_sem); + retval = error; + if (error != start_addr + len) + goto out; + } + retval = 0; +out: + return retval; +} + +static int __init init_aout_binfmt(void) +{ + return register_binfmt(&aout_format); +} + +static void __exit exit_aout_binfmt(void) +{ + unregister_binfmt(&aout_format); +} + +module_init(init_aout_binfmt); +module_exit(exit_aout_binfmt); +MODULE_LICENSE("GPL"); diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c new file mode 100644 index 00000000000..93d568dfa76 --- /dev/null +++ b/arch/x86_64/ia32/ia32_binfmt.c @@ -0,0 +1,434 @@ +/* + * Written 2000,2002 by Andi Kleen. + * + * Loosely based on the sparc64 and IA64 32bit emulation loaders. + * This tricks binfmt_elf.c into loading 32bit binaries using lots + * of ugly preprocessor tricks. Talk about very very poor man's inheritance. + */ +#include <linux/types.h> +#include <linux/config.h> +#include <linux/stddef.h> +#include <linux/rwsem.h> +#include <linux/sched.h> +#include <linux/compat.h> +#include <linux/string.h> +#include <linux/binfmts.h> +#include <linux/mm.h> +#include <linux/security.h> + +#include <asm/segment.h> +#include <asm/ptrace.h> +#include <asm/processor.h> +#include <asm/user32.h> +#include <asm/sigcontext32.h> +#include <asm/fpu32.h> +#include <asm/i387.h> +#include <asm/uaccess.h> +#include <asm/ia32.h> +#include <asm/vsyscall32.h> + +#define ELF_NAME "elf/i386" + +#define AT_SYSINFO 32 +#define AT_SYSINFO_EHDR 33 + +int sysctl_vsyscall32 = 1; + +#define ARCH_DLINFO do { \ + if (sysctl_vsyscall32) { \ + NEW_AUX_ENT(AT_SYSINFO, (u32)(u64)VSYSCALL32_VSYSCALL); \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL32_BASE); \ + } \ +} while(0) + +struct file; +struct elf_phdr; + +#define IA32_EMULATOR 1 + +#define ELF_ET_DYN_BASE (TASK_UNMAPPED_32 + 0x1000000) + +#undef ELF_ARCH +#define ELF_ARCH EM_386 + +#undef ELF_CLASS +#define ELF_CLASS ELFCLASS32 + +#define ELF_DATA ELFDATA2LSB + +#define USE_ELF_CORE_DUMP 1 + +/* Overwrite elfcore.h */ +#define _LINUX_ELFCORE_H 1 +typedef unsigned int elf_greg_t; + +#define ELF_NGREG (sizeof (struct user_regs_struct32) / sizeof(elf_greg_t)) +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; + +/* + * These macros parameterize elf_core_dump in fs/binfmt_elf.c to write out + * extra segments containing the vsyscall DSO contents. Dumping its + * contents makes post-mortem fully interpretable later without matching up + * the same kernel and hardware config to see what PC values meant. + * Dumping its extra ELF program headers includes all the other information + * a debugger needs to easily find how the vsyscall DSO was being used. + */ +#define ELF_CORE_EXTRA_PHDRS (VSYSCALL32_EHDR->e_phnum) +#define ELF_CORE_WRITE_EXTRA_PHDRS \ +do { \ + const struct elf32_phdr *const vsyscall_phdrs = \ + (const struct elf32_phdr *) (VSYSCALL32_BASE \ + + VSYSCALL32_EHDR->e_phoff); \ + int i; \ + Elf32_Off ofs = 0; \ + for (i = 0; i < VSYSCALL32_EHDR->e_phnum; ++i) { \ + struct elf32_phdr phdr = vsyscall_phdrs[i]; \ + if (phdr.p_type == PT_LOAD) { \ + BUG_ON(ofs != 0); \ + ofs = phdr.p_offset = offset; \ + phdr.p_memsz = PAGE_ALIGN(phdr.p_memsz); \ + phdr.p_filesz = phdr.p_memsz; \ + offset += phdr.p_filesz; \ + } \ + else \ + phdr.p_offset += ofs; \ + phdr.p_paddr = 0; /* match other core phdrs */ \ + DUMP_WRITE(&phdr, sizeof(phdr)); \ + } \ +} while (0) +#define ELF_CORE_WRITE_EXTRA_DATA \ +do { \ + const struct elf32_phdr *const vsyscall_phdrs = \ + (const struct elf32_phdr *) (VSYSCALL32_BASE \ + + VSYSCALL32_EHDR->e_phoff); \ + int i; \ + for (i = 0; i < VSYSCALL32_EHDR->e_phnum; ++i) { \ + if (vsyscall_phdrs[i].p_type == PT_LOAD) \ + DUMP_WRITE((void *) (u64) vsyscall_phdrs[i].p_vaddr, \ + PAGE_ALIGN(vsyscall_phdrs[i].p_memsz)); \ + } \ +} while (0) + +struct elf_siginfo +{ + int si_signo; /* signal number */ + int si_code; /* extra code */ + int si_errno; /* errno */ +}; + +#define jiffies_to_timeval(a,b) do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; }while(0) + +struct elf_prstatus +{ + struct elf_siginfo pr_info; /* Info associated with signal */ + short pr_cursig; /* Current signal */ + unsigned int pr_sigpend; /* Set of pending signals */ + unsigned int pr_sighold; /* Set of held signals */ + pid_t pr_pid; + pid_t pr_ppid; + pid_t pr_pgrp; + pid_t pr_sid; + struct compat_timeval pr_utime; /* User time */ + struct compat_timeval pr_stime; /* System time */ + struct compat_timeval pr_cutime; /* Cumulative user time */ + struct compat_timeval pr_cstime; /* Cumulative system time */ + elf_gregset_t pr_reg; /* GP registers */ + int pr_fpvalid; /* True if math co-processor being used. */ +}; + +#define ELF_PRARGSZ (80) /* Number of chars for args */ + +struct elf_prpsinfo +{ + char pr_state; /* numeric process state */ + char pr_sname; /* char for pr_state */ + char pr_zomb; /* zombie */ + char pr_nice; /* nice val */ + unsigned int pr_flag; /* flags */ + __u16 pr_uid; + __u16 pr_gid; + pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid; + /* Lots missing */ + char pr_fname[16]; /* filename of executable */ + char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */ +}; + +#define __STR(x) #x +#define STR(x) __STR(x) + +#define _GET_SEG(x) \ + ({ __u32 seg; asm("movl %%" STR(x) ",%0" : "=r"(seg)); seg; }) + +/* Assumes current==process to be dumped */ +#define ELF_CORE_COPY_REGS(pr_reg, regs) \ + pr_reg[0] = regs->rbx; \ + pr_reg[1] = regs->rcx; \ + pr_reg[2] = regs->rdx; \ + pr_reg[3] = regs->rsi; \ + pr_reg[4] = regs->rdi; \ + pr_reg[5] = regs->rbp; \ + pr_reg[6] = regs->rax; \ + pr_reg[7] = _GET_SEG(ds); \ + pr_reg[8] = _GET_SEG(es); \ + pr_reg[9] = _GET_SEG(fs); \ + pr_reg[10] = _GET_SEG(gs); \ + pr_reg[11] = regs->orig_rax; \ + pr_reg[12] = regs->rip; \ + pr_reg[13] = regs->cs; \ + pr_reg[14] = regs->eflags; \ + pr_reg[15] = regs->rsp; \ + pr_reg[16] = regs->ss; + +#define user user32 + +#define __ASM_X86_64_ELF_H 1 +#define elf_read_implies_exec(ex, have_pt_gnu_stack) (!(have_pt_gnu_stack)) +//#include <asm/ia32.h> +#include <linux/elf.h> + +typedef struct user_i387_ia32_struct elf_fpregset_t; +typedef struct user32_fxsr_struct elf_fpxregset_t; + + +static inline void elf_core_copy_regs(elf_gregset_t *elfregs, struct pt_regs *regs) +{ + ELF_CORE_COPY_REGS((*elfregs), regs) +} + +static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t* elfregs) +{ + struct pt_regs *pp = (struct pt_regs *)(t->thread.rsp0); + --pp; + ELF_CORE_COPY_REGS((*elfregs), pp); + /* fix wrong segments */ + (*elfregs)[7] = t->thread.ds; + (*elfregs)[9] = t->thread.fsindex; + (*elfregs)[10] = t->thread.gsindex; + (*elfregs)[8] = t->thread.es; + return 1; +} + +static inline int +elf_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs, elf_fpregset_t *fpu) +{ + struct _fpstate_ia32 *fpstate = (void*)fpu; + mm_segment_t oldfs = get_fs(); + + if (!tsk_used_math(tsk)) + return 0; + if (!regs) + regs = (struct pt_regs *)tsk->thread.rsp0; + --regs; + if (tsk == current) + unlazy_fpu(tsk); + set_fs(KERNEL_DS); + save_i387_ia32(tsk, fpstate, regs, 1); + /* Correct for i386 bug. It puts the fop into the upper 16bits of + the tag word (like FXSAVE), not into the fcs*/ + fpstate->cssel |= fpstate->tag & 0xffff0000; + set_fs(oldfs); + return 1; +} + +#define ELF_CORE_COPY_XFPREGS 1 +static inline int +elf_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregset_t *xfpu) +{ + struct pt_regs *regs = ((struct pt_regs *)(t->thread.rsp0))-1; + if (!tsk_used_math(t)) + return 0; + if (t == current) + unlazy_fpu(t); + memcpy(xfpu, &t->thread.i387.fxsave, sizeof(elf_fpxregset_t)); + xfpu->fcs = regs->cs; + xfpu->fos = t->thread.ds; /* right? */ + return 1; +} + +#undef elf_check_arch +#define elf_check_arch(x) \ + ((x)->e_machine == EM_386) + +extern int force_personality32; + +#define ELF_EXEC_PAGESIZE PAGE_SIZE +#define ELF_HWCAP (boot_cpu_data.x86_capability[0]) +#define ELF_PLATFORM ("i686") +#define SET_PERSONALITY(ex, ibcs2) \ +do { \ + unsigned long new_flags = 0; \ + if ((ex).e_ident[EI_CLASS] == ELFCLASS32) \ + new_flags = _TIF_IA32; \ + if ((current_thread_info()->flags & _TIF_IA32) \ + != new_flags) \ + set_thread_flag(TIF_ABI_PENDING); \ + else \ + clear_thread_flag(TIF_ABI_PENDING); \ + /* XXX This overwrites the user set personality */ \ + current->personality |= force_personality32; \ +} while (0) + +/* Override some function names */ +#define elf_format elf32_format + +#define init_elf_binfmt init_elf32_binfmt +#define exit_elf_binfmt exit_elf32_binfmt + +#define load_elf_binary load_elf32_binary + +#define ELF_PLAT_INIT(r, load_addr) elf32_init(r) +#define setup_arg_pages(bprm, stack_top, exec_stack) \ + ia32_setup_arg_pages(bprm, stack_top, exec_stack) +int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int executable_stack); + +#undef start_thread +#define start_thread(regs,new_rip,new_rsp) do { \ + asm volatile("movl %0,%%fs" :: "r" (0)); \ + asm volatile("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS)); \ + load_gs_index(0); \ + (regs)->rip = (new_rip); \ + (regs)->rsp = (new_rsp); \ + (regs)->eflags = 0x200; \ + (regs)->cs = __USER32_CS; \ + (regs)->ss = __USER32_DS; \ + set_fs(USER_DS); \ +} while(0) + + +#define elf_map elf32_map + +#include <linux/module.h> + +MODULE_DESCRIPTION("Binary format loader for compatibility with IA32 ELF binaries."); +MODULE_AUTHOR("Eric Youngdale, Andi Kleen"); + +#undef MODULE_DESCRIPTION +#undef MODULE_AUTHOR + +#define elf_addr_t __u32 + +#undef TASK_SIZE +#define TASK_SIZE 0xffffffff + +static void elf32_init(struct pt_regs *); + +#include "../../../fs/binfmt_elf.c" + +static void elf32_init(struct pt_regs *regs) +{ + struct task_struct *me = current; + regs->rdi = 0; + regs->rsi = 0; + regs->rdx = 0; + regs->rcx = 0; + regs->rax = 0; + regs->rbx = 0; + regs->rbp = 0; + regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 = + regs->r13 = regs->r14 = regs->r15 = 0; + me->thread.fs = 0; + me->thread.gs = 0; + me->thread.fsindex = 0; + me->thread.gsindex = 0; + me->thread.ds = __USER_DS; + me->thread.es = __USER_DS; +} + +int setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int executable_stack) +{ + unsigned long stack_base; + struct vm_area_struct *mpnt; + struct mm_struct *mm = current->mm; + int i, ret; + + stack_base = IA32_STACK_TOP - MAX_ARG_PAGES * PAGE_SIZE; + mm->arg_start = bprm->p + stack_base; + + bprm->p += stack_base; + if (bprm->loader) + bprm->loader += stack_base; + bprm->exec += stack_base; + + mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (!mpnt) + return -ENOMEM; + + if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { + kmem_cache_free(vm_area_cachep, mpnt); + return -ENOMEM; + } + + memset(mpnt, 0, sizeof(*mpnt)); + + down_write(&mm->mmap_sem); + { + mpnt->vm_mm = mm; + mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p; + mpnt->vm_end = IA32_STACK_TOP; + if (executable_stack == EXSTACK_ENABLE_X) + mpnt->vm_flags = VM_STACK_FLAGS | VM_EXEC; + else if (executable_stack == EXSTACK_DISABLE_X) + mpnt->vm_flags = VM_STACK_FLAGS & ~VM_EXEC; + else + mpnt->vm_flags = VM_STACK_FLAGS; + mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC) ? + PAGE_COPY_EXEC : PAGE_COPY; + if ((ret = insert_vm_struct(mm, mpnt))) { + up_write(&mm->mmap_sem); + kmem_cache_free(vm_area_cachep, mpnt); + return ret; + } + mm->stack_vm = mm->total_vm = vma_pages(mpnt); + } + + for (i = 0 ; i < MAX_ARG_PAGES ; i++) { + struct page *page = bprm->page[i]; + if (page) { + bprm->page[i] = NULL; + install_arg_page(mpnt, page, stack_base); + } + stack_base += PAGE_SIZE; + } + up_write(&mm->mmap_sem); + + return 0; +} + +static unsigned long +elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type) +{ + unsigned long map_addr; + struct task_struct *me = current; + + down_write(&me->mm->mmap_sem); + map_addr = do_mmap(filep, ELF_PAGESTART(addr), + eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr), prot, + type, + eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr)); + up_write(&me->mm->mmap_sem); + return(map_addr); +} + +#ifdef CONFIG_SYSCTL +/* Register vsyscall32 into the ABI table */ +#include <linux/sysctl.h> + +static ctl_table abi_table2[] = { + { 99, "vsyscall32", &sysctl_vsyscall32, sizeof(int), 0644, NULL, + proc_dointvec }, + { 0, } +}; + +static ctl_table abi_root_table2[] = { + { .ctl_name = CTL_ABI, .procname = "abi", .mode = 0555, + .child = abi_table2 }, + { 0 }, +}; + +static __init int ia32_binfmt_init(void) +{ + register_sysctl_table(abi_root_table2, 1); + return 0; +} +__initcall(ia32_binfmt_init); +#endif diff --git a/arch/x86_64/ia32/ia32_ioctl.c b/arch/x86_64/ia32/ia32_ioctl.c new file mode 100644 index 00000000000..d259f8a6f81 --- /dev/null +++ b/arch/x86_64/ia32/ia32_ioctl.c @@ -0,0 +1,201 @@ +/* $Id: ia32_ioctl.c,v 1.25 2002/10/11 07:17:06 ak Exp $ + * ioctl32.c: Conversion between 32bit and 64bit native ioctls. + * + * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com) + * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) + * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs + * + * These routines maintain argument size conversion between 32bit and 64bit + * ioctls. + */ + +#define INCLUDES +#include <linux/syscalls.h> +#include "compat_ioctl.c" +#include <asm/mtrr.h> +#include <asm/ia32.h> + +#define CODE +#include "compat_ioctl.c" + +#ifndef TIOCGDEV +#define TIOCGDEV _IOR('T',0x32, unsigned int) +#endif +static int tiocgdev(unsigned fd, unsigned cmd, unsigned int __user *ptr) +{ + + struct file *file = fget(fd); + struct tty_struct *real_tty; + + if (!file) + return -EBADF; + if (file->f_op->ioctl != tty_ioctl) + return -EINVAL; + real_tty = (struct tty_struct *)file->private_data; + if (!real_tty) + return -EINVAL; + return put_user(new_encode_dev(tty_devnum(real_tty)), ptr); +} + +#define RTC_IRQP_READ32 _IOR('p', 0x0b, unsigned int) /* Read IRQ rate */ +#define RTC_IRQP_SET32 _IOW('p', 0x0c, unsigned int) /* Set IRQ rate */ +#define RTC_EPOCH_READ32 _IOR('p', 0x0d, unsigned) /* Read epoch */ +#define RTC_EPOCH_SET32 _IOW('p', 0x0e, unsigned) /* Set epoch */ + +static int rtc32_ioctl(unsigned fd, unsigned cmd, unsigned long arg) +{ + unsigned long val; + mm_segment_t oldfs = get_fs(); + int ret; + + switch (cmd) { + case RTC_IRQP_READ32: + set_fs(KERNEL_DS); + ret = sys_ioctl(fd, RTC_IRQP_READ, (unsigned long)&val); + set_fs(oldfs); + if (!ret) + ret = put_user(val, (unsigned int __user *) arg); + return ret; + + case RTC_IRQP_SET32: + cmd = RTC_IRQP_SET; + break; + + case RTC_EPOCH_READ32: + set_fs(KERNEL_DS); + ret = sys_ioctl(fd, RTC_EPOCH_READ, (unsigned long) &val); + set_fs(oldfs); + if (!ret) + ret = put_user(val, (unsigned int __user *) arg); + return ret; + + case RTC_EPOCH_SET32: + cmd = RTC_EPOCH_SET; + break; + } + return sys_ioctl(fd,cmd,arg); +} + +/* /proc/mtrr ioctls */ + + +struct mtrr_sentry32 +{ + compat_ulong_t base; /* Base address */ + compat_uint_t size; /* Size of region */ + compat_uint_t type; /* Type of region */ +}; + +struct mtrr_gentry32 +{ + compat_ulong_t regnum; /* Register number */ + compat_uint_t base; /* Base address */ + compat_uint_t size; /* Size of region */ + compat_uint_t type; /* Type of region */ +}; + +#define MTRR_IOCTL_BASE 'M' + +#define MTRRIOC32_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry32) +#define MTRRIOC32_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry32) +#define MTRRIOC32_DEL_ENTRY _IOW(MTRR_IOCTL_BASE, 2, struct mtrr_sentry32) +#define MTRRIOC32_GET_ENTRY _IOWR(MTRR_IOCTL_BASE, 3, struct mtrr_gentry32) +#define MTRRIOC32_KILL_ENTRY _IOW(MTRR_IOCTL_BASE, 4, struct mtrr_sentry32) +#define MTRRIOC32_ADD_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 5, struct mtrr_sentry32) +#define MTRRIOC32_SET_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 6, struct mtrr_sentry32) +#define MTRRIOC32_DEL_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 7, struct mtrr_sentry32) +#define MTRRIOC32_GET_PAGE_ENTRY _IOWR(MTRR_IOCTL_BASE, 8, struct mtrr_gentry32) +#define MTRRIOC32_KILL_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 9, struct mtrr_sentry32) + + +static int mtrr_ioctl32(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + struct mtrr_gentry g; + struct mtrr_sentry s; + int get = 0, err = 0; + struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)arg; + mm_segment_t oldfs = get_fs(); + + switch (cmd) { +#define SET(x) case MTRRIOC32_ ## x ## _ENTRY: cmd = MTRRIOC_ ## x ## _ENTRY; break +#define GET(x) case MTRRIOC32_ ## x ## _ENTRY: cmd = MTRRIOC_ ## x ## _ENTRY; get=1; break + SET(ADD); + SET(SET); + SET(DEL); + GET(GET); + SET(KILL); + SET(ADD_PAGE); + SET(SET_PAGE); + SET(DEL_PAGE); + GET(GET_PAGE); + SET(KILL_PAGE); + } + + if (get) { + err = get_user(g.regnum, &g32->regnum); + err |= get_user(g.base, &g32->base); + err |= get_user(g.size, &g32->size); + err |= get_user(g.type, &g32->type); + + arg = (unsigned long)&g; + } else { + struct mtrr_sentry32 __user *s32 = (struct mtrr_sentry32 __user *)arg; + err = get_user(s.base, &s32->base); + err |= get_user(s.size, &s32->size); + err |= get_user(s.type, &s32->type); + + arg = (unsigned long)&s; + } + if (err) return err; + + set_fs(KERNEL_DS); + err = sys_ioctl(fd, cmd, arg); + set_fs(oldfs); + + if (!err && get) { + err = put_user(g.base, &g32->base); + err |= put_user(g.size, &g32->size); + err |= put_user(g.regnum, &g32->regnum); + err |= put_user(g.type, &g32->type); + } + return err; +} + +#define HANDLE_IOCTL(cmd,handler) { (cmd), (ioctl_trans_handler_t)(handler) }, +#define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL(cmd,sys_ioctl) + +struct ioctl_trans ioctl_start[] = { +#include <linux/compat_ioctl.h> +#define DECLARES +#include "compat_ioctl.c" +COMPATIBLE_IOCTL(HDIO_SET_KEEPSETTINGS) +COMPATIBLE_IOCTL(HDIO_SCAN_HWIF) +COMPATIBLE_IOCTL(BLKRASET) +COMPATIBLE_IOCTL(0x4B50) /* KDGHWCLK - not in the kernel, but don't complain */ +COMPATIBLE_IOCTL(0x4B51) /* KDSHWCLK - not in the kernel, but don't complain */ +COMPATIBLE_IOCTL(FIOQSIZE) + +/* And these ioctls need translation */ +HANDLE_IOCTL(TIOCGDEV, tiocgdev) +/* realtime device */ +HANDLE_IOCTL(RTC_IRQP_READ, rtc32_ioctl) +HANDLE_IOCTL(RTC_IRQP_READ32,rtc32_ioctl) +HANDLE_IOCTL(RTC_IRQP_SET32, rtc32_ioctl) +HANDLE_IOCTL(RTC_EPOCH_READ32, rtc32_ioctl) +HANDLE_IOCTL(RTC_EPOCH_SET32, rtc32_ioctl) +/* take care of sizeof(sizeof()) breakage */ +/* mtrr */ +HANDLE_IOCTL(MTRRIOC32_ADD_ENTRY, mtrr_ioctl32) +HANDLE_IOCTL(MTRRIOC32_SET_ENTRY, mtrr_ioctl32) +HANDLE_IOCTL(MTRRIOC32_DEL_ENTRY, mtrr_ioctl32) +HANDLE_IOCTL(MTRRIOC32_GET_ENTRY, mtrr_ioctl32) +HANDLE_IOCTL(MTRRIOC32_KILL_ENTRY, mtrr_ioctl32) +HANDLE_IOCTL(MTRRIOC32_ADD_PAGE_ENTRY, mtrr_ioctl32) +HANDLE_IOCTL(MTRRIOC32_SET_PAGE_ENTRY, mtrr_ioctl32) +HANDLE_IOCTL(MTRRIOC32_DEL_PAGE_ENTRY, mtrr_ioctl32) +HANDLE_IOCTL(MTRRIOC32_GET_PAGE_ENTRY, mtrr_ioctl32) +HANDLE_IOCTL(MTRRIOC32_KILL_PAGE_ENTRY, mtrr_ioctl32) +}; + +int ioctl_table_size = ARRAY_SIZE(ioctl_start); + diff --git a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c new file mode 100644 index 00000000000..fbd09b5126c --- /dev/null +++ b/arch/x86_64/ia32/ia32_signal.c @@ -0,0 +1,621 @@ +/* + * linux/arch/x86_64/ia32/ia32_signal.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson + * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes + * 2000-12-* x86-64 compatibility mode signal handling by Andi Kleen + * + * $Id: ia32_signal.c,v 1.22 2002/07/29 10:34:03 ak Exp $ + */ + +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/errno.h> +#include <linux/wait.h> +#include <linux/ptrace.h> +#include <linux/unistd.h> +#include <linux/stddef.h> +#include <linux/personality.h> +#include <linux/compat.h> +#include <asm/ucontext.h> +#include <asm/uaccess.h> +#include <asm/i387.h> +#include <asm/ia32.h> +#include <asm/ptrace.h> +#include <asm/ia32_unistd.h> +#include <asm/user32.h> +#include <asm/sigcontext32.h> +#include <asm/fpu32.h> +#include <asm/proto.h> +#include <asm/vsyscall32.h> + +#define DEBUG_SIG 0 + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset); +void signal_fault(struct pt_regs *regs, void __user *frame, char *where); + +int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) +{ + int err; + if (!access_ok (VERIFY_WRITE, to, sizeof(compat_siginfo_t))) + return -EFAULT; + + /* If you change siginfo_t structure, please make sure that + this code is fixed accordingly. + It should never copy any pad contained in the structure + to avoid security leaks, but must copy the generic + 3 ints plus the relevant union member. */ + err = __put_user(from->si_signo, &to->si_signo); + err |= __put_user(from->si_errno, &to->si_errno); + err |= __put_user((short)from->si_code, &to->si_code); + + if (from->si_code < 0) { + err |= __put_user(from->si_pid, &to->si_pid); + err |= __put_user(from->si_uid, &to->si_uid); + err |= __put_user(ptr_to_compat(from->si_ptr), &to->si_ptr); + } else { + /* First 32bits of unions are always present: + * si_pid === si_band === si_tid === si_addr(LS half) */ + err |= __put_user(from->_sifields._pad[0], &to->_sifields._pad[0]); + switch (from->si_code >> 16) { + case __SI_FAULT >> 16: + break; + case __SI_CHLD >> 16: + err |= __put_user(from->si_utime, &to->si_utime); + err |= __put_user(from->si_stime, &to->si_stime); + err |= __put_user(from->si_status, &to->si_status); + /* FALL THROUGH */ + default: + case __SI_KILL >> 16: + err |= __put_user(from->si_uid, &to->si_uid); + break; + case __SI_POLL >> 16: + err |= __put_user(from->si_fd, &to->si_fd); + break; + case __SI_TIMER >> 16: + err |= __put_user(from->si_overrun, &to->si_overrun); + err |= __put_user(ptr_to_compat(from->si_ptr), + &to->si_ptr); + break; + case __SI_RT >> 16: /* This is not generated by the kernel as of now. */ + case __SI_MESGQ >> 16: + err |= __put_user(from->si_uid, &to->si_uid); + err |= __put_user(from->si_int, &to->si_int); + break; + } + } + return err; +} + +int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) +{ + int err; + u32 ptr32; + if (!access_ok (VERIFY_READ, from, sizeof(compat_siginfo_t))) + return -EFAULT; + + err = __get_user(to->si_signo, &from->si_signo); + err |= __get_user(to->si_errno, &from->si_errno); + err |= __get_user(to->si_code, &from->si_code); + + err |= __get_user(to->si_pid, &from->si_pid); + err |= __get_user(to->si_uid, &from->si_uid); + err |= __get_user(ptr32, &from->si_ptr); + to->si_ptr = compat_ptr(ptr32); + + return err; +} + +asmlinkage long +sys32_sigsuspend(int history0, int history1, old_sigset_t mask, + struct pt_regs *regs) +{ + sigset_t saveset; + + mask &= _BLOCKABLE; + spin_lock_irq(¤t->sighand->siglock); + saveset = current->blocked; + siginitset(¤t->blocked, mask); + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); + + regs->rax = -EINTR; + while (1) { + current->state = TASK_INTERRUPTIBLE; + schedule(); + if (do_signal(regs, &saveset)) + return -EINTR; + } +} + +asmlinkage long +sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, + stack_ia32_t __user *uoss_ptr, + struct pt_regs *regs) +{ + stack_t uss,uoss; + int ret; + mm_segment_t seg; + if (uss_ptr) { + u32 ptr; + memset(&uss,0,sizeof(stack_t)); + if (!access_ok(VERIFY_READ,uss_ptr,sizeof(stack_ia32_t)) || + __get_user(ptr, &uss_ptr->ss_sp) || + __get_user(uss.ss_flags, &uss_ptr->ss_flags) || + __get_user(uss.ss_size, &uss_ptr->ss_size)) + return -EFAULT; + uss.ss_sp = compat_ptr(ptr); + } + seg = get_fs(); + set_fs(KERNEL_DS); + ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, regs->rsp); + set_fs(seg); + if (ret >= 0 && uoss_ptr) { + if (!access_ok(VERIFY_WRITE,uoss_ptr,sizeof(stack_ia32_t)) || + __put_user(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp) || + __put_user(uoss.ss_flags, &uoss_ptr->ss_flags) || + __put_user(uoss.ss_size, &uoss_ptr->ss_size)) + ret = -EFAULT; + } + return ret; +} + +/* + * Do a signal return; undo the signal stack. + */ + +struct sigframe +{ + u32 pretcode; + int sig; + struct sigcontext_ia32 sc; + struct _fpstate_ia32 fpstate; + unsigned int extramask[_COMPAT_NSIG_WORDS-1]; + char retcode[8]; +}; + +struct rt_sigframe +{ + u32 pretcode; + int sig; + u32 pinfo; + u32 puc; + compat_siginfo_t info; + struct ucontext_ia32 uc; + struct _fpstate_ia32 fpstate; + char retcode[8]; +}; + +static int +ia32_restore_sigcontext(struct pt_regs *regs, struct sigcontext_ia32 __user *sc, unsigned int *peax) +{ + unsigned int err = 0; + + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + +#if DEBUG_SIG + printk("SIG restore_sigcontext: sc=%p err(%x) eip(%x) cs(%x) flg(%x)\n", + sc, sc->err, sc->eip, sc->cs, sc->eflags); +#endif +#define COPY(x) { \ + unsigned int reg; \ + err |= __get_user(reg, &sc->e ##x); \ + regs->r ## x = reg; \ +} + +#define RELOAD_SEG(seg,mask) \ + { unsigned int cur; \ + unsigned short pre; \ + err |= __get_user(pre, &sc->seg); \ + asm volatile("movl %%" #seg ",%0" : "=r" (cur)); \ + pre |= mask; \ + if (pre != cur) loadsegment(seg,pre); } + + /* Reload fs and gs if they have changed in the signal handler. + This does not handle long fs/gs base changes in the handler, but + does not clobber them at least in the normal case. */ + + { + unsigned gs, oldgs; + err |= __get_user(gs, &sc->gs); + gs |= 3; + asm("movl %%gs,%0" : "=r" (oldgs)); + if (gs != oldgs) + load_gs_index(gs); + } + RELOAD_SEG(fs,3); + RELOAD_SEG(ds,3); + RELOAD_SEG(es,3); + + COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); + COPY(dx); COPY(cx); COPY(ip); + /* Don't touch extended registers */ + + err |= __get_user(regs->cs, &sc->cs); + regs->cs |= 3; + err |= __get_user(regs->ss, &sc->ss); + regs->ss |= 3; + + { + unsigned int tmpflags; + err |= __get_user(tmpflags, &sc->eflags); + regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5); + regs->orig_rax = -1; /* disable syscall checks */ + } + + { + u32 tmp; + struct _fpstate_ia32 __user * buf; + err |= __get_user(tmp, &sc->fpstate); + buf = compat_ptr(tmp); + if (buf) { + if (!access_ok(VERIFY_READ, buf, sizeof(*buf))) + goto badframe; + err |= restore_i387_ia32(current, buf, 0); + } else { + struct task_struct *me = current; + if (used_math()) { + clear_fpu(me); + clear_used_math(); + } + } + } + + { + u32 tmp; + err |= __get_user(tmp, &sc->eax); + *peax = tmp; + } + return err; + +badframe: + return 1; +} + +asmlinkage long sys32_sigreturn(struct pt_regs *regs) +{ + struct sigframe __user *frame = (struct sigframe __user *)(regs->rsp-8); + sigset_t set; + unsigned int eax; + + if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + if (__get_user(set.sig[0], &frame->sc.oldmask) + || (_COMPAT_NSIG_WORDS > 1 + && __copy_from_user((((char *) &set.sig) + 4), &frame->extramask, + sizeof(frame->extramask)))) + goto badframe; + + sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(¤t->sighand->siglock); + current->blocked = set; + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); + + if (ia32_restore_sigcontext(regs, &frame->sc, &eax)) + goto badframe; + return eax; + +badframe: + signal_fault(regs, frame, "32bit sigreturn"); + return 0; +} + +asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs) +{ + struct rt_sigframe __user *frame; + sigset_t set; + unsigned int eax; + struct pt_regs tregs; + + frame = (struct rt_sigframe __user *)(regs->rsp - 4); + + if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + goto badframe; + + sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(¤t->sighand->siglock); + current->blocked = set; + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); + + if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax)) + goto badframe; + + tregs = *regs; + if (sys32_sigaltstack(&frame->uc.uc_stack, NULL, &tregs) == -EFAULT) + goto badframe; + + return eax; + +badframe: + signal_fault(regs,frame,"32bit rt sigreturn"); + return 0; +} + +/* + * Set up a signal frame. + */ + +static int +ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, struct _fpstate_ia32 __user *fpstate, + struct pt_regs *regs, unsigned int mask) +{ + int tmp, err = 0; + u32 eflags; + + tmp = 0; + __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp)); + err |= __put_user(tmp, (unsigned int __user *)&sc->gs); + __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp)); + err |= __put_user(tmp, (unsigned int __user *)&sc->fs); + __asm__("movl %%ds,%0" : "=r"(tmp): "0"(tmp)); + err |= __put_user(tmp, (unsigned int __user *)&sc->ds); + __asm__("movl %%es,%0" : "=r"(tmp): "0"(tmp)); + err |= __put_user(tmp, (unsigned int __user *)&sc->es); + + err |= __put_user((u32)regs->rdi, &sc->edi); + err |= __put_user((u32)regs->rsi, &sc->esi); + err |= __put_user((u32)regs->rbp, &sc->ebp); + err |= __put_user((u32)regs->rsp, &sc->esp); + err |= __put_user((u32)regs->rbx, &sc->ebx); + err |= __put_user((u32)regs->rdx, &sc->edx); + err |= __put_user((u32)regs->rcx, &sc->ecx); + err |= __put_user((u32)regs->rax, &sc->eax); + err |= __put_user((u32)regs->cs, &sc->cs); + err |= __put_user((u32)regs->ss, &sc->ss); + err |= __put_user(current->thread.trap_no, &sc->trapno); + err |= __put_user(current->thread.error_code, &sc->err); + err |= __put_user((u32)regs->rip, &sc->eip); + eflags = regs->eflags; + if (current->ptrace & PT_PTRACED) + eflags &= ~TF_MASK; + err |= __put_user((u32)eflags, &sc->eflags); + err |= __put_user((u32)regs->rsp, &sc->esp_at_signal); + + tmp = save_i387_ia32(current, fpstate, regs, 0); + if (tmp < 0) + err = -EFAULT; + else { + clear_used_math(); + stts(); + err |= __put_user(ptr_to_compat(tmp ? fpstate : NULL), + &sc->fpstate); + } + + /* non-iBCS2 extensions.. */ + err |= __put_user(mask, &sc->oldmask); + err |= __put_user(current->thread.cr2, &sc->cr2); + + return err; +} + +/* + * Determine which stack to use.. + */ +static void __user * +get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) +{ + unsigned long rsp; + + /* Default to using normal stack */ + rsp = regs->rsp; + + /* This is the X/Open sanctioned signal stack switching. */ + if (ka->sa.sa_flags & SA_ONSTACK) { + if (sas_ss_flags(rsp) == 0) + rsp = current->sas_ss_sp + current->sas_ss_size; + } + + /* This is the legacy signal stack switching. */ + else if ((regs->ss & 0xffff) != __USER_DS && + !(ka->sa.sa_flags & SA_RESTORER) && + ka->sa.sa_restorer) { + rsp = (unsigned long) ka->sa.sa_restorer; + } + + return (void __user *)((rsp - frame_size) & -8UL); +} + +void ia32_setup_frame(int sig, struct k_sigaction *ka, + compat_sigset_t *set, struct pt_regs * regs) +{ + struct sigframe __user *frame; + int err = 0; + + frame = get_sigframe(ka, regs, sizeof(*frame)); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + goto give_sigsegv; + + { + struct exec_domain *ed = current_thread_info()->exec_domain; + err |= __put_user((ed + && ed->signal_invmap + && sig < 32 + ? ed->signal_invmap[sig] + : sig), + &frame->sig); + } + if (err) + goto give_sigsegv; + + err |= ia32_setup_sigcontext(&frame->sc, &frame->fpstate, regs, + set->sig[0]); + if (err) + goto give_sigsegv; + + if (_COMPAT_NSIG_WORDS > 1) { + err |= __copy_to_user(frame->extramask, &set->sig[1], + sizeof(frame->extramask)); + } + if (err) + goto give_sigsegv; + + /* Return stub is in 32bit vsyscall page */ + { + void __user *restorer = VSYSCALL32_SIGRETURN; + if (ka->sa.sa_flags & SA_RESTORER) + restorer = ka->sa.sa_restorer; + err |= __put_user(ptr_to_compat(restorer), &frame->pretcode); + } + /* These are actually not used anymore, but left because some + gdb versions depend on them as a marker. */ + { + /* copy_to_user optimizes that into a single 8 byte store */ + static const struct { + u16 poplmovl; + u32 val; + u16 int80; + u16 pad; + } __attribute__((packed)) code = { + 0xb858, /* popl %eax ; movl $...,%eax */ + __NR_ia32_sigreturn, + 0x80cd, /* int $0x80 */ + 0, + }; + err |= __copy_to_user(frame->retcode, &code, 8); + } + if (err) + goto give_sigsegv; + + /* Set up registers for signal handler */ + regs->rsp = (unsigned long) frame; + regs->rip = (unsigned long) ka->sa.sa_handler; + + asm volatile("movl %0,%%ds" :: "r" (__USER32_DS)); + asm volatile("movl %0,%%es" :: "r" (__USER32_DS)); + + regs->cs = __USER32_CS; + regs->ss = __USER32_DS; + + set_fs(USER_DS); + if (regs->eflags & TF_MASK) { + if (current->ptrace & PT_PTRACED) { + ptrace_notify(SIGTRAP); + } else { + regs->eflags &= ~TF_MASK; + } + } + +#if DEBUG_SIG + printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", + current->comm, current->pid, frame, regs->rip, frame->pretcode); +#endif + + return; + +give_sigsegv: + force_sigsegv(sig, current); +} + +void ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + compat_sigset_t *set, struct pt_regs * regs) +{ + struct rt_sigframe __user *frame; + int err = 0; + + frame = get_sigframe(ka, regs, sizeof(*frame)); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + goto give_sigsegv; + + { + struct exec_domain *ed = current_thread_info()->exec_domain; + err |= __put_user((ed + && ed->signal_invmap + && sig < 32 + ? ed->signal_invmap[sig] + : sig), + &frame->sig); + } + err |= __put_user(ptr_to_compat(&frame->info), &frame->pinfo); + err |= __put_user(ptr_to_compat(&frame->uc), &frame->puc); + err |= copy_siginfo_to_user32(&frame->info, info); + if (err) + goto give_sigsegv; + + /* Create the ucontext. */ + err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(0, &frame->uc.uc_link); + err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + err |= __put_user(sas_ss_flags(regs->rsp), + &frame->uc.uc_stack.ss_flags); + err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, + regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) + goto give_sigsegv; + + + { + void __user *restorer = VSYSCALL32_RTSIGRETURN; + if (ka->sa.sa_flags & SA_RESTORER) + restorer = ka->sa.sa_restorer; + err |= __put_user(ptr_to_compat(restorer), &frame->pretcode); + } + + /* This is movl $,%eax ; int $0x80 */ + /* Not actually used anymore, but left because some gdb versions + need it. */ + { + /* __copy_to_user optimizes that into a single 8 byte store */ + static const struct { + u8 movl; + u32 val; + u16 int80; + u16 pad; + u8 pad2; + } __attribute__((packed)) code = { + 0xb8, + __NR_ia32_rt_sigreturn, + 0x80cd, + 0, + }; + err |= __copy_to_user(frame->retcode, &code, 8); + } + if (err) + goto give_sigsegv; + + /* Set up registers for signal handler */ + regs->rsp = (unsigned long) frame; + regs->rip = (unsigned long) ka->sa.sa_handler; + + asm volatile("movl %0,%%ds" :: "r" (__USER32_DS)); + asm volatile("movl %0,%%es" :: "r" (__USER32_DS)); + + regs->cs = __USER32_CS; + regs->ss = __USER32_DS; + + set_fs(USER_DS); + if (regs->eflags & TF_MASK) { + if (current->ptrace & PT_PTRACED) { + ptrace_notify(SIGTRAP); + } else { + regs->eflags &= ~TF_MASK; + } + } + +#if DEBUG_SIG + printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", + current->comm, current->pid, frame, regs->rip, frame->pretcode); +#endif + + return; + +give_sigsegv: + force_sigsegv(sig, current); +} + diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S new file mode 100644 index 00000000000..f3ca0db85b5 --- /dev/null +++ b/arch/x86_64/ia32/ia32entry.S @@ -0,0 +1,602 @@ +/* + * Compatibility mode system call entry point for x86-64. + * + * Copyright 2000-2002 Andi Kleen, SuSE Labs. + */ + +#include <asm/dwarf2.h> +#include <asm/calling.h> +#include <asm/offset.h> +#include <asm/current.h> +#include <asm/errno.h> +#include <asm/ia32_unistd.h> +#include <asm/thread_info.h> +#include <asm/segment.h> +#include <asm/vsyscall32.h> +#include <linux/linkage.h> + + .macro IA32_ARG_FIXUP noebp=0 + movl %edi,%r8d + .if \noebp + .else + movl %ebp,%r9d + .endif + xchg %ecx,%esi + movl %ebx,%edi + movl %edx,%edx /* zero extension */ + .endm + + /* clobbers %eax */ + .macro CLEAR_RREGS + xorl %eax,%eax + movq %rax,R11(%rsp) + movq %rax,R10(%rsp) + movq %rax,R9(%rsp) + movq %rax,R8(%rsp) + .endm + +/* + * 32bit SYSENTER instruction entry. + * + * Arguments: + * %eax System call number. + * %ebx Arg1 + * %ecx Arg2 + * %edx Arg3 + * %esi Arg4 + * %edi Arg5 + * %ebp user stack + * 0(%ebp) Arg6 + * + * Interrupts off. + * + * This is purely a fast path. For anything complicated we use the int 0x80 + * path below. Set up a complete hardware stack frame to share code + * with the int 0x80 path. + */ +ENTRY(ia32_sysenter_target) + CFI_STARTPROC + swapgs + movq %gs:pda_kernelstack, %rsp + addq $(PDA_STACKOFFSET),%rsp + sti + movl %ebp,%ebp /* zero extension */ + pushq $__USER32_DS + pushq %rbp + pushfq + movl $VSYSCALL32_SYSEXIT, %r10d + pushq $__USER32_CS + movl %eax, %eax + pushq %r10 + pushq %rax + cld + SAVE_ARGS 0,0,1 + /* no need to do an access_ok check here because rbp has been + 32bit zero extended */ +1: movl (%rbp),%r9d + .section __ex_table,"a" + .quad 1b,ia32_badarg + .previous + GET_THREAD_INFO(%r10) + testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) + jnz sysenter_tracesys +sysenter_do_call: + cmpl $(IA32_NR_syscalls),%eax + jae ia32_badsys + IA32_ARG_FIXUP 1 + call *ia32_sys_call_table(,%rax,8) + movq %rax,RAX-ARGOFFSET(%rsp) + GET_THREAD_INFO(%r10) + cli + testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) + jnz int_ret_from_sys_call + /* clear IF, that popfq doesn't enable interrupts early */ + andl $~0x200,EFLAGS-R11(%rsp) + RESTORE_ARGS 1,24,1,1,1,1 + popfq + popq %rcx /* User %esp */ + movl $VSYSCALL32_SYSEXIT,%edx /* User %eip */ + swapgs + sti /* sti only takes effect after the next instruction */ + /* sysexit */ + .byte 0xf, 0x35 + +sysenter_tracesys: + SAVE_REST + CLEAR_RREGS + movq $-ENOSYS,RAX(%rsp) /* really needed? */ + movq %rsp,%rdi /* &pt_regs -> arg1 */ + call syscall_trace_enter + LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ + RESTORE_REST + movl %ebp, %ebp + /* no need to do an access_ok check here because rbp has been + 32bit zero extended */ +1: movl (%rbp),%r9d + .section __ex_table,"a" + .quad 1b,ia32_badarg + .previous + jmp sysenter_do_call + CFI_ENDPROC + +/* + * 32bit SYSCALL instruction entry. + * + * Arguments: + * %eax System call number. + * %ebx Arg1 + * %ecx return EIP + * %edx Arg3 + * %esi Arg4 + * %edi Arg5 + * %ebp Arg2 [note: not saved in the stack frame, should not be touched] + * %esp user stack + * 0(%esp) Arg6 + * + * Interrupts off. + * + * This is purely a fast path. For anything complicated we use the int 0x80 + * path below. Set up a complete hardware stack frame to share code + * with the int 0x80 path. + */ +ENTRY(ia32_cstar_target) + CFI_STARTPROC + swapgs + movl %esp,%r8d + movq %gs:pda_kernelstack,%rsp + sti + SAVE_ARGS 8,1,1 + movl %eax,%eax /* zero extension */ + movq %rax,ORIG_RAX-ARGOFFSET(%rsp) + movq %rcx,RIP-ARGOFFSET(%rsp) + movq %rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */ + movl %ebp,%ecx + movq $__USER32_CS,CS-ARGOFFSET(%rsp) + movq $__USER32_DS,SS-ARGOFFSET(%rsp) + movq %r11,EFLAGS-ARGOFFSET(%rsp) + movq %r8,RSP-ARGOFFSET(%rsp) + /* no need to do an access_ok check here because r8 has been + 32bit zero extended */ + /* hardware stack frame is complete now */ +1: movl (%r8),%r9d + .section __ex_table,"a" + .quad 1b,ia32_badarg + .previous + GET_THREAD_INFO(%r10) + testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) + jnz cstar_tracesys +cstar_do_call: + cmpl $IA32_NR_syscalls,%eax + jae ia32_badsys + IA32_ARG_FIXUP 1 + call *ia32_sys_call_table(,%rax,8) + movq %rax,RAX-ARGOFFSET(%rsp) + GET_THREAD_INFO(%r10) + cli + testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) + jnz int_ret_from_sys_call + RESTORE_ARGS 1,-ARG_SKIP,1,1,1 + movl RIP-ARGOFFSET(%rsp),%ecx + movl EFLAGS-ARGOFFSET(%rsp),%r11d + movl RSP-ARGOFFSET(%rsp),%esp + swapgs + sysretl + +cstar_tracesys: + SAVE_REST + CLEAR_RREGS + movq $-ENOSYS,RAX(%rsp) /* really needed? */ + movq %rsp,%rdi /* &pt_regs -> arg1 */ + call syscall_trace_enter + LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ + RESTORE_REST + movl RSP-ARGOFFSET(%rsp), %r8d + /* no need to do an access_ok check here because r8 has been + 32bit zero extended */ +1: movl (%r8),%r9d + .section __ex_table,"a" + .quad 1b,ia32_badarg + .previous + jmp cstar_do_call + +ia32_badarg: + movq $-EFAULT,%rax + jmp ia32_sysret + CFI_ENDPROC + +/* + * Emulated IA32 system calls via int 0x80. + * + * Arguments: + * %eax System call number. + * %ebx Arg1 + * %ecx Arg2 + * %edx Arg3 + * %esi Arg4 + * %edi Arg5 + * %ebp Arg6 [note: not saved in the stack frame, should not be touched] + * + * Notes: + * Uses the same stack frame as the x86-64 version. + * All registers except %eax must be saved (but ptrace may violate that) + * Arguments are zero extended. For system calls that want sign extension and + * take long arguments a wrapper is needed. Most calls can just be called + * directly. + * Assumes it is only called from user space and entered with interrupts off. + */ + +ENTRY(ia32_syscall) + CFI_STARTPROC + swapgs + sti + movl %eax,%eax + pushq %rax + cld + /* note the registers are not zero extended to the sf. + this could be a problem. */ + SAVE_ARGS 0,0,1 + GET_THREAD_INFO(%r10) + testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) + jnz ia32_tracesys +ia32_do_syscall: + cmpl $(IA32_NR_syscalls),%eax + jae ia32_badsys + IA32_ARG_FIXUP + call *ia32_sys_call_table(,%rax,8) # xxx: rip relative +ia32_sysret: + movq %rax,RAX-ARGOFFSET(%rsp) + jmp int_ret_from_sys_call + +ia32_tracesys: + SAVE_REST + movq $-ENOSYS,RAX(%rsp) /* really needed? */ + movq %rsp,%rdi /* &pt_regs -> arg1 */ + call syscall_trace_enter + LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ + RESTORE_REST + jmp ia32_do_syscall + +ia32_badsys: + movq $0,ORIG_RAX-ARGOFFSET(%rsp) + movq $-ENOSYS,RAX-ARGOFFSET(%rsp) + jmp int_ret_from_sys_call + +ni_syscall: + movq %rax,%rdi + jmp sys32_ni_syscall + +quiet_ni_syscall: + movq $-ENOSYS,%rax + ret + CFI_ENDPROC + + .macro PTREGSCALL label, func, arg + .globl \label +\label: + leaq \func(%rip),%rax + leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ + jmp ia32_ptregs_common + .endm + + PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi + PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi + PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx + PTREGSCALL stub32_sigsuspend, sys32_sigsuspend, %rcx + PTREGSCALL stub32_execve, sys32_execve, %rcx + PTREGSCALL stub32_fork, sys_fork, %rdi + PTREGSCALL stub32_clone, sys32_clone, %rdx + PTREGSCALL stub32_vfork, sys_vfork, %rdi + PTREGSCALL stub32_iopl, sys_iopl, %rsi + PTREGSCALL stub32_rt_sigsuspend, sys_rt_sigsuspend, %rdx + +ENTRY(ia32_ptregs_common) + CFI_STARTPROC + popq %r11 + SAVE_REST + call *%rax + RESTORE_REST + jmp ia32_sysret /* misbalances the return cache */ + CFI_ENDPROC + + .data + .align 8 + .globl ia32_sys_call_table +ia32_sys_call_table: + .quad sys_restart_syscall + .quad sys_exit + .quad stub32_fork + .quad sys_read + .quad sys_write + .quad sys32_open /* 5 */ + .quad sys_close + .quad sys32_waitpid + .quad sys_creat + .quad sys_link + .quad sys_unlink /* 10 */ + .quad stub32_execve + .quad sys_chdir + .quad compat_sys_time + .quad sys_mknod + .quad sys_chmod /* 15 */ + .quad sys_lchown16 + .quad quiet_ni_syscall /* old break syscall holder */ + .quad sys_stat + .quad sys32_lseek + .quad sys_getpid /* 20 */ + .quad compat_sys_mount /* mount */ + .quad sys_oldumount /* old_umount */ + .quad sys_setuid16 + .quad sys_getuid16 + .quad compat_sys_stime /* stime */ /* 25 */ + .quad sys32_ptrace /* ptrace */ + .quad sys_alarm + .quad sys_fstat /* (old)fstat */ + .quad sys_pause + .quad compat_sys_utime /* 30 */ + .quad quiet_ni_syscall /* old stty syscall holder */ + .quad quiet_ni_syscall /* old gtty syscall holder */ + .quad sys_access + .quad sys_nice + .quad quiet_ni_syscall /* 35 */ /* old ftime syscall holder */ + .quad sys_sync + .quad sys32_kill + .quad sys_rename + .quad sys_mkdir + .quad sys_rmdir /* 40 */ + .quad sys_dup + .quad sys32_pipe + .quad compat_sys_times + .quad quiet_ni_syscall /* old prof syscall holder */ + .quad sys_brk /* 45 */ + .quad sys_setgid16 + .quad sys_getgid16 + .quad sys_signal + .quad sys_geteuid16 + .quad sys_getegid16 /* 50 */ + .quad sys_acct + .quad sys_umount /* new_umount */ + .quad quiet_ni_syscall /* old lock syscall holder */ + .quad compat_sys_ioctl + .quad compat_sys_fcntl64 /* 55 */ + .quad quiet_ni_syscall /* old mpx syscall holder */ + .quad sys_setpgid + .quad quiet_ni_syscall /* old ulimit syscall holder */ + .quad sys32_olduname + .quad sys_umask /* 60 */ + .quad sys_chroot + .quad sys32_ustat + .quad sys_dup2 + .quad sys_getppid + .quad sys_getpgrp /* 65 */ + .quad sys_setsid + .quad sys32_sigaction + .quad sys_sgetmask + .quad sys_ssetmask + .quad sys_setreuid16 /* 70 */ + .quad sys_setregid16 + .quad stub32_sigsuspend + .quad compat_sys_sigpending + .quad sys_sethostname + .quad compat_sys_setrlimit /* 75 */ + .quad compat_sys_old_getrlimit /* old_getrlimit */ + .quad compat_sys_getrusage + .quad sys32_gettimeofday + .quad sys32_settimeofday + .quad sys_getgroups16 /* 80 */ + .quad sys_setgroups16 + .quad sys32_old_select + .quad sys_symlink + .quad sys_lstat + .quad sys_readlink /* 85 */ +#ifdef CONFIG_IA32_AOUT + .quad sys_uselib +#else + .quad quiet_ni_syscall +#endif + .quad sys_swapon + .quad sys_reboot + .quad compat_sys_old_readdir + .quad sys32_mmap /* 90 */ + .quad sys_munmap + .quad sys_truncate + .quad sys_ftruncate + .quad sys_fchmod + .quad sys_fchown16 /* 95 */ + .quad sys_getpriority + .quad sys_setpriority + .quad quiet_ni_syscall /* old profil syscall holder */ + .quad compat_sys_statfs + .quad compat_sys_fstatfs /* 100 */ + .quad sys_ioperm + .quad compat_sys_socketcall + .quad sys_syslog + .quad compat_sys_setitimer + .quad compat_sys_getitimer /* 105 */ + .quad compat_sys_newstat + .quad compat_sys_newlstat + .quad compat_sys_newfstat + .quad sys32_uname + .quad stub32_iopl /* 110 */ + .quad sys_vhangup + .quad quiet_ni_syscall /* old "idle" system call */ + .quad sys32_vm86_warning /* vm86old */ + .quad compat_sys_wait4 + .quad sys_swapoff /* 115 */ + .quad sys32_sysinfo + .quad sys32_ipc + .quad sys_fsync + .quad stub32_sigreturn + .quad stub32_clone /* 120 */ + .quad sys_setdomainname + .quad sys_uname + .quad sys_modify_ldt + .quad sys32_adjtimex + .quad sys32_mprotect /* 125 */ + .quad compat_sys_sigprocmask + .quad quiet_ni_syscall /* create_module */ + .quad sys_init_module + .quad sys_delete_module + .quad quiet_ni_syscall /* 130 get_kernel_syms */ + .quad sys_quotactl + .quad sys_getpgid + .quad sys_fchdir + .quad quiet_ni_syscall /* bdflush */ + .quad sys_sysfs /* 135 */ + .quad sys_personality + .quad quiet_ni_syscall /* for afs_syscall */ + .quad sys_setfsuid16 + .quad sys_setfsgid16 + .quad sys_llseek /* 140 */ + .quad compat_sys_getdents + .quad compat_sys_select + .quad sys_flock + .quad sys_msync + .quad compat_sys_readv /* 145 */ + .quad compat_sys_writev + .quad sys_getsid + .quad sys_fdatasync + .quad sys32_sysctl /* sysctl */ + .quad sys_mlock /* 150 */ + .quad sys_munlock + .quad sys_mlockall + .quad sys_munlockall + .quad sys_sched_setparam + .quad sys_sched_getparam /* 155 */ + .quad sys_sched_setscheduler + .quad sys_sched_getscheduler + .quad sys_sched_yield + .quad sys_sched_get_priority_max + .quad sys_sched_get_priority_min /* 160 */ + .quad sys_sched_rr_get_interval + .quad compat_sys_nanosleep + .quad sys_mremap + .quad sys_setresuid16 + .quad sys_getresuid16 /* 165 */ + .quad sys32_vm86_warning /* vm86 */ + .quad quiet_ni_syscall /* query_module */ + .quad sys_poll + .quad compat_sys_nfsservctl + .quad sys_setresgid16 /* 170 */ + .quad sys_getresgid16 + .quad sys_prctl + .quad stub32_rt_sigreturn + .quad sys32_rt_sigaction + .quad sys32_rt_sigprocmask /* 175 */ + .quad sys32_rt_sigpending + .quad compat_sys_rt_sigtimedwait + .quad sys32_rt_sigqueueinfo + .quad stub32_rt_sigsuspend + .quad sys32_pread /* 180 */ + .quad sys32_pwrite + .quad sys_chown16 + .quad sys_getcwd + .quad sys_capget + .quad sys_capset + .quad stub32_sigaltstack + .quad sys32_sendfile + .quad quiet_ni_syscall /* streams1 */ + .quad quiet_ni_syscall /* streams2 */ + .quad stub32_vfork /* 190 */ + .quad compat_sys_getrlimit + .quad sys32_mmap2 + .quad sys32_truncate64 + .quad sys32_ftruncate64 + .quad sys32_stat64 /* 195 */ + .quad sys32_lstat64 + .quad sys32_fstat64 + .quad sys_lchown + .quad sys_getuid + .quad sys_getgid /* 200 */ + .quad sys_geteuid + .quad sys_getegid + .quad sys_setreuid + .quad sys_setregid + .quad sys_getgroups /* 205 */ + .quad sys_setgroups + .quad sys_fchown + .quad sys_setresuid + .quad sys_getresuid + .quad sys_setresgid /* 210 */ + .quad sys_getresgid + .quad sys_chown + .quad sys_setuid + .quad sys_setgid + .quad sys_setfsuid /* 215 */ + .quad sys_setfsgid + .quad sys_pivot_root + .quad sys_mincore + .quad sys_madvise + .quad compat_sys_getdents64 /* 220 getdents64 */ + .quad compat_sys_fcntl64 + .quad quiet_ni_syscall /* tux */ + .quad quiet_ni_syscall /* security */ + .quad sys_gettid + .quad sys_readahead /* 225 */ + .quad sys_setxattr + .quad sys_lsetxattr + .quad sys_fsetxattr + .quad sys_getxattr + .quad sys_lgetxattr /* 230 */ + .quad sys_fgetxattr + .quad sys_listxattr + .quad sys_llistxattr + .quad sys_flistxattr + .quad sys_removexattr /* 235 */ + .quad sys_lremovexattr + .quad sys_fremovexattr + .quad sys_tkill + .quad sys_sendfile64 + .quad compat_sys_futex /* 240 */ + .quad compat_sys_sched_setaffinity + .quad compat_sys_sched_getaffinity + .quad sys32_set_thread_area + .quad sys32_get_thread_area + .quad compat_sys_io_setup /* 245 */ + .quad sys_io_destroy + .quad compat_sys_io_getevents + .quad compat_sys_io_submit + .quad sys_io_cancel + .quad sys_fadvise64 /* 250 */ + .quad quiet_ni_syscall /* free_huge_pages */ + .quad sys_exit_group + .quad sys32_lookup_dcookie + .quad sys_epoll_create + .quad sys_epoll_ctl /* 255 */ + .quad sys_epoll_wait + .quad sys_remap_file_pages + .quad sys_set_tid_address + .quad sys32_timer_create + .quad compat_sys_timer_settime /* 260 */ + .quad compat_sys_timer_gettime + .quad sys_timer_getoverrun + .quad sys_timer_delete + .quad compat_sys_clock_settime + .quad compat_sys_clock_gettime /* 265 */ + .quad compat_sys_clock_getres + .quad compat_sys_clock_nanosleep + .quad compat_sys_statfs64 + .quad compat_sys_fstatfs64 + .quad sys_tgkill /* 270 */ + .quad compat_sys_utimes + .quad sys32_fadvise64_64 + .quad quiet_ni_syscall /* sys_vserver */ + .quad sys_mbind + .quad compat_sys_get_mempolicy /* 275 */ + .quad sys_set_mempolicy + .quad compat_sys_mq_open + .quad sys_mq_unlink + .quad compat_sys_mq_timedsend + .quad compat_sys_mq_timedreceive /* 280 */ + .quad compat_sys_mq_notify + .quad compat_sys_mq_getsetattr + .quad quiet_ni_syscall /* reserved for kexec */ + .quad compat_sys_waitid + .quad quiet_ni_syscall /* sys_altroot */ + .quad sys_add_key + .quad sys_request_key + .quad sys_keyctl + /* don't forget to change IA32_NR_syscalls */ +ia32_syscall_end: + .rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8 + .quad ni_syscall + .endr diff --git a/arch/x86_64/ia32/ipc32.c b/arch/x86_64/ia32/ipc32.c new file mode 100644 index 00000000000..369151dc321 --- /dev/null +++ b/arch/x86_64/ia32/ipc32.c @@ -0,0 +1,57 @@ +#include <linux/kernel.h> +#include <linux/spinlock.h> +#include <linux/list.h> +#include <linux/syscalls.h> +#include <linux/time.h> +#include <linux/sem.h> +#include <linux/msg.h> +#include <linux/shm.h> +#include <linux/ipc.h> +#include <linux/compat.h> + +#include <asm-i386/ipc.h> + +asmlinkage long +sys32_ipc(u32 call, int first, int second, int third, + compat_uptr_t ptr, u32 fifth) +{ + int version; + + version = call >> 16; /* hack for backward compatibility */ + call &= 0xffff; + + switch (call) { + case SEMOP: + /* struct sembuf is the same on 32 and 64bit :)) */ + return sys_semtimedop(first, compat_ptr(ptr), second, NULL); + case SEMTIMEDOP: + return compat_sys_semtimedop(first, compat_ptr(ptr), second, + compat_ptr(fifth)); + case SEMGET: + return sys_semget(first, second, third); + case SEMCTL: + return compat_sys_semctl(first, second, third, compat_ptr(ptr)); + + case MSGSND: + return compat_sys_msgsnd(first, second, third, compat_ptr(ptr)); + case MSGRCV: + return compat_sys_msgrcv(first, second, fifth, third, + version, compat_ptr(ptr)); + case MSGGET: + return sys_msgget((key_t) first, second); + case MSGCTL: + return compat_sys_msgctl(first, second, compat_ptr(ptr)); + + case SHMAT: + return compat_sys_shmat(first, second, third, version, + compat_ptr(ptr)); + break; + case SHMDT: + return sys_shmdt(compat_ptr(ptr)); + case SHMGET: + return sys_shmget(first, (unsigned)second, third); + case SHMCTL: + return compat_sys_shmctl(first, second, compat_ptr(ptr)); + } + return -ENOSYS; +} diff --git a/arch/x86_64/ia32/ptrace32.c b/arch/x86_64/ia32/ptrace32.c new file mode 100644 index 00000000000..b98b6d2462f --- /dev/null +++ b/arch/x86_64/ia32/ptrace32.c @@ -0,0 +1,379 @@ +/* + * 32bit ptrace for x86-64. + * + * Copyright 2001,2002 Andi Kleen, SuSE Labs. + * Some parts copied from arch/i386/kernel/ptrace.c. See that file for earlier + * copyright. + * + * This allows to access 64bit processes too; but there is no way to see the extended + * register contents. + * + * $Id: ptrace32.c,v 1.16 2003/03/14 16:06:35 ak Exp $ + */ + +#include <linux/kernel.h> +#include <linux/stddef.h> +#include <linux/sched.h> +#include <linux/syscalls.h> +#include <linux/unistd.h> +#include <linux/mm.h> +#include <linux/ptrace.h> +#include <asm/ptrace.h> +#include <asm/compat.h> +#include <asm/uaccess.h> +#include <asm/user32.h> +#include <asm/user.h> +#include <asm/errno.h> +#include <asm/debugreg.h> +#include <asm/i387.h> +#include <asm/fpu32.h> + +/* determines which flags the user has access to. */ +/* 1 = access 0 = no access */ +#define FLAG_MASK 0x44dd5UL + +#define R32(l,q) \ + case offsetof(struct user32, regs.l): stack[offsetof(struct pt_regs, q)/8] = val; break + +static int putreg32(struct task_struct *child, unsigned regno, u32 val) +{ + int i; + __u64 *stack = (__u64 *)(child->thread.rsp0 - sizeof(struct pt_regs)); + + switch (regno) { + case offsetof(struct user32, regs.fs): + if (val && (val & 3) != 3) return -EIO; + child->thread.fs = val & 0xffff; + break; + case offsetof(struct user32, regs.gs): + if (val && (val & 3) != 3) return -EIO; + child->thread.gs = val & 0xffff; + break; + case offsetof(struct user32, regs.ds): + if (val && (val & 3) != 3) return -EIO; + child->thread.ds = val & 0xffff; + break; + case offsetof(struct user32, regs.es): + child->thread.es = val & 0xffff; + break; + case offsetof(struct user32, regs.ss): + if ((val & 3) != 3) return -EIO; + stack[offsetof(struct pt_regs, ss)/8] = val & 0xffff; + break; + case offsetof(struct user32, regs.cs): + if ((val & 3) != 3) return -EIO; + stack[offsetof(struct pt_regs, cs)/8] = val & 0xffff; + break; + + R32(ebx, rbx); + R32(ecx, rcx); + R32(edx, rdx); + R32(edi, rdi); + R32(esi, rsi); + R32(ebp, rbp); + R32(eax, rax); + R32(orig_eax, orig_rax); + R32(eip, rip); + R32(esp, rsp); + + case offsetof(struct user32, regs.eflags): { + __u64 *flags = &stack[offsetof(struct pt_regs, eflags)/8]; + val &= FLAG_MASK; + *flags = val | (*flags & ~FLAG_MASK); + break; + } + + case offsetof(struct user32, u_debugreg[4]): + case offsetof(struct user32, u_debugreg[5]): + return -EIO; + + case offsetof(struct user32, u_debugreg[0]): + child->thread.debugreg0 = val; + break; + + case offsetof(struct user32, u_debugreg[1]): + child->thread.debugreg1 = val; + break; + + case offsetof(struct user32, u_debugreg[2]): + child->thread.debugreg2 = val; + break; + + case offsetof(struct user32, u_debugreg[3]): + child->thread.debugreg3 = val; + break; + + case offsetof(struct user32, u_debugreg[6]): + child->thread.debugreg6 = val; + break; + + case offsetof(struct user32, u_debugreg[7]): + val &= ~DR_CONTROL_RESERVED; + /* See arch/i386/kernel/ptrace.c for an explanation of + * this awkward check.*/ + for(i=0; i<4; i++) + if ((0x5454 >> ((val >> (16 + 4*i)) & 0xf)) & 1) + return -EIO; + child->thread.debugreg7 = val; + break; + + default: + if (regno > sizeof(struct user32) || (regno & 3)) + return -EIO; + + /* Other dummy fields in the virtual user structure are ignored */ + break; + } + return 0; +} + +#undef R32 + +#define R32(l,q) \ + case offsetof(struct user32, regs.l): *val = stack[offsetof(struct pt_regs, q)/8]; break + +static int getreg32(struct task_struct *child, unsigned regno, u32 *val) +{ + __u64 *stack = (__u64 *)(child->thread.rsp0 - sizeof(struct pt_regs)); + + switch (regno) { + case offsetof(struct user32, regs.fs): + *val = child->thread.fs; + break; + case offsetof(struct user32, regs.gs): + *val = child->thread.gs; + break; + case offsetof(struct user32, regs.ds): + *val = child->thread.ds; + break; + case offsetof(struct user32, regs.es): + *val = child->thread.es; + break; + + R32(cs, cs); + R32(ss, ss); + R32(ebx, rbx); + R32(ecx, rcx); + R32(edx, rdx); + R32(edi, rdi); + R32(esi, rsi); + R32(ebp, rbp); + R32(eax, rax); + R32(orig_eax, orig_rax); + R32(eip, rip); + R32(eflags, eflags); + R32(esp, rsp); + + case offsetof(struct user32, u_debugreg[0]): + *val = child->thread.debugreg0; + break; + case offsetof(struct user32, u_debugreg[1]): + *val = child->thread.debugreg1; + break; + case offsetof(struct user32, u_debugreg[2]): + *val = child->thread.debugreg2; + break; + case offsetof(struct user32, u_debugreg[3]): + *val = child->thread.debugreg3; + break; + case offsetof(struct user32, u_debugreg[6]): + *val = child->thread.debugreg6; + break; + case offsetof(struct user32, u_debugreg[7]): + *val = child->thread.debugreg7; + break; + + default: + if (regno > sizeof(struct user32) || (regno & 3)) + return -EIO; + + /* Other dummy fields in the virtual user structure are ignored */ + *val = 0; + break; + } + return 0; +} + +#undef R32 + +static struct task_struct *find_target(int request, int pid, int *err) +{ + struct task_struct *child; + + *err = -EPERM; + if (pid == 1) + return NULL; + + *err = -ESRCH; + read_lock(&tasklist_lock); + child = find_task_by_pid(pid); + if (child) + get_task_struct(child); + read_unlock(&tasklist_lock); + if (child) { + *err = -EPERM; + if (child->pid == 1) + goto out; + *err = ptrace_check_attach(child, request == PTRACE_KILL); + if (*err < 0) + goto out; + return child; + } + out: + if (child) + put_task_struct(child); + return NULL; + +} + +asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) +{ + struct task_struct *child; + struct pt_regs *childregs; + void __user *datap = compat_ptr(data); + int ret; + __u32 val; + + switch (request) { + default: + return sys_ptrace(request, pid, addr, data); + + case PTRACE_PEEKTEXT: + case PTRACE_PEEKDATA: + case PTRACE_POKEDATA: + case PTRACE_POKETEXT: + case PTRACE_POKEUSR: + case PTRACE_PEEKUSR: + case PTRACE_GETREGS: + case PTRACE_SETREGS: + case PTRACE_SETFPREGS: + case PTRACE_GETFPREGS: + case PTRACE_SETFPXREGS: + case PTRACE_GETFPXREGS: + case PTRACE_GETEVENTMSG: + break; + } + + child = find_target(request, pid, &ret); + if (!child) + return ret; + + childregs = (struct pt_regs *)(child->thread.rsp0 - sizeof(struct pt_regs)); + + switch (request) { + case PTRACE_PEEKDATA: + case PTRACE_PEEKTEXT: + ret = 0; + if (access_process_vm(child, addr, &val, sizeof(u32), 0)!=sizeof(u32)) + ret = -EIO; + else + ret = put_user(val, (unsigned int __user *)datap); + break; + + case PTRACE_POKEDATA: + case PTRACE_POKETEXT: + ret = 0; + if (access_process_vm(child, addr, &data, sizeof(u32), 1)!=sizeof(u32)) + ret = -EIO; + break; + + case PTRACE_PEEKUSR: + ret = getreg32(child, addr, &val); + if (ret == 0) + ret = put_user(val, (__u32 __user *)datap); + break; + + case PTRACE_POKEUSR: + ret = putreg32(child, addr, data); + break; + + case PTRACE_GETREGS: { /* Get all gp regs from the child. */ + int i; + if (!access_ok(VERIFY_WRITE, datap, 16*4)) { + ret = -EIO; + break; + } + ret = 0; + for ( i = 0; i <= 16*4 ; i += sizeof(__u32) ) { + getreg32(child, i, &val); + ret |= __put_user(val,(u32 __user *)datap); + datap += sizeof(u32); + } + break; + } + + case PTRACE_SETREGS: { /* Set all gp regs in the child. */ + unsigned long tmp; + int i; + if (!access_ok(VERIFY_READ, datap, 16*4)) { + ret = -EIO; + break; + } + ret = 0; + for ( i = 0; i <= 16*4; i += sizeof(u32) ) { + ret |= __get_user(tmp, (u32 __user *)datap); + putreg32(child, i, tmp); + datap += sizeof(u32); + } + break; + } + + case PTRACE_GETFPREGS: + ret = -EIO; + if (!access_ok(VERIFY_READ, compat_ptr(data), + sizeof(struct user_i387_struct))) + break; + save_i387_ia32(child, datap, childregs, 1); + ret = 0; + break; + + case PTRACE_SETFPREGS: + ret = -EIO; + if (!access_ok(VERIFY_WRITE, datap, + sizeof(struct user_i387_struct))) + break; + ret = 0; + /* don't check EFAULT to be bug-to-bug compatible to i386 */ + restore_i387_ia32(child, datap, 1); + break; + + case PTRACE_GETFPXREGS: { + struct user32_fxsr_struct __user *u = datap; + init_fpu(child); + ret = -EIO; + if (!access_ok(VERIFY_WRITE, u, sizeof(*u))) + break; + ret = -EFAULT; + if (__copy_to_user(u, &child->thread.i387.fxsave, sizeof(*u))) + break; + ret = __put_user(childregs->cs, &u->fcs); + ret |= __put_user(child->thread.ds, &u->fos); + break; + } + case PTRACE_SETFPXREGS: { + struct user32_fxsr_struct __user *u = datap; + unlazy_fpu(child); + ret = -EIO; + if (!access_ok(VERIFY_READ, u, sizeof(*u))) + break; + /* no checking to be bug-to-bug compatible with i386 */ + __copy_from_user(&child->thread.i387.fxsave, u, sizeof(*u)); + set_stopped_child_used_math(child); + child->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; + ret = 0; + break; + } + + case PTRACE_GETEVENTMSG: + ret = put_user(child->ptrace_message,(unsigned int __user *)compat_ptr(data)); + break; + + default: + ret = -EINVAL; + break; + } + + put_task_struct(child); + return ret; +} + diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c new file mode 100644 index 00000000000..68a9ab06ee7 --- /dev/null +++ b/arch/x86_64/ia32/sys_ia32.c @@ -0,0 +1,1050 @@ +/* + * sys_ia32.c: Conversion between 32bit and 64bit native syscalls. Based on + * sys_sparc32 + * + * Copyright (C) 2000 VA Linux Co + * Copyright (C) 2000 Don Dugger <n0ano@valinux.com> + * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com> + * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 2000 Hewlett-Packard Co. + * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 2000,2001,2002 Andi Kleen, SuSE Labs (x86-64 port) + * + * These routines maintain argument size conversion between 32bit and 64bit + * environment. In 2.5 most of this should be moved to a generic directory. + * + * This file assumes that there is a hole at the end of user address space. + * + * Some of the functions are LE specific currently. These are hopefully all marked. + * This should be fixed. + */ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/file.h> +#include <linux/signal.h> +#include <linux/syscalls.h> +#include <linux/resource.h> +#include <linux/times.h> +#include <linux/utsname.h> +#include <linux/timex.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/sem.h> +#include <linux/msg.h> +#include <linux/mm.h> +#include <linux/shm.h> +#include <linux/slab.h> +#include <linux/uio.h> +#include <linux/nfs_fs.h> +#include <linux/quota.h> +#include <linux/module.h> +#include <linux/sunrpc/svc.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/cache.h> +#include <linux/nfsd/xdr.h> +#include <linux/nfsd/syscall.h> +#include <linux/poll.h> +#include <linux/personality.h> +#include <linux/stat.h> +#include <linux/ipc.h> +#include <linux/rwsem.h> +#include <linux/binfmts.h> +#include <linux/init.h> +#include <linux/aio_abi.h> +#include <linux/aio.h> +#include <linux/compat.h> +#include <linux/vfs.h> +#include <linux/ptrace.h> +#include <linux/highuid.h> +#include <linux/vmalloc.h> +#include <asm/mman.h> +#include <asm/types.h> +#include <asm/uaccess.h> +#include <asm/semaphore.h> +#include <asm/atomic.h> +#include <asm/ldt.h> + +#include <net/scm.h> +#include <net/sock.h> +#include <asm/ia32.h> + +#define AA(__x) ((unsigned long)(__x)) + +int cp_compat_stat(struct kstat *kbuf, struct compat_stat __user *ubuf) +{ + typeof(ubuf->st_uid) uid = 0; + typeof(ubuf->st_gid) gid = 0; + SET_UID(uid, kbuf->uid); + SET_GID(gid, kbuf->gid); + if (!old_valid_dev(kbuf->dev) || !old_valid_dev(kbuf->rdev)) + return -EOVERFLOW; + if (kbuf->size >= 0x7fffffff) + return -EOVERFLOW; + if (!access_ok(VERIFY_WRITE, ubuf, sizeof(struct compat_stat)) || + __put_user (old_encode_dev(kbuf->dev), &ubuf->st_dev) || + __put_user (kbuf->ino, &ubuf->st_ino) || + __put_user (kbuf->mode, &ubuf->st_mode) || + __put_user (kbuf->nlink, &ubuf->st_nlink) || + __put_user (uid, &ubuf->st_uid) || + __put_user (gid, &ubuf->st_gid) || + __put_user (old_encode_dev(kbuf->rdev), &ubuf->st_rdev) || + __put_user (kbuf->size, &ubuf->st_size) || + __put_user (kbuf->atime.tv_sec, &ubuf->st_atime) || + __put_user (kbuf->atime.tv_nsec, &ubuf->st_atime_nsec) || + __put_user (kbuf->mtime.tv_sec, &ubuf->st_mtime) || + __put_user (kbuf->mtime.tv_nsec, &ubuf->st_mtime_nsec) || + __put_user (kbuf->ctime.tv_sec, &ubuf->st_ctime) || + __put_user (kbuf->ctime.tv_nsec, &ubuf->st_ctime_nsec) || + __put_user (kbuf->blksize, &ubuf->st_blksize) || + __put_user (kbuf->blocks, &ubuf->st_blocks)) + return -EFAULT; + return 0; +} + +asmlinkage long +sys32_truncate64(char __user * filename, unsigned long offset_low, unsigned long offset_high) +{ + return sys_truncate(filename, ((loff_t) offset_high << 32) | offset_low); +} + +asmlinkage long +sys32_ftruncate64(unsigned int fd, unsigned long offset_low, unsigned long offset_high) +{ + return sys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low); +} + +/* Another set for IA32/LFS -- x86_64 struct stat is different due to + support for 64bit inode numbers. */ + +static int +cp_stat64(struct stat64 __user *ubuf, struct kstat *stat) +{ + typeof(ubuf->st_uid) uid = 0; + typeof(ubuf->st_gid) gid = 0; + SET_UID(uid, stat->uid); + SET_GID(gid, stat->gid); + if (!access_ok(VERIFY_WRITE, ubuf, sizeof(struct stat64)) || + __put_user(huge_encode_dev(stat->dev), &ubuf->st_dev) || + __put_user (stat->ino, &ubuf->__st_ino) || + __put_user (stat->ino, &ubuf->st_ino) || + __put_user (stat->mode, &ubuf->st_mode) || + __put_user (stat->nlink, &ubuf->st_nlink) || + __put_user (uid, &ubuf->st_uid) || + __put_user (gid, &ubuf->st_gid) || + __put_user (huge_encode_dev(stat->rdev), &ubuf->st_rdev) || + __put_user (stat->size, &ubuf->st_size) || + __put_user (stat->atime.tv_sec, &ubuf->st_atime) || + __put_user (stat->atime.tv_nsec, &ubuf->st_atime_nsec) || + __put_user (stat->mtime.tv_sec, &ubuf->st_mtime) || + __put_user (stat->mtime.tv_nsec, &ubuf->st_mtime_nsec) || + __put_user (stat->ctime.tv_sec, &ubuf->st_ctime) || + __put_user (stat->ctime.tv_nsec, &ubuf->st_ctime_nsec) || + __put_user (stat->blksize, &ubuf->st_blksize) || + __put_user (stat->blocks, &ubuf->st_blocks)) + return -EFAULT; + return 0; +} + +asmlinkage long +sys32_stat64(char __user * filename, struct stat64 __user *statbuf) +{ + struct kstat stat; + int ret = vfs_stat(filename, &stat); + if (!ret) + ret = cp_stat64(statbuf, &stat); + return ret; +} + +asmlinkage long +sys32_lstat64(char __user * filename, struct stat64 __user *statbuf) +{ + struct kstat stat; + int ret = vfs_lstat(filename, &stat); + if (!ret) + ret = cp_stat64(statbuf, &stat); + return ret; +} + +asmlinkage long +sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf) +{ + struct kstat stat; + int ret = vfs_fstat(fd, &stat); + if (!ret) + ret = cp_stat64(statbuf, &stat); + return ret; +} + +/* + * Linux/i386 didn't use to be able to handle more than + * 4 system call parameters, so these system calls used a memory + * block for parameter passing.. + */ + +struct mmap_arg_struct { + unsigned int addr; + unsigned int len; + unsigned int prot; + unsigned int flags; + unsigned int fd; + unsigned int offset; +}; + +asmlinkage long +sys32_mmap(struct mmap_arg_struct __user *arg) +{ + struct mmap_arg_struct a; + struct file *file = NULL; + unsigned long retval; + struct mm_struct *mm ; + + if (copy_from_user(&a, arg, sizeof(a))) + return -EFAULT; + + if (a.offset & ~PAGE_MASK) + return -EINVAL; + + if (!(a.flags & MAP_ANONYMOUS)) { + file = fget(a.fd); + if (!file) + return -EBADF; + } + + mm = current->mm; + down_write(&mm->mmap_sem); + retval = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags, a.offset>>PAGE_SHIFT); + if (file) + fput(file); + + up_write(&mm->mmap_sem); + + return retval; +} + +asmlinkage long +sys32_mprotect(unsigned long start, size_t len, unsigned long prot) +{ + return sys_mprotect(start,len,prot); +} + +asmlinkage long +sys32_pipe(int __user *fd) +{ + int retval; + int fds[2]; + + retval = do_pipe(fds); + if (retval) + goto out; + if (copy_to_user(fd, fds, sizeof(fds))) + retval = -EFAULT; + out: + return retval; +} + +asmlinkage long +sys32_rt_sigaction(int sig, struct sigaction32 __user *act, + struct sigaction32 __user *oact, unsigned int sigsetsize) +{ + struct k_sigaction new_ka, old_ka; + int ret; + compat_sigset_t set32; + + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(compat_sigset_t)) + return -EINVAL; + + if (act) { + compat_uptr_t handler, restorer; + + if (!access_ok(VERIFY_READ, act, sizeof(*act)) || + __get_user(handler, &act->sa_handler) || + __get_user(new_ka.sa.sa_flags, &act->sa_flags) || + __get_user(restorer, &act->sa_restorer)|| + __copy_from_user(&set32, &act->sa_mask, sizeof(compat_sigset_t))) + return -EFAULT; + new_ka.sa.sa_handler = compat_ptr(handler); + new_ka.sa.sa_restorer = compat_ptr(restorer); + /* FIXME: here we rely on _COMPAT_NSIG_WORS to be >= than _NSIG_WORDS << 1 */ + switch (_NSIG_WORDS) { + case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6] + | (((long)set32.sig[7]) << 32); + case 3: new_ka.sa.sa_mask.sig[2] = set32.sig[4] + | (((long)set32.sig[5]) << 32); + case 2: new_ka.sa.sa_mask.sig[1] = set32.sig[2] + | (((long)set32.sig[3]) << 32); + case 1: new_ka.sa.sa_mask.sig[0] = set32.sig[0] + | (((long)set32.sig[1]) << 32); + } + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + /* FIXME: here we rely on _COMPAT_NSIG_WORS to be >= than _NSIG_WORDS << 1 */ + switch (_NSIG_WORDS) { + case 4: + set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32); + set32.sig[6] = old_ka.sa.sa_mask.sig[3]; + case 3: + set32.sig[5] = (old_ka.sa.sa_mask.sig[2] >> 32); + set32.sig[4] = old_ka.sa.sa_mask.sig[2]; + case 2: + set32.sig[3] = (old_ka.sa.sa_mask.sig[1] >> 32); + set32.sig[2] = old_ka.sa.sa_mask.sig[1]; + case 1: + set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32); + set32.sig[0] = old_ka.sa.sa_mask.sig[0]; + } + if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || + __put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler) || + __put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer) || + __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || + __copy_to_user(&oact->sa_mask, &set32, sizeof(compat_sigset_t))) + return -EFAULT; + } + + return ret; +} + +asmlinkage long +sys32_sigaction (int sig, struct old_sigaction32 __user *act, struct old_sigaction32 __user *oact) +{ + struct k_sigaction new_ka, old_ka; + int ret; + + if (act) { + compat_old_sigset_t mask; + compat_uptr_t handler, restorer; + + if (!access_ok(VERIFY_READ, act, sizeof(*act)) || + __get_user(handler, &act->sa_handler) || + __get_user(new_ka.sa.sa_flags, &act->sa_flags) || + __get_user(restorer, &act->sa_restorer) || + __get_user(mask, &act->sa_mask)) + return -EFAULT; + + new_ka.sa.sa_handler = compat_ptr(handler); + new_ka.sa.sa_restorer = compat_ptr(restorer); + + siginitset(&new_ka.sa.sa_mask, mask); + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || + __put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler) || + __put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer) || + __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || + __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) + return -EFAULT; + } + + return ret; +} + +asmlinkage long +sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, + compat_sigset_t __user *oset, unsigned int sigsetsize) +{ + sigset_t s; + compat_sigset_t s32; + int ret; + mm_segment_t old_fs = get_fs(); + + if (set) { + if (copy_from_user (&s32, set, sizeof(compat_sigset_t))) + return -EFAULT; + switch (_NSIG_WORDS) { + case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32); + case 3: s.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32); + case 2: s.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32); + case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); + } + } + set_fs (KERNEL_DS); + ret = sys_rt_sigprocmask(how, set ? &s : NULL, oset ? &s : NULL, + sigsetsize); + set_fs (old_fs); + if (ret) return ret; + if (oset) { + switch (_NSIG_WORDS) { + case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; + case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2]; + case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; + case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; + } + if (copy_to_user (oset, &s32, sizeof(compat_sigset_t))) + return -EFAULT; + } + return 0; +} + +static inline long +get_tv32(struct timeval *o, struct compat_timeval __user *i) +{ + int err = -EFAULT; + if (access_ok(VERIFY_READ, i, sizeof(*i))) { + err = __get_user(o->tv_sec, &i->tv_sec); + err |= __get_user(o->tv_usec, &i->tv_usec); + } + return err; +} + +static inline long +put_tv32(struct compat_timeval __user *o, struct timeval *i) +{ + int err = -EFAULT; + if (access_ok(VERIFY_WRITE, o, sizeof(*o))) { + err = __put_user(i->tv_sec, &o->tv_sec); + err |= __put_user(i->tv_usec, &o->tv_usec); + } + return err; +} + +extern int do_setitimer(int which, struct itimerval *, struct itimerval *); + +asmlinkage long +sys32_alarm(unsigned int seconds) +{ + struct itimerval it_new, it_old; + unsigned int oldalarm; + + it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0; + it_new.it_value.tv_sec = seconds; + it_new.it_value.tv_usec = 0; + do_setitimer(ITIMER_REAL, &it_new, &it_old); + oldalarm = it_old.it_value.tv_sec; + /* ehhh.. We can't return 0 if we have an alarm pending.. */ + /* And we'd better return too much than too little anyway */ + if (it_old.it_value.tv_usec) + oldalarm++; + return oldalarm; +} + +/* Translations due to time_t size differences. Which affects all + sorts of things, like timeval and itimerval. */ + +extern struct timezone sys_tz; + +asmlinkage long +sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) +{ + if (tv) { + struct timeval ktv; + do_gettimeofday(&ktv); + if (put_tv32(tv, &ktv)) + return -EFAULT; + } + if (tz) { + if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) + return -EFAULT; + } + return 0; +} + +asmlinkage long +sys32_settimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) +{ + struct timeval ktv; + struct timespec kts; + struct timezone ktz; + + if (tv) { + if (get_tv32(&ktv, tv)) + return -EFAULT; + kts.tv_sec = ktv.tv_sec; + kts.tv_nsec = ktv.tv_usec * NSEC_PER_USEC; + } + if (tz) { + if (copy_from_user(&ktz, tz, sizeof(ktz))) + return -EFAULT; + } + + return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); +} + +struct sel_arg_struct { + unsigned int n; + unsigned int inp; + unsigned int outp; + unsigned int exp; + unsigned int tvp; +}; + +asmlinkage long +sys32_old_select(struct sel_arg_struct __user *arg) +{ + struct sel_arg_struct a; + + if (copy_from_user(&a, arg, sizeof(a))) + return -EFAULT; + return compat_sys_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp), + compat_ptr(a.exp), compat_ptr(a.tvp)); +} + +extern asmlinkage long +compat_sys_wait4(compat_pid_t pid, compat_uint_t * stat_addr, int options, + struct compat_rusage *ru); + +asmlinkage long +sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr, int options) +{ + return compat_sys_wait4(pid, stat_addr, options, NULL); +} + +int sys32_ni_syscall(int call) +{ + struct task_struct *me = current; + static char lastcomm[sizeof(me->comm)]; + + if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) { + printk(KERN_INFO "IA32 syscall %d from %s not implemented\n", + call, me->comm); + strncpy(lastcomm, me->comm, sizeof(lastcomm)); + } + return -ENOSYS; +} + +/* 32-bit timeval and related flotsam. */ + +asmlinkage long +sys32_sysfs(int option, u32 arg1, u32 arg2) +{ + return sys_sysfs(option, arg1, arg2); +} + +struct sysinfo32 { + s32 uptime; + u32 loads[3]; + u32 totalram; + u32 freeram; + u32 sharedram; + u32 bufferram; + u32 totalswap; + u32 freeswap; + unsigned short procs; + unsigned short pad; + u32 totalhigh; + u32 freehigh; + u32 mem_unit; + char _f[20-2*sizeof(u32)-sizeof(int)]; +}; + +asmlinkage long +sys32_sysinfo(struct sysinfo32 __user *info) +{ + struct sysinfo s; + int ret; + mm_segment_t old_fs = get_fs (); + int bitcount = 0; + + set_fs (KERNEL_DS); + ret = sys_sysinfo(&s); + set_fs (old_fs); + + /* Check to see if any memory value is too large for 32-bit and scale + * down if needed + */ + if ((s.totalram >> 32) || (s.totalswap >> 32)) { + while (s.mem_unit < PAGE_SIZE) { + s.mem_unit <<= 1; + bitcount++; + } + s.totalram >>= bitcount; + s.freeram >>= bitcount; + s.sharedram >>= bitcount; + s.bufferram >>= bitcount; + s.totalswap >>= bitcount; + s.freeswap >>= bitcount; + s.totalhigh >>= bitcount; + s.freehigh >>= bitcount; + } + + if (!access_ok(VERIFY_WRITE, info, sizeof(struct sysinfo32)) || + __put_user (s.uptime, &info->uptime) || + __put_user (s.loads[0], &info->loads[0]) || + __put_user (s.loads[1], &info->loads[1]) || + __put_user (s.loads[2], &info->loads[2]) || + __put_user (s.totalram, &info->totalram) || + __put_user (s.freeram, &info->freeram) || + __put_user (s.sharedram, &info->sharedram) || + __put_user (s.bufferram, &info->bufferram) || + __put_user (s.totalswap, &info->totalswap) || + __put_user (s.freeswap, &info->freeswap) || + __put_user (s.procs, &info->procs) || + __put_user (s.totalhigh, &info->totalhigh) || + __put_user (s.freehigh, &info->freehigh) || + __put_user (s.mem_unit, &info->mem_unit)) + return -EFAULT; + return 0; +} + +asmlinkage long +sys32_sched_rr_get_interval(compat_pid_t pid, struct compat_timespec __user *interval) +{ + struct timespec t; + int ret; + mm_segment_t old_fs = get_fs (); + + set_fs (KERNEL_DS); + ret = sys_sched_rr_get_interval(pid, &t); + set_fs (old_fs); + if (put_compat_timespec(&t, interval)) + return -EFAULT; + return ret; +} + +asmlinkage long +sys32_rt_sigpending(compat_sigset_t __user *set, compat_size_t sigsetsize) +{ + sigset_t s; + compat_sigset_t s32; + int ret; + mm_segment_t old_fs = get_fs(); + + set_fs (KERNEL_DS); + ret = sys_rt_sigpending(&s, sigsetsize); + set_fs (old_fs); + if (!ret) { + switch (_NSIG_WORDS) { + case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; + case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2]; + case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; + case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; + } + if (copy_to_user (set, &s32, sizeof(compat_sigset_t))) + return -EFAULT; + } + return ret; +} + +asmlinkage long +sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo) +{ + siginfo_t info; + int ret; + mm_segment_t old_fs = get_fs(); + + if (copy_siginfo_from_user32(&info, uinfo)) + return -EFAULT; + set_fs (KERNEL_DS); + ret = sys_rt_sigqueueinfo(pid, sig, &info); + set_fs (old_fs); + return ret; +} + +/* These are here just in case some old ia32 binary calls it. */ +asmlinkage long +sys32_pause(void) +{ + current->state = TASK_INTERRUPTIBLE; + schedule(); + return -ERESTARTNOHAND; +} + + +#ifdef CONFIG_SYSCTL +struct sysctl_ia32 { + unsigned int name; + int nlen; + unsigned int oldval; + unsigned int oldlenp; + unsigned int newval; + unsigned int newlen; + unsigned int __unused[4]; +}; + + +asmlinkage long +sys32_sysctl(struct sysctl_ia32 __user *args32) +{ + struct sysctl_ia32 a32; + mm_segment_t old_fs = get_fs (); + void __user *oldvalp, *newvalp; + size_t oldlen; + int __user *namep; + long ret; + extern int do_sysctl(int *name, int nlen, void *oldval, size_t *oldlenp, + void *newval, size_t newlen); + + + if (copy_from_user(&a32, args32, sizeof (a32))) + return -EFAULT; + + /* + * We need to pre-validate these because we have to disable address checking + * before calling do_sysctl() because of OLDLEN but we can't run the risk of the + * user specifying bad addresses here. Well, since we're dealing with 32 bit + * addresses, we KNOW that access_ok() will always succeed, so this is an + * expensive NOP, but so what... + */ + namep = compat_ptr(a32.name); + oldvalp = compat_ptr(a32.oldval); + newvalp = compat_ptr(a32.newval); + + if ((oldvalp && get_user(oldlen, (int __user *)compat_ptr(a32.oldlenp))) + || !access_ok(VERIFY_WRITE, namep, 0) + || !access_ok(VERIFY_WRITE, oldvalp, 0) + || !access_ok(VERIFY_WRITE, newvalp, 0)) + return -EFAULT; + + set_fs(KERNEL_DS); + lock_kernel(); + ret = do_sysctl(namep, a32.nlen, oldvalp, &oldlen, newvalp, (size_t) a32.newlen); + unlock_kernel(); + set_fs(old_fs); + + if (oldvalp && put_user (oldlen, (int __user *)compat_ptr(a32.oldlenp))) + return -EFAULT; + + return ret; +} +#endif + +/* warning: next two assume little endian */ +asmlinkage long +sys32_pread(unsigned int fd, char __user *ubuf, u32 count, u32 poslo, u32 poshi) +{ + return sys_pread64(fd, ubuf, count, + ((loff_t)AA(poshi) << 32) | AA(poslo)); +} + +asmlinkage long +sys32_pwrite(unsigned int fd, char __user *ubuf, u32 count, u32 poslo, u32 poshi) +{ + return sys_pwrite64(fd, ubuf, count, + ((loff_t)AA(poshi) << 32) | AA(poslo)); +} + + +asmlinkage long +sys32_personality(unsigned long personality) +{ + int ret; + if (personality(current->personality) == PER_LINUX32 && + personality == PER_LINUX) + personality = PER_LINUX32; + ret = sys_personality(personality); + if (ret == PER_LINUX32) + ret = PER_LINUX; + return ret; +} + +asmlinkage long +sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, s32 count) +{ + mm_segment_t old_fs = get_fs(); + int ret; + off_t of; + + if (offset && get_user(of, offset)) + return -EFAULT; + + set_fs(KERNEL_DS); + ret = sys_sendfile(out_fd, in_fd, offset ? &of : NULL, count); + set_fs(old_fs); + + if (!ret && offset && put_user(of, offset)) + return -EFAULT; + + return ret; +} + +/* Handle adjtimex compatibility. */ + +struct timex32 { + u32 modes; + s32 offset, freq, maxerror, esterror; + s32 status, constant, precision, tolerance; + struct compat_timeval time; + s32 tick; + s32 ppsfreq, jitter, shift, stabil; + s32 jitcnt, calcnt, errcnt, stbcnt; + s32 :32; s32 :32; s32 :32; s32 :32; + s32 :32; s32 :32; s32 :32; s32 :32; + s32 :32; s32 :32; s32 :32; s32 :32; +}; + +extern int do_adjtimex(struct timex *); + +asmlinkage long +sys32_adjtimex(struct timex32 __user *utp) +{ + struct timex txc; + int ret; + + memset(&txc, 0, sizeof(struct timex)); + + if (!access_ok(VERIFY_READ, utp, sizeof(struct timex32)) || + __get_user(txc.modes, &utp->modes) || + __get_user(txc.offset, &utp->offset) || + __get_user(txc.freq, &utp->freq) || + __get_user(txc.maxerror, &utp->maxerror) || + __get_user(txc.esterror, &utp->esterror) || + __get_user(txc.status, &utp->status) || + __get_user(txc.constant, &utp->constant) || + __get_user(txc.precision, &utp->precision) || + __get_user(txc.tolerance, &utp->tolerance) || + __get_user(txc.time.tv_sec, &utp->time.tv_sec) || + __get_user(txc.time.tv_usec, &utp->time.tv_usec) || + __get_user(txc.tick, &utp->tick) || + __get_user(txc.ppsfreq, &utp->ppsfreq) || + __get_user(txc.jitter, &utp->jitter) || + __get_user(txc.shift, &utp->shift) || + __get_user(txc.stabil, &utp->stabil) || + __get_user(txc.jitcnt, &utp->jitcnt) || + __get_user(txc.calcnt, &utp->calcnt) || + __get_user(txc.errcnt, &utp->errcnt) || + __get_user(txc.stbcnt, &utp->stbcnt)) + return -EFAULT; + + ret = do_adjtimex(&txc); + + if (!access_ok(VERIFY_WRITE, utp, sizeof(struct timex32)) || + __put_user(txc.modes, &utp->modes) || + __put_user(txc.offset, &utp->offset) || + __put_user(txc.freq, &utp->freq) || + __put_user(txc.maxerror, &utp->maxerror) || + __put_user(txc.esterror, &utp->esterror) || + __put_user(txc.status, &utp->status) || + __put_user(txc.constant, &utp->constant) || + __put_user(txc.precision, &utp->precision) || + __put_user(txc.tolerance, &utp->tolerance) || + __put_user(txc.time.tv_sec, &utp->time.tv_sec) || + __put_user(txc.time.tv_usec, &utp->time.tv_usec) || + __put_user(txc.tick, &utp->tick) || + __put_user(txc.ppsfreq, &utp->ppsfreq) || + __put_user(txc.jitter, &utp->jitter) || + __put_user(txc.shift, &utp->shift) || + __put_user(txc.stabil, &utp->stabil) || + __put_user(txc.jitcnt, &utp->jitcnt) || + __put_user(txc.calcnt, &utp->calcnt) || + __put_user(txc.errcnt, &utp->errcnt) || + __put_user(txc.stbcnt, &utp->stbcnt)) + ret = -EFAULT; + + return ret; +} + +asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff) +{ + struct mm_struct *mm = current->mm; + unsigned long error; + struct file * file = NULL; + + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); + if (!(flags & MAP_ANONYMOUS)) { + file = fget(fd); + if (!file) + return -EBADF; + } + + down_write(&mm->mmap_sem); + error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); + up_write(&mm->mmap_sem); + + if (file) + fput(file); + return error; +} + +asmlinkage long sys32_olduname(struct oldold_utsname __user * name) +{ + int error; + + if (!name) + return -EFAULT; + if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname))) + return -EFAULT; + + down_read(&uts_sem); + + error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); + __put_user(0,name->sysname+__OLD_UTS_LEN); + __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); + __put_user(0,name->nodename+__OLD_UTS_LEN); + __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); + __put_user(0,name->release+__OLD_UTS_LEN); + __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); + __put_user(0,name->version+__OLD_UTS_LEN); + { + char *arch = "x86_64"; + if (personality(current->personality) == PER_LINUX32) + arch = "i686"; + + __copy_to_user(&name->machine,arch,strlen(arch)+1); + } + + up_read(&uts_sem); + + error = error ? -EFAULT : 0; + + return error; +} + +long sys32_uname(struct old_utsname __user * name) +{ + int err; + if (!name) + return -EFAULT; + down_read(&uts_sem); + err=copy_to_user(name, &system_utsname, sizeof (*name)); + up_read(&uts_sem); + if (personality(current->personality) == PER_LINUX32) + err |= copy_to_user(&name->machine, "i686", 5); + return err?-EFAULT:0; +} + +long sys32_ustat(unsigned dev, struct ustat32 __user *u32p) +{ + struct ustat u; + mm_segment_t seg; + int ret; + + seg = get_fs(); + set_fs(KERNEL_DS); + ret = sys_ustat(dev,&u); + set_fs(seg); + if (ret >= 0) { + if (!access_ok(VERIFY_WRITE,u32p,sizeof(struct ustat32)) || + __put_user((__u32) u.f_tfree, &u32p->f_tfree) || + __put_user((__u32) u.f_tinode, &u32p->f_tfree) || + __copy_to_user(&u32p->f_fname, u.f_fname, sizeof(u.f_fname)) || + __copy_to_user(&u32p->f_fpack, u.f_fpack, sizeof(u.f_fpack))) + ret = -EFAULT; + } + return ret; +} + +asmlinkage long sys32_execve(char __user *name, compat_uptr_t __user *argv, + compat_uptr_t __user *envp, struct pt_regs *regs) +{ + long error; + char * filename; + + filename = getname(name); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + return error; + error = compat_do_execve(filename, argv, envp, regs); + if (error == 0) { + task_lock(current); + current->ptrace &= ~PT_DTRACE; + task_unlock(current); + } + putname(filename); + return error; +} + +asmlinkage long sys32_clone(unsigned int clone_flags, unsigned int newsp, + struct pt_regs *regs) +{ + void __user *parent_tid = (void __user *)regs->rdx; + void __user *child_tid = (void __user *)regs->rdi; + if (!newsp) + newsp = regs->rsp; + return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); +} + +/* + * Some system calls that need sign extended arguments. This could be done by a generic wrapper. + */ + +long sys32_lseek (unsigned int fd, int offset, unsigned int whence) +{ + return sys_lseek(fd, offset, whence); +} + +long sys32_kill(int pid, int sig) +{ + return sys_kill(pid, sig); +} + +asmlinkage long sys32_open(const char __user * filename, int flags, int mode) +{ + char * tmp; + int fd, error; + + /* don't force O_LARGEFILE */ + tmp = getname(filename); + fd = PTR_ERR(tmp); + if (!IS_ERR(tmp)) { + fd = get_unused_fd(); + if (fd >= 0) { + struct file *f = filp_open(tmp, flags, mode); + error = PTR_ERR(f); + if (IS_ERR(f)) { + put_unused_fd(fd); + fd = error; + } else + fd_install(fd, f); + } + putname(tmp); + } + return fd; +} + +extern asmlinkage long +sys_timer_create(clockid_t which_clock, + struct sigevent __user *timer_event_spec, + timer_t __user * created_timer_id); + +long +sys32_timer_create(u32 clock, struct compat_sigevent __user *se32, timer_t __user *timer_id) +{ + struct sigevent __user *p = NULL; + if (se32) { + struct sigevent se; + p = compat_alloc_user_space(sizeof(struct sigevent)); + if (get_compat_sigevent(&se, se32) || + copy_to_user(p, &se, sizeof(se))) + return -EFAULT; + } + return sys_timer_create(clock, p, timer_id); +} + +long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, + __u32 len_low, __u32 len_high, int advice) +{ + return sys_fadvise64_64(fd, + (((u64)offset_high)<<32) | offset_low, + (((u64)len_high)<<32) | len_low, + advice); +} + +long sys32_vm86_warning(void) +{ + struct task_struct *me = current; + static char lastcomm[sizeof(me->comm)]; + if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) { + printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n", + me->comm); + strncpy(lastcomm, me->comm, sizeof(lastcomm)); + } + return -ENOSYS; +} + +long sys32_lookup_dcookie(u32 addr_low, u32 addr_high, + char __user * buf, size_t len) +{ + return sys_lookup_dcookie(((u64)addr_high << 32) | addr_low, buf, len); +} + +static int __init ia32_init (void) +{ + printk("IA32 emulation $Id: sys_ia32.c,v 1.32 2002/03/24 13:02:28 ak Exp $\n"); + return 0; +} + +__initcall(ia32_init); + +extern unsigned long ia32_sys_call_table[]; +EXPORT_SYMBOL(ia32_sys_call_table); diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c new file mode 100644 index 00000000000..399ff498509 --- /dev/null +++ b/arch/x86_64/ia32/syscall32.c @@ -0,0 +1,111 @@ +/* Copyright 2002,2003 Andi Kleen, SuSE Labs */ + +/* vsyscall handling for 32bit processes. Map a stub page into it + on demand because 32bit cannot reach the kernel's fixmaps */ + +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/gfp.h> +#include <linux/init.h> +#include <linux/stringify.h> +#include <asm/proto.h> +#include <asm/tlbflush.h> +#include <asm/ia32_unistd.h> + +/* 32bit VDSOs mapped into user space. */ +asm(".section \".init.data\",\"aw\"\n" + "syscall32_syscall:\n" + ".incbin \"arch/x86_64/ia32/vsyscall-syscall.so\"\n" + "syscall32_syscall_end:\n" + "syscall32_sysenter:\n" + ".incbin \"arch/x86_64/ia32/vsyscall-sysenter.so\"\n" + "syscall32_sysenter_end:\n" + ".previous"); + +extern unsigned char syscall32_syscall[], syscall32_syscall_end[]; +extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[]; +extern int sysctl_vsyscall32; + +char *syscall32_page; +static int use_sysenter = -1; + +/* + * Map the 32bit vsyscall page on demand. + * + * RED-PEN: This knows too much about high level VM. + * + * Alternative would be to generate a vma with appropriate backing options + * and let it be handled by generic VM. + */ +int __map_syscall32(struct mm_struct *mm, unsigned long address) +{ + pgd_t *pgd; + pud_t *pud; + pte_t *pte; + pmd_t *pmd; + int err = -ENOMEM; + + spin_lock(&mm->page_table_lock); + pgd = pgd_offset(mm, address); + pud = pud_alloc(mm, pgd, address); + if (pud) { + pmd = pmd_alloc(mm, pud, address); + if (pmd && (pte = pte_alloc_map(mm, pmd, address)) != NULL) { + if (pte_none(*pte)) { + set_pte(pte, + mk_pte(virt_to_page(syscall32_page), + PAGE_KERNEL_VSYSCALL32)); + } + /* Flush only the local CPU. Other CPUs taking a fault + will just end up here again + This probably not needed and just paranoia. */ + __flush_tlb_one(address); + err = 0; + } + } + spin_unlock(&mm->page_table_lock); + return err; +} + +int map_syscall32(struct mm_struct *mm, unsigned long address) +{ + int err; + down_read(&mm->mmap_sem); + err = __map_syscall32(mm, address); + up_read(&mm->mmap_sem); + return err; +} + +static int __init init_syscall32(void) +{ + syscall32_page = (void *)get_zeroed_page(GFP_KERNEL); + if (!syscall32_page) + panic("Cannot allocate syscall32 page"); + SetPageReserved(virt_to_page(syscall32_page)); + if (use_sysenter > 0) { + memcpy(syscall32_page, syscall32_sysenter, + syscall32_sysenter_end - syscall32_sysenter); + } else { + memcpy(syscall32_page, syscall32_syscall, + syscall32_syscall_end - syscall32_syscall); + } + return 0; +} + +__initcall(init_syscall32); + +/* May not be __init: called during resume */ +void syscall32_cpu_init(void) +{ + if (use_sysenter < 0) + use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL); + + /* Load these always in case some future AMD CPU supports + SYSENTER from compat mode too. */ + checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); + checking_wrmsrl(MSR_IA32_SYSENTER_ESP, 0ULL); + checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target); + + wrmsrl(MSR_CSTAR, ia32_cstar_target); +} diff --git a/arch/x86_64/ia32/tls32.c b/arch/x86_64/ia32/tls32.c new file mode 100644 index 00000000000..1cc4340de3c --- /dev/null +++ b/arch/x86_64/ia32/tls32.c @@ -0,0 +1,163 @@ +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/user.h> + +#include <asm/uaccess.h> +#include <asm/desc.h> +#include <asm/system.h> +#include <asm/ldt.h> +#include <asm/processor.h> +#include <asm/proto.h> + +/* + * sys_alloc_thread_area: get a yet unused TLS descriptor index. + */ +static int get_free_idx(void) +{ + struct thread_struct *t = ¤t->thread; + int idx; + + for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++) + if (desc_empty((struct n_desc_struct *)(t->tls_array) + idx)) + return idx + GDT_ENTRY_TLS_MIN; + return -ESRCH; +} + +/* + * Set a given TLS descriptor: + * When you want addresses > 32bit use arch_prctl() + */ +int do_set_thread_area(struct thread_struct *t, struct user_desc __user *u_info) +{ + struct user_desc info; + struct n_desc_struct *desc; + int cpu, idx; + + if (copy_from_user(&info, u_info, sizeof(info))) + return -EFAULT; + + idx = info.entry_number; + + /* + * index -1 means the kernel should try to find and + * allocate an empty descriptor: + */ + if (idx == -1) { + idx = get_free_idx(); + if (idx < 0) + return idx; + if (put_user(idx, &u_info->entry_number)) + return -EFAULT; + } + + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) + return -EINVAL; + + desc = ((struct n_desc_struct *)t->tls_array) + idx - GDT_ENTRY_TLS_MIN; + + /* + * We must not get preempted while modifying the TLS. + */ + cpu = get_cpu(); + + if (LDT_empty(&info)) { + desc->a = 0; + desc->b = 0; + } else { + desc->a = LDT_entry_a(&info); + desc->b = LDT_entry_b(&info); + } + if (t == ¤t->thread) + load_TLS(t, cpu); + + put_cpu(); + return 0; +} + +asmlinkage long sys32_set_thread_area(struct user_desc __user *u_info) +{ + return do_set_thread_area(¤t->thread, u_info); +} + + +/* + * Get the current Thread-Local Storage area: + */ + +#define GET_BASE(desc) ( \ + (((desc)->a >> 16) & 0x0000ffff) | \ + (((desc)->b << 16) & 0x00ff0000) | \ + ( (desc)->b & 0xff000000) ) + +#define GET_LIMIT(desc) ( \ + ((desc)->a & 0x0ffff) | \ + ((desc)->b & 0xf0000) ) + +#define GET_32BIT(desc) (((desc)->b >> 22) & 1) +#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3) +#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1) +#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1) +#define GET_PRESENT(desc) (((desc)->b >> 15) & 1) +#define GET_USEABLE(desc) (((desc)->b >> 20) & 1) +#define GET_LONGMODE(desc) (((desc)->b >> 21) & 1) + +int do_get_thread_area(struct thread_struct *t, struct user_desc __user *u_info) +{ + struct user_desc info; + struct n_desc_struct *desc; + int idx; + + if (get_user(idx, &u_info->entry_number)) + return -EFAULT; + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) + return -EINVAL; + + desc = ((struct n_desc_struct *)t->tls_array) + idx - GDT_ENTRY_TLS_MIN; + + memset(&info, 0, sizeof(struct user_desc)); + info.entry_number = idx; + info.base_addr = GET_BASE(desc); + info.limit = GET_LIMIT(desc); + info.seg_32bit = GET_32BIT(desc); + info.contents = GET_CONTENTS(desc); + info.read_exec_only = !GET_WRITABLE(desc); + info.limit_in_pages = GET_LIMIT_PAGES(desc); + info.seg_not_present = !GET_PRESENT(desc); + info.useable = GET_USEABLE(desc); + info.lm = GET_LONGMODE(desc); + + if (copy_to_user(u_info, &info, sizeof(info))) + return -EFAULT; + return 0; +} + +asmlinkage long sys32_get_thread_area(struct user_desc __user *u_info) +{ + return do_get_thread_area(¤t->thread, u_info); +} + + +int ia32_child_tls(struct task_struct *p, struct pt_regs *childregs) +{ + struct n_desc_struct *desc; + struct user_desc info; + struct user_desc __user *cp; + int idx; + + cp = (void __user *)childregs->rsi; + if (copy_from_user(&info, cp, sizeof(info))) + return -EFAULT; + if (LDT_empty(&info)) + return -EINVAL; + + idx = info.entry_number; + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) + return -EINVAL; + + desc = (struct n_desc_struct *)(p->thread.tls_array) + idx - GDT_ENTRY_TLS_MIN; + desc->a = LDT_entry_a(&info); + desc->b = LDT_entry_b(&info); + + return 0; +} diff --git a/arch/x86_64/ia32/vsyscall-sigreturn.S b/arch/x86_64/ia32/vsyscall-sigreturn.S new file mode 100644 index 00000000000..ba4067d350e --- /dev/null +++ b/arch/x86_64/ia32/vsyscall-sigreturn.S @@ -0,0 +1,120 @@ +/* + * Common code for the sigreturn entry points on the vsyscall page. + * This code uses SYSCALL_ENTER_KERNEL (either syscall or int $0x80) + * to enter the kernel. + * This file is #include'd by vsyscall-*.S to define them after the + * vsyscall entry point. The addresses we get for these entry points + * by doing ".balign 32" must match in both versions of the page. + */ + + .section .text.sigreturn,"ax" + .balign 32 + .globl __kernel_sigreturn + .type __kernel_sigreturn,@function +__kernel_sigreturn: +.LSTART_sigreturn: + popl %eax + movl $__NR_ia32_sigreturn, %eax + SYSCALL_ENTER_KERNEL +.LEND_sigreturn: + .size __kernel_sigreturn,.-.LSTART_sigreturn + + .section .text.rtsigreturn,"ax" + .balign 32 + .globl __kernel_rt_sigreturn + .type __kernel_rt_sigreturn,@function +__kernel_rt_sigreturn: +.LSTART_rt_sigreturn: + movl $__NR_ia32_rt_sigreturn, %eax + SYSCALL_ENTER_KERNEL +.LEND_rt_sigreturn: + .size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn + + .section .eh_frame,"a",@progbits + .long .LENDFDE2-.LSTARTFDE2 /* Length FDE */ +.LSTARTFDE2: + .long .LSTARTFDE2-.LSTARTFRAME /* CIE pointer */ + /* HACK: The dwarf2 unwind routines will subtract 1 from the + return address to get an address in the middle of the + presumed call instruction. Since we didn't get here via + a call, we need to include the nop before the real start + to make up for it. */ + .long .LSTART_sigreturn-1-. /* PC-relative start address */ + .long .LEND_sigreturn-.LSTART_sigreturn+1 + .uleb128 0 /* Augmentation length */ + /* What follows are the instructions for the table generation. + We record the locations of each register saved. This is + complicated by the fact that the "CFA" is always assumed to + be the value of the stack pointer in the caller. This means + that we must define the CFA of this body of code to be the + saved value of the stack pointer in the sigcontext. Which + also means that there is no fixed relation to the other + saved registers, which means that we must use DW_CFA_expression + to compute their addresses. It also means that when we + adjust the stack with the popl, we have to do it all over again. */ + +#define do_cfa_expr(offset) \ + .byte 0x0f; /* DW_CFA_def_cfa_expression */ \ + .uleb128 1f-0f; /* length */ \ +0: .byte 0x74; /* DW_OP_breg4 */ \ + .sleb128 offset; /* offset */ \ + .byte 0x06; /* DW_OP_deref */ \ +1: + +#define do_expr(regno, offset) \ + .byte 0x10; /* DW_CFA_expression */ \ + .uleb128 regno; /* regno */ \ + .uleb128 1f-0f; /* length */ \ +0: .byte 0x74; /* DW_OP_breg4 */ \ + .sleb128 offset; /* offset */ \ +1: + + do_cfa_expr(IA32_SIGCONTEXT_esp+4) + do_expr(0, IA32_SIGCONTEXT_eax+4) + do_expr(1, IA32_SIGCONTEXT_ecx+4) + do_expr(2, IA32_SIGCONTEXT_edx+4) + do_expr(3, IA32_SIGCONTEXT_ebx+4) + do_expr(5, IA32_SIGCONTEXT_ebp+4) + do_expr(6, IA32_SIGCONTEXT_esi+4) + do_expr(7, IA32_SIGCONTEXT_edi+4) + do_expr(8, IA32_SIGCONTEXT_eip+4) + + .byte 0x42 /* DW_CFA_advance_loc 2 -- nop; popl eax. */ + + do_cfa_expr(IA32_SIGCONTEXT_esp) + do_expr(0, IA32_SIGCONTEXT_eax) + do_expr(1, IA32_SIGCONTEXT_ecx) + do_expr(2, IA32_SIGCONTEXT_edx) + do_expr(3, IA32_SIGCONTEXT_ebx) + do_expr(5, IA32_SIGCONTEXT_ebp) + do_expr(6, IA32_SIGCONTEXT_esi) + do_expr(7, IA32_SIGCONTEXT_edi) + do_expr(8, IA32_SIGCONTEXT_eip) + + .align 4 +.LENDFDE2: + + .long .LENDFDE3-.LSTARTFDE3 /* Length FDE */ +.LSTARTFDE3: + .long .LSTARTFDE3-.LSTARTFRAME /* CIE pointer */ + /* HACK: See above wrt unwind library assumptions. */ + .long .LSTART_rt_sigreturn-1-. /* PC-relative start address */ + .long .LEND_rt_sigreturn-.LSTART_rt_sigreturn+1 + .uleb128 0 /* Augmentation */ + /* What follows are the instructions for the table generation. + We record the locations of each register saved. This is + slightly less complicated than the above, since we don't + modify the stack pointer in the process. */ + + do_cfa_expr(IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_esp) + do_expr(0, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_eax) + do_expr(1, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ecx) + do_expr(2, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_edx) + do_expr(3, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ebx) + do_expr(5, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ebp) + do_expr(6, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_esi) + do_expr(7, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_edi) + do_expr(8, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_eip) + + .align 4 +.LENDFDE3: diff --git a/arch/x86_64/ia32/vsyscall-syscall.S b/arch/x86_64/ia32/vsyscall-syscall.S new file mode 100644 index 00000000000..e2aaf3de8a4 --- /dev/null +++ b/arch/x86_64/ia32/vsyscall-syscall.S @@ -0,0 +1,68 @@ +/* + * Code for the vsyscall page. This version uses the syscall instruction. + */ + +#include <asm/ia32_unistd.h> +#include <asm/offset.h> +#include <asm/segment.h> + + .text + .section .text.vsyscall,"ax" + .globl __kernel_vsyscall + .type __kernel_vsyscall,@function +__kernel_vsyscall: +.LSTART_vsyscall: + push %ebp +.Lpush_ebp: + movl %ecx, %ebp + syscall + movl $__USER32_DS, %ecx + movl %ecx, %ss + movl %ebp, %ecx + popl %ebp +.Lpop_ebp: + ret +.LEND_vsyscall: + .size __kernel_vsyscall,.-.LSTART_vsyscall + + .section .eh_frame,"a",@progbits +.LSTARTFRAME: + .long .LENDCIE-.LSTARTCIE +.LSTARTCIE: + .long 0 /* CIE ID */ + .byte 1 /* Version number */ + .string "zR" /* NUL-terminated augmentation string */ + .uleb128 1 /* Code alignment factor */ + .sleb128 -4 /* Data alignment factor */ + .byte 8 /* Return address register column */ + .uleb128 1 /* Augmentation value length */ + .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */ + .byte 0x0c /* DW_CFA_def_cfa */ + .uleb128 4 + .uleb128 4 + .byte 0x88 /* DW_CFA_offset, column 0x8 */ + .uleb128 1 + .align 4 +.LENDCIE: + + .long .LENDFDE1-.LSTARTFDE1 /* Length FDE */ +.LSTARTFDE1: + .long .LSTARTFDE1-.LSTARTFRAME /* CIE pointer */ + .long .LSTART_vsyscall-. /* PC-relative start address */ + .long .LEND_vsyscall-.LSTART_vsyscall + .uleb128 0 /* Augmentation length */ + /* What follows are the instructions for the table generation. + We have to record all changes of the stack pointer. */ + .byte 0x40 + .Lpush_ebp-.LSTART_vsyscall /* DW_CFA_advance_loc */ + .byte 0x0e /* DW_CFA_def_cfa_offset */ + .uleb128 8 + .byte 0x85, 0x02 /* DW_CFA_offset %ebp -8 */ + .byte 0x40 + .Lpop_ebp-.Lpush_ebp /* DW_CFA_advance_loc */ + .byte 0xc5 /* DW_CFA_restore %ebp */ + .byte 0x0e /* DW_CFA_def_cfa_offset */ + .uleb128 4 + .align 4 +.LENDFDE1: + +#define SYSCALL_ENTER_KERNEL syscall +#include "vsyscall-sigreturn.S" diff --git a/arch/x86_64/ia32/vsyscall-sysenter.S b/arch/x86_64/ia32/vsyscall-sysenter.S new file mode 100644 index 00000000000..8fb8e0ff3af --- /dev/null +++ b/arch/x86_64/ia32/vsyscall-sysenter.S @@ -0,0 +1,94 @@ +/* + * Code for the vsyscall page. This version uses the sysenter instruction. + */ + +#include <asm/ia32_unistd.h> +#include <asm/offset.h> + + .text + .section .text.vsyscall,"ax" + .globl __kernel_vsyscall + .type __kernel_vsyscall,@function +__kernel_vsyscall: +.LSTART_vsyscall: + push %ecx +.Lpush_ecx: + push %edx +.Lpush_edx: + push %ebp +.Lenter_kernel: + movl %esp,%ebp + sysenter + .space 7,0x90 + jmp .Lenter_kernel + /* 16: System call normal return point is here! */ + pop %ebp +.Lpop_ebp: + pop %edx +.Lpop_edx: + pop %ecx +.Lpop_ecx: + ret +.LEND_vsyscall: + .size __kernel_vsyscall,.-.LSTART_vsyscall + + .section .eh_frame,"a",@progbits +.LSTARTFRAME: + .long .LENDCIE-.LSTARTCIE +.LSTARTCIE: + .long 0 /* CIE ID */ + .byte 1 /* Version number */ + .string "zR" /* NUL-terminated augmentation string */ + .uleb128 1 /* Code alignment factor */ + .sleb128 -4 /* Data alignment factor */ + .byte 8 /* Return address register column */ + .uleb128 1 /* Augmentation value length */ + .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */ + .byte 0x0c /* DW_CFA_def_cfa */ + .uleb128 4 + .uleb128 4 + .byte 0x88 /* DW_CFA_offset, column 0x8 */ + .uleb128 1 + .align 4 +.LENDCIE: + + .long .LENDFDE1-.LSTARTFDE1 /* Length FDE */ +.LSTARTFDE1: + .long .LSTARTFDE1-.LSTARTFRAME /* CIE pointer */ + .long .LSTART_vsyscall-. /* PC-relative start address */ + .long .LEND_vsyscall-.LSTART_vsyscall + .uleb128 0 /* Augmentation length */ + /* What follows are the instructions for the table generation. + We have to record all changes of the stack pointer. */ + .byte 0x04 /* DW_CFA_advance_loc4 */ + .long .Lpush_ecx-.LSTART_vsyscall + .byte 0x0e /* DW_CFA_def_cfa_offset */ + .byte 0x08 /* RA at offset 8 now */ + .byte 0x04 /* DW_CFA_advance_loc4 */ + .long .Lpush_edx-.Lpush_ecx + .byte 0x0e /* DW_CFA_def_cfa_offset */ + .byte 0x0c /* RA at offset 12 now */ + .byte 0x04 /* DW_CFA_advance_loc4 */ + .long .Lenter_kernel-.Lpush_edx + .byte 0x0e /* DW_CFA_def_cfa_offset */ + .byte 0x10 /* RA at offset 16 now */ + .byte 0x85, 0x04 /* DW_CFA_offset %ebp -16 */ + /* Finally the epilogue. */ + .byte 0x04 /* DW_CFA_advance_loc4 */ + .long .Lpop_ebp-.Lenter_kernel + .byte 0x0e /* DW_CFA_def_cfa_offset */ + .byte 0x12 /* RA at offset 12 now */ + .byte 0xc5 /* DW_CFA_restore %ebp */ + .byte 0x04 /* DW_CFA_advance_loc4 */ + .long .Lpop_edx-.Lpop_ebp + .byte 0x0e /* DW_CFA_def_cfa_offset */ + .byte 0x08 /* RA at offset 8 now */ + .byte 0x04 /* DW_CFA_advance_loc4 */ + .long .Lpop_ecx-.Lpop_edx + .byte 0x0e /* DW_CFA_def_cfa_offset */ + .byte 0x04 /* RA at offset 4 now */ + .align 4 +.LENDFDE1: + +#define SYSCALL_ENTER_KERNEL int $0x80 +#include "vsyscall-sigreturn.S" diff --git a/arch/x86_64/ia32/vsyscall.lds b/arch/x86_64/ia32/vsyscall.lds new file mode 100644 index 00000000000..fa4b4dd4a9f --- /dev/null +++ b/arch/x86_64/ia32/vsyscall.lds @@ -0,0 +1,77 @@ +/* + * Linker script for vsyscall DSO. The vsyscall page is an ELF shared + * object prelinked to its virtual address. This script controls its layout. + */ + +/* This must match <asm/fixmap.h>. */ +VSYSCALL_BASE = 0xffffe000; + +SECTIONS +{ + . = VSYSCALL_BASE + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + /* This linker script is used both with -r and with -shared. + For the layouts to match, we need to skip more than enough + space for the dynamic symbol table et al. If this amount + is insufficient, ld -shared will barf. Just increase it here. */ + . = VSYSCALL_BASE + 0x400; + + .text.vsyscall : { *(.text.vsyscall) } :text =0x90909090 + + /* This is an 32bit object and we cannot easily get the offsets + into the 64bit kernel. Just hardcode them here. This assumes + that all the stubs don't need more than 0x100 bytes. */ + . = VSYSCALL_BASE + 0x500; + + .text.sigreturn : { *(.text.sigreturn) } :text =0x90909090 + + . = VSYSCALL_BASE + 0x600; + + .text.rtsigreturn : { *(.text.rtsigreturn) } :text =0x90909090 + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + .dynamic : { *(.dynamic) } :text :dynamic + .useless : { + *(.got.plt) *(.got) + *(.data .data.* .gnu.linkonce.d.*) + *(.dynbss) + *(.bss .bss.* .gnu.linkonce.b.*) + } :text +} + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */ +} + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + LINUX_2.5 { + global: + __kernel_vsyscall; + __kernel_sigreturn; + __kernel_rt_sigreturn; + + local: *; + }; +} + +/* The ELF entry point can be used to set the AT_SYSINFO value. */ +ENTRY(__kernel_vsyscall); |