summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorJeff Dike <jdike@addtoit.com>2008-02-08 04:22:07 -0800
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2008-02-08 09:22:42 -0800
commit536788fe2d28e11db6aeda74207d95d750fb761f (patch)
tree73df2d3a46c542c71d3a84c20c8fd1ce617386a3 /arch
parent2f569afd9ced9ebec9a6eb3dbf6f83429be0a7b4 (diff)
uml: runtime host VMSPLIT detection
Calculate TASK_SIZE at run-time by figuring out the host's VMSPLIT - this is needed on i386 if UML is to run on hosts with varying VMSPLITs without recompilation. TASK_SIZE is now defined in terms of a variable, task_size. This gets rid of an include of pgtable.h from processor.h, which can cause include loops. On i386, task_size is calculated early in boot by probing the address space in a binary search to figure out where the boundary between usable and non-usable memory is. This tries to make sure that a page that is considered to be in userspace is, or can be made, read-write. I'm concerned about a system-global VDSO page in kernel memory being hit and considered to be a userspace page. On x86_64, task_size is just the old value of CONFIG_TOP_ADDR. A bunch of config variable are gone now. CONFIG_TOP_ADDR is directly replaced by TASK_SIZE. NEST_LEVEL is gone since the relocation of the stubs makes it irrelevant. All the HOST_VMSPLIT stuff is gone. All references to these in arch/um/Makefile are also gone. I noticed and fixed a missing extern in os.h when adding os_get_task_size. Note: This has been revised to fix the 32-bit UML on 64-bit host bug that Miklos ran into. Signed-off-by: Jeff Dike <jdike@linux.intel.com> Cc: Miklos Szeredi <miklos@szeredi.hu> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/um/Kconfig11
-rw-r--r--arch/um/Kconfig.i38637
-rw-r--r--arch/um/Kconfig.x86_644
-rw-r--r--arch/um/Makefile11
-rw-r--r--arch/um/defconfig3
-rw-r--r--arch/um/include/as-layout.h2
-rw-r--r--arch/um/include/os.h5
-rw-r--r--arch/um/kernel/exec.c2
-rw-r--r--arch/um/kernel/um_arch.c16
-rw-r--r--arch/um/os-Linux/sys-i386/Makefile2
-rw-r--r--arch/um/os-Linux/sys-i386/task_size.c120
-rw-r--r--arch/um/os-Linux/sys-x86_64/Makefile2
-rw-r--r--arch/um/os-Linux/sys-x86_64/task_size.c5
13 files changed, 148 insertions, 72 deletions
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index 99e51d059a0..dba8e05f028 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -203,17 +203,6 @@ config NR_CPUS
depends on SMP
default "32"
-config NEST_LEVEL
- int "Nesting level"
- default "0"
- help
- This is set to the number of layers of UMLs that this UML will be run
- in. Normally, this is zero, meaning that it will run directly on the
- host. Setting it to one will build a UML that can run inside a UML
- that is running on the host. Generally, if you intend this UML to run
- inside another UML, set CONFIG_NEST_LEVEL to one more than the host
- UML.
-
config HIGHMEM
bool "Highmem support (EXPERIMENTAL)"
depends on !64BIT && EXPERIMENTAL
diff --git a/arch/um/Kconfig.i386 b/arch/um/Kconfig.i386
index e75264603d2..3cd8a04d66d 100644
--- a/arch/um/Kconfig.i386
+++ b/arch/um/Kconfig.i386
@@ -23,43 +23,6 @@ config SEMAPHORE_SLEEPERS
bool
default y
-choice
- prompt "Host memory split"
- default HOST_VMSPLIT_3G
- help
- This is needed when the host kernel on which you run has a non-default
- (like 2G/2G) memory split, instead of the customary 3G/1G. If you did
- not recompile your own kernel but use the default distro's one, you can
- safely accept the "Default split" option.
-
- It can be enabled on recent (>=2.6.16-rc2) vanilla kernels via
- CONFIG_VM_SPLIT_*, or on previous kernels with special patches (-ck
- patchset by Con Kolivas, or other ones) - option names match closely the
- host CONFIG_VM_SPLIT_* ones.
-
- A lower setting (where 1G/3G is lowest and 3G/1G is higher) will
- tolerate even more "normal" host kernels, but an higher setting will be
- stricter.
-
- So, if you do not know what to do here, say 'Default split'.
-
-config HOST_VMSPLIT_3G
- bool "Default split (3G/1G user/kernel host split)"
-config HOST_VMSPLIT_3G_OPT
- bool "3G/1G user/kernel host split (for full 1G low memory)"
-config HOST_VMSPLIT_2G
- bool "2G/2G user/kernel host split"
-config HOST_VMSPLIT_1G
- bool "1G/3G user/kernel host split"
-endchoice
-
-config TOP_ADDR
- hex
- default 0xB0000000 if HOST_VMSPLIT_3G_OPT
- default 0x78000000 if HOST_VMSPLIT_2G
- default 0x40000000 if HOST_VMSPLIT_1G
- default 0xC0000000
-
config 3_LEVEL_PGTABLES
bool "Three-level pagetables (EXPERIMENTAL)"
default n
diff --git a/arch/um/Kconfig.x86_64 b/arch/um/Kconfig.x86_64
index b438f0e1427..6533b349f06 100644
--- a/arch/um/Kconfig.x86_64
+++ b/arch/um/Kconfig.x86_64
@@ -15,10 +15,6 @@ config SEMAPHORE_SLEEPERS
bool
default y
-config TOP_ADDR
- hex
- default 0x7fc0000000
-
config 3_LEVEL_PGTABLES
bool
default y
diff --git a/arch/um/Makefile b/arch/um/Makefile
index cb4af9bf207..dbeab15e7bb 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -79,13 +79,6 @@ KERNEL_DEFINES = $(strip -Derrno=kernel_errno -Dsigprocmask=kernel_sigprocmask \
KBUILD_CFLAGS += $(KERNEL_DEFINES)
KBUILD_CFLAGS += $(call cc-option,-fno-unit-at-a-time,)
-# These are needed for clean and mrproper, since in that case .config is not
-# included; the values here are meaningless
-
-CONFIG_NEST_LEVEL ?= 0
-
-SIZE = ($(CONFIG_NEST_LEVEL) * 0x20000000)
-
PHONY += linux
all: linux
@@ -120,10 +113,6 @@ CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,)
CONFIG_KERNEL_STACK_ORDER ?= 2
STACK_SIZE := $(shell echo $$[ 4096 * (1 << $(CONFIG_KERNEL_STACK_ORDER)) ] )
-ifndef START
- START = $(shell echo $$[ $(TOP_ADDR) - $(SIZE) ] )
-endif
-
CPPFLAGS_vmlinux.lds = -U$(SUBARCH) -DSTART=$(START) -DELF_ARCH=$(ELF_ARCH) \
-DELF_FORMAT="$(ELF_FORMAT)" -DKERNEL_STACK_SIZE=$(STACK_SIZE)
diff --git a/arch/um/defconfig b/arch/um/defconfig
index 86db2862f22..59215bc264e 100644
--- a/arch/um/defconfig
+++ b/arch/um/defconfig
@@ -56,8 +56,6 @@ CONFIG_X86_TSC=y
CONFIG_UML_X86=y
# CONFIG_64BIT is not set
CONFIG_SEMAPHORE_SLEEPERS=y
-# CONFIG_HOST_2G_2G is not set
-CONFIG_TOP_ADDR=0xc0000000
# CONFIG_3_LEVEL_PGTABLES is not set
CONFIG_ARCH_HAS_SC_SIGNALS=y
CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA=y
@@ -81,7 +79,6 @@ CONFIG_HOSTFS=y
# CONFIG_HPPFS is not set
CONFIG_MCONSOLE=y
CONFIG_MAGIC_SYSRQ=y
-CONFIG_NEST_LEVEL=0
# CONFIG_HIGHMEM is not set
CONFIG_KERNEL_STACK_ORDER=0
diff --git a/arch/um/include/as-layout.h b/arch/um/include/as-layout.h
index 606bb5c7fdf..cac542d8ff7 100644
--- a/arch/um/include/as-layout.h
+++ b/arch/um/include/as-layout.h
@@ -57,6 +57,8 @@ extern unsigned long _stext, _etext, _sdata, _edata, __bss_start, _end;
extern unsigned long _unprotected_end;
extern unsigned long brk_start;
+extern unsigned long host_task_size;
+
extern int linux_main(int argc, char **argv);
extern void (*sig_info[])(int, struct uml_pt_regs *);
diff --git a/arch/um/include/os.h b/arch/um/include/os.h
index 0b6b6273330..32c799e3a49 100644
--- a/arch/um/include/os.h
+++ b/arch/um/include/os.h
@@ -295,6 +295,9 @@ extern void maybe_sigio_broken(int fd, int read);
extern int os_arch_prctl(int pid, int code, unsigned long *addr);
/* tty.c */
-int get_pty(void);
+extern int get_pty(void);
+
+/* sys-$ARCH/task_size.c */
+extern unsigned long os_get_task_size(void);
#endif
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index 76a62c0cb2b..f5d7f4569ba 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -25,7 +25,7 @@ void flush_thread(void)
ret = unmap(&current->mm->context.id, 0, STUB_START, 0, &data);
ret = ret || unmap(&current->mm->context.id, STUB_END,
- TASK_SIZE - STUB_END, 1, &data);
+ host_task_size - STUB_END, 1, &data);
if (ret) {
printk(KERN_ERR "flush_thread - clearing address space failed, "
"err = %d\n", ret);
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 468aba990db..a6c1dd1cf5a 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -241,6 +241,11 @@ static struct notifier_block panic_exit_notifier = {
};
/* Set during early boot */
+unsigned long task_size;
+EXPORT_SYMBOL(task_size);
+
+unsigned long host_task_size;
+
unsigned long brk_start;
unsigned long end_iomem;
EXPORT_SYMBOL(end_iomem);
@@ -267,6 +272,13 @@ int __init linux_main(int argc, char **argv)
if (have_root == 0)
add_arg(DEFAULT_COMMAND_LINE);
+ host_task_size = os_get_task_size();
+ /*
+ * TASK_SIZE needs to be PGDIR_SIZE aligned or else exit_mmap craps
+ * out
+ */
+ task_size = host_task_size & PGDIR_MASK;
+
/* OS sanity checks that need to happen before the kernel runs */
os_early_checks();
@@ -303,7 +315,7 @@ int __init linux_main(int argc, char **argv)
highmem = 0;
iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK;
- max_physmem = CONFIG_TOP_ADDR - uml_physmem - iomem_size - MIN_VMALLOC;
+ max_physmem = TASK_SIZE - uml_physmem - iomem_size - MIN_VMALLOC;
/*
* Zones have to begin on a 1 << MAX_ORDER page boundary,
@@ -335,7 +347,7 @@ int __init linux_main(int argc, char **argv)
}
virtmem_size = physmem_size;
- avail = CONFIG_TOP_ADDR - start_vm;
+ avail = TASK_SIZE - start_vm;
if (physmem_size > avail)
virtmem_size = avail;
end_vm = start_vm + virtmem_size;
diff --git a/arch/um/os-Linux/sys-i386/Makefile b/arch/um/os-Linux/sys-i386/Makefile
index a841262c594..b4bc6ac4f30 100644
--- a/arch/um/os-Linux/sys-i386/Makefile
+++ b/arch/um/os-Linux/sys-i386/Makefile
@@ -3,7 +3,7 @@
# Licensed under the GPL
#
-obj-y = registers.o signal.o tls.o
+obj-y = registers.o signal.o task_size.o tls.o
USER_OBJS := $(obj-y)
diff --git a/arch/um/os-Linux/sys-i386/task_size.c b/arch/um/os-Linux/sys-i386/task_size.c
new file mode 100644
index 00000000000..48d211b3d9a
--- /dev/null
+++ b/arch/um/os-Linux/sys-i386/task_size.c
@@ -0,0 +1,120 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include "longjmp.h"
+#include "kern_constants.h"
+
+static jmp_buf buf;
+
+static void segfault(int sig)
+{
+ longjmp(buf, 1);
+}
+
+static int page_ok(unsigned long page)
+{
+ unsigned long *address = (unsigned long *) (page << UM_KERN_PAGE_SHIFT);
+ unsigned long n = ~0UL;
+ void *mapped = NULL;
+ int ok = 0;
+
+ /*
+ * First see if the page is readable. If it is, it may still
+ * be a VDSO, so we go on to see if it's writable. If not
+ * then try mapping memory there. If that fails, then we're
+ * still in the kernel area. As a sanity check, we'll fail if
+ * the mmap succeeds, but gives us an address different from
+ * what we wanted.
+ */
+ if (setjmp(buf) == 0)
+ n = *address;
+ else {
+ mapped = mmap(address, UM_KERN_PAGE_SIZE,
+ PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (mapped == MAP_FAILED)
+ return 0;
+ if (mapped != address)
+ goto out;
+ }
+
+ /*
+ * Now, is it writeable? If so, then we're in user address
+ * space. If not, then try mprotecting it and try the write
+ * again.
+ */
+ if (setjmp(buf) == 0) {
+ *address = n;
+ ok = 1;
+ goto out;
+ } else if (mprotect(address, UM_KERN_PAGE_SIZE,
+ PROT_READ | PROT_WRITE) != 0)
+ goto out;
+
+ if (setjmp(buf) == 0) {
+ *address = n;
+ ok = 1;
+ }
+
+ out:
+ if (mapped != NULL)
+ munmap(mapped, UM_KERN_PAGE_SIZE);
+ return ok;
+}
+
+unsigned long os_get_task_size(void)
+{
+ struct sigaction sa, old;
+ unsigned long bottom = 0;
+ /*
+ * A 32-bit UML on a 64-bit host gets confused about the VDSO at
+ * 0xffffe000. It is mapped, is readable, can be reprotected writeable
+ * and written. However, exec discovers later that it can't be
+ * unmapped. So, just set the highest address to be checked to just
+ * below it. This might waste some address space on 4G/4G 32-bit
+ * hosts, but shouldn't hurt otherwise.
+ */
+ unsigned long top = 0xffffd000 >> UM_KERN_PAGE_SHIFT;
+ unsigned long test;
+
+ printf("Locating the top of the address space ... ");
+ fflush(stdout);
+
+ /*
+ * We're going to be longjmping out of the signal handler, so
+ * SA_DEFER needs to be set.
+ */
+ sa.sa_handler = segfault;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = SA_NODEFER;
+ sigaction(SIGSEGV, &sa, &old);
+
+ if (!page_ok(bottom)) {
+ fprintf(stderr, "Address 0x%x no good?\n",
+ bottom << UM_KERN_PAGE_SHIFT);
+ exit(1);
+ }
+
+ /* This could happen with a 4G/4G split */
+ if (page_ok(top))
+ goto out;
+
+ do {
+ test = bottom + (top - bottom) / 2;
+ if (page_ok(test))
+ bottom = test;
+ else
+ top = test;
+ } while (top - bottom > 1);
+
+out:
+ /* Restore the old SIGSEGV handling */
+ sigaction(SIGSEGV, &old, NULL);
+
+ top <<= UM_KERN_PAGE_SHIFT;
+ printf("0x%x\n", top);
+ fflush(stdout);
+
+ return top;
+}
diff --git a/arch/um/os-Linux/sys-x86_64/Makefile b/arch/um/os-Linux/sys-x86_64/Makefile
index a42a4ef02e1..a44a47f8f57 100644
--- a/arch/um/os-Linux/sys-x86_64/Makefile
+++ b/arch/um/os-Linux/sys-x86_64/Makefile
@@ -3,7 +3,7 @@
# Licensed under the GPL
#
-obj-y = registers.o prctl.o signal.o
+obj-y = registers.o prctl.o signal.o task_size.o
USER_OBJS := $(obj-y)
diff --git a/arch/um/os-Linux/sys-x86_64/task_size.c b/arch/um/os-Linux/sys-x86_64/task_size.c
new file mode 100644
index 00000000000..fad6f57f8ee
--- /dev/null
+++ b/arch/um/os-Linux/sys-x86_64/task_size.c
@@ -0,0 +1,5 @@
+unsigned long os_get_task_size(unsigned long shift)
+{
+ /* The old value of CONFIG_TOP_ADDR */
+ return 0x7fc0000000;
+}