From 742755a1d8ce2b548428f7aacf1758b4bba50080 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 23 Jun 2006 02:03:55 -0700 Subject: [PATCH] page migration: sys_move_pages(): support moving of individual pages move_pages() is used to move individual pages of a process. The function can be used to determine the location of pages and to move them onto the desired node. move_pages() returns status information for each page. long move_pages(pid, number_of_pages_to_move, addresses_of_pages[], nodes[] or NULL, status[], flags); The addresses of pages is an array of void * pointing to the pages to be moved. The nodes array contains the node numbers that the pages should be moved to. If a NULL is passed instead of an array then no pages are moved but the status array is updated. The status request may be used to determine the page state before issuing another move_pages() to move pages. The status array will contain the state of all individual page migration attempts when the function terminates. The status array is only valid if move_pages() completed successfullly. Possible page states in status[]: 0..MAX_NUMNODES The page is now on the indicated node. -ENOENT Page is not present -EACCES Page is mapped by multiple processes and can only be moved if MPOL_MF_MOVE_ALL is specified. -EPERM The page has been mlocked by a process/driver and cannot be moved. -EBUSY Page is busy and cannot be moved. Try again later. -EFAULT Invalid address (no VMA or zero page). -ENOMEM Unable to allocate memory on target node. -EIO Unable to write back page. The page must be written back in order to move it since the page is dirty and the filesystem does not provide a migration function that would allow the moving of dirty pages. -EINVAL A dirty page cannot be moved. The filesystem does not provide a migration function and has no ability to write back pages. The flags parameter indicates what types of pages to move: MPOL_MF_MOVE Move pages that are only mapped by the process. MPOL_MF_MOVE_ALL Also move pages that are mapped by multiple processes. Requires sufficient capabilities. Possible return codes from move_pages() -ENOENT No pages found that would require moving. All pages are either already on the target node, not present, had an invalid address or could not be moved because they were mapped by multiple processes. -EINVAL Flags other than MPOL_MF_MOVE(_ALL) specified or an attempt to migrate pages in a kernel thread. -EPERM MPOL_MF_MOVE_ALL specified without sufficient priviledges. or an attempt to move a process belonging to another user. -EACCES One of the target nodes is not allowed by the current cpuset. -ENODEV One of the target nodes is not online. -ESRCH Process does not exist. -E2BIG Too many pages to move. -ENOMEM Not enough memory to allocate control array. -EFAULT Parameters could not be accessed. A test program for move_pages() may be found with the patches on ftp.kernel.org:/pub/linux/kernel/people/christoph/pmig/patches-2.6.17-rc4-mm3 From: Christoph Lameter Detailed results for sys_move_pages() Pass a pointer to an integer to get_new_page() that may be used to indicate where the completion status of a migration operation should be placed. This allows sys_move_pags() to report back exactly what happened to each page. Wish there would be a better way to do this. Looks a bit hacky. Signed-off-by: Christoph Lameter Cc: Hugh Dickins Cc: Jes Sorensen Cc: KAMEZAWA Hiroyuki Cc: Lee Schermerhorn Cc: Andi Kleen Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/syscalls.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux/syscalls.h') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index bd67a4413df..7e3f2349091 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -516,6 +516,11 @@ asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask, asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, const unsigned long __user *from, const unsigned long __user *to); +asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, + const void __user * __user *pages, + const int __user *nodes, + int __user *status, + int flags); asmlinkage long sys_mbind(unsigned long start, unsigned long len, unsigned long mode, unsigned long __user *nmask, -- cgit v1.2.3-70-g09d2 From 1b2db9fb7adc4d67d9ce7d16ce79c41ee84730fe Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 23 Jun 2006 02:03:56 -0700 Subject: [PATCH] sys_move_pages: 32bit support (i386, x86_64) sys_move_pages() support for 32bit (i386 plus x86_64 compat layer) Add support for move_pages() on i386 and also add the compat functions necessary to run 32 bit binaries on x86_64. Add compat_sys_move_pages to the x86_64 32bit binary layer. Note that it is not up to date so I added the missing pieces. Not sure if this is done the right way. [akpm@osdl.org: compile fix] Signed-off-by: Christoph Lameter Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/syscall_table.S | 1 + arch/x86_64/ia32/ia32entry.S | 1 + include/asm-i386/unistd.h | 3 ++- include/linux/syscalls.h | 5 +++++ kernel/compat.c | 23 +++++++++++++++++++++++ kernel/sys_ni.c | 1 + 6 files changed, 33 insertions(+), 1 deletion(-) (limited to 'include/linux/syscalls.h') diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index af56987f69b..dd63d477539 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -316,3 +316,4 @@ ENTRY(sys_call_table) .long sys_sync_file_range .long sys_tee /* 315 */ .long sys_vmsplice + .long sys_move_pages diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index 5a92fed2d1d..4ec594ab1a9 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S @@ -696,4 +696,5 @@ ia32_sys_call_table: .quad sys_sync_file_range .quad sys_tee .quad compat_sys_vmsplice + .quad compat_sys_move_pages ia32_syscall_end: diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h index de2ccc149e3..fc1c8ddae14 100644 --- a/include/asm-i386/unistd.h +++ b/include/asm-i386/unistd.h @@ -322,10 +322,11 @@ #define __NR_sync_file_range 314 #define __NR_tee 315 #define __NR_vmsplice 316 +#define __NR_move_pages 317 #ifdef __KERNEL__ -#define NR_syscalls 317 +#define NR_syscalls 318 /* * user-visible error numbers are in the range -1 - -128: see diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 7e3f2349091..e42738c6916 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -521,6 +521,11 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, const int __user *nodes, int __user *status, int flags); +asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_page, + void __user *pages, + const int __user *nodes, + int __user *status, + int flags); asmlinkage long sys_mbind(unsigned long start, unsigned long len, unsigned long mode, unsigned long __user *nmask, diff --git a/kernel/compat.c b/kernel/compat.c index c1601a84f8d..ccea93e2895 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -934,3 +935,25 @@ asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp) return ret; } + +#ifdef CONFIG_NUMA +asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_pages, + void __user *pages32, + const int __user *nodes, + int __user *status, + int flags) +{ + const void __user * __user *pages; + int i; + + pages = compat_alloc_user_space(nr_pages * sizeof(void *)); + for (i = 0; i < nr_pages; i++) { + compat_uptr_t p; + + if (get_user(p, (compat_uptr_t *)(pages32 + i)) || + put_user(compat_ptr(p), pages + i)) + return -EFAULT; + } + return sys_move_pages(pid, nr_pages, pages, nodes, status, flags); +} +#endif diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 597229749de..6991bece67e 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -133,3 +133,4 @@ cond_syscall(sys_mincore); cond_syscall(sys_madvise); cond_syscall(sys_mremap); cond_syscall(sys_remap_file_pages); +cond_syscall(compat_sys_move_pages); -- cgit v1.2.3-70-g09d2 From 9216dfad4fc97ab639ef0885efc713f3d7a20d5b Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 23 Jun 2006 02:03:57 -0700 Subject: [PATCH] move_pages: fix 32 -> 64 bit compat function The definition of the third parameter is a pointer to an array of virtual addresses which give us some trouble. The existing code calculated the wrong address in the array since I used void to avoid having to specify a type. I now use the correct type "compat_uptr_t __user *" in the definition of the function in kernel/compat.c. However, I used __u32 in syscalls.h. Would have to include compat.h there in order to provide the same definition which would generate an ugly include situation. On both ia64 and x86_64 compat_uptr_t is u32. So this works although parameter declarations differ. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/syscalls.h | 2 +- kernel/compat.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux/syscalls.h') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index e42738c6916..33785b79d54 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -522,7 +522,7 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, int __user *status, int flags); asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_page, - void __user *pages, + __u32 __user *pages, const int __user *nodes, int __user *status, int flags); diff --git a/kernel/compat.c b/kernel/compat.c index ccea93e2895..2f672332430 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -938,7 +938,7 @@ asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp) #ifdef CONFIG_NUMA asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_pages, - void __user *pages32, + compat_uptr_t __user *pages32, const int __user *nodes, int __user *status, int flags) @@ -950,7 +950,7 @@ asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_pages, for (i = 0; i < nr_pages; i++) { compat_uptr_t p; - if (get_user(p, (compat_uptr_t *)(pages32 + i)) || + if (get_user(p, pages32 + i) || put_user(compat_ptr(p), pages + i)) return -EFAULT; } -- cgit v1.2.3-70-g09d2