From bcb71abe7d4c5a0d0368c67da0a7def4fc73497a Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 21 Oct 2011 15:56:24 -0400 Subject: iommu: Add option to group multi-function devices The option iommu=group_mf indicates the that the iommu driver should expose all functions of a multi-function PCI device as the same iommu_device_group. This is useful for disallowing individual functions being exposed as independent devices to userspace as there are often hidden dependencies. Virtual functions are not affected by this option. Signed-off-by: Alex Williamson Signed-off-by: Joerg Roedel --- arch/x86/kernel/pci-dma.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 80dc793b3f6..1c4d769e21e 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -45,6 +45,15 @@ int iommu_detected __read_mostly = 0; */ int iommu_pass_through __read_mostly; +/* + * Group multi-function PCI devices into a single device-group for the + * iommu_device_group interface. This tells the iommu driver to pretend + * it cannot distinguish between functions of a device, exposing only one + * group for the device. Useful for disallowing use of individual PCI + * functions from userspace drivers. + */ +int iommu_group_mf __read_mostly; + extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; /* Dummy device used for NULL arguments (normally ISA). */ @@ -169,6 +178,8 @@ static __init int iommu_setup(char *p) #endif if (!strncmp(p, "pt", 2)) iommu_pass_through = 1; + if (!strncmp(p, "group_mf", 8)) + iommu_group_mf = 1; gart_parse_options(p); -- cgit v1.2.3-70-g09d2 From b82e324b3c46a554595c12b45465d1943a57326c Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 10 Nov 2011 13:18:09 +0000 Subject: serial, mfd: don't hardcode the console Add support to specify which HSU port to use as an early console. This can be selected by passing "earlyprintk=hsu" on the kernel command line. By default port 0 is still used. Signed-off-by: Mika Westerberg Signed-off-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mrst.h | 2 +- arch/x86/kernel/early_printk.c | 2 +- arch/x86/platform/mrst/early_printk_mrst.c | 16 ++++++++++++---- drivers/tty/serial/mfd.c | 18 +++++++----------- 4 files changed, 21 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h index 719f00b28ff..470776039af 100644 --- a/arch/x86/include/asm/mrst.h +++ b/arch/x86/include/asm/mrst.h @@ -51,7 +51,7 @@ extern struct console early_mrst_console; extern void mrst_early_console_init(void); extern struct console early_hsu_console; -extern void hsu_early_console_init(void); +extern void hsu_early_console_init(const char *); extern void intel_scu_devices_create(void); extern void intel_scu_devices_destroy(void); diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index cd28a350f7f..9d42a52d233 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -247,7 +247,7 @@ static int __init setup_early_printk(char *buf) } if (!strncmp(buf, "hsu", 3)) { - hsu_early_console_init(); + hsu_early_console_init(buf + 3); early_console_register(&early_hsu_console, keep); } #endif diff --git a/arch/x86/platform/mrst/early_printk_mrst.c b/arch/x86/platform/mrst/early_printk_mrst.c index 25bfdbb5b13..3c6e328483c 100644 --- a/arch/x86/platform/mrst/early_printk_mrst.c +++ b/arch/x86/platform/mrst/early_printk_mrst.c @@ -245,16 +245,24 @@ struct console early_mrst_console = { * Following is the early console based on Medfield HSU (High * Speed UART) device. */ -#define HSU_PORT2_PADDR 0xffa28180 +#define HSU_PORT_BASE 0xffa28080 static void __iomem *phsu; -void hsu_early_console_init(void) +void hsu_early_console_init(const char *s) { + unsigned long paddr, port = 0; u8 lcr; - phsu = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, - HSU_PORT2_PADDR); + /* + * Select the early HSU console port if specified by user in the + * kernel command line. + */ + if (*s && !kstrtoul(s, 10, &port)) + port = clamp_val(port, 0, 2); + + paddr = HSU_PORT_BASE + port * 0x80; + phsu = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, paddr); /* Disable FIFO */ writeb(0x0, phsu + UART_FCR); diff --git a/drivers/tty/serial/mfd.c b/drivers/tty/serial/mfd.c index 286c386d9c4..565f3fe3dd7 100644 --- a/drivers/tty/serial/mfd.c +++ b/drivers/tty/serial/mfd.c @@ -1156,7 +1156,6 @@ serial_hsu_console_setup(struct console *co, char *options) int bits = 8; int parity = 'n'; int flow = 'n'; - int ret; if (co->index == -1 || co->index >= serial_hsu_reg.nr) co->index = 0; @@ -1167,9 +1166,7 @@ serial_hsu_console_setup(struct console *co, char *options) if (options) uart_parse_options(options, &baud, &parity, &bits, &flow); - ret = uart_set_options(&up->port, co, baud, parity, bits, flow); - - return ret; + return uart_set_options(&up->port, co, baud, parity, bits, flow); } static struct console serial_hsu_console = { @@ -1178,9 +1175,13 @@ static struct console serial_hsu_console = { .device = uart_console_device, .setup = serial_hsu_console_setup, .flags = CON_PRINTBUFFER, - .index = 2, + .index = -1, .data = &serial_hsu_reg, }; + +#define SERIAL_HSU_CONSOLE (&serial_hsu_console) +#else +#define SERIAL_HSU_CONSOLE NULL #endif struct uart_ops serial_hsu_pops = { @@ -1210,6 +1211,7 @@ static struct uart_driver serial_hsu_reg = { .major = TTY_MAJOR, .minor = 128, .nr = 3, + .cons = SERIAL_HSU_CONSOLE, }; #ifdef CONFIG_PM @@ -1344,12 +1346,6 @@ static int serial_hsu_probe(struct pci_dev *pdev, } uart_add_one_port(&serial_hsu_reg, &uport->port); -#ifdef CONFIG_SERIAL_MFD_HSU_CONSOLE - if (index == 2) { - register_console(&serial_hsu_console); - uport->port.cons = &serial_hsu_console; - } -#endif pci_set_drvdata(pdev, uport); } -- cgit v1.2.3-70-g09d2 From b7641d2c83aa10031bf45afd82619bfaaedcbc6f Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 11 Nov 2011 15:43:02 -0800 Subject: x86-64, syscall: Adjust comment spacing and remove typo Adjust spacing for comment so that it matches the multiline comment style used in the rest of the kernel, and remove word duplication. It is not really clear what version of gcc this refers to, but the extra & doesn't cause any harm, so there is no reason to remove it. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/syscall_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c index de87d600829..0edfafa1b26 100644 --- a/arch/x86/kernel/syscall_64.c +++ b/arch/x86/kernel/syscall_64.c @@ -21,9 +21,9 @@ extern void sys_ni_syscall(void); const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { /* - *Smells like a like a compiler bug -- it doesn't work - *when the & below is removed. - */ + * Smells like a compiler bug -- it doesn't work + * when the & below is removed. + */ [0 ... __NR_syscall_max] = &sys_ni_syscall, #include }; -- cgit v1.2.3-70-g09d2 From 303395ac3bf3e2cb488435537d416bc840438fcb Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 11 Nov 2011 16:07:41 -0800 Subject: x86: Generate system call tables and unistd_*.h from tables Generate system call tables and unistd_*.h automatically from the tables in arch/x86/syscalls. All other information, like NR_syscalls, is auto-generated, some of which is in asm-offsets_*.c. This allows us to keep all the system call information in one place, and allows for kernel space and user space to see different information; this is currently used for the ia32 system call numbers when building the 64-bit kernel, but will be used by the x32 ABI in the near future. This also removes some gratuitious differences between i386, x86-64 and ia32; in particular, now all system call tables are generated with the same mechanism. Cc: H. J. Lu Cc: Sam Ravnborg Cc: Michal Marek Signed-off-by: H. Peter Anvin --- arch/x86/Makefile | 6 + arch/x86/ia32/Makefile | 2 +- arch/x86/ia32/ia32entry.S | 356 ------------------ arch/x86/ia32/syscall_ia32.c | 25 ++ arch/x86/include/asm/Kbuild | 5 +- arch/x86/include/asm/ia32_unistd.h | 13 +- arch/x86/include/asm/unistd.h | 54 ++- arch/x86/include/asm/unistd_32.h | 401 -------------------- arch/x86/include/asm/unistd_64.h | 732 ------------------------------------- arch/x86/kernel/Makefile | 3 +- arch/x86/kernel/asm-offsets_32.c | 8 + arch/x86/kernel/asm-offsets_64.c | 19 +- arch/x86/kernel/entry_32.S | 37 +- arch/x86/kernel/syscall_32.c | 25 ++ arch/x86/kernel/syscall_64.c | 14 +- arch/x86/kernel/syscall_table_32.S | 350 ------------------ 16 files changed, 154 insertions(+), 1896 deletions(-) create mode 100644 arch/x86/ia32/syscall_ia32.c delete mode 100644 arch/x86/include/asm/unistd_32.h delete mode 100644 arch/x86/include/asm/unistd_64.h create mode 100644 arch/x86/kernel/syscall_32.c delete mode 100644 arch/x86/kernel/syscall_table_32.S (limited to 'arch/x86/kernel') diff --git a/arch/x86/Makefile b/arch/x86/Makefile index b02e509072a..209ba129459 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -117,6 +117,12 @@ KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,) KBUILD_CFLAGS += $(mflags-y) KBUILD_AFLAGS += $(mflags-y) +### +# Syscall table generation + +archheaders: + $(Q)$(MAKE) $(build)=arch/x86/syscalls all + ### # Kernel objects diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile index eea9a1c77d3..455646e0e53 100644 --- a/arch/x86/ia32/Makefile +++ b/arch/x86/ia32/Makefile @@ -3,7 +3,7 @@ # obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o -obj-$(CONFIG_IA32_EMULATION) += nosyscall.o +obj-$(CONFIG_IA32_EMULATION) += nosyscall.o syscall_ia32.o sysv-$(CONFIG_SYSVIPC) := ipc32.o obj-$(CONFIG_IA32_EMULATION) += $(sysv-y) diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 59538a77769..72f853aea47 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -27,8 +27,6 @@ .section .entry.text, "ax" -#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) - .macro IA32_ARG_FIXUP noebp=0 movl %edi,%r8d .if \noebp @@ -496,357 +494,3 @@ ENTRY(ia32_ptregs_common) jmp ia32_sysret /* misbalances the return cache */ CFI_ENDPROC END(ia32_ptregs_common) - - .section .rodata,"a" - .align 8 -ia32_sys_call_table: - .quad sys_restart_syscall - .quad sys_exit - .quad stub32_fork - .quad sys_read - .quad sys_write - .quad compat_sys_open /* 5 */ - .quad sys_close - .quad sys32_waitpid - .quad sys_creat - .quad sys_link - .quad sys_unlink /* 10 */ - .quad stub32_execve - .quad sys_chdir - .quad compat_sys_time - .quad sys_mknod - .quad sys_chmod /* 15 */ - .quad sys_lchown16 - .quad quiet_ni_syscall /* old break syscall holder */ - .quad sys_stat - .quad sys32_lseek - .quad sys_getpid /* 20 */ - .quad compat_sys_mount /* mount */ - .quad sys_oldumount /* old_umount */ - .quad sys_setuid16 - .quad sys_getuid16 - .quad compat_sys_stime /* stime */ /* 25 */ - .quad compat_sys_ptrace /* ptrace */ - .quad sys_alarm - .quad sys_fstat /* (old)fstat */ - .quad sys_pause - .quad compat_sys_utime /* 30 */ - .quad quiet_ni_syscall /* old stty syscall holder */ - .quad quiet_ni_syscall /* old gtty syscall holder */ - .quad sys_access - .quad sys_nice - .quad quiet_ni_syscall /* 35 */ /* old ftime syscall holder */ - .quad sys_sync - .quad sys32_kill - .quad sys_rename - .quad sys_mkdir - .quad sys_rmdir /* 40 */ - .quad sys_dup - .quad sys_pipe - .quad compat_sys_times - .quad quiet_ni_syscall /* old prof syscall holder */ - .quad sys_brk /* 45 */ - .quad sys_setgid16 - .quad sys_getgid16 - .quad sys_signal - .quad sys_geteuid16 - .quad sys_getegid16 /* 50 */ - .quad sys_acct - .quad sys_umount /* new_umount */ - .quad quiet_ni_syscall /* old lock syscall holder */ - .quad compat_sys_ioctl - .quad compat_sys_fcntl64 /* 55 */ - .quad quiet_ni_syscall /* old mpx syscall holder */ - .quad sys_setpgid - .quad quiet_ni_syscall /* old ulimit syscall holder */ - .quad sys_olduname - .quad sys_umask /* 60 */ - .quad sys_chroot - .quad compat_sys_ustat - .quad sys_dup2 - .quad sys_getppid - .quad sys_getpgrp /* 65 */ - .quad sys_setsid - .quad sys32_sigaction - .quad sys_sgetmask - .quad sys_ssetmask - .quad sys_setreuid16 /* 70 */ - .quad sys_setregid16 - .quad sys32_sigsuspend - .quad compat_sys_sigpending - .quad sys_sethostname - .quad compat_sys_setrlimit /* 75 */ - .quad compat_sys_old_getrlimit /* old_getrlimit */ - .quad compat_sys_getrusage - .quad compat_sys_gettimeofday - .quad compat_sys_settimeofday - .quad sys_getgroups16 /* 80 */ - .quad sys_setgroups16 - .quad compat_sys_old_select - .quad sys_symlink - .quad sys_lstat - .quad sys_readlink /* 85 */ - .quad sys_uselib - .quad sys_swapon - .quad sys_reboot - .quad compat_sys_old_readdir - .quad sys32_mmap /* 90 */ - .quad sys_munmap - .quad sys_truncate - .quad sys_ftruncate - .quad sys_fchmod - .quad sys_fchown16 /* 95 */ - .quad sys_getpriority - .quad sys_setpriority - .quad quiet_ni_syscall /* old profil syscall holder */ - .quad compat_sys_statfs - .quad compat_sys_fstatfs /* 100 */ - .quad sys_ioperm - .quad compat_sys_socketcall - .quad sys_syslog - .quad compat_sys_setitimer - .quad compat_sys_getitimer /* 105 */ - .quad compat_sys_newstat - .quad compat_sys_newlstat - .quad compat_sys_newfstat - .quad sys_uname - .quad stub32_iopl /* 110 */ - .quad sys_vhangup - .quad quiet_ni_syscall /* old "idle" system call */ - .quad sys32_vm86_warning /* vm86old */ - .quad compat_sys_wait4 - .quad sys_swapoff /* 115 */ - .quad compat_sys_sysinfo - .quad sys32_ipc - .quad sys_fsync - .quad stub32_sigreturn - .quad stub32_clone /* 120 */ - .quad sys_setdomainname - .quad sys_newuname - .quad sys_modify_ldt - .quad compat_sys_adjtimex - .quad sys32_mprotect /* 125 */ - .quad compat_sys_sigprocmask - .quad quiet_ni_syscall /* create_module */ - .quad sys_init_module - .quad sys_delete_module - .quad quiet_ni_syscall /* 130 get_kernel_syms */ - .quad sys32_quotactl - .quad sys_getpgid - .quad sys_fchdir - .quad quiet_ni_syscall /* bdflush */ - .quad sys_sysfs /* 135 */ - .quad sys_personality - .quad quiet_ni_syscall /* for afs_syscall */ - .quad sys_setfsuid16 - .quad sys_setfsgid16 - .quad sys_llseek /* 140 */ - .quad compat_sys_getdents - .quad compat_sys_select - .quad sys_flock - .quad sys_msync - .quad compat_sys_readv /* 145 */ - .quad compat_sys_writev - .quad sys_getsid - .quad sys_fdatasync - .quad compat_sys_sysctl /* sysctl */ - .quad sys_mlock /* 150 */ - .quad sys_munlock - .quad sys_mlockall - .quad sys_munlockall - .quad sys_sched_setparam - .quad sys_sched_getparam /* 155 */ - .quad sys_sched_setscheduler - .quad sys_sched_getscheduler - .quad sys_sched_yield - .quad sys_sched_get_priority_max - .quad sys_sched_get_priority_min /* 160 */ - .quad sys32_sched_rr_get_interval - .quad compat_sys_nanosleep - .quad sys_mremap - .quad sys_setresuid16 - .quad sys_getresuid16 /* 165 */ - .quad sys32_vm86_warning /* vm86 */ - .quad quiet_ni_syscall /* query_module */ - .quad sys_poll - .quad quiet_ni_syscall /* old nfsservctl */ - .quad sys_setresgid16 /* 170 */ - .quad sys_getresgid16 - .quad sys_prctl - .quad stub32_rt_sigreturn - .quad sys32_rt_sigaction - .quad sys32_rt_sigprocmask /* 175 */ - .quad sys32_rt_sigpending - .quad compat_sys_rt_sigtimedwait - .quad sys32_rt_sigqueueinfo - .quad sys_rt_sigsuspend - .quad sys32_pread /* 180 */ - .quad sys32_pwrite - .quad sys_chown16 - .quad sys_getcwd - .quad sys_capget - .quad sys_capset - .quad stub32_sigaltstack - .quad sys32_sendfile - .quad quiet_ni_syscall /* streams1 */ - .quad quiet_ni_syscall /* streams2 */ - .quad stub32_vfork /* 190 */ - .quad compat_sys_getrlimit - .quad sys_mmap_pgoff - .quad sys32_truncate64 - .quad sys32_ftruncate64 - .quad sys32_stat64 /* 195 */ - .quad sys32_lstat64 - .quad sys32_fstat64 - .quad sys_lchown - .quad sys_getuid - .quad sys_getgid /* 200 */ - .quad sys_geteuid - .quad sys_getegid - .quad sys_setreuid - .quad sys_setregid - .quad sys_getgroups /* 205 */ - .quad sys_setgroups - .quad sys_fchown - .quad sys_setresuid - .quad sys_getresuid - .quad sys_setresgid /* 210 */ - .quad sys_getresgid - .quad sys_chown - .quad sys_setuid - .quad sys_setgid - .quad sys_setfsuid /* 215 */ - .quad sys_setfsgid - .quad sys_pivot_root - .quad sys_mincore - .quad sys_madvise - .quad compat_sys_getdents64 /* 220 getdents64 */ - .quad compat_sys_fcntl64 - .quad quiet_ni_syscall /* tux */ - .quad quiet_ni_syscall /* security */ - .quad sys_gettid - .quad sys32_readahead /* 225 */ - .quad sys_setxattr - .quad sys_lsetxattr - .quad sys_fsetxattr - .quad sys_getxattr - .quad sys_lgetxattr /* 230 */ - .quad sys_fgetxattr - .quad sys_listxattr - .quad sys_llistxattr - .quad sys_flistxattr - .quad sys_removexattr /* 235 */ - .quad sys_lremovexattr - .quad sys_fremovexattr - .quad sys_tkill - .quad sys_sendfile64 - .quad compat_sys_futex /* 240 */ - .quad compat_sys_sched_setaffinity - .quad compat_sys_sched_getaffinity - .quad sys_set_thread_area - .quad sys_get_thread_area - .quad compat_sys_io_setup /* 245 */ - .quad sys_io_destroy - .quad compat_sys_io_getevents - .quad compat_sys_io_submit - .quad sys_io_cancel - .quad sys32_fadvise64 /* 250 */ - .quad quiet_ni_syscall /* free_huge_pages */ - .quad sys_exit_group - .quad sys32_lookup_dcookie - .quad sys_epoll_create - .quad sys_epoll_ctl /* 255 */ - .quad sys_epoll_wait - .quad sys_remap_file_pages - .quad sys_set_tid_address - .quad compat_sys_timer_create - .quad compat_sys_timer_settime /* 260 */ - .quad compat_sys_timer_gettime - .quad sys_timer_getoverrun - .quad sys_timer_delete - .quad compat_sys_clock_settime - .quad compat_sys_clock_gettime /* 265 */ - .quad compat_sys_clock_getres - .quad compat_sys_clock_nanosleep - .quad compat_sys_statfs64 - .quad compat_sys_fstatfs64 - .quad sys_tgkill /* 270 */ - .quad compat_sys_utimes - .quad sys32_fadvise64_64 - .quad quiet_ni_syscall /* sys_vserver */ - .quad sys_mbind - .quad compat_sys_get_mempolicy /* 275 */ - .quad sys_set_mempolicy - .quad compat_sys_mq_open - .quad sys_mq_unlink - .quad compat_sys_mq_timedsend - .quad compat_sys_mq_timedreceive /* 280 */ - .quad compat_sys_mq_notify - .quad compat_sys_mq_getsetattr - .quad compat_sys_kexec_load /* reserved for kexec */ - .quad compat_sys_waitid - .quad quiet_ni_syscall /* 285: sys_altroot */ - .quad sys_add_key - .quad sys_request_key - .quad sys_keyctl - .quad sys_ioprio_set - .quad sys_ioprio_get /* 290 */ - .quad sys_inotify_init - .quad sys_inotify_add_watch - .quad sys_inotify_rm_watch - .quad sys_migrate_pages - .quad compat_sys_openat /* 295 */ - .quad sys_mkdirat - .quad sys_mknodat - .quad sys_fchownat - .quad compat_sys_futimesat - .quad sys32_fstatat /* 300 */ - .quad sys_unlinkat - .quad sys_renameat - .quad sys_linkat - .quad sys_symlinkat - .quad sys_readlinkat /* 305 */ - .quad sys_fchmodat - .quad sys_faccessat - .quad compat_sys_pselect6 - .quad compat_sys_ppoll - .quad sys_unshare /* 310 */ - .quad compat_sys_set_robust_list - .quad compat_sys_get_robust_list - .quad sys_splice - .quad sys32_sync_file_range - .quad sys_tee /* 315 */ - .quad compat_sys_vmsplice - .quad compat_sys_move_pages - .quad sys_getcpu - .quad sys_epoll_pwait - .quad compat_sys_utimensat /* 320 */ - .quad compat_sys_signalfd - .quad sys_timerfd_create - .quad sys_eventfd - .quad sys32_fallocate - .quad compat_sys_timerfd_settime /* 325 */ - .quad compat_sys_timerfd_gettime - .quad compat_sys_signalfd4 - .quad sys_eventfd2 - .quad sys_epoll_create1 - .quad sys_dup3 /* 330 */ - .quad sys_pipe2 - .quad sys_inotify_init1 - .quad compat_sys_preadv - .quad compat_sys_pwritev - .quad compat_sys_rt_tgsigqueueinfo /* 335 */ - .quad sys_perf_event_open - .quad compat_sys_recvmmsg - .quad sys_fanotify_init - .quad sys32_fanotify_mark - .quad sys_prlimit64 /* 340 */ - .quad sys_name_to_handle_at - .quad compat_sys_open_by_handle_at - .quad compat_sys_clock_adjtime - .quad sys_syncfs - .quad compat_sys_sendmmsg /* 345 */ - .quad sys_setns - .quad compat_sys_process_vm_readv - .quad compat_sys_process_vm_writev -ia32_syscall_end: diff --git a/arch/x86/ia32/syscall_ia32.c b/arch/x86/ia32/syscall_ia32.c new file mode 100644 index 00000000000..d04d3dbc47d --- /dev/null +++ b/arch/x86/ia32/syscall_ia32.c @@ -0,0 +1,25 @@ +/* System call table for ia32 emulation. */ + +#include +#include +#include +#include + +#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void compat(void) ; +#include +#undef __SYSCALL_I386 + +#define __SYSCALL_I386(nr, sym, compat) [nr] = compat, + +typedef void (*sys_call_ptr_t)(void); + +extern void compat_ni_syscall(void); + +const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = { + /* + * Smells like a like a compiler bug -- it doesn't work + * when the & below is removed. + */ + [0 ... __NR_ia32_syscall_max] = &compat_ni_syscall, +#include +}; diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 6fa90a845e4..b57e6a43a37 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -19,7 +19,8 @@ header-y += processor-flags.h header-y += ptrace-abi.h header-y += sigcontext32.h header-y += ucontext.h -header-y += unistd_32.h -header-y += unistd_64.h header-y += vm86.h header-y += vsyscall.h + +genhdr-y += unistd_32.h +genhdr-y += unistd_64.h diff --git a/arch/x86/include/asm/ia32_unistd.h b/arch/x86/include/asm/ia32_unistd.h index 976f6ecd2ce..b0d5716ca1e 100644 --- a/arch/x86/include/asm/ia32_unistd.h +++ b/arch/x86/include/asm/ia32_unistd.h @@ -2,17 +2,10 @@ #define _ASM_X86_IA32_UNISTD_H /* - * This file contains the system call numbers of the ia32 port, + * This file contains the system call numbers of the ia32 compat ABI, * this is for the kernel only. - * Only add syscalls here where some part of the kernel needs to know - * the number. This should be otherwise in sync with asm-x86/unistd_32.h. -AK */ - -#define __NR_ia32_restart_syscall 0 -#define __NR_ia32_exit 1 -#define __NR_ia32_read 3 -#define __NR_ia32_write 4 -#define __NR_ia32_sigreturn 119 -#define __NR_ia32_rt_sigreturn 173 +#define __SYSCALL_ia32_NR(x) (x) +#include #endif /* _ASM_X86_IA32_UNISTD_H */ diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index 2a58ed3e51d..b4a3db7ce14 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -1,13 +1,59 @@ +#ifndef _ASM_X86_UNISTD_H +#define _ASM_X86_UNISTD_H 1 + #ifdef __KERNEL__ # ifdef CONFIG_X86_32 -# include "unistd_32.h" + +# include +# define __ARCH_WANT_IPC_PARSE_VERSION +# define __ARCH_WANT_STAT64 +# define __ARCH_WANT_SYS_OLD_MMAP +# define __ARCH_WANT_SYS_OLD_SELECT + # else -# include "unistd_64.h" + +# include +# define __ARCH_WANT_COMPAT_SYS_TIME + # endif + +# define __ARCH_WANT_OLD_READDIR +# define __ARCH_WANT_OLD_STAT +# define __ARCH_WANT_SYS_ALARM +# define __ARCH_WANT_SYS_FADVISE64 +# define __ARCH_WANT_SYS_GETHOSTNAME +# define __ARCH_WANT_SYS_GETPGRP +# define __ARCH_WANT_SYS_LLSEEK +# define __ARCH_WANT_SYS_NICE +# define __ARCH_WANT_SYS_OLDUMOUNT +# define __ARCH_WANT_SYS_OLD_GETRLIMIT +# define __ARCH_WANT_SYS_OLD_UNAME +# define __ARCH_WANT_SYS_PAUSE +# define __ARCH_WANT_SYS_RT_SIGACTION +# define __ARCH_WANT_SYS_RT_SIGSUSPEND +# define __ARCH_WANT_SYS_SGETMASK +# define __ARCH_WANT_SYS_SIGNAL +# define __ARCH_WANT_SYS_SIGPENDING +# define __ARCH_WANT_SYS_SIGPROCMASK +# define __ARCH_WANT_SYS_SOCKETCALL +# define __ARCH_WANT_SYS_TIME +# define __ARCH_WANT_SYS_UTIME +# define __ARCH_WANT_SYS_WAITPID + +/* + * "Conditional" syscalls + * + * What we want is __attribute__((weak,alias("sys_ni_syscall"))), + * but it doesn't work on all toolchains, so we just do it by hand + */ +# define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") + #else # ifdef __i386__ -# include "unistd_32.h" +# include # else -# include "unistd_64.h" +# include # endif #endif + +#endif /* _ASM_X86_UNISTD_H */ diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h deleted file mode 100644 index 599c77d38f3..00000000000 --- a/arch/x86/include/asm/unistd_32.h +++ /dev/null @@ -1,401 +0,0 @@ -#ifndef _ASM_X86_UNISTD_32_H -#define _ASM_X86_UNISTD_32_H - -/* - * This file contains the system call numbers. - */ - -#define __NR_restart_syscall 0 -#define __NR_exit 1 -#define __NR_fork 2 -#define __NR_read 3 -#define __NR_write 4 -#define __NR_open 5 -#define __NR_close 6 -#define __NR_waitpid 7 -#define __NR_creat 8 -#define __NR_link 9 -#define __NR_unlink 10 -#define __NR_execve 11 -#define __NR_chdir 12 -#define __NR_time 13 -#define __NR_mknod 14 -#define __NR_chmod 15 -#define __NR_lchown 16 -#define __NR_break 17 -#define __NR_oldstat 18 -#define __NR_lseek 19 -#define __NR_getpid 20 -#define __NR_mount 21 -#define __NR_umount 22 -#define __NR_setuid 23 -#define __NR_getuid 24 -#define __NR_stime 25 -#define __NR_ptrace 26 -#define __NR_alarm 27 -#define __NR_oldfstat 28 -#define __NR_pause 29 -#define __NR_utime 30 -#define __NR_stty 31 -#define __NR_gtty 32 -#define __NR_access 33 -#define __NR_nice 34 -#define __NR_ftime 35 -#define __NR_sync 36 -#define __NR_kill 37 -#define __NR_rename 38 -#define __NR_mkdir 39 -#define __NR_rmdir 40 -#define __NR_dup 41 -#define __NR_pipe 42 -#define __NR_times 43 -#define __NR_prof 44 -#define __NR_brk 45 -#define __NR_setgid 46 -#define __NR_getgid 47 -#define __NR_signal 48 -#define __NR_geteuid 49 -#define __NR_getegid 50 -#define __NR_acct 51 -#define __NR_umount2 52 -#define __NR_lock 53 -#define __NR_ioctl 54 -#define __NR_fcntl 55 -#define __NR_mpx 56 -#define __NR_setpgid 57 -#define __NR_ulimit 58 -#define __NR_oldolduname 59 -#define __NR_umask 60 -#define __NR_chroot 61 -#define __NR_ustat 62 -#define __NR_dup2 63 -#define __NR_getppid 64 -#define __NR_getpgrp 65 -#define __NR_setsid 66 -#define __NR_sigaction 67 -#define __NR_sgetmask 68 -#define __NR_ssetmask 69 -#define __NR_setreuid 70 -#define __NR_setregid 71 -#define __NR_sigsuspend 72 -#define __NR_sigpending 73 -#define __NR_sethostname 74 -#define __NR_setrlimit 75 -#define __NR_getrlimit 76 /* Back compatible 2Gig limited rlimit */ -#define __NR_getrusage 77 -#define __NR_gettimeofday 78 -#define __NR_settimeofday 79 -#define __NR_getgroups 80 -#define __NR_setgroups 81 -#define __NR_select 82 -#define __NR_symlink 83 -#define __NR_oldlstat 84 -#define __NR_readlink 85 -#define __NR_uselib 86 -#define __NR_swapon 87 -#define __NR_reboot 88 -#define __NR_readdir 89 -#define __NR_mmap 90 -#define __NR_munmap 91 -#define __NR_truncate 92 -#define __NR_ftruncate 93 -#define __NR_fchmod 94 -#define __NR_fchown 95 -#define __NR_getpriority 96 -#define __NR_setpriority 97 -#define __NR_profil 98 -#define __NR_statfs 99 -#define __NR_fstatfs 100 -#define __NR_ioperm 101 -#define __NR_socketcall 102 -#define __NR_syslog 103 -#define __NR_setitimer 104 -#define __NR_getitimer 105 -#define __NR_stat 106 -#define __NR_lstat 107 -#define __NR_fstat 108 -#define __NR_olduname 109 -#define __NR_iopl 110 -#define __NR_vhangup 111 -#define __NR_idle 112 -#define __NR_vm86old 113 -#define __NR_wait4 114 -#define __NR_swapoff 115 -#define __NR_sysinfo 116 -#define __NR_ipc 117 -#define __NR_fsync 118 -#define __NR_sigreturn 119 -#define __NR_clone 120 -#define __NR_setdomainname 121 -#define __NR_uname 122 -#define __NR_modify_ldt 123 -#define __NR_adjtimex 124 -#define __NR_mprotect 125 -#define __NR_sigprocmask 126 -#define __NR_create_module 127 -#define __NR_init_module 128 -#define __NR_delete_module 129 -#define __NR_get_kernel_syms 130 -#define __NR_quotactl 131 -#define __NR_getpgid 132 -#define __NR_fchdir 133 -#define __NR_bdflush 134 -#define __NR_sysfs 135 -#define __NR_personality 136 -#define __NR_afs_syscall 137 /* Syscall for Andrew File System */ -#define __NR_setfsuid 138 -#define __NR_setfsgid 139 -#define __NR__llseek 140 -#define __NR_getdents 141 -#define __NR__newselect 142 -#define __NR_flock 143 -#define __NR_msync 144 -#define __NR_readv 145 -#define __NR_writev 146 -#define __NR_getsid 147 -#define __NR_fdatasync 148 -#define __NR__sysctl 149 -#define __NR_mlock 150 -#define __NR_munlock 151 -#define __NR_mlockall 152 -#define __NR_munlockall 153 -#define __NR_sched_setparam 154 -#define __NR_sched_getparam 155 -#define __NR_sched_setscheduler 156 -#define __NR_sched_getscheduler 157 -#define __NR_sched_yield 158 -#define __NR_sched_get_priority_max 159 -#define __NR_sched_get_priority_min 160 -#define __NR_sched_rr_get_interval 161 -#define __NR_nanosleep 162 -#define __NR_mremap 163 -#define __NR_setresuid 164 -#define __NR_getresuid 165 -#define __NR_vm86 166 -#define __NR_query_module 167 -#define __NR_poll 168 -#define __NR_nfsservctl 169 -#define __NR_setresgid 170 -#define __NR_getresgid 171 -#define __NR_prctl 172 -#define __NR_rt_sigreturn 173 -#define __NR_rt_sigaction 174 -#define __NR_rt_sigprocmask 175 -#define __NR_rt_sigpending 176 -#define __NR_rt_sigtimedwait 177 -#define __NR_rt_sigqueueinfo 178 -#define __NR_rt_sigsuspend 179 -#define __NR_pread64 180 -#define __NR_pwrite64 181 -#define __NR_chown 182 -#define __NR_getcwd 183 -#define __NR_capget 184 -#define __NR_capset 185 -#define __NR_sigaltstack 186 -#define __NR_sendfile 187 -#define __NR_getpmsg 188 /* some people actually want streams */ -#define __NR_putpmsg 189 /* some people actually want streams */ -#define __NR_vfork 190 -#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ -#define __NR_mmap2 192 -#define __NR_truncate64 193 -#define __NR_ftruncate64 194 -#define __NR_stat64 195 -#define __NR_lstat64 196 -#define __NR_fstat64 197 -#define __NR_lchown32 198 -#define __NR_getuid32 199 -#define __NR_getgid32 200 -#define __NR_geteuid32 201 -#define __NR_getegid32 202 -#define __NR_setreuid32 203 -#define __NR_setregid32 204 -#define __NR_getgroups32 205 -#define __NR_setgroups32 206 -#define __NR_fchown32 207 -#define __NR_setresuid32 208 -#define __NR_getresuid32 209 -#define __NR_setresgid32 210 -#define __NR_getresgid32 211 -#define __NR_chown32 212 -#define __NR_setuid32 213 -#define __NR_setgid32 214 -#define __NR_setfsuid32 215 -#define __NR_setfsgid32 216 -#define __NR_pivot_root 217 -#define __NR_mincore 218 -#define __NR_madvise 219 -#define __NR_madvise1 219 /* delete when C lib stub is removed */ -#define __NR_getdents64 220 -#define __NR_fcntl64 221 -/* 223 is unused */ -#define __NR_gettid 224 -#define __NR_readahead 225 -#define __NR_setxattr 226 -#define __NR_lsetxattr 227 -#define __NR_fsetxattr 228 -#define __NR_getxattr 229 -#define __NR_lgetxattr 230 -#define __NR_fgetxattr 231 -#define __NR_listxattr 232 -#define __NR_llistxattr 233 -#define __NR_flistxattr 234 -#define __NR_removexattr 235 -#define __NR_lremovexattr 236 -#define __NR_fremovexattr 237 -#define __NR_tkill 238 -#define __NR_sendfile64 239 -#define __NR_futex 240 -#define __NR_sched_setaffinity 241 -#define __NR_sched_getaffinity 242 -#define __NR_set_thread_area 243 -#define __NR_get_thread_area 244 -#define __NR_io_setup 245 -#define __NR_io_destroy 246 -#define __NR_io_getevents 247 -#define __NR_io_submit 248 -#define __NR_io_cancel 249 -#define __NR_fadvise64 250 -/* 251 is available for reuse (was briefly sys_set_zone_reclaim) */ -#define __NR_exit_group 252 -#define __NR_lookup_dcookie 253 -#define __NR_epoll_create 254 -#define __NR_epoll_ctl 255 -#define __NR_epoll_wait 256 -#define __NR_remap_file_pages 257 -#define __NR_set_tid_address 258 -#define __NR_timer_create 259 -#define __NR_timer_settime (__NR_timer_create+1) -#define __NR_timer_gettime (__NR_timer_create+2) -#define __NR_timer_getoverrun (__NR_timer_create+3) -#define __NR_timer_delete (__NR_timer_create+4) -#define __NR_clock_settime (__NR_timer_create+5) -#define __NR_clock_gettime (__NR_timer_create+6) -#define __NR_clock_getres (__NR_timer_create+7) -#define __NR_clock_nanosleep (__NR_timer_create+8) -#define __NR_statfs64 268 -#define __NR_fstatfs64 269 -#define __NR_tgkill 270 -#define __NR_utimes 271 -#define __NR_fadvise64_64 272 -#define __NR_vserver 273 -#define __NR_mbind 274 -#define __NR_get_mempolicy 275 -#define __NR_set_mempolicy 276 -#define __NR_mq_open 277 -#define __NR_mq_unlink (__NR_mq_open+1) -#define __NR_mq_timedsend (__NR_mq_open+2) -#define __NR_mq_timedreceive (__NR_mq_open+3) -#define __NR_mq_notify (__NR_mq_open+4) -#define __NR_mq_getsetattr (__NR_mq_open+5) -#define __NR_kexec_load 283 -#define __NR_waitid 284 -/* #define __NR_sys_setaltroot 285 */ -#define __NR_add_key 286 -#define __NR_request_key 287 -#define __NR_keyctl 288 -#define __NR_ioprio_set 289 -#define __NR_ioprio_get 290 -#define __NR_inotify_init 291 -#define __NR_inotify_add_watch 292 -#define __NR_inotify_rm_watch 293 -#define __NR_migrate_pages 294 -#define __NR_openat 295 -#define __NR_mkdirat 296 -#define __NR_mknodat 297 -#define __NR_fchownat 298 -#define __NR_futimesat 299 -#define __NR_fstatat64 300 -#define __NR_unlinkat 301 -#define __NR_renameat 302 -#define __NR_linkat 303 -#define __NR_symlinkat 304 -#define __NR_readlinkat 305 -#define __NR_fchmodat 306 -#define __NR_faccessat 307 -#define __NR_pselect6 308 -#define __NR_ppoll 309 -#define __NR_unshare 310 -#define __NR_set_robust_list 311 -#define __NR_get_robust_list 312 -#define __NR_splice 313 -#define __NR_sync_file_range 314 -#define __NR_tee 315 -#define __NR_vmsplice 316 -#define __NR_move_pages 317 -#define __NR_getcpu 318 -#define __NR_epoll_pwait 319 -#define __NR_utimensat 320 -#define __NR_signalfd 321 -#define __NR_timerfd_create 322 -#define __NR_eventfd 323 -#define __NR_fallocate 324 -#define __NR_timerfd_settime 325 -#define __NR_timerfd_gettime 326 -#define __NR_signalfd4 327 -#define __NR_eventfd2 328 -#define __NR_epoll_create1 329 -#define __NR_dup3 330 -#define __NR_pipe2 331 -#define __NR_inotify_init1 332 -#define __NR_preadv 333 -#define __NR_pwritev 334 -#define __NR_rt_tgsigqueueinfo 335 -#define __NR_perf_event_open 336 -#define __NR_recvmmsg 337 -#define __NR_fanotify_init 338 -#define __NR_fanotify_mark 339 -#define __NR_prlimit64 340 -#define __NR_name_to_handle_at 341 -#define __NR_open_by_handle_at 342 -#define __NR_clock_adjtime 343 -#define __NR_syncfs 344 -#define __NR_sendmmsg 345 -#define __NR_setns 346 -#define __NR_process_vm_readv 347 -#define __NR_process_vm_writev 348 - -#ifdef __KERNEL__ - -#define NR_syscalls 349 - -#define __ARCH_WANT_IPC_PARSE_VERSION -#define __ARCH_WANT_OLD_READDIR -#define __ARCH_WANT_OLD_STAT -#define __ARCH_WANT_STAT64 -#define __ARCH_WANT_SYS_ALARM -#define __ARCH_WANT_SYS_GETHOSTNAME -#define __ARCH_WANT_SYS_IPC -#define __ARCH_WANT_SYS_PAUSE -#define __ARCH_WANT_SYS_SGETMASK -#define __ARCH_WANT_SYS_SIGNAL -#define __ARCH_WANT_SYS_TIME -#define __ARCH_WANT_SYS_UTIME -#define __ARCH_WANT_SYS_WAITPID -#define __ARCH_WANT_SYS_SOCKETCALL -#define __ARCH_WANT_SYS_FADVISE64 -#define __ARCH_WANT_SYS_GETPGRP -#define __ARCH_WANT_SYS_LLSEEK -#define __ARCH_WANT_SYS_NICE -#define __ARCH_WANT_SYS_OLD_GETRLIMIT -#define __ARCH_WANT_SYS_OLD_UNAME -#define __ARCH_WANT_SYS_OLD_MMAP -#define __ARCH_WANT_SYS_OLD_SELECT -#define __ARCH_WANT_SYS_OLDUMOUNT -#define __ARCH_WANT_SYS_SIGPENDING -#define __ARCH_WANT_SYS_SIGPROCMASK -#define __ARCH_WANT_SYS_RT_SIGACTION -#define __ARCH_WANT_SYS_RT_SIGSUSPEND - -/* - * "Conditional" syscalls - * - * What we want is __attribute__((weak,alias("sys_ni_syscall"))), - * but it doesn't work on all toolchains, so we just do it by hand - */ -#ifndef cond_syscall -#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") -#endif - -#endif /* __KERNEL__ */ -#endif /* _ASM_X86_UNISTD_32_H */ diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h deleted file mode 100644 index 0431f193c3f..00000000000 --- a/arch/x86/include/asm/unistd_64.h +++ /dev/null @@ -1,732 +0,0 @@ -#ifndef _ASM_X86_UNISTD_64_H -#define _ASM_X86_UNISTD_64_H - -#ifndef __SYSCALL -#define __SYSCALL(a, b) -#endif - -/* - * This file contains the system call numbers. - * - * Note: holes are not allowed. - */ - -/* at least 8 syscall per cacheline */ -#define __NR_read 0 -__SYSCALL(__NR_read, sys_read) -#define __NR_write 1 -__SYSCALL(__NR_write, sys_write) -#define __NR_open 2 -__SYSCALL(__NR_open, sys_open) -#define __NR_close 3 -__SYSCALL(__NR_close, sys_close) -#define __NR_stat 4 -__SYSCALL(__NR_stat, sys_newstat) -#define __NR_fstat 5 -__SYSCALL(__NR_fstat, sys_newfstat) -#define __NR_lstat 6 -__SYSCALL(__NR_lstat, sys_newlstat) -#define __NR_poll 7 -__SYSCALL(__NR_poll, sys_poll) - -#define __NR_lseek 8 -__SYSCALL(__NR_lseek, sys_lseek) -#define __NR_mmap 9 -__SYSCALL(__NR_mmap, sys_mmap) -#define __NR_mprotect 10 -__SYSCALL(__NR_mprotect, sys_mprotect) -#define __NR_munmap 11 -__SYSCALL(__NR_munmap, sys_munmap) -#define __NR_brk 12 -__SYSCALL(__NR_brk, sys_brk) -#define __NR_rt_sigaction 13 -__SYSCALL(__NR_rt_sigaction, sys_rt_sigaction) -#define __NR_rt_sigprocmask 14 -__SYSCALL(__NR_rt_sigprocmask, sys_rt_sigprocmask) -#define __NR_rt_sigreturn 15 -__SYSCALL(__NR_rt_sigreturn, stub_rt_sigreturn) - -#define __NR_ioctl 16 -__SYSCALL(__NR_ioctl, sys_ioctl) -#define __NR_pread64 17 -__SYSCALL(__NR_pread64, sys_pread64) -#define __NR_pwrite64 18 -__SYSCALL(__NR_pwrite64, sys_pwrite64) -#define __NR_readv 19 -__SYSCALL(__NR_readv, sys_readv) -#define __NR_writev 20 -__SYSCALL(__NR_writev, sys_writev) -#define __NR_access 21 -__SYSCALL(__NR_access, sys_access) -#define __NR_pipe 22 -__SYSCALL(__NR_pipe, sys_pipe) -#define __NR_select 23 -__SYSCALL(__NR_select, sys_select) - -#define __NR_sched_yield 24 -__SYSCALL(__NR_sched_yield, sys_sched_yield) -#define __NR_mremap 25 -__SYSCALL(__NR_mremap, sys_mremap) -#define __NR_msync 26 -__SYSCALL(__NR_msync, sys_msync) -#define __NR_mincore 27 -__SYSCALL(__NR_mincore, sys_mincore) -#define __NR_madvise 28 -__SYSCALL(__NR_madvise, sys_madvise) -#define __NR_shmget 29 -__SYSCALL(__NR_shmget, sys_shmget) -#define __NR_shmat 30 -__SYSCALL(__NR_shmat, sys_shmat) -#define __NR_shmctl 31 -__SYSCALL(__NR_shmctl, sys_shmctl) - -#define __NR_dup 32 -__SYSCALL(__NR_dup, sys_dup) -#define __NR_dup2 33 -__SYSCALL(__NR_dup2, sys_dup2) -#define __NR_pause 34 -__SYSCALL(__NR_pause, sys_pause) -#define __NR_nanosleep 35 -__SYSCALL(__NR_nanosleep, sys_nanosleep) -#define __NR_getitimer 36 -__SYSCALL(__NR_getitimer, sys_getitimer) -#define __NR_alarm 37 -__SYSCALL(__NR_alarm, sys_alarm) -#define __NR_setitimer 38 -__SYSCALL(__NR_setitimer, sys_setitimer) -#define __NR_getpid 39 -__SYSCALL(__NR_getpid, sys_getpid) - -#define __NR_sendfile 40 -__SYSCALL(__NR_sendfile, sys_sendfile64) -#define __NR_socket 41 -__SYSCALL(__NR_socket, sys_socket) -#define __NR_connect 42 -__SYSCALL(__NR_connect, sys_connect) -#define __NR_accept 43 -__SYSCALL(__NR_accept, sys_accept) -#define __NR_sendto 44 -__SYSCALL(__NR_sendto, sys_sendto) -#define __NR_recvfrom 45 -__SYSCALL(__NR_recvfrom, sys_recvfrom) -#define __NR_sendmsg 46 -__SYSCALL(__NR_sendmsg, sys_sendmsg) -#define __NR_recvmsg 47 -__SYSCALL(__NR_recvmsg, sys_recvmsg) - -#define __NR_shutdown 48 -__SYSCALL(__NR_shutdown, sys_shutdown) -#define __NR_bind 49 -__SYSCALL(__NR_bind, sys_bind) -#define __NR_listen 50 -__SYSCALL(__NR_listen, sys_listen) -#define __NR_getsockname 51 -__SYSCALL(__NR_getsockname, sys_getsockname) -#define __NR_getpeername 52 -__SYSCALL(__NR_getpeername, sys_getpeername) -#define __NR_socketpair 53 -__SYSCALL(__NR_socketpair, sys_socketpair) -#define __NR_setsockopt 54 -__SYSCALL(__NR_setsockopt, sys_setsockopt) -#define __NR_getsockopt 55 -__SYSCALL(__NR_getsockopt, sys_getsockopt) - -#define __NR_clone 56 -__SYSCALL(__NR_clone, stub_clone) -#define __NR_fork 57 -__SYSCALL(__NR_fork, stub_fork) -#define __NR_vfork 58 -__SYSCALL(__NR_vfork, stub_vfork) -#define __NR_execve 59 -__SYSCALL(__NR_execve, stub_execve) -#define __NR_exit 60 -__SYSCALL(__NR_exit, sys_exit) -#define __NR_wait4 61 -__SYSCALL(__NR_wait4, sys_wait4) -#define __NR_kill 62 -__SYSCALL(__NR_kill, sys_kill) -#define __NR_uname 63 -__SYSCALL(__NR_uname, sys_newuname) - -#define __NR_semget 64 -__SYSCALL(__NR_semget, sys_semget) -#define __NR_semop 65 -__SYSCALL(__NR_semop, sys_semop) -#define __NR_semctl 66 -__SYSCALL(__NR_semctl, sys_semctl) -#define __NR_shmdt 67 -__SYSCALL(__NR_shmdt, sys_shmdt) -#define __NR_msgget 68 -__SYSCALL(__NR_msgget, sys_msgget) -#define __NR_msgsnd 69 -__SYSCALL(__NR_msgsnd, sys_msgsnd) -#define __NR_msgrcv 70 -__SYSCALL(__NR_msgrcv, sys_msgrcv) -#define __NR_msgctl 71 -__SYSCALL(__NR_msgctl, sys_msgctl) - -#define __NR_fcntl 72 -__SYSCALL(__NR_fcntl, sys_fcntl) -#define __NR_flock 73 -__SYSCALL(__NR_flock, sys_flock) -#define __NR_fsync 74 -__SYSCALL(__NR_fsync, sys_fsync) -#define __NR_fdatasync 75 -__SYSCALL(__NR_fdatasync, sys_fdatasync) -#define __NR_truncate 76 -__SYSCALL(__NR_truncate, sys_truncate) -#define __NR_ftruncate 77 -__SYSCALL(__NR_ftruncate, sys_ftruncate) -#define __NR_getdents 78 -__SYSCALL(__NR_getdents, sys_getdents) -#define __NR_getcwd 79 -__SYSCALL(__NR_getcwd, sys_getcwd) - -#define __NR_chdir 80 -__SYSCALL(__NR_chdir, sys_chdir) -#define __NR_fchdir 81 -__SYSCALL(__NR_fchdir, sys_fchdir) -#define __NR_rename 82 -__SYSCALL(__NR_rename, sys_rename) -#define __NR_mkdir 83 -__SYSCALL(__NR_mkdir, sys_mkdir) -#define __NR_rmdir 84 -__SYSCALL(__NR_rmdir, sys_rmdir) -#define __NR_creat 85 -__SYSCALL(__NR_creat, sys_creat) -#define __NR_link 86 -__SYSCALL(__NR_link, sys_link) -#define __NR_unlink 87 -__SYSCALL(__NR_unlink, sys_unlink) - -#define __NR_symlink 88 -__SYSCALL(__NR_symlink, sys_symlink) -#define __NR_readlink 89 -__SYSCALL(__NR_readlink, sys_readlink) -#define __NR_chmod 90 -__SYSCALL(__NR_chmod, sys_chmod) -#define __NR_fchmod 91 -__SYSCALL(__NR_fchmod, sys_fchmod) -#define __NR_chown 92 -__SYSCALL(__NR_chown, sys_chown) -#define __NR_fchown 93 -__SYSCALL(__NR_fchown, sys_fchown) -#define __NR_lchown 94 -__SYSCALL(__NR_lchown, sys_lchown) -#define __NR_umask 95 -__SYSCALL(__NR_umask, sys_umask) - -#define __NR_gettimeofday 96 -__SYSCALL(__NR_gettimeofday, sys_gettimeofday) -#define __NR_getrlimit 97 -__SYSCALL(__NR_getrlimit, sys_getrlimit) -#define __NR_getrusage 98 -__SYSCALL(__NR_getrusage, sys_getrusage) -#define __NR_sysinfo 99 -__SYSCALL(__NR_sysinfo, sys_sysinfo) -#define __NR_times 100 -__SYSCALL(__NR_times, sys_times) -#define __NR_ptrace 101 -__SYSCALL(__NR_ptrace, sys_ptrace) -#define __NR_getuid 102 -__SYSCALL(__NR_getuid, sys_getuid) -#define __NR_syslog 103 -__SYSCALL(__NR_syslog, sys_syslog) - -/* at the very end the stuff that never runs during the benchmarks */ -#define __NR_getgid 104 -__SYSCALL(__NR_getgid, sys_getgid) -#define __NR_setuid 105 -__SYSCALL(__NR_setuid, sys_setuid) -#define __NR_setgid 106 -__SYSCALL(__NR_setgid, sys_setgid) -#define __NR_geteuid 107 -__SYSCALL(__NR_geteuid, sys_geteuid) -#define __NR_getegid 108 -__SYSCALL(__NR_getegid, sys_getegid) -#define __NR_setpgid 109 -__SYSCALL(__NR_setpgid, sys_setpgid) -#define __NR_getppid 110 -__SYSCALL(__NR_getppid, sys_getppid) -#define __NR_getpgrp 111 -__SYSCALL(__NR_getpgrp, sys_getpgrp) - -#define __NR_setsid 112 -__SYSCALL(__NR_setsid, sys_setsid) -#define __NR_setreuid 113 -__SYSCALL(__NR_setreuid, sys_setreuid) -#define __NR_setregid 114 -__SYSCALL(__NR_setregid, sys_setregid) -#define __NR_getgroups 115 -__SYSCALL(__NR_getgroups, sys_getgroups) -#define __NR_setgroups 116 -__SYSCALL(__NR_setgroups, sys_setgroups) -#define __NR_setresuid 117 -__SYSCALL(__NR_setresuid, sys_setresuid) -#define __NR_getresuid 118 -__SYSCALL(__NR_getresuid, sys_getresuid) -#define __NR_setresgid 119 -__SYSCALL(__NR_setresgid, sys_setresgid) - -#define __NR_getresgid 120 -__SYSCALL(__NR_getresgid, sys_getresgid) -#define __NR_getpgid 121 -__SYSCALL(__NR_getpgid, sys_getpgid) -#define __NR_setfsuid 122 -__SYSCALL(__NR_setfsuid, sys_setfsuid) -#define __NR_setfsgid 123 -__SYSCALL(__NR_setfsgid, sys_setfsgid) -#define __NR_getsid 124 -__SYSCALL(__NR_getsid, sys_getsid) -#define __NR_capget 125 -__SYSCALL(__NR_capget, sys_capget) -#define __NR_capset 126 -__SYSCALL(__NR_capset, sys_capset) - -#define __NR_rt_sigpending 127 -__SYSCALL(__NR_rt_sigpending, sys_rt_sigpending) -#define __NR_rt_sigtimedwait 128 -__SYSCALL(__NR_rt_sigtimedwait, sys_rt_sigtimedwait) -#define __NR_rt_sigqueueinfo 129 -__SYSCALL(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo) -#define __NR_rt_sigsuspend 130 -__SYSCALL(__NR_rt_sigsuspend, sys_rt_sigsuspend) -#define __NR_sigaltstack 131 -__SYSCALL(__NR_sigaltstack, stub_sigaltstack) -#define __NR_utime 132 -__SYSCALL(__NR_utime, sys_utime) -#define __NR_mknod 133 -__SYSCALL(__NR_mknod, sys_mknod) - -/* Only needed for a.out */ -#define __NR_uselib 134 -__SYSCALL(__NR_uselib, sys_ni_syscall) -#define __NR_personality 135 -__SYSCALL(__NR_personality, sys_personality) - -#define __NR_ustat 136 -__SYSCALL(__NR_ustat, sys_ustat) -#define __NR_statfs 137 -__SYSCALL(__NR_statfs, sys_statfs) -#define __NR_fstatfs 138 -__SYSCALL(__NR_fstatfs, sys_fstatfs) -#define __NR_sysfs 139 -__SYSCALL(__NR_sysfs, sys_sysfs) - -#define __NR_getpriority 140 -__SYSCALL(__NR_getpriority, sys_getpriority) -#define __NR_setpriority 141 -__SYSCALL(__NR_setpriority, sys_setpriority) -#define __NR_sched_setparam 142 -__SYSCALL(__NR_sched_setparam, sys_sched_setparam) -#define __NR_sched_getparam 143 -__SYSCALL(__NR_sched_getparam, sys_sched_getparam) -#define __NR_sched_setscheduler 144 -__SYSCALL(__NR_sched_setscheduler, sys_sched_setscheduler) -#define __NR_sched_getscheduler 145 -__SYSCALL(__NR_sched_getscheduler, sys_sched_getscheduler) -#define __NR_sched_get_priority_max 146 -__SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max) -#define __NR_sched_get_priority_min 147 -__SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min) -#define __NR_sched_rr_get_interval 148 -__SYSCALL(__NR_sched_rr_get_interval, sys_sched_rr_get_interval) - -#define __NR_mlock 149 -__SYSCALL(__NR_mlock, sys_mlock) -#define __NR_munlock 150 -__SYSCALL(__NR_munlock, sys_munlock) -#define __NR_mlockall 151 -__SYSCALL(__NR_mlockall, sys_mlockall) -#define __NR_munlockall 152 -__SYSCALL(__NR_munlockall, sys_munlockall) - -#define __NR_vhangup 153 -__SYSCALL(__NR_vhangup, sys_vhangup) - -#define __NR_modify_ldt 154 -__SYSCALL(__NR_modify_ldt, sys_modify_ldt) - -#define __NR_pivot_root 155 -__SYSCALL(__NR_pivot_root, sys_pivot_root) - -#define __NR__sysctl 156 -__SYSCALL(__NR__sysctl, sys_sysctl) - -#define __NR_prctl 157 -__SYSCALL(__NR_prctl, sys_prctl) -#define __NR_arch_prctl 158 -__SYSCALL(__NR_arch_prctl, sys_arch_prctl) - -#define __NR_adjtimex 159 -__SYSCALL(__NR_adjtimex, sys_adjtimex) - -#define __NR_setrlimit 160 -__SYSCALL(__NR_setrlimit, sys_setrlimit) - -#define __NR_chroot 161 -__SYSCALL(__NR_chroot, sys_chroot) - -#define __NR_sync 162 -__SYSCALL(__NR_sync, sys_sync) - -#define __NR_acct 163 -__SYSCALL(__NR_acct, sys_acct) - -#define __NR_settimeofday 164 -__SYSCALL(__NR_settimeofday, sys_settimeofday) - -#define __NR_mount 165 -__SYSCALL(__NR_mount, sys_mount) -#define __NR_umount2 166 -__SYSCALL(__NR_umount2, sys_umount) - -#define __NR_swapon 167 -__SYSCALL(__NR_swapon, sys_swapon) -#define __NR_swapoff 168 -__SYSCALL(__NR_swapoff, sys_swapoff) - -#define __NR_reboot 169 -__SYSCALL(__NR_reboot, sys_reboot) - -#define __NR_sethostname 170 -__SYSCALL(__NR_sethostname, sys_sethostname) -#define __NR_setdomainname 171 -__SYSCALL(__NR_setdomainname, sys_setdomainname) - -#define __NR_iopl 172 -__SYSCALL(__NR_iopl, stub_iopl) -#define __NR_ioperm 173 -__SYSCALL(__NR_ioperm, sys_ioperm) - -#define __NR_create_module 174 -__SYSCALL(__NR_create_module, sys_ni_syscall) -#define __NR_init_module 175 -__SYSCALL(__NR_init_module, sys_init_module) -#define __NR_delete_module 176 -__SYSCALL(__NR_delete_module, sys_delete_module) -#define __NR_get_kernel_syms 177 -__SYSCALL(__NR_get_kernel_syms, sys_ni_syscall) -#define __NR_query_module 178 -__SYSCALL(__NR_query_module, sys_ni_syscall) - -#define __NR_quotactl 179 -__SYSCALL(__NR_quotactl, sys_quotactl) - -#define __NR_nfsservctl 180 -__SYSCALL(__NR_nfsservctl, sys_ni_syscall) - -/* reserved for LiS/STREAMS */ -#define __NR_getpmsg 181 -__SYSCALL(__NR_getpmsg, sys_ni_syscall) -#define __NR_putpmsg 182 -__SYSCALL(__NR_putpmsg, sys_ni_syscall) - -/* reserved for AFS */ -#define __NR_afs_syscall 183 -__SYSCALL(__NR_afs_syscall, sys_ni_syscall) - -/* reserved for tux */ -#define __NR_tuxcall 184 -__SYSCALL(__NR_tuxcall, sys_ni_syscall) - -#define __NR_security 185 -__SYSCALL(__NR_security, sys_ni_syscall) - -#define __NR_gettid 186 -__SYSCALL(__NR_gettid, sys_gettid) - -#define __NR_readahead 187 -__SYSCALL(__NR_readahead, sys_readahead) -#define __NR_setxattr 188 -__SYSCALL(__NR_setxattr, sys_setxattr) -#define __NR_lsetxattr 189 -__SYSCALL(__NR_lsetxattr, sys_lsetxattr) -#define __NR_fsetxattr 190 -__SYSCALL(__NR_fsetxattr, sys_fsetxattr) -#define __NR_getxattr 191 -__SYSCALL(__NR_getxattr, sys_getxattr) -#define __NR_lgetxattr 192 -__SYSCALL(__NR_lgetxattr, sys_lgetxattr) -#define __NR_fgetxattr 193 -__SYSCALL(__NR_fgetxattr, sys_fgetxattr) -#define __NR_listxattr 194 -__SYSCALL(__NR_listxattr, sys_listxattr) -#define __NR_llistxattr 195 -__SYSCALL(__NR_llistxattr, sys_llistxattr) -#define __NR_flistxattr 196 -__SYSCALL(__NR_flistxattr, sys_flistxattr) -#define __NR_removexattr 197 -__SYSCALL(__NR_removexattr, sys_removexattr) -#define __NR_lremovexattr 198 -__SYSCALL(__NR_lremovexattr, sys_lremovexattr) -#define __NR_fremovexattr 199 -__SYSCALL(__NR_fremovexattr, sys_fremovexattr) -#define __NR_tkill 200 -__SYSCALL(__NR_tkill, sys_tkill) -#define __NR_time 201 -__SYSCALL(__NR_time, sys_time) -#define __NR_futex 202 -__SYSCALL(__NR_futex, sys_futex) -#define __NR_sched_setaffinity 203 -__SYSCALL(__NR_sched_setaffinity, sys_sched_setaffinity) -#define __NR_sched_getaffinity 204 -__SYSCALL(__NR_sched_getaffinity, sys_sched_getaffinity) -#define __NR_set_thread_area 205 -__SYSCALL(__NR_set_thread_area, sys_ni_syscall) /* use arch_prctl */ -#define __NR_io_setup 206 -__SYSCALL(__NR_io_setup, sys_io_setup) -#define __NR_io_destroy 207 -__SYSCALL(__NR_io_destroy, sys_io_destroy) -#define __NR_io_getevents 208 -__SYSCALL(__NR_io_getevents, sys_io_getevents) -#define __NR_io_submit 209 -__SYSCALL(__NR_io_submit, sys_io_submit) -#define __NR_io_cancel 210 -__SYSCALL(__NR_io_cancel, sys_io_cancel) -#define __NR_get_thread_area 211 -__SYSCALL(__NR_get_thread_area, sys_ni_syscall) /* use arch_prctl */ -#define __NR_lookup_dcookie 212 -__SYSCALL(__NR_lookup_dcookie, sys_lookup_dcookie) -#define __NR_epoll_create 213 -__SYSCALL(__NR_epoll_create, sys_epoll_create) -#define __NR_epoll_ctl_old 214 -__SYSCALL(__NR_epoll_ctl_old, sys_ni_syscall) -#define __NR_epoll_wait_old 215 -__SYSCALL(__NR_epoll_wait_old, sys_ni_syscall) -#define __NR_remap_file_pages 216 -__SYSCALL(__NR_remap_file_pages, sys_remap_file_pages) -#define __NR_getdents64 217 -__SYSCALL(__NR_getdents64, sys_getdents64) -#define __NR_set_tid_address 218 -__SYSCALL(__NR_set_tid_address, sys_set_tid_address) -#define __NR_restart_syscall 219 -__SYSCALL(__NR_restart_syscall, sys_restart_syscall) -#define __NR_semtimedop 220 -__SYSCALL(__NR_semtimedop, sys_semtimedop) -#define __NR_fadvise64 221 -__SYSCALL(__NR_fadvise64, sys_fadvise64) -#define __NR_timer_create 222 -__SYSCALL(__NR_timer_create, sys_timer_create) -#define __NR_timer_settime 223 -__SYSCALL(__NR_timer_settime, sys_timer_settime) -#define __NR_timer_gettime 224 -__SYSCALL(__NR_timer_gettime, sys_timer_gettime) -#define __NR_timer_getoverrun 225 -__SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun) -#define __NR_timer_delete 226 -__SYSCALL(__NR_timer_delete, sys_timer_delete) -#define __NR_clock_settime 227 -__SYSCALL(__NR_clock_settime, sys_clock_settime) -#define __NR_clock_gettime 228 -__SYSCALL(__NR_clock_gettime, sys_clock_gettime) -#define __NR_clock_getres 229 -__SYSCALL(__NR_clock_getres, sys_clock_getres) -#define __NR_clock_nanosleep 230 -__SYSCALL(__NR_clock_nanosleep, sys_clock_nanosleep) -#define __NR_exit_group 231 -__SYSCALL(__NR_exit_group, sys_exit_group) -#define __NR_epoll_wait 232 -__SYSCALL(__NR_epoll_wait, sys_epoll_wait) -#define __NR_epoll_ctl 233 -__SYSCALL(__NR_epoll_ctl, sys_epoll_ctl) -#define __NR_tgkill 234 -__SYSCALL(__NR_tgkill, sys_tgkill) -#define __NR_utimes 235 -__SYSCALL(__NR_utimes, sys_utimes) -#define __NR_vserver 236 -__SYSCALL(__NR_vserver, sys_ni_syscall) -#define __NR_mbind 237 -__SYSCALL(__NR_mbind, sys_mbind) -#define __NR_set_mempolicy 238 -__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy) -#define __NR_get_mempolicy 239 -__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy) -#define __NR_mq_open 240 -__SYSCALL(__NR_mq_open, sys_mq_open) -#define __NR_mq_unlink 241 -__SYSCALL(__NR_mq_unlink, sys_mq_unlink) -#define __NR_mq_timedsend 242 -__SYSCALL(__NR_mq_timedsend, sys_mq_timedsend) -#define __NR_mq_timedreceive 243 -__SYSCALL(__NR_mq_timedreceive, sys_mq_timedreceive) -#define __NR_mq_notify 244 -__SYSCALL(__NR_mq_notify, sys_mq_notify) -#define __NR_mq_getsetattr 245 -__SYSCALL(__NR_mq_getsetattr, sys_mq_getsetattr) -#define __NR_kexec_load 246 -__SYSCALL(__NR_kexec_load, sys_kexec_load) -#define __NR_waitid 247 -__SYSCALL(__NR_waitid, sys_waitid) -#define __NR_add_key 248 -__SYSCALL(__NR_add_key, sys_add_key) -#define __NR_request_key 249 -__SYSCALL(__NR_request_key, sys_request_key) -#define __NR_keyctl 250 -__SYSCALL(__NR_keyctl, sys_keyctl) -#define __NR_ioprio_set 251 -__SYSCALL(__NR_ioprio_set, sys_ioprio_set) -#define __NR_ioprio_get 252 -__SYSCALL(__NR_ioprio_get, sys_ioprio_get) -#define __NR_inotify_init 253 -__SYSCALL(__NR_inotify_init, sys_inotify_init) -#define __NR_inotify_add_watch 254 -__SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch) -#define __NR_inotify_rm_watch 255 -__SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch) -#define __NR_migrate_pages 256 -__SYSCALL(__NR_migrate_pages, sys_migrate_pages) -#define __NR_openat 257 -__SYSCALL(__NR_openat, sys_openat) -#define __NR_mkdirat 258 -__SYSCALL(__NR_mkdirat, sys_mkdirat) -#define __NR_mknodat 259 -__SYSCALL(__NR_mknodat, sys_mknodat) -#define __NR_fchownat 260 -__SYSCALL(__NR_fchownat, sys_fchownat) -#define __NR_futimesat 261 -__SYSCALL(__NR_futimesat, sys_futimesat) -#define __NR_newfstatat 262 -__SYSCALL(__NR_newfstatat, sys_newfstatat) -#define __NR_unlinkat 263 -__SYSCALL(__NR_unlinkat, sys_unlinkat) -#define __NR_renameat 264 -__SYSCALL(__NR_renameat, sys_renameat) -#define __NR_linkat 265 -__SYSCALL(__NR_linkat, sys_linkat) -#define __NR_symlinkat 266 -__SYSCALL(__NR_symlinkat, sys_symlinkat) -#define __NR_readlinkat 267 -__SYSCALL(__NR_readlinkat, sys_readlinkat) -#define __NR_fchmodat 268 -__SYSCALL(__NR_fchmodat, sys_fchmodat) -#define __NR_faccessat 269 -__SYSCALL(__NR_faccessat, sys_faccessat) -#define __NR_pselect6 270 -__SYSCALL(__NR_pselect6, sys_pselect6) -#define __NR_ppoll 271 -__SYSCALL(__NR_ppoll, sys_ppoll) -#define __NR_unshare 272 -__SYSCALL(__NR_unshare, sys_unshare) -#define __NR_set_robust_list 273 -__SYSCALL(__NR_set_robust_list, sys_set_robust_list) -#define __NR_get_robust_list 274 -__SYSCALL(__NR_get_robust_list, sys_get_robust_list) -#define __NR_splice 275 -__SYSCALL(__NR_splice, sys_splice) -#define __NR_tee 276 -__SYSCALL(__NR_tee, sys_tee) -#define __NR_sync_file_range 277 -__SYSCALL(__NR_sync_file_range, sys_sync_file_range) -#define __NR_vmsplice 278 -__SYSCALL(__NR_vmsplice, sys_vmsplice) -#define __NR_move_pages 279 -__SYSCALL(__NR_move_pages, sys_move_pages) -#define __NR_utimensat 280 -__SYSCALL(__NR_utimensat, sys_utimensat) -#define __NR_epoll_pwait 281 -__SYSCALL(__NR_epoll_pwait, sys_epoll_pwait) -#define __NR_signalfd 282 -__SYSCALL(__NR_signalfd, sys_signalfd) -#define __NR_timerfd_create 283 -__SYSCALL(__NR_timerfd_create, sys_timerfd_create) -#define __NR_eventfd 284 -__SYSCALL(__NR_eventfd, sys_eventfd) -#define __NR_fallocate 285 -__SYSCALL(__NR_fallocate, sys_fallocate) -#define __NR_timerfd_settime 286 -__SYSCALL(__NR_timerfd_settime, sys_timerfd_settime) -#define __NR_timerfd_gettime 287 -__SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime) -#define __NR_accept4 288 -__SYSCALL(__NR_accept4, sys_accept4) -#define __NR_signalfd4 289 -__SYSCALL(__NR_signalfd4, sys_signalfd4) -#define __NR_eventfd2 290 -__SYSCALL(__NR_eventfd2, sys_eventfd2) -#define __NR_epoll_create1 291 -__SYSCALL(__NR_epoll_create1, sys_epoll_create1) -#define __NR_dup3 292 -__SYSCALL(__NR_dup3, sys_dup3) -#define __NR_pipe2 293 -__SYSCALL(__NR_pipe2, sys_pipe2) -#define __NR_inotify_init1 294 -__SYSCALL(__NR_inotify_init1, sys_inotify_init1) -#define __NR_preadv 295 -__SYSCALL(__NR_preadv, sys_preadv) -#define __NR_pwritev 296 -__SYSCALL(__NR_pwritev, sys_pwritev) -#define __NR_rt_tgsigqueueinfo 297 -__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) -#define __NR_perf_event_open 298 -__SYSCALL(__NR_perf_event_open, sys_perf_event_open) -#define __NR_recvmmsg 299 -__SYSCALL(__NR_recvmmsg, sys_recvmmsg) -#define __NR_fanotify_init 300 -__SYSCALL(__NR_fanotify_init, sys_fanotify_init) -#define __NR_fanotify_mark 301 -__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark) -#define __NR_prlimit64 302 -__SYSCALL(__NR_prlimit64, sys_prlimit64) -#define __NR_name_to_handle_at 303 -__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at) -#define __NR_open_by_handle_at 304 -__SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at) -#define __NR_clock_adjtime 305 -__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime) -#define __NR_syncfs 306 -__SYSCALL(__NR_syncfs, sys_syncfs) -#define __NR_sendmmsg 307 -__SYSCALL(__NR_sendmmsg, sys_sendmmsg) -#define __NR_setns 308 -__SYSCALL(__NR_setns, sys_setns) -#define __NR_getcpu 309 -__SYSCALL(__NR_getcpu, sys_getcpu) -#define __NR_process_vm_readv 310 -__SYSCALL(__NR_process_vm_readv, sys_process_vm_readv) -#define __NR_process_vm_writev 311 -__SYSCALL(__NR_process_vm_writev, sys_process_vm_writev) - -#ifndef __NO_STUBS -#define __ARCH_WANT_OLD_READDIR -#define __ARCH_WANT_OLD_STAT -#define __ARCH_WANT_SYS_ALARM -#define __ARCH_WANT_SYS_GETHOSTNAME -#define __ARCH_WANT_SYS_PAUSE -#define __ARCH_WANT_SYS_SGETMASK -#define __ARCH_WANT_SYS_SIGNAL -#define __ARCH_WANT_SYS_UTIME -#define __ARCH_WANT_SYS_WAITPID -#define __ARCH_WANT_SYS_SOCKETCALL -#define __ARCH_WANT_SYS_FADVISE64 -#define __ARCH_WANT_SYS_GETPGRP -#define __ARCH_WANT_SYS_LLSEEK -#define __ARCH_WANT_SYS_NICE -#define __ARCH_WANT_SYS_OLD_GETRLIMIT -#define __ARCH_WANT_SYS_OLD_UNAME -#define __ARCH_WANT_SYS_OLDUMOUNT -#define __ARCH_WANT_SYS_SIGPENDING -#define __ARCH_WANT_SYS_SIGPROCMASK -#define __ARCH_WANT_SYS_RT_SIGACTION -#define __ARCH_WANT_SYS_RT_SIGSUSPEND -#define __ARCH_WANT_SYS_TIME -#define __ARCH_WANT_COMPAT_SYS_TIME -#endif /* __NO_STUBS */ - -#ifdef __KERNEL__ - -#ifndef COMPILE_OFFSETS -#include -#define NR_syscalls (__NR_syscall_max + 1) -#endif - -/* - * "Conditional" syscalls - * - * What we want is __attribute__((weak,alias("sys_ni_syscall"))), - * but it doesn't work on all toolchains, so we just do it by hand - */ -#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") -#endif /* __KERNEL__ */ - -#endif /* _ASM_X86_UNISTD_64_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 8baca3c4871..8c473d9d0b8 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -25,7 +25,8 @@ obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-y += probe_roms.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o -obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o +obj-y += syscall_$(BITS).o +obj-$(CONFIG_X86_64) += vsyscall_64.o obj-$(CONFIG_X86_64) += vsyscall_emu_64.o obj-y += bootflag.o e820.o obj-y += pci-dma.o quirks.o topology.o kdebugfs.o diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 395a10e6806..85d98ab15cd 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -3,6 +3,11 @@ #include #include "../../../drivers/lguest/lg.h" +#define __SYSCALL_I386(nr, sym, compat) [nr] = 1, +static char syscalls[] = { +#include +}; + /* workaround for a warning with -Wmissing-prototypes */ void foo(void); @@ -76,4 +81,7 @@ void foo(void) OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode); OFFSET(LGUEST_PAGES_regs, lguest_pages, regs); #endif + BLANK(); + DEFINE(__NR_syscall_max, sizeof(syscalls) - 1); + DEFINE(NR_syscalls, sizeof(syscalls)); } diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index e72a1194af2..834e897b1e2 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -1,11 +1,12 @@ #include -#define __NO_STUBS 1 -#undef __SYSCALL -#undef _ASM_X86_UNISTD_64_H -#define __SYSCALL(nr, sym) [nr] = 1, -static char syscalls[] = { -#include +#define __SYSCALL_64(nr, sym, compat) [nr] = 1, +static char syscalls_64[] = { +#include +}; +#define __SYSCALL_I386(nr, sym, compat) [nr] = 1, +static char syscalls_ia32[] = { +#include }; int main(void) @@ -72,7 +73,11 @@ int main(void) OFFSET(TSS_ist, tss_struct, x86_tss.ist); BLANK(); - DEFINE(__NR_syscall_max, sizeof(syscalls) - 1); + DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1); + DEFINE(NR_syscalls, sizeof(syscalls_64)); + + DEFINE(__NR_ia32_syscall_max, sizeof(syscalls_ia32) - 1); + DEFINE(IA32_NR_syscalls, sizeof(syscalls_ia32)); return 0; } diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index f3f6f534400..1ffcda22c2f 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -81,8 +81,6 @@ * enough to patch inline, increasing performance. */ -#define nr_syscalls ((syscall_table_size)/4) - #ifdef CONFIG_PREEMPT #define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF #else @@ -423,7 +421,7 @@ sysenter_past_esp: testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) jnz sysenter_audit sysenter_do_call: - cmpl $(nr_syscalls), %eax + cmpl $(NR_syscalls), %eax jae syscall_badsys call *sys_call_table(,%eax,4) movl %eax,PT_EAX(%esp) @@ -504,7 +502,7 @@ ENTRY(system_call) # system call tracing in operation / emulation testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) jnz syscall_trace_entry - cmpl $(nr_syscalls), %eax + cmpl $(NR_syscalls), %eax jae syscall_badsys syscall_call: call *sys_call_table(,%eax,4) @@ -650,7 +648,7 @@ syscall_trace_entry: movl %esp, %eax call syscall_trace_enter /* What it returned is what we'll actually use. */ - cmpl $(nr_syscalls), %eax + cmpl $(NR_syscalls), %eax jnae syscall_call jmp syscall_exit END(syscall_trace_entry) @@ -690,29 +688,28 @@ END(syscall_badsys) * System calls that need a pt_regs pointer. */ #define PTREGSCALL0(name) \ - ALIGN; \ -ptregs_##name: \ +ENTRY(ptregs_##name) ; \ leal 4(%esp),%eax; \ - jmp sys_##name; + jmp sys_##name; \ +ENDPROC(ptregs_##name) #define PTREGSCALL1(name) \ - ALIGN; \ -ptregs_##name: \ +ENTRY(ptregs_##name) ; \ leal 4(%esp),%edx; \ movl (PT_EBX+4)(%esp),%eax; \ - jmp sys_##name; + jmp sys_##name; \ +ENDPROC(ptregs_##name) #define PTREGSCALL2(name) \ - ALIGN; \ -ptregs_##name: \ +ENTRY(ptregs_##name) ; \ leal 4(%esp),%ecx; \ movl (PT_ECX+4)(%esp),%edx; \ movl (PT_EBX+4)(%esp),%eax; \ - jmp sys_##name; + jmp sys_##name; \ +ENDPROC(ptregs_##name) #define PTREGSCALL3(name) \ - ALIGN; \ -ptregs_##name: \ +ENTRY(ptregs_##name) ; \ CFI_STARTPROC; \ leal 4(%esp),%eax; \ pushl_cfi %eax; \ @@ -737,8 +734,7 @@ PTREGSCALL2(vm86) PTREGSCALL1(vm86old) /* Clone is an oddball. The 4th arg is in %edi */ - ALIGN; -ptregs_clone: +ENTRY(ptregs_clone) CFI_STARTPROC leal 4(%esp),%eax pushl_cfi %eax @@ -1209,11 +1205,6 @@ return_to_handler: jmp *%ecx #endif -.section .rodata,"a" -#include "syscall_table_32.S" - -syscall_table_size=(.-sys_call_table) - /* * Some functions should be protected against kprobes */ diff --git a/arch/x86/kernel/syscall_32.c b/arch/x86/kernel/syscall_32.c new file mode 100644 index 00000000000..b37a5733660 --- /dev/null +++ b/arch/x86/kernel/syscall_32.c @@ -0,0 +1,25 @@ +/* System call table for i386. */ + +#include +#include +#include +#include + +#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ; +#include +#undef __SYSCALL_I386 + +#define __SYSCALL_I386(nr, sym, compat) [nr] = sym, + +typedef asmlinkage void (*sys_call_ptr_t)(void); + +extern asmlinkage void sys_ni_syscall(void); + +const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { + /* + * Smells like a like a compiler bug -- it doesn't work + * when the & below is removed. + */ + [0 ... __NR_syscall_max] = &sys_ni_syscall, +#include +}; diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c index 0edfafa1b26..7ac7943be02 100644 --- a/arch/x86/kernel/syscall_64.c +++ b/arch/x86/kernel/syscall_64.c @@ -5,15 +5,11 @@ #include #include -#define __NO_STUBS +#define __SYSCALL_64(nr, sym, compat) extern asmlinkage void sym(void) ; +#include +#undef __SYSCALL_64 -#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; -#undef _ASM_X86_UNISTD_64_H -#include - -#undef __SYSCALL -#define __SYSCALL(nr, sym) [nr] = sym, -#undef _ASM_X86_UNISTD_64_H +#define __SYSCALL_64(nr, sym, compat) [nr] = sym, typedef void (*sys_call_ptr_t)(void); @@ -25,5 +21,5 @@ const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { * when the & below is removed. */ [0 ... __NR_syscall_max] = &sys_ni_syscall, -#include +#include }; diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S deleted file mode 100644 index 9a0e3129392..00000000000 --- a/arch/x86/kernel/syscall_table_32.S +++ /dev/null @@ -1,350 +0,0 @@ -ENTRY(sys_call_table) - .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ - .long sys_exit - .long ptregs_fork - .long sys_read - .long sys_write - .long sys_open /* 5 */ - .long sys_close - .long sys_waitpid - .long sys_creat - .long sys_link - .long sys_unlink /* 10 */ - .long ptregs_execve - .long sys_chdir - .long sys_time - .long sys_mknod - .long sys_chmod /* 15 */ - .long sys_lchown16 - .long sys_ni_syscall /* old break syscall holder */ - .long sys_stat - .long sys_lseek - .long sys_getpid /* 20 */ - .long sys_mount - .long sys_oldumount - .long sys_setuid16 - .long sys_getuid16 - .long sys_stime /* 25 */ - .long sys_ptrace - .long sys_alarm - .long sys_fstat - .long sys_pause - .long sys_utime /* 30 */ - .long sys_ni_syscall /* old stty syscall holder */ - .long sys_ni_syscall /* old gtty syscall holder */ - .long sys_access - .long sys_nice - .long sys_ni_syscall /* 35 - old ftime syscall holder */ - .long sys_sync - .long sys_kill - .long sys_rename - .long sys_mkdir - .long sys_rmdir /* 40 */ - .long sys_dup - .long sys_pipe - .long sys_times - .long sys_ni_syscall /* old prof syscall holder */ - .long sys_brk /* 45 */ - .long sys_setgid16 - .long sys_getgid16 - .long sys_signal - .long sys_geteuid16 - .long sys_getegid16 /* 50 */ - .long sys_acct - .long sys_umount /* recycled never used phys() */ - .long sys_ni_syscall /* old lock syscall holder */ - .long sys_ioctl - .long sys_fcntl /* 55 */ - .long sys_ni_syscall /* old mpx syscall holder */ - .long sys_setpgid - .long sys_ni_syscall /* old ulimit syscall holder */ - .long sys_olduname - .long sys_umask /* 60 */ - .long sys_chroot - .long sys_ustat - .long sys_dup2 - .long sys_getppid - .long sys_getpgrp /* 65 */ - .long sys_setsid - .long sys_sigaction - .long sys_sgetmask - .long sys_ssetmask - .long sys_setreuid16 /* 70 */ - .long sys_setregid16 - .long sys_sigsuspend - .long sys_sigpending - .long sys_sethostname - .long sys_setrlimit /* 75 */ - .long sys_old_getrlimit - .long sys_getrusage - .long sys_gettimeofday - .long sys_settimeofday - .long sys_getgroups16 /* 80 */ - .long sys_setgroups16 - .long sys_old_select - .long sys_symlink - .long sys_lstat - .long sys_readlink /* 85 */ - .long sys_uselib - .long sys_swapon - .long sys_reboot - .long sys_old_readdir - .long sys_old_mmap /* 90 */ - .long sys_munmap - .long sys_truncate - .long sys_ftruncate - .long sys_fchmod - .long sys_fchown16 /* 95 */ - .long sys_getpriority - .long sys_setpriority - .long sys_ni_syscall /* old profil syscall holder */ - .long sys_statfs - .long sys_fstatfs /* 100 */ - .long sys_ioperm - .long sys_socketcall - .long sys_syslog - .long sys_setitimer - .long sys_getitimer /* 105 */ - .long sys_newstat - .long sys_newlstat - .long sys_newfstat - .long sys_uname - .long ptregs_iopl /* 110 */ - .long sys_vhangup - .long sys_ni_syscall /* old "idle" system call */ - .long ptregs_vm86old - .long sys_wait4 - .long sys_swapoff /* 115 */ - .long sys_sysinfo - .long sys_ipc - .long sys_fsync - .long ptregs_sigreturn - .long ptregs_clone /* 120 */ - .long sys_setdomainname - .long sys_newuname - .long sys_modify_ldt - .long sys_adjtimex - .long sys_mprotect /* 125 */ - .long sys_sigprocmask - .long sys_ni_syscall /* old "create_module" */ - .long sys_init_module - .long sys_delete_module - .long sys_ni_syscall /* 130: old "get_kernel_syms" */ - .long sys_quotactl - .long sys_getpgid - .long sys_fchdir - .long sys_bdflush - .long sys_sysfs /* 135 */ - .long sys_personality - .long sys_ni_syscall /* reserved for afs_syscall */ - .long sys_setfsuid16 - .long sys_setfsgid16 - .long sys_llseek /* 140 */ - .long sys_getdents - .long sys_select - .long sys_flock - .long sys_msync - .long sys_readv /* 145 */ - .long sys_writev - .long sys_getsid - .long sys_fdatasync - .long sys_sysctl - .long sys_mlock /* 150 */ - .long sys_munlock - .long sys_mlockall - .long sys_munlockall - .long sys_sched_setparam - .long sys_sched_getparam /* 155 */ - .long sys_sched_setscheduler - .long sys_sched_getscheduler - .long sys_sched_yield - .long sys_sched_get_priority_max - .long sys_sched_get_priority_min /* 160 */ - .long sys_sched_rr_get_interval - .long sys_nanosleep - .long sys_mremap - .long sys_setresuid16 - .long sys_getresuid16 /* 165 */ - .long ptregs_vm86 - .long sys_ni_syscall /* Old sys_query_module */ - .long sys_poll - .long sys_ni_syscall /* Old nfsservctl */ - .long sys_setresgid16 /* 170 */ - .long sys_getresgid16 - .long sys_prctl - .long ptregs_rt_sigreturn - .long sys_rt_sigaction - .long sys_rt_sigprocmask /* 175 */ - .long sys_rt_sigpending - .long sys_rt_sigtimedwait - .long sys_rt_sigqueueinfo - .long sys_rt_sigsuspend - .long sys_pread64 /* 180 */ - .long sys_pwrite64 - .long sys_chown16 - .long sys_getcwd - .long sys_capget - .long sys_capset /* 185 */ - .long ptregs_sigaltstack - .long sys_sendfile - .long sys_ni_syscall /* reserved for streams1 */ - .long sys_ni_syscall /* reserved for streams2 */ - .long ptregs_vfork /* 190 */ - .long sys_getrlimit - .long sys_mmap_pgoff - .long sys_truncate64 - .long sys_ftruncate64 - .long sys_stat64 /* 195 */ - .long sys_lstat64 - .long sys_fstat64 - .long sys_lchown - .long sys_getuid - .long sys_getgid /* 200 */ - .long sys_geteuid - .long sys_getegid - .long sys_setreuid - .long sys_setregid - .long sys_getgroups /* 205 */ - .long sys_setgroups - .long sys_fchown - .long sys_setresuid - .long sys_getresuid - .long sys_setresgid /* 210 */ - .long sys_getresgid - .long sys_chown - .long sys_setuid - .long sys_setgid - .long sys_setfsuid /* 215 */ - .long sys_setfsgid - .long sys_pivot_root - .long sys_mincore - .long sys_madvise - .long sys_getdents64 /* 220 */ - .long sys_fcntl64 - .long sys_ni_syscall /* reserved for TUX */ - .long sys_ni_syscall - .long sys_gettid - .long sys_readahead /* 225 */ - .long sys_setxattr - .long sys_lsetxattr - .long sys_fsetxattr - .long sys_getxattr - .long sys_lgetxattr /* 230 */ - .long sys_fgetxattr - .long sys_listxattr - .long sys_llistxattr - .long sys_flistxattr - .long sys_removexattr /* 235 */ - .long sys_lremovexattr - .long sys_fremovexattr - .long sys_tkill - .long sys_sendfile64 - .long sys_futex /* 240 */ - .long sys_sched_setaffinity - .long sys_sched_getaffinity - .long sys_set_thread_area - .long sys_get_thread_area - .long sys_io_setup /* 245 */ - .long sys_io_destroy - .long sys_io_getevents - .long sys_io_submit - .long sys_io_cancel - .long sys_fadvise64 /* 250 */ - .long sys_ni_syscall - .long sys_exit_group - .long sys_lookup_dcookie - .long sys_epoll_create - .long sys_epoll_ctl /* 255 */ - .long sys_epoll_wait - .long sys_remap_file_pages - .long sys_set_tid_address - .long sys_timer_create - .long sys_timer_settime /* 260 */ - .long sys_timer_gettime - .long sys_timer_getoverrun - .long sys_timer_delete - .long sys_clock_settime - .long sys_clock_gettime /* 265 */ - .long sys_clock_getres - .long sys_clock_nanosleep - .long sys_statfs64 - .long sys_fstatfs64 - .long sys_tgkill /* 270 */ - .long sys_utimes - .long sys_fadvise64_64 - .long sys_ni_syscall /* sys_vserver */ - .long sys_mbind - .long sys_get_mempolicy - .long sys_set_mempolicy - .long sys_mq_open - .long sys_mq_unlink - .long sys_mq_timedsend - .long sys_mq_timedreceive /* 280 */ - .long sys_mq_notify - .long sys_mq_getsetattr - .long sys_kexec_load - .long sys_waitid - .long sys_ni_syscall /* 285 */ /* available */ - .long sys_add_key - .long sys_request_key - .long sys_keyctl - .long sys_ioprio_set - .long sys_ioprio_get /* 290 */ - .long sys_inotify_init - .long sys_inotify_add_watch - .long sys_inotify_rm_watch - .long sys_migrate_pages - .long sys_openat /* 295 */ - .long sys_mkdirat - .long sys_mknodat - .long sys_fchownat - .long sys_futimesat - .long sys_fstatat64 /* 300 */ - .long sys_unlinkat - .long sys_renameat - .long sys_linkat - .long sys_symlinkat - .long sys_readlinkat /* 305 */ - .long sys_fchmodat - .long sys_faccessat - .long sys_pselect6 - .long sys_ppoll - .long sys_unshare /* 310 */ - .long sys_set_robust_list - .long sys_get_robust_list - .long sys_splice - .long sys_sync_file_range - .long sys_tee /* 315 */ - .long sys_vmsplice - .long sys_move_pages - .long sys_getcpu - .long sys_epoll_pwait - .long sys_utimensat /* 320 */ - .long sys_signalfd - .long sys_timerfd_create - .long sys_eventfd - .long sys_fallocate - .long sys_timerfd_settime /* 325 */ - .long sys_timerfd_gettime - .long sys_signalfd4 - .long sys_eventfd2 - .long sys_epoll_create1 - .long sys_dup3 /* 330 */ - .long sys_pipe2 - .long sys_inotify_init1 - .long sys_preadv - .long sys_pwritev - .long sys_rt_tgsigqueueinfo /* 335 */ - .long sys_perf_event_open - .long sys_recvmmsg - .long sys_fanotify_init - .long sys_fanotify_mark - .long sys_prlimit64 /* 340 */ - .long sys_name_to_handle_at - .long sys_open_by_handle_at - .long sys_clock_adjtime - .long sys_syncfs - .long sys_sendmmsg /* 345 */ - .long sys_setns - .long sys_process_vm_readv - .long sys_process_vm_writev -- cgit v1.2.3-70-g09d2 From 61f1e7e20874e8f11dab69b6a4bf7616badd4fe8 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 18 Nov 2011 16:25:07 -0800 Subject: x86, syscall: Re-fix typo in comment Fix the same typo as was fixed in: b7641d2c x86-64, syscall: Adjust comment spacing and remove typo ... for the new versions of this file (32-bit and IA32 compat). Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1321569446-20433-4-git-send-email-hpa@linux.intel.com --- arch/x86/ia32/syscall_ia32.c | 2 +- arch/x86/kernel/syscall_32.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/ia32/syscall_ia32.c b/arch/x86/ia32/syscall_ia32.c index d04d3dbc47d..4754ba0f5d9 100644 --- a/arch/x86/ia32/syscall_ia32.c +++ b/arch/x86/ia32/syscall_ia32.c @@ -17,7 +17,7 @@ extern void compat_ni_syscall(void); const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = { /* - * Smells like a like a compiler bug -- it doesn't work + * Smells like a compiler bug -- it doesn't work * when the & below is removed. */ [0 ... __NR_ia32_syscall_max] = &compat_ni_syscall, diff --git a/arch/x86/kernel/syscall_32.c b/arch/x86/kernel/syscall_32.c index b37a5733660..147fcd4941c 100644 --- a/arch/x86/kernel/syscall_32.c +++ b/arch/x86/kernel/syscall_32.c @@ -17,7 +17,7 @@ extern asmlinkage void sys_ni_syscall(void); const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { /* - * Smells like a like a compiler bug -- it doesn't work + * Smells like a compiler bug -- it doesn't work * when the & below is removed. */ [0 ... __NR_syscall_max] = &sys_ni_syscall, -- cgit v1.2.3-70-g09d2 From 37fe6a42b3433b79a159ceb06a94cd1ef00e279d Mon Sep 17 00:00:00 2001 From: Mitsuo Hayasaka Date: Tue, 29 Nov 2011 15:08:29 +0900 Subject: x86: Check stack overflow in detail Currently, only kernel stack is checked for the overflow, which is not sufficient for systems that need a high reliability. To enhance it, it is required to check the IRQ and exception stacks, as well. This patch checks all the stack types and will cause messages of stacks in detail when free stack space drops below a certain limit except user stack. Signed-off-by: Mitsuo Hayasaka Cc: yrl.pp-manager.tt@hitachi.com Cc: Randy Dunlap Link: http://lkml.kernel.org/r/20111129060829.11076.51733.stgit@ltc219.sdl.hitachi.co.jp Signed-off-by: Ingo Molnar Cc: "H. Peter Anvin" --- arch/x86/Kconfig.debug | 7 +++++-- arch/x86/kernel/irq_64.c | 29 +++++++++++++++++++++++------ 2 files changed, 28 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index bf56e179327..4caec1261f1 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -63,8 +63,11 @@ config DEBUG_STACKOVERFLOW bool "Check for stack overflows" depends on DEBUG_KERNEL ---help--- - This option will cause messages to be printed if free stack space - drops below a certain limit. + Say Y here if you want to check the overflows of kernel, IRQ + and exception stacks. This option will cause messages of the + stacks in detail when free stack space drops below a certain + limit. + If in doubt, say "N". config X86_PTDUMP bool "Export kernel pagetable layout to userspace via debugfs" diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 69bca468c47..928a7e90961 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -36,18 +36,35 @@ EXPORT_PER_CPU_SYMBOL(irq_regs); static inline void stack_overflow_check(struct pt_regs *regs) { #ifdef CONFIG_DEBUG_STACKOVERFLOW + struct orig_ist *oist; + u64 irq_stack_top, irq_stack_bottom; + u64 estack_top, estack_bottom; u64 curbase = (u64)task_stack_page(current); if (user_mode_vm(regs)) return; - WARN_ONCE(regs->sp >= curbase && - regs->sp <= curbase + THREAD_SIZE && - regs->sp < curbase + sizeof(struct thread_info) + - sizeof(struct pt_regs) + 128, + if (regs->sp >= curbase && + regs->sp <= curbase + THREAD_SIZE && + regs->sp >= curbase + sizeof(struct thread_info) + + sizeof(struct pt_regs) + 128) + return; + + irq_stack_top = (u64)__get_cpu_var(irq_stack_union.irq_stack); + irq_stack_bottom = (u64)__get_cpu_var(irq_stack_ptr); + if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom) + return; + + oist = &__get_cpu_var(orig_ist); + estack_top = (u64)oist->ist[0] - EXCEPTION_STKSZ; + estack_bottom = (u64)oist->ist[N_EXCEPTION_STACKS - 1]; + if (regs->sp >= estack_top && regs->sp <= estack_bottom) + return; - "do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n", - current->comm, curbase, regs->sp); + WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n", + current->comm, curbase, regs->sp, + irq_stack_top, irq_stack_bottom, + estack_top, estack_bottom); #endif } -- cgit v1.2.3-70-g09d2 From 55af77969fbd7a841838220ea2287432e0da8ae5 Mon Sep 17 00:00:00 2001 From: Mitsuo Hayasaka Date: Tue, 29 Nov 2011 15:08:36 +0900 Subject: x86: Panic on detection of stack overflow Currently, messages are just output on the detection of stack overflow, which is not sufficient for systems that need a high reliability. This is because in general the overflow may corrupt data, and the additional corruption may occur due to reading them unless systems stop. This patch adds the sysctl parameter kernel.panic_on_stackoverflow and causes a panic when detecting the overflows of kernel, IRQ and exception stacks except user stack according to the parameter. It is disabled by default. Signed-off-by: Mitsuo Hayasaka Cc: yrl.pp-manager.tt@hitachi.com Cc: Randy Dunlap Cc: "H. Peter Anvin" Link: http://lkml.kernel.org/r/20111129060836.11076.12323.stgit@ltc219.sdl.hitachi.co.jp Signed-off-by: Ingo Molnar --- Documentation/sysctl/kernel.txt | 14 ++++++++++++++ arch/x86/kernel/irq_32.c | 2 ++ arch/x86/kernel/irq_64.c | 5 +++++ include/linux/kernel.h | 1 + kernel/sysctl.c | 9 +++++++++ 5 files changed, 31 insertions(+) (limited to 'arch/x86/kernel') diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 1f2463671a1..6d8cd8b2c30 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -49,6 +49,7 @@ show up in /proc/sys/kernel: - panic - panic_on_oops - panic_on_unrecovered_nmi +- panic_on_stackoverflow - pid_max - powersave-nap [ PPC only ] - printk @@ -393,6 +394,19 @@ Controls the kernel's behaviour when an oops or BUG is encountered. ============================================================== +panic_on_stackoverflow: + +Controls the kernel's behavior when detecting the overflows of +kernel, IRQ and exception stacks except a user stack. +This file shows up if CONFIG_DEBUG_STACKOVERFLOW is enabled. + +0: try to continue operation. + +1: panic immediately. + +============================================================== + + pid_max: PID allocation wrap value. When the kernel's next PID value diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 72090705a65..e16e99ebd7a 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -43,6 +43,8 @@ static void print_stack_overflow(void) { printk(KERN_WARNING "low stack detected by irq handler\n"); dump_stack(); + if (sysctl_panic_on_stackoverflow) + panic("low stack detected by irq handler - check messages\n"); } #else diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 928a7e90961..42552b0dce6 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -26,6 +26,8 @@ EXPORT_PER_CPU_SYMBOL(irq_stat); DEFINE_PER_CPU(struct pt_regs *, irq_regs); EXPORT_PER_CPU_SYMBOL(irq_regs); +int sysctl_panic_on_stackoverflow; + /* * Probabilistic stack overflow check: * @@ -65,6 +67,9 @@ static inline void stack_overflow_check(struct pt_regs *regs) current->comm, curbase, regs->sp, irq_stack_top, irq_stack_bottom, estack_top, estack_bottom); + + if (sysctl_panic_on_stackoverflow) + panic("low stack detected by irq handler - check messages\n"); #endif } diff --git a/include/linux/kernel.h b/include/linux/kernel.h index e8b1597b5cf..ff83683c0b9 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -341,6 +341,7 @@ extern int panic_timeout; extern int panic_on_oops; extern int panic_on_unrecovered_nmi; extern int panic_on_io_nmi; +extern int sysctl_panic_on_stackoverflow; extern const char *print_tainted(void); extern void add_taint(unsigned flag); extern int test_taint(unsigned flag); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ae271964385..f487f257e05 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -803,6 +803,15 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#ifdef CONFIG_DEBUG_STACKOVERFLOW + { + .procname = "panic_on_stackoverflow", + .data = &sysctl_panic_on_stackoverflow, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif { .procname = "bootloader_type", .data = &bootloader_type, -- cgit v1.2.3-70-g09d2 From 467e6b7a7c0eb792ebaf322ddb7363742b4ead40 Mon Sep 17 00:00:00 2001 From: Mitsuo Hayasaka Date: Tue, 29 Nov 2011 15:08:45 +0900 Subject: x86: Clean up the range of stack overflow checking The overflow checking of kernel stack checks if the stack pointer points to the available kernel stack range, which is derived from the original overflow checking. It is clear that curbase address is always less than low boundary of available kernel stack. So, this patch removes the first condition that checks if the pointer is higher than curbase. Signed-off-by: Mitsuo Hayasaka Cc: yrl.pp-manager.tt@hitachi.com Cc: Randy Dunlap Link: http://lkml.kernel.org/r/20111129060845.11076.40916.stgit@ltc219.sdl.hitachi.co.jp Signed-off-by: Ingo Molnar Cc: "H. Peter Anvin" --- arch/x86/kernel/irq_64.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 42552b0dce6..54e2b2b2e25 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -46,10 +46,9 @@ static inline void stack_overflow_check(struct pt_regs *regs) if (user_mode_vm(regs)) return; - if (regs->sp >= curbase && - regs->sp <= curbase + THREAD_SIZE && - regs->sp >= curbase + sizeof(struct thread_info) + - sizeof(struct pt_regs) + 128) + if (regs->sp >= curbase + sizeof(struct thread_info) + + sizeof(struct pt_regs) + 128 && + regs->sp <= curbase + THREAD_SIZE) return; irq_stack_top = (u64)__get_cpu_var(irq_stack_union.irq_stack); -- cgit v1.2.3-70-g09d2 From 3603a2512f9e69dc87914ba922eb4a0812b21cd6 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Thu, 13 Oct 2011 15:14:25 -0400 Subject: x86, reboot: Use NMI instead of REBOOT_VECTOR to stop cpus A recent discussion started talking about the locking on the pstore fs and how it relates to the kmsg infrastructure. We noticed it was possible for userspace to r/w to the pstore fs (grabbing the locks in the process) and block the panic path from r/w to the same fs. The reason was the cpu with the lock could be doing work while the crashing cpu is panic'ing. Busting those spinlocks might cause those cpus to step on each other's data. Fine, fair enough. It was suggested it would be nice to serialize the panic path (ie stop the other cpus) and have only one cpu running. This would allow us to bust the spinlocks and not worry about another cpu stepping on the data. Of course, smp_send_stop() does this in the panic case. kmsg_dump() would have to be moved to be called after it. Easy enough. The only problem is on x86 the smp_send_stop() function calls the REBOOT_VECTOR. Any cpu with irqs disabled (which pstore and its backend ERST would do), block this IPI and thus do not stop. This makes it difficult to reliably log data to the pstore fs. The patch below switches from the REBOOT_VECTOR to NMI (and mimics what kdump does). Switching to NMI allows us to deliver the IPI when irqs are disabled, increasing the reliability of this function. However, Andi carefully noted that on some machines this approach does not work because of broken BIOSes or whatever. To help accomodate this, the next couple of patches will run a selftest and provide a knob to disable. V2: uses atomic ops to serialize the cpu that shuts everyone down V3: comment cleanup Signed-off-by: Don Zickus Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Robert Richter Cc: seiji.aguchi@hds.com Cc: vgoyal@redhat.com Cc: mjg@redhat.com Cc: tony.luck@intel.com Cc: gong.chen@intel.com Cc: satoru.moriya@hds.com Cc: avi@redhat.com Cc: Andi Kleen Link: http://lkml.kernel.org/r/1318533267-18880-2-git-send-email-dzickus@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/smp.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 16204dc1548..e72b1754a2d 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -29,6 +29,7 @@ #include #include #include +#include /* * Some notes on x86 processor bugs affecting SMP operation: * @@ -148,6 +149,60 @@ void native_send_call_func_ipi(const struct cpumask *mask) free_cpumask_var(allbutself); } +static atomic_t stopping_cpu = ATOMIC_INIT(-1); + +static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) +{ + /* We are registered on stopping cpu too, avoid spurious NMI */ + if (raw_smp_processor_id() == atomic_read(&stopping_cpu)) + return NMI_HANDLED; + + stop_this_cpu(NULL); + + return NMI_HANDLED; +} + +static void native_nmi_stop_other_cpus(int wait) +{ + unsigned long flags; + unsigned long timeout; + + if (reboot_force) + return; + + /* + * Use an own vector here because smp_call_function + * does lots of things not suitable in a panic situation. + */ + if (num_online_cpus() > 1) { + /* did someone beat us here? */ + if (atomic_cmpxchg(&stopping_cpu, -1, safe_smp_processor_id() != -1)) + return; + + if (register_nmi_handler(NMI_LOCAL, smp_stop_nmi_callback, + NMI_FLAG_FIRST, "smp_stop")) + /* Note: we ignore failures here */ + return; + + /* sync above data before sending NMI */ + wmb(); + + apic->send_IPI_allbutself(NMI_VECTOR); + + /* + * Don't wait longer than a second if the caller + * didn't ask us to wait. + */ + timeout = USEC_PER_SEC; + while (num_online_cpus() > 1 && (wait || timeout--)) + udelay(1); + } + + local_irq_save(flags); + disable_local_APIC(); + local_irq_restore(flags); +} + /* * this function calls the 'stop' function on all other CPUs in the system. */ @@ -160,7 +215,7 @@ asmlinkage void smp_reboot_interrupt(void) irq_exit(); } -static void native_stop_other_cpus(int wait) +static void native_irq_stop_other_cpus(int wait) { unsigned long flags; unsigned long timeout; @@ -230,7 +285,7 @@ struct smp_ops smp_ops = { .smp_prepare_cpus = native_smp_prepare_cpus, .smp_cpus_done = native_smp_cpus_done, - .stop_other_cpus = native_stop_other_cpus, + .stop_other_cpus = native_nmi_stop_other_cpus, .smp_send_reschedule = native_smp_send_reschedule, .cpu_up = native_cpu_up, -- cgit v1.2.3-70-g09d2 From 99e8b9ca90d688c3ac7d3a141b701c9694a93925 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Thu, 13 Oct 2011 15:14:26 -0400 Subject: x86, NMI: Add NMI IPI selftest The previous patch modified the stop cpus path to use NMI instead of IRQ as the way to communicate to the other cpus to shutdown. There were some concerns that various machines may have problems with using an NMI IPI. This patch creates a selftest to check if NMI is working at boot. The idea is to help catch any issues before the machine panics and we learn the hard way. Loosely based on the locking-selftest.c file, this separate file runs a couple of simple tests and reports the results. The output looks like: ... Brought up 4 CPUs ---------------- | NMI testsuite: -------------------- remote IPI: ok | local IPI: ok | -------------------- Good, all 2 testcases passed! | --------------------------------- Total of 4 processors activated (21330.61 BogoMIPS). ... Signed-off-by: Don Zickus Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Robert Richter Cc: seiji.aguchi@hds.com Cc: vgoyal@redhat.com Cc: mjg@redhat.com Cc: tony.luck@intel.com Cc: gong.chen@intel.com Cc: satoru.moriya@hds.com Cc: avi@redhat.com Cc: Andi Kleen Link: http://lkml.kernel.org/r/1318533267-18880-3-git-send-email-dzickus@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.debug | 12 +++ arch/x86/include/asm/smp.h | 6 ++ arch/x86/kernel/Makefile | 1 + arch/x86/kernel/nmi_selftest.c | 179 +++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/smpboot.c | 1 + 5 files changed, 199 insertions(+) create mode 100644 arch/x86/kernel/nmi_selftest.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 4caec1261f1..97da3c17b42 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -287,4 +287,16 @@ config DEBUG_STRICT_USER_COPY_CHECKS If unsure, or if you run an older (pre 4.4) gcc, say N. +config DEBUG_NMI_SELFTEST + bool "NMI Selftest" + depends on DEBUG_KERNEL + ---help--- + Enabling this option turns on a quick NMI selftest to verify + that the NMI behaves correctly. + + This might help diagnose strange hangs that rely on NMI to + function properly. + + If unsure, say N. + endmenu diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 73b11bc0ae6..0434c400287 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -225,5 +225,11 @@ extern int hard_smp_processor_id(void); #endif /* CONFIG_X86_LOCAL_APIC */ +#ifdef CONFIG_DEBUG_NMI_SELFTEST +extern void nmi_selftest(void); +#else +#define nmi_selftest() do { } while (0) +#endif + #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_SMP_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 8baca3c4871..02b2f05b371 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -80,6 +80,7 @@ obj-$(CONFIG_APB_TIMER) += apb_timer.o obj-$(CONFIG_AMD_NB) += amd_nb.o obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o +obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o obj-$(CONFIG_KVM_GUEST) += kvm.o obj-$(CONFIG_KVM_CLOCK) += kvmclock.o diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c new file mode 100644 index 00000000000..572adb62225 --- /dev/null +++ b/arch/x86/kernel/nmi_selftest.c @@ -0,0 +1,179 @@ +/* + * arch/x86/kernel/nmi-selftest.c + * + * Testsuite for NMI: IPIs + * + * Started by Don Zickus: + * (using lib/locking-selftest.c as a guide) + * + * Copyright (C) 2011 Red Hat, Inc., Don Zickus + */ + +#include +#include +#include + +#include +#include + +#define SUCCESS 0 +#define FAILURE 1 +#define TIMEOUT 2 + +static int nmi_fail; + +/* check to see if NMI IPIs work on this machine */ +static DECLARE_BITMAP(nmi_ipi_mask, NR_CPUS) __read_mostly; + +static int testcase_total; +static int testcase_successes; +static int expected_testcase_failures; +static int unexpected_testcase_failures; +static int unexpected_testcase_unknowns; + +static int nmi_unk_cb(unsigned int val, struct pt_regs *regs) +{ + unexpected_testcase_unknowns++; + return NMI_HANDLED; +} + +static void init_nmi_testsuite(void) +{ + /* trap all the unknown NMIs we may generate */ + register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk"); +} + +static void cleanup_nmi_testsuite(void) +{ + unregister_nmi_handler(NMI_UNKNOWN, "nmi_selftest_unk"); +} + +static int test_nmi_ipi_callback(unsigned int val, struct pt_regs *regs) +{ + int cpu = raw_smp_processor_id(); + + if (cpumask_test_and_clear_cpu(cpu, to_cpumask(nmi_ipi_mask))) + return NMI_HANDLED; + + return NMI_DONE; +} + +static void test_nmi_ipi(struct cpumask *mask) +{ + unsigned long timeout; + + if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback, + NMI_FLAG_FIRST, "nmi_selftest")) { + nmi_fail = FAILURE; + return; + } + + /* sync above data before sending NMI */ + wmb(); + + apic->send_IPI_mask(mask, NMI_VECTOR); + + /* Don't wait longer than a second */ + timeout = USEC_PER_SEC; + while (!cpumask_empty(mask) && timeout--) + udelay(1); + + /* What happens if we timeout, do we still unregister?? */ + unregister_nmi_handler(NMI_LOCAL, "nmi_selftest"); + + if (!timeout) + nmi_fail = TIMEOUT; + return; +} + +static void remote_ipi(void) +{ + cpumask_copy(to_cpumask(nmi_ipi_mask), cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask)); + test_nmi_ipi(to_cpumask(nmi_ipi_mask)); +} + +static void local_ipi(void) +{ + cpumask_clear(to_cpumask(nmi_ipi_mask)); + cpumask_set_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask)); + test_nmi_ipi(to_cpumask(nmi_ipi_mask)); +} + +static void reset_nmi(void) +{ + nmi_fail = 0; +} + +static void dotest(void (*testcase_fn)(void), int expected) +{ + testcase_fn(); + /* + * Filter out expected failures: + */ + if (nmi_fail != expected) { + unexpected_testcase_failures++; + + if (nmi_fail == FAILURE) + printk("FAILED |"); + else if (nmi_fail == TIMEOUT) + printk("TIMEOUT|"); + else + printk("ERROR |"); + dump_stack(); + } else { + testcase_successes++; + printk(" ok |"); + } + testcase_total++; + + reset_nmi(); +} + +static inline void print_testname(const char *testname) +{ + printk("%12s:", testname); +} + +void nmi_selftest(void) +{ + init_nmi_testsuite(); + + /* + * Run the testsuite: + */ + printk("----------------\n"); + printk("| NMI testsuite:\n"); + printk("--------------------\n"); + + print_testname("remote IPI"); + dotest(remote_ipi, SUCCESS); + printk("\n"); + print_testname("local IPI"); + dotest(local_ipi, SUCCESS); + printk("\n"); + + cleanup_nmi_testsuite(); + + if (unexpected_testcase_failures) { + printk("--------------------\n"); + printk("BUG: %3d unexpected failures (out of %3d) - debugging disabled! |\n", + unexpected_testcase_failures, testcase_total); + printk("-----------------------------------------------------------------\n"); + } else if (expected_testcase_failures && testcase_successes) { + printk("--------------------\n"); + printk("%3d out of %3d testcases failed, as expected. |\n", + expected_testcase_failures, testcase_total); + printk("----------------------------------------------------\n"); + } else if (expected_testcase_failures && !testcase_successes) { + printk("--------------------\n"); + printk("All %3d testcases failed, as expected. |\n", + expected_testcase_failures); + printk("----------------------------------------\n"); + } else { + printk("--------------------\n"); + printk("Good, all %3d testcases passed! |\n", + testcase_successes); + printk("---------------------------------\n"); + } +} diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 9f548cb4a95..19277817eff 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1142,6 +1142,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus) { pr_debug("Boot done.\n"); + nmi_selftest(); impress_friends(); #ifdef CONFIG_X86_IO_APIC setup_ioapic_dest(); -- cgit v1.2.3-70-g09d2 From bda62633983f9db49ce0b1a9235b3709c1cda5f0 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Thu, 13 Oct 2011 15:14:27 -0400 Subject: x86, NMI: Add knob to disable using NMI IPIs to stop cpus Some machines may exhibit problems using the NMI to stop other cpus. This knob just allows one to revert back to the original behaviour to help diagnose the problem. V2: make function static Signed-off-by: Don Zickus Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Robert Richter Cc: seiji.aguchi@hds.com Cc: vgoyal@redhat.com Cc: mjg@redhat.com Cc: tony.luck@intel.com Cc: gong.chen@intel.com Cc: satoru.moriya@hds.com Cc: avi@redhat.com Cc: Andi Kleen Link: http://lkml.kernel.org/r/1318533267-18880-4-git-send-email-dzickus@redhat.com Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 4 ++++ arch/x86/kernel/smp.c | 13 +++++++++++++ 2 files changed, 17 insertions(+) (limited to 'arch/x86/kernel') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index a0c5c5f4fce..b4339e5a50d 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1796,6 +1796,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. nomfgpt [X86-32] Disable Multi-Function General Purpose Timer usage (for AMD Geode machines). + nonmi_ipi [X86] Disable using NMI IPIs during panic/reboot to + shutdown the other cpus. Instead use the REBOOT_VECTOR + irq. + nopat [X86] Disable PAT (page attribute table extension of pagetables) support. diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index e72b1754a2d..113acda5879 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -249,6 +249,11 @@ static void native_irq_stop_other_cpus(int wait) local_irq_restore(flags); } +static void native_smp_disable_nmi_ipi(void) +{ + smp_ops.stop_other_cpus = native_irq_stop_other_cpus; +} + /* * Reschedule call back. */ @@ -280,6 +285,14 @@ void smp_call_function_single_interrupt(struct pt_regs *regs) irq_exit(); } +static int __init nonmi_ipi_setup(char *str) +{ + native_smp_disable_nmi_ipi(); + return 1; +} + +__setup("nonmi_ipi", nonmi_ipi_setup); + struct smp_ops smp_ops = { .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, .smp_prepare_cpus = native_smp_prepare_cpus, -- cgit v1.2.3-70-g09d2 From 53b5650273fea486ac8ac6c1d1e9a6cd17aa31ca Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 5 Dec 2011 12:25:44 +0100 Subject: x86: Fix the 32-bit stackoverflow-debug build The panic_on_stackoverflow variable needs to be avilable on the 32-bit side as well ... Cc: Mitsuo Hayasaka Cc: Randy Dunlap Cc: "H. Peter Anvin" Link: http://lkml.kernel.org/r/20111129060836.11076.12323.stgit@ltc219.sdl.hitachi.co.jp Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq_32.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index e16e99ebd7a..40fc86161d9 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -28,6 +28,9 @@ DEFINE_PER_CPU(struct pt_regs *, irq_regs); EXPORT_PER_CPU_SYMBOL(irq_regs); #ifdef CONFIG_DEBUG_STACKOVERFLOW + +int sysctl_panic_on_stackoverflow __read_mostly; + /* Debugging check for stack overflow: is there less than 1KB free? */ static int check_stack_overflow(void) { -- cgit v1.2.3-70-g09d2 From 1ea7c6737c8f68453f55c894b3d07d7f48fcbef8 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 10 Nov 2011 13:29:14 +0000 Subject: x86/config: Revamp configuration for MID devices This follows on from the patch applied in 3.2rc1 which creates an INTEL_MID configuration. We can now add the entry for Medfield specific code. After this is merged the final patch will be submitted which moves the rest of the device Kconfig dependancies to MRST/MEDFIELD/INTEL_MID as appropriate. Signed-off-by: Alan Cox Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 17 +++++++++++++++++ arch/x86/Kconfig.debug | 6 +++--- arch/x86/kernel/early_printk.c | 2 +- arch/x86/platform/mrst/Makefile | 2 +- 4 files changed, 22 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cb9a1044a77..9e7a361423d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -419,6 +419,23 @@ config X86_MRST nor standard legacy replacement devices/features. e.g. Moorestown does not contain i8259, i8254, HPET, legacy BIOS, most of the io ports. +config X86_MDFLD + bool "Medfield MID platform" + depends on PCI + depends on PCI_GOANY + depends on X86_IO_APIC + select APB_TIMER + select I2C + select SPI + select INTEL_SCU_IPC + select X86_PLATFORM_DEVICES + ---help--- + Medfield is Intel's Low Power Intel Architecture (LPIA) based Moblin + Internet Device(MID) platform. + Unlike standard x86 PCs, Medfield does not have many legacy devices + nor standard legacy replacement devices/features. e.g. Medfield does + not contain i8259, i8254, HPET, legacy BIOS, most of the io ports. + endif config X86_RDC321X diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index bf56e179327..28c3c73ab20 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -43,9 +43,9 @@ config EARLY_PRINTK with klogd/syslogd or the X server. You should normally N here, unless you want to debug such a crash. -config EARLY_PRINTK_MRST - bool "Early printk for MRST platform support" - depends on EARLY_PRINTK && X86_MRST +config EARLY_PRINTK_INTEL_MID + bool "Early printk for Intel MID platform support" + depends on EARLY_PRINTK && X86_INTEL_MID config EARLY_PRINTK_DBGP bool "Early printk via EHCI debug port" diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index cd28a350f7f..7a53da03086 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -240,7 +240,7 @@ static int __init setup_early_printk(char *buf) if (!strncmp(buf, "xen", 3)) early_console_register(&xenboot_console, keep); #endif -#ifdef CONFIG_EARLY_PRINTK_MRST +#ifdef CONFIG_EARLY_PRINTK_INTEL_MID if (!strncmp(buf, "mrst", 4)) { mrst_early_console_init(); early_console_register(&early_mrst_console, keep); diff --git a/arch/x86/platform/mrst/Makefile b/arch/x86/platform/mrst/Makefile index 1ea38775a6d..ddeec730046 100644 --- a/arch/x86/platform/mrst/Makefile +++ b/arch/x86/platform/mrst/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_X86_MRST) += mrst.o obj-$(CONFIG_X86_MRST) += vrtc.o -obj-$(CONFIG_EARLY_PRINTK_MRST) += early_printk_mrst.o +obj-$(CONFIG_EARLY_PRINTK_INTEL_MID) += early_printk_mrst.o obj-$(CONFIG_X86_MRST) += pmu.o -- cgit v1.2.3-70-g09d2 From d1bbdd669298b7ca08284ddb29153dfc039dd89d Mon Sep 17 00:00:00 2001 From: Mike Ditto Date: Tue, 15 Nov 2011 14:46:50 -0800 Subject: arch/x86/kernel/e820.c: Eliminate bubble sort from sanitize_e820_map() Replace the bubble sort in sanitize_e820_map() with a call to the generic kernel sort function to avoid pathological performance with large maps. On large (thousands of entries) E820 maps, the previous code took minutes to run; with this change it's now milliseconds. Signed-off-by: Mike Ditto Cc: sassmann@kpanic.de Cc: yuenn@google.com Cc: Stefan Assmann Cc: Nancy Yuen Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 59 ++++++++++++++++++++------------------------------ 1 file changed, 24 insertions(+), 35 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 303a0e48f07..f655f802260 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -227,22 +228,38 @@ void __init e820_print_map(char *who) * ____________________33__ * ______________________4_ */ +struct change_member { + struct e820entry *pbios; /* pointer to original bios entry */ + unsigned long long addr; /* address for this change point */ +}; + +static int __init cpcompare(const void *a, const void *b) +{ + struct change_member * const *app = a, * const *bpp = b; + const struct change_member *ap = *app, *bp = *bpp; + + /* + * Inputs are pointers to two elements of change_point[]. If their + * addresses are unequal, their difference dominates. If the addresses + * are equal, then consider one that represents the end of its region + * to be greater than one that does not. + */ + if (ap->addr != bp->addr) + return ap->addr > bp->addr ? 1 : -1; + + return (ap->addr != ap->pbios->addr) - (bp->addr != bp->pbios->addr); +} int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, u32 *pnr_map) { - struct change_member { - struct e820entry *pbios; /* pointer to original bios entry */ - unsigned long long addr; /* address for this change point */ - }; static struct change_member change_point_list[2*E820_X_MAX] __initdata; static struct change_member *change_point[2*E820_X_MAX] __initdata; static struct e820entry *overlap_list[E820_X_MAX] __initdata; static struct e820entry new_bios[E820_X_MAX] __initdata; - struct change_member *change_tmp; unsigned long current_type, last_type; unsigned long long last_addr; - int chgidx, still_changing; + int chgidx; int overlap_entries; int new_bios_entry; int old_nr, new_nr, chg_nr; @@ -279,35 +296,7 @@ int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, chg_nr = chgidx; /* sort change-point list by memory addresses (low -> high) */ - still_changing = 1; - while (still_changing) { - still_changing = 0; - for (i = 1; i < chg_nr; i++) { - unsigned long long curaddr, lastaddr; - unsigned long long curpbaddr, lastpbaddr; - - curaddr = change_point[i]->addr; - lastaddr = change_point[i - 1]->addr; - curpbaddr = change_point[i]->pbios->addr; - lastpbaddr = change_point[i - 1]->pbios->addr; - - /* - * swap entries, when: - * - * curaddr > lastaddr or - * curaddr == lastaddr and curaddr == curpbaddr and - * lastaddr != lastpbaddr - */ - if (curaddr < lastaddr || - (curaddr == lastaddr && curaddr == curpbaddr && - lastaddr != lastpbaddr)) { - change_tmp = change_point[i]; - change_point[i] = change_point[i-1]; - change_point[i-1] = change_tmp; - still_changing = 1; - } - } - } + sort(change_point, chg_nr, sizeof *change_point, cpcompare, 0); /* create a new bios memory map, removing overlaps */ overlap_entries = 0; /* number of entries in the overlap table */ -- cgit v1.2.3-70-g09d2 From 706d9a9c8b5758390036b9980a2b12d809599777 Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Tue, 15 Nov 2011 14:48:56 -0800 Subject: arch/x86/kernel/e820.c: quiet sparse noise about plain integer as NULL pointer The last parameter to sort() is a pointer to the function used to swap items. This parameter should be NULL, not 0, when not used. This quiets the following sparse warning: warning: Using plain integer as NULL pointer Signed-off-by: H Hartley Sweeten Signed-off-by: Andrew Morton Cc: hartleys@visionengravers.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index f655f802260..d6bd85352c8 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -296,7 +296,7 @@ int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, chg_nr = chgidx; /* sort change-point list by memory addresses (low -> high) */ - sort(change_point, chg_nr, sizeof *change_point, cpcompare, 0); + sort(change_point, chg_nr, sizeof *change_point, cpcompare, NULL); /* create a new bios memory map, removing overlaps */ overlap_entries = 0; /* number of entries in the overlap table */ -- cgit v1.2.3-70-g09d2 From b565201cf75210614903ef2ae5917b4379681647 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Tue, 15 Nov 2011 15:33:56 -0800 Subject: x86: Reduce clock calibration time during slave cpu startup Reduce the startup time for slave cpus. Adds hooks for an arch-specific function for clock calibration. These hooks are used on x86. If a newly started cpu has the same phys_proc_id as a core already active, uses the TSC for the delay loop and has a CONSTANT_TSC, use the already-calculated value of loops_per_jiffy. This patch reduces the time required to start slave cpus on a 4096 cpu system from: 465 sec OLD 62 sec NEW This reduces boot time on a 4096p system by almost 7 minutes. Nice... Signed-off-by: Jack Steiner Cc: "H. Peter Anvin" Cc: John Stultz [fix CONFIG_SMP=n build] Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 16 +++++++++++----- arch/x86/kernel/tsc.c | 20 ++++++++++++++++++++ init/calibrate.c | 15 +++++++++++++++ 3 files changed, 46 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 9f548cb4a95..00eef55c832 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -207,22 +207,28 @@ static void __cpuinit smp_callin(void) * Need to setup vector mappings before we enable interrupts. */ setup_vector_irq(smp_processor_id()); + + /* + * Save our processor parameters. Note: this information + * is needed for clock calibration. + */ + smp_store_cpu_info(cpuid); + /* * Get our bogomips. + * Update loops_per_jiffy in cpu_data. Previous call to + * smp_store_cpu_info() stored a value that is close but not as + * accurate as the value just calculated. * * Need to enable IRQs because it can take longer and then * the NMI watchdog might kill us. */ local_irq_enable(); calibrate_delay(); + cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy; local_irq_disable(); pr_debug("Stack at about %p\n", &cpuid); - /* - * Save our processor parameters - */ - smp_store_cpu_info(cpuid); - /* * This must be done before setting cpu_online_mask * or calling notify_cpu_starting. diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index db483369f10..490fb330be8 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -995,3 +995,23 @@ void __init tsc_init(void) check_system_tsc_reliable(); } +#ifdef CONFIG_SMP +/* + * If we have a constant TSC and are using the TSC for the delay loop, + * we can skip clock calibration if another cpu in the same socket has already + * been calibrated. This assumes that CONSTANT_TSC applies to all + * cpus in the socket - this should be a safe assumption. + */ +unsigned long __cpuinit calibrate_delay_is_known(void) +{ + int i, cpu = smp_processor_id(); + + if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC)) + return 0; + + for_each_online_cpu(i) + if (cpu_data(i).phys_proc_id == cpu_data(cpu).phys_proc_id) + return cpu_data(i).loops_per_jiffy; + return 0; +} +#endif diff --git a/init/calibrate.c b/init/calibrate.c index 24df7976816..5f117ca9e06 100644 --- a/init/calibrate.c +++ b/init/calibrate.c @@ -246,6 +246,19 @@ recalibrate: static DEFINE_PER_CPU(unsigned long, cpu_loops_per_jiffy) = { 0 }; +/* + * Check if cpu calibration delay is already known. For example, + * some processors with multi-core sockets may have all cores + * with the same calibration delay. + * + * Architectures should override this function if a faster calibration + * method is available. + */ +unsigned long __attribute__((weak)) __cpuinit calibrate_delay_is_known(void) +{ + return 0; +} + void __cpuinit calibrate_delay(void) { unsigned long lpj; @@ -265,6 +278,8 @@ void __cpuinit calibrate_delay(void) lpj = lpj_fine; pr_info("Calibrating delay loop (skipped), " "value calculated using timer frequency.. "); + } else if ((lpj = calibrate_delay_is_known())) { + ; } else if ((lpj = calibrate_delay_direct()) != 0) { if (!printed) pr_info("Calibrating delay using timer " -- cgit v1.2.3-70-g09d2 From 565cbc3e934f221369a656b4469a044aa4c3f2a8 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Tue, 6 Dec 2011 13:08:59 -0500 Subject: x86, NMI: NMI-selftest should handle the UP case properly If no remote cpus are online, then just quietly skip the remote IPI test for now. Signed-off-by: Don Zickus Cc: andi@firstfloor.org Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: robert.richter@amd.com Link: http://lkml.kernel.org/r/20111206180859.GR1669@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/nmi_selftest.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c index 572adb62225..1e42a23c1f2 100644 --- a/arch/x86/kernel/nmi_selftest.c +++ b/arch/x86/kernel/nmi_selftest.c @@ -90,7 +90,8 @@ static void remote_ipi(void) { cpumask_copy(to_cpumask(nmi_ipi_mask), cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask)); - test_nmi_ipi(to_cpumask(nmi_ipi_mask)); + if (!cpumask_empty(nmi_ipi_mask)) + test_nmi_ipi(to_cpumask(nmi_ipi_mask)); } static void local_ipi(void) -- cgit v1.2.3-70-g09d2 From d2db6610219cbcadceea6c43ee03d89068b7d759 Mon Sep 17 00:00:00 2001 From: Mitsuo Hayasaka Date: Wed, 7 Dec 2011 17:29:10 +0900 Subject: x86: Add stack top margin for stack overflow checking It seems that a margin for stack overflow checking is added to top of a kernel stack but is not added to IRQ and exception stacks in stack_overflow_check(). Therefore, the overflows of IRQ and exception stacks are always detected only after they actually occurred and data corruption might occur due to them. This patch adds the margin to top of IRQ and exception stacks as well as a kernel stack to enhance reliability. Signed-off-by: Mitsuo Hayasaka Cc: yrl.pp-manager.tt@hitachi.com Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/20111207082910.9847.3359.stgit@ltc219.sdl.hitachi.co.jp [ removed the #undef - we typically don't do that for uncommon names ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq_64.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 54e2b2b2e25..d04d3ecded6 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -38,6 +38,7 @@ int sysctl_panic_on_stackoverflow; static inline void stack_overflow_check(struct pt_regs *regs) { #ifdef CONFIG_DEBUG_STACKOVERFLOW +#define STACK_TOP_MARGIN 128 struct orig_ist *oist; u64 irq_stack_top, irq_stack_bottom; u64 estack_top, estack_bottom; @@ -47,17 +48,18 @@ static inline void stack_overflow_check(struct pt_regs *regs) return; if (regs->sp >= curbase + sizeof(struct thread_info) + - sizeof(struct pt_regs) + 128 && + sizeof(struct pt_regs) + STACK_TOP_MARGIN && regs->sp <= curbase + THREAD_SIZE) return; - irq_stack_top = (u64)__get_cpu_var(irq_stack_union.irq_stack); + irq_stack_top = (u64)__get_cpu_var(irq_stack_union.irq_stack) + + STACK_TOP_MARGIN; irq_stack_bottom = (u64)__get_cpu_var(irq_stack_ptr); if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom) return; oist = &__get_cpu_var(orig_ist); - estack_top = (u64)oist->ist[0] - EXCEPTION_STKSZ; + estack_top = (u64)oist->ist[0] - EXCEPTION_STKSZ + STACK_TOP_MARGIN; estack_bottom = (u64)oist->ist[N_EXCEPTION_STACKS - 1]; if (regs->sp >= estack_top && regs->sp <= estack_bottom) return; -- cgit v1.2.3-70-g09d2 From 3d6240b53e34e372be007e08f7066e7625910675 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 7 Dec 2011 14:06:12 +0300 Subject: x86, NMI: Add to_cpumask() to silence compile warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gcc complains if we don't cast this to a struct cpumask pointer. arch/x86/kernel/nmi_selftest.c:93:2: warning: passing argument 1 of ‘cpumask_empty’ from incompatible pointer type [enabled by default] Signed-off-by: Dan Carpenter Cc: Don Zickus Link: http://lkml.kernel.org/r/20111207110612.GA3437@mwanda Signed-off-by: Ingo Molnar --- arch/x86/kernel/nmi_selftest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c index 1e42a23c1f2..0d01a8ea4e1 100644 --- a/arch/x86/kernel/nmi_selftest.c +++ b/arch/x86/kernel/nmi_selftest.c @@ -90,7 +90,7 @@ static void remote_ipi(void) { cpumask_copy(to_cpumask(nmi_ipi_mask), cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask)); - if (!cpumask_empty(nmi_ipi_mask)) + if (!cpumask_empty(to_cpumask(nmi_ipi_mask))) test_nmi_ipi(to_cpumask(nmi_ipi_mask)); } -- cgit v1.2.3-70-g09d2 From f7d7d01be53cb47e0ae212c4e968aa28b82d2138 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 15 Nov 2011 12:56:14 +0000 Subject: x86: Don't use magic strings for EFI loader signature Introduce a symbol, EFI_LOADER_SIGNATURE instead of using the magic strings, which also helps to reduce the amount of ifdeffery. Cc: Matthew Garrett Signed-off-by: Matt Fleming Link: http://lkml.kernel.org/r/1318848017-12301-1-git-send-email-matt@console-pimps.org Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/efi.h | 4 ++++ arch/x86/kernel/setup.c | 7 +------ 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index b8d8bfcd44a..26d8c18d5fa 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -3,6 +3,8 @@ #ifdef CONFIG_X86_32 +#define EFI_LOADER_SIGNATURE "EL32" + extern unsigned long asmlinkage efi_call_phys(void *, ...); #define efi_call_phys0(f) efi_call_phys(f) @@ -35,6 +37,8 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...); #else /* !CONFIG_X86_32 */ +#define EFI_LOADER_SIGNATURE "EL64" + extern u64 efi_call0(void *fp); extern u64 efi_call1(void *fp, u64 arg1); extern u64 efi_call2(void *fp, u64 arg1, u64 arg2); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 9a9e40fb091..4d5243c31ac 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -752,12 +752,7 @@ void __init setup_arch(char **cmdline_p) #endif #ifdef CONFIG_EFI if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, -#ifdef CONFIG_X86_32 - "EL32", -#else - "EL64", -#endif - 4)) { + EFI_LOADER_SIGNATURE, 4)) { efi_enabled = 1; efi_memblock_x86_reserve_range(); } -- cgit v1.2.3-70-g09d2 From 291f36325f9f252bd76ef5f603995f37e453fc60 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 12 Dec 2011 21:27:52 +0000 Subject: x86, efi: EFI boot stub support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is currently a large divide between kernel development and the development of EFI boot loaders. The idea behind this patch is to give the kernel developers full control over the EFI boot process. As H. Peter Anvin put it, "The 'kernel carries its own stub' approach been very successful in dealing with BIOS, and would make a lot of sense to me for EFI as well." This patch introduces an EFI boot stub that allows an x86 bzImage to be loaded and executed by EFI firmware. The bzImage appears to the firmware as an EFI application. Luckily there are enough free bits within the bzImage header so that it can masquerade as an EFI application, thereby coercing the EFI firmware into loading it and jumping to its entry point. The beauty of this masquerading approach is that both BIOS and EFI boot loaders can still load and run the same bzImage, thereby allowing a single kernel image to work in any boot environment. The EFI boot stub supports multiple initrds, but they must exist on the same partition as the bzImage. Command-line arguments for the kernel can be appended after the bzImage name when run from the EFI shell, e.g. Shell> bzImage console=ttyS0 root=/dev/sdb initrd=initrd.img v7: - Fix checkpatch warnings. v6: - Try to allocate initrd memory just below hdr->inird_addr_max. v5: - load_options_size is UTF-16, which needs dividing by 2 to convert to the corresponding ASCII size. v4: - Don't read more than image->load_options_size v3: - Fix following warnings when compiling CONFIG_EFI_STUB=n arch/x86/boot/tools/build.c: In function ‘main’: arch/x86/boot/tools/build.c:138:24: warning: unused variable ‘pe_header’ arch/x86/boot/tools/build.c:138:15: warning: unused variable ‘file_sz’ - As reported by Matthew Garrett, some Apple machines have GOPs that don't have hardware attached. We need to weed these out by searching for ones that handle the PCIIO protocol. - Don't allocate memory if no initrds are on cmdline - Don't trust image->load_options_size Maarten Lankhorst noted: - Don't strip first argument when booted from efibootmgr - Don't allocate too much memory for cmdline - Don't update cmdline_size, the kernel considers it read-only - Don't accept '\n' for initrd names v2: - File alignment was too large, was 8192 should be 512. Reported by Maarten Lankhorst on LKML. - Added UGA support for graphics - Use VIDEO_TYPE_EFI instead of hard-coded number. - Move linelength assignment until after we've assigned depth - Dynamically fill out AddressOfEntryPoint in tools/build.c - Don't use magic number for GDT/TSS stuff. Requested by Andi Kleen - The bzImage may need to be relocated as it may have been loaded at a high address address by the firmware. This was required to get my macbook booting because the firmware loaded it at 0x7cxxxxxx, which triggers this error in decompress_kernel(), if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff)) error("Destination address too large"); Cc: Mike Waychison Cc: Matthew Garrett Tested-by: Henrik Rydberg Signed-off-by: Matt Fleming Link: http://lkml.kernel.org/r/1321383097.2657.9.camel@mfleming-mobl1.ger.corp.intel.com Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 7 + arch/x86/boot/compressed/Makefile | 10 +- arch/x86/boot/compressed/eboot.c | 1014 ++++++++++++++++++++++++++++++++ arch/x86/boot/compressed/eboot.h | 60 ++ arch/x86/boot/compressed/efi_stub_32.S | 86 +++ arch/x86/boot/compressed/efi_stub_64.S | 1 + arch/x86/boot/compressed/head_32.S | 22 + arch/x86/boot/compressed/head_64.S | 20 + arch/x86/boot/compressed/string.c | 9 + arch/x86/boot/header.S | 158 +++++ arch/x86/boot/string.c | 35 ++ arch/x86/boot/tools/build.c | 39 ++ arch/x86/kernel/asm-offsets.c | 2 + 13 files changed, 1462 insertions(+), 1 deletion(-) create mode 100644 arch/x86/boot/compressed/eboot.c create mode 100644 arch/x86/boot/compressed/eboot.h create mode 100644 arch/x86/boot/compressed/efi_stub_32.S create mode 100644 arch/x86/boot/compressed/efi_stub_64.S (limited to 'arch/x86/kernel') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index efb42949cc0..d71b656bcb9 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1478,6 +1478,13 @@ config EFI resultant kernel should continue to boot on existing non-EFI platforms. +config EFI_STUB + bool "EFI stub support" + depends on EFI + ---help--- + This kernel feature allows a bzImage to be loaded directly + by EFI firmware without the use of a bootloader. + config SECCOMP def_bool y prompt "Enable seccomp to safely compute untrusted bytecode" diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 09664efb9ce..b123b9a8f5b 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -23,7 +23,15 @@ LDFLAGS_vmlinux := -T hostprogs-y := mkpiggy -$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o $(obj)/piggy.o FORCE +VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ + $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \ + $(obj)/piggy.o + +ifeq ($(CONFIG_EFI_STUB), y) + VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o +endif + +$(obj)/vmlinux: $(VMLINUX_OBJS) FORCE $(call if_changed,ld) @: diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c new file mode 100644 index 00000000000..4055e63d0b0 --- /dev/null +++ b/arch/x86/boot/compressed/eboot.c @@ -0,0 +1,1014 @@ +/* ----------------------------------------------------------------------- + * + * Copyright 2011 Intel Corporation; author Matt Fleming + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +#include +#include +#include +#include + +#include "eboot.h" + +static efi_system_table_t *sys_table; + +static efi_status_t __get_map(efi_memory_desc_t **map, unsigned long *map_size, + unsigned long *desc_size) +{ + efi_memory_desc_t *m = NULL; + efi_status_t status; + unsigned long key; + u32 desc_version; + + *map_size = sizeof(*m) * 32; +again: + /* + * Add an additional efi_memory_desc_t because we're doing an + * allocation which may be in a new descriptor region. + */ + *map_size += sizeof(*m); + status = efi_call_phys3(sys_table->boottime->allocate_pool, + EFI_LOADER_DATA, *map_size, (void **)&m); + if (status != EFI_SUCCESS) + goto fail; + + status = efi_call_phys5(sys_table->boottime->get_memory_map, map_size, + m, &key, desc_size, &desc_version); + if (status == EFI_BUFFER_TOO_SMALL) { + efi_call_phys1(sys_table->boottime->free_pool, m); + goto again; + } + + if (status != EFI_SUCCESS) + efi_call_phys1(sys_table->boottime->free_pool, m); + +fail: + *map = m; + return status; +} + +/* + * Allocate at the highest possible address that is not above 'max'. + */ +static efi_status_t high_alloc(unsigned long size, unsigned long align, + unsigned long *addr, unsigned long max) +{ + unsigned long map_size, desc_size; + efi_memory_desc_t *map; + efi_status_t status; + unsigned long nr_pages; + u64 max_addr = 0; + int i; + + status = __get_map(&map, &map_size, &desc_size); + if (status != EFI_SUCCESS) + goto fail; + + nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE; +again: + for (i = 0; i < map_size / desc_size; i++) { + efi_memory_desc_t *desc; + unsigned long m = (unsigned long)map; + u64 start, end; + + desc = (efi_memory_desc_t *)(m + (i * desc_size)); + if (desc->type != EFI_CONVENTIONAL_MEMORY) + continue; + + if (desc->num_pages < nr_pages) + continue; + + start = desc->phys_addr; + end = start + desc->num_pages * (1UL << EFI_PAGE_SHIFT); + + if ((start + size) > end || (start + size) > max) + continue; + + if (end - size > max) + end = max; + + if (round_down(end - size, align) < start) + continue; + + start = round_down(end - size, align); + + /* + * Don't allocate at 0x0. It will confuse code that + * checks pointers against NULL. + */ + if (start == 0x0) + continue; + + if (start > max_addr) + max_addr = start; + } + + if (!max_addr) + status = EFI_NOT_FOUND; + else { + status = efi_call_phys4(sys_table->boottime->allocate_pages, + EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, + nr_pages, &max_addr); + if (status != EFI_SUCCESS) { + max = max_addr; + max_addr = 0; + goto again; + } + + *addr = max_addr; + } + +free_pool: + efi_call_phys1(sys_table->boottime->free_pool, map); + +fail: + return status; +} + +/* + * Allocate at the lowest possible address. + */ +static efi_status_t low_alloc(unsigned long size, unsigned long align, + unsigned long *addr) +{ + unsigned long map_size, desc_size; + efi_memory_desc_t *map; + efi_status_t status; + unsigned long nr_pages; + int i; + + status = __get_map(&map, &map_size, &desc_size); + if (status != EFI_SUCCESS) + goto fail; + + nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE; + for (i = 0; i < map_size / desc_size; i++) { + efi_memory_desc_t *desc; + unsigned long m = (unsigned long)map; + u64 start, end; + + desc = (efi_memory_desc_t *)(m + (i * desc_size)); + + if (desc->type != EFI_CONVENTIONAL_MEMORY) + continue; + + if (desc->num_pages < nr_pages) + continue; + + start = desc->phys_addr; + end = start + desc->num_pages * (1UL << EFI_PAGE_SHIFT); + + /* + * Don't allocate at 0x0. It will confuse code that + * checks pointers against NULL. Skip the first 8 + * bytes so we start at a nice even number. + */ + if (start == 0x0) + start += 8; + + start = round_up(start, align); + if ((start + size) > end) + continue; + + status = efi_call_phys4(sys_table->boottime->allocate_pages, + EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, + nr_pages, &start); + if (status == EFI_SUCCESS) { + *addr = start; + break; + } + } + + if (i == map_size / desc_size) + status = EFI_NOT_FOUND; + +free_pool: + efi_call_phys1(sys_table->boottime->free_pool, map); +fail: + return status; +} + +static void low_free(unsigned long size, unsigned long addr) +{ + unsigned long nr_pages; + + nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE; + efi_call_phys2(sys_table->boottime->free_pages, addr, size); +} + +static void find_bits(unsigned long mask, u8 *pos, u8 *size) +{ + u8 first, len; + + first = 0; + len = 0; + + if (mask) { + while (!(mask & 0x1)) { + mask = mask >> 1; + first++; + } + + while (mask & 0x1) { + mask = mask >> 1; + len++; + } + } + + *pos = first; + *size = len; +} + +/* + * See if we have Graphics Output Protocol + */ +static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, + unsigned long size) +{ + struct efi_graphics_output_protocol *gop, *first_gop; + struct efi_pixel_bitmask pixel_info; + unsigned long nr_gops; + efi_status_t status; + void **gop_handle; + u16 width, height; + u32 fb_base, fb_size; + u32 pixels_per_scan_line; + int pixel_format; + int i; + + status = efi_call_phys3(sys_table->boottime->allocate_pool, + EFI_LOADER_DATA, size, &gop_handle); + if (status != EFI_SUCCESS) + return status; + + status = efi_call_phys5(sys_table->boottime->locate_handle, + EFI_LOCATE_BY_PROTOCOL, proto, + NULL, &size, gop_handle); + if (status != EFI_SUCCESS) + goto free_handle; + + first_gop = NULL; + + nr_gops = size / sizeof(void *); + for (i = 0; i < nr_gops; i++) { + struct efi_graphics_output_mode_info *info; + efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID; + void *pciio; + void *h = gop_handle[i]; + + status = efi_call_phys3(sys_table->boottime->handle_protocol, + h, proto, &gop); + if (status != EFI_SUCCESS) + continue; + + efi_call_phys3(sys_table->boottime->handle_protocol, + h, &pciio_proto, &pciio); + + status = efi_call_phys4(gop->query_mode, gop, + gop->mode->mode, &size, &info); + if (status == EFI_SUCCESS && (!first_gop || pciio)) { + /* + * Apple provide GOPs that are not backed by + * real hardware (they're used to handle + * multiple displays). The workaround is to + * search for a GOP implementing the PCIIO + * protocol, and if one isn't found, to just + * fallback to the first GOP. + */ + width = info->horizontal_resolution; + height = info->vertical_resolution; + fb_base = gop->mode->frame_buffer_base; + fb_size = gop->mode->frame_buffer_size; + pixel_format = info->pixel_format; + pixel_info = info->pixel_information; + pixels_per_scan_line = info->pixels_per_scan_line; + + /* + * Once we've found a GOP supporting PCIIO, + * don't bother looking any further. + */ + if (pciio) + break; + + first_gop = gop; + } + } + + /* Did we find any GOPs? */ + if (!first_gop) + goto free_handle; + + /* EFI framebuffer */ + si->orig_video_isVGA = VIDEO_TYPE_EFI; + + si->lfb_width = width; + si->lfb_height = height; + si->lfb_base = fb_base; + si->lfb_size = fb_size; + si->pages = 1; + + if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) { + si->lfb_depth = 32; + si->lfb_linelength = pixels_per_scan_line * 4; + si->red_size = 8; + si->red_pos = 0; + si->green_size = 8; + si->green_pos = 8; + si->blue_size = 8; + si->blue_pos = 16; + si->rsvd_size = 8; + si->rsvd_pos = 24; + } else if (pixel_format == PIXEL_BGR_RESERVED_8BIT_PER_COLOR) { + si->lfb_depth = 32; + si->lfb_linelength = pixels_per_scan_line * 4; + si->red_size = 8; + si->red_pos = 16; + si->green_size = 8; + si->green_pos = 8; + si->blue_size = 8; + si->blue_pos = 0; + si->rsvd_size = 8; + si->rsvd_pos = 24; + } else if (pixel_format == PIXEL_BIT_MASK) { + find_bits(pixel_info.red_mask, &si->red_pos, &si->red_size); + find_bits(pixel_info.green_mask, &si->green_pos, + &si->green_size); + find_bits(pixel_info.blue_mask, &si->blue_pos, &si->blue_size); + find_bits(pixel_info.reserved_mask, &si->rsvd_pos, + &si->rsvd_size); + si->lfb_depth = si->red_size + si->green_size + + si->blue_size + si->rsvd_size; + si->lfb_linelength = (pixels_per_scan_line * si->lfb_depth) / 8; + } else { + si->lfb_depth = 4; + si->lfb_linelength = si->lfb_width / 2; + si->red_size = 0; + si->red_pos = 0; + si->green_size = 0; + si->green_pos = 0; + si->blue_size = 0; + si->blue_pos = 0; + si->rsvd_size = 0; + si->rsvd_pos = 0; + } + +free_handle: + efi_call_phys1(sys_table->boottime->free_pool, gop_handle); + return status; +} + +/* + * See if we have Universal Graphics Adapter (UGA) protocol + */ +static efi_status_t setup_uga(struct screen_info *si, efi_guid_t *uga_proto, + unsigned long size) +{ + struct efi_uga_draw_protocol *uga, *first_uga; + unsigned long nr_ugas; + efi_status_t status; + u32 width, height; + void **uga_handle = NULL; + int i; + + status = efi_call_phys3(sys_table->boottime->allocate_pool, + EFI_LOADER_DATA, size, &uga_handle); + if (status != EFI_SUCCESS) + return status; + + status = efi_call_phys5(sys_table->boottime->locate_handle, + EFI_LOCATE_BY_PROTOCOL, uga_proto, + NULL, &size, uga_handle); + if (status != EFI_SUCCESS) + goto free_handle; + + first_uga = NULL; + + nr_ugas = size / sizeof(void *); + for (i = 0; i < nr_ugas; i++) { + efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID; + void *handle = uga_handle[i]; + u32 w, h, depth, refresh; + void *pciio; + + status = efi_call_phys3(sys_table->boottime->handle_protocol, + handle, uga_proto, &uga); + if (status != EFI_SUCCESS) + continue; + + efi_call_phys3(sys_table->boottime->handle_protocol, + handle, &pciio_proto, &pciio); + + status = efi_call_phys5(uga->get_mode, uga, &w, &h, + &depth, &refresh); + if (status == EFI_SUCCESS && (!first_uga || pciio)) { + width = w; + height = h; + + /* + * Once we've found a UGA supporting PCIIO, + * don't bother looking any further. + */ + if (pciio) + break; + + first_uga = uga; + } + } + + if (!first_uga) + goto free_handle; + + /* EFI framebuffer */ + si->orig_video_isVGA = VIDEO_TYPE_EFI; + + si->lfb_depth = 32; + si->lfb_width = width; + si->lfb_height = height; + + si->red_size = 8; + si->red_pos = 16; + si->green_size = 8; + si->green_pos = 8; + si->blue_size = 8; + si->blue_pos = 0; + si->rsvd_size = 8; + si->rsvd_pos = 24; + + +free_handle: + efi_call_phys1(sys_table->boottime->free_pool, uga_handle); + return status; +} + +void setup_graphics(struct boot_params *boot_params) +{ + efi_guid_t graphics_proto = EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID; + struct screen_info *si; + efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID; + efi_status_t status; + unsigned long size; + void **gop_handle = NULL; + void **uga_handle = NULL; + + si = &boot_params->screen_info; + memset(si, 0, sizeof(*si)); + + size = 0; + status = efi_call_phys5(sys_table->boottime->locate_handle, + EFI_LOCATE_BY_PROTOCOL, &graphics_proto, + NULL, &size, gop_handle); + if (status == EFI_BUFFER_TOO_SMALL) + status = setup_gop(si, &graphics_proto, size); + + if (status != EFI_SUCCESS) { + size = 0; + status = efi_call_phys5(sys_table->boottime->locate_handle, + EFI_LOCATE_BY_PROTOCOL, &uga_proto, + NULL, &size, uga_handle); + if (status == EFI_BUFFER_TOO_SMALL) + setup_uga(si, &uga_proto, size); + } +} + +struct initrd { + efi_file_handle_t *handle; + u64 size; +}; + +/* + * Check the cmdline for a LILO-style initrd= arguments. + * + * We only support loading an initrd from the same filesystem as the + * kernel image. + */ +static efi_status_t handle_ramdisks(efi_loaded_image_t *image, + struct setup_header *hdr) +{ + struct initrd *initrds; + unsigned long initrd_addr; + efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID; + u64 initrd_total; + efi_file_io_interface_t *io; + efi_file_handle_t *fh; + efi_status_t status; + int nr_initrds; + char *str; + int i, j, k; + + initrd_addr = 0; + initrd_total = 0; + + str = (char *)(unsigned long)hdr->cmd_line_ptr; + + j = 0; /* See close_handles */ + + if (!str || !*str) + return EFI_SUCCESS; + + for (nr_initrds = 0; *str; nr_initrds++) { + str = strstr(str, "initrd="); + if (!str) + break; + + str += 7; + + /* Skip any leading slashes */ + while (*str == '/' || *str == '\\') + str++; + + while (*str && *str != ' ' && *str != '\n') + str++; + } + + if (!nr_initrds) + return EFI_SUCCESS; + + status = efi_call_phys3(sys_table->boottime->allocate_pool, + EFI_LOADER_DATA, + nr_initrds * sizeof(*initrds), + &initrds); + if (status != EFI_SUCCESS) + goto fail; + + str = (char *)(unsigned long)hdr->cmd_line_ptr; + for (i = 0; i < nr_initrds; i++) { + struct initrd *initrd; + efi_file_handle_t *h; + efi_file_info_t *info; + efi_char16_t filename[256]; + unsigned long info_sz; + efi_guid_t info_guid = EFI_FILE_INFO_ID; + efi_char16_t *p; + u64 file_sz; + + str = strstr(str, "initrd="); + if (!str) + break; + + str += 7; + + initrd = &initrds[i]; + p = filename; + + /* Skip any leading slashes */ + while (*str == '/' || *str == '\\') + str++; + + while (*str && *str != ' ' && *str != '\n') { + if (p >= filename + sizeof(filename)) + break; + + *p++ = *str++; + } + + *p = '\0'; + + /* Only open the volume once. */ + if (!i) { + efi_boot_services_t *boottime; + + boottime = sys_table->boottime; + + status = efi_call_phys3(boottime->handle_protocol, + image->device_handle, &fs_proto, &io); + if (status != EFI_SUCCESS) + goto free_initrds; + + status = efi_call_phys2(io->open_volume, io, &fh); + if (status != EFI_SUCCESS) + goto free_initrds; + } + + status = efi_call_phys5(fh->open, fh, &h, filename, + EFI_FILE_MODE_READ, (u64)0); + if (status != EFI_SUCCESS) + goto close_handles; + + initrd->handle = h; + + info_sz = 0; + status = efi_call_phys4(h->get_info, h, &info_guid, + &info_sz, NULL); + if (status != EFI_BUFFER_TOO_SMALL) + goto close_handles; + +grow: + status = efi_call_phys3(sys_table->boottime->allocate_pool, + EFI_LOADER_DATA, info_sz, &info); + if (status != EFI_SUCCESS) + goto close_handles; + + status = efi_call_phys4(h->get_info, h, &info_guid, + &info_sz, info); + if (status == EFI_BUFFER_TOO_SMALL) { + efi_call_phys1(sys_table->boottime->free_pool, info); + goto grow; + } + + file_sz = info->file_size; + efi_call_phys1(sys_table->boottime->free_pool, info); + + if (status != EFI_SUCCESS) + goto close_handles; + + initrd->size = file_sz; + initrd_total += file_sz; + } + + if (initrd_total) { + unsigned long addr; + + /* + * Multiple initrd's need to be at consecutive + * addresses in memory, so allocate enough memory for + * all the initrd's. + */ + status = high_alloc(initrd_total, 0x1000, + &initrd_addr, hdr->initrd_addr_max); + if (status != EFI_SUCCESS) + goto close_handles; + + /* We've run out of free low memory. */ + if (initrd_addr > hdr->initrd_addr_max) { + status = EFI_INVALID_PARAMETER; + goto free_initrd_total; + } + + addr = initrd_addr; + for (j = 0; j < nr_initrds; j++) { + u64 size; + + size = initrds[j].size; + status = efi_call_phys3(fh->read, initrds[j].handle, + &size, addr); + if (status != EFI_SUCCESS) + goto free_initrd_total; + + efi_call_phys1(fh->close, initrds[j].handle); + + addr += size; + } + + } + + efi_call_phys1(sys_table->boottime->free_pool, initrds); + + hdr->ramdisk_image = initrd_addr; + hdr->ramdisk_size = initrd_total; + + return status; + +free_initrd_total: + low_free(initrd_total, initrd_addr); + +close_handles: + for (k = j; k < nr_initrds; k++) + efi_call_phys1(fh->close, initrds[k].handle); +free_initrds: + efi_call_phys1(sys_table->boottime->free_pool, initrds); +fail: + hdr->ramdisk_image = 0; + hdr->ramdisk_size = 0; + + return status; +} + +/* + * Because the x86 boot code expects to be passed a boot_params we + * need to create one ourselves (usually the bootloader would create + * one for us). + */ +static efi_status_t make_boot_params(struct boot_params *boot_params, + efi_loaded_image_t *image, + void *handle) +{ + struct efi_info *efi = &boot_params->efi_info; + struct apm_bios_info *bi = &boot_params->apm_bios_info; + struct sys_desc_table *sdt = &boot_params->sys_desc_table; + struct e820entry *e820_map = &boot_params->e820_map[0]; + struct e820entry *prev = NULL; + struct setup_header *hdr = &boot_params->hdr; + unsigned long size, key, desc_size, _size; + efi_memory_desc_t *mem_map; + void *options = image->load_options; + u32 load_options_size = image->load_options_size / 2; /* ASCII */ + int options_size = 0; + efi_status_t status; + __u32 desc_version; + unsigned long cmdline; + u8 nr_entries; + u16 *s2; + u8 *s1; + int i; + + hdr->type_of_loader = 0x21; + + /* Convert unicode cmdline to ascii */ + cmdline = 0; + s2 = (u16 *)options; + + if (s2) { + while (*s2 && *s2 != '\n' && options_size < load_options_size) { + s2++; + options_size++; + } + + if (options_size) { + if (options_size > hdr->cmdline_size) + options_size = hdr->cmdline_size; + + options_size++; /* NUL termination */ + + status = low_alloc(options_size, 1, &cmdline); + if (status != EFI_SUCCESS) + goto fail; + + s1 = (u8 *)(unsigned long)cmdline; + s2 = (u16 *)options; + + for (i = 0; i < options_size - 1; i++) + *s1++ = *s2++; + + *s1 = '\0'; + } + } + + hdr->cmd_line_ptr = cmdline; + + hdr->ramdisk_image = 0; + hdr->ramdisk_size = 0; + + status = handle_ramdisks(image, hdr); + if (status != EFI_SUCCESS) + goto free_cmdline; + + setup_graphics(boot_params); + + /* Clear APM BIOS info */ + memset(bi, 0, sizeof(*bi)); + + memset(sdt, 0, sizeof(*sdt)); + + memcpy(&efi->efi_loader_signature, EFI_LOADER_SIGNATURE, sizeof(__u32)); + + size = sizeof(*mem_map) * 32; + +again: + size += sizeof(*mem_map); + _size = size; + status = low_alloc(size, 1, (unsigned long *)&mem_map); + if (status != EFI_SUCCESS) + goto free_cmdline; + + status = efi_call_phys5(sys_table->boottime->get_memory_map, &size, + mem_map, &key, &desc_size, &desc_version); + if (status == EFI_BUFFER_TOO_SMALL) { + low_free(_size, (unsigned long)mem_map); + goto again; + } + + if (status != EFI_SUCCESS) + goto free_mem_map; + + efi->efi_systab = (unsigned long)sys_table; + efi->efi_memdesc_size = desc_size; + efi->efi_memdesc_version = desc_version; + efi->efi_memmap = (unsigned long)mem_map; + efi->efi_memmap_size = size; + +#ifdef CONFIG_X86_64 + efi->efi_systab_hi = (unsigned long)sys_table >> 32; + efi->efi_memmap_hi = (unsigned long)mem_map >> 32; +#endif + + /* Might as well exit boot services now */ + status = efi_call_phys2(sys_table->boottime->exit_boot_services, + handle, key); + if (status != EFI_SUCCESS) + goto free_mem_map; + + /* Historic? */ + boot_params->alt_mem_k = 32 * 1024; + + /* + * Convert the EFI memory map to E820. + */ + nr_entries = 0; + for (i = 0; i < size / desc_size; i++) { + efi_memory_desc_t *d; + unsigned int e820_type = 0; + unsigned long m = (unsigned long)mem_map; + + d = (efi_memory_desc_t *)(m + (i * desc_size)); + switch (d->type) { + case EFI_RESERVED_TYPE: + case EFI_RUNTIME_SERVICES_CODE: + case EFI_RUNTIME_SERVICES_DATA: + case EFI_MEMORY_MAPPED_IO: + case EFI_MEMORY_MAPPED_IO_PORT_SPACE: + case EFI_PAL_CODE: + e820_type = E820_RESERVED; + break; + + case EFI_UNUSABLE_MEMORY: + e820_type = E820_UNUSABLE; + break; + + case EFI_ACPI_RECLAIM_MEMORY: + e820_type = E820_ACPI; + break; + + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_BOOT_SERVICES_CODE: + case EFI_BOOT_SERVICES_DATA: + case EFI_CONVENTIONAL_MEMORY: + e820_type = E820_RAM; + break; + + case EFI_ACPI_MEMORY_NVS: + e820_type = E820_NVS; + break; + + default: + continue; + } + + /* Merge adjacent mappings */ + if (prev && prev->type == e820_type && + (prev->addr + prev->size) == d->phys_addr) + prev->size += d->num_pages << 12; + else { + e820_map->addr = d->phys_addr; + e820_map->size = d->num_pages << 12; + e820_map->type = e820_type; + prev = e820_map++; + nr_entries++; + } + } + + boot_params->e820_entries = nr_entries; + + return EFI_SUCCESS; + +free_mem_map: + low_free(_size, (unsigned long)mem_map); +free_cmdline: + if (options_size) + low_free(options_size, hdr->cmd_line_ptr); +fail: + return status; +} + +/* + * On success we return a pointer to a boot_params structure, and NULL + * on failure. + */ +struct boot_params *efi_main(void *handle, efi_system_table_t *_table) +{ + struct boot_params *boot_params; + unsigned long start, nr_pages; + struct desc_ptr *gdt, *idt; + efi_loaded_image_t *image; + struct setup_header *hdr; + efi_status_t status; + efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID; + struct desc_struct *desc; + + sys_table = _table; + + /* Check if we were booted by the EFI firmware */ + if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) + goto fail; + + status = efi_call_phys3(sys_table->boottime->handle_protocol, + handle, &proto, (void *)&image); + if (status != EFI_SUCCESS) + goto fail; + + status = low_alloc(0x4000, 1, (unsigned long *)&boot_params); + if (status != EFI_SUCCESS) + goto fail; + + memset(boot_params, 0x0, 0x4000); + + /* Copy first two sectors to boot_params */ + memcpy(boot_params, image->image_base, 1024); + + hdr = &boot_params->hdr; + + /* + * The EFI firmware loader could have placed the kernel image + * anywhere in memory, but the kernel has various restrictions + * on the max physical address it can run at. Attempt to move + * the kernel to boot_params.pref_address, or as low as + * possible. + */ + start = hdr->pref_address; + nr_pages = round_up(hdr->init_size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE; + + status = efi_call_phys4(sys_table->boottime->allocate_pages, + EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, + nr_pages, &start); + if (status != EFI_SUCCESS) { + status = low_alloc(hdr->init_size, hdr->kernel_alignment, + &start); + if (status != EFI_SUCCESS) + goto fail; + } + + hdr->code32_start = (__u32)start; + hdr->pref_address = (__u64)(unsigned long)image->image_base; + + memcpy((void *)start, image->image_base, image->image_size); + + status = efi_call_phys3(sys_table->boottime->allocate_pool, + EFI_LOADER_DATA, sizeof(*gdt), + (void **)&gdt); + if (status != EFI_SUCCESS) + goto fail; + + gdt->size = 0x800; + status = low_alloc(gdt->size, 8, (unsigned long *)&gdt->address); + if (status != EFI_SUCCESS) + goto fail; + + status = efi_call_phys3(sys_table->boottime->allocate_pool, + EFI_LOADER_DATA, sizeof(*idt), + (void **)&idt); + if (status != EFI_SUCCESS) + goto fail; + + idt->size = 0; + idt->address = 0; + + status = make_boot_params(boot_params, image, handle); + if (status != EFI_SUCCESS) + goto fail; + + memset((char *)gdt->address, 0x0, gdt->size); + desc = (struct desc_struct *)gdt->address; + + /* The first GDT is a dummy and the second is unused. */ + desc += 2; + + desc->limit0 = 0xffff; + desc->base0 = 0x0000; + desc->base1 = 0x0000; + desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ; + desc->s = DESC_TYPE_CODE_DATA; + desc->dpl = 0; + desc->p = 1; + desc->limit = 0xf; + desc->avl = 0; + desc->l = 0; + desc->d = SEG_OP_SIZE_32BIT; + desc->g = SEG_GRANULARITY_4KB; + desc->base2 = 0x00; + + desc++; + desc->limit0 = 0xffff; + desc->base0 = 0x0000; + desc->base1 = 0x0000; + desc->type = SEG_TYPE_DATA | SEG_TYPE_READ_WRITE; + desc->s = DESC_TYPE_CODE_DATA; + desc->dpl = 0; + desc->p = 1; + desc->limit = 0xf; + desc->avl = 0; + desc->l = 0; + desc->d = SEG_OP_SIZE_32BIT; + desc->g = SEG_GRANULARITY_4KB; + desc->base2 = 0x00; + +#ifdef CONFIG_X86_64 + /* Task segment value */ + desc++; + desc->limit0 = 0x0000; + desc->base0 = 0x0000; + desc->base1 = 0x0000; + desc->type = SEG_TYPE_TSS; + desc->s = 0; + desc->dpl = 0; + desc->p = 1; + desc->limit = 0x0; + desc->avl = 0; + desc->l = 0; + desc->d = 0; + desc->g = SEG_GRANULARITY_4KB; + desc->base2 = 0x00; +#endif /* CONFIG_X86_64 */ + + asm volatile ("lidt %0" : : "m" (*idt)); + asm volatile ("lgdt %0" : : "m" (*gdt)); + + asm volatile("cli"); + + return boot_params; +fail: + return NULL; +} diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h new file mode 100644 index 00000000000..f66d023e91e --- /dev/null +++ b/arch/x86/boot/compressed/eboot.h @@ -0,0 +1,60 @@ +#ifndef BOOT_COMPRESSED_EBOOT_H +#define BOOT_COMPRESSED_EBOOT_H + +#define SEG_TYPE_DATA (0 << 3) +#define SEG_TYPE_READ_WRITE (1 << 1) +#define SEG_TYPE_CODE (1 << 3) +#define SEG_TYPE_EXEC_READ (1 << 1) +#define SEG_TYPE_TSS ((1 << 3) | (1 << 0)) +#define SEG_OP_SIZE_32BIT (1 << 0) +#define SEG_GRANULARITY_4KB (1 << 0) + +#define DESC_TYPE_CODE_DATA (1 << 0) + +#define EFI_PAGE_SIZE (1UL << EFI_PAGE_SHIFT) + +#define PIXEL_RGB_RESERVED_8BIT_PER_COLOR 0 +#define PIXEL_BGR_RESERVED_8BIT_PER_COLOR 1 +#define PIXEL_BIT_MASK 2 +#define PIXEL_BLT_ONLY 3 +#define PIXEL_FORMAT_MAX 4 + +struct efi_pixel_bitmask { + u32 red_mask; + u32 green_mask; + u32 blue_mask; + u32 reserved_mask; +}; + +struct efi_graphics_output_mode_info { + u32 version; + u32 horizontal_resolution; + u32 vertical_resolution; + int pixel_format; + struct efi_pixel_bitmask pixel_information; + u32 pixels_per_scan_line; +} __packed; + +struct efi_graphics_output_protocol_mode { + u32 max_mode; + u32 mode; + unsigned long info; + unsigned long size_of_info; + u64 frame_buffer_base; + unsigned long frame_buffer_size; +} __packed; + +struct efi_graphics_output_protocol { + void *query_mode; + unsigned long set_mode; + unsigned long blt; + struct efi_graphics_output_protocol_mode *mode; +}; + +struct efi_uga_draw_protocol { + void *get_mode; + void *set_mode; + void *blt; +}; + +#endif /* BOOT_COMPRESSED_EBOOT_H */ diff --git a/arch/x86/boot/compressed/efi_stub_32.S b/arch/x86/boot/compressed/efi_stub_32.S new file mode 100644 index 00000000000..a53440e81d5 --- /dev/null +++ b/arch/x86/boot/compressed/efi_stub_32.S @@ -0,0 +1,86 @@ +/* + * EFI call stub for IA32. + * + * This stub allows us to make EFI calls in physical mode with interrupts + * turned off. Note that this implementation is different from the one in + * arch/x86/platform/efi/efi_stub_32.S because we're _already_ in physical + * mode at this point. + */ + +#include +#include + +/* + * efi_call_phys(void *, ...) is a function with variable parameters. + * All the callers of this function assure that all the parameters are 4-bytes. + */ + +/* + * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save. + * So we'd better save all of them at the beginning of this function and restore + * at the end no matter how many we use, because we can not assure EFI runtime + * service functions will comply with gcc calling convention, too. + */ + +.text +ENTRY(efi_call_phys) + /* + * 0. The function can only be called in Linux kernel. So CS has been + * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found + * the values of these registers are the same. And, the corresponding + * GDT entries are identical. So I will do nothing about segment reg + * and GDT, but change GDT base register in prelog and epilog. + */ + + /* + * 1. Because we haven't been relocated by this point we need to + * use relative addressing. + */ + call 1f +1: popl %edx + subl $1b, %edx + + /* + * 2. Now on the top of stack is the return + * address in the caller of efi_call_phys(), then parameter 1, + * parameter 2, ..., param n. To make things easy, we save the return + * address of efi_call_phys in a global variable. + */ + popl %ecx + movl %ecx, saved_return_addr(%edx) + /* get the function pointer into ECX*/ + popl %ecx + movl %ecx, efi_rt_function_ptr(%edx) + + /* + * 3. Call the physical function. + */ + call *%ecx + + /* + * 4. Balance the stack. And because EAX contain the return value, + * we'd better not clobber it. We need to calculate our address + * again because %ecx and %edx are not preserved across EFI function + * calls. + */ + call 1f +1: popl %edx + subl $1b, %edx + + movl efi_rt_function_ptr(%edx), %ecx + pushl %ecx + + /* + * 10. Push the saved return address onto the stack and return. + */ + movl saved_return_addr(%edx), %ecx + pushl %ecx + ret +ENDPROC(efi_call_phys) +.previous + +.data +saved_return_addr: + .long 0 +efi_rt_function_ptr: + .long 0 diff --git a/arch/x86/boot/compressed/efi_stub_64.S b/arch/x86/boot/compressed/efi_stub_64.S new file mode 100644 index 00000000000..cedc60de86e --- /dev/null +++ b/arch/x86/boot/compressed/efi_stub_64.S @@ -0,0 +1 @@ +#include "../../platform/efi/efi_stub_64.S" diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 67a655a39ce..a0559930a18 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -32,6 +32,28 @@ __HEAD ENTRY(startup_32) +#ifdef CONFIG_EFI_STUB + /* + * We don't need the return address, so set up the stack so + * efi_main() can find its arugments. + */ + add $0x4, %esp + + call efi_main + cmpl $0, %eax + je preferred_addr + movl %eax, %esi + call 1f +1: + popl %eax + subl $1b, %eax + subl BP_pref_address(%esi), %eax + add BP_code32_start(%esi), %eax + leal preferred_addr(%eax), %eax + jmp *%eax + +preferred_addr: +#endif cld /* * Test KEEP_SEGMENTS flag to see if the bootloader is asking diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 35af09d13dc..558d76ce23b 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -199,6 +199,26 @@ ENTRY(startup_64) * an identity mapped page table being provied that maps our * entire text+data+bss and hopefully all of memory. */ +#ifdef CONFIG_EFI_STUB + pushq %rsi + mov %rcx, %rdi + mov %rdx, %rsi + call efi_main + popq %rsi + cmpq $0,%rax + je preferred_addr + movq %rax,%rsi + call 1f +1: + popq %rax + subq $1b, %rax + subq BP_pref_address(%rsi), %rax + add BP_code32_start(%esi), %eax + leaq preferred_addr(%rax), %rax + jmp *%rax + +preferred_addr: +#endif /* Setup data segments. */ xorl %eax, %eax diff --git a/arch/x86/boot/compressed/string.c b/arch/x86/boot/compressed/string.c index 19b3e693cd7..ffb9c5c9d74 100644 --- a/arch/x86/boot/compressed/string.c +++ b/arch/x86/boot/compressed/string.c @@ -1,2 +1,11 @@ #include "misc.h" + +int memcmp(const void *s1, const void *s2, size_t len) +{ + u8 diff; + asm("repe; cmpsb; setnz %0" + : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); + return diff; +} + #include "../string.c" diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index bdb4d458ec8..f1bbeeb0914 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -45,6 +45,11 @@ SYSSEG = 0x1000 /* historical load address >> 4 */ .global bootsect_start bootsect_start: +#ifdef CONFIG_EFI_STUB + # "MZ", MS-DOS header + .byte 0x4d + .byte 0x5a +#endif # Normalize the start address ljmp $BOOTSEG, $start2 @@ -79,6 +84,14 @@ bs_die: # invoke the BIOS reset code... ljmp $0xf000,$0xfff0 +#ifdef CONFIG_EFI_STUB + .org 0x3c + # + # Offset to the PE header. + # + .long pe_header +#endif /* CONFIG_EFI_STUB */ + .section ".bsdata", "a" bugger_off_msg: .ascii "Direct booting from floppy is no longer supported.\r\n" @@ -87,6 +100,141 @@ bugger_off_msg: .ascii "Remove disk and press any key to reboot . . .\r\n" .byte 0 +#ifdef CONFIG_EFI_STUB +pe_header: + .ascii "PE" + .word 0 + +coff_header: +#ifdef CONFIG_X86_32 + .word 0x14c # i386 +#else + .word 0x8664 # x86-64 +#endif + .word 2 # nr_sections + .long 0 # TimeDateStamp + .long 0 # PointerToSymbolTable + .long 1 # NumberOfSymbols + .word section_table - optional_header # SizeOfOptionalHeader +#ifdef CONFIG_X86_32 + .word 0x306 # Characteristics. + # IMAGE_FILE_32BIT_MACHINE | + # IMAGE_FILE_DEBUG_STRIPPED | + # IMAGE_FILE_EXECUTABLE_IMAGE | + # IMAGE_FILE_LINE_NUMS_STRIPPED +#else + .word 0x206 # Characteristics + # IMAGE_FILE_DEBUG_STRIPPED | + # IMAGE_FILE_EXECUTABLE_IMAGE | + # IMAGE_FILE_LINE_NUMS_STRIPPED +#endif + +optional_header: +#ifdef CONFIG_X86_32 + .word 0x10b # PE32 format +#else + .word 0x20b # PE32+ format +#endif + .byte 0x02 # MajorLinkerVersion + .byte 0x14 # MinorLinkerVersion + + # Filled in by build.c + .long 0 # SizeOfCode + + .long 0 # SizeOfInitializedData + .long 0 # SizeOfUninitializedData + + # Filled in by build.c + .long 0x0000 # AddressOfEntryPoint + + .long 0x0000 # BaseOfCode +#ifdef CONFIG_X86_32 + .long 0 # data +#endif + +extra_header_fields: +#ifdef CONFIG_X86_32 + .long 0 # ImageBase +#else + .quad 0 # ImageBase +#endif + .long 0x1000 # SectionAlignment + .long 0x200 # FileAlignment + .word 0 # MajorOperatingSystemVersion + .word 0 # MinorOperatingSystemVersion + .word 0 # MajorImageVersion + .word 0 # MinorImageVersion + .word 0 # MajorSubsystemVersion + .word 0 # MinorSubsystemVersion + .long 0 # Win32VersionValue + + # + # The size of the bzImage is written in tools/build.c + # + .long 0 # SizeOfImage + + .long 0x200 # SizeOfHeaders + .long 0 # CheckSum + .word 0xa # Subsystem (EFI application) + .word 0 # DllCharacteristics +#ifdef CONFIG_X86_32 + .long 0 # SizeOfStackReserve + .long 0 # SizeOfStackCommit + .long 0 # SizeOfHeapReserve + .long 0 # SizeOfHeapCommit +#else + .quad 0 # SizeOfStackReserve + .quad 0 # SizeOfStackCommit + .quad 0 # SizeOfHeapReserve + .quad 0 # SizeOfHeapCommit +#endif + .long 0 # LoaderFlags + .long 0x1 # NumberOfRvaAndSizes + + .quad 0 # ExportTable + .quad 0 # ImportTable + .quad 0 # ResourceTable + .quad 0 # ExceptionTable + .quad 0 # CertificationTable + .quad 0 # BaseRelocationTable + + # Section table +section_table: + .ascii ".text" + .byte 0 + .byte 0 + .byte 0 + .long 0 + .long 0x0 # startup_{32,64} + .long 0 # Size of initialized data + # on disk + .long 0x0 # startup_{32,64} + .long 0 # PointerToRelocations + .long 0 # PointerToLineNumbers + .word 0 # NumberOfRelocations + .word 0 # NumberOfLineNumbers + .long 0x60500020 # Characteristics (section flags) + + # + # The EFI application loader requires a relocation section + # because EFI applications are relocatable and not having + # this section seems to confuse it. But since we don't need + # the loader to fixup any relocs for us just fill it with a + # single dummy reloc. + # + .ascii ".reloc" + .byte 0 + .byte 0 + .long reloc_end - reloc_start + .long reloc_start + .long reloc_end - reloc_start # SizeOfRawData + .long reloc_start # PointerToRawData + .long 0 # PointerToRelocations + .long 0 # PointerToLineNumbers + .word 0 # NumberOfRelocations + .word 0 # NumberOfLineNumbers + .long 0x42100040 # Characteristics (section flags) +#endif /* CONFIG_EFI_STUB */ # Kernel attributes; used by setup. This is part 1 of the # header, from the old boot sector. @@ -318,3 +466,13 @@ die: setup_corrupt: .byte 7 .string "No setup signature found...\n" + + .data +dummy: .long 0 + + .section .reloc +reloc_start: + .long dummy - reloc_start + .long 10 + .word 0 +reloc_end: diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index 3cbc4058dd2..574dedfe289 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -111,3 +111,38 @@ unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int bas return result; } + +/** + * strlen - Find the length of a string + * @s: The string to be sized + */ +size_t strlen(const char *s) +{ + const char *sc; + + for (sc = s; *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} + +/** + * strstr - Find the first substring in a %NUL terminated string + * @s1: The string to be searched + * @s2: The string to search for + */ +char *strstr(const char *s1, const char *s2) +{ + size_t l1, l2; + + l2 = strlen(s2); + if (!l2) + return (char *)s1; + l1 = strlen(s1); + while (l1 >= l2) { + l1--; + if (!memcmp(s1, s2, l2)) + return (char *)s1; + s1++; + } + return NULL; +} diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index fdc60a0b3c2..4e9bd6bcafa 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -135,6 +135,9 @@ static void usage(void) int main(int argc, char ** argv) { +#ifdef CONFIG_EFI_STUB + unsigned int file_sz, pe_header; +#endif unsigned int i, sz, setup_sectors; int c; u32 sys_size; @@ -194,6 +197,42 @@ int main(int argc, char ** argv) buf[0x1f6] = sys_size >> 16; buf[0x1f7] = sys_size >> 24; +#ifdef CONFIG_EFI_STUB + file_sz = sz + i + ((sys_size * 16) - sz); + + pe_header = *(unsigned int *)&buf[0x3c]; + + /* Size of code */ + *(unsigned int *)&buf[pe_header + 0x1c] = file_sz; + + /* Size of image */ + *(unsigned int *)&buf[pe_header + 0x50] = file_sz; + +#ifdef CONFIG_X86_32 + /* Address of entry point */ + *(unsigned int *)&buf[pe_header + 0x28] = i; + + /* .text size */ + *(unsigned int *)&buf[pe_header + 0xb0] = file_sz; + + /* .text size of initialised data */ + *(unsigned int *)&buf[pe_header + 0xb8] = file_sz; +#else + /* + * Address of entry point. startup_32 is at the beginning and + * the 64-bit entry point (startup_64) is always 512 bytes + * after. + */ + *(unsigned int *)&buf[pe_header + 0x28] = i + 512; + + /* .text size */ + *(unsigned int *)&buf[pe_header + 0xc0] = file_sz; + + /* .text size of initialised data */ + *(unsigned int *)&buf[pe_header + 0xc8] = file_sz; +#endif /* CONFIG_X86_32 */ +#endif /* CONFIG_EFI_STUB */ + crc = partial_crc32(buf, i, crc); if (fwrite(buf, 1, i, stdout) != i) die("Writing setup failed"); diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 4f13fafc526..68de2dc962e 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -67,4 +67,6 @@ void common(void) { OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); OFFSET(BP_version, boot_params, hdr.version); OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment); + OFFSET(BP_pref_address, boot_params, hdr.pref_address); + OFFSET(BP_code32_start, boot_params, hdr.code32_start); } -- cgit v1.2.3-70-g09d2 From 549c89b98c4530b278dde1a3f68ce5ebbb1e6304 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 29 Nov 2011 12:44:55 -0800 Subject: x86: Do not schedule while still in NMI context The NMI handler uses the paranoid_exit routine that checks the NEED_RESCHED flag, and if it is set and the return is for userspace, then interrupts are enabled, the stack is swapped to the thread's stack, and schedule is called. The problem with this is that we are still in an NMI context until an iret is executed. This means that any new NMIs are now starved until an interrupt or exception occurs and does the iret. As NMIs can not be masked and can interrupt any location, they are treated as a special case. NEED_RESCHED should not be set in an NMI handler. The interruption by the NMI should not disturb the work flow for scheduling. Any IPI sent to a processor after sending the NEED_RESCHED would have to wait for the NMI anyway, and after the IPI finishes the schedule would be called as required. There is no reason to do anything special leaving an NMI. Remove the call to paranoid_exit and do a simple return. This not only fixes the bug of starved NMIs, but it also cleans up the code. Link: http://lkml.kernel.org/r/CA+55aFzgM55hXTs4griX5e9=v_O+=ue+7Rj0PTD=M7hFYpyULQ@mail.gmail.com Acked-by: Andi Kleen Cc: Ingo Molnar Cc: Peter Zijlstra Cc: "H. Peter Anvin" Cc: Frederic Weisbecker Cc: Thomas Gleixner Cc: Paul Turner Signed-off-by: Linus Torvalds Signed-off-by: Steven Rostedt --- arch/x86/kernel/entry_64.S | 32 -------------------------------- 1 file changed, 32 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index faf8d5e74b0..3819ea90733 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1489,46 +1489,14 @@ ENTRY(nmi) movq %rsp,%rdi movq $-1,%rsi call do_nmi -#ifdef CONFIG_TRACE_IRQFLAGS - /* paranoidexit; without TRACE_IRQS_OFF */ - /* ebx: no swapgs flag */ - DISABLE_INTERRUPTS(CLBR_NONE) testl %ebx,%ebx /* swapgs needed? */ jnz nmi_restore - testl $3,CS(%rsp) - jnz nmi_userspace nmi_swapgs: SWAPGS_UNSAFE_STACK nmi_restore: RESTORE_ALL 8 jmp irq_return -nmi_userspace: - GET_THREAD_INFO(%rcx) - movl TI_flags(%rcx),%ebx - andl $_TIF_WORK_MASK,%ebx - jz nmi_swapgs - movq %rsp,%rdi /* &pt_regs */ - call sync_regs - movq %rax,%rsp /* switch stack for scheduling */ - testl $_TIF_NEED_RESCHED,%ebx - jnz nmi_schedule - movl %ebx,%edx /* arg3: thread flags */ - ENABLE_INTERRUPTS(CLBR_NONE) - xorl %esi,%esi /* arg2: oldset */ - movq %rsp,%rdi /* arg1: &pt_regs */ - call do_notify_resume - DISABLE_INTERRUPTS(CLBR_NONE) - jmp nmi_userspace -nmi_schedule: - ENABLE_INTERRUPTS(CLBR_ANY) - call schedule - DISABLE_INTERRUPTS(CLBR_ANY) - jmp nmi_userspace CFI_ENDPROC -#else - jmp paranoid_exit - CFI_ENDPROC -#endif END(nmi) ENTRY(ignore_sysret) -- cgit v1.2.3-70-g09d2 From 1fd466efc88c48f50e5ee29f4dbb4e210a889172 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 8 Dec 2011 12:32:27 -0500 Subject: x86: Document the NMI handler about not using paranoid_exit Linus cleaned up the NMI handler but it still needs some comments to explain why it uses save_paranoid but not paranoid_exit. Just to keep others from adding that in the future, document why it's not used. Cc: Linus Torvalds Cc: Andi Kleen Signed-off-by: Steven Rostedt --- arch/x86/kernel/entry_64.S | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 3819ea90733..d1d5434e7f6 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1480,9 +1480,16 @@ END(error_exit) ENTRY(nmi) INTR_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME - pushq_cfi $-1 + pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ subq $ORIG_RAX-R15, %rsp CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 + /* + * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit + * as we should not be calling schedule in NMI context. + * Even with normal interrupts enabled. An NMI should not be + * setting NEED_RESCHED or anything that normal interrupts and + * exceptions might do. + */ call save_paranoid DEFAULT_FRAME 0 /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ -- cgit v1.2.3-70-g09d2 From 3f3c8b8c4b2a34776c3470142a7c8baafcda6eb0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 8 Dec 2011 12:36:23 -0500 Subject: x86: Add workaround to NMI iret woes In x86, when an NMI goes off, the CPU goes into an NMI context that prevents other NMIs to trigger on that CPU. If an NMI is suppose to trigger, it has to wait till the previous NMI leaves NMI context. At that time, the next NMI can trigger (note, only one more NMI will trigger, as only one can be latched at a time). The way x86 gets out of NMI context is by calling iret. The problem with this is that this causes problems if the NMI handle either triggers an exception, or a breakpoint. Both the exception and the breakpoint handlers will finish with an iret. If this happens while in NMI context, the CPU will leave NMI context and a new NMI may come in. As NMI handlers are not made to be re-entrant, this can cause havoc with the system, not to mention, the nested NMI will write all over the previous NMI's stack. Linus Torvalds proposed the following workaround to this problem: https://lkml.org/lkml/2010/7/14/264 "In fact, I wonder if we couldn't just do a software NMI disable instead? Hav ea per-cpu variable (in the _core_ percpu areas that get allocated statically) that points to the NMI stack frame, and just make the NMI code itself do something like NMI entry: - load percpu NMI stack frame pointer - if non-zero we know we're nested, and should ignore this NMI: - we're returning to kernel mode, so return immediately by using "popf/ret", which also keeps NMI's disabled in the hardware until the "real" NMI iret happens. - before the popf/iret, use the NMI stack pointer to make the NMI return stack be invalid and cause a fault - set the NMI stack pointer to the current stack pointer NMI exit (not the above "immediate exit because we nested"): clear the percpu NMI stack pointer Just do the iret. Now, the thing is, now the "iret" is atomic. If we had a nested NMI, we'll take a fault, and that re-does our "delayed" NMI - and NMI's will stay masked. And if we didn't have a nested NMI, that iret will now unmask NMI's, and everything is happy." I first tried to follow this advice but as I started implementing this code, a few gotchas showed up. One, is accessing per-cpu variables in the NMI handler. The problem is that per-cpu variables use the %gs register to get the variable for the given CPU. But as the NMI may happen in userspace, we must first perform a SWAPGS to get to it. The NMI handler already does this later in the code, but its too late as we have saved off all the registers and we don't want to do that for a disabled NMI. Peter Zijlstra suggested to keep all variables on the stack. This simplifies things greatly and it has the added benefit of cache locality. Two, faulting on the iret. I really wanted to make this work, but it was becoming very hacky, and I never got it to be stable. The iret already had a fault handler for userspace faulting with bad segment registers, and getting NMI to trigger a fault and detect it was very tricky. But for strange reasons, the system would usually take a double fault and crash. I never figured out why and decided to go with a simple "jmp" approach. The new approach I took also simplified things. Finally, the last problem with Linus's approach was to have the nested NMI handler do a ret instead of an iret to give the first NMI NMI-context again. The problem is that ret is much more limited than an iret. I couldn't figure out how to get the stack back where it belonged. I could have copied the current stack, pushed the return onto it, but my fear here is that there may be some place that writes data below the stack pointer. I know that is not something code should depend on, but I don't want to chance it. I may add this feature later, but for now, an NMI handler that loses NMI context will not get it back. Here's what is done: When an NMI comes in, the HW pushes the interrupt stack frame onto the per cpu NMI stack that is selected by the IST. A special location on the NMI stack holds a variable that is set when the first NMI handler runs. If this variable is set then we know that this is a nested NMI and we process the nested NMI code. There is still a race when this variable is cleared and an NMI comes in just before the first NMI does the return. For this case, if the variable is cleared, we also check if the interrupted stack is the NMI stack. If it is, then we process the nested NMI code. Why the two tests and not just test the interrupted stack? If the first NMI hits a breakpoint and loses NMI context, and then it hits another breakpoint and while processing that breakpoint we get a nested NMI. When processing a breakpoint, the stack changes to the breakpoint stack. If another NMI comes in here we can't rely on the interrupted stack to be the NMI stack. If the variable is not set and the interrupted task's stack is not the NMI stack, then we know this is the first NMI and we can process things normally. But in order to do so, we need to do a few things first. 1) Set the stack variable that tells us that we are in an NMI handler 2) Make two copies of the interrupt stack frame. One copy is used to return on iret The other is used to restore the first one if we have a nested NMI. This is what the stack will look like: +-------------------------+ | original SS | | original Return RSP | | original RFLAGS | | original CS | | original RIP | +-------------------------+ | temp storage for rdx | +-------------------------+ | NMI executing variable | +-------------------------+ | Saved SS | | Saved Return RSP | | Saved RFLAGS | | Saved CS | | Saved RIP | +-------------------------+ | copied SS | | copied Return RSP | | copied RFLAGS | | copied CS | | copied RIP | +-------------------------+ | pt_regs | +-------------------------+ The original stack frame contains what the HW put in when we entered the NMI. We store %rdx as a temp variable to use. Both the original HW stack frame and this %rdx storage will be clobbered by nested NMIs so we can not rely on them later in the first NMI handler. The next item is the special stack variable that is set when we execute the rest of the NMI handler. Then we have two copies of the interrupt stack. The second copy is modified by any nested NMIs to let the first NMI know that we triggered a second NMI (latched) and that we should repeat the NMI handler. If the first NMI hits an exception or breakpoint that takes it out of NMI context, if a second NMI comes in before the first one finishes, it will update the copied interrupt stack to point to a fix up location to trigger another NMI. When the first NMI calls iret, it will instead jump to the fix up location. This fix up location will copy the saved interrupt stack back to the copy and execute the nmi handler again. Note, the nested NMI knows enough to check if it preempted a previous NMI handler while it is in the fixup location. If it has, it will not modify the copied interrupt stack and will just leave as if nothing happened. As the NMI handle is about to execute again, there's no reason to latch now. To test all this, I forced the NMI handler to call iret and take itself out of NMI context. I also added assemble code to write to the serial to make sure that it hits the nested path as well as the fix up path. Everything seems to be working fine. Cc: Linus Torvalds Cc: Peter Zijlstra Cc: H. Peter Anvin Cc: Thomas Gleixner Cc: Paul Turner Cc: Frederic Weisbecker Cc: Mathieu Desnoyers Signed-off-by: Steven Rostedt --- arch/x86/kernel/entry_64.S | 177 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 177 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index d1d5434e7f6..b62aa298df7 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1475,11 +1475,166 @@ ENTRY(error_exit) CFI_ENDPROC END(error_exit) +/* + * Test if a given stack is an NMI stack or not. + */ + .macro test_in_nmi reg stack nmi_ret normal_ret + cmpq %\reg, \stack + ja \normal_ret + subq $EXCEPTION_STKSZ, %\reg + cmpq %\reg, \stack + jb \normal_ret + jmp \nmi_ret + .endm /* runs on exception stack */ ENTRY(nmi) INTR_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME + /* + * We allow breakpoints in NMIs. If a breakpoint occurs, then + * the iretq it performs will take us out of NMI context. + * This means that we can have nested NMIs where the next + * NMI is using the top of the stack of the previous NMI. We + * can't let it execute because the nested NMI will corrupt the + * stack of the previous NMI. NMI handlers are not re-entrant + * anyway. + * + * To handle this case we do the following: + * Check the a special location on the stack that contains + * a variable that is set when NMIs are executing. + * The interrupted task's stack is also checked to see if it + * is an NMI stack. + * If the variable is not set and the stack is not the NMI + * stack then: + * o Set the special variable on the stack + * o Copy the interrupt frame into a "saved" location on the stack + * o Copy the interrupt frame into a "copy" location on the stack + * o Continue processing the NMI + * If the variable is set or the previous stack is the NMI stack: + * o Modify the "copy" location to jump to the repeate_nmi + * o return back to the first NMI + * + * Now on exit of the first NMI, we first clear the stack variable + * The NMI stack will tell any nested NMIs at that point that it is + * nested. Then we pop the stack normally with iret, and if there was + * a nested NMI that updated the copy interrupt stack frame, a + * jump will be made to the repeat_nmi code that will handle the second + * NMI. + */ + + /* Use %rdx as out temp variable throughout */ + pushq_cfi %rdx + + /* + * Check the special variable on the stack to see if NMIs are + * executing. + */ + cmp $1, -8(%rsp) + je nested_nmi + + /* + * Now test if the previous stack was an NMI stack. + * We need the double check. We check the NMI stack to satisfy the + * race when the first NMI clears the variable before returning. + * We check the variable because the first NMI could be in a + * breakpoint routine using a breakpoint stack. + */ + lea 6*8(%rsp), %rdx + test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi + +nested_nmi: + /* + * Do nothing if we interrupted the fixup in repeat_nmi. + * It's about to repeat the NMI handler, so we are fine + * with ignoring this one. + */ + movq $repeat_nmi, %rdx + cmpq 8(%rsp), %rdx + ja 1f + movq $end_repeat_nmi, %rdx + cmpq 8(%rsp), %rdx + ja nested_nmi_out + +1: + /* Set up the interrupted NMIs stack to jump to repeat_nmi */ + leaq -6*8(%rsp), %rdx + movq %rdx, %rsp + CFI_ADJUST_CFA_OFFSET 6*8 + pushq_cfi $__KERNEL_DS + pushq_cfi %rdx + pushfq_cfi + pushq_cfi $__KERNEL_CS + pushq_cfi $repeat_nmi + + /* Put stack back */ + addq $(11*8), %rsp + CFI_ADJUST_CFA_OFFSET -11*8 + +nested_nmi_out: + popq_cfi %rdx + + /* No need to check faults here */ + INTERRUPT_RETURN + +first_nmi: + /* + * Because nested NMIs will use the pushed location that we + * stored in rdx, we must keep that space available. + * Here's what our stack frame will look like: + * +-------------------------+ + * | original SS | + * | original Return RSP | + * | original RFLAGS | + * | original CS | + * | original RIP | + * +-------------------------+ + * | temp storage for rdx | + * +-------------------------+ + * | NMI executing variable | + * +-------------------------+ + * | Saved SS | + * | Saved Return RSP | + * | Saved RFLAGS | + * | Saved CS | + * | Saved RIP | + * +-------------------------+ + * | copied SS | + * | copied Return RSP | + * | copied RFLAGS | + * | copied CS | + * | copied RIP | + * +-------------------------+ + * | pt_regs | + * +-------------------------+ + * + * The saved RIP is used to fix up the copied RIP that a nested + * NMI may zero out. The original stack frame and the temp storage + * is also used by nested NMIs and can not be trusted on exit. + */ + /* Set the NMI executing variable on the stack. */ + pushq_cfi $1 + + /* Copy the stack frame to the Saved frame */ + .rept 5 + pushq_cfi 6*8(%rsp) + .endr + + /* Make another copy, this one may be modified by nested NMIs */ + .rept 5 + pushq_cfi 4*8(%rsp) + .endr + + /* Do not pop rdx, nested NMIs will corrupt it */ + movq 11*8(%rsp), %rdx + + /* + * Everything below this point can be preempted by a nested + * NMI if the first NMI took an exception. Repeated NMIs + * caused by an exception and nested NMI will start here, and + * can still be preempted by another NMI. + */ +restart_nmi: pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ subq $ORIG_RAX-R15, %rsp CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 @@ -1502,10 +1657,32 @@ nmi_swapgs: SWAPGS_UNSAFE_STACK nmi_restore: RESTORE_ALL 8 + /* Clear the NMI executing stack variable */ + movq $0, 10*8(%rsp) jmp irq_return CFI_ENDPROC END(nmi) + /* + * If an NMI hit an iret because of an exception or breakpoint, + * it can lose its NMI context, and a nested NMI may come in. + * In that case, the nested NMI will change the preempted NMI's + * stack to jump to here when it does the final iret. + */ +repeat_nmi: + INTR_FRAME + /* Update the stack variable to say we are still in NMI */ + movq $1, 5*8(%rsp) + + /* copy the saved stack back to copy stack */ + .rept 5 + pushq_cfi 4*8(%rsp) + .endr + + jmp restart_nmi + CFI_ENDPROC +end_repeat_nmi: + ENTRY(ignore_sysret) CFI_STARTPROC mov $-ENOSYS,%eax -- cgit v1.2.3-70-g09d2 From 228bdaa95fb830e08b6acd1afd4d2c55093cabfa Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 9 Dec 2011 03:02:19 -0500 Subject: x86: Keep current stack in NMI breakpoints We want to allow NMI handlers to have breakpoints to be able to remove stop_machine from ftrace, kprobes and jump_labels. But if an NMI interrupts a current breakpoint, and then it triggers a breakpoint itself, it will switch to the breakpoint stack and corrupt the data on it for the breakpoint processing that it interrupted. Instead, have the NMI check if it interrupted breakpoint processing by checking if the stack that is currently used is a breakpoint stack. If it is, then load a special IDT that changes the IST for the debug exception to keep the same stack in kernel context. When the NMI is done, it puts it back. This way, if the NMI does trigger a breakpoint, it will keep using the same stack and not stomp on the breakpoint data for the breakpoint it interrupted. Suggested-by: Peter Zijlstra Signed-off-by: Steven Rostedt --- arch/x86/include/asm/desc.h | 12 ++++++++++++ arch/x86/include/asm/processor.h | 6 ++++++ arch/x86/kernel/cpu/common.c | 22 ++++++++++++++++++++++ arch/x86/kernel/head_64.S | 4 ++++ arch/x86/kernel/nmi.c | 15 +++++++++++++++ arch/x86/kernel/traps.c | 6 ++++++ 6 files changed, 65 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 41935fadfdf..e95822d683f 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -35,6 +35,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in extern struct desc_ptr idt_descr; extern gate_desc idt_table[]; +extern struct desc_ptr nmi_idt_descr; +extern gate_desc nmi_idt_table[]; struct gdt_page { struct desc_struct gdt[GDT_ENTRIES]; @@ -307,6 +309,16 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit) desc->limit = (limit >> 16) & 0xf; } +#ifdef CONFIG_X86_64 +static inline void set_nmi_gate(int gate, void *addr) +{ + gate_desc s; + + pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS); + write_idt_entry(nmi_idt_table, gate, &s); +} +#endif + static inline void _set_gate(int gate, unsigned type, void *addr, unsigned dpl, unsigned ist, unsigned seg) { diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index b650435ffb5..4b39d6d7e3a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -402,6 +402,9 @@ DECLARE_PER_CPU(char *, irq_stack_ptr); DECLARE_PER_CPU(unsigned int, irq_count); extern unsigned long kernel_eflags; extern asmlinkage void ignore_sysret(void); +int is_debug_stack(unsigned long addr); +void debug_stack_set_zero(void); +void debug_stack_reset(void); #else /* X86_64 */ #ifdef CONFIG_CC_STACKPROTECTOR /* @@ -416,6 +419,9 @@ struct stack_canary { }; DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); #endif +static inline int is_debug_stack(unsigned long addr) { return 0; } +static inline void debug_stack_set_zero(void) { } +static inline void debug_stack_reset(void) { } #endif /* X86_64 */ extern unsigned int xstate_size; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index aa003b13a83..caa404556b9 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1026,6 +1026,8 @@ __setup("clearcpuid=", setup_disablecpuid); #ifdef CONFIG_X86_64 struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; +struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1, + (unsigned long) nmi_idt_table }; DEFINE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __aligned(PAGE_SIZE); @@ -1090,6 +1092,24 @@ unsigned long kernel_eflags; */ DEFINE_PER_CPU(struct orig_ist, orig_ist); +static DEFINE_PER_CPU(unsigned long, debug_stack_addr); + +int is_debug_stack(unsigned long addr) +{ + return addr <= __get_cpu_var(debug_stack_addr) && + addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ); +} + +void debug_stack_set_zero(void) +{ + load_idt((const struct desc_ptr *)&nmi_idt_descr); +} + +void debug_stack_reset(void) +{ + load_idt((const struct desc_ptr *)&idt_descr); +} + #else /* CONFIG_X86_64 */ DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; @@ -1208,6 +1228,8 @@ void __cpuinit cpu_init(void) estacks += exception_stack_sizes[v]; oist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; + if (v == DEBUG_STACK-1) + per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks; } } diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index e11e39478a4..40f4eb3766d 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -417,6 +417,10 @@ ENTRY(phys_base) ENTRY(idt_table) .skip IDT_ENTRIES * 16 + .align L1_CACHE_BYTES +ENTRY(nmi_idt_table) + .skip IDT_ENTRIES * 16 + __PAGE_ALIGNED_BSS .align PAGE_SIZE ENTRY(empty_zero_page) diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index e88f37b58dd..de8d4b333f4 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -408,6 +408,18 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) dotraplinkage notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code) { + int update_debug_stack = 0; + + /* + * If we interrupted a breakpoint, it is possible that + * the nmi handler will have breakpoints too. We need to + * change the IDT such that breakpoints that happen here + * continue to use the NMI stack. + */ + if (unlikely(is_debug_stack(regs->sp))) { + debug_stack_set_zero(); + update_debug_stack = 1; + } nmi_enter(); inc_irq_stat(__nmi_count); @@ -416,6 +428,9 @@ do_nmi(struct pt_regs *regs, long error_code) default_do_nmi(regs); nmi_exit(); + + if (unlikely(update_debug_stack)) + debug_stack_reset(); } void stop_nmi(void) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a8e3eb83466..a93c5cabc36 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -723,4 +723,10 @@ void __init trap_init(void) cpu_init(); x86_init.irqs.trap_init(); + +#ifdef CONFIG_X86_64 + memcpy(&nmi_idt_table, &idt_table, IDT_ENTRIES * 16); + set_nmi_gate(1, &debug); + set_nmi_gate(3, &int3); +#endif } -- cgit v1.2.3-70-g09d2 From ccd49c2391773ffbf52bb80d75c4a92b16972517 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 13 Dec 2011 16:44:16 -0500 Subject: x86: Allow NMIs to hit breakpoints in i386 With i386, NMIs and breakpoints use the current stack and they do not reset the stack pointer to a fix point that might corrupt a previous NMI or breakpoint (as it does in x86_64). But NMIs are still not made to be re-entrant, and need to prevent the case that an NMI hitting a breakpoint (which does an iret), doesn't allow another NMI to run. The fix is to let the NMI be in 3 different states: 1) not running 2) executing 3) latched When no NMI is executing on a given CPU, the state is "not running". When the first NMI comes in, the state is switched to "executing". On exit of that NMI, a cmpxchg is performed to switch the state back to "not running" and if that fails, the NMI is restarted. If a breakpoint is hit and does an iret, which re-enables NMIs, and another NMI comes in before the first NMI finished, it will detect that the state is not in the "not running" state and the current NMI is nested. In this case, the state is switched to "latched" to let the interrupted NMI know to restart the NMI handler, and the nested NMI exits without doing anything. Cc: Linus Torvalds Cc: Peter Zijlstra Cc: H. Peter Anvin Cc: Thomas Gleixner Cc: Paul Turner Cc: Frederic Weisbecker Cc: Mathieu Desnoyers Signed-off-by: Steven Rostedt --- arch/x86/kernel/nmi.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 94 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index de8d4b333f4..47acaf31916 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -405,11 +405,84 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) unknown_nmi_error(reason, regs); } -dotraplinkage notrace __kprobes void -do_nmi(struct pt_regs *regs, long error_code) -{ - int update_debug_stack = 0; +/* + * NMIs can hit breakpoints which will cause it to lose its + * NMI context with the CPU when the breakpoint does an iret. + */ +#ifdef CONFIG_X86_32 +/* + * For i386, NMIs use the same stack as the kernel, and we can + * add a workaround to the iret problem in C. Simply have 3 states + * the NMI can be in. + * + * 1) not running + * 2) executing + * 3) latched + * + * When no NMI is in progress, it is in the "not running" state. + * When an NMI comes in, it goes into the "executing" state. + * Normally, if another NMI is triggered, it does not interrupt + * the running NMI and the HW will simply latch it so that when + * the first NMI finishes, it will restart the second NMI. + * (Note, the latch is binary, thus multiple NMIs triggering, + * when one is running, are ignored. Only one NMI is restarted.) + * + * If an NMI hits a breakpoint that executes an iret, another + * NMI can preempt it. We do not want to allow this new NMI + * to run, but we want to execute it when the first one finishes. + * We set the state to "latched", and the first NMI will perform + * an cmpxchg on the state, and if it doesn't successfully + * reset the state to "not running" it will restart the next + * NMI. + */ +enum nmi_states { + NMI_NOT_RUNNING, + NMI_EXECUTING, + NMI_LATCHED, +}; +static DEFINE_PER_CPU(enum nmi_states, nmi_state); + +#define nmi_nesting_preprocess(regs) \ + do { \ + if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) { \ + __get_cpu_var(nmi_state) = NMI_LATCHED; \ + return; \ + } \ + nmi_restart: \ + __get_cpu_var(nmi_state) = NMI_EXECUTING; \ + } while (0) + +#define nmi_nesting_postprocess() \ + do { \ + if (cmpxchg(&__get_cpu_var(nmi_state), \ + NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING) \ + goto nmi_restart; \ + } while (0) +#else /* x86_64 */ +/* + * In x86_64 things are a bit more difficult. This has the same problem + * where an NMI hitting a breakpoint that calls iret will remove the + * NMI context, allowing a nested NMI to enter. What makes this more + * difficult is that both NMIs and breakpoints have their own stack. + * When a new NMI or breakpoint is executed, the stack is set to a fixed + * point. If an NMI is nested, it will have its stack set at that same + * fixed address that the first NMI had, and will start corrupting the + * stack. This is handled in entry_64.S, but the same problem exists with + * the breakpoint stack. + * + * If a breakpoint is being processed, and the debug stack is being used, + * if an NMI comes in and also hits a breakpoint, the stack pointer + * will be set to the same fixed address as the breakpoint that was + * interrupted, causing that stack to be corrupted. To handle this case, + * check if the stack that was interrupted is the debug stack, and if + * so, change the IDT so that new breakpoints will use the current stack + * and not switch to the fixed address. On return of the NMI, switch back + * to the original IDT. + */ +static DEFINE_PER_CPU(int, update_debug_stack); +static inline void nmi_nesting_preprocess(struct pt_regs *regs) +{ /* * If we interrupted a breakpoint, it is possible that * the nmi handler will have breakpoints too. We need to @@ -418,8 +491,22 @@ do_nmi(struct pt_regs *regs, long error_code) */ if (unlikely(is_debug_stack(regs->sp))) { debug_stack_set_zero(); - update_debug_stack = 1; + __get_cpu_var(update_debug_stack) = 1; } +} + +static inline void nmi_nesting_postprocess(void) +{ + if (unlikely(__get_cpu_var(update_debug_stack))) + debug_stack_reset(); +} +#endif + +dotraplinkage notrace __kprobes void +do_nmi(struct pt_regs *regs, long error_code) +{ + nmi_nesting_preprocess(regs); + nmi_enter(); inc_irq_stat(__nmi_count); @@ -429,8 +516,8 @@ do_nmi(struct pt_regs *regs, long error_code) nmi_exit(); - if (unlikely(update_debug_stack)) - debug_stack_reset(); + /* On i386, may loop back to preprocess */ + nmi_nesting_postprocess(); } void stop_nmi(void) -- cgit v1.2.3-70-g09d2 From 42181186ad4db986fcaa40ca95c6e407e9e79372 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 16 Dec 2011 11:43:02 -0500 Subject: x86: Add counter when debug stack is used with interrupts enabled Mathieu Desnoyers pointed out a case that can cause issues with NMIs running on the debug stack: int3 -> interrupt -> NMI -> int3 Because the interrupt changes the stack, the NMI will not see that it preempted the debug stack. Looking deeper at this case, interrupts only happen when the int3 is from userspace or in an a location in the exception table (fixup). userspace -> int3 -> interurpt -> NMI -> int3 All other int3s that happen in the kernel should be processed without ever enabling interrupts, as the do_trap() call will panic the kernel if it is called to process any other location within the kernel. Adding a counter around the sections that enable interrupts while using the debug stack allows the NMI to also check that case. If the NMI sees that it either interrupted a task using the debug stack or the debug counter is non-zero, then it will have to change the IDT table to make the int3 not change stacks (which will corrupt the stack if it does). Note, I had to move the debug_usage functions out of processor.h and into debugreg.h because of the static inlined functions to inc and dec the debug_usage counter. __get_cpu_var() requires smp.h which includes processor.h, and would fail to build. Link: http://lkml.kernel.org/r/1323976535.23971.112.camel@gandalf.stny.rr.com Reported-by: Mathieu Desnoyers Cc: Linus Torvalds Cc: Peter Zijlstra Cc: H. Peter Anvin Cc: Thomas Gleixner Cc: Paul Turner Cc: Frederic Weisbecker Signed-off-by: Steven Rostedt --- arch/x86/include/asm/debugreg.h | 22 ++++++++++++++++++++++ arch/x86/include/asm/processor.h | 6 ------ arch/x86/kernel/cpu/common.c | 6 ++++-- arch/x86/kernel/traps.c | 14 ++++++++++++++ 4 files changed, 40 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 078ad0caefc..b903d5ea394 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -101,6 +101,28 @@ extern void aout_dump_debugregs(struct user *dump); extern void hw_breakpoint_restore(void); +#ifdef CONFIG_X86_64 +DECLARE_PER_CPU(int, debug_stack_usage); +static inline void debug_stack_usage_inc(void) +{ + __get_cpu_var(debug_stack_usage)++; +} +static inline void debug_stack_usage_dec(void) +{ + __get_cpu_var(debug_stack_usage)--; +} +int is_debug_stack(unsigned long addr); +void debug_stack_set_zero(void); +void debug_stack_reset(void); +#else /* !X86_64 */ +static inline int is_debug_stack(unsigned long addr) { return 0; } +static inline void debug_stack_set_zero(void) { } +static inline void debug_stack_reset(void) { } +static inline void debug_stack_usage_inc(void) { } +static inline void debug_stack_usage_dec(void) { } +#endif /* X86_64 */ + + #endif /* __KERNEL__ */ #endif /* _ASM_X86_DEBUGREG_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 4b39d6d7e3a..b650435ffb5 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -402,9 +402,6 @@ DECLARE_PER_CPU(char *, irq_stack_ptr); DECLARE_PER_CPU(unsigned int, irq_count); extern unsigned long kernel_eflags; extern asmlinkage void ignore_sysret(void); -int is_debug_stack(unsigned long addr); -void debug_stack_set_zero(void); -void debug_stack_reset(void); #else /* X86_64 */ #ifdef CONFIG_CC_STACKPROTECTOR /* @@ -419,9 +416,6 @@ struct stack_canary { }; DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); #endif -static inline int is_debug_stack(unsigned long addr) { return 0; } -static inline void debug_stack_set_zero(void) { } -static inline void debug_stack_reset(void) { } #endif /* X86_64 */ extern unsigned int xstate_size; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index caa404556b9..266e4649b1d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1093,11 +1093,13 @@ unsigned long kernel_eflags; DEFINE_PER_CPU(struct orig_ist, orig_ist); static DEFINE_PER_CPU(unsigned long, debug_stack_addr); +DEFINE_PER_CPU(int, debug_stack_usage); int is_debug_stack(unsigned long addr) { - return addr <= __get_cpu_var(debug_stack_addr) && - addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ); + return __get_cpu_var(debug_stack_usage) || + (addr <= __get_cpu_var(debug_stack_addr) && + addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); } void debug_stack_set_zero(void) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a93c5cabc36..0072b38e3ea 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -316,9 +316,15 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) return; #endif + /* + * Let others (NMI) know that the debug stack is in use + * as we may switch to the interrupt stack. + */ + debug_stack_usage_inc(); preempt_conditional_sti(regs); do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); preempt_conditional_cli(regs); + debug_stack_usage_dec(); } #ifdef CONFIG_X86_64 @@ -411,6 +417,12 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) SIGTRAP) == NOTIFY_STOP) return; + /* + * Let others (NMI) know that the debug stack is in use + * as we may switch to the interrupt stack. + */ + debug_stack_usage_inc(); + /* It's safe to allow irq's after DR6 has been saved */ preempt_conditional_sti(regs); @@ -418,6 +430,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); preempt_conditional_cli(regs); + debug_stack_usage_dec(); return; } @@ -437,6 +450,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) send_sigtrap(tsk, regs, error_code, si_code); preempt_conditional_cli(regs); + debug_stack_usage_dec(); return; } -- cgit v1.2.3-70-g09d2 From 8a25a2fd126c621f44f3aeaef80d51f00fc11639 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Wed, 21 Dec 2011 14:29:42 -0800 Subject: cpu: convert 'cpu' and 'machinecheck' sysdev_class to a regular subsystem This moves the 'cpu sysdev_class' over to a regular 'cpu' subsystem and converts the devices to regular devices. The sysdev drivers are implemented as subsystem interfaces now. After all sysdev classes are ported to regular driver core entities, the sysdev implementation will be entirely removed from the kernel. Userspace relies on events and generic sysfs subsystem infrastructure from sysdev devices, which are made available with this conversion. Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Cc: Tony Luck Cc: Fenghua Yu Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Paul Mundt Cc: "David S. Miller" Cc: Chris Metcalf Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Borislav Petkov Cc: Tigran Aivazian Cc: Len Brown Cc: Zhang Rui Cc: Dave Jones Cc: Peter Zijlstra Cc: Russell King Cc: Andrew Morton Cc: Arjan van de Ven Cc: "Rafael J. Wysocki" Cc: "Srivatsa S. Bhat" Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- arch/avr32/kernel/cpu.c | 74 +++---- arch/ia64/kernel/err_inject.c | 52 ++--- arch/ia64/kernel/topology.c | 10 +- arch/powerpc/include/asm/spu.h | 12 +- arch/powerpc/include/asm/topology.h | 10 +- arch/powerpc/kernel/cacheinfo.c | 10 +- arch/powerpc/kernel/smp.c | 2 +- arch/powerpc/kernel/sysfs.c | 257 ++++++++++++------------ arch/powerpc/mm/numa.c | 8 +- arch/powerpc/platforms/cell/cbe_thermal.c | 144 ++++++------- arch/powerpc/platforms/cell/spu_base.c | 61 +++--- arch/powerpc/platforms/pseries/pseries_energy.c | 71 ++++--- arch/powerpc/sysdev/ppc4xx_cpm.c | 6 +- arch/s390/kernel/smp.c | 76 +++---- arch/s390/kernel/topology.c | 6 +- arch/sh/kernel/cpu/sh4/sq.c | 24 ++- arch/sparc/kernel/sysfs.c | 122 +++++------ arch/tile/kernel/sysfs.c | 61 +++--- arch/x86/include/asm/mce.h | 2 +- arch/x86/kernel/cpu/intel_cacheinfo.c | 25 ++- arch/x86/kernel/cpu/mcheck/mce-internal.h | 4 +- arch/x86/kernel/cpu/mcheck/mce.c | 128 ++++++------ arch/x86/kernel/cpu/mcheck/mce_amd.c | 11 +- arch/x86/kernel/cpu/mcheck/therm_throt.c | 63 +++--- arch/x86/kernel/microcode_core.c | 58 +++--- drivers/acpi/processor_driver.c | 6 +- drivers/acpi/processor_thermal.c | 1 - drivers/base/cpu.c | 146 +++++++------- drivers/base/node.c | 8 +- drivers/base/topology.c | 51 +++-- drivers/cpufreq/cpufreq.c | 79 ++++---- drivers/cpufreq/cpufreq_stats.c | 1 - drivers/cpuidle/cpuidle.c | 12 +- drivers/cpuidle/cpuidle.h | 10 +- drivers/cpuidle/sysfs.c | 74 ++++--- drivers/s390/char/sclp_config.c | 8 +- include/linux/cpu.h | 18 +- kernel/sched.c | 40 ++-- 38 files changed, 874 insertions(+), 877 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/avr32/kernel/cpu.c b/arch/avr32/kernel/cpu.c index e84faffbbec..2233be71e2e 100644 --- a/arch/avr32/kernel/cpu.c +++ b/arch/avr32/kernel/cpu.c @@ -6,7 +6,7 @@ * published by the Free Software Foundation. */ #include -#include +#include #include #include #include @@ -26,16 +26,16 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); * XXX: If/when a SMP-capable implementation of AVR32 will ever be * made, we must make sure that the code executes on the correct CPU. */ -static ssize_t show_pc0event(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t show_pc0event(struct device *dev, + struct device_attribute *attr, char *buf) { unsigned long pccr; pccr = sysreg_read(PCCR); return sprintf(buf, "0x%lx\n", (pccr >> 12) & 0x3f); } -static ssize_t store_pc0event(struct sys_device *dev, - struct sysdev_attribute *attr, const char *buf, +static ssize_t store_pc0event(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { unsigned long val; @@ -48,16 +48,16 @@ static ssize_t store_pc0event(struct sys_device *dev, sysreg_write(PCCR, val); return count; } -static ssize_t show_pc0count(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t show_pc0count(struct device *dev, + struct device_attribute *attr, char *buf) { unsigned long pcnt0; pcnt0 = sysreg_read(PCNT0); return sprintf(buf, "%lu\n", pcnt0); } -static ssize_t store_pc0count(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t store_pc0count(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { unsigned long val; @@ -71,16 +71,16 @@ static ssize_t store_pc0count(struct sys_device *dev, return count; } -static ssize_t show_pc1event(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t show_pc1event(struct device *dev, + struct device_attribute *attr, char *buf) { unsigned long pccr; pccr = sysreg_read(PCCR); return sprintf(buf, "0x%lx\n", (pccr >> 18) & 0x3f); } -static ssize_t store_pc1event(struct sys_device *dev, - struct sysdev_attribute *attr, const char *buf, +static ssize_t store_pc1event(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { unsigned long val; @@ -93,16 +93,16 @@ static ssize_t store_pc1event(struct sys_device *dev, sysreg_write(PCCR, val); return count; } -static ssize_t show_pc1count(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t show_pc1count(struct device *dev, + struct device_attribute *attr, char *buf) { unsigned long pcnt1; pcnt1 = sysreg_read(PCNT1); return sprintf(buf, "%lu\n", pcnt1); } -static ssize_t store_pc1count(struct sys_device *dev, - struct sysdev_attribute *attr, const char *buf, +static ssize_t store_pc1count(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { unsigned long val; @@ -116,16 +116,16 @@ static ssize_t store_pc1count(struct sys_device *dev, return count; } -static ssize_t show_pccycles(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t show_pccycles(struct device *dev, + struct device_attribute *attr, char *buf) { unsigned long pccnt; pccnt = sysreg_read(PCCNT); return sprintf(buf, "%lu\n", pccnt); } -static ssize_t store_pccycles(struct sys_device *dev, - struct sysdev_attribute *attr, const char *buf, +static ssize_t store_pccycles(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { unsigned long val; @@ -139,16 +139,16 @@ static ssize_t store_pccycles(struct sys_device *dev, return count; } -static ssize_t show_pcenable(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t show_pcenable(struct device *dev, + struct device_attribute *attr, char *buf) { unsigned long pccr; pccr = sysreg_read(PCCR); return sprintf(buf, "%c\n", (pccr & 1)?'1':'0'); } -static ssize_t store_pcenable(struct sys_device *dev, - struct sysdev_attribute *attr, const char *buf, +static ssize_t store_pcenable(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { unsigned long pccr, val; @@ -167,12 +167,12 @@ static ssize_t store_pcenable(struct sys_device *dev, return count; } -static SYSDEV_ATTR(pc0event, 0600, show_pc0event, store_pc0event); -static SYSDEV_ATTR(pc0count, 0600, show_pc0count, store_pc0count); -static SYSDEV_ATTR(pc1event, 0600, show_pc1event, store_pc1event); -static SYSDEV_ATTR(pc1count, 0600, show_pc1count, store_pc1count); -static SYSDEV_ATTR(pccycles, 0600, show_pccycles, store_pccycles); -static SYSDEV_ATTR(pcenable, 0600, show_pcenable, store_pcenable); +static DEVICE_ATTR(pc0event, 0600, show_pc0event, store_pc0event); +static DEVICE_ATTR(pc0count, 0600, show_pc0count, store_pc0count); +static DEVICE_ATTR(pc1event, 0600, show_pc1event, store_pc1event); +static DEVICE_ATTR(pc1count, 0600, show_pc1count, store_pc1count); +static DEVICE_ATTR(pccycles, 0600, show_pccycles, store_pccycles); +static DEVICE_ATTR(pcenable, 0600, show_pcenable, store_pcenable); #endif /* CONFIG_PERFORMANCE_COUNTERS */ @@ -186,12 +186,12 @@ static int __init topology_init(void) register_cpu(c, cpu); #ifdef CONFIG_PERFORMANCE_COUNTERS - sysdev_create_file(&c->sysdev, &attr_pc0event); - sysdev_create_file(&c->sysdev, &attr_pc0count); - sysdev_create_file(&c->sysdev, &attr_pc1event); - sysdev_create_file(&c->sysdev, &attr_pc1count); - sysdev_create_file(&c->sysdev, &attr_pccycles); - sysdev_create_file(&c->sysdev, &attr_pcenable); + device_create_file(&c->dev, &dev_attr_pc0event); + device_create_file(&c->dev, &dev_attr_pc0count); + device_create_file(&c->dev, &dev_attr_pc1event); + device_create_file(&c->dev, &dev_attr_pc1count); + device_create_file(&c->dev, &dev_attr_pccycles); + device_create_file(&c->dev, &dev_attr_pcenable); #endif } diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c index c539c689493..2d67317a1ec 100644 --- a/arch/ia64/kernel/err_inject.c +++ b/arch/ia64/kernel/err_inject.c @@ -24,7 +24,7 @@ * Copyright (C) 2006, Intel Corp. All rights reserved. * */ -#include +#include #include #include #include @@ -35,10 +35,10 @@ #define ERR_DATA_BUFFER_SIZE 3 // Three 8-byte; #define define_one_ro(name) \ -static SYSDEV_ATTR(name, 0444, show_##name, NULL) +static DEVICE_ATTR(name, 0444, show_##name, NULL) #define define_one_rw(name) \ -static SYSDEV_ATTR(name, 0644, show_##name, store_##name) +static DEVICE_ATTR(name, 0644, show_##name, store_##name) static u64 call_start[NR_CPUS]; static u64 phys_addr[NR_CPUS]; @@ -55,7 +55,7 @@ static u64 resources[NR_CPUS]; #define show(name) \ static ssize_t \ -show_##name(struct sys_device *dev, struct sysdev_attribute *attr, \ +show_##name(struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ u32 cpu=dev->id; \ @@ -64,7 +64,7 @@ show_##name(struct sys_device *dev, struct sysdev_attribute *attr, \ #define store(name) \ static ssize_t \ -store_##name(struct sys_device *dev, struct sysdev_attribute *attr, \ +store_##name(struct device *dev, struct device_attribute *attr, \ const char *buf, size_t size) \ { \ unsigned int cpu=dev->id; \ @@ -78,7 +78,7 @@ show(call_start) * processor. The cpu number in driver is only used for storing data. */ static ssize_t -store_call_start(struct sys_device *dev, struct sysdev_attribute *attr, +store_call_start(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { unsigned int cpu=dev->id; @@ -127,7 +127,7 @@ show(err_type_info) store(err_type_info) static ssize_t -show_virtual_to_phys(struct sys_device *dev, struct sysdev_attribute *attr, +show_virtual_to_phys(struct device *dev, struct device_attribute *attr, char *buf) { unsigned int cpu=dev->id; @@ -135,7 +135,7 @@ show_virtual_to_phys(struct sys_device *dev, struct sysdev_attribute *attr, } static ssize_t -store_virtual_to_phys(struct sys_device *dev, struct sysdev_attribute *attr, +store_virtual_to_phys(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { unsigned int cpu=dev->id; @@ -159,8 +159,8 @@ show(err_struct_info) store(err_struct_info) static ssize_t -show_err_data_buffer(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +show_err_data_buffer(struct device *dev, + struct device_attribute *attr, char *buf) { unsigned int cpu=dev->id; @@ -171,8 +171,8 @@ show_err_data_buffer(struct sys_device *dev, } static ssize_t -store_err_data_buffer(struct sys_device *dev, - struct sysdev_attribute *attr, +store_err_data_buffer(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) { unsigned int cpu=dev->id; @@ -209,14 +209,14 @@ define_one_ro(capabilities); define_one_ro(resources); static struct attribute *default_attrs[] = { - &attr_call_start.attr, - &attr_virtual_to_phys.attr, - &attr_err_type_info.attr, - &attr_err_struct_info.attr, - &attr_err_data_buffer.attr, - &attr_status.attr, - &attr_capabilities.attr, - &attr_resources.attr, + &dev_attr_call_start.attr, + &dev_attr_virtual_to_phys.attr, + &dev_attr_err_type_info.attr, + &dev_attr_err_struct_info.attr, + &dev_attr_err_data_buffer.attr, + &dev_attr_status.attr, + &dev_attr_capabilities.attr, + &dev_attr_resources.attr, NULL }; @@ -225,12 +225,12 @@ static struct attribute_group err_inject_attr_group = { .name = "err_inject" }; /* Add/Remove err_inject interface for CPU device */ -static int __cpuinit err_inject_add_dev(struct sys_device * sys_dev) +static int __cpuinit err_inject_add_dev(struct device * sys_dev) { return sysfs_create_group(&sys_dev->kobj, &err_inject_attr_group); } -static int __cpuinit err_inject_remove_dev(struct sys_device * sys_dev) +static int __cpuinit err_inject_remove_dev(struct device * sys_dev) { sysfs_remove_group(&sys_dev->kobj, &err_inject_attr_group); return 0; @@ -239,9 +239,9 @@ static int __cpuinit err_inject_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; - struct sys_device *sys_dev; + struct device *sys_dev; - sys_dev = get_cpu_sysdev(cpu); + sys_dev = get_cpu_device(cpu); switch (action) { case CPU_ONLINE: case CPU_ONLINE_FROZEN: @@ -283,13 +283,13 @@ static void __exit err_inject_exit(void) { int i; - struct sys_device *sys_dev; + struct device *sys_dev; #ifdef ERR_INJ_DEBUG printk(KERN_INFO "Exit error injection driver.\n"); #endif for_each_online_cpu(i) { - sys_dev = get_cpu_sysdev(i); + sys_dev = get_cpu_device(i); sysfs_remove_group(&sys_dev->kobj, &err_inject_attr_group); } unregister_hotcpu_notifier(&err_inject_cpu_notifier); diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 9be1f11a01d..9deb21dbf62 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -350,7 +350,7 @@ static int __cpuinit cpu_cache_sysfs_init(unsigned int cpu) } /* Add cache interface for CPU device */ -static int __cpuinit cache_add_dev(struct sys_device * sys_dev) +static int __cpuinit cache_add_dev(struct device * sys_dev) { unsigned int cpu = sys_dev->id; unsigned long i, j; @@ -400,7 +400,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) } /* Remove cache interface for CPU device */ -static int __cpuinit cache_remove_dev(struct sys_device * sys_dev) +static int __cpuinit cache_remove_dev(struct device * sys_dev) { unsigned int cpu = sys_dev->id; unsigned long i; @@ -428,9 +428,9 @@ static int __cpuinit cache_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; - struct sys_device *sys_dev; + struct device *sys_dev; - sys_dev = get_cpu_sysdev(cpu); + sys_dev = get_cpu_device(cpu); switch (action) { case CPU_ONLINE: case CPU_ONLINE_FROZEN: @@ -454,7 +454,7 @@ static int __init cache_sysfs_init(void) int i; for_each_online_cpu(i) { - struct sys_device *sys_dev = get_cpu_sysdev((unsigned int)i); + struct device *sys_dev = get_cpu_device((unsigned int)i); cache_add_dev(sys_dev); } diff --git a/arch/powerpc/include/asm/spu.h b/arch/powerpc/include/asm/spu.h index 4e360bd4a35..fff921345dd 100644 --- a/arch/powerpc/include/asm/spu.h +++ b/arch/powerpc/include/asm/spu.h @@ -25,7 +25,7 @@ #ifdef __KERNEL__ #include -#include +#include #include #define LS_SIZE (256 * 1024) @@ -166,7 +166,7 @@ struct spu { /* beat only */ u64 shadow_int_mask_RW[3]; - struct sys_device sysdev; + struct device dev; int has_mem_affinity; struct list_head aff_list; @@ -270,11 +270,11 @@ struct spufs_calls { int register_spu_syscalls(struct spufs_calls *calls); void unregister_spu_syscalls(struct spufs_calls *calls); -int spu_add_sysdev_attr(struct sysdev_attribute *attr); -void spu_remove_sysdev_attr(struct sysdev_attribute *attr); +int spu_add_dev_attr(struct device_attribute *attr); +void spu_remove_dev_attr(struct device_attribute *attr); -int spu_add_sysdev_attr_group(struct attribute_group *attrs); -void spu_remove_sysdev_attr_group(struct attribute_group *attrs); +int spu_add_dev_attr_group(struct attribute_group *attrs); +void spu_remove_dev_attr_group(struct attribute_group *attrs); int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, unsigned long dsisr, unsigned *flt); diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 1e104af0848..c97185885c6 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -3,7 +3,7 @@ #ifdef __KERNEL__ -struct sys_device; +struct device; struct device_node; #ifdef CONFIG_NUMA @@ -86,19 +86,19 @@ extern int __node_distance(int, int); extern void __init dump_numa_cpu_topology(void); -extern int sysfs_add_device_to_node(struct sys_device *dev, int nid); -extern void sysfs_remove_device_from_node(struct sys_device *dev, int nid); +extern int sysfs_add_device_to_node(struct device *dev, int nid); +extern void sysfs_remove_device_from_node(struct device *dev, int nid); #else static inline void dump_numa_cpu_topology(void) {} -static inline int sysfs_add_device_to_node(struct sys_device *dev, int nid) +static inline int sysfs_add_device_to_node(struct device *dev, int nid) { return 0; } -static inline void sysfs_remove_device_from_node(struct sys_device *dev, +static inline void sysfs_remove_device_from_node(struct device *dev, int nid) { } diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index a3c684b4c86..92c6b008dd2 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -451,15 +451,15 @@ out: static struct cache_dir *__cpuinit cacheinfo_create_cache_dir(unsigned int cpu_id) { struct cache_dir *cache_dir; - struct sys_device *sysdev; + struct device *dev; struct kobject *kobj = NULL; - sysdev = get_cpu_sysdev(cpu_id); - WARN_ONCE(!sysdev, "no sysdev for CPU %i\n", cpu_id); - if (!sysdev) + dev = get_cpu_device(cpu_id); + WARN_ONCE(!dev, "no dev for CPU %i\n", cpu_id); + if (!dev) goto err; - kobj = kobject_create_and_add("cache", &sysdev->kobj); + kobj = kobject_create_and_add("cache", &dev->kobj); if (!kobj) goto err; diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 25ddbfc7dd3..da08240353f 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index ce035c1905f..f396ef27916 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -37,12 +37,12 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); /* Time in microseconds we delay before sleeping in the idle loop */ DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 }; -static ssize_t store_smt_snooze_delay(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t store_smt_snooze_delay(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { - struct cpu *cpu = container_of(dev, struct cpu, sysdev); + struct cpu *cpu = container_of(dev, struct cpu, dev); ssize_t ret; long snooze; @@ -50,21 +50,21 @@ static ssize_t store_smt_snooze_delay(struct sys_device *dev, if (ret != 1) return -EINVAL; - per_cpu(smt_snooze_delay, cpu->sysdev.id) = snooze; + per_cpu(smt_snooze_delay, cpu->dev.id) = snooze; return count; } -static ssize_t show_smt_snooze_delay(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t show_smt_snooze_delay(struct device *dev, + struct device_attribute *attr, char *buf) { - struct cpu *cpu = container_of(dev, struct cpu, sysdev); + struct cpu *cpu = container_of(dev, struct cpu, dev); - return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->sysdev.id)); + return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->dev.id)); } -static SYSDEV_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, +static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, store_smt_snooze_delay); static int __init setup_smt_snooze_delay(char *str) @@ -117,25 +117,25 @@ static void write_##NAME(void *val) \ ppc_enable_pmcs(); \ mtspr(ADDRESS, *(unsigned long *)val); \ } \ -static ssize_t show_##NAME(struct sys_device *dev, \ - struct sysdev_attribute *attr, \ +static ssize_t show_##NAME(struct device *dev, \ + struct device_attribute *attr, \ char *buf) \ { \ - struct cpu *cpu = container_of(dev, struct cpu, sysdev); \ + struct cpu *cpu = container_of(dev, struct cpu, dev); \ unsigned long val; \ - smp_call_function_single(cpu->sysdev.id, read_##NAME, &val, 1); \ + smp_call_function_single(cpu->dev.id, read_##NAME, &val, 1); \ return sprintf(buf, "%lx\n", val); \ } \ static ssize_t __used \ - store_##NAME(struct sys_device *dev, struct sysdev_attribute *attr, \ + store_##NAME(struct device *dev, struct device_attribute *attr, \ const char *buf, size_t count) \ { \ - struct cpu *cpu = container_of(dev, struct cpu, sysdev); \ + struct cpu *cpu = container_of(dev, struct cpu, dev); \ unsigned long val; \ int ret = sscanf(buf, "%lx", &val); \ if (ret != 1) \ return -EINVAL; \ - smp_call_function_single(cpu->sysdev.id, write_##NAME, &val, 1); \ + smp_call_function_single(cpu->dev.id, write_##NAME, &val, 1); \ return count; \ } @@ -178,22 +178,22 @@ SYSFS_PMCSETUP(purr, SPRN_PURR); SYSFS_PMCSETUP(spurr, SPRN_SPURR); SYSFS_PMCSETUP(dscr, SPRN_DSCR); -static SYSDEV_ATTR(mmcra, 0600, show_mmcra, store_mmcra); -static SYSDEV_ATTR(spurr, 0600, show_spurr, NULL); -static SYSDEV_ATTR(dscr, 0600, show_dscr, store_dscr); -static SYSDEV_ATTR(purr, 0600, show_purr, store_purr); +static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra); +static DEVICE_ATTR(spurr, 0600, show_spurr, NULL); +static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr); +static DEVICE_ATTR(purr, 0600, show_purr, store_purr); unsigned long dscr_default = 0; EXPORT_SYMBOL(dscr_default); -static ssize_t show_dscr_default(struct sysdev_class *class, - struct sysdev_class_attribute *attr, char *buf) +static ssize_t show_dscr_default(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%lx\n", dscr_default); } -static ssize_t __used store_dscr_default(struct sysdev_class *class, - struct sysdev_class_attribute *attr, const char *buf, +static ssize_t __used store_dscr_default(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { unsigned long val; @@ -207,15 +207,14 @@ static ssize_t __used store_dscr_default(struct sysdev_class *class, return count; } -static SYSDEV_CLASS_ATTR(dscr_default, 0600, +static DEVICE_ATTR(dscr_default, 0600, show_dscr_default, store_dscr_default); static void sysfs_create_dscr_default(void) { int err = 0; if (cpu_has_feature(CPU_FTR_DSCR)) - err = sysfs_create_file(&cpu_sysdev_class.kset.kobj, - &attr_dscr_default.attr); + err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default); } #endif /* CONFIG_PPC64 */ @@ -259,72 +258,72 @@ SYSFS_PMCSETUP(tsr3, SPRN_PA6T_TSR3); #endif /* HAS_PPC_PMC_PA6T */ #ifdef HAS_PPC_PMC_IBM -static struct sysdev_attribute ibm_common_attrs[] = { - _SYSDEV_ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0), - _SYSDEV_ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1), +static struct device_attribute ibm_common_attrs[] = { + __ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0), + __ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1), }; #endif /* HAS_PPC_PMC_G4 */ #ifdef HAS_PPC_PMC_G4 -static struct sysdev_attribute g4_common_attrs[] = { - _SYSDEV_ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0), - _SYSDEV_ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1), - _SYSDEV_ATTR(mmcr2, 0600, show_mmcr2, store_mmcr2), +static struct device_attribute g4_common_attrs[] = { + __ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0), + __ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1), + __ATTR(mmcr2, 0600, show_mmcr2, store_mmcr2), }; #endif /* HAS_PPC_PMC_G4 */ -static struct sysdev_attribute classic_pmc_attrs[] = { - _SYSDEV_ATTR(pmc1, 0600, show_pmc1, store_pmc1), - _SYSDEV_ATTR(pmc2, 0600, show_pmc2, store_pmc2), - _SYSDEV_ATTR(pmc3, 0600, show_pmc3, store_pmc3), - _SYSDEV_ATTR(pmc4, 0600, show_pmc4, store_pmc4), - _SYSDEV_ATTR(pmc5, 0600, show_pmc5, store_pmc5), - _SYSDEV_ATTR(pmc6, 0600, show_pmc6, store_pmc6), +static struct device_attribute classic_pmc_attrs[] = { + __ATTR(pmc1, 0600, show_pmc1, store_pmc1), + __ATTR(pmc2, 0600, show_pmc2, store_pmc2), + __ATTR(pmc3, 0600, show_pmc3, store_pmc3), + __ATTR(pmc4, 0600, show_pmc4, store_pmc4), + __ATTR(pmc5, 0600, show_pmc5, store_pmc5), + __ATTR(pmc6, 0600, show_pmc6, store_pmc6), #ifdef CONFIG_PPC64 - _SYSDEV_ATTR(pmc7, 0600, show_pmc7, store_pmc7), - _SYSDEV_ATTR(pmc8, 0600, show_pmc8, store_pmc8), + __ATTR(pmc7, 0600, show_pmc7, store_pmc7), + __ATTR(pmc8, 0600, show_pmc8, store_pmc8), #endif }; #ifdef HAS_PPC_PMC_PA6T -static struct sysdev_attribute pa6t_attrs[] = { - _SYSDEV_ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0), - _SYSDEV_ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1), - _SYSDEV_ATTR(pmc0, 0600, show_pa6t_pmc0, store_pa6t_pmc0), - _SYSDEV_ATTR(pmc1, 0600, show_pa6t_pmc1, store_pa6t_pmc1), - _SYSDEV_ATTR(pmc2, 0600, show_pa6t_pmc2, store_pa6t_pmc2), - _SYSDEV_ATTR(pmc3, 0600, show_pa6t_pmc3, store_pa6t_pmc3), - _SYSDEV_ATTR(pmc4, 0600, show_pa6t_pmc4, store_pa6t_pmc4), - _SYSDEV_ATTR(pmc5, 0600, show_pa6t_pmc5, store_pa6t_pmc5), +static struct device_attribute pa6t_attrs[] = { + __ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0), + __ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1), + __ATTR(pmc0, 0600, show_pa6t_pmc0, store_pa6t_pmc0), + __ATTR(pmc1, 0600, show_pa6t_pmc1, store_pa6t_pmc1), + __ATTR(pmc2, 0600, show_pa6t_pmc2, store_pa6t_pmc2), + __ATTR(pmc3, 0600, show_pa6t_pmc3, store_pa6t_pmc3), + __ATTR(pmc4, 0600, show_pa6t_pmc4, store_pa6t_pmc4), + __ATTR(pmc5, 0600, show_pa6t_pmc5, store_pa6t_pmc5), #ifdef CONFIG_DEBUG_KERNEL - _SYSDEV_ATTR(hid0, 0600, show_hid0, store_hid0), - _SYSDEV_ATTR(hid1, 0600, show_hid1, store_hid1), - _SYSDEV_ATTR(hid4, 0600, show_hid4, store_hid4), - _SYSDEV_ATTR(hid5, 0600, show_hid5, store_hid5), - _SYSDEV_ATTR(ima0, 0600, show_ima0, store_ima0), - _SYSDEV_ATTR(ima1, 0600, show_ima1, store_ima1), - _SYSDEV_ATTR(ima2, 0600, show_ima2, store_ima2), - _SYSDEV_ATTR(ima3, 0600, show_ima3, store_ima3), - _SYSDEV_ATTR(ima4, 0600, show_ima4, store_ima4), - _SYSDEV_ATTR(ima5, 0600, show_ima5, store_ima5), - _SYSDEV_ATTR(ima6, 0600, show_ima6, store_ima6), - _SYSDEV_ATTR(ima7, 0600, show_ima7, store_ima7), - _SYSDEV_ATTR(ima8, 0600, show_ima8, store_ima8), - _SYSDEV_ATTR(ima9, 0600, show_ima9, store_ima9), - _SYSDEV_ATTR(imaat, 0600, show_imaat, store_imaat), - _SYSDEV_ATTR(btcr, 0600, show_btcr, store_btcr), - _SYSDEV_ATTR(pccr, 0600, show_pccr, store_pccr), - _SYSDEV_ATTR(rpccr, 0600, show_rpccr, store_rpccr), - _SYSDEV_ATTR(der, 0600, show_der, store_der), - _SYSDEV_ATTR(mer, 0600, show_mer, store_mer), - _SYSDEV_ATTR(ber, 0600, show_ber, store_ber), - _SYSDEV_ATTR(ier, 0600, show_ier, store_ier), - _SYSDEV_ATTR(sier, 0600, show_sier, store_sier), - _SYSDEV_ATTR(siar, 0600, show_siar, store_siar), - _SYSDEV_ATTR(tsr0, 0600, show_tsr0, store_tsr0), - _SYSDEV_ATTR(tsr1, 0600, show_tsr1, store_tsr1), - _SYSDEV_ATTR(tsr2, 0600, show_tsr2, store_tsr2), - _SYSDEV_ATTR(tsr3, 0600, show_tsr3, store_tsr3), + __ATTR(hid0, 0600, show_hid0, store_hid0), + __ATTR(hid1, 0600, show_hid1, store_hid1), + __ATTR(hid4, 0600, show_hid4, store_hid4), + __ATTR(hid5, 0600, show_hid5, store_hid5), + __ATTR(ima0, 0600, show_ima0, store_ima0), + __ATTR(ima1, 0600, show_ima1, store_ima1), + __ATTR(ima2, 0600, show_ima2, store_ima2), + __ATTR(ima3, 0600, show_ima3, store_ima3), + __ATTR(ima4, 0600, show_ima4, store_ima4), + __ATTR(ima5, 0600, show_ima5, store_ima5), + __ATTR(ima6, 0600, show_ima6, store_ima6), + __ATTR(ima7, 0600, show_ima7, store_ima7), + __ATTR(ima8, 0600, show_ima8, store_ima8), + __ATTR(ima9, 0600, show_ima9, store_ima9), + __ATTR(imaat, 0600, show_imaat, store_imaat), + __ATTR(btcr, 0600, show_btcr, store_btcr), + __ATTR(pccr, 0600, show_pccr, store_pccr), + __ATTR(rpccr, 0600, show_rpccr, store_rpccr), + __ATTR(der, 0600, show_der, store_der), + __ATTR(mer, 0600, show_mer, store_mer), + __ATTR(ber, 0600, show_ber, store_ber), + __ATTR(ier, 0600, show_ier, store_ier), + __ATTR(sier, 0600, show_sier, store_sier), + __ATTR(siar, 0600, show_siar, store_siar), + __ATTR(tsr0, 0600, show_tsr0, store_tsr0), + __ATTR(tsr1, 0600, show_tsr1, store_tsr1), + __ATTR(tsr2, 0600, show_tsr2, store_tsr2), + __ATTR(tsr3, 0600, show_tsr3, store_tsr3), #endif /* CONFIG_DEBUG_KERNEL */ }; #endif /* HAS_PPC_PMC_PA6T */ @@ -333,14 +332,14 @@ static struct sysdev_attribute pa6t_attrs[] = { static void __cpuinit register_cpu_online(unsigned int cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); - struct sys_device *s = &c->sysdev; - struct sysdev_attribute *attrs, *pmc_attrs; + struct device *s = &c->dev; + struct device_attribute *attrs, *pmc_attrs; int i, nattrs; #ifdef CONFIG_PPC64 if (!firmware_has_feature(FW_FEATURE_ISERIES) && cpu_has_feature(CPU_FTR_SMT)) - sysdev_create_file(s, &attr_smt_snooze_delay); + device_create_file(s, &dev_attr_smt_snooze_delay); #endif /* PMC stuff */ @@ -348,14 +347,14 @@ static void __cpuinit register_cpu_online(unsigned int cpu) #ifdef HAS_PPC_PMC_IBM case PPC_PMC_IBM: attrs = ibm_common_attrs; - nattrs = sizeof(ibm_common_attrs) / sizeof(struct sysdev_attribute); + nattrs = sizeof(ibm_common_attrs) / sizeof(struct device_attribute); pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_IBM */ #ifdef HAS_PPC_PMC_G4 case PPC_PMC_G4: attrs = g4_common_attrs; - nattrs = sizeof(g4_common_attrs) / sizeof(struct sysdev_attribute); + nattrs = sizeof(g4_common_attrs) / sizeof(struct device_attribute); pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_G4 */ @@ -363,7 +362,7 @@ static void __cpuinit register_cpu_online(unsigned int cpu) case PPC_PMC_PA6T: /* PA Semi starts counting at PMC0 */ attrs = pa6t_attrs; - nattrs = sizeof(pa6t_attrs) / sizeof(struct sysdev_attribute); + nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute); pmc_attrs = NULL; break; #endif /* HAS_PPC_PMC_PA6T */ @@ -374,24 +373,24 @@ static void __cpuinit register_cpu_online(unsigned int cpu) } for (i = 0; i < nattrs; i++) - sysdev_create_file(s, &attrs[i]); + device_create_file(s, &attrs[i]); if (pmc_attrs) for (i = 0; i < cur_cpu_spec->num_pmcs; i++) - sysdev_create_file(s, &pmc_attrs[i]); + device_create_file(s, &pmc_attrs[i]); #ifdef CONFIG_PPC64 if (cpu_has_feature(CPU_FTR_MMCRA)) - sysdev_create_file(s, &attr_mmcra); + device_create_file(s, &dev_attr_mmcra); if (cpu_has_feature(CPU_FTR_PURR)) - sysdev_create_file(s, &attr_purr); + device_create_file(s, &dev_attr_purr); if (cpu_has_feature(CPU_FTR_SPURR)) - sysdev_create_file(s, &attr_spurr); + device_create_file(s, &dev_attr_spurr); if (cpu_has_feature(CPU_FTR_DSCR)) - sysdev_create_file(s, &attr_dscr); + device_create_file(s, &dev_attr_dscr); #endif /* CONFIG_PPC64 */ cacheinfo_cpu_online(cpu); @@ -401,8 +400,8 @@ static void __cpuinit register_cpu_online(unsigned int cpu) static void unregister_cpu_online(unsigned int cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); - struct sys_device *s = &c->sysdev; - struct sysdev_attribute *attrs, *pmc_attrs; + struct device *s = &c->dev; + struct device_attribute *attrs, *pmc_attrs; int i, nattrs; BUG_ON(!c->hotpluggable); @@ -410,7 +409,7 @@ static void unregister_cpu_online(unsigned int cpu) #ifdef CONFIG_PPC64 if (!firmware_has_feature(FW_FEATURE_ISERIES) && cpu_has_feature(CPU_FTR_SMT)) - sysdev_remove_file(s, &attr_smt_snooze_delay); + device_remove_file(s, &dev_attr_smt_snooze_delay); #endif /* PMC stuff */ @@ -418,14 +417,14 @@ static void unregister_cpu_online(unsigned int cpu) #ifdef HAS_PPC_PMC_IBM case PPC_PMC_IBM: attrs = ibm_common_attrs; - nattrs = sizeof(ibm_common_attrs) / sizeof(struct sysdev_attribute); + nattrs = sizeof(ibm_common_attrs) / sizeof(struct device_attribute); pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_IBM */ #ifdef HAS_PPC_PMC_G4 case PPC_PMC_G4: attrs = g4_common_attrs; - nattrs = sizeof(g4_common_attrs) / sizeof(struct sysdev_attribute); + nattrs = sizeof(g4_common_attrs) / sizeof(struct device_attribute); pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_G4 */ @@ -433,7 +432,7 @@ static void unregister_cpu_online(unsigned int cpu) case PPC_PMC_PA6T: /* PA Semi starts counting at PMC0 */ attrs = pa6t_attrs; - nattrs = sizeof(pa6t_attrs) / sizeof(struct sysdev_attribute); + nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute); pmc_attrs = NULL; break; #endif /* HAS_PPC_PMC_PA6T */ @@ -444,24 +443,24 @@ static void unregister_cpu_online(unsigned int cpu) } for (i = 0; i < nattrs; i++) - sysdev_remove_file(s, &attrs[i]); + device_remove_file(s, &attrs[i]); if (pmc_attrs) for (i = 0; i < cur_cpu_spec->num_pmcs; i++) - sysdev_remove_file(s, &pmc_attrs[i]); + device_remove_file(s, &pmc_attrs[i]); #ifdef CONFIG_PPC64 if (cpu_has_feature(CPU_FTR_MMCRA)) - sysdev_remove_file(s, &attr_mmcra); + device_remove_file(s, &dev_attr_mmcra); if (cpu_has_feature(CPU_FTR_PURR)) - sysdev_remove_file(s, &attr_purr); + device_remove_file(s, &dev_attr_purr); if (cpu_has_feature(CPU_FTR_SPURR)) - sysdev_remove_file(s, &attr_spurr); + device_remove_file(s, &dev_attr_spurr); if (cpu_has_feature(CPU_FTR_DSCR)) - sysdev_remove_file(s, &attr_dscr); + device_remove_file(s, &dev_attr_dscr); #endif /* CONFIG_PPC64 */ cacheinfo_cpu_offline(cpu); @@ -513,70 +512,70 @@ static struct notifier_block __cpuinitdata sysfs_cpu_nb = { static DEFINE_MUTEX(cpu_mutex); -int cpu_add_sysdev_attr(struct sysdev_attribute *attr) +int cpu_add_dev_attr(struct device_attribute *attr) { int cpu; mutex_lock(&cpu_mutex); for_each_possible_cpu(cpu) { - sysdev_create_file(get_cpu_sysdev(cpu), attr); + device_create_file(get_cpu_device(cpu), attr); } mutex_unlock(&cpu_mutex); return 0; } -EXPORT_SYMBOL_GPL(cpu_add_sysdev_attr); +EXPORT_SYMBOL_GPL(cpu_add_dev_attr); -int cpu_add_sysdev_attr_group(struct attribute_group *attrs) +int cpu_add_dev_attr_group(struct attribute_group *attrs) { int cpu; - struct sys_device *sysdev; + struct device *dev; int ret; mutex_lock(&cpu_mutex); for_each_possible_cpu(cpu) { - sysdev = get_cpu_sysdev(cpu); - ret = sysfs_create_group(&sysdev->kobj, attrs); + dev = get_cpu_device(cpu); + ret = sysfs_create_group(&dev->kobj, attrs); WARN_ON(ret != 0); } mutex_unlock(&cpu_mutex); return 0; } -EXPORT_SYMBOL_GPL(cpu_add_sysdev_attr_group); +EXPORT_SYMBOL_GPL(cpu_add_dev_attr_group); -void cpu_remove_sysdev_attr(struct sysdev_attribute *attr) +void cpu_remove_dev_attr(struct device_attribute *attr) { int cpu; mutex_lock(&cpu_mutex); for_each_possible_cpu(cpu) { - sysdev_remove_file(get_cpu_sysdev(cpu), attr); + device_remove_file(get_cpu_device(cpu), attr); } mutex_unlock(&cpu_mutex); } -EXPORT_SYMBOL_GPL(cpu_remove_sysdev_attr); +EXPORT_SYMBOL_GPL(cpu_remove_dev_attr); -void cpu_remove_sysdev_attr_group(struct attribute_group *attrs) +void cpu_remove_dev_attr_group(struct attribute_group *attrs) { int cpu; - struct sys_device *sysdev; + struct device *dev; mutex_lock(&cpu_mutex); for_each_possible_cpu(cpu) { - sysdev = get_cpu_sysdev(cpu); - sysfs_remove_group(&sysdev->kobj, attrs); + dev = get_cpu_device(cpu); + sysfs_remove_group(&dev->kobj, attrs); } mutex_unlock(&cpu_mutex); } -EXPORT_SYMBOL_GPL(cpu_remove_sysdev_attr_group); +EXPORT_SYMBOL_GPL(cpu_remove_dev_attr_group); /* NUMA stuff */ @@ -590,7 +589,7 @@ static void register_nodes(void) register_one_node(i); } -int sysfs_add_device_to_node(struct sys_device *dev, int nid) +int sysfs_add_device_to_node(struct device *dev, int nid) { struct node *node = &node_devices[nid]; return sysfs_create_link(&node->sysdev.kobj, &dev->kobj, @@ -598,7 +597,7 @@ int sysfs_add_device_to_node(struct sys_device *dev, int nid) } EXPORT_SYMBOL_GPL(sysfs_add_device_to_node); -void sysfs_remove_device_from_node(struct sys_device *dev, int nid) +void sysfs_remove_device_from_node(struct device *dev, int nid) { struct node *node = &node_devices[nid]; sysfs_remove_link(&node->sysdev.kobj, kobject_name(&dev->kobj)); @@ -614,14 +613,14 @@ static void register_nodes(void) #endif /* Only valid if CPU is present. */ -static ssize_t show_physical_id(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t show_physical_id(struct device *dev, + struct device_attribute *attr, char *buf) { - struct cpu *cpu = container_of(dev, struct cpu, sysdev); + struct cpu *cpu = container_of(dev, struct cpu, dev); - return sprintf(buf, "%d\n", get_hard_smp_processor_id(cpu->sysdev.id)); + return sprintf(buf, "%d\n", get_hard_smp_processor_id(cpu->dev.id)); } -static SYSDEV_ATTR(physical_id, 0444, show_physical_id, NULL); +static DEVICE_ATTR(physical_id, 0444, show_physical_id, NULL); static int __init topology_init(void) { @@ -646,7 +645,7 @@ static int __init topology_init(void) if (cpu_online(cpu) || c->hotpluggable) { register_cpu(c, cpu); - sysdev_create_file(&c->sysdev, &attr_physical_id); + device_create_file(&c->dev, &dev_attr_physical_id); } if (cpu_online(cpu)) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index c7dd4dec4df..f2b03a86343 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1452,7 +1452,7 @@ int arch_update_cpu_topology(void) { int cpu, nid, old_nid; unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0}; - struct sys_device *sysdev; + struct device *dev; for_each_cpu(cpu,&cpu_associativity_changes_mask) { vphn_get_associativity(cpu, associativity); @@ -1473,9 +1473,9 @@ int arch_update_cpu_topology(void) register_cpu_under_node(cpu, nid); put_online_cpus(); - sysdev = get_cpu_sysdev(cpu); - if (sysdev) - kobject_uevent(&sysdev->kobj, KOBJ_CHANGE); + dev = get_cpu_device(cpu); + if (dev) + kobject_uevent(&dev->kobj, KOBJ_CHANGE); } return 1; diff --git a/arch/powerpc/platforms/cell/cbe_thermal.c b/arch/powerpc/platforms/cell/cbe_thermal.c index 4d4c8c16912..94560db788b 100644 --- a/arch/powerpc/platforms/cell/cbe_thermal.c +++ b/arch/powerpc/platforms/cell/cbe_thermal.c @@ -46,7 +46,7 @@ */ #include -#include +#include #include #include #include @@ -59,8 +59,8 @@ #define TEMP_MIN 65 #define TEMP_MAX 125 -#define SYSDEV_PREFIX_ATTR(_prefix,_name,_mode) \ -struct sysdev_attribute attr_ ## _prefix ## _ ## _name = { \ +#define DEVICE_PREFIX_ATTR(_prefix,_name,_mode) \ +struct device_attribute attr_ ## _prefix ## _ ## _name = { \ .attr = { .name = __stringify(_name), .mode = _mode }, \ .show = _prefix ## _show_ ## _name, \ .store = _prefix ## _store_ ## _name, \ @@ -76,36 +76,36 @@ static inline u8 temp_to_reg(u8 temp) return ((temp - TEMP_MIN) >> 1) & 0x3f; } -static struct cbe_pmd_regs __iomem *get_pmd_regs(struct sys_device *sysdev) +static struct cbe_pmd_regs __iomem *get_pmd_regs(struct device *dev) { struct spu *spu; - spu = container_of(sysdev, struct spu, sysdev); + spu = container_of(dev, struct spu, dev); return cbe_get_pmd_regs(spu_devnode(spu)); } /* returns the value for a given spu in a given register */ -static u8 spu_read_register_value(struct sys_device *sysdev, union spe_reg __iomem *reg) +static u8 spu_read_register_value(struct device *dev, union spe_reg __iomem *reg) { union spe_reg value; struct spu *spu; - spu = container_of(sysdev, struct spu, sysdev); + spu = container_of(dev, struct spu, dev); value.val = in_be64(®->val); return value.spe[spu->spe_id]; } -static ssize_t spu_show_temp(struct sys_device *sysdev, struct sysdev_attribute *attr, +static ssize_t spu_show_temp(struct device *dev, struct device_attribute *attr, char *buf) { u8 value; struct cbe_pmd_regs __iomem *pmd_regs; - pmd_regs = get_pmd_regs(sysdev); + pmd_regs = get_pmd_regs(dev); - value = spu_read_register_value(sysdev, &pmd_regs->ts_ctsr1); + value = spu_read_register_value(dev, &pmd_regs->ts_ctsr1); return sprintf(buf, "%d\n", reg_to_temp(value)); } @@ -147,48 +147,48 @@ static ssize_t store_throttle(struct cbe_pmd_regs __iomem *pmd_regs, const char return size; } -static ssize_t spu_show_throttle_end(struct sys_device *sysdev, - struct sysdev_attribute *attr, char *buf) +static ssize_t spu_show_throttle_end(struct device *dev, + struct device_attribute *attr, char *buf) { - return show_throttle(get_pmd_regs(sysdev), buf, 0); + return show_throttle(get_pmd_regs(dev), buf, 0); } -static ssize_t spu_show_throttle_begin(struct sys_device *sysdev, - struct sysdev_attribute *attr, char *buf) +static ssize_t spu_show_throttle_begin(struct device *dev, + struct device_attribute *attr, char *buf) { - return show_throttle(get_pmd_regs(sysdev), buf, 8); + return show_throttle(get_pmd_regs(dev), buf, 8); } -static ssize_t spu_show_throttle_full_stop(struct sys_device *sysdev, - struct sysdev_attribute *attr, char *buf) +static ssize_t spu_show_throttle_full_stop(struct device *dev, + struct device_attribute *attr, char *buf) { - return show_throttle(get_pmd_regs(sysdev), buf, 16); + return show_throttle(get_pmd_regs(dev), buf, 16); } -static ssize_t spu_store_throttle_end(struct sys_device *sysdev, - struct sysdev_attribute *attr, const char *buf, size_t size) +static ssize_t spu_store_throttle_end(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) { - return store_throttle(get_pmd_regs(sysdev), buf, size, 0); + return store_throttle(get_pmd_regs(dev), buf, size, 0); } -static ssize_t spu_store_throttle_begin(struct sys_device *sysdev, - struct sysdev_attribute *attr, const char *buf, size_t size) +static ssize_t spu_store_throttle_begin(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) { - return store_throttle(get_pmd_regs(sysdev), buf, size, 8); + return store_throttle(get_pmd_regs(dev), buf, size, 8); } -static ssize_t spu_store_throttle_full_stop(struct sys_device *sysdev, - struct sysdev_attribute *attr, const char *buf, size_t size) +static ssize_t spu_store_throttle_full_stop(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) { - return store_throttle(get_pmd_regs(sysdev), buf, size, 16); + return store_throttle(get_pmd_regs(dev), buf, size, 16); } -static ssize_t ppe_show_temp(struct sys_device *sysdev, char *buf, int pos) +static ssize_t ppe_show_temp(struct device *dev, char *buf, int pos) { struct cbe_pmd_regs __iomem *pmd_regs; u64 value; - pmd_regs = cbe_get_cpu_pmd_regs(sysdev->id); + pmd_regs = cbe_get_cpu_pmd_regs(dev->id); value = in_be64(&pmd_regs->ts_ctsr2); value = (value >> pos) & 0x3f; @@ -199,64 +199,64 @@ static ssize_t ppe_show_temp(struct sys_device *sysdev, char *buf, int pos) /* shows the temperature of the DTS on the PPE, * located near the linear thermal sensor */ -static ssize_t ppe_show_temp0(struct sys_device *sysdev, - struct sysdev_attribute *attr, char *buf) +static ssize_t ppe_show_temp0(struct device *dev, + struct device_attribute *attr, char *buf) { - return ppe_show_temp(sysdev, buf, 32); + return ppe_show_temp(dev, buf, 32); } /* shows the temperature of the second DTS on the PPE */ -static ssize_t ppe_show_temp1(struct sys_device *sysdev, - struct sysdev_attribute *attr, char *buf) +static ssize_t ppe_show_temp1(struct device *dev, + struct device_attribute *attr, char *buf) { - return ppe_show_temp(sysdev, buf, 0); + return ppe_show_temp(dev, buf, 0); } -static ssize_t ppe_show_throttle_end(struct sys_device *sysdev, - struct sysdev_attribute *attr, char *buf) +static ssize_t ppe_show_throttle_end(struct device *dev, + struct device_attribute *attr, char *buf) { - return show_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, 32); + return show_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, 32); } -static ssize_t ppe_show_throttle_begin(struct sys_device *sysdev, - struct sysdev_attribute *attr, char *buf) +static ssize_t ppe_show_throttle_begin(struct device *dev, + struct device_attribute *attr, char *buf) { - return show_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, 40); + return show_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, 40); } -static ssize_t ppe_show_throttle_full_stop(struct sys_device *sysdev, - struct sysdev_attribute *attr, char *buf) +static ssize_t ppe_show_throttle_full_stop(struct device *dev, + struct device_attribute *attr, char *buf) { - return show_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, 48); + return show_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, 48); } -static ssize_t ppe_store_throttle_end(struct sys_device *sysdev, - struct sysdev_attribute *attr, const char *buf, size_t size) +static ssize_t ppe_store_throttle_end(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) { - return store_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, size, 32); + return store_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, size, 32); } -static ssize_t ppe_store_throttle_begin(struct sys_device *sysdev, - struct sysdev_attribute *attr, const char *buf, size_t size) +static ssize_t ppe_store_throttle_begin(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) { - return store_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, size, 40); + return store_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, size, 40); } -static ssize_t ppe_store_throttle_full_stop(struct sys_device *sysdev, - struct sysdev_attribute *attr, const char *buf, size_t size) +static ssize_t ppe_store_throttle_full_stop(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) { - return store_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, size, 48); + return store_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, size, 48); } -static struct sysdev_attribute attr_spu_temperature = { +static struct device_attribute attr_spu_temperature = { .attr = {.name = "temperature", .mode = 0400 }, .show = spu_show_temp, }; -static SYSDEV_PREFIX_ATTR(spu, throttle_end, 0600); -static SYSDEV_PREFIX_ATTR(spu, throttle_begin, 0600); -static SYSDEV_PREFIX_ATTR(spu, throttle_full_stop, 0600); +static DEVICE_PREFIX_ATTR(spu, throttle_end, 0600); +static DEVICE_PREFIX_ATTR(spu, throttle_begin, 0600); +static DEVICE_PREFIX_ATTR(spu, throttle_full_stop, 0600); static struct attribute *spu_attributes[] = { @@ -272,19 +272,19 @@ static struct attribute_group spu_attribute_group = { .attrs = spu_attributes, }; -static struct sysdev_attribute attr_ppe_temperature0 = { +static struct device_attribute attr_ppe_temperature0 = { .attr = {.name = "temperature0", .mode = 0400 }, .show = ppe_show_temp0, }; -static struct sysdev_attribute attr_ppe_temperature1 = { +static struct device_attribute attr_ppe_temperature1 = { .attr = {.name = "temperature1", .mode = 0400 }, .show = ppe_show_temp1, }; -static SYSDEV_PREFIX_ATTR(ppe, throttle_end, 0600); -static SYSDEV_PREFIX_ATTR(ppe, throttle_begin, 0600); -static SYSDEV_PREFIX_ATTR(ppe, throttle_full_stop, 0600); +static DEVICE_PREFIX_ATTR(ppe, throttle_end, 0600); +static DEVICE_PREFIX_ATTR(ppe, throttle_begin, 0600); +static DEVICE_PREFIX_ATTR(ppe, throttle_full_stop, 0600); static struct attribute *ppe_attributes[] = { &attr_ppe_temperature0.attr, @@ -307,7 +307,7 @@ static int __init init_default_values(void) { int cpu; struct cbe_pmd_regs __iomem *pmd_regs; - struct sys_device *sysdev; + struct device *dev; union ppe_spe_reg tpr; union spe_reg str1; u64 str2; @@ -349,14 +349,14 @@ static int __init init_default_values(void) for_each_possible_cpu (cpu) { pr_debug("processing cpu %d\n", cpu); - sysdev = get_cpu_sysdev(cpu); + dev = get_cpu_device(cpu); - if (!sysdev) { - pr_info("invalid sysdev pointer for cbe_thermal\n"); + if (!dev) { + pr_info("invalid dev pointer for cbe_thermal\n"); return -EINVAL; } - pmd_regs = cbe_get_cpu_pmd_regs(sysdev->id); + pmd_regs = cbe_get_cpu_pmd_regs(dev->id); if (!pmd_regs) { pr_info("invalid CBE regs pointer for cbe_thermal\n"); @@ -379,8 +379,8 @@ static int __init thermal_init(void) int rc = init_default_values(); if (rc == 0) { - spu_add_sysdev_attr_group(&spu_attribute_group); - cpu_add_sysdev_attr_group(&ppe_attribute_group); + spu_add_dev_attr_group(&spu_attribute_group); + cpu_add_dev_attr_group(&ppe_attribute_group); } return rc; @@ -389,8 +389,8 @@ module_init(thermal_init); static void __exit thermal_exit(void) { - spu_remove_sysdev_attr_group(&spu_attribute_group); - cpu_remove_sysdev_attr_group(&ppe_attribute_group); + spu_remove_dev_attr_group(&spu_attribute_group); + cpu_remove_dev_attr_group(&ppe_attribute_group); } module_exit(thermal_exit); diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index 3675da73623..1708fb7aba3 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -522,31 +522,32 @@ void spu_init_channels(struct spu *spu) } EXPORT_SYMBOL_GPL(spu_init_channels); -static struct sysdev_class spu_sysdev_class = { +static struct bus_type spu_subsys = { .name = "spu", + .dev_name = "spu", }; -int spu_add_sysdev_attr(struct sysdev_attribute *attr) +int spu_add_dev_attr(struct device_attribute *attr) { struct spu *spu; mutex_lock(&spu_full_list_mutex); list_for_each_entry(spu, &spu_full_list, full_list) - sysdev_create_file(&spu->sysdev, attr); + device_create_file(&spu->dev, attr); mutex_unlock(&spu_full_list_mutex); return 0; } -EXPORT_SYMBOL_GPL(spu_add_sysdev_attr); +EXPORT_SYMBOL_GPL(spu_add_dev_attr); -int spu_add_sysdev_attr_group(struct attribute_group *attrs) +int spu_add_dev_attr_group(struct attribute_group *attrs) { struct spu *spu; int rc = 0; mutex_lock(&spu_full_list_mutex); list_for_each_entry(spu, &spu_full_list, full_list) { - rc = sysfs_create_group(&spu->sysdev.kobj, attrs); + rc = sysfs_create_group(&spu->dev.kobj, attrs); /* we're in trouble here, but try unwinding anyway */ if (rc) { @@ -555,7 +556,7 @@ int spu_add_sysdev_attr_group(struct attribute_group *attrs) list_for_each_entry_continue_reverse(spu, &spu_full_list, full_list) - sysfs_remove_group(&spu->sysdev.kobj, attrs); + sysfs_remove_group(&spu->dev.kobj, attrs); break; } } @@ -564,45 +565,45 @@ int spu_add_sysdev_attr_group(struct attribute_group *attrs) return rc; } -EXPORT_SYMBOL_GPL(spu_add_sysdev_attr_group); +EXPORT_SYMBOL_GPL(spu_add_dev_attr_group); -void spu_remove_sysdev_attr(struct sysdev_attribute *attr) +void spu_remove_dev_attr(struct device_attribute *attr) { struct spu *spu; mutex_lock(&spu_full_list_mutex); list_for_each_entry(spu, &spu_full_list, full_list) - sysdev_remove_file(&spu->sysdev, attr); + device_remove_file(&spu->dev, attr); mutex_unlock(&spu_full_list_mutex); } -EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr); +EXPORT_SYMBOL_GPL(spu_remove_dev_attr); -void spu_remove_sysdev_attr_group(struct attribute_group *attrs) +void spu_remove_dev_attr_group(struct attribute_group *attrs) { struct spu *spu; mutex_lock(&spu_full_list_mutex); list_for_each_entry(spu, &spu_full_list, full_list) - sysfs_remove_group(&spu->sysdev.kobj, attrs); + sysfs_remove_group(&spu->dev.kobj, attrs); mutex_unlock(&spu_full_list_mutex); } -EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr_group); +EXPORT_SYMBOL_GPL(spu_remove_dev_attr_group); -static int spu_create_sysdev(struct spu *spu) +static int spu_create_dev(struct spu *spu) { int ret; - spu->sysdev.id = spu->number; - spu->sysdev.cls = &spu_sysdev_class; - ret = sysdev_register(&spu->sysdev); + spu->dev.id = spu->number; + spu->dev.bus = &spu_subsys; + ret = device_register(&spu->dev); if (ret) { printk(KERN_ERR "Can't register SPU %d with sysfs\n", spu->number); return ret; } - sysfs_add_device_to_node(&spu->sysdev, spu->node); + sysfs_add_device_to_node(&spu->dev, spu->node); return 0; } @@ -638,7 +639,7 @@ static int __init create_spu(void *data) if (ret) goto out_destroy; - ret = spu_create_sysdev(spu); + ret = spu_create_dev(spu); if (ret) goto out_free_irqs; @@ -695,10 +696,10 @@ static unsigned long long spu_acct_time(struct spu *spu, } -static ssize_t spu_stat_show(struct sys_device *sysdev, - struct sysdev_attribute *attr, char *buf) +static ssize_t spu_stat_show(struct device *dev, + struct device_attribute *attr, char *buf) { - struct spu *spu = container_of(sysdev, struct spu, sysdev); + struct spu *spu = container_of(dev, struct spu, dev); return sprintf(buf, "%s %llu %llu %llu %llu " "%llu %llu %llu %llu %llu %llu %llu %llu\n", @@ -717,7 +718,7 @@ static ssize_t spu_stat_show(struct sys_device *sysdev, spu->stats.libassist); } -static SYSDEV_ATTR(stat, 0644, spu_stat_show, NULL); +static DEVICE_ATTR(stat, 0644, spu_stat_show, NULL); #ifdef CONFIG_KEXEC @@ -816,8 +817,8 @@ static int __init init_spu_base(void) if (!spu_management_ops) goto out; - /* create sysdev class for spus */ - ret = sysdev_class_register(&spu_sysdev_class); + /* create system subsystem for spus */ + ret = subsys_system_register(&spu_subsys, NULL); if (ret) goto out; @@ -826,7 +827,7 @@ static int __init init_spu_base(void) if (ret < 0) { printk(KERN_WARNING "%s: Error initializing spus\n", __func__); - goto out_unregister_sysdev_class; + goto out_unregister_subsys; } if (ret > 0) @@ -836,15 +837,15 @@ static int __init init_spu_base(void) xmon_register_spus(&spu_full_list); crash_register_spus(&spu_full_list); mutex_unlock(&spu_full_list_mutex); - spu_add_sysdev_attr(&attr_stat); + spu_add_dev_attr(&dev_attr_stat); register_syscore_ops(&spu_syscore_ops); spu_init_affinity(); return 0; - out_unregister_sysdev_class: - sysdev_class_unregister(&spu_sysdev_class); + out_unregister_subsys: + bus_unregister(&spu_subsys); out: return ret; } diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c index c8b3c69fe89..af281dce510 100644 --- a/arch/powerpc/platforms/pseries/pseries_energy.c +++ b/arch/powerpc/platforms/pseries/pseries_energy.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include @@ -184,7 +184,7 @@ static ssize_t get_best_energy_list(char *page, int activate) return s-page; } -static ssize_t get_best_energy_data(struct sys_device *dev, +static ssize_t get_best_energy_data(struct device *dev, char *page, int activate) { int rc; @@ -207,26 +207,26 @@ static ssize_t get_best_energy_data(struct sys_device *dev, /* Wrapper functions */ -static ssize_t cpu_activate_hint_list_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, char *page) +static ssize_t cpu_activate_hint_list_show(struct device *dev, + struct device_attribute *attr, char *page) { return get_best_energy_list(page, 1); } -static ssize_t cpu_deactivate_hint_list_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, char *page) +static ssize_t cpu_deactivate_hint_list_show(struct device *dev, + struct device_attribute *attr, char *page) { return get_best_energy_list(page, 0); } -static ssize_t percpu_activate_hint_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *page) +static ssize_t percpu_activate_hint_show(struct device *dev, + struct device_attribute *attr, char *page) { return get_best_energy_data(dev, page, 1); } -static ssize_t percpu_deactivate_hint_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *page) +static ssize_t percpu_deactivate_hint_show(struct device *dev, + struct device_attribute *attr, char *page) { return get_best_energy_data(dev, page, 0); } @@ -241,48 +241,48 @@ static ssize_t percpu_deactivate_hint_show(struct sys_device *dev, * Per-cpu value of the hint */ -struct sysdev_class_attribute attr_cpu_activate_hint_list = - _SYSDEV_CLASS_ATTR(pseries_activate_hint_list, 0444, +struct device_attribute attr_cpu_activate_hint_list = + __ATTR(pseries_activate_hint_list, 0444, cpu_activate_hint_list_show, NULL); -struct sysdev_class_attribute attr_cpu_deactivate_hint_list = - _SYSDEV_CLASS_ATTR(pseries_deactivate_hint_list, 0444, +struct device_attribute attr_cpu_deactivate_hint_list = + __ATTR(pseries_deactivate_hint_list, 0444, cpu_deactivate_hint_list_show, NULL); -struct sysdev_attribute attr_percpu_activate_hint = - _SYSDEV_ATTR(pseries_activate_hint, 0444, +struct device_attribute attr_percpu_activate_hint = + __ATTR(pseries_activate_hint, 0444, percpu_activate_hint_show, NULL); -struct sysdev_attribute attr_percpu_deactivate_hint = - _SYSDEV_ATTR(pseries_deactivate_hint, 0444, +struct device_attribute attr_percpu_deactivate_hint = + __ATTR(pseries_deactivate_hint, 0444, percpu_deactivate_hint_show, NULL); static int __init pseries_energy_init(void) { int cpu, err; - struct sys_device *cpu_sys_dev; + struct device *cpu_dev; if (!check_for_h_best_energy()) { printk(KERN_INFO "Hypercall H_BEST_ENERGY not supported\n"); return 0; } /* Create the sysfs files */ - err = sysfs_create_file(&cpu_sysdev_class.kset.kobj, - &attr_cpu_activate_hint_list.attr); + err = device_create_file(cpu_subsys.dev_root, + &attr_cpu_activate_hint_list); if (!err) - err = sysfs_create_file(&cpu_sysdev_class.kset.kobj, - &attr_cpu_deactivate_hint_list.attr); + err = device_create_file(cpu_subsys.dev_root, + &attr_cpu_deactivate_hint_list); if (err) return err; for_each_possible_cpu(cpu) { - cpu_sys_dev = get_cpu_sysdev(cpu); - err = sysfs_create_file(&cpu_sys_dev->kobj, - &attr_percpu_activate_hint.attr); + cpu_dev = get_cpu_device(cpu); + err = device_create_file(cpu_dev, + &attr_percpu_activate_hint); if (err) break; - err = sysfs_create_file(&cpu_sys_dev->kobj, - &attr_percpu_deactivate_hint.attr); + err = device_create_file(cpu_dev, + &attr_percpu_deactivate_hint); if (err) break; } @@ -298,23 +298,20 @@ static int __init pseries_energy_init(void) static void __exit pseries_energy_cleanup(void) { int cpu; - struct sys_device *cpu_sys_dev; + struct device *cpu_dev; if (!sysfs_entries) return; /* Remove the sysfs files */ - sysfs_remove_file(&cpu_sysdev_class.kset.kobj, - &attr_cpu_activate_hint_list.attr); - - sysfs_remove_file(&cpu_sysdev_class.kset.kobj, - &attr_cpu_deactivate_hint_list.attr); + device_remove_file(cpu_subsys.dev_root, &attr_cpu_activate_hint_list); + device_remove_file(cpu_subsys.dev_root, &attr_cpu_deactivate_hint_list); for_each_possible_cpu(cpu) { - cpu_sys_dev = get_cpu_sysdev(cpu); - sysfs_remove_file(&cpu_sys_dev->kobj, + cpu_dev = get_cpu_device(cpu); + sysfs_remove_file(&cpu_dev->kobj, &attr_percpu_activate_hint.attr); - sysfs_remove_file(&cpu_sys_dev->kobj, + sysfs_remove_file(&cpu_dev->kobj, &attr_percpu_deactivate_hint.attr); } } diff --git a/arch/powerpc/sysdev/ppc4xx_cpm.c b/arch/powerpc/sysdev/ppc4xx_cpm.c index 73b86cc5ea7..82e2cfe35c6 100644 --- a/arch/powerpc/sysdev/ppc4xx_cpm.c +++ b/arch/powerpc/sysdev/ppc4xx_cpm.c @@ -179,12 +179,12 @@ static struct kobj_attribute cpm_idle_attr = static void cpm_idle_config_sysfs(void) { - struct sys_device *sys_dev; + struct device *dev; unsigned long ret; - sys_dev = get_cpu_sysdev(0); + dev = get_cpu_device(0); - ret = sysfs_create_file(&sys_dev->kobj, + ret = sysfs_create_file(&dev->kobj, &cpm_idle_attr.attr); if (ret) printk(KERN_WARNING diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 3ea872890da..66cca03c028 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -831,8 +831,8 @@ int setup_profiling_timer(unsigned int multiplier) } #ifdef CONFIG_HOTPLUG_CPU -static ssize_t cpu_configure_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t cpu_configure_show(struct device *dev, + struct device_attribute *attr, char *buf) { ssize_t count; @@ -842,8 +842,8 @@ static ssize_t cpu_configure_show(struct sys_device *dev, return count; } -static ssize_t cpu_configure_store(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t cpu_configure_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { int cpu = dev->id; @@ -889,11 +889,11 @@ out: put_online_cpus(); return rc ? rc : count; } -static SYSDEV_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store); +static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store); #endif /* CONFIG_HOTPLUG_CPU */ -static ssize_t cpu_polarization_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t cpu_polarization_show(struct device *dev, + struct device_attribute *attr, char *buf) { int cpu = dev->id; ssize_t count; @@ -919,22 +919,22 @@ static ssize_t cpu_polarization_show(struct sys_device *dev, mutex_unlock(&smp_cpu_state_mutex); return count; } -static SYSDEV_ATTR(polarization, 0444, cpu_polarization_show, NULL); +static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL); -static ssize_t show_cpu_address(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t show_cpu_address(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%d\n", __cpu_logical_map[dev->id]); } -static SYSDEV_ATTR(address, 0444, show_cpu_address, NULL); +static DEVICE_ATTR(address, 0444, show_cpu_address, NULL); static struct attribute *cpu_common_attrs[] = { #ifdef CONFIG_HOTPLUG_CPU - &attr_configure.attr, + &dev_attr_configure.attr, #endif - &attr_address.attr, - &attr_polarization.attr, + &dev_attr_address.attr, + &dev_attr_polarization.attr, NULL, }; @@ -942,8 +942,8 @@ static struct attribute_group cpu_common_attr_group = { .attrs = cpu_common_attrs, }; -static ssize_t show_capability(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t show_capability(struct device *dev, + struct device_attribute *attr, char *buf) { unsigned int capability; int rc; @@ -953,10 +953,10 @@ static ssize_t show_capability(struct sys_device *dev, return rc; return sprintf(buf, "%u\n", capability); } -static SYSDEV_ATTR(capability, 0444, show_capability, NULL); +static DEVICE_ATTR(capability, 0444, show_capability, NULL); -static ssize_t show_idle_count(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t show_idle_count(struct device *dev, + struct device_attribute *attr, char *buf) { struct s390_idle_data *idle; unsigned long long idle_count; @@ -976,10 +976,10 @@ repeat: goto repeat; return sprintf(buf, "%llu\n", idle_count); } -static SYSDEV_ATTR(idle_count, 0444, show_idle_count, NULL); +static DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); -static ssize_t show_idle_time(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t show_idle_time(struct device *dev, + struct device_attribute *attr, char *buf) { struct s390_idle_data *idle; unsigned long long now, idle_time, idle_enter; @@ -1001,12 +1001,12 @@ repeat: goto repeat; return sprintf(buf, "%llu\n", idle_time >> 12); } -static SYSDEV_ATTR(idle_time_us, 0444, show_idle_time, NULL); +static DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); static struct attribute *cpu_online_attrs[] = { - &attr_capability.attr, - &attr_idle_count.attr, - &attr_idle_time_us.attr, + &dev_attr_capability.attr, + &dev_attr_idle_count.attr, + &dev_attr_idle_time_us.attr, NULL, }; @@ -1019,7 +1019,7 @@ static int __cpuinit smp_cpu_notify(struct notifier_block *self, { unsigned int cpu = (unsigned int)(long)hcpu; struct cpu *c = &per_cpu(cpu_devices, cpu); - struct sys_device *s = &c->sysdev; + struct device *s = &c->dev; struct s390_idle_data *idle; int err = 0; @@ -1045,7 +1045,7 @@ static struct notifier_block __cpuinitdata smp_cpu_nb = { static int __devinit smp_add_present_cpu(int cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); - struct sys_device *s = &c->sysdev; + struct device *s = &c->dev; int rc; c->hotpluggable = 1; @@ -1098,8 +1098,8 @@ out: return rc; } -static ssize_t __ref rescan_store(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t __ref rescan_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { @@ -1108,11 +1108,11 @@ static ssize_t __ref rescan_store(struct sysdev_class *class, rc = smp_rescan_cpus(); return rc ? rc : count; } -static SYSDEV_CLASS_ATTR(rescan, 0200, NULL, rescan_store); +static DEVICE_ATTR(rescan, 0200, NULL, rescan_store); #endif /* CONFIG_HOTPLUG_CPU */ -static ssize_t dispatching_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t dispatching_show(struct device *dev, + struct device_attribute *attr, char *buf) { ssize_t count; @@ -1123,8 +1123,8 @@ static ssize_t dispatching_show(struct sysdev_class *class, return count; } -static ssize_t dispatching_store(struct sysdev_class *dev, - struct sysdev_class_attribute *attr, +static ssize_t dispatching_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { @@ -1148,7 +1148,7 @@ out: put_online_cpus(); return rc ? rc : count; } -static SYSDEV_CLASS_ATTR(dispatching, 0644, dispatching_show, +static DEVICE_ATTR(dispatching, 0644, dispatching_show, dispatching_store); static int __init topology_init(void) @@ -1159,11 +1159,11 @@ static int __init topology_init(void) register_cpu_notifier(&smp_cpu_nb); #ifdef CONFIG_HOTPLUG_CPU - rc = sysdev_class_create_file(&cpu_sysdev_class, &attr_rescan); + rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan); if (rc) return rc; #endif - rc = sysdev_class_create_file(&cpu_sysdev_class, &attr_dispatching); + rc = device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching); if (rc) return rc; for_each_present_cpu(cpu) { diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 77b8942b9a1..6dfc524c31a 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -230,7 +230,7 @@ void store_topology(struct sysinfo_15_1_x *info) int arch_update_cpu_topology(void) { struct sysinfo_15_1_x *info = tl_info; - struct sys_device *sysdev; + struct device *dev; int cpu; if (!MACHINE_HAS_TOPOLOGY) { @@ -242,8 +242,8 @@ int arch_update_cpu_topology(void) tl_to_cores(info); update_cpu_core_map(); for_each_online_cpu(cpu) { - sysdev = get_cpu_sysdev(cpu); - kobject_uevent(&sysdev->kobj, KOBJ_CHANGE); + dev = get_cpu_device(cpu); + kobject_uevent(&dev->kobj, KOBJ_CHANGE); } return 1; } diff --git a/arch/sh/kernel/cpu/sh4/sq.c b/arch/sh/kernel/cpu/sh4/sq.c index f0907995b4c..a8140f0bbf6 100644 --- a/arch/sh/kernel/cpu/sh4/sq.c +++ b/arch/sh/kernel/cpu/sh4/sq.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include @@ -337,9 +337,9 @@ static struct kobj_type ktype_percpu_entry = { .default_attrs = sq_sysfs_attrs, }; -static int __devinit sq_sysdev_add(struct sys_device *sysdev) +static int __devinit sq_dev_add(struct device *dev) { - unsigned int cpu = sysdev->id; + unsigned int cpu = dev->id; struct kobject *kobj; int error; @@ -348,25 +348,27 @@ static int __devinit sq_sysdev_add(struct sys_device *sysdev) return -ENOMEM; kobj = sq_kobject[cpu]; - error = kobject_init_and_add(kobj, &ktype_percpu_entry, &sysdev->kobj, + error = kobject_init_and_add(kobj, &ktype_percpu_entry, &dev->kobj, "%s", "sq"); if (!error) kobject_uevent(kobj, KOBJ_ADD); return error; } -static int __devexit sq_sysdev_remove(struct sys_device *sysdev) +static int __devexit sq_dev_remove(struct device *dev) { - unsigned int cpu = sysdev->id; + unsigned int cpu = dev->id; struct kobject *kobj = sq_kobject[cpu]; kobject_put(kobj); return 0; } -static struct sysdev_driver sq_sysdev_driver = { - .add = sq_sysdev_add, - .remove = __devexit_p(sq_sysdev_remove), +static struct subsys_interface sq_interface = { + .name = "sq" + .subsys = &cpu_subsys, + .add_dev = sq_dev_add, + .remove_dev = __devexit_p(sq_dev_remove), }; static int __init sq_api_init(void) @@ -386,7 +388,7 @@ static int __init sq_api_init(void) if (unlikely(!sq_bitmap)) goto out; - ret = sysdev_driver_register(&cpu_sysdev_class, &sq_sysdev_driver); + ret = subsys_interface_register(&sq_interface); if (unlikely(ret != 0)) goto out; @@ -401,7 +403,7 @@ out: static void __exit sq_api_exit(void) { - sysdev_driver_unregister(&cpu_sysdev_class, &sq_sysdev_driver); + subsys_interface_unregister(&sq_interface); kfree(sq_bitmap); kmem_cache_destroy(sq_cache); } diff --git a/arch/sparc/kernel/sysfs.c b/arch/sparc/kernel/sysfs.c index 7408201d7ef..654e8aad3bb 100644 --- a/arch/sparc/kernel/sysfs.c +++ b/arch/sparc/kernel/sysfs.c @@ -3,7 +3,7 @@ * Copyright (C) 2007 David S. Miller */ #include -#include +#include #include #include #include @@ -16,13 +16,13 @@ static DEFINE_PER_CPU(struct hv_mmu_statistics, mmu_stats) __attribute__((aligned(64))); #define SHOW_MMUSTAT_ULONG(NAME) \ -static ssize_t show_##NAME(struct sys_device *dev, \ - struct sysdev_attribute *attr, char *buf) \ +static ssize_t show_##NAME(struct device *dev, \ + struct device_attribute *attr, char *buf) \ { \ struct hv_mmu_statistics *p = &per_cpu(mmu_stats, dev->id); \ return sprintf(buf, "%lu\n", p->NAME); \ } \ -static SYSDEV_ATTR(NAME, 0444, show_##NAME, NULL) +static DEVICE_ATTR(NAME, 0444, show_##NAME, NULL) SHOW_MMUSTAT_ULONG(immu_tsb_hits_ctx0_8k_tte); SHOW_MMUSTAT_ULONG(immu_tsb_ticks_ctx0_8k_tte); @@ -58,38 +58,38 @@ SHOW_MMUSTAT_ULONG(dmmu_tsb_hits_ctxnon0_256mb_tte); SHOW_MMUSTAT_ULONG(dmmu_tsb_ticks_ctxnon0_256mb_tte); static struct attribute *mmu_stat_attrs[] = { - &attr_immu_tsb_hits_ctx0_8k_tte.attr, - &attr_immu_tsb_ticks_ctx0_8k_tte.attr, - &attr_immu_tsb_hits_ctx0_64k_tte.attr, - &attr_immu_tsb_ticks_ctx0_64k_tte.attr, - &attr_immu_tsb_hits_ctx0_4mb_tte.attr, - &attr_immu_tsb_ticks_ctx0_4mb_tte.attr, - &attr_immu_tsb_hits_ctx0_256mb_tte.attr, - &attr_immu_tsb_ticks_ctx0_256mb_tte.attr, - &attr_immu_tsb_hits_ctxnon0_8k_tte.attr, - &attr_immu_tsb_ticks_ctxnon0_8k_tte.attr, - &attr_immu_tsb_hits_ctxnon0_64k_tte.attr, - &attr_immu_tsb_ticks_ctxnon0_64k_tte.attr, - &attr_immu_tsb_hits_ctxnon0_4mb_tte.attr, - &attr_immu_tsb_ticks_ctxnon0_4mb_tte.attr, - &attr_immu_tsb_hits_ctxnon0_256mb_tte.attr, - &attr_immu_tsb_ticks_ctxnon0_256mb_tte.attr, - &attr_dmmu_tsb_hits_ctx0_8k_tte.attr, - &attr_dmmu_tsb_ticks_ctx0_8k_tte.attr, - &attr_dmmu_tsb_hits_ctx0_64k_tte.attr, - &attr_dmmu_tsb_ticks_ctx0_64k_tte.attr, - &attr_dmmu_tsb_hits_ctx0_4mb_tte.attr, - &attr_dmmu_tsb_ticks_ctx0_4mb_tte.attr, - &attr_dmmu_tsb_hits_ctx0_256mb_tte.attr, - &attr_dmmu_tsb_ticks_ctx0_256mb_tte.attr, - &attr_dmmu_tsb_hits_ctxnon0_8k_tte.attr, - &attr_dmmu_tsb_ticks_ctxnon0_8k_tte.attr, - &attr_dmmu_tsb_hits_ctxnon0_64k_tte.attr, - &attr_dmmu_tsb_ticks_ctxnon0_64k_tte.attr, - &attr_dmmu_tsb_hits_ctxnon0_4mb_tte.attr, - &attr_dmmu_tsb_ticks_ctxnon0_4mb_tte.attr, - &attr_dmmu_tsb_hits_ctxnon0_256mb_tte.attr, - &attr_dmmu_tsb_ticks_ctxnon0_256mb_tte.attr, + &dev_attr_immu_tsb_hits_ctx0_8k_tte.attr, + &dev_attr_immu_tsb_ticks_ctx0_8k_tte.attr, + &dev_attr_immu_tsb_hits_ctx0_64k_tte.attr, + &dev_attr_immu_tsb_ticks_ctx0_64k_tte.attr, + &dev_attr_immu_tsb_hits_ctx0_4mb_tte.attr, + &dev_attr_immu_tsb_ticks_ctx0_4mb_tte.attr, + &dev_attr_immu_tsb_hits_ctx0_256mb_tte.attr, + &dev_attr_immu_tsb_ticks_ctx0_256mb_tte.attr, + &dev_attr_immu_tsb_hits_ctxnon0_8k_tte.attr, + &dev_attr_immu_tsb_ticks_ctxnon0_8k_tte.attr, + &dev_attr_immu_tsb_hits_ctxnon0_64k_tte.attr, + &dev_attr_immu_tsb_ticks_ctxnon0_64k_tte.attr, + &dev_attr_immu_tsb_hits_ctxnon0_4mb_tte.attr, + &dev_attr_immu_tsb_ticks_ctxnon0_4mb_tte.attr, + &dev_attr_immu_tsb_hits_ctxnon0_256mb_tte.attr, + &dev_attr_immu_tsb_ticks_ctxnon0_256mb_tte.attr, + &dev_attr_dmmu_tsb_hits_ctx0_8k_tte.attr, + &dev_attr_dmmu_tsb_ticks_ctx0_8k_tte.attr, + &dev_attr_dmmu_tsb_hits_ctx0_64k_tte.attr, + &dev_attr_dmmu_tsb_ticks_ctx0_64k_tte.attr, + &dev_attr_dmmu_tsb_hits_ctx0_4mb_tte.attr, + &dev_attr_dmmu_tsb_ticks_ctx0_4mb_tte.attr, + &dev_attr_dmmu_tsb_hits_ctx0_256mb_tte.attr, + &dev_attr_dmmu_tsb_ticks_ctx0_256mb_tte.attr, + &dev_attr_dmmu_tsb_hits_ctxnon0_8k_tte.attr, + &dev_attr_dmmu_tsb_ticks_ctxnon0_8k_tte.attr, + &dev_attr_dmmu_tsb_hits_ctxnon0_64k_tte.attr, + &dev_attr_dmmu_tsb_ticks_ctxnon0_64k_tte.attr, + &dev_attr_dmmu_tsb_hits_ctxnon0_4mb_tte.attr, + &dev_attr_dmmu_tsb_ticks_ctxnon0_4mb_tte.attr, + &dev_attr_dmmu_tsb_hits_ctxnon0_256mb_tte.attr, + &dev_attr_dmmu_tsb_ticks_ctxnon0_256mb_tte.attr, NULL, }; @@ -139,15 +139,15 @@ static unsigned long write_mmustat_enable(unsigned long val) return sun4v_mmustat_conf(ra, &orig_ra); } -static ssize_t show_mmustat_enable(struct sys_device *s, - struct sysdev_attribute *attr, char *buf) +static ssize_t show_mmustat_enable(struct device *s, + struct device_attribute *attr, char *buf) { unsigned long val = run_on_cpu(s->id, read_mmustat_enable, 0); return sprintf(buf, "%lx\n", val); } -static ssize_t store_mmustat_enable(struct sys_device *s, - struct sysdev_attribute *attr, const char *buf, +static ssize_t store_mmustat_enable(struct device *s, + struct device_attribute *attr, const char *buf, size_t count) { unsigned long val, err; @@ -163,39 +163,39 @@ static ssize_t store_mmustat_enable(struct sys_device *s, return count; } -static SYSDEV_ATTR(mmustat_enable, 0644, show_mmustat_enable, store_mmustat_enable); +static DEVICE_ATTR(mmustat_enable, 0644, show_mmustat_enable, store_mmustat_enable); static int mmu_stats_supported; -static int register_mmu_stats(struct sys_device *s) +static int register_mmu_stats(struct device *s) { if (!mmu_stats_supported) return 0; - sysdev_create_file(s, &attr_mmustat_enable); + device_create_file(s, &dev_attr_mmustat_enable); return sysfs_create_group(&s->kobj, &mmu_stat_group); } #ifdef CONFIG_HOTPLUG_CPU -static void unregister_mmu_stats(struct sys_device *s) +static void unregister_mmu_stats(struct device *s) { if (!mmu_stats_supported) return; sysfs_remove_group(&s->kobj, &mmu_stat_group); - sysdev_remove_file(s, &attr_mmustat_enable); + device_remove_file(s, &dev_attr_mmustat_enable); } #endif #define SHOW_CPUDATA_ULONG_NAME(NAME, MEMBER) \ -static ssize_t show_##NAME(struct sys_device *dev, \ - struct sysdev_attribute *attr, char *buf) \ +static ssize_t show_##NAME(struct device *dev, \ + struct device_attribute *attr, char *buf) \ { \ cpuinfo_sparc *c = &cpu_data(dev->id); \ return sprintf(buf, "%lu\n", c->MEMBER); \ } #define SHOW_CPUDATA_UINT_NAME(NAME, MEMBER) \ -static ssize_t show_##NAME(struct sys_device *dev, \ - struct sysdev_attribute *attr, char *buf) \ +static ssize_t show_##NAME(struct device *dev, \ + struct device_attribute *attr, char *buf) \ { \ cpuinfo_sparc *c = &cpu_data(dev->id); \ return sprintf(buf, "%u\n", c->MEMBER); \ @@ -209,14 +209,14 @@ SHOW_CPUDATA_UINT_NAME(l1_icache_line_size, icache_line_size); SHOW_CPUDATA_UINT_NAME(l2_cache_size, ecache_size); SHOW_CPUDATA_UINT_NAME(l2_cache_line_size, ecache_line_size); -static struct sysdev_attribute cpu_core_attrs[] = { - _SYSDEV_ATTR(clock_tick, 0444, show_clock_tick, NULL), - _SYSDEV_ATTR(l1_dcache_size, 0444, show_l1_dcache_size, NULL), - _SYSDEV_ATTR(l1_dcache_line_size, 0444, show_l1_dcache_line_size, NULL), - _SYSDEV_ATTR(l1_icache_size, 0444, show_l1_icache_size, NULL), - _SYSDEV_ATTR(l1_icache_line_size, 0444, show_l1_icache_line_size, NULL), - _SYSDEV_ATTR(l2_cache_size, 0444, show_l2_cache_size, NULL), - _SYSDEV_ATTR(l2_cache_line_size, 0444, show_l2_cache_line_size, NULL), +static struct device_attribute cpu_core_attrs[] = { + __ATTR(clock_tick, 0444, show_clock_tick, NULL), + __ATTR(l1_dcache_size, 0444, show_l1_dcache_size, NULL), + __ATTR(l1_dcache_line_size, 0444, show_l1_dcache_line_size, NULL), + __ATTR(l1_icache_size, 0444, show_l1_icache_size, NULL), + __ATTR(l1_icache_line_size, 0444, show_l1_icache_line_size, NULL), + __ATTR(l2_cache_size, 0444, show_l2_cache_size, NULL), + __ATTR(l2_cache_line_size, 0444, show_l2_cache_line_size, NULL), }; static DEFINE_PER_CPU(struct cpu, cpu_devices); @@ -224,11 +224,11 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); static void register_cpu_online(unsigned int cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); - struct sys_device *s = &c->sysdev; + struct device *s = &c->dev; int i; for (i = 0; i < ARRAY_SIZE(cpu_core_attrs); i++) - sysdev_create_file(s, &cpu_core_attrs[i]); + device_create_file(s, &cpu_core_attrs[i]); register_mmu_stats(s); } @@ -237,12 +237,12 @@ static void register_cpu_online(unsigned int cpu) static void unregister_cpu_online(unsigned int cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); - struct sys_device *s = &c->sysdev; + struct device *s = &c->dev; int i; unregister_mmu_stats(s); for (i = 0; i < ARRAY_SIZE(cpu_core_attrs); i++) - sysdev_remove_file(s, &cpu_core_attrs[i]); + device_remove_file(s, &cpu_core_attrs[i]); } #endif diff --git a/arch/tile/kernel/sysfs.c b/arch/tile/kernel/sysfs.c index b671a86f451..e7ce2a5161b 100644 --- a/arch/tile/kernel/sysfs.c +++ b/arch/tile/kernel/sysfs.c @@ -14,7 +14,7 @@ * /sys entry support. */ -#include +#include #include #include #include @@ -31,55 +31,55 @@ static ssize_t get_hv_confstr(char *page, int query) return n; } -static ssize_t chip_width_show(struct sysdev_class *dev, - struct sysdev_class_attribute *attr, +static ssize_t chip_width_show(struct device *dev, + struct device_attribute *attr, char *page) { return sprintf(page, "%u\n", smp_width); } -static SYSDEV_CLASS_ATTR(chip_width, 0444, chip_width_show, NULL); +static DEVICE_ATTR(chip_width, 0444, chip_width_show, NULL); -static ssize_t chip_height_show(struct sysdev_class *dev, - struct sysdev_class_attribute *attr, +static ssize_t chip_height_show(struct device *dev, + struct device_attribute *attr, char *page) { return sprintf(page, "%u\n", smp_height); } -static SYSDEV_CLASS_ATTR(chip_height, 0444, chip_height_show, NULL); +static DEVICE_ATTR(chip_height, 0444, chip_height_show, NULL); -static ssize_t chip_serial_show(struct sysdev_class *dev, - struct sysdev_class_attribute *attr, +static ssize_t chip_serial_show(struct device *dev, + struct device_attribute *attr, char *page) { return get_hv_confstr(page, HV_CONFSTR_CHIP_SERIAL_NUM); } -static SYSDEV_CLASS_ATTR(chip_serial, 0444, chip_serial_show, NULL); +static DEVICE_ATTR(chip_serial, 0444, chip_serial_show, NULL); -static ssize_t chip_revision_show(struct sysdev_class *dev, - struct sysdev_class_attribute *attr, +static ssize_t chip_revision_show(struct device *dev, + struct device_attribute *attr, char *page) { return get_hv_confstr(page, HV_CONFSTR_CHIP_REV); } -static SYSDEV_CLASS_ATTR(chip_revision, 0444, chip_revision_show, NULL); +static DEVICE_ATTR(chip_revision, 0444, chip_revision_show, NULL); -static ssize_t type_show(struct sysdev_class *dev, - struct sysdev_class_attribute *attr, +static ssize_t type_show(struct device *dev, + struct device_attribute *attr, char *page) { return sprintf(page, "tilera\n"); } -static SYSDEV_CLASS_ATTR(type, 0444, type_show, NULL); +static DEVICE_ATTR(type, 0444, type_show, NULL); #define HV_CONF_ATTR(name, conf) \ - static ssize_t name ## _show(struct sysdev_class *dev, \ - struct sysdev_class_attribute *attr, \ + static ssize_t name ## _show(struct device *dev, \ + struct device_attribute *attr, \ char *page) \ { \ return get_hv_confstr(page, conf); \ } \ - static SYSDEV_CLASS_ATTR(name, 0444, name ## _show, NULL); + static DEVICE_ATTR(name, 0444, name ## _show, NULL); HV_CONF_ATTR(version, HV_CONFSTR_HV_SW_VER) HV_CONF_ATTR(config_version, HV_CONFSTR_HV_CONFIG_VER) @@ -95,15 +95,15 @@ HV_CONF_ATTR(mezz_description, HV_CONFSTR_MEZZ_DESC) HV_CONF_ATTR(switch_control, HV_CONFSTR_SWITCH_CONTROL) static struct attribute *board_attrs[] = { - &attr_board_part.attr, - &attr_board_serial.attr, - &attr_board_revision.attr, - &attr_board_description.attr, - &attr_mezz_part.attr, - &attr_mezz_serial.attr, - &attr_mezz_revision.attr, - &attr_mezz_description.attr, - &attr_switch_control.attr, + &dev_attr_board_part.attr, + &dev_attr_board_serial.attr, + &dev_attr_board_revision.attr, + &dev_attr_board_description.attr, + &dev_attr_mezz_part.attr, + &dev_attr_mezz_serial.attr, + &dev_attr_mezz_revision.attr, + &dev_attr_mezz_description.attr, + &dev_attr_switch_control.attr, NULL }; @@ -150,12 +150,11 @@ hvconfig_bin_read(struct file *filp, struct kobject *kobj, static int __init create_sysfs_entries(void) { - struct sysdev_class *cls = &cpu_sysdev_class; int err = 0; #define create_cpu_attr(name) \ if (!err) \ - err = sysfs_create_file(&cls->kset.kobj, &attr_##name.attr); + err = device_create_file(cpu_subsys.dev_root, &dev_attr_##name); create_cpu_attr(chip_width); create_cpu_attr(chip_height); create_cpu_attr(chip_serial); @@ -163,7 +162,7 @@ static int __init create_sysfs_entries(void) #define create_hv_attr(name) \ if (!err) \ - err = sysfs_create_file(hypervisor_kobj, &attr_##name.attr); + err = sysfs_create_file(hypervisor_kobj, &dev_attr_##name); create_hv_attr(type); create_hv_attr(version); create_hv_attr(config_version); diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index c9321f34e55..0b05fb49c56 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -149,7 +149,7 @@ static inline void enable_p5_mce(void) {} void mce_setup(struct mce *m); void mce_log(struct mce *m); -DECLARE_PER_CPU(struct sys_device, mce_sysdev); +DECLARE_PER_CPU(struct device, mce_device); /* * Maximum banks number. diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index a3b0811693c..6b45e5e7a90 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -844,8 +844,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) #include #include - -extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */ +#include /* pointer to kobject for cpuX/cache */ static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject); @@ -1073,9 +1072,9 @@ err_out: static DECLARE_BITMAP(cache_dev_map, NR_CPUS); /* Add/Remove cache interface for CPU device */ -static int __cpuinit cache_add_dev(struct sys_device * sys_dev) +static int __cpuinit cache_add_dev(struct device *dev) { - unsigned int cpu = sys_dev->id; + unsigned int cpu = dev->id; unsigned long i, j; struct _index_kobject *this_object; struct _cpuid4_info *this_leaf; @@ -1087,7 +1086,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu), &ktype_percpu_entry, - &sys_dev->kobj, "%s", "cache"); + &dev->kobj, "%s", "cache"); if (retval < 0) { cpuid4_cache_sysfs_exit(cpu); return retval; @@ -1124,9 +1123,9 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) return 0; } -static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) +static void __cpuinit cache_remove_dev(struct device *dev) { - unsigned int cpu = sys_dev->id; + unsigned int cpu = dev->id; unsigned long i; if (per_cpu(ici_cpuid4_info, cpu) == NULL) @@ -1145,17 +1144,17 @@ static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; - struct sys_device *sys_dev; + struct device *dev; - sys_dev = get_cpu_sysdev(cpu); + dev = get_cpu_device(cpu); switch (action) { case CPU_ONLINE: case CPU_ONLINE_FROZEN: - cache_add_dev(sys_dev); + cache_add_dev(dev); break; case CPU_DEAD: case CPU_DEAD_FROZEN: - cache_remove_dev(sys_dev); + cache_remove_dev(dev); break; } return NOTIFY_OK; @@ -1174,9 +1173,9 @@ static int __cpuinit cache_sysfs_init(void) for_each_online_cpu(i) { int err; - struct sys_device *sys_dev = get_cpu_sysdev(i); + struct device *dev = get_cpu_device(i); - err = cache_add_dev(sys_dev); + err = cache_add_dev(dev); if (err) return err; } diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index fefcc69ee8b..ed44c8a6585 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -1,4 +1,4 @@ -#include +#include #include enum severity_level { @@ -17,7 +17,7 @@ enum severity_level { struct mce_bank { u64 ctl; /* subevents to enable */ unsigned char init; /* initialise bank? */ - struct sysdev_attribute attr; /* sysdev attribute */ + struct device_attribute attr; /* device attribute */ char attrname[ATTR_LEN]; /* attribute name */ }; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 362056aefeb..0156c6f85d7 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include @@ -1751,7 +1751,7 @@ static struct syscore_ops mce_syscore_ops = { }; /* - * mce_sysdev: Sysfs support + * mce_device: Sysfs support */ static void mce_cpu_restart(void *data) @@ -1787,27 +1787,28 @@ static void mce_enable_ce(void *all) __mcheck_cpu_init_timer(); } -static struct sysdev_class mce_sysdev_class = { +static struct bus_type mce_subsys = { .name = "machinecheck", + .dev_name = "machinecheck", }; -DEFINE_PER_CPU(struct sys_device, mce_sysdev); +DEFINE_PER_CPU(struct device, mce_device); __cpuinitdata void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); -static inline struct mce_bank *attr_to_bank(struct sysdev_attribute *attr) +static inline struct mce_bank *attr_to_bank(struct device_attribute *attr) { return container_of(attr, struct mce_bank, attr); } -static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, +static ssize_t show_bank(struct device *s, struct device_attribute *attr, char *buf) { return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl); } -static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, +static ssize_t set_bank(struct device *s, struct device_attribute *attr, const char *buf, size_t size) { u64 new; @@ -1822,14 +1823,14 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, } static ssize_t -show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf) +show_trigger(struct device *s, struct device_attribute *attr, char *buf) { strcpy(buf, mce_helper); strcat(buf, "\n"); return strlen(mce_helper) + 1; } -static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, +static ssize_t set_trigger(struct device *s, struct device_attribute *attr, const char *buf, size_t siz) { char *p; @@ -1844,8 +1845,8 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, return strlen(mce_helper) + !!p; } -static ssize_t set_ignore_ce(struct sys_device *s, - struct sysdev_attribute *attr, +static ssize_t set_ignore_ce(struct device *s, + struct device_attribute *attr, const char *buf, size_t size) { u64 new; @@ -1868,8 +1869,8 @@ static ssize_t set_ignore_ce(struct sys_device *s, return size; } -static ssize_t set_cmci_disabled(struct sys_device *s, - struct sysdev_attribute *attr, +static ssize_t set_cmci_disabled(struct device *s, + struct device_attribute *attr, const char *buf, size_t size) { u64 new; @@ -1891,108 +1892,107 @@ static ssize_t set_cmci_disabled(struct sys_device *s, return size; } -static ssize_t store_int_with_restart(struct sys_device *s, - struct sysdev_attribute *attr, +static ssize_t store_int_with_restart(struct device *s, + struct device_attribute *attr, const char *buf, size_t size) { - ssize_t ret = sysdev_store_int(s, attr, buf, size); + ssize_t ret = device_store_int(s, attr, buf, size); mce_restart(); return ret; } -static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); -static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); -static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout); -static SYSDEV_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce); +static DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger); +static DEVICE_INT_ATTR(tolerant, 0644, tolerant); +static DEVICE_INT_ATTR(monarch_timeout, 0644, monarch_timeout); +static DEVICE_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce); -static struct sysdev_ext_attribute attr_check_interval = { - _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int, - store_int_with_restart), +static struct dev_ext_attribute dev_attr_check_interval = { + __ATTR(check_interval, 0644, device_show_int, store_int_with_restart), &check_interval }; -static struct sysdev_ext_attribute attr_ignore_ce = { - _SYSDEV_ATTR(ignore_ce, 0644, sysdev_show_int, set_ignore_ce), +static struct dev_ext_attribute dev_attr_ignore_ce = { + __ATTR(ignore_ce, 0644, device_show_int, set_ignore_ce), &mce_ignore_ce }; -static struct sysdev_ext_attribute attr_cmci_disabled = { - _SYSDEV_ATTR(cmci_disabled, 0644, sysdev_show_int, set_cmci_disabled), +static struct dev_ext_attribute dev_attr_cmci_disabled = { + __ATTR(cmci_disabled, 0644, device_show_int, set_cmci_disabled), &mce_cmci_disabled }; -static struct sysdev_attribute *mce_sysdev_attrs[] = { - &attr_tolerant.attr, - &attr_check_interval.attr, - &attr_trigger, - &attr_monarch_timeout.attr, - &attr_dont_log_ce.attr, - &attr_ignore_ce.attr, - &attr_cmci_disabled.attr, +static struct device_attribute *mce_device_attrs[] = { + &dev_attr_tolerant.attr, + &dev_attr_check_interval.attr, + &dev_attr_trigger, + &dev_attr_monarch_timeout.attr, + &dev_attr_dont_log_ce.attr, + &dev_attr_ignore_ce.attr, + &dev_attr_cmci_disabled.attr, NULL }; -static cpumask_var_t mce_sysdev_initialized; +static cpumask_var_t mce_device_initialized; -/* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */ -static __cpuinit int mce_sysdev_create(unsigned int cpu) +/* Per cpu device init. All of the cpus still share the same ctrl bank: */ +static __cpuinit int mce_device_create(unsigned int cpu) { - struct sys_device *sysdev = &per_cpu(mce_sysdev, cpu); + struct device *dev = &per_cpu(mce_device, cpu); int err; int i, j; if (!mce_available(&boot_cpu_data)) return -EIO; - memset(&sysdev->kobj, 0, sizeof(struct kobject)); - sysdev->id = cpu; - sysdev->cls = &mce_sysdev_class; + memset(&dev->kobj, 0, sizeof(struct kobject)); + dev->id = cpu; + dev->bus = &mce_subsys; - err = sysdev_register(sysdev); + err = device_register(dev); if (err) return err; - for (i = 0; mce_sysdev_attrs[i]; i++) { - err = sysdev_create_file(sysdev, mce_sysdev_attrs[i]); + for (i = 0; mce_device_attrs[i]; i++) { + err = device_create_file(dev, mce_device_attrs[i]); if (err) goto error; } for (j = 0; j < banks; j++) { - err = sysdev_create_file(sysdev, &mce_banks[j].attr); + err = device_create_file(dev, &mce_banks[j].attr); if (err) goto error2; } - cpumask_set_cpu(cpu, mce_sysdev_initialized); + cpumask_set_cpu(cpu, mce_device_initialized); return 0; error2: while (--j >= 0) - sysdev_remove_file(sysdev, &mce_banks[j].attr); + device_remove_file(dev, &mce_banks[j].attr); error: while (--i >= 0) - sysdev_remove_file(sysdev, mce_sysdev_attrs[i]); + device_remove_file(dev, mce_device_attrs[i]); - sysdev_unregister(sysdev); + device_unregister(dev); return err; } -static __cpuinit void mce_sysdev_remove(unsigned int cpu) +static __cpuinit void mce_device_remove(unsigned int cpu) { - struct sys_device *sysdev = &per_cpu(mce_sysdev, cpu); + struct device *dev = &per_cpu(mce_device, cpu); int i; - if (!cpumask_test_cpu(cpu, mce_sysdev_initialized)) + if (!cpumask_test_cpu(cpu, mce_device_initialized)) return; - for (i = 0; mce_sysdev_attrs[i]; i++) - sysdev_remove_file(sysdev, mce_sysdev_attrs[i]); + for (i = 0; mce_device_attrs[i]; i++) + device_remove_file(dev, mce_device_attrs[i]); for (i = 0; i < banks; i++) - sysdev_remove_file(sysdev, &mce_banks[i].attr); + device_remove_file(dev, &mce_banks[i].attr); - sysdev_unregister(sysdev); - cpumask_clear_cpu(cpu, mce_sysdev_initialized); + device_unregister(dev); + cpumask_clear_cpu(cpu, mce_device_initialized); } /* Make sure there are no machine checks on offlined CPUs. */ @@ -2042,7 +2042,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) switch (action) { case CPU_ONLINE: case CPU_ONLINE_FROZEN: - mce_sysdev_create(cpu); + mce_device_create(cpu); if (threshold_cpu_callback) threshold_cpu_callback(action, cpu); break; @@ -2050,7 +2050,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) case CPU_DEAD_FROZEN: if (threshold_cpu_callback) threshold_cpu_callback(action, cpu); - mce_sysdev_remove(cpu); + mce_device_remove(cpu); break; case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: @@ -2084,7 +2084,7 @@ static __init void mce_init_banks(void) for (i = 0; i < banks; i++) { struct mce_bank *b = &mce_banks[i]; - struct sysdev_attribute *a = &b->attr; + struct device_attribute *a = &b->attr; sysfs_attr_init(&a->attr); a->attr.name = b->attrname; @@ -2104,16 +2104,16 @@ static __init int mcheck_init_device(void) if (!mce_available(&boot_cpu_data)) return -EIO; - zalloc_cpumask_var(&mce_sysdev_initialized, GFP_KERNEL); + zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL); mce_init_banks(); - err = sysdev_class_register(&mce_sysdev_class); + err = subsys_system_register(&mce_subsys, NULL); if (err) return err; for_each_online_cpu(i) { - err = mce_sysdev_create(i); + err = mce_device_create(i); if (err) return err; } diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index f5474218cff..56d2aa1acd5 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -548,7 +547,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) if (!b) goto out; - err = sysfs_create_link(&per_cpu(mce_sysdev, cpu).kobj, + err = sysfs_create_link(&per_cpu(mce_device, cpu).kobj, b->kobj, name); if (err) goto out; @@ -571,7 +570,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) goto out; } - b->kobj = kobject_create_and_add(name, &per_cpu(mce_sysdev, cpu).kobj); + b->kobj = kobject_create_and_add(name, &per_cpu(mce_device, cpu).kobj); if (!b->kobj) goto out_free; @@ -591,7 +590,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) if (i == cpu) continue; - err = sysfs_create_link(&per_cpu(mce_sysdev, i).kobj, + err = sysfs_create_link(&per_cpu(mce_device, i).kobj, b->kobj, name); if (err) goto out; @@ -669,7 +668,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) #ifdef CONFIG_SMP /* sibling symlink */ if (shared_bank[bank] && b->blocks->cpu != cpu) { - sysfs_remove_link(&per_cpu(mce_sysdev, cpu).kobj, name); + sysfs_remove_link(&per_cpu(mce_device, cpu).kobj, name); per_cpu(threshold_banks, cpu)[bank] = NULL; return; @@ -681,7 +680,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) if (i == cpu) continue; - sysfs_remove_link(&per_cpu(mce_sysdev, i).kobj, name); + sysfs_remove_link(&per_cpu(mce_device, i).kobj, name); per_cpu(threshold_banks, i)[bank] = NULL; } diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 787e06c84ea..59e3f6ed265 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -69,16 +68,16 @@ static atomic_t therm_throt_en = ATOMIC_INIT(0); static u32 lvtthmr_init __read_mostly; #ifdef CONFIG_SYSFS -#define define_therm_throt_sysdev_one_ro(_name) \ - static SYSDEV_ATTR(_name, 0444, \ - therm_throt_sysdev_show_##_name, \ +#define define_therm_throt_device_one_ro(_name) \ + static DEVICE_ATTR(_name, 0444, \ + therm_throt_device_show_##_name, \ NULL) \ -#define define_therm_throt_sysdev_show_func(event, name) \ +#define define_therm_throt_device_show_func(event, name) \ \ -static ssize_t therm_throt_sysdev_show_##event##_##name( \ - struct sys_device *dev, \ - struct sysdev_attribute *attr, \ +static ssize_t therm_throt_device_show_##event##_##name( \ + struct device *dev, \ + struct device_attribute *attr, \ char *buf) \ { \ unsigned int cpu = dev->id; \ @@ -95,20 +94,20 @@ static ssize_t therm_throt_sysdev_show_##event##_##name( \ return ret; \ } -define_therm_throt_sysdev_show_func(core_throttle, count); -define_therm_throt_sysdev_one_ro(core_throttle_count); +define_therm_throt_device_show_func(core_throttle, count); +define_therm_throt_device_one_ro(core_throttle_count); -define_therm_throt_sysdev_show_func(core_power_limit, count); -define_therm_throt_sysdev_one_ro(core_power_limit_count); +define_therm_throt_device_show_func(core_power_limit, count); +define_therm_throt_device_one_ro(core_power_limit_count); -define_therm_throt_sysdev_show_func(package_throttle, count); -define_therm_throt_sysdev_one_ro(package_throttle_count); +define_therm_throt_device_show_func(package_throttle, count); +define_therm_throt_device_one_ro(package_throttle_count); -define_therm_throt_sysdev_show_func(package_power_limit, count); -define_therm_throt_sysdev_one_ro(package_power_limit_count); +define_therm_throt_device_show_func(package_power_limit, count); +define_therm_throt_device_one_ro(package_power_limit_count); static struct attribute *thermal_throttle_attrs[] = { - &attr_core_throttle_count.attr, + &dev_attr_core_throttle_count.attr, NULL }; @@ -223,36 +222,36 @@ static int thresh_event_valid(int event) #ifdef CONFIG_SYSFS /* Add/Remove thermal_throttle interface for CPU device: */ -static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev, +static __cpuinit int thermal_throttle_add_dev(struct device *dev, unsigned int cpu) { int err; struct cpuinfo_x86 *c = &cpu_data(cpu); - err = sysfs_create_group(&sys_dev->kobj, &thermal_attr_group); + err = sysfs_create_group(&dev->kobj, &thermal_attr_group); if (err) return err; if (cpu_has(c, X86_FEATURE_PLN)) - err = sysfs_add_file_to_group(&sys_dev->kobj, - &attr_core_power_limit_count.attr, + err = sysfs_add_file_to_group(&dev->kobj, + &dev_attr_core_power_limit_count.attr, thermal_attr_group.name); if (cpu_has(c, X86_FEATURE_PTS)) { - err = sysfs_add_file_to_group(&sys_dev->kobj, - &attr_package_throttle_count.attr, + err = sysfs_add_file_to_group(&dev->kobj, + &dev_attr_package_throttle_count.attr, thermal_attr_group.name); if (cpu_has(c, X86_FEATURE_PLN)) - err = sysfs_add_file_to_group(&sys_dev->kobj, - &attr_package_power_limit_count.attr, + err = sysfs_add_file_to_group(&dev->kobj, + &dev_attr_package_power_limit_count.attr, thermal_attr_group.name); } return err; } -static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) +static __cpuinit void thermal_throttle_remove_dev(struct device *dev) { - sysfs_remove_group(&sys_dev->kobj, &thermal_attr_group); + sysfs_remove_group(&dev->kobj, &thermal_attr_group); } /* Mutex protecting device creation against CPU hotplug: */ @@ -265,16 +264,16 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; - struct sys_device *sys_dev; + struct device *dev; int err = 0; - sys_dev = get_cpu_sysdev(cpu); + dev = get_cpu_device(cpu); switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: mutex_lock(&therm_cpu_lock); - err = thermal_throttle_add_dev(sys_dev, cpu); + err = thermal_throttle_add_dev(dev, cpu); mutex_unlock(&therm_cpu_lock); WARN_ON(err); break; @@ -283,7 +282,7 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb, case CPU_DEAD: case CPU_DEAD_FROZEN: mutex_lock(&therm_cpu_lock); - thermal_throttle_remove_dev(sys_dev); + thermal_throttle_remove_dev(dev); mutex_unlock(&therm_cpu_lock); break; } @@ -310,7 +309,7 @@ static __init int thermal_throttle_init_device(void) #endif /* connect live CPUs to sysfs */ for_each_online_cpu(cpu) { - err = thermal_throttle_add_dev(get_cpu_sysdev(cpu), cpu); + err = thermal_throttle_add_dev(get_cpu_device(cpu), cpu); WARN_ON(err); } #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index f2d2a664e79..cf88f2a1647 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -292,8 +292,8 @@ static int reload_for_cpu(int cpu) return err; } -static ssize_t reload_store(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t reload_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) { unsigned long val; @@ -318,30 +318,30 @@ static ssize_t reload_store(struct sys_device *dev, return ret; } -static ssize_t version_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t version_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; return sprintf(buf, "0x%x\n", uci->cpu_sig.rev); } -static ssize_t pf_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t pf_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; return sprintf(buf, "0x%x\n", uci->cpu_sig.pf); } -static SYSDEV_ATTR(reload, 0200, NULL, reload_store); -static SYSDEV_ATTR(version, 0400, version_show, NULL); -static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL); +static DEVICE_ATTR(reload, 0200, NULL, reload_store); +static DEVICE_ATTR(version, 0400, version_show, NULL); +static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL); static struct attribute *mc_default_attrs[] = { - &attr_reload.attr, - &attr_version.attr, - &attr_processor_flags.attr, + &dev_attr_reload.attr, + &dev_attr_version.attr, + &dev_attr_processor_flags.attr, NULL }; @@ -405,43 +405,45 @@ static enum ucode_state microcode_update_cpu(int cpu) return ustate; } -static int mc_sysdev_add(struct sys_device *sys_dev) +static int mc_device_add(struct device *dev, struct subsys_interface *sif) { - int err, cpu = sys_dev->id; + int err, cpu = dev->id; if (!cpu_online(cpu)) return 0; pr_debug("CPU%d added\n", cpu); - err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); + err = sysfs_create_group(&dev->kobj, &mc_attr_group); if (err) return err; if (microcode_init_cpu(cpu) == UCODE_ERROR) { - sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); + sysfs_remove_group(&dev->kobj, &mc_attr_group); return -EINVAL; } return err; } -static int mc_sysdev_remove(struct sys_device *sys_dev) +static int mc_device_remove(struct device *dev, struct subsys_interface *sif) { - int cpu = sys_dev->id; + int cpu = dev->id; if (!cpu_online(cpu)) return 0; pr_debug("CPU%d removed\n", cpu); microcode_fini_cpu(cpu); - sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); + sysfs_remove_group(&dev->kobj, &mc_attr_group); return 0; } -static struct sysdev_driver mc_sysdev_driver = { - .add = mc_sysdev_add, - .remove = mc_sysdev_remove, +static struct subsys_interface mc_cpu_interface = { + .name = "microcode", + .subsys = &cpu_subsys, + .add_dev = mc_device_add, + .remove_dev = mc_device_remove, }; /** @@ -464,9 +466,9 @@ static __cpuinit int mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; - struct sys_device *sys_dev; + struct device *dev; - sys_dev = get_cpu_sysdev(cpu); + dev = get_cpu_device(cpu); switch (action) { case CPU_ONLINE: case CPU_ONLINE_FROZEN: @@ -474,13 +476,13 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) case CPU_DOWN_FAILED: case CPU_DOWN_FAILED_FROZEN: pr_debug("CPU%d added\n", cpu); - if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) + if (sysfs_create_group(&dev->kobj, &mc_attr_group)) pr_err("Failed to create group for CPU%d\n", cpu); break; case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: /* Suspend is in progress, only remove the interface */ - sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); + sysfs_remove_group(&dev->kobj, &mc_attr_group); pr_debug("CPU%d removed\n", cpu); break; @@ -527,7 +529,7 @@ static int __init microcode_init(void) get_online_cpus(); mutex_lock(µcode_mutex); - error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); + error = subsys_interface_register(&mc_cpu_interface); mutex_unlock(µcode_mutex); put_online_cpus(); @@ -561,7 +563,7 @@ static void __exit microcode_exit(void) get_online_cpus(); mutex_lock(µcode_mutex); - sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); + subsys_interface_unregister(&mc_cpu_interface); mutex_unlock(µcode_mutex); put_online_cpus(); diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index 9d7bc9f6b6c..20a68ca386d 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -446,7 +446,7 @@ static int __cpuinit acpi_processor_add(struct acpi_device *device) { struct acpi_processor *pr = NULL; int result = 0; - struct sys_device *sysdev; + struct device *dev; pr = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL); if (!pr) @@ -491,8 +491,8 @@ static int __cpuinit acpi_processor_add(struct acpi_device *device) per_cpu(processors, pr->id) = pr; - sysdev = get_cpu_sysdev(pr->id); - if (sysfs_create_link(&device->dev.kobj, &sysdev->kobj, "sysdev")) { + dev = get_cpu_device(pr->id); + if (sysfs_create_link(&device->dev.kobj, &dev->kobj, "sysdev")) { result = -EFAULT; goto err_free_cpumask; } diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c index 870550d6a4b..3b599abf2b4 100644 --- a/drivers/acpi/processor_thermal.c +++ b/drivers/acpi/processor_thermal.c @@ -30,7 +30,6 @@ #include #include #include -#include #include diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 251acea3d35..5bb0298fbcc 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -1,8 +1,7 @@ /* - * drivers/base/cpu.c - basic CPU class support + * CPU subsystem support */ -#include #include #include #include @@ -14,40 +13,40 @@ #include "base.h" -static struct sysdev_class_attribute *cpu_sysdev_class_attrs[]; - -struct sysdev_class cpu_sysdev_class = { +struct bus_type cpu_subsys = { .name = "cpu", - .attrs = cpu_sysdev_class_attrs, + .dev_name = "cpu", }; -EXPORT_SYMBOL(cpu_sysdev_class); +EXPORT_SYMBOL_GPL(cpu_subsys); -static DEFINE_PER_CPU(struct sys_device *, cpu_sys_devices); +static DEFINE_PER_CPU(struct device *, cpu_sys_devices); #ifdef CONFIG_HOTPLUG_CPU -static ssize_t show_online(struct sys_device *dev, struct sysdev_attribute *attr, +static ssize_t show_online(struct device *dev, + struct device_attribute *attr, char *buf) { - struct cpu *cpu = container_of(dev, struct cpu, sysdev); + struct cpu *cpu = container_of(dev, struct cpu, dev); - return sprintf(buf, "%u\n", !!cpu_online(cpu->sysdev.id)); + return sprintf(buf, "%u\n", !!cpu_online(cpu->dev.id)); } -static ssize_t __ref store_online(struct sys_device *dev, struct sysdev_attribute *attr, - const char *buf, size_t count) +static ssize_t __ref store_online(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { - struct cpu *cpu = container_of(dev, struct cpu, sysdev); + struct cpu *cpu = container_of(dev, struct cpu, dev); ssize_t ret; cpu_hotplug_driver_lock(); switch (buf[0]) { case '0': - ret = cpu_down(cpu->sysdev.id); + ret = cpu_down(cpu->dev.id); if (!ret) kobject_uevent(&dev->kobj, KOBJ_OFFLINE); break; case '1': - ret = cpu_up(cpu->sysdev.id); + ret = cpu_up(cpu->dev.id); if (!ret) kobject_uevent(&dev->kobj, KOBJ_ONLINE); break; @@ -60,44 +59,44 @@ static ssize_t __ref store_online(struct sys_device *dev, struct sysdev_attribut ret = count; return ret; } -static SYSDEV_ATTR(online, 0644, show_online, store_online); +static DEVICE_ATTR(online, 0644, show_online, store_online); static void __cpuinit register_cpu_control(struct cpu *cpu) { - sysdev_create_file(&cpu->sysdev, &attr_online); + device_create_file(&cpu->dev, &dev_attr_online); } void unregister_cpu(struct cpu *cpu) { - int logical_cpu = cpu->sysdev.id; + int logical_cpu = cpu->dev.id; unregister_cpu_under_node(logical_cpu, cpu_to_node(logical_cpu)); - sysdev_remove_file(&cpu->sysdev, &attr_online); + device_remove_file(&cpu->dev, &dev_attr_online); - sysdev_unregister(&cpu->sysdev); + device_unregister(&cpu->dev); per_cpu(cpu_sys_devices, logical_cpu) = NULL; return; } #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE -static ssize_t cpu_probe_store(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t cpu_probe_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { return arch_cpu_probe(buf, count); } -static ssize_t cpu_release_store(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t cpu_release_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { return arch_cpu_release(buf, count); } -static SYSDEV_CLASS_ATTR(probe, S_IWUSR, NULL, cpu_probe_store); -static SYSDEV_CLASS_ATTR(release, S_IWUSR, NULL, cpu_release_store); +static DEVICE_ATTR(probe, S_IWUSR, NULL, cpu_probe_store); +static DEVICE_ATTR(release, S_IWUSR, NULL, cpu_release_store); #endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ #else /* ... !CONFIG_HOTPLUG_CPU */ @@ -109,15 +108,15 @@ static inline void register_cpu_control(struct cpu *cpu) #ifdef CONFIG_KEXEC #include -static ssize_t show_crash_notes(struct sys_device *dev, struct sysdev_attribute *attr, +static ssize_t show_crash_notes(struct device *dev, struct device_attribute *attr, char *buf) { - struct cpu *cpu = container_of(dev, struct cpu, sysdev); + struct cpu *cpu = container_of(dev, struct cpu, dev); ssize_t rc; unsigned long long addr; int cpunum; - cpunum = cpu->sysdev.id; + cpunum = cpu->dev.id; /* * Might be reading other cpu's data based on which cpu read thread @@ -129,7 +128,7 @@ static ssize_t show_crash_notes(struct sys_device *dev, struct sysdev_attribute rc = sprintf(buf, "%Lx\n", addr); return rc; } -static SYSDEV_ATTR(crash_notes, 0400, show_crash_notes, NULL); +static DEVICE_ATTR(crash_notes, 0400, show_crash_notes, NULL); #endif /* @@ -137,12 +136,12 @@ static SYSDEV_ATTR(crash_notes, 0400, show_crash_notes, NULL); */ struct cpu_attr { - struct sysdev_class_attribute attr; + struct device_attribute attr; const struct cpumask *const * const map; }; -static ssize_t show_cpus_attr(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t show_cpus_attr(struct device *dev, + struct device_attribute *attr, char *buf) { struct cpu_attr *ca = container_of(attr, struct cpu_attr, attr); @@ -153,10 +152,10 @@ static ssize_t show_cpus_attr(struct sysdev_class *class, return n; } -#define _CPU_ATTR(name, map) \ - { _SYSDEV_CLASS_ATTR(name, 0444, show_cpus_attr, NULL), map } +#define _CPU_ATTR(name, map) \ + { __ATTR(name, 0444, show_cpus_attr, NULL), map } -/* Keep in sync with cpu_sysdev_class_attrs */ +/* Keep in sync with cpu_subsys_attrs */ static struct cpu_attr cpu_attrs[] = { _CPU_ATTR(online, &cpu_online_mask), _CPU_ATTR(possible, &cpu_possible_mask), @@ -166,19 +165,19 @@ static struct cpu_attr cpu_attrs[] = { /* * Print values for NR_CPUS and offlined cpus */ -static ssize_t print_cpus_kernel_max(struct sysdev_class *class, - struct sysdev_class_attribute *attr, char *buf) +static ssize_t print_cpus_kernel_max(struct device *dev, + struct device_attribute *attr, char *buf) { int n = snprintf(buf, PAGE_SIZE-2, "%d\n", NR_CPUS - 1); return n; } -static SYSDEV_CLASS_ATTR(kernel_max, 0444, print_cpus_kernel_max, NULL); +static DEVICE_ATTR(kernel_max, 0444, print_cpus_kernel_max, NULL); /* arch-optional setting to enable display of offline cpus >= nr_cpu_ids */ unsigned int total_cpus; -static ssize_t print_cpus_offline(struct sysdev_class *class, - struct sysdev_class_attribute *attr, char *buf) +static ssize_t print_cpus_offline(struct device *dev, + struct device_attribute *attr, char *buf) { int n = 0, len = PAGE_SIZE-2; cpumask_var_t offline; @@ -205,7 +204,7 @@ static ssize_t print_cpus_offline(struct sysdev_class *class, n += snprintf(&buf[n], len - n, "\n"); return n; } -static SYSDEV_CLASS_ATTR(offline, 0444, print_cpus_offline, NULL); +static DEVICE_ATTR(offline, 0444, print_cpus_offline, NULL); /* * register_cpu - Setup a sysfs device for a CPU. @@ -218,57 +217,66 @@ static SYSDEV_CLASS_ATTR(offline, 0444, print_cpus_offline, NULL); int __cpuinit register_cpu(struct cpu *cpu, int num) { int error; - cpu->node_id = cpu_to_node(num); - cpu->sysdev.id = num; - cpu->sysdev.cls = &cpu_sysdev_class; - - error = sysdev_register(&cpu->sysdev); + cpu->node_id = cpu_to_node(num); + cpu->dev.id = num; + cpu->dev.bus = &cpu_subsys; + error = device_register(&cpu->dev); if (!error && cpu->hotpluggable) register_cpu_control(cpu); if (!error) - per_cpu(cpu_sys_devices, num) = &cpu->sysdev; + per_cpu(cpu_sys_devices, num) = &cpu->dev; if (!error) register_cpu_under_node(num, cpu_to_node(num)); #ifdef CONFIG_KEXEC if (!error) - error = sysdev_create_file(&cpu->sysdev, &attr_crash_notes); + error = device_create_file(&cpu->dev, &dev_attr_crash_notes); #endif return error; } -struct sys_device *get_cpu_sysdev(unsigned cpu) +struct device *get_cpu_device(unsigned cpu) { if (cpu < nr_cpu_ids && cpu_possible(cpu)) return per_cpu(cpu_sys_devices, cpu); else return NULL; } -EXPORT_SYMBOL_GPL(get_cpu_sysdev); +EXPORT_SYMBOL_GPL(get_cpu_device); + +static struct attribute *cpu_root_attrs[] = { +#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE + &dev_attr_probe.attr, + &dev_attr_release.attr, +#endif + &cpu_attrs[0].attr.attr, + &cpu_attrs[1].attr.attr, + &cpu_attrs[2].attr.attr, + &dev_attr_kernel_max.attr, + &dev_attr_offline.attr, + NULL +}; + +static struct attribute_group cpu_root_attr_group = { + .attrs = cpu_root_attrs, +}; + +static const struct attribute_group *cpu_root_attr_groups[] = { + &cpu_root_attr_group, + NULL, +}; int __init cpu_dev_init(void) { int err; - err = sysdev_class_register(&cpu_sysdev_class); + err = subsys_system_register(&cpu_subsys, cpu_root_attr_groups); + if (err) + return err; + #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) - if (!err) - err = sched_create_sysfs_power_savings_entries(&cpu_sysdev_class); + err = sched_create_sysfs_power_savings_entries(cpu_subsys.dev_root); #endif - return err; } - -static struct sysdev_class_attribute *cpu_sysdev_class_attrs[] = { -#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE - &attr_probe, - &attr_release, -#endif - &cpu_attrs[0].attr, - &cpu_attrs[1].attr, - &cpu_attrs[2].attr, - &attr_kernel_max, - &attr_offline, - NULL -}; diff --git a/drivers/base/node.c b/drivers/base/node.c index 793f796c4da..6ce1501c7de 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -315,12 +315,12 @@ struct node node_devices[MAX_NUMNODES]; int register_cpu_under_node(unsigned int cpu, unsigned int nid) { int ret; - struct sys_device *obj; + struct device *obj; if (!node_online(nid)) return 0; - obj = get_cpu_sysdev(cpu); + obj = get_cpu_device(cpu); if (!obj) return 0; @@ -337,12 +337,12 @@ int register_cpu_under_node(unsigned int cpu, unsigned int nid) int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) { - struct sys_device *obj; + struct device *obj; if (!node_online(nid)) return 0; - obj = get_cpu_sysdev(cpu); + obj = get_cpu_device(cpu); if (!obj) return 0; diff --git a/drivers/base/topology.c b/drivers/base/topology.c index f6f37a05a0c..ae989c57cd5 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -23,7 +23,6 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * */ -#include #include #include #include @@ -32,14 +31,14 @@ #include #define define_one_ro_named(_name, _func) \ -static SYSDEV_ATTR(_name, 0444, _func, NULL) + static DEVICE_ATTR(_name, 0444, _func, NULL) #define define_one_ro(_name) \ -static SYSDEV_ATTR(_name, 0444, show_##_name, NULL) + static DEVICE_ATTR(_name, 0444, show_##_name, NULL) #define define_id_show_func(name) \ -static ssize_t show_##name(struct sys_device *dev, \ - struct sysdev_attribute *attr, char *buf) \ +static ssize_t show_##name(struct device *dev, \ + struct device_attribute *attr, char *buf) \ { \ unsigned int cpu = dev->id; \ return sprintf(buf, "%d\n", topology_##name(cpu)); \ @@ -65,16 +64,16 @@ static ssize_t show_cpumap(int type, const struct cpumask *mask, char *buf) #ifdef arch_provides_topology_pointers #define define_siblings_show_map(name) \ -static ssize_t show_##name(struct sys_device *dev, \ - struct sysdev_attribute *attr, char *buf) \ +static ssize_t show_##name(struct device *dev, \ + struct device_attribute *attr, char *buf) \ { \ unsigned int cpu = dev->id; \ return show_cpumap(0, topology_##name(cpu), buf); \ } #define define_siblings_show_list(name) \ -static ssize_t show_##name##_list(struct sys_device *dev, \ - struct sysdev_attribute *attr, \ +static ssize_t show_##name##_list(struct device *dev, \ + struct device_attribute *attr, \ char *buf) \ { \ unsigned int cpu = dev->id; \ @@ -83,15 +82,15 @@ static ssize_t show_##name##_list(struct sys_device *dev, \ #else #define define_siblings_show_map(name) \ -static ssize_t show_##name(struct sys_device *dev, \ - struct sysdev_attribute *attr, char *buf) \ +static ssize_t show_##name(struct device *dev, \ + struct device_attribute *attr, char *buf) \ { \ return show_cpumap(0, topology_##name(dev->id), buf); \ } #define define_siblings_show_list(name) \ -static ssize_t show_##name##_list(struct sys_device *dev, \ - struct sysdev_attribute *attr, \ +static ssize_t show_##name##_list(struct device *dev, \ + struct device_attribute *attr, \ char *buf) \ { \ return show_cpumap(1, topology_##name(dev->id), buf); \ @@ -124,16 +123,16 @@ define_one_ro_named(book_siblings_list, show_book_cpumask_list); #endif static struct attribute *default_attrs[] = { - &attr_physical_package_id.attr, - &attr_core_id.attr, - &attr_thread_siblings.attr, - &attr_thread_siblings_list.attr, - &attr_core_siblings.attr, - &attr_core_siblings_list.attr, + &dev_attr_physical_package_id.attr, + &dev_attr_core_id.attr, + &dev_attr_thread_siblings.attr, + &dev_attr_thread_siblings_list.attr, + &dev_attr_core_siblings.attr, + &dev_attr_core_siblings_list.attr, #ifdef CONFIG_SCHED_BOOK - &attr_book_id.attr, - &attr_book_siblings.attr, - &attr_book_siblings_list.attr, + &dev_attr_book_id.attr, + &dev_attr_book_siblings.attr, + &dev_attr_book_siblings_list.attr, #endif NULL }; @@ -146,16 +145,16 @@ static struct attribute_group topology_attr_group = { /* Add/Remove cpu_topology interface for CPU device */ static int __cpuinit topology_add_dev(unsigned int cpu) { - struct sys_device *sys_dev = get_cpu_sysdev(cpu); + struct device *dev = get_cpu_device(cpu); - return sysfs_create_group(&sys_dev->kobj, &topology_attr_group); + return sysfs_create_group(&dev->kobj, &topology_attr_group); } static void __cpuinit topology_remove_dev(unsigned int cpu) { - struct sys_device *sys_dev = get_cpu_sysdev(cpu); + struct device *dev = get_cpu_device(cpu); - sysfs_remove_group(&sys_dev->kobj, &topology_attr_group); + sysfs_remove_group(&dev->kobj, &topology_attr_group); } static int __cpuinit topology_cpu_callback(struct notifier_block *nfb, diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 987a165ede2..8c2df3499da 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -679,7 +679,7 @@ static struct kobj_type ktype_cpufreq = { */ static int cpufreq_add_dev_policy(unsigned int cpu, struct cpufreq_policy *policy, - struct sys_device *sys_dev) + struct device *dev) { int ret = 0; #ifdef CONFIG_SMP @@ -728,7 +728,7 @@ static int cpufreq_add_dev_policy(unsigned int cpu, spin_unlock_irqrestore(&cpufreq_driver_lock, flags); pr_debug("CPU already managed, adding link\n"); - ret = sysfs_create_link(&sys_dev->kobj, + ret = sysfs_create_link(&dev->kobj, &managed_policy->kobj, "cpufreq"); if (ret) @@ -761,7 +761,7 @@ static int cpufreq_add_dev_symlink(unsigned int cpu, for_each_cpu(j, policy->cpus) { struct cpufreq_policy *managed_policy; - struct sys_device *cpu_sys_dev; + struct device *cpu_dev; if (j == cpu) continue; @@ -770,8 +770,8 @@ static int cpufreq_add_dev_symlink(unsigned int cpu, pr_debug("CPU %u already managed, adding link\n", j); managed_policy = cpufreq_cpu_get(cpu); - cpu_sys_dev = get_cpu_sysdev(j); - ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj, + cpu_dev = get_cpu_device(j); + ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj, "cpufreq"); if (ret) { cpufreq_cpu_put(managed_policy); @@ -783,7 +783,7 @@ static int cpufreq_add_dev_symlink(unsigned int cpu, static int cpufreq_add_dev_interface(unsigned int cpu, struct cpufreq_policy *policy, - struct sys_device *sys_dev) + struct device *dev) { struct cpufreq_policy new_policy; struct freq_attr **drv_attr; @@ -793,7 +793,7 @@ static int cpufreq_add_dev_interface(unsigned int cpu, /* prepare interface data */ ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, - &sys_dev->kobj, "cpufreq"); + &dev->kobj, "cpufreq"); if (ret) return ret; @@ -866,9 +866,9 @@ err_out_kobj_put: * with with cpu hotplugging and all hell will break loose. Tried to clean this * mess up, but more thorough testing is needed. - Mathieu */ -static int cpufreq_add_dev(struct sys_device *sys_dev) +static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) { - unsigned int cpu = sys_dev->id; + unsigned int cpu = dev->id; int ret = 0, found = 0; struct cpufreq_policy *policy; unsigned long flags; @@ -947,7 +947,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_START, policy); - ret = cpufreq_add_dev_policy(cpu, policy, sys_dev); + ret = cpufreq_add_dev_policy(cpu, policy, dev); if (ret) { if (ret > 0) /* This is a managed cpu, symlink created, @@ -956,7 +956,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) goto err_unlock_policy; } - ret = cpufreq_add_dev_interface(cpu, policy, sys_dev); + ret = cpufreq_add_dev_interface(cpu, policy, dev); if (ret) goto err_out_unregister; @@ -999,15 +999,15 @@ module_out: * Caller should already have policy_rwsem in write mode for this CPU. * This routine frees the rwsem before returning. */ -static int __cpufreq_remove_dev(struct sys_device *sys_dev) +static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) { - unsigned int cpu = sys_dev->id; + unsigned int cpu = dev->id; unsigned long flags; struct cpufreq_policy *data; struct kobject *kobj; struct completion *cmp; #ifdef CONFIG_SMP - struct sys_device *cpu_sys_dev; + struct device *cpu_dev; unsigned int j; #endif @@ -1032,7 +1032,7 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) pr_debug("removing link\n"); cpumask_clear_cpu(cpu, data->cpus); spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - kobj = &sys_dev->kobj; + kobj = &dev->kobj; cpufreq_cpu_put(data); unlock_policy_rwsem_write(cpu); sysfs_remove_link(kobj, "cpufreq"); @@ -1071,8 +1071,8 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) strncpy(per_cpu(cpufreq_cpu_governor, j), data->governor->name, CPUFREQ_NAME_LEN); #endif - cpu_sys_dev = get_cpu_sysdev(j); - kobj = &cpu_sys_dev->kobj; + cpu_dev = get_cpu_device(j); + kobj = &cpu_dev->kobj; unlock_policy_rwsem_write(cpu); sysfs_remove_link(kobj, "cpufreq"); lock_policy_rwsem_write(cpu); @@ -1112,11 +1112,11 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) if (unlikely(cpumask_weight(data->cpus) > 1)) { /* first sibling now owns the new sysfs dir */ cpumask_clear_cpu(cpu, data->cpus); - cpufreq_add_dev(get_cpu_sysdev(cpumask_first(data->cpus))); + cpufreq_add_dev(get_cpu_device(cpumask_first(data->cpus)), NULL); /* finally remove our own symlink */ lock_policy_rwsem_write(cpu); - __cpufreq_remove_dev(sys_dev); + __cpufreq_remove_dev(dev, sif); } #endif @@ -1128,9 +1128,9 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) } -static int cpufreq_remove_dev(struct sys_device *sys_dev) +static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) { - unsigned int cpu = sys_dev->id; + unsigned int cpu = dev->id; int retval; if (cpu_is_offline(cpu)) @@ -1139,7 +1139,7 @@ static int cpufreq_remove_dev(struct sys_device *sys_dev) if (unlikely(lock_policy_rwsem_write(cpu))) BUG(); - retval = __cpufreq_remove_dev(sys_dev); + retval = __cpufreq_remove_dev(dev, sif); return retval; } @@ -1271,9 +1271,11 @@ out: } EXPORT_SYMBOL(cpufreq_get); -static struct sysdev_driver cpufreq_sysdev_driver = { - .add = cpufreq_add_dev, - .remove = cpufreq_remove_dev, +static struct subsys_interface cpufreq_interface = { + .name = "cpufreq", + .subsys = &cpu_subsys, + .add_dev = cpufreq_add_dev, + .remove_dev = cpufreq_remove_dev, }; @@ -1765,25 +1767,25 @@ static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; - struct sys_device *sys_dev; + struct device *dev; - sys_dev = get_cpu_sysdev(cpu); - if (sys_dev) { + dev = get_cpu_device(cpu); + if (dev) { switch (action) { case CPU_ONLINE: case CPU_ONLINE_FROZEN: - cpufreq_add_dev(sys_dev); + cpufreq_add_dev(dev, NULL); break; case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: if (unlikely(lock_policy_rwsem_write(cpu))) BUG(); - __cpufreq_remove_dev(sys_dev); + __cpufreq_remove_dev(dev, NULL); break; case CPU_DOWN_FAILED: case CPU_DOWN_FAILED_FROZEN: - cpufreq_add_dev(sys_dev); + cpufreq_add_dev(dev, NULL); break; } } @@ -1830,8 +1832,7 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) cpufreq_driver = driver_data; spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - ret = sysdev_driver_register(&cpu_sysdev_class, - &cpufreq_sysdev_driver); + ret = subsys_interface_register(&cpufreq_interface); if (ret) goto err_null_driver; @@ -1850,7 +1851,7 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) if (ret) { pr_debug("no CPU initialized for driver %s\n", driver_data->name); - goto err_sysdev_unreg; + goto err_if_unreg; } } @@ -1858,9 +1859,8 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) pr_debug("driver %s up and running\n", driver_data->name); return 0; -err_sysdev_unreg: - sysdev_driver_unregister(&cpu_sysdev_class, - &cpufreq_sysdev_driver); +err_if_unreg: + subsys_interface_unregister(&cpufreq_interface); err_null_driver: spin_lock_irqsave(&cpufreq_driver_lock, flags); cpufreq_driver = NULL; @@ -1887,7 +1887,7 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver) pr_debug("unregistering driver %s\n", driver->name); - sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver); + subsys_interface_unregister(&cpufreq_interface); unregister_hotcpu_notifier(&cpufreq_cpu_notifier); spin_lock_irqsave(&cpufreq_driver_lock, flags); @@ -1907,8 +1907,7 @@ static int __init cpufreq_core_init(void) init_rwsem(&per_cpu(cpu_policy_rwsem, cpu)); } - cpufreq_global_kobject = kobject_create_and_add("cpufreq", - &cpu_sysdev_class.kset.kobj); + cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj); BUG_ON(!cpufreq_global_kobject); register_syscore_ops(&cpufreq_syscore_ops); diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index c5072a91e84..390380a8cfc 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -11,7 +11,6 @@ #include #include -#include #include #include #include diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 06ce2680d00..59f4261c753 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -291,10 +291,10 @@ EXPORT_SYMBOL_GPL(cpuidle_disable_device); static int __cpuidle_register_device(struct cpuidle_device *dev) { int ret; - struct sys_device *sys_dev = get_cpu_sysdev((unsigned long)dev->cpu); + struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu); struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver(); - if (!sys_dev) + if (!dev) return -EINVAL; if (!try_module_get(cpuidle_driver->owner)) return -EINVAL; @@ -303,7 +303,7 @@ static int __cpuidle_register_device(struct cpuidle_device *dev) per_cpu(cpuidle_devices, dev->cpu) = dev; list_add(&dev->device_list, &cpuidle_detected_devices); - if ((ret = cpuidle_add_sysfs(sys_dev))) { + if ((ret = cpuidle_add_sysfs(cpu_dev))) { module_put(cpuidle_driver->owner); return ret; } @@ -344,7 +344,7 @@ EXPORT_SYMBOL_GPL(cpuidle_register_device); */ void cpuidle_unregister_device(struct cpuidle_device *dev) { - struct sys_device *sys_dev = get_cpu_sysdev((unsigned long)dev->cpu); + struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu); struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver(); if (dev->registered == 0) @@ -354,7 +354,7 @@ void cpuidle_unregister_device(struct cpuidle_device *dev) cpuidle_disable_device(dev); - cpuidle_remove_sysfs(sys_dev); + cpuidle_remove_sysfs(cpu_dev); list_del(&dev->device_list); wait_for_completion(&dev->kobj_unregister); per_cpu(cpuidle_devices, dev->cpu) = NULL; @@ -411,7 +411,7 @@ static int __init cpuidle_init(void) if (cpuidle_disabled()) return -ENODEV; - ret = cpuidle_add_class_sysfs(&cpu_sysdev_class); + ret = cpuidle_add_interface(cpu_subsys.dev_root); if (ret) return ret; diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h index 38c3fd8b9d7..7db186685c2 100644 --- a/drivers/cpuidle/cpuidle.h +++ b/drivers/cpuidle/cpuidle.h @@ -5,7 +5,7 @@ #ifndef __DRIVER_CPUIDLE_H #define __DRIVER_CPUIDLE_H -#include +#include /* For internal use only */ extern struct cpuidle_governor *cpuidle_curr_governor; @@ -23,11 +23,11 @@ extern void cpuidle_uninstall_idle_handler(void); extern int cpuidle_switch_governor(struct cpuidle_governor *gov); /* sysfs */ -extern int cpuidle_add_class_sysfs(struct sysdev_class *cls); -extern void cpuidle_remove_class_sysfs(struct sysdev_class *cls); +extern int cpuidle_add_interface(struct device *dev); +extern void cpuidle_remove_interface(struct device *dev); extern int cpuidle_add_state_sysfs(struct cpuidle_device *device); extern void cpuidle_remove_state_sysfs(struct cpuidle_device *device); -extern int cpuidle_add_sysfs(struct sys_device *sysdev); -extern void cpuidle_remove_sysfs(struct sys_device *sysdev); +extern int cpuidle_add_sysfs(struct device *dev); +extern void cpuidle_remove_sysfs(struct device *dev); #endif /* __DRIVER_CPUIDLE_H */ diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 1e756e160dc..3fe41fe4851 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -22,8 +22,8 @@ static int __init cpuidle_sysfs_setup(char *unused) } __setup("cpuidle_sysfs_switch", cpuidle_sysfs_setup); -static ssize_t show_available_governors(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t show_available_governors(struct device *dev, + struct device_attribute *attr, char *buf) { ssize_t i = 0; @@ -42,8 +42,8 @@ out: return i; } -static ssize_t show_current_driver(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t show_current_driver(struct device *dev, + struct device_attribute *attr, char *buf) { ssize_t ret; @@ -59,8 +59,8 @@ static ssize_t show_current_driver(struct sysdev_class *class, return ret; } -static ssize_t show_current_governor(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t show_current_governor(struct device *dev, + struct device_attribute *attr, char *buf) { ssize_t ret; @@ -75,8 +75,8 @@ static ssize_t show_current_governor(struct sysdev_class *class, return ret; } -static ssize_t store_current_governor(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t store_current_governor(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { char gov_name[CPUIDLE_NAME_LEN]; @@ -109,50 +109,48 @@ static ssize_t store_current_governor(struct sysdev_class *class, return count; } -static SYSDEV_CLASS_ATTR(current_driver, 0444, show_current_driver, NULL); -static SYSDEV_CLASS_ATTR(current_governor_ro, 0444, show_current_governor, - NULL); +static DEVICE_ATTR(current_driver, 0444, show_current_driver, NULL); +static DEVICE_ATTR(current_governor_ro, 0444, show_current_governor, NULL); -static struct attribute *cpuclass_default_attrs[] = { - &attr_current_driver.attr, - &attr_current_governor_ro.attr, +static struct attribute *cpuidle_default_attrs[] = { + &dev_attr_current_driver.attr, + &dev_attr_current_governor_ro.attr, NULL }; -static SYSDEV_CLASS_ATTR(available_governors, 0444, show_available_governors, - NULL); -static SYSDEV_CLASS_ATTR(current_governor, 0644, show_current_governor, - store_current_governor); +static DEVICE_ATTR(available_governors, 0444, show_available_governors, NULL); +static DEVICE_ATTR(current_governor, 0644, show_current_governor, + store_current_governor); -static struct attribute *cpuclass_switch_attrs[] = { - &attr_available_governors.attr, - &attr_current_driver.attr, - &attr_current_governor.attr, +static struct attribute *cpuidle_switch_attrs[] = { + &dev_attr_available_governors.attr, + &dev_attr_current_driver.attr, + &dev_attr_current_governor.attr, NULL }; -static struct attribute_group cpuclass_attr_group = { - .attrs = cpuclass_default_attrs, +static struct attribute_group cpuidle_attr_group = { + .attrs = cpuidle_default_attrs, .name = "cpuidle", }; /** - * cpuidle_add_class_sysfs - add CPU global sysfs attributes + * cpuidle_add_interface - add CPU global sysfs attributes */ -int cpuidle_add_class_sysfs(struct sysdev_class *cls) +int cpuidle_add_interface(struct device *dev) { if (sysfs_switch) - cpuclass_attr_group.attrs = cpuclass_switch_attrs; + cpuidle_attr_group.attrs = cpuidle_switch_attrs; - return sysfs_create_group(&cls->kset.kobj, &cpuclass_attr_group); + return sysfs_create_group(&dev->kobj, &cpuidle_attr_group); } /** - * cpuidle_remove_class_sysfs - remove CPU global sysfs attributes + * cpuidle_remove_interface - remove CPU global sysfs attributes */ -void cpuidle_remove_class_sysfs(struct sysdev_class *cls) +void cpuidle_remove_interface(struct device *dev) { - sysfs_remove_group(&cls->kset.kobj, &cpuclass_attr_group); + sysfs_remove_group(&dev->kobj, &cpuidle_attr_group); } struct cpuidle_attr { @@ -365,16 +363,16 @@ void cpuidle_remove_state_sysfs(struct cpuidle_device *device) /** * cpuidle_add_sysfs - creates a sysfs instance for the target device - * @sysdev: the target device + * @dev: the target device */ -int cpuidle_add_sysfs(struct sys_device *sysdev) +int cpuidle_add_sysfs(struct device *cpu_dev) { - int cpu = sysdev->id; + int cpu = cpu_dev->id; struct cpuidle_device *dev; int error; dev = per_cpu(cpuidle_devices, cpu); - error = kobject_init_and_add(&dev->kobj, &ktype_cpuidle, &sysdev->kobj, + error = kobject_init_and_add(&dev->kobj, &ktype_cpuidle, &cpu_dev->kobj, "cpuidle"); if (!error) kobject_uevent(&dev->kobj, KOBJ_ADD); @@ -383,11 +381,11 @@ int cpuidle_add_sysfs(struct sys_device *sysdev) /** * cpuidle_remove_sysfs - deletes a sysfs instance on the target device - * @sysdev: the target device + * @dev: the target device */ -void cpuidle_remove_sysfs(struct sys_device *sysdev) +void cpuidle_remove_sysfs(struct device *cpu_dev) { - int cpu = sysdev->id; + int cpu = cpu_dev->id; struct cpuidle_device *dev; dev = per_cpu(cpuidle_devices, cpu); diff --git a/drivers/s390/char/sclp_config.c b/drivers/s390/char/sclp_config.c index 95b909ac2b7..3c03c1060be 100644 --- a/drivers/s390/char/sclp_config.c +++ b/drivers/s390/char/sclp_config.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include @@ -31,14 +31,14 @@ static struct work_struct sclp_cpu_change_work; static void sclp_cpu_capability_notify(struct work_struct *work) { int cpu; - struct sys_device *sysdev; + struct device *dev; s390_adjust_jiffies(); pr_warning("cpu capability changed.\n"); get_online_cpus(); for_each_online_cpu(cpu) { - sysdev = get_cpu_sysdev(cpu); - kobject_uevent(&sysdev->kobj, KOBJ_CHANGE); + dev = get_cpu_device(cpu); + kobject_uevent(&dev->kobj, KOBJ_CHANGE); } put_online_cpus(); } diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 6cb60fd2ea8..fc3da0d70d6 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -14,7 +14,7 @@ #ifndef _LINUX_CPU_H_ #define _LINUX_CPU_H_ -#include +#include #include #include #include @@ -22,19 +22,19 @@ struct cpu { int node_id; /* The node which contains the CPU */ int hotpluggable; /* creates sysfs control file if hotpluggable */ - struct sys_device sysdev; + struct device dev; }; extern int register_cpu(struct cpu *cpu, int num); -extern struct sys_device *get_cpu_sysdev(unsigned cpu); +extern struct device *get_cpu_device(unsigned cpu); -extern int cpu_add_sysdev_attr(struct sysdev_attribute *attr); -extern void cpu_remove_sysdev_attr(struct sysdev_attribute *attr); +extern int cpu_add_dev_attr(struct device_attribute *attr); +extern void cpu_remove_dev_attr(struct device_attribute *attr); -extern int cpu_add_sysdev_attr_group(struct attribute_group *attrs); -extern void cpu_remove_sysdev_attr_group(struct attribute_group *attrs); +extern int cpu_add_dev_attr_group(struct attribute_group *attrs); +extern void cpu_remove_dev_attr_group(struct attribute_group *attrs); -extern int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls); +extern int sched_create_sysfs_power_savings_entries(struct device *dev); #ifdef CONFIG_HOTPLUG_CPU extern void unregister_cpu(struct cpu *cpu); @@ -160,7 +160,7 @@ static inline void cpu_maps_update_done(void) } #endif /* CONFIG_SMP */ -extern struct sysdev_class cpu_sysdev_class; +extern struct bus_type cpu_subsys; #ifdef CONFIG_HOTPLUG_CPU /* Stop CPUs going up and down. */ diff --git a/kernel/sched.c b/kernel/sched.c index 0e9344a71be..53077264644 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7923,54 +7923,52 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) } #ifdef CONFIG_SCHED_MC -static ssize_t sched_mc_power_savings_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, - char *page) +static ssize_t sched_mc_power_savings_show(struct device *dev, + struct device_attribute *attr, + char *buf) { - return sprintf(page, "%u\n", sched_mc_power_savings); + return sprintf(buf, "%u\n", sched_mc_power_savings); } -static ssize_t sched_mc_power_savings_store(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t sched_mc_power_savings_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { return sched_power_savings_store(buf, count, 0); } -static SYSDEV_CLASS_ATTR(sched_mc_power_savings, 0644, - sched_mc_power_savings_show, - sched_mc_power_savings_store); +static DEVICE_ATTR(sched_mc_power_savings, 0644, + sched_mc_power_savings_show, + sched_mc_power_savings_store); #endif #ifdef CONFIG_SCHED_SMT -static ssize_t sched_smt_power_savings_show(struct sysdev_class *dev, - struct sysdev_class_attribute *attr, - char *page) +static ssize_t sched_smt_power_savings_show(struct device *dev, + struct device_attribute *attr, + char *buf) { - return sprintf(page, "%u\n", sched_smt_power_savings); + return sprintf(buf, "%u\n", sched_smt_power_savings); } -static ssize_t sched_smt_power_savings_store(struct sysdev_class *dev, - struct sysdev_class_attribute *attr, +static ssize_t sched_smt_power_savings_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { return sched_power_savings_store(buf, count, 1); } -static SYSDEV_CLASS_ATTR(sched_smt_power_savings, 0644, +static DEVICE_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_show, sched_smt_power_savings_store); #endif -int __init sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) +int __init sched_create_sysfs_power_savings_entries(struct device *dev) { int err = 0; #ifdef CONFIG_SCHED_SMT if (smt_capable()) - err = sysfs_create_file(&cls->kset.kobj, - &attr_sched_smt_power_savings.attr); + err = device_create_file(dev, &dev_attr_sched_smt_power_savings); #endif #ifdef CONFIG_SCHED_MC if (!err && mc_capable()) - err = sysfs_create_file(&cls->kset.kobj, - &attr_sched_mc_power_savings.attr); + err = device_create_file(dev, &dev_attr_sched_mc_power_savings); #endif return err; } -- cgit v1.2.3-70-g09d2 From edbaa603eb801655e80808a9cf3d3b622e8ac66b Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Wed, 21 Dec 2011 16:26:03 -0800 Subject: driver-core: remove sysdev.h usage. The sysdev.h file should not be needed by any in-kernel code, so remove the .h file from these random files that seem to still want to include it. The sysdev code will be going away soon, so this include needs to be removed no matter what. Cc: Jiandong Zheng Cc: Scott Branden Cc: Russell King Cc: Kukjin Kim Cc: David Brown Cc: Daniel Walker Cc: Bryan Huntsman Cc: Ben Dooks Cc: Wan ZongShun Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Cc: Guan Xuetao Cc: "Venkatesh Pallipadi Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Grant Likely Cc: Richard Purdie Cc: Matthew Garrett Signed-off-by: Kay Sievers --- arch/arm/mach-bcmring/core.c | 1 - arch/arm/mach-exynos/irq-eint.c | 2 +- arch/arm/mach-integrator/integrator_cp.c | 2 +- arch/arm/mach-ks8695/irq.c | 2 +- arch/arm/mach-lpc32xx/phy3250.c | 2 +- arch/arm/mach-msm/board-sapphire.c | 2 +- arch/arm/mach-realview/core.c | 2 +- arch/arm/mach-realview/realview_eb.c | 2 +- arch/arm/mach-realview/realview_pb1176.c | 2 +- arch/arm/mach-realview/realview_pb11mp.c | 2 +- arch/arm/mach-realview/realview_pba8.c | 2 +- arch/arm/mach-realview/realview_pbx.c | 2 +- arch/arm/mach-s3c2410/bast-irq.c | 2 +- arch/arm/mach-s3c2410/mach-h1940.c | 2 +- arch/arm/mach-s3c2410/mach-qt2410.c | 2 +- arch/arm/mach-s3c2412/clock.c | 2 +- arch/arm/mach-s3c2440/mach-rx1950.c | 2 +- arch/arm/mach-s3c2440/mach-rx3715.c | 2 +- arch/arm/mach-s3c2443/clock.c | 2 +- arch/arm/mach-s3c64xx/irq-eint.c | 2 +- arch/arm/mach-s5p64x0/clock-s5p6440.c | 2 +- arch/arm/mach-s5p64x0/clock-s5p6450.c | 2 +- arch/arm/mach-s5p64x0/clock.c | 2 +- arch/arm/mach-s5pv210/clock.c | 2 +- arch/arm/mach-s5pv210/mach-smdkc110.c | 2 +- arch/arm/mach-s5pv210/mach-smdkv210.c | 2 +- arch/arm/mach-versatile/core.c | 1 - arch/arm/mach-versatile/versatile_ab.c | 1 - arch/arm/mach-versatile/versatile_pb.c | 1 - arch/arm/mach-vexpress/v2m.c | 2 +- arch/arm/mach-w90x900/irq.c | 2 +- arch/arm/plat-s3c24xx/common-smdk.c | 2 +- arch/arm/plat-s3c24xx/cpu-freq.c | 2 +- arch/arm/plat-s3c24xx/irq.c | 2 +- arch/arm/plat-s3c24xx/pm-simtec.c | 1 - arch/arm/plat-s3c24xx/s3c2410-clock.c | 2 +- arch/arm/plat-s3c24xx/s3c2412-iotiming.c | 2 +- arch/arm/plat-s5p/clock.c | 2 +- arch/arm/plat-s5p/irq-eint.c | 2 +- arch/arm/plat-samsung/clock-clksrc.c | 2 +- arch/arm/plat-samsung/clock.c | 2 +- arch/arm/plat-samsung/pm-gpio.c | 2 +- arch/arm/plat-samsung/wakeup-mask.c | 2 +- arch/avr32/boards/merisc/merisc_sysfs.c | 1 - arch/avr32/kernel/irq.c | 2 +- arch/mips/txx9/generic/setup_tx4939.c | 2 +- arch/powerpc/platforms/cell/smp.c | 2 +- arch/powerpc/platforms/iseries/smp.c | 2 +- arch/powerpc/platforms/powermac/cpufreq_32.c | 2 +- arch/powerpc/platforms/pseries/smp.c | 2 +- arch/powerpc/sysdev/uic.c | 1 - arch/unicore32/kernel/puv3-core.c | 1 - arch/unicore32/kernel/puv3-nb0916.c | 1 - arch/x86/kernel/hpet.c | 1 - arch/x86/kernel/irqinit.c | 2 +- arch/x86/platform/uv/uv_sysfs.c | 2 +- drivers/gpio/gpio-samsung.c | 2 +- drivers/leds/led-class.c | 1 - drivers/leds/led-triggers.c | 1 - drivers/macintosh/smu.c | 4 ---- drivers/net/bonding/bond_sysfs.c | 1 - drivers/platform/x86/intel_scu_ipc.c | 2 +- drivers/s390/block/xpram.c | 2 +- 63 files changed, 49 insertions(+), 66 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/arm/mach-bcmring/core.c b/arch/arm/mach-bcmring/core.c index 43eadbcc29e..31d84ae44c4 100644 --- a/arch/arm/mach-bcmring/core.c +++ b/arch/arm/mach-bcmring/core.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm/mach-exynos/irq-eint.c b/arch/arm/mach-exynos/irq-eint.c index badb8c66fc9..fe461901d60 100644 --- a/arch/arm/mach-exynos/irq-eint.c +++ b/arch/arm/mach-exynos/irq-eint.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/mach-integrator/integrator_cp.c b/arch/arm/mach-integrator/integrator_cp.c index 5de49c33e4d..208545c53da 100644 --- a/arch/arm/mach-integrator/integrator_cp.c +++ b/arch/arm/mach-integrator/integrator_cp.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-ks8695/irq.c b/arch/arm/mach-ks8695/irq.c index a78092dcd6f..76802aac0f4 100644 --- a/arch/arm/mach-ks8695/irq.c +++ b/arch/arm/mach-ks8695/irq.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/mach-lpc32xx/phy3250.c b/arch/arm/mach-lpc32xx/phy3250.c index 6d2f0d1b937..21f3d4b9ee8 100644 --- a/arch/arm/mach-lpc32xx/phy3250.c +++ b/arch/arm/mach-lpc32xx/phy3250.c @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-msm/board-sapphire.c b/arch/arm/mach-msm/board-sapphire.c index 32b465763db..97b8191d9d3 100644 --- a/arch/arm/mach-msm/board-sapphire.c +++ b/arch/arm/mach-msm/board-sapphire.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c index d5ed5d4f77d..acd329afc3a 100644 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-realview/realview_eb.c b/arch/arm/mach-realview/realview_eb.c index 026c66ad7ec..af608f76e0a 100644 --- a/arch/arm/mach-realview/realview_eb.c +++ b/arch/arm/mach-realview/realview_eb.c @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-realview/realview_pb1176.c b/arch/arm/mach-realview/realview_pb1176.c index c057540ec77..510424669d0 100644 --- a/arch/arm/mach-realview/realview_pb1176.c +++ b/arch/arm/mach-realview/realview_pb1176.c @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-realview/realview_pb11mp.c b/arch/arm/mach-realview/realview_pb11mp.c index 671ad6d6ff0..70d1bbdf2a3 100644 --- a/arch/arm/mach-realview/realview_pb11mp.c +++ b/arch/arm/mach-realview/realview_pb11mp.c @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-realview/realview_pba8.c b/arch/arm/mach-realview/realview_pba8.c index cbf22df4ad5..b841fc0a75d 100644 --- a/arch/arm/mach-realview/realview_pba8.c +++ b/arch/arm/mach-realview/realview_pba8.c @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-realview/realview_pbx.c b/arch/arm/mach-realview/realview_pbx.c index 63c4114afae..e102120fc6a 100644 --- a/arch/arm/mach-realview/realview_pbx.c +++ b/arch/arm/mach-realview/realview_pbx.c @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-s3c2410/bast-irq.c b/arch/arm/mach-s3c2410/bast-irq.c index bc53d2d16d1..ac7b2ad5c40 100644 --- a/arch/arm/mach-s3c2410/bast-irq.c +++ b/arch/arm/mach-s3c2410/bast-irq.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/mach-s3c2410/mach-h1940.c b/arch/arm/mach-s3c2410/mach-h1940.c index 05a7d16e59f..837a2d6f352 100644 --- a/arch/arm/mach-s3c2410/mach-h1940.c +++ b/arch/arm/mach-s3c2410/mach-h1940.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-s3c2410/mach-qt2410.c b/arch/arm/mach-s3c2410/mach-qt2410.c index 45185215625..6f69789943d 100644 --- a/arch/arm/mach-s3c2410/mach-qt2410.c +++ b/arch/arm/mach-s3c2410/mach-qt2410.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-s3c2412/clock.c b/arch/arm/mach-s3c2412/clock.c index 140711db6c8..51688164080 100644 --- a/arch/arm/mach-s3c2412/clock.c +++ b/arch/arm/mach-s3c2412/clock.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-s3c2440/mach-rx1950.c b/arch/arm/mach-s3c2440/mach-rx1950.c index 0d3453bf567..2078c1fe8e3 100644 --- a/arch/arm/mach-s3c2440/mach-rx1950.c +++ b/arch/arm/mach-s3c2440/mach-rx1950.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-s3c2440/mach-rx3715.c b/arch/arm/mach-s3c2440/mach-rx3715.c index e19499c2f90..4c2f553fbd5 100644 --- a/arch/arm/mach-s3c2440/mach-rx3715.c +++ b/arch/arm/mach-s3c2440/mach-rx3715.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-s3c2443/clock.c b/arch/arm/mach-s3c2443/clock.c index 1c2c088aa2e..6dde2696f8f 100644 --- a/arch/arm/mach-s3c2443/clock.c +++ b/arch/arm/mach-s3c2443/clock.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-s3c64xx/irq-eint.c b/arch/arm/mach-s3c64xx/irq-eint.c index 4d203be1f4c..e3e75d18603 100644 --- a/arch/arm/mach-s3c64xx/irq-eint.c +++ b/arch/arm/mach-s3c64xx/irq-eint.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-s5p64x0/clock-s5p6440.c b/arch/arm/mach-s5p64x0/clock-s5p6440.c index c54c65d511f..8a293c3e394 100644 --- a/arch/arm/mach-s5p64x0/clock-s5p6440.c +++ b/arch/arm/mach-s5p64x0/clock-s5p6440.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/mach-s5p64x0/clock-s5p6450.c b/arch/arm/mach-s5p64x0/clock-s5p6450.c index 2d04abfba12..0277b483b9b 100644 --- a/arch/arm/mach-s5p64x0/clock-s5p6450.c +++ b/arch/arm/mach-s5p64x0/clock-s5p6450.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/mach-s5p64x0/clock.c b/arch/arm/mach-s5p64x0/clock.c index b52c6e2f37a..041f91e5b42 100644 --- a/arch/arm/mach-s5p64x0/clock.c +++ b/arch/arm/mach-s5p64x0/clock.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/mach-s5pv210/clock.c b/arch/arm/mach-s5pv210/clock.c index 4c5ac7a69e9..6a1e5876bb8 100644 --- a/arch/arm/mach-s5pv210/clock.c +++ b/arch/arm/mach-s5pv210/clock.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/mach-s5pv210/mach-smdkc110.c b/arch/arm/mach-s5pv210/mach-smdkc110.c index f7266bb0cac..05cf7dc6128 100644 --- a/arch/arm/mach-s5pv210/mach-smdkc110.c +++ b/arch/arm/mach-s5pv210/mach-smdkc110.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/mach-s5pv210/mach-smdkv210.c b/arch/arm/mach-s5pv210/mach-smdkv210.c index a9106c39239..7d4d98916d6 100644 --- a/arch/arm/mach-s5pv210/mach-smdkv210.c +++ b/arch/arm/mach-s5pv210/mach-smdkv210.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c index e340a54251d..3a4893b82ad 100644 --- a/arch/arm/mach-versatile/core.c +++ b/arch/arm/mach-versatile/core.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm/mach-versatile/versatile_ab.c b/arch/arm/mach-versatile/versatile_ab.c index fda4866703c..07b9122ff84 100644 --- a/arch/arm/mach-versatile/versatile_ab.c +++ b/arch/arm/mach-versatile/versatile_ab.c @@ -21,7 +21,6 @@ #include #include -#include #include #include diff --git a/arch/arm/mach-versatile/versatile_pb.c b/arch/arm/mach-versatile/versatile_pb.c index feaf9cbe60f..8d8b80e79e0 100644 --- a/arch/arm/mach-versatile/versatile_pb.c +++ b/arch/arm/mach-versatile/versatile_pb.c @@ -21,7 +21,6 @@ #include #include -#include #include #include #include diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c index 1fafc324460..de5a2384284 100644 --- a/arch/arm/mach-vexpress/v2m.c +++ b/arch/arm/mach-vexpress/v2m.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-w90x900/irq.c b/arch/arm/mach-w90x900/irq.c index 7bf143c443f..f47e29586a0 100644 --- a/arch/arm/mach-w90x900/irq.c +++ b/arch/arm/mach-w90x900/irq.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/plat-s3c24xx/common-smdk.c b/arch/arm/plat-s3c24xx/common-smdk.c index bcc43f34627..084604be6ad 100644 --- a/arch/arm/plat-s3c24xx/common-smdk.c +++ b/arch/arm/plat-s3c24xx/common-smdk.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/plat-s3c24xx/cpu-freq.c b/arch/arm/plat-s3c24xx/cpu-freq.c index b3d3d027899..46807993888 100644 --- a/arch/arm/plat-s3c24xx/cpu-freq.c +++ b/arch/arm/plat-s3c24xx/cpu-freq.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/plat-s3c24xx/irq.c b/arch/arm/plat-s3c24xx/irq.c index fc8c5f89954..bc42c04091f 100644 --- a/arch/arm/plat-s3c24xx/irq.c +++ b/arch/arm/plat-s3c24xx/irq.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/plat-s3c24xx/pm-simtec.c b/arch/arm/plat-s3c24xx/pm-simtec.c index 663b280d65d..68296b1fe7e 100644 --- a/arch/arm/plat-s3c24xx/pm-simtec.c +++ b/arch/arm/plat-s3c24xx/pm-simtec.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include diff --git a/arch/arm/plat-s3c24xx/s3c2410-clock.c b/arch/arm/plat-s3c24xx/s3c2410-clock.c index def76aa3825..25dc4d4397b 100644 --- a/arch/arm/plat-s3c24xx/s3c2410-clock.c +++ b/arch/arm/plat-s3c24xx/s3c2410-clock.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/plat-s3c24xx/s3c2412-iotiming.c b/arch/arm/plat-s3c24xx/s3c2412-iotiming.c index 0b46d3895d6..48eee39ab36 100644 --- a/arch/arm/plat-s3c24xx/s3c2412-iotiming.c +++ b/arch/arm/plat-s3c24xx/s3c2412-iotiming.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/plat-s5p/clock.c b/arch/arm/plat-s5p/clock.c index 5f84a3f13ef..963edea7f7e 100644 --- a/arch/arm/plat-s5p/clock.c +++ b/arch/arm/plat-s5p/clock.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/plat-s5p/irq-eint.c b/arch/arm/plat-s5p/irq-eint.c index b5bb774985b..c496b359c37 100644 --- a/arch/arm/plat-s5p/irq-eint.c +++ b/arch/arm/plat-s5p/irq-eint.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/plat-samsung/clock-clksrc.c b/arch/arm/plat-samsung/clock-clksrc.c index ae8b8507663..786a4107a15 100644 --- a/arch/arm/plat-samsung/clock-clksrc.c +++ b/arch/arm/plat-samsung/clock-clksrc.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/plat-samsung/clock.c b/arch/arm/plat-samsung/clock.c index 3b4451979d1..10f71179071 100644 --- a/arch/arm/plat-samsung/clock.c +++ b/arch/arm/plat-samsung/clock.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/plat-samsung/pm-gpio.c b/arch/arm/plat-samsung/pm-gpio.c index 4be016eaa6d..c2ff92c30bd 100644 --- a/arch/arm/plat-samsung/pm-gpio.c +++ b/arch/arm/plat-samsung/pm-gpio.c @@ -14,7 +14,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/arm/plat-samsung/wakeup-mask.c b/arch/arm/plat-samsung/wakeup-mask.c index dc814037297..20c3d9117cc 100644 --- a/arch/arm/plat-samsung/wakeup-mask.c +++ b/arch/arm/plat-samsung/wakeup-mask.c @@ -11,7 +11,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/avr32/boards/merisc/merisc_sysfs.c b/arch/avr32/boards/merisc/merisc_sysfs.c index df431fdba9a..5a252318f4b 100644 --- a/arch/avr32/boards/merisc/merisc_sysfs.c +++ b/arch/avr32/boards/merisc/merisc_sysfs.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/avr32/kernel/irq.c b/arch/avr32/kernel/irq.c index bc3aa18293d..900e49b2258 100644 --- a/arch/avr32/kernel/irq.c +++ b/arch/avr32/kernel/irq.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include /* May be overridden by platform code */ int __weak nmi_enable(void) diff --git a/arch/mips/txx9/generic/setup_tx4939.c b/arch/mips/txx9/generic/setup_tx4939.c index ba3cec3155d..6567895d1f5 100644 --- a/arch/mips/txx9/generic/setup_tx4939.c +++ b/arch/mips/txx9/generic/setup_tx4939.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c index f5c5c762d5a..4a255cf8cd1 100644 --- a/arch/powerpc/platforms/cell/smp.c +++ b/arch/powerpc/platforms/cell/smp.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/iseries/smp.c b/arch/powerpc/platforms/iseries/smp.c index 7e2a5515ed7..02df49fb59f 100644 --- a/arch/powerpc/platforms/iseries/smp.c +++ b/arch/powerpc/platforms/iseries/smp.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/powermac/cpufreq_32.c b/arch/powerpc/platforms/powermac/cpufreq_32.c index 04af5f48b4e..1fc386a23f1 100644 --- a/arch/powerpc/platforms/powermac/cpufreq_32.c +++ b/arch/powerpc/platforms/powermac/cpufreq_32.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 26e93fd4c62..6212ff4693e 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/sysdev/uic.c index 3330feca750..063c901b126 100644 --- a/arch/powerpc/sysdev/uic.c +++ b/arch/powerpc/sysdev/uic.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/unicore32/kernel/puv3-core.c b/arch/unicore32/kernel/puv3-core.c index 1a505a78776..254adeecc61 100644 --- a/arch/unicore32/kernel/puv3-core.c +++ b/arch/unicore32/kernel/puv3-core.c @@ -13,7 +13,6 @@ #include #include -#include #include #include #include diff --git a/arch/unicore32/kernel/puv3-nb0916.c b/arch/unicore32/kernel/puv3-nb0916.c index e731c561ed4..37b12a06b49 100644 --- a/arch/unicore32/kernel/puv3-nb0916.c +++ b/arch/unicore32/kernel/puv3-nb0916.c @@ -13,7 +13,6 @@ #include #include -#include #include #include #include diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index b946a9eac7d..98094a9580f 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index b3300e6bace..313fb5cddbc 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/platform/uv/uv_sysfs.c b/arch/x86/platform/uv/uv_sysfs.c index 309c70fb775..5d4ba301e77 100644 --- a/arch/x86/platform/uv/uv_sysfs.c +++ b/arch/x86/platform/uv/uv_sysfs.c @@ -19,7 +19,7 @@ * Copyright (c) Russ Anderson */ -#include +#include #include #include diff --git a/drivers/gpio/gpio-samsung.c b/drivers/gpio/gpio-samsung.c index 86625185271..dcbe4541fe4 100644 --- a/drivers/gpio/gpio-samsung.c +++ b/drivers/gpio/gpio-samsung.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index 661b692573e..cc068d3a324 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c index 6f1ff93d7ce..46b4c766335 100644 --- a/drivers/leds/led-triggers.c +++ b/drivers/leds/led-triggers.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index 116a49ce74b..54ac7ffacb4 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -681,9 +680,6 @@ static struct platform_driver smu_of_platform_driver = static int __init smu_init_sysfs(void) { /* - * Due to sysfs bogosity, a sysdev is not a real device, so - * we should in fact create both if we want sysdev semantics - * for power management. * For now, we don't power manage machines with an SMU chip, * I'm a bit too far from figuring out how that works with those * new chipsets, but that will come back and bite us diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 5a20804fdec..549742f23ad 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c index 48870e50423..f00d0d1e065 100644 --- a/drivers/platform/x86/intel_scu_ipc.c +++ b/drivers/platform/x86/intel_scu_ipc.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c index 98f3e4ade92..690c3338a8a 100644 --- a/drivers/s390/block/xpram.c +++ b/drivers/s390/block/xpram.c @@ -36,7 +36,7 @@ #include #include #include /* HDIO_GETGEO */ -#include +#include #include #include #include -- cgit v1.2.3-70-g09d2 From 5202397df819d3c5a3f201bd4af6b86542115fb6 Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Tue, 1 Nov 2011 17:28:47 -0700 Subject: KVM guest: remove KVM guest pv mmu support This has not been used for some years now. It's time to remove it. Signed-off-by: Chris Wright Signed-off-by: Avi Kivity --- arch/x86/kernel/kvm.c | 181 -------------------------------------------------- 1 file changed, 181 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index a9c2116001d..f0c6fd6f176 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -39,8 +39,6 @@ #include #include -#define MMU_QUEUE_SIZE 1024 - static int kvmapf = 1; static int parse_no_kvmapf(char *arg) @@ -60,21 +58,10 @@ static int parse_no_stealacc(char *arg) early_param("no-steal-acc", parse_no_stealacc); -struct kvm_para_state { - u8 mmu_queue[MMU_QUEUE_SIZE]; - int mmu_queue_len; -}; - -static DEFINE_PER_CPU(struct kvm_para_state, para_state); static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64); static int has_steal_clock = 0; -static struct kvm_para_state *kvm_para_state(void) -{ - return &per_cpu(para_state, raw_smp_processor_id()); -} - /* * No need for any "IO delay" on KVM */ @@ -271,151 +258,6 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code) } } -static void kvm_mmu_op(void *buffer, unsigned len) -{ - int r; - unsigned long a1, a2; - - do { - a1 = __pa(buffer); - a2 = 0; /* on i386 __pa() always returns <4G */ - r = kvm_hypercall3(KVM_HC_MMU_OP, len, a1, a2); - buffer += r; - len -= r; - } while (len); -} - -static void mmu_queue_flush(struct kvm_para_state *state) -{ - if (state->mmu_queue_len) { - kvm_mmu_op(state->mmu_queue, state->mmu_queue_len); - state->mmu_queue_len = 0; - } -} - -static void kvm_deferred_mmu_op(void *buffer, int len) -{ - struct kvm_para_state *state = kvm_para_state(); - - if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU) { - kvm_mmu_op(buffer, len); - return; - } - if (state->mmu_queue_len + len > sizeof state->mmu_queue) - mmu_queue_flush(state); - memcpy(state->mmu_queue + state->mmu_queue_len, buffer, len); - state->mmu_queue_len += len; -} - -static void kvm_mmu_write(void *dest, u64 val) -{ - __u64 pte_phys; - struct kvm_mmu_op_write_pte wpte; - -#ifdef CONFIG_HIGHPTE - struct page *page; - unsigned long dst = (unsigned long) dest; - - page = kmap_atomic_to_page(dest); - pte_phys = page_to_pfn(page); - pte_phys <<= PAGE_SHIFT; - pte_phys += (dst & ~(PAGE_MASK)); -#else - pte_phys = (unsigned long)__pa(dest); -#endif - wpte.header.op = KVM_MMU_OP_WRITE_PTE; - wpte.pte_val = val; - wpte.pte_phys = pte_phys; - - kvm_deferred_mmu_op(&wpte, sizeof wpte); -} - -/* - * We only need to hook operations that are MMU writes. We hook these so that - * we can use lazy MMU mode to batch these operations. We could probably - * improve the performance of the host code if we used some of the information - * here to simplify processing of batched writes. - */ -static void kvm_set_pte(pte_t *ptep, pte_t pte) -{ - kvm_mmu_write(ptep, pte_val(pte)); -} - -static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) -{ - kvm_mmu_write(ptep, pte_val(pte)); -} - -static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd) -{ - kvm_mmu_write(pmdp, pmd_val(pmd)); -} - -#if PAGETABLE_LEVELS >= 3 -#ifdef CONFIG_X86_PAE -static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte) -{ - kvm_mmu_write(ptep, pte_val(pte)); -} - -static void kvm_pte_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - kvm_mmu_write(ptep, 0); -} - -static void kvm_pmd_clear(pmd_t *pmdp) -{ - kvm_mmu_write(pmdp, 0); -} -#endif - -static void kvm_set_pud(pud_t *pudp, pud_t pud) -{ - kvm_mmu_write(pudp, pud_val(pud)); -} - -#if PAGETABLE_LEVELS == 4 -static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd) -{ - kvm_mmu_write(pgdp, pgd_val(pgd)); -} -#endif -#endif /* PAGETABLE_LEVELS >= 3 */ - -static void kvm_flush_tlb(void) -{ - struct kvm_mmu_op_flush_tlb ftlb = { - .header.op = KVM_MMU_OP_FLUSH_TLB, - }; - - kvm_deferred_mmu_op(&ftlb, sizeof ftlb); -} - -static void kvm_release_pt(unsigned long pfn) -{ - struct kvm_mmu_op_release_pt rpt = { - .header.op = KVM_MMU_OP_RELEASE_PT, - .pt_phys = (u64)pfn << PAGE_SHIFT, - }; - - kvm_mmu_op(&rpt, sizeof rpt); -} - -static void kvm_enter_lazy_mmu(void) -{ - paravirt_enter_lazy_mmu(); -} - -static void kvm_leave_lazy_mmu(void) -{ - struct kvm_para_state *state = kvm_para_state(); - - mmu_queue_flush(state); - paravirt_leave_lazy_mmu(); -} - static void __init paravirt_ops_setup(void) { pv_info.name = "KVM"; @@ -424,29 +266,6 @@ static void __init paravirt_ops_setup(void) if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) pv_cpu_ops.io_delay = kvm_io_delay; - if (kvm_para_has_feature(KVM_FEATURE_MMU_OP)) { - pv_mmu_ops.set_pte = kvm_set_pte; - pv_mmu_ops.set_pte_at = kvm_set_pte_at; - pv_mmu_ops.set_pmd = kvm_set_pmd; -#if PAGETABLE_LEVELS >= 3 -#ifdef CONFIG_X86_PAE - pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic; - pv_mmu_ops.pte_clear = kvm_pte_clear; - pv_mmu_ops.pmd_clear = kvm_pmd_clear; -#endif - pv_mmu_ops.set_pud = kvm_set_pud; -#if PAGETABLE_LEVELS == 4 - pv_mmu_ops.set_pgd = kvm_set_pgd; -#endif -#endif - pv_mmu_ops.flush_tlb_user = kvm_flush_tlb; - pv_mmu_ops.release_pte = kvm_release_pt; - pv_mmu_ops.release_pmd = kvm_release_pt; - pv_mmu_ops.release_pud = kvm_release_pt; - - pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu; - pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu; - } #ifdef CONFIG_X86_IO_APIC no_timer_check = 1; #endif -- cgit v1.2.3-70-g09d2 From 2c9ede55ecec58099b72e4bb8eab719f32f72c31 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Jul 2011 20:24:48 -0400 Subject: switch device_get_devnode() and ->devnode() to umode_t * both callers of device_get_devnode() are only interested in lower 16bits and nobody tries to return anything wider than 16bit anyway. Signed-off-by: Al Viro --- arch/x86/kernel/cpuid.c | 2 +- arch/x86/kernel/msr.c | 2 +- block/bsg.c | 2 +- block/genhd.c | 2 +- drivers/base/core.c | 4 ++-- drivers/base/devtmpfs.c | 2 +- drivers/block/aoe/aoechr.c | 2 +- drivers/block/pktcdvd.c | 2 +- drivers/char/mem.c | 4 ++-- drivers/char/misc.c | 2 +- drivers/char/raw.c | 2 +- drivers/char/tile-srom.c | 2 +- drivers/gpu/drm/drm_sysfs.c | 2 +- drivers/hid/usbhid/hiddev.c | 2 +- drivers/infiniband/core/cm.c | 2 +- drivers/infiniband/core/user_mad.c | 2 +- drivers/infiniband/core/uverbs_main.c | 2 +- drivers/input/input.c | 2 +- drivers/media/dvb/ddbridge/ddbridge-core.c | 2 +- drivers/media/dvb/dvb-core/dvbdev.c | 2 +- drivers/media/rc/rc-main.c | 2 +- drivers/tty/tty_io.c | 2 +- drivers/usb/class/usblp.c | 2 +- drivers/usb/core/file.c | 2 +- drivers/usb/core/usb.c | 2 +- drivers/usb/misc/iowarrior.c | 2 +- drivers/usb/misc/legousbtower.c | 2 +- include/linux/device.h | 6 +++--- include/linux/genhd.h | 2 +- include/linux/usb.h | 2 +- sound/sound_core.c | 2 +- 31 files changed, 35 insertions(+), 35 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 212a6a42527..a524353d93f 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -177,7 +177,7 @@ static struct notifier_block __refdata cpuid_class_cpu_notifier = .notifier_call = cpuid_class_cpu_callback, }; -static char *cpuid_devnode(struct device *dev, mode_t *mode) +static char *cpuid_devnode(struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt)); } diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 12fcbe2c143..96356762a51 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -236,7 +236,7 @@ static struct notifier_block __refdata msr_class_cpu_notifier = { .notifier_call = msr_class_cpu_callback, }; -static char *msr_devnode(struct device *dev, mode_t *mode) +static char *msr_devnode(struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt)); } diff --git a/block/bsg.c b/block/bsg.c index 702f1316bb8..9651ec7b87c 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -1070,7 +1070,7 @@ EXPORT_SYMBOL_GPL(bsg_register_queue); static struct cdev bsg_cdev; -static char *bsg_devnode(struct device *dev, mode_t *mode) +static char *bsg_devnode(struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "bsg/%s", dev_name(dev)); } diff --git a/block/genhd.c b/block/genhd.c index 02e9fca8082..80578f3176e 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1109,7 +1109,7 @@ struct class block_class = { .name = "block", }; -static char *block_devnode(struct device *dev, mode_t *mode) +static char *block_devnode(struct device *dev, umode_t *mode) { struct gendisk *disk = dev_to_disk(dev); diff --git a/drivers/base/core.c b/drivers/base/core.c index 919daa7cd5b..1dfa1d616fa 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -198,7 +198,7 @@ static int dev_uevent(struct kset *kset, struct kobject *kobj, if (MAJOR(dev->devt)) { const char *tmp; const char *name; - mode_t mode = 0; + umode_t mode = 0; add_uevent_var(env, "MAJOR=%u", MAJOR(dev->devt)); add_uevent_var(env, "MINOR=%u", MINOR(dev->devt)); @@ -1182,7 +1182,7 @@ static struct device *next_device(struct klist_iter *i) * freed by the caller. */ const char *device_get_devnode(struct device *dev, - mode_t *mode, const char **tmp) + umode_t *mode, const char **tmp) { char *s; diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index a4760e095ff..3990f682e69 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -40,7 +40,7 @@ static struct req { struct completion done; int err; const char *name; - mode_t mode; /* 0 => delete */ + umode_t mode; /* 0 => delete */ struct device *dev; } *requests; diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c index 5f8e39c43ae..e86d2062a16 100644 --- a/drivers/block/aoe/aoechr.c +++ b/drivers/block/aoe/aoechr.c @@ -270,7 +270,7 @@ static const struct file_operations aoe_fops = { .llseek = noop_llseek, }; -static char *aoe_devnode(struct device *dev, mode_t *mode) +static char *aoe_devnode(struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "etherd/%s", dev_name(dev)); } diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index a63b0a2b780..d59edeabd93 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2817,7 +2817,7 @@ static const struct block_device_operations pktcdvd_ops = { .check_events = pkt_check_events, }; -static char *pktcdvd_devnode(struct gendisk *gd, mode_t *mode) +static char *pktcdvd_devnode(struct gendisk *gd, umode_t *mode) { return kasprintf(GFP_KERNEL, "pktcdvd/%s", gd->disk_name); } diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 14517903371..d6e9d081c8b 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -847,7 +847,7 @@ static const struct file_operations kmsg_fops = { static const struct memdev { const char *name; - mode_t mode; + umode_t mode; const struct file_operations *fops; struct backing_dev_info *dev_info; } devlist[] = { @@ -901,7 +901,7 @@ static const struct file_operations memory_fops = { .llseek = noop_llseek, }; -static char *mem_devnode(struct device *dev, mode_t *mode) +static char *mem_devnode(struct device *dev, umode_t *mode) { if (mode && devlist[MINOR(dev->devt)].mode) *mode = devlist[MINOR(dev->devt)].mode; diff --git a/drivers/char/misc.c b/drivers/char/misc.c index 778273c9324..522136d4084 100644 --- a/drivers/char/misc.c +++ b/drivers/char/misc.c @@ -258,7 +258,7 @@ int misc_deregister(struct miscdevice *misc) EXPORT_SYMBOL(misc_register); EXPORT_SYMBOL(misc_deregister); -static char *misc_devnode(struct device *dev, mode_t *mode) +static char *misc_devnode(struct device *dev, umode_t *mode) { struct miscdevice *c = dev_get_drvdata(dev); diff --git a/drivers/char/raw.c b/drivers/char/raw.c index b6de2c04714..54a3a6d0981 100644 --- a/drivers/char/raw.c +++ b/drivers/char/raw.c @@ -308,7 +308,7 @@ static const struct file_operations raw_ctl_fops = { static struct cdev raw_cdev; -static char *raw_devnode(struct device *dev, mode_t *mode) +static char *raw_devnode(struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "raw/%s", dev_name(dev)); } diff --git a/drivers/char/tile-srom.c b/drivers/char/tile-srom.c index cf3ee008dca..4dc019408fa 100644 --- a/drivers/char/tile-srom.c +++ b/drivers/char/tile-srom.c @@ -329,7 +329,7 @@ static struct device_attribute srom_dev_attrs[] = { __ATTR_NULL }; -static char *srom_devnode(struct device *dev, mode_t *mode) +static char *srom_devnode(struct device *dev, umode_t *mode) { *mode = S_IRUGO | S_IWUSR; return kasprintf(GFP_KERNEL, "srom/%s", dev_name(dev)); diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c index 0f9ef9bf673..62c3675045a 100644 --- a/drivers/gpu/drm/drm_sysfs.c +++ b/drivers/gpu/drm/drm_sysfs.c @@ -72,7 +72,7 @@ static int drm_class_resume(struct device *dev) return 0; } -static char *drm_devnode(struct device *dev, mode_t *mode) +static char *drm_devnode(struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "dri/%s", dev_name(dev)); } diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index 4ef02b269a7..7c297d305d5 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -859,7 +859,7 @@ static const struct file_operations hiddev_fops = { .llseek = noop_llseek, }; -static char *hiddev_devnode(struct device *dev, mode_t *mode) +static char *hiddev_devnode(struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev)); } diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 8b72f39202f..c889aaef341 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3659,7 +3659,7 @@ static struct kobj_type cm_port_obj_type = { .release = cm_release_port_obj }; -static char *cm_devnode(struct device *dev, mode_t *mode) +static char *cm_devnode(struct device *dev, umode_t *mode) { if (mode) *mode = 0666; diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 07db22997e9..f0d588f8859 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1175,7 +1175,7 @@ static void ib_umad_remove_one(struct ib_device *device) kref_put(&umad_dev->ref, ib_umad_release_dev); } -static char *umad_devnode(struct device *dev, mode_t *mode) +static char *umad_devnode(struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); } diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 87963674637..604556d73d2 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -846,7 +846,7 @@ static void ib_uverbs_remove_one(struct ib_device *device) kfree(uverbs_dev); } -static char *uverbs_devnode(struct device *dev, mode_t *mode) +static char *uverbs_devnode(struct device *dev, umode_t *mode) { if (mode) *mode = 0666; diff --git a/drivers/input/input.c b/drivers/input/input.c index da38d97a51b..1f78c957a75 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -1624,7 +1624,7 @@ static struct device_type input_dev_type = { #endif }; -static char *input_devnode(struct device *dev, mode_t *mode) +static char *input_devnode(struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "input/%s", dev_name(dev)); } diff --git a/drivers/media/dvb/ddbridge/ddbridge-core.c b/drivers/media/dvb/ddbridge/ddbridge-core.c index ba9a643b9c6..d1e91bc80e7 100644 --- a/drivers/media/dvb/ddbridge/ddbridge-core.c +++ b/drivers/media/dvb/ddbridge/ddbridge-core.c @@ -1480,7 +1480,7 @@ static const struct file_operations ddb_fops = { .open = ddb_open, }; -static char *ddb_devnode(struct device *device, mode_t *mode) +static char *ddb_devnode(struct device *device, umode_t *mode) { struct ddb *dev = dev_get_drvdata(device); diff --git a/drivers/media/dvb/dvb-core/dvbdev.c b/drivers/media/dvb/dvb-core/dvbdev.c index f7328777595..00a67326c19 100644 --- a/drivers/media/dvb/dvb-core/dvbdev.c +++ b/drivers/media/dvb/dvb-core/dvbdev.c @@ -450,7 +450,7 @@ static int dvb_uevent(struct device *dev, struct kobj_uevent_env *env) return 0; } -static char *dvb_devnode(struct device *dev, mode_t *mode) +static char *dvb_devnode(struct device *dev, umode_t *mode) { struct dvb_device *dvbdev = dev_get_drvdata(dev); diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c index 29f900065d8..f5db8b949bc 100644 --- a/drivers/media/rc/rc-main.c +++ b/drivers/media/rc/rc-main.c @@ -715,7 +715,7 @@ static void ir_close(struct input_dev *idev) } /* class for /sys/class/rc */ -static char *ir_devnode(struct device *dev, mode_t *mode) +static char *ir_devnode(struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "rc/%s", dev_name(dev)); } diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 05085beb83d..3fdebd306b9 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -3267,7 +3267,7 @@ void __init console_init(void) } } -static char *tty_devnode(struct device *dev, mode_t *mode) +static char *tty_devnode(struct device *dev, umode_t *mode) { if (!mode) return NULL; diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index cb3a93243a0..bc5089f76ce 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -1045,7 +1045,7 @@ static const struct file_operations usblp_fops = { .llseek = noop_llseek, }; -static char *usblp_devnode(struct device *dev, mode_t *mode) +static char *usblp_devnode(struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev)); } diff --git a/drivers/usb/core/file.c b/drivers/usb/core/file.c index 99458c843d6..d95760de9e8 100644 --- a/drivers/usb/core/file.c +++ b/drivers/usb/core/file.c @@ -66,7 +66,7 @@ static struct usb_class { struct class *class; } *usb_class; -static char *usb_devnode(struct device *dev, mode_t *mode) +static char *usb_devnode(struct device *dev, umode_t *mode) { struct usb_class_driver *drv; diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index 73cd90012ec..1382c90d083 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -326,7 +326,7 @@ static const struct dev_pm_ops usb_device_pm_ops = { #endif /* CONFIG_PM */ -static char *usb_devnode(struct device *dev, mode_t *mode) +static char *usb_devnode(struct device *dev, umode_t *mode) { struct usb_device *usb_dev; diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 81457904d6b..5bd4b0526de 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -734,7 +734,7 @@ static const struct file_operations iowarrior_fops = { .llseek = noop_llseek, }; -static char *iowarrior_devnode(struct device *dev, mode_t *mode) +static char *iowarrior_devnode(struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev)); } diff --git a/drivers/usb/misc/legousbtower.c b/drivers/usb/misc/legousbtower.c index a989356f693..94f6566b99f 100644 --- a/drivers/usb/misc/legousbtower.c +++ b/drivers/usb/misc/legousbtower.c @@ -269,7 +269,7 @@ static const struct file_operations tower_fops = { .llseek = tower_llseek, }; -static char *legousbtower_devnode(struct device *dev, mode_t *mode) +static char *legousbtower_devnode(struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev)); } diff --git a/include/linux/device.h b/include/linux/device.h index 3136ede5a1e..2fe0005543e 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -294,7 +294,7 @@ struct class { struct kobject *dev_kobj; int (*dev_uevent)(struct device *dev, struct kobj_uevent_env *env); - char *(*devnode)(struct device *dev, mode_t *mode); + char *(*devnode)(struct device *dev, umode_t *mode); void (*class_release)(struct class *class); void (*dev_release)(struct device *dev); @@ -423,7 +423,7 @@ struct device_type { const char *name; const struct attribute_group **groups; int (*uevent)(struct device *dev, struct kobj_uevent_env *env); - char *(*devnode)(struct device *dev, mode_t *mode); + char *(*devnode)(struct device *dev, umode_t *mode); void (*release)(struct device *dev); const struct dev_pm_ops *pm; @@ -720,7 +720,7 @@ extern int device_rename(struct device *dev, const char *new_name); extern int device_move(struct device *dev, struct device *new_parent, enum dpm_order dpm_order); extern const char *device_get_devnode(struct device *dev, - mode_t *mode, const char **tmp); + umode_t *mode, const char **tmp); extern void *dev_get_drvdata(const struct device *dev); extern int dev_set_drvdata(struct device *dev, void *data); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 6d18f3531f1..fe23ee76858 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -163,7 +163,7 @@ struct gendisk { * disks that can't be partitioned. */ char disk_name[DISK_NAME_LEN]; /* name of major driver */ - char *(*devnode)(struct gendisk *gd, mode_t *mode); + char *(*devnode)(struct gendisk *gd, umode_t *mode); unsigned int events; /* supported events */ unsigned int async_events; /* async events, subset of all */ diff --git a/include/linux/usb.h b/include/linux/usb.h index d3d0c137433..a59321779f8 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -935,7 +935,7 @@ extern struct bus_type usb_bus_type; */ struct usb_class_driver { char *name; - char *(*devnode)(struct device *dev, mode_t *mode); + char *(*devnode)(struct device *dev, umode_t *mode); const struct file_operations *fops; int minor_base; }; diff --git a/sound/sound_core.c b/sound/sound_core.c index 6ce277860fd..c6e81fb928e 100644 --- a/sound/sound_core.c +++ b/sound/sound_core.c @@ -29,7 +29,7 @@ MODULE_DESCRIPTION("Core sound module"); MODULE_AUTHOR("Alan Cox"); MODULE_LICENSE("GPL"); -static char *sound_devnode(struct device *dev, mode_t *mode) +static char *sound_devnode(struct device *dev, umode_t *mode) { if (MAJOR(dev->devt) == SOUND_MAJOR) return NULL; -- cgit v1.2.3-70-g09d2 From 24d25dbfa63c376323096660bfa9ad45a08870ce Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 5 Jan 2012 14:27:19 -0700 Subject: x86/PCI: amd: factor out MMCONFIG discovery This factors out the AMD native MMCONFIG discovery so we can use it outside amd_bus.c. amd_bus.c reads AMD MSRs so it can remove the MMCONFIG area from the PCI resources. We may also need the MMCONFIG information to work around BIOS defects in the ACPI MCFG table. Cc: Borislav Petkov Cc: Yinghai Lu Cc: stable@kernel.org # 2.6.34+ Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/include/asm/amd_nb.h | 2 ++ arch/x86/kernel/amd_nb.c | 31 +++++++++++++++++++++++++++++++ arch/x86/pci/amd_bus.c | 42 +++++++++++------------------------------- 3 files changed, 44 insertions(+), 31 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 8e41071704a..49ad773f4b9 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -1,6 +1,7 @@ #ifndef _ASM_X86_AMD_NB_H #define _ASM_X86_AMD_NB_H +#include #include struct amd_nb_bus_dev_range { @@ -13,6 +14,7 @@ extern const struct pci_device_id amd_nb_misc_ids[]; extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[]; extern bool early_is_amd_nb(u32 value); +extern struct resource *amd_get_mmconfig_range(struct resource *res); extern int amd_cache_northbridges(void); extern void amd_flush_garts(void); extern int amd_numa_init(void); diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 4c39baa8fac..bae1efe6d51 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -119,6 +119,37 @@ bool __init early_is_amd_nb(u32 device) return false; } +struct resource *amd_get_mmconfig_range(struct resource *res) +{ + u32 address; + u64 base, msr; + unsigned segn_busn_bits; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) + return NULL; + + /* assume all cpus from fam10h have mmconfig */ + if (boot_cpu_data.x86 < 0x10) + return NULL; + + address = MSR_FAM10H_MMIO_CONF_BASE; + rdmsrl(address, msr); + + /* mmconfig is not enabled */ + if (!(msr & FAM10H_MMIO_CONF_ENABLE)) + return NULL; + + base = msr & (FAM10H_MMIO_CONF_BASE_MASK<> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) & + FAM10H_MMIO_CONF_BUSRANGE_MASK; + + res->flags = IORESOURCE_MEM; + res->start = base; + res->end = base + (1ULL<<(segn_busn_bits + 20)) - 1; + return res; +} + int amd_get_subcaches(int cpu) { struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link; diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 7b7a89712d5..0567df3890e 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -30,34 +30,6 @@ static struct pci_hostbridge_probe pci_probes[] __initdata = { { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1300 }, }; -static u64 __initdata fam10h_mmconf_start; -static u64 __initdata fam10h_mmconf_end; -static void __init get_pci_mmcfg_amd_fam10h_range(void) -{ - u32 address; - u64 base, msr; - unsigned segn_busn_bits; - - /* assume all cpus from fam10h have mmconf */ - if (boot_cpu_data.x86 < 0x10) - return; - - address = MSR_FAM10H_MMIO_CONF_BASE; - rdmsrl(address, msr); - - /* mmconfig is not enable */ - if (!(msr & FAM10H_MMIO_CONF_ENABLE)) - return; - - base = msr & (FAM10H_MMIO_CONF_BASE_MASK<> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) & - FAM10H_MMIO_CONF_BUSRANGE_MASK; - - fam10h_mmconf_start = base; - fam10h_mmconf_end = base + (1ULL<<(segn_busn_bits + 20)) - 1; -} - #define RANGE_NUM 16 /** @@ -85,6 +57,9 @@ static int __init early_fill_mp_bus_info(void) u64 val; u32 address; bool found; + struct resource fam10h_mmconf_res, *fam10h_mmconf; + u64 fam10h_mmconf_start; + u64 fam10h_mmconf_end; if (!early_pci_allowed()) return -1; @@ -211,12 +186,17 @@ static int __init early_fill_mp_bus_info(void) subtract_range(range, RANGE_NUM, 0, end); /* get mmconfig */ - get_pci_mmcfg_amd_fam10h_range(); + fam10h_mmconf = amd_get_mmconfig_range(&fam10h_mmconf_res); /* need to take out mmconf range */ - if (fam10h_mmconf_end) { - printk(KERN_DEBUG "Fam 10h mmconf [%llx, %llx]\n", fam10h_mmconf_start, fam10h_mmconf_end); + if (fam10h_mmconf) { + printk(KERN_DEBUG "Fam 10h mmconf %pR\n", fam10h_mmconf); + fam10h_mmconf_start = fam10h_mmconf->start; + fam10h_mmconf_end = fam10h_mmconf->end; subtract_range(range, RANGE_NUM, fam10h_mmconf_start, fam10h_mmconf_end + 1); + } else { + fam10h_mmconf_start = 0; + fam10h_mmconf_end = 0; } /* mmio resource */ -- cgit v1.2.3-70-g09d2 From 76ccc297018d25d55b789bbd508861ef1e2cdb0c Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 16 Dec 2011 17:38:18 -0500 Subject: x86/PCI: Expand the x86_msi_ops to have a restore MSIs. The MSI restore function will become a function pointer in an x86_msi_ops struct. It defaults to the implementation in the io_apic.c and msi.c. We piggyback on the indirection mechanism introduced by "x86: Introduce x86_msi_ops". Cc: x86@kernel.org Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: linux-pci@vger.kernel.org Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Jesse Barnes --- arch/x86/include/asm/pci.h | 9 +++++++++ arch/x86/include/asm/x86_init.h | 1 + arch/x86/kernel/x86_init.c | 1 + drivers/pci/msi.c | 29 +++++++++++++++++++++++++++-- 4 files changed, 38 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index d498943b906..df75d07571c 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -112,19 +112,28 @@ static inline void x86_teardown_msi_irq(unsigned int irq) { x86_msi.teardown_msi_irq(irq); } +static inline void x86_restore_msi_irqs(struct pci_dev *dev, int irq) +{ + x86_msi.restore_msi_irqs(dev, irq); +} #define arch_setup_msi_irqs x86_setup_msi_irqs #define arch_teardown_msi_irqs x86_teardown_msi_irqs #define arch_teardown_msi_irq x86_teardown_msi_irq +#define arch_restore_msi_irqs x86_restore_msi_irqs /* implemented in arch/x86/kernel/apic/io_apic. */ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); void native_teardown_msi_irq(unsigned int irq); +void native_restore_msi_irqs(struct pci_dev *dev, int irq); /* default to the implementation in drivers/lib/msi.c */ #define HAVE_DEFAULT_MSI_TEARDOWN_IRQS +#define HAVE_DEFAULT_MSI_RESTORE_IRQS void default_teardown_msi_irqs(struct pci_dev *dev); +void default_restore_msi_irqs(struct pci_dev *dev, int irq); #else #define native_setup_msi_irqs NULL #define native_teardown_msi_irq NULL #define default_teardown_msi_irqs NULL +#define default_restore_msi_irqs NULL #endif #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 1971e652d24..cd5208446c2 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -177,6 +177,7 @@ struct x86_msi_ops { int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type); void (*teardown_msi_irq)(unsigned int irq); void (*teardown_msi_irqs)(struct pci_dev *dev); + void (*restore_msi_irqs)(struct pci_dev *dev, int irq); }; extern struct x86_init_ops x86_init; diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index c1d6cd54939..83b05adaadf 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -114,4 +114,5 @@ struct x86_msi_ops x86_msi = { .setup_msi_irqs = native_setup_msi_irqs, .teardown_msi_irq = native_teardown_msi_irq, .teardown_msi_irqs = default_teardown_msi_irqs, + .restore_msi_irqs = default_restore_msi_irqs, }; diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 82de95ec2ea..a825d78fd0a 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -86,6 +86,31 @@ void default_teardown_msi_irqs(struct pci_dev *dev) } #endif +#ifndef arch_restore_msi_irqs +# define arch_restore_msi_irqs default_restore_msi_irqs +# define HAVE_DEFAULT_MSI_RESTORE_IRQS +#endif + +#ifdef HAVE_DEFAULT_MSI_RESTORE_IRQS +void default_restore_msi_irqs(struct pci_dev *dev, int irq) +{ + struct msi_desc *entry; + + entry = NULL; + if (dev->msix_enabled) { + list_for_each_entry(entry, &dev->msi_list, list) { + if (irq == entry->irq) + break; + } + } else if (dev->msi_enabled) { + entry = irq_get_msi_desc(irq); + } + + if (entry) + write_msi_msg(irq, &entry->msg); +} +#endif + static void msi_set_enable(struct pci_dev *dev, int pos, int enable) { u16 control; @@ -372,7 +397,7 @@ static void __pci_restore_msi_state(struct pci_dev *dev) pci_intx_for_msi(dev, 0); msi_set_enable(dev, pos, 0); - write_msi_msg(dev->irq, &entry->msg); + arch_restore_msi_irqs(dev, dev->irq); pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control); msi_mask_irq(entry, msi_capable_mask(control), entry->masked); @@ -400,7 +425,7 @@ static void __pci_restore_msix_state(struct pci_dev *dev) pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control); list_for_each_entry(entry, &dev->msi_list, list) { - write_msi_msg(entry->irq, &entry->msg); + arch_restore_msi_irqs(dev, entry->irq); msix_mask_irq(entry, entry->masked); } -- cgit v1.2.3-70-g09d2 From e58d429209105e698e9d0357481d62b37fe9a7dd Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 6 Jan 2012 11:17:51 -0500 Subject: x86, reboot: Fix typo in nmi reboot path It was brought to my attention that my x86 change to use NMI in the reboot path broke Intel Nehalem and Westmere boxes when using kexec. I realized I had mistyped the if statement in commit 3603a2512f9e69dc87914ba922eb4a0812b21cd6 and stuck the ')' in the wrong spot. Putting it in the right spot fixes kexec again. Doh. Reported-by: Yinghai Lu Cc: Linus Torvalds Signed-off-by: Don Zickus Link: http://lkml.kernel.org/r/1325866671-9797-1-git-send-email-dzickus@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 113acda5879..66c74f481ca 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -176,7 +176,7 @@ static void native_nmi_stop_other_cpus(int wait) */ if (num_online_cpus() > 1) { /* did someone beat us here? */ - if (atomic_cmpxchg(&stopping_cpu, -1, safe_smp_processor_id() != -1)) + if (atomic_cmpxchg(&stopping_cpu, -1, safe_smp_processor_id()) != -1) return; if (register_nmi_handler(NMI_LOCAL, smp_stop_nmi_callback, -- cgit v1.2.3-70-g09d2 From 5e6292c0f28f03dfdb8ea3d685f0b838a23bfba4 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 10 Jan 2012 15:11:17 -0800 Subject: signal: add block_sigmask() for adding sigmask to current->blocked Abstract the code sequence for adding a signal handler's sa_mask to current->blocked because the sequence is identical for all architectures. Furthermore, in the past some architectures actually got this code wrong, so introduce a wrapper that all architectures can use. Signed-off-by: Matt Fleming Signed-off-by: Oleg Nesterov Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Cc: Tejun Heo Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/signal.c | 6 +----- include/linux/signal.h | 1 + kernel/signal.c | 21 +++++++++++++++++++++ 3 files changed, 23 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 54ddaeb221c..46a01bdc27e 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -682,7 +682,6 @@ static int handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, struct pt_regs *regs) { - sigset_t blocked; int ret; /* Are we from a system call? */ @@ -733,10 +732,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, */ regs->flags &= ~X86_EFLAGS_TF; - sigorsets(&blocked, ¤t->blocked, &ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(&blocked, sig); - set_current_blocked(&blocked); + block_sigmask(ka, sig); tracehook_signal_handler(sig, info, ka, regs, test_thread_flag(TIF_SINGLESTEP)); diff --git a/include/linux/signal.h b/include/linux/signal.h index a822300a253..7987ce74874 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -254,6 +254,7 @@ extern void set_current_blocked(const sigset_t *); extern int show_unhandled_signals; extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie); +extern void block_sigmask(struct k_sigaction *ka, int signr); extern void exit_signals(struct task_struct *tsk); extern struct kmem_cache *sighand_cachep; diff --git a/kernel/signal.c b/kernel/signal.c index bb0efa5705e..d532f1709fb 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2318,6 +2318,27 @@ relock: return signr; } +/** + * block_sigmask - add @ka's signal mask to current->blocked + * @ka: action for @signr + * @signr: signal that has been successfully delivered + * + * This function should be called when a signal has succesfully been + * delivered. It adds the mask of signals for @ka to current->blocked + * so that they are blocked during the execution of the signal + * handler. In addition, @signr will be blocked unless %SA_NODEFER is + * set in @ka->sa.sa_flags. + */ +void block_sigmask(struct k_sigaction *ka, int signr) +{ + sigset_t blocked; + + sigorsets(&blocked, ¤t->blocked, &ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NODEFER)) + sigaddset(&blocked, signr); + set_current_blocked(&blocked); +} + /* * It could be that complete_signal() picked us to notify about the * group-wide signal. Other threads should be notified now to take -- cgit v1.2.3-70-g09d2 From 476bc0015bf09dad39d36a8b19f76f0c181d1ec9 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 13 Jan 2012 09:32:18 +1030 Subject: module_param: make bool parameters really bool (arch) module_param(bool) used to counter-intuitively take an int. In fddd5201 (mid-2009) we allowed bool or int/unsigned int using a messy trick. It's time to remove the int/unsigned int option. For this version it'll simply give a warning, but it'll break next kernel version. Signed-off-by: Rusty Russell --- arch/ia64/hp/common/aml_nfw.c | 2 +- arch/x86/kernel/apm_32.c | 16 ++++++++-------- arch/x86/kvm/mmu.c | 2 +- arch/x86/kvm/vmx.c | 18 +++++++++--------- arch/x86/kvm/x86.c | 4 ++-- arch/x86/mm/mmio-mod.c | 4 ++-- arch/x86/platform/geode/alix.c | 2 +- arch/x86/platform/iris/iris.c | 2 +- 8 files changed, 25 insertions(+), 25 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/ia64/hp/common/aml_nfw.c b/arch/ia64/hp/common/aml_nfw.c index 22078486d35..6192f718865 100644 --- a/arch/ia64/hp/common/aml_nfw.c +++ b/arch/ia64/hp/common/aml_nfw.c @@ -31,7 +31,7 @@ MODULE_AUTHOR("Bjorn Helgaas "); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("ACPI opregion handler for native firmware calls"); -static int force_register; +static bool force_register; module_param_named(force, force_register, bool, 0); MODULE_PARM_DESC(force, "Install opregion handler even without HPQ5001 device"); diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index a46bd383953..f76623cbe26 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -383,21 +383,21 @@ static int ignore_sys_suspend; static int ignore_normal_resume; static int bounce_interval __read_mostly = DEFAULT_BOUNCE_INTERVAL; -static int debug __read_mostly; -static int smp __read_mostly; +static bool debug __read_mostly; +static bool smp __read_mostly; static int apm_disabled = -1; #ifdef CONFIG_SMP -static int power_off; +static bool power_off; #else -static int power_off = 1; +static bool power_off = 1; #endif -static int realmode_power_off; +static bool realmode_power_off; #ifdef CONFIG_APM_ALLOW_INTS -static int allow_ints = 1; +static bool allow_ints = 1; #else -static int allow_ints; +static bool allow_ints; #endif -static int broken_psr; +static bool broken_psr; static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 2a2a9b40db1..224b02c3cda 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -74,7 +74,7 @@ enum { #endif #ifdef MMU_DEBUG -static int dbg = 0; +static bool dbg = 0; module_param(dbg, bool, 0644); #endif diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 906a7e84200..d29216c462b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -51,29 +51,29 @@ MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); -static int __read_mostly enable_vpid = 1; +static bool __read_mostly enable_vpid = 1; module_param_named(vpid, enable_vpid, bool, 0444); -static int __read_mostly flexpriority_enabled = 1; +static bool __read_mostly flexpriority_enabled = 1; module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO); -static int __read_mostly enable_ept = 1; +static bool __read_mostly enable_ept = 1; module_param_named(ept, enable_ept, bool, S_IRUGO); -static int __read_mostly enable_unrestricted_guest = 1; +static bool __read_mostly enable_unrestricted_guest = 1; module_param_named(unrestricted_guest, enable_unrestricted_guest, bool, S_IRUGO); -static int __read_mostly emulate_invalid_guest_state = 0; +static bool __read_mostly emulate_invalid_guest_state = 0; module_param(emulate_invalid_guest_state, bool, S_IRUGO); -static int __read_mostly vmm_exclusive = 1; +static bool __read_mostly vmm_exclusive = 1; module_param(vmm_exclusive, bool, S_IRUGO); -static int __read_mostly yield_on_hlt = 1; +static bool __read_mostly yield_on_hlt = 1; module_param(yield_on_hlt, bool, S_IRUGO); -static int __read_mostly fasteoi = 1; +static bool __read_mostly fasteoi = 1; module_param(fasteoi, bool, S_IRUGO); /* @@ -81,7 +81,7 @@ module_param(fasteoi, bool, S_IRUGO); * VMX and be a hypervisor for its own guests. If nested=0, guests may not * use VMX instructions. */ -static int __read_mostly nested = 0; +static bool __read_mostly nested = 0; module_param(nested, bool, S_IRUGO); #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1171def5f96..14d6cadc4ba 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -88,8 +88,8 @@ static void process_nmi(struct kvm_vcpu *vcpu); struct kvm_x86_ops *kvm_x86_ops; EXPORT_SYMBOL_GPL(kvm_x86_ops); -int ignore_msrs = 0; -module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); +static bool ignore_msrs = 0; +module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); bool kvm_has_tsc_control; EXPORT_SYMBOL_GPL(kvm_has_tsc_control); diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index de54b9b278a..dc0b727742f 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -75,8 +75,8 @@ static LIST_HEAD(trace_list); /* struct remap_trace */ /* module parameters */ static unsigned long filter_offset; -static int nommiotrace; -static int trace_pc; +static bool nommiotrace; +static bool trace_pc; module_param(filter_offset, ulong, 0); module_param(nommiotrace, bool, 0); diff --git a/arch/x86/platform/geode/alix.c b/arch/x86/platform/geode/alix.c index ca1973699d3..dc5f1d32ace 100644 --- a/arch/x86/platform/geode/alix.c +++ b/arch/x86/platform/geode/alix.c @@ -27,7 +27,7 @@ #include -static int force = 0; +static bool force = 0; module_param(force, bool, 0444); /* FIXME: Award bios is not automatically detected as Alix platform */ MODULE_PARM_DESC(force, "Force detection as ALIX.2/ALIX.3 platform"); diff --git a/arch/x86/platform/iris/iris.c b/arch/x86/platform/iris/iris.c index 1ba7f5ed8c9..5917eb56b31 100644 --- a/arch/x86/platform/iris/iris.c +++ b/arch/x86/platform/iris/iris.c @@ -42,7 +42,7 @@ MODULE_AUTHOR("Sébastien Hinderer "); MODULE_DESCRIPTION("A power_off handler for Iris devices from EuroBraille"); MODULE_SUPPORTED_DEVICE("Eurobraille/Iris"); -static int force; +static bool force; module_param(force, bool, 0); MODULE_PARM_DESC(force, "Set to one to force poweroff handler installation."); -- cgit v1.2.3-70-g09d2 From a3301b751b19f0efbafddc4034f8e7ce6bf3007b Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Sat, 14 Jan 2012 08:11:31 +0530 Subject: x86/mce: Fix CPU hotplug and suspend regression related to MCE Commit 8a25a2fd126c ("cpu: convert 'cpu' and 'machinecheck' sysdev_class to a regular subsystem") changed how things are dealt with in the MCE subsystem. Some of the things that got broken due to this are CPU hotplug and suspend/hibernate. MCE uses per_cpu allocations of struct device. So, when a CPU goes offline and comes back online, in order to ensure that we start from a clean slate with respect to the MCE subsystem, zero out the entire per_cpu device structure to 0 before using it. Signed-off-by: Srivatsa S. Bhat Signed-off-by: Linus Torvalds --- arch/x86/kernel/cpu/mcheck/mce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index f22a9f7f639..29ba3297e48 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -2011,7 +2011,7 @@ static __cpuinit int mce_device_create(unsigned int cpu) if (!mce_available(&boot_cpu_data)) return -EIO; - memset(&dev->kobj, 0, sizeof(struct kobject)); + memset(dev, 0, sizeof(struct device)); dev->id = cpu; dev->bus = &mce_subsys; -- cgit v1.2.3-70-g09d2 From e032d80774315869aa2285b217fdbbfed86c0b49 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 16 Jan 2012 14:40:28 -0800 Subject: mce: fix warning messages about static struct mce_device When suspending, there was a large list of warnings going something like: Device 'machinecheck1' does not have a release() function, it is broken and must be fixed This patch turns the static mce_devices into dynamically allocated, and properly frees them when they are removed from the system. It solves the warning messages on my laptop here. Reported-by: "Srivatsa S. Bhat" Reported-by: Linus Torvalds Tested-by: Djalal Harouni Cc: Kay Sievers Cc: Tony Luck Cc: Borislav Petkov Signed-off-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- arch/x86/include/asm/mce.h | 2 +- arch/x86/kernel/cpu/mcheck/mce.c | 18 ++++++++++++++---- arch/x86/kernel/cpu/mcheck/mce_amd.c | 18 +++++++++++------- 3 files changed, 26 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index f35ce43c1a7..6aefb14cbbc 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -151,7 +151,7 @@ static inline void enable_p5_mce(void) {} void mce_setup(struct mce *m); void mce_log(struct mce *m); -DECLARE_PER_CPU(struct device, mce_device); +extern struct device *mce_device[CONFIG_NR_CPUS]; /* * Maximum banks number. diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 29ba3297e48..5a11ae2e9e9 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1859,7 +1859,7 @@ static struct bus_type mce_subsys = { .dev_name = "machinecheck", }; -DEFINE_PER_CPU(struct device, mce_device); +struct device *mce_device[CONFIG_NR_CPUS]; __cpuinitdata void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); @@ -2001,19 +2001,27 @@ static struct device_attribute *mce_device_attrs[] = { static cpumask_var_t mce_device_initialized; +static void mce_device_release(struct device *dev) +{ + kfree(dev); +} + /* Per cpu device init. All of the cpus still share the same ctrl bank: */ static __cpuinit int mce_device_create(unsigned int cpu) { - struct device *dev = &per_cpu(mce_device, cpu); + struct device *dev; int err; int i, j; if (!mce_available(&boot_cpu_data)) return -EIO; - memset(dev, 0, sizeof(struct device)); + dev = kzalloc(sizeof *dev, GFP_KERNEL); + if (!dev) + return -ENOMEM; dev->id = cpu; dev->bus = &mce_subsys; + dev->release = &mce_device_release; err = device_register(dev); if (err) @@ -2030,6 +2038,7 @@ static __cpuinit int mce_device_create(unsigned int cpu) goto error2; } cpumask_set_cpu(cpu, mce_device_initialized); + mce_device[cpu] = dev; return 0; error2: @@ -2046,7 +2055,7 @@ error: static __cpuinit void mce_device_remove(unsigned int cpu) { - struct device *dev = &per_cpu(mce_device, cpu); + struct device *dev = mce_device[cpu]; int i; if (!cpumask_test_cpu(cpu, mce_device_initialized)) @@ -2060,6 +2069,7 @@ static __cpuinit void mce_device_remove(unsigned int cpu) device_unregister(dev); cpumask_clear_cpu(cpu, mce_device_initialized); + mce_device[cpu] = NULL; } /* Make sure there are no machine checks on offlined CPUs. */ diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index ba0b94a7e20..786e76a8632 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -523,6 +523,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) { int i, err = 0; struct threshold_bank *b = NULL; + struct device *dev = mce_device[cpu]; char name[32]; sprintf(name, "threshold_bank%i", bank); @@ -543,8 +544,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) if (!b) goto out; - err = sysfs_create_link(&per_cpu(mce_device, cpu).kobj, - b->kobj, name); + err = sysfs_create_link(&dev->kobj, b->kobj, name); if (err) goto out; @@ -565,7 +565,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) goto out; } - b->kobj = kobject_create_and_add(name, &per_cpu(mce_device, cpu).kobj); + b->kobj = kobject_create_and_add(name, &dev->kobj); if (!b->kobj) goto out_free; @@ -585,8 +585,9 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) if (i == cpu) continue; - err = sysfs_create_link(&per_cpu(mce_device, i).kobj, - b->kobj, name); + dev = mce_device[i]; + if (dev) + err = sysfs_create_link(&dev->kobj,b->kobj, name); if (err) goto out; @@ -649,6 +650,7 @@ static void deallocate_threshold_block(unsigned int cpu, static void threshold_remove_bank(unsigned int cpu, int bank) { struct threshold_bank *b; + struct device *dev; char name[32]; int i = 0; @@ -663,7 +665,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) #ifdef CONFIG_SMP /* sibling symlink */ if (shared_bank[bank] && b->blocks->cpu != cpu) { - sysfs_remove_link(&per_cpu(mce_device, cpu).kobj, name); + sysfs_remove_link(&mce_device[cpu]->kobj, name); per_cpu(threshold_banks, cpu)[bank] = NULL; return; @@ -675,7 +677,9 @@ static void threshold_remove_bank(unsigned int cpu, int bank) if (i == cpu) continue; - sysfs_remove_link(&per_cpu(mce_device, i).kobj, name); + dev = mce_device[i]; + if (dev) + sysfs_remove_link(&dev->kobj, name); per_cpu(threshold_banks, i)[bank] = NULL; } -- cgit v1.2.3-70-g09d2 From b54ac6d2a25084667da781c7ca2cebef52a2bcdd Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Thu, 8 Dec 2011 11:25:49 +0800 Subject: ACPI, Record ACPI NVS regions Some firmware will access memory in ACPI NVS region via APEI. That is, instructions in APEI ERST/EINJ table will read/write ACPI NVS region. The original resource conflict checking in APEI code will check memory/ioport accessed by APEI via general resource management mechanism. But ACPI NVS region is marked as busy already, so that the false resource conflict will prevent APEI ERST/EINJ to work. To fix this, this patch record ACPI NVS regions, so that we can avoid request resources for memory region inside it. Signed-off-by: Huang Ying Signed-off-by: Len Brown --- arch/x86/kernel/e820.c | 4 ++-- drivers/acpi/Makefile | 3 ++- drivers/acpi/nvs.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++- include/linux/acpi.h | 20 +++++++++++++------ 4 files changed, 70 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 303a0e48f07..51c3b186e5b 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -714,7 +714,7 @@ void __init e820_mark_nosave_regions(unsigned long limit_pfn) } #endif -#ifdef CONFIG_HIBERNATION +#ifdef CONFIG_ACPI /** * Mark ACPI NVS memory region, so that we can save/restore it during * hibernation and the subsequent resume. @@ -727,7 +727,7 @@ static int __init e820_mark_nvs_memory(void) struct e820entry *ei = &e820.map[i]; if (ei->type == E820_NVS) - suspend_nvs_register(ei->addr, ei->size); + acpi_nvs_register(ei->addr, ei->size); } return 0; diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index ecb26b4f29a..c07f44f05f9 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -20,11 +20,12 @@ obj-y += acpi.o \ # All the builtin files are in the "acpi." module_param namespace. acpi-y += osl.o utils.o reboot.o acpi-y += atomicio.o +acpi-y += nvs.o # sleep related files acpi-y += wakeup.o acpi-y += sleep.o -acpi-$(CONFIG_ACPI_SLEEP) += proc.o nvs.o +acpi-$(CONFIG_ACPI_SLEEP) += proc.o # diff --git a/drivers/acpi/nvs.c b/drivers/acpi/nvs.c index 096787b43c9..7a2035fa8c7 100644 --- a/drivers/acpi/nvs.c +++ b/drivers/acpi/nvs.c @@ -15,6 +15,56 @@ #include #include +/* ACPI NVS regions, APEI may use it */ + +struct nvs_region { + __u64 phys_start; + __u64 size; + struct list_head node; +}; + +static LIST_HEAD(nvs_region_list); + +#ifdef CONFIG_ACPI_SLEEP +static int suspend_nvs_register(unsigned long start, unsigned long size); +#else +static inline int suspend_nvs_register(unsigned long a, unsigned long b) +{ + return 0; +} +#endif + +int acpi_nvs_register(__u64 start, __u64 size) +{ + struct nvs_region *region; + + region = kmalloc(sizeof(*region), GFP_KERNEL); + if (!region) + return -ENOMEM; + region->phys_start = start; + region->size = size; + list_add_tail(®ion->node, &nvs_region_list); + + return suspend_nvs_register(start, size); +} + +int acpi_nvs_for_each_region(int (*func)(__u64 start, __u64 size, void *data), + void *data) +{ + int rc; + struct nvs_region *region; + + list_for_each_entry(region, &nvs_region_list, node) { + rc = func(region->phys_start, region->size, data); + if (rc) + return rc; + } + + return 0; +} + + +#ifdef CONFIG_ACPI_SLEEP /* * Platforms, like ACPI, may want us to save some memory used by them during * suspend and to restore the contents of this memory during the subsequent @@ -41,7 +91,7 @@ static LIST_HEAD(nvs_list); * things so that the data from page-aligned addresses in this region will * be copied into separate RAM pages. */ -int suspend_nvs_register(unsigned long start, unsigned long size) +static int suspend_nvs_register(unsigned long start, unsigned long size) { struct nvs_page *entry, *next; @@ -159,3 +209,4 @@ void suspend_nvs_restore(void) if (entry->data) memcpy(entry->kaddr, entry->data, entry->size); } +#endif diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6001b4da39d..26b75442ff7 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -306,6 +306,11 @@ extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 req); extern void acpi_early_init(void); +extern int acpi_nvs_register(__u64 start, __u64 size); + +extern int acpi_nvs_for_each_region(int (*func)(__u64, __u64, void *), + void *data); + #else /* !CONFIG_ACPI */ #define acpi_disabled 1 @@ -348,15 +353,18 @@ static inline int acpi_table_parse(char *id, { return -1; } -#endif /* !CONFIG_ACPI */ -#ifdef CONFIG_ACPI_SLEEP -int suspend_nvs_register(unsigned long start, unsigned long size); -#else -static inline int suspend_nvs_register(unsigned long a, unsigned long b) +static inline int acpi_nvs_register(__u64 start, __u64 size) { return 0; } -#endif + +static inline int acpi_nvs_for_each_region(int (*func)(__u64, __u64, void *), + void *data) +{ + return 0; +} + +#endif /* !CONFIG_ACPI */ #endif /*_LINUX_ACPI_H*/ -- cgit v1.2.3-70-g09d2 From d7e7528bcd456f5c36ad4a202ccfb43c5aa98bc4 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 3 Jan 2012 14:23:06 -0500 Subject: Audit: push audit success and retcode into arch ptrace.h The audit system previously expected arches calling to audit_syscall_exit to supply as arguments if the syscall was a success and what the return code was. Audit also provides a helper AUDITSC_RESULT which was supposed to simplify things by converting from negative retcodes to an audit internal magic value stating success or failure. This helper was wrong and could indicate that a valid pointer returned to userspace was a failed syscall. The fix is to fix the layering foolishness. We now pass audit_syscall_exit a struct pt_reg and it in turns calls back into arch code to collect the return value and to determine if the syscall was a success or failure. We also define a generic is_syscall_success() macro which determines success/failure based on if the value is < -MAX_ERRNO. This works for arches like x86 which do not use a separate mechanism to indicate syscall failure. We make both the is_syscall_success() and regs_return_value() static inlines instead of macros. The reason is because the audit function must take a void* for the regs. (uml calls theirs struct uml_pt_regs instead of just struct pt_regs so audit_syscall_exit can't take a struct pt_regs). Since the audit function takes a void* we need to use static inlines to cast it back to the arch correct structure to dereference it. The other major change is that on some arches, like ia64, MIPS and ppc, we change regs_return_value() to give us the negative value on syscall failure. THE only other user of this macro, kretprobe_example.c, won't notice and it makes the value signed consistently for the audit functions across all archs. In arch/sh/kernel/ptrace_64.c I see that we were using regs[9] in the old audit code as the return value. But the ptrace_64.h code defined the macro regs_return_value() as regs[3]. I have no idea which one is correct, but this patch now uses the regs_return_value() function, so it now uses regs[3]. For powerpc we previously used regs->result but now use the regs_return_value() function which uses regs->gprs[3]. regs->gprs[3] is always positive so the regs_return_value(), much like ia64 makes it negative before calling the audit code when appropriate. Signed-off-by: Eric Paris Acked-by: H. Peter Anvin [for x86 portion] Acked-by: Tony Luck [for ia64] Acked-by: Richard Weinberger [for uml] Acked-by: David S. Miller [for sparc] Acked-by: Ralf Baechle [for mips] Acked-by: Benjamin Herrenschmidt [for ppc] --- arch/ia64/include/asm/ptrace.h | 13 ++++++++++++- arch/ia64/kernel/ptrace.c | 9 +-------- arch/microblaze/include/asm/ptrace.h | 5 +++++ arch/microblaze/kernel/ptrace.c | 3 +-- arch/mips/include/asm/ptrace.h | 14 +++++++++++++- arch/mips/kernel/ptrace.c | 4 +--- arch/powerpc/include/asm/ptrace.h | 13 ++++++++++++- arch/powerpc/kernel/ptrace.c | 4 +--- arch/s390/include/asm/ptrace.h | 6 +++++- arch/s390/kernel/ptrace.c | 4 +--- arch/sh/include/asm/ptrace_32.h | 5 ++++- arch/sh/include/asm/ptrace_64.h | 5 ++++- arch/sh/kernel/ptrace_32.c | 4 +--- arch/sh/kernel/ptrace_64.c | 4 +--- arch/sparc/include/asm/ptrace.h | 10 +++++++++- arch/sparc/kernel/ptrace_64.c | 11 +---------- arch/um/kernel/ptrace.c | 4 ++-- arch/x86/ia32/ia32entry.S | 10 +++++----- arch/x86/kernel/entry_32.S | 8 ++++---- arch/x86/kernel/entry_64.S | 10 +++++----- arch/x86/kernel/ptrace.c | 3 +-- arch/x86/kernel/vm86_32.c | 4 ++-- arch/x86/um/shared/sysdep/ptrace.h | 5 +++++ include/linux/audit.h | 22 ++++++++++++++-------- include/linux/ptrace.h | 10 ++++++++++ kernel/auditsc.c | 16 ++++++++++++---- 26 files changed, 132 insertions(+), 74 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/ia64/include/asm/ptrace.h b/arch/ia64/include/asm/ptrace.h index f5cb27614e3..68c98f5b3ca 100644 --- a/arch/ia64/include/asm/ptrace.h +++ b/arch/ia64/include/asm/ptrace.h @@ -246,7 +246,18 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs) return regs->ar_bspstore; } -#define regs_return_value(regs) ((regs)->r8) +static inline int is_syscall_success(struct pt_regs *regs) +{ + return regs->r10 != -1; +} + +static inline long regs_return_value(struct pt_regs *regs) +{ + if (is_syscall_success(regs)) + return regs->r8; + else + return -regs->r8; +} /* Conserve space in histogram by encoding slot bits in address * bits 2 and 3 rather than bits 0 and 1. diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 8848f43d819..2c154088cce 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -1268,14 +1268,7 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3, { int step; - if (unlikely(current->audit_context)) { - int success = AUDITSC_RESULT(regs.r10); - long result = regs.r8; - - if (success != AUDITSC_SUCCESS) - result = -result; - audit_syscall_exit(success, result); - } + audit_syscall_exit(®s); step = test_thread_flag(TIF_SINGLESTEP); if (step || test_thread_flag(TIF_SYSCALL_TRACE)) diff --git a/arch/microblaze/include/asm/ptrace.h b/arch/microblaze/include/asm/ptrace.h index 816bee64b19..94e92c80585 100644 --- a/arch/microblaze/include/asm/ptrace.h +++ b/arch/microblaze/include/asm/ptrace.h @@ -61,6 +61,11 @@ struct pt_regs { #define instruction_pointer(regs) ((regs)->pc) #define profile_pc(regs) instruction_pointer(regs) +static inline long regs_return_value(struct pt_regs *regs) +{ + return regs->r3; +} + #else /* __KERNEL__ */ /* pt_regs offsets used by gdbserver etc in ptrace syscalls */ diff --git a/arch/microblaze/kernel/ptrace.c b/arch/microblaze/kernel/ptrace.c index 043cb58f9c4..f564b1bfd38 100644 --- a/arch/microblaze/kernel/ptrace.c +++ b/arch/microblaze/kernel/ptrace.c @@ -159,8 +159,7 @@ asmlinkage void do_syscall_trace_leave(struct pt_regs *regs) { int step; - if (unlikely(current->audit_context)) - audit_syscall_exit(AUDITSC_RESULT(regs->r3), regs->r3); + audit_syscall_exit(regs); step = test_thread_flag(TIF_SINGLESTEP); if (step || test_thread_flag(TIF_SYSCALL_TRACE)) diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h index de39b1f343e..7d409505df2 100644 --- a/arch/mips/include/asm/ptrace.h +++ b/arch/mips/include/asm/ptrace.h @@ -137,7 +137,19 @@ extern int ptrace_set_watch_regs(struct task_struct *child, */ #define user_mode(regs) (((regs)->cp0_status & KU_MASK) == KU_USER) -#define regs_return_value(_regs) ((_regs)->regs[2]) +static inline int is_syscall_success(struct pt_regs *regs) +{ + return !regs->regs[7]; +} + +static inline long regs_return_value(struct pt_regs *regs) +{ + if (is_syscall_success(regs)) + return regs->regs[2]; + else + return -regs->regs[2]; +} + #define instruction_pointer(regs) ((regs)->cp0_epc) #define profile_pc(regs) instruction_pointer(regs) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 4e6ea1ffad4..ab0f1963a7b 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -572,9 +572,7 @@ out: */ asmlinkage void syscall_trace_leave(struct pt_regs *regs) { - if (unlikely(current->audit_context)) - audit_syscall_exit(AUDITSC_RESULT(regs->regs[7]), - -regs->regs[2]); + audit_syscall_exit(regs); if (!(current->ptrace & PT_PTRACED)) return; diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 48223f9b872..78a205162fd 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -86,7 +86,18 @@ struct pt_regs { #define instruction_pointer(regs) ((regs)->nip) #define user_stack_pointer(regs) ((regs)->gpr[1]) #define kernel_stack_pointer(regs) ((regs)->gpr[1]) -#define regs_return_value(regs) ((regs)->gpr[3]) +static inline int is_syscall_success(struct pt_regs *regs) +{ + return !(regs->ccr & 0x10000000); +} + +static inline long regs_return_value(struct pt_regs *regs) +{ + if (is_syscall_success(regs)) + return regs->gpr[3]; + else + return -regs->gpr[3]; +} #ifdef CONFIG_SMP extern unsigned long profile_pc(struct pt_regs *regs); diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 5de73dbd15c..09d31c12a5e 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1748,9 +1748,7 @@ void do_syscall_trace_leave(struct pt_regs *regs) { int step; - if (unlikely(current->audit_context)) - audit_syscall_exit((regs->ccr&0x10000000)?AUDITSC_FAILURE:AUDITSC_SUCCESS, - regs->result); + audit_syscall_exit(regs); if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_exit(regs, regs->result); diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 56da355678f..aeb77f01798 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -541,9 +541,13 @@ struct user_regs_struct #define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0) #define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN) #define user_stack_pointer(regs)((regs)->gprs[15]) -#define regs_return_value(regs)((regs)->gprs[2]) #define profile_pc(regs) instruction_pointer(regs) +static inline long regs_return_value(struct pt_regs *regs) +{ + return regs->gprs[2]; +} + int regs_query_register_offset(const char *name); const char *regs_query_register_name(unsigned int offset); unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset); diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 573bc29551e..f5275860098 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -751,9 +751,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) asmlinkage void do_syscall_trace_exit(struct pt_regs *regs) { - if (unlikely(current->audit_context)) - audit_syscall_exit(AUDITSC_RESULT(regs->gprs[2]), - regs->gprs[2]); + audit_syscall_exit(regs); if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_exit(regs, regs->gprs[2]); diff --git a/arch/sh/include/asm/ptrace_32.h b/arch/sh/include/asm/ptrace_32.h index 6c2239cca1a..2d3e906aa72 100644 --- a/arch/sh/include/asm/ptrace_32.h +++ b/arch/sh/include/asm/ptrace_32.h @@ -76,7 +76,10 @@ struct pt_dspregs { #ifdef __KERNEL__ #define MAX_REG_OFFSET offsetof(struct pt_regs, tra) -#define regs_return_value(_regs) ((_regs)->regs[0]) +static inline long regs_return_value(struct pt_regs *regs) +{ + return regs->regs[0]; +} #endif /* __KERNEL__ */ diff --git a/arch/sh/include/asm/ptrace_64.h b/arch/sh/include/asm/ptrace_64.h index bf9be7764d6..eb3fcceaf64 100644 --- a/arch/sh/include/asm/ptrace_64.h +++ b/arch/sh/include/asm/ptrace_64.h @@ -13,7 +13,10 @@ struct pt_regs { #ifdef __KERNEL__ #define MAX_REG_OFFSET offsetof(struct pt_regs, tregs[7]) -#define regs_return_value(_regs) ((_regs)->regs[3]) +static inline long regs_return_value(struct pt_regs *regs) +{ + return regs->regs[3]; +} #endif /* __KERNEL__ */ diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c index 92b3c276339..c0b5c179d27 100644 --- a/arch/sh/kernel/ptrace_32.c +++ b/arch/sh/kernel/ptrace_32.c @@ -530,9 +530,7 @@ asmlinkage void do_syscall_trace_leave(struct pt_regs *regs) { int step; - if (unlikely(current->audit_context)) - audit_syscall_exit(AUDITSC_RESULT(regs->regs[0]), - regs->regs[0]); + audit_syscall_exit(regs); if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_exit(regs, regs->regs[0]); diff --git a/arch/sh/kernel/ptrace_64.c b/arch/sh/kernel/ptrace_64.c index c8f97649f35..ba720d68643 100644 --- a/arch/sh/kernel/ptrace_64.c +++ b/arch/sh/kernel/ptrace_64.c @@ -548,9 +548,7 @@ asmlinkage void do_syscall_trace_leave(struct pt_regs *regs) { int step; - if (unlikely(current->audit_context)) - audit_syscall_exit(AUDITSC_RESULT(regs->regs[9]), - regs->regs[9]); + audit_syscall_exit(regs); if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_exit(regs, regs->regs[9]); diff --git a/arch/sparc/include/asm/ptrace.h b/arch/sparc/include/asm/ptrace.h index a0e1bcf843a..c00c3b5c280 100644 --- a/arch/sparc/include/asm/ptrace.h +++ b/arch/sparc/include/asm/ptrace.h @@ -207,7 +207,15 @@ do { current_thread_info()->syscall_noerror = 1; \ #define instruction_pointer(regs) ((regs)->tpc) #define instruction_pointer_set(regs, val) ((regs)->tpc = (val)) #define user_stack_pointer(regs) ((regs)->u_regs[UREG_FP]) -#define regs_return_value(regs) ((regs)->u_regs[UREG_I0]) +static inline int is_syscall_success(struct pt_regs *regs) +{ + return !(regs->tstate & (TSTATE_XCARRY | TSTATE_ICARRY)); +} + +static inline long regs_return_value(struct pt_regs *regs) +{ + return regs->u_regs[UREG_I0]; +} #ifdef CONFIG_SMP extern unsigned long profile_pc(struct pt_regs *); #else diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c index 96ee50a8066..c73c8c50f11 100644 --- a/arch/sparc/kernel/ptrace_64.c +++ b/arch/sparc/kernel/ptrace_64.c @@ -1086,17 +1086,8 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs) asmlinkage void syscall_trace_leave(struct pt_regs *regs) { -#ifdef CONFIG_AUDITSYSCALL - if (unlikely(current->audit_context)) { - unsigned long tstate = regs->tstate; - int result = AUDITSC_SUCCESS; + audit_syscall_exit(regs); - if (unlikely(tstate & (TSTATE_XCARRY | TSTATE_ICARRY))) - result = AUDITSC_FAILURE; - - audit_syscall_exit(result, regs->u_regs[UREG_I0]); - } -#endif if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_exit(regs, regs->u_regs[UREG_G1]); diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c index c9da32b0c70..2ccf25c42fe 100644 --- a/arch/um/kernel/ptrace.c +++ b/arch/um/kernel/ptrace.c @@ -175,8 +175,8 @@ void syscall_trace(struct uml_pt_regs *regs, int entryexit) UPT_SYSCALL_ARG2(regs), UPT_SYSCALL_ARG3(regs), UPT_SYSCALL_ARG4(regs)); - else audit_syscall_exit(AUDITSC_RESULT(UPT_SYSCALL_RET(regs)), - UPT_SYSCALL_RET(regs)); + else + audit_syscall_exit(regs); } /* Fake a debug trap */ diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 3e274564f6b..64ced0b8f8f 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -14,6 +14,7 @@ #include #include #include +#include /* Avoid __ASSEMBLER__'ifying just for this. */ #include @@ -208,12 +209,11 @@ sysexit_from_sys_call: TRACE_IRQS_ON sti movl %eax,%esi /* second arg, syscall return value */ - cmpl $0,%eax /* is it < 0? */ - setl %al /* 1 if so, 0 if not */ + cmpl $-MAX_ERRNO,%eax /* is it an error ? */ + setbe %al /* 1 if so, 0 if not */ movzbl %al,%edi /* zero-extend that into %edi */ - inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ - call audit_syscall_exit - movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */ + call __audit_syscall_exit + movq RAX-ARGOFFSET(%rsp),%rax /* reload syscall return value */ movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi cli TRACE_IRQS_OFF diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 22d0e21b4dd..a22facf06f0 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -42,6 +42,7 @@ */ #include +#include #include #include #include @@ -466,11 +467,10 @@ sysexit_audit: TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_ANY) movl %eax,%edx /* second arg, syscall return value */ - cmpl $0,%eax /* is it < 0? */ - setl %al /* 1 if so, 0 if not */ + cmpl $-MAX_ERRNO,%eax /* is it an error ? */ + setbe %al /* 1 if so, 0 if not */ movzbl %al,%eax /* zero-extend that */ - inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ - call audit_syscall_exit + call __audit_syscall_exit DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index a20e1cb9dc8..e51393dd93a 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -55,6 +55,7 @@ #include #include #include +#include /* Avoid __ASSEMBLER__'ifying just for this. */ #include @@ -563,17 +564,16 @@ auditsys: jmp system_call_fastpath /* - * Return fast path for syscall audit. Call audit_syscall_exit() + * Return fast path for syscall audit. Call __audit_syscall_exit() * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT * masked off. */ sysret_audit: movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */ - cmpq $0,%rsi /* is it < 0? */ - setl %al /* 1 if so, 0 if not */ + cmpq $-MAX_ERRNO,%rsi /* is it < -MAX_ERRNO? */ + setbe %al /* 1 if so, 0 if not */ movzbl %al,%edi /* zero-extend that into %edi */ - inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ - call audit_syscall_exit + call __audit_syscall_exit movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi jmp sysret_check #endif /* CONFIG_AUDITSYSCALL */ diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 89a04c7b5bb..8b021875877 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1414,8 +1414,7 @@ void syscall_trace_leave(struct pt_regs *regs) { bool step; - if (unlikely(current->audit_context)) - audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); + audit_syscall_exit(regs); if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_exit(regs, regs->ax); diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 863f8753ab0..af17e1c966d 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -335,9 +335,9 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk if (info->flags & VM86_SCREEN_BITMAP) mark_screen_rdonly(tsk->mm); - /*call audit_syscall_exit since we do not exit via the normal paths */ + /*call __audit_syscall_exit since we do not exit via the normal paths */ if (unlikely(current->audit_context)) - audit_syscall_exit(AUDITSC_RESULT(0), 0); + __audit_syscall_exit(1, 0); __asm__ __volatile__( "movl %0,%%esp\n\t" diff --git a/arch/x86/um/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace.h index 711b1621747..5ef9344a8b2 100644 --- a/arch/x86/um/shared/sysdep/ptrace.h +++ b/arch/x86/um/shared/sysdep/ptrace.h @@ -3,3 +3,8 @@ #else #include "ptrace_64.h" #endif + +static inline long regs_return_value(struct uml_pt_regs *regs) +{ + return UPT_SYSCALL_RET(regs); +} diff --git a/include/linux/audit.h b/include/linux/audit.h index 6e1c533f9b4..3d65e4b3ba0 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -26,6 +26,7 @@ #include #include +#include /* The netlink messages for the audit system is divided into blocks: * 1000 - 1099 are for commanding the audit system @@ -408,10 +409,6 @@ struct audit_field { void *lsm_rule; }; -#define AUDITSC_INVALID 0 -#define AUDITSC_SUCCESS 1 -#define AUDITSC_FAILURE 2 -#define AUDITSC_RESULT(x) ( ((long)(x))<0?AUDITSC_FAILURE:AUDITSC_SUCCESS ) extern int __init audit_register_class(int class, unsigned *list); extern int audit_classify_syscall(int abi, unsigned syscall); extern int audit_classify_arch(int arch); @@ -424,7 +421,7 @@ extern void audit_free(struct task_struct *task); extern void audit_syscall_entry(int arch, int major, unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3); -extern void audit_syscall_exit(int failed, long return_code); +extern void __audit_syscall_exit(int ret_success, long ret_value); extern void __audit_getname(const char *name); extern void audit_putname(const char *name); extern void __audit_inode(const char *name, const struct dentry *dentry); @@ -438,6 +435,15 @@ static inline int audit_dummy_context(void) void *p = current->audit_context; return !p || *(int *)p; } +static inline void audit_syscall_exit(void *pt_regs) +{ + if (unlikely(current->audit_context)) { + int success = is_syscall_success(pt_regs); + int return_code = regs_return_value(pt_regs); + + __audit_syscall_exit(success, return_code); + } +} static inline void audit_getname(const char *name) { if (unlikely(!audit_dummy_context())) @@ -551,12 +557,12 @@ static inline void audit_mmap_fd(int fd, int flags) extern int audit_n_rules; extern int audit_signals; -#else +#else /* CONFIG_AUDITSYSCALL */ #define audit_finish_fork(t) #define audit_alloc(t) ({ 0; }) #define audit_free(t) do { ; } while (0) #define audit_syscall_entry(ta,a,b,c,d,e) do { ; } while (0) -#define audit_syscall_exit(f,r) do { ; } while (0) +#define audit_syscall_exit(r) do { ; } while (0) #define audit_dummy_context() 1 #define audit_getname(n) do { ; } while (0) #define audit_putname(n) do { ; } while (0) @@ -587,7 +593,7 @@ extern int audit_signals; #define audit_ptrace(t) ((void)0) #define audit_n_rules 0 #define audit_signals 0 -#endif +#endif /* CONFIG_AUDITSYSCALL */ #ifdef CONFIG_AUDIT /* These are defined in audit.c */ diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 800f113bea6..dd4cefa6519 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -112,6 +112,7 @@ #include /* For unlikely. */ #include /* For struct task_struct. */ +#include /* for IS_ERR_VALUE */ extern long arch_ptrace(struct task_struct *child, long request, @@ -265,6 +266,15 @@ static inline void ptrace_release_task(struct task_struct *task) #define force_successful_syscall_return() do { } while (0) #endif +#ifndef is_syscall_success +/* + * On most systems we can tell if a syscall is a success based on if the retval + * is an error value. On some systems like ia64 and powerpc they have different + * indicators of success/failure and must define their own. + */ +#define is_syscall_success(regs) (!IS_ERR_VALUE((unsigned long)(regs_return_value(regs)))) +#endif + /* * should define the following things inside #ifdef __KERNEL__. * diff --git a/kernel/auditsc.c b/kernel/auditsc.c index e9bcb93800d..3d285380818 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -70,6 +70,11 @@ #include "audit.h" +/* flags stating the success for a syscall */ +#define AUDITSC_INVALID 0 +#define AUDITSC_SUCCESS 1 +#define AUDITSC_FAILURE 2 + /* AUDIT_NAMES is the number of slots we reserve in the audit_context * for saving names from getname(). If we get more names we will allocate * a name dynamically and also add those to the list anchored by names_list. */ @@ -1724,8 +1729,7 @@ void audit_finish_fork(struct task_struct *child) /** * audit_syscall_exit - deallocate audit context after a system call - * @valid: success/failure flag - * @return_code: syscall return value + * @pt_regs: syscall registers * * Tear down after system call. If the audit context has been marked as * auditable (either because of the AUDIT_RECORD_CONTEXT state from @@ -1733,13 +1737,17 @@ void audit_finish_fork(struct task_struct *child) * message), then write out the syscall information. In call cases, * free the names stored from getname(). */ -void audit_syscall_exit(int valid, long return_code) +void __audit_syscall_exit(int success, long return_code) { struct task_struct *tsk = current; struct audit_context *context; - context = audit_get_context(tsk, valid, return_code); + if (success) + success = AUDITSC_SUCCESS; + else + success = AUDITSC_FAILURE; + context = audit_get_context(tsk, success, return_code); if (likely(!context)) return; -- cgit v1.2.3-70-g09d2 From b05d8447e7821695bc2fa3359431f7a664232743 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 3 Jan 2012 14:23:06 -0500 Subject: audit: inline audit_syscall_entry to reduce burden on archs Every arch calls: if (unlikely(current->audit_context)) audit_syscall_entry() which requires knowledge about audit (the existance of audit_context) in the arch code. Just do it all in static inline in audit.h so that arch's can remain blissfully ignorant. Signed-off-by: Eric Paris --- arch/ia64/kernel/ptrace.c | 9 +-------- arch/microblaze/kernel/ptrace.c | 6 ++---- arch/mips/kernel/ptrace.c | 7 +++---- arch/powerpc/kernel/ptrace.c | 26 ++++++++++++-------------- arch/s390/kernel/ptrace.c | 11 +++++------ arch/sh/kernel/ptrace_32.c | 7 +++---- arch/sh/kernel/ptrace_64.c | 7 +++---- arch/sparc/kernel/ptrace_64.c | 17 ++++++++--------- arch/um/kernel/ptrace.c | 20 +++++++++----------- arch/x86/ia32/ia32entry.S | 2 +- arch/x86/kernel/entry_32.S | 2 +- arch/x86/kernel/entry_64.S | 4 ++-- arch/x86/kernel/ptrace.c | 22 ++++++++++------------ arch/xtensa/kernel/ptrace.c | 3 +-- include/linux/audit.h | 13 ++++++++++--- kernel/auditsc.c | 2 +- 16 files changed, 72 insertions(+), 86 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 2c154088cce..dad91661ddf 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -1246,15 +1246,8 @@ syscall_trace_enter (long arg0, long arg1, long arg2, long arg3, if (test_thread_flag(TIF_RESTORE_RSE)) ia64_sync_krbs(); - if (unlikely(current->audit_context)) { - long syscall; - int arch; - syscall = regs.r15; - arch = AUDIT_ARCH_IA64; - - audit_syscall_entry(arch, syscall, arg0, arg1, arg2, arg3); - } + audit_syscall_entry(AUDIT_ARCH_IA64, regs.r15, arg0, arg1, arg2, arg3); return 0; } diff --git a/arch/microblaze/kernel/ptrace.c b/arch/microblaze/kernel/ptrace.c index f564b1bfd38..6eb2aa927d8 100644 --- a/arch/microblaze/kernel/ptrace.c +++ b/arch/microblaze/kernel/ptrace.c @@ -147,10 +147,8 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) */ ret = -1L; - if (unlikely(current->audit_context)) - audit_syscall_entry(EM_MICROBLAZE, regs->r12, - regs->r5, regs->r6, - regs->r7, regs->r8); + audit_syscall_entry(EM_MICROBLAZE, regs->r12, regs->r5, regs->r6, + regs->r7, regs->r8); return ret ?: regs->r12; } diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index ab0f1963a7b..7786b608d93 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -560,10 +560,9 @@ asmlinkage void syscall_trace_enter(struct pt_regs *regs) } out: - if (unlikely(current->audit_context)) - audit_syscall_entry(audit_arch(), regs->regs[2], - regs->regs[4], regs->regs[5], - regs->regs[6], regs->regs[7]); + audit_syscall_entry(audit_arch(), regs->regs[2], + regs->regs[4], regs->regs[5], + regs->regs[6], regs->regs[7]); } /* diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 09d31c12a5e..5b43325402b 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1724,22 +1724,20 @@ long do_syscall_trace_enter(struct pt_regs *regs) if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->gpr[0]); - if (unlikely(current->audit_context)) { #ifdef CONFIG_PPC64 - if (!is_32bit_task()) - audit_syscall_entry(AUDIT_ARCH_PPC64, - regs->gpr[0], - regs->gpr[3], regs->gpr[4], - regs->gpr[5], regs->gpr[6]); - else + if (!is_32bit_task()) + audit_syscall_entry(AUDIT_ARCH_PPC64, + regs->gpr[0], + regs->gpr[3], regs->gpr[4], + regs->gpr[5], regs->gpr[6]); + else #endif - audit_syscall_entry(AUDIT_ARCH_PPC, - regs->gpr[0], - regs->gpr[3] & 0xffffffff, - regs->gpr[4] & 0xffffffff, - regs->gpr[5] & 0xffffffff, - regs->gpr[6] & 0xffffffff); - } + audit_syscall_entry(AUDIT_ARCH_PPC, + regs->gpr[0], + regs->gpr[3] & 0xffffffff, + regs->gpr[4] & 0xffffffff, + regs->gpr[5] & 0xffffffff, + regs->gpr[6] & 0xffffffff); return ret ?: regs->gpr[0]; } diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index f5275860098..9d82ed4bcb2 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -740,12 +740,11 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->gprs[2]); - if (unlikely(current->audit_context)) - audit_syscall_entry(is_compat_task() ? - AUDIT_ARCH_S390 : AUDIT_ARCH_S390X, - regs->gprs[2], regs->orig_gpr2, - regs->gprs[3], regs->gprs[4], - regs->gprs[5]); + audit_syscall_entry(is_compat_task() ? + AUDIT_ARCH_S390 : AUDIT_ARCH_S390X, + regs->gprs[2], regs->orig_gpr2, + regs->gprs[3], regs->gprs[4], + regs->gprs[5]); return ret ?: regs->gprs[2]; } diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c index c0b5c179d27..a3e65156376 100644 --- a/arch/sh/kernel/ptrace_32.c +++ b/arch/sh/kernel/ptrace_32.c @@ -518,10 +518,9 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->regs[0]); - if (unlikely(current->audit_context)) - audit_syscall_entry(audit_arch(), regs->regs[3], - regs->regs[4], regs->regs[5], - regs->regs[6], regs->regs[7]); + audit_syscall_entry(audit_arch(), regs->regs[3], + regs->regs[4], regs->regs[5], + regs->regs[6], regs->regs[7]); return ret ?: regs->regs[0]; } diff --git a/arch/sh/kernel/ptrace_64.c b/arch/sh/kernel/ptrace_64.c index ba720d68643..3d0080b5c97 100644 --- a/arch/sh/kernel/ptrace_64.c +++ b/arch/sh/kernel/ptrace_64.c @@ -536,10 +536,9 @@ asmlinkage long long do_syscall_trace_enter(struct pt_regs *regs) if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->regs[9]); - if (unlikely(current->audit_context)) - audit_syscall_entry(audit_arch(), regs->regs[1], - regs->regs[2], regs->regs[3], - regs->regs[4], regs->regs[5]); + audit_syscall_entry(audit_arch(), regs->regs[1], + regs->regs[2], regs->regs[3], + regs->regs[4], regs->regs[5]); return ret ?: regs->regs[9]; } diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c index c73c8c50f11..9388844cd88 100644 --- a/arch/sparc/kernel/ptrace_64.c +++ b/arch/sparc/kernel/ptrace_64.c @@ -1071,15 +1071,14 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs) if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->u_regs[UREG_G1]); - if (unlikely(current->audit_context) && !ret) - audit_syscall_entry((test_thread_flag(TIF_32BIT) ? - AUDIT_ARCH_SPARC : - AUDIT_ARCH_SPARC64), - regs->u_regs[UREG_G1], - regs->u_regs[UREG_I0], - regs->u_regs[UREG_I1], - regs->u_regs[UREG_I2], - regs->u_regs[UREG_I3]); + audit_syscall_entry((test_thread_flag(TIF_32BIT) ? + AUDIT_ARCH_SPARC : + AUDIT_ARCH_SPARC64), + regs->u_regs[UREG_G1], + regs->u_regs[UREG_I0], + regs->u_regs[UREG_I1], + regs->u_regs[UREG_I2], + regs->u_regs[UREG_I3]); return ret; } diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c index 2ccf25c42fe..06b19039050 100644 --- a/arch/um/kernel/ptrace.c +++ b/arch/um/kernel/ptrace.c @@ -167,17 +167,15 @@ void syscall_trace(struct uml_pt_regs *regs, int entryexit) int is_singlestep = (current->ptrace & PT_DTRACE) && entryexit; int tracesysgood; - if (unlikely(current->audit_context)) { - if (!entryexit) - audit_syscall_entry(HOST_AUDIT_ARCH, - UPT_SYSCALL_NR(regs), - UPT_SYSCALL_ARG1(regs), - UPT_SYSCALL_ARG2(regs), - UPT_SYSCALL_ARG3(regs), - UPT_SYSCALL_ARG4(regs)); - else - audit_syscall_exit(regs); - } + if (!entryexit) + audit_syscall_entry(HOST_AUDIT_ARCH, + UPT_SYSCALL_NR(regs), + UPT_SYSCALL_ARG1(regs), + UPT_SYSCALL_ARG2(regs), + UPT_SYSCALL_ARG3(regs), + UPT_SYSCALL_ARG4(regs)); + else + audit_syscall_exit(regs); /* Fake a debug trap */ if (is_singlestep) diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 025f0f01d25..cecfd9a8f73 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -192,7 +192,7 @@ sysexit_from_sys_call: movl %ebx,%edx /* 3rd arg: 1st syscall arg */ movl %eax,%esi /* 2nd arg: syscall number */ movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */ - call audit_syscall_entry + call __audit_syscall_entry movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */ cmpq $(IA32_NR_syscalls-1),%rax ja ia32_badsys diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index a22facf06f0..1ccd742eba1 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -456,7 +456,7 @@ sysenter_audit: movl %ebx,%ecx /* 3rd arg: 1st syscall arg */ movl %eax,%edx /* 2nd arg: syscall number */ movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */ - call audit_syscall_entry + call __audit_syscall_entry pushl_cfi %ebx movl PT_EAX(%esp),%eax /* reload syscall number */ jmp sysenter_do_call diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index e51393dd93a..1ca66b65012 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -549,7 +549,7 @@ badsys: #ifdef CONFIG_AUDITSYSCALL /* * Fast path for syscall audit without full syscall trace. - * We just call audit_syscall_entry() directly, and then + * We just call __audit_syscall_entry() directly, and then * jump back to the normal fast path. */ auditsys: @@ -559,7 +559,7 @@ auditsys: movq %rdi,%rdx /* 3rd arg: 1st syscall arg */ movq %rax,%rsi /* 2nd arg: syscall number */ movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */ - call audit_syscall_entry + call __audit_syscall_entry LOAD_ARGS 0 /* reload call-clobbered registers */ jmp system_call_fastpath diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 8b021875877..50267386b76 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1392,20 +1392,18 @@ long syscall_trace_enter(struct pt_regs *regs) if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->orig_ax); - if (unlikely(current->audit_context)) { - if (IS_IA32) - audit_syscall_entry(AUDIT_ARCH_I386, - regs->orig_ax, - regs->bx, regs->cx, - regs->dx, regs->si); + if (IS_IA32) + audit_syscall_entry(AUDIT_ARCH_I386, + regs->orig_ax, + regs->bx, regs->cx, + regs->dx, regs->si); #ifdef CONFIG_X86_64 - else - audit_syscall_entry(AUDIT_ARCH_X86_64, - regs->orig_ax, - regs->di, regs->si, - regs->dx, regs->r10); + else + audit_syscall_entry(AUDIT_ARCH_X86_64, + regs->orig_ax, + regs->di, regs->si, + regs->dx, regs->r10); #endif - } return ret ?: regs->orig_ax; } diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c index a0d042aa296..2dff698ab02 100644 --- a/arch/xtensa/kernel/ptrace.c +++ b/arch/xtensa/kernel/ptrace.c @@ -334,8 +334,7 @@ void do_syscall_trace_enter(struct pt_regs *regs) do_syscall_trace(); #if 0 - if (unlikely(current->audit_context)) - audit_syscall_entry(current, AUDIT_ARCH_XTENSA..); + audit_syscall_entry(current, AUDIT_ARCH_XTENSA..); #endif } diff --git a/include/linux/audit.h b/include/linux/audit.h index 3d65e4b3ba0..f56ce2669b8 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -418,9 +418,9 @@ extern int audit_classify_arch(int arch); extern void audit_finish_fork(struct task_struct *child); extern int audit_alloc(struct task_struct *task); extern void audit_free(struct task_struct *task); -extern void audit_syscall_entry(int arch, - int major, unsigned long a0, unsigned long a1, - unsigned long a2, unsigned long a3); +extern void __audit_syscall_entry(int arch, + int major, unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3); extern void __audit_syscall_exit(int ret_success, long ret_value); extern void __audit_getname(const char *name); extern void audit_putname(const char *name); @@ -435,6 +435,13 @@ static inline int audit_dummy_context(void) void *p = current->audit_context; return !p || *(int *)p; } +static inline void audit_syscall_entry(int arch, int major, unsigned long a0, + unsigned long a1, unsigned long a2, + unsigned long a3) +{ + if (unlikely(!audit_dummy_context())) + __audit_syscall_entry(arch, major, a0, a1, a2, a3); +} static inline void audit_syscall_exit(void *pt_regs) { if (unlikely(current->audit_context)) { diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 3d285380818..b408100dd6e 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1632,7 +1632,7 @@ void audit_free(struct task_struct *tsk) * will only be written if another part of the kernel requests that it * be written). */ -void audit_syscall_entry(int arch, int major, +void __audit_syscall_entry(int arch, int major, unsigned long a1, unsigned long a2, unsigned long a3, unsigned long a4) { -- cgit v1.2.3-70-g09d2 From 6015ff103133c7e50a753c198c69bcabc3a5e3b0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 18 Jan 2012 01:51:22 +0000 Subject: x86-32: Fix build failure with AUDIT=y, AUDITSYSCALL=n JONGMAN HEO reports: With current linus git (commit a25a2b84), I got following build error, arch/x86/kernel/vm86_32.c: In function 'do_sys_vm86': arch/x86/kernel/vm86_32.c:340: error: implicit declaration of function '__audit_syscall_exit' make[3]: *** [arch/x86/kernel/vm86_32.o] Error 1 OK, I can reproduce it (32bit allmodconfig with AUDIT=y, AUDITSYSCALL=n) It's due to commit d7e7528bcd45: "Audit: push audit success and retcode into arch ptrace.h". Reported-by: JONGMAN HEO Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/x86/kernel/vm86_32.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index af17e1c966d..b466cab5ba1 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -336,8 +336,10 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk mark_screen_rdonly(tsk->mm); /*call __audit_syscall_exit since we do not exit via the normal paths */ +#ifdef CONFIG_AUDITSYSCALL if (unlikely(current->audit_context)) __audit_syscall_exit(1, 0); +#endif __asm__ __volatile__( "movl %0,%%esp\n\t" -- cgit v1.2.3-70-g09d2