From b845b517b5e3706a3729f6ea83b88ab85f0725b0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 8 Aug 2008 21:47:09 +0200 Subject: printk: robustify printk Avoid deadlocks against rq->lock and xtime_lock by deferring the klogd wakeup by polling from the timer tick. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/printk.c | 19 +++++++++++++++++-- kernel/time/tick-sched.c | 2 +- kernel/timer.c | 1 + 3 files changed, 19 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index b51b1567bb5..655cc2ca10c 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -982,10 +982,25 @@ int is_console_locked(void) return console_locked; } -void wake_up_klogd(void) +static DEFINE_PER_CPU(int, printk_pending); + +void printk_tick(void) { - if (!oops_in_progress && waitqueue_active(&log_wait)) + if (__get_cpu_var(printk_pending)) { + __get_cpu_var(printk_pending) = 0; wake_up_interruptible(&log_wait); + } +} + +int printk_needs_cpu(int cpu) +{ + return per_cpu(printk_pending, cpu); +} + +void wake_up_klogd(void) +{ + if (waitqueue_active(&log_wait)) + __get_cpu_var(printk_pending) = 1; } /** diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 825b4c00fe4..c13d4f18237 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -255,7 +255,7 @@ void tick_nohz_stop_sched_tick(int inidle) next_jiffies = get_next_timer_interrupt(last_jiffies); delta_jiffies = next_jiffies - last_jiffies; - if (rcu_needs_cpu(cpu)) + if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu)) delta_jiffies = 1; /* * Do not stop the tick, if we are only one off diff --git a/kernel/timer.c b/kernel/timer.c index 03bc7f1f159..510fe69351c 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -978,6 +978,7 @@ void update_process_times(int user_tick) run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); + printk_tick(); scheduler_tick(); run_posix_cpu_timers(p); } -- cgit v1.2.3-70-g09d2 From fa33507a22623b3bd543b15a21c362cf364b6cff Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 20 Aug 2008 09:31:26 +0200 Subject: printk: robustify printk, fix #2 Dmitry Adamushko reported: > [*] btw., with DEBUG being enabled, pr_debug() generates [1] when > debug_smp_processor_id() is used (CONFIG_DEBUG_PREEMPT). > > the problem seems to be caused by the following commit: > commit b845b517b5e3706a3729f6ea83b88ab85f0725b0 > Author: Peter Zijlstra > Date: Fri Aug 8 21:47:09 2008 +0200 > > printk: robustify printk > > > wake_up_klogd() -> __get_cpu_var() -> smp_processor_id() > > and that's being called from release_console_sem() which is, in turn, > said to be "may be called from any context" [2] > > and in this case, it seems to be called from some non-preemptible > context (although, it can't be printk()... > although, I haven't looked carefully yet). > > Provided [2], __get_cpu_var() is perhaps not the right solution there. > > > [1] > > [ 7697.942005] BUG: using smp_processor_id() in preemptible [00000000] code: syslogd/3542 > [ 7697.942005] caller is wake_up_klogd+0x1b/0x50 > [ 7697.942005] Pid: 3542, comm: syslogd Not tainted 2.6.27-rc3-tip-git #2 > [ 7697.942005] Call Trace: > [ 7697.942005] [] debug_smp_processor_id+0xe8/0xf0 > [ 7697.942005] [] wake_up_klogd+0x1b/0x50 > [ 7697.942005] [] release_console_sem+0x1e7/0x200 > [ 7697.942005] [] do_con_write+0xb7/0x1f30 > [ 7697.942005] [] ? show_trace+0x10/0x20 > [ 7697.942005] [] ? dump_stack+0x72/0x80 > [ 7697.942005] [] ? __ratelimit+0xbd/0xe0 > [ 7697.942005] [] ? debug_smp_processor_id+0xe8/0xf0 > [ 7697.942005] [] ? wake_up_klogd+0x1b/0x50 > [ 7697.942005] [] ? release_console_sem+0x1e7/0x200 > [ 7697.942005] [] con_write+0x19/0x30 > [ 7697.942005] [] write_chan+0x276/0x3c0 > [ 7697.942005] [] ? default_wake_function+0x0/0x10 > [ 7697.942005] [] ? _spin_lock_irqsave+0x22/0x50 > [ 7697.942005] [] tty_write+0x194/0x260 > [ 7697.942005] [] ? write_chan+0x0/0x3c0 > [ 7697.942005] [] redirected_tty_write+0xa4/0xb0 > [ 7697.942005] [] ? redirected_tty_write+0x0/0xb0 > [ 7697.942005] [] do_loop_readv_writev+0x52/0x80 > [ 7697.942005] [] do_readv_writev+0x1bd/0x1d0 > [ 7697.942005] [] vfs_writev+0x39/0x60 > [ 7697.942005] [] sys_writev+0x50/0x90 > [ 7697.942005] [] system_call_fastpath+0x16/0x1b Signed-off-by: Peter Zijlstra Reported-by: Dmitry Adamushko Signed-off-by: Ingo Molnar --- kernel/printk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index 655cc2ca10c..57e9cd7a958 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1000,7 +1000,7 @@ int printk_needs_cpu(int cpu) void wake_up_klogd(void) { if (waitqueue_active(&log_wait)) - __get_cpu_var(printk_pending) = 1; + __raw_get_cpu_var(printk_pending) = 1; } /** -- cgit v1.2.3-70-g09d2 From 1fa63a817d27af7dc0d5ed454eb8fe5dec65fac7 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Wed, 20 Aug 2008 14:40:55 +0200 Subject: printk: robustify printk, update comment Remove the comment describing the possibility of printk() deadlocking on runqueue lock. Signed-off-by: Jiri Kosina Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/printk.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index 57e9cd7a958..6f27c6a4bdc 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -577,9 +577,6 @@ static int have_callable_console(void) * @fmt: format string * * This is printk(). It can be called from any context. We want it to work. - * Be aware of the fact that if oops_in_progress is not set, we might try to - * wake klogd up which could deadlock on runqueue lock if printk() is called - * from scheduler code. * * We try to grab the console_sem. If we succeed, it's easy - we log the output and * call the console drivers. If we fail to get the semaphore we place the output -- cgit v1.2.3-70-g09d2 From 1cf44baad76b6f20f95ece397c6f643320aa44c9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 4 Sep 2008 21:26:06 +0200 Subject: IO resources: fix/remove printk Andrew Morton noticed that the printk in kernel/resource.c was buggy: | start and end have type resource_size_t. Such types CANNOT be printed | unless cast to a known type. | | Because there is a %s following an incorrect %lld, the above code will | crash the machine. ... and it's probably quite unneeded as well, so remove it. Reported-by: Andrew Morton Signed-off-by: Ingo Molnar --- kernel/resource.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/resource.c b/kernel/resource.c index 414d6fc9131..fc59dcc4795 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -549,13 +549,9 @@ static void __init __reserve_region_with_split(struct resource *root, } if (!res) { - printk(KERN_DEBUG " __reserve_region_with_split: (%s) [%llx, %llx], res: (%s) [%llx, %llx]\n", - conflict->name, conflict->start, conflict->end, - name, start, end); - /* failed, split and try again */ - /* conflict coverred whole area */ + /* conflict covered whole area */ if (conflict->start <= start && conflict->end >= end) return; -- cgit v1.2.3-70-g09d2 From 978b0116cd225682a29e3d1d5010319bf2de32c2 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 6 Sep 2008 20:04:36 +0200 Subject: softirq: allocate less vectors We don't need whole 32 of them, only NR_SOFTIRQS. Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/interrupt.h | 2 ++ kernel/softirq.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 58ff4e74b2f..54b3623434e 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -252,6 +252,8 @@ enum HRTIMER_SOFTIRQ, #endif RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ + + NR_SOFTIRQS }; /* softirq mask and active fields moved to irq_cpustat_t in diff --git a/kernel/softirq.c b/kernel/softirq.c index c506f266a6b..82e32aadedd 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -46,7 +46,7 @@ irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned; EXPORT_SYMBOL(irq_stat); #endif -static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp; +static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp; static DEFINE_PER_CPU(struct task_struct *, ksoftirqd); -- cgit v1.2.3-70-g09d2 From 379daf6290814e41f14880094b7b773640df2461 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 25 Sep 2008 18:43:34 -0700 Subject: IO resources, x86: ioremap sanity check to catch mapping requests exceeding the BAR sizes Go through the iomem resource tree to check if any of the ioremap() requests span more than any slot in the iomem resource tree and do a WARN_ON() if we hit this check. This will raise a red-flag, if some driver is mapping more than what is needed. And hopefully identify possible corruptions much earlier. Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 6 ++++++ include/linux/ioport.h | 1 + kernel/resource.c | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+) (limited to 'kernel') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index d4b6e6a29ae..c818b45bd07 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -149,6 +149,12 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, if (is_ISA_range(phys_addr, last_addr)) return (__force void __iomem *)phys_to_virt(phys_addr); + /* + * Check if the request spans more than any BAR in the iomem resource + * tree. + */ + WARN_ON(iomem_map_sanity_check(phys_addr, size)); + /* * Don't allow anybody to remap normal RAM that we're using.. */ diff --git a/include/linux/ioport.h b/include/linux/ioport.h index fded376b94e..01712cf1a38 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -169,6 +169,7 @@ extern struct resource * __devm_request_region(struct device *dev, extern void __devm_release_region(struct device *dev, struct resource *parent, resource_size_t start, resource_size_t n); +extern int iomem_map_sanity_check(resource_size_t addr, unsigned long size); #endif /* __ASSEMBLY__ */ #endif /* _LINUX_IOPORT_H */ diff --git a/kernel/resource.c b/kernel/resource.c index fc59dcc4795..1d003a50ee1 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -827,3 +827,36 @@ static int __init reserve_setup(char *str) } __setup("reserve=", reserve_setup); + +/* + * Check if the requested addr and size spans more than any slot in the + * iomem resource tree. + */ +int iomem_map_sanity_check(resource_size_t addr, unsigned long size) +{ + struct resource *p = &iomem_resource; + int err = 0; + loff_t l; + + read_lock(&resource_lock); + for (p = p->child; p ; p = r_next(NULL, p, &l)) { + /* + * We can probably skip the resources without + * IORESOURCE_IO attribute? + */ + if (p->start >= addr + size) + continue; + if (p->end < addr) + continue; + if (p->start <= addr && (p->end >= addr + size - 1)) + continue; + printk(KERN_WARNING "resource map sanity check conflict: " + "0x%llx 0x%llx 0x%llx 0x%llx %s\n", + addr, addr + size - 1, p->start, p->end, p->name); + err = -1; + break; + } + read_unlock(&resource_lock); + + return err; +} -- cgit v1.2.3-70-g09d2 From 13eb83754b40bf01dc84e52a08d4196d1b719a0e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 26 Sep 2008 10:10:12 +0200 Subject: IO resources, x86: ioremap sanity check to catch mapping requests exceeding, fix fix this build error: kernel/resource.c: In function 'iomem_map_sanity_check': kernel/resource.c:842: error: implicit declaration of function 'r_next' kernel/resource.c:842: warning: assignment makes pointer from integer without a cast r_next() was only available if CONFIG_PROCFS was enabled. and fix this build warning: kernel/resource.c:855: warning: format '%llx' expects type 'long long unsigned int', but argument 2 has type 'resource_size_t' kernel/resource.c:855: warning: format '%llx' expects type 'long long unsigned int', but argument 3 has type 'long unsigned int' kernel/resource.c:855: warning: format '%llx' expects type 'long long unsigned int', but argument 4 has type 'resource_size_t' kernel/resource.c:855: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'resource_size_t' resource_t can be 32 bits. Signed-off-by: Ingo Molnar --- kernel/resource.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/resource.c b/kernel/resource.c index 1d003a50ee1..7797dae85b5 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -38,10 +38,6 @@ EXPORT_SYMBOL(iomem_resource); static DEFINE_RWLOCK(resource_lock); -#ifdef CONFIG_PROC_FS - -enum { MAX_IORES_LEVEL = 5 }; - static void *r_next(struct seq_file *m, void *v, loff_t *pos) { struct resource *p = v; @@ -53,6 +49,10 @@ static void *r_next(struct seq_file *m, void *v, loff_t *pos) return p->sibling; } +#ifdef CONFIG_PROC_FS + +enum { MAX_IORES_LEVEL = 5 }; + static void *r_start(struct seq_file *m, loff_t *pos) __acquires(resource_lock) { @@ -852,7 +852,11 @@ int iomem_map_sanity_check(resource_size_t addr, unsigned long size) continue; printk(KERN_WARNING "resource map sanity check conflict: " "0x%llx 0x%llx 0x%llx 0x%llx %s\n", - addr, addr + size - 1, p->start, p->end, p->name); + (unsigned long long)addr, + (unsigned long long)(addr + size - 1), + (unsigned long long)p->start, + (unsigned long long)p->end, + p->name); err = -1; break; } -- cgit v1.2.3-70-g09d2 From bfcd17a6c5529bc37234cfa720a047cf9397bcfc Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 6 Aug 2008 15:12:22 +0200 Subject: Configure out file locking features This patch adds the CONFIG_FILE_LOCKING option which allows to remove support for advisory locks. With this patch enabled, the flock() system call, the F_GETLK, F_SETLK and F_SETLKW operations of fcntl() and NFS support are disabled. These features are not necessarly needed on embedded systems. It allows to save ~11 Kb of kernel code and data: text data bss dec hex filename 1125436 118764 212992 1457192 163c28 vmlinux.old 1114299 118564 212992 1445855 160fdf vmlinux -11137 -200 0 -11337 -2C49 +/- This patch has originally been written by Matt Mackall , and is part of the Linux Tiny project. Signed-off-by: Thomas Petazzoni Signed-off-by: Matt Mackall Cc: matthew@wil.cx Cc: linux-fsdevel@vger.kernel.org Cc: mpm@selenic.com Cc: akpm@linux-foundation.org Signed-off-by: J. Bruce Fields --- fs/Kconfig | 8 ++++++++ fs/Makefile | 3 ++- fs/proc/proc_misc.c | 4 ++++ include/linux/fs.h | 57 ++++++++++++++++++++++++++++++++++++++++++++++------- kernel/sys_ni.c | 1 + kernel/sysctl.c | 6 +++++- 6 files changed, 70 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/fs/Kconfig b/fs/Kconfig index abccb5dab9a..c6ae4d4842e 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -419,6 +419,14 @@ config FS_POSIX_ACL bool default n +config FILE_LOCKING + bool "Enable POSIX file locking API" if EMBEDDED + default y + help + This option enables standard file locking support, required + for filesystems like NFS and for the flock() system + call. Disabling this option saves about 11k. + source "fs/xfs/Kconfig" source "fs/gfs2/Kconfig" diff --git a/fs/Makefile b/fs/Makefile index a1482a5eff1..4b86d433baa 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -7,7 +7,7 @@ obj-y := open.o read_write.o file_table.o super.o \ char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ - ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ + ioctl.o readdir.o select.o fifo.o dcache.o inode.o \ attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ seq_file.o xattr.o libfs.o fs-writeback.o \ pnode.o drop_caches.o splice.o sync.o utimes.o \ @@ -27,6 +27,7 @@ obj-$(CONFIG_ANON_INODES) += anon_inodes.o obj-$(CONFIG_SIGNALFD) += signalfd.o obj-$(CONFIG_TIMERFD) += timerfd.o obj-$(CONFIG_EVENTFD) += eventfd.o +obj-$(CONFIG_FILE_LOCKING) += locks.o obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o nfsd-$(CONFIG_NFSD) := nfsctl.o diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 29e20c6b1f7..1aabbe2592e 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -684,6 +684,7 @@ static int cmdline_read_proc(char *page, char **start, off_t off, return proc_calc_metrics(page, start, off, count, eof, len); } +#ifdef CONFIG_FILE_LOCKING static int locks_open(struct inode *inode, struct file *filp) { return seq_open(filp, &locks_seq_operations); @@ -695,6 +696,7 @@ static const struct file_operations proc_locks_operations = { .llseek = seq_lseek, .release = seq_release, }; +#endif /* CONFIG_FILE_LOCKING */ static int execdomains_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) @@ -888,7 +890,9 @@ void __init proc_misc_init(void) #ifdef CONFIG_PRINTK proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations); #endif +#ifdef CONFIG_FILE_LOCKING proc_create("locks", 0, NULL, &proc_locks_operations); +#endif proc_create("devices", 0, NULL, &proc_devinfo_operations); proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations); #ifdef CONFIG_BLOCK diff --git a/include/linux/fs.h b/include/linux/fs.h index 580b513668f..9f540165a07 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -983,6 +983,13 @@ struct file_lock { #include +extern void send_sigio(struct fown_struct *fown, int fd, int band); + +/* fs/sync.c */ +extern int do_sync_mapping_range(struct address_space *mapping, loff_t offset, + loff_t endbyte, unsigned int flags); + +#ifdef CONFIG_FILE_LOCKING extern int fcntl_getlk(struct file *, struct flock __user *); extern int fcntl_setlk(unsigned int, struct file *, unsigned int, struct flock __user *); @@ -993,14 +1000,9 @@ extern int fcntl_setlk64(unsigned int, struct file *, unsigned int, struct flock64 __user *); #endif -extern void send_sigio(struct fown_struct *fown, int fd, int band); extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg); extern int fcntl_getlease(struct file *filp); -/* fs/sync.c */ -extern int do_sync_mapping_range(struct address_space *mapping, loff_t offset, - loff_t endbyte, unsigned int flags); - /* fs/locks.c */ extern void locks_init_lock(struct file_lock *); extern void locks_copy_lock(struct file_lock *, struct file_lock *); @@ -1023,6 +1025,37 @@ extern int lease_modify(struct file_lock **, int); extern int lock_may_read(struct inode *, loff_t start, unsigned long count); extern int lock_may_write(struct inode *, loff_t start, unsigned long count); extern struct seq_operations locks_seq_operations; +#else /* !CONFIG_FILE_LOCKING */ +#define fcntl_getlk(a, b) ({ -EINVAL; }) +#define fcntl_setlk(a, b, c, d) ({ -EACCES; }) +#if BITS_PER_LONG == 32 +#define fcntl_getlk64(a, b) ({ -EINVAL; }) +#define fcntl_setlk64(a, b, c, d) ({ -EACCES; }) +#endif +#define fcntl_setlease(a, b, c) ({ 0; }) +#define fcntl_getlease(a) ({ 0; }) +#define locks_init_lock(a) ({ }) +#define __locks_copy_lock(a, b) ({ }) +#define locks_copy_lock(a, b) ({ }) +#define locks_remove_posix(a, b) ({ }) +#define locks_remove_flock(a) ({ }) +#define posix_test_lock(a, b) ({ 0; }) +#define posix_lock_file(a, b, c) ({ -ENOLCK; }) +#define posix_lock_file_wait(a, b) ({ -ENOLCK; }) +#define posix_unblock_lock(a, b) (-ENOENT) +#define vfs_test_lock(a, b) ({ 0; }) +#define vfs_lock_file(a, b, c, d) (-ENOLCK) +#define vfs_cancel_lock(a, b) ({ 0; }) +#define flock_lock_file_wait(a, b) ({ -ENOLCK; }) +#define __break_lease(a, b) ({ 0; }) +#define lease_get_mtime(a, b) ({ }) +#define generic_setlease(a, b, c) ({ -EINVAL; }) +#define vfs_setlease(a, b, c) ({ -EINVAL; }) +#define lease_modify(a, b) ({ -EINVAL; }) +#define lock_may_read(a, b, c) ({ 1; }) +#define lock_may_write(a, b, c) ({ 1; }) +#endif /* !CONFIG_FILE_LOCKING */ + struct fasync_struct { int magic; @@ -1554,9 +1587,12 @@ extern int vfs_statfs(struct dentry *, struct kstatfs *); /* /sys/fs */ extern struct kobject *fs_kobj; +extern int rw_verify_area(int, struct file *, loff_t *, size_t); + #define FLOCK_VERIFY_READ 1 #define FLOCK_VERIFY_WRITE 2 +#ifdef CONFIG_FILE_LOCKING extern int locks_mandatory_locked(struct inode *); extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t); @@ -1587,8 +1623,6 @@ static inline int locks_verify_locked(struct inode *inode) return 0; } -extern int rw_verify_area(int, struct file *, loff_t *, size_t); - static inline int locks_verify_truncate(struct inode *inode, struct file *filp, loff_t size) @@ -1609,6 +1643,15 @@ static inline int break_lease(struct inode *inode, unsigned int mode) return __break_lease(inode, mode); return 0; } +#else /* !CONFIG_FILE_LOCKING */ +#define locks_mandatory_locked(a) ({ 0; }) +#define locks_mandatory_area(a, b, c, d, e) ({ 0; }) +#define __mandatory_lock(a) ({ 0; }) +#define mandatory_lock(a) ({ 0; }) +#define locks_verify_locked(a) ({ 0; }) +#define locks_verify_truncate(a, b, c) ({ 0; }) +#define break_lease(a, b) ({ 0; }) +#endif /* CONFIG_FILE_LOCKING */ /* fs/open.c */ diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 08d6e1bb99a..503d8d4eb80 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -125,6 +125,7 @@ cond_syscall(sys_vm86old); cond_syscall(sys_vm86); cond_syscall(compat_sys_ipc); cond_syscall(compat_sys_sysctl); +cond_syscall(sys_flock); /* arch-specific weak syscall entries */ cond_syscall(sys_pciconfig_read); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 50ec0886fa3..4588b2cf2ec 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -97,7 +97,7 @@ static int sixty = 60; static int neg_one = -1; #endif -#ifdef CONFIG_MMU +#if defined(CONFIG_MMU) && defined(CONFIG_FILE_LOCKING) static int two = 2; #endif @@ -1261,6 +1261,7 @@ static struct ctl_table fs_table[] = { .extra1 = &minolduid, .extra2 = &maxolduid, }, +#ifdef CONFIG_FILE_LOCKING { .ctl_name = FS_LEASES, .procname = "leases-enable", @@ -1269,6 +1270,7 @@ static struct ctl_table fs_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, +#endif #ifdef CONFIG_DNOTIFY { .ctl_name = FS_DIR_NOTIFY, @@ -1280,6 +1282,7 @@ static struct ctl_table fs_table[] = { }, #endif #ifdef CONFIG_MMU +#ifdef CONFIG_FILE_LOCKING { .ctl_name = FS_LEASE_TIME, .procname = "lease-break-time", @@ -1291,6 +1294,7 @@ static struct ctl_table fs_table[] = { .extra1 = &zero, .extra2 = &two, }, +#endif { .procname = "aio-nr", .data = &aio_nr, -- cgit v1.2.3-70-g09d2 From 8e85b4b553fc932e1c5141feb5fda389b7f5db01 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 2 Oct 2008 10:50:53 +0200 Subject: softirqs, debug: preemption check if a preempt count leaks out of a softirq handler it can be very hard to figure it out. Add a debug check for this. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- kernel/softirq.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'kernel') diff --git a/kernel/softirq.c b/kernel/softirq.c index 82e32aadedd..1cf1e2f2c40 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -205,7 +205,18 @@ restart: do { if (pending & 1) { + int prev_count = preempt_count(); + h->action(h); + + if (unlikely(prev_count != preempt_count())) { + printk(KERN_ERR "huh, entered sotfirq %ld %p" + "with preempt_count %08x," + " exited with %08x?\n", h - softirq_vec, + h->action, prev_count, preempt_count()); + preempt_count() = prev_count; + } + rcu_bh_qsctr_inc(cpu); } h++; -- cgit v1.2.3-70-g09d2 From 77af7e3403e7314c47b0c07fbc5e4ef21d939532 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 3 Oct 2008 11:39:46 +0200 Subject: softirq, warning fix: correct a format to avoid a warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Last -tip gives this warning: kernel/softirq.c: Dans la fonction «__do_softirq» : kernel/softirq.c:216: attention : format «%ld» expects type «long int», but argument 2 has type «int» This patch corrects the format type, and a small mistake in the "softirq" word. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/softirq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/softirq.c b/kernel/softirq.c index 1cf1e2f2c40..be7a8292f99 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -210,7 +210,7 @@ restart: h->action(h); if (unlikely(prev_count != preempt_count())) { - printk(KERN_ERR "huh, entered sotfirq %ld %p" + printk(KERN_ERR "huh, entered softirq %d %p" "with preempt_count %08x," " exited with %08x?\n", h - softirq_vec, h->action, prev_count, preempt_count()); -- cgit v1.2.3-70-g09d2 From a6bebbc87a8c16eabb6bd5c6fd2d994be0236fba Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Sun, 5 Oct 2008 00:51:15 +0400 Subject: [PATCH] signal, procfs: some lock_task_sighand() users do not need rcu_read_lock() lock_task_sighand() make sure task->sighand is being protected, so we do not need rcu_read_lock(). [ exec() will get task->sighand->siglock before change task->sighand! ] But code using rcu_read_lock() _just_ to protect lock_task_sighand() only appear in procfs. (and some code in procfs use lock_task_sighand() without such redundant protection.) Other subsystem may put lock_task_sighand() into rcu_read_lock() critical region, but these rcu_read_lock() are used for protecting "for_each_process()", "find_task_by_vpid()" etc. , not for protecting lock_task_sighand(). Signed-off-by: Lai Jiangshan [ok from Oleg] Signed-off-by: Alexey Dobriyan --- fs/proc/array.c | 2 -- fs/proc/base.c | 9 +-------- kernel/sched_debug.c | 2 -- 3 files changed, 1 insertion(+), 12 deletions(-) (limited to 'kernel') diff --git a/fs/proc/array.c b/fs/proc/array.c index 71c9be59c9c..1c8d7b5d7a1 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -261,7 +261,6 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) sigemptyset(&ignored); sigemptyset(&caught); - rcu_read_lock(); if (lock_task_sighand(p, &flags)) { pending = p->pending.signal; shpending = p->signal->shared_pending.signal; @@ -272,7 +271,6 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur; unlock_task_sighand(p, &flags); } - rcu_read_unlock(); seq_printf(m, "Threads:\t%d\n", num_threads); seq_printf(m, "SigQ:\t%lu/%lu\n", qsize, qlim); diff --git a/fs/proc/base.c b/fs/proc/base.c index a28840b11b8..bb63fa1d34a 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -164,7 +164,6 @@ static struct fs_struct *get_fs_struct(struct task_struct *task) static int get_nr_threads(struct task_struct *tsk) { - /* Must be called with the rcu_read_lock held */ unsigned long flags; int count = 0; @@ -471,14 +470,10 @@ static int proc_pid_limits(struct task_struct *task, char *buffer) struct rlimit rlim[RLIM_NLIMITS]; - rcu_read_lock(); - if (!lock_task_sighand(task,&flags)) { - rcu_read_unlock(); + if (!lock_task_sighand(task, &flags)) return 0; - } memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS); unlock_task_sighand(task, &flags); - rcu_read_unlock(); /* * print the file header @@ -3088,9 +3083,7 @@ static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct generic_fillattr(inode, stat); if (p) { - rcu_read_lock(); stat->nlink += get_nr_threads(p); - rcu_read_unlock(); put_task_struct(p); } diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index bbe6b31c3c5..ad958c1ec70 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -333,12 +333,10 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) unsigned long flags; int num_threads = 1; - rcu_read_lock(); if (lock_task_sighand(p, &flags)) { num_threads = atomic_read(&p->signal->count); unlock_task_sighand(p, &flags); } - rcu_read_unlock(); SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, p->pid, num_threads); SEQ_printf(m, -- cgit v1.2.3-70-g09d2 From 3bbfe0596746e1590888a6e1e6a07583265238b7 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 10 Oct 2008 03:27:16 +0400 Subject: proc: remove kernel.maps_protect After commit 831830b5a2b5d413407adf380ef62fe17d6fcbf2 aka "restrict reading from /proc//maps to those who share ->mm or can ptrace" sysctl stopped being relevant because commit moved security checks from ->show time to ->start time (mm_for_maps()). Signed-off-by: Alexey Dobriyan Acked-by: Kees Cook --- Documentation/filesystems/proc.txt | 7 ------- fs/proc/base.c | 3 --- fs/proc/internal.h | 2 -- fs/proc/task_mmu.c | 16 +--------------- fs/proc/task_nommu.c | 5 ----- kernel/sysctl.c | 11 ----------- 6 files changed, 1 insertion(+), 43 deletions(-) (limited to 'kernel') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index f566ad9bcb7..63ed861d5ca 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -1332,13 +1332,6 @@ determine whether or not they are still functioning properly. Because the NMI watchdog shares registers with oprofile, by disabling the NMI watchdog, oprofile may have more registers to utilize. -maps_protect ------------- - -Enables/Disables the protection of the per-process proc entries "maps" and -"smaps". When enabled, the contents of these files are visible only to -readers that are allowed to ptrace() the given process. - msgmni ------ diff --git a/fs/proc/base.c b/fs/proc/base.c index c1332dd2575..b5918ae8ca7 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -148,9 +148,6 @@ static unsigned int pid_entry_count_dirs(const struct pid_entry *entries, return count; } -int maps_protect; -EXPORT_SYMBOL(maps_protect); - static struct fs_struct *get_fs_struct(struct task_struct *task) { struct fs_struct *fs; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 442202314d5..3bfb7b8747b 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -45,8 +45,6 @@ do { \ extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *); #endif -extern int maps_protect; - extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task); extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 73d1891ee62..4806830ea2a 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -210,9 +210,6 @@ static int show_map(struct seq_file *m, void *v) dev_t dev = 0; int len; - if (maps_protect && !ptrace_may_access(task, PTRACE_MODE_READ)) - return -EACCES; - if (file) { struct inode *inode = vma->vm_file->f_path.dentry->d_inode; dev = inode->i_sb->s_dev; @@ -742,22 +739,11 @@ const struct file_operations proc_pagemap_operations = { #ifdef CONFIG_NUMA extern int show_numa_map(struct seq_file *m, void *v); -static int show_numa_map_checked(struct seq_file *m, void *v) -{ - struct proc_maps_private *priv = m->private; - struct task_struct *task = priv->task; - - if (maps_protect && !ptrace_may_access(task, PTRACE_MODE_READ)) - return -EACCES; - - return show_numa_map(m, v); -} - static const struct seq_operations proc_pid_numa_maps_op = { .start = m_start, .next = m_next, .stop = m_stop, - .show = show_numa_map_checked + .show = show_numa_map, }; static int numa_maps_open(struct inode *inode, struct file *file) diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 5d84e7121df..219bd79ea89 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -110,11 +110,6 @@ int task_statm(struct mm_struct *mm, int *shared, int *text, static int show_map(struct seq_file *m, void *_vml) { struct vm_list_struct *vml = _vml; - struct proc_maps_private *priv = m->private; - struct task_struct *task = priv->task; - - if (maps_protect && !ptrace_may_access(task, PTRACE_MODE_READ)) - return -EACCES; return nommu_vma_show(m, vml->vma); } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 50ec0886fa3..cc3e0d7a5ac 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -80,7 +80,6 @@ extern int pid_max_min, pid_max_max; extern int sysctl_drop_caches; extern int percpu_pagelist_fraction; extern int compat_log; -extern int maps_protect; extern int latencytop_enabled; extern int sysctl_nr_open_min, sysctl_nr_open_max; #ifdef CONFIG_RCU_TORTURE_TEST @@ -809,16 +808,6 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, -#endif -#ifdef CONFIG_PROC_FS - { - .ctl_name = CTL_UNNUMBERED, - .procname = "maps_protect", - .data = &maps_protect, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, #endif { .ctl_name = CTL_UNNUMBERED, -- cgit v1.2.3-70-g09d2 From 5b7dba4ff834259a5623e03a565748704a8fe449 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Thu, 9 Oct 2008 13:21:30 -0500 Subject: sched_clock: prevent scd->clock from moving backwards When sched_clock_cpu() couples the clocks between two cpus, it may increment scd->clock beyond the GTOD tick window that __update_sched_clock() uses to clamp the clock. A later call to __update_sched_clock() may move the clock back to scd->tick_gtod + TICK_NSEC, violating the clock's monotonic property. This patch ensures that scd->clock will not be set backward. Signed-off-by: Dave Kleikamp Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched_clock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c index e8ab096ddfe..81787248b60 100644 --- a/kernel/sched_clock.c +++ b/kernel/sched_clock.c @@ -118,13 +118,13 @@ static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now) /* * scd->clock = clamp(scd->tick_gtod + delta, - * max(scd->tick_gtod, scd->clock), - * scd->tick_gtod + TICK_NSEC); + * max(scd->tick_gtod, scd->clock), + * max(scd->clock, scd->tick_gtod + TICK_NSEC)); */ clock = scd->tick_gtod + delta; min_clock = wrap_max(scd->tick_gtod, scd->clock); - max_clock = scd->tick_gtod + TICK_NSEC; + max_clock = wrap_max(scd->clock, scd->tick_gtod + TICK_NSEC); clock = wrap_max(clock, min_clock); clock = wrap_min(clock, max_clock); -- cgit v1.2.3-70-g09d2 From 061b1bd394ca8628b7c24eb4658ba3535da4249a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 24 Sep 2008 14:46:44 -0700 Subject: Staging: add TAINT_CRAP for all drivers/staging code We need to add a flag for all code that is in the drivers/staging/ directory to prevent all other kernel developers from worrying about issues here, and to notify users that the drivers might not be as good as they are normally used to. Based on code from Andreas Gruenbacher and Jeff Mahoney to provide a TAINT flag for the support level of a kernel module in the Novell enterprise kernel release. This is the kernel portion of this feature, the ability for the flag to be set needs to be done in the build process and will happen in a follow-up patch. Cc: Andreas Gruenbacher Cc: Jeff Mahoney Signed-off-by: Greg Kroah-Hartman --- Documentation/sysctl/kernel.txt | 1 + include/linux/kernel.h | 1 + kernel/module.c | 11 +++++++++++ kernel/panic.c | 6 ++++-- 4 files changed, 17 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index e1ff0d920a5..bde799e0659 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -369,4 +369,5 @@ can be ORed together: 2 - A module was force loaded by insmod -f. Set by modutils >= 2.4.9 and module-init-tools. 4 - Unsafe SMP processors: SMP with CPUs not designed for SMP. + 64 - A module from drivers/staging was loaded. diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2651f805ba6..b36805cb95f 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -260,6 +260,7 @@ extern enum system_states { #define TAINT_DIE (1<<7) #define TAINT_OVERRIDDEN_ACPI_TABLE (1<<8) #define TAINT_WARN (1<<9) +#define TAINT_CRAP (1<<10) extern void dump_stack(void) __cold; diff --git a/kernel/module.c b/kernel/module.c index 9db11911e04..152b1655bba 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1806,6 +1806,7 @@ static noinline struct module *load_module(void __user *umod, Elf_Ehdr *hdr; Elf_Shdr *sechdrs; char *secstrings, *args, *modmagic, *strtab = NULL; + char *staging; unsigned int i; unsigned int symindex = 0; unsigned int strindex = 0; @@ -1960,6 +1961,14 @@ static noinline struct module *load_module(void __user *umod, goto free_hdr; } + staging = get_modinfo(sechdrs, infoindex, "staging"); + if (staging) { + add_taint_module(mod, TAINT_CRAP); + printk(KERN_WARNING "%s: module is from the staging directory," + " the quality is unknown, you have been warned.\n", + mod->name); + } + /* Now copy in args */ args = strndup_user(uargs, ~0UL >> 1); if (IS_ERR(args)) { @@ -2556,6 +2565,8 @@ static char *module_flags(struct module *mod, char *buf) buf[bx++] = 'P'; if (mod->taints & TAINT_FORCED_MODULE) buf[bx++] = 'F'; + if (mod->taints & TAINT_CRAP) + buf[bx++] = 'C'; /* * TAINT_FORCED_RMMOD: could be added. * TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't diff --git a/kernel/panic.c b/kernel/panic.c index 12c5a0a6c89..98e2047f4db 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -155,6 +155,7 @@ EXPORT_SYMBOL(panic); * 'U' - Userspace-defined naughtiness. * 'A' - ACPI table overridden. * 'W' - Taint on warning. + * 'C' - modules from drivers/staging are loaded. * * The string is overwritten by the next call to print_taint(). */ @@ -163,7 +164,7 @@ const char *print_tainted(void) { static char buf[20]; if (tainted) { - snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c%c%c%c", + snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c%c%c%c%c", tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G', tainted & TAINT_FORCED_MODULE ? 'F' : ' ', tainted & TAINT_UNSAFE_SMP ? 'S' : ' ', @@ -173,7 +174,8 @@ const char *print_tainted(void) tainted & TAINT_USER ? 'U' : ' ', tainted & TAINT_DIE ? 'D' : ' ', tainted & TAINT_OVERRIDDEN_ACPI_TABLE ? 'A' : ' ', - tainted & TAINT_WARN ? 'W' : ' '); + tainted & TAINT_WARN ? 'W' : ' ', + tainted & TAINT_CRAP ? 'C' : ' '); } else snprintf(buf, sizeof(buf), "Not tainted"); -- cgit v1.2.3-70-g09d2 From 118a9069f06ff591d51a3133e242f0c256ba2db7 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 17 Oct 2008 02:38:36 -0500 Subject: module: remove CONFIG_KMOD in comment after #endif Signed-off-by: Rusty Russell --- kernel/kmod.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/kmod.c b/kernel/kmod.c index 2456d1a0bef..58f9c2ed36b 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -113,7 +113,7 @@ int request_module(const char *fmt, ...) return ret; } EXPORT_SYMBOL(request_module); -#endif /* CONFIG_KMOD */ +#endif /* CONFIG_MODULES */ struct subprocess_info { struct work_struct work; -- cgit v1.2.3-70-g09d2 From e94320939f44e0cbaccc3f259a5778abced4949c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 23 Sep 2008 23:51:11 +0400 Subject: modules: fix module "notes" kobject leak Fix "notes" kobject leak It happens every rmmod if KALLSYMS=y and SYSFS=y. # modprobe foo kobject: 'foo' (ffffffffa00743d0): kobject_add_internal: parent: 'module', set: 'module' kobject: 'holders' (ffff88017e7c5770): kobject_add_internal: parent: 'foo', set: '' kobject: 'foo' (ffffffffa00743d0): kobject_uevent_env kobject: 'foo' (ffffffffa00743d0): fill_kobj_path: path = '/module/foo' kobject: 'notes' (ffff88017fa9b668): kobject_add_internal: parent: 'foo', set: '' ^^^^^ # rmmod foo kobject: 'holders' (ffff88017e7c5770): kobject_cleanup kobject: 'holders' (ffff88017e7c5770): auto cleanup kobject_del kobject: 'holders' (ffff88017e7c5770): calling ktype release kobject: (ffff88017e7c5770): dynamic_kobj_release kobject: 'holders': free name kobject: 'foo' (ffffffffa00743d0): kobject_cleanup kobject: 'foo' (ffffffffa00743d0): does not have a release() function, it is broken and must be fixed. kobject: 'foo' (ffffffffa00743d0): auto cleanup 'remove' event kobject: 'foo' (ffffffffa00743d0): kobject_uevent_env kobject: 'foo' (ffffffffa00743d0): fill_kobj_path: path = '/module/foo' kobject: 'foo' (ffffffffa00743d0): auto cleanup kobject_del kobject: 'foo': free name [whooops] Signed-off-by: Alexey Dobriyan Cc: stable Signed-off-by: Greg Kroah-Hartman --- kernel/module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/module.c b/kernel/module.c index 9db11911e04..d5fcd24e5af 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1173,7 +1173,7 @@ static void free_notes_attrs(struct module_notes_attrs *notes_attrs, while (i-- > 0) sysfs_remove_bin_file(notes_attrs->dir, ¬es_attrs->attrs[i]); - kobject_del(notes_attrs->dir); + kobject_put(notes_attrs->dir); } kfree(notes_attrs); } -- cgit v1.2.3-70-g09d2 From 346e15beb5343c2eb8216d820f2ed8f150822b08 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Tue, 12 Aug 2008 16:46:19 -0400 Subject: driver core: basic infrastructure for per-module dynamic debug messages Base infrastructure to enable per-module debug messages. I've introduced CONFIG_DYNAMIC_PRINTK_DEBUG, which when enabled centralizes control of debugging statements on a per-module basis in one /proc file, currently, /dynamic_printk/modules. When, CONFIG_DYNAMIC_PRINTK_DEBUG, is not set, debugging statements can still be enabled as before, often by defining 'DEBUG' for the proper compilation unit. Thus, this patch set has no affect when CONFIG_DYNAMIC_PRINTK_DEBUG is not set. The infrastructure currently ties into all pr_debug() and dev_dbg() calls. That is, if CONFIG_DYNAMIC_PRINTK_DEBUG is set, all pr_debug() and dev_dbg() calls can be dynamically enabled/disabled on a per-module basis. Future plans include extending this functionality to subsystems, that define their own debug levels and flags. Usage: Dynamic debugging is controlled by the debugfs file, /dynamic_printk/modules. This file contains a list of the modules that can be enabled. The format of the file is as follows: . . . : Name of the module in which the debug call resides : whether the messages are enabled or not For example: snd_hda_intel enabled=0 fixup enabled=1 driver enabled=0 Enable a module: $echo "set enabled=1 " > dynamic_printk/modules Disable a module: $echo "set enabled=0 " > dynamic_printk/modules Enable all modules: $echo "set enabled=1 all" > dynamic_printk/modules Disable all modules: $echo "set enabled=0 all" > dynamic_printk/modules Finally, passing "dynamic_printk" at the command line enables debugging for all modules. This mode can be turned off via the above disable command. [gkh: minor cleanups and tweaks to make the build work quietly] Signed-off-by: Jason Baron Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 5 + include/asm-generic/vmlinux.lds.h | 10 +- include/linux/device.h | 6 +- include/linux/dynamic_printk.h | 93 ++++++++ include/linux/kernel.h | 7 +- include/linux/module.h | 1 - kernel/module.c | 31 +++ lib/Kconfig.debug | 55 +++++ lib/Makefile | 2 + lib/dynamic_printk.c | 418 ++++++++++++++++++++++++++++++++++++ net/netfilter/nf_conntrack_pptp.c | 2 +- scripts/Makefile.lib | 11 +- scripts/basic/Makefile | 2 +- scripts/basic/hash.c | 64 ++++++ 14 files changed, 700 insertions(+), 7 deletions(-) create mode 100644 include/linux/dynamic_printk.h create mode 100644 lib/dynamic_printk.c create mode 100644 scripts/basic/hash.c (limited to 'kernel') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 2443f5bb436..b429c84ceef 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1713,6 +1713,11 @@ and is between 256 and 4096 characters. It is defined in the file autoconfiguration. Ranges are in pairs (memory base and size). + dynamic_printk + Enables pr_debug()/dev_dbg() calls if + CONFIG_DYNAMIC_PRINTK_DEBUG has been enabled. These can also + be switched on/off via /dynamic_printk/modules + print-fatal-signals= [KNL] debug: print fatal signals print-fatal-signals=1: print segfault info to diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 7440a0dcedd..74c5faf26c0 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -268,7 +268,15 @@ CPU_DISCARD(init.data) \ CPU_DISCARD(init.rodata) \ MEM_DISCARD(init.data) \ - MEM_DISCARD(init.rodata) + MEM_DISCARD(init.rodata) \ + /* implement dynamic printk debug */ \ + VMLINUX_SYMBOL(__start___verbose_strings) = .; \ + *(__verbose_strings) \ + VMLINUX_SYMBOL(__stop___verbose_strings) = .; \ + . = ALIGN(8); \ + VMLINUX_SYMBOL(__start___verbose) = .; \ + *(__verbose) \ + VMLINUX_SYMBOL(__stop___verbose) = .; #define INIT_TEXT \ *(.init.text) \ diff --git a/include/linux/device.h b/include/linux/device.h index 60f6456691a..fb034461b39 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -550,7 +550,11 @@ extern const char *dev_driver_string(const struct device *dev); #define dev_info(dev, format, arg...) \ dev_printk(KERN_INFO , dev , format , ## arg) -#ifdef DEBUG +#if defined(CONFIG_DYNAMIC_PRINTK_DEBUG) +#define dev_dbg(dev, format, ...) do { \ + dynamic_dev_dbg(dev, format, ##__VA_ARGS__); \ + } while (0) +#elif defined(DEBUG) #define dev_dbg(dev, format, arg...) \ dev_printk(KERN_DEBUG , dev , format , ## arg) #else diff --git a/include/linux/dynamic_printk.h b/include/linux/dynamic_printk.h new file mode 100644 index 00000000000..2d528d00907 --- /dev/null +++ b/include/linux/dynamic_printk.h @@ -0,0 +1,93 @@ +#ifndef _DYNAMIC_PRINTK_H +#define _DYNAMIC_PRINTK_H + +#define DYNAMIC_DEBUG_HASH_BITS 6 +#define DEBUG_HASH_TABLE_SIZE (1 << DYNAMIC_DEBUG_HASH_BITS) + +#define TYPE_BOOLEAN 1 + +#define DYNAMIC_ENABLED_ALL 0 +#define DYNAMIC_ENABLED_NONE 1 +#define DYNAMIC_ENABLED_SOME 2 + +extern int dynamic_enabled; + +/* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which + * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They + * use independent hash functions, to reduce the chance of false positives. + */ +extern long long dynamic_printk_enabled; +extern long long dynamic_printk_enabled2; + +struct mod_debug { + char *modname; + char *logical_modname; + char *flag_names; + int type; + int hash; + int hash2; +} __attribute__((aligned(8))); + +int register_dynamic_debug_module(char *mod_name, int type, char *share_name, + char *flags, int hash, int hash2); + +#if defined(CONFIG_DYNAMIC_PRINTK_DEBUG) +extern int unregister_dynamic_debug_module(char *mod_name); +extern int __dynamic_dbg_enabled_helper(char *modname, int type, + int value, int hash); + +#define __dynamic_dbg_enabled(module, type, value, level, hash) ({ \ + int __ret = 0; \ + if (unlikely((dynamic_printk_enabled & (1LL << DEBUG_HASH)) && \ + (dynamic_printk_enabled2 & (1LL << DEBUG_HASH2)))) \ + __ret = __dynamic_dbg_enabled_helper(module, type, \ + value, hash);\ + __ret; }) + +#define dynamic_pr_debug(fmt, ...) do { \ + static char mod_name[] \ + __attribute__((section("__verbose_strings"))) \ + = KBUILD_MODNAME; \ + static struct mod_debug descriptor \ + __used \ + __attribute__((section("__verbose"), aligned(8))) = \ + { mod_name, mod_name, NULL, TYPE_BOOLEAN, DEBUG_HASH, DEBUG_HASH2 };\ + if (__dynamic_dbg_enabled(KBUILD_MODNAME, TYPE_BOOLEAN, \ + 0, 0, DEBUG_HASH)) \ + printk(KERN_DEBUG KBUILD_MODNAME ":" fmt, \ + ##__VA_ARGS__); \ + } while (0) + +#define dynamic_dev_dbg(dev, format, ...) do { \ + static char mod_name[] \ + __attribute__((section("__verbose_strings"))) \ + = KBUILD_MODNAME; \ + static struct mod_debug descriptor \ + __used \ + __attribute__((section("__verbose"), aligned(8))) = \ + { mod_name, mod_name, NULL, TYPE_BOOLEAN, DEBUG_HASH, DEBUG_HASH2 };\ + if (__dynamic_dbg_enabled(KBUILD_MODNAME, TYPE_BOOLEAN, \ + 0, 0, DEBUG_HASH)) \ + dev_printk(KERN_DEBUG, dev, \ + KBUILD_MODNAME ": " format, \ + ##__VA_ARGS__); \ + } while (0) + +#else + +static inline int unregister_dynamic_debug_module(const char *mod_name) +{ + return 0; +} +static inline int __dynamic_dbg_enabled_helper(char *modname, int type, + int value, int hash) +{ + return 0; +} + +#define __dynamic_dbg_enabled(module, type, value, level, hash) ({ 0; }) +#define dynamic_pr_debug(fmt, ...) do { } while (0) +#define dynamic_dev_dbg(dev, format, ...) do { } while (0) +#endif + +#endif diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 75d81f157d2..ededb6e83b4 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -303,8 +304,12 @@ static inline char *pack_hex_byte(char *buf, u8 byte) #define pr_info(fmt, arg...) \ printk(KERN_INFO fmt, ##arg) -#ifdef DEBUG /* If you are writing a driver, please use dev_dbg instead */ +#if defined(CONFIG_DYNAMIC_PRINTK_DEBUG) +#define pr_debug(fmt, ...) do { \ + dynamic_pr_debug(fmt, ##__VA_ARGS__); \ + } while (0) +#elif defined(DEBUG) #define pr_debug(fmt, arg...) \ printk(KERN_DEBUG fmt, ##arg) #else diff --git a/include/linux/module.h b/include/linux/module.h index 68e09557c95..a41555cbe00 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -345,7 +345,6 @@ struct module /* Reference counts */ struct module_ref ref[NR_CPUS]; #endif - }; #ifndef MODULE_ARCH_INIT #define MODULE_ARCH_INIT {} diff --git a/kernel/module.c b/kernel/module.c index d5fcd24e5af..c5270066729 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -784,6 +784,7 @@ sys_delete_module(const char __user *name_user, unsigned int flags) mutex_lock(&module_mutex); /* Store the name of the last unloaded module for diagnostic purposes */ strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module)); + unregister_dynamic_debug_module(mod->name); free_module(mod); out: @@ -1783,6 +1784,33 @@ static inline void add_kallsyms(struct module *mod, } #endif /* CONFIG_KALLSYMS */ +#ifdef CONFIG_DYNAMIC_PRINTK_DEBUG +static void dynamic_printk_setup(Elf_Shdr *sechdrs, unsigned int verboseindex) +{ + struct mod_debug *debug_info; + unsigned long pos, end; + unsigned int num_verbose; + + pos = sechdrs[verboseindex].sh_addr; + num_verbose = sechdrs[verboseindex].sh_size / + sizeof(struct mod_debug); + end = pos + (num_verbose * sizeof(struct mod_debug)); + + for (; pos < end; pos += sizeof(struct mod_debug)) { + debug_info = (struct mod_debug *)pos; + register_dynamic_debug_module(debug_info->modname, + debug_info->type, debug_info->logical_modname, + debug_info->flag_names, debug_info->hash, + debug_info->hash2); + } +} +#else +static inline void dynamic_printk_setup(Elf_Shdr *sechdrs, + unsigned int verboseindex) +{ +} +#endif /* CONFIG_DYNAMIC_PRINTK_DEBUG */ + static void *module_alloc_update_bounds(unsigned long size) { void *ret = module_alloc(size); @@ -1831,6 +1859,7 @@ static noinline struct module *load_module(void __user *umod, #endif unsigned int markersindex; unsigned int markersstringsindex; + unsigned int verboseindex; struct module *mod; long err = 0; void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ @@ -2117,6 +2146,7 @@ static noinline struct module *load_module(void __user *umod, markersindex = find_sec(hdr, sechdrs, secstrings, "__markers"); markersstringsindex = find_sec(hdr, sechdrs, secstrings, "__markers_strings"); + verboseindex = find_sec(hdr, sechdrs, secstrings, "__verbose"); /* Now do relocations. */ for (i = 1; i < hdr->e_shnum; i++) { @@ -2167,6 +2197,7 @@ static noinline struct module *load_module(void __user *umod, marker_update_probe_range(mod->markers, mod->markers + mod->num_markers); #endif + dynamic_printk_setup(sechdrs, verboseindex); err = module_finalize(hdr, sechdrs, mod); if (err < 0) goto cleanup; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index aa81d284844..31d784dd80d 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -807,6 +807,61 @@ menuconfig BUILD_DOCSRC Say N if you are unsure. +config DYNAMIC_PRINTK_DEBUG + bool "Enable dynamic printk() call support" + default n + depends on PRINTK + select PRINTK_DEBUG + help + + Compiles debug level messages into the kernel, which would not + otherwise be available at runtime. These messages can then be + enabled/disabled on a per module basis. This mechanism implicitly + enables all pr_debug() and dev_dbg() calls. The impact of this + compile option is a larger kernel text size of about 2%. + + Usage: + + Dynamic debugging is controlled by the debugfs file, + dynamic_printk/modules. This file contains a list of the modules that + can be enabled. The format of the file is the module name, followed + by a set of flags that can be enabled. The first flag is always the + 'enabled' flag. For example: + + + . + . + . + + : Name of the module in which the debug call resides + : whether the messages are enabled or not + + From a live system: + + snd_hda_intel enabled=0 + fixup enabled=0 + driver enabled=0 + + Enable a module: + + $echo "set enabled=1 " > dynamic_printk/modules + + Disable a module: + + $echo "set enabled=0 " > dynamic_printk/modules + + Enable all modules: + + $echo "set enabled=1 all" > dynamic_printk/modules + + Disable all modules: + + $echo "set enabled=0 all" > dynamic_printk/modules + + Finally, passing "dynamic_printk" at the command line enables + debugging for all modules. This mode can be turned off via the above + disable command. + source "samples/Kconfig" source "lib/Kconfig.kgdb" diff --git a/lib/Makefile b/lib/Makefile index 44001af76a7..16feaab057b 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -81,6 +81,8 @@ obj-$(CONFIG_HAVE_LMB) += lmb.o obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o +obj-$(CONFIG_DYNAMIC_PRINTK_DEBUG) += dynamic_printk.o + hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/dynamic_printk.c b/lib/dynamic_printk.c new file mode 100644 index 00000000000..d640f87bdc9 --- /dev/null +++ b/lib/dynamic_printk.c @@ -0,0 +1,418 @@ +/* + * lib/dynamic_printk.c + * + * make pr_debug()/dev_dbg() calls runtime configurable based upon their + * their source module. + * + * Copyright (C) 2008 Red Hat, Inc., Jason Baron + */ + +#include +#include +#include +#include +#include +#include + +extern struct mod_debug __start___verbose[]; +extern struct mod_debug __stop___verbose[]; + +struct debug_name { + struct hlist_node hlist; + struct hlist_node hlist2; + int hash1; + int hash2; + char *name; + int enable; + int type; +}; + +static int nr_entries; +static int num_enabled; +int dynamic_enabled = DYNAMIC_ENABLED_NONE; +static struct hlist_head module_table[DEBUG_HASH_TABLE_SIZE] = + { [0 ... DEBUG_HASH_TABLE_SIZE-1] = HLIST_HEAD_INIT }; +static struct hlist_head module_table2[DEBUG_HASH_TABLE_SIZE] = + { [0 ... DEBUG_HASH_TABLE_SIZE-1] = HLIST_HEAD_INIT }; +static DECLARE_MUTEX(debug_list_mutex); + +/* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which + * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They + * use independent hash functions, to reduce the chance of false positives. + */ +long long dynamic_printk_enabled; +EXPORT_SYMBOL_GPL(dynamic_printk_enabled); +long long dynamic_printk_enabled2; +EXPORT_SYMBOL_GPL(dynamic_printk_enabled2); + +/* returns the debug module pointer. */ +static struct debug_name *find_debug_module(char *module_name) +{ + int i; + struct hlist_head *head; + struct hlist_node *node; + struct debug_name *element; + + element = NULL; + for (i = 0; i < DEBUG_HASH_TABLE_SIZE; i++) { + head = &module_table[i]; + hlist_for_each_entry_rcu(element, node, head, hlist) + if (!strcmp(element->name, module_name)) + return element; + } + return NULL; +} + +/* returns the debug module pointer. */ +static struct debug_name *find_debug_module_hash(char *module_name, int hash) +{ + struct hlist_head *head; + struct hlist_node *node; + struct debug_name *element; + + element = NULL; + head = &module_table[hash]; + hlist_for_each_entry_rcu(element, node, head, hlist) + if (!strcmp(element->name, module_name)) + return element; + return NULL; +} + +/* caller must hold mutex*/ +static int __add_debug_module(char *mod_name, int hash, int hash2) +{ + struct debug_name *new; + char *module_name; + int ret = 0; + + if (find_debug_module(mod_name)) { + ret = -EINVAL; + goto out; + } + module_name = kmalloc(strlen(mod_name) + 1, GFP_KERNEL); + if (!module_name) { + ret = -ENOMEM; + goto out; + } + module_name = strcpy(module_name, mod_name); + module_name[strlen(mod_name)] = '\0'; + new = kzalloc(sizeof(struct debug_name), GFP_KERNEL); + if (!new) { + kfree(module_name); + ret = -ENOMEM; + goto out; + } + INIT_HLIST_NODE(&new->hlist); + INIT_HLIST_NODE(&new->hlist2); + new->name = module_name; + new->hash1 = hash; + new->hash2 = hash2; + hlist_add_head_rcu(&new->hlist, &module_table[hash]); + hlist_add_head_rcu(&new->hlist2, &module_table2[hash2]); + nr_entries++; +out: + return ret; +} + +int unregister_dynamic_debug_module(char *mod_name) +{ + struct debug_name *element; + int ret = 0; + + down(&debug_list_mutex); + element = find_debug_module(mod_name); + if (!element) { + ret = -EINVAL; + goto out; + } + hlist_del_rcu(&element->hlist); + hlist_del_rcu(&element->hlist2); + synchronize_rcu(); + kfree(element->name); + if (element->enable) + num_enabled--; + kfree(element); + nr_entries--; +out: + up(&debug_list_mutex); + return 0; +} +EXPORT_SYMBOL_GPL(unregister_dynamic_debug_module); + +int register_dynamic_debug_module(char *mod_name, int type, char *share_name, + char *flags, int hash, int hash2) +{ + struct debug_name *elem; + int ret = 0; + + down(&debug_list_mutex); + elem = find_debug_module(mod_name); + if (!elem) { + if (__add_debug_module(mod_name, hash, hash2)) + goto out; + elem = find_debug_module(mod_name); + if (dynamic_enabled == DYNAMIC_ENABLED_ALL && + !strcmp(mod_name, share_name)) { + elem->enable = true; + num_enabled++; + } + } + elem->type |= type; +out: + up(&debug_list_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(register_dynamic_debug_module); + +int __dynamic_dbg_enabled_helper(char *mod_name, int type, int value, int hash) +{ + struct debug_name *elem; + int ret = 0; + + if (dynamic_enabled == DYNAMIC_ENABLED_ALL) + return 1; + rcu_read_lock(); + elem = find_debug_module_hash(mod_name, hash); + if (elem && elem->enable) + ret = 1; + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(__dynamic_dbg_enabled_helper); + +static void set_all(bool enable) +{ + struct debug_name *e; + struct hlist_node *node; + int i; + long long enable_mask; + + for (i = 0; i < DEBUG_HASH_TABLE_SIZE; i++) { + if (module_table[i].first != NULL) { + hlist_for_each_entry(e, node, &module_table[i], hlist) { + e->enable = enable; + } + } + } + if (enable) + enable_mask = ULLONG_MAX; + else + enable_mask = 0; + dynamic_printk_enabled = enable_mask; + dynamic_printk_enabled2 = enable_mask; +} + +static int disabled_hash(int i, bool first_table) +{ + struct debug_name *e; + struct hlist_node *node; + + if (first_table) { + hlist_for_each_entry(e, node, &module_table[i], hlist) { + if (e->enable) + return 0; + } + } else { + hlist_for_each_entry(e, node, &module_table2[i], hlist2) { + if (e->enable) + return 0; + } + } + return 1; +} + +static ssize_t pr_debug_write(struct file *file, const char __user *buf, + size_t length, loff_t *ppos) +{ + char *buffer, *s, *value_str, *setting_str; + int err, value; + struct debug_name *elem = NULL; + int all = 0; + + if (length > PAGE_SIZE || length < 0) + return -EINVAL; + + buffer = (char *)__get_free_page(GFP_KERNEL); + if (!buffer) + return -ENOMEM; + + err = -EFAULT; + if (copy_from_user(buffer, buf, length)) + goto out; + + err = -EINVAL; + if (length < PAGE_SIZE) + buffer[length] = '\0'; + else if (buffer[PAGE_SIZE-1]) + goto out; + + err = -EINVAL; + down(&debug_list_mutex); + + if (strncmp("set", buffer, 3)) + goto out_up; + s = buffer + 3; + setting_str = strsep(&s, "="); + if (s == NULL) + goto out_up; + setting_str = strstrip(setting_str); + value_str = strsep(&s, " "); + if (s == NULL) + goto out_up; + s = strstrip(s); + if (!strncmp(s, "all", 3)) + all = 1; + else + elem = find_debug_module(s); + if (!strncmp(setting_str, "enable", 6)) { + value = !!simple_strtol(value_str, NULL, 10); + if (all) { + if (value) { + set_all(true); + num_enabled = nr_entries; + dynamic_enabled = DYNAMIC_ENABLED_ALL; + } else { + set_all(false); + num_enabled = 0; + dynamic_enabled = DYNAMIC_ENABLED_NONE; + } + err = 0; + } else { + if (elem) { + if (value && (elem->enable == 0)) { + dynamic_printk_enabled |= + (1LL << elem->hash1); + dynamic_printk_enabled2 |= + (1LL << elem->hash2); + elem->enable = 1; + num_enabled++; + dynamic_enabled = DYNAMIC_ENABLED_SOME; + err = 0; + printk(KERN_DEBUG + "debugging enabled for module %s", + elem->name); + } else if (!value && (elem->enable == 1)) { + elem->enable = 0; + num_enabled--; + if (disabled_hash(elem->hash1, true)) + dynamic_printk_enabled &= + ~(1LL << elem->hash1); + if (disabled_hash(elem->hash2, false)) + dynamic_printk_enabled2 &= + ~(1LL << elem->hash2); + if (num_enabled) + dynamic_enabled = + DYNAMIC_ENABLED_SOME; + else + dynamic_enabled = + DYNAMIC_ENABLED_NONE; + err = 0; + printk(KERN_DEBUG + "debugging disabled for module " + "%s", elem->name); + } + } + } + } + if (!err) + err = length; +out_up: + up(&debug_list_mutex); +out: + free_page((unsigned long)buffer); + return err; +} + +static void *pr_debug_seq_start(struct seq_file *f, loff_t *pos) +{ + return (*pos < DEBUG_HASH_TABLE_SIZE) ? pos : NULL; +} + +static void *pr_debug_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + (*pos)++; + if (*pos >= DEBUG_HASH_TABLE_SIZE) + return NULL; + return pos; +} + +static void pr_debug_seq_stop(struct seq_file *s, void *v) +{ + /* Nothing to do */ +} + +static int pr_debug_seq_show(struct seq_file *s, void *v) +{ + struct hlist_head *head; + struct hlist_node *node; + struct debug_name *elem; + unsigned int i = *(loff_t *) v; + + rcu_read_lock(); + head = &module_table[i]; + hlist_for_each_entry_rcu(elem, node, head, hlist) { + seq_printf(s, "%s enabled=%d", elem->name, elem->enable); + seq_printf(s, "\n"); + } + rcu_read_unlock(); + return 0; +} + +static struct seq_operations pr_debug_seq_ops = { + .start = pr_debug_seq_start, + .next = pr_debug_seq_next, + .stop = pr_debug_seq_stop, + .show = pr_debug_seq_show +}; + +static int pr_debug_open(struct inode *inode, struct file *filp) +{ + return seq_open(filp, &pr_debug_seq_ops); +} + +static const struct file_operations pr_debug_operations = { + .open = pr_debug_open, + .read = seq_read, + .write = pr_debug_write, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __init dynamic_printk_init(void) +{ + struct dentry *dir, *file; + struct mod_debug *iter; + unsigned long value; + + dir = debugfs_create_dir("dynamic_printk", NULL); + if (!dir) + return -ENOMEM; + file = debugfs_create_file("modules", 0644, dir, NULL, + &pr_debug_operations); + if (!file) { + debugfs_remove(dir); + return -ENOMEM; + } + for (value = (unsigned long)__start___verbose; + value < (unsigned long)__stop___verbose; + value += sizeof(struct mod_debug)) { + iter = (struct mod_debug *)value; + register_dynamic_debug_module(iter->modname, + iter->type, + iter->logical_modname, + iter->flag_names, iter->hash, iter->hash2); + } + return 0; +} +module_init(dynamic_printk_init); +/* may want to move this earlier so we can get traces as early as possible */ + +static int __init dynamic_printk_setup(char *str) +{ + if (str) + return -ENOENT; + set_all(true); + return 0; +} +/* Use early_param(), so we can get debug output as early as possible */ +early_param("dynamic_printk", dynamic_printk_setup); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 373e51e91ce..1bc3001d182 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -65,7 +65,7 @@ void struct nf_conntrack_expect *exp) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_expectfn); -#ifdef DEBUG +#if defined(DEBUG) || defined(CONFIG_DYNAMIC_PRINTK_DEBUG) /* PptpControlMessageType names */ const char *const pptp_msg_name[] = { "UNKNOWN_MESSAGE", diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index ea48b82a370..b4ca38a2115 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -96,6 +96,14 @@ basename_flags = -D"KBUILD_BASENAME=KBUILD_STR($(call name-fix,$(basetarget)))" modname_flags = $(if $(filter 1,$(words $(modname))),\ -D"KBUILD_MODNAME=KBUILD_STR($(call name-fix,$(modname)))") +#hash values +ifdef CONFIG_DYNAMIC_PRINTK_DEBUG +debug_flags = -D"DEBUG_HASH=$(shell ./scripts/basic/hash djb2 $(@D)$(modname))"\ + -D"DEBUG_HASH2=$(shell ./scripts/basic/hash r5 $(@D)$(modname))" +else +debug_flags = +endif + orig_c_flags = $(KBUILD_CFLAGS) $(ccflags-y) $(CFLAGS_$(basetarget).o) _c_flags = $(filter-out $(CFLAGS_REMOVE_$(basetarget).o), $(orig_c_flags)) _a_flags = $(KBUILD_AFLAGS) $(asflags-y) $(AFLAGS_$(basetarget).o) @@ -121,7 +129,8 @@ endif c_flags = -Wp,-MD,$(depfile) $(NOSTDINC_FLAGS) $(KBUILD_CPPFLAGS) \ $(__c_flags) $(modkern_cflags) \ - -D"KBUILD_STR(s)=\#s" $(basename_flags) $(modname_flags) + -D"KBUILD_STR(s)=\#s" $(basename_flags) $(modname_flags) \ + $(debug_flags) a_flags = -Wp,-MD,$(depfile) $(NOSTDINC_FLAGS) $(KBUILD_CPPFLAGS) \ $(__a_flags) $(modkern_aflags) diff --git a/scripts/basic/Makefile b/scripts/basic/Makefile index 4c324a1f1e0..09559951df1 100644 --- a/scripts/basic/Makefile +++ b/scripts/basic/Makefile @@ -9,7 +9,7 @@ # fixdep: Used to generate dependency information during build process # docproc: Used in Documentation/DocBook -hostprogs-y := fixdep docproc +hostprogs-y := fixdep docproc hash always := $(hostprogs-y) # fixdep is needed to compile other host programs diff --git a/scripts/basic/hash.c b/scripts/basic/hash.c new file mode 100644 index 00000000000..3299ad7fc8c --- /dev/null +++ b/scripts/basic/hash.c @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2008 Red Hat, Inc., Jason Baron + * + */ + +#include +#include +#include + +#define DYNAMIC_DEBUG_HASH_BITS 6 + +static const char *program; + +static void usage(void) +{ + printf("Usage: %s \n", program); + exit(1); +} + +/* djb2 hashing algorithm by Dan Bernstein. From: + * http://www.cse.yorku.ca/~oz/hash.html + */ + +unsigned int djb2_hash(char *str) +{ + unsigned long hash = 5381; + int c; + + c = *str; + while (c) { + hash = ((hash << 5) + hash) + c; + c = *++str; + } + return (unsigned int)(hash & ((1 << DYNAMIC_DEBUG_HASH_BITS) - 1)); +} + +unsigned int r5_hash(char *str) +{ + unsigned long hash = 0; + int c; + + c = *str; + while (c) { + hash = (hash + (c << 4) + (c >> 4)) * 11; + c = *++str; + } + return (unsigned int)(hash & ((1 << DYNAMIC_DEBUG_HASH_BITS) - 1)); +} + +int main(int argc, char *argv[]) +{ + program = argv[0]; + + if (argc != 3) + usage(); + if (!strcmp(argv[1], "djb2")) + printf("%d\n", djb2_hash(argv[2])); + else if (!strcmp(argv[1], "r5")) + printf("%d\n", r5_hash(argv[2])); + else + usage(); + exit(0); +} + -- cgit v1.2.3-70-g09d2 From 9363b9f23c9cc36cc8ef6c05fdf879ee4a96ae92 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Wed, 15 Oct 2008 22:01:05 -0700 Subject: memrlimit: cgroup mm owner callback changes to add task info This patch adds an additional field to the mm_owner callbacks. This field is required to get to the mm that changed. Hold mmap_sem in write mode before calling the mm_owner_changed callback [hugh@veritas.com: fix mmap_sem deadlock] Signed-off-by: Balbir Singh Cc: Sudhir Kumar Cc: YAMAMOTO Takashi Cc: Paul Menage Cc: Li Zefan Cc: Pavel Emelianov Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki Cc: David Rientjes Cc: Vivek Goyal Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 3 ++- kernel/cgroup.c | 4 +++- kernel/exit.c | 9 ++++----- 3 files changed, 9 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c98dd7cb707..30934e4bfaa 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -326,7 +326,8 @@ struct cgroup_subsys { */ void (*mm_owner_changed)(struct cgroup_subsys *ss, struct cgroup *old, - struct cgroup *new); + struct cgroup *new, + struct task_struct *p); int subsys_id; int active; int disabled; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a0123d75ec9..8c6e1c17e6d 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2735,6 +2735,8 @@ void cgroup_fork_callbacks(struct task_struct *child) * Called on every change to mm->owner. mm_init_owner() does not * invoke this routine, since it assigns the mm->owner the first time * and does not change it. + * + * The callbacks are invoked with mmap_sem held in read mode. */ void cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new) { @@ -2750,7 +2752,7 @@ void cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new) if (oldcgrp == newcgrp) continue; if (ss->mm_owner_changed) - ss->mm_owner_changed(ss, oldcgrp, newcgrp); + ss->mm_owner_changed(ss, oldcgrp, newcgrp, new); } } } diff --git a/kernel/exit.c b/kernel/exit.c index 85a83c83185..0ef4673e351 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -640,24 +640,23 @@ retry: assign_new_owner: BUG_ON(c == p); get_task_struct(c); + read_unlock(&tasklist_lock); + down_write(&mm->mmap_sem); /* * The task_lock protects c->mm from changing. * We always want mm->owner->mm == mm */ task_lock(c); - /* - * Delay read_unlock() till we have the task_lock() - * to ensure that c does not slip away underneath us - */ - read_unlock(&tasklist_lock); if (c->mm != mm) { task_unlock(c); + up_write(&mm->mmap_sem); put_task_struct(c); goto retry; } cgroup_mm_owner_callbacks(mm->owner, c); mm->owner = c; task_unlock(c); + up_write(&mm->mmap_sem); put_task_struct(c); } #endif /* CONFIG_MM_OWNER */ -- cgit v1.2.3-70-g09d2 From 1bfcf1304ea79c46efc3724e548b13b4b442b418 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 15 Oct 2008 22:01:21 -0700 Subject: pm: rework disabling of user mode helpers during suspend/hibernation We currently use a PM notifier to disable user mode helpers before suspend and hibernation and to re-enable them during resume. However, this is not an ideal solution, because if any drivers want to upload firmware into memory before suspend, they have to use a PM notifier for this purpose and there is no guarantee that the ordering of PM notifiers will be as expected (ie. the notifier that disables user mode helpers has to be run after the driver's notifier used for uploading the firmware). For this reason, it seems better to move the disabling and enabling of user mode helpers to separate functions that will be called by the PM core as necessary. [akpm@linux-foundation.org: remove unneeded ifdefs] Signed-off-by: Rafael J. Wysocki Cc: Alan Stern Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmod.h | 3 +++ kernel/kmod.c | 65 ++++++++++++++++++++++------------------------------ kernel/power/disk.c | 11 +++++++++ kernel/power/main.c | 7 ++++++ kernel/power/user.c | 10 +++++++- 5 files changed, 58 insertions(+), 38 deletions(-) (limited to 'kernel') diff --git a/include/linux/kmod.h b/include/linux/kmod.h index a1a91577813..92213a9194e 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -99,4 +99,7 @@ struct file; extern int call_usermodehelper_pipe(char *path, char *argv[], char *envp[], struct file **filp); +extern int usermodehelper_disable(void); +extern void usermodehelper_enable(void); + #endif /* __LINUX_KMOD_H__ */ diff --git a/kernel/kmod.c b/kernel/kmod.c index 2456d1a0bef..ab7dd08472f 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -265,7 +265,7 @@ static void __call_usermodehelper(struct work_struct *work) } } -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP /* * If set, call_usermodehelper_exec() will exit immediately returning -EBUSY * (used for preventing user land processes from being created after the user @@ -288,39 +288,37 @@ static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq); */ #define RUNNING_HELPERS_TIMEOUT (5 * HZ) -static int usermodehelper_pm_callback(struct notifier_block *nfb, - unsigned long action, - void *ignored) +/** + * usermodehelper_disable - prevent new helpers from being started + */ +int usermodehelper_disable(void) { long retval; - switch (action) { - case PM_HIBERNATION_PREPARE: - case PM_SUSPEND_PREPARE: - usermodehelper_disabled = 1; - smp_mb(); - /* - * From now on call_usermodehelper_exec() won't start any new - * helpers, so it is sufficient if running_helpers turns out to - * be zero at one point (it may be increased later, but that - * doesn't matter). - */ - retval = wait_event_timeout(running_helpers_waitq, + usermodehelper_disabled = 1; + smp_mb(); + /* + * From now on call_usermodehelper_exec() won't start any new + * helpers, so it is sufficient if running_helpers turns out to + * be zero at one point (it may be increased later, but that + * doesn't matter). + */ + retval = wait_event_timeout(running_helpers_waitq, atomic_read(&running_helpers) == 0, RUNNING_HELPERS_TIMEOUT); - if (retval) { - return NOTIFY_OK; - } else { - usermodehelper_disabled = 0; - return NOTIFY_BAD; - } - case PM_POST_HIBERNATION: - case PM_POST_SUSPEND: - usermodehelper_disabled = 0; - return NOTIFY_OK; - } + if (retval) + return 0; - return NOTIFY_DONE; + usermodehelper_disabled = 0; + return -EAGAIN; +} + +/** + * usermodehelper_enable - allow new helpers to be started again + */ +void usermodehelper_enable(void) +{ + usermodehelper_disabled = 0; } static void helper_lock(void) @@ -334,18 +332,12 @@ static void helper_unlock(void) if (atomic_dec_and_test(&running_helpers)) wake_up(&running_helpers_waitq); } - -static void register_pm_notifier_callback(void) -{ - pm_notifier(usermodehelper_pm_callback, 0); -} -#else /* CONFIG_PM */ +#else /* CONFIG_PM_SLEEP */ #define usermodehelper_disabled 0 static inline void helper_lock(void) {} static inline void helper_unlock(void) {} -static inline void register_pm_notifier_callback(void) {} -#endif /* CONFIG_PM */ +#endif /* CONFIG_PM_SLEEP */ /** * call_usermodehelper_setup - prepare to call a usermode helper @@ -515,5 +507,4 @@ void __init usermodehelper_init(void) { khelper_wq = create_singlethread_workqueue("khelper"); BUG_ON(!khelper_wq); - register_pm_notifier_callback(); } diff --git a/kernel/power/disk.c b/kernel/power/disk.c index bbd85c60f74..331f9836383 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -520,6 +521,10 @@ int hibernate(void) if (error) goto Exit; + error = usermodehelper_disable(); + if (error) + goto Exit; + /* Allocate memory management structures */ error = create_basic_memory_bitmaps(); if (error) @@ -558,6 +563,7 @@ int hibernate(void) thaw_processes(); Finish: free_basic_memory_bitmaps(); + usermodehelper_enable(); Exit: pm_notifier_call_chain(PM_POST_HIBERNATION); pm_restore_console(); @@ -634,6 +640,10 @@ static int software_resume(void) if (error) goto Finish; + error = usermodehelper_disable(); + if (error) + goto Finish; + error = create_basic_memory_bitmaps(); if (error) goto Finish; @@ -656,6 +666,7 @@ static int software_resume(void) thaw_processes(); Done: free_basic_memory_bitmaps(); + usermodehelper_enable(); Finish: pm_notifier_call_chain(PM_POST_RESTORE); pm_restore_console(); diff --git a/kernel/power/main.c b/kernel/power/main.c index 540b16b6856..19122cf6d82 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -237,6 +238,10 @@ static int suspend_prepare(void) if (error) goto Finish; + error = usermodehelper_disable(); + if (error) + goto Finish; + if (suspend_freeze_processes()) { error = -EAGAIN; goto Thaw; @@ -256,6 +261,7 @@ static int suspend_prepare(void) Thaw: suspend_thaw_processes(); + usermodehelper_enable(); Finish: pm_notifier_call_chain(PM_POST_SUSPEND); pm_restore_console(); @@ -376,6 +382,7 @@ int suspend_devices_and_enter(suspend_state_t state) static void suspend_finish(void) { suspend_thaw_processes(); + usermodehelper_enable(); pm_notifier_call_chain(PM_POST_SUSPEND); pm_restore_console(); } diff --git a/kernel/power/user.c b/kernel/power/user.c index a6332a31326..005b93d839b 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -212,13 +212,20 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, case SNAPSHOT_FREEZE: if (data->frozen) break; + printk("Syncing filesystems ... "); sys_sync(); printk("done.\n"); - error = freeze_processes(); + error = usermodehelper_disable(); if (error) + break; + + error = freeze_processes(); + if (error) { thaw_processes(); + usermodehelper_enable(); + } if (!error) data->frozen = 1; break; @@ -227,6 +234,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, if (!data->frozen || data->ready) break; thaw_processes(); + usermodehelper_enable(); data->frozen = 0; break; -- cgit v1.2.3-70-g09d2 From d9f3216b474be170d0c093d70125b541ace58704 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 15 Oct 2008 22:01:26 -0700 Subject: kernel/dma.c: remove a CVS keyword Remove a CVS keyword that wasn't updated for a long time from a comment. Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/dma.c b/kernel/dma.c index d2c60a82279..f903189c530 100644 --- a/kernel/dma.c +++ b/kernel/dma.c @@ -1,4 +1,4 @@ -/* $Id: dma.c,v 1.7 1994/12/28 03:35:33 root Exp root $ +/* * linux/kernel/dma.c: A DMA channel allocator. Inspired by linux/kernel/irq.c. * * Written by Hennus Bergman, 1992. -- cgit v1.2.3-70-g09d2 From a25d644fc0e232f242d1f3baa63c149c42536ff0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 15 Oct 2008 22:01:38 -0700 Subject: wait: kill is_sync_wait() is_sync_wait() is used to distinguish between sync and async waits. Basically sync waits are the ones initialized with init_waitqueue_entry() and async ones with init_waitqueue_func_entry(). The sync/async distinction is used only in prepare_to_wait[_exclusive]() and its only function is to skip setting the current task state if the wait is async. This has a few problems. * No one uses it. None of func_entry users use prepare_to_wait() functions, so the code path never gets executed. * The distinction is bogus. Maybe back when func_entry is used only by aio but it's now also used by epoll and in future possibly by 9p and poll/select. * Taking @state as argument and ignoring it silenly depending on how @wait is initialized is just a bad error-prone API. * It prevents func_entry waits from using wait->private for no good reason. This patch kills is_sync_wait() and the associated code paths from prepare_to_wait[_exclusive](). As there was no user of these code paths, this patch doesn't cause any behavior difference. Signed-off-by: Tejun Heo Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/wait.h | 9 --------- kernel/wait.c | 14 ++------------ 2 files changed, 2 insertions(+), 21 deletions(-) (limited to 'kernel') diff --git a/include/linux/wait.h b/include/linux/wait.h index 0081147a9fe..ef609f842fa 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -108,15 +108,6 @@ static inline int waitqueue_active(wait_queue_head_t *q) return !list_empty(&q->task_list); } -/* - * Used to distinguish between sync and async io wait context: - * sync i/o typically specifies a NULL wait queue entry or a wait - * queue entry bound to a task (current task) to wake up. - * aio specifies a wait queue entry with an async notification - * callback routine, not associated with any task. - */ -#define is_sync_wait(wait) (!(wait) || ((wait)->private)) - extern void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait); extern void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait); extern void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait); diff --git a/kernel/wait.c b/kernel/wait.c index c275c56cf2d..cd87131f2fc 100644 --- a/kernel/wait.c +++ b/kernel/wait.c @@ -72,12 +72,7 @@ prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state) spin_lock_irqsave(&q->lock, flags); if (list_empty(&wait->task_list)) __add_wait_queue(q, wait); - /* - * don't alter the task state if this is just going to - * queue an async wait queue callback - */ - if (is_sync_wait(wait)) - set_current_state(state); + set_current_state(state); spin_unlock_irqrestore(&q->lock, flags); } EXPORT_SYMBOL(prepare_to_wait); @@ -91,12 +86,7 @@ prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state) spin_lock_irqsave(&q->lock, flags); if (list_empty(&wait->task_list)) __add_wait_queue_tail(q, wait); - /* - * don't alter the task state if this is just going to - * queue an async wait queue callback - */ - if (is_sync_wait(wait)) - set_current_state(state); + set_current_state(state); spin_unlock_irqrestore(&q->lock, flags); } EXPORT_SYMBOL(prepare_to_wait_exclusive); -- cgit v1.2.3-70-g09d2 From 9ba16087d9f996a93ab6f4453a52a4b24bc1f25c Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 15 Oct 2008 22:01:38 -0700 Subject: Kconfig: eliminate "def_bool n" constructs Using "def_bool n" is pointless, simply using bool here appears more appropriate. Further, retaining such options that don't have a prompt and aren't selected by anything seems also at least questionable. Signed-off-by: Jan Beulich Cc: Ingo Molnar Cc: Tony Luck Cc: Thomas Gleixner Cc: Bartlomiej Zolnierkiewicz Cc: Sam Ravnborg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 18 +++++++++--------- arch/ia64/Kconfig | 8 -------- arch/x86/Kconfig | 23 +++++------------------ drivers/ide/Kconfig | 2 +- kernel/time/Kconfig | 1 - lib/Kconfig | 4 ++-- mm/Kconfig | 4 ++-- 7 files changed, 19 insertions(+), 41 deletions(-) (limited to 'kernel') diff --git a/arch/Kconfig b/arch/Kconfig index 0267babe5eb..e6ab550bceb 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -28,7 +28,7 @@ config OPROFILE_IBS If unsure, say N. config HAVE_OPROFILE - def_bool n + bool config KPROBES bool "Kprobes" @@ -42,7 +42,7 @@ config KPROBES If in doubt, say "N". config HAVE_EFFICIENT_UNALIGNED_ACCESS - def_bool n + bool help Some architectures are unable to perform unaligned accesses without the use of get_unaligned/put_unaligned. Others are @@ -65,13 +65,13 @@ config KRETPROBES depends on KPROBES && HAVE_KRETPROBES config HAVE_IOREMAP_PROT - def_bool n + bool config HAVE_KPROBES - def_bool n + bool config HAVE_KRETPROBES - def_bool n + bool # # An arch should select this if it provides all these things: @@ -89,16 +89,16 @@ config HAVE_KRETPROBES # signal delivery calls tracehook_signal_handler() # config HAVE_ARCH_TRACEHOOK - def_bool n + bool config HAVE_DMA_ATTRS - def_bool n + bool config USE_GENERIC_SMP_HELPERS - def_bool n + bool config HAVE_CLK - def_bool n + bool help The calls support software clock gating and thus are a key power management tool on many systems. diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 48e496fe1e7..3b7aa38254a 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -60,14 +60,6 @@ config RWSEM_XCHGADD_ALGORITHM bool default y -config ARCH_HAS_ILOG2_U32 - bool - default n - -config ARCH_HAS_ILOG2_U64 - bool - default n - config HUGETLB_PAGE_SIZE_VARIABLE bool depends on HUGETLB_PAGE diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f65c2744d57..7ccb6e60e60 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -39,10 +39,6 @@ config ARCH_DEFCONFIG default "arch/x86/configs/i386_defconfig" if X86_32 default "arch/x86/configs/x86_64_defconfig" if X86_64 - -config GENERIC_LOCKBREAK - def_bool n - config GENERIC_TIME def_bool y @@ -95,7 +91,7 @@ config GENERIC_HWEIGHT def_bool y config GENERIC_GPIO - def_bool n + bool config ARCH_MAY_HAVE_PC_FDC def_bool y @@ -106,12 +102,6 @@ config RWSEM_GENERIC_SPINLOCK config RWSEM_XCHGADD_ALGORITHM def_bool X86_XADD -config ARCH_HAS_ILOG2_U32 - def_bool n - -config ARCH_HAS_ILOG2_U64 - def_bool n - config ARCH_HAS_CPU_IDLE_WAIT def_bool y @@ -758,9 +748,8 @@ config I8K Say N otherwise. config X86_REBOOTFIXUPS - def_bool n - prompt "Enable X86 board specific fixups for reboot" - depends on X86_32 && X86 + bool "Enable X86 board specific fixups for reboot" + depends on X86_32 ---help--- This enables chipset and/or board specific fixups to be done in order to get reboot to work correctly. This is only needed on @@ -944,8 +933,7 @@ config HIGHMEM depends on X86_32 && (HIGHMEM64G || HIGHMEM4G) config X86_PAE - def_bool n - prompt "PAE (Physical Address Extension) Support" + bool "PAE (Physical Address Extension) Support" depends on X86_32 && !HIGHMEM4G select RESOURCES_64BIT help @@ -1238,8 +1226,7 @@ config X86_PAT If unsure, say Y. config EFI - def_bool n - prompt "EFI runtime service support" + bool "EFI runtime service support" depends on ACPI ---help--- This enables the kernel to use EFI runtime services that are diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index 6c6dd2faced..74a369a6116 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -4,7 +4,7 @@ # Select HAVE_IDE if IDE is supported config HAVE_IDE - def_bool n + bool menuconfig IDE tristate "ATA/ATAPI/MFM/RLL support" diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 8d53106a0a9..95ed42951e0 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -3,7 +3,6 @@ # config TICK_ONESHOT bool - default n config NO_HZ bool "Tickless System (Dynamic Ticks)" diff --git a/lib/Kconfig b/lib/Kconfig index c7ad7a5b353..85cf7ea978a 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -8,10 +8,10 @@ config BITREVERSE tristate config GENERIC_FIND_FIRST_BIT - def_bool n + bool config GENERIC_FIND_NEXT_BIT - def_bool n + bool config CRC_CCITT tristate "CRC-CCITT functions" diff --git a/mm/Kconfig b/mm/Kconfig index 0bd9c2dbb2a..5585f129359 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -101,7 +101,7 @@ config HAVE_MEMORY_PRESENT # with gcc 3.4 and later. # config SPARSEMEM_STATIC - def_bool n + bool # # Architecture platforms which require a two level mem_section in SPARSEMEM @@ -113,7 +113,7 @@ config SPARSEMEM_EXTREME depends on SPARSEMEM && !SPARSEMEM_STATIC config SPARSEMEM_VMEMMAP_ENABLE - def_bool n + bool config SPARSEMEM_VMEMMAP bool "Sparse Memory virtual memmap" -- cgit v1.2.3-70-g09d2 From 25ddbb18aae33ad255eb9f35aacebe3af01e1e9c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 15 Oct 2008 22:01:41 -0700 Subject: Make the taint flags reliable It's somewhat unlikely that it happens, but right now a race window between interrupts or machine checks or oopses could corrupt the tainted bitmap because it is modified in a non atomic fashion. Convert the taint variable to an unsigned long and use only atomic bit operations on it. Unfortunately this means the intvec sysctl functions cannot be used on it anymore. It turned out the taint sysctl handler could actually be simplified a bit (since it only increases capabilities) so this patch actually removes code. [akpm@linux-foundation.org: remove unneeded include] Signed-off-by: Andi Kleen Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/smpboot.c | 14 +++++----- include/linux/kernel.h | 25 +++++++++--------- kernel/module.c | 12 ++++----- kernel/panic.c | 63 ++++++++++++++++++++++++++++++++------------ kernel/softlockup.c | 2 +- kernel/sysctl.c | 67 ++++++++++++++++++++--------------------------- 6 files changed, 101 insertions(+), 82 deletions(-) (limited to 'kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 8c3aca7cb34..7ed9e070a6e 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -282,6 +282,8 @@ static void __cpuinit smp_callin(void) cpu_set(cpuid, cpu_callin_map); } +static int __cpuinitdata unsafe_smp; + /* * Activate a secondary processor. */ @@ -397,7 +399,7 @@ static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c) goto valid_k7; /* If we get here, not a certified SMP capable AMD system. */ - add_taint(TAINT_UNSAFE_SMP); + unsafe_smp = 1; } valid_k7: @@ -414,12 +416,10 @@ static void __cpuinit smp_checks(void) * Don't taint if we are running SMP kernel on a single non-MP * approved Athlon */ - if (tainted & TAINT_UNSAFE_SMP) { - if (num_online_cpus()) - printk(KERN_INFO "WARNING: This combination of AMD" - "processors is not suitable for SMP.\n"); - else - tainted &= ~TAINT_UNSAFE_SMP; + if (unsafe_smp && num_online_cpus() > 1) { + printk(KERN_INFO "WARNING: This combination of AMD" + "processors is not suitable for SMP.\n"); + add_taint(TAINT_UNSAFE_SMP); } } diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 75d81f157d2..e971c55f45a 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -235,9 +235,10 @@ extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in extern int panic_timeout; extern int panic_on_oops; extern int panic_on_unrecovered_nmi; -extern int tainted; extern const char *print_tainted(void); -extern void add_taint(unsigned); +extern void add_taint(unsigned flag); +extern int test_taint(unsigned flag); +extern unsigned long get_taint(void); extern int root_mountflags; /* Values used for system_state */ @@ -250,16 +251,16 @@ extern enum system_states { SYSTEM_SUSPEND_DISK, } system_state; -#define TAINT_PROPRIETARY_MODULE (1<<0) -#define TAINT_FORCED_MODULE (1<<1) -#define TAINT_UNSAFE_SMP (1<<2) -#define TAINT_FORCED_RMMOD (1<<3) -#define TAINT_MACHINE_CHECK (1<<4) -#define TAINT_BAD_PAGE (1<<5) -#define TAINT_USER (1<<6) -#define TAINT_DIE (1<<7) -#define TAINT_OVERRIDDEN_ACPI_TABLE (1<<8) -#define TAINT_WARN (1<<9) +#define TAINT_PROPRIETARY_MODULE 0 +#define TAINT_FORCED_MODULE 1 +#define TAINT_UNSAFE_SMP 2 +#define TAINT_FORCED_RMMOD 3 +#define TAINT_MACHINE_CHECK 4 +#define TAINT_BAD_PAGE 5 +#define TAINT_USER 6 +#define TAINT_DIE 7 +#define TAINT_OVERRIDDEN_ACPI_TABLE 8 +#define TAINT_WARN 9 extern void dump_stack(void) __cold; diff --git a/kernel/module.c b/kernel/module.c index 9db11911e04..dd9ac6ad5cb 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -100,7 +100,7 @@ static inline int strong_try_module_get(struct module *mod) static inline void add_taint_module(struct module *mod, unsigned flag) { add_taint(flag); - mod->taints |= flag; + mod->taints |= (1U << flag); } /* @@ -923,7 +923,7 @@ static const char vermagic[] = VERMAGIC_STRING; static int try_to_force_load(struct module *mod, const char *symname) { #ifdef CONFIG_MODULE_FORCE_LOAD - if (!(tainted & TAINT_FORCED_MODULE)) + if (!test_taint(TAINT_FORCED_MODULE)) printk("%s: no version for \"%s\" found: kernel tainted.\n", mod->name, symname); add_taint_module(mod, TAINT_FORCED_MODULE); @@ -1033,7 +1033,7 @@ static unsigned long resolve_symbol(Elf_Shdr *sechdrs, const unsigned long *crc; ret = find_symbol(name, &owner, &crc, - !(mod->taints & TAINT_PROPRIETARY_MODULE), true); + !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true); if (!IS_ERR_VALUE(ret)) { /* use_module can fail due to OOM, or module initialization or unloading */ @@ -1634,7 +1634,7 @@ static void set_license(struct module *mod, const char *license) license = "unspecified"; if (!license_is_gpl_compatible(license)) { - if (!(tainted & TAINT_PROPRIETARY_MODULE)) + if (!test_taint(TAINT_PROPRIETARY_MODULE)) printk(KERN_WARNING "%s: module license '%s' taints " "kernel.\n", mod->name, license); add_taint_module(mod, TAINT_PROPRIETARY_MODULE); @@ -2552,9 +2552,9 @@ static char *module_flags(struct module *mod, char *buf) mod->state == MODULE_STATE_GOING || mod->state == MODULE_STATE_COMING) { buf[bx++] = '('; - if (mod->taints & TAINT_PROPRIETARY_MODULE) + if (mod->taints & (1 << TAINT_PROPRIETARY_MODULE)) buf[bx++] = 'P'; - if (mod->taints & TAINT_FORCED_MODULE) + if (mod->taints & (1 << TAINT_FORCED_MODULE)) buf[bx++] = 'F'; /* * TAINT_FORCED_RMMOD: could be added. diff --git a/kernel/panic.c b/kernel/panic.c index 12c5a0a6c89..028013f7afd 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -23,7 +23,7 @@ #include int panic_on_oops; -int tainted; +static unsigned long tainted_mask; static int pause_on_oops; static int pause_on_oops_flag; static DEFINE_SPINLOCK(pause_on_oops_lock); @@ -159,31 +159,60 @@ EXPORT_SYMBOL(panic); * The string is overwritten by the next call to print_taint(). */ +struct tnt { + u8 bit; + char true; + char false; +}; + +static const struct tnt tnts[] = { + { TAINT_PROPRIETARY_MODULE, 'P', 'G' }, + { TAINT_FORCED_MODULE, 'F', ' ' }, + { TAINT_UNSAFE_SMP, 'S', ' ' }, + { TAINT_FORCED_RMMOD, 'R', ' ' }, + { TAINT_MACHINE_CHECK, 'M', ' ' }, + { TAINT_BAD_PAGE, 'B', ' ' }, + { TAINT_USER, 'U', ' ' }, + { TAINT_DIE, 'D', ' ' }, + { TAINT_OVERRIDDEN_ACPI_TABLE, 'A', ' ' }, + { TAINT_WARN, 'W', ' ' }, +}; + const char *print_tainted(void) { - static char buf[20]; - if (tainted) { - snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c%c%c%c", - tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G', - tainted & TAINT_FORCED_MODULE ? 'F' : ' ', - tainted & TAINT_UNSAFE_SMP ? 'S' : ' ', - tainted & TAINT_FORCED_RMMOD ? 'R' : ' ', - tainted & TAINT_MACHINE_CHECK ? 'M' : ' ', - tainted & TAINT_BAD_PAGE ? 'B' : ' ', - tainted & TAINT_USER ? 'U' : ' ', - tainted & TAINT_DIE ? 'D' : ' ', - tainted & TAINT_OVERRIDDEN_ACPI_TABLE ? 'A' : ' ', - tainted & TAINT_WARN ? 'W' : ' '); - } - else + static char buf[ARRAY_SIZE(tnts) + sizeof("Tainted: ") + 1]; + + if (tainted_mask) { + char *s; + int i; + + s = buf + sprintf(buf, "Tainted: "); + for (i = 0; i < ARRAY_SIZE(tnts); i++) { + const struct tnt *t = &tnts[i]; + *s++ = test_bit(t->bit, &tainted_mask) ? + t->true : t->false; + } + *s = 0; + } else snprintf(buf, sizeof(buf), "Not tainted"); return(buf); } +int test_taint(unsigned flag) +{ + return test_bit(flag, &tainted_mask); +} +EXPORT_SYMBOL(test_taint); + +unsigned long get_taint(void) +{ + return tainted_mask; +} + void add_taint(unsigned flag) { debug_locks = 0; /* can't trust the integrity of the kernel anymore */ - tainted |= flag; + set_bit(flag, &tainted_mask); } EXPORT_SYMBOL(add_taint); diff --git a/kernel/softlockup.c b/kernel/softlockup.c index cb838ee93a8..3953e4aed73 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -226,7 +226,7 @@ static void check_hung_uninterruptible_tasks(int this_cpu) * If the system crashed already then all bets are off, * do not report extra hung tasks: */ - if ((tainted & TAINT_DIE) || did_panic) + if (test_taint(TAINT_DIE) || did_panic) return; read_lock(&tasklist_lock); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index cfc5295f1e8..ec88fcc9a0d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -149,7 +149,7 @@ extern int max_lock_depth; #ifdef CONFIG_PROC_SYSCTL static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); -static int proc_dointvec_taint(struct ctl_table *table, int write, struct file *filp, +static int proc_taint(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); #endif @@ -379,10 +379,9 @@ static struct ctl_table kern_table[] = { #ifdef CONFIG_PROC_SYSCTL { .procname = "tainted", - .data = &tainted, - .maxlen = sizeof(int), + .maxlen = sizeof(long), .mode = 0644, - .proc_handler = &proc_dointvec_taint, + .proc_handler = &proc_taint, }, #endif #ifdef CONFIG_LATENCYTOP @@ -2228,49 +2227,39 @@ int proc_dointvec(struct ctl_table *table, int write, struct file *filp, NULL,NULL); } -#define OP_SET 0 -#define OP_AND 1 -#define OP_OR 2 - -static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp, - int *valp, - int write, void *data) -{ - int op = *(int *)data; - if (write) { - int val = *negp ? -*lvalp : *lvalp; - switch(op) { - case OP_SET: *valp = val; break; - case OP_AND: *valp &= val; break; - case OP_OR: *valp |= val; break; - } - } else { - int val = *valp; - if (val < 0) { - *negp = -1; - *lvalp = (unsigned long)-val; - } else { - *negp = 0; - *lvalp = (unsigned long)val; - } - } - return 0; -} - /* - * Taint values can only be increased + * Taint values can only be increased + * This means we can safely use a temporary. */ -static int proc_dointvec_taint(struct ctl_table *table, int write, struct file *filp, +static int proc_taint(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { - int op; + struct ctl_table t; + unsigned long tmptaint = get_taint(); + int err; if (write && !capable(CAP_SYS_ADMIN)) return -EPERM; - op = OP_OR; - return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, - do_proc_dointvec_bset_conv,&op); + t = *table; + t.data = &tmptaint; + err = proc_doulongvec_minmax(&t, write, filp, buffer, lenp, ppos); + if (err < 0) + return err; + + if (write) { + /* + * Poor man's atomic or. Not worth adding a primitive + * to everyone's atomic.h for this + */ + int i; + for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) { + if ((tmptaint >> i) & 1) + add_taint(i); + } + } + + return err; } struct do_proc_dointvec_minmax_conv_param { -- cgit v1.2.3-70-g09d2 From 0c2d64fb6cae9aae480f6a46cfe79f8d7d48b59f Mon Sep 17 00:00:00 2001 From: Adam Tkac Date: Wed, 15 Oct 2008 22:01:45 -0700 Subject: rlimit: permit setting RLIMIT_NOFILE to RLIM_INFINITY When a process wants to set the limit of open files to RLIM_INFINITY it gets EPERM even if it has CAP_SYS_RESOURCE capability. For example, BIND does: ... #elif defined(NR_OPEN) && defined(__linux__) /* * Some Linux kernels don't accept RLIM_INFINIT; the maximum * possible value is the NR_OPEN defined in linux/fs.h. */ if (resource == isc_resource_openfiles && rlim_value == RLIM_INFINITY) { rl.rlim_cur = rl.rlim_max = NR_OPEN; unixresult = setrlimit(unixresource, &rl); if (unixresult == 0) return (ISC_R_SUCCESS); } #elif ... If we allow setting RLIMIT_NOFILE to RLIM_INFINITY we increase portability - you don't have to check if OS is linux and then use different schema for limits. The spec says "Specifying RLIM_INFINITY as any resource limit value on a successful call to setrlimit() shall inhibit enforcement of that resource limit." and we're presently not doing that. Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/sys.c b/kernel/sys.c index 234d9454294..d5b79f65ad9 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1450,14 +1450,22 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) return -EINVAL; if (copy_from_user(&new_rlim, rlim, sizeof(*rlim))) return -EFAULT; - if (new_rlim.rlim_cur > new_rlim.rlim_max) - return -EINVAL; old_rlim = current->signal->rlim + resource; if ((new_rlim.rlim_max > old_rlim->rlim_max) && !capable(CAP_SYS_RESOURCE)) return -EPERM; - if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open) - return -EPERM; + + if (resource == RLIMIT_NOFILE) { + if (new_rlim.rlim_max == RLIM_INFINITY) + new_rlim.rlim_max = sysctl_nr_open; + if (new_rlim.rlim_cur == RLIM_INFINITY) + new_rlim.rlim_cur = sysctl_nr_open; + if (new_rlim.rlim_max > sysctl_nr_open) + return -EPERM; + } + + if (new_rlim.rlim_cur > new_rlim.rlim_max) + return -EINVAL; retval = security_task_setrlimit(resource, &new_rlim); if (retval) -- cgit v1.2.3-70-g09d2 From 22b8ce94708f7cdf0b04965c6f7443dfd374c35c Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 15 Oct 2008 22:01:46 -0700 Subject: profiling: dynamically enable readprofile at runtime Way too often, I have a machine that exhibits some kind of crappy behavior. The CPU looks wedged in the kernel or it is spending way too much system time and I wonder what is responsible. I try to run readprofile. But, of course, Ubuntu doesn't enable it by default. Dang! The reason we boot-time enable it is that it takes a big bufffer that we generally can only bootmem alloc. But, does it hurt to at least try and runtime-alloc it? To use: echo 2 > /sys/kernel/profile Then run readprofile like normal. This should fix the compile issue with allmodconfig. I've compile-tested on a bunch more configs now including a few more architectures. Signed-off-by: Dave Hansen Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/ABI/testing/sysfs-profiling | 13 ++++++++++ include/linux/profile.h | 8 +++--- kernel/ksysfs.c | 35 ++++++++++++++++++++++++++ kernel/profile.c | 41 +++++++++++++++++++++++-------- 4 files changed, 84 insertions(+), 13 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-profiling (limited to 'kernel') diff --git a/Documentation/ABI/testing/sysfs-profiling b/Documentation/ABI/testing/sysfs-profiling new file mode 100644 index 00000000000..b02d8b8c173 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-profiling @@ -0,0 +1,13 @@ +What: /sys/kernel/profile +Date: September 2008 +Contact: Dave Hansen +Description: + /sys/kernel/profile is the runtime equivalent + of the boot-time profile= option. + + You can get the same effect running: + + echo 2 > /sys/kernel/profile + + as you would by issuing profile=2 on the boot + command line. diff --git a/include/linux/profile.h b/include/linux/profile.h index 7e7087239af..570045053ce 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -35,7 +35,9 @@ enum profile_type { extern int prof_on __read_mostly; /* init basic kernel profiler */ -void __init profile_init(void); +int profile_init(void); +int profile_setup(char *str); +int create_proc_profile(void); void profile_tick(int type); /* @@ -84,9 +86,9 @@ struct pt_regs; #define prof_on 0 -static inline void profile_init(void) +static inline int profile_init(void) { - return; + return 0; } static inline void profile_tick(int type) diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index e53bc30e9ba..08dd8ed86c7 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #define KERNEL_ATTR_RO(_name) \ @@ -53,6 +54,37 @@ static ssize_t uevent_helper_store(struct kobject *kobj, KERNEL_ATTR_RW(uevent_helper); #endif +#ifdef CONFIG_PROFILING +static ssize_t profiling_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", prof_on); +} +static ssize_t profiling_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + int ret; + + if (prof_on) + return -EEXIST; + /* + * This eventually calls into get_option() which + * has a ton of callers and is not const. It is + * easiest to cast it away here. + */ + profile_setup((char *)buf); + ret = profile_init(); + if (ret) + return ret; + ret = create_proc_profile(); + if (ret) + return ret; + return count; +} +KERNEL_ATTR_RW(profiling); +#endif + #ifdef CONFIG_KEXEC static ssize_t kexec_loaded_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -109,6 +141,9 @@ static struct attribute * kernel_attrs[] = { &uevent_seqnum_attr.attr, &uevent_helper_attr.attr, #endif +#ifdef CONFIG_PROFILING + &profiling_attr.attr, +#endif #ifdef CONFIG_KEXEC &kexec_loaded_attr.attr, &kexec_crash_loaded_attr.attr, diff --git a/kernel/profile.c b/kernel/profile.c index cd26bed4cc2..a9e422df6bf 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include #include @@ -50,11 +52,11 @@ static DEFINE_PER_CPU(int, cpu_profile_flip); static DEFINE_MUTEX(profile_flip_mutex); #endif /* CONFIG_SMP */ -static int __init profile_setup(char *str) +int profile_setup(char *str) { - static char __initdata schedstr[] = "schedule"; - static char __initdata sleepstr[] = "sleep"; - static char __initdata kvmstr[] = "kvm"; + static char schedstr[] = "schedule"; + static char sleepstr[] = "sleep"; + static char kvmstr[] = "kvm"; int par; if (!strncmp(str, sleepstr, strlen(sleepstr))) { @@ -100,14 +102,33 @@ static int __init profile_setup(char *str) __setup("profile=", profile_setup); -void __init profile_init(void) +int profile_init(void) { + int buffer_bytes; if (!prof_on) - return; + return 0; /* only text is profiled */ prof_len = (_etext - _stext) >> prof_shift; - prof_buffer = alloc_bootmem(prof_len*sizeof(atomic_t)); + buffer_bytes = prof_len*sizeof(atomic_t); + if (!slab_is_available()) { + prof_buffer = alloc_bootmem(buffer_bytes); + return 0; + } + + prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL); + if (prof_buffer) + return 0; + + prof_buffer = alloc_pages_exact(buffer_bytes, GFP_KERNEL|__GFP_ZERO); + if (prof_buffer) + return 0; + + prof_buffer = vmalloc(buffer_bytes); + if (prof_buffer) + return 0; + + return -ENOMEM; } /* Profile event notifications */ @@ -527,7 +548,7 @@ static void __init profile_nop(void *unused) { } -static int __init create_hash_tables(void) +static int create_hash_tables(void) { int cpu; @@ -575,14 +596,14 @@ out_cleanup: #define create_hash_tables() ({ 0; }) #endif -static int __init create_proc_profile(void) +int create_proc_profile(void) { struct proc_dir_entry *entry; if (!prof_on) return 0; if (create_hash_tables()) - return -1; + return -ENOMEM; entry = proc_create("profile", S_IWUSR | S_IRUGO, NULL, &proc_profile_operations); if (!entry) -- cgit v1.2.3-70-g09d2 From 87988815073918134c0dae059cf247a4472d78ed Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Wed, 15 Oct 2008 22:01:51 -0700 Subject: utsname: completely overwrite prior information On sethostname() and setdomainname(), previous information may be retained if it was longer than than the new hostname/domainname. This can be demonstrated trivially by calling sethostname() first with a long name, then with a short name, and then calling uname() to retrieve the full buffer that contains the hostname (and possibly parts of the old hostname), one just has to look past the terminating zero. I don't know if we should really care that much (hence the RFC); the only scenarios I can possibly think of is administrator putting something sensitive in the hostname (or domain name) by accident, and changing it back will not undo the mistake entirely, though it's not like we can recover gracefully from "rm -rf /" either... The other scenario is namespaces (CLONE_NEWUTS) where some information may be unintentionally "inherited" from the previous namespace (a program wants to hide the original name and does clone + sethostname, but some information is still left). I think the patch may be defended on grounds of the principle of least surprise. But I am not adamant :-) (I guess the question now is whether userspace should be able to write embedded NULs into the buffer or not...) At least the observation has been made and the patch has been presented. Signed-off-by: Vegard Nossum Cc: "Eric W. Biederman" Cc: "Serge E. Hallyn" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sys.c b/kernel/sys.c index d5b79f65ad9..558b035965a 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1350,7 +1350,8 @@ asmlinkage long sys_sethostname(char __user *name, int len) errno = -EFAULT; if (!copy_from_user(tmp, name, len)) { memcpy(utsname()->nodename, tmp, len); - utsname()->nodename[len] = 0; + memset(utsname()->nodename + len, 0, + sizeof(utsname()->nodename) - len); errno = 0; } up_write(&uts_sem); @@ -1396,7 +1397,8 @@ asmlinkage long sys_setdomainname(char __user *name, int len) errno = -EFAULT; if (!copy_from_user(tmp, name, len)) { memcpy(utsname()->domainname, tmp, len); - utsname()->domainname[len] = 0; + memset(utsname()->domainname + len, 0, + sizeof(utsname()->domainname) - len); errno = 0; } up_write(&uts_sem); -- cgit v1.2.3-70-g09d2 From 9679e4dd628743b9ef4375d60ae69923c3766173 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 15 Oct 2008 22:01:51 -0700 Subject: kernel/sys.c: improve code generation utsname() is quite expensive to calculate. Cache it in a local. text data bss dec hex filename before: 11136 720 16 11872 2e60 kernel/sys.o after: 11096 720 16 11832 2e38 kernel/sys.o Acked-by: Vegard Nossum Cc: "Eric W. Biederman" Acked-by: "Serge E. Hallyn" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/sys.c b/kernel/sys.c index 558b035965a..0bc8fa3c228 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1349,9 +1349,10 @@ asmlinkage long sys_sethostname(char __user *name, int len) down_write(&uts_sem); errno = -EFAULT; if (!copy_from_user(tmp, name, len)) { - memcpy(utsname()->nodename, tmp, len); - memset(utsname()->nodename + len, 0, - sizeof(utsname()->nodename) - len); + struct new_utsname *u = utsname(); + + memcpy(u->nodename, tmp, len); + memset(u->nodename + len, 0, sizeof(u->nodename) - len); errno = 0; } up_write(&uts_sem); @@ -1363,15 +1364,17 @@ asmlinkage long sys_sethostname(char __user *name, int len) asmlinkage long sys_gethostname(char __user *name, int len) { int i, errno; + struct new_utsname *u; if (len < 0) return -EINVAL; down_read(&uts_sem); - i = 1 + strlen(utsname()->nodename); + u = utsname(); + i = 1 + strlen(u->nodename); if (i > len) i = len; errno = 0; - if (copy_to_user(name, utsname()->nodename, i)) + if (copy_to_user(name, u->nodename, i)) errno = -EFAULT; up_read(&uts_sem); return errno; @@ -1396,9 +1399,10 @@ asmlinkage long sys_setdomainname(char __user *name, int len) down_write(&uts_sem); errno = -EFAULT; if (!copy_from_user(tmp, name, len)) { - memcpy(utsname()->domainname, tmp, len); - memset(utsname()->domainname + len, 0, - sizeof(utsname()->domainname) - len); + struct new_utsname *u = utsname(); + + memcpy(u->domainname, tmp, len); + memset(u->domainname + len, 0, sizeof(u->domainname) - len); errno = 0; } up_write(&uts_sem); -- cgit v1.2.3-70-g09d2 From 7968b3d9a673e922ab980b2616945e63a52d88fe Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 15 Oct 2008 22:01:57 -0700 Subject: kernel/kallsyms.c: fix double return Commit 6dd06c9fbe025f542bce4cdb91790c0f91962722 ("module: make module_address_lookup safe") introduced double returns in the function kallsyms_lookup(), it's weird. The second one should be removed. Signed-off-by: WANG Cong Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kallsyms.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel') diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 38fc10ac754..5072cf1685a 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -260,7 +260,6 @@ const char *kallsyms_lookup(unsigned long addr, /* see if it's in a module */ return module_address_lookup(addr, symbolsize, offset, modname, namebuf); - return NULL; } int lookup_symbol_name(unsigned long addr, char *symname) -- cgit v1.2.3-70-g09d2 From e1f8e87449147ffe5ea3de64a46af7de450ce279 Mon Sep 17 00:00:00 2001 From: Francois Cami Date: Wed, 15 Oct 2008 22:01:59 -0700 Subject: Remove Andrew Morton's old email accounts People can use the real name an an index into MAINTAINERS to find the current email address. Signed-off-by: Francois Cami Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/SAK.txt | 2 +- Documentation/SubmittingDrivers | 2 +- Documentation/networking/cs89x0.txt | 4 ++-- Documentation/networking/vortex.txt | 4 ++-- Documentation/scsi/ChangeLog.megaraid | 6 +++--- drivers/char/vt.c | 2 +- drivers/net/3c509.c | 2 +- drivers/net/cs89x0.c | 13 ++++++------- drivers/parport/ChangeLog | 2 +- fs/direct-io.c | 4 ++-- fs/fs-writeback.c | 2 +- fs/mpage.c | 2 +- include/linux/journal-head.h | 2 +- include/linux/task_io_accounting.h | 2 +- kernel/printk.c | 2 +- kernel/workqueue.c | 2 +- mm/fadvise.c | 2 +- mm/page-writeback.c | 2 +- mm/pdflush.c | 2 +- mm/readahead.c | 2 +- mm/truncate.c | 2 +- 21 files changed, 31 insertions(+), 32 deletions(-) (limited to 'kernel') diff --git a/Documentation/SAK.txt b/Documentation/SAK.txt index b9019ca872e..74be14679ed 100644 --- a/Documentation/SAK.txt +++ b/Documentation/SAK.txt @@ -1,5 +1,5 @@ Linux 2.4.2 Secure Attention Key (SAK) handling -18 March 2001, Andrew Morton +18 March 2001, Andrew Morton An operating system's Secure Attention Key is a security tool which is provided as protection against trojan password capturing programs. It diff --git a/Documentation/SubmittingDrivers b/Documentation/SubmittingDrivers index 24f2eb40cae..99e72a81fa2 100644 --- a/Documentation/SubmittingDrivers +++ b/Documentation/SubmittingDrivers @@ -41,7 +41,7 @@ Linux 2.4: Linux 2.6: The same rules apply as 2.4 except that you should follow linux-kernel to track changes in API's. The final contact point for Linux 2.6 - submissions is Andrew Morton . + submissions is Andrew Morton. What Criteria Determine Acceptance ---------------------------------- diff --git a/Documentation/networking/cs89x0.txt b/Documentation/networking/cs89x0.txt index 6387d3decf8..c725d33b316 100644 --- a/Documentation/networking/cs89x0.txt +++ b/Documentation/networking/cs89x0.txt @@ -3,7 +3,7 @@ NOTE ---- This document was contributed by Cirrus Logic for kernel 2.2.5. This version -has been updated for 2.3.48 by Andrew Morton +has been updated for 2.3.48 by Andrew Morton. Cirrus make a copy of this driver available at their website, as described below. In general, you should use the driver version which @@ -690,7 +690,7 @@ latest drivers and technical publications. 6.4 Current maintainer In February 2000 the maintenance of this driver was assumed by Andrew -Morton +Morton. 6.5 Kernel module parameters diff --git a/Documentation/networking/vortex.txt b/Documentation/networking/vortex.txt index 6356d3faed3..bd874daabde 100644 --- a/Documentation/networking/vortex.txt +++ b/Documentation/networking/vortex.txt @@ -1,5 +1,5 @@ Documentation/networking/vortex.txt -Andrew Morton +Andrew Morton 30 April 2000 @@ -11,7 +11,7 @@ The driver was written by Donald Becker Don is no longer the prime maintainer of this version of the driver. Please report problems to one or more of: - Andrew Morton + Andrew Morton Netdev mailing list Linux kernel mailing list diff --git a/Documentation/scsi/ChangeLog.megaraid b/Documentation/scsi/ChangeLog.megaraid index 37796fe45bd..eaa4801f2ce 100644 --- a/Documentation/scsi/ChangeLog.megaraid +++ b/Documentation/scsi/ChangeLog.megaraid @@ -409,7 +409,7 @@ i. Function reordering so that inline functions are defined before they megaraid_mbox_prepare_pthru, megaraid_mbox_prepare_epthru, megaraid_busywait_mbox - - Andrew Morton , 08.19.2004 + - Andrew Morton, 08.19.2004 linux-scsi mailing list "Something else to clean up after inclusion: every instance of an @@ -471,13 +471,13 @@ vi. Add support for 64-bit applications. Current drivers assume only vii. Move the function declarations for the management module from megaraid_mm.h to megaraid_mm.c - - Andrew Morton , 08.19.2004 + - Andrew Morton, 08.19.2004 linux-scsi mailing list viii. Change default values for MEGARAID_NEWGEN, MEGARAID_MM, and MEGARAID_MAILBOX to 'n' in Kconfig.megaraid - - Andrew Morton , 08.19.2004 + - Andrew Morton, 08.19.2004 linux-scsi mailing list ix. replace udelay with msleep diff --git a/drivers/char/vt.c b/drivers/char/vt.c index a0f7ffb6808..d8f83e26e4a 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -59,7 +59,7 @@ * by Martin Mares , July 1998 * * Removed old-style timers, introduced console_timer, made timer - * deletion SMP-safe. 17Jun00, Andrew Morton + * deletion SMP-safe. 17Jun00, Andrew Morton * * Removed console_lock, enabled interrupts across all console operations * 13 March 2001, Andrew Morton diff --git a/drivers/net/3c509.c b/drivers/net/3c509.c index b9d097c9f6b..3a7bc524af3 100644 --- a/drivers/net/3c509.c +++ b/drivers/net/3c509.c @@ -40,7 +40,7 @@ v1.14 10/15/97 Avoided waiting..discard message for fast machines -djb v1.15 1/31/98 Faster recovery for Tx errors. -djb v1.16 2/3/98 Different ID port handling to avoid sound cards. -djb - v1.18 12Mar2001 Andrew Morton + v1.18 12Mar2001 Andrew Morton - Avoid bogus detect of 3c590's (Andrzej Krzysztofowicz) - Reviewed against 1.18 from scyld.com v1.18a 17Nov2001 Jeff Garzik diff --git a/drivers/net/cs89x0.c b/drivers/net/cs89x0.c index a28de818280..7107620f615 100644 --- a/drivers/net/cs89x0.c +++ b/drivers/net/cs89x0.c @@ -36,8 +36,7 @@ Alan Cox : Removed 1.2 support, added 2.1 extra counters. - Andrew Morton : andrewm@uow.edu.au - : Kernel 2.3.48 + Andrew Morton : Kernel 2.3.48 : Handle kmalloc() failures : Other resource allocation fixes : Add SMP locks @@ -49,7 +48,7 @@ : Fixed an out-of-mem bug in dma_rx() : Updated Documentation/networking/cs89x0.txt - Andrew Morton : andrewm@uow.edu.au / Kernel 2.3.99-pre1 + Andrew Morton : Kernel 2.3.99-pre1 : Use skb_reserve to longword align IP header (two places) : Remove a delay loop from dma_rx() : Replace '100' with HZ @@ -57,11 +56,11 @@ : Added 'cs89x0_dma=N' kernel boot option : Correctly initialise lp->lock in non-module compile - Andrew Morton : andrewm@uow.edu.au / Kernel 2.3.99-pre4-1 + Andrew Morton : Kernel 2.3.99-pre4-1 : MOD_INC/DEC race fix (see : http://www.uwsg.indiana.edu/hypermail/linux/kernel/0003.3/1532.html) - Andrew Morton : andrewm@uow.edu.au / Kernel 2.4.0-test7-pre2 + Andrew Morton : Kernel 2.4.0-test7-pre2 : Enhanced EEPROM support to cover more devices, : abstracted IRQ mapping to support CONFIG_ARCH_CLPS7500 arch : (Jason Gunthorpe ) @@ -156,7 +155,7 @@ #include "cs89x0.h" static char version[] __initdata = -"cs89x0.c: v2.4.3-pre1 Russell Nelson , Andrew Morton \n"; +"cs89x0.c: v2.4.3-pre1 Russell Nelson , Andrew Morton\n"; #define DRV_NAME "cs89x0" @@ -1877,7 +1876,7 @@ MODULE_PARM_DESC(dmasize , "(ignored)"); MODULE_PARM_DESC(use_dma , "(ignored)"); #endif -MODULE_AUTHOR("Mike Cruse, Russwll Nelson , Andrew Morton "); +MODULE_AUTHOR("Mike Cruse, Russwll Nelson , Andrew Morton"); MODULE_LICENSE("GPL"); diff --git a/drivers/parport/ChangeLog b/drivers/parport/ChangeLog index db717c1d62a..8565bbbeb6e 100644 --- a/drivers/parport/ChangeLog +++ b/drivers/parport/ChangeLog @@ -311,7 +311,7 @@ * ieee1284_ops.c (parport_ieee1284_read_nibble): Reset nAutoFd on timeout. Matches 2.2.x behaviour. -2001-03-02 Andrew Morton +2001-03-02 Andrew Morton * parport_pc.c (registered_parport): New static variable. (parport_pc_find_ports): Set it when we register PCI driver. diff --git a/fs/direct-io.c b/fs/direct-io.c index 9606ee848fd..af0558dbe8b 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -5,11 +5,11 @@ * * O_DIRECT * - * 04Jul2002 akpm@zip.com.au + * 04Jul2002 Andrew Morton * Initial version * 11Sep2002 janetinc@us.ibm.com * added readv/writev support. - * 29Oct2002 akpm@zip.com.au + * 29Oct2002 Andrew Morton * rewrote bio_add_page() support. * 30Oct2002 pbadari@us.ibm.com * added support for non-aligned IO. diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 25adfc3c693..d0ff0b8cf30 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -8,7 +8,7 @@ * pages against inodes. ie: data writeback. Writeout of the * inode itself is not handled here. * - * 10Apr2002 akpm@zip.com.au + * 10Apr2002 Andrew Morton * Split out of fs/inode.c * Additions for address_space-based writeback */ diff --git a/fs/mpage.c b/fs/mpage.c index dbcc7af76a1..552b80b3fac 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -6,7 +6,7 @@ * Contains functions related to preparing and submitting BIOs which contain * multiple pagecache pages. * - * 15May2002 akpm@zip.com.au + * 15May2002 Andrew Morton * Initial version * 27Jun2002 axboe@suse.de * use bio_add_page() to build bio's just the right size diff --git a/include/linux/journal-head.h b/include/linux/journal-head.h index 8a62d1e84b9..bb70ebb6a2d 100644 --- a/include/linux/journal-head.h +++ b/include/linux/journal-head.h @@ -3,7 +3,7 @@ * * buffer_head fields for JBD * - * 27 May 2001 Andrew Morton + * 27 May 2001 Andrew Morton * Created - pulled out of fs.h */ diff --git a/include/linux/task_io_accounting.h b/include/linux/task_io_accounting.h index 5e88afc9a2f..bdf855c2856 100644 --- a/include/linux/task_io_accounting.h +++ b/include/linux/task_io_accounting.h @@ -5,7 +5,7 @@ * Don't include this header file directly - it is designed to be dragged in via * sched.h. * - * Blame akpm@osdl.org for all this. + * Blame Andrew Morton for all this. */ struct task_io_accounting { diff --git a/kernel/printk.c b/kernel/printk.c index a430fd04008..c3ab51dc9fa 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -13,7 +13,7 @@ * Fixed SMP synchronization, 08/08/99, Manfred Spraul * manfred@colorfullife.com * Rewrote bits to get rid of console_lock - * 01Mar01 Andrew Morton + * 01Mar01 Andrew Morton */ #include diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 4048e92aa04..714afad4653 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -9,7 +9,7 @@ * Derived from the taskqueue/keventd code by: * * David Woodhouse - * Andrew Morton + * Andrew Morton * Kai Petzke * Theodore Ts'o * diff --git a/mm/fadvise.c b/mm/fadvise.c index 343cfdfebd9..a1da969bd98 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c @@ -3,7 +3,7 @@ * * Copyright (C) 2002, Linus Torvalds * - * 11Jan2003 akpm@digeo.com + * 11Jan2003 Andrew Morton * Initial version. */ diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 24de8b65fdb..c130a137c12 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -7,7 +7,7 @@ * Contains functions related to writing back dirty pages at the * address_space level. * - * 10Apr2002 akpm@zip.com.au + * 10Apr2002 Andrew Morton * Initial version */ diff --git a/mm/pdflush.c b/mm/pdflush.c index 0cbe0c60c6b..a0a14c4d507 100644 --- a/mm/pdflush.c +++ b/mm/pdflush.c @@ -3,7 +3,7 @@ * * Copyright (C) 2002, Linus Torvalds. * - * 09Apr2002 akpm@zip.com.au + * 09Apr2002 Andrew Morton * Initial version * 29Feb2004 kaos@sgi.com * Move worker thread creation to kthread to avoid chewing diff --git a/mm/readahead.c b/mm/readahead.c index 77e8ddf945e..6cbd9a72fde 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -3,7 +3,7 @@ * * Copyright (C) 2002, Linus Torvalds * - * 09Apr2002 akpm@zip.com.au + * 09Apr2002 Andrew Morton * Initial version. */ diff --git a/mm/truncate.c b/mm/truncate.c index 6650c1d878b..e83e4b114ef 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -3,7 +3,7 @@ * * Copyright (C) 2002, Linus Torvalds * - * 10Sep2002 akpm@zip.com.au + * 10Sep2002 Andrew Morton * Initial version. */ -- cgit v1.2.3-70-g09d2 From 6d5cd6effed5f8c958a6c0df56da336f5a4fdb8a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Oct 2008 22:02:00 -0700 Subject: taint: fix kernel-doc Move print_tainted() kernel-doc to avoid the following error: Error(/var/linsrc/mmotm-2008-1002-1617//kernel/panic.c:155): cannot understand prototype: 'struct tnt ' Signed-off-by: Randy Dunlap Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/panic.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/kernel/panic.c b/kernel/panic.c index 028013f7afd..f290e8e866f 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -143,21 +143,6 @@ NORET_TYPE void panic(const char * fmt, ...) EXPORT_SYMBOL(panic); -/** - * print_tainted - return a string to represent the kernel taint state. - * - * 'P' - Proprietary module has been loaded. - * 'F' - Module has been forcibly loaded. - * 'S' - SMP with CPUs not designed for SMP. - * 'R' - User forced a module unload. - * 'M' - System experienced a machine check exception. - * 'B' - System has hit bad_page. - * 'U' - Userspace-defined naughtiness. - * 'A' - ACPI table overridden. - * 'W' - Taint on warning. - * - * The string is overwritten by the next call to print_taint(). - */ struct tnt { u8 bit; @@ -178,6 +163,21 @@ static const struct tnt tnts[] = { { TAINT_WARN, 'W', ' ' }, }; +/** + * print_tainted - return a string to represent the kernel taint state. + * + * 'P' - Proprietary module has been loaded. + * 'F' - Module has been forcibly loaded. + * 'S' - SMP with CPUs not designed for SMP. + * 'R' - User forced a module unload. + * 'M' - System experienced a machine check exception. + * 'B' - System has hit bad_page. + * 'U' - Userspace-defined naughtiness. + * 'A' - ACPI table overridden. + * 'W' - Taint on warning. + * + * The string is overwritten by the next call to print_taint(). + */ const char *print_tainted(void) { static char buf[ARRAY_SIZE(tnts) + sizeof("Tainted: ") + 1]; -- cgit v1.2.3-70-g09d2 From 20036fdcaf05fac0a84ed81a56906493a7d822e2 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 15 Oct 2008 22:02:02 -0700 Subject: Add kerneldoc documentation for new printk format extensions Add documentation in kerneldoc for new printk format extensions This patch documents the new %pS/%pF options in printk in kernel doc. Hope I didn't miss any other extension. Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk.c | 2 ++ lib/vsprintf.c | 12 ++++++++++++ 2 files changed, 14 insertions(+) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index c3ab51dc9fa..fbd94bdaa74 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -593,6 +593,8 @@ static int have_callable_console(void) * * See also: * printf(3) + * + * See the vsnprintf() documentation for format string extensions over C99. */ asmlinkage int printk(const char *fmt, ...) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index c399bc1093c..4c6674a41ed 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -565,6 +565,10 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field * @fmt: The format string to use * @args: Arguments for the format string * + * This function follows C99 vsnprintf, but has some extensions: + * %pS output the name of a text symbol + * %pF output the name of a function pointer + * * The return value is the number of characters which would * be generated for the given input, excluding the trailing * '\0', as per ISO C99. If you want to have the exact @@ -806,6 +810,8 @@ EXPORT_SYMBOL(vsnprintf); * * Call this function if you are already dealing with a va_list. * You probably want scnprintf() instead. + * + * See the vsnprintf() documentation for format string extensions over C99. */ int vscnprintf(char *buf, size_t size, const char *fmt, va_list args) { @@ -828,6 +834,8 @@ EXPORT_SYMBOL(vscnprintf); * generated for the given input, excluding the trailing null, * as per ISO C99. If the return is greater than or equal to * @size, the resulting string is truncated. + * + * See the vsnprintf() documentation for format string extensions over C99. */ int snprintf(char * buf, size_t size, const char *fmt, ...) { @@ -877,6 +885,8 @@ EXPORT_SYMBOL(scnprintf); * * Call this function if you are already dealing with a va_list. * You probably want sprintf() instead. + * + * See the vsnprintf() documentation for format string extensions over C99. */ int vsprintf(char *buf, const char *fmt, va_list args) { @@ -894,6 +904,8 @@ EXPORT_SYMBOL(vsprintf); * The function returns the number of characters written * into @buf. Use snprintf() or scnprintf() in order to avoid * buffer overflows. + * + * See the vsnprintf() documentation for format string extensions over C99. */ int sprintf(char * buf, const char *fmt, ...) { -- cgit v1.2.3-70-g09d2 From b418da16dd44810e5d5a22bba377cca80512a524 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 15 Oct 2008 22:02:06 -0700 Subject: compat: generic compat get/settimeofday Nothing arch specific in get/settimeofday. The details of the timeval conversion varied a little from arch to arch, but all with the same results. Also add an extern declaration for sys_tz to linux/time.h because externs in .c files are fowned upon. I'll kill the externs in various other files in a sparate patch. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Christoph Hellwig Acked-by: David S. Miller [ sparc bits ] Cc: "Luck, Tony" Cc: Ralf Baechle Acked-by: Kyle McMartin Cc: Matthew Wilcox Cc: Grant Grundler Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/ia32/ia32_entry.S | 4 +-- arch/ia64/ia32/sys_ia32.c | 56 ------------------------------- arch/mips/kernel/linux32.c | 66 ------------------------------------- arch/mips/kernel/scall64-n32.S | 4 +-- arch/mips/kernel/scall64-o32.S | 4 +-- arch/parisc/kernel/sys_parisc32.c | 58 --------------------------------- arch/parisc/kernel/syscall_table.S | 4 +-- arch/powerpc/kernel/sys_ppc32.c | 63 ----------------------------------- arch/s390/kernel/compat_linux.c | 67 -------------------------------------- arch/s390/kernel/compat_linux.h | 4 --- arch/s390/kernel/compat_wrapper.S | 12 +++---- arch/s390/kernel/syscalls.S | 4 +-- arch/sparc64/kernel/sys_sparc32.c | 62 ----------------------------------- arch/sparc64/kernel/systbls.S | 4 +-- arch/x86/ia32/ia32entry.S | 4 +-- arch/x86/ia32/sys_ia32.c | 64 ------------------------------------ include/linux/compat.h | 5 +++ include/linux/time.h | 2 ++ kernel/compat.c | 58 +++++++++++++++++++++++++++++++++ 19 files changed, 85 insertions(+), 460 deletions(-) (limited to 'kernel') diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S index ff88c48c5d1..53505bb0477 100644 --- a/arch/ia64/ia32/ia32_entry.S +++ b/arch/ia64/ia32/ia32_entry.S @@ -251,8 +251,8 @@ ia32_syscall_table: data8 compat_sys_setrlimit /* 75 */ data8 compat_sys_old_getrlimit data8 compat_sys_getrusage - data8 sys32_gettimeofday - data8 sys32_settimeofday + data8 compat_sys_gettimeofday + data8 compat_sys_settimeofday data8 sys32_getgroups16 /* 80 */ data8 sys32_setgroups16 data8 sys32_old_select diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index 2362a8eefb3..f4430bb4bbd 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -1113,68 +1113,12 @@ sys32_pipe (int __user *fd) return retval; } -static inline long -get_tv32 (struct timeval *o, struct compat_timeval __user *i) -{ - return (!access_ok(VERIFY_READ, i, sizeof(*i)) || - (__get_user(o->tv_sec, &i->tv_sec) | __get_user(o->tv_usec, &i->tv_usec))); -} - -static inline long -put_tv32 (struct compat_timeval __user *o, struct timeval *i) -{ - return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) || - (__put_user(i->tv_sec, &o->tv_sec) | __put_user(i->tv_usec, &o->tv_usec))); -} - asmlinkage unsigned long sys32_alarm (unsigned int seconds) { return alarm_setitimer(seconds); } -/* Translations due to time_t size differences. Which affects all - sorts of things, like timeval and itimerval. */ - -extern struct timezone sys_tz; - -asmlinkage long -sys32_gettimeofday (struct compat_timeval __user *tv, struct timezone __user *tz) -{ - if (tv) { - struct timeval ktv; - do_gettimeofday(&ktv); - if (put_tv32(tv, &ktv)) - return -EFAULT; - } - if (tz) { - if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) - return -EFAULT; - } - return 0; -} - -asmlinkage long -sys32_settimeofday (struct compat_timeval __user *tv, struct timezone __user *tz) -{ - struct timeval ktv; - struct timespec kts; - struct timezone ktz; - - if (tv) { - if (get_tv32(&ktv, tv)) - return -EFAULT; - kts.tv_sec = ktv.tv_sec; - kts.tv_nsec = ktv.tv_usec * 1000; - } - if (tz) { - if (copy_from_user(&ktz, tz, sizeof(ktz))) - return -EFAULT; - } - - return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); -} - struct sel_arg_struct { unsigned int n; unsigned int inp; diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index 89223a9bff2..aa2c55e3b55 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -133,72 +133,6 @@ asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long __dummy, return sys_ftruncate(fd, merge_64(a2, a3)); } -static inline long -get_tv32(struct timeval *o, struct compat_timeval __user *i) -{ - return (!access_ok(VERIFY_READ, i, sizeof(*i)) || - (__get_user(o->tv_sec, &i->tv_sec) | - __get_user(o->tv_usec, &i->tv_usec))); -} - -static inline long -put_tv32(struct compat_timeval __user *o, struct timeval *i) -{ - return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) || - (__put_user(i->tv_sec, &o->tv_sec) | - __put_user(i->tv_usec, &o->tv_usec))); -} - -extern struct timezone sys_tz; - -asmlinkage int -sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) -{ - if (tv) { - struct timeval ktv; - do_gettimeofday(&ktv); - if (put_tv32(tv, &ktv)) - return -EFAULT; - } - if (tz) { - if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) - return -EFAULT; - } - return 0; -} - -static inline long get_ts32(struct timespec *o, struct compat_timeval __user *i) -{ - long usec; - - if (!access_ok(VERIFY_READ, i, sizeof(*i))) - return -EFAULT; - if (__get_user(o->tv_sec, &i->tv_sec)) - return -EFAULT; - if (__get_user(usec, &i->tv_usec)) - return -EFAULT; - o->tv_nsec = usec * 1000; - return 0; -} - -asmlinkage int -sys32_settimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) -{ - struct timespec kts; - struct timezone ktz; - - if (tv) { - if (get_ts32(&kts, tv)) - return -EFAULT; - } - if (tz) { - if (copy_from_user(&ktz, tz, sizeof(ktz))) - return -EFAULT; - } - - return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); -} - asmlinkage int sys32_llseek(unsigned int fd, unsigned int offset_high, unsigned int offset_low, loff_t __user * result, unsigned int origin) diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 324c5499dec..e266b3aa656 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -214,7 +214,7 @@ EXPORT(sysn32_call_table) PTR sys_fchown PTR sys_lchown PTR sys_umask - PTR sys32_gettimeofday + PTR compat_sys_gettimeofday PTR compat_sys_getrlimit /* 6095 */ PTR compat_sys_getrusage PTR compat_sys_sysinfo @@ -279,7 +279,7 @@ EXPORT(sysn32_call_table) PTR sys_chroot PTR sys_sync PTR sys_acct - PTR sys32_settimeofday + PTR compat_sys_settimeofday PTR compat_sys_mount /* 6160 */ PTR sys_umount PTR sys_swapon diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 85fedac99a5..6c7ef8313eb 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -283,8 +283,8 @@ sys_call_table: PTR compat_sys_setrlimit /* 4075 */ PTR compat_sys_getrlimit PTR compat_sys_getrusage - PTR sys32_gettimeofday - PTR sys32_settimeofday + PTR compat_sys_gettimeofday + PTR compat_sys_settimeofday PTR sys_getgroups /* 4080 */ PTR sys_setgroups PTR sys_ni_syscall /* old_select */ diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c index 2c3af17e049..0838155b7a8 100644 --- a/arch/parisc/kernel/sys_parisc32.c +++ b/arch/parisc/kernel/sys_parisc32.c @@ -179,64 +179,6 @@ asmlinkage long sys32_sched_rr_get_interval(pid_t pid, return ret; } -static int -put_compat_timeval(struct compat_timeval __user *u, struct timeval *t) -{ - struct compat_timeval t32; - t32.tv_sec = t->tv_sec; - t32.tv_usec = t->tv_usec; - return copy_to_user(u, &t32, sizeof t32); -} - -static inline long get_ts32(struct timespec *o, struct compat_timeval __user *i) -{ - long usec; - - if (__get_user(o->tv_sec, &i->tv_sec)) - return -EFAULT; - if (__get_user(usec, &i->tv_usec)) - return -EFAULT; - o->tv_nsec = usec * 1000; - return 0; -} - -asmlinkage int -sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) -{ - extern void do_gettimeofday(struct timeval *tv); - - if (tv) { - struct timeval ktv; - do_gettimeofday(&ktv); - if (put_compat_timeval(tv, &ktv)) - return -EFAULT; - } - if (tz) { - extern struct timezone sys_tz; - if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) - return -EFAULT; - } - return 0; -} - -asmlinkage -int sys32_settimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) -{ - struct timespec kts; - struct timezone ktz; - - if (tv) { - if (get_ts32(&kts, tv)) - return -EFAULT; - } - if (tz) { - if (copy_from_user(&ktz, tz, sizeof(ktz))) - return -EFAULT; - } - - return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); -} - /*** copied from mips64 ***/ /* * Ooo, nasty. We need here to frob 32-bit unsigned longs to diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index 6b5ac38f5a9..c7e59f54881 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -149,8 +149,8 @@ ENTRY_COMP(getrlimit) ENTRY_COMP(getrusage) /* struct timeval and timezone are maybe?? consistent wide and narrow */ - ENTRY_DIFF(gettimeofday) - ENTRY_DIFF(settimeofday) + ENTRY_COMP(gettimeofday) + ENTRY_COMP(settimeofday) ENTRY_SAME(getgroups) /* 80 */ ENTRY_SAME(setgroups) /* struct socketaddr... */ diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index d00599bb24a..bb1cfcfdbbb 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -71,69 +71,6 @@ asmlinkage long compat_sys_sysfs(u32 option, u32 arg1, u32 arg2) return sys_sysfs((int)option, arg1, arg2); } -static inline long get_ts32(struct timespec *o, struct compat_timeval __user *i) -{ - long usec; - - if (!access_ok(VERIFY_READ, i, sizeof(*i))) - return -EFAULT; - if (__get_user(o->tv_sec, &i->tv_sec)) - return -EFAULT; - if (__get_user(usec, &i->tv_usec)) - return -EFAULT; - o->tv_nsec = usec * 1000; - return 0; -} - -static inline long put_tv32(struct compat_timeval __user *o, struct timeval *i) -{ - return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) || - (__put_user(i->tv_sec, &o->tv_sec) | - __put_user(i->tv_usec, &o->tv_usec))); -} - - - - -/* Translations due to time_t size differences. Which affects all - sorts of things, like timeval and itimerval. */ -extern struct timezone sys_tz; - -asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) -{ - if (tv) { - struct timeval ktv; - do_gettimeofday(&ktv); - if (put_tv32(tv, &ktv)) - return -EFAULT; - } - if (tz) { - if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) - return -EFAULT; - } - - return 0; -} - - - -asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) -{ - struct timespec kts; - struct timezone ktz; - - if (tv) { - if (get_ts32(&kts, tv)) - return -EFAULT; - } - if (tz) { - if (copy_from_user(&ktz, tz, sizeof(ktz))) - return -EFAULT; - } - - return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); -} - #ifdef CONFIG_SYSVIPC long compat_sys_ipc(u32 call, u32 first, u32 second, u32 third, compat_uptr_t ptr, u32 fifth) diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 9b471d785ec..4646382af34 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -279,22 +279,6 @@ asmlinkage long sys32_getegid16(void) return high2lowgid(current->egid); } -/* 32-bit timeval and related flotsam. */ - -static inline long get_tv32(struct timeval *o, struct compat_timeval __user *i) -{ - return (!access_ok(VERIFY_READ, o, sizeof(*o)) || - (__get_user(o->tv_sec, &i->tv_sec) || - __get_user(o->tv_usec, &i->tv_usec))); -} - -static inline long put_tv32(struct compat_timeval __user *o, struct timeval *i) -{ - return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) || - (__put_user(i->tv_sec, &o->tv_sec) || - __put_user(i->tv_usec, &o->tv_usec))); -} - /* * sys32_ipc() is the de-multiplexer for the SysV IPC calls in 32bit emulation. * @@ -522,57 +506,6 @@ sys32_delete_module(const char __user *name_user, unsigned int flags) #endif /* CONFIG_MODULES */ -/* Translations due to time_t size differences. Which affects all - sorts of things, like timeval and itimerval. */ - -extern struct timezone sys_tz; - -asmlinkage long sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) -{ - if (tv) { - struct timeval ktv; - do_gettimeofday(&ktv); - if (put_tv32(tv, &ktv)) - return -EFAULT; - } - if (tz) { - if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) - return -EFAULT; - } - return 0; -} - -static inline long get_ts32(struct timespec *o, struct compat_timeval __user *i) -{ - long usec; - - if (!access_ok(VERIFY_READ, i, sizeof(*i))) - return -EFAULT; - if (__get_user(o->tv_sec, &i->tv_sec)) - return -EFAULT; - if (__get_user(usec, &i->tv_usec)) - return -EFAULT; - o->tv_nsec = usec * 1000; - return 0; -} - -asmlinkage long sys32_settimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) -{ - struct timespec kts; - struct timezone ktz; - - if (tv) { - if (get_ts32(&kts, tv)) - return -EFAULT; - } - if (tz) { - if (copy_from_user(&ktz, tz, sizeof(ktz))) - return -EFAULT; - } - - return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); -} - asmlinkage long sys32_pread64(unsigned int fd, char __user *ubuf, size_t count, u32 poshi, u32 poslo) { diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h index 05f8516366a..836a2884290 100644 --- a/arch/s390/kernel/compat_linux.h +++ b/arch/s390/kernel/compat_linux.h @@ -202,10 +202,6 @@ long sys32_execve(void); long sys32_init_module(void __user *umod, unsigned long len, const char __user *uargs); long sys32_delete_module(const char __user *name_user, unsigned int flags); -long sys32_gettimeofday(struct compat_timeval __user *tv, - struct timezone __user *tz); -long sys32_settimeofday(struct compat_timeval __user *tv, - struct timezone __user *tz); long sys32_pread64(unsigned int fd, char __user *ubuf, size_t count, u32 poshi, u32 poslo); long sys32_pwrite64(unsigned int fd, const char __user *ubuf, diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index ee51ca9e23b..fc2c97197a5 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -332,17 +332,17 @@ compat_sys_getrusage_wrapper: llgtr %r3,%r3 # struct rusage_emu31 * jg compat_sys_getrusage # branch to system call - .globl sys32_gettimeofday_wrapper -sys32_gettimeofday_wrapper: + .globl compat_sys_gettimeofday_wrapper +compat_sys_gettimeofday_wrapper: llgtr %r2,%r2 # struct timeval_emu31 * llgtr %r3,%r3 # struct timezone * - jg sys32_gettimeofday # branch to system call + jg compat_sys_gettimeofday # branch to system call - .globl sys32_settimeofday_wrapper -sys32_settimeofday_wrapper: + .globl compat_sys_settimeofday_wrapper +compat_sys_settimeofday_wrapper: llgtr %r2,%r2 # struct timeval_emu31 * llgtr %r3,%r3 # struct timezone * - jg sys32_settimeofday # branch to system call + jg compat_sys_settimeofday # branch to system call .globl sys32_getgroups16_wrapper sys32_getgroups16_wrapper: diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 3ae303914b4..2d61787949d 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -86,8 +86,8 @@ SYSCALL(sys_sethostname,sys_sethostname,sys32_sethostname_wrapper) SYSCALL(sys_setrlimit,sys_setrlimit,compat_sys_setrlimit_wrapper) /* 75 */ SYSCALL(sys_old_getrlimit,sys_getrlimit,compat_sys_old_getrlimit_wrapper) SYSCALL(sys_getrusage,sys_getrusage,compat_sys_getrusage_wrapper) -SYSCALL(sys_gettimeofday,sys_gettimeofday,sys32_gettimeofday_wrapper) -SYSCALL(sys_settimeofday,sys_settimeofday,sys32_settimeofday_wrapper) +SYSCALL(sys_gettimeofday,sys_gettimeofday,compat_sys_gettimeofday_wrapper) +SYSCALL(sys_settimeofday,sys_settimeofday,compat_sys_settimeofday_wrapper) SYSCALL(sys_getgroups16,sys_ni_syscall,sys32_getgroups16_wrapper) /* 80 old getgroups16 syscall */ SYSCALL(sys_setgroups16,sys_ni_syscall,sys32_setgroups16_wrapper) /* old setgroups16 syscall */ NI_SYSCALL /* old select syscall */ diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c index 73a33dc3bcc..e800503879e 100644 --- a/arch/sparc64/kernel/sys_sparc32.c +++ b/arch/sparc64/kernel/sys_sparc32.c @@ -58,15 +58,6 @@ #include #include -/* 32-bit timeval and related flotsam. */ - -static inline long put_tv32(struct compat_timeval __user *o, struct timeval *i) -{ - return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) || - (__put_user(i->tv_sec, &o->tv_sec) | - __put_user(i->tv_usec, &o->tv_usec))); -} - #ifdef CONFIG_SYSVIPC asmlinkage long compat_sys_ipc(u32 call, u32 first, u32 second, u32 third, compat_uptr_t ptr, u32 fifth) { @@ -487,59 +478,6 @@ asmlinkage long sys32_delete_module(const char __user *name_user) #endif /* CONFIG_MODULES */ -/* Translations due to time_t size differences. Which affects all - sorts of things, like timeval and itimerval. */ - -extern struct timezone sys_tz; - -asmlinkage long sys32_gettimeofday(struct compat_timeval __user *tv, - struct timezone __user *tz) -{ - if (tv) { - struct timeval ktv; - do_gettimeofday(&ktv); - if (put_tv32(tv, &ktv)) - return -EFAULT; - } - if (tz) { - if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) - return -EFAULT; - } - return 0; -} - -static inline long get_ts32(struct timespec *o, struct compat_timeval __user *i) -{ - long usec; - - if (!access_ok(VERIFY_READ, i, sizeof(*i))) - return -EFAULT; - if (__get_user(o->tv_sec, &i->tv_sec)) - return -EFAULT; - if (__get_user(usec, &i->tv_usec)) - return -EFAULT; - o->tv_nsec = usec * 1000; - return 0; -} - -asmlinkage long sys32_settimeofday(struct compat_timeval __user *tv, - struct timezone __user *tz) -{ - struct timespec kts; - struct timezone ktz; - - if (tv) { - if (get_ts32(&kts, tv)) - return -EFAULT; - } - if (tz) { - if (copy_from_user(&ktz, tz, sizeof(ktz))) - return -EFAULT; - } - - return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); -} - asmlinkage compat_ssize_t sys32_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, diff --git a/arch/sparc64/kernel/systbls.S b/arch/sparc64/kernel/systbls.S index 5daee4b04dd..b2fa4c16363 100644 --- a/arch/sparc64/kernel/systbls.S +++ b/arch/sparc64/kernel/systbls.S @@ -41,8 +41,8 @@ sys_call_table32: /*100*/ .word sys32_getpriority, sys32_rt_sigreturn, sys32_rt_sigaction, sys32_rt_sigprocmask, sys32_rt_sigpending .word compat_sys_rt_sigtimedwait, sys32_rt_sigqueueinfo, compat_sys_rt_sigsuspend, sys_setresuid, sys_getresuid /*110*/ .word sys_setresgid, sys_getresgid, sys_setregid, sys_nis_syscall, sys_nis_syscall - .word sys32_getgroups, sys32_gettimeofday, sys32_getrusage, sys_nis_syscall, sys_getcwd -/*120*/ .word compat_sys_readv, compat_sys_writev, sys32_settimeofday, sys_fchown16, sys_fchmod + .word sys32_getgroups, compat_sys_gettimeofday, sys32_getrusage, sys_nis_syscall, sys_getcwd +/*120*/ .word compat_sys_readv, compat_sys_writev, compat_sys_settimeofday, sys_fchown16, sys_fchmod .word sys_nis_syscall, sys_setreuid16, sys_setregid16, sys_rename, sys_truncate /*130*/ .word sys_ftruncate, sys_flock, compat_sys_lstat64, sys_nis_syscall, sys_nis_syscall .word sys_nis_syscall, sys32_mkdir, sys_rmdir, compat_sys_utimes, compat_sys_stat64 diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index eb4314768bf..256b00b6189 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -571,8 +571,8 @@ ia32_sys_call_table: .quad compat_sys_setrlimit /* 75 */ .quad compat_sys_old_getrlimit /* old_getrlimit */ .quad compat_sys_getrusage - .quad sys32_gettimeofday - .quad sys32_settimeofday + .quad compat_sys_gettimeofday + .quad compat_sys_settimeofday .quad sys_getgroups16 /* 80 */ .quad sys_setgroups16 .quad sys32_old_select diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 4d3ad8d78a4..2e09dcd3c0a 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -367,75 +367,11 @@ asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, return 0; } -static inline long get_tv32(struct timeval *o, struct compat_timeval __user *i) -{ - int err = -EFAULT; - - if (access_ok(VERIFY_READ, i, sizeof(*i))) { - err = __get_user(o->tv_sec, &i->tv_sec); - err |= __get_user(o->tv_usec, &i->tv_usec); - } - return err; -} - -static inline long put_tv32(struct compat_timeval __user *o, struct timeval *i) -{ - int err = -EFAULT; - - if (access_ok(VERIFY_WRITE, o, sizeof(*o))) { - err = __put_user(i->tv_sec, &o->tv_sec); - err |= __put_user(i->tv_usec, &o->tv_usec); - } - return err; -} - asmlinkage long sys32_alarm(unsigned int seconds) { return alarm_setitimer(seconds); } -/* - * Translations due to time_t size differences. Which affects all - * sorts of things, like timeval and itimerval. - */ -asmlinkage long sys32_gettimeofday(struct compat_timeval __user *tv, - struct timezone __user *tz) -{ - if (tv) { - struct timeval ktv; - - do_gettimeofday(&ktv); - if (put_tv32(tv, &ktv)) - return -EFAULT; - } - if (tz) { - if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) - return -EFAULT; - } - return 0; -} - -asmlinkage long sys32_settimeofday(struct compat_timeval __user *tv, - struct timezone __user *tz) -{ - struct timeval ktv; - struct timespec kts; - struct timezone ktz; - - if (tv) { - if (get_tv32(&ktv, tv)) - return -EFAULT; - kts.tv_sec = ktv.tv_sec; - kts.tv_nsec = ktv.tv_usec * NSEC_PER_USEC; - } - if (tz) { - if (copy_from_user(&ktz, tz, sizeof(ktz))) - return -EFAULT; - } - - return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); -} - struct sel_arg_struct { unsigned int n; unsigned int inp; diff --git a/include/linux/compat.h b/include/linux/compat.h index 999dddd8d93..f061a1ea1b7 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -234,6 +234,11 @@ extern int get_compat_itimerspec(struct itimerspec *dst, extern int put_compat_itimerspec(struct compat_itimerspec __user *dst, const struct itimerspec *src); +asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv, + struct timezone __user *tz); +asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv, + struct timezone __user *tz); + asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp); extern int compat_printk(const char *fmt, ...); diff --git a/include/linux/time.h b/include/linux/time.h index e15206a7e82..51e883df0fa 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -29,6 +29,8 @@ struct timezone { #ifdef __KERNEL__ +extern struct timezone sys_tz; + /* Parameters used to convert the timespec values: */ #define MSEC_PER_SEC 1000L #define USEC_PER_MSEC 1000L diff --git a/kernel/compat.c b/kernel/compat.c index 32c254a8ab9..143990e48cb 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -26,6 +26,64 @@ #include +/* + * Note that the native side is already converted to a timespec, because + * that's what we want anyway. + */ +static int compat_get_timeval(struct timespec *o, + struct compat_timeval __user *i) +{ + long usec; + + if (get_user(o->tv_sec, &i->tv_sec) || + get_user(usec, &i->tv_usec)) + return -EFAULT; + o->tv_nsec = usec * 1000; + return 0; +} + +static int compat_put_timeval(struct compat_timeval __user *o, + struct timeval *i) +{ + return (put_user(i->tv_sec, &o->tv_sec) || + put_user(i->tv_usec, &o->tv_usec)) ? -EFAULT : 0; +} + +asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv, + struct timezone __user *tz) +{ + if (tv) { + struct timeval ktv; + do_gettimeofday(&ktv); + if (compat_put_timeval(tv, &ktv)) + return -EFAULT; + } + if (tz) { + if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) + return -EFAULT; + } + + return 0; +} + +asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv, + struct timezone __user *tz) +{ + struct timespec kts; + struct timezone ktz; + + if (tv) { + if (compat_get_timeval(&kts, tv)) + return -EFAULT; + } + if (tz) { + if (copy_from_user(&ktz, tz, sizeof(ktz))) + return -EFAULT; + } + + return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); +} + int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts) { return (!access_ok(VERIFY_READ, cts, sizeof(*cts)) || -- cgit v1.2.3-70-g09d2 From f221e726bf4e082a05dcd573379ac859bfba7126 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 15 Oct 2008 22:04:23 -0700 Subject: sysctl: simplify ->strategy name and nlen parameters passed to ->strategy hook are unused, remove them. In general ->strategy hook should know what it's doing, and don't do something tricky for which, say, pointer to original userspace array may be needed (name). Signed-off-by: Alexey Dobriyan Acked-by: David S. Miller [ networking bits ] Cc: Ralf Baechle Cc: David Howells Cc: Matt Mackall Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/frv/kernel/pm.c | 6 +++--- arch/mips/lasat/sysctl.c | 17 ++++++++--------- drivers/char/random.c | 2 +- include/linux/sysctl.h | 2 +- include/net/ip.h | 2 +- include/net/ndisc.h | 5 ++--- ipc/ipc_sysctl.c | 9 ++++----- kernel/sysctl.c | 29 +++++++++++++---------------- kernel/utsname_sysctl.c | 5 ++--- net/decnet/dn_dev.c | 4 ++-- net/decnet/sysctl_net_decnet.c | 4 ++-- net/ipv4/devinet.c | 7 +++---- net/ipv4/route.c | 7 +------ net/ipv4/sysctl_net_ipv4.c | 18 +++++++++--------- net/ipv6/addrconf.c | 1 - net/ipv6/ndisc.c | 11 ++++------- 16 files changed, 56 insertions(+), 73 deletions(-) (limited to 'kernel') diff --git a/arch/frv/kernel/pm.c b/arch/frv/kernel/pm.c index d1113c5031f..be722fc1acf 100644 --- a/arch/frv/kernel/pm.c +++ b/arch/frv/kernel/pm.c @@ -211,7 +211,7 @@ static int cmode_procctl(ctl_table *ctl, int write, struct file *filp, return try_set_cmode(new_cmode)?:*lenp; } -static int cmode_sysctl(ctl_table *table, int __user *name, int nlen, +static int cmode_sysctl(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -314,7 +314,7 @@ static int p0_procctl(ctl_table *ctl, int write, struct file *filp, return try_set_p0(new_p0)?:*lenp; } -static int p0_sysctl(ctl_table *table, int __user *name, int nlen, +static int p0_sysctl(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -358,7 +358,7 @@ static int cm_procctl(ctl_table *ctl, int write, struct file *filp, return try_set_cm(new_cm)?:*lenp; } -static int cm_sysctl(ctl_table *table, int __user *name, int nlen, +static int cm_sysctl(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { diff --git a/arch/mips/lasat/sysctl.c b/arch/mips/lasat/sysctl.c index 866881ec0cf..8f88886feb1 100644 --- a/arch/mips/lasat/sysctl.c +++ b/arch/mips/lasat/sysctl.c @@ -38,14 +38,13 @@ #endif /* Strategy function to write EEPROM after changing string entry */ -int sysctl_lasatstring(ctl_table *table, int *name, int nlen, +int sysctl_lasatstring(ctl_table *table, void *oldval, size_t *oldlenp, void *newval, size_t newlen) { int r; - r = sysctl_string(table, name, - nlen, oldval, oldlenp, newval, newlen); + r = sysctl_string(table, oldval, oldlenp, newval, newlen); if (r < 0) return r; @@ -113,13 +112,13 @@ int proc_dolasatrtc(ctl_table *table, int write, struct file *filp, #endif /* Sysctl for setting the IP addresses */ -int sysctl_lasat_intvec(ctl_table *table, int *name, int nlen, +int sysctl_lasat_intvec(ctl_table *table, void *oldval, size_t *oldlenp, void *newval, size_t newlen) { int r; - r = sysctl_intvec(table, name, nlen, oldval, oldlenp, newval, newlen); + r = sysctl_intvec(table, oldval, oldlenp, newval, newlen); if (r < 0) return r; @@ -131,7 +130,7 @@ int sysctl_lasat_intvec(ctl_table *table, int *name, int nlen, #ifdef CONFIG_DS1603 /* Same for RTC */ -int sysctl_lasat_rtc(ctl_table *table, int *name, int nlen, +int sysctl_lasat_rtc(ctl_table *table, void *oldval, size_t *oldlenp, void *newval, size_t newlen) { @@ -140,7 +139,7 @@ int sysctl_lasat_rtc(ctl_table *table, int *name, int nlen, rtctmp = read_persistent_clock(); if (rtctmp < 0) rtctmp = 0; - r = sysctl_intvec(table, name, nlen, oldval, oldlenp, newval, newlen); + r = sysctl_intvec(table, oldval, oldlenp, newval, newlen); if (r < 0) return r; if (newval && newlen) @@ -211,13 +210,13 @@ int proc_lasat_ip(ctl_table *table, int write, struct file *filp, } #endif -static int sysctl_lasat_prid(ctl_table *table, int *name, int nlen, +static int sysctl_lasat_prid(ctl_table *table, void *oldval, size_t *oldlenp, void *newval, size_t newlen) { int r; - r = sysctl_intvec(table, name, nlen, oldval, oldlenp, newval, newlen); + r = sysctl_intvec(table, oldval, oldlenp, newval, newlen); if (r < 0) return r; if (newval && newlen) { diff --git a/drivers/char/random.c b/drivers/char/random.c index 6af435b8986..c8752eaad48 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1205,7 +1205,7 @@ static int proc_do_uuid(ctl_table *table, int write, struct file *filp, return proc_dostring(&fake_table, write, filp, buffer, lenp, ppos); } -static int uuid_strategy(ctl_table *table, int __user *name, int nlen, +static int uuid_strategy(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index d0437f36921..39d471d1163 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -972,7 +972,7 @@ extern int sysctl_perm(struct ctl_table_root *root, typedef struct ctl_table ctl_table; -typedef int ctl_handler (struct ctl_table *table, int __user *name, int nlen, +typedef int ctl_handler (struct ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen); diff --git a/include/net/ip.h b/include/net/ip.h index 1cbccaf0de3..bc026ecb513 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -396,7 +396,7 @@ extern void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, int ipv4_doint_and_flush(ctl_table *ctl, int write, struct file* filp, void __user *buffer, size_t *lenp, loff_t *ppos); -int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen, +int ipv4_doint_and_flush_strategy(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen); #ifdef CONFIG_PROC_FS diff --git a/include/net/ndisc.h b/include/net/ndisc.h index a01b7c4dc76..11dd0137c6a 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -129,9 +129,8 @@ extern int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, void __user *buffer, size_t *lenp, loff_t *ppos); -int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, - int nlen, void __user *oldval, - size_t __user *oldlenp, +int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, + void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen); #endif diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index 69bc85978ba..0dfebc50942 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -131,7 +131,7 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, #ifdef CONFIG_SYSCTL_SYSCALL /* The generic sysctl ipc data routine. */ -static int sysctl_ipc_data(ctl_table *table, int __user *name, int nlen, +static int sysctl_ipc_data(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -169,14 +169,13 @@ static int sysctl_ipc_data(ctl_table *table, int __user *name, int nlen, return 1; } -static int sysctl_ipc_registered_data(ctl_table *table, int __user *name, - int nlen, void __user *oldval, size_t __user *oldlenp, +static int sysctl_ipc_registered_data(ctl_table *table, + void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { int rc; - rc = sysctl_ipc_data(table, name, nlen, oldval, oldlenp, newval, - newlen); + rc = sysctl_ipc_data(table, oldval, oldlenp, newval, newlen); if (newval && newlen && rc > 0) /* diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ec88fcc9a0d..9792c315676 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1500,7 +1500,6 @@ void register_sysctl_root(struct ctl_table_root *root) /* Perform the actual read/write of a sysctl table entry. */ static int do_sysctl_strategy(struct ctl_table_root *root, struct ctl_table *table, - int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -1514,8 +1513,7 @@ static int do_sysctl_strategy(struct ctl_table_root *root, return -EPERM; if (table->strategy) { - rc = table->strategy(table, name, nlen, oldval, oldlenp, - newval, newlen); + rc = table->strategy(table, oldval, oldlenp, newval, newlen); if (rc < 0) return rc; if (rc > 0) @@ -1525,8 +1523,7 @@ static int do_sysctl_strategy(struct ctl_table_root *root, /* If there is no strategy routine, or if the strategy returns * zero, proceed with automatic r/w */ if (table->data && table->maxlen) { - rc = sysctl_data(table, name, nlen, oldval, oldlenp, - newval, newlen); + rc = sysctl_data(table, oldval, oldlenp, newval, newlen); if (rc < 0) return rc; } @@ -1558,7 +1555,7 @@ repeat: table = table->child; goto repeat; } - error = do_sysctl_strategy(root, table, name, nlen, + error = do_sysctl_strategy(root, table, oldval, oldlenp, newval, newlen); return error; @@ -2707,7 +2704,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, */ /* The generic sysctl data routine (used if no strategy routine supplied) */ -int sysctl_data(struct ctl_table *table, int __user *name, int nlen, +int sysctl_data(struct ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -2741,7 +2738,7 @@ int sysctl_data(struct ctl_table *table, int __user *name, int nlen, } /* The generic string strategy routine: */ -int sysctl_string(struct ctl_table *table, int __user *name, int nlen, +int sysctl_string(struct ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -2787,7 +2784,7 @@ int sysctl_string(struct ctl_table *table, int __user *name, int nlen, * are between the minimum and maximum values given in the arrays * table->extra1 and table->extra2, respectively. */ -int sysctl_intvec(struct ctl_table *table, int __user *name, int nlen, +int sysctl_intvec(struct ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -2823,7 +2820,7 @@ int sysctl_intvec(struct ctl_table *table, int __user *name, int nlen, } /* Strategy function to convert jiffies to seconds */ -int sysctl_jiffies(struct ctl_table *table, int __user *name, int nlen, +int sysctl_jiffies(struct ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -2857,7 +2854,7 @@ int sysctl_jiffies(struct ctl_table *table, int __user *name, int nlen, } /* Strategy function to convert jiffies to seconds */ -int sysctl_ms_jiffies(struct ctl_table *table, int __user *name, int nlen, +int sysctl_ms_jiffies(struct ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -2912,35 +2909,35 @@ asmlinkage long sys_sysctl(struct __sysctl_args __user *args) return error; } -int sysctl_data(struct ctl_table *table, int __user *name, int nlen, +int sysctl_data(struct ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { return -ENOSYS; } -int sysctl_string(struct ctl_table *table, int __user *name, int nlen, +int sysctl_string(struct ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { return -ENOSYS; } -int sysctl_intvec(struct ctl_table *table, int __user *name, int nlen, +int sysctl_intvec(struct ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { return -ENOSYS; } -int sysctl_jiffies(struct ctl_table *table, int __user *name, int nlen, +int sysctl_jiffies(struct ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { return -ENOSYS; } -int sysctl_ms_jiffies(struct ctl_table *table, int __user *name, int nlen, +int sysctl_ms_jiffies(struct ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c index 4ab9659d269..3b34b354593 100644 --- a/kernel/utsname_sysctl.c +++ b/kernel/utsname_sysctl.c @@ -60,7 +60,7 @@ static int proc_do_uts_string(ctl_table *table, int write, struct file *filp, #ifdef CONFIG_SYSCTL_SYSCALL /* The generic string strategy routine: */ -static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen, +static int sysctl_uts_string(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -69,8 +69,7 @@ static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen, write = newval && newlen; memcpy(&uts_table, table, sizeof(uts_table)); uts_table.data = get_uts(table, write); - r = sysctl_string(&uts_table, name, nlen, - oldval, oldlenp, newval, newlen); + r = sysctl_string(&uts_table, oldval, oldlenp, newval, newlen); put_uts(table, write, uts_table.data); return r; } diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 2f0ac3c3eb7..96f9fcef295 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -166,7 +166,7 @@ static int max_priority[] = { 127 }; /* From DECnet spec */ static int dn_forwarding_proc(ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); -static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen, +static int dn_forwarding_sysctl(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen); @@ -318,7 +318,7 @@ static int dn_forwarding_proc(ctl_table *table, int write, #endif } -static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen, +static int dn_forwarding_sysctl(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index 228067c571b..36400b26689 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -132,7 +132,7 @@ static int parse_addr(__le16 *addr, char *str) } -static int dn_node_address_strategy(ctl_table *table, int __user *name, int nlen, +static int dn_node_address_strategy(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -217,7 +217,7 @@ static int dn_node_address_handler(ctl_table *table, int write, } -static int dn_def_dev_strategy(ctl_table *table, int __user *name, int nlen, +static int dn_def_dev_strategy(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index b12dae2b0b2..5154e729cf1 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1283,7 +1283,7 @@ static int devinet_conf_proc(ctl_table *ctl, int write, return ret; } -static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen, +static int devinet_conf_sysctl(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -1379,12 +1379,11 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write, return ret; } -int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen, +int ipv4_doint_and_flush_strategy(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { - int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp, - newval, newlen); + int ret = devinet_conf_sysctl(table, oldval, oldlenp, newval, newlen); struct net *net = table->extra2; if (ret == 1) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a6d7c584f53..942be04e795 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2908,8 +2908,6 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, } static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, - int __user *name, - int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, @@ -2972,16 +2970,13 @@ static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write, } static int ipv4_sysctl_rt_secret_interval_strategy(ctl_table *table, - int __user *name, - int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { int old = ip_rt_secret_interval; - int ret = sysctl_jiffies(table, name, nlen, oldval, oldlenp, newval, - newlen); + int ret = sysctl_jiffies(table, oldval, oldlenp, newval, newlen); rt_secret_reschedule(old); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 276d047fb85..1bb10df8ce7 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -64,8 +64,8 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, } /* Validate changes from sysctl interface. */ -static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name, - int nlen, void __user *oldval, +static int ipv4_sysctl_local_port_range(ctl_table *table, + void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -80,7 +80,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name, }; inet_get_local_port_range(range, range + 1); - ret = sysctl_intvec(&tmp, name, nlen, oldval, oldlenp, newval, newlen); + ret = sysctl_intvec(&tmp, oldval, oldlenp, newval, newlen); if (ret == 0 && newval && newlen) { if (range[1] < range[0]) ret = -EINVAL; @@ -109,8 +109,8 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file * return ret; } -static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, - int nlen, void __user *oldval, +static int sysctl_tcp_congestion_control(ctl_table *table, + void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -122,7 +122,7 @@ static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, int ret; tcp_get_default_congestion_control(val); - ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen); + ret = sysctl_string(&tbl, oldval, oldlenp, newval, newlen); if (ret == 1 && newval && newlen) ret = tcp_set_default_congestion_control(val); return ret; @@ -165,8 +165,8 @@ static int proc_allowed_congestion_control(ctl_table *ctl, return ret; } -static int strategy_allowed_congestion_control(ctl_table *table, int __user *name, - int nlen, void __user *oldval, +static int strategy_allowed_congestion_control(ctl_table *table, + void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) @@ -179,7 +179,7 @@ static int strategy_allowed_congestion_control(ctl_table *table, int __user *nam return -ENOMEM; tcp_get_available_congestion_control(tbl.data, tbl.maxlen); - ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen); + ret = sysctl_string(&tbl, oldval, oldlenp, newval, newlen); if (ret == 1 && newval && newlen) ret = tcp_set_allowed_congestion_control(tbl.data); kfree(tbl.data); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 7b6a584b62d..eea9542728c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3982,7 +3982,6 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp, } static int addrconf_sysctl_forward_strategy(ctl_table *table, - int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 840b15780a3..7f39e9b3645 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1730,9 +1730,8 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f return ret; } -int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, - int nlen, void __user *oldval, - size_t __user *oldlenp, +int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, + void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { struct net_device *dev = ctl->extra1; @@ -1745,13 +1744,11 @@ int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, switch (ctl->ctl_name) { case NET_NEIGH_REACHABLE_TIME: - ret = sysctl_jiffies(ctl, name, nlen, - oldval, oldlenp, newval, newlen); + ret = sysctl_jiffies(ctl, oldval, oldlenp, newval, newlen); break; case NET_NEIGH_RETRANS_TIME_MS: case NET_NEIGH_REACHABLE_TIME_MS: - ret = sysctl_ms_jiffies(ctl, name, nlen, - oldval, oldlenp, newval, newlen); + ret = sysctl_ms_jiffies(ctl, oldval, oldlenp, newval, newlen); break; default: ret = 0; -- cgit v1.2.3-70-g09d2 From ebf3f09c634906d371f2bfd71b41c7e0c52efe7e Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 15 Oct 2008 22:05:12 -0700 Subject: Configure out AIO support This patchs adds the CONFIG_AIO option which allows to remove support for asynchronous I/O operations, that are not necessarly used by applications, particularly on embedded devices. As this is a size-reduction option, it depends on CONFIG_EMBEDDED. It allows to save ~7 kilobytes of kernel code/data: text data bss dec hex filename 1115067 119180 217088 1451335 162547 vmlinux 1108025 119048 217088 1444161 160941 vmlinux.new -7042 -132 0 -7174 -1C06 +/- This patch has been originally written by Matt Mackall , and is part of the Linux Tiny project. [randy.dunlap@oracle.com: build fix] Signed-off-by: Thomas Petazzoni Cc: Benjamin LaHaise Cc: Zach Brown Signed-off-by: Matt Mackall Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/Makefile | 3 ++- include/linux/aio.h | 9 +++++++++ init/Kconfig | 8 ++++++++ kernel/sys_ni.c | 5 +++++ kernel/sysctl.c | 2 ++ 5 files changed, 26 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/fs/Makefile b/fs/Makefile index b6f27dc26b7..d0c69f57e5b 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -8,7 +8,7 @@ obj-y := open.o read_write.o file_table.o super.o \ char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ ioctl.o readdir.o select.o fifo.o dcache.o inode.o \ - attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ + attr.o bad_inode.o file.o filesystems.o namespace.o \ seq_file.o xattr.o libfs.o fs-writeback.o \ pnode.o drop_caches.o splice.o sync.o utimes.o \ stack.o @@ -27,6 +27,7 @@ obj-$(CONFIG_ANON_INODES) += anon_inodes.o obj-$(CONFIG_SIGNALFD) += signalfd.o obj-$(CONFIG_TIMERFD) += timerfd.o obj-$(CONFIG_EVENTFD) += eventfd.o +obj-$(CONFIG_AIO) += aio.o obj-$(CONFIG_FILE_LOCKING) += locks.o obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o diff --git a/include/linux/aio.h b/include/linux/aio.h index 09b276c3522..f6b8cf99b59 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -204,12 +204,21 @@ struct kioctx { /* prototypes */ extern unsigned aio_max_size; +#ifdef CONFIG_AIO extern ssize_t wait_on_sync_kiocb(struct kiocb *iocb); extern int aio_put_req(struct kiocb *iocb); extern void kick_iocb(struct kiocb *iocb); extern int aio_complete(struct kiocb *iocb, long res, long res2); struct mm_struct; extern void exit_aio(struct mm_struct *mm); +#else +static inline ssize_t wait_on_sync_kiocb(struct kiocb *iocb) { return 0; } +static inline int aio_put_req(struct kiocb *iocb) { return 0; } +static inline void kick_iocb(struct kiocb *iocb) { } +static inline int aio_complete(struct kiocb *iocb, long res, long res2) { return 0; } +struct mm_struct; +static inline void exit_aio(struct mm_struct *mm) { } +#endif /* CONFIG_AIO */ #define io_wait_to_kiocb(wait) container_of(wait, struct kiocb, ki_wait) diff --git a/init/Kconfig b/init/Kconfig index 8a8e2d00c40..5ceff3249a2 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -713,6 +713,14 @@ config SHMEM option replaces shmem and tmpfs with the much simpler ramfs code, which may be appropriate on small systems without swap. +config AIO + bool "Enable AIO support" if EMBEDDED + default y + help + This option enables POSIX asynchronous I/O which may by used + by some high performance threaded applications. Disabling + this option saves about 7k. + config VM_EVENT_COUNTERS default y bool "Enable VM event counters for /proc/vmstat" if EMBEDDED diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 503d8d4eb80..a77b27b11b0 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -126,6 +126,11 @@ cond_syscall(sys_vm86); cond_syscall(compat_sys_ipc); cond_syscall(compat_sys_sysctl); cond_syscall(sys_flock); +cond_syscall(sys_io_setup); +cond_syscall(sys_io_destroy); +cond_syscall(sys_io_submit); +cond_syscall(sys_io_cancel); +cond_syscall(sys_io_getevents); /* arch-specific weak syscall entries */ cond_syscall(sys_pciconfig_read); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 9792c315676..617d41e4d6a 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1281,6 +1281,7 @@ static struct ctl_table fs_table[] = { .extra2 = &two, }, #endif +#ifdef CONFIG_AIO { .procname = "aio-nr", .data = &aio_nr, @@ -1295,6 +1296,7 @@ static struct ctl_table fs_table[] = { .mode = 0644, .proc_handler = &proc_doulongvec_minmax, }, +#endif /* CONFIG_AIO */ #ifdef CONFIG_INOTIFY_USER { .ctl_name = FS_INOTIFY, -- cgit v1.2.3-70-g09d2 From c26ec88ea86ad5122a6ea5ad635e6a1f6c395d74 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 15 Oct 2008 22:05:14 -0700 Subject: resources: tidy __request_region() No functional change. Just return NULL for kzalloc failure immediately, rather than wrapping the whole function body in the body of an "if". Signed-off-by: Bjorn Helgaas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/resource.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) (limited to 'kernel') diff --git a/kernel/resource.c b/kernel/resource.c index 414d6fc9131..f193d6e3ded 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -630,33 +630,34 @@ struct resource * __request_region(struct resource *parent, { struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL); - if (res) { - res->name = name; - res->start = start; - res->end = start + n - 1; - res->flags = IORESOURCE_BUSY; + if (!res) + return NULL; - write_lock(&resource_lock); + res->name = name; + res->start = start; + res->end = start + n - 1; + res->flags = IORESOURCE_BUSY; - for (;;) { - struct resource *conflict; + write_lock(&resource_lock); - conflict = __request_resource(parent, res); - if (!conflict) - break; - if (conflict != parent) { - parent = conflict; - if (!(conflict->flags & IORESOURCE_BUSY)) - continue; - } + for (;;) { + struct resource *conflict; - /* Uhhuh, that didn't work out.. */ - kfree(res); - res = NULL; + conflict = __request_resource(parent, res); + if (!conflict) break; + if (conflict != parent) { + parent = conflict; + if (!(conflict->flags & IORESOURCE_BUSY)) + continue; } - write_unlock(&resource_lock); + + /* Uhhuh, that didn't work out.. */ + kfree(res); + res = NULL; + break; } + write_unlock(&resource_lock); return res; } EXPORT_SYMBOL(__request_region); -- cgit v1.2.3-70-g09d2 From 2b252c541158cfed9d7e5b019b894fe2174f5908 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 15 Oct 2008 22:05:20 -0700 Subject: make kprobes.c:kretprobe_table_lock() static Make the needlessly global kretprobe_table_lock() static. Signed-off-by: Adrian Bunk Acked-by: Ananth N Mavinakayanahalli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 75bc2cd9ebc..8b57a2597f2 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -404,7 +404,7 @@ void kretprobe_hash_lock(struct task_struct *tsk, spin_lock_irqsave(hlist_lock, *flags); } -void kretprobe_table_lock(unsigned long hash, unsigned long *flags) +static void kretprobe_table_lock(unsigned long hash, unsigned long *flags) { spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); spin_lock_irqsave(hlist_lock, *flags); -- cgit v1.2.3-70-g09d2 From 1c95e1b69073cff5ff179e592fa1a1e182c78a17 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 16 Oct 2008 15:32:46 -0700 Subject: Fix kernel/softirq.c printk format warning properly This fixes the broken 77af7e3403e7314c47b0c07fbc5e4ef21d939532 ("softirq, warning fix: correct a format to avoid a warning") fix correctly. The type of a pointer subtraction is not "int", nor is it "long". It can be either (or something else). It's "ptrdiff_t", and the printk format for it is "%td". Cc: Frederic Weisbecker Cc: Ingo Molnar Signed-off-by: Linus Torvalds --- kernel/softirq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/softirq.c b/kernel/softirq.c index be7a8292f99..37d67aa2d56 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -210,7 +210,7 @@ restart: h->action(h); if (unlikely(prev_count != preempt_count())) { - printk(KERN_ERR "huh, entered softirq %d %p" + printk(KERN_ERR "huh, entered softirq %td %p" "with preempt_count %08x," " exited with %08x?\n", h - softirq_vec, h->action, prev_count, preempt_count()); -- cgit v1.2.3-70-g09d2 From 54514a70adefe356afe854e2d3912d46668068e6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 23 Sep 2008 22:15:57 -0700 Subject: softirq: Add support for triggering softirq work on softirqs. This is basically a genericization of Jens Axboe's block layer remote softirq changes. Signed-off-by: David S. Miller Signed-off-by: Jens Axboe --- include/linux/interrupt.h | 21 ++++++++ include/linux/smp.h | 4 +- kernel/softirq.c | 129 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 153 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 54b3623434e..35a61dc60d5 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include #include @@ -273,6 +275,25 @@ extern void softirq_init(void); extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); +/* This is the worklist that queues up per-cpu softirq work. + * + * send_remote_sendirq() adds work to these lists, and + * the softirq handler itself dequeues from them. The queues + * are protected by disabling local cpu interrupts and they must + * only be accessed by the local cpu that they are for. + */ +DECLARE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list); + +/* Try to send a softirq to a remote cpu. If this cannot be done, the + * work will be queued to the local cpu. + */ +extern void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq); + +/* Like send_remote_softirq(), but the caller must disable local cpu interrupts + * and compute the current cpu, passed in as 'this_cpu'. + */ +extern void __send_remote_softirq(struct call_single_data *cp, int cpu, + int this_cpu, int softirq); /* Tasklets --- multithreaded analogue of BHs. diff --git a/include/linux/smp.h b/include/linux/smp.h index 66484d4a845..2e4d58b26c0 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -7,6 +7,7 @@ */ #include +#include #include #include @@ -16,7 +17,8 @@ struct call_single_data { struct list_head list; void (*func) (void *info); void *info; - unsigned int flags; + u16 flags; + u16 priv; }; #ifdef CONFIG_SMP diff --git a/kernel/softirq.c b/kernel/softirq.c index 37d67aa2d56..83ba21a13bd 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -6,6 +6,8 @@ * Distribute under GPLv2. * * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903) + * + * Remote softirq infrastructure is by Jens Axboe. */ #include @@ -474,17 +476,144 @@ void tasklet_kill(struct tasklet_struct *t) EXPORT_SYMBOL(tasklet_kill); +DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list); +EXPORT_PER_CPU_SYMBOL(softirq_work_list); + +static void __local_trigger(struct call_single_data *cp, int softirq) +{ + struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]); + + list_add_tail(&cp->list, head); + + /* Trigger the softirq only if the list was previously empty. */ + if (head->next == &cp->list) + raise_softirq_irqoff(softirq); +} + +#ifdef CONFIG_USE_GENERIC_SMP_HELPERS +static void remote_softirq_receive(void *data) +{ + struct call_single_data *cp = data; + unsigned long flags; + int softirq; + + softirq = cp->priv; + + local_irq_save(flags); + __local_trigger(cp, softirq); + local_irq_restore(flags); +} + +static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq) +{ + if (cpu_online(cpu)) { + cp->func = remote_softirq_receive; + cp->info = cp; + cp->flags = 0; + cp->priv = softirq; + + __smp_call_function_single(cpu, cp); + return 0; + } + return 1; +} +#else /* CONFIG_USE_GENERIC_SMP_HELPERS */ +static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq) +{ + return 1; +} +#endif + +/** + * __send_remote_softirq - try to schedule softirq work on a remote cpu + * @cp: private SMP call function data area + * @cpu: the remote cpu + * @this_cpu: the currently executing cpu + * @softirq: the softirq for the work + * + * Attempt to schedule softirq work on a remote cpu. If this cannot be + * done, the work is instead queued up on the local cpu. + * + * Interrupts must be disabled. + */ +void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq) +{ + if (cpu == this_cpu || __try_remote_softirq(cp, cpu, softirq)) + __local_trigger(cp, softirq); +} +EXPORT_SYMBOL(__send_remote_softirq); + +/** + * send_remote_softirq - try to schedule softirq work on a remote cpu + * @cp: private SMP call function data area + * @cpu: the remote cpu + * @softirq: the softirq for the work + * + * Like __send_remote_softirq except that disabling interrupts and + * computing the current cpu is done for the caller. + */ +void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq) +{ + unsigned long flags; + int this_cpu; + + local_irq_save(flags); + this_cpu = smp_processor_id(); + __send_remote_softirq(cp, cpu, this_cpu, softirq); + local_irq_restore(flags); +} +EXPORT_SYMBOL(send_remote_softirq); + +static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + /* + * If a CPU goes away, splice its entries to the current CPU + * and trigger a run of the softirq + */ + if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { + int cpu = (unsigned long) hcpu; + int i; + + local_irq_disable(); + for (i = 0; i < NR_SOFTIRQS; i++) { + struct list_head *head = &per_cpu(softirq_work_list[i], cpu); + struct list_head *local_head; + + if (list_empty(head)) + continue; + + local_head = &__get_cpu_var(softirq_work_list[i]); + list_splice_init(head, local_head); + raise_softirq_irqoff(i); + } + local_irq_enable(); + } + + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier = { + .notifier_call = remote_softirq_cpu_notify, +}; + void __init softirq_init(void) { int cpu; for_each_possible_cpu(cpu) { + int i; + per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head; per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head; + for (i = 0; i < NR_SOFTIRQS; i++) + INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu)); } + register_hotcpu_notifier(&remote_softirq_cpu_notifier); + open_softirq(TASKLET_SOFTIRQ, tasklet_action); open_softirq(HI_SOFTIRQ, tasklet_hi_action); } -- cgit v1.2.3-70-g09d2