From b502bd1152472dc1b98c60434f23c23b280c7b94 Mon Sep 17 00:00:00 2001 From: Muthu Kumar Date: Fri, 23 Mar 2012 15:01:50 -0700 Subject: magic.h: move some FS magic numbers into magic.h - Move open-coded filesystem magic numbers into magic.h - Rearrange magic.h so that the filesystem-related constants are grouped together. Signed-off-by: Muthukumar R Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/magic.h | 18 ++++++++++++------ include/linux/pipe_fs_i.h | 2 -- 2 files changed, 12 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/magic.h b/include/linux/magic.h index b7ed4759dbb..e15192cb9cf 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -9,7 +9,6 @@ #define CRAMFS_MAGIC 0x28cd3d45 /* some random number */ #define CRAMFS_MAGIC_WEND 0x453dcd28 /* magic number with the wrong endianess */ #define DEBUGFS_MAGIC 0x64626720 -#define SYSFS_MAGIC 0x62656572 #define SECURITYFS_MAGIC 0x73636673 #define SELINUX_MAGIC 0xf97cff8c #define RAMFS_MAGIC 0x858458f6 /* some random number */ @@ -27,7 +26,6 @@ #define HPFS_SUPER_MAGIC 0xf995e849 #define ISOFS_SUPER_MAGIC 0x9660 #define JFFS2_SUPER_MAGIC 0x72b6 -#define ANON_INODE_FS_MAGIC 0x09041934 #define PSTOREFS_MAGIC 0x6165676C #define MINIX_SUPER_MAGIC 0x137F /* minix v1 fs, 14 char names */ @@ -40,7 +38,6 @@ #define NCP_SUPER_MAGIC 0x564c /* Guess, what 0x564c is :-) */ #define NFS_SUPER_MAGIC 0x6969 #define OPENPROM_SUPER_MAGIC 0x9fa1 -#define PROC_SUPER_MAGIC 0x9fa0 #define QNX4_SUPER_MAGIC 0x002f /* qnx4 fs detection */ #define QNX6_SUPER_MAGIC 0x68191122 /* qnx6 fs detection */ @@ -52,15 +49,24 @@ #define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs" #define SMB_SUPER_MAGIC 0x517B -#define USBDEVICE_SUPER_MAGIC 0x9fa2 #define CGROUP_SUPER_MAGIC 0x27e0eb -#define FUTEXFS_SUPER_MAGIC 0xBAD1DEA #define STACK_END_MAGIC 0x57AC6E9D +#define V9FS_MAGIC 0x01021997 + +#define BDEVFS_MAGIC 0x62646576 +#define BINFMTFS_MAGIC 0x42494e4d #define DEVPTS_SUPER_MAGIC 
0x1cd1 +#define FUTEXFS_SUPER_MAGIC 0xBAD1DEA +#define PIPEFS_MAGIC 0x50495045 +#define PROC_SUPER_MAGIC 0x9fa0 #define SOCKFS_MAGIC 0x534F434B -#define V9FS_MAGIC 0x01021997 +#define SYSFS_MAGIC 0x62656572 +#define USBDEVICE_SUPER_MAGIC 0x9fa2 +#define MTD_INODE_FS_MAGIC 0x11307854 +#define ANON_INODE_FS_MAGIC 0x09041934 + #endif /* __LINUX_MAGIC_H__ */ diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 77257c92155..6d626ff0cfd 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -1,8 +1,6 @@ #ifndef _LINUX_PIPE_FS_I_H #define _LINUX_PIPE_FS_I_H -#define PIPEFS_MAGIC 0x50495045 - #define PIPE_DEF_BUFFERS 16 #define PIPE_BUF_FLAG_LRU 0x01 /* page is on the LRU */ -- cgit v1.2.3-70-g09d2 From d314d74c695f967e10598467a326f41c78ed1e20 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 23 Mar 2012 15:01:51 -0700 Subject: nmi watchdog: do not use cpp symbol in Kconfig ARCH_HAS_NMI_WATCHDOG is a macro defined by arch, but config HARDLOCKUP_DETECTOR depends on it. This is wrong, ARCH_HAS_NMI_WATCHDOG has to be a Kconfig config, and arch's need it should select it explicitly. 
Signed-off-by: WANG Cong Acked-by: Don Zickus Acked-by: Mike Frysinger Cc: David Howells Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 3 +++ arch/blackfin/Kconfig | 1 + arch/blackfin/include/asm/irq.h | 4 ---- arch/mn10300/Kconfig | 1 + arch/mn10300/include/asm/reset-regs.h | 4 ---- arch/sparc/Kconfig | 1 + arch/sparc/include/asm/irq_64.h | 1 - include/linux/nmi.h | 2 +- lib/Kconfig.debug | 2 +- 9 files changed, 8 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/arch/Kconfig b/arch/Kconfig index 5b448a74d0f..a6f14f622d1 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -120,6 +120,9 @@ config HAVE_KRETPROBES config HAVE_OPTPROBES bool + +config HAVE_NMI_WATCHDOG + bool # # An arch should select this if it provides all these things: # diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index abe5a9e8514..c1269a1085e 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -36,6 +36,7 @@ config BLACKFIN select GENERIC_ATOMIC64 select GENERIC_IRQ_PROBE select IRQ_PER_CPU if SMP + select HAVE_NMI_WATCHDOG if NMI_WATCHDOG config GENERIC_CSUM def_bool y diff --git a/arch/blackfin/include/asm/irq.h b/arch/blackfin/include/asm/irq.h index 12f4060a31b..89de539ed01 100644 --- a/arch/blackfin/include/asm/irq.h +++ b/arch/blackfin/include/asm/irq.h @@ -38,8 +38,4 @@ #include -#ifdef CONFIG_NMI_WATCHDOG -# define ARCH_HAS_NMI_WATCHDOG -#endif - #endif /* _BFIN_IRQ_H_ */ diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig index 8f1c40d5817..3aa3de01715 100644 --- a/arch/mn10300/Kconfig +++ b/arch/mn10300/Kconfig @@ -5,6 +5,7 @@ config MN10300 select GENERIC_IRQ_SHOW select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_KGDB + select HAVE_NMI_WATCHDOG if MN10300_WD_TIMER config AM33_2 def_bool n diff --git a/arch/mn10300/include/asm/reset-regs.h b/arch/mn10300/include/asm/reset-regs.h index 10c7502a113..8ca2a42d365 100644 --- a/arch/mn10300/include/asm/reset-regs.h +++ 
b/arch/mn10300/include/asm/reset-regs.h @@ -17,10 +17,6 @@ #ifdef __KERNEL__ -#ifdef CONFIG_MN10300_WD_TIMER -#define ARCH_HAS_NMI_WATCHDOG /* See include/linux/nmi.h */ -#endif - /* * watchdog timer registers */ diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index ca5580e4d81..1666de84d47 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -29,6 +29,7 @@ config SPARC select GENERIC_IRQ_SHOW select USE_GENERIC_SMP_HELPERS if SMP select GENERIC_PCI_IOMAP + select HAVE_NMI_WATCHDOG if SPARC64 config SPARC32 def_bool !64BIT diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h index 16dcae6d56e..abf6afe82ca 100644 --- a/arch/sparc/include/asm/irq_64.h +++ b/arch/sparc/include/asm/irq_64.h @@ -95,7 +95,6 @@ void arch_trigger_all_cpu_backtrace(void); extern void *hardirq_stack[NR_CPUS]; extern void *softirq_stack[NR_CPUS]; #define __ARCH_HAS_DO_SOFTIRQ -#define ARCH_HAS_NMI_WATCHDOG #define NO_IRQ 0xffffffff diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 2d304efc89d..db50840e635 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -14,7 +14,7 @@ * may be used to reset the timeout - for code which intentionally * disables interrupts for a long time. This call is stateless. 
*/ -#if defined(ARCH_HAS_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) +#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) #include extern void touch_nmi_watchdog(void); #else diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 05037dc9bde..391003f7ab4 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -184,7 +184,7 @@ config LOCKUP_DETECTOR config HARDLOCKUP_DETECTOR def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \ - !ARCH_HAS_NMI_WATCHDOG + !HAVE_NMI_WATCHDOG config BOOTPARAM_HARDLOCKUP_PANIC bool "Panic (Reboot) On Hard Lockups" -- cgit v1.2.3-70-g09d2 From 10db4e1e4e9a910a26b94045660e5ba7e7c71419 Mon Sep 17 00:00:00 2001 From: Bobby Powers Date: Fri, 23 Mar 2012 15:01:51 -0700 Subject: headers: include linux/types.h where appropriate This addresses some header check warnings. DRM headers which include "drm.h" have been excluded, as they indirectly include types.h. Signed-off-by: Bobby Powers Cc: Chris Ball Cc: Dave Airlie Cc: James Bottomley Cc: Takashi Iwai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/drm/drm_mode.h | 2 ++ include/linux/mmc/ioctl.h | 3 +++ include/scsi/scsi_netlink.h | 2 +- include/sound/compress_params.h | 2 ++ 4 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_mode.h b/include/drm/drm_mode.h index 2a2acda8b43..4a0aae38e16 100644 --- a/include/drm/drm_mode.h +++ b/include/drm/drm_mode.h @@ -27,6 +27,8 @@ #ifndef _DRM_MODE_H #define _DRM_MODE_H +#include + #define DRM_DISPLAY_INFO_LEN 32 #define DRM_CONNECTOR_NAME_LEN 32 #define DRM_DISPLAY_MODE_LEN 32 diff --git a/include/linux/mmc/ioctl.h b/include/linux/mmc/ioctl.h index 8fa5bc5f805..1f5e6892392 100644 --- a/include/linux/mmc/ioctl.h +++ b/include/linux/mmc/ioctl.h @@ -1,5 +1,8 @@ #ifndef LINUX_MMC_IOCTL_H #define LINUX_MMC_IOCTL_H + +#include + struct mmc_ioc_cmd { /* Implies direction of data. 
true = write, false = read */ int write_flag; diff --git a/include/scsi/scsi_netlink.h b/include/scsi/scsi_netlink.h index 58ce8fe4478..5cb20ccb195 100644 --- a/include/scsi/scsi_netlink.h +++ b/include/scsi/scsi_netlink.h @@ -23,7 +23,7 @@ #define SCSI_NETLINK_H #include - +#include /* * This file intended to be included by both kernel and user space diff --git a/include/sound/compress_params.h b/include/sound/compress_params.h index d97d69f81a7..da4a456de03 100644 --- a/include/sound/compress_params.h +++ b/include/sound/compress_params.h @@ -51,6 +51,8 @@ #ifndef __SND_COMPRESS_PARAMS_H #define __SND_COMPRESS_PARAMS_H +#include + /* AUDIO CODECS SUPPORTED */ #define MAX_NUM_CODECS 32 #define MAX_NUM_CODEC_DESCRIPTORS 32 -- cgit v1.2.3-70-g09d2 From 7ccaba5314caf3a2b1052edb3146ccc969b4d466 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 23 Mar 2012 15:01:52 -0700 Subject: consolidate WARN_...ONCE() static variables Due to the alignment of following variables, these typically consume more than just the single byte that 'bool' requires, and as there are a few hundred instances, the cache pollution (not so much the waste of memory) sums up. Put these variables into their own section, outside of any half way frequently used memory range. Do the same also to the __warned variable of rcu_lockdep_assert(). (Don't, however, include the ones used by printk_once() and alike, as they can potentially be hot.) 
Signed-off-by: Jan Beulich Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/bug.h | 6 +++--- include/asm-generic/vmlinux.lds.h | 1 + include/linux/rcupdate.h | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index 84458b0c38d..2520a6e241d 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -134,7 +134,7 @@ extern void warn_slowpath_null(const char *file, const int line); #endif #define WARN_ON_ONCE(condition) ({ \ - static bool __warned; \ + static bool __section(.data.unlikely) __warned; \ int __ret_warn_once = !!(condition); \ \ if (unlikely(__ret_warn_once)) \ @@ -144,7 +144,7 @@ extern void warn_slowpath_null(const char *file, const int line); }) #define WARN_ONCE(condition, format...) ({ \ - static bool __warned; \ + static bool __section(.data.unlikely) __warned; \ int __ret_warn_once = !!(condition); \ \ if (unlikely(__ret_warn_once)) \ @@ -154,7 +154,7 @@ extern void warn_slowpath_null(const char *file, const int line); }) #define WARN_TAINT_ONCE(condition, taint, format...) 
({ \ - static bool __warned; \ + static bool __section(.data.unlikely) __warned; \ int __ret_warn_once = !!(condition); \ \ if (unlikely(__ret_warn_once)) \ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index b5e2e4c6b01..798603e8ec3 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -167,6 +167,7 @@ CPU_KEEP(exit.data) \ MEM_KEEP(init.data) \ MEM_KEEP(exit.data) \ + *(.data.unlikely) \ STRUCT_ALIGN(); \ *(__tracepoints) \ /* implement dynamic printk debug */ \ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 937217425c4..2c62594b67d 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -418,7 +418,7 @@ extern int rcu_my_thread_group_empty(void); */ #define rcu_lockdep_assert(c, s) \ do { \ - static bool __warned; \ + static bool __section(.data.unlikely) __warned; \ if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ __warned = true; \ lockdep_rcu_suspicious(__FILE__, __LINE__, s); \ -- cgit v1.2.3-70-g09d2 From ebec18a6d3aa1e7d84aab16225e87fd25170ec2b Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 23 Mar 2012 15:01:54 -0700 Subject: prctl: add PR_{SET,GET}_CHILD_SUBREAPER to allow simple process supervision Userspace service managers/supervisors need to track their started services. Many services daemonize by double-forking and get implicitly re-parented to PID 1. The service manager will no longer be able to receive the SIGCHLD signals for them, and is no longer in charge of reaping the children with wait(). All information about the children is lost at the moment PID 1 cleans up the re-parented processes. With this prctl, a service manager process can mark itself as a sort of 'sub-init', able to stay as the parent for all orphaned processes created by the started services. All SIGCHLD signals will be delivered to the service manager. 
Receiving SIGCHLD and doing wait() is in cases of a service-manager much preferred over any possible asynchronous notification about specific PIDs, because the service manager has full access to the child process data in /proc and the PID can not be re-used until the wait(), the service-manager itself is in charge of, has happened. As a side effect, the relevant parent PID information does not get lost by a double-fork, which results in a more elaborate process tree and 'ps' output: before: # ps afx 253 ? Ss 0:00 /bin/dbus-daemon --system --nofork 294 ? Sl 0:00 /usr/libexec/polkit-1/polkitd 328 ? S 0:00 /usr/sbin/modem-manager 608 ? Sl 0:00 /usr/libexec/colord 658 ? Sl 0:00 /usr/libexec/upowerd 819 ? Sl 0:00 /usr/libexec/imsettings-daemon 916 ? Sl 0:00 /usr/libexec/udisks-daemon 917 ? S 0:00 \_ udisks-daemon: not polling any devices after: # ps afx 294 ? Ss 0:00 /bin/dbus-daemon --system --nofork 426 ? Sl 0:00 \_ /usr/libexec/polkit-1/polkitd 449 ? S 0:00 \_ /usr/sbin/modem-manager 635 ? Sl 0:00 \_ /usr/libexec/colord 705 ? Sl 0:00 \_ /usr/libexec/upowerd 959 ? Sl 0:00 \_ /usr/libexec/udisks-daemon 960 ? S 0:00 | \_ udisks-daemon: not polling any devices 977 ? Sl 0:00 \_ /usr/libexec/packagekitd This prctl is orthogonal to PID namespaces. PID namespaces are isolated from each other, while a service management process usually requires the services to live in the same namespace, to be able to talk to each other. Users of this will be the systemd per-user instance, which provides init-like functionality for the user's login session and D-Bus, which activates bus services on-demand. Both need init-like capabilities to be able to properly keep track of the services they start. Many thanks to Oleg for several rounds of review and insights. 
[akpm@linux-foundation.org: fix comment layout and spelling] [akpm@linux-foundation.org: add lengthy code comment from Oleg] Reviewed-by: Oleg Nesterov Signed-off-by: Lennart Poettering Signed-off-by: Kay Sievers Acked-by: Valdis Kletnieks Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/prctl.h | 3 +++ include/linux/sched.h | 12 ++++++++++++ kernel/exit.c | 33 ++++++++++++++++++++++++++++----- kernel/fork.c | 3 +++ kernel/sys.c | 8 ++++++++ 5 files changed, 54 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/prctl.h b/include/linux/prctl.h index a0413ac3abe..e0cfec2490a 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -121,4 +121,7 @@ #define PR_SET_PTRACER 0x59616d61 # define PR_SET_PTRACER_ANY ((unsigned long)-1) +#define PR_SET_CHILD_SUBREAPER 36 +#define PR_GET_CHILD_SUBREAPER 37 + #endif /* _LINUX_PRCTL_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 0c147a4260a..0c3854b0d4b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -553,6 +553,18 @@ struct signal_struct { int group_stop_count; unsigned int flags; /* see SIGNAL_* flags below */ + /* + * PR_SET_CHILD_SUBREAPER marks a process, like a service + * manager, to re-parent orphan (double-forking) child processes + * to this process instead of 'init'. The service manager is + * able to receive SIGCHLD signals and is able to investigate + * the process until it calls wait(). All children of this + * process will inherit a flag if they should look for a + * child_subreaper process at exit. + */ + unsigned int is_child_subreaper:1; + unsigned int has_child_subreaper:1; + /* POSIX.1b Interval Timers */ struct list_head posix_timers; diff --git a/kernel/exit.c b/kernel/exit.c index 16b07bfac22..456329fd4ea 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -687,11 +687,11 @@ static void exit_mm(struct task_struct * tsk) } /* - * When we die, we re-parent all our children. 
- * Try to give them to another thread in our thread - * group, and if no such member exists, give it to - * the child reaper process (ie "init") in our pid - * space. + * When we die, we re-parent all our children, and try to: + * 1. give them to another thread in our thread group, if such a member exists + * 2. give it to the first ancestor process which prctl'd itself as a + * child_subreaper for its children (like a service manager) + * 3. give it to the init process (PID 1) in our pid namespace */ static struct task_struct *find_new_reaper(struct task_struct *father) __releases(&tasklist_lock) @@ -722,6 +722,29 @@ static struct task_struct *find_new_reaper(struct task_struct *father) * forget_original_parent() must move them somewhere. */ pid_ns->child_reaper = init_pid_ns.child_reaper; + } else if (father->signal->has_child_subreaper) { + struct task_struct *reaper; + + /* + * Find the first ancestor marked as child_subreaper. + * Note that the code below checks same_thread_group(reaper, + * pid_ns->child_reaper). This is what we need to DTRT in a + * PID namespace. 
However we still need the check above, see + * http://marc.info/?l=linux-kernel&m=131385460420380 + */ + for (reaper = father->real_parent; + reaper != &init_task; + reaper = reaper->real_parent) { + if (same_thread_group(reaper, pid_ns->child_reaper)) + break; + if (!reaper->signal->is_child_subreaper) + continue; + thread = reaper; + do { + if (!(thread->flags & PF_EXITING)) + return reaper; + } while_each_thread(reaper, thread); + } } return pid_ns->child_reaper; diff --git a/kernel/fork.c b/kernel/fork.c index 37674ec55cd..b9372a0bff1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1051,6 +1051,9 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->oom_score_adj = current->signal->oom_score_adj; sig->oom_score_adj_min = current->signal->oom_score_adj_min; + sig->has_child_subreaper = current->signal->has_child_subreaper || + current->signal->is_child_subreaper; + mutex_init(&sig->cred_guard_mutex); return 0; diff --git a/kernel/sys.c b/kernel/sys.c index 888d227fd19..9eb7fcab8df 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1962,6 +1962,14 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_SET_MM: error = prctl_set_mm(arg2, arg3, arg4, arg5); break; + case PR_SET_CHILD_SUBREAPER: + me->signal->is_child_subreaper = !!arg2; + error = 0; + break; + case PR_GET_CHILD_SUBREAPER: + error = put_user(me->signal->is_child_subreaper, + (int __user *) arg2); + break; default: error = -EINVAL; break; -- cgit v1.2.3-70-g09d2 From 7be865ab8634d4ec2a6bdb9459b268cd60e832af Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Fri, 23 Mar 2012 15:02:01 -0700 Subject: backlight: new backlight driver for LP855x devices This driver supports TI LP8550/LP8551/LP8552/LP8553/LP8556 backlight devices. The brightness can be controlled by the I2C or PWM input. The lp855x driver provides both modes. For the PWM control, pwm-specific functions can be defined in the platform data. 
And some information can be read via the sysfs(lp855x device attributes). For details, please refer to Documentation/backlight/lp855x-driver.txt. [axel.lin@gmail.com: add missing mutex_unlock in lp855x_read_byte() error path] [axel.lin@gmail.com: check platform data in lp855x_probe()] [axel.lin@gmail.com: small cleanups] [dan.carpenter@oracle.com: silence a compiler warning] [axel.lin@gmail.com: use id->driver_data to differentiate lp855x chips] [akpm@linux-foundation.org: simplify boolean return expression] Signed-off-by: Milo(Woogyom) Kim Signed-off-by: Axel Lin Signed-off-by: Dan Carpenter Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/backlight/lp855x-driver.txt | 78 +++++++ drivers/video/backlight/Kconfig | 7 + drivers/video/backlight/Makefile | 1 + drivers/video/backlight/lp855x_bl.c | 331 ++++++++++++++++++++++++++++++ include/linux/lp855x.h | 131 ++++++++++++ 5 files changed, 548 insertions(+) create mode 100644 Documentation/backlight/lp855x-driver.txt create mode 100644 drivers/video/backlight/lp855x_bl.c create mode 100644 include/linux/lp855x.h (limited to 'include') diff --git a/Documentation/backlight/lp855x-driver.txt b/Documentation/backlight/lp855x-driver.txt new file mode 100644 index 00000000000..f5e4caafab7 --- /dev/null +++ b/Documentation/backlight/lp855x-driver.txt @@ -0,0 +1,78 @@ +Kernel driver lp855x +==================== + +Backlight driver for LP855x ICs + +Supported chips: + Texas Instruments LP8550, LP8551, LP8552, LP8553 and LP8556 + +Author: Milo(Woogyom) Kim + +Description +----------- + +* Brightness control + +Brightness can be controlled by the pwm input or the i2c command. +The lp855x driver supports both cases. + +* Device attributes + +1) bl_ctl_mode +Backlight control mode. +Value : pwm based or register based + +2) chip_id +The lp855x chip id. 
+Value : lp8550/lp8551/lp8552/lp8553/lp8556 + +Platform data for lp855x +------------------------ + +For supporting platform specific data, the lp855x platform data can be used. + +* name : Backlight driver name. If it is not defined, default name is set. +* mode : Brightness control mode. PWM or register based. +* device_control : Value of DEVICE CONTROL register. +* initial_brightness : Initial value of backlight brightness. +* pwm_data : Platform specific pwm generation functions. + Only valid when brightness is pwm input mode. + Functions should be implemented by PWM driver. + - pwm_set_intensity() : set duty of PWM + - pwm_get_intensity() : get current duty of PWM +* load_new_rom_data : + 0 : use default configuration data + 1 : update values of eeprom or eprom registers on loading driver +* size_program : Total size of lp855x_rom_data. +* rom_data : List of new eeprom/eprom registers. + +example 1) lp8552 platform data : i2c register mode with new eeprom data + +#define EEPROM_A5_ADDR 0xA5 +#define EEPROM_A5_VAL 0x4f /* EN_VSYNC=0 */ + +static struct lp855x_rom_data lp8552_eeprom_arr[] = { + {EEPROM_A5_ADDR, EEPROM_A5_VAL}, +}; + +static struct lp855x_platform_data lp8552_pdata = { + .name = "lcd-bl", + .mode = REGISTER_BASED, + .device_control = I2C_CONFIG(LP8552), + .initial_brightness = INITIAL_BRT, + .load_new_rom_data = 1, + .size_program = ARRAY_SIZE(lp8552_eeprom_arr), + .rom_data = lp8552_eeprom_arr, +}; + +example 2) lp8556 platform data : pwm input mode with default rom data + +static struct lp855x_platform_data lp8556_pdata = { + .mode = PWM_BASED, + .device_control = PWM_CONFIG(LP8556), + .initial_brightness = INITIAL_BRT, + .pwm_data = { + .pwm_set_intensity = platform_pwm_set_intensity, + .pwm_get_intensity = platform_pwm_get_intensity, + }, +}; diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig index 681b36929fe..49e7d83f869 100644 --- a/drivers/video/backlight/Kconfig +++ b/drivers/video/backlight/Kconfig @@ -334,6 
+334,13 @@ config BACKLIGHT_AAT2870 If you have a AnalogicTech AAT2870 say Y to enable the backlight driver. +config BACKLIGHT_LP855X + tristate "Backlight driver for TI LP855X" + depends on BACKLIGHT_CLASS_DEVICE && I2C + help + This supports TI LP8550, LP8551, LP8552, LP8553 and LP8556 + backlight driver. + endif # BACKLIGHT_CLASS_DEVICE endif # BACKLIGHT_LCD_SUPPORT diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile index af5cf654ec7..0ee06e0832b 100644 --- a/drivers/video/backlight/Makefile +++ b/drivers/video/backlight/Makefile @@ -22,6 +22,7 @@ obj-$(CONFIG_BACKLIGHT_GENERIC) += generic_bl.o obj-$(CONFIG_BACKLIGHT_HP700) += jornada720_bl.o obj-$(CONFIG_BACKLIGHT_HP680) += hp680_bl.o obj-$(CONFIG_BACKLIGHT_LOCOMO) += locomolcd.o +obj-$(CONFIG_BACKLIGHT_LP855X) += lp855x_bl.o obj-$(CONFIG_BACKLIGHT_OMAP1) += omap1_bl.o obj-$(CONFIG_BACKLIGHT_PROGEAR) += progear_bl.o obj-$(CONFIG_BACKLIGHT_CARILLO_RANCH) += cr_bllcd.o diff --git a/drivers/video/backlight/lp855x_bl.c b/drivers/video/backlight/lp855x_bl.c new file mode 100644 index 00000000000..72a0e0c917c --- /dev/null +++ b/drivers/video/backlight/lp855x_bl.c @@ -0,0 +1,331 @@ +/* + * TI LP855x Backlight Driver + * + * Copyright (C) 2011 Texas Instruments + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#include +#include +#include +#include +#include +#include + +/* Registers */ +#define BRIGHTNESS_CTRL (0x00) +#define DEVICE_CTRL (0x01) + +#define BUF_SIZE 20 +#define DEFAULT_BL_NAME "lcd-backlight" +#define MAX_BRIGHTNESS 255 + +struct lp855x { + const char *chipname; + enum lp855x_chip_id chip_id; + struct i2c_client *client; + struct backlight_device *bl; + struct device *dev; + struct mutex xfer_lock; + struct lp855x_platform_data *pdata; +}; + +static int lp855x_read_byte(struct lp855x *lp, u8 reg, u8 *data) +{ + int ret; + + mutex_lock(&lp->xfer_lock); + ret = i2c_smbus_read_byte_data(lp->client, reg); + if (ret < 0) { + mutex_unlock(&lp->xfer_lock); + dev_err(lp->dev, "failed to read 0x%.2x\n", reg); + return ret; + } + mutex_unlock(&lp->xfer_lock); + + *data = (u8)ret; + return 0; +} + +static int lp855x_write_byte(struct lp855x *lp, u8 reg, u8 data) +{ + int ret; + + mutex_lock(&lp->xfer_lock); + ret = i2c_smbus_write_byte_data(lp->client, reg, data); + mutex_unlock(&lp->xfer_lock); + + return ret; +} + +static bool lp855x_is_valid_rom_area(struct lp855x *lp, u8 addr) +{ + u8 start, end; + + switch (lp->chip_id) { + case LP8550: + case LP8551: + case LP8552: + case LP8553: + start = EEPROM_START; + end = EEPROM_END; + break; + case LP8556: + start = EPROM_START; + end = EPROM_END; + break; + default: + return false; + } + + return (addr >= start && addr <= end); +} + +static int lp855x_init_registers(struct lp855x *lp) +{ + u8 val, addr; + int i, ret; + struct lp855x_platform_data *pd = lp->pdata; + + val = pd->initial_brightness; + ret = lp855x_write_byte(lp, BRIGHTNESS_CTRL, val); + if (ret) + return ret; + + val = pd->device_control; + ret = lp855x_write_byte(lp, DEVICE_CTRL, val); + if (ret) + return ret; + + if (pd->load_new_rom_data && pd->size_program) { + for (i = 0; i < pd->size_program; i++) { + addr = pd->rom_data[i].addr; + val = pd->rom_data[i].val; + if (!lp855x_is_valid_rom_area(lp, addr)) + continue; + + ret = 
lp855x_write_byte(lp, addr, val); + if (ret) + return ret; + } + } + + return ret; +} + +static int lp855x_bl_update_status(struct backlight_device *bl) +{ + struct lp855x *lp = bl_get_data(bl); + enum lp855x_brightness_ctrl_mode mode = lp->pdata->mode; + + if (bl->props.state & BL_CORE_SUSPENDED) + bl->props.brightness = 0; + + if (mode == PWM_BASED) { + struct lp855x_pwm_data *pd = &lp->pdata->pwm_data; + int br = bl->props.brightness; + int max_br = bl->props.max_brightness; + + if (pd->pwm_set_intensity) + pd->pwm_set_intensity(br, max_br); + + } else if (mode == REGISTER_BASED) { + u8 val = bl->props.brightness; + lp855x_write_byte(lp, BRIGHTNESS_CTRL, val); + } + + return 0; +} + +static int lp855x_bl_get_brightness(struct backlight_device *bl) +{ + struct lp855x *lp = bl_get_data(bl); + enum lp855x_brightness_ctrl_mode mode = lp->pdata->mode; + + if (mode == PWM_BASED) { + struct lp855x_pwm_data *pd = &lp->pdata->pwm_data; + int max_br = bl->props.max_brightness; + + if (pd->pwm_get_intensity) + bl->props.brightness = pd->pwm_get_intensity(max_br); + + } else if (mode == REGISTER_BASED) { + u8 val = 0; + + lp855x_read_byte(lp, BRIGHTNESS_CTRL, &val); + bl->props.brightness = val; + } + + return bl->props.brightness; +} + +static const struct backlight_ops lp855x_bl_ops = { + .options = BL_CORE_SUSPENDRESUME, + .update_status = lp855x_bl_update_status, + .get_brightness = lp855x_bl_get_brightness, +}; + +static int lp855x_backlight_register(struct lp855x *lp) +{ + struct backlight_device *bl; + struct backlight_properties props; + struct lp855x_platform_data *pdata = lp->pdata; + char *name = pdata->name ? 
: DEFAULT_BL_NAME; + + props.type = BACKLIGHT_PLATFORM; + props.max_brightness = MAX_BRIGHTNESS; + + if (pdata->initial_brightness > props.max_brightness) + pdata->initial_brightness = props.max_brightness; + + props.brightness = pdata->initial_brightness; + + bl = backlight_device_register(name, lp->dev, lp, + &lp855x_bl_ops, &props); + if (IS_ERR(bl)) + return PTR_ERR(bl); + + lp->bl = bl; + + return 0; +} + +static void lp855x_backlight_unregister(struct lp855x *lp) +{ + if (lp->bl) + backlight_device_unregister(lp->bl); +} + +static ssize_t lp855x_get_chip_id(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct lp855x *lp = dev_get_drvdata(dev); + return scnprintf(buf, BUF_SIZE, "%s\n", lp->chipname); +} + +static ssize_t lp855x_get_bl_ctl_mode(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct lp855x *lp = dev_get_drvdata(dev); + enum lp855x_brightness_ctrl_mode mode = lp->pdata->mode; + char *strmode = NULL; + + if (mode == PWM_BASED) + strmode = "pwm based"; + else if (mode == REGISTER_BASED) + strmode = "register based"; + + return scnprintf(buf, BUF_SIZE, "%s\n", strmode); +} + +static DEVICE_ATTR(chip_id, S_IRUGO, lp855x_get_chip_id, NULL); +static DEVICE_ATTR(bl_ctl_mode, S_IRUGO, lp855x_get_bl_ctl_mode, NULL); + +static struct attribute *lp855x_attributes[] = { + &dev_attr_chip_id.attr, + &dev_attr_bl_ctl_mode.attr, + NULL, +}; + +static const struct attribute_group lp855x_attr_group = { + .attrs = lp855x_attributes, +}; + +static int lp855x_probe(struct i2c_client *cl, const struct i2c_device_id *id) +{ + struct lp855x *lp; + struct lp855x_platform_data *pdata = cl->dev.platform_data; + enum lp855x_brightness_ctrl_mode mode; + int ret; + + if (!pdata) { + dev_err(&cl->dev, "no platform data supplied\n"); + return -EINVAL; + } + + if (!i2c_check_functionality(cl->adapter, I2C_FUNC_SMBUS_I2C_BLOCK)) + return -EIO; + + lp = devm_kzalloc(&cl->dev, sizeof(struct lp855x), GFP_KERNEL); + if (!lp) + return 
-ENOMEM; + + mode = pdata->mode; + lp->client = cl; + lp->dev = &cl->dev; + lp->pdata = pdata; + lp->chipname = id->name; + lp->chip_id = id->driver_data; + i2c_set_clientdata(cl, lp); + + mutex_init(&lp->xfer_lock); + + ret = lp855x_init_registers(lp); + if (ret) { + dev_err(lp->dev, "i2c communication err: %d", ret); + if (mode == REGISTER_BASED) + goto err_dev; + } + + ret = lp855x_backlight_register(lp); + if (ret) { + dev_err(lp->dev, + "failed to register backlight. err: %d\n", ret); + goto err_dev; + } + + ret = sysfs_create_group(&lp->dev->kobj, &lp855x_attr_group); + if (ret) { + dev_err(lp->dev, "failed to register sysfs. err: %d\n", ret); + goto err_sysfs; + } + + backlight_update_status(lp->bl); + return 0; + +err_sysfs: + lp855x_backlight_unregister(lp); +err_dev: + return ret; +} + +static int __devexit lp855x_remove(struct i2c_client *cl) +{ + struct lp855x *lp = i2c_get_clientdata(cl); + + lp->bl->props.brightness = 0; + backlight_update_status(lp->bl); + sysfs_remove_group(&lp->dev->kobj, &lp855x_attr_group); + lp855x_backlight_unregister(lp); + + return 0; +} + +static const struct i2c_device_id lp855x_ids[] = { + {"lp8550", LP8550}, + {"lp8551", LP8551}, + {"lp8552", LP8552}, + {"lp8553", LP8553}, + {"lp8556", LP8556}, + { } +}; +MODULE_DEVICE_TABLE(i2c, lp855x_ids); + +static struct i2c_driver lp855x_driver = { + .driver = { + .name = "lp855x", + }, + .probe = lp855x_probe, + .remove = __devexit_p(lp855x_remove), + .id_table = lp855x_ids, +}; + +module_i2c_driver(lp855x_driver); + +MODULE_DESCRIPTION("Texas Instruments LP855x Backlight driver"); +MODULE_AUTHOR("Milo Kim "); +MODULE_LICENSE("GPL"); diff --git a/include/linux/lp855x.h b/include/linux/lp855x.h new file mode 100644 index 00000000000..781a490a451 --- /dev/null +++ b/include/linux/lp855x.h @@ -0,0 +1,131 @@ +/* + * LP855x Backlight Driver + * + * Copyright (C) 2011 Texas Instruments + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of 
the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#ifndef _LP855X_H +#define _LP855X_H + +#define BL_CTL_SHFT (0) +#define BRT_MODE_SHFT (1) +#define BRT_MODE_MASK (0x06) + +/* Enable backlight. Only valid when BRT_MODE=10(I2C only) */ +#define ENABLE_BL (1) +#define DISABLE_BL (0) + +#define I2C_CONFIG(id) id ## _I2C_CONFIG +#define PWM_CONFIG(id) id ## _PWM_CONFIG + +/* DEVICE CONTROL register - LP8550 */ +#define LP8550_PWM_CONFIG (LP8550_PWM_ONLY << BRT_MODE_SHFT) +#define LP8550_I2C_CONFIG ((ENABLE_BL << BL_CTL_SHFT) | \ + (LP8550_I2C_ONLY << BRT_MODE_SHFT)) + +/* DEVICE CONTROL register - LP8551 */ +#define LP8551_PWM_CONFIG LP8550_PWM_CONFIG +#define LP8551_I2C_CONFIG LP8550_I2C_CONFIG + +/* DEVICE CONTROL register - LP8552 */ +#define LP8552_PWM_CONFIG LP8550_PWM_CONFIG +#define LP8552_I2C_CONFIG LP8550_I2C_CONFIG + +/* DEVICE CONTROL register - LP8553 */ +#define LP8553_PWM_CONFIG LP8550_PWM_CONFIG +#define LP8553_I2C_CONFIG LP8550_I2C_CONFIG + +/* DEVICE CONTROL register - LP8556 */ +#define LP8556_PWM_CONFIG (LP8556_PWM_ONLY << BRT_MODE_SHFT) +#define LP8556_COMB1_CONFIG (LP8556_COMBINED1 << BRT_MODE_SHFT) +#define LP8556_I2C_CONFIG ((ENABLE_BL << BL_CTL_SHFT) | \ + (LP8556_I2C_ONLY << BRT_MODE_SHFT)) +#define LP8556_COMB2_CONFIG (LP8556_COMBINED2 << BRT_MODE_SHFT) + +/* ROM area boundary */ +#define EEPROM_START (0xA0) +#define EEPROM_END (0xA7) +#define EPROM_START (0xA0) +#define EPROM_END (0xAF) + +enum lp855x_chip_id { + LP8550, + LP8551, + LP8552, + LP8553, + LP8556, +}; + +enum lp855x_brightness_ctrl_mode { + PWM_BASED = 1, + REGISTER_BASED, +}; + +enum lp8550_brighntess_source { + LP8550_PWM_ONLY, + LP8550_I2C_ONLY = 2, +}; + +enum lp8551_brighntess_source { + LP8551_PWM_ONLY = LP8550_PWM_ONLY, + LP8551_I2C_ONLY = LP8550_I2C_ONLY, +}; + +enum lp8552_brighntess_source { + LP8552_PWM_ONLY = LP8550_PWM_ONLY, + LP8552_I2C_ONLY = LP8550_I2C_ONLY, +}; + +enum lp8553_brighntess_source { + 
LP8553_PWM_ONLY = LP8550_PWM_ONLY, + LP8553_I2C_ONLY = LP8550_I2C_ONLY, +}; + +enum lp8556_brightness_source { + LP8556_PWM_ONLY, + LP8556_COMBINED1, /* pwm + i2c before the shaper block */ + LP8556_I2C_ONLY, + LP8556_COMBINED2, /* pwm + i2c after the shaper block */ +}; + +struct lp855x_pwm_data { + void (*pwm_set_intensity) (int brightness, int max_brightness); + int (*pwm_get_intensity) (int max_brightness); +}; + +struct lp855x_rom_data { + u8 addr; + u8 val; +}; + +/** + * struct lp855x_platform_data + * @name : Backlight driver name. If it is not defined, default name is set. + * @mode : brightness control by pwm or lp855x register + * @device_control : value of DEVICE CONTROL register + * @initial_brightness : initial value of backlight brightness + * @pwm_data : platform specific pwm generation functions. + Only valid when mode is PWM_BASED. + * @load_new_rom_data : + 0 : use default configuration data + 1 : update values of eeprom or eprom registers on loading driver + * @size_program : total size of lp855x_rom_data + * @rom_data : list of new eeprom/eprom registers + */ +struct lp855x_platform_data { + char *name; + enum lp855x_brightness_ctrl_mode mode; + u8 device_control; + int initial_brightness; + struct lp855x_pwm_data pwm_data; + u8 load_new_rom_data; + int size_program; + struct lp855x_rom_data *rom_data; +}; + +#endif -- cgit v1.2.3-70-g09d2 From 307b1cd7ecd7f3dc5ce3d3860957f034f0abe4df Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 23 Mar 2012 15:02:03 -0700 Subject: bitops: rename for_each_set_bit_cont() in favor of analogous list.h function This renames for_each_set_bit_cont() to for_each_set_bit_from() because it is analogous to list_for_each_entry_from() in list.h rather than list_for_each_entry_continue(). This doesn't remove for_each_set_bit_cont() for now. Signed-off-by: Akinobu Mita Cc: Robert Richter Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/cpu/perf_event.c | 4 ++-- include/linux/bitops.h | 5 ++++- tools/perf/util/include/linux/bitops.h | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 0a18d16cb58..fa2900c0e39 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -643,14 +643,14 @@ static bool __perf_sched_find_counter(struct perf_sched *sched) /* Prefer fixed purpose counters */ if (x86_pmu.num_counters_fixed) { idx = X86_PMC_IDX_FIXED; - for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) { + for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) { if (!__test_and_set_bit(idx, sched->state.used)) goto done; } } /* Grab the first unused counter starting with idx */ idx = sched->state.counter; - for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_FIXED) { + for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_FIXED) { if (!__test_and_set_bit(idx, sched->state.used)) goto done; } diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 94300fe46cc..a78e358f0c1 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -27,11 +27,14 @@ extern unsigned long __sw_hweight64(__u64 w); (bit) = find_next_bit((addr), (size), (bit) + 1)) /* same as for_each_set_bit() but use bit as value to start with */ -#define for_each_set_bit_cont(bit, addr, size) \ +#define for_each_set_bit_from(bit, addr, size) \ for ((bit) = find_next_bit((addr), (size), (bit)); \ (bit) < (size); \ (bit) = find_next_bit((addr), (size), (bit) + 1)) +#define for_each_set_bit_cont(bit, addr, size) \ + for_each_set_bit_from(bit, addr, size) + static __inline__ int get_bitmask_order(unsigned int count) { int order; diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h index 62cdee78db7..f1584833bd2 100644 --- a/tools/perf/util/include/linux/bitops.h 
+++ b/tools/perf/util/include/linux/bitops.h @@ -15,7 +15,7 @@ (bit) = find_next_bit((addr), (size), (bit) + 1)) /* same as for_each_set_bit() but use bit as value to start with */ -#define for_each_set_bit_cont(bit, addr, size) \ +#define for_each_set_bit_from(bit, addr, size) \ for ((bit) = find_next_bit((addr), (size), (bit)); \ (bit) < (size); \ (bit) = find_next_bit((addr), (size), (bit) + 1)) -- cgit v1.2.3-70-g09d2 From 0a329d2d5a1dd75273597538cdc33512ee38855e Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 23 Mar 2012 15:02:04 -0700 Subject: bitops: remove for_each_set_bit_cont() Remove for_each_set_bit_cont() after confirming that no one uses for_each_set_bit_cont() anymore. [sfr@canb.auug.org.au: regmap: cope with bitops API change] Signed-off-by: Akinobu Mita Signed-off-by: Stephen Rothwell Cc: Robert Richter Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Mark Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/regmap/regcache-lzo.c | 2 +- include/linux/bitops.h | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/base/regmap/regcache-lzo.c b/drivers/base/regmap/regcache-lzo.c index 8d006156932..77dc5327228 100644 --- a/drivers/base/regmap/regcache-lzo.c +++ b/drivers/base/regmap/regcache-lzo.c @@ -341,7 +341,7 @@ static int regcache_lzo_sync(struct regmap *map, unsigned int min, lzo_blocks = map->cache; i = min; - for_each_set_bit_cont(i, lzo_blocks[0]->sync_bmp, + for_each_set_bit_from(i, lzo_blocks[0]->sync_bmp, lzo_blocks[0]->sync_bmp_nbits) { if (i > max) continue; diff --git a/include/linux/bitops.h b/include/linux/bitops.h index a78e358f0c1..348b1dca477 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -32,9 +32,6 @@ extern unsigned long __sw_hweight64(__u64 w); (bit) < (size); \ (bit) = find_next_bit((addr), (size), (bit) + 1)) -#define for_each_set_bit_cont(bit, addr, size) \ - for_each_set_bit_from(bit, addr, size) - static 
__inline__ int get_bitmask_order(unsigned int count) { int order; -- cgit v1.2.3-70-g09d2 From 03f4a8226c2f9c14361f75848d1e93139bab90c4 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 23 Mar 2012 15:02:04 -0700 Subject: bitops: introduce for_each_clear_bit() Introduce for_each_clear_bit() and for_each_clear_bit_from(). They are similar to for_each_set_bit() and list_for_each_set_bit_from(), but they iterate over all the cleared bits in a memory region. Signed-off-by: Akinobu Mita Cc: Robert Richter Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: David Woodhouse Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Stefano Panella Cc: David Vrabel Cc: Sergei Shtylyov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitops.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 348b1dca477..a3b6b82108b 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -32,6 +32,17 @@ extern unsigned long __sw_hweight64(__u64 w); (bit) < (size); \ (bit) = find_next_bit((addr), (size), (bit) + 1)) +#define for_each_clear_bit(bit, addr, size) \ + for ((bit) = find_first_zero_bit((addr), (size)); \ + (bit) < (size); \ + (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) + +/* same as for_each_clear_bit() but use bit as value to start with */ +#define for_each_clear_bit_from(bit, addr, size) \ + for ((bit) = find_next_zero_bit((addr), (size), (bit)); \ + (bit) < (size); \ + (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) + static __inline__ int get_bitmask_order(unsigned int count) { int order; -- cgit v1.2.3-70-g09d2 From 5ae4e8a77dc82afcfe8460168ec0b94f4b79a54a Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Fri, 23 Mar 2012 15:02:08 -0700 Subject: drivers/leds/leds-lp5521.c: add 'name' in the lp5521_led_config The name of each led channel can be configurable. 
For the compatibility, the name is set to default value(xx:channelN) when 'name' is not defined. Signed-off-by: Milo(Woogyom) Kim Acked-by: Linus Walleij Cc: Arun MURTHY Cc: Srinidhi Kasagar Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/leds/leds-lp5521.txt | 6 ++++++ drivers/leds/leds-lp5521.c | 11 ++++++++--- include/linux/leds-lp5521.h | 1 + 3 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/Documentation/leds/leds-lp5521.txt b/Documentation/leds/leds-lp5521.txt index c4d8d151e0f..f48ab757d12 100644 --- a/Documentation/leds/leds-lp5521.txt +++ b/Documentation/leds/leds-lp5521.txt @@ -43,17 +43,23 @@ Format: 10x mA i.e 10 means 1.0 mA example platform data: Note: chan_nr can have values between 0 and 2. +The name of each channel can be configurable. +If the name field is not defined, the default name will be set to 'xxxx:channelN' +(XXXX : pdata->label or i2c client name, N : channel number) static struct lp5521_led_config lp5521_led_config[] = { { + .name = "red", .chan_nr = 0, .led_current = 50, .max_current = 130, }, { + .name = "green", .chan_nr = 1, .led_current = 0, .max_current = 130, }, { + .name = "blue", .chan_nr = 2, .led_current = 0, .max_current = 130, diff --git a/drivers/leds/leds-lp5521.c b/drivers/leds/leds-lp5521.c index c42c8f04956..59feecdfe3a 100644 --- a/drivers/leds/leds-lp5521.c +++ b/drivers/leds/leds-lp5521.c @@ -620,10 +620,15 @@ static int __devinit lp5521_init_led(struct lp5521_led *led, return -EINVAL; } - snprintf(name, sizeof(name), "%s:channel%d", - pdata->label ?: client->name, chan); led->cdev.brightness_set = lp5521_set_brightness; - led->cdev.name = name; + if (pdata->led_config[chan].name) { + led->cdev.name = pdata->led_config[chan].name; + } else { + snprintf(name, sizeof(name), "%s:channel%d", + pdata->label ?: client->name, chan); + led->cdev.name = name; + } + res = led_classdev_register(dev, &led->cdev); if (res < 0) { dev_err(dev, 
"couldn't register led on channel %d\n", chan); diff --git a/include/linux/leds-lp5521.h b/include/linux/leds-lp5521.h index fd548d2a877..e675b8d4c7b 100644 --- a/include/linux/leds-lp5521.h +++ b/include/linux/leds-lp5521.h @@ -26,6 +26,7 @@ /* See Documentation/leds/leds-lp5521.txt */ struct lp5521_led_config { + char *name; u8 chan_nr; u8 led_current; /* mA x10, 0 if led is not connected */ u8 max_current; -- cgit v1.2.3-70-g09d2 From 3b49aacd0e56d5bf1b511f6554f17cd65eb8da64 Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Fri, 23 Mar 2012 15:02:08 -0700 Subject: drivers/leds/leds-lp5521.c: add 'update_config' in the lp5521_platform_data The value of CONFIG register(Addr 08h) is configurable. For supporting this feature, update_config is added in the platform data. If 'update_config' is not defined, the default value is 'LP5521_PWRSAVE_EN | LP5521_CP_MODE_AUTO | LP5521_R_TO_BATT'. To define CONFIG register in the platform data, the bit definitions were moved to the header file. Documentation updated : description about 'update_config' and example. Signed-off-by: Milo(Woogyom) Kim Acked-by: Linus Walleij Cc: Arun MURTHY Cc: Srinidhi Kasagar Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/leds/leds-lp5521.txt | 19 +++++++++++++++++++ drivers/leds/leds-lp5521.c | 19 ++++--------------- include/linux/leds-lp5521.h | 13 +++++++++++++ 3 files changed, 36 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/Documentation/leds/leds-lp5521.txt b/Documentation/leds/leds-lp5521.txt index f48ab757d12..e3c66c64591 100644 --- a/Documentation/leds/leds-lp5521.txt +++ b/Documentation/leds/leds-lp5521.txt @@ -92,3 +92,22 @@ static struct lp5521_platform_data lp5521_platform_data = { If the current is set to 0 in the platform data, that channel is disabled and it is not visible in the sysfs. + +The 'update_config' : CONFIG register (ADDR 08h) +This value is platform-specific data. 
+If update_config is not defined, the CONFIG register is set with +'LP5521_PWRSAVE_EN | LP5521_CP_MODE_AUTO | LP5521_R_TO_BATT'. +(Enable auto-powersave, set charge pump to auto, red to battery) + +example of update_config : + +#define LP5521_CONFIGS (LP5521_PWM_HF | LP5521_PWRSAVE_EN | \ + LP5521_CP_MODE_AUTO | LP5521_R_TO_BATT | \ + LP5521_CLK_INT) + +static struct lp5521_platform_data lp5521_pdata = { + .led_config = lp5521_led_config, + .num_channels = ARRAY_SIZE(lp5521_led_config), + .clock_mode = LP5521_CLOCK_INT, + .update_config = LP5521_CONFIGS, +}; diff --git a/drivers/leds/leds-lp5521.c b/drivers/leds/leds-lp5521.c index 59feecdfe3a..9682ece1601 100644 --- a/drivers/leds/leds-lp5521.c +++ b/drivers/leds/leds-lp5521.c @@ -82,18 +82,6 @@ #define LP5521_LOGARITHMIC_PWM 0x80 /* Logarithmic PWM adjustment */ #define LP5521_EXEC_RUN 0x2A -/* Bits in CONFIG register */ -#define LP5521_PWM_HF 0x40 /* PWM: 0 = 256Hz, 1 = 558Hz */ -#define LP5521_PWRSAVE_EN 0x20 /* 1 = Power save mode */ -#define LP5521_CP_MODE_OFF 0 /* Charge pump (CP) off */ -#define LP5521_CP_MODE_BYPASS 8 /* CP forced to bypass mode */ -#define LP5521_CP_MODE_1X5 0x10 /* CP forced to 1.5x mode */ -#define LP5521_CP_MODE_AUTO 0x18 /* Automatic mode selection */ -#define LP5521_R_TO_BATT 4 /* R out: 0 = CP, 1 = Vbat */ -#define LP5521_CLK_SRC_EXT 0 /* Ext-clk source (CLK_32K) */ -#define LP5521_CLK_INT 1 /* Internal clock */ -#define LP5521_CLK_AUTO 2 /* Automatic clock selection */ - /* Status */ #define LP5521_EXT_CLK_USED 0x08 @@ -241,15 +229,16 @@ static int lp5521_configure(struct i2c_client *client) { struct lp5521_chip *chip = i2c_get_clientdata(client); int ret; + u8 cfg; lp5521_init_engine(chip); /* Set all PWMs to direct control mode */ ret = lp5521_write(client, LP5521_REG_OP_MODE, 0x3F); - /* Enable auto-powersave, set charge pump to auto, red to battery */ - ret |= lp5521_write(client, LP5521_REG_CONFIG, - LP5521_PWRSAVE_EN | LP5521_CP_MODE_AUTO | LP5521_R_TO_BATT); + cfg = 
chip->pdata->update_config ? + : (LP5521_PWRSAVE_EN | LP5521_CP_MODE_AUTO | LP5521_R_TO_BATT); + ret |= lp5521_write(client, LP5521_REG_CONFIG, cfg); /* Initialize all channels PWM to zero -> leds off */ ret |= lp5521_write(client, LP5521_REG_R_PWM, 0); diff --git a/include/linux/leds-lp5521.h b/include/linux/leds-lp5521.h index e675b8d4c7b..e9ab583cac3 100644 --- a/include/linux/leds-lp5521.h +++ b/include/linux/leds-lp5521.h @@ -36,6 +36,18 @@ struct lp5521_led_config { #define LP5521_CLOCK_INT 1 #define LP5521_CLOCK_EXT 2 +/* Bits in CONFIG register */ +#define LP5521_PWM_HF 0x40 /* PWM: 0 = 256Hz, 1 = 558Hz */ +#define LP5521_PWRSAVE_EN 0x20 /* 1 = Power save mode */ +#define LP5521_CP_MODE_OFF 0 /* Charge pump (CP) off */ +#define LP5521_CP_MODE_BYPASS 8 /* CP forced to bypass mode */ +#define LP5521_CP_MODE_1X5 0x10 /* CP forced to 1.5x mode */ +#define LP5521_CP_MODE_AUTO 0x18 /* Automatic mode selection */ +#define LP5521_R_TO_BATT 4 /* R out: 0 = CP, 1 = Vbat */ +#define LP5521_CLK_SRC_EXT 0 /* Ext-clk source (CLK_32K) */ +#define LP5521_CLK_INT 1 /* Internal clock */ +#define LP5521_CLK_AUTO 2 /* Automatic clock selection */ + struct lp5521_platform_data { struct lp5521_led_config *led_config; u8 num_channels; @@ -44,6 +56,7 @@ struct lp5521_platform_data { void (*release_resources)(void); void (*enable)(bool state); const char *label; + u8 update_config; }; #endif /* __LINUX_LP5521_H */ -- cgit v1.2.3-70-g09d2 From 011af7bc7cd188a0310e2d26cdc2cc5d90148b0c Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Fri, 23 Mar 2012 15:02:09 -0700 Subject: drivers/leds/leds-lp5521.c: support led pattern data The lp5521 has autonomous operation mode without external control. Using lp5521_platform_data, various led patterns can be configurable. For supporting this feature, new functions and device attribute are added. Structure of lp5521_led_pattern: 3 channels are supported - red, green and blue. 
Pattern(s) of each channel and numbers of pattern(s) are defined in the platform data. Pattern data are hexa codes which include pattern commands such like set pwm, wait, ramp up/down, branch and so on. Pattern mode functions: * lp5521_clear_program_memory Before running new led pattern, program memory should be cleared. * lp5521_write_program_memory Pattern data updated in the program memory via the i2c. * lp5521_get_pattern Get pattern from predefined in the platform data. * lp5521_run_led_pattern Stop current pattern or run new pattern. Transition time is required between different operation mode. Device attribute - 'led_pattern': To load specific led pattern, new device attribute is added. When the lp5521 driver is unloaded, stop current led pattern mode. Documentation updated : description about how to define the led patterns and example. [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Milo(Woogyom) Kim Acked-by: Linus Walleij Cc: Arun MURTHY Cc: Srinidhi Kasagar Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/leds/leds-lp5521.txt | 38 ++++++++++++++ drivers/leds/leds-lp5521.c | 102 ++++++++++++++++++++++++++++++++++++- include/linux/leds-lp5521.h | 11 ++++ 3 files changed, 150 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/leds/leds-lp5521.txt b/Documentation/leds/leds-lp5521.txt index e3c66c64591..0e542ab3d4a 100644 --- a/Documentation/leds/leds-lp5521.txt +++ b/Documentation/leds/leds-lp5521.txt @@ -111,3 +111,41 @@ static struct lp5521_platform_data lp5521_pdata = { .clock_mode = LP5521_CLOCK_INT, .update_config = LP5521_CONFIGS, }; + +LED patterns : LP5521 has autonomous operation without external control. +Pattern data can be defined in the platform data. 
+ +example of led pattern data : + +/* RGB(50,5,0) 500ms on, 500ms off, infinite loop */ +static u8 pattern_red[] = { + 0x40, 0x32, 0x60, 0x00, 0x40, 0x00, 0x60, 0x00, + }; + +static u8 pattern_green[] = { + 0x40, 0x05, 0x60, 0x00, 0x40, 0x00, 0x60, 0x00, + }; + +static struct lp5521_led_pattern board_led_patterns[] = { + { + .r = pattern_red, + .g = pattern_green, + .size_r = ARRAY_SIZE(pattern_red), + .size_g = ARRAY_SIZE(pattern_green), + }, +}; + +static struct lp5521_platform_data lp5521_platform_data = { + .led_config = lp5521_led_config, + .num_channels = ARRAY_SIZE(lp5521_led_config), + .clock_mode = LP5521_CLOCK_EXT, + .patterns = board_led_patterns, + .num_patterns = ARRAY_SIZE(board_led_patterns), +}; + +Then predefined led pattern(s) can be executed via the sysfs. +To start the pattern #1, +# echo 1 > /sys/bus/i2c/devices/xxxx/led_pattern +(xxxx : i2c bus & slave address) +To end the pattern, +# echo 0 > /sys/bus/i2c/devices/xxxx/led_pattern diff --git a/drivers/leds/leds-lp5521.c b/drivers/leds/leds-lp5521.c index 9682ece1601..007c7c921e7 100644 --- a/drivers/leds/leds-lp5521.c +++ b/drivers/leds/leds-lp5521.c @@ -88,6 +88,9 @@ /* default R channel current register value */ #define LP5521_REG_R_CURR_DEFAULT 0xAF +/* Pattern Mode */ +#define PATTERN_OFF 0 + struct lp5521_engine { int id; u8 mode; @@ -493,7 +496,7 @@ static ssize_t store_current(struct device *dev, ssize_t ret; unsigned long curr; - if (strict_strtoul(buf, 0, &curr)) + if (kstrtoul(buf, 0, &curr)) return -EINVAL; if (curr > led->max_current) @@ -525,6 +528,100 @@ static ssize_t lp5521_selftest(struct device *dev, return sprintf(buf, "%s\n", ret ? 
"FAIL" : "OK"); } +static void lp5521_clear_program_memory(struct i2c_client *cl) +{ + int i; + u8 rgb_mem[] = { + LP5521_REG_R_PROG_MEM, + LP5521_REG_G_PROG_MEM, + LP5521_REG_B_PROG_MEM, + }; + + for (i = 0; i < ARRAY_SIZE(rgb_mem); i++) { + lp5521_write(cl, rgb_mem[i], 0); + lp5521_write(cl, rgb_mem[i] + 1, 0); + } +} + +static void lp5521_write_program_memory(struct i2c_client *cl, + u8 base, u8 *rgb, int size) +{ + int i; + + if (!rgb || size <= 0) + return; + + for (i = 0; i < size; i++) + lp5521_write(cl, base + i, *(rgb + i)); + + lp5521_write(cl, base + i, 0); + lp5521_write(cl, base + i + 1, 0); +} + +static inline struct lp5521_led_pattern *lp5521_get_pattern + (struct lp5521_chip *chip, u8 offset) +{ + struct lp5521_led_pattern *ptn; + ptn = chip->pdata->patterns + (offset - 1); + return ptn; +} + +static void lp5521_run_led_pattern(int mode, struct lp5521_chip *chip) +{ + struct lp5521_led_pattern *ptn; + struct i2c_client *cl = chip->client; + int num_patterns = chip->pdata->num_patterns; + + if (mode > num_patterns || !(chip->pdata->patterns)) + return; + + if (mode == PATTERN_OFF) { + lp5521_write(cl, LP5521_REG_ENABLE, + LP5521_MASTER_ENABLE | LP5521_LOGARITHMIC_PWM); + usleep_range(1000, 2000); + lp5521_write(cl, LP5521_REG_OP_MODE, LP5521_CMD_DIRECT); + } else { + ptn = lp5521_get_pattern(chip, mode); + if (!ptn) + return; + + lp5521_write(cl, LP5521_REG_OP_MODE, LP5521_CMD_LOAD); + usleep_range(1000, 2000); + + lp5521_clear_program_memory(cl); + + lp5521_write_program_memory(cl, LP5521_REG_R_PROG_MEM, + ptn->r, ptn->size_r); + lp5521_write_program_memory(cl, LP5521_REG_G_PROG_MEM, + ptn->g, ptn->size_g); + lp5521_write_program_memory(cl, LP5521_REG_B_PROG_MEM, + ptn->b, ptn->size_b); + + lp5521_write(cl, LP5521_REG_OP_MODE, LP5521_CMD_RUN); + usleep_range(1000, 2000); + lp5521_write(cl, LP5521_REG_ENABLE, + LP5521_MASTER_ENABLE | LP5521_LOGARITHMIC_PWM | + LP5521_EXEC_RUN); + } +} + +static ssize_t store_led_pattern(struct device *dev, + struct 
device_attribute *attr, + const char *buf, size_t len) +{ + struct lp5521_chip *chip = i2c_get_clientdata(to_i2c_client(dev)); + unsigned long val; + int ret; + + ret = strict_strtoul(buf, 16, &val); + if (ret) + return ret; + + lp5521_run_led_pattern(val, chip); + + return len; +} + /* led class device attributes */ static DEVICE_ATTR(led_current, S_IRUGO | S_IWUSR, show_current, store_current); static DEVICE_ATTR(max_current, S_IRUGO , show_max_current, NULL); @@ -550,6 +647,7 @@ static DEVICE_ATTR(engine1_load, S_IWUSR, NULL, store_engine1_load); static DEVICE_ATTR(engine2_load, S_IWUSR, NULL, store_engine2_load); static DEVICE_ATTR(engine3_load, S_IWUSR, NULL, store_engine3_load); static DEVICE_ATTR(selftest, S_IRUGO, lp5521_selftest, NULL); +static DEVICE_ATTR(led_pattern, S_IWUSR, NULL, store_led_pattern); static struct attribute *lp5521_attributes[] = { &dev_attr_engine1_mode.attr, @@ -559,6 +657,7 @@ static struct attribute *lp5521_attributes[] = { &dev_attr_engine1_load.attr, &dev_attr_engine2_load.attr, &dev_attr_engine3_load.attr, + &dev_attr_led_pattern.attr, NULL }; @@ -761,6 +860,7 @@ static int __devexit lp5521_remove(struct i2c_client *client) struct lp5521_chip *chip = i2c_get_clientdata(client); int i; + lp5521_run_led_pattern(PATTERN_OFF, chip); lp5521_unregister_sysfs(client); for (i = 0; i < chip->num_leds; i++) { diff --git a/include/linux/leds-lp5521.h b/include/linux/leds-lp5521.h index e9ab583cac3..3f071ec019b 100644 --- a/include/linux/leds-lp5521.h +++ b/include/linux/leds-lp5521.h @@ -32,6 +32,15 @@ struct lp5521_led_config { u8 max_current; }; +struct lp5521_led_pattern { + u8 *r; + u8 *g; + u8 *b; + u8 size_r; + u8 size_g; + u8 size_b; +}; + #define LP5521_CLOCK_AUTO 0 #define LP5521_CLOCK_INT 1 #define LP5521_CLOCK_EXT 2 @@ -57,6 +66,8 @@ struct lp5521_platform_data { void (*enable)(bool state); const char *label; u8 update_config; + struct lp5521_led_pattern *patterns; + int num_patterns; }; #endif /* __LINUX_LP5521_H */ -- cgit 
v1.2.3-70-g09d2 From bb982009d3850759d3f4a4c853f9c456c48b6c2d Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Fri, 23 Mar 2012 15:02:12 -0700 Subject: leds-lm3530: support pwm input mode * add 'struct lm3530_pwm_data' in the platform data The pwm data is the platform specific functions which generate the pwm. The pwm data is only valid when brightness is pwm input mode. Functions should be implemented by the pwm driver. pwm_set_intensity() : set duty of pwm. pwm_get_intensity() : get current the brightness. * brightness control by pwm If the control mode is pwm, then brightness is changed by the duty of pwm. So pwm platform function should be called in lm3530_brightness_set(). * do not update brightness register when pwm input mode In pwm input mode, brightness register is not used. If any value is updated in this register, then the led will be off. * when input mode is changed, set duty of pwm to 0 if unnecessary. Signed-off-by: Milo(Woogyom) Kim Cc: Linus Walleij Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/leds/leds-lm3530.c | 32 ++++++++++++++++++++++++-------- include/linux/led-lm3530.h | 9 +++++++++ 2 files changed, 33 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/leds/leds-lm3530.c b/drivers/leds/leds-lm3530.c index ce79523a4d1..a889311eead 100644 --- a/drivers/leds/leds-lm3530.c +++ b/drivers/leds/leds-lm3530.c @@ -157,6 +157,7 @@ static int lm3530_init_registers(struct lm3530_data *drvdata) u32 als_vmin, als_vmax, als_vstep; struct lm3530_platform_data *pltfm = drvdata->pdata; struct i2c_client *client = drvdata->client; + struct lm3530_pwm_data *pwm = &pltfm->pwm_data; gen_config = (pltfm->brt_ramp_law << LM3530_RAMP_LAW_SHIFT) | ((pltfm->max_current & 7) << LM3530_MAX_CURR_SHIFT); @@ -240,6 +241,15 @@ static int lm3530_init_registers(struct lm3530_data *drvdata) } for (i = 0; i < LM3530_REG_MAX; i++) { + /* do not update brightness register when pwm mode */ + if (lm3530_reg[i] == 
LM3530_BRT_CTRL_REG && + drvdata->mode == LM3530_BL_MODE_PWM) { + if (pwm->pwm_set_intensity) + pwm->pwm_set_intensity(reg_val[i], + drvdata->led_dev.max_brightness); + continue; + } + ret = i2c_smbus_write_byte_data(client, lm3530_reg[i], reg_val[i]); if (ret) @@ -255,6 +265,9 @@ static void lm3530_brightness_set(struct led_classdev *led_cdev, int err; struct lm3530_data *drvdata = container_of(led_cdev, struct lm3530_data, led_dev); + struct lm3530_platform_data *pdata = drvdata->pdata; + struct lm3530_pwm_data *pwm = &pdata->pwm_data; + u8 max_brightness = led_cdev->max_brightness; switch (drvdata->mode) { case LM3530_BL_MODE_MANUAL: @@ -288,6 +301,8 @@ static void lm3530_brightness_set(struct led_classdev *led_cdev, case LM3530_BL_MODE_ALS: break; case LM3530_BL_MODE_PWM: + if (pwm->pwm_set_intensity) + pwm->pwm_set_intensity(brt_val, max_brightness); break; default: break; @@ -318,23 +333,24 @@ static ssize_t lm3530_mode_set(struct device *dev, struct device_attribute { struct led_classdev *led_cdev = dev_get_drvdata(dev); struct lm3530_data *drvdata; + struct lm3530_pwm_data *pwm; + u8 max_brightness; int mode, err; drvdata = container_of(led_cdev, struct lm3530_data, led_dev); + pwm = &drvdata->pdata->pwm_data; + max_brightness = led_cdev->max_brightness; mode = lm3530_get_mode_from_str(buf); if (mode < 0) { dev_err(dev, "Invalid mode\n"); return -EINVAL; } - if (mode == LM3530_BL_MODE_MANUAL) - drvdata->mode = LM3530_BL_MODE_MANUAL; - else if (mode == LM3530_BL_MODE_ALS) - drvdata->mode = LM3530_BL_MODE_ALS; - else if (mode == LM3530_BL_MODE_PWM) { - dev_err(dev, "PWM mode not supported\n"); - return -EINVAL; - } + drvdata->mode = mode; + + /* set pwm to low if unnecessary */ + if (mode != LM3530_BL_MODE_PWM && pwm->pwm_set_intensity) + pwm->pwm_set_intensity(0, max_brightness); err = lm3530_init_registers(drvdata); if (err) { diff --git a/include/linux/led-lm3530.h b/include/linux/led-lm3530.h index 8eb12357a11..eeae6e74247 100644 --- 
a/include/linux/led-lm3530.h +++ b/include/linux/led-lm3530.h @@ -72,6 +72,12 @@ enum lm3530_als_mode { LM3530_INPUT_CEIL, /* Max of ALS1 and ALS2 */ }; +/* PWM Platform Specific Data */ +struct lm3530_pwm_data { + void (*pwm_set_intensity) (int brightness, int max_brightness); + int (*pwm_get_intensity) (int max_brightness); +}; + /** * struct lm3530_platform_data * @mode: mode of operation i.e. Manual, ALS or PWM @@ -87,6 +93,7 @@ enum lm3530_als_mode { * @als_vmin: als input voltage calibrated for max brightness in mV * @als_vmax: als input voltage calibrated for min brightness in mV * @brt_val: brightness value (0-255) + * @pwm_data: PWM control functions (only valid when the mode is PWM) */ struct lm3530_platform_data { enum lm3530_mode mode; @@ -107,6 +114,8 @@ struct lm3530_platform_data { u32 als_vmax; u8 brt_val; + + struct lm3530_pwm_data pwm_data; }; #endif /* _LINUX_LED_LM3530_H__ */ -- cgit v1.2.3-70-g09d2 From 6061d949dd984c762ee272a88e77699fa675d1c8 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 23 Mar 2012 15:02:16 -0700 Subject: include/ and checkpatch: prefer __scanf to __attribute__((format(scanf,...) It's equivalent to __printf, so prefer __scanf. 
Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 3 ++- include/linux/kernel.h | 8 ++++---- include/xen/xenbus.h | 4 ++-- scripts/checkpatch.pl | 6 ++++++ 4 files changed, 14 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 3fd17c24922..e5834aa24b9 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -87,7 +87,8 @@ */ #define __pure __attribute__((pure)) #define __aligned(x) __attribute__((aligned(x))) -#define __printf(a,b) __attribute__((format(printf,a,b))) +#define __printf(a, b) __attribute__((format(printf, a, b))) +#define __scanf(a, b) __attribute__((format(scanf, a, b))) #define noinline __attribute__((noinline)) #define __attribute_const__ __attribute__((__const__)) #define __maybe_unused __attribute__((unused)) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d801acb5e68..f2085b541a2 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -328,10 +328,10 @@ extern __printf(2, 3) char *kasprintf(gfp_t gfp, const char *fmt, ...); extern char *kvasprintf(gfp_t gfp, const char *fmt, va_list args); -extern int sscanf(const char *, const char *, ...) 
- __attribute__ ((format (scanf, 2, 3))); -extern int vsscanf(const char *, const char *, va_list) - __attribute__ ((format (scanf, 2, 0))); +extern __scanf(2, 3) +int sscanf(const char *, const char *, ...); +extern __scanf(2, 0) +int vsscanf(const char *, const char *, va_list); extern int get_option(char **str, int *pint); extern char *get_options(const char *str, int nints, int *ints); diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index e8c599b237c..0a7515c1e3a 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -139,9 +139,9 @@ int xenbus_transaction_start(struct xenbus_transaction *t); int xenbus_transaction_end(struct xenbus_transaction t, int abort); /* Single read and scanf: returns -errno or num scanned if > 0. */ +__scanf(4, 5) int xenbus_scanf(struct xenbus_transaction t, - const char *dir, const char *node, const char *fmt, ...) - __attribute__((format(scanf, 4, 5))); + const char *dir, const char *node, const char *fmt, ...); /* Single printf and write: returns -errno or 0. */ __printf(4, 5) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index a3b9782441f..89d24b3ea43 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3123,6 +3123,12 @@ sub process { "__printf(string-index, first-to-check) is preferred over __attribute__((format(printf, string-index, first-to-check)))\n" . $herecurr); } +# Check for __attribute__ format(scanf, prefer __scanf + if ($line =~ /\b__attribute__\s*\(\s*\(\s*format\s*\(\s*scanf\b/) { + WARN("PREFER_SCANF", + "__scanf(string-index, first-to-check) is preferred over __attribute__((format(scanf, string-index, first-to-check)))\n" . $herecurr); + } + # check for sizeof(&) if ($line =~ /\bsizeof\s*\(\s*\&/) { WARN("SIZEOF_ADDRESS", -- cgit v1.2.3-70-g09d2 From 46c5801eaf86e83cb3a4142ad35188db5011fff0 Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Fri, 23 Mar 2012 15:02:25 -0700 Subject: crc32: bolt on crc32c Reuse the existing crc32 code to stamp out a crc32c implementation. Signed-off-by: Darrick J. Wong Cc: Herbert Xu Cc: Bob Pearson Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/crc32.h | 2 ++ lib/Kconfig | 8 +++--- lib/crc32.c | 79 ++++++++++++++++++++++++++++++++++++--------------- lib/crc32defs.h | 7 +++++ lib/gen_crc32table.c | 35 ++++++++++++++++++----- 5 files changed, 97 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/linux/crc32.h b/include/linux/crc32.h index 391a259b2cc..68267b64bb9 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h @@ -11,6 +11,8 @@ extern u32 crc32_le(u32 crc, unsigned char const *p, size_t len); extern u32 crc32_be(u32 crc, unsigned char const *p, size_t len); +extern u32 __crc32c_le(u32 crc, unsigned char const *p, size_t len); + #define crc32(seed, data, length) crc32_le(seed, (unsigned char const *)(data), length) /* diff --git a/lib/Kconfig b/lib/Kconfig index d5a86aa441a..6d7ce4b138c 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -61,14 +61,14 @@ config CRC_ITU_T functions require M here. config CRC32 - tristate "CRC32 functions" + tristate "CRC32/CRC32c functions" default y select BITREVERSE help This option is provided for the case where no in-kernel-tree - modules require CRC32 functions, but a module built outside the - kernel tree does. Such modules that use library CRC32 functions - require M here. + modules require CRC32/CRC32c functions, but a module built outside + the kernel tree does. Such modules that use library CRC32/CRC32c + functions require M here. 
config CRC32_SELFTEST bool "CRC32 perform self test on init" diff --git a/lib/crc32.c b/lib/crc32.c index a1a5145a93b..87678dd1141 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -46,7 +46,7 @@ #include "crc32table.h" MODULE_AUTHOR("Matt Domsch "); -MODULE_DESCRIPTION("Ethernet CRC32 calculations"); +MODULE_DESCRIPTION("Various CRC32 calculations"); MODULE_LICENSE("GPL"); #if CRC_LE_BITS > 8 || CRC_BE_BITS > 8 @@ -135,45 +135,66 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) * @p: pointer to buffer over which CRC is run * @len: length of buffer @p */ -u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) +static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p, + size_t len, const u32 (*tab)[256], + u32 polynomial) { #if CRC_LE_BITS == 1 int i; while (len--) { crc ^= *p++; for (i = 0; i < 8; i++) - crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0); + crc = (crc >> 1) ^ ((crc & 1) ? polynomial : 0); } # elif CRC_LE_BITS == 2 while (len--) { crc ^= *p++; - crc = (crc >> 2) ^ crc32table_le[0][crc & 3]; - crc = (crc >> 2) ^ crc32table_le[0][crc & 3]; - crc = (crc >> 2) ^ crc32table_le[0][crc & 3]; - crc = (crc >> 2) ^ crc32table_le[0][crc & 3]; + crc = (crc >> 2) ^ tab[0][crc & 3]; + crc = (crc >> 2) ^ tab[0][crc & 3]; + crc = (crc >> 2) ^ tab[0][crc & 3]; + crc = (crc >> 2) ^ tab[0][crc & 3]; } # elif CRC_LE_BITS == 4 while (len--) { crc ^= *p++; - crc = (crc >> 4) ^ crc32table_le[0][crc & 15]; - crc = (crc >> 4) ^ crc32table_le[0][crc & 15]; + crc = (crc >> 4) ^ tab[0][crc & 15]; + crc = (crc >> 4) ^ tab[0][crc & 15]; } # elif CRC_LE_BITS == 8 /* aka Sarwate algorithm */ while (len--) { crc ^= *p++; - crc = (crc >> 8) ^ crc32table_le[0][crc & 255]; + crc = (crc >> 8) ^ tab[0][crc & 255]; } # else - const u32 (*tab)[] = crc32table_le; - crc = (__force u32) __cpu_to_le32(crc); crc = crc32_body(crc, p, len, tab); crc = __le32_to_cpu((__force __le32)crc); #endif return crc; } + +#if CRC_LE_BITS == 1 +u32 
__pure crc32_le(u32 crc, unsigned char const *p, size_t len) +{ + return crc32_le_generic(crc, p, len, NULL, CRCPOLY_LE); +} +u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) +{ + return crc32_le_generic(crc, p, len, NULL, CRC32C_POLY_LE); +} +#else +u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) +{ + return crc32_le_generic(crc, p, len, crc32table_le, CRCPOLY_LE); +} +u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) +{ + return crc32_le_generic(crc, p, len, crc32ctable_le, CRC32C_POLY_LE); +} +#endif EXPORT_SYMBOL(crc32_le); +EXPORT_SYMBOL(__crc32c_le); /** * crc32_be() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32 @@ -182,7 +203,9 @@ EXPORT_SYMBOL(crc32_le); * @p: pointer to buffer over which CRC is run * @len: length of buffer @p */ -u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) +static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p, + size_t len, const u32 (*tab)[256], + u32 polynomial) { #if CRC_BE_BITS == 1 int i; @@ -190,37 +213,47 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) crc ^= *p++ << 24; for (i = 0; i < 8; i++) crc = - (crc << 1) ^ ((crc & 0x80000000) ? CRCPOLY_BE : + (crc << 1) ^ ((crc & 0x80000000) ? 
polynomial : 0); } # elif CRC_BE_BITS == 2 while (len--) { crc ^= *p++ << 24; - crc = (crc << 2) ^ crc32table_be[0][crc >> 30]; - crc = (crc << 2) ^ crc32table_be[0][crc >> 30]; - crc = (crc << 2) ^ crc32table_be[0][crc >> 30]; - crc = (crc << 2) ^ crc32table_be[0][crc >> 30]; + crc = (crc << 2) ^ tab[0][crc >> 30]; + crc = (crc << 2) ^ tab[0][crc >> 30]; + crc = (crc << 2) ^ tab[0][crc >> 30]; + crc = (crc << 2) ^ tab[0][crc >> 30]; } # elif CRC_BE_BITS == 4 while (len--) { crc ^= *p++ << 24; - crc = (crc << 4) ^ crc32table_be[0][crc >> 28]; - crc = (crc << 4) ^ crc32table_be[0][crc >> 28]; + crc = (crc << 4) ^ tab[0][crc >> 28]; + crc = (crc << 4) ^ tab[0][crc >> 28]; } # elif CRC_BE_BITS == 8 while (len--) { crc ^= *p++ << 24; - crc = (crc << 8) ^ crc32table_be[0][crc >> 24]; + crc = (crc << 8) ^ tab[0][crc >> 24]; } # else - const u32 (*tab)[] = crc32table_be; - crc = (__force u32) __cpu_to_be32(crc); crc = crc32_body(crc, p, len, tab); crc = __be32_to_cpu((__force __be32)crc); # endif return crc; } + +#if CRC_LE_BITS == 1 +u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) +{ + return crc32_be_generic(crc, p, len, NULL, CRCPOLY_BE); +} +#else +u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) +{ + return crc32_be_generic(crc, p, len, crc32table_be, CRCPOLY_BE); +} +#endif EXPORT_SYMBOL(crc32_be); #ifdef CONFIG_CRC32_SELFTEST diff --git a/lib/crc32defs.h b/lib/crc32defs.h index 81815928848..6fd191732fe 100644 --- a/lib/crc32defs.h +++ b/lib/crc32defs.h @@ -6,6 +6,13 @@ #define CRCPOLY_LE 0xedb88320 #define CRCPOLY_BE 0x04c11db7 +/* + * This is the CRC32c polynomial, as outlined by Castagnoli. + * x^32+x^28+x^27+x^26+x^25+x^23+x^22+x^20+x^19+x^18+x^14+x^13+x^11+x^10+x^9+ + * x^8+x^6+x^0 + */ +#define CRC32C_POLY_LE 0x82F63B78 + /* * How many bits at a time to use. Valid values are 1, 2, 4, 8, 32 and 64. * For less performance-sensitive, use 4 or 8 to save table size. 
diff --git a/lib/gen_crc32table.c b/lib/gen_crc32table.c index 0d9edd17ee1..8f8d5439e2d 100644 --- a/lib/gen_crc32table.c +++ b/lib/gen_crc32table.c @@ -23,6 +23,7 @@ static uint32_t crc32table_le[LE_TABLE_ROWS][256]; static uint32_t crc32table_be[BE_TABLE_ROWS][256]; +static uint32_t crc32ctable_le[LE_TABLE_ROWS][256]; /** * crc32init_le() - allocate and initialize LE table data @@ -31,27 +32,38 @@ static uint32_t crc32table_be[BE_TABLE_ROWS][256]; * fact that crctable[i^j] = crctable[i] ^ crctable[j]. * */ -static void crc32init_le(void) +static void crc32init_le_generic(const uint32_t polynomial, + uint32_t (*tab)[256]) { unsigned i, j; uint32_t crc = 1; - crc32table_le[0][0] = 0; + tab[0][0] = 0; for (i = LE_TABLE_SIZE >> 1; i; i >>= 1) { - crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0); + crc = (crc >> 1) ^ ((crc & 1) ? polynomial : 0); for (j = 0; j < LE_TABLE_SIZE; j += 2 * i) - crc32table_le[0][i + j] = crc ^ crc32table_le[0][j]; + tab[0][i + j] = crc ^ tab[0][j]; } for (i = 0; i < LE_TABLE_SIZE; i++) { - crc = crc32table_le[0][i]; + crc = tab[0][i]; for (j = 1; j < LE_TABLE_ROWS; j++) { - crc = crc32table_le[0][crc & 0xff] ^ (crc >> 8); - crc32table_le[j][i] = crc; + crc = tab[0][crc & 0xff] ^ (crc >> 8); + tab[j][i] = crc; } } } +static void crc32init_le(void) +{ + crc32init_le_generic(CRCPOLY_LE, crc32table_le); +} + +static void crc32cinit_le(void) +{ + crc32init_le_generic(CRC32C_POLY_LE, crc32ctable_le); +} + /** * crc32init_be() - allocate and initialize BE table data */ @@ -114,6 +126,15 @@ int main(int argc, char** argv) BE_TABLE_SIZE, "tobe"); printf("};\n"); } + if (CRC_LE_BITS > 1) { + crc32cinit_le(); + printf("static const u32 __cacheline_aligned " + "crc32ctable_le[%d][%d] = {", + LE_TABLE_ROWS, LE_TABLE_SIZE); + output_table(crc32ctable_le, LE_TABLE_ROWS, + LE_TABLE_SIZE, "tole"); + printf("};\n"); + } return 0; } -- cgit v1.2.3-70-g09d2 From 626cf236608505d376e4799adb4f7eb00a8594af Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 23 
Mar 2012 15:02:27 -0700 Subject: poll: add poll_requested_events() and poll_does_not_wait() functions In some cases the poll() implementation in a driver has to do different things depending on the events the caller wants to poll for. An example is when a driver needs to start a DMA engine if the caller polls for POLLIN, but doesn't want to do that if POLLIN is not requested but instead only POLLOUT or POLLPRI is requested. This is something that can happen in the video4linux subsystem among others. Unfortunately, the current epoll/poll/select implementation doesn't provide that information reliably. The poll_table_struct does have it: it has a key field with the event mask. But once a poll() call matches one or more bits of that mask any following poll() calls are passed a NULL poll_table pointer. Also, the eventpoll implementation always left the key field at ~0 instead of using the requested events mask. This was changed in eventpoll.c so the key field now contains the actual events that should be polled for as set by the caller. The solution to the NULL poll_table pointer is to set the qproc field to NULL in poll_table once poll() matches the events, not the poll_table pointer itself. That way drivers can obtain the mask through a new poll_requested_events inline. The poll_table_struct can still be NULL since some kernel code calls it internally (netfs_state_poll() in ./drivers/staging/pohmelfs/netfs.h). In that case poll_requested_events() returns ~0 (i.e. all events). Very rarely drivers might want to know whether poll_wait will actually wait. If another earlier file descriptor in the set already matched the events the caller wanted to wait for, then the kernel will return from the select() call without waiting. This might be useful information in order to avoid doing expensive work. A new helper function poll_does_not_wait() is added that drivers can use to detect this situation. This is now used in sock_poll_wait() in include/net/sock.h. 
This was the only place in the kernel that needed this information. Drivers should no longer access any of the poll_table internals, but use the poll_requested_events() and poll_does_not_wait() access functions instead. In order to enforce that the poll_table fields are now prepended with an underscore and a comment was added warning against using them directly. This required a change in unix_dgram_poll() in unix/af_unix.c which used the key field to get the requested events. It's been replaced by a call to poll_requested_events(). For qproc it was especially important to change its name since the behavior of that field changes with this patch since this function pointer can now be NULL when that wasn't possible in the past. Any driver accessing the qproc or key fields directly will now fail to compile. Some notes regarding the correctness of this patch: the driver's poll() function is called with a 'struct poll_table_struct *wait' argument. This pointer may or may not be NULL, drivers can never rely on it being one or the other as that depends on whether or not an earlier file descriptor in the select()'s fdset matched the requested events. There are only three things a driver can do with the wait argument: 1) obtain the key field: events = wait ? wait->key : ~0; This will still work although it should be replaced with the new poll_requested_events() function (which does exactly the same). This will now even work better, since wait is no longer set to NULL unnecessarily. 2) use the qproc callback. This could be deadly since qproc can now be NULL. Renaming qproc should prevent this from happening. There are no kernel drivers that actually access this callback directly, BTW. 3) test whether wait == NULL to determine whether poll would return without waiting. This is no longer sufficient as the correct test is now wait == NULL || wait->_qproc == NULL. 
However, the worst that can happen here is a slight performance hit in the case where wait != NULL and wait->_qproc == NULL. In that case the driver will assume that poll_wait() will actually add the fd to the set of waiting file descriptors. Of course, poll_wait() will not do that since it tests for wait->_qproc. This will not break anything, though. There is only one place in the whole kernel where this happens (sock_poll_wait() in include/net/sock.h) and that code will be replaced by a call to poll_does_not_wait() in the next patch. Note that even if wait->_qproc != NULL drivers cannot rely on poll_wait() actually waiting. The next file descriptor from the set might match the event mask and thus any possible waits will never happen. Signed-off-by: Hans Verkuil Reviewed-by: Jonathan Corbet Reviewed-by: Al Viro Cc: Davide Libenzi Signed-off-by: Hans de Goede Cc: Mauro Carvalho Chehab Cc: David Miller Cc: Eric Dumazet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 18 +++++++++++++++--- fs/select.c | 40 ++++++++++++++++++---------------------- include/linux/poll.h | 37 +++++++++++++++++++++++++++++++------ include/net/sock.h | 2 +- net/unix/af_unix.c | 2 +- 5 files changed, 66 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 4d9d3a45e35..ca300071e79 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -699,9 +699,12 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, void *priv) { struct epitem *epi, *tmp; + poll_table pt; + init_poll_funcptr(&pt, NULL); list_for_each_entry_safe(epi, tmp, head, rdllink) { - if (epi->ffd.file->f_op->poll(epi->ffd.file, NULL) & + pt._key = epi->event.events; + if (epi->ffd.file->f_op->poll(epi->ffd.file, &pt) & epi->event.events) return POLLIN | POLLRDNORM; else { @@ -1097,6 +1100,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, /* Initialize the poll table using the queue callback */ epq.epi = 
epi; init_poll_funcptr(&epq.pt, ep_ptable_queue_proc); + epq.pt._key = event->events; /* * Attach the item to the poll hooks and get current event bits. @@ -1191,6 +1195,9 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even { int pwake = 0; unsigned int revents; + poll_table pt; + + init_poll_funcptr(&pt, NULL); /* * Set the new event interest mask before calling f_op->poll(); @@ -1198,13 +1205,14 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even * f_op->poll() call and the new event set registering. */ epi->event.events = event->events; + pt._key = event->events; epi->event.data = event->data; /* protected by mtx */ /* * Get current event bits. We can safely use the file* here because * its usage count has been increased by the caller of this function. */ - revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL); + revents = epi->ffd.file->f_op->poll(epi->ffd.file, &pt); /* * If the item is "hot" and it is not registered inside the ready @@ -1239,6 +1247,9 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, unsigned int revents; struct epitem *epi; struct epoll_event __user *uevent; + poll_table pt; + + init_poll_funcptr(&pt, NULL); /* * We can loop without lock because we are passed a task private list. 
@@ -1251,7 +1262,8 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, list_del_init(&epi->rdllink); - revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL) & + pt._key = epi->event.events; + revents = epi->ffd.file->f_op->poll(epi->ffd.file, &pt) & epi->event.events; /* diff --git a/fs/select.c b/fs/select.c index e782258d0de..ecfd0b125ba 100644 --- a/fs/select.c +++ b/fs/select.c @@ -223,7 +223,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, get_file(filp); entry->filp = filp; entry->wait_address = wait_address; - entry->key = p->key; + entry->key = p->_key; init_waitqueue_func_entry(&entry->wait, pollwake); entry->wait.private = pwq; add_wait_queue(wait_address, &entry->wait); @@ -386,13 +386,11 @@ get_max: static inline void wait_key_set(poll_table *wait, unsigned long in, unsigned long out, unsigned long bit) { - if (wait) { - wait->key = POLLEX_SET; - if (in & bit) - wait->key |= POLLIN_SET; - if (out & bit) - wait->key |= POLLOUT_SET; - } + wait->_key = POLLEX_SET; + if (in & bit) + wait->_key |= POLLIN_SET; + if (out & bit) + wait->_key |= POLLOUT_SET; } int do_select(int n, fd_set_bits *fds, struct timespec *end_time) @@ -414,7 +412,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) poll_initwait(&table); wait = &table.pt; if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { - wait = NULL; + wait->_qproc = NULL; timed_out = 1; } @@ -459,17 +457,17 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) if ((mask & POLLIN_SET) && (in & bit)) { res_in |= bit; retval++; - wait = NULL; + wait->_qproc = NULL; } if ((mask & POLLOUT_SET) && (out & bit)) { res_out |= bit; retval++; - wait = NULL; + wait->_qproc = NULL; } if ((mask & POLLEX_SET) && (ex & bit)) { res_ex |= bit; retval++; - wait = NULL; + wait->_qproc = NULL; } } } @@ -481,7 +479,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) *rexp = res_ex; cond_resched(); } - wait = NULL; + 
wait->_qproc = NULL; if (retval || timed_out || signal_pending(current)) break; if (table.error) { @@ -720,7 +718,7 @@ struct poll_list { * interested in events matching the pollfd->events mask, and the result * matching that mask is both recorded in pollfd->revents and returned. The * pwait poll_table will be used by the fd-provided poll handler for waiting, - * if non-NULL. + * if pwait->_qproc is non-NULL. */ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) { @@ -738,9 +736,7 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) if (file != NULL) { mask = DEFAULT_POLLMASK; if (file->f_op && file->f_op->poll) { - if (pwait) - pwait->key = pollfd->events | - POLLERR | POLLHUP; + pwait->_key = pollfd->events|POLLERR|POLLHUP; mask = file->f_op->poll(file, pwait); } /* Mask out unneeded events. */ @@ -763,7 +759,7 @@ static int do_poll(unsigned int nfds, struct poll_list *list, /* Optimise the no-wait case */ if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { - pt = NULL; + pt->_qproc = NULL; timed_out = 1; } @@ -781,22 +777,22 @@ static int do_poll(unsigned int nfds, struct poll_list *list, for (; pfd != pfd_end; pfd++) { /* * Fish for events. If we found one, record it - * and kill the poll_table, so we don't + * and kill poll_table->_qproc, so we don't * needlessly register any other waiters after * this. They'll get immediately deregistered * when we break out and return. */ if (do_pollfd(pfd, pt)) { count++; - pt = NULL; + pt->_qproc = NULL; } } } /* * All waiters have already been registered, so don't provide - * a poll_table to them on the next loop iteration. + * a poll_table->_qproc to them on the next loop iteration. 
*/ - pt = NULL; + pt->_qproc = NULL; if (!count) { count = wait->error; if (signal_pending(current)) diff --git a/include/linux/poll.h b/include/linux/poll.h index cf40010ce0c..48fe8bc398d 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -32,21 +32,46 @@ struct poll_table_struct; */ typedef void (*poll_queue_proc)(struct file *, wait_queue_head_t *, struct poll_table_struct *); +/* + * Do not touch the structure directly, use the access functions + * poll_does_not_wait() and poll_requested_events() instead. + */ typedef struct poll_table_struct { - poll_queue_proc qproc; - unsigned long key; + poll_queue_proc _qproc; + unsigned long _key; } poll_table; static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p) { - if (p && wait_address) - p->qproc(filp, wait_address, p); + if (p && p->_qproc && wait_address) + p->_qproc(filp, wait_address, p); +} + +/* + * Return true if it is guaranteed that poll will not wait. This is the case + * if the poll() of another file descriptor in the set got an event, so there + * is no need for waiting. + */ +static inline bool poll_does_not_wait(const poll_table *p) +{ + return p == NULL || p->_qproc == NULL; +} + +/* + * Return the set of events that the application wants to poll for. + * This is useful for drivers that need to know whether a DMA transfer has + * to be started implicitly on poll(). You typically only want to do that + * if the application is actually polling for POLLIN and/or POLLOUT. + */ +static inline unsigned long poll_requested_events(const poll_table *p) +{ + return p ? 
p->_key : ~0UL; } static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc) { - pt->qproc = qproc; - pt->key = ~0UL; /* all events enabled */ + pt->_qproc = qproc; + pt->_key = ~0UL; /* all events enabled */ } struct poll_table_entry { diff --git a/include/net/sock.h b/include/net/sock.h index 04bc0b30e9e..a6ba1f8871f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1854,7 +1854,7 @@ static inline bool wq_has_sleeper(struct socket_wq *wq) static inline void sock_poll_wait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) { - if (p && wait_address) { + if (!poll_does_not_wait(p) && wait_address) { poll_wait(filp, wait_address, p); /* * We need to be sure we are in sync with the diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index eb4277c3318..d510353ef43 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2206,7 +2206,7 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, } /* No write status requested, avoid expensive OUT tests. */ - if (wait && !(wait->key & (POLLWRBAND | POLLWRNORM | POLLOUT))) + if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT))) return mask; writable = unix_writable(sk); -- cgit v1.2.3-70-g09d2 From 15cab952139404d0e593cb1aaab0a3547ac0f95b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 23 Mar 2012 15:02:39 -0700 Subject: ptrace: the killed tracee should not enter the syscall Another old/known problem. If the tracee is killed after it reports syscall_entry, it starts the syscall and debugger can't control this. This confuses the users and this creates the security problems for ptrace jailers. Change tracehook_report_syscall_entry() to return non-zero if killed, this instructs syscall_trace_enter() to abort the syscall. 
Reported-by: Chris Evans Tested-by: Indan Zupancic Signed-off-by: Oleg Nesterov Cc: Denys Vlasenko Cc: Tejun Heo Cc: Pedro Alves Cc: Jan Kratochvil Cc: Steven Rostedt Cc: Frederic Weisbecker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/tracehook.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index a71a2927a6a..51bd91d911c 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -54,12 +54,12 @@ struct linux_binprm; /* * ptrace report for syscall entry and exit looks identical. */ -static inline void ptrace_report_syscall(struct pt_regs *regs) +static inline int ptrace_report_syscall(struct pt_regs *regs) { int ptrace = current->ptrace; if (!(ptrace & PT_PTRACED)) - return; + return 0; ptrace_notify(SIGTRAP | ((ptrace & PT_TRACESYSGOOD) ? 0x80 : 0)); @@ -72,6 +72,8 @@ static inline void ptrace_report_syscall(struct pt_regs *regs) send_sig(current->exit_code, current, 1); current->exit_code = 0; } + + return fatal_signal_pending(current); } /** @@ -96,8 +98,7 @@ static inline void ptrace_report_syscall(struct pt_regs *regs) static inline __must_check int tracehook_report_syscall_entry( struct pt_regs *regs) { - ptrace_report_syscall(regs); - return 0; + return ptrace_report_syscall(regs); } /** -- cgit v1.2.3-70-g09d2 From b1845ff53f1a9eadba005ae53dfe60ab00dfe83b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 23 Mar 2012 15:02:40 -0700 Subject: ptrace: don't send SIGTRAP on exec if SEIZED ptrace_event(PTRACE_EVENT_EXEC) sends SIGTRAP if PT_TRACE_EXEC is not set. This is because this SIGTRAP predates PTRACE_O_TRACEEXEC option, we do not need/want this with PT_SEIZED which can set the options during attach. 
Suggested-by: Pedro Alves Signed-off-by: Oleg Nesterov Cc: Chris Evans Cc: Indan Zupancic Cc: Denys Vlasenko Cc: Tejun Heo Cc: Pedro Alves Cc: Jan Kratochvil Cc: Steven Rostedt Cc: Frederic Weisbecker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index c2f1f6a5fcb..6fdb196caa3 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -199,9 +199,10 @@ static inline void ptrace_event(int event, unsigned long message) if (unlikely(ptrace_event_enabled(current, event))) { current->ptrace_message = message; ptrace_notify((event << 8) | SIGTRAP); - } else if (event == PTRACE_EVENT_EXEC && unlikely(current->ptrace)) { + } else if (event == PTRACE_EVENT_EXEC) { /* legacy EXEC report via SIGTRAP */ - send_sig(SIGTRAP, current, 0); + if ((current->ptrace & (PT_PTRACED|PT_SEIZED)) == PT_PTRACED) + send_sig(SIGTRAP, current, 0); } } -- cgit v1.2.3-70-g09d2 From 86b6c1f301faf085de5a3f9ce16b8de6e69c729b Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 23 Mar 2012 15:02:41 -0700 Subject: ptrace: simplify PTRACE_foo constants and PTRACE_SETOPTIONS code Exchange PT_TRACESYSGOOD and PT_PTRACE_CAP bit positions, which makes PT_option bits contiguous and therefore makes code in ptrace_setoptions() much simpler. Every PTRACE_O_TRACEevent is defined to (1 << PTRACE_EVENT_event) instead of using explicit numeric constants, to ensure we don't mess up relationship between bit positions and event ids. PT_EVENT_FLAG_SHIFT was not particularly useful, PT_OPT_FLAG_SHIFT with value of PT_EVENT_FLAG_SHIFT-1 is easier to use. PT_TRACE_MASK constant is nuked, its only use is replaced by (PTRACE_O_MASK << PT_OPT_FLAG_SHIFT).
Signed-off-by: Denys Vlasenko Acked-by: Tejun Heo Reviewed-by: Oleg Nesterov Cc: Pedro Alves Cc: Jan Kratochvil Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 33 +++++++++++++++------------------ kernel/ptrace.c | 31 ++++++++----------------------- 2 files changed, 23 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 6fdb196caa3..6f1260ee5be 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -54,17 +54,6 @@ /* flags in @data for PTRACE_SEIZE */ #define PTRACE_SEIZE_DEVEL 0x80000000 /* temp flag for development */ -/* options set using PTRACE_SETOPTIONS */ -#define PTRACE_O_TRACESYSGOOD 0x00000001 -#define PTRACE_O_TRACEFORK 0x00000002 -#define PTRACE_O_TRACEVFORK 0x00000004 -#define PTRACE_O_TRACECLONE 0x00000008 -#define PTRACE_O_TRACEEXEC 0x00000010 -#define PTRACE_O_TRACEVFORKDONE 0x00000020 -#define PTRACE_O_TRACEEXIT 0x00000040 - -#define PTRACE_O_MASK 0x0000007f - /* Wait extended result codes for the above trace options. 
*/ #define PTRACE_EVENT_FORK 1 #define PTRACE_EVENT_VFORK 2 @@ -74,6 +63,17 @@ #define PTRACE_EVENT_EXIT 6 #define PTRACE_EVENT_STOP 7 +/* options set using PTRACE_SETOPTIONS */ +#define PTRACE_O_TRACESYSGOOD 1 +#define PTRACE_O_TRACEFORK (1 << PTRACE_EVENT_FORK) +#define PTRACE_O_TRACEVFORK (1 << PTRACE_EVENT_VFORK) +#define PTRACE_O_TRACECLONE (1 << PTRACE_EVENT_CLONE) +#define PTRACE_O_TRACEEXEC (1 << PTRACE_EVENT_EXEC) +#define PTRACE_O_TRACEVFORKDONE (1 << PTRACE_EVENT_VFORK_DONE) +#define PTRACE_O_TRACEEXIT (1 << PTRACE_EVENT_EXIT) + +#define PTRACE_O_MASK 0x0000007f + #include #ifdef __KERNEL__ @@ -88,13 +88,12 @@ #define PT_SEIZED 0x00010000 /* SEIZE used, enable new behavior */ #define PT_PTRACED 0x00000001 #define PT_DTRACE 0x00000002 /* delayed trace (used on m68k, i386) */ -#define PT_TRACESYSGOOD 0x00000004 -#define PT_PTRACE_CAP 0x00000008 /* ptracer can follow suid-exec */ +#define PT_PTRACE_CAP 0x00000004 /* ptracer can follow suid-exec */ +#define PT_OPT_FLAG_SHIFT 3 /* PT_TRACE_* event enable flags */ -#define PT_EVENT_FLAG_SHIFT 4 -#define PT_EVENT_FLAG(event) (1 << (PT_EVENT_FLAG_SHIFT + (event) - 1)) - +#define PT_EVENT_FLAG(event) (1 << (PT_OPT_FLAG_SHIFT + (event))) +#define PT_TRACESYSGOOD PT_EVENT_FLAG(0) #define PT_TRACE_FORK PT_EVENT_FLAG(PTRACE_EVENT_FORK) #define PT_TRACE_VFORK PT_EVENT_FLAG(PTRACE_EVENT_VFORK) #define PT_TRACE_CLONE PT_EVENT_FLAG(PTRACE_EVENT_CLONE) @@ -102,8 +101,6 @@ #define PT_TRACE_VFORK_DONE PT_EVENT_FLAG(PTRACE_EVENT_VFORK_DONE) #define PT_TRACE_EXIT PT_EVENT_FLAG(PTRACE_EVENT_EXIT) -#define PT_TRACE_MASK 0x000003f4 - /* single stepping state bits (used on ARM and PA-RISC) */ #define PT_SINGLESTEP_BIT 31 #define PT_SINGLESTEP (1<ptrace &= ~PT_TRACE_MASK; - - if (data & PTRACE_O_TRACESYSGOOD) - child->ptrace |= PT_TRACESYSGOOD; - - if (data & PTRACE_O_TRACEFORK) - child->ptrace |= PT_TRACE_FORK; - - if (data & PTRACE_O_TRACEVFORK) - child->ptrace |= PT_TRACE_VFORK; - - if (data & PTRACE_O_TRACECLONE) - 
child->ptrace |= PT_TRACE_CLONE; - - if (data & PTRACE_O_TRACEEXEC) - child->ptrace |= PT_TRACE_EXEC; - - if (data & PTRACE_O_TRACEVFORKDONE) - child->ptrace |= PT_TRACE_VFORK_DONE; - - if (data & PTRACE_O_TRACEEXIT) - child->ptrace |= PT_TRACE_EXIT; + /* Avoid intermediate state when all opts are cleared */ + flags = child->ptrace; + flags &= ~(PTRACE_O_MASK << PT_OPT_FLAG_SHIFT); + flags |= (data << PT_OPT_FLAG_SHIFT); + child->ptrace = flags; return 0; } -- cgit v1.2.3-70-g09d2 From 5cdf389aee90109e2e3d88085dea4dd5508a3be7 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 23 Mar 2012 15:02:42 -0700 Subject: ptrace: renumber PTRACE_EVENT_STOP so that future new options and events can match PTRACE_EVENT_foo and PTRACE_O_TRACEfoo used to match. New PTRACE_EVENT_STOP is the first event which has no corresponding PTRACE_O_TRACE option. If we will ever want to add another such option, its PTRACE_EVENT's value will collide with PTRACE_EVENT_STOP's value. This patch changes PTRACE_EVENT_STOP value to prevent this. While at it, added a comment - the one atop PTRACE_EVENT block, saying "Wait extended result codes for the above trace options", is not true for PTRACE_EVENT_STOP. Signed-off-by: Denys Vlasenko Cc: Tejun Heo Reviewed-by: Oleg Nesterov Cc: Pedro Alves Cc: Jan Kratochvil Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 6f1260ee5be..30be18064df 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -61,7 +61,8 @@ #define PTRACE_EVENT_EXEC 4 #define PTRACE_EVENT_VFORK_DONE 5 #define PTRACE_EVENT_EXIT 6 -#define PTRACE_EVENT_STOP 7 +/* Extended result codes which enabled by means other than options. 
*/ +#define PTRACE_EVENT_STOP 128 /* options set using PTRACE_SETOPTIONS */ #define PTRACE_O_TRACESYSGOOD 1 -- cgit v1.2.3-70-g09d2 From ee00560c7dac1dbbf048446a8489550d0a5765b7 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 23 Mar 2012 15:02:43 -0700 Subject: ptrace: remove PTRACE_SEIZE_DEVEL bit PTRACE_SEIZE code is tested and ready for production use, remove the code which requires special bit in data argument to make PTRACE_SEIZE work. Strace team prepares for a new release of strace, and we would like to ship the code which uses PTRACE_SEIZE, preferably after this change goes into released kernel. Signed-off-by: Denys Vlasenko Acked-by: Tejun Heo Acked-by: Oleg Nesterov Cc: Pedro Alves Cc: Jan Kratochvil Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 5 +---- kernel/ptrace.c | 15 --------------- 2 files changed, 1 insertion(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 30be18064df..407c678d2e3 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -51,9 +51,6 @@ #define PTRACE_INTERRUPT 0x4207 #define PTRACE_LISTEN 0x4208 -/* flags in @data for PTRACE_SEIZE */ -#define PTRACE_SEIZE_DEVEL 0x80000000 /* temp flag for development */ - /* Wait extended result codes for the above trace options. */ #define PTRACE_EVENT_FORK 1 #define PTRACE_EVENT_VFORK 2 @@ -64,7 +61,7 @@ /* Extended result codes which enabled by means other than options. 
*/ #define PTRACE_EVENT_STOP 128 -/* options set using PTRACE_SETOPTIONS */ +/* Options set using PTRACE_SETOPTIONS or using PTRACE_SEIZE @data param */ #define PTRACE_O_TRACESYSGOOD 1 #define PTRACE_O_TRACEFORK (1 << PTRACE_EVENT_FORK) #define PTRACE_O_TRACEVFORK (1 << PTRACE_EVENT_VFORK) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 4661c5bc07e..ee8d49b9c30 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -237,25 +237,10 @@ static int ptrace_attach(struct task_struct *task, long request, bool seize = (request == PTRACE_SEIZE); int retval; - /* - * SEIZE will enable new ptrace behaviors which will be implemented - * gradually. SEIZE_DEVEL bit is used to prevent applications - * expecting full SEIZE behaviors trapping on kernel commits which - * are still in the process of implementing them. - * - * Only test programs for new ptrace behaviors being implemented - * should set SEIZE_DEVEL. If unset, SEIZE will fail with -EIO. - * - * Once SEIZE behaviors are completely implemented, this flag - * will be removed. - */ retval = -EIO; if (seize) { if (addr != 0) goto out; - if (!(flags & PTRACE_SEIZE_DEVEL)) - goto out; - flags &= ~(unsigned long)PTRACE_SEIZE_DEVEL; if (flags & ~(unsigned long)PTRACE_O_MASK) goto out; flags = PT_PTRACED | PT_SEIZED | (flags << PT_OPT_FLAG_SHIFT); -- cgit v1.2.3-70-g09d2 From d0bd587a80960d7ba7e0c8396e154028c9045c54 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 23 Mar 2012 15:02:47 -0700 Subject: usermodehelper: implement UMH_KILLABLE Implement UMH_KILLABLE, should be used along with UMH_WAIT_EXEC/PROC. The caller must ensure that subprocess_info->path/etc can not go away until call_usermodehelper_freeinfo(). call_usermodehelper_exec(UMH_KILLABLE) does wait_for_completion_killable. If it fails, it uses xchg(&sub_info->complete, NULL) to serialize with umh_complete() which does the same xchg() to access sub_info->complete. If call_usermodehelper_exec wins, it can safely return. 
umh_complete() should get NULL and call call_usermodehelper_freeinfo(). Otherwise we know that umh_complete() was already called, in this case call_usermodehelper_exec() falls back to wait_for_completion() which should succeed "very soon". Note: UMH_NO_WAIT == -1 but it obviously should not be used with UMH_KILLABLE. We delay the necessary cleanup to simplify the back porting. Signed-off-by: Oleg Nesterov Cc: Tetsuo Handa Cc: Rusty Russell Cc: Tejun Heo Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmod.h | 2 ++ kernel/kmod.c | 27 +++++++++++++++++++++++++-- 2 files changed, 27 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 722f477c4ef..1b5985855ff 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -54,6 +54,8 @@ enum umh_wait { UMH_WAIT_PROC = 1, /* wait for the process to complete */ }; +#define UMH_KILLABLE 4 /* wait for EXEC/PROC killable */ + struct subprocess_info { struct work_struct work; struct completion *complete; diff --git a/kernel/kmod.c b/kernel/kmod.c index 8ea25944ce3..f92f917c450 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -201,7 +201,15 @@ EXPORT_SYMBOL(call_usermodehelper_freeinfo); static void umh_complete(struct subprocess_info *sub_info) { - complete(sub_info->complete); + struct completion *comp = xchg(&sub_info->complete, NULL); + /* + * See call_usermodehelper_exec(). If xchg() returns NULL + * we own sub_info, the UMH_KILLABLE caller has gone away. + */ + if (comp) + complete(comp); + else + call_usermodehelper_freeinfo(sub_info); } /* Keventd can't block, but this (a child) can. */ @@ -252,6 +260,9 @@ static void __call_usermodehelper(struct work_struct *work) enum umh_wait wait = sub_info->wait; pid_t pid; + if (wait != UMH_NO_WAIT) + wait &= ~UMH_KILLABLE; + /* CLONE_VFORK: wait until the usermode helper has execve'd * successfully We need the data structures to stay around * until that is done. 
*/ @@ -461,9 +472,21 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, queue_work(khelper_wq, &sub_info->work); if (wait == UMH_NO_WAIT) /* task has freed sub_info */ goto unlock; + + if (wait & UMH_KILLABLE) { + retval = wait_for_completion_killable(&done); + if (!retval) + goto wait_done; + + /* umh_complete() will see NULL and free sub_info */ + if (xchg(&sub_info->complete, NULL)) + goto unlock; + /* fallthrough, umh_complete() was already called */ + } + wait_for_completion(&done); +wait_done: retval = sub_info->retval; - out: call_usermodehelper_freeinfo(sub_info); unlock: -- cgit v1.2.3-70-g09d2 From 9d944ef32e83405a07376f112e9f02161d3e9731 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 23 Mar 2012 15:02:48 -0700 Subject: usermodehelper: kill umh_wait, renumber UMH_* constants No functional changes. It is not sane to use UMH_KILLABLE with enum umh_wait, but obviously we do not want another argument in call_usermodehelper_* helpers. Kill this enum, use the plain int. Signed-off-by: Oleg Nesterov Cc: Tetsuo Handa Cc: Rusty Russell Cc: Tejun Heo Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmod.h | 18 +++++++----------- kernel/kmod.c | 8 ++------ security/keys/request_key.c | 2 +- 3 files changed, 10 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 1b5985855ff..9efeae67910 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -48,12 +48,9 @@ static inline int request_module_nowait(const char *name, ...) 
{ return -ENOSYS; struct cred; struct file; -enum umh_wait { - UMH_NO_WAIT = -1, /* don't wait at all */ - UMH_WAIT_EXEC = 0, /* wait for the exec, but not the process */ - UMH_WAIT_PROC = 1, /* wait for the process to complete */ -}; - +#define UMH_NO_WAIT 0 /* don't wait at all */ +#define UMH_WAIT_EXEC 1 /* wait for the exec, but not the process */ +#define UMH_WAIT_PROC 2 /* wait for the process to complete */ #define UMH_KILLABLE 4 /* wait for EXEC/PROC killable */ struct subprocess_info { @@ -62,7 +59,7 @@ struct subprocess_info { char *path; char **argv; char **envp; - enum umh_wait wait; + int wait; int retval; int (*init)(struct subprocess_info *info, struct cred *new); void (*cleanup)(struct subprocess_info *info); @@ -80,15 +77,14 @@ void call_usermodehelper_setfns(struct subprocess_info *info, void *data); /* Actually execute the sub-process */ -int call_usermodehelper_exec(struct subprocess_info *info, enum umh_wait wait); +int call_usermodehelper_exec(struct subprocess_info *info, int wait); /* Free the subprocess_info. 
This is only needed if you're not going to call call_usermodehelper_exec */ void call_usermodehelper_freeinfo(struct subprocess_info *info); static inline int -call_usermodehelper_fns(char *path, char **argv, char **envp, - enum umh_wait wait, +call_usermodehelper_fns(char *path, char **argv, char **envp, int wait, int (*init)(struct subprocess_info *info, struct cred *new), void (*cleanup)(struct subprocess_info *), void *data) { @@ -106,7 +102,7 @@ call_usermodehelper_fns(char *path, char **argv, char **envp, } static inline int -call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait) +call_usermodehelper(char *path, char **argv, char **envp, int wait) { return call_usermodehelper_fns(path, argv, envp, wait, NULL, NULL, NULL); diff --git a/kernel/kmod.c b/kernel/kmod.c index f92f917c450..8341de91613 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -257,12 +257,9 @@ static void __call_usermodehelper(struct work_struct *work) { struct subprocess_info *sub_info = container_of(work, struct subprocess_info, work); - enum umh_wait wait = sub_info->wait; + int wait = sub_info->wait & ~UMH_KILLABLE; pid_t pid; - if (wait != UMH_NO_WAIT) - wait &= ~UMH_KILLABLE; - /* CLONE_VFORK: wait until the usermode helper has execve'd * successfully We need the data structures to stay around * until that is done. */ @@ -451,8 +448,7 @@ EXPORT_SYMBOL(call_usermodehelper_setfns); * asynchronously if wait is not set, and runs as a child of keventd. * (ie. it runs with full root capabilities). 
*/ -int call_usermodehelper_exec(struct subprocess_info *sub_info, - enum umh_wait wait) +int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) { DECLARE_COMPLETION_ONSTACK(done); int retval = 0; diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 82465328c39..cc3790315d2 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -91,7 +91,7 @@ static void umh_keys_cleanup(struct subprocess_info *info) * Call a usermode helper with a specific session keyring. */ static int call_usermodehelper_keys(char *path, char **argv, char **envp, - struct key *session_keyring, enum umh_wait wait) + struct key *session_keyring, int wait) { gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL; struct subprocess_info *info = -- cgit v1.2.3-70-g09d2 From 909af768e88867016f427264ae39d27a57b6a8ed Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 23 Mar 2012 15:02:51 -0700 Subject: coredump: remove VM_ALWAYSDUMP flag The motivation for this patchset was that I was looking at a way for a qemu-kvm process, to exclude the guest memory from its core dump, which can be quite large. There are already a number of filter flags in /proc//coredump_filter, however, these allow one to specify 'types' of kernel memory, not specific address ranges (which is needed in this case). Since there are no more vma flags available, the first patch eliminates the need for the 'VM_ALWAYSDUMP' flag. The flag is used internally by the kernel to mark vdso and vsyscall pages. However, it is simple enough to check if a vma covers a vdso or vsyscall page without the need for this flag. The second patch then replaces the 'VM_ALWAYSDUMP' flag with a new 'VM_NODUMP' flag, which can be set by userspace using new madvise flags: 'MADV_DONTDUMP', and unset via 'MADV_DODUMP'. The core dump filters continue to work the same as before unless 'MADV_DONTDUMP' is set on the region. 
The qemu code which implements this features is at: http://people.redhat.com/~jbaron/qemu-dump/qemu-dump.patch In my testing the qemu core dump shrunk from 383MB -> 13MB with this patch. I also believe that the 'MADV_DONTDUMP' flag might be useful for security sensitive apps, which might want to select which areas are dumped. This patch: The VM_ALWAYSDUMP flag is currently used by the coredump code to indicate that a vma is part of a vsyscall or vdso section. However, we can determine if a vma is in one these sections by checking it against the gate_vma and checking for a non-NULL return value from arch_vma_name(). Thus, freeing a valuable vma bit. Signed-off-by: Jason Baron Acked-by: Roland McGrath Cc: Chris Metcalf Cc: Avi Kivity Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/kernel/process.c | 3 +-- arch/hexagon/kernel/vdso.c | 3 +-- arch/mips/kernel/vdso.c | 3 +-- arch/powerpc/kernel/vdso.c | 10 ++-------- arch/s390/kernel/vdso.c | 10 ++-------- arch/sh/kernel/vsyscall/vsyscall.c | 3 +-- arch/tile/mm/elf.c | 8 +------- arch/unicore32/kernel/process.c | 2 +- arch/x86/um/mem_32.c | 8 -------- arch/x86/um/vdso/vma.c | 3 +-- arch/x86/vdso/vdso32-setup.c | 17 ++--------------- arch/x86/vdso/vma.c | 3 +-- fs/binfmt_elf.c | 27 +++++++++++++++++++++++++-- include/linux/mm.h | 1 - mm/memory.c | 8 +------- 15 files changed, 40 insertions(+), 69 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index c2ae3cd331f..219e4efee1a 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -533,8 +533,7 @@ int vectors_user_mapping(void) struct mm_struct *mm = current->mm; return install_special_mapping(mm, 0xffff0000, PAGE_SIZE, VM_READ | VM_EXEC | - VM_MAYREAD | VM_MAYEXEC | - VM_ALWAYSDUMP | VM_RESERVED, + VM_MAYREAD | VM_MAYEXEC | VM_RESERVED, NULL); } diff --git a/arch/hexagon/kernel/vdso.c b/arch/hexagon/kernel/vdso.c index 16277c33308..f212a453b52 100644 --- 
a/arch/hexagon/kernel/vdso.c +++ b/arch/hexagon/kernel/vdso.c @@ -78,8 +78,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) /* MAYWRITE to allow gdb to COW and set breakpoints. */ ret = install_special_mapping(mm, vdso_base, PAGE_SIZE, VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, &vdso_page); if (ret) diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c index e5cdfd603f8..0f1af58b036 100644 --- a/arch/mips/kernel/vdso.c +++ b/arch/mips/kernel/vdso.c @@ -88,8 +88,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) ret = install_special_mapping(mm, addr, PAGE_SIZE, VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, &vdso_page); if (ret) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 7d14bb697d4..d36ee1055f8 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -263,17 +263,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) * the "data" page of the vDSO or you'll stop getting kernel updates * and your nice userland gettimeofday will be totally dead. * It's fine to use that for setting breakpoints in the vDSO code - * pages though - * - * Make sure the vDSO gets into every core dump. - * Dumping its contents makes post-mortem fully interpretable later - * without matching up the same kernel and hardware config to see - * what PC values meant. + * pages though. 
*/ rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdso_pagelist); if (rc) { current->mm->context.vdso_base = 0; diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index e704a9965f9..9c80138206b 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -241,17 +241,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) * on the "data" page of the vDSO or you'll stop getting kernel * updates and your nice userland gettimeofday will be totally dead. * It's fine to use that for setting breakpoints in the vDSO code - * pages though - * - * Make sure the vDSO gets into every core dump. - * Dumping its contents makes post-mortem fully interpretable later - * without matching up the same kernel and hardware config to see - * what PC values meant. + * pages though. */ rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdso_pagelist); if (rc) current->mm->context.vdso_base = 0; diff --git a/arch/sh/kernel/vsyscall/vsyscall.c b/arch/sh/kernel/vsyscall/vsyscall.c index 1d6d51a1ce7..5ca579720a0 100644 --- a/arch/sh/kernel/vsyscall/vsyscall.c +++ b/arch/sh/kernel/vsyscall/vsyscall.c @@ -73,8 +73,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) ret = install_special_mapping(mm, addr, PAGE_SIZE, VM_READ | VM_EXEC | - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | - VM_ALWAYSDUMP, + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, syscall_pages); if (unlikely(ret)) goto up_fail; diff --git a/arch/tile/mm/elf.c b/arch/tile/mm/elf.c index 55e58e93bfc..1a00fb64fc8 100644 --- a/arch/tile/mm/elf.c +++ b/arch/tile/mm/elf.c @@ -117,17 +117,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, /* * MAYWRITE to allow gdb to COW and set breakpoints - * - * Make 
sure the vDSO gets into every core dump. Dumping its - * contents makes post-mortem fully interpretable later - * without matching up the same kernel and hardware config to - * see what PC values meant. */ vdso_base = VDSO_BASE; retval = install_special_mapping(mm, vdso_base, PAGE_SIZE, VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdso_pages); #ifndef __tilegx__ diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c index 52edc2b6287..432b4291f37 100644 --- a/arch/unicore32/kernel/process.c +++ b/arch/unicore32/kernel/process.c @@ -381,7 +381,7 @@ int vectors_user_mapping(void) return install_special_mapping(mm, 0xffff0000, PAGE_SIZE, VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC | - VM_ALWAYSDUMP | VM_RESERVED, + VM_RESERVED, NULL); } diff --git a/arch/x86/um/mem_32.c b/arch/x86/um/mem_32.c index 639900a6fde..f40281e5d6a 100644 --- a/arch/x86/um/mem_32.c +++ b/arch/x86/um/mem_32.c @@ -23,14 +23,6 @@ static int __init gate_vma_init(void) gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; gate_vma.vm_page_prot = __P101; - /* - * Make sure the vDSO gets into every core dump. - * Dumping its contents makes post-mortem fully interpretable later - * without matching up the same kernel and hardware config to see - * what PC values meant. 
- */ - gate_vma.vm_flags |= VM_ALWAYSDUMP; - return 0; } __initcall(gate_vma_init); diff --git a/arch/x86/um/vdso/vma.c b/arch/x86/um/vdso/vma.c index 91f4ec9a0a5..af91901babb 100644 --- a/arch/x86/um/vdso/vma.c +++ b/arch/x86/um/vdso/vma.c @@ -64,8 +64,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) err = install_special_mapping(mm, um_vdso_addr, PAGE_SIZE, VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdsop); up_write(&mm->mmap_sem); diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 468d591dde3..a944020fa85 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -250,13 +250,7 @@ static int __init gate_vma_init(void) gate_vma.vm_end = FIXADDR_USER_END; gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; gate_vma.vm_page_prot = __P101; - /* - * Make sure the vDSO gets into every core dump. - * Dumping its contents makes post-mortem fully interpretable later - * without matching up the same kernel and hardware config to see - * what PC values meant. - */ - gate_vma.vm_flags |= VM_ALWAYSDUMP; + return 0; } @@ -343,17 +337,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (compat_uses_vma || !compat) { /* * MAYWRITE to allow gdb to COW and set breakpoints - * - * Make sure the vDSO gets into every core dump. - * Dumping its contents makes post-mortem fully - * interpretable later without matching up the same - * kernel and hardware config to see what PC values - * meant. 
*/ ret = install_special_mapping(mm, addr, PAGE_SIZE, VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdso32_pages); if (ret) diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 153407c35b7..17e18279649 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -124,8 +124,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) ret = install_special_mapping(mm, addr, vdso_size, VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdso_pages); if (ret) { current->mm->context.vdso = NULL; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 81878b78c9d..b64be5b5ac2 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1092,6 +1092,29 @@ out: * Jeremy Fitzhardinge */ +/* + * The purpose of always_dump_vma() is to make sure that special kernel mappings + * that are useful for post-mortem analysis are included in every core dump. + * In that way we ensure that the core dump is fully interpretable later + * without matching up the same kernel and hardware config to see what PC values + * meant. These special mappings include - vDSO, vsyscall, and other + * architecture specific mappings + */ +static bool always_dump_vma(struct vm_area_struct *vma) +{ + /* Any vsyscall mappings? */ + if (vma == get_gate_vma(vma->vm_mm)) + return true; + /* + * arch_vma_name() returns non-NULL for special architecture mappings, + * such as vDSO sections. + */ + if (arch_vma_name(vma)) + return true; + + return false; +} + /* * Decide what to dump of a segment, part, all or none. */ @@ -1100,8 +1123,8 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma, { #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type)) - /* The vma can be set up to tell us the answer directly. 
*/ - if (vma->vm_flags & VM_ALWAYSDUMP) + /* always dump the vdso and vsyscall sections */ + if (always_dump_vma(vma)) goto whole; /* Hugetlb memory check */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 7330742e797..2de2ddba51d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -111,7 +111,6 @@ extern unsigned int kobjsize(const void *objp); #define VM_HUGEPAGE 0x01000000 /* MADV_HUGEPAGE marked this vma */ #endif #define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ -#define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */ #define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */ #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ diff --git a/mm/memory.c b/mm/memory.c index 3416b6e018d..6105f475fa8 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3623,13 +3623,7 @@ static int __init gate_vma_init(void) gate_vma.vm_end = FIXADDR_USER_END; gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; gate_vma.vm_page_prot = __P101; - /* - * Make sure the vDSO gets into every core dump. - * Dumping its contents makes post-mortem fully interpretable later - * without matching up the same kernel and hardware config to see - * what PC values meant. - */ - gate_vma.vm_flags |= VM_ALWAYSDUMP; + return 0; } __initcall(gate_vma_init); -- cgit v1.2.3-70-g09d2 From accb61fe7bb0f5c2a4102239e4981650f9048519 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 23 Mar 2012 15:02:51 -0700 Subject: coredump: add VM_NODUMP, MADV_NODUMP, MADV_CLEAR_NODUMP Since we no longer need the VM_ALWAYSDUMP flag, let's use the freed bit for 'VM_NODUMP' flag. The idea is to add a new madvise() flag: MADV_DONTDUMP, which can be set by applications to specifically request memory regions which should not dump core. The specific application I have in mind is qemu: we can add a flag there that wouldn't dump all of guest memory when qemu dumps core. 
This flag might also be useful for security sensitive apps that want to absolutely make sure that parts of memory are not dumped. To clear the flag use: MADV_DODUMP. [akpm@linux-foundation.org: s/MADV_NODUMP/MADV_DONTDUMP/, s/MADV_CLEAR_NODUMP/MADV_DODUMP/, per Roland] [akpm@linux-foundation.org: fix up the architectures which broke] Signed-off-by: Jason Baron Acked-by: Roland McGrath Cc: Chris Metcalf Cc: Avi Kivity Cc: Ralf Baechle Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: "James E.J. Bottomley" Cc: Helge Deller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/mman.h | 4 ++++ arch/mips/include/asm/mman.h | 4 ++++ arch/parisc/include/asm/mman.h | 4 ++++ arch/xtensa/include/asm/mman.h | 4 ++++ fs/binfmt_elf.c | 3 +++ include/asm-generic/mman-common.h | 4 ++++ include/linux/mm.h | 1 + mm/madvise.c | 8 ++++++++ 8 files changed, 32 insertions(+) (limited to 'include') diff --git a/arch/alpha/include/asm/mman.h b/arch/alpha/include/asm/mman.h index 72db984f878..cbeb3616a28 100644 --- a/arch/alpha/include/asm/mman.h +++ b/arch/alpha/include/asm/mman.h @@ -56,6 +56,10 @@ #define MADV_HUGEPAGE 14 /* Worth backing with hugepages */ #define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */ +#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, + overrides the coredump filter bits */ +#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/arch/mips/include/asm/mman.h b/arch/mips/include/asm/mman.h index 785b4ea4ec3..46d3da0d4b9 100644 --- a/arch/mips/include/asm/mman.h +++ b/arch/mips/include/asm/mman.h @@ -80,6 +80,10 @@ #define MADV_HUGEPAGE 14 /* Worth backing with hugepages */ #define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */ +#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, + overrides the coredump filter bits */ +#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */ + /* compatibility flags */ 
#define MAP_FILE 0 diff --git a/arch/parisc/include/asm/mman.h b/arch/parisc/include/asm/mman.h index f5b7bf5fba6..12219ebce86 100644 --- a/arch/parisc/include/asm/mman.h +++ b/arch/parisc/include/asm/mman.h @@ -62,6 +62,10 @@ #define MADV_HUGEPAGE 67 /* Worth backing with hugepages */ #define MADV_NOHUGEPAGE 68 /* Not worth backing with hugepages */ +#define MADV_DONTDUMP 69 /* Explicity exclude from the core dump, + overrides the coredump filter bits */ +#define MADV_DODUMP 70 /* Clear the MADV_NODUMP flag */ + /* compatibility flags */ #define MAP_FILE 0 #define MAP_VARIABLE 0 diff --git a/arch/xtensa/include/asm/mman.h b/arch/xtensa/include/asm/mman.h index 30789010733..25bc6c1309c 100644 --- a/arch/xtensa/include/asm/mman.h +++ b/arch/xtensa/include/asm/mman.h @@ -86,6 +86,10 @@ #define MADV_HUGEPAGE 14 /* Worth backing with hugepages */ #define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */ +#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, + overrides the coredump filter bits */ +#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index b64be5b5ac2..504b6eee50a 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1127,6 +1127,9 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma, if (always_dump_vma(vma)) goto whole; + if (vma->vm_flags & VM_NODUMP) + return 0; + /* Hugetlb memory check */ if (vma->vm_flags & VM_HUGETLB) { if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED)) diff --git a/include/asm-generic/mman-common.h b/include/asm-generic/mman-common.h index 787abbb6d86..d030d2c2647 100644 --- a/include/asm-generic/mman-common.h +++ b/include/asm-generic/mman-common.h @@ -48,6 +48,10 @@ #define MADV_HUGEPAGE 14 /* Worth backing with hugepages */ #define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */ +#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, + overrides the coredump filter 
bits */ +#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/include/linux/mm.h b/include/linux/mm.h index 2de2ddba51d..a6fabdfd34c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -111,6 +111,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_HUGEPAGE 0x01000000 /* MADV_HUGEPAGE marked this vma */ #endif #define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ +#define VM_NODUMP 0x04000000 /* Do not include in the core dump */ #define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */ #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ diff --git a/mm/madvise.c b/mm/madvise.c index f5ab745672b..1ccbba5b667 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -65,6 +65,12 @@ static long madvise_behavior(struct vm_area_struct * vma, } new_flags &= ~VM_DONTCOPY; break; + case MADV_DONTDUMP: + new_flags |= VM_NODUMP; + break; + case MADV_DODUMP: + new_flags &= ~VM_NODUMP; + break; case MADV_MERGEABLE: case MADV_UNMERGEABLE: error = ksm_madvise(vma, start, end, behavior, &new_flags); @@ -293,6 +299,8 @@ madvise_behavior_valid(int behavior) case MADV_HUGEPAGE: case MADV_NOHUGEPAGE: #endif + case MADV_DONTDUMP: + case MADV_DODUMP: return 1; default: -- cgit v1.2.3-70-g09d2 From 1ac101a5d675aca2426c5cd460c73fb95acb8391 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Fri, 23 Mar 2012 15:02:54 -0700 Subject: procfs: add num_to_str() to speed up /proc/stat == stat_check.py num = 0 with open("/proc/stat") as f: while num < 1000 : data = f.read() f.seek(0, 0) num = num + 1 == perf shows 20.39% stat_check.py [kernel.kallsyms] [k] format_decode 13.41% stat_check.py [kernel.kallsyms] [k] number 12.61% stat_check.py [kernel.kallsyms] [k] vsnprintf 10.85% stat_check.py [kernel.kallsyms] [k] memcpy 4.85% stat_check.py [kernel.kallsyms] [k] radix_tree_lookup 4.43% stat_check.py [kernel.kallsyms] [k] seq_printf This patch 
removes most of the calls to vsnprintf() by adding num_to_str() and seq_put_decimal_ull(), which prints decimal numbers without rich functions provided by printf(). On my 8cpu box. == Before patch == [root@bluextal test]# time ./stat_check.py real 0m0.150s user 0m0.026s sys 0m0.121s == After patch == [root@bluextal test]# time ./stat_check.py real 0m0.055s user 0m0.022s sys 0m0.030s [akpm@linux-foundation.org: remove incorrect comment, use less stack in num_to_str(), move comment from .h to .c, simplify seq_put_decimal_ull()] [andrea@betterlinux.com: avoid breaking the ABI in /proc/stat] Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrea Righi Cc: Eric Dumazet Cc: Glauber Costa Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Paul Turner Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/stat.c | 55 ++++++++++++++++++++++++------------------------ fs/seq_file.c | 33 +++++++++++++++++++++++++++++ include/linux/kernel.h | 2 ++ include/linux/seq_file.h | 3 ++- lib/vsprintf.c | 20 ++++++++++++++++++ 5 files changed, 84 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/fs/proc/stat.c b/fs/proc/stat.c index ac446114cd4..6a0c62d6e44 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -89,18 +89,19 @@ static int show_stat(struct seq_file *p, void *v) } sum += arch_irq_stat(); - seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu " - "%llu\n", - (unsigned long long)cputime64_to_clock_t(user), - (unsigned long long)cputime64_to_clock_t(nice), - (unsigned long long)cputime64_to_clock_t(system), - (unsigned long long)cputime64_to_clock_t(idle), - (unsigned long long)cputime64_to_clock_t(iowait), - (unsigned long long)cputime64_to_clock_t(irq), - (unsigned long long)cputime64_to_clock_t(softirq), - (unsigned long long)cputime64_to_clock_t(steal), - (unsigned long long)cputime64_to_clock_t(guest), - (unsigned long long)cputime64_to_clock_t(guest_nice)); + seq_puts(p, "cpu "); + seq_put_decimal_ull(p, ' ', 
cputime64_to_clock_t(user)); + seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(nice)); + seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(system)); + seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(idle)); + seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(iowait)); + seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(irq)); + seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(softirq)); + seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(steal)); + seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest)); + seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice)); + seq_putc(p, '\n'); + for_each_online_cpu(i) { /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ user = kcpustat_cpu(i).cpustat[CPUTIME_USER]; @@ -113,26 +114,24 @@ static int show_stat(struct seq_file *p, void *v) steal = kcpustat_cpu(i).cpustat[CPUTIME_STEAL]; guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST]; guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE]; - seq_printf(p, - "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu " - "%llu\n", - i, - (unsigned long long)cputime64_to_clock_t(user), - (unsigned long long)cputime64_to_clock_t(nice), - (unsigned long long)cputime64_to_clock_t(system), - (unsigned long long)cputime64_to_clock_t(idle), - (unsigned long long)cputime64_to_clock_t(iowait), - (unsigned long long)cputime64_to_clock_t(irq), - (unsigned long long)cputime64_to_clock_t(softirq), - (unsigned long long)cputime64_to_clock_t(steal), - (unsigned long long)cputime64_to_clock_t(guest), - (unsigned long long)cputime64_to_clock_t(guest_nice)); + seq_printf(p, "cpu%d", i); + seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(user)); + seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(nice)); + seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(system)); + seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(idle)); + seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(iowait)); + seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(irq)); + seq_put_decimal_ull(p, ' ', 
cputime64_to_clock_t(softirq)); + seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(steal)); + seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest)); + seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice)); + seq_putc(p, '\n'); } seq_printf(p, "intr %llu", (unsigned long long)sum); /* sum again ? it could be updated? */ for_each_irq_nr(j) - seq_printf(p, " %u", kstat_irqs(j)); + seq_put_decimal_ull(p, ' ', kstat_irqs(j)); seq_printf(p, "\nctxt %llu\n" @@ -149,7 +148,7 @@ static int show_stat(struct seq_file *p, void *v) seq_printf(p, "softirq %llu", (unsigned long long)sum_softirq); for (i = 0; i < NR_SOFTIRQS; i++) - seq_printf(p, " %u", per_softirq_sums[i]); + seq_put_decimal_ull(p, ' ', per_softirq_sums[i]); seq_putc(p, '\n'); return 0; diff --git a/fs/seq_file.c b/fs/seq_file.c index aa242dc9937..7d19816c4cc 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -644,6 +644,39 @@ int seq_puts(struct seq_file *m, const char *s) } EXPORT_SYMBOL(seq_puts); +/* + * A helper routine for putting decimal numbers without rich format of printf(). + * only 'unsigned long long' is supported. + * This routine will put one byte delimiter + number into seq_file. + * This routine is very quick when you show lots of numbers. + * In usual cases, it will be better to use seq_printf(). It's easier to read. 
+ */ +int seq_put_decimal_ull(struct seq_file *m, char delimiter, + unsigned long long num) +{ + int len; + + if (m->count + 2 >= m->size) /* we'll write 2 bytes at least */ + goto overflow; + + m->buf[m->count++] = delimiter; + + if (num < 10) { + m->buf[m->count++] = num + '0'; + return 0; + } + + len = num_to_str(m->buf + m->count, m->size - m->count, num); + if (!len) + goto overflow; + m->count += len; + return 0; +overflow: + m->count = m->size; + return -1; +} +EXPORT_SYMBOL(seq_put_decimal_ull); + /** * seq_write - write arbitrary data to buffer * @seq: seq_file identifying the buffer to which data should be written diff --git a/include/linux/kernel.h b/include/linux/kernel.h index f2085b541a2..3e140add536 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -312,6 +312,8 @@ extern long long simple_strtoll(const char *,char **,unsigned int); #define strict_strtoull kstrtoull #define strict_strtoll kstrtoll +extern int num_to_str(char *buf, int size, unsigned long long num); + /* lib/printf utilities */ extern __printf(2, 3) int sprintf(char *buf, const char * fmt, ...); diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 44f1514b00b..5bba42c9944 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -121,9 +121,10 @@ int single_release(struct inode *, struct file *); void *__seq_open_private(struct file *, const struct seq_operations *, int); int seq_open_private(struct file *, const struct seq_operations *, int); int seq_release_private(struct inode *, struct file *); +int seq_put_decimal_ull(struct seq_file *m, char delimiter, + unsigned long long num); #define SEQ_START_TOKEN ((void *)1) - /* * Helpers for iteration over list_head-s in seq_files */ diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 38e612e66da..385c40291cd 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -212,6 +212,26 @@ char *put_dec(char *buf, unsigned long long num) } } +/* + * Convert passed number to decimal string. 
+ * Returns the length of string. On buffer overflow, returns 0. + * + * If speed is not important, use snprintf(). It's easy to read the code. + */ +int num_to_str(char *buf, int size, unsigned long long num) +{ + char tmp[21]; /* Enough for 2^64 in decimal */ + int idx, len; + + len = put_dec(tmp, num) - tmp; + + if (len > size) + return 0; + for (idx = 0; idx < len; ++idx) + buf[idx] = tmp[len - idx - 1]; + return len; +} + #define ZEROPAD 1 /* pad with zero */ #define SIGN 2 /* unsigned/signed long */ #define PLUS 4 /* show plus */ -- cgit v1.2.3-70-g09d2 From bda7bad62bc4c4e0783348e8db51abe094153c56 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Fri, 23 Mar 2012 15:02:54 -0700 Subject: procfs: speed up /proc/pid/stat, statm Process accounting applications such as top and ps visit some files under /proc/<pid>. With seq_put_decimal_ull(), we can optimize /proc/<pid>/stat and /proc/<pid>/statm files. This patch adds - seq_put_decimal_ll() for signed values. - allow delimiter == 0. - convert seq_printf() to seq_put_decimal_ull/ll in /proc/<pid>/stat, statm. Test result on a system with 2000+ procs. Before patch: [kamezawa@bluextal test]$ top -b -n 1 | wc -l 2223 [kamezawa@bluextal test]$ time top -b -n 1 > /dev/null real 0m0.675s user 0m0.044s sys 0m0.121s [kamezawa@bluextal test]$ time ps -elf > /dev/null real 0m0.236s user 0m0.056s sys 0m0.176s After patch: [kamezawa@bluextal ~]$ time top -b -n 1 > /dev/null real 0m0.657s user 0m0.052s sys 0m0.100s [kamezawa@bluextal ~]$ time ps -elf > /dev/null real 0m0.198s user 0m0.050s sys 0m0.145s Considering that top and ps tend to scan /proc periodically, this will reduce CPU consumption by top/ps to some extent. 
[akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: KAMEZAWA Hiroyuki Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/array.c | 119 +++++++++++++++++++++++++---------------------- fs/seq_file.c | 21 ++++++++- include/linux/seq_file.h | 2 + 3 files changed, 86 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/fs/proc/array.c b/fs/proc/array.c index c602b8d20f0..fbb53c24908 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -462,59 +462,56 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, /* convert nsec -> ticks */ start_time = nsec_to_clock_t(start_time); - seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \ -%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ -%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld %lu %lu %lu\n", - pid_nr_ns(pid, ns), - tcomm, - state, - ppid, - pgid, - sid, - tty_nr, - tty_pgrp, - task->flags, - min_flt, - cmin_flt, - maj_flt, - cmaj_flt, - cputime_to_clock_t(utime), - cputime_to_clock_t(stime), - cputime_to_clock_t(cutime), - cputime_to_clock_t(cstime), - priority, - nice, - num_threads, - start_time, - vsize, - mm ? get_mm_rss(mm) : 0, - rsslim, - mm ? (permitted ? mm->start_code : 1) : 0, - mm ? (permitted ? mm->end_code : 1) : 0, - (permitted && mm) ? mm->start_stack : 0, - esp, - eip, - /* The signal information here is obsolete. - * It must be decimal for Linux 2.0 compatibility. - * Use /proc/#/status for real-time signals. - */ - task->pending.signal.sig[0] & 0x7fffffffUL, - task->blocked.sig[0] & 0x7fffffffUL, - sigign .sig[0] & 0x7fffffffUL, - sigcatch .sig[0] & 0x7fffffffUL, - wchan, - 0UL, - 0UL, - task->exit_signal, - task_cpu(task), - task->rt_priority, - task->policy, - (unsigned long long)delayacct_blkio_ticks(task), - cputime_to_clock_t(gtime), - cputime_to_clock_t(cgtime), - (mm && permitted) ? mm->start_data : 0, - (mm && permitted) ? mm->end_data : 0, - (mm && permitted) ? 
mm->start_brk : 0); + seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state); + seq_put_decimal_ll(m, ' ', ppid); + seq_put_decimal_ll(m, ' ', pgid); + seq_put_decimal_ll(m, ' ', sid); + seq_put_decimal_ll(m, ' ', tty_nr); + seq_put_decimal_ll(m, ' ', tty_pgrp); + seq_put_decimal_ull(m, ' ', task->flags); + seq_put_decimal_ull(m, ' ', min_flt); + seq_put_decimal_ull(m, ' ', cmin_flt); + seq_put_decimal_ull(m, ' ', maj_flt); + seq_put_decimal_ull(m, ' ', cmaj_flt); + seq_put_decimal_ull(m, ' ', cputime_to_clock_t(utime)); + seq_put_decimal_ull(m, ' ', cputime_to_clock_t(stime)); + seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cutime)); + seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cstime)); + seq_put_decimal_ll(m, ' ', priority); + seq_put_decimal_ll(m, ' ', nice); + seq_put_decimal_ll(m, ' ', num_threads); + seq_put_decimal_ull(m, ' ', 0); + seq_put_decimal_ull(m, ' ', start_time); + seq_put_decimal_ull(m, ' ', vsize); + seq_put_decimal_ll(m, ' ', mm ? get_mm_rss(mm) : 0); + seq_put_decimal_ull(m, ' ', rsslim); + seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->start_code : 1) : 0); + seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->end_code : 1) : 0); + seq_put_decimal_ull(m, ' ', (permitted && mm) ? mm->start_stack : 0); + seq_put_decimal_ull(m, ' ', esp); + seq_put_decimal_ull(m, ' ', eip); + /* The signal information here is obsolete. + * It must be decimal for Linux 2.0 compatibility. + * Use /proc/#/status for real-time signals. 
+ */ + seq_put_decimal_ull(m, ' ', task->pending.signal.sig[0] & 0x7fffffffUL); + seq_put_decimal_ull(m, ' ', task->blocked.sig[0] & 0x7fffffffUL); + seq_put_decimal_ull(m, ' ', sigign.sig[0] & 0x7fffffffUL); + seq_put_decimal_ull(m, ' ', sigcatch.sig[0] & 0x7fffffffUL); + seq_put_decimal_ull(m, ' ', wchan); + seq_put_decimal_ull(m, ' ', 0); + seq_put_decimal_ull(m, ' ', 0); + seq_put_decimal_ll(m, ' ', task->exit_signal); + seq_put_decimal_ll(m, ' ', task_cpu(task)); + seq_put_decimal_ull(m, ' ', task->rt_priority); + seq_put_decimal_ull(m, ' ', task->policy); + seq_put_decimal_ull(m, ' ', delayacct_blkio_ticks(task)); + seq_put_decimal_ull(m, ' ', cputime_to_clock_t(gtime)); + seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cgtime)); + seq_put_decimal_ull(m, ' ', (mm && permitted) ? mm->start_data : 0); + seq_put_decimal_ull(m, ' ', (mm && permitted) ? mm->end_data : 0); + seq_put_decimal_ull(m, ' ', (mm && permitted) ? mm->start_brk : 0); + seq_putc(m, '\n'); if (mm) mmput(mm); return 0; @@ -542,8 +539,20 @@ int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, size = task_statm(mm, &shared, &text, &data, &resident); mmput(mm); } - seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n", - size, resident, shared, text, data); + /* + * For quick read, open code by putting numbers directly + * expected format is + * seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n", + * size, resident, shared, text, data); + */ + seq_put_decimal_ull(m, 0, size); + seq_put_decimal_ull(m, ' ', resident); + seq_put_decimal_ull(m, ' ', shared); + seq_put_decimal_ull(m, ' ', text); + seq_put_decimal_ull(m, ' ', 0); + seq_put_decimal_ull(m, ' ', text); + seq_put_decimal_ull(m, ' ', 0); + seq_putc(m, '\n'); return 0; } diff --git a/fs/seq_file.c b/fs/seq_file.c index 7d19816c4cc..55c293f7024 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -659,7 +659,8 @@ int seq_put_decimal_ull(struct seq_file *m, char delimiter, if (m->count + 2 >= m->size) /* we'll write 2 bytes at least */ goto overflow; - 
m->buf[m->count++] = delimiter; + if (delimiter) + m->buf[m->count++] = delimiter; if (num < 10) { m->buf[m->count++] = num + '0'; @@ -677,6 +678,24 @@ overflow: } EXPORT_SYMBOL(seq_put_decimal_ull); +int seq_put_decimal_ll(struct seq_file *m, char delimiter, + long long num) +{ + if (num < 0) { + if (m->count + 3 >= m->size) { + m->count = m->size; + return -1; + } + if (delimiter) + m->buf[m->count++] = delimiter; + num = -num; + delimiter = '-'; + } + return seq_put_decimal_ull(m, delimiter, num); + +} +EXPORT_SYMBOL(seq_put_decimal_ll); + /** * seq_write - write arbitrary data to buffer * @seq: seq_file identifying the buffer to which data should be written diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 5bba42c9944..54e5ae7f8ad 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -123,6 +123,8 @@ int seq_open_private(struct file *, const struct seq_operations *, int); int seq_release_private(struct inode *, struct file *); int seq_put_decimal_ull(struct seq_file *m, char delimiter, unsigned long long num); +int seq_put_decimal_ll(struct seq_file *m, char delimiter, + long long num); #define SEQ_START_TOKEN ((void *)1) /* -- cgit v1.2.3-70-g09d2