From ace6128d603d3f15238baba104d0b37ccf0b6c07 Mon Sep 17 00:00:00 2001 From: Vincent Stehlé Date: Tue, 30 Apr 2013 15:26:45 -0700 Subject: memory hotplug: fix warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following compilation warnings: mm/slab.c: In function `kmem_cache_init_late': mm/slab.c:1778:2: warning: statement with no effect [-Wunused-value] mm/page_cgroup.c: In function `page_cgroup_init': mm/page_cgroup.c:305:2: warning: statement with no effect [-Wunused-value] Signed-off-by: Vincent Stehlé Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memory.h b/include/linux/memory.h index 73817af8b48..85c31a8e290 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -137,7 +137,7 @@ enum mem_add_context { BOOT, HOTPLUG }; #define register_hotmemory_notifier(nb) register_memory_notifier(nb) #define unregister_hotmemory_notifier(nb) unregister_memory_notifier(nb) #else -#define hotplug_memory_notifier(fn, pri) (0) +#define hotplug_memory_notifier(fn, pri) ({ 0; }) /* These aren't inline functions due to a GCC bug. */ #define register_hotmemory_notifier(nb) ({ (void)(nb); 0; }) #define unregister_hotmemory_notifier(nb) ({ (void)(nb); }) -- cgit v1.2.3-70-g09d2 From 1e01c968db3d0aebd48e31db15f24516b03128df Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 30 Apr 2013 15:26:51 -0700 Subject: frontswap: make frontswap_init use a pointer for the ops This simplifies the code in the frontswap - we can get rid of the 'backend_registered' test and instead check against frontswap_ops. 
[v1: Rebase on top of 703ba7fe5e0 (ramster->zcache move] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Bob Liu Cc: Wanpeng Li Cc: Andor Daam Cc: Dan Magenheimer Cc: Florian Schmaus Cc: Minchan Kim Cc: Stefan Hengelein Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/staging/zcache/zcache-main.c | 8 ++++---- drivers/xen/tmem.c | 6 +++--- include/linux/frontswap.h | 2 +- mm/frontswap.c | 38 +++++++++++++++++------------------- 4 files changed, 26 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/staging/zcache/zcache-main.c b/drivers/staging/zcache/zcache-main.c index e23d814b539..09c69c8026f 100644 --- a/drivers/staging/zcache/zcache-main.c +++ b/drivers/staging/zcache/zcache-main.c @@ -1707,9 +1707,9 @@ static struct frontswap_ops zcache_frontswap_ops = { .init = zcache_frontswap_init }; -struct frontswap_ops zcache_frontswap_register_ops(void) +struct frontswap_ops *zcache_frontswap_register_ops(void) { - struct frontswap_ops old_ops = + struct frontswap_ops *old_ops = frontswap_register_ops(&zcache_frontswap_ops); return old_ops; @@ -1874,7 +1874,7 @@ static int __init zcache_init(void) pr_warn("%s: cleancache_ops overridden\n", namestr); } if (zcache_enabled && !disable_frontswap) { - struct frontswap_ops old_ops; + struct frontswap_ops *old_ops; old_ops = zcache_frontswap_register_ops(); if (frontswap_has_exclusive_gets) @@ -1886,7 +1886,7 @@ static int __init zcache_init(void) namestr, frontswap_has_exclusive_gets, !disable_frontswap_ignore_nonactive); #endif - if (old_ops.init != NULL) + if (old_ops != NULL) pr_warn("%s: frontswap_ops overridden\n", namestr); } if (ramster_enabled) diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c index 3ee836d4258..7a01a5fd0f6 100644 --- a/drivers/xen/tmem.c +++ b/drivers/xen/tmem.c @@ -362,7 +362,7 @@ static int __init no_frontswap(char *s) } __setup("nofrontswap", no_frontswap); -static struct frontswap_ops __initdata tmem_frontswap_ops = { +static struct 
frontswap_ops tmem_frontswap_ops = { .store = tmem_frontswap_store, .load = tmem_frontswap_load, .invalidate_page = tmem_frontswap_flush_page, @@ -378,11 +378,11 @@ static int __init xen_tmem_init(void) #ifdef CONFIG_FRONTSWAP if (tmem_enabled && use_frontswap) { char *s = ""; - struct frontswap_ops old_ops = + struct frontswap_ops *old_ops = frontswap_register_ops(&tmem_frontswap_ops); tmem_frontswap_poolid = -1; - if (old_ops.init != NULL) + if (old_ops) s = " (WARNING: frontswap_ops overridden)"; printk(KERN_INFO "frontswap enabled, RAM provided by " "Xen Transcendent Memory%s\n", s); diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index 30442547b9e..d4f29875c7c 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -14,7 +14,7 @@ struct frontswap_ops { }; extern bool frontswap_enabled; -extern struct frontswap_ops +extern struct frontswap_ops * frontswap_register_ops(struct frontswap_ops *ops); extern void frontswap_shrink(unsigned long); extern unsigned long frontswap_curr_pages(void); diff --git a/mm/frontswap.c b/mm/frontswap.c index cbd2b8af812..e44c9cbd144 100644 --- a/mm/frontswap.c +++ b/mm/frontswap.c @@ -24,7 +24,7 @@ * frontswap_ops is set by frontswap_register_ops to contain the pointers * to the frontswap "backend" implementation functions. */ -static struct frontswap_ops frontswap_ops __read_mostly; +static struct frontswap_ops *frontswap_ops __read_mostly; /* * This global enablement flag reduces overhead on systems where frontswap_ops @@ -108,41 +108,39 @@ static inline void inc_frontswap_invalidates(void) { } * * The time between the backend being registered and the swap file system * calling the backend (via the frontswap_* functions) is indeterminate as - * backend_registered is not atomic_t (or a value guarded by a spinlock). + * frontswap_ops is not atomic_t (or a value guarded by a spinlock). * That is OK as we are comfortable missing some of these calls to the newly * registered backend. 
* * Obviously the opposite (unloading the backend) must be done after all * the frontswap_[store|load|invalidate_area|invalidate_page] start - * ignorning or failing the requests - at which point backend_registered + * ignorning or failing the requests - at which point frontswap_ops * would have to be made in some fashion atomic. */ static DECLARE_BITMAP(need_init, MAX_SWAPFILES); -static bool backend_registered __read_mostly; /* * Register operations for frontswap, returning previous thus allowing * detection of multiple backends and possible nesting. */ -struct frontswap_ops frontswap_register_ops(struct frontswap_ops *ops) +struct frontswap_ops *frontswap_register_ops(struct frontswap_ops *ops) { - struct frontswap_ops old = frontswap_ops; + struct frontswap_ops *old = frontswap_ops; int i; - frontswap_ops = *ops; frontswap_enabled = true; for (i = 0; i < MAX_SWAPFILES; i++) { if (test_and_clear_bit(i, need_init)) - (*frontswap_ops.init)(i); + ops->init(i); } /* - * We MUST have backend_registered set _after_ the frontswap_init's + * We MUST have frontswap_ops set _after_ the frontswap_init's * have been called. Otherwise __frontswap_store might fail. Hence * the barrier to make sure compiler does not re-order us. 
*/ barrier(); - backend_registered = true; + frontswap_ops = ops; return old; } EXPORT_SYMBOL(frontswap_register_ops); @@ -172,11 +170,11 @@ void __frontswap_init(unsigned type) { struct swap_info_struct *sis = swap_info[type]; - if (backend_registered) { + if (frontswap_ops) { BUG_ON(sis == NULL); if (sis->frontswap_map == NULL) return; - (*frontswap_ops.init)(type); + frontswap_ops->init(type); } else { BUG_ON(type > MAX_SWAPFILES); set_bit(type, need_init); @@ -206,7 +204,7 @@ int __frontswap_store(struct page *page) struct swap_info_struct *sis = swap_info[type]; pgoff_t offset = swp_offset(entry); - if (!backend_registered) { + if (!frontswap_ops) { inc_frontswap_failed_stores(); return ret; } @@ -215,7 +213,7 @@ int __frontswap_store(struct page *page) BUG_ON(sis == NULL); if (frontswap_test(sis, offset)) dup = 1; - ret = frontswap_ops.store(type, offset, page); + ret = frontswap_ops->store(type, offset, page); if (ret == 0) { frontswap_set(sis, offset); inc_frontswap_succ_stores(); @@ -250,13 +248,13 @@ int __frontswap_load(struct page *page) struct swap_info_struct *sis = swap_info[type]; pgoff_t offset = swp_offset(entry); - if (!backend_registered) + if (!frontswap_ops) return ret; BUG_ON(!PageLocked(page)); BUG_ON(sis == NULL); if (frontswap_test(sis, offset)) - ret = frontswap_ops.load(type, offset, page); + ret = frontswap_ops->load(type, offset, page); if (ret == 0) { inc_frontswap_loads(); if (frontswap_tmem_exclusive_gets_enabled) { @@ -276,12 +274,12 @@ void __frontswap_invalidate_page(unsigned type, pgoff_t offset) { struct swap_info_struct *sis = swap_info[type]; - if (!backend_registered) + if (!frontswap_ops) return; BUG_ON(sis == NULL); if (frontswap_test(sis, offset)) { - frontswap_ops.invalidate_page(type, offset); + frontswap_ops->invalidate_page(type, offset); __frontswap_clear(sis, offset); inc_frontswap_invalidates(); } @@ -296,11 +294,11 @@ void __frontswap_invalidate_area(unsigned type) { struct swap_info_struct *sis = swap_info[type]; 
- if (backend_registered) { + if (frontswap_ops) { BUG_ON(sis == NULL); if (sis->frontswap_map == NULL) return; - (*frontswap_ops.invalidate_area)(type); + frontswap_ops->invalidate_area(type); atomic_set(&sis->frontswap_pages, 0); memset(sis->frontswap_map, 0, sis->max / sizeof(long)); } -- cgit v1.2.3-70-g09d2 From f066ea230a65f939afc354beae62716ab5f0e645 Mon Sep 17 00:00:00 2001 From: Bob Liu Date: Tue, 30 Apr 2013 15:26:53 -0700 Subject: mm: frontswap: cleanup code After allowing tmem backends to build/run as modules, frontswap_enabled is always true if CONFIG_FRONTSWAP is defined. But frontswap_test() depends on whether a backend is registered, so move it into frontswap.c and use frontswap_ops to make the decision. frontswap_set/clear are not used outside frontswap, so don't export them. Signed-off-by: Bob Liu Cc: Wanpeng Li Cc: Andor Daam Cc: Dan Magenheimer Cc: Florian Schmaus Cc: Konrad Rzeszutek Wilk Cc: Minchan Kim Cc: Stefan Hengelein Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/frontswap.h | 28 +++-------------------- mm/frontswap.c | 57 +++++++++++++++++++++++++---------------------- 2 files changed, 33 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index d4f29875c7c..6c49e1eba55 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -22,6 +22,7 @@ extern void frontswap_writethrough(bool); #define FRONTSWAP_HAS_EXCLUSIVE_GETS extern void frontswap_tmem_exclusive_gets(bool); +extern bool __frontswap_test(struct swap_info_struct *, pgoff_t); extern void __frontswap_init(unsigned type); extern int __frontswap_store(struct page *page); extern int __frontswap_load(struct page *page); @@ -29,26 +30,11 @@ extern void __frontswap_invalidate_page(unsigned, pgoff_t); extern void __frontswap_invalidate_area(unsigned); #ifdef CONFIG_FRONTSWAP +#define frontswap_enabled (1) static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t 
offset) { - bool ret = false; - - if (frontswap_enabled && sis->frontswap_map) - ret = test_bit(offset, sis->frontswap_map); - return ret; -} - -static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset) -{ - if (frontswap_enabled && sis->frontswap_map) - set_bit(offset, sis->frontswap_map); -} - -static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset) -{ - if (frontswap_enabled && sis->frontswap_map) - clear_bit(offset, sis->frontswap_map); + return __frontswap_test(sis, offset); } static inline void frontswap_map_set(struct swap_info_struct *p, @@ -71,14 +57,6 @@ static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset) return false; } -static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset) -{ -} - -static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset) -{ -} - static inline void frontswap_map_set(struct swap_info_struct *p, unsigned long *map) { diff --git a/mm/frontswap.c b/mm/frontswap.c index e44c9cbd144..2760b0f9882 100644 --- a/mm/frontswap.c +++ b/mm/frontswap.c @@ -26,14 +26,6 @@ */ static struct frontswap_ops *frontswap_ops __read_mostly; -/* - * This global enablement flag reduces overhead on systems where frontswap_ops - * has not been registered, so is preferred to the slower alternative: a - * function call that checks a non-global. - */ -bool frontswap_enabled __read_mostly; -EXPORT_SYMBOL(frontswap_enabled); - /* * If enabled, frontswap_store will return failure even on success. 
As * a result, the swap subsystem will always write the page to swap, in @@ -128,8 +120,6 @@ struct frontswap_ops *frontswap_register_ops(struct frontswap_ops *ops) struct frontswap_ops *old = frontswap_ops; int i; - frontswap_enabled = true; - for (i = 0; i < MAX_SWAPFILES; i++) { if (test_and_clear_bit(i, need_init)) ops->init(i); @@ -183,9 +173,21 @@ void __frontswap_init(unsigned type) } EXPORT_SYMBOL(__frontswap_init); -static inline void __frontswap_clear(struct swap_info_struct *sis, pgoff_t offset) +bool __frontswap_test(struct swap_info_struct *sis, + pgoff_t offset) +{ + bool ret = false; + + if (frontswap_ops && sis->frontswap_map) + ret = test_bit(offset, sis->frontswap_map); + return ret; +} +EXPORT_SYMBOL(__frontswap_test); + +static inline void __frontswap_clear(struct swap_info_struct *sis, + pgoff_t offset) { - frontswap_clear(sis, offset); + clear_bit(offset, sis->frontswap_map); atomic_dec(&sis->frontswap_pages); } @@ -204,18 +206,20 @@ int __frontswap_store(struct page *page) struct swap_info_struct *sis = swap_info[type]; pgoff_t offset = swp_offset(entry); - if (!frontswap_ops) { - inc_frontswap_failed_stores(); + /* + * Return if no backend registed. + * Don't need to inc frontswap_failed_stores here. 
+ */ + if (!frontswap_ops) return ret; - } BUG_ON(!PageLocked(page)); BUG_ON(sis == NULL); - if (frontswap_test(sis, offset)) + if (__frontswap_test(sis, offset)) dup = 1; ret = frontswap_ops->store(type, offset, page); if (ret == 0) { - frontswap_set(sis, offset); + set_bit(offset, sis->frontswap_map); inc_frontswap_succ_stores(); if (!dup) atomic_inc(&sis->frontswap_pages); @@ -248,18 +252,18 @@ int __frontswap_load(struct page *page) struct swap_info_struct *sis = swap_info[type]; pgoff_t offset = swp_offset(entry); - if (!frontswap_ops) - return ret; - BUG_ON(!PageLocked(page)); BUG_ON(sis == NULL); - if (frontswap_test(sis, offset)) + /* + * __frontswap_test() will check whether there is backend registered + */ + if (__frontswap_test(sis, offset)) ret = frontswap_ops->load(type, offset, page); if (ret == 0) { inc_frontswap_loads(); if (frontswap_tmem_exclusive_gets_enabled) { SetPageDirty(page); - frontswap_clear(sis, offset); + __frontswap_clear(sis, offset); } } return ret; @@ -274,11 +278,11 @@ void __frontswap_invalidate_page(unsigned type, pgoff_t offset) { struct swap_info_struct *sis = swap_info[type]; - if (!frontswap_ops) - return; - BUG_ON(sis == NULL); - if (frontswap_test(sis, offset)) { + /* + * __frontswap_test() will check whether there is backend registered + */ + if (__frontswap_test(sis, offset)) { frontswap_ops->invalidate_page(type, offset); __frontswap_clear(sis, offset); inc_frontswap_invalidates(); @@ -435,7 +439,6 @@ static int __init init_frontswap(void) debugfs_create_u64("invalidates", S_IRUGO, root, &frontswap_invalidates); #endif - frontswap_enabled = 1; return 0; } -- cgit v1.2.3-70-g09d2 From 4f89849da22db9d0edb378acea65e23fcd546173 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Tue, 30 Apr 2013 15:26:54 -0700 Subject: frontswap: get rid of swap_lock dependency Frontswap initialization routine depends on swap_lock, which want to be atomic about frontswap's first appearance. 
IOW, frontswap is not present and will fail all calls OR frontswap is fully functional but if new swap_info_struct isn't registered by enable_swap_info, swap subsystem doesn't start I/O so there is no race between init procedure and page I/O working on frontswap. So let's remove unnecessary swap_lock dependency. Cc: Dan Magenheimer Signed-off-by: Minchan Kim [v1: Rebased on my branch, reworked to work with backends loading late] [v2: Added a check for !map] [v3: Made the invalidate path follow the init path] [v4: Address comments by Wanpeng Li ] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Bob Liu Cc: Wanpeng Li Cc: Andor Daam Cc: Florian Schmaus Cc: Stefan Hengelein Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/frontswap.h | 6 +++--- mm/frontswap.c | 31 +++++++++++++++++++++++-------- mm/swapfile.c | 17 +++++++++-------- 3 files changed, 35 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index 6c49e1eba55..8293262401d 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -23,7 +23,7 @@ extern void frontswap_writethrough(bool); extern void frontswap_tmem_exclusive_gets(bool); extern bool __frontswap_test(struct swap_info_struct *, pgoff_t); -extern void __frontswap_init(unsigned type); +extern void __frontswap_init(unsigned type, unsigned long *map); extern int __frontswap_store(struct page *page); extern int __frontswap_load(struct page *page); extern void __frontswap_invalidate_page(unsigned, pgoff_t); @@ -98,10 +98,10 @@ static inline void frontswap_invalidate_area(unsigned type) __frontswap_invalidate_area(type); } -static inline void frontswap_init(unsigned type) +static inline void frontswap_init(unsigned type, unsigned long *map) { if (frontswap_enabled) - __frontswap_init(type); + __frontswap_init(type, map); } #endif /* _LINUX_FRONTSWAP_H */ diff --git a/mm/frontswap.c b/mm/frontswap.c index 2760b0f9882..538367ef137 
100644 --- a/mm/frontswap.c +++ b/mm/frontswap.c @@ -121,8 +121,13 @@ struct frontswap_ops *frontswap_register_ops(struct frontswap_ops *ops) int i; for (i = 0; i < MAX_SWAPFILES; i++) { - if (test_and_clear_bit(i, need_init)) + if (test_and_clear_bit(i, need_init)) { + struct swap_info_struct *sis = swap_info[i]; + /* __frontswap_init _should_ have set it! */ + if (!sis->frontswap_map) + return ERR_PTR(-EINVAL); ops->init(i); + } } /* * We MUST have frontswap_ops set _after_ the frontswap_init's @@ -156,20 +161,30 @@ EXPORT_SYMBOL(frontswap_tmem_exclusive_gets); /* * Called when a swap device is swapon'd. */ -void __frontswap_init(unsigned type) +void __frontswap_init(unsigned type, unsigned long *map) { struct swap_info_struct *sis = swap_info[type]; - if (frontswap_ops) { - BUG_ON(sis == NULL); - if (sis->frontswap_map == NULL) - return; + BUG_ON(sis == NULL); + + /* + * p->frontswap is a bitmap that we MUST have to figure out which page + * has gone in frontswap. Without it there is no point of continuing. + */ + if (WARN_ON(!map)) + return; + /* + * Irregardless of whether the frontswap backend has been loaded + * before this function or it will be later, we _MUST_ have the + * p->frontswap set to something valid to work properly. 
+ */ + frontswap_map_set(sis, map); + if (frontswap_ops) frontswap_ops->init(type); - } else { + else { BUG_ON(type > MAX_SWAPFILES); set_bit(type, need_init); } - } EXPORT_SYMBOL(__frontswap_init); diff --git a/mm/swapfile.c b/mm/swapfile.c index d417efddfe7..6c340d908b2 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1509,8 +1509,7 @@ static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span) } static void _enable_swap_info(struct swap_info_struct *p, int prio, - unsigned char *swap_map, - unsigned long *frontswap_map) + unsigned char *swap_map) { int i, prev; @@ -1519,7 +1518,6 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio, else p->prio = --least_priority; p->swap_map = swap_map; - frontswap_map_set(p, frontswap_map); p->flags |= SWP_WRITEOK; atomic_long_add(p->pages, &nr_swap_pages); total_swap_pages += p->pages; @@ -1542,10 +1540,10 @@ static void enable_swap_info(struct swap_info_struct *p, int prio, unsigned char *swap_map, unsigned long *frontswap_map) { + frontswap_init(p->type, frontswap_map); spin_lock(&swap_lock); spin_lock(&p->lock); - _enable_swap_info(p, prio, swap_map, frontswap_map); - frontswap_init(p->type); + _enable_swap_info(p, prio, swap_map); spin_unlock(&p->lock); spin_unlock(&swap_lock); } @@ -1554,7 +1552,7 @@ static void reinsert_swap_info(struct swap_info_struct *p) { spin_lock(&swap_lock); spin_lock(&p->lock); - _enable_swap_info(p, p->prio, p->swap_map, frontswap_map_get(p)); + _enable_swap_info(p, p->prio, p->swap_map); spin_unlock(&p->lock); spin_unlock(&swap_lock); } @@ -1563,6 +1561,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) { struct swap_info_struct *p = NULL; unsigned char *swap_map; + unsigned long *frontswap_map; struct file *swap_file, *victim; struct address_space *mapping; struct inode *inode; @@ -1662,12 +1661,14 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) swap_map = p->swap_map; p->swap_map = NULL; p->flags = 0; - 
frontswap_invalidate_area(type); + frontswap_map = frontswap_map_get(p); + frontswap_map_set(p, NULL); spin_unlock(&p->lock); spin_unlock(&swap_lock); + frontswap_invalidate_area(type); mutex_unlock(&swapon_mutex); vfree(swap_map); - vfree(frontswap_map_get(p)); + vfree(frontswap_map); /* Destroy swap account informatin */ swap_cgroup_swapoff(type); -- cgit v1.2.3-70-g09d2 From 833f8662af9659508afc3cb80f09138eade378e2 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 30 Apr 2013 15:26:57 -0700 Subject: cleancache: Make cleancache_init use a pointer for the ops Instead of using a backend_registered to determine whether a backend is enabled. This allows us to remove the backend_register check and just do 'if (cleancache_ops)' [v1: Rebase on top of b97c4b430b0a (ramster->zcache move] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Bob Liu Cc: Wanpeng Li Cc: Andor Daam Cc: Dan Magenheimer Cc: Florian Schmaus Cc: Minchan Kim Cc: Stefan Hengelein Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/staging/zcache/zcache-main.c | 8 ++--- drivers/xen/tmem.c | 6 ++-- include/linux/cleancache.h | 2 +- mm/cleancache.c | 62 +++++++++++++++++++----------------- 4 files changed, 40 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/drivers/staging/zcache/zcache-main.c b/drivers/staging/zcache/zcache-main.c index 09c69c8026f..6bd4ebb3494 100644 --- a/drivers/staging/zcache/zcache-main.c +++ b/drivers/staging/zcache/zcache-main.c @@ -1576,9 +1576,9 @@ static struct cleancache_ops zcache_cleancache_ops = { .init_fs = zcache_cleancache_init_fs }; -struct cleancache_ops zcache_cleancache_register_ops(void) +struct cleancache_ops *zcache_cleancache_register_ops(void) { - struct cleancache_ops old_ops = + struct cleancache_ops *old_ops = cleancache_register_ops(&zcache_cleancache_ops); return old_ops; @@ -1860,7 +1860,7 @@ static int __init zcache_init(void) } zbud_init(); if (zcache_enabled && !disable_cleancache) { - struct 
cleancache_ops old_ops; + struct cleancache_ops *old_ops; register_shrinker(&zcache_shrinker); old_ops = zcache_cleancache_register_ops(); @@ -1870,7 +1870,7 @@ static int __init zcache_init(void) pr_info("%s: cleancache: ignorenonactive = %d\n", namestr, !disable_cleancache_ignore_nonactive); #endif - if (old_ops.init_fs != NULL) + if (old_ops != NULL) pr_warn("%s: cleancache_ops overridden\n", namestr); } if (zcache_enabled && !disable_frontswap) { diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c index 7a01a5fd0f6..fd79eab0836 100644 --- a/drivers/xen/tmem.c +++ b/drivers/xen/tmem.c @@ -236,7 +236,7 @@ static int __init no_cleancache(char *s) } __setup("nocleancache", no_cleancache); -static struct cleancache_ops __initdata tmem_cleancache_ops = { +static struct cleancache_ops tmem_cleancache_ops = { .put_page = tmem_cleancache_put_page, .get_page = tmem_cleancache_get_page, .invalidate_page = tmem_cleancache_flush_page, @@ -392,9 +392,9 @@ static int __init xen_tmem_init(void) BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); if (tmem_enabled && use_cleancache) { char *s = ""; - struct cleancache_ops old_ops = + struct cleancache_ops *old_ops = cleancache_register_ops(&tmem_cleancache_ops); - if (old_ops.init_fs != NULL) + if (old_ops) s = " (WARNING: cleancache_ops overridden)"; printk(KERN_INFO "cleancache enabled, RAM provided by " "Xen Transcendent Memory%s\n", s); diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h index 42e55deee75..3af5ea83955 100644 --- a/include/linux/cleancache.h +++ b/include/linux/cleancache.h @@ -33,7 +33,7 @@ struct cleancache_ops { void (*invalidate_fs)(int); }; -extern struct cleancache_ops +extern struct cleancache_ops * cleancache_register_ops(struct cleancache_ops *ops); extern void __cleancache_init_fs(struct super_block *); extern void __cleancache_init_shared_fs(char *, struct super_block *); diff --git a/mm/cleancache.c b/mm/cleancache.c index 0cecdbba4bc..b3ae19b7203 100644 --- 
a/mm/cleancache.c +++ b/mm/cleancache.c @@ -32,7 +32,7 @@ EXPORT_SYMBOL(cleancache_enabled); * cleancache_ops is set by cleancache_ops_register to contain the pointers * to the cleancache "backend" implementation functions. */ -static struct cleancache_ops cleancache_ops __read_mostly; +static struct cleancache_ops *cleancache_ops __read_mostly; /* * Counters available via /sys/kernel/debug/frontswap (if debugfs is @@ -72,15 +72,14 @@ static DEFINE_MUTEX(poolid_mutex); /* * When set to false (default) all calls to the cleancache functions, except * the __cleancache_invalidate_fs and __cleancache_init_[shared|]fs are guarded - * by the if (!backend_registered) return. This means multiple threads (from - * different filesystems) will be checking backend_registered. The usage of a + * by the if (!cleancache_ops) return. This means multiple threads (from + * different filesystems) will be checking cleancache_ops. The usage of a * bool instead of a atomic_t or a bool guarded by a spinlock is OK - we are * OK if the time between the backend's have been initialized (and - * backend_registered has been set to true) and when the filesystems start + * cleancache_ops has been set to not NULL) and when the filesystems start * actually calling the backends. The inverse (when unloading) is obviously * not good - but this shim does not do that (yet). */ -static bool backend_registered __read_mostly; /* * The backends and filesystems work all asynchronously. This is b/c the @@ -90,13 +89,13 @@ static bool backend_registered __read_mostly; * [shared_|]fs_poolid_map and uuids for. * * b). user does I/Os -> we call the rest of __cleancache_* functions - * which return immediately as backend_registered is false. + * which return immediately as cleancache_ops is false. * * c). modprobe zcache -> cleancache_register_ops. 
We init the backend - * and set backend_registered to true, and for any fs_poolid_map + * and set cleancache_ops to true, and for any fs_poolid_map * (which is set by __cleancache_init_fs) we initialize the poolid. * - * d). user does I/Os -> now that backend_registered is true all the + * d). user does I/Os -> now that cleancache_ops is true all the * __cleancache_* functions can call the backend. They all check * that fs_poolid_map is valid and if so invoke the backend. * @@ -120,23 +119,26 @@ static bool backend_registered __read_mostly; * Register operations for cleancache, returning previous thus allowing * detection of multiple backends and possible nesting. */ -struct cleancache_ops cleancache_register_ops(struct cleancache_ops *ops) +struct cleancache_ops *cleancache_register_ops(struct cleancache_ops *ops) { - struct cleancache_ops old = cleancache_ops; + struct cleancache_ops *old = cleancache_ops; int i; mutex_lock(&poolid_mutex); - cleancache_ops = *ops; - - backend_registered = true; for (i = 0; i < MAX_INITIALIZABLE_FS; i++) { if (fs_poolid_map[i] == FS_NO_BACKEND) - fs_poolid_map[i] = (*cleancache_ops.init_fs)(PAGE_SIZE); + fs_poolid_map[i] = ops->init_fs(PAGE_SIZE); if (shared_fs_poolid_map[i] == FS_NO_BACKEND) - shared_fs_poolid_map[i] = (*cleancache_ops.init_shared_fs) + shared_fs_poolid_map[i] = ops->init_shared_fs (uuids[i], PAGE_SIZE); } -out: + /* + * We MUST set cleancache_ops _after_ we have called the backends + * init_fs or init_shared_fs functions. Otherwise the compiler might + * re-order where cleancache_ops is set in this function. 
+ */ + barrier(); + cleancache_ops = ops; mutex_unlock(&poolid_mutex); return old; } @@ -151,8 +153,8 @@ void __cleancache_init_fs(struct super_block *sb) for (i = 0; i < MAX_INITIALIZABLE_FS; i++) { if (fs_poolid_map[i] == FS_UNKNOWN) { sb->cleancache_poolid = i + FAKE_FS_POOLID_OFFSET; - if (backend_registered) - fs_poolid_map[i] = (*cleancache_ops.init_fs)(PAGE_SIZE); + if (cleancache_ops) + fs_poolid_map[i] = cleancache_ops->init_fs(PAGE_SIZE); else fs_poolid_map[i] = FS_NO_BACKEND; break; @@ -172,8 +174,8 @@ void __cleancache_init_shared_fs(char *uuid, struct super_block *sb) if (shared_fs_poolid_map[i] == FS_UNKNOWN) { sb->cleancache_poolid = i + FAKE_SHARED_FS_POOLID_OFFSET; uuids[i] = uuid; - if (backend_registered) - shared_fs_poolid_map[i] = (*cleancache_ops.init_shared_fs) + if (cleancache_ops) + shared_fs_poolid_map[i] = cleancache_ops->init_shared_fs (uuid, PAGE_SIZE); else shared_fs_poolid_map[i] = FS_NO_BACKEND; @@ -240,7 +242,7 @@ int __cleancache_get_page(struct page *page) int fake_pool_id; struct cleancache_filekey key = { .u.key = { 0 } }; - if (!backend_registered) { + if (!cleancache_ops) { cleancache_failed_gets++; goto out; } @@ -255,7 +257,7 @@ int __cleancache_get_page(struct page *page) goto out; if (pool_id >= 0) - ret = (*cleancache_ops.get_page)(pool_id, + ret = cleancache_ops->get_page(pool_id, key, page->index, page); if (ret == 0) cleancache_succ_gets++; @@ -282,7 +284,7 @@ void __cleancache_put_page(struct page *page) int fake_pool_id; struct cleancache_filekey key = { .u.key = { 0 } }; - if (!backend_registered) { + if (!cleancache_ops) { cleancache_puts++; return; } @@ -296,7 +298,7 @@ void __cleancache_put_page(struct page *page) if (pool_id >= 0 && cleancache_get_key(page->mapping->host, &key) >= 0) { - (*cleancache_ops.put_page)(pool_id, key, page->index, page); + cleancache_ops->put_page(pool_id, key, page->index, page); cleancache_puts++; } } @@ -318,7 +320,7 @@ void __cleancache_invalidate_page(struct address_space 
*mapping, int fake_pool_id = mapping->host->i_sb->cleancache_poolid; struct cleancache_filekey key = { .u.key = { 0 } }; - if (!backend_registered) + if (!cleancache_ops) return; if (fake_pool_id >= 0) { @@ -328,7 +330,7 @@ void __cleancache_invalidate_page(struct address_space *mapping, VM_BUG_ON(!PageLocked(page)); if (cleancache_get_key(mapping->host, &key) >= 0) { - (*cleancache_ops.invalidate_page)(pool_id, + cleancache_ops->invalidate_page(pool_id, key, page->index); cleancache_invalidates++; } @@ -351,7 +353,7 @@ void __cleancache_invalidate_inode(struct address_space *mapping) int fake_pool_id = mapping->host->i_sb->cleancache_poolid; struct cleancache_filekey key = { .u.key = { 0 } }; - if (!backend_registered) + if (!cleancache_ops) return; if (fake_pool_id < 0) @@ -360,7 +362,7 @@ void __cleancache_invalidate_inode(struct address_space *mapping) pool_id = get_poolid_from_fake(fake_pool_id); if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0) - (*cleancache_ops.invalidate_inode)(pool_id, key); + cleancache_ops->invalidate_inode(pool_id, key); } EXPORT_SYMBOL(__cleancache_invalidate_inode); @@ -387,8 +389,8 @@ void __cleancache_invalidate_fs(struct super_block *sb) fs_poolid_map[index] = FS_UNKNOWN; } sb->cleancache_poolid = -1; - if (backend_registered) - (*cleancache_ops.invalidate_fs)(old_poolid); + if (cleancache_ops) + cleancache_ops->invalidate_fs(old_poolid); mutex_unlock(&poolid_mutex); } EXPORT_SYMBOL(__cleancache_invalidate_fs); -- cgit v1.2.3-70-g09d2 From ff610a1d55da22bf95bbc6a8b193e052169b34b7 Mon Sep 17 00:00:00 2001 From: Bob Liu Date: Tue, 30 Apr 2013 15:26:58 -0700 Subject: mm: cleancache: clean up cleancache_enabled cleancache_ops is used to decide whether backend is registered. So now cleancache_enabled is always true if defined CONFIG_CLEANCACHE. 
Signed-off-by: Bob Liu Cc: Wanpeng Li Cc: Andor Daam Cc: Dan Magenheimer Cc: Florian Schmaus Cc: Konrad Rzeszutek Wilk Cc: Minchan Kim Cc: Stefan Hengelein Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cleancache.h | 2 +- mm/cleancache.c | 11 ----------- 2 files changed, 1 insertion(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h index 3af5ea83955..4ce9056b31a 100644 --- a/include/linux/cleancache.h +++ b/include/linux/cleancache.h @@ -42,9 +42,9 @@ extern void __cleancache_put_page(struct page *); extern void __cleancache_invalidate_page(struct address_space *, struct page *); extern void __cleancache_invalidate_inode(struct address_space *); extern void __cleancache_invalidate_fs(struct super_block *); -extern int cleancache_enabled; #ifdef CONFIG_CLEANCACHE +#define cleancache_enabled (1) static inline bool cleancache_fs_enabled(struct page *page) { return page->mapping->host->i_sb->cleancache_poolid >= 0; diff --git a/mm/cleancache.c b/mm/cleancache.c index b3ae19b7203..5875f48ce27 100644 --- a/mm/cleancache.c +++ b/mm/cleancache.c @@ -18,16 +18,6 @@ #include #include -/* - * This global enablement flag may be read thousands of times per second - * by cleancache_get/put/invalidate even on systems where cleancache_ops - * is not claimed (e.g. cleancache is config'ed on but remains - * disabled), so is preferred to the slower alternative: a function - * call that checks a non-global. - */ -int cleancache_enabled __read_mostly; -EXPORT_SYMBOL(cleancache_enabled); - /* * cleancache_ops is set by cleancache_ops_register to contain the pointers * to the cleancache "backend" implementation functions. 
@@ -414,7 +404,6 @@ static int __init init_cleancache(void) fs_poolid_map[i] = FS_UNKNOWN; shared_fs_poolid_map[i] = FS_UNKNOWN; } - cleancache_enabled = 1; return 0; } module_init(init_cleancache) -- cgit v1.2.3-70-g09d2 From 196779b9b4ce1922afabdc20d0270720603bd46c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 30 Apr 2013 15:27:12 -0700 Subject: dump_stack: consolidate dump_stack() implementations and unify their behaviors Both dump_stack() and show_stack() are currently implemented by each architecture. show_stack(NULL, NULL) dumps the backtrace for the current task as does dump_stack(). On some archs, dump_stack() prints extra information - pid, utsname and so on - in addition to the backtrace while the two are identical on other archs. The usages in arch-independent code of the two functions indicate show_stack(NULL, NULL) should print out bare backtrace while dump_stack() is used for debugging purposes when something went wrong, so it does make sense to print additional information on the task which triggered dump_stack(). There's no reason to require archs to implement two separate but mostly identical functions. It leads to unnecessary subtle information. This patch expands the dummy fallback dump_stack() implementation in lib/dump_stack.c such that it prints out debug information (taken from x86) and invokes show_stack(NULL, NULL) and drops arch-specific dump_stack() implementations in all archs except blackfin. Blackfin's dump_stack() does something wonky that I don't understand. Debug information can be printed separately by calling dump_stack_print_info() so that arch-specific dump_stack() implementation can still emit the same debug information. This is used in blackfin. This patch brings the following behavior changes. * On some archs, an extra level in backtrace for show_stack() could be printed. This is because the top frame was determined in dump_stack() on those archs while generic dump_stack() can't do that reliably. 
It can be compensated by inlining dump_stack() but not sure whether that'd be necessary. * Most archs didn't use to print debug info on dump_stack(). They do now. An example WARN dump follows. WARNING: at kernel/workqueue.c:4841 init_workqueues+0x35/0x505() Hardware name: empty Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.9.0-rc1-work+ #9 0000000000000009 ffff88007c861e08 ffffffff81c614dc ffff88007c861e48 ffffffff8108f50f ffffffff82228240 0000000000000040 ffffffff8234a03c 0000000000000000 0000000000000000 0000000000000000 ffff88007c861e58 Call Trace: [] dump_stack+0x19/0x1b [] warn_slowpath_common+0x7f/0xc0 [] warn_slowpath_null+0x1a/0x20 [] init_workqueues+0x35/0x505 ... v2: CPU number added to the generic debug info as requested by s390 folks and dropped the s390 specific dump_stack(). This loses %ksp from the debug message which the maintainers think isn't important enough to keep the s390-specific dump_stack() implementation. dump_stack_print_info() is moved to kernel/printk.c from lib/dump_stack.c. Because linkage is per object file, dump_stack_print_info() living in the same lib file as generic dump_stack() means that archs which implement custom dump_stack() - at this point, only blackfin - can't use dump_stack_print_info() as that will bring in the generic version of dump_stack() too. v1 The v1 patch broke build on blackfin due to this issue. The build breakage was reported by Fengguang Wu. Signed-off-by: Tejun Heo Acked-by: David S.
Miller Acked-by: Vineet Gupta Acked-by: Jesper Nilsson Acked-by: Vineet Gupta Acked-by: Martin Schwidefsky [s390 bits] Cc: Heiko Carstens Cc: Mike Frysinger Cc: Fengguang Wu Cc: Bjorn Helgaas Cc: Sam Ravnborg Acked-by: Richard Kuo [hexagon bits] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/traps.c | 7 ------- arch/arc/kernel/stacktrace.c | 7 ------- arch/arm/kernel/traps.c | 7 ------- arch/arm64/kernel/traps.c | 7 ------- arch/avr32/kernel/process.c | 8 -------- arch/blackfin/kernel/dumpstack.c | 1 + arch/c6x/kernel/traps.c | 9 --------- arch/cris/kernel/traps.c | 7 ------- arch/frv/kernel/traps.c | 11 ----------- arch/h8300/kernel/traps.c | 7 ------- arch/hexagon/kernel/traps.c | 8 -------- arch/ia64/kernel/process.c | 8 -------- arch/m32r/kernel/traps.c | 9 --------- arch/m68k/kernel/traps.c | 12 ------------ arch/metag/kernel/traps.c | 6 ------ arch/microblaze/kernel/traps.c | 6 ------ arch/mips/kernel/traps.c | 13 ------------- arch/mn10300/kernel/traps.c | 11 ----------- arch/openrisc/kernel/traps.c | 11 ----------- arch/parisc/kernel/traps.c | 8 -------- arch/powerpc/kernel/process.c | 6 ------ arch/s390/kernel/dumpstack.c | 17 ----------------- arch/score/kernel/traps.c | 10 ---------- arch/sh/kernel/dumpstack.c | 6 ------ arch/sparc/kernel/process_32.c | 7 ------- arch/sparc/kernel/traps_64.c | 7 ------- arch/um/kernel/sysrq.c | 12 ------------ arch/unicore32/kernel/traps.c | 6 ------ arch/x86/kernel/dumpstack.c | 18 ------------------ arch/xtensa/kernel/traps.c | 8 -------- include/linux/printk.h | 5 +++++ kernel/printk.c | 18 ++++++++++++++++++ lib/dump_stack.c | 11 ++++++++--- 33 files changed, 32 insertions(+), 262 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 4037461a649..affccb959a9 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -169,13 +169,6 @@ void show_stack(struct task_struct *task, unsigned long *sp) 
dik_show_trace(sp); } -void dump_stack(void) -{ - show_stack(NULL, NULL); -} - -EXPORT_SYMBOL(dump_stack); - void die_if_kernel(char * str, struct pt_regs *regs, long err, unsigned long *r9_15) { diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index a63ff842564..ca0207b9d5b 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -220,13 +220,6 @@ void show_stack(struct task_struct *tsk, unsigned long *sp) show_stacktrace(tsk, NULL); } -/* Expected by Rest of kernel code */ -void dump_stack(void) -{ - show_stacktrace(NULL, NULL); -} -EXPORT_SYMBOL(dump_stack); - /* Another API expected by schedular, shows up in "ps" as Wait Channel * Ofcourse just returning schedule( ) would be pointless so unwind until * the function is not in schedular code diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 1c089119b2d..18b32e8e449 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -204,13 +204,6 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) } #endif -void dump_stack(void) -{ - dump_backtrace(NULL, NULL); -} - -EXPORT_SYMBOL(dump_stack); - void show_stack(struct task_struct *tsk, unsigned long *sp) { dump_backtrace(NULL, tsk); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index b3c5f628bdb..61d7dd29f75 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -167,13 +167,6 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) } } -void dump_stack(void) -{ - dump_backtrace(NULL, NULL); -} - -EXPORT_SYMBOL(dump_stack); - void show_stack(struct task_struct *tsk, unsigned long *sp) { dump_backtrace(NULL, tsk); diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c index 073c3c2fa52..89a8017e8ca 100644 --- a/arch/avr32/kernel/process.c +++ b/arch/avr32/kernel/process.c @@ -204,14 +204,6 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) show_stack_log_lvl(tsk, (unsigned 
long)stack, NULL, ""); } -void dump_stack(void) -{ - unsigned long stack; - - show_trace_log_lvl(current, &stack, NULL, ""); -} -EXPORT_SYMBOL(dump_stack); - static const char *cpu_modes[] = { "Application", "Supervisor", "Interrupt level 0", "Interrupt level 1", "Interrupt level 2", "Interrupt level 3", "Exception", "NMI" diff --git a/arch/blackfin/kernel/dumpstack.c b/arch/blackfin/kernel/dumpstack.c index 5cfbaa29821..95ba6d9e9a3 100644 --- a/arch/blackfin/kernel/dumpstack.c +++ b/arch/blackfin/kernel/dumpstack.c @@ -168,6 +168,7 @@ void dump_stack(void) #endif trace_buffer_save(tflags); dump_bfin_trace_buffer(); + dump_stack_print_info(KERN_DEFAULT); show_stack(current, &stack); trace_buffer_restore(tflags); } diff --git a/arch/c6x/kernel/traps.c b/arch/c6x/kernel/traps.c index 1be74e5b478..d0b96ef25c1 100644 --- a/arch/c6x/kernel/traps.c +++ b/arch/c6x/kernel/traps.c @@ -67,15 +67,6 @@ void show_regs(struct pt_regs *regs) pr_err("A31: %08lx B31: %08lx\n", regs->a31, regs->b31); } -void dump_stack(void) -{ - unsigned long stack; - - show_stack(current, &stack); -} -EXPORT_SYMBOL(dump_stack); - - void die(char *str, struct pt_regs *fp, int nr) { console_verbose(); diff --git a/arch/cris/kernel/traps.c b/arch/cris/kernel/traps.c index a11ad3229f8..0ffda73734f 100644 --- a/arch/cris/kernel/traps.c +++ b/arch/cris/kernel/traps.c @@ -146,13 +146,6 @@ show_stack(void) } #endif -void -dump_stack(void) -{ - show_stack(NULL, NULL); -} -EXPORT_SYMBOL(dump_stack); - void set_nmi_handler(void (*handler)(struct pt_regs *)) { diff --git a/arch/frv/kernel/traps.c b/arch/frv/kernel/traps.c index 5cfd1420b09..cfcd802d6f9 100644 --- a/arch/frv/kernel/traps.c +++ b/arch/frv/kernel/traps.c @@ -466,17 +466,6 @@ asmlinkage void compound_exception(unsigned long esfr1, BUG(); } /* end compound_exception() */ -/*****************************************************************************/ -/* - * The architecture-independent backtrace generator - */ -void dump_stack(void) -{ - 
show_stack(NULL, NULL); -} - -EXPORT_SYMBOL(dump_stack); - void show_stack(struct task_struct *task, unsigned long *sp) { } diff --git a/arch/h8300/kernel/traps.c b/arch/h8300/kernel/traps.c index 7833aa3e7c7..cfe494dbe3d 100644 --- a/arch/h8300/kernel/traps.c +++ b/arch/h8300/kernel/traps.c @@ -164,10 +164,3 @@ void show_trace_task(struct task_struct *tsk) { show_stack(tsk,(unsigned long *)tsk->thread.esp0); } - -void dump_stack(void) -{ - show_stack(NULL,NULL); -} - -EXPORT_SYMBOL(dump_stack); diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index be5e2dd9c9d..cc2171b2aa0 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -191,14 +191,6 @@ void show_stack(struct task_struct *task, unsigned long *fp) do_show_stack(task, fp, 0); } -void dump_stack(void) -{ - unsigned long *fp; - asm("%0 = r30" : "=r" (fp)); - show_stack(current, fp); -} -EXPORT_SYMBOL(dump_stack); - int die(const char *str, struct pt_regs *regs, long err) { static struct { diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index a26fc640e4c..182bd64cc72 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -95,14 +95,6 @@ show_stack (struct task_struct *task, unsigned long *sp) } } -void -dump_stack (void) -{ - show_stack(NULL, NULL); -} - -EXPORT_SYMBOL(dump_stack); - void show_regs (struct pt_regs *regs) { diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c index 9fe3467a513..a7a424f852e 100644 --- a/arch/m32r/kernel/traps.c +++ b/arch/m32r/kernel/traps.c @@ -167,15 +167,6 @@ void show_stack(struct task_struct *task, unsigned long *sp) show_trace(task, sp); } -void dump_stack(void) -{ - unsigned long stack; - - show_trace(current, &stack); -} - -EXPORT_SYMBOL(dump_stack); - static void show_registers(struct pt_regs *regs) { int i = 0; diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c index f32ab22e7ed..88fcd8c70e7 100644 --- a/arch/m68k/kernel/traps.c +++ b/arch/m68k/kernel/traps.c 
@@ -991,18 +991,6 @@ void show_stack(struct task_struct *task, unsigned long *stack) show_trace(stack); } -/* - * The architecture-independent backtrace generator - */ -void dump_stack(void) -{ - unsigned long stack; - - show_trace(&stack); -} - -EXPORT_SYMBOL(dump_stack); - /* * The vector number returned in the frame pointer may also contain * the "fs" (Fault Status) bits on ColdFire. These are in the bottom diff --git a/arch/metag/kernel/traps.c b/arch/metag/kernel/traps.c index 8961f247b50..2ceeaae5b19 100644 --- a/arch/metag/kernel/traps.c +++ b/arch/metag/kernel/traps.c @@ -987,9 +987,3 @@ void show_stack(struct task_struct *tsk, unsigned long *sp) show_trace(tsk, sp, NULL); } - -void dump_stack(void) -{ - show_stack(NULL, NULL); -} -EXPORT_SYMBOL(dump_stack); diff --git a/arch/microblaze/kernel/traps.c b/arch/microblaze/kernel/traps.c index 30e6b5004a6..cb619533a19 100644 --- a/arch/microblaze/kernel/traps.c +++ b/arch/microblaze/kernel/traps.c @@ -75,9 +75,3 @@ void show_stack(struct task_struct *task, unsigned long *sp) debug_show_held_locks(task); } - -void dump_stack(void) -{ - show_stack(NULL, NULL); -} -EXPORT_SYMBOL(dump_stack); diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index c3abb88170f..b512b28cf78 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -206,19 +206,6 @@ void show_stack(struct task_struct *task, unsigned long *sp) show_stacktrace(task, &regs); } -/* - * The architecture-independent dump_stack generator - */ -void dump_stack(void) -{ - struct pt_regs regs; - - prepare_frametrace(&regs); - show_backtrace(current, &regs); -} - -EXPORT_SYMBOL(dump_stack); - static void show_code(unsigned int __user *pc) { long i; diff --git a/arch/mn10300/kernel/traps.c b/arch/mn10300/kernel/traps.c index b900e5afa0a..a7a987c7954 100644 --- a/arch/mn10300/kernel/traps.c +++ b/arch/mn10300/kernel/traps.c @@ -293,17 +293,6 @@ void show_stack(struct task_struct *task, unsigned long *sp) show_trace(sp); } -/* - * the
architecture-independent dump_stack generator - */ -void dump_stack(void) -{ - unsigned long stack; - - show_stack(current, &stack); -} -EXPORT_SYMBOL(dump_stack); - /* * dump the register file in the specified exception frame */ diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index 5cce396016d..3d3f6062f49 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -105,17 +105,6 @@ void show_trace_task(struct task_struct *tsk) */ } -/* - * The architecture-independent backtrace generator - */ -void dump_stack(void) -{ - unsigned long stack; - - show_stack(current, &stack); -} -EXPORT_SYMBOL(dump_stack); - void show_registers(struct pt_regs *regs) { int i; diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index aeb8f8f2c07..e64cf5f09b6 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -158,14 +158,6 @@ void show_regs(struct pt_regs *regs) } } - -void dump_stack(void) -{ - show_stack(NULL, NULL); -} - -EXPORT_SYMBOL(dump_stack); - static void do_show_stack(struct unwind_frame_info *info) { int i = 1; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 16e77a81ab4..624d44bb44d 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1362,12 +1362,6 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) } while (count++ < kstack_depth_to_print); } -void dump_stack(void) -{ - show_stack(current, NULL); -} -EXPORT_SYMBOL(dump_stack); - #ifdef CONFIG_PPC64 /* Called with hard IRQs off */ void __ppc64_runlatch_on(void) diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 03dce39d01e..2f1f639d1a3 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -129,23 +129,6 @@ static void show_last_breaking_event(struct pt_regs *regs) #endif } -/* - * The architecture-independent dump_stack generator - */ -void dump_stack(void) -{ - printk("CPU: %d %s %s %.*s\n", - 
task_thread_info(current)->cpu, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - printk("Process %s (pid: %d, task: %p, ksp: %p)\n", - current->comm, current->pid, current, - (void *) current->thread.ksp); - show_stack(NULL, NULL); -} -EXPORT_SYMBOL(dump_stack); - static inline int mask_bits(struct pt_regs *regs, unsigned long bits) { return (regs->psw.mask & bits) / ((~bits + 1) & bits); diff --git a/arch/score/kernel/traps.c b/arch/score/kernel/traps.c index 0e46fb19a84..a38f435fba7 100644 --- a/arch/score/kernel/traps.c +++ b/arch/score/kernel/traps.c @@ -149,16 +149,6 @@ static void show_registers(struct pt_regs *regs) printk(KERN_NOTICE "\n"); } -/* - * The architecture-independent dump_stack generator - */ -void dump_stack(void) -{ - show_stack(current_thread_info()->task, - (long *) get_irq_regs()->regs[0]); -} -EXPORT_SYMBOL(dump_stack); - void __die(const char *str, struct pt_regs *regs, const char *file, const char *func, unsigned long line) { diff --git a/arch/sh/kernel/dumpstack.c b/arch/sh/kernel/dumpstack.c index 7617dc4129a..b959f559260 100644 --- a/arch/sh/kernel/dumpstack.c +++ b/arch/sh/kernel/dumpstack.c @@ -158,9 +158,3 @@ void show_stack(struct task_struct *tsk, unsigned long *sp) (unsigned long)task_stack_page(tsk)); show_trace(tsk, sp, NULL); } - -void dump_stack(void) -{ - show_stack(NULL, NULL); -} -EXPORT_SYMBOL(dump_stack); diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index 2be4214b390..dccf5f58d70 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -164,13 +164,6 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp) printk("\n"); } -void dump_stack(void) -{ - show_stack(current, NULL); -} - -EXPORT_SYMBOL(dump_stack); - /* * Note: sparc64 has a pretty intricated thread_saved_pc, check it out. 
*/ diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index 8d38ca97aa2..b3f833ab90e 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -2350,13 +2350,6 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp) } while (++count < 16); } -void dump_stack(void) -{ - show_stack(current, NULL); -} - -EXPORT_SYMBOL(dump_stack); - static inline struct reg_window *kernel_stack_up(struct reg_window *rw) { unsigned long fp = rw->ins[6]; diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c index e562ff80409..7d101a2a154 100644 --- a/arch/um/kernel/sysrq.c +++ b/arch/um/kernel/sysrq.c @@ -35,18 +35,6 @@ void show_trace(struct task_struct *task, unsigned long * stack) } #endif -/* - * stack dumps generator - this is used by arch-independent code. - * And this is identical to i386 currently. - */ -void dump_stack(void) -{ - unsigned long stack; - - show_trace(current, &stack); -} -EXPORT_SYMBOL(dump_stack); - /*Stolen from arch/i386/kernel/traps.c */ static const int kstack_depth_to_print = 24; diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c index 0870b68d2ad..c54e32410ea 100644 --- a/arch/unicore32/kernel/traps.c +++ b/arch/unicore32/kernel/traps.c @@ -170,12 +170,6 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) c_backtrace(fp, mode); } -void dump_stack(void) -{ - dump_backtrace(NULL, NULL); -} -EXPORT_SYMBOL(dump_stack); - void show_stack(struct task_struct *tsk, unsigned long *sp) { dump_backtrace(NULL, tsk); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index dd1a7c391c9..deb6421c9e6 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -191,24 +191,6 @@ void show_stack(struct task_struct *task, unsigned long *sp) show_stack_log_lvl(task, NULL, sp, bp, ""); } -/* - * The architecture-independent dump_stack generator - */ -void dump_stack(void) -{ - unsigned long bp; - unsigned long stack; - - bp = 
stack_frame(current, NULL); - printk("Pid: %d, comm: %.20s %s %s %.*s\n", - current->pid, current->comm, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - show_trace(NULL, NULL, &stack, bp); -} -EXPORT_SYMBOL(dump_stack); - static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; static int die_owner = -1; static unsigned int die_nest_count; diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 923db5c1527..384b7c7c2f6 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -481,14 +481,6 @@ void show_stack(struct task_struct *task, unsigned long *sp) show_trace(task, stack); } -void dump_stack(void) -{ - show_stack(current, NULL); -} - -EXPORT_SYMBOL(dump_stack); - - void show_code(unsigned int *pc) { long i; diff --git a/include/linux/printk.h b/include/linux/printk.h index 4890fe62c01..7ce1f878cf6 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -145,6 +145,7 @@ extern void wake_up_klogd(void); void log_buf_kexec_setup(void); void __init setup_log_buf(int early); +void dump_stack_print_info(const char *log_lvl); #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) @@ -182,6 +183,10 @@ static inline void log_buf_kexec_setup(void) static inline void setup_log_buf(int early) { } + +static inline void dump_stack_print_info(const char *log_lvl) +{ +} #endif extern void dump_stack(void) __cold; diff --git a/kernel/printk.c b/kernel/printk.c index 376914e2869..70b4b94a0ec 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -43,6 +43,7 @@ #include #include #include +#include #include @@ -2849,4 +2850,21 @@ void kmsg_dump_rewind(struct kmsg_dumper *dumper) raw_spin_unlock_irqrestore(&logbuf_lock, flags); } EXPORT_SYMBOL_GPL(kmsg_dump_rewind); + +/** + * dump_stack_print_info - print generic debug info for dump_stack() + * @log_lvl: log level + * + * Arch-specific dump_stack() implementations can use this function 
to + * print out the same debug information as the generic dump_stack(). + */ +void dump_stack_print_info(const char *log_lvl) +{ + printk("%sCPU: %d PID: %d Comm: %.20s %s %s %.*s\n", + log_lvl, raw_smp_processor_id(), current->pid, current->comm, + print_tainted(), init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); +} + #endif diff --git a/lib/dump_stack.c b/lib/dump_stack.c index 42f4f55c945..53bad099ebd 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -5,11 +5,16 @@ #include #include +#include +/** + * dump_stack - dump the current task information and its stack trace + * + * Architectures can override this implementation by implementing its own. + */ void dump_stack(void) { - printk(KERN_NOTICE - "This architecture does not implement dump_stack()\n"); + dump_stack_print_info(KERN_DEFAULT); + show_stack(NULL, NULL); } - EXPORT_SYMBOL(dump_stack); -- cgit v1.2.3-70-g09d2 From 98e5e1bf722c4f976a860aed06dd365a56a34ee0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 30 Apr 2013 15:27:15 -0700 Subject: dump_stack: implement arch-specific hardware description in task dumps x86 and ia64 can acquire extra hardware identification information from DMI and print it along with task dumps; however, the usage isn't consistent. * x86 show_regs() collects vendor, product and board strings and print them out with PID, comm and utsname. Some of the information is printed again later in the same dump. * warn_slowpath_common() explicitly accesses the DMI board and prints it out with "Hardware name:" label. This applies to both x86 and ia64 but is irrelevant on all other archs. * ia64 doesn't show DMI information on other non-WARN dumps. This patch introduces arch-specific hardware description used by dump_stack(). It can be set by calling dump_stack_set_arch_desc() during boot and, if exists, printed out in a separate line with "Hardware name:" label. 
dmi_set_dump_stack_arch_desc() is added which sets arch-specific description from DMI data. It uses dmi_ids_string[] which is set from dmi_present() used for DMI debug message. It is superset of the information x86 show_regs() is using. The function is called from x86 and ia64 boot code right after dmi_scan_machine(). This makes the explicit DMI handling in warn_slowpath_common() unnecessary. Removed. show_regs() isn't yet converted to use generic debug information printing and this patch doesn't remove the duplicate DMI handling in x86 show_regs(). The next patch will unify show_regs() handling and remove the duplication. An example WARN dump follows. WARNING: at kernel/workqueue.c:4841 init_workqueues+0x35/0x505() Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.9.0-rc1-work+ #3 Hardware name: empty empty/S3992, BIOS 080011 10/26/2007 0000000000000009 ffff88007c861e08 ffffffff81c614dc ffff88007c861e48 ffffffff8108f500 ffffffff82228240 0000000000000040 ffffffff8234a08e 0000000000000000 0000000000000000 0000000000000000 ffff88007c861e58 Call Trace: [] dump_stack+0x19/0x1b [] warn_slowpath_common+0x70/0xa0 [] warn_slowpath_null+0x1a/0x20 [] init_workqueues+0x35/0x505 ... v2: Use the same string as the debug message from dmi_present() which also contains BIOS information. Move hardware name into its own line as warn_slowpath_common() did. This change was suggested by Bjorn Helgaas. Signed-off-by: Tejun Heo Cc: Bjorn Helgaas Cc: David S. 
Miller Cc: Fengguang Wu Cc: Heiko Carstens Cc: Jesper Nilsson Cc: Martin Schwidefsky Cc: Mike Frysinger Cc: Vineet Gupta Cc: Sam Ravnborg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/setup.c | 1 + arch/x86/kernel/setup.c | 1 + drivers/firmware/dmi_scan.c | 13 +++++++++++++ include/linux/dmi.h | 2 ++ include/linux/printk.h | 5 +++++ kernel/panic.c | 6 ------ kernel/printk.c | 26 ++++++++++++++++++++++++++ 7 files changed, 48 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 2029cc0d2fc..13bfdd22afc 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -1063,6 +1063,7 @@ check_bugs (void) static int __init run_dmi_scan(void) { dmi_scan_machine(); + dmi_set_dump_stack_arch_desc(); return 0; } core_initcall(run_dmi_scan); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 4689855c2f8..56f7fcfe7fa 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -996,6 +996,7 @@ void __init setup_arch(char **cmdline_p) efi_init(); dmi_scan_machine(); + dmi_set_dump_stack_arch_desc(); /* * VMware detection requires dmi to be available, so this diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index 862b1d27a85..98c62081c03 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -530,6 +530,19 @@ void __init dmi_scan_machine(void) dmi_initialized = 1; } +/** + * dmi_set_dump_stack_arch_desc - set arch description for dump_stack() + * + * Invoke dump_stack_set_arch_desc() with DMI system information so that + * DMI identifiers are printed out on task dumps. Arch boot code should + * call this function after dmi_scan_machine() if it wants to print out DMI + * identifiers on task dumps. 
+ */ +void __init dmi_set_dump_stack_arch_desc(void) +{ + dump_stack_set_arch_desc("%s", dmi_ids_string); +} + /** * dmi_matches - check if dmi_system_id structure matches system DMI data * @dmi: pointer to the dmi_system_id structure to check diff --git a/include/linux/dmi.h b/include/linux/dmi.h index f156cca25ad..b6eb7a05d58 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -99,6 +99,7 @@ extern const char * dmi_get_system_info(int field); extern const struct dmi_device * dmi_find_device(int type, const char *name, const struct dmi_device *from); extern void dmi_scan_machine(void); +extern void dmi_set_dump_stack_arch_desc(void); extern bool dmi_get_date(int field, int *yearp, int *monthp, int *dayp); extern int dmi_name_in_vendors(const char *str); extern int dmi_name_in_serial(const char *str); @@ -114,6 +115,7 @@ static inline const char * dmi_get_system_info(int field) { return NULL; } static inline const struct dmi_device * dmi_find_device(int type, const char *name, const struct dmi_device *from) { return NULL; } static inline void dmi_scan_machine(void) { return; } +static inline void dmi_set_dump_stack_arch_desc(void) { } static inline bool dmi_get_date(int field, int *yearp, int *monthp, int *dayp) { if (yearp) diff --git a/include/linux/printk.h b/include/linux/printk.h index 7ce1f878cf6..47827c0a034 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -145,6 +145,7 @@ extern void wake_up_klogd(void); void log_buf_kexec_setup(void); void __init setup_log_buf(int early); +void dump_stack_set_arch_desc(const char *fmt, ...); void dump_stack_print_info(const char *log_lvl); #else static inline __printf(1, 0) @@ -184,6 +185,10 @@ static inline void setup_log_buf(int early) { } +static inline void dump_stack_set_arch_desc(const char *fmt, ...) 
+{ +} + static inline void dump_stack_print_info(const char *log_lvl) { } diff --git a/kernel/panic.c b/kernel/panic.c index 7c57cc9eee2..167ec097ce8 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -22,7 +22,6 @@ #include #include #include -#include #define PANIC_TIMER_STEP 100 #define PANIC_BLINK_SPD 18 @@ -400,13 +399,8 @@ struct slowpath_args { static void warn_slowpath_common(const char *file, int line, void *caller, unsigned taint, struct slowpath_args *args) { - const char *board; - printk(KERN_WARNING "------------[ cut here ]------------\n"); printk(KERN_WARNING "WARNING: at %s:%d %pS()\n", file, line, caller); - board = dmi_get_system_info(DMI_PRODUCT_NAME); - if (board) - printk(KERN_WARNING "Hardware name: %s\n", board); if (args) vprintk(args->fmt, args->args); diff --git a/kernel/printk.c b/kernel/printk.c index 70b4b94a0ec..73a96def480 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -2851,6 +2851,28 @@ void kmsg_dump_rewind(struct kmsg_dumper *dumper) } EXPORT_SYMBOL_GPL(kmsg_dump_rewind); +static char dump_stack_arch_desc_str[128]; + +/** + * dump_stack_set_arch_desc - set arch-specific str to show with task dumps + * @fmt: printf-style format string + * @...: arguments for the format string + * + * The configured string will be printed right after utsname during task + * dumps. Usually used to add arch-specific system identifiers. If an + * arch wants to make use of such an ID string, it should initialize this + * as soon as possible during boot. + */ +void __init dump_stack_set_arch_desc(const char *fmt, ...) 
+{ + va_list args; + + va_start(args, fmt); + vsnprintf(dump_stack_arch_desc_str, sizeof(dump_stack_arch_desc_str), + fmt, args); + va_end(args); +} + /** * dump_stack_print_info - print generic debug info for dump_stack() * @log_lvl: log level @@ -2865,6 +2887,10 @@ void dump_stack_print_info(const char *log_lvl) print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); + + if (dump_stack_arch_desc_str[0] != '\0') + printk("%sHardware name: %s\n", + log_lvl, dump_stack_arch_desc_str); } #endif -- cgit v1.2.3-70-g09d2 From a43cb95d547a061ed5bf1acb28e0f5fd575e26c1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 30 Apr 2013 15:27:17 -0700 Subject: dump_stack: unify debug information printed by show_regs() show_regs() is inherently arch-dependent but it does make sense to print generic debug information and some archs already do albeit in slightly different forms. This patch introduces a generic function to print debug information from show_regs() so that different archs print out the same information and it's much easier to modify what's printed. show_regs_print_info() prints out the same debug info as dump_stack() does plus task and thread_info pointers. * Archs which didn't print debug info now do. alpha, arc, blackfin, c6x, cris, frv, h8300, hexagon, ia64, m32r, metag, microblaze, mn10300, openrisc, parisc, score, sh64, sparc, um, xtensa * Already prints debug info. Replaced with show_regs_print_info(). The printed information is superset of what used to be there. arm, arm64, avr32, mips, powerpc, sh32, tile, unicore32, x86 * s390 is special in that it used to print arch-specific information along with generic debug info. Heiko and Martin think that the arch-specific extra isn't worth keeping the s390-specific implementation. Converted to use the generic version. Note that now all archs print the debug info before actual register dumps. An example BUG() dump follows.
kernel BUG at /work/os/work/kernel/workqueue.c:4841! invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.9.0-rc1-work+ #7 Hardware name: empty empty/S3992, BIOS 080011 10/26/2007 task: ffff88007c85e040 ti: ffff88007c860000 task.ti: ffff88007c860000 RIP: 0010:[] [] init_workqueues+0x4/0x6 RSP: 0000:ffff88007c861ec8 EFLAGS: 00010246 RAX: ffff88007c861fd8 RBX: ffffffff824466a8 RCX: 0000000000000001 RDX: 0000000000000046 RSI: 0000000000000001 RDI: ffffffff8234a07a RBP: ffff88007c861ec8 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: ffffffff8234a07a R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88007dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: ffff88015f7ff000 CR3: 00000000021f1000 CR4: 00000000000007f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Stack: ffff88007c861ef8 ffffffff81000312 ffffffff824466a8 ffff88007c85e650 0000000000000003 0000000000000000 ffff88007c861f38 ffffffff82335e5d ffff88007c862080 ffffffff8223d8c0 ffff88007c862080 ffffffff81c47760 Call Trace: [] do_one_initcall+0x122/0x170 [] kernel_init_freeable+0x9b/0x1c8 [] ? rest_init+0x140/0x140 [] kernel_init+0xe/0xf0 [] ret_from_fork+0x7c/0xb0 [] ? rest_init+0x140/0x140 ... v2: Typo fix in x86-32. v3: CPU number dropped from show_regs_print_info() as dump_stack_print_info() has been updated to print it. s390 specific implementation dropped as requested by s390 maintainers. Signed-off-by: Tejun Heo Acked-by: David S. 
Miller Acked-by: Jesper Nilsson Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Bjorn Helgaas Cc: Fengguang Wu Cc: Mike Frysinger Cc: Vineet Gupta Cc: Sam Ravnborg Acked-by: Chris Metcalf [tile bits] Acked-by: Richard Kuo [hexagon bits] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/process.c | 1 + arch/arc/kernel/troubleshoot.c | 1 + arch/arm/kernel/process.c | 8 ++------ arch/arm64/kernel/process.c | 7 +------ arch/avr32/kernel/process.c | 5 ++--- arch/blackfin/kernel/trace.c | 2 ++ arch/c6x/kernel/traps.c | 1 + arch/cris/arch-v10/kernel/process.c | 3 +++ arch/cris/arch-v32/kernel/process.c | 3 +++ arch/frv/kernel/traps.c | 3 +-- arch/h8300/kernel/process.c | 2 ++ arch/hexagon/kernel/vm_events.c | 2 ++ arch/ia64/kernel/process.c | 4 ++-- arch/m32r/kernel/process.c | 2 ++ arch/metag/kernel/process.c | 2 ++ arch/microblaze/kernel/process.c | 2 ++ arch/mips/kernel/traps.c | 2 +- arch/mn10300/kernel/process.c | 1 + arch/openrisc/kernel/process.c | 1 + arch/parisc/kernel/traps.c | 2 ++ arch/powerpc/kernel/process.c | 8 ++------ arch/s390/kernel/dumpstack.c | 9 +-------- arch/score/kernel/traps.c | 2 ++ arch/sh/kernel/process_32.c | 6 +----- arch/sh/kernel/process_64.c | 1 + arch/sparc/kernel/process_32.c | 2 ++ arch/sparc/kernel/process_64.c | 2 ++ arch/tile/kernel/process.c | 3 +-- arch/um/sys-ppc/sysrq.c | 2 ++ arch/unicore32/kernel/process.c | 6 +----- arch/x86/include/asm/bug.h | 3 --- arch/x86/kernel/dumpstack_32.c | 4 +--- arch/x86/kernel/dumpstack_64.c | 6 +----- arch/x86/kernel/process.c | 24 ------------------------ arch/x86/kernel/process_32.c | 2 -- arch/x86/kernel/process_64.c | 1 - arch/xtensa/kernel/traps.c | 2 ++ include/linux/printk.h | 5 +++++ kernel/printk.c | 16 ++++++++++++++++ 39 files changed, 74 insertions(+), 84 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index a3fd8a29cca..ab80a80d38a 100644 --- a/arch/alpha/kernel/process.c +++ 
b/arch/alpha/kernel/process.c @@ -175,6 +175,7 @@ machine_power_off(void) void show_regs(struct pt_regs *regs) { + show_regs_print_info(KERN_DEFAULT); dik_show_regs(regs, NULL); } diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index 7c10873c311..96be1e6e76d 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -163,6 +163,7 @@ void show_regs(struct pt_regs *regs) return; print_task_path_n_nm(tsk, buf); + show_regs_print_info(KERN_INFO); if (current->thread.cause_code) show_ecr_verbose(regs); diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index c9a5e2ce8aa..ae58d3b37d9 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -225,11 +225,8 @@ void __show_regs(struct pt_regs *regs) unsigned long flags; char buf[64]; - printk("CPU: %d %s (%s %.*s)\n", - raw_smp_processor_id(), print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); + show_regs_print_info(KERN_DEFAULT); + print_symbol("PC is at %s\n", instruction_pointer(regs)); print_symbol("LR is at %s\n", regs->ARM_lr); printk("pc : [<%08lx>] lr : [<%08lx>] psr: %08lx\n" @@ -284,7 +281,6 @@ void __show_regs(struct pt_regs *regs) void show_regs(struct pt_regs * regs) { printk("\n"); - printk("Pid: %d, comm: %20s\n", task_pid_nr(current), current->comm); __show_regs(regs); dump_stack(); } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 6f3822f98dc..f4919721f7d 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -145,11 +145,7 @@ void __show_regs(struct pt_regs *regs) { int i; - printk("CPU: %d %s (%s %.*s)\n", - raw_smp_processor_id(), print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); + show_regs_print_info(KERN_DEFAULT); print_symbol("PC is at %s\n", instruction_pointer(regs)); print_symbol("LR is at %s\n", regs->regs[30]); printk("pc : 
[<%016llx>] lr : [<%016llx>] pstate: %08llx\n", @@ -166,7 +162,6 @@ void __show_regs(struct pt_regs *regs) void show_regs(struct pt_regs * regs) { printk("\n"); - printk("Pid: %d, comm: %20s\n", task_pid_nr(current), current->comm); __show_regs(regs); } diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c index 89a8017e8ca..e7b61494c31 100644 --- a/arch/avr32/kernel/process.c +++ b/arch/avr32/kernel/process.c @@ -215,6 +215,8 @@ void show_regs_log_lvl(struct pt_regs *regs, const char *log_lvl) unsigned long lr = regs->lr; unsigned long mode = (regs->sr & MODE_MASK) >> MODE_SHIFT; + show_regs_print_info(log_lvl); + if (!user_mode(regs)) { sp = (unsigned long)regs + FRAME_SIZE_FULL; @@ -252,9 +254,6 @@ void show_regs_log_lvl(struct pt_regs *regs, const char *log_lvl) regs->sr & SR_I0M ? '0' : '.', regs->sr & SR_GM ? 'G' : 'g'); printk("%sCPU Mode: %s\n", log_lvl, cpu_modes[mode]); - printk("%sProcess: %s [%d] (task: %p thread: %p)\n", - log_lvl, current->comm, current->pid, current, - task_thread_info(current)); } void show_regs(struct pt_regs *regs) diff --git a/arch/blackfin/kernel/trace.c b/arch/blackfin/kernel/trace.c index f7f7a18abca..c36efa0c716 100644 --- a/arch/blackfin/kernel/trace.c +++ b/arch/blackfin/kernel/trace.c @@ -853,6 +853,8 @@ void show_regs(struct pt_regs *fp) unsigned char in_atomic = (bfin_read_IPEND() & 0x10) || in_atomic(); pr_notice("\n"); + show_regs_print_info(KERN_NOTICE); + if (CPUID != bfin_cpuid()) pr_notice("Compiled for cpu family 0x%04x (Rev %d), " "but running on:0x%04x (Rev %d)\n", diff --git a/arch/c6x/kernel/traps.c b/arch/c6x/kernel/traps.c index d0b96ef25c1..dcc2c2f6d67 100644 --- a/arch/c6x/kernel/traps.c +++ b/arch/c6x/kernel/traps.c @@ -31,6 +31,7 @@ void __init trap_init(void) void show_regs(struct pt_regs *regs) { pr_err("\n"); + show_regs_print_info(KERN_ERR); pr_err("PC: %08lx SP: %08lx\n", regs->pc, regs->sp); pr_err("Status: %08lx ORIG_A4: %08lx\n", regs->csr, regs->orig_a4); pr_err("A0: %08lx B0: 
%08lx\n", regs->a0, regs->b0); diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c index 2ba23c13df6..753e9a03cf8 100644 --- a/arch/cris/arch-v10/kernel/process.c +++ b/arch/cris/arch-v10/kernel/process.c @@ -176,6 +176,9 @@ unsigned long get_wchan(struct task_struct *p) void show_regs(struct pt_regs * regs) { unsigned long usp = rdusp(); + + show_regs_print_info(KERN_DEFAULT); + printk("IRP: %08lx SRP: %08lx DCCR: %08lx USP: %08lx MOF: %08lx\n", regs->irp, regs->srp, regs->dccr, usp, regs->mof ); printk(" r0: %08lx r1: %08lx r2: %08lx r3: %08lx\n", diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c index 57451faa9b2..cebd32e2a8f 100644 --- a/arch/cris/arch-v32/kernel/process.c +++ b/arch/cris/arch-v32/kernel/process.c @@ -164,6 +164,9 @@ get_wchan(struct task_struct *p) void show_regs(struct pt_regs * regs) { unsigned long usp = rdusp(); + + show_regs_print_info(KERN_DEFAULT); + printk("ERP: %08lx SRP: %08lx CCS: %08lx USP: %08lx MOF: %08lx\n", regs->erp, regs->srp, regs->ccs, usp, regs->mof); diff --git a/arch/frv/kernel/traps.c b/arch/frv/kernel/traps.c index cfcd802d6f9..4bff48c19d2 100644 --- a/arch/frv/kernel/traps.c +++ b/arch/frv/kernel/traps.c @@ -497,6 +497,7 @@ void show_regs(struct pt_regs *regs) int loop; printk("\n"); + show_regs_print_info(KERN_DEFAULT); printk("Frame: @%08lx [%s]\n", (unsigned long) regs, @@ -511,8 +512,6 @@ void show_regs(struct pt_regs *regs) else printk(" | "); } - - printk("Process %s (pid: %d)\n", current->comm, current->pid); } void die_if_kernel(const char *str, ...) 
diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index a17d2cd463d..1a744ab7e7e 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -83,6 +83,8 @@ void machine_power_off(void) void show_regs(struct pt_regs * regs) { + show_regs_print_info(KERN_DEFAULT); + printk("\nPC: %08lx Status: %02x", regs->pc, regs->ccr); printk("\nORIG_ER0: %08lx ER0: %08lx ER1: %08lx", diff --git a/arch/hexagon/kernel/vm_events.c b/arch/hexagon/kernel/vm_events.c index 9b5a4a295a6..f337281ebe6 100644 --- a/arch/hexagon/kernel/vm_events.c +++ b/arch/hexagon/kernel/vm_events.c @@ -33,6 +33,8 @@ */ void show_regs(struct pt_regs *regs) { + show_regs_print_info(KERN_EMERG); + printk(KERN_EMERG "restart_r0: \t0x%08lx syscall_nr: %ld\n", regs->restart_r0, regs->syscall_nr); printk(KERN_EMERG "preds: \t\t0x%08lx\n", regs->preds); diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 182bd64cc72..55d4ba47a90 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -101,8 +101,8 @@ show_regs (struct pt_regs *regs) unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri; print_modules(); - printk("\nPid: %d, CPU %d, comm: %20s\n", task_pid_nr(current), - smp_processor_id(), current->comm); + printk("\n"); + show_regs_print_info(KERN_DEFAULT); printk("psr : %016lx ifs : %016lx ip : [<%016lx>] %s (%s)\n", regs->cr_ipsr, regs->cr_ifs, ip, print_tainted(), init_utsname()->release); diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index e2d049018c3..e69221d581d 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -73,6 +73,8 @@ void machine_power_off(void) void show_regs(struct pt_regs * regs) { printk("\n"); + show_regs_print_info(KERN_DEFAULT); + printk("BPC[%08lx]:PSW[%08lx]:LR [%08lx]:FP [%08lx]\n", \ regs->bpc, regs->psw, regs->lr, regs->fp); printk("BBPC[%08lx]:BBPSW[%08lx]:SPU[%08lx]:SPI[%08lx]\n", \ diff --git a/arch/metag/kernel/process.c b/arch/metag/kernel/process.c 
index dc592354456..483dff986a2 100644 --- a/arch/metag/kernel/process.c +++ b/arch/metag/kernel/process.c @@ -129,6 +129,8 @@ void show_regs(struct pt_regs *regs) "D1.7 " }; + show_regs_print_info(KERN_INFO); + pr_info(" pt_regs @ %p\n", regs); pr_info(" SaveMask = 0x%04hx\n", regs->ctx.SaveMask); pr_info(" Flags = 0x%04hx (%c%c%c%c)\n", regs->ctx.Flags, diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index 7cce2e9c171..a5589380727 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -20,6 +20,8 @@ void show_regs(struct pt_regs *regs) { + show_regs_print_info(KERN_INFO); + pr_info(" Registers dump: mode=%X\r\n", regs->pt_mode); pr_info(" r1=%08lX, r2=%08lX, r3=%08lX, r4=%08lX\n", regs->r1, regs->r2, regs->r3, regs->r4); diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index b512b28cf78..25225515451 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -231,7 +231,7 @@ static void __show_regs(const struct pt_regs *regs) unsigned int cause = regs->cp0_cause; int i; - printk("Cpu %d\n", smp_processor_id()); + show_regs_print_info(KERN_DEFAULT); /* * Saved main processor registers diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index 2da39fb8b3b..3707da583d0 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -97,6 +97,7 @@ void machine_power_off(void) void show_regs(struct pt_regs *regs) { + show_regs_print_info(KERN_DEFAULT); } /* diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index 00c233bf0d0..386af258591 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -90,6 +90,7 @@ void show_regs(struct pt_regs *regs) { extern void show_registers(struct pt_regs *regs); + show_regs_print_info(KERN_DEFAULT); /* __PHX__ cleanup this mess */ show_registers(regs); } diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index e64cf5f09b6..f702bff0bed 
100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -126,6 +126,8 @@ void show_regs(struct pt_regs *regs) user = user_mode(regs); level = user ? KERN_DEBUG : KERN_CRIT; + show_regs_print_info(level); + print_gr(level, regs); for (i = 0; i < 8; i += 4) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 624d44bb44d..13a8d9d0b5c 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -831,6 +831,8 @@ void show_regs(struct pt_regs * regs) { int i, trap; + show_regs_print_info(KERN_DEFAULT); + printk("NIP: "REG" LR: "REG" CTR: "REG"\n", regs->nip, regs->link, regs->ctr); printk("REGS: %p TRAP: %04lx %s (%s)\n", @@ -850,12 +852,6 @@ void show_regs(struct pt_regs * regs) #else printk("DAR: "REG", DSISR: %08lx\n", regs->dar, regs->dsisr); #endif - printk("TASK = %p[%d] '%s' THREAD: %p", - current, task_pid_nr(current), current->comm, task_thread_info(current)); - -#ifdef CONFIG_SMP - printk(" CPU: %d", raw_smp_processor_id()); -#endif /* CONFIG_SMP */ for (i = 0; i < 32; i++) { if ((i % REGS_PER_LINE) == 0) diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 2f1f639d1a3..29829747725 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -166,14 +166,7 @@ void show_registers(struct pt_regs *regs) void show_regs(struct pt_regs *regs) { - printk("CPU: %d %s %s %.*s\n", - task_thread_info(current)->cpu, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - printk("Process %s (pid: %d, task: %p, ksp: %p)\n", - current->comm, current->pid, current, - (void *) current->thread.ksp); + show_regs_print_info(KERN_DEFAULT); show_registers(regs); /* Show stack backtrace if pt_regs is from kernel mode */ if (!user_mode(regs)) diff --git a/arch/score/kernel/traps.c b/arch/score/kernel/traps.c index a38f435fba7..1517a7dcd6d 100644 --- a/arch/score/kernel/traps.c +++ b/arch/score/kernel/traps.c 
@@ -117,6 +117,8 @@ static void show_code(unsigned int *pc) */ void show_regs(struct pt_regs *regs) { + show_regs_print_info(KERN_DEFAULT); + printk("r0 : %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n", regs->regs[0], regs->regs[1], regs->regs[2], regs->regs[3], regs->regs[4], regs->regs[5], regs->regs[6], regs->regs[7]); diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index 73eb66fc625..ebd3933005b 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -32,11 +32,7 @@ void show_regs(struct pt_regs * regs) { printk("\n"); - printk("Pid : %d, Comm: \t\t%s\n", task_pid_nr(current), current->comm); - printk("CPU : %d \t\t%s (%s %.*s)\n\n", - smp_processor_id(), print_tainted(), init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); + show_regs_print_info(KERN_DEFAULT); print_symbol("PC is at %s\n", instruction_pointer(regs)); print_symbol("PR is at %s\n", regs->pr); diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index e611c85144b..174d124b419 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -40,6 +40,7 @@ void show_regs(struct pt_regs *regs) unsigned long long ah, al, bh, bl, ch, cl; printk("\n"); + show_regs_print_info(KERN_DEFAULT); ah = (regs->pc) >> 32; al = (regs->pc) & 0xffffffff; diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index dccf5f58d70..fdd819dfdac 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -112,6 +112,8 @@ void show_regs(struct pt_regs *r) { struct reg_window32 *rw = (struct reg_window32 *) r->u_regs[14]; + show_regs_print_info(KERN_DEFAULT); + printk("PSR: %08lx PC: %08lx NPC: %08lx Y: %08lx %s\n", r->psr, r->pc, r->npc, r->y, print_tainted()); printk("PC: <%pS>\n", (void *) r->pc); diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 9fbf0d14a36..c6dc1ba069c 100644 --- a/arch/sparc/kernel/process_64.c +++ 
b/arch/sparc/kernel/process_64.c @@ -163,6 +163,8 @@ static void show_regwindow(struct pt_regs *regs) void show_regs(struct pt_regs *regs) { + show_regs_print_info(KERN_DEFAULT); + printk("TSTATE: %016lx TPC: %016lx TNPC: %016lx Y: %08x %s\n", regs->tstate, regs->tpc, regs->tnpc, regs->y, print_tainted()); printk("TPC: <%pS>\n", (void *) regs->tpc); diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 80b2a18deb8..8ac304484f9 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -573,8 +573,7 @@ void show_regs(struct pt_regs *regs) int i; pr_err("\n"); - pr_err(" Pid: %d, comm: %20s, CPU: %d\n", - tsk->pid, tsk->comm, smp_processor_id()); + show_regs_print_info(KERN_ERR); #ifdef __tilegx__ for (i = 0; i < 51; i += 3) pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT" r%-2d: "REGFMT"\n", diff --git a/arch/um/sys-ppc/sysrq.c b/arch/um/sys-ppc/sysrq.c index f889449f928..1ff1ad7f27d 100644 --- a/arch/um/sys-ppc/sysrq.c +++ b/arch/um/sys-ppc/sysrq.c @@ -11,6 +11,8 @@ void show_regs(struct pt_regs_subarch *regs) { printk("\n"); + show_regs_print_info(KERN_DEFAULT); + printk("show_regs(): insert regs here.\n"); #if 0 printk("\n"); diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c index 7fab86d7c5d..c9447691bda 100644 --- a/arch/unicore32/kernel/process.c +++ b/arch/unicore32/kernel/process.c @@ -144,11 +144,7 @@ void __show_regs(struct pt_regs *regs) unsigned long flags; char buf[64]; - printk(KERN_DEFAULT "CPU: %d %s (%s %.*s)\n", - raw_smp_processor_id(), print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); + show_regs_print_info(KERN_DEFAULT); print_symbol("PC is at %s\n", instruction_pointer(regs)); print_symbol("LR is at %s\n", regs->UCreg_lr); printk(KERN_DEFAULT "pc : [<%08lx>] lr : [<%08lx>] psr: %08lx\n" diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 11e1152222d..2f03ff018d3 100644 --- 
a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -37,7 +37,4 @@ do { \ #include - -extern void show_regs_common(void); - #endif /* _ASM_X86_BUG_H */ diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 1038a417ea5..f2a1770ca17 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -86,11 +86,9 @@ void show_regs(struct pt_regs *regs) { int i; + show_regs_print_info(KERN_EMERG); __show_regs(regs, !user_mode_vm(regs)); - pr_emerg("Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n", - TASK_COMM_LEN, current->comm, task_pid_nr(current), - current_thread_info(), current, task_thread_info(current)); /* * When in-kernel, we also print out the stack and code at the * time of the fault.. diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index b653675d528..addb207dab9 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -249,14 +249,10 @@ void show_regs(struct pt_regs *regs) { int i; unsigned long sp; - const int cpu = smp_processor_id(); - struct task_struct *cur = current; sp = regs->sp; - printk("CPU %d ", cpu); + show_regs_print_info(KERN_DEFAULT); __show_regs(regs, 1); - printk(KERN_DEFAULT "Process %s (pid: %d, threadinfo %p, task %p)\n", - cur->comm, cur->pid, task_thread_info(cur), cur); /* * When in-kernel, we also print out the stack and code at the diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 14fcf55a5c5..607af0d4d5e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -121,30 +121,6 @@ void exit_thread(void) drop_fpu(me); } -void show_regs_common(void) -{ - const char *vendor, *product, *board; - - vendor = dmi_get_system_info(DMI_SYS_VENDOR); - if (!vendor) - vendor = ""; - product = dmi_get_system_info(DMI_PRODUCT_NAME); - if (!product) - product = ""; - - /* Board Name is optional */ - board = dmi_get_system_info(DMI_BOARD_NAME); - - printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s 
%.*s %s %s%s%s\n", - current->pid, current->comm, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version, - vendor, product, - board ? "/" : "", - board ? board : ""); -} - void flush_thread(void) { struct task_struct *tsk = current; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index b5a8905785e..7305f7dfc7a 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -84,8 +84,6 @@ void __show_regs(struct pt_regs *regs, int all) savesegment(gs, gs); } - show_regs_common(); - printk(KERN_DEFAULT "EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", (u16)regs->cs, regs->ip, regs->flags, smp_processor_id()); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 0f49677da51..355ae06dbf9 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -62,7 +62,6 @@ void __show_regs(struct pt_regs *regs, int all) unsigned int fsindex, gsindex; unsigned int ds, cs, es; - show_regs_common(); printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); printk_address(regs->ip, 1); printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 384b7c7c2f6..458186dab5d 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -383,6 +383,8 @@ void show_regs(struct pt_regs * regs) { int i, wmask; + show_regs_print_info(KERN_DEFAULT); + wmask = regs->wmask & ~1; for (i = 0; i < 16; i++) { diff --git a/include/linux/printk.h b/include/linux/printk.h index 47827c0a034..6af944ab38f 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -147,6 +147,7 @@ void log_buf_kexec_setup(void); void __init setup_log_buf(int early); void dump_stack_set_arch_desc(const char *fmt, ...); void dump_stack_print_info(const char *log_lvl); +void show_regs_print_info(const char *log_lvl); #else static inline __printf(1, 0) int 
vprintk(const char *s, va_list args) @@ -192,6 +193,10 @@ static inline void dump_stack_set_arch_desc(const char *fmt, ...) static inline void dump_stack_print_info(const char *log_lvl) { } + +static inline void show_regs_print_info(const char *log_lvl) +{ +} #endif extern void dump_stack(void) __cold; diff --git a/kernel/printk.c b/kernel/printk.c index 73a96def480..e10ad515901 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -2893,4 +2893,20 @@ void dump_stack_print_info(const char *log_lvl) log_lvl, dump_stack_arch_desc_str); } +/** + * show_regs_print_info - print generic debug info for show_regs() + * @log_lvl: log level + * + * show_regs() implementations can use this function to print out generic + * debug information. + */ +void show_regs_print_info(const char *log_lvl) +{ + dump_stack_print_info(log_lvl); + + printk("%stask: %p ti: %p task.ti: %p\n", + log_lvl, current, current_thread_info(), + task_thread_info(current)); +} + #endif -- cgit v1.2.3-70-g09d2 From cd42d559e45e3563c74403e453f8954b593db69d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 30 Apr 2013 15:27:21 -0700 Subject: kthread: implement probe_kthread_data() One of the problems that arise when converting dedicated custom threadpool to workqueue is that the shared worker pool used by workqueue anonymizes each worker making it more difficult to identify what the worker was doing on which target from the output of sysrq-t or debug dump from oops, BUG() and friends. For example, after writeback is converted to use workqueue instead of private thread pool, there's no easy way to tell which backing device a writeback work item was working on at the time of task dump, which, according to our writeback brethren, is important in tracking down issues with a lot of mounted file systems on a lot of different devices. This patchset implements a way for a work function to mark its execution instance so that task dump of the worker task includes information to indicate what the work item was doing.
An example WARN dump would look like the following. WARNING: at fs/fs-writeback.c:1015 bdi_writeback_workfn+0x2b4/0x3c0() Modules linked in: CPU: 0 Pid: 28 Comm: kworker/u18:0 Not tainted 3.9.0-rc1-work+ #24 Hardware name: empty empty/S3992, BIOS 080011 10/26/2007 Workqueue: writeback bdi_writeback_workfn (flush-8:16) ffffffff820a3a98 ffff88015b927cb8 ffffffff81c61855 ffff88015b927cf8 ffffffff8108f500 0000000000000000 ffff88007a171948 ffff88007a1716b0 ffff88015b49df00 ffff88015b8d3940 0000000000000000 ffff88015b927d08 Call Trace: [] dump_stack+0x19/0x1b [] warn_slowpath_common+0x70/0xa0 ... This patch: Implement probe_kthread_data() which returns kthread_data if accessible. The function is equivalent to kthread_data() except that the specified @task may not be a kthread or its vfork_done is already cleared rendering struct kthread inaccessible. In the former case, probe_kthread_data() may return any value. In the latter, NULL. This will be used to safely print debug information without affecting synchronization in the normal paths. Workqueue debug info printing on dump_stack() and friends will make use of it. 
Signed-off-by: Tejun Heo Cc: Oleg Nesterov Acked-by: Jan Kara Cc: Dave Chinner Cc: Ingo Molnar Cc: Jens Axboe Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kthread.h | 1 + kernel/kthread.c | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 8d816646f76..7dcef331768 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -43,6 +43,7 @@ bool kthread_should_stop(void); bool kthread_should_park(void); bool kthread_freezable_should_stop(bool *was_frozen); void *kthread_data(struct task_struct *k); +void *probe_kthread_data(struct task_struct *k); int kthread_park(struct task_struct *k); void kthread_unpark(struct task_struct *k); void kthread_parkme(void); diff --git a/kernel/kthread.c b/kernel/kthread.c index 16d8ddd268b..760e86df8c2 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -17,6 +17,7 @@ #include #include #include +#include #include static DEFINE_SPINLOCK(kthread_create_lock); @@ -135,6 +136,24 @@ void *kthread_data(struct task_struct *task) return to_kthread(task)->data; } +/** + * probe_kthread_data - speculative version of kthread_data() + * @task: possible kthread task in question + * + * @task could be a kthread task. Return the data value specified when it + * was created if accessible. If @task isn't a kthread task or its data is + * inaccessible for any reason, %NULL is returned. This function requires + * that @task itself is safe to dereference. 
+ */ +void *probe_kthread_data(struct task_struct *task) +{ + struct kthread *kthread = to_kthread(task); + void *data = NULL; + + probe_kernel_read(&data, &kthread->data, sizeof(data)); + return data; +} + static void __kthread_parkme(struct kthread *self) { __set_current_state(TASK_PARKED); -- cgit v1.2.3-70-g09d2 From 3d1cb2059d9374e58da481b783332cf191cb6620 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 30 Apr 2013 15:27:22 -0700 Subject: workqueue: include workqueue info when printing debug dump of a worker task One of the problems that arise when converting dedicated custom threadpool to workqueue is that the shared worker pool used by workqueue anonymizes each worker making it more difficult to identify what the worker was doing on which target from the output of sysrq-t or debug dump from oops, BUG() and friends. This patch implements set_worker_desc() which can be called from any workqueue work function to set its description. When the worker task is dumped for whatever reason - sysrq-t, WARN, BUG, oops, lockdep assertion and so on - the description will be printed out together with the workqueue name and the worker function pointer. The printing side is implemented by print_worker_info() which is called from functions in task dump paths - sched_show_task() and dump_stack_print_info(). print_worker_info() can be safely called on any task in any state as long as the task struct itself is accessible. It uses probe_*() functions to access worker fields. It may print garbage if something went very wrong, but it wouldn't cause (another) oops. The description is currently limited to 24 bytes including the terminating \0. worker->desc_valid and worker->desc[] are added and the 64 bytes marker which was already incorrect before adding the new fields is moved to the correct position. Here's an example dump with writeback updated to set the bdi name as worker desc.
Hardware name: Bochs Modules linked in: Pid: 7, comm: kworker/u9:0 Not tainted 3.9.0-rc1-work+ #1 Workqueue: writeback bdi_writeback_workfn (flush-8:0) ffffffff820a3ab0 ffff88000f6e9cb8 ffffffff81c61845 ffff88000f6e9cf8 ffffffff8108f50f 0000000000000000 0000000000000000 ffff88000cde16b0 ffff88000cde1aa8 ffff88001ee19240 ffff88000f6e9fd8 ffff88000f6e9d08 Call Trace: [] dump_stack+0x19/0x1b [] warn_slowpath_common+0x7f/0xc0 [] warn_slowpath_null+0x1a/0x20 [] bdi_writeback_workfn+0x2a0/0x3b0 ... Signed-off-by: Tejun Heo Cc: Peter Zijlstra Cc: Ingo Molnar Acked-by: Jan Kara Cc: Oleg Nesterov Cc: Jens Axboe Cc: Dave Chinner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 5 +++ kernel/printk.c | 2 ++ kernel/sched/core.c | 1 + kernel/workqueue.c | 79 +++++++++++++++++++++++++++++++++++++++++++++ kernel/workqueue_internal.h | 12 ++++++- 5 files changed, 98 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 71797563937..623488fdc1f 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -92,6 +92,9 @@ enum { /* bit mask for work_busy() return values */ WORK_BUSY_PENDING = 1 << 0, WORK_BUSY_RUNNING = 1 << 1, + + /* maximum string length for set_worker_desc() */ + WORKER_DESC_LEN = 24, }; struct work_struct { @@ -447,6 +450,8 @@ extern void workqueue_set_max_active(struct workqueue_struct *wq, extern bool current_is_workqueue_rescuer(void); extern bool workqueue_congested(int cpu, struct workqueue_struct *wq); extern unsigned int work_busy(struct work_struct *work); +extern __printf(1, 2) void set_worker_desc(const char *fmt, ...); +extern void print_worker_info(const char *log_lvl, struct task_struct *task); /** * queue_work - queue work on a workqueue diff --git a/kernel/printk.c b/kernel/printk.c index e10ad515901..96dcfcd9a2d 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -2891,6 +2891,8 @@ void dump_stack_print_info(const 
char *log_lvl) if (dump_stack_arch_desc_str[0] != '\0') printk("%sHardware name: %s\n", log_lvl, dump_stack_arch_desc_str); + + print_worker_info(log_lvl, current); } /** diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c70a8814a76..5662f58f0b6 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4586,6 +4586,7 @@ void sched_show_task(struct task_struct *p) task_pid_nr(p), ppid, (unsigned long)task_thread_info(p)->flags); + print_worker_info(KERN_INFO, p); show_stack(p, NULL); } diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 154aa12af48..4aa9f5bc6b2 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -46,6 +46,7 @@ #include #include #include +#include #include "workqueue_internal.h" @@ -2197,6 +2198,7 @@ __acquires(&pool->lock) worker->current_work = NULL; worker->current_func = NULL; worker->current_pwq = NULL; + worker->desc_valid = false; pwq_dec_nr_in_flight(pwq, work_color); } @@ -4365,6 +4367,83 @@ unsigned int work_busy(struct work_struct *work) } EXPORT_SYMBOL_GPL(work_busy); +/** + * set_worker_desc - set description for the current work item + * @fmt: printf-style format string + * @...: arguments for the format string + * + * This function can be called by a running work function to describe what + * the work item is about. If the worker task gets dumped, this + * information will be printed out together to help debugging. The + * description can be at most WORKER_DESC_LEN including the trailing '\0'. + */ +void set_worker_desc(const char *fmt, ...) 
+{ + struct worker *worker = current_wq_worker(); + va_list args; + + if (worker) { + va_start(args, fmt); + vsnprintf(worker->desc, sizeof(worker->desc), fmt, args); + va_end(args); + worker->desc_valid = true; + } +} + +/** + * print_worker_info - print out worker information and description + * @log_lvl: the log level to use when printing + * @task: target task + * + * If @task is a worker and currently executing a work item, print out the + * name of the workqueue being serviced and worker description set with + * set_worker_desc() by the currently executing work item. + * + * This function can be safely called on any task as long as the + * task_struct itself is accessible. While safe, this function isn't + * synchronized and may print out mixups or garbages of limited length. + */ +void print_worker_info(const char *log_lvl, struct task_struct *task) +{ + work_func_t *fn = NULL; + char name[WQ_NAME_LEN] = { }; + char desc[WORKER_DESC_LEN] = { }; + struct pool_workqueue *pwq = NULL; + struct workqueue_struct *wq = NULL; + bool desc_valid = false; + struct worker *worker; + + if (!(task->flags & PF_WQ_WORKER)) + return; + + /* + * This function is called without any synchronization and @task + * could be in any state. Be careful with dereferences. + */ + worker = probe_kthread_data(task); + + /* + * Carefully copy the associated workqueue's workfn and name. Keep + * the original last '\0' in case the original contains garbage. 
+ */ + probe_kernel_read(&fn, &worker->current_func, sizeof(fn)); + probe_kernel_read(&pwq, &worker->current_pwq, sizeof(pwq)); + probe_kernel_read(&wq, &pwq->wq, sizeof(wq)); + probe_kernel_read(name, wq->name, sizeof(name) - 1); + + /* copy worker description */ + probe_kernel_read(&desc_valid, &worker->desc_valid, sizeof(desc_valid)); + if (desc_valid) + probe_kernel_read(desc, worker->desc, sizeof(desc) - 1); + + if (fn || name[0] || desc[0]) { + printk("%sWorkqueue: %s %pf", log_lvl, name, fn); + if (desc[0]) + pr_cont(" (%s)", desc); + pr_cont("\n"); + } +} + /* * CPU hotplug. * diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h index 84ab6e1dc6f..ad83c96b2ec 100644 --- a/kernel/workqueue_internal.h +++ b/kernel/workqueue_internal.h @@ -29,15 +29,25 @@ struct worker { struct work_struct *current_work; /* L: work being processed */ work_func_t current_func; /* L: current_work's fn */ struct pool_workqueue *current_pwq; /* L: current_work's pwq */ + bool desc_valid; /* ->desc is valid */ struct list_head scheduled; /* L: scheduled works */ + + /* 64 bytes boundary on 64bit, 32 on 32bit */ + struct task_struct *task; /* I: worker task */ struct worker_pool *pool; /* I: the associated pool */ /* L: for rescuers */ - /* 64 bytes boundary on 64bit, 32 on 32bit */ + unsigned long last_active; /* L: last active timestamp */ unsigned int flags; /* X: flags */ int id; /* I: worker id */ + /* + * Opaque string set with work_set_desc(). Printed out with task + * dump for debugging - WARN, BUG, panic or sysrq. + */ + char desc[WORKER_DESC_LEN]; + /* used only by rescuers to point to the target workqueue */ struct workqueue_struct *rescue_wq; /* I: the workqueue to rescue */ }; -- cgit v1.2.3-70-g09d2 From 3440a1ca99707f093e9568ba9762764d3162dd8f Mon Sep 17 00:00:00 2001 From: liguang Date: Tue, 30 Apr 2013 15:27:26 -0700 Subject: kernel/smp.c: remove 'priv' of call_single_data The 'priv' field is redundant; we can pass data via 'info'. 
Signed-off-by: liguang Cc: Peter Zijlstra Cc: Oleg Nesterov Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/smp.h | 1 - kernel/softirq.c | 6 ++---- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index 3e07a7df647..e6564c1dc55 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -20,7 +20,6 @@ struct call_single_data { smp_call_func_t func; void *info; u16 flags; - u16 priv; }; /* total number of cpus in this system (may exceed NR_CPUS) */ diff --git a/kernel/softirq.c b/kernel/softirq.c index 14d7758074a..aa82723c720 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -620,8 +620,7 @@ static void remote_softirq_receive(void *data) unsigned long flags; int softirq; - softirq = cp->priv; - + softirq = *(int *)cp->info; local_irq_save(flags); __local_trigger(cp, softirq); local_irq_restore(flags); @@ -631,9 +630,8 @@ static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softir { if (cpu_online(cpu)) { cp->func = remote_softirq_receive; - cp->info = cp; + cp->info = &softirq; cp->flags = 0; - cp->priv = softirq; __smp_call_function_single(cpu, cp, 0); return 0; -- cgit v1.2.3-70-g09d2 From 74e3d1e17b2e11d175970b85acd44f5927000ba2 Mon Sep 17 00:00:00 2001 From: Fan Du Date: Tue, 30 Apr 2013 15:27:27 -0700 Subject: include/linux/fs.h: disable preempt when acquire i_size_seqcount write lock Two rt tasks bind to one CPU core. The higher priority rt task A preempts a lower priority rt task B which has already taken the write seq lock, and then the higher priority rt task A try to acquire read seq lock, it's doomed to lockup. rt task A with lower priority: call write i_size_write rt task B with higher priority: call sync, and preempt task A write_seqcount_begin(&inode->i_size_seqcount); i_size_read inode->i_size = i_size; read_seqcount_begin <-- lockup here... 
So disabling preemption whenever the i_size_seqcount *write* lock is acquired will cure the problem. Signed-off-by: Fan Du Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2c28271ab9d..17d8b159621 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -675,9 +675,11 @@ static inline loff_t i_size_read(const struct inode *inode) static inline void i_size_write(struct inode *inode, loff_t i_size) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) + preempt_disable(); write_seqcount_begin(&inode->i_size_seqcount); inode->i_size = i_size; write_seqcount_end(&inode->i_size_seqcount); + preempt_enable(); #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) preempt_disable(); inode->i_size = i_size; -- cgit v1.2.3-70-g09d2 From 16c7fa05829e8b91db48e3539c5d6ff3c2b18a23 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 30 Apr 2013 15:27:30 -0700 Subject: lib/string_helpers: introduce generic string_unescape There are several places in the kernel where modules unescape input to convert C-Style Escape Sequences into byte codes. The patch provides a generic implementation of this approach. Test cases are also included in the patch.
[akpm@linux-foundation.org: clarify comment] [akpm@linux-foundation.org: export get_random_int() to modules] Signed-off-by: Andy Shevchenko Cc: Samuel Thibault Cc: Greg Kroah-Hartman Cc: Jason Baron Cc: Alexander Viro Cc: William Hubbs Cc: Chris Brannon Cc: Kirk Reiser Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/random.c | 1 + include/linux/string_helpers.h | 58 ++++++++++++++++++ lib/Kconfig.debug | 3 + lib/Makefile | 4 +- lib/string_helpers.c | 133 +++++++++++++++++++++++++++++++++++++++++ lib/test-string_helpers.c | 103 +++++++++++++++++++++++++++++++ 6 files changed, 301 insertions(+), 1 deletion(-) create mode 100644 lib/test-string_helpers.c (limited to 'include/linux') diff --git a/drivers/char/random.c b/drivers/char/random.c index 32a6c576495..cd9a6211dca 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1485,6 +1485,7 @@ unsigned int get_random_int(void) return ret; } +EXPORT_SYMBOL(get_random_int); /* * randomize_range() returns a start address such that diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index a3eb2f65b65..3eeee9672a4 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -13,4 +13,62 @@ enum string_size_units { int string_get_size(u64 size, enum string_size_units units, char *buf, int len); +#define UNESCAPE_SPACE 0x01 +#define UNESCAPE_OCTAL 0x02 +#define UNESCAPE_HEX 0x04 +#define UNESCAPE_SPECIAL 0x08 +#define UNESCAPE_ANY \ + (UNESCAPE_SPACE | UNESCAPE_OCTAL | UNESCAPE_HEX | UNESCAPE_SPECIAL) + +/** + * string_unescape - unquote characters in the given string + * @src: source buffer (escaped) + * @dst: destination buffer (unescaped) + * @size: size of the destination buffer (0 to unlimit) + * @flags: combination of the flags (bitwise OR): + * %UNESCAPE_SPACE: + * '\f' - form feed + * '\n' - new line + * '\r' - carriage return + * '\t' - horizontal tab + * '\v' - vertical tab + * %UNESCAPE_OCTAL: + * '\NNN' - byte with octal value 
NNN (1 to 3 digits) + * %UNESCAPE_HEX: + * '\xHH' - byte with hexadecimal value HH (1 to 2 digits) + * %UNESCAPE_SPECIAL: + * '\"' - double quote + * '\\' - backslash + * '\a' - alert (BEL) + * '\e' - escape + * %UNESCAPE_ANY: + * all previous together + * + * Returns amount of characters processed to the destination buffer excluding + * trailing '\0'. + * + * Because the size of the output will be the same as or less than the size of + * the input, the transformation may be performed in place. + * + * Caller must provide valid source and destination pointers. Be aware that + * destination buffer will always be NULL-terminated. Source string must be + * NULL-terminated as well. + */ +int string_unescape(char *src, char *dst, size_t size, unsigned int flags); + +static inline int string_unescape_inplace(char *buf, unsigned int flags) +{ + return string_unescape(buf, buf, 0, flags); +} + +static inline int string_unescape_any(char *src, char *dst, size_t size) +{ + return string_unescape(src, dst, size, UNESCAPE_ANY); +} + +static inline int string_unescape_any_inplace(char *buf) +{ + return string_unescape_any(buf, buf, 0); +} + #endif diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 28be08c09ba..77ebaa3dfa1 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1463,5 +1463,8 @@ source "lib/Kconfig.kgdb" source "lib/Kconfig.kmemcheck" +config TEST_STRING_HELPERS + tristate "Test functions located in the string_helpers module at runtime" + config TEST_KSTRTOX tristate "Test kstrto*() family of functions at runtime" diff --git a/lib/Makefile b/lib/Makefile index 6e2cc561f76..23c9a0fe74f 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -22,8 +22,10 @@ lib-y += kobject.o klist.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ - string_helpers.o gcd.o lcm.o list_sort.o uuid.o flex_array.o \ + gcd.o lcm.o list_sort.o uuid.o flex_array.o \ bsearch.o find_last_bit.o 
find_next_bit.o llist.o memweight.o kfifo.o +obj-y += string_helpers.o +obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += kstrtox.o obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o diff --git a/lib/string_helpers.c b/lib/string_helpers.c index 1cffc223bff..ed5c1454dd6 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -2,10 +2,12 @@ * Helpers for formatting and printing strings * * Copyright 31 August 2008 James Bottomley + * Copyright (C) 2013, Intel Corporation */ #include #include #include +#include #include /** @@ -66,3 +68,134 @@ int string_get_size(u64 size, const enum string_size_units units, return 0; } EXPORT_SYMBOL(string_get_size); + +static bool unescape_space(char **src, char **dst) +{ + char *p = *dst, *q = *src; + + switch (*q) { + case 'n': + *p = '\n'; + break; + case 'r': + *p = '\r'; + break; + case 't': + *p = '\t'; + break; + case 'v': + *p = '\v'; + break; + case 'f': + *p = '\f'; + break; + default: + return false; + } + *dst += 1; + *src += 1; + return true; +} + +static bool unescape_octal(char **src, char **dst) +{ + char *p = *dst, *q = *src; + u8 num; + + if (isodigit(*q) == 0) + return false; + + num = (*q++) & 7; + while (num < 32 && isodigit(*q) && (q - *src < 3)) { + num <<= 3; + num += (*q++) & 7; + } + *p = num; + *dst += 1; + *src = q; + return true; +} + +static bool unescape_hex(char **src, char **dst) +{ + char *p = *dst, *q = *src; + int digit; + u8 num; + + if (*q++ != 'x') + return false; + + num = digit = hex_to_bin(*q++); + if (digit < 0) + return false; + + digit = hex_to_bin(*q); + if (digit >= 0) { + q++; + num = (num << 4) | digit; + } + *p = num; + *dst += 1; + *src = q; + return true; +} + +static bool unescape_special(char **src, char **dst) +{ + char *p = *dst, *q = *src; + + switch (*q) { + case '\"': + *p = '\"'; + break; + case '\\': + *p = '\\'; + break; + case 'a': + *p = '\a'; + break; + case 'e': + *p = '\e'; + break; + default: + return false; + } + *dst += 1; + *src += 1; + 
return true; +} + +int string_unescape(char *src, char *dst, size_t size, unsigned int flags) +{ + char *out = dst; + + while (*src && --size) { + if (src[0] == '\\' && src[1] != '\0' && size > 1) { + src++; + size--; + + if (flags & UNESCAPE_SPACE && + unescape_space(&src, &out)) + continue; + + if (flags & UNESCAPE_OCTAL && + unescape_octal(&src, &out)) + continue; + + if (flags & UNESCAPE_HEX && + unescape_hex(&src, &out)) + continue; + + if (flags & UNESCAPE_SPECIAL && + unescape_special(&src, &out)) + continue; + + *out++ = '\\'; + } + *out++ = *src++; + } + *out = '\0'; + + return out - dst; +} +EXPORT_SYMBOL(string_unescape); diff --git a/lib/test-string_helpers.c b/lib/test-string_helpers.c new file mode 100644 index 00000000000..6ac48de04c0 --- /dev/null +++ b/lib/test-string_helpers.c @@ -0,0 +1,103 @@ +/* + * Test cases for lib/string_helpers.c module. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include + +struct test_string { + const char *in; + const char *out; + unsigned int flags; +}; + +static const struct test_string strings[] __initconst = { + { + .in = "\\f\\ \\n\\r\\t\\v", + .out = "\f\\ \n\r\t\v", + .flags = UNESCAPE_SPACE, + }, + { + .in = "\\40\\1\\387\\0064\\05\\040\\8a\\110\\777", + .out = " \001\00387\0064\005 \\8aH?7", + .flags = UNESCAPE_OCTAL, + }, + { + .in = "\\xv\\xa\\x2c\\xD\\x6f2", + .out = "\\xv\n,\ro2", + .flags = UNESCAPE_HEX, + }, + { + .in = "\\h\\\\\\\"\\a\\e\\", + .out = "\\h\\\"\a\e\\", + .flags = UNESCAPE_SPECIAL, + }, +}; + +static void __init test_string_unescape(unsigned int flags, bool inplace) +{ + char in[256]; + char out_test[256]; + char out_real[256]; + int i, p = 0, q_test = 0, q_real = sizeof(out_real); + + for (i = 0; i < ARRAY_SIZE(strings); i++) { + const char *s = strings[i].in; + int len = strlen(strings[i].in); + + /* Copy string to in buffer */ + memcpy(&in[p], s, len); + p += len; + + /* Copy expected result for given flags */ + if (flags & 
strings[i].flags) { + s = strings[i].out; + len = strlen(strings[i].out); + } + memcpy(&out_test[q_test], s, len); + q_test += len; + } + in[p++] = '\0'; + + /* Call string_unescape and compare result */ + if (inplace) { + memcpy(out_real, in, p); + if (flags == UNESCAPE_ANY) + q_real = string_unescape_any_inplace(out_real); + else + q_real = string_unescape_inplace(out_real, flags); + } else if (flags == UNESCAPE_ANY) { + q_real = string_unescape_any(in, out_real, q_real); + } else { + q_real = string_unescape(in, out_real, q_real, flags); + } + + if (q_real != q_test || memcmp(out_test, out_real, q_test)) { + pr_warn("Test failed: flags = %u\n", flags); + print_hex_dump(KERN_WARNING, "Input: ", + DUMP_PREFIX_NONE, 16, 1, in, p - 1, true); + print_hex_dump(KERN_WARNING, "Expected: ", + DUMP_PREFIX_NONE, 16, 1, out_test, q_test, true); + print_hex_dump(KERN_WARNING, "Got: ", + DUMP_PREFIX_NONE, 16, 1, out_real, q_real, true); + } +} + +static int __init test_string_helpers_init(void) +{ + unsigned int i; + + pr_info("Running tests...\n"); + for (i = 0; i < UNESCAPE_ANY + 1; i++) + test_string_unescape(i, false); + test_string_unescape(get_random_int() % (UNESCAPE_ANY + 1), true); + + return -EINVAL; +} +module_init(test_string_helpers_init); +MODULE_LICENSE("Dual BSD/GPL"); -- cgit v1.2.3-70-g09d2 From 1a0df59444972105f0d4c2b0c16ce414d70c420a Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 30 Apr 2013 15:27:34 -0700 Subject: kernel/compat.c: make do_sysinfo() static The only use outside of kernel/timer.c was in kernel/compat.c, so move compat_sys_sysinfo() next to sys_sysinfo() in kernel/timer.c. 
Signed-off-by: Stephen Rothwell Cc: Thomas Gleixner Cc: Guenter Roeck Cc: Al Viro Acked-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 2 -- kernel/compat.c | 65 ---------------------------------------------- kernel/timer.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 69 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2dac79c3919..6d1844f393c 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -798,6 +798,4 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } # define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD #endif -extern int do_sysinfo(struct sysinfo *info); - #endif diff --git a/kernel/compat.c b/kernel/compat.c index 19971d8c729..1e8f1455117 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -1138,71 +1138,6 @@ asmlinkage long compat_sys_migrate_pages(compat_pid_t pid, } #endif -struct compat_sysinfo { - s32 uptime; - u32 loads[3]; - u32 totalram; - u32 freeram; - u32 sharedram; - u32 bufferram; - u32 totalswap; - u32 freeswap; - u16 procs; - u16 pad; - u32 totalhigh; - u32 freehigh; - u32 mem_unit; - char _f[20-2*sizeof(u32)-sizeof(int)]; -}; - -asmlinkage long -compat_sys_sysinfo(struct compat_sysinfo __user *info) -{ - struct sysinfo s; - - do_sysinfo(&s); - - /* Check to see if any memory value is too large for 32-bit and scale - * down if needed - */ - if ((s.totalram >> 32) || (s.totalswap >> 32)) { - int bitcount = 0; - - while (s.mem_unit < PAGE_SIZE) { - s.mem_unit <<= 1; - bitcount++; - } - - s.totalram >>= bitcount; - s.freeram >>= bitcount; - s.sharedram >>= bitcount; - s.bufferram >>= bitcount; - s.totalswap >>= bitcount; - s.freeswap >>= bitcount; - s.totalhigh >>= bitcount; - s.freehigh >>= bitcount; - } - - if (!access_ok(VERIFY_WRITE, info, sizeof(struct compat_sysinfo)) || - __put_user (s.uptime, &info->uptime) || - __put_user (s.loads[0], 
&info->loads[0]) || - __put_user (s.loads[1], &info->loads[1]) || - __put_user (s.loads[2], &info->loads[2]) || - __put_user (s.totalram, &info->totalram) || - __put_user (s.freeram, &info->freeram) || - __put_user (s.sharedram, &info->sharedram) || - __put_user (s.bufferram, &info->bufferram) || - __put_user (s.totalswap, &info->totalswap) || - __put_user (s.freeswap, &info->freeswap) || - __put_user (s.procs, &info->procs) || - __put_user (s.totalhigh, &info->totalhigh) || - __put_user (s.freehigh, &info->freehigh) || - __put_user (s.mem_unit, &info->mem_unit)) - return -EFAULT; - - return 0; -} - COMPAT_SYSCALL_DEFINE2(sched_rr_get_interval, compat_pid_t, pid, struct compat_timespec __user *, interval) diff --git a/kernel/timer.c b/kernel/timer.c index dbf7a78a1ef..06b3245624e 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -1567,7 +1568,7 @@ SYSCALL_DEFINE0(gettid) * do_sysinfo - fill in sysinfo struct * @info: pointer to buffer to fill */ -int do_sysinfo(struct sysinfo *info) +static int do_sysinfo(struct sysinfo *info) { unsigned long mem_total, sav_total; unsigned int mem_unit, bitcount; @@ -1642,6 +1643,73 @@ SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info) return 0; } +#ifdef CONFIG_COMPAT +struct compat_sysinfo { + s32 uptime; + u32 loads[3]; + u32 totalram; + u32 freeram; + u32 sharedram; + u32 bufferram; + u32 totalswap; + u32 freeswap; + u16 procs; + u16 pad; + u32 totalhigh; + u32 freehigh; + u32 mem_unit; + char _f[20-2*sizeof(u32)-sizeof(int)]; +}; + +asmlinkage long +compat_sys_sysinfo(struct compat_sysinfo __user *info) +{ + struct sysinfo s; + + do_sysinfo(&s); + + /* Check to see if any memory value is too large for 32-bit and scale + * down if needed + */ + if ((s.totalram >> 32) || (s.totalswap >> 32)) { + int bitcount = 0; + + while (s.mem_unit < PAGE_SIZE) { + s.mem_unit <<= 1; + bitcount++; + } + + s.totalram >>= bitcount; + s.freeram >>= bitcount; + 
s.sharedram >>= bitcount; + s.bufferram >>= bitcount; + s.totalswap >>= bitcount; + s.freeswap >>= bitcount; + s.totalhigh >>= bitcount; + s.freehigh >>= bitcount; + } + + if (!access_ok(VERIFY_WRITE, info, sizeof(struct compat_sysinfo)) || + __put_user (s.uptime, &info->uptime) || + __put_user (s.loads[0], &info->loads[0]) || + __put_user (s.loads[1], &info->loads[1]) || + __put_user (s.loads[2], &info->loads[2]) || + __put_user (s.totalram, &info->totalram) || + __put_user (s.freeram, &info->freeram) || + __put_user (s.sharedram, &info->sharedram) || + __put_user (s.bufferram, &info->bufferram) || + __put_user (s.totalswap, &info->totalswap) || + __put_user (s.freeswap, &info->freeswap) || + __put_user (s.procs, &info->procs) || + __put_user (s.totalhigh, &info->totalhigh) || + __put_user (s.freehigh, &info->freehigh) || + __put_user (s.mem_unit, &info->mem_unit)) + return -EFAULT; + + return 0; +} +#endif /* CONFIG_COMPAT */ + static int __cpuinit init_timers_cpu(int cpu) { int j; -- cgit v1.2.3-70-g09d2 From 938e4b22e2a7d0f6f3962e601339347b2d8e09f5 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 30 Apr 2013 15:28:02 -0700 Subject: usermodehelper: export call_usermodehelper_exec() and call_usermodehelper_setup() call_usermodehelper_setup() + call_usermodehelper_exec() need to be called instead of call_usermodehelper_fns() when the cleanup function needs to be called even when an ENOMEM error occurs. In this case using call_usermodehelper_fns() the user can't distinguish if the cleanup function was called or not. [akpm@linux-foundation.org: export call_usermodehelper_setup() to modules] Signed-off-by: Lucas De Marchi Reviewed-by: Oleg Nesterov Cc: David Howells Cc: James Morris Cc: Al Viro Cc: Tejun Heo Cc: "Rafael J. 
Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmod.h | 8 ++++++++ kernel/kmod.c | 57 ++++++++++++++++++++++------------------------------ 2 files changed, 32 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 5398d580707..7eebcf5d75f 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -71,6 +71,14 @@ call_usermodehelper_fns(char *path, char **argv, char **envp, int wait, int (*init)(struct subprocess_info *info, struct cred *new), void (*cleanup)(struct subprocess_info *), void *data); +extern struct subprocess_info * +call_usermodehelper_setup(char *path, char **argv, char **envp, gfp_t gfp_mask, + int (*init)(struct subprocess_info *info, struct cred *new), + void (*cleanup)(struct subprocess_info *), void *data); + +extern int +call_usermodehelper_exec(struct subprocess_info *info, int wait); + static inline int call_usermodehelper(char *path, char **argv, char **envp, int wait) { diff --git a/kernel/kmod.c b/kernel/kmod.c index 56dd34976d7..e11ea14ac01 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -502,14 +502,28 @@ static void helper_unlock(void) * @argv: arg vector for process * @envp: environment for process * @gfp_mask: gfp mask for memory allocation + * @cleanup: a cleanup function + * @init: an init function + * @data: arbitrary context sensitive data * * Returns either %NULL on allocation failure, or a subprocess_info * structure. This should be passed to call_usermodehelper_exec to * exec the process and free the structure. + * + * The init function is used to customize the helper process prior to + * exec. A non-zero return code causes the process to error out, exit, + * and return the failure to the calling process + * + * The cleanup function is just before ethe subprocess_info is about to + * be freed. This can be used for freeing the argv and envp. 
The + * Function must be runnable in either a process context or the + * context in which call_usermodehelper_exec is called. */ -static struct subprocess_info *call_usermodehelper_setup(char *path, char **argv, - char **envp, gfp_t gfp_mask) + char **envp, gfp_t gfp_mask, + int (*init)(struct subprocess_info *info, struct cred *new), + void (*cleanup)(struct subprocess_info *info), + void *data) { struct subprocess_info *sub_info; sub_info = kzalloc(sizeof(struct subprocess_info), gfp_mask); @@ -520,36 +534,14 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv, sub_info->path = path; sub_info->argv = argv; sub_info->envp = envp; + + sub_info->cleanup = cleanup; + sub_info->init = init; + sub_info->data = data; out: return sub_info; } - -/** - * call_usermodehelper_setfns - set a cleanup/init function - * @info: a subprocess_info returned by call_usermodehelper_setup - * @cleanup: a cleanup function - * @init: an init function - * @data: arbitrary context sensitive data - * - * The init function is used to customize the helper process prior to - * exec. A non-zero return code causes the process to error out, exit, - * and return the failure to the calling process - * - * The cleanup function is just before ethe subprocess_info is about to - * be freed. This can be used for freeing the argv and envp. The - * Function must be runnable in either a process context or the - * context in which call_usermodehelper_exec is called. 
- */ -static -void call_usermodehelper_setfns(struct subprocess_info *info, - int (*init)(struct subprocess_info *info, struct cred *new), - void (*cleanup)(struct subprocess_info *info), - void *data) -{ - info->cleanup = cleanup; - info->init = init; - info->data = data; -} +EXPORT_SYMBOL(call_usermodehelper_setup); /** * call_usermodehelper_exec - start a usermode application @@ -563,7 +555,6 @@ void call_usermodehelper_setfns(struct subprocess_info *info, * asynchronously if wait is not set, and runs as a child of keventd. * (ie. it runs with full root capabilities). */ -static int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) { DECLARE_COMPLETION_ONSTACK(done); @@ -615,6 +606,7 @@ unlock: helper_unlock(); return retval; } +EXPORT_SYMBOL(call_usermodehelper_exec); /* * call_usermodehelper_fns() will not run the caller-provided cleanup function @@ -630,13 +622,12 @@ int call_usermodehelper_fns( struct subprocess_info *info; gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL; - info = call_usermodehelper_setup(path, argv, envp, gfp_mask); + info = call_usermodehelper_setup(path, argv, envp, gfp_mask, + init, cleanup, data); if (info == NULL) return -ENOMEM; - call_usermodehelper_setfns(info, init, cleanup, data); - return call_usermodehelper_exec(info, wait); } EXPORT_SYMBOL(call_usermodehelper_fns); -- cgit v1.2.3-70-g09d2 From 66e5b7e1948cdbdca2b0cc6ddc6d69ee84583fb4 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 30 Apr 2013 15:28:09 -0700 Subject: kmod: remove call_usermodehelper_fns() This function suffers from not being able to determine if the cleanup is called in case it returns -ENOMEM. Nobody is using it anymore, so let's remove it. Signed-off-by: Lucas De Marchi Cc: Oleg Nesterov Cc: David Howells Cc: James Morris Cc: Al Viro Cc: Tejun Heo Cc: "Rafael J. 
Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmod.h | 11 +---------- kernel/kmod.c | 31 +++++++++++++++++-------------- 2 files changed, 18 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 7eebcf5d75f..0555cc66a15 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -67,9 +67,7 @@ struct subprocess_info { }; extern int -call_usermodehelper_fns(char *path, char **argv, char **envp, int wait, - int (*init)(struct subprocess_info *info, struct cred *new), - void (*cleanup)(struct subprocess_info *), void *data); +call_usermodehelper(char *path, char **argv, char **envp, int wait); extern struct subprocess_info * call_usermodehelper_setup(char *path, char **argv, char **envp, gfp_t gfp_mask, @@ -79,13 +77,6 @@ call_usermodehelper_setup(char *path, char **argv, char **envp, gfp_t gfp_mask, extern int call_usermodehelper_exec(struct subprocess_info *info, int wait); -static inline int -call_usermodehelper(char *path, char **argv, char **envp, int wait) -{ - return call_usermodehelper_fns(path, argv, envp, wait, - NULL, NULL, NULL); -} - extern struct ctl_table usermodehelper_table[]; enum umh_disable_depth { diff --git a/kernel/kmod.c b/kernel/kmod.c index 9941a4f155d..1296e72e416 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -555,8 +555,8 @@ EXPORT_SYMBOL(call_usermodehelper_setup); * call_usermodehelper_exec - start a usermode application * @sub_info: information about the subprocessa * @wait: wait for the application to finish and return status. - * when -1 don't wait at all, but you get no useful error back when - * the program couldn't be exec'ed. This makes it safe to call + * when UMH_NO_WAIT don't wait at all, but you get no useful error back + * when the program couldn't be exec'ed. This makes it safe to call * from interrupt context. * * Runs a user-space application. 
The application is started @@ -616,29 +616,32 @@ unlock: } EXPORT_SYMBOL(call_usermodehelper_exec); -/* - * call_usermodehelper_fns() will not run the caller-provided cleanup function - * if a memory allocation failure is experienced. So the caller might need to - * check the call_usermodehelper_fns() return value: if it is -ENOMEM, perform - * the necessaary cleanup within the caller. +/** + * call_usermodehelper() - prepare and start a usermode application + * @path: path to usermode executable + * @argv: arg vector for process + * @envp: environment for process + * @wait: wait for the application to finish and return status. + * when UMH_NO_WAIT don't wait at all, but you get no useful error back + * when the program couldn't be exec'ed. This makes it safe to call + * from interrupt context. + * + * This function is the equivalent to use call_usermodehelper_setup() and + * call_usermodehelper_exec(). */ -int call_usermodehelper_fns( - char *path, char **argv, char **envp, int wait, - int (*init)(struct subprocess_info *info, struct cred *new), - void (*cleanup)(struct subprocess_info *), void *data) +int call_usermodehelper(char *path, char **argv, char **envp, int wait) { struct subprocess_info *info; gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? 
GFP_ATOMIC : GFP_KERNEL; info = call_usermodehelper_setup(path, argv, envp, gfp_mask, - init, cleanup, data); - + NULL, NULL, NULL); if (info == NULL) return -ENOMEM; return call_usermodehelper_exec(info, wait); } -EXPORT_SYMBOL(call_usermodehelper_fns); +EXPORT_SYMBOL(call_usermodehelper); static int proc_cap_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) -- cgit v1.2.3-70-g09d2 From 403bad72b67d8b3f5a0240af5023adfa48132a65 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 30 Apr 2013 15:28:10 -0700 Subject: coredump: only SIGKILL should interrupt the coredumping task There are 2 well known and ancient problems with coredump/signals, and a lot of related bug reports: - do_coredump() clears TIF_SIGPENDING but of course this can't help if, say, SIGCHLD comes after that. In this case the coredump can fail unexpectedly. See for example wait_for_dump_helper()->signal_pending() check but there are other reasons. - At the same time, dumping a huge core on the slow media can take a lot of time/resources and there is no way to kill the coredumping task reliably. In particular this is not oom_kill-friendly. This patch tries to fix the 1st problem, and makes the preparation for the next changes. We add the new SIGNAL_GROUP_COREDUMP flag set by zap_threads() to indicate that this process dumps the core. prepare_signal() checks this flag and nacks any signal except SIGKILL. Note that this check tries to be conservative, in the long term we should probably treat the SIGNAL_GROUP_EXIT case equally but this needs more discussion. See marc.info/?l=linux-kernel&m=120508897917439 Notes: - recalc_sigpending() doesn't check SIGNAL_GROUP_COREDUMP. The patch assumes that dump_write/etc paths should never call it, but we can change it as well. - There is another source of TIF_SIGPENDING, freezer. This will be addressed separately. Signed-off-by: Oleg Nesterov Tested-by: Mandeep Singh Baines Cc: Ingo Molnar Cc: Neil Horman Cc: "Rafael J. 
Wysocki" Cc: Roland McGrath Cc: Tejun Heo Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/coredump.c | 13 +++++-------- include/linux/sched.h | 1 + kernel/signal.c | 6 ++++-- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/coredump.c b/fs/coredump.c index d52f6bd5ad8..f91cfd8cd5f 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -280,8 +280,8 @@ static int zap_process(struct task_struct *start, int exit_code) return nr; } -static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, - struct core_state *core_state, int exit_code) +static int zap_threads(struct task_struct *tsk, struct mm_struct *mm, + struct core_state *core_state, int exit_code) { struct task_struct *g, *p; unsigned long flags; @@ -291,6 +291,9 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, if (!signal_group_exit(tsk->signal)) { mm->core_state = core_state; nr = zap_process(tsk, exit_code); + /* ignore all signals except SIGKILL, see prepare_signal() */ + tsk->signal->flags |= SIGNAL_GROUP_COREDUMP; + clear_tsk_thread_flag(tsk, TIF_SIGPENDING); } spin_unlock_irq(&tsk->sighand->siglock); if (unlikely(nr < 0)) @@ -514,12 +517,6 @@ void do_coredump(siginfo_t *siginfo) old_cred = override_creds(cred); - /* - * Clear any false indication of pending signals that might - * be seen by the filesystem code called to write the core file. 
- */ - clear_thread_flag(TIF_SIGPENDING); - ispipe = format_corename(&cn, &cprm); if (ispipe) { diff --git a/include/linux/sched.h b/include/linux/sched.h index 54ddcb82cdd..a22baf83c20 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -626,6 +626,7 @@ struct signal_struct { #define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */ #define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */ #define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */ +#define SIGNAL_GROUP_COREDUMP 0x00000008 /* coredump in progress */ /* * Pending notifications to parent. */ diff --git a/kernel/signal.c b/kernel/signal.c index 27ece019bd0..cede58910f9 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -854,12 +854,14 @@ static void ptrace_trap_notify(struct task_struct *t) * Returns true if the signal should be actually delivered, otherwise * it should be dropped. */ -static int prepare_signal(int sig, struct task_struct *p, bool force) +static bool prepare_signal(int sig, struct task_struct *p, bool force) { struct signal_struct *signal = p->signal; struct task_struct *t; - if (unlikely(signal->flags & SIGNAL_GROUP_EXIT)) { + if (signal->flags & (SIGNAL_GROUP_EXIT | SIGNAL_GROUP_COREDUMP)) { + if (signal->flags & SIGNAL_GROUP_COREDUMP) + return sig == SIGKILL; /* * The process is in the middle of dying, nothing to do. */ -- cgit v1.2.3-70-g09d2 From e56fb2874015370e3b7f8d85051f6dce26051df9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 30 Apr 2013 15:28:20 -0700 Subject: exec: do not abuse ->cred_guard_mutex in threadgroup_lock() threadgroup_lock() takes signal->cred_guard_mutex to ensure that thread_group_leader() is stable. This doesn't look nice, the scope of this lock in do_execve() is huge. 
And as Dave pointed out this can lead to deadlock, we have the following dependencies: do_execve: cred_guard_mutex -> i_mutex cgroup_mount: i_mutex -> cgroup_mutex attach_task_by_pid: cgroup_mutex -> cred_guard_mutex Change de_thread() to take threadgroup_change_begin() around the switch-the-leader code and change threadgroup_lock() to avoid ->cred_guard_mutex. Note that de_thread() can't sleep with ->group_rwsem held, this can obviously deadlock with the exiting leader if the writer is active, so it does threadgroup_change_end() before schedule(). Reported-by: Dave Jones Acked-by: Tejun Heo Acked-by: Li Zefan Signed-off-by: Oleg Nesterov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 3 +++ include/linux/sched.h | 18 ++++-------------- 2 files changed, 7 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index 260f89f6665..963f510a25a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -898,11 +898,13 @@ static int de_thread(struct task_struct *tsk) sig->notify_count = -1; /* for exit_notify() */ for (;;) { + threadgroup_change_begin(tsk); write_lock_irq(&tasklist_lock); if (likely(leader->exit_state)) break; __set_current_state(TASK_KILLABLE); write_unlock_irq(&tasklist_lock); + threadgroup_change_end(tsk); schedule(); if (unlikely(__fatal_signal_pending(tsk))) goto killed; @@ -960,6 +962,7 @@ static int de_thread(struct task_struct *tsk) if (unlikely(leader->ptrace)) __wake_up_parent(leader, leader->parent); write_unlock_irq(&tasklist_lock); + threadgroup_change_end(tsk); release_task(leader); } diff --git a/include/linux/sched.h b/include/linux/sched.h index a22baf83c20..6f950048b6e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2249,27 +2249,18 @@ static inline void threadgroup_change_end(struct task_struct *tsk) * * Lock the threadgroup @tsk belongs to. No new task is allowed to enter * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or - * perform exec. 
This is useful for cases where the threadgroup needs to - * stay stable across blockable operations. + * change ->group_leader/pid. This is useful for cases where the threadgroup + * needs to stay stable across blockable operations. * * fork and exit paths explicitly call threadgroup_change_{begin|end}() for * synchronization. While held, no new task will be added to threadgroup * and no existing live task will have its PF_EXITING set. * - * During exec, a task goes and puts its thread group through unusual - * changes. After de-threading, exclusive access is assumed to resources - * which are usually shared by tasks in the same group - e.g. sighand may - * be replaced with a new one. Also, the exec'ing task takes over group - * leader role including its pid. Exclude these changes while locked by - * grabbing cred_guard_mutex which is used to synchronize exec path. + * de_thread() does threadgroup_change_{begin|end}() when a non-leader + * sub-thread becomes a new leader. */ static inline void threadgroup_lock(struct task_struct *tsk) { - /* - * exec uses exit for de-threading nesting group_rwsem inside - * cred_guard_mutex. Grab cred_guard_mutex first. - */ - mutex_lock(&tsk->signal->cred_guard_mutex); down_write(&tsk->signal->group_rwsem); } @@ -2282,7 +2273,6 @@ static inline void threadgroup_lock(struct task_struct *tsk) static inline void threadgroup_unlock(struct task_struct *tsk) { up_write(&tsk->signal->group_rwsem); - mutex_unlock(&tsk->signal->cred_guard_mutex); } #else static inline void threadgroup_change_begin(struct task_struct *tsk) {} -- cgit v1.2.3-70-g09d2 From 5cc5445164c16d32bab2912fac28356ab07aa8b4 Mon Sep 17 00:00:00 2001 From: "Raphael S.Carvalho" Date: Tue, 30 Apr 2013 15:28:27 -0700 Subject: pid_namespace.c/.h: simplify defines Move BITS_PER_PAGE from pid_namespace.c to pid_namespace.h, since we can simplify the definition of PIDMAP_ENTRIES by using BITS_PER_PAGE.
[akpm@linux-foundation.org: kernel/pid.c:54:1: warning: "BITS_PER_PAGE" redefined] Signed-off-by: Raphael S.Carvalho Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pid_namespace.h | 4 +++- kernel/pid.c | 3 --- kernel/pid_namespace.c | 2 -- 3 files changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 215e5e3dda1..8ac32836690 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -13,7 +13,9 @@ struct pidmap { void *page; }; -#define PIDMAP_ENTRIES ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8) +#define BITS_PER_PAGE (PAGE_SIZE * 8) +#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) +#define PIDMAP_ENTRIES ((PID_MAX_LIMIT+BITS_PER_PAGE-1)/BITS_PER_PAGE) struct bsd_acct_struct; diff --git a/kernel/pid.c b/kernel/pid.c index 8147bdf22f3..6283d6412af 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -51,9 +51,6 @@ int pid_max = PID_MAX_DEFAULT; int pid_max_min = RESERVED_PIDS + 1; int pid_max_max = PID_MAX_LIMIT; -#define BITS_PER_PAGE (PAGE_SIZE*8) -#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) - static inline int mk_pid(struct pid_namespace *pid_ns, struct pidmap *map, int off) { diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index bea15bdf82b..69473c4a653 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -19,8 +19,6 @@ #include #include -#define BITS_PER_PAGE (PAGE_SIZE*8) - struct pid_cache { int nr_ids; char name[16]; -- cgit v1.2.3-70-g09d2 From 536b39ecf1b52ab71c2c126db0137611b9e1a4d4 Mon Sep 17 00:00:00 2001 From: "zhangwei(Jovi)" Date: Tue, 30 Apr 2013 15:28:40 -0700 Subject: kernel/relay.c: move FIX_SIZE macro into relay.c It's better to place FIX_SIZE macro in relay.c, instead of relay.h Signed-off-by: zhangwei(Jovi) Cc: Jens Axboe Cc: Al Viro Cc: Eric Dumazet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/relay.h | 3 --- kernel/relay.c | 3 +++ 2 files 
changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/relay.h b/include/linux/relay.h index 91cacc34c15..d7c8359693c 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -20,9 +20,6 @@ #include #include -/* Needs a _much_ better name... */ -#define FIX_SIZE(x) ((((x) - 1) & PAGE_MASK) + PAGE_SIZE) - /* * Tracks changes to rchan/rchan_buf structs */ diff --git a/kernel/relay.c b/kernel/relay.c index d21006c4a3c..4c2959b6c34 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -550,6 +550,9 @@ static int __cpuinit relay_hotcpu_callback(struct notifier_block *nb, return NOTIFY_OK; } +/* Needs a _much_ better name... */ +#define FIX_SIZE(x) ((((x) - 1) & PAGE_MASK) + PAGE_SIZE) + /** * relay_open - create a new relay channel * @base_filename: base name of files to create, %NULL for buffering only -- cgit v1.2.3-70-g09d2