From c94aa5ca3088018d2a7a9bd3258aefffe29df265 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 16 Jul 2009 15:44:29 +0200 Subject: lockdep: Print the shortest dependency chain if finding a circle Currently lockdep will print the 1st circle detected if it exists when acquiring a new (next) lock. This patch prints the shortest path from the next lock to be acquired to the previous held lock if a circle is found. The patch still uses the current method to check circle, and once the circle is found, breadth-first search algorithem is used to compute the shortest path from the next lock to the previous lock in the forward lock dependency graph. Printing the shortest path will shorten the dependency chain, and make troubleshooting for possible circular locking easier. Signed-off-by: Ming Lei Signed-off-by: Peter Zijlstra LKML-Reference: <1246201486-7308-2-git-send-email-tom.leiming@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index b25d1b53df0..9ec026f8d09 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -149,6 +149,12 @@ struct lock_list { struct lock_class *class; struct stack_trace trace; int distance; + + /*The parent field is used to implement breadth-first search,and + *the bit 0 is reused to indicate if the lock has been accessed + *in BFS. + */ + struct lock_list *parent; }; /* -- cgit v1.2.3-70-g09d2 From af012961450949ea297b209e091bd1a3805b8a0a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Jul 2009 15:44:29 +0200 Subject: lockdep: BFS cleanup Some cleanups of the lockdep code after the BFS series: - Remove the last traces of the generation id - Fixup comment style - Move the bfs routines into lockdep.c - Cleanup the bfs routines [ tom.leiming@gmail.com: Fix crash ] Signed-off-by: Peter Zijlstra LKML-Reference: <1246201486-7308-11-git-send-email-tom.leiming@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 7 +- kernel/lockdep.c | 284 +++++++++++++++++++++++++++------------------ kernel/lockdep_internals.h | 97 +--------------- 3 files changed, 175 insertions(+), 213 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 9ec026f8d09..12aabfcb45f 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -58,7 +58,6 @@ struct lock_class { struct lockdep_subclass_key *key; unsigned int subclass; - unsigned int dep_gen_id; /* * IRQ/softirq usage tracking bits: @@ -150,9 +149,9 @@ struct lock_list { struct stack_trace trace; int distance; - /*The parent field is used to implement breadth-first search,and - *the bit 0 is reused to indicate if the lock has been accessed - *in BFS. + /* + * The parent field is used to implement breadth-first search, and the + * bit 0 is reused to indicate if the lock has been accessed in BFS. */ struct lock_list *parent; }; diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 744da6265d9..1cedb00e3e7 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -43,6 +43,7 @@ #include #include #include + #include #include "lockdep_internals.h" @@ -118,7 +119,7 @@ static inline int debug_locks_off_graph_unlock(void) static int lockdep_initialized; unsigned long nr_list_entries; -struct lock_list list_entries[MAX_LOCKDEP_ENTRIES]; +static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES]; /* * All data structures here are protected by the global debug_lock. 
@@ -390,19 +391,6 @@ unsigned int nr_process_chains; unsigned int max_lockdep_depth; unsigned int max_recursion_depth; -static unsigned int lockdep_dependency_gen_id; - -static bool lockdep_dependency_visit(struct lock_class *source, - unsigned int depth) -{ - if (!depth) - lockdep_dependency_gen_id++; - if (source->dep_gen_id == lockdep_dependency_gen_id) - return true; - source->dep_gen_id = lockdep_dependency_gen_id; - return false; -} - #ifdef CONFIG_DEBUG_LOCKDEP /* * We cannot printk in early bootup code. Not even early_printk() @@ -551,88 +539,6 @@ static void lockdep_print_held_locks(struct task_struct *curr) } } -static void print_lock_class_header(struct lock_class *class, int depth) -{ - int bit; - - printk("%*s->", depth, ""); - print_lock_name(class); - printk(" ops: %lu", class->ops); - printk(" {\n"); - - for (bit = 0; bit < LOCK_USAGE_STATES; bit++) { - if (class->usage_mask & (1 << bit)) { - int len = depth; - - len += printk("%*s %s", depth, "", usage_str[bit]); - len += printk(" at:\n"); - print_stack_trace(class->usage_traces + bit, len); - } - } - printk("%*s }\n", depth, ""); - - printk("%*s ... key at: ",depth,""); - print_ip_sym((unsigned long)class->key); -} - -/* - * printk the shortest lock dependencies from @start to @end in reverse order: - */ -static void __used -print_shortest_lock_dependencies(struct lock_list *leaf, - struct lock_list *root) -{ - struct lock_list *entry = leaf; - int depth; - - /*compute depth from generated tree by BFS*/ - depth = get_lock_depth(leaf); - - do { - print_lock_class_header(entry->class, depth); - printk("%*s ... acquired at:\n", depth, ""); - print_stack_trace(&entry->trace, 2); - printk("\n"); - - if (depth == 0 && (entry != root)) { - printk("lockdep:%s bad BFS generated tree\n", __func__); - break; - } - - entry = get_lock_parent(entry); - depth--; - } while (entry && (depth >= 0)); - - return; -} -/* - * printk all lock dependencies starting at : - */ -static void __used -print_lock_dependencies(struct lock_class *class, int depth) -{ - struct lock_list *entry; - - if (lockdep_dependency_visit(class, depth)) - return; - - if (DEBUG_LOCKS_WARN_ON(depth >= 20)) - return; - - print_lock_class_header(class, depth); - - list_for_each_entry(entry, &class->locks_after, entry) { - if (DEBUG_LOCKS_WARN_ON(!entry->class)) - return; - - print_lock_dependencies(entry->class, depth + 1); - - printk("%*s ... acquired at:\n",depth,""); - print_stack_trace(&entry->trace, 2); - printk("\n"); - } -} - static void print_kernel_version(void) { printk("%s %.*s\n", init_utsname()->release, @@ -927,14 +833,106 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this, return 1; } -unsigned long bfs_accessed[BITS_TO_LONGS(MAX_LOCKDEP_ENTRIES)]; -static struct circular_queue lock_cq; +/*For good efficiency of modular, we use power of 2*/ +#define MAX_CIRCULAR_QUEUE_SIZE 4096UL +#define CQ_MASK (MAX_CIRCULAR_QUEUE_SIZE-1) + +/* The circular_queue and helpers is used to implement the + * breadth-first search(BFS)algorithem, by which we can build + * the shortest path from the next lock to be acquired to the + * previous held lock if there is a circular between them. 
+ * */ +struct circular_queue { + unsigned long element[MAX_CIRCULAR_QUEUE_SIZE]; + unsigned int front, rear; +}; + +static struct circular_queue lock_cq; +static unsigned long bfs_accessed[BITS_TO_LONGS(MAX_LOCKDEP_ENTRIES)]; + unsigned int max_bfs_queue_depth; + +static inline void __cq_init(struct circular_queue *cq) +{ + cq->front = cq->rear = 0; + bitmap_zero(bfs_accessed, MAX_LOCKDEP_ENTRIES); +} + +static inline int __cq_empty(struct circular_queue *cq) +{ + return (cq->front == cq->rear); +} + +static inline int __cq_full(struct circular_queue *cq) +{ + return ((cq->rear + 1) & CQ_MASK) == cq->front; +} + +static inline int __cq_enqueue(struct circular_queue *cq, unsigned long elem) +{ + if (__cq_full(cq)) + return -1; + + cq->element[cq->rear] = elem; + cq->rear = (cq->rear + 1) & CQ_MASK; + return 0; +} + +static inline int __cq_dequeue(struct circular_queue *cq, unsigned long *elem) +{ + if (__cq_empty(cq)) + return -1; + + *elem = cq->element[cq->front]; + cq->front = (cq->front + 1) & CQ_MASK; + return 0; +} + +static inline unsigned int __cq_get_elem_count(struct circular_queue *cq) +{ + return (cq->rear - cq->front) & CQ_MASK; +} + +static inline void mark_lock_accessed(struct lock_list *lock, + struct lock_list *parent) +{ + unsigned long nr; + nr = lock - list_entries; + WARN_ON(nr >= nr_list_entries); + lock->parent = parent; + set_bit(nr, bfs_accessed); +} + +static inline unsigned long lock_accessed(struct lock_list *lock) +{ + unsigned long nr; + nr = lock - list_entries; + WARN_ON(nr >= nr_list_entries); + return test_bit(nr, bfs_accessed); +} + +static inline struct lock_list *get_lock_parent(struct lock_list *child) +{ + return child->parent; +} + +static inline int get_lock_depth(struct lock_list *child) +{ + int depth = 0; + struct lock_list *parent; + + while ((parent = get_lock_parent(child))) { + child = parent; + depth++; + } + return depth; +} + static int __bfs(struct lock_list *source_entry, - void *data, - int (*match)(struct lock_list *entry, void *data), - struct lock_list **target_entry, - int forward) + void *data, + int (*match)(struct lock_list *entry, void *data), + struct lock_list **target_entry, + int forward) { struct lock_list *entry; struct list_head *head; @@ -1202,14 +1200,6 @@ check_noncircular(struct lock_list *root, struct lock_class *target, * without creating any illegal irq-safe -> irq-unsafe lock dependency. */ - -#define BFS_PROCESS_RET(ret) do { \ - if (ret < 0) \ - return print_bfs_bug(ret); \ - if (ret == 1) \ - return 1; \ - } while (0) - static inline int usage_match(struct lock_list *entry, void *bit) { return entry->class->usage_mask & (1 << (enum lock_usage_bit)bit); @@ -1263,6 +1253,60 @@ find_usage_backwards(struct lock_list *root, enum lock_usage_bit bit, return result; } +static void print_lock_class_header(struct lock_class *class, int depth) +{ + int bit; + + printk("%*s->", depth, ""); + print_lock_name(class); + printk(" ops: %lu", class->ops); + printk(" {\n"); + + for (bit = 0; bit < LOCK_USAGE_STATES; bit++) { + if (class->usage_mask & (1 << bit)) { + int len = depth; + + len += printk("%*s %s", depth, "", usage_str[bit]); + len += printk(" at:\n"); + print_stack_trace(class->usage_traces + bit, len); + } + } + printk("%*s }\n", depth, ""); + + printk("%*s ... 
key at: ",depth,""); + print_ip_sym((unsigned long)class->key); +} + +/* + * printk the shortest lock dependencies from @start to @end in reverse order: + */ +static void __used +print_shortest_lock_dependencies(struct lock_list *leaf, + struct lock_list *root) +{ + struct lock_list *entry = leaf; + int depth; + + /*compute depth from generated tree by BFS*/ + depth = get_lock_depth(leaf); + + do { + print_lock_class_header(entry->class, depth); + printk("%*s ... acquired at:\n", depth, ""); + print_stack_trace(&entry->trace, 2); + printk("\n"); + + if (depth == 0 && (entry != root)) { + printk("lockdep:%s bad BFS generated tree\n", __func__); + break; + } + + entry = get_lock_parent(entry); + depth--; + } while (entry && (depth >= 0)); + + return; +} static int print_bad_irq_dependency(struct task_struct *curr, @@ -1349,12 +1393,18 @@ check_usage(struct task_struct *curr, struct held_lock *prev, this.class = hlock_class(prev); ret = find_usage_backwards(&this, bit_backwards, &target_entry); - BFS_PROCESS_RET(ret); + if (ret < 0) + return print_bfs_bug(ret); + if (ret == 1) + return ret; that.parent = NULL; that.class = hlock_class(next); ret = find_usage_forwards(&that, bit_forwards, &target_entry1); - BFS_PROCESS_RET(ret); + if (ret < 0) + return print_bfs_bug(ret); + if (ret == 1) + return ret; return print_bad_irq_dependency(curr, &this, &that, target_entry, target_entry1, @@ -2038,7 +2088,10 @@ check_usage_forwards(struct task_struct *curr, struct held_lock *this, root.parent = NULL; root.class = hlock_class(this); ret = find_usage_forwards(&root, bit, &target_entry); - BFS_PROCESS_RET(ret); + if (ret < 0) + return print_bfs_bug(ret); + if (ret == 1) + return ret; return print_irq_inversion_bug(curr, &root, target_entry, this, 1, irqclass); @@ -2059,7 +2112,10 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this, root.parent = NULL; root.class = hlock_class(this); ret = find_usage_backwards(&root, bit, &target_entry); - BFS_PROCESS_RET(ret); + if (ret < 0) + return print_bfs_bug(ret); + if (ret == 1) + return ret; return print_irq_inversion_bug(curr, &root, target_entry, this, 1, irqclass); diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h index 6baa8807efd..a2ee95ad131 100644 --- a/kernel/lockdep_internals.h +++ b/kernel/lockdep_internals.h @@ -91,6 +91,8 @@ extern unsigned int nr_process_chains; extern unsigned int max_lockdep_depth; extern unsigned int max_recursion_depth; +extern unsigned int max_bfs_queue_depth; + #ifdef CONFIG_PROVE_LOCKING extern unsigned long lockdep_count_forward_deps(struct lock_class *); extern unsigned long lockdep_count_backward_deps(struct lock_class *); @@ -136,98 +138,3 @@ extern atomic_t nr_find_usage_backwards_recursions; # define debug_atomic_dec(ptr) do { } while (0) # define debug_atomic_read(ptr) 0 #endif - - -extern unsigned int max_bfs_queue_depth; -extern unsigned long nr_list_entries; -extern struct lock_list list_entries[MAX_LOCKDEP_ENTRIES]; -extern unsigned long bfs_accessed[]; - -/*For good efficiency of modular, we use power of 2*/ -#define MAX_CIRCULAR_QUE_SIZE 4096UL - -/* The circular_queue and helpers is used to implement the - * breadth-first search(BFS)algorithem, by which we can build - * the shortest path from the next lock to be acquired to the - * previous held lock if there is a circular between them. 
- * */ -struct circular_queue{ - unsigned long element[MAX_CIRCULAR_QUE_SIZE]; - unsigned int front, rear; -}; - -static inline void __cq_init(struct circular_queue *cq) -{ - cq->front = cq->rear = 0; - bitmap_zero(bfs_accessed, MAX_LOCKDEP_ENTRIES); -} - -static inline int __cq_empty(struct circular_queue *cq) -{ - return (cq->front == cq->rear); -} - -static inline int __cq_full(struct circular_queue *cq) -{ - return ((cq->rear + 1)&(MAX_CIRCULAR_QUE_SIZE-1)) == cq->front; -} - -static inline int __cq_enqueue(struct circular_queue *cq, unsigned long elem) -{ - if (__cq_full(cq)) - return -1; - - cq->element[cq->rear] = elem; - cq->rear = (cq->rear + 1)&(MAX_CIRCULAR_QUE_SIZE-1); - return 0; -} - -static inline int __cq_dequeue(struct circular_queue *cq, unsigned long *elem) -{ - if (__cq_empty(cq)) - return -1; - - *elem = cq->element[cq->front]; - cq->front = (cq->front + 1)&(MAX_CIRCULAR_QUE_SIZE-1); - return 0; -} - -static inline unsigned int __cq_get_elem_count(struct circular_queue *cq) -{ - return (cq->rear - cq->front)&(MAX_CIRCULAR_QUE_SIZE-1); -} - -static inline void mark_lock_accessed(struct lock_list *lock, - struct lock_list *parent) -{ - unsigned long nr; - nr = lock - list_entries; - WARN_ON(nr >= nr_list_entries); - lock->parent = parent; - set_bit(nr, bfs_accessed); -} - -static inline unsigned long lock_accessed(struct lock_list *lock) -{ - unsigned long nr; - nr = lock - list_entries; - WARN_ON(nr >= nr_list_entries); - return test_bit(nr, bfs_accessed); -} - -static inline struct lock_list *get_lock_parent(struct lock_list *child) -{ - return child->parent; -} - -static inline int get_lock_depth(struct lock_list *child) -{ - int depth = 0; - struct lock_list *parent; - - while ((parent = get_lock_parent(child))) { - child = parent; - depth++; - } - return depth; -} -- cgit v1.2.3-70-g09d2 From f607c6685774811b8112e124f10a053d77015485 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 20 Jul 2009 19:16:29 +0200 Subject: lockdep: Introduce lockdep_assert_held() Add a lockdep helper to validate that we indeed are the owner of a lock. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 8 ++++++++ kernel/lockdep.c | 33 +++++++++++++++++++++++++++++++++ kernel/sched.c | 2 ++ 3 files changed, 43 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 12aabfcb45f..a6d5e5e4d08 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -296,6 +296,10 @@ extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass, extern void lock_release(struct lockdep_map *lock, int nested, unsigned long ip); +#define lockdep_is_held(lock) lock_is_held(&(lock)->dep_map) + +extern int lock_is_held(struct lockdep_map *lock); + extern void lock_set_class(struct lockdep_map *lock, const char *name, struct lock_class_key *key, unsigned int subclass, unsigned long ip); @@ -314,6 +318,8 @@ extern void lockdep_trace_alloc(gfp_t mask); #define lockdep_depth(tsk) (debug_locks ? 
(tsk)->lockdep_depth : 0) +#define lockdep_assert_held(l) WARN_ON(debug_locks && !lockdep_is_held(l)) + #else /* !LOCKDEP */ static inline void lockdep_off(void) @@ -358,6 +364,8 @@ struct lock_class_key { }; #define lockdep_depth(tsk) (0) +#define lockdep_assert_held(l) do { } while (0) + #endif /* !LOCKDEP */ #ifdef CONFIG_LOCK_STAT diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 4b6cebe8ab3..28914a50991 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -3059,6 +3059,19 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip) check_chain_key(curr); } +static int __lock_is_held(struct lockdep_map *lock) +{ + struct task_struct *curr = current; + int i; + + for (i = 0; i < curr->lockdep_depth; i++) { + if (curr->held_locks[i].instance == lock) + return 1; + } + + return 0; +} + /* * Check whether we follow the irq-flags state precisely: */ @@ -3160,6 +3173,26 @@ void lock_release(struct lockdep_map *lock, int nested, } EXPORT_SYMBOL_GPL(lock_release); +int lock_is_held(struct lockdep_map *lock) +{ + unsigned long flags; + int ret = 0; + + if (unlikely(current->lockdep_recursion)) + return ret; + + raw_local_irq_save(flags); + check_flags(flags); + + current->lockdep_recursion = 1; + ret = __lock_is_held(lock); + current->lockdep_recursion = 0; + raw_local_irq_restore(flags); + + return ret; +} +EXPORT_SYMBOL_GPL(lock_is_held); + void lockdep_set_current_reclaim_state(gfp_t gfp_mask) { current->lockdep_reclaim_gfp = gfp_mask; diff --git a/kernel/sched.c b/kernel/sched.c index 1b59e265273..2c75f7daa43 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6609,6 +6609,8 @@ int cond_resched_lock(spinlock_t *lock) int resched = should_resched(); int ret = 0; + lockdep_assert_held(lock); + if (spin_needbreak(lock) || resched) { spin_unlock(lock); if (resched) -- cgit v1.2.3-70-g09d2 From bb97a91e2549a7f2df9c21d32542582f549ab3ec Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 20 Jul 2009 19:15:35 +0200 Subject: lockdep: Deal with many similar locks spin_lock_nest_lock() allows to take many instances of the same class, this can easily lead to overflow of MAX_LOCK_DEPTH. To avoid this overflow, we'll stop accounting instances but start reference counting the class in the held_lock structure. [ We could maintain a list of instances, if we'd move the hlock stuff into __lock_acquired(), but that would require significant modifications to the current code. ] We restrict this mode to spin_lock_nest_lock() only, because it degrades the lockdep quality due to lost of instance. For lockstat this means we don't track lock statistics for any but the first lock in the series. Currently nesting is limited to 11 bits because that was the spare space available in held_lock. This yields a 2048 instances maximium. 
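To make the new mode concrete, here is a minimal sketch of the spin_lock_nest_lock() pattern this patch is aimed at (struct my_obj, all_objs_lock and the function names are hypothetical, not taken from the patch): every inner lock shares one lock class, so instead of pushing one held_lock per object, lockdep now bumps hlock->references on the first entry, up to the 2048 instances the 11-bit field allows.

#include <linux/list.h>
#include <linux/spinlock.h>

struct my_obj {
	spinlock_t		lock;
	struct list_head	node;
};

static DEFINE_SPINLOCK(all_objs_lock);		/* the outer "nest" lock */

static void lock_all_objs(struct list_head *objs)
{
	struct my_obj *obj;

	spin_lock(&all_objs_lock);
	/*
	 * All obj->lock instances are the same class; taken under the nest
	 * lock, lockdep folds them into one held_lock with a reference
	 * count instead of overflowing MAX_LOCK_DEPTH.
	 */
	list_for_each_entry(obj, objs, node)
		spin_lock_nest_lock(&obj->lock, &all_objs_lock);
}

static void unlock_all_objs(struct list_head *objs)
{
	struct my_obj *obj;

	list_for_each_entry(obj, objs, node)
		spin_unlock(&obj->lock);
	spin_unlock(&all_objs_lock);
}

As the message notes, lockstat then only tracks the first lock in such a series; the later instances are folded into its reference count.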
Signed-off-by: Peter Zijlstra Cc: Marcelo Tosatti Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 4 ++- kernel/lockdep.c | 89 ++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 80 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index a6d5e5e4d08..47d42eff612 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -213,10 +213,12 @@ struct held_lock { * interrupt context: */ unsigned int irq_context:2; /* bit 0 - soft, bit 1 - hard */ - unsigned int trylock:1; + unsigned int trylock:1; /* 16 bits */ + unsigned int read:2; /* see lock_acquire() comment */ unsigned int check:2; /* see lock_acquire() comment */ unsigned int hardirqs_off:1; + unsigned int references:11; /* 32 bits */ }; /* diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 28914a50991..0bb246e21cd 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -2708,13 +2708,15 @@ EXPORT_SYMBOL_GPL(lockdep_init_map); */ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, int trylock, int read, int check, int hardirqs_off, - struct lockdep_map *nest_lock, unsigned long ip) + struct lockdep_map *nest_lock, unsigned long ip, + int references) { struct task_struct *curr = current; struct lock_class *class = NULL; struct held_lock *hlock; unsigned int depth, id; int chain_head = 0; + int class_idx; u64 chain_key; if (!prove_locking) @@ -2762,10 +2764,24 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, if (DEBUG_LOCKS_WARN_ON(depth >= MAX_LOCK_DEPTH)) return 0; + class_idx = class - lock_classes + 1; + + if (depth) { + hlock = curr->held_locks + depth - 1; + if (hlock->class_idx == class_idx && nest_lock) { + if (hlock->references) + hlock->references++; + else + hlock->references = 2; + + return 1; + } + } + hlock = curr->held_locks + depth; if (DEBUG_LOCKS_WARN_ON(!class)) return 0; - hlock->class_idx = class - lock_classes + 1; + hlock->class_idx = class_idx; hlock->acquire_ip = ip; hlock->instance = lock; hlock->nest_lock = nest_lock; @@ -2773,6 +2789,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, hlock->read = read; hlock->check = check; hlock->hardirqs_off = !!hardirqs_off; + hlock->references = references; #ifdef CONFIG_LOCK_STAT hlock->waittime_stamp = 0; hlock->holdtime_stamp = sched_clock(); @@ -2881,6 +2898,30 @@ static int check_unlock(struct task_struct *curr, struct lockdep_map *lock, return 1; } +static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock) +{ + if (hlock->instance == lock) + return 1; + + if (hlock->references) { + struct lock_class *class = lock->class_cache; + + if (!class) + class = look_up_lock_class(lock, 0); + + if (DEBUG_LOCKS_WARN_ON(!class)) + return 0; + + if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock)) + return 0; + + if (hlock->class_idx == class - lock_classes + 1) + return 1; + } + + return 0; +} + static int __lock_set_class(struct lockdep_map *lock, const char *name, struct lock_class_key *key, unsigned int subclass, @@ -2904,7 +2945,7 @@ __lock_set_class(struct lockdep_map *lock, const char *name, */ if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) break; - if (hlock->instance == lock) + if (match_held_lock(hlock, lock)) goto found_it; prev_hlock = hlock; } @@ -2923,7 +2964,8 @@ found_it: if (!__lock_acquire(hlock->instance, hlock_class(hlock)->subclass, hlock->trylock, hlock->read, hlock->check, hlock->hardirqs_off, - hlock->nest_lock, 
hlock->acquire_ip)) + hlock->nest_lock, hlock->acquire_ip, + hlock->references)) return 0; } @@ -2962,20 +3004,34 @@ lock_release_non_nested(struct task_struct *curr, */ if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) break; - if (hlock->instance == lock) + if (match_held_lock(hlock, lock)) goto found_it; prev_hlock = hlock; } return print_unlock_inbalance_bug(curr, lock, ip); found_it: - lock_release_holdtime(hlock); + if (hlock->instance == lock) + lock_release_holdtime(hlock); + + if (hlock->references) { + hlock->references--; + if (hlock->references) { + /* + * We had, and after removing one, still have + * references, the current lock stack is still + * valid. We're done! + */ + return 1; + } + } /* * We have the right lock to unlock, 'hlock' points to it. * Now we remove it from the stack, and add back the other * entries (if any), recalculating the hash along the way: */ + curr->lockdep_depth = i; curr->curr_chain_key = hlock->prev_chain_key; @@ -2984,7 +3040,8 @@ found_it: if (!__lock_acquire(hlock->instance, hlock_class(hlock)->subclass, hlock->trylock, hlock->read, hlock->check, hlock->hardirqs_off, - hlock->nest_lock, hlock->acquire_ip)) + hlock->nest_lock, hlock->acquire_ip, + hlock->references)) return 0; } @@ -3014,7 +3071,7 @@ static int lock_release_nested(struct task_struct *curr, /* * Is the unlock non-nested: */ - if (hlock->instance != lock) + if (hlock->instance != lock || hlock->references) return lock_release_non_nested(curr, lock, ip); curr->lockdep_depth--; @@ -3065,7 +3122,9 @@ static int __lock_is_held(struct lockdep_map *lock) int i; for (i = 0; i < curr->lockdep_depth; i++) { - if (curr->held_locks[i].instance == lock) + struct held_lock *hlock = curr->held_locks + i; + + if (match_held_lock(hlock, lock)) return 1; } @@ -3148,7 +3207,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, current->lockdep_recursion = 1; __lock_acquire(lock, subclass, trylock, read, check, - irqs_disabled_flags(flags), nest_lock, ip); + irqs_disabled_flags(flags), nest_lock, ip, 0); current->lockdep_recursion = 0; raw_local_irq_restore(flags); } @@ -3252,7 +3311,7 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip) */ if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) break; - if (hlock->instance == lock) + if (match_held_lock(hlock, lock)) goto found_it; prev_hlock = hlock; } @@ -3260,6 +3319,9 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip) return; found_it: + if (hlock->instance != lock) + return; + hlock->waittime_stamp = sched_clock(); contention_point = lock_point(hlock_class(hlock)->contention_point, ip); @@ -3299,7 +3361,7 @@ __lock_acquired(struct lockdep_map *lock, unsigned long ip) */ if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) break; - if (hlock->instance == lock) + if (match_held_lock(hlock, lock)) goto found_it; prev_hlock = hlock; } @@ -3307,6 +3369,9 @@ __lock_acquired(struct lockdep_map *lock, unsigned long ip) return; found_it: + if (hlock->instance != lock) + return; + cpu = smp_processor_id(); if (hlock->waittime_stamp) { now = sched_clock(); -- cgit v1.2.3-70-g09d2 From e351b660fddd4df76cc4635f896d311ed0ff3752 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 22 Jul 2009 22:48:09 +0800 Subject: lockdep: Reintroduce generation count to make BFS faster We still can apply DaveM's generation count optimization to BFS, based on the following idea: - before doing each BFS, increase the global generation id by 1 - if one node in the graph has been visited, mark it as 
visited by storing the current global generation id into the node's dep_gen_id field - so we can decide if one node has been visited already, by comparing the node's dep_gen_id with the global generation id. By applying DaveM's generation count optimization to current implementation of BFS, we gain the following advantages: - we save MAX_LOCKDEP_ENTRIES/8 bytes memory; - we remove the bitmap_zero(bfs_accessed, MAX_LOCKDEP_ENTRIES); in each BFS, which is very time-consuming since MAX_LOCKDEP_ENTRIES may be very large.(16384UL) Signed-off-by: Ming Lei Signed-off-by: Peter Zijlstra Cc: "David S. Miller" LKML-Reference: <1248274089-6358-1-git-send-email-tom.leiming@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 1 + kernel/lockdep.c | 11 ++++++----- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 47d42eff612..9ccf0e286b2 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -58,6 +58,7 @@ struct lock_class { struct lockdep_subclass_key *key; unsigned int subclass; + unsigned int dep_gen_id; /* * IRQ/softirq usage tracking bits: diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 0bb246e21cd..2b443b5090a 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -861,14 +861,15 @@ struct circular_queue { }; static struct circular_queue lock_cq; -static unsigned long bfs_accessed[BITS_TO_LONGS(MAX_LOCKDEP_ENTRIES)]; unsigned int max_bfs_queue_depth; +static unsigned int lockdep_dependency_gen_id; + static inline void __cq_init(struct circular_queue *cq) { cq->front = cq->rear = 0; - bitmap_zero(bfs_accessed, MAX_LOCKDEP_ENTRIES); + lockdep_dependency_gen_id++; } static inline int __cq_empty(struct circular_queue *cq) @@ -914,7 +915,7 @@ static inline void mark_lock_accessed(struct lock_list *lock, nr = lock - list_entries; WARN_ON(nr >= nr_list_entries); lock->parent = parent; - set_bit(nr, bfs_accessed); + lock->class->dep_gen_id = lockdep_dependency_gen_id; } static inline unsigned long lock_accessed(struct lock_list *lock) @@ -923,7 +924,7 @@ static inline unsigned long lock_accessed(struct lock_list *lock) nr = lock - list_entries; WARN_ON(nr >= nr_list_entries); - return test_bit(nr, bfs_accessed); + return lock->class->dep_gen_id == lockdep_dependency_gen_id; } static inline struct lock_list *get_lock_parent(struct lock_list *child) @@ -3604,7 +3605,7 @@ void __init lockdep_info(void) sizeof(struct lock_chain) * MAX_LOCKDEP_CHAINS + sizeof(struct list_head) * CHAINHASH_SIZE) / 1024 #ifdef CONFIG_PROVE_LOCKING - + sizeof(struct circular_queue) + sizeof(bfs_accessed) + + sizeof(struct circular_queue) #endif ); -- cgit v1.2.3-70-g09d2 From 69d0ee7377eef808e34ba5542b554ec97244b871 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 31 Aug 2009 14:43:36 +0200 Subject: locking: Move spinlock function bodies to header file Move spinlock function bodies to header file by creating a static inline version of each variant. Use the inline version on the out-of-line code. This shouldn't make any difference besides that the spinlock code can now be used to generate inlined spinlock code. 
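Condensed to a single variant, the shape of the conversion is as follows (taken straight from the hunks below; _spin_lock_irq stands in for all of the variants converted):

/* include/linux/spinlock_api_smp.h: the body, now available for inlining */
static inline void __spin_lock_irq(spinlock_t *lock)
{
	local_irq_disable();
	preempt_disable();
	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
	LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
}

/* kernel/spinlock.c: the out-of-line symbol becomes a thin wrapper */
void __lockfunc _spin_lock_irq(spinlock_t *lock)
{
	__spin_lock_irq(lock);
}
EXPORT_SYMBOL(_spin_lock_irq);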
Signed-off-by: Heiko Carstens Acked-by: Arnd Bergmann Acked-by: Peter Zijlstra Cc: Nick Piggin Cc: Martin Schwidefsky Cc: Horst Hartmann Cc: Christian Ehrhardt Cc: Andrew Morton Cc: Linus Torvalds Cc: David Miller Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Geert Uytterhoeven Cc: Roman Zippel Cc: LKML-Reference: <20090831124417.859022429@de.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/spinlock.h | 18 +-- include/linux/spinlock_api_smp.h | 263 +++++++++++++++++++++++++++++++++++++++ kernel/spinlock.c | 174 +++++--------------------- 3 files changed, 300 insertions(+), 155 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 4be57ab0347..da76a06556b 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -143,15 +143,6 @@ static inline void smp_mb__after_lock(void) { smp_mb(); } */ #define spin_unlock_wait(lock) __raw_spin_unlock_wait(&(lock)->raw_lock) -/* - * Pull the _spin_*()/_read_*()/_write_*() functions/declarations: - */ -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) -# include -#else -# include -#endif - #ifdef CONFIG_DEBUG_SPINLOCK extern void _raw_spin_lock(spinlock_t *lock); #define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock) @@ -380,4 +371,13 @@ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock); */ #define spin_can_lock(lock) (!spin_is_locked(lock)) +/* + * Pull the _spin_*()/_read_*()/_write_*() functions/declarations: + */ +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) +# include +#else +# include +#endif + #endif /* __LINUX_SPINLOCK_H */ diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index d79845d034b..6b108f5fb14 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -60,4 +60,267 @@ void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) __releases(lock); +static inline int __spin_trylock(spinlock_t *lock) +{ + preempt_disable(); + if (_raw_spin_trylock(lock)) { + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); + return 1; + } + preempt_enable(); + return 0; +} + +static inline int __read_trylock(rwlock_t *lock) +{ + preempt_disable(); + if (_raw_read_trylock(lock)) { + rwlock_acquire_read(&lock->dep_map, 0, 1, _RET_IP_); + return 1; + } + preempt_enable(); + return 0; +} + +static inline int __write_trylock(rwlock_t *lock) +{ + preempt_disable(); + if (_raw_write_trylock(lock)) { + rwlock_acquire(&lock->dep_map, 0, 1, _RET_IP_); + return 1; + } + preempt_enable(); + return 0; +} + +/* + * If lockdep is enabled then we use the non-preemption spin-ops + * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are + * not re-enabled during lock-acquire (which the preempt-spin-ops do): + */ +#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) + +static inline void __read_lock(rwlock_t *lock) +{ + preempt_disable(); + rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); +} + +static inline unsigned long __spin_lock_irqsave(spinlock_t *lock) +{ + unsigned long flags; + + local_irq_save(flags); + preempt_disable(); + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); + /* + * On lockdep we dont want the hand-coded irq-enable of + * _raw_spin_lock_flags() code, because lockdep assumes + * that interrupts are not re-enabled during lock-acquire: + */ +#ifdef CONFIG_LOCKDEP + LOCK_CONTENDED(lock, 
_raw_spin_trylock, _raw_spin_lock); +#else + _raw_spin_lock_flags(lock, &flags); +#endif + return flags; +} + +static inline void __spin_lock_irq(spinlock_t *lock) +{ + local_irq_disable(); + preempt_disable(); + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); +} + +static inline void __spin_lock_bh(spinlock_t *lock) +{ + local_bh_disable(); + preempt_disable(); + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); +} + +static inline unsigned long __read_lock_irqsave(rwlock_t *lock) +{ + unsigned long flags; + + local_irq_save(flags); + preempt_disable(); + rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED_FLAGS(lock, _raw_read_trylock, _raw_read_lock, + _raw_read_lock_flags, &flags); + return flags; +} + +static inline void __read_lock_irq(rwlock_t *lock) +{ + local_irq_disable(); + preempt_disable(); + rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); +} + +static inline void __read_lock_bh(rwlock_t *lock) +{ + local_bh_disable(); + preempt_disable(); + rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); +} + +static inline unsigned long __write_lock_irqsave(rwlock_t *lock) +{ + unsigned long flags; + + local_irq_save(flags); + preempt_disable(); + rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED_FLAGS(lock, _raw_write_trylock, _raw_write_lock, + _raw_write_lock_flags, &flags); + return flags; +} + +static inline void __write_lock_irq(rwlock_t *lock) +{ + local_irq_disable(); + preempt_disable(); + rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); +} + +static inline void __write_lock_bh(rwlock_t *lock) +{ + local_bh_disable(); + preempt_disable(); + rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); +} + +static inline void __spin_lock(spinlock_t *lock) +{ + preempt_disable(); + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); +} + +static inline void __write_lock(rwlock_t *lock) +{ + preempt_disable(); + rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); +} + +#endif /* CONFIG_PREEMPT */ + +static inline void __spin_unlock(spinlock_t *lock) +{ + spin_release(&lock->dep_map, 1, _RET_IP_); + _raw_spin_unlock(lock); + preempt_enable(); +} + +static inline void __write_unlock(rwlock_t *lock) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_write_unlock(lock); + preempt_enable(); +} + +static inline void __read_unlock(rwlock_t *lock) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_read_unlock(lock); + preempt_enable(); +} + +static inline void __spin_unlock_irqrestore(spinlock_t *lock, + unsigned long flags) +{ + spin_release(&lock->dep_map, 1, _RET_IP_); + _raw_spin_unlock(lock); + local_irq_restore(flags); + preempt_enable(); +} + +static inline void __spin_unlock_irq(spinlock_t *lock) +{ + spin_release(&lock->dep_map, 1, _RET_IP_); + _raw_spin_unlock(lock); + local_irq_enable(); + preempt_enable(); +} + +static inline void __spin_unlock_bh(spinlock_t *lock) +{ + spin_release(&lock->dep_map, 1, _RET_IP_); + _raw_spin_unlock(lock); + preempt_enable_no_resched(); + local_bh_enable_ip((unsigned long)__builtin_return_address(0)); +} + +static inline void 
__read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_read_unlock(lock); + local_irq_restore(flags); + preempt_enable(); +} + +static inline void __read_unlock_irq(rwlock_t *lock) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_read_unlock(lock); + local_irq_enable(); + preempt_enable(); +} + +static inline void __read_unlock_bh(rwlock_t *lock) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_read_unlock(lock); + preempt_enable_no_resched(); + local_bh_enable_ip((unsigned long)__builtin_return_address(0)); +} + +static inline void __write_unlock_irqrestore(rwlock_t *lock, + unsigned long flags) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_write_unlock(lock); + local_irq_restore(flags); + preempt_enable(); +} + +static inline void __write_unlock_irq(rwlock_t *lock) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_write_unlock(lock); + local_irq_enable(); + preempt_enable(); +} + +static inline void __write_unlock_bh(rwlock_t *lock) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_write_unlock(lock); + preempt_enable_no_resched(); + local_bh_enable_ip((unsigned long)__builtin_return_address(0)); +} + +static inline int __spin_trylock_bh(spinlock_t *lock) +{ + local_bh_disable(); + preempt_disable(); + if (_raw_spin_trylock(lock)) { + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); + return 1; + } + preempt_enable_no_resched(); + local_bh_enable_ip((unsigned long)__builtin_return_address(0)); + return 0; +} + #endif /* __LINUX_SPINLOCK_API_SMP_H */ diff --git a/kernel/spinlock.c b/kernel/spinlock.c index 7932653c4eb..2c000f5c070 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c @@ -23,40 +23,19 @@ int __lockfunc _spin_trylock(spinlock_t *lock) { - preempt_disable(); - if (_raw_spin_trylock(lock)) { - spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); - return 1; - } - - preempt_enable(); - return 0; + return __spin_trylock(lock); } EXPORT_SYMBOL(_spin_trylock); int __lockfunc _read_trylock(rwlock_t *lock) { - preempt_disable(); - if (_raw_read_trylock(lock)) { - rwlock_acquire_read(&lock->dep_map, 0, 1, _RET_IP_); - return 1; - } - - preempt_enable(); - return 0; + return __read_trylock(lock); } EXPORT_SYMBOL(_read_trylock); int __lockfunc _write_trylock(rwlock_t *lock) { - preempt_disable(); - if (_raw_write_trylock(lock)) { - rwlock_acquire(&lock->dep_map, 0, 1, _RET_IP_); - return 1; - } - - preempt_enable(); - return 0; + return __write_trylock(lock); } EXPORT_SYMBOL(_write_trylock); @@ -69,129 +48,74 @@ EXPORT_SYMBOL(_write_trylock); void __lockfunc _read_lock(rwlock_t *lock) { - preempt_disable(); - rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); + __read_lock(lock); } EXPORT_SYMBOL(_read_lock); unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) { - unsigned long flags; - - local_irq_save(flags); - preempt_disable(); - spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); - /* - * On lockdep we dont want the hand-coded irq-enable of - * _raw_spin_lock_flags() code, because lockdep assumes - * that interrupts are not re-enabled during lock-acquire: - */ -#ifdef CONFIG_LOCKDEP - LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); -#else - _raw_spin_lock_flags(lock, &flags); -#endif - return flags; + return __spin_lock_irqsave(lock); } EXPORT_SYMBOL(_spin_lock_irqsave); void __lockfunc _spin_lock_irq(spinlock_t *lock) { - local_irq_disable(); - preempt_disable(); - spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); - 
LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); + __spin_lock_irq(lock); } EXPORT_SYMBOL(_spin_lock_irq); void __lockfunc _spin_lock_bh(spinlock_t *lock) { - local_bh_disable(); - preempt_disable(); - spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); + __spin_lock_bh(lock); } EXPORT_SYMBOL(_spin_lock_bh); unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock) { - unsigned long flags; - - local_irq_save(flags); - preempt_disable(); - rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED_FLAGS(lock, _raw_read_trylock, _raw_read_lock, - _raw_read_lock_flags, &flags); - return flags; + return __read_lock_irqsave(lock); } EXPORT_SYMBOL(_read_lock_irqsave); void __lockfunc _read_lock_irq(rwlock_t *lock) { - local_irq_disable(); - preempt_disable(); - rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); + __read_lock_irq(lock); } EXPORT_SYMBOL(_read_lock_irq); void __lockfunc _read_lock_bh(rwlock_t *lock) { - local_bh_disable(); - preempt_disable(); - rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); + __read_lock_bh(lock); } EXPORT_SYMBOL(_read_lock_bh); unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock) { - unsigned long flags; - - local_irq_save(flags); - preempt_disable(); - rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED_FLAGS(lock, _raw_write_trylock, _raw_write_lock, - _raw_write_lock_flags, &flags); - return flags; + return __write_lock_irqsave(lock); } EXPORT_SYMBOL(_write_lock_irqsave); void __lockfunc _write_lock_irq(rwlock_t *lock) { - local_irq_disable(); - preempt_disable(); - rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); + __write_lock_irq(lock); } EXPORT_SYMBOL(_write_lock_irq); void __lockfunc _write_lock_bh(rwlock_t *lock) { - local_bh_disable(); - preempt_disable(); - rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); + __write_lock_bh(lock); } EXPORT_SYMBOL(_write_lock_bh); void __lockfunc _spin_lock(spinlock_t *lock) { - preempt_disable(); - spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); + __spin_lock(lock); } - EXPORT_SYMBOL(_spin_lock); void __lockfunc _write_lock(rwlock_t *lock) { - preempt_disable(); - rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); + __write_lock(lock); } - EXPORT_SYMBOL(_write_lock); #else /* CONFIG_PREEMPT: */ @@ -320,121 +244,79 @@ EXPORT_SYMBOL(_spin_lock_nest_lock); void __lockfunc _spin_unlock(spinlock_t *lock) { - spin_release(&lock->dep_map, 1, _RET_IP_); - _raw_spin_unlock(lock); - preempt_enable(); + __spin_unlock(lock); } EXPORT_SYMBOL(_spin_unlock); void __lockfunc _write_unlock(rwlock_t *lock) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_write_unlock(lock); - preempt_enable(); + __write_unlock(lock); } EXPORT_SYMBOL(_write_unlock); void __lockfunc _read_unlock(rwlock_t *lock) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_read_unlock(lock); - preempt_enable(); + __read_unlock(lock); } EXPORT_SYMBOL(_read_unlock); void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) { - spin_release(&lock->dep_map, 1, _RET_IP_); - _raw_spin_unlock(lock); - local_irq_restore(flags); - preempt_enable(); + __spin_unlock_irqrestore(lock, flags); } 
EXPORT_SYMBOL(_spin_unlock_irqrestore); void __lockfunc _spin_unlock_irq(spinlock_t *lock) { - spin_release(&lock->dep_map, 1, _RET_IP_); - _raw_spin_unlock(lock); - local_irq_enable(); - preempt_enable(); + __spin_unlock_irq(lock); } EXPORT_SYMBOL(_spin_unlock_irq); void __lockfunc _spin_unlock_bh(spinlock_t *lock) { - spin_release(&lock->dep_map, 1, _RET_IP_); - _raw_spin_unlock(lock); - preempt_enable_no_resched(); - local_bh_enable_ip((unsigned long)__builtin_return_address(0)); + __spin_unlock_bh(lock); } EXPORT_SYMBOL(_spin_unlock_bh); void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_read_unlock(lock); - local_irq_restore(flags); - preempt_enable(); + __read_unlock_irqrestore(lock, flags); } EXPORT_SYMBOL(_read_unlock_irqrestore); void __lockfunc _read_unlock_irq(rwlock_t *lock) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_read_unlock(lock); - local_irq_enable(); - preempt_enable(); + __read_unlock_irq(lock); } EXPORT_SYMBOL(_read_unlock_irq); void __lockfunc _read_unlock_bh(rwlock_t *lock) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_read_unlock(lock); - preempt_enable_no_resched(); - local_bh_enable_ip((unsigned long)__builtin_return_address(0)); + __read_unlock_bh(lock); } EXPORT_SYMBOL(_read_unlock_bh); void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_write_unlock(lock); - local_irq_restore(flags); - preempt_enable(); + __write_unlock_irqrestore(lock, flags); } EXPORT_SYMBOL(_write_unlock_irqrestore); void __lockfunc _write_unlock_irq(rwlock_t *lock) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_write_unlock(lock); - local_irq_enable(); - preempt_enable(); + __write_unlock_irq(lock); } EXPORT_SYMBOL(_write_unlock_irq); void __lockfunc _write_unlock_bh(rwlock_t *lock) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_write_unlock(lock); - preempt_enable_no_resched(); - local_bh_enable_ip((unsigned long)__builtin_return_address(0)); + __write_unlock_bh(lock); } EXPORT_SYMBOL(_write_unlock_bh); int __lockfunc _spin_trylock_bh(spinlock_t *lock) { - local_bh_disable(); - preempt_disable(); - if (_raw_spin_trylock(lock)) { - spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); - return 1; - } - - preempt_enable_no_resched(); - local_bh_enable_ip((unsigned long)__builtin_return_address(0)); - return 0; + return __spin_trylock_bh(lock); } EXPORT_SYMBOL(_spin_trylock_bh); -- cgit v1.2.3-70-g09d2 From 892a7c67c12da63fa4b51728bbe5b982356a090a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 31 Aug 2009 14:43:37 +0200 Subject: locking: Allow arch-inlined spinlocks This allows an architecture to specify per lock variant if the locking code should be kept out-of-line or inlined. If an architecure wants out-of-line locking code no change is needed. To force inlining of e.g. spin_lock() the line: #define __always_inline__spin_lock needs to be added to arch/<...>/include/asm/spinlock.h If CONFIG_DEBUG_SPINLOCK or CONFIG_GENERIC_LOCKBREAK are defined the per architecture defines are (partly) ignored and still out-of-line spinlock code will be generated. 
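For illustration, an architecture that wants, say, its lock and unlock fast paths inlined would add lines like the following to its asm/spinlock.h (the arch path and the particular selection of variants are hypothetical; any subset of the _spin_*/_read_*/_write_* ops can be chosen):

/* arch/myarch/include/asm/spinlock.h -- hypothetical architecture */
#define __always_inline__spin_lock
#define __always_inline__spin_unlock
#define __always_inline__spin_lock_irqsave
#define __always_inline__spin_unlock_irqrestore

With such a define in place, spinlock_api_smp.h maps the corresponding _spin_*() call directly to the static inline __spin_*() body, and the matching #ifndef guard in kernel/spinlock.c drops the now-unneeded out-of-line copy.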
Signed-off-by: Heiko Carstens Acked-by: Peter Zijlstra Cc: Arnd Bergmann Cc: Nick Piggin Cc: Martin Schwidefsky Cc: Horst Hartmann Cc: Christian Ehrhardt Cc: Andrew Morton Cc: Linus Torvalds Cc: David Miller Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Geert Uytterhoeven Cc: Roman Zippel Cc: LKML-Reference: <20090831124418.375299024@de.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/spinlock_api_smp.h | 119 +++++++++++++++++++++++++++++++++++++++ kernel/spinlock.c | 56 ++++++++++++++++++ 2 files changed, 175 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 6b108f5fb14..1a411e3fab9 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -60,6 +60,125 @@ void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) __releases(lock); +#ifndef CONFIG_DEBUG_SPINLOCK +#ifndef CONFIG_GENERIC_LOCKBREAK + +#ifdef __always_inline__spin_lock +#define _spin_lock(lock) __spin_lock(lock) +#endif + +#ifdef __always_inline__read_lock +#define _read_lock(lock) __read_lock(lock) +#endif + +#ifdef __always_inline__write_lock +#define _write_lock(lock) __write_lock(lock) +#endif + +#ifdef __always_inline__spin_lock_bh +#define _spin_lock_bh(lock) __spin_lock_bh(lock) +#endif + +#ifdef __always_inline__read_lock_bh +#define _read_lock_bh(lock) __read_lock_bh(lock) +#endif + +#ifdef __always_inline__write_lock_bh +#define _write_lock_bh(lock) __write_lock_bh(lock) +#endif + +#ifdef __always_inline__spin_lock_irq +#define _spin_lock_irq(lock) __spin_lock_irq(lock) +#endif + +#ifdef __always_inline__read_lock_irq +#define _read_lock_irq(lock) __read_lock_irq(lock) +#endif + +#ifdef __always_inline__write_lock_irq +#define _write_lock_irq(lock) __write_lock_irq(lock) +#endif + +#ifdef __always_inline__spin_lock_irqsave +#define _spin_lock_irqsave(lock) __spin_lock_irqsave(lock) +#endif + +#ifdef __always_inline__read_lock_irqsave +#define _read_lock_irqsave(lock) __read_lock_irqsave(lock) +#endif + +#ifdef __always_inline__write_lock_irqsave +#define _write_lock_irqsave(lock) __write_lock_irqsave(lock) +#endif + +#endif /* !CONFIG_GENERIC_LOCKBREAK */ + +#ifdef __always_inline__spin_trylock +#define _spin_trylock(lock) __spin_trylock(lock) +#endif + +#ifdef __always_inline__read_trylock +#define _read_trylock(lock) __read_trylock(lock) +#endif + +#ifdef __always_inline__write_trylock +#define _write_trylock(lock) __write_trylock(lock) +#endif + +#ifdef __always_inline__spin_trylock_bh +#define _spin_trylock_bh(lock) __spin_trylock_bh(lock) +#endif + +#ifdef __always_inline__spin_unlock +#define _spin_unlock(lock) __spin_unlock(lock) +#endif + +#ifdef __always_inline__read_unlock +#define _read_unlock(lock) __read_unlock(lock) +#endif + +#ifdef __always_inline__write_unlock +#define _write_unlock(lock) __write_unlock(lock) +#endif + +#ifdef __always_inline__spin_unlock_bh +#define _spin_unlock_bh(lock) __spin_unlock_bh(lock) +#endif + +#ifdef __always_inline__read_unlock_bh +#define _read_unlock_bh(lock) __read_unlock_bh(lock) +#endif + +#ifdef __always_inline__write_unlock_bh +#define _write_unlock_bh(lock) __write_unlock_bh(lock) +#endif + +#ifdef __always_inline__spin_unlock_irq +#define _spin_unlock_irq(lock) __spin_unlock_irq(lock) +#endif + +#ifdef __always_inline__read_unlock_irq +#define _read_unlock_irq(lock) __read_unlock_irq(lock) +#endif + +#ifdef __always_inline__write_unlock_irq +#define 
_write_unlock_irq(lock) __write_unlock_irq(lock) +#endif + +#ifdef __always_inline__spin_unlock_irqrestore +#define _spin_unlock_irqrestore(lock, flags) __spin_unlock_irqrestore(lock, flags) +#endif + +#ifdef __always_inline__read_unlock_irqrestore +#define _read_unlock_irqrestore(lock, flags) __read_unlock_irqrestore(lock, flags) +#endif + +#ifdef __always_inline__write_unlock_irqrestore +#define _write_unlock_irqrestore(lock, flags) __write_unlock_irqrestore(lock, flags) +#endif + +#endif /* CONFIG_DEBUG_SPINLOCK */ + static inline int __spin_trylock(spinlock_t *lock) { preempt_disable(); diff --git a/kernel/spinlock.c b/kernel/spinlock.c index 2c000f5c070..5ddab730cb2 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c @@ -21,23 +21,29 @@ #include #include +#ifndef _spin_trylock int __lockfunc _spin_trylock(spinlock_t *lock) { return __spin_trylock(lock); } EXPORT_SYMBOL(_spin_trylock); +#endif +#ifndef _read_trylock int __lockfunc _read_trylock(rwlock_t *lock) { return __read_trylock(lock); } EXPORT_SYMBOL(_read_trylock); +#endif +#ifndef _write_trylock int __lockfunc _write_trylock(rwlock_t *lock) { return __write_trylock(lock); } EXPORT_SYMBOL(_write_trylock); +#endif /* * If lockdep is enabled then we use the non-preemption spin-ops @@ -46,77 +52,101 @@ EXPORT_SYMBOL(_write_trylock); */ #if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) +#ifndef _read_lock void __lockfunc _read_lock(rwlock_t *lock) { __read_lock(lock); } EXPORT_SYMBOL(_read_lock); +#endif +#ifndef _spin_lock_irqsave unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) { return __spin_lock_irqsave(lock); } EXPORT_SYMBOL(_spin_lock_irqsave); +#endif +#ifndef _spin_lock_irq void __lockfunc _spin_lock_irq(spinlock_t *lock) { __spin_lock_irq(lock); } EXPORT_SYMBOL(_spin_lock_irq); +#endif +#ifndef _spin_lock_bh void __lockfunc _spin_lock_bh(spinlock_t *lock) { __spin_lock_bh(lock); } EXPORT_SYMBOL(_spin_lock_bh); +#endif +#ifndef _read_lock_irqsave unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock) { return __read_lock_irqsave(lock); } EXPORT_SYMBOL(_read_lock_irqsave); +#endif +#ifndef _read_lock_irq void __lockfunc _read_lock_irq(rwlock_t *lock) { __read_lock_irq(lock); } EXPORT_SYMBOL(_read_lock_irq); +#endif +#ifndef _read_lock_bh void __lockfunc _read_lock_bh(rwlock_t *lock) { __read_lock_bh(lock); } EXPORT_SYMBOL(_read_lock_bh); +#endif +#ifndef _write_lock_irqsave unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock) { return __write_lock_irqsave(lock); } EXPORT_SYMBOL(_write_lock_irqsave); +#endif +#ifndef _write_lock_irq void __lockfunc _write_lock_irq(rwlock_t *lock) { __write_lock_irq(lock); } EXPORT_SYMBOL(_write_lock_irq); +#endif +#ifndef _write_lock_bh void __lockfunc _write_lock_bh(rwlock_t *lock) { __write_lock_bh(lock); } EXPORT_SYMBOL(_write_lock_bh); +#endif +#ifndef _spin_lock void __lockfunc _spin_lock(spinlock_t *lock) { __spin_lock(lock); } EXPORT_SYMBOL(_spin_lock); +#endif +#ifndef _write_lock void __lockfunc _write_lock(rwlock_t *lock) { __write_lock(lock); } EXPORT_SYMBOL(_write_lock); +#endif #else /* CONFIG_PREEMPT: */ @@ -242,83 +272,109 @@ EXPORT_SYMBOL(_spin_lock_nest_lock); #endif +#ifndef _spin_unlock void __lockfunc _spin_unlock(spinlock_t *lock) { __spin_unlock(lock); } EXPORT_SYMBOL(_spin_unlock); +#endif +#ifndef _write_unlock void __lockfunc _write_unlock(rwlock_t *lock) { __write_unlock(lock); } EXPORT_SYMBOL(_write_unlock); +#endif +#ifndef _read_unlock void __lockfunc _read_unlock(rwlock_t *lock) { __read_unlock(lock); } 
EXPORT_SYMBOL(_read_unlock); +#endif +#ifndef _spin_unlock_irqrestore void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) { __spin_unlock_irqrestore(lock, flags); } EXPORT_SYMBOL(_spin_unlock_irqrestore); +#endif +#ifndef _spin_unlock_irq void __lockfunc _spin_unlock_irq(spinlock_t *lock) { __spin_unlock_irq(lock); } EXPORT_SYMBOL(_spin_unlock_irq); +#endif +#ifndef _spin_unlock_bh void __lockfunc _spin_unlock_bh(spinlock_t *lock) { __spin_unlock_bh(lock); } EXPORT_SYMBOL(_spin_unlock_bh); +#endif +#ifndef _read_unlock_irqrestore void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) { __read_unlock_irqrestore(lock, flags); } EXPORT_SYMBOL(_read_unlock_irqrestore); +#endif +#ifndef _read_unlock_irq void __lockfunc _read_unlock_irq(rwlock_t *lock) { __read_unlock_irq(lock); } EXPORT_SYMBOL(_read_unlock_irq); +#endif +#ifndef _read_unlock_bh void __lockfunc _read_unlock_bh(rwlock_t *lock) { __read_unlock_bh(lock); } EXPORT_SYMBOL(_read_unlock_bh); +#endif +#ifndef _write_unlock_irqrestore void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) { __write_unlock_irqrestore(lock, flags); } EXPORT_SYMBOL(_write_unlock_irqrestore); +#endif +#ifndef _write_unlock_irq void __lockfunc _write_unlock_irq(rwlock_t *lock) { __write_unlock_irq(lock); } EXPORT_SYMBOL(_write_unlock_irq); +#endif +#ifndef _write_unlock_bh void __lockfunc _write_unlock_bh(rwlock_t *lock) { __write_unlock_bh(lock); } EXPORT_SYMBOL(_write_unlock_bh); +#endif +#ifndef _spin_trylock_bh int __lockfunc _spin_trylock_bh(spinlock_t *lock) { return __spin_trylock_bh(lock); } EXPORT_SYMBOL(_spin_trylock_bh); +#endif notrace int in_lock_functions(unsigned long addr) { -- cgit v1.2.3-70-g09d2 From bb7bed082500179519c7caf0678ba3bed9752658 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 31 Aug 2009 14:43:38 +0200 Subject: locking: Simplify spinlock inlining For !DEBUG_SPINLOCK && !PREEMPT && SMP the spin_unlock() functions were always inlined by using special defines which would call the __raw* functions. The out-of-line variants for these functions would be generated anyway. Use the new per unlock/locking variant mechanism to force inlining of the unlock functions like before. This is not a functional change, we just get rid of one additional way to force inlining. 
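Traced through this and the two previous patches, the common SMP build (!CONFIG_DEBUG_SPINLOCK, !CONFIG_PREEMPT) resolves spin_unlock() as sketched below; this is an illustration of the expansion chain, not new code:

/*
 *   spin_unlock(lock)
 *     -> _spin_unlock(lock)      include/linux/spinlock.h
 *     -> __spin_unlock(lock)     spinlock_api_smp.h, because
 *                                __always_inline__spin_unlock is defined
 *          spin_release(&lock->dep_map, 1, _RET_IP_);  // no-op without lockdep
 *          _raw_spin_unlock(lock);
 *          preempt_enable();                           // just a barrier on !PREEMPT
 *
 * i.e. the unlock is still fully inlined, as the old special-case defines
 * ensured, only now via the generic __always_inline__ mechanism.
 */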
Signed-off-by: Heiko Carstens Acked-by: Peter Zijlstra Cc: Arnd Bergmann Cc: Nick Piggin Cc: Martin Schwidefsky Cc: Horst Hartmann Cc: Christian Ehrhardt Cc: Andrew Morton Cc: Linus Torvalds Cc: David Miller Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Geert Uytterhoeven Cc: Roman Zippel Cc: LKML-Reference: <20090831124418.848735034@de.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/spinlock.h | 46 ++++++---------------------------------- include/linux/spinlock_api_smp.h | 12 +++++++++++ 2 files changed, 18 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index da76a06556b..f0ca7a7a175 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -259,50 +259,16 @@ static inline void smp_mb__after_lock(void) { smp_mb(); } #define spin_lock_irq(lock) _spin_lock_irq(lock) #define spin_lock_bh(lock) _spin_lock_bh(lock) - #define read_lock_irq(lock) _read_lock_irq(lock) #define read_lock_bh(lock) _read_lock_bh(lock) - #define write_lock_irq(lock) _write_lock_irq(lock) #define write_lock_bh(lock) _write_lock_bh(lock) - -/* - * We inline the unlock functions in the nondebug case: - */ -#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || \ - !defined(CONFIG_SMP) -# define spin_unlock(lock) _spin_unlock(lock) -# define read_unlock(lock) _read_unlock(lock) -# define write_unlock(lock) _write_unlock(lock) -# define spin_unlock_irq(lock) _spin_unlock_irq(lock) -# define read_unlock_irq(lock) _read_unlock_irq(lock) -# define write_unlock_irq(lock) _write_unlock_irq(lock) -#else -# define spin_unlock(lock) \ - do {__raw_spin_unlock(&(lock)->raw_lock); __release(lock); } while (0) -# define read_unlock(lock) \ - do {__raw_read_unlock(&(lock)->raw_lock); __release(lock); } while (0) -# define write_unlock(lock) \ - do {__raw_write_unlock(&(lock)->raw_lock); __release(lock); } while (0) -# define spin_unlock_irq(lock) \ -do { \ - __raw_spin_unlock(&(lock)->raw_lock); \ - __release(lock); \ - local_irq_enable(); \ -} while (0) -# define read_unlock_irq(lock) \ -do { \ - __raw_read_unlock(&(lock)->raw_lock); \ - __release(lock); \ - local_irq_enable(); \ -} while (0) -# define write_unlock_irq(lock) \ -do { \ - __raw_write_unlock(&(lock)->raw_lock); \ - __release(lock); \ - local_irq_enable(); \ -} while (0) -#endif +#define spin_unlock(lock) _spin_unlock(lock) +#define read_unlock(lock) _read_unlock(lock) +#define write_unlock(lock) _write_unlock(lock) +#define spin_unlock_irq(lock) _spin_unlock_irq(lock) +#define read_unlock_irq(lock) _read_unlock_irq(lock) +#define write_unlock_irq(lock) _write_unlock_irq(lock) #define spin_unlock_irqrestore(lock, flags) \ do { \ diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 1a411e3fab9..7a7e18fc241 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -60,6 +60,18 @@ void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) __releases(lock); +/* + * We inline the unlock functions in the nondebug case: + */ +#if !defined(CONFIG_DEBUG_SPINLOCK) && !defined(CONFIG_PREEMPT) +#define __always_inline__spin_unlock +#define __always_inline__read_unlock +#define __always_inline__write_unlock +#define __always_inline__spin_unlock_irq +#define __always_inline__read_unlock_irq +#define __always_inline__write_unlock_irq +#endif + #ifndef CONFIG_DEBUG_SPINLOCK #ifndef CONFIG_GENERIC_LOCKBREAK -- cgit 
v1.2.3-70-g09d2
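As a closing illustration for the lockdep_assert_held() helper introduced earlier in this series, here is a minimal usage sketch with hypothetical struct and function names. The helper lets a function document and enforce its locking contract: with CONFIG_LOCKDEP it WARNs when the current task does not hold the lock, and without it the check compiles away.

#include <linux/lockdep.h>
#include <linux/spinlock.h>

struct counter {
	spinlock_t	lock;
	unsigned long	value;
};

/* Callers must hold ctr->lock. */
static void counter_add_locked(struct counter *ctr, unsigned long n)
{
	lockdep_assert_held(&ctr->lock);	/* WARNs if we don't own ctr->lock */
	ctr->value += n;
}

static void counter_add(struct counter *ctr, unsigned long n)
{
	spin_lock(&ctr->lock);
	counter_add_locked(ctr, n);
	spin_unlock(&ctr->lock);
}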