From 6b2dbba8b6ac4df26f72eda1e5ea7bab9f950e08 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Mon, 8 Oct 2012 16:31:25 -0700 Subject: mm: replace vma prio_tree with an interval tree Implement an interval tree as a replacement for the VMA prio_tree. The algorithms are similar to lib/interval_tree.c; however that code can't be directly reused as the interval endpoints are not explicitly stored in the VMA. So instead, the common algorithm is moved into a template and the details (node type, how to get interval endpoints from the node, etc) are filled in using the C preprocessor. Once the interval tree functions are available, using them as a replacement to the VMA prio tree is a relatively simple, mechanical job. Signed-off-by: Michel Lespinasse Cc: Rik van Riel Cc: Hillf Danton Cc: Peter Zijlstra Cc: Catalin Marinas Cc: Andrea Arcangeli Cc: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/interval_tree.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 mm/interval_tree.c (limited to 'mm/interval_tree.c') diff --git a/mm/interval_tree.c b/mm/interval_tree.c new file mode 100644 index 00000000000..7dc565660e5 --- /dev/null +++ b/mm/interval_tree.c @@ -0,0 +1,61 @@ +/* + * mm/interval_tree.c - interval tree for mapping->i_mmap + * + * Copyright (C) 2012, Michel Lespinasse + * + * This file is released under the GPL v2. + */ + +#include +#include + +#define ITSTRUCT struct vm_area_struct +#define ITRB shared.linear.rb +#define ITTYPE unsigned long +#define ITSUBTREE shared.linear.rb_subtree_last +#define ITSTART(n) ((n)->vm_pgoff) +#define ITLAST(n) ((n)->vm_pgoff + \ + (((n)->vm_end - (n)->vm_start) >> PAGE_SHIFT) - 1) +#define ITSTATIC +#define ITPREFIX vma_interval_tree + +#include + +/* Insert old immediately after vma in the interval tree */ +void vma_interval_tree_add(struct vm_area_struct *vma, + struct vm_area_struct *old, + struct address_space *mapping) +{ + struct rb_node **link; + struct vm_area_struct *parent; + unsigned long last; + + if (unlikely(vma->vm_flags & VM_NONLINEAR)) { + list_add(&vma->shared.nonlinear, &old->shared.nonlinear); + return; + } + + last = ITLAST(vma); + + if (!old->shared.linear.rb.rb_right) { + parent = old; + link = &old->shared.linear.rb.rb_right; + } else { + parent = rb_entry(old->shared.linear.rb.rb_right, + struct vm_area_struct, shared.linear.rb); + if (parent->shared.linear.rb_subtree_last < last) + parent->shared.linear.rb_subtree_last = last; + while (parent->shared.linear.rb.rb_left) { + parent = rb_entry(parent->shared.linear.rb.rb_left, + struct vm_area_struct, shared.linear.rb); + if (parent->shared.linear.rb_subtree_last < last) + parent->shared.linear.rb_subtree_last = last; + } + link = &parent->shared.linear.rb.rb_left; + } + + vma->shared.linear.rb_subtree_last = last; + rb_link_node(&vma->shared.linear.rb, &parent->shared.linear.rb, link); + rb_insert_augmented(&vma->shared.linear.rb, &mapping->i_mmap, + &vma_interval_tree_augment_callbacks); +} -- cgit v1.2.3-70-g09d2 From 9826a516ff77c5820e591211e4f3e58ff36f46be Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Mon, 8 Oct 2012 16:31:35 -0700 Subject: mm: interval tree updates Update the generic interval tree code that was introduced in "mm: replace vma prio_tree with an interval tree". Changes: - fixed 'endpoing' typo noticed by Andrew Morton - replaced include/linux/interval_tree_tmpl.h, which was used as a template (including it automatically defined the interval tree functions) with include/linux/interval_tree_generic.h, which only defines a preprocessor macro INTERVAL_TREE_DEFINE(), which itself defines the interval tree functions when invoked. Now that is a very long macro which is unfortunate, but it does make the usage sites (lib/interval_tree.c and mm/interval_tree.c) a bit nicer than previously. - make use of RB_DECLARE_CALLBACKS() in the INTERVAL_TREE_DEFINE() macro, instead of duplicating that code in the interval tree template. - replaced vma_interval_tree_add(), which was actually handling the nonlinear and interval tree cases, with vma_interval_tree_insert_after() which handles only the interval tree case and has an API that is more consistent with the other interval tree handling functions. The nonlinear case is now handled explicitly in kernel/fork.c dup_mmap(). Signed-off-by: Michel Lespinasse Cc: Andrea Arcangeli Cc: Rik van Riel Cc: Peter Zijlstra Cc: Daniel Santos Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/interval_tree_generic.h | 191 +++++++++++++++++++++++++++++ include/linux/interval_tree_tmpl.h | 219 ---------------------------------- include/linux/mm.h | 6 +- kernel/fork.c | 7 +- lib/interval_tree.c | 15 +-- mm/interval_tree.c | 60 +++++----- 6 files changed, 235 insertions(+), 263 deletions(-) create mode 100644 include/linux/interval_tree_generic.h delete mode 100644 include/linux/interval_tree_tmpl.h (limited to 'mm/interval_tree.c') diff --git a/include/linux/interval_tree_generic.h b/include/linux/interval_tree_generic.h new file mode 100644 index 00000000000..58370e1862a --- /dev/null +++ b/include/linux/interval_tree_generic.h @@ -0,0 +1,191 @@ +/* + Interval Trees + (C) 2012 Michel Lespinasse + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + include/linux/interval_tree_generic.h +*/ + +#include + +/* + * Template for implementing interval trees + * + * ITSTRUCT: struct type of the interval tree nodes + * ITRB: name of struct rb_node field within ITSTRUCT + * ITTYPE: type of the interval endpoints + * ITSUBTREE: name of ITTYPE field within ITSTRUCT holding last-in-subtree + * ITSTART(n): start endpoint of ITSTRUCT node n + * ITLAST(n): last endpoint of ITSTRUCT node n + * ITSTATIC: 'static' or empty + * ITPREFIX: prefix to use for the inline tree definitions + * + * Note - before using this, please consider if non-generic version + * (interval_tree.h) would work for you... + */ + +#define INTERVAL_TREE_DEFINE(ITSTRUCT, ITRB, ITTYPE, ITSUBTREE, \ + ITSTART, ITLAST, ITSTATIC, ITPREFIX) \ + \ +/* Callbacks for augmented rbtree insert and remove */ \ + \ +static inline ITTYPE ITPREFIX ## _compute_subtree_last(ITSTRUCT *node) \ +{ \ + ITTYPE max = ITLAST(node), subtree_last; \ + if (node->ITRB.rb_left) { \ + subtree_last = rb_entry(node->ITRB.rb_left, \ + ITSTRUCT, ITRB)->ITSUBTREE; \ + if (max < subtree_last) \ + max = subtree_last; \ + } \ + if (node->ITRB.rb_right) { \ + subtree_last = rb_entry(node->ITRB.rb_right, \ + ITSTRUCT, ITRB)->ITSUBTREE; \ + if (max < subtree_last) \ + max = subtree_last; \ + } \ + return max; \ +} \ + \ +RB_DECLARE_CALLBACKS(static, ITPREFIX ## _augment, ITSTRUCT, ITRB, \ + ITTYPE, ITSUBTREE, ITPREFIX ## _compute_subtree_last) \ + \ +/* Insert / remove interval nodes from the tree */ \ + \ +ITSTATIC void ITPREFIX ## _insert(ITSTRUCT *node, struct rb_root *root) \ +{ \ + struct rb_node **link = &root->rb_node, *rb_parent = NULL; \ + ITTYPE start = ITSTART(node), last = ITLAST(node); \ + ITSTRUCT *parent; \ + \ + while (*link) { \ + rb_parent = *link; \ + parent = rb_entry(rb_parent, ITSTRUCT, ITRB); \ + if (parent->ITSUBTREE < last) \ + parent->ITSUBTREE = last; \ + if (start < ITSTART(parent)) \ + link = &parent->ITRB.rb_left; \ + else \ + link = &parent->ITRB.rb_right; \ + } \ + \ + node->ITSUBTREE = last; \ + rb_link_node(&node->ITRB, rb_parent, link); \ + rb_insert_augmented(&node->ITRB, root, &ITPREFIX ## _augment); \ +} \ + \ +ITSTATIC void ITPREFIX ## _remove(ITSTRUCT *node, struct rb_root *root) \ +{ \ + rb_erase_augmented(&node->ITRB, root, &ITPREFIX ## _augment); \ +} \ + \ +/* \ + * Iterate over intervals intersecting [start;last] \ + * \ + * Note that a node's interval intersects [start;last] iff: \ + * Cond1: ITSTART(node) <= last \ + * and \ + * Cond2: start <= ITLAST(node) \ + */ \ + \ +static ITSTRUCT * \ +ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last) \ +{ \ + while (true) { \ + /* \ + * Loop invariant: start <= node->ITSUBTREE \ + * (Cond2 is satisfied by one of the subtree nodes) \ + */ \ + if (node->ITRB.rb_left) { \ + ITSTRUCT *left = rb_entry(node->ITRB.rb_left, \ + ITSTRUCT, ITRB); \ + if (start <= left->ITSUBTREE) { \ + /* \ + * Some nodes in left subtree satisfy Cond2. \ + * Iterate to find the leftmost such node N. \ + * If it also satisfies Cond1, that's the \ + * match we are looking for. Otherwise, there \ + * is no matching interval as nodes to the \ + * right of N can't satisfy Cond1 either. \ + */ \ + node = left; \ + continue; \ + } \ + } \ + if (ITSTART(node) <= last) { /* Cond1 */ \ + if (start <= ITLAST(node)) /* Cond2 */ \ + return node; /* node is leftmost match */ \ + if (node->ITRB.rb_right) { \ + node = rb_entry(node->ITRB.rb_right, \ + ITSTRUCT, ITRB); \ + if (start <= node->ITSUBTREE) \ + continue; \ + } \ + } \ + return NULL; /* No match */ \ + } \ +} \ + \ +ITSTATIC ITSTRUCT * \ +ITPREFIX ## _iter_first(struct rb_root *root, ITTYPE start, ITTYPE last) \ +{ \ + ITSTRUCT *node; \ + \ + if (!root->rb_node) \ + return NULL; \ + node = rb_entry(root->rb_node, ITSTRUCT, ITRB); \ + if (node->ITSUBTREE < start) \ + return NULL; \ + return ITPREFIX ## _subtree_search(node, start, last); \ +} \ + \ +ITSTATIC ITSTRUCT * \ +ITPREFIX ## _iter_next(ITSTRUCT *node, ITTYPE start, ITTYPE last) \ +{ \ + struct rb_node *rb = node->ITRB.rb_right, *prev; \ + \ + while (true) { \ + /* \ + * Loop invariants: \ + * Cond1: ITSTART(node) <= last \ + * rb == node->ITRB.rb_right \ + * \ + * First, search right subtree if suitable \ + */ \ + if (rb) { \ + ITSTRUCT *right = rb_entry(rb, ITSTRUCT, ITRB); \ + if (start <= right->ITSUBTREE) \ + return ITPREFIX ## _subtree_search(right, \ + start, last); \ + } \ + \ + /* Move up the tree until we come from a node's left child */ \ + do { \ + rb = rb_parent(&node->ITRB); \ + if (!rb) \ + return NULL; \ + prev = &node->ITRB; \ + node = rb_entry(rb, ITSTRUCT, ITRB); \ + rb = node->ITRB.rb_right; \ + } while (prev == rb); \ + \ + /* Check if the node intersects [start;last] */ \ + if (last < ITSTART(node)) /* !Cond1 */ \ + return NULL; \ + else if (start <= ITLAST(node)) /* Cond2 */ \ + return node; \ + } \ +} diff --git a/include/linux/interval_tree_tmpl.h b/include/linux/interval_tree_tmpl.h deleted file mode 100644 index c1aeb922d65..00000000000 --- a/include/linux/interval_tree_tmpl.h +++ /dev/null @@ -1,219 +0,0 @@ -/* - Interval Trees - (C) 2012 Michel Lespinasse - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - include/linux/interval_tree_tmpl.h -*/ - -#include - -/* - * Template for implementing interval trees - * - * ITSTRUCT: struct type of the interval tree nodes - * ITRB: name of struct rb_node field within ITSTRUCT - * ITTYPE: type of the interval endpoints - * ITSUBTREE: name of ITTYPE field within ITSTRUCT holding last-in-subtree - * ITSTART(n): start endpoint of ITSTRUCT node n - * ITLAST(n): last endpoing of ITSTRUCT node n - * ITSTATIC: 'static' or empty - * ITPREFIX: prefix to use for the inline tree definitions - */ - -/* IT(name) -> ITPREFIX_name */ -#define _ITNAME(prefix, name) prefix ## _ ## name -#define ITNAME(prefix, name) _ITNAME(prefix, name) -#define IT(name) ITNAME(ITPREFIX, name) - -/* Callbacks for augmented rbtree insert and remove */ - -static inline ITTYPE IT(compute_subtree_last)(ITSTRUCT *node) -{ - ITTYPE max = ITLAST(node), subtree_last; - if (node->ITRB.rb_left) { - subtree_last = rb_entry(node->ITRB.rb_left, - ITSTRUCT, ITRB)->ITSUBTREE; - if (max < subtree_last) - max = subtree_last; - } - if (node->ITRB.rb_right) { - subtree_last = rb_entry(node->ITRB.rb_right, - ITSTRUCT, ITRB)->ITSUBTREE; - if (max < subtree_last) - max = subtree_last; - } - return max; -} - -static inline void -IT(augment_propagate)(struct rb_node *rb, struct rb_node *stop) -{ - while (rb != stop) { - ITSTRUCT *node = rb_entry(rb, ITSTRUCT, ITRB); - ITTYPE subtree_last = IT(compute_subtree_last)(node); - if (node->ITSUBTREE == subtree_last) - break; - node->ITSUBTREE = subtree_last; - rb = rb_parent(&node->ITRB); - } -} - -static inline void -IT(augment_copy)(struct rb_node *rb_old, struct rb_node *rb_new) -{ - ITSTRUCT *old = rb_entry(rb_old, ITSTRUCT, ITRB); - ITSTRUCT *new = rb_entry(rb_new, ITSTRUCT, ITRB); - - new->ITSUBTREE = old->ITSUBTREE; -} - -static void IT(augment_rotate)(struct rb_node *rb_old, struct rb_node *rb_new) -{ - ITSTRUCT *old = rb_entry(rb_old, ITSTRUCT, ITRB); - ITSTRUCT *new = rb_entry(rb_new, ITSTRUCT, ITRB); - - new->ITSUBTREE = old->ITSUBTREE; - old->ITSUBTREE = IT(compute_subtree_last)(old); -} - -static const struct rb_augment_callbacks IT(augment_callbacks) = { - IT(augment_propagate), IT(augment_copy), IT(augment_rotate) -}; - -/* Insert / remove interval nodes from the tree */ - -ITSTATIC void IT(insert)(ITSTRUCT *node, struct rb_root *root) -{ - struct rb_node **link = &root->rb_node, *rb_parent = NULL; - ITTYPE start = ITSTART(node), last = ITLAST(node); - ITSTRUCT *parent; - - while (*link) { - rb_parent = *link; - parent = rb_entry(rb_parent, ITSTRUCT, ITRB); - if (parent->ITSUBTREE < last) - parent->ITSUBTREE = last; - if (start < ITSTART(parent)) - link = &parent->ITRB.rb_left; - else - link = &parent->ITRB.rb_right; - } - - node->ITSUBTREE = last; - rb_link_node(&node->ITRB, rb_parent, link); - rb_insert_augmented(&node->ITRB, root, &IT(augment_callbacks)); -} - -ITSTATIC void IT(remove)(ITSTRUCT *node, struct rb_root *root) -{ - rb_erase_augmented(&node->ITRB, root, &IT(augment_callbacks)); -} - -/* - * Iterate over intervals intersecting [start;last] - * - * Note that a node's interval intersects [start;last] iff: - * Cond1: ITSTART(node) <= last - * and - * Cond2: start <= ITLAST(node) - */ - -static ITSTRUCT *IT(subtree_search)(ITSTRUCT *node, ITTYPE start, ITTYPE last) -{ - while (true) { - /* - * Loop invariant: start <= node->ITSUBTREE - * (Cond2 is satisfied by one of the subtree nodes) - */ - if (node->ITRB.rb_left) { - ITSTRUCT *left = rb_entry(node->ITRB.rb_left, - ITSTRUCT, ITRB); - if (start <= left->ITSUBTREE) { - /* - * Some nodes in left subtree satisfy Cond2. - * Iterate to find the leftmost such node N. - * If it also satisfies Cond1, that's the match - * we are looking for. Otherwise, there is no - * matching interval as nodes to the right of N - * can't satisfy Cond1 either. - */ - node = left; - continue; - } - } - if (ITSTART(node) <= last) { /* Cond1 */ - if (start <= ITLAST(node)) /* Cond2 */ - return node; /* node is leftmost match */ - if (node->ITRB.rb_right) { - node = rb_entry(node->ITRB.rb_right, - ITSTRUCT, ITRB); - if (start <= node->ITSUBTREE) - continue; - } - } - return NULL; /* No match */ - } -} - -ITSTATIC ITSTRUCT *IT(iter_first)(struct rb_root *root, - ITTYPE start, ITTYPE last) -{ - ITSTRUCT *node; - - if (!root->rb_node) - return NULL; - node = rb_entry(root->rb_node, ITSTRUCT, ITRB); - if (node->ITSUBTREE < start) - return NULL; - return IT(subtree_search)(node, start, last); -} - -ITSTATIC ITSTRUCT *IT(iter_next)(ITSTRUCT *node, ITTYPE start, ITTYPE last) -{ - struct rb_node *rb = node->ITRB.rb_right, *prev; - - while (true) { - /* - * Loop invariants: - * Cond1: ITSTART(node) <= last - * rb == node->ITRB.rb_right - * - * First, search right subtree if suitable - */ - if (rb) { - ITSTRUCT *right = rb_entry(rb, ITSTRUCT, ITRB); - if (start <= right->ITSUBTREE) - return IT(subtree_search)(right, start, last); - } - - /* Move up the tree until we come from a node's left child */ - do { - rb = rb_parent(&node->ITRB); - if (!rb) - return NULL; - prev = &node->ITRB; - node = rb_entry(rb, ITSTRUCT, ITRB); - rb = node->ITRB.rb_right; - } while (prev == rb); - - /* Check if the node intersects [start;last] */ - if (last < ITSTART(node)) /* !Cond1 */ - return NULL; - else if (start <= ITLAST(node)) /* Cond2 */ - return node; - } -} diff --git a/include/linux/mm.h b/include/linux/mm.h index 0f671ef09eb..f1d9aaadb56 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1355,11 +1355,11 @@ extern atomic_long_t mmap_pages_allocated; extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t); /* interval_tree.c */ -void vma_interval_tree_add(struct vm_area_struct *vma, - struct vm_area_struct *old, - struct address_space *mapping); void vma_interval_tree_insert(struct vm_area_struct *node, struct rb_root *root); +void vma_interval_tree_insert_after(struct vm_area_struct *node, + struct vm_area_struct *prev, + struct rb_root *root); void vma_interval_tree_remove(struct vm_area_struct *node, struct rb_root *root); struct vm_area_struct *vma_interval_tree_iter_first(struct rb_root *root, diff --git a/kernel/fork.c b/kernel/fork.c index 90dace52715..1cd7d581b3b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -423,7 +423,12 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) mapping->i_mmap_writable++; flush_dcache_mmap_lock(mapping); /* insert tmp into the share list, just after mpnt */ - vma_interval_tree_add(tmp, mpnt, mapping); + if (unlikely(tmp->vm_flags & VM_NONLINEAR)) + vma_nonlinear_insert(tmp, + &mapping->i_mmap_nonlinear); + else + vma_interval_tree_insert_after(tmp, mpnt, + &mapping->i_mmap); flush_dcache_mmap_unlock(mapping); mutex_unlock(&mapping->i_mmap_mutex); } diff --git a/lib/interval_tree.c b/lib/interval_tree.c index 77a793e0644..e6eb406f2d6 100644 --- a/lib/interval_tree.c +++ b/lib/interval_tree.c @@ -1,13 +1,10 @@ #include #include +#include -#define ITSTRUCT struct interval_tree_node -#define ITRB rb -#define ITTYPE unsigned long -#define ITSUBTREE __subtree_last -#define ITSTART(n) ((n)->start) -#define ITLAST(n) ((n)->last) -#define ITSTATIC -#define ITPREFIX interval_tree +#define START(node) ((node)->start) +#define LAST(node) ((node)->last) -#include +INTERVAL_TREE_DEFINE(struct interval_tree_node, rb, + unsigned long, __subtree_last, + START, LAST,, interval_tree) diff --git a/mm/interval_tree.c b/mm/interval_tree.c index 7dc565660e5..4ab7b9ec3a5 100644 --- a/mm/interval_tree.c +++ b/mm/interval_tree.c @@ -8,40 +8,38 @@ #include #include +#include -#define ITSTRUCT struct vm_area_struct -#define ITRB shared.linear.rb -#define ITTYPE unsigned long -#define ITSUBTREE shared.linear.rb_subtree_last -#define ITSTART(n) ((n)->vm_pgoff) -#define ITLAST(n) ((n)->vm_pgoff + \ - (((n)->vm_end - (n)->vm_start) >> PAGE_SHIFT) - 1) -#define ITSTATIC -#define ITPREFIX vma_interval_tree - -#include - -/* Insert old immediately after vma in the interval tree */ -void vma_interval_tree_add(struct vm_area_struct *vma, - struct vm_area_struct *old, - struct address_space *mapping) +static inline unsigned long vma_start_pgoff(struct vm_area_struct *v) +{ + return v->vm_pgoff; +} + +static inline unsigned long vma_last_pgoff(struct vm_area_struct *v) +{ + return v->vm_pgoff + ((v->vm_end - v->vm_start) >> PAGE_SHIFT) - 1; +} + +INTERVAL_TREE_DEFINE(struct vm_area_struct, shared.linear.rb, + unsigned long, shared.linear.rb_subtree_last, + vma_start_pgoff, vma_last_pgoff,, vma_interval_tree) + +/* Insert node immediately after prev in the interval tree */ +void vma_interval_tree_insert_after(struct vm_area_struct *node, + struct vm_area_struct *prev, + struct rb_root *root) { struct rb_node **link; struct vm_area_struct *parent; - unsigned long last; - - if (unlikely(vma->vm_flags & VM_NONLINEAR)) { - list_add(&vma->shared.nonlinear, &old->shared.nonlinear); - return; - } + unsigned long last = vma_last_pgoff(node); - last = ITLAST(vma); + VM_BUG_ON(vma_start_pgoff(node) != vma_start_pgoff(prev)); - if (!old->shared.linear.rb.rb_right) { - parent = old; - link = &old->shared.linear.rb.rb_right; + if (!prev->shared.linear.rb.rb_right) { + parent = prev; + link = &prev->shared.linear.rb.rb_right; } else { - parent = rb_entry(old->shared.linear.rb.rb_right, + parent = rb_entry(prev->shared.linear.rb.rb_right, struct vm_area_struct, shared.linear.rb); if (parent->shared.linear.rb_subtree_last < last) parent->shared.linear.rb_subtree_last = last; @@ -54,8 +52,8 @@ void vma_interval_tree_add(struct vm_area_struct *vma, link = &parent->shared.linear.rb.rb_left; } - vma->shared.linear.rb_subtree_last = last; - rb_link_node(&vma->shared.linear.rb, &parent->shared.linear.rb, link); - rb_insert_augmented(&vma->shared.linear.rb, &mapping->i_mmap, - &vma_interval_tree_augment_callbacks); + node->shared.linear.rb_subtree_last = last; + rb_link_node(&node->shared.linear.rb, &parent->shared.linear.rb, link); + rb_insert_augmented(&node->shared.linear.rb, root, + &vma_interval_tree_augment); } -- cgit v1.2.3-70-g09d2 From bf181b9f9d8dfbba58b23441ad60d0bc33806d64 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Mon, 8 Oct 2012 16:31:39 -0700 Subject: mm anon rmap: replace same_anon_vma linked list with an interval tree. When a large VMA (anon or private file mapping) is first touched, which will populate its anon_vma field, and then split into many regions through the use of mprotect(), the original anon_vma ends up linking all of the vmas on a linked list. This can cause rmap to become inefficient, as we have to walk potentially thousands of irrelevent vmas before finding the one a given anon page might fall into. By replacing the same_anon_vma linked list with an interval tree (where each avc's interval is determined by its vma's start and last pgoffs), we can make rmap efficient for this use case again. While the change is large, all of its pieces are fairly simple. Most places that were walking the same_anon_vma list were looking for a known pgoff, so they can just use the anon_vma_interval_tree_foreach() interval tree iterator instead. The exception here is ksm, where the page's index is not known. It would probably be possible to rework ksm so that the index would be known, but for now I have decided to keep things simple and just walk the entirety of the interval tree there. When updating vma's that already have an anon_vma assigned, we must take care to re-index the corresponding avc's on their interval tree. This is done through the use of anon_vma_interval_tree_pre_update_vma() and anon_vma_interval_tree_post_update_vma(), which remove the avc's from their interval tree before the update and re-insert them after the update. The anon_vma stays locked during the update, so there is no chance that rmap would miss the vmas that are being updated. Signed-off-by: Michel Lespinasse Cc: Andrea Arcangeli Cc: Rik van Riel Cc: Peter Zijlstra Cc: Daniel Santos Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 14 ++++++++++ include/linux/rmap.h | 11 ++++---- mm/huge_memory.c | 5 ++-- mm/interval_tree.c | 14 ++++++++++ mm/ksm.c | 9 ++++--- mm/memory-failure.c | 5 +++- mm/mmap.c | 73 +++++++++++++++++++++++++++++++++++++++------------- mm/rmap.c | 24 ++++++++--------- 8 files changed, 114 insertions(+), 41 deletions(-) (limited to 'mm/interval_tree.c') diff --git a/include/linux/mm.h b/include/linux/mm.h index f1d9aaadb56..0cdab4e0f81 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -20,6 +20,7 @@ struct mempolicy; struct anon_vma; +struct anon_vma_chain; struct file_ra_state; struct user_struct; struct writeback_control; @@ -1377,6 +1378,19 @@ static inline void vma_nonlinear_insert(struct vm_area_struct *vma, list_add_tail(&vma->shared.nonlinear, list); } +void anon_vma_interval_tree_insert(struct anon_vma_chain *node, + struct rb_root *root); +void anon_vma_interval_tree_remove(struct anon_vma_chain *node, + struct rb_root *root); +struct anon_vma_chain *anon_vma_interval_tree_iter_first( + struct rb_root *root, unsigned long start, unsigned long last); +struct anon_vma_chain *anon_vma_interval_tree_iter_next( + struct anon_vma_chain *node, unsigned long start, unsigned long last); + +#define anon_vma_interval_tree_foreach(avc, root, start, last) \ + for (avc = anon_vma_interval_tree_iter_first(root, start, last); \ + avc; avc = anon_vma_interval_tree_iter_next(avc, start, last)) + /* mmap.c */ extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); extern int vma_adjust(struct vm_area_struct *vma, unsigned long start, diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 7f32cec57e6..dce44f7d3ed 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -37,14 +37,14 @@ struct anon_vma { atomic_t refcount; /* - * NOTE: the LSB of the head.next is set by + * NOTE: the LSB of the rb_root.rb_node is set by * mm_take_all_locks() _after_ taking the above lock. So the - * head must only be read/written after taking the above lock + * rb_root must only be read/written after taking the above lock * to be sure to see a valid next pointer. The LSB bit itself * is serialized by a system wide lock only visible to * mm_take_all_locks() (mm_all_locks_mutex). */ - struct list_head head; /* Chain of private "related" vmas */ + struct rb_root rb_root; /* Interval tree of private "related" vmas */ }; /* @@ -57,14 +57,15 @@ struct anon_vma { * with a VMA, or the VMAs associated with an anon_vma. * The "same_vma" list contains the anon_vma_chains linking * all the anon_vmas associated with this VMA. - * The "same_anon_vma" list contains the anon_vma_chains + * The "rb" field indexes on an interval tree the anon_vma_chains * which link all the VMAs associated with this anon_vma. */ struct anon_vma_chain { struct vm_area_struct *vma; struct anon_vma *anon_vma; struct list_head same_vma; /* locked by mmap_sem & page_table_lock */ - struct list_head same_anon_vma; /* locked by anon_vma->mutex */ + struct rb_node rb; /* locked by anon_vma->mutex */ + unsigned long rb_subtree_last; }; #ifdef CONFIG_MMU diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 010d32944d1..ce59ada0946 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1375,13 +1375,14 @@ static void __split_huge_page(struct page *page, struct anon_vma *anon_vma) { int mapcount, mapcount2; + pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); struct anon_vma_chain *avc; BUG_ON(!PageHead(page)); BUG_ON(PageTail(page)); mapcount = 0; - list_for_each_entry(avc, &anon_vma->head, same_anon_vma) { + anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) { struct vm_area_struct *vma = avc->vma; unsigned long addr = vma_address(page, vma); BUG_ON(is_vma_temporary_stack(vma)); @@ -1407,7 +1408,7 @@ static void __split_huge_page(struct page *page, __split_huge_page_refcount(page); mapcount2 = 0; - list_for_each_entry(avc, &anon_vma->head, same_anon_vma) { + anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) { struct vm_area_struct *vma = avc->vma; unsigned long addr = vma_address(page, vma); BUG_ON(is_vma_temporary_stack(vma)); diff --git a/mm/interval_tree.c b/mm/interval_tree.c index 4ab7b9ec3a5..f7c72cd35e1 100644 --- a/mm/interval_tree.c +++ b/mm/interval_tree.c @@ -8,6 +8,7 @@ #include #include +#include #include static inline unsigned long vma_start_pgoff(struct vm_area_struct *v) @@ -57,3 +58,16 @@ void vma_interval_tree_insert_after(struct vm_area_struct *node, rb_insert_augmented(&node->shared.linear.rb, root, &vma_interval_tree_augment); } + +static inline unsigned long avc_start_pgoff(struct anon_vma_chain *avc) +{ + return vma_start_pgoff(avc->vma); +} + +static inline unsigned long avc_last_pgoff(struct anon_vma_chain *avc) +{ + return vma_last_pgoff(avc->vma); +} + +INTERVAL_TREE_DEFINE(struct anon_vma_chain, rb, unsigned long, rb_subtree_last, + avc_start_pgoff, avc_last_pgoff,, anon_vma_interval_tree) diff --git a/mm/ksm.c b/mm/ksm.c index 9638620a753..14ee5cf8a51 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1618,7 +1618,8 @@ again: struct vm_area_struct *vma; anon_vma_lock(anon_vma); - list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) { + anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root, + 0, ULONG_MAX) { vma = vmac->vma; if (rmap_item->address < vma->vm_start || rmap_item->address >= vma->vm_end) @@ -1671,7 +1672,8 @@ again: struct vm_area_struct *vma; anon_vma_lock(anon_vma); - list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) { + anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root, + 0, ULONG_MAX) { vma = vmac->vma; if (rmap_item->address < vma->vm_start || rmap_item->address >= vma->vm_end) @@ -1723,7 +1725,8 @@ again: struct vm_area_struct *vma; anon_vma_lock(anon_vma); - list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) { + anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root, + 0, ULONG_MAX) { vma = vmac->vma; if (rmap_item->address < vma->vm_start || rmap_item->address >= vma->vm_end) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index c38a6257d08..6c5899b9034 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -400,18 +400,21 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, struct vm_area_struct *vma; struct task_struct *tsk; struct anon_vma *av; + pgoff_t pgoff; av = page_lock_anon_vma(page); if (av == NULL) /* Not actually mapped anymore */ return; + pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); read_lock(&tasklist_lock); for_each_process (tsk) { struct anon_vma_chain *vmac; if (!task_early_kill(tsk)) continue; - list_for_each_entry(vmac, &av->head, same_anon_vma) { + anon_vma_interval_tree_foreach(vmac, &av->rb_root, + pgoff, pgoff) { vma = vmac->vma; if (!page_mapped_in_vma(page, vma)) continue; diff --git a/mm/mmap.c b/mm/mmap.c index 66984aab791..2e580ed7921 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -353,6 +353,38 @@ void validate_mm(struct mm_struct *mm) #define validate_mm(mm) do { } while (0) #endif +/* + * vma has some anon_vma assigned, and is already inserted on that + * anon_vma's interval trees. + * + * Before updating the vma's vm_start / vm_end / vm_pgoff fields, the + * vma must be removed from the anon_vma's interval trees using + * anon_vma_interval_tree_pre_update_vma(). + * + * After the update, the vma will be reinserted using + * anon_vma_interval_tree_post_update_vma(). + * + * The entire update must be protected by exclusive mmap_sem and by + * the root anon_vma's mutex. + */ +static inline void +anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma) +{ + struct anon_vma_chain *avc; + + list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) + anon_vma_interval_tree_remove(avc, &avc->anon_vma->rb_root); +} + +static inline void +anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma) +{ + struct anon_vma_chain *avc; + + list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) + anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root); +} + static int find_vma_links(struct mm_struct *mm, unsigned long addr, unsigned long end, struct vm_area_struct **pprev, struct rb_node ***rb_link, struct rb_node **rb_parent) @@ -565,20 +597,17 @@ again: remove_next = 1 + (end > next->vm_end); vma_adjust_trans_huge(vma, start, end, adjust_next); - /* - * When changing only vma->vm_end, we don't really need anon_vma - * lock. This is a fairly rare case by itself, but the anon_vma - * lock may be shared between many sibling processes. Skipping - * the lock for brk adjustments makes a difference sometimes. - */ - if (vma->anon_vma && (importer || start != vma->vm_start)) { - anon_vma = vma->anon_vma; + anon_vma = vma->anon_vma; + if (!anon_vma && adjust_next) + anon_vma = next->anon_vma; + if (anon_vma) { VM_BUG_ON(adjust_next && next->anon_vma && anon_vma != next->anon_vma); - } else if (adjust_next && next->anon_vma) - anon_vma = next->anon_vma; - if (anon_vma) anon_vma_lock(anon_vma); + anon_vma_interval_tree_pre_update_vma(vma); + if (adjust_next) + anon_vma_interval_tree_pre_update_vma(next); + } if (root) { flush_dcache_mmap_lock(mapping); @@ -619,8 +648,12 @@ again: remove_next = 1 + (end > next->vm_end); __insert_vm_struct(mm, insert); } - if (anon_vma) + if (anon_vma) { + anon_vma_interval_tree_post_update_vma(vma); + if (adjust_next) + anon_vma_interval_tree_post_update_vma(next); anon_vma_unlock(anon_vma); + } if (mapping) mutex_unlock(&mapping->i_mmap_mutex); @@ -1748,7 +1781,9 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) { error = acct_stack_growth(vma, size, grow); if (!error) { + anon_vma_interval_tree_pre_update_vma(vma); vma->vm_end = address; + anon_vma_interval_tree_post_update_vma(vma); perf_event_mmap(vma); } } @@ -1798,8 +1833,10 @@ int expand_downwards(struct vm_area_struct *vma, if (grow <= vma->vm_pgoff) { error = acct_stack_growth(vma, size, grow); if (!error) { + anon_vma_interval_tree_pre_update_vma(vma); vma->vm_start = address; vma->vm_pgoff -= grow; + anon_vma_interval_tree_post_update_vma(vma); perf_event_mmap(vma); } } @@ -2515,7 +2552,7 @@ static DEFINE_MUTEX(mm_all_locks_mutex); static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma) { - if (!test_bit(0, (unsigned long *) &anon_vma->root->head.next)) { + if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) { /* * The LSB of head.next can't change from under us * because we hold the mm_all_locks_mutex. @@ -2531,7 +2568,7 @@ static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma) * anon_vma->root->mutex. */ if (__test_and_set_bit(0, (unsigned long *) - &anon_vma->root->head.next)) + &anon_vma->root->rb_root.rb_node)) BUG(); } } @@ -2572,7 +2609,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping) * A single task can't take more than one mm_take_all_locks() in a row * or it would deadlock. * - * The LSB in anon_vma->head.next and the AS_MM_ALL_LOCKS bitflag in + * The LSB in anon_vma->rb_root.rb_node and the AS_MM_ALL_LOCKS bitflag in * mapping->flags avoid to take the same lock twice, if more than one * vma in this mm is backed by the same anon_vma or address_space. * @@ -2619,13 +2656,13 @@ out_unlock: static void vm_unlock_anon_vma(struct anon_vma *anon_vma) { - if (test_bit(0, (unsigned long *) &anon_vma->root->head.next)) { + if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) { /* * The LSB of head.next can't change to 0 from under * us because we hold the mm_all_locks_mutex. * * We must however clear the bitflag before unlocking - * the vma so the users using the anon_vma->head will + * the vma so the users using the anon_vma->rb_root will * never see our bitflag. * * No need of atomic instructions here, head.next @@ -2633,7 +2670,7 @@ static void vm_unlock_anon_vma(struct anon_vma *anon_vma) * anon_vma->root->mutex. */ if (!__test_and_clear_bit(0, (unsigned long *) - &anon_vma->root->head.next)) + &anon_vma->root->rb_root.rb_node)) BUG(); anon_vma_unlock(anon_vma); } diff --git a/mm/rmap.c b/mm/rmap.c index 8cbd62fde0f..9c61bf387fd 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -127,12 +127,7 @@ static void anon_vma_chain_link(struct vm_area_struct *vma, avc->vma = vma; avc->anon_vma = anon_vma; list_add(&avc->same_vma, &vma->anon_vma_chain); - - /* - * It's critical to add new vmas to the tail of the anon_vma, - * see comment in huge_memory.c:__split_huge_page(). - */ - list_add_tail(&avc->same_anon_vma, &anon_vma->head); + anon_vma_interval_tree_insert(avc, &anon_vma->rb_root); } /** @@ -336,13 +331,13 @@ void unlink_anon_vmas(struct vm_area_struct *vma) struct anon_vma *anon_vma = avc->anon_vma; root = lock_anon_vma_root(root, anon_vma); - list_del(&avc->same_anon_vma); + anon_vma_interval_tree_remove(avc, &anon_vma->rb_root); /* * Leave empty anon_vmas on the list - we'll need * to free them outside the lock. */ - if (list_empty(&anon_vma->head)) + if (RB_EMPTY_ROOT(&anon_vma->rb_root)) continue; list_del(&avc->same_vma); @@ -371,7 +366,7 @@ static void anon_vma_ctor(void *data) mutex_init(&anon_vma->mutex); atomic_set(&anon_vma->refcount, 0); - INIT_LIST_HEAD(&anon_vma->head); + anon_vma->rb_root = RB_ROOT; } void __init anon_vma_init(void) @@ -724,6 +719,7 @@ static int page_referenced_anon(struct page *page, { unsigned int mapcount; struct anon_vma *anon_vma; + pgoff_t pgoff; struct anon_vma_chain *avc; int referenced = 0; @@ -732,7 +728,8 @@ static int page_referenced_anon(struct page *page, return referenced; mapcount = page_mapcount(page); - list_for_each_entry(avc, &anon_vma->head, same_anon_vma) { + pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) { struct vm_area_struct *vma = avc->vma; unsigned long address = vma_address(page, vma); if (address == -EFAULT) @@ -1445,6 +1442,7 @@ bool is_vma_temporary_stack(struct vm_area_struct *vma) static int try_to_unmap_anon(struct page *page, enum ttu_flags flags) { struct anon_vma *anon_vma; + pgoff_t pgoff; struct anon_vma_chain *avc; int ret = SWAP_AGAIN; @@ -1452,7 +1450,8 @@ static int try_to_unmap_anon(struct page *page, enum ttu_flags flags) if (!anon_vma) return ret; - list_for_each_entry(avc, &anon_vma->head, same_anon_vma) { + pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) { struct vm_area_struct *vma = avc->vma; unsigned long address; @@ -1668,6 +1667,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *, struct vm_area_struct *, unsigned long, void *), void *arg) { struct anon_vma *anon_vma; + pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); struct anon_vma_chain *avc; int ret = SWAP_AGAIN; @@ -1681,7 +1681,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *, if (!anon_vma) return ret; anon_vma_lock(anon_vma); - list_for_each_entry(avc, &anon_vma->head, same_anon_vma) { + anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) { struct vm_area_struct *vma = avc->vma; unsigned long address = vma_address(page, vma); if (address == -EFAULT) -- cgit v1.2.3-70-g09d2 From ed8ea8150182f8d715fceb3b175ef0a9ebacd872 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Mon, 8 Oct 2012 16:31:45 -0700 Subject: mm: add CONFIG_DEBUG_VM_RB build option Add a CONFIG_DEBUG_VM_RB build option for the previously existing DEBUG_MM_RB code. Now that Andi Kleen modified it to avoid using recursive algorithms, we can expose it a bit more. Also extend this code to validate_mm() after stack expansion, and to check that the vma's start and last pgoffs have not changed since the nodes were inserted on the anon vma interval tree (as it is important that the nodes be reindexed after each such update). Signed-off-by: Michel Lespinasse Cc: Andrea Arcangeli Cc: Rik van Riel Cc: Peter Zijlstra Cc: Daniel Santos Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 +++ include/linux/rmap.h | 3 +++ lib/Kconfig.debug | 9 +++++++++ mm/interval_tree.c | 41 ++++++++++++++++++++++++++++++++++++++++- mm/mmap.c | 19 +++++++++---------- 5 files changed, 64 insertions(+), 11 deletions(-) (limited to 'mm/interval_tree.c') diff --git a/include/linux/mm.h b/include/linux/mm.h index 0cdab4e0f81..0e6f9c9f212 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1386,6 +1386,9 @@ struct anon_vma_chain *anon_vma_interval_tree_iter_first( struct rb_root *root, unsigned long start, unsigned long last); struct anon_vma_chain *anon_vma_interval_tree_iter_next( struct anon_vma_chain *node, unsigned long start, unsigned long last); +#ifdef CONFIG_DEBUG_VM_RB +void anon_vma_interval_tree_verify(struct anon_vma_chain *node); +#endif #define anon_vma_interval_tree_foreach(avc, root, start, last) \ for (avc = anon_vma_interval_tree_iter_first(root, start, last); \ diff --git a/include/linux/rmap.h b/include/linux/rmap.h index dce44f7d3ed..b2cce644ffc 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -66,6 +66,9 @@ struct anon_vma_chain { struct list_head same_vma; /* locked by mmap_sem & page_table_lock */ struct rb_node rb; /* locked by anon_vma->mutex */ unsigned long rb_subtree_last; +#ifdef CONFIG_DEBUG_VM_RB + unsigned long cached_vma_start, cached_vma_last; +#endif }; #ifdef CONFIG_MMU diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index a6e7e774152..28e9d6c9894 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -798,6 +798,15 @@ config DEBUG_VM If unsure, say N. +config DEBUG_VM_RB + bool "Debug VM red-black trees" + depends on DEBUG_VM + help + Enable this to turn on more extended checks in the virtual-memory + system that may impact performance. + + If unsure, say N. + config DEBUG_VIRTUAL bool "Debug VM translations" depends on DEBUG_KERNEL && X86 diff --git a/mm/interval_tree.c b/mm/interval_tree.c index f7c72cd35e1..4a5822a586e 100644 --- a/mm/interval_tree.c +++ b/mm/interval_tree.c @@ -70,4 +70,43 @@ static inline unsigned long avc_last_pgoff(struct anon_vma_chain *avc) } INTERVAL_TREE_DEFINE(struct anon_vma_chain, rb, unsigned long, rb_subtree_last, - avc_start_pgoff, avc_last_pgoff,, anon_vma_interval_tree) + avc_start_pgoff, avc_last_pgoff, + static inline, __anon_vma_interval_tree) + +void anon_vma_interval_tree_insert(struct anon_vma_chain *node, + struct rb_root *root) +{ +#ifdef CONFIG_DEBUG_VM_RB + node->cached_vma_start = avc_start_pgoff(node); + node->cached_vma_last = avc_last_pgoff(node); +#endif + __anon_vma_interval_tree_insert(node, root); +} + +void anon_vma_interval_tree_remove(struct anon_vma_chain *node, + struct rb_root *root) +{ + __anon_vma_interval_tree_remove(node, root); +} + +struct anon_vma_chain * +anon_vma_interval_tree_iter_first(struct rb_root *root, + unsigned long first, unsigned long last) +{ + return __anon_vma_interval_tree_iter_first(root, first, last); +} + +struct anon_vma_chain * +anon_vma_interval_tree_iter_next(struct anon_vma_chain *node, + unsigned long first, unsigned long last) +{ + return __anon_vma_interval_tree_iter_next(node, first, last); +} + +#ifdef CONFIG_DEBUG_VM_RB +void anon_vma_interval_tree_verify(struct anon_vma_chain *node) +{ + WARN_ON_ONCE(node->cached_vma_start != avc_start_pgoff(node)); + WARN_ON_ONCE(node->cached_vma_last != avc_last_pgoff(node)); +} +#endif diff --git a/mm/mmap.c b/mm/mmap.c index 2e580ed7921..deb422c39e2 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -51,12 +51,6 @@ static void unmap_region(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *prev, unsigned long start, unsigned long end); -/* - * WARNING: the debugging will use recursive algorithms so never enable this - * unless you know what you are doing. - */ -#undef DEBUG_MM_RB - /* description of effects of mapping type and prot in current implementation. * this is due to the limited x86 page protection hardware. The expected * behavior is in parens: @@ -303,7 +297,7 @@ out: return retval; } -#ifdef DEBUG_MM_RB +#ifdef CONFIG_DEBUG_VM_RB static int browse_rb(struct rb_root *root) { int i = 0, j; @@ -337,9 +331,12 @@ void validate_mm(struct mm_struct *mm) { int bug = 0; int i = 0; - struct vm_area_struct *tmp = mm->mmap; - while (tmp) { - tmp = tmp->vm_next; + struct vm_area_struct *vma = mm->mmap; + while (vma) { + struct anon_vma_chain *avc; + list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) + anon_vma_interval_tree_verify(avc); + vma = vma->vm_next; i++; } if (i != mm->map_count) @@ -1790,6 +1787,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) } vma_unlock_anon_vma(vma); khugepaged_enter_vma_merge(vma); + validate_mm(vma->vm_mm); return error; } #endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */ @@ -1843,6 +1841,7 @@ int expand_downwards(struct vm_area_struct *vma, } vma_unlock_anon_vma(vma); khugepaged_enter_vma_merge(vma); + validate_mm(vma->vm_mm); return error; } -- cgit v1.2.3-70-g09d2