From: Mel Gorman on
rmap_walk_anon() does not use page_lock_anon_vma() for looking up and
locking an anon_vma and it does not appear to have sufficient locking to
ensure the anon_vma does not disappear from under it.

This patch copies an approach used by KSM to take a reference on the
anon_vma while pages are being migrated. This should prevent rmap_walk()
running into nasty surprises later because anon_vma has been freed.

Signed-off-by: Mel Gorman <mel(a)csn.ul.ie>
Acked-by: Rik van Riel <riel(a)redhat.com>
---
include/linux/rmap.h | 23 +++++++++++++++++++++++
mm/migrate.c | 12 ++++++++++++
mm/rmap.c | 10 +++++-----
3 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index d25bd22..567d43f 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -29,6 +29,9 @@ struct anon_vma {
#ifdef CONFIG_KSM
atomic_t ksm_refcount;
#endif
+#ifdef CONFIG_MIGRATION
+ atomic_t migrate_refcount;
+#endif
/*
* NOTE: the LSB of the head.next is set by
* mm_take_all_locks() _after_ taking the above lock. So the
@@ -81,6 +84,26 @@ static inline int ksm_refcount(struct anon_vma *anon_vma)
return 0;
}
#endif /* CONFIG_KSM */
+#ifdef CONFIG_MIGRATION
+static inline void migrate_refcount_init(struct anon_vma *anon_vma)
+{
+ atomic_set(&anon_vma->migrate_refcount, 0);
+}
+
+static inline int migrate_refcount(struct anon_vma *anon_vma)
+{
+ return atomic_read(&anon_vma->migrate_refcount);
+}
+#else
+static inline void migrate_refcount_init(struct anon_vma *anon_vma)
+{
+}
+
+static inline int migrate_refcount(struct anon_vma *anon_vma)
+{
+ return 0;
+}
+#endif /* CONFIG_MIGRATE */

static inline struct anon_vma *page_anon_vma(struct page *page)
{
diff --git a/mm/migrate.c b/mm/migrate.c
index 5938db5..b768a1d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -543,6 +543,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
int rcu_locked = 0;
int charge = 0;
struct mem_cgroup *mem = NULL;
+ struct anon_vma *anon_vma = NULL;

if (!newpage)
return -ENOMEM;
@@ -599,6 +600,8 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
if (PageAnon(page)) {
rcu_read_lock();
rcu_locked = 1;
+ anon_vma = page_anon_vma(page);
+ atomic_inc(&anon_vma->migrate_refcount);
}

/*
@@ -638,6 +641,15 @@ skip_unmap:
if (rc)
remove_migration_ptes(page, page);
rcu_unlock:
+
+ /* Drop an anon_vma reference if we took one */
+ if (anon_vma && atomic_dec_and_lock(&anon_vma->migrate_refcount, &anon_vma->lock)) {
+ int empty = list_empty(&anon_vma->head);
+ spin_unlock(&anon_vma->lock);
+ if (empty)
+ anon_vma_free(anon_vma);
+ }
+
if (rcu_locked)
rcu_read_unlock();
uncharge:
diff --git a/mm/rmap.c b/mm/rmap.c
index c59eaea..4bad2c5 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -249,7 +249,8 @@ static void anon_vma_unlink(struct anon_vma_chain *anon_vma_chain)
list_del(&anon_vma_chain->same_anon_vma);

/* We must garbage collect the anon_vma if it's empty */
- empty = list_empty(&anon_vma->head) && !ksm_refcount(anon_vma);
+ empty = list_empty(&anon_vma->head) && !ksm_refcount(anon_vma) &&
+ !migrate_refcount(anon_vma);
spin_unlock(&anon_vma->lock);

if (empty)
@@ -274,6 +275,7 @@ static void anon_vma_ctor(void *data)

spin_lock_init(&anon_vma->lock);
ksm_refcount_init(anon_vma);
+ migrate_refcount_init(anon_vma);
INIT_LIST_HEAD(&anon_vma->head);
}

@@ -1365,10 +1367,8 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
/*
* Note: remove_migration_ptes() cannot use page_lock_anon_vma()
* because that depends on page_mapped(); but not all its usages
- * are holding mmap_sem, which also gave the necessary guarantee
- * (that this anon_vma's slab has not already been destroyed).
- * This needs to be reviewed later: avoiding page_lock_anon_vma()
- * is risky, and currently limits the usefulness of rmap_walk().
+ * are holding mmap_sem. Users without mmap_sem are required to
+ * take a reference count to prevent the anon_vma disappearing
*/
anon_vma = page_anon_vma(page);
if (!anon_vma)
--
1.6.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/