From 569ca5b3f94cd0b3295ec5943aa457cf4a4f6a3a Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 5 Apr 2012 16:10:07 +0100 Subject: xen/gnttab: add deferred freeing logic Rather than just leaking pages that can't be freed at the point where access permission for the backend domain gets revoked, put them on a list and run a timer to (infrequently) retry freeing them. (This can particularly happen when unloading a frontend driver when devices are still present, and the backend still has them in non-closed state or hasn't finished closing them yet.) Signed-off-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/grant-table.c | 106 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 96 insertions(+), 10 deletions(-) (limited to 'drivers/xen/grant-table.c') diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index b4d4eac761d..9f514bb561a 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -426,10 +426,8 @@ static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly) nflags = *pflags; do { flags = nflags; - if (flags & (GTF_reading|GTF_writing)) { - printk(KERN_ALERT "WARNING: g.e. still in use!\n"); + if (flags & (GTF_reading|GTF_writing)) return 0; - } } while ((nflags = sync_cmpxchg(pflags, flags, 0)) != flags); return 1; @@ -458,12 +456,103 @@ static int gnttab_end_foreign_access_ref_v2(grant_ref_t ref, int readonly) return 1; } -int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly) +static inline int _gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly) { return gnttab_interface->end_foreign_access_ref(ref, readonly); } + +int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly) +{ + if (_gnttab_end_foreign_access_ref(ref, readonly)) + return 1; + pr_warn("WARNING: g.e. %#x still in use!\n", ref); + return 0; +} EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref); +struct deferred_entry { + struct list_head list; + grant_ref_t ref; + bool ro; + uint16_t warn_delay; + struct page *page; +}; +static LIST_HEAD(deferred_list); +static void gnttab_handle_deferred(unsigned long); +static DEFINE_TIMER(deferred_timer, gnttab_handle_deferred, 0, 0); + +static void gnttab_handle_deferred(unsigned long unused) +{ + unsigned int nr = 10; + struct deferred_entry *first = NULL; + unsigned long flags; + + spin_lock_irqsave(&gnttab_list_lock, flags); + while (nr--) { + struct deferred_entry *entry + = list_first_entry(&deferred_list, + struct deferred_entry, list); + + if (entry == first) + break; + list_del(&entry->list); + spin_unlock_irqrestore(&gnttab_list_lock, flags); + if (_gnttab_end_foreign_access_ref(entry->ref, entry->ro)) { + put_free_entry(entry->ref); + if (entry->page) { + pr_debug("freeing g.e. %#x (pfn %#lx)\n", + entry->ref, page_to_pfn(entry->page)); + __free_page(entry->page); + } else + pr_info("freeing g.e. %#x\n", entry->ref); + kfree(entry); + entry = NULL; + } else { + if (!--entry->warn_delay) + pr_info("g.e. %#x still pending\n", + entry->ref); + if (!first) + first = entry; + } + spin_lock_irqsave(&gnttab_list_lock, flags); + if (entry) + list_add_tail(&entry->list, &deferred_list); + else if (list_empty(&deferred_list)) + break; + } + if (!list_empty(&deferred_list) && !timer_pending(&deferred_timer)) { + deferred_timer.expires = jiffies + HZ; + add_timer(&deferred_timer); + } + spin_unlock_irqrestore(&gnttab_list_lock, flags); +} + +static void gnttab_add_deferred(grant_ref_t ref, bool readonly, + struct page *page) +{ + struct deferred_entry *entry = kmalloc(sizeof(*entry), GFP_ATOMIC); + const char *what = KERN_WARNING "leaking"; + + if (entry) { + unsigned long flags; + + entry->ref = ref; + entry->ro = readonly; + entry->page = page; + entry->warn_delay = 60; + spin_lock_irqsave(&gnttab_list_lock, flags); + list_add_tail(&entry->list, &deferred_list); + if (!timer_pending(&deferred_timer)) { + deferred_timer.expires = jiffies + HZ; + add_timer(&deferred_timer); + } + spin_unlock_irqrestore(&gnttab_list_lock, flags); + what = KERN_DEBUG "deferring"; + } + printk("%s g.e. %#x (pfn %#lx)\n", + what, ref, page ? page_to_pfn(page) : -1); +} + void gnttab_end_foreign_access(grant_ref_t ref, int readonly, unsigned long page) { @@ -471,12 +560,9 @@ void gnttab_end_foreign_access(grant_ref_t ref, int readonly, put_free_entry(ref); if (page != 0) free_page(page); - } else { - /* XXX This needs to be fixed so that the ref and page are - placed on a list to be freed up later. */ - printk(KERN_WARNING - "WARNING: leaking g.e. and page still in use!\n"); - } + } else + gnttab_add_deferred(ref, readonly, + page ? virt_to_page(page) : NULL); } EXPORT_SYMBOL_GPL(gnttab_end_foreign_access); -- cgit v1.2.3-70-g09d2 From f62805f1f30a40e354bd036b4cb799863a39be4b Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Tue, 24 Apr 2012 11:55:43 +0100 Subject: xen: enter/exit lazy_mmu_mode around m2p_override calls This patch is a significant performance improvement for the m2p_override: about 6% using the gntdev device. Each m2p_add/remove_override call issues a MULTI_grant_table_op and a __flush_tlb_single if kmap_op != NULL. Batching all the calls together is a great performance benefit because it means issuing one hypercall total rather than two hypercall per page. If paravirt_lazy_mode is set PARAVIRT_LAZY_MMU, all these calls are going to be batched together, otherwise they are issued one at a time. Adding arch_enter_lazy_mmu_mode/arch_leave_lazy_mmu_mode around the m2p_add/remove_override calls forces paravirt_lazy_mode to PARAVIRT_LAZY_MMU, therefore makes sure that they are always batched. However it is not safe to call arch_enter_lazy_mmu_mode if we are in interrupt context or if we are already in PARAVIRT_LAZY_MMU mode, so check for both conditions before doing so. Changes in v4: - rebased on 3.4-rc4: all the m2p_override users call gnttab_unmap_refs and gnttab_map_refs; - check whether we are in interrupt context and the lazy_mode we are in before calling arch_enter/leave_lazy_mmu_mode. Changes in v3: - do not call arch_enter/leave_lazy_mmu_mode in xen_blkbk_unmap, that can be called in interrupt context. Signed-off-by: Stefano Stabellini [v5: s/int lazy/bool lazy/] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/grant-table.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'drivers/xen/grant-table.c') diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 9f514bb561a..487e468f9de 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -827,6 +828,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, struct page **pages, unsigned int count) { int i, ret; + bool lazy = false; pte_t *pte; unsigned long mfn; @@ -837,6 +839,11 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, if (xen_feature(XENFEAT_auto_translated_physmap)) return ret; + if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { + arch_enter_lazy_mmu_mode(); + lazy = true; + } + for (i = 0; i < count; i++) { /* Do not add to override if the map failed. */ if (map_ops[i].status) @@ -855,6 +862,9 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, return ret; } + if (lazy) + arch_leave_lazy_mmu_mode(); + return ret; } EXPORT_SYMBOL_GPL(gnttab_map_refs); @@ -863,6 +873,7 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, struct page **pages, unsigned int count, bool clear_pte) { int i, ret; + bool lazy = false; ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count); if (ret) @@ -871,12 +882,20 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, if (xen_feature(XENFEAT_auto_translated_physmap)) return ret; + if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { + arch_enter_lazy_mmu_mode(); + lazy = true; + } + for (i = 0; i < count; i++) { ret = m2p_remove_override(pages[i], clear_pte); if (ret) return ret; } + if (lazy) + arch_leave_lazy_mmu_mode(); + return ret; } EXPORT_SYMBOL_GPL(gnttab_unmap_refs); -- cgit v1.2.3-70-g09d2