Diffstat (limited to 'mm')
-rw-r--r--  mm/filemap.c     | 36
-rw-r--r--  mm/filemap.h     | 26
-rw-r--r--  mm/mempolicy.c   |  6
-rw-r--r--  mm/migrate.c     | 30
-rw-r--r--  mm/page_alloc.c  |  3
-rw-r--r--  mm/pdflush.c     | 15
-rw-r--r--  mm/readahead.c   | 16
-rw-r--r--  mm/rmap.c        |  9
8 files changed, 95 insertions, 46 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 807a463fd5e..9c7334bafda 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -828,6 +828,32 @@ grab_cache_page_nowait(struct address_space *mapping, unsigned long index)
 }
 EXPORT_SYMBOL(grab_cache_page_nowait);
 
+/*
+ * CD/DVDs are error prone. When a medium error occurs, the driver may fail
+ * a _large_ part of the i/o request. Imagine the worst scenario:
+ *
+ *      ---R__________________________________________B__________
+ *         ^ reading here                              ^ bad block(assume 4k)
+ *
+ * read(R) => miss => readahead(R...B) => media error => frustrating retries
+ * => failing the whole request => read(R) => read(R+1) =>
+ * readahead(R+1...B+1) => bang => read(R+2) => read(R+3) =>
+ * readahead(R+3...B+2) => bang => read(R+3) => read(R+4) =>
+ * readahead(R+4...B+3) => bang => read(R+4) => read(R+5) => ......
+ *
+ * It is going insane. Fix it by quickly scaling down the readahead size.
+ */
+static void shrink_readahead_size_eio(struct file *filp,
+                                        struct file_ra_state *ra)
+{
+        if (!ra->ra_pages)
+                return;
+
+        ra->ra_pages /= 4;
+        printk(KERN_WARNING "Reducing readahead size to %luK\n",
+                        ra->ra_pages << (PAGE_CACHE_SHIFT - 10));
+}
+
 /**
  * do_generic_mapping_read - generic file read routine
  * @mapping:	address_space to be read
@@ -985,6 +1011,7 @@ readpage:
 			}
 			unlock_page(page);
 			error = -EIO;
+			shrink_readahead_size_eio(filp, &ra);
 			goto readpage_error;
 		}
 		unlock_page(page);
@@ -1522,6 +1549,7 @@ page_not_uptodate:
 	 * Things didn't work out. Return zero to tell the
 	 * mm layer so, possibly freeing the page cache page first.
 	 */
+	shrink_readahead_size_eio(file, ra);
 	page_cache_release(page);
 	return NULL;
 }
@@ -1892,7 +1920,7 @@ int remove_suid(struct dentry *dentry)
 EXPORT_SYMBOL(remove_suid);
 
 size_t
-__filemap_copy_from_user_iovec(char *vaddr,
+__filemap_copy_from_user_iovec_inatomic(char *vaddr,
 			const struct iovec *iov, size_t base, size_t bytes)
 {
 	size_t copied = 0, left = 0;
@@ -1908,12 +1936,8 @@ __filemap_copy_from_user_iovec(char *vaddr,
 		vaddr += copy;
 		iov++;
 
-		if (unlikely(left)) {
-			/* zero the rest of the target like __copy_from_user */
-			if (bytes)
-				memset(vaddr, 0, bytes);
+		if (unlikely(left))
 			break;
-		}
 	}
 	return copied - left;
 }
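[Editor's note] The shrink_readahead_size_eio() helper above quarters ra_pages on every media error, so integer division drives the window to zero after only a few failures. Here is a minimal userspace sketch of that decay (plain C, not kernel code; the 32-page starting window is an assumed value, not something this patch sets):

#include <stdio.h>

/* Userspace stand-in for the ra_pages field of struct file_ra_state. */
struct file_ra_state {
        unsigned long ra_pages;
};

/* Same arithmetic as the kernel helper: quarter the window per I/O error. */
static void shrink_readahead_size_eio(struct file_ra_state *ra)
{
        if (!ra->ra_pages)
                return;
        ra->ra_pages /= 4;
}

int main(void)
{
        struct file_ra_state ra = { .ra_pages = 32 };   /* assumed default */
        int err;

        for (err = 1; ra.ra_pages; err++) {
                shrink_readahead_size_eio(&ra);
                printf("after error %d: ra_pages = %lu\n", err, ra.ra_pages);
        }
        return 0;
}

With a 32-page window this prints 8, 2, 0: after three errors readahead is disabled outright, which bounds the retry storm described in the comment instead of replaying it on every subsequent read().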
diff --git a/mm/filemap.h b/mm/filemap.h
index 5683cde2205..536979fb4ba 100644
--- a/mm/filemap.h
+++ b/mm/filemap.h
@@ -16,15 +16,23 @@
 #include <linux/uaccess.h>
 
 size_t
-__filemap_copy_from_user_iovec(char *vaddr,
-			const struct iovec *iov,
-			size_t base,
-			size_t bytes);
+__filemap_copy_from_user_iovec_inatomic(char *vaddr,
+					const struct iovec *iov,
+					size_t base,
+					size_t bytes);
 
 /*
  * Copy as much as we can into the page and return the number of bytes which
  * were sucessfully copied.  If a fault is encountered then clear the page
  * out to (offset+bytes) and return the number of bytes which were copied.
+ *
+ * NOTE: For this to work reliably we really want copy_from_user_inatomic_nocache
+ * to *NOT* zero any tail of the buffer that it failed to copy.  If it does,
+ * and if the following non-atomic copy succeeds, then there is a small window
+ * where the target page contains neither the data before the write, nor the
+ * data after the write (it contains zero).  A read at this time will see
+ * data that is inconsistent with any ordering of the read and the write.
+ * (This has been detected in practice).
  */
 static inline size_t
 filemap_copy_from_user(struct page *page, unsigned long offset,
@@ -60,13 +68,15 @@ filemap_copy_from_user_iovec(struct page *page, unsigned long offset,
 	size_t copied;
 
 	kaddr = kmap_atomic(page, KM_USER0);
-	copied = __filemap_copy_from_user_iovec(kaddr + offset, iov,
-						base, bytes);
+	copied = __filemap_copy_from_user_iovec_inatomic(kaddr + offset, iov,
+							 base, bytes);
 	kunmap_atomic(kaddr, KM_USER0);
 	if (copied != bytes) {
 		kaddr = kmap(page);
-		copied = __filemap_copy_from_user_iovec(kaddr + offset, iov,
-							base, bytes);
+		copied = __filemap_copy_from_user_iovec_inatomic(kaddr + offset, iov,
+								 base, bytes);
+		if (bytes - copied)
+			memset(kaddr + offset + copied, 0, bytes - copied);
 		kunmap(page);
 	}
 	return copied;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index ec4a1a950df..73e0f23b7f5 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -632,6 +632,10 @@ int do_migrate_pages(struct mm_struct *mm,
 
 	down_read(&mm->mmap_sem);
 
+	err = migrate_vmas(mm, from_nodes, to_nodes, flags);
+	if (err)
+		goto out;
+
 	/*
 	 * Find a 'source' bit set in 'tmp' whose corresponding 'dest'
 	 * bit in 'to' is not also set in 'tmp'. Clear the found 'source'
@@ -691,7 +695,7 @@ int do_migrate_pages(struct mm_struct *mm,
 		if (err < 0)
 			break;
 	}
-
+out:
 	up_read(&mm->mmap_sem);
 	if (err < 0)
 		return err;
diff --git a/mm/migrate.c b/mm/migrate.c
index 1c2a71aa05c..3f1e0c2c942 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -616,15 +616,13 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	/*
 	 * Establish migration ptes or remove ptes
 	 */
-	if (try_to_unmap(page, 1) != SWAP_FAIL) {
-		if (!page_mapped(page))
-			rc = move_to_new_page(newpage, page);
-	} else
-		/* A vma has VM_LOCKED set -> permanent failure */
-		rc = -EPERM;
+	try_to_unmap(page, 1);
+	if (!page_mapped(page))
+		rc = move_to_new_page(newpage, page);
 
 	if (rc)
 		remove_migration_ptes(page, page);
+
 unlock:
 	unlock_page(page);
 
@@ -976,3 +974,23 @@ out2:
 }
 #endif
 
+/*
+ * Call migration functions in the vma_ops that may prepare
+ * memory in a vm for migration. migration functions may perform
+ * the migration for vmas that do not have an underlying page struct.
+ */
+int migrate_vmas(struct mm_struct *mm, const nodemask_t *to,
+	const nodemask_t *from, unsigned long flags)
+{
+	struct vm_area_struct *vma;
+	int err = 0;
+
+	for(vma = mm->mmap; vma->vm_next && !err; vma = vma->vm_next) {
+		if (vma->vm_ops && vma->vm_ops->migrate) {
+			err = vma->vm_ops->migrate(vma, to, from, flags);
+			if (err)
+				break;
+		}
+	}
+	return err;
+}
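[Editor's note] The new migrate_vmas() walks the mm and hands control to any vma whose vm_ops provides a migrate method; the mempolicy.c hunk calls it before normal page-based migration begins. No implementer appears in this diff (the diffstat is limited to mm/, so the hook's declaration in include/linux/mm.h, presumably added by the same patch, is not shown either). The following is a hypothetical sketch, with all mydrv_* names invented, of how a driver managing memory without backing page structs might use it:

#include <linux/mm.h>
#include <linux/nodemask.h>

/*
 * Hypothetical callback: relocate device-managed memory from the nodes
 * in 'from' to the nodes in 'to'.  A nonzero return propagates out of
 * migrate_vmas() and makes do_migrate_pages() bail out through its new
 * 'out' label before any page-based migration starts.
 */
static int mydrv_migrate(struct vm_area_struct *vma, const nodemask_t *from,
                         const nodemask_t *to, unsigned long flags)
{
        /* ... move the driver's buffers and rewrite its mappings ... */
        return 0;
}

static struct vm_operations_struct mydrv_vm_ops = {
        .migrate = mydrv_migrate,
};

/* Installed from the driver's mmap handler: vma->vm_ops = &mydrv_vm_ops; */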
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 423db0db7c0..6c1174fcf52 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -957,8 +957,7 @@ restart:
 		goto got_pg;
 
 	do {
-		if (cpuset_zone_allowed(*z, gfp_mask|__GFP_HARDWALL))
-			wakeup_kswapd(*z, order);
+		wakeup_kswapd(*z, order);
 	} while (*(++z));
 
 	/*
diff --git a/mm/pdflush.c b/mm/pdflush.c
index df7e50b8f70..b02102feeb4 100644
--- a/mm/pdflush.c
+++ b/mm/pdflush.c
@@ -104,21 +104,20 @@ static int __pdflush(struct pdflush_work *my_work)
 		list_move(&my_work->list, &pdflush_list);
 		my_work->when_i_went_to_sleep = jiffies;
 		spin_unlock_irq(&pdflush_lock);
-
 		schedule();
-		if (try_to_freeze()) {
-			spin_lock_irq(&pdflush_lock);
-			continue;
-		}
-
+		try_to_freeze();
 		spin_lock_irq(&pdflush_lock);
 		if (!list_empty(&my_work->list)) {
-			printk("pdflush: bogus wakeup!\n");
+			/*
+			 * Someone woke us up, but without removing our control
+			 * structure from the global list.  swsusp will do this
+			 * in try_to_freeze()->refrigerator().  Handle it.
+			 */
 			my_work->fn = NULL;
 			continue;
 		}
 		if (my_work->fn == NULL) {
-			printk("pdflush: NULL work function\n");
+			printk("pdflush: bogus wakeup\n");
 			continue;
 		}
 		spin_unlock_irq(&pdflush_lock);
diff --git a/mm/readahead.c b/mm/readahead.c
index 0f142a40984..e39e416860d 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -118,8 +118,7 @@ static inline unsigned long get_next_ra_size(struct file_ra_state *ra)
 #define list_to_page(head) (list_entry((head)->prev, struct page, lru))
 
 /**
- * read_cache_pages - populate an address space with some pages, and
- *			start reads against them.
+ * read_cache_pages - populate an address space with some pages & start reads against them
  * @mapping: the address_space
  * @pages: The address of a list_head which contains the target pages.  These
  *   pages have their ->index populated and are otherwise uninitialised.
@@ -182,14 +181,11 @@ static int read_pages(struct address_space *mapping, struct file *filp,
 		list_del(&page->lru);
 		if (!add_to_page_cache(page, mapping,
 					page->index, GFP_KERNEL)) {
-			ret = mapping->a_ops->readpage(filp, page);
-			if (ret != AOP_TRUNCATED_PAGE) {
-				if (!pagevec_add(&lru_pvec, page))
-					__pagevec_lru_add(&lru_pvec);
-				continue;
-			} /* else fall through to release */
-		}
-		page_cache_release(page);
+			mapping->a_ops->readpage(filp, page);
+			if (!pagevec_add(&lru_pvec, page))
+				__pagevec_lru_add(&lru_pvec);
+		} else
+			page_cache_release(page);
 	}
 	pagevec_lru_add(&lru_pvec);
 	ret = 0;
diff --git a/mm/rmap.c b/mm/rmap.c
index 882a85826bb..e76909e880c 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -562,9 +562,8 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	 * If it's recently referenced (perhaps page_referenced
 	 * skipped over this mm) then we should reactivate it.
 	 */
-	if ((vma->vm_flags & VM_LOCKED) ||
-			(ptep_clear_flush_young(vma, address, pte)
-				&& !migration)) {
+	if (!migration && ((vma->vm_flags & VM_LOCKED) ||
+			(ptep_clear_flush_young(vma, address, pte)))) {
 		ret = SWAP_FAIL;
 		goto out_unmap;
 	}
@@ -771,7 +770,7 @@ static int try_to_unmap_file(struct page *page, int migration)
 
 	list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
 						shared.vm_set.list) {
-		if (vma->vm_flags & VM_LOCKED)
+		if ((vma->vm_flags & VM_LOCKED) && !migration)
 			continue;
 		cursor = (unsigned long) vma->vm_private_data;
 		if (cursor > max_nl_cursor)
@@ -805,7 +804,7 @@ static int try_to_unmap_file(struct page *page, int migration)
 	do {
 		list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
 						shared.vm_set.list) {
-			if (vma->vm_flags & VM_LOCKED)
+			if ((vma->vm_flags & VM_LOCKED) && !migration)
 				continue;
 			cursor = (unsigned long) vma->vm_private_data;
 			while ( cursor < max_nl_cursor &&
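[Editor's note] Taken together, the rmap.c hunks change one behaviour: when try_to_unmap() runs on behalf of page migration, neither VM_LOCKED nor a recently-referenced pte counts as a failure any more, which is what lets unmap_and_move() in migrate.c drop its -EPERM path for mlocked vmas. The before/after logic of the try_to_unmap_one() test boils down to the two predicates below (a userspace restatement for illustration; the function names are invented):

#include <stdbool.h>
#include <stdio.h>

/* Old rule: VM_LOCKED always fails; a young pte fails unless migrating. */
static bool unmap_fails_old(bool vm_locked, bool young, bool migration)
{
        return vm_locked || (young && !migration);
}

/* New rule: migration ignores both VM_LOCKED and the young bit. */
static bool unmap_fails_new(bool vm_locked, bool young, bool migration)
{
        return !migration && (vm_locked || young);
}

int main(void)
{
        /* The only input that changes: an mlocked vma during migration. */
        printf("VM_LOCKED + migration: old=%d new=%d\n",
               unmap_fails_old(true, false, true),
               unmap_fails_new(true, false, true));
        return 0;
}

The old predicate returned SWAP_FAIL for a locked vma even during migration; the new one lets the pte be replaced by a migration entry, so mlocked pages can now be moved between nodes.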