diff options
Diffstat (limited to 'fs/xfs/linux-2.6')
-rw-r--r-- | fs/xfs/linux-2.6/xfs_aops.c | 1088 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_aops.h | 10 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_buf.c | 1373 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_buf.h | 696 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_file.c | 6 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_ioctl.c | 10 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_iops.c | 121 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_iops.h | 5 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_linux.h | 6 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_lrw.c | 56 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_stats.c | 2 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_stats.h | 18 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_super.c | 19 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_vnode.c | 1 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_vnode.h | 19 |
15 files changed, 1698 insertions, 1732 deletions
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 94d3cdfbf9b..d1db8c17a74 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -40,11 +40,10 @@ #include "xfs_rw.h" #include "xfs_iomap.h" #include <linux/mpage.h> +#include <linux/pagevec.h> #include <linux/writeback.h> STATIC void xfs_count_page_state(struct page *, int *, int *, int *); -STATIC void xfs_convert_page(struct inode *, struct page *, xfs_iomap_t *, - struct writeback_control *wbc, void *, int, int); #if defined(XFS_RW_TRACE) void @@ -55,17 +54,15 @@ xfs_page_trace( int mask) { xfs_inode_t *ip; - bhv_desc_t *bdp; vnode_t *vp = LINVFS_GET_VP(inode); loff_t isize = i_size_read(inode); - loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT; + loff_t offset = page_offset(page); int delalloc = -1, unmapped = -1, unwritten = -1; if (page_has_buffers(page)) xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); - bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops); - ip = XFS_BHVTOI(bdp); + ip = xfs_vtoi(vp); if (!ip->i_rwtrace) return; @@ -103,15 +100,56 @@ xfs_finish_ioend( queue_work(xfsdatad_workqueue, &ioend->io_work); } +/* + * We're now finished for good with this ioend structure. + * Update the page state via the associated buffer_heads, + * release holds on the inode and bio, and finally free + * up memory. Do not use the ioend after this. + */ STATIC void xfs_destroy_ioend( xfs_ioend_t *ioend) { + struct buffer_head *bh, *next; + + for (bh = ioend->io_buffer_head; bh; bh = next) { + next = bh->b_private; + bh->b_end_io(bh, ioend->io_uptodate); + } + vn_iowake(ioend->io_vnode); mempool_free(ioend, xfs_ioend_pool); } /* + * Buffered IO write completion for delayed allocate extents. + * TODO: Update ondisk isize now that we know the file data + * has been flushed (i.e. the notorious "NULL file" problem). + */ +STATIC void +xfs_end_bio_delalloc( + void *data) +{ + xfs_ioend_t *ioend = data; + + xfs_destroy_ioend(ioend); +} + +/* + * Buffered IO write completion for regular, written extents. + */ +STATIC void +xfs_end_bio_written( + void *data) +{ + xfs_ioend_t *ioend = data; + + xfs_destroy_ioend(ioend); +} + +/* + * IO write completion for unwritten extents. + * * Issue transactions to convert a buffer range from unwritten * to written extents. */ @@ -123,21 +161,10 @@ xfs_end_bio_unwritten( vnode_t *vp = ioend->io_vnode; xfs_off_t offset = ioend->io_offset; size_t size = ioend->io_size; - struct buffer_head *bh, *next; int error; if (ioend->io_uptodate) VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error); - - /* ioend->io_buffer_head is only non-NULL for buffered I/O */ - for (bh = ioend->io_buffer_head; bh; bh = next) { - next = bh->b_private; - - bh->b_end_io = NULL; - clear_buffer_unwritten(bh); - end_buffer_async_write(bh, ioend->io_uptodate); - } - xfs_destroy_ioend(ioend); } @@ -149,7 +176,8 @@ xfs_end_bio_unwritten( */ STATIC xfs_ioend_t * xfs_alloc_ioend( - struct inode *inode) + struct inode *inode, + unsigned int type) { xfs_ioend_t *ioend; @@ -162,45 +190,25 @@ xfs_alloc_ioend( */ atomic_set(&ioend->io_remaining, 1); ioend->io_uptodate = 1; /* cleared if any I/O fails */ + ioend->io_list = NULL; + ioend->io_type = type; ioend->io_vnode = LINVFS_GET_VP(inode); ioend->io_buffer_head = NULL; + ioend->io_buffer_tail = NULL; atomic_inc(&ioend->io_vnode->v_iocount); ioend->io_offset = 0; ioend->io_size = 0; - INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend); + if (type == IOMAP_UNWRITTEN) + INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend); + else if (type == IOMAP_DELAY) + INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc, ioend); + else + INIT_WORK(&ioend->io_work, xfs_end_bio_written, ioend); return ioend; } -void -linvfs_unwritten_done( - struct buffer_head *bh, - int uptodate) -{ - xfs_ioend_t *ioend = bh->b_private; - static spinlock_t unwritten_done_lock = SPIN_LOCK_UNLOCKED; - unsigned long flags; - - ASSERT(buffer_unwritten(bh)); - bh->b_end_io = NULL; - - if (!uptodate) - ioend->io_uptodate = 0; - - /* - * Deep magic here. We reuse b_private in the buffer_heads to build - * a chain for completing the I/O from user context after we've issued - * a transaction to convert the unwritten extent. - */ - spin_lock_irqsave(&unwritten_done_lock, flags); - bh->b_private = ioend->io_buffer_head; - ioend->io_buffer_head = bh; - spin_unlock_irqrestore(&unwritten_done_lock, flags); - - xfs_finish_ioend(ioend); -} - STATIC int xfs_map_blocks( struct inode *inode, @@ -218,138 +226,260 @@ xfs_map_blocks( return -error; } +STATIC inline int +xfs_iomap_valid( + xfs_iomap_t *iomapp, + loff_t offset) +{ + return offset >= iomapp->iomap_offset && + offset < iomapp->iomap_offset + iomapp->iomap_bsize; +} + /* - * Finds the corresponding mapping in block @map array of the - * given @offset within a @page. + * BIO completion handler for buffered IO. */ -STATIC xfs_iomap_t * -xfs_offset_to_map( +STATIC int +xfs_end_bio( + struct bio *bio, + unsigned int bytes_done, + int error) +{ + xfs_ioend_t *ioend = bio->bi_private; + + if (bio->bi_size) + return 1; + + ASSERT(ioend); + ASSERT(atomic_read(&bio->bi_cnt) >= 1); + + /* Toss bio and pass work off to an xfsdatad thread */ + if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) + ioend->io_uptodate = 0; + bio->bi_private = NULL; + bio->bi_end_io = NULL; + + bio_put(bio); + xfs_finish_ioend(ioend); + return 0; +} + +STATIC void +xfs_submit_ioend_bio( + xfs_ioend_t *ioend, + struct bio *bio) +{ + atomic_inc(&ioend->io_remaining); + + bio->bi_private = ioend; + bio->bi_end_io = xfs_end_bio; + + submit_bio(WRITE, bio); + ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP)); + bio_put(bio); +} + +STATIC struct bio * +xfs_alloc_ioend_bio( + struct buffer_head *bh) +{ + struct bio *bio; + int nvecs = bio_get_nr_vecs(bh->b_bdev); + + do { + bio = bio_alloc(GFP_NOIO, nvecs); + nvecs >>= 1; + } while (!bio); + + ASSERT(bio->bi_private == NULL); + bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); + bio->bi_bdev = bh->b_bdev; + bio_get(bio); + return bio; +} + +STATIC void +xfs_start_buffer_writeback( + struct buffer_head *bh) +{ + ASSERT(buffer_mapped(bh)); + ASSERT(buffer_locked(bh)); + ASSERT(!buffer_delay(bh)); + ASSERT(!buffer_unwritten(bh)); + + mark_buffer_async_write(bh); + set_buffer_uptodate(bh); + clear_buffer_dirty(bh); +} + +STATIC void +xfs_start_page_writeback( struct page *page, - xfs_iomap_t *iomapp, - unsigned long offset) + struct writeback_control *wbc, + int clear_dirty, + int buffers) +{ + ASSERT(PageLocked(page)); + ASSERT(!PageWriteback(page)); + set_page_writeback(page); + if (clear_dirty) + clear_page_dirty(page); + unlock_page(page); + if (!buffers) { + end_page_writeback(page); + wbc->pages_skipped++; /* We didn't write this page */ + } +} + +static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh) +{ + return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); +} + +/* + * Submit all of the bios for all of the ioends we have saved up, + * covering the initial writepage page and also any probed pages. + */ +STATIC void +xfs_submit_ioend( + xfs_ioend_t *ioend) +{ + xfs_ioend_t *next; + struct buffer_head *bh; + struct bio *bio; + sector_t lastblock = 0; + + do { + next = ioend->io_list; + bio = NULL; + + for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) { + xfs_start_buffer_writeback(bh); + + if (!bio) { + retry: + bio = xfs_alloc_ioend_bio(bh); + } else if (bh->b_blocknr != lastblock + 1) { + xfs_submit_ioend_bio(ioend, bio); + goto retry; + } + + if (bio_add_buffer(bio, bh) != bh->b_size) { + xfs_submit_ioend_bio(ioend, bio); + goto retry; + } + + lastblock = bh->b_blocknr; + } + if (bio) + xfs_submit_ioend_bio(ioend, bio); + xfs_finish_ioend(ioend); + } while ((ioend = next) != NULL); +} + +/* + * Cancel submission of all buffer_heads so far in this endio. + * Toss the endio too. Only ever called for the initial page + * in a writepage request, so only ever one page. + */ +STATIC void +xfs_cancel_ioend( + xfs_ioend_t *ioend) +{ + xfs_ioend_t *next; + struct buffer_head *bh, *next_bh; + + do { + next = ioend->io_list; + bh = ioend->io_buffer_head; + do { + next_bh = bh->b_private; + clear_buffer_async_write(bh); + unlock_buffer(bh); + } while ((bh = next_bh) != NULL); + + vn_iowake(ioend->io_vnode); + mempool_free(ioend, xfs_ioend_pool); + } while ((ioend = next) != NULL); +} + +/* + * Test to see if we've been building up a completion structure for + * earlier buffers -- if so, we try to append to this ioend if we + * can, otherwise we finish off any current ioend and start another. + * Return true if we've finished the given ioend. + */ +STATIC void +xfs_add_to_ioend( + struct inode *inode, + struct buffer_head *bh, + xfs_off_t offset, + unsigned int type, + xfs_ioend_t **result, + int need_ioend) { - loff_t full_offset; /* offset from start of file */ + xfs_ioend_t *ioend = *result; - ASSERT(offset < PAGE_CACHE_SIZE); + if (!ioend || need_ioend || type != ioend->io_type) { + xfs_ioend_t *previous = *result; - full_offset = page->index; /* NB: using 64bit number */ - full_offset <<= PAGE_CACHE_SHIFT; /* offset from file start */ - full_offset += offset; /* offset from page start */ + ioend = xfs_alloc_ioend(inode, type); + ioend->io_offset = offset; + ioend->io_buffer_head = bh; + ioend->io_buffer_tail = bh; + if (previous) + previous->io_list = ioend; + *result = ioend; + } else { + ioend->io_buffer_tail->b_private = bh; + ioend->io_buffer_tail = bh; + } - if (full_offset < iomapp->iomap_offset) - return NULL; - if (iomapp->iomap_offset + (iomapp->iomap_bsize -1) >= full_offset) - return iomapp; - return NULL; + bh->b_private = NULL; + ioend->io_size += bh->b_size; } STATIC void xfs_map_at_offset( - struct page *page, struct buffer_head *bh, - unsigned long offset, + loff_t offset, int block_bits, xfs_iomap_t *iomapp) { xfs_daddr_t bn; - loff_t delta; int sector_shift; ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE)); ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY)); ASSERT(iomapp->iomap_bn != IOMAP_DADDR_NULL); - delta = page->index; - delta <<= PAGE_CACHE_SHIFT; - delta += offset; - delta -= iomapp->iomap_offset; - delta >>= block_bits; - sector_shift = block_bits - BBSHIFT; - bn = iomapp->iomap_bn >> sector_shift; - bn += delta; - BUG_ON(!bn && !(iomapp->iomap_flags & IOMAP_REALTIME)); + bn = (iomapp->iomap_bn >> sector_shift) + + ((offset - iomapp->iomap_offset) >> block_bits); + + ASSERT(bn || (iomapp->iomap_flags & IOMAP_REALTIME)); ASSERT((bn << sector_shift) >= iomapp->iomap_bn); lock_buffer(bh); bh->b_blocknr = bn; - bh->b_bdev = iomapp->iomap_target->pbr_bdev; + bh->b_bdev = iomapp->iomap_target->bt_bdev; set_buffer_mapped(bh); clear_buffer_delay(bh); + clear_buffer_unwritten(bh); } /* - * Look for a page at index which is unlocked and contains our - * unwritten extent flagged buffers at its head. Returns page - * locked and with an extra reference count, and length of the - * unwritten extent component on this page that we can write, - * in units of filesystem blocks. - */ -STATIC struct page * -xfs_probe_unwritten_page( - struct address_space *mapping, - pgoff_t index, - xfs_iomap_t *iomapp, - xfs_ioend_t *ioend, - unsigned long max_offset, - unsigned long *fsbs, - unsigned int bbits) -{ - struct page *page; - - page = find_trylock_page(mapping, index); - if (!page) - return NULL; - if (PageWriteback(page)) - goto out; - - if (page->mapping && page_has_buffers(page)) { - struct buffer_head *bh, *head; - unsigned long p_offset = 0; - - *fsbs = 0; - bh = head = page_buffers(page); - do { - if (!buffer_unwritten(bh) || !buffer_uptodate(bh)) - break; - if (!xfs_offset_to_map(page, iomapp, p_offset)) - break; - if (p_offset >= max_offset) - break; - xfs_map_at_offset(page, bh, p_offset, bbits, iomapp); - set_buffer_unwritten_io(bh); - bh->b_private = ioend; - p_offset += bh->b_size; - (*fsbs)++; - } while ((bh = bh->b_this_page) != head); - - if (p_offset) - return page; - } - -out: - unlock_page(page); - return NULL; -} - -/* - * Look for a page at index which is unlocked and not mapped - * yet - clustering for mmap write case. + * Look for a page at index that is suitable for clustering. */ STATIC unsigned int -xfs_probe_unmapped_page( - struct address_space *mapping, - pgoff_t index, - unsigned int pg_offset) +xfs_probe_page( + struct page *page, + unsigned int pg_offset, + int mapped) { - struct page *page; int ret = 0; - page = find_trylock_page(mapping, index); - if (!page) - return 0; if (PageWriteback(page)) - goto out; + return 0; if (page->mapping && PageDirty(page)) { if (page_has_buffers(page)) { @@ -357,79 +487,101 @@ xfs_probe_unmapped_page( bh = head = page_buffers(page); do { - if (buffer_mapped(bh) || !buffer_uptodate(bh)) + if (!buffer_uptodate(bh)) + break; + if (mapped != buffer_mapped(bh)) break; ret += bh->b_size; if (ret >= pg_offset) break; } while ((bh = bh->b_this_page) != head); } else - ret = PAGE_CACHE_SIZE; + ret = mapped ? 0 : PAGE_CACHE_SIZE; } -out: - unlock_page(page); return ret; } -STATIC unsigned int -xfs_probe_unmapped_cluster( +STATIC size_t +xfs_probe_cluster( struct inode *inode, struct page *startpage, struct buffer_head *bh, - struct buffer_head *head) + struct buffer_head *head, + int mapped) { + struct pagevec pvec; pgoff_t tindex, tlast, tloff; - unsigned int pg_offset, len, total = 0; - struct address_space *mapping = inode->i_mapping; + size_t total = 0; + int done = 0, i; /* First sum forwards in this page */ do { - if (buffer_mapped(bh)) - break; + if (mapped != buffer_mapped(bh)) + return total; total += bh->b_size; } while ((bh = bh->b_this_page) != head); - /* If we reached the end of the page, sum forwards in - * following pages. - */ - if (bh == head) { - tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT; - /* Prune this back to avoid pathological behavior */ - tloff = min(tlast, startpage->index + 64); - for (tindex = startpage->index + 1; tindex < tloff; tindex++) { - len = xfs_probe_unmapped_page(mapping, tindex, - PAGE_CACHE_SIZE); - if (!len) - return total; + /* if we reached the end of the page, sum forwards in following pages */ + tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT; + tindex = startpage->index + 1; + + /* Prune this back to avoid pathological behavior */ + tloff = min(tlast, startpage->index + 64); + + pagevec_init(&pvec, 0); + while (!done && tindex <= tloff) { + unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1); + + if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len)) + break; + + for (i = 0; i < pagevec_count(&pvec); i++) { + struct page *page = pvec.pages[i]; + size_t pg_offset, len = 0; + + if (tindex == tlast) { + pg_offset = + i_size_read(inode) & (PAGE_CACHE_SIZE - 1); + if (!pg_offset) { + done = 1; + break; + } + } else + pg_offset = PAGE_CACHE_SIZE; + + if (page->index == tindex && !TestSetPageLocked(page)) { + len = xfs_probe_page(page, pg_offset, mapped); + unlock_page(page); + } + + if (!len) { + done = 1; + break; + } + total += len; + tindex++; } - if (tindex == tlast && - (pg_offset = i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) { - total += xfs_probe_unmapped_page(mapping, - tindex, pg_offset); - } + + pagevec_release(&pvec); + cond_resched(); } + return total; } /* - * Probe for a given page (index) in the inode and test if it is delayed - * and without unwritten buffers. Returns page locked and with an extra - * reference count. + * Test if a given page is suitable for writing as part of an unwritten + * or delayed allocate extent. */ -STATIC struct page * -xfs_probe_delalloc_page( - struct inode *inode, - pgoff_t index) +STATIC int +xfs_is_delayed_page( + struct page *page, + unsigned int type) { - struct page *page; - - page = find_trylock_page(inode->i_mapping, index); - if (!page) - return NULL; if (PageWriteback(page)) - goto out; + return 0; if (page->mapping && page_has_buffers(page)) { struct buffer_head *bh, *head; @@ -437,243 +589,156 @@ xfs_probe_delalloc_page( bh = head = page_buffers(page); do { - if (buffer_unwritten(bh)) { - acceptable = 0; + if (buffer_unwritten(bh)) + acceptable = (type == IOMAP_UNWRITTEN); + else if (buffer_delay(bh)) + acceptable = (type == IOMAP_DELAY); + else if (buffer_mapped(bh)) + acceptable = (type == 0); + else break; - } else if (buffer_delay(bh)) { - acceptable = 1; - } } while ((bh = bh->b_this_page) != head); if (acceptable) - return page; - } - -out: - unlock_page(page); - return NULL; -} - -STATIC int -xfs_map_unwritten( - struct inode *inode, - struct page *start_page, - struct buffer_head *head, - struct buffer_head *curr, - unsigned long p_offset, - int block_bits, - xfs_iomap_t *iomapp, - struct writeback_control *wbc, - int startio, - int all_bh) -{ - struct buffer_head *bh = curr; - xfs_iomap_t *tmp; - xfs_ioend_t *ioend; - loff_t offset; - unsigned long nblocks = 0; - - offset = start_page->index; - offset <<= PAGE_CACHE_SHIFT; - offset += p_offset; - - ioend = xfs_alloc_ioend(inode); - - /* First map forwards in the page consecutive buffers - * covering this unwritten extent - */ - do { - if (!buffer_unwritten(bh)) - break; - tmp = xfs_offset_to_map(start_page, iomapp, p_offset); - if (!tmp) - break; - xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp); - set_buffer_unwritten_io(bh); - bh->b_private = ioend; - p_offset += bh->b_size; - nblocks++; - } while ((bh = bh->b_this_page) != head); - - atomic_add(nblocks, &ioend->io_remaining); - - /* If we reached the end of the page, map forwards in any - * following pages which are also covered by this extent. - */ - if (bh == head) { - struct address_space *mapping = inode->i_mapping; - pgoff_t tindex, tloff, tlast; - unsigned long bs; - unsigned int pg_offset, bbits = inode->i_blkbits; - struct page *page; - - tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT; - tloff = (iomapp->iomap_offset + iomapp->iomap_bsize) >> PAGE_CACHE_SHIFT; - tloff = min(tlast, tloff); - for (tindex = start_page->index + 1; tindex < tloff; tindex++) { - page = xfs_probe_unwritten_page(mapping, - tindex, iomapp, ioend, - PAGE_CACHE_SIZE, &bs, bbits); - if (!page) - break; - nblocks += bs; - atomic_add(bs, &ioend->io_remaining); - xfs_convert_page(inode, page, iomapp, wbc, ioend, - startio, all_bh); - /* stop if converting the next page might add - * enough blocks that the corresponding byte - * count won't fit in our ulong page buf length */ - if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits)) - goto enough; - } - - if (tindex == tlast && - (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) { - page = xfs_probe_unwritten_page(mapping, - tindex, iomapp, ioend, - pg_offset, &bs, bbits); - if (page) { - nblocks += bs; - atomic_add(bs, &ioend->io_remaining); - xfs_convert_page(inode, page, iomapp, wbc, ioend, - startio, all_bh); - if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits)) - goto enough; - } - } + return 1; } -enough: - ioend->io_size = (xfs_off_t)nblocks << block_bits; - ioend->io_offset = offset; - xfs_finish_ioend(ioend); return 0; } -STATIC void -xfs_submit_page( - struct page *page, - struct writeback_control *wbc, - struct buffer_head *bh_arr[], - int bh_count, - int probed_page, - int clear_dirty) -{ - struct buffer_head *bh; - int i; - - BUG_ON(PageWriteback(page)); - if (bh_count) - set_page_writeback(page); - if (clear_dirty) - clear_page_dirty(page); - unlock_page(page); - - if (bh_count) { - for (i = 0; i < bh_count; i++) { - bh = bh_arr[i]; - mark_buffer_async_write(bh); - if (buffer_unwritten(bh)) - set_buffer_unwritten_io(bh); - set_buffer_uptodate(bh); - clear_buffer_dirty(bh); - } - - for (i = 0; i < bh_count; i++) - submit_bh(WRITE, bh_arr[i]); - - if (probed_page && clear_dirty) - wbc->nr_to_write--; /* Wrote an "extra" page */ - } -} - /* * Allocate & map buffers for page given the extent map. Write it out. * except for the original page of a writepage, this is called on * delalloc/unwritten pages only, for the original page it is possible * that the page has no mapping at all. */ -STATIC void +STATIC int xfs_convert_page( struct inode *inode, struct page *page, - xfs_iomap_t *iomapp, + loff_t tindex, + xfs_iomap_t *mp, + xfs_ioend_t **ioendp, struct writeback_control *wbc, - void *private, int startio, int all_bh) { - struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head; - xfs_iomap_t *mp = iomapp, *tmp; - unsigned long offset, end_offset; - int index = 0; + struct buffer_head *bh, *head; + xfs_off_t end_offset; + unsigned long p_offset; + unsigned int type; int bbits = inode->i_blkbits; int len, page_dirty; + int count = 0, done = 0, uptodate = 1; + xfs_off_t offset = page_offset(page); - end_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)); + if (page->index != tindex) + goto fail; + if (TestSetPageLocked(page)) + goto fail; + if (PageWriteback(page)) + goto fail_unlock_page; + if (page->mapping != inode->i_mapping) + goto fail_unlock_page; + if (!xfs_is_delayed_page(page, (*ioendp)->io_type)) + goto fail_unlock_page; /* * page_dirty is initially a count of buffers on the page before * EOF and is decrememted as we move each into a cleanable state. + * + * Derivation: + * + * End offset is the highest offset that this page should represent. + * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1)) + * will evaluate non-zero and be less than PAGE_CACHE_SIZE and + * hence give us the correct page_dirty count. On any other page, + * it will be zero and in that case we need page_dirty to be the + * count of buffers on the page. */ + end_offset = min_t(unsigned long long, + (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, + i_size_read(inode)); + len = 1 << inode->i_blkbits; - end_offset = max(end_offset, PAGE_CACHE_SIZE); - end_offset = roundup(end_offset, len); - page_dirty = end_offset / len; + p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1), + PAGE_CACHE_SIZE); + p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE; + page_dirty = p_offset / len; - offset = 0; bh = head = page_buffers(page); do { if (offset >= end_offset) break; - if (!(PageUptodate(page) || buffer_uptodate(bh))) + if (!buffer_uptodate(bh)) + uptodate = 0; + if (!(PageUptodate(page) || buffer_uptodate(bh))) { + done = 1; continue; - if (buffer_mapped(bh) && all_bh && - !(buffer_unwritten(bh) || buffer_delay(bh))) { + } + + if (buffer_unwritten(bh) || buffer_delay(bh)) { + if (buffer_unwritten(bh)) + type = IOMAP_UNWRITTEN; + else + type = IOMAP_DELAY; + + if (!xfs_iomap_valid(mp, offset)) { + done = 1; + continue; + } + + ASSERT(!(mp->iomap_flags & IOMAP_HOLE)); + ASSERT(!(mp->iomap_flags & IOMAP_DELAY)); + + xfs_map_at_offset(bh, offset, bbits, mp); if (startio) { + xfs_add_to_ioend(inode, bh, offset, + type, ioendp, done); + } else { + set_buffer_dirty(bh); + unlock_buffer(bh); + mark_buffer_dirty(bh); + } + page_dirty--; + count++; + } else { + type = 0; + if (buffer_mapped(bh) && all_bh && startio) { lock_buffer(bh); - bh_arr[index++] = bh; + xfs_add_to_ioend(inode, bh, offset, + type, ioendp, done); + count++; page_dirty--; + } else { + done = 1; } - continue; } - tmp = xfs_offset_to_map(page, mp, offset); - if (!tmp) - continue; - ASSERT(!(tmp->iomap_flags & IOMAP_HOLE)); - ASSERT(!(tmp->iomap_flags & IOMAP_DELAY)); + } while (offset += len, (bh = bh->b_this_page) != head); - /* If this is a new unwritten extent buffer (i.e. one - * that we haven't passed in private data for, we must - * now map this buffer too. - */ - if (buffer_unwritten(bh) && !bh->b_end_io) { - ASSERT(tmp->iomap_flags & IOMAP_UNWRITTEN); - xfs_map_unwritten(inode, page, head, bh, offset, - bbits, tmp, wbc, startio, all_bh); - } else if (! (buffer_unwritten(bh) && buffer_locked(bh))) { - xfs_map_at_offset(page, bh, offset, bbits, tmp); - if (buffer_unwritten(bh)) { - set_buffer_unwritten_io(bh); - bh->b_private = private; - ASSERT(private); + if (uptodate && bh == head) + SetPageUptodate(page); + + if (startio) { + if (count) { + struct backing_dev_info *bdi; + + bdi = inode->i_mapping->backing_dev_info; + if (bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + done = 1; + } else if (--wbc->nr_to_write <= 0) { + done = 1; } } - if (startio) { - bh_arr[index++] = bh; - } else { - set_buffer_dirty(bh); - unlock_buffer(bh); - mark_buffer_dirty(bh); - } - page_dirty--; - } while (offset += len, (bh = bh->b_this_page) != head); - - if (startio && index) { - xfs_submit_page(page, wbc, bh_arr, index, 1, !page_dirty); - } else { - unlock_page(page); + xfs_start_page_writeback(page, wbc, !page_dirty, count); } + + return done; + fail_unlock_page: + unlock_page(page); + fail: + return 1; } /* @@ -685,19 +750,31 @@ xfs_cluster_write( struct inode *inode, pgoff_t tindex, xfs_iomap_t *iomapp, + xfs_ioend_t **ioendp, struct writeback_control *wbc, int startio, int all_bh, pgoff_t tlast) { - struct page *page; + struct pagevec pvec; + int done = 0, i; - for (; tindex <= tlast; tindex++) { - page = xfs_probe_delalloc_page(inode, tindex); - if (!page) + pagevec_init(&pvec, 0); + while (!done && tindex <= tlast) { + unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1); + + if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len)) break; - xfs_convert_page(inode, page, iomapp, wbc, NULL, - startio, all_bh); + + for (i = 0; i < pagevec_count(&pvec); i++) { + done = xfs_convert_page(inode, pvec.pages[i], tindex++, + iomapp, ioendp, wbc, startio, all_bh); + if (done) + break; + } + + pagevec_release(&pvec); + cond_resched(); } } @@ -728,18 +805,22 @@ xfs_page_state_convert( int startio, int unmapped) /* also implies page uptodate */ { - struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head; - xfs_iomap_t *iomp, iomap; + struct buffer_head *bh, *head; + xfs_iomap_t iomap; + xfs_ioend_t *ioend = NULL, *iohead = NULL; loff_t offset; unsigned long p_offset = 0; + unsigned int type; __uint64_t end_offset; pgoff_t end_index, last_index, tlast; - int len, err, i, cnt = 0, uptodate = 1; - int flags; - int page_dirty; + ssize_t size, len; + int flags, err, iomap_valid = 0, uptodate = 1; + int page_dirty, count = 0, trylock_flag = 0; + int all_bh = unmapped; /* wait for other IO threads? */ - flags = (startio && wbc->sync_mode != WB_SYNC_NONE) ? 0 : BMAPI_TRYLOCK; + if (startio && (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking)) + trylock_flag |= BMAPI_TRYLOCK; /* Is this page beyond the end of the file? */ offset = i_size_read(inode); @@ -754,161 +835,173 @@ xfs_page_state_convert( } } - end_offset = min_t(unsigned long long, - (loff_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset); - offset = (loff_t)page->index << PAGE_CACHE_SHIFT; - /* * page_dirty is initially a count of buffers on the page before * EOF and is decrememted as we move each into a cleanable state. - */ + * + * Derivation: + * + * End offset is the highest offset that this page should represent. + * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1)) + * will evaluate non-zero and be less than PAGE_CACHE_SIZE and + * hence give us the correct page_dirty count. On any other page, + * it will be zero and in that case we need page_dirty to be the + * count of buffers on the page. + */ + end_offset = min_t(unsigned long long, + (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset); len = 1 << inode->i_blkbits; - p_offset = max(p_offset, PAGE_CACHE_SIZE); - p_offset = roundup(p_offset, len); + p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1), + PAGE_CACHE_SIZE); + p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE; page_dirty = p_offset / len; - iomp = NULL; - p_offset = 0; bh = head = page_buffers(page); + offset = page_offset(page); + flags = -1; + type = 0; + + /* TODO: cleanup count and page_dirty */ do { if (offset >= end_offset) break; if (!buffer_uptodate(bh)) uptodate = 0; - if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) + if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) { + /* + * the iomap is actually still valid, but the ioend + * isn't. shouldn't happen too often. + */ + iomap_valid = 0; continue; - - if (iomp) { - iomp = xfs_offset_to_map(page, &iomap, p_offset); } + if (iomap_valid) + iomap_valid = xfs_iomap_valid(&iomap, offset); + /* * First case, map an unwritten extent and prepare for * extent state conversion transaction on completion. - */ - if (buffer_unwritten(bh)) { - if (!startio) - continue; - if (!iomp) { - err = xfs_map_blocks(inode, offset, len, &iomap, - BMAPI_WRITE|BMAPI_IGNSTATE); - if (err) { - goto error; - } - iomp = xfs_offset_to_map(page, &iomap, - p_offset); + * + * Second case, allocate space for a delalloc buffer. + * We can return EAGAIN here in the release page case. + * + * Third case, an unmapped buffer was found, and we are + * in a path where we need to write the whole page out. + */ + if (buffer_unwritten(bh) || buffer_delay(bh) || + ((buffer_uptodate(bh) || PageUptodate(page)) && + !buffer_mapped(bh) && (unmapped || startio))) { + /* + * Make sure we don't use a read-only iomap + */ + if (flags == BMAPI_READ) + iomap_valid = 0; + + if (buffer_unwritten(bh)) { + type = IOMAP_UNWRITTEN; + flags = BMAPI_WRITE|BMAPI_IGNSTATE; + } else if (buffer_delay(bh)) { + type = IOMAP_DELAY; + flags = BMAPI_ALLOCATE; + if (!startio) + flags |= trylock_flag; + } else { + type = IOMAP_NEW; + flags = BMAPI_WRITE|BMAPI_MMAP; } - if (iomp) { - if (!bh->b_end_io) { - err = xfs_map_unwritten(inode, page, - head, bh, p_offset, - inode->i_blkbits, iomp, - wbc, startio, unmapped); - if (err) { - goto error; - } + + if (!iomap_valid) { + if (type == IOMAP_NEW) { + size = xfs_probe_cluster(inode, + page, bh, head, 0); } else { - set_bit(BH_Lock, &bh->b_state); + size = len; } - BUG_ON(!buffer_locked(bh)); - bh_arr[cnt++] = bh; - page_dirty--; - } - /* - * Second case, allocate space for a delalloc buffer. - * We can return EAGAIN here in the release page case. - */ - } else if (buffer_delay(bh)) { - if (!iomp) { - err = xfs_map_blocks(inode, offset, len, &iomap, - BMAPI_ALLOCATE | flags); - if (err) { + + err = xfs_map_blocks(inode, offset, size, + &iomap, flags); + if (err) goto error; - } - iomp = xfs_offset_to_map(page, &iomap, - p_offset); + iomap_valid = xfs_iomap_valid(&iomap, offset); } - if (iomp) { - xfs_map_at_offset(page, bh, p_offset, - inode->i_blkbits, iomp); + if (iomap_valid) { + xfs_map_at_offset(bh, offset, + inode->i_blkbits, &iomap); if (startio) { - bh_arr[cnt++] = bh; + xfs_add_to_ioend(inode, bh, offset, + type, &ioend, + !iomap_valid); } else { set_buffer_dirty(bh); unlock_buffer(bh); mark_buffer_dirty(bh); } page_dirty--; + count++; + } + } else if (buffer_uptodate(bh) && startio) { + /* + * we got here because the buffer is already mapped. + * That means it must already have extents allocated + * underneath it. Map the extent by reading it. + */ + if (!iomap_valid || type != 0) { + flags = BMAPI_READ; + size = xfs_probe_cluster(inode, page, bh, + head, 1); + err = xfs_map_blocks(inode, offset, size, + &iomap, flags); + if (err) + goto error; + iomap_valid = xfs_iomap_valid(&iomap, offset); } - } else if ((buffer_uptodate(bh) || PageUptodate(page)) && - (unmapped || startio)) { - if (!buffer_mapped(bh)) { - int size; - - /* - * Getting here implies an unmapped buffer - * was found, and we are in a path where we - * need to write the whole page out. - */ - if (!iomp) { - size = xfs_probe_unmapped_cluster( - inode, page, bh, head); - err = xfs_map_blocks(inode, offset, - size, &iomap, - BMAPI_WRITE|BMAPI_MMAP); - if (err) { - goto error; - } - iomp = xfs_offset_to_map(page, &iomap, - p_offset); - } - if (iomp) { - xfs_map_at_offset(page, - bh, p_offset, - inode->i_blkbits, iomp); - if (startio) { - bh_arr[cnt++] = bh; - } else { - set_buffer_dirty(bh); - unlock_buffer(bh); - mark_buffer_dirty(bh); - } - page_dirty--; - } - } else if (startio) { - if (buffer_uptodate(bh) && - !test_and_set_bit(BH_Lock, &bh->b_state)) { - bh_arr[cnt++] = bh; - page_dirty--; - } + type = 0; + if (!test_and_set_bit(BH_Lock, &bh->b_state)) { + ASSERT(buffer_mapped(bh)); + if (iomap_valid) + all_bh = 1; + xfs_add_to_ioend(inode, bh, offset, type, + &ioend, !iomap_valid); + page_dirty--; + count++; + } else { + iomap_valid = 0; } + } else if ((buffer_uptodate(bh) || PageUptodate(page)) && + (unmapped || startio)) { + iomap_valid = 0; } - } while (offset += len, p_offset += len, - ((bh = bh->b_this_page) != head)); + + if (!iohead) + iohead = ioend; + + } while (offset += len, ((bh = bh->b_this_page) != head)); if (uptodate && bh == head) SetPageUptodate(page); - if (startio) { - xfs_submit_page(page, wbc, bh_arr, cnt, 0, !page_dirty); - } + if (startio) + xfs_start_page_writeback(page, wbc, 1, count); - if (iomp) { - offset = (iomp->iomap_offset + iomp->iomap_bsize - 1) >> + if (ioend && iomap_valid) { + offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >> PAGE_CACHE_SHIFT; tlast = min_t(pgoff_t, offset, last_index); - xfs_cluster_write(inode, page->index + 1, iomp, wbc, - startio, unmapped, tlast); + xfs_cluster_write(inode, page->index + 1, &iomap, &ioend, + wbc, startio, all_bh, tlast); } + if (iohead) + xfs_submit_ioend(iohead); + return page_dirty; error: - for (i = 0; i < cnt; i++) { - unlock_buffer(bh_arr[i]); - } + if (iohead) + xfs_cancel_ioend(iohead); /* * If it's delalloc and we have nowhere to put it, @@ -916,9 +1009,8 @@ error: * us to try again. */ if (err != -EAGAIN) { - if (!unmapped) { + if (!unmapped) block_invalidatepage(page, 0); - } ClearPageUptodate(page); } return err; @@ -982,7 +1074,7 @@ __linvfs_get_block( } /* If this is a realtime file, data might be on a new device */ - bh_result->b_bdev = iomap.iomap_target->pbr_bdev; + bh_result->b_bdev = iomap.iomap_target->bt_bdev; /* If we previously allocated a block out beyond eof and * we are now coming back to use it then we will need to @@ -1094,10 +1186,10 @@ linvfs_direct_IO( if (error) return -error; - iocb->private = xfs_alloc_ioend(inode); + iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN); ret = blockdev_direct_IO_own_locking(rw, iocb, inode, - iomap.iomap_target->pbr_bdev, + iomap.iomap_target->bt_bdev, iov, offset, nr_segs, linvfs_get_blocks_direct, linvfs_end_io_direct); diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h index 4720758a9ad..55339dd5a30 100644 --- a/fs/xfs/linux-2.6/xfs_aops.h +++ b/fs/xfs/linux-2.6/xfs_aops.h @@ -23,14 +23,24 @@ extern mempool_t *xfs_ioend_pool; typedef void (*xfs_ioend_func_t)(void *); +/* + * xfs_ioend struct manages large extent writes for XFS. + * It can manage several multi-page bio's at once. + */ typedef struct xfs_ioend { + struct xfs_ioend *io_list; /* next ioend in chain */ + unsigned int io_type; /* delalloc / unwritten */ unsigned int io_uptodate; /* I/O status register */ atomic_t io_remaining; /* hold count */ struct vnode *io_vnode; /* file being written to */ struct buffer_head *io_buffer_head;/* buffer linked list head */ + struct buffer_head *io_buffer_tail;/* buffer linked list tail */ size_t io_size; /* size of the extent */ xfs_off_t io_offset; /* offset in the file */ struct work_struct io_work; /* xfsdatad work queue */ } xfs_ioend_t; +extern struct address_space_operations linvfs_aops; +extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int); + #endif /* __XFS_IOPS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 6fe21d2b884..e44b7c1a3a3 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -31,76 +31,77 @@ #include <linux/kthread.h> #include "xfs_linux.h" -STATIC kmem_cache_t *pagebuf_zone; -STATIC kmem_shaker_t pagebuf_shake; +STATIC kmem_zone_t *xfs_buf_zone; +STATIC kmem_shaker_t xfs_buf_shake; +STATIC int xfsbufd(void *); STATIC int xfsbufd_wakeup(int, gfp_t); -STATIC void pagebuf_delwri_queue(xfs_buf_t *, int); +STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); STATIC struct workqueue_struct *xfslogd_workqueue; struct workqueue_struct *xfsdatad_workqueue; -#ifdef PAGEBUF_TRACE +#ifdef XFS_BUF_TRACE void -pagebuf_trace( - xfs_buf_t *pb, +xfs_buf_trace( + xfs_buf_t *bp, char *id, void *data, void *ra) { - ktrace_enter(pagebuf_trace_buf, - pb, id, - (void *)(unsigned long)pb->pb_flags, - (void *)(unsigned long)pb->pb_hold.counter, - (void *)(unsigned long)pb->pb_sema.count.counter, + ktrace_enter(xfs_buf_trace_buf, + bp, id, + (void *)(unsigned long)bp->b_flags, + (void *)(unsigned long)bp->b_hold.counter, + (void *)(unsigned long)bp->b_sema.count.counter, (void *)current, data, ra, - (void *)(unsigned long)((pb->pb_file_offset>>32) & 0xffffffff), - (void *)(unsigned long)(pb->pb_file_offset & 0xffffffff), - (void *)(unsigned long)pb->pb_buffer_length, + (void *)(unsigned long)((bp->b_file_offset>>32) & 0xffffffff), + (void *)(unsigned long)(bp->b_file_offset & 0xffffffff), + (void *)(unsigned long)bp->b_buffer_length, NULL, NULL, NULL, NULL, NULL); } -ktrace_t *pagebuf_trace_buf; -#define PAGEBUF_TRACE_SIZE 4096 -#define PB_TRACE(pb, id, data) \ - pagebuf_trace(pb, id, (void *)data, (void *)__builtin_return_address(0)) +ktrace_t *xfs_buf_trace_buf; +#define XFS_BUF_TRACE_SIZE 4096 +#define XB_TRACE(bp, id, data) \ + xfs_buf_trace(bp, id, (void *)data, (void *)__builtin_return_address(0)) #else -#define PB_TRACE(pb, id, data) do { } while (0) +#define XB_TRACE(bp, id, data) do { } while (0) #endif -#ifdef PAGEBUF_LOCK_TRACKING -# define PB_SET_OWNER(pb) ((pb)->pb_last_holder = current->pid) -# define PB_CLEAR_OWNER(pb) ((pb)->pb_last_holder = -1) -# define PB_GET_OWNER(pb) ((pb)->pb_last_holder) +#ifdef XFS_BUF_LOCK_TRACKING +# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid) +# define XB_CLEAR_OWNER(bp) ((bp)->b_last_holder = -1) +# define XB_GET_OWNER(bp) ((bp)->b_last_holder) #else -# define PB_SET_OWNER(pb) do { } while (0) -# define PB_CLEAR_OWNER(pb) do { } while (0) -# define PB_GET_OWNER(pb) do { } while (0) +# define XB_SET_OWNER(bp) do { } while (0) +# define XB_CLEAR_OWNER(bp) do { } while (0) +# define XB_GET_OWNER(bp) do { } while (0) #endif -#define pb_to_gfp(flags) \ - ((((flags) & PBF_READ_AHEAD) ? __GFP_NORETRY : \ - ((flags) & PBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN) +#define xb_to_gfp(flags) \ + ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : \ + ((flags) & XBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN) -#define pb_to_km(flags) \ - (((flags) & PBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP) +#define xb_to_km(flags) \ + (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP) -#define pagebuf_allocate(flags) \ - kmem_zone_alloc(pagebuf_zone, pb_to_km(flags)) -#define pagebuf_deallocate(pb) \ - kmem_zone_free(pagebuf_zone, (pb)); +#define xfs_buf_allocate(flags) \ + kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags)) +#define xfs_buf_deallocate(bp) \ + kmem_zone_free(xfs_buf_zone, (bp)); /* - * Page Region interfaces. + * Page Region interfaces. * - * For pages in filesystems where the blocksize is smaller than the - * pagesize, we use the page->private field (long) to hold a bitmap - * of uptodate regions within the page. + * For pages in filesystems where the blocksize is smaller than the + * pagesize, we use the page->private field (long) to hold a bitmap + * of uptodate regions within the page. * - * Each such region is "bytes per page / bits per long" bytes long. + * Each such region is "bytes per page / bits per long" bytes long. * - * NBPPR == number-of-bytes-per-page-region - * BTOPR == bytes-to-page-region (rounded up) - * BTOPRT == bytes-to-page-region-truncated (rounded down) + * NBPPR == number-of-bytes-per-page-region + * BTOPR == bytes-to-page-region (rounded up) + * BTOPRT == bytes-to-page-region-truncated (rounded down) */ #if (BITS_PER_LONG == 32) #define PRSHIFT (PAGE_CACHE_SHIFT - 5) /* (32 == 1<<5) */ @@ -159,7 +160,7 @@ test_page_region( } /* - * Mapping of multi-page buffers into contiguous virtual space + * Mapping of multi-page buffers into contiguous virtual space */ typedef struct a_list { @@ -172,7 +173,7 @@ STATIC int as_list_len; STATIC DEFINE_SPINLOCK(as_lock); /* - * Try to batch vunmaps because they are costly. + * Try to batch vunmaps because they are costly. */ STATIC void free_address( @@ -215,83 +216,83 @@ purge_addresses(void) } /* - * Internal pagebuf object manipulation + * Internal xfs_buf_t object manipulation */ STATIC void -_pagebuf_initialize( - xfs_buf_t *pb, +_xfs_buf_initialize( + xfs_buf_t *bp, xfs_buftarg_t *target, - loff_t range_base, + xfs_off_t range_base, size_t range_length, - page_buf_flags_t flags) + xfs_buf_flags_t flags) { /* - * We don't want certain flags to appear in pb->pb_flags. + * We don't want certain flags to appear in b_flags. */ - flags &= ~(PBF_LOCK|PBF_MAPPED|PBF_DONT_BLOCK|PBF_READ_AHEAD); - - memset(pb, 0, sizeof(xfs_buf_t)); - atomic_set(&pb->pb_hold, 1); - init_MUTEX_LOCKED(&pb->pb_iodonesema); - INIT_LIST_HEAD(&pb->pb_list); - INIT_LIST_HEAD(&pb->pb_hash_list); - init_MUTEX_LOCKED(&pb->pb_sema); /* held, no waiters */ - PB_SET_OWNER(pb); - pb->pb_target = target; - pb->pb_file_offset = range_base; + flags &= ~(XBF_LOCK|XBF_MAPPED|XBF_DONT_BLOCK|XBF_READ_AHEAD); + + memset(bp, 0, sizeof(xfs_buf_t)); + atomic_set(&bp->b_hold, 1); + init_MUTEX_LOCKED(&bp->b_iodonesema); + INIT_LIST_HEAD(&bp->b_list); + INIT_LIST_HEAD(&bp->b_hash_list); + init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */ + XB_SET_OWNER(bp); + bp->b_target = target; + bp->b_file_offset = range_base; /* * Set buffer_length and count_desired to the same value initially. * I/O routines should use count_desired, which will be the same in * most cases but may be reset (e.g. XFS recovery). */ - pb->pb_buffer_length = pb->pb_count_desired = range_length; - pb->pb_flags = flags; - pb->pb_bn = XFS_BUF_DADDR_NULL; - atomic_set(&pb->pb_pin_count, 0); - init_waitqueue_head(&pb->pb_waiters); - - XFS_STATS_INC(pb_create); - PB_TRACE(pb, "initialize", target); + bp->b_buffer_length = bp->b_count_desired = range_length; + bp->b_flags = flags; + bp->b_bn = XFS_BUF_DADDR_NULL; + atomic_set(&bp->b_pin_count, 0); + init_waitqueue_head(&bp->b_waiters); + + XFS_STATS_INC(xb_create); + XB_TRACE(bp, "initialize", target); } /* - * Allocate a page array capable of holding a specified number - * of pages, and point the page buf at it. + * Allocate a page array capable of holding a specified number + * of pages, and point the page buf at it. */ STATIC int -_pagebuf_get_pages( - xfs_buf_t *pb, +_xfs_buf_get_pages( + xfs_buf_t *bp, int page_count, - page_buf_flags_t flags) + xfs_buf_flags_t flags) { /* Make sure that we have a page list */ - if (pb->pb_pages == NULL) { - pb->pb_offset = page_buf_poff(pb->pb_file_offset); - pb->pb_page_count = page_count; - if (page_count <= PB_PAGES) { - pb->pb_pages = pb->pb_page_array; + if (bp->b_pages == NULL) { + bp->b_offset = xfs_buf_poff(bp->b_file_offset); + bp->b_page_count = page_count; + if (page_count <= XB_PAGES) { + bp->b_pages = bp->b_page_array; } else { - pb->pb_pages = kmem_alloc(sizeof(struct page *) * - page_count, pb_to_km(flags)); - if (pb->pb_pages == NULL) + bp->b_pages = kmem_alloc(sizeof(struct page *) * + page_count, xb_to_km(flags)); + if (bp->b_pages == NULL) return -ENOMEM; } - memset(pb->pb_pages, 0, sizeof(struct page *) * page_count); + memset(bp->b_pages, 0, sizeof(struct page *) * page_count); } return 0; } /* - * Frees pb_pages if it was malloced. + * Frees b_pages if it was allocated. */ STATIC void -_pagebuf_free_pages( +_xfs_buf_free_pages( xfs_buf_t *bp) { - if (bp->pb_pages != bp->pb_page_array) { - kmem_free(bp->pb_pages, - bp->pb_page_count * sizeof(struct page *)); + if (bp->b_pages != bp->b_page_array) { + kmem_free(bp->b_pages, + bp->b_page_count * sizeof(struct page *)); } } @@ -299,79 +300,79 @@ _pagebuf_free_pages( * Releases the specified buffer. * * The modification state of any associated pages is left unchanged. - * The buffer most not be on any hash - use pagebuf_rele instead for + * The buffer most not be on any hash - use xfs_buf_rele instead for * hashed and refcounted buffers */ void -pagebuf_free( +xfs_buf_free( xfs_buf_t *bp) { - PB_TRACE(bp, "free", 0); + XB_TRACE(bp, "free", 0); - ASSERT(list_empty(&bp->pb_hash_list)); + ASSERT(list_empty(&bp->b_hash_list)); - if (bp->pb_flags & _PBF_PAGE_CACHE) { + if (bp->b_flags & _XBF_PAGE_CACHE) { uint i; - if ((bp->pb_flags & PBF_MAPPED) && (bp->pb_page_count > 1)) - free_address(bp->pb_addr - bp->pb_offset); + if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1)) + free_address(bp->b_addr - bp->b_offset); - for (i = 0; i < bp->pb_page_count; i++) - page_cache_release(bp->pb_pages[i]); - _pagebuf_free_pages(bp); - } else if (bp->pb_flags & _PBF_KMEM_ALLOC) { + for (i = 0; i < bp->b_page_count; i++) + page_cache_release(bp->b_pages[i]); + _xfs_buf_free_pages(bp); + } else if (bp->b_flags & _XBF_KMEM_ALLOC) { /* - * XXX(hch): bp->pb_count_desired might be incorrect (see - * pagebuf_associate_memory for details), but fortunately + * XXX(hch): bp->b_count_desired might be incorrect (see + * xfs_buf_associate_memory for details), but fortunately * the Linux version of kmem_free ignores the len argument.. */ - kmem_free(bp->pb_addr, bp->pb_count_desired); - _pagebuf_free_pages(bp); + kmem_free(bp->b_addr, bp->b_count_desired); + _xfs_buf_free_pages(bp); } - pagebuf_deallocate(bp); + xfs_buf_deallocate(bp); } /* * Finds all pages for buffer in question and builds it's page list. */ STATIC int -_pagebuf_lookup_pages( +_xfs_buf_lookup_pages( xfs_buf_t *bp, uint flags) { - struct address_space *mapping = bp->pb_target->pbr_mapping; - size_t blocksize = bp->pb_target->pbr_bsize; - size_t size = bp->pb_count_desired; + struct address_space *mapping = bp->b_target->bt_mapping; + size_t blocksize = bp->b_target->bt_bsize; + size_t size = bp->b_count_desired; size_t nbytes, offset; - gfp_t gfp_mask = pb_to_gfp(flags); + gfp_t gfp_mask = xb_to_gfp(flags); unsigned short page_count, i; pgoff_t first; - loff_t end; + xfs_off_t end; int error; - end = bp->pb_file_offset + bp->pb_buffer_length; - page_count = page_buf_btoc(end) - page_buf_btoct(bp->pb_file_offset); + end = bp->b_file_offset + bp->b_buffer_length; + page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset); - error = _pagebuf_get_pages(bp, page_count, flags); + error = _xfs_buf_get_pages(bp, page_count, flags); if (unlikely(error)) return error; - bp->pb_flags |= _PBF_PAGE_CACHE; + bp->b_flags |= _XBF_PAGE_CACHE; - offset = bp->pb_offset; - first = bp->pb_file_offset >> PAGE_CACHE_SHIFT; + offset = bp->b_offset; + first = bp->b_file_offset >> PAGE_CACHE_SHIFT; - for (i = 0; i < bp->pb_page_count; i++) { + for (i = 0; i < bp->b_page_count; i++) { struct page *page; uint retries = 0; retry: page = find_or_create_page(mapping, first + i, gfp_mask); if (unlikely(page == NULL)) { - if (flags & PBF_READ_AHEAD) { - bp->pb_page_count = i; - for (i = 0; i < bp->pb_page_count; i++) - unlock_page(bp->pb_pages[i]); + if (flags & XBF_READ_AHEAD) { + bp->b_page_count = i; + for (i = 0; i < bp->b_page_count; i++) + unlock_page(bp->b_pages[i]); return -ENOMEM; } @@ -387,13 +388,13 @@ _pagebuf_lookup_pages( "deadlock in %s (mode:0x%x)\n", __FUNCTION__, gfp_mask); - XFS_STATS_INC(pb_page_retries); + XFS_STATS_INC(xb_page_retries); xfsbufd_wakeup(0, gfp_mask); blk_congestion_wait(WRITE, HZ/50); goto retry; } - XFS_STATS_INC(pb_page_found); + XFS_STATS_INC(xb_page_found); nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset); size -= nbytes; @@ -401,27 +402,27 @@ _pagebuf_lookup_pages( if (!PageUptodate(page)) { page_count--; if (blocksize >= PAGE_CACHE_SIZE) { - if (flags & PBF_READ) - bp->pb_locked = 1; + if (flags & XBF_READ) + bp->b_locked = 1; } else if (!PagePrivate(page)) { if (test_page_region(page, offset, nbytes)) page_count++; } } - bp->pb_pages[i] = page; + bp->b_pages[i] = page; offset = 0; } - if (!bp->pb_locked) { - for (i = 0; i < bp->pb_page_count; i++) - unlock_page(bp->pb_pages[i]); + if (!bp->b_locked) { + for (i = 0; i < bp->b_page_count; i++) + unlock_page(bp->b_pages[i]); } - if (page_count == bp->pb_page_count) - bp->pb_flags |= PBF_DONE; + if (page_count == bp->b_page_count) + bp->b_flags |= XBF_DONE; - PB_TRACE(bp, "lookup_pages", (long)page_count); + XB_TRACE(bp, "lookup_pages", (long)page_count); return error; } @@ -429,23 +430,23 @@ _pagebuf_lookup_pages( * Map buffer into kernel address-space if nessecary. */ STATIC int -_pagebuf_map_pages( +_xfs_buf_map_pages( xfs_buf_t *bp, uint flags) { /* A single page buffer is always mappable */ - if (bp->pb_page_count == 1) { - bp->pb_addr = page_address(bp->pb_pages[0]) + bp->pb_offset; - bp->pb_flags |= PBF_MAPPED; - } else if (flags & PBF_MAPPED) { + if (bp->b_page_count == 1) { + bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; + bp->b_flags |= XBF_MAPPED; + } else if (flags & XBF_MAPPED) { if (as_list_len > 64) purge_addresses(); - bp->pb_addr = vmap(bp->pb_pages, bp->pb_page_count, - VM_MAP, PAGE_KERNEL); - if (unlikely(bp->pb_addr == NULL)) + bp->b_addr = vmap(bp->b_pages, bp->b_page_count, + VM_MAP, PAGE_KERNEL); + if (unlikely(bp->b_addr == NULL)) return -ENOMEM; - bp->pb_addr += bp->pb_offset; - bp->pb_flags |= PBF_MAPPED; + bp->b_addr += bp->b_offset; + bp->b_flags |= XBF_MAPPED; } return 0; @@ -456,9 +457,7 @@ _pagebuf_map_pages( */ /* - * _pagebuf_find - * - * Looks up, and creates if absent, a lockable buffer for + * Look up, and creates if absent, a lockable buffer for * a given range of an inode. The buffer is returned * locked. If other overlapping buffers exist, they are * released before the new buffer is created and locked, @@ -466,55 +465,55 @@ _pagebuf_map_pages( * are unlocked. No I/O is implied by this call. */ xfs_buf_t * -_pagebuf_find( +_xfs_buf_find( xfs_buftarg_t *btp, /* block device target */ - loff_t ioff, /* starting offset of range */ + xfs_off_t ioff, /* starting offset of range */ size_t isize, /* length of range */ - page_buf_flags_t flags, /* PBF_TRYLOCK */ - xfs_buf_t *new_pb)/* newly allocated buffer */ + xfs_buf_flags_t flags, + xfs_buf_t *new_bp) { - loff_t range_base; + xfs_off_t range_base; size_t range_length; xfs_bufhash_t *hash; - xfs_buf_t *pb, *n; + xfs_buf_t *bp, *n; range_base = (ioff << BBSHIFT); range_length = (isize << BBSHIFT); /* Check for IOs smaller than the sector size / not sector aligned */ - ASSERT(!(range_length < (1 << btp->pbr_sshift))); - ASSERT(!(range_base & (loff_t)btp->pbr_smask)); + ASSERT(!(range_length < (1 << btp->bt_sshift))); + ASSERT(!(range_base & (xfs_off_t)btp->bt_smask)); hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)]; spin_lock(&hash->bh_lock); - list_for_each_entry_safe(pb, n, &hash->bh_list, pb_hash_list) { - ASSERT(btp == pb->pb_target); - if (pb->pb_file_offset == range_base && - pb->pb_buffer_length == range_length) { + list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) { + ASSERT(btp == bp->b_target); + if (bp->b_file_offset == range_base && + bp->b_buffer_length == range_length) { /* - * If we look at something bring it to the + * If we look at something, bring it to the * front of the list for next time. */ - atomic_inc(&pb->pb_hold); - list_move(&pb->pb_hash_list, &hash->bh_list); + atomic_inc(&bp->b_hold); + list_move(&bp->b_hash_list, &hash->bh_list); goto found; } } /* No match found */ - if (new_pb) { - _pagebuf_initialize(new_pb, btp, range_base, + if (new_bp) { + _xfs_buf_initialize(new_bp, btp, range_base, range_length, flags); - new_pb->pb_hash = hash; - list_add(&new_pb->pb_hash_list, &hash->bh_list); + new_bp->b_hash = hash; + list_add(&new_bp->b_hash_list, &hash->bh_list); } else { - XFS_STATS_INC(pb_miss_locked); + XFS_STATS_INC(xb_miss_locked); } spin_unlock(&hash->bh_lock); - return new_pb; + return new_bp; found: spin_unlock(&hash->bh_lock); @@ -523,74 +522,72 @@ found: * if this does not work then we need to drop the * spinlock and do a hard attempt on the semaphore. */ - if (down_trylock(&pb->pb_sema)) { - if (!(flags & PBF_TRYLOCK)) { + if (down_trylock(&bp->b_sema)) { + if (!(flags & XBF_TRYLOCK)) { /* wait for buffer ownership */ - PB_TRACE(pb, "get_lock", 0); - pagebuf_lock(pb); - XFS_STATS_INC(pb_get_locked_waited); + XB_TRACE(bp, "get_lock", 0); + xfs_buf_lock(bp); + XFS_STATS_INC(xb_get_locked_waited); } else { /* We asked for a trylock and failed, no need * to look at file offset and length here, we - * know that this pagebuf at least overlaps our - * pagebuf and is locked, therefore our buffer - * either does not exist, or is this buffer + * know that this buffer at least overlaps our + * buffer and is locked, therefore our buffer + * either does not exist, or is this buffer. */ - - pagebuf_rele(pb); - XFS_STATS_INC(pb_busy_locked); - return (NULL); + xfs_buf_rele(bp); + XFS_STATS_INC(xb_busy_locked); + return NULL; } } else { /* trylock worked */ - PB_SET_OWNER(pb); + XB_SET_OWNER(bp); } - if (pb->pb_flags & PBF_STALE) { - ASSERT((pb->pb_flags & _PBF_DELWRI_Q) == 0); - pb->pb_flags &= PBF_MAPPED; + if (bp->b_flags & XBF_STALE) { + ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); + bp->b_flags &= XBF_MAPPED; } - PB_TRACE(pb, "got_lock", 0); - XFS_STATS_INC(pb_get_locked); - return (pb); + XB_TRACE(bp, "got_lock", 0); + XFS_STATS_INC(xb_get_locked); + return bp; } /* - * xfs_buf_get_flags assembles a buffer covering the specified range. - * + * Assembles a buffer covering the specified range. * Storage in memory for all portions of the buffer will be allocated, * although backing storage may not be. */ xfs_buf_t * -xfs_buf_get_flags( /* allocate a buffer */ +xfs_buf_get_flags( xfs_buftarg_t *target,/* target for buffer */ - loff_t ioff, /* starting offset of range */ + xfs_off_t ioff, /* starting offset of range */ size_t isize, /* length of range */ - page_buf_flags_t flags) /* PBF_TRYLOCK */ + xfs_buf_flags_t flags) { - xfs_buf_t *pb, *new_pb; + xfs_buf_t *bp, *new_bp; int error = 0, i; - new_pb = pagebuf_allocate(flags); - if (unlikely(!new_pb)) + new_bp = xfs_buf_allocate(flags); + if (unlikely(!new_bp)) return NULL; - pb = _pagebuf_find(target, ioff, isize, flags, new_pb); - if (pb == new_pb) { - error = _pagebuf_lookup_pages(pb, flags); + bp = _xfs_buf_find(target, ioff, isize, flags, new_bp); + if (bp == new_bp) { + error = _xfs_buf_lookup_pages(bp, flags); if (error) goto no_buffer; } else { - pagebuf_deallocate(new_pb); - if (unlikely(pb == NULL)) + xfs_buf_deallocate(new_bp); + if (unlikely(bp == NULL)) return NULL; } - for (i = 0; i < pb->pb_page_count; i++) - mark_page_accessed(pb->pb_pages[i]); + for (i = 0; i < bp->b_page_count; i++) + mark_page_accessed(bp->b_pages[i]); - if (!(pb->pb_flags & PBF_MAPPED)) { - error = _pagebuf_map_pages(pb, flags); + if (!(bp->b_flags & XBF_MAPPED)) { + error = _xfs_buf_map_pages(bp, flags); if (unlikely(error)) { printk(KERN_WARNING "%s: failed to map pages\n", __FUNCTION__); @@ -598,97 +595,97 @@ xfs_buf_get_flags( /* allocate a buffer */ } } - XFS_STATS_INC(pb_get); + XFS_STATS_INC(xb_get); /* * Always fill in the block number now, the mapped cases can do * their own overlay of this later. */ - pb->pb_bn = ioff; - pb->pb_count_desired = pb->pb_buffer_length; + bp->b_bn = ioff; + bp->b_count_desired = bp->b_buffer_length; - PB_TRACE(pb, "get", (unsigned long)flags); - return pb; + XB_TRACE(bp, "get", (unsigned long)flags); + return bp; no_buffer: - if (flags & (PBF_LOCK | PBF_TRYLOCK)) - pagebuf_unlock(pb); - pagebuf_rele(pb); + if (flags & (XBF_LOCK | XBF_TRYLOCK)) + xfs_buf_unlock(bp); + xfs_buf_rele(bp); return NULL; } xfs_buf_t * xfs_buf_read_flags( xfs_buftarg_t *target, - loff_t ioff, + xfs_off_t ioff, size_t isize, - page_buf_flags_t flags) + xfs_buf_flags_t flags) { - xfs_buf_t *pb; - - flags |= PBF_READ; - - pb = xfs_buf_get_flags(target, ioff, isize, flags); - if (pb) { - if (!XFS_BUF_ISDONE(pb)) { - PB_TRACE(pb, "read", (unsigned long)flags); - XFS_STATS_INC(pb_get_read); - pagebuf_iostart(pb, flags); - } else if (flags & PBF_ASYNC) { - PB_TRACE(pb, "read_async", (unsigned long)flags); + xfs_buf_t *bp; + + flags |= XBF_READ; + + bp = xfs_buf_get_flags(target, ioff, isize, flags); + if (bp) { + if (!XFS_BUF_ISDONE(bp)) { + XB_TRACE(bp, "read", (unsigned long)flags); + XFS_STATS_INC(xb_get_read); + xfs_buf_iostart(bp, flags); + } else if (flags & XBF_ASYNC) { + XB_TRACE(bp, "read_async", (unsigned long)flags); /* * Read ahead call which is already satisfied, * drop the buffer */ goto no_buffer; } else { - PB_TRACE(pb, "read_done", (unsigned long)flags); + XB_TRACE(bp, "read_done", (unsigned long)flags); /* We do not want read in the flags */ - pb->pb_flags &= ~PBF_READ; + bp->b_flags &= ~XBF_READ; } } - return pb; + return bp; no_buffer: - if (flags & (PBF_LOCK | PBF_TRYLOCK)) - pagebuf_unlock(pb); - pagebuf_rele(pb); + if (flags & (XBF_LOCK | XBF_TRYLOCK)) + xfs_buf_unlock(bp); + xfs_buf_rele(bp); return NULL; } /* - * If we are not low on memory then do the readahead in a deadlock - * safe manner. + * If we are not low on memory then do the readahead in a deadlock + * safe manner. */ void -pagebuf_readahead( +xfs_buf_readahead( xfs_buftarg_t *target, - loff_t ioff, + xfs_off_t ioff, size_t isize, - page_buf_flags_t flags) + xfs_buf_flags_t flags) { struct backing_dev_info *bdi; - bdi = target->pbr_mapping->backing_dev_info; + bdi = target->bt_mapping->backing_dev_info; if (bdi_read_congested(bdi)) return; - flags |= (PBF_TRYLOCK|PBF_ASYNC|PBF_READ_AHEAD); + flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD); xfs_buf_read_flags(target, ioff, isize, flags); } xfs_buf_t * -pagebuf_get_empty( +xfs_buf_get_empty( size_t len, xfs_buftarg_t *target) { - xfs_buf_t *pb; + xfs_buf_t *bp; - pb = pagebuf_allocate(0); - if (pb) - _pagebuf_initialize(pb, target, 0, len, 0); - return pb; + bp = xfs_buf_allocate(0); + if (bp) + _xfs_buf_initialize(bp, target, 0, len, 0); + return bp; } static inline struct page * @@ -704,8 +701,8 @@ mem_to_page( } int -pagebuf_associate_memory( - xfs_buf_t *pb, +xfs_buf_associate_memory( + xfs_buf_t *bp, void *mem, size_t len) { @@ -722,40 +719,40 @@ pagebuf_associate_memory( page_count++; /* Free any previous set of page pointers */ - if (pb->pb_pages) - _pagebuf_free_pages(pb); + if (bp->b_pages) + _xfs_buf_free_pages(bp); - pb->pb_pages = NULL; - pb->pb_addr = mem; + bp->b_pages = NULL; + bp->b_addr = mem; - rval = _pagebuf_get_pages(pb, page_count, 0); + rval = _xfs_buf_get_pages(bp, page_count, 0); if (rval) return rval; - pb->pb_offset = offset; + bp->b_offset = offset; ptr = (size_t) mem & PAGE_CACHE_MASK; end = PAGE_CACHE_ALIGN((size_t) mem + len); end_cur = end; /* set up first page */ - pb->pb_pages[0] = mem_to_page(mem); + bp->b_pages[0] = mem_to_page(mem); ptr += PAGE_CACHE_SIZE; - pb->pb_page_count = ++i; + bp->b_page_count = ++i; while (ptr < end) { - pb->pb_pages[i] = mem_to_page((void *)ptr); - pb->pb_page_count = ++i; + bp->b_pages[i] = mem_to_page((void *)ptr); + bp->b_page_count = ++i; ptr += PAGE_CACHE_SIZE; } - pb->pb_locked = 0; + bp->b_locked = 0; - pb->pb_count_desired = pb->pb_buffer_length = len; - pb->pb_flags |= PBF_MAPPED; + bp->b_count_desired = bp->b_buffer_length = len; + bp->b_flags |= XBF_MAPPED; return 0; } xfs_buf_t * -pagebuf_get_no_daddr( +xfs_buf_get_noaddr( size_t len, xfs_buftarg_t *target) { @@ -764,10 +761,10 @@ pagebuf_get_no_daddr( void *data; int error; - bp = pagebuf_allocate(0); + bp = xfs_buf_allocate(0); if (unlikely(bp == NULL)) goto fail; - _pagebuf_initialize(bp, target, 0, len, 0); + _xfs_buf_initialize(bp, target, 0, len, 0); try_again: data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL); @@ -776,78 +773,73 @@ pagebuf_get_no_daddr( /* check whether alignment matches.. */ if ((__psunsigned_t)data != - ((__psunsigned_t)data & ~target->pbr_smask)) { + ((__psunsigned_t)data & ~target->bt_smask)) { /* .. else double the size and try again */ kmem_free(data, malloc_len); malloc_len <<= 1; goto try_again; } - error = pagebuf_associate_memory(bp, data, len); + error = xfs_buf_associate_memory(bp, data, len); if (error) goto fail_free_mem; - bp->pb_flags |= _PBF_KMEM_ALLOC; + bp->b_flags |= _XBF_KMEM_ALLOC; - pagebuf_unlock(bp); + xfs_buf_unlock(bp); - PB_TRACE(bp, "no_daddr", data); + XB_TRACE(bp, "no_daddr", data); return bp; fail_free_mem: kmem_free(data, malloc_len); fail_free_buf: - pagebuf_free(bp); + xfs_buf_free(bp); fail: return NULL; } /* - * pagebuf_hold - * * Increment reference count on buffer, to hold the buffer concurrently * with another thread which may release (free) the buffer asynchronously. - * * Must hold the buffer already to call this function. */ void -pagebuf_hold( - xfs_buf_t *pb) +xfs_buf_hold( + xfs_buf_t *bp) { - atomic_inc(&pb->pb_hold); - PB_TRACE(pb, "hold", 0); + atomic_inc(&bp->b_hold); + XB_TRACE(bp, "hold", 0); } /* - * pagebuf_rele - * - * pagebuf_rele releases a hold on the specified buffer. If the - * the hold count is 1, pagebuf_rele calls pagebuf_free. + * Releases a hold on the specified buffer. If the + * the hold count is 1, calls xfs_buf_free. */ void -pagebuf_rele( - xfs_buf_t *pb) +xfs_buf_rele( + xfs_buf_t *bp) { - xfs_bufhash_t *hash = pb->pb_hash; + xfs_bufhash_t *hash = bp->b_hash; - PB_TRACE(pb, "rele", pb->pb_relse); + XB_TRACE(bp, "rele", bp->b_relse); - if (atomic_dec_and_lock(&pb->pb_hold, &hash->bh_lock)) { - if (pb->pb_relse) { - atomic_inc(&pb->pb_hold); + if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) { + if (bp->b_relse) { + atomic_inc(&bp->b_hold); spin_unlock(&hash->bh_lock); - (*(pb->pb_relse)) (pb); - } else if (pb->pb_flags & PBF_FS_MANAGED) { + (*(bp->b_relse)) (bp); + } else if (bp->b_flags & XBF_FS_MANAGED) { spin_unlock(&hash->bh_lock); } else { - ASSERT(!(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q))); - list_del_init(&pb->pb_hash_list); + ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); + list_del_init(&bp->b_hash_list); spin_unlock(&hash->bh_lock); - pagebuf_free(pb); + xfs_buf_free(bp); } } else { /* * Catch reference count leaks */ - ASSERT(atomic_read(&pb->pb_hold) >= 0); + ASSERT(atomic_read(&bp->b_hold) >= 0); } } @@ -863,168 +855,122 @@ pagebuf_rele( */ /* - * pagebuf_cond_lock - * - * pagebuf_cond_lock locks a buffer object, if it is not already locked. - * Note that this in no way - * locks the underlying pages, so it is only useful for synchronizing - * concurrent use of page buffer objects, not for synchronizing independent - * access to the underlying pages. + * Locks a buffer object, if it is not already locked. + * Note that this in no way locks the underlying pages, so it is only + * useful for synchronizing concurrent use of buffer objects, not for + * synchronizing independent access to the underlying pages. */ int -pagebuf_cond_lock( /* lock buffer, if not locked */ - /* returns -EBUSY if locked) */ - xfs_buf_t *pb) +xfs_buf_cond_lock( + xfs_buf_t *bp) { int locked; - locked = down_trylock(&pb->pb_sema) == 0; + locked = down_trylock(&bp->b_sema) == 0; if (locked) { - PB_SET_OWNER(pb); + XB_SET_OWNER(bp); } - PB_TRACE(pb, "cond_lock", (long)locked); - return(locked ? 0 : -EBUSY); + XB_TRACE(bp, "cond_lock", (long)locked); + return locked ? 0 : -EBUSY; } #if defined(DEBUG) || defined(XFS_BLI_TRACE) -/* - * pagebuf_lock_value - * - * Return lock value for a pagebuf - */ int -pagebuf_lock_value( - xfs_buf_t *pb) +xfs_buf_lock_value( + xfs_buf_t *bp) { - return(atomic_read(&pb->pb_sema.count)); + return atomic_read(&bp->b_sema.count); } #endif /* - * pagebuf_lock - * - * pagebuf_lock locks a buffer object. Note that this in no way - * locks the underlying pages, so it is only useful for synchronizing - * concurrent use of page buffer objects, not for synchronizing independent - * access to the underlying pages. + * Locks a buffer object. + * Note that this in no way locks the underlying pages, so it is only + * useful for synchronizing concurrent use of buffer objects, not for + * synchronizing independent access to the underlying pages. */ -int -pagebuf_lock( - xfs_buf_t *pb) +void +xfs_buf_lock( + xfs_buf_t *bp) { - PB_TRACE(pb, "lock", 0); - if (atomic_read(&pb->pb_io_remaining)) - blk_run_address_space(pb->pb_target->pbr_mapping); - down(&pb->pb_sema); - PB_SET_OWNER(pb); - PB_TRACE(pb, "locked", 0); - return 0; + XB_TRACE(bp, "lock", 0); + if (atomic_read(&bp->b_io_remaining)) + blk_run_address_space(bp->b_target->bt_mapping); + down(&bp->b_sema); + XB_SET_OWNER(bp); + XB_TRACE(bp, "locked", 0); } /* - * pagebuf_unlock - * - * pagebuf_unlock releases the lock on the buffer object created by - * pagebuf_lock or pagebuf_cond_lock (not any pinning of underlying pages - * created by pagebuf_pin). - * + * Releases the lock on the buffer object. * If the buffer is marked delwri but is not queued, do so before we - * unlock the buffer as we need to set flags correctly. We also need to + * unlock the buffer as we need to set flags correctly. We also need to * take a reference for the delwri queue because the unlocker is going to * drop their's and they don't know we just queued it. */ void -pagebuf_unlock( /* unlock buffer */ - xfs_buf_t *pb) /* buffer to unlock */ +xfs_buf_unlock( + xfs_buf_t *bp) { - if ((pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)) == PBF_DELWRI) { - atomic_inc(&pb->pb_hold); - pb->pb_flags |= PBF_ASYNC; - pagebuf_delwri_queue(pb, 0); + if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) { + atomic_inc(&bp->b_hold); + bp->b_flags |= XBF_ASYNC; + xfs_buf_delwri_queue(bp, 0); } - PB_CLEAR_OWNER(pb); - up(&pb->pb_sema); - PB_TRACE(pb, "unlock", 0); + XB_CLEAR_OWNER(bp); + up(&bp->b_sema); + XB_TRACE(bp, "unlock", 0); } /* * Pinning Buffer Storage in Memory - */ - -/* - * pagebuf_pin - * - * pagebuf_pin locks all of the memory represented by a buffer in - * memory. Multiple calls to pagebuf_pin and pagebuf_unpin, for - * the same or different buffers affecting a given page, will - * properly count the number of outstanding "pin" requests. The - * buffer may be released after the pagebuf_pin and a different - * buffer used when calling pagebuf_unpin, if desired. - * pagebuf_pin should be used by the file system when it wants be - * assured that no attempt will be made to force the affected - * memory to disk. It does not assure that a given logical page - * will not be moved to a different physical page. + * Ensure that no attempt to force a buffer to disk will succeed. */ void -pagebuf_pin( - xfs_buf_t *pb) +xfs_buf_pin( + xfs_buf_t *bp) { - atomic_inc(&pb->pb_pin_count); - PB_TRACE(pb, "pin", (long)pb->pb_pin_count.counter); + atomic_inc(&bp->b_pin_count); + XB_TRACE(bp, "pin", (long)bp->b_pin_count.counter); } -/* - * pagebuf_unpin - * - * pagebuf_unpin reverses the locking of memory performed by - * pagebuf_pin. Note that both functions affected the logical - * pages associated with the buffer, not the buffer itself. - */ void -pagebuf_unpin( - xfs_buf_t *pb) +xfs_buf_unpin( + xfs_buf_t *bp) { - if (atomic_dec_and_test(&pb->pb_pin_count)) { - wake_up_all(&pb->pb_waiters); - } - PB_TRACE(pb, "unpin", (long)pb->pb_pin_count.counter); + if (atomic_dec_and_test(&bp->b_pin_count)) + wake_up_all(&bp->b_waiters); + XB_TRACE(bp, "unpin", (long)bp->b_pin_count.counter); } int -pagebuf_ispin( - xfs_buf_t *pb) +xfs_buf_ispin( + xfs_buf_t *bp) { - return atomic_read(&pb->pb_pin_count); + return atomic_read(&bp->b_pin_count); } -/* - * pagebuf_wait_unpin - * - * pagebuf_wait_unpin waits until all of the memory associated - * with the buffer is not longer locked in memory. It returns - * immediately if none of the affected pages are locked. - */ -static inline void -_pagebuf_wait_unpin( - xfs_buf_t *pb) +STATIC void +xfs_buf_wait_unpin( + xfs_buf_t *bp) { DECLARE_WAITQUEUE (wait, current); - if (atomic_read(&pb->pb_pin_count) == 0) + if (atomic_read(&bp->b_pin_count) == 0) return; - add_wait_queue(&pb->pb_waiters, &wait); + add_wait_queue(&bp->b_waiters, &wait); for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); - if (atomic_read(&pb->pb_pin_count) == 0) + if (atomic_read(&bp->b_pin_count) == 0) break; - if (atomic_read(&pb->pb_io_remaining)) - blk_run_address_space(pb->pb_target->pbr_mapping); + if (atomic_read(&bp->b_io_remaining)) + blk_run_address_space(bp->b_target->bt_mapping); schedule(); } - remove_wait_queue(&pb->pb_waiters, &wait); + remove_wait_queue(&bp->b_waiters, &wait); set_current_state(TASK_RUNNING); } @@ -1032,241 +978,216 @@ _pagebuf_wait_unpin( * Buffer Utility Routines */ -/* - * pagebuf_iodone - * - * pagebuf_iodone marks a buffer for which I/O is in progress - * done with respect to that I/O. The pb_iodone routine, if - * present, will be called as a side-effect. - */ STATIC void -pagebuf_iodone_work( +xfs_buf_iodone_work( void *v) { xfs_buf_t *bp = (xfs_buf_t *)v; - if (bp->pb_iodone) - (*(bp->pb_iodone))(bp); - else if (bp->pb_flags & PBF_ASYNC) + if (bp->b_iodone) + (*(bp->b_iodone))(bp); + else if (bp->b_flags & XBF_ASYNC) xfs_buf_relse(bp); } void -pagebuf_iodone( - xfs_buf_t *pb, +xfs_buf_ioend( + xfs_buf_t *bp, int schedule) { - pb->pb_flags &= ~(PBF_READ | PBF_WRITE); - if (pb->pb_error == 0) - pb->pb_flags |= PBF_DONE; + bp->b_flags &= ~(XBF_READ | XBF_WRITE); + if (bp->b_error == 0) + bp->b_flags |= XBF_DONE; - PB_TRACE(pb, "iodone", pb->pb_iodone); + XB_TRACE(bp, "iodone", bp->b_iodone); - if ((pb->pb_iodone) || (pb->pb_flags & PBF_ASYNC)) { + if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) { if (schedule) { - INIT_WORK(&pb->pb_iodone_work, pagebuf_iodone_work, pb); - queue_work(xfslogd_workqueue, &pb->pb_iodone_work); + INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work, bp); + queue_work(xfslogd_workqueue, &bp->b_iodone_work); } else { - pagebuf_iodone_work(pb); + xfs_buf_iodone_work(bp); } } else { - up(&pb->pb_iodonesema); + up(&bp->b_iodonesema); } } -/* - * pagebuf_ioerror - * - * pagebuf_ioerror sets the error code for a buffer. - */ void -pagebuf_ioerror( /* mark/clear buffer error flag */ - xfs_buf_t *pb, /* buffer to mark */ - int error) /* error to store (0 if none) */ +xfs_buf_ioerror( + xfs_buf_t *bp, + int error) { ASSERT(error >= 0 && error <= 0xffff); - pb->pb_error = (unsigned short)error; - PB_TRACE(pb, "ioerror", (unsigned long)error); + bp->b_error = (unsigned short)error; + XB_TRACE(bp, "ioerror", (unsigned long)error); } /* - * pagebuf_iostart - * - * pagebuf_iostart initiates I/O on a buffer, based on the flags supplied. - * If necessary, it will arrange for any disk space allocation required, - * and it will break up the request if the block mappings require it. - * The pb_iodone routine in the buffer supplied will only be called + * Initiate I/O on a buffer, based on the flags supplied. + * The b_iodone routine in the buffer supplied will only be called * when all of the subsidiary I/O requests, if any, have been completed. - * pagebuf_iostart calls the pagebuf_ioinitiate routine or - * pagebuf_iorequest, if the former routine is not defined, to start - * the I/O on a given low-level request. */ int -pagebuf_iostart( /* start I/O on a buffer */ - xfs_buf_t *pb, /* buffer to start */ - page_buf_flags_t flags) /* PBF_LOCK, PBF_ASYNC, PBF_READ, */ - /* PBF_WRITE, PBF_DELWRI, */ - /* PBF_DONT_BLOCK */ +xfs_buf_iostart( + xfs_buf_t *bp, + xfs_buf_flags_t flags) { int status = 0; - PB_TRACE(pb, "iostart", (unsigned long)flags); + XB_TRACE(bp, "iostart", (unsigned long)flags); - if (flags & PBF_DELWRI) { - pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC); - pb->pb_flags |= flags & (PBF_DELWRI | PBF_ASYNC); - pagebuf_delwri_queue(pb, 1); + if (flags & XBF_DELWRI) { + bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC); + bp->b_flags |= flags & (XBF_DELWRI | XBF_ASYNC); + xfs_buf_delwri_queue(bp, 1); return status; } - pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC | PBF_DELWRI | \ - PBF_READ_AHEAD | _PBF_RUN_QUEUES); - pb->pb_flags |= flags & (PBF_READ | PBF_WRITE | PBF_ASYNC | \ - PBF_READ_AHEAD | _PBF_RUN_QUEUES); + bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \ + XBF_READ_AHEAD | _XBF_RUN_QUEUES); + bp->b_flags |= flags & (XBF_READ | XBF_WRITE | XBF_ASYNC | \ + XBF_READ_AHEAD | _XBF_RUN_QUEUES); - BUG_ON(pb->pb_bn == XFS_BUF_DADDR_NULL); + BUG_ON(bp->b_bn == XFS_BUF_DADDR_NULL); /* For writes allow an alternate strategy routine to precede * the actual I/O request (which may not be issued at all in * a shutdown situation, for example). */ - status = (flags & PBF_WRITE) ? - pagebuf_iostrategy(pb) : pagebuf_iorequest(pb); + status = (flags & XBF_WRITE) ? + xfs_buf_iostrategy(bp) : xfs_buf_iorequest(bp); /* Wait for I/O if we are not an async request. * Note: async I/O request completion will release the buffer, * and that can already be done by this point. So using the * buffer pointer from here on, after async I/O, is invalid. */ - if (!status && !(flags & PBF_ASYNC)) - status = pagebuf_iowait(pb); + if (!status && !(flags & XBF_ASYNC)) + status = xfs_buf_iowait(bp); return status; } -/* - * Helper routine for pagebuf_iorequest - */ - STATIC __inline__ int -_pagebuf_iolocked( - xfs_buf_t *pb) +_xfs_buf_iolocked( + xfs_buf_t *bp) { - ASSERT(pb->pb_flags & (PBF_READ|PBF_WRITE)); - if (pb->pb_flags & PBF_READ) - return pb->pb_locked; + ASSERT(bp->b_flags & (XBF_READ | XBF_WRITE)); + if (bp->b_flags & XBF_READ) + return bp->b_locked; return 0; } STATIC __inline__ void -_pagebuf_iodone( - xfs_buf_t *pb, +_xfs_buf_ioend( + xfs_buf_t *bp, int schedule) { - if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { - pb->pb_locked = 0; - pagebuf_iodone(pb, schedule); + if (atomic_dec_and_test(&bp->b_io_remaining) == 1) { + bp->b_locked = 0; + xfs_buf_ioend(bp, schedule); } } STATIC int -bio_end_io_pagebuf( +xfs_buf_bio_end_io( struct bio *bio, unsigned int bytes_done, int error) { - xfs_buf_t *pb = (xfs_buf_t *)bio->bi_private; - unsigned int blocksize = pb->pb_target->pbr_bsize; + xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; + unsigned int blocksize = bp->b_target->bt_bsize; struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; if (bio->bi_size) return 1; if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) - pb->pb_error = EIO; + bp->b_error = EIO; do { struct page *page = bvec->bv_page; - if (unlikely(pb->pb_error)) { - if (pb->pb_flags & PBF_READ) + if (unlikely(bp->b_error)) { + if (bp->b_flags & XBF_READ) ClearPageUptodate(page); SetPageError(page); - } else if (blocksize == PAGE_CACHE_SIZE) { + } else if (blocksize >= PAGE_CACHE_SIZE) { SetPageUptodate(page); } else if (!PagePrivate(page) && - (pb->pb_flags & _PBF_PAGE_CACHE)) { + (bp->b_flags & _XBF_PAGE_CACHE)) { set_page_region(page, bvec->bv_offset, bvec->bv_len); } if (--bvec >= bio->bi_io_vec) prefetchw(&bvec->bv_page->flags); - if (_pagebuf_iolocked(pb)) { + if (_xfs_buf_iolocked(bp)) { unlock_page(page); } } while (bvec >= bio->bi_io_vec); - _pagebuf_iodone(pb, 1); + _xfs_buf_ioend(bp, 1); bio_put(bio); return 0; } STATIC void -_pagebuf_ioapply( - xfs_buf_t *pb) +_xfs_buf_ioapply( + xfs_buf_t *bp) { int i, rw, map_i, total_nr_pages, nr_pages; struct bio *bio; - int offset = pb->pb_offset; - int size = pb->pb_count_desired; - sector_t sector = pb->pb_bn; - unsigned int blocksize = pb->pb_target->pbr_bsize; - int locking = _pagebuf_iolocked(pb); + int offset = bp->b_offset; + int size = bp->b_count_desired; + sector_t sector = bp->b_bn; + unsigned int blocksize = bp->b_target->bt_bsize; + int locking = _xfs_buf_iolocked(bp); - total_nr_pages = pb->pb_page_count; + total_nr_pages = bp->b_page_count; map_i = 0; - if (pb->pb_flags & _PBF_RUN_QUEUES) { - pb->pb_flags &= ~_PBF_RUN_QUEUES; - rw = (pb->pb_flags & PBF_READ) ? READ_SYNC : WRITE_SYNC; + if (bp->b_flags & _XBF_RUN_QUEUES) { + bp->b_flags &= ~_XBF_RUN_QUEUES; + rw = (bp->b_flags & XBF_READ) ? READ_SYNC : WRITE_SYNC; } else { - rw = (pb->pb_flags & PBF_READ) ? READ : WRITE; + rw = (bp->b_flags & XBF_READ) ? READ : WRITE; } - if (pb->pb_flags & PBF_ORDERED) { - ASSERT(!(pb->pb_flags & PBF_READ)); + if (bp->b_flags & XBF_ORDERED) { + ASSERT(!(bp->b_flags & XBF_READ)); rw = WRITE_BARRIER; } - /* Special code path for reading a sub page size pagebuf in -- + /* Special code path for reading a sub page size buffer in -- * we populate up the whole page, and hence the other metadata * in the same page. This optimization is only valid when the - * filesystem block size and the page size are equal. + * filesystem block size is not smaller than the page size. */ - if ((pb->pb_buffer_length < PAGE_CACHE_SIZE) && - (pb->pb_flags & PBF_READ) && locking && - (blocksize == PAGE_CACHE_SIZE)) { + if ((bp->b_buffer_length < PAGE_CACHE_SIZE) && + (bp->b_flags & XBF_READ) && locking && + (blocksize >= PAGE_CACHE_SIZE)) { bio = bio_alloc(GFP_NOIO, 1); - bio->bi_bdev = pb->pb_target->pbr_bdev; + bio->bi_bdev = bp->b_target->bt_bdev; bio->bi_sector = sector - (offset >> BBSHIFT); - bio->bi_end_io = bio_end_io_pagebuf; - bio->bi_private = pb; + bio->bi_end_io = xfs_buf_bio_end_io; + bio->bi_private = bp; - bio_add_page(bio, pb->pb_pages[0], PAGE_CACHE_SIZE, 0); + bio_add_page(bio, bp->b_pages[0], PAGE_CACHE_SIZE, 0); size = 0; - atomic_inc(&pb->pb_io_remaining); + atomic_inc(&bp->b_io_remaining); goto submit_io; } /* Lock down the pages which we need to for the request */ - if (locking && (pb->pb_flags & PBF_WRITE) && (pb->pb_locked == 0)) { + if (locking && (bp->b_flags & XBF_WRITE) && (bp->b_locked == 0)) { for (i = 0; size; i++) { int nbytes = PAGE_CACHE_SIZE - offset; - struct page *page = pb->pb_pages[i]; + struct page *page = bp->b_pages[i]; if (nbytes > size) nbytes = size; @@ -1276,30 +1197,30 @@ _pagebuf_ioapply( size -= nbytes; offset = 0; } - offset = pb->pb_offset; - size = pb->pb_count_desired; + offset = bp->b_offset; + size = bp->b_count_desired; } next_chunk: - atomic_inc(&pb->pb_io_remaining); + atomic_inc(&bp->b_io_remaining); nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT); if (nr_pages > total_nr_pages) nr_pages = total_nr_pages; bio = bio_alloc(GFP_NOIO, nr_pages); - bio->bi_bdev = pb->pb_target->pbr_bdev; + bio->bi_bdev = bp->b_target->bt_bdev; bio->bi_sector = sector; - bio->bi_end_io = bio_end_io_pagebuf; - bio->bi_private = pb; + bio->bi_end_io = xfs_buf_bio_end_io; + bio->bi_private = bp; for (; size && nr_pages; nr_pages--, map_i++) { - int nbytes = PAGE_CACHE_SIZE - offset; + int rbytes, nbytes = PAGE_CACHE_SIZE - offset; if (nbytes > size) nbytes = size; - if (bio_add_page(bio, pb->pb_pages[map_i], - nbytes, offset) < nbytes) + rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset); + if (rbytes < nbytes) break; offset = 0; @@ -1315,107 +1236,102 @@ submit_io: goto next_chunk; } else { bio_put(bio); - pagebuf_ioerror(pb, EIO); + xfs_buf_ioerror(bp, EIO); } } -/* - * pagebuf_iorequest -- the core I/O request routine. - */ int -pagebuf_iorequest( /* start real I/O */ - xfs_buf_t *pb) /* buffer to convey to device */ +xfs_buf_iorequest( + xfs_buf_t *bp) { - PB_TRACE(pb, "iorequest", 0); + XB_TRACE(bp, "iorequest", 0); - if (pb->pb_flags & PBF_DELWRI) { - pagebuf_delwri_queue(pb, 1); + if (bp->b_flags & XBF_DELWRI) { + xfs_buf_delwri_queue(bp, 1); return 0; } - if (pb->pb_flags & PBF_WRITE) { - _pagebuf_wait_unpin(pb); + if (bp->b_flags & XBF_WRITE) { + xfs_buf_wait_unpin(bp); } - pagebuf_hold(pb); + xfs_buf_hold(bp); /* Set the count to 1 initially, this will stop an I/O * completion callout which happens before we have started - * all the I/O from calling pagebuf_iodone too early. + * all the I/O from calling xfs_buf_ioend too early. */ - atomic_set(&pb->pb_io_remaining, 1); - _pagebuf_ioapply(pb); - _pagebuf_iodone(pb, 0); + atomic_set(&bp->b_io_remaining, 1); + _xfs_buf_ioapply(bp); + _xfs_buf_ioend(bp, 0); - pagebuf_rele(pb); + xfs_buf_rele(bp); return 0; } /* - * pagebuf_iowait - * - * pagebuf_iowait waits for I/O to complete on the buffer supplied. - * It returns immediately if no I/O is pending. In any case, it returns - * the error code, if any, or 0 if there is no error. + * Waits for I/O to complete on the buffer supplied. + * It returns immediately if no I/O is pending. + * It returns the I/O error code, if any, or 0 if there was no error. */ int -pagebuf_iowait( - xfs_buf_t *pb) +xfs_buf_iowait( + xfs_buf_t *bp) { - PB_TRACE(pb, "iowait", 0); - if (atomic_read(&pb->pb_io_remaining)) - blk_run_address_space(pb->pb_target->pbr_mapping); - down(&pb->pb_iodonesema); - PB_TRACE(pb, "iowaited", (long)pb->pb_error); - return pb->pb_error; + XB_TRACE(bp, "iowait", 0); + if (atomic_read(&bp->b_io_remaining)) + blk_run_address_space(bp->b_target->bt_mapping); + down(&bp->b_iodonesema); + XB_TRACE(bp, "iowaited", (long)bp->b_error); + return bp->b_error; } -caddr_t -pagebuf_offset( - xfs_buf_t *pb, +xfs_caddr_t +xfs_buf_offset( + xfs_buf_t *bp, size_t offset) { struct page *page; - offset += pb->pb_offset; + if (bp->b_flags & XBF_MAPPED) + return XFS_BUF_PTR(bp) + offset; - page = pb->pb_pages[offset >> PAGE_CACHE_SHIFT]; - return (caddr_t) page_address(page) + (offset & (PAGE_CACHE_SIZE - 1)); + offset += bp->b_offset; + page = bp->b_pages[offset >> PAGE_CACHE_SHIFT]; + return (xfs_caddr_t)page_address(page) + (offset & (PAGE_CACHE_SIZE-1)); } /* - * pagebuf_iomove - * * Move data into or out of a buffer. */ void -pagebuf_iomove( - xfs_buf_t *pb, /* buffer to process */ +xfs_buf_iomove( + xfs_buf_t *bp, /* buffer to process */ size_t boff, /* starting buffer offset */ size_t bsize, /* length to copy */ caddr_t data, /* data address */ - page_buf_rw_t mode) /* read/write flag */ + xfs_buf_rw_t mode) /* read/write/zero flag */ { size_t bend, cpoff, csize; struct page *page; bend = boff + bsize; while (boff < bend) { - page = pb->pb_pages[page_buf_btoct(boff + pb->pb_offset)]; - cpoff = page_buf_poff(boff + pb->pb_offset); + page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)]; + cpoff = xfs_buf_poff(boff + bp->b_offset); csize = min_t(size_t, - PAGE_CACHE_SIZE-cpoff, pb->pb_count_desired-boff); + PAGE_CACHE_SIZE-cpoff, bp->b_count_desired-boff); ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE)); switch (mode) { - case PBRW_ZERO: + case XBRW_ZERO: memset(page_address(page) + cpoff, 0, csize); break; - case PBRW_READ: + case XBRW_READ: memcpy(data, page_address(page) + cpoff, csize); break; - case PBRW_WRITE: + case XBRW_WRITE: memcpy(page_address(page) + cpoff, data, csize); } @@ -1425,12 +1341,12 @@ pagebuf_iomove( } /* - * Handling of buftargs. + * Handling of buffer targets (buftargs). */ /* - * Wait for any bufs with callbacks that have been submitted but - * have not yet returned... walk the hash list for the target. + * Wait for any bufs with callbacks that have been submitted but + * have not yet returned... walk the hash list for the target. */ void xfs_wait_buftarg( @@ -1444,15 +1360,15 @@ xfs_wait_buftarg( hash = &btp->bt_hash[i]; again: spin_lock(&hash->bh_lock); - list_for_each_entry_safe(bp, n, &hash->bh_list, pb_hash_list) { - ASSERT(btp == bp->pb_target); - if (!(bp->pb_flags & PBF_FS_MANAGED)) { + list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) { + ASSERT(btp == bp->b_target); + if (!(bp->b_flags & XBF_FS_MANAGED)) { spin_unlock(&hash->bh_lock); /* * Catch superblock reference count leaks * immediately */ - BUG_ON(bp->pb_bn == 0); + BUG_ON(bp->b_bn == 0); delay(100); goto again; } @@ -1462,9 +1378,9 @@ again: } /* - * Allocate buffer hash table for a given target. - * For devices containing metadata (i.e. not the log/realtime devices) - * we need to allocate a much larger hash table. + * Allocate buffer hash table for a given target. + * For devices containing metadata (i.e. not the log/realtime devices) + * we need to allocate a much larger hash table. */ STATIC void xfs_alloc_bufhash( @@ -1487,11 +1403,34 @@ STATIC void xfs_free_bufhash( xfs_buftarg_t *btp) { - kmem_free(btp->bt_hash, - (1 << btp->bt_hashshift) * sizeof(xfs_bufhash_t)); + kmem_free(btp->bt_hash, (1<<btp->bt_hashshift) * sizeof(xfs_bufhash_t)); btp->bt_hash = NULL; } +/* + * buftarg list for delwrite queue processing + */ +STATIC LIST_HEAD(xfs_buftarg_list); +STATIC DEFINE_SPINLOCK(xfs_buftarg_lock); + +STATIC void +xfs_register_buftarg( + xfs_buftarg_t *btp) +{ + spin_lock(&xfs_buftarg_lock); + list_add(&btp->bt_list, &xfs_buftarg_list); + spin_unlock(&xfs_buftarg_lock); +} + +STATIC void +xfs_unregister_buftarg( + xfs_buftarg_t *btp) +{ + spin_lock(&xfs_buftarg_lock); + list_del(&btp->bt_list); + spin_unlock(&xfs_buftarg_lock); +} + void xfs_free_buftarg( xfs_buftarg_t *btp, @@ -1499,9 +1438,16 @@ xfs_free_buftarg( { xfs_flush_buftarg(btp, 1); if (external) - xfs_blkdev_put(btp->pbr_bdev); + xfs_blkdev_put(btp->bt_bdev); xfs_free_bufhash(btp); - iput(btp->pbr_mapping->host); + iput(btp->bt_mapping->host); + + /* Unregister the buftarg first so that we don't get a + * wakeup finding a non-existent task + */ + xfs_unregister_buftarg(btp); + kthread_stop(btp->bt_task); + kmem_free(btp, sizeof(*btp)); } @@ -1512,11 +1458,11 @@ xfs_setsize_buftarg_flags( unsigned int sectorsize, int verbose) { - btp->pbr_bsize = blocksize; - btp->pbr_sshift = ffs(sectorsize) - 1; - btp->pbr_smask = sectorsize - 1; + btp->bt_bsize = blocksize; + btp->bt_sshift = ffs(sectorsize) - 1; + btp->bt_smask = sectorsize - 1; - if (set_blocksize(btp->pbr_bdev, sectorsize)) { + if (set_blocksize(btp->bt_bdev, sectorsize)) { printk(KERN_WARNING "XFS: Cannot set_blocksize to %u on device %s\n", sectorsize, XFS_BUFTARG_NAME(btp)); @@ -1536,10 +1482,10 @@ xfs_setsize_buftarg_flags( } /* -* When allocating the initial buffer target we have not yet -* read in the superblock, so don't know what sized sectors -* are being used is at this early stage. Play safe. -*/ + * When allocating the initial buffer target we have not yet + * read in the superblock, so don't know what sized sectors + * are being used is at this early stage. Play safe. + */ STATIC int xfs_setsize_buftarg_early( xfs_buftarg_t *btp, @@ -1587,10 +1533,30 @@ xfs_mapping_buftarg( mapping->a_ops = &mapping_aops; mapping->backing_dev_info = bdi; mapping_set_gfp_mask(mapping, GFP_NOFS); - btp->pbr_mapping = mapping; + btp->bt_mapping = mapping; return 0; } +STATIC int +xfs_alloc_delwrite_queue( + xfs_buftarg_t *btp) +{ + int error = 0; + + INIT_LIST_HEAD(&btp->bt_list); + INIT_LIST_HEAD(&btp->bt_delwrite_queue); + spinlock_init(&btp->bt_delwrite_lock, "delwri_lock"); + btp->bt_flags = 0; + btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd"); + if (IS_ERR(btp->bt_task)) { + error = PTR_ERR(btp->bt_task); + goto out_error; + } + xfs_register_buftarg(btp); +out_error: + return error; +} + xfs_buftarg_t * xfs_alloc_buftarg( struct block_device *bdev, @@ -1600,12 +1566,14 @@ xfs_alloc_buftarg( btp = kmem_zalloc(sizeof(*btp), KM_SLEEP); - btp->pbr_dev = bdev->bd_dev; - btp->pbr_bdev = bdev; + btp->bt_dev = bdev->bd_dev; + btp->bt_bdev = bdev; if (xfs_setsize_buftarg_early(btp, bdev)) goto error; if (xfs_mapping_buftarg(btp, bdev)) goto error; + if (xfs_alloc_delwrite_queue(btp)) + goto error; xfs_alloc_bufhash(btp, external); return btp; @@ -1616,83 +1584,81 @@ error: /* - * Pagebuf delayed write buffer handling + * Delayed write buffer handling */ - -STATIC LIST_HEAD(pbd_delwrite_queue); -STATIC DEFINE_SPINLOCK(pbd_delwrite_lock); - STATIC void -pagebuf_delwri_queue( - xfs_buf_t *pb, +xfs_buf_delwri_queue( + xfs_buf_t *bp, int unlock) { - PB_TRACE(pb, "delwri_q", (long)unlock); - ASSERT((pb->pb_flags & (PBF_DELWRI|PBF_ASYNC)) == - (PBF_DELWRI|PBF_ASYNC)); + struct list_head *dwq = &bp->b_target->bt_delwrite_queue; + spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock; + + XB_TRACE(bp, "delwri_q", (long)unlock); + ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC)); - spin_lock(&pbd_delwrite_lock); + spin_lock(dwlk); /* If already in the queue, dequeue and place at tail */ - if (!list_empty(&pb->pb_list)) { - ASSERT(pb->pb_flags & _PBF_DELWRI_Q); - if (unlock) { - atomic_dec(&pb->pb_hold); - } - list_del(&pb->pb_list); + if (!list_empty(&bp->b_list)) { + ASSERT(bp->b_flags & _XBF_DELWRI_Q); + if (unlock) + atomic_dec(&bp->b_hold); + list_del(&bp->b_list); } - pb->pb_flags |= _PBF_DELWRI_Q; - list_add_tail(&pb->pb_list, &pbd_delwrite_queue); - pb->pb_queuetime = jiffies; - spin_unlock(&pbd_delwrite_lock); + bp->b_flags |= _XBF_DELWRI_Q; + list_add_tail(&bp->b_list, dwq); + bp->b_queuetime = jiffies; + spin_unlock(dwlk); if (unlock) - pagebuf_unlock(pb); + xfs_buf_unlock(bp); } void -pagebuf_delwri_dequeue( - xfs_buf_t *pb) +xfs_buf_delwri_dequeue( + xfs_buf_t *bp) { + spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock; int dequeued = 0; - spin_lock(&pbd_delwrite_lock); - if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) { - ASSERT(pb->pb_flags & _PBF_DELWRI_Q); - list_del_init(&pb->pb_list); + spin_lock(dwlk); + if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) { + ASSERT(bp->b_flags & _XBF_DELWRI_Q); + list_del_init(&bp->b_list); dequeued = 1; } - pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q); - spin_unlock(&pbd_delwrite_lock); + bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q); + spin_unlock(dwlk); if (dequeued) - pagebuf_rele(pb); + xfs_buf_rele(bp); - PB_TRACE(pb, "delwri_dq", (long)dequeued); + XB_TRACE(bp, "delwri_dq", (long)dequeued); } STATIC void -pagebuf_runall_queues( +xfs_buf_runall_queues( struct workqueue_struct *queue) { flush_workqueue(queue); } -/* Defines for pagebuf daemon */ -STATIC struct task_struct *xfsbufd_task; -STATIC int xfsbufd_force_flush; -STATIC int xfsbufd_force_sleep; - STATIC int xfsbufd_wakeup( int priority, gfp_t mask) { - if (xfsbufd_force_sleep) - return 0; - xfsbufd_force_flush = 1; - barrier(); - wake_up_process(xfsbufd_task); + xfs_buftarg_t *btp; + + spin_lock(&xfs_buftarg_lock); + list_for_each_entry(btp, &xfs_buftarg_list, bt_list) { + if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags)) + continue; + set_bit(XBT_FORCE_FLUSH, &btp->bt_flags); + wake_up_process(btp->bt_task); + } + spin_unlock(&xfs_buftarg_lock); return 0; } @@ -1702,67 +1668,70 @@ xfsbufd( { struct list_head tmp; unsigned long age; - xfs_buftarg_t *target; - xfs_buf_t *pb, *n; + xfs_buftarg_t *target = (xfs_buftarg_t *)data; + xfs_buf_t *bp, *n; + struct list_head *dwq = &target->bt_delwrite_queue; + spinlock_t *dwlk = &target->bt_delwrite_lock; current->flags |= PF_MEMALLOC; INIT_LIST_HEAD(&tmp); do { if (unlikely(freezing(current))) { - xfsbufd_force_sleep = 1; + set_bit(XBT_FORCE_SLEEP, &target->bt_flags); refrigerator(); } else { - xfsbufd_force_sleep = 0; + clear_bit(XBT_FORCE_SLEEP, &target->bt_flags); } schedule_timeout_interruptible( xfs_buf_timer_centisecs * msecs_to_jiffies(10)); age = xfs_buf_age_centisecs * msecs_to_jiffies(10); - spin_lock(&pbd_delwrite_lock); - list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) { - PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb)); - ASSERT(pb->pb_flags & PBF_DELWRI); - - if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) { - if (!xfsbufd_force_flush && + spin_lock(dwlk); + list_for_each_entry_safe(bp, n, dwq, b_list) { + XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp)); + ASSERT(bp->b_flags & XBF_DELWRI); + + if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) { + if (!test_bit(XBT_FORCE_FLUSH, + &target->bt_flags) && time_before(jiffies, - pb->pb_queuetime + age)) { - pagebuf_unlock(pb); + bp->b_queuetime + age)) { + xfs_buf_unlock(bp); break; } - pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q); - pb->pb_flags |= PBF_WRITE; - list_move(&pb->pb_list, &tmp); + bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q); + bp->b_flags |= XBF_WRITE; + list_move(&bp->b_list, &tmp); } } - spin_unlock(&pbd_delwrite_lock); + spin_unlock(dwlk); while (!list_empty(&tmp)) { - pb = list_entry(tmp.next, xfs_buf_t, pb_list); - target = pb->pb_target; + bp = list_entry(tmp.next, xfs_buf_t, b_list); + ASSERT(target == bp->b_target); - list_del_init(&pb->pb_list); - pagebuf_iostrategy(pb); + list_del_init(&bp->b_list); + xfs_buf_iostrategy(bp); - blk_run_address_space(target->pbr_mapping); + blk_run_address_space(target->bt_mapping); } if (as_list_len > 0) purge_addresses(); - xfsbufd_force_flush = 0; + clear_bit(XBT_FORCE_FLUSH, &target->bt_flags); } while (!kthread_should_stop()); return 0; } /* - * Go through all incore buffers, and release buffers if they belong to - * the given device. This is used in filesystem error handling to - * preserve the consistency of its metadata. + * Go through all incore buffers, and release buffers if they belong to + * the given device. This is used in filesystem error handling to + * preserve the consistency of its metadata. */ int xfs_flush_buftarg( @@ -1770,73 +1739,72 @@ xfs_flush_buftarg( int wait) { struct list_head tmp; - xfs_buf_t *pb, *n; + xfs_buf_t *bp, *n; int pincount = 0; + struct list_head *dwq = &target->bt_delwrite_queue; + spinlock_t *dwlk = &target->bt_delwrite_lock; - pagebuf_runall_queues(xfsdatad_workqueue); - pagebuf_runall_queues(xfslogd_workqueue); + xfs_buf_runall_queues(xfsdatad_workqueue); + xfs_buf_runall_queues(xfslogd_workqueue); INIT_LIST_HEAD(&tmp); - spin_lock(&pbd_delwrite_lock); - list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) { - - if (pb->pb_target != target) - continue; - - ASSERT(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)); - PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb)); - if (pagebuf_ispin(pb)) { + spin_lock(dwlk); + list_for_each_entry_safe(bp, n, dwq, b_list) { + ASSERT(bp->b_target == target); + ASSERT(bp->b_flags & (XBF_DELWRI | _XBF_DELWRI_Q)); + XB_TRACE(bp, "walkq2", (long)xfs_buf_ispin(bp)); + if (xfs_buf_ispin(bp)) { pincount++; continue; } - list_move(&pb->pb_list, &tmp); + list_move(&bp->b_list, &tmp); } - spin_unlock(&pbd_delwrite_lock); + spin_unlock(dwlk); /* * Dropped the delayed write list lock, now walk the temporary list */ - list_for_each_entry_safe(pb, n, &tmp, pb_list) { - pagebuf_lock(pb); - pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q); - pb->pb_flags |= PBF_WRITE; + list_for_each_entry_safe(bp, n, &tmp, b_list) { + xfs_buf_lock(bp); + bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q); + bp->b_flags |= XBF_WRITE; if (wait) - pb->pb_flags &= ~PBF_ASYNC; + bp->b_flags &= ~XBF_ASYNC; else - list_del_init(&pb->pb_list); + list_del_init(&bp->b_list); - pagebuf_iostrategy(pb); + xfs_buf_iostrategy(bp); } /* * Remaining list items must be flushed before returning */ while (!list_empty(&tmp)) { - pb = list_entry(tmp.next, xfs_buf_t, pb_list); + bp = list_entry(tmp.next, xfs_buf_t, b_list); - list_del_init(&pb->pb_list); - xfs_iowait(pb); - xfs_buf_relse(pb); + list_del_init(&bp->b_list); + xfs_iowait(bp); + xfs_buf_relse(bp); } if (wait) - blk_run_address_space(target->pbr_mapping); + blk_run_address_space(target->bt_mapping); return pincount; } int __init -pagebuf_init(void) +xfs_buf_init(void) { int error = -ENOMEM; -#ifdef PAGEBUF_TRACE - pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP); +#ifdef XFS_BUF_TRACE + xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_SLEEP); #endif - pagebuf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf"); - if (!pagebuf_zone) + xfs_buf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf"); + if (!xfs_buf_zone) goto out_free_trace_buf; xfslogd_workqueue = create_workqueue("xfslogd"); @@ -1847,42 +1815,33 @@ pagebuf_init(void) if (!xfsdatad_workqueue) goto out_destroy_xfslogd_workqueue; - xfsbufd_task = kthread_run(xfsbufd, NULL, "xfsbufd"); - if (IS_ERR(xfsbufd_task)) { - error = PTR_ERR(xfsbufd_task); + xfs_buf_shake = kmem_shake_register(xfsbufd_wakeup); + if (!xfs_buf_shake) goto out_destroy_xfsdatad_workqueue; - } - - pagebuf_shake = kmem_shake_register(xfsbufd_wakeup); - if (!pagebuf_shake) - goto out_stop_xfsbufd; return 0; - out_stop_xfsbufd: - kthread_stop(xfsbufd_task); out_destroy_xfsdatad_workqueue: destroy_workqueue(xfsdatad_workqueue); out_destroy_xfslogd_workqueue: destroy_workqueue(xfslogd_workqueue); out_free_buf_zone: - kmem_zone_destroy(pagebuf_zone); + kmem_zone_destroy(xfs_buf_zone); out_free_trace_buf: -#ifdef PAGEBUF_TRACE - ktrace_free(pagebuf_trace_buf); +#ifdef XFS_BUF_TRACE + ktrace_free(xfs_buf_trace_buf); #endif return error; } void -pagebuf_terminate(void) +xfs_buf_terminate(void) { - kmem_shake_deregister(pagebuf_shake); - kthread_stop(xfsbufd_task); + kmem_shake_deregister(xfs_buf_shake); destroy_workqueue(xfsdatad_workqueue); destroy_workqueue(xfslogd_workqueue); - kmem_zone_destroy(pagebuf_zone); -#ifdef PAGEBUF_TRACE - ktrace_free(pagebuf_trace_buf); + kmem_zone_destroy(xfs_buf_zone); +#ifdef XFS_BUF_TRACE + ktrace_free(xfs_buf_trace_buf); #endif } diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 237a35b915d..4dd6592d5a4 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -32,44 +32,47 @@ * Base types */ -#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) - -#define page_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE) -#define page_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -#define page_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT) -#define page_buf_poff(aa) ((aa) & ~PAGE_CACHE_MASK) - -typedef enum page_buf_rw_e { - PBRW_READ = 1, /* transfer into target memory */ - PBRW_WRITE = 2, /* transfer from target memory */ - PBRW_ZERO = 3 /* Zero target memory */ -} page_buf_rw_t; - - -typedef enum page_buf_flags_e { /* pb_flags values */ - PBF_READ = (1 << 0), /* buffer intended for reading from device */ - PBF_WRITE = (1 << 1), /* buffer intended for writing to device */ - PBF_MAPPED = (1 << 2), /* buffer mapped (pb_addr valid) */ - PBF_ASYNC = (1 << 4), /* initiator will not wait for completion */ - PBF_DONE = (1 << 5), /* all pages in the buffer uptodate */ - PBF_DELWRI = (1 << 6), /* buffer has dirty pages */ - PBF_STALE = (1 << 7), /* buffer has been staled, do not find it */ - PBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */ - PBF_ORDERED = (1 << 11), /* use ordered writes */ - PBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */ +#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) + +#define xfs_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE) +#define xfs_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) +#define xfs_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT) +#define xfs_buf_poff(aa) ((aa) & ~PAGE_CACHE_MASK) + +typedef enum { + XBRW_READ = 1, /* transfer into target memory */ + XBRW_WRITE = 2, /* transfer from target memory */ + XBRW_ZERO = 3, /* Zero target memory */ +} xfs_buf_rw_t; + +typedef enum { + XBF_READ = (1 << 0), /* buffer intended for reading from device */ + XBF_WRITE = (1 << 1), /* buffer intended for writing to device */ + XBF_MAPPED = (1 << 2), /* buffer mapped (b_addr valid) */ + XBF_ASYNC = (1 << 4), /* initiator will not wait for completion */ + XBF_DONE = (1 << 5), /* all pages in the buffer uptodate */ + XBF_DELWRI = (1 << 6), /* buffer has dirty pages */ + XBF_STALE = (1 << 7), /* buffer has been staled, do not find it */ + XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */ + XBF_ORDERED = (1 << 11), /* use ordered writes */ + XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */ /* flags used only as arguments to access routines */ - PBF_LOCK = (1 << 14), /* lock requested */ - PBF_TRYLOCK = (1 << 15), /* lock requested, but do not wait */ - PBF_DONT_BLOCK = (1 << 16), /* do not block in current thread */ + XBF_LOCK = (1 << 14), /* lock requested */ + XBF_TRYLOCK = (1 << 15), /* lock requested, but do not wait */ + XBF_DONT_BLOCK = (1 << 16), /* do not block in current thread */ /* flags used only internally */ - _PBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */ - _PBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */ - _PBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ - _PBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */ -} page_buf_flags_t; + _XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */ + _XBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */ + _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ + _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */ +} xfs_buf_flags_t; +typedef enum { + XBT_FORCE_SLEEP = (0 << 1), + XBT_FORCE_FLUSH = (1 << 1), +} xfs_buftarg_flags_t; typedef struct xfs_bufhash { struct list_head bh_list; @@ -77,477 +80,350 @@ typedef struct xfs_bufhash { } xfs_bufhash_t; typedef struct xfs_buftarg { - dev_t pbr_dev; - struct block_device *pbr_bdev; - struct address_space *pbr_mapping; - unsigned int pbr_bsize; - unsigned int pbr_sshift; - size_t pbr_smask; - - /* per-device buffer hash table */ + dev_t bt_dev; + struct block_device *bt_bdev; + struct address_space *bt_mapping; + unsigned int bt_bsize; + unsigned int bt_sshift; + size_t bt_smask; + + /* per device buffer hash table */ uint bt_hashmask; uint bt_hashshift; xfs_bufhash_t *bt_hash; + + /* per device delwri queue */ + struct task_struct *bt_task; + struct list_head bt_list; + struct list_head bt_delwrite_queue; + spinlock_t bt_delwrite_lock; + unsigned long bt_flags; } xfs_buftarg_t; /* - * xfs_buf_t: Buffer structure for page cache-based buffers + * xfs_buf_t: Buffer structure for pagecache-based buffers + * + * This buffer structure is used by the pagecache buffer management routines + * to refer to an assembly of pages forming a logical buffer. * - * This buffer structure is used by the page cache buffer management routines - * to refer to an assembly of pages forming a logical buffer. The actual I/O - * is performed with buffer_head structures, as required by drivers. - * - * The buffer structure is used on temporary basis only, and discarded when - * released. The real data storage is recorded in the page cache. Metadata is + * The buffer structure is used on a temporary basis only, and discarded when + * released. The real data storage is recorded in the pagecache. Buffers are * hashed to the block device on which the file system resides. */ struct xfs_buf; +typedef void (*xfs_buf_iodone_t)(struct xfs_buf *); +typedef void (*xfs_buf_relse_t)(struct xfs_buf *); +typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *); -/* call-back function on I/O completion */ -typedef void (*page_buf_iodone_t)(struct xfs_buf *); -/* call-back function on I/O completion */ -typedef void (*page_buf_relse_t)(struct xfs_buf *); -/* pre-write function */ -typedef int (*page_buf_bdstrat_t)(struct xfs_buf *); - -#define PB_PAGES 2 +#define XB_PAGES 2 typedef struct xfs_buf { - struct semaphore pb_sema; /* semaphore for lockables */ - unsigned long pb_queuetime; /* time buffer was queued */ - atomic_t pb_pin_count; /* pin count */ - wait_queue_head_t pb_waiters; /* unpin waiters */ - struct list_head pb_list; - page_buf_flags_t pb_flags; /* status flags */ - struct list_head pb_hash_list; /* hash table list */ - xfs_bufhash_t *pb_hash; /* hash table list start */ - xfs_buftarg_t *pb_target; /* buffer target (device) */ - atomic_t pb_hold; /* reference count */ - xfs_daddr_t pb_bn; /* block number for I/O */ - loff_t pb_file_offset; /* offset in file */ - size_t pb_buffer_length; /* size of buffer in bytes */ - size_t pb_count_desired; /* desired transfer size */ - void *pb_addr; /* virtual address of buffer */ - struct work_struct pb_iodone_work; - atomic_t pb_io_remaining;/* #outstanding I/O requests */ - page_buf_iodone_t pb_iodone; /* I/O completion function */ - page_buf_relse_t pb_relse; /* releasing function */ - page_buf_bdstrat_t pb_strat; /* pre-write function */ - struct semaphore pb_iodonesema; /* Semaphore for I/O waiters */ - void *pb_fspriv; - void *pb_fspriv2; - void *pb_fspriv3; - unsigned short pb_error; /* error code on I/O */ - unsigned short pb_locked; /* page array is locked */ - unsigned int pb_page_count; /* size of page array */ - unsigned int pb_offset; /* page offset in first page */ - struct page **pb_pages; /* array of page pointers */ - struct page *pb_page_array[PB_PAGES]; /* inline pages */ -#ifdef PAGEBUF_LOCK_TRACKING - int pb_last_holder; + struct semaphore b_sema; /* semaphore for lockables */ + unsigned long b_queuetime; /* time buffer was queued */ + atomic_t b_pin_count; /* pin count */ + wait_queue_head_t b_waiters; /* unpin waiters */ + struct list_head b_list; + xfs_buf_flags_t b_flags; /* status flags */ + struct list_head b_hash_list; /* hash table list */ + xfs_bufhash_t *b_hash; /* hash table list start */ + xfs_buftarg_t *b_target; /* buffer target (device) */ + atomic_t b_hold; /* reference count */ + xfs_daddr_t b_bn; /* block number for I/O */ + xfs_off_t b_file_offset; /* offset in file */ + size_t b_buffer_length;/* size of buffer in bytes */ + size_t b_count_desired;/* desired transfer size */ + void *b_addr; /* virtual address of buffer */ + struct work_struct b_iodone_work; + atomic_t b_io_remaining; /* #outstanding I/O requests */ + xfs_buf_iodone_t b_iodone; /* I/O completion function */ + xfs_buf_relse_t b_relse; /* releasing function */ + xfs_buf_bdstrat_t b_strat; /* pre-write function */ + struct semaphore b_iodonesema; /* Semaphore for I/O waiters */ + void *b_fspriv; + void *b_fspriv2; + void *b_fspriv3; + unsigned short b_error; /* error code on I/O */ + unsigned short b_locked; /* page array is locked */ + unsigned int b_page_count; /* size of page array */ + unsigned int b_offset; /* page offset in first page */ + struct page **b_pages; /* array of page pointers */ + struct page *b_page_array[XB_PAGES]; /* inline pages */ +#ifdef XFS_BUF_LOCK_TRACKING + int b_last_holder; #endif } xfs_buf_t; /* Finding and Reading Buffers */ - -extern xfs_buf_t *_pagebuf_find( /* find buffer for block if */ - /* the block is in memory */ - xfs_buftarg_t *, /* inode for block */ - loff_t, /* starting offset of range */ - size_t, /* length of range */ - page_buf_flags_t, /* PBF_LOCK */ - xfs_buf_t *); /* newly allocated buffer */ - +extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t, + xfs_buf_flags_t, xfs_buf_t *); #define xfs_incore(buftarg,blkno,len,lockit) \ - _pagebuf_find(buftarg, blkno ,len, lockit, NULL) - -extern xfs_buf_t *xfs_buf_get_flags( /* allocate a buffer */ - xfs_buftarg_t *, /* inode for buffer */ - loff_t, /* starting offset of range */ - size_t, /* length of range */ - page_buf_flags_t); /* PBF_LOCK, PBF_READ, */ - /* PBF_ASYNC */ + _xfs_buf_find(buftarg, blkno ,len, lockit, NULL) +extern xfs_buf_t *xfs_buf_get_flags(xfs_buftarg_t *, xfs_off_t, size_t, + xfs_buf_flags_t); #define xfs_buf_get(target, blkno, len, flags) \ - xfs_buf_get_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED) - -extern xfs_buf_t *xfs_buf_read_flags( /* allocate and read a buffer */ - xfs_buftarg_t *, /* inode for buffer */ - loff_t, /* starting offset of range */ - size_t, /* length of range */ - page_buf_flags_t); /* PBF_LOCK, PBF_ASYNC */ + xfs_buf_get_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED) +extern xfs_buf_t *xfs_buf_read_flags(xfs_buftarg_t *, xfs_off_t, size_t, + xfs_buf_flags_t); #define xfs_buf_read(target, blkno, len, flags) \ - xfs_buf_read_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED) - -extern xfs_buf_t *pagebuf_get_empty( /* allocate pagebuf struct with */ - /* no memory or disk address */ - size_t len, - xfs_buftarg_t *); /* mount point "fake" inode */ - -extern xfs_buf_t *pagebuf_get_no_daddr(/* allocate pagebuf struct */ - /* without disk address */ - size_t len, - xfs_buftarg_t *); /* mount point "fake" inode */ - -extern int pagebuf_associate_memory( - xfs_buf_t *, - void *, - size_t); - -extern void pagebuf_hold( /* increment reference count */ - xfs_buf_t *); /* buffer to hold */ + xfs_buf_read_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED) -extern void pagebuf_readahead( /* read ahead into cache */ - xfs_buftarg_t *, /* target for buffer (or NULL) */ - loff_t, /* starting offset of range */ - size_t, /* length of range */ - page_buf_flags_t); /* additional read flags */ +extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); +extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *); +extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t); +extern void xfs_buf_hold(xfs_buf_t *); +extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t, + xfs_buf_flags_t); /* Releasing Buffers */ - -extern void pagebuf_free( /* deallocate a buffer */ - xfs_buf_t *); /* buffer to deallocate */ - -extern void pagebuf_rele( /* release hold on a buffer */ - xfs_buf_t *); /* buffer to release */ +extern void xfs_buf_free(xfs_buf_t *); +extern void xfs_buf_rele(xfs_buf_t *); /* Locking and Unlocking Buffers */ - -extern int pagebuf_cond_lock( /* lock buffer, if not locked */ - /* (returns -EBUSY if locked) */ - xfs_buf_t *); /* buffer to lock */ - -extern int pagebuf_lock_value( /* return count on lock */ - xfs_buf_t *); /* buffer to check */ - -extern int pagebuf_lock( /* lock buffer */ - xfs_buf_t *); /* buffer to lock */ - -extern void pagebuf_unlock( /* unlock buffer */ - xfs_buf_t *); /* buffer to unlock */ +extern int xfs_buf_cond_lock(xfs_buf_t *); +extern int xfs_buf_lock_value(xfs_buf_t *); +extern void xfs_buf_lock(xfs_buf_t *); +extern void xfs_buf_unlock(xfs_buf_t *); /* Buffer Read and Write Routines */ - -extern void pagebuf_iodone( /* mark buffer I/O complete */ - xfs_buf_t *, /* buffer to mark */ - int); /* run completion locally, or in - * a helper thread. */ - -extern void pagebuf_ioerror( /* mark buffer in error (or not) */ - xfs_buf_t *, /* buffer to mark */ - int); /* error to store (0 if none) */ - -extern int pagebuf_iostart( /* start I/O on a buffer */ - xfs_buf_t *, /* buffer to start */ - page_buf_flags_t); /* PBF_LOCK, PBF_ASYNC, */ - /* PBF_READ, PBF_WRITE, */ - /* PBF_DELWRI */ - -extern int pagebuf_iorequest( /* start real I/O */ - xfs_buf_t *); /* buffer to convey to device */ - -extern int pagebuf_iowait( /* wait for buffer I/O done */ - xfs_buf_t *); /* buffer to wait on */ - -extern void pagebuf_iomove( /* move data in/out of pagebuf */ - xfs_buf_t *, /* buffer to manipulate */ - size_t, /* starting buffer offset */ - size_t, /* length in buffer */ - caddr_t, /* data pointer */ - page_buf_rw_t); /* direction */ - -static inline int pagebuf_iostrategy(xfs_buf_t *pb) +extern void xfs_buf_ioend(xfs_buf_t *, int); +extern void xfs_buf_ioerror(xfs_buf_t *, int); +extern int xfs_buf_iostart(xfs_buf_t *, xfs_buf_flags_t); +extern int xfs_buf_iorequest(xfs_buf_t *); +extern int xfs_buf_iowait(xfs_buf_t *); +extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, xfs_caddr_t, + xfs_buf_rw_t); + +static inline int xfs_buf_iostrategy(xfs_buf_t *bp) { - return pb->pb_strat ? pb->pb_strat(pb) : pagebuf_iorequest(pb); + return bp->b_strat ? bp->b_strat(bp) : xfs_buf_iorequest(bp); } -static inline int pagebuf_geterror(xfs_buf_t *pb) +static inline int xfs_buf_geterror(xfs_buf_t *bp) { - return pb ? pb->pb_error : ENOMEM; + return bp ? bp->b_error : ENOMEM; } /* Buffer Utility Routines */ - -extern caddr_t pagebuf_offset( /* pointer at offset in buffer */ - xfs_buf_t *, /* buffer to offset into */ - size_t); /* offset */ +extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t); /* Pinning Buffer Storage in Memory */ - -extern void pagebuf_pin( /* pin buffer in memory */ - xfs_buf_t *); /* buffer to pin */ - -extern void pagebuf_unpin( /* unpin buffered data */ - xfs_buf_t *); /* buffer to unpin */ - -extern int pagebuf_ispin( /* check if buffer is pinned */ - xfs_buf_t *); /* buffer to check */ +extern void xfs_buf_pin(xfs_buf_t *); +extern void xfs_buf_unpin(xfs_buf_t *); +extern int xfs_buf_ispin(xfs_buf_t *); /* Delayed Write Buffer Routines */ - -extern void pagebuf_delwri_dequeue(xfs_buf_t *); +extern void xfs_buf_delwri_dequeue(xfs_buf_t *); /* Buffer Daemon Setup Routines */ +extern int xfs_buf_init(void); +extern void xfs_buf_terminate(void); -extern int pagebuf_init(void); -extern void pagebuf_terminate(void); - - -#ifdef PAGEBUF_TRACE -extern ktrace_t *pagebuf_trace_buf; -extern void pagebuf_trace( - xfs_buf_t *, /* buffer being traced */ - char *, /* description of operation */ - void *, /* arbitrary diagnostic value */ - void *); /* return address */ +#ifdef XFS_BUF_TRACE +extern ktrace_t *xfs_buf_trace_buf; +extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *); #else -# define pagebuf_trace(pb, id, ptr, ra) do { } while (0) +#define xfs_buf_trace(bp,id,ptr,ra) do { } while (0) #endif -#define pagebuf_target_name(target) \ - ({ char __b[BDEVNAME_SIZE]; bdevname((target)->pbr_bdev, __b); __b; }) +#define xfs_buf_target_name(target) \ + ({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; }) +#define XFS_B_ASYNC XBF_ASYNC +#define XFS_B_DELWRI XBF_DELWRI +#define XFS_B_READ XBF_READ +#define XFS_B_WRITE XBF_WRITE +#define XFS_B_STALE XBF_STALE -/* These are just for xfs_syncsub... it sets an internal variable - * then passes it to VOP_FLUSH_PAGES or adds the flags to a newly gotten buf_t - */ -#define XFS_B_ASYNC PBF_ASYNC -#define XFS_B_DELWRI PBF_DELWRI -#define XFS_B_READ PBF_READ -#define XFS_B_WRITE PBF_WRITE -#define XFS_B_STALE PBF_STALE - -#define XFS_BUF_TRYLOCK PBF_TRYLOCK -#define XFS_INCORE_TRYLOCK PBF_TRYLOCK -#define XFS_BUF_LOCK PBF_LOCK -#define XFS_BUF_MAPPED PBF_MAPPED - -#define BUF_BUSY PBF_DONT_BLOCK - -#define XFS_BUF_BFLAGS(x) ((x)->pb_flags) -#define XFS_BUF_ZEROFLAGS(x) \ - ((x)->pb_flags &= ~(PBF_READ|PBF_WRITE|PBF_ASYNC|PBF_DELWRI)) - -#define XFS_BUF_STALE(x) ((x)->pb_flags |= XFS_B_STALE) -#define XFS_BUF_UNSTALE(x) ((x)->pb_flags &= ~XFS_B_STALE) -#define XFS_BUF_ISSTALE(x) ((x)->pb_flags & XFS_B_STALE) -#define XFS_BUF_SUPER_STALE(x) do { \ - XFS_BUF_STALE(x); \ - pagebuf_delwri_dequeue(x); \ - XFS_BUF_DONE(x); \ - } while (0) +#define XFS_BUF_TRYLOCK XBF_TRYLOCK +#define XFS_INCORE_TRYLOCK XBF_TRYLOCK +#define XFS_BUF_LOCK XBF_LOCK +#define XFS_BUF_MAPPED XBF_MAPPED -#define XFS_BUF_MANAGE PBF_FS_MANAGED -#define XFS_BUF_UNMANAGE(x) ((x)->pb_flags &= ~PBF_FS_MANAGED) - -#define XFS_BUF_DELAYWRITE(x) ((x)->pb_flags |= PBF_DELWRI) -#define XFS_BUF_UNDELAYWRITE(x) pagebuf_delwri_dequeue(x) -#define XFS_BUF_ISDELAYWRITE(x) ((x)->pb_flags & PBF_DELWRI) - -#define XFS_BUF_ERROR(x,no) pagebuf_ioerror(x,no) -#define XFS_BUF_GETERROR(x) pagebuf_geterror(x) -#define XFS_BUF_ISERROR(x) (pagebuf_geterror(x)?1:0) - -#define XFS_BUF_DONE(x) ((x)->pb_flags |= PBF_DONE) -#define XFS_BUF_UNDONE(x) ((x)->pb_flags &= ~PBF_DONE) -#define XFS_BUF_ISDONE(x) ((x)->pb_flags & PBF_DONE) - -#define XFS_BUF_BUSY(x) do { } while (0) -#define XFS_BUF_UNBUSY(x) do { } while (0) -#define XFS_BUF_ISBUSY(x) (1) - -#define XFS_BUF_ASYNC(x) ((x)->pb_flags |= PBF_ASYNC) -#define XFS_BUF_UNASYNC(x) ((x)->pb_flags &= ~PBF_ASYNC) -#define XFS_BUF_ISASYNC(x) ((x)->pb_flags & PBF_ASYNC) - -#define XFS_BUF_ORDERED(x) ((x)->pb_flags |= PBF_ORDERED) -#define XFS_BUF_UNORDERED(x) ((x)->pb_flags &= ~PBF_ORDERED) -#define XFS_BUF_ISORDERED(x) ((x)->pb_flags & PBF_ORDERED) - -#define XFS_BUF_SHUT(x) printk("XFS_BUF_SHUT not implemented yet\n") -#define XFS_BUF_UNSHUT(x) printk("XFS_BUF_UNSHUT not implemented yet\n") -#define XFS_BUF_ISSHUT(x) (0) - -#define XFS_BUF_HOLD(x) pagebuf_hold(x) -#define XFS_BUF_READ(x) ((x)->pb_flags |= PBF_READ) -#define XFS_BUF_UNREAD(x) ((x)->pb_flags &= ~PBF_READ) -#define XFS_BUF_ISREAD(x) ((x)->pb_flags & PBF_READ) - -#define XFS_BUF_WRITE(x) ((x)->pb_flags |= PBF_WRITE) -#define XFS_BUF_UNWRITE(x) ((x)->pb_flags &= ~PBF_WRITE) -#define XFS_BUF_ISWRITE(x) ((x)->pb_flags & PBF_WRITE) - -#define XFS_BUF_ISUNINITIAL(x) (0) -#define XFS_BUF_UNUNINITIAL(x) (0) - -#define XFS_BUF_BP_ISMAPPED(bp) 1 - -#define XFS_BUF_IODONE_FUNC(buf) (buf)->pb_iodone -#define XFS_BUF_SET_IODONE_FUNC(buf, func) \ - (buf)->pb_iodone = (func) -#define XFS_BUF_CLR_IODONE_FUNC(buf) \ - (buf)->pb_iodone = NULL -#define XFS_BUF_SET_BDSTRAT_FUNC(buf, func) \ - (buf)->pb_strat = (func) -#define XFS_BUF_CLR_BDSTRAT_FUNC(buf) \ - (buf)->pb_strat = NULL - -#define XFS_BUF_FSPRIVATE(buf, type) \ - ((type)(buf)->pb_fspriv) -#define XFS_BUF_SET_FSPRIVATE(buf, value) \ - (buf)->pb_fspriv = (void *)(value) -#define XFS_BUF_FSPRIVATE2(buf, type) \ - ((type)(buf)->pb_fspriv2) -#define XFS_BUF_SET_FSPRIVATE2(buf, value) \ - (buf)->pb_fspriv2 = (void *)(value) -#define XFS_BUF_FSPRIVATE3(buf, type) \ - ((type)(buf)->pb_fspriv3) -#define XFS_BUF_SET_FSPRIVATE3(buf, value) \ - (buf)->pb_fspriv3 = (void *)(value) -#define XFS_BUF_SET_START(buf) - -#define XFS_BUF_SET_BRELSE_FUNC(buf, value) \ - (buf)->pb_relse = (value) - -#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->pb_addr) - -static inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset) -{ - if (bp->pb_flags & PBF_MAPPED) - return XFS_BUF_PTR(bp) + offset; - return (xfs_caddr_t) pagebuf_offset(bp, offset); -} +#define BUF_BUSY XBF_DONT_BLOCK + +#define XFS_BUF_BFLAGS(bp) ((bp)->b_flags) +#define XFS_BUF_ZEROFLAGS(bp) \ + ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI)) + +#define XFS_BUF_STALE(bp) ((bp)->b_flags |= XFS_B_STALE) +#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XFS_B_STALE) +#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XFS_B_STALE) +#define XFS_BUF_SUPER_STALE(bp) do { \ + XFS_BUF_STALE(bp); \ + xfs_buf_delwri_dequeue(bp); \ + XFS_BUF_DONE(bp); \ + } while (0) -#define XFS_BUF_SET_PTR(bp, val, count) \ - pagebuf_associate_memory(bp, val, count) -#define XFS_BUF_ADDR(bp) ((bp)->pb_bn) -#define XFS_BUF_SET_ADDR(bp, blk) \ - ((bp)->pb_bn = (xfs_daddr_t)(blk)) -#define XFS_BUF_OFFSET(bp) ((bp)->pb_file_offset) -#define XFS_BUF_SET_OFFSET(bp, off) \ - ((bp)->pb_file_offset = (off)) -#define XFS_BUF_COUNT(bp) ((bp)->pb_count_desired) -#define XFS_BUF_SET_COUNT(bp, cnt) \ - ((bp)->pb_count_desired = (cnt)) -#define XFS_BUF_SIZE(bp) ((bp)->pb_buffer_length) -#define XFS_BUF_SET_SIZE(bp, cnt) \ - ((bp)->pb_buffer_length = (cnt)) -#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) -#define XFS_BUF_SET_VTYPE(bp, type) -#define XFS_BUF_SET_REF(bp, ref) - -#define XFS_BUF_ISPINNED(bp) pagebuf_ispin(bp) - -#define XFS_BUF_VALUSEMA(bp) pagebuf_lock_value(bp) -#define XFS_BUF_CPSEMA(bp) (pagebuf_cond_lock(bp) == 0) -#define XFS_BUF_VSEMA(bp) pagebuf_unlock(bp) -#define XFS_BUF_PSEMA(bp,x) pagebuf_lock(bp) -#define XFS_BUF_V_IODONESEMA(bp) up(&bp->pb_iodonesema); - -/* setup the buffer target from a buftarg structure */ -#define XFS_BUF_SET_TARGET(bp, target) \ - (bp)->pb_target = (target) -#define XFS_BUF_TARGET(bp) ((bp)->pb_target) -#define XFS_BUFTARG_NAME(target) \ - pagebuf_target_name(target) - -#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) -#define XFS_BUF_SET_VTYPE(bp, type) -#define XFS_BUF_SET_REF(bp, ref) - -static inline int xfs_bawrite(void *mp, xfs_buf_t *bp) +#define XFS_BUF_MANAGE XBF_FS_MANAGED +#define XFS_BUF_UNMANAGE(bp) ((bp)->b_flags &= ~XBF_FS_MANAGED) + +#define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI) +#define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp) +#define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI) + +#define XFS_BUF_ERROR(bp,no) xfs_buf_ioerror(bp,no) +#define XFS_BUF_GETERROR(bp) xfs_buf_geterror(bp) +#define XFS_BUF_ISERROR(bp) (xfs_buf_geterror(bp) ? 1 : 0) + +#define XFS_BUF_DONE(bp) ((bp)->b_flags |= XBF_DONE) +#define XFS_BUF_UNDONE(bp) ((bp)->b_flags &= ~XBF_DONE) +#define XFS_BUF_ISDONE(bp) ((bp)->b_flags & XBF_DONE) + +#define XFS_BUF_BUSY(bp) do { } while (0) +#define XFS_BUF_UNBUSY(bp) do { } while (0) +#define XFS_BUF_ISBUSY(bp) (1) + +#define XFS_BUF_ASYNC(bp) ((bp)->b_flags |= XBF_ASYNC) +#define XFS_BUF_UNASYNC(bp) ((bp)->b_flags &= ~XBF_ASYNC) +#define XFS_BUF_ISASYNC(bp) ((bp)->b_flags & XBF_ASYNC) + +#define XFS_BUF_ORDERED(bp) ((bp)->b_flags |= XBF_ORDERED) +#define XFS_BUF_UNORDERED(bp) ((bp)->b_flags &= ~XBF_ORDERED) +#define XFS_BUF_ISORDERED(bp) ((bp)->b_flags & XBF_ORDERED) + +#define XFS_BUF_SHUT(bp) do { } while (0) +#define XFS_BUF_UNSHUT(bp) do { } while (0) +#define XFS_BUF_ISSHUT(bp) (0) + +#define XFS_BUF_HOLD(bp) xfs_buf_hold(bp) +#define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ) +#define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ) +#define XFS_BUF_ISREAD(bp) ((bp)->b_flags & XBF_READ) + +#define XFS_BUF_WRITE(bp) ((bp)->b_flags |= XBF_WRITE) +#define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE) +#define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE) + +#define XFS_BUF_ISUNINITIAL(bp) (0) +#define XFS_BUF_UNUNINITIAL(bp) (0) + +#define XFS_BUF_BP_ISMAPPED(bp) (1) + +#define XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone) +#define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func)) +#define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL) +#define XFS_BUF_SET_BDSTRAT_FUNC(bp, func) ((bp)->b_strat = (func)) +#define XFS_BUF_CLR_BDSTRAT_FUNC(bp) ((bp)->b_strat = NULL) + +#define XFS_BUF_FSPRIVATE(bp, type) ((type)(bp)->b_fspriv) +#define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val)) +#define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2) +#define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val)) +#define XFS_BUF_FSPRIVATE3(bp, type) ((type)(bp)->b_fspriv3) +#define XFS_BUF_SET_FSPRIVATE3(bp, val) ((bp)->b_fspriv3 = (void*)(val)) +#define XFS_BUF_SET_START(bp) do { } while (0) +#define XFS_BUF_SET_BRELSE_FUNC(bp, func) ((bp)->b_relse = (func)) + +#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr) +#define XFS_BUF_SET_PTR(bp, val, cnt) xfs_buf_associate_memory(bp, val, cnt) +#define XFS_BUF_ADDR(bp) ((bp)->b_bn) +#define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_bn = (xfs_daddr_t)(bno)) +#define XFS_BUF_OFFSET(bp) ((bp)->b_file_offset) +#define XFS_BUF_SET_OFFSET(bp, off) ((bp)->b_file_offset = (off)) +#define XFS_BUF_COUNT(bp) ((bp)->b_count_desired) +#define XFS_BUF_SET_COUNT(bp, cnt) ((bp)->b_count_desired = (cnt)) +#define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length) +#define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt)) + +#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) do { } while (0) +#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0) +#define XFS_BUF_SET_REF(bp, ref) do { } while (0) + +#define XFS_BUF_ISPINNED(bp) xfs_buf_ispin(bp) + +#define XFS_BUF_VALUSEMA(bp) xfs_buf_lock_value(bp) +#define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0) +#define XFS_BUF_VSEMA(bp) xfs_buf_unlock(bp) +#define XFS_BUF_PSEMA(bp,x) xfs_buf_lock(bp) +#define XFS_BUF_V_IODONESEMA(bp) up(&bp->b_iodonesema); + +#define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target)) +#define XFS_BUF_TARGET(bp) ((bp)->b_target) +#define XFS_BUFTARG_NAME(target) xfs_buf_target_name(target) + +static inline int xfs_bawrite(void *mp, xfs_buf_t *bp) { - bp->pb_fspriv3 = mp; - bp->pb_strat = xfs_bdstrat_cb; - pagebuf_delwri_dequeue(bp); - return pagebuf_iostart(bp, PBF_WRITE | PBF_ASYNC | _PBF_RUN_QUEUES); + bp->b_fspriv3 = mp; + bp->b_strat = xfs_bdstrat_cb; + xfs_buf_delwri_dequeue(bp); + return xfs_buf_iostart(bp, XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES); } -static inline void xfs_buf_relse(xfs_buf_t *bp) +static inline void xfs_buf_relse(xfs_buf_t *bp) { - if (!bp->pb_relse) - pagebuf_unlock(bp); - pagebuf_rele(bp); + if (!bp->b_relse) + xfs_buf_unlock(bp); + xfs_buf_rele(bp); } -#define xfs_bpin(bp) pagebuf_pin(bp) -#define xfs_bunpin(bp) pagebuf_unpin(bp) +#define xfs_bpin(bp) xfs_buf_pin(bp) +#define xfs_bunpin(bp) xfs_buf_unpin(bp) #define xfs_buftrace(id, bp) \ - pagebuf_trace(bp, id, NULL, (void *)__builtin_return_address(0)) + xfs_buf_trace(bp, id, NULL, (void *)__builtin_return_address(0)) -#define xfs_biodone(pb) \ - pagebuf_iodone(pb, 0) +#define xfs_biodone(bp) xfs_buf_ioend(bp, 0) -#define xfs_biomove(pb, off, len, data, rw) \ - pagebuf_iomove((pb), (off), (len), (data), \ - ((rw) == XFS_B_WRITE) ? PBRW_WRITE : PBRW_READ) +#define xfs_biomove(bp, off, len, data, rw) \ + xfs_buf_iomove((bp), (off), (len), (data), \ + ((rw) == XFS_B_WRITE) ? XBRW_WRITE : XBRW_READ) -#define xfs_biozero(pb, off, len) \ - pagebuf_iomove((pb), (off), (len), NULL, PBRW_ZERO) +#define xfs_biozero(bp, off, len) \ + xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) -static inline int XFS_bwrite(xfs_buf_t *pb) +static inline int XFS_bwrite(xfs_buf_t *bp) { - int iowait = (pb->pb_flags & PBF_ASYNC) == 0; + int iowait = (bp->b_flags & XBF_ASYNC) == 0; int error = 0; if (!iowait) - pb->pb_flags |= _PBF_RUN_QUEUES; + bp->b_flags |= _XBF_RUN_QUEUES; - pagebuf_delwri_dequeue(pb); - pagebuf_iostrategy(pb); + xfs_buf_delwri_dequeue(bp); + xfs_buf_iostrategy(bp); if (iowait) { - error = pagebuf_iowait(pb); - xfs_buf_relse(pb); + error = xfs_buf_iowait(bp); + xfs_buf_relse(bp); } return error; } -#define XFS_bdwrite(pb) \ - pagebuf_iostart(pb, PBF_DELWRI | PBF_ASYNC) +#define XFS_bdwrite(bp) xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC) static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp) { - bp->pb_strat = xfs_bdstrat_cb; - bp->pb_fspriv3 = mp; - - return pagebuf_iostart(bp, PBF_DELWRI | PBF_ASYNC); + bp->b_strat = xfs_bdstrat_cb; + bp->b_fspriv3 = mp; + return xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC); } -#define XFS_bdstrat(bp) pagebuf_iorequest(bp) +#define XFS_bdstrat(bp) xfs_buf_iorequest(bp) -#define xfs_iowait(pb) pagebuf_iowait(pb) +#define xfs_iowait(bp) xfs_buf_iowait(bp) #define xfs_baread(target, rablkno, ralen) \ - pagebuf_readahead((target), (rablkno), (ralen), PBF_DONT_BLOCK) - -#define xfs_buf_get_empty(len, target) pagebuf_get_empty((len), (target)) -#define xfs_buf_get_noaddr(len, target) pagebuf_get_no_daddr((len), (target)) -#define xfs_buf_free(bp) pagebuf_free(bp) + xfs_buf_readahead((target), (rablkno), (ralen), XBF_DONT_BLOCK) /* * Handling of buftargs. */ - extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int); extern void xfs_free_buftarg(xfs_buftarg_t *, int); extern void xfs_wait_buftarg(xfs_buftarg_t *); extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); extern int xfs_flush_buftarg(xfs_buftarg_t *, int); -#define xfs_getsize_buftarg(buftarg) \ - block_size((buftarg)->pbr_bdev) -#define xfs_readonly_buftarg(buftarg) \ - bdev_read_only((buftarg)->pbr_bdev) -#define xfs_binval(buftarg) \ - xfs_flush_buftarg(buftarg, 1) -#define XFS_bflush(buftarg) \ - xfs_flush_buftarg(buftarg, 1) +#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) +#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev) + +#define xfs_binval(buftarg) xfs_flush_buftarg(buftarg, 1) +#define XFS_bflush(buftarg) xfs_flush_buftarg(buftarg, 1) #endif /* __XFS_BUF_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 06111d0bbae..ced4404339c 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -509,16 +509,14 @@ linvfs_open_exec( vnode_t *vp = LINVFS_GET_VP(inode); xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp); int error = 0; - bhv_desc_t *bdp; xfs_inode_t *ip; if (vp->v_vfsp->vfs_flag & VFS_DMI) { - bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops); - if (!bdp) { + ip = xfs_vtoi(vp); + if (!ip) { error = -EINVAL; goto open_exec_out; } - ip = XFS_BHVTOI(bdp); if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ)) { error = -XFS_SEND_DATA(mp, DM_EVENT_READ, vp, 0, 0, 0, NULL); diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 21667ba6dcd..4db47790415 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -146,13 +146,10 @@ xfs_find_handle( if (cmd != XFS_IOC_PATH_TO_FSHANDLE) { xfs_inode_t *ip; - bhv_desc_t *bhv; int lock_mode; /* need to get access to the xfs_inode to read the generation */ - bhv = vn_bhv_lookup_unlocked(VN_BHV_HEAD(vp), &xfs_vnodeops); - ASSERT(bhv); - ip = XFS_BHVTOI(bhv); + ip = xfs_vtoi(vp); ASSERT(ip); lock_mode = xfs_ilock_map_shared(ip); @@ -751,9 +748,8 @@ xfs_ioctl( (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? mp->m_rtdev_targp : mp->m_ddev_targp; - da.d_mem = da.d_miniosz = 1 << target->pbr_sshift; - /* The size dio will do in one go */ - da.d_maxiosz = 64 * PAGE_CACHE_SIZE; + da.d_mem = da.d_miniosz = 1 << target->bt_sshift; + da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1); if (copy_to_user(arg, &da, sizeof(da))) return -XFS_ERROR(EFAULT); diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 9b8ee3470ec..4bd3d03b23e 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -54,11 +54,46 @@ #include <linux/capability.h> #include <linux/xattr.h> #include <linux/namei.h> +#include <linux/security.h> #define IS_NOATIME(inode) ((inode->i_sb->s_flags & MS_NOATIME) || \ (S_ISDIR(inode->i_mode) && inode->i_sb->s_flags & MS_NODIRATIME)) /* + * Get a XFS inode from a given vnode. + */ +xfs_inode_t * +xfs_vtoi( + struct vnode *vp) +{ + bhv_desc_t *bdp; + + bdp = bhv_lookup_range(VN_BHV_HEAD(vp), + VNODE_POSITION_XFS, VNODE_POSITION_XFS); + if (unlikely(bdp == NULL)) + return NULL; + return XFS_BHVTOI(bdp); +} + +/* + * Bring the atime in the XFS inode uptodate. + * Used before logging the inode to disk or when the Linux inode goes away. + */ +void +xfs_synchronize_atime( + xfs_inode_t *ip) +{ + vnode_t *vp; + + vp = XFS_ITOV_NULL(ip); + if (vp) { + struct inode *inode = &vp->v_inode; + ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; + ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec; + } +} + +/* * Change the requested timestamp in the given inode. * We don't lock across timestamp updates, and we don't log them but * we do record the fact that there is dirty information in core. @@ -77,23 +112,6 @@ xfs_ichgtime( struct inode *inode = LINVFS_GET_IP(XFS_ITOV(ip)); timespec_t tv; - /* - * We're not supposed to change timestamps in readonly-mounted - * filesystems. Throw it away if anyone asks us. - */ - if (unlikely(IS_RDONLY(inode))) - return; - - /* - * Don't update access timestamps on reads if mounted "noatime". - * Throw it away if anyone asks us. - */ - if (unlikely( - (ip->i_mount->m_flags & XFS_MOUNT_NOATIME || IS_NOATIME(inode)) && - (flags & (XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD|XFS_ICHGTIME_CHG)) == - XFS_ICHGTIME_ACC)) - return; - nanotime(&tv); if (flags & XFS_ICHGTIME_MOD) { inode->i_mtime = tv; @@ -130,8 +148,6 @@ xfs_ichgtime( * Variant on the above which avoids querying the system clock * in situations where we know the Linux inode timestamps have * just been updated (and so we can update our inode cheaply). - * We also skip the readonly and noatime checks here, they are - * also catered for already. */ void xfs_ichgtime_fast( @@ -142,20 +158,16 @@ xfs_ichgtime_fast( timespec_t *tvp; /* - * We're not supposed to change timestamps in readonly-mounted - * filesystems. Throw it away if anyone asks us. + * Atime updates for read() & friends are handled lazily now, and + * explicit updates must go through xfs_ichgtime() */ - if (unlikely(IS_RDONLY(inode))) - return; + ASSERT((flags & XFS_ICHGTIME_ACC) == 0); /* - * Don't update access timestamps on reads if mounted "noatime". - * Throw it away if anyone asks us. + * We're not supposed to change timestamps in readonly-mounted + * filesystems. Throw it away if anyone asks us. */ - if (unlikely( - (ip->i_mount->m_flags & XFS_MOUNT_NOATIME || IS_NOATIME(inode)) && - ((flags & (XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD|XFS_ICHGTIME_CHG)) == - XFS_ICHGTIME_ACC))) + if (unlikely(IS_RDONLY(inode))) return; if (flags & XFS_ICHGTIME_MOD) { @@ -163,11 +175,6 @@ xfs_ichgtime_fast( ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec; ip->i_d.di_mtime.t_nsec = (__int32_t)tvp->tv_nsec; } - if (flags & XFS_ICHGTIME_ACC) { - tvp = &inode->i_atime; - ip->i_d.di_atime.t_sec = (__int32_t)tvp->tv_sec; - ip->i_d.di_atime.t_nsec = (__int32_t)tvp->tv_nsec; - } if (flags & XFS_ICHGTIME_CHG) { tvp = &inode->i_ctime; ip->i_d.di_ctime.t_sec = (__int32_t)tvp->tv_sec; @@ -214,6 +221,39 @@ validate_fields( } /* + * Hook in SELinux. This is not quite correct yet, what we really need + * here (as we do for default ACLs) is a mechanism by which creation of + * these attrs can be journalled at inode creation time (along with the + * inode, of course, such that log replay can't cause these to be lost). + */ +STATIC int +linvfs_init_security( + struct vnode *vp, + struct inode *dir) +{ + struct inode *ip = LINVFS_GET_IP(vp); + size_t length; + void *value; + char *name; + int error; + + error = security_inode_init_security(ip, dir, &name, &value, &length); + if (error) { + if (error == -EOPNOTSUPP) + return 0; + return -error; + } + + VOP_ATTR_SET(vp, name, value, length, ATTR_SECURE, NULL, error); + if (!error) + VMODIFY(vp); + + kfree(name); + kfree(value); + return error; +} + +/* * Determine whether a process has a valid fs_struct (kernel daemons * like knfsd don't have an fs_struct). * @@ -278,6 +318,9 @@ linvfs_mknod( break; } + if (!error) + error = linvfs_init_security(vp, dir); + if (default_acl) { if (!error) { error = _ACL_INHERIT(vp, &va, default_acl); @@ -294,8 +337,6 @@ linvfs_mknod( teardown.d_inode = ip = LINVFS_GET_IP(vp); teardown.d_name = dentry->d_name; - vn_mark_bad(vp); - if (S_ISDIR(mode)) VOP_RMDIR(dvp, &teardown, NULL, err2); else @@ -506,7 +547,7 @@ linvfs_follow_link( ASSERT(dentry); ASSERT(nd); - link = (char *)kmalloc(MAXNAMELEN+1, GFP_KERNEL); + link = (char *)kmalloc(MAXPATHLEN+1, GFP_KERNEL); if (!link) { nd_set_link(nd, ERR_PTR(-ENOMEM)); return NULL; @@ -522,12 +563,12 @@ linvfs_follow_link( vp = LINVFS_GET_VP(dentry->d_inode); iov.iov_base = link; - iov.iov_len = MAXNAMELEN; + iov.iov_len = MAXPATHLEN; uio->uio_iov = &iov; uio->uio_offset = 0; uio->uio_segflg = UIO_SYSSPACE; - uio->uio_resid = MAXNAMELEN; + uio->uio_resid = MAXPATHLEN; uio->uio_iovcnt = 1; VOP_READLINK(vp, uio, 0, NULL, error); @@ -535,7 +576,7 @@ linvfs_follow_link( kfree(link); link = ERR_PTR(-error); } else { - link[MAXNAMELEN - uio->uio_resid] = '\0'; + link[MAXPATHLEN - uio->uio_resid] = '\0'; } kfree(uio); diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h index ee784b63acb..6899a6b4a50 100644 --- a/fs/xfs/linux-2.6/xfs_iops.h +++ b/fs/xfs/linux-2.6/xfs_iops.h @@ -26,11 +26,6 @@ extern struct file_operations linvfs_file_operations; extern struct file_operations linvfs_invis_file_operations; extern struct file_operations linvfs_dir_operations; -extern struct address_space_operations linvfs_aops; - -extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int); -extern void linvfs_unwritten_done(struct buffer_head *, int); - extern int xfs_ioctl(struct bhv_desc *, struct inode *, struct file *, int, unsigned int, void __user *); diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index d8e21ba0ccc..67389b74552 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -110,10 +110,6 @@ * delalloc and these ondisk-uninitialised buffers. */ BUFFER_FNS(PrivateStart, unwritten); -static inline void set_buffer_unwritten_io(struct buffer_head *bh) -{ - bh->b_end_io = linvfs_unwritten_done; -} #define restricted_chown xfs_params.restrict_chown.val #define irix_sgid_inherit xfs_params.sgid_inherit.val @@ -232,7 +228,7 @@ static inline void set_buffer_unwritten_io(struct buffer_head *bh) #define xfs_itruncate_data(ip, off) \ (-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off))) #define xfs_statvfs_fsid(statp, mp) \ - ({ u64 id = huge_encode_dev((mp)->m_dev); \ + ({ u64 id = huge_encode_dev((mp)->m_ddev_targp->bt_dev); \ __kernel_fsid_t *fsid = &(statp)->f_fsid; \ (fsid->val[0] = (u32)id, fsid->val[1] = (u32)(id >> 32)); }) diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 885dfafeabe..e0ab45fbfeb 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -233,8 +233,8 @@ xfs_read( xfs_buftarg_t *target = (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? mp->m_rtdev_targp : mp->m_ddev_targp; - if ((*offset & target->pbr_smask) || - (size & target->pbr_smask)) { + if ((*offset & target->bt_smask) || + (size & target->bt_smask)) { if (*offset == ip->i_d.di_size) { return (0); } @@ -281,9 +281,6 @@ xfs_read( xfs_iunlock(ip, XFS_IOLOCK_SHARED); - if (likely(!(ioflags & IO_INVIS))) - xfs_ichgtime_fast(ip, inode, XFS_ICHGTIME_ACC); - unlock_isem: if (unlikely(ioflags & IO_ISDIRECT)) mutex_unlock(&inode->i_mutex); @@ -346,9 +343,6 @@ xfs_sendfile( if (ret > 0) XFS_STATS_ADD(xs_read_bytes, ret); - if (likely(!(ioflags & IO_INVIS))) - xfs_ichgtime_fast(ip, LINVFS_GET_IP(vp), XFS_ICHGTIME_ACC); - return ret; } @@ -362,7 +356,6 @@ STATIC int /* error (positive) */ xfs_zero_last_block( struct inode *ip, xfs_iocore_t *io, - xfs_off_t offset, xfs_fsize_t isize, xfs_fsize_t end_size) { @@ -371,19 +364,16 @@ xfs_zero_last_block( int nimaps; int zero_offset; int zero_len; - int isize_fsb_offset; int error = 0; xfs_bmbt_irec_t imap; loff_t loff; - size_t lsize; ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0); - ASSERT(offset > isize); mp = io->io_mount; - isize_fsb_offset = XFS_B_FSB_OFFSET(mp, isize); - if (isize_fsb_offset == 0) { + zero_offset = XFS_B_FSB_OFFSET(mp, isize); + if (zero_offset == 0) { /* * There are no extra bytes in the last block on disk to * zero, so return. @@ -413,10 +403,8 @@ xfs_zero_last_block( */ XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD); loff = XFS_FSB_TO_B(mp, last_fsb); - lsize = XFS_FSB_TO_B(mp, 1); - zero_offset = isize_fsb_offset; - zero_len = mp->m_sb.sb_blocksize - isize_fsb_offset; + zero_len = mp->m_sb.sb_blocksize - zero_offset; error = xfs_iozero(ip, loff + zero_offset, zero_len, end_size); @@ -447,20 +435,17 @@ xfs_zero_eof( struct inode *ip = LINVFS_GET_IP(vp); xfs_fileoff_t start_zero_fsb; xfs_fileoff_t end_zero_fsb; - xfs_fileoff_t prev_zero_fsb; xfs_fileoff_t zero_count_fsb; xfs_fileoff_t last_fsb; xfs_extlen_t buf_len_fsb; - xfs_extlen_t prev_zero_count; xfs_mount_t *mp; int nimaps; int error = 0; xfs_bmbt_irec_t imap; - loff_t loff; - size_t lsize; ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); + ASSERT(offset > isize); mp = io->io_mount; @@ -468,7 +453,7 @@ xfs_zero_eof( * First handle zeroing the block on which isize resides. * We only zero a part of that block so it is handled specially. */ - error = xfs_zero_last_block(ip, io, offset, isize, end_size); + error = xfs_zero_last_block(ip, io, isize, end_size); if (error) { ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); @@ -496,8 +481,6 @@ xfs_zero_eof( } ASSERT(start_zero_fsb <= end_zero_fsb); - prev_zero_fsb = NULLFILEOFF; - prev_zero_count = 0; while (start_zero_fsb <= end_zero_fsb) { nimaps = 1; zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; @@ -519,10 +502,7 @@ xfs_zero_eof( * that sits on a hole and sets the page as P_HOLE * and calls remapf if it is a mapped file. */ - prev_zero_fsb = NULLFILEOFF; - prev_zero_count = 0; - start_zero_fsb = imap.br_startoff + - imap.br_blockcount; + start_zero_fsb = imap.br_startoff + imap.br_blockcount; ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); continue; } @@ -543,17 +523,15 @@ xfs_zero_eof( */ XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); - loff = XFS_FSB_TO_B(mp, start_zero_fsb); - lsize = XFS_FSB_TO_B(mp, buf_len_fsb); - - error = xfs_iozero(ip, loff, lsize, end_size); + error = xfs_iozero(ip, + XFS_FSB_TO_B(mp, start_zero_fsb), + XFS_FSB_TO_B(mp, buf_len_fsb), + end_size); if (error) { goto out_lock; } - prev_zero_fsb = start_zero_fsb; - prev_zero_count = buf_len_fsb; start_zero_fsb = imap.br_startoff + buf_len_fsb; ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); @@ -640,7 +618,7 @@ xfs_write( (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? mp->m_rtdev_targp : mp->m_ddev_targp; - if ((pos & target->pbr_smask) || (count & target->pbr_smask)) + if ((pos & target->bt_smask) || (count & target->bt_smask)) return XFS_ERROR(-EINVAL); if (!VN_CACHED(vp) && pos < i_size_read(inode)) @@ -831,6 +809,10 @@ retry: goto retry; } + isize = i_size_read(inode); + if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize)) + *offset = isize; + if (*offset > xip->i_d.di_size) { xfs_ilock(xip, XFS_ILOCK_EXCL); if (*offset > xip->i_d.di_size) { @@ -956,7 +938,7 @@ xfs_bdstrat_cb(struct xfs_buf *bp) mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *); if (!XFS_FORCED_SHUTDOWN(mp)) { - pagebuf_iorequest(bp); + xfs_buf_iorequest(bp); return 0; } else { xfs_buftrace("XFS__BDSTRAT IOERROR", bp); @@ -1009,7 +991,7 @@ xfsbdstrat( * if (XFS_BUF_IS_GRIO(bp)) { */ - pagebuf_iorequest(bp); + xfs_buf_iorequest(bp); return 0; } diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c index 6c40a74be7c..8955720a2c6 100644 --- a/fs/xfs/linux-2.6/xfs_stats.c +++ b/fs/xfs/linux-2.6/xfs_stats.c @@ -34,7 +34,7 @@ xfs_read_xfsstats( __uint64_t xs_write_bytes = 0; __uint64_t xs_read_bytes = 0; - static struct xstats_entry { + static const struct xstats_entry { char *desc; int endpoint; } xstats[] = { diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h index 50027c4a561..8ba7a2fa6c1 100644 --- a/fs/xfs/linux-2.6/xfs_stats.h +++ b/fs/xfs/linux-2.6/xfs_stats.h @@ -109,15 +109,15 @@ struct xfsstats { __uint32_t vn_remove; /* # times vn_remove called */ __uint32_t vn_free; /* # times vn_free called */ #define XFSSTAT_END_BUF (XFSSTAT_END_VNODE_OPS+9) - __uint32_t pb_get; - __uint32_t pb_create; - __uint32_t pb_get_locked; - __uint32_t pb_get_locked_waited; - __uint32_t pb_busy_locked; - __uint32_t pb_miss_locked; - __uint32_t pb_page_retries; - __uint32_t pb_page_found; - __uint32_t pb_get_read; + __uint32_t xb_get; + __uint32_t xb_create; + __uint32_t xb_get_locked; + __uint32_t xb_get_locked_waited; + __uint32_t xb_busy_locked; + __uint32_t xb_miss_locked; + __uint32_t xb_page_retries; + __uint32_t xb_page_found; + __uint32_t xb_get_read; /* Extra precision counters */ __uint64_t xs_xstrat_bytes; __uint64_t xs_write_bytes; diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 6116b5bf433..f22e426d9e4 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -306,13 +306,15 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp) xfs_fs_cmn_err(CE_NOTE, mp, "Disabling barriers, not supported with external log device"); mp->m_flags &= ~XFS_MOUNT_BARRIER; + return; } - if (mp->m_ddev_targp->pbr_bdev->bd_disk->queue->ordered == + if (mp->m_ddev_targp->bt_bdev->bd_disk->queue->ordered == QUEUE_ORDERED_NONE) { xfs_fs_cmn_err(CE_NOTE, mp, "Disabling barriers, not supported by the underlying device"); mp->m_flags &= ~XFS_MOUNT_BARRIER; + return; } error = xfs_barrier_test(mp); @@ -320,6 +322,7 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp) xfs_fs_cmn_err(CE_NOTE, mp, "Disabling barriers, trial barrier write failed"); mp->m_flags &= ~XFS_MOUNT_BARRIER; + return; } } @@ -327,7 +330,7 @@ void xfs_blkdev_issue_flush( xfs_buftarg_t *buftarg) { - blkdev_issue_flush(buftarg->pbr_bdev, NULL); + blkdev_issue_flush(buftarg->bt_bdev, NULL); } STATIC struct inode * @@ -576,7 +579,7 @@ xfssyncd( timeleft = schedule_timeout_interruptible(timeleft); /* swsusp */ try_to_freeze(); - if (kthread_should_stop()) + if (kthread_should_stop() && list_empty(&vfsp->vfs_sync_list)) break; spin_lock(&vfsp->vfs_sync_lock); @@ -966,9 +969,9 @@ init_xfs_fs( void ) if (error < 0) goto undo_zones; - error = pagebuf_init(); + error = xfs_buf_init(); if (error < 0) - goto undo_pagebuf; + goto undo_buffers; vn_init(); xfs_init(); @@ -982,9 +985,9 @@ init_xfs_fs( void ) return 0; undo_register: - pagebuf_terminate(); + xfs_buf_terminate(); -undo_pagebuf: +undo_buffers: linvfs_destroy_zones(); undo_zones: @@ -998,7 +1001,7 @@ exit_xfs_fs( void ) XFS_DM_EXIT(&xfs_fs_type); unregister_filesystem(&xfs_fs_type); xfs_cleanup(); - pagebuf_terminate(); + xfs_buf_terminate(); linvfs_destroy_zones(); ktrace_uninit(); } diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index e9bbcb4d624..260dd8415dd 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -106,7 +106,6 @@ vn_revalidate_core( inode->i_blocks = vap->va_nblocks; inode->i_mtime = vap->va_mtime; inode->i_ctime = vap->va_ctime; - inode->i_atime = vap->va_atime; inode->i_blksize = vap->va_blocksize; if (vap->va_xflags & XFS_XFLAG_IMMUTABLE) inode->i_flags |= S_IMMUTABLE; diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index f2bbb327c08..0fe2419461d 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -566,6 +566,25 @@ static inline int VN_BAD(struct vnode *vp) } /* + * Extracting atime values in various formats + */ +static inline void vn_atime_to_bstime(struct vnode *vp, xfs_bstime_t *bs_atime) +{ + bs_atime->tv_sec = vp->v_inode.i_atime.tv_sec; + bs_atime->tv_nsec = vp->v_inode.i_atime.tv_nsec; +} + +static inline void vn_atime_to_timespec(struct vnode *vp, struct timespec *ts) +{ + *ts = vp->v_inode.i_atime; +} + +static inline void vn_atime_to_time_t(struct vnode *vp, time_t *tt) +{ + *tt = vp->v_inode.i_atime.tv_sec; +} + +/* * Some useful predicates. */ #define VN_MAPPED(vp) mapping_mapped(LINVFS_GET_IP(vp)->i_mapping) |